@hazeljs/data 0.2.0-beta.68 → 0.2.0-beta.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +175 -61
- package/dist/connectors/connector.interface.d.ts +29 -0
- package/dist/connectors/connector.interface.d.ts.map +1 -0
- package/dist/connectors/connector.interface.js +6 -0
- package/dist/connectors/csv.connector.d.ts +63 -0
- package/dist/connectors/csv.connector.d.ts.map +1 -0
- package/dist/connectors/csv.connector.js +147 -0
- package/dist/connectors/http.connector.d.ts +68 -0
- package/dist/connectors/http.connector.d.ts.map +1 -0
- package/dist/connectors/http.connector.js +131 -0
- package/dist/connectors/index.d.ts +7 -0
- package/dist/connectors/index.d.ts.map +1 -0
- package/dist/connectors/index.js +12 -0
- package/dist/connectors/memory.connector.d.ts +38 -0
- package/dist/connectors/memory.connector.d.ts.map +1 -0
- package/dist/connectors/memory.connector.js +56 -0
- package/dist/connectors/memory.connector.test.d.ts +2 -0
- package/dist/connectors/memory.connector.test.d.ts.map +1 -0
- package/dist/connectors/memory.connector.test.js +43 -0
- package/dist/data.types.d.ts +16 -0
- package/dist/data.types.d.ts.map +1 -1
- package/dist/decorators/index.d.ts +1 -0
- package/dist/decorators/index.d.ts.map +1 -1
- package/dist/decorators/index.js +8 -1
- package/dist/decorators/pii.decorator.d.ts +59 -0
- package/dist/decorators/pii.decorator.d.ts.map +1 -0
- package/dist/decorators/pii.decorator.js +197 -0
- package/dist/decorators/pii.decorator.test.d.ts +2 -0
- package/dist/decorators/pii.decorator.test.d.ts.map +1 -0
- package/dist/decorators/pii.decorator.test.js +150 -0
- package/dist/decorators/pipeline.decorator.js +1 -1
- package/dist/decorators/pipeline.decorator.test.js +8 -0
- package/dist/decorators/transform.decorator.d.ts +9 -1
- package/dist/decorators/transform.decorator.d.ts.map +1 -1
- package/dist/decorators/transform.decorator.js +4 -0
- package/dist/decorators/validate.decorator.d.ts +5 -1
- package/dist/decorators/validate.decorator.d.ts.map +1 -1
- package/dist/decorators/validate.decorator.js +4 -0
- package/dist/flink.service.d.ts +30 -0
- package/dist/flink.service.d.ts.map +1 -1
- package/dist/flink.service.js +50 -2
- package/dist/index.d.ts +13 -7
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +36 -8
- package/dist/pipelines/etl.service.d.ts +41 -2
- package/dist/pipelines/etl.service.d.ts.map +1 -1
- package/dist/pipelines/etl.service.js +143 -6
- package/dist/pipelines/etl.service.test.js +215 -0
- package/dist/pipelines/pipeline.builder.d.ts +86 -13
- package/dist/pipelines/pipeline.builder.d.ts.map +1 -1
- package/dist/pipelines/pipeline.builder.js +177 -27
- package/dist/pipelines/pipeline.builder.test.js +160 -12
- package/dist/pipelines/stream.service.test.js +49 -0
- package/dist/quality/quality.service.d.ts +67 -5
- package/dist/quality/quality.service.d.ts.map +1 -1
- package/dist/quality/quality.service.js +259 -20
- package/dist/quality/quality.service.test.js +94 -0
- package/dist/schema/schema.d.ts +92 -12
- package/dist/schema/schema.d.ts.map +1 -1
- package/dist/schema/schema.js +395 -83
- package/dist/schema/schema.test.js +292 -0
- package/dist/streaming/flink/flink.client.d.ts +41 -3
- package/dist/streaming/flink/flink.client.d.ts.map +1 -1
- package/dist/streaming/flink/flink.client.js +171 -8
- package/dist/streaming/flink/flink.client.test.js +2 -2
- package/dist/streaming/flink/flink.job.d.ts +2 -1
- package/dist/streaming/flink/flink.job.d.ts.map +1 -1
- package/dist/streaming/flink/flink.job.js +2 -2
- package/dist/streaming/stream.processor.d.ts +56 -2
- package/dist/streaming/stream.processor.d.ts.map +1 -1
- package/dist/streaming/stream.processor.js +149 -2
- package/dist/streaming/stream.processor.test.js +99 -0
- package/dist/streaming/stream.processor.windowing.test.d.ts +2 -0
- package/dist/streaming/stream.processor.windowing.test.d.ts.map +1 -0
- package/dist/streaming/stream.processor.windowing.test.js +69 -0
- package/dist/telemetry/telemetry.d.ts +124 -0
- package/dist/telemetry/telemetry.d.ts.map +1 -0
- package/dist/telemetry/telemetry.js +259 -0
- package/dist/telemetry/telemetry.test.d.ts +2 -0
- package/dist/telemetry/telemetry.test.d.ts.map +1 -0
- package/dist/telemetry/telemetry.test.js +51 -0
- package/dist/testing/index.d.ts +12 -0
- package/dist/testing/index.d.ts.map +1 -0
- package/dist/testing/index.js +18 -0
- package/dist/testing/pipeline-test-harness.d.ts +40 -0
- package/dist/testing/pipeline-test-harness.d.ts.map +1 -0
- package/dist/testing/pipeline-test-harness.js +55 -0
- package/dist/testing/pipeline-test-harness.test.d.ts +2 -0
- package/dist/testing/pipeline-test-harness.test.d.ts.map +1 -0
- package/dist/testing/pipeline-test-harness.test.js +102 -0
- package/dist/testing/schema-faker.d.ts +32 -0
- package/dist/testing/schema-faker.d.ts.map +1 -0
- package/dist/testing/schema-faker.js +91 -0
- package/dist/testing/schema-faker.test.d.ts +2 -0
- package/dist/testing/schema-faker.test.d.ts.map +1 -0
- package/dist/testing/schema-faker.test.js +66 -0
- package/dist/transformers/built-in.transformers.test.js +28 -0
- package/dist/transformers/transformer.service.test.js +10 -0
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# @hazeljs/data
|
|
2
2
|
|
|
3
|
-
Data Processing & ETL for HazelJS - pipelines, schema validation, streaming,
|
|
3
|
+
Data Processing & ETL for HazelJS - pipelines, schema validation, streaming, data quality, and more.
|
|
4
4
|
|
|
5
5
|
[](https://www.npmjs.com/package/@hazeljs/data)
|
|
6
6
|
[](https://www.npmjs.com/package/@hazeljs/data)
|
|
@@ -9,11 +9,16 @@ Data Processing & ETL for HazelJS - pipelines, schema validation, streaming, and
|
|
|
9
9
|
## Features
|
|
10
10
|
|
|
11
11
|
- **Pipelines** – Declarative ETL with `@Pipeline`, `@Transform`, `@Validate` decorators
|
|
12
|
-
- **Schema validation** – Fluent Schema API (string, number, object, array,
|
|
13
|
-
- **
|
|
14
|
-
- **
|
|
15
|
-
- **
|
|
16
|
-
- **
|
|
12
|
+
- **Schema validation** – Fluent Schema API (string, number, boolean, date, object, array, literal, union) with `.optional()`, `.nullable()`, `.default()`, `.transform()`, `.refine()`, `Infer<T>`, `.toJsonSchema()`
|
|
13
|
+
- **Pipeline options** – Conditional steps (`when`), per-step retry, timeout, dead letter queue (DLQ)
|
|
14
|
+
- **PipelineBuilder** – Programmatic pipelines with `.branch()`, `.parallel()`, `.catch()`, `.toSchema()`
|
|
15
|
+
- **ETL service** – Execute multi-step pipelines with `executeBatch`, `onStepComplete`
|
|
16
|
+
- **Stream processing** – StreamService, StreamProcessor with tumbling/sliding/session windows and stream join
|
|
17
|
+
- **Built-in transformers** – trimString, toLowerCase, toUpperCase, parseJson, stringifyJson, pick, omit, renameKeys
|
|
18
|
+
- **Data quality** – QualityService with completeness, notNull, uniqueness, range, pattern, referentialIntegrity, profile(), detectAnomalies()
|
|
19
|
+
- **Connectors** – DataSource/DataSink (MemorySource, MemorySink, CsvSource, HttpSource)
|
|
20
|
+
- **PII decorators** – @Mask, @Redact, @Encrypt, @Decrypt for sensitive data
|
|
21
|
+
- **Test utilities** – SchemaFaker, PipelineTestHarness, MockSource, MockSink
|
|
17
22
|
- **Flink integration** – Optional Apache Flink deployment for distributed stream processing
|
|
18
23
|
|
|
19
24
|
## Installation
|
|
@@ -48,18 +53,24 @@ import {
|
|
|
48
53
|
Validate,
|
|
49
54
|
ETLService,
|
|
50
55
|
Schema,
|
|
56
|
+
Infer,
|
|
51
57
|
} from '@hazeljs/data';
|
|
52
58
|
|
|
53
|
-
const OrderSchema = Schema.object(
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
59
|
+
const OrderSchema = Schema.object({
|
|
60
|
+
id: Schema.string().min(1),
|
|
61
|
+
customerId: Schema.string().min(1),
|
|
62
|
+
items: Schema.array(
|
|
63
|
+
Schema.object({
|
|
64
|
+
sku: Schema.string().min(1),
|
|
65
|
+
qty: Schema.number().min(1),
|
|
66
|
+
price: Schema.number().min(0),
|
|
67
|
+
})
|
|
68
|
+
),
|
|
69
|
+
status: Schema.string().oneOf(['pending', 'paid', 'shipped', 'delivered', 'cancelled']),
|
|
70
|
+
createdAt: Schema.string().min(1),
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
type Order = Infer<typeof OrderSchema>;
|
|
63
74
|
|
|
64
75
|
@Pipeline('order-processing')
|
|
65
76
|
@Injectable()
|
|
@@ -69,30 +80,21 @@ export class OrderProcessingPipeline extends PipelineBase {
|
|
|
69
80
|
}
|
|
70
81
|
|
|
71
82
|
@Transform({ step: 1, name: 'normalize' })
|
|
72
|
-
async normalize(data:
|
|
73
|
-
return {
|
|
74
|
-
...data,
|
|
75
|
-
status: String(data.status).toLowerCase(),
|
|
76
|
-
};
|
|
83
|
+
async normalize(data: unknown): Promise<Order> {
|
|
84
|
+
return { ...(data as Order), status: String((data as Order).status).toLowerCase() };
|
|
77
85
|
}
|
|
78
86
|
|
|
79
|
-
@Validate({ step: 2, schema: OrderSchema })
|
|
80
|
-
async validate(data:
|
|
87
|
+
@Validate({ step: 2, name: 'validate', schema: OrderSchema })
|
|
88
|
+
async validate(data: Order): Promise<Order> {
|
|
81
89
|
return data;
|
|
82
90
|
}
|
|
83
91
|
|
|
84
92
|
@Transform({ step: 3, name: 'enrich' })
|
|
85
|
-
async enrich(data:
|
|
93
|
+
async enrich(data: Order): Promise<Order & { subtotal: number; total: number; tax: number }> {
|
|
86
94
|
const items = data.items ?? [];
|
|
87
95
|
const subtotal = items.reduce((sum, i) => sum + i.qty * i.price, 0);
|
|
88
96
|
const tax = subtotal * 0.1;
|
|
89
|
-
return {
|
|
90
|
-
...data,
|
|
91
|
-
subtotal,
|
|
92
|
-
tax,
|
|
93
|
-
total: subtotal + tax,
|
|
94
|
-
processedAt: new Date().toISOString(),
|
|
95
|
-
};
|
|
97
|
+
return { ...data, subtotal, tax, total: subtotal + tax };
|
|
96
98
|
}
|
|
97
99
|
}
|
|
98
100
|
```
|
|
@@ -117,58 +119,159 @@ export class DataController {
|
|
|
117
119
|
}
|
|
118
120
|
```
|
|
119
121
|
|
|
120
|
-
##
|
|
122
|
+
## Schema validation
|
|
121
123
|
|
|
122
|
-
|
|
124
|
+
Build schemas with the fluent API. Full type inference via `Infer<T>`:
|
|
123
125
|
|
|
124
126
|
```typescript
|
|
125
|
-
import {
|
|
127
|
+
import { Schema, Infer, SchemaValidator } from '@hazeljs/data';
|
|
128
|
+
|
|
129
|
+
const UserSchema = Schema.object({
|
|
130
|
+
email: Schema.string().email(),
|
|
131
|
+
name: Schema.string().min(1).max(200),
|
|
132
|
+
age: Schema.number().min(0).max(150),
|
|
133
|
+
role: Schema.string().oneOf(['user', 'admin', 'moderator', 'guest']),
|
|
134
|
+
active: Schema.boolean().default(true),
|
|
135
|
+
});
|
|
126
136
|
|
|
127
|
-
|
|
128
|
-
|
|
137
|
+
type User = Infer<typeof UserSchema>;
|
|
138
|
+
|
|
139
|
+
// Validate (throws on failure)
|
|
140
|
+
const validator = new SchemaValidator();
|
|
141
|
+
const user = validator.validate(UserSchema, rawData);
|
|
142
|
+
|
|
143
|
+
// Safe validate (returns result)
|
|
144
|
+
const result = validator.safeValidate(UserSchema, rawData);
|
|
145
|
+
if (result.success) {
|
|
146
|
+
const user = result.data;
|
|
147
|
+
} else {
|
|
148
|
+
console.error(result.errors);
|
|
149
|
+
}
|
|
129
150
|
```
|
|
130
151
|
|
|
131
|
-
|
|
152
|
+
### Schema types and modifiers
|
|
153
|
+
|
|
154
|
+
| Type | Example |
|
|
155
|
+
|------|---------|
|
|
156
|
+
| `Schema.string()` | `.email()`, `.url()`, `.min()`, `.max()`, `.uuid()`, `.oneOf()`, `.pattern()`, `.required()`, `.trim()` |
|
|
157
|
+
| `Schema.number()` | `.min()`, `.max()`, `.integer()`, `.positive()`, `.negative()`, `.multipleOf()` |
|
|
158
|
+
| `Schema.boolean()` | `.default()` |
|
|
159
|
+
| `Schema.date()` | `.min()`, `.max()`, `.default()` |
|
|
160
|
+
| `Schema.object({...})` | `.strict()`, `.pick()`, `.omit()`, `.extend()` |
|
|
161
|
+
| `Schema.array(itemSchema)` | `.min()`, `.max()`, `.nonempty()` |
|
|
162
|
+
| `Schema.literal(value)` | Literal values |
|
|
163
|
+
| `Schema.union([a, b])` | Discriminated unions |
|
|
164
|
+
| Modifiers | `.optional()`, `.nullable()`, `.default()`, `.transform()`, `.refine()`, `.refineAsync()` |
|
|
132
165
|
|
|
133
|
-
|
|
166
|
+
## Pipeline options
|
|
167
|
+
|
|
168
|
+
Steps support conditional execution, retry, timeout, and DLQ:
|
|
134
169
|
|
|
135
170
|
```typescript
|
|
136
|
-
|
|
171
|
+
@Transform({
|
|
172
|
+
step: 2,
|
|
173
|
+
name: 'enrich',
|
|
174
|
+
when: (data) => (data as { type: string }).type === 'order',
|
|
175
|
+
retry: { attempts: 3, delay: 500, backoff: 'exponential' },
|
|
176
|
+
timeoutMs: 5000,
|
|
177
|
+
dlq: { handler: (item, err, step) => logger.error('DLQ', { item, err, step }) },
|
|
178
|
+
})
|
|
179
|
+
async enrich(data: unknown) {
|
|
180
|
+
return { ...(data as Record<string, unknown>), enriched: true };
|
|
181
|
+
}
|
|
182
|
+
```
|
|
137
183
|
|
|
138
|
-
|
|
139
|
-
.prop('email', Schema.string().format('email').required())
|
|
140
|
-
.prop('name', Schema.string().minLength(1).maxLength(200))
|
|
141
|
-
.prop('age', Schema.number().min(0).max(150))
|
|
142
|
-
.prop('role', Schema.string().oneOf(['user', 'admin', 'moderator', 'guest']))
|
|
143
|
-
.required();
|
|
184
|
+
## PipelineBuilder (programmatic pipelines)
|
|
144
185
|
|
|
145
|
-
|
|
146
|
-
|
|
186
|
+
Build pipelines in code without decorators:
|
|
187
|
+
|
|
188
|
+
```typescript
|
|
189
|
+
import { PipelineBuilder } from '@hazeljs/data';
|
|
190
|
+
|
|
191
|
+
const pipeline = new PipelineBuilder('orders')
|
|
192
|
+
.addTransform('normalize', (d) => ({ ...d, email: (d as { email: string }).email?.toLowerCase() }))
|
|
193
|
+
.branch(
|
|
194
|
+
'classify',
|
|
195
|
+
(d) => (d as { type: string }).type === 'premium',
|
|
196
|
+
(b) => b.addTransform('enrichPremium', enrichPremium),
|
|
197
|
+
(b) => b.addTransform('enrichStandard', enrichStandard)
|
|
198
|
+
)
|
|
199
|
+
.parallel('enrich', [
|
|
200
|
+
(d) => ({ ...d, a: 1 }),
|
|
201
|
+
(d) => ({ ...d, b: 2 }),
|
|
202
|
+
])
|
|
203
|
+
.catch((data, err) => ({ ...data, error: err.message }));
|
|
204
|
+
|
|
205
|
+
const result = await pipeline.execute(rawData);
|
|
147
206
|
```
|
|
148
207
|
|
|
149
|
-
##
|
|
208
|
+
## Batch and stream processing
|
|
209
|
+
|
|
210
|
+
```typescript
|
|
211
|
+
import { StreamService, StreamProcessor } from '@hazeljs/data';
|
|
212
|
+
|
|
213
|
+
// Batch
|
|
214
|
+
const results = await streamService.processBatch(pipeline, items);
|
|
215
|
+
|
|
216
|
+
// Streaming with windowing
|
|
217
|
+
const processor = new StreamProcessor(etlService);
|
|
218
|
+
for await (const batch of processor.tumblingWindow(source, 60_000)) {
|
|
219
|
+
console.log(batch.items, batch.windowStart, batch.windowEnd);
|
|
220
|
+
}
|
|
221
|
+
// Also: slidingWindow, sessionWindow, joinStreams
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
## Data quality
|
|
150
225
|
|
|
151
226
|
```typescript
|
|
152
227
|
import { QualityService } from '@hazeljs/data';
|
|
153
228
|
|
|
154
229
|
const qualityService = new QualityService();
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
230
|
+
|
|
231
|
+
qualityService.registerCheck('completeness', qualityService.completeness(['id', 'email']));
|
|
232
|
+
qualityService.registerCheck('notNull', qualityService.notNull(['id']));
|
|
233
|
+
qualityService.registerCheck('uniqueness', qualityService.uniqueness(['id']));
|
|
234
|
+
qualityService.registerCheck('range', qualityService.range('age', { min: 0, max: 120 }));
|
|
235
|
+
qualityService.registerCheck('pattern', qualityService.pattern('phone', /^\d{10}$/));
|
|
236
|
+
qualityService.registerCheck('ref', qualityService.referentialIntegrity('status', ['active', 'inactive']));
|
|
237
|
+
|
|
238
|
+
const report = await qualityService.runChecks('users', records);
|
|
239
|
+
const profile = qualityService.profile('users', records);
|
|
240
|
+
const anomalies = qualityService.detectAnomalies(records, ['value'], 2);
|
|
159
241
|
```
|
|
160
242
|
|
|
161
|
-
##
|
|
243
|
+
## PII decorators
|
|
162
244
|
|
|
163
|
-
|
|
245
|
+
```typescript
|
|
246
|
+
import { Transform, Mask, Redact } from '@hazeljs/data';
|
|
247
|
+
|
|
248
|
+
@Transform({ step: 1, name: 'sanitize' })
|
|
249
|
+
@Mask({ fields: ['email', 'ssn'], showLast: 4 })
|
|
250
|
+
sanitize(data: User) {
|
|
251
|
+
return data; // email/ssn already masked
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
@Transform({ step: 2, name: 'redact' })
|
|
255
|
+
@Redact({ fields: ['internalId'] })
|
|
256
|
+
redact(data: Record<string, unknown>) {
|
|
257
|
+
return data; // internalId removed
|
|
258
|
+
}
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
## Test utilities
|
|
164
262
|
|
|
165
263
|
```typescript
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
264
|
+
import { SchemaFaker, PipelineTestHarness, MockSource, MockSink } from '@hazeljs/data';
|
|
265
|
+
|
|
266
|
+
const fake = SchemaFaker.generate(UserSchema);
|
|
267
|
+
const many = SchemaFaker.generateMany(UserSchema, 10);
|
|
268
|
+
|
|
269
|
+
const harness = PipelineTestHarness.create(etlService, pipeline);
|
|
270
|
+
const { result, events } = await harness.run(input);
|
|
271
|
+
await harness.runAndAssertSuccess(input);
|
|
272
|
+
|
|
273
|
+
const source = new MockSource([{ x: 1 }]);
|
|
274
|
+
const sink = new MockSink();
|
|
172
275
|
```
|
|
173
276
|
|
|
174
277
|
## Built-in transformers
|
|
@@ -182,9 +285,20 @@ DataModule.forRoot({
|
|
|
182
285
|
| `omit` | Remove specific keys from objects |
|
|
183
286
|
| `renameKeys` | Rename object keys |
|
|
184
287
|
|
|
288
|
+
## Flink configuration (optional)
|
|
289
|
+
|
|
290
|
+
```typescript
|
|
291
|
+
DataModule.forRoot({
|
|
292
|
+
flink: {
|
|
293
|
+
url: process.env.FLINK_REST_URL ?? 'http://localhost:8081',
|
|
294
|
+
timeout: 30000,
|
|
295
|
+
},
|
|
296
|
+
});
|
|
297
|
+
```
|
|
298
|
+
|
|
185
299
|
## Example
|
|
186
300
|
|
|
187
|
-
See [hazeljs-data-starter](../../../hazeljs-data-starter) for a full example with order and user pipelines,
|
|
301
|
+
See [hazeljs-data-starter](../../../hazeljs-data-starter) for a full example with order and user pipelines, PipelineBuilder, PII decorators, quality profiling, anomaly detection, and REST API.
|
|
188
302
|
|
|
189
303
|
## Links
|
|
190
304
|
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Connector interfaces for data sources and sinks.
|
|
3
|
+
* All connectors implement DataSource<T> or DataSink<T>.
|
|
4
|
+
*/
|
|
5
|
+
export interface DataSource<T = unknown> {
|
|
6
|
+
readonly name: string;
|
|
7
|
+
/** Open/initialize the connection */
|
|
8
|
+
open(): Promise<void>;
|
|
9
|
+
/** Close/release the connection */
|
|
10
|
+
close(): Promise<void>;
|
|
11
|
+
/** Read all records as an array */
|
|
12
|
+
readAll(): Promise<T[]>;
|
|
13
|
+
/** Read records as an async generator (streaming) */
|
|
14
|
+
read(): AsyncGenerator<T>;
|
|
15
|
+
}
|
|
16
|
+
export interface DataSink<T = unknown> {
|
|
17
|
+
readonly name: string;
|
|
18
|
+
open(): Promise<void>;
|
|
19
|
+
close(): Promise<void>;
|
|
20
|
+
/** Write a single record */
|
|
21
|
+
write(record: T): Promise<void>;
|
|
22
|
+
/** Write a batch of records (more efficient when supported) */
|
|
23
|
+
writeBatch(records: T[]): Promise<void>;
|
|
24
|
+
}
|
|
25
|
+
export interface ConnectorOptions {
|
|
26
|
+
/** Connector display name */
|
|
27
|
+
name?: string;
|
|
28
|
+
}
|
|
29
|
+
//# sourceMappingURL=connector.interface.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"connector.interface.d.ts","sourceRoot":"","sources":["../../src/connectors/connector.interface.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,WAAW,UAAU,CAAC,CAAC,GAAG,OAAO;IACrC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,qCAAqC;IACrC,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACtB,mCAAmC;IACnC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,mCAAmC;IACnC,OAAO,IAAI,OAAO,CAAC,CAAC,EAAE,CAAC,CAAC;IACxB,qDAAqD;IACrD,IAAI,IAAI,cAAc,CAAC,CAAC,CAAC,CAAC;CAC3B;AAED,MAAM,WAAW,QAAQ,CAAC,CAAC,GAAG,OAAO;IACnC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACtB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,4BAA4B;IAC5B,KAAK,CAAC,MAAM,EAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAChC,+DAA+D;IAC/D,UAAU,CAAC,OAAO,EAAE,CAAC,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACzC;AAED,MAAM,WAAW,gBAAgB;IAC/B,6BAA6B;IAC7B,IAAI,CAAC,EAAE,MAAM,CAAC;CACf"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import type { DataSource, DataSink } from './connector.interface';
|
|
2
|
+
export interface CsvSourceOptions {
|
|
3
|
+
filePath: string;
|
|
4
|
+
delimiter?: string;
|
|
5
|
+
hasHeader?: boolean;
|
|
6
|
+
/** Custom header names. When set they are always used and the first line is read as data, even if hasHeader is true. */
|
|
7
|
+
headers?: string[];
|
|
8
|
+
name?: string;
|
|
9
|
+
}
|
|
10
|
+
export interface CsvSinkOptions {
|
|
11
|
+
filePath: string;
|
|
12
|
+
delimiter?: string;
|
|
13
|
+
/** Write a header row (the keys of the first record) before the first record is written; defaults to true */
|
|
14
|
+
writeHeader?: boolean;
|
|
15
|
+
name?: string;
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* CSV file data source — reads records from a CSV file.
|
|
19
|
+
*
|
|
20
|
+
* @example
|
|
21
|
+
* const source = new CsvSource({ filePath: './data.csv', hasHeader: true });
|
|
22
|
+
* await source.open();
|
|
23
|
+
* const records = await source.readAll();
|
|
24
|
+
* await source.close();
|
|
25
|
+
*/
|
|
26
|
+
export declare class CsvSource implements DataSource<Record<string, string>> {
|
|
27
|
+
readonly name: string;
|
|
28
|
+
private readonly filePath;
|
|
29
|
+
private readonly delimiter;
|
|
30
|
+
private readonly hasHeader;
|
|
31
|
+
private readonly customHeaders?;
|
|
32
|
+
constructor(options: CsvSourceOptions);
|
|
33
|
+
open(): Promise<void>;
|
|
34
|
+
close(): Promise<void>;
|
|
35
|
+
readAll(): Promise<Record<string, string>[]>;
|
|
36
|
+
read(): AsyncGenerator<Record<string, string>>;
|
|
37
|
+
private parseLine;
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* CSV file data sink — writes records to a CSV file.
|
|
41
|
+
*
|
|
42
|
+
* @example
|
|
43
|
+
* const sink = new CsvSink({ filePath: './output.csv', writeHeader: true });
|
|
44
|
+
* await sink.open();
|
|
45
|
+
* await sink.writeBatch(records);
|
|
46
|
+
* await sink.close();
|
|
47
|
+
*/
|
|
48
|
+
export declare class CsvSink implements DataSink<Record<string, unknown>> {
|
|
49
|
+
readonly name: string;
|
|
50
|
+
private readonly filePath;
|
|
51
|
+
private readonly delimiter;
|
|
52
|
+
private readonly writeHeader;
|
|
53
|
+
private headers;
|
|
54
|
+
private stream;
|
|
55
|
+
private headerWritten;
|
|
56
|
+
constructor(options: CsvSinkOptions);
|
|
57
|
+
open(): Promise<void>;
|
|
58
|
+
close(): Promise<void>;
|
|
59
|
+
write(record: Record<string, unknown>): Promise<void>;
|
|
60
|
+
writeBatch(records: Record<string, unknown>[]): Promise<void>;
|
|
61
|
+
private writeLine;
|
|
62
|
+
}
|
|
63
|
+
//# sourceMappingURL=csv.connector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"csv.connector.d.ts","sourceRoot":"","sources":["../../src/connectors/csv.connector.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAElE,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,yDAAyD;IACzD,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;;;;;;;GAQG;AACH,qBAAa,SAAU,YAAW,UAAU,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClE,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAU;IACpC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAW;gBAE9B,OAAO,EAAE,gBAAgB;IAQ/B,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAMrB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAItB,OAAO,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAQ3C,IAAI,IAAI,cAAc,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IA4BrD,OAAO,CAAC,SAAS;CAwBlB;AAED;;;;;;;;GAQG;AACH,qBAAa,OAAQ,YAAW,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/D,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAU;IACtC,OAAO,CAAC,OAAO,CAAyB;IACxC,OAAO,CAAC,MAAM,CAAqD;IACnE,OAAO,CAAC,aAAa,CAAS;gBAElB,OAAO,EAAE,cAAc;IAO7B,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAIrB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAUtB,KAAK,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAerD,UAAU,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAInE,OAAO,CAAC,SAAS;CAWlB"}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CsvSink = exports.CsvSource = void 0;
|
|
4
|
+
const fs_1 = require("fs");
|
|
5
|
+
const readline_1 = require("readline");
|
|
6
|
+
/**
|
|
7
|
+
* CSV file data source — reads records from a CSV file.
|
|
8
|
+
*
|
|
9
|
+
* @example
|
|
10
|
+
* const source = new CsvSource({ filePath: './data.csv', hasHeader: true });
|
|
11
|
+
* await source.open();
|
|
12
|
+
* const records = await source.readAll();
|
|
13
|
+
* await source.close();
|
|
14
|
+
*/
|
|
15
|
+
class CsvSource {
|
|
16
|
+
constructor(options) {
|
|
17
|
+
this.name = options.name ?? `csv:${options.filePath}`;
|
|
18
|
+
this.filePath = options.filePath;
|
|
19
|
+
this.delimiter = options.delimiter ?? ',';
|
|
20
|
+
this.hasHeader = options.hasHeader ?? true;
|
|
21
|
+
this.customHeaders = options.headers;
|
|
22
|
+
}
|
|
23
|
+
async open() {
|
|
24
|
+
if (!(0, fs_1.existsSync)(this.filePath)) {
|
|
25
|
+
throw new Error(`CSV file not found: ${this.filePath}`);
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
async close() {
|
|
29
|
+
// No-op for file reads
|
|
30
|
+
}
|
|
31
|
+
async readAll() {
|
|
32
|
+
const records = [];
|
|
33
|
+
for await (const record of this.read()) {
|
|
34
|
+
records.push(record);
|
|
35
|
+
}
|
|
36
|
+
return records;
|
|
37
|
+
}
|
|
38
|
+
async *read() {
|
|
39
|
+
const rl = (0, readline_1.createInterface)({
|
|
40
|
+
input: (0, fs_1.createReadStream)(this.filePath),
|
|
41
|
+
crlfDelay: Infinity,
|
|
42
|
+
});
|
|
43
|
+
let headers = this.customHeaders ?? null;
|
|
44
|
+
let isFirst = true;
|
|
45
|
+
for await (const line of rl) {
|
|
46
|
+
const cols = this.parseLine(line);
|
|
47
|
+
if (isFirst && this.hasHeader && !this.customHeaders) {
|
|
48
|
+
headers = cols;
|
|
49
|
+
isFirst = false;
|
|
50
|
+
continue;
|
|
51
|
+
}
|
|
52
|
+
isFirst = false;
|
|
53
|
+
if (!headers) {
|
|
54
|
+
headers = cols.map((_, i) => `col${i}`);
|
|
55
|
+
}
|
|
56
|
+
const record = {};
|
|
57
|
+
headers.forEach((h, i) => {
|
|
58
|
+
record[h] = cols[i] ?? '';
|
|
59
|
+
});
|
|
60
|
+
yield record;
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
parseLine(line) {
|
|
64
|
+
const result = [];
|
|
65
|
+
let current = '';
|
|
66
|
+
let inQuote = false;
|
|
67
|
+
for (let i = 0; i < line.length; i++) {
|
|
68
|
+
const ch = line[i];
|
|
69
|
+
if (ch === '"') {
|
|
70
|
+
if (inQuote && line[i + 1] === '"') {
|
|
71
|
+
current += '"';
|
|
72
|
+
i++;
|
|
73
|
+
}
|
|
74
|
+
else {
|
|
75
|
+
inQuote = !inQuote;
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
else if (ch === this.delimiter && !inQuote) {
|
|
79
|
+
result.push(current);
|
|
80
|
+
current = '';
|
|
81
|
+
}
|
|
82
|
+
else {
|
|
83
|
+
current += ch;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
result.push(current);
|
|
87
|
+
return result;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
exports.CsvSource = CsvSource;
|
|
91
|
+
/**
|
|
92
|
+
* CSV file data sink — writes records to a CSV file.
|
|
93
|
+
*
|
|
94
|
+
* @example
|
|
95
|
+
* const sink = new CsvSink({ filePath: './output.csv', writeHeader: true });
|
|
96
|
+
* await sink.open();
|
|
97
|
+
* await sink.writeBatch(records);
|
|
98
|
+
* await sink.close();
|
|
99
|
+
*/
|
|
100
|
+
class CsvSink {
|
|
101
|
+
constructor(options) {
|
|
102
|
+
this.headers = null;
|
|
103
|
+
this.stream = null;
|
|
104
|
+
this.headerWritten = false;
|
|
105
|
+
this.name = options.name ?? `csv:${options.filePath}`;
|
|
106
|
+
this.filePath = options.filePath;
|
|
107
|
+
this.delimiter = options.delimiter ?? ',';
|
|
108
|
+
this.writeHeader = options.writeHeader ?? true;
|
|
109
|
+
}
|
|
110
|
+
async open() {
|
|
111
|
+
this.stream = (0, fs_1.createWriteStream)(this.filePath, { encoding: 'utf8' });
|
|
112
|
+
}
|
|
113
|
+
async close() {
|
|
114
|
+
return new Promise((resolve, reject) => {
|
|
115
|
+
if (!this.stream) {
|
|
116
|
+
resolve();
|
|
117
|
+
return;
|
|
118
|
+
}
|
|
119
|
+
this.stream.end((err) => (err ? reject(err) : resolve()));
|
|
120
|
+
});
|
|
121
|
+
}
|
|
122
|
+
async write(record) {
|
|
123
|
+
if (!this.stream)
|
|
124
|
+
throw new Error('CsvSink: call open() before write()');
|
|
125
|
+
if (!this.headers) {
|
|
126
|
+
this.headers = Object.keys(record);
|
|
127
|
+
}
|
|
128
|
+
if (this.writeHeader && !this.headerWritten) {
|
|
129
|
+
this.writeLine(this.headers);
|
|
130
|
+
this.headerWritten = true;
|
|
131
|
+
}
|
|
132
|
+
this.writeLine(this.headers.map((h) => String(record[h] ?? '')));
|
|
133
|
+
}
|
|
134
|
+
async writeBatch(records) {
|
|
135
|
+
for (const r of records)
|
|
136
|
+
await this.write(r);
|
|
137
|
+
}
|
|
138
|
+
writeLine(cols) {
|
|
139
|
+
const line = cols
|
|
140
|
+
.map((c) => c.includes(this.delimiter) || c.includes('"') || c.includes('\n')
|
|
141
|
+
? `"${c.replace(/"/g, '""')}"`
|
|
142
|
+
: c)
|
|
143
|
+
.join(this.delimiter) + '\n';
|
|
144
|
+
this.stream.write(line);
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
exports.CsvSink = CsvSink;
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import type { DataSource, DataSink } from './connector.interface';
|
|
2
|
+
export interface HttpSourceOptions {
|
|
3
|
+
url: string;
|
|
4
|
+
method?: 'GET' | 'POST';
|
|
5
|
+
headers?: Record<string, string>;
|
|
6
|
+
body?: unknown;
|
|
7
|
+
/** JSON path to the array in the response (e.g. "data.items") */
|
|
8
|
+
dataPath?: string;
|
|
9
|
+
/** Pagination: next page URL extracted from response (e.g. "meta.next") */
|
|
10
|
+
nextPagePath?: string;
|
|
11
|
+
name?: string;
|
|
12
|
+
timeoutMs?: number;
|
|
13
|
+
}
|
|
14
|
+
export interface HttpSinkOptions {
|
|
15
|
+
url: string;
|
|
16
|
+
method?: 'POST' | 'PUT' | 'PATCH';
|
|
17
|
+
headers?: Record<string, string>;
|
|
18
|
+
/** Batch size — how many records to send per request (default: 1) */
|
|
19
|
+
batchSize?: number;
|
|
20
|
+
/** JSON field name to wrap records in (e.g. "records" → { records: [...] }) */
|
|
21
|
+
bodyKey?: string;
|
|
22
|
+
name?: string;
|
|
23
|
+
timeoutMs?: number;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* HTTP API data source — reads records from a REST API.
|
|
27
|
+
* Supports pagination via `nextPagePath`.
|
|
28
|
+
*
|
|
29
|
+
* @example
|
|
30
|
+
* const source = new HttpSource({
|
|
31
|
+
* url: 'https://api.example.com/users',
|
|
32
|
+
* dataPath: 'data',
|
|
33
|
+
* nextPagePath: 'meta.next',
|
|
34
|
+
* });
|
|
35
|
+
*/
|
|
36
|
+
export declare class HttpSource implements DataSource<unknown> {
|
|
37
|
+
readonly name: string;
|
|
38
|
+
private readonly options;
|
|
39
|
+
constructor(options: HttpSourceOptions);
|
|
40
|
+
open(): Promise<void>;
|
|
41
|
+
close(): Promise<void>;
|
|
42
|
+
readAll(): Promise<unknown[]>;
|
|
43
|
+
read(): AsyncGenerator<unknown>;
|
|
44
|
+
private fetchPage;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* HTTP API data sink — writes records to a REST API endpoint.
|
|
48
|
+
*
|
|
49
|
+
* @example
|
|
50
|
+
* const sink = new HttpSink({
|
|
51
|
+
* url: 'https://api.example.com/ingest',
|
|
52
|
+
* method: 'POST',
|
|
53
|
+
* batchSize: 100,
|
|
54
|
+
* bodyKey: 'records',
|
|
55
|
+
* });
|
|
56
|
+
*/
|
|
57
|
+
export declare class HttpSink implements DataSink<unknown> {
|
|
58
|
+
readonly name: string;
|
|
59
|
+
private readonly options;
|
|
60
|
+
private buffer;
|
|
61
|
+
constructor(options: HttpSinkOptions);
|
|
62
|
+
open(): Promise<void>;
|
|
63
|
+
close(): Promise<void>;
|
|
64
|
+
write(record: unknown): Promise<void>;
|
|
65
|
+
writeBatch(records: unknown[]): Promise<void>;
|
|
66
|
+
private flush;
|
|
67
|
+
}
|
|
68
|
+
//# sourceMappingURL=http.connector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"http.connector.d.ts","sourceRoot":"","sources":["../../src/connectors/http.connector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAElE,MAAM,WAAW,iBAAiB;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC;IACxB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,iEAAiE;IACjE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2EAA2E;IAC3E,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,OAAO,CAAC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,qEAAqE;IACrE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+EAA+E;IAC/E,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AASD;;;;;;;;;;GAUG;AACH,qBAAa,UAAW,YAAW,UAAU,CAAC,OAAO,CAAC;IACpD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAoB;gBAEhC,OAAO,EAAE,iBAAiB;IAKhC,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAMrB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAEtB,OAAO,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;IAM5B,IAAI,IAAI,cAAc,CAAC,OAAO,CAAC;IAwBtC,OAAO,CAAC,SAAS;CAYlB;AAED;;;;;;;;;;GAUG;AACH,qBAAa,QAAS,YAAW,QAAQ,CAAC,OAAO,CAAC;IAChD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;IAC1C,OAAO,CAAC,MAAM,CAAiB;gBAEnB,OAAO,EAAE,eAAe;IAK9B,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAErB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAItB,KAAK,CAAC,MAAM,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IAOrC,UAAU,CAAC,OAAO,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;YAKrC,KAAK;CA2BpB"}
|