@hazeljs/data 0.2.0-beta.67 → 0.2.0-beta.69

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/README.md +175 -61
  2. package/dist/connectors/connector.interface.d.ts +29 -0
  3. package/dist/connectors/connector.interface.d.ts.map +1 -0
  4. package/dist/connectors/connector.interface.js +6 -0
  5. package/dist/connectors/csv.connector.d.ts +63 -0
  6. package/dist/connectors/csv.connector.d.ts.map +1 -0
  7. package/dist/connectors/csv.connector.js +147 -0
  8. package/dist/connectors/http.connector.d.ts +68 -0
  9. package/dist/connectors/http.connector.d.ts.map +1 -0
  10. package/dist/connectors/http.connector.js +131 -0
  11. package/dist/connectors/index.d.ts +7 -0
  12. package/dist/connectors/index.d.ts.map +1 -0
  13. package/dist/connectors/index.js +12 -0
  14. package/dist/connectors/memory.connector.d.ts +38 -0
  15. package/dist/connectors/memory.connector.d.ts.map +1 -0
  16. package/dist/connectors/memory.connector.js +56 -0
  17. package/dist/connectors/memory.connector.test.d.ts +2 -0
  18. package/dist/connectors/memory.connector.test.d.ts.map +1 -0
  19. package/dist/connectors/memory.connector.test.js +43 -0
  20. package/dist/data.types.d.ts +16 -0
  21. package/dist/data.types.d.ts.map +1 -1
  22. package/dist/decorators/index.d.ts +1 -0
  23. package/dist/decorators/index.d.ts.map +1 -1
  24. package/dist/decorators/index.js +8 -1
  25. package/dist/decorators/pii.decorator.d.ts +59 -0
  26. package/dist/decorators/pii.decorator.d.ts.map +1 -0
  27. package/dist/decorators/pii.decorator.js +197 -0
  28. package/dist/decorators/pii.decorator.test.d.ts +2 -0
  29. package/dist/decorators/pii.decorator.test.d.ts.map +1 -0
  30. package/dist/decorators/pii.decorator.test.js +150 -0
  31. package/dist/decorators/pipeline.decorator.js +1 -1
  32. package/dist/decorators/pipeline.decorator.test.js +8 -0
  33. package/dist/decorators/transform.decorator.d.ts +9 -1
  34. package/dist/decorators/transform.decorator.d.ts.map +1 -1
  35. package/dist/decorators/transform.decorator.js +4 -0
  36. package/dist/decorators/validate.decorator.d.ts +5 -1
  37. package/dist/decorators/validate.decorator.d.ts.map +1 -1
  38. package/dist/decorators/validate.decorator.js +4 -0
  39. package/dist/flink.service.d.ts +30 -0
  40. package/dist/flink.service.d.ts.map +1 -1
  41. package/dist/flink.service.js +50 -2
  42. package/dist/index.d.ts +13 -7
  43. package/dist/index.d.ts.map +1 -1
  44. package/dist/index.js +36 -8
  45. package/dist/pipelines/etl.service.d.ts +41 -2
  46. package/dist/pipelines/etl.service.d.ts.map +1 -1
  47. package/dist/pipelines/etl.service.js +143 -6
  48. package/dist/pipelines/etl.service.test.js +215 -0
  49. package/dist/pipelines/pipeline.builder.d.ts +86 -13
  50. package/dist/pipelines/pipeline.builder.d.ts.map +1 -1
  51. package/dist/pipelines/pipeline.builder.js +177 -27
  52. package/dist/pipelines/pipeline.builder.test.js +160 -12
  53. package/dist/pipelines/stream.service.test.js +49 -0
  54. package/dist/quality/quality.service.d.ts +67 -5
  55. package/dist/quality/quality.service.d.ts.map +1 -1
  56. package/dist/quality/quality.service.js +259 -20
  57. package/dist/quality/quality.service.test.js +94 -0
  58. package/dist/schema/schema.d.ts +92 -12
  59. package/dist/schema/schema.d.ts.map +1 -1
  60. package/dist/schema/schema.js +395 -83
  61. package/dist/schema/schema.test.js +292 -0
  62. package/dist/streaming/flink/flink.client.d.ts +41 -3
  63. package/dist/streaming/flink/flink.client.d.ts.map +1 -1
  64. package/dist/streaming/flink/flink.client.js +171 -8
  65. package/dist/streaming/flink/flink.client.test.js +2 -2
  66. package/dist/streaming/flink/flink.job.d.ts +2 -1
  67. package/dist/streaming/flink/flink.job.d.ts.map +1 -1
  68. package/dist/streaming/flink/flink.job.js +2 -2
  69. package/dist/streaming/stream.processor.d.ts +56 -2
  70. package/dist/streaming/stream.processor.d.ts.map +1 -1
  71. package/dist/streaming/stream.processor.js +149 -2
  72. package/dist/streaming/stream.processor.test.js +99 -0
  73. package/dist/streaming/stream.processor.windowing.test.d.ts +2 -0
  74. package/dist/streaming/stream.processor.windowing.test.d.ts.map +1 -0
  75. package/dist/streaming/stream.processor.windowing.test.js +69 -0
  76. package/dist/telemetry/telemetry.d.ts +124 -0
  77. package/dist/telemetry/telemetry.d.ts.map +1 -0
  78. package/dist/telemetry/telemetry.js +259 -0
  79. package/dist/telemetry/telemetry.test.d.ts +2 -0
  80. package/dist/telemetry/telemetry.test.d.ts.map +1 -0
  81. package/dist/telemetry/telemetry.test.js +51 -0
  82. package/dist/testing/index.d.ts +12 -0
  83. package/dist/testing/index.d.ts.map +1 -0
  84. package/dist/testing/index.js +18 -0
  85. package/dist/testing/pipeline-test-harness.d.ts +40 -0
  86. package/dist/testing/pipeline-test-harness.d.ts.map +1 -0
  87. package/dist/testing/pipeline-test-harness.js +55 -0
  88. package/dist/testing/pipeline-test-harness.test.d.ts +2 -0
  89. package/dist/testing/pipeline-test-harness.test.d.ts.map +1 -0
  90. package/dist/testing/pipeline-test-harness.test.js +102 -0
  91. package/dist/testing/schema-faker.d.ts +32 -0
  92. package/dist/testing/schema-faker.d.ts.map +1 -0
  93. package/dist/testing/schema-faker.js +91 -0
  94. package/dist/testing/schema-faker.test.d.ts +2 -0
  95. package/dist/testing/schema-faker.test.d.ts.map +1 -0
  96. package/dist/testing/schema-faker.test.js +66 -0
  97. package/dist/transformers/built-in.transformers.test.js +28 -0
  98. package/dist/transformers/transformer.service.test.js +10 -0
  99. package/package.json +2 -2
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # @hazeljs/data
2
2
 
3
- Data Processing & ETL for HazelJS - pipelines, schema validation, streaming, and data quality.
3
+ Data Processing & ETL for HazelJS - pipelines, schema validation, streaming, data quality, and more.
4
4
 
5
5
  [![npm version](https://img.shields.io/npm/v/@hazeljs/data.svg)](https://www.npmjs.com/package/@hazeljs/data)
6
6
  [![npm downloads](https://img.shields.io/npm/dm/@hazeljs/data)](https://www.npmjs.com/package/@hazeljs/data)
@@ -9,11 +9,16 @@ Data Processing & ETL for HazelJS - pipelines, schema validation, streaming, and
9
9
  ## Features
10
10
 
11
11
  - **Pipelines** – Declarative ETL with `@Pipeline`, `@Transform`, `@Validate` decorators
12
- - **Schema validation** – Fluent Schema API (string, number, object, array, email, oneOf)
13
- - **ETL service** – Execute multi-step pipelines with ordering and error handling
14
- - **Stream processing** – StreamBuilder, StreamProcessor for batch and streaming workloads
15
- - **Built-in transformers** – trimString, toLowerCase, parseJson, pick, omit, renameKeys
16
- - **Data quality** – QualityService for completeness, notNull, and custom checks
12
+ - **Schema validation** – Fluent Schema API (string, number, boolean, date, object, array, literal, union) with `.optional()`, `.nullable()`, `.default()`, `.transform()`, `.refine()`, `Infer<T>`, `.toJsonSchema()`
13
+ - **Pipeline options** – Conditional steps (`when`), per-step retry, timeout, dead letter queue (DLQ)
14
+ - **PipelineBuilder** – Programmatic pipelines with `.branch()`, `.parallel()`, `.catch()`, `.toSchema()`
15
+ - **ETL service** – Execute multi-step pipelines with `executeBatch`, `onStepComplete`
16
+ - **Stream processing** – StreamService, StreamProcessor with tumbling/sliding/session windows and stream join
17
+ - **Built-in transformers** – trimString, toLowerCase, toUpperCase, parseJson, stringifyJson, pick, omit, renameKeys
18
+ - **Data quality** – QualityService with completeness, notNull, uniqueness, range, pattern, referentialIntegrity, profile(), detectAnomalies()
19
+ - **Connectors** – DataSource/DataSink (MemorySource, MemorySink, CsvSource, CsvSink, HttpSource, HttpSink)
20
+ - **PII decorators** – @Mask, @Redact, @Encrypt, @Decrypt for sensitive data
21
+ - **Test utilities** – SchemaFaker, PipelineTestHarness, MockSource, MockSink
17
22
  - **Flink integration** – Optional Apache Flink deployment for distributed stream processing
18
23
 
19
24
  ## Installation
@@ -48,18 +53,24 @@ import {
48
53
  Validate,
49
54
  ETLService,
50
55
  Schema,
56
+ Infer,
51
57
  } from '@hazeljs/data';
52
58
 
53
- const OrderSchema = Schema.object()
54
- .prop('id', Schema.string().required())
55
- .prop('customerId', Schema.string().required())
56
- .prop('status', Schema.string().oneOf(['pending', 'paid', 'shipped', 'delivered', 'cancelled']))
57
- .prop('items', Schema.array().items(Schema.object()
58
- .prop('sku', Schema.string().minLength(1))
59
- .prop('qty', Schema.number().min(1))
60
- .prop('price', Schema.number().min(0))
61
- ))
62
- .required();
59
+ const OrderSchema = Schema.object({
60
+ id: Schema.string().min(1),
61
+ customerId: Schema.string().min(1),
62
+ items: Schema.array(
63
+ Schema.object({
64
+ sku: Schema.string().min(1),
65
+ qty: Schema.number().min(1),
66
+ price: Schema.number().min(0),
67
+ })
68
+ ),
69
+ status: Schema.string().oneOf(['pending', 'paid', 'shipped', 'delivered', 'cancelled']),
70
+ createdAt: Schema.string().min(1),
71
+ });
72
+
73
+ type Order = Infer<typeof OrderSchema>;
63
74
 
64
75
  @Pipeline('order-processing')
65
76
  @Injectable()
@@ -69,30 +80,21 @@ export class OrderProcessingPipeline extends PipelineBase {
69
80
  }
70
81
 
71
82
  @Transform({ step: 1, name: 'normalize' })
72
- async normalize(data: Record<string, unknown>): Promise<Record<string, unknown>> {
73
- return {
74
- ...data,
75
- status: String(data.status).toLowerCase(),
76
- };
83
+ async normalize(data: unknown): Promise<Order> {
84
+ return { ...(data as Order), status: String((data as Order).status).toLowerCase() };
77
85
  }
78
86
 
79
- @Validate({ step: 2, schema: OrderSchema })
80
- async validate(data: Record<string, unknown>): Promise<Record<string, unknown>> {
87
+ @Validate({ step: 2, name: 'validate', schema: OrderSchema })
88
+ async validate(data: Order): Promise<Order> {
81
89
  return data;
82
90
  }
83
91
 
84
92
  @Transform({ step: 3, name: 'enrich' })
85
- async enrich(data: Record<string, unknown> & { items?: { qty: number; price: number }[] }): Promise<Record<string, unknown>> {
93
+ async enrich(data: Order): Promise<Order & { subtotal: number; tax: number; total: number }> {
86
94
  const items = data.items ?? [];
87
95
  const subtotal = items.reduce((sum, i) => sum + i.qty * i.price, 0);
88
96
  const tax = subtotal * 0.1;
89
- return {
90
- ...data,
91
- subtotal,
92
- tax,
93
- total: subtotal + tax,
94
- processedAt: new Date().toISOString(),
95
- };
97
+ return { ...data, subtotal, tax, total: subtotal + tax };
96
98
  }
97
99
  }
98
100
  ```
@@ -117,58 +119,159 @@ export class DataController {
117
119
  }
118
120
  ```
119
121
 
120
- ## Batch processing with StreamService
122
+ ## Schema validation
121
123
 
122
- Process arrays through pipelines in batches:
124
+ Build schemas with the fluent API. Full type inference via `Infer<T>`:
123
125
 
124
126
  ```typescript
125
- import { StreamService } from '@hazeljs/data';
127
+ import { Schema, Infer, SchemaValidator } from '@hazeljs/data';
128
+
129
+ const UserSchema = Schema.object({
130
+ email: Schema.string().email(),
131
+ name: Schema.string().min(1).max(200),
132
+ age: Schema.number().min(0).max(150),
133
+ role: Schema.string().oneOf(['user', 'admin', 'moderator', 'guest']),
134
+ active: Schema.boolean().default(true),
135
+ });
126
136
 
127
- const streamService = new StreamService(etlService);
128
- const results = await streamService.processBatch(OrderProcessingPipeline, orders);
137
+ type User = Infer<typeof UserSchema>;
138
+
139
+ // Validate (throws on failure)
140
+ const validator = new SchemaValidator();
141
+ const user = validator.validate(UserSchema, rawData);
142
+
143
+ // Safe validate (returns result)
144
+ const result = validator.safeValidate(UserSchema, rawData);
145
+ if (result.success) {
146
+ const user = result.data;
147
+ } else {
148
+ console.error(result.errors);
149
+ }
129
150
  ```
130
151
 
131
- ## Schema validation
152
+ ### Schema types and modifiers
153
+
154
+ | Type | Example |
155
+ |------|---------|
156
+ | `Schema.string()` | `.email()`, `.url()`, `.min()`, `.max()`, `.uuid()`, `.oneOf()`, `.pattern()`, `.required()`, `.trim()` |
157
+ | `Schema.number()` | `.min()`, `.max()`, `.integer()`, `.positive()`, `.negative()`, `.multipleOf()` |
158
+ | `Schema.boolean()` | `.default()` |
159
+ | `Schema.date()` | `.min()`, `.max()`, `.default()` |
160
+ | `Schema.object({...})` | `.strict()`, `.pick()`, `.omit()`, `.extend()` |
161
+ | `Schema.array(itemSchema)` | `.min()`, `.max()`, `.nonempty()` |
162
+ | `Schema.literal(value)` | Literal values |
163
+ | `Schema.union([a, b])` | Discriminated unions |
164
+ | Modifiers | `.optional()`, `.nullable()`, `.default()`, `.transform()`, `.refine()`, `.refineAsync()` |
132
165
 
133
- Build schemas with the fluent API:
166
+ ## Pipeline options
167
+
168
+ Steps support conditional execution, retry, timeout, and DLQ:
134
169
 
135
170
  ```typescript
136
- import { Schema } from '@hazeljs/data';
171
+ @Transform({
172
+ step: 2,
173
+ name: 'enrich',
174
+ when: (data) => (data as { type: string }).type === 'order',
175
+ retry: { attempts: 3, delay: 500, backoff: 'exponential' },
176
+ timeoutMs: 5000,
177
+ dlq: { handler: (item, err, step) => logger.error('DLQ', { item, err, step }) },
178
+ })
179
+ async enrich(data: unknown) {
180
+ return { ...(data as Record<string, unknown>), enriched: true };
181
+ }
182
+ ```
137
183
 
138
- const UserSchema = Schema.object()
139
- .prop('email', Schema.string().format('email').required())
140
- .prop('name', Schema.string().minLength(1).maxLength(200))
141
- .prop('age', Schema.number().min(0).max(150))
142
- .prop('role', Schema.string().oneOf(['user', 'admin', 'moderator', 'guest']))
143
- .required();
184
+ ## PipelineBuilder (programmatic pipelines)
144
185
 
145
- const validator = new SchemaValidator();
146
- const { value, error } = validator.validate(UserSchema, rawData);
186
+ Build pipelines in code without decorators:
187
+
188
+ ```typescript
189
+ import { PipelineBuilder } from '@hazeljs/data';
190
+
191
+ const pipeline = new PipelineBuilder('orders')
192
+ .addTransform('normalize', (d) => ({ ...d, email: (d as { email: string }).email?.toLowerCase() }))
193
+ .branch(
194
+ 'classify',
195
+ (d) => (d as { type: string }).type === 'premium',
196
+ (b) => b.addTransform('enrichPremium', enrichPremium),
197
+ (b) => b.addTransform('enrichStandard', enrichStandard)
198
+ )
199
+ .parallel('enrich', [
200
+ (d) => ({ ...d, a: 1 }),
201
+ (d) => ({ ...d, b: 2 }),
202
+ ])
203
+ .catch((data, err) => ({ ...data, error: err.message }));
204
+
205
+ const result = await pipeline.execute(rawData);
147
206
  ```
148
207
 
149
- ## Data quality checks
208
+ ## Batch and stream processing
209
+
210
+ ```typescript
211
+ import { StreamService, StreamProcessor } from '@hazeljs/data';
212
+
213
+ // Batch (streamService is a StreamService instance, e.g. new StreamService(etlService))
214
+ const results = await streamService.processBatch(pipeline, items);
215
+
216
+ // Streaming with windowing
217
+ const processor = new StreamProcessor(etlService);
218
+ for await (const batch of processor.tumblingWindow(source, 60_000)) {
219
+ console.log(batch.items, batch.windowStart, batch.windowEnd);
220
+ }
221
+ // Also: slidingWindow, sessionWindow, joinStreams
222
+ ```
223
+
224
+ ## Data quality
150
225
 
151
226
  ```typescript
152
227
  import { QualityService } from '@hazeljs/data';
153
228
 
154
229
  const qualityService = new QualityService();
155
- const report = await qualityService.check(records, {
156
- completeness: ['id', 'email', 'createdAt'],
157
- notNull: ['id', 'status'],
158
- });
230
+
231
+ qualityService.registerCheck('completeness', qualityService.completeness(['id', 'email']));
232
+ qualityService.registerCheck('notNull', qualityService.notNull(['id']));
233
+ qualityService.registerCheck('uniqueness', qualityService.uniqueness(['id']));
234
+ qualityService.registerCheck('range', qualityService.range('age', { min: 0, max: 120 }));
235
+ qualityService.registerCheck('pattern', qualityService.pattern('phone', /^\d{10}$/));
236
+ qualityService.registerCheck('ref', qualityService.referentialIntegrity('status', ['active', 'inactive']));
237
+
238
+ const report = await qualityService.runChecks('users', records);
239
+ const profile = qualityService.profile('users', records);
240
+ const anomalies = qualityService.detectAnomalies(records, ['value'], 2);
159
241
  ```
160
242
 
161
- ## Flink configuration (optional)
243
+ ## PII decorators
162
244
 
163
- For distributed stream processing with Apache Flink:
245
+ ```typescript
246
+ import { Transform, Mask, Redact } from '@hazeljs/data';
247
+
248
+ @Transform({ step: 1, name: 'sanitize' })
249
+ @Mask({ fields: ['email', 'ssn'], showLast: 4 })
250
+ sanitize(data: User) {
251
+ return data; // email/ssn already masked
252
+ }
253
+
254
+ @Transform({ step: 2, name: 'redact' })
255
+ @Redact({ fields: ['internalId'] })
256
+ redact(data: Record<string, unknown>) {
257
+ return data; // internalId removed
258
+ }
259
+ ```
260
+
261
+ ## Test utilities
164
262
 
165
263
  ```typescript
166
- DataModule.forRoot({
167
- flink: {
168
- url: process.env.FLINK_REST_URL ?? 'http://localhost:8081',
169
- timeout: 30000,
170
- },
171
- });
264
+ import { SchemaFaker, PipelineTestHarness, MockSource, MockSink } from '@hazeljs/data';
265
+
266
+ const fake = SchemaFaker.generate(UserSchema);
267
+ const many = SchemaFaker.generateMany(UserSchema, 10);
268
+
269
+ const harness = PipelineTestHarness.create(etlService, pipeline);
270
+ const { result, events } = await harness.run(input);
271
+ await harness.runAndAssertSuccess(input);
272
+
273
+ const source = new MockSource([{ x: 1 }]);
274
+ const sink = new MockSink();
172
275
  ```
173
276
 
174
277
  ## Built-in transformers
@@ -182,9 +285,20 @@ DataModule.forRoot({
182
285
  | `omit` | Remove specific keys from objects |
183
286
  | `renameKeys` | Rename object keys |
184
287
 
288
+ ## Flink configuration (optional)
289
+
290
+ ```typescript
291
+ DataModule.forRoot({
292
+ flink: {
293
+ url: process.env.FLINK_REST_URL ?? 'http://localhost:8081',
294
+ timeout: 30000,
295
+ },
296
+ });
297
+ ```
298
+
185
299
  ## Example
186
300
 
187
- See [hazeljs-data-starter](../../../hazeljs-data-starter) for a full example with order and user pipelines, REST API, and quality reports.
301
+ See [hazeljs-data-starter](../../../hazeljs-data-starter) for a full example with order and user pipelines, PipelineBuilder, PII decorators, quality profiling, anomaly detection, and REST API.
188
302
 
189
303
  ## Links
190
304
 
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Connector interfaces for data sources and sinks.
3
+ * All connectors implement DataSource<T> or DataSink<T>.
4
+ */
5
+ export interface DataSource<T = unknown> {
6
+ readonly name: string;
7
+ /** Open/initialize the connection */
8
+ open(): Promise<void>;
9
+ /** Close/release the connection */
10
+ close(): Promise<void>;
11
+ /** Read all records as an array */
12
+ readAll(): Promise<T[]>;
13
+ /** Read records as an async generator (streaming) */
14
+ read(): AsyncGenerator<T>;
15
+ }
16
+ export interface DataSink<T = unknown> {
17
+ readonly name: string;
18
+ open(): Promise<void>;
19
+ close(): Promise<void>;
20
+ /** Write a single record */
21
+ write(record: T): Promise<void>;
22
+ /** Write a batch of records (more efficient when supported) */
23
+ writeBatch(records: T[]): Promise<void>;
24
+ }
25
+ export interface ConnectorOptions {
26
+ /** Connector display name */
27
+ name?: string;
28
+ }
29
+ //# sourceMappingURL=connector.interface.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"connector.interface.d.ts","sourceRoot":"","sources":["../../src/connectors/connector.interface.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,WAAW,UAAU,CAAC,CAAC,GAAG,OAAO;IACrC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,qCAAqC;IACrC,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACtB,mCAAmC;IACnC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,mCAAmC;IACnC,OAAO,IAAI,OAAO,CAAC,CAAC,EAAE,CAAC,CAAC;IACxB,qDAAqD;IACrD,IAAI,IAAI,cAAc,CAAC,CAAC,CAAC,CAAC;CAC3B;AAED,MAAM,WAAW,QAAQ,CAAC,CAAC,GAAG,OAAO;IACnC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACtB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,4BAA4B;IAC5B,KAAK,CAAC,MAAM,EAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAChC,+DAA+D;IAC/D,UAAU,CAAC,OAAO,EAAE,CAAC,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACzC;AAED,MAAM,WAAW,gBAAgB;IAC/B,6BAA6B;IAC7B,IAAI,CAAC,EAAE,MAAM,CAAC;CACf"}
@@ -0,0 +1,6 @@
1
+ "use strict";
2
+ /**
3
+ * Connector interfaces for data sources and sinks.
4
+ * All connectors implement DataSource<T> or DataSink<T>.
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,63 @@
1
+ import type { DataSource, DataSink } from './connector.interface';
2
+ export interface CsvSourceOptions {
3
+ filePath: string;
4
+ delimiter?: string;
5
+ hasHeader?: boolean;
6
+ /** Custom header names; when set they replace any header row, and the first line is read as data even if hasHeader is true */
7
+ headers?: string[];
8
+ name?: string;
9
+ }
10
+ export interface CsvSinkOptions {
11
+ filePath: string;
12
+ delimiter?: string;
13
+ /** Write a header row (the keys of the first record) before the first record is written; defaults to true */
14
+ writeHeader?: boolean;
15
+ name?: string;
16
+ }
17
+ /**
18
+ * CSV file data source — reads records from a CSV file.
19
+ *
20
+ * @example
21
+ * const source = new CsvSource({ filePath: './data.csv', hasHeader: true });
22
+ * await source.open();
23
+ * const records = await source.readAll();
24
+ * await source.close();
25
+ */
26
+ export declare class CsvSource implements DataSource<Record<string, string>> {
27
+ readonly name: string;
28
+ private readonly filePath;
29
+ private readonly delimiter;
30
+ private readonly hasHeader;
31
+ private readonly customHeaders?;
32
+ constructor(options: CsvSourceOptions);
33
+ open(): Promise<void>;
34
+ close(): Promise<void>;
35
+ readAll(): Promise<Record<string, string>[]>;
36
+ read(): AsyncGenerator<Record<string, string>>;
37
+ private parseLine;
38
+ }
39
+ /**
40
+ * CSV file data sink — writes records to a CSV file.
41
+ *
42
+ * @example
43
+ * const sink = new CsvSink({ filePath: './output.csv', writeHeader: true });
44
+ * await sink.open();
45
+ * await sink.writeBatch(records);
46
+ * await sink.close();
47
+ */
48
+ export declare class CsvSink implements DataSink<Record<string, unknown>> {
49
+ readonly name: string;
50
+ private readonly filePath;
51
+ private readonly delimiter;
52
+ private readonly writeHeader;
53
+ private headers;
54
+ private stream;
55
+ private headerWritten;
56
+ constructor(options: CsvSinkOptions);
57
+ open(): Promise<void>;
58
+ close(): Promise<void>;
59
+ write(record: Record<string, unknown>): Promise<void>;
60
+ writeBatch(records: Record<string, unknown>[]): Promise<void>;
61
+ private writeLine;
62
+ }
63
+ //# sourceMappingURL=csv.connector.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"csv.connector.d.ts","sourceRoot":"","sources":["../../src/connectors/csv.connector.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAElE,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,yDAAyD;IACzD,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+BAA+B;IAC/B,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;;;;;;;GAQG;AACH,qBAAa,SAAU,YAAW,UAAU,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClE,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAU;IACpC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAW;gBAE9B,OAAO,EAAE,gBAAgB;IAQ/B,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAMrB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAItB,OAAO,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAQ3C,IAAI,IAAI,cAAc,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IA4BrD,OAAO,CAAC,SAAS;CAwBlB;AAED;;;;;;;;GAQG;AACH,qBAAa,OAAQ,YAAW,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC/D,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAU;IACtC,OAAO,CAAC,OAAO,CAAyB;IACxC,OAAO,CAAC,MAAM,CAAqD;IACnE,OAAO,CAAC,aAAa,CAAS;gBAElB,OAAO,EAAE,cAAc;IAO7B,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAIrB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAUtB,KAAK,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAerD,UAAU,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAInE,OAAO,CAAC,SAAS;CAWlB"}
@@ -0,0 +1,147 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CsvSink = exports.CsvSource = void 0;
4
+ const fs_1 = require("fs");
5
+ const readline_1 = require("readline");
6
+ /**
7
+ * CSV file data source — reads records from a CSV file.
8
+ *
9
+ * @example
10
+ * const source = new CsvSource({ filePath: './data.csv', hasHeader: true });
11
+ * await source.open();
12
+ * const records = await source.readAll();
13
+ * await source.close();
14
+ */
15
+ class CsvSource {
16
+ constructor(options) {
17
+ this.name = options.name ?? `csv:${options.filePath}`;
18
+ this.filePath = options.filePath;
19
+ this.delimiter = options.delimiter ?? ',';
20
+ this.hasHeader = options.hasHeader ?? true;
21
+ this.customHeaders = options.headers;
22
+ }
23
+ async open() {
24
+ if (!(0, fs_1.existsSync)(this.filePath)) {
25
+ throw new Error(`CSV file not found: ${this.filePath}`);
26
+ }
27
+ }
28
+ async close() {
29
+ // No-op for file reads
30
+ }
31
+ async readAll() {
32
+ const records = [];
33
+ for await (const record of this.read()) {
34
+ records.push(record);
35
+ }
36
+ return records;
37
+ }
38
+ async *read() {
39
+ const rl = (0, readline_1.createInterface)({
40
+ input: (0, fs_1.createReadStream)(this.filePath),
41
+ crlfDelay: Infinity,
42
+ });
43
+ let headers = this.customHeaders ?? null;
44
+ let isFirst = true;
45
+ for await (const line of rl) {
46
+ const cols = this.parseLine(line);
47
+ if (isFirst && this.hasHeader && !this.customHeaders) {
48
+ headers = cols;
49
+ isFirst = false;
50
+ continue;
51
+ }
52
+ isFirst = false;
53
+ if (!headers) {
54
+ headers = cols.map((_, i) => `col${i}`);
55
+ }
56
+ const record = {};
57
+ headers.forEach((h, i) => {
58
+ record[h] = cols[i] ?? '';
59
+ });
60
+ yield record;
61
+ }
62
+ }
63
+ parseLine(line) {
64
+ const result = [];
65
+ let current = '';
66
+ let inQuote = false;
67
+ for (let i = 0; i < line.length; i++) {
68
+ const ch = line[i];
69
+ if (ch === '"') {
70
+ if (inQuote && line[i + 1] === '"') {
71
+ current += '"';
72
+ i++;
73
+ }
74
+ else {
75
+ inQuote = !inQuote;
76
+ }
77
+ }
78
+ else if (ch === this.delimiter && !inQuote) {
79
+ result.push(current);
80
+ current = '';
81
+ }
82
+ else {
83
+ current += ch;
84
+ }
85
+ }
86
+ result.push(current);
87
+ return result;
88
+ }
89
+ }
90
+ exports.CsvSource = CsvSource;
91
+ /**
92
+ * CSV file data sink — writes records to a CSV file.
93
+ *
94
+ * @example
95
+ * const sink = new CsvSink({ filePath: './output.csv', writeHeader: true });
96
+ * await sink.open();
97
+ * await sink.writeBatch(records);
98
+ * await sink.close();
99
+ */
100
+ class CsvSink {
101
+ constructor(options) {
102
+ this.headers = null;
103
+ this.stream = null;
104
+ this.headerWritten = false;
105
+ this.name = options.name ?? `csv:${options.filePath}`;
106
+ this.filePath = options.filePath;
107
+ this.delimiter = options.delimiter ?? ',';
108
+ this.writeHeader = options.writeHeader ?? true;
109
+ }
110
+ async open() {
111
+ this.stream = (0, fs_1.createWriteStream)(this.filePath, { encoding: 'utf8' });
112
+ }
113
+ async close() {
114
+ return new Promise((resolve, reject) => {
115
+ if (!this.stream) {
116
+ resolve();
117
+ return;
118
+ }
119
+ this.stream.end((err) => (err ? reject(err) : resolve()));
120
+ });
121
+ }
122
+ async write(record) {
123
+ if (!this.stream)
124
+ throw new Error('CsvSink: call open() before write()');
125
+ if (!this.headers) {
126
+ this.headers = Object.keys(record);
127
+ }
128
+ if (this.writeHeader && !this.headerWritten) {
129
+ this.writeLine(this.headers);
130
+ this.headerWritten = true;
131
+ }
132
+ this.writeLine(this.headers.map((h) => String(record[h] ?? '')));
133
+ }
134
+ async writeBatch(records) {
135
+ for (const r of records)
136
+ await this.write(r);
137
+ }
138
+ writeLine(cols) {
139
+ const line = cols
140
+ .map((c) => c.includes(this.delimiter) || c.includes('"') || c.includes('\n')
141
+ ? `"${c.replace(/"/g, '""')}"`
142
+ : c)
143
+ .join(this.delimiter) + '\n';
144
+ this.stream.write(line);
145
+ }
146
+ }
147
+ exports.CsvSink = CsvSink;
@@ -0,0 +1,68 @@
1
+ import type { DataSource, DataSink } from './connector.interface';
2
+ export interface HttpSourceOptions {
3
+ url: string;
4
+ method?: 'GET' | 'POST';
5
+ headers?: Record<string, string>;
6
+ body?: unknown;
7
+ /** JSON path to the array in the response (e.g. "data.items") */
8
+ dataPath?: string;
9
+ /** Pagination: next page URL extracted from response (e.g. "meta.next") */
10
+ nextPagePath?: string;
11
+ name?: string;
12
+ timeoutMs?: number;
13
+ }
14
+ export interface HttpSinkOptions {
15
+ url: string;
16
+ method?: 'POST' | 'PUT' | 'PATCH';
17
+ headers?: Record<string, string>;
18
+ /** Batch size — how many records to send per request (default: 1) */
19
+ batchSize?: number;
20
+ /** JSON field name to wrap records in (e.g. "records" → { records: [...] }) */
21
+ bodyKey?: string;
22
+ name?: string;
23
+ timeoutMs?: number;
24
+ }
25
+ /**
26
+ * HTTP API data source — reads records from a REST API.
27
+ * Supports pagination via `nextPagePath`.
28
+ *
29
+ * @example
30
+ * const source = new HttpSource({
31
+ * url: 'https://api.example.com/users',
32
+ * dataPath: 'data',
33
+ * nextPagePath: 'meta.next',
34
+ * });
35
+ */
36
+ export declare class HttpSource implements DataSource<unknown> {
37
+ readonly name: string;
38
+ private readonly options;
39
+ constructor(options: HttpSourceOptions);
40
+ open(): Promise<void>;
41
+ close(): Promise<void>;
42
+ readAll(): Promise<unknown[]>;
43
+ read(): AsyncGenerator<unknown>;
44
+ private fetchPage;
45
+ }
46
+ /**
47
+ * HTTP API data sink — writes records to a REST API endpoint.
48
+ *
49
+ * @example
50
+ * const sink = new HttpSink({
51
+ * url: 'https://api.example.com/ingest',
52
+ * method: 'POST',
53
+ * batchSize: 100,
54
+ * bodyKey: 'records',
55
+ * });
56
+ */
57
+ export declare class HttpSink implements DataSink<unknown> {
58
+ readonly name: string;
59
+ private readonly options;
60
+ private buffer;
61
+ constructor(options: HttpSinkOptions);
62
+ open(): Promise<void>;
63
+ close(): Promise<void>;
64
+ write(record: unknown): Promise<void>;
65
+ writeBatch(records: unknown[]): Promise<void>;
66
+ private flush;
67
+ }
68
+ //# sourceMappingURL=http.connector.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"http.connector.d.ts","sourceRoot":"","sources":["../../src/connectors/http.connector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAElE,MAAM,WAAW,iBAAiB;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC;IACxB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,IAAI,CAAC,EAAE,OAAO,CAAC;IACf,iEAAiE;IACjE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2EAA2E;IAC3E,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,OAAO,CAAC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,qEAAqE;IACrE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,+EAA+E;IAC/E,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AASD;;;;;;;;;;GAUG;AACH,qBAAa,UAAW,YAAW,UAAU,CAAC,OAAO,CAAC;IACpD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAoB;gBAEhC,OAAO,EAAE,iBAAiB;IAKhC,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAMrB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAEtB,OAAO,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;IAM5B,IAAI,IAAI,cAAc,CAAC,OAAO,CAAC;IAwBtC,OAAO,CAAC,SAAS;CAYlB;AAED;;;;;;;;;;GAUG;AACH,qBAAa,QAAS,YAAW,QAAQ,CAAC,OAAO,CAAC;IAChD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;IAC1C,OAAO,CAAC,MAAM,CAAiB;gBAEnB,OAAO,EAAE,eAAe;IAK9B,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAErB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAItB,KAAK,CAAC,MAAM,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IAOrC,UAAU,CAAC,OAAO,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;YAKrC,KAAK;CA2BpB"}