@hazeljs/data 0.2.0-beta.67 → 0.2.0-beta.69
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +175 -61
- package/dist/connectors/connector.interface.d.ts +29 -0
- package/dist/connectors/connector.interface.d.ts.map +1 -0
- package/dist/connectors/connector.interface.js +6 -0
- package/dist/connectors/csv.connector.d.ts +63 -0
- package/dist/connectors/csv.connector.d.ts.map +1 -0
- package/dist/connectors/csv.connector.js +147 -0
- package/dist/connectors/http.connector.d.ts +68 -0
- package/dist/connectors/http.connector.d.ts.map +1 -0
- package/dist/connectors/http.connector.js +131 -0
- package/dist/connectors/index.d.ts +7 -0
- package/dist/connectors/index.d.ts.map +1 -0
- package/dist/connectors/index.js +12 -0
- package/dist/connectors/memory.connector.d.ts +38 -0
- package/dist/connectors/memory.connector.d.ts.map +1 -0
- package/dist/connectors/memory.connector.js +56 -0
- package/dist/connectors/memory.connector.test.d.ts +2 -0
- package/dist/connectors/memory.connector.test.d.ts.map +1 -0
- package/dist/connectors/memory.connector.test.js +43 -0
- package/dist/data.types.d.ts +16 -0
- package/dist/data.types.d.ts.map +1 -1
- package/dist/decorators/index.d.ts +1 -0
- package/dist/decorators/index.d.ts.map +1 -1
- package/dist/decorators/index.js +8 -1
- package/dist/decorators/pii.decorator.d.ts +59 -0
- package/dist/decorators/pii.decorator.d.ts.map +1 -0
- package/dist/decorators/pii.decorator.js +197 -0
- package/dist/decorators/pii.decorator.test.d.ts +2 -0
- package/dist/decorators/pii.decorator.test.d.ts.map +1 -0
- package/dist/decorators/pii.decorator.test.js +150 -0
- package/dist/decorators/pipeline.decorator.js +1 -1
- package/dist/decorators/pipeline.decorator.test.js +8 -0
- package/dist/decorators/transform.decorator.d.ts +9 -1
- package/dist/decorators/transform.decorator.d.ts.map +1 -1
- package/dist/decorators/transform.decorator.js +4 -0
- package/dist/decorators/validate.decorator.d.ts +5 -1
- package/dist/decorators/validate.decorator.d.ts.map +1 -1
- package/dist/decorators/validate.decorator.js +4 -0
- package/dist/flink.service.d.ts +30 -0
- package/dist/flink.service.d.ts.map +1 -1
- package/dist/flink.service.js +50 -2
- package/dist/index.d.ts +13 -7
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +36 -8
- package/dist/pipelines/etl.service.d.ts +41 -2
- package/dist/pipelines/etl.service.d.ts.map +1 -1
- package/dist/pipelines/etl.service.js +143 -6
- package/dist/pipelines/etl.service.test.js +215 -0
- package/dist/pipelines/pipeline.builder.d.ts +86 -13
- package/dist/pipelines/pipeline.builder.d.ts.map +1 -1
- package/dist/pipelines/pipeline.builder.js +177 -27
- package/dist/pipelines/pipeline.builder.test.js +160 -12
- package/dist/pipelines/stream.service.test.js +49 -0
- package/dist/quality/quality.service.d.ts +67 -5
- package/dist/quality/quality.service.d.ts.map +1 -1
- package/dist/quality/quality.service.js +259 -20
- package/dist/quality/quality.service.test.js +94 -0
- package/dist/schema/schema.d.ts +92 -12
- package/dist/schema/schema.d.ts.map +1 -1
- package/dist/schema/schema.js +395 -83
- package/dist/schema/schema.test.js +292 -0
- package/dist/streaming/flink/flink.client.d.ts +41 -3
- package/dist/streaming/flink/flink.client.d.ts.map +1 -1
- package/dist/streaming/flink/flink.client.js +171 -8
- package/dist/streaming/flink/flink.client.test.js +2 -2
- package/dist/streaming/flink/flink.job.d.ts +2 -1
- package/dist/streaming/flink/flink.job.d.ts.map +1 -1
- package/dist/streaming/flink/flink.job.js +2 -2
- package/dist/streaming/stream.processor.d.ts +56 -2
- package/dist/streaming/stream.processor.d.ts.map +1 -1
- package/dist/streaming/stream.processor.js +149 -2
- package/dist/streaming/stream.processor.test.js +99 -0
- package/dist/streaming/stream.processor.windowing.test.d.ts +2 -0
- package/dist/streaming/stream.processor.windowing.test.d.ts.map +1 -0
- package/dist/streaming/stream.processor.windowing.test.js +69 -0
- package/dist/telemetry/telemetry.d.ts +124 -0
- package/dist/telemetry/telemetry.d.ts.map +1 -0
- package/dist/telemetry/telemetry.js +259 -0
- package/dist/telemetry/telemetry.test.d.ts +2 -0
- package/dist/telemetry/telemetry.test.d.ts.map +1 -0
- package/dist/telemetry/telemetry.test.js +51 -0
- package/dist/testing/index.d.ts +12 -0
- package/dist/testing/index.d.ts.map +1 -0
- package/dist/testing/index.js +18 -0
- package/dist/testing/pipeline-test-harness.d.ts +40 -0
- package/dist/testing/pipeline-test-harness.d.ts.map +1 -0
- package/dist/testing/pipeline-test-harness.js +55 -0
- package/dist/testing/pipeline-test-harness.test.d.ts +2 -0
- package/dist/testing/pipeline-test-harness.test.d.ts.map +1 -0
- package/dist/testing/pipeline-test-harness.test.js +102 -0
- package/dist/testing/schema-faker.d.ts +32 -0
- package/dist/testing/schema-faker.d.ts.map +1 -0
- package/dist/testing/schema-faker.js +91 -0
- package/dist/testing/schema-faker.test.d.ts +2 -0
- package/dist/testing/schema-faker.test.d.ts.map +1 -0
- package/dist/testing/schema-faker.test.js +66 -0
- package/dist/transformers/built-in.transformers.test.js +28 -0
- package/dist/transformers/transformer.service.test.js +10 -0
- package/package.json +2 -2
|
@@ -5,58 +5,208 @@ var __decorate = (this && this.__decorate) || function (decorators, target, key,
|
|
|
5
5
|
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
6
6
|
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
7
7
|
};
|
|
8
|
+
var __metadata = (this && this.__metadata) || function (k, v) {
|
|
9
|
+
if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
|
|
10
|
+
};
|
|
8
11
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
9
12
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
10
13
|
};
|
|
14
|
+
var PipelineBuilder_1;
|
|
11
15
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
16
|
exports.PipelineBuilder = void 0;
|
|
13
17
|
const core_1 = require("@hazeljs/core");
|
|
14
18
|
const core_2 = __importDefault(require("@hazeljs/core"));
|
|
19
|
+
/**
 * Invoke `fn` until it succeeds or the attempt budget is exhausted.
 *
 * @param {() => unknown} fn Step body; may be sync or async. Synchronous throws
 *   are caught the same way as rejections.
 * @param {{ attempts: number, delay?: number, backoff?: string }} retry
 *   `attempts` is the total number of tries (not the number of re-tries),
 *   `delay` is the pause between tries in milliseconds (default 500) and
 *   `backoff` doubles that pause after every failure when set to 'exponential'.
 * @param {string} _stepName Unused; kept so the signature matches runWithTimeout.
 * @returns {Promise<unknown>} The value produced by the first successful call.
 * @throws {Error} The last failure, wrapped in an Error if a non-Error was thrown.
 */
async function runWithRetry(fn, retry, _stepName) {
    const { attempts, delay = 500, backoff = 'fixed' } = retry;
    // A missing, zero or negative budget used to skip the loop entirely, so the
    // step never ran and a placeholder "Unknown" error was thrown. Always try once.
    const requested = Number(attempts);
    const maxAttempts = requested >= 1 ? requested : 1;
    let lastError;
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        try {
            return await fn();
        }
        catch (err) {
            lastError = err instanceof Error ? err : new Error(String(err));
            // No pause after the final failure: the caller should see the error immediately.
            if (attempt < maxAttempts) {
                const wait = backoff === 'exponential' ? delay * 2 ** (attempt - 1) : delay;
                await new Promise((resolve) => setTimeout(resolve, wait));
            }
        }
    }
    throw lastError;
}
|
|
36
|
+
/**
 * Run `fn` and reject if it has not settled within `ms` milliseconds.
 *
 * `fn` is invoked synchronously, exactly once. The underlying work is not
 * cancelled on timeout; only the returned promise is rejected.
 *
 * @param {() => unknown} fn Work to run; may return a promise or a plain value.
 * @param {number} ms Time limit in milliseconds.
 * @param {string} stepName Step name, used in the timeout error message.
 * @returns {Promise<unknown>} Settles with the outcome of `fn`.
 * @throws {Error} `Step "<name>" timed out after <ms>ms` when the limit is hit.
 */
async function runWithTimeout(fn, ms, stepName) {
    return new Promise((resolve, reject) => {
        const timer = setTimeout(() => reject(new Error(`Step "${stepName}" timed out after ${ms}ms`)), ms);
        // Every exit path clears the timer so it cannot keep the event loop alive.
        const finish = (settle) => (value) => {
            clearTimeout(timer);
            settle(value);
        };
        let outcome;
        try {
            outcome = fn();
        }
        catch (err) {
            // A synchronous throw previously rejected via the executor and left the timer armed.
            finish(reject)(err);
            return;
        }
        // Promise.resolve lets plain (non-thenable) return values through as well.
        Promise.resolve(outcome).then(finish(resolve), finish(reject));
    });
}
|
|
15
48
|
/**
|
|
16
|
-
*
|
|
49
|
+
* PipelineBuilder — immutable, fluent DSL for building ETL pipelines programmatically.
|
|
50
|
+
*
|
|
51
|
+
* Each builder method returns a **new** instance — the original is never mutated.
|
|
52
|
+
*
|
|
53
|
+
* @example
|
|
54
|
+
* const pipeline = new PipelineBuilder()
|
|
55
|
+
* .setName('orders')
|
|
56
|
+
* .addTransform('normalize', (d) => ({ ...d, email: d.email.toLowerCase() }))
|
|
57
|
+
* .when((d) => d.active, (b) => b.addTransform('enrich', enrich))
|
|
58
|
+
* .addValidate('validate', validateFn);
|
|
59
|
+
*
|
|
60
|
+
* const result = await pipeline.execute(rawData);
|
|
17
61
|
*/
|
|
18
|
-
let PipelineBuilder = class PipelineBuilder {
|
|
19
|
-
constructor() {
|
|
20
|
-
this.
|
|
21
|
-
this.
|
|
62
|
+
let PipelineBuilder = PipelineBuilder_1 = class PipelineBuilder {
    /**
     * @param {string} [name='unnamed-pipeline'] Name used in debug logs and in the schema.
     * @param {Array<object>} [steps=[]] Initial step definitions. The array is copied so
     *   that later changes to the caller's array cannot alter this builder.
     */
    constructor(name = 'unnamed-pipeline', steps = []) {
        this._name = name;
        this._steps = [...steps];
    }
    // ─── Identity ──────────────────────────────────────────────────────────────
    /** Return a new builder with the same steps and the given name. */
    setName(name) {
        return new PipelineBuilder_1(name, this._steps);
    }
    // ─── Steps ─────────────────────────────────────────────────────────────────
    /**
     * Append a transform step. `options` is spread onto the step; execute() reads
     * `when`, `retry`, `timeoutMs` and `dlq` from it.
     */
    addTransform(name, transform, options = {}) {
        return new PipelineBuilder_1(this._name, [...this._steps, { name, transform, ...options }]);
    }
    /**
     * Append a validate step. The return value of `validate(data)` becomes the new
     * data. `options` is handled exactly as in addTransform().
     */
    addValidate(name, validate, options = {}) {
        return new PipelineBuilder_1(this._name, [...this._steps, { name, validate, ...options }]);
    }
    /**
     * Run multiple transforms concurrently. If every result is a non-null,
     * non-array object the results are merged (Object.assign) onto a copy of the
     * current data; otherwise the data is replaced with the array of results.
     */
    parallel(name, transforms) {
        return new PipelineBuilder_1(this._name, [...this._steps, { name, parallel: transforms }]);
    }
    /**
     * Conditional branch: if `condition(data)` is truthy, run the steps built by
     * `thenBuilder`, otherwise those built by `elseBuilder` (default: no steps,
     * i.e. identity). Both callbacks receive a fresh, empty builder.
     */
    branch(name, condition, thenBuilder, elseBuilder) {
        const left = thenBuilder(new PipelineBuilder_1());
        const right = elseBuilder ? elseBuilder(new PipelineBuilder_1()) : new PipelineBuilder_1();
        return new PipelineBuilder_1(this._name, [
            ...this._steps,
            { name, branch: { condition, left, right } },
        ]);
    }
    /**
     * Attach an error handler to the most recently added step. If that step
     * throws, `handler(data, error)` is called and its return value becomes the
     * new data. A step that also has a `dlq` option uses the DLQ handler instead
     * and this handler is not called. With no steps yet, this is a no-op and the
     * same builder is returned.
     */
    catch(handler) {
        if (this._steps.length === 0)
            return this;
        const steps = [...this._steps];
        // Copy the last step instead of mutating it: earlier builders share the same step object.
        steps[steps.length - 1] = { ...steps[steps.length - 1], catch: handler };
        return new PipelineBuilder_1(this._name, steps);
    }
    // ─── Execution ─────────────────────────────────────────────────────────────
    /**
     * Run every step in order, feeding each step's result into the next.
     *
     * Per step: `when` may skip it; `retry` wraps the step body; `timeoutMs`
     * bounds the whole step including retries. On failure the `dlq` handler is
     * called and the data is left unchanged, else the `catch` handler supplies
     * replacement data, else the error is rethrown and execution stops.
     *
     * @param {unknown} input Initial data.
     * @returns {Promise<unknown>} Data after the last step.
     */
    async execute(input) {
        let data = input;
        for (let i = 0; i < this._steps.length; i++) {
            const step = this._steps[i];
            core_2.default.debug(`Pipeline "${this._name}": step ${i + 1} — ${step.name}`);
            // Conditional skip
            if (step.when && !step.when(data)) {
                core_2.default.debug(`Step "${step.name}" skipped`);
                continue;
            }
            // Step kinds are checked in priority order: branch, parallel, transform, validate.
            const runStep = async () => {
                if (step.branch) {
                    const { condition, left, right } = step.branch;
                    return condition(data) ? left.execute(data) : right.execute(data);
                }
                if (step.parallel && step.parallel.length > 0) {
                    const results = await Promise.all(step.parallel.map((fn) => Promise.resolve(fn(data))));
                    const allPlainObjects = results.every((r) => r !== null && typeof r === 'object' && !Array.isArray(r));
                    return allPlainObjects ? Object.assign({}, data, ...results) : results;
                }
                if (step.transform) {
                    return step.transform(data);
                }
                if (step.validate) {
                    return step.validate(data);
                }
                return data;
            };
            try {
                const attempt = step.retry ? runWithRetry(runStep, step.retry, step.name) : runStep();
                // `attempt` is a const so the timeout wrapper can never resolve to itself,
                // regardless of when runWithTimeout chooses to invoke the callback.
                const guarded = step.timeoutMs
                    ? runWithTimeout(() => attempt, step.timeoutMs, step.name)
                    : attempt;
                data = await guarded;
            }
            catch (err) {
                const error = err instanceof Error ? err : new Error(String(err));
                if (step.dlq) {
                    // Dead-letter: report the failure and continue with the data unchanged.
                    await Promise.resolve(step.dlq.handler(data, error, step.name));
                }
                else if (step.catch) {
                    data = await Promise.resolve(step.catch(data, error));
                }
                else {
                    throw error;
                }
            }
        }
        return data;
    }
    // ─── Serialization ─────────────────────────────────────────────────────────
    /** Serialize the pipeline definition to a plain object (functions are omitted, only flags remain). */
    toSchema() {
        return {
            name: this._name,
            steps: this._steps.map((s) => ({
                name: s.name,
                ...(s.when ? { conditional: true } : {}),
                ...(s.parallel ? { parallel: true, parallelCount: s.parallel.length } : {}),
                ...(s.branch ? { branch: true } : {}),
                ...(s.retry ? { retry: s.retry } : {}),
                ...(s.timeoutMs ? { timeoutMs: s.timeoutMs } : {}),
            })),
        };
    }
    /** Alias of toSchema(). */
    build() {
        return this.toSchema();
    }
    /** Create a fresh, empty pipeline with the given name. */
    static create(name) {
        return new PipelineBuilder_1(name);
    }
    /**
     * @deprecated Use `new PipelineBuilder()` directly. Kept for backward compat.
     * Note: this instance is immutable — reset() returns a new empty builder with
     * the same name and leaves this one untouched.
     */
    reset() {
        return new PipelineBuilder_1(this._name);
    }
    /** Pipeline name. */
    get name() {
        return this._name;
    }
    /** Snapshot of the step list; a copy, so mutating it does not affect this builder. */
    get steps() {
        return [...this._steps];
    }
};
|
|
59
208
|
exports.PipelineBuilder = PipelineBuilder;
|
|
60
|
-
exports.PipelineBuilder = PipelineBuilder = __decorate([
|
|
61
|
-
(0, core_1.Service)()
|
|
209
|
+
exports.PipelineBuilder = PipelineBuilder = PipelineBuilder_1 = __decorate([
|
|
210
|
+
(0, core_1.Service)(),
|
|
211
|
+
__metadata("design:paramtypes", [Object, Array])
|
|
62
212
|
], PipelineBuilder);
|
|
@@ -7,31 +7,179 @@ describe('PipelineBuilder', () => {
|
|
|
7
7
|
builder = new pipeline_builder_1.PipelineBuilder();
|
|
8
8
|
});
|
|
9
9
|
it('adds transform and executes', async () => {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
const pipeline = builder
|
|
11
|
+
.addTransform('step1', (d) => Object.assign({}, d, { a: 1 }))
|
|
12
|
+
.addTransform('step2', (d) => Object.assign({}, d, { b: 2 }));
|
|
13
|
+
const result = await pipeline.execute({ x: 0 });
|
|
13
14
|
expect(result).toEqual({ x: 0, a: 1, b: 2 });
|
|
14
15
|
});
|
|
15
16
|
it('handles async transforms', async () => {
|
|
16
|
-
builder.addTransform('async', async (d) => Object.assign({}, d, { done: true }));
|
|
17
|
-
const result = await
|
|
17
|
+
const pipeline = builder.addTransform('async', async (d) => Object.assign({}, d, { done: true }));
|
|
18
|
+
const result = await pipeline.execute({});
|
|
18
19
|
expect(result.done).toBe(true);
|
|
19
20
|
});
|
|
20
21
|
it('setName sets pipeline name', () => {
|
|
21
|
-
builder.setName('my-pipeline');
|
|
22
|
-
const built = builder.build();
|
|
22
|
+
const built = builder.setName('my-pipeline').build();
|
|
23
23
|
expect(built.name).toBe('my-pipeline');
|
|
24
24
|
});
|
|
25
25
|
it('build returns config', () => {
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
const config = builder
|
|
27
|
+
.addTransform('s1', (d) => d)
|
|
28
|
+
.addValidate('s2', (d) => d)
|
|
29
|
+
.build();
|
|
28
30
|
expect(config.steps).toHaveLength(2);
|
|
29
31
|
expect(config.name).toBe('unnamed-pipeline');
|
|
30
32
|
});
|
|
31
33
|
it('reset clears steps', async () => {
|
|
32
|
-
builder.addTransform('s1', (d) => d);
|
|
33
|
-
|
|
34
|
-
const config =
|
|
34
|
+
const withStep = builder.addTransform('s1', (d) => d);
|
|
35
|
+
const cleared = withStep.reset();
|
|
36
|
+
const config = cleared.build();
|
|
35
37
|
expect(config.steps).toHaveLength(0);
|
|
36
38
|
});
|
|
39
|
+
it('is immutable — original is unmodified after chaining', () => {
|
|
40
|
+
builder.addTransform('step1', (d) => d);
|
|
41
|
+
expect(builder.steps).toHaveLength(0); // original unchanged
|
|
42
|
+
});
|
|
43
|
+
it('branch runs left path when condition is true', async () => {
|
|
44
|
+
const pipeline = builder.branch('classify', (d) => d.type === 'a', (b) => b.addTransform('enrichA', (d) => Object.assign({}, d, { enriched: 'A' })), (b) => b.addTransform('enrichB', (d) => Object.assign({}, d, { enriched: 'B' })));
|
|
45
|
+
const resultA = await pipeline.execute({ type: 'a' });
|
|
46
|
+
expect(resultA.enriched).toBe('A');
|
|
47
|
+
const resultB = await pipeline.execute({ type: 'b' });
|
|
48
|
+
expect(resultB.enriched).toBe('B');
|
|
49
|
+
});
|
|
50
|
+
it('parallel runs transforms concurrently and merges results', async () => {
|
|
51
|
+
const pipeline = builder.parallel('enrich', [
|
|
52
|
+
(d) => Object.assign({}, d, { a: 1 }),
|
|
53
|
+
(d) => Object.assign({}, d, { b: 2 }),
|
|
54
|
+
]);
|
|
55
|
+
const result = await pipeline.execute({});
|
|
56
|
+
expect(result.a).toBe(1);
|
|
57
|
+
expect(result.b).toBe(2);
|
|
58
|
+
});
|
|
59
|
+
it('catch handles step errors', async () => {
|
|
60
|
+
const pipeline = builder
|
|
61
|
+
.addTransform('fail', () => {
|
|
62
|
+
throw new Error('step failed');
|
|
63
|
+
})
|
|
64
|
+
.catch((_data, _err) => ({ recovered: true }));
|
|
65
|
+
const result = await pipeline.execute({});
|
|
66
|
+
expect(result.recovered).toBe(true);
|
|
67
|
+
});
|
|
68
|
+
it('toSchema returns serializable definition', () => {
|
|
69
|
+
const schema = builder
|
|
70
|
+
.addTransform('step1', (d) => d)
|
|
71
|
+
.addValidate('step2', (d) => d)
|
|
72
|
+
.toSchema();
|
|
73
|
+
expect(schema.name).toBe('unnamed-pipeline');
|
|
74
|
+
expect(schema.steps).toHaveLength(2);
|
|
75
|
+
expect(schema.steps[0].name).toBe('step1');
|
|
76
|
+
});
|
|
77
|
+
it('retry retries failed steps', async () => {
|
|
78
|
+
let attempts = 0;
|
|
79
|
+
const pipeline = builder.addTransform('flaky', () => {
|
|
80
|
+
attempts++;
|
|
81
|
+
if (attempts < 3)
|
|
82
|
+
throw new Error('transient');
|
|
83
|
+
return { ok: true };
|
|
84
|
+
}, { retry: { attempts: 3, delay: 0 } });
|
|
85
|
+
const result = await pipeline.execute({});
|
|
86
|
+
expect(result.ok).toBe(true);
|
|
87
|
+
expect(attempts).toBe(3);
|
|
88
|
+
});
|
|
89
|
+
it('addTransform with when option skips when false', async () => {
|
|
90
|
+
const pipeline = builder.addTransform('cond', (d) => Object.assign({}, d, { ran: true }), {
|
|
91
|
+
when: (d) => d.run === true,
|
|
92
|
+
});
|
|
93
|
+
const result = await pipeline.execute({ run: false });
|
|
94
|
+
expect(result.ran).toBeUndefined();
|
|
95
|
+
const result2 = await pipeline.execute({ run: true });
|
|
96
|
+
expect(result2.ran).toBe(true);
|
|
97
|
+
});
|
|
98
|
+
it('addValidate with when option', async () => {
|
|
99
|
+
const pipeline = builder.addValidate('v', (d) => d, { when: (d) => d.ok });
|
|
100
|
+
const result = await pipeline.execute({ ok: true });
|
|
101
|
+
expect(result).toEqual({ ok: true });
|
|
102
|
+
});
|
|
103
|
+
it('PipelineBuilder.create returns new instance', () => {
|
|
104
|
+
const b = pipeline_builder_1.PipelineBuilder.create('test');
|
|
105
|
+
expect(b.name).toBe('test');
|
|
106
|
+
expect(b.steps).toHaveLength(0);
|
|
107
|
+
});
|
|
108
|
+
it('conditional step is skipped when predicate returns false', async () => {
|
|
109
|
+
let ran = false;
|
|
110
|
+
const pipeline = builder.addTransform('conditional', (d) => {
|
|
111
|
+
ran = true;
|
|
112
|
+
return d;
|
|
113
|
+
}, { when: () => false });
|
|
114
|
+
await pipeline.execute({});
|
|
115
|
+
expect(ran).toBe(false);
|
|
116
|
+
});
|
|
117
|
+
it('parallel returns array when results are not all objects', async () => {
|
|
118
|
+
const pipeline = builder.parallel('mixed', [() => 1, () => 2]);
|
|
119
|
+
const result = await pipeline.execute({});
|
|
120
|
+
expect(result).toEqual([1, 2]);
|
|
121
|
+
});
|
|
122
|
+
it('parallel returns array when some results are null', async () => {
|
|
123
|
+
const pipeline = builder.parallel('withNull', [
|
|
124
|
+
(d) => Object.assign({}, d, { a: 1 }),
|
|
125
|
+
() => null,
|
|
126
|
+
]);
|
|
127
|
+
const result = await pipeline.execute({});
|
|
128
|
+
expect(Array.isArray(result)).toBe(true);
|
|
129
|
+
expect(result).toHaveLength(2);
|
|
130
|
+
});
|
|
131
|
+
it('branch without elseBuilder uses identity for right path', async () => {
|
|
132
|
+
const pipeline = builder.branch('cond', (d) => d.flag, (b) => b.addTransform('left', (d) => Object.assign({}, d, { side: 'left' })));
|
|
133
|
+
const left = await pipeline.execute({ flag: true });
|
|
134
|
+
expect(left.side).toBe('left');
|
|
135
|
+
const right = await pipeline.execute({ flag: false });
|
|
136
|
+
expect(right).toEqual({ flag: false });
|
|
137
|
+
});
|
|
138
|
+
it('catch on empty steps returns this', () => {
|
|
139
|
+
const result = builder.catch(() => ({}));
|
|
140
|
+
expect(result).toBe(builder);
|
|
141
|
+
expect(result.steps).toHaveLength(0);
|
|
142
|
+
});
|
|
143
|
+
it('timeoutMs rejects when step exceeds timeout', async () => {
|
|
144
|
+
const pipeline = builder.addTransform('slow', () => new Promise((r) => setTimeout(() => r({}), 200)), { timeoutMs: 10 });
|
|
145
|
+
await expect(pipeline.execute({})).rejects.toThrow('timed out');
|
|
146
|
+
});
|
|
147
|
+
it('dlq handler is called on step failure', async () => {
|
|
148
|
+
const dlqItems = [];
|
|
149
|
+
const pipeline = builder.addTransform('fail', () => {
|
|
150
|
+
throw new Error('fail');
|
|
151
|
+
}, {
|
|
152
|
+
dlq: {
|
|
153
|
+
handler: (data, err, step) => {
|
|
154
|
+
dlqItems.push({ data, err: err.message, step });
|
|
155
|
+
},
|
|
156
|
+
},
|
|
157
|
+
});
|
|
158
|
+
const result = await pipeline.execute({});
|
|
159
|
+
expect(result).toEqual({});
|
|
160
|
+
expect(dlqItems).toHaveLength(1);
|
|
161
|
+
expect(dlqItems[0]).toMatchObject({ step: 'fail', err: 'fail' });
|
|
162
|
+
});
|
|
163
|
+
it('validate step runs when no transform', async () => {
|
|
164
|
+
const pipeline = builder.addValidate('v', (d) => d);
|
|
165
|
+
const result = await pipeline.execute({ x: 1 });
|
|
166
|
+
expect(result).toEqual({ x: 1 });
|
|
167
|
+
});
|
|
168
|
+
it('step with no transform or validate returns data', async () => {
|
|
169
|
+
const pipeline = builder.addTransform('id', (d) => d);
|
|
170
|
+
const result = await pipeline.execute({ a: 1 });
|
|
171
|
+
expect(result).toEqual({ a: 1 });
|
|
172
|
+
});
|
|
173
|
+
it('retry with exponential backoff', async () => {
|
|
174
|
+
let attempts = 0;
|
|
175
|
+
const pipeline = builder.addTransform('flaky', () => {
|
|
176
|
+
attempts++;
|
|
177
|
+
if (attempts < 2)
|
|
178
|
+
throw new Error('retry');
|
|
179
|
+
return { ok: true };
|
|
180
|
+
}, { retry: { attempts: 2, delay: 1, backoff: 'exponential' } });
|
|
181
|
+
const result = await pipeline.execute({});
|
|
182
|
+
expect(result.ok).toBe(true);
|
|
183
|
+
expect(attempts).toBe(2);
|
|
184
|
+
});
|
|
37
185
|
});
|
|
@@ -51,4 +51,53 @@ describe('StreamService', () => {
|
|
|
51
51
|
}
|
|
52
52
|
expect(results).toEqual([{ v: 1 }, { v: 6 }]);
|
|
53
53
|
});
|
|
54
|
+
it('processStream throws when pipeline not @Stream decorated', async () => {
|
|
55
|
+
let NoStreamPipeline = class NoStreamPipeline {
|
|
56
|
+
x(d) {
|
|
57
|
+
return d;
|
|
58
|
+
}
|
|
59
|
+
};
|
|
60
|
+
__decorate([
|
|
61
|
+
(0, decorators_1.Transform)({ step: 1, name: 'x' }),
|
|
62
|
+
__metadata("design:type", Function),
|
|
63
|
+
__metadata("design:paramtypes", [Object]),
|
|
64
|
+
__metadata("design:returntype", void 0)
|
|
65
|
+
], NoStreamPipeline.prototype, "x", null);
|
|
66
|
+
NoStreamPipeline = __decorate([
|
|
67
|
+
(0, decorators_1.Pipeline)('no-stream')
|
|
68
|
+
], NoStreamPipeline);
|
|
69
|
+
async function* source() {
|
|
70
|
+
yield {};
|
|
71
|
+
}
|
|
72
|
+
await expect((async () => {
|
|
73
|
+
for await (const _ of streamService.processStream(new NoStreamPipeline(), source())) {
|
|
74
|
+
break;
|
|
75
|
+
}
|
|
76
|
+
})()).rejects.toThrow('not decorated with @Stream');
|
|
77
|
+
});
|
|
78
|
+
it('processStream throws when item processing fails', async () => {
|
|
79
|
+
let FailStreamPipeline = class FailStreamPipeline {
|
|
80
|
+
fail() {
|
|
81
|
+
throw new Error('Item failed');
|
|
82
|
+
}
|
|
83
|
+
};
|
|
84
|
+
__decorate([
|
|
85
|
+
(0, decorators_1.Transform)({ step: 1, name: 'fail' }),
|
|
86
|
+
__metadata("design:type", Function),
|
|
87
|
+
__metadata("design:paramtypes", []),
|
|
88
|
+
__metadata("design:returntype", void 0)
|
|
89
|
+
], FailStreamPipeline.prototype, "fail", null);
|
|
90
|
+
FailStreamPipeline = __decorate([
|
|
91
|
+
(0, decorators_1.Pipeline)('fail-stream'),
|
|
92
|
+
(0, decorators_1.Stream)({ name: 'fail', source: 'kafka://in', sink: 'kafka://out' })
|
|
93
|
+
], FailStreamPipeline);
|
|
94
|
+
async function* source() {
|
|
95
|
+
yield {};
|
|
96
|
+
}
|
|
97
|
+
await expect((async () => {
|
|
98
|
+
for await (const _ of streamService.processStream(new FailStreamPipeline(), source())) {
|
|
99
|
+
break;
|
|
100
|
+
}
|
|
101
|
+
})()).rejects.toThrow('Item failed');
|
|
102
|
+
});
|
|
54
103
|
});
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/** Outcome of one quality check: the value a CheckFn produces and the element type of DataQualityReport.checks. */
export interface QualityCheckResult {
|
|
2
2
|
name: string;
|
|
3
3
|
passed: boolean;
|
|
4
|
+
/** NOTE(review): DataQualityReport.score is documented as the 0–100 average of individual check scores, so this is presumably on the same scale — confirm. */
score?: number;
|
|
4
5
|
message?: string;
|
|
5
6
|
details?: Record<string, unknown>;
|
|
6
7
|
}
|
|
@@ -10,16 +11,77 @@ export interface DataQualityReport {
|
|
|
10
11
|
totalRows: number;
|
|
11
12
|
checks: QualityCheckResult[];
|
|
12
13
|
passed: boolean;
|
|
14
|
+
/** Composite quality score 0–100 (average of individual check scores). */
|
|
15
|
+
score: number;
|
|
13
16
|
}
|
|
17
|
+
/**
 * Statistics for a single field; the value type of DataProfile.fields.
 * NOTE(review): the meaning of each member is only implied by its name in this
 * declaration file — confirm against the implementation in quality.service.js.
 */
export interface FieldProfile {
|
|
18
|
+
count: number;
|
|
19
|
+
nullCount: number;
|
|
20
|
+
nullPct: number;
|
|
21
|
+
uniqueCount: number;
|
|
22
|
+
cardinality: number;
|
|
23
|
+
min?: number | string;
|
|
24
|
+
max?: number | string;
|
|
25
|
+
mean?: number;
|
|
26
|
+
stddev?: number;
|
|
27
|
+
/** List of value/count pairs. */
topValues: Array<{
|
|
28
|
+
value: unknown;
|
|
29
|
+
count: number;
|
|
30
|
+
}>;
|
|
31
|
+
}
|
|
32
|
+
/** Return type of QualityService.profile(dataset, records). */
export interface DataProfile {
|
|
33
|
+
dataset: string;
|
|
34
|
+
totalRows: number;
|
|
35
|
+
/** One FieldProfile per key; keys are presumably field names — confirm. */
fields: Record<string, FieldProfile>;
|
|
36
|
+
generatedAt: Date;
|
|
37
|
+
}
|
|
38
|
+
/** Element type of the array returned by QualityService.detectAnomalies, which is documented as Z-score based. */
export interface AnomalyResult {
|
|
39
|
+
field: string;
|
|
40
|
+
rowIndex: number;
|
|
41
|
+
value: unknown;
|
|
42
|
+
/** NOTE(review): presumably the Z-score computed for `value` — confirm. */
zScore: number;
|
|
43
|
+
message: string;
|
|
44
|
+
}
|
|
45
|
+
/** A quality check: receives the data under test and returns a QualityCheckResult directly or via a Promise. Accepted by QualityService.registerCheck. */
export type CheckFn = (data: unknown) => QualityCheckResult | Promise<QualityCheckResult>;
|
|
46
|
+
/** Synchronous-only check function (not exported); the declared return type of the check factories on QualityService. */
type SyncCheckFn = (data: unknown) => QualityCheckResult;
|
|
14
47
|
/**
|
|
15
|
-
* Quality Service
|
|
16
|
-
*
|
|
48
|
+
* Quality Service — data quality checks, profiling, and anomaly detection.
|
|
49
|
+
*
|
|
50
|
+
* Built-in check factories:
|
|
51
|
+
* - completeness(fields[])
|
|
52
|
+
* - notNull(fields[])
|
|
53
|
+
* - uniqueness(fields[])
|
|
54
|
+
* - range(field, { min, max })
|
|
55
|
+
* - pattern(field, regex, message?)
|
|
56
|
+
* - referentialIntegrity(field, allowedValues[])
|
|
57
|
+
*
|
|
58
|
+
* Profiling:
|
|
59
|
+
* - profile(dataset, records[]) → DataProfile
|
|
60
|
+
*
|
|
61
|
+
* Anomaly detection:
|
|
62
|
+
* - detectAnomalies(records[], fields[], threshold?) → AnomalyResult[]
|
|
17
63
|
*/
|
|
18
64
|
export declare class QualityService {
|
|
19
65
|
private checks;
|
|
20
|
-
registerCheck(name: string, check:
|
|
66
|
+
/** Accepts a sync or async check under a name. NOTE(review): presumably stored in `checks` and used by runChecks — confirm in the implementation. */
registerCheck(name: string, check: CheckFn): void;
|
|
21
67
|
/** Resolves to a DataQualityReport for the given dataset name and data. */
runChecks(dataset: string, data: unknown): Promise<DataQualityReport>;
|
|
22
|
-
completeness(requiredFields: string[]):
|
|
23
|
-
notNull(fields: string[]):
|
|
68
|
+
/** The methods from here through referentialIntegrity are the built-in check factories named in the class comment; each returns a synchronous check function. */
completeness(requiredFields: string[]): SyncCheckFn;
|
|
69
|
+
notNull(fields: string[]): SyncCheckFn;
|
|
70
|
+
uniqueness(fields: string[]): SyncCheckFn;
|
|
71
|
+
range(field: string, options: {
|
|
72
|
+
min?: number;
|
|
73
|
+
max?: number;
|
|
74
|
+
}): SyncCheckFn;
|
|
75
|
+
pattern(field: string, regex: RegExp, message?: string): SyncCheckFn;
|
|
76
|
+
referentialIntegrity(field: string, allowedValues: unknown[]): SyncCheckFn;
|
|
77
|
+
/** Profiling entry point: returns a DataProfile for the named dataset and its records. */
profile(dataset: string, records: Record<string, unknown>[]): DataProfile;
|
|
78
|
+
/**
|
|
79
|
+
* Detect statistical anomalies using Z-score.
|
|
80
|
+
* @param records Dataset rows
|
|
81
|
+
* @param fields Numeric fields to analyze
|
|
82
|
+
* @param threshold Z-score threshold (default: 3.0)
|
|
83
|
+
*/
|
|
84
|
+
detectAnomalies(records: Record<string, unknown>[], fields: string[], threshold?: number): AnomalyResult[];
|
|
24
85
|
}
|
|
86
|
+
export {};
|
|
25
87
|
//# sourceMappingURL=quality.service.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"quality.service.d.ts","sourceRoot":"","sources":["../../src/quality/quality.service.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAED,MAAM,WAAW,iBAAiB;IAChC,SAAS,EAAE,IAAI,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,kBAAkB,EAAE,CAAC;IAC7B,MAAM,EAAE,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"quality.service.d.ts","sourceRoot":"","sources":["../../src/quality/quality.service.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,OAAO,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAED,MAAM,WAAW,iBAAiB;IAChC,SAAS,EAAE,IAAI,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,kBAAkB,EAAE,CAAC;IAC7B,MAAM,EAAE,OAAO,CAAC;IAChB,0EAA0E;IAC1E,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,GAAG,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;IACtB,GAAG,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;IACtB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,KAAK,CAAC;QAAE,KAAK,EAAE,OAAO,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACrD;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;IACrC,WAAW,EAAE,IAAI,CAAC;CACnB;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,OAAO,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,MAAM,OAAO,GAAG,CAAC,IAAI,EAAE,OAAO,KAAK,kBAAkB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;AAC1F,KAAK,WAAW,GAAG,CAAC,IAAI,EAAE,OAAO,KAAK,kBAAkB,CAAC;AAEzD;;;;;;;;;;;;;;;;GAgBG;AACH,qBACa,cAAc;IACzB,OAAO,CAAC,MAAM,CAAmC;IAEjD,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,GAAG,IAAI;IAK3C,SAAS,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,GAAG,OAAO,CAAC,iBAAiB,CAAC;IA6B3E,YAAY,CAAC,cAAc,EAAE,MAAM,EAAE,GAAG,WAAW;IAgCnD,OAAO,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,WAAW;IA4BtC,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,WAAW;IAmCzC,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE;QAAE,GAAG,CAAC,EAAE,MAAM,CAAC;QAAC,GAAG,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,WAAW;IAiC1E,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,WAAW;IA8BpE,oBAAoB,CAAC,KAAK,EAAE,MAAM,EAAE,aAAa,EAAE,OAAO,EAAE,GAAG,W
AAW;IAiC1E,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,GAAG,WAAW;IAgEzE;;;;;OAKG;IACH,eAAe,CACb,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,EAClC,MAAM,EAAE,MAAM,EAAE,EAChB,SAAS,SAAM,GACd,aAAa,EAAE;CAiCnB"}
|