goldenpipe 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/dist/cli.cjs +1055 -0
- package/dist/cli.cjs.map +1 -0
- package/dist/cli.d.cts +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +1053 -0
- package/dist/cli.js.map +1 -0
- package/dist/core/index.cjs +898 -0
- package/dist/core/index.cjs.map +1 -0
- package/dist/core/index.d.cts +439 -0
- package/dist/core/index.d.ts +439 -0
- package/dist/core/index.js +861 -0
- package/dist/core/index.js.map +1 -0
- package/dist/index.cjs +898 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +861 -0
- package/dist/index.js.map +1 -0
- package/dist/node/index.cjs +1081 -0
- package/dist/node/index.cjs.map +1 -0
- package/dist/node/index.d.cts +43 -0
- package/dist/node/index.d.ts +43 -0
- package/dist/node/index.js +1039 -0
- package/dist/node/index.js.map +1 -0
- package/package.json +90 -0
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
import { GoldenMatchConfig } from 'goldenmatch/core';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Core data models — port of goldenpipe/models/{context,config,stage}.py.
|
|
5
|
+
*
|
|
6
|
+
* Edge-safe: no `node:` imports. Data flows as `Row[]` (arrays of plain row
|
|
7
|
+
* objects) instead of Polars DataFrames — the TS siblings all operate on
|
|
8
|
+
* `Row[]`.
|
|
9
|
+
*/
|
|
10
|
+
/** A single tabular record. Mirrors the siblings' `Row` type. */
|
|
11
|
+
type Row = Record<string, unknown>;
|
|
12
|
+
declare const StageStatus: {
|
|
13
|
+
readonly SUCCESS: "success";
|
|
14
|
+
readonly SKIPPED: "skipped";
|
|
15
|
+
readonly FAILED: "failed";
|
|
16
|
+
};
|
|
17
|
+
/** Union of the literal values declared on the `StageStatus` constant: `"success" | "skipped" | "failed"`. */
type StageStatus = (typeof StageStatus)[keyof typeof StageStatus];
|
|
18
|
+
declare const PipeStatus: {
|
|
19
|
+
readonly SUCCESS: "success";
|
|
20
|
+
readonly PARTIAL: "partial";
|
|
21
|
+
readonly FAILED: "failed";
|
|
22
|
+
};
|
|
23
|
+
/** Union of the literal values declared on the `PipeStatus` constant: `"success" | "partial" | "failed"`. */
type PipeStatus = (typeof PipeStatus)[keyof typeof PipeStatus];
|
|
24
|
+
interface Decision {
|
|
25
|
+
/** Stage names to drop from the remaining plan. */
|
|
26
|
+
skip: string[];
|
|
27
|
+
/** Abort the whole pipeline after this stage. */
|
|
28
|
+
abort: boolean;
|
|
29
|
+
/** Stage names to prepend to the remaining plan. */
|
|
30
|
+
insert: string[];
|
|
31
|
+
/** Human-readable explanation, surfaced in `reasoning._router`. */
|
|
32
|
+
reason: string;
|
|
33
|
+
}
|
|
34
|
+
/** Construct a Decision with Python-parity defaults. */
|
|
35
|
+
declare function makeDecision(input?: Partial<Decision>): Decision;
|
|
36
|
+
interface StageResult {
|
|
37
|
+
status: StageStatus;
|
|
38
|
+
decision?: Decision | null;
|
|
39
|
+
error?: string | null;
|
|
40
|
+
}
|
|
41
|
+
interface PipeContext {
|
|
42
|
+
/** Working data. `null` until the load stage populates it. */
|
|
43
|
+
df: Row[] | null;
|
|
44
|
+
artifacts: Record<string, unknown>;
|
|
45
|
+
metadata: Record<string, unknown>;
|
|
46
|
+
timing: Record<string, number>;
|
|
47
|
+
reasoning: Record<string, string>;
|
|
48
|
+
/** Per-stage config made available to the adapter by the runner. */
|
|
49
|
+
stageConfig: Record<string, unknown>;
|
|
50
|
+
}
|
|
51
|
+
/**
 * Construct a `PipeContext` from an optional partial input; every field may be
 * omitted.
 * NOTE(review): the defaults applied to omitted fields are not visible in this
 * declaration — confirm against the implementation.
 */
declare function makePipeContext(input?: Partial<PipeContext>): PipeContext;
|
|
52
|
+
interface PipeResult {
|
|
53
|
+
status: PipeStatus;
|
|
54
|
+
source: string;
|
|
55
|
+
inputRows: number;
|
|
56
|
+
stages: Record<string, StageResult>;
|
|
57
|
+
artifacts: Record<string, unknown>;
|
|
58
|
+
skipped: string[];
|
|
59
|
+
errors: string[];
|
|
60
|
+
reasoning: Record<string, string>;
|
|
61
|
+
timing: Record<string, number>;
|
|
62
|
+
}
|
|
63
|
+
/**
 * Values accepted by `StageSpec.onError`.
 * NOTE(review): presumably `"continue"` lets the pipeline proceed after a stage
 * fails and `"abort"` stops it — confirm against the runner implementation.
 */
type OnError = "continue" | "abort";
|
|
64
|
+
interface StageSpec {
|
|
65
|
+
name?: string | undefined;
|
|
66
|
+
use: string;
|
|
67
|
+
needs: string[];
|
|
68
|
+
skipIf?: string | undefined;
|
|
69
|
+
onError: OnError;
|
|
70
|
+
config: Record<string, unknown>;
|
|
71
|
+
}
|
|
72
|
+
/** Normalize a raw stage spec (object or bare string) into a full StageSpec. */
|
|
73
|
+
declare function makeStageSpec(input: string | Partial<StageSpec> & {
|
|
74
|
+
use: string;
|
|
75
|
+
}): StageSpec;
|
|
76
|
+
interface PipelineConfig {
|
|
77
|
+
pipeline: string;
|
|
78
|
+
source?: string | undefined;
|
|
79
|
+
output?: string | undefined;
|
|
80
|
+
/** Stages may be bare strings or full specs; normalize via `makeStageSpec`. */
|
|
81
|
+
stages: Array<string | StageSpec>;
|
|
82
|
+
decisions: string[];
|
|
83
|
+
}
|
|
84
|
+
/**
 * Construct a `PipelineConfig`. `pipeline` and `stages` are required; all other
 * fields may be omitted.
 * NOTE(review): the defaults applied to omitted fields are not visible in this
 * declaration — confirm against the implementation.
 */
declare function makePipelineConfig(input: Partial<PipelineConfig> & {
|
|
85
|
+
pipeline: string;
|
|
86
|
+
stages: Array<string | StageSpec>;
|
|
87
|
+
}): PipelineConfig;
|
|
88
|
+
interface StageInfo {
|
|
89
|
+
name: string;
|
|
90
|
+
produces: string[];
|
|
91
|
+
consumes: string[];
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Full contract for pipeline stages. `run` is async so it can await the
|
|
95
|
+
* async GoldenMatch `dedupe` adapter.
|
|
96
|
+
*/
|
|
97
|
+
interface Stage {
|
|
98
|
+
readonly info: StageInfo;
|
|
99
|
+
validate(ctx: PipeContext): void | Promise<void>;
|
|
100
|
+
run(ctx: PipeContext): Promise<StageResult>;
|
|
101
|
+
rollback?: ((ctx: PipeContext) => void | Promise<void>) | null;
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* Wrap a plain async function into a Stage. Port of the Python `@stage`
|
|
105
|
+
* decorator + `_FunctionStage`.
|
|
106
|
+
*/
|
|
107
|
+
declare function stage(info: StageInfo, fn: (ctx: PipeContext) => Promise<StageResult> | StageResult): Stage;
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Column context — shared column metadata flowing between pipeline stages.
|
|
111
|
+
* Port of goldenpipe/models/column_context.py.
|
|
112
|
+
*
|
|
113
|
+
* Built by GoldenCheck (scan), enriched by GoldenFlow (transform), consumed by
|
|
114
|
+
* GoldenMatch (auto-config) to avoid re-profiling.
|
|
115
|
+
*
|
|
116
|
+
* Edge-safe: no `node:` imports.
|
|
117
|
+
*/
|
|
118
|
+
|
|
119
|
+
declare const ColumnType: {
|
|
120
|
+
readonly NAME: "name";
|
|
121
|
+
readonly EMAIL: "email";
|
|
122
|
+
readonly PHONE: "phone";
|
|
123
|
+
readonly DATE: "date";
|
|
124
|
+
readonly GEO: "geo";
|
|
125
|
+
readonly ADDRESS: "address";
|
|
126
|
+
readonly ZIP: "zip";
|
|
127
|
+
readonly IDENTIFIER: "identifier";
|
|
128
|
+
readonly NUMERIC: "numeric";
|
|
129
|
+
readonly STRING: "string";
|
|
130
|
+
readonly DESCRIPTION: "description";
|
|
131
|
+
};
|
|
132
|
+
/** Union of the literal string values declared on the `ColumnType` constant (`"name"`, `"email"`, …, `"description"`). */
type ColumnType = (typeof ColumnType)[keyof typeof ColumnType];
|
|
133
|
+
declare const CardinalityBand: {
|
|
134
|
+
readonly UNSET: "";
|
|
135
|
+
readonly LOW: "low";
|
|
136
|
+
readonly MID: "mid";
|
|
137
|
+
readonly HIGH: "high";
|
|
138
|
+
readonly SKIP: "skip";
|
|
139
|
+
};
|
|
140
|
+
/** Union of the literal values declared on the `CardinalityBand` constant: `"" | "low" | "mid" | "high" | "skip"`. */
type CardinalityBand = (typeof CardinalityBand)[keyof typeof CardinalityBand];
|
|
141
|
+
/** Floor — below this, the signal is too weak to act on. */
|
|
142
|
+
declare const MIN_CONFIDENCE = 0.3;
|
|
143
|
+
interface ColumnContext {
|
|
144
|
+
name: string;
|
|
145
|
+
inferredType: ColumnType;
|
|
146
|
+
nullRate: number;
|
|
147
|
+
cardinality: number;
|
|
148
|
+
isIdentifier: boolean;
|
|
149
|
+
transformsApplied: string[];
|
|
150
|
+
findings: string[];
|
|
151
|
+
confidence: number;
|
|
152
|
+
cardinalityBand: CardinalityBand;
|
|
153
|
+
}
|
|
154
|
+
/** Construct a ColumnContext, validating invariants (port of `__post_init__`). */
|
|
155
|
+
declare function makeColumnContext(input: Partial<ColumnContext> & {
|
|
156
|
+
name: string;
|
|
157
|
+
}): ColumnContext;
|
|
158
|
+
/** Classify a column by name pattern matching. Returns null when no match. */
|
|
159
|
+
declare function classifyByName(colName: string): ColumnType | null;
|
|
160
|
+
/** Map a profiler dtype string to a ColumnType. */
|
|
161
|
+
declare function normalizeDtype(rawType: string): ColumnType;
|
|
162
|
+
/** Minimal shape of a GoldenCheck ColumnProfile that we consume. */
|
|
163
|
+
interface ColumnProfileLike {
|
|
164
|
+
name: string;
|
|
165
|
+
inferredType?: string;
|
|
166
|
+
nullPct?: number;
|
|
167
|
+
uniqueCount?: number;
|
|
168
|
+
}
|
|
169
|
+
/** Minimal shape of a GoldenCheck finding that we consume. */
|
|
170
|
+
interface FindingLike {
|
|
171
|
+
column?: string;
|
|
172
|
+
check?: string;
|
|
173
|
+
message?: string;
|
|
174
|
+
}
|
|
175
|
+
/**
|
|
176
|
+
* Build ColumnContexts from GoldenCheck scan results. Combines three signals:
|
|
177
|
+
* 1. Column-name heuristics (regex patterns).
|
|
178
|
+
* 2. Profile data (null rate, cardinality, dtype).
|
|
179
|
+
* 3. Cardinality IQR (interquartile range) bands.
|
|
180
|
+
*/
|
|
181
|
+
declare function buildContextsFromCheck(findings: readonly FindingLike[], columnProfiles: readonly ColumnProfileLike[] | null | undefined): ColumnContext[];
|
|
182
|
+
/** Minimal shape of a GoldenFlow manifest record we consume. */
|
|
183
|
+
interface ManifestRecordLike {
|
|
184
|
+
column?: string;
|
|
185
|
+
transform?: string;
|
|
186
|
+
affectedRows?: number;
|
|
187
|
+
}
|
|
188
|
+
/** Enrich ColumnContexts with GoldenFlow transform information. */
|
|
189
|
+
declare function enrichContextsFromFlow(contexts: ColumnContext[], records: readonly ManifestRecordLike[] | null | undefined): void;
|
|
190
|
+
/**
 * Compute a numeric statistic for column `col` over `rows`.
 * NOTE(review): presumably the count of distinct non-null values — inferred
 * from the name only; confirm against the implementation (including how
 * `undefined` and missing keys are treated).
 */
declare function distinctNonNull(rows: readonly Row[], col: string): number;
|
|
191
|
+
/**
 * Compute a numeric statistic for column `col` over `rows`.
 * NOTE(review): presumably the fraction of rows whose `col` value is null —
 * inferred from the name only; confirm the range (0–1 vs. percent) and the
 * result for an empty `rows` array against the implementation.
 */
declare function nullRateOf(rows: readonly Row[], col: string): number;
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Stage registry — register and retrieve stages.
|
|
195
|
+
* Port of goldenpipe/engine/registry.py.
|
|
196
|
+
*
|
|
197
|
+
* Unlike the Python version, which discovers stages via importlib entry points,
|
|
198
|
+
* the TS registry is STATIC: built-in stages (load, goldencheck.scan,
|
|
199
|
+
* goldenflow.transform, goldenmatch.dedupe) are registered explicitly in
|
|
200
|
+
* `defaultRegistry()`. Custom stages can be added with `register()`.
|
|
201
|
+
*
|
|
202
|
+
* Edge-safe: no `node:` imports.
|
|
203
|
+
*/
|
|
204
|
+
|
|
205
|
+
declare class StageRegistry {
|
|
206
|
+
private readonly stages;
|
|
207
|
+
/** Register a stage under its `info.name`. */
|
|
208
|
+
register(stage: Stage): void;
|
|
209
|
+
/** Retrieve a stage by name. Throws if not found. */
|
|
210
|
+
get(name: string): Stage;
|
|
211
|
+
/** True when a stage with this name is registered. */
|
|
212
|
+
has(name: string): boolean;
|
|
213
|
+
/** Return `{ name: StageInfo }` for all registered stages. */
|
|
214
|
+
listAll(): Record<string, StageInfo>;
|
|
215
|
+
}
|
|
216
|
+
/**
|
|
217
|
+
* Build a registry with the built-in suite stages registered. This is the TS
|
|
218
|
+
* analogue of Python's entry-point discovery — wiring goldencheck.scan,
|
|
219
|
+
* goldenflow.transform, and goldenmatch.dedupe (+ the built-in `load` stage).
|
|
220
|
+
*/
|
|
221
|
+
declare function defaultRegistry(): StageRegistry;
|
|
222
|
+
|
|
223
|
+
/**
|
|
224
|
+
* Pipeline resolver — build an ExecutionPlan and validate wiring.
|
|
225
|
+
* Port of goldenpipe/engine/resolver.py.
|
|
226
|
+
*
|
|
227
|
+
* Edge-safe: no `node:` imports.
|
|
228
|
+
*/
|
|
229
|
+
|
|
230
|
+
/** Raised when a stage's `consumes` can't be satisfied by prior `produces`. */
|
|
231
|
+
declare class WiringError extends Error {
|
|
232
|
+
constructor(message: string);
|
|
233
|
+
}
|
|
234
|
+
interface PlannedStage {
|
|
235
|
+
name: string;
|
|
236
|
+
stage: Stage;
|
|
237
|
+
spec: StageSpec;
|
|
238
|
+
config: Record<string, unknown>;
|
|
239
|
+
}
|
|
240
|
+
interface ExecutionPlan {
|
|
241
|
+
stages: PlannedStage[];
|
|
242
|
+
}
|
|
243
|
+
declare const Resolver: {
|
|
244
|
+
/**
|
|
245
|
+
* Resolve a config + registry into an ordered ExecutionPlan. Auto-prepends
|
|
246
|
+
* the built-in `load` stage when available and validates that every stage's
|
|
247
|
+
* `consumes` is produced by an earlier stage.
|
|
248
|
+
*/
|
|
249
|
+
resolve(config: PipelineConfig, registry: StageRegistry): ExecutionPlan;
|
|
250
|
+
};
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* Decision router — apply routing decisions to the remaining execution plan.
|
|
254
|
+
* Port of goldenpipe/engine/router.py.
|
|
255
|
+
*
|
|
256
|
+
* Edge-safe: no `node:` imports.
|
|
257
|
+
*/
|
|
258
|
+
|
|
259
|
+
declare const Router: {
|
|
260
|
+
/**
|
|
261
|
+
* Apply a Decision (skip / abort / insert) to the remaining stages and
|
|
262
|
+
* return the new remaining list. Records `decision.reason` in
|
|
263
|
+
* `ctx.reasoning._router`.
|
|
264
|
+
*/
|
|
265
|
+
apply(decision: Decision, remaining: PlannedStage[], ctx: PipeContext, registry: StageRegistry): PlannedStage[];
|
|
266
|
+
};
|
|
267
|
+
|
|
268
|
+
/**
|
|
269
|
+
* Pipeline runner — execute stages with error handling and routing.
|
|
270
|
+
* Port of goldenpipe/engine/runner.py.
|
|
271
|
+
*
|
|
272
|
+
* ASYNC: stage execution awaits each stage's `run`, because the GoldenMatch
|
|
273
|
+
* `dedupe` adapter is async.
|
|
274
|
+
*
|
|
275
|
+
* Edge-safe: no `node:` imports.
|
|
276
|
+
*/
|
|
277
|
+
|
|
278
|
+
declare class Runner {
|
|
279
|
+
private readonly registry;
|
|
280
|
+
constructor(registry: StageRegistry);
|
|
281
|
+
/** Execute an ExecutionPlan against a PipeContext, returning per-stage results. */
|
|
282
|
+
run(plan: ExecutionPlan, ctx: PipeContext): Promise<Record<string, StageResult>>;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
/**
|
|
286
|
+
* Reporter — build a PipeResult from a PipeContext after execution.
|
|
287
|
+
* Port of goldenpipe/engine/reporter.py.
|
|
288
|
+
*
|
|
289
|
+
* Edge-safe: no `node:` imports.
|
|
290
|
+
*/
|
|
291
|
+
|
|
292
|
+
declare const Reporter: {
|
|
293
|
+
/** Build a `PipeResult` from the post-execution `PipeContext` and the per-stage results. */
build(ctx: PipeContext, stages: Record<string, StageResult>): PipeResult;
|
|
294
|
+
};
|
|
295
|
+
|
|
296
|
+
/**
|
|
297
|
+
* Built-in decision functions for pipeline routing.
|
|
298
|
+
* Port of goldenpipe/decisions.py.
|
|
299
|
+
*
|
|
300
|
+
* Edge-safe: no `node:` imports.
|
|
301
|
+
*
|
|
302
|
+
* NOTE on TS sibling skew: GoldenCheck-JS `Finding.severity` is a numeric enum
|
|
303
|
+
* (INFO=1, WARNING=2, ERROR=3) and has no `"critical"` level, and there is no
|
|
304
|
+
* `"pii_detection"` check. The check adapter normalizes findings to a string
|
|
305
|
+
* `severity` label ("info"/"warning"/"error") and a `check` string. So in
|
|
306
|
+
* practice `severityGate` and `piiRouter` are no-ops against current
|
|
307
|
+
* GoldenCheck-JS output — they are ported for structural parity and so that
|
|
308
|
+
* custom stages emitting `"critical"` / `"pii_detection"` findings still route.
|
|
309
|
+
*/
|
|
310
|
+
|
|
311
|
+
/** Abort the pipeline if any finding has `critical` severity. */
|
|
312
|
+
declare function severityGate(ctx: PipeContext): Decision | null;
|
|
313
|
+
/** Route to PPRL (privacy-preserving record linkage) matching if PII is detected. */
|
|
314
|
+
declare function piiRouter(ctx: PipeContext): Decision | null;
|
|
315
|
+
/** Skip matching if there are fewer than 2 rows. */
|
|
316
|
+
declare function rowCountGate(ctx: PipeContext): Decision | null;
|
|
317
|
+
|
|
318
|
+
/**
|
|
319
|
+
* Built-in LoadStage — marks `df` as available. The actual data loading is
|
|
320
|
+
* handled by the Pipeline (file read) or supplied by the caller (runDf).
|
|
321
|
+
* Port of goldenpipe/adapters/__init__.py LoadStage.
|
|
322
|
+
*
|
|
323
|
+
* Edge-safe: no `node:` imports.
|
|
324
|
+
*/
|
|
325
|
+
|
|
326
|
+
declare const LoadStage: Stage;
|
|
327
|
+
|
|
328
|
+
/**
|
|
329
|
+
* GoldenCheck adapter — wraps GoldenCheck-JS `scanData`.
|
|
330
|
+
* Port of goldenpipe/adapters/check.py.
|
|
331
|
+
*
|
|
332
|
+
* Shape divergence vs Python: the Python adapter calls `scan_file(path)` and
|
|
333
|
+
* reads `ctx.metadata["source"]`. GoldenCheck-JS's edge-safe `scanData` instead
|
|
334
|
+
* operates on a `TabularData` built from rows, so the TS adapter scans
|
|
335
|
+
* `ctx.df` directly. This means `goldencheck.scan` succeeds in the in-memory
|
|
336
|
+
* (`runDf`) path here, whereas the Python `run_df` path fails the scan stage
|
|
337
|
+
* (it has no file). Use the file-loading `run(source)` entry point (in `node/`) for cross-language parity.
|
|
338
|
+
*
|
|
339
|
+
* Edge-safe: no `node:` imports (GoldenCheck-JS core is edge-safe).
|
|
340
|
+
*/
|
|
341
|
+
|
|
342
|
+
declare const ScanStage: Stage;
|
|
343
|
+
|
|
344
|
+
/**
|
|
345
|
+
* GoldenFlow adapter — wraps GoldenFlow-JS `TransformEngine.transformDf`.
|
|
346
|
+
* Port of goldenpipe/adapters/flow.py.
|
|
347
|
+
*
|
|
348
|
+
* Shape note: GoldenFlow-JS exposes `new TransformEngine(config).transformDf(rows)`
|
|
349
|
+
* which returns `{ rows, columns, manifest }` (the Python sibling's
|
|
350
|
+
* `transform_df(df)` returns an object with `.df` + `.manifest`). We read
|
|
351
|
+
* `.rows` back into `ctx.df` and surface `.manifest` as an artifact.
|
|
352
|
+
*
|
|
353
|
+
* Edge-safe: no `node:` imports (GoldenFlow-JS core is edge-safe).
|
|
354
|
+
*/
|
|
355
|
+
|
|
356
|
+
declare const TransformStage: Stage;
|
|
357
|
+
|
|
358
|
+
/**
|
|
359
|
+
* GoldenMatch adapter — wraps GoldenMatch-JS `dedupe`.
|
|
360
|
+
* Port of goldenpipe/adapters/match.py (+ `_build_config_from_contexts`).
|
|
361
|
+
*
|
|
362
|
+
* `dedupe` is ASYNC, so this stage's `run` awaits it (and the whole runner is
|
|
363
|
+
* async). Config selection priority mirrors Python:
|
|
364
|
+
* 1. explicit stage config (from YAML / `StageSpec.config`)
|
|
365
|
+
* 2. config built from upstream column contexts
|
|
366
|
+
* 3. GoldenMatch auto-configure (no config → shorthand path)
|
|
367
|
+
*
|
|
368
|
+
* Shape divergences from the Python sibling, surfaced as artifacts:
|
|
369
|
+
* - GoldenMatch-JS `DedupeResult` exposes `.goldenRecords` (not `.golden`),
|
|
370
|
+
* `.dupes`, `.unique`, `.stats`, `.scoredPairs`. We map `goldenRecords` to
|
|
371
|
+
* the `golden` artifact for parity with the Python pipeline's artifact name.
|
|
372
|
+
* - `matchkey_used` is derived from the built config's first matchkey (the
|
|
373
|
+
* JS result does not carry the resolved matchkey list back).
|
|
374
|
+
*
|
|
375
|
+
* Edge-safe: no `node:` imports (GoldenMatch-JS core is edge-safe).
|
|
376
|
+
*/
|
|
377
|
+
|
|
378
|
+
declare const DedupeStage: Stage;
|
|
379
|
+
/**
|
|
380
|
+
* Build a GoldenMatchConfig from pipeline column contexts. Returns `null` if no
|
|
381
|
+
* usable matchkeys can be built (caller then falls back to auto-configure).
|
|
382
|
+
* Port of `_build_config_from_contexts`.
|
|
383
|
+
*/
|
|
384
|
+
declare function buildConfigFromContexts(contexts: readonly ColumnContext[], rows: readonly Row[]): GoldenMatchConfig | null;
|
|
385
|
+
|
|
386
|
+
/**
|
|
387
|
+
* Adapters index — built-in suite stages + registry wiring.
|
|
388
|
+
* Replaces Python's importlib entry-point discovery with a STATIC registry.
|
|
389
|
+
*
|
|
390
|
+
* Edge-safe: no `node:` imports.
|
|
391
|
+
*/
|
|
392
|
+
|
|
393
|
+
/**
|
|
394
|
+
* Build a registry with all built-in suite stages registered:
|
|
395
|
+
* - `load` (built-in)
|
|
396
|
+
* - `goldencheck.scan`
|
|
397
|
+
* - `goldenflow.transform`
|
|
398
|
+
* - `goldenmatch.dedupe`
|
|
399
|
+
*
|
|
400
|
+
* This is the TS analogue of Python's `StageRegistry.discover()`.
|
|
401
|
+
*/
|
|
402
|
+
declare function buildDefaultRegistry(): StageRegistry;
|
|
403
|
+
|
|
404
|
+
/**
|
|
405
|
+
* Pipeline — high-level orchestrator + programmatic run helpers.
|
|
406
|
+
* Port of goldenpipe/pipeline.py + goldenpipe/_api.py (the DataFrame paths).
|
|
407
|
+
*
|
|
408
|
+
* Operates on `Row[]`. The file-loading `run(source)` entry point lives in
|
|
409
|
+
* `node/` (it needs `node:fs`); the edge-safe core only exposes the in-memory
|
|
410
|
+
* `runDf` / `runStages` paths.
|
|
411
|
+
*
|
|
412
|
+
* Edge-safe: no `node:` imports.
|
|
413
|
+
*/
|
|
414
|
+
|
|
415
|
+
interface PipelineOptions {
|
|
416
|
+
config?: PipelineConfig | undefined;
|
|
417
|
+
registry?: StageRegistry | undefined;
|
|
418
|
+
}
|
|
419
|
+
declare class Pipeline {
|
|
420
|
+
private readonly config;
|
|
421
|
+
private readonly registry;
|
|
422
|
+
constructor(options?: PipelineOptions);
|
|
423
|
+
/** Run the pipeline on an array of rows. */
|
|
424
|
+
run(rows: readonly Row[], source?: string): Promise<PipeResult>;
|
|
425
|
+
/** Build the default check→flow→dedupe config from the available stages. */
|
|
426
|
+
private autoConfig;
|
|
427
|
+
}
|
|
428
|
+
/**
|
|
429
|
+
* Run a pipeline on an array of rows. Zero-config (default suite chain) or with
|
|
430
|
+
* an explicit PipelineConfig. Port of `_api.run_df`.
|
|
431
|
+
*/
|
|
432
|
+
declare function runDf(rows: readonly Row[], config?: PipelineConfig, source?: string): Promise<PipeResult>;
|
|
433
|
+
/**
|
|
434
|
+
* Run specific stages programmatically against rows. Port of `_api.run_stages`.
|
|
435
|
+
* The auto-prepended `load` stage is removed since rows are already supplied.
|
|
436
|
+
*/
|
|
437
|
+
declare function runStages(stages: readonly Stage[], rows: readonly Row[]): Promise<PipeResult>;
|
|
438
|
+
|
|
439
|
+
export { CardinalityBand, type ColumnContext, type ColumnProfileLike, ColumnType, type Decision, DedupeStage, type ExecutionPlan, type FindingLike, LoadStage, MIN_CONFIDENCE, type ManifestRecordLike, type OnError, type PipeContext, type PipeResult, PipeStatus, Pipeline, type PipelineConfig, type PipelineOptions, type PlannedStage, Reporter, Resolver, Router, type Row, Runner, ScanStage, type Stage, type StageInfo, StageRegistry, type StageResult, type StageSpec, StageStatus, TransformStage, WiringError, buildConfigFromContexts, buildContextsFromCheck, buildDefaultRegistry, classifyByName, defaultRegistry, distinctNonNull, enrichContextsFromFlow, makeColumnContext, makeDecision, makePipeContext, makePipelineConfig, makeStageSpec, normalizeDtype, nullRateOf, piiRouter, rowCountGate, runDf, runStages, severityGate, stage };
|