@ucdjs/pipelines-core 0.0.1-beta.7 → 0.0.1-beta.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,178 @@
1
+ import { n as FileContext } from "../types-B_R0NiDI.mjs";
2
+
3
//#region src/tracing/events.d.ts
/** Which level of the pipeline an error was raised from. */
type PipelineErrorScope = "pipeline" | "version" | "file" | "route";
/**
 * Structured error captured during a pipeline run.
 * Only `scope` and `message` are always present; the optional fields
 * narrow the error to a specific file, route, or version when known.
 */
interface PipelineError {
  scope: PipelineErrorScope;
  message: string;
  /** The original thrown value, if one was caught. */
  error?: unknown;
  file?: FileContext;
  routeId?: string;
  version?: string;
}
//#endregion
14
//#region src/tracing/types.d.ts
/**
 * Known trace-record kinds. The trailing `(string & {})` keeps the union
 * open to custom kinds while preserving autocomplete for the known ones.
 */
type PipelineTraceKind = "pipeline" | "version" | "source.provided" | "source.listing" | "file.route" | "file.matched" | "file.fallback" | "file.skipped" | "file.queued" | "file.dequeued" | "parse" | "resolve" | "cache.hit" | "cache.miss" | "cache.store" | "output.produced" | "output" | "output.written" | "dependency.resolve" | "error" | (string & {});
/** Fields shared by every trace record, parameterized by its kind tag. */
interface PipelineTraceBase<TKind extends PipelineTraceKind> {
  id: string;
  kind: TKind;
  pipelineId: string;
  traceId: string;
  spanId?: string;
  parentSpanId?: string;
  // NOTE(review): presumably epoch milliseconds (durations elsewhere are
  // `durationMs`) — unit not shown here; confirm against the emitter.
  timestamp: number;
  schemaVersion?: number;
}
26
/** Span covering one whole pipeline run. */
interface PipelineSpanRecord extends PipelineTraceBase<"pipeline"> {
  /** Unicode versions processed in this run. */
  versions: string[];
  startTimestamp: number;
  durationMs: number;
}
/** Span covering the processing of a single Unicode version. */
interface VersionSpanRecord extends PipelineTraceBase<"version"> {
  version: string;
  startTimestamp: number;
  durationMs: number;
}
/** Emitted when a source provides a file for a version. */
interface SourceProvidedTraceRecord extends PipelineTraceBase<"source.provided"> {
  version: string;
  file?: FileContext;
}
/** Span covering a source's file-listing step for a version. */
interface SourceListingSpanRecord extends PipelineTraceBase<"source.listing"> {
  version: string;
  fileCount: number;
  startTimestamp: number;
  durationMs: number;
}
/** Span covering one file's processing through a single route. */
interface FileRouteSpanRecord extends PipelineTraceBase<"file.route"> {
  version: string;
  file: FileContext;
  routeId: string;
  startTimestamp: number;
  durationMs: number;
}
/** A file matched a route. */
interface FileMatchedTraceRecord extends PipelineTraceBase<"file.matched"> {
  version: string;
  file: FileContext;
  routeId: string;
}
/** A file fell through to fallback handling (no routeId recorded). */
interface FileFallbackTraceRecord extends PipelineTraceBase<"file.fallback"> {
  version: string;
  file: FileContext;
}
/** A file was skipped, either unmatched or filtered out. */
interface FileSkippedTraceRecord extends PipelineTraceBase<"file.skipped"> {
  version: string;
  file: FileContext;
  reason: "no-match" | "filtered";
}
/** A file was enqueued for processing by a route. */
interface FileQueuedTraceRecord extends PipelineTraceBase<"file.queued"> {
  version: string;
  file: FileContext;
  routeId: string;
}
/** A file was dequeued; `waitDurationMs` is its time spent queued. */
interface FileDequeuedTraceRecord extends PipelineTraceBase<"file.dequeued"> {
  version: string;
  file: FileContext;
  routeId: string;
  waitDurationMs: number;
}
78
/** Span covering the parse step: raw rows in, filtered rows out. */
interface ParseSpanRecord extends PipelineTraceBase<"parse"> {
  version: string;
  file: FileContext;
  routeId: string;
  /** Rows produced by the parser before filtering. */
  rowCount: number;
  /** Rows remaining after filtering. */
  filteredRowCount: number;
  startTimestamp: number;
  durationMs: number;
}
/** Span covering the resolve step and how many outputs it produced. */
interface ResolveSpanRecord extends PipelineTraceBase<"resolve"> {
  version: string;
  file: FileContext;
  routeId: string;
  outputCount: number;
  startTimestamp: number;
  durationMs: number;
}
/** A cached result was found for this file/route. */
interface CacheHitTraceRecord extends PipelineTraceBase<"cache.hit"> {
  version: string;
  routeId: string;
  file: FileContext;
}
/** No cached result existed for this file/route. */
interface CacheMissTraceRecord extends PipelineTraceBase<"cache.miss"> {
  version: string;
  routeId: string;
  file: FileContext;
}
/** A result was stored in the cache for this file/route. */
interface CacheStoreTraceRecord extends PipelineTraceBase<"cache.store"> {
  version: string;
  routeId: string;
  file: FileContext;
}
/** Any cache-related trace record. */
type CacheTraceRecord = CacheHitTraceRecord | CacheMissTraceRecord | CacheStoreTraceRecord;
111
/** An output value was produced by a route (before sink resolution). */
interface OutputProducedTraceRecord extends PipelineTraceBase<"output.produced"> {
  version: string;
  routeId: string;
  file?: FileContext;
  outputIndex: number;
  property?: string;
}
/** A fully resolved output: where it will go (`sink`/`locator`) and how. */
interface OutputTraceRecord extends PipelineTraceBase<"output"> {
  version: string;
  routeId: string;
  file: FileContext;
  outputIndex: number;
  outputId: string;
  property?: string;
  sink: string;
  format: "json" | "text";
  locator: string;
}
/** Result of attempting to write an output to its sink. */
interface OutputWrittenTraceRecord extends PipelineTraceBase<"output.written"> {
  version: string;
  routeId: string;
  file: FileContext;
  outputIndex: number;
  outputId: string;
  property?: string;
  sink: string;
  locator: string;
  status: "written" | "failed";
  /** Failure message when `status` is "failed". */
  error?: string;
}
/** Result of resolving one route's dependency on another route. */
interface DependencyResolveTraceRecord extends PipelineTraceBase<"dependency.resolve"> {
  version: string;
  file: FileContext;
  routeId: string;
  dependsOnRouteId: string;
  status: "resolved" | "missing";
}
/** An error event, wrapping a structured PipelineError. */
interface ErrorTraceRecord extends PipelineTraceBase<"error"> {
  error: PipelineError;
  stack?: string;
}
152
/** Union of every concrete trace-record shape. */
type PipelineTraceRecord = PipelineSpanRecord | VersionSpanRecord | SourceProvidedTraceRecord | SourceListingSpanRecord | FileRouteSpanRecord | FileMatchedTraceRecord | FileFallbackTraceRecord | FileSkippedTraceRecord | FileQueuedTraceRecord | FileDequeuedTraceRecord | ParseSpanRecord | ResolveSpanRecord | CacheHitTraceRecord | CacheMissTraceRecord | CacheStoreTraceRecord | OutputProducedTraceRecord | OutputTraceRecord | OutputWrittenTraceRecord | DependencyResolveTraceRecord | ErrorTraceRecord;
/** Look up the concrete record shape for a given kind tag. */
type PipelineTraceRecordByKind<TKind extends PipelineTraceKind> = Extract<PipelineTraceRecord, {
  kind: TKind;
}>;
/** Record shape with runner-assigned identity/timing fields stripped. */
type PipelineTraceInput = { [K in PipelineTraceKind]: Omit<PipelineTraceRecordByKind<K>, "id" | "traceId" | "spanId" | "parentSpanId" | "timestamp"> }[PipelineTraceKind];
/** Like PipelineTraceInput, but `pipelineId` is also supplied by the emitter. */
type PipelineTraceEmitInput = { [K in PipelineTraceKind]: Omit<PipelineTraceRecordByKind<K>, "id" | "traceId" | "spanId" | "parentSpanId" | "timestamp" | "pipelineId"> }[PipelineTraceKind];
/** One entry in the manifest of outputs produced by a pipeline run. */
interface PipelineOutputManifestEntry {
  outputIndex: number;
  outputId: string;
  routeId: string;
  pipelineId: string;
  version: string;
  property?: string;
  sink: string;
  format: "json" | "text";
  locator: string;
  /** "resolved" until an output.written trace upgrades it. */
  status: "resolved" | "written" | "failed";
  error?: string;
}
//#endregion
172
//#region src/tracing/utils.d.ts
/** Fold "output"/"output.written" traces into a sorted output manifest. */
declare function buildOutputManifestFromTraces(traces: readonly PipelineTraceRecord[]): PipelineOutputManifestEntry[];
/** Display phases used to group trace records. */
declare const PIPELINE_TRACE_PHASES: readonly ["Pipeline", "Version", "Parse", "Resolve", "File", "Cache", "Error", "Other"];
type PipelineTracePhase = typeof PIPELINE_TRACE_PHASES[number];
/** Map a trace kind onto its display phase. */
declare function getTracePhase(kind: PipelineTraceKind): PipelineTracePhase;
//#endregion
178
+ export { CacheHitTraceRecord, CacheMissTraceRecord, CacheStoreTraceRecord, CacheTraceRecord, DependencyResolveTraceRecord, ErrorTraceRecord, FileDequeuedTraceRecord, FileFallbackTraceRecord, FileMatchedTraceRecord, FileQueuedTraceRecord, FileRouteSpanRecord, FileSkippedTraceRecord, OutputProducedTraceRecord, OutputTraceRecord, OutputWrittenTraceRecord, PIPELINE_TRACE_PHASES, ParseSpanRecord, PipelineError, PipelineErrorScope, PipelineOutputManifestEntry, PipelineSpanRecord, PipelineTraceEmitInput, PipelineTraceInput, PipelineTraceKind, PipelineTracePhase, PipelineTraceRecord, PipelineTraceRecordByKind, ResolveSpanRecord, SourceListingSpanRecord, SourceProvidedTraceRecord, VersionSpanRecord, buildOutputManifestFromTraces, getTracePhase };
@@ -0,0 +1,60 @@
1
+ //#region src/tracing/utils.ts
2
+ function buildOutputManifestFromTraces(traces) {
3
+ const manifest = /* @__PURE__ */ new Map();
4
+ for (const trace of traces) {
5
+ if (trace.kind === "output") {
6
+ const key = getOutputManifestKey(trace.pipelineId, trace.version, trace.routeId, trace.outputIndex, trace.outputId, trace.locator);
7
+ manifest.set(key, {
8
+ outputIndex: trace.outputIndex,
9
+ outputId: trace.outputId,
10
+ routeId: trace.routeId,
11
+ pipelineId: trace.pipelineId,
12
+ version: trace.version,
13
+ property: trace.property,
14
+ sink: trace.sink,
15
+ format: trace.format,
16
+ locator: trace.locator,
17
+ status: "resolved"
18
+ });
19
+ continue;
20
+ }
21
+ if (trace.kind === "output.written") {
22
+ const key = getOutputManifestKey(trace.pipelineId, trace.version, trace.routeId, trace.outputIndex, trace.outputId, trace.locator);
23
+ const entry = manifest.get(key);
24
+ if (entry) manifest.set(key, {
25
+ ...entry,
26
+ status: trace.status,
27
+ error: trace.error
28
+ });
29
+ }
30
+ }
31
+ return [...manifest.values()].toSorted((left, right) => {
32
+ return left.pipelineId.localeCompare(right.pipelineId) || left.version.localeCompare(right.version) || left.outputIndex - right.outputIndex || left.routeId.localeCompare(right.routeId) || left.outputId.localeCompare(right.outputId) || left.locator.localeCompare(right.locator);
33
+ });
34
+ }
35
+ function getOutputManifestKey(pipelineId, version, routeId, outputIndex, outputId, locator) {
36
+ return `${pipelineId}:${version}:${routeId}:${outputIndex}:${outputId}:${locator}`;
37
+ }
38
/** Display phases used to group trace records in reports/UIs. */
const PIPELINE_TRACE_PHASES = [
	"Pipeline",
	"Version",
	"Parse",
	"Resolve",
	"File",
	"Cache",
	"Error",
	"Other"
];
/**
 * Map a trace kind onto its display phase.
 *
 * Exact kinds are matched first; the rest are grouped by prefix:
 * "file.*" and "source.*" belong to "File", "cache.*" to "Cache", and
 * anything else (all "output*" kinds included, plus custom kinds) falls
 * back to "Other".
 */
function getTracePhase(kind) {
	switch (kind) {
		case "error": return "Error";
		case "pipeline": return "Pipeline";
		case "version": return "Version";
		case "parse": return "Parse";
		case "resolve": return "Resolve";
	}
	// The previous version special-cased "source.listing" (already covered
	// by startsWith("source.")) and had an "output." branch that duplicated
	// the final "Other" fallback; both redundancies are folded away here.
	if (kind.startsWith("file.") || kind.startsWith("source.")) return "File";
	if (kind.startsWith("cache.")) return "Cache";
	return "Other";
}
59
+ //#endregion
60
+ export { PIPELINE_TRACE_PHASES, buildOutputManifestFromTraces, getTracePhase };
@@ -1,9 +1,10 @@
1
- import { n as FileContext } from "./types-T24cxilU.mjs";
1
+ import { l as PipelineLogger, n as FileContext } from "./types-B_R0NiDI.mjs";
2
2
 
3
3
//#region src/transform.d.ts
/**
 * Context handed to each transform invocation: which Unicode version and
 * file the rows come from, plus an execution-aware logger.
 */
interface TransformContext {
  version: string;
  file: FileContext;
  logger: PipelineLogger;
}
8
9
  type PipelineTransformFunction<TInput, TOutput> = (ctx: TransformContext, rows: AsyncIterable<TInput>) => AsyncIterable<TOutput>;
9
10
  interface PipelineTransformDefinition<TInput = unknown, TOutput = unknown> {
@@ -61,7 +62,7 @@ type ChainTransforms<TInput, TTransforms extends readonly PipelineTransformDefin
61
62
  * Applies a sequence of transforms to an async iterable, composing them together.
62
63
  *
63
64
  * This function threads the output of one transform into the input of the next,
64
- * creating a pipeline. All iteration is lazyvalues are pulled through the pipeline
65
+ * creating a pipeline. All iteration is lazy — values are pulled through the pipeline
65
66
  * only as they are consumed.
66
67
  *
67
68
  * @typeParam TInput - The input type of the first transform.
@@ -30,7 +30,7 @@ function definePipelineTransform(definition) {
30
30
  * Applies a sequence of transforms to an async iterable, composing them together.
31
31
  *
32
32
  * This function threads the output of one transform into the input of the next,
33
- * creating a pipeline. All iteration is lazyvalues are pulled through the pipeline
33
+ * creating a pipeline. All iteration is lazy — values are pulled through the pipeline
34
34
  * only as they are consumed.
35
35
  *
36
36
  * @typeParam TInput - The input type of the first transform.
@@ -57,6 +57,5 @@ async function* applyTransforms(ctx, rows, transforms) {
57
57
  for (const transform of transforms) current = transform.fn(ctx, current);
58
58
  yield* current;
59
59
  }
60
-
61
60
  //#endregion
62
- export { definePipelineTransform as n, applyTransforms as t };
61
+ export { definePipelineTransform as n, applyTransforms as t };
@@ -1,5 +1,5 @@
1
- import { a as ParsedRow } from "../types-T24cxilU.mjs";
2
- import { a as PipelineTransformDefinition } from "../transform-C2GdfzQ-.mjs";
1
+ import { a as ParsedRow } from "../types-B_R0NiDI.mjs";
2
+ import { a as PipelineTransformDefinition } from "../transform-BkBLzZAP.mjs";
3
3
 
4
4
  //#region src/transforms/deduplicate.d.ts
5
5
  declare const deduplicateRows: PipelineTransformDefinition<ParsedRow, ParsedRow>;
@@ -1,5 +1,4 @@
1
- import { n as definePipelineTransform } from "../transform-BGOswbNk.mjs";
2
-
1
+ import { n as definePipelineTransform } from "../transform-C7y-ZJKs.mjs";
3
2
  //#region src/transforms/deduplicate.ts
4
3
  function getRowKey(row) {
5
4
  if (row.kind === "point" && row.codePoint) return `point:${row.codePoint}`;
@@ -47,7 +46,6 @@ function createDeduplicateTransform(options = {}) {
47
46
  }
48
47
  });
49
48
  }
50
-
51
49
  //#endregion
52
50
  //#region src/transforms/expand-ranges.ts
53
51
  function hexToNumber$1(hex) {
@@ -95,11 +93,11 @@ function createExpandRangesTransform(options = {}) {
95
93
  }
96
94
  });
97
95
  }
98
-
99
96
  //#endregion
100
97
//#region src/transforms/normalize.ts
// Matches leading zeroes; hoisted so the regex isn't rebuilt per call.
const LEADING_ZEROES_RE = /^0+/;
/**
 * Uppercase a hex string and strip its leading zeroes ("00ff" -> "FF").
 * An all-zero input collapses to "0" rather than the empty string.
 */
function normalizeHex(hex) {
	const upper = hex.toUpperCase();
	const stripped = upper.replace(LEADING_ZEROES_RE, "");
	return stripped === "" ? "0" : stripped;
}
104
102
  function padHex(hex, length = 4) {
105
103
  return hex.toUpperCase().padStart(length, "0");
@@ -132,7 +130,6 @@ function createNormalizeTransform(padLength = 4) {
132
130
  }
133
131
  });
134
132
  }
135
-
136
133
  //#endregion
137
134
  //#region src/transforms/sort.ts
138
135
  function hexToNumber(hex) {
@@ -169,6 +166,5 @@ function createSortTransform(options = {}) {
169
166
  }
170
167
  });
171
168
  }
172
-
173
169
  //#endregion
174
- export { createDeduplicateTransform, createExpandRangesTransform, createNormalizeTransform, createSortTransform, deduplicateRows, expandRanges, normalizeCodePoints, sortByCodePoint };
170
+ export { createDeduplicateTransform, createExpandRangesTransform, createNormalizeTransform, createSortTransform, deduplicateRows, expandRanges, normalizeCodePoints, sortByCodePoint };
@@ -1,4 +1,11 @@
1
1
//#region src/types.d.ts
/** Severity levels a PipelineLogger exposes. */
type PipelineLogLevel = "debug" | "info" | "warn" | "error";
/**
 * Minimal structured-logging surface passed into pipeline hooks.
 * Each method takes a human-readable message plus optional structured
 * metadata.
 */
interface PipelineLogger {
  debug: (message: string, meta?: Record<string, unknown>) => void;
  info: (message: string, meta?: Record<string, unknown>) => void;
  warn: (message: string, meta?: Record<string, unknown>) => void;
  error: (message: string, meta?: Record<string, unknown>) => void;
}
2
9
  interface FileContext {
3
10
  /**
4
11
  * The Unicode version being processed (e.g., "16.0.0").
@@ -45,6 +52,10 @@ interface FilterContext {
45
52
  */
46
53
  id: string;
47
54
  };
55
+ /**
56
+ * Execution-aware logger for pipeline code.
57
+ */
58
+ logger: PipelineLogger;
48
59
  }
49
60
  type PipelineFilter = (ctx: FilterContext) => boolean;
50
61
  interface ParsedRow {
@@ -90,6 +101,10 @@ interface ParseContext {
90
101
  * The file being parsed.
91
102
  */
92
103
  file: FileContext;
104
+ /**
105
+ * Execution-aware logger for pipeline code.
106
+ */
107
+ logger: PipelineLogger;
93
108
  /**
94
109
  * Read the raw content of the file.
95
110
  */
@@ -158,45 +173,5 @@ interface PropertyJson {
158
173
  */
159
174
  meta?: Record<string, unknown>;
160
175
  }
161
- interface ResolveContext<TArtifacts extends Record<string, unknown> = Record<string, unknown>> {
162
- /**
163
- * The Unicode version being processed.
164
- */
165
- version: string;
166
- /**
167
- * The file being resolved.
168
- */
169
- file: FileContext;
170
- /**
171
- * Get an artifact by ID.
172
- */
173
- getArtifact: <K extends keyof TArtifacts>(id: K) => TArtifacts[K];
174
- /**
175
- * Emit an artifact for subsequent routes.
176
- */
177
- emitArtifact: <K extends string, V>(id: K, value: V) => void;
178
- /**
179
- * Normalize and sort entries by code point range.
180
- */
181
- normalizeEntries: (entries: ResolvedEntry[]) => ResolvedEntry[];
182
- /**
183
- * Get current timestamp in ISO 8601 format.
184
- */
185
- now: () => string;
186
- }
187
- type ResolverFn<TArtifacts extends Record<string, unknown> = Record<string, unknown>, TOutput = PropertyJson[]> = (ctx: ResolveContext<TArtifacts>, rows: AsyncIterable<ParsedRow>) => Promise<TOutput>;
188
- /**
189
- * Output configuration for a route.
190
- */
191
- interface RouteOutput {
192
- /**
193
- * Custom output directory.
194
- */
195
- dir?: string;
196
- /**
197
- * Custom file name generator.
198
- */
199
- fileName?: (pj: PropertyJson) => string;
200
- }
201
176
  //#endregion
202
- export { ParsedRow as a, PropertyJson as c, ResolverFn as d, RouteOutput as f, ParseContext as i, ResolveContext as l, FileContext as n, ParserFn as o, RowContext as p, FilterContext as r, PipelineFilter as s, DefaultRange as t, ResolvedEntry as u };
177
+ export { ParsedRow as a, PipelineLogLevel as c, ResolvedEntry as d, RowContext as f, ParseContext as i, PipelineLogger as l, FileContext as n, ParserFn as o, FilterContext as r, PipelineFilter as s, DefaultRange as t, PropertyJson as u };
@@ -0,0 +1,19 @@
1
+ //#region src/outputs/types.ts
2
+ function filesystemSink(options = {}) {
3
+ return {
4
+ type: "filesystem",
5
+ ...options
6
+ };
7
+ }
8
+ function normalizeRouteOutputs(route) {
9
+ return (route.outputs?.length ? route.outputs : [{ id: "default" }]).map((output, index) => {
10
+ return {
11
+ id: output.id ?? `output-${index + 1}`,
12
+ sink: output.sink,
13
+ format: output.format ?? "json",
14
+ path: output.path
15
+ };
16
+ });
17
+ }
18
+ //#endregion
19
+ export { normalizeRouteOutputs as n, filesystemSink as t };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ucdjs/pipelines-core",
3
- "version": "0.0.1-beta.7",
3
+ "version": "0.0.1-beta.8",
4
4
  "type": "module",
5
5
  "author": {
6
6
  "name": "Lucas Nørgård",
@@ -17,9 +17,14 @@
17
17
  "bugs": {
18
18
  "url": "https://github.com/ucdjs/ucd/issues"
19
19
  },
20
+ "imports": {
21
+ "#builtin-transforms/*": "./src/transforms/*.ts"
22
+ },
20
23
  "exports": {
21
24
  ".": "./dist/index.mjs",
25
+ "./outputs": "./dist/outputs/index.mjs",
22
26
  "./sources": "./dist/sources/index.mjs",
27
+ "./tracing": "./dist/tracing/index.mjs",
23
28
  "./transforms": "./dist/transforms/index.mjs",
24
29
  "./package.json": "./package.json"
25
30
  },
@@ -31,18 +36,18 @@
31
36
  "node": ">=24.13"
32
37
  },
33
38
  "dependencies": {
34
- "picomatch": "4.0.3",
39
+ "picomatch": "4.0.4",
35
40
  "zod": "4.3.6",
36
- "@ucdjs-internal/shared": "0.1.1-beta.7"
41
+ "@ucdjs-internal/shared": "0.1.1-beta.8"
37
42
  },
38
43
  "devDependencies": {
39
- "@luxass/eslint-config": "7.2.1",
44
+ "@luxass/eslint-config": "7.4.1",
40
45
  "@types/picomatch": "4.0.2",
41
- "eslint": "10.0.2",
42
- "publint": "0.3.17",
43
- "tsdown": "0.20.3",
46
+ "eslint": "10.1.0",
47
+ "publint": "0.3.18",
48
+ "tsdown": "0.21.4",
44
49
  "tsx": "4.21.0",
45
- "typescript": "5.9.3",
50
+ "typescript": "6.0.2",
46
51
  "@ucdjs-tooling/tsconfig": "1.0.0",
47
52
  "@ucdjs-tooling/tsdown-config": "1.0.0"
48
53
  },
@@ -1,26 +0,0 @@
1
- //#region src/source.ts
2
- function definePipelineSource(definition) {
3
- return definition;
4
- }
5
- async function resolveSourceFiles(source, version) {
6
- return (await source.backend.listFiles(version)).filter((file) => {
7
- const ctx = { file };
8
- if (source.includes && !source.includes(ctx)) return false;
9
- if (source.excludes && source.excludes(ctx)) return false;
10
- return true;
11
- }).map((file) => ({
12
- ...file,
13
- source: { id: source.id }
14
- }));
15
- }
16
- async function resolveMultipleSourceFiles(sources, version) {
17
- const filesByPath = /* @__PURE__ */ new Map();
18
- for (const source of sources) {
19
- const files = await resolveSourceFiles(source, version);
20
- for (const file of files) filesByPath.set(file.path, file);
21
- }
22
- return Array.from(filesByPath.values());
23
- }
24
-
25
- //#endregion
26
- export { resolveMultipleSourceFiles as n, resolveSourceFiles as r, definePipelineSource as t };