@ucdjs/pipelines-core 0.0.1-beta.1
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- package/LICENSE +21 -0
- package/README.md +27 -0
- package/dist/index.d.mts +531 -0
- package/dist/index.mjs +338 -0
- package/dist/source-DYH0SvDh.d.mts +37 -0
- package/dist/source-EtWCktj_.mjs +26 -0
- package/dist/sources/index.d.mts +44 -0
- package/dist/sources/index.mjs +113 -0
- package/dist/transform-BDsUG3EV.mjs +62 -0
- package/dist/transform-BSrhinRr.d.mts +88 -0
- package/dist/transforms/index.d.mts +33 -0
- package/dist/transforms/index.mjs +174 -0
- package/dist/types-t7nns1Hx.d.mts +202 -0
- package/package.json +59 -0
package/dist/index.mjs
ADDED
@@ -0,0 +1,338 @@
import { n as resolveMultipleSourceFiles, r as resolveSourceFiles, t as definePipelineSource } from "./source-EtWCktj_.mjs";
import { n as definePipelineTransform, t as applyTransforms } from "./transform-BDsUG3EV.mjs";
import picomatch from "picomatch";

//#region src/dependencies.ts
function parseDependency(dep) {
  if (dep.startsWith("route:")) {
    const routeId = dep.slice(6);
    if (!routeId) throw new Error(`Invalid route dependency format: ${dep}. Expected "route:<id>" with non-empty id`);
    return {
      type: "route",
      routeId
    };
  }
  if (dep.startsWith("artifact:")) {
    const rest = dep.slice(9);
    const sepIndex = rest.indexOf(":");
    if (sepIndex <= 0 || sepIndex >= rest.length - 1) throw new Error(`Invalid dependency format: ${dep}. Expected "route:<id>" or "artifact:<routeId>:<artifactName>"`);
    return {
      type: "artifact",
      routeId: rest.slice(0, sepIndex),
      artifactName: rest.slice(sepIndex + 1)
    };
  }
  throw new Error(`Invalid dependency format: ${dep}. Expected "route:<id>" or "artifact:<routeId>:<artifactName>"`);
}
function isRouteDependency(dep) {
  return dep.startsWith("route:");
}
function isArtifactDependency(dep) {
  return dep.startsWith("artifact:");
}
function createRouteDependency(routeId) {
  return `route:${routeId}`;
}
function createArtifactDependency(routeId, artifactName) {
  return `artifact:${routeId}:${artifactName}`;
}

//#endregion
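
The dependency helpers above round-trip between the tagged string format and the parsed shape; a quick sketch of how they compose (the route and artifact names here are invented):

const dep = createArtifactDependency("parse-unicode-data", "codepoints");
// => "artifact:parse-unicode-data:codepoints"
parseDependency(dep);
// => { type: "artifact", routeId: "parse-unicode-data", artifactName: "codepoints" }
parseDependency(createRouteDependency("parse-unicode-data"));
// => { type: "route", routeId: "parse-unicode-data" }
parseDependency("artifact:only-route:");
// throws: the trailing separator leaves an empty artifact name
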
//#region src/dag.ts
function buildDAG(routes) {
  const errors = [];
  const nodes = /* @__PURE__ */ new Map();
  const artifactsByRoute = /* @__PURE__ */ new Map();
  const seenIds = /* @__PURE__ */ new Map();
  for (let i = 0; i < routes.length; i++) {
    const route = routes[i];
    if (!route) continue;
    const id = route.id;
    if (seenIds.has(id)) errors.push({
      type: "duplicate-route",
      message: `Duplicate route ID "${id}" found at index ${seenIds.get(id)} and ${i}`,
      details: { routeId: id }
    });
    else seenIds.set(id, i);
  }
  if (errors.length > 0) return {
    valid: false,
    errors
  };
  const routeIds = new Set(routes.map((r) => r.id));
  for (const route of routes) {
    const emittedArtifacts = /* @__PURE__ */ new Set();
    if (route.emits) for (const artifactName of Object.keys(route.emits)) emittedArtifacts.add(`${route.id}:${artifactName}`);
    artifactsByRoute.set(route.id, emittedArtifacts);
    nodes.set(route.id, {
      id: route.id,
      dependencies: /* @__PURE__ */ new Set(),
      dependents: /* @__PURE__ */ new Set(),
      emittedArtifacts
    });
  }
  for (const route of routes) {
    const node = nodes.get(route.id);
    if (!route.depends) continue;
    for (const dep of route.depends) {
      const parsed = parseDependency(dep);
      if (isRouteDependency(dep)) {
        if (!routeIds.has(parsed.routeId)) {
          errors.push({
            type: "missing-route",
            message: `Route "${route.id}" depends on non-existent route "${parsed.routeId}"`,
            details: {
              routeId: route.id,
              dependencyId: parsed.routeId
            }
          });
          continue;
        }
        node.dependencies.add(parsed.routeId);
        nodes.get(parsed.routeId).dependents.add(route.id);
      } else if (isArtifactDependency(dep)) {
        if (parsed.type !== "artifact") continue;
        if (!routeIds.has(parsed.routeId)) {
          errors.push({
            type: "missing-route",
            message: `Route "${route.id}" depends on artifact from non-existent route "${parsed.routeId}"`,
            details: {
              routeId: route.id,
              dependencyId: parsed.routeId
            }
          });
          continue;
        }
        const routeArtifacts = artifactsByRoute.get(parsed.routeId);
        const artifactKey = `${parsed.routeId}:${parsed.artifactName}`;
        if (!routeArtifacts?.has(artifactKey)) {
          errors.push({
            type: "missing-artifact",
            message: `Route "${route.id}" depends on non-existent artifact "${parsed.artifactName}" from route "${parsed.routeId}"`,
            details: {
              routeId: route.id,
              dependencyId: artifactKey
            }
          });
          continue;
        }
        node.dependencies.add(parsed.routeId);
        nodes.get(parsed.routeId).dependents.add(route.id);
      }
    }
  }
  const cycleResult = detectCycle(nodes);
  if (cycleResult) errors.push({
    type: "cycle",
    message: `Circular dependency detected: ${cycleResult.join(" -> ")}`,
    details: { cycle: cycleResult }
  });
  if (errors.length > 0) return {
    valid: false,
    errors
  };
  return {
    valid: true,
    errors: [],
    dag: {
      nodes,
      executionOrder: topologicalSort(nodes)
    }
  };
}
function detectCycle(nodes) {
  const visited = /* @__PURE__ */ new Set();
  const recursionStack = /* @__PURE__ */ new Set();
  const path = [];
  function dfs(nodeId) {
    visited.add(nodeId);
    recursionStack.add(nodeId);
    path.push(nodeId);
    const node = nodes.get(nodeId);
    if (node) {
      for (const depId of node.dependencies) if (!visited.has(depId)) {
        const cycle = dfs(depId);
        if (cycle) return cycle;
      } else if (recursionStack.has(depId)) {
        const cycleStart = path.indexOf(depId);
        return [...path.slice(cycleStart), depId];
      }
    }
    path.pop();
    recursionStack.delete(nodeId);
    return null;
  }
  for (const nodeId of nodes.keys()) if (!visited.has(nodeId)) {
    const cycle = dfs(nodeId);
    if (cycle) return cycle;
  }
  return null;
}
function topologicalSort(nodes) {
  const result = [];
  const visited = /* @__PURE__ */ new Set();
  const temp = /* @__PURE__ */ new Set();
  function visit(nodeId) {
    if (temp.has(nodeId)) return;
    if (visited.has(nodeId)) return;
    temp.add(nodeId);
    const node = nodes.get(nodeId);
    if (node) for (const depId of node.dependencies) visit(depId);
    temp.delete(nodeId);
    visited.add(nodeId);
    result.push(nodeId);
  }
  for (const nodeId of nodes.keys()) if (!visited.has(nodeId)) visit(nodeId);
  return result;
}
function getExecutionLayers(dag) {
  const layers = [];
  const scheduled = /* @__PURE__ */ new Set();
  const remaining = new Set(dag.nodes.keys());
  while (remaining.size > 0) {
    const layer = [];
    for (const nodeId of remaining) if ([...dag.nodes.get(nodeId).dependencies].every((dep) => scheduled.has(dep))) layer.push(nodeId);
    if (layer.length === 0) break;
    for (const nodeId of layer) {
      remaining.delete(nodeId);
      scheduled.add(nodeId);
    }
    layers.push(layer);
  }
  return layers;
}

//#endregion
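
A sketch of how `buildDAG` and `getExecutionLayers` fit together (routes are trimmed to the `id`/`depends` fields the builder reads; the IDs are invented):

const result = buildDAG([
  { id: "a" },
  { id: "b", depends: ["route:a"] },
  { id: "c", depends: ["route:a"] },
  { id: "d", depends: ["route:b", "route:c"] }
]);
if (result.valid) {
  result.dag.executionOrder; // ["a", "b", "c", "d"]
  getExecutionLayers(result.dag); // [["a"], ["b", "c"], ["d"]] (routes within a layer share no dependencies)
}
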
//#region src/filters.ts
function byName(name) {
  return (ctx) => ctx.file.name === name;
}
function byDir(dir) {
  return (ctx) => ctx.file.dir === dir;
}
function byExt(ext) {
  if (ext === "") return (ctx) => ctx.file.ext === "";
  const normalizedExt = ext.startsWith(".") ? ext : `.${ext}`;
  return (ctx) => ctx.file.ext === normalizedExt;
}
function byGlob(pattern) {
  const matcher = picomatch(pattern);
  return (ctx) => matcher(ctx.file.path);
}
function byPath(pathPattern) {
  if (typeof pathPattern === "string") return (ctx) => ctx.file.path === pathPattern;
  return (ctx) => pathPattern.test(ctx.file.path);
}
function byProp(pattern) {
  if (typeof pattern === "string") return (ctx) => ctx.row?.property === pattern;
  return (ctx) => !!ctx.row?.property && pattern.test(ctx.row.property);
}
function bySource(sourceIds) {
  const ids = Array.isArray(sourceIds) ? sourceIds : [sourceIds];
  return (ctx) => ctx.source != null && ids.includes(ctx.source.id);
}
function and(...filters) {
  return (ctx) => filters.every((f) => f(ctx));
}
function or(...filters) {
  return (ctx) => filters.some((f) => f(ctx));
}
function not(filter) {
  return (ctx) => !filter(ctx);
}
function always() {
  return () => true;
}
function never() {
  return () => false;
}

//#endregion
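
The filters are predicates over a `{ file, row?, source? }` context and compose freely; for example (the file literal mirrors the `FileContext` fields used elsewhere in this diff):

const wanted = and(
  or(and(byDir("ucd"), byExt("txt")), byGlob("emoji/**/*.txt")),
  not(byName("ReadMe.txt"))
);
wanted({ file: { version: "16.0.0", dir: "ucd", path: "ucd/UnicodeData.txt", name: "UnicodeData.txt", ext: ".txt" } });
// true (byExt normalizes "txt" to ".txt")
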
//#region src/pipeline.ts
/**
 * Define a pipeline configuration.
 *
 * This returns a pure data structure describing the pipeline.
 * To execute the pipeline, pass it to a pipeline executor.
 *
 * @example
 * ```ts
 * const pipeline = definePipeline({
 *   id: "my-pipeline",
 *   name: "My Pipeline",
 *   versions: ["16.0.0"],
 *   inputs: [mySource],
 *   routes: [myRoute],
 * });
 *
 * // Execute with an executor
 * const executor = createPipelineExecutor({ pipelines: [pipeline] });
 * const result = await executor.run();
 * ```
 */
function definePipeline(options) {
  const dagResult = buildDAG(options.routes);
  if (!dagResult.valid) {
    const errorMessages = dagResult.errors.map((e) => e.message).join("\n ");
    throw new Error(`Pipeline "${options.id}" has invalid route dependencies:\n ${errorMessages}`);
  }
  return {
    _type: "pipeline-definition",
    id: options.id,
    name: options.name,
    description: options.description,
    versions: options.versions,
    inputs: options.inputs,
    routes: options.routes,
    include: options.include,
    strict: options.strict ?? false,
    concurrency: options.concurrency ?? 4,
    fallback: options.fallback,
    onEvent: options.onEvent,
    dag: dagResult.dag,
    tags: options.tags ?? []
  };
}
function isPipelineDefinition(value) {
  return typeof value === "object" && value !== null && "_type" in value && typeof value._type === "string" && value._type === "pipeline-definition" && "id" in value && typeof value.id === "string" && "inputs" in value && Array.isArray(value.inputs) && "routes" in value && Array.isArray(value.routes);
}
function getPipelineRouteIds(pipeline) {
  return pipeline.routes.map((route) => route.id);
}
function getPipelineSourceIds(pipeline) {
  return pipeline.inputs.map((source) => source.id);
}

//#endregion
//#region src/route.ts
function definePipelineRoute(definition) {
  return definition;
}

//#endregion
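
`definePipelineRoute` is an identity function used for typing; the fields `buildDAG` reads (`id`, `depends`, `emits`) are enough to wire routes together. A minimal sketch (the IDs and artifact names are invented, real routes carry more fields than shown, and the `emits` values are placeholders since only its keys matter to the DAG):

const parse = definePipelineRoute({
  id: "parse-unicode-data",
  emits: { codepoints: {} } // keys of `emits` become addressable artifacts
});
const derive = definePipelineRoute({
  id: "derive-names",
  depends: [createArtifactDependency("parse-unicode-data", "codepoints")]
});
// definePipeline({ id: "ucd", versions: ["16.0.0"], inputs: [...], routes: [parse, derive] })
// would validate this wiring via buildDAG and throw on any reported error.
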
//#region src/utils/arrays.ts
/**
 * Split a line into exactly 2 fields.
 * Returns a tuple [first, second] or null if not enough fields.
 *
 * After null check, TypeScript knows both elements exist and are strings.
 */
function splitTwoFields(line, delimiter) {
  const parts = line.split(delimiter);
  if (parts.length < 2) return null;
  return [parts[0], parts[1]];
}
/**
 * Split a line with minimum field count check.
 * Returns the array or null if not enough fields.
 *
 * Use when you need more than 2 fields (e.g., UnicodeData with 14 fields).
 * Access fields with nullish coalescing: `fields[0] ?? ""`
 */
function splitMinFields(line, delimiter, minFields) {
  const parts = line.split(delimiter);
  return parts.length >= minFields ? parts : null;
}

//#endregion
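
Both helpers return `null` rather than a short array when a line is malformed, which keeps downstream destructuring safe; for example, against typical UCD line shapes:

const pair = splitTwoFields("0041..005A ; Alphabetic", ";");
// ["0041..005A ", " Alphabetic"] (trim the pieces as needed)

const fields = splitMinFields("0041;LATIN CAPITAL LETTER A;Lu;0;L;;;;;N;;;;0061;", ";", 14);
const name = fields?.[1] ?? ""; // "LATIN CAPITAL LETTER A"
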
export { always, and, applyTransforms, buildDAG, byDir, byExt, byGlob, byName, byPath, byProp, bySource, createArtifactDependency, createRouteDependency, definePipeline, definePipelineRoute, definePipelineSource, definePipelineTransform, getExecutionLayers, getPipelineRouteIds, getPipelineSourceIds, isArtifactDependency, isPipelineDefinition, isRouteDependency, never, not, or, parseDependency, resolveMultipleSourceFiles, resolveSourceFiles, splitMinFields, splitTwoFields };

package/dist/source-DYH0SvDh.d.mts
ADDED
@@ -0,0 +1,37 @@
import { n as FileContext, s as PipelineFilter } from "./types-t7nns1Hx.mjs";

//#region src/source.d.ts
interface StreamOptions {
  chunkSize?: number;
  start?: number;
  end?: number;
}
interface FileMetadata {
  size: number;
  hash?: string;
  lastModified?: string;
}
interface SourceBackend {
  listFiles: (version: string) => Promise<FileContext[]>;
  readFile: (file: FileContext) => Promise<string>;
  readFileStream?: (file: FileContext, options?: StreamOptions) => AsyncIterable<Uint8Array>;
  getMetadata?: (file: FileContext) => Promise<FileMetadata>;
}
interface PipelineSourceDefinition<TId extends string = string> {
  id: TId;
  backend: SourceBackend;
  includes?: PipelineFilter;
  excludes?: PipelineFilter;
}
interface SourceFileContext extends FileContext {
  source: {
    id: string;
  };
}
declare function definePipelineSource<const TId extends string>(definition: PipelineSourceDefinition<TId>): PipelineSourceDefinition<TId>;
declare function resolveSourceFiles(source: PipelineSourceDefinition, version: string): Promise<SourceFileContext[]>;
declare function resolveMultipleSourceFiles(sources: PipelineSourceDefinition[] | readonly PipelineSourceDefinition[], version: string): Promise<SourceFileContext[]>;
type InferSourceId<T> = T extends PipelineSourceDefinition<infer TId> ? TId : never;
type InferSourceIds<T extends readonly PipelineSourceDefinition[]> = { [K in keyof T]: InferSourceId<T[K]> }[number];
//#endregion
export { SourceBackend as a, definePipelineSource as c, PipelineSourceDefinition as i, resolveMultipleSourceFiles as l, InferSourceId as n, SourceFileContext as o, InferSourceIds as r, StreamOptions as s, FileMetadata as t, resolveSourceFiles as u };
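
`SourceBackend` is the integration point for custom sources: only `listFiles` and `readFile` are required, while streaming and metadata are optional capabilities. A minimal in-process sketch (the `FileContext` fields follow their usage elsewhere in this diff, since `types-t7nns1Hx.d.mts` itself is not shown; paths and contents are invented):

const backend = {
  async listFiles(version) {
    return [{ version, dir: "ucd", path: "ucd/PropList.txt", name: "PropList.txt", ext: ".txt" }];
  },
  async readFile(file) {
    return `# PropList stub for ${file.version}\n`;
  }
  // readFileStream and getMetadata may be omitted
};
const source = definePipelineSource({ id: "my-backend", backend });
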

package/dist/source-EtWCktj_.mjs
ADDED
@@ -0,0 +1,26 @@
//#region src/source.ts
function definePipelineSource(definition) {
  return definition;
}
async function resolveSourceFiles(source, version) {
  return (await source.backend.listFiles(version)).filter((file) => {
    const ctx = { file };
    if (source.includes && !source.includes(ctx)) return false;
    if (source.excludes && source.excludes(ctx)) return false;
    return true;
  }).map((file) => ({
    ...file,
    source: { id: source.id }
  }));
}
async function resolveMultipleSourceFiles(sources, version) {
  const filesByPath = /* @__PURE__ */ new Map();
  for (const source of sources) {
    const files = await resolveSourceFiles(source, version);
    for (const file of files) filesByPath.set(file.path, file);
  }
  return Array.from(filesByPath.values());
}

//#endregion
export { resolveMultipleSourceFiles as n, resolveSourceFiles as r, definePipelineSource as t };
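
Note the merge semantics of `resolveMultipleSourceFiles`: files are keyed by `path`, so when two sources list the same path, the source that appears later in the array wins, and every resolved file carries a `source: { id }` stamp for tracing. For instance (the source variables are placeholders):

const files = await resolveMultipleSourceFiles([baseSource, overrideSource], "16.0.0");
// If both list "ucd/UnicodeData.txt", the entry from overrideSource replaces baseSource's.
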

package/dist/sources/index.d.mts
ADDED
@@ -0,0 +1,44 @@
import { n as FileContext } from "../types-t7nns1Hx.mjs";
import { a as SourceBackend, i as PipelineSourceDefinition } from "../source-DYH0SvDh.mjs";

//#region src/sources/http.d.ts
interface HttpBackendOptions {
  baseUrl: string;
  headers?: Record<string, string>;
  timeout?: number;
}
declare const UNICODE_ORG_BASE_URL = "https://www.unicode.org/Public/";
declare function createHttpBackend(options: HttpBackendOptions): SourceBackend;
interface HttpSourceOptions extends HttpBackendOptions {
  id?: string;
}
declare function createHttpSource(): PipelineSourceDefinition<"http">;
declare function createHttpSource<TId extends string>(options: HttpSourceOptions & {
  id: TId;
}): PipelineSourceDefinition<TId>;
declare function createUnicodeOrgSource(): PipelineSourceDefinition<"unicode-org">;
declare function createUnicodeOrgSource<TId extends string>(id: TId): PipelineSourceDefinition<TId>;
declare const unicodeOrgSource: PipelineSourceDefinition<"unicode-org">;
//#endregion
//#region src/sources/memory.d.ts
interface MemoryFile {
  path: string;
  content: string;
  dir?: FileContext["dir"];
}
interface MemoryBackendOptions {
  files: Record<string, MemoryFile[]>;
}
declare function createMemoryBackend(options: MemoryBackendOptions): SourceBackend;
interface MemorySourceOptions {
  id?: string;
  files: Record<string, MemoryFile[]>;
}
declare function createMemorySource(options: MemorySourceOptions & {
  id?: undefined;
}): PipelineSourceDefinition<"memory">;
declare function createMemorySource<TId extends string>(options: MemorySourceOptions & {
  id: TId;
}): PipelineSourceDefinition<TId>;
//#endregion
export { type HttpBackendOptions, type HttpSourceOptions, type MemoryBackendOptions, type MemoryFile, type MemorySourceOptions, UNICODE_ORG_BASE_URL, createHttpBackend, createHttpSource, createMemoryBackend, createMemorySource, createUnicodeOrgSource, unicodeOrgSource };
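
The overloads carry source IDs through the type system: with no explicit ID the literal defaults (`"http"`, `"unicode-org"`, `"memory"`) are inferred, and a caller-supplied ID narrows `PipelineSourceDefinition<TId>` to that literal. For instance (the custom ID is invented):

const a = createHttpSource();                   // PipelineSourceDefinition<"http">
const b = createUnicodeOrgSource("ucd-mirror"); // PipelineSourceDefinition<"ucd-mirror">
const c = createMemorySource({ files: {} });    // PipelineSourceDefinition<"memory">
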

package/dist/sources/index.mjs
ADDED
@@ -0,0 +1,113 @@
import { t as definePipelineSource } from "../source-EtWCktj_.mjs";

//#region src/sources/http.ts
const UNICODE_ORG_BASE_URL = "https://www.unicode.org/Public/";
function createHttpBackend(options) {
  const { baseUrl, headers = {}, timeout = 3e4 } = options;
  const normalizedBaseUrl = baseUrl.endsWith("/") ? baseUrl : `${baseUrl}/`;
  return {
    async listFiles(version) {
      throw new Error(`HTTP backend does not support listing files. Use a file manifest or provide explicit file list for version ${version}.`);
    },
    async readFile(file) {
      const url = `${normalizedBaseUrl}${file.version}/${file.path}`;
      const controller = new AbortController();
      const timeoutId = setTimeout(() => controller.abort(), timeout);
      try {
        const response = await fetch(url, {
          headers,
          signal: controller.signal
        });
        if (!response.ok) throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
        return response.text();
      } finally {
        clearTimeout(timeoutId);
      }
    },
    async getMetadata(file) {
      const url = `${normalizedBaseUrl}${file.version}/${file.path}`;
      const controller = new AbortController();
      const timeoutId = setTimeout(() => controller.abort(), timeout);
      try {
        const response = await fetch(url, {
          method: "HEAD",
          headers,
          signal: controller.signal
        });
        if (!response.ok) throw new Error(`Failed to get metadata for ${url}: ${response.status}`);
        const contentLength = response.headers.get("content-length");
        const lastModified = response.headers.get("last-modified");
        return {
          size: contentLength ? Number.parseInt(contentLength, 10) : 0,
          lastModified: lastModified || void 0
        };
      } finally {
        clearTimeout(timeoutId);
      }
    }
  };
}
function createHttpSource(options) {
  return definePipelineSource({
    id: options?.id ?? "http",
    backend: createHttpBackend(options ?? { baseUrl: "" })
  });
}
function createUnicodeOrgSource(id) {
  return createHttpSource({
    id: id ?? "unicode-org",
    baseUrl: UNICODE_ORG_BASE_URL
  });
}
const unicodeOrgSource = createUnicodeOrgSource();

//#endregion
//#region src/sources/memory.ts
function getFileContext(version, file) {
  const path = file.path;
  const parts = path.split("/");
  const name = parts[parts.length - 1];
  if (!name) throw new Error(`Invalid file path: ${file.path}`);
  const extIndex = name.lastIndexOf(".");
  const ext = extIndex >= 0 ? name.slice(extIndex) : "";
  return {
    version,
    dir: file.dir || parts[0] || "ucd",
    path,
    name,
    ext
  };
}
function createMemoryBackend(options) {
  const { files } = options;
  return {
    async listFiles(version) {
      const versionFiles = files[version];
      if (!versionFiles) return [];
      return versionFiles.map((f) => getFileContext(version, f));
    },
    async readFile(file) {
      const versionFiles = files[file.version];
      if (!versionFiles) throw new Error(`Version ${file.version} not found in memory backend`);
      const memFile = versionFiles.find((f) => f.path === file.path);
      if (!memFile) throw new Error(`File ${file.path} not found in version ${file.version}`);
      return memFile.content;
    },
    async getMetadata(file) {
      const versionFiles = files[file.version];
      if (!versionFiles) throw new Error(`Version ${file.version} not found in memory backend`);
      const memFile = versionFiles.find((f) => f.path === file.path);
      if (!memFile) throw new Error(`File ${file.path} not found in version ${file.version}`);
      return { size: new TextEncoder().encode(memFile.content).length };
    }
  };
}
function createMemorySource(options) {
  return definePipelineSource({
    id: options?.id ?? "memory",
    backend: createMemoryBackend({ files: options?.files ?? {} })
  });
}

//#endregion
export { UNICODE_ORG_BASE_URL, createHttpBackend, createHttpSource, createMemoryBackend, createMemorySource, createUnicodeOrgSource, unicodeOrgSource };
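
End to end, the memory source pairs with `resolveSourceFiles` from the chunk above (file paths and contents are invented test data):

const source = createMemorySource({
  files: { "16.0.0": [{ path: "ucd/PropList.txt", content: "0041..005A ; Alphabetic" }] }
});
const files = await resolveSourceFiles(source, "16.0.0");
// [{ version: "16.0.0", dir: "ucd", path: "ucd/PropList.txt", name: "PropList.txt",
//    ext: ".txt", source: { id: "memory" } }]
await source.backend.readFile(files[0]); // "0041..005A ; Alphabetic"
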

package/dist/transform-BDsUG3EV.mjs
ADDED
@@ -0,0 +1,62 @@
//#region src/transform.ts
/**
 * Creates and returns a validated pipeline transform definition.
 *
 * This function is primarily a type-safe way to define transforms. It ensures that
 * the transform adheres to the `PipelineTransformDefinition` interface and preserves
 * type information for chaining.
 *
 * @typeParam TInput - The input type accepted by the transform.
 * @typeParam TOutput - The output type produced by the transform.
 * @param {PipelineTransformDefinition<TInput, TOutput>} definition - The transform definition.
 * @returns {PipelineTransformDefinition<TInput, TOutput>} The same definition, typed correctly.
 *
 * @example
 * ```ts
 * const uppercase = definePipelineTransform({
 *   id: 'uppercase',
 *   fn: async function* (_ctx, rows) {
 *     for await (const row of rows) {
 *       yield row.toUpperCase();
 *     }
 *   },
 * });
 * ```
 */
function definePipelineTransform(definition) {
  return definition;
}
/**
 * Applies a sequence of transforms to an async iterable, composing them together.
 *
 * This function threads the output of one transform into the input of the next,
 * creating a pipeline. All iteration is lazy—values are pulled through the pipeline
 * only as they are consumed.
 *
 * @typeParam TInput - The input type of the first transform.
 * @param {TransformContext} ctx - The context to pass to each transform.
 * @param {AsyncIterable<TInput>} rows - The initial data source.
 * @param {readonly PipelineTransformDefinition<any, any>[]} transforms - The transforms to apply in order.
 * @returns {AsyncIterable<unknown>} An async iterable of the final output (typed as `unknown` since
 * the result type depends on the transform sequence).
 *
 * @remarks
 * The output type can be narrowed using the `ChainTransforms` utility type if the
 * exact sequence of transforms is known at compile time.
 *
 * @example
 * ```ts
 * const output = applyTransforms(ctx, sourceRows, [
 *   definePipelineTransform({ id: 'filter', fn: filterFn }),
 *   definePipelineTransform({ id: 'map', fn: mapFn }),
 * ]);
 * ```
 */
async function* applyTransforms(ctx, rows, transforms) {
  let current = rows;
  for (const transform of transforms) current = transform.fn(ctx, current);
  yield* current;
}

//#endregion
export { definePipelineTransform as n, applyTransforms as t };
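
Because `applyTransforms` is itself an async generator, the composed pipeline is consumed lazily with `for await`. A runnable sketch reusing the `uppercase` transform from the JSDoc above (an empty object stands in for a real `TransformContext`, whose shape is not part of this chunk; it is passed through untouched here):

const uppercase = definePipelineTransform({
  id: "uppercase",
  fn: async function* (_ctx, rows) {
    for await (const row of rows) yield row.toUpperCase();
  }
});
async function* lines() {
  yield "alphabetic";
  yield "white_space";
}
for await (const row of applyTransforms({}, lines(), [uppercase])) {
  console.log(row); // "ALPHABETIC", then "WHITE_SPACE"
}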