@ucdjs/pipelines-core 0.0.1-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +27 -0
- package/dist/index.d.mts +531 -0
- package/dist/index.mjs +338 -0
- package/dist/source-DYH0SvDh.d.mts +37 -0
- package/dist/source-EtWCktj_.mjs +26 -0
- package/dist/sources/index.d.mts +44 -0
- package/dist/sources/index.mjs +113 -0
- package/dist/transform-BDsUG3EV.mjs +62 -0
- package/dist/transform-BSrhinRr.d.mts +88 -0
- package/dist/transforms/index.d.mts +33 -0
- package/dist/transforms/index.mjs +174 -0
- package/dist/types-t7nns1Hx.d.mts +202 -0
- package/package.json +59 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { n as FileContext } from "./types-t7nns1Hx.mjs";

//#region src/transform.d.ts
/**
 * Context handed to every transform invocation: the Unicode version being
 * processed plus the file the rows originate from.
 */
interface TransformContext {
  version: string;
  file: FileContext;
}
/**
 * An async-generator-style transform function: receives the context and an
 * async iterable of input rows, and produces an async iterable of output rows.
 */
type PipelineTransformFunction<TInput, TOutput> = (ctx: TransformContext, rows: AsyncIterable<TInput>) => AsyncIterable<TOutput>;
/** A named transform: a unique `id` plus the transform function itself. */
interface PipelineTransformDefinition<TInput = unknown, TOutput = unknown> {
  id: string;
  fn: PipelineTransformFunction<TInput, TOutput>;
}
/** A transform definition with unconstrained input/output types. */
type AnyPipelineTransformDefinition = PipelineTransformDefinition<any, any>;
/**
 * Creates and returns a validated pipeline transform definition.
 *
 * This function is primarily a type-safe way to define transforms. It ensures that
 * the transform adheres to the `PipelineTransformDefinition` interface and preserves
 * type information for chaining.
 *
 * @typeParam TInput - The input type accepted by the transform.
 * @typeParam TOutput - The output type produced by the transform.
 * @param {PipelineTransformDefinition<TInput, TOutput>} definition - The transform definition.
 * @returns {PipelineTransformDefinition<TInput, TOutput>} The same definition, typed correctly.
 *
 * @example
 * ```ts
 * const uppercase = definePipelineTransform({
 *   id: 'uppercase',
 *   fn: async function* (_ctx, rows) {
 *     for await (const row of rows) {
 *       yield row.toUpperCase();
 *     }
 *   },
 * });
 * ```
 */
declare function definePipelineTransform<TInput, TOutput>(definition: PipelineTransformDefinition<TInput, TOutput>): PipelineTransformDefinition<TInput, TOutput>;
/** Extracts the input type of a transform definition. */
type InferTransformInput<T> = T extends PipelineTransformDefinition<infer TInput, unknown> ? TInput : never;
/** Extracts the output type of a transform definition. */
type InferTransformOutput<T> = T extends PipelineTransformDefinition<unknown, infer TOutput> ? TOutput : never;
/**
 * Recursively composes a sequence of transform definitions into a single output type.
 *
 * Given an input type and a sequence of transforms, this type infers the final output
 * type by threading the output of each transform into the input of the next.
 *
 * @typeParam TInput - The initial input type.
 * @typeParam TTransforms - A readonly tuple of transform definitions, starting with one
 * that accepts `TInput`.
 *
 * @example
 * ```ts
 * type T1 = PipelineTransformDefinition<string, number>;
 * type T2 = PipelineTransformDefinition<number, boolean>;
 * type Result = ChainTransforms<string, [T1, T2]>; // boolean
 * ```
 */
type ChainTransformsHelper<TInput, TTransforms extends readonly PipelineTransformDefinition<any, any>[]> = TTransforms extends readonly [PipelineTransformDefinition<TInput, infer Output>, ...infer Rest] ? ChainTransformsHelper<Output, Extract<Rest, readonly PipelineTransformDefinition<any, any>[]>> : TInput;
type ChainTransforms<TInput, TTransforms extends readonly PipelineTransformDefinition<any, any>[]> = ChainTransformsHelper<TInput, TTransforms>;
/**
 * Applies a sequence of transforms to an async iterable, composing them together.
 *
 * This function threads the output of one transform into the input of the next,
 * creating a pipeline. All iteration is lazy—values are pulled through the pipeline
 * only as they are consumed.
 *
 * @typeParam TInput - The input type of the first transform.
 * @param {TransformContext} ctx - The context to pass to each transform.
 * @param {AsyncIterable<TInput>} rows - The initial data source.
 * @param {readonly PipelineTransformDefinition<any, any>[]} transforms - The transforms to apply in order.
 * @returns {AsyncIterable<unknown>} An async iterable of the final output (typed as `unknown` since
 * the result type depends on the transform sequence).
 *
 * @remarks
 * The output type can be narrowed using the `ChainTransforms` utility type if the
 * exact sequence of transforms is known at compile time.
 *
 * @example
 * ```ts
 * const output = applyTransforms(ctx, sourceRows, [
 *   definePipelineTransform({ id: 'filter', fn: filterFn }),
 *   definePipelineTransform({ id: 'map', fn: mapFn }),
 * ]);
 * ```
 */
declare function applyTransforms<TInput>(ctx: TransformContext, rows: AsyncIterable<TInput>, transforms: readonly PipelineTransformDefinition<any, any>[]): AsyncIterable<unknown>;
//#endregion
export { PipelineTransformDefinition as a, definePipelineTransform as c, InferTransformOutput as i, ChainTransforms as n, TransformContext as o, InferTransformInput as r, applyTransforms as s, AnyPipelineTransformDefinition as t };
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { a as ParsedRow } from "../types-t7nns1Hx.mjs";
import { a as PipelineTransformDefinition } from "../transform-BSrhinRr.mjs";

//#region src/transforms/deduplicate.d.ts
/** Drops duplicate rows, keeping the first occurrence of each row key. */
declare const deduplicateRows: PipelineTransformDefinition<ParsedRow, ParsedRow>;
/**
 * How duplicate rows are resolved: keep the first seen, keep the last seen,
 * or merge. NOTE(review): the shipped implementation only handles "first"
 * and "last"; "merge" currently behaves like "first" — confirm semantics.
 */
type DeduplicateStrategy = "first" | "last" | "merge";
interface DeduplicateOptions {
  /** Resolution strategy; defaults to "first". */
  strategy?: DeduplicateStrategy;
  /** Custom key derivation used to identify duplicate rows. */
  keyFn?: (row: ParsedRow) => string;
}
/** Builds a deduplication transform configured by the given options. */
declare function createDeduplicateTransform(options?: DeduplicateOptions): PipelineTransformDefinition<ParsedRow, ParsedRow>;
//#endregion
//#region src/transforms/expand-ranges.d.ts
/** Expands every "range" row into individual "point" rows (no size cap). */
declare const expandRanges: PipelineTransformDefinition<ParsedRow, ParsedRow>;
interface ExpandRangesOptions {
  /** Ranges spanning more than this many code points pass through unexpanded. */
  maxExpansion?: number;
}
/** Builds a range-expansion transform with a configurable expansion cap. */
declare function createExpandRangesTransform(options?: ExpandRangesOptions): PipelineTransformDefinition<ParsedRow, ParsedRow>;
//#endregion
//#region src/transforms/normalize.d.ts
/** Normalizes all hex code point fields to zero-padded uppercase form. */
declare const normalizeCodePoints: PipelineTransformDefinition<ParsedRow, ParsedRow>;
/** Builds a normalization transform with a configurable pad length (default 4). */
declare function createNormalizeTransform(padLength?: number): PipelineTransformDefinition<ParsedRow, ParsedRow>;
//#endregion
//#region src/transforms/sort.d.ts
/** Buffers all rows and emits them sorted ascending by code point. */
declare const sortByCodePoint: PipelineTransformDefinition<ParsedRow, ParsedRow>;
/** Sort order for createSortTransform. */
type SortDirection = "asc" | "desc";
interface SortOptions {
  /** Sort order; defaults to "asc". */
  direction?: SortDirection;
  /** Custom numeric sort key for a row. */
  keyFn?: (row: ParsedRow) => number;
}
/** Builds a sorting transform configured by direction and key function. */
declare function createSortTransform(options?: SortOptions): PipelineTransformDefinition<ParsedRow, ParsedRow>;
//#endregion
export { type DeduplicateOptions, type DeduplicateStrategy, type ExpandRangesOptions, type SortDirection, type SortOptions, createDeduplicateTransform, createExpandRangesTransform, createNormalizeTransform, createSortTransform, deduplicateRows, expandRanges, normalizeCodePoints, sortByCodePoint };
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import { n as definePipelineTransform } from "../transform-BDsUG3EV.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/transforms/deduplicate.ts
|
|
4
|
+
/**
 * Derives a stable string key identifying a parsed row for deduplication.
 *
 * Keys are namespaced by row kind ("point:", "range:", "seq:") so rows of
 * different kinds with the same hex values never collide. Rows that do not
 * match a known shape fall back to a JSON-serialized key.
 */
function getRowKey(row) {
  switch (row.kind) {
    case "point":
      if (row.codePoint) return `point:${row.codePoint}`;
      break;
    case "range":
      if (row.start && row.end) return `range:${row.start}..${row.end}`;
      break;
    case "sequence":
      if (row.sequence) return `seq:${row.sequence.join("-")}`;
      break;
  }
  return `unknown:${JSON.stringify(row)}`;
}
|
|
10
|
+
/**
 * Transform that drops duplicate rows, keeping the first occurrence.
 *
 * Rows are identified by getRowKey; each unique key is emitted exactly once,
 * preserving the input order of first occurrences. Streaming: a row is
 * yielded as soon as it is seen.
 */
const deduplicateRows = definePipelineTransform({
  id: "deduplicate-rows",
  async *fn(_ctx, rows) {
    const emittedKeys = new Set();
    for await (const row of rows) {
      const key = getRowKey(row);
      if (emittedKeys.has(key)) continue;
      emittedKeys.add(key);
      yield row;
    }
  }
});
|
|
23
|
+
/**
 * Builds a deduplication transform configured by strategy and key function.
 *
 * - "first" (default): keep the first row seen for each key (streaming).
 * - "last": keep the last row for each key; buffers the whole input and
 *   emits in first-seen key order (Map insertion order).
 *
 * NOTE(review): DeduplicateOptions also declares a "merge" strategy, but it
 * is not handled here and silently behaves like "first" — confirm intended
 * semantics before relying on it.
 */
function createDeduplicateTransform(options = {}) {
  const { strategy = "first", keyFn = getRowKey } = options;
  if (strategy === "last") {
    return definePipelineTransform({
      id: "deduplicate-rows-last",
      async *fn(_ctx, rows) {
        const latestByKey = new Map();
        for await (const row of rows) {
          latestByKey.set(keyFn(row), row);
        }
        for (const row of latestByKey.values()) yield row;
      }
    });
  }
  return definePipelineTransform({
    id: "deduplicate-rows-first",
    async *fn(_ctx, rows) {
      const emittedKeys = new Set();
      for await (const row of rows) {
        const key = keyFn(row);
        if (emittedKeys.has(key)) continue;
        emittedKeys.add(key);
        yield row;
      }
    }
  });
}
|
|
50
|
+
|
|
51
|
+
//#endregion
|
|
52
|
+
//#region src/transforms/expand-ranges.ts
|
|
53
|
+
/** Parses a hexadecimal string (e.g. "0041") into its numeric value. */
function hexToNumber$1(hex) {
  return parseInt(hex, 16);
}
|
|
56
|
+
/**
 * Formats a code point as an uppercase hex string, zero-padded to at least
 * 4 digits (Unicode convention: 65 -> "0041"; 128512 -> "1F600").
 */
function numberToHex(num) {
  const hex = num.toString(16).toUpperCase();
  return hex.length >= 4 ? hex : "0".repeat(4 - hex.length) + hex;
}
|
|
59
|
+
/**
 * Transform that expands every "range" row into individual "point" rows.
 *
 * Each expanded row copies the original fields, switches kind to "point",
 * sets codePoint to the padded uppercase hex of the expanded value, and
 * clears start/end. Rows that are not complete ranges pass through
 * unchanged. No size cap — see createExpandRangesTransform for a capped
 * variant.
 */
const expandRanges = definePipelineTransform({
  id: "expand-ranges",
  async *fn(_ctx, rows) {
    for await (const row of rows) {
      if (row.kind !== "range" || !row.start || !row.end) {
        yield row;
        continue;
      }
      const first = hexToNumber$1(row.start);
      const last = hexToNumber$1(row.end);
      for (let cp = first; cp <= last; cp++) {
        yield {
          ...row,
          kind: "point",
          codePoint: numberToHex(cp),
          start: undefined,
          end: undefined
        };
      }
    }
  }
});
|
|
75
|
+
/**
 * Builds a range-expansion transform with an expansion cap.
 *
 * Behaves like expandRanges, except ranges spanning more than maxExpansion
 * code points (default 10,000) are passed through unexpanded to avoid
 * blowing up memory/output size on huge ranges (e.g. whole planes).
 */
function createExpandRangesTransform(options = {}) {
  const { maxExpansion = 1e4 } = options;
  return definePipelineTransform({
    id: "expand-ranges-limited",
    async *fn(_ctx, rows) {
      for await (const row of rows) {
        if (row.kind !== "range" || !row.start || !row.end) {
          yield row;
          continue;
        }
        const first = hexToNumber$1(row.start);
        const last = hexToNumber$1(row.end);
        if (last - first + 1 > maxExpansion) {
          // Too large to expand: emit the range row untouched.
          yield row;
          continue;
        }
        for (let cp = first; cp <= last; cp++) {
          yield {
            ...row,
            kind: "point",
            codePoint: numberToHex(cp),
            start: undefined,
            end: undefined
          };
        }
      }
    }
  });
}
|
|
98
|
+
|
|
99
|
+
//#endregion
|
|
100
|
+
//#region src/transforms/normalize.ts
|
|
101
|
+
/**
 * Uppercases a hex string and strips leading zeros ("0041" -> "41").
 * An all-zero input collapses to "0" rather than the empty string.
 */
function normalizeHex(hex) {
  const stripped = hex.toUpperCase().replace(/^0+/, "");
  return stripped === "" ? "0" : stripped;
}
|
|
104
|
+
/** Uppercases a hex string and left-pads it with zeros to `length` digits. */
function padHex(hex, length = 4) {
  const upper = hex.toUpperCase();
  return upper.length < length ? "0".repeat(length - upper.length) + upper : upper;
}
|
|
107
|
+
/**
 * Transform that canonicalizes every hex field of each row — codePoint,
 * start, end, and all sequence elements — to uppercase, zero-padded to 4
 * digits (leading zeros beyond the pad width are stripped first). Rows are
 * shallow-copied; the input row objects are never mutated.
 */
const normalizeCodePoints = definePipelineTransform({
  id: "normalize-code-points",
  async *fn(_ctx, rows) {
    const canon = (cp) => padHex(normalizeHex(cp));
    for await (const row of rows) {
      const out = { ...row };
      if (out.codePoint) out.codePoint = canon(out.codePoint);
      if (out.start) out.start = canon(out.start);
      if (out.end) out.end = canon(out.end);
      if (out.sequence) out.sequence = out.sequence.map((cp) => canon(cp));
      yield out;
    }
  }
});
|
|
120
|
+
/**
 * Builds a normalization transform like normalizeCodePoints but with a
 * configurable pad length (default 4). The pad length is baked into the
 * transform id so differently-configured instances are distinguishable.
 */
function createNormalizeTransform(padLength = 4) {
  return definePipelineTransform({
    id: `normalize-code-points-${padLength}`,
    async *fn(_ctx, rows) {
      const canon = (cp) => padHex(normalizeHex(cp), padLength);
      for await (const row of rows) {
        const out = { ...row };
        if (out.codePoint) out.codePoint = canon(out.codePoint);
        if (out.start) out.start = canon(out.start);
        if (out.end) out.end = canon(out.end);
        if (out.sequence) out.sequence = out.sequence.map((cp) => canon(cp));
        yield out;
      }
    }
  });
}
|
|
135
|
+
|
|
136
|
+
//#endregion
|
|
137
|
+
//#region src/transforms/sort.ts
|
|
138
|
+
/** Parses a hexadecimal string (e.g. "0041") into its numeric value. */
function hexToNumber(hex) {
  return parseInt(hex, 16);
}
|
|
141
|
+
/**
 * Numeric sort key for a row: its code point, else its range start, else
 * the first element of its sequence (all parsed from hex). Rows with none
 * of these (e.g. aliases) sort as 0.
 */
function getRowSortKey(row) {
  if (row.codePoint) return hexToNumber(row.codePoint);
  if (row.start) return hexToNumber(row.start);
  const head = row.sequence && row.sequence.length > 0 ? row.sequence[0] : undefined;
  return head ? hexToNumber(head) : 0;
}
|
|
150
|
+
/**
 * Transform that buffers all rows and emits them sorted ascending by
 * getRowSortKey. Not streaming: the entire input is held in memory before
 * anything is yielded. Array#sort is stable, so rows with equal keys keep
 * their input order.
 */
const sortByCodePoint = definePipelineTransform({
  id: "sort-by-code-point",
  async *fn(_ctx, rows) {
    const buffer = [];
    for await (const row of rows) buffer.push(row);
    buffer.sort((left, right) => getRowSortKey(left) - getRowSortKey(right));
    for (const row of buffer) yield row;
  }
});
|
|
159
|
+
/**
 * Builds a sorting transform configured by direction ("asc" default, or
 * "desc") and an optional custom numeric key function (defaults to
 * getRowSortKey). Buffers the entire input before emitting, like
 * sortByCodePoint.
 */
function createSortTransform(options = {}) {
  const { direction = "asc", keyFn = getRowSortKey } = options;
  const sign = direction === "asc" ? 1 : -1;
  return definePipelineTransform({
    id: `sort-${direction}`,
    async *fn(_ctx, rows) {
      const buffer = [];
      for await (const row of rows) buffer.push(row);
      buffer.sort((left, right) => sign * (keyFn(left) - keyFn(right)));
      for (const row of buffer) yield row;
    }
  });
}
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
//#endregion
|
|
174
|
+
export { createDeduplicateTransform, createExpandRangesTransform, createNormalizeTransform, createSortTransform, deduplicateRows, expandRanges, normalizeCodePoints, sortByCodePoint };
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
//#region src/types.d.ts
/** Describes a single UCD data file within a Unicode version tree. */
interface FileContext {
  /** The Unicode version being processed (e.g., "16.0.0"). */
  version: string;
  /** The directory category of the file. */
  dir: "ucd" | "extracted" | "auxiliary" | "emoji" | "unihan" | (string & {});
  /** The relative path from the version root (e.g., "ucd/LineBreak.txt"). */
  path: string;
  /** The file name (e.g., "LineBreak.txt"). */
  name: string;
  /** The file extension (e.g., ".txt"). */
  ext: string;
}
/** Per-row context available during row-level filtering. */
interface RowContext {
  /** The property name for multi-property files (e.g., "NFKC_Casefold"). */
  property?: string;
}
/** Context handed to a PipelineFilter; row/source are only sometimes set. */
interface FilterContext {
  /** The file context. */
  file: FileContext;
  /** The row context (only defined during row-level filtering). */
  row?: RowContext;
  /** The source context (only defined when using multiple sources). */
  source?: {
    /** The source ID. */
    id: string;
  };
}
/** Predicate deciding whether a file/row/source is included. */
type PipelineFilter = (ctx: FilterContext) => boolean;
/** A single parsed entry from a UCD file; shape depends on `kind`. */
interface ParsedRow {
  /** The source file path relative to the version root. */
  sourceFile: string;
  /** The kind of entry. */
  kind: "range" | "point" | "sequence" | "alias";
  /** Start of range (hex string, e.g., "0041"). */
  start?: string;
  /** End of range (hex string, e.g., "005A"). */
  end?: string;
  /** Single code point (hex string). */
  codePoint?: string;
  /** Sequence of code points (hex strings). */
  sequence?: string[];
  /** Property name for multi-property files. */
  property?: string;
  /** The value(s) associated with this entry. */
  value?: string | string[];
  /** Additional metadata (comments, line numbers, etc.). */
  meta?: Record<string, unknown>;
}
/** Context given to a ParserFn for reading and interpreting one file. */
interface ParseContext {
  /** The file being parsed. */
  file: FileContext;
  /** Read the raw content of the file. */
  readContent: () => Promise<string>;
  /** Read the file line by line. */
  readLines: () => AsyncIterable<string>;
  /** Check if a line is a comment. */
  isComment: (line: string) => boolean;
}
/** Parses one file into a stream of ParsedRow entries. */
type ParserFn = (ctx: ParseContext) => AsyncIterable<ParsedRow>;
/** A resolved output entry; exactly one of range/codePoint/sequence applies. */
interface ResolvedEntry {
  /** Range in "XXXX..YYYY" format (hex, inclusive). */
  range?: `${string}..${string}`;
  /** Single code point in hex. */
  codePoint?: string;
  /** Sequence of code points. */
  sequence?: string[];
  /** The value(s) for this entry. */
  value: string | string[];
}
/** A default value applying to a code point range (from @missing lines). */
interface DefaultRange {
  /** The range this default applies to. */
  range: `${string}..${string}`;
  /** The default value. */
  value: string | string[];
}
/** The JSON output document produced for one property of one file. */
interface PropertyJson {
  /** The Unicode version (e.g., "16.0.0"). */
  version: string;
  /** The property name (e.g., "Line_Break"). */
  property: string;
  /** The source file name (e.g., "LineBreak.txt"). */
  file: string;
  /** The resolved entries. */
  entries: ResolvedEntry[];
  /** Default ranges from @missing (in encounter order). */
  defaults?: DefaultRange[];
  /** Additional metadata. */
  meta?: Record<string, unknown>;
}
/** Context given to a ResolverFn, including artifact exchange helpers. */
interface ResolveContext<TArtifacts extends Record<string, unknown> = Record<string, unknown>> {
  /** The Unicode version being processed. */
  version: string;
  /** The file being resolved. */
  file: FileContext;
  /** Get an artifact by ID. */
  getArtifact: <K extends keyof TArtifacts>(id: K) => TArtifacts[K];
  /** Emit an artifact for subsequent routes. */
  emitArtifact: <K extends string, V>(id: K, value: V) => void;
  /** Normalize and sort entries by code point range. */
  normalizeEntries: (entries: ResolvedEntry[]) => ResolvedEntry[];
  /** Get current timestamp in ISO 8601 format. */
  now: () => string;
}
/** Consumes parsed rows and resolves them into the final output (default: PropertyJson[]). */
type ResolverFn<TArtifacts extends Record<string, unknown> = Record<string, unknown>, TOutput = PropertyJson[]> = (ctx: ResolveContext<TArtifacts>, rows: AsyncIterable<ParsedRow>) => Promise<TOutput>;
/**
 * Output configuration for a route.
 */
interface RouteOutput {
  /** Custom output directory. */
  dir?: string;
  /** Custom file name generator. */
  fileName?: (pj: PropertyJson) => string;
}
//#endregion
export { ParsedRow as a, PropertyJson as c, ResolverFn as d, RouteOutput as f, ParseContext as i, ResolveContext as l, FileContext as n, ParserFn as o, RowContext as p, FilterContext as r, PipelineFilter as s, DefaultRange as t, ResolvedEntry as u };
|
package/package.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@ucdjs/pipelines-core",
|
|
3
|
+
"version": "0.0.1-beta.1",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"author": {
|
|
6
|
+
"name": "Lucas Nørgård",
|
|
7
|
+
"email": "lucasnrgaard@gmail.com",
|
|
8
|
+
"url": "https://luxass.dev"
|
|
9
|
+
},
|
|
10
|
+
"license": "MIT",
|
|
11
|
+
"homepage": "https://github.com/ucdjs/ucd",
|
|
12
|
+
"repository": {
|
|
13
|
+
"type": "git",
|
|
14
|
+
"url": "git+https://github.com/ucdjs/ucd.git",
|
|
15
|
+
"directory": "packages/pipelines/pipeline-core"
|
|
16
|
+
},
|
|
17
|
+
"bugs": {
|
|
18
|
+
"url": "https://github.com/ucdjs/ucd/issues"
|
|
19
|
+
},
|
|
20
|
+
"exports": {
|
|
21
|
+
".": "./dist/index.mjs",
|
|
22
|
+
"./sources": "./dist/sources/index.mjs",
|
|
23
|
+
"./transforms": "./dist/transforms/index.mjs",
|
|
24
|
+
"./package.json": "./package.json"
|
|
25
|
+
},
|
|
26
|
+
"types": "./dist/index.d.mts",
|
|
27
|
+
"files": [
|
|
28
|
+
"dist"
|
|
29
|
+
],
|
|
30
|
+
"engines": {
|
|
31
|
+
"node": ">=22.18"
|
|
32
|
+
},
|
|
33
|
+
"dependencies": {
|
|
34
|
+
"picomatch": "4.0.3",
|
|
35
|
+
"zod": "4.3.6",
|
|
36
|
+
"@ucdjs-internal/shared": "0.1.1-beta.1"
|
|
37
|
+
},
|
|
38
|
+
"devDependencies": {
|
|
39
|
+
"@luxass/eslint-config": "7.2.0",
|
|
40
|
+
"@types/picomatch": "4.0.2",
|
|
41
|
+
"eslint": "10.0.0",
|
|
42
|
+
"publint": "0.3.17",
|
|
43
|
+
"tsdown": "0.20.3",
|
|
44
|
+
"tsx": "4.21.0",
|
|
45
|
+
"typescript": "5.9.3",
|
|
46
|
+
"@ucdjs-tooling/tsdown-config": "1.0.0",
|
|
47
|
+
"@ucdjs-tooling/tsconfig": "1.0.0"
|
|
48
|
+
},
|
|
49
|
+
"publishConfig": {
|
|
50
|
+
"access": "public"
|
|
51
|
+
},
|
|
52
|
+
"scripts": {
|
|
53
|
+
"build": "tsdown --tsconfig=./tsconfig.build.json",
|
|
54
|
+
"dev": "tsdown --watch",
|
|
55
|
+
"clean": "git clean -xdf dist node_modules",
|
|
56
|
+
"lint": "eslint .",
|
|
57
|
+
"typecheck": "tsc --noEmit -p tsconfig.build.json"
|
|
58
|
+
}
|
|
59
|
+
}
|