@ucdjs/pipelines-presets 0.0.1-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.d.mts +127 -0
- package/dist/index.mjs +481 -0
- package/package.json +54 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025-PRESENT Lucas Nørgård
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import * as _ucdjs_pipelines_core0 from "@ucdjs/pipelines-core";
|
|
2
|
+
import { ArtifactDefinition, ParsedRow, ParserFn, PipelineFilter, PropertyJson, ResolveContext, RouteResolveContext } from "@ucdjs/pipelines-core";
|
|
3
|
+
import * as zod_v4_core0 from "zod/v4/core";
|
|
4
|
+
import * as zod from "zod";
|
|
5
|
+
|
|
6
|
+
//#region src/parsers/multi-property.d.ts
|
|
7
|
+
/** Options accepted by `createMultiPropertyParser`. */
interface MultiPropertyParserOptions {
|
|
8
|
+
/** Separator used to split each data line into its two fields. Defaults to `";"`. */
delimiter?: string;
|
|
9
|
+
/** Marker expected right after `# ` on a property header line. Defaults to `"@"`. */
propertyMarker?: string;
|
|
10
|
+
/** When true, both fields are trimmed before they are used. Defaults to `true`. */
trimFields?: boolean;
|
|
11
|
+
}
|
|
12
|
+
declare function createMultiPropertyParser(options?: MultiPropertyParserOptions): ParserFn;
|
|
13
|
+
declare const multiPropertyParser: ParserFn;
|
|
14
|
+
//#endregion
|
|
15
|
+
//#region src/parsers/sequence.d.ts
|
|
16
|
+
/** Options accepted by `createSequenceParser`. */
interface SequenceParserOptions {
|
|
17
|
+
/** Separator used to split each data line into its two fields. Defaults to `";"`. */
delimiter?: string;
|
|
18
|
+
/** Separator used to split the first field into individual code points. Defaults to `" "`. */
sequenceDelimiter?: string;
|
|
19
|
+
/** When true, both fields are trimmed before they are used. Defaults to `true`. */
trimFields?: boolean;
|
|
20
|
+
}
|
|
21
|
+
declare function createSequenceParser(options?: SequenceParserOptions): ParserFn;
|
|
22
|
+
declare const sequenceParser: ParserFn;
|
|
23
|
+
//#endregion
|
|
24
|
+
//#region src/parsers/standard.d.ts
|
|
25
|
+
/** Options accepted by `createStandardParser`. */
interface StandardParserOptions {
|
|
26
|
+
/** Separator used to split each data line into its two fields. Defaults to `";"`. */
delimiter?: string;
|
|
27
|
+
/** When true, both fields are trimmed before they are used. Defaults to `true`. */
trimFields?: boolean;
|
|
28
|
+
/**
 * When true, lines that are blank after trimming are skipped up front.
 * Defaults to `true`. Lines with no data before `#` are skipped either way.
 */
skipEmpty?: boolean;
|
|
29
|
+
}
|
|
30
|
+
declare function createStandardParser(options?: StandardParserOptions): ParserFn;
|
|
31
|
+
declare const standardParser: ParserFn;
|
|
32
|
+
//#endregion
|
|
33
|
+
//#region src/parsers/unicode-data.d.ts
|
|
34
|
+
interface UnicodeDataMeta {
|
|
35
|
+
characterName: string;
|
|
36
|
+
generalCategory: string;
|
|
37
|
+
canonicalCombiningClass: string;
|
|
38
|
+
bidiClass: string;
|
|
39
|
+
decompositionMapping: string;
|
|
40
|
+
numericType: string;
|
|
41
|
+
numericValue: string;
|
|
42
|
+
bidiMirrored: string;
|
|
43
|
+
unicode1Name: string;
|
|
44
|
+
isoComment: string;
|
|
45
|
+
simpleUppercaseMapping: string;
|
|
46
|
+
simpleLowercaseMapping: string;
|
|
47
|
+
simpleTitlecaseMapping: string;
|
|
48
|
+
}
|
|
49
|
+
type UnicodeDataRow = ParsedRow & {
|
|
50
|
+
meta: UnicodeDataMeta;
|
|
51
|
+
};
|
|
52
|
+
declare const unicodeDataParser: ParserFn;
|
|
53
|
+
//#endregion
|
|
54
|
+
//#region src/pipelines/basic.d.ts
|
|
55
|
+
interface BasicPipelineOptions {
|
|
56
|
+
id?: string;
|
|
57
|
+
versions: string[];
|
|
58
|
+
concurrency?: number;
|
|
59
|
+
strict?: boolean;
|
|
60
|
+
}
|
|
61
|
+
declare function createBasicPipeline(options: BasicPipelineOptions): _ucdjs_pipelines_core0.PipelineDefinition<string, [], [_ucdjs_pipelines_core0.PipelineRouteDefinition<"line-break", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"scripts", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"blocks", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"general-category", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"prop-list", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"derived-core-properties", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], 
PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"unicode-data", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], PropertyJson[]>], {
|
|
62
|
+
readonly parser: _ucdjs_pipelines_core0.ParserFn;
|
|
63
|
+
readonly resolver: (ctx: ResolveContext, rows: AsyncIterable<ParsedRow>) => Promise<PropertyJson[]>;
|
|
64
|
+
}>;
|
|
65
|
+
//#endregion
|
|
66
|
+
//#region src/pipelines/emoji.d.ts
|
|
67
|
+
interface EmojiPipelineOptions {
|
|
68
|
+
id?: string;
|
|
69
|
+
versions: string[];
|
|
70
|
+
concurrency?: number;
|
|
71
|
+
strict?: boolean;
|
|
72
|
+
}
|
|
73
|
+
declare function createEmojiPipeline(options: EmojiPipelineOptions): _ucdjs_pipelines_core0.PipelineDefinition<string, [], [_ucdjs_pipelines_core0.PipelineRouteDefinition<"emoji-data", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], PropertyJson[]>], {
|
|
74
|
+
readonly parser: _ucdjs_pipelines_core0.ParserFn;
|
|
75
|
+
readonly resolver: (ctx: ResolveContext, rows: AsyncIterable<ParsedRow>) => Promise<PropertyJson[]>;
|
|
76
|
+
}>;
|
|
77
|
+
//#endregion
|
|
78
|
+
//#region src/pipelines/full.d.ts
|
|
79
|
+
interface FullPipelineOptions {
|
|
80
|
+
id?: string;
|
|
81
|
+
versions: string[];
|
|
82
|
+
concurrency?: number;
|
|
83
|
+
strict?: boolean;
|
|
84
|
+
}
|
|
85
|
+
declare function createFullPipeline(options: FullPipelineOptions): _ucdjs_pipelines_core0.PipelineDefinition<string, [], [_ucdjs_pipelines_core0.PipelineRouteDefinition<"line-break", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"scripts", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"blocks", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"general-category", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"prop-list", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"derived-core-properties", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], 
PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"unicode-data", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"emoji-data", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>], PropertyJson[]>], {
|
|
86
|
+
readonly parser: _ucdjs_pipelines_core0.ParserFn;
|
|
87
|
+
readonly resolver: (ctx: ResolveContext, rows: AsyncIterable<ParsedRow>) => Promise<PropertyJson[]>;
|
|
88
|
+
}>;
|
|
89
|
+
//#endregion
|
|
90
|
+
//#region src/resolvers/grouped.d.ts
|
|
91
|
+
interface GroupedResolverOptions {
|
|
92
|
+
groupBy: "property" | "value" | ((row: ParsedRow) => string);
|
|
93
|
+
propertyNameFn?: (groupKey: string, ctx: RouteResolveContext) => string;
|
|
94
|
+
}
|
|
95
|
+
declare function createGroupedResolver(options: GroupedResolverOptions): (ctx: RouteResolveContext, rows: AsyncIterable<ParsedRow>) => Promise<PropertyJson[]>;
|
|
96
|
+
//#endregion
|
|
97
|
+
//#region src/resolvers/property-json.d.ts
|
|
98
|
+
/** Options accepted by `createPropertyJsonResolver`. */
interface PropertyJsonResolverOptions {
|
|
99
|
+
/**
 * Property name written to the resolved output. When omitted (or empty), the
 * source file name with a trailing `.txt` removed is used instead.
 */
property?: string;
|
|
100
|
+
/** NOTE(review): not read by the bundled `createPropertyJsonResolver` in this package version — confirm intended behavior. */
includeDefaults?: boolean;
|
|
101
|
+
}
|
|
102
|
+
declare function createPropertyJsonResolver(options?: PropertyJsonResolverOptions): <TArtifactKeys extends string, TEmits extends Record<string, ArtifactDefinition>>(ctx: RouteResolveContext<TArtifactKeys, TEmits>, rows: AsyncIterable<ParsedRow>) => Promise<PropertyJson[]>;
|
|
103
|
+
declare const propertyJsonResolver: <TArtifactKeys extends string, TEmits extends Record<string, ArtifactDefinition>>(ctx: RouteResolveContext<TArtifactKeys, TEmits>, rows: AsyncIterable<ParsedRow>) => Promise<PropertyJson[]>;
|
|
104
|
+
//#endregion
|
|
105
|
+
//#region src/routes/common.d.ts
|
|
106
|
+
declare const lineBreakRoute: _ucdjs_pipelines_core0.PipelineRouteDefinition<"line-break", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>;
|
|
107
|
+
declare const scriptsRoute: _ucdjs_pipelines_core0.PipelineRouteDefinition<"scripts", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>;
|
|
108
|
+
declare const blocksRoute: _ucdjs_pipelines_core0.PipelineRouteDefinition<"blocks", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>;
|
|
109
|
+
declare const generalCategoryRoute: _ucdjs_pipelines_core0.PipelineRouteDefinition<"general-category", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>;
|
|
110
|
+
declare const propListRoute: _ucdjs_pipelines_core0.PipelineRouteDefinition<"prop-list", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>;
|
|
111
|
+
declare const derivedCorePropertiesRoute: _ucdjs_pipelines_core0.PipelineRouteDefinition<"derived-core-properties", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>;
|
|
112
|
+
declare const emojiDataRoute: _ucdjs_pipelines_core0.PipelineRouteDefinition<"emoji-data", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>;
|
|
113
|
+
declare const unicodeDataRoute: _ucdjs_pipelines_core0.PipelineRouteDefinition<"unicode-data", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>;
|
|
114
|
+
declare const coreRoutes: readonly [_ucdjs_pipelines_core0.PipelineRouteDefinition<"line-break", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"scripts", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"blocks", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"general-category", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"prop-list", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, 
_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"derived-core-properties", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"unicode-data", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>];
|
|
115
|
+
declare const emojiRoutes: readonly [_ucdjs_pipelines_core0.PipelineRouteDefinition<"emoji-data", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>];
|
|
116
|
+
declare const allRoutes: readonly [_ucdjs_pipelines_core0.PipelineRouteDefinition<"line-break", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"scripts", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"blocks", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"general-category", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"prop-list", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, 
_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"derived-core-properties", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"unicode-data", readonly [], Record<string, never>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>, _ucdjs_pipelines_core0.PipelineRouteDefinition<"emoji-data", readonly [], Record<string, _ucdjs_pipelines_core0.ArtifactDefinition<zod.ZodType<unknown, unknown, zod_v4_core0.$ZodTypeInternals<unknown, unknown>>>>, readonly [_ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>, _ucdjs_pipelines_core0.PipelineTransformDefinition<_ucdjs_pipelines_core0.ParsedRow, _ucdjs_pipelines_core0.ParsedRow>], _ucdjs_pipelines_core0.PropertyJson[]>];
|
|
117
|
+
//#endregion
|
|
118
|
+
//#region src/transforms/filter.d.ts
|
|
119
|
+
interface RowFilterOptions {
|
|
120
|
+
property?: string | RegExp;
|
|
121
|
+
value?: string | RegExp;
|
|
122
|
+
kind?: ParsedRow["kind"] | ParsedRow["kind"][];
|
|
123
|
+
}
|
|
124
|
+
declare function createRowFilter(options: RowFilterOptions): _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>;
|
|
125
|
+
declare function createFilterByPipelineFilter(filter: PipelineFilter): _ucdjs_pipelines_core0.PipelineTransformDefinition<ParsedRow, ParsedRow>;
|
|
126
|
+
//#endregion
|
|
127
|
+
export { type BasicPipelineOptions, type EmojiPipelineOptions, type FullPipelineOptions, type GroupedResolverOptions, type MultiPropertyParserOptions, type PropertyJsonResolverOptions, type RowFilterOptions, type SequenceParserOptions, type StandardParserOptions, type UnicodeDataMeta, type UnicodeDataRow, allRoutes, blocksRoute, coreRoutes, createBasicPipeline, createEmojiPipeline, createFilterByPipelineFilter, createFullPipeline, createGroupedResolver, createMultiPropertyParser, createPropertyJsonResolver, createRowFilter, createSequenceParser, createStandardParser, derivedCorePropertiesRoute, emojiDataRoute, emojiRoutes, generalCategoryRoute, lineBreakRoute, multiPropertyParser, propListRoute, propertyJsonResolver, scriptsRoute, sequenceParser, standardParser, unicodeDataParser, unicodeDataRoute };
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
import { and, byDir, byExt, byGlob, byName, definePipeline, definePipelineRoute, definePipelineTransform, splitMinFields, splitTwoFields } from "@ucdjs/pipelines-core";
|
|
2
|
+
import { normalizeCodePoints, sortByCodePoint } from "@ucdjs/pipelines-core/transforms";
|
|
3
|
+
|
|
4
|
+
//#region src/parsers/multi-property.ts
|
|
5
|
+
/**
 * Classifies the first column of a data line as a single code point or a range.
 *
 * A field of the form `START..END` with both sides non-empty is reported as a
 * range; anything else is reported as a point whose `codePoint` is the trimmed
 * field text.
 *
 * @param field Raw code point column.
 * @returns `{ kind: "range", start, end }` or `{ kind: "point", codePoint }`.
 */
function parseCodePointOrRange$1(field) {
  const text = field.trim();
  // Only the first two segments are considered; a missing or empty side means
  // the field is not a usable range and is returned as a point instead.
  const [lower, upper] = text.split("..");
  if (lower && upper) {
    return { kind: "range", start: lower.trim(), end: upper.trim() };
  }
  return { kind: "point", codePoint: text };
}
|
|
20
|
+
/**
 * Creates a parser for files in which comment header lines switch the
 * property that the following data rows belong to.
 *
 * A header line starts with `# ` followed by `propertyMarker` and has the form
 * `# <marker><key>=<value>`; the `<value>` part becomes the current property.
 * Data lines are split into a code point field and a value field. Rows read
 * before any header use their own value as the property.
 *
 * @param options Parser options; `delimiter` defaults to `";"`,
 *   `propertyMarker` to `"@"` and `trimFields` to `true`.
 * @returns An async generator function that yields one row per data line.
 */
function createMultiPropertyParser(options = {}) {
  const { delimiter = ";", propertyMarker = "@", trimFields = true } = options;
  const headerPrefix = `# ${propertyMarker}`;
  // The header pattern is built from the configured marker (escaped so that
  // characters such as "$" or "*" match literally). It was previously
  // hard-coded to "@", which made custom markers silently ineffective.
  const escapedMarker = propertyMarker.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const headerPattern = new RegExp(`# ${escapedMarker}(\\w+)=(\\w+)`);
  return async function* multiPropertyParser(ctx) {
    let currentProperty;
    for await (const line of ctx.readLines()) {
      const trimmedLine = line.trim();
      // Header lines are handled before the generic comment check because they
      // are comments themselves.
      if (trimmedLine.startsWith(headerPrefix)) {
        const match = headerPattern.exec(trimmedLine);
        if (match && match[2]) currentProperty = match[2];
        continue;
      }
      if (ctx.isComment(line) || trimmedLine === "") continue;
      // Drop a trailing "# ..." comment from the data line.
      const commentIndex = trimmedLine.indexOf("#");
      const dataLine = commentIndex >= 0 ? trimmedLine.slice(0, commentIndex) : trimmedLine;
      if (dataLine.trim() === "") continue;
      const fields = splitTwoFields(dataLine, delimiter);
      if (!fields) continue;
      const [rawCodePoint, rawValue] = fields;
      const codePointField = trimFields ? rawCodePoint.trim() : rawCodePoint;
      const valueField = trimFields ? rawValue.trim() : rawValue;
      const { kind, start, end, codePoint } = parseCodePointOrRange$1(codePointField);
      yield {
        sourceFile: ctx.file.path,
        kind,
        start,
        end,
        codePoint,
        // Fall back to the row's own value when no header has been seen yet.
        property: currentProperty || valueField,
        value: valueField
      };
    }
  };
}
|
|
53
|
+
const multiPropertyParser = createMultiPropertyParser();
|
|
54
|
+
|
|
55
|
+
//#endregion
|
|
56
|
+
//#region src/parsers/sequence.ts
|
|
57
|
+
/**
 * Creates a parser for files whose first field is a code point sequence.
 *
 * Each data line is split into a sequence field and a value field. The
 * sequence field is split on `sequenceDelimiter`:
 * - several tokens yield a `sequence` row;
 * - a single token of the form `START..END` yields a `range` row;
 * - any other single token yields a `point` row.
 *
 * @param options Parser options; `delimiter` defaults to `";"`,
 *   `sequenceDelimiter` to `" "` and `trimFields` to `true`.
 * @returns An async generator function that yields one row per data line.
 */
function createSequenceParser(options = {}) {
  const { delimiter = ";", sequenceDelimiter = " ", trimFields = true } = options;
  return async function* sequenceParser(ctx) {
    for await (const line of ctx.readLines()) {
      if (ctx.isComment(line)) continue;
      const trimmedLine = line.trim();
      if (trimmedLine === "") continue;
      // Drop a trailing "# ..." comment from the data line.
      const commentIndex = trimmedLine.indexOf("#");
      const dataLine = commentIndex >= 0 ? trimmedLine.slice(0, commentIndex) : trimmedLine;
      if (dataLine.trim() === "") continue;
      const fields = splitTwoFields(dataLine, delimiter);
      if (!fields) continue;
      const [rawSequence, rawValue] = fields;
      const sequenceField = trimFields ? rawSequence.trim() : rawSequence;
      const valueField = trimFields ? rawValue.trim() : rawValue;
      const codePoints = sequenceField.split(sequenceDelimiter).filter(Boolean);
      if (codePoints.length === 0) continue;
      if (codePoints.length > 1) {
        yield {
          sourceFile: ctx.file.path,
          kind: "sequence",
          sequence: codePoints,
          value: valueField
        };
        continue;
      }
      const [single] = codePoints;
      // A lone `START..END` token is a range, matching how the other parsers
      // in this module treat that notation. It used to be emitted as a point
      // whose code point text still contained "..".
      const [rangeStart, rangeEnd] = single.split("..");
      if (rangeStart && rangeEnd) yield {
        sourceFile: ctx.file.path,
        kind: "range",
        start: rangeStart.trim(),
        end: rangeEnd.trim(),
        value: valueField
      };
      else yield {
        sourceFile: ctx.file.path,
        kind: "point",
        codePoint: single,
        value: valueField
      };
    }
  };
}
|
|
89
|
+
const sequenceParser = createSequenceParser();
|
|
90
|
+
|
|
91
|
+
//#endregion
|
|
92
|
+
//#region src/parsers/standard.ts
|
|
93
|
+
/**
 * Turns the code point column of a data line into a point or range descriptor.
 *
 * `START..END` with both sides present gives a range; every other input gives
 * a point carrying the trimmed field text.
 *
 * @param field Raw code point column.
 * @returns `{ kind: "range", start, end }` or `{ kind: "point", codePoint }`.
 */
function parseCodePointOrRange(field) {
  const text = field.trim();
  const pieces = text.split("..");
  const first = pieces[0];
  const second = pieces[1];
  // Without a ".." separator, or with an empty side, the field is a point.
  if (!first || !second) {
    return {
      kind: "point",
      codePoint: text
    };
  }
  return {
    kind: "range",
    start: first.trim(),
    end: second.trim()
  };
}
|
|
108
|
+
/**
 * Creates a parser for two-field data files (`<code point or range> ; <value>`).
 *
 * Comment lines, lines with no data before `#`, and lines that cannot be split
 * into two fields are skipped. Every remaining line yields one row.
 *
 * @param options Parser options; `delimiter` defaults to `";"`, `trimFields`
 *   and `skipEmpty` default to `true`.
 * @returns An async generator function that yields one row per data line.
 */
function createStandardParser(options = {}) {
  const { delimiter = ";", trimFields = true, skipEmpty = true } = options;
  // Applies the optional trimming to a raw field.
  const clean = (text) => (trimFields ? text.trim() : text);
  return async function* standardParser(ctx) {
    for await (const line of ctx.readLines()) {
      if (ctx.isComment(line)) continue;
      const content = line.trim();
      if (skipEmpty && content === "") continue;
      // Keep only the part in front of a trailing "# ..." comment.
      const hashAt = content.indexOf("#");
      const payload = hashAt < 0 ? content : content.slice(0, hashAt);
      if (payload.trim() === "") continue;
      const pair = splitTwoFields(payload, delimiter);
      if (!pair) continue;
      const codePointText = clean(pair[0]);
      const valueText = clean(pair[1]);
      const location = parseCodePointOrRange(codePointText);
      yield {
        sourceFile: ctx.file.path,
        kind: location.kind,
        start: location.start,
        end: location.end,
        codePoint: location.codePoint,
        value: valueText
      };
    }
  };
}
|
|
135
|
+
const standardParser = createStandardParser();
|
|
136
|
+
|
|
137
|
+
//#endregion
|
|
138
|
+
//#region src/parsers/unicode-data.ts
|
|
139
|
+
/**
 * Parser for `UnicodeData.txt`-style lines (semicolon separated, fields 0-14).
 *
 * Every line yields a `point` row, except for `<Name, First>` / `<Name, Last>`
 * pairs, which are collapsed into a single `range` row named `Name`. The row
 * value is the general category (field 2); the remaining fields are exposed
 * under `meta`.
 *
 * Numeric fields: fields 6, 7 and 8 hold the decimal, digit and numeric value
 * of a character. `meta.numericValue` is taken from field 8 and
 * `meta.numericType` is `"Decimal"`, `"Digit"` or `"Numeric"` depending on the
 * first of fields 6, 7, 8 that is filled, or `""` when none is. The previous
 * code read field 7 as the value, dropping values that appear only in field 8
 * (such as `1/2`), and reported the raw decimal digit as the type.
 *
 * @param ctx Parse context providing `readLines()`, `isComment()` and `file.path`.
 */
const unicodeDataParser = async function* (ctx) {
  // Builds the `meta` object shared by point and range rows.
  const buildMeta = (fields, characterName, generalCategory) => {
    const field = (index) => fields[index]?.trim() ?? "";
    let numericType = "";
    if (field(6) !== "") numericType = "Decimal";
    else if (field(7) !== "") numericType = "Digit";
    else if (field(8) !== "") numericType = "Numeric";
    return {
      characterName,
      generalCategory,
      canonicalCombiningClass: field(3),
      bidiClass: field(4),
      decompositionMapping: field(5),
      numericType,
      numericValue: field(8),
      bidiMirrored: field(9),
      unicode1Name: field(10),
      isoComment: field(11),
      simpleUppercaseMapping: field(12),
      simpleLowercaseMapping: field(13),
      simpleTitlecaseMapping: field(14)
    };
  };

  // State carried from a "<..., First>" line to its matching "<..., Last>" line.
  let rangeStart = null;
  let rangeName = null;
  for await (const line of ctx.readLines()) {
    if (ctx.isComment(line)) continue;
    const trimmedLine = line.trim();
    if (trimmedLine === "") continue;
    const fields = splitMinFields(trimmedLine, ";", 14);
    if (!fields) continue;
    const codePoint = fields[0]?.trim() ?? "";
    const characterName = fields[1]?.trim() ?? "";
    const generalCategory = fields[2]?.trim() ?? "";
    if (characterName.endsWith(", First>")) {
      // Remember where the range starts; the row is emitted on the "Last" line.
      rangeStart = codePoint;
      rangeName = characterName.replace(", First>", "").replace("<", "");
      continue;
    }
    if (characterName.endsWith(", Last>") && rangeStart !== null) {
      const row = {
        sourceFile: ctx.file.path,
        kind: "range",
        start: rangeStart,
        end: codePoint,
        value: generalCategory,
        meta: buildMeta(fields, rangeName || "", generalCategory)
      };
      rangeStart = null;
      rangeName = null;
      yield row;
      continue;
    }
    yield {
      sourceFile: ctx.file.path,
      kind: "point",
      codePoint,
      value: generalCategory,
      meta: buildMeta(fields, characterName, generalCategory)
    };
  }
};
|
|
207
|
+
|
|
208
|
+
//#endregion
|
|
209
|
+
//#region src/resolvers/property-json.ts
|
|
210
|
+
/**
 * Converts a parsed row into the entry shape emitted by the property JSON
 * resolver.
 *
 * @param row Parsed row; its `kind` decides which location field(s) are read.
 * @returns `{ codePoint, value }`, `{ range, value }` or `{ sequence, value }`.
 *   Returns `null` when the row has no value, has an unrecognised kind, or is
 *   missing the location field(s) required by its kind.
 */
function rowToResolvedEntry$1(row) {
	const { value } = row;
	if (value === undefined) return null;
	switch (row.kind) {
		case "point":
			return row.codePoint ? { codePoint: row.codePoint, value } : null;
		case "range":
			// Both ends are required; the range is serialised as "start..end".
			return row.start && row.end ? { range: `${row.start}..${row.end}`, value } : null;
		case "sequence":
			return row.sequence ? { sequence: row.sequence, value } : null;
		default:
			return null;
	}
}
|
|
227
|
+
/**
 * Creates a resolver that gathers every convertible row of a file into a
 * single property JSON record.
 *
 * @param options Optional settings. `options.property` overrides the property
 *   name; when it is falsy the file name with a trailing ".txt" removed is used.
 * @returns An async resolver `(ctx, rows)` that resolves to a one-element array
 *   holding `{ version, property, file, entries }`.
 */
function createPropertyJsonResolver(options = {}) {
	return async function propertyJsonResolver(ctx, rows) {
		const collected = [];
		for await (const row of rows) {
			const candidate = rowToResolvedEntry$1(row);
			// Rows that cannot be converted come back as null and are dropped.
			if (!candidate) continue;
			collected.push(candidate);
		}
		const fileDerivedName = () => ctx.file.name.replace(/\.txt$/, "");
		const record = {
			version: ctx.version,
			property: options.property || fileDerivedName(),
			file: ctx.file.name,
			entries: ctx.normalizeEntries(collected)
		};
		return [record];
	};
}

/** Default resolver instance created without options. */
const propertyJsonResolver = createPropertyJsonResolver();
|
|
244
|
+
|
|
245
|
+
//#endregion
|
|
246
|
+
//#region src/resolvers/grouped.ts
|
|
247
|
+
/**
 * Converts a parsed row into the entry shape emitted by the grouped resolver.
 *
 * @param row Parsed row; its `kind` decides which location field(s) are read.
 * @returns The location (`codePoint`, `range` or `sequence`) combined with the
 *   row's `value`, or `null` when the row has no value or no usable location.
 */
function rowToResolvedEntry(row) {
	const value = row.value;
	if (value === undefined) {
		return null;
	}

	// Work out where the entry applies first, then attach the value.
	let location = null;
	if (row.kind === "point" && row.codePoint) {
		location = { codePoint: row.codePoint };
	} else if (row.kind === "range" && row.start && row.end) {
		location = { range: `${row.start}..${row.end}` };
	} else if (row.kind === "sequence" && row.sequence) {
		location = { sequence: row.sequence };
	}

	return location === null ? null : { ...location, value };
}
|
|
264
|
+
/**
 * Creates a resolver that splits a file's rows into groups and emits one
 * property JSON record per group.
 *
 * @param options `groupBy` is either a function `(row) => key`, the string
 *   "property" (group by `row.property`), or anything else (group by
 *   `row.value`, arrays joined with ","). Missing keys become "unknown".
 *   `propertyNameFn(key, ctx)` optionally maps a group key to the emitted
 *   property name; without it the key itself is used.
 * @returns An async resolver `(ctx, rows)` resolving to one record per group,
 *   in the order each group key was first seen.
 */
function createGroupedResolver(options) {
	const { groupBy, propertyNameFn } = options;

	let getGroupKey;
	if (typeof groupBy === "function") {
		getGroupKey = groupBy;
	} else if (groupBy === "property") {
		getGroupKey = (row) => row.property || "unknown";
	} else {
		getGroupKey = (row) => {
			const rowValue = row.value;
			if (Array.isArray(rowValue)) return rowValue.join(",");
			return rowValue || "unknown";
		};
	}

	return async function groupedResolver(ctx, rows) {
		const buckets = new Map();
		for await (const row of rows) {
			// The key is computed for every row, even ones that are then dropped.
			const key = getGroupKey(row);
			const entry = rowToResolvedEntry(row);
			if (!entry) continue;
			let bucket = buckets.get(key);
			if (!bucket) {
				bucket = [];
				buckets.set(key, bucket);
			}
			bucket.push(entry);
		}

		return Array.from(buckets, ([key, entries]) => ({
			version: ctx.version,
			property: propertyNameFn ? propertyNameFn(key, ctx) : key,
			file: ctx.file.name,
			entries: ctx.normalizeEntries(entries)
		}));
	};
}
|
|
294
|
+
|
|
295
|
+
//#endregion
|
|
296
|
+
//#region src/routes/common.ts
|
|
297
|
+
/**
 * Route for `LineBreak.txt`: parsed with the standard parser, passed through
 * the `normalizeCodePoints` and `sortByCodePoint` transforms, and resolved
 * into a single property JSON record.
 */
const lineBreakRoute = definePipelineRoute({
	id: "line-break",
	filter: byName("LineBreak.txt"),
	parser: standardParser,
	transforms: [
		normalizeCodePoints,
		sortByCodePoint
	],
	resolver: propertyJsonResolver
});
|
|
304
|
+
/**
 * Route for `Scripts.txt`: parsed with the standard parser, passed through
 * the `normalizeCodePoints` and `sortByCodePoint` transforms, and resolved
 * into a single property JSON record.
 */
const scriptsRoute = definePipelineRoute({
	id: "scripts",
	filter: byName("Scripts.txt"),
	parser: standardParser,
	transforms: [
		normalizeCodePoints,
		sortByCodePoint
	],
	resolver: propertyJsonResolver
});
|
|
311
|
+
/**
 * Route for `Blocks.txt`: parsed with the standard parser, passed through
 * the `normalizeCodePoints` and `sortByCodePoint` transforms, and resolved
 * into a single property JSON record.
 */
const blocksRoute = definePipelineRoute({
	id: "blocks",
	filter: byName("Blocks.txt"),
	parser: standardParser,
	transforms: [
		normalizeCodePoints,
		sortByCodePoint
	],
	resolver: propertyJsonResolver
});
|
|
318
|
+
/**
 * Route for `extracted/DerivedGeneralCategory.txt`: parsed with the standard
 * parser, passed through the `normalizeCodePoints` and `sortByCodePoint`
 * transforms, and resolved into a single property JSON record.
 *
 * The target is addressed by a directory-qualified path, so it is matched with
 * `byGlob`, the same helper the emoji-data route uses for "emoji/emoji-data.txt".
 * NOTE(review): this assumes `byName` compares against the bare file name, in
 * which case a pattern containing "/" could never match. Confirm against the
 * filter helpers in @ucdjs/pipelines-core.
 */
const generalCategoryRoute = definePipelineRoute({
	id: "general-category",
	filter: byGlob("extracted/DerivedGeneralCategory.txt"),
	parser: standardParser,
	transforms: [normalizeCodePoints, sortByCodePoint],
	resolver: propertyJsonResolver
});
|
|
325
|
+
/**
 * Route for `PropList.txt`: rows are grouped by their value and each group is
 * emitted as its own record, named after the group key unchanged.
 */
const propListRoute = definePipelineRoute({
	id: "prop-list",
	filter: byName("PropList.txt"),
	parser: standardParser,
	transforms: [
		normalizeCodePoints,
		sortByCodePoint
	],
	resolver: createGroupedResolver({
		groupBy: "value",
		// The group key is already the property name.
		propertyNameFn: (groupKey) => groupKey
	})
});
|
|
335
|
+
/**
 * Route for `DerivedCoreProperties.txt`: rows are grouped by their value and
 * each group is emitted as its own record, named after the group key unchanged.
 */
const derivedCorePropertiesRoute = definePipelineRoute({
	id: "derived-core-properties",
	filter: byName("DerivedCoreProperties.txt"),
	parser: standardParser,
	transforms: [
		normalizeCodePoints,
		sortByCodePoint
	],
	resolver: createGroupedResolver({
		groupBy: "value",
		// The group key is already the property name.
		propertyNameFn: (groupKey) => groupKey
	})
});
|
|
345
|
+
/**
 * Route for `emoji/emoji-data.txt` (matched by glob): rows are grouped by
 * their value and each group is emitted as a record whose property name is the
 * group key prefixed with "Emoji_".
 */
const emojiDataRoute = definePipelineRoute({
	id: "emoji-data",
	filter: byGlob("emoji/emoji-data.txt"),
	parser: standardParser,
	transforms: [
		normalizeCodePoints,
		sortByCodePoint
	],
	resolver: createGroupedResolver({
		groupBy: "value",
		propertyNameFn: (groupKey) => `Emoji_${groupKey}`
	})
});
|
|
355
|
+
/**
 * Route for `UnicodeData.txt`: parsed with the dedicated `unicodeDataParser`
 * and resolved into a single property JSON record. Unlike the other routes in
 * this file, only `normalizeCodePoints` is applied; there is no sorting step.
 */
const unicodeDataRoute = definePipelineRoute({
	id: "unicode-data",
	filter: byName("UnicodeData.txt"),
	parser: unicodeDataParser,
	transforms: [
		normalizeCodePoints
	],
	resolver: propertyJsonResolver
});
|
|
362
|
+
/** Routes for the core (non-emoji) files handled by this package. */
const coreRoutes = [
	lineBreakRoute, scriptsRoute, blocksRoute,
	generalCategoryRoute,
	propListRoute, derivedCorePropertiesRoute,
	unicodeDataRoute
];

/** Routes for emoji data files. */
const emojiRoutes = [
	emojiDataRoute
];

/** Every predefined route: the core routes followed by the emoji routes. */
const allRoutes = coreRoutes.concat(emojiRoutes);
|
|
373
|
+
|
|
374
|
+
//#endregion
|
|
375
|
+
//#region src/pipelines/basic.ts
|
|
376
|
+
/**
 * Builds the "Basic UCD Pipeline" definition: the core routes over every
 * `.txt` file, with the standard parser and a property JSON resolver as the
 * fallback.
 *
 * @param options `versions` is passed through as-is; `id` defaults to
 *   "basic-ucd", `concurrency` to 4 and `strict` to false when undefined.
 * @returns Whatever `definePipeline` returns for the assembled definition.
 */
function createBasicPipeline(options) {
	const {
		id = "basic-ucd",
		versions,
		concurrency = 4,
		strict = false
	} = options;
	const fallbackResolver = createPropertyJsonResolver();
	const definition = {
		id,
		name: "Basic UCD Pipeline",
		description: "Processes core Unicode Character Database files",
		versions,
		inputs: [],
		// Copy so the shared route list is not handed out directly.
		routes: [...coreRoutes],
		include: byExt(".txt"),
		concurrency,
		strict,
		fallback: {
			parser: standardParser,
			resolver: fallbackResolver
		}
	};
	return definePipeline(definition);
}
|
|
395
|
+
|
|
396
|
+
//#endregion
|
|
397
|
+
//#region src/pipelines/emoji.ts
|
|
398
|
+
/**
 * Builds the "Emoji Pipeline" definition: the emoji routes over `.txt` files
 * in the "emoji" directory, with the sequence parser and a property JSON
 * resolver as the fallback.
 *
 * @param options `versions` is passed through as-is; `id` defaults to "emoji",
 *   `concurrency` to 4 and `strict` to false when undefined.
 * @returns Whatever `definePipeline` returns for the assembled definition.
 */
function createEmojiPipeline(options) {
	const {
		id = "emoji",
		versions,
		concurrency = 4,
		strict = false
	} = options;
	const fallbackResolver = createPropertyJsonResolver();
	const definition = {
		id,
		name: "Emoji Pipeline",
		description: "Processes Unicode emoji data files",
		versions,
		inputs: [],
		// Copy so the shared route list is not handed out directly.
		routes: [...emojiRoutes],
		include: and(byDir("emoji"), byExt(".txt")),
		concurrency,
		strict,
		fallback: {
			parser: sequenceParser,
			resolver: fallbackResolver
		}
	};
	return definePipeline(definition);
}
|
|
417
|
+
|
|
418
|
+
//#endregion
|
|
419
|
+
//#region src/pipelines/full.ts
|
|
420
|
+
/**
 * Builds the "Full UCD Pipeline" definition: all predefined routes (core and
 * emoji) over every `.txt` file, with the standard parser and a property JSON
 * resolver as the fallback.
 *
 * @param options `versions` is passed through as-is; `id` defaults to
 *   "full-ucd", `concurrency` to 4 and `strict` to false when undefined.
 * @returns Whatever `definePipeline` returns for the assembled definition.
 */
function createFullPipeline(options) {
	const {
		id = "full-ucd",
		versions,
		concurrency = 4,
		strict = false
	} = options;
	const fallbackResolver = createPropertyJsonResolver();
	const definition = {
		id,
		name: "Full UCD Pipeline",
		description: "Processes all Unicode Character Database files",
		versions,
		inputs: [],
		// Copy so the shared route list is not handed out directly.
		routes: [...allRoutes],
		include: byExt(".txt"),
		concurrency,
		strict,
		fallback: {
			parser: standardParser,
			resolver: fallbackResolver
		}
	};
	return definePipeline(definition);
}
|
|
439
|
+
|
|
440
|
+
//#endregion
|
|
441
|
+
//#region src/transforms/filter.ts
|
|
442
|
+
/**
 * Tests `text` against a filter pattern.
 *
 * A string pattern must equal `text` exactly. A RegExp pattern is tested after
 * resetting `lastIndex`: `RegExp#test` on a global or sticky regex resumes from
 * the previous match position, so without the reset a pattern reused across
 * rows would spuriously reject some of them.
 */
function matchesRowPattern(pattern, text) {
	if (typeof pattern === "string") return text === pattern;
	pattern.lastIndex = 0;
	return pattern.test(text);
}

/**
 * Creates a transform that only lets through rows matching every configured
 * criterion.
 *
 * @param options Filter criteria; each one is skipped when falsy.
 *   - `property`: string (exact match) or RegExp tested against `row.property`;
 *     rows without a property are dropped.
 *   - `value`: string (exact match) or RegExp tested against `row.value`
 *     (arrays are joined with ","); rows with an empty value are dropped.
 *   - `kind`: a single kind or an array of kinds that `row.kind` must be in.
 * @returns The transform produced by `definePipelineTransform`, with id
 *   "row-filter".
 */
function createRowFilter(options) {
	return definePipelineTransform({
		id: "row-filter",
		async *fn(ctx, rows) {
			for await (const row of rows) {
				if (options.property) {
					if (!row.property) continue;
					if (!matchesRowPattern(options.property, row.property)) continue;
				}
				if (options.value) {
					const rowValue = Array.isArray(row.value) ? row.value.join(",") : row.value;
					if (!rowValue) continue;
					if (!matchesRowPattern(options.value, rowValue)) continue;
				}
				if (options.kind) {
					const allowedKinds = Array.isArray(options.kind) ? options.kind : [options.kind];
					if (!allowedKinds.includes(row.kind)) continue;
				}
				yield row;
			}
		}
	});
}
|
|
468
|
+
/**
 * Wraps a pipeline filter as a row transform, so rows can be kept or dropped
 * by the same predicate used elsewhere in the pipeline.
 *
 * @param filter Predicate called once per row with `{ file, row }`. `file` is
 *   the current file context; `row` is `{ property }` when the row has a
 *   property and `undefined` otherwise.
 * @returns The transform produced by `definePipelineTransform`, with id
 *   "filter-by-pipeline-filter". It yields only rows the filter accepts.
 */
function createFilterByPipelineFilter(filter) {
	return definePipelineTransform({
		id: "filter-by-pipeline-filter",
		async *fn(ctx, rows) {
			for await (const row of rows) {
				const filterInput = {
					file: ctx.file,
					row: row.property ? { property: row.property } : void 0
				};
				if (!filter(filterInput)) continue;
				yield row;
			}
		}
	});
}
|
|
479
|
+
|
|
480
|
+
//#endregion
|
|
481
|
+
export { allRoutes, blocksRoute, coreRoutes, createBasicPipeline, createEmojiPipeline, createFilterByPipelineFilter, createFullPipeline, createGroupedResolver, createMultiPropertyParser, createPropertyJsonResolver, createRowFilter, createSequenceParser, createStandardParser, derivedCorePropertiesRoute, emojiDataRoute, emojiRoutes, generalCategoryRoute, lineBreakRoute, multiPropertyParser, propListRoute, propertyJsonResolver, scriptsRoute, sequenceParser, standardParser, unicodeDataParser, unicodeDataRoute };
|
package/package.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@ucdjs/pipelines-presets",
|
|
3
|
+
"version": "0.0.1-beta.1",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"author": {
|
|
6
|
+
"name": "Lucas Nørgård",
|
|
7
|
+
"email": "lucasnrgaard@gmail.com",
|
|
8
|
+
"url": "https://luxass.dev"
|
|
9
|
+
},
|
|
10
|
+
"license": "MIT",
|
|
11
|
+
"homepage": "https://github.com/ucdjs/ucd",
|
|
12
|
+
"repository": {
|
|
13
|
+
"type": "git",
|
|
14
|
+
"url": "git+https://github.com/ucdjs/ucd.git",
|
|
15
|
+
"directory": "packages/pipelines/pipeline-presets"
|
|
16
|
+
},
|
|
17
|
+
"bugs": {
|
|
18
|
+
"url": "https://github.com/ucdjs/ucd/issues"
|
|
19
|
+
},
|
|
20
|
+
"exports": {
|
|
21
|
+
".": "./dist/index.mjs",
|
|
22
|
+
"./package.json": "./package.json"
|
|
23
|
+
},
|
|
24
|
+
"types": "./dist/index.d.mts",
|
|
25
|
+
"files": [
|
|
26
|
+
"dist"
|
|
27
|
+
],
|
|
28
|
+
"engines": {
|
|
29
|
+
"node": ">=22.18"
|
|
30
|
+
},
|
|
31
|
+
"dependencies": {
|
|
32
|
+
"zod": "4.3.6",
|
|
33
|
+
"@ucdjs/pipelines-core": "0.0.1-beta.1"
|
|
34
|
+
},
|
|
35
|
+
"devDependencies": {
|
|
36
|
+
"@luxass/eslint-config": "7.2.0",
|
|
37
|
+
"eslint": "10.0.0",
|
|
38
|
+
"publint": "0.3.17",
|
|
39
|
+
"tsdown": "0.20.3",
|
|
40
|
+
"typescript": "5.9.3",
|
|
41
|
+
"@ucdjs-tooling/tsdown-config": "1.0.0",
|
|
42
|
+
"@ucdjs-tooling/tsconfig": "1.0.0"
|
|
43
|
+
},
|
|
44
|
+
"publishConfig": {
|
|
45
|
+
"access": "public"
|
|
46
|
+
},
|
|
47
|
+
"scripts": {
|
|
48
|
+
"build": "tsdown --tsconfig=./tsconfig.build.json",
|
|
49
|
+
"dev": "tsdown --watch",
|
|
50
|
+
"clean": "git clean -xdf dist node_modules",
|
|
51
|
+
"lint": "eslint .",
|
|
52
|
+
"typecheck": "tsc --noEmit -p tsconfig.build.json"
|
|
53
|
+
}
|
|
54
|
+
}
|