@unicode-utils/core 0.12.0-beta.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +41 -0
- package/dist/chunk-Bp6m_JJh.js +13 -0
- package/dist/constants-BclbCHPC.d.ts +2 -0
- package/dist/constants-DygWVxzp.js +3 -0
- package/dist/constants.d.ts +2 -0
- package/dist/constants.js +3 -0
- package/dist/datafile-CWbGVIAa.js +877 -0
- package/dist/datafile-UqVC4xXw.d.ts +495 -0
- package/dist/datafile.d.ts +3 -0
- package/dist/datafile.js +4 -0
- package/dist/index.d.ts +153 -0
- package/dist/index.js +216 -0
- package/dist/line-helpers-CYDQ0FnQ.js +373 -0
- package/dist/line-helpers-DGsVuiW2.d.ts +305 -0
- package/dist/line-helpers.d.ts +2 -0
- package/dist/line-helpers.js +3 -0
- package/package.json +65 -0
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
import { t as __export } from "./chunk-Bp6m_JJh.js";
|
|
2
|
+
import { t as BoundaryStyle } from "./line-helpers-DGsVuiW2.js";
|
|
3
|
+
|
|
4
|
+
//#region src/datafile/ast.d.ts
|
|
5
|
+
declare const NodeTypes: {
|
|
6
|
+
readonly ROOT: "root";
|
|
7
|
+
readonly COMMENT: "comment";
|
|
8
|
+
readonly EMPTY_COMMENT: "empty-comment";
|
|
9
|
+
readonly BOUNDARY: "boundary";
|
|
10
|
+
readonly DATA: "data";
|
|
11
|
+
readonly EMPTY: "empty";
|
|
12
|
+
readonly EOF: "eof";
|
|
13
|
+
readonly PROPERTY: "property";
|
|
14
|
+
readonly UNKNOWN: "unknown";
|
|
15
|
+
};
|
|
16
|
+
type NodeType = typeof NodeTypes[keyof typeof NodeTypes];
|
|
17
|
+
interface BaseNode {
|
|
18
|
+
type: NodeType;
|
|
19
|
+
value: string;
|
|
20
|
+
raw: string;
|
|
21
|
+
line: number;
|
|
22
|
+
}
|
|
23
|
+
interface RootNode extends BaseNode {
|
|
24
|
+
type: "root";
|
|
25
|
+
children: ChildNode[];
|
|
26
|
+
fileName?: string;
|
|
27
|
+
version?: string;
|
|
28
|
+
}
|
|
29
|
+
interface PropertyNode extends BaseNode {
|
|
30
|
+
type: "property";
|
|
31
|
+
propertyValue: string | undefined;
|
|
32
|
+
}
|
|
33
|
+
interface CommentNode extends BaseNode {
|
|
34
|
+
type: "comment";
|
|
35
|
+
}
|
|
36
|
+
interface EmptyCommentNode extends BaseNode {
|
|
37
|
+
type: "empty-comment";
|
|
38
|
+
}
|
|
39
|
+
interface BoundaryNode extends BaseNode {
|
|
40
|
+
type: "boundary";
|
|
41
|
+
style: BoundaryStyle;
|
|
42
|
+
}
|
|
43
|
+
interface DataNode extends BaseNode {
|
|
44
|
+
type: "data";
|
|
45
|
+
}
|
|
46
|
+
interface EmptyNode extends BaseNode {
|
|
47
|
+
type: "empty";
|
|
48
|
+
}
|
|
49
|
+
interface EOFNode extends BaseNode {
|
|
50
|
+
type: "eof";
|
|
51
|
+
}
|
|
52
|
+
interface UnknownNode extends BaseNode {
|
|
53
|
+
type: "unknown";
|
|
54
|
+
[key: string]: unknown;
|
|
55
|
+
}
|
|
56
|
+
type ChildNode = CommentNode | EmptyCommentNode | BoundaryNode | DataNode | EmptyNode | EOFNode | PropertyNode | UnknownNode;
|
|
57
|
+
type Node = RootNode | ChildNode;
|
|
58
|
+
declare namespace ast_utils_d_exports {
|
|
59
|
+
export { VisitCallback, VisitContext, allNodesAreOfType, endsWithSequence, findNodePattern, hasBoundaryWithinRange, hasConsecutiveNodesOfType, hasMinNodesOfType, hasNextNComments, hasNextNCommentsFrom, hasNodePattern, hasPrevNCommentsFrom, isCommentOnlyDocument, startsWithSequence, visit };
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* Checks if the next N nodes from a given index are all comment nodes
|
|
63
|
+
* @param {RootNode} root - The root node containing children
|
|
64
|
+
* @param {number} startIndex - The starting index to check from
|
|
65
|
+
* @param {number} count - Number of nodes to check
|
|
66
|
+
* @returns {boolean} true if the next N nodes are all comment nodes, false otherwise
|
|
67
|
+
*/
|
|
68
|
+
declare function hasNextNCommentsFrom(root: RootNode, startIndex: number, count: number): boolean;
|
|
69
|
+
/**
|
|
70
|
+
* Checks if the next N nodes from the current node are all comment nodes
|
|
71
|
+
* @param {RootNode} root - The root node containing children
|
|
72
|
+
* @param {ChildNode} currentNode - The current node to find in the children array
|
|
73
|
+
* @param {number} count - Number of nodes to check after the current node
|
|
74
|
+
* @returns {boolean} true if the next N nodes are all comment nodes, false otherwise
|
|
75
|
+
*/
|
|
76
|
+
declare function hasNextNComments(root: RootNode, currentNode: ChildNode, count: number): boolean;
|
|
77
|
+
/**
|
|
78
|
+
* Checks if the previous N nodes from a given index are all comment nodes
|
|
79
|
+
* @param {RootNode} root - The root node containing children
|
|
80
|
+
* @param {number} startIndex - The starting index to check backwards from
|
|
81
|
+
* @param {number} count - Number of nodes to check backwards
|
|
82
|
+
* @returns {boolean} true if the previous N nodes are all comment nodes, false otherwise
|
|
83
|
+
*/
|
|
84
|
+
declare function hasPrevNCommentsFrom(root: RootNode, startIndex: number, count: number): boolean;
|
|
85
|
+
/**
|
|
86
|
+
* Checks if there are N consecutive nodes of a specific type starting from an index
|
|
87
|
+
* @param {RootNode} root - The root node containing children
|
|
88
|
+
* @param {number} startIndex - The starting index to check from
|
|
89
|
+
* @param {number} count - Number of consecutive nodes to check
|
|
90
|
+
* @param {ChildNode["type"]} nodeType - The type of node to check for (any child node type: 'comment', 'empty-comment', 'boundary', 'data', 'empty', 'eof', 'property', 'unknown')
|
|
91
|
+
* @returns {boolean} true if there are N consecutive nodes of the specified type, false otherwise
|
|
92
|
+
*/
|
|
93
|
+
declare function hasConsecutiveNodesOfType(root: RootNode, startIndex: number, count: number, nodeType: ChildNode["type"]): boolean;
|
|
94
|
+
/**
|
|
95
|
+
* Checks if the root contains a specific pattern of node types
|
|
96
|
+
* @param {RootNode} root - The root node containing children
|
|
97
|
+
* @param {ChildNode["type"][]} pattern - Array of node types that should appear consecutively
|
|
98
|
+
* @param {number} [startIndex] - Optional starting index to check from
|
|
99
|
+
* @returns {boolean} true if the pattern is found, false otherwise
|
|
100
|
+
*/
|
|
101
|
+
declare function hasNodePattern(root: RootNode, pattern: ChildNode["type"][], startIndex?: number): boolean;
|
|
102
|
+
/**
|
|
103
|
+
* Finds the first occurrence of a node pattern in the root's children
|
|
104
|
+
* @param {RootNode} root - The root node containing children
|
|
105
|
+
* @param {ChildNode["type"][]} pattern - Array of node types to search for
|
|
106
|
+
* @returns {number} The index of the first occurrence, or -1 if not found
|
|
107
|
+
*/
|
|
108
|
+
declare function findNodePattern(root: RootNode, pattern: ChildNode["type"][]): number;
|
|
109
|
+
/**
|
|
110
|
+
* Checks if the root starts with a specific sequence of node types
|
|
111
|
+
* @param {RootNode} root - The root node containing children
|
|
112
|
+
* @param {ChildNode["type"][]} sequence - Array of node types that should appear at the beginning
|
|
113
|
+
* @returns {boolean} true if the root starts with the sequence, false otherwise
|
|
114
|
+
*/
|
|
115
|
+
declare function startsWithSequence(root: RootNode, sequence: ChildNode["type"][]): boolean;
|
|
116
|
+
/**
|
|
117
|
+
* Checks if the root ends with a specific sequence of node types
|
|
118
|
+
* @param {RootNode} root - The root node containing children
|
|
119
|
+
* @param {ChildNode["type"][]} sequence - Array of node types that should appear at the end
|
|
120
|
+
* @returns {boolean} true if the root ends with the sequence, false otherwise
|
|
121
|
+
*/
|
|
122
|
+
declare function endsWithSequence(root: RootNode, sequence: ChildNode["type"][]): boolean;
|
|
123
|
+
/**
|
|
124
|
+
* Checks if there are at least N nodes of a specific type in the root
|
|
125
|
+
* @param {RootNode} root - The root node containing children
|
|
126
|
+
* @param {ChildNode["type"]} nodeType - The type of node to count
|
|
127
|
+
* @param {number} minCount - Minimum number of nodes required
|
|
128
|
+
* @returns {boolean} true if there are at least minCount nodes of the specified type
|
|
129
|
+
*/
|
|
130
|
+
declare function hasMinNodesOfType(root: RootNode, nodeType: ChildNode["type"], minCount: number): boolean;
|
|
131
|
+
/**
|
|
132
|
+
* Checks if all nodes in the root are of a specific type
|
|
133
|
+
* @param {RootNode} root - The root node containing children
|
|
134
|
+
* @param {ChildNode["type"]} nodeType - The type of node to check for
|
|
135
|
+
* @returns {boolean} true if all nodes are of the specified type, false otherwise
|
|
136
|
+
*/
|
|
137
|
+
declare function allNodesAreOfType(root: RootNode, nodeType: ChildNode["type"]): boolean;
|
|
138
|
+
/**
|
|
139
|
+
* Checks if the root contains only comment and empty nodes
|
|
140
|
+
* @param {RootNode} root - The root node containing children
|
|
141
|
+
* @returns {boolean} true if the root contains only comments and empty nodes
|
|
142
|
+
*/
|
|
143
|
+
declare function isCommentOnlyDocument(root: RootNode): boolean;
|
|
144
|
+
/**
|
|
145
|
+
* Checks if there's a boundary node within the next N nodes
|
|
146
|
+
* @param {RootNode} root - The root node containing children
|
|
147
|
+
* @param {number} startIndex - The starting index to check from
|
|
148
|
+
* @param {number} lookAhead - Number of nodes to look ahead
|
|
149
|
+
* @returns {boolean} true if a boundary node is found within the range
|
|
150
|
+
*/
|
|
151
|
+
declare function hasBoundaryWithinRange(root: RootNode, startIndex: number, lookAhead: number): boolean;
|
|
152
|
+
interface VisitContext {
|
|
153
|
+
settings: any;
|
|
154
|
+
currentNode: ChildNode;
|
|
155
|
+
nextNode?: ChildNode;
|
|
156
|
+
prevNode?: ChildNode;
|
|
157
|
+
}
|
|
158
|
+
type VisitCallback = (ctx: VisitContext) => void;
|
|
159
|
+
declare function visit(root: RootNode, callback: VisitCallback): void;
|
|
160
|
+
//#endregion
|
|
161
|
+
//#region src/datafile/sections.d.ts
|
|
162
|
+
interface UCDSectionWithLines {
|
|
163
|
+
description: string;
|
|
164
|
+
lines: string[];
|
|
165
|
+
}
|
|
166
|
+
/**
|
|
167
|
+
* Determines whether the given Unicode data file content contains sections.
|
|
168
|
+
*
|
|
169
|
+
* Sections in Unicode data files are typically delimited by special comment
|
|
170
|
+
* patterns and contain related data grouped together.
|
|
171
|
+
*
|
|
172
|
+
* @param {string} content - The Unicode data file content to check
|
|
173
|
+
* @returns {boolean} True if the content contains sections, false otherwise
|
|
174
|
+
*
|
|
175
|
+
* @example
|
|
176
|
+
* ```ts
|
|
177
|
+
* const fileContent = "# Section 1\ndata1\n\n# Section 2\ndata2";
|
|
178
|
+
* const hasFileSections = hasSections(fileContent); // true
|
|
179
|
+
* ```
|
|
180
|
+
*/
|
|
181
|
+
declare function hasSections(content: string): boolean;
|
|
182
|
+
/**
|
|
183
|
+
* Parses Unicode data file content into sections.
|
|
184
|
+
*
|
|
185
|
+
* This function divides the file content into logical sections based on comment blocks
|
|
186
|
+
* followed by data lines. Each section consists of a name (the first comment line),
|
|
187
|
+
* a description (subsequent comment lines), and associated data lines.
|
|
188
|
+
*
|
|
189
|
+
* The function handles various formatting patterns found in Unicode data files,
|
|
190
|
+
* including handling of empty lines, consecutive comments, and section boundaries.
|
|
191
|
+
*
|
|
192
|
+
* @param {string} content - The Unicode data file content to parse
|
|
193
|
+
* @returns {Map<string, UCDSectionWithLines>} A map where keys are section names and
|
|
194
|
+
* values are objects containing the
|
|
195
|
+
* section description and associated data lines
|
|
196
|
+
*
|
|
197
|
+
* @example
|
|
198
|
+
* ```ts
|
|
199
|
+
* const content = `# Section 1
|
|
200
|
+
* # Description of section 1
|
|
201
|
+
* data1
|
|
202
|
+
* data2
|
|
203
|
+
*
|
|
204
|
+
* # Section 2
|
|
205
|
+
* # Description of section 2
|
|
206
|
+
* data3
|
|
207
|
+
* data4`;
|
|
208
|
+
*
|
|
209
|
+
* const sections = parseSections(content);
|
|
210
|
+
* // sections will contain two entries:
|
|
211
|
+
* // "Section 1" -> { description: "Description of section 1", lines: ["data1", "data2"] }
|
|
212
|
+
* // "Section 2" -> { description: "Description of section 2", lines: ["data3", "data4"] }
|
|
213
|
+
* ```
|
|
214
|
+
*/
|
|
215
|
+
declare function parseSections(content: string): Map<string, UCDSectionWithLines>;
|
|
216
|
+
//#endregion
|
|
217
|
+
//#region src/datafile/model.d.ts
|
|
218
|
+
/**
|
|
219
|
+
* Represents a raw Unicode data file with methods to access its content.
|
|
220
|
+
*
|
|
221
|
+
* This class parses and provides access to various components of Unicode data files,
|
|
222
|
+
* including the raw content, individual lines, file metadata (like heading, version),
|
|
223
|
+
* and whether the file has an EOF marker.
|
|
224
|
+
*
|
|
225
|
+
* @example
|
|
226
|
+
* ```ts
|
|
227
|
+
* // Create a RawDataFile from a string content
|
|
228
|
+
* const content = "# UnicodeData-14.0.0.txt\n# Some Unicode data\n\nU+0020;SPACE\n# EOF";
|
|
229
|
+
* const dataFile = new RawDataFile(content);
|
|
230
|
+
*
|
|
231
|
+
* // Access file properties
|
|
232
|
+
* console.log(dataFile.fileName); // "UnicodeData"
|
|
233
|
+
* console.log(dataFile.version); // "14.0.0"
|
|
234
|
+
* console.log(dataFile.hasEOF); // true
|
|
235
|
+
* console.log(dataFile.heading); // "# UnicodeData-14.0.0.txt\n# Some Unicode data"
|
|
236
|
+
* ```
|
|
237
|
+
*/
|
|
238
|
+
declare class RawDataFile {
|
|
239
|
+
/** The full content of the file, including the heading section (if one was found). */
|
|
240
|
+
readonly rawContent: string;
|
|
241
|
+
/**
|
|
242
|
+
* The content without the heading section.
|
|
243
|
+
*
|
|
244
|
+
* NOTE:
|
|
245
|
+
* If we couldn't find a heading, this will be the same as `rawContent`.
|
|
246
|
+
*/
|
|
247
|
+
readonly content: string;
|
|
248
|
+
/** The lines of the content; the heading is not included. */
|
|
249
|
+
readonly lines: string[];
|
|
250
|
+
readonly heading: string | null;
|
|
251
|
+
/**
|
|
252
|
+
* The AST representation of the data file.
|
|
253
|
+
* This is typically used for further processing or analysis of the file structure.
|
|
254
|
+
* If the file is not parsed into an AST, this will be undefined.
|
|
255
|
+
*/
|
|
256
|
+
readonly ast: RootNode | undefined;
|
|
257
|
+
readonly sections: Map<string, UCDSectionWithLines>;
|
|
258
|
+
/**
|
|
259
|
+
* The name of the file, if available.
|
|
260
|
+
* This is typically extracted from the first line of the file.
|
|
261
|
+
* It may not always be present, especially if the file is empty or malformed.
|
|
262
|
+
*/
|
|
263
|
+
readonly fileName: string | undefined;
|
|
264
|
+
/**
|
|
265
|
+
* The version of the file, if available.
|
|
266
|
+
* This is typically extracted from the first line of the file.
|
|
267
|
+
*/
|
|
268
|
+
readonly version: string | undefined;
|
|
269
|
+
/**
|
|
270
|
+
* Indicates if the file has an EOF marker.
|
|
271
|
+
* This is typically used to indicate the end of the file in Unicode data files.
|
|
272
|
+
*/
|
|
273
|
+
readonly hasEOF: boolean;
|
|
274
|
+
constructor(content: string, fileName?: string);
|
|
275
|
+
}
|
|
276
|
+
//#endregion
|
|
277
|
+
//#region src/datafile/typeguards.d.ts
|
|
278
|
+
/**
|
|
279
|
+
* Type guard function that checks if an unknown value is a Node.
|
|
280
|
+
* A Node must be an object with 'type', 'value', 'raw', and 'line' properties of the correct types.
|
|
281
|
+
*
|
|
282
|
+
* @param {unknown} node - The unknown value to check
|
|
283
|
+
* @returns {node is Node} True if the node is a valid Node, false otherwise
|
|
284
|
+
*
|
|
285
|
+
* @example
|
|
286
|
+
* ```typescript
|
|
287
|
+
* import { parseDataFile } from './parser';
|
|
288
|
+
*
|
|
289
|
+
* const parsedData = parseDataFile('# Comment\n0000..007F; Basic Latin');
|
|
290
|
+
* const firstChild = parsedData.children[0];
|
|
291
|
+
*
|
|
292
|
+
* if (isNode(firstChild)) {
|
|
293
|
+
* console.log(`Node type: ${firstChild.type}`);
|
|
294
|
+
* console.log(`Raw content: ${firstChild.raw}`);
|
|
295
|
+
* console.log(`Line number: ${firstChild.line}`);
|
|
296
|
+
* }
|
|
297
|
+
* ```
|
|
298
|
+
*/
|
|
299
|
+
declare function isNode(node: unknown): node is Node;
|
|
300
|
+
/**
|
|
301
|
+
* Type guard function that checks if an unknown value is a CommentNode.
|
|
302
|
+
* A CommentNode must be a valid Node with the type property set to "comment".
|
|
303
|
+
*
|
|
304
|
+
* @param {unknown} node - The unknown value to check
|
|
305
|
+
* @returns {node is CommentNode} True if the node is a valid CommentNode, false otherwise
|
|
306
|
+
*
|
|
307
|
+
* @example
|
|
308
|
+
* ```typescript
|
|
309
|
+
* import { parseDataFile } from './parser';
|
|
310
|
+
*
|
|
311
|
+
* const parsedData = parseDataFile('# This is a comment\n0000; NULL');
|
|
312
|
+
* const commentNode = parsedData.children[0];
|
|
313
|
+
*
|
|
314
|
+
* if (isCommentNode(commentNode)) {
|
|
315
|
+
* console.log(`Comment content: ${commentNode.value}`); // "This is a comment"
|
|
316
|
+
* console.log(`Raw line: ${commentNode.raw}`); // "# This is a comment"
|
|
317
|
+
* }
|
|
318
|
+
* ```
|
|
319
|
+
*/
|
|
320
|
+
declare function isCommentNode(node: unknown): node is CommentNode;
|
|
321
|
+
/**
|
|
322
|
+
* Type guard function that checks if an unknown value is an EmptyCommentNode.
|
|
323
|
+
* An EmptyCommentNode must be a valid Node with the type property set to "empty-comment".
|
|
324
|
+
*
|
|
325
|
+
* @param {unknown} node - The unknown value to check
|
|
326
|
+
* @returns {node is EmptyCommentNode} True if the node is a valid EmptyCommentNode, false otherwise
|
|
327
|
+
*
|
|
328
|
+
* @example
|
|
329
|
+
* ```typescript
|
|
330
|
+
* import { parseDataFile } from './parser';
|
|
331
|
+
*
|
|
332
|
+
* const parsedData = parseDataFile('#\n0000; NULL');
|
|
333
|
+
* const emptyCommentNode = parsedData.children[0];
|
|
334
|
+
*
|
|
335
|
+
* if (isEmptyCommentNode(emptyCommentNode)) {
|
|
336
|
+
* console.log(`Empty comment raw: ${emptyCommentNode.raw}`); // "#"
|
|
337
|
+
* console.log(`Empty comment value: "${emptyCommentNode.value}"`); // ""
|
|
338
|
+
* }
|
|
339
|
+
* ```
|
|
340
|
+
*/
|
|
341
|
+
declare function isEmptyCommentNode(node: unknown): node is EmptyCommentNode;
|
|
342
|
+
/**
|
|
343
|
+
* Type guard function that checks if an unknown value is a BoundaryNode.
|
|
344
|
+
* A BoundaryNode must be a valid Node with the type property set to "boundary".
|
|
345
|
+
*
|
|
346
|
+
* @param {unknown} node - The unknown value to check
|
|
347
|
+
* @returns {node is BoundaryNode} True if the node is a valid BoundaryNode, false otherwise
|
|
348
|
+
*
|
|
349
|
+
* @example
|
|
350
|
+
* ```typescript
|
|
351
|
+
* import { parseDataFile } from './parser';
|
|
352
|
+
*
|
|
353
|
+
* const parsedData = parseDataFile('# ================================================\n0000; NULL');
|
|
354
|
+
* const boundaryNode = parsedData.children[0];
|
|
355
|
+
*
|
|
356
|
+
* if (isBoundaryNode(boundaryNode)) {
|
|
357
|
+
* console.log(`Boundary style: ${boundaryNode.style}`); // "equals"
|
|
358
|
+
* console.log(`Boundary raw: ${boundaryNode.raw}`); // "# ================================================"
|
|
359
|
+
* }
|
|
360
|
+
* ```
|
|
361
|
+
*/
|
|
362
|
+
declare function isBoundaryNode(node: unknown): node is BoundaryNode;
|
|
363
|
+
/**
|
|
364
|
+
* Type guard function that checks if an unknown value is a DataNode.
|
|
365
|
+
* A DataNode must be a valid Node with the type property set to "data".
|
|
366
|
+
*
|
|
367
|
+
* @param {unknown} node - The unknown value to check
|
|
368
|
+
* @returns {node is DataNode} True if the node is a valid DataNode, false otherwise
|
|
369
|
+
*
|
|
370
|
+
* @example
|
|
371
|
+
* ```typescript
|
|
372
|
+
* import { parseDataFile } from './parser';
|
|
373
|
+
*
|
|
374
|
+
* const parsedData = parseDataFile('0000..007F ; Basic Latin # [128] <control-0000>..<control-007F>');
|
|
375
|
+
* const dataNode = parsedData.children[0];
|
|
376
|
+
*
|
|
377
|
+
* if (isDataNode(dataNode)) {
|
|
378
|
+
* console.log(`Data value: ${dataNode.value}`); // "0000..007F ; Basic Latin # [128] <control-0000>..<control-007F>"
|
|
379
|
+
* console.log(`Raw content: ${dataNode.raw}`); // Same as value for data nodes
|
|
380
|
+
* console.log(`Line number: ${dataNode.line}`); // 1
|
|
381
|
+
* }
|
|
382
|
+
* ```
|
|
383
|
+
*/
|
|
384
|
+
declare function isDataNode(node: unknown): node is DataNode;
|
|
385
|
+
/**
|
|
386
|
+
* Type guard function that checks if an unknown value is an EmptyNode.
|
|
387
|
+
* An EmptyNode must be a valid Node with the type property set to "empty".
|
|
388
|
+
*
|
|
389
|
+
* @param {unknown} node - The unknown value to check
|
|
390
|
+
* @returns {node is EmptyNode} True if the node is a valid EmptyNode, false otherwise
|
|
391
|
+
*
|
|
392
|
+
* @example
|
|
393
|
+
* ```typescript
|
|
394
|
+
* import { parseDataFile } from './parser';
|
|
395
|
+
*
|
|
396
|
+
* const parsedData = parseDataFile('# Comment\n\n0000; NULL');
|
|
397
|
+
* const emptyNode = parsedData.children[1]; // The blank line
|
|
398
|
+
*
|
|
399
|
+
* if (isEmptyNode(emptyNode)) {
|
|
400
|
+
* console.log(`Empty node raw: "${emptyNode.raw}"`); // ""
|
|
401
|
+
* console.log(`Empty node value: "${emptyNode.value}"`); // ""
|
|
402
|
+
* console.log(`Line number: ${emptyNode.line}`); // 2
|
|
403
|
+
* }
|
|
404
|
+
* ```
|
|
405
|
+
*/
|
|
406
|
+
declare function isEmptyNode(node: unknown): node is EmptyNode;
|
|
407
|
+
/**
|
|
408
|
+
* Type guard function that checks if an unknown value is a RootNode.
|
|
409
|
+
* A RootNode must be a valid Node with the type property set to "root".
|
|
410
|
+
*
|
|
411
|
+
* @param {unknown} node - The unknown value to check
|
|
412
|
+
* @returns {node is RootNode} True if the node is a valid RootNode, false otherwise
|
|
413
|
+
*
|
|
414
|
+
* @example
|
|
415
|
+
* ```typescript
|
|
416
|
+
* import { parseDataFile } from './parser';
|
|
417
|
+
*
|
|
418
|
+
* const parsedData = parseDataFile('# Unicode Block Data\n0000..007F; Basic Latin');
|
|
419
|
+
*
|
|
420
|
+
* if (isRootNode(parsedData)) {
|
|
421
|
+
* console.log(`Root has ${parsedData.children.length} children`); // 2
|
|
422
|
+
* console.log(`File name: ${parsedData.fileName}`); // May be undefined
|
|
423
|
+
* console.log(`Version: ${parsedData.version}`); // May be undefined
|
|
424
|
+
* }
|
|
425
|
+
* ```
|
|
426
|
+
*/
|
|
427
|
+
declare function isRootNode(node: unknown): node is RootNode;
|
|
428
|
+
/**
|
|
429
|
+
* Type guard function that checks if an unknown value is an UnknownNode.
|
|
430
|
+
* An UnknownNode must be a valid Node with the type property set to "unknown".
|
|
431
|
+
*
|
|
432
|
+
* @param {unknown} node - The unknown value to check
|
|
433
|
+
* @returns {node is UnknownNode} True if the node is a valid UnknownNode, false otherwise
|
|
434
|
+
*
|
|
435
|
+
* @example
|
|
436
|
+
* ```typescript
|
|
437
|
+
* import { parseDataFile } from './parser';
|
|
438
|
+
*
|
|
439
|
+
* // Assuming some unusual content that doesn't match known patterns
|
|
440
|
+
* const parsedData = parseDataFile('@@UNUSUAL_SYNTAX@@\n0000; NULL');
|
|
441
|
+
* const unknownNode = parsedData.children[0];
|
|
442
|
+
*
|
|
443
|
+
* if (isUnknownNode(unknownNode)) {
|
|
444
|
+
* console.log(`Unknown node raw: ${unknownNode.raw}`); // "@@UNUSUAL_SYNTAX@@"
|
|
445
|
+
* console.log(`Unknown node value: ${unknownNode.value}`); // "@@UNUSUAL_SYNTAX@@"
|
|
446
|
+
* console.log(`Line number: ${unknownNode.line}`); // 1
|
|
447
|
+
* }
|
|
448
|
+
* ```
|
|
449
|
+
*/
|
|
450
|
+
declare function isUnknownNode(node: unknown): node is UnknownNode;
|
|
451
|
+
/**
|
|
452
|
+
* Type guard function that checks if an unknown value is an EOFNode.
|
|
453
|
+
* An EOFNode must be a valid Node with the type property set to "eof".
|
|
454
|
+
*
|
|
455
|
+
* @param {unknown} node - The unknown value to check
|
|
456
|
+
* @returns {node is EOFNode} True if the node is a valid EOFNode, false otherwise
|
|
457
|
+
*
|
|
458
|
+
* @example
|
|
459
|
+
* ```typescript
|
|
460
|
+
* import { parseDataFile } from './parser';
|
|
461
|
+
*
|
|
462
|
+
* const parsedData = parseDataFile('0000; NULL');
|
|
463
|
+
* const lastNode = parsedData.children[parsedData.children.length - 1];
|
|
464
|
+
*
|
|
465
|
+
* if (isEOFNode(lastNode)) {
|
|
466
|
+
* console.log(`EOF node detected at line: ${lastNode.line}`);
|
|
467
|
+
* console.log(`EOF raw value: ${lastNode.raw}`); // Empty string
|
|
468
|
+
* }
|
|
469
|
+
* ```
|
|
470
|
+
*/
|
|
471
|
+
declare function isEOFNode(node: unknown): node is EOFNode;
|
|
472
|
+
/**
|
|
473
|
+
* Type guard function that checks if an unknown value is a PropertyNode.
|
|
474
|
+
* A PropertyNode must be a valid Node with the type property set to "property" and have a defined propertyValue.
|
|
475
|
+
*
|
|
476
|
+
* @param {unknown} node - The unknown value to check
|
|
477
|
+
* @returns {node is PropertyNode} True if the node is a valid PropertyNode, false otherwise
|
|
478
|
+
*
|
|
479
|
+
* @example
|
|
480
|
+
* ```typescript
|
|
481
|
+
* import { parseDataFile } from './parser';
|
|
482
|
+
*
|
|
483
|
+
* const parsedData = parseDataFile('# @key=value\n0000; NULL');
|
|
484
|
+
* const propertyNode = parsedData.children[0];
|
|
485
|
+
*
|
|
486
|
+
* if (isPropertyNode(propertyNode)) {
|
|
487
|
+
* console.log(`Node type: ${propertyNode.type}`); // "property"
|
|
488
|
+
* console.log(`Property value: ${propertyNode.propertyValue}`); // "value"
|
|
489
|
+
* console.log(`Raw content: ${propertyNode.raw}`); // "# @key=value"
|
|
490
|
+
* }
|
|
491
|
+
* ```
|
|
492
|
+
*/
|
|
493
|
+
declare function isPropertyNode(node: unknown): node is PropertyNode;
|
|
494
|
+
//#endregion
|
|
495
|
+
export { EmptyNode as C, PropertyNode as D, NodeTypes as E, RootNode as O, EmptyCommentNode as S, NodeType as T, BoundaryNode as _, isEmptyCommentNode as a, DataNode as b, isPropertyNode as c, RawDataFile as d, UCDSectionWithLines as f, BaseNode as g, ast_utils_d_exports as h, isEOFNode as i, UnknownNode as k, isRootNode as l, parseSections as m, isCommentNode as n, isEmptyNode as o, hasSections as p, isDataNode as r, isNode as s, isBoundaryNode as t, isUnknownNode as u, ChildNode as v, Node as w, EOFNode as x, CommentNode as y };
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import "./line-helpers-DGsVuiW2.js";
|
|
2
|
+
import { C as EmptyNode, D as PropertyNode, E as NodeTypes, O as RootNode, S as EmptyCommentNode, T as NodeType, _ as BoundaryNode, a as isEmptyCommentNode, b as DataNode, c as isPropertyNode, d as RawDataFile, f as UCDSectionWithLines, g as BaseNode, h as ast_utils_d_exports, i as isEOFNode, k as UnknownNode, l as isRootNode, m as parseSections, n as isCommentNode, o as isEmptyNode, p as hasSections, r as isDataNode, s as isNode, t as isBoundaryNode, u as isUnknownNode, v as ChildNode, w as Node, x as EOFNode, y as CommentNode } from "./datafile-UqVC4xXw.js";
|
|
3
|
+
export { BaseNode, BoundaryNode, ChildNode, CommentNode, DataNode, EOFNode, EmptyCommentNode, EmptyNode, Node, NodeType, NodeTypes, PropertyNode, RawDataFile, RootNode, UCDSectionWithLines, UnknownNode, ast_utils_d_exports as astUtils, hasSections, isBoundaryNode, isCommentNode, isDataNode, isEOFNode, isEmptyCommentNode, isEmptyNode, isNode, isPropertyNode, isRootNode, isUnknownNode, parseSections };
|
package/dist/datafile.js
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import { a as isBoundaryNode, c as isEOFNode, d as isNode, f as isPropertyNode, h as NodeTypes, i as ast_utils_exports, l as isEmptyCommentNode, m as isUnknownNode, n as hasSections, o as isCommentNode, p as isRootNode, r as parseSections, s as isDataNode, t as RawDataFile, u as isEmptyNode } from "./datafile-CWbGVIAa.js";
|
|
2
|
+
import "./line-helpers-CYDQ0FnQ.js";
|
|
3
|
+
|
|
4
|
+
export { NodeTypes, RawDataFile, ast_utils_exports as astUtils, hasSections, isBoundaryNode, isCommentNode, isDataNode, isEOFNode, isEmptyCommentNode, isEmptyNode, isNode, isPropertyNode, isRootNode, isUnknownNode, parseSections };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import { i as UnicodeVersionMetadata, n as UNICODE_STABLE_VERSION, r as UNICODE_VERSION_METADATA, t as UNICODE_DRAFT_VERSION } from "./constants-BclbCHPC.js";
|
|
2
|
+
import { _ as isMissingAnnotation, a as getBoundaryLineStyle, b as parseMissingAnnotation, c as inferVersion, d as isDashBoundary, f as isEOFMarker, g as isLineWithData, h as isHashBoundary, i as SpecialTag, l as isBoundaryLine, m as isEqualsBoundary, n as MissingAnnotation, o as getPropertyValue, p as isEmptyLine, r as ParsedFileName, s as inferFileName, t as BoundaryStyle, u as isCommentLine, v as isPropertyLine, x as trimCommentLine, y as parseFileNameLine } from "./line-helpers-DGsVuiW2.js";
|
|
3
|
+
import { d as RawDataFile } from "./datafile-UqVC4xXw.js";
|
|
4
|
+
|
|
5
|
+
//#region src/draft.d.ts
|
|
6
|
+
interface GetCurrentDraftVersionOptions {
|
|
7
|
+
/**
|
|
8
|
+
* Custom URL to fetch the Unicode draft ReadMe from
|
|
9
|
+
* @default "https://unicode-proxy.ucdjs.dev/draft/ReadMe.txt"
|
|
10
|
+
*/
|
|
11
|
+
url?: string;
|
|
12
|
+
/**
|
|
13
|
+
* Custom fetch options to use when fetching the ReadMe
|
|
14
|
+
*/
|
|
15
|
+
fetchOptions?: RequestInit;
|
|
16
|
+
/**
|
|
17
|
+
* Custom regex patterns to use for extracting the version
|
|
18
|
+
* Each pattern must include exactly one capturing group that matches the version
|
|
19
|
+
*/
|
|
20
|
+
patterns?: RegExp[];
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Retrieves the current Unicode Standard draft version by fetching and parsing
|
|
24
|
+
* the Unicode draft ReadMe file.
|
|
25
|
+
*
|
|
26
|
+
* The function tries to extract the version number using several regex patterns,
|
|
27
|
+
* starting with the most explicit match and falling back to less specific patterns.
|
|
28
|
+
*
|
|
29
|
+
* @param {GetCurrentDraftVersionOptions} [options] - Optional configuration (custom URL, fetch options, and version patterns)
|
|
30
|
+
* @returns {Promise<string | null>} A promise that resolves to:
|
|
31
|
+
* - The Unicode draft version as a string (e.g., "15.1.0" or "15.1")
|
|
32
|
+
* - `null` if the version couldn't be determined or if an error occurred during fetching
|
|
33
|
+
*
|
|
34
|
+
* @example
|
|
35
|
+
* ```ts
|
|
36
|
+
* // Using default options
|
|
37
|
+
* const version = await getCurrentDraftVersion();
|
|
38
|
+
* ```
|
|
39
|
+
*
|
|
40
|
+
* @example
|
|
41
|
+
* ```ts
|
|
42
|
+
* // Using custom options
|
|
43
|
+
* const version = await getCurrentDraftVersion({
|
|
44
|
+
* url: "https://luxass.dev/readme",
|
|
45
|
+
* patterns: [/MyCustomPattern-(\d+\.\d+)/],
|
|
46
|
+
* fetchOptions: { headers: { "Authorization": "token" } }
|
|
47
|
+
* });
|
|
48
|
+
* ```
|
|
49
|
+
*/
|
|
50
|
+
declare function getCurrentDraftVersion(options?: GetCurrentDraftVersionOptions): Promise<string | null>;
|
|
51
|
+
//#endregion
|
|
52
|
+
//#region src/hexcodes.d.ts
|
|
53
|
+
/**
 * Converts a hex string to an array of unicode codepoints.
 *
 * @param {string} hex - The hexadecimal string to convert
 * @param {string} joiner - The string that separates the hex values
 * @param {boolean} [strict] - If true, throws errors for invalid input. If false, returns NaN for invalid parts.
 * @returns {number[]} An array of numbers representing unicode codepoints
 *
 * @example
 * ```ts
 * fromHexToCodepoint('1F600-1F64F', '-') // [128512, 128591]
 * fromHexToCodepoint('1F600,1F64F', ',') // [128512, 128591]
 * fromHexToCodepoint('1F600-', '-', true) // throws Error
 * fromHexToCodepoint('1F600-', '-', false) // [128512, NaN]
 * ```
 */
declare function fromHexToCodepoint(hex: string, joiner: string, strict?: boolean): number[];
|
|
70
|
+
/**
 * Expands a hexadecimal range into an array of individual hexadecimal values.
 * If the input contains ".." it treats it as a range and expands it,
 * otherwise returns the input hex as a single-element array.
 *
 * @param {string} hex - The hexadecimal string, optionally containing ".." to denote a range
 * @returns {string[]} An array of hexadecimal strings. If given a range (e.g. "0000..0010"),
 *   returns all values in that range. If given a single hex value,
 *   returns an array containing just that value.
 *
 * @example
 * ```ts
 * expandHexRange("0000..0002") // Returns ["0000", "0001", "0002"]
 * expandHexRange("0000") // Returns ["0000"]
 * ```
 */
declare function expandHexRange(hex: string): string[];
|
|
87
|
+
/**
 * Removes the zero width joiner and the emoji/text variation selectors from a hex string.
 * Specifically removes:
 * - 200D (Zero Width Joiner)
 * - FE0E (Variation Selector-15, text style)
 * - FE0F (Variation Selector-16, emoji style)
 *
 * @param {string} hex - The hex string to strip these code points from
 * @returns {string} The hex string with the code points listed above removed
 */
declare function stripHex(hex: string): string;
|
|
98
|
+
//#endregion
|
|
99
|
+
//#region src/mappings.d.ts
|
|
100
|
+
/**
 * Maps Unicode standard version numbers to their corresponding UCD (Unicode Character Database)
 * version identifiers.
 *
 * The Unicode Character Database (UCD) files are available at https://unicode.org/Public/{version}
 * where {version} is not always the same as the Unicode standard version.
 *
 * For example:
 * - Unicode 4.0.1 corresponds to UCD version "4.0-Update1"
 * - Unicode 2.1.9 corresponds to UCD version "2.1-Update4"
 *
 * Note: Only versions with special UCD paths are included here.
 * Versions 4.1.0 and later use their version number directly as the UCD path.
 */
declare const UNICODE_TO_UCD_VERSION_MAPPINGS: Record<string, string>;
|
|
114
|
+
/**
 * Resolves a Unicode version to its corresponding UCD (Unicode Character Database) version identifier.
 *
 * Some Unicode versions don't have directly corresponding UCD version identifiers. For example,
 * Unicode 4.0.1's files are found using UCD version '4.0-Update1' rather than '4.0.1'.
 *
 * If the version is not found in the mappings, returns the original version.
 * This is useful for handling newer Unicode versions that use the version number directly.
 *
 * @param {string} unicodeVersion - The Unicode version to resolve to a UCD version identifier
 * @returns {string} The corresponding UCD version identifier or the original version if not mapped
 */
declare function resolveUCDVersion(unicodeVersion: string): string;
|
|
128
|
+
//#endregion
|
|
129
|
+
//#region src/path.d.ts
|
|
130
|
+
/**
 * Builds file paths for Unicode Character Database (UCD) files.
 *
 * @param {string} version - The Unicode version (e.g., "15.1.0")
 * @param {string} path - The filename to access (e.g., "PropList.txt", "DerivedLineBreak.txt")
 * @returns {string} The complete file path for the UCD file
 */
declare function buildUCDPath(version: string, path: string): string;
|
|
138
|
+
/**
 * Determines whether a Unicode version has the UCD folder structure.
 *
 * Newer Unicode versions typically use a UCD subfolder structure, while older versions
 * use special version formats (like '4.0-Update1' instead of '4.0.1') without UCD folders.
 * This function checks if a version:
 * 1. Contains "Update" in its name (indicating no UCD folder structure)
 * 2. Exists in our UNICODE_TO_UCD_VERSION_MAPPINGS (meaning it doesn't use UCD folders)
 *
 * @param {string} version - The Unicode version string to check
 * @returns {boolean} `true` if the version uses the UCD folder structure (e.g., '15.0.0'),
 *   `false` if it doesn't use UCD folders (e.g., '4.0.1' uses '4.0-Update1')
 */
declare function hasUCDFolderPath(version: string): boolean;
|
|
152
|
+
//#endregion
|
|
153
|
+
// Export list of this declaration module. Names prefixed with `type` are type-only exports.
export { BoundaryStyle, type GetCurrentDraftVersionOptions, MissingAnnotation, ParsedFileName, RawDataFile, SpecialTag, UNICODE_DRAFT_VERSION, UNICODE_STABLE_VERSION, UNICODE_TO_UCD_VERSION_MAPPINGS, UNICODE_VERSION_METADATA, type UnicodeVersionMetadata, buildUCDPath, expandHexRange, fromHexToCodepoint, getBoundaryLineStyle, getCurrentDraftVersion, getPropertyValue, hasUCDFolderPath, inferFileName, inferVersion, isBoundaryLine, isCommentLine, isDashBoundary, isEOFMarker, isEmptyLine, isEqualsBoundary, isHashBoundary, isLineWithData, isMissingAnnotation, isPropertyLine, parseFileNameLine, parseMissingAnnotation, resolveUCDVersion, stripHex, trimCommentLine };
|