@unicode-utils/parser 0.12.0-beta.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/chunk-Bp6m_JJh.js +13 -0
- package/dist/datafile-B9Fta4dI.js +877 -0
- package/dist/datafile-D70biLnW.d.ts +495 -0
- package/dist/datafile.d.ts +3 -0
- package/dist/datafile.js +4 -0
- package/dist/index.d.ts +39 -0
- package/dist/index.js +4 -0
- package/dist/line-helpers-tsCF16UF.js +374 -0
- package/dist/line-helpers-upUikru9.d.ts +305 -0
- package/dist/line-helpers.d.ts +2 -0
- package/dist/line-helpers.js +3 -0
- package/package.json +62 -0
|
@@ -0,0 +1,877 @@
|
|
|
1
|
+
import { t as __export } from "./chunk-Bp6m_JJh.js";
|
|
2
|
+
import { _ as trimCommentLine, a as isBoundaryLine, c as isEOFMarker, f as isLineWithData, i as inferVersion, l as isEmptyLine, m as isPropertyLine, n as getPropertyValue, o as isCommentLine, r as inferFileName, t as getBoundaryLineStyle } from "./line-helpers-tsCF16UF.js";
|
|
3
|
+
import { invariant } from "@luxass/utils";
|
|
4
|
+
import defu from "defu";
|
|
5
|
+
|
|
6
|
+
//#region src/datafile/ast.ts
|
|
7
|
+
/**
 * Discriminator strings stored in the `type` field of every AST node
 * produced by this module.
 *
 * The table is frozen: it is shared by the parser and by consumers that
 * compare `node.type` against it, so an accidental write must not be able
 * to change what a node type means at runtime.
 */
const NodeTypes = Object.freeze({
  ROOT: "root",
  COMMENT: "comment",
  EMPTY_COMMENT: "empty-comment",
  BOUNDARY: "boundary",
  DATA: "data",
  EMPTY: "empty",
  EOF: "eof",
  PROPERTY: "property",
  UNKNOWN: "unknown"
});
|
|
18
|
+
|
|
19
|
+
//#endregion
|
|
20
|
+
//#region src/datafile/typeguards.ts
|
|
21
|
+
/**
 * Type guard: reports whether an unknown value has the minimal shape of an AST node.
 *
 * A value qualifies when it is a non-null object that carries a string `type`,
 * a string `raw`, a numeric `line` and a string `value`.
 *
 * @param {unknown} node - The value to inspect
 * @returns {node is Node} True when all four fields are present with the expected primitive types
 *
 * @example
 * ```typescript
 * isNode({ type: "data", value: "0000; NULL", raw: "0000; NULL", line: 0 }); // true
 * isNode({ type: "data", raw: "0000; NULL", line: 0 }); // false (no `value`)
 * isNode(null); // false
 * ```
 */
function isNode(node) {
  if (typeof node !== "object" || node === null) {
    return false;
  }
  // Fields are checked in the same order as before: type, raw, line, value.
  const hasField = (key, expectedType) => key in node && typeof node[key] === expectedType;
  return hasField("type", "string")
    && hasField("raw", "string")
    && hasField("line", "number")
    && hasField("value", "string");
}
|
|
45
|
+
/**
 * Type guard: reports whether an unknown value is a comment node.
 *
 * The value must pass `isNode` and have `type === "comment"`.
 *
 * @param {unknown} node - The value to inspect
 * @returns {node is CommentNode} True for a node whose type is "comment"
 *
 * @example
 * ```typescript
 * isCommentNode({ type: "comment", value: "note", raw: "# note", line: 0 }); // true
 * isCommentNode({ type: "empty-comment", value: "", raw: "#", line: 0 }); // false
 * ```
 */
function isCommentNode(node) {
  if (!isNode(node)) {
    return false;
  }
  return node.type === "comment";
}
|
|
68
|
+
/**
 * Type guard: reports whether an unknown value is an empty-comment node.
 *
 * The value must pass `isNode` and have `type === "empty-comment"`.
 *
 * @param {unknown} node - The value to inspect
 * @returns {node is EmptyCommentNode} True for a node whose type is "empty-comment"
 *
 * @example
 * ```typescript
 * isEmptyCommentNode({ type: "empty-comment", value: "", raw: "#", line: 0 }); // true
 * isEmptyCommentNode({ type: "comment", value: "note", raw: "# note", line: 0 }); // false
 * ```
 */
function isEmptyCommentNode(node) {
  if (!isNode(node)) {
    return false;
  }
  return node.type === "empty-comment";
}
|
|
91
|
+
/**
 * Type guard: reports whether an unknown value is a boundary node.
 *
 * The value must pass `isNode` and have `type === "boundary"`. The `style`
 * field that the parser attaches to boundary nodes is not inspected here.
 *
 * @param {unknown} node - The value to inspect
 * @returns {node is BoundaryNode} True for a node whose type is "boundary"
 *
 * @example
 * ```typescript
 * isBoundaryNode({ type: "boundary", value: "# ====", raw: "# ====", line: 0 }); // true
 * isBoundaryNode({ type: "comment", value: "note", raw: "# note", line: 0 }); // false
 * ```
 */
function isBoundaryNode(node) {
  if (!isNode(node)) {
    return false;
  }
  return node.type === "boundary";
}
|
|
114
|
+
/**
 * Type guard: reports whether an unknown value is a data node.
 *
 * The value must pass `isNode` and have `type === "data"`.
 *
 * @param {unknown} node - The value to inspect
 * @returns {node is DataNode} True for a node whose type is "data"
 *
 * @example
 * ```typescript
 * // `line` is the 0-based index of the line within the parsed content.
 * isDataNode({ type: "data", value: "0000; NULL", raw: "0000; NULL", line: 0 }); // true
 * isDataNode({ type: "empty", value: "", raw: "", line: 1 }); // false
 * ```
 */
function isDataNode(node) {
  if (!isNode(node)) {
    return false;
  }
  return node.type === "data";
}
|
|
138
|
+
/**
 * Type guard: reports whether an unknown value is an empty (blank line) node.
 *
 * The value must pass `isNode` and have `type === "empty"`.
 *
 * @param {unknown} node - The value to inspect
 * @returns {node is EmptyNode} True for a node whose type is "empty"
 *
 * @example
 * ```typescript
 * // `line` is the 0-based index of the line within the parsed content.
 * isEmptyNode({ type: "empty", value: "", raw: "", line: 1 }); // true
 * isEmptyNode({ type: "empty-comment", value: "", raw: "#", line: 1 }); // false
 * ```
 */
function isEmptyNode(node) {
  if (!isNode(node)) {
    return false;
  }
  return node.type === "empty";
}
|
|
162
|
+
/**
 * Type guard: reports whether an unknown value is a root node.
 *
 * The value must pass `isNode` and have `type === "root"`. The `children`,
 * `fileName` and `version` fields of a root are not validated here.
 *
 * @param {unknown} node - The value to inspect
 * @returns {node is RootNode} True for a node whose type is "root"
 *
 * @example
 * ```typescript
 * isRootNode({ type: "root", value: "", raw: "", line: 0, children: [] }); // true
 * isRootNode({ type: "data", value: "0000; NULL", raw: "0000; NULL", line: 0 }); // false
 * ```
 */
function isRootNode(node) {
  if (!isNode(node)) {
    return false;
  }
  return node.type === "root";
}
|
|
185
|
+
/**
 * Type guard: reports whether an unknown value is an "unknown" node, i.e. a
 * line the parser could not classify.
 *
 * The value must pass `isNode` and have `type === "unknown"`.
 *
 * @param {unknown} node - The value to inspect
 * @returns {node is UnknownNode} True for a node whose type is "unknown"
 *
 * @example
 * ```typescript
 * // `line` is the 0-based index of the line within the parsed content.
 * isUnknownNode({ type: "unknown", value: "@@X@@", raw: "@@X@@", line: 0 }); // true
 * isUnknownNode({ type: "data", value: "0000; NULL", raw: "0000; NULL", line: 0 }); // false
 * ```
 */
function isUnknownNode(node) {
  if (!isNode(node)) {
    return false;
  }
  return node.type === "unknown";
}
|
|
210
|
+
/**
 * Type guard: reports whether an unknown value is an end-of-file marker node.
 *
 * The value must pass `isNode` and have `type === "eof"`.
 *
 * @param {unknown} node - The value to inspect
 * @returns {node is EOFNode} True for a node whose type is "eof"
 *
 * @example
 * ```typescript
 * isEOFNode({ type: "eof", value: "# EOF", raw: "# EOF", line: 9 }); // true
 * isEOFNode({ type: "comment", value: "note", raw: "# note", line: 9 }); // false
 * ```
 */
function isEOFNode(node) {
  if (!isNode(node)) {
    return false;
  }
  return node.type === "eof";
}
|
|
233
|
+
/**
 * Type guard: reports whether an unknown value is a property node.
 *
 * The value must pass `isNode`, have `type === "property"`, and carry a
 * `propertyValue` that is not `undefined`. No other property-specific field
 * is inspected.
 *
 * @param {unknown} node - The value to inspect
 * @returns {node is PropertyNode} True for a "property" node with a defined `propertyValue`
 *
 * @example
 * ```typescript
 * isPropertyNode({ type: "property", value: "# @key=value", raw: "# @key=value", line: 0, propertyValue: "value" }); // true
 * isPropertyNode({ type: "property", value: "# @key", raw: "# @key", line: 0 }); // false (no propertyValue)
 * ```
 */
function isPropertyNode(node) {
  if (!isNode(node) || node.type !== "property") {
    return false;
  }
  return node.propertyValue !== undefined;
}
|
|
257
|
+
|
|
258
|
+
//#endregion
|
|
259
|
+
//#region src/datafile/ast-utils.ts
|
|
260
|
+
// Namespace object that groups the AST utility helpers defined in this region.
// Each entry is a thunk returning the helper of the same name; the thunks are
// handed to the bundler-provided `__export` helper imported from the shared chunk.
// NOTE(review): `__export` is not visible here — presumably it turns each thunk
// into a getter on the returned object; confirm against ./chunk-Bp6m_JJh.js.
var ast_utils_exports = /* @__PURE__ */ __export({
  allNodesAreOfType: () => allNodesAreOfType,
  endsWithSequence: () => endsWithSequence,
  findNodePattern: () => findNodePattern,
  hasBoundaryWithinRange: () => hasBoundaryWithinRange,
  hasConsecutiveNodesOfType: () => hasConsecutiveNodesOfType,
  hasMinNodesOfType: () => hasMinNodesOfType,
  hasNextNComments: () => hasNextNComments,
  hasNextNCommentsFrom: () => hasNextNCommentsFrom,
  hasNodePattern: () => hasNodePattern,
  hasPrevNCommentsFrom: () => hasPrevNCommentsFrom,
  isCommentOnlyDocument: () => isCommentOnlyDocument,
  startsWithSequence: () => startsWithSequence,
  visit: () => visit
});
|
|
275
|
+
/**
 * Lookup table from a child node type string to the type guard that
 * recognises nodes of that type.
 *
 * The table has a null prototype so that a lookup with an arbitrary string
 * (for example "toString" or "constructor") yields `undefined` instead of an
 * inherited `Object.prototype` member. With a plain object those inherited
 * functions were treated as valid checkers, and because they return truthy
 * values the pattern helpers reported a match for a node type that does not
 * exist. It is frozen because it is shared, read-only configuration.
 */
const NODE_TYPE_CHECKERS = Object.freeze({
  __proto__: null,
  "comment": isCommentNode,
  "empty-comment": isEmptyCommentNode,
  "boundary": isBoundaryNode,
  "data": isDataNode,
  "empty": isEmptyNode,
  "unknown": isUnknownNode,
  "eof": isEOFNode,
  "property": isPropertyNode
});
|
|
285
|
+
/**
 * Checks whether `count` consecutive children, starting at `startIndex`
 * (inclusive), are all comment nodes.
 *
 * @param {RootNode} root - The root node containing children
 * @param {number} startIndex - Index of the first child to inspect
 * @param {number} count - How many children to inspect
 * @returns {boolean} true when every inspected child is a comment node; false when
 * `startIndex` is negative, `count` is not positive, or the range runs past the last child
 */
function hasNextNCommentsFrom(root, startIndex, count) {
  if (startIndex < 0 || count <= 0) {
    return false;
  }
  const stop = startIndex + count;
  if (stop > root.children.length) {
    return false;
  }
  let cursor = startIndex;
  while (cursor < stop) {
    if (!isCommentNode(root.children[cursor])) {
      return false;
    }
    cursor++;
  }
  return true;
}
|
|
298
|
+
/**
 * Checks whether the `count` children that immediately follow `currentNode`
 * are all comment nodes.
 *
 * `currentNode` is located by identity with `indexOf`; the check itself is
 * delegated to `hasNextNCommentsFrom`, starting one past that position.
 *
 * @param {RootNode} root - The root node containing children
 * @param {ChildNode} currentNode - The node to look after; must be one of `root.children`
 * @param {number} count - Number of following nodes to inspect
 * @returns {boolean} true when the next `count` nodes are all comments; false when
 * `currentNode` is not a child of `root` or the delegated check fails
 */
function hasNextNComments(root, currentNode, count) {
  const position = root.children.indexOf(currentNode);
  return position === -1 ? false : hasNextNCommentsFrom(root, position + 1, count);
}
|
|
310
|
+
/**
 * Checks whether the `count` children ending at `startIndex` (inclusive) are
 * all comment nodes, i.e. the range `startIndex - count + 1 .. startIndex`.
 *
 * @param {RootNode} root - The root node containing children
 * @param {number} startIndex - Index of the last child to inspect
 * @param {number} count - How many children to inspect, counting backwards
 * @returns {boolean} true when every inspected child is a comment node; false when
 * `startIndex` is past the last child, `count` is not positive, or the range starts before index 0
 */
function hasPrevNCommentsFrom(root, startIndex, count) {
  if (startIndex >= root.children.length || count <= 0) {
    return false;
  }
  const firstIndex = startIndex - count + 1;
  if (firstIndex < 0) {
    return false;
  }
  // Walk backwards from startIndex; the outcome is the same as a forward scan.
  for (let cursor = startIndex; cursor >= firstIndex; cursor--) {
    if (!isCommentNode(root.children[cursor])) {
      return false;
    }
  }
  return true;
}
|
|
323
|
+
/**
 * Checks whether `count` consecutive children, starting at `startIndex`
 * (inclusive), all have the given node type.
 *
 * @param {RootNode} root - The root node containing children
 * @param {number} startIndex - Index of the first child to inspect
 * @param {number} count - How many consecutive children to inspect
 * @param {ChildNode["type"]} nodeType - Node type to look for; must be a key of the checker table
 * @returns {boolean} true when every inspected child matches; false on an invalid range
 * or when no checker is registered for `nodeType`
 */
function hasConsecutiveNodesOfType(root, startIndex, count, nodeType) {
  if (startIndex < 0 || count <= 0) {
    return false;
  }
  const stop = startIndex + count;
  if (stop > root.children.length) {
    return false;
  }
  const matchesType = NODE_TYPE_CHECKERS[nodeType];
  if (!matchesType) {
    return false;
  }
  let cursor = startIndex;
  while (cursor < stop) {
    if (!matchesType(root.children[cursor])) {
      return false;
    }
    cursor++;
  }
  return true;
}
|
|
339
|
+
/**
 * Checks whether the children of `root`, starting at `startIndex`, match the
 * given sequence of node types position by position.
 *
 * @param {RootNode} root - The root node containing children
 * @param {ChildNode["type"][]} pattern - Node types expected at consecutive positions
 * @param {number} [startIndex] - Index of the child matched against `pattern[0]` (default 0)
 * @returns {boolean} true for an empty pattern or a full match; false when the pattern
 * does not fit in the remaining children, a type has no registered checker, or a child mismatches
 * @throws {Error} When a pattern entry reached during matching is `null` or `undefined`
 */
function hasNodePattern(root, pattern, startIndex = 0) {
  const patternLength = pattern.length;
  if (patternLength === 0) {
    return true;
  }
  if (startIndex < 0) {
    return false;
  }
  if (startIndex + patternLength > root.children.length) {
    return false;
  }
  for (let offset = 0; offset < pattern.length; offset++) {
    const wantedType = pattern[offset];
    if (wantedType == null) {
      throw new Error(`Invalid node type at index ${offset} in pattern: ${JSON.stringify(pattern)}`);
    }
    const matchesType = NODE_TYPE_CHECKERS[wantedType];
    if (!matchesType) {
      return false;
    }
    if (!matchesType(root.children[startIndex + offset])) {
      return false;
    }
  }
  return true;
}
|
|
358
|
+
/**
 * Finds the first position at which the children of `root` match a sequence
 * of node types.
 *
 * @param {RootNode} root - The root node containing children
 * @param {ChildNode["type"][]} pattern - Node types to search for, in order
 * @returns {number} Index of the first match, 0 for an empty pattern, or -1 when there is no match
 */
function findNodePattern(root, pattern) {
  if (pattern.length === 0) {
    return 0;
  }
  let candidate = 0;
  // Only positions that leave room for the whole pattern are tried.
  while (candidate <= root.children.length - pattern.length) {
    if (hasNodePattern(root, pattern, candidate)) {
      return candidate;
    }
    candidate++;
  }
  return -1;
}
|
|
369
|
+
/**
 * Checks whether the first children of `root` match a sequence of node types.
 *
 * Thin wrapper around `hasNodePattern` anchored at index 0.
 *
 * @param {RootNode} root - The root node containing children
 * @param {ChildNode["type"][]} sequence - Node types expected at the very beginning
 * @returns {boolean} true when the children start with the sequence, false otherwise
 */
function startsWithSequence(root, sequence) {
  const firstChildIndex = 0;
  return hasNodePattern(root, sequence, firstChildIndex);
}
|
|
378
|
+
/**
 * Checks whether the last children of `root` match a sequence of node types.
 *
 * @param {RootNode} root - The root node containing children
 * @param {ChildNode["type"][]} sequence - Node types expected at the very end
 * @returns {boolean} true for an empty sequence or when the trailing children match;
 * false when the sequence is longer than the child list or does not match
 */
function endsWithSequence(root, sequence) {
  const needed = sequence.length;
  if (needed === 0) {
    return true;
  }
  const available = root.children.length;
  // Anchor the pattern so that its last entry lines up with the last child.
  return needed > available ? false : hasNodePattern(root, sequence, available - needed);
}
|
|
389
|
+
/**
 * Checks whether `root` has at least `minCount` children of a given type.
 *
 * Scanning stops as soon as the threshold is reached. The threshold is only
 * tested after a matching child has been found, so the result is false when
 * no child matches, whatever `minCount` is.
 *
 * @param {RootNode} root - The root node containing children
 * @param {ChildNode["type"]} nodeType - Node type to count
 * @param {number} minCount - Minimum number of matching children required
 * @returns {boolean} true once `minCount` matching children have been seen; false
 * otherwise, including when no checker is registered for `nodeType`
 */
function hasMinNodesOfType(root, nodeType, minCount) {
  const matchesType = NODE_TYPE_CHECKERS[nodeType];
  if (!matchesType) {
    return false;
  }
  let matched = 0;
  for (const child of root.children) {
    if (!matchesType(child)) {
      continue;
    }
    matched += 1;
    if (matched >= minCount) {
      return true;
    }
  }
  return false;
}
|
|
406
|
+
/**
 * Checks whether every child of `root` has the given node type.
 *
 * @param {RootNode} root - The root node containing children
 * @param {ChildNode["type"]} nodeType - Node type every child must have
 * @returns {boolean} true when there is at least one child and all children match;
 * false for an empty child list or when no checker is registered for `nodeType`
 */
function allNodesAreOfType(root, nodeType) {
  if (root.children.length === 0) {
    return false;
  }
  const matchesType = NODE_TYPE_CHECKERS[nodeType];
  if (!matchesType) {
    return false;
  }
  // "All match" expressed as "none fails to match".
  const hasMismatch = root.children.some((child) => !matchesType(child));
  return !hasMismatch;
}
|
|
418
|
+
/**
 * Checks whether every child of `root` is either a comment node or an empty
 * (blank line) node.
 *
 * Only the "comment" and "empty" types are accepted: a child of any other
 * type — including "empty-comment" and "boundary" — makes the result false.
 * A root without children yields true.
 *
 * @param {RootNode} root - The root node containing children
 * @returns {boolean} true when no child is something other than a comment or empty node
 */
function isCommentOnlyDocument(root) {
  const hasOtherContent = root.children.some((child) => !isCommentNode(child) && !isEmptyNode(child));
  return !hasOtherContent;
}
|
|
426
|
+
/**
 * Checks whether a boundary node occurs among the `lookAhead` children that
 * start at `startIndex` (inclusive). The range is clipped to the child list.
 *
 * @param {RootNode} root - The root node containing children
 * @param {number} startIndex - Index of the first child to inspect
 * @param {number} lookAhead - Maximum number of children to inspect
 * @returns {boolean} true when a boundary node is found in the range; false otherwise,
 * including when `startIndex` is negative or `lookAhead` is not positive
 */
function hasBoundaryWithinRange(root, startIndex, lookAhead) {
  if (startIndex < 0 || lookAhead <= 0) {
    return false;
  }
  const stop = Math.min(startIndex + lookAhead, root.children.length);
  let cursor = startIndex;
  while (cursor < stop) {
    if (isBoundaryNode(root.children[cursor])) {
      return true;
    }
    cursor++;
  }
  return false;
}
|
|
439
|
+
/**
 * Walks the direct children of `root` in order and invokes `callback` once
 * per child with a context object.
 *
 * The context has the shape `{ settings, currentNode, nextNode, prevNode }`:
 * `settings` is always `null`, and `nextNode` / `prevNode` are `undefined`
 * at the respective ends of the list. Nothing happens when `root` or
 * `root.children` is falsy. The callback's return value is ignored, so a
 * callback cannot stop the walk early.
 *
 * @param {RootNode} root - The root node whose children are visited
 * @param {(ctx: object) => void} callback - Invoked for every child, in index order
 * @returns {void}
 * @throws {Error} When a child slot holds `null` or `undefined`
 */
function visit(root, callback) {
  if (!root || !root.children) {
    return;
  }
  for (let index = 0; index < root.children.length; index++) {
    const currentNode = root.children[index];
    if (currentNode == null) {
      throw new Error(`Node at index ${index} is null or undefined`);
    }
    const context = {
      settings: null,
      currentNode,
      nextNode: root.children[index + 1],
      prevNode: root.children[index - 1]
    };
    callback(context);
  }
}
|
|
454
|
+
|
|
455
|
+
//#endregion
|
|
456
|
+
//#region src/inference/heading-settings.ts
|
|
457
|
+
/**
 * Per-file overrides for heading inference. Each entry is expected to have
 * the shape `{ fileName, version, settings }`. The list is empty here, so
 * no override is registered by this module itself.
 */
const HEADING_SETTINGS_CONFIG = [];

/**
 * Looks up the heading-inference settings registered for an exact
 * file name / version pair.
 *
 * @param {string | undefined} fileName - Name of the data file
 * @param {string | undefined} version - Version of the data file
 * @returns {object | null} The `settings` of the first matching entry, or null when
 * either argument is falsy or no entry matches
 */
function getHeadingSettings(fileName, version) {
  if (!fileName || !version) {
    return null;
  }
  for (const config of HEADING_SETTINGS_CONFIG) {
    if (config.fileName === fileName && config.version === version) {
      return config.settings;
    }
  }
  return null;
}
|
|
464
|
+
|
|
465
|
+
//#endregion
|
|
466
|
+
//#region src/inference/heading.ts
|
|
467
|
+
/**
 * Helper that reports whether a node belongs to the comment family:
 * a comment node, an empty-comment node, or a boundary node.
 *
 * @param {unknown} node - The value to inspect
 * @returns {boolean} true when any of the three type guards accepts the node
 */
function isAnyCommentNode(node) {
  // Guards are tried in the same order as before and stop at the first hit.
  const commentLikeGuards = [isCommentNode, isEmptyCommentNode, isBoundaryNode];
  return commentLikeGuards.some((guard) => guard(node));
}
|
|
473
|
+
/**
 * Infers the heading (the leading block of comment lines) of a parsed data
 * file and returns it as text, one raw line per row, each terminated by "\n".
 *
 * The children of `root` are walked in order. Comment-family nodes (comment,
 * empty comment, boundary) are accumulated; the walk stops at an EOF marker,
 * at a line starting with "# Property:", at an "@" line that is not wrapped
 * in comment nodes, at the first non-comment content after the heading
 * started, or according to the two settings below. When a stop position was
 * recorded, the heading is rebuilt from `raw` of all nodes before that
 * position, after extending it to the last boundary (if multiple boundaries
 * are allowed) and trimming trailing blank / bare "#" lines.
 *
 * Effective settings are `settings`, then the per-file settings from
 * `getHeadingSettings`, then the defaults (both flags true), merged with `defu`.
 *
 * NOTE(review): the checks `value === "#"` and `value.startsWith("# Property:")`
 * run against the trimmed node `value`, while the "@" check also uses `value`.
 * If comment values have their leading "#" stripped by the parser, the first
 * two can only match non-comment-stripped values — confirm against `trimCommentLine`.
 *
 * @param {RootNode} root - Parsed file; null/undefined or an empty child list yields null
 * @param {{ allowEmptyLines?: boolean, allowMultipleBoundaries?: boolean }} [settings] - Optional overrides
 * @returns {string | null} The heading text, an empty string when only skipped lines
 * were seen, or null when no heading was detected
 */
function inferHeadingFromAST(root, settings) {
  if (!root || !root.children || root.children.length === 0) {
    return null;
  }

  const nodes = root.children;
  const fileSettings = getHeadingSettings(root.fileName, root.version) ?? {};
  const { allowEmptyLines, allowMultipleBoundaries } = defu(settings ?? {}, fileSettings, {
    allowEmptyLines: true,
    allowMultipleBoundaries: true
  });

  let heading = null;
  let isInHeading = false;
  let headingEndNodeIndex = -1;
  let shouldStop = false;

  // Records where the heading ends and prevents any further processing.
  const stopAt = (index) => {
    headingEndNodeIndex = index;
    shouldStop = true;
  };

  // True when a boundary node exists strictly before `index`.
  const hasBoundaryBefore = (index) => {
    for (let k = 0; k < index; k++) {
      if (isBoundaryNode(nodes[k])) {
        return true;
      }
    }
    return false;
  };

  // Looks at up to four nodes after a boundary, skipping bare "#" lines, and
  // reports whether the first remaining node is neither another boundary, nor
  // the start of an "@" example, nor a "# Property:" line.
  const boundaryIsFollowedByContent = (boundaryIndex) => {
    for (let idx = boundaryIndex + 1; idx < nodes.length && idx < boundaryIndex + 5; idx++) {
      const candidate = nodes[idx];
      if (!candidate || candidate.value.trim() === "#") {
        continue;
      }
      const candidateIsBoundary = isBoundaryNode(candidate);
      const candidateIsExample = isAnyCommentNode(candidate) && nodes[idx + 1]?.value.trim().startsWith("@") && nodes[idx + 2] && isAnyCommentNode(nodes[idx + 2]);
      const candidateIsProperty = candidate.value.trim().startsWith("# Property:");
      // Only the first non-skipped node decides.
      return !candidateIsBoundary && !candidateIsExample && !candidateIsProperty;
    }
    return false;
  };

  // Handles a comment / empty-comment / boundary node.
  const handleCommentLike = (node, index, value) => {
    isInHeading = true;
    if (heading == null) {
      heading = "";
    }
    if (!heading && value === "#") {
      // A bare "#" before any heading text is skipped.
      return;
    }
    if (value.startsWith("# Property:")) {
      stopAt(index);
      return;
    }
    if (isBoundaryNode(node)) {
      if (!allowMultipleBoundaries && hasBoundaryBefore(index)) {
        stopAt(index);
      }
      if (!shouldStop && boundaryIsFollowedByContent(index)) {
        stopAt(index + 2);
      }
    }
    if (!shouldStop) {
      heading = `${heading}${node.raw}\n`;
    }
  };

  // Handles a blank line.
  const handleEmpty = (node, index, nextNode) => {
    if (heading && nextNode && isAnyCommentNode(nextNode)) {
      if (allowEmptyLines) {
        heading = `${heading}${node.raw}\n`;
      } else {
        stopAt(index);
      }
      return;
    }
    if (!isInHeading) {
      return;
    }
    if (!allowEmptyLines) {
      stopAt(index);
      return;
    }
    // Peek at up to four following nodes to decide whether the heading continues.
    let hasMoreComments = false;
    for (let idx = index + 1; idx < nodes.length && idx < index + 5; idx++) {
      const candidate = nodes[idx];
      const candidateValue = candidate?.value.trim();
      if (candidateValue !== "" && !isAnyCommentNode(candidate)) {
        stopAt(index);
        break;
      }
      if (isAnyCommentNode(candidate) && candidateValue !== "#") {
        hasMoreComments = !candidateValue?.startsWith("# Property:");
        break;
      }
    }
    if (shouldStop) {
      return;
    }
    if (hasMoreComments) {
      heading = `${heading}${node.raw}\n`;
    } else {
      stopAt(index);
    }
  };

  visit(root, ({ currentNode, nextNode, prevNode }) => {
    if (shouldStop) {
      return;
    }
    const currentIndex = nodes.indexOf(currentNode);
    const value = currentNode.value.trim();

    const touchesEOFMarker = isEOFMarker(currentNode.raw) || nextNode && isEOFMarker(nextNode.raw);
    if (touchesEOFMarker) {
      invariant(heading == null, "heading should be null");
      shouldStop = true;
    }
    if (isEOFNode(currentNode)) {
      invariant(heading == null, "heading should be null");
      shouldStop = true;
    }
    if (shouldStop) {
      return;
    }

    if (value.startsWith("@")) {
      const wrappedInComments = prevNode && isAnyCommentNode(prevNode) && nextNode && isAnyCommentNode(nextNode);
      if (!wrappedInComments) {
        stopAt(currentIndex);
      }
    } else if (isAnyCommentNode(currentNode)) {
      handleCommentLike(currentNode, currentIndex, value);
    } else if (isEmptyNode(currentNode)) {
      handleEmpty(currentNode, currentIndex, nextNode);
    } else if (isInHeading) {
      stopAt(currentIndex);
    }
  });

  if (headingEndNodeIndex === -1) {
    return heading;
  }

  let endNodesWithoutEmpty = headingEndNodeIndex;
  if (allowMultipleBoundaries) {
    // Extend/shrink the end so that it sits just after the last boundary at or before it.
    for (let i = endNodesWithoutEmpty; i >= 0; i--) {
      if (isBoundaryNode(nodes[i])) {
        endNodesWithoutEmpty = i + 1;
        break;
      }
    }
  }

  // Decides which trailing nodes are dropped from the end of the heading.
  const isTrailingFiller = allowEmptyLines
    ? (node) => {
      const trimmed = node?.value.trim();
      return trimmed === "" || trimmed === "#" || isEmptyCommentNode(node);
    }
    : (node) => isEmptyCommentNode(node);
  while (endNodesWithoutEmpty > 0 && isTrailingFiller(nodes[endNodesWithoutEmpty - 1])) {
    endNodesWithoutEmpty--;
  }

  const rawLines = nodes.slice(0, endNodesWithoutEmpty).map((node) => node.raw);
  return `${rawLines.join("\n")}\n`;
}
|
|
603
|
+
|
|
604
|
+
//#endregion
|
|
605
|
+
//#region src/datafile/parser.ts
|
|
606
|
+
/**
 * Creates a node object from a single line of a data file.
 *
 * The line is classified by the first matching rule, tried in this order:
 * empty line, boundary line, EOF marker, property line, comment line
 * (an "empty-comment" when nothing is left after trimming the comment,
 * otherwise a "comment"), data line, and finally "unknown". A boundary line
 * whose style cannot be determined (the style helper throws) is also
 * reported as "unknown".
 *
 * Every node carries `type`, `value`, `raw` (the untouched line) and `line`.
 * Boundary nodes additionally carry `style`, property nodes `propertyValue`.
 *
 * @param {string} line - The text line to parse into a node
 * @param {number} lineNumber - Stored as-is in `line`; the parser in this file passes the 0-based line index
 * @returns {ChildNode} A node object representing the parsed line
 */
function createNode(line, lineNumber) {
  const trimmedLine = line.trim();
  // Builds a node with the common fields first so the key order stays type, value, raw, line, extras.
  const makeNode = (type, value, extras) => ({
    type,
    value,
    raw: line,
    line: lineNumber,
    ...extras
  });

  if (isEmptyLine(line)) {
    return makeNode(NodeTypes.EMPTY, "");
  }

  if (isBoundaryLine(line)) {
    let style;
    try {
      style = getBoundaryLineStyle(line);
    } catch {
      // Deliberate fallback: an unrecognised boundary style degrades to an unknown node.
      return makeNode(NodeTypes.UNKNOWN, trimmedLine);
    }
    return makeNode(NodeTypes.BOUNDARY, trimmedLine, { style });
  }

  if (isEOFMarker(line)) {
    return makeNode(NodeTypes.EOF, trimmedLine);
  }

  if (isPropertyLine(line)) {
    return makeNode(NodeTypes.PROPERTY, trimmedLine, { propertyValue: getPropertyValue(trimmedLine) });
  }

  if (isCommentLine(line)) {
    const commentText = trimCommentLine(line);
    const commentType = commentText === "" ? NodeTypes.EMPTY_COMMENT : NodeTypes.COMMENT;
    return makeNode(commentType, commentText);
  }

  if (isLineWithData(line)) {
    return makeNode(NodeTypes.DATA, trimmedLine);
  }

  /* v8 ignore next */
  return makeNode(NodeTypes.UNKNOWN, trimmedLine);
}
|
|
681
|
+
/**
 * Turns the full text of a data file into a root AST node.
 *
 * The content is split on LF or CRLF line breaks and each line becomes one
 * child node via `createNode`, numbered by its 0-based position in the split
 * result. The root node keeps the original content in `raw`.
 *
 * @param {string} content - The full content of the data file to parse
 * @param {string} [fileName] - Optional explicit file name. When it is null or
 *                              undefined, the name is inferred from the content
 * @returns {RootNode} The root node with its children, file name and version
 */
function parseDataFileIntoAst(content, fileName) {
  const children = [];
  for (const [index, line] of content.split(/\r?\n/).entries()) {
    children.push(createNode(line, index));
  }

  const resolvedFileName = fileName ?? inferFileName(content);
  const version = inferVersion(content);

  return {
    type: NodeTypes.ROOT,
    value: "",
    raw: content,
    line: 0,
    children,
    fileName: resolvedFileName,
    version
  };
}
|
|
704
|
+
|
|
705
|
+
//#endregion
|
|
706
|
+
//#region src/datafile/sections.ts
|
|
707
|
+
/**
 * Tells whether the given Unicode data file content contains at least one
 * section, as detected by `parseSections`.
 *
 * Falsy content (for example an empty string, `null` or `undefined`) is
 * reported as having no sections without being parsed.
 *
 * @param {string} content - The Unicode data file content to check
 * @returns {boolean} True if at least one section was found, false otherwise
 *
 * @example
 * ```ts
 * const fileContent = "# Section 1\ndata1\n\n# Section 2\ndata2";
 * const hasFileSections = hasSections(fileContent); // true
 * ```
 */
function hasSections(content) {
  return Boolean(content) && parseSections(content).size > 0;
}
|
|
726
|
+
/**
 * Splits Unicode data file content into named sections.
 *
 * A section starts at the first non-empty, non-comment line that follows one
 * or more pending comment lines. The first pending comment becomes the section
 * name, the remaining ones (joined with "\n") become its description, and that
 * line plus every following non-empty, non-comment line is collected as a
 * section line until the next section starts.
 *
 * Details:
 * - Comment lines that are also boundary lines are skipped.
 * - An empty line discards the pending comments unless the next non-empty
 *   line is a non-comment line.
 * - Non-comment lines that appear before the first section are ignored.
 * - When two sections share a name, the later one replaces the earlier one.
 * - Both LF and CRLF line endings are accepted, so a trailing "\r" never
 *   ends up in section names, descriptions or lines.
 *
 * @param {string} content - The Unicode data file content to parse
 * @returns {Map<string, UCDSectionWithLines>} A map where keys are section names and
 *                                             values are objects containing the
 *                                             section description and associated data lines
 *
 * @example
 * ```ts
 * const content = `# Section 1
 * # Description of section 1
 * data1
 * data2
 *
 * # Section 2
 * # Description of section 2
 * data3
 * data4`;
 *
 * const sections = parseSections(content);
 * // sections will contain two entries:
 * // "Section 1" -> { description: "Description of section 1", lines: ["data1", "data2"] }
 * // "Section 2" -> { description: "Description of section 2", lines: ["data3", "data4"] }
 * ```
 */
function parseSections(content) {
  const sections = /* @__PURE__ */ new Map();
  if (!content) return sections;

  // Split on CRLF as well as LF, the same way parseDataFileIntoAst does.
  const lines = content.split(/\r?\n/);

  let sectionName = null;
  let sectionDescription = "";
  let sectionLines = [];
  let pendingComments = [];

  // Stores the section that is currently being collected, if there is one.
  const flushSection = () => {
    if (sectionName !== null) {
      sections.set(sectionName, {
        description: sectionDescription,
        lines: sectionLines
      });
    }
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    if (isEmptyLine(line)) {
      // Look past the whole run of empty lines to the next non-empty line.
      let next = i + 1;
      while (next < lines.length && isEmptyLine(lines[next])) next++;

      // Comments stay attached across the gap only when data follows it.
      const nextNonEmptyIsData = next < lines.length && !isCommentLine(lines[next]);
      if (!nextNonEmptyIsData) pendingComments = [];

      // The rest of the run would reach the same decision, so skip it.
      i = next - 1;
      continue;
    }

    if (isCommentLine(line)) {
      if (!isBoundaryLine(line)) pendingComments.push(line.replace(/^#\s*/, ""));
      continue;
    }

    if (pendingComments.length > 0) {
      // A data line right after comments opens a new section.
      flushSection();
      sectionName = pendingComments[0];
      sectionDescription = pendingComments.slice(1).join("\n");
      sectionLines = [line];
      pendingComments = [];
    } else if (sectionName !== null) {
      sectionLines.push(line);
    }
  }

  flushSection();
  return sections;
}
|
|
803
|
+
|
|
804
|
+
//#endregion
|
|
805
|
+
//#region src/datafile/model.ts
|
|
806
|
+
/**
 * Wraps the text of a Unicode data file and exposes the pieces derived from it.
 *
 * On construction the content is parsed into an AST, the heading is inferred
 * from that AST and removed from the content, and the remaining text is split
 * into lines and sections. The file name and version come from the AST, unless
 * an explicit file name is passed in.
 *
 * @example
 * ```ts
 * // Create a RawDataFile from a string content
 * const content = "# UnicodeData-14.0.0.txt\n# Some Unicode data\n\nU+0020;SPACE\n# EOF";
 * const dataFile = new RawDataFile(content);
 *
 * // Access file properties
 * console.log(dataFile.fileName); // "UnicodeData"
 * console.log(dataFile.version); // "14.0.0"
 * console.log(dataFile.hasEOF); // true
 * console.log(dataFile.heading); // "# UnicodeData-14.0.0.txt\n# Some Unicode data"
 * ```
 */
var RawDataFile = class {
  /** The complete, unmodified content passed to the constructor. */
  rawContent = "";
  /**
   * The content with the heading removed and surrounding whitespace trimmed.
   *
   * NOTE:
   * When no heading could be inferred, this is the same as `rawContent`.
   */
  content = "";
  /**
   * `content` split on "\n"; the heading is not part of it.
   *
   * NOTE(review): only "\n" is used as separator, so CRLF content keeps a
   * trailing "\r" on each line - confirm whether callers normalise first.
   */
  lines = [];
  /** The heading inferred from the AST, or null when none was found. */
  heading = null;
  /**
   * The AST produced from the raw content by `parseDataFileIntoAst`.
   * It is undefined only until the constructor has assigned it.
   */
  ast = undefined;
  /** The sections found in `content` by `parseSections`, keyed by section name. */
  sections = /* @__PURE__ */ new Map();
  /**
   * The name of the file: the explicit constructor argument when given,
   * otherwise the name stored on the AST, which may be missing.
   */
  fileName = undefined;
  /** The version stored on the AST, if one could be inferred. */
  version = undefined;
  /** Whether the last entry of `lines` is an EOF marker. */
  hasEOF = false;

  /**
   * @param {string} content - The full content of the data file
   * @param {string} [fileName] - Optional explicit file name
   * @throws {Error} When `content` is null, undefined or only whitespace
   */
  constructor(content, fileName) {
    const isBlank = content == null || content.trim() === "";
    if (isBlank) throw new Error("content is empty");

    const ast = parseDataFileIntoAst(content, fileName);
    const heading = inferHeadingFromAST(ast);

    // Remove the first occurrence of the heading text, then trim what is left.
    const body = heading == null ? content : content.replace(heading, "").trim();
    const bodyLines = body.split("\n");

    this.ast = ast;
    this.rawContent = content;
    this.heading = heading;
    this.content = body;
    this.lines = bodyLines;
    this.fileName = fileName ?? ast.fileName;
    this.version = ast.version;
    this.hasEOF = isEOFMarker(bodyLines.at(-1));
    this.sections = parseSections(body);
  }
};
|
|
875
|
+
|
|
876
|
+
//#endregion
|
|
877
|
+
export { isUnknownNode as _, HEADING_SETTINGS_CONFIG as a, isBoundaryNode as c, isEOFNode as d, isEmptyCommentNode as f, isRootNode as g, isPropertyNode as h, inferHeadingFromAST as i, isCommentNode as l, isNode as m, hasSections as n, getHeadingSettings as o, isEmptyNode as p, parseSections as r, ast_utils_exports as s, RawDataFile as t, isDataNode as u, NodeTypes as v };
|