@unicode-utils/parser 0.12.0-beta.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,877 @@
1
+ import { t as __export } from "./chunk-Bp6m_JJh.js";
2
+ import { _ as trimCommentLine, a as isBoundaryLine, c as isEOFMarker, f as isLineWithData, i as inferVersion, l as isEmptyLine, m as isPropertyLine, n as getPropertyValue, o as isCommentLine, r as inferFileName, t as getBoundaryLineStyle } from "./line-helpers-tsCF16UF.js";
3
+ import { invariant } from "@luxass/utils";
4
+ import defu from "defu";
5
+
6
+ //#region src/datafile/ast.ts
7
/**
 * String tags stored in the `type` field of AST nodes.
 *
 * `createNode` produces every tag except ROOT for individual lines;
 * `parseDataFileIntoAst` uses ROOT for the node that wraps the whole file.
 */
const NodeTypes = {
	ROOT: "root",
	COMMENT: "comment",
	EMPTY_COMMENT: "empty-comment",
	BOUNDARY: "boundary",
	DATA: "data",
	EMPTY: "empty",
	EOF: "eof",
	PROPERTY: "property",
	UNKNOWN: "unknown"
};
18
+
19
+ //#endregion
20
+ //#region src/datafile/typeguards.ts
21
+ /**
22
+ * Type guard function that checks if an unknown value is a Node.
23
+ * A Node must be an object with 'type', 'value', 'raw', and 'line' properties of the correct types.
24
+ *
25
+ * @param {unknown} node - The unknown value to check
26
+ * @returns {node is Node} True if the node is a valid Node, false otherwise
27
+ *
28
+ * @example
29
+ * ```typescript
30
+ * import { parseDataFile } from './parser';
31
+ *
32
+ * const parsedData = parseDataFile('# Comment\n0000..007F; Basic Latin');
33
+ * const firstChild = parsedData.children[0];
34
+ *
35
+ * if (isNode(firstChild)) {
36
+ * console.log(`Node type: ${firstChild.type}`);
37
+ * console.log(`Raw content: ${firstChild.raw}`);
38
+ * console.log(`Line number: ${firstChild.line}`);
39
+ * }
40
+ * ```
41
+ */
42
function isNode(node) {
	// Primitives and null can never be nodes; bail out before using `in`.
	if (typeof node !== "object" || node === null) return false;
	// A field counts only when it is present and holds a string.
	const hasStringField = (key) => key in node && typeof node[key] === "string";
	const hasNumericLine = () => "line" in node && typeof node.line === "number";
	return hasStringField("type") && hasStringField("raw") && hasNumericLine() && hasStringField("value");
}
45
+ /**
46
+ * Type guard function that checks if an unknown value is a CommentNode.
47
+ * A CommentNode must be a valid Node with the type property set to "comment".
48
+ *
49
+ * @param {unknown} node - The unknown value to check
50
+ * @returns {node is CommentNode} True if the node is a valid CommentNode, false otherwise
51
+ *
52
+ * @example
53
+ * ```typescript
54
+ * import { parseDataFile } from './parser';
55
+ *
56
+ * const parsedData = parseDataFile('# This is a comment\n0000; NULL');
57
+ * const commentNode = parsedData.children[0];
58
+ *
59
+ * if (isCommentNode(commentNode)) {
60
+ * console.log(`Comment content: ${commentNode.value}`); // "This is a comment"
61
+ * console.log(`Raw line: ${commentNode.raw}`); // "# This is a comment"
62
+ * }
63
+ * ```
64
+ */
65
function isCommentNode(node) {
	// Validate the node shape first; only then is reading `type` meaningful.
	if (!isNode(node)) return false;
	return node.type === "comment";
}
68
+ /**
69
+ * Type guard function that checks if an unknown value is an EmptyCommentNode.
70
+ * An EmptyCommentNode must be a valid Node with the type property set to "empty-comment".
71
+ *
72
+ * @param {unknown} node - The unknown value to check
73
+ * @returns {node is EmptyCommentNode} True if the node is a valid EmptyCommentNode, false otherwise
74
+ *
75
+ * @example
76
+ * ```typescript
77
+ * import { parseDataFile } from './parser';
78
+ *
79
+ * const parsedData = parseDataFile('#\n0000; NULL');
80
+ * const emptyCommentNode = parsedData.children[0];
81
+ *
82
+ * if (isEmptyCommentNode(emptyCommentNode)) {
83
+ * console.log(`Empty comment raw: ${emptyCommentNode.raw}`); // "#"
84
+ * console.log(`Empty comment value: "${emptyCommentNode.value}"`); // ""
85
+ * }
86
+ * ```
87
+ */
88
function isEmptyCommentNode(node) {
	// Anything that is not a structurally valid node cannot be an empty comment.
	return isNode(node) ? node.type === "empty-comment" : false;
}
91
+ /**
92
+ * Type guard function that checks if an unknown value is a BoundaryNode.
93
+ * A BoundaryNode must be a valid Node with the type property set to "boundary".
94
+ *
95
+ * @param {unknown} node - The unknown value to check
96
+ * @returns {node is BoundaryNode} True if the node is a valid BoundaryNode, false otherwise
97
+ *
98
+ * @example
99
+ * ```typescript
100
+ * import { parseDataFile } from './parser';
101
+ *
102
+ * const parsedData = parseDataFile('# ================================================\n0000; NULL');
103
+ * const boundaryNode = parsedData.children[0];
104
+ *
105
+ * if (isBoundaryNode(boundaryNode)) {
106
+ * console.log(`Boundary style: ${boundaryNode.style}`); // "equals"
107
+ * console.log(`Boundary raw: ${boundaryNode.raw}`); // "# ================================================"
108
+ * }
109
+ * ```
110
+ */
111
function isBoundaryNode(node) {
	// Only the tag is checked here; the `style` field is not inspected.
	if (!isNode(node)) return false;
	return node.type === "boundary";
}
114
+ /**
115
+ * Type guard function that checks if an unknown value is a DataNode.
116
+ * A DataNode must be a valid Node with the type property set to "data".
117
+ *
118
+ * @param {unknown} node - The unknown value to check
119
+ * @returns {node is DataNode} True if the node is a valid DataNode, false otherwise
120
+ *
121
+ * @example
122
+ * ```typescript
123
+ * import { parseDataFile } from './parser';
124
+ *
125
+ * const parsedData = parseDataFile('0000..007F ; Basic Latin # [128] <control-0000>..<control-007F>');
126
+ * const dataNode = parsedData.children[0];
127
+ *
128
+ * if (isDataNode(dataNode)) {
129
+ * console.log(`Data value: ${dataNode.value}`); // "0000..007F ; Basic Latin # [128] <control-0000>..<control-007F>"
130
+ * console.log(`Raw content: ${dataNode.raw}`); // Same as value for data nodes
131
+ * console.log(`Line number: ${dataNode.line}`); // 1
132
+ * }
133
+ * ```
134
+ */
135
function isDataNode(node) {
	// Shape check first, tag comparison second.
	return isNode(node) ? node.type === "data" : false;
}
138
+ /**
139
+ * Type guard function that checks if an unknown value is an EmptyNode.
140
+ * An EmptyNode must be a valid Node with the type property set to "empty".
141
+ *
142
+ * @param {unknown} node - The unknown value to check
143
+ * @returns {node is EmptyNode} True if the node is a valid EmptyNode, false otherwise
144
+ *
145
+ * @example
146
+ * ```typescript
147
+ * import { parseDataFile } from './parser';
148
+ *
149
+ * const parsedData = parseDataFile('# Comment\n\n0000; NULL');
150
+ * const emptyNode = parsedData.children[1]; // The blank line
151
+ *
152
+ * if (isEmptyNode(emptyNode)) {
153
+ * console.log(`Empty node raw: "${emptyNode.raw}"`); // ""
154
+ * console.log(`Empty node value: "${emptyNode.value}"`); // ""
155
+ * console.log(`Line number: ${emptyNode.line}`); // 2
156
+ * }
157
+ * ```
158
+ */
159
function isEmptyNode(node) {
	// "empty" is the blank-line tag; "empty-comment" is a different tag and does not match.
	if (!isNode(node)) return false;
	return node.type === "empty";
}
162
+ /**
163
+ * Type guard function that checks if an unknown value is a RootNode.
164
+ * A RootNode must be a valid Node with the type property set to "root".
165
+ *
166
+ * @param {unknown} node - The unknown value to check
167
+ * @returns {node is RootNode} True if the node is a valid RootNode, false otherwise
168
+ *
169
+ * @example
170
+ * ```typescript
171
+ * import { parseDataFile } from './parser';
172
+ *
173
+ * const parsedData = parseDataFile('# Unicode Block Data\n0000..007F; Basic Latin');
174
+ *
175
+ * if (isRootNode(parsedData)) {
176
+ * console.log(`Root has ${parsedData.children.length} children`); // 2
177
+ * console.log(`File name: ${parsedData.fileName}`); // May be undefined
178
+ * console.log(`Version: ${parsedData.version}`); // May be undefined
179
+ * }
180
+ * ```
181
+ */
182
function isRootNode(node) {
	// Only the common node fields and the tag are verified; `children` is not inspected.
	return isNode(node) ? node.type === "root" : false;
}
185
+ /**
186
+ * Type guard function that checks if an unknown value is an UnknownNode.
187
+ * An UnknownNode must be a valid Node with the type property set to "unknown".
188
+ *
189
+ * @param {unknown} node - The unknown value to check
190
+ * @returns {node is UnknownNode} True if the node is a valid UnknownNode, false otherwise
191
+ *
192
+ * @example
193
+ * ```typescript
194
+ * import { parseDataFile } from './parser';
195
+ *
196
+ * // Assuming some unusual content that doesn't match known patterns
197
+ * const parsedData = parseDataFile('@@UNUSUAL_SYNTAX@@\n0000; NULL');
198
+ * const unknownNode = parsedData.children[0];
199
+ *
200
+ * if (isUnknownNode(unknownNode)) {
201
+ * console.log(`Unknown node raw: ${unknownNode.raw}`); // "@@UNUSUAL_SYNTAX@@"
202
+ * console.log(`Unknown node value: ${unknownNode.value}`); // "@@UNUSUAL_SYNTAX@@"
203
+ * console.log(`Line number: ${unknownNode.line}`); // 1
204
+ * }
205
+ * ```
206
+ */
207
function isUnknownNode(node) {
	// Reject malformed values before comparing the tag.
	if (!isNode(node)) return false;
	return node.type === "unknown";
}
210
+ /**
211
+ * Type guard function that checks if an unknown value is an EOFNode.
212
+ * An EOFNode must be a valid Node with the type property set to "eof".
213
+ *
214
+ * @param {unknown} node - The unknown value to check
215
+ * @returns {node is EOFNode} True if the node is a valid EOFNode, false otherwise
216
+ *
217
+ * @example
218
+ * ```typescript
219
+ * import { parseDataFile } from './parser';
220
+ *
221
+ * const parsedData = parseDataFile('0000; NULL');
222
+ * const lastNode = parsedData.children[parsedData.children.length - 1];
223
+ *
224
+ * if (isEOFNode(lastNode)) {
225
+ * console.log(`EOF node detected at line: ${lastNode.line}`);
226
+ * console.log(`EOF raw value: ${lastNode.raw}`); // Empty string
227
+ * }
228
+ * ```
229
+ */
230
function isEOFNode(node) {
	// Shape check first, tag comparison second.
	return isNode(node) ? node.type === "eof" : false;
}
233
+ /**
234
+ * Type guard function that checks if an unknown value is a PropertyNode.
235
+ * A PropertyNode must be a valid Node with the type property set to "property" and have a defined propertyValue.
236
+ *
237
+ * @param {unknown} node - The unknown value to check
238
+ * @returns {node is PropertyNode} True if the node is a valid PropertyNode, false otherwise
239
+ *
240
+ * @example
241
+ * ```typescript
242
+ * import { parseDataFile } from './parser';
243
+ *
244
+ * const parsedData = parseDataFile('# @key=value\n0000; NULL');
245
+ * const propertyNode = parsedData.children[0];
246
+ *
247
+ * if (isPropertyNode(propertyNode)) {
248
+ * console.log(`Property key: ${propertyNode.propertyKey}`); // "key"
249
+ * console.log(`Property value: ${propertyNode.propertyValue}`); // "value"
250
+ * console.log(`Raw content: ${propertyNode.raw}`); // "# @key=value"
251
+ * }
252
+ * ```
253
+ */
254
function isPropertyNode(node) {
	if (!isNode(node) || node.type !== "property") return false;
	// The tag alone is not enough: a property node must also carry a defined `propertyValue`.
	return node.propertyValue !== void 0;
}
257
+
258
+ //#endregion
259
+ //#region src/datafile/ast-utils.ts
260
/**
 * Namespace object bundling the AST helper functions of this region.
 *
 * It is built with the bundler's `__export` helper from a map of thunks, one
 * per helper, each returning the function of the same name.
 * NOTE(review): `__export` is defined in another chunk; presumably it turns
 * each thunk into a lazy getter — confirm against that chunk.
 */
var ast_utils_exports = /* @__PURE__ */ __export({
	allNodesAreOfType: () => allNodesAreOfType,
	endsWithSequence: () => endsWithSequence,
	findNodePattern: () => findNodePattern,
	hasBoundaryWithinRange: () => hasBoundaryWithinRange,
	hasConsecutiveNodesOfType: () => hasConsecutiveNodesOfType,
	hasMinNodesOfType: () => hasMinNodesOfType,
	hasNextNComments: () => hasNextNComments,
	hasNextNCommentsFrom: () => hasNextNCommentsFrom,
	hasNodePattern: () => hasNodePattern,
	hasPrevNCommentsFrom: () => hasPrevNCommentsFrom,
	isCommentOnlyDocument: () => isCommentOnlyDocument,
	startsWithSequence: () => startsWithSequence,
	visit: () => visit
});
275
/**
 * Lookup table from a child-node type tag to the type guard that recognises it.
 *
 * The table is created with a null prototype so that only the tags listed here
 * resolve to a checker. With an ordinary object literal, inherited keys such as
 * "constructor" or "toString" resolve to functions from Object.prototype; those
 * are truthy, so they would slip past the `if (!checker)` guards of the lookup
 * helpers and be invoked as if they were type guards.
 *
 * There is intentionally no "root" entry: the table is only used to test
 * entries of `root.children`.
 */
const NODE_TYPE_CHECKERS = {
	__proto__: null,
	"comment": isCommentNode,
	"empty-comment": isEmptyCommentNode,
	"boundary": isBoundaryNode,
	"data": isDataNode,
	"empty": isEmptyNode,
	"unknown": isUnknownNode,
	"eof": isEOFNode,
	"property": isPropertyNode
};
285
+ /**
286
+ * Checks if the next N nodes from a given index are all comment nodes
287
+ * @param {RootNode} root - The root node containing children
288
+ * @param {number} startIndex - The starting index to check from
289
+ * @param {number} count - Number of nodes to check
290
+ * @returns {boolean} true if the next N nodes are all comment nodes, false otherwise
291
+ */
292
function hasNextNCommentsFrom(root, startIndex, count) {
	// Reject negative starts and empty windows up front.
	if (startIndex < 0 || count <= 0) return false;
	const windowEnd = startIndex + count;
	// The whole window [startIndex, windowEnd) must fit inside the children array.
	if (windowEnd > root.children.length) return false;
	for (let cursor = startIndex; cursor < windowEnd; cursor++) {
		// Only nodes tagged "comment" count; any other tag fails the check.
		if (!isCommentNode(root.children[cursor])) return false;
	}
	return true;
}
298
+ /**
299
+ * Checks if the next N nodes from the current node are all comment nodes
300
+ * @param {RootNode} root - The root node containing children
301
+ * @param {ChildNode} currentNode - The current node to find in the children array
302
+ * @param {number} count - Number of nodes to check after the current node
303
+ * @returns {boolean} true if the next N nodes are all comment nodes, false otherwise
304
+ */
305
function hasNextNComments(root, currentNode, count) {
	const position = root.children.indexOf(currentNode);
	// A node that is not a child of `root` has no following siblings to inspect.
	// Otherwise the window starts right after the node itself.
	return position === -1 ? false : hasNextNCommentsFrom(root, position + 1, count);
}
310
+ /**
311
+ * Checks if the previous N nodes from a given index are all comment nodes
312
+ * @param {RootNode} root - The root node containing children
313
+ * @param {number} startIndex - The starting index to check backwards from
314
+ * @param {number} count - Number of nodes to check backwards
315
+ * @returns {boolean} true if the previous N nodes are all comment nodes, false otherwise
316
+ */
317
function hasPrevNCommentsFrom(root, startIndex, count) {
	if (startIndex >= root.children.length || count <= 0) return false;
	// The window is [firstIndex, startIndex] and therefore includes startIndex itself.
	const firstIndex = startIndex - count + 1;
	if (firstIndex < 0) return false;
	for (let cursor = firstIndex; cursor <= startIndex; cursor++) {
		if (!isCommentNode(root.children[cursor])) return false;
	}
	return true;
}
323
+ /**
324
+ * Checks if there are N consecutive nodes of a specific type starting from an index
325
+ * @param {RootNode} root - The root node containing children
326
+ * @param {number} startIndex - The starting index to check from
327
+ * @param {number} count - Number of consecutive nodes to check
328
+ * @param {ChildNode["type"]} nodeType - The type of node to check for ('comment', 'data', 'boundary', 'empty', 'unknown')
329
+ * @returns {boolean} true if there are N consecutive nodes of the specified type, false otherwise
330
+ */
331
function hasConsecutiveNodesOfType(root, startIndex, count, nodeType) {
	if (startIndex < 0 || count <= 0) return false;
	const stopIndex = startIndex + count;
	// The run must fit completely inside the children array.
	if (stopIndex > root.children.length) return false;
	const matchesType = NODE_TYPE_CHECKERS[nodeType];
	// Types without a registered checker never match.
	if (!matchesType) return false;
	for (let cursor = startIndex; cursor < stopIndex; cursor++) {
		if (!matchesType(root.children[cursor])) return false;
	}
	return true;
}
339
+ /**
340
+ * Checks if the root contains a specific pattern of node types
341
+ * @param {RootNode} root - The root node containing children
342
+ * @param {ChildNode["type"][]} pattern - Array of node types that should appear consecutively
343
+ * @param {number} [startIndex] - Optional starting index to check from
344
+ * @returns {boolean} true if the pattern is found, false otherwise
345
+ */
346
function hasNodePattern(root, pattern, startIndex = 0) {
	// An empty pattern matches anywhere.
	if (pattern.length === 0) return true;
	if (startIndex < 0 || startIndex + pattern.length > root.children.length) return false;
	for (const [offset, expectedType] of pattern.entries()) {
		// A null/undefined entry is a caller error, reported only once the loop reaches it.
		if (expectedType == null) throw new Error(`Invalid node type at index ${offset} in pattern: ${JSON.stringify(pattern)}`);
		const matchesType = NODE_TYPE_CHECKERS[expectedType];
		const candidate = root.children[startIndex + offset];
		if (!matchesType || !matchesType(candidate)) return false;
	}
	return true;
}
358
+ /**
359
+ * Finds the first occurrence of a node pattern in the root's children
360
+ * @param {RootNode} root - The root node containing children
361
+ * @param {ChildNode["type"][]} pattern - Array of node types to search for
362
+ * @returns {number} The index of the first occurrence, or -1 if not found
363
+ */
364
function findNodePattern(root, pattern) {
	// An empty pattern is considered found at the very beginning.
	if (pattern.length === 0) return 0;
	// Last start position at which the pattern can still fit.
	const lastStart = root.children.length - pattern.length;
	let candidate = 0;
	while (candidate <= lastStart) {
		if (hasNodePattern(root, pattern, candidate)) return candidate;
		candidate++;
	}
	return -1;
}
369
+ /**
370
+ * Checks if the root starts with a specific sequence of node types
371
+ * @param {RootNode} root - The root node containing children
372
+ * @param {ChildNode["type"][]} sequence - Array of node types that should appear at the beginning
373
+ * @returns {boolean} true if the root starts with the sequence, false otherwise
374
+ */
375
function startsWithSequence(root, sequence) {
	// "Starts with" is a pattern match anchored at the first child.
	const firstChildIndex = 0;
	return hasNodePattern(root, sequence, firstChildIndex);
}
378
+ /**
379
+ * Checks if the root ends with a specific sequence of node types
380
+ * @param {RootNode} root - The root node containing children
381
+ * @param {ChildNode["type"][]} sequence - Array of node types that should appear at the end
382
+ * @returns {boolean} true if the root ends with the sequence, false otherwise
383
+ */
384
function endsWithSequence(root, sequence) {
	if (sequence.length === 0) return true;
	// Index at which the sequence must begin in order to end on the last child.
	const tailStart = root.children.length - sequence.length;
	// A negative start means the sequence is longer than the document.
	return tailStart < 0 ? false : hasNodePattern(root, sequence, tailStart);
}
389
+ /**
390
+ * Checks if there are at least N nodes of a specific type in the root
391
+ * @param {RootNode} root - The root node containing children
392
+ * @param {ChildNode["type"]} nodeType - The type of node to count
393
+ * @param {number} minCount - Minimum number of nodes required
394
+ * @returns {boolean} true if there are at least minCount nodes of the specified type
395
+ */
396
function hasMinNodesOfType(root, nodeType, minCount) {
	const checker = NODE_TYPE_CHECKERS[nodeType];
	// Types without a registered checker never match, whatever the threshold.
	if (!checker) return false;
	// "At least zero" (or fewer) matching nodes holds for every document, including
	// one with no matching node at all. Without this guard the answer for
	// minCount <= 0 depended on whether a matching node happened to exist.
	if (minCount <= 0) return true;
	let matches = 0;
	for (const child of root.children) {
		if (!checker(child)) continue;
		matches++;
		// Stop scanning as soon as the threshold is reached.
		if (matches >= minCount) return true;
	}
	return false;
}
406
+ /**
407
+ * Checks if all nodes in the root are of a specific type
408
+ * @param {RootNode} root - The root node containing children
409
+ * @param {ChildNode["type"]} nodeType - The type of node to check for
410
+ * @returns {boolean} true if all nodes are of the specified type, false otherwise
411
+ */
412
function allNodesAreOfType(root, nodeType) {
	const { children } = root;
	// An empty document never counts as being "all of" a type.
	if (children.length === 0) return false;
	const matchesType = NODE_TYPE_CHECKERS[nodeType];
	return matchesType ? children.every((child) => matchesType(child)) : false;
}
418
+ /**
419
+ * Checks if the root contains only comment and empty nodes
420
+ * @param {RootNode} root - The root node containing children
421
+ * @returns {boolean} true if the root contains only comments and empty nodes
422
+ */
423
function isCommentOnlyDocument(root) {
	// The document qualifies when no child is anything other than a "comment" or
	// an "empty" node. Other tags (including "empty-comment" and "boundary")
	// disqualify it; a document without children qualifies.
	const isSomethingElse = (child) => !isCommentNode(child) && !isEmptyNode(child);
	return !root.children.some(isSomethingElse);
}
426
+ /**
427
+ * Checks if there's a boundary node within the next N nodes
428
+ * @param {RootNode} root - The root node containing children
429
+ * @param {number} startIndex - The starting index to check from
430
+ * @param {number} lookAhead - Number of nodes to look ahead
431
+ * @returns {boolean} true if a boundary node is found within the range
432
+ */
433
function hasBoundaryWithinRange(root, startIndex, lookAhead) {
	if (startIndex < 0 || lookAhead <= 0) return false;
	// Clamp the window to the end of the document; the node at startIndex is included.
	const limit = Math.min(startIndex + lookAhead, root.children.length);
	let cursor = startIndex;
	while (cursor < limit) {
		if (isBoundaryNode(root.children[cursor])) return true;
		cursor++;
	}
	return false;
}
439
/**
 * Calls `callback` once per child of `root`, in order, passing a context object
 * with the current node and its immediate neighbours.
 *
 * @param {RootNode} root - Node whose `children` are walked; a missing root or missing children is a no-op
 * @param {Function} callback - Receives `{ settings, currentNode, nextNode, prevNode }`; `settings` is always null,
 *   and `nextNode` / `prevNode` are undefined at the respective ends
 * @throws {Error} If a child entry is null or undefined
 */
function visit(root, callback) {
	if (!root || !root.children) return;
	for (let index = 0; index < root.children.length; index++) {
		const currentNode = root.children[index];
		if (currentNode == null) throw new Error(`Node at index ${index} is null or undefined`);
		const context = {
			settings: null,
			currentNode,
			nextNode: root.children[index + 1],
			prevNode: root.children[index - 1]
		};
		callback(context);
	}
}
454
+
455
+ //#endregion
456
+ //#region src/inference/heading-settings.ts
457
/**
 * Heading-inference overrides, one entry per file name and version.
 * Entries are read as `{ fileName, version, settings }`; the list ships empty.
 */
const HEADING_SETTINGS_CONFIG = [];
/**
 * Looks up the heading settings registered for an exact file name and version.
 *
 * @param {string} [fileName] - File name to match
 * @param {string} [version] - Version to match
 * @returns The `settings` of the first matching entry, or null when either
 *   argument is falsy or no entry matches
 */
function getHeadingSettings(fileName, version) {
	if (!fileName || !version) return null;
	for (const config of HEADING_SETTINGS_CONFIG) {
		if (config.fileName === fileName && config.version === version) return config.settings;
	}
	return null;
}
464
+
465
+ //#endregion
466
+ //#region src/inference/heading.ts
467
/**
 * Helper that reports whether a node is any comment-like line:
 * a regular comment, an empty comment, or a boundary line.
 */
function isAnyCommentNode(node) {
	const commentLikeGuards = [isCommentNode, isEmptyCommentNode, isBoundaryNode];
	return commentLikeGuards.some((guard) => guard(node));
}
473
/**
 * Infers the heading (leading comment block) of a parsed data file.
 *
 * Walks the children of `root` from the top, accumulating the raw text of
 * comment-like lines until a stop condition is hit. If a stop index was
 * recorded, the heading is rebuilt from the raw text of the nodes before it.
 *
 * @param {RootNode} root - Parsed file; `fileName` and `version` are used to look up per-file settings
 * @param {object} [settings] - Optional overrides for `allowEmptyLines` / `allowMultipleBoundaries`
 *   (both default to true); explicit settings win over the per-file lookup
 * @returns {string | null} The heading text (each line terminated by "\n"), or null when
 *   the root has no children or no heading was collected
 * @throws If an EOF marker/node is reached after heading text was already collected
 *   (raised by `invariant`)
 */
function inferHeadingFromAST(root, settings) {
	if (!root || !root.children || root.children.length === 0) return null;
	let heading = null;
	let isInHeading = false;
	let headingEndNodeIndex = -1;
	let shouldStop = false;
	const nodes = root.children;
	const { allowEmptyLines, allowMultipleBoundaries } = defu(settings ?? {}, getHeadingSettings(root.fileName, root.version) ?? {}, {
		allowEmptyLines: true,
		allowMultipleBoundaries: true
	});
	// `visit` invokes the callback exactly once per child, in order, so a running
	// counter yields the current index without an O(n) `indexOf` on every node.
	let visitedIndex = -1;
	visit(root, (ctx) => {
		visitedIndex++;
		const { currentNode, nextNode, prevNode } = ctx;
		// `visit` has no early exit, so remaining nodes are skipped via this flag.
		if (shouldStop) return;
		const currentIndex = visitedIndex;
		const value = currentNode.value.trim();
		if (isEOFMarker(currentNode.raw) || nextNode && isEOFMarker(nextNode.raw)) {
			invariant(heading == null, "heading should be null");
			shouldStop = true;
		}
		if (isEOFNode(currentNode)) {
			invariant(heading == null, "heading should be null");
			shouldStop = true;
		}
		if (shouldStop) return;
		if (value.startsWith("@")) {
			// An "@" line ends the heading unless it is surrounded by comment-like lines on both sides.
			if (!(prevNode && isAnyCommentNode(prevNode)) || !nextNode || !isAnyCommentNode(nextNode)) {
				headingEndNodeIndex = currentIndex;
				shouldStop = true;
			}
		} else if (isAnyCommentNode(currentNode)) {
			isInHeading = true;
			if (heading == null) heading = "";
			if (!heading && value === "#") {
				// Nothing collected yet and the value is a bare "#": skip it.
			} else if (value.startsWith("# Property:")) {
				headingEndNodeIndex = currentIndex;
				shouldStop = true;
			} else {
				if (isBoundaryNode(currentNode)) {
					if (!allowMultipleBoundaries) {
						// With a single boundary allowed, a second boundary ends the heading.
						let hasPreviousBoundary = false;
						for (let k = 0; k < currentIndex; k++) {
							if (isBoundaryNode(nodes[k])) {
								hasPreviousBoundary = true;
								break;
							}
						}
						if (hasPreviousBoundary) {
							headingEndNodeIndex = currentIndex;
							shouldStop = true;
						}
					}
					if (!shouldStop) {
						// Inspect the first node (within the next 4) whose value is not a bare "#".
						let j = currentIndex + 1;
						let foundDataLine = false;
						while (j < nodes.length && j < currentIndex + 5) {
							const lookAheadNode = nodes[j];
							if (!lookAheadNode || lookAheadNode.value.trim() === "#") {
								j++;
								continue;
							}
							const nextIsBoundary = isBoundaryNode(lookAheadNode);
							const nextIsExample = isAnyCommentNode(lookAheadNode) && nodes[j + 1]?.value.trim().startsWith("@") && nodes[j + 2] && isAnyCommentNode(nodes[j + 2]);
							const nextIsProperty = lookAheadNode.value.trim().startsWith("# Property:");
							if (!nextIsBoundary && !nextIsExample && !nextIsProperty) foundDataLine = true;
							break;
						}
						if (foundDataLine) {
							// NOTE(review): the +2 offset is kept from the original; its intent is not evident here.
							headingEndNodeIndex = currentIndex + 2;
							shouldStop = true;
						}
					}
				}
				if (!shouldStop) heading = `${heading}${currentNode.raw}\n`;
			}
		} else if (isEmptyNode(currentNode)) {
			if (heading && nextNode && isAnyCommentNode(nextNode)) {
				// Blank line directly followed by a comment-like line.
				if (allowEmptyLines) heading = `${heading}${currentNode.raw}\n`;
				else {
					headingEndNodeIndex = currentIndex;
					shouldStop = true;
				}
			} else if (isInHeading) {
				if (!allowEmptyLines) {
					headingEndNodeIndex = currentIndex;
					shouldStop = true;
				} else {
					// Look up to 4 nodes ahead to decide whether the heading continues after this blank line.
					let hasMoreComments = false;
					for (let j = currentIndex + 1; j < nodes.length && j < currentIndex + 5; j++) {
						const lookAheadNode = nodes[j];
						const nextValue = lookAheadNode?.value.trim();
						if (nextValue !== "" && !isAnyCommentNode(lookAheadNode)) {
							headingEndNodeIndex = currentIndex;
							shouldStop = true;
							break;
						}
						if (isAnyCommentNode(lookAheadNode) && nextValue !== "#") {
							if (nextValue?.startsWith("# Property:")) break;
							hasMoreComments = true;
							break;
						}
					}
					if (!shouldStop) {
						if (hasMoreComments) heading = `${heading}${currentNode.raw}\n`;
						else {
							headingEndNodeIndex = currentIndex;
							shouldStop = true;
						}
					}
				}
			}
		} else if (isInHeading) {
			// Any other kind of node after the heading has started ends it.
			headingEndNodeIndex = currentIndex;
			shouldStop = true;
		}
	});
	if (headingEndNodeIndex !== -1) {
		let endNodesWithoutEmpty = headingEndNodeIndex;
		if (allowMultipleBoundaries) {
			// Cut right after the last boundary found at or before the recorded end index.
			let lastBoundaryNodeIndex = -1;
			for (let i = 0; i <= endNodesWithoutEmpty; i++) if (isBoundaryNode(nodes[i])) lastBoundaryNodeIndex = i;
			if (lastBoundaryNodeIndex !== -1) endNodesWithoutEmpty = lastBoundaryNodeIndex + 1;
		}
		// Trim trailing filler: blank values, bare "#" values and empty comments
		// when empty lines are allowed, otherwise only empty comments.
		if (allowEmptyLines) {
			while (endNodesWithoutEmpty > 0) {
				const prevNode = nodes[endNodesWithoutEmpty - 1];
				const prevValue = prevNode?.value.trim();
				if (prevValue !== "" && prevValue !== "#" && !isEmptyCommentNode(prevNode)) break;
				endNodesWithoutEmpty--;
			}
		} else {
			while (endNodesWithoutEmpty > 0) {
				const prevNode = nodes[endNodesWithoutEmpty - 1];
				if (!isEmptyCommentNode(prevNode)) break;
				endNodesWithoutEmpty--;
			}
		}
		// Rebuild the heading from the raw lines, replacing whatever was accumulated during the walk.
		heading = `${nodes.slice(0, endNodesWithoutEmpty).map((node) => node.raw).join("\n")}\n`;
	}
	return heading;
}
603
+
604
+ //#endregion
605
+ //#region src/datafile/parser.ts
606
+ /**
607
+ * Creates a node object from a single line of a data file.
608
+ *
609
+ * This function analyzes the given line and converts it to the appropriate
610
+ * DataFileChildNode type (Empty, Boundary, Comment, Data, or Unknown)
611
+ * based on the line's content and structure.
612
+ *
613
+ * @param {string} line - The text line to parse into a node
614
+ * @param {number} lineNumber - The line number in the original file (0-based index)
615
+ * @returns {ChildNode} A node object representing the parsed line
616
+ */
617
function createNode(line, lineNumber) {
	const trimmedLine = line.trim();
	// Every node shares the same four base fields; `extra` carries type-specific ones.
	const makeNode = (type, value, extra = {}) => ({
		type,
		value,
		raw: line,
		line: lineNumber,
		...extra
	});
	// The order of the checks below matters: the first matching classification wins.
	if (isEmptyLine(line)) return makeNode(NodeTypes.EMPTY, "");
	if (isBoundaryLine(line)) {
		let style;
		try {
			style = getBoundaryLineStyle(line);
		} catch {
			// A boundary whose style cannot be determined is downgraded to UNKNOWN.
			return makeNode(NodeTypes.UNKNOWN, trimmedLine);
		}
		return makeNode(NodeTypes.BOUNDARY, trimmedLine, { style });
	}
	if (isEOFMarker(line)) return makeNode(NodeTypes.EOF, trimmedLine);
	if (isPropertyLine(line)) return makeNode(NodeTypes.PROPERTY, trimmedLine, { propertyValue: getPropertyValue(trimmedLine) });
	if (isCommentLine(line)) {
		const trimmedComment = trimCommentLine(line);
		// A comment with nothing left after trimming gets its own node type.
		const commentType = trimmedComment === "" ? NodeTypes.EMPTY_COMMENT : NodeTypes.COMMENT;
		return makeNode(commentType, trimmedComment);
	}
	if (isLineWithData(line)) return makeNode(NodeTypes.DATA, trimmedLine);
	/* v8 ignore next */
	return makeNode(NodeTypes.UNKNOWN, trimmedLine);
}
681
+ /**
682
+ * Parses a data file content string into a structured DataFileRootNode object.
683
+ *
684
+ * This function splits the content by line breaks, processes each line into
685
+ * appropriate node types (Empty, Boundary, Comment, Data, or Unknown), and
686
+ * assembles them into a root node with metadata.
687
+ *
688
+ * @param {string} content - The full content of the data file to parse
689
+ * @param {string} [fileName] - Optional explicit file name. If not provided, will be inferred from content
690
+ * @returns {RootNode} A structured representation of the data file
691
+ */
692
function parseDataFileIntoAst(content, fileName) {
	// Both LF and CRLF line endings are accepted; line numbers are 0-based.
	const children = [];
	for (const [index, line] of content.split(/\r?\n/).entries()) {
		children.push(createNode(line, index));
	}
	// An explicit file name wins; otherwise it is inferred from the content.
	const resolvedFileName = fileName ?? inferFileName(content);
	const version = inferVersion(content);
	return {
		type: NodeTypes.ROOT,
		value: "",
		raw: content,
		line: 0,
		children,
		fileName: resolvedFileName,
		version
	};
}
704
+
705
+ //#endregion
706
+ //#region src/datafile/sections.ts
707
+ /**
708
+ * Determines whether the given Unicode data file content contains sections.
709
+ *
710
+ * Sections in Unicode data files are typically delimited by special comment
711
+ * patterns and contain related data grouped together.
712
+ *
713
+ * @param {string} content - The Unicode data file content to check
714
+ * @returns {boolean} True if the content contains sections, false otherwise
715
+ *
716
+ * @example
717
+ * ```ts
718
+ * const fileContent = "# Section 1\ndata1\n\n# Section 2\ndata2";
719
+ * const hasFileSections = hasSections(fileContent); // true
720
+ * ```
721
+ */
722
function hasSections(content) {
	// Falsy content (empty string, null, undefined) cannot contain sections.
	return content ? parseSections(content).size > 0 : false;
}
726
+ /**
727
+ * Parses Unicode data file content into sections.
728
+ *
729
+ * This function divides the file content into logical sections based on comment blocks
730
+ * followed by data lines. Each section consists of a name (the first comment line),
731
+ * a description (subsequent comment lines), and associated data lines.
732
+ *
733
+ * The function handles various formatting patterns found in Unicode data files,
734
+ * including handling of empty lines, consecutive comments, and section boundaries.
735
+ *
736
+ * @param {string} content - The Unicode data file content to parse
737
+ * @returns {Map<string, UCDSectionWithLines>} A map where keys are section names and
738
+ * values are objects containing the
739
+ * section description and associated data lines
740
+ *
741
+ * @example
742
+ * ```ts
743
+ * const content = `# Section 1
744
+ * # Description of section 1
745
+ * data1
746
+ * data2
747
+ *
748
+ * # Section 2
749
+ * # Description of section 2
750
+ * data3
751
+ * data4`;
752
+ *
753
+ * const sections = parseSections(content);
754
+ * // sections will contain two entries:
755
+ * // "Section 1" -> { description: "Description of section 1", lines: ["data1", "data2"] }
756
+ * // "Section 2" -> { description: "Description of section 2", lines: ["data3", "data4"] }
757
+ * ```
758
+ */
759
function parseSections(content) {
	const sections = /* @__PURE__ */ new Map();
	if (!content) return sections;
	// Accept both LF and CRLF line endings (as parseDataFileIntoAst does) so that
	// section names, descriptions and data lines never carry a trailing "\r".
	const lines = content.split(/\r?\n/);
	let currentSection = null;
	let currentDescription = "";
	let currentLines = [];
	// Comment lines seen since the last data line or reset; they become the
	// name and description of the next section once a data line follows.
	let pendingComments = [];
	// Stores the section being accumulated, if there is one.
	const storeCurrentSection = () => {
		if (currentSection !== null) sections.set(currentSection, {
			description: currentDescription,
			lines: currentLines
		});
	};
	for (let i = 0; i < lines.length; i++) {
		const line = lines[i];
		if (isEmptyLine(line)) {
			// A blank line discards the pending comments unless the next
			// non-blank line is a data line.
			let nextNonEmptyIsData = false;
			for (let j = i + 1; j < lines.length; j++) {
				const lookAhead = lines[j];
				if (!isEmptyLine(lookAhead)) {
					nextNonEmptyIsData = !isCommentLine(lookAhead);
					break;
				}
			}
			if (!nextNonEmptyIsData) pendingComments = [];
			continue;
		}
		if (isCommentLine(line)) {
			// Boundary lines are decoration and never part of a name or description.
			if (isBoundaryLine(line)) continue;
			pendingComments.push(line.replace(/^#\s*/, ""));
		} else if (pendingComments.length > 0) {
			// First data line after a comment block: close the previous section and
			// open a new one named after the first pending comment.
			storeCurrentSection();
			currentSection = pendingComments[0];
			currentDescription = pendingComments.slice(1).join("\n");
			currentLines = [line];
			pendingComments = [];
		} else if (currentSection !== null) {
			currentLines.push(line);
		}
	}
	storeCurrentSection();
	return sections;
}
803
+
804
+ //#endregion
805
+ //#region src/datafile/model.ts
806
/**
 * Represents a raw Unicode data file with methods to access its content.
 *
 * On construction the content is parsed into an AST, the heading (if one can be
 * inferred from the AST) is separated from the rest of the content, and the
 * remaining content is split into lines and sections.
 *
 * @example
 * ```ts
 * // Create a RawDataFile from a string content
 * const content = "# UnicodeData-14.0.0.txt\n# Some Unicode data\n\nU+0020;SPACE\n# EOF";
 * const dataFile = new RawDataFile(content);
 *
 * // Access file properties
 * console.log(dataFile.rawContent === content); // true
 * console.log(dataFile.fileName); // file name reported by the AST parser (none was passed in)
 * console.log(dataFile.version);  // version reported by the AST parser
 * console.log(dataFile.heading);  // heading inferred from the AST, or null
 * console.log(dataFile.hasEOF);   // whether the last non-blank line is an EOF marker
 * ```
 */
var RawDataFile = class {
	/** The content includes everything */
	rawContent = "";
	/**
	 * The content without the heading section.
	 *
	 * When a heading was found, its first occurrence is removed from the raw
	 * content and the result is trimmed.
	 *
	 * NOTE:
	 * If we couldn't find a heading, this will be the same as `rawContent`
	 * (untrimmed).
	 */
	content = "";
	/** The lines of `content` (split on "\n"), will not include the heading */
	lines = [];
	/** The heading inferred from the AST, or `null` when none was found. */
	heading = null;
	/**
	 * The AST representation of the data file, as produced by `parseDataFileIntoAst`.
	 * It stays undefined only if construction fails before parsing.
	 */
	ast = void 0;
	/** The sections parsed from `content` by `parseSections`, keyed by section name. */
	sections = /* @__PURE__ */ new Map();
	/**
	 * The name of the file, if available.
	 * The `fileName` constructor argument takes precedence; otherwise the
	 * file name reported by the AST is used.
	 */
	fileName = void 0;
	/**
	 * The version of the file, if available, as reported by the AST.
	 */
	version = void 0;
	/**
	 * Indicates if the file has an EOF marker, i.e. whether the last non-blank
	 * line of `content` is recognised by `isEOFMarker`.
	 */
	hasEOF = false;
	/**
	 * @param {string} content - The full text of the data file
	 * @param {string} [fileName] - Optional file name; forwarded to the AST parser and
	 *                              preferred over the file name found in the AST
	 * @throws {Error} If `content` is null, undefined or contains only whitespace
	 */
	constructor(content, fileName) {
		if (content == null || content.trim() === "") throw new Error("content is empty");
		this.ast = parseDataFileIntoAst(content, fileName);
		this.rawContent = this.content = content;
		this.heading = inferHeadingFromAST(this.ast);
		if (this.heading != null) this.content = content.replace(this.heading, "").trim();
		this.lines = this.content.split("\n");
		this.fileName = fileName ?? this.ast.fileName;
		this.version = this.ast.version;
		// `content` is only trimmed when a heading was stripped, so heading-less input
		// that ends with a newline has "" as its final line. Check the last non-blank
		// line instead, so the EOF marker is detected in both cases.
		let lastLine = this.lines.at(-1);
		for (let index = this.lines.length - 1; index >= 0; index--) {
			if (this.lines[index].trim() !== "") {
				lastLine = this.lines[index];
				break;
			}
		}
		this.hasEOF = isEOFMarker(lastLine);
		this.sections = parseSections(this.content);
	}
};
875
+
876
+ //#endregion
877
+ export { isUnknownNode as _, HEADING_SETTINGS_CONFIG as a, isBoundaryNode as c, isEOFNode as d, isEmptyCommentNode as f, isRootNode as g, isPropertyNode as h, inferHeadingFromAST as i, isCommentNode as l, isNode as m, hasSections as n, getHeadingSettings as o, isEmptyNode as p, parseSections as r, ast_utils_exports as s, RawDataFile as t, isDataNode as u, NodeTypes as v };