@unicode-utils/parser 0.12.0-beta.19 → 0.12.0-beta.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-DgIJwGJC.mjs → chunk-DQk6qfdC.mjs} +3 -3
- package/dist/{datafile-C8Gow2D5.mjs → datafile-BwUQI7Vy.mjs} +2 -2
- package/dist/{datafile-BcAce1H5.d.mts → datafile-FYBnmeYg.d.mts} +1 -1
- package/dist/datafile.d.mts +1 -1
- package/dist/datafile.mjs +1 -1
- package/dist/index.d.mts +2 -2
- package/dist/index.mjs +2 -2
- package/dist/line-helpers.d.mts +1 -1
- package/dist/line-helpers.mjs +372 -1
- package/package.json +9 -11
- package/dist/line-helpers-C2zYZ-C-.mjs +0 -374
- package/dist/{line-helpers-DEQe0sT8.d.mts → line-helpers-Cx9GykB0.d.mts} +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
//#region
|
|
1
|
+
//#region \0rolldown/runtime.js
|
|
2
2
|
var __defProp = Object.defineProperty;
|
|
3
|
-
var __exportAll = (all, symbols) => {
|
|
3
|
+
var __exportAll = (all, no_symbols) => {
|
|
4
4
|
let target = {};
|
|
5
5
|
for (var name in all) {
|
|
6
6
|
__defProp(target, name, {
|
|
@@ -8,7 +8,7 @@ var __exportAll = (all, symbols) => {
|
|
|
8
8
|
enumerable: true
|
|
9
9
|
});
|
|
10
10
|
}
|
|
11
|
-
if (symbols) {
|
|
11
|
+
if (!no_symbols) {
|
|
12
12
|
__defProp(target, Symbol.toStringTag, { value: "Module" });
|
|
13
13
|
}
|
|
14
14
|
return target;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { t as __exportAll } from "./chunk-DgIJwGJC.mjs";
|
|
2
|
-
import {
|
|
1
|
+
import { t as __exportAll } from "./chunk-DQk6qfdC.mjs";
|
|
2
|
+
import { getBoundaryLineStyle, getPropertyValue, inferFileName, inferVersion, isBoundaryLine, isCommentLine, isEOFMarker, isEmptyLine, isLineWithData, isPropertyLine, trimCommentLine } from "./line-helpers.mjs";
|
|
3
3
|
import { invariant } from "@luxass/utils";
|
|
4
4
|
import defu from "defu";
|
|
5
5
|
|
package/dist/datafile.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { C as EmptyNode, D as PropertyNode, E as NodeTypes, O as RootNode, S as EmptyCommentNode, T as NodeType, _ as BoundaryNode, a as isEmptyCommentNode, b as DataNode, c as isPropertyNode, d as RawDataFile, f as UCDSectionWithLines, g as BaseNode, h as ast_utils_d_exports, i as isEOFNode, k as UnknownNode, l as isRootNode, m as parseSections, n as isCommentNode, o as isEmptyNode, p as hasSections, r as isDataNode, s as isNode, t as isBoundaryNode, u as isUnknownNode, v as ChildNode, w as Node, x as EOFNode, y as CommentNode } from "./datafile-BcAce1H5.mjs";
|
|
1
|
+
import { C as EmptyNode, D as PropertyNode, E as NodeTypes, O as RootNode, S as EmptyCommentNode, T as NodeType, _ as BoundaryNode, a as isEmptyCommentNode, b as DataNode, c as isPropertyNode, d as RawDataFile, f as UCDSectionWithLines, g as BaseNode, h as ast_utils_d_exports, i as isEOFNode, k as UnknownNode, l as isRootNode, m as parseSections, n as isCommentNode, o as isEmptyNode, p as hasSections, r as isDataNode, s as isNode, t as isBoundaryNode, u as isUnknownNode, v as ChildNode, w as Node, x as EOFNode, y as CommentNode } from "./datafile-FYBnmeYg.mjs";
|
|
2
2
|
export { BaseNode, BoundaryNode, ChildNode, CommentNode, DataNode, EOFNode, EmptyCommentNode, EmptyNode, Node, NodeType, NodeTypes, PropertyNode, RawDataFile, RootNode, UCDSectionWithLines, UnknownNode, ast_utils_d_exports as astUtils, hasSections, isBoundaryNode, isCommentNode, isDataNode, isEOFNode, isEmptyCommentNode, isEmptyNode, isNode, isPropertyNode, isRootNode, isUnknownNode, parseSections };
|
package/dist/datafile.mjs
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import { _ as isUnknownNode, c as isBoundaryNode, d as isEOFNode, f as isEmptyCommentNode, g as isRootNode, h as isPropertyNode, l as isCommentNode, m as isNode, n as hasSections, p as isEmptyNode, r as parseSections, s as ast_utils_exports, t as RawDataFile, u as isDataNode, v as NodeTypes } from "./datafile-C8Gow2D5.mjs";
|
|
1
|
+
import { _ as isUnknownNode, c as isBoundaryNode, d as isEOFNode, f as isEmptyCommentNode, g as isRootNode, h as isPropertyNode, l as isCommentNode, m as isNode, n as hasSections, p as isEmptyNode, r as parseSections, s as ast_utils_exports, t as RawDataFile, u as isDataNode, v as NodeTypes } from "./datafile-BwUQI7Vy.mjs";
|
|
2
2
|
|
|
3
3
|
export { NodeTypes, RawDataFile, ast_utils_exports as astUtils, hasSections, isBoundaryNode, isCommentNode, isDataNode, isEOFNode, isEmptyCommentNode, isEmptyNode, isNode, isPropertyNode, isRootNode, isUnknownNode, parseSections };
|
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { _ as isMissingAnnotation, a as getBoundaryLineStyle, b as parseMissingAnnotation, c as inferVersion, d as isDashBoundary, f as isEOFMarker, g as isLineWithData, h as isHashBoundary, i as SpecialTag, l as isBoundaryLine, m as isEqualsBoundary, n as MissingAnnotation, o as getPropertyValue, p as isEmptyLine, r as ParsedFileName, s as inferFileName, t as BoundaryStyle, u as isCommentLine, v as isPropertyLine, x as trimCommentLine, y as parseFileNameLine } from "./line-helpers-DEQe0sT8.mjs";
|
|
2
|
-
import { C as EmptyNode, D as PropertyNode, E as NodeTypes, O as RootNode, S as EmptyCommentNode, T as NodeType, _ as BoundaryNode, a as isEmptyCommentNode, b as DataNode, c as isPropertyNode, d as RawDataFile, f as UCDSectionWithLines, g as BaseNode, h as ast_utils_d_exports, i as isEOFNode, k as UnknownNode, l as isRootNode, m as parseSections, n as isCommentNode, o as isEmptyNode, p as hasSections, r as isDataNode, s as isNode, t as isBoundaryNode, u as isUnknownNode, v as ChildNode, w as Node, x as EOFNode, y as CommentNode } from "./datafile-BcAce1H5.mjs";
|
|
1
|
+
import { _ as isMissingAnnotation, a as getBoundaryLineStyle, b as parseMissingAnnotation, c as inferVersion, d as isDashBoundary, f as isEOFMarker, g as isLineWithData, h as isHashBoundary, i as SpecialTag, l as isBoundaryLine, m as isEqualsBoundary, n as MissingAnnotation, o as getPropertyValue, p as isEmptyLine, r as ParsedFileName, s as inferFileName, t as BoundaryStyle, u as isCommentLine, v as isPropertyLine, x as trimCommentLine, y as parseFileNameLine } from "./line-helpers-Cx9GykB0.mjs";
|
|
2
|
+
import { C as EmptyNode, D as PropertyNode, E as NodeTypes, O as RootNode, S as EmptyCommentNode, T as NodeType, _ as BoundaryNode, a as isEmptyCommentNode, b as DataNode, c as isPropertyNode, d as RawDataFile, f as UCDSectionWithLines, g as BaseNode, h as ast_utils_d_exports, i as isEOFNode, k as UnknownNode, l as isRootNode, m as parseSections, n as isCommentNode, o as isEmptyNode, p as hasSections, r as isDataNode, s as isNode, t as isBoundaryNode, u as isUnknownNode, v as ChildNode, w as Node, x as EOFNode, y as CommentNode } from "./datafile-FYBnmeYg.mjs";
|
|
3
3
|
|
|
4
4
|
//#region src/inference/heading.d.ts
|
|
5
5
|
interface InferHeadingSettings {
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { _ as isUnknownNode, a as HEADING_SETTINGS_CONFIG, c as isBoundaryNode, d as isEOFNode, f as isEmptyCommentNode, g as isRootNode, h as isPropertyNode, i as inferHeadingFromAST, l as isCommentNode, m as isNode, n as hasSections, o as getHeadingSettings, p as isEmptyNode, r as parseSections, s as ast_utils_exports, t as RawDataFile, u as isDataNode, v as NodeTypes } from "./datafile-C8Gow2D5.mjs";
|
|
2
|
-
import {
|
|
1
|
+
import { _ as isUnknownNode, a as HEADING_SETTINGS_CONFIG, c as isBoundaryNode, d as isEOFNode, f as isEmptyCommentNode, g as isRootNode, h as isPropertyNode, i as inferHeadingFromAST, l as isCommentNode, m as isNode, n as hasSections, o as getHeadingSettings, p as isEmptyNode, r as parseSections, s as ast_utils_exports, t as RawDataFile, u as isDataNode, v as NodeTypes } from "./datafile-BwUQI7Vy.mjs";
|
|
2
|
+
import { getBoundaryLineStyle, getPropertyValue, inferFileName, inferVersion, isBoundaryLine, isCommentLine, isDashBoundary, isEOFMarker, isEmptyLine, isEqualsBoundary, isHashBoundary, isLineWithData, isMissingAnnotation, isPropertyLine, parseFileNameLine, parseMissingAnnotation, trimCommentLine } from "./line-helpers.mjs";
|
|
3
3
|
|
|
4
4
|
export { HEADING_SETTINGS_CONFIG, NodeTypes, RawDataFile, ast_utils_exports as astUtils, getBoundaryLineStyle, getHeadingSettings, getPropertyValue, hasSections, inferFileName, inferHeadingFromAST, inferVersion, isBoundaryLine, isBoundaryNode, isCommentLine, isCommentNode, isDashBoundary, isDataNode, isEOFMarker, isEOFNode, isEmptyCommentNode, isEmptyLine, isEmptyNode, isEqualsBoundary, isHashBoundary, isLineWithData, isMissingAnnotation, isNode, isPropertyLine, isPropertyNode, isRootNode, isUnknownNode, parseFileNameLine, parseMissingAnnotation, parseSections, trimCommentLine };
|
package/dist/line-helpers.d.mts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { _ as isMissingAnnotation, a as getBoundaryLineStyle, b as parseMissingAnnotation, c as inferVersion, d as isDashBoundary, f as isEOFMarker, g as isLineWithData, h as isHashBoundary, i as SpecialTag, l as isBoundaryLine, m as isEqualsBoundary, n as MissingAnnotation, o as getPropertyValue, p as isEmptyLine, r as ParsedFileName, s as inferFileName, t as BoundaryStyle, u as isCommentLine, v as isPropertyLine, x as trimCommentLine, y as parseFileNameLine } from "./line-helpers-DEQe0sT8.mjs";
|
|
1
|
+
import { _ as isMissingAnnotation, a as getBoundaryLineStyle, b as parseMissingAnnotation, c as inferVersion, d as isDashBoundary, f as isEOFMarker, g as isLineWithData, h as isHashBoundary, i as SpecialTag, l as isBoundaryLine, m as isEqualsBoundary, n as MissingAnnotation, o as getPropertyValue, p as isEmptyLine, r as ParsedFileName, s as inferFileName, t as BoundaryStyle, u as isCommentLine, v as isPropertyLine, x as trimCommentLine, y as parseFileNameLine } from "./line-helpers-Cx9GykB0.mjs";
|
|
2
2
|
export { BoundaryStyle, MissingAnnotation, ParsedFileName, SpecialTag, getBoundaryLineStyle, getPropertyValue, inferFileName, inferVersion, isBoundaryLine, isCommentLine, isDashBoundary, isEOFMarker, isEmptyLine, isEqualsBoundary, isHashBoundary, isLineWithData, isMissingAnnotation, isPropertyLine, parseFileNameLine, parseMissingAnnotation, trimCommentLine };
|
package/dist/line-helpers.mjs
CHANGED
|
@@ -1,3 +1,374 @@
|
|
|
1
|
-
|
|
1
|
+
//#region src/line-helpers.ts
|
|
2
|
+
const HASH_BOUNDARY_REGEX = /^\s*#\s*#{2,}\s*$/;
|
|
3
|
+
const EQUALS_BOUNDARY_REGEX = /^\s*#\s*={2,}\s*$/;
|
|
4
|
+
const DASH_BOUNDARY_REGEX = /^\s*#\s*-{2,}\s*$/;
|
|
5
|
+
/**
|
|
6
|
+
* Determines if a line is an End-of-File (EOF) marker.
|
|
7
|
+
*
|
|
8
|
+
* In Unicode data files, the EOF marker is typically represented
|
|
9
|
+
* as a line containing only "# EOF".
|
|
10
|
+
*
|
|
11
|
+
* @param {string} [line] - The line to check
|
|
12
|
+
* @returns {boolean} True if the line is an EOF marker, false otherwise
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* ```ts
|
|
16
|
+
* isEOFMarker("# EOF"); // true
|
|
17
|
+
* isEOFMarker("Some text"); // false
|
|
18
|
+
* isEOFMarker(); // false
|
|
19
|
+
* ```
|
|
20
|
+
*/
|
|
21
|
+
function isEOFMarker(line) {
|
|
22
|
+
if (!line) return false;
|
|
23
|
+
return line.trim() === "# EOF";
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Determines if a line contains a hash boundary pattern.
|
|
27
|
+
*
|
|
28
|
+
* A hash boundary is a line containing a pattern like "# ###" (# followed by multiple #).
|
|
29
|
+
* These patterns are used in Unicode data files to separate different sections of content.
|
|
30
|
+
*
|
|
31
|
+
* @param {string} line - The line to check
|
|
32
|
+
* @returns {boolean} True if the line contains a hash boundary pattern, false otherwise
|
|
33
|
+
*
|
|
34
|
+
* @example
|
|
35
|
+
* ```ts
|
|
36
|
+
* isHashBoundary("# #####"); // true
|
|
37
|
+
* isHashBoundary("# Some text"); // false
|
|
38
|
+
* isHashBoundary(""); // false
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
function isHashBoundary(line) {
|
|
42
|
+
if (!line) return false;
|
|
43
|
+
return HASH_BOUNDARY_REGEX.test(line);
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Determines if a line contains an equals boundary pattern.
|
|
47
|
+
*
|
|
48
|
+
* An equals boundary is a line containing a pattern like "# ===" (# followed by multiple =).
|
|
49
|
+
* These patterns are used in Unicode data files to separate different sections of content.
|
|
50
|
+
*
|
|
51
|
+
* @param {string} line - The line to check
|
|
52
|
+
* @returns {boolean} True if the line contains an equals boundary pattern, false otherwise
|
|
53
|
+
*
|
|
54
|
+
* @example
|
|
55
|
+
* ```ts
|
|
56
|
+
* isEqualsBoundary("# ====="); // true
|
|
57
|
+
* isEqualsBoundary("# Some text"); // false
|
|
58
|
+
* isEqualsBoundary(""); // false
|
|
59
|
+
* ```
|
|
60
|
+
*/
|
|
61
|
+
function isEqualsBoundary(line) {
|
|
62
|
+
if (!line) return false;
|
|
63
|
+
return EQUALS_BOUNDARY_REGEX.test(line);
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Determines if a line contains a dash boundary pattern.
|
|
67
|
+
*
|
|
68
|
+
* A dash boundary is a line containing a pattern like "# ---" (# followed by multiple -).
|
|
69
|
+
* These patterns are used in Unicode data files to separate different sections of content.
|
|
70
|
+
*
|
|
71
|
+
* @param {string} line - The line to check
|
|
72
|
+
* @returns {boolean} True if the line contains a dash boundary pattern, false otherwise
|
|
73
|
+
*
|
|
74
|
+
* @example
|
|
75
|
+
* ```ts
|
|
76
|
+
* isDashBoundary("# -----"); // true
|
|
77
|
+
* isDashBoundary("# Some text"); // false
|
|
78
|
+
* isDashBoundary(""); // false
|
|
79
|
+
* ```
|
|
80
|
+
*/
|
|
81
|
+
function isDashBoundary(line) {
|
|
82
|
+
if (!line) return false;
|
|
83
|
+
return DASH_BOUNDARY_REGEX.test(line);
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Determines if a line is any type of boundary line.
|
|
87
|
+
*
|
|
88
|
+
* A boundary line is any line that matches one of the boundary patterns:
|
|
89
|
+
* hash boundary, equals boundary, or dash boundary. These patterns are used
|
|
90
|
+
* in Unicode data files to separate different sections of content.
|
|
91
|
+
*
|
|
92
|
+
* @param {string} line - The line to check
|
|
93
|
+
* @returns {boolean} True if the line is a boundary line, false otherwise
|
|
94
|
+
*
|
|
95
|
+
* @example
|
|
96
|
+
* ```ts
|
|
97
|
+
* isBoundaryLine("# #####"); // true (hash boundary)
|
|
98
|
+
* isBoundaryLine("# ====="); // true (equals boundary)
|
|
99
|
+
* isBoundaryLine("# -----"); // true (dash boundary)
|
|
100
|
+
* isBoundaryLine("# Some text"); // false
|
|
101
|
+
* isBoundaryLine(""); // false
|
|
102
|
+
* ```
|
|
103
|
+
*/
|
|
104
|
+
function isBoundaryLine(line) {
|
|
105
|
+
if (!line) return false;
|
|
106
|
+
return isHashBoundary(line) || isEqualsBoundary(line) || isDashBoundary(line);
|
|
107
|
+
}
|
|
108
|
+
/**
|
|
109
|
+
* Extracts the style character from a boundary line.
|
|
110
|
+
*
|
|
111
|
+
* This function determines which type of boundary character is used in the line:
|
|
112
|
+
* '#', '=', or '-'. It checks the line against each boundary pattern and returns
|
|
113
|
+
* the corresponding character.
|
|
114
|
+
*
|
|
115
|
+
* @param {string} line - The boundary line to analyze
|
|
116
|
+
* @returns {BoundaryStyle} The boundary style character ('#', '=', or '-')
|
|
117
|
+
* @throws {Error} If the line is not a valid boundary line
|
|
118
|
+
*
|
|
119
|
+
* @example
|
|
120
|
+
* ```ts
|
|
121
|
+
* getBoundaryLineStyle("# #####"); // returns "#"
|
|
122
|
+
* getBoundaryLineStyle("# ====="); // returns "="
|
|
123
|
+
* getBoundaryLineStyle("# -----"); // returns "-"
|
|
124
|
+
* ```
|
|
125
|
+
*/
|
|
126
|
+
function getBoundaryLineStyle(line) {
|
|
127
|
+
if (isHashBoundary(line)) return "#";
|
|
128
|
+
if (isEqualsBoundary(line)) return "=";
|
|
129
|
+
if (isDashBoundary(line)) return "-";
|
|
130
|
+
throw new Error(`invalid boundary style for line: ${line}`);
|
|
131
|
+
}
|
|
132
|
+
/**
|
|
133
|
+
* Determines if a line is a comment line.
|
|
134
|
+
*
|
|
135
|
+
* A comment line is either a line that starts with "# " or
|
|
136
|
+
* a line that only contains "#" (possibly with whitespace).
|
|
137
|
+
*
|
|
138
|
+
* @param {string} line - The line to check
|
|
139
|
+
* @returns {boolean} True if the line is a comment line, false otherwise
|
|
140
|
+
*/
|
|
141
|
+
function isCommentLine(line) {
|
|
142
|
+
if (!line) return false;
|
|
143
|
+
const trimmed = line.trimStart();
|
|
144
|
+
return trimmed.startsWith("#") && trimmed.length > 0;
|
|
145
|
+
}
|
|
146
|
+
/**
|
|
147
|
+
* Removes the comment marker ('#') and any following whitespace from a line.
|
|
148
|
+
*
|
|
149
|
+
* This function is designed to extract the actual content from comment lines
|
|
150
|
+
* in Unicode data files by removing the leading '#' character and any whitespace
|
|
151
|
+
* that follows it.
|
|
152
|
+
*
|
|
153
|
+
* @param {string} line - The comment line to trim
|
|
154
|
+
* @returns {string} The content of the comment line without the comment marker
|
|
155
|
+
*
|
|
156
|
+
* @example
|
|
157
|
+
* ```ts
|
|
158
|
+
* trimCommentLine("# Some comment"); // returns "Some comment"
|
|
159
|
+
* trimCommentLine("#\tTabbed comment"); // returns "Tabbed comment"
|
|
160
|
+
* trimCommentLine(""); // returns ""
|
|
161
|
+
* ```
|
|
162
|
+
*/
|
|
163
|
+
function trimCommentLine(line) {
|
|
164
|
+
if (!line) return "";
|
|
165
|
+
return line.trim().replace(/^#\s*/, "");
|
|
166
|
+
}
|
|
167
|
+
/**
|
|
168
|
+
* Checks if a string line is empty after trimming whitespace.
|
|
169
|
+
*
|
|
170
|
+
* @param {string} line - The string to check for emptiness
|
|
171
|
+
* @returns {boolean} A boolean indicating whether the trimmed line is empty
|
|
172
|
+
*/
|
|
173
|
+
function isEmptyLine(line) {
|
|
174
|
+
if (!line) return true;
|
|
175
|
+
return line.trim() === "";
|
|
176
|
+
}
|
|
177
|
+
/**
|
|
178
|
+
* Determines if a line contains data in a Unicode data file.
|
|
179
|
+
*
|
|
180
|
+
* A line is considered to contain data if it is neither a comment line
|
|
181
|
+
* (starting with '#') nor an empty line.
|
|
182
|
+
*
|
|
183
|
+
* @param {string} line - The line to check
|
|
184
|
+
* @returns {boolean} True if the line contains data, false otherwise
|
|
185
|
+
*
|
|
186
|
+
* @example
|
|
187
|
+
* ```ts
|
|
188
|
+
* isLineWithData("U+0020;SPACE"); // true
|
|
189
|
+
* isLineWithData("# Comment line"); // false
|
|
190
|
+
* isLineWithData(""); // false
|
|
191
|
+
* ```
|
|
192
|
+
*/
|
|
193
|
+
function isLineWithData(line) {
|
|
194
|
+
return !isCommentLine(line) && !isEmptyLine(line);
|
|
195
|
+
}
|
|
196
|
+
/**
|
|
197
|
+
* Check if a given line from a Unicode data file is a 'missing' annotation.
|
|
198
|
+
*
|
|
199
|
+
* In Unicode data files, lines starting with '# @missing:' indicate
|
|
200
|
+
* a range of code points that are not assigned.
|
|
201
|
+
*
|
|
202
|
+
* @param {string} line - The line to check
|
|
203
|
+
* @returns {boolean} True if the line is a missing annotation, false otherwise
|
|
204
|
+
*/
|
|
205
|
+
function isMissingAnnotation(line) {
|
|
206
|
+
return line.startsWith("# @missing:");
|
|
207
|
+
}
|
|
208
|
+
const MISSING_ANNOTATION_SPECIAL_TAGS = {
|
|
209
|
+
"<none>": "none",
|
|
210
|
+
"<script>": "script",
|
|
211
|
+
"<code-point>": "code-point"
|
|
212
|
+
};
|
|
213
|
+
/**
|
|
214
|
+
* Parses a line into a MissingAnnotation object.
|
|
215
|
+
*
|
|
216
|
+
* This function attempts to extract information from a line that follows the
|
|
217
|
+
* format of a missing annotation in Unicode data files.
|
|
218
|
+
*
|
|
219
|
+
* The format being parsed is:
|
|
220
|
+
* `# @missing: START..END; DEFAULT_PROP_VALUE_OR_PROPERTY_NAME[; DEFAULT_PROPERTY_VALUE]`
|
|
221
|
+
*
|
|
222
|
+
* @param {string} line - The line to parse
|
|
223
|
+
* @returns {MissingAnnotation | null} A MissingAnnotation object if the line is a valid missing annotation, null otherwise
|
|
224
|
+
*
|
|
225
|
+
* @example
|
|
226
|
+
* ```ts
|
|
227
|
+
* parseMissingAnnotation("# @missing: 0000..007F; NA")
|
|
228
|
+
* // -> { start: "0000", end: "007F", defaultPropertyValue: "NA" }
|
|
229
|
+
*
|
|
230
|
+
* parseMissingAnnotation("# @missing: 0000..007F; Script; Unknown")
|
|
231
|
+
* // -> { start: "0000", end: "007F", propertyName: "Script", defaultPropertyValue: "Unknown" }
|
|
232
|
+
* ```
|
|
233
|
+
*/
|
|
234
|
+
function parseMissingAnnotation(line) {
|
|
235
|
+
if (!isMissingAnnotation(line)) return null;
|
|
236
|
+
const match = line.match(/^# @missing: ([0-9A-F]+)\.\.([0-9A-F]+); ([^;\n]+)(?:; ([^\n]+))?$/m);
|
|
237
|
+
if (match == null) return null;
|
|
238
|
+
const [_, start, end, defaultPropValueOrPropertyName, defaultPropertyValue] = match;
|
|
239
|
+
const defaultProperty = defaultPropertyValue == null ? defaultPropValueOrPropertyName : defaultPropertyValue;
|
|
240
|
+
const specialTag = defaultProperty && defaultProperty in MISSING_ANNOTATION_SPECIAL_TAGS ? MISSING_ANNOTATION_SPECIAL_TAGS[defaultProperty] : void 0;
|
|
241
|
+
if (start == null || end == null || defaultPropValueOrPropertyName == null) return null;
|
|
242
|
+
return {
|
|
243
|
+
start,
|
|
244
|
+
end,
|
|
245
|
+
propertyName: defaultPropertyValue == null ? void 0 : defaultPropValueOrPropertyName,
|
|
246
|
+
defaultPropertyValue: defaultProperty || "",
|
|
247
|
+
specialTag
|
|
248
|
+
};
|
|
249
|
+
}
|
|
250
|
+
/**
|
|
251
|
+
* Attempts to infer the file name from the first line of a Unicode data file.
|
|
252
|
+
*
|
|
253
|
+
* This function extracts the file name from the first line of the content,
|
|
254
|
+
* assuming it's a comment line. It removes any leading '#' characters and whitespace.
|
|
255
|
+
*
|
|
256
|
+
* For example:
|
|
257
|
+
* - From a file with first line "# ArabicShaping-5.0.0.txt", it returns "ArabicShaping"
|
|
258
|
+
* - From a file with first line "# UnicodeData-5.0.0.txt", it returns "UnicodeData"
|
|
259
|
+
*
|
|
260
|
+
* @param {string} line - The first line of the file
|
|
261
|
+
* @returns {string | undefined} The inferred file name, or undefined if it can't be determined
|
|
262
|
+
*/
|
|
263
|
+
function inferFileName(line) {
|
|
264
|
+
return parseFileNameLine(line)?.fileName;
|
|
265
|
+
}
|
|
266
|
+
/**
|
|
267
|
+
* Attempts to infer the version from the first line of a Unicode data file.
|
|
268
|
+
*
|
|
269
|
+
* This function extracts the version number from the first line of the content,
|
|
270
|
+
* assuming it's a comment line. It looks for a pattern like "Name-X.Y.Z.txt"
|
|
271
|
+
* and extracts the X.Y.Z part as the version.
|
|
272
|
+
*
|
|
273
|
+
* For example:
|
|
274
|
+
* - From a file with first line "# ArabicShaping-5.0.0.txt", it returns "5.0.0"
|
|
275
|
+
* - From a file with first line "# UnicodeData-14.0.0.txt", it returns "14.0.0"
|
|
276
|
+
*
|
|
277
|
+
* @param {string} line - The first line of the file
|
|
278
|
+
* @returns {string | undefined} The inferred version number, or undefined if it can't be determined
|
|
279
|
+
*/
|
|
280
|
+
function inferVersion(line) {
|
|
281
|
+
return parseFileNameLine(line)?.version;
|
|
282
|
+
}
|
|
283
|
+
/**
|
|
284
|
+
* Parses a line from a Unicode data file to extract the file name and version information.
|
|
285
|
+
*
|
|
286
|
+
* This function tries to extract file name and version information from a line that
|
|
287
|
+
* typically appears at the beginning of Unicode data files. It handles various formats:
|
|
288
|
+
* - "FileName-1.2.3.txt"
|
|
289
|
+
* - "FileName-1.2.3"
|
|
290
|
+
* - "FileName.txt"
|
|
291
|
+
*
|
|
292
|
+
* The function also properly handles comment markers at the beginning of the line.
|
|
293
|
+
*
|
|
294
|
+
* @param {string} line - The line to parse, typically the first line of a Unicode data file
|
|
295
|
+
* @returns {ParsedFileName | undefined} An object containing the file name and version if
|
|
296
|
+
* successfully parsed, or undefined if parsing fails
|
|
297
|
+
*
|
|
298
|
+
* @example
|
|
299
|
+
* ```ts
|
|
300
|
+
* parseFileNameLine("# UnicodeData-14.0.0.txt");
|
|
301
|
+
* // Returns { fileName: "UnicodeData", version: "14.0.0" }
|
|
302
|
+
*
|
|
303
|
+
* parseFileNameLine("# ArabicShaping.txt");
|
|
304
|
+
* // Returns { fileName: "ArabicShaping", version: undefined }
|
|
305
|
+
* ```
|
|
306
|
+
*/
|
|
307
|
+
function parseFileNameLine(line) {
|
|
308
|
+
if (!line) return;
|
|
309
|
+
line = line.split("\n")[0].trim();
|
|
310
|
+
if (!isCommentLine(line)) return;
|
|
311
|
+
line = line.trim().replace(/^#\s*/, "");
|
|
312
|
+
if (line === "") return;
|
|
313
|
+
let match = line.match(/^(.*?)(?:-([0-9.]+))?\.txt$/);
|
|
314
|
+
if (match == null) {
|
|
315
|
+
match = line.match(/^(.*?)(?:-([0-9.]+))?$/);
|
|
316
|
+
/* v8 ignore next 3 */
|
|
317
|
+
if (match == null) return;
|
|
318
|
+
}
|
|
319
|
+
const [_, fileName, version] = match;
|
|
320
|
+
if (!fileName || fileName.trim() === "") return;
|
|
321
|
+
return {
|
|
322
|
+
fileName,
|
|
323
|
+
version
|
|
324
|
+
};
|
|
325
|
+
}
|
|
326
|
+
/**
|
|
327
|
+
* Determines if a line represents a property definition in Unicode data files.
|
|
328
|
+
*
|
|
329
|
+
* In Unicode data files, properties are typically defined in comment lines that
|
|
330
|
+
* start with "# Property:" followed by the property name.
|
|
331
|
+
*
|
|
332
|
+
* @param {string} line - The line to check
|
|
333
|
+
* @returns {boolean} True if the line is a property definition, false otherwise
|
|
334
|
+
*
|
|
335
|
+
* @example
|
|
336
|
+
* ```ts
|
|
337
|
+
* isPropertyLine("# Property: Age"); // true
|
|
338
|
+
* isPropertyLine("# Some other comment"); // false
|
|
339
|
+
* isPropertyLine(""); // false
|
|
340
|
+
* ```
|
|
341
|
+
*/
|
|
342
|
+
function isPropertyLine(line) {
|
|
343
|
+
if (!line) return false;
|
|
344
|
+
if (!isCommentLine(line)) return false;
|
|
345
|
+
const val = getPropertyValue(line);
|
|
346
|
+
return val !== void 0 && val.trim() !== "";
|
|
347
|
+
}
|
|
348
|
+
/**
|
|
349
|
+
* Extracts the property value from a property definition line in Unicode data files.
|
|
350
|
+
*
|
|
351
|
+
* This function parses a line that follows the format '# Property: [PropertyValue]'
|
|
352
|
+
* and returns the PropertyValue part. It is used internally by isPropertyLine
|
|
353
|
+
* to parse property definitions in Unicode data files.
|
|
354
|
+
*
|
|
355
|
+
* @param {string} line - The line to extract the property value from
|
|
356
|
+
* @returns {string | undefined} The extracted property value, or undefined if
|
|
357
|
+
* the line is not a valid property definition
|
|
358
|
+
*
|
|
359
|
+
* @example
|
|
360
|
+
* ```ts
|
|
361
|
+
* getPropertyValue("# Property: Age"); // returns "Age"
|
|
362
|
+
* getPropertyValue("# Property: "); // returns undefined
|
|
363
|
+
* getPropertyValue("# Not a property line"); // returns undefined
|
|
364
|
+
* ```
|
|
365
|
+
*/
|
|
366
|
+
function getPropertyValue(line) {
|
|
367
|
+
const trimmedComment = trimCommentLine(line).trim();
|
|
368
|
+
if (trimmedComment === "") return;
|
|
369
|
+
if (!trimmedComment.startsWith("Property:")) return;
|
|
370
|
+
return trimmedComment.slice(9).trim();
|
|
371
|
+
}
|
|
2
372
|
|
|
373
|
+
//#endregion
|
|
3
374
|
export { getBoundaryLineStyle, getPropertyValue, inferFileName, inferVersion, isBoundaryLine, isCommentLine, isDashBoundary, isEOFMarker, isEmptyLine, isEqualsBoundary, isHashBoundary, isLineWithData, isMissingAnnotation, isPropertyLine, parseFileNameLine, parseMissingAnnotation, trimCommentLine };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@unicode-utils/parser",
|
|
3
|
-
"version": "0.12.0-beta.19",
|
|
3
|
+
"version": "0.12.0-beta.20",
|
|
4
4
|
"description": "Unicode data file parser with AST support",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"author": {
|
|
@@ -28,8 +28,6 @@
|
|
|
28
28
|
"./line-helpers": "./dist/line-helpers.mjs",
|
|
29
29
|
"./package.json": "./package.json"
|
|
30
30
|
},
|
|
31
|
-
"main": "./dist/index.mjs",
|
|
32
|
-
"module": "./dist/index.mjs",
|
|
33
31
|
"types": "./dist/index.d.mts",
|
|
34
32
|
"publishConfig": {
|
|
35
33
|
"access": "public"
|
|
@@ -38,19 +36,19 @@
|
|
|
38
36
|
"dist"
|
|
39
37
|
],
|
|
40
38
|
"dependencies": {
|
|
41
|
-
"@luxass/utils": "2.7.
|
|
39
|
+
"@luxass/utils": "2.7.3",
|
|
42
40
|
"defu": "6.1.4"
|
|
43
41
|
},
|
|
44
42
|
"devDependencies": {
|
|
45
|
-
"@luxass/eslint-config": "
|
|
43
|
+
"@luxass/eslint-config": "7.2.0",
|
|
46
44
|
"@types/node": "24.9.1",
|
|
47
|
-
"eslint": "
|
|
48
|
-
"eslint-plugin-format": "1.
|
|
49
|
-
"publint": "0.3.
|
|
50
|
-
"tsdown": "0.
|
|
45
|
+
"eslint": "10.0.0",
|
|
46
|
+
"eslint-plugin-format": "1.4.0",
|
|
47
|
+
"publint": "0.3.17",
|
|
48
|
+
"tsdown": "0.20.3",
|
|
51
49
|
"typescript": "5.9.3",
|
|
52
|
-
"@unicode-utils-tooling/
|
|
53
|
-
"@unicode-utils-tooling/
|
|
50
|
+
"@unicode-utils-tooling/tsdown-config": "0.12.0-beta.20",
|
|
51
|
+
"@unicode-utils-tooling/tsconfig": "0.12.0-beta.20"
|
|
54
52
|
},
|
|
55
53
|
"scripts": {
|
|
56
54
|
"build": "tsdown --tsconfig=./tsconfig.build.json",
|
|
@@ -1,374 +0,0 @@
|
|
|
1
|
-
//#region src/line-helpers.ts
|
|
2
|
-
const HASH_BOUNDARY_REGEX = /^\s*#\s*#{2,}\s*$/;
|
|
3
|
-
const EQUALS_BOUNDARY_REGEX = /^\s*#\s*={2,}\s*$/;
|
|
4
|
-
const DASH_BOUNDARY_REGEX = /^\s*#\s*-{2,}\s*$/;
|
|
5
|
-
/**
|
|
6
|
-
* Determines if a line is an End-of-File (EOF) marker.
|
|
7
|
-
*
|
|
8
|
-
* In Unicode data files, the EOF marker is typically represented
|
|
9
|
-
* as a line containing only "# EOF".
|
|
10
|
-
*
|
|
11
|
-
* @param {string} [line] - The line to check
|
|
12
|
-
* @returns {boolean} True if the line is an EOF marker, false otherwise
|
|
13
|
-
*
|
|
14
|
-
* @example
|
|
15
|
-
* ```ts
|
|
16
|
-
* isEOFMarker("# EOF"); // true
|
|
17
|
-
* isEOFMarker("Some text"); // false
|
|
18
|
-
* isEOFMarker(); // false
|
|
19
|
-
* ```
|
|
20
|
-
*/
|
|
21
|
-
function isEOFMarker(line) {
|
|
22
|
-
if (!line) return false;
|
|
23
|
-
return line.trim() === "# EOF";
|
|
24
|
-
}
|
|
25
|
-
/**
|
|
26
|
-
* Determines if a line contains a hash boundary pattern.
|
|
27
|
-
*
|
|
28
|
-
* A hash boundary is a line containing a pattern like "# ###" (# followed by multiple #).
|
|
29
|
-
* These patterns are used in Unicode data files to separate different sections of content.
|
|
30
|
-
*
|
|
31
|
-
* @param {string} line - The line to check
|
|
32
|
-
* @returns {boolean} True if the line contains a hash boundary pattern, false otherwise
|
|
33
|
-
*
|
|
34
|
-
* @example
|
|
35
|
-
* ```ts
|
|
36
|
-
* isHashBoundary("# #####"); // true
|
|
37
|
-
* isHashBoundary("# Some text"); // false
|
|
38
|
-
* isHashBoundary(""); // false
|
|
39
|
-
* ```
|
|
40
|
-
*/
|
|
41
|
-
function isHashBoundary(line) {
|
|
42
|
-
if (!line) return false;
|
|
43
|
-
return HASH_BOUNDARY_REGEX.test(line);
|
|
44
|
-
}
|
|
45
|
-
/**
 * Tests whether a line is an "equals" section boundary.
 *
 * The decision is delegated to `EQUALS_BOUNDARY_REGEX`, which is declared
 * elsewhere in this file; lines such as "# =====" are expected to match.
 * Falsy input (empty string, `undefined`, `null`) is rejected before the
 * pattern is consulted.
 *
 * NOTE(review): `.test()` is stateful for regexes carrying the `g` or `y`
 * flag — confirm the shared pattern uses neither.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True when the pattern matches, false otherwise
 *
 * @example
 * ```ts
 * isEqualsBoundary(""); // false
 * ```
 */
function isEqualsBoundary(line) {
	return line ? EQUALS_BOUNDARY_REGEX.test(line) : false;
}
|
|
65
|
-
/**
 * Tests whether a line is a "dash" section boundary.
 *
 * The decision is delegated to `DASH_BOUNDARY_REGEX`, which is declared
 * elsewhere in this file; lines such as "# -----" are expected to match.
 * Falsy input (empty string, `undefined`, `null`) is rejected before the
 * pattern is consulted.
 *
 * NOTE(review): `.test()` is stateful for regexes carrying the `g` or `y`
 * flag — confirm the shared pattern uses neither.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True when the pattern matches, false otherwise
 *
 * @example
 * ```ts
 * isDashBoundary(""); // false
 * ```
 */
function isDashBoundary(line) {
	return line ? DASH_BOUNDARY_REGEX.test(line) : false;
}
|
|
85
|
-
/**
 * Tests whether a line is a section boundary of any supported style.
 *
 * The line is offered to `isHashBoundary`, `isEqualsBoundary` and
 * `isDashBoundary` in that order; the first one that accepts it wins and
 * the remaining checks are skipped.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if any boundary predicate accepts the line, false otherwise
 *
 * @example
 * ```ts
 * isBoundaryLine("# Some text"); // false
 * isBoundaryLine(""); // false
 * ```
 */
function isBoundaryLine(line) {
	if (!line) return false;
	const predicates = [
		isHashBoundary,
		isEqualsBoundary,
		isDashBoundary
	];
	// `some` stops at the first predicate that returns true, like the `||` chain it replaces.
	return predicates.some((matches) => matches(line));
}
|
|
108
|
-
/**
 * Reports which boundary style a line uses.
 *
 * The line is classified with `isHashBoundary`, `isEqualsBoundary` and
 * `isDashBoundary`, checked in that order, and the matching style
 * character is returned.
 *
 * @param {string} line - The boundary line to analyze
 * @returns {BoundaryStyle} "#", "=" or "-"
 * @throws {Error} If none of the three boundary predicates accepts the line
 *
 * @example
 * ```ts
 * getBoundaryLineStyle("# Some text"); // throws
 * ```
 */
function getBoundaryLineStyle(line) {
	let style;
	if (isHashBoundary(line)) {
		style = "#";
	} else if (isEqualsBoundary(line)) {
		style = "=";
	} else if (isDashBoundary(line)) {
		style = "-";
	} else {
		// Callers are expected to pass a line that already passed a boundary check.
		throw new Error(`invalid boundary style for line: ${line}`);
	}
	return style;
}
|
|
132
|
-
/**
 * Tests whether a line is a comment line.
 *
 * A line counts as a comment when its first non-whitespace character is
 * "#". Nothing is required after the "#", so "#", "# text" and "#text"
 * all qualify. Falsy input is never a comment.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line is a comment line, false otherwise
 *
 * @example
 * ```ts
 * isCommentLine("# Some comment"); // true
 * isCommentLine("   #"); // true
 * isCommentLine("0041;LATIN"); // false
 * isCommentLine(""); // false
 * ```
 */
function isCommentLine(line) {
	if (!line) return false;
	// Leading indentation is ignored; only the first visible character matters.
	const firstVisible = line.trimStart().charAt(0);
	return firstVisible === "#";
}
|
|
146
|
-
/**
 * Strips the comment marker from a line and returns what is left.
 *
 * The line is trimmed on both ends, then a single leading "#" together
 * with any whitespace directly after it is removed. Lines that do not
 * start with "#" are returned trimmed but otherwise unchanged. Falsy
 * input yields an empty string.
 *
 * @param {string} line - The comment line to trim
 * @returns {string} The line content without the leading comment marker
 *
 * @example
 * ```ts
 * trimCommentLine("# Some comment"); // returns "Some comment"
 * trimCommentLine("#\tTabbed comment"); // returns "Tabbed comment"
 * trimCommentLine("## Heading"); // returns "# Heading" (only one marker is removed)
 * trimCommentLine(""); // returns ""
 * ```
 */
function trimCommentLine(line) {
	if (!line) return "";
	const withoutPadding = line.trim();
	const withoutMarker = withoutPadding.replace(/^#\s*/, "");
	return withoutMarker;
}
|
|
167
|
-
/**
 * Tests whether a line has no visible content.
 *
 * Falsy input (`""`, `undefined`, `null`) and strings made up only of
 * whitespace are both treated as empty.
 *
 * @param {string} line - The string to check for emptiness
 * @returns {boolean} True if nothing remains after trimming, false otherwise
 *
 * @example
 * ```ts
 * isEmptyLine("   \t"); // true
 * isEmptyLine("0041"); // false
 * ```
 */
function isEmptyLine(line) {
	return !line || line.trim().length === 0;
}
|
|
177
|
-
/**
 * Tests whether a line carries data rather than commentary or padding.
 *
 * A line is data when `isCommentLine` rejects it and `isEmptyLine`
 * rejects it as well. The comment check runs first; the emptiness check
 * is skipped when the line is already known to be a comment.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line contains data, false otherwise
 *
 * @example
 * ```ts
 * isLineWithData("U+0020;SPACE"); // true
 * isLineWithData("# Comment line"); // false
 * isLineWithData(""); // false
 * ```
 */
function isLineWithData(line) {
	const isCommentOrBlank = isCommentLine(line) || isEmptyLine(line);
	return !isCommentOrBlank;
}
|
|
196
|
-
/**
 * Tests whether a line is a `@missing` annotation.
 *
 * Only the literal prefix "# @missing:" at the very start of the line is
 * checked; the remainder of the line is not validated here. Falsy input
 * (`""`, `undefined`, `null`) is reported as not being an annotation, in
 * line with the other line predicates in this file.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line starts with "# @missing:", false otherwise
 */
function isMissingAnnotation(line) {
	if (!line) return false;
	return line.startsWith("# @missing:");
}
/**
 * Placeholder values that may appear as the default property value of a
 * `@missing` annotation, mapped to the tag name reported for them.
 */
const MISSING_ANNOTATION_SPECIAL_TAGS = {
	"<none>": "none",
	"<script>": "script",
	"<code-point>": "code-point"
};
/**
 * Parses a `@missing` annotation line into a MissingAnnotation object.
 *
 * The accepted format is:
 * `# @missing: START..END; DEFAULT_PROP_VALUE_OR_PROPERTY_NAME[; DEFAULT_PROPERTY_VALUE]`
 *
 * START and END must be uppercase hexadecimal. When the optional third
 * field is present, the second field is the property name and the third
 * is the default value; otherwise the second field is the default value
 * and `propertyName` is `undefined`.
 *
 * `specialTag` is set only when the default value is exactly one of the
 * keys of `MISSING_ANNOTATION_SPECIAL_TAGS`.
 *
 * @param {string} line - The line to parse
 * @returns {MissingAnnotation | null} The parsed annotation, or null if the line is not a well-formed missing annotation
 *
 * @example
 * ```ts
 * parseMissingAnnotation("# @missing: 0000..007F; NA")
 * // -> { start: "0000", end: "007F", propertyName: undefined, defaultPropertyValue: "NA", specialTag: undefined }
 *
 * parseMissingAnnotation("# @missing: 0000..10FFFF; Script; <script>")
 * // -> { start: "0000", end: "10FFFF", propertyName: "Script", defaultPropertyValue: "<script>", specialTag: "script" }
 * ```
 */
function parseMissingAnnotation(line) {
	if (!isMissingAnnotation(line)) return null;
	const match = line.match(/^# @missing: ([0-9A-F]+)\.\.([0-9A-F]+); ([^;\n]+)(?:; ([^\n]+))?$/m);
	if (match == null) return null;
	const [, start, end, defaultPropValueOrPropertyName, defaultPropertyValue] = match;
	if (start == null || end == null || defaultPropValueOrPropertyName == null) return null;
	// With three fields the middle one names the property; with two it is the value itself.
	const hasPropertyName = defaultPropertyValue != null;
	const defaultProperty = hasPropertyName ? defaultPropertyValue : defaultPropValueOrPropertyName;
	// Own-property lookup only: the `in` operator would also accept inherited keys such as
	// "constructor" or "toString" and hand back a function as the tag.
	const isSpecialTag = Object.prototype.hasOwnProperty.call(MISSING_ANNOTATION_SPECIAL_TAGS, defaultProperty);
	return {
		start,
		end,
		propertyName: hasPropertyName ? defaultPropValueOrPropertyName : void 0,
		defaultPropertyValue: defaultProperty,
		specialTag: isSpecialTag ? MISSING_ANNOTATION_SPECIAL_TAGS[defaultProperty] : void 0
	};
}
|
|
250
|
-
/**
 * Infers the file name from the header line of a Unicode data file.
 *
 * This is a convenience wrapper: the line is handed to
 * `parseFileNameLine` and only the `fileName` part of its result is
 * returned.
 *
 * For example:
 * - "# ArabicShaping-5.0.0.txt" gives "ArabicShaping"
 * - "# UnicodeData-5.0.0.txt" gives "UnicodeData"
 *
 * @param {string} line - The first line of the file
 * @returns {string | undefined} The inferred file name, or undefined if the line could not be parsed
 */
function inferFileName(line) {
	const parsed = parseFileNameLine(line);
	return parsed == null ? undefined : parsed.fileName;
}
|
|
266
|
-
/**
 * Infers the version from the header line of a Unicode data file.
 *
 * This is a convenience wrapper: the line is handed to
 * `parseFileNameLine` and only the `version` part of its result is
 * returned. A header without a version suffix yields undefined.
 *
 * For example:
 * - "# ArabicShaping-5.0.0.txt" gives "5.0.0"
 * - "# UnicodeData-14.0.0.txt" gives "14.0.0"
 *
 * @param {string} line - The first line of the file
 * @returns {string | undefined} The inferred version, or undefined if it can't be determined
 */
function inferVersion(line) {
	const parsed = parseFileNameLine(line);
	return parsed == null ? undefined : parsed.version;
}
|
|
283
|
-
/**
 * Extracts the file name and version from a header comment line.
 *
 * Only the text up to the first "\n" is considered. That line must be a
 * comment (see `isCommentLine`); its leading "#" and following whitespace
 * are removed and the remainder is matched against these shapes:
 * - "FileName-1.2.3.txt"
 * - "FileName-1.2.3"
 * - "FileName.txt"
 *
 * The version part, when present, is a run of digits and dots after the
 * last "-" that precedes the optional ".txt" suffix.
 *
 * @param {string} line - The line to parse, typically the first line of a Unicode data file
 * @returns {ParsedFileName | undefined} The file name and (possibly undefined) version,
 * or undefined when the line is empty, is not a comment, or yields no file name
 *
 * @example
 * ```ts
 * parseFileNameLine("# UnicodeData-14.0.0.txt");
 * // Returns { fileName: "UnicodeData", version: "14.0.0" }
 *
 * parseFileNameLine("# ArabicShaping.txt");
 * // Returns { fileName: "ArabicShaping", version: undefined }
 * ```
 */
function parseFileNameLine(line) {
	if (!line) return;
	const [firstLine] = line.split("\n");
	const header = firstLine.trim();
	if (!isCommentLine(header)) return;
	const content = header.replace(/^#\s*/, "");
	if (content === "") return;
	// Prefer the form with a ".txt" suffix; fall back to the suffix-less form.
	const match = content.match(/^(.*?)(?:-([0-9.]+))?\.txt$/) ?? content.match(/^(.*?)(?:-([0-9.]+))?$/);
	if (match == null) return;
	const fileName = match[1];
	const version = match[2];
	// A header such as "# -1.0.txt" matches but leaves no name to report.
	if (!fileName || fileName.trim() === "") return;
	return {
		fileName,
		version
	};
}
|
|
326
|
-
/**
 * Tests whether a line is a property definition.
 *
 * The line must be a comment (see `isCommentLine`) and `getPropertyValue`
 * must extract a value from it that is not blank.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line is a property definition, false otherwise
 *
 * @example
 * ```ts
 * isPropertyLine("# Property: Age"); // true
 * isPropertyLine("# Some other comment"); // false
 * isPropertyLine(""); // false
 * ```
 */
function isPropertyLine(line) {
	if (!line || !isCommentLine(line)) return false;
	const value = getPropertyValue(line);
	if (value === undefined) return false;
	return value.trim().length > 0;
}
|
|
348
|
-
/**
 * Extracts the property value from a property definition line.
 *
 * The line is expected to look like "# Property: <value>". The leading
 * "#" and surrounding whitespace are stripped, the remainder must start
 * with "Property:", and the text after that label is returned trimmed.
 *
 * A line whose label is present but whose value is blank is treated as
 * "not a property definition" and yields undefined rather than an empty
 * string, so callers only ever receive a usable value.
 *
 * @param {string} line - The line to extract the property value from
 * @returns {string | undefined} The extracted property value, or undefined if
 * the line is not a valid property definition
 *
 * @example
 * ```ts
 * getPropertyValue("# Property: Age"); // returns "Age"
 * getPropertyValue("# Property: "); // returns undefined
 * getPropertyValue("# Not a property line"); // returns undefined
 * ```
 */
function getPropertyValue(line) {
	if (!line) return;
	const label = "Property:";
	// Same normalization as trimCommentLine: drop one leading "#" and the whitespace after it.
	const content = line.trim().replace(/^#\s*/, "");
	if (!content.startsWith(label)) return;
	const value = content.slice(label.length).trim();
	// A bare "Property:" label carries no value.
	return value === "" ? undefined : value;
}
|
|
372
|
-
|
|
373
|
-
//#endregion
|
|
374
|
-
// Export surface of this chunk. Every helper is exposed under a short alias
// (for example `isBoundaryLine as a`), so importing modules must bind the alias
// rather than the original function name.
// NOTE(review): the aliases look bundler-generated — confirm before editing them by hand.
export { trimCommentLine as _, isBoundaryLine as a, isEOFMarker as c, isHashBoundary as d, isLineWithData as f, parseMissingAnnotation as g, parseFileNameLine as h, inferVersion as i, isEmptyLine as l, isPropertyLine as m, getPropertyValue as n, isCommentLine as o, isMissingAnnotation as p, inferFileName as r, isDashBoundary as s, getBoundaryLineStyle as t, isEqualsBoundary as u };
|
|
File without changes
|