@unicode-utils/parser 0.12.0-beta.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,374 @@
1
+ //#region src/line-helpers.ts
2
/**
 * Whole-line pattern: optional whitespace, a "#" comment marker, optional
 * whitespace, then a run of two or more "#" and optional trailing whitespace.
 */
const HASH_BOUNDARY_REGEX = /^\s*#\s*#{2,}\s*$/;

/**
 * Whole-line pattern: optional whitespace, a "#" comment marker, optional
 * whitespace, then a run of two or more "=" and optional trailing whitespace.
 */
const EQUALS_BOUNDARY_REGEX = /^\s*#\s*={2,}\s*$/;

/**
 * Whole-line pattern: optional whitespace, a "#" comment marker, optional
 * whitespace, then a run of two or more "-" and optional trailing whitespace.
 */
const DASH_BOUNDARY_REGEX = /^\s*#\s*-{2,}\s*$/;
5
/**
 * Reports whether a line is the End-of-File marker of a Unicode data file.
 *
 * The line is trimmed before comparison, so surrounding whitespace is
 * ignored; what remains must be exactly "# EOF".
 *
 * @param {string} [line] - The line to inspect; may be omitted
 * @returns {boolean} True only when the trimmed line equals "# EOF"
 *
 * @example
 * ```ts
 * isEOFMarker("# EOF"); // true
 * isEOFMarker("  # EOF  "); // true
 * isEOFMarker("Some text"); // false
 * isEOFMarker(); // false
 * ```
 */
function isEOFMarker(line) {
  // Missing or empty input can never be the marker.
  return line ? line.trim() === "# EOF" : false;
}
25
/**
 * Reports whether a line is a hash boundary such as "# #####".
 *
 * The whole line must match HASH_BOUNDARY_REGEX: a "#" comment marker
 * followed by a run of at least two further "#" characters, with only
 * optional whitespace around them.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line is a hash boundary, false otherwise
 *
 * @example
 * ```ts
 * isHashBoundary("# #####"); // true
 * isHashBoundary("# Some text"); // false
 * isHashBoundary(""); // false
 * ```
 */
function isHashBoundary(line) {
  // Empty or missing input is never a boundary.
  return line ? HASH_BOUNDARY_REGEX.test(line) : false;
}
45
/**
 * Reports whether a line is an equals boundary such as "# =====".
 *
 * The whole line must match EQUALS_BOUNDARY_REGEX: a "#" comment marker
 * followed by a run of at least two "=" characters, with only optional
 * whitespace around them.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line is an equals boundary, false otherwise
 *
 * @example
 * ```ts
 * isEqualsBoundary("# ====="); // true
 * isEqualsBoundary("# Some text"); // false
 * isEqualsBoundary(""); // false
 * ```
 */
function isEqualsBoundary(line) {
  // Empty or missing input is never a boundary.
  return line ? EQUALS_BOUNDARY_REGEX.test(line) : false;
}
65
/**
 * Reports whether a line is a dash boundary such as "# -----".
 *
 * The whole line must match DASH_BOUNDARY_REGEX: a "#" comment marker
 * followed by a run of at least two "-" characters, with only optional
 * whitespace around them.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line is a dash boundary, false otherwise
 *
 * @example
 * ```ts
 * isDashBoundary("# -----"); // true
 * isDashBoundary("# Some text"); // false
 * isDashBoundary(""); // false
 * ```
 */
function isDashBoundary(line) {
  // Empty or missing input is never a boundary.
  return line ? DASH_BOUNDARY_REGEX.test(line) : false;
}
85
/**
 * Reports whether a line is a boundary line of any supported style.
 *
 * The line is checked, in order, against the hash, equals and dash boundary
 * predicates; the first one that accepts it decides the result.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if any boundary predicate accepts the line, false otherwise
 *
 * @example
 * ```ts
 * isBoundaryLine("# #####"); // true (hash boundary)
 * isBoundaryLine("# ====="); // true (equals boundary)
 * isBoundaryLine("# -----"); // true (dash boundary)
 * isBoundaryLine("# Some text"); // false
 * isBoundaryLine(""); // false
 * ```
 */
function isBoundaryLine(line) {
  // Each predicate already rejects empty input, so no separate guard is needed.
  const predicates = [isHashBoundary, isEqualsBoundary, isDashBoundary];
  return predicates.some((matches) => matches(line));
}
108
/**
 * Returns the character a boundary line is drawn with.
 *
 * The line is tested against the hash, equals and dash boundary predicates,
 * in that order, and the character of the first matching style is returned.
 *
 * @param {string} line - The boundary line to analyze
 * @returns {BoundaryStyle} "#", "=" or "-" depending on the matching style
 * @throws {Error} If none of the boundary predicates accepts the line
 *
 * @example
 * ```ts
 * getBoundaryLineStyle("# #####"); // returns "#"
 * getBoundaryLineStyle("# ====="); // returns "="
 * getBoundaryLineStyle("# -----"); // returns "-"
 * ```
 */
function getBoundaryLineStyle(line) {
  // Ordered style/predicate pairs; the order mirrors the lookup priority.
  const detectors = [
    ["#", isHashBoundary],
    ["=", isEqualsBoundary],
    ["-", isDashBoundary],
  ];
  for (const [style, matches] of detectors) {
    if (matches(line)) return style;
  }
  throw new Error(`invalid boundary style for line: ${line}`);
}
132
/**
 * Reports whether a line is a comment line.
 *
 * A line counts as a comment when its first non-whitespace character is "#".
 * No space is required after the marker, so "#", "# text" and "#text" are
 * all comment lines.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line is a comment line, false otherwise
 *
 * @example
 * ```ts
 * isCommentLine("# Some comment"); // true
 * isCommentLine("   #"); // true
 * isCommentLine("0041;LATIN CAPITAL LETTER A"); // false
 * isCommentLine(""); // false
 * ```
 */
function isCommentLine(line) {
  // Empty or missing input is not a comment.
  return line ? line.trimStart().startsWith("#") : false;
}
146
/**
 * Strips the comment marker from a line and returns the remaining text.
 *
 * The line is trimmed on both sides first; then a single leading "#" and any
 * whitespace directly after it are removed. Only one marker is stripped, so
 * "## text" becomes "# text". Lines without a marker are returned trimmed.
 *
 * @param {string} line - The comment line to trim
 * @returns {string} The line content without the comment marker; "" for empty input
 *
 * @example
 * ```ts
 * trimCommentLine("# Some comment"); // returns "Some comment"
 * trimCommentLine("#\tTabbed comment"); // returns "Tabbed comment"
 * trimCommentLine(""); // returns ""
 * ```
 */
function trimCommentLine(line) {
  if (!line) {
    return "";
  }
  const trimmed = line.trim();
  return trimmed.replace(/^#\s*/, "");
}
167
/**
 * Reports whether a line has no content once whitespace is trimmed.
 *
 * Missing input (for example `undefined` or "") is treated as empty.
 *
 * @param {string} line - The string to check for emptiness
 * @returns {boolean} True if the line is missing or contains only whitespace
 */
function isEmptyLine(line) {
  return !line || line.trim() === "";
}
177
/**
 * Reports whether a line of a Unicode data file carries data.
 *
 * A data line is any line that is neither a comment line (as decided by
 * isCommentLine) nor an empty line (as decided by isEmptyLine).
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line contains data, false otherwise
 *
 * @example
 * ```ts
 * isLineWithData("U+0020;SPACE"); // true
 * isLineWithData("# Comment line"); // false
 * isLineWithData(""); // false
 * ```
 */
function isLineWithData(line) {
  const isCommentOrBlank = isCommentLine(line) || isEmptyLine(line);
  return !isCommentOrBlank;
}
196
/**
 * Checks whether a line from a Unicode data file is an `@missing` annotation.
 *
 * Only the literal prefix "# @missing:" at the very start of the string is
 * tested; the rest of the line is not validated here. Missing or non-string
 * input yields `false` instead of throwing.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line starts with "# @missing:", false otherwise
 */
function isMissingAnnotation(line) {
  return typeof line === "string" && line.startsWith("# @missing:");
}

/**
 * Placeholder default values that may appear in an `@missing` annotation,
 * mapped to the special tag reported by parseMissingAnnotation.
 *
 * UAX #44 spells the code point placeholder "<code point>" (with a space);
 * the hyphenated "<code-point>" key is kept for backward compatibility.
 */
const MISSING_ANNOTATION_SPECIAL_TAGS = {
  "<none>": "none",
  "<script>": "script",
  "<code point>": "code-point",
  "<code-point>": "code-point",
};

/**
 * Parses a line into a MissingAnnotation object.
 *
 * The format being parsed is:
 * `# @missing: START..END; DEFAULT_PROP_VALUE_OR_PROPERTY_NAME[; DEFAULT_PROPERTY_VALUE]`
 *
 * START and END must consist of uppercase hexadecimal digits and are returned
 * as text. With two fields the second one is the default property value; with
 * three fields the second is the property name and the third the default
 * value. Field values are returned with surrounding whitespace removed.
 *
 * @param {string} line - The line to parse
 * @returns {MissingAnnotation | null} A MissingAnnotation object if the line is a valid missing annotation, null otherwise
 *
 * @example
 * ```ts
 * parseMissingAnnotation("# @missing: 0000..007F; NA")
 * // -> { start: "0000", end: "007F", propertyName: undefined, defaultPropertyValue: "NA", specialTag: undefined }
 *
 * parseMissingAnnotation("# @missing: 0000..10FFFF; Script_Extensions; <script>")
 * // -> { start: "0000", end: "10FFFF", propertyName: "Script_Extensions", defaultPropertyValue: "<script>", specialTag: "script" }
 * ```
 */
function parseMissingAnnotation(line) {
  if (!isMissingAnnotation(line)) return null;

  // With the `m` flag, `$` also matches before a line break, so a trailing
  // newline does not prevent a match.
  const match = line.match(/^# @missing: ([0-9A-F]+)\.\.([0-9A-F]+); ([^;\n]+)(?:; ([^\n]+))?$/m);
  if (match == null) return null;

  // Groups 1-3 are mandatory in the pattern; only the third field is optional.
  const [, start, end, secondField, thirdField] = match;
  const hasPropertyName = thirdField !== undefined;

  // Captures may carry trailing blanks or a "\r" from CRLF input; trim them so
  // the values compare cleanly against the special-tag table.
  const propertyName = hasPropertyName ? secondField.trim() : undefined;
  const defaultPropertyValue = (hasPropertyName ? thirdField : secondField).trim();

  // Own-property lookup only: the `in` operator would also accept inherited
  // names such as "constructor" or "toString".
  const isSpecialTag = Object.prototype.hasOwnProperty.call(
    MISSING_ANNOTATION_SPECIAL_TAGS,
    defaultPropertyValue,
  );
  const specialTag = isSpecialTag ? MISSING_ANNOTATION_SPECIAL_TAGS[defaultPropertyValue] : undefined;

  return {
    start,
    end,
    propertyName,
    defaultPropertyValue,
    specialTag,
  };
}
250
/**
 * Infers the file name from the first line of a Unicode data file.
 *
 * The line is handed to parseFileNameLine and only the `fileName` part of
 * its result is returned.
 *
 * For example:
 * - "# ArabicShaping-5.0.0.txt" yields "ArabicShaping"
 * - "# UnicodeData-5.0.0.txt" yields "UnicodeData"
 *
 * @param {string} line - The first line of the file
 * @returns {string | undefined} The inferred file name, or undefined if the line could not be parsed
 */
function inferFileName(line) {
  const parsed = parseFileNameLine(line);
  return parsed == null ? undefined : parsed.fileName;
}
266
/**
 * Infers the version from the first line of a Unicode data file.
 *
 * The line is handed to parseFileNameLine and only the `version` part of its
 * result is returned.
 *
 * For example:
 * - "# ArabicShaping-5.0.0.txt" yields "5.0.0"
 * - "# UnicodeData-14.0.0.txt" yields "14.0.0"
 *
 * @param {string} line - The first line of the file
 * @returns {string | undefined} The inferred version, or undefined if the line
 * could not be parsed or carries no version
 */
function inferVersion(line) {
  const parsed = parseFileNameLine(line);
  return parsed == null ? undefined : parsed.version;
}
283
/**
 * Extracts the file name and version from the header line of a Unicode data file.
 *
 * Only the text before the first "\n" is considered. It must be a comment
 * (start with "#" once trimmed); the marker and the whitespace after it are
 * removed and the remainder is matched against these shapes:
 * - "FileName-1.2.3.txt"
 * - "FileName-1.2.3"
 * - "FileName.txt"
 *
 * The version is the trailing "-<digits and dots>" segment, when present.
 *
 * @param {string} line - The line to parse, typically the first line of a Unicode data file
 * @returns {ParsedFileName | undefined} The file name and version, or undefined when the
 * line is empty, is not a comment, or has no file name part
 *
 * @example
 * ```ts
 * parseFileNameLine("# UnicodeData-14.0.0.txt");
 * // Returns { fileName: "UnicodeData", version: "14.0.0" }
 *
 * parseFileNameLine("# ArabicShaping.txt");
 * // Returns { fileName: "ArabicShaping", version: undefined }
 * ```
 */
function parseFileNameLine(line) {
  if (!line) return undefined;

  const [firstLine] = line.split("\n");
  const header = firstLine.trim();
  // The header is already trimmed, so a plain prefix test identifies a comment.
  if (!header.startsWith("#")) return undefined;

  const content = header.replace(/^#\s*/, "");
  if (content === "") return undefined;

  // Prefer the ".txt" form; otherwise accept a bare name with an optional version.
  const parsed =
    content.match(/^(.*?)(?:-([0-9.]+))?\.txt$/) ??
    content.match(/^(.*?)(?:-([0-9.]+))?$/);
  if (parsed == null) return undefined;

  const fileName = parsed[1];
  const version = parsed[2];
  if (!fileName || fileName.trim() === "") return undefined;

  return { fileName, version };
}
326
/**
 * Reports whether a line is a property definition of a Unicode data file.
 *
 * The line must be a comment line (per isCommentLine) for which
 * getPropertyValue yields a non-blank value, e.g. "# Property: Age".
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line is a property definition, false otherwise
 *
 * @example
 * ```ts
 * isPropertyLine("# Property: Age"); // true
 * isPropertyLine("# Some other comment"); // false
 * isPropertyLine(""); // false
 * ```
 */
function isPropertyLine(line) {
  if (!line || !isCommentLine(line)) return false;
  const value = getPropertyValue(line);
  // A definition with nothing after "Property:" does not count.
  return value !== undefined && value.trim() !== "";
}
348
/**
 * Extracts the property value from a property definition line in Unicode data files.
 *
 * The line is trimmed, a leading "#" marker and the whitespace after it are
 * removed, and the remainder must start with "Property:". The text after that
 * prefix, trimmed, is the property value. A definition with no value after
 * the prefix is not a valid property definition and yields undefined.
 *
 * @param {string} line - The line to extract the property value from
 * @returns {string | undefined} The extracted property value, or undefined if
 * the line is not a valid property definition
 *
 * @example
 * ```ts
 * getPropertyValue("# Property: Age"); // returns "Age"
 * getPropertyValue("# Property: "); // returns undefined
 * getPropertyValue("# Not a property line"); // returns undefined
 * ```
 */
function getPropertyValue(line) {
  if (!line) return undefined;

  const prefix = "Property:";
  // Same marker stripping as trimCommentLine: one "#" plus following whitespace.
  const comment = line.trim().replace(/^#\s*/, "");
  if (!comment.startsWith(prefix)) return undefined;

  const value = comment.slice(prefix.length).trim();
  // An empty value means the line names no property, as documented above.
  return value === "" ? undefined : value;
}
372
+
373
+ //#endregion
374
+ export { trimCommentLine as _, isBoundaryLine as a, isEOFMarker as c, isHashBoundary as d, isLineWithData as f, parseMissingAnnotation as g, parseFileNameLine as h, inferVersion as i, isEmptyLine as l, isPropertyLine as m, getPropertyValue as n, isCommentLine as o, isMissingAnnotation as p, inferFileName as r, isDashBoundary as s, getBoundaryLineStyle as t, isEqualsBoundary as u };
@@ -0,0 +1,305 @@
1
+ //#region src/line-helpers.d.ts
2
+ /**
3
+ * Determines if a line is an End-of-File (EOF) marker.
4
+ *
5
+ * In Unicode data files, the EOF marker is typically represented
6
+ * as a line containing only "# EOF".
7
+ *
8
+ * @param {string} [line] - The line to check
9
+ * @returns {boolean} True if the line is an EOF marker, false otherwise
10
+ *
11
+ * @example
12
+ * ```ts
13
+ * isEOFMarker("# EOF"); // true
14
+ * isEOFMarker("Some text"); // false
15
+ * isEOFMarker(); // false
16
+ * ```
17
+ */
18
+ declare function isEOFMarker(line?: string): boolean;
19
+ /**
20
+ * Determines if a line contains a hash boundary pattern.
21
+ *
22
+ * A hash boundary is a line containing a pattern like "# ###" (# followed by multiple #).
23
+ * These patterns are used in Unicode data files to separate different sections of content.
24
+ *
25
+ * @param {string} line - The line to check
26
+ * @returns {boolean} True if the line contains a hash boundary pattern, false otherwise
27
+ *
28
+ * @example
29
+ * ```ts
30
+ * isHashBoundary("# #####"); // true
31
+ * isHashBoundary("# Some text"); // false
32
+ * isHashBoundary(""); // false
33
+ * ```
34
+ */
35
+ declare function isHashBoundary(line: string): boolean;
36
+ /**
37
+ * Determines if a line contains an equals boundary pattern.
38
+ *
39
+ * An equals boundary is a line containing a pattern like "# ===" (# followed by multiple =).
40
+ * These patterns are used in Unicode data files to separate different sections of content.
41
+ *
42
+ * @param {string} line - The line to check
43
+ * @returns {boolean} True if the line contains an equals boundary pattern, false otherwise
44
+ *
45
+ * @example
46
+ * ```ts
47
+ * isEqualsBoundary("# ====="); // true
48
+ * isEqualsBoundary("# Some text"); // false
49
+ * isEqualsBoundary(""); // false
50
+ * ```
51
+ */
52
+ declare function isEqualsBoundary(line: string): boolean;
53
+ /**
54
+ * Determines if a line contains a dash boundary pattern.
55
+ *
56
+ * A dash boundary is a line containing a pattern like "# ---" (# followed by multiple -).
57
+ * These patterns are used in Unicode data files to separate different sections of content.
58
+ *
59
+ * @param {string} line - The line to check
60
+ * @returns {boolean} True if the line contains a dash boundary pattern, false otherwise
61
+ *
62
+ * @example
63
+ * ```ts
64
+ * isDashBoundary("# -----"); // true
65
+ * isDashBoundary("# Some text"); // false
66
+ * isDashBoundary(""); // false
67
+ * ```
68
+ */
69
+ declare function isDashBoundary(line: string): boolean;
70
+ /**
71
+ * Determines if a line is any type of boundary line.
72
+ *
73
+ * A boundary line is any line that matches one of the boundary patterns:
74
+ * hash boundary, equals boundary, or dash boundary. These patterns are used
75
+ * in Unicode data files to separate different sections of content.
76
+ *
77
+ * @param {string} line - The line to check
78
+ * @returns {boolean} True if the line is a boundary line, false otherwise
79
+ *
80
+ * @example
81
+ * ```ts
82
+ * isBoundaryLine("# #####"); // true (hash boundary)
83
+ * isBoundaryLine("# ====="); // true (equals boundary)
84
+ * isBoundaryLine("# -----"); // true (dash boundary)
85
+ * isBoundaryLine("# Some text"); // false
86
+ * isBoundaryLine(""); // false
87
+ * ```
88
+ */
89
+ declare function isBoundaryLine(line: string): boolean;
90
+ type BoundaryStyle = "#" | "=" | "-";
91
+ /**
92
+ * Extracts the style character from a boundary line.
93
+ *
94
+ * This function determines which type of boundary character is used in the line:
95
+ * '#', '=', or '-'. It checks the line against each boundary pattern and returns
96
+ * the corresponding character.
97
+ *
98
+ * @param {string} line - The boundary line to analyze
99
+ * @returns {BoundaryStyle} The boundary style character ('#', '=', or '-')
100
+ * @throws {Error} If the line is not a valid boundary line
101
+ *
102
+ * @example
103
+ * ```ts
104
+ * getBoundaryLineStyle("# #####"); // returns "#"
105
+ * getBoundaryLineStyle("# ====="); // returns "="
106
+ * getBoundaryLineStyle("# -----"); // returns "-"
107
+ * ```
108
+ */
109
+ declare function getBoundaryLineStyle(line: string): BoundaryStyle;
110
+ /**
111
+ * Determines if a line is a comment line.
112
+ *
113
+ * A comment line is any line whose first non-whitespace character is "#",
114
+ * including a line that contains only "#" (possibly with surrounding whitespace).
115
+ *
116
+ * @param {string} line - The line to check
117
+ * @returns {boolean} True if the line is a comment line, false otherwise
118
+ */
119
+ declare function isCommentLine(line: string): boolean;
120
+ /**
121
+ * Removes the comment marker ('#') and any following whitespace from a line.
122
+ *
123
+ * This function is designed to extract the actual content from comment lines
124
+ * in Unicode data files by removing the leading '#' character and any whitespace
125
+ * that follows it.
126
+ *
127
+ * @param {string} line - The comment line to trim
128
+ * @returns {string} The content of the comment line without the comment marker
129
+ *
130
+ * @example
131
+ * ```ts
132
+ * trimCommentLine("# Some comment"); // returns "Some comment"
133
+ * trimCommentLine("#\tTabbed comment"); // returns "Tabbed comment"
134
+ * trimCommentLine(""); // returns ""
135
+ * ```
136
+ */
137
+ declare function trimCommentLine(line: string): string;
138
+ /**
139
+ * Checks if a string line is empty after trimming whitespace.
140
+ *
141
+ * @param {string} line - The string to check for emptiness
142
+ * @returns {boolean} A boolean indicating whether the trimmed line is empty
143
+ */
144
+ declare function isEmptyLine(line: string): boolean;
145
+ /**
146
+ * Determines if a line contains data in a Unicode data file.
147
+ *
148
+ * A line is considered to contain data if it is neither a comment line
149
+ * (starting with '#') nor an empty line.
150
+ *
151
+ * @param {string} line - The line to check
152
+ * @returns {boolean} True if the line contains data, false otherwise
153
+ *
154
+ * @example
155
+ * ```ts
156
+ * isLineWithData("U+0020;SPACE"); // true
157
+ * isLineWithData("# Comment line"); // false
158
+ * isLineWithData(""); // false
159
+ * ```
160
+ */
161
+ declare function isLineWithData(line: string): boolean;
162
+ /**
163
+ * Check if a given line from a Unicode data file is a 'missing' annotation.
164
+ *
165
+ * In Unicode data files, lines starting with '# @missing:' specify the default
166
+ * property value for code points in a range that are not explicitly listed.
167
+ *
168
+ * @param {string} line - The line to check
169
+ * @returns {boolean} True if the line is a missing annotation, false otherwise
170
+ */
171
+ declare function isMissingAnnotation(line: string): boolean;
172
+ type SpecialTag = "none" | "script" | "code-point";
173
+ interface MissingAnnotation { // Parsed form of a "# @missing:" line, as returned by parseMissingAnnotation.
174
+ start: string; // START of the annotated range: the hex digits before "..", kept as text.
175
+ end: string; // END of the annotated range: the hex digits after "..", kept as text.
176
+ propertyName?: string; // Set only for the three-field form "START..END; NAME; VALUE".
177
+ defaultPropertyValue: string; // The last field of the annotation.
178
+ /**
179
+ * The special tag used in the Annotation.
180
+ *
181
+ * NOTE:
182
+ * - "none" no value is defined
183
+ * - "script" the value equal to the Script property value for this code point
184
+ * - "code-point" the string representation of the code point value
185
+ */
186
+ specialTag?: SpecialTag;
187
+ }
188
+ /**
189
+ * Parses a line into a MissingAnnotation object.
190
+ *
191
+ * This function attempts to extract information from a line that follows the
192
+ * format of a missing annotation in Unicode data files.
193
+ *
194
+ * The format being parsed is:
195
+ * `# @missing: START..END; DEFAULT_PROP_VALUE_OR_PROPERTY_NAME[; DEFAULT_PROPERTY_VALUE]`
196
+ *
197
+ * @param {string} line - The line to parse
198
+ * @returns {MissingAnnotation | null} A MissingAnnotation object if the line is a valid missing annotation, null otherwise
199
+ *
200
+ * @example
201
+ * ```ts
202
+ * parseMissingAnnotation("# @missing: 0000..007F; NA")
203
+ * // -> { start: "0000", end: "007F", defaultPropertyValue: "NA" }
204
+ *
205
+ * parseMissingAnnotation("# @missing: 0000..007F; Script; Unknown")
206
+ * // -> { start: "0000", end: "007F", propertyName: "Script", defaultPropertyValue: "Unknown" }
207
+ * ```
208
+ */
209
+ declare function parseMissingAnnotation(line: string): MissingAnnotation | null;
210
+ /**
211
+ * Attempts to infer the file name from the first line of a Unicode data file.
212
+ *
213
+ * This function extracts the file name from the first line of the content,
214
+ * assuming it's a comment line. It removes any leading '#' characters and whitespace.
215
+ *
216
+ * For example:
217
+ * - From a file with first line "# ArabicShaping-5.0.0.txt", it returns "ArabicShaping"
218
+ * - From a file with first line "# UnicodeData-5.0.0.txt", it returns "UnicodeData"
219
+ *
220
+ * @param {string} line - The first line of the file
221
+ * @returns {string | undefined} The inferred file name, or undefined if it can't be determined
222
+ */
223
+ declare function inferFileName(line: string): string | undefined;
224
+ /**
225
+ * Attempts to infer the version from the first line of a Unicode data file.
226
+ *
227
+ * This function extracts the version number from the first line of the content,
228
+ * assuming it's a comment line. It looks for a pattern like "Name-X.Y.Z.txt"
229
+ * and extracts the X.Y.Z part as the version.
230
+ *
231
+ * For example:
232
+ * - From a file with first line "# ArabicShaping-5.0.0.txt", it returns "5.0.0"
233
+ * - From a file with first line "# UnicodeData-14.0.0.txt", it returns "14.0.0"
234
+ *
235
+ * @param {string} line - The first line of the file
236
+ * @returns {string | undefined} The inferred version number, or undefined if it can't be determined
237
+ */
238
+ declare function inferVersion(line: string): string | undefined;
239
+ interface ParsedFileName { // Result shape returned by parseFileNameLine.
240
+ fileName: string; // Name part of the header, without the "-<version>" and ".txt" suffixes.
241
+ version: string | undefined; // Trailing "-<digits and dots>" segment of the header, when present.
242
+ }
243
+ /**
244
+ * Parses a line from a Unicode data file to extract the file name and version information.
245
+ *
246
+ * This function tries to extract file name and version information from a line that
247
+ * typically appears at the beginning of Unicode data files. It handles various formats:
248
+ * - "FileName-1.2.3.txt"
249
+ * - "FileName-1.2.3"
250
+ * - "FileName.txt"
251
+ *
252
+ * The function also properly handles comment markers at the beginning of the line.
253
+ *
254
+ * @param {string} line - The line to parse, typically the first line of a Unicode data file
255
+ * @returns {ParsedFileName | undefined} An object containing the file name and version if
256
+ * successfully parsed, or undefined if parsing fails
257
+ *
258
+ * @example
259
+ * ```ts
260
+ * parseFileNameLine("# UnicodeData-14.0.0.txt");
261
+ * // Returns { fileName: "UnicodeData", version: "14.0.0" }
262
+ *
263
+ * parseFileNameLine("# ArabicShaping.txt");
264
+ * // Returns { fileName: "ArabicShaping", version: undefined }
265
+ * ```
266
+ */
267
+ declare function parseFileNameLine(line: string): ParsedFileName | undefined;
268
+ /**
269
+ * Determines if a line represents a property definition in Unicode data files.
270
+ *
271
+ * In Unicode data files, properties are typically defined in comment lines that
272
+ * start with "# Property:" followed by the property name.
273
+ *
274
+ * @param {string} line - The line to check
275
+ * @returns {boolean} True if the line is a property definition, false otherwise
276
+ *
277
+ * @example
278
+ * ```ts
279
+ * isPropertyLine("# Property: Age"); // true
280
+ * isPropertyLine("# Some other comment"); // false
281
+ * isPropertyLine(""); // false
282
+ * ```
283
+ */
284
+ declare function isPropertyLine(line: string): boolean;
285
+ /**
286
+ * Extracts the property value from a property definition line in Unicode data files.
287
+ *
288
+ * This function parses a line that follows the format '# Property: [PropertyValue]'
289
+ * and returns the PropertyValue part. It is used internally by isPropertyLine
290
+ * to parse property definitions in Unicode data files.
291
+ *
292
+ * @param {string} line - The line to extract the property value from
293
+ * @returns {string | undefined} The extracted property value, or undefined if
294
+ * the line is not a valid property definition
295
+ *
296
+ * @example
297
+ * ```ts
298
+ * getPropertyValue("# Property: Age"); // returns "Age"
299
+ * getPropertyValue("# Property: "); // returns undefined
300
+ * getPropertyValue("# Not a property line"); // returns undefined
301
+ * ```
302
+ */
303
+ declare function getPropertyValue(line: string): string | undefined;
304
+ //#endregion
305
+ export { isMissingAnnotation as _, getBoundaryLineStyle as a, parseMissingAnnotation as b, inferVersion as c, isDashBoundary as d, isEOFMarker as f, isLineWithData as g, isHashBoundary as h, SpecialTag as i, isBoundaryLine as l, isEqualsBoundary as m, MissingAnnotation as n, getPropertyValue as o, isEmptyLine as p, ParsedFileName as r, inferFileName as s, BoundaryStyle as t, isCommentLine as u, isPropertyLine as v, trimCommentLine as x, parseFileNameLine as y };
@@ -0,0 +1,2 @@
1
+ import { _ as isMissingAnnotation, a as getBoundaryLineStyle, b as parseMissingAnnotation, c as inferVersion, d as isDashBoundary, f as isEOFMarker, g as isLineWithData, h as isHashBoundary, i as SpecialTag, l as isBoundaryLine, m as isEqualsBoundary, n as MissingAnnotation, o as getPropertyValue, p as isEmptyLine, r as ParsedFileName, s as inferFileName, t as BoundaryStyle, u as isCommentLine, v as isPropertyLine, x as trimCommentLine, y as parseFileNameLine } from "./line-helpers-upUikru9.js";
2
+ export { BoundaryStyle, MissingAnnotation, ParsedFileName, SpecialTag, getBoundaryLineStyle, getPropertyValue, inferFileName, inferVersion, isBoundaryLine, isCommentLine, isDashBoundary, isEOFMarker, isEmptyLine, isEqualsBoundary, isHashBoundary, isLineWithData, isMissingAnnotation, isPropertyLine, parseFileNameLine, parseMissingAnnotation, trimCommentLine };
@@ -0,0 +1,3 @@
1
+ import { _ as trimCommentLine, a as isBoundaryLine, c as isEOFMarker, d as isHashBoundary, f as isLineWithData, g as parseMissingAnnotation, h as parseFileNameLine, i as inferVersion, l as isEmptyLine, m as isPropertyLine, n as getPropertyValue, o as isCommentLine, p as isMissingAnnotation, r as inferFileName, s as isDashBoundary, t as getBoundaryLineStyle, u as isEqualsBoundary } from "./line-helpers-tsCF16UF.js";
2
+
3
+ export { getBoundaryLineStyle, getPropertyValue, inferFileName, inferVersion, isBoundaryLine, isCommentLine, isDashBoundary, isEOFMarker, isEmptyLine, isEqualsBoundary, isHashBoundary, isLineWithData, isMissingAnnotation, isPropertyLine, parseFileNameLine, parseMissingAnnotation, trimCommentLine };