@unicode-utils/core 0.12.0-beta.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,216 @@
1
+ import { n as UNICODE_STABLE_VERSION, r as UNICODE_VERSION_METADATA, t as UNICODE_DRAFT_VERSION } from "./constants-DygWVxzp.js";
2
+ import { t as RawDataFile } from "./datafile-CWbGVIAa.js";
3
+ import { _ as trimCommentLine, a as isBoundaryLine, c as isEOFMarker, d as isHashBoundary, f as isLineWithData, g as parseMissingAnnotation, h as parseFileNameLine, i as inferVersion, l as isEmptyLine, m as isPropertyLine, n as getPropertyValue, o as isCommentLine, p as isMissingAnnotation, r as inferFileName, s as isDashBoundary, t as getBoundaryLineStyle, u as isEqualsBoundary } from "./line-helpers-CYDQ0FnQ.js";
4
+
5
+ //#region src/draft.ts
6
/**
 * Fetches the Unicode draft ReadMe and extracts the draft version from it.
 *
 * Each pattern is tried in order against the downloaded text; the first
 * pattern whose capture group 1 is present wins. The default patterns go
 * from the most explicit wording to the loosest one.
 *
 * This is a best-effort lookup: a non-OK response, a network failure or any
 * error raised while matching all resolve to `null` instead of rejecting.
 *
 * @param {GetCurrentDraftVersionOptions} options - Optional overrides
 * @param {string} [options.url] - Location of the ReadMe to download
 * @param {object} [options.fetchOptions] - Passed through to `fetch` as-is
 * @param {RegExp[]} [options.patterns] - Patterns whose first capture group holds the version
 * @returns {Promise<string | null>} The captured version (e.g. "15.1.0" or "15.1"),
 * or `null` when nothing matched or the download failed
 *
 * @example
 * ```ts
 * const version = await getCurrentDraftVersion();
 * ```
 *
 * @example
 * ```ts
 * const version = await getCurrentDraftVersion({
 *   url: "https://luxass.dev/readme",
 *   patterns: [/MyCustomPattern-(\d+\.\d+)/],
 *   fetchOptions: { headers: { "Authorization": "token" } }
 * });
 * ```
 */
async function getCurrentDraftVersion(options = {}) {
  const {
    url = "https://unicode-proxy.ucdjs.dev/draft/ReadMe.txt",
    fetchOptions = {},
    patterns = [
      /Version (\d+\.\d+(?:\.\d+)?) of the Unicode Standard/,
      /Unicode(\d+\.\d+(?:\.\d+)?)/,
      /Version (\d+\.\d+)(?!\.\d)/,
    ],
  } = options;

  let version = null;
  try {
    const response = await fetch(url, fetchOptions);
    if (!response.ok) throw new Error("failed to fetch draft ReadMe");
    const readme = await response.text();
    for (const candidate of patterns) {
      const captured = readme.match(candidate)?.[1];
      if (captured != null) {
        version = captured;
        break;
      }
    }
  } catch {
    // Deliberate: callers get `null` for any failure rather than a rejection.
    version = null;
  }
  return version;
}
54
+
55
+ //#endregion
56
+ //#region src/hexcodes.ts
57
/**
 * Splits a hex string on `joiner` and parses every piece as a base-16 number.
 *
 * Each piece is trimmed before parsing. In strict mode the input and every
 * piece are validated and the first problem throws; in lenient mode the raw
 * `Number.parseInt(piece, 16)` result is returned for each piece, which is
 * `NaN` when the piece does not start with a hex digit.
 *
 * @param {string} hex - The hexadecimal string to convert
 * @param {string} joiner - The separator between hex values
 * @param {boolean} strict - When true, invalid input throws instead of yielding NaN
 * @returns {number[]} One number per piece, in input order
 * @throws {TypeError} In strict mode, when `hex` or `joiner` is not a string
 * @throws {Error} In strict mode, when the input is blank, a piece is empty or
 * not pure hex, or a value exceeds U+10FFFF
 *
 * @example
 * ```ts
 * fromHexToCodepoint('1F600-1F64F', '-') // [128512, 128591]
 * fromHexToCodepoint('1F600,1F64F', ',') // [128512, 128591]
 * fromHexToCodepoint('1F600-', '-', true) // throws Error
 * fromHexToCodepoint('1F600-', '-', false) // [128512, NaN]
 * ```
 */
function fromHexToCodepoint(hex, joiner, strict = false) {
  if (strict) {
    const bothStrings = typeof hex === "string" && typeof joiner === "string";
    if (!bothStrings) throw new TypeError("Both hex and joiner must be strings");
    if (hex.trim() === "") throw new Error("Hex string cannot be empty");
  }

  return hex.split(joiner).map((piece) => {
    const token = piece.trim();
    const value = Number.parseInt(token, 16);
    if (!strict) return value;

    if (token === "") throw new Error("Empty hex part found");
    if (!/^[0-9a-f]+$/i.test(token)) throw new Error(`Invalid hex format: "${token}"`);
    if (Number.isNaN(value)) throw new Error(`Failed to parse hex value: "${token}"`);
    // 1114111 === 0x10FFFF, the largest Unicode code point.
    if (value > 1114111) throw new Error(`Invalid Unicode codepoint: ${value.toString(16).toUpperCase()} (exceeds U+10FFFF)`);
    return value;
  });
}
91
/**
 * Expands a "START..END" hex range into every hex value it covers.
 *
 * Input without ".." is returned untouched as a one-element array. For a
 * range, both bounds are parsed leniently via `fromHexToCodepoint`; if either
 * bound is missing or NaN the result is an empty array. Expanded values are
 * upper-cased and left-padded with zeros to at least four digits. A range
 * whose start is greater than its end expands to an empty array.
 *
 * @param {string} hex - A single hex value or a "START..END" range
 * @returns {string[]} The expanded hex values, or `[hex]` for a non-range input
 *
 * @example
 * ```ts
 * expandHexRange("0000..0002") // Returns ["0000", "0001", "0002"]
 * expandHexRange("0000") // Returns ["0000"]
 * ```
 */
function expandHexRange(hex) {
  if (!hex.includes("..")) return [hex];

  const bounds = fromHexToCodepoint(hex, "..");
  const first = bounds[0];
  const last = bounds[1];
  const boundsUsable = first != null && last != null && !Number.isNaN(first) && !Number.isNaN(last);
  if (!boundsUsable) return [];

  const expanded = [];
  let current = first;
  while (current <= last) {
    expanded.push(current.toString(16).padStart(4, "0").toUpperCase());
    current += 1;
  }
  return expanded;
}
117
/**
 * Removes joiner and variation-selector code points from a hex sequence.
 *
 * The following whole tokens are removed (case-insensitively), together with
 * the "-" or whitespace separator next to them:
 * - 200D (Zero Width Joiner)
 * - FE0E (Variation Selector-15, text style)
 * - FE0F (Variation Selector-16, emoji style)
 *
 * Only complete tokens are removed: a longer code point that merely contains
 * one of these digit runs (for example "1200D") is left intact, and removing
 * a token at the start of the sequence does not leave a dangling separator.
 *
 * @param {string} hex - The hex sequence to clean, tokens separated by "-" or whitespace
 * @returns {string} The hex sequence without the tokens listed above
 *
 * @example
 * ```ts
 * stripHex("1F468-200D-1F469") // "1F468-1F469"
 * stripHex("263A-FE0F") // "263A"
 * stripHex("1200D") // "1200D"
 * ```
 */
function stripHex(hex) {
  return hex
    // Tokens that follow a separator; the lookahead rejects longer hex runs.
    .replace(/[-\s](?:200D|FE0E|FE0F)(?![0-9A-F])/gi, "")
    // A token left at the very start takes its trailing separator with it.
    .replace(/^(?:200D|FE0E|FE0F)(?:[-\s]|$)/i, "");
}
130
+
131
+ //#endregion
132
+ //#region src/mappings.ts
133
/**
 * Lookup table from a Unicode Standard version to the identifier under which
 * its Unicode Character Database (UCD) files are published.
 *
 * UCD files live at https://unicode.org/Public/{version}, but for the
 * versions listed here {version} differs from the Unicode version itself,
 * e.g. Unicode 4.0.1 maps to "4.0-Update1" and Unicode 2.1.9 maps to
 * "2.1-Update4".
 *
 * Only versions with such a special identifier are listed; versions 4.1.0 and
 * later use their version number directly.
 */
const UNICODE_TO_UCD_VERSION_MAPPINGS = Object.fromEntries([
  ["1.0.0", "1.1-Update"],
  ["1.0.1", "1.1-Update"],
  ["1.1.0", "1.1-Update"],
  ["1.1.5", "1.1-Update"],
  ["2.0.0", "2.0-Update"],
  ["2.1.0", "2.1-Update4"],
  ["2.1.1", "2.1-Update"],
  ["2.1.2", "2.1-Update"],
  ["2.1.5", "2.1-Update2"],
  ["2.1.8", "2.1-Update3"],
  ["2.1.9", "2.1-Update4"],
  ["3.0.0", "3.0-Update"],
  ["3.0.1", "3.0-Update1"],
  ["3.1.0", "3.1-Update"],
  ["3.1.1", "3.1-Update1"],
  ["3.2.0", "3.2-Update"],
  ["4.0.0", "4.0-Update"],
  ["4.0.1", "4.0-Update1"],
]);
166
/**
 * Resolves a Unicode version to the identifier used for its UCD files.
 *
 * Versions listed in UNICODE_TO_UCD_VERSION_MAPPINGS are translated (for
 * example "4.0.1" becomes "4.0-Update1"); every other input is returned
 * unchanged, which covers newer versions that use their version number
 * directly.
 *
 * Only the table's own entries are consulted, so inputs that happen to match
 * an inherited object property name (such as "constructor") are passed
 * through like any other unmapped version.
 *
 * @param {string} unicodeVersion - The Unicode version to resolve
 * @returns {string} The mapped UCD identifier, or `unicodeVersion` when unmapped
 */
function resolveUCDVersion(unicodeVersion) {
  const isMapped = Object.prototype.hasOwnProperty.call(UNICODE_TO_UCD_VERSION_MAPPINGS, unicodeVersion);
  if (!isMapped) return unicodeVersion;
  // Keep the fallback for a falsy table value, as a plain truthiness check would.
  return UNICODE_TO_UCD_VERSION_MAPPINGS[unicodeVersion] || unicodeVersion;
}
184
+
185
+ //#endregion
186
+ //#region src/path.ts
187
/**
 * Builds the URL path of a Unicode Character Database (UCD) file.
 *
 * The path is `/{version}/{path}`, with a `ucd/` segment inserted before
 * `path` when `hasUCDFolderPath(version)` is true. The value is produced by
 * resolving against https://www.unicode.org/ and taking the URL's `pathname`,
 * so it is normalised and percent-encoded the way `URL` does it.
 *
 * @param {string} version - The Unicode version (e.g., "15.1.0")
 * @param {string} path - The file to access (e.g., "PropList.txt", "DerivedLineBreak.txt")
 * @returns {string} The absolute path of the UCD file
 */
function buildUCDPath(version, path) {
  const folder = hasUCDFolderPath(version) ? "ucd/" : "";
  const resolved = new URL(`${version}/${folder}${path}`, "https://www.unicode.org/");
  return resolved.pathname;
}
197
/**
 * Tells whether the files of a version sit inside a `ucd/` subfolder.
 *
 * A version is treated as NOT having the subfolder when either:
 * 1. its identifier contains "Update" (e.g. '4.0-Update1'), or
 * 2. it is one of the keys of UNICODE_TO_UCD_VERSION_MAPPINGS (e.g. '4.0.1').
 *
 * Every other version (e.g. '15.0.0') is treated as having it.
 *
 * @param {string} version - The Unicode version string to check
 * @returns {boolean} True when the version uses the `ucd/` folder structure,
 * false otherwise
 */
function hasUCDFolderPath(version) {
  const usesUpdateNaming = version.includes("Update");
  if (usesUpdateNaming) return false;

  const legacyVersions = Object.keys(UNICODE_TO_UCD_VERSION_MAPPINGS);
  return legacyVersions.every((legacy) => legacy !== version);
}
214
+
215
+ //#endregion
216
+ export { RawDataFile, UNICODE_DRAFT_VERSION, UNICODE_STABLE_VERSION, UNICODE_TO_UCD_VERSION_MAPPINGS, UNICODE_VERSION_METADATA, buildUCDPath, expandHexRange, fromHexToCodepoint, getBoundaryLineStyle, getCurrentDraftVersion, getPropertyValue, hasUCDFolderPath, inferFileName, inferVersion, isBoundaryLine, isCommentLine, isDashBoundary, isEOFMarker, isEmptyLine, isEqualsBoundary, isHashBoundary, isLineWithData, isMissingAnnotation, isPropertyLine, parseFileNameLine, parseMissingAnnotation, resolveUCDVersion, stripHex, trimCommentLine };
@@ -0,0 +1,373 @@
1
+ //#region src/line-helpers.ts
2
/**
 * Builds the pattern for a boundary line: optional leading whitespace, a "#",
 * optional whitespace, then two or more repetitions of `marker` and nothing
 * else but trailing whitespace.
 *
 * @param {string} marker - The single character that is repeated in the boundary
 * @returns {RegExp} The anchored boundary pattern
 */
const boundaryRegexFor = (marker) => new RegExp(`^\\s*#\\s*${marker}{2,}\\s*$`);

// Boundary lines such as "# ####", "# ====" and "# ----".
const HASH_BOUNDARY_REGEX = boundaryRegexFor("#");
const EQUALS_BOUNDARY_REGEX = boundaryRegexFor("=");
const DASH_BOUNDARY_REGEX = boundaryRegexFor("-");
5
/**
 * Tells whether a line is the End-of-File marker of a Unicode data file.
 *
 * The line matches when, after trimming surrounding whitespace, it is
 * exactly "# EOF". A missing or empty line is never a marker.
 *
 * @param {string} [line] - The line to check
 * @returns {boolean} True if the line is an EOF marker, false otherwise
 *
 * @example
 * ```ts
 * isEOFMarker("# EOF"); // true
 * isEOFMarker("Some text"); // false
 * isEOFMarker(); // false
 * ```
 */
function isEOFMarker(line) {
  return Boolean(line) && line.trim() === "# EOF";
}
25
/**
 * Tells whether a line is a hash boundary such as "# ###".
 *
 * The check is delegated to HASH_BOUNDARY_REGEX; a missing or empty line is
 * never a boundary.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line is a hash boundary, false otherwise
 *
 * @example
 * ```ts
 * isHashBoundary("# #####"); // true
 * isHashBoundary("# Some text"); // false
 * isHashBoundary(""); // false
 * ```
 */
function isHashBoundary(line) {
  return line ? HASH_BOUNDARY_REGEX.test(line) : false;
}
45
/**
 * Tells whether a line is an equals boundary such as "# ===".
 *
 * The check is delegated to EQUALS_BOUNDARY_REGEX; a missing or empty line is
 * never a boundary.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line is an equals boundary, false otherwise
 *
 * @example
 * ```ts
 * isEqualsBoundary("# ====="); // true
 * isEqualsBoundary("# Some text"); // false
 * isEqualsBoundary(""); // false
 * ```
 */
function isEqualsBoundary(line) {
  return line ? EQUALS_BOUNDARY_REGEX.test(line) : false;
}
65
/**
 * Tells whether a line is a dash boundary such as "# ---".
 *
 * The check is delegated to DASH_BOUNDARY_REGEX; a missing or empty line is
 * never a boundary.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line is a dash boundary, false otherwise
 *
 * @example
 * ```ts
 * isDashBoundary("# -----"); // true
 * isDashBoundary("# Some text"); // false
 * isDashBoundary(""); // false
 * ```
 */
function isDashBoundary(line) {
  return line ? DASH_BOUNDARY_REGEX.test(line) : false;
}
85
/**
 * Tells whether a line is any kind of boundary line.
 *
 * The line is checked, in order, against the hash, equals and dash boundary
 * predicates; the first one that accepts it makes the line a boundary. A
 * missing or empty line is never a boundary.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line is a boundary line, false otherwise
 *
 * @example
 * ```ts
 * isBoundaryLine("# #####"); // true (hash boundary)
 * isBoundaryLine("# ====="); // true (equals boundary)
 * isBoundaryLine("# -----"); // true (dash boundary)
 * isBoundaryLine("# Some text"); // false
 * isBoundaryLine(""); // false
 * ```
 */
function isBoundaryLine(line) {
  if (!line) return false;
  const predicates = [isHashBoundary, isEqualsBoundary, isDashBoundary];
  return predicates.some((matches) => matches(line));
}
108
/**
 * Returns the character a boundary line is drawn with.
 *
 * The line is tested against the hash, equals and dash boundary predicates in
 * that order, and the character of the first matching kind is returned.
 *
 * @param {string} line - The boundary line to analyze
 * @returns {BoundaryStyle} The boundary style character ('#', '=', or '-')
 * @throws {Error} If the line matches none of the boundary kinds
 *
 * @example
 * ```ts
 * getBoundaryLineStyle("# #####"); // returns "#"
 * getBoundaryLineStyle("# ====="); // returns "="
 * getBoundaryLineStyle("# -----"); // returns "-"
 * ```
 */
function getBoundaryLineStyle(line) {
  const detectors = [
    ["#", isHashBoundary],
    ["=", isEqualsBoundary],
    ["-", isDashBoundary],
  ];
  for (const [style, matches] of detectors) {
    if (matches(line)) return style;
  }
  throw new Error(`invalid boundary style for line: ${line}`);
}
132
/**
 * Determines if a line is a comment line.
 *
 * A comment line is either a line that starts with "#" immediately followed
 * by a whitespace character (a space or a tab, e.g. "# text" or "#\ttext"),
 * or a line that contains only "#" once surrounding whitespace is trimmed.
 *
 * A "#" directly followed by other text (e.g. "#text") is not a comment, and
 * a missing or empty line is never a comment.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line is a comment line, false otherwise
 *
 * @example
 * ```ts
 * isCommentLine("# Some comment"); // true
 * isCommentLine("#\tTabbed comment"); // true
 * isCommentLine("#"); // true
 * isCommentLine("0041;LATIN CAPITAL LETTER A"); // false
 * ```
 */
function isCommentLine(line) {
  if (!line) return false;
  // One whitespace test covers both "# " and "#\t" prefixes.
  return /^#\s/.test(line) || line.trim() === "#";
}
145
/**
 * Strips the comment marker from a line and returns what is left.
 *
 * The line is trimmed first, then a leading "#" together with any whitespace
 * directly after it is removed. A missing or empty line yields "". A line
 * without a leading "#" is only trimmed.
 *
 * @param {string} line - The comment line to trim
 * @returns {string} The line's content without the comment marker
 *
 * @example
 * ```ts
 * trimCommentLine("# Some comment"); // returns "Some comment"
 * trimCommentLine("#\tTabbed comment"); // returns "Tabbed comment"
 * trimCommentLine(""); // returns ""
 * ```
 */
function trimCommentLine(line) {
  return line ? line.trim().replace(/^#\s*/, "") : "";
}
166
/**
 * Tells whether a line has no content.
 *
 * A missing line, an empty string and a string made only of whitespace all
 * count as empty.
 *
 * @param {string} line - The string to check for emptiness
 * @returns {boolean} True when the line is missing or blank, false otherwise
 */
function isEmptyLine(line) {
  return !line || line.trim() === "";
}
176
/**
 * Tells whether a line of a Unicode data file carries data.
 *
 * A line carries data when `isCommentLine` rejects it and `isEmptyLine`
 * rejects it as well.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line contains data, false otherwise
 *
 * @example
 * ```ts
 * isLineWithData("U+0020;SPACE"); // true
 * isLineWithData("# Comment line"); // false
 * isLineWithData(""); // false
 * ```
 */
function isLineWithData(line) {
  const isCommentOrBlank = isCommentLine(line) || isEmptyLine(line);
  return !isCommentOrBlank;
}
195
/**
 * Checks whether a line from a Unicode data file is a '@missing' annotation.
 *
 * A line qualifies when it starts with the exact prefix "# @missing:". Such
 * lines carry a code point range together with a default property value, see
 * the format handled by `parseMissingAnnotation`. Only the prefix is checked
 * here; the rest of the line is not validated.
 *
 * A missing, empty or non-string argument is never an annotation.
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line is a missing annotation, false otherwise
 *
 * @example
 * ```ts
 * isMissingAnnotation("# @missing: 0000..007F; NA"); // true
 * isMissingAnnotation("# Some comment"); // false
 * isMissingAnnotation(""); // false
 * ```
 */
function isMissingAnnotation(line) {
  // Guard first so that absent input yields false instead of throwing.
  return typeof line === "string" && line.startsWith("# @missing:");
}
207
/**
 * Angle-bracketed placeholder values that may appear as the default value of
 * a '@missing' annotation, mapped to the tag name reported for them.
 */
const MISSING_ANNOTATION_SPECIAL_TAGS = Object.fromEntries(
  ["none", "script", "code-point"].map((tag) => [`<${tag}>`, tag]),
);
212
/**
 * Parses a '@missing' annotation line into a MissingAnnotation object.
 *
 * The format being parsed is:
 * `# @missing: START..END; DEFAULT_PROP_VALUE_OR_PROPERTY_NAME[; DEFAULT_PROPERTY_VALUE]`
 *
 * With two fields after the range, the first is the property name and the
 * second the default value; with a single field, that field is the default
 * value and `propertyName` is undefined. START and END must be upper-case hex.
 *
 * `specialTag` is set only when the default value is exactly one of the keys
 * of MISSING_ANNOTATION_SPECIAL_TAGS. Only the table's own keys count, so a
 * value such as "toString" does not produce a tag.
 *
 * @param {string} line - The line to parse
 * @returns {MissingAnnotation | null} The parsed annotation, or null when the
 * line is absent, is not a missing annotation, or does not follow the format
 *
 * @example
 * ```ts
 * parseMissingAnnotation("# @missing: 0000..007F; NA")
 * // -> { start: "0000", end: "007F", defaultPropertyValue: "NA" }
 *
 * parseMissingAnnotation("# @missing: 0000..007F; Script; Unknown")
 * // -> { start: "0000", end: "007F", propertyName: "Script", defaultPropertyValue: "Unknown" }
 * ```
 */
function parseMissingAnnotation(line) {
  if (!line || !isMissingAnnotation(line)) return null;

  const match = line.match(/^# @missing: ([0-9A-F]+)\.\.([0-9A-F]+); ([^;\n]+)(?:; ([^\n]+))?$/m);
  if (match == null) return null;

  const [, start, end, firstField, secondField] = match;
  if (start == null || end == null || firstField == null) return null;

  const hasPropertyName = secondField != null;
  const defaultPropertyValue = hasPropertyName ? secondField : firstField;
  // Own-property check: `in` would also accept inherited names like "toString".
  const isSpecial = Object.prototype.hasOwnProperty.call(MISSING_ANNOTATION_SPECIAL_TAGS, defaultPropertyValue);

  return {
    start,
    end,
    propertyName: hasPropertyName ? firstField : undefined,
    defaultPropertyValue,
    specialTag: isSpecial ? MISSING_ANNOTATION_SPECIAL_TAGS[defaultPropertyValue] : undefined,
  };
}
249
/**
 * Infers the file name from the first line of a Unicode data file.
 *
 * The line is handed to `parseFileNameLine` and the `fileName` of its result
 * is returned.
 *
 * For example:
 * - "# ArabicShaping-5.0.0.txt" gives "ArabicShaping"
 * - "# UnicodeData-5.0.0.txt" gives "UnicodeData"
 *
 * @param {string} line - The first line of the file
 * @returns {string | undefined} The inferred file name, or undefined if the line could not be parsed
 */
function inferFileName(line) {
  const parsed = parseFileNameLine(line);
  return parsed == null ? undefined : parsed.fileName;
}
265
/**
 * Infers the version from the first line of a Unicode data file.
 *
 * The line is handed to `parseFileNameLine` and the `version` of its result
 * is returned, i.e. the X.Y.Z part of a "Name-X.Y.Z.txt" header.
 *
 * For example:
 * - "# ArabicShaping-5.0.0.txt" gives "5.0.0"
 * - "# UnicodeData-14.0.0.txt" gives "14.0.0"
 *
 * @param {string} line - The first line of the file
 * @returns {string | undefined} The inferred version, or undefined if the line
 * could not be parsed or carries no version
 */
function inferVersion(line) {
  const parsed = parseFileNameLine(line);
  return parsed == null ? undefined : parsed.version;
}
282
/**
 * Extracts the file name and version from a header line of a Unicode data file.
 *
 * Only the first line of the input is considered, and it has to be a comment
 * line according to `isCommentLine`. After the comment marker is stripped,
 * these shapes are recognised:
 * - "FileName-1.2.3.txt"
 * - "FileName-1.2.3"
 * - "FileName.txt"
 *
 * The version is a trailing "-" followed by digits and dots. When there is no
 * ".txt" suffix the whole remaining text (minus such a version suffix) is
 * taken as the file name.
 *
 * @param {string} line - The line to parse, typically the first line of a Unicode data file
 * @returns {ParsedFileName | undefined} The file name and version, or undefined
 * when the line is absent, is not a comment, or yields an empty file name
 *
 * @example
 * ```ts
 * parseFileNameLine("# UnicodeData-14.0.0.txt");
 * // Returns { fileName: "UnicodeData", version: "14.0.0" }
 *
 * parseFileNameLine("# ArabicShaping.txt");
 * // Returns { fileName: "ArabicShaping", version: undefined }
 * ```
 */
function parseFileNameLine(line) {
  if (!line) return undefined;

  const header = line.split("\n")[0].trim();
  if (!isCommentLine(header)) return undefined;

  const content = header.replace(/^#\s*/, "");
  if (content === "") return undefined;

  // Prefer the ".txt" form; fall back to the same shape without the extension.
  const parsed = content.match(/^(.*?)(?:-([0-9.]+))?\.txt$/) ?? content.match(/^(.*?)(?:-([0-9.]+))?$/);
  /* v8 ignore next 3 */
  if (parsed == null) return undefined;

  const fileName = parsed[1];
  const version = parsed[2];
  if (!fileName || fileName.trim() === "") return undefined;

  return { fileName, version };
}
325
/**
 * Tells whether a line is a property definition of a Unicode data file.
 *
 * The line has to be a comment line according to `isCommentLine`, and
 * `getPropertyValue` has to extract a non-blank value from it, which is the
 * case for lines of the form "# Property: Name".
 *
 * @param {string} line - The line to check
 * @returns {boolean} True if the line is a property definition, false otherwise
 *
 * @example
 * ```ts
 * isPropertyLine("# Property: Age"); // true
 * isPropertyLine("# Some other comment"); // false
 * isPropertyLine(""); // false
 * ```
 */
function isPropertyLine(line) {
  if (!line || !isCommentLine(line)) return false;

  const propertyValue = getPropertyValue(line);
  if (propertyValue === undefined) return false;
  return propertyValue.trim() !== "";
}
347
/**
 * Extracts the property value from a property definition line.
 *
 * The comment marker is stripped with `trimCommentLine`; the remaining text
 * must start with "Property:", and whatever follows that prefix is returned
 * with surrounding whitespace removed. It is used by `isPropertyLine`.
 *
 * A line whose value part is blank (e.g. "# Property: ") yields undefined,
 * so a returned string is never empty.
 *
 * @param {string} line - The line to extract the property value from
 * @returns {string | undefined} The extracted property value, or undefined if
 * the line is not a property definition or has no value
 *
 * @example
 * ```ts
 * getPropertyValue("# Property: Age"); // returns "Age"
 * getPropertyValue("# Property: "); // returns undefined
 * getPropertyValue("# Not a property line"); // returns undefined
 * ```
 */
function getPropertyValue(line) {
  const prefix = "Property:";
  const comment = trimCommentLine(line).trim();
  if (!comment.startsWith(prefix)) return undefined;

  const value = comment.slice(prefix.length).trim();
  // An empty value means there is no property to report.
  return value === "" ? undefined : value;
}
371
+
372
+ //#endregion
373
+ export { trimCommentLine as _, isBoundaryLine as a, isEOFMarker as c, isHashBoundary as d, isLineWithData as f, parseMissingAnnotation as g, parseFileNameLine as h, inferVersion as i, isEmptyLine as l, isPropertyLine as m, getPropertyValue as n, isCommentLine as o, isMissingAnnotation as p, inferFileName as r, isDashBoundary as s, getBoundaryLineStyle as t, isEqualsBoundary as u };