@dev-pi2pie/word-counter 0.1.3 → 0.1.4-canary.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -0
- package/dist/esm/bin.mjs +2033 -1656
- package/dist/esm/bin.mjs.map +1 -1
- package/dist/esm/worker/count-worker.mjs +1370 -0
- package/dist/esm/worker/count-worker.mjs.map +1 -0
- package/dist/esm/worker-pool.mjs +187 -0
- package/dist/esm/worker-pool.mjs.map +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,1370 @@
|
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { parentPort, workerData } from "node:worker_threads";
|
|
3
|
+
import { parseDocument } from "yaml";
|
|
4
|
+
|
|
5
|
+
//#region src/markdown/toml/arrays.ts
|
|
6
|
+
/**
 * Fetch the array stored under `key`, creating and storing an empty one when
 * the slot does not currently hold an array.
 *
 * @param {object} result - Object that owns the slot.
 * @param {string} key - Property name to look up.
 * @returns {Array} The existing or newly created array.
 */
function ensureArrayContainer(result, key) {
  const current = result[key];
  if (!Array.isArray(current)) {
    // Anything that is not an array (including a missing value) is replaced.
    const created = [];
    result[key] = created;
    return created;
  }
  return current;
}
|
|
13
|
+
/**
 * Replace every array-valued property of `result`, in place, with a single
 * string: each entry becomes "key=value, key=value" and entries are joined
 * with " | ". Non-array properties are left untouched.
 *
 * @param {object} result - Object whose array properties are flattened.
 */
function flattenArrayTables(result) {
  const renderEntry = (entry) =>
    Object.entries(entry)
      .map(([field, fieldValue]) => `${field}=${fieldValue}`)
      .join(", ");
  for (const [key, value] of Object.entries(result)) {
    if (Array.isArray(value)) result[key] = value.map(renderEntry).join(" | ");
  }
}
|
|
19
|
+
|
|
20
|
+
//#endregion
|
|
21
|
+
//#region src/markdown/toml/keys.ts
|
|
22
|
+
/**
 * Trim `key` and, when the trimmed text starts and ends with the same quote
 * character (double or single), drop the first and last character.
 *
 * @param {string} key - Raw key text.
 * @returns {string} The trimmed key, without one layer of surrounding quotes.
 */
function stripKeyQuotes(key) {
  const text = key.trim();
  for (const quote of ["\"", "'"]) {
    if (text.startsWith(quote) && text.endsWith(quote)) return text.slice(1, -1);
  }
  return text;
}
|
|
27
|
+
/**
 * Normalize a key or dotted key path.
 *
 * A fully quoted key is returned without its quotes (dots inside are kept
 * verbatim). Otherwise the text is split on ".", each segment is trimmed, and
 * the segments are re-joined with ".".
 *
 * @param {string} key - Raw key text.
 * @returns {string|null} Normalized key, or null when the key, the quoted
 *   content, or any dotted segment is empty.
 */
function normalizeKeyPath(key) {
  const text = key.trim();
  if (text === "") return null;
  const wrappedIn = (quote) => text.startsWith(quote) && text.endsWith(quote);
  if (wrappedIn("\"") || wrappedIn("'")) {
    // `text` is already trimmed, so removing the quotes is a plain slice.
    const inner = text.slice(1, -1);
    return inner === "" ? null : inner;
  }
  const parts = [];
  for (const piece of text.split(".")) {
    const cleaned = piece.trim();
    if (cleaned === "") return null;
    parts.push(cleaned);
  }
  return parts.join(".");
}
|
|
38
|
+
|
|
39
|
+
//#endregion
|
|
40
|
+
//#region src/markdown/toml/strings.ts
|
|
41
|
+
/**
 * Cut a line at the first "#" that is outside a quoted string and trim the
 * trailing whitespace of what remains. Lines without such a "#" are returned
 * unchanged.
 *
 * Backslash escapes are honoured inside double-quoted strings only.
 *
 * @param {string} line - One line of text.
 * @returns {string} The line without its trailing comment.
 */
function stripInlineComment(line) {
  let openQuote = null; // "double" | "single" | null
  let skipNext = false;
  for (let pos = 0; pos < line.length; pos += 1) {
    const ch = line.charAt(pos);
    if (openQuote === null) {
      if (ch === "#") return line.slice(0, pos).trimEnd();
      if (ch === "\"") openQuote = "double";
      else if (ch === "'") openQuote = "single";
      continue;
    }
    if (skipNext) {
      // Character following a backslash: never closes the string.
      skipNext = false;
      continue;
    }
    if (openQuote === "double") {
      if (ch === "\\") skipNext = true;
      else if (ch === "\"") openQuote = null;
    } else if (ch === "'") {
      openQuote = null;
    }
  }
  return line;
}
|
|
77
|
+
/**
 * Decode the backslash escapes \\ \" \n \t and \r of a basic string body.
 *
 * The input is decoded in a single left-to-right pass so that the output of
 * one escape is never re-read as the start of another: `\\n` (an escaped
 * backslash followed by the letter n) yields a backslash and "n", not a
 * newline. Unrecognized escape sequences are left as written.
 *
 * @param {string} input - String body without its surrounding quotes.
 * @returns {string} The decoded text.
 */
function unescapeBasic(input) {
  const decoded = {
    "\\": "\\",
    "\"": "\"",
    n: "\n",
    t: "\t",
    r: "\r"
  };
  return input.replace(/\\([\\"ntr])/g, (_match, code) => decoded[code]);
}
|
|
80
|
+
/**
 * Decode a quoted string literal.
 *
 * Delimiters are tried in the order `"""`, `'''`, `"`, `'`. Double-quoted
 * forms are passed through `unescapeBasic`; single-quoted forms are returned
 * verbatim.
 *
 * @param {string} value - Trimmed literal text including its delimiters.
 * @returns {string|null} The string content, or null when `value` is not
 *   wrapped in a matching pair of delimiters.
 */
function parseStringLiteral(value) {
  const forms = [
    ["\"\"\"", true],
    ["'''", false],
    ["\"", true],
    ["'", false]
  ];
  for (const [delimiter, hasEscapes] of forms) {
    if (!value.startsWith(delimiter) || !value.endsWith(delimiter)) continue;
    const body = value.slice(delimiter.length, -delimiter.length);
    return hasEscapes ? unescapeBasic(body) : body;
  }
  return null;
}
|
|
87
|
+
|
|
88
|
+
//#endregion
|
|
89
|
+
//#region src/markdown/toml/values.ts
|
|
90
|
+
/**
 * Interpret a scalar value.
 *
 * @param {string} raw - Raw value text.
 * @returns {string|number|boolean|null} null for blank input; the decoded
 *   text for a quoted string; a boolean for `true`/`false`; a number for a
 *   decimal integer/float literal; otherwise the trimmed text itself (this
 *   includes date-like values, which are kept as strings).
 */
function parsePrimitive(raw) {
  const text = raw.trim();
  if (text === "") return null;
  const quoted = parseStringLiteral(text);
  if (quoted !== null) return quoted;
  switch (text) {
    case "true":
      return true;
    case "false":
      return false;
    default:
      break;
  }
  const numberPattern = /^[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$/;
  // Dates and any other bare token fall through and stay plain text.
  return numberPattern.test(text) ? Number(text) : text;
}
|
|
101
|
+
/**
 * Parse a single-line array literal such as `[1, "a, b", true]`.
 *
 * Items are split on commas that are outside quoted strings and each item is
 * handed to `parsePrimitive`. Backslash escapes are honoured inside
 * double-quoted strings only. A trailing comma after the last item is
 * accepted, as TOML allows; an empty item anywhere else rejects the array.
 *
 * @param {string} raw - Raw value text.
 * @returns {Array|null} The parsed items, or null when `raw` is not wrapped
 *   in square brackets or contains an empty item.
 */
function parseArray(raw) {
  const value = raw.trim();
  if (!value.startsWith("[") || !value.endsWith("]")) return null;
  const inner = value.slice(1, -1).trim();
  if (!inner) return [];
  const items = [];
  let current = "";
  let inString = null; // "double" | "single" | null
  let escaped = false;
  for (let i = 0; i < inner.length; i += 1) {
    const char = inner[i] ?? "";
    if (inString) {
      current += char;
      if (escaped) {
        escaped = false;
      } else if (char === "\\" && inString === "double") {
        escaped = true;
      } else if (inString === "double" && char === "\"" || inString === "single" && char === "'") {
        inString = null;
      }
      continue;
    }
    if (char === ",") {
      const item = parsePrimitive(current);
      if (item === null) return null;
      items.push(item);
      current = "";
      continue;
    }
    if (char === "\"") inString = "double";
    else if (char === "'") inString = "single";
    current += char;
  }
  // Nothing after the last comma: a trailing comma, not a missing item.
  if (current.trim() === "" && items.length > 0) return items;
  const finalItem = parsePrimitive(current);
  if (finalItem === null) return null;
  items.push(finalItem);
  return items;
}
|
|
150
|
+
/**
 * Parse a single-line inline table such as `{ a = 1, b = [1, 2] }`.
 *
 * The body is split on commas that are outside strings, square brackets and
 * braces; each piece must be `key = value`. Values are parsed with
 * `normalizeValue`. Nested inline tables are rejected.
 *
 * @param {string} raw - Raw value text.
 * @returns {object|null} Map of normalized keys to parsed values, or null
 *   when the text is not brace-wrapped or any pair is invalid.
 */
function parseInlineTable(raw) {
  const text = raw.trim();
  if (!(text.startsWith("{") && text.endsWith("}"))) return null;
  const body = text.slice(1, -1).trim();
  if (body === "") return {};

  // Pass 1: split the body into raw "key = value" pieces.
  const pairs = [];
  let buffer = "";
  let quote = null; // "double" | "single" | null
  let skipNext = false;
  let squareDepth = 0;
  let curlyDepth = 0;
  for (let pos = 0; pos < body.length; pos += 1) {
    const ch = body.charAt(pos);
    if (quote !== null) {
      buffer += ch;
      if (skipNext) skipNext = false;
      else if (quote === "double" && ch === "\\") skipNext = true;
      else if (quote === "double" && ch === "\"" || quote === "single" && ch === "'") quote = null;
      continue;
    }
    if (ch === "," && squareDepth === 0 && curlyDepth === 0) {
      pairs.push(buffer);
      buffer = "";
      continue;
    }
    switch (ch) {
      case "\"":
        quote = "double";
        break;
      case "'":
        quote = "single";
        break;
      case "[":
        squareDepth += 1;
        break;
      case "]":
        if (squareDepth > 0) squareDepth -= 1;
        break;
      case "{":
        curlyDepth += 1;
        break;
      case "}":
        if (curlyDepth > 0) curlyDepth -= 1;
        break;
      default:
        break;
    }
    buffer += ch;
  }
  if (buffer.trim() !== "") pairs.push(buffer);

  // Pass 2: turn each piece into a key/value entry.
  const table = {};
  for (const pair of pairs) {
    const equalsAt = pair.indexOf("=");
    if (equalsAt < 0) return null;
    const key = normalizeKeyPath(pair.slice(0, equalsAt));
    if (!key) return null;
    const rawValue = pair.slice(equalsAt + 1).trim();
    if (rawValue === "" || rawValue.startsWith("{")) return null;
    const parsed = normalizeValue(rawValue);
    if (parsed === null) return null;
    if (typeof parsed === "object" && !Array.isArray(parsed)) return null;
    table[key] = parsed;
  }
  return table;
}
|
|
231
|
+
/**
 * Parse a raw value by shape: brace-wrapped text goes to `parseInlineTable`,
 * bracket-wrapped text to `parseArray`, everything else to `parsePrimitive`.
 *
 * @param {string} value - Raw value text.
 * @returns {*} The parsed value, or null when the input is empty or cannot
 *   be parsed.
 */
function normalizeValue(value) {
  if (!value) return null;
  const text = value.trim();
  const wrapped = (open, close) => text.startsWith(open) && text.endsWith(close);
  if (wrapped("{", "}")) return parseInlineTable(text);
  // A bracketed value is either a valid array or a parse failure; it is never
  // treated as a bare primitive.
  if (wrapped("[", "]")) return parseArray(text) || null;
  return parsePrimitive(text);
}
|
|
240
|
+
/**
 * Render a parsed value as plain text: "" for null/undefined, items joined
 * with ", " for arrays, `String(value)` otherwise.
 *
 * @param {*} value - Parsed value.
 * @returns {string} Text form of the value.
 */
function toPlainText(value) {
  if (value === null || value === undefined) return "";
  return Array.isArray(value) ? value.map((item) => String(item)).join(", ") : String(value);
}
|
|
245
|
+
|
|
246
|
+
//#endregion
|
|
247
|
+
//#region src/markdown/toml/parse-frontmatter.ts
|
|
248
|
+
/**
 * Parse a TOML front matter block into a flat object of strings.
 *
 * - `[table]` headers prefix following keys with "table.".
 * - `[[table]]` headers start a new entry in an array of tables; once all
 *   lines are read each such array is collapsed into one string by
 *   `flattenArrayTables`.
 * - Inline tables are expanded into dotted keys.
 * - Triple-quoted strings may span several lines.
 * - Every stored value is converted with `toPlainText`.
 *
 * @param {string} frontmatter - Front matter text with "\n" line endings.
 * @returns {object|null} The flattened data, or null as soon as any line
 *   cannot be parsed.
 */
function parseTomlFrontmatter(frontmatter) {
  const result = {};
  const lines = frontmatter.split("\n");
  let prefix = "";
  let listEntry = null;
  let insideArrayTable = false;
  let cursor = 0; // index of the next unread line
  while (cursor < lines.length) {
    const rawLine = lines[cursor] ?? "";
    cursor += 1;
    const line = rawLine.trim();
    if (line === "" || line.startsWith("#")) continue;

    if (line.startsWith("[[")) {
      const arrayHeader = /^\[\[([^\]]+)]]$/.exec(line);
      if (arrayHeader === null) return null;
      const tableName = normalizeKeyPath(arrayHeader[1] ?? "");
      if (!tableName) return null;
      const entry = {};
      ensureArrayContainer(result, tableName).push(entry);
      listEntry = entry;
      prefix = tableName;
      insideArrayTable = true;
      continue;
    }

    const tableHeader = /^\[([^\]]+)]$/.exec(line);
    if (tableHeader !== null) {
      const tableName = normalizeKeyPath(tableHeader[1] ?? "");
      if (!tableName) return null;
      prefix = tableName;
      insideArrayTable = false;
      listEntry = null;
      continue;
    }

    // Lines holding a triple quote keep their "#" for now; the comment is
    // removed after the closing delimiter has been located.
    const source = /("""|''')/.test(rawLine) ? rawLine : stripInlineComment(rawLine);
    const equalsAt = source.indexOf("=");
    if (equalsAt === -1) return null;
    const key = normalizeKeyPath(source.slice(0, equalsAt));
    if (!key) return null;
    let valueRaw = source.slice(equalsAt + 1).trim();

    let delimiter = null;
    if (valueRaw.startsWith("\"\"\"")) delimiter = "\"\"\"";
    else if (valueRaw.startsWith("'''")) delimiter = "'''";
    if (delimiter !== null) {
      const closeAt = valueRaw.indexOf(delimiter, delimiter.length);
      if (closeAt !== -1) {
        const end = closeAt + delimiter.length;
        valueRaw = `${valueRaw.slice(0, end)}${stripInlineComment(valueRaw.slice(end))}`;
      } else {
        // Multi-line string: consume lines until one ends with the delimiter.
        const closesString = new RegExp(`${delimiter}\\s*$`);
        let closed = false;
        while (!closed && cursor < lines.length) {
          const nextLine = lines[cursor] ?? "";
          cursor += 1;
          valueRaw += `\n${nextLine}`;
          closed = closesString.test(nextLine);
        }
        if (!closed) return null;
      }
    }

    const parsed = normalizeValue(valueRaw);
    if (parsed === null) return null;
    const fullKey = prefix ? `${prefix}.${key}` : key;
    const entryTarget = insideArrayTable && listEntry ? listEntry : null;

    if (typeof parsed === "object" && !Array.isArray(parsed)) {
      // Inline table: one dotted key per member.
      const base = insideArrayTable ? key : fullKey;
      for (const [inlineKey, inlineValue] of Object.entries(parsed)) {
        (entryTarget ?? result)[`${base}.${inlineKey}`] = toPlainText(inlineValue);
      }
      continue;
    }

    if (entryTarget) entryTarget[key] = toPlainText(parsed);
    else result[fullKey] = toPlainText(parsed);
  }
  flattenArrayTables(result);
  return result;
}
|
|
329
|
+
|
|
330
|
+
//#endregion
|
|
331
|
+
//#region src/markdown/parse-markdown.ts
|
|
332
|
+
/** Front matter fence marker -> front matter format name. */
const FENCE_TO_TYPE = Object.fromEntries([
  ["---", "yaml"],
  ["+++", "toml"],
  [";;;", "json"]
]);
|
|
337
|
+
/**
 * Convert every CRLF pair to a single LF. Lone CR characters are kept.
 *
 * @param {string} input - Text to normalize.
 * @returns {string} Text with "\r\n" replaced by "\n".
 */
function normalizeNewlines(input) {
  return input.split("\r\n").join("\n");
}
|
|
340
|
+
/**
 * Remove a leading byte-order mark (U+FEFF) from `line`.
 *
 * The mark is written as an escape sequence rather than as the raw invisible
 * character, so it survives editors and tooling that drop or normalize that
 * character (which would turn the check into `startsWith("")`, true for
 * every string).
 *
 * @param {string} line - Text that may begin with a BOM.
 * @returns {string} The text without a leading BOM; unchanged otherwise.
 */
function stripBom(line) {
  return line.startsWith("\uFEFF") ? line.slice(1) : line;
}
|
|
343
|
+
/**
 * Identify a front matter fence line.
 *
 * A fence is exactly `---`, `+++` or `;;;`, optionally surrounded by spaces
 * or tabs, with nothing else on the line.
 *
 * @param {string} line - One line of text.
 * @returns {string|null} The format mapped to the fence in `FENCE_TO_TYPE`,
 *   or null when the line is not a fence.
 */
function getFenceType(line) {
  const found = /^[\t ]*(---|\+\+\+|;;;)[\t ]*$/.exec(line);
  if (found === null) return null;
  const marker = found[1] ?? "";
  return FENCE_TO_TYPE[marker] ?? null;
}
|
|
348
|
+
/**
 * Parse front matter text according to its declared format.
 *
 * @param {string} frontmatter - Front matter text without its fences.
 * @param {string|null} type - "json", "yaml" or "toml".
 * @returns {*} The parsed data; null when the type is missing or unknown,
 *   when parsing fails, or (YAML only) when the document is not a mapping.
 */
function parseFrontmatter(frontmatter, type) {
  switch (type) {
    case "json":
      try {
        return JSON.parse(frontmatter);
      } catch {
        return null;
      }
    case "yaml": {
      const doc = parseDocument(frontmatter, { prettyErrors: false });
      if (doc.errors.length > 0) return null;
      const data = doc.toJSON();
      const isMapping = Boolean(data) && typeof data === "object" && !Array.isArray(data);
      return isMapping ? data : null;
    }
    case "toml":
      return parseTomlFrontmatter(frontmatter);
    default:
      return null;
  }
}
|
|
365
|
+
/**
 * Scan `text` from `startIndex` and return the span that ends at the brace
 * bringing the brace depth back to zero. Braces inside double-quoted strings
 * are ignored, and backslash escapes are honoured inside those strings.
 *
 * @param {string} text - Text to scan.
 * @param {number} startIndex - Index where scanning starts.
 * @returns {{jsonText: string, endIndex: number}|null} The span and the index
 *   of its closing brace, or null when the depth never returns to zero.
 */
function extractJsonBlock(text, startIndex) {
  let openBraces = 0;
  let insideString = false;
  let skipNext = false;
  for (let pos = startIndex; pos < text.length; pos += 1) {
    const ch = text[pos] ?? "";
    if (insideString) {
      if (skipNext) skipNext = false;
      else if (ch === "\\") skipNext = true;
      else if (ch === "\"") insideString = false;
      continue;
    }
    switch (ch) {
      case "\"":
        insideString = true;
        break;
      case "{":
        openBraces += 1;
        break;
      case "}":
        openBraces -= 1;
        if (openBraces === 0) {
          return {
            jsonText: text.slice(startIndex, pos + 1),
            endIndex: pos
          };
        }
        break;
      default:
        break;
    }
  }
  return null;
}
|
|
401
|
+
/**
 * Split a Markdown document into front matter and content.
 *
 * CRLF line endings are normalized and a BOM on the first line is removed.
 * Two layouts are recognized:
 * - fenced front matter: the first line is a fence (`---`, `+++`, `;;;`) and
 *   a later line is a fence of the same type;
 * - bare JSON front matter: the first non-blank character is "{" and the
 *   balanced block that follows parses as JSON.
 *
 * @param {string} input - Raw document text.
 * @returns {{frontmatter: string|null, content: string, data: *, frontmatterType: string|null}}
 *   When no front matter is recognized, `content` is the whole normalized
 *   document and the other fields are null.
 */
function parseMarkdown(input) {
  const normalized = normalizeNewlines(input);
  const lines = normalized.split("\n");
  const withoutFrontmatter = (content) => ({
    frontmatter: null,
    content,
    data: null,
    frontmatterType: null
  });
  if (lines.length === 0) return withoutFrontmatter(normalized);
  lines[0] = stripBom(lines[0] ?? "");
  const text = lines.join("\n");
  const fenceType = getFenceType(lines[0] ?? "");

  if (fenceType) {
    const closingIndex = lines.findIndex((line, i) => i > 0 && getFenceType(line ?? "") === fenceType);
    if (closingIndex === -1) return withoutFrontmatter(text);
    const frontmatter = lines.slice(1, closingIndex).join("\n");
    return {
      frontmatter,
      content: lines.slice(closingIndex + 1).join("\n"),
      data: parseFrontmatter(frontmatter, fenceType),
      frontmatterType: fenceType
    };
  }

  // No fence: look for a bare JSON object at the top of the document.
  const leadingBlank = /^[\t \n]*/.exec(text);
  const jsonStart = (leadingBlank?.[0] ?? "").length;
  if (text[jsonStart] !== "{") return withoutFrontmatter(text);
  const block = extractJsonBlock(text, jsonStart);
  if (!block) return withoutFrontmatter(text);
  const data = parseFrontmatter(block.jsonText, "json");
  if (!data) return withoutFrontmatter(text);
  const rest = text.slice(block.endIndex + 1);
  return {
    frontmatter: block.jsonText,
    // Drop the single newline that ends the JSON block's last line.
    content: rest.startsWith("\n") ? rest.slice(1) : rest,
    data,
    frontmatterType: "json"
  };
}
|
|
464
|
+
|
|
465
|
+
//#endregion
|
|
466
|
+
//#region src/wc/segmenter.ts
|
|
467
|
+
/** Word-granularity `Intl.Segmenter` instances, keyed by locale. */
const segmenterCache = /* @__PURE__ */ new Map();
/** Grapheme-granularity `Intl.Segmenter` instances, keyed by locale. */
const graphemeSegmenterCache = /* @__PURE__ */ new Map();

/**
 * Return the cached word segmenter for `locale`, creating it on first use.
 *
 * @param {string} locale - Locale tag passed to `Intl.Segmenter`.
 * @returns {Intl.Segmenter} Segmenter with `granularity: "word"`.
 */
function getSegmenter(locale) {
  if (!segmenterCache.has(locale)) {
    segmenterCache.set(locale, new Intl.Segmenter(locale, { granularity: "word" }));
  }
  return segmenterCache.get(locale);
}

/**
 * Return the cached grapheme segmenter for `locale`, creating it on first use.
 *
 * @param {string} locale - Locale tag passed to `Intl.Segmenter`.
 * @returns {Intl.Segmenter} Segmenter with `granularity: "grapheme"`.
 */
function getGraphemeSegmenter(locale) {
  if (!graphemeSegmenterCache.has(locale)) {
    graphemeSegmenterCache.set(locale, new Intl.Segmenter(locale, { granularity: "grapheme" }));
  }
  return graphemeSegmenterCache.get(locale);
}
|
|
483
|
+
/**
 * Report whether the runtime provides `Intl.Segmenter`.
 *
 * @returns {boolean} true when `Intl` exists and `Intl.Segmenter` is a function.
 */
function supportsSegmenter() {
  if (typeof Intl === "undefined") return false;
  return typeof Intl.Segmenter === "function";
}
|
|
486
|
+
/**
 * Count the characters of `text`: one per grapheme segment reported by the
 * locale's grapheme segmenter, or one per code point when `Intl.Segmenter`
 * is unavailable.
 *
 * @param {string} text - Text to measure.
 * @param {string} locale - Locale used to obtain the grapheme segmenter.
 * @returns {number} Character count.
 */
function countCharsForLocale(text, locale) {
  if (!supportsSegmenter()) return Array.from(text).length;
  const graphemes = getGraphemeSegmenter(locale).segment(text)[Symbol.iterator]();
  let total = 0;
  while (!graphemes.next().done) total += 1;
  return total;
}
|
|
493
|
+
|
|
494
|
+
//#endregion
|
|
495
|
+
//#region src/utils/append-all.ts
|
|
496
|
+
/**
 * Push every item of `source` onto `target`, one at a time.
 *
 * @param {Array} target - Array that receives the items (mutated).
 * @param {Iterable} source - Items to append, in iteration order.
 */
function appendAll(target, source) {
  const items = source[Symbol.iterator]();
  for (let step = items.next(); !step.done; step = items.next()) {
    target.push(step.value);
  }
}
|
|
499
|
+
|
|
500
|
+
//#endregion
|
|
501
|
+
//#region src/wc/non-words.ts
|
|
502
|
+
// Matchers used to classify segments that are not word-like.
/** Any pictographic or emoji-presentation character. */
const emojiRegex = /(?:\p{Extended_Pictographic}|\p{Emoji_Presentation})/u;
/** Characters rendered as emoji by default. */
const emojiPresentationRegex = /\p{Emoji_Presentation}/u;
/** Keycap sequence: digit, "#" or "*", optional U+FE0F, then U+20E3. */
const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
/** Unicode general category S (symbols). */
const symbolRegex = /\p{S}/u;
/** Unicode general category P (punctuation). */
const punctuationRegex = /\p{P}/u;
/** Any whitespace character. */
const whitespaceRegex = /\s/u;
/** Characters counted as newlines: LF, CR, U+2028, U+2029. */
const newlineChars = new Set(["\n", "\r", "\u2028", "\u2029"]);
|
|
514
|
+
/**
 * Build an empty non-word collection: three segment lists plus a matching
 * `counts` record, all starting empty / at zero.
 *
 * @returns {{emoji: string[], symbols: string[], punctuation: string[], counts: {emoji: number, symbols: number, punctuation: number}}}
 */
function createNonWordCollection() {
  const counts = { emoji: 0, symbols: 0, punctuation: 0 };
  return { emoji: [], symbols: [], punctuation: [], counts };
}
|
|
526
|
+
/**
 * Record one non-word segment in `collection`.
 *
 * @param {object} collection - Collection to update (mutated).
 * @param {string} category - "emoji" or "symbol"; any other value is stored
 *   as punctuation.
 * @param {string} segment - Segment text to store.
 */
function addNonWord(collection, category, segment) {
  let bucket = "punctuation";
  if (category === "emoji") bucket = "emoji";
  else if (category === "symbol") bucket = "symbols";
  collection[bucket].push(segment);
  collection.counts[bucket] += 1;
}
|
|
540
|
+
/**
 * Count the whitespace characters of `segment` and add them to `collection`.
 *
 * Each character is classified as a space, a tab, a newline (a member of
 * `newlineChars`) or other whitespace (matched by `whitespaceRegex`);
 * characters that are not whitespace are ignored. The collection is only
 * touched when at least one whitespace character was found.
 *
 * The tab is written as the `"\t"` escape so it cannot be confused with, or
 * silently converted into, the space literal tested just before it.
 *
 * @param {object} collection - Non-word collection to update (mutated).
 * @param {string} segment - Segment text to inspect.
 * @returns {number} Number of whitespace characters found in `segment`.
 */
function addWhitespace(collection, segment) {
  let whitespace = collection.whitespace;
  let count = 0;
  for (const char of segment) {
    let bucket;
    if (char === " ") bucket = "spaces";
    else if (char === "\t") bucket = "tabs";
    else if (newlineChars.has(char)) bucket = "newlines";
    else if (whitespaceRegex.test(char)) bucket = "other";
    else continue;
    // Create the per-kind counters lazily, on the first whitespace character.
    whitespace = whitespace ?? createWhitespaceCounts();
    whitespace[bucket] += 1;
    count += 1;
  }
  if (count > 0) {
    collection.whitespace = whitespace ?? createWhitespaceCounts();
    collection.counts.whitespace = (collection.counts.whitespace ?? 0) + count;
  }
  return count;
}
|
|
574
|
+
/**
 * Classify a non-word segment.
 *
 * A segment is an emoji when it contains a keycap sequence, an
 * emoji-presentation character, or a pictographic character together with
 * the emoji variation selector U+FE0F. Otherwise it is a symbol or
 * punctuation when it contains a character of that Unicode category.
 *
 * The variation selector is written as the `"\uFE0F"` escape: the raw
 * character is invisible in source and easy to lose, which would turn the
 * check into `includes("")` (true for every string).
 *
 * @param {string} segment - Segment text.
 * @returns {"emoji"|"symbol"|"punctuation"|null} The category, or null when
 *   none applies.
 */
function classifyNonWordSegment(segment) {
  const hasEmojiVariationSelector = segment.includes("\uFE0F");
  if (keycapEmojiRegex.test(segment) || emojiPresentationRegex.test(segment) || hasEmojiVariationSelector && emojiRegex.test(segment)) return "emoji";
  if (symbolRegex.test(segment)) return "symbol";
  if (punctuationRegex.test(segment)) return "punctuation";
  return null;
}
|
|
581
|
+
/**
 * Add the contents of `source` to `target`.
 *
 * For emoji, symbols and punctuation the segments are appended and the count
 * increased, but only when the source count is above zero. Whitespace totals
 * are merged only when `source` carries both a positive whitespace count and
 * a per-kind whitespace record.
 *
 * @param {object} target - Collection that receives the data (mutated).
 * @param {object} source - Collection to read from.
 * @returns {object} `target`.
 */
function mergeNonWordCollections(target, source) {
  for (const bucket of ["emoji", "symbols", "punctuation"]) {
    if (source.counts[bucket] > 0) {
      appendAll(target[bucket], source[bucket]);
      target.counts[bucket] += source.counts[bucket];
    }
  }
  const incomingTotal = source.counts.whitespace;
  const incoming = source.whitespace;
  if (incomingTotal && incomingTotal > 0 && incoming) {
    const merged = target.whitespace ?? createWhitespaceCounts();
    for (const kind of ["spaces", "tabs", "newlines", "other"]) {
      merged[kind] += incoming[kind];
    }
    target.whitespace = merged;
    target.counts.whitespace = (target.counts.whitespace ?? 0) + incomingTotal;
  }
  return target;
}
|
|
605
|
+
/**
 * Build a zeroed per-kind whitespace counter record.
 *
 * @returns {{spaces: number, tabs: number, newlines: number, other: number}}
 */
function createWhitespaceCounts() {
  const counts = {};
  for (const kind of ["spaces", "tabs", "newlines", "other"]) counts[kind] = 0;
  return counts;
}
|
|
613
|
+
|
|
614
|
+
//#endregion
|
|
615
|
+
//#region src/wc/analyze.ts
|
|
616
|
+
/**
 * Segment one chunk into words using the word segmenter of its locale.
 *
 * @param {{locale: string, text: string}} chunk - Text and its locale.
 * @param {boolean} collectNonWords - Also classify segments that are not
 *   word-like.
 * @param {boolean} includeWhitespace - When collecting, also count whitespace.
 * @returns {{locale: string, text: string, segments: string[], words: number, nonWords: object|undefined}}
 *   `nonWords` is undefined unless `collectNonWords` is set.
 */
function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
  const segmenter = getSegmenter(chunk.locale);
  const wordSegments = [];
  const nonWords = collectNonWords ? createNonWordCollection() : null;
  for (const { segment, isWordLike } of segmenter.segment(chunk.text)) {
    if (isWordLike) {
      wordSegments.push(segment);
      continue;
    }
    if (!collectNonWords || !nonWords) continue;
    if (includeWhitespace) addWhitespace(nonWords, segment);
    const category = classifyNonWordSegment(segment);
    if (category) addNonWord(nonWords, category, segment);
  }
  return {
    locale: chunk.locale,
    text: chunk.text,
    segments: wordSegments,
    words: wordSegments.length,
    nonWords: nonWords ?? void 0
  };
}
|
|
634
|
+
/**
 * Count the characters of one chunk, split into word and non-word characters.
 *
 * Word-like segments always count. Other segments count only when
 * `collectNonWords` is set and the segment was classified or contributed
 * whitespace.
 *
 * @param {{locale: string, text: string}} chunk - Text and its locale.
 * @param {boolean} collectNonWords - Classify and count non-word segments.
 * @param {boolean} includeWhitespace - When collecting, also count whitespace.
 * @returns {{locale: string, text: string, chars: number, wordChars: number, nonWordChars: number, nonWords: object|undefined}}
 *   `chars` is the sum of `wordChars` and `nonWordChars`.
 */
function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
  const segmenter = getSegmenter(chunk.locale);
  const nonWords = collectNonWords ? createNonWordCollection() : null;
  let wordChars = 0;
  let nonWordChars = 0;
  for (const { segment, isWordLike } of segmenter.segment(chunk.text)) {
    if (isWordLike) {
      wordChars += countCharsForLocale(segment, chunk.locale);
      continue;
    }
    if (!collectNonWords || !nonWords) continue;
    const whitespaceCount = includeWhitespace ? addWhitespace(nonWords, segment) : 0;
    const category = classifyNonWordSegment(segment);
    if (category) addNonWord(nonWords, category, segment);
    if (category || whitespaceCount > 0) {
      nonWordChars += countCharsForLocale(segment, chunk.locale);
    }
  }
  return {
    locale: chunk.locale,
    text: chunk.text,
    chars: wordChars + nonWordChars,
    wordChars,
    nonWordChars,
    nonWords: nonWords ?? void 0
  };
}
|
|
668
|
+
/**
 * Sum character totals per locale, keeping locales in first-seen order.
 *
 * Non-word collections are merged into a fresh collection per locale, so the
 * collections of the input chunks are not modified.
 *
 * @param {Array<object>} chunks - Per-chunk character results.
 * @returns {Array<{locale: string, chars: number, wordChars: number, nonWordChars: number, nonWords: object|undefined}>}
 */
function aggregateCharsByLocale(chunks) {
  // A Map iterates in insertion order, which is the first-seen locale order.
  const totals = /* @__PURE__ */ new Map();
  for (const chunk of chunks) {
    const entry = totals.get(chunk.locale);
    if (!entry) {
      totals.set(chunk.locale, {
        locale: chunk.locale,
        chars: chunk.chars,
        wordChars: chunk.wordChars,
        nonWordChars: chunk.nonWordChars,
        nonWords: chunk.nonWords ? mergeNonWordCollections(createNonWordCollection(), chunk.nonWords) : void 0
      });
      continue;
    }
    entry.chars += chunk.chars;
    entry.wordChars += chunk.wordChars;
    entry.nonWordChars += chunk.nonWordChars;
    if (chunk.nonWords) {
      if (!entry.nonWords) entry.nonWords = createNonWordCollection();
      mergeNonWordCollections(entry.nonWords, chunk.nonWords);
    }
  }
  return Array.from(totals.values());
}
|
|
694
|
+
/**
 * Sum word counts and concatenate word segments per locale, keeping locales
 * in first-seen order. The segment arrays of the input chunks are copied,
 * not shared.
 *
 * @param {Array<{locale: string, words: number, segments: string[]}>} chunks
 * @returns {Array<{locale: string, words: number, segments: string[]}>}
 */
function aggregateByLocale(chunks) {
  // A Map iterates in insertion order, which is the first-seen locale order.
  const grouped = /* @__PURE__ */ new Map();
  for (const chunk of chunks) {
    const entry = grouped.get(chunk.locale);
    if (entry) {
      entry.words += chunk.words;
      appendAll(entry.segments, chunk.segments);
    } else {
      grouped.set(chunk.locale, {
        locale: chunk.locale,
        words: chunk.words,
        segments: [...chunk.segments]
      });
    }
  }
  return Array.from(grouped.values());
}
|
|
713
|
+
|
|
714
|
+
//#endregion
|
|
715
|
+
//#region src/wc/mode.ts
|
|
716
|
+
/** Accepted spellings of each mode, mapped to the canonical mode name. */
const MODE_ALIASES = {
  chunk: "chunk",
  chunks: "chunk",
  segments: "segments",
  segment: "segments",
  seg: "segments",
  collector: "collector",
  collect: "collector",
  colle: "collector",
  char: "char",
  chars: "char",
  character: "char",
  characters: "char",
  "char-collector": "char-collector"
};
/** Tokens that stand for the "char" half of a composed char-collector mode. */
const CHAR_MODE_ALIASES = new Set([
  "char",
  "chars",
  "character",
  "characters"
]);
/** Tokens that stand for the "collector" half of a composed char-collector mode. */
const COLLECTOR_MODE_ALIASES = new Set([
  "collector",
  "collect",
  "colle",
  "col"
]);

/** Remove every run of "-", "_" and whitespace from `value`. */
function collapseSeparators(value) {
  return value.replace(/[-_\s]+/g, "");
}

/**
 * Check whether `value`, split on "-", "_" and whitespace, consists only of
 * char aliases and collector aliases and contains at least one of each.
 *
 * @param {string} value - Lower-cased mode text.
 * @returns {boolean}
 */
function isComposedCharCollectorFromTokens(value) {
  const tokens = value.split(/[-_\s]+/).map((token) => token.trim()).filter((token) => token.length > 0);
  if (tokens.length < 2) return false;
  let hasCharAlias = false;
  let hasCollectorAlias = false;
  for (const token of tokens) {
    if (CHAR_MODE_ALIASES.has(token)) {
      hasCharAlias = true;
      continue;
    }
    if (COLLECTOR_MODE_ALIASES.has(token)) {
      hasCollectorAlias = true;
      continue;
    }
    return false;
  }
  return hasCharAlias && hasCollectorAlias;
}

/**
 * Check whether `value` is exactly one char alias directly followed or
 * preceded by one collector alias (for example "charcollector").
 *
 * @param {string} value - Mode text with separators already removed.
 * @returns {boolean}
 */
function isComposedCharCollectorCompact(value) {
  for (const charAlias of CHAR_MODE_ALIASES) {
    for (const collectorAlias of COLLECTOR_MODE_ALIASES) {
      if (value === `${charAlias}${collectorAlias}` || value === `${collectorAlias}${charAlias}`) return true;
    }
  }
  return false;
}

/**
 * Resolve user-supplied mode text to a canonical mode name.
 *
 * The text is trimmed and lower-cased, then matched against `MODE_ALIASES`
 * directly, as a composition of char + collector tokens, and finally with
 * all separators removed.
 *
 * Alias lookups only consider the table's own keys, so names inherited from
 * `Object.prototype` (such as "constructor" or "__proto__") are not treated
 * as modes.
 *
 * @param {string|null|undefined} input - Mode text.
 * @returns {string|null} Canonical mode name, or null when unrecognized.
 */
function normalizeMode(input) {
  if (!input) return null;
  const normalized = input.trim().toLowerCase();
  if (Object.hasOwn(MODE_ALIASES, normalized)) return MODE_ALIASES[normalized];
  if (isComposedCharCollectorFromTokens(normalized)) return "char-collector";
  const compact = collapseSeparators(normalized);
  if (isComposedCharCollectorCompact(compact)) return "char-collector";
  return Object.hasOwn(MODE_ALIASES, compact) ? MODE_ALIASES[compact] : null;
}

/**
 * Resolve mode text, falling back when it is missing or unrecognized.
 *
 * @param {string|null|undefined} input - Mode text.
 * @param {string} [fallback="chunk"] - Mode returned when `input` does not
 *   resolve.
 * @returns {string} Canonical mode name or `fallback`.
 */
function resolveMode(input, fallback = "chunk") {
  return normalizeMode(input) ?? fallback;
}
|
|
781
|
+
|
|
782
|
+
//#endregion
|
|
783
|
+
//#region src/wc/latin-hints.ts
|
|
784
|
+
const DEFAULT_LATIN_HINT_RULES_SOURCE = [
|
|
785
|
+
{
|
|
786
|
+
tag: "de",
|
|
787
|
+
pattern: "[äöüÄÖÜß]"
|
|
788
|
+
},
|
|
789
|
+
{
|
|
790
|
+
tag: "es",
|
|
791
|
+
pattern: "[ñÑ¿¡]"
|
|
792
|
+
},
|
|
793
|
+
{
|
|
794
|
+
tag: "pt",
|
|
795
|
+
pattern: "[ãõÃÕ]"
|
|
796
|
+
},
|
|
797
|
+
{
|
|
798
|
+
tag: "fr",
|
|
799
|
+
pattern: "[œŒæÆ]"
|
|
800
|
+
},
|
|
801
|
+
{
|
|
802
|
+
tag: "pl",
|
|
803
|
+
pattern: "[ąćęłńśźżĄĆĘŁŃŚŹŻ]"
|
|
804
|
+
},
|
|
805
|
+
{
|
|
806
|
+
tag: "tr",
|
|
807
|
+
pattern: "[ıİğĞşŞ]"
|
|
808
|
+
},
|
|
809
|
+
{
|
|
810
|
+
tag: "ro",
|
|
811
|
+
pattern: "[ăĂâÂîÎșȘțȚ]"
|
|
812
|
+
},
|
|
813
|
+
{
|
|
814
|
+
tag: "hu",
|
|
815
|
+
pattern: "[őŐűŰ]"
|
|
816
|
+
},
|
|
817
|
+
{
|
|
818
|
+
tag: "is",
|
|
819
|
+
pattern: "[ðÐþÞ]"
|
|
820
|
+
}
|
|
821
|
+
];
|
|
822
|
+
// Immutable view of the built-in rules: every rule is shallow-copied and frozen, and the resulting array is frozen too.
const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
|
|
823
|
+
|
|
824
|
+
//#endregion
|
|
825
|
+
//#region src/wc/locale-detect.ts
|
|
826
|
+
// Locale tag returned for Latin text when no hint or rule applies.
const DEFAULT_LOCALE = "und-Latn";
// Locale tag returned for Han characters when no Han hint is configured.
const DEFAULT_HAN_TAG = "und-Hani";
// Upper bound on the source length of a Latin hint pattern.
const MAX_LATIN_HINT_PATTERN_LENGTH = 256;
// One Unicode script matcher per supported script, keyed by the lower-cased
// script name (hiragana, katakana, hangul, han, latin, ...).
const regex = Object.fromEntries(
  ["Hiragana", "Katakana", "Hangul", "Han", "Latin", "Arabic", "Cyrillic", "Devanagari", "Thai"].map(
    (script) => [script.toLowerCase(), new RegExp(`\\p{Script=${script}}`, "u")]
  )
);
|
|
840
|
+
// Locale tags treated as Latin when isLatinLocale is called without a context: the default locale plus every built-in hint tag.
const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
|
|
841
|
+
/**
 * Tells whether `locale` is one of the known Latin locale tags.
 *
 * @param {string} locale - Locale tag to look up.
 * @param {{ latinLocales: Set<string> }} [context] - Detection context; when
 *   given, its `latinLocales` set is consulted instead of the built-in set.
 * @returns {boolean} True when the tag is in the consulted set.
 */
function isLatinLocale(locale, context) {
  const knownLocales = context ? context.latinLocales : defaultLatinLocales;
  return knownLocales.has(locale);
}
|
|
845
|
+
/**
 * Picks the Latin locale hint from the options, preferring `latinTagHint`,
 * then `latinLanguageHint`, then `latinLocaleHint`. Each candidate is trimmed
 * and ignored when it is missing or blank.
 *
 * @param {object} options - Options possibly carrying the three hint fields.
 * @returns {string | undefined} The first non-blank trimmed hint, if any.
 */
function resolveLatinHint(options) {
  const hintFields = ["latinTagHint", "latinLanguageHint", "latinLocaleHint"];
  for (const field of hintFields) {
    const hint = options[field]?.trim();
    if (hint) {
      return hint;
    }
  }
  return undefined;
}
|
|
853
|
+
/**
 * Picks the Han locale hint from the options, preferring `hanTagHint` over
 * `hanLanguageHint`. Each candidate is trimmed and ignored when it is
 * missing or blank.
 *
 * @param {object} options - Options possibly carrying the two hint fields.
 * @returns {string | undefined} The first non-blank trimmed hint, if any.
 */
function resolveHanHint(options) {
  const hintFields = ["hanTagHint", "hanLanguageHint"];
  for (const field of hintFields) {
    const hint = options[field]?.trim();
    if (hint) {
      return hint;
    }
  }
  return undefined;
}
|
|
859
|
+
/**
 * Compiles a Latin hint pattern into a Unicode-aware RegExp.
 *
 * String patterns are compiled with the "u" flag. RegExp-like patterns keep
 * their flags and gain "u" unless they already carry "u" or "v".
 *
 * @param {string | RegExp} pattern - Pattern source or an existing RegExp.
 * @param {string} label - Prefix identifying the offending rule in error messages.
 * @returns {RegExp} The compiled expression.
 * @throws {Error} When the pattern is not a string/RegExp, is empty, is longer
 *   than MAX_LATIN_HINT_PATTERN_LENGTH, or fails to compile.
 */
function compileLatinHintPattern(pattern, label) {
  const isStringPattern = typeof pattern === "string";
  // Validate the shape up front so a missing or mistyped pattern is reported
  // with the rule label instead of a bare TypeError from `.source`/`.flags`.
  // Duck-typing (rather than instanceof) keeps RegExps from other realms valid.
  if (!isStringPattern && (typeof pattern?.source !== "string" || typeof pattern?.flags !== "string")) {
    throw new Error(`${label}: pattern must be a string or RegExp.`);
  }
  const source = isStringPattern ? pattern : pattern.source;
  const hasUnicodeMode = !isStringPattern && (pattern.flags.includes("u") || pattern.flags.includes("v"));
  const flags = isStringPattern ? "u" : hasUnicodeMode ? pattern.flags : `${pattern.flags}u`;
  if (source.length === 0) throw new Error(`${label}: pattern must not be empty.`);
  if (source.length > MAX_LATIN_HINT_PATTERN_LENGTH) throw new Error(`${label}: pattern must be at most ${MAX_LATIN_HINT_PATTERN_LENGTH} characters.`);
  try {
    return new RegExp(source, flags);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    throw new Error(`${label}: invalid Unicode regex pattern (${message}).`);
  }
}
|
|
872
|
+
/**
 * Validates an optional rule priority.
 *
 * @param {number | undefined} priority - Priority supplied on the rule.
 * @param {string} label - Prefix identifying the offending rule in error messages.
 * @returns {number} 0 when `priority` is undefined, otherwise `priority` itself.
 * @throws {Error} When `priority` is defined but is not a finite number.
 */
function normalizeLatinHintPriority(priority, label) {
  if (priority === undefined) {
    return 0;
  }
  // Number.isFinite is false for every non-number value, so it covers the
  // type check and the finiteness check in one call.
  if (Number.isFinite(priority)) {
    return priority;
  }
  throw new Error(`${label}: priority must be a finite number when provided.`);
}
|
|
877
|
+
/**
 * Validates one hint rule and converts it to its compiled form.
 *
 * @param {{ tag: unknown, pattern: unknown, priority?: unknown }} rule - Raw rule.
 * @param {number} order - Position of the rule in the combined list; stored
 *   on the result unchanged.
 * @param {string} label - Prefix identifying the rule in error messages.
 * @returns {{ tag: string, pattern: RegExp, priority: number, order: number }}
 * @throws {Error} When the tag is not a non-blank string, or when pattern or
 *   priority validation fails.
 */
function compileLatinHintRule(rule, order, label) {
  const rawTag = rule.tag;
  const tag = typeof rawTag === "string" ? rawTag.trim() : "";
  if (tag === "") {
    throw new Error(`${label}: tag must be a non-empty string.`);
  }
  const pattern = compileLatinHintPattern(rule.pattern, label);
  const priority = normalizeLatinHintPriority(rule.priority, label);
  return { tag, pattern, priority, order };
}
|
|
887
|
+
/**
 * Builds the ordered list of compiled Latin hint rules.
 *
 * Custom rules (`options.latinHintRules`) come first, followed by the
 * built-in rules unless `options.useDefaultLatinHints` is exactly `false`.
 * Falsy entries are skipped. The result is sorted by descending priority,
 * with ties broken by position in the combined list.
 *
 * @param {object} options - Detection options.
 * @returns {Array<{ tag: string, pattern: RegExp, priority: number, order: number }>}
 * @throws {Error} When any rule fails validation.
 */
function resolveLatinHintRules(options) {
  const includeDefaults = options.useDefaultLatinHints !== false;
  const pending = [];
  // Labels carry the index within the rule's own source list, so a skipped
  // falsy entry does not shift the index reported for later rules.
  const enqueue = (rules, kind) => {
    for (let index = 0; index < rules.length; index += 1) {
      const rule = rules[index];
      if (rule) {
        pending.push({ rule, label: `Invalid ${kind} Latin hint rule at index ${index}` });
      }
    }
  };
  enqueue(options.latinHintRules ?? [], "custom");
  if (includeDefaults) {
    enqueue(DEFAULT_LATIN_HINT_RULES, "default");
  }
  const compiled = pending.map(({ rule, label }, position) => compileLatinHintRule(rule, position, label));
  compiled.sort((left, right) =>
    left.priority !== right.priority ? right.priority - left.priority : left.order - right.order
  );
  return compiled;
}
|
|
914
|
+
/**
 * Precomputes everything per-character detection needs from the options:
 * the Latin and Han hints, the compiled hint rules, and the set of locale
 * tags considered Latin (default locale, every rule tag, and the Latin hint).
 *
 * @param {object} [options={}] - Detection options.
 * @returns {{ latinHint: string | undefined, hanHint: string | undefined,
 *   latinHintRules: Array<object>, latinLocales: Set<string> }}
 * @throws {Error} When a hint rule fails validation.
 */
function resolveLocaleDetectContext(options = {}) {
  const latinHint = resolveLatinHint(options);
  const latinHintRules = resolveLatinHintRules(options);
  const latinLocales = new Set([DEFAULT_LOCALE, ...latinHintRules.map((rule) => rule.tag)]);
  if (latinHint) {
    latinLocales.add(latinHint);
  }
  const hanHint = resolveHanHint(options);
  return { latinHint, hanHint, latinHintRules, latinLocales };
}
|
|
927
|
+
/**
 * Returns the tag of the first hint rule whose pattern matches `char`, or
 * the default Latin locale when none matches.
 *
 * @param {string} char - Character to classify.
 * @param {{ latinHintRules: Array<{ tag: string, pattern: RegExp }> }} context
 * @returns {string} The matching rule's tag or DEFAULT_LOCALE.
 */
function detectLatinLocale(char, context) {
  const matchingRule = context.latinHintRules.find((rule) => {
    // Rewind first: a pattern carrying "g" or "y" would otherwise resume
    // from wherever its previous test() left off.
    rule.pattern.lastIndex = 0;
    return rule.pattern.test(char);
  });
  return matchingRule ? matchingRule.tag : DEFAULT_LOCALE;
}
|
|
934
|
+
// Scripts that map to one fixed locale regardless of context, in the order
// they are tested. Entries are [key into `regex`, locale tag].
const SINGLE_LOCALE_SCRIPTS = [
  ["hiragana", "ja"],
  ["katakana", "ja"],
  ["hangul", "ko"],
  ["arabic", "ar"],
  ["cyrillic", "ru"],
  ["devanagari", "hi"],
  ["thai", "th"]
];

/**
 * Determines the locale tag for a single character.
 *
 * Fixed-locale scripts are checked first. Han characters inherit a preceding
 * Japanese locale when carry is allowed, otherwise they use the Han hint or
 * the default Han tag. Latin characters use, in order: a matching hint rule,
 * a carried non-default Latin locale, the Latin hint, the default locale.
 *
 * @param {string} char - Character to classify.
 * @param {string | null | undefined} previousLocale - Locale of the text so far.
 * @param {object} [options={}] - Detection options; only used to build the
 *   default `context`.
 * @param {object} [context] - Precomputed detection context.
 * @param {boolean} [allowLatinLocaleCarry=true] - Whether a previous
 *   non-default Latin locale may be reused for a plain Latin character.
 * @param {boolean} [allowJapaneseHanCarry=true] - Whether a previous "ja*"
 *   locale may be reused for a Han character.
 * @returns {string | null} The locale tag, or null when the character matches
 *   none of the tested scripts.
 */
function detectLocaleForChar(char, previousLocale, options = {}, context = resolveLocaleDetectContext(options), allowLatinLocaleCarry = true, allowJapaneseHanCarry = true) {
  for (const [scriptKey, locale] of SINGLE_LOCALE_SCRIPTS) {
    if (regex[scriptKey].test(char)) {
      return locale;
    }
  }
  if (regex.han.test(char)) {
    const carriesJapanese = allowJapaneseHanCarry && previousLocale && previousLocale.startsWith("ja");
    return carriesJapanese ? previousLocale : context.hanHint ?? DEFAULT_HAN_TAG;
  }
  if (!regex.latin.test(char)) {
    return null;
  }
  const hintedLocale = detectLatinLocale(char, context);
  if (hintedLocale !== DEFAULT_LOCALE) {
    return hintedLocale;
  }
  const carriesLatin = allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE;
  if (carriesLatin) {
    return previousLocale;
  }
  return context.latinHint ? context.latinHint : DEFAULT_LOCALE;
}
|
|
954
|
+
|
|
955
|
+
//#endregion
|
|
956
|
+
//#region src/wc/segment.ts
|
|
957
|
+
// Line breaks and sentence punctuation that, when seen on a character with no detected script, stop locale carry-over in segmentTextByLocale until the next scripted character.
const HARD_BOUNDARY_REGEX = /[\r\n,.!?;:,、。!?;:.。、]/u;
|
|
958
|
+
// Split points searched by findLastLatinPromotionBreakIndex: any whitespace plus the sentence punctuation listed in the class.
const LATIN_PROMOTION_BREAK_REGEX = /[\s,.!?;:,、。!?;:.。、]/u;
|
|
959
|
+
/**
 * Splits `text` into consecutive `{ locale, text }` chunks.
 *
 * The text is walked one code point at a time. Characters with no detected
 * script (null from detectLocaleForChar) join the current chunk. A scripted
 * character with a different locale closes the current chunk, except when
 * the current chunk is still the default Latin locale and the new locale is
 * Latin too: then the text after the last promotion break (or the whole
 * buffer when there is none) is moved into the new locale's chunk.
 *
 * @param {string} text - Text to segment.
 * @param {object} [options={}] - Locale detection options.
 * @returns {Array<{ locale: string, text: string }>} Chunks after merging
 *   adjacent chunks via mergeAdjacentChunks.
 */
function segmentTextByLocale(text, options = {}) {
  const context = resolveLocaleDetectContext(options);
  const chunks = [];
  let currentLocale = DEFAULT_LOCALE;
  let buffer = "";
  let bufferHasScript = false;
  // True after a hard boundary on an unscripted character; cleared by the
  // next scripted character. While set, locale carry-over is disabled.
  let sawCarryBoundary = false;
  const emit = (locale, chunkText) => {
    chunks.push({ locale, text: chunkText });
  };

  for (const char of text) {
    const carryAllowed = !sawCarryBoundary;
    const detected = detectLocaleForChar(char, currentLocale, options, context, carryAllowed, carryAllowed);
    const hasScript = detected !== null;
    const targetLocale = detected ?? currentLocale;

    if (buffer === "") {
      // First character: start the buffer under whatever locale applies.
      currentLocale = targetLocale;
      buffer = char;
      bufferHasScript = hasScript;
    } else if (hasScript && !bufferHasScript) {
      // The buffer held only unscripted characters so far; it adopts the
      // locale of the first scripted character.
      currentLocale = targetLocale;
      buffer += char;
      bufferHasScript = true;
    } else if (hasScript && targetLocale !== currentLocale) {
      const promotesDefaultLatin = currentLocale === DEFAULT_LOCALE && isLatinLocale(targetLocale, context);
      if (!promotesDefaultLatin) {
        emit(currentLocale, buffer);
        buffer = char;
      } else {
        const promotionBreakIndex = findLastLatinPromotionBreakIndex(buffer);
        if (promotionBreakIndex === -1) {
          // No break in the buffer: the whole buffer is promoted.
          buffer += char;
        } else {
          const prefix = buffer.slice(0, promotionBreakIndex + 1);
          const suffix = buffer.slice(promotionBreakIndex + 1);
          if (prefix.length > 0) {
            emit(currentLocale, prefix);
          }
          buffer = `${suffix}${char}`;
        }
      }
      currentLocale = targetLocale;
      bufferHasScript = true;
    } else {
      buffer += char;
      if (hasScript) {
        bufferHasScript = true;
      }
    }

    if (hasScript) {
      sawCarryBoundary = false;
    } else if (HARD_BOUNDARY_REGEX.test(char)) {
      sawCarryBoundary = true;
    }
  }

  if (buffer.length > 0) {
    emit(currentLocale, buffer);
  }
  return mergeAdjacentChunks(chunks);
}
|
|
1032
|
+
/**
 * Scans `buffer` from the end for the last character matching
 * LATIN_PROMOTION_BREAK_REGEX.
 *
 * @param {string} buffer - Text accumulated for the current chunk.
 * @returns {number} Index of the last break character, or -1 when none exists.
 */
function findLastLatinPromotionBreakIndex(buffer) {
  for (let position = buffer.length; position > 0; position -= 1) {
    const index = position - 1;
    const unit = buffer[index];
    if (unit && LATIN_PROMOTION_BREAK_REGEX.test(unit)) {
      return index;
    }
  }
  return -1;
}
|
|
1040
|
+
/**
 * Joins runs of neighbouring chunks that share a locale into single chunks.
 *
 * @param {Array<{ locale: string, text: string }>} chunks - Chunks in text order.
 * @returns {Array<{ locale: string, text: string }>} The same array when it
 *   is empty; otherwise a new array in which merged runs are new objects and
 *   untouched chunks are the original objects.
 */
function mergeAdjacentChunks(chunks) {
  if (chunks.length === 0) {
    return chunks;
  }
  const merged = [chunks[0]];
  for (const chunk of chunks.slice(1)) {
    const tailIndex = merged.length - 1;
    const tail = merged[tailIndex];
    if (tail.locale === chunk.locale) {
      merged[tailIndex] = { locale: tail.locale, text: tail.text + chunk.text };
    } else {
      merged.push(chunk);
    }
  }
  return merged;
}
|
|
1058
|
+
|
|
1059
|
+
//#endregion
|
|
1060
|
+
//#region src/wc/wc.ts
|
|
1061
|
+
/**
 * Counts words or characters in `text`, broken down by detected locale.
 *
 * The text is segmented by locale, each chunk is analyzed, and the result is
 * shaped according to the resolved mode:
 * - "char": one item per chunk with its character count;
 * - "char-collector": character counts from aggregateCharsByLocale;
 * - "segments": one item per chunk including its word segments;
 * - "collector": items from aggregateByLocale plus merged non-word data;
 * - anything else ("chunk"): one item per chunk with its word count.
 *
 * @param {string} text - Text to count.
 * @param {object} [options={}] - Mode, non-word collection flags
 *   (`nonWords`, `includeWhitespace`) and locale hint options.
 * @returns {{ total: number, counts: object | undefined, breakdown: object }}
 *   `counts` is only populated when `options.nonWords` is truthy.
 */
function wordCounter(text, options = {}) {
  const mode = resolveMode(options.mode, "chunk");
  const collectNonWords = Boolean(options.nonWords);
  const includeWhitespace = Boolean(options.includeWhitespace);
  const {
    latinLanguageHint,
    latinTagHint,
    latinLocaleHint,
    latinHintRules,
    useDefaultLatinHints,
    hanLanguageHint,
    hanTagHint
  } = options;
  const chunks = segmentTextByLocale(text, {
    latinLanguageHint,
    latinTagHint,
    latinLocaleHint,
    latinHintRules,
    useDefaultLatinHints,
    hanLanguageHint,
    hanTagHint
  });

  if (mode === "char" || mode === "char-collector") {
    const analyzedChars = chunks.map((chunk) => analyzeCharChunk(chunk, collectNonWords, includeWhitespace));
    const sumField = (field) => analyzedChars.reduce((sum, chunk) => sum + chunk[field], 0);
    const total = sumField("chars");
    const counts = collectNonWords
      ? { words: sumField("wordChars"), nonWords: sumField("nonWordChars"), total }
      : void 0;
    const items = mode === "char"
      ? analyzedChars.map(({ locale, text: chunkText, chars, nonWords }) => ({ locale, text: chunkText, chars, nonWords }))
      : aggregateCharsByLocale(analyzedChars).map(({ locale, chars, nonWords }) => ({ locale, chars, nonWords }));
    return { total, counts, breakdown: { mode, items } };
  }

  const analyzed = chunks.map((chunk) => analyzeChunk(chunk, collectNonWords, includeWhitespace));
  // Non-word contribution of one chunk; zero unless collection is enabled
  // and the chunk actually carries non-word data.
  const nonWordShare = (chunk) => (collectNonWords && chunk.nonWords ? getNonWordTotal(chunk.nonWords) : 0);
  const wordsTotal = analyzed.reduce((sum, chunk) => sum + chunk.words, 0);
  const nonWordsTotal = collectNonWords ? analyzed.reduce((sum, chunk) => sum + nonWordShare(chunk), 0) : 0;
  const total = analyzed.reduce((sum, chunk) => sum + (chunk.words + nonWordShare(chunk)), 0);
  const counts = collectNonWords ? { words: wordsTotal, nonWords: nonWordsTotal, total } : void 0;

  let breakdown;
  switch (mode) {
    case "segments":
      breakdown = {
        mode,
        items: analyzed.map(({ locale, text: chunkText, words, segments, nonWords }) => ({
          locale,
          text: chunkText,
          words,
          segments,
          nonWords
        }))
      };
      break;
    case "collector":
      breakdown = {
        mode,
        items: aggregateByLocale(analyzed),
        nonWords: collectNonWordsAggregate(analyzed, collectNonWords)
      };
      break;
    default:
      breakdown = {
        mode,
        items: analyzed.map(({ locale, text: chunkText, words, nonWords }) => ({ locale, text: chunkText, words, nonWords }))
      };
  }
  return { total, counts, breakdown };
}
|
|
1161
|
+
/**
 * Adds up the non-word counters of a collection.
 *
 * @param {{ counts: { emoji: number, symbols: number, punctuation: number,
 *   whitespace?: number } }} nonWords - Non-word collection.
 * @returns {number} emoji + symbols + punctuation + whitespace, where a
 *   missing whitespace counter counts as 0.
 */
function getNonWordTotal(nonWords) {
  const { emoji, symbols, punctuation, whitespace } = nonWords.counts;
  return emoji + symbols + punctuation + (whitespace ?? 0);
}
|
|
1164
|
+
/**
 * Merges the non-word data of every analyzed chunk into one new collection.
 *
 * @param {Iterable<{ nonWords?: object }>} analyzed - Analyzed chunks.
 * @param {boolean} enabled - Whether non-word collection is active.
 * @returns {object | undefined} The merged collection, or undefined when
 *   collection is disabled.
 */
function collectNonWordsAggregate(analyzed, enabled) {
  if (!enabled) {
    return void 0;
  }
  const collection = createNonWordCollection();
  for (const { nonWords } of analyzed) {
    if (nonWords) {
      mergeNonWordCollections(collection, nonWords);
    }
  }
  return collection;
}
|
|
1173
|
+
|
|
1174
|
+
//#endregion
|
|
1175
|
+
//#region src/wc/index.ts
|
|
1176
|
+
// Alias for wordCounter; the section-counting helpers and the worker's message handler call the counter through this name.
var wc_default = wordCounter;
|
|
1177
|
+
|
|
1178
|
+
//#endregion
|
|
1179
|
+
//#region src/markdown/section-count.ts
|
|
1180
|
+
/**
 * Converts an arbitrary frontmatter value to text suitable for counting.
 *
 * @param {unknown} value - Value to convert.
 * @returns {string} "" for null/undefined, the value itself for strings,
 *   `String(value)` for numbers and booleans, and the JSON serialization for
 *   everything else, falling back to `String(value)` when JSON serialization
 *   throws or produces no text.
 */
function normalizeText(value) {
  if (value == null) return "";
  switch (typeof value) {
    case "string":
      return value;
    case "number":
    case "boolean":
      return String(value);
    default:
      break;
  }
  try {
    // JSON.stringify returns undefined (not a string) for functions, symbols
    // and objects whose toJSON yields undefined; fall back so this function
    // always returns a string.
    return JSON.stringify(value) ?? String(value);
  } catch {
    // For example BigInt values or circular structures.
    return String(value);
  }
}
|
|
1190
|
+
/**
 * Produces one counted item per top-level frontmatter key.
 *
 * The counted text is "key: value" when the value has a non-empty textual
 * form, and just the key otherwise.
 *
 * @param {unknown} data - Parsed frontmatter; anything other than a
 *   non-array object yields no items.
 * @param {string} mode - Accepted for signature parity; not read here.
 * @param {object} options - Options forwarded to the word counter.
 * @returns {Array<{ name: string, source: "frontmatter", result: object }>}
 */
function buildPerKeyItems(data, mode, options) {
  const isRecord = Boolean(data) && typeof data === "object" && !Array.isArray(data);
  if (!isRecord) {
    return [];
  }
  const items = [];
  for (const [key, value] of Object.entries(data)) {
    const valueText = normalizeText(value);
    const countedText = valueText ? `${key}: ${valueText}` : key;
    items.push({
      name: key,
      source: "frontmatter",
      result: wc_default(countedText, options)
    });
  }
  return items;
}
|
|
1201
|
+
/**
 * Counts one block of text and wraps the outcome as a one-element item list.
 *
 * @param {string} name - Item name.
 * @param {string} text - Text to count.
 * @param {string} mode - Accepted for signature parity; not read here.
 * @param {object} options - Options forwarded to the word counter.
 * @param {string} source - Origin label stored on the item.
 * @returns {Array<{ name: string, source: string, result: object }>}
 */
function buildSingleItem(name, text, mode, options, source) {
  const result = wc_default(text, options);
  const item = { name, source, result };
  return [item];
}
|
|
1208
|
+
/**
 * Adds up `result.total` across all items.
 *
 * @param {Array<{ result: { total: number } }>} items - Counted items.
 * @returns {number} The combined total; 0 for an empty list.
 */
function sumTotals(items) {
  let grandTotal = 0;
  for (const { result } of items) {
    grandTotal += result.total;
  }
  return grandTotal;
}
|
|
1211
|
+
/**
 * Counts a markdown document section by section.
 *
 * For "all" the raw input is counted as a single "content" item without
 * parsing. Otherwise the input is parsed, and items are built per `section`:
 * "frontmatter", "content", "split" (frontmatter then content), "per-key"
 * (one item per frontmatter key) or "split-per-key" (per-key items then
 * content). Any other section value produces no items.
 *
 * @param {string} input - Document text.
 * @param {string} section - Section selector.
 * @param {object} [options={}] - Word counter options.
 * @returns {{ section: string, total: number, frontmatterType: unknown,
 *   items: Array<object> }}
 */
function countSections(input, section, options = {}) {
  const mode = options.mode ?? "chunk";
  if (section === "all") {
    const result = wc_default(input, options);
    const wholeDocument = { name: "all", source: "content", result };
    return { section, total: result.total, frontmatterType: null, items: [wholeDocument] };
  }

  const parsed = parseMarkdown(input);
  const frontmatterText = parsed.frontmatter ?? "";
  const contentText = parsed.content ?? "";
  const frontmatterItems = () => buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter");
  const contentItems = () => buildSingleItem("content", contentText, mode, options, "content");
  const perKeyItems = () => buildPerKeyItems(parsed.data, mode, options);

  let items;
  switch (section) {
    case "frontmatter":
      items = frontmatterItems();
      break;
    case "content":
      items = contentItems();
      break;
    case "split":
      items = [...frontmatterItems(), ...contentItems()];
      break;
    case "per-key":
      items = perKeyItems();
      break;
    case "split-per-key":
      items = [...perKeyItems(), ...contentItems()];
      break;
    default:
      items = [];
  }

  return {
    section,
    total: sumTotals(items),
    frontmatterType: parsed.frontmatterType,
    items
  };
}
|
|
1242
|
+
|
|
1243
|
+
//#endregion
|
|
1244
|
+
//#region src/cli/batch/aggregate.ts
|
|
1245
|
+
/**
 * Empties the `segments` list of every breakdown item, in place, when the
 * result was produced in "collector" mode. Other modes are left untouched.
 *
 * @param {{ breakdown: { mode: string, items: Array<object> } }} result -
 *   Word counter result; mutated.
 * @returns {void}
 */
function stripCollectorSegmentsFromWordCounterResult(result) {
  const { breakdown } = result;
  if (breakdown.mode !== "collector") {
    return;
  }
  breakdown.items.forEach((item) => {
    item.segments = [];
  });
}
|
|
1249
|
+
/**
 * Applies stripCollectorSegmentsFromWordCounterResult to the `result` of
 * every item in a sectioned count result.
 *
 * @param {{ items: Array<{ result: object }> }} result - Sectioned result;
 *   its nested results are mutated.
 * @returns {void}
 */
function stripCollectorSegmentsFromSectionedResult(result) {
  result.items.forEach(({ result: itemResult }) => {
    stripCollectorSegmentsFromWordCounterResult(itemResult);
  });
}
|
|
1252
|
+
/**
 * Strips collector segments from a count result of either shape: a
 * sectioned result (recognized by its `section` property) or a plain word
 * counter result.
 *
 * @param {object} result - Count result; mutated in place.
 * @returns {void}
 */
function compactCollectorSegmentsInCountResult(result) {
  const strip = "section" in result
    ? stripCollectorSegmentsFromSectionedResult
    : stripCollectorSegmentsFromWordCounterResult;
  strip(result);
}
|
|
1259
|
+
|
|
1260
|
+
//#endregion
|
|
1261
|
+
//#region src/cli/path/load.ts
|
|
1262
|
+
/**
 * Heuristically decides whether a file's bytes look binary, inspecting at
 * most the first 1024 bytes.
 *
 * A NUL byte means binary immediately. Otherwise tab, LF, CR, printable
 * ASCII (32-126) and bytes >= 128 are considered text; the file is binary
 * when more than 30% of the sampled bytes fall outside those groups.
 *
 * @param {ArrayLike<number>} buffer - File bytes.
 * @returns {boolean} True when the sample looks binary; false for empty input.
 */
function isProbablyBinary(buffer) {
  const sampleSize = Math.min(buffer.length, 1024);
  if (sampleSize === 0) {
    return false;
  }
  let suspicious = 0;
  for (let offset = 0; offset < sampleSize; offset += 1) {
    const byte = buffer[offset] ?? 0;
    if (byte === 0) {
      return true;
    }
    const isTextControl = byte === 9 || byte === 10 || byte === 13;
    const isPrintableAscii = byte >= 32 && byte <= 126;
    const isHighByte = byte >= 128;
    if (!(isTextControl || isPrintableAscii || isHighByte)) {
      suspicious += 1;
    }
  }
  return suspicious / sampleSize > 0.3;
}
|
|
1276
|
+
|
|
1277
|
+
//#endregion
|
|
1278
|
+
//#region src/cli/batch/jobs/worker/count-worker.ts
|
|
1279
|
+
// Settings handed over by the spawning thread. The handler below reads
// `section`, `wcOptions` and `preserveCollectorSegments` from it.
const config = workerData;
if (!parentPort) throw new Error("Worker protocol init failed: missing parentPort.");

/**
 * Pulls a string error code (when the thrown value has a `code` property)
 * and a message out of an arbitrary thrown value.
 *
 * @param {unknown} error - Value caught from a failed operation.
 * @returns {{ code: string | undefined, messageText: string }}
 */
function describeWorkerError(error) {
  const code = typeof error === "object" && error !== null && "code" in error ? String(error.code) : void 0;
  const messageText = error instanceof Error ? error.message : String(error);
  return { code, messageText };
}

// Message protocol: a "shutdown" message closes the port; any other message
// is treated as a count task carrying `taskId`, `index` and `path`. Each task
// is answered with exactly one "result" (file or skip payload) or "fatal"
// message.
parentPort.on("message", async (message) => {
  if (message.type === "shutdown") {
    parentPort?.close();
    return;
  }

  const { path } = message;
  const reply = (response) => {
    parentPort?.postMessage(response);
  };
  const replyFatal = (code, messageText) => {
    reply({
      type: "fatal",
      taskId: message.taskId,
      index: message.index,
      path,
      code,
      message: messageText
    });
  };
  const replySkip = (reason) => {
    reply({
      type: "result",
      taskId: message.taskId,
      index: message.index,
      payload: {
        kind: "skip",
        skip: { path, reason }
      }
    });
  };

  let buffer;
  try {
    buffer = await readFile(path);
  } catch (error) {
    const { code, messageText } = describeWorkerError(error);
    // Running out of file descriptors is reported as fatal; any other read
    // failure only skips this one file.
    if (code === "EMFILE" || code === "ENFILE") {
      replyFatal(code, messageText);
    } else {
      replySkip(`not readable: ${messageText}`);
    }
    return;
  }

  if (isProbablyBinary(buffer)) {
    replySkip("binary file");
    return;
  }

  try {
    const content = buffer.toString("utf8");
    const result = config.section === "all"
      ? wc_default(content, config.wcOptions)
      : countSections(content, config.section, config.wcOptions);
    if (!config.preserveCollectorSegments) {
      compactCollectorSegmentsInCountResult(result);
    }
    // Posting stays inside the try so a failure to send the result is also
    // reported as fatal.
    reply({
      type: "result",
      taskId: message.taskId,
      index: message.index,
      payload: {
        kind: "file",
        file: { path, result }
      }
    });
  } catch (error) {
    const { code, messageText } = describeWorkerError(error);
    replyFatal(code, messageText);
  }
});
|
|
1367
|
+
|
|
1368
|
+
//#endregion
|
|
1369
|
+
export { };
|
|
1370
|
+
//# sourceMappingURL=count-worker.mjs.map
|