@dev-pi2pie/word-counter 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1370 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { parentPort, workerData } from "node:worker_threads";
3
+ import { parseDocument } from "yaml";
4
+
5
+ //#region src/markdown/toml/arrays.ts
6
/**
 * Returns the array stored at `result[key]`, installing a fresh empty array
 * first when the slot does not currently hold an array.
 *
 * @param {object} result - Object that owns the slot; may be mutated.
 * @param {string} key - Property name of the slot.
 * @returns {Array} The array now stored at `result[key]`.
 */
function ensureArrayContainer(result, key) {
  if (!Array.isArray(result[key])) {
    // Anything that is not already an array (including a missing value) is replaced.
    result[key] = [];
  }
  return result[key];
}
13
/**
 * Replaces every array-valued property of `result`, in place, with a single
 * string: each entry is rendered as `key=value` pairs joined by ", " and the
 * entries are joined by " | ". Non-array properties are left untouched.
 *
 * @param {object} result - Object whose array properties are flattened.
 * @returns {void}
 */
function flattenArrayTables(result) {
  const renderEntry = (entry) => {
    const fields = [];
    for (const [name, value] of Object.entries(entry)) {
      fields.push(`${name}=${value}`);
    }
    return fields.join(", ");
  };
  for (const key of Object.keys(result)) {
    const value = result[key];
    if (Array.isArray(value)) {
      result[key] = value.map((entry) => renderEntry(entry)).join(" | ");
    }
  }
}
19
+
20
+ //#endregion
21
+ //#region src/markdown/toml/keys.ts
22
/**
 * Trims `key` and, when the trimmed text both starts and ends with the same
 * quote character (double or single), removes the first and last character.
 *
 * @param {string} key - Raw key text.
 * @returns {string} The trimmed key without its surrounding quotes.
 */
function stripKeyQuotes(key) {
  const text = key.trim();
  for (const quote of ["\"", "'"]) {
    if (text.startsWith(quote) && text.endsWith(quote)) {
      return text.slice(1, -1);
    }
  }
  return text;
}
27
/**
 * Normalizes a key or dotted key path.
 *
 * A key wrapped in matching quotes is unquoted and returned as-is (dots
 * included). Otherwise the key is split on ".", each segment is trimmed, and
 * the segments are re-joined with ".".
 *
 * @param {string} key - Raw key text.
 * @returns {string|null} The normalized key, or null when the key (or any
 *   dotted segment of it) is empty.
 */
function normalizeKeyPath(key) {
  const candidate = key.trim();
  if (candidate.length === 0) return null;

  const wrappedIn = (quote) => candidate.startsWith(quote) && candidate.endsWith(quote);
  if (wrappedIn("\"") || wrappedIn("'")) {
    const inner = stripKeyQuotes(candidate);
    return inner || null;
  }

  const parts = [];
  for (const piece of candidate.split(".")) {
    const name = piece.trim();
    if (name === "") return null;
    parts.push(name);
  }
  return parts.join(".");
}
38
+
39
+ //#endregion
40
+ //#region src/markdown/toml/strings.ts
41
/**
 * Removes a trailing `#` comment from a single line, ignoring any `#` that
 * sits inside a single- or double-quoted string. Backslash escapes are only
 * honoured inside double-quoted strings.
 *
 * @param {string} line - One line of text.
 * @returns {string} The text before the comment with trailing whitespace
 *   removed, or the original line when no comment marker is found.
 */
function stripInlineComment(line) {
  let openQuote = ""; // "" while outside a string, otherwise the opening quote character
  let skipNext = false; // true right after a backslash inside a double-quoted string
  let position = 0;
  while (position < line.length) {
    const ch = line.charAt(position);
    position += 1;
    if (openQuote !== "") {
      if (skipNext) skipNext = false;
      else if (ch === "\\" && openQuote === "\"") skipNext = true;
      else if (ch === openQuote) openQuote = "";
      continue;
    }
    if (ch === "\"" || ch === "'") {
      openQuote = ch;
      continue;
    }
    if (ch === "#") return line.slice(0, position - 1).trimEnd();
  }
  return line;
}
77
/**
 * Resolves the backslash escapes `\\`, `\"`, `\n`, `\t` and `\r` in the body
 * of a basic (double-quoted) string. Other escape sequences are left as-is.
 *
 * The escapes are resolved in a single left-to-right pass so that the output
 * of one escape is never re-read as the start of another (for example the
 * three characters backslash, backslash, `n` become backslash + `n`, not a
 * newline).
 *
 * @param {string} input - String body without its surrounding quotes.
 * @returns {string} The unescaped text.
 */
function unescapeBasic(input) {
  return input.replace(/\\([\\"ntr])/g, (_match, code) => {
    switch (code) {
      case "n":
        return "\n";
      case "t":
        return "\t";
      case "r":
        return "\r";
      default:
        // `\\` and `\"` resolve to the escaped character itself.
        return code;
    }
  });
}
80
/**
 * Extracts the content of a quoted string literal.
 *
 * Triple-quoted forms are checked before single-character quotes. Content of
 * double-quoted forms is passed through `unescapeBasic`; content of
 * single-quoted forms is returned verbatim.
 *
 * @param {string} value - Candidate literal (already trimmed by the caller).
 * @returns {string|null} The string content, or null when `value` is not
 *   wrapped in a recognised pair of delimiters.
 */
function parseStringLiteral(value) {
  const wraps = (delimiter) => value.startsWith(delimiter) && value.endsWith(delimiter);
  if (wraps("\"\"\"")) return unescapeBasic(value.slice(3, -3));
  if (wraps("'''")) return value.slice(3, -3);
  if (wraps("\"")) return unescapeBasic(value.slice(1, -1));
  if (wraps("'")) return value.slice(1, -1);
  return null;
}
87
+
88
+ //#endregion
89
+ //#region src/markdown/toml/values.ts
90
/**
 * Parses a scalar value: quoted string, boolean, number, or bare text.
 *
 * @param {string} raw - Raw value text.
 * @returns {string|number|boolean|null} The parsed value; null only when the
 *   trimmed input is empty. Text that matches none of the known forms
 *   (including date-like values) is returned unchanged as a string.
 */
function parsePrimitive(raw) {
  const text = raw.trim();
  if (text === "") return null;

  const asString = parseStringLiteral(text);
  if (asString !== null) return asString;

  switch (text) {
    case "true":
      return true;
    case "false":
      return false;
    default:
      break;
  }

  const isNumber = /^[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$/.test(text);
  if (isNumber) return Number(text);

  // Date-like values are kept as their original text.
  const isDateLike = /^\d{4}-\d{2}-\d{2}/.test(text);
  if (isDateLike) return text;

  return text;
}
101
/**
 * Parses a flat, single-line array literal such as `[1, "a", true]`.
 *
 * Items are split on commas that are outside quoted strings and each item is
 * parsed with `parsePrimitive`. A single trailing comma before the closing
 * bracket is accepted, as TOML allows. Nested arrays are not interpreted.
 *
 * @param {string} raw - Raw value text.
 * @returns {Array|null} The parsed items, or null when `raw` is not wrapped
 *   in `[` and `]` or contains an empty item (e.g. `[1,,2]`).
 */
function parseArray(raw) {
  const value = raw.trim();
  if (!value.startsWith("[") || !value.endsWith("]")) return null;
  const inner = value.slice(1, -1).trim();
  if (!inner) return [];

  const items = [];
  let current = "";
  let inString = null; // null | "double" | "single"
  let escaped = false;
  for (const char of inner) {
    if (inString) {
      current += char;
      if (escaped) {
        escaped = false;
      } else if (char === "\\" && inString === "double") {
        // Backslash escapes only apply inside double-quoted strings.
        escaped = true;
      } else if (inString === "double" && char === "\"") {
        inString = null;
      } else if (inString === "single" && char === "'") {
        inString = null;
      }
      continue;
    }
    if (char === "\"" || char === "'") {
      inString = char === "\"" ? "double" : "single";
      current += char;
      continue;
    }
    if (char === ",") {
      const item = parsePrimitive(current);
      if (item === null) return null;
      items.push(item);
      current = "";
      continue;
    }
    current += char;
  }

  // `inner` is non-empty, so an empty remainder here can only follow a comma
  // whose item was already accepted: that is a trailing comma, not an error.
  if (current.trim() === "") return items;

  const finalItem = parsePrimitive(current);
  if (finalItem === null) return null;
  items.push(finalItem);
  return items;
}
150
/**
 * Parses a single-level inline table such as `{ a = 1, b = [1, 2] }`.
 *
 * The body is split on commas that are outside strings, brackets and braces;
 * each `key = value` pair is then normalized. Nested inline tables are
 * rejected.
 *
 * @param {string} raw - Raw value text.
 * @returns {object|null} A plain object of parsed values, or null when the
 *   text is not wrapped in braces or any pair is malformed.
 */
function parseInlineTable(raw) {
  const text = raw.trim();
  if (!(text.startsWith("{") && text.endsWith("}"))) return null;
  const body = text.slice(1, -1).trim();
  if (body === "") return {};

  // Pass 1: split the body into raw `key = value` pairs.
  const pairs = [];
  let pending = "";
  let quote = ""; // "" outside a string, otherwise the opening quote character
  let skipNext = false; // set right after a backslash inside a double-quoted string
  let brackets = 0;
  let braces = 0;
  for (let i = 0; i < body.length; i += 1) {
    const ch = body.charAt(i);
    if (quote !== "") {
      pending += ch;
      if (skipNext) skipNext = false;
      else if (ch === "\\" && quote === "\"") skipNext = true;
      else if (ch === quote) quote = "";
      continue;
    }
    if (ch === "," && brackets === 0 && braces === 0) {
      pairs.push(pending);
      pending = "";
      continue;
    }
    switch (ch) {
      case "\"":
      case "'":
        quote = ch;
        break;
      case "[":
        brackets += 1;
        break;
      case "]":
        if (brackets > 0) brackets -= 1;
        break;
      case "{":
        braces += 1;
        break;
      case "}":
        if (braces > 0) braces -= 1;
        break;
      default:
        break;
    }
    pending += ch;
  }
  if (pending.trim()) pairs.push(pending);

  // Pass 2: interpret each pair; any malformed pair invalidates the table.
  const table = {};
  for (const pair of pairs) {
    const eq = pair.indexOf("=");
    if (eq === -1) return null;
    const key = normalizeKeyPath(pair.slice(0, eq));
    if (!key) return null;
    const valueText = pair.slice(eq + 1).trim();
    if (!valueText || valueText.startsWith("{")) return null;
    const parsed = normalizeValue(valueText);
    if (parsed === null) return null;
    if (typeof parsed === "object" && !Array.isArray(parsed)) return null;
    table[key] = parsed;
  }
  return table;
}
231
/**
 * Dispatches a raw value to the matching parser: inline table, array, or
 * primitive.
 *
 * @param {string} value - Raw value text.
 * @returns {*} The parsed value, or null when the input is empty or is a
 *   bracketed/braced form that fails to parse.
 */
function normalizeValue(value) {
  if (!value) return null;
  const text = value.trim();
  const isWrapped = (open, close) => text.startsWith(open) && text.endsWith(close);

  if (isWrapped("{", "}")) return parseInlineTable(text);

  const parsedArray = parseArray(text);
  if (parsedArray) return parsedArray;

  // Looks like an array but did not parse: report failure rather than
  // falling back to a bare string.
  return isWrapped("[", "]") ? null : parsePrimitive(text);
}
240
/**
 * Converts a parsed value to display text.
 *
 * @param {*} value - Parsed value.
 * @returns {string} "" for null/undefined, items joined by ", " for arrays,
 *   otherwise `String(value)`.
 */
function toPlainText(value) {
  if (value === null || value === undefined) return "";
  return Array.isArray(value) ? value.map(String).join(", ") : String(value);
}
245
+
246
+ //#endregion
247
+ //#region src/markdown/toml/parse-frontmatter.ts
248
/**
 * Parses a TOML frontmatter body into a flat object of display strings.
 *
 * Supported constructs: comments, `[table]` headers (keys are prefixed with
 * the table path), `[[array-of-tables]]` headers (entries are collected and
 * finally flattened to one string per array), `key = value` lines, inline
 * tables (expanded into dotted keys) and triple-quoted strings that may span
 * several lines.
 *
 * @param {string} frontmatter - Frontmatter text using "\n" line breaks.
 * @returns {object|null} Flat key/value object, or null as soon as any line
 *   cannot be parsed.
 */
function parseTomlFrontmatter(frontmatter) {
  const result = {};
  const lines = frontmatter.split("\n");
  let prefix = ""; // dotted path of the active table header
  let arrayEntry = null; // entry object of the active [[...]] header, if any
  let insideArrayTable = false;
  let cursor = 0; // index of the next unread line

  while (cursor < lines.length) {
    const rawLine = lines[cursor] ?? "";
    cursor += 1;
    const line = rawLine.trim();
    if (line === "" || line.startsWith("#")) continue;

    if (line.startsWith("[[")) {
      const arrayHeader = line.match(/^\[\[([^\]]+)]]$/);
      if (!arrayHeader) return null;
      const tableName = normalizeKeyPath(arrayHeader[1] ?? "");
      if (!tableName) return null;
      const entry = {};
      ensureArrayContainer(result, tableName).push(entry);
      arrayEntry = entry;
      prefix = tableName;
      insideArrayTable = true;
      continue;
    }

    const tableHeader = line.match(/^\[([^\]]+)]$/);
    if (tableHeader) {
      const tableName = normalizeKeyPath(tableHeader[1] ?? "");
      if (!tableName) return null;
      prefix = tableName;
      insideArrayTable = false;
      arrayEntry = null;
      continue;
    }

    // Lines containing a triple quote keep their text intact here; any
    // trailing comment is handled after the closing delimiter is located.
    const hasTripleQuote = /("""|''')/.test(rawLine);
    const assignment = hasTripleQuote ? rawLine : stripInlineComment(rawLine);
    const eq = assignment.indexOf("=");
    if (eq === -1) return null;
    const key = normalizeKeyPath(assignment.slice(0, eq));
    if (!key) return null;
    let valueText = assignment.slice(eq + 1).trim();

    let delimiter = null;
    if (valueText.startsWith("\"\"\"")) delimiter = "\"\"\"";
    else if (valueText.startsWith("'''")) delimiter = "'''";

    if (delimiter) {
      const closeAt = valueText.indexOf(delimiter, delimiter.length);
      if (closeAt !== -1) {
        // Opened and closed on the same line: only strip a comment after the close.
        const end = closeAt + delimiter.length;
        valueText = `${valueText.slice(0, end)}${stripInlineComment(valueText.slice(end))}`;
      } else {
        // Multi-line string: consume lines until one ends with the delimiter.
        const closesLine = new RegExp(`${delimiter}\\s*$`);
        const collected = [valueText];
        let closed = false;
        while (cursor < lines.length && !closed) {
          const nextLine = lines[cursor] ?? "";
          cursor += 1;
          collected.push(nextLine);
          closed = closesLine.test(nextLine);
        }
        if (!closed) return null;
        valueText = collected.join("\n");
      }
    }

    const parsed = normalizeValue(valueText);
    if (parsed === null) return null;

    const fullKey = prefix ? `${prefix}.${key}` : key;
    const writeToEntry = Boolean(insideArrayTable && arrayEntry);

    if (typeof parsed === "object" && !Array.isArray(parsed)) {
      // Inline table: expand into dotted keys. Inside an array-of-tables entry
      // the table prefix is omitted from the key.
      const base = insideArrayTable ? key : fullKey;
      for (const [inlineKey, inlineValue] of Object.entries(parsed)) {
        const entryKey = `${base}.${inlineKey}`;
        if (writeToEntry) arrayEntry[entryKey] = toPlainText(inlineValue);
        else result[entryKey] = toPlainText(inlineValue);
      }
      continue;
    }

    if (writeToEntry) arrayEntry[key] = toPlainText(parsed);
    else result[fullKey] = toPlainText(parsed);
  }

  flattenArrayTables(result);
  return result;
}
329
+
330
+ //#endregion
331
+ //#region src/markdown/parse-markdown.ts
332
/** Maps a frontmatter fence marker to the frontmatter format it delimits. */
const FENCE_TO_TYPE = { "---": "yaml", "+++": "toml", ";;;": "json" };
337
/**
 * Converts every CRLF pair to a single LF. Lone carriage returns are kept.
 *
 * @param {string} input - Text to normalize.
 * @returns {string} Text with "\r\n" replaced by "\n".
 */
function normalizeNewlines(input) {
  return input.split("\r\n").join("\n");
}
340
/**
 * Removes a leading byte-order mark (U+FEFF) from `line`, if present.
 *
 * The BOM is matched by code unit rather than by an invisible string literal
 * so the check cannot be silently altered by editors or tooling that strip
 * zero-width characters.
 *
 * @param {string} line - First line of a document.
 * @returns {string} The line without a leading BOM.
 */
function stripBom(line) {
  return line.charCodeAt(0) === 0xfeff ? line.slice(1) : line;
}
343
/**
 * Identifies a frontmatter fence line (`---`, `+++` or `;;;`, optionally
 * surrounded by spaces or tabs).
 *
 * @param {string} line - A single line of text.
 * @returns {string|null} The frontmatter type from `FENCE_TO_TYPE`, or null
 *   when the line is not a fence.
 */
function getFenceType(line) {
  const fence = /^[\t ]*(---|\+\+\+|;;;)[\t ]*$/.exec(line);
  if (fence === null) return null;
  const marker = fence[1] ?? "";
  return FENCE_TO_TYPE[marker] ?? null;
}
348
/**
 * Parses a frontmatter body according to its detected type.
 *
 * @param {string} frontmatter - Frontmatter text without its fences.
 * @param {string|null} type - "json", "yaml", "toml", or a falsy value.
 * @returns {*} Parsed data, or null when the type is missing/unknown or the
 *   content fails to parse. YAML results must be a non-array object.
 */
function parseFrontmatter(frontmatter, type) {
  switch (type) {
    case "json":
      try {
        return JSON.parse(frontmatter);
      } catch {
        // Malformed JSON is reported as "no data" rather than as an error.
        return null;
      }
    case "yaml": {
      const doc = parseDocument(frontmatter, { prettyErrors: false });
      if (doc.errors.length > 0) return null;
      const data = doc.toJSON();
      const isRecord = Boolean(data) && typeof data === "object" && !Array.isArray(data);
      return isRecord ? data : null;
    }
    case "toml":
      return parseTomlFrontmatter(frontmatter);
    default:
      return null;
  }
}
365
/**
 * Scans `text` from `startIndex` and returns the span up to the brace that
 * balances the first opening brace. Braces inside double-quoted strings are
 * ignored, and backslash escapes inside strings are honoured.
 *
 * @param {string} text - Text to scan.
 * @param {number} startIndex - Index at which scanning starts.
 * @returns {{jsonText: string, endIndex: number}|null} The matched text and
 *   the index of its closing brace, or null when the braces never balance.
 */
function extractJsonBlock(text, startIndex) {
  let openBraces = 0;
  let insideString = false;
  let skipNext = false;
  for (let cursor = startIndex; cursor < text.length; cursor += 1) {
    const ch = text.charAt(cursor);
    if (insideString) {
      if (skipNext) skipNext = false;
      else if (ch === "\\") skipNext = true;
      else if (ch === "\"") insideString = false;
      continue;
    }
    switch (ch) {
      case "\"":
        insideString = true;
        break;
      case "{":
        openBraces += 1;
        break;
      case "}":
        openBraces -= 1;
        if (openBraces === 0) {
          return { jsonText: text.slice(startIndex, cursor + 1), endIndex: cursor };
        }
        break;
      default:
        break;
    }
  }
  return null;
}
401
/**
 * Splits a Markdown document into frontmatter and body.
 *
 * Two layouts are recognised: a fenced block whose opening fence is the first
 * line (`---`, `+++` or `;;;`, closed by the same fence), or a bare JSON
 * object that is the first non-blank content of the document.
 *
 * @param {string} input - Raw document text.
 * @returns {{frontmatter: string|null, content: string, data: *, frontmatterType: string|null}}
 *   `frontmatter` is the raw frontmatter text, `content` the remaining body,
 *   `data` the parsed frontmatter (null when parsing fails). When no
 *   frontmatter is detected, `content` is the whole document (CRLF normalized,
 *   BOM removed) and the other fields are null.
 */
function parseMarkdown(input) {
  // split() always yields at least one element, so lines[0] is always present.
  const lines = normalizeNewlines(input).split("\n");
  lines[0] = stripBom(lines[0] ?? "");
  const text = lines.join("\n");

  const withoutFrontmatter = () => ({
    frontmatter: null,
    content: text,
    data: null,
    frontmatterType: null
  });

  const openingType = getFenceType(lines[0]);
  if (!openingType) {
    // No fence: accept a JSON object that starts the document (after blanks).
    const jsonStart = (text.match(/^[\t \n]*/)?.[0] ?? "").length;
    if (text[jsonStart] !== "{") return withoutFrontmatter();
    const jsonBlock = extractJsonBlock(text, jsonStart);
    if (!jsonBlock) return withoutFrontmatter();
    const data = parseFrontmatter(jsonBlock.jsonText, "json");
    if (!data) return withoutFrontmatter();
    let content = text.slice(jsonBlock.endIndex + 1);
    if (content.startsWith("\n")) content = content.slice(1);
    return {
      frontmatter: jsonBlock.jsonText,
      content,
      data,
      frontmatterType: "json"
    };
  }

  // Fenced frontmatter: the closing fence must be of the same kind as the opener.
  const closingIndex = lines.findIndex((line, index) => index > 0 && getFenceType(line ?? "") === openingType);
  if (closingIndex === -1) return withoutFrontmatter();

  const frontmatter = lines.slice(1, closingIndex).join("\n");
  return {
    frontmatter,
    content: lines.slice(closingIndex + 1).join("\n"),
    data: parseFrontmatter(frontmatter, openingType),
    frontmatterType: openingType
  };
}
464
+
465
+ //#endregion
466
+ //#region src/wc/segmenter.ts
467
/** Word-granularity `Intl.Segmenter` instances, keyed by locale. */
const segmenterCache = /* @__PURE__ */ new Map();
/** Grapheme-granularity `Intl.Segmenter` instances, keyed by locale. */
const graphemeSegmenterCache = /* @__PURE__ */ new Map();

/**
 * Returns a word segmenter for `locale`, creating and caching it on first use.
 *
 * @param {string} locale - BCP 47 locale tag.
 * @returns {Intl.Segmenter} Segmenter with word granularity.
 */
function getSegmenter(locale) {
  let segmenter = segmenterCache.get(locale);
  if (!segmenter) {
    segmenter = new Intl.Segmenter(locale, { granularity: "word" });
    segmenterCache.set(locale, segmenter);
  }
  return segmenter;
}

/**
 * Returns a grapheme segmenter for `locale`, creating and caching it on first use.
 *
 * @param {string} locale - BCP 47 locale tag.
 * @returns {Intl.Segmenter} Segmenter with grapheme granularity.
 */
function getGraphemeSegmenter(locale) {
  let segmenter = graphemeSegmenterCache.get(locale);
  if (!segmenter) {
    segmenter = new Intl.Segmenter(locale, { granularity: "grapheme" });
    graphemeSegmenterCache.set(locale, segmenter);
  }
  return segmenter;
}
483
/**
 * Reports whether the runtime provides `Intl.Segmenter`.
 *
 * @returns {boolean} True when `Intl` exists and `Intl.Segmenter` is a function.
 */
function supportsSegmenter() {
  if (typeof Intl === "undefined") return false;
  return typeof Intl.Segmenter === "function";
}
486
/**
 * Counts the characters in `text`.
 *
 * With `Intl.Segmenter` available, the count is the number of grapheme
 * segments for `locale`; otherwise it falls back to the number of code points.
 *
 * @param {string} text - Text to measure.
 * @param {string} locale - BCP 47 locale tag used for segmentation.
 * @returns {number} Character count.
 */
function countCharsForLocale(text, locale) {
  if (!supportsSegmenter()) {
    return Array.from(text).length;
  }
  const graphemes = getGraphemeSegmenter(locale).segment(text);
  return Array.from(graphemes).length;
}
493
+
494
+ //#endregion
495
+ //#region src/utils/append-all.ts
496
/**
 * Pushes every item yielded by `source` onto `target`, one at a time (avoids
 * spreading a large iterable into a single `push` call).
 *
 * @param {Array} target - Array that receives the items; mutated in place.
 * @param {Iterable} source - Items to append.
 * @returns {void}
 */
function appendAll(target, source) {
  for (const element of source) {
    target.push(element);
  }
}
499
+
500
+ //#endregion
501
+ //#region src/wc/non-words.ts
502
/** Matches any pictographic or emoji-presentation code point. */
const emojiRegex = /(?:\p{Extended_Pictographic}|\p{Emoji_Presentation})/u;
/** Matches code points that render as emoji by default. */
const emojiPresentationRegex = /\p{Emoji_Presentation}/u;
/** Matches keycap sequences: digit, `#` or `*`, optional U+FE0F, then U+20E3. */
const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
/** Matches any Unicode symbol (general category S). */
const symbolRegex = /\p{S}/u;
/** Matches any Unicode punctuation (general category P). */
const punctuationRegex = /\p{P}/u;
/** Matches any whitespace character. */
const whitespaceRegex = /\s/u;
/** Characters counted as line breaks. */
const newlineChars = new Set(["\n", "\r", "\u2028", "\u2029"]);
514
/**
 * Creates an empty non-word collection.
 *
 * @returns {{emoji: string[], symbols: string[], punctuation: string[], counts: {emoji: number, symbols: number, punctuation: number}}}
 *   A fresh collection with empty lists and zeroed counters.
 */
function createNonWordCollection() {
  const counts = { emoji: 0, symbols: 0, punctuation: 0 };
  return { emoji: [], symbols: [], punctuation: [], counts };
}
526
/**
 * Records one non-word segment in `collection` under the given category.
 *
 * @param {object} collection - Non-word collection; mutated in place.
 * @param {string} category - "emoji" or "symbol"; any other value is stored
 *   as punctuation.
 * @param {string} segment - The segment text.
 * @returns {void}
 */
function addNonWord(collection, category, segment) {
  let bucket = "punctuation";
  if (category === "emoji") bucket = "emoji";
  else if (category === "symbol") bucket = "symbols";
  collection[bucket].push(segment);
  collection.counts[bucket] += 1;
}
540
/**
 * Tallies the whitespace characters of `segment` into `collection`.
 *
 * Each whitespace character is classified as a space, tab, newline (any
 * member of `newlineChars`) or other whitespace. The tab is compared against
 * the explicit `"\t"` escape so it cannot be confused with the space test.
 * `collection.whitespace` and `collection.counts.whitespace` are only created
 * or updated when at least one whitespace character was seen.
 *
 * @param {object} collection - Non-word collection; mutated in place.
 * @param {string} segment - Segment text to scan.
 * @returns {number} Number of whitespace characters found in `segment`.
 */
function addWhitespace(collection, segment) {
  let tally = collection.whitespace;
  let counted = 0;
  for (const char of segment) {
    let kind;
    if (char === " ") kind = "spaces";
    else if (char === "\t") kind = "tabs";
    else if (newlineChars.has(char)) kind = "newlines";
    else if (whitespaceRegex.test(char)) kind = "other";
    else continue;
    // Allocate the per-kind counters lazily, on the first whitespace character.
    tally = tally ?? createWhitespaceCounts();
    tally[kind] += 1;
    counted += 1;
  }
  if (counted > 0) {
    collection.whitespace = tally;
    collection.counts.whitespace = (collection.counts.whitespace ?? 0) + counted;
  }
  return counted;
}
574
/**
 * Classifies a non-word segment.
 *
 * A segment is an emoji when it is a keycap sequence, contains a code point
 * with default emoji presentation, or combines a pictographic code point with
 * the emoji variation selector U+FE0F. The selector is written as an explicit
 * escape because the raw character is invisible in source code.
 *
 * @param {string} segment - Segment text.
 * @returns {"emoji"|"symbol"|"punctuation"|null} The category, or null when
 *   the segment matches none of them.
 */
function classifyNonWordSegment(segment) {
  const hasEmojiVariationSelector = segment.includes("\uFE0F");
  const isEmoji =
    keycapEmojiRegex.test(segment) ||
    emojiPresentationRegex.test(segment) ||
    (hasEmojiVariationSelector && emojiRegex.test(segment));
  if (isEmoji) return "emoji";
  if (symbolRegex.test(segment)) return "symbol";
  if (punctuationRegex.test(segment)) return "punctuation";
  return null;
}
581
/**
 * Adds the contents of `source` to `target`.
 *
 * Segment lists and counters are merged per category. Whitespace counters are
 * merged only when `source` carries both a positive whitespace count and a
 * whitespace breakdown.
 *
 * @param {object} target - Collection to extend; mutated in place.
 * @param {object} source - Collection to read from.
 * @returns {object} `target`.
 */
function mergeNonWordCollections(target, source) {
  for (const category of ["emoji", "symbols", "punctuation"]) {
    if (source.counts[category] > 0) {
      appendAll(target[category], source[category]);
      target.counts[category] += source.counts[category];
    }
  }

  const incoming = source.counts.whitespace;
  if (incoming && incoming > 0 && source.whitespace) {
    const merged = target.whitespace ?? createWhitespaceCounts();
    merged.spaces += source.whitespace.spaces;
    merged.tabs += source.whitespace.tabs;
    merged.newlines += source.whitespace.newlines;
    merged.other += source.whitespace.other;
    target.whitespace = merged;
    target.counts.whitespace = (target.counts.whitespace ?? 0) + incoming;
  }
  return target;
}
605
/**
 * Creates a zeroed whitespace breakdown.
 *
 * @returns {{spaces: number, tabs: number, newlines: number, other: number}}
 *   A fresh object with every counter set to 0.
 */
function createWhitespaceCounts() {
  const counts = {};
  for (const kind of ["spaces", "tabs", "newlines", "other"]) {
    counts[kind] = 0;
  }
  return counts;
}
613
+
614
+ //#endregion
615
+ //#region src/wc/analyze.ts
616
/**
 * Segments one locale chunk into words and, optionally, collects its
 * non-word segments.
 *
 * @param {{locale: string, text: string}} chunk - Chunk to analyse.
 * @param {boolean} collectNonWords - Whether non-word segments are collected.
 * @param {boolean} includeWhitespace - Whether whitespace is tallied too
 *   (only has an effect when `collectNonWords` is set).
 * @returns {{locale: string, text: string, segments: string[], words: number, nonWords: (object|undefined)}}
 *   The word segments, their count, and the non-word collection when requested.
 */
function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
  const wordSegmenter = getSegmenter(chunk.locale);
  const words = [];
  const nonWords = collectNonWords ? createNonWordCollection() : null;

  for (const part of wordSegmenter.segment(chunk.text)) {
    if (part.isWordLike) {
      words.push(part.segment);
      continue;
    }
    if (!(collectNonWords && nonWords)) continue;
    if (includeWhitespace) addWhitespace(nonWords, part.segment);
    const category = classifyNonWordSegment(part.segment);
    if (category) addNonWord(nonWords, category, part.segment);
  }

  return {
    locale: chunk.locale,
    text: chunk.text,
    segments: words,
    words: words.length,
    nonWords: nonWords ?? void 0
  };
}
634
/**
 * Counts the characters of one locale chunk, split into characters belonging
 * to word-like segments and characters belonging to non-word segments.
 *
 * Non-word segments contribute only when `collectNonWords` is set and the
 * segment is either classified (emoji/symbol/punctuation) or contains counted
 * whitespace.
 *
 * @param {{locale: string, text: string}} chunk - Chunk to analyse.
 * @param {boolean} collectNonWords - Whether non-word segments are collected.
 * @param {boolean} includeWhitespace - Whether whitespace is tallied.
 * @returns {{locale: string, text: string, chars: number, wordChars: number, nonWordChars: number, nonWords: (object|undefined)}}
 *   Character totals and the non-word collection when requested.
 */
function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
  const wordSegmenter = getSegmenter(chunk.locale);
  const nonWords = collectNonWords ? createNonWordCollection() : null;
  let wordChars = 0;
  let nonWordChars = 0;

  for (const part of wordSegmenter.segment(chunk.text)) {
    if (part.isWordLike) {
      wordChars += countCharsForLocale(part.segment, chunk.locale);
      continue;
    }
    if (!(collectNonWords && nonWords)) continue;

    const whitespaceCount = includeWhitespace ? addWhitespace(nonWords, part.segment) : 0;
    const category = classifyNonWordSegment(part.segment);
    if (category) addNonWord(nonWords, category, part.segment);
    if (category || whitespaceCount > 0) {
      nonWordChars += countCharsForLocale(part.segment, chunk.locale);
    }
  }

  return {
    locale: chunk.locale,
    text: chunk.text,
    chars: wordChars + nonWordChars,
    wordChars,
    nonWordChars,
    nonWords: nonWords ?? void 0
  };
}
668
/**
 * Sums character statistics per locale, keeping locales in first-seen order.
 *
 * @param {Array<object>} chunks - Per-chunk results carrying `locale`,
 *   `chars`, `wordChars`, `nonWordChars` and optionally `nonWords`.
 * @returns {Array<object>} One aggregate per locale. Non-word collections are
 *   merged into fresh collections; `nonWords` is undefined for a locale whose
 *   chunks carried none.
 */
function aggregateCharsByLocale(chunks) {
  // A Map iterates in insertion order, which is the first-seen locale order.
  const totals = /* @__PURE__ */ new Map();
  for (const chunk of chunks) {
    const entry = totals.get(chunk.locale);
    if (!entry) {
      totals.set(chunk.locale, {
        locale: chunk.locale,
        chars: chunk.chars,
        wordChars: chunk.wordChars,
        nonWordChars: chunk.nonWordChars,
        nonWords: chunk.nonWords ? mergeNonWordCollections(createNonWordCollection(), chunk.nonWords) : void 0
      });
      continue;
    }
    entry.chars += chunk.chars;
    entry.wordChars += chunk.wordChars;
    entry.nonWordChars += chunk.nonWordChars;
    if (chunk.nonWords) {
      if (!entry.nonWords) entry.nonWords = createNonWordCollection();
      mergeNonWordCollections(entry.nonWords, chunk.nonWords);
    }
  }
  return Array.from(totals.values());
}
694
/**
 * Sums word counts and concatenates word segments per locale, keeping
 * locales in first-seen order.
 *
 * @param {Array<{locale: string, words: number, segments: string[]}>} chunks
 *   Per-chunk word results.
 * @returns {Array<{locale: string, words: number, segments: string[]}>}
 *   One aggregate per locale; segment arrays are copies, not the inputs.
 */
function aggregateByLocale(chunks) {
  // A Map iterates in insertion order, which is the first-seen locale order.
  const byLocale = /* @__PURE__ */ new Map();
  for (const chunk of chunks) {
    const entry = byLocale.get(chunk.locale);
    if (entry) {
      entry.words += chunk.words;
      appendAll(entry.segments, chunk.segments);
    } else {
      byLocale.set(chunk.locale, {
        locale: chunk.locale,
        words: chunk.words,
        segments: [...chunk.segments]
      });
    }
  }
  return Array.from(byLocale.values());
}
713
+
714
+ //#endregion
715
+ //#region src/wc/mode.ts
716
/**
 * Maps every accepted mode spelling to its canonical mode name.
 *
 * The table has a null prototype so that looking up arbitrary user input
 * (for example "constructor" or "__proto__") can only ever hit the aliases
 * listed here and never an inherited `Object.prototype` member.
 */
const MODE_ALIASES = Object.assign(/* @__PURE__ */ Object.create(null), {
  chunk: "chunk",
  chunks: "chunk",
  segments: "segments",
  segment: "segments",
  seg: "segments",
  collector: "collector",
  collect: "collector",
  colle: "collector",
  char: "char",
  chars: "char",
  character: "char",
  characters: "char",
  "char-collector": "char-collector"
});
731
/** Tokens accepted as the "char" half of a composed char-collector mode. */
const CHAR_MODE_ALIASES = new Set(["char", "chars", "character", "characters"]);
/** Tokens accepted as the "collector" half of a composed char-collector mode. */
const COLLECTOR_MODE_ALIASES = new Set(["collector", "collect", "colle", "col"]);
743
/**
 * Removes every run of hyphens, underscores and whitespace from `value`.
 *
 * @param {string} value - Text to compact.
 * @returns {string} The text without separators.
 */
function collapseSeparators(value) {
  return value.split(/[-_\s]+/).join("");
}
746
/**
 * Checks whether `value`, split on hyphens/underscores/whitespace, consists
 * solely of char aliases and collector aliases with at least one of each.
 *
 * @param {string} value - Lower-cased mode text.
 * @returns {boolean} True for spellings such as "char-collector" or
 *   "collect_chars"; false when fewer than two tokens are present or any
 *   token is unknown.
 */
function isComposedCharCollectorFromTokens(value) {
  const tokens = value
    .split(/[-_\s]+/)
    .map((token) => token.trim())
    .filter((token) => token.length > 0);
  if (tokens.length < 2) return false;

  const isCharToken = (token) => CHAR_MODE_ALIASES.has(token);
  // A token is only treated as a collector alias when it is not a char alias.
  const isCollectorToken = (token) => !isCharToken(token) && COLLECTOR_MODE_ALIASES.has(token);

  const allKnown = tokens.every((token) => isCharToken(token) || isCollectorToken(token));
  if (!allKnown) return false;
  return tokens.some(isCharToken) && tokens.some(isCollectorToken);
}
764
/**
 * Checks whether `value` is a char alias directly concatenated with a
 * collector alias, in either order (e.g. "charcollector", "colchars").
 *
 * @param {string} value - Mode text with separators already removed.
 * @returns {boolean} True when `value` is such a concatenation.
 */
function isComposedCharCollectorCompact(value) {
  return [...CHAR_MODE_ALIASES].some((charAlias) =>
    [...COLLECTOR_MODE_ALIASES].some(
      (collectorAlias) => value === `${charAlias}${collectorAlias}` || value === `${collectorAlias}${charAlias}`
    )
  );
}
768
/**
 * Resolves user-supplied mode text to a canonical mode name.
 *
 * Lookup order: exact alias, tokenised char+collector composition, compact
 * char+collector composition, then alias lookup on the separator-free text.
 *
 * @param {string|null|undefined} input - Raw mode text.
 * @returns {string|null} The canonical mode, or null when the input is empty
 *   or not recognised.
 */
function normalizeMode(input) {
  if (!input) return null;
  const key = input.trim().toLowerCase();

  const exact = MODE_ALIASES[key];
  if (exact) return exact;

  if (isComposedCharCollectorFromTokens(key)) return "char-collector";

  const squashed = collapseSeparators(key);
  if (isComposedCharCollectorCompact(squashed)) return "char-collector";
  return MODE_ALIASES[squashed] ?? null;
}
778
/**
 * Resolves mode text to a canonical mode, falling back when it is not
 * recognised.
 *
 * @param {string|null|undefined} input - Raw mode text.
 * @param {string} [fallback="chunk"] - Mode returned for unrecognised input.
 * @returns {string} The canonical mode or `fallback`.
 */
function resolveMode(input, fallback = "chunk") {
  const mode = normalizeMode(input);
  return mode ?? fallback;
}
781
+
782
+ //#endregion
783
+ //#region src/wc/latin-hints.ts
784
/**
 * Built-in Latin-script hints: a locale tag paired with a character class of
 * letters that trigger it.
 */
const DEFAULT_LATIN_HINT_RULES_SOURCE = [
  ["de", "[äöüÄÖÜß]"],
  ["es", "[ñÑ¿¡]"],
  ["pt", "[ãõÃÕ]"],
  ["fr", "[œŒæÆ]"],
  ["pl", "[ąćęłńśźżĄĆĘŁŃŚŹŻ]"],
  ["tr", "[ıİğĞşŞ]"],
  ["ro", "[ăĂâÂîÎșȘțȚ]"],
  ["hu", "[őŐűŰ]"],
  ["is", "[ðÐþÞ]"]
].map(([tag, pattern]) => ({ tag, pattern }));

/** Frozen list of frozen copies of the built-in hint rules. */
const DEFAULT_LATIN_HINT_RULES = Object.freeze(
  DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule }))
);
823
+
824
+ //#endregion
825
+ //#region src/wc/locale-detect.ts
826
/** Locale tag used for Latin text when no hint applies. */
const DEFAULT_LOCALE = "und-Latn";
/** Locale tag used for Han text when no hint applies. */
const DEFAULT_HAN_TAG = "und-Hani";
/** Upper bound on the source length of a Latin hint pattern. */
const MAX_LATIN_HINT_PATTERN_LENGTH = 256;
/** Single-character script detectors, keyed by script name. */
const regex = {
  hiragana: /\p{Script=Hiragana}/u,
  katakana: /\p{Script=Katakana}/u,
  hangul: /\p{Script=Hangul}/u,
  han: /\p{Script=Han}/u,
  latin: /\p{Script=Latin}/u,
  arabic: /\p{Script=Arabic}/u,
  cyrillic: /\p{Script=Cyrillic}/u,
  devanagari: /\p{Script=Devanagari}/u,
  thai: /\p{Script=Thai}/u
};
840
/** Locale tags treated as Latin when no detection context is supplied. */
const defaultLatinLocales = new Set([DEFAULT_LOCALE].concat(DEFAULT_LATIN_HINT_RULES.map(({ tag }) => tag)));
841
/**
 * Reports whether `locale` is one of the known Latin locales.
 *
 * @param {string} locale - Locale tag to test.
 * @param {{latinLocales: Set<string>}} [context] - Detection context; when
 *   omitted, the default Latin locale set is used.
 * @returns {boolean} True when the applicable set contains `locale`.
 */
function isLatinLocale(locale, context) {
  const known = context ? context.latinLocales : defaultLatinLocales;
  return known.has(locale);
}
845
/**
 * Picks the Latin locale hint from the options, checking `latinTagHint`,
 * then `latinLanguageHint`, then `latinLocaleHint`.
 *
 * @param {object} options - Detection options.
 * @returns {string|undefined} The first hint that is non-empty after
 *   trimming, or undefined when none is set.
 */
function resolveLatinHint(options) {
  for (const field of ["latinTagHint", "latinLanguageHint", "latinLocaleHint"]) {
    const hint = options[field]?.trim();
    if (hint) return hint;
  }
  return undefined;
}
853
/**
 * Picks the Han locale hint from the options, checking `hanTagHint` and then
 * `hanLanguageHint`.
 *
 * @param {object} options - Detection options.
 * @returns {string|undefined} The first hint that is non-empty after
 *   trimming, or undefined when none is set.
 */
function resolveHanHint(options) {
  for (const field of ["hanTagHint", "hanLanguageHint"]) {
    const hint = options[field]?.trim();
    if (hint) return hint;
  }
  return undefined;
}
859
/**
 * Compiles a Latin hint pattern into a Unicode-aware regular expression.
 *
 * String patterns are compiled with the `u` flag. RegExp patterns keep their
 * flags, with `u` appended unless they already use `u` or `v`.
 *
 * @param {string|RegExp} pattern - Pattern source or an existing regex.
 * @param {string} label - Prefix used in error messages to identify the rule.
 * @returns {RegExp} The compiled expression.
 * @throws {TypeError} When `pattern` is neither a string nor regex-like.
 * @throws {Error} When the pattern is empty, longer than
 *   `MAX_LATIN_HINT_PATTERN_LENGTH`, or not a valid Unicode regex (the
 *   original error is attached as `cause`).
 */
function compileLatinHintPattern(pattern, label) {
  const isString = typeof pattern === "string";
  // Regex-likeness is duck-typed so expressions from another realm still work.
  const isRegexLike =
    pattern != null && typeof pattern.source === "string" && typeof pattern.flags === "string";
  if (!isString && !isRegexLike) {
    throw new TypeError(`${label}: pattern must be a string or RegExp.`);
  }

  const source = isString ? pattern : pattern.source;
  const hasUnicodeMode = !isString && (pattern.flags.includes("u") || pattern.flags.includes("v"));
  const flags = isString ? "u" : hasUnicodeMode ? pattern.flags : `${pattern.flags}u`;

  if (source.length === 0) throw new Error(`${label}: pattern must not be empty.`);
  if (source.length > MAX_LATIN_HINT_PATTERN_LENGTH) {
    throw new Error(`${label}: pattern must be at most ${MAX_LATIN_HINT_PATTERN_LENGTH} characters.`);
  }

  try {
    return new RegExp(source, flags);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    // Keep the original failure reachable for debugging.
    throw new Error(`${label}: invalid Unicode regex pattern (${message}).`, { cause: error });
  }
}
872
/**
 * Validates a rule priority.
 *
 * @param {number|undefined} priority - Priority supplied by the rule.
 * @param {string} label - Prefix used in the error message.
 * @returns {number} 0 when `priority` is undefined, otherwise `priority`.
 * @throws {Error} When `priority` is defined but not a finite number.
 */
function normalizeLatinHintPriority(priority, label) {
  if (priority === undefined) return 0;
  const isFiniteNumber = typeof priority === "number" && Number.isFinite(priority);
  if (isFiniteNumber) return priority;
  throw new Error(`${label}: priority must be a finite number when provided.`);
}
877
/**
 * Validates one Latin hint rule and compiles its pattern.
 *
 * @param {{tag: string, pattern: (string|RegExp), priority: (number|undefined)}} rule
 *   Rule to compile.
 * @param {number} order - Position of the rule, used as a sort tie-breaker.
 * @param {string} label - Prefix used in error messages.
 * @returns {{tag: string, pattern: RegExp, priority: number, order: number}}
 *   The compiled rule with its trimmed tag.
 * @throws {Error} When the tag is not a non-empty string, or when the pattern
 *   or priority is rejected by its validator.
 */
function compileLatinHintRule(rule, order, label) {
  const tag = typeof rule.tag === "string" ? rule.tag.trim() : "";
  if (tag === "") throw new Error(`${label}: tag must be a non-empty string.`);
  const pattern = compileLatinHintPattern(rule.pattern, label);
  const priority = normalizeLatinHintPriority(rule.priority, label);
  return { tag, pattern, priority, order };
}
887
/**
 * Builds the ordered list of compiled Latin hint rules.
 *
 * Custom rules (`options.latinHintRules`) come first, followed by the
 * built-in rules unless `options.useDefaultLatinHints` is `false`. The result
 * is sorted by descending priority; ties keep that original order.
 *
 * @param {object} options - Detection options.
 * @returns {Array<{tag: string, pattern: RegExp, priority: number, order: number}>}
 *   Compiled rules in evaluation order.
 * @throws {Error} When any rule fails validation.
 */
function resolveLatinHintRules(options) {
  const pending = [];
  const enqueue = (rules, kind) => {
    for (let index = 0; index < rules.length; index += 1) {
      const rule = rules[index];
      // Missing or falsy entries are skipped rather than reported.
      if (rule) pending.push({ rule, label: `Invalid ${kind} Latin hint rule at index ${index}` });
    }
  };

  enqueue(options.latinHintRules ?? [], "custom");
  if (options.useDefaultLatinHints !== false) enqueue(DEFAULT_LATIN_HINT_RULES, "default");

  const compiled = pending.map(({ rule, label }, position) => compileLatinHintRule(rule, position, label));
  compiled.sort((left, right) =>
    left.priority !== right.priority ? right.priority - left.priority : left.order - right.order
  );
  return compiled;
}
914
/**
 * Resolves the detection options once into a reusable context.
 *
 * @param {object} [options={}] - Detection options.
 * @returns {{latinHint: (string|undefined), hanHint: (string|undefined), latinHintRules: Array<object>, latinLocales: Set<string>}}
 *   The resolved hints, compiled rules, and the set of locale tags treated as
 *   Latin (default locale, every rule tag, and the Latin hint if set).
 */
function resolveLocaleDetectContext(options = {}) {
  const latinHint = resolveLatinHint(options);
  const latinHintRules = resolveLatinHintRules(options);

  const latinLocales = new Set([DEFAULT_LOCALE]);
  latinHintRules.forEach((rule) => latinLocales.add(rule.tag));
  if (latinHint) latinLocales.add(latinHint);

  const hanHint = resolveHanHint(options);
  return { latinHint, hanHint, latinHintRules, latinLocales };
}
927
/**
 * Finds the first hint rule whose pattern matches `char`.
 *
 * @param {string} char - Character to test.
 * @param {{latinHintRules: Array<{pattern: RegExp, tag: string}>}} context
 *   Detection context holding the ordered rules.
 * @returns {string} The tag of the first matching rule, or `DEFAULT_LOCALE`
 *   when no rule matches.
 */
function detectLatinLocale(char, context) {
  const matched = context.latinHintRules.find((hint) => {
    // Reset the match position so global/sticky patterns behave statelessly.
    hint.pattern.lastIndex = 0;
    return hint.pattern.test(char);
  });
  return matched ? matched.tag : DEFAULT_LOCALE;
}
934
/**
 * Determines the locale implied by a single character.
 *
 * Scripts with a fixed locale are checked first (kana, Hangul, Arabic,
 * Cyrillic, Devanagari, Thai), then Han, then Latin.
 *
 * @param {string} char - Character to classify.
 * @param {string|null|undefined} previousLocale - Locale of the preceding text.
 * @param {object} [options={}] - Detection options; only used to build the
 *   default `context`.
 * @param {object} [context] - Pre-resolved detection context.
 * @param {boolean} [allowLatinLocaleCarry=true] - Whether an un-hinted Latin
 *   character may inherit a non-default Latin `previousLocale`.
 * @param {boolean} [allowJapaneseHanCarry=true] - Whether a Han character may
 *   inherit a `previousLocale` that starts with "ja".
 * @returns {string|null} The detected locale tag, or null when the character
 *   belongs to none of the recognised scripts.
 */
function detectLocaleForChar(char, previousLocale, options = {}, context = resolveLocaleDetectContext(options), allowLatinLocaleCarry = true, allowJapaneseHanCarry = true) {
  const isKana = regex.hiragana.test(char) || regex.katakana.test(char);
  if (isKana) return "ja";
  if (regex.hangul.test(char)) return "ko";
  if (regex.arabic.test(char)) return "ar";
  if (regex.cyrillic.test(char)) return "ru";
  if (regex.devanagari.test(char)) return "hi";
  if (regex.thai.test(char)) return "th";

  if (regex.han.test(char)) {
    // Han following Japanese text stays Japanese unless carrying is disabled.
    const followsJapanese = Boolean(previousLocale) && previousLocale.startsWith("ja");
    if (allowJapaneseHanCarry && followsJapanese) return previousLocale;
    return context.hanHint ?? DEFAULT_HAN_TAG;
  }

  if (!regex.latin.test(char)) return null;

  const hintedLocale = detectLatinLocale(char, context);
  if (hintedLocale !== DEFAULT_LOCALE) return hintedLocale;

  const canCarry =
    allowLatinLocaleCarry &&
    Boolean(previousLocale) &&
    isLatinLocale(previousLocale, context) &&
    previousLocale !== DEFAULT_LOCALE;
  if (canCarry) return previousLocale;

  return context.latinHint ? context.latinHint : DEFAULT_LOCALE;
}
954
+
955
+ //#endregion
956
+ //#region src/wc/segment.ts
957
+ const HARD_BOUNDARY_REGEX = /[\r\n,.!?;:,、。!?;:.。、]/u; // Line breaks plus clause/sentence punctuation; segmentTextByLocale tests locale-less characters against it to stop carrying the previous locale forward.
957
+ const LATIN_PROMOTION_BREAK_REGEX = /[\s,.!?;:,、。!?;:.。、]/u; // Same punctuation plus any whitespace; findLastLatinPromotionBreakIndex uses it to locate the last break in a buffer. NOTE(review): several marks appear twice in both classes as rendered here - possibly full-width variants; confirm against the source encoding.
959
/**
 * Splits `text` into consecutive chunks that each carry one locale tag.
 *
 * The text is walked character by character. Characters with no detected
 * locale are appended to the current chunk. A character with a different
 * locale closes the current chunk, with one exception: when a default-locale
 * chunk is followed by another Latin locale, only the part up to the last
 * promotion break stays in the default chunk and the remainder (the whole
 * buffer if there is no break) is promoted into the new locale.
 *
 * @param {string} text Input text.
 * @param {object} [options={}] Locale detection options.
 * @returns {Array<{locale: string, text: string}>} Chunks after
 *   `mergeAdjacentChunks` has been applied.
 */
function segmentTextByLocale(text, options = {}) {
  const context = resolveLocaleDetectContext(options);
  const chunks = [];
  let currentLocale = DEFAULT_LOCALE;
  let buffer = "";
  let bufferHasScript = false;
  // Set once a hard boundary is seen after the last scripted character;
  // while set, locale carry-over is disabled for the next detection.
  let sawCarryBoundary = false;
  for (const char of text) {
    const detected = detectLocaleForChar(char, currentLocale, options, context, !sawCarryBoundary, !sawCarryBoundary);
    const hasDetection = detected !== null;
    const targetLocale = detected ?? currentLocale;
    if (buffer === "") {
      // First character of a fresh chunk.
      currentLocale = targetLocale;
      buffer = char;
      bufferHasScript = hasDetection;
    } else if (hasDetection && !bufferHasScript) {
      // Buffer held only locale-less characters so far: adopt this locale.
      currentLocale = targetLocale;
      buffer += char;
      bufferHasScript = true;
    } else if (hasDetection && targetLocale !== currentLocale) {
      // Locale switch. `carried` is the buffered text that moves into the new chunk.
      let carried = "";
      if (currentLocale === DEFAULT_LOCALE && isLatinLocale(targetLocale, context)) {
        const breakIndex = findLastLatinPromotionBreakIndex(buffer);
        if (breakIndex === -1) {
          carried = buffer;
        } else {
          const prefix = buffer.slice(0, breakIndex + 1);
          if (prefix.length > 0) {
            chunks.push({ locale: currentLocale, text: prefix });
          }
          carried = buffer.slice(breakIndex + 1);
        }
      } else {
        chunks.push({ locale: currentLocale, text: buffer });
      }
      currentLocale = targetLocale;
      buffer = `${carried}${char}`;
      bufferHasScript = true;
    } else {
      buffer += char;
      if (hasDetection) {
        bufferHasScript = true;
      }
    }
    // Carry-boundary bookkeeping, identical for every branch above.
    if (hasDetection) {
      sawCarryBoundary = false;
    } else if (HARD_BOUNDARY_REGEX.test(char)) {
      sawCarryBoundary = true;
    }
  }
  if (buffer.length > 0) {
    chunks.push({ locale: currentLocale, text: buffer });
  }
  return mergeAdjacentChunks(chunks);
}
1032
/**
 * Scans `buffer` from its end for the last promotion-break character.
 *
 * @param {string} buffer Text buffered so far.
 * @returns {number} Index of the last character matching
 *   `LATIN_PROMOTION_BREAK_REGEX`, or `-1` when there is none.
 */
function findLastLatinPromotionBreakIndex(buffer) {
  let position = buffer.length - 1;
  while (position >= 0) {
    const unit = buffer[position];
    if (unit && LATIN_PROMOTION_BREAK_REGEX.test(unit)) {
      return position;
    }
    position -= 1;
  }
  return -1;
}
1040
/**
 * Joins neighbouring chunks that share the same locale.
 *
 * @param {Array<{locale: string, text: string}>} chunks Chunks in text order.
 * @returns {Array<{locale: string, text: string}>} The input array itself when
 *   it is empty; otherwise a new array in which runs of equal-locale chunks
 *   are replaced by one chunk with the concatenated text. Input chunk objects
 *   are not modified.
 */
function mergeAdjacentChunks(chunks) {
  if (chunks.length === 0) {
    return chunks;
  }
  const merged = [chunks[0]];
  for (let position = 1; position < chunks.length; position += 1) {
    const next = chunks[position];
    const tailIndex = merged.length - 1;
    const tail = merged[tailIndex];
    if (next.locale === tail.locale) {
      // Replace rather than mutate so the caller's chunk objects stay intact.
      merged[tailIndex] = { locale: tail.locale, text: tail.text + next.text };
    } else {
      merged.push(next);
    }
  }
  return merged;
}
1058
+
1059
+ //#endregion
1060
+ //#region src/wc/wc.ts
1061
/**
 * Counts words (or characters) in `text`, broken down by detected locale.
 *
 * The text is segmented by locale first. The resolved mode then selects the
 * analysis and the breakdown shape:
 * - `"char"`: per-chunk character counts;
 * - `"char-collector"`: character counts from `aggregateCharsByLocale`;
 * - `"segments"`: per-chunk word counts including the segments;
 * - `"collector"`: word counts from `aggregateByLocale`, plus the non-word
 *   aggregate from `collectNonWordsAggregate`;
 * - any other mode: per-chunk word counts.
 *
 * @param {string} text Input text.
 * @param {object} [options={}] Counting options (`mode`, `nonWords`,
 *   `includeWhitespace`, and the locale hint options).
 * @returns {{total: number, counts: object|undefined, breakdown: object}}
 *   `counts` is only populated when `options.nonWords` is truthy.
 */
function wordCounter(text, options = {}) {
  const mode = resolveMode(options.mode, "chunk");
  const collectNonWords = Boolean(options.nonWords);
  const includeWhitespace = Boolean(options.includeWhitespace);
  const {
    latinLanguageHint,
    latinTagHint,
    latinLocaleHint,
    latinHintRules,
    useDefaultLatinHints,
    hanLanguageHint,
    hanTagHint
  } = options;
  const chunks = segmentTextByLocale(text, {
    latinLanguageHint,
    latinTagHint,
    latinLocaleHint,
    latinHintRules,
    useDefaultLatinHints,
    hanLanguageHint,
    hanTagHint
  });
  const sumOf = (list, pick) => list.reduce((sum, entry) => sum + pick(entry), 0);

  if (mode === "char" || mode === "char-collector") {
    const analyzedChars = chunks.map((chunk) => analyzeCharChunk(chunk, collectNonWords, includeWhitespace));
    const charTotal = sumOf(analyzedChars, (chunk) => chunk.chars);
    const charCounts = collectNonWords
      ? {
          words: sumOf(analyzedChars, (chunk) => chunk.wordChars),
          nonWords: sumOf(analyzedChars, (chunk) => chunk.nonWordChars),
          total: charTotal
        }
      : void 0;
    const charItems = mode === "char"
      ? analyzedChars.map((chunk) => ({
          locale: chunk.locale,
          text: chunk.text,
          chars: chunk.chars,
          nonWords: chunk.nonWords
        }))
      : aggregateCharsByLocale(analyzedChars).map((chunk) => ({
          locale: chunk.locale,
          chars: chunk.chars,
          nonWords: chunk.nonWords
        }));
    return {
      total: charTotal,
      counts: charCounts,
      breakdown: { mode, items: charItems }
    };
  }

  const analyzed = chunks.map((chunk) => analyzeChunk(chunk, collectNonWords, includeWhitespace));
  // Non-word contribution of one chunk; zero unless collection is enabled.
  const nonWordsOf = (chunk) => (collectNonWords && chunk.nonWords ? getNonWordTotal(chunk.nonWords) : 0);
  const wordsTotal = sumOf(analyzed, (chunk) => chunk.words);
  const nonWordsTotal = collectNonWords ? sumOf(analyzed, nonWordsOf) : 0;
  const total = sumOf(analyzed, (chunk) => chunk.words + nonWordsOf(chunk));
  const counts = collectNonWords
    ? { words: wordsTotal, nonWords: nonWordsTotal, total }
    : void 0;

  let breakdown;
  if (mode === "segments") {
    breakdown = {
      mode,
      items: analyzed.map((chunk) => ({
        locale: chunk.locale,
        text: chunk.text,
        words: chunk.words,
        segments: chunk.segments,
        nonWords: chunk.nonWords
      }))
    };
  } else if (mode === "collector") {
    breakdown = {
      mode,
      items: aggregateByLocale(analyzed),
      nonWords: collectNonWordsAggregate(analyzed, collectNonWords)
    };
  } else {
    breakdown = {
      mode,
      items: analyzed.map((chunk) => ({
        locale: chunk.locale,
        text: chunk.text,
        words: chunk.words,
        nonWords: chunk.nonWords
      }))
    };
  }
  return { total, counts, breakdown };
}
1161
/**
 * Adds up the non-word counters of one collection.
 *
 * @param {{counts: {emoji: number, symbols: number, punctuation: number, whitespace?: number}}} nonWords
 *   Non-word collection.
 * @returns {number} Emoji + symbols + punctuation, plus whitespace when present.
 */
function getNonWordTotal(nonWords) {
  const { emoji, symbols, punctuation, whitespace } = nonWords.counts;
  return emoji + symbols + punctuation + (whitespace ?? 0);
}
1164
/**
 * Merges the non-word collections of all analyzed chunks into one.
 *
 * @param {Array<{nonWords?: object}>} analyzed Analyzed chunks.
 * @param {boolean} enabled Whether non-word collection is switched on.
 * @returns {object|undefined} A fresh collection with every chunk's
 *   `nonWords` merged in, or `undefined` when `enabled` is falsy.
 */
function collectNonWordsAggregate(analyzed, enabled) {
  if (!enabled) {
    return undefined;
  }
  const aggregate = createNonWordCollection();
  for (const { nonWords } of analyzed) {
    if (nonWords) {
      mergeNonWordCollections(aggregate, nonWords);
    }
  }
  return aggregate;
}
1173
+
1174
+ //#endregion
1175
+ //#region src/wc/index.ts
1176
+ var wc_default = wordCounter; // Alias for wordCounter; buildPerKeyItems, buildSingleItem, countSections and the worker message handler call the counter through this name.
1177
+
1178
+ //#endregion
1179
+ //#region src/markdown/section-count.ts
1180
/**
 * Converts an arbitrary value into text suitable for counting.
 *
 * @param {*} value Value to convert.
 * @returns {string} `""` for `null`/`undefined`; strings unchanged; numbers
 *   and booleans via `String`; everything else as JSON, falling back to
 *   `String(value)` when JSON serialisation throws or yields no string.
 */
function normalizeText(value) {
  if (value == null) return "";
  if (typeof value === "string") return value;
  if (typeof value === "number" || typeof value === "boolean") return String(value);
  try {
    // JSON.stringify returns undefined (not a string) for functions and
    // symbols; fall back so the result is always a string.
    return JSON.stringify(value) ?? String(value);
  } catch {
    // Reached for values JSON cannot serialise, e.g. BigInt or circular objects.
    return String(value);
  }
}
1190
/**
 * Produces one counted item per top-level frontmatter key.
 *
 * @param {*} data Parsed frontmatter data; anything that is not a non-array
 *   object yields no items.
 * @param {string} mode Accepted for signature parity; not read here.
 * @param {object} options Options forwarded to the word counter.
 * @returns {Array<{name: string, source: "frontmatter", result: object}>}
 *   Items counting `"key: value"` text, or just the key when the normalised
 *   value is empty.
 */
function buildPerKeyItems(data, mode, options) {
  const isRecord = Boolean(data) && typeof data === "object" && !Array.isArray(data);
  if (!isRecord) {
    return [];
  }
  const items = [];
  for (const [key, value] of Object.entries(data)) {
    const valueText = normalizeText(value);
    const countedText = valueText ? `${key}: ${valueText}` : key;
    items.push({
      name: key,
      source: "frontmatter",
      result: wc_default(countedText, options)
    });
  }
  return items;
}
1201
/**
 * Wraps the count of one text block in a single-element item list.
 *
 * @param {string} name Item name.
 * @param {string} text Text to count.
 * @param {string} mode Accepted for signature parity; not read here.
 * @param {object} options Options forwarded to the word counter.
 * @param {string} source Item source label.
 * @returns {Array<{name: string, source: string, result: object}>} One item.
 */
function buildSingleItem(name, text, mode, options, source) {
  const result = wc_default(text, options);
  return [{ name, source, result }];
}
1208
/**
 * Sums `result.total` over all items.
 *
 * @param {Array<{result: {total: number}}>} items Counted items.
 * @returns {number} Combined total; `0` for an empty list.
 */
function sumTotals(items) {
  let combined = 0;
  for (const item of items) {
    combined += item.result.total;
  }
  return combined;
}
1211
/**
 * Counts a markdown document by section.
 *
 * `"all"` counts the raw input as one item without parsing it. Every other
 * section value parses the input first and counts:
 * - `"frontmatter"` / `"content"`: that part only;
 * - `"split"`: frontmatter, then content;
 * - `"per-key"`: one item per frontmatter key;
 * - `"split-per-key"`: per-key items, then content.
 * An unrecognised section produces no items.
 *
 * @param {string} input Markdown source.
 * @param {string} section Section selector.
 * @param {object} [options={}] Options forwarded to the word counter.
 * @returns {{section: string, total: number, frontmatterType: *, items: Array<object>}}
 *   Sectioned result; `frontmatterType` is `null` for `"all"`.
 */
function countSections(input, section, options = {}) {
  const mode = options.mode ?? "chunk";
  if (section === "all") {
    const result = wc_default(input, options);
    const wholeItem = { name: "all", source: "content", result };
    return {
      section,
      total: result.total,
      frontmatterType: null,
      items: [wholeItem]
    };
  }
  const parsed = parseMarkdown(input);
  const frontmatterText = parsed.frontmatter ?? "";
  const contentText = parsed.content ?? "";
  const frontmatterItems = () => buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter");
  const contentItems = () => buildSingleItem("content", contentText, mode, options, "content");
  const perKeyItems = () => buildPerKeyItems(parsed.data, mode, options);
  let items;
  switch (section) {
    case "frontmatter":
      items = frontmatterItems();
      break;
    case "content":
      items = contentItems();
      break;
    case "split":
      items = [...frontmatterItems(), ...contentItems()];
      break;
    case "per-key":
      items = perKeyItems();
      break;
    case "split-per-key":
      items = [...perKeyItems(), ...contentItems()];
      break;
    default:
      items = [];
  }
  return {
    section,
    total: sumTotals(items),
    frontmatterType: parsed.frontmatterType,
    items
  };
}
1242
+
1243
+ //#endregion
1244
+ //#region src/cli/batch/aggregate.ts
1245
/**
 * Empties the `segments` list of every breakdown item of a collector-mode
 * result, in place. Results in any other mode are left untouched.
 *
 * @param {{breakdown: {mode: string, items: Array<object>}}} result Word counter result.
 * @returns {void}
 */
function stripCollectorSegmentsFromWordCounterResult(result) {
  const { breakdown } = result;
  if (breakdown.mode === "collector") {
    breakdown.items.forEach((item) => {
      item.segments = [];
    });
  }
}
1249
/**
 * Applies `stripCollectorSegmentsFromWordCounterResult` to the result of
 * every item of a sectioned count.
 *
 * @param {{items: Array<{result: object}>}} result Sectioned result.
 * @returns {void}
 */
function stripCollectorSegmentsFromSectionedResult(result) {
  result.items.forEach(({ result: counted }) => {
    stripCollectorSegmentsFromWordCounterResult(counted);
  });
}
1252
/**
 * Strips collector segments from either kind of count result.
 *
 * A result with a `section` property is treated as a sectioned result;
 * anything else is treated as a plain word counter result.
 *
 * @param {object} result Count result, modified in place.
 * @returns {void}
 */
function compactCollectorSegmentsInCountResult(result) {
  const strip = "section" in result
    ? stripCollectorSegmentsFromSectionedResult
    : stripCollectorSegmentsFromWordCounterResult;
  strip(result);
}
1259
+
1260
+ //#endregion
1261
+ //#region src/cli/path/load.ts
1262
/**
 * Heuristically decides whether a byte buffer holds binary data.
 *
 * Only the first 1024 bytes are inspected. A NUL byte means binary
 * immediately. Tab, LF, CR, printable ASCII (32-126) and bytes >= 128 are
 * accepted; any other byte counts as suspicious.
 *
 * @param {ArrayLike<number>} buffer File bytes.
 * @returns {boolean} `true` when a NUL byte is sampled or more than 30% of
 *   the sampled bytes are suspicious; `false` for an empty buffer.
 */
function isProbablyBinary(buffer) {
  if (buffer.length === 0) {
    return false;
  }
  const sampleSize = Math.min(buffer.length, 1024);
  let suspiciousCount = 0;
  for (let offset = 0; offset < sampleSize; offset += 1) {
    const byte = buffer[offset] ?? 0;
    if (byte === 0) {
      return true;
    }
    const isTextControl = byte === 9 || byte === 10 || byte === 13;
    const isPrintableAscii = byte >= 32 && byte <= 126;
    const isHighByte = byte >= 128;
    if (!(isTextControl || isPrintableAscii || isHighByte)) {
      suspiciousCount += 1;
    }
  }
  return suspiciousCount / sampleSize > 0.3;
}
1276
+
1277
+ //#endregion
1278
+ //#region src/cli/batch/jobs/worker/count-worker.ts
1279
// Worker entry point: counts one file per incoming message and posts the
// outcome back to the parent thread.
const config = workerData;
if (!parentPort) throw new Error("Worker protocol init failed: missing parentPort.");
parentPort.on("message", async (message) => {
  if (message.type === "shutdown") {
    parentPort?.close();
    return;
  }
  const { path, taskId, index } = message;
  const send = (response) => parentPort?.postMessage(response);
  // Splits a thrown value into an optional error code and a message text.
  const describeError = (error) => ({
    code: typeof error === "object" && error !== null && "code" in error ? String(error.code) : void 0,
    messageText: error instanceof Error ? error.message : String(error)
  });
  const sendFatal = ({ code, messageText }) => send({
    type: "fatal",
    taskId,
    index,
    path,
    code,
    message: messageText
  });
  const sendSkip = (reason) => send({
    type: "result",
    taskId,
    index,
    payload: {
      kind: "skip",
      skip: { path, reason }
    }
  });

  let buffer;
  try {
    buffer = await readFile(path);
  } catch (error) {
    const described = describeError(error);
    // EMFILE/ENFILE are reported as fatal; any other read failure skips the file.
    if (described.code === "EMFILE" || described.code === "ENFILE") {
      sendFatal(described);
    } else {
      sendSkip(`not readable: ${described.messageText}`);
    }
    return;
  }
  if (isProbablyBinary(buffer)) {
    sendSkip("binary file");
    return;
  }
  try {
    const content = buffer.toString("utf8");
    const result = config.section === "all"
      ? wc_default(content, config.wcOptions)
      : countSections(content, config.section, config.wcOptions);
    if (!config.preserveCollectorSegments) {
      compactCollectorSegmentsInCountResult(result);
    }
    send({
      type: "result",
      taskId,
      index,
      payload: {
        kind: "file",
        file: { path, result }
      }
    });
  } catch (error) {
    // Any failure while counting or posting the result is reported as fatal.
    sendFatal(describeError(error));
  }
});
1367
+
1368
+ //#endregion
1369
+ export { };
1370
+ //# sourceMappingURL=count-worker.mjs.map