@dev-pi2pie/word-counter 0.1.3 → 0.1.4-canary.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -0
- package/dist/esm/bin.mjs +2033 -1656
- package/dist/esm/bin.mjs.map +1 -1
- package/dist/esm/worker/count-worker.mjs +1370 -0
- package/dist/esm/worker/count-worker.mjs.map +1 -0
- package/dist/esm/worker-pool.mjs +187 -0
- package/dist/esm/worker-pool.mjs.map +1 -0
- package/package.json +1 -1
package/dist/esm/bin.mjs
CHANGED
|
@@ -4,6 +4,7 @@ import { Command, Option } from "commander";
|
|
|
4
4
|
import { closeSync, createWriteStream, existsSync, mkdirSync, openSync, readFileSync, statSync } from "node:fs";
|
|
5
5
|
import { basename, dirname, extname, join, relative, resolve, sep } from "node:path";
|
|
6
6
|
import { fileURLToPath } from "node:url";
|
|
7
|
+
import os from "node:os";
|
|
7
8
|
import { readFile, readdir, stat } from "node:fs/promises";
|
|
8
9
|
import { parseDocument } from "yaml";
|
|
9
10
|
|
|
@@ -345,8 +346,14 @@ function collectPathValue(value, previous = []) {
|
|
|
345
346
|
function collectLatinHintValue(value, previous = []) {
|
|
346
347
|
return [...previous, value];
|
|
347
348
|
}
|
|
349
|
+
/**
 * Argument parser for the `--jobs` CLI option.
 *
 * @param {string} value - Raw option text as typed on the command line.
 * @returns {number} The parsed job count, always a safe integer >= 1.
 * @throws {Error} When the text is not a plain run of decimal digits, or the
 *   parsed number is below 1 or too large to be a safe integer.
 */
function parseJobsOption(value) {
  const message = "`--jobs` must be an integer >= 1.";
  // Digits only: signs, decimals, exponents, whitespace and empty input are rejected up front.
  if (!/^\d+$/.test(value)) throw new Error(message);
  const jobs = Number.parseInt(value, 10);
  // A long digit run can still overflow the safe-integer range, and "0" is all digits but below the minimum.
  if (!Number.isSafeInteger(jobs) || jobs < 1) throw new Error(message);
  return jobs;
}
|
|
348
355
|
function configureProgramOptions(program, parseMode) {
|
|
349
|
-
program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--latin-hint <tag>=<pattern>", "add a custom Latin hint rule (repeatable)", collectLatinHintValue, []).option("--latin-hints-file <path>", "load custom Latin hint rules from a JSON file").option("--no-default-latin-hints", "disable built-in Latin hint rules").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies with --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", "mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of 
--debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in standard mode").option("--no-recursive", "disable recursive directory traversal").option("--quiet-skips", "hide skip diagnostics (applies when --debug is enabled)").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("--regex <pattern>", "regex filter for directory-scanned paths (applies to --path directories only)").option("-p, --path <path>", "read input from file or directory (directories expand in auto mode by default)", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
|
|
356
|
+
program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--latin-hint <tag>=<pattern>", "add a custom Latin hint rule (repeatable)", collectLatinHintValue, []).option("--latin-hints-file <path>", "load custom Latin hint rules from a JSON file").option("--no-default-latin-hints", "disable built-in Latin hint rules").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies with --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", "mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of 
--debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--jobs <n>", "concurrent file jobs in batch mode (default: 1; >1 enables worker load+count)", parseJobsOption, 1).option("--print-jobs-limit", "print suggested max --jobs for current host and exit").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in standard mode").option("--no-recursive", "disable recursive directory traversal").option("--quiet-skips", "hide skip diagnostics (applies when --debug is enabled)").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("--regex <pattern>", "regex filter for directory-scanned paths (applies to --path directories only)").option("-p, --path <path>", "read input from file or directory (directories expand in auto mode by default)", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
|
|
350
357
|
}
|
|
351
358
|
|
|
352
359
|
//#endregion
|
|
@@ -421,7 +428,7 @@ var require_picocolors = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
|
421
428
|
//#endregion
|
|
422
429
|
//#region src/cli/program/version-embedded.ts
|
|
423
430
|
var import_picocolors = /* @__PURE__ */ __toESM(require_picocolors(), 1);
|
|
424
|
-
const EMBEDDED_PACKAGE_VERSION = "0.1.3";
|
|
431
|
+
const EMBEDDED_PACKAGE_VERSION = "0.1.4-canary.1";
|
|
425
432
|
|
|
426
433
|
//#endregion
|
|
427
434
|
//#region src/cli/program/version.ts
|
|
@@ -474,6 +481,51 @@ function getFormattedVersionLabel() {
|
|
|
474
481
|
return import_picocolors.default.bgBlack(import_picocolors.default.bold(import_picocolors.default.italic(` word-counter ${import_picocolors.default.cyanBright(`ver.${version}`)} `)));
|
|
475
482
|
}
|
|
476
483
|
|
|
484
|
+
//#endregion
|
|
485
|
+
//#region src/cli/batch/jobs/limits.ts
|
|
486
|
+
/** Threadpool size assumed when `UV_THREADPOOL_SIZE` is unset or unusable. */
const DEFAULT_UV_THREADPOOL_SIZE = 4;

/**
 * Parses a positive integer from optional text.
 *
 * Uses `Number.parseInt`, so leading whitespace and trailing non-digit
 * characters are tolerated (for example "4abc" yields 4).
 *
 * @param {string | undefined} value - Text to parse; may be missing or empty.
 * @returns {number | undefined} The parsed integer when it is > 0, otherwise `undefined`.
 */
function parsePositiveInteger(value) {
  if (!value) return;
  const parsed = Number.parseInt(value, 10);
  // NaN (unparseable text), zero and negatives all count as "not provided".
  if (!Number.isFinite(parsed) || parsed <= 0) return;
  return parsed;
}

/**
 * Computes the suggested upper bound for `--jobs` on the current host.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to read
 *   `UV_THREADPOOL_SIZE` from.
 * @returns {{ suggestedMaxJobs: number, cpuLimit: number, uvThreadpool: number, ioLimit: number }}
 *   `cpuLimit` is the available parallelism (at least 1), `uvThreadpool` is the
 *   configured or default threadpool size, `ioLimit` is twice the threadpool size,
 *   and `suggestedMaxJobs` is the smaller of `cpuLimit` and `ioLimit`.
 */
function resolveBatchJobsLimit(env = process.env) {
  // os.availableParallelism() is missing on older Node releases; fall back to the CPU list length there.
  const parallelism = typeof os.availableParallelism === "function" ? os.availableParallelism() : os.cpus().length;
  const cpuLimit = Math.max(1, parallelism);
  const uvThreadpool = parsePositiveInteger(env.UV_THREADPOOL_SIZE) ?? DEFAULT_UV_THREADPOOL_SIZE;
  const ioLimit = Math.max(1, uvThreadpool * 2);
  const suggestedMaxJobs = Math.max(1, Math.min(cpuLimit, ioLimit));
  return {
    suggestedMaxJobs,
    cpuLimit,
    uvThreadpool,
    ioLimit,
  };
}
|
|
504
|
+
/**
 * Restricts a requested job count to the range [1, limits.suggestedMaxJobs].
 *
 * @param {number} requestedJobs - Job count asked for by the caller.
 * @param {{ suggestedMaxJobs: number }} limits - Host limits; only `suggestedMaxJobs` is read.
 * @returns {number} The requested count, capped at the suggested maximum and never below 1.
 */
function clampRequestedJobs(requestedJobs, limits) {
  const capped = Math.min(requestedJobs, limits.suggestedMaxJobs);
  return Math.max(1, capped);
}
|
|
507
|
+
/**
 * Builds the one-line warning text for a `--jobs` request that was capped.
 *
 * @param {number} requestedJobs - Job count the user asked for.
 * @param {number} effectiveJobs - Job count that will actually be used.
 * @param {{ suggestedMaxJobs: number, cpuLimit: number, uvThreadpool: number, ioLimit: number }} limits
 *   Host limits reported in the message.
 * @returns {string} Three sentences joined by single spaces.
 */
function formatJobsAdvisoryWarning(requestedJobs, effectiveJobs, limits) {
  const { suggestedMaxJobs, cpuLimit, uvThreadpool, ioLimit } = limits;
  const sentences = [
    `Warning: requested --jobs=${requestedJobs} exceeds suggested host limit (${suggestedMaxJobs}).`,
    `Running with --jobs=${effectiveJobs} as a safety cap.`,
    `Host limits: cpuLimit=${cpuLimit}, uvThreadpool=${uvThreadpool}, ioLimit=${ioLimit}.`,
  ];
  return sentences.join(" ");
}
|
|
514
|
+
/**
 * Tells whether an unknown thrown value carries an `EMFILE` or `ENFILE` error code.
 *
 * @param {unknown} error - Any caught value.
 * @returns {boolean} `true` only for non-null objects whose `code` property is
 *   exactly "EMFILE" or "ENFILE".
 */
function isResourceLimitError(error) {
  // Primitives and null cannot carry a `code` property (and `in` would throw on them).
  if (error === null || typeof error !== "object") return false;
  if (!("code" in error)) return false;
  return error.code === "EMFILE" || error.code === "ENFILE";
}
|
|
519
|
+
/**
 * Wraps a low-level failure in an Error that explains the `--jobs` context.
 *
 * @param {string} path - Path that was being processed when the failure occurred.
 * @param {unknown} error - The original thrown value.
 * @param {number} requestedJobs - Job count the user asked for.
 * @param {{ suggestedMaxJobs: number }} limits - Host limits; only `suggestedMaxJobs` is read.
 * @returns {Error} A new Error whose message names the path, the original code
 *   ("UNKNOWN" when the value has no `code`) and message, plus remediation advice.
 *   The original value is attached as `cause` so its stack and fields are not lost.
 */
function createResourceLimitError(path, error, requestedJobs, limits) {
  const detail = error instanceof Error ? error.message : String(error);
  const hasCode = typeof error === "object" && error !== null && "code" in error;
  const code = hasCode ? String(error.code) : "UNKNOWN";
  const text = [
    `Resource limit reached while processing: ${path} (${code}: ${detail}).`,
    `Requested --jobs=${requestedJobs}; suggested host limit is ${limits.suggestedMaxJobs}.`,
    "Reduce --jobs or raise OS file descriptor limits before retrying.",
  ].join(" ");
  return new Error(text, { cause: error });
}
|
|
528
|
+
|
|
477
529
|
//#endregion
|
|
478
530
|
//#region src/utils/append-all.ts
|
|
479
531
|
function appendAll(target, source) {
|
|
@@ -481,1811 +533,2002 @@ function appendAll(target, source) {
|
|
|
481
533
|
}
|
|
482
534
|
|
|
483
535
|
//#endregion
|
|
484
|
-
//#region src/
|
|
485
|
-
function
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
if (
|
|
495
|
-
|
|
536
|
+
//#region src/markdown/toml/arrays.ts
|
|
537
|
+
/**
 * Returns the array stored at `result[key]`, creating it when needed.
 *
 * @param {Record<string, unknown>} result - Object that holds the parsed values; mutated in place.
 * @param {string} key - Property name that should hold an array.
 * @returns {unknown[]} The existing array, or a fresh empty array that has just
 *   been assigned to `result[key]` (replacing any non-array value stored there).
 */
function ensureArrayContainer(result, key) {
  const current = result[key];
  if (!Array.isArray(current)) {
    const created = [];
    result[key] = created;
    return created;
  }
  return current;
}
|
|
544
|
+
function flattenArrayTables(result) {
|
|
545
|
+
for (const [key, value] of Object.entries(result)) {
|
|
546
|
+
if (!Array.isArray(value)) continue;
|
|
547
|
+
result[key] = value.map((entry) => Object.entries(entry).map(([entryKey, entryValue]) => `${entryKey}=${entryValue}`).join(", ")).join(" | ");
|
|
496
548
|
}
|
|
497
|
-
return suspicious / sampleSize > .3;
|
|
498
549
|
}
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
550
|
+
|
|
551
|
+
//#endregion
|
|
552
|
+
//#region src/markdown/toml/keys.ts
|
|
553
|
+
/**
 * Trims a key and removes one pair of matching surrounding quotes, if present.
 *
 * @param {string} key - Raw key text.
 * @returns {string} The trimmed key, without its outer `"`...`"` or `'`...`'` pair.
 */
function stripKeyQuotes(key) {
  const trimmed = key.trim();
  const doubleQuoted = trimmed.startsWith("\"") && trimmed.endsWith("\"");
  const singleQuoted = trimmed.startsWith("'") && trimmed.endsWith("'");
  return doubleQuoted || singleQuoted ? trimmed.slice(1, -1) : trimmed;
}

/**
 * Normalizes a (possibly dotted, possibly quoted) key into a dot-joined path.
 *
 * Dots split the key into segments only when they appear outside a quoted
 * segment, and each segment is unquoted on its own. This keeps `"a.b"` as the
 * single key `a.b` while turning `"a"."b"` and `a."b"` into `a.b`.
 *
 * @param {string} key - Raw key text, e.g. the left-hand side of `key = value`.
 * @returns {string | null} The normalized path, or `null` when the key is empty
 *   or any segment is empty.
 */
function normalizeKeyPath(key) {
  const trimmed = key.trim();
  if (!trimmed) return null;
  const rawSegments = [];
  let current = "";
  let quote = null;
  let escaped = false;
  for (let i = 0; i < trimmed.length; i += 1) {
    const char = trimmed[i];
    if (quote) {
      current += char;
      if (escaped) escaped = false;
      // Backslash escapes are only recognised inside double-quoted segments.
      else if (char === "\\" && quote === "\"") escaped = true;
      else if (char === quote) quote = null;
      continue;
    }
    // A quote opens a quoted segment only at the start of a segment, so an
    // apostrophe inside a bare segment does not swallow the following dots.
    if ((char === "\"" || char === "'") && current.trim() === "") {
      quote = char;
      current += char;
      continue;
    }
    if (char === ".") {
      rawSegments.push(current);
      current = "";
      continue;
    }
    current += char;
  }
  rawSegments.push(current);
  const segments = rawSegments.map((segment) => stripKeyQuotes(segment));
  if (segments.some((segment) => !segment)) return null;
  return segments.join(".");
}
|
|
569
|
+
|
|
570
|
+
//#endregion
|
|
571
|
+
//#region src/markdown/toml/strings.ts
|
|
572
|
+
function stripInlineComment(line) {
|
|
573
|
+
let inString = null;
|
|
574
|
+
let escaped = false;
|
|
575
|
+
for (let i = 0; i < line.length; i += 1) {
|
|
576
|
+
const char = line[i] ?? "";
|
|
577
|
+
if (inString) {
|
|
578
|
+
if (escaped) {
|
|
579
|
+
escaped = false;
|
|
580
|
+
continue;
|
|
581
|
+
}
|
|
582
|
+
if (char === "\\" && inString === "double") {
|
|
583
|
+
escaped = true;
|
|
584
|
+
continue;
|
|
585
|
+
}
|
|
586
|
+
if (inString === "double" && char === "\"") {
|
|
587
|
+
inString = null;
|
|
588
|
+
continue;
|
|
589
|
+
}
|
|
590
|
+
if (inString === "single" && char === "'") {
|
|
591
|
+
inString = null;
|
|
592
|
+
continue;
|
|
593
|
+
}
|
|
512
594
|
continue;
|
|
513
595
|
}
|
|
514
|
-
if (
|
|
515
|
-
|
|
516
|
-
path: filePath,
|
|
517
|
-
reason: "binary file"
|
|
518
|
-
});
|
|
596
|
+
if (char === "\"") {
|
|
597
|
+
inString = "double";
|
|
519
598
|
continue;
|
|
520
599
|
}
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
}
|
|
600
|
+
if (char === "'") {
|
|
601
|
+
inString = "single";
|
|
602
|
+
continue;
|
|
603
|
+
}
|
|
604
|
+
if (char === "#") return line.slice(0, i).trimEnd();
|
|
525
605
|
}
|
|
526
|
-
return
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
606
|
+
return line;
|
|
607
|
+
}
|
|
608
|
+
/**
 * Decodes the backslash escapes `\\`, `\"`, `\n`, `\t` and `\r` in the body of
 * a basic (double-quoted) string.
 *
 * The input is scanned once, left to right, so the output of one escape is
 * never re-read as the start of another: `\\n` decodes to a backslash followed
 * by `n`, not to a newline. Any other backslash sequence is left untouched.
 *
 * @param {string} input - String body without its surrounding quotes.
 * @returns {string} The decoded text.
 */
function unescapeBasic(input) {
  const replacements = {
    "\\": "\\",
    "\"": "\"",
    n: "\n",
    t: "\t",
    r: "\r",
  };
  return input.replace(/\\([\\"ntr])/g, (_match, escapeChar) => replacements[escapeChar]);
}

/**
 * Extracts the text of a quoted string literal.
 *
 * Triple-quoted forms are checked before single-quoted ones so that `"""x"""`
 * is not mistaken for a one-quote string. Double-quoted forms have their
 * escapes decoded; single-quoted (literal) forms are returned verbatim.
 *
 * @param {string} value - Trimmed raw value text.
 * @returns {string | null} The string contents, or `null` when the value is
 *   not wrapped in a matching pair of quote delimiters.
 */
function parseStringLiteral(value) {
  const wrappedIn = (delimiter) => value.startsWith(delimiter) && value.endsWith(delimiter);
  if (wrappedIn("\"\"\"")) return unescapeBasic(value.slice(3, -3));
  if (wrappedIn("'''")) return value.slice(3, -3);
  if (wrappedIn("\"")) return unescapeBasic(value.slice(1, -1));
  if (wrappedIn("'")) return value.slice(1, -1);
  return null;
}
|
|
531
618
|
|
|
532
619
|
//#endregion
|
|
533
|
-
//#region src/
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
for (const entry of sortedEntries) {
|
|
561
|
-
const entryPath = resolve(directoryPath, entry.name);
|
|
562
|
-
if (entry.isFile()) {
|
|
563
|
-
if (!shouldIncludeFromDirectory(entryPath, extensionFilter)) {
|
|
564
|
-
skipped.push({
|
|
565
|
-
path: entryPath,
|
|
566
|
-
reason: "extension excluded"
|
|
567
|
-
});
|
|
568
|
-
debug.emit("path.resolve.filter.excluded", {
|
|
569
|
-
path: entryPath,
|
|
570
|
-
reason: "extension excluded"
|
|
571
|
-
}, { verbosity: "verbose" });
|
|
572
|
-
stats.filterExcluded += 1;
|
|
620
|
+
//#region src/markdown/toml/values.ts
|
|
621
|
+
/**
 * Converts raw scalar text into a JavaScript value.
 *
 * @param {string} raw - Raw value text; surrounding whitespace is ignored.
 * @returns {string | number | boolean | null} `null` for empty input; the
 *   unquoted text for quoted strings; `true`/`false` for the boolean keywords;
 *   a number for plain decimal integers and floats (optional sign, fraction
 *   and exponent); otherwise the trimmed text unchanged.
 */
function parsePrimitive(raw) {
  const value = raw.trim();
  if (!value) return null;
  const stringLiteral = parseStringLiteral(value);
  if (stringLiteral !== null) return stringLiteral;
  if (value === "true") return true;
  if (value === "false") return false;
  const looksNumeric = /^[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$/.test(value);
  if (looksNumeric) return Number(value);
  // Everything else, including date-like text such as 2024-01-31, is kept as its raw text.
  return value;
}
|
|
632
|
+
function parseArray(raw) {
|
|
633
|
+
const value = raw.trim();
|
|
634
|
+
if (!value.startsWith("[") || !value.endsWith("]")) return null;
|
|
635
|
+
const inner = value.slice(1, -1).trim();
|
|
636
|
+
if (!inner) return [];
|
|
637
|
+
const items = [];
|
|
638
|
+
let current = "";
|
|
639
|
+
let inString = null;
|
|
640
|
+
let escaped = false;
|
|
641
|
+
for (let i = 0; i < inner.length; i += 1) {
|
|
642
|
+
const char = inner[i] ?? "";
|
|
643
|
+
if (inString) {
|
|
644
|
+
current += char;
|
|
645
|
+
if (escaped) {
|
|
646
|
+
escaped = false;
|
|
573
647
|
continue;
|
|
574
648
|
}
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
if (recordRegexExcluded(entryPath)) {
|
|
578
|
-
debug.emit("path.resolve.regex.excluded", {
|
|
579
|
-
path: entryPath,
|
|
580
|
-
relativePath,
|
|
581
|
-
pattern: regexFilter.sourcePattern,
|
|
582
|
-
reason: "regex excluded"
|
|
583
|
-
}, { verbosity: "verbose" });
|
|
584
|
-
stats.regexExcluded += 1;
|
|
585
|
-
}
|
|
649
|
+
if (char === "\\" && inString === "double") {
|
|
650
|
+
escaped = true;
|
|
586
651
|
continue;
|
|
587
652
|
}
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
debug.emit("path.resolve.expand.include", {
|
|
591
|
-
path: entryPath,
|
|
592
|
-
source: "directory"
|
|
593
|
-
}, { verbosity: "verbose" });
|
|
653
|
+
if (inString === "double" && char === "\"") inString = null;
|
|
654
|
+
else if (inString === "single" && char === "'") inString = null;
|
|
594
655
|
continue;
|
|
595
656
|
}
|
|
596
|
-
if (
|
|
597
|
-
|
|
657
|
+
if (char === "\"") {
|
|
658
|
+
inString = "double";
|
|
659
|
+
current += char;
|
|
660
|
+
continue;
|
|
661
|
+
}
|
|
662
|
+
if (char === "'") {
|
|
663
|
+
inString = "single";
|
|
664
|
+
current += char;
|
|
665
|
+
continue;
|
|
666
|
+
}
|
|
667
|
+
if (char === ",") {
|
|
668
|
+
const item = parsePrimitive(current);
|
|
669
|
+
if (item === null) return null;
|
|
670
|
+
items.push(item);
|
|
671
|
+
current = "";
|
|
672
|
+
continue;
|
|
673
|
+
}
|
|
674
|
+
current += char;
|
|
598
675
|
}
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
return files;
|
|
676
|
+
const finalItem = parsePrimitive(current);
|
|
677
|
+
if (finalItem === null) return null;
|
|
678
|
+
items.push(finalItem);
|
|
679
|
+
return items;
|
|
604
680
|
}
|
|
605
|
-
|
|
606
|
-
const
|
|
607
|
-
|
|
608
|
-
const
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
regexExcludedPaths.delete(filePath);
|
|
632
|
-
if (resolvedFiles.has(filePath)) {
|
|
633
|
-
stats.dedupeDuplicates += 1;
|
|
634
|
-
debug.emit("path.resolve.dedupe.duplicate", {
|
|
635
|
-
path: filePath,
|
|
636
|
-
source: details.source,
|
|
637
|
-
input: details.input
|
|
638
|
-
}, { verbosity: "verbose" });
|
|
639
|
-
return;
|
|
681
|
+
function parseInlineTable(raw) {
|
|
682
|
+
const trimmed = raw.trim();
|
|
683
|
+
if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) return null;
|
|
684
|
+
const inner = trimmed.slice(1, -1).trim();
|
|
685
|
+
if (!inner) return {};
|
|
686
|
+
const pairs = [];
|
|
687
|
+
let current = "";
|
|
688
|
+
let inString = null;
|
|
689
|
+
let escaped = false;
|
|
690
|
+
let bracketDepth = 0;
|
|
691
|
+
let braceDepth = 0;
|
|
692
|
+
for (let i = 0; i < inner.length; i += 1) {
|
|
693
|
+
const char = inner[i] ?? "";
|
|
694
|
+
if (inString) {
|
|
695
|
+
current += char;
|
|
696
|
+
if (escaped) {
|
|
697
|
+
escaped = false;
|
|
698
|
+
continue;
|
|
699
|
+
}
|
|
700
|
+
if (char === "\\" && inString === "double") {
|
|
701
|
+
escaped = true;
|
|
702
|
+
continue;
|
|
703
|
+
}
|
|
704
|
+
if (inString === "double" && char === "\"") inString = null;
|
|
705
|
+
else if (inString === "single" && char === "'") inString = null;
|
|
706
|
+
continue;
|
|
640
707
|
}
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
path: filePath,
|
|
645
|
-
source: details.source,
|
|
646
|
-
input: details.input
|
|
647
|
-
}, { verbosity: "verbose" });
|
|
648
|
-
};
|
|
649
|
-
const getRegexFilter = () => {
|
|
650
|
-
if (!regexFilter) regexFilter = buildDirectoryRegexFilter(options.directoryRegexPattern);
|
|
651
|
-
return regexFilter;
|
|
652
|
-
};
|
|
653
|
-
const recordRegexExcluded = (filePath) => {
|
|
654
|
-
if (resolvedFiles.has(filePath)) return false;
|
|
655
|
-
regexExcludedPaths.add(filePath);
|
|
656
|
-
return true;
|
|
657
|
-
};
|
|
658
|
-
for (const rawPath of pathInputs) {
|
|
659
|
-
const targetPath = resolve(rawPath);
|
|
660
|
-
debug.emit("path.resolve.input", {
|
|
661
|
-
rawPath,
|
|
662
|
-
resolvedPath: targetPath
|
|
663
|
-
});
|
|
664
|
-
let metadata;
|
|
665
|
-
try {
|
|
666
|
-
metadata = await stat(targetPath);
|
|
667
|
-
} catch (error) {
|
|
668
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
669
|
-
skipped.push({
|
|
670
|
-
path: targetPath,
|
|
671
|
-
reason: `not readable: ${message}`
|
|
672
|
-
});
|
|
673
|
-
debug.emit("path.resolve.skip", {
|
|
674
|
-
path: targetPath,
|
|
675
|
-
reason: `not readable: ${message}`
|
|
676
|
-
});
|
|
708
|
+
if (char === "\"") {
|
|
709
|
+
inString = "double";
|
|
710
|
+
current += char;
|
|
677
711
|
continue;
|
|
678
712
|
}
|
|
679
|
-
if (
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
root: targetPath,
|
|
683
|
-
recursive: options.recursive,
|
|
684
|
-
regex: effectiveRegexFilter.sourcePattern ?? null
|
|
685
|
-
});
|
|
686
|
-
const files = await expandDirectory(targetPath, targetPath, options.recursive, extensionFilter, effectiveRegexFilter, skipped, recordRegexExcluded, debug, stats);
|
|
687
|
-
for (const file of files) addResolvedFile(file, {
|
|
688
|
-
source: "directory",
|
|
689
|
-
input: targetPath
|
|
690
|
-
});
|
|
713
|
+
if (char === "'") {
|
|
714
|
+
inString = "single";
|
|
715
|
+
current += char;
|
|
691
716
|
continue;
|
|
692
717
|
}
|
|
693
|
-
if (
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
reason: "not a regular file"
|
|
697
|
-
});
|
|
698
|
-
debug.emit("path.resolve.skip", {
|
|
699
|
-
path: targetPath,
|
|
700
|
-
reason: "not a regular file"
|
|
701
|
-
});
|
|
718
|
+
if (char === "[") {
|
|
719
|
+
bracketDepth += 1;
|
|
720
|
+
current += char;
|
|
702
721
|
continue;
|
|
703
722
|
}
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
723
|
+
if (char === "]") {
|
|
724
|
+
if (bracketDepth > 0) bracketDepth -= 1;
|
|
725
|
+
current += char;
|
|
726
|
+
continue;
|
|
727
|
+
}
|
|
728
|
+
if (char === "{") {
|
|
729
|
+
braceDepth += 1;
|
|
730
|
+
current += char;
|
|
731
|
+
continue;
|
|
732
|
+
}
|
|
733
|
+
if (char === "}") {
|
|
734
|
+
if (braceDepth > 0) braceDepth -= 1;
|
|
735
|
+
current += char;
|
|
736
|
+
continue;
|
|
737
|
+
}
|
|
738
|
+
if (char === "," && bracketDepth === 0 && braceDepth === 0) {
|
|
739
|
+
pairs.push(current);
|
|
740
|
+
current = "";
|
|
741
|
+
continue;
|
|
742
|
+
}
|
|
743
|
+
current += char;
|
|
708
744
|
}
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
}
|
|
724
|
-
|
|
725
|
-
files: files.length,
|
|
726
|
-
skipped: skipped.length,
|
|
727
|
-
ordering: "absolute-path-ascending"
|
|
728
|
-
});
|
|
729
|
-
return {
|
|
730
|
-
files,
|
|
731
|
-
skipped
|
|
732
|
-
};
|
|
733
|
-
}
|
|
734
|
-
|
|
735
|
-
//#endregion
|
|
736
|
-
//#region src/cli/progress/reporter.ts
|
|
737
|
-
const PROGRESS_BAR_WIDTH = 20;
|
|
738
|
-
const FILLED_BAR_CHAR = "█";
|
|
739
|
-
const EMPTY_BAR_CHAR = "░";
|
|
740
|
-
function clamp(value, min, max) {
|
|
741
|
-
return Math.max(min, Math.min(max, value));
|
|
742
|
-
}
|
|
743
|
-
function buildProgressBar(completed, total) {
|
|
744
|
-
const safeTotal = Math.max(total, 1);
|
|
745
|
-
const ratio = clamp(completed / safeTotal, 0, 1);
|
|
746
|
-
const filled = completed >= safeTotal ? PROGRESS_BAR_WIDTH : Math.floor(ratio * PROGRESS_BAR_WIDTH);
|
|
747
|
-
const empty = PROGRESS_BAR_WIDTH - filled;
|
|
748
|
-
return `${FILLED_BAR_CHAR.repeat(filled)}${EMPTY_BAR_CHAR.repeat(empty)}`;
|
|
749
|
-
}
|
|
750
|
-
function formatElapsed(startedAtMs) {
|
|
751
|
-
const elapsedMs = Date.now() - startedAtMs;
|
|
752
|
-
const totalSeconds = Math.max(0, Math.floor(elapsedMs / 1e3));
|
|
753
|
-
const minutes = Math.floor(totalSeconds / 60);
|
|
754
|
-
const seconds = totalSeconds % 60;
|
|
755
|
-
const tenths = Math.floor(Math.max(0, elapsedMs) % 1e3 / 100);
|
|
756
|
-
return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}.${tenths}`;
|
|
745
|
+
if (current.trim()) pairs.push(current);
|
|
746
|
+
const output = {};
|
|
747
|
+
for (const pair of pairs) {
|
|
748
|
+
const separatorIndex = pair.indexOf("=");
|
|
749
|
+
if (separatorIndex === -1) return null;
|
|
750
|
+
const key = normalizeKeyPath(pair.slice(0, separatorIndex));
|
|
751
|
+
if (!key) return null;
|
|
752
|
+
const valueRaw = pair.slice(separatorIndex + 1).trim();
|
|
753
|
+
if (!valueRaw) return null;
|
|
754
|
+
if (valueRaw.startsWith("{")) return null;
|
|
755
|
+
const normalized = normalizeValue(valueRaw);
|
|
756
|
+
if (normalized === null) return null;
|
|
757
|
+
if (typeof normalized === "object" && !Array.isArray(normalized)) return null;
|
|
758
|
+
output[key] = normalized;
|
|
759
|
+
}
|
|
760
|
+
return output;
|
|
757
761
|
}
|
|
758
|
-
function
|
|
759
|
-
|
|
760
|
-
const
|
|
761
|
-
|
|
762
|
+
/**
 * Parses the right-hand side of a `key = value` entry.
 *
 * @param {string} value - Raw value text.
 * @returns {*} `null` when the value is empty or cannot be parsed; otherwise
 *   whatever the matching parser returns: the inline-table parser for text
 *   wrapped in braces, the array parser for text wrapped in brackets, and the
 *   primitive parser for everything else.
 */
function normalizeValue(value) {
  if (!value) return null;
  const trimmed = value.trim();
  const braced = trimmed.startsWith("{") && trimmed.endsWith("}");
  if (braced) return parseInlineTable(trimmed);
  const array = parseArray(trimmed);
  if (array) return array;
  // Bracketed text that the array parser rejected is malformed; do not fall back to a raw string.
  const bracketed = trimmed.startsWith("[") && trimmed.endsWith("]");
  if (bracketed) return null;
  return parsePrimitive(trimmed);
}
|
|
763
|
-
function
|
|
764
|
-
|
|
771
|
+
/**
 * Renders a parsed value as plain text.
 *
 * @param {unknown} value - Parsed value.
 * @returns {string} An empty string for `null`/`undefined`; array items
 *   stringified and joined with ", "; otherwise `String(value)`.
 */
function toPlainText(value) {
  if (value === null || value === undefined) return "";
  if (!Array.isArray(value)) return String(value);
  const parts = value.map((item) => String(item));
  return parts.join(", ");
}
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
let
|
|
773
|
-
let
|
|
774
|
-
let
|
|
775
|
-
let
|
|
776
|
-
|
|
777
|
-
const
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
776
|
+
|
|
777
|
+
//#endregion
|
|
778
|
+
//#region src/markdown/toml/parse-frontmatter.ts
|
|
779
|
+
function parseTomlFrontmatter(frontmatter) {
|
|
780
|
+
const result = {};
|
|
781
|
+
const lines = frontmatter.split("\n");
|
|
782
|
+
let tablePrefix = "";
|
|
783
|
+
let tableTarget = null;
|
|
784
|
+
let tablePrefixInList = false;
|
|
785
|
+
for (let index = 0; index < lines.length; index += 1) {
|
|
786
|
+
const rawLine = lines[index] ?? "";
|
|
787
|
+
const trimmedLine = rawLine.trim();
|
|
788
|
+
if (!trimmedLine || trimmedLine.startsWith("#")) continue;
|
|
789
|
+
if (trimmedLine.startsWith("[[")) {
|
|
790
|
+
const match = trimmedLine.match(/^\[\[([^\]]+)]]$/);
|
|
791
|
+
if (!match) return null;
|
|
792
|
+
const normalizedTable = normalizeKeyPath(match[1] ?? "");
|
|
793
|
+
if (!normalizedTable) return null;
|
|
794
|
+
const list = ensureArrayContainer(result, normalizedTable);
|
|
795
|
+
const newEntry = {};
|
|
796
|
+
list.push(newEntry);
|
|
797
|
+
tableTarget = newEntry;
|
|
798
|
+
tablePrefix = normalizedTable;
|
|
799
|
+
tablePrefixInList = true;
|
|
800
|
+
continue;
|
|
801
|
+
}
|
|
802
|
+
const tableMatch = trimmedLine.match(/^\[([^\]]+)]$/);
|
|
803
|
+
if (tableMatch) {
|
|
804
|
+
const normalizedTable = normalizeKeyPath(tableMatch[1] ?? "");
|
|
805
|
+
if (!normalizedTable) return null;
|
|
806
|
+
tablePrefix = normalizedTable;
|
|
807
|
+
tablePrefixInList = false;
|
|
808
|
+
tableTarget = null;
|
|
809
|
+
continue;
|
|
810
|
+
}
|
|
811
|
+
const lineForParsing = /("""|''')/.test(rawLine) ? rawLine : stripInlineComment(rawLine);
|
|
812
|
+
const separatorIndex = lineForParsing.indexOf("=");
|
|
813
|
+
if (separatorIndex === -1) return null;
|
|
814
|
+
const key = normalizeKeyPath(lineForParsing.slice(0, separatorIndex));
|
|
815
|
+
let valueRaw = lineForParsing.slice(separatorIndex + 1).trim();
|
|
816
|
+
if (!key) return null;
|
|
817
|
+
const tripleDelimiter = valueRaw.startsWith("\"\"\"") ? "\"\"\"" : valueRaw.startsWith("'''") ? "'''" : null;
|
|
818
|
+
if (tripleDelimiter) {
|
|
819
|
+
const closingIndex = valueRaw.indexOf(tripleDelimiter, tripleDelimiter.length);
|
|
820
|
+
if (closingIndex !== -1) {
|
|
821
|
+
const strippedAfter = stripInlineComment(valueRaw.slice(closingIndex + tripleDelimiter.length));
|
|
822
|
+
valueRaw = `${valueRaw.slice(0, closingIndex + tripleDelimiter.length)}${strippedAfter}`;
|
|
823
|
+
} else {
|
|
824
|
+
const delimiter = tripleDelimiter;
|
|
825
|
+
let combined = valueRaw;
|
|
826
|
+
let closed = false;
|
|
827
|
+
while (index + 1 < lines.length) {
|
|
828
|
+
index += 1;
|
|
829
|
+
const nextLine = lines[index] ?? "";
|
|
830
|
+
combined += `\n${nextLine}`;
|
|
831
|
+
if (new RegExp(`${delimiter}\\s*$`).test(nextLine)) {
|
|
832
|
+
closed = true;
|
|
833
|
+
break;
|
|
834
|
+
}
|
|
823
835
|
}
|
|
824
|
-
|
|
825
|
-
|
|
836
|
+
if (!closed) return null;
|
|
837
|
+
valueRaw = combined;
|
|
826
838
|
}
|
|
827
|
-
lastLineLength = line.length;
|
|
828
|
-
options.stream.write(`${line}\n`);
|
|
829
|
-
},
|
|
830
|
-
finish() {
|
|
831
|
-
if (!active) return;
|
|
832
|
-
if (isTTY) if (clearOnFinish) clearLine();
|
|
833
|
-
else options.stream.write("\n");
|
|
834
|
-
active = false;
|
|
835
839
|
}
|
|
836
|
-
|
|
840
|
+
const normalized = normalizeValue(valueRaw);
|
|
841
|
+
if (normalized === null) return null;
|
|
842
|
+
const fullKey = tablePrefix ? `${tablePrefix}.${key}` : key;
|
|
843
|
+
if (typeof normalized === "object" && !Array.isArray(normalized)) {
|
|
844
|
+
for (const [inlineKey, inlineValue] of Object.entries(normalized)) {
|
|
845
|
+
const entryKey = tablePrefixInList ? `${key}.${inlineKey}` : `${fullKey}.${inlineKey}`;
|
|
846
|
+
if (tablePrefixInList && tableTarget) tableTarget[entryKey] = toPlainText(inlineValue);
|
|
847
|
+
else result[entryKey] = toPlainText(inlineValue);
|
|
848
|
+
}
|
|
849
|
+
continue;
|
|
850
|
+
}
|
|
851
|
+
if (tablePrefixInList && tableTarget) {
|
|
852
|
+
tableTarget[key] = toPlainText(normalized);
|
|
853
|
+
continue;
|
|
854
|
+
}
|
|
855
|
+
result[fullKey] = toPlainText(normalized);
|
|
856
|
+
}
|
|
857
|
+
flattenArrayTables(result);
|
|
858
|
+
return result;
|
|
837
859
|
}
|
|
838
860
|
|
|
839
861
|
//#endregion
|
|
840
|
-
//#region src/markdown/
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
862
|
+
//#region src/markdown/parse-markdown.ts
|
|
863
|
+
/**
 * Maps a frontmatter fence marker to the frontmatter format it introduces.
 * Frozen so this shared lookup table cannot be mutated at runtime.
 */
const FENCE_TO_TYPE = Object.freeze({
  "---": "yaml",
  "+++": "toml",
  ";;;": "json",
});
|
|
868
|
+
/**
 * Converts every CRLF pair to a single LF. A lone CR is left untouched.
 *
 * @param {string} input - Raw text.
 * @returns {string} Text with `\r\n` replaced by `\n`.
 */
function normalizeNewlines(input) {
  const pieces = input.split("\r\n");
  return pieces.join("\n");
}
|
|
848
|
-
function
|
|
849
|
-
|
|
850
|
-
if (!Array.isArray(value)) continue;
|
|
851
|
-
result[key] = value.map((entry) => Object.entries(entry).map(([entryKey, entryValue]) => `${entryKey}=${entryValue}`).join(", ")).join(" | ");
|
|
852
|
-
}
|
|
871
|
+
/**
 * Removes a leading byte-order mark (U+FEFF) from a line, if present.
 *
 * The BOM is written as an explicit escape: a raw, invisible BOM character in
 * the literal is easy to lose, and an empty literal would make
 * `startsWith("")` always true and strip the first character of every line.
 *
 * @param {string} line - Line of text, typically the first line of a file.
 * @returns {string} The line without a leading BOM.
 */
function stripBom(line) {
  return line.startsWith("\uFEFF") ? line.slice(1) : line;
}
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
const trimmed = key.trim();
|
|
859
|
-
if (trimmed.startsWith("\"") && trimmed.endsWith("\"") || trimmed.startsWith("'") && trimmed.endsWith("'")) return trimmed.slice(1, -1);
|
|
860
|
-
return trimmed;
|
|
874
|
+
/**
 * Identifies the frontmatter format announced by a fence line.
 *
 * A fence line consists solely of `---`, `+++` or `;;;`, optionally padded
 * with spaces or tabs.
 *
 * @param {string} line - Candidate fence line.
 * @returns {string|null} The format looked up in FENCE_TO_TYPE, or null when
 *   the line is not a fence.
 */
function getFenceType(line) {
  const fence = /^[\t ]*(---|\+\+\+|;;;)[\t ]*$/.exec(line)?.[1];
  if (fence === undefined) return null;
  return FENCE_TO_TYPE[fence] ?? null;
}
|
|
862
|
-
function
|
|
863
|
-
|
|
864
|
-
if (
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
return
|
|
879
|
+
/**
 * Parses raw frontmatter text according to its declared format.
 *
 * @param {string} frontmatter - Frontmatter text without the fence lines.
 * @param {string|null} type - "json", "yaml" or "toml"; anything else yields null.
 * @returns {object|null} The parsed key/value data, or null when the type is
 *   missing/unknown, the text fails to parse, or (for JSON and YAML) the
 *   parsed value is not a plain key/value object.
 */
function parseFrontmatter(frontmatter, type) {
  if (!type) return null;
  if (type === "json") {
    try {
      const data = JSON.parse(frontmatter);
      // Match the YAML branch: arrays and primitives are not frontmatter data.
      if (!data || typeof data !== "object" || Array.isArray(data)) return null;
      return data;
    } catch {
      // Malformed JSON is reported to the caller as "no data", not as an error.
      return null;
    }
  }
  if (type === "yaml") {
    const doc = parseDocument(frontmatter, { prettyErrors: false });
    if (doc.errors.length > 0) return null;
    const data = doc.toJSON();
    if (!data || typeof data !== "object" || Array.isArray(data)) return null;
    return data;
  }
  if (type === "toml") return parseTomlFrontmatter(frontmatter);
  return null;
}
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
function stripInlineComment(line) {
|
|
877
|
-
let inString = null;
|
|
896
|
+
/**
 * Scans `text` from `startIndex` and returns the first balanced `{...}` span.
 *
 * Braces inside double-quoted strings are ignored, and backslash escapes
 * inside strings are honoured.
 *
 * @param {string} text - Text to scan.
 * @param {number} startIndex - Index to start scanning from (normally the
 *   position of the opening `{`).
 * @returns {{jsonText: string, endIndex: number}|null} The balanced span from
 *   `startIndex` through the matching `}` and the index of that `}`, or null
 *   when no balanced block is found.
 */
function extractJsonBlock(text, startIndex) {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = startIndex; i < text.length; i += 1) {
    const char = text[i] ?? "";
    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (char === "\\") {
        escaped = true;
      } else if (char === "\"") {
        inString = false;
      }
      continue;
    }
    if (char === "\"") {
      inString = true;
      continue;
    }
    if (char === "{") {
      depth += 1;
      continue;
    }
    if (char === "}") {
      depth -= 1;
      // A closing brace with nothing open can never start a balanced block;
      // continuing could later return an unbalanced span such as "}{{}}".
      if (depth < 0) return null;
      if (depth === 0) return {
        jsonText: text.slice(startIndex, i + 1),
        endIndex: i
      };
    }
  }
  return null;
}
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
}
|
|
966
|
-
if (char === "'") {
|
|
967
|
-
inString = "single";
|
|
968
|
-
current += char;
|
|
969
|
-
continue;
|
|
970
|
-
}
|
|
971
|
-
if (char === ",") {
|
|
972
|
-
const item = parsePrimitive(current);
|
|
973
|
-
if (item === null) return null;
|
|
974
|
-
items.push(item);
|
|
975
|
-
current = "";
|
|
976
|
-
continue;
|
|
977
|
-
}
|
|
978
|
-
current += char;
|
|
932
|
+
/**
 * Splits a Markdown document into frontmatter and body.
 *
 * Two layouts are recognised:
 *  - fenced frontmatter, opened and closed by the same fence line
 *    (as recognised by getFenceType);
 *  - a bare JSON object at the very start of the document (after optional
 *    blank space), with no fences.
 *
 * CRLF line endings are normalised and a leading BOM is removed first.
 *
 * @param {string} input - Raw Markdown text.
 * @returns {{frontmatter: string|null, content: string, data: object|null,
 *   frontmatterType: string|null}} When no frontmatter is detected,
 *   `frontmatter`, `data` and `frontmatterType` are null and `content` is the
 *   whole normalised document. For fenced frontmatter that fails to parse,
 *   `data` is null while `frontmatter` and `frontmatterType` are still set.
 */
function parseMarkdown(input) {
  // String.prototype.split always yields at least one element, so lines[0] exists.
  const lines = normalizeNewlines(input).split("\n");
  lines[0] = stripBom(lines[0] ?? "");
  const normalizedWithoutBom = lines.join("\n");
  const withoutFrontmatter = () => ({
    frontmatter: null,
    content: normalizedWithoutBom,
    data: null,
    frontmatterType: null
  });
  const openingType = getFenceType(lines[0]);
  if (!openingType) {
    // No fence: accept a leading bare JSON object as frontmatter.
    const jsonStart = (normalizedWithoutBom.match(/^[\t \n]*/)?.[0] ?? "").length;
    if (normalizedWithoutBom[jsonStart] !== "{") return withoutFrontmatter();
    const jsonBlock = extractJsonBlock(normalizedWithoutBom, jsonStart);
    if (!jsonBlock) return withoutFrontmatter();
    const data = parseFrontmatter(jsonBlock.jsonText, "json");
    // A brace block that is not valid JSON data is ordinary body text.
    if (!data) return withoutFrontmatter();
    let content = normalizedWithoutBom.slice(jsonBlock.endIndex + 1);
    if (content.startsWith("\n")) content = content.slice(1);
    return {
      frontmatter: jsonBlock.jsonText,
      content,
      data,
      frontmatterType: "json"
    };
  }
  // Fenced: the closing fence is the first later line of the same fence type.
  const closingIndex = lines.findIndex((line, i) => i > 0 && getFenceType(line ?? "") === openingType);
  if (closingIndex === -1) return withoutFrontmatter();
  const frontmatter = lines.slice(1, closingIndex).join("\n");
  return {
    frontmatter,
    content: lines.slice(closingIndex + 1).join("\n"),
    data: parseFrontmatter(frontmatter, openingType),
    frontmatterType: openingType
  };
}
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
995
|
+
|
|
996
|
+
//#endregion
|
|
997
|
+
//#region src/wc/segmenter.ts
|
|
998
|
+
// Per-locale caches of Intl.Segmenter instances (word and grapheme granularity).
const segmenterCache = /* @__PURE__ */ new Map();
const graphemeSegmenterCache = /* @__PURE__ */ new Map();

/**
 * Returns the word-granularity Intl.Segmenter for a locale, creating and
 * caching it on first use.
 *
 * @param {string} locale - Locale tag passed to Intl.Segmenter.
 * @returns {Intl.Segmenter} Cached word segmenter for that locale.
 */
function getSegmenter(locale) {
  let segmenter = segmenterCache.get(locale);
  if (!segmenter) {
    segmenter = new Intl.Segmenter(locale, { granularity: "word" });
    segmenterCache.set(locale, segmenter);
  }
  return segmenter;
}
|
|
1007
|
+
/**
 * Returns the grapheme-granularity Intl.Segmenter for a locale, creating and
 * caching it on first use.
 *
 * @param {string} locale - Locale tag passed to Intl.Segmenter.
 * @returns {Intl.Segmenter} Cached grapheme segmenter for that locale.
 */
function getGraphemeSegmenter(locale) {
  let segmenter = graphemeSegmenterCache.get(locale);
  if (!segmenter) {
    segmenter = new Intl.Segmenter(locale, { granularity: "grapheme" });
    graphemeSegmenterCache.set(locale, segmenter);
  }
  return segmenter;
}
|
|
1014
|
+
/**
 * Reports whether the runtime provides Intl.Segmenter.
 *
 * @returns {boolean} True when `Intl.Segmenter` is a constructor/function.
 */
function supportsSegmenter() {
  if (typeof Intl === "undefined") return false;
  return typeof Intl.Segmenter === "function";
}
|
|
1017
|
+
/**
 * Counts user-perceived characters (grapheme clusters) in `text`.
 *
 * Falls back to counting code points when Intl.Segmenter is unavailable.
 *
 * @param {string} text - Text to measure.
 * @param {string} locale - Locale used to pick the grapheme segmenter.
 * @returns {number} Number of graphemes (or code points in the fallback).
 */
function countCharsForLocale(text, locale) {
  if (!supportsSegmenter()) return Array.from(text).length;
  const graphemes = getGraphemeSegmenter(locale).segment(text)[Symbol.iterator]();
  let total = 0;
  while (!graphemes.next().done) total += 1;
  return total;
}
|
|
1024
|
+
|
|
1025
|
+
//#endregion
|
|
1026
|
+
//#region src/wc/non-words.ts
|
|
1027
|
+
// Single-character classifiers used to bucket non-word segments.
const emojiRegex = /(?:\p{Extended_Pictographic}|\p{Emoji_Presentation})/u;
const emojiPresentationRegex = /\p{Emoji_Presentation}/u;
// Keycap sequences: digit, "#" or "*", optional VS16, then U+20E3.
const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
const symbolRegex = /\p{S}/u;
const punctuationRegex = /\p{P}/u;
const whitespaceRegex = /\s/u;
// Characters counted as newlines: LF, CR, LINE SEPARATOR, PARAGRAPH SEPARATOR.
const newlineChars = new Set(["\n", "\r", "\u2028", "\u2029"]);
|
|
1039
|
+
/**
 * Creates an empty non-word collection: one segment list and one counter per
 * category (emoji, symbols, punctuation).
 *
 * @returns {{emoji: string[], symbols: string[], punctuation: string[],
 *   counts: {emoji: number, symbols: number, punctuation: number}}}
 */
function createNonWordCollection() {
  const counts = { emoji: 0, symbols: 0, punctuation: 0 };
  return { emoji: [], symbols: [], punctuation: [], counts };
}
|
|
1051
|
+
/**
 * Records one non-word segment in the collection under its category.
 *
 * @param {object} collection - Collection to update in place.
 * @param {string} category - "emoji" or "symbol"; any other value is stored
 *   as punctuation.
 * @param {string} segment - The segment text to record.
 * @returns {void}
 */
function addNonWord(collection, category, segment) {
  let bucket = "punctuation";
  if (category === "emoji") bucket = "emoji";
  else if (category === "symbol") bucket = "symbols";
  collection[bucket].push(segment);
  collection.counts[bucket] += 1;
}
|
|
1065
|
+
/**
 * Tallies the whitespace characters of `segment` into `collection`.
 *
 * Each character is classified as a space, a tab, a newline (member of
 * `newlineChars`) or other whitespace (matches `whitespaceRegex`);
 * non-whitespace characters are ignored. `collection.whitespace` and
 * `collection.counts.whitespace` are created or updated only when at least
 * one whitespace character is found.
 *
 * @param {object} collection - Non-word collection, updated in place.
 * @param {string} segment - Segment text to scan.
 * @returns {number} Number of whitespace characters counted.
 */
function addWhitespace(collection, segment) {
  let whitespace = collection.whitespace;
  let count = 0;
  for (const char of segment) {
    let bucket;
    if (char === " ") bucket = "spaces";
    // Explicit escape: a raw tab inside the literal is indistinguishable from
    // a space in most editors and is easily converted to one.
    else if (char === "\t") bucket = "tabs";
    else if (newlineChars.has(char)) bucket = "newlines";
    else if (whitespaceRegex.test(char)) bucket = "other";
    else continue;
    whitespace = whitespace ?? createWhitespaceCounts();
    whitespace[bucket] += 1;
    count += 1;
  }
  if (count > 0) {
    collection.whitespace = whitespace ?? createWhitespaceCounts();
    collection.counts.whitespace = (collection.counts.whitespace ?? 0) + count;
  }
  return count;
}
|
|
1075
|
-
function
|
|
1076
|
-
|
|
1077
|
-
if (
|
|
1078
|
-
|
|
1099
|
+
/**
 * Classifies a non-word segment.
 *
 * A segment is an emoji when it is a keycap sequence, contains a character
 * with default emoji presentation, or contains a pictographic character
 * together with the emoji variation selector (U+FE0F). Otherwise it is a
 * symbol or punctuation if it contains such a character.
 *
 * @param {string} segment - Segment text.
 * @returns {"emoji"|"symbol"|"punctuation"|null} The category, or null when
 *   none applies.
 */
function classifyNonWordSegment(segment) {
  // U+FE0F is invisible; written as an escape so it cannot be lost or mistaken
  // for an empty string literal.
  const hasEmojiVariationSelector = segment.includes("\uFE0F");
  if (keycapEmojiRegex.test(segment) || emojiPresentationRegex.test(segment) || hasEmojiVariationSelector && emojiRegex.test(segment)) return "emoji";
  if (symbolRegex.test(segment)) return "symbol";
  if (punctuationRegex.test(segment)) return "punctuation";
  return null;
}
|
|
1106
|
+
/**
 * Merges the segments and counters of `source` into `target`.
 *
 * Categories with a zero count in `source` are skipped. Whitespace tallies
 * are merged only when `source` carries both a positive whitespace count and
 * a whitespace breakdown.
 *
 * @param {object} target - Collection updated in place.
 * @param {object} source - Collection to read from.
 * @returns {object} `target`, for chaining.
 */
function mergeNonWordCollections(target, source) {
  for (const category of ["emoji", "symbols", "punctuation"]) {
    if (source.counts[category] > 0) {
      appendAll(target[category], source[category]);
      target.counts[category] += source.counts[category];
    }
  }
  const sourceTotal = source.counts.whitespace;
  if (sourceTotal && sourceTotal > 0 && source.whitespace) {
    const merged = target.whitespace ?? createWhitespaceCounts();
    for (const kind of ["spaces", "tabs", "newlines", "other"]) {
      merged[kind] += source.whitespace[kind];
    }
    target.whitespace = merged;
    target.counts.whitespace = (target.counts.whitespace ?? 0) + sourceTotal;
  }
  return target;
}
|
|
1130
|
+
/**
 * Creates a zeroed whitespace breakdown.
 *
 * @returns {{spaces: number, tabs: number, newlines: number, other: number}}
 */
function createWhitespaceCounts() {
  const zeroed = { spaces: 0, tabs: 0, newlines: 0, other: 0 };
  return zeroed;
}
|
|
1080
1138
|
|
|
1081
1139
|
//#endregion
|
|
1082
|
-
//#region src/
|
|
1083
|
-
function
|
|
1084
|
-
const
|
|
1085
|
-
const
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
const
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
tableTarget = null;
|
|
1140
|
+
//#region src/wc/analyze.ts
|
|
1141
|
+
/**
 * Segments a locale chunk into words and, optionally, collects non-words.
 *
 * @param {{locale: string, text: string}} chunk - Text with its locale.
 * @param {boolean} collectNonWords - When truthy, non-word segments are
 *   classified and collected.
 * @param {boolean} includeWhitespace - When truthy (and collecting),
 *   whitespace in non-word segments is tallied too.
 * @returns {{locale: string, text: string, segments: string[], words: number,
 *   nonWords: object|undefined}} Word segments, their count, and the non-word
 *   collection (undefined when not collecting).
 */
function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
  const wordSegmenter = getSegmenter(chunk.locale);
  const words = [];
  const nonWords = collectNonWords ? createNonWordCollection() : null;
  for (const { segment, isWordLike } of wordSegmenter.segment(chunk.text)) {
    if (isWordLike) {
      words.push(segment);
      continue;
    }
    if (!collectNonWords || !nonWords) continue;
    if (includeWhitespace) addWhitespace(nonWords, segment);
    const category = classifyNonWordSegment(segment);
    if (category) addNonWord(nonWords, category, segment);
  }
  return {
    locale: chunk.locale,
    text: chunk.text,
    segments: words,
    words: words.length,
    nonWords: nonWords ?? void 0
  };
}
|
|
1159
|
+
/**
 * Counts characters in a locale chunk, split into word and non-word totals.
 *
 * Word-like segments always contribute to `wordChars`. Non-word segments
 * contribute to `nonWordChars` only while collecting non-words, and only when
 * the segment was classified or contributed whitespace.
 *
 * @param {{locale: string, text: string}} chunk - Text with its locale.
 * @param {boolean} collectNonWords - Collect and count non-word segments.
 * @param {boolean} includeWhitespace - Also tally whitespace while collecting.
 * @returns {{locale: string, text: string, chars: number, wordChars: number,
 *   nonWordChars: number, nonWords: object|undefined}}
 */
function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
  const wordSegmenter = getSegmenter(chunk.locale);
  const nonWords = collectNonWords ? createNonWordCollection() : null;
  let wordChars = 0;
  let nonWordChars = 0;
  for (const { segment, isWordLike } of wordSegmenter.segment(chunk.text)) {
    if (isWordLike) {
      wordChars += countCharsForLocale(segment, chunk.locale);
      continue;
    }
    if (!collectNonWords || !nonWords) continue;
    const whitespaceCount = includeWhitespace ? addWhitespace(nonWords, segment) : 0;
    const category = classifyNonWordSegment(segment);
    if (category) addNonWord(nonWords, category, segment);
    if (category || whitespaceCount > 0) {
      nonWordChars += countCharsForLocale(segment, chunk.locale);
    }
  }
  return {
    locale: chunk.locale,
    text: chunk.text,
    // Every counted segment is either word or non-word, so the total is the sum.
    chars: wordChars + nonWordChars,
    wordChars,
    nonWordChars,
    nonWords: nonWords ?? void 0
  };
}
|
|
1193
|
+
/**
 * Sums character-mode chunk results per locale.
 *
 * Locales appear in the output in order of first occurrence. Non-word
 * collections are merged into a fresh collection per locale, so the input
 * chunks' collections are not modified.
 *
 * @param {Array<object>} chunks - Results shaped like analyzeCharChunk output.
 * @returns {Array<{locale: string, chars: number, wordChars: number,
 *   nonWordChars: number, nonWords: object|undefined}>}
 */
function aggregateCharsByLocale(chunks) {
  // Map iteration follows insertion order, which is first-occurrence order.
  const totals = new Map();
  for (const chunk of chunks) {
    const entry = totals.get(chunk.locale);
    if (!entry) {
      totals.set(chunk.locale, {
        locale: chunk.locale,
        chars: chunk.chars,
        wordChars: chunk.wordChars,
        nonWordChars: chunk.nonWordChars,
        nonWords: chunk.nonWords ? mergeNonWordCollections(createNonWordCollection(), chunk.nonWords) : void 0
      });
      continue;
    }
    entry.chars += chunk.chars;
    entry.wordChars += chunk.wordChars;
    entry.nonWordChars += chunk.nonWordChars;
    if (chunk.nonWords) {
      if (!entry.nonWords) entry.nonWords = createNonWordCollection();
      mergeNonWordCollections(entry.nonWords, chunk.nonWords);
    }
  }
  return [...totals.values()];
}
|
|
1219
|
+
/**
 * Sums word-mode chunk results per locale.
 *
 * Locales appear in the output in order of first occurrence. Each locale gets
 * its own copy of the segment list, so input chunks are not modified.
 *
 * @param {Array<{locale: string, words: number, segments: string[]}>} chunks
 * @returns {Array<{locale: string, words: number, segments: string[]}>}
 */
function aggregateByLocale(chunks) {
  // Map iteration follows insertion order, which is first-occurrence order.
  const totals = new Map();
  for (const chunk of chunks) {
    const entry = totals.get(chunk.locale);
    if (!entry) {
      totals.set(chunk.locale, {
        locale: chunk.locale,
        words: chunk.words,
        segments: [...chunk.segments]
      });
      continue;
    }
    entry.words += chunk.words;
    appendAll(entry.segments, chunk.segments);
  }
  return [...totals.values()];
}
|
|
1164
1238
|
|
|
1165
1239
|
//#endregion
|
|
1166
|
-
//#region src/
|
|
1167
|
-
const
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1240
|
+
//#region src/wc/mode.ts
|
|
1241
|
+
/**
 * Accepted spellings for each counting mode, mapped to the canonical mode
 * name. Frozen so this shared table cannot be mutated at runtime.
 */
const MODE_ALIASES = Object.freeze({
  chunk: "chunk",
  chunks: "chunk",
  segments: "segments",
  segment: "segments",
  seg: "segments",
  collector: "collector",
  collect: "collector",
  colle: "collector",
  char: "char",
  chars: "char",
  character: "char",
  characters: "char",
  "char-collector": "char-collector"
});
// Tokens that denote the "char" half of a composed char-collector mode.
const CHAR_MODE_ALIASES = new Set([
  "char",
  "chars",
  "character",
  "characters"
]);
// Tokens that denote the "collector" half of a composed char-collector mode.
const COLLECTOR_MODE_ALIASES = new Set([
  "collector",
  "collect",
  "colle",
  "col"
]);
|
|
1268
|
+
/**
 * Removes every hyphen, underscore and whitespace character from `value`.
 *
 * @param {string} value - Mode text.
 * @returns {string} The text with all separators removed.
 */
function collapseSeparators(value) {
  return value.split(/[-_\s]+/).join("");
}
|
|
1178
|
-
function
|
|
1179
|
-
const
|
|
1180
|
-
if (
|
|
1181
|
-
|
|
1271
|
+
/**
 * Checks whether a separator-delimited mode string combines a "char" alias
 * with a "collector" alias (e.g. "char-collect", "collector_chars").
 *
 * @param {string} value - Normalised mode text.
 * @returns {boolean} True when there are at least two tokens, every token is
 *   a char or collector alias, and both kinds are present.
 */
function isComposedCharCollectorFromTokens(value) {
  const tokens = value.split(/[-_\s]+/).map((token) => token.trim()).filter((token) => token.length > 0);
  if (tokens.length < 2) return false;
  const isCharAlias = (token) => CHAR_MODE_ALIASES.has(token);
  const isCollectorAlias = (token) => COLLECTOR_MODE_ALIASES.has(token);
  // Any unknown token disqualifies the whole value.
  if (!tokens.every((token) => isCharAlias(token) || isCollectorAlias(token))) return false;
  return tokens.some(isCharAlias) && tokens.some(isCollectorAlias);
}
|
|
1183
|
-
function
|
|
1184
|
-
if (
|
|
1185
|
-
|
|
1186
|
-
return JSON.parse(frontmatter);
|
|
1187
|
-
} catch {
|
|
1188
|
-
return null;
|
|
1189
|
-
}
|
|
1190
|
-
if (type === "yaml") {
|
|
1191
|
-
const doc = parseDocument(frontmatter, { prettyErrors: false });
|
|
1192
|
-
if (doc.errors.length > 0) return null;
|
|
1193
|
-
const data = doc.toJSON();
|
|
1194
|
-
if (!data || typeof data !== "object" || Array.isArray(data)) return null;
|
|
1195
|
-
return data;
|
|
1196
|
-
}
|
|
1197
|
-
if (type === "toml") return parseTomlFrontmatter(frontmatter);
|
|
1198
|
-
return null;
|
|
1289
|
+
/**
 * Checks whether a separator-free mode string is exactly a "char" alias
 * joined to a "collector" alias, in either order (e.g. "charcollector").
 *
 * @param {string} value - Mode text with separators already removed.
 * @returns {boolean} True when the value is such a concatenation.
 */
function isComposedCharCollectorCompact(value) {
  const charAliases = [...CHAR_MODE_ALIASES];
  const collectorAliases = [...COLLECTOR_MODE_ALIASES];
  return charAliases.some((charAlias) =>
    collectorAliases.some((collectorAlias) =>
      value === `${charAlias}${collectorAlias}` || value === `${collectorAlias}${charAlias}`
    )
  );
}
|
|
1200
|
-
function
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
continue;
|
|
1210
|
-
}
|
|
1211
|
-
if (char === "\\") {
|
|
1212
|
-
escaped = true;
|
|
1213
|
-
continue;
|
|
1214
|
-
}
|
|
1215
|
-
if (char === "\"") inString = false;
|
|
1216
|
-
continue;
|
|
1217
|
-
}
|
|
1218
|
-
if (char === "\"") {
|
|
1219
|
-
inString = true;
|
|
1220
|
-
continue;
|
|
1221
|
-
}
|
|
1222
|
-
if (char === "{") {
|
|
1223
|
-
depth += 1;
|
|
1224
|
-
continue;
|
|
1225
|
-
}
|
|
1226
|
-
if (char === "}") {
|
|
1227
|
-
depth -= 1;
|
|
1228
|
-
if (depth === 0) return {
|
|
1229
|
-
jsonText: text.slice(startIndex, i + 1),
|
|
1230
|
-
endIndex: i
|
|
1231
|
-
};
|
|
1232
|
-
}
|
|
1233
|
-
}
|
|
1234
|
-
return null;
|
|
1293
|
+
/**
 * Resolves user-supplied mode text to a canonical mode name.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. Direct
 * aliases are tried first, then composed "char + collector" spellings (with
 * or without separators), then the alias table again with separators removed.
 *
 * @param {string|null|undefined} input - Raw mode text.
 * @returns {string|null} The canonical mode, or null when nothing matches.
 */
function normalizeMode(input) {
  if (!input) return null;
  // Own-property lookup only: a plain `MODE_ALIASES[key]` would return
  // inherited members for inputs such as "constructor" or "__proto__".
  const lookupAlias = (key) => Object.prototype.hasOwnProperty.call(MODE_ALIASES, key) ? MODE_ALIASES[key] : null;
  const normalized = input.trim().toLowerCase();
  const direct = lookupAlias(normalized);
  if (direct) return direct;
  if (isComposedCharCollectorFromTokens(normalized)) return "char-collector";
  const compact = collapseSeparators(normalized);
  if (isComposedCharCollectorCompact(compact)) return "char-collector";
  return lookupAlias(compact) ?? null;
}
|
|
1236
|
-
function
|
|
1237
|
-
|
|
1238
|
-
const lines = normalized.split("\n");
|
|
1239
|
-
if (lines.length === 0) return {
|
|
1240
|
-
frontmatter: null,
|
|
1241
|
-
content: normalized,
|
|
1242
|
-
data: null,
|
|
1243
|
-
frontmatterType: null
|
|
1244
|
-
};
|
|
1245
|
-
lines[0] = stripBom(lines[0] ?? "");
|
|
1246
|
-
const normalizedWithoutBom = lines.join("\n");
|
|
1247
|
-
const openingType = getFenceType(lines[0] ?? "");
|
|
1248
|
-
if (!openingType) {
|
|
1249
|
-
const jsonStart = (normalizedWithoutBom.match(/^[\t \n]*/)?.[0] ?? "").length;
|
|
1250
|
-
if (normalizedWithoutBom[jsonStart] !== "{") return {
|
|
1251
|
-
frontmatter: null,
|
|
1252
|
-
content: normalizedWithoutBom,
|
|
1253
|
-
data: null,
|
|
1254
|
-
frontmatterType: null
|
|
1255
|
-
};
|
|
1256
|
-
const jsonBlock = extractJsonBlock(normalizedWithoutBom, jsonStart);
|
|
1257
|
-
if (!jsonBlock) return {
|
|
1258
|
-
frontmatter: null,
|
|
1259
|
-
content: normalizedWithoutBom,
|
|
1260
|
-
data: null,
|
|
1261
|
-
frontmatterType: null
|
|
1262
|
-
};
|
|
1263
|
-
const frontmatter = jsonBlock.jsonText;
|
|
1264
|
-
let content = normalizedWithoutBom.slice(jsonBlock.endIndex + 1);
|
|
1265
|
-
if (content.startsWith("\n")) content = content.slice(1);
|
|
1266
|
-
const data = parseFrontmatter(frontmatter, "json");
|
|
1267
|
-
if (!data) return {
|
|
1268
|
-
frontmatter: null,
|
|
1269
|
-
content: normalizedWithoutBom,
|
|
1270
|
-
data: null,
|
|
1271
|
-
frontmatterType: null
|
|
1272
|
-
};
|
|
1273
|
-
return {
|
|
1274
|
-
frontmatter,
|
|
1275
|
-
content,
|
|
1276
|
-
data,
|
|
1277
|
-
frontmatterType: "json"
|
|
1278
|
-
};
|
|
1279
|
-
}
|
|
1280
|
-
let closingIndex = -1;
|
|
1281
|
-
for (let i = 1; i < lines.length; i += 1) if (getFenceType(lines[i] ?? "") === openingType) {
|
|
1282
|
-
closingIndex = i;
|
|
1283
|
-
break;
|
|
1284
|
-
}
|
|
1285
|
-
if (closingIndex === -1) return {
|
|
1286
|
-
frontmatter: null,
|
|
1287
|
-
content: normalizedWithoutBom,
|
|
1288
|
-
data: null,
|
|
1289
|
-
frontmatterType: null
|
|
1290
|
-
};
|
|
1291
|
-
const frontmatter = lines.slice(1, closingIndex).join("\n");
|
|
1292
|
-
return {
|
|
1293
|
-
frontmatter,
|
|
1294
|
-
content: lines.slice(closingIndex + 1).join("\n"),
|
|
1295
|
-
data: parseFrontmatter(frontmatter, openingType),
|
|
1296
|
-
frontmatterType: openingType
|
|
1297
|
-
};
|
|
1303
|
+
/**
 * Resolves mode text to a canonical mode, substituting `fallback` when the
 * text is empty or unrecognised.
 *
 * @param {string|null|undefined} input - Raw mode text.
 * @param {string} [fallback="chunk"] - Mode returned when nothing matches.
 * @returns {string} The canonical mode or the fallback.
 */
function resolveMode(input, fallback = "chunk") {
  const mode = normalizeMode(input);
  if (mode === null || mode === undefined) return fallback;
  return mode;
}
|
|
1299
1306
|
|
|
1300
1307
|
//#endregion
|
|
1301
|
-
//#region src/wc/
|
|
1302
|
-
const
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1308
|
+
//#region src/wc/latin-hints.ts
|
|
1309
|
+
// Built-in Latin-script hints: each rule pairs a language tag with a character
// class of letters/marks treated as a hint for that language.
const DEFAULT_LATIN_HINT_RULES_SOURCE = [
  ["de", "[äöüÄÖÜß]"],
  ["es", "[ñÑ¿¡]"],
  ["pt", "[ãõÃÕ]"],
  ["fr", "[œŒæÆ]"],
  ["pl", "[ąćęłńśźżĄĆĘŁŃŚŹŻ]"],
  ["tr", "[ıİğĞşŞ]"],
  ["ro", "[ăĂâÂîÎșȘțȚ]"],
  ["hu", "[őŐűŰ]"],
  ["is", "[ðÐþÞ]"]
].map(([tag, pattern]) => ({ tag, pattern }));
// Frozen copies (list and each rule) exposed as the immutable defaults.
const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
|
|
1348
|
+
|
|
1349
|
+
//#endregion
|
|
1350
|
+
//#region src/wc/locale-detect.ts
|
|
1351
|
+
// Fallback locale tags for Latin-script and Han-script text.
const DEFAULT_LOCALE = "und-Latn";
const DEFAULT_HAN_TAG = "und-Hani";
// Upper bound on the source length of a Latin hint pattern.
const MAX_LATIN_HINT_PATTERN_LENGTH = 256;
// One Unicode script test per supported script.
const regex = {
  hiragana: /\p{Script=Hiragana}/u,
  katakana: /\p{Script=Katakana}/u,
  hangul: /\p{Script=Hangul}/u,
  han: /\p{Script=Han}/u,
  latin: /\p{Script=Latin}/u,
  arabic: /\p{Script=Arabic}/u,
  cyrillic: /\p{Script=Cyrillic}/u,
  devanagari: /\p{Script=Devanagari}/u,
  thai: /\p{Script=Thai}/u
};
// Locales considered Latin by default: the fallback plus every built-in hint tag.
const defaultLatinLocales = new Set([DEFAULT_LOCALE].concat(DEFAULT_LATIN_HINT_RULES.map(({ tag }) => tag)));
|
|
1366
|
+
/**
 * Tells whether `locale` counts as a Latin-script locale.
 *
 * @param {string} locale - Locale tag to check.
 * @param {{latinLocales: Set<string>}} [context] - When provided, its
 *   `latinLocales` set is consulted instead of the built-in defaults.
 * @returns {boolean} True when the locale is in the applicable set.
 */
function isLatinLocale(locale, context) {
  const knownLocales = context ? context.latinLocales : defaultLatinLocales;
  return knownLocales.has(locale);
}
|
|
1311
|
-
function
|
|
1312
|
-
const
|
|
1313
|
-
if (
|
|
1314
|
-
const
|
|
1315
|
-
|
|
1316
|
-
|
|
1370
|
+
/**
 * Picks the Latin-script hint to use, in priority order: `latinTagHint`,
 * then `latinLanguageHint`, then `latinLocaleHint`. Values are trimmed and
 * blank ones are skipped.
 *
 * @param {object} options - Options that may carry the three hint fields.
 * @returns {string|undefined} The first non-blank hint, or undefined.
 */
function resolveLatinHint(options) {
  for (const field of ["latinTagHint", "latinLanguageHint", "latinLocaleHint"]) {
    const hint = options[field]?.trim();
    if (hint) return hint;
  }
}
|
|
1318
|
-
function
|
|
1319
|
-
|
|
1378
|
+
/**
 * Picks the Han-script hint to use: `hanTagHint` first, then
 * `hanLanguageHint`. Values are trimmed and blank ones are skipped.
 *
 * @param {object} options - Options that may carry the two hint fields.
 * @returns {string|undefined} The first non-blank hint, or undefined.
 */
function resolveHanHint(options) {
  for (const field of ["hanTagHint", "hanLanguageHint"]) {
    const hint = options[field]?.trim();
    if (hint) return hint;
  }
}
|
|
1321
|
-
function
|
|
1322
|
-
|
|
1323
|
-
const
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1384
|
+
/**
 * Compiles a Latin hint pattern into a Unicode-aware RegExp.
 *
 * String patterns are compiled with the `u` flag. RegExp patterns keep their
 * flags, gain `u` unless they already use `u` or `v`, and lose the stateful
 * `g` and `y` flags.
 *
 * @param {string|RegExp} pattern - Pattern source or an existing RegExp.
 * @param {string} label - Prefix used in error messages to identify the rule.
 * @returns {RegExp} The compiled pattern.
 * @throws {Error} When the pattern source is empty, longer than
 *   MAX_LATIN_HINT_PATTERN_LENGTH, or not a valid Unicode regex (the original
 *   error is attached as `cause`).
 */
function compileLatinHintPattern(pattern, label) {
  const isString = typeof pattern === "string";
  const source = isString ? pattern : pattern.source;
  if (source.length === 0) throw new Error(`${label}: pattern must not be empty.`);
  if (source.length > MAX_LATIN_HINT_PATTERN_LENGTH) throw new Error(`${label}: pattern must be at most ${MAX_LATIN_HINT_PATTERN_LENGTH} characters.`);
  let flags = "u";
  if (!isString) {
    // `g`/`y` make `.test()`/`.exec()` depend on `lastIndex`, so a reused
    // pattern would give position-dependent answers.
    // NOTE(review): assumes compiled hints are only used for stateless
    // matching — confirm against the call sites.
    const stateless = pattern.flags.replace(/[gy]/g, "");
    const hasUnicodeMode = stateless.includes("u") || stateless.includes("v");
    flags = hasUnicodeMode ? stateless : `${stateless}u`;
  }
  try {
    return new RegExp(source, flags);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    throw new Error(`${label}: invalid Unicode regex pattern (${message}).`, { cause: error });
  }
}
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
const
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
"\u2029"
|
|
1342
|
-
]);
|
|
1343
|
-
function createNonWordCollection() {
|
|
1344
|
-
return {
|
|
1345
|
-
emoji: [],
|
|
1346
|
-
symbols: [],
|
|
1347
|
-
punctuation: [],
|
|
1348
|
-
counts: {
|
|
1349
|
-
emoji: 0,
|
|
1350
|
-
symbols: 0,
|
|
1351
|
-
punctuation: 0
|
|
1352
|
-
}
|
|
1397
|
+
/**
 * Validates a hint rule priority, defaulting a missing value to 0.
 *
 * @param {number | undefined} priority - Priority supplied on the rule.
 * @param {string} label - Prefix for the error message.
 * @returns {number} 0 when undefined, otherwise the given finite number.
 * @throws {Error} When the value is defined but not a finite number.
 */
function normalizeLatinHintPriority(priority, label) {
	if (priority === undefined) return 0;
	const isFiniteNumber = typeof priority === "number" && Number.isFinite(priority);
	if (isFiniteNumber) return priority;
	throw new Error(`${label}: priority must be a finite number when provided.`);
}
|
|
1402
|
+
/**
 * Turns a raw hint rule into its compiled form.
 *
 * @param {{ tag?: unknown, pattern?: unknown, priority?: unknown }} rule - Raw rule.
 * @param {number} order - Position used later as the tie-breaker when sorting.
 * @param {string} label - Prefix for validation error messages.
 * @returns {{ tag: string, pattern: RegExp, priority: number, order: number }} Compiled rule.
 * @throws {Error} When the tag is not a non-blank string, or pattern/priority validation fails.
 */
function compileLatinHintRule(rule, order, label) {
	let tag = "";
	if (typeof rule.tag === "string") tag = rule.tag.trim();
	if (tag === "") throw new Error(`${label}: tag must be a non-empty string.`);
	// The pattern is validated before the priority, so a rule with both invalid reports the pattern.
	const pattern = compileLatinHintPattern(rule.pattern, label);
	const priority = normalizeLatinHintPriority(rule.priority, label);
	return {
		tag,
		pattern,
		priority,
		order
	};
}
|
|
1355
|
-
function
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1412
|
+
/**
 * Builds the ordered list of compiled Latin hint rules.
 *
 * Custom rules are queued first, then the built-in rules unless
 * `options.useDefaultLatinHints` is exactly `false`. Falsy entries are skipped.
 * The result is sorted by descending priority, ties broken by queue position.
 *
 * @param {object} options - May carry `latinHintRules` and `useDefaultLatinHints`.
 * @returns {Array<{ tag: string, pattern: RegExp, priority: number, order: number }>} Sorted compiled rules.
 */
function resolveLatinHintRules$1(options) {
	const includeDefaults = options.useDefaultLatinHints !== false;
	const customRules = options.latinHintRules ?? [];
	const pending = [];
	// Labels carry the index inside the originating list, not the combined queue position.
	const enqueue = (rules, kind) => {
		for (let position = 0; position < rules.length; position += 1) {
			const rule = rules[position];
			if (rule) pending.push({
				rule,
				label: `Invalid ${kind} Latin hint rule at index ${position}`
			});
		}
	};
	enqueue(customRules, "custom");
	if (includeDefaults) enqueue(DEFAULT_LATIN_HINT_RULES, "default");
	const compiled = pending.map(({ rule, label }, order) => compileLatinHintRule(rule, order, label));
	return compiled.sort((a, b) => a.priority === b.priority ? a.order - b.order : b.priority - a.priority);
}
|
|
1369
|
-
function
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1439
|
+
/**
 * Resolves everything locale detection needs from the caller's options.
 *
 * @param {object} [options={}] - Hint options (Latin/Han hints, custom rules, default-rule toggle).
 * @returns {{ latinHint: string | undefined, hanHint: string | undefined, latinHintRules: Array, latinLocales: Set<string> }}
 *   The resolved hints, the compiled rules, and the set of tags treated as Latin
 *   (default tag, every rule tag, and the explicit Latin hint if any).
 */
function resolveLocaleDetectContext(options = {}) {
	const latinHint = resolveLatinHint(options);
	const latinHintRules = resolveLatinHintRules$1(options);
	const latinLocales = new Set([DEFAULT_LOCALE, ...latinHintRules.map((rule) => rule.tag)]);
	if (latinHint) latinLocales.add(latinHint);
	const hanHint = resolveHanHint(options);
	return {
		latinHint,
		hanHint,
		latinHintRules,
		latinLocales
	};
}
|
|
1452
|
+
/**
 * Finds the tag of the first hint rule whose pattern matches the character.
 *
 * @param {string} char - Character to test.
 * @param {{ latinHintRules: Iterable<{ tag: string, pattern: RegExp }> }} context - Detection context.
 * @returns {string} The matching rule's tag, or DEFAULT_LOCALE when no rule matches.
 */
function detectLatinLocale(char, context) {
	for (const rule of context.latinHintRules) {
		const { pattern } = rule;
		// Rewind first: a pattern carrying "g" or "y" keeps its position between test() calls.
		pattern.lastIndex = 0;
		const matches = pattern.test(char);
		if (matches) return rule.tag;
	}
	return DEFAULT_LOCALE;
}
|
|
1459
|
+
/**
 * Maps a single character to a locale tag based on its Unicode script.
 *
 * @param {string} char - Character to classify.
 * @param {string | null | undefined} previousLocale - Locale of the preceding text, used for carry-over.
 * @param {object} [options={}] - Hint options; only used to build `context` when none is passed.
 * @param {object} [context] - Resolved detection context (defaults to one built from `options`).
 * @param {boolean} [allowLatinLocaleCarry=true] - Whether a non-default Latin locale may carry over.
 * @param {boolean} [allowJapaneseHanCarry=true] - Whether Han after a "ja…" locale stays Japanese.
 * @returns {string | null} The locale tag, or null when the character is in none of the handled scripts.
 */
function detectLocaleForChar(char, previousLocale, options = {}, context = resolveLocaleDetectContext(options), allowLatinLocaleCarry = true, allowJapaneseHanCarry = true) {
	const isKana = regex.hiragana.test(char) || regex.katakana.test(char);
	if (isKana) return "ja";
	if (regex.hangul.test(char)) return "ko";
	if (regex.arabic.test(char)) return "ar";
	if (regex.cyrillic.test(char)) return "ru";
	if (regex.devanagari.test(char)) return "hi";
	if (regex.thai.test(char)) return "th";
	if (regex.han.test(char)) {
		const carriesJapanese = allowJapaneseHanCarry && Boolean(previousLocale) && previousLocale.startsWith("ja");
		return carriesJapanese ? previousLocale : (context.hanHint ?? DEFAULT_HAN_TAG);
	}
	if (!regex.latin.test(char)) return null;
	// A rule match always wins over carry-over and over the explicit hint.
	const hintedLocale = detectLatinLocale(char, context);
	if (hintedLocale !== DEFAULT_LOCALE) return hintedLocale;
	const carriesLatin = allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE;
	if (carriesLatin) return previousLocale;
	return context.latinHint || DEFAULT_LOCALE;
}
|
|
1479
|
+
|
|
1480
|
+
//#endregion
|
|
1481
|
+
//#region src/wc/segment.ts
|
|
1482
|
+
// Line breaks and sentence/clause punctuation (ASCII, fullwidth and halfwidth CJK forms).
// segmentTextByLocale stops carrying the previous locale forward once a character with
// no detected script matches this, until the next character with a detected script.
const HARD_BOUNDARY_REGEX = /[\r\n,.!?;:,、。!?;:.。、]/u;
|
|
1483
|
+
// Whitespace plus the same punctuation set as HARD_BOUNDARY_REGEX.
// findLastLatinPromotionBreakIndex uses it to locate the last break inside a buffer.
const LATIN_PROMOTION_BREAK_REGEX = /[\s,.!?;:,、。!?;:.。、]/u;
|
|
1484
|
+
/**
 * Splits text into consecutive chunks, each tagged with a detected locale.
 *
 * Characters with no detected script (detector returns null) stay in the current chunk.
 * When a default-Latin chunk is followed by a more specific Latin locale, the text after
 * the last promotion break in the buffer is moved into the new chunk; with no break the
 * whole buffer is relabelled.
 *
 * @param {string} text - Text to segment (iterated by code point).
 * @param {object} [options={}] - Hint options forwarded to locale detection.
 * @returns {Array<{ locale: string, text: string }>} Chunks after merging equal-locale neighbours.
 */
function segmentTextByLocale(text, options = {}) {
	const context = resolveLocaleDetectContext(options);
	const chunks = [];
	let activeLocale = DEFAULT_LOCALE;
	let pending = "";
	let pendingHasScript = false;
	// Set once a hard boundary is seen after the last script character; disables locale carry-over.
	let carryBlocked = false;
	for (const char of text) {
		const allowCarry = !carryBlocked;
		const detected = detectLocaleForChar(char, activeLocale, options, context, allowCarry, allowCarry);
		const hasScript = detected !== null;
		const nextLocale = detected ?? activeLocale;
		if (pending === "") {
			// First character of a fresh buffer decides the starting locale.
			activeLocale = nextLocale;
			pending = char;
			pendingHasScript = hasScript;
		} else if (hasScript && !pendingHasScript) {
			// Buffer so far held only script-less characters: adopt the first real locale.
			activeLocale = nextLocale;
			pending += char;
			pendingHasScript = true;
		} else if (hasScript && nextLocale !== activeLocale) {
			const promotesDefaultLatin = activeLocale === DEFAULT_LOCALE && isLatinLocale(nextLocale, context);
			const breakIndex = promotesDefaultLatin ? findLastLatinPromotionBreakIndex(pending) : -1;
			if (!promotesDefaultLatin) {
				chunks.push({
					locale: activeLocale,
					text: pending
				});
				pending = char;
			} else if (breakIndex === -1) {
				pending += char;
			} else {
				const head = pending.slice(0, breakIndex + 1);
				const tail = pending.slice(breakIndex + 1);
				if (head.length > 0) chunks.push({
					locale: activeLocale,
					text: head
				});
				pending = `${tail}${char}`;
			}
			activeLocale = nextLocale;
			pendingHasScript = true;
		} else {
			pending += char;
			if (hasScript) pendingHasScript = true;
		}
		// Every path above ends with the same carry-boundary bookkeeping.
		if (hasScript) carryBlocked = false;
		else if (HARD_BOUNDARY_REGEX.test(char)) carryBlocked = true;
	}
	if (pending.length > 0) chunks.push({
		locale: activeLocale,
		text: pending
	});
	return mergeAdjacentChunks(chunks);
}
|
|
1410
|
-
function
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
if (source.counts.symbols > 0) {
|
|
1416
|
-
appendAll(target.symbols, source.symbols);
|
|
1417
|
-
target.counts.symbols += source.counts.symbols;
|
|
1418
|
-
}
|
|
1419
|
-
if (source.counts.punctuation > 0) {
|
|
1420
|
-
appendAll(target.punctuation, source.punctuation);
|
|
1421
|
-
target.counts.punctuation += source.counts.punctuation;
|
|
1422
|
-
}
|
|
1423
|
-
if (source.counts.whitespace && source.counts.whitespace > 0 && source.whitespace) {
|
|
1424
|
-
const whitespace = target.whitespace ?? createWhitespaceCounts();
|
|
1425
|
-
whitespace.spaces += source.whitespace.spaces;
|
|
1426
|
-
whitespace.tabs += source.whitespace.tabs;
|
|
1427
|
-
whitespace.newlines += source.whitespace.newlines;
|
|
1428
|
-
whitespace.other += source.whitespace.other;
|
|
1429
|
-
target.whitespace = whitespace;
|
|
1430
|
-
target.counts.whitespace = (target.counts.whitespace ?? 0) + source.counts.whitespace;
|
|
1557
|
+
/**
 * Finds the position of the last promotion break (see LATIN_PROMOTION_BREAK_REGEX) in a buffer.
 *
 * @param {string} buffer - Text scanned from the end, one UTF-16 code unit at a time.
 * @returns {number} Index of the last matching code unit, or -1 when there is none.
 */
function findLastLatinPromotionBreakIndex(buffer) {
	let index = buffer.length;
	while (index-- > 0) {
		const unit = buffer[index];
		if (unit && LATIN_PROMOTION_BREAK_REGEX.test(unit)) return index;
	}
	return -1;
}
|
|
1434
|
-
function
|
|
1435
|
-
return
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1565
|
+
/**
 * Joins neighbouring chunks that share a locale by concatenating their text.
 *
 * @param {Array<{ locale: string, text: string }>} chunks - Chunks in document order.
 * @returns {Array<{ locale: string, text: string }>} The same array when empty; otherwise a
 *   new array in which no two adjacent chunks have the same locale.
 */
function mergeAdjacentChunks(chunks) {
	if (chunks.length === 0) return chunks;
	const merged = [];
	let pending = chunks[0];
	for (const chunk of chunks.slice(1)) {
		if (chunk.locale !== pending.locale) {
			merged.push(pending);
			pending = chunk;
			continue;
		}
		pending = {
			locale: pending.locale,
			text: pending.text + chunk.text
		};
	}
	merged.push(pending);
	return merged;
}
|
|
1442
1583
|
|
|
1443
1584
|
//#endregion
|
|
1444
|
-
//#region src/wc/
|
|
1445
|
-
function
|
|
1446
|
-
const
|
|
1447
|
-
const
|
|
1448
|
-
const
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1585
|
+
//#region src/wc/wc.ts
|
|
1586
|
+
/**
 * Counts words or characters in text, broken down by detected locale.
 *
 * @param {string} text - Text to analyse.
 * @param {object} [options={}] - `mode`, `nonWords`, `includeWhitespace` and the Latin/Han hint options.
 * @returns {{ total: number, counts: ({ words: number, nonWords: number, total: number } | undefined), breakdown: object }}
 *   `counts` is only populated when `options.nonWords` is truthy. `breakdown.items` holds one entry
 *   per chunk ("chunk", "segments", "char") or the per-locale aggregate ("collector", "char-collector").
 */
function wordCounter(text, options = {}) {
	const mode = resolveMode(options.mode, "chunk");
	const collectNonWords = Boolean(options.nonWords);
	const includeWhitespace = Boolean(options.includeWhitespace);
	const chunks = segmentTextByLocale(text, {
		latinLanguageHint: options.latinLanguageHint,
		latinTagHint: options.latinTagHint,
		latinLocaleHint: options.latinLocaleHint,
		latinHintRules: options.latinHintRules,
		useDefaultLatinHints: options.useDefaultLatinHints,
		hanLanguageHint: options.hanLanguageHint,
		hanTagHint: options.hanTagHint
	});
	const sumOf = (entries, pick) => entries.reduce((sum, entry) => sum + pick(entry), 0);
	if (mode === "char" || mode === "char-collector") {
		const analyzedChars = chunks.map((chunk) => analyzeCharChunk(chunk, collectNonWords, includeWhitespace));
		const charTotal = sumOf(analyzedChars, (chunk) => chunk.chars);
		let charCounts;
		if (collectNonWords) charCounts = {
			words: sumOf(analyzedChars, (chunk) => chunk.wordChars),
			nonWords: sumOf(analyzedChars, (chunk) => chunk.nonWordChars),
			total: charTotal
		};
		const charItems = mode === "char" ? analyzedChars.map((chunk) => ({
			locale: chunk.locale,
			text: chunk.text,
			chars: chunk.chars,
			nonWords: chunk.nonWords
		})) : aggregateCharsByLocale(analyzedChars).map((chunk) => ({
			locale: chunk.locale,
			chars: chunk.chars,
			nonWords: chunk.nonWords
		}));
		return {
			total: charTotal,
			counts: charCounts,
			breakdown: {
				mode,
				items: charItems
			}
		};
	}
	const analyzed = chunks.map((chunk) => analyzeChunk(chunk, collectNonWords, includeWhitespace));
	// Non-word contribution of one chunk; zero unless collection is enabled and the chunk has data.
	const nonWordCountOf = (chunk) => collectNonWords && chunk.nonWords ? getNonWordTotal(chunk.nonWords) : 0;
	const wordsTotal = sumOf(analyzed, (chunk) => chunk.words);
	const nonWordsTotal = collectNonWords ? sumOf(analyzed, nonWordCountOf) : 0;
	const total = sumOf(analyzed, (chunk) => chunk.words + nonWordCountOf(chunk));
	let counts;
	if (collectNonWords) counts = {
		words: wordsTotal,
		nonWords: nonWordsTotal,
		total
	};
	let breakdown;
	if (mode === "segments") breakdown = {
		mode,
		items: analyzed.map((chunk) => ({
			locale: chunk.locale,
			text: chunk.text,
			words: chunk.words,
			segments: chunk.segments,
			nonWords: chunk.nonWords
		}))
	};
	else if (mode === "collector") breakdown = {
		mode,
		items: aggregateByLocale(analyzed),
		nonWords: collectNonWordsAggregate(analyzed, collectNonWords)
	};
	else breakdown = {
		mode,
		items: analyzed.map((chunk) => ({
			locale: chunk.locale,
			text: chunk.text,
			words: chunk.words,
			nonWords: chunk.nonWords
		}))
	};
	return {
		total,
		counts,
		breakdown
	};
}
|
|
1497
|
-
function
|
|
1498
|
-
|
|
1499
|
-
const map = /* @__PURE__ */ new Map();
|
|
1500
|
-
for (const chunk of chunks) {
|
|
1501
|
-
const existing = map.get(chunk.locale);
|
|
1502
|
-
if (existing) {
|
|
1503
|
-
existing.chars += chunk.chars;
|
|
1504
|
-
existing.wordChars += chunk.wordChars;
|
|
1505
|
-
existing.nonWordChars += chunk.nonWordChars;
|
|
1506
|
-
if (chunk.nonWords) {
|
|
1507
|
-
if (!existing.nonWords) existing.nonWords = createNonWordCollection();
|
|
1508
|
-
mergeNonWordCollections(existing.nonWords, chunk.nonWords);
|
|
1509
|
-
}
|
|
1510
|
-
continue;
|
|
1511
|
-
}
|
|
1512
|
-
order.push(chunk.locale);
|
|
1513
|
-
map.set(chunk.locale, {
|
|
1514
|
-
locale: chunk.locale,
|
|
1515
|
-
chars: chunk.chars,
|
|
1516
|
-
wordChars: chunk.wordChars,
|
|
1517
|
-
nonWordChars: chunk.nonWordChars,
|
|
1518
|
-
nonWords: chunk.nonWords ? mergeNonWordCollections(createNonWordCollection(), chunk.nonWords) : void 0
|
|
1519
|
-
});
|
|
1520
|
-
}
|
|
1521
|
-
return order.map((locale) => map.get(locale));
|
|
1686
|
+
/**
 * Adds up the per-category counts of a non-word collection.
 *
 * @param {{ counts: { emoji: number, symbols: number, punctuation: number, whitespace?: number | null } }} nonWords
 * @returns {number} emoji + symbols + punctuation + whitespace (whitespace counts as 0 when nullish).
 */
function getNonWordTotal(nonWords) {
	const { counts } = nonWords;
	const whitespace = counts.whitespace ?? 0;
	return counts.emoji + counts.symbols + counts.punctuation + whitespace;
}
|
|
1523
|
-
function
|
|
1524
|
-
|
|
1525
|
-
const
|
|
1526
|
-
for (const chunk of
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
existing.words += chunk.words;
|
|
1530
|
-
appendAll(existing.segments, chunk.segments);
|
|
1531
|
-
continue;
|
|
1532
|
-
}
|
|
1533
|
-
order.push(chunk.locale);
|
|
1534
|
-
map.set(chunk.locale, {
|
|
1535
|
-
locale: chunk.locale,
|
|
1536
|
-
words: chunk.words,
|
|
1537
|
-
segments: [...chunk.segments]
|
|
1538
|
-
});
|
|
1689
|
+
/**
 * Merges the non-word collections of all analysed chunks into one fresh collection.
 *
 * @param {Array<{ nonWords?: object }>} analyzed - Analysed chunks; those without `nonWords` are skipped.
 * @param {boolean} enabled - When falsy nothing is collected.
 * @returns {object | undefined} The merged collection, or undefined when disabled.
 */
function collectNonWordsAggregate(analyzed, enabled) {
	if (!enabled) return;
	const collection = createNonWordCollection();
	for (const { nonWords } of analyzed) {
		if (nonWords) mergeNonWordCollections(collection, nonWords);
	}
	return collection;
}
|
|
1542
1698
|
|
|
1543
1699
|
//#endregion
|
|
1544
|
-
//#region src/wc/
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
characters: "char",
|
|
1558
|
-
"char-collector": "char-collector"
|
|
1559
|
-
};
|
|
1560
|
-
const CHAR_MODE_ALIASES = new Set([
|
|
1561
|
-
"char",
|
|
1562
|
-
"chars",
|
|
1563
|
-
"character",
|
|
1564
|
-
"characters"
|
|
1565
|
-
]);
|
|
1566
|
-
const COLLECTOR_MODE_ALIASES = new Set([
|
|
1567
|
-
"collector",
|
|
1568
|
-
"collect",
|
|
1569
|
-
"colle",
|
|
1570
|
-
"col"
|
|
1571
|
-
]);
|
|
1572
|
-
function collapseSeparators(value) {
|
|
1573
|
-
return value.replace(/[-_\s]+/g, "");
|
|
1574
|
-
}
|
|
1575
|
-
function isComposedCharCollectorFromTokens(value) {
|
|
1576
|
-
const tokens = value.split(/[-_\s]+/).map((token) => token.trim()).filter((token) => token.length > 0);
|
|
1577
|
-
if (tokens.length < 2) return false;
|
|
1578
|
-
let hasCharAlias = false;
|
|
1579
|
-
let hasCollectorAlias = false;
|
|
1580
|
-
for (const token of tokens) {
|
|
1581
|
-
if (CHAR_MODE_ALIASES.has(token)) {
|
|
1582
|
-
hasCharAlias = true;
|
|
1583
|
-
continue;
|
|
1584
|
-
}
|
|
1585
|
-
if (COLLECTOR_MODE_ALIASES.has(token)) {
|
|
1586
|
-
hasCollectorAlias = true;
|
|
1587
|
-
continue;
|
|
1588
|
-
}
|
|
1589
|
-
return false;
|
|
1700
|
+
//#region src/wc/index.ts
|
|
1701
|
+
// Alias under which the rest of this bundle calls wordCounter.
var wc_default = wordCounter;
|
|
1702
|
+
|
|
1703
|
+
//#endregion
|
|
1704
|
+
//#region src/markdown/section-count.ts
|
|
1705
|
+
/**
 * Converts an arbitrary value into text that can be counted.
 *
 * @param {unknown} value - Value to convert.
 * @returns {string} "" for null/undefined, the value itself for strings, `String(value)` for
 *   numbers and booleans, otherwise its JSON form; falls back to `String(value)` when JSON
 *   serialization throws or produces no text.
 */
function normalizeText(value) {
	if (value == null) return "";
	if (typeof value === "string") return value;
	if (typeof value === "number" || typeof value === "boolean") return String(value);
	try {
		// JSON.stringify returns undefined (not a string) for functions, symbols and
		// objects whose toJSON yields undefined, so fall back to String() in that case.
		return JSON.stringify(value) ?? String(value);
	} catch {
		// Reached for values JSON cannot serialize, e.g. BigInt or circular structures.
		return String(value);
	}
}
|
|
1593
|
-
function
|
|
1594
|
-
|
|
1595
|
-
return
|
|
1715
|
+
/**
 * Builds one counted item per top-level frontmatter key.
 *
 * @param {unknown} data - Parsed frontmatter; anything other than a non-array object yields no items.
 * @param {string} mode - Accepted for signature parity with buildSingleItem; not read here.
 * @param {object} options - Options passed through to the word counter.
 * @returns {Array<{ name: string, source: "frontmatter", result: object }>} Items in key order.
 *   The counted text is "key: value", or just the key when the value normalizes to empty text.
 */
function buildPerKeyItems(data, mode, options) {
	const isRecord = Boolean(data) && typeof data === "object" && !Array.isArray(data);
	if (!isRecord) return [];
	const items = [];
	for (const [key, value] of Object.entries(data)) {
		const valueText = normalizeText(value);
		const countedText = valueText ? `${key}: ${valueText}` : key;
		items.push({
			name: key,
			source: "frontmatter",
			result: wc_default(countedText, options)
		});
	}
	return items;
}
|
|
1597
|
-
function
|
|
1598
|
-
|
|
1599
|
-
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
const compact = collapseSeparators(normalized);
|
|
1604
|
-
if (isComposedCharCollectorCompact(compact)) return "char-collector";
|
|
1605
|
-
return MODE_ALIASES[compact] ?? null;
|
|
1726
|
+
/**
 * Counts one block of text and wraps the result as a single-element item list.
 *
 * @param {string} name - Item name.
 * @param {string} text - Text to count.
 * @param {string} mode - Not read here; the counter takes its mode from `options`.
 * @param {object} options - Options passed through to the word counter.
 * @param {string} source - Item source label.
 * @returns {Array<{ name: string, source: string, result: object }>} One item.
 */
function buildSingleItem(name, text, mode, options, source) {
	const result = wc_default(text, options);
	const item = {
		name,
		source,
		result
	};
	return [item];
}
|
|
1607
|
-
function
|
|
1608
|
-
return
|
|
1733
|
+
/**
 * Adds up `result.total` over a list of counted items.
 *
 * @param {Array<{ result: { total: number } }>} items - Counted items.
 * @returns {number} The sum, 0 for an empty list.
 */
function sumTotals(items) {
	let sum = 0;
	items.forEach((item) => {
		sum += item.result.total;
	});
	return sum;
}
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
const
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
pattern: "[ãõÃÕ]"
|
|
1625
|
-
},
|
|
1626
|
-
{
|
|
1627
|
-
tag: "fr",
|
|
1628
|
-
pattern: "[œŒæÆ]"
|
|
1629
|
-
},
|
|
1630
|
-
{
|
|
1631
|
-
tag: "pl",
|
|
1632
|
-
pattern: "[ąćęłńśźżĄĆĘŁŃŚŹŻ]"
|
|
1633
|
-
},
|
|
1634
|
-
{
|
|
1635
|
-
tag: "tr",
|
|
1636
|
-
pattern: "[ıİğĞşŞ]"
|
|
1637
|
-
},
|
|
1638
|
-
{
|
|
1639
|
-
tag: "ro",
|
|
1640
|
-
pattern: "[ăĂâÂîÎșȘțȚ]"
|
|
1641
|
-
},
|
|
1642
|
-
{
|
|
1643
|
-
tag: "hu",
|
|
1644
|
-
pattern: "[őŐűŰ]"
|
|
1645
|
-
},
|
|
1646
|
-
{
|
|
1647
|
-
tag: "is",
|
|
1648
|
-
pattern: "[ðÐþÞ]"
|
|
1736
|
+
/**
 * Counts a markdown document according to the requested section mode.
 *
 * @param {string} input - Raw document text.
 * @param {string} section - "all", "frontmatter", "content", "split", "per-key" or "split-per-key";
 *   any other value produces an empty item list.
 * @param {object} [options={}] - Options passed through to the word counter.
 * @returns {{ section: string, total: number, frontmatterType: unknown, items: Array }}
 *   For "all" the whole input is counted without parsing and `frontmatterType` is null.
 */
function countSections(input, section, options = {}) {
	const mode = options.mode ?? "chunk";
	if (section === "all") {
		const result = wc_default(input, options);
		const wholeDocument = {
			name: "all",
			source: "content",
			result
		};
		return {
			section,
			total: result.total,
			frontmatterType: null,
			items: [wholeDocument]
		};
	}
	const parsed = parseMarkdown(input);
	const frontmatterText = parsed.frontmatter ?? "";
	const contentText = parsed.content ?? "";
	const frontmatterItems = () => buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter");
	const contentItems = () => buildSingleItem("content", contentText, mode, options, "content");
	const perKeyItems = () => buildPerKeyItems(parsed.data, mode, options);
	let items;
	switch (section) {
		case "frontmatter":
			items = frontmatterItems();
			break;
		case "content":
			items = contentItems();
			break;
		case "split":
			items = [...frontmatterItems(), ...contentItems()];
			break;
		case "per-key":
			items = perKeyItems();
			break;
		case "split-per-key":
			items = [...perKeyItems(), ...contentItems()];
			break;
		default:
			items = [];
	}
	return {
		section,
		total: sumTotals(items),
		frontmatterType: parsed.frontmatterType,
		items
	};
}
|
|
1652
1767
|
|
|
1653
1768
|
//#endregion
|
|
1654
|
-
//#region src/
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
const
|
|
1658
|
-
const
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
|
|
1666
|
-
|
|
1667
|
-
|
|
1668
|
-
|
|
1669
|
-
|
|
1670
|
-
|
|
1671
|
-
if (
|
|
1672
|
-
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
|
|
1693
|
-
|
|
1694
|
-
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1769
|
+
//#region src/cli/batch/aggregate.ts
|
|
1770
|
+
/**
 * Combines two word-counter results that share a breakdown mode.
 *
 * "chunk", "segments" and "char" breakdowns are concatenated. "collector" and
 * "char-collector" breakdowns are merged per locale, keeping first-seen locale order;
 * merged entries are fresh objects, so the inputs' items are not modified.
 *
 * @param {object} left - First result.
 * @param {object} right - Second result.
 * @param {boolean} preserveCollectorSegments - In "collector" mode, keep and append segment
 *   lists when truthy; otherwise every merged entry gets an empty `segments` array.
 * @returns {object} Result with summed `total`, summed `counts` (when either side has counts)
 *   and the combined breakdown. For an unrecognized mode, `left.breakdown` is reused as-is.
 * @throws {Error} When the two breakdown modes differ.
 */
function mergeWordCounterResult(left, right, preserveCollectorSegments) {
	const mode = left.breakdown.mode;
	if (mode !== right.breakdown.mode) throw new Error("Cannot merge different breakdown modes.");
	const total = left.total + right.total;
	let counts;
	if (left.counts || right.counts) counts = {
		words: (left.counts?.words ?? 0) + (right.counts?.words ?? 0),
		nonWords: (left.counts?.nonWords ?? 0) + (right.counts?.nonWords ?? 0),
		total: (left.counts?.total ?? 0) + (right.counts?.total ?? 0)
	};
	// Folds left then right items into one entry per locale; Map iteration keeps first-seen order.
	const mergeByLocale = (createEntry, absorb) => {
		const byLocale = /* @__PURE__ */ new Map();
		for (const side of [left, right]) for (const item of side.breakdown.items) {
			const existing = byLocale.get(item.locale);
			if (existing) absorb(existing, item);
			else byLocale.set(item.locale, createEntry(item));
		}
		return [...byLocale.values()];
	};
	switch (mode) {
		case "chunk":
		case "segments":
		case "char": return {
			total,
			counts,
			breakdown: {
				mode,
				items: [...left.breakdown.items, ...right.breakdown.items]
			}
		};
		case "char-collector": {
			const items = mergeByLocale((item) => ({
				locale: item.locale,
				chars: item.chars,
				nonWords: item.nonWords ? mergeNonWordCollections(createNonWordCollection(), item.nonWords) : void 0
			}), (existing, item) => {
				existing.chars += item.chars;
				if (!item.nonWords) return;
				if (!existing.nonWords) existing.nonWords = createNonWordCollection();
				mergeNonWordCollections(existing.nonWords, item.nonWords);
			});
			return {
				total,
				counts,
				breakdown: {
					mode,
					items
				}
			};
		}
		case "collector": {
			const items = mergeByLocale((item) => ({
				locale: item.locale,
				words: item.words,
				segments: preserveCollectorSegments ? [...item.segments] : []
			}), (existing, item) => {
				existing.words += item.words;
				if (preserveCollectorSegments) appendAll(existing.segments, item.segments);
			});
			let nonWords;
			if (left.breakdown.nonWords || right.breakdown.nonWords) {
				nonWords = createNonWordCollection();
				for (const side of [left, right]) {
					if (side.breakdown.nonWords) mergeNonWordCollections(nonWords, side.breakdown.nonWords);
				}
			}
			return {
				total,
				counts,
				breakdown: {
					mode,
					items,
					nonWords
				}
			};
		}
		default: return {
			total,
			counts,
			breakdown: left.breakdown
		};
	}
}
|
|
1701
|
-
function
|
|
1702
|
-
if (
|
|
1703
|
-
|
|
1704
|
-
return
|
|
1886
|
+
/**
 * Folds a list of word-counter results into one.
 *
 * @param {Array<object>} results - Results to combine; falsy entries after the first are skipped.
 * @param {boolean} preserveCollectorSegments - Forwarded to mergeWordCounterResult.
 * @returns {object} The merged result. When the list is empty or its first entry is falsy,
 *   the count of an empty string in "chunk" mode is returned instead.
 */
function aggregateWordCounterResults(results, preserveCollectorSegments) {
	const [first, ...rest] = results;
	if (!first) return wc_default("", { mode: "chunk" });
	return rest.reduce((aggregate, current) => {
		if (!current) return aggregate;
		return mergeWordCounterResult(aggregate, current, preserveCollectorSegments);
	}, first);
}
|
|
1706
|
-
function
|
|
1707
|
-
|
|
1708
|
-
|
|
1898
|
+
/**
 * Builds the grouping key for a section item: the source, a colon, then the name.
 *
 * @param {string} name - Item name.
 * @param {string} source - Item source label.
 * @returns {string} `source:name`.
 */
function buildSectionKey(name, source) {
	const key = `${source}:${name}`;
	return key;
}
|
|
1901
|
+
/**
 * Combines sectioned results from several inputs into one sectioned result.
 *
 * Items are grouped by source and name, each group is merged with
 * aggregateWordCounterResults, and groups are ordered frontmatter first, then content,
 * then by name within a source.
 *
 * @param {Array<object>} results - Sectioned results, all using the same section mode.
 * @param {boolean} preserveCollectorSegments - Forwarded to aggregateWordCounterResults.
 * @returns {{ section: string, total: number, frontmatterType: unknown, items: Array }}
 *   `frontmatterType` is kept only when every input agrees with the first; otherwise null.
 * @throws {Error} When an input's section mode differs from the first input's.
 */
function aggregateSectionedResults(results, preserveCollectorSegments) {
	if (results.length === 0) return {
		section: "all",
		total: 0,
		frontmatterType: null,
		items: []
	};
	const head = results[0];
	const section = head?.section ?? "all";
	let frontmatterType = head?.frontmatterType ?? null;
	let total = 0;
	const grouped = /* @__PURE__ */ new Map();
	for (const result of results) {
		total += result.total;
		if (result.section !== section) throw new Error("Cannot aggregate section results with different section modes.");
		if (result.frontmatterType !== frontmatterType) frontmatterType = null;
		for (const item of result.items) {
			const key = buildSectionKey(item.name, item.source);
			const bucket = grouped.get(key);
			if (bucket) bucket.items.push(item.result);
			else grouped.set(key, {
				name: item.name,
				source: item.source,
				items: [item.result]
			});
		}
	}
	const sourceRank = new Map([["frontmatter", 0], ["content", 1]]);
	const rankOf = (source) => sourceRank.get(source) ?? 0;
	const ordered = [...grouped.values()].sort((a, b) => {
		const bySource = rankOf(a.source) - rankOf(b.source);
		return bySource !== 0 ? bySource : a.name.localeCompare(b.name);
	});
	const items = ordered.map(({ name, source, items: groupResults }) => ({
		name,
		source,
		result: aggregateWordCounterResults(groupResults, preserveCollectorSegments)
	}));
	return {
		section,
		total,
		frontmatterType,
		items
	};
}
|
|
1716
|
-
function
|
|
1717
|
-
|
|
1718
|
-
const
|
|
1719
|
-
|
|
1720
|
-
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1947
|
+
function stripCollectorSegmentsFromWordCounterResult(result) {
|
|
1948
|
+
if (result.breakdown.mode !== "collector") return;
|
|
1949
|
+
for (const item of result.breakdown.items) item.segments = [];
|
|
1950
|
+
}
|
|
1951
|
+
function stripCollectorSegmentsFromSectionedResult(result) {
|
|
1952
|
+
for (const item of result.items) stripCollectorSegmentsFromWordCounterResult(item.result);
|
|
1953
|
+
}
|
|
1954
|
+
function compactCollectorSegmentsInCountResult(result) {
|
|
1955
|
+
if ("section" in result) {
|
|
1956
|
+
stripCollectorSegmentsFromSectionedResult(result);
|
|
1957
|
+
return;
|
|
1727
1958
|
}
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1959
|
+
stripCollectorSegmentsFromWordCounterResult(result);
|
|
1960
|
+
}
|
|
1961
|
+
async function buildBatchSummary(inputs, section, wcOptions, options = {}) {
|
|
1962
|
+
const preserveCollectorSegments = options.preserveCollectorSegments ?? true;
|
|
1963
|
+
const files = [];
|
|
1964
|
+
for (const input of inputs) {
|
|
1965
|
+
const result = section === "all" ? wc_default(input.content, wcOptions) : countSections(input.content, section, wcOptions);
|
|
1966
|
+
if (!preserveCollectorSegments) compactCollectorSegmentsInCountResult(result);
|
|
1967
|
+
files.push({
|
|
1968
|
+
path: input.path,
|
|
1969
|
+
result
|
|
1970
|
+
});
|
|
1971
|
+
options.onFileCounted?.({
|
|
1972
|
+
completed: files.length,
|
|
1973
|
+
total: inputs.length
|
|
1734
1974
|
});
|
|
1735
1975
|
}
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
return left.order - right.order;
|
|
1976
|
+
return finalizeBatchSummaryFromFileResults(files, section, wcOptions, {
|
|
1977
|
+
onFinalizeStart: options.onFinalizeStart,
|
|
1978
|
+
preserveCollectorSegments: options.preserveCollectorSegments
|
|
1740
1979
|
});
|
|
1741
|
-
return resolvedRules;
|
|
1742
1980
|
}
|
|
1743
|
-
function
|
|
1744
|
-
const
|
|
1745
|
-
const
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
1981
|
+
function finalizeBatchSummaryFromFileResults(files, section, wcOptions, options = {}) {
|
|
1982
|
+
const preserveCollectorSegments = options.preserveCollectorSegments ?? true;
|
|
1983
|
+
if (!preserveCollectorSegments) for (const file of files) compactCollectorSegmentsInCountResult(file.result);
|
|
1984
|
+
options.onFinalizeStart?.();
|
|
1985
|
+
if (files.length === 0) return {
|
|
1986
|
+
files,
|
|
1987
|
+
skipped: [],
|
|
1988
|
+
aggregate: section === "all" ? wc_default("", wcOptions) : {
|
|
1989
|
+
section,
|
|
1990
|
+
total: 0,
|
|
1991
|
+
frontmatterType: null,
|
|
1992
|
+
items: []
|
|
1993
|
+
}
|
|
1994
|
+
};
|
|
1749
1995
|
return {
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
latinLocales
|
|
1996
|
+
files,
|
|
1997
|
+
skipped: [],
|
|
1998
|
+
aggregate: section === "all" ? aggregateWordCounterResults(files.map((file) => file.result), preserveCollectorSegments) : aggregateSectionedResults(files.map((file) => file.result), preserveCollectorSegments)
|
|
1754
1999
|
};
|
|
1755
2000
|
}
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
if (regex.han.test(char)) {
|
|
1771
|
-
if (allowJapaneseHanCarry && previousLocale && previousLocale.startsWith("ja")) return previousLocale;
|
|
1772
|
-
return context.hanHint ?? DEFAULT_HAN_TAG;
|
|
1773
|
-
}
|
|
1774
|
-
if (regex.latin.test(char)) {
|
|
1775
|
-
const hintedLocale = detectLatinLocale(char, context);
|
|
1776
|
-
if (hintedLocale !== DEFAULT_LOCALE) return hintedLocale;
|
|
1777
|
-
if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE) return previousLocale;
|
|
1778
|
-
if (context.latinHint) return context.latinHint;
|
|
1779
|
-
return DEFAULT_LOCALE;
|
|
2001
|
+
|
|
2002
|
+
//#endregion
|
|
2003
|
+
//#region src/cli/path/load.ts
|
|
2004
|
+
function isProbablyBinary(buffer) {
|
|
2005
|
+
if (buffer.length === 0) return false;
|
|
2006
|
+
const sampleSize = Math.min(buffer.length, 1024);
|
|
2007
|
+
let suspicious = 0;
|
|
2008
|
+
for (let index = 0; index < sampleSize; index += 1) {
|
|
2009
|
+
const byte = buffer[index] ?? 0;
|
|
2010
|
+
if (byte === 0) return true;
|
|
2011
|
+
if (byte === 9 || byte === 10 || byte === 13) continue;
|
|
2012
|
+
if (byte >= 32 && byte <= 126) continue;
|
|
2013
|
+
if (byte >= 128) continue;
|
|
2014
|
+
suspicious += 1;
|
|
1780
2015
|
}
|
|
1781
|
-
return
|
|
2016
|
+
return suspicious / sampleSize > .3;
|
|
1782
2017
|
}
|
|
1783
2018
|
|
|
1784
2019
|
//#endregion
|
|
1785
|
-
//#region src/
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
const
|
|
1790
|
-
const
|
|
1791
|
-
let
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
return;
|
|
2020
|
+
//#region src/cli/batch/jobs/queue.ts
|
|
2021
|
+
async function runBoundedQueue(total, requestedJobs, worker) {
|
|
2022
|
+
if (total === 0) return [];
|
|
2023
|
+
const safeRequestedJobs = Number.isFinite(requestedJobs) ? Math.floor(requestedJobs) : 1;
|
|
2024
|
+
const concurrency = Math.max(1, Math.min(total, safeRequestedJobs));
|
|
2025
|
+
const results = new Array(total);
|
|
2026
|
+
let nextIndex = 0;
|
|
2027
|
+
const runWorker = async () => {
|
|
2028
|
+
while (true) {
|
|
2029
|
+
const current = nextIndex;
|
|
2030
|
+
nextIndex += 1;
|
|
2031
|
+
if (current >= total) return;
|
|
2032
|
+
results[current] = await worker(current);
|
|
1799
2033
|
}
|
|
1800
|
-
if (HARD_BOUNDARY_REGEX.test(char)) sawCarryBoundary = true;
|
|
1801
2034
|
};
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
2035
|
+
await Promise.all(Array.from({ length: concurrency }, () => runWorker()));
|
|
2036
|
+
return results;
|
|
2037
|
+
}
|
|
2038
|
+
|
|
2039
|
+
//#endregion
|
|
2040
|
+
//#region src/cli/batch/jobs/load-count-experimental.ts
|
|
2041
|
+
async function countBatchInputsWithJobs(filePaths, options) {
|
|
2042
|
+
const limits = resolveBatchJobsLimit();
|
|
2043
|
+
const total = filePaths.length;
|
|
2044
|
+
let completed = 0;
|
|
2045
|
+
const entries = await runBoundedQueue(filePaths.length, options.jobs, async (index) => {
|
|
2046
|
+
const path = filePaths[index];
|
|
2047
|
+
if (!path) {
|
|
2048
|
+
completed += 1;
|
|
2049
|
+
options.onFileProcessed?.({
|
|
2050
|
+
completed,
|
|
2051
|
+
total
|
|
2052
|
+
});
|
|
2053
|
+
return {
|
|
2054
|
+
type: "skip",
|
|
2055
|
+
skip: {
|
|
2056
|
+
path: "",
|
|
2057
|
+
reason: "not readable: missing path"
|
|
2058
|
+
}
|
|
2059
|
+
};
|
|
1818
2060
|
}
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
|
|
2061
|
+
let buffer;
|
|
2062
|
+
try {
|
|
2063
|
+
buffer = await readFile(path);
|
|
2064
|
+
} catch (error) {
|
|
2065
|
+
if (isResourceLimitError(error)) throw createResourceLimitError(path, error, options.jobs, limits);
|
|
2066
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2067
|
+
completed += 1;
|
|
2068
|
+
options.onFileProcessed?.({
|
|
2069
|
+
completed,
|
|
2070
|
+
total
|
|
2071
|
+
});
|
|
2072
|
+
return {
|
|
2073
|
+
type: "skip",
|
|
2074
|
+
skip: {
|
|
2075
|
+
path,
|
|
2076
|
+
reason: `not readable: ${message}`
|
|
1828
2077
|
}
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
buffer = `${suffix}${char}`;
|
|
1837
|
-
bufferHasScript = true;
|
|
1838
|
-
updateCarryBoundaryState(detected, char);
|
|
1839
|
-
continue;
|
|
1840
|
-
}
|
|
1841
|
-
chunks.push({
|
|
1842
|
-
locale: currentLocale,
|
|
1843
|
-
text: buffer
|
|
2078
|
+
};
|
|
2079
|
+
}
|
|
2080
|
+
if (isProbablyBinary(buffer)) {
|
|
2081
|
+
completed += 1;
|
|
2082
|
+
options.onFileProcessed?.({
|
|
2083
|
+
completed,
|
|
2084
|
+
total
|
|
1844
2085
|
});
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
2086
|
+
return {
|
|
2087
|
+
type: "skip",
|
|
2088
|
+
skip: {
|
|
2089
|
+
path,
|
|
2090
|
+
reason: "binary file"
|
|
2091
|
+
}
|
|
2092
|
+
};
|
|
2093
|
+
}
|
|
2094
|
+
const content = buffer.toString("utf8");
|
|
2095
|
+
const result = options.section === "all" ? wc_default(content, options.wcOptions) : countSections(content, options.section, options.wcOptions);
|
|
2096
|
+
if (!options.preserveCollectorSegments) compactCollectorSegmentsInCountResult(result);
|
|
2097
|
+
completed += 1;
|
|
2098
|
+
options.onFileProcessed?.({
|
|
2099
|
+
completed,
|
|
2100
|
+
total
|
|
2101
|
+
});
|
|
2102
|
+
return {
|
|
2103
|
+
type: "file",
|
|
2104
|
+
file: {
|
|
2105
|
+
path,
|
|
2106
|
+
result
|
|
2107
|
+
}
|
|
2108
|
+
};
|
|
2109
|
+
});
|
|
2110
|
+
const files = [];
|
|
2111
|
+
const skipped = [];
|
|
2112
|
+
for (const entry of entries) {
|
|
2113
|
+
if (entry.type === "file") {
|
|
2114
|
+
files.push(entry.file);
|
|
1849
2115
|
continue;
|
|
1850
2116
|
}
|
|
1851
|
-
|
|
1852
|
-
if (detected !== null) bufferHasScript = true;
|
|
1853
|
-
updateCarryBoundaryState(detected, char);
|
|
2117
|
+
skipped.push(entry.skip);
|
|
1854
2118
|
}
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
}
|
|
1859
|
-
return mergeAdjacentChunks(chunks);
|
|
2119
|
+
return {
|
|
2120
|
+
files,
|
|
2121
|
+
skipped
|
|
2122
|
+
};
|
|
1860
2123
|
}
|
|
1861
|
-
|
|
1862
|
-
|
|
1863
|
-
|
|
1864
|
-
|
|
1865
|
-
|
|
1866
|
-
|
|
1867
|
-
|
|
2124
|
+
|
|
2125
|
+
//#endregion
|
|
2126
|
+
//#region src/cli/batch/jobs/load-count-worker-experimental.ts
|
|
2127
|
+
var WorkerRouteUnavailableError = class extends Error {};
|
|
2128
|
+
function isFallbackFriendlyWorkerError(error) {
|
|
2129
|
+
if (typeof error !== "object" || error === null) return false;
|
|
2130
|
+
const code = "code" in error ? String(error.code) : "";
|
|
2131
|
+
if (code === "ERR_WORKER_PATH" || code === "ERR_WORKER_UNSUPPORTED_EXTENSION" || code === "ERR_UNKNOWN_FILE_EXTENSION" || code === "ERR_MODULE_NOT_FOUND") return true;
|
|
2132
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2133
|
+
return message.includes("Unknown file extension") || message.includes("Cannot find module");
|
|
1868
2134
|
}
|
|
1869
|
-
function
|
|
1870
|
-
if (
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
2135
|
+
async function countBatchInputsWithWorkerJobs(filePaths, options) {
|
|
2136
|
+
if (process.env.WORD_COUNTER_DISABLE_EXPERIMENTAL_WORKERS === "1") throw new WorkerRouteUnavailableError("Worker route disabled by environment.");
|
|
2137
|
+
let workerPoolModule;
|
|
2138
|
+
try {
|
|
2139
|
+
workerPoolModule = await import("./worker-pool.mjs");
|
|
2140
|
+
} catch (error) {
|
|
2141
|
+
throw new WorkerRouteUnavailableError(`Worker route unavailable: ${error instanceof Error ? error.message : String(error)}`);
|
|
2142
|
+
}
|
|
2143
|
+
try {
|
|
2144
|
+
return await workerPoolModule.countBatchInputsWithWorkerPool({
|
|
2145
|
+
filePaths,
|
|
2146
|
+
jobs: options.jobs,
|
|
2147
|
+
section: options.section,
|
|
2148
|
+
wcOptions: options.wcOptions,
|
|
2149
|
+
preserveCollectorSegments: options.preserveCollectorSegments,
|
|
2150
|
+
onFileProcessed: options.onFileProcessed
|
|
2151
|
+
});
|
|
2152
|
+
} catch (error) {
|
|
2153
|
+
if (error instanceof workerPoolModule.WorkerPoolTaskFatalError) {
|
|
2154
|
+
if (error.code === "EMFILE" || error.code === "ENFILE") throw createResourceLimitError(error.path, {
|
|
2155
|
+
code: error.code,
|
|
2156
|
+
message: error.message
|
|
2157
|
+
}, options.jobs, resolveBatchJobsLimit());
|
|
2158
|
+
throw new Error(error.message);
|
|
1882
2159
|
}
|
|
2160
|
+
if (error instanceof workerPoolModule.WorkerPoolUnavailableError || isFallbackFriendlyWorkerError(error)) throw new WorkerRouteUnavailableError(`Worker route unavailable: ${error instanceof Error ? error.message : String(error)}`);
|
|
2161
|
+
throw error;
|
|
1883
2162
|
}
|
|
1884
|
-
merged.push(last);
|
|
1885
|
-
return merged;
|
|
1886
2163
|
}
|
|
1887
2164
|
|
|
1888
2165
|
//#endregion
|
|
1889
|
-
//#region src/
|
|
1890
|
-
function
|
|
1891
|
-
const
|
|
1892
|
-
const
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1909
|
-
|
|
1910
|
-
|
|
1911
|
-
|
|
1912
|
-
|
|
1913
|
-
|
|
1914
|
-
breakdown: {
|
|
1915
|
-
mode,
|
|
1916
|
-
items: analyzed.map((chunk) => ({
|
|
1917
|
-
locale: chunk.locale,
|
|
1918
|
-
text: chunk.text,
|
|
1919
|
-
chars: chunk.chars,
|
|
1920
|
-
nonWords: chunk.nonWords
|
|
1921
|
-
}))
|
|
1922
|
-
}
|
|
2166
|
+
//#region src/cli/batch/jobs/load-only.ts
|
|
2167
|
+
async function loadBatchInputsWithJobs(filePaths, options) {
|
|
2168
|
+
const limits = resolveBatchJobsLimit();
|
|
2169
|
+
const entries = await runBoundedQueue(filePaths.length, options.jobs, async (index) => {
|
|
2170
|
+
const path = filePaths[index];
|
|
2171
|
+
if (!path) return {
|
|
2172
|
+
type: "skip",
|
|
2173
|
+
path: "",
|
|
2174
|
+
reason: "not readable: missing path"
|
|
2175
|
+
};
|
|
2176
|
+
let buffer;
|
|
2177
|
+
try {
|
|
2178
|
+
buffer = await readFile(path);
|
|
2179
|
+
} catch (error) {
|
|
2180
|
+
if (isResourceLimitError(error)) throw createResourceLimitError(path, error, options.jobs, limits);
|
|
2181
|
+
return {
|
|
2182
|
+
type: "skip",
|
|
2183
|
+
path,
|
|
2184
|
+
reason: `not readable: ${error instanceof Error ? error.message : String(error)}`
|
|
2185
|
+
};
|
|
2186
|
+
}
|
|
2187
|
+
if (isProbablyBinary(buffer)) return {
|
|
2188
|
+
type: "skip",
|
|
2189
|
+
path,
|
|
2190
|
+
reason: "binary file"
|
|
1923
2191
|
};
|
|
1924
2192
|
return {
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
|
|
1928
|
-
mode,
|
|
1929
|
-
items: aggregateCharsByLocale(analyzed).map((chunk) => ({
|
|
1930
|
-
locale: chunk.locale,
|
|
1931
|
-
chars: chunk.chars,
|
|
1932
|
-
nonWords: chunk.nonWords
|
|
1933
|
-
}))
|
|
1934
|
-
}
|
|
2193
|
+
type: "file",
|
|
2194
|
+
path,
|
|
2195
|
+
content: buffer.toString("utf8")
|
|
1935
2196
|
};
|
|
1936
|
-
}
|
|
1937
|
-
const
|
|
1938
|
-
const
|
|
1939
|
-
const
|
|
1940
|
-
if (
|
|
1941
|
-
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
1945
|
-
|
|
1946
|
-
return sum + chunkTotal;
|
|
1947
|
-
}, 0);
|
|
1948
|
-
const counts = collectNonWords ? {
|
|
1949
|
-
words: wordsTotal,
|
|
1950
|
-
nonWords: nonWordsTotal,
|
|
1951
|
-
total
|
|
1952
|
-
} : void 0;
|
|
1953
|
-
if (mode === "segments") return {
|
|
1954
|
-
total,
|
|
1955
|
-
counts,
|
|
1956
|
-
breakdown: {
|
|
1957
|
-
mode,
|
|
1958
|
-
items: analyzed.map((chunk) => ({
|
|
1959
|
-
locale: chunk.locale,
|
|
1960
|
-
text: chunk.text,
|
|
1961
|
-
words: chunk.words,
|
|
1962
|
-
segments: chunk.segments,
|
|
1963
|
-
nonWords: chunk.nonWords
|
|
1964
|
-
}))
|
|
1965
|
-
}
|
|
1966
|
-
};
|
|
1967
|
-
if (mode === "collector") return {
|
|
1968
|
-
total,
|
|
1969
|
-
counts,
|
|
1970
|
-
breakdown: {
|
|
1971
|
-
mode,
|
|
1972
|
-
items: aggregateByLocale(analyzed),
|
|
1973
|
-
nonWords: collectNonWordsAggregate(analyzed, collectNonWords)
|
|
2197
|
+
});
|
|
2198
|
+
const files = [];
|
|
2199
|
+
const skipped = [];
|
|
2200
|
+
for (const entry of entries) {
|
|
2201
|
+
if (entry.type === "file") {
|
|
2202
|
+
files.push({
|
|
2203
|
+
path: entry.path,
|
|
2204
|
+
content: entry.content
|
|
2205
|
+
});
|
|
2206
|
+
continue;
|
|
1974
2207
|
}
|
|
1975
|
-
|
|
2208
|
+
skipped.push({
|
|
2209
|
+
path: entry.path,
|
|
2210
|
+
reason: entry.reason
|
|
2211
|
+
});
|
|
2212
|
+
}
|
|
1976
2213
|
return {
|
|
1977
|
-
|
|
1978
|
-
|
|
1979
|
-
breakdown: {
|
|
1980
|
-
mode,
|
|
1981
|
-
items: analyzed.map((chunk) => ({
|
|
1982
|
-
locale: chunk.locale,
|
|
1983
|
-
text: chunk.text,
|
|
1984
|
-
words: chunk.words,
|
|
1985
|
-
nonWords: chunk.nonWords
|
|
1986
|
-
}))
|
|
1987
|
-
}
|
|
2214
|
+
files,
|
|
2215
|
+
skipped
|
|
1988
2216
|
};
|
|
1989
2217
|
}
|
|
1990
|
-
function getNonWordTotal(nonWords) {
|
|
1991
|
-
return nonWords.counts.emoji + nonWords.counts.symbols + nonWords.counts.punctuation + (nonWords.counts.whitespace ?? 0);
|
|
1992
|
-
}
|
|
1993
|
-
function collectNonWordsAggregate(analyzed, enabled) {
|
|
1994
|
-
if (!enabled) return;
|
|
1995
|
-
const collection = createNonWordCollection();
|
|
1996
|
-
for (const chunk of analyzed) {
|
|
1997
|
-
if (!chunk.nonWords) continue;
|
|
1998
|
-
mergeNonWordCollections(collection, chunk.nonWords);
|
|
1999
|
-
}
|
|
2000
|
-
return collection;
|
|
2001
|
-
}
|
|
2002
2218
|
|
|
2003
2219
|
//#endregion
|
|
2004
|
-
//#region src/
|
|
2005
|
-
|
|
2220
|
+
//#region src/cli/batch/jobs/render.ts
|
|
2221
|
+
function finalizeBatchJobsSummary(files, section, wcOptions, options = {}) {
|
|
2222
|
+
return finalizeBatchSummaryFromFileResults(files, section, wcOptions, {
|
|
2223
|
+
onFinalizeStart: options.onFinalizeStart,
|
|
2224
|
+
preserveCollectorSegments: options.preserveCollectorSegments
|
|
2225
|
+
});
|
|
2226
|
+
}
|
|
2006
2227
|
|
|
2007
2228
|
//#endregion
|
|
2008
|
-
//#region src/
|
|
2009
|
-
function
|
|
2010
|
-
|
|
2011
|
-
if (typeof value === "string") return value;
|
|
2012
|
-
if (typeof value === "number" || typeof value === "boolean") return String(value);
|
|
2229
|
+
//#region src/cli/path/resolve.ts
|
|
2230
|
+
async function expandDirectory(rootPath, directoryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats) {
|
|
2231
|
+
let entries;
|
|
2013
2232
|
try {
|
|
2014
|
-
|
|
2015
|
-
|
|
2016
|
-
|
|
2233
|
+
entries = await readdir(directoryPath, {
|
|
2234
|
+
withFileTypes: true,
|
|
2235
|
+
encoding: "utf8"
|
|
2236
|
+
});
|
|
2237
|
+
} catch (error) {
|
|
2238
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2239
|
+
skipped.push({
|
|
2240
|
+
path: directoryPath,
|
|
2241
|
+
reason: `directory read failed: ${message}`
|
|
2242
|
+
});
|
|
2243
|
+
debug.emit("path.resolve.expand.read_failed", {
|
|
2244
|
+
directory: directoryPath,
|
|
2245
|
+
reason: `directory read failed: ${message}`
|
|
2246
|
+
});
|
|
2247
|
+
return [];
|
|
2017
2248
|
}
|
|
2018
|
-
|
|
2019
|
-
|
|
2020
|
-
|
|
2021
|
-
|
|
2022
|
-
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
|
|
2027
|
-
|
|
2249
|
+
const sortedEntries = entries.slice().sort((left, right) => left.name.localeCompare(right.name));
|
|
2250
|
+
const files = [];
|
|
2251
|
+
debug.emit("path.resolve.expand.start", {
|
|
2252
|
+
directory: directoryPath,
|
|
2253
|
+
entries: sortedEntries.length,
|
|
2254
|
+
recursive
|
|
2255
|
+
});
|
|
2256
|
+
for (const entry of sortedEntries) {
|
|
2257
|
+
const entryPath = resolve(directoryPath, entry.name);
|
|
2258
|
+
if (entry.isFile()) {
|
|
2259
|
+
if (!shouldIncludeFromDirectory(entryPath, extensionFilter)) {
|
|
2260
|
+
skipped.push({
|
|
2261
|
+
path: entryPath,
|
|
2262
|
+
reason: "extension excluded"
|
|
2263
|
+
});
|
|
2264
|
+
debug.emit("path.resolve.filter.excluded", {
|
|
2265
|
+
path: entryPath,
|
|
2266
|
+
reason: "extension excluded"
|
|
2267
|
+
}, { verbosity: "verbose" });
|
|
2268
|
+
stats.filterExcluded += 1;
|
|
2269
|
+
continue;
|
|
2270
|
+
}
|
|
2271
|
+
const relativePath = toDirectoryRelativePath(rootPath, entryPath);
|
|
2272
|
+
if (!shouldIncludeFromDirectoryRegex(relativePath, regexFilter)) {
|
|
2273
|
+
if (recordRegexExcluded(entryPath)) {
|
|
2274
|
+
debug.emit("path.resolve.regex.excluded", {
|
|
2275
|
+
path: entryPath,
|
|
2276
|
+
relativePath,
|
|
2277
|
+
pattern: regexFilter.sourcePattern,
|
|
2278
|
+
reason: "regex excluded"
|
|
2279
|
+
}, { verbosity: "verbose" });
|
|
2280
|
+
stats.regexExcluded += 1;
|
|
2281
|
+
}
|
|
2282
|
+
continue;
|
|
2283
|
+
}
|
|
2284
|
+
files.push(entryPath);
|
|
2285
|
+
stats.directoryIncluded += 1;
|
|
2286
|
+
debug.emit("path.resolve.expand.include", {
|
|
2287
|
+
path: entryPath,
|
|
2288
|
+
source: "directory"
|
|
2289
|
+
}, { verbosity: "verbose" });
|
|
2290
|
+
continue;
|
|
2291
|
+
}
|
|
2292
|
+
if (!entry.isDirectory() || !recursive) continue;
|
|
2293
|
+
appendAll(files, await expandDirectory(rootPath, entryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats));
|
|
2294
|
+
}
|
|
2295
|
+
debug.emit("path.resolve.expand.complete", {
|
|
2296
|
+
directory: directoryPath,
|
|
2297
|
+
files: files.length
|
|
2028
2298
|
});
|
|
2299
|
+
return files;
|
|
2029
2300
|
}
|
|
2030
|
-
function
|
|
2031
|
-
|
|
2032
|
-
|
|
2033
|
-
|
|
2034
|
-
|
|
2035
|
-
|
|
2036
|
-
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
function countSections(input, section, options = {}) {
|
|
2041
|
-
const mode = options.mode ?? "chunk";
|
|
2042
|
-
if (section === "all") {
|
|
2043
|
-
const result = wc_default(input, options);
|
|
2044
|
-
return {
|
|
2045
|
-
section,
|
|
2046
|
-
total: result.total,
|
|
2047
|
-
frontmatterType: null,
|
|
2048
|
-
items: [{
|
|
2049
|
-
name: "all",
|
|
2050
|
-
source: "content",
|
|
2051
|
-
result
|
|
2052
|
-
}]
|
|
2053
|
-
};
|
|
2054
|
-
}
|
|
2055
|
-
const parsed = parseMarkdown(input);
|
|
2056
|
-
const frontmatterText = parsed.frontmatter ?? "";
|
|
2057
|
-
const contentText = parsed.content ?? "";
|
|
2058
|
-
let items = [];
|
|
2059
|
-
if (section === "frontmatter") items = buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter");
|
|
2060
|
-
else if (section === "content") items = buildSingleItem("content", contentText, mode, options, "content");
|
|
2061
|
-
else if (section === "split") items = [...buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter"), ...buildSingleItem("content", contentText, mode, options, "content")];
|
|
2062
|
-
else if (section === "per-key") items = buildPerKeyItems(parsed.data, mode, options);
|
|
2063
|
-
else if (section === "split-per-key") items = [...buildPerKeyItems(parsed.data, mode, options), ...buildSingleItem("content", contentText, mode, options, "content")];
|
|
2064
|
-
return {
|
|
2065
|
-
section,
|
|
2066
|
-
total: sumTotals(items),
|
|
2067
|
-
frontmatterType: parsed.frontmatterType,
|
|
2068
|
-
items
|
|
2301
|
+
async function resolveBatchFilePaths(pathInputs, options) {
|
|
2302
|
+
const skipped = [];
|
|
2303
|
+
const regexExcludedPaths = /* @__PURE__ */ new Set();
|
|
2304
|
+
const resolvedFiles = /* @__PURE__ */ new Set();
|
|
2305
|
+
const stats = {
|
|
2306
|
+
dedupeAccepted: 0,
|
|
2307
|
+
dedupeDuplicates: 0,
|
|
2308
|
+
filterExcluded: 0,
|
|
2309
|
+
regexExcluded: 0,
|
|
2310
|
+
directoryIncluded: 0
|
|
2069
2311
|
};
|
|
2070
|
-
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
|
|
2076
|
-
|
|
2077
|
-
const counts = left.counts || right.counts ? {
|
|
2078
|
-
words: (left.counts?.words ?? 0) + (right.counts?.words ?? 0),
|
|
2079
|
-
nonWords: (left.counts?.nonWords ?? 0) + (right.counts?.nonWords ?? 0),
|
|
2080
|
-
total: (left.counts?.total ?? 0) + (right.counts?.total ?? 0)
|
|
2081
|
-
} : void 0;
|
|
2082
|
-
if (left.breakdown.mode === "chunk" && right.breakdown.mode === "chunk") return {
|
|
2083
|
-
total,
|
|
2084
|
-
counts,
|
|
2085
|
-
breakdown: {
|
|
2086
|
-
mode: "chunk",
|
|
2087
|
-
items: [...left.breakdown.items, ...right.breakdown.items]
|
|
2088
|
-
}
|
|
2312
|
+
const extensionFilter = options.extensionFilter ?? buildDirectoryExtensionFilter(void 0, void 0);
|
|
2313
|
+
let regexFilter;
|
|
2314
|
+
const debug = options.debug ?? {
|
|
2315
|
+
enabled: false,
|
|
2316
|
+
verbosity: "compact",
|
|
2317
|
+
emit() {},
|
|
2318
|
+
close: async () => {}
|
|
2089
2319
|
};
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2320
|
+
debug.emit("path.resolve.inputs", {
|
|
2321
|
+
inputs: pathInputs.length,
|
|
2322
|
+
pathMode: options.pathMode,
|
|
2323
|
+
recursive: options.recursive,
|
|
2324
|
+
hasRegex: Boolean(options.directoryRegexPattern)
|
|
2325
|
+
});
|
|
2326
|
+
const addResolvedFile = (filePath, details) => {
|
|
2327
|
+
regexExcludedPaths.delete(filePath);
|
|
2328
|
+
if (resolvedFiles.has(filePath)) {
|
|
2329
|
+
stats.dedupeDuplicates += 1;
|
|
2330
|
+
debug.emit("path.resolve.dedupe.duplicate", {
|
|
2331
|
+
path: filePath,
|
|
2332
|
+
source: details.source,
|
|
2333
|
+
input: details.input
|
|
2334
|
+
}, { verbosity: "verbose" });
|
|
2335
|
+
return;
|
|
2096
2336
|
}
|
|
2337
|
+
resolvedFiles.add(filePath);
|
|
2338
|
+
stats.dedupeAccepted += 1;
|
|
2339
|
+
debug.emit("path.resolve.dedupe.accept", {
|
|
2340
|
+
path: filePath,
|
|
2341
|
+
source: details.source,
|
|
2342
|
+
input: details.input
|
|
2343
|
+
}, { verbosity: "verbose" });
|
|
2097
2344
|
};
|
|
2098
|
-
|
|
2099
|
-
|
|
2100
|
-
|
|
2101
|
-
breakdown: {
|
|
2102
|
-
mode: "char",
|
|
2103
|
-
items: [...left.breakdown.items, ...right.breakdown.items]
|
|
2104
|
-
}
|
|
2345
|
+
const getRegexFilter = () => {
|
|
2346
|
+
if (!regexFilter) regexFilter = buildDirectoryRegexFilter(options.directoryRegexPattern);
|
|
2347
|
+
return regexFilter;
|
|
2105
2348
|
};
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2114
|
-
|
|
2115
|
-
|
|
2116
|
-
|
|
2117
|
-
|
|
2118
|
-
|
|
2119
|
-
|
|
2120
|
-
|
|
2121
|
-
|
|
2122
|
-
|
|
2123
|
-
|
|
2124
|
-
|
|
2125
|
-
|
|
2126
|
-
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
|
|
2131
|
-
total,
|
|
2132
|
-
counts,
|
|
2133
|
-
breakdown: {
|
|
2134
|
-
mode: "char-collector",
|
|
2135
|
-
items: localeOrder.map((locale) => {
|
|
2136
|
-
const value = mergedByLocale.get(locale);
|
|
2137
|
-
if (!value) throw new Error(`Missing char-collector entry for locale: ${locale}`);
|
|
2138
|
-
return value;
|
|
2139
|
-
})
|
|
2140
|
-
}
|
|
2141
|
-
};
|
|
2142
|
-
}
|
|
2143
|
-
if (left.breakdown.mode === "collector" && right.breakdown.mode === "collector") {
|
|
2144
|
-
const localeOrder = [];
|
|
2145
|
-
const mergedByLocale = /* @__PURE__ */ new Map();
|
|
2146
|
-
const addItems = (items) => {
|
|
2147
|
-
for (const item of items) {
|
|
2148
|
-
const existing = mergedByLocale.get(item.locale);
|
|
2149
|
-
if (existing) {
|
|
2150
|
-
existing.words += item.words;
|
|
2151
|
-
if (preserveCollectorSegments) appendAll(existing.segments, item.segments);
|
|
2152
|
-
continue;
|
|
2153
|
-
}
|
|
2154
|
-
localeOrder.push(item.locale);
|
|
2155
|
-
mergedByLocale.set(item.locale, {
|
|
2156
|
-
locale: item.locale,
|
|
2157
|
-
words: item.words,
|
|
2158
|
-
segments: preserveCollectorSegments ? [...item.segments] : []
|
|
2159
|
-
});
|
|
2160
|
-
}
|
|
2161
|
-
};
|
|
2162
|
-
addItems(left.breakdown.items);
|
|
2163
|
-
addItems(right.breakdown.items);
|
|
2164
|
-
let mergedNonWords;
|
|
2165
|
-
if (left.breakdown.nonWords || right.breakdown.nonWords) {
|
|
2166
|
-
mergedNonWords = createNonWordCollection();
|
|
2167
|
-
if (left.breakdown.nonWords) mergeNonWordCollections(mergedNonWords, left.breakdown.nonWords);
|
|
2168
|
-
if (right.breakdown.nonWords) mergeNonWordCollections(mergedNonWords, right.breakdown.nonWords);
|
|
2349
|
+
const recordRegexExcluded = (filePath) => {
|
|
2350
|
+
if (resolvedFiles.has(filePath)) return false;
|
|
2351
|
+
regexExcludedPaths.add(filePath);
|
|
2352
|
+
return true;
|
|
2353
|
+
};
|
|
2354
|
+
for (const rawPath of pathInputs) {
|
|
2355
|
+
const targetPath = resolve(rawPath);
|
|
2356
|
+
debug.emit("path.resolve.input", {
|
|
2357
|
+
rawPath,
|
|
2358
|
+
resolvedPath: targetPath
|
|
2359
|
+
});
|
|
2360
|
+
let metadata;
|
|
2361
|
+
try {
|
|
2362
|
+
metadata = await stat(targetPath);
|
|
2363
|
+
} catch (error) {
|
|
2364
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2365
|
+
skipped.push({
|
|
2366
|
+
path: targetPath,
|
|
2367
|
+
reason: `not readable: ${message}`
|
|
2368
|
+
});
|
|
2369
|
+
debug.emit("path.resolve.skip", {
|
|
2370
|
+
path: targetPath,
|
|
2371
|
+
reason: `not readable: ${message}`
|
|
2372
|
+
});
|
|
2373
|
+
continue;
|
|
2169
2374
|
}
|
|
2170
|
-
|
|
2171
|
-
|
|
2172
|
-
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
}
|
|
2182
|
-
|
|
2375
|
+
if (metadata.isDirectory() && options.pathMode === "auto") {
|
|
2376
|
+
const effectiveRegexFilter = getRegexFilter();
|
|
2377
|
+
debug.emit("path.resolve.root.expand", {
|
|
2378
|
+
root: targetPath,
|
|
2379
|
+
recursive: options.recursive,
|
|
2380
|
+
regex: effectiveRegexFilter.sourcePattern ?? null
|
|
2381
|
+
});
|
|
2382
|
+
const files = await expandDirectory(targetPath, targetPath, options.recursive, extensionFilter, effectiveRegexFilter, skipped, recordRegexExcluded, debug, stats);
|
|
2383
|
+
for (const file of files) addResolvedFile(file, {
|
|
2384
|
+
source: "directory",
|
|
2385
|
+
input: targetPath
|
|
2386
|
+
});
|
|
2387
|
+
continue;
|
|
2388
|
+
}
|
|
2389
|
+
if (!metadata.isFile()) {
|
|
2390
|
+
skipped.push({
|
|
2391
|
+
path: targetPath,
|
|
2392
|
+
reason: "not a regular file"
|
|
2393
|
+
});
|
|
2394
|
+
debug.emit("path.resolve.skip", {
|
|
2395
|
+
path: targetPath,
|
|
2396
|
+
reason: "not a regular file"
|
|
2397
|
+
});
|
|
2398
|
+
continue;
|
|
2399
|
+
}
|
|
2400
|
+
addResolvedFile(targetPath, {
|
|
2401
|
+
source: "direct",
|
|
2402
|
+
input: targetPath
|
|
2403
|
+
});
|
|
2183
2404
|
}
|
|
2405
|
+
for (const path of regexExcludedPaths) skipped.push({
|
|
2406
|
+
path,
|
|
2407
|
+
reason: "regex excluded"
|
|
2408
|
+
});
|
|
2409
|
+
const files = [...resolvedFiles].sort((left, right) => left.localeCompare(right));
|
|
2410
|
+
debug.emit("path.resolve.filter.summary", {
|
|
2411
|
+
excluded: stats.filterExcluded + stats.regexExcluded,
|
|
2412
|
+
extensionExcluded: stats.filterExcluded,
|
|
2413
|
+
regexExcluded: stats.regexExcluded,
|
|
2414
|
+
included: stats.directoryIncluded
|
|
2415
|
+
});
|
|
2416
|
+
debug.emit("path.resolve.dedupe.summary", {
|
|
2417
|
+
accepted: stats.dedupeAccepted,
|
|
2418
|
+
duplicates: stats.dedupeDuplicates
|
|
2419
|
+
});
|
|
2420
|
+
debug.emit("path.resolve.complete", {
|
|
2421
|
+
files: files.length,
|
|
2422
|
+
skipped: skipped.length,
|
|
2423
|
+
ordering: "absolute-path-ascending"
|
|
2424
|
+
});
|
|
2184
2425
|
return {
|
|
2185
|
-
|
|
2186
|
-
|
|
2187
|
-
breakdown: left.breakdown
|
|
2426
|
+
files,
|
|
2427
|
+
skipped
|
|
2188
2428
|
};
|
|
2189
2429
|
}
|
|
2190
|
-
|
|
2191
|
-
|
|
2192
|
-
|
|
2193
|
-
|
|
2194
|
-
|
|
2195
|
-
|
|
2196
|
-
|
|
2197
|
-
|
|
2198
|
-
aggregate = mergeWordCounterResult(aggregate, current, preserveCollectorSegments);
|
|
2199
|
-
}
|
|
2200
|
-
return aggregate;
|
|
2430
|
+
|
|
2431
|
+
//#endregion
|
|
2432
|
+
//#region src/cli/progress/reporter.ts
|
|
2433
|
+
const PROGRESS_BAR_WIDTH = 20; // Total number of cells drawn in the progress bar.
|
|
2434
|
+
const FILLED_BAR_CHAR = "█"; // Cell glyph repeated for the completed portion of the bar.
|
|
2435
|
+
const EMPTY_BAR_CHAR = "░"; // Cell glyph repeated for the remaining portion of the bar.
|
|
2436
|
+
/**
 * Restrict a number to the inclusive range [min, max].
 *
 * The upper bound is applied first and the lower bound last, so when the
 * bounds are inverted (min > max) the result is `min`. NaN propagates.
 *
 * @param {number} value - Number to restrict.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @returns {number} The bounded value.
 */
function clamp(value, min, max) {
  const cappedAtMax = Math.min(max, value);
  return Math.max(min, cappedAtMax);
}
|
|
2202
|
-
function
|
|
2203
|
-
|
|
2439
|
+
/**
 * Render a fixed-width text progress bar.
 *
 * The bar is always PROGRESS_BAR_WIDTH cells wide for numeric input: filled
 * cells first, then empty cells. A total below 1 is treated as 1 so the
 * division is always defined.
 *
 * @param {number} completed - Number of finished items.
 * @param {number} total - Number of items overall.
 * @returns {string} The bar glyphs, without surrounding brackets.
 */
function buildProgressBar(completed, total) {
  const denominator = Math.max(total, 1);
  let filledCells;
  if (completed >= denominator) {
    // Finished (or overshot): always draw a completely filled bar.
    filledCells = PROGRESS_BAR_WIDTH;
  } else {
    const fraction = clamp(completed / denominator, 0, 1);
    filledCells = Math.floor(fraction * PROGRESS_BAR_WIDTH);
  }
  const filledPart = FILLED_BAR_CHAR.repeat(filledCells);
  const emptyPart = EMPTY_BAR_CHAR.repeat(PROGRESS_BAR_WIDTH - filledCells);
  return `${filledPart}${emptyPart}`;
}
|
|
2205
|
-
function
|
|
2206
|
-
|
|
2207
|
-
|
|
2208
|
-
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
}
|
|
2212
|
-
const section = results[0]?.section ?? "all";
|
|
2213
|
-
const grouped = /* @__PURE__ */ new Map();
|
|
2214
|
-
let total = 0;
|
|
2215
|
-
let frontmatterType = results[0]?.frontmatterType ?? null;
|
|
2216
|
-
for (const result of results) {
|
|
2217
|
-
total += result.total;
|
|
2218
|
-
if (result.section !== section) throw new Error("Cannot aggregate section results with different section modes.");
|
|
2219
|
-
if (frontmatterType !== result.frontmatterType) frontmatterType = null;
|
|
2220
|
-
for (const item of result.items) {
|
|
2221
|
-
const key = buildSectionKey(item.name, item.source);
|
|
2222
|
-
const existing = grouped.get(key);
|
|
2223
|
-
if (!existing) {
|
|
2224
|
-
grouped.set(key, {
|
|
2225
|
-
name: item.name,
|
|
2226
|
-
source: item.source,
|
|
2227
|
-
items: [item.result]
|
|
2228
|
-
});
|
|
2229
|
-
continue;
|
|
2230
|
-
}
|
|
2231
|
-
existing.items.push(item.result);
|
|
2232
|
-
}
|
|
2233
|
-
}
|
|
2234
|
-
const sourceOrder = new Map([["frontmatter", 0], ["content", 1]]);
|
|
2235
|
-
const items = [...grouped.values()].sort((left, right) => {
|
|
2236
|
-
const sourceDiff = (sourceOrder.get(left.source) ?? 0) - (sourceOrder.get(right.source) ?? 0);
|
|
2237
|
-
if (sourceDiff !== 0) return sourceDiff;
|
|
2238
|
-
return left.name.localeCompare(right.name);
|
|
2239
|
-
}).map((entry) => ({
|
|
2240
|
-
name: entry.name,
|
|
2241
|
-
source: entry.source,
|
|
2242
|
-
result: aggregateWordCounterResults(entry.items, preserveCollectorSegments)
|
|
2243
|
-
}));
|
|
2244
|
-
return {
|
|
2245
|
-
section,
|
|
2246
|
-
total,
|
|
2247
|
-
frontmatterType,
|
|
2248
|
-
items
|
|
2249
|
-
};
|
|
2446
|
+
/**
 * Format the time elapsed since `startedAtMs` as `MM:SS.t`.
 *
 * Minutes and seconds are zero-padded to two digits (minutes are not capped
 * at 59) and `t` is tenths of a second. A start time in the future is
 * rendered as zero elapsed time.
 *
 * @param {number} startedAtMs - Start timestamp in epoch milliseconds.
 * @returns {string} Elapsed time label.
 */
function formatElapsed(startedAtMs) {
  const elapsedMs = Date.now() - startedAtMs;
  const wholeSeconds = Math.max(0, Math.floor(elapsedMs / 1000));
  const minutesLabel = String(Math.floor(wholeSeconds / 60)).padStart(2, "0");
  const secondsLabel = String(wholeSeconds % 60).padStart(2, "0");
  const tenths = Math.floor((Math.max(0, elapsedMs) % 1000) / 100);
  return `${minutesLabel}:${secondsLabel}.${tenths}`;
}
|
|
2251
|
-
function
|
|
2252
|
-
|
|
2253
|
-
|
|
2454
|
+
/**
 * Build the single-line progress message shown while files are counted.
 *
 * Layout: `Counting files [<bar>] <pct>% <completed>/<total> elapsed <time>`.
 * The percentage is right-aligned to three characters and the completed count
 * is right-aligned to the width of the total. A total below 1 is treated as 1.
 *
 * @param {number} completed - Number of files processed so far.
 * @param {number} total - Number of files in the batch.
 * @param {number} startedAtMs - Batch start timestamp in epoch milliseconds.
 * @returns {string} The progress line, without a trailing newline.
 */
function buildProgressLine(completed, total, startedAtMs) {
  const safeTotal = Math.max(total, 1);
  let percent;
  if (completed >= safeTotal) {
    percent = 100;
  } else {
    percent = Math.floor(completed / safeTotal * 100);
  }
  const bar = buildProgressBar(completed, safeTotal);
  const percentLabel = `${String(percent).padStart(3, " ")}%`;
  const totalWidth = String(safeTotal).length;
  const countLabel = `${String(completed).padStart(totalWidth, " ")}/${safeTotal}`;
  const elapsedLabel = formatElapsed(startedAtMs);
  return `Counting files [${bar}] ${percentLabel} ${countLabel} elapsed ${elapsedLabel}`;
}
|
|
2255
|
-
function
|
|
2256
|
-
|
|
2459
|
+
/**
 * Build the status line shown while the aggregate result is being finalized.
 *
 * @param {number} startedAtMs - Batch start timestamp in epoch milliseconds.
 * @returns {string} The finalizing line, without a trailing newline.
 */
function buildFinalizingLine(startedAtMs) {
  const elapsedLabel = formatElapsed(startedAtMs);
  const prefix = "Finalizing aggregate... elapsed";
  return `${prefix} ${elapsedLabel}`;
}
|
|
2258
|
-
|
|
2259
|
-
const
|
|
2260
|
-
const
|
|
2261
|
-
|
|
2262
|
-
|
|
2263
|
-
|
|
2264
|
-
|
|
2265
|
-
|
|
2266
|
-
|
|
2267
|
-
|
|
2268
|
-
|
|
2269
|
-
|
|
2270
|
-
|
|
2271
|
-
|
|
2272
|
-
|
|
2273
|
-
|
|
2274
|
-
|
|
2275
|
-
|
|
2276
|
-
|
|
2277
|
-
|
|
2278
|
-
|
|
2279
|
-
|
|
2280
|
-
|
|
2281
|
-
|
|
2282
|
-
items: []
|
|
2462
|
+
/**
 * Create a progress reporter for batch counting.
 *
 * On a TTY stream the progress line is redrawn in place using carriage
 * returns; on any other stream one line is appended per percentage change.
 * The reporter only becomes active when it is enabled and the batch has more
 * than one file; every other method is a no-op while it is inactive.
 *
 * @param {object} options
 * @param {boolean} options.enabled - Whether progress output is wanted at all.
 * @param {{ write(text: string): unknown, isTTY?: boolean }} options.stream -
 *   Destination stream for progress output.
 * @param {boolean} [options.clearOnFinish=true] - On a TTY, erase the progress
 *   line when finished instead of leaving it on screen.
 * @returns {{
 *   enabled: boolean,
 *   start(nextTotal: number, nextStartedAtMs?: number): void,
 *   advance(snapshot: { completed: number }): void,
 *   startFinalizing(): void,
 *   finish(): void
 * }} The reporter.
 */
function createBatchProgressReporter(options) {
  const enabled = options.enabled;
  const isTTY = Boolean(options.stream.isTTY);
  const clearOnFinish = options.clearOnFinish ?? true;
  let active = false;
  let total = 0;
  // Length of the text currently occupying the terminal row being redrawn.
  let lastLineLength = 0;
  let startedAtMs = 0;
  let lastRenderedPercent = -1;
  let finalizingStarted = false;

  const write = (text) => {
    options.stream.write(text);
  };

  // Redraw the current terminal row, blanking leftovers of a longer old line.
  const writeTTYLine = (line) => {
    const padding = " ".repeat(Math.max(0, lastLineLength - line.length));
    write(`\r${line}${padding}`);
    lastLineLength = line.length;
  };

  const render = (completed) => {
    const safeTotal = Math.max(total, 1);
    const percent = completed >= safeTotal ? 100 : Math.floor(completed / safeTotal * 100);
    // Non-TTY output is append-only, so emit a line only when the whole
    // percentage changes (the completing update is always emitted).
    if (!isTTY && percent === lastRenderedPercent && completed < safeTotal) return;
    lastRenderedPercent = percent;
    // Build the line only after the throttle check so skipped updates cost
    // no string formatting.
    const line = buildProgressLine(completed, total, startedAtMs);
    if (isTTY) {
      writeTTYLine(line);
      return;
    }
    lastLineLength = line.length;
    write(`${line}\n`);
  };

  // Erase the in-place progress line and return the cursor to column 0.
  const clearLine = () => {
    if (lastLineLength === 0) return;
    write(`\r${" ".repeat(lastLineLength)}\r`);
    lastLineLength = 0;
  };

  return {
    enabled,
    start(nextTotal, nextStartedAtMs) {
      if (!enabled || nextTotal <= 1) return;
      total = nextTotal;
      active = true;
      startedAtMs = nextStartedAtMs ?? Date.now();
      lastRenderedPercent = -1;
      finalizingStarted = false;
      render(0);
    },
    advance(snapshot) {
      if (!active) return;
      render(snapshot.completed);
    },
    startFinalizing() {
      if (!active || finalizingStarted) return;
      finalizingStarted = true;
      const line = buildFinalizingLine(startedAtMs);
      if (isTTY) {
        if (!clearOnFinish) {
          // Keep the last progress line visible: finalize on a fresh row.
          write(`\n${line}`);
          lastLineLength = line.length;
          return;
        }
        writeTTYLine(line);
        return;
      }
      lastLineLength = line.length;
      write(`${line}\n`);
    },
    finish() {
      if (!active) return;
      if (isTTY) {
        if (clearOnFinish) {
          clearLine();
        } else {
          write("\n");
        }
      }
      // The cursor now sits on an empty row, so no previous text remains to
      // pad over if this reporter is started again.
      lastLineLength = 0;
      active = false;
    }
  };
}
|
|
2291
2534
|
|
|
@@ -2316,67 +2559,173 @@ async function runBatchCount(options) {
|
|
|
2316
2559
|
stage: "resolve",
|
|
2317
2560
|
elapsedMs: resolveElapsedMs
|
|
2318
2561
|
});
|
|
2319
|
-
|
|
2320
|
-
|
|
2321
|
-
|
|
2322
|
-
const loadElapsedMs = Date.now() - loadStartedAtMs;
|
|
2323
|
-
options.debug.emit("batch.load.complete", {
|
|
2324
|
-
files: loaded.files.length,
|
|
2325
|
-
skipped: loaded.skipped.length,
|
|
2326
|
-
elapsedMs: loadElapsedMs
|
|
2327
|
-
});
|
|
2328
|
-
options.debug.emit("batch.stage.timing", {
|
|
2329
|
-
stage: "load",
|
|
2330
|
-
elapsedMs: loadElapsedMs
|
|
2331
|
-
});
|
|
2332
|
-
const progressEnabled = options.progressReporter.enabled && loaded.files.length > 1;
|
|
2333
|
-
options.debug.emit("batch.progress.start", {
|
|
2334
|
-
enabled: progressEnabled,
|
|
2335
|
-
total: loaded.files.length
|
|
2562
|
+
options.debug.emit("batch.jobs.strategy", {
|
|
2563
|
+
strategy: options.jobsStrategy,
|
|
2564
|
+
jobs: options.jobs
|
|
2336
2565
|
});
|
|
2337
|
-
if (progressEnabled) options.progressReporter.start(loaded.files.length, batchStartedAtMs);
|
|
2338
2566
|
let summary;
|
|
2339
|
-
|
|
2340
|
-
|
|
2341
|
-
|
|
2342
|
-
|
|
2343
|
-
|
|
2344
|
-
|
|
2345
|
-
|
|
2346
|
-
},
|
|
2347
|
-
onFinalizeStart: () => {
|
|
2348
|
-
finalizeStartedAtMs = Date.now();
|
|
2349
|
-
if (progressEnabled) options.progressReporter.startFinalizing();
|
|
2350
|
-
const countElapsedMs = finalizeStartedAtMs - countStartedAtMs;
|
|
2351
|
-
options.debug.emit("batch.stage.timing", {
|
|
2352
|
-
stage: "count",
|
|
2353
|
-
elapsedMs: countElapsedMs
|
|
2354
|
-
});
|
|
2355
|
-
emittedCountTiming = true;
|
|
2356
|
-
},
|
|
2357
|
-
preserveCollectorSegments: options.preserveCollectorSegments
|
|
2567
|
+
let routeSkips = [];
|
|
2568
|
+
if (options.jobsStrategy === "load-only") {
|
|
2569
|
+
const loadStartedAtMs = Date.now();
|
|
2570
|
+
options.debug.emit("batch.load.start", {
|
|
2571
|
+
files: resolved.files.length,
|
|
2572
|
+
jobs: options.jobs,
|
|
2573
|
+
strategy: options.jobsStrategy
|
|
2358
2574
|
});
|
|
2359
|
-
|
|
2360
|
-
|
|
2361
|
-
options.debug.emit("batch.
|
|
2575
|
+
const loaded = await loadBatchInputsWithJobs(resolved.files, { jobs: options.jobs });
|
|
2576
|
+
const loadElapsedMs = Date.now() - loadStartedAtMs;
|
|
2577
|
+
options.debug.emit("batch.load.complete", {
|
|
2578
|
+
files: loaded.files.length,
|
|
2579
|
+
skipped: loaded.skipped.length,
|
|
2580
|
+
elapsedMs: loadElapsedMs,
|
|
2581
|
+
strategy: options.jobsStrategy
|
|
2582
|
+
});
|
|
2583
|
+
options.debug.emit("batch.stage.timing", {
|
|
2584
|
+
stage: "load",
|
|
2585
|
+
elapsedMs: loadElapsedMs
|
|
2586
|
+
});
|
|
2587
|
+
const progressEnabled = options.progressReporter.enabled && loaded.files.length > 1;
|
|
2588
|
+
options.debug.emit("batch.progress.start", {
|
|
2362
2589
|
enabled: progressEnabled,
|
|
2363
2590
|
total: loaded.files.length
|
|
2364
2591
|
});
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2592
|
+
if (progressEnabled) options.progressReporter.start(loaded.files.length, batchStartedAtMs);
|
|
2593
|
+
const countStartedAtMs = Date.now();
|
|
2594
|
+
let finalizeStartedAtMs = null;
|
|
2595
|
+
let emittedCountTiming = false;
|
|
2596
|
+
try {
|
|
2597
|
+
summary = await buildBatchSummary(loaded.files, options.section, options.wcOptions, {
|
|
2598
|
+
onFileCounted: (snapshot) => {
|
|
2599
|
+
if (progressEnabled) options.progressReporter.advance(snapshot);
|
|
2600
|
+
},
|
|
2601
|
+
onFinalizeStart: () => {
|
|
2602
|
+
finalizeStartedAtMs = Date.now();
|
|
2603
|
+
if (progressEnabled) options.progressReporter.startFinalizing();
|
|
2604
|
+
const countElapsedMs = finalizeStartedAtMs - countStartedAtMs;
|
|
2605
|
+
options.debug.emit("batch.stage.timing", {
|
|
2606
|
+
stage: "count",
|
|
2607
|
+
elapsedMs: countElapsedMs
|
|
2608
|
+
});
|
|
2609
|
+
emittedCountTiming = true;
|
|
2610
|
+
},
|
|
2611
|
+
preserveCollectorSegments: options.preserveCollectorSegments
|
|
2612
|
+
});
|
|
2613
|
+
} finally {
|
|
2614
|
+
if (progressEnabled) options.progressReporter.finish();
|
|
2615
|
+
options.debug.emit("batch.progress.complete", {
|
|
2616
|
+
enabled: progressEnabled,
|
|
2617
|
+
total: loaded.files.length
|
|
2618
|
+
});
|
|
2619
|
+
}
|
|
2620
|
+
if (!emittedCountTiming) {
|
|
2621
|
+
const countElapsedMs = Date.now() - countStartedAtMs;
|
|
2622
|
+
options.debug.emit("batch.stage.timing", {
|
|
2623
|
+
stage: "count",
|
|
2624
|
+
elapsedMs: countElapsedMs
|
|
2625
|
+
});
|
|
2626
|
+
}
|
|
2627
|
+
const finalizeElapsedMs = finalizeStartedAtMs === null ? 0 : Date.now() - finalizeStartedAtMs;
|
|
2628
|
+
options.debug.emit("batch.stage.timing", {
|
|
2629
|
+
stage: "finalize",
|
|
2630
|
+
elapsedMs: finalizeElapsedMs
|
|
2631
|
+
});
|
|
2632
|
+
routeSkips = loaded.skipped;
|
|
2633
|
+
} else {
|
|
2634
|
+
options.debug.emit("batch.load.start", {
|
|
2635
|
+
files: resolved.files.length,
|
|
2636
|
+
jobs: options.jobs,
|
|
2637
|
+
strategy: options.jobsStrategy
|
|
2638
|
+
});
|
|
2639
|
+
options.debug.emit("batch.load.complete", {
|
|
2640
|
+
files: 0,
|
|
2641
|
+
skipped: 0,
|
|
2642
|
+
elapsedMs: 0,
|
|
2643
|
+
strategy: options.jobsStrategy
|
|
2644
|
+
});
|
|
2645
|
+
options.debug.emit("batch.stage.timing", {
|
|
2646
|
+
stage: "load",
|
|
2647
|
+
elapsedMs: 0
|
|
2648
|
+
});
|
|
2649
|
+
const progressEnabled = options.progressReporter.enabled && resolved.files.length > 1;
|
|
2650
|
+
options.debug.emit("batch.progress.start", {
|
|
2651
|
+
enabled: progressEnabled,
|
|
2652
|
+
total: resolved.files.length
|
|
2653
|
+
});
|
|
2654
|
+
if (progressEnabled) options.progressReporter.start(resolved.files.length, batchStartedAtMs);
|
|
2655
|
+
const countStartedAtMs = Date.now();
|
|
2656
|
+
let finalizeStartedAtMs = null;
|
|
2657
|
+
let emittedCountTiming = false;
|
|
2658
|
+
try {
|
|
2659
|
+
let counted;
|
|
2660
|
+
try {
|
|
2661
|
+
counted = await countBatchInputsWithWorkerJobs(resolved.files, {
|
|
2662
|
+
jobs: options.jobs,
|
|
2663
|
+
section: options.section,
|
|
2664
|
+
wcOptions: options.wcOptions,
|
|
2665
|
+
preserveCollectorSegments: options.preserveCollectorSegments,
|
|
2666
|
+
onFileProcessed: (snapshot) => {
|
|
2667
|
+
if (progressEnabled) options.progressReporter.advance(snapshot);
|
|
2668
|
+
}
|
|
2669
|
+
});
|
|
2670
|
+
options.debug.emit("batch.jobs.executor", {
|
|
2671
|
+
strategy: options.jobsStrategy,
|
|
2672
|
+
executor: "worker-pool",
|
|
2673
|
+
jobs: options.jobs
|
|
2674
|
+
});
|
|
2675
|
+
} catch (error) {
|
|
2676
|
+
if (!(error instanceof WorkerRouteUnavailableError)) throw error;
|
|
2677
|
+
options.debug.emit("batch.jobs.executor", {
|
|
2678
|
+
strategy: options.jobsStrategy,
|
|
2679
|
+
executor: "async-fallback",
|
|
2680
|
+
reason: error.message,
|
|
2681
|
+
jobs: options.jobs
|
|
2682
|
+
});
|
|
2683
|
+
counted = await countBatchInputsWithJobs(resolved.files, {
|
|
2684
|
+
jobs: options.jobs,
|
|
2685
|
+
section: options.section,
|
|
2686
|
+
wcOptions: options.wcOptions,
|
|
2687
|
+
preserveCollectorSegments: options.preserveCollectorSegments,
|
|
2688
|
+
onFileProcessed: (snapshot) => {
|
|
2689
|
+
if (progressEnabled) options.progressReporter.advance(snapshot);
|
|
2690
|
+
}
|
|
2691
|
+
});
|
|
2692
|
+
}
|
|
2693
|
+
routeSkips = counted.skipped;
|
|
2694
|
+
summary = finalizeBatchJobsSummary(counted.files, options.section, options.wcOptions, {
|
|
2695
|
+
onFinalizeStart: () => {
|
|
2696
|
+
finalizeStartedAtMs = Date.now();
|
|
2697
|
+
if (progressEnabled) options.progressReporter.startFinalizing();
|
|
2698
|
+
const countElapsedMs = finalizeStartedAtMs - countStartedAtMs;
|
|
2699
|
+
options.debug.emit("batch.stage.timing", {
|
|
2700
|
+
stage: "count",
|
|
2701
|
+
elapsedMs: countElapsedMs
|
|
2702
|
+
});
|
|
2703
|
+
emittedCountTiming = true;
|
|
2704
|
+
},
|
|
2705
|
+
preserveCollectorSegments: options.preserveCollectorSegments
|
|
2706
|
+
});
|
|
2707
|
+
} finally {
|
|
2708
|
+
if (progressEnabled) options.progressReporter.finish();
|
|
2709
|
+
options.debug.emit("batch.progress.complete", {
|
|
2710
|
+
enabled: progressEnabled,
|
|
2711
|
+
total: resolved.files.length
|
|
2712
|
+
});
|
|
2713
|
+
}
|
|
2714
|
+
if (!emittedCountTiming) {
|
|
2715
|
+
const countElapsedMs = Date.now() - countStartedAtMs;
|
|
2716
|
+
options.debug.emit("batch.stage.timing", {
|
|
2717
|
+
stage: "count",
|
|
2718
|
+
elapsedMs: countElapsedMs
|
|
2719
|
+
});
|
|
2720
|
+
}
|
|
2721
|
+
const finalizeElapsedMs = finalizeStartedAtMs === null ? 0 : Date.now() - finalizeStartedAtMs;
|
|
2368
2722
|
options.debug.emit("batch.stage.timing", {
|
|
2369
|
-
stage: "
|
|
2370
|
-
elapsedMs:
|
|
2723
|
+
stage: "finalize",
|
|
2724
|
+
elapsedMs: finalizeElapsedMs
|
|
2371
2725
|
});
|
|
2372
2726
|
}
|
|
2373
|
-
const finalizeElapsedMs = finalizeStartedAtMs === null ? 0 : Date.now() - finalizeStartedAtMs;
|
|
2374
|
-
options.debug.emit("batch.stage.timing", {
|
|
2375
|
-
stage: "finalize",
|
|
2376
|
-
elapsedMs: finalizeElapsedMs
|
|
2377
|
-
});
|
|
2378
2727
|
appendAll(summary.skipped, resolved.skipped);
|
|
2379
|
-
appendAll(summary.skipped,
|
|
2728
|
+
appendAll(summary.skipped, routeSkips);
|
|
2380
2729
|
options.debug.emit("batch.aggregate.complete", {
|
|
2381
2730
|
files: summary.files.length,
|
|
2382
2731
|
skipped: summary.skipped.length,
|
|
@@ -2385,6 +2734,12 @@ async function runBatchCount(options) {
|
|
|
2385
2734
|
return summary;
|
|
2386
2735
|
}
|
|
2387
2736
|
|
|
2737
|
+
//#endregion
|
|
2738
|
+
//#region src/cli/batch/jobs/strategy.ts
|
|
2739
|
+
/**
 * Pick the batch execution strategy name for a resolved job count.
 *
 * @param {number} jobs - Effective number of jobs.
 * @returns {"load-count" | "load-only"} `"load-count"` when more than one job
 *   is available, otherwise `"load-only"`.
 */
function resolveBatchJobsStrategy(jobs) {
  if (jobs > 1) {
    return "load-count";
  }
  return "load-only";
}
|
|
2742
|
+
|
|
2388
2743
|
//#endregion
|
|
2389
2744
|
//#region src/utils/show-singular-or-plural-word.ts
|
|
2390
2745
|
function showSingularOrPluralWord(count, word) {
|
|
@@ -2601,6 +2956,10 @@ function countLongOptionOccurrences(argv, optionName) {
|
|
|
2601
2956
|
function validateSingleRegexOptionUsage(argv) {
|
|
2602
2957
|
if (countLongOptionOccurrences(argv, "--regex") > 1) throw new Error("`--regex` can only be provided once.");
|
|
2603
2958
|
}
|
|
2959
|
+
/**
 * Ensure `--print-jobs-limit` is the only argument on the command line.
 *
 * The first two argv entries are skipped and empty-string tokens are ignored
 * before checking.
 *
 * @param {string[]} argv - Full argument vector.
 * @throws {Error} When any other non-empty token is present, or the single
 *   token is not `--print-jobs-limit`.
 */
function validateStandalonePrintJobsLimitUsage(argv) {
  const [onlyToken, ...extraTokens] = argv.slice(2).filter((token) => token.length > 0);
  const usedAlone = extraTokens.length === 0 && onlyToken === "--print-jobs-limit";
  if (!usedAlone) {
    throw new Error("`--print-jobs-limit` must be used alone.");
  }
}
|
|
2604
2963
|
function resolveBatchScope(argv) {
|
|
2605
2964
|
let scope = "merged";
|
|
2606
2965
|
for (const token of argv) {
|
|
@@ -2709,6 +3068,11 @@ async function executeBatchCount({ argv, options, runtime, resolved, debug, teeE
|
|
|
2709
3068
|
directoryRegexPattern: options.regex
|
|
2710
3069
|
};
|
|
2711
3070
|
const extensionFilter = buildDirectoryExtensionFilter(options.includeExt, options.excludeExt);
|
|
3071
|
+
const requestedJobs = options.jobs;
|
|
3072
|
+
const jobsLimit = resolveBatchJobsLimit();
|
|
3073
|
+
const jobs = clampRequestedJobs(requestedJobs, jobsLimit);
|
|
3074
|
+
if (requestedJobs > jobsLimit.suggestedMaxJobs) console.error(import_picocolors.default.yellow(formatJobsAdvisoryWarning(requestedJobs, jobs, jobsLimit)));
|
|
3075
|
+
const jobsStrategy = resolveBatchJobsStrategy(jobs);
|
|
2712
3076
|
const debugEnabled = Boolean(options.debug);
|
|
2713
3077
|
const mirrorDebugToTerminal = debugEnabled && (!debug.reportPath || teeEnabled);
|
|
2714
3078
|
const summary = await runBatchCount({
|
|
@@ -2723,7 +3087,9 @@ async function executeBatchCount({ argv, options, runtime, resolved, debug, teeE
|
|
|
2723
3087
|
enabled: options.format === "standard" && options.progress,
|
|
2724
3088
|
stream: runtime.stderr ?? process.stderr,
|
|
2725
3089
|
clearOnFinish: !(mirrorDebugToTerminal || options.keepProgress)
|
|
2726
|
-
})
|
|
3090
|
+
}),
|
|
3091
|
+
jobs,
|
|
3092
|
+
jobsStrategy
|
|
2727
3093
|
});
|
|
2728
3094
|
const showSkipDiagnostics = debugEnabled && !batchOptions.quietSkips;
|
|
2729
3095
|
debug.emit("batch.skips.policy", {
|
|
@@ -2891,6 +3257,17 @@ async function runCli(argv = process.argv, runtime = {}) {
|
|
|
2891
3257
|
program.name("word-counter").description("Locale-aware word counting powered by Intl.Segmenter.").version(getFormattedVersionLabel(), "-v, --version", "output the version number");
|
|
2892
3258
|
configureProgramOptions(program, parseMode);
|
|
2893
3259
|
program.action(async (textTokens, options) => {
|
|
3260
|
+
if (options.printJobsLimit) {
|
|
3261
|
+
try {
|
|
3262
|
+
validateStandalonePrintJobsLimitUsage(argv);
|
|
3263
|
+
} catch (error) {
|
|
3264
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
3265
|
+
program.error(import_picocolors.default.red(message));
|
|
3266
|
+
return;
|
|
3267
|
+
}
|
|
3268
|
+
console.log(JSON.stringify(resolveBatchJobsLimit()));
|
|
3269
|
+
return;
|
|
3270
|
+
}
|
|
2894
3271
|
const debugEnabled = Boolean(options.debug);
|
|
2895
3272
|
const debugReportPath = resolveDebugReportPathOption(options.debugReport);
|
|
2896
3273
|
const debugReportEnabled = options.debugReport !== void 0 && options.debugReport !== false;
|