@dev-pi2pie/word-counter 0.1.3 → 0.1.4-canary.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/esm/bin.mjs CHANGED
@@ -4,6 +4,7 @@ import { Command, Option } from "commander";
4
4
  import { closeSync, createWriteStream, existsSync, mkdirSync, openSync, readFileSync, statSync } from "node:fs";
5
5
  import { basename, dirname, extname, join, relative, resolve, sep } from "node:path";
6
6
  import { fileURLToPath } from "node:url";
7
+ import os from "node:os";
7
8
  import { readFile, readdir, stat } from "node:fs/promises";
8
9
  import { parseDocument } from "yaml";
9
10
 
@@ -345,8 +346,14 @@ function collectPathValue(value, previous = []) {
345
346
  function collectLatinHintValue(value, previous = []) {
346
347
  return [...previous, value];
347
348
  }
349
/**
 * Option parser for `--jobs`.
 *
 * Accepts only text made entirely of decimal digits whose value is a safe
 * integer of at least 1.
 *
 * @param {string} value - Raw option text.
 * @returns {number} The parsed job count.
 * @throws {Error} When the text is not a positive safe integer.
 */
function parseJobsOption(value) {
  const digitsOnly = /^\d+$/.test(value);
  // Anything that is not a pure digit run is treated as NaN so a single
  // validity check below covers every rejection path.
  const jobs = digitsOnly ? Number.parseInt(value, 10) : Number.NaN;
  if (Number.isSafeInteger(jobs) && jobs >= 1) return jobs;
  throw new Error("`--jobs` must be an integer >= 1.");
}
348
355
  function configureProgramOptions(program, parseMode) {
349
- program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--latin-hint <tag>=<pattern>", "add a custom Latin hint rule (repeatable)", collectLatinHintValue, []).option("--latin-hints-file <path>", "load custom Latin hint rules from a JSON file").option("--no-default-latin-hints", "disable built-in Latin hint rules").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies with --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", "mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of 
--debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in standard mode").option("--no-recursive", "disable recursive directory traversal").option("--quiet-skips", "hide skip diagnostics (applies when --debug is enabled)").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("--regex <pattern>", "regex filter for directory-scanned paths (applies to --path directories only)").option("-p, --path <path>", "read input from file or directory (directories expand in auto mode by default)", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
356
+ program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--latin-hint <tag>=<pattern>", "add a custom Latin hint rule (repeatable)", collectLatinHintValue, []).option("--latin-hints-file <path>", "load custom Latin hint rules from a JSON file").option("--no-default-latin-hints", "disable built-in Latin hint rules").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies with --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", "mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of 
--debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--jobs <n>", "concurrent file jobs in batch mode (default: 1; >1 enables worker load+count)", parseJobsOption, 1).option("--print-jobs-limit", "print suggested max --jobs for current host and exit").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in standard mode").option("--no-recursive", "disable recursive directory traversal").option("--quiet-skips", "hide skip diagnostics (applies when --debug is enabled)").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("--regex <pattern>", "regex filter for directory-scanned paths (applies to --path directories only)").option("-p, --path <path>", "read input from file or directory (directories expand in auto mode by default)", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
350
357
  }
351
358
 
352
359
  //#endregion
@@ -421,7 +428,7 @@ var require_picocolors = /* @__PURE__ */ __commonJSMin(((exports, module) => {
421
428
  //#endregion
422
429
  //#region src/cli/program/version-embedded.ts
423
430
  var import_picocolors = /* @__PURE__ */ __toESM(require_picocolors(), 1);
424
- const EMBEDDED_PACKAGE_VERSION = "0.1.3";
431
+ const EMBEDDED_PACKAGE_VERSION = "0.1.4-canary.1";
425
432
 
426
433
  //#endregion
427
434
  //#region src/cli/program/version.ts
@@ -474,6 +481,51 @@ function getFormattedVersionLabel() {
474
481
  return import_picocolors.default.bgBlack(import_picocolors.default.bold(import_picocolors.default.italic(` word-counter ${import_picocolors.default.cyanBright(`ver.${version}`)} `)));
475
482
  }
476
483
 
484
+ //#endregion
485
+ //#region src/cli/batch/jobs/limits.ts
486
+ const DEFAULT_UV_THREADPOOL_SIZE = 4;
487
/**
 * Leniently reads a positive integer from text.
 *
 * Uses `Number.parseInt`, so leading digits are accepted even when followed
 * by other characters (e.g. "4abc" yields 4).
 *
 * @param {string | undefined} value - Text to parse; falsy input yields undefined.
 * @returns {number | undefined} The parsed value, or undefined when it is
 *   missing, not a finite number, or not greater than zero.
 */
function parsePositiveInteger(value) {
  if (!value) return undefined;
  const candidate = Number.parseInt(value, 10);
  const usable = Number.isFinite(candidate) && candidate > 0;
  return usable ? candidate : undefined;
}
493
/**
 * Computes the suggested upper bound for `--jobs` on the current host.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment
 *   to read `UV_THREADPOOL_SIZE` from.
 * @returns {{ suggestedMaxJobs: number, cpuLimit: number, uvThreadpool: number, ioLimit: number }}
 *   `cpuLimit` is the detected parallelism (at least 1), `uvThreadpool` is the
 *   parsed `UV_THREADPOOL_SIZE` or the default, `ioLimit` is twice the
 *   threadpool size, and `suggestedMaxJobs` is the smaller of the two limits.
 */
function resolveBatchJobsLimit(env = process.env) {
  // os.availableParallelism() is missing on older Node releases; fall back to
  // the logical CPU count there instead of throwing a TypeError.
  const detectedParallelism = typeof os.availableParallelism === "function"
    ? os.availableParallelism()
    : os.cpus().length;
  // os.cpus() may report an empty list on some platforms, hence the floor of 1.
  const cpuLimit = Math.max(1, detectedParallelism);
  const uvThreadpool = parsePositiveInteger(env.UV_THREADPOOL_SIZE) ?? DEFAULT_UV_THREADPOOL_SIZE;
  const ioLimit = Math.max(1, uvThreadpool * 2);
  return {
    suggestedMaxJobs: Math.max(1, Math.min(cpuLimit, ioLimit)),
    cpuLimit,
    uvThreadpool,
    ioLimit
  };
}
504
/**
 * Restricts a requested job count to the range [1, limits.suggestedMaxJobs].
 *
 * @param {number} requestedJobs - Job count asked for by the caller.
 * @param {{ suggestedMaxJobs: number }} limits - Host limits to cap against.
 * @returns {number} The effective job count.
 */
function clampRequestedJobs(requestedJobs, limits) {
  const ceiling = limits.suggestedMaxJobs;
  const capped = Math.min(requestedJobs, ceiling);
  return Math.max(1, capped);
}
507
/**
 * Builds the one-line warning shown when the requested `--jobs` value was
 * capped to the host's suggested limit.
 *
 * @param {number} requestedJobs - Value the user asked for.
 * @param {number} effectiveJobs - Value that will actually be used.
 * @param {{ suggestedMaxJobs: number, cpuLimit: number, uvThreadpool: number, ioLimit: number }} limits
 *   Host limits to report.
 * @returns {string} Three sentences joined by single spaces.
 */
function formatJobsAdvisoryWarning(requestedJobs, effectiveJobs, limits) {
  const headline = `Warning: requested --jobs=${requestedJobs} exceeds suggested host limit (${limits.suggestedMaxJobs}).`;
  const capNotice = `Running with --jobs=${effectiveJobs} as a safety cap.`;
  const hostDetails = `Host limits: cpuLimit=${limits.cpuLimit}, uvThreadpool=${limits.uvThreadpool}, ioLimit=${limits.ioLimit}.`;
  return `${headline} ${capNotice} ${hostDetails}`;
}
514
/**
 * Tells whether a thrown value carries a file-descriptor exhaustion code.
 *
 * @param {unknown} error - Any caught value.
 * @returns {boolean} True when `error` is a non-null object whose `code` is
 *   "EMFILE" or "ENFILE".
 */
function isResourceLimitError(error) {
  if (error === null || typeof error !== "object") return false;
  if (!("code" in error)) return false;
  return ["EMFILE", "ENFILE"].includes(error.code);
}
519
/**
 * Wraps a low-level failure in an error that explains the `--jobs` context.
 *
 * The original failure is attached as `cause` so its stack and code are not
 * lost when the wrapper is reported.
 *
 * @param {string} path - File being processed when the failure happened.
 * @param {unknown} error - The caught value being wrapped.
 * @param {number} requestedJobs - The `--jobs` value the user asked for.
 * @param {{ suggestedMaxJobs: number }} limits - Host limits to report.
 * @returns {Error} A new error with a descriptive message and `cause` set.
 */
function createResourceLimitError(path, error, requestedJobs, limits) {
  const message = error instanceof Error ? error.message : String(error);
  const code = typeof error === "object" && error !== null && "code" in error ? String(error.code) : "UNKNOWN";
  const description = [
    `Resource limit reached while processing: ${path} (${code}: ${message}).`,
    `Requested --jobs=${requestedJobs}; suggested host limit is ${limits.suggestedMaxJobs}.`,
    "Reduce --jobs or raise OS file descriptor limits before retrying."
  ].join(" ");
  return new Error(description, { cause: error });
}
528
+
477
529
  //#endregion
478
530
  //#region src/utils/append-all.ts
479
531
  function appendAll(target, source) {
@@ -481,1811 +533,2002 @@ function appendAll(target, source) {
481
533
  }
482
534
 
483
535
  //#endregion
484
- //#region src/cli/path/load.ts
485
- function isProbablyBinary(buffer) {
486
- if (buffer.length === 0) return false;
487
- const sampleSize = Math.min(buffer.length, 1024);
488
- let suspicious = 0;
489
- for (let index = 0; index < sampleSize; index += 1) {
490
- const byte = buffer[index] ?? 0;
491
- if (byte === 0) return true;
492
- if (byte === 9 || byte === 10 || byte === 13) continue;
493
- if (byte >= 32 && byte <= 126) continue;
494
- if (byte >= 128) continue;
495
- suspicious += 1;
536
+ //#region src/markdown/toml/arrays.ts
537
/**
 * Returns the array stored at `result[key]`, creating it when needed.
 *
 * Any existing non-array value under `key` is replaced by a fresh empty array.
 *
 * @param {Record<string, unknown>} result - Object being built up.
 * @param {string} key - Property expected to hold an array.
 * @returns {unknown[]} The array now stored under `key`.
 */
function ensureArrayContainer(result, key) {
  if (!Array.isArray(result[key])) result[key] = [];
  return result[key];
}
544
/**
 * Replaces every array-valued property of `result`, in place, with a single
 * string: each entry becomes "key=value, key=value" and entries are joined
 * with " | ".
 *
 * @param {Record<string, unknown>} result - Object to rewrite in place.
 * @returns {void}
 */
function flattenArrayTables(result) {
  const renderEntry = (entry) => {
    const fields = [];
    for (const [entryKey, entryValue] of Object.entries(entry)) fields.push(`${entryKey}=${entryValue}`);
    return fields.join(", ");
  };
  for (const key of Object.keys(result)) {
    const value = result[key];
    if (Array.isArray(value)) result[key] = value.map((entry) => renderEntry(entry)).join(" | ");
  }
}
499
- async function loadBatchInputs(filePaths) {
500
- const files = [];
501
- const skipped = [];
502
- for (const filePath of filePaths) {
503
- let buffer;
504
- try {
505
- buffer = await readFile(filePath);
506
- } catch (error) {
507
- const message = error instanceof Error ? error.message : String(error);
508
- skipped.push({
509
- path: filePath,
510
- reason: `not readable: ${message}`
511
- });
550
+
551
+ //#endregion
552
+ //#region src/markdown/toml/keys.ts
553
/**
 * Trims a key and removes one pair of matching surrounding quotes, if present.
 *
 * @param {string} key - Raw key text.
 * @returns {string} The trimmed key without its enclosing `"` or `'` pair.
 */
function stripKeyQuotes(key) {
  const text = key.trim();
  for (const quote of ["\"", "'"]) {
    if (text.startsWith(quote) && text.endsWith(quote)) return text.slice(1, -1);
  }
  return text;
}
558
/**
 * Normalizes a TOML key or dotted key path.
 *
 * A key wrapped in matching quotes is returned without the quotes (and is not
 * split on dots). Otherwise the key is split on ".", each segment is trimmed,
 * and the segments are re-joined with ".".
 *
 * @param {string} key - Raw key text.
 * @returns {string | null} The normalized key, or null when the key, the
 *   quoted content, or any dotted segment is empty.
 */
function normalizeKeyPath(key) {
  const text = key.trim();
  if (text === "") return null;
  const wrappedIn = (quote) => text.startsWith(quote) && text.endsWith(quote);
  if (wrappedIn("\"") || wrappedIn("'")) {
    const inner = text.slice(1, -1);
    return inner === "" ? null : inner;
  }
  const segments = [];
  for (const piece of text.split(".")) {
    const segment = piece.trim();
    if (segment === "") return null;
    segments.push(segment);
  }
  return segments.join(".");
}
569
+
570
+ //#endregion
571
+ //#region src/markdown/toml/strings.ts
572
/**
 * Removes a trailing `#` comment from a single line.
 *
 * A `#` inside a double- or single-quoted string is not a comment. Backslash
 * escapes are honoured only inside double-quoted strings.
 *
 * @param {string} line - One line of text.
 * @returns {string} The text before the comment with trailing whitespace
 *   removed, or the line unchanged when it contains no comment.
 */
function stripInlineComment(line) {
  // "" while outside a string, otherwise the quote character that opened it.
  let openQuote = "";
  let pendingEscape = false;
  let cursor = 0;
  while (cursor < line.length) {
    const at = cursor;
    const char = line.charAt(at);
    cursor += 1;
    if (openQuote === "") {
      if (char === "#") return line.slice(0, at).trimEnd();
      if (char === "\"" || char === "'") openQuote = char;
      continue;
    }
    if (pendingEscape) {
      // The character after a backslash never terminates the string.
      pendingEscape = false;
      continue;
    }
    if (char === "\\" && openQuote === "\"") {
      pendingEscape = true;
      continue;
    }
    if (char === openQuote) openQuote = "";
  }
  return line;
}
608
// Replacement text for each escape character handled by unescapeBasic.
const BASIC_ESCAPE_REPLACEMENTS = {
  "\\": "\\",
  "\"": "\"",
  n: "\n",
  t: "\t",
  r: "\r"
};
/**
 * Decodes the backslash escapes `\\`, `\"`, `\n`, `\t` and `\r` in the body
 * of a basic (double-quoted) TOML string.
 *
 * All escapes are resolved in one left-to-right pass. Chaining separate
 * replacements would re-interpret the backslash produced by `\\`, turning an
 * escaped backslash followed by `n` into a newline.
 *
 * @param {string} input - String body without its surrounding quotes.
 * @returns {string} The decoded text; unrecognized escapes are left as-is.
 */
function unescapeBasic(input) {
  return input.replace(/\\([\\"ntr])/g, (_match, code) => BASIC_ESCAPE_REPLACEMENTS[code]);
}
611
/**
 * Parses a TOML string literal in any of its four quoting forms.
 *
 * Basic forms (`"…"`, `"""…"""`) are passed through `unescapeBasic`; literal
 * forms (`'…'`, `'''…'''`) are returned verbatim. For the triple-quoted forms
 * a newline directly after the opening delimiter is dropped, as TOML requires.
 *
 * @param {string} value - Trimmed candidate text.
 * @returns {string | null} The string content, or null when `value` is not a
 *   complete quoted literal.
 */
function parseStringLiteral(value) {
  // The length guard keeps the opening and closing delimiters from overlapping,
  // so a lone quote character is not mistaken for an empty string.
  const isWrappedIn = (delimiter) =>
    value.length >= delimiter.length * 2 && value.startsWith(delimiter) && value.endsWith(delimiter);
  const multilineBody = () => value.slice(3, -3).replace(/^\r?\n/, "");
  if (isWrappedIn("\"\"\"")) return unescapeBasic(multilineBody());
  if (isWrappedIn("'''")) return multilineBody();
  if (isWrappedIn("\"")) return unescapeBasic(value.slice(1, -1));
  if (isWrappedIn("'")) return value.slice(1, -1);
  return null;
}
531
618
 
532
619
  //#endregion
533
- //#region src/cli/path/resolve.ts
534
- async function expandDirectory(rootPath, directoryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats) {
535
- let entries;
536
- try {
537
- entries = await readdir(directoryPath, {
538
- withFileTypes: true,
539
- encoding: "utf8"
540
- });
541
- } catch (error) {
542
- const message = error instanceof Error ? error.message : String(error);
543
- skipped.push({
544
- path: directoryPath,
545
- reason: `directory read failed: ${message}`
546
- });
547
- debug.emit("path.resolve.expand.read_failed", {
548
- directory: directoryPath,
549
- reason: `directory read failed: ${message}`
550
- });
551
- return [];
552
- }
553
- const sortedEntries = entries.slice().sort((left, right) => left.name.localeCompare(right.name));
554
- const files = [];
555
- debug.emit("path.resolve.expand.start", {
556
- directory: directoryPath,
557
- entries: sortedEntries.length,
558
- recursive
559
- });
560
- for (const entry of sortedEntries) {
561
- const entryPath = resolve(directoryPath, entry.name);
562
- if (entry.isFile()) {
563
- if (!shouldIncludeFromDirectory(entryPath, extensionFilter)) {
564
- skipped.push({
565
- path: entryPath,
566
- reason: "extension excluded"
567
- });
568
- debug.emit("path.resolve.filter.excluded", {
569
- path: entryPath,
570
- reason: "extension excluded"
571
- }, { verbosity: "verbose" });
572
- stats.filterExcluded += 1;
620
+ //#region src/markdown/toml/values.ts
621
/**
 * Converts a scalar TOML token to a JavaScript value.
 *
 * @param {string} raw - Token text; surrounding whitespace is ignored.
 * @returns {string | number | boolean | null} null for an empty token; the
 *   decoded string for a quoted literal; a boolean for `true`/`false`; a
 *   number for a decimal or exponent literal; otherwise the trimmed text.
 */
function parsePrimitive(raw) {
  const text = raw.trim();
  if (text === "") return null;
  const quoted = parseStringLiteral(text);
  if (quoted !== null) return quoted;
  switch (text) {
    case "true":
      return true;
    case "false":
      return false;
    default:
      break;
  }
  const looksNumeric = /^[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$/.test(text);
  if (looksNumeric) return Number(text);
  // Date-like tokens are kept as text, the same as any other bare token.
  if (/^\d{4}-\d{2}-\d{2}/.test(text)) return text;
  return text;
}
632
/**
 * Parses a single-line TOML array of scalar values.
 *
 * Elements are split on commas that sit outside quoted strings, and each one
 * is converted with `parsePrimitive`. A trailing comma after the last element
 * is accepted, as TOML allows.
 *
 * @param {string} raw - Candidate text; surrounding whitespace is ignored.
 * @returns {Array<string | number | boolean> | null} The parsed elements, or
 *   null when `raw` is not bracketed or contains an empty element.
 */
function parseArray(raw) {
  const value = raw.trim();
  if (!value.startsWith("[") || !value.endsWith("]")) return null;
  const inner = value.slice(1, -1).trim();
  if (!inner) return [];
  const items = [];
  let current = "";
  let inString = null;
  let escaped = false;
  for (let i = 0; i < inner.length; i += 1) {
    const char = inner[i] ?? "";
    if (inString) {
      current += char;
      if (escaped) {
        escaped = false;
        continue;
      }
      // Backslash escapes only exist in double-quoted strings.
      if (char === "\\" && inString === "double") {
        escaped = true;
        continue;
      }
      if (inString === "double" && char === "\"") inString = null;
      else if (inString === "single" && char === "'") inString = null;
      continue;
    }
    if (char === "\"") {
      inString = "double";
      current += char;
      continue;
    }
    if (char === "'") {
      inString = "single";
      current += char;
      continue;
    }
    if (char === ",") {
      const item = parsePrimitive(current);
      if (item === null) return null;
      items.push(item);
      current = "";
      continue;
    }
    current += char;
  }
  // Nothing after the last comma means a trailing comma, not an empty element.
  if (current.trim() === "" && items.length > 0) return items;
  const finalItem = parsePrimitive(current);
  if (finalItem === null) return null;
  items.push(finalItem);
  return items;
}
605
- async function resolveBatchFilePaths(pathInputs, options) {
606
- const skipped = [];
607
- const regexExcludedPaths = /* @__PURE__ */ new Set();
608
- const resolvedFiles = /* @__PURE__ */ new Set();
609
- const stats = {
610
- dedupeAccepted: 0,
611
- dedupeDuplicates: 0,
612
- filterExcluded: 0,
613
- regexExcluded: 0,
614
- directoryIncluded: 0
615
- };
616
- const extensionFilter = options.extensionFilter ?? buildDirectoryExtensionFilter(void 0, void 0);
617
- let regexFilter;
618
- const debug = options.debug ?? {
619
- enabled: false,
620
- verbosity: "compact",
621
- emit() {},
622
- close: async () => {}
623
- };
624
- debug.emit("path.resolve.inputs", {
625
- inputs: pathInputs.length,
626
- pathMode: options.pathMode,
627
- recursive: options.recursive,
628
- hasRegex: Boolean(options.directoryRegexPattern)
629
- });
630
- const addResolvedFile = (filePath, details) => {
631
- regexExcludedPaths.delete(filePath);
632
- if (resolvedFiles.has(filePath)) {
633
- stats.dedupeDuplicates += 1;
634
- debug.emit("path.resolve.dedupe.duplicate", {
635
- path: filePath,
636
- source: details.source,
637
- input: details.input
638
- }, { verbosity: "verbose" });
639
- return;
681
/**
 * Parses a single-level TOML inline table such as `{ a = 1, b = "x" }`.
 *
 * Pairs are split on commas that are outside quoted strings, square brackets
 * and nested braces. Nested tables as values are rejected.
 *
 * @param {string} raw - Candidate text; surrounding whitespace is ignored.
 * @returns {Record<string, unknown> | null} Key/value pairs, or null when the
 *   text is not brace-wrapped, a pair has no `=`, a key or value is empty or
 *   invalid, or a value is itself a table.
 */
function parseInlineTable(raw) {
  const text = raw.trim();
  if (!(text.startsWith("{") && text.endsWith("}"))) return null;
  const inner = text.slice(1, -1).trim();
  if (inner === "") return {};

  // Pass 1: split `inner` into raw "key = value" chunks.
  const pairs = [];
  let buffer = "";
  let openQuote = ""; // "" outside strings, else the quote character that opened the string
  let pendingEscape = false;
  let bracketDepth = 0;
  let braceDepth = 0;
  for (let position = 0; position < inner.length; position += 1) {
    const char = inner.charAt(position);
    const isSeparator = openQuote === "" && char === "," && bracketDepth === 0 && braceDepth === 0;
    if (isSeparator) {
      pairs.push(buffer);
      buffer = "";
      continue;
    }
    // Every character other than a top-level separator belongs to the current pair.
    buffer += char;
    if (openQuote !== "") {
      if (pendingEscape) pendingEscape = false;
      else if (char === "\\" && openQuote === "\"") pendingEscape = true;
      else if (char === openQuote) openQuote = "";
      continue;
    }
    switch (char) {
      case "\"":
      case "'":
        openQuote = char;
        break;
      case "[":
        bracketDepth += 1;
        break;
      case "]":
        bracketDepth = Math.max(0, bracketDepth - 1);
        break;
      case "{":
        braceDepth += 1;
        break;
      case "}":
        braceDepth = Math.max(0, braceDepth - 1);
        break;
      default:
        break;
    }
  }
  if (buffer.trim()) pairs.push(buffer);

  // Pass 2: turn each chunk into a validated key and value.
  const table = {};
  for (const pair of pairs) {
    const equalsAt = pair.indexOf("=");
    if (equalsAt === -1) return null;
    const key = normalizeKeyPath(pair.slice(0, equalsAt));
    if (!key) return null;
    const valueText = pair.slice(equalsAt + 1).trim();
    if (!valueText || valueText.startsWith("{")) return null;
    const parsed = normalizeValue(valueText);
    if (parsed === null) return null;
    const isTable = typeof parsed === "object" && !Array.isArray(parsed);
    if (isTable) return null;
    table[key] = parsed;
  }
  return table;
}
758
- function buildProgressLine(completed, total, startedAtMs) {
759
- const safeTotal = Math.max(total, 1);
760
- const percent = completed >= safeTotal ? 100 : Math.floor(completed / safeTotal * 100);
761
- return `Counting files [${buildProgressBar(completed, safeTotal)}] ${`${String(percent).padStart(3, " ")}%`} ${String(completed).padStart(String(safeTotal).length, " ")}/${safeTotal} elapsed ${formatElapsed(startedAtMs)}`;
762
/**
 * Parses the right-hand side of a TOML assignment.
 *
 * Brace-wrapped text is handed to `parseInlineTable`, bracket-wrapped text to
 * `parseArray`, and everything else to `parsePrimitive`.
 *
 * @param {string} value - Raw value text.
 * @returns {unknown} The parsed value, or null when the text is empty or
 *   cannot be parsed.
 */
function normalizeValue(value) {
  if (!value) return null;
  const text = value.trim();
  const wrappedBy = (open, close) => text.startsWith(open) && text.endsWith(close);
  if (wrappedBy("{", "}")) return parseInlineTable(text);
  const list = parseArray(text);
  if (list) return list;
  // Bracketed text that failed array parsing is invalid, not a bare token.
  return wrappedBy("[", "]") ? null : parsePrimitive(text);
}
763
- function buildFinalizingLine(startedAtMs) {
764
- return `Finalizing aggregate... elapsed ${formatElapsed(startedAtMs)}`;
771
/**
 * Renders a parsed value as plain text.
 *
 * @param {unknown} value - Parsed value.
 * @returns {string} "" for null/undefined, array items joined with ", ", and
 *   `String(value)` for anything else.
 */
function toPlainText(value) {
  if (value === null || value === undefined) return "";
  return Array.isArray(value) ? value.map((item) => `${String(item)}`).join(", ") : String(value);
}
766
- function createBatchProgressReporter(options) {
767
- const enabled = options.enabled;
768
- const isTTY = Boolean(options.stream.isTTY);
769
- const clearOnFinish = options.clearOnFinish ?? true;
770
- let active = false;
771
- let total = 0;
772
- let lastLineLength = 0;
773
- let startedAtMs = 0;
774
- let lastRenderedPercent = -1;
775
- let finalizingStarted = false;
776
- const writeTTYLine = (line) => {
777
- const trailingPadding = lastLineLength > line.length ? " ".repeat(lastLineLength - line.length) : "";
778
- options.stream.write(`\r${line}${trailingPadding}`);
779
- lastLineLength = line.length;
780
- };
781
- const render = (completed) => {
782
- const line = buildProgressLine(completed, total, startedAtMs);
783
- const safeTotal = Math.max(total, 1);
784
- const percent = completed >= safeTotal ? 100 : Math.floor(completed / safeTotal * 100);
785
- if (!isTTY && percent === lastRenderedPercent && completed < safeTotal) return;
786
- lastRenderedPercent = percent;
787
- if (isTTY) {
788
- writeTTYLine(line);
789
- return;
790
- }
791
- lastLineLength = line.length;
792
- options.stream.write(`${line}\n`);
793
- };
794
- const clearLine = () => {
795
- if (lastLineLength === 0) return;
796
- options.stream.write(`\r${" ".repeat(lastLineLength)}\r`);
797
- lastLineLength = 0;
798
- };
799
- return {
800
- enabled,
801
- start(nextTotal, nextStartedAtMs) {
802
- if (!enabled || nextTotal <= 1) return;
803
- total = nextTotal;
804
- active = true;
805
- startedAtMs = nextStartedAtMs ?? Date.now();
806
- lastRenderedPercent = -1;
807
- finalizingStarted = false;
808
- render(0);
809
- },
810
- advance(snapshot) {
811
- if (!active) return;
812
- render(snapshot.completed);
813
- },
814
- startFinalizing() {
815
- if (!active || finalizingStarted) return;
816
- finalizingStarted = true;
817
- const line = buildFinalizingLine(startedAtMs);
818
- if (isTTY) {
819
- if (!clearOnFinish) {
820
- options.stream.write(`\n${line}`);
821
- lastLineLength = line.length;
822
- return;
776
+
777
+ //#endregion
778
+ //#region src/markdown/toml/parse-frontmatter.ts
779
/**
 * Parses TOML frontmatter into a flat map of plain-text values.
 *
 * Keys under a `[table]` header are stored as "table.key". Each `[[table]]`
 * header starts a new entry in a list stored under "table"; once parsing
 * finishes, every such list is collapsed to one string by `flattenArrayTables`.
 * Inline-table values are expanded to "key.inlineKey" entries. Triple-quoted
 * strings may span several lines.
 *
 * @param {string} frontmatter - Frontmatter text without its fence lines.
 * @returns {Record<string, string> | null} The flattened key/value map, or
 *   null when any line cannot be parsed.
 */
function parseTomlFrontmatter(frontmatter) {
  const result = {};
  const lines = frontmatter.split("\n");
  let tablePrefix = "";
  let tableTarget = null;
  let tablePrefixInList = false;
  for (let index = 0; index < lines.length; index += 1) {
    const rawLine = lines[index] ?? "";
    const trimmedLine = rawLine.trim();
    if (!trimmedLine || trimmedLine.startsWith("#")) continue;
    if (trimmedLine.startsWith("[")) {
      // A header may carry a trailing comment ("[table] # note"); strip it
      // first so the header still matches instead of failing the whole parse.
      const headerLine = stripInlineComment(trimmedLine);
      if (headerLine.startsWith("[[")) {
        const match = headerLine.match(/^\[\[([^\]]+)]]$/);
        if (!match) return null;
        const normalizedTable = normalizeKeyPath(match[1] ?? "");
        if (!normalizedTable) return null;
        const list = ensureArrayContainer(result, normalizedTable);
        const newEntry = {};
        list.push(newEntry);
        tableTarget = newEntry;
        tablePrefix = normalizedTable;
        tablePrefixInList = true;
        continue;
      }
      const tableMatch = headerLine.match(/^\[([^\]]+)]$/);
      if (tableMatch) {
        const normalizedTable = normalizeKeyPath(tableMatch[1] ?? "");
        if (!normalizedTable) return null;
        tablePrefix = normalizedTable;
        tablePrefixInList = false;
        tableTarget = null;
        continue;
      }
    }
    // Lines containing a triple delimiter are handled below, where the comment
    // is stripped only from the text after the closing delimiter.
    const lineForParsing = /("""|''')/.test(rawLine) ? rawLine : stripInlineComment(rawLine);
    const separatorIndex = lineForParsing.indexOf("=");
    if (separatorIndex === -1) return null;
    const key = normalizeKeyPath(lineForParsing.slice(0, separatorIndex));
    let valueRaw = lineForParsing.slice(separatorIndex + 1).trim();
    if (!key) return null;
    const tripleDelimiter = valueRaw.startsWith("\"\"\"") ? "\"\"\"" : valueRaw.startsWith("'''") ? "'''" : null;
    if (tripleDelimiter) {
      const closingIndex = valueRaw.indexOf(tripleDelimiter, tripleDelimiter.length);
      if (closingIndex !== -1) {
        // Opened and closed on the same line: only the tail may hold a comment.
        const strippedAfter = stripInlineComment(valueRaw.slice(closingIndex + tripleDelimiter.length));
        valueRaw = `${valueRaw.slice(0, closingIndex + tripleDelimiter.length)}${strippedAfter}`;
      } else {
        // Multi-line string: consume following lines until one ends with the delimiter.
        const closingPattern = new RegExp(`${tripleDelimiter}\\s*$`);
        let combined = valueRaw;
        let closed = false;
        while (index + 1 < lines.length) {
          index += 1;
          const nextLine = lines[index] ?? "";
          combined += `\n${nextLine}`;
          if (closingPattern.test(nextLine)) {
            closed = true;
            break;
          }
        }
        if (!closed) return null;
        valueRaw = combined;
      }
    }
    const normalized = normalizeValue(valueRaw);
    if (normalized === null) return null;
    const fullKey = tablePrefix ? `${tablePrefix}.${key}` : key;
    if (typeof normalized === "object" && !Array.isArray(normalized)) {
      for (const [inlineKey, inlineValue] of Object.entries(normalized)) {
        // Inside a [[list]] entry, keys are relative to that entry.
        const entryKey = tablePrefixInList ? `${key}.${inlineKey}` : `${fullKey}.${inlineKey}`;
        if (tablePrefixInList && tableTarget) tableTarget[entryKey] = toPlainText(inlineValue);
        else result[entryKey] = toPlainText(inlineValue);
      }
      continue;
    }
    if (tablePrefixInList && tableTarget) {
      tableTarget[key] = toPlainText(normalized);
      continue;
    }
    result[fullKey] = toPlainText(normalized);
  }
  flattenArrayTables(result);
  return result;
}
838
860
 
839
861
  //#endregion
840
- //#region src/markdown/toml/arrays.ts
841
- function ensureArrayContainer(result, key) {
842
- const existing = result[key];
843
- if (Array.isArray(existing)) return existing;
844
- const list = [];
845
- result[key] = list;
846
- return list;
862
+ //#region src/markdown/parse-markdown.ts
863
+ const FENCE_TO_TYPE = {
864
+ "---": "yaml",
865
+ "+++": "toml",
866
+ ";;;": "json"
867
+ };
868
+ function normalizeNewlines(input) {
869
+ return input.replace(/\r\n/g, "\n");
847
870
  }
848
- function flattenArrayTables(result) {
849
- for (const [key, value] of Object.entries(result)) {
850
- if (!Array.isArray(value)) continue;
851
- result[key] = value.map((entry) => Object.entries(entry).map(([entryKey, entryValue]) => `${entryKey}=${entryValue}`).join(", ")).join(" | ");
852
- }
871
+ function stripBom(line) {
872
+ return line.startsWith("") ? line.slice(1) : line;
853
873
  }
854
-
855
- //#endregion
856
- //#region src/markdown/toml/keys.ts
857
- function stripKeyQuotes(key) {
858
- const trimmed = key.trim();
859
- if (trimmed.startsWith("\"") && trimmed.endsWith("\"") || trimmed.startsWith("'") && trimmed.endsWith("'")) return trimmed.slice(1, -1);
860
- return trimmed;
874
+ function getFenceType(line) {
875
+ const match = line.match(/^[\t ]*(---|\+\+\+|;;;)[\t ]*$/);
876
+ if (!match) return null;
877
+ return FENCE_TO_TYPE[match[1] ?? ""] ?? null;
861
878
  }
862
- function normalizeKeyPath(key) {
863
- const trimmed = key.trim();
864
- if (!trimmed) return null;
865
- if (trimmed.startsWith("\"") && trimmed.endsWith("\"") || trimmed.startsWith("'") && trimmed.endsWith("'")) {
866
- const unquoted = stripKeyQuotes(trimmed);
867
- return unquoted ? unquoted : null;
879
+ function parseFrontmatter(frontmatter, type) {
880
+ if (!type) return null;
881
+ if (type === "json") try {
882
+ return JSON.parse(frontmatter);
883
+ } catch {
884
+ return null;
868
885
  }
869
- const segments = trimmed.split(".").map((segment) => segment.trim());
870
- if (segments.some((segment) => !segment)) return null;
871
- return segments.join(".");
886
+ if (type === "yaml") {
887
+ const doc = parseDocument(frontmatter, { prettyErrors: false });
888
+ if (doc.errors.length > 0) return null;
889
+ const data = doc.toJSON();
890
+ if (!data || typeof data !== "object" || Array.isArray(data)) return null;
891
+ return data;
892
+ }
893
+ if (type === "toml") return parseTomlFrontmatter(frontmatter);
894
+ return null;
872
895
  }
873
-
874
- //#endregion
875
- //#region src/markdown/toml/strings.ts
876
- function stripInlineComment(line) {
877
- let inString = null;
896
+ function extractJsonBlock(text, startIndex) {
897
+ let depth = 0;
898
+ let inString = false;
878
899
  let escaped = false;
879
- for (let i = 0; i < line.length; i += 1) {
880
- const char = line[i] ?? "";
900
+ for (let i = startIndex; i < text.length; i += 1) {
901
+ const char = text[i] ?? "";
881
902
  if (inString) {
882
903
  if (escaped) {
883
904
  escaped = false;
884
905
  continue;
885
906
  }
886
- if (char === "\\" && inString === "double") {
907
+ if (char === "\\") {
887
908
  escaped = true;
888
909
  continue;
889
910
  }
890
- if (inString === "double" && char === "\"") {
891
- inString = null;
892
- continue;
893
- }
894
- if (inString === "single" && char === "'") {
895
- inString = null;
896
- continue;
897
- }
911
+ if (char === "\"") inString = false;
898
912
  continue;
899
913
  }
900
914
  if (char === "\"") {
901
- inString = "double";
915
+ inString = true;
902
916
  continue;
903
917
  }
904
- if (char === "'") {
905
- inString = "single";
918
+ if (char === "{") {
919
+ depth += 1;
906
920
  continue;
907
921
  }
908
- if (char === "#") return line.slice(0, i).trimEnd();
922
+ if (char === "}") {
923
+ depth -= 1;
924
+ if (depth === 0) return {
925
+ jsonText: text.slice(startIndex, i + 1),
926
+ endIndex: i
927
+ };
928
+ }
909
929
  }
910
- return line;
911
- }
912
- function unescapeBasic(input) {
913
- return input.replace(/\\\\/g, "\\").replace(/\\"/g, "\"").replace(/\\n/g, "\n").replace(/\\t/g, " ").replace(/\\r/g, "\r");
914
- }
915
- function parseStringLiteral(value) {
916
- if (value.startsWith("\"\"\"") && value.endsWith("\"\"\"")) return unescapeBasic(value.slice(3, -3));
917
- if (value.startsWith("'''") && value.endsWith("'''")) return value.slice(3, -3);
918
- if (value.startsWith("\"") && value.endsWith("\"")) return unescapeBasic(value.slice(1, -1));
919
- if (value.startsWith("'") && value.endsWith("'")) return value.slice(1, -1);
920
930
  return null;
921
931
  }
922
-
923
- //#endregion
924
- //#region src/markdown/toml/values.ts
925
- function parsePrimitive(raw) {
926
- const value = raw.trim();
927
- if (!value) return null;
928
- const stringLiteral = parseStringLiteral(value);
929
- if (stringLiteral !== null) return stringLiteral;
930
- if (value === "true") return true;
931
- if (value === "false") return false;
932
- if (/^[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$/.test(value)) return Number(value);
933
- if (/^\d{4}-\d{2}-\d{2}/.test(value)) return value;
934
- return value;
935
- }
936
- function parseArray(raw) {
937
- const value = raw.trim();
938
- if (!value.startsWith("[") || !value.endsWith("]")) return null;
939
- const inner = value.slice(1, -1).trim();
940
- if (!inner) return [];
941
- const items = [];
942
- let current = "";
943
- let inString = null;
944
- let escaped = false;
945
- for (let i = 0; i < inner.length; i += 1) {
946
- const char = inner[i] ?? "";
947
- if (inString) {
948
- current += char;
949
- if (escaped) {
950
- escaped = false;
951
- continue;
952
- }
953
- if (char === "\\" && inString === "double") {
954
- escaped = true;
955
- continue;
956
- }
957
- if (inString === "double" && char === "\"") inString = null;
958
- else if (inString === "single" && char === "'") inString = null;
959
- continue;
960
- }
961
- if (char === "\"") {
962
- inString = "double";
963
- current += char;
964
- continue;
965
- }
966
- if (char === "'") {
967
- inString = "single";
968
- current += char;
969
- continue;
970
- }
971
- if (char === ",") {
972
- const item = parsePrimitive(current);
973
- if (item === null) return null;
974
- items.push(item);
975
- current = "";
976
- continue;
977
- }
978
- current += char;
932
+ function parseMarkdown(input) {
933
+ const normalized = normalizeNewlines(input);
934
+ const lines = normalized.split("\n");
935
+ if (lines.length === 0) return {
936
+ frontmatter: null,
937
+ content: normalized,
938
+ data: null,
939
+ frontmatterType: null
940
+ };
941
+ lines[0] = stripBom(lines[0] ?? "");
942
+ const normalizedWithoutBom = lines.join("\n");
943
+ const openingType = getFenceType(lines[0] ?? "");
944
+ if (!openingType) {
945
+ const jsonStart = (normalizedWithoutBom.match(/^[\t \n]*/)?.[0] ?? "").length;
946
+ if (normalizedWithoutBom[jsonStart] !== "{") return {
947
+ frontmatter: null,
948
+ content: normalizedWithoutBom,
949
+ data: null,
950
+ frontmatterType: null
951
+ };
952
+ const jsonBlock = extractJsonBlock(normalizedWithoutBom, jsonStart);
953
+ if (!jsonBlock) return {
954
+ frontmatter: null,
955
+ content: normalizedWithoutBom,
956
+ data: null,
957
+ frontmatterType: null
958
+ };
959
+ const frontmatter = jsonBlock.jsonText;
960
+ let content = normalizedWithoutBom.slice(jsonBlock.endIndex + 1);
961
+ if (content.startsWith("\n")) content = content.slice(1);
962
+ const data = parseFrontmatter(frontmatter, "json");
963
+ if (!data) return {
964
+ frontmatter: null,
965
+ content: normalizedWithoutBom,
966
+ data: null,
967
+ frontmatterType: null
968
+ };
969
+ return {
970
+ frontmatter,
971
+ content,
972
+ data,
973
+ frontmatterType: "json"
974
+ };
979
975
  }
980
- const finalItem = parsePrimitive(current);
981
- if (finalItem === null) return null;
982
- items.push(finalItem);
983
- return items;
976
+ let closingIndex = -1;
977
+ for (let i = 1; i < lines.length; i += 1) if (getFenceType(lines[i] ?? "") === openingType) {
978
+ closingIndex = i;
979
+ break;
980
+ }
981
+ if (closingIndex === -1) return {
982
+ frontmatter: null,
983
+ content: normalizedWithoutBom,
984
+ data: null,
985
+ frontmatterType: null
986
+ };
987
+ const frontmatter = lines.slice(1, closingIndex).join("\n");
988
+ return {
989
+ frontmatter,
990
+ content: lines.slice(closingIndex + 1).join("\n"),
991
+ data: parseFrontmatter(frontmatter, openingType),
992
+ frontmatterType: openingType
993
+ };
984
994
  }
985
- function parseInlineTable(raw) {
986
- const trimmed = raw.trim();
987
- if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) return null;
988
- const inner = trimmed.slice(1, -1).trim();
989
- if (!inner) return {};
990
- const pairs = [];
991
- let current = "";
992
- let inString = null;
993
- let escaped = false;
994
- let bracketDepth = 0;
995
- let braceDepth = 0;
996
- for (let i = 0; i < inner.length; i += 1) {
997
- const char = inner[i] ?? "";
998
- if (inString) {
999
- current += char;
1000
- if (escaped) {
1001
- escaped = false;
1002
- continue;
1003
- }
1004
- if (char === "\\" && inString === "double") {
1005
- escaped = true;
1006
- continue;
1007
- }
1008
- if (inString === "double" && char === "\"") inString = null;
1009
- else if (inString === "single" && char === "'") inString = null;
1010
- continue;
1011
- }
1012
- if (char === "\"") {
1013
- inString = "double";
1014
- current += char;
1015
- continue;
1016
- }
1017
- if (char === "'") {
1018
- inString = "single";
1019
- current += char;
1020
- continue;
1021
- }
1022
- if (char === "[") {
1023
- bracketDepth += 1;
1024
- current += char;
1025
- continue;
995
+
996
+ //#endregion
997
+ //#region src/wc/segmenter.ts
998
+ const segmenterCache = /* @__PURE__ */ new Map();
999
+ const graphemeSegmenterCache = /* @__PURE__ */ new Map();
1000
+ function getSegmenter(locale) {
1001
+ const cached = segmenterCache.get(locale);
1002
+ if (cached) return cached;
1003
+ const segmenter = new Intl.Segmenter(locale, { granularity: "word" });
1004
+ segmenterCache.set(locale, segmenter);
1005
+ return segmenter;
1006
+ }
1007
+ function getGraphemeSegmenter(locale) {
1008
+ const cached = graphemeSegmenterCache.get(locale);
1009
+ if (cached) return cached;
1010
+ const segmenter = new Intl.Segmenter(locale, { granularity: "grapheme" });
1011
+ graphemeSegmenterCache.set(locale, segmenter);
1012
+ return segmenter;
1013
+ }
1014
+ function supportsSegmenter() {
1015
+ return typeof Intl !== "undefined" && typeof Intl.Segmenter === "function";
1016
+ }
1017
+ function countCharsForLocale(text, locale) {
1018
+ if (!supportsSegmenter()) return Array.from(text).length;
1019
+ const segmenter = getGraphemeSegmenter(locale);
1020
+ let count = 0;
1021
+ for (const _segment of segmenter.segment(text)) count++;
1022
+ return count;
1023
+ }
1024
+
1025
+ //#endregion
1026
+ //#region src/wc/non-words.ts
1027
+ const emojiRegex = /(?:\p{Extended_Pictographic}|\p{Emoji_Presentation})/u;
1028
+ const emojiPresentationRegex = /\p{Emoji_Presentation}/u;
1029
+ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
1030
+ const symbolRegex = /\p{S}/u;
1031
+ const punctuationRegex = /\p{P}/u;
1032
+ const whitespaceRegex = /\s/u;
1033
+ const newlineChars = new Set([
1034
+ "\n",
1035
+ "\r",
1036
+ "\u2028",
1037
+ "\u2029"
1038
+ ]);
1039
+ function createNonWordCollection() {
1040
+ return {
1041
+ emoji: [],
1042
+ symbols: [],
1043
+ punctuation: [],
1044
+ counts: {
1045
+ emoji: 0,
1046
+ symbols: 0,
1047
+ punctuation: 0
1026
1048
  }
1027
- if (char === "]") {
1028
- if (bracketDepth > 0) bracketDepth -= 1;
1029
- current += char;
1049
+ };
1050
+ }
1051
+ function addNonWord(collection, category, segment) {
1052
+ if (category === "emoji") {
1053
+ collection.emoji.push(segment);
1054
+ collection.counts.emoji += 1;
1055
+ return;
1056
+ }
1057
+ if (category === "symbol") {
1058
+ collection.symbols.push(segment);
1059
+ collection.counts.symbols += 1;
1060
+ return;
1061
+ }
1062
+ collection.punctuation.push(segment);
1063
+ collection.counts.punctuation += 1;
1064
+ }
1065
+ function addWhitespace(collection, segment) {
1066
+ let whitespace = collection.whitespace;
1067
+ let count = 0;
1068
+ for (const char of segment) {
1069
+ if (char === " ") {
1070
+ whitespace = whitespace ?? createWhitespaceCounts();
1071
+ whitespace.spaces += 1;
1072
+ count += 1;
1030
1073
  continue;
1031
1074
  }
1032
- if (char === "{") {
1033
- braceDepth += 1;
1034
- current += char;
1075
+ if (char === " ") {
1076
+ whitespace = whitespace ?? createWhitespaceCounts();
1077
+ whitespace.tabs += 1;
1078
+ count += 1;
1035
1079
  continue;
1036
1080
  }
1037
- if (char === "}") {
1038
- if (braceDepth > 0) braceDepth -= 1;
1039
- current += char;
1081
+ if (newlineChars.has(char)) {
1082
+ whitespace = whitespace ?? createWhitespaceCounts();
1083
+ whitespace.newlines += 1;
1084
+ count += 1;
1040
1085
  continue;
1041
1086
  }
1042
- if (char === "," && bracketDepth === 0 && braceDepth === 0) {
1043
- pairs.push(current);
1044
- current = "";
1045
- continue;
1087
+ if (whitespaceRegex.test(char)) {
1088
+ whitespace = whitespace ?? createWhitespaceCounts();
1089
+ whitespace.other += 1;
1090
+ count += 1;
1046
1091
  }
1047
- current += char;
1048
1092
  }
1049
- if (current.trim()) pairs.push(current);
1050
- const output = {};
1051
- for (const pair of pairs) {
1052
- const separatorIndex = pair.indexOf("=");
1053
- if (separatorIndex === -1) return null;
1054
- const key = normalizeKeyPath(pair.slice(0, separatorIndex));
1055
- if (!key) return null;
1056
- const valueRaw = pair.slice(separatorIndex + 1).trim();
1057
- if (!valueRaw) return null;
1058
- if (valueRaw.startsWith("{")) return null;
1059
- const normalized = normalizeValue(valueRaw);
1060
- if (normalized === null) return null;
1061
- if (typeof normalized === "object" && !Array.isArray(normalized)) return null;
1062
- output[key] = normalized;
1093
+ if (count > 0) {
1094
+ collection.whitespace = whitespace ?? createWhitespaceCounts();
1095
+ collection.counts.whitespace = (collection.counts.whitespace ?? 0) + count;
1063
1096
  }
1064
- return output;
1065
- }
1066
- function normalizeValue(value) {
1067
- if (!value) return null;
1068
- const trimmed = value.trim();
1069
- if (trimmed.startsWith("{") && trimmed.endsWith("}")) return parseInlineTable(trimmed);
1070
- const array = parseArray(trimmed);
1071
- if (array) return array;
1072
- if (trimmed.startsWith("[") && trimmed.endsWith("]")) return null;
1073
- return parsePrimitive(trimmed);
1097
+ return count;
1074
1098
  }
1075
- function toPlainText(value) {
1076
- if (value == null) return "";
1077
- if (Array.isArray(value)) return value.map((item) => String(item)).join(", ");
1078
- return String(value);
1099
+ function classifyNonWordSegment(segment) {
1100
+ const hasEmojiVariationSelector = segment.includes("");
1101
+ if (keycapEmojiRegex.test(segment) || emojiPresentationRegex.test(segment) || hasEmojiVariationSelector && emojiRegex.test(segment)) return "emoji";
1102
+ if (symbolRegex.test(segment)) return "symbol";
1103
+ if (punctuationRegex.test(segment)) return "punctuation";
1104
+ return null;
1105
+ }
1106
+ function mergeNonWordCollections(target, source) {
1107
+ if (source.counts.emoji > 0) {
1108
+ appendAll(target.emoji, source.emoji);
1109
+ target.counts.emoji += source.counts.emoji;
1110
+ }
1111
+ if (source.counts.symbols > 0) {
1112
+ appendAll(target.symbols, source.symbols);
1113
+ target.counts.symbols += source.counts.symbols;
1114
+ }
1115
+ if (source.counts.punctuation > 0) {
1116
+ appendAll(target.punctuation, source.punctuation);
1117
+ target.counts.punctuation += source.counts.punctuation;
1118
+ }
1119
+ if (source.counts.whitespace && source.counts.whitespace > 0 && source.whitespace) {
1120
+ const whitespace = target.whitespace ?? createWhitespaceCounts();
1121
+ whitespace.spaces += source.whitespace.spaces;
1122
+ whitespace.tabs += source.whitespace.tabs;
1123
+ whitespace.newlines += source.whitespace.newlines;
1124
+ whitespace.other += source.whitespace.other;
1125
+ target.whitespace = whitespace;
1126
+ target.counts.whitespace = (target.counts.whitespace ?? 0) + source.counts.whitespace;
1127
+ }
1128
+ return target;
1129
+ }
1130
+ function createWhitespaceCounts() {
1131
+ return {
1132
+ spaces: 0,
1133
+ tabs: 0,
1134
+ newlines: 0,
1135
+ other: 0
1136
+ };
1079
1137
  }
1080
1138
 
1081
1139
  //#endregion
1082
- //#region src/markdown/toml/parse-frontmatter.ts
1083
- function parseTomlFrontmatter(frontmatter) {
1084
- const result = {};
1085
- const lines = frontmatter.split("\n");
1086
- let tablePrefix = "";
1087
- let tableTarget = null;
1088
- let tablePrefixInList = false;
1089
- for (let index = 0; index < lines.length; index += 1) {
1090
- const rawLine = lines[index] ?? "";
1091
- const trimmedLine = rawLine.trim();
1092
- if (!trimmedLine || trimmedLine.startsWith("#")) continue;
1093
- if (trimmedLine.startsWith("[[")) {
1094
- const match = trimmedLine.match(/^\[\[([^\]]+)]]$/);
1095
- if (!match) return null;
1096
- const normalizedTable = normalizeKeyPath(match[1] ?? "");
1097
- if (!normalizedTable) return null;
1098
- const list = ensureArrayContainer(result, normalizedTable);
1099
- const newEntry = {};
1100
- list.push(newEntry);
1101
- tableTarget = newEntry;
1102
- tablePrefix = normalizedTable;
1103
- tablePrefixInList = true;
1104
- continue;
1105
- }
1106
- const tableMatch = trimmedLine.match(/^\[([^\]]+)]$/);
1107
- if (tableMatch) {
1108
- const normalizedTable = normalizeKeyPath(tableMatch[1] ?? "");
1109
- if (!normalizedTable) return null;
1110
- tablePrefix = normalizedTable;
1111
- tablePrefixInList = false;
1112
- tableTarget = null;
1140
+ //#region src/wc/analyze.ts
1141
+ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
1142
+ const segmenter = getSegmenter(chunk.locale);
1143
+ const segments = [];
1144
+ const nonWords = collectNonWords ? createNonWordCollection() : null;
1145
+ for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
1146
+ else if (collectNonWords && nonWords) {
1147
+ if (includeWhitespace) addWhitespace(nonWords, part.segment);
1148
+ const category = classifyNonWordSegment(part.segment);
1149
+ if (category) addNonWord(nonWords, category, part.segment);
1150
+ }
1151
+ return {
1152
+ locale: chunk.locale,
1153
+ text: chunk.text,
1154
+ segments,
1155
+ words: segments.length,
1156
+ nonWords: nonWords ?? void 0
1157
+ };
1158
+ }
1159
+ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
1160
+ const segmenter = getSegmenter(chunk.locale);
1161
+ const nonWords = collectNonWords ? createNonWordCollection() : null;
1162
+ let chars = 0;
1163
+ let wordChars = 0;
1164
+ let nonWordChars = 0;
1165
+ for (const part of segmenter.segment(chunk.text)) {
1166
+ if (part.isWordLike) {
1167
+ const count = countCharsForLocale(part.segment, chunk.locale);
1168
+ chars += count;
1169
+ wordChars += count;
1113
1170
  continue;
1114
1171
  }
1115
- const lineForParsing = /("""|''')/.test(rawLine) ? rawLine : stripInlineComment(rawLine);
1116
- const separatorIndex = lineForParsing.indexOf("=");
1117
- if (separatorIndex === -1) return null;
1118
- const key = normalizeKeyPath(lineForParsing.slice(0, separatorIndex));
1119
- let valueRaw = lineForParsing.slice(separatorIndex + 1).trim();
1120
- if (!key) return null;
1121
- const tripleDelimiter = valueRaw.startsWith("\"\"\"") ? "\"\"\"" : valueRaw.startsWith("'''") ? "'''" : null;
1122
- if (tripleDelimiter) {
1123
- const closingIndex = valueRaw.indexOf(tripleDelimiter, tripleDelimiter.length);
1124
- if (closingIndex !== -1) {
1125
- const strippedAfter = stripInlineComment(valueRaw.slice(closingIndex + tripleDelimiter.length));
1126
- valueRaw = `${valueRaw.slice(0, closingIndex + tripleDelimiter.length)}${strippedAfter}`;
1127
- } else {
1128
- const delimiter = tripleDelimiter;
1129
- let combined = valueRaw;
1130
- let closed = false;
1131
- while (index + 1 < lines.length) {
1132
- index += 1;
1133
- const nextLine = lines[index] ?? "";
1134
- combined += `\n${nextLine}`;
1135
- if (new RegExp(`${delimiter}\\s*$`).test(nextLine)) {
1136
- closed = true;
1137
- break;
1138
- }
1139
- }
1140
- if (!closed) return null;
1141
- valueRaw = combined;
1172
+ if (collectNonWords && nonWords) {
1173
+ let whitespaceCount = 0;
1174
+ if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
1175
+ const category = classifyNonWordSegment(part.segment);
1176
+ if (category) addNonWord(nonWords, category, part.segment);
1177
+ if (category || whitespaceCount > 0) {
1178
+ const count = countCharsForLocale(part.segment, chunk.locale);
1179
+ chars += count;
1180
+ nonWordChars += count;
1142
1181
  }
1143
1182
  }
1144
- const normalized = normalizeValue(valueRaw);
1145
- if (normalized === null) return null;
1146
- const fullKey = tablePrefix ? `${tablePrefix}.${key}` : key;
1147
- if (typeof normalized === "object" && !Array.isArray(normalized)) {
1148
- for (const [inlineKey, inlineValue] of Object.entries(normalized)) {
1149
- const entryKey = tablePrefixInList ? `${key}.${inlineKey}` : `${fullKey}.${inlineKey}`;
1150
- if (tablePrefixInList && tableTarget) tableTarget[entryKey] = toPlainText(inlineValue);
1151
- else result[entryKey] = toPlainText(inlineValue);
1183
+ }
1184
+ return {
1185
+ locale: chunk.locale,
1186
+ text: chunk.text,
1187
+ chars,
1188
+ wordChars,
1189
+ nonWordChars,
1190
+ nonWords: nonWords ?? void 0
1191
+ };
1192
+ }
1193
+ function aggregateCharsByLocale(chunks) {
1194
+ const order = [];
1195
+ const map = /* @__PURE__ */ new Map();
1196
+ for (const chunk of chunks) {
1197
+ const existing = map.get(chunk.locale);
1198
+ if (existing) {
1199
+ existing.chars += chunk.chars;
1200
+ existing.wordChars += chunk.wordChars;
1201
+ existing.nonWordChars += chunk.nonWordChars;
1202
+ if (chunk.nonWords) {
1203
+ if (!existing.nonWords) existing.nonWords = createNonWordCollection();
1204
+ mergeNonWordCollections(existing.nonWords, chunk.nonWords);
1152
1205
  }
1153
1206
  continue;
1154
1207
  }
1155
- if (tablePrefixInList && tableTarget) {
1156
- tableTarget[key] = toPlainText(normalized);
1208
+ order.push(chunk.locale);
1209
+ map.set(chunk.locale, {
1210
+ locale: chunk.locale,
1211
+ chars: chunk.chars,
1212
+ wordChars: chunk.wordChars,
1213
+ nonWordChars: chunk.nonWordChars,
1214
+ nonWords: chunk.nonWords ? mergeNonWordCollections(createNonWordCollection(), chunk.nonWords) : void 0
1215
+ });
1216
+ }
1217
+ return order.map((locale) => map.get(locale));
1218
+ }
1219
+ function aggregateByLocale(chunks) {
1220
+ const order = [];
1221
+ const map = /* @__PURE__ */ new Map();
1222
+ for (const chunk of chunks) {
1223
+ const existing = map.get(chunk.locale);
1224
+ if (existing) {
1225
+ existing.words += chunk.words;
1226
+ appendAll(existing.segments, chunk.segments);
1157
1227
  continue;
1158
1228
  }
1159
- result[fullKey] = toPlainText(normalized);
1229
+ order.push(chunk.locale);
1230
+ map.set(chunk.locale, {
1231
+ locale: chunk.locale,
1232
+ words: chunk.words,
1233
+ segments: [...chunk.segments]
1234
+ });
1160
1235
  }
1161
- flattenArrayTables(result);
1162
- return result;
1236
+ return order.map((locale) => map.get(locale));
1163
1237
  }
1164
1238
 
1165
1239
  //#endregion
1166
- //#region src/markdown/parse-markdown.ts
1167
- const FENCE_TO_TYPE = {
1168
- "---": "yaml",
1169
- "+++": "toml",
1170
- ";;;": "json"
1240
+ //#region src/wc/mode.ts
1241
+ const MODE_ALIASES = {
1242
+ chunk: "chunk",
1243
+ chunks: "chunk",
1244
+ segments: "segments",
1245
+ segment: "segments",
1246
+ seg: "segments",
1247
+ collector: "collector",
1248
+ collect: "collector",
1249
+ colle: "collector",
1250
+ char: "char",
1251
+ chars: "char",
1252
+ character: "char",
1253
+ characters: "char",
1254
+ "char-collector": "char-collector"
1171
1255
  };
1172
- function normalizeNewlines(input) {
1173
- return input.replace(/\r\n/g, "\n");
1174
- }
1175
- function stripBom(line) {
1176
- return line.startsWith("") ? line.slice(1) : line;
1256
+ const CHAR_MODE_ALIASES = new Set([
1257
+ "char",
1258
+ "chars",
1259
+ "character",
1260
+ "characters"
1261
+ ]);
1262
+ const COLLECTOR_MODE_ALIASES = new Set([
1263
+ "collector",
1264
+ "collect",
1265
+ "colle",
1266
+ "col"
1267
+ ]);
1268
+ function collapseSeparators(value) {
1269
+ return value.replace(/[-_\s]+/g, "");
1177
1270
  }
1178
- function getFenceType(line) {
1179
- const match = line.match(/^[\t ]*(---|\+\+\+|;;;)[\t ]*$/);
1180
- if (!match) return null;
1181
- return FENCE_TO_TYPE[match[1] ?? ""] ?? null;
1271
+ function isComposedCharCollectorFromTokens(value) {
1272
+ const tokens = value.split(/[-_\s]+/).map((token) => token.trim()).filter((token) => token.length > 0);
1273
+ if (tokens.length < 2) return false;
1274
+ let hasCharAlias = false;
1275
+ let hasCollectorAlias = false;
1276
+ for (const token of tokens) {
1277
+ if (CHAR_MODE_ALIASES.has(token)) {
1278
+ hasCharAlias = true;
1279
+ continue;
1280
+ }
1281
+ if (COLLECTOR_MODE_ALIASES.has(token)) {
1282
+ hasCollectorAlias = true;
1283
+ continue;
1284
+ }
1285
+ return false;
1286
+ }
1287
+ return hasCharAlias && hasCollectorAlias;
1182
1288
  }
1183
- function parseFrontmatter(frontmatter, type) {
1184
- if (!type) return null;
1185
- if (type === "json") try {
1186
- return JSON.parse(frontmatter);
1187
- } catch {
1188
- return null;
1189
- }
1190
- if (type === "yaml") {
1191
- const doc = parseDocument(frontmatter, { prettyErrors: false });
1192
- if (doc.errors.length > 0) return null;
1193
- const data = doc.toJSON();
1194
- if (!data || typeof data !== "object" || Array.isArray(data)) return null;
1195
- return data;
1196
- }
1197
- if (type === "toml") return parseTomlFrontmatter(frontmatter);
1198
- return null;
1289
+ function isComposedCharCollectorCompact(value) {
1290
+ for (const charAlias of CHAR_MODE_ALIASES) for (const collectorAlias of COLLECTOR_MODE_ALIASES) if (value === `${charAlias}${collectorAlias}` || value === `${collectorAlias}${charAlias}`) return true;
1291
+ return false;
1199
1292
  }
1200
- function extractJsonBlock(text, startIndex) {
1201
- let depth = 0;
1202
- let inString = false;
1203
- let escaped = false;
1204
- for (let i = startIndex; i < text.length; i += 1) {
1205
- const char = text[i] ?? "";
1206
- if (inString) {
1207
- if (escaped) {
1208
- escaped = false;
1209
- continue;
1210
- }
1211
- if (char === "\\") {
1212
- escaped = true;
1213
- continue;
1214
- }
1215
- if (char === "\"") inString = false;
1216
- continue;
1217
- }
1218
- if (char === "\"") {
1219
- inString = true;
1220
- continue;
1221
- }
1222
- if (char === "{") {
1223
- depth += 1;
1224
- continue;
1225
- }
1226
- if (char === "}") {
1227
- depth -= 1;
1228
- if (depth === 0) return {
1229
- jsonText: text.slice(startIndex, i + 1),
1230
- endIndex: i
1231
- };
1232
- }
1233
- }
1234
- return null;
1293
+ function normalizeMode(input) {
1294
+ if (!input) return null;
1295
+ const normalized = input.trim().toLowerCase();
1296
+ const direct = MODE_ALIASES[normalized];
1297
+ if (direct) return direct;
1298
+ if (isComposedCharCollectorFromTokens(normalized)) return "char-collector";
1299
+ const compact = collapseSeparators(normalized);
1300
+ if (isComposedCharCollectorCompact(compact)) return "char-collector";
1301
+ return MODE_ALIASES[compact] ?? null;
1235
1302
  }
1236
- function parseMarkdown(input) {
1237
- const normalized = normalizeNewlines(input);
1238
- const lines = normalized.split("\n");
1239
- if (lines.length === 0) return {
1240
- frontmatter: null,
1241
- content: normalized,
1242
- data: null,
1243
- frontmatterType: null
1244
- };
1245
- lines[0] = stripBom(lines[0] ?? "");
1246
- const normalizedWithoutBom = lines.join("\n");
1247
- const openingType = getFenceType(lines[0] ?? "");
1248
- if (!openingType) {
1249
- const jsonStart = (normalizedWithoutBom.match(/^[\t \n]*/)?.[0] ?? "").length;
1250
- if (normalizedWithoutBom[jsonStart] !== "{") return {
1251
- frontmatter: null,
1252
- content: normalizedWithoutBom,
1253
- data: null,
1254
- frontmatterType: null
1255
- };
1256
- const jsonBlock = extractJsonBlock(normalizedWithoutBom, jsonStart);
1257
- if (!jsonBlock) return {
1258
- frontmatter: null,
1259
- content: normalizedWithoutBom,
1260
- data: null,
1261
- frontmatterType: null
1262
- };
1263
- const frontmatter = jsonBlock.jsonText;
1264
- let content = normalizedWithoutBom.slice(jsonBlock.endIndex + 1);
1265
- if (content.startsWith("\n")) content = content.slice(1);
1266
- const data = parseFrontmatter(frontmatter, "json");
1267
- if (!data) return {
1268
- frontmatter: null,
1269
- content: normalizedWithoutBom,
1270
- data: null,
1271
- frontmatterType: null
1272
- };
1273
- return {
1274
- frontmatter,
1275
- content,
1276
- data,
1277
- frontmatterType: "json"
1278
- };
1279
- }
1280
- let closingIndex = -1;
1281
- for (let i = 1; i < lines.length; i += 1) if (getFenceType(lines[i] ?? "") === openingType) {
1282
- closingIndex = i;
1283
- break;
1284
- }
1285
- if (closingIndex === -1) return {
1286
- frontmatter: null,
1287
- content: normalizedWithoutBom,
1288
- data: null,
1289
- frontmatterType: null
1290
- };
1291
- const frontmatter = lines.slice(1, closingIndex).join("\n");
1292
- return {
1293
- frontmatter,
1294
- content: lines.slice(closingIndex + 1).join("\n"),
1295
- data: parseFrontmatter(frontmatter, openingType),
1296
- frontmatterType: openingType
1297
- };
1303
+ function resolveMode(input, fallback = "chunk") {
1304
+ return normalizeMode(input) ?? fallback;
1298
1305
  }
1299
1306
 
1300
1307
  //#endregion
1301
- //#region src/wc/segmenter.ts
1302
- const segmenterCache = /* @__PURE__ */ new Map();
1303
- const graphemeSegmenterCache = /* @__PURE__ */ new Map();
1304
- function getSegmenter(locale) {
1305
- const cached = segmenterCache.get(locale);
1306
- if (cached) return cached;
1307
- const segmenter = new Intl.Segmenter(locale, { granularity: "word" });
1308
- segmenterCache.set(locale, segmenter);
1309
- return segmenter;
1308
+ //#region src/wc/latin-hints.ts
1309
+ const DEFAULT_LATIN_HINT_RULES_SOURCE = [
1310
+ {
1311
+ tag: "de",
1312
+ pattern: "[äöüÄÖÜß]"
1313
+ },
1314
+ {
1315
+ tag: "es",
1316
+ pattern: "[ñÑ¿¡]"
1317
+ },
1318
+ {
1319
+ tag: "pt",
1320
+ pattern: "[ãõÃÕ]"
1321
+ },
1322
+ {
1323
+ tag: "fr",
1324
+ pattern: "[œŒæÆ]"
1325
+ },
1326
+ {
1327
+ tag: "pl",
1328
+ pattern: "[ąćęłńśźżĄĆĘŁŃŚŹŻ]"
1329
+ },
1330
+ {
1331
+ tag: "tr",
1332
+ pattern: "[ıİğĞşŞ]"
1333
+ },
1334
+ {
1335
+ tag: "ro",
1336
+ pattern: "[ăĂâÂîÎșȘțȚ]"
1337
+ },
1338
+ {
1339
+ tag: "hu",
1340
+ pattern: "[őŐűŰ]"
1341
+ },
1342
+ {
1343
+ tag: "is",
1344
+ pattern: "[ðÐþÞ]"
1345
+ }
1346
+ ];
1347
+ const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
1348
+
1349
+ //#endregion
1350
+ //#region src/wc/locale-detect.ts
1351
+ const DEFAULT_LOCALE = "und-Latn";
1352
+ const DEFAULT_HAN_TAG = "und-Hani";
1353
+ const MAX_LATIN_HINT_PATTERN_LENGTH = 256;
1354
+ const regex = {
1355
+ hiragana: /\p{Script=Hiragana}/u,
1356
+ katakana: /\p{Script=Katakana}/u,
1357
+ hangul: /\p{Script=Hangul}/u,
1358
+ han: /\p{Script=Han}/u,
1359
+ latin: /\p{Script=Latin}/u,
1360
+ arabic: /\p{Script=Arabic}/u,
1361
+ cyrillic: /\p{Script=Cyrillic}/u,
1362
+ devanagari: /\p{Script=Devanagari}/u,
1363
+ thai: /\p{Script=Thai}/u
1364
+ };
1365
+ const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
1366
+ function isLatinLocale(locale, context) {
1367
+ if (context) return context.latinLocales.has(locale);
1368
+ return defaultLatinLocales.has(locale);
1310
1369
  }
1311
- function getGraphemeSegmenter(locale) {
1312
- const cached = graphemeSegmenterCache.get(locale);
1313
- if (cached) return cached;
1314
- const segmenter = new Intl.Segmenter(locale, { granularity: "grapheme" });
1315
- graphemeSegmenterCache.set(locale, segmenter);
1316
- return segmenter;
1370
+ function resolveLatinHint(options) {
1371
+ const latinTagHint = options.latinTagHint?.trim();
1372
+ if (latinTagHint) return latinTagHint;
1373
+ const latinLanguageHint = options.latinLanguageHint?.trim();
1374
+ if (latinLanguageHint) return latinLanguageHint;
1375
+ const latinLocaleHint = options.latinLocaleHint?.trim();
1376
+ if (latinLocaleHint) return latinLocaleHint;
1317
1377
  }
1318
- function supportsSegmenter() {
1319
- return typeof Intl !== "undefined" && typeof Intl.Segmenter === "function";
1378
+ function resolveHanHint(options) {
1379
+ const hanTagHint = options.hanTagHint?.trim();
1380
+ if (hanTagHint) return hanTagHint;
1381
+ const hanLanguageHint = options.hanLanguageHint?.trim();
1382
+ if (hanLanguageHint) return hanLanguageHint;
1320
1383
  }
1321
- function countCharsForLocale(text, locale) {
1322
- if (!supportsSegmenter()) return Array.from(text).length;
1323
- const segmenter = getGraphemeSegmenter(locale);
1324
- let count = 0;
1325
- for (const _segment of segmenter.segment(text)) count++;
1326
- return count;
1384
+ function compileLatinHintPattern(pattern, label) {
1385
+ const source = typeof pattern === "string" ? pattern : pattern.source;
1386
+ const hasUnicodeMode = typeof pattern !== "string" && (pattern.flags.includes("u") || pattern.flags.includes("v"));
1387
+ const flags = typeof pattern === "string" ? "u" : hasUnicodeMode ? pattern.flags : `${pattern.flags}u`;
1388
+ if (source.length === 0) throw new Error(`${label}: pattern must not be empty.`);
1389
+ if (source.length > MAX_LATIN_HINT_PATTERN_LENGTH) throw new Error(`${label}: pattern must be at most ${MAX_LATIN_HINT_PATTERN_LENGTH} characters.`);
1390
+ try {
1391
+ return new RegExp(source, flags);
1392
+ } catch (error) {
1393
+ const message = error instanceof Error ? error.message : String(error);
1394
+ throw new Error(`${label}: invalid Unicode regex pattern (${message}).`);
1395
+ }
1327
1396
  }
1328
-
1329
- //#endregion
1330
- //#region src/wc/non-words.ts
1331
- const emojiRegex = /(?:\p{Extended_Pictographic}|\p{Emoji_Presentation})/u;
1332
- const emojiPresentationRegex = /\p{Emoji_Presentation}/u;
1333
- const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
1334
- const symbolRegex = /\p{S}/u;
1335
- const punctuationRegex = /\p{P}/u;
1336
- const whitespaceRegex = /\s/u;
1337
- const newlineChars = new Set([
1338
- "\n",
1339
- "\r",
1340
- "\u2028",
1341
- "\u2029"
1342
- ]);
1343
- function createNonWordCollection() {
1344
- return {
1345
- emoji: [],
1346
- symbols: [],
1347
- punctuation: [],
1348
- counts: {
1349
- emoji: 0,
1350
- symbols: 0,
1351
- punctuation: 0
1352
- }
1397
+ function normalizeLatinHintPriority(priority, label) {
1398
+ if (priority === void 0) return 0;
1399
+ if (typeof priority !== "number" || !Number.isFinite(priority)) throw new Error(`${label}: priority must be a finite number when provided.`);
1400
+ return priority;
1401
+ }
1402
+ function compileLatinHintRule(rule, order, label) {
1403
+ const tag = typeof rule.tag === "string" ? rule.tag.trim() : "";
1404
+ if (!tag) throw new Error(`${label}: tag must be a non-empty string.`);
1405
+ return {
1406
+ tag,
1407
+ pattern: compileLatinHintPattern(rule.pattern, label),
1408
+ priority: normalizeLatinHintPriority(rule.priority, label),
1409
+ order
1353
1410
  };
1354
1411
  }
1355
- function addNonWord(collection, category, segment) {
1356
- if (category === "emoji") {
1357
- collection.emoji.push(segment);
1358
- collection.counts.emoji += 1;
1359
- return;
1412
+ function resolveLatinHintRules$1(options) {
1413
+ const useDefaultLatinHints = options.useDefaultLatinHints !== false;
1414
+ const customRules = options.latinHintRules ?? [];
1415
+ const combinedRules = [];
1416
+ for (let index = 0; index < customRules.length; index += 1) {
1417
+ const rule = customRules[index];
1418
+ if (!rule) continue;
1419
+ combinedRules.push({
1420
+ rule,
1421
+ label: `Invalid custom Latin hint rule at index ${index}`
1422
+ });
1360
1423
  }
1361
- if (category === "symbol") {
1362
- collection.symbols.push(segment);
1363
- collection.counts.symbols += 1;
1364
- return;
1424
+ if (useDefaultLatinHints) for (let index = 0; index < DEFAULT_LATIN_HINT_RULES.length; index += 1) {
1425
+ const rule = DEFAULT_LATIN_HINT_RULES[index];
1426
+ if (!rule) continue;
1427
+ combinedRules.push({
1428
+ rule,
1429
+ label: `Invalid default Latin hint rule at index ${index}`
1430
+ });
1365
1431
  }
1366
- collection.punctuation.push(segment);
1367
- collection.counts.punctuation += 1;
1432
+ const resolvedRules = combinedRules.map((entry, index) => compileLatinHintRule(entry.rule, index, entry.label));
1433
+ resolvedRules.sort((left, right) => {
1434
+ if (left.priority !== right.priority) return right.priority - left.priority;
1435
+ return left.order - right.order;
1436
+ });
1437
+ return resolvedRules;
1368
1438
  }
1369
- function addWhitespace(collection, segment) {
1370
- let whitespace = collection.whitespace;
1371
- let count = 0;
1372
- for (const char of segment) {
1373
- if (char === " ") {
1374
- whitespace = whitespace ?? createWhitespaceCounts();
1375
- whitespace.spaces += 1;
1376
- count += 1;
1377
- continue;
1439
+ function resolveLocaleDetectContext(options = {}) {
1440
+ const latinHint = resolveLatinHint(options);
1441
+ const latinHintRules = resolveLatinHintRules$1(options);
1442
+ const latinLocales = new Set([DEFAULT_LOCALE]);
1443
+ for (const rule of latinHintRules) latinLocales.add(rule.tag);
1444
+ if (latinHint) latinLocales.add(latinHint);
1445
+ return {
1446
+ latinHint,
1447
+ hanHint: resolveHanHint(options),
1448
+ latinHintRules,
1449
+ latinLocales
1450
+ };
1451
+ }
1452
+ function detectLatinLocale(char, context) {
1453
+ for (const hint of context.latinHintRules) {
1454
+ hint.pattern.lastIndex = 0;
1455
+ if (hint.pattern.test(char)) return hint.tag;
1456
+ }
1457
+ return DEFAULT_LOCALE;
1458
+ }
1459
+ function detectLocaleForChar(char, previousLocale, options = {}, context = resolveLocaleDetectContext(options), allowLatinLocaleCarry = true, allowJapaneseHanCarry = true) {
1460
+ if (regex.hiragana.test(char) || regex.katakana.test(char)) return "ja";
1461
+ if (regex.hangul.test(char)) return "ko";
1462
+ if (regex.arabic.test(char)) return "ar";
1463
+ if (regex.cyrillic.test(char)) return "ru";
1464
+ if (regex.devanagari.test(char)) return "hi";
1465
+ if (regex.thai.test(char)) return "th";
1466
+ if (regex.han.test(char)) {
1467
+ if (allowJapaneseHanCarry && previousLocale && previousLocale.startsWith("ja")) return previousLocale;
1468
+ return context.hanHint ?? DEFAULT_HAN_TAG;
1469
+ }
1470
+ if (regex.latin.test(char)) {
1471
+ const hintedLocale = detectLatinLocale(char, context);
1472
+ if (hintedLocale !== DEFAULT_LOCALE) return hintedLocale;
1473
+ if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE) return previousLocale;
1474
+ if (context.latinHint) return context.latinHint;
1475
+ return DEFAULT_LOCALE;
1476
+ }
1477
+ return null;
1478
+ }
1479
+
1480
+ //#endregion
1481
+ //#region src/wc/segment.ts
1482
+ const HARD_BOUNDARY_REGEX = /[\r\n,.!?;:,、。!?;:.。、]/u;
1483
+ const LATIN_PROMOTION_BREAK_REGEX = /[\s,.!?;:,、。!?;:.。、]/u;
1484
+ function segmentTextByLocale(text, options = {}) {
1485
+ const context = resolveLocaleDetectContext(options);
1486
+ const chunks = [];
1487
+ let currentLocale = DEFAULT_LOCALE;
1488
+ let buffer = "";
1489
+ let bufferHasScript = false;
1490
+ let sawCarryBoundary = false;
1491
+ const updateCarryBoundaryState = (detected, char) => {
1492
+ if (detected !== null) {
1493
+ sawCarryBoundary = false;
1494
+ return;
1378
1495
  }
1379
- if (char === " ") {
1380
- whitespace = whitespace ?? createWhitespaceCounts();
1381
- whitespace.tabs += 1;
1382
- count += 1;
1496
+ if (HARD_BOUNDARY_REGEX.test(char)) sawCarryBoundary = true;
1497
+ };
1498
+ for (const char of text) {
1499
+ const detected = detectLocaleForChar(char, currentLocale, options, context, !sawCarryBoundary, !sawCarryBoundary);
1500
+ const targetLocale = detected ?? currentLocale;
1501
+ if (buffer === "") {
1502
+ currentLocale = targetLocale;
1503
+ buffer = char;
1504
+ bufferHasScript = detected !== null;
1505
+ updateCarryBoundaryState(detected, char);
1383
1506
  continue;
1384
1507
  }
1385
- if (newlineChars.has(char)) {
1386
- whitespace = whitespace ?? createWhitespaceCounts();
1387
- whitespace.newlines += 1;
1388
- count += 1;
1508
+ if (detected !== null && !bufferHasScript) {
1509
+ currentLocale = targetLocale;
1510
+ buffer += char;
1511
+ bufferHasScript = true;
1512
+ updateCarryBoundaryState(detected, char);
1389
1513
  continue;
1390
1514
  }
1391
- if (whitespaceRegex.test(char)) {
1392
- whitespace = whitespace ?? createWhitespaceCounts();
1393
- whitespace.other += 1;
1394
- count += 1;
1515
+ if (targetLocale !== currentLocale && detected !== null) {
1516
+ if (currentLocale === DEFAULT_LOCALE && isLatinLocale(targetLocale, context)) {
1517
+ const promotionBreakIndex = findLastLatinPromotionBreakIndex(buffer);
1518
+ if (promotionBreakIndex === -1) {
1519
+ currentLocale = targetLocale;
1520
+ buffer += char;
1521
+ bufferHasScript = true;
1522
+ updateCarryBoundaryState(detected, char);
1523
+ continue;
1524
+ }
1525
+ const prefix = buffer.slice(0, promotionBreakIndex + 1);
1526
+ const suffix = buffer.slice(promotionBreakIndex + 1);
1527
+ if (prefix.length > 0) chunks.push({
1528
+ locale: currentLocale,
1529
+ text: prefix
1530
+ });
1531
+ currentLocale = targetLocale;
1532
+ buffer = `${suffix}${char}`;
1533
+ bufferHasScript = true;
1534
+ updateCarryBoundaryState(detected, char);
1535
+ continue;
1536
+ }
1537
+ chunks.push({
1538
+ locale: currentLocale,
1539
+ text: buffer
1540
+ });
1541
+ currentLocale = targetLocale;
1542
+ buffer = char;
1543
+ bufferHasScript = true;
1544
+ updateCarryBoundaryState(detected, char);
1545
+ continue;
1395
1546
  }
1547
+ buffer += char;
1548
+ if (detected !== null) bufferHasScript = true;
1549
+ updateCarryBoundaryState(detected, char);
1396
1550
  }
1397
- if (count > 0) {
1398
- collection.whitespace = whitespace ?? createWhitespaceCounts();
1399
- collection.counts.whitespace = (collection.counts.whitespace ?? 0) + count;
1400
- }
1401
- return count;
1402
- }
1403
- function classifyNonWordSegment(segment) {
1404
- const hasEmojiVariationSelector = segment.includes("️");
1405
- if (keycapEmojiRegex.test(segment) || emojiPresentationRegex.test(segment) || hasEmojiVariationSelector && emojiRegex.test(segment)) return "emoji";
1406
- if (symbolRegex.test(segment)) return "symbol";
1407
- if (punctuationRegex.test(segment)) return "punctuation";
1408
- return null;
1551
+ if (buffer.length > 0) chunks.push({
1552
+ locale: currentLocale,
1553
+ text: buffer
1554
+ });
1555
+ return mergeAdjacentChunks(chunks);
1409
1556
  }
1410
- function mergeNonWordCollections(target, source) {
1411
- if (source.counts.emoji > 0) {
1412
- appendAll(target.emoji, source.emoji);
1413
- target.counts.emoji += source.counts.emoji;
1414
- }
1415
- if (source.counts.symbols > 0) {
1416
- appendAll(target.symbols, source.symbols);
1417
- target.counts.symbols += source.counts.symbols;
1418
- }
1419
- if (source.counts.punctuation > 0) {
1420
- appendAll(target.punctuation, source.punctuation);
1421
- target.counts.punctuation += source.counts.punctuation;
1422
- }
1423
- if (source.counts.whitespace && source.counts.whitespace > 0 && source.whitespace) {
1424
- const whitespace = target.whitespace ?? createWhitespaceCounts();
1425
- whitespace.spaces += source.whitespace.spaces;
1426
- whitespace.tabs += source.whitespace.tabs;
1427
- whitespace.newlines += source.whitespace.newlines;
1428
- whitespace.other += source.whitespace.other;
1429
- target.whitespace = whitespace;
1430
- target.counts.whitespace = (target.counts.whitespace ?? 0) + source.counts.whitespace;
1557
+ function findLastLatinPromotionBreakIndex(buffer) {
1558
+ for (let index = buffer.length - 1; index >= 0; index -= 1) {
1559
+ const char = buffer[index];
1560
+ if (!char) continue;
1561
+ if (LATIN_PROMOTION_BREAK_REGEX.test(char)) return index;
1431
1562
  }
1432
- return target;
1563
+ return -1;
1433
1564
  }
1434
- function createWhitespaceCounts() {
1435
- return {
1436
- spaces: 0,
1437
- tabs: 0,
1438
- newlines: 0,
1439
- other: 0
1440
- };
1565
+ function mergeAdjacentChunks(chunks) {
1566
+ if (chunks.length === 0) return chunks;
1567
+ const merged = [];
1568
+ let last = chunks[0];
1569
+ for (let i = 1; i < chunks.length; i++) {
1570
+ const chunk = chunks[i];
1571
+ if (chunk.locale === last.locale) last = {
1572
+ locale: last.locale,
1573
+ text: last.text + chunk.text
1574
+ };
1575
+ else {
1576
+ merged.push(last);
1577
+ last = chunk;
1578
+ }
1579
+ }
1580
+ merged.push(last);
1581
+ return merged;
1441
1582
  }
1442
1583
 
1443
1584
  //#endregion
1444
- //#region src/wc/analyze.ts
1445
- function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
1446
- const segmenter = getSegmenter(chunk.locale);
1447
- const segments = [];
1448
- const nonWords = collectNonWords ? createNonWordCollection() : null;
1449
- for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
1450
- else if (collectNonWords && nonWords) {
1451
- if (includeWhitespace) addWhitespace(nonWords, part.segment);
1452
- const category = classifyNonWordSegment(part.segment);
1453
- if (category) addNonWord(nonWords, category, part.segment);
1585
+ //#region src/wc/wc.ts
1586
+ function wordCounter(text, options = {}) {
1587
+ const mode = resolveMode(options.mode, "chunk");
1588
+ const collectNonWords = Boolean(options.nonWords);
1589
+ const includeWhitespace = Boolean(options.includeWhitespace);
1590
+ const chunks = segmentTextByLocale(text, {
1591
+ latinLanguageHint: options.latinLanguageHint,
1592
+ latinTagHint: options.latinTagHint,
1593
+ latinLocaleHint: options.latinLocaleHint,
1594
+ latinHintRules: options.latinHintRules,
1595
+ useDefaultLatinHints: options.useDefaultLatinHints,
1596
+ hanLanguageHint: options.hanLanguageHint,
1597
+ hanTagHint: options.hanTagHint
1598
+ });
1599
+ if (mode === "char" || mode === "char-collector") {
1600
+ const analyzed = chunks.map((chunk) => analyzeCharChunk(chunk, collectNonWords, includeWhitespace));
1601
+ const total = analyzed.reduce((sum, chunk) => sum + chunk.chars, 0);
1602
+ const counts = collectNonWords ? {
1603
+ words: analyzed.reduce((sum, chunk) => sum + chunk.wordChars, 0),
1604
+ nonWords: analyzed.reduce((sum, chunk) => sum + chunk.nonWordChars, 0),
1605
+ total
1606
+ } : void 0;
1607
+ if (mode === "char") return {
1608
+ total,
1609
+ counts,
1610
+ breakdown: {
1611
+ mode,
1612
+ items: analyzed.map((chunk) => ({
1613
+ locale: chunk.locale,
1614
+ text: chunk.text,
1615
+ chars: chunk.chars,
1616
+ nonWords: chunk.nonWords
1617
+ }))
1618
+ }
1619
+ };
1620
+ return {
1621
+ total,
1622
+ counts,
1623
+ breakdown: {
1624
+ mode,
1625
+ items: aggregateCharsByLocale(analyzed).map((chunk) => ({
1626
+ locale: chunk.locale,
1627
+ chars: chunk.chars,
1628
+ nonWords: chunk.nonWords
1629
+ }))
1630
+ }
1631
+ };
1454
1632
  }
1455
- return {
1456
- locale: chunk.locale,
1457
- text: chunk.text,
1458
- segments,
1459
- words: segments.length,
1460
- nonWords: nonWords ?? void 0
1461
- };
1462
- }
1463
- function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
1464
- const segmenter = getSegmenter(chunk.locale);
1465
- const nonWords = collectNonWords ? createNonWordCollection() : null;
1466
- let chars = 0;
1467
- let wordChars = 0;
1468
- let nonWordChars = 0;
1469
- for (const part of segmenter.segment(chunk.text)) {
1470
- if (part.isWordLike) {
1471
- const count = countCharsForLocale(part.segment, chunk.locale);
1472
- chars += count;
1473
- wordChars += count;
1474
- continue;
1633
+ const analyzed = chunks.map((chunk) => analyzeChunk(chunk, collectNonWords, includeWhitespace));
1634
+ const wordsTotal = analyzed.reduce((sum, chunk) => sum + chunk.words, 0);
1635
+ const nonWordsTotal = collectNonWords ? analyzed.reduce((sum, chunk) => {
1636
+ if (!chunk.nonWords) return sum;
1637
+ return sum + getNonWordTotal(chunk.nonWords);
1638
+ }, 0) : 0;
1639
+ const total = analyzed.reduce((sum, chunk) => {
1640
+ let chunkTotal = chunk.words;
1641
+ if (collectNonWords && chunk.nonWords) chunkTotal += getNonWordTotal(chunk.nonWords);
1642
+ return sum + chunkTotal;
1643
+ }, 0);
1644
+ const counts = collectNonWords ? {
1645
+ words: wordsTotal,
1646
+ nonWords: nonWordsTotal,
1647
+ total
1648
+ } : void 0;
1649
+ if (mode === "segments") return {
1650
+ total,
1651
+ counts,
1652
+ breakdown: {
1653
+ mode,
1654
+ items: analyzed.map((chunk) => ({
1655
+ locale: chunk.locale,
1656
+ text: chunk.text,
1657
+ words: chunk.words,
1658
+ segments: chunk.segments,
1659
+ nonWords: chunk.nonWords
1660
+ }))
1475
1661
  }
1476
- if (collectNonWords && nonWords) {
1477
- let whitespaceCount = 0;
1478
- if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
1479
- const category = classifyNonWordSegment(part.segment);
1480
- if (category) addNonWord(nonWords, category, part.segment);
1481
- if (category || whitespaceCount > 0) {
1482
- const count = countCharsForLocale(part.segment, chunk.locale);
1483
- chars += count;
1484
- nonWordChars += count;
1485
- }
1662
+ };
1663
+ if (mode === "collector") return {
1664
+ total,
1665
+ counts,
1666
+ breakdown: {
1667
+ mode,
1668
+ items: aggregateByLocale(analyzed),
1669
+ nonWords: collectNonWordsAggregate(analyzed, collectNonWords)
1486
1670
  }
1487
- }
1671
+ };
1488
1672
  return {
1489
- locale: chunk.locale,
1490
- text: chunk.text,
1491
- chars,
1492
- wordChars,
1493
- nonWordChars,
1494
- nonWords: nonWords ?? void 0
1673
+ total,
1674
+ counts,
1675
+ breakdown: {
1676
+ mode,
1677
+ items: analyzed.map((chunk) => ({
1678
+ locale: chunk.locale,
1679
+ text: chunk.text,
1680
+ words: chunk.words,
1681
+ nonWords: chunk.nonWords
1682
+ }))
1683
+ }
1495
1684
  };
1496
1685
  }
1497
- function aggregateCharsByLocale(chunks) {
1498
- const order = [];
1499
- const map = /* @__PURE__ */ new Map();
1500
- for (const chunk of chunks) {
1501
- const existing = map.get(chunk.locale);
1502
- if (existing) {
1503
- existing.chars += chunk.chars;
1504
- existing.wordChars += chunk.wordChars;
1505
- existing.nonWordChars += chunk.nonWordChars;
1506
- if (chunk.nonWords) {
1507
- if (!existing.nonWords) existing.nonWords = createNonWordCollection();
1508
- mergeNonWordCollections(existing.nonWords, chunk.nonWords);
1509
- }
1510
- continue;
1511
- }
1512
- order.push(chunk.locale);
1513
- map.set(chunk.locale, {
1514
- locale: chunk.locale,
1515
- chars: chunk.chars,
1516
- wordChars: chunk.wordChars,
1517
- nonWordChars: chunk.nonWordChars,
1518
- nonWords: chunk.nonWords ? mergeNonWordCollections(createNonWordCollection(), chunk.nonWords) : void 0
1519
- });
1520
- }
1521
- return order.map((locale) => map.get(locale));
1686
+ function getNonWordTotal(nonWords) {
1687
+ return nonWords.counts.emoji + nonWords.counts.symbols + nonWords.counts.punctuation + (nonWords.counts.whitespace ?? 0);
1522
1688
  }
1523
- function aggregateByLocale(chunks) {
1524
- const order = [];
1525
- const map = /* @__PURE__ */ new Map();
1526
- for (const chunk of chunks) {
1527
- const existing = map.get(chunk.locale);
1528
- if (existing) {
1529
- existing.words += chunk.words;
1530
- appendAll(existing.segments, chunk.segments);
1531
- continue;
1532
- }
1533
- order.push(chunk.locale);
1534
- map.set(chunk.locale, {
1535
- locale: chunk.locale,
1536
- words: chunk.words,
1537
- segments: [...chunk.segments]
1538
- });
1689
+ function collectNonWordsAggregate(analyzed, enabled) {
1690
+ if (!enabled) return;
1691
+ const collection = createNonWordCollection();
1692
+ for (const chunk of analyzed) {
1693
+ if (!chunk.nonWords) continue;
1694
+ mergeNonWordCollections(collection, chunk.nonWords);
1539
1695
  }
1540
- return order.map((locale) => map.get(locale));
1696
+ return collection;
1541
1697
  }
1542
1698
 
1543
1699
  //#endregion
1544
- //#region src/wc/mode.ts
1545
- const MODE_ALIASES = {
1546
- chunk: "chunk",
1547
- chunks: "chunk",
1548
- segments: "segments",
1549
- segment: "segments",
1550
- seg: "segments",
1551
- collector: "collector",
1552
- collect: "collector",
1553
- colle: "collector",
1554
- char: "char",
1555
- chars: "char",
1556
- character: "char",
1557
- characters: "char",
1558
- "char-collector": "char-collector"
1559
- };
1560
- const CHAR_MODE_ALIASES = new Set([
1561
- "char",
1562
- "chars",
1563
- "character",
1564
- "characters"
1565
- ]);
1566
- const COLLECTOR_MODE_ALIASES = new Set([
1567
- "collector",
1568
- "collect",
1569
- "colle",
1570
- "col"
1571
- ]);
1572
- function collapseSeparators(value) {
1573
- return value.replace(/[-_\s]+/g, "");
1574
- }
1575
- function isComposedCharCollectorFromTokens(value) {
1576
- const tokens = value.split(/[-_\s]+/).map((token) => token.trim()).filter((token) => token.length > 0);
1577
- if (tokens.length < 2) return false;
1578
- let hasCharAlias = false;
1579
- let hasCollectorAlias = false;
1580
- for (const token of tokens) {
1581
- if (CHAR_MODE_ALIASES.has(token)) {
1582
- hasCharAlias = true;
1583
- continue;
1584
- }
1585
- if (COLLECTOR_MODE_ALIASES.has(token)) {
1586
- hasCollectorAlias = true;
1587
- continue;
1588
- }
1589
- return false;
1700
+ //#region src/wc/index.ts
1701
+ var wc_default = wordCounter;
1702
+
1703
+ //#endregion
1704
+ //#region src/markdown/section-count.ts
1705
+ function normalizeText(value) {
1706
+ if (value == null) return "";
1707
+ if (typeof value === "string") return value;
1708
+ if (typeof value === "number" || typeof value === "boolean") return String(value);
1709
+ try {
1710
+ return JSON.stringify(value);
1711
+ } catch {
1712
+ return String(value);
1590
1713
  }
1591
- return hasCharAlias && hasCollectorAlias;
1592
1714
  }
1593
- function isComposedCharCollectorCompact(value) {
1594
- for (const charAlias of CHAR_MODE_ALIASES) for (const collectorAlias of COLLECTOR_MODE_ALIASES) if (value === `${charAlias}${collectorAlias}` || value === `${collectorAlias}${charAlias}`) return true;
1595
- return false;
1715
+ function buildPerKeyItems(data, mode, options) {
1716
+ if (!data || typeof data !== "object" || Array.isArray(data)) return [];
1717
+ return Object.entries(data).map(([key, value]) => {
1718
+ const valueText = normalizeText(value);
1719
+ return {
1720
+ name: key,
1721
+ source: "frontmatter",
1722
+ result: wc_default(valueText ? `${key}: ${valueText}` : key, options)
1723
+ };
1724
+ });
1596
1725
  }
1597
- function normalizeMode(input) {
1598
- if (!input) return null;
1599
- const normalized = input.trim().toLowerCase();
1600
- const direct = MODE_ALIASES[normalized];
1601
- if (direct) return direct;
1602
- if (isComposedCharCollectorFromTokens(normalized)) return "char-collector";
1603
- const compact = collapseSeparators(normalized);
1604
- if (isComposedCharCollectorCompact(compact)) return "char-collector";
1605
- return MODE_ALIASES[compact] ?? null;
1726
+ function buildSingleItem(name, text, mode, options, source) {
1727
+ return [{
1728
+ name,
1729
+ source,
1730
+ result: wc_default(text, options)
1731
+ }];
1606
1732
  }
1607
- function resolveMode(input, fallback = "chunk") {
1608
- return normalizeMode(input) ?? fallback;
1733
+ function sumTotals(items) {
1734
+ return items.reduce((sum, item) => sum + item.result.total, 0);
1609
1735
  }
1610
-
1611
- //#endregion
1612
- //#region src/wc/latin-hints.ts
1613
- const DEFAULT_LATIN_HINT_RULES_SOURCE = [
1614
- {
1615
- tag: "de",
1616
- pattern: "[äöüÄÖÜß]"
1617
- },
1618
- {
1619
- tag: "es",
1620
- pattern: "[ñÑ¿¡]"
1621
- },
1622
- {
1623
- tag: "pt",
1624
- pattern: "[ãõÃÕ]"
1625
- },
1626
- {
1627
- tag: "fr",
1628
- pattern: "[œŒæÆ]"
1629
- },
1630
- {
1631
- tag: "pl",
1632
- pattern: "[ąćęłńśźżĄĆĘŁŃŚŹŻ]"
1633
- },
1634
- {
1635
- tag: "tr",
1636
- pattern: "[ıİğĞşŞ]"
1637
- },
1638
- {
1639
- tag: "ro",
1640
- pattern: "[ăĂâÂîÎșȘțȚ]"
1641
- },
1642
- {
1643
- tag: "hu",
1644
- pattern: "[őŐűŰ]"
1645
- },
1646
- {
1647
- tag: "is",
1648
- pattern: "[ðÐþÞ]"
1736
+ function countSections(input, section, options = {}) {
1737
+ const mode = options.mode ?? "chunk";
1738
+ if (section === "all") {
1739
+ const result = wc_default(input, options);
1740
+ return {
1741
+ section,
1742
+ total: result.total,
1743
+ frontmatterType: null,
1744
+ items: [{
1745
+ name: "all",
1746
+ source: "content",
1747
+ result
1748
+ }]
1749
+ };
1649
1750
  }
1650
- ];
1651
- const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
1751
+ const parsed = parseMarkdown(input);
1752
+ const frontmatterText = parsed.frontmatter ?? "";
1753
+ const contentText = parsed.content ?? "";
1754
+ let items = [];
1755
+ if (section === "frontmatter") items = buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter");
1756
+ else if (section === "content") items = buildSingleItem("content", contentText, mode, options, "content");
1757
+ else if (section === "split") items = [...buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter"), ...buildSingleItem("content", contentText, mode, options, "content")];
1758
+ else if (section === "per-key") items = buildPerKeyItems(parsed.data, mode, options);
1759
+ else if (section === "split-per-key") items = [...buildPerKeyItems(parsed.data, mode, options), ...buildSingleItem("content", contentText, mode, options, "content")];
1760
+ return {
1761
+ section,
1762
+ total: sumTotals(items),
1763
+ frontmatterType: parsed.frontmatterType,
1764
+ items
1765
+ };
1766
+ }
1652
1767
 
1653
1768
  //#endregion
1654
- //#region src/wc/locale-detect.ts
1655
- const DEFAULT_LOCALE = "und-Latn";
1656
- const DEFAULT_HAN_TAG = "und-Hani";
1657
- const MAX_LATIN_HINT_PATTERN_LENGTH = 256;
1658
- const regex = {
1659
- hiragana: /\p{Script=Hiragana}/u,
1660
- katakana: /\p{Script=Katakana}/u,
1661
- hangul: /\p{Script=Hangul}/u,
1662
- han: /\p{Script=Han}/u,
1663
- latin: /\p{Script=Latin}/u,
1664
- arabic: /\p{Script=Arabic}/u,
1665
- cyrillic: /\p{Script=Cyrillic}/u,
1666
- devanagari: /\p{Script=Devanagari}/u,
1667
- thai: /\p{Script=Thai}/u
1668
- };
1669
- const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
1670
- function isLatinLocale(locale, context) {
1671
- if (context) return context.latinLocales.has(locale);
1672
- return defaultLatinLocales.has(locale);
1673
- }
1674
- function resolveLatinHint(options) {
1675
- const latinTagHint = options.latinTagHint?.trim();
1676
- if (latinTagHint) return latinTagHint;
1677
- const latinLanguageHint = options.latinLanguageHint?.trim();
1678
- if (latinLanguageHint) return latinLanguageHint;
1679
- const latinLocaleHint = options.latinLocaleHint?.trim();
1680
- if (latinLocaleHint) return latinLocaleHint;
1681
- }
1682
- function resolveHanHint(options) {
1683
- const hanTagHint = options.hanTagHint?.trim();
1684
- if (hanTagHint) return hanTagHint;
1685
- const hanLanguageHint = options.hanLanguageHint?.trim();
1686
- if (hanLanguageHint) return hanLanguageHint;
1687
- }
1688
- function compileLatinHintPattern(pattern, label) {
1689
- const source = typeof pattern === "string" ? pattern : pattern.source;
1690
- const hasUnicodeMode = typeof pattern !== "string" && (pattern.flags.includes("u") || pattern.flags.includes("v"));
1691
- const flags = typeof pattern === "string" ? "u" : hasUnicodeMode ? pattern.flags : `${pattern.flags}u`;
1692
- if (source.length === 0) throw new Error(`${label}: pattern must not be empty.`);
1693
- if (source.length > MAX_LATIN_HINT_PATTERN_LENGTH) throw new Error(`${label}: pattern must be at most ${MAX_LATIN_HINT_PATTERN_LENGTH} characters.`);
1694
- try {
1695
- return new RegExp(source, flags);
1696
- } catch (error) {
1697
- const message = error instanceof Error ? error.message : String(error);
1698
- throw new Error(`${label}: invalid Unicode regex pattern (${message}).`);
1769
+ //#region src/cli/batch/aggregate.ts
1770
/**
 * Combine two word-counter results that use the same breakdown mode.
 *
 * `total` and (when either side has them) `counts` are summed field by field.
 * For "chunk", "segments" and "char" the item lists are concatenated, left
 * first. For "char-collector" and "collector" items are merged per locale in
 * first-seen order, using fresh entry objects so the inputs are not mutated.
 *
 * @param {object} left - First result.
 * @param {object} right - Second result.
 * @param {boolean} preserveCollectorSegments - When truthy, collector
 *   segments are copied and appended; otherwise merged entries get `[]`.
 * @returns {object} The merged result (`total`, `counts`, `breakdown`).
 * @throws {Error} When the two breakdown modes differ.
 */
function mergeWordCounterResult(left, right, preserveCollectorSegments) {
  const mode = left.breakdown.mode;
  if (mode !== right.breakdown.mode) throw new Error("Cannot merge different breakdown modes.");

  const total = left.total + right.total;
  let counts;
  if (left.counts || right.counts) {
    const sumField = (field) => (left.counts?.[field] ?? 0) + (right.counts?.[field] ?? 0);
    counts = {
      words: sumField("words"),
      nonWords: sumField("nonWords"),
      total: sumField("total")
    };
  }

  switch (mode) {
    case "chunk":
    case "segments":
    case "char":
      return {
        total,
        counts,
        breakdown: {
          mode,
          items: [...left.breakdown.items, ...right.breakdown.items]
        }
      };

    case "char-collector": {
      // A Map iterates in insertion order, which is the first-seen locale order.
      const perLocale = new Map();
      for (const sourceItems of [left.breakdown.items, right.breakdown.items]) {
        for (const entry of sourceItems) {
          const merged = perLocale.get(entry.locale);
          if (!merged) {
            perLocale.set(entry.locale, {
              locale: entry.locale,
              chars: entry.chars,
              nonWords: entry.nonWords ? mergeNonWordCollections(createNonWordCollection(), entry.nonWords) : void 0
            });
            continue;
          }
          merged.chars += entry.chars;
          if (entry.nonWords) {
            if (!merged.nonWords) merged.nonWords = createNonWordCollection();
            mergeNonWordCollections(merged.nonWords, entry.nonWords);
          }
        }
      }
      return {
        total,
        counts,
        breakdown: {
          mode,
          items: [...perLocale.values()]
        }
      };
    }

    case "collector": {
      const perLocale = new Map();
      for (const sourceItems of [left.breakdown.items, right.breakdown.items]) {
        for (const entry of sourceItems) {
          const merged = perLocale.get(entry.locale);
          if (merged) {
            merged.words += entry.words;
            if (preserveCollectorSegments) appendAll(merged.segments, entry.segments);
          } else {
            perLocale.set(entry.locale, {
              locale: entry.locale,
              words: entry.words,
              segments: preserveCollectorSegments ? [...entry.segments] : []
            });
          }
        }
      }
      let nonWords;
      if (left.breakdown.nonWords || right.breakdown.nonWords) {
        nonWords = createNonWordCollection();
        for (const part of [left.breakdown.nonWords, right.breakdown.nonWords]) {
          if (part) mergeNonWordCollections(nonWords, part);
        }
      }
      return {
        total,
        counts,
        breakdown: {
          mode,
          items: [...perLocale.values()],
          nonWords
        }
      };
    }

    default:
      // Any other mode: keep the left breakdown as-is with the summed totals.
      return {
        total,
        counts,
        breakdown: left.breakdown
      };
  }
}
1701
- function normalizeLatinHintPriority(priority, label) {
1702
- if (priority === void 0) return 0;
1703
- if (typeof priority !== "number" || !Number.isFinite(priority)) throw new Error(`${label}: priority must be a finite number when provided.`);
1704
- return priority;
1886
/**
 * Fold a list of word-counter results into a single result.
 *
 * Falsy entries after the first are skipped. When the list is empty, or its
 * first entry is falsy, the result of counting an empty string in "chunk"
 * mode is returned instead.
 *
 * @param {Array<object>} results - Results to merge, in order.
 * @param {boolean} preserveCollectorSegments - Forwarded to mergeWordCounterResult.
 * @returns {object} The merged result; a single-entry list returns that entry itself.
 */
function aggregateWordCounterResults(results, preserveCollectorSegments) {
  const seed = results.length === 0 ? void 0 : results[0];
  if (!seed) return wc_default("", { mode: "chunk" });

  let merged = seed;
  for (const candidate of results.slice(1)) {
    if (candidate) merged = mergeWordCounterResult(merged, candidate, preserveCollectorSegments);
  }
  return merged;
}
1706
- function compileLatinHintRule(rule, order, label) {
1707
- const tag = typeof rule.tag === "string" ? rule.tag.trim() : "";
1708
- if (!tag) throw new Error(`${label}: tag must be a non-empty string.`);
1898
/**
 * Build the grouping key for a section item as "<source>:<name>".
 *
 * @param {string} name - Section item name.
 * @param {string} source - Section item source.
 * @returns {string} The combined key.
 */
function buildSectionKey(name, source) {
  return "".concat(source, ":", name);
}
1901
/**
 * Aggregate per-file sectioned results into one sectioned result.
 *
 * Items are grouped by source and name, each group is merged with
 * aggregateWordCounterResults, and groups are ordered with "content" after
 * every other source, then by name via localeCompare. `frontmatterType` is
 * kept only when every result reports the same value; otherwise it is null.
 *
 * @param {Array<object>} results - Sectioned results (section, total, frontmatterType, items).
 * @param {boolean} preserveCollectorSegments - Forwarded to aggregateWordCounterResults.
 * @returns {object} `{ section, total, frontmatterType, items }`.
 * @throws {Error} When results use different section modes.
 */
function aggregateSectionedResults(results, preserveCollectorSegments) {
  if (results.length === 0) {
    return {
      section: "all",
      total: 0,
      frontmatterType: null,
      items: []
    };
  }

  const head = results[0];
  const section = head?.section ?? "all";
  let frontmatterType = head?.frontmatterType ?? null;
  let total = 0;
  const groups = new Map();

  for (const result of results) {
    total += result.total;
    if (result.section !== section) throw new Error("Cannot aggregate section results with different section modes.");
    if (frontmatterType !== result.frontmatterType) frontmatterType = null;
    for (const item of result.items) {
      const key = buildSectionKey(item.name, item.source);
      const group = groups.get(key);
      if (group) {
        group.items.push(item.result);
      } else {
        groups.set(key, {
          name: item.name,
          source: item.source,
          items: [item.result]
        });
      }
    }
  }

  // "frontmatter" and any unknown source rank 0; only "content" ranks 1.
  const rankOf = (source) => (source === "content" ? 1 : 0);
  const ordered = [...groups.values()];
  ordered.sort((left, right) => {
    const sourceDiff = rankOf(left.source) - rankOf(right.source);
    return sourceDiff !== 0 ? sourceDiff : left.name.localeCompare(right.name);
  });
  const items = ordered.map(({ name, source, items: groupResults }) => ({
    name,
    source,
    result: aggregateWordCounterResults(groupResults, preserveCollectorSegments)
  }));

  return {
    section,
    total,
    frontmatterType,
    items
  };
}
1716
- function resolveLatinHintRules$1(options) {
1717
- const useDefaultLatinHints = options.useDefaultLatinHints !== false;
1718
- const customRules = options.latinHintRules ?? [];
1719
- const combinedRules = [];
1720
- for (let index = 0; index < customRules.length; index += 1) {
1721
- const rule = customRules[index];
1722
- if (!rule) continue;
1723
- combinedRules.push({
1724
- rule,
1725
- label: `Invalid custom Latin hint rule at index ${index}`
1726
- });
1947
/**
 * Replace every item's `segments` with an empty array, in place, when the
 * result uses the "collector" breakdown mode. Other modes are left untouched.
 *
 * @param {object} result - Word-counter result to compact (mutated).
 * @returns {void}
 */
function stripCollectorSegmentsFromWordCounterResult(result) {
  if (result.breakdown.mode === "collector") {
    for (const entry of result.breakdown.items) {
      entry.segments = [];
    }
  }
}
1951
/**
 * Apply stripCollectorSegmentsFromWordCounterResult to the `result` of every
 * item in a sectioned result.
 *
 * @param {object} result - Sectioned result whose item results are compacted in place.
 * @returns {void}
 */
function stripCollectorSegmentsFromSectionedResult(result) {
  for (const { result: sectionResult } of result.items) {
    stripCollectorSegmentsFromWordCounterResult(sectionResult);
  }
}
1954
/**
 * Drop collector segments from a count result in place.
 *
 * A result that has a `section` property is handled as a sectioned result;
 * anything else is handled as a plain word-counter result.
 *
 * @param {object} result - Count result to compact (mutated).
 * @returns {void}
 */
function compactCollectorSegmentsInCountResult(result) {
  const strip = "section" in result
    ? stripCollectorSegmentsFromSectionedResult
    : stripCollectorSegmentsFromWordCounterResult;
  strip(result);
}
1961
/**
 * Count every already-loaded input sequentially and build the batch summary.
 *
 * Each input is counted with wc_default (section "all") or countSections
 * (any other section). When segments are not preserved, each result is
 * compacted right after counting. `options.onFileCounted` is called after
 * each file with `{ completed, total }`.
 *
 * @param {Array<{path: string, content: string}>} inputs - Loaded files.
 * @param {string} section - Section mode.
 * @param {object} wcOptions - Options forwarded to the counters.
 * @param {object} [options] - `preserveCollectorSegments` (defaults to true),
 *   `onFileCounted`, `onFinalizeStart`.
 * @returns {Promise<object>} The value of finalizeBatchSummaryFromFileResults.
 */
async function buildBatchSummary(inputs, section, wcOptions, options = {}) {
  const keepSegments = options.preserveCollectorSegments ?? true;
  const counted = [];

  for (const input of inputs) {
    let result;
    if (section === "all") {
      result = wc_default(input.content, wcOptions);
    } else {
      result = countSections(input.content, section, wcOptions);
    }
    if (!keepSegments) compactCollectorSegmentsInCountResult(result);
    counted.push({
      path: input.path,
      result
    });
    options.onFileCounted?.({
      completed: counted.length,
      total: inputs.length
    });
  }

  const finalizeOptions = {
    onFinalizeStart: options.onFinalizeStart,
    preserveCollectorSegments: options.preserveCollectorSegments
  };
  return finalizeBatchSummaryFromFileResults(counted, section, wcOptions, finalizeOptions);
}
1743
- function resolveLocaleDetectContext(options = {}) {
1744
- const latinHint = resolveLatinHint(options);
1745
- const latinHintRules = resolveLatinHintRules$1(options);
1746
- const latinLocales = new Set([DEFAULT_LOCALE]);
1747
- for (const rule of latinHintRules) latinLocales.add(rule.tag);
1748
- if (latinHint) latinLocales.add(latinHint);
1981
/**
 * Turn per-file count results into the final batch summary.
 *
 * Optionally compacts collector segments on every file result, calls
 * `options.onFinalizeStart`, then aggregates. With no files the aggregate is
 * an empty count (section "all") or an empty sectioned result.
 *
 * @param {Array<{path: string, result: object}>} files - Per-file results.
 * @param {string} section - Section mode.
 * @param {object} wcOptions - Options used for the empty "all" aggregate.
 * @param {object} [options] - `preserveCollectorSegments` (defaults to true), `onFinalizeStart`.
 * @returns {{files: Array, skipped: Array, aggregate: object}} Summary with an empty `skipped` list.
 */
function finalizeBatchSummaryFromFileResults(files, section, wcOptions, options = {}) {
  const keepSegments = options.preserveCollectorSegments ?? true;
  if (!keepSegments) {
    for (const file of files) {
      compactCollectorSegmentsInCountResult(file.result);
    }
  }
  options.onFinalizeStart?.();

  const wholeDocument = section === "all";
  let aggregate;
  if (files.length === 0) {
    aggregate = wholeDocument
      ? wc_default("", wcOptions)
      : {
          section,
          total: 0,
          frontmatterType: null,
          items: []
        };
  } else {
    const results = files.map((file) => file.result);
    aggregate = wholeDocument
      ? aggregateWordCounterResults(results, keepSegments)
      : aggregateSectionedResults(results, keepSegments);
  }

  return {
    files,
    skipped: [],
    aggregate
  };
}
1756
- function detectLatinLocale(char, context) {
1757
- for (const hint of context.latinHintRules) {
1758
- hint.pattern.lastIndex = 0;
1759
- if (hint.pattern.test(char)) return hint.tag;
1760
- }
1761
- return DEFAULT_LOCALE;
1762
- }
1763
- function detectLocaleForChar(char, previousLocale, options = {}, context = resolveLocaleDetectContext(options), allowLatinLocaleCarry = true, allowJapaneseHanCarry = true) {
1764
- if (regex.hiragana.test(char) || regex.katakana.test(char)) return "ja";
1765
- if (regex.hangul.test(char)) return "ko";
1766
- if (regex.arabic.test(char)) return "ar";
1767
- if (regex.cyrillic.test(char)) return "ru";
1768
- if (regex.devanagari.test(char)) return "hi";
1769
- if (regex.thai.test(char)) return "th";
1770
- if (regex.han.test(char)) {
1771
- if (allowJapaneseHanCarry && previousLocale && previousLocale.startsWith("ja")) return previousLocale;
1772
- return context.hanHint ?? DEFAULT_HAN_TAG;
1773
- }
1774
- if (regex.latin.test(char)) {
1775
- const hintedLocale = detectLatinLocale(char, context);
1776
- if (hintedLocale !== DEFAULT_LOCALE) return hintedLocale;
1777
- if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE) return previousLocale;
1778
- if (context.latinHint) return context.latinHint;
1779
- return DEFAULT_LOCALE;
2001
+
2002
+ //#endregion
2003
+ //#region src/cli/path/load.ts
2004
/**
 * Heuristically decide whether a buffer holds binary data.
 *
 * Only the first 1024 bytes are inspected. Any NUL byte means binary.
 * Otherwise the buffer is binary when more than 30% of the inspected bytes
 * are neither tab/LF/CR, printable ASCII (32-126), nor bytes >= 128.
 *
 * @param {Uint8Array} buffer - File contents.
 * @returns {boolean} True when the sample looks binary; false for an empty buffer.
 */
function isProbablyBinary(buffer) {
  if (buffer.length === 0) return false;

  const inspected = Math.min(buffer.length, 1024);
  let controlBytes = 0;
  let offset = 0;
  while (offset < inspected) {
    const value = buffer[offset] ?? 0;
    offset += 1;
    if (value === 0) return true;
    const isWhitespace = value === 9 || value === 10 || value === 13;
    const isPrintableAscii = value >= 32 && value <= 126;
    const isHighByte = value >= 128;
    if (!(isWhitespace || isPrintableAscii || isHighByte)) controlBytes += 1;
  }
  return controlBytes / inspected > 0.3;
}
1783
2018
 
1784
2019
  //#endregion
1785
- //#region src/wc/segment.ts
1786
- const HARD_BOUNDARY_REGEX = /[\r\n,.!?;:,、。!?;:.。、]/u;
1787
- const LATIN_PROMOTION_BREAK_REGEX = /[\s,.!?;:,、。!?;:.。、]/u;
1788
- function segmentTextByLocale(text, options = {}) {
1789
- const context = resolveLocaleDetectContext(options);
1790
- const chunks = [];
1791
- let currentLocale = DEFAULT_LOCALE;
1792
- let buffer = "";
1793
- let bufferHasScript = false;
1794
- let sawCarryBoundary = false;
1795
- const updateCarryBoundaryState = (detected, char) => {
1796
- if (detected !== null) {
1797
- sawCarryBoundary = false;
1798
- return;
2020
+ //#region src/cli/batch/jobs/queue.ts
2021
/**
 * Run `worker(index)` for every index in `[0, total)` with bounded concurrency.
 *
 * Up to `requestedJobs` lanes (clamped to `[1, total]`; non-finite values
 * fall back to 1) pull the next index from a shared counter. Results are
 * stored by index, so the returned array matches input order regardless of
 * completion order.
 *
 * If any worker call throws or rejects, the returned promise rejects with
 * that error and no lane starts a new index afterwards. Calls already in
 * flight are allowed to settle.
 *
 * @param {number} total - Number of tasks.
 * @param {number} requestedJobs - Desired concurrency.
 * @param {(index: number) => any} worker - Task function; may return a promise.
 * @returns {Promise<Array>} Results indexed by task index (`[]` when `total` is 0).
 */
async function runBoundedQueue(total, requestedJobs, worker) {
  if (total === 0) return [];
  const safeRequestedJobs = Number.isFinite(requestedJobs) ? Math.floor(requestedJobs) : 1;
  const concurrency = Math.max(1, Math.min(total, safeRequestedJobs));
  const results = new Array(total);
  let nextIndex = 0;
  // Set once any task fails so the remaining lanes stop picking up work
  // whose results would be discarded anyway.
  let failed = false;
  const runWorker = async () => {
    while (!failed) {
      const current = nextIndex;
      nextIndex += 1;
      if (current >= total) return;
      try {
        results[current] = await worker(current);
      } catch (error) {
        failed = true;
        throw error;
      }
    }
  };
  await Promise.all(Array.from({ length: concurrency }, () => runWorker()));
  return results;
}
2038
+
2039
+ //#endregion
2040
+ //#region src/cli/batch/jobs/load-count-experimental.ts
2041
/**
 * Read and count files concurrently through runBoundedQueue.
 *
 * Each path is read, checked with isProbablyBinary, decoded as UTF-8 and
 * counted (wc_default for section "all", countSections otherwise). Unreadable
 * or binary files become skip entries. A read error recognised by
 * isResourceLimitError is rethrown via createResourceLimitError.
 * `options.onFileProcessed` is called with `{ completed, total }` after every
 * file, whether counted or skipped.
 *
 * Note: segments are compacted whenever `options.preserveCollectorSegments`
 * is falsy, so an omitted value behaves like `false` here.
 *
 * @param {string[]} filePaths - Files to process.
 * @param {object} options - `jobs`, `section`, `wcOptions`,
 *   `preserveCollectorSegments`, `onFileProcessed`.
 * @returns {Promise<{files: Array, skipped: Array}>} Counted files and skipped
 *   entries, each in input order.
 */
async function countBatchInputsWithJobs(filePaths, options) {
  const limits = resolveBatchJobsLimit();
  const total = filePaths.length;
  let completed = 0;

  // Record one more processed file, notify the listener, and pass the entry through.
  const settle = (entry) => {
    completed += 1;
    options.onFileProcessed?.({
      completed,
      total
    });
    return entry;
  };
  const skipEntry = (path, reason) => settle({
    type: "skip",
    skip: {
      path,
      reason
    }
  });

  const entries = await runBoundedQueue(filePaths.length, options.jobs, async (index) => {
    const path = filePaths[index];
    if (!path) return skipEntry("", "not readable: missing path");

    let buffer;
    try {
      buffer = await readFile(path);
    } catch (error) {
      if (isResourceLimitError(error)) throw createResourceLimitError(path, error, options.jobs, limits);
      const message = error instanceof Error ? error.message : String(error);
      return skipEntry(path, `not readable: ${message}`);
    }
    if (isProbablyBinary(buffer)) return skipEntry(path, "binary file");

    const content = buffer.toString("utf8");
    const result = options.section === "all"
      ? wc_default(content, options.wcOptions)
      : countSections(content, options.section, options.wcOptions);
    if (!options.preserveCollectorSegments) compactCollectorSegmentsInCountResult(result);
    return settle({
      type: "file",
      file: {
        path,
        result
      }
    });
  });

  const files = [];
  const skipped = [];
  for (const entry of entries) {
    if (entry.type === "file") files.push(entry.file);
    else skipped.push(entry.skip);
  }
  return {
    files,
    skipped
  };
}
1861
- function findLastLatinPromotionBreakIndex(buffer) {
1862
- for (let index = buffer.length - 1; index >= 0; index -= 1) {
1863
- const char = buffer[index];
1864
- if (!char) continue;
1865
- if (LATIN_PROMOTION_BREAK_REGEX.test(char)) return index;
1866
- }
1867
- return -1;
2124
+
2125
+ //#endregion
2126
+ //#region src/cli/batch/jobs/load-count-worker-experimental.ts
2127
/**
 * Error thrown by countBatchInputsWithWorkerJobs when the worker route is
 * disabled or cannot be used. It adds no behaviour beyond `Error`.
 */
var WorkerRouteUnavailableError = class extends Error {
};
2128
/**
 * Decide whether an error from the worker route is one of the recognised
 * worker/module loading failures.
 *
 * Matches on a `code` of ERR_WORKER_PATH, ERR_WORKER_UNSUPPORTED_EXTENSION,
 * ERR_UNKNOWN_FILE_EXTENSION or ERR_MODULE_NOT_FOUND, or on a message that
 * contains "Unknown file extension" or "Cannot find module".
 *
 * @param {unknown} error - Value caught from the worker route.
 * @returns {boolean} True when the error matches; false for non-objects and null.
 */
function isFallbackFriendlyWorkerError(error) {
  if (error === null || typeof error !== "object") return false;

  const fallbackCodes = [
    "ERR_WORKER_PATH",
    "ERR_WORKER_UNSUPPORTED_EXTENSION",
    "ERR_UNKNOWN_FILE_EXTENSION",
    "ERR_MODULE_NOT_FOUND"
  ];
  const code = "code" in error ? String(error.code) : "";
  if (fallbackCodes.includes(code)) return true;

  const message = error instanceof Error ? error.message : String(error);
  if (message.includes("Unknown file extension")) return true;
  return message.includes("Cannot find module");
}
1869
- function mergeAdjacentChunks(chunks) {
1870
- if (chunks.length === 0) return chunks;
1871
- const merged = [];
1872
- let last = chunks[0];
1873
- for (let i = 1; i < chunks.length; i++) {
1874
- const chunk = chunks[i];
1875
- if (chunk.locale === last.locale) last = {
1876
- locale: last.locale,
1877
- text: last.text + chunk.text
1878
- };
1879
- else {
1880
- merged.push(last);
1881
- last = chunk;
2135
/**
 * Count files through the worker pool loaded from "./worker-pool.mjs".
 *
 * Error mapping:
 * - route disabled by WORD_COUNTER_DISABLE_EXPERIMENTAL_WORKERS=1, module
 *   import failure, WorkerPoolUnavailableError, or an error accepted by
 *   isFallbackFriendlyWorkerError -> WorkerRouteUnavailableError;
 * - WorkerPoolTaskFatalError with code EMFILE/ENFILE -> the error built by
 *   createResourceLimitError;
 * - any other WorkerPoolTaskFatalError -> plain Error with the same message;
 * - everything else is rethrown unchanged.
 *
 * Wrapped errors carry the original as `cause`, so its stack and details
 * remain available for diagnostics.
 *
 * @param {string[]} filePaths - Files to process.
 * @param {object} options - `jobs`, `section`, `wcOptions`,
 *   `preserveCollectorSegments`, `onFileProcessed`.
 * @returns {Promise<object>} Whatever countBatchInputsWithWorkerPool resolves to.
 * @throws {WorkerRouteUnavailableError} When the worker route cannot be used.
 * @throws {Error} For fatal task errors and any unrecognised failure.
 */
async function countBatchInputsWithWorkerJobs(filePaths, options) {
  if (process.env.WORD_COUNTER_DISABLE_EXPERIMENTAL_WORKERS === "1") throw new WorkerRouteUnavailableError("Worker route disabled by environment.");
  let workerPoolModule;
  try {
    workerPoolModule = await import("./worker-pool.mjs");
  } catch (error) {
    throw new WorkerRouteUnavailableError(`Worker route unavailable: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
  }
  try {
    return await workerPoolModule.countBatchInputsWithWorkerPool({
      filePaths,
      jobs: options.jobs,
      section: options.section,
      wcOptions: options.wcOptions,
      preserveCollectorSegments: options.preserveCollectorSegments,
      onFileProcessed: options.onFileProcessed
    });
  } catch (error) {
    if (error instanceof workerPoolModule.WorkerPoolTaskFatalError) {
      if (error.code === "EMFILE" || error.code === "ENFILE") throw createResourceLimitError(error.path, {
        code: error.code,
        message: error.message
      }, options.jobs, resolveBatchJobsLimit());
      // Re-wrapped as a plain Error with the same message; keep the pool error as the cause.
      throw new Error(error.message, { cause: error });
    }
    if (error instanceof workerPoolModule.WorkerPoolUnavailableError || isFallbackFriendlyWorkerError(error)) throw new WorkerRouteUnavailableError(`Worker route unavailable: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
    throw error;
  }
}
1887
2164
 
1888
2165
  //#endregion
1889
- //#region src/wc/wc.ts
1890
- function wordCounter(text, options = {}) {
1891
- const mode = resolveMode(options.mode, "chunk");
1892
- const collectNonWords = Boolean(options.nonWords);
1893
- const includeWhitespace = Boolean(options.includeWhitespace);
1894
- const chunks = segmentTextByLocale(text, {
1895
- latinLanguageHint: options.latinLanguageHint,
1896
- latinTagHint: options.latinTagHint,
1897
- latinLocaleHint: options.latinLocaleHint,
1898
- latinHintRules: options.latinHintRules,
1899
- useDefaultLatinHints: options.useDefaultLatinHints,
1900
- hanLanguageHint: options.hanLanguageHint,
1901
- hanTagHint: options.hanTagHint
1902
- });
1903
- if (mode === "char" || mode === "char-collector") {
1904
- const analyzed = chunks.map((chunk) => analyzeCharChunk(chunk, collectNonWords, includeWhitespace));
1905
- const total = analyzed.reduce((sum, chunk) => sum + chunk.chars, 0);
1906
- const counts = collectNonWords ? {
1907
- words: analyzed.reduce((sum, chunk) => sum + chunk.wordChars, 0),
1908
- nonWords: analyzed.reduce((sum, chunk) => sum + chunk.nonWordChars, 0),
1909
- total
1910
- } : void 0;
1911
- if (mode === "char") return {
1912
- total,
1913
- counts,
1914
- breakdown: {
1915
- mode,
1916
- items: analyzed.map((chunk) => ({
1917
- locale: chunk.locale,
1918
- text: chunk.text,
1919
- chars: chunk.chars,
1920
- nonWords: chunk.nonWords
1921
- }))
1922
- }
2166
+ //#region src/cli/batch/jobs/load-only.ts
2167
/**
 * Read files concurrently through runBoundedQueue without counting them.
 *
 * Each path is read and checked with isProbablyBinary. Readable text files
 * are returned with their UTF-8 content; missing paths, unreadable files and
 * binary files become skip entries. A read error recognised by
 * isResourceLimitError is rethrown via createResourceLimitError.
 *
 * @param {string[]} filePaths - Files to load.
 * @param {object} options - Uses `options.jobs` as the requested concurrency.
 * @returns {Promise<{files: Array<{path: string, content: string}>, skipped: Array<{path: string, reason: string}>}>}
 *   Loaded files and skipped entries, each in input order.
 */
async function loadBatchInputsWithJobs(filePaths, options) {
  const limits = resolveBatchJobsLimit();
  const skipEntry = (path, reason) => ({
    type: "skip",
    path,
    reason
  });

  const entries = await runBoundedQueue(filePaths.length, options.jobs, async (index) => {
    const path = filePaths[index];
    if (!path) return skipEntry("", "not readable: missing path");

    let buffer;
    try {
      buffer = await readFile(path);
    } catch (error) {
      if (isResourceLimitError(error)) throw createResourceLimitError(path, error, options.jobs, limits);
      return skipEntry(path, `not readable: ${error instanceof Error ? error.message : String(error)}`);
    }
    if (isProbablyBinary(buffer)) return skipEntry(path, "binary file");

    return {
      type: "file",
      path,
      content: buffer.toString("utf8")
    };
  });

  const files = [];
  const skipped = [];
  for (const entry of entries) {
    if (entry.type === "file") {
      files.push({
        path: entry.path,
        content: entry.content
      });
    } else {
      skipped.push({
        path: entry.path,
        reason: entry.reason
      });
    }
  }
  return {
    files,
    skipped
  };
}
1990
- function getNonWordTotal(nonWords) {
1991
- return nonWords.counts.emoji + nonWords.counts.symbols + nonWords.counts.punctuation + (nonWords.counts.whitespace ?? 0);
1992
- }
1993
- function collectNonWordsAggregate(analyzed, enabled) {
1994
- if (!enabled) return;
1995
- const collection = createNonWordCollection();
1996
- for (const chunk of analyzed) {
1997
- if (!chunk.nonWords) continue;
1998
- mergeNonWordCollections(collection, chunk.nonWords);
1999
- }
2000
- return collection;
2001
- }
2002
2218
 
2003
2219
  //#endregion
2004
- //#region src/wc/index.ts
2005
- var wc_default = wordCounter;
2220
+ //#region src/cli/batch/jobs/render.ts
2221
/**
 * Build the batch summary for results produced by the jobs route.
 *
 * Delegates to finalizeBatchSummaryFromFileResults, forwarding only
 * `onFinalizeStart` and `preserveCollectorSegments` from `options`.
 *
 * @param {Array<{path: string, result: object}>} files - Per-file results.
 * @param {string} section - Section mode.
 * @param {object} wcOptions - Word-counter options.
 * @param {object} [options] - `onFinalizeStart`, `preserveCollectorSegments`.
 * @returns {object} The summary returned by finalizeBatchSummaryFromFileResults.
 */
function finalizeBatchJobsSummary(files, section, wcOptions, options = {}) {
  const { onFinalizeStart, preserveCollectorSegments } = options;
  return finalizeBatchSummaryFromFileResults(files, section, wcOptions, {
    onFinalizeStart,
    preserveCollectorSegments
  });
}
2006
2227
 
2007
2228
  //#endregion
2008
- //#region src/markdown/section-count.ts
2009
- function normalizeText(value) {
2010
- if (value == null) return "";
2011
- if (typeof value === "string") return value;
2012
- if (typeof value === "number" || typeof value === "boolean") return String(value);
2229
+ //#region src/cli/path/resolve.ts
2230
/**
 * List the files under a directory that pass the extension and regex filters.
 *
 * Entries are visited in `localeCompare` order of their names. Files rejected
 * by the extension filter are appended to `skipped`; files rejected by the
 * regex filter are reported through `recordRegexExcluded`. Subdirectories are
 * expanded only when `recursive` is truthy. Entries that are neither files
 * nor directories are ignored. A directory that cannot be read is recorded in
 * `skipped` and yields an empty list.
 *
 * @param {string} rootPath - Root used to compute the path tested by the regex filter.
 * @param {string} directoryPath - Directory to read.
 * @param {boolean} recursive - Whether to descend into subdirectories.
 * @param {object} extensionFilter - Passed to shouldIncludeFromDirectory.
 * @param {object} regexFilter - Passed to shouldIncludeFromDirectoryRegex.
 * @param {Array<{path: string, reason: string}>} skipped - Mutated with skip records.
 * @param {(path: string) => boolean} recordRegexExcluded - Called for regex-excluded
 *   files; the exclusion is emitted and counted only when it returns truthy.
 * @param {object} debug - Diagnostics sink with an `emit` method.
 * @param {object} stats - Counters mutated in place (filterExcluded,
 *   regexExcluded, directoryIncluded).
 * @returns {Promise<string[]>} Included file paths.
 */
async function expandDirectory(rootPath, directoryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats) {
  let entries;
  try {
    entries = await readdir(directoryPath, {
      withFileTypes: true,
      encoding: "utf8"
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    const reason = `directory read failed: ${message}`;
    skipped.push({
      path: directoryPath,
      reason
    });
    debug.emit("path.resolve.expand.read_failed", {
      directory: directoryPath,
      reason
    });
    return [];
  }

  const sortedEntries = [...entries].sort((left, right) => left.name.localeCompare(right.name));
  const files = [];
  const verbose = () => ({ verbosity: "verbose" });
  debug.emit("path.resolve.expand.start", {
    directory: directoryPath,
    entries: sortedEntries.length,
    recursive
  });

  for (const entry of sortedEntries) {
    const entryPath = resolve(directoryPath, entry.name);

    if (!entry.isFile()) {
      if (entry.isDirectory() && recursive) {
        appendAll(files, await expandDirectory(rootPath, entryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats));
      }
      continue;
    }

    if (!shouldIncludeFromDirectory(entryPath, extensionFilter)) {
      skipped.push({
        path: entryPath,
        reason: "extension excluded"
      });
      debug.emit("path.resolve.filter.excluded", {
        path: entryPath,
        reason: "extension excluded"
      }, verbose());
      stats.filterExcluded += 1;
      continue;
    }

    const relativePath = toDirectoryRelativePath(rootPath, entryPath);
    if (!shouldIncludeFromDirectoryRegex(relativePath, regexFilter)) {
      if (recordRegexExcluded(entryPath)) {
        debug.emit("path.resolve.regex.excluded", {
          path: entryPath,
          relativePath,
          pattern: regexFilter.sourcePattern,
          reason: "regex excluded"
        }, verbose());
        stats.regexExcluded += 1;
      }
      continue;
    }

    files.push(entryPath);
    stats.directoryIncluded += 1;
    debug.emit("path.resolve.expand.include", {
      path: entryPath,
      source: "directory"
    }, verbose());
  }

  debug.emit("path.resolve.expand.complete", {
    directory: directoryPath,
    files: files.length
  });
  return files;
}
2030
- function buildSingleItem(name, text, mode, options, source) {
2031
- return [{
2032
- name,
2033
- source,
2034
- result: wc_default(text, options)
2035
- }];
2036
- }
2037
- function sumTotals(items) {
2038
- return items.reduce((sum, item) => sum + item.result.total, 0);
2039
- }
2040
- function countSections(input, section, options = {}) {
2041
- const mode = options.mode ?? "chunk";
2042
- if (section === "all") {
2043
- const result = wc_default(input, options);
2044
- return {
2045
- section,
2046
- total: result.total,
2047
- frontmatterType: null,
2048
- items: [{
2049
- name: "all",
2050
- source: "content",
2051
- result
2052
- }]
2053
- };
2054
- }
2055
- const parsed = parseMarkdown(input);
2056
- const frontmatterText = parsed.frontmatter ?? "";
2057
- const contentText = parsed.content ?? "";
2058
- let items = [];
2059
- if (section === "frontmatter") items = buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter");
2060
- else if (section === "content") items = buildSingleItem("content", contentText, mode, options, "content");
2061
- else if (section === "split") items = [...buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter"), ...buildSingleItem("content", contentText, mode, options, "content")];
2062
- else if (section === "per-key") items = buildPerKeyItems(parsed.data, mode, options);
2063
- else if (section === "split-per-key") items = [...buildPerKeyItems(parsed.data, mode, options), ...buildSingleItem("content", contentText, mode, options, "content")];
2064
- return {
2065
- section,
2066
- total: sumTotals(items),
2067
- frontmatterType: parsed.frontmatterType,
2068
- items
2301
+ async function resolveBatchFilePaths(pathInputs, options) {
2302
+ const skipped = [];
2303
+ const regexExcludedPaths = /* @__PURE__ */ new Set();
2304
+ const resolvedFiles = /* @__PURE__ */ new Set();
2305
+ const stats = {
2306
+ dedupeAccepted: 0,
2307
+ dedupeDuplicates: 0,
2308
+ filterExcluded: 0,
2309
+ regexExcluded: 0,
2310
+ directoryIncluded: 0
2069
2311
  };
2070
- }
2071
-
2072
- //#endregion
2073
- //#region src/cli/batch/aggregate.ts
2074
- function mergeWordCounterResult(left, right, preserveCollectorSegments) {
2075
- if (left.breakdown.mode !== right.breakdown.mode) throw new Error("Cannot merge different breakdown modes.");
2076
- const total = left.total + right.total;
2077
- const counts = left.counts || right.counts ? {
2078
- words: (left.counts?.words ?? 0) + (right.counts?.words ?? 0),
2079
- nonWords: (left.counts?.nonWords ?? 0) + (right.counts?.nonWords ?? 0),
2080
- total: (left.counts?.total ?? 0) + (right.counts?.total ?? 0)
2081
- } : void 0;
2082
- if (left.breakdown.mode === "chunk" && right.breakdown.mode === "chunk") return {
2083
- total,
2084
- counts,
2085
- breakdown: {
2086
- mode: "chunk",
2087
- items: [...left.breakdown.items, ...right.breakdown.items]
2088
- }
2312
+ const extensionFilter = options.extensionFilter ?? buildDirectoryExtensionFilter(void 0, void 0);
2313
+ let regexFilter;
2314
+ const debug = options.debug ?? {
2315
+ enabled: false,
2316
+ verbosity: "compact",
2317
+ emit() {},
2318
+ close: async () => {}
2089
2319
  };
2090
- if (left.breakdown.mode === "segments" && right.breakdown.mode === "segments") return {
2091
- total,
2092
- counts,
2093
- breakdown: {
2094
- mode: "segments",
2095
- items: [...left.breakdown.items, ...right.breakdown.items]
2320
+ debug.emit("path.resolve.inputs", {
2321
+ inputs: pathInputs.length,
2322
+ pathMode: options.pathMode,
2323
+ recursive: options.recursive,
2324
+ hasRegex: Boolean(options.directoryRegexPattern)
2325
+ });
2326
+ const addResolvedFile = (filePath, details) => {
2327
+ regexExcludedPaths.delete(filePath);
2328
+ if (resolvedFiles.has(filePath)) {
2329
+ stats.dedupeDuplicates += 1;
2330
+ debug.emit("path.resolve.dedupe.duplicate", {
2331
+ path: filePath,
2332
+ source: details.source,
2333
+ input: details.input
2334
+ }, { verbosity: "verbose" });
2335
+ return;
2096
2336
  }
2337
+ resolvedFiles.add(filePath);
2338
+ stats.dedupeAccepted += 1;
2339
+ debug.emit("path.resolve.dedupe.accept", {
2340
+ path: filePath,
2341
+ source: details.source,
2342
+ input: details.input
2343
+ }, { verbosity: "verbose" });
2097
2344
  };
2098
- if (left.breakdown.mode === "char" && right.breakdown.mode === "char") return {
2099
- total,
2100
- counts,
2101
- breakdown: {
2102
- mode: "char",
2103
- items: [...left.breakdown.items, ...right.breakdown.items]
2104
- }
2345
+ const getRegexFilter = () => {
2346
+ if (!regexFilter) regexFilter = buildDirectoryRegexFilter(options.directoryRegexPattern);
2347
+ return regexFilter;
2105
2348
  };
2106
- if (left.breakdown.mode === "char-collector" && right.breakdown.mode === "char-collector") {
2107
- const localeOrder = [];
2108
- const mergedByLocale = /* @__PURE__ */ new Map();
2109
- const addItems = (items) => {
2110
- for (const item of items) {
2111
- const existing = mergedByLocale.get(item.locale);
2112
- if (existing) {
2113
- existing.chars += item.chars;
2114
- if (item.nonWords) {
2115
- if (!existing.nonWords) existing.nonWords = createNonWordCollection();
2116
- mergeNonWordCollections(existing.nonWords, item.nonWords);
2117
- }
2118
- continue;
2119
- }
2120
- localeOrder.push(item.locale);
2121
- mergedByLocale.set(item.locale, {
2122
- locale: item.locale,
2123
- chars: item.chars,
2124
- nonWords: item.nonWords ? mergeNonWordCollections(createNonWordCollection(), item.nonWords) : void 0
2125
- });
2126
- }
2127
- };
2128
- addItems(left.breakdown.items);
2129
- addItems(right.breakdown.items);
2130
- return {
2131
- total,
2132
- counts,
2133
- breakdown: {
2134
- mode: "char-collector",
2135
- items: localeOrder.map((locale) => {
2136
- const value = mergedByLocale.get(locale);
2137
- if (!value) throw new Error(`Missing char-collector entry for locale: ${locale}`);
2138
- return value;
2139
- })
2140
- }
2141
- };
2142
- }
2143
- if (left.breakdown.mode === "collector" && right.breakdown.mode === "collector") {
2144
- const localeOrder = [];
2145
- const mergedByLocale = /* @__PURE__ */ new Map();
2146
- const addItems = (items) => {
2147
- for (const item of items) {
2148
- const existing = mergedByLocale.get(item.locale);
2149
- if (existing) {
2150
- existing.words += item.words;
2151
- if (preserveCollectorSegments) appendAll(existing.segments, item.segments);
2152
- continue;
2153
- }
2154
- localeOrder.push(item.locale);
2155
- mergedByLocale.set(item.locale, {
2156
- locale: item.locale,
2157
- words: item.words,
2158
- segments: preserveCollectorSegments ? [...item.segments] : []
2159
- });
2160
- }
2161
- };
2162
- addItems(left.breakdown.items);
2163
- addItems(right.breakdown.items);
2164
- let mergedNonWords;
2165
- if (left.breakdown.nonWords || right.breakdown.nonWords) {
2166
- mergedNonWords = createNonWordCollection();
2167
- if (left.breakdown.nonWords) mergeNonWordCollections(mergedNonWords, left.breakdown.nonWords);
2168
- if (right.breakdown.nonWords) mergeNonWordCollections(mergedNonWords, right.breakdown.nonWords);
2349
+ const recordRegexExcluded = (filePath) => {
2350
+ if (resolvedFiles.has(filePath)) return false;
2351
+ regexExcludedPaths.add(filePath);
2352
+ return true;
2353
+ };
2354
+ for (const rawPath of pathInputs) {
2355
+ const targetPath = resolve(rawPath);
2356
+ debug.emit("path.resolve.input", {
2357
+ rawPath,
2358
+ resolvedPath: targetPath
2359
+ });
2360
+ let metadata;
2361
+ try {
2362
+ metadata = await stat(targetPath);
2363
+ } catch (error) {
2364
+ const message = error instanceof Error ? error.message : String(error);
2365
+ skipped.push({
2366
+ path: targetPath,
2367
+ reason: `not readable: ${message}`
2368
+ });
2369
+ debug.emit("path.resolve.skip", {
2370
+ path: targetPath,
2371
+ reason: `not readable: ${message}`
2372
+ });
2373
+ continue;
2169
2374
  }
2170
- return {
2171
- total,
2172
- counts,
2173
- breakdown: {
2174
- mode: "collector",
2175
- items: localeOrder.map((locale) => {
2176
- const value = mergedByLocale.get(locale);
2177
- if (!value) throw new Error(`Missing collector entry for locale: ${locale}`);
2178
- return value;
2179
- }),
2180
- nonWords: mergedNonWords
2181
- }
2182
- };
2375
+ if (metadata.isDirectory() && options.pathMode === "auto") {
2376
+ const effectiveRegexFilter = getRegexFilter();
2377
+ debug.emit("path.resolve.root.expand", {
2378
+ root: targetPath,
2379
+ recursive: options.recursive,
2380
+ regex: effectiveRegexFilter.sourcePattern ?? null
2381
+ });
2382
+ const files = await expandDirectory(targetPath, targetPath, options.recursive, extensionFilter, effectiveRegexFilter, skipped, recordRegexExcluded, debug, stats);
2383
+ for (const file of files) addResolvedFile(file, {
2384
+ source: "directory",
2385
+ input: targetPath
2386
+ });
2387
+ continue;
2388
+ }
2389
+ if (!metadata.isFile()) {
2390
+ skipped.push({
2391
+ path: targetPath,
2392
+ reason: "not a regular file"
2393
+ });
2394
+ debug.emit("path.resolve.skip", {
2395
+ path: targetPath,
2396
+ reason: "not a regular file"
2397
+ });
2398
+ continue;
2399
+ }
2400
+ addResolvedFile(targetPath, {
2401
+ source: "direct",
2402
+ input: targetPath
2403
+ });
2183
2404
  }
2405
+ for (const path of regexExcludedPaths) skipped.push({
2406
+ path,
2407
+ reason: "regex excluded"
2408
+ });
2409
+ const files = [...resolvedFiles].sort((left, right) => left.localeCompare(right));
2410
+ debug.emit("path.resolve.filter.summary", {
2411
+ excluded: stats.filterExcluded + stats.regexExcluded,
2412
+ extensionExcluded: stats.filterExcluded,
2413
+ regexExcluded: stats.regexExcluded,
2414
+ included: stats.directoryIncluded
2415
+ });
2416
+ debug.emit("path.resolve.dedupe.summary", {
2417
+ accepted: stats.dedupeAccepted,
2418
+ duplicates: stats.dedupeDuplicates
2419
+ });
2420
+ debug.emit("path.resolve.complete", {
2421
+ files: files.length,
2422
+ skipped: skipped.length,
2423
+ ordering: "absolute-path-ascending"
2424
+ });
2184
2425
  return {
2185
- total,
2186
- counts,
2187
- breakdown: left.breakdown
2426
+ files,
2427
+ skipped
2188
2428
  };
2189
2429
  }
2190
- function aggregateWordCounterResults(results, preserveCollectorSegments) {
2191
- if (results.length === 0) return wc_default("", { mode: "chunk" });
2192
- const first = results[0];
2193
- if (!first) return wc_default("", { mode: "chunk" });
2194
- let aggregate = first;
2195
- for (let index = 1; index < results.length; index += 1) {
2196
- const current = results[index];
2197
- if (!current) continue;
2198
- aggregate = mergeWordCounterResult(aggregate, current, preserveCollectorSegments);
2199
- }
2200
- return aggregate;
2430
+
2431
+ //#endregion
2432
+ //#region src/cli/progress/reporter.ts
2433
+ const PROGRESS_BAR_WIDTH = 20;
2434
+ const FILLED_BAR_CHAR = "█";
2435
+ const EMPTY_BAR_CHAR = "░";
2436
+ function clamp(value, min, max) {
2437
+ return Math.max(min, Math.min(max, value));
2201
2438
  }
2202
- function buildSectionKey(name, source) {
2203
- return `${source}:${name}`;
2439
+ function buildProgressBar(completed, total) {
2440
+ const safeTotal = Math.max(total, 1);
2441
+ const ratio = clamp(completed / safeTotal, 0, 1);
2442
+ const filled = completed >= safeTotal ? PROGRESS_BAR_WIDTH : Math.floor(ratio * PROGRESS_BAR_WIDTH);
2443
+ const empty = PROGRESS_BAR_WIDTH - filled;
2444
+ return `${FILLED_BAR_CHAR.repeat(filled)}${EMPTY_BAR_CHAR.repeat(empty)}`;
2204
2445
  }
2205
- function aggregateSectionedResults(results, preserveCollectorSegments) {
2206
- if (results.length === 0) return {
2207
- section: "all",
2208
- total: 0,
2209
- frontmatterType: null,
2210
- items: []
2211
- };
2212
- const section = results[0]?.section ?? "all";
2213
- const grouped = /* @__PURE__ */ new Map();
2214
- let total = 0;
2215
- let frontmatterType = results[0]?.frontmatterType ?? null;
2216
- for (const result of results) {
2217
- total += result.total;
2218
- if (result.section !== section) throw new Error("Cannot aggregate section results with different section modes.");
2219
- if (frontmatterType !== result.frontmatterType) frontmatterType = null;
2220
- for (const item of result.items) {
2221
- const key = buildSectionKey(item.name, item.source);
2222
- const existing = grouped.get(key);
2223
- if (!existing) {
2224
- grouped.set(key, {
2225
- name: item.name,
2226
- source: item.source,
2227
- items: [item.result]
2228
- });
2229
- continue;
2230
- }
2231
- existing.items.push(item.result);
2232
- }
2233
- }
2234
- const sourceOrder = new Map([["frontmatter", 0], ["content", 1]]);
2235
- const items = [...grouped.values()].sort((left, right) => {
2236
- const sourceDiff = (sourceOrder.get(left.source) ?? 0) - (sourceOrder.get(right.source) ?? 0);
2237
- if (sourceDiff !== 0) return sourceDiff;
2238
- return left.name.localeCompare(right.name);
2239
- }).map((entry) => ({
2240
- name: entry.name,
2241
- source: entry.source,
2242
- result: aggregateWordCounterResults(entry.items, preserveCollectorSegments)
2243
- }));
2244
- return {
2245
- section,
2246
- total,
2247
- frontmatterType,
2248
- items
2249
- };
2446
+ function formatElapsed(startedAtMs) {
2447
+ const elapsedMs = Date.now() - startedAtMs;
2448
+ const totalSeconds = Math.max(0, Math.floor(elapsedMs / 1e3));
2449
+ const minutes = Math.floor(totalSeconds / 60);
2450
+ const seconds = totalSeconds % 60;
2451
+ const tenths = Math.floor(Math.max(0, elapsedMs) % 1e3 / 100);
2452
+ return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}.${tenths}`;
2250
2453
  }
2251
- function stripCollectorSegmentsFromWordCounterResult(result) {
2252
- if (result.breakdown.mode !== "collector") return;
2253
- for (const item of result.breakdown.items) item.segments = [];
2454
+ function buildProgressLine(completed, total, startedAtMs) {
2455
+ const safeTotal = Math.max(total, 1);
2456
+ const percent = completed >= safeTotal ? 100 : Math.floor(completed / safeTotal * 100);
2457
+ return `Counting files [${buildProgressBar(completed, safeTotal)}] ${`${String(percent).padStart(3, " ")}%`} ${String(completed).padStart(String(safeTotal).length, " ")}/${safeTotal} elapsed ${formatElapsed(startedAtMs)}`;
2254
2458
  }
2255
- function stripCollectorSegmentsFromSectionedResult(result) {
2256
- for (const item of result.items) stripCollectorSegmentsFromWordCounterResult(item.result);
2459
+ function buildFinalizingLine(startedAtMs) {
2460
+ return `Finalizing aggregate... elapsed ${formatElapsed(startedAtMs)}`;
2257
2461
  }
2258
- async function buildBatchSummary(inputs, section, wcOptions, options = {}) {
2259
- const preserveCollectorSegments = options.preserveCollectorSegments ?? true;
2260
- const files = [];
2261
- for (const input of inputs) {
2262
- const result = section === "all" ? wc_default(input.content, wcOptions) : countSections(input.content, section, wcOptions);
2263
- if (!preserveCollectorSegments) if ("section" in result) stripCollectorSegmentsFromSectionedResult(result);
2264
- else stripCollectorSegmentsFromWordCounterResult(result);
2265
- files.push({
2266
- path: input.path,
2267
- result
2268
- });
2269
- options.onFileCounted?.({
2270
- completed: files.length,
2271
- total: inputs.length
2272
- });
2273
- }
2274
- options.onFinalizeStart?.();
2275
- if (files.length === 0) return {
2276
- files,
2277
- skipped: [],
2278
- aggregate: section === "all" ? wc_default("", wcOptions) : {
2279
- section,
2280
- total: 0,
2281
- frontmatterType: null,
2282
- items: []
2462
+ function createBatchProgressReporter(options) {
2463
+ const enabled = options.enabled;
2464
+ const isTTY = Boolean(options.stream.isTTY);
2465
+ const clearOnFinish = options.clearOnFinish ?? true;
2466
+ let active = false;
2467
+ let total = 0;
2468
+ let lastLineLength = 0;
2469
+ let startedAtMs = 0;
2470
+ let lastRenderedPercent = -1;
2471
+ let finalizingStarted = false;
2472
+ const writeTTYLine = (line) => {
2473
+ const trailingPadding = lastLineLength > line.length ? " ".repeat(lastLineLength - line.length) : "";
2474
+ options.stream.write(`\r${line}${trailingPadding}`);
2475
+ lastLineLength = line.length;
2476
+ };
2477
+ const render = (completed) => {
2478
+ const line = buildProgressLine(completed, total, startedAtMs);
2479
+ const safeTotal = Math.max(total, 1);
2480
+ const percent = completed >= safeTotal ? 100 : Math.floor(completed / safeTotal * 100);
2481
+ if (!isTTY && percent === lastRenderedPercent && completed < safeTotal) return;
2482
+ lastRenderedPercent = percent;
2483
+ if (isTTY) {
2484
+ writeTTYLine(line);
2485
+ return;
2283
2486
  }
2487
+ lastLineLength = line.length;
2488
+ options.stream.write(`${line}\n`);
2489
+ };
2490
+ const clearLine = () => {
2491
+ if (lastLineLength === 0) return;
2492
+ options.stream.write(`\r${" ".repeat(lastLineLength)}\r`);
2493
+ lastLineLength = 0;
2284
2494
  };
2285
2495
  return {
2286
- files,
2287
- skipped: [],
2288
- aggregate: section === "all" ? aggregateWordCounterResults(files.map((file) => file.result), preserveCollectorSegments) : aggregateSectionedResults(files.map((file) => file.result), preserveCollectorSegments)
2496
+ enabled,
2497
+ start(nextTotal, nextStartedAtMs) {
2498
+ if (!enabled || nextTotal <= 1) return;
2499
+ total = nextTotal;
2500
+ active = true;
2501
+ startedAtMs = nextStartedAtMs ?? Date.now();
2502
+ lastRenderedPercent = -1;
2503
+ finalizingStarted = false;
2504
+ render(0);
2505
+ },
2506
+ advance(snapshot) {
2507
+ if (!active) return;
2508
+ render(snapshot.completed);
2509
+ },
2510
+ startFinalizing() {
2511
+ if (!active || finalizingStarted) return;
2512
+ finalizingStarted = true;
2513
+ const line = buildFinalizingLine(startedAtMs);
2514
+ if (isTTY) {
2515
+ if (!clearOnFinish) {
2516
+ options.stream.write(`\n${line}`);
2517
+ lastLineLength = line.length;
2518
+ return;
2519
+ }
2520
+ writeTTYLine(line);
2521
+ return;
2522
+ }
2523
+ lastLineLength = line.length;
2524
+ options.stream.write(`${line}\n`);
2525
+ },
2526
+ finish() {
2527
+ if (!active) return;
2528
+ if (isTTY) if (clearOnFinish) clearLine();
2529
+ else options.stream.write("\n");
2530
+ active = false;
2531
+ }
2289
2532
  };
2290
2533
  }
2291
2534
 
@@ -2316,67 +2559,173 @@ async function runBatchCount(options) {
2316
2559
  stage: "resolve",
2317
2560
  elapsedMs: resolveElapsedMs
2318
2561
  });
2319
- const loadStartedAtMs = Date.now();
2320
- options.debug.emit("batch.load.start", { files: resolved.files.length });
2321
- const loaded = await loadBatchInputs(resolved.files);
2322
- const loadElapsedMs = Date.now() - loadStartedAtMs;
2323
- options.debug.emit("batch.load.complete", {
2324
- files: loaded.files.length,
2325
- skipped: loaded.skipped.length,
2326
- elapsedMs: loadElapsedMs
2327
- });
2328
- options.debug.emit("batch.stage.timing", {
2329
- stage: "load",
2330
- elapsedMs: loadElapsedMs
2331
- });
2332
- const progressEnabled = options.progressReporter.enabled && loaded.files.length > 1;
2333
- options.debug.emit("batch.progress.start", {
2334
- enabled: progressEnabled,
2335
- total: loaded.files.length
2562
+ options.debug.emit("batch.jobs.strategy", {
2563
+ strategy: options.jobsStrategy,
2564
+ jobs: options.jobs
2336
2565
  });
2337
- if (progressEnabled) options.progressReporter.start(loaded.files.length, batchStartedAtMs);
2338
2566
  let summary;
2339
- const countStartedAtMs = Date.now();
2340
- let finalizeStartedAtMs = null;
2341
- let emittedCountTiming = false;
2342
- try {
2343
- summary = await buildBatchSummary(loaded.files, options.section, options.wcOptions, {
2344
- onFileCounted: (snapshot) => {
2345
- if (progressEnabled) options.progressReporter.advance(snapshot);
2346
- },
2347
- onFinalizeStart: () => {
2348
- finalizeStartedAtMs = Date.now();
2349
- if (progressEnabled) options.progressReporter.startFinalizing();
2350
- const countElapsedMs = finalizeStartedAtMs - countStartedAtMs;
2351
- options.debug.emit("batch.stage.timing", {
2352
- stage: "count",
2353
- elapsedMs: countElapsedMs
2354
- });
2355
- emittedCountTiming = true;
2356
- },
2357
- preserveCollectorSegments: options.preserveCollectorSegments
2567
+ let routeSkips = [];
2568
+ if (options.jobsStrategy === "load-only") {
2569
+ const loadStartedAtMs = Date.now();
2570
+ options.debug.emit("batch.load.start", {
2571
+ files: resolved.files.length,
2572
+ jobs: options.jobs,
2573
+ strategy: options.jobsStrategy
2358
2574
  });
2359
- } finally {
2360
- if (progressEnabled) options.progressReporter.finish();
2361
- options.debug.emit("batch.progress.complete", {
2575
+ const loaded = await loadBatchInputsWithJobs(resolved.files, { jobs: options.jobs });
2576
+ const loadElapsedMs = Date.now() - loadStartedAtMs;
2577
+ options.debug.emit("batch.load.complete", {
2578
+ files: loaded.files.length,
2579
+ skipped: loaded.skipped.length,
2580
+ elapsedMs: loadElapsedMs,
2581
+ strategy: options.jobsStrategy
2582
+ });
2583
+ options.debug.emit("batch.stage.timing", {
2584
+ stage: "load",
2585
+ elapsedMs: loadElapsedMs
2586
+ });
2587
+ const progressEnabled = options.progressReporter.enabled && loaded.files.length > 1;
2588
+ options.debug.emit("batch.progress.start", {
2362
2589
  enabled: progressEnabled,
2363
2590
  total: loaded.files.length
2364
2591
  });
2365
- }
2366
- if (!emittedCountTiming) {
2367
- const countElapsedMs = Date.now() - countStartedAtMs;
2592
+ if (progressEnabled) options.progressReporter.start(loaded.files.length, batchStartedAtMs);
2593
+ const countStartedAtMs = Date.now();
2594
+ let finalizeStartedAtMs = null;
2595
+ let emittedCountTiming = false;
2596
+ try {
2597
+ summary = await buildBatchSummary(loaded.files, options.section, options.wcOptions, {
2598
+ onFileCounted: (snapshot) => {
2599
+ if (progressEnabled) options.progressReporter.advance(snapshot);
2600
+ },
2601
+ onFinalizeStart: () => {
2602
+ finalizeStartedAtMs = Date.now();
2603
+ if (progressEnabled) options.progressReporter.startFinalizing();
2604
+ const countElapsedMs = finalizeStartedAtMs - countStartedAtMs;
2605
+ options.debug.emit("batch.stage.timing", {
2606
+ stage: "count",
2607
+ elapsedMs: countElapsedMs
2608
+ });
2609
+ emittedCountTiming = true;
2610
+ },
2611
+ preserveCollectorSegments: options.preserveCollectorSegments
2612
+ });
2613
+ } finally {
2614
+ if (progressEnabled) options.progressReporter.finish();
2615
+ options.debug.emit("batch.progress.complete", {
2616
+ enabled: progressEnabled,
2617
+ total: loaded.files.length
2618
+ });
2619
+ }
2620
+ if (!emittedCountTiming) {
2621
+ const countElapsedMs = Date.now() - countStartedAtMs;
2622
+ options.debug.emit("batch.stage.timing", {
2623
+ stage: "count",
2624
+ elapsedMs: countElapsedMs
2625
+ });
2626
+ }
2627
+ const finalizeElapsedMs = finalizeStartedAtMs === null ? 0 : Date.now() - finalizeStartedAtMs;
2628
+ options.debug.emit("batch.stage.timing", {
2629
+ stage: "finalize",
2630
+ elapsedMs: finalizeElapsedMs
2631
+ });
2632
+ routeSkips = loaded.skipped;
2633
+ } else {
2634
+ options.debug.emit("batch.load.start", {
2635
+ files: resolved.files.length,
2636
+ jobs: options.jobs,
2637
+ strategy: options.jobsStrategy
2638
+ });
2639
+ options.debug.emit("batch.load.complete", {
2640
+ files: 0,
2641
+ skipped: 0,
2642
+ elapsedMs: 0,
2643
+ strategy: options.jobsStrategy
2644
+ });
2645
+ options.debug.emit("batch.stage.timing", {
2646
+ stage: "load",
2647
+ elapsedMs: 0
2648
+ });
2649
+ const progressEnabled = options.progressReporter.enabled && resolved.files.length > 1;
2650
+ options.debug.emit("batch.progress.start", {
2651
+ enabled: progressEnabled,
2652
+ total: resolved.files.length
2653
+ });
2654
+ if (progressEnabled) options.progressReporter.start(resolved.files.length, batchStartedAtMs);
2655
+ const countStartedAtMs = Date.now();
2656
+ let finalizeStartedAtMs = null;
2657
+ let emittedCountTiming = false;
2658
+ try {
2659
+ let counted;
2660
+ try {
2661
+ counted = await countBatchInputsWithWorkerJobs(resolved.files, {
2662
+ jobs: options.jobs,
2663
+ section: options.section,
2664
+ wcOptions: options.wcOptions,
2665
+ preserveCollectorSegments: options.preserveCollectorSegments,
2666
+ onFileProcessed: (snapshot) => {
2667
+ if (progressEnabled) options.progressReporter.advance(snapshot);
2668
+ }
2669
+ });
2670
+ options.debug.emit("batch.jobs.executor", {
2671
+ strategy: options.jobsStrategy,
2672
+ executor: "worker-pool",
2673
+ jobs: options.jobs
2674
+ });
2675
+ } catch (error) {
2676
+ if (!(error instanceof WorkerRouteUnavailableError)) throw error;
2677
+ options.debug.emit("batch.jobs.executor", {
2678
+ strategy: options.jobsStrategy,
2679
+ executor: "async-fallback",
2680
+ reason: error.message,
2681
+ jobs: options.jobs
2682
+ });
2683
+ counted = await countBatchInputsWithJobs(resolved.files, {
2684
+ jobs: options.jobs,
2685
+ section: options.section,
2686
+ wcOptions: options.wcOptions,
2687
+ preserveCollectorSegments: options.preserveCollectorSegments,
2688
+ onFileProcessed: (snapshot) => {
2689
+ if (progressEnabled) options.progressReporter.advance(snapshot);
2690
+ }
2691
+ });
2692
+ }
2693
+ routeSkips = counted.skipped;
2694
+ summary = finalizeBatchJobsSummary(counted.files, options.section, options.wcOptions, {
2695
+ onFinalizeStart: () => {
2696
+ finalizeStartedAtMs = Date.now();
2697
+ if (progressEnabled) options.progressReporter.startFinalizing();
2698
+ const countElapsedMs = finalizeStartedAtMs - countStartedAtMs;
2699
+ options.debug.emit("batch.stage.timing", {
2700
+ stage: "count",
2701
+ elapsedMs: countElapsedMs
2702
+ });
2703
+ emittedCountTiming = true;
2704
+ },
2705
+ preserveCollectorSegments: options.preserveCollectorSegments
2706
+ });
2707
+ } finally {
2708
+ if (progressEnabled) options.progressReporter.finish();
2709
+ options.debug.emit("batch.progress.complete", {
2710
+ enabled: progressEnabled,
2711
+ total: resolved.files.length
2712
+ });
2713
+ }
2714
+ if (!emittedCountTiming) {
2715
+ const countElapsedMs = Date.now() - countStartedAtMs;
2716
+ options.debug.emit("batch.stage.timing", {
2717
+ stage: "count",
2718
+ elapsedMs: countElapsedMs
2719
+ });
2720
+ }
2721
+ const finalizeElapsedMs = finalizeStartedAtMs === null ? 0 : Date.now() - finalizeStartedAtMs;
2368
2722
  options.debug.emit("batch.stage.timing", {
2369
- stage: "count",
2370
- elapsedMs: countElapsedMs
2723
+ stage: "finalize",
2724
+ elapsedMs: finalizeElapsedMs
2371
2725
  });
2372
2726
  }
2373
- const finalizeElapsedMs = finalizeStartedAtMs === null ? 0 : Date.now() - finalizeStartedAtMs;
2374
- options.debug.emit("batch.stage.timing", {
2375
- stage: "finalize",
2376
- elapsedMs: finalizeElapsedMs
2377
- });
2378
2727
  appendAll(summary.skipped, resolved.skipped);
2379
- appendAll(summary.skipped, loaded.skipped);
2728
+ appendAll(summary.skipped, routeSkips);
2380
2729
  options.debug.emit("batch.aggregate.complete", {
2381
2730
  files: summary.files.length,
2382
2731
  skipped: summary.skipped.length,
@@ -2385,6 +2734,12 @@ async function runBatchCount(options) {
2385
2734
  return summary;
2386
2735
  }
2387
2736
 
2737
+ //#endregion
2738
+ //#region src/cli/batch/jobs/strategy.ts
2739
+ function resolveBatchJobsStrategy(jobs) {
2740
+ return jobs > 1 ? "load-count" : "load-only";
2741
+ }
2742
+
2388
2743
  //#endregion
2389
2744
  //#region src/utils/show-singular-or-plural-word.ts
2390
2745
  function showSingularOrPluralWord(count, word) {
@@ -2601,6 +2956,10 @@ function countLongOptionOccurrences(argv, optionName) {
2601
2956
  function validateSingleRegexOptionUsage(argv) {
2602
2957
  if (countLongOptionOccurrences(argv, "--regex") > 1) throw new Error("`--regex` can only be provided once.");
2603
2958
  }
2959
+ function validateStandalonePrintJobsLimitUsage(argv) {
2960
+ const tokens = argv.slice(2).filter((token) => token.length > 0);
2961
+ if (tokens.length !== 1 || tokens[0] !== "--print-jobs-limit") throw new Error("`--print-jobs-limit` must be used alone.");
2962
+ }
2604
2963
  function resolveBatchScope(argv) {
2605
2964
  let scope = "merged";
2606
2965
  for (const token of argv) {
@@ -2709,6 +3068,11 @@ async function executeBatchCount({ argv, options, runtime, resolved, debug, teeE
2709
3068
  directoryRegexPattern: options.regex
2710
3069
  };
2711
3070
  const extensionFilter = buildDirectoryExtensionFilter(options.includeExt, options.excludeExt);
3071
+ const requestedJobs = options.jobs;
3072
+ const jobsLimit = resolveBatchJobsLimit();
3073
+ const jobs = clampRequestedJobs(requestedJobs, jobsLimit);
3074
+ if (requestedJobs > jobsLimit.suggestedMaxJobs) console.error(import_picocolors.default.yellow(formatJobsAdvisoryWarning(requestedJobs, jobs, jobsLimit)));
3075
+ const jobsStrategy = resolveBatchJobsStrategy(jobs);
2712
3076
  const debugEnabled = Boolean(options.debug);
2713
3077
  const mirrorDebugToTerminal = debugEnabled && (!debug.reportPath || teeEnabled);
2714
3078
  const summary = await runBatchCount({
@@ -2723,7 +3087,9 @@ async function executeBatchCount({ argv, options, runtime, resolved, debug, teeE
2723
3087
  enabled: options.format === "standard" && options.progress,
2724
3088
  stream: runtime.stderr ?? process.stderr,
2725
3089
  clearOnFinish: !(mirrorDebugToTerminal || options.keepProgress)
2726
- })
3090
+ }),
3091
+ jobs,
3092
+ jobsStrategy
2727
3093
  });
2728
3094
  const showSkipDiagnostics = debugEnabled && !batchOptions.quietSkips;
2729
3095
  debug.emit("batch.skips.policy", {
@@ -2891,6 +3257,17 @@ async function runCli(argv = process.argv, runtime = {}) {
2891
3257
  program.name("word-counter").description("Locale-aware word counting powered by Intl.Segmenter.").version(getFormattedVersionLabel(), "-v, --version", "output the version number");
2892
3258
  configureProgramOptions(program, parseMode);
2893
3259
  program.action(async (textTokens, options) => {
3260
+ if (options.printJobsLimit) {
3261
+ try {
3262
+ validateStandalonePrintJobsLimitUsage(argv);
3263
+ } catch (error) {
3264
+ const message = error instanceof Error ? error.message : String(error);
3265
+ program.error(import_picocolors.default.red(message));
3266
+ return;
3267
+ }
3268
+ console.log(JSON.stringify(resolveBatchJobsLimit()));
3269
+ return;
3270
+ }
2894
3271
  const debugEnabled = Boolean(options.debug);
2895
3272
  const debugReportPath = resolveDebugReportPathOption(options.debugReport);
2896
3273
  const debugReportEnabled = options.debugReport !== void 0 && options.debugReport !== false;