@dev-pi2pie/word-counter 0.1.3 → 0.1.4

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/dist/esm/bin.mjs CHANGED
@@ -4,8 +4,9 @@ import { Command, Option } from "commander";
4
4
  import { closeSync, createWriteStream, existsSync, mkdirSync, openSync, readFileSync, statSync } from "node:fs";
5
5
  import { basename, dirname, extname, join, relative, resolve, sep } from "node:path";
6
6
  import { fileURLToPath } from "node:url";
7
- import { readFile, readdir, stat } from "node:fs/promises";
7
+ import os from "node:os";
8
8
  import { parseDocument } from "yaml";
9
+ import { readFile, readdir, stat } from "node:fs/promises";
9
10
 
10
11
  //#region \0rolldown/runtime.js
11
12
  var __create = Object.create;
@@ -345,8 +346,14 @@ function collectPathValue(value, previous = []) {
345
346
  function collectLatinHintValue(value, previous = []) {
346
347
  return [...previous, value];
347
348
  }
349
+ function parseJobsOption(value) {
350
+ if (!/^\d+$/.test(value)) throw new Error("`--jobs` must be an integer >= 1.");
351
+ const parsed = Number.parseInt(value, 10);
352
+ if (!Number.isSafeInteger(parsed) || parsed < 1) throw new Error("`--jobs` must be an integer >= 1.");
353
+ return parsed;
354
+ }
348
355
  function configureProgramOptions(program, parseMode) {
349
- program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--latin-hint <tag>=<pattern>", "add a custom Latin hint rule (repeatable)", collectLatinHintValue, []).option("--latin-hints-file <path>", "load custom Latin hint rules from a JSON file").option("--no-default-latin-hints", "disable built-in Latin hint rules").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies with --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", "mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of 
--debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in standard mode").option("--no-recursive", "disable recursive directory traversal").option("--quiet-skips", "hide skip diagnostics (applies when --debug is enabled)").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("--regex <pattern>", "regex filter for directory-scanned paths (applies to --path directories only)").option("-p, --path <path>", "read input from file or directory (directories expand in auto mode by default)", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
356
+ program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--latin-hint <tag>=<pattern>", "add a custom Latin hint rule (repeatable)", collectLatinHintValue, []).option("--latin-hints-file <path>", "load custom Latin hint rules from a JSON file").option("--no-default-latin-hints", "disable built-in Latin hint rules").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", "mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of 
--debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--jobs <n>", "batch jobs in --path mode (1=async main-thread, >1=worker load+count)", parseJobsOption, 1).option("--print-jobs-limit", "print host jobs-limit JSON and exit (must be used alone)").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in standard mode").option("--no-recursive", "disable recursive directory traversal").option("--quiet-warnings", "suppress non-fatal warning diagnostics").option("--quiet-skips", "suppress debug skip output and per-file json skipped field").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("--regex <pattern>", "regex filter for directory-scanned paths (applies to --path directories only)").option("-p, --path <path>", "read input from file or directory (directories expand in auto mode by default)", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
350
357
  }
351
358
 
352
359
  //#endregion
@@ -421,7 +428,7 @@ var require_picocolors = /* @__PURE__ */ __commonJSMin(((exports, module) => {
421
428
  //#endregion
422
429
  //#region src/cli/program/version-embedded.ts
423
430
  var import_picocolors = /* @__PURE__ */ __toESM(require_picocolors(), 1);
424
- const EMBEDDED_PACKAGE_VERSION = "0.1.3";
431
+ const EMBEDDED_PACKAGE_VERSION = "0.1.4";
425
432
 
426
433
  //#endregion
427
434
  //#region src/cli/program/version.ts
@@ -474,6 +481,51 @@ function getFormattedVersionLabel() {
474
481
  return import_picocolors.default.bgBlack(import_picocolors.default.bold(import_picocolors.default.italic(` word-counter ${import_picocolors.default.cyanBright(`ver.${version}`)} `)));
475
482
  }
476
483
 
484
+ //#endregion
485
+ //#region src/cli/batch/jobs/limits.ts
486
+ const DEFAULT_UV_THREADPOOL_SIZE = 4;
487
+ function parsePositiveInteger(value) {
488
+ if (!value) return;
489
+ const parsed = Number.parseInt(value, 10);
490
+ if (!Number.isFinite(parsed) || parsed <= 0) return;
491
+ return parsed;
492
+ }
493
+ function resolveBatchJobsLimit(env = process.env) {
494
+ const cpuLimit = Math.max(1, os.availableParallelism());
495
+ const uvThreadpool = parsePositiveInteger(env.UV_THREADPOOL_SIZE) ?? DEFAULT_UV_THREADPOOL_SIZE;
496
+ const ioLimit = Math.max(1, uvThreadpool * 2);
497
+ return {
498
+ suggestedMaxJobs: Math.max(1, Math.min(cpuLimit, ioLimit)),
499
+ cpuLimit,
500
+ uvThreadpool,
501
+ ioLimit
502
+ };
503
+ }
504
+ function clampRequestedJobs(requestedJobs, limits) {
505
+ return Math.max(1, Math.min(requestedJobs, limits.suggestedMaxJobs));
506
+ }
507
+ function formatJobsAdvisoryWarning(requestedJobs, effectiveJobs, limits) {
508
+ return [
509
+ `Warning: requested --jobs=${requestedJobs} exceeds suggested host limit (${limits.suggestedMaxJobs}).`,
510
+ `Running with --jobs=${effectiveJobs} as a safety cap.`,
511
+ `Host limits: cpuLimit=${limits.cpuLimit}, uvThreadpool=${limits.uvThreadpool}, ioLimit=${limits.ioLimit}.`
512
+ ].join(" ");
513
+ }
514
+ function isResourceLimitError(error) {
515
+ if (typeof error !== "object" || error === null) return false;
516
+ const code = "code" in error ? error.code : void 0;
517
+ return code === "EMFILE" || code === "ENFILE";
518
+ }
519
+ function createResourceLimitError(path, error, requestedJobs, limits) {
520
+ const message = error instanceof Error ? error.message : String(error);
521
+ const code = typeof error === "object" && error !== null && "code" in error ? String(error.code) : "UNKNOWN";
522
+ return new Error([
523
+ `Resource limit reached while processing: ${path} (${code}: ${message}).`,
524
+ `Requested --jobs=${requestedJobs}; suggested host limit is ${limits.suggestedMaxJobs}.`,
525
+ "Reduce --jobs or raise OS file descriptor limits before retrying."
526
+ ].join(" "));
527
+ }
528
+
477
529
  //#endregion
478
530
  //#region src/utils/append-all.ts
479
531
  function appendAll(target, source) {
@@ -481,1811 +533,1928 @@ function appendAll(target, source) {
481
533
  }
482
534
 
483
535
  //#endregion
484
- //#region src/cli/path/load.ts
485
- function isProbablyBinary(buffer) {
486
- if (buffer.length === 0) return false;
487
- const sampleSize = Math.min(buffer.length, 1024);
488
- let suspicious = 0;
489
- for (let index = 0; index < sampleSize; index += 1) {
490
- const byte = buffer[index] ?? 0;
491
- if (byte === 0) return true;
492
- if (byte === 9 || byte === 10 || byte === 13) continue;
493
- if (byte >= 32 && byte <= 126) continue;
494
- if (byte >= 128) continue;
495
- suspicious += 1;
536
+ //#region src/markdown/toml/arrays.ts
537
+ function ensureArrayContainer(result, key) {
538
+ const existing = result[key];
539
+ if (Array.isArray(existing)) return existing;
540
+ const list = [];
541
+ result[key] = list;
542
+ return list;
543
+ }
544
+ function flattenArrayTables(result) {
545
+ for (const [key, value] of Object.entries(result)) {
546
+ if (!Array.isArray(value)) continue;
547
+ result[key] = value.map((entry) => Object.entries(entry).map(([entryKey, entryValue]) => `${entryKey}=${entryValue}`).join(", ")).join(" | ");
496
548
  }
497
- return suspicious / sampleSize > .3;
498
549
  }
499
- async function loadBatchInputs(filePaths) {
500
- const files = [];
501
- const skipped = [];
502
- for (const filePath of filePaths) {
503
- let buffer;
504
- try {
505
- buffer = await readFile(filePath);
506
- } catch (error) {
507
- const message = error instanceof Error ? error.message : String(error);
508
- skipped.push({
509
- path: filePath,
510
- reason: `not readable: ${message}`
511
- });
550
+
551
+ //#endregion
552
+ //#region src/markdown/toml/keys.ts
553
+ function stripKeyQuotes(key) {
554
+ const trimmed = key.trim();
555
+ if (trimmed.startsWith("\"") && trimmed.endsWith("\"") || trimmed.startsWith("'") && trimmed.endsWith("'")) return trimmed.slice(1, -1);
556
+ return trimmed;
557
+ }
558
+ function normalizeKeyPath(key) {
559
+ const trimmed = key.trim();
560
+ if (!trimmed) return null;
561
+ if (trimmed.startsWith("\"") && trimmed.endsWith("\"") || trimmed.startsWith("'") && trimmed.endsWith("'")) {
562
+ const unquoted = stripKeyQuotes(trimmed);
563
+ return unquoted ? unquoted : null;
564
+ }
565
+ const segments = trimmed.split(".").map((segment) => segment.trim());
566
+ if (segments.some((segment) => !segment)) return null;
567
+ return segments.join(".");
568
+ }
569
+
570
+ //#endregion
571
+ //#region src/markdown/toml/strings.ts
572
+ function stripInlineComment(line) {
573
+ let inString = null;
574
+ let escaped = false;
575
+ for (let i = 0; i < line.length; i += 1) {
576
+ const char = line[i] ?? "";
577
+ if (inString) {
578
+ if (escaped) {
579
+ escaped = false;
580
+ continue;
581
+ }
582
+ if (char === "\\" && inString === "double") {
583
+ escaped = true;
584
+ continue;
585
+ }
586
+ if (inString === "double" && char === "\"") {
587
+ inString = null;
588
+ continue;
589
+ }
590
+ if (inString === "single" && char === "'") {
591
+ inString = null;
592
+ continue;
593
+ }
512
594
  continue;
513
595
  }
514
- if (isProbablyBinary(buffer)) {
515
- skipped.push({
516
- path: filePath,
517
- reason: "binary file"
518
- });
596
+ if (char === "\"") {
597
+ inString = "double";
519
598
  continue;
520
599
  }
521
- files.push({
522
- path: filePath,
523
- content: buffer.toString("utf8")
524
- });
600
+ if (char === "'") {
601
+ inString = "single";
602
+ continue;
603
+ }
604
+ if (char === "#") return line.slice(0, i).trimEnd();
525
605
  }
526
- return {
527
- files,
528
- skipped
529
- };
606
+ return line;
607
+ }
608
+ function unescapeBasic(input) {
609
+ return input.replace(/\\\\/g, "\\").replace(/\\"/g, "\"").replace(/\\n/g, "\n").replace(/\\t/g, " ").replace(/\\r/g, "\r");
610
+ }
611
+ function parseStringLiteral(value) {
612
+ if (value.startsWith("\"\"\"") && value.endsWith("\"\"\"")) return unescapeBasic(value.slice(3, -3));
613
+ if (value.startsWith("'''") && value.endsWith("'''")) return value.slice(3, -3);
614
+ if (value.startsWith("\"") && value.endsWith("\"")) return unescapeBasic(value.slice(1, -1));
615
+ if (value.startsWith("'") && value.endsWith("'")) return value.slice(1, -1);
616
+ return null;
530
617
  }
531
618
 
532
619
  //#endregion
533
- //#region src/cli/path/resolve.ts
534
- async function expandDirectory(rootPath, directoryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats) {
535
- let entries;
536
- try {
537
- entries = await readdir(directoryPath, {
538
- withFileTypes: true,
539
- encoding: "utf8"
540
- });
541
- } catch (error) {
542
- const message = error instanceof Error ? error.message : String(error);
543
- skipped.push({
544
- path: directoryPath,
545
- reason: `directory read failed: ${message}`
546
- });
547
- debug.emit("path.resolve.expand.read_failed", {
548
- directory: directoryPath,
549
- reason: `directory read failed: ${message}`
550
- });
551
- return [];
552
- }
553
- const sortedEntries = entries.slice().sort((left, right) => left.name.localeCompare(right.name));
554
- const files = [];
555
- debug.emit("path.resolve.expand.start", {
556
- directory: directoryPath,
557
- entries: sortedEntries.length,
558
- recursive
559
- });
560
- for (const entry of sortedEntries) {
561
- const entryPath = resolve(directoryPath, entry.name);
562
- if (entry.isFile()) {
563
- if (!shouldIncludeFromDirectory(entryPath, extensionFilter)) {
564
- skipped.push({
565
- path: entryPath,
566
- reason: "extension excluded"
567
- });
568
- debug.emit("path.resolve.filter.excluded", {
569
- path: entryPath,
570
- reason: "extension excluded"
571
- }, { verbosity: "verbose" });
572
- stats.filterExcluded += 1;
620
+ //#region src/markdown/toml/values.ts
621
+ function parsePrimitive(raw) {
622
+ const value = raw.trim();
623
+ if (!value) return null;
624
+ const stringLiteral = parseStringLiteral(value);
625
+ if (stringLiteral !== null) return stringLiteral;
626
+ if (value === "true") return true;
627
+ if (value === "false") return false;
628
+ if (/^[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$/.test(value)) return Number(value);
629
+ if (/^\d{4}-\d{2}-\d{2}/.test(value)) return value;
630
+ return value;
631
+ }
632
+ function parseArray(raw) {
633
+ const value = raw.trim();
634
+ if (!value.startsWith("[") || !value.endsWith("]")) return null;
635
+ const inner = value.slice(1, -1).trim();
636
+ if (!inner) return [];
637
+ const items = [];
638
+ let current = "";
639
+ let inString = null;
640
+ let escaped = false;
641
+ for (let i = 0; i < inner.length; i += 1) {
642
+ const char = inner[i] ?? "";
643
+ if (inString) {
644
+ current += char;
645
+ if (escaped) {
646
+ escaped = false;
573
647
  continue;
574
648
  }
575
- const relativePath = toDirectoryRelativePath(rootPath, entryPath);
576
- if (!shouldIncludeFromDirectoryRegex(relativePath, regexFilter)) {
577
- if (recordRegexExcluded(entryPath)) {
578
- debug.emit("path.resolve.regex.excluded", {
579
- path: entryPath,
580
- relativePath,
581
- pattern: regexFilter.sourcePattern,
582
- reason: "regex excluded"
583
- }, { verbosity: "verbose" });
584
- stats.regexExcluded += 1;
585
- }
649
+ if (char === "\\" && inString === "double") {
650
+ escaped = true;
586
651
  continue;
587
652
  }
588
- files.push(entryPath);
589
- stats.directoryIncluded += 1;
590
- debug.emit("path.resolve.expand.include", {
591
- path: entryPath,
592
- source: "directory"
593
- }, { verbosity: "verbose" });
653
+ if (inString === "double" && char === "\"") inString = null;
654
+ else if (inString === "single" && char === "'") inString = null;
594
655
  continue;
595
656
  }
596
- if (!entry.isDirectory() || !recursive) continue;
597
- appendAll(files, await expandDirectory(rootPath, entryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats));
657
+ if (char === "\"") {
658
+ inString = "double";
659
+ current += char;
660
+ continue;
661
+ }
662
+ if (char === "'") {
663
+ inString = "single";
664
+ current += char;
665
+ continue;
666
+ }
667
+ if (char === ",") {
668
+ const item = parsePrimitive(current);
669
+ if (item === null) return null;
670
+ items.push(item);
671
+ current = "";
672
+ continue;
673
+ }
674
+ current += char;
598
675
  }
599
- debug.emit("path.resolve.expand.complete", {
600
- directory: directoryPath,
601
- files: files.length
602
- });
603
- return files;
676
+ const finalItem = parsePrimitive(current);
677
+ if (finalItem === null) return null;
678
+ items.push(finalItem);
679
+ return items;
604
680
  }
605
- async function resolveBatchFilePaths(pathInputs, options) {
606
- const skipped = [];
607
- const regexExcludedPaths = /* @__PURE__ */ new Set();
608
- const resolvedFiles = /* @__PURE__ */ new Set();
609
- const stats = {
610
- dedupeAccepted: 0,
611
- dedupeDuplicates: 0,
612
- filterExcluded: 0,
613
- regexExcluded: 0,
614
- directoryIncluded: 0
615
- };
616
- const extensionFilter = options.extensionFilter ?? buildDirectoryExtensionFilter(void 0, void 0);
617
- let regexFilter;
618
- const debug = options.debug ?? {
619
- enabled: false,
620
- verbosity: "compact",
621
- emit() {},
622
- close: async () => {}
623
- };
624
- debug.emit("path.resolve.inputs", {
625
- inputs: pathInputs.length,
626
- pathMode: options.pathMode,
627
- recursive: options.recursive,
628
- hasRegex: Boolean(options.directoryRegexPattern)
629
- });
630
- const addResolvedFile = (filePath, details) => {
631
- regexExcludedPaths.delete(filePath);
632
- if (resolvedFiles.has(filePath)) {
633
- stats.dedupeDuplicates += 1;
634
- debug.emit("path.resolve.dedupe.duplicate", {
635
- path: filePath,
636
- source: details.source,
637
- input: details.input
638
- }, { verbosity: "verbose" });
639
- return;
681
+ function parseInlineTable(raw) {
682
+ const trimmed = raw.trim();
683
+ if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) return null;
684
+ const inner = trimmed.slice(1, -1).trim();
685
+ if (!inner) return {};
686
+ const pairs = [];
687
+ let current = "";
688
+ let inString = null;
689
+ let escaped = false;
690
+ let bracketDepth = 0;
691
+ let braceDepth = 0;
692
+ for (let i = 0; i < inner.length; i += 1) {
693
+ const char = inner[i] ?? "";
694
+ if (inString) {
695
+ current += char;
696
+ if (escaped) {
697
+ escaped = false;
698
+ continue;
699
+ }
700
+ if (char === "\\" && inString === "double") {
701
+ escaped = true;
702
+ continue;
703
+ }
704
+ if (inString === "double" && char === "\"") inString = null;
705
+ else if (inString === "single" && char === "'") inString = null;
706
+ continue;
640
707
  }
641
- resolvedFiles.add(filePath);
642
- stats.dedupeAccepted += 1;
643
- debug.emit("path.resolve.dedupe.accept", {
644
- path: filePath,
645
- source: details.source,
646
- input: details.input
647
- }, { verbosity: "verbose" });
648
- };
649
- const getRegexFilter = () => {
650
- if (!regexFilter) regexFilter = buildDirectoryRegexFilter(options.directoryRegexPattern);
651
- return regexFilter;
652
- };
653
- const recordRegexExcluded = (filePath) => {
654
- if (resolvedFiles.has(filePath)) return false;
655
- regexExcludedPaths.add(filePath);
656
- return true;
657
- };
658
- for (const rawPath of pathInputs) {
659
- const targetPath = resolve(rawPath);
660
- debug.emit("path.resolve.input", {
661
- rawPath,
662
- resolvedPath: targetPath
663
- });
664
- let metadata;
665
- try {
666
- metadata = await stat(targetPath);
667
- } catch (error) {
668
- const message = error instanceof Error ? error.message : String(error);
669
- skipped.push({
670
- path: targetPath,
671
- reason: `not readable: ${message}`
672
- });
673
- debug.emit("path.resolve.skip", {
674
- path: targetPath,
675
- reason: `not readable: ${message}`
676
- });
708
+ if (char === "\"") {
709
+ inString = "double";
710
+ current += char;
677
711
  continue;
678
712
  }
679
- if (metadata.isDirectory() && options.pathMode === "auto") {
680
- const effectiveRegexFilter = getRegexFilter();
681
- debug.emit("path.resolve.root.expand", {
682
- root: targetPath,
683
- recursive: options.recursive,
684
- regex: effectiveRegexFilter.sourcePattern ?? null
685
- });
686
- const files = await expandDirectory(targetPath, targetPath, options.recursive, extensionFilter, effectiveRegexFilter, skipped, recordRegexExcluded, debug, stats);
687
- for (const file of files) addResolvedFile(file, {
688
- source: "directory",
689
- input: targetPath
690
- });
713
+ if (char === "'") {
714
+ inString = "single";
715
+ current += char;
691
716
  continue;
692
717
  }
693
- if (!metadata.isFile()) {
694
- skipped.push({
695
- path: targetPath,
696
- reason: "not a regular file"
697
- });
698
- debug.emit("path.resolve.skip", {
699
- path: targetPath,
700
- reason: "not a regular file"
701
- });
718
+ if (char === "[") {
719
+ bracketDepth += 1;
720
+ current += char;
702
721
  continue;
703
722
  }
704
- addResolvedFile(targetPath, {
705
- source: "direct",
706
- input: targetPath
707
- });
723
+ if (char === "]") {
724
+ if (bracketDepth > 0) bracketDepth -= 1;
725
+ current += char;
726
+ continue;
727
+ }
728
+ if (char === "{") {
729
+ braceDepth += 1;
730
+ current += char;
731
+ continue;
732
+ }
733
+ if (char === "}") {
734
+ if (braceDepth > 0) braceDepth -= 1;
735
+ current += char;
736
+ continue;
737
+ }
738
+ if (char === "," && bracketDepth === 0 && braceDepth === 0) {
739
+ pairs.push(current);
740
+ current = "";
741
+ continue;
742
+ }
743
+ current += char;
708
744
  }
709
- for (const path of regexExcludedPaths) skipped.push({
710
- path,
711
- reason: "regex excluded"
712
- });
713
- const files = [...resolvedFiles].sort((left, right) => left.localeCompare(right));
714
- debug.emit("path.resolve.filter.summary", {
715
- excluded: stats.filterExcluded + stats.regexExcluded,
716
- extensionExcluded: stats.filterExcluded,
717
- regexExcluded: stats.regexExcluded,
718
- included: stats.directoryIncluded
719
- });
720
- debug.emit("path.resolve.dedupe.summary", {
721
- accepted: stats.dedupeAccepted,
722
- duplicates: stats.dedupeDuplicates
723
- });
724
- debug.emit("path.resolve.complete", {
725
- files: files.length,
726
- skipped: skipped.length,
727
- ordering: "absolute-path-ascending"
728
- });
729
- return {
730
- files,
731
- skipped
732
- };
733
- }
734
-
735
- //#endregion
736
- //#region src/cli/progress/reporter.ts
737
- const PROGRESS_BAR_WIDTH = 20;
738
- const FILLED_BAR_CHAR = "█";
739
- const EMPTY_BAR_CHAR = "░";
740
- function clamp(value, min, max) {
741
- return Math.max(min, Math.min(max, value));
742
- }
743
- function buildProgressBar(completed, total) {
744
- const safeTotal = Math.max(total, 1);
745
- const ratio = clamp(completed / safeTotal, 0, 1);
746
- const filled = completed >= safeTotal ? PROGRESS_BAR_WIDTH : Math.floor(ratio * PROGRESS_BAR_WIDTH);
747
- const empty = PROGRESS_BAR_WIDTH - filled;
748
- return `${FILLED_BAR_CHAR.repeat(filled)}${EMPTY_BAR_CHAR.repeat(empty)}`;
749
- }
750
- function formatElapsed(startedAtMs) {
751
- const elapsedMs = Date.now() - startedAtMs;
752
- const totalSeconds = Math.max(0, Math.floor(elapsedMs / 1e3));
753
- const minutes = Math.floor(totalSeconds / 60);
754
- const seconds = totalSeconds % 60;
755
- const tenths = Math.floor(Math.max(0, elapsedMs) % 1e3 / 100);
756
- return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}.${tenths}`;
745
+ if (current.trim()) pairs.push(current);
746
+ const output = {};
747
+ for (const pair of pairs) {
748
+ const separatorIndex = pair.indexOf("=");
749
+ if (separatorIndex === -1) return null;
750
+ const key = normalizeKeyPath(pair.slice(0, separatorIndex));
751
+ if (!key) return null;
752
+ const valueRaw = pair.slice(separatorIndex + 1).trim();
753
+ if (!valueRaw) return null;
754
+ if (valueRaw.startsWith("{")) return null;
755
+ const normalized = normalizeValue(valueRaw);
756
+ if (normalized === null) return null;
757
+ if (typeof normalized === "object" && !Array.isArray(normalized)) return null;
758
+ output[key] = normalized;
759
+ }
760
+ return output;
757
761
  }
758
- function buildProgressLine(completed, total, startedAtMs) {
759
- const safeTotal = Math.max(total, 1);
760
- const percent = completed >= safeTotal ? 100 : Math.floor(completed / safeTotal * 100);
761
- return `Counting files [${buildProgressBar(completed, safeTotal)}] ${`${String(percent).padStart(3, " ")}%`} ${String(completed).padStart(String(safeTotal).length, " ")}/${safeTotal} elapsed ${formatElapsed(startedAtMs)}`;
762
+ function normalizeValue(value) {
763
+ if (!value) return null;
764
+ const trimmed = value.trim();
765
+ if (trimmed.startsWith("{") && trimmed.endsWith("}")) return parseInlineTable(trimmed);
766
+ const array = parseArray(trimmed);
767
+ if (array) return array;
768
+ if (trimmed.startsWith("[") && trimmed.endsWith("]")) return null;
769
+ return parsePrimitive(trimmed);
762
770
  }
763
- function buildFinalizingLine(startedAtMs) {
764
- return `Finalizing aggregate... elapsed ${formatElapsed(startedAtMs)}`;
771
+ function toPlainText(value) {
772
+ if (value == null) return "";
773
+ if (Array.isArray(value)) return value.map((item) => String(item)).join(", ");
774
+ return String(value);
765
775
  }
766
- function createBatchProgressReporter(options) {
767
- const enabled = options.enabled;
768
- const isTTY = Boolean(options.stream.isTTY);
769
- const clearOnFinish = options.clearOnFinish ?? true;
770
- let active = false;
771
- let total = 0;
772
- let lastLineLength = 0;
773
- let startedAtMs = 0;
774
- let lastRenderedPercent = -1;
775
- let finalizingStarted = false;
776
- const writeTTYLine = (line) => {
777
- const trailingPadding = lastLineLength > line.length ? " ".repeat(lastLineLength - line.length) : "";
778
- options.stream.write(`\r${line}${trailingPadding}`);
779
- lastLineLength = line.length;
780
- };
781
- const render = (completed) => {
782
- const line = buildProgressLine(completed, total, startedAtMs);
783
- const safeTotal = Math.max(total, 1);
784
- const percent = completed >= safeTotal ? 100 : Math.floor(completed / safeTotal * 100);
785
- if (!isTTY && percent === lastRenderedPercent && completed < safeTotal) return;
786
- lastRenderedPercent = percent;
787
- if (isTTY) {
788
- writeTTYLine(line);
789
- return;
790
- }
791
- lastLineLength = line.length;
792
- options.stream.write(`${line}\n`);
793
- };
794
- const clearLine = () => {
795
- if (lastLineLength === 0) return;
796
- options.stream.write(`\r${" ".repeat(lastLineLength)}\r`);
797
- lastLineLength = 0;
798
- };
799
- return {
800
- enabled,
801
- start(nextTotal, nextStartedAtMs) {
802
- if (!enabled || nextTotal <= 1) return;
803
- total = nextTotal;
804
- active = true;
805
- startedAtMs = nextStartedAtMs ?? Date.now();
806
- lastRenderedPercent = -1;
807
- finalizingStarted = false;
808
- render(0);
809
- },
810
- advance(snapshot) {
811
- if (!active) return;
812
- render(snapshot.completed);
813
- },
814
- startFinalizing() {
815
- if (!active || finalizingStarted) return;
816
- finalizingStarted = true;
817
- const line = buildFinalizingLine(startedAtMs);
818
- if (isTTY) {
819
- if (!clearOnFinish) {
820
- options.stream.write(`\n${line}`);
821
- lastLineLength = line.length;
822
- return;
776
+
777
+ //#endregion
778
+ //#region src/markdown/toml/parse-frontmatter.ts
779
/**
 * Parses the body of a TOML frontmatter block into a flat record.
 *
 * Supported constructs, as handled below: blank lines and `#` comment lines,
 * `[table]` headers, `[[array-of-tables]]` headers, `key = value` pairs,
 * single-line and multi-line triple-quoted strings, and inline tables (whose
 * members are flattened into dotted keys). Values are stored through
 * `toPlainText`, and array-of-table entries are post-processed by
 * `flattenArrayTables` before returning.
 *
 * @param {string} frontmatter - Text between the TOML fences, newline separated.
 * @returns {object | null} The flat record, or `null` as soon as any line
 *   cannot be interpreted.
 */
function parseTomlFrontmatter(frontmatter) {
  const result = {};
  const lines = frontmatter.split("\n");
  // Header context that applies to the key/value lines that follow.
  let tablePrefix = "";
  let tableTarget = null;
  let tablePrefixInList = false;

  for (let index = 0; index < lines.length; index += 1) {
    const rawLine = lines[index] ?? "";
    const trimmedLine = rawLine.trim();
    if (trimmedLine === "" || trimmedLine.startsWith("#")) continue;

    // `[[name]]` opens a fresh entry in the list stored under `name`.
    if (trimmedLine.startsWith("[[")) {
      const arrayHeader = trimmedLine.match(/^\[\[([^\]]+)]]$/);
      if (!arrayHeader) return null;
      const listName = normalizeKeyPath(arrayHeader[1] ?? "");
      if (!listName) return null;
      const entry = {};
      ensureArrayContainer(result, listName).push(entry);
      tableTarget = entry;
      tablePrefix = listName;
      tablePrefixInList = true;
      continue;
    }

    // `[name]` switches to a plain table; later keys are prefixed with it.
    const tableHeader = trimmedLine.match(/^\[([^\]]+)]$/);
    if (tableHeader) {
      const tableName = normalizeKeyPath(tableHeader[1] ?? "");
      if (!tableName) return null;
      tablePrefix = tableName;
      tablePrefixInList = false;
      tableTarget = null;
      continue;
    }

    // Lines containing a triple-quote delimiter keep their raw text here;
    // comment stripping for them happens after the closing delimiter is found.
    const assignment = /("""|''')/.test(rawLine) ? rawLine : stripInlineComment(rawLine);
    const equalsAt = assignment.indexOf("=");
    if (equalsAt === -1) return null;
    const key = normalizeKeyPath(assignment.slice(0, equalsAt));
    if (!key) return null;
    let valueText = assignment.slice(equalsAt + 1).trim();

    let fence = null;
    if (valueText.startsWith("\"\"\"")) fence = "\"\"\"";
    else if (valueText.startsWith("'''")) fence = "'''";

    if (fence !== null) {
      const closeAt = valueText.indexOf(fence, fence.length);
      if (closeAt === -1) {
        // Multi-line string: consume following lines until one ends with the
        // delimiter (optionally followed by whitespace).
        const endsWithFence = new RegExp(`${fence}\\s*$`);
        let terminated = false;
        while (!terminated && index + 1 < lines.length) {
          index += 1;
          const continuation = lines[index] ?? "";
          valueText += `\n${continuation}`;
          terminated = endsWithFence.test(continuation);
        }
        if (!terminated) return null;
      } else {
        // Opened and closed on one line: only the text after the closing
        // delimiter may carry an inline comment.
        const tail = stripInlineComment(valueText.slice(closeAt + fence.length));
        const head = valueText.slice(0, closeAt + fence.length);
        valueText = `${head}${tail}`;
      }
    }

    const parsed = normalizeValue(valueText);
    if (parsed === null) return null;

    const qualifiedKey = tablePrefix ? `${tablePrefix}.${key}` : key;
    const writesToEntry = tablePrefixInList && tableTarget;

    if (typeof parsed === "object" && !Array.isArray(parsed)) {
      // Inline table: flatten members into dotted keys. Inside an
      // array-of-tables entry the table prefix is left off the key.
      const base = tablePrefixInList ? key : qualifiedKey;
      const sink = writesToEntry ? tableTarget : result;
      for (const [innerKey, innerValue] of Object.entries(parsed)) {
        sink[`${base}.${innerKey}`] = toPlainText(innerValue);
      }
      continue;
    }

    if (writesToEntry) tableTarget[key] = toPlainText(parsed);
    else result[qualifiedKey] = toPlainText(parsed);
  }

  flattenArrayTables(result);
  return result;
}
838
860
 
839
861
  //#endregion
840
- //#region src/markdown/toml/arrays.ts
841
- function ensureArrayContainer(result, key) {
842
- const existing = result[key];
843
- if (Array.isArray(existing)) return existing;
844
- const list = [];
845
- result[key] = list;
846
- return list;
862
+ //#region src/markdown/parse-markdown.ts
863
/** Maps each recognised frontmatter fence marker to the format it introduces. */
const FENCE_TO_TYPE = {
  "---": "yaml",
  "+++": "toml",
  ";;;": "json"
};

/**
 * Converts CRLF line endings to LF. A lone `\r` is left untouched.
 *
 * @param {string} input
 * @returns {string}
 */
function normalizeNewlines(input) {
  return input.replace(/\r\n/g, "\n");
}

/**
 * Removes a single leading byte-order mark (U+FEFF) from a line.
 *
 * The mark is written as an explicit escape: an invisible literal inside the
 * quotes is easy to lose when the file is copied or re-encoded, and an empty
 * needle would make `startsWith` match every string and drop a real character.
 *
 * @param {string} line
 * @returns {string} The line without its BOM, or the line unchanged.
 */
function stripBom(line) {
  return line.startsWith("\uFEFF") ? line.slice(1) : line;
}

/**
 * Classifies a line as a frontmatter fence.
 *
 * A fence is exactly `---`, `+++` or `;;;`, optionally surrounded by spaces
 * or tabs, with nothing else on the line.
 *
 * @param {string} line
 * @returns {"yaml" | "toml" | "json" | null} The fence's format, or `null`
 *   when the line is not a fence.
 */
function getFenceType(line) {
  const match = line.match(/^[\t ]*(---|\+\+\+|;;;)[\t ]*$/);
  if (!match) return null;
  return FENCE_TO_TYPE[match[1] ?? ""] ?? null;
}
862
- function normalizeKeyPath(key) {
863
- const trimmed = key.trim();
864
- if (!trimmed) return null;
865
- if (trimmed.startsWith("\"") && trimmed.endsWith("\"") || trimmed.startsWith("'") && trimmed.endsWith("'")) {
866
- const unquoted = stripKeyQuotes(trimmed);
867
- return unquoted ? unquoted : null;
879
/**
 * Parses frontmatter text according to its declared format.
 *
 * - `"json"`: result of `JSON.parse`, or `null` when parsing throws.
 * - `"yaml"`: parsed with `parseDocument`; `null` when the document reports
 *   errors or its top-level value is not a non-array object.
 * - `"toml"`: delegated to `parseTomlFrontmatter`.
 * - anything else (including a missing type): `null`.
 *
 * @param {string} frontmatter - Raw frontmatter text without its fences.
 * @param {string | null | undefined} type - Format name.
 * @returns {*} Parsed data or `null`.
 */
function parseFrontmatter(frontmatter, type) {
  switch (type) {
    case "json":
      try {
        return JSON.parse(frontmatter);
      } catch {
        return null;
      }
    case "yaml": {
      const doc = parseDocument(frontmatter, { prettyErrors: false });
      if (doc.errors.length > 0) return null;
      const data = doc.toJSON();
      // Only a mapping is accepted at the top level.
      const isMapping = data !== null && typeof data === "object" && !Array.isArray(data);
      return isMapping ? data : null;
    }
    case "toml":
      return parseTomlFrontmatter(frontmatter);
    default:
      return null;
  }
}
873
-
874
- //#endregion
875
- //#region src/markdown/toml/strings.ts
876
- function stripInlineComment(line) {
877
- let inString = null;
896
/**
 * Finds the first balanced `{ ... }` block starting the scan at `startIndex`.
 *
 * Braces inside double-quoted strings are ignored, and a backslash inside a
 * string escapes the character that follows it.
 *
 * @param {string} text - Text to scan.
 * @param {number} startIndex - Index where scanning (and the slice) begins.
 * @returns {{ jsonText: string, endIndex: number } | null} The text from
 *   `startIndex` through the brace that returns the depth to zero, plus that
 *   brace's index; `null` when the depth never returns to zero.
 */
function extractJsonBlock(text, startIndex) {
  let openBraces = 0;
  let insideString = false;
  let skipNext = false;

  for (let pos = startIndex; pos < text.length; pos += 1) {
    const ch = text[pos] ?? "";

    if (insideString) {
      if (skipNext) skipNext = false;
      else if (ch === "\\") skipNext = true;
      else if (ch === "\"") insideString = false;
      continue;
    }

    switch (ch) {
      case "\"":
        insideString = true;
        break;
      case "{":
        openBraces += 1;
        break;
      case "}":
        openBraces -= 1;
        if (openBraces === 0) {
          return {
            jsonText: text.slice(startIndex, pos + 1),
            endIndex: pos
          };
        }
        break;
      default:
        break;
    }
  }
  return null;
}
922
-
923
- //#endregion
924
- //#region src/markdown/toml/values.ts
925
- function parsePrimitive(raw) {
926
- const value = raw.trim();
927
- if (!value) return null;
928
- const stringLiteral = parseStringLiteral(value);
929
- if (stringLiteral !== null) return stringLiteral;
930
- if (value === "true") return true;
931
- if (value === "false") return false;
932
- if (/^[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$/.test(value)) return Number(value);
933
- if (/^\d{4}-\d{2}-\d{2}/.test(value)) return value;
934
- return value;
935
- }
936
- function parseArray(raw) {
937
- const value = raw.trim();
938
- if (!value.startsWith("[") || !value.endsWith("]")) return null;
939
- const inner = value.slice(1, -1).trim();
940
- if (!inner) return [];
941
- const items = [];
942
- let current = "";
943
- let inString = null;
944
- let escaped = false;
945
- for (let i = 0; i < inner.length; i += 1) {
946
- const char = inner[i] ?? "";
947
- if (inString) {
948
- current += char;
949
- if (escaped) {
950
- escaped = false;
951
- continue;
952
- }
953
- if (char === "\\" && inString === "double") {
954
- escaped = true;
955
- continue;
956
- }
957
- if (inString === "double" && char === "\"") inString = null;
958
- else if (inString === "single" && char === "'") inString = null;
959
- continue;
960
- }
961
- if (char === "\"") {
962
- inString = "double";
963
- current += char;
964
- continue;
965
- }
966
- if (char === "'") {
967
- inString = "single";
968
- current += char;
969
- continue;
970
- }
971
- if (char === ",") {
972
- const item = parsePrimitive(current);
973
- if (item === null) return null;
974
- items.push(item);
975
- current = "";
976
- continue;
977
- }
978
- current += char;
932
/**
 * Splits a markdown document into frontmatter and content.
 *
 * Line endings are normalised and a leading BOM is removed first. Two layouts
 * are recognised:
 *
 * 1. Fenced: the first line is a `---`, `+++` or `;;;` fence and a later line
 *    is a fence of the same kind. The text in between is the frontmatter and
 *    is parsed according to the fence type; `data` is `null` if parsing fails.
 * 2. Bare JSON: the first non-blank character is `{`. The balanced JSON object
 *    is the frontmatter, and one newline directly after it is dropped from
 *    the content. If the object cannot be extracted or parsed, the document
 *    is treated as having no frontmatter.
 *
 * @param {string} input - Full markdown text.
 * @returns {{ frontmatter: string | null, content: string, data: *, frontmatterType: string | null }}
 */
function parseMarkdown(input) {
  // `split` with a string separator always yields at least one element, so no
  // empty-array guard is needed before touching `lines[0]`.
  const lines = normalizeNewlines(input).split("\n");
  lines[0] = stripBom(lines[0] ?? "");
  const normalizedWithoutBom = lines.join("\n");

  // Result used whenever no usable frontmatter is found.
  const withoutFrontmatter = () => ({
    frontmatter: null,
    content: normalizedWithoutBom,
    data: null,
    frontmatterType: null
  });

  const openingType = getFenceType(lines[0]);
  if (!openingType) {
    // No fence: accept a JSON object that opens the document after optional
    // blank space.
    const jsonStart = (normalizedWithoutBom.match(/^[\t \n]*/)?.[0] ?? "").length;
    if (normalizedWithoutBom[jsonStart] !== "{") return withoutFrontmatter();
    const jsonBlock = extractJsonBlock(normalizedWithoutBom, jsonStart);
    if (!jsonBlock) return withoutFrontmatter();
    const frontmatter = jsonBlock.jsonText;
    const data = parseFrontmatter(frontmatter, "json");
    if (!data) return withoutFrontmatter();
    let content = normalizedWithoutBom.slice(jsonBlock.endIndex + 1);
    if (content.startsWith("\n")) content = content.slice(1);
    return {
      frontmatter,
      content,
      data,
      frontmatterType: "json"
    };
  }

  // Fenced: look for the first later line carrying the same fence type.
  let closingIndex = -1;
  for (let i = 1; i < lines.length; i += 1) {
    if (getFenceType(lines[i] ?? "") === openingType) {
      closingIndex = i;
      break;
    }
  }
  if (closingIndex === -1) return withoutFrontmatter();

  const frontmatter = lines.slice(1, closingIndex).join("\n");
  return {
    frontmatter,
    content: lines.slice(closingIndex + 1).join("\n"),
    data: parseFrontmatter(frontmatter, openingType),
    frontmatterType: openingType
  };
}
985
- function parseInlineTable(raw) {
986
- const trimmed = raw.trim();
987
- if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) return null;
988
- const inner = trimmed.slice(1, -1).trim();
989
- if (!inner) return {};
990
- const pairs = [];
991
- let current = "";
992
- let inString = null;
993
- let escaped = false;
994
- let bracketDepth = 0;
995
- let braceDepth = 0;
996
- for (let i = 0; i < inner.length; i += 1) {
997
- const char = inner[i] ?? "";
998
- if (inString) {
999
- current += char;
1000
- if (escaped) {
1001
- escaped = false;
1002
- continue;
1003
- }
1004
- if (char === "\\" && inString === "double") {
1005
- escaped = true;
1006
- continue;
1007
- }
1008
- if (inString === "double" && char === "\"") inString = null;
1009
- else if (inString === "single" && char === "'") inString = null;
1010
- continue;
1011
- }
1012
- if (char === "\"") {
1013
- inString = "double";
1014
- current += char;
1015
- continue;
1016
- }
1017
- if (char === "'") {
1018
- inString = "single";
1019
- current += char;
1020
- continue;
1021
- }
1022
- if (char === "[") {
1023
- bracketDepth += 1;
1024
- current += char;
1025
- continue;
995
+
996
+ //#endregion
997
+ //#region src/wc/segmenter.ts
998
/** Word-granularity `Intl.Segmenter` instances, keyed by locale. */
const segmenterCache = /* @__PURE__ */ new Map();
/** Grapheme-granularity `Intl.Segmenter` instances, keyed by locale. */
const graphemeSegmenterCache = /* @__PURE__ */ new Map();

/**
 * Returns the word segmenter for a locale, creating and caching it on first use.
 *
 * @param {string} locale
 * @returns {Intl.Segmenter}
 */
function getSegmenter(locale) {
  let wordSegmenter = segmenterCache.get(locale);
  if (!wordSegmenter) {
    wordSegmenter = new Intl.Segmenter(locale, { granularity: "word" });
    segmenterCache.set(locale, wordSegmenter);
  }
  return wordSegmenter;
}

/**
 * Returns the grapheme segmenter for a locale, creating and caching it on
 * first use.
 *
 * @param {string} locale
 * @returns {Intl.Segmenter}
 */
function getGraphemeSegmenter(locale) {
  let graphemeSegmenter = graphemeSegmenterCache.get(locale);
  if (!graphemeSegmenter) {
    graphemeSegmenter = new Intl.Segmenter(locale, { granularity: "grapheme" });
    graphemeSegmenterCache.set(locale, graphemeSegmenter);
  }
  return graphemeSegmenter;
}

/**
 * Reports whether `Intl.Segmenter` exists in the current runtime.
 *
 * @returns {boolean}
 */
function supportsSegmenter() {
  if (typeof Intl === "undefined") return false;
  return typeof Intl.Segmenter === "function";
}

/**
 * Counts user-perceived characters (grapheme clusters) in `text`.
 *
 * Without `Intl.Segmenter` the count falls back to the number of code points.
 *
 * @param {string} text
 * @param {string} locale - Locale used to select the grapheme segmenter.
 * @returns {number}
 */
function countCharsForLocale(text, locale) {
  if (!supportsSegmenter()) return [...text].length;
  return [...getGraphemeSegmenter(locale).segment(text)].length;
}
1024
+
1025
+ //#endregion
1026
+ //#region src/wc/non-words.ts
1027
// Classification patterns for segments that are not word-like.
const emojiRegex = /(?:\p{Extended_Pictographic}|\p{Emoji_Presentation})/u;
const emojiPresentationRegex = /\p{Emoji_Presentation}/u;
const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
const symbolRegex = /\p{S}/u;
const punctuationRegex = /\p{P}/u;
const whitespaceRegex = /\s/u;
/** Characters tallied as newlines by `addWhitespace`. */
const newlineChars = new Set([
  "\n",
  "\r",
  "\u2028",
  "\u2029"
]);

/**
 * Creates an empty non-word collection. The `whitespace` members are added
 * lazily by `addWhitespace` / `mergeNonWordCollections`.
 *
 * @returns {{ emoji: string[], symbols: string[], punctuation: string[], counts: { emoji: number, symbols: number, punctuation: number } }}
 */
function createNonWordCollection() {
  return {
    emoji: [],
    symbols: [],
    punctuation: [],
    counts: {
      emoji: 0,
      symbols: 0,
      punctuation: 0
    }
  };
}

/**
 * Records one classified segment in the collection.
 *
 * @param {object} collection - Collection to mutate.
 * @param {string} category - `"emoji"` or `"symbol"`; any other value is
 *   recorded as punctuation.
 * @param {string} segment - The segment text.
 */
function addNonWord(collection, category, segment) {
  if (category === "emoji") {
    collection.emoji.push(segment);
    collection.counts.emoji += 1;
    return;
  }
  if (category === "symbol") {
    collection.symbols.push(segment);
    collection.counts.symbols += 1;
    return;
  }
  collection.punctuation.push(segment);
  collection.counts.punctuation += 1;
}

/**
 * Tallies the whitespace characters of `segment` into the collection.
 *
 * Each character is counted as a space, tab, newline (see `newlineChars`) or
 * "other" (anything else matched by `\s`). The collection's whitespace
 * members are only created when at least one character was counted.
 *
 * @param {object} collection - Collection to mutate.
 * @param {string} segment - Text to inspect.
 * @returns {number} How many whitespace characters were counted.
 */
function addWhitespace(collection, segment) {
  let whitespace = collection.whitespace;
  let count = 0;
  for (const char of segment) {
    let bucket = null;
    if (char === " ") bucket = "spaces";
    // Written as an escape: a literal tab inside the quotes cannot be told
    // apart from a space and is easily converted to one by tooling.
    else if (char === "\t") bucket = "tabs";
    else if (newlineChars.has(char)) bucket = "newlines";
    else if (whitespaceRegex.test(char)) bucket = "other";
    if (bucket === null) continue;
    whitespace = whitespace ?? createWhitespaceCounts();
    whitespace[bucket] += 1;
    count += 1;
  }
  if (count > 0) {
    collection.whitespace = whitespace ?? createWhitespaceCounts();
    collection.counts.whitespace = (collection.counts.whitespace ?? 0) + count;
  }
  return count;
}

/**
 * Classifies a non-word segment.
 *
 * Emoji wins when the segment is a keycap sequence, contains a character with
 * default emoji presentation, or pairs the emoji variation selector (U+FE0F)
 * with a pictographic character. Otherwise symbols are checked before
 * punctuation.
 *
 * @param {string} segment
 * @returns {"emoji" | "symbol" | "punctuation" | null} `null` when no
 *   pattern matches (for example plain whitespace).
 */
function classifyNonWordSegment(segment) {
  // U+FE0F is invisible, so it is spelled out as an escape.
  const hasEmojiVariationSelector = segment.includes("\uFE0F");
  if (keycapEmojiRegex.test(segment) || emojiPresentationRegex.test(segment) || hasEmojiVariationSelector && emojiRegex.test(segment)) return "emoji";
  if (symbolRegex.test(segment)) return "symbol";
  if (punctuationRegex.test(segment)) return "punctuation";
  return null;
}

/**
 * Adds everything recorded in `source` to `target`.
 *
 * Item lists are appended through `appendAll`, which is defined outside this
 * section (presumably it pushes every element of its second argument onto its
 * first - confirm at its definition).
 *
 * @param {object} target - Collection to mutate.
 * @param {object} source - Collection to read.
 * @returns {object} `target`.
 */
function mergeNonWordCollections(target, source) {
  if (source.counts.emoji > 0) {
    appendAll(target.emoji, source.emoji);
    target.counts.emoji += source.counts.emoji;
  }
  if (source.counts.symbols > 0) {
    appendAll(target.symbols, source.symbols);
    target.counts.symbols += source.counts.symbols;
  }
  if (source.counts.punctuation > 0) {
    appendAll(target.punctuation, source.punctuation);
    target.counts.punctuation += source.counts.punctuation;
  }
  if (source.counts.whitespace && source.counts.whitespace > 0 && source.whitespace) {
    const whitespace = target.whitespace ?? createWhitespaceCounts();
    whitespace.spaces += source.whitespace.spaces;
    whitespace.tabs += source.whitespace.tabs;
    whitespace.newlines += source.whitespace.newlines;
    whitespace.other += source.whitespace.other;
    target.whitespace = whitespace;
    target.counts.whitespace = (target.counts.whitespace ?? 0) + source.counts.whitespace;
  }
  return target;
}

/**
 * Creates a zeroed whitespace tally.
 *
 * @returns {{ spaces: number, tabs: number, newlines: number, other: number }}
 */
function createWhitespaceCounts() {
  return {
    spaces: 0,
    tabs: 0,
    newlines: 0,
    other: 0
  };
}
1080
1138
 
1081
1139
  //#endregion
1082
- //#region src/markdown/toml/parse-frontmatter.ts
1083
- function parseTomlFrontmatter(frontmatter) {
1084
- const result = {};
1085
- const lines = frontmatter.split("\n");
1086
- let tablePrefix = "";
1087
- let tableTarget = null;
1088
- let tablePrefixInList = false;
1089
- for (let index = 0; index < lines.length; index += 1) {
1090
- const rawLine = lines[index] ?? "";
1091
- const trimmedLine = rawLine.trim();
1092
- if (!trimmedLine || trimmedLine.startsWith("#")) continue;
1093
- if (trimmedLine.startsWith("[[")) {
1094
- const match = trimmedLine.match(/^\[\[([^\]]+)]]$/);
1095
- if (!match) return null;
1096
- const normalizedTable = normalizeKeyPath(match[1] ?? "");
1097
- if (!normalizedTable) return null;
1098
- const list = ensureArrayContainer(result, normalizedTable);
1099
- const newEntry = {};
1100
- list.push(newEntry);
1101
- tableTarget = newEntry;
1102
- tablePrefix = normalizedTable;
1103
- tablePrefixInList = true;
1104
- continue;
1105
- }
1106
- const tableMatch = trimmedLine.match(/^\[([^\]]+)]$/);
1107
- if (tableMatch) {
1108
- const normalizedTable = normalizeKeyPath(tableMatch[1] ?? "");
1109
- if (!normalizedTable) return null;
1110
- tablePrefix = normalizedTable;
1111
- tablePrefixInList = false;
1112
- tableTarget = null;
1140
+ //#region src/wc/analyze.ts
1141
/**
 * Segments one chunk into words and, optionally, collects its non-word parts.
 *
 * @param {{ locale: string, text: string }} chunk - Text with its locale.
 * @param {boolean} collectNonWords - When truthy, non-word segments are
 *   classified and recorded.
 * @param {boolean} includeWhitespace - When truthy (and collecting), the
 *   whitespace of non-word segments is tallied as well.
 * @returns {{ locale: string, text: string, segments: string[], words: number, nonWords: object | undefined }}
 */
function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
  const wordSegmenter = getSegmenter(chunk.locale);
  const segments = [];
  const nonWords = collectNonWords ? createNonWordCollection() : null;

  for (const { segment, isWordLike } of wordSegmenter.segment(chunk.text)) {
    if (isWordLike) {
      segments.push(segment);
      continue;
    }
    // A collection only exists when collecting was requested.
    if (!nonWords) continue;
    if (includeWhitespace) addWhitespace(nonWords, segment);
    const category = classifyNonWordSegment(segment);
    if (category) addNonWord(nonWords, category, segment);
  }

  return {
    locale: chunk.locale,
    text: chunk.text,
    segments,
    words: segments.length,
    nonWords: nonWords ?? void 0
  };
}
1159
/**
 * Counts the characters of one chunk, split into word and non-word characters.
 *
 * Word-like segments always contribute. A non-word segment contributes only
 * while collecting, and only if it was classified (emoji/symbol/punctuation)
 * or had whitespace tallied for it.
 *
 * @param {{ locale: string, text: string }} chunk - Text with its locale.
 * @param {boolean} collectNonWords - When truthy, non-word segments are
 *   classified, recorded and counted.
 * @param {boolean} includeWhitespace - When truthy (and collecting),
 *   whitespace is tallied and counted.
 * @returns {{ locale: string, text: string, chars: number, wordChars: number, nonWordChars: number, nonWords: object | undefined }}
 */
function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
  const wordSegmenter = getSegmenter(chunk.locale);
  const nonWords = collectNonWords ? createNonWordCollection() : null;
  let wordChars = 0;
  let nonWordChars = 0;

  for (const { segment, isWordLike } of wordSegmenter.segment(chunk.text)) {
    if (isWordLike) {
      wordChars += countCharsForLocale(segment, chunk.locale);
      continue;
    }
    if (!nonWords) continue;

    const whitespaceCount = includeWhitespace ? addWhitespace(nonWords, segment) : 0;
    const category = classifyNonWordSegment(segment);
    if (category) addNonWord(nonWords, category, segment);

    // Segments that were neither classified nor whitespace are not counted.
    if (!category && !(whitespaceCount > 0)) continue;
    nonWordChars += countCharsForLocale(segment, chunk.locale);
  }

  return {
    locale: chunk.locale,
    text: chunk.text,
    chars: wordChars + nonWordChars,
    wordChars,
    nonWordChars,
    nonWords: nonWords ?? void 0
  };
}
1193
/**
 * Sums per-chunk character counts into one entry per locale.
 *
 * Entries are returned in order of each locale's first appearance (a `Map`
 * iterates in insertion order). Non-word collections are merged into a copy,
 * so the input chunks' collections are not mutated.
 *
 * @param {Array<{ locale: string, chars: number, wordChars: number, nonWordChars: number, nonWords?: object }>} chunks
 * @returns {Array<{ locale: string, chars: number, wordChars: number, nonWordChars: number, nonWords: object | undefined }>}
 */
function aggregateCharsByLocale(chunks) {
  const totals = /* @__PURE__ */ new Map();

  for (const chunk of chunks) {
    const entry = totals.get(chunk.locale);
    if (!entry) {
      totals.set(chunk.locale, {
        locale: chunk.locale,
        chars: chunk.chars,
        wordChars: chunk.wordChars,
        nonWordChars: chunk.nonWordChars,
        nonWords: chunk.nonWords ? mergeNonWordCollections(createNonWordCollection(), chunk.nonWords) : void 0
      });
      continue;
    }
    entry.chars += chunk.chars;
    entry.wordChars += chunk.wordChars;
    entry.nonWordChars += chunk.nonWordChars;
    if (chunk.nonWords) {
      if (!entry.nonWords) entry.nonWords = createNonWordCollection();
      mergeNonWordCollections(entry.nonWords, chunk.nonWords);
    }
  }

  return [...totals.values()];
}
1219
/**
 * Combines per-chunk word results into one entry per locale.
 *
 * Entries are returned in order of each locale's first appearance (a `Map`
 * iterates in insertion order). The first chunk's segment list is copied, so
 * later appends do not touch the input.
 *
 * @param {Array<{ locale: string, words: number, segments: string[] }>} chunks
 * @returns {Array<{ locale: string, words: number, segments: string[] }>}
 */
function aggregateByLocale(chunks) {
  const totals = /* @__PURE__ */ new Map();

  for (const { locale, words, segments } of chunks) {
    const entry = totals.get(locale);
    if (!entry) {
      totals.set(locale, {
        locale,
        words,
        segments: [...segments]
      });
      continue;
    }
    entry.words += words;
    appendAll(entry.segments, segments);
  }

  return [...totals.values()];
}
1164
1238
 
1165
1239
  //#endregion
1166
- //#region src/markdown/parse-markdown.ts
1167
- const FENCE_TO_TYPE = {
1168
- "---": "yaml",
1169
- "+++": "toml",
1170
- ";;;": "json"
1240
+ //#region src/wc/mode.ts
1241
/** Accepted spellings of each mode, mapped to the canonical mode name. */
const MODE_ALIASES = {
  chunk: "chunk",
  chunks: "chunk",
  segments: "segments",
  segment: "segments",
  seg: "segments",
  collector: "collector",
  collect: "collector",
  colle: "collector",
  char: "char",
  chars: "char",
  character: "char",
  characters: "char",
  "char-collector": "char-collector"
};
/** Tokens that stand for the `char` half of a composed `char-collector` mode. */
const CHAR_MODE_ALIASES = new Set([
  "char",
  "chars",
  "character",
  "characters"
]);
/**
 * Tokens that stand for the `collector` half of a composed mode.
 * NOTE(review): "col" is accepted here but is not a standalone key of
 * MODE_ALIASES, so `col` alone does not resolve - confirm this is intended.
 */
const COLLECTOR_MODE_ALIASES = new Set([
  "collector",
  "collect",
  "colle",
  "col"
]);

/**
 * Removes every run of `-`, `_` and whitespace from a value.
 *
 * @param {string} value
 * @returns {string}
 */
function collapseSeparators(value) {
  return value.replace(/[-_\s]+/g, "");
}

/**
 * Checks whether a separator-delimited value is made only of char aliases and
 * collector aliases, with at least one of each (in any order).
 *
 * @param {string} value - Already trimmed and lower-cased.
 * @returns {boolean}
 */
function isComposedCharCollectorFromTokens(value) {
  const tokens = value.split(/[-_\s]+/).map((token) => token.trim()).filter((token) => token.length > 0);
  if (tokens.length < 2) return false;
  let hasCharAlias = false;
  let hasCollectorAlias = false;
  for (const token of tokens) {
    if (CHAR_MODE_ALIASES.has(token)) {
      hasCharAlias = true;
      continue;
    }
    if (COLLECTOR_MODE_ALIASES.has(token)) {
      hasCollectorAlias = true;
      continue;
    }
    // Any unknown token disqualifies the whole value.
    return false;
  }
  return hasCharAlias && hasCollectorAlias;
}

/**
 * Checks whether a separator-free value is exactly one char alias joined to
 * one collector alias, in either order.
 *
 * @param {string} value - Already collapsed with `collapseSeparators`.
 * @returns {boolean}
 */
function isComposedCharCollectorCompact(value) {
  for (const charAlias of CHAR_MODE_ALIASES) {
    for (const collectorAlias of COLLECTOR_MODE_ALIASES) {
      if (value === `${charAlias}${collectorAlias}` || value === `${collectorAlias}${charAlias}`) return true;
    }
  }
  return false;
}

/**
 * Resolves user input to a canonical mode name.
 *
 * Input is trimmed and lower-cased, then tried as a direct alias, as a
 * composed char/collector value, and finally as an alias with separators
 * removed.
 *
 * @param {string | null | undefined} input
 * @returns {string | null} Canonical mode, or `null` when nothing matches.
 */
function normalizeMode(input) {
  if (!input) return null;
  // Only own keys count as aliases. A bare `MODE_ALIASES[name]` would also
  // find inherited members such as `constructor` or `__proto__` and hand a
  // function/object back to the caller as if it were a mode.
  const lookupAlias = (name) => Object.prototype.hasOwnProperty.call(MODE_ALIASES, name) ? MODE_ALIASES[name] : void 0;
  const normalized = input.trim().toLowerCase();
  const direct = lookupAlias(normalized);
  if (direct) return direct;
  if (isComposedCharCollectorFromTokens(normalized)) return "char-collector";
  const compact = collapseSeparators(normalized);
  if (isComposedCharCollectorCompact(compact)) return "char-collector";
  return lookupAlias(compact) ?? null;
}

/**
 * Resolves user input to a canonical mode name, with a fallback.
 *
 * @param {string | null | undefined} input
 * @param {string} [fallback="chunk"] - Returned when the input is not recognised.
 * @returns {string}
 */
function resolveMode(input, fallback = "chunk") {
  return normalizeMode(input) ?? fallback;
}
1236
- function parseMarkdown(input) {
1237
- const normalized = normalizeNewlines(input);
1238
- const lines = normalized.split("\n");
1239
- if (lines.length === 0) return {
1240
- frontmatter: null,
1241
- content: normalized,
1242
- data: null,
1243
- frontmatterType: null
1244
- };
1245
- lines[0] = stripBom(lines[0] ?? "");
1246
- const normalizedWithoutBom = lines.join("\n");
1247
- const openingType = getFenceType(lines[0] ?? "");
1248
- if (!openingType) {
1249
- const jsonStart = (normalizedWithoutBom.match(/^[\t \n]*/)?.[0] ?? "").length;
1250
- if (normalizedWithoutBom[jsonStart] !== "{") return {
1251
- frontmatter: null,
1252
- content: normalizedWithoutBom,
1253
- data: null,
1254
- frontmatterType: null
1255
- };
1256
- const jsonBlock = extractJsonBlock(normalizedWithoutBom, jsonStart);
1257
- if (!jsonBlock) return {
1258
- frontmatter: null,
1259
- content: normalizedWithoutBom,
1260
- data: null,
1261
- frontmatterType: null
1262
- };
1263
- const frontmatter = jsonBlock.jsonText;
1264
- let content = normalizedWithoutBom.slice(jsonBlock.endIndex + 1);
1265
- if (content.startsWith("\n")) content = content.slice(1);
1266
- const data = parseFrontmatter(frontmatter, "json");
1267
- if (!data) return {
1268
- frontmatter: null,
1269
- content: normalizedWithoutBom,
1270
- data: null,
1271
- frontmatterType: null
1272
- };
1273
- return {
1274
- frontmatter,
1275
- content,
1276
- data,
1277
- frontmatterType: "json"
1278
- };
1279
- }
1280
- let closingIndex = -1;
1281
- for (let i = 1; i < lines.length; i += 1) if (getFenceType(lines[i] ?? "") === openingType) {
1282
- closingIndex = i;
1283
- break;
1306
+
1307
+ //#endregion
1308
+ //#region src/wc/latin-hints.ts
1309
/**
 * Built-in Latin hint rules as mutable `{ tag, pattern }` records. Each
 * pattern is a character class listing letters associated with that tag.
 */
const DEFAULT_LATIN_HINT_RULES_SOURCE = [
  ["de", "[äöüÄÖÜß]"],
  ["es", "[ñÑ¿¡]"],
  ["pt", "[ãõÃÕ]"],
  ["fr", "[œŒæÆ]"],
  ["pl", "[ąćęłńśźżĄĆĘŁŃŚŹŻ]"],
  ["tr", "[ıİğĞşŞ]"],
  ["ro", "[ăĂâÂîÎșȘțȚ]"],
  ["hu", "[őŐűŰ]"],
  ["is", "[ðÐþÞ]"]
].map(([tag, pattern]) => ({ tag, pattern }));
/** Frozen list of frozen copies of the built-in rules. */
const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
1299
1348
 
1300
1349
  //#endregion
1301
- //#region src/wc/segmenter.ts
1302
- const segmenterCache = /* @__PURE__ */ new Map();
1303
- const graphemeSegmenterCache = /* @__PURE__ */ new Map();
1304
- function getSegmenter(locale) {
1305
- const cached = segmenterCache.get(locale);
1306
- if (cached) return cached;
1307
- const segmenter = new Intl.Segmenter(locale, { granularity: "word" });
1308
- segmenterCache.set(locale, segmenter);
1309
- return segmenter;
1350
+ //#region src/wc/locale-detect.ts
1351
+ const DEFAULT_LOCALE = "und-Latn";
1352
+ const DEFAULT_HAN_TAG = "und-Hani";
1353
+ const MAX_LATIN_HINT_PATTERN_LENGTH = 256;
1354
+ const regex = {
1355
+ hiragana: /\p{Script=Hiragana}/u,
1356
+ katakana: /\p{Script=Katakana}/u,
1357
+ hangul: /\p{Script=Hangul}/u,
1358
+ han: /\p{Script=Han}/u,
1359
+ latin: /\p{Script=Latin}/u,
1360
+ arabic: /\p{Script=Arabic}/u,
1361
+ cyrillic: /\p{Script=Cyrillic}/u,
1362
+ devanagari: /\p{Script=Devanagari}/u,
1363
+ thai: /\p{Script=Thai}/u
1364
+ };
1365
+ const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
1366
+ function isLatinLocale(locale, context) {
1367
+ if (context) return context.latinLocales.has(locale);
1368
+ return defaultLatinLocales.has(locale);
1310
1369
  }
1311
- function getGraphemeSegmenter(locale) {
1312
- const cached = graphemeSegmenterCache.get(locale);
1313
- if (cached) return cached;
1314
- const segmenter = new Intl.Segmenter(locale, { granularity: "grapheme" });
1315
- graphemeSegmenterCache.set(locale, segmenter);
1316
- return segmenter;
1370
+ function resolveLatinHint(options) {
1371
+ const latinTagHint = options.latinTagHint?.trim();
1372
+ if (latinTagHint) return latinTagHint;
1373
+ const latinLanguageHint = options.latinLanguageHint?.trim();
1374
+ if (latinLanguageHint) return latinLanguageHint;
1375
+ const latinLocaleHint = options.latinLocaleHint?.trim();
1376
+ if (latinLocaleHint) return latinLocaleHint;
1317
1377
  }
1318
- function supportsSegmenter() {
1319
- return typeof Intl !== "undefined" && typeof Intl.Segmenter === "function";
1378
+ function resolveHanHint(options) {
1379
+ const hanTagHint = options.hanTagHint?.trim();
1380
+ if (hanTagHint) return hanTagHint;
1381
+ const hanLanguageHint = options.hanLanguageHint?.trim();
1382
+ if (hanLanguageHint) return hanLanguageHint;
1320
1383
  }
1321
- function countCharsForLocale(text, locale) {
1322
- if (!supportsSegmenter()) return Array.from(text).length;
1323
- const segmenter = getGraphemeSegmenter(locale);
1324
- let count = 0;
1325
- for (const _segment of segmenter.segment(text)) count++;
1326
- return count;
1384
+ function compileLatinHintPattern(pattern, label) {
1385
+ const source = typeof pattern === "string" ? pattern : pattern.source;
1386
+ const hasUnicodeMode = typeof pattern !== "string" && (pattern.flags.includes("u") || pattern.flags.includes("v"));
1387
+ const flags = typeof pattern === "string" ? "u" : hasUnicodeMode ? pattern.flags : `${pattern.flags}u`;
1388
+ if (source.length === 0) throw new Error(`${label}: pattern must not be empty.`);
1389
+ if (source.length > MAX_LATIN_HINT_PATTERN_LENGTH) throw new Error(`${label}: pattern must be at most ${MAX_LATIN_HINT_PATTERN_LENGTH} characters.`);
1390
+ try {
1391
+ return new RegExp(source, flags);
1392
+ } catch (error) {
1393
+ const message = error instanceof Error ? error.message : String(error);
1394
+ throw new Error(`${label}: invalid Unicode regex pattern (${message}).`);
1395
+ }
1327
1396
  }
1328
-
1329
- //#endregion
1330
- //#region src/wc/non-words.ts
1331
- const emojiRegex = /(?:\p{Extended_Pictographic}|\p{Emoji_Presentation})/u;
1332
- const emojiPresentationRegex = /\p{Emoji_Presentation}/u;
1333
- const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
1334
- const symbolRegex = /\p{S}/u;
1335
- const punctuationRegex = /\p{P}/u;
1336
- const whitespaceRegex = /\s/u;
1337
- const newlineChars = new Set([
1338
- "\n",
1339
- "\r",
1340
- "\u2028",
1341
- "\u2029"
1342
- ]);
1343
- function createNonWordCollection() {
1397
+ function normalizeLatinHintPriority(priority, label) {
1398
+ if (priority === void 0) return 0;
1399
+ if (typeof priority !== "number" || !Number.isFinite(priority)) throw new Error(`${label}: priority must be a finite number when provided.`);
1400
+ return priority;
1401
+ }
1402
+ function compileLatinHintRule(rule, order, label) {
1403
+ const tag = typeof rule.tag === "string" ? rule.tag.trim() : "";
1404
+ if (!tag) throw new Error(`${label}: tag must be a non-empty string.`);
1344
1405
  return {
1345
- emoji: [],
1346
- symbols: [],
1347
- punctuation: [],
1348
- counts: {
1349
- emoji: 0,
1350
- symbols: 0,
1351
- punctuation: 0
1352
- }
1406
+ tag,
1407
+ pattern: compileLatinHintPattern(rule.pattern, label),
1408
+ priority: normalizeLatinHintPriority(rule.priority, label),
1409
+ order
1353
1410
  };
1354
1411
  }
1355
- function addNonWord(collection, category, segment) {
1356
- if (category === "emoji") {
1357
- collection.emoji.push(segment);
1358
- collection.counts.emoji += 1;
1359
- return;
1360
- }
1361
- if (category === "symbol") {
1362
- collection.symbols.push(segment);
1363
- collection.counts.symbols += 1;
1364
- return;
1365
- }
1366
- collection.punctuation.push(segment);
1367
- collection.counts.punctuation += 1;
1368
- }
1369
- function addWhitespace(collection, segment) {
1370
- let whitespace = collection.whitespace;
1371
- let count = 0;
1372
- for (const char of segment) {
1373
- if (char === " ") {
1374
- whitespace = whitespace ?? createWhitespaceCounts();
1375
- whitespace.spaces += 1;
1376
- count += 1;
1377
- continue;
1378
- }
1379
- if (char === " ") {
1380
- whitespace = whitespace ?? createWhitespaceCounts();
1381
- whitespace.tabs += 1;
1382
- count += 1;
1383
- continue;
1384
- }
1385
- if (newlineChars.has(char)) {
1386
- whitespace = whitespace ?? createWhitespaceCounts();
1387
- whitespace.newlines += 1;
1388
- count += 1;
1389
- continue;
1390
- }
1391
- if (whitespaceRegex.test(char)) {
1392
- whitespace = whitespace ?? createWhitespaceCounts();
1393
- whitespace.other += 1;
1394
- count += 1;
1395
- }
1412
+ function resolveLatinHintRules$1(options) {
1413
+ const useDefaultLatinHints = options.useDefaultLatinHints !== false;
1414
+ const customRules = options.latinHintRules ?? [];
1415
+ const combinedRules = [];
1416
+ for (let index = 0; index < customRules.length; index += 1) {
1417
+ const rule = customRules[index];
1418
+ if (!rule) continue;
1419
+ combinedRules.push({
1420
+ rule,
1421
+ label: `Invalid custom Latin hint rule at index ${index}`
1422
+ });
1396
1423
  }
1397
- if (count > 0) {
1398
- collection.whitespace = whitespace ?? createWhitespaceCounts();
1399
- collection.counts.whitespace = (collection.counts.whitespace ?? 0) + count;
1424
+ if (useDefaultLatinHints) for (let index = 0; index < DEFAULT_LATIN_HINT_RULES.length; index += 1) {
1425
+ const rule = DEFAULT_LATIN_HINT_RULES[index];
1426
+ if (!rule) continue;
1427
+ combinedRules.push({
1428
+ rule,
1429
+ label: `Invalid default Latin hint rule at index ${index}`
1430
+ });
1400
1431
  }
1401
- return count;
1432
+ const resolvedRules = combinedRules.map((entry, index) => compileLatinHintRule(entry.rule, index, entry.label));
1433
+ resolvedRules.sort((left, right) => {
1434
+ if (left.priority !== right.priority) return right.priority - left.priority;
1435
+ return left.order - right.order;
1436
+ });
1437
+ return resolvedRules;
1402
1438
  }
1403
- function classifyNonWordSegment(segment) {
1404
- const hasEmojiVariationSelector = segment.includes("️");
1405
- if (keycapEmojiRegex.test(segment) || emojiPresentationRegex.test(segment) || hasEmojiVariationSelector && emojiRegex.test(segment)) return "emoji";
1406
- if (symbolRegex.test(segment)) return "symbol";
1407
- if (punctuationRegex.test(segment)) return "punctuation";
1408
- return null;
1439
+ function resolveLocaleDetectContext(options = {}) {
1440
+ const latinHint = resolveLatinHint(options);
1441
+ const latinHintRules = resolveLatinHintRules$1(options);
1442
+ const latinLocales = new Set([DEFAULT_LOCALE]);
1443
+ for (const rule of latinHintRules) latinLocales.add(rule.tag);
1444
+ if (latinHint) latinLocales.add(latinHint);
1445
+ return {
1446
+ latinHint,
1447
+ hanHint: resolveHanHint(options),
1448
+ latinHintRules,
1449
+ latinLocales
1450
+ };
1409
1451
  }
1410
- function mergeNonWordCollections(target, source) {
1411
- if (source.counts.emoji > 0) {
1412
- appendAll(target.emoji, source.emoji);
1413
- target.counts.emoji += source.counts.emoji;
1414
- }
1415
- if (source.counts.symbols > 0) {
1416
- appendAll(target.symbols, source.symbols);
1417
- target.counts.symbols += source.counts.symbols;
1452
+ function detectLatinLocale(char, context) {
1453
+ for (const hint of context.latinHintRules) {
1454
+ hint.pattern.lastIndex = 0;
1455
+ if (hint.pattern.test(char)) return hint.tag;
1418
1456
  }
1419
- if (source.counts.punctuation > 0) {
1420
- appendAll(target.punctuation, source.punctuation);
1421
- target.counts.punctuation += source.counts.punctuation;
1457
+ return DEFAULT_LOCALE;
1458
+ }
1459
+ function detectLocaleForChar(char, previousLocale, options = {}, context = resolveLocaleDetectContext(options), allowLatinLocaleCarry = true, allowJapaneseHanCarry = true) {
1460
+ if (regex.hiragana.test(char) || regex.katakana.test(char)) return "ja";
1461
+ if (regex.hangul.test(char)) return "ko";
1462
+ if (regex.arabic.test(char)) return "ar";
1463
+ if (regex.cyrillic.test(char)) return "ru";
1464
+ if (regex.devanagari.test(char)) return "hi";
1465
+ if (regex.thai.test(char)) return "th";
1466
+ if (regex.han.test(char)) {
1467
+ if (allowJapaneseHanCarry && previousLocale && previousLocale.startsWith("ja")) return previousLocale;
1468
+ return context.hanHint ?? DEFAULT_HAN_TAG;
1422
1469
  }
1423
- if (source.counts.whitespace && source.counts.whitespace > 0 && source.whitespace) {
1424
- const whitespace = target.whitespace ?? createWhitespaceCounts();
1425
- whitespace.spaces += source.whitespace.spaces;
1426
- whitespace.tabs += source.whitespace.tabs;
1427
- whitespace.newlines += source.whitespace.newlines;
1428
- whitespace.other += source.whitespace.other;
1429
- target.whitespace = whitespace;
1430
- target.counts.whitespace = (target.counts.whitespace ?? 0) + source.counts.whitespace;
1470
+ if (regex.latin.test(char)) {
1471
+ const hintedLocale = detectLatinLocale(char, context);
1472
+ if (hintedLocale !== DEFAULT_LOCALE) return hintedLocale;
1473
+ if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE) return previousLocale;
1474
+ if (context.latinHint) return context.latinHint;
1475
+ return DEFAULT_LOCALE;
1431
1476
  }
1432
- return target;
1433
- }
1434
- function createWhitespaceCounts() {
1435
- return {
1436
- spaces: 0,
1437
- tabs: 0,
1438
- newlines: 0,
1439
- other: 0
1440
- };
1477
+ return null;
1441
1478
  }
1442
1479
 
1443
1480
  //#endregion
1444
- //#region src/wc/analyze.ts
1445
- function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
1446
- const segmenter = getSegmenter(chunk.locale);
1447
- const segments = [];
1448
- const nonWords = collectNonWords ? createNonWordCollection() : null;
1449
- for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
1450
- else if (collectNonWords && nonWords) {
1451
- if (includeWhitespace) addWhitespace(nonWords, part.segment);
1452
- const category = classifyNonWordSegment(part.segment);
1453
- if (category) addNonWord(nonWords, category, part.segment);
1454
- }
1455
- return {
1456
- locale: chunk.locale,
1457
- text: chunk.text,
1458
- segments,
1459
- words: segments.length,
1460
- nonWords: nonWords ?? void 0
1481
+ //#region src/wc/segment.ts
1482
+ const HARD_BOUNDARY_REGEX = /[\r\n,.!?;:,、。!?;:.。、]/u;
1483
+ const LATIN_PROMOTION_BREAK_REGEX = /[\s,.!?;:,、。!?;:.。、]/u;
1484
+ function segmentTextByLocale(text, options = {}) {
1485
+ const context = resolveLocaleDetectContext(options);
1486
+ const chunks = [];
1487
+ let currentLocale = DEFAULT_LOCALE;
1488
+ let buffer = "";
1489
+ let bufferHasScript = false;
1490
+ let sawCarryBoundary = false;
1491
+ const updateCarryBoundaryState = (detected, char) => {
1492
+ if (detected !== null) {
1493
+ sawCarryBoundary = false;
1494
+ return;
1495
+ }
1496
+ if (HARD_BOUNDARY_REGEX.test(char)) sawCarryBoundary = true;
1461
1497
  };
1462
- }
1463
- function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
1464
- const segmenter = getSegmenter(chunk.locale);
1465
- const nonWords = collectNonWords ? createNonWordCollection() : null;
1466
- let chars = 0;
1467
- let wordChars = 0;
1468
- let nonWordChars = 0;
1469
- for (const part of segmenter.segment(chunk.text)) {
1470
- if (part.isWordLike) {
1471
- const count = countCharsForLocale(part.segment, chunk.locale);
1472
- chars += count;
1473
- wordChars += count;
1498
+ for (const char of text) {
1499
+ const detected = detectLocaleForChar(char, currentLocale, options, context, !sawCarryBoundary, !sawCarryBoundary);
1500
+ const targetLocale = detected ?? currentLocale;
1501
+ if (buffer === "") {
1502
+ currentLocale = targetLocale;
1503
+ buffer = char;
1504
+ bufferHasScript = detected !== null;
1505
+ updateCarryBoundaryState(detected, char);
1474
1506
  continue;
1475
1507
  }
1476
- if (collectNonWords && nonWords) {
1477
- let whitespaceCount = 0;
1478
- if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
1479
- const category = classifyNonWordSegment(part.segment);
1480
- if (category) addNonWord(nonWords, category, part.segment);
1481
- if (category || whitespaceCount > 0) {
1482
- const count = countCharsForLocale(part.segment, chunk.locale);
1483
- chars += count;
1484
- nonWordChars += count;
1508
+ if (detected !== null && !bufferHasScript) {
1509
+ currentLocale = targetLocale;
1510
+ buffer += char;
1511
+ bufferHasScript = true;
1512
+ updateCarryBoundaryState(detected, char);
1513
+ continue;
1514
+ }
1515
+ if (targetLocale !== currentLocale && detected !== null) {
1516
+ if (currentLocale === DEFAULT_LOCALE && isLatinLocale(targetLocale, context)) {
1517
+ const promotionBreakIndex = findLastLatinPromotionBreakIndex(buffer);
1518
+ if (promotionBreakIndex === -1) {
1519
+ currentLocale = targetLocale;
1520
+ buffer += char;
1521
+ bufferHasScript = true;
1522
+ updateCarryBoundaryState(detected, char);
1523
+ continue;
1524
+ }
1525
+ const prefix = buffer.slice(0, promotionBreakIndex + 1);
1526
+ const suffix = buffer.slice(promotionBreakIndex + 1);
1527
+ if (prefix.length > 0) chunks.push({
1528
+ locale: currentLocale,
1529
+ text: prefix
1530
+ });
1531
+ currentLocale = targetLocale;
1532
+ buffer = `${suffix}${char}`;
1533
+ bufferHasScript = true;
1534
+ updateCarryBoundaryState(detected, char);
1535
+ continue;
1485
1536
  }
1537
+ chunks.push({
1538
+ locale: currentLocale,
1539
+ text: buffer
1540
+ });
1541
+ currentLocale = targetLocale;
1542
+ buffer = char;
1543
+ bufferHasScript = true;
1544
+ updateCarryBoundaryState(detected, char);
1545
+ continue;
1486
1546
  }
1547
+ buffer += char;
1548
+ if (detected !== null) bufferHasScript = true;
1549
+ updateCarryBoundaryState(detected, char);
1487
1550
  }
1488
- return {
1489
- locale: chunk.locale,
1490
- text: chunk.text,
1491
- chars,
1492
- wordChars,
1493
- nonWordChars,
1494
- nonWords: nonWords ?? void 0
1495
- };
1551
+ if (buffer.length > 0) chunks.push({
1552
+ locale: currentLocale,
1553
+ text: buffer
1554
+ });
1555
+ return mergeAdjacentChunks(chunks);
1496
1556
  }
1497
- function aggregateCharsByLocale(chunks) {
1498
- const order = [];
1499
- const map = /* @__PURE__ */ new Map();
1500
- for (const chunk of chunks) {
1501
- const existing = map.get(chunk.locale);
1502
- if (existing) {
1503
- existing.chars += chunk.chars;
1504
- existing.wordChars += chunk.wordChars;
1505
- existing.nonWordChars += chunk.nonWordChars;
1506
- if (chunk.nonWords) {
1507
- if (!existing.nonWords) existing.nonWords = createNonWordCollection();
1508
- mergeNonWordCollections(existing.nonWords, chunk.nonWords);
1509
- }
1510
- continue;
1511
- }
1512
- order.push(chunk.locale);
1513
- map.set(chunk.locale, {
1514
- locale: chunk.locale,
1515
- chars: chunk.chars,
1516
- wordChars: chunk.wordChars,
1517
- nonWordChars: chunk.nonWordChars,
1518
- nonWords: chunk.nonWords ? mergeNonWordCollections(createNonWordCollection(), chunk.nonWords) : void 0
1519
- });
1557
+ function findLastLatinPromotionBreakIndex(buffer) {
1558
+ for (let index = buffer.length - 1; index >= 0; index -= 1) {
1559
+ const char = buffer[index];
1560
+ if (!char) continue;
1561
+ if (LATIN_PROMOTION_BREAK_REGEX.test(char)) return index;
1520
1562
  }
1521
- return order.map((locale) => map.get(locale));
1563
+ return -1;
1522
1564
  }
1523
- function aggregateByLocale(chunks) {
1524
- const order = [];
1525
- const map = /* @__PURE__ */ new Map();
1526
- for (const chunk of chunks) {
1527
- const existing = map.get(chunk.locale);
1528
- if (existing) {
1529
- existing.words += chunk.words;
1530
- appendAll(existing.segments, chunk.segments);
1531
- continue;
1565
+ function mergeAdjacentChunks(chunks) {
1566
+ if (chunks.length === 0) return chunks;
1567
+ const merged = [];
1568
+ let last = chunks[0];
1569
+ for (let i = 1; i < chunks.length; i++) {
1570
+ const chunk = chunks[i];
1571
+ if (chunk.locale === last.locale) last = {
1572
+ locale: last.locale,
1573
+ text: last.text + chunk.text
1574
+ };
1575
+ else {
1576
+ merged.push(last);
1577
+ last = chunk;
1532
1578
  }
1533
- order.push(chunk.locale);
1534
- map.set(chunk.locale, {
1535
- locale: chunk.locale,
1536
- words: chunk.words,
1537
- segments: [...chunk.segments]
1538
- });
1539
1579
  }
1540
- return order.map((locale) => map.get(locale));
1580
+ merged.push(last);
1581
+ return merged;
1541
1582
  }
1542
1583
 
1543
1584
  //#endregion
1544
- //#region src/wc/mode.ts
1545
- const MODE_ALIASES = {
1546
- chunk: "chunk",
1547
- chunks: "chunk",
1548
- segments: "segments",
1549
- segment: "segments",
1550
- seg: "segments",
1551
- collector: "collector",
1552
- collect: "collector",
1553
- colle: "collector",
1554
- char: "char",
1555
- chars: "char",
1556
- character: "char",
1557
- characters: "char",
1558
- "char-collector": "char-collector"
1559
- };
1560
- const CHAR_MODE_ALIASES = new Set([
1561
- "char",
1562
- "chars",
1563
- "character",
1564
- "characters"
1565
- ]);
1566
- const COLLECTOR_MODE_ALIASES = new Set([
1567
- "collector",
1568
- "collect",
1569
- "colle",
1570
- "col"
1571
- ]);
1572
- function collapseSeparators(value) {
1573
- return value.replace(/[-_\s]+/g, "");
1574
- }
1575
- function isComposedCharCollectorFromTokens(value) {
1576
- const tokens = value.split(/[-_\s]+/).map((token) => token.trim()).filter((token) => token.length > 0);
1577
- if (tokens.length < 2) return false;
1578
- let hasCharAlias = false;
1579
- let hasCollectorAlias = false;
1580
- for (const token of tokens) {
1581
- if (CHAR_MODE_ALIASES.has(token)) {
1582
- hasCharAlias = true;
1583
- continue;
1585
+ //#region src/wc/wc.ts
1586
+ function wordCounter(text, options = {}) {
1587
+ const mode = resolveMode(options.mode, "chunk");
1588
+ const collectNonWords = Boolean(options.nonWords);
1589
+ const includeWhitespace = Boolean(options.includeWhitespace);
1590
+ const chunks = segmentTextByLocale(text, {
1591
+ latinLanguageHint: options.latinLanguageHint,
1592
+ latinTagHint: options.latinTagHint,
1593
+ latinLocaleHint: options.latinLocaleHint,
1594
+ latinHintRules: options.latinHintRules,
1595
+ useDefaultLatinHints: options.useDefaultLatinHints,
1596
+ hanLanguageHint: options.hanLanguageHint,
1597
+ hanTagHint: options.hanTagHint
1598
+ });
1599
+ if (mode === "char" || mode === "char-collector") {
1600
+ const analyzed = chunks.map((chunk) => analyzeCharChunk(chunk, collectNonWords, includeWhitespace));
1601
+ const total = analyzed.reduce((sum, chunk) => sum + chunk.chars, 0);
1602
+ const counts = collectNonWords ? {
1603
+ words: analyzed.reduce((sum, chunk) => sum + chunk.wordChars, 0),
1604
+ nonWords: analyzed.reduce((sum, chunk) => sum + chunk.nonWordChars, 0),
1605
+ total
1606
+ } : void 0;
1607
+ if (mode === "char") return {
1608
+ total,
1609
+ counts,
1610
+ breakdown: {
1611
+ mode,
1612
+ items: analyzed.map((chunk) => ({
1613
+ locale: chunk.locale,
1614
+ text: chunk.text,
1615
+ chars: chunk.chars,
1616
+ nonWords: chunk.nonWords
1617
+ }))
1618
+ }
1619
+ };
1620
+ return {
1621
+ total,
1622
+ counts,
1623
+ breakdown: {
1624
+ mode,
1625
+ items: aggregateCharsByLocale(analyzed).map((chunk) => ({
1626
+ locale: chunk.locale,
1627
+ chars: chunk.chars,
1628
+ nonWords: chunk.nonWords
1629
+ }))
1630
+ }
1631
+ };
1632
+ }
1633
+ const analyzed = chunks.map((chunk) => analyzeChunk(chunk, collectNonWords, includeWhitespace));
1634
+ const wordsTotal = analyzed.reduce((sum, chunk) => sum + chunk.words, 0);
1635
+ const nonWordsTotal = collectNonWords ? analyzed.reduce((sum, chunk) => {
1636
+ if (!chunk.nonWords) return sum;
1637
+ return sum + getNonWordTotal(chunk.nonWords);
1638
+ }, 0) : 0;
1639
+ const total = analyzed.reduce((sum, chunk) => {
1640
+ let chunkTotal = chunk.words;
1641
+ if (collectNonWords && chunk.nonWords) chunkTotal += getNonWordTotal(chunk.nonWords);
1642
+ return sum + chunkTotal;
1643
+ }, 0);
1644
+ const counts = collectNonWords ? {
1645
+ words: wordsTotal,
1646
+ nonWords: nonWordsTotal,
1647
+ total
1648
+ } : void 0;
1649
+ if (mode === "segments") return {
1650
+ total,
1651
+ counts,
1652
+ breakdown: {
1653
+ mode,
1654
+ items: analyzed.map((chunk) => ({
1655
+ locale: chunk.locale,
1656
+ text: chunk.text,
1657
+ words: chunk.words,
1658
+ segments: chunk.segments,
1659
+ nonWords: chunk.nonWords
1660
+ }))
1584
1661
  }
1585
- if (COLLECTOR_MODE_ALIASES.has(token)) {
1586
- hasCollectorAlias = true;
1587
- continue;
1662
+ };
1663
+ if (mode === "collector") return {
1664
+ total,
1665
+ counts,
1666
+ breakdown: {
1667
+ mode,
1668
+ items: aggregateByLocale(analyzed),
1669
+ nonWords: collectNonWordsAggregate(analyzed, collectNonWords)
1588
1670
  }
1589
- return false;
1590
- }
1591
- return hasCharAlias && hasCollectorAlias;
1592
- }
1593
- function isComposedCharCollectorCompact(value) {
1594
- for (const charAlias of CHAR_MODE_ALIASES) for (const collectorAlias of COLLECTOR_MODE_ALIASES) if (value === `${charAlias}${collectorAlias}` || value === `${collectorAlias}${charAlias}`) return true;
1595
- return false;
1671
+ };
1672
+ return {
1673
+ total,
1674
+ counts,
1675
+ breakdown: {
1676
+ mode,
1677
+ items: analyzed.map((chunk) => ({
1678
+ locale: chunk.locale,
1679
+ text: chunk.text,
1680
+ words: chunk.words,
1681
+ nonWords: chunk.nonWords
1682
+ }))
1683
+ }
1684
+ };
1596
1685
  }
1597
- function normalizeMode(input) {
1598
- if (!input) return null;
1599
- const normalized = input.trim().toLowerCase();
1600
- const direct = MODE_ALIASES[normalized];
1601
- if (direct) return direct;
1602
- if (isComposedCharCollectorFromTokens(normalized)) return "char-collector";
1603
- const compact = collapseSeparators(normalized);
1604
- if (isComposedCharCollectorCompact(compact)) return "char-collector";
1605
- return MODE_ALIASES[compact] ?? null;
1686
+ function getNonWordTotal(nonWords) {
1687
+ return nonWords.counts.emoji + nonWords.counts.symbols + nonWords.counts.punctuation + (nonWords.counts.whitespace ?? 0);
1606
1688
  }
1607
- function resolveMode(input, fallback = "chunk") {
1608
- return normalizeMode(input) ?? fallback;
1689
+ function collectNonWordsAggregate(analyzed, enabled) {
1690
+ if (!enabled) return;
1691
+ const collection = createNonWordCollection();
1692
+ for (const chunk of analyzed) {
1693
+ if (!chunk.nonWords) continue;
1694
+ mergeNonWordCollections(collection, chunk.nonWords);
1695
+ }
1696
+ return collection;
1609
1697
  }
1610
1698
 
1611
1699
  //#endregion
1612
- //#region src/wc/latin-hints.ts
1613
- const DEFAULT_LATIN_HINT_RULES_SOURCE = [
1614
- {
1615
- tag: "de",
1616
- pattern: "[äöüÄÖÜß]"
1617
- },
1618
- {
1619
- tag: "es",
1620
- pattern: "[ñÑ¿¡]"
1621
- },
1622
- {
1623
- tag: "pt",
1624
- pattern: "[ãõÃÕ]"
1625
- },
1626
- {
1627
- tag: "fr",
1628
- pattern: "[œŒæÆ]"
1629
- },
1630
- {
1631
- tag: "pl",
1632
- pattern: "[ąćęłńśźżĄĆĘŁŃŚŹŻ]"
1633
- },
1634
- {
1635
- tag: "tr",
1636
- pattern: "[ıİğĞşŞ]"
1637
- },
1638
- {
1639
- tag: "ro",
1640
- pattern: "[ăĂâÂîÎșȘțȚ]"
1641
- },
1642
- {
1643
- tag: "hu",
1644
- pattern: "[őŐűŰ]"
1645
- },
1646
- {
1647
- tag: "is",
1648
- pattern: "[ðÐþÞ]"
1649
- }
1650
- ];
1651
- const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
1700
+ //#region src/wc/index.ts
1701
+ var wc_default = wordCounter;
1652
1702
 
1653
1703
  //#endregion
1654
- //#region src/wc/locale-detect.ts
1655
- const DEFAULT_LOCALE = "und-Latn";
1656
- const DEFAULT_HAN_TAG = "und-Hani";
1657
- const MAX_LATIN_HINT_PATTERN_LENGTH = 256;
1658
- const regex = {
1659
- hiragana: /\p{Script=Hiragana}/u,
1660
- katakana: /\p{Script=Katakana}/u,
1661
- hangul: /\p{Script=Hangul}/u,
1662
- han: /\p{Script=Han}/u,
1663
- latin: /\p{Script=Latin}/u,
1664
- arabic: /\p{Script=Arabic}/u,
1665
- cyrillic: /\p{Script=Cyrillic}/u,
1666
- devanagari: /\p{Script=Devanagari}/u,
1667
- thai: /\p{Script=Thai}/u
1668
- };
1669
- const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
1670
- function isLatinLocale(locale, context) {
1671
- if (context) return context.latinLocales.has(locale);
1672
- return defaultLatinLocales.has(locale);
1673
- }
1674
- function resolveLatinHint(options) {
1675
- const latinTagHint = options.latinTagHint?.trim();
1676
- if (latinTagHint) return latinTagHint;
1677
- const latinLanguageHint = options.latinLanguageHint?.trim();
1678
- if (latinLanguageHint) return latinLanguageHint;
1679
- const latinLocaleHint = options.latinLocaleHint?.trim();
1680
- if (latinLocaleHint) return latinLocaleHint;
1681
- }
1682
- function resolveHanHint(options) {
1683
- const hanTagHint = options.hanTagHint?.trim();
1684
- if (hanTagHint) return hanTagHint;
1685
- const hanLanguageHint = options.hanLanguageHint?.trim();
1686
- if (hanLanguageHint) return hanLanguageHint;
1687
- }
1688
- function compileLatinHintPattern(pattern, label) {
1689
- const source = typeof pattern === "string" ? pattern : pattern.source;
1690
- const hasUnicodeMode = typeof pattern !== "string" && (pattern.flags.includes("u") || pattern.flags.includes("v"));
1691
- const flags = typeof pattern === "string" ? "u" : hasUnicodeMode ? pattern.flags : `${pattern.flags}u`;
1692
- if (source.length === 0) throw new Error(`${label}: pattern must not be empty.`);
1693
- if (source.length > MAX_LATIN_HINT_PATTERN_LENGTH) throw new Error(`${label}: pattern must be at most ${MAX_LATIN_HINT_PATTERN_LENGTH} characters.`);
1704
+ //#region src/markdown/section-count.ts
1705
+ function normalizeText(value) {
1706
+ if (value == null) return "";
1707
+ if (typeof value === "string") return value;
1708
+ if (typeof value === "number" || typeof value === "boolean") return String(value);
1694
1709
  try {
1695
- return new RegExp(source, flags);
1696
- } catch (error) {
1697
- const message = error instanceof Error ? error.message : String(error);
1698
- throw new Error(`${label}: invalid Unicode regex pattern (${message}).`);
1710
+ return JSON.stringify(value);
1711
+ } catch {
1712
+ return String(value);
1699
1713
  }
1700
1714
  }
1701
- function normalizeLatinHintPriority(priority, label) {
1702
- if (priority === void 0) return 0;
1703
- if (typeof priority !== "number" || !Number.isFinite(priority)) throw new Error(`${label}: priority must be a finite number when provided.`);
1704
- return priority;
1705
- }
1706
- function compileLatinHintRule(rule, order, label) {
1707
- const tag = typeof rule.tag === "string" ? rule.tag.trim() : "";
1708
- if (!tag) throw new Error(`${label}: tag must be a non-empty string.`);
1709
- return {
1710
- tag,
1711
- pattern: compileLatinHintPattern(rule.pattern, label),
1712
- priority: normalizeLatinHintPriority(rule.priority, label),
1713
- order
1714
- };
1715
- }
1716
- function resolveLatinHintRules$1(options) {
1717
- const useDefaultLatinHints = options.useDefaultLatinHints !== false;
1718
- const customRules = options.latinHintRules ?? [];
1719
- const combinedRules = [];
1720
- for (let index = 0; index < customRules.length; index += 1) {
1721
- const rule = customRules[index];
1722
- if (!rule) continue;
1723
- combinedRules.push({
1724
- rule,
1725
- label: `Invalid custom Latin hint rule at index ${index}`
1726
- });
1727
- }
1728
- if (useDefaultLatinHints) for (let index = 0; index < DEFAULT_LATIN_HINT_RULES.length; index += 1) {
1729
- const rule = DEFAULT_LATIN_HINT_RULES[index];
1730
- if (!rule) continue;
1731
- combinedRules.push({
1732
- rule,
1733
- label: `Invalid default Latin hint rule at index ${index}`
1734
- });
1735
- }
1736
- const resolvedRules = combinedRules.map((entry, index) => compileLatinHintRule(entry.rule, index, entry.label));
1737
- resolvedRules.sort((left, right) => {
1738
- if (left.priority !== right.priority) return right.priority - left.priority;
1739
- return left.order - right.order;
1715
+ function buildPerKeyItems(data, mode, options) {
1716
+ if (!data || typeof data !== "object" || Array.isArray(data)) return [];
1717
+ return Object.entries(data).map(([key, value]) => {
1718
+ const valueText = normalizeText(value);
1719
+ return {
1720
+ name: key,
1721
+ source: "frontmatter",
1722
+ result: wc_default(valueText ? `${key}: ${valueText}` : key, options)
1723
+ };
1740
1724
  });
1741
- return resolvedRules;
1742
1725
  }
1743
- function resolveLocaleDetectContext(options = {}) {
1744
- const latinHint = resolveLatinHint(options);
1745
- const latinHintRules = resolveLatinHintRules$1(options);
1746
- const latinLocales = new Set([DEFAULT_LOCALE]);
1747
- for (const rule of latinHintRules) latinLocales.add(rule.tag);
1748
- if (latinHint) latinLocales.add(latinHint);
1749
- return {
1750
- latinHint,
1751
- hanHint: resolveHanHint(options),
1752
- latinHintRules,
1753
- latinLocales
1754
- };
1726
+ function buildSingleItem(name, text, mode, options, source) {
1727
+ return [{
1728
+ name,
1729
+ source,
1730
+ result: wc_default(text, options)
1731
+ }];
1755
1732
  }
1756
- function detectLatinLocale(char, context) {
1757
- for (const hint of context.latinHintRules) {
1758
- hint.pattern.lastIndex = 0;
1759
- if (hint.pattern.test(char)) return hint.tag;
1760
- }
1761
- return DEFAULT_LOCALE;
1733
+ function sumTotals(items) {
1734
+ return items.reduce((sum, item) => sum + item.result.total, 0);
1762
1735
  }
1763
- function detectLocaleForChar(char, previousLocale, options = {}, context = resolveLocaleDetectContext(options), allowLatinLocaleCarry = true, allowJapaneseHanCarry = true) {
1764
- if (regex.hiragana.test(char) || regex.katakana.test(char)) return "ja";
1765
- if (regex.hangul.test(char)) return "ko";
1766
- if (regex.arabic.test(char)) return "ar";
1767
- if (regex.cyrillic.test(char)) return "ru";
1768
- if (regex.devanagari.test(char)) return "hi";
1769
- if (regex.thai.test(char)) return "th";
1770
- if (regex.han.test(char)) {
1771
- if (allowJapaneseHanCarry && previousLocale && previousLocale.startsWith("ja")) return previousLocale;
1772
- return context.hanHint ?? DEFAULT_HAN_TAG;
1773
- }
1774
- if (regex.latin.test(char)) {
1775
- const hintedLocale = detectLatinLocale(char, context);
1776
- if (hintedLocale !== DEFAULT_LOCALE) return hintedLocale;
1777
- if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE) return previousLocale;
1778
- if (context.latinHint) return context.latinHint;
1779
- return DEFAULT_LOCALE;
1736
+ function countSections(input, section, options = {}) {
1737
+ const mode = options.mode ?? "chunk";
1738
+ if (section === "all") {
1739
+ const result = wc_default(input, options);
1740
+ return {
1741
+ section,
1742
+ total: result.total,
1743
+ frontmatterType: null,
1744
+ items: [{
1745
+ name: "all",
1746
+ source: "content",
1747
+ result
1748
+ }]
1749
+ };
1780
1750
  }
1781
- return null;
1751
+ const parsed = parseMarkdown(input);
1752
+ const frontmatterText = parsed.frontmatter ?? "";
1753
+ const contentText = parsed.content ?? "";
1754
+ let items = [];
1755
+ if (section === "frontmatter") items = buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter");
1756
+ else if (section === "content") items = buildSingleItem("content", contentText, mode, options, "content");
1757
+ else if (section === "split") items = [...buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter"), ...buildSingleItem("content", contentText, mode, options, "content")];
1758
+ else if (section === "per-key") items = buildPerKeyItems(parsed.data, mode, options);
1759
+ else if (section === "split-per-key") items = [...buildPerKeyItems(parsed.data, mode, options), ...buildSingleItem("content", contentText, mode, options, "content")];
1760
+ return {
1761
+ section,
1762
+ total: sumTotals(items),
1763
+ frontmatterType: parsed.frontmatterType,
1764
+ items
1765
+ };
1782
1766
  }
1783
1767
 
1784
1768
  //#endregion
1785
- //#region src/wc/segment.ts
1786
- const HARD_BOUNDARY_REGEX = /[\r\n,.!?;:,、。!?;:.。、]/u;
1787
- const LATIN_PROMOTION_BREAK_REGEX = /[\s,.!?;:,、。!?;:.。、]/u;
1788
- function segmentTextByLocale(text, options = {}) {
1789
- const context = resolveLocaleDetectContext(options);
1790
- const chunks = [];
1791
- let currentLocale = DEFAULT_LOCALE;
1792
- let buffer = "";
1793
- let bufferHasScript = false;
1794
- let sawCarryBoundary = false;
1795
- const updateCarryBoundaryState = (detected, char) => {
1796
- if (detected !== null) {
1797
- sawCarryBoundary = false;
1798
- return;
1769
+ //#region src/cli/batch/aggregate.ts
1770
+ function mergeWordCounterResult(left, right, preserveCollectorSegments) {
1771
+ if (left.breakdown.mode !== right.breakdown.mode) throw new Error("Cannot merge different breakdown modes.");
1772
+ const total = left.total + right.total;
1773
+ const counts = left.counts || right.counts ? {
1774
+ words: (left.counts?.words ?? 0) + (right.counts?.words ?? 0),
1775
+ nonWords: (left.counts?.nonWords ?? 0) + (right.counts?.nonWords ?? 0),
1776
+ total: (left.counts?.total ?? 0) + (right.counts?.total ?? 0)
1777
+ } : void 0;
1778
+ if (left.breakdown.mode === "chunk" && right.breakdown.mode === "chunk") return {
1779
+ total,
1780
+ counts,
1781
+ breakdown: {
1782
+ mode: "chunk",
1783
+ items: [...left.breakdown.items, ...right.breakdown.items]
1799
1784
  }
1800
- if (HARD_BOUNDARY_REGEX.test(char)) sawCarryBoundary = true;
1801
1785
  };
1802
- for (const char of text) {
1803
- const detected = detectLocaleForChar(char, currentLocale, options, context, !sawCarryBoundary, !sawCarryBoundary);
1804
- const targetLocale = detected ?? currentLocale;
1805
- if (buffer === "") {
1806
- currentLocale = targetLocale;
1807
- buffer = char;
1808
- bufferHasScript = detected !== null;
1809
- updateCarryBoundaryState(detected, char);
1810
- continue;
1786
+ if (left.breakdown.mode === "segments" && right.breakdown.mode === "segments") return {
1787
+ total,
1788
+ counts,
1789
+ breakdown: {
1790
+ mode: "segments",
1791
+ items: [...left.breakdown.items, ...right.breakdown.items]
1811
1792
  }
1812
- if (detected !== null && !bufferHasScript) {
1813
- currentLocale = targetLocale;
1814
- buffer += char;
1815
- bufferHasScript = true;
1816
- updateCarryBoundaryState(detected, char);
1817
- continue;
1793
+ };
1794
+ if (left.breakdown.mode === "char" && right.breakdown.mode === "char") return {
1795
+ total,
1796
+ counts,
1797
+ breakdown: {
1798
+ mode: "char",
1799
+ items: [...left.breakdown.items, ...right.breakdown.items]
1818
1800
  }
1819
- if (targetLocale !== currentLocale && detected !== null) {
1820
- if (currentLocale === DEFAULT_LOCALE && isLatinLocale(targetLocale, context)) {
1821
- const promotionBreakIndex = findLastLatinPromotionBreakIndex(buffer);
1822
- if (promotionBreakIndex === -1) {
1823
- currentLocale = targetLocale;
1824
- buffer += char;
1825
- bufferHasScript = true;
1826
- updateCarryBoundaryState(detected, char);
1801
+ };
1802
+ if (left.breakdown.mode === "char-collector" && right.breakdown.mode === "char-collector") {
1803
+ const localeOrder = [];
1804
+ const mergedByLocale = /* @__PURE__ */ new Map();
1805
+ const addItems = (items) => {
1806
+ for (const item of items) {
1807
+ const existing = mergedByLocale.get(item.locale);
1808
+ if (existing) {
1809
+ existing.chars += item.chars;
1810
+ if (item.nonWords) {
1811
+ if (!existing.nonWords) existing.nonWords = createNonWordCollection();
1812
+ mergeNonWordCollections(existing.nonWords, item.nonWords);
1813
+ }
1827
1814
  continue;
1828
1815
  }
1829
- const prefix = buffer.slice(0, promotionBreakIndex + 1);
1830
- const suffix = buffer.slice(promotionBreakIndex + 1);
1831
- if (prefix.length > 0) chunks.push({
1832
- locale: currentLocale,
1833
- text: prefix
1816
+ localeOrder.push(item.locale);
1817
+ mergedByLocale.set(item.locale, {
1818
+ locale: item.locale,
1819
+ chars: item.chars,
1820
+ nonWords: item.nonWords ? mergeNonWordCollections(createNonWordCollection(), item.nonWords) : void 0
1834
1821
  });
1835
- currentLocale = targetLocale;
1836
- buffer = `${suffix}${char}`;
1837
- bufferHasScript = true;
1838
- updateCarryBoundaryState(detected, char);
1839
- continue;
1840
1822
  }
1841
- chunks.push({
1842
- locale: currentLocale,
1843
- text: buffer
1844
- });
1845
- currentLocale = targetLocale;
1846
- buffer = char;
1847
- bufferHasScript = true;
1848
- updateCarryBoundaryState(detected, char);
1849
- continue;
1850
- }
1851
- buffer += char;
1852
- if (detected !== null) bufferHasScript = true;
1853
- updateCarryBoundaryState(detected, char);
1854
- }
1855
- if (buffer.length > 0) chunks.push({
1856
- locale: currentLocale,
1857
- text: buffer
1858
- });
1859
- return mergeAdjacentChunks(chunks);
1860
- }
1861
- function findLastLatinPromotionBreakIndex(buffer) {
1862
- for (let index = buffer.length - 1; index >= 0; index -= 1) {
1863
- const char = buffer[index];
1864
- if (!char) continue;
1865
- if (LATIN_PROMOTION_BREAK_REGEX.test(char)) return index;
1866
- }
1867
- return -1;
1868
- }
1869
- function mergeAdjacentChunks(chunks) {
1870
- if (chunks.length === 0) return chunks;
1871
- const merged = [];
1872
- let last = chunks[0];
1873
- for (let i = 1; i < chunks.length; i++) {
1874
- const chunk = chunks[i];
1875
- if (chunk.locale === last.locale) last = {
1876
- locale: last.locale,
1877
- text: last.text + chunk.text
1878
1823
  };
1879
- else {
1880
- merged.push(last);
1881
- last = chunk;
1882
- }
1883
- }
1884
- merged.push(last);
1885
- return merged;
1886
- }
1887
-
1888
- //#endregion
1889
- //#region src/wc/wc.ts
1890
- function wordCounter(text, options = {}) {
1891
- const mode = resolveMode(options.mode, "chunk");
1892
- const collectNonWords = Boolean(options.nonWords);
1893
- const includeWhitespace = Boolean(options.includeWhitespace);
1894
- const chunks = segmentTextByLocale(text, {
1895
- latinLanguageHint: options.latinLanguageHint,
1896
- latinTagHint: options.latinTagHint,
1897
- latinLocaleHint: options.latinLocaleHint,
1898
- latinHintRules: options.latinHintRules,
1899
- useDefaultLatinHints: options.useDefaultLatinHints,
1900
- hanLanguageHint: options.hanLanguageHint,
1901
- hanTagHint: options.hanTagHint
1902
- });
1903
- if (mode === "char" || mode === "char-collector") {
1904
- const analyzed = chunks.map((chunk) => analyzeCharChunk(chunk, collectNonWords, includeWhitespace));
1905
- const total = analyzed.reduce((sum, chunk) => sum + chunk.chars, 0);
1906
- const counts = collectNonWords ? {
1907
- words: analyzed.reduce((sum, chunk) => sum + chunk.wordChars, 0),
1908
- nonWords: analyzed.reduce((sum, chunk) => sum + chunk.nonWordChars, 0),
1909
- total
1910
- } : void 0;
1911
- if (mode === "char") return {
1824
+ addItems(left.breakdown.items);
1825
+ addItems(right.breakdown.items);
1826
+ return {
1912
1827
  total,
1913
1828
  counts,
1914
1829
  breakdown: {
1915
- mode,
1916
- items: analyzed.map((chunk) => ({
1917
- locale: chunk.locale,
1918
- text: chunk.text,
1919
- chars: chunk.chars,
1920
- nonWords: chunk.nonWords
1921
- }))
1830
+ mode: "char-collector",
1831
+ items: localeOrder.map((locale) => {
1832
+ const value = mergedByLocale.get(locale);
1833
+ if (!value) throw new Error(`Missing char-collector entry for locale: ${locale}`);
1834
+ return value;
1835
+ })
1836
+ }
1837
+ };
1838
+ }
1839
+ if (left.breakdown.mode === "collector" && right.breakdown.mode === "collector") {
1840
+ const localeOrder = [];
1841
+ const mergedByLocale = /* @__PURE__ */ new Map();
1842
+ const addItems = (items) => {
1843
+ for (const item of items) {
1844
+ const existing = mergedByLocale.get(item.locale);
1845
+ if (existing) {
1846
+ existing.words += item.words;
1847
+ if (preserveCollectorSegments) appendAll(existing.segments, item.segments);
1848
+ continue;
1849
+ }
1850
+ localeOrder.push(item.locale);
1851
+ mergedByLocale.set(item.locale, {
1852
+ locale: item.locale,
1853
+ words: item.words,
1854
+ segments: preserveCollectorSegments ? [...item.segments] : []
1855
+ });
1922
1856
  }
1923
1857
  };
1858
+ addItems(left.breakdown.items);
1859
+ addItems(right.breakdown.items);
1860
+ let mergedNonWords;
1861
+ if (left.breakdown.nonWords || right.breakdown.nonWords) {
1862
+ mergedNonWords = createNonWordCollection();
1863
+ if (left.breakdown.nonWords) mergeNonWordCollections(mergedNonWords, left.breakdown.nonWords);
1864
+ if (right.breakdown.nonWords) mergeNonWordCollections(mergedNonWords, right.breakdown.nonWords);
1865
+ }
1924
1866
  return {
1925
1867
  total,
1926
1868
  counts,
1927
1869
  breakdown: {
1928
- mode,
1929
- items: aggregateCharsByLocale(analyzed).map((chunk) => ({
1930
- locale: chunk.locale,
1931
- chars: chunk.chars,
1932
- nonWords: chunk.nonWords
1933
- }))
1870
+ mode: "collector",
1871
+ items: localeOrder.map((locale) => {
1872
+ const value = mergedByLocale.get(locale);
1873
+ if (!value) throw new Error(`Missing collector entry for locale: ${locale}`);
1874
+ return value;
1875
+ }),
1876
+ nonWords: mergedNonWords
1934
1877
  }
1935
1878
  };
1936
1879
  }
1937
- const analyzed = chunks.map((chunk) => analyzeChunk(chunk, collectNonWords, includeWhitespace));
1938
- const wordsTotal = analyzed.reduce((sum, chunk) => sum + chunk.words, 0);
1939
- const nonWordsTotal = collectNonWords ? analyzed.reduce((sum, chunk) => {
1940
- if (!chunk.nonWords) return sum;
1941
- return sum + getNonWordTotal(chunk.nonWords);
1942
- }, 0) : 0;
1943
- const total = analyzed.reduce((sum, chunk) => {
1944
- let chunkTotal = chunk.words;
1945
- if (collectNonWords && chunk.nonWords) chunkTotal += getNonWordTotal(chunk.nonWords);
1946
- return sum + chunkTotal;
1947
- }, 0);
1948
- const counts = collectNonWords ? {
1949
- words: wordsTotal,
1950
- nonWords: nonWordsTotal,
1951
- total
1952
- } : void 0;
1953
- if (mode === "segments") return {
1954
- total,
1955
- counts,
1956
- breakdown: {
1957
- mode,
1958
- items: analyzed.map((chunk) => ({
1959
- locale: chunk.locale,
1960
- text: chunk.text,
1961
- words: chunk.words,
1962
- segments: chunk.segments,
1963
- nonWords: chunk.nonWords
1964
- }))
1965
- }
1966
- };
1967
- if (mode === "collector") return {
1968
- total,
1969
- counts,
1970
- breakdown: {
1971
- mode,
1972
- items: aggregateByLocale(analyzed),
1973
- nonWords: collectNonWordsAggregate(analyzed, collectNonWords)
1974
- }
1975
- };
1976
1880
  return {
1977
1881
  total,
1978
1882
  counts,
1979
- breakdown: {
1980
- mode,
1981
- items: analyzed.map((chunk) => ({
1982
- locale: chunk.locale,
1983
- text: chunk.text,
1984
- words: chunk.words,
1985
- nonWords: chunk.nonWords
1986
- }))
1987
- }
1883
+ breakdown: left.breakdown
1988
1884
  };
1989
1885
  }
1990
- function getNonWordTotal(nonWords) {
1991
- return nonWords.counts.emoji + nonWords.counts.symbols + nonWords.counts.punctuation + (nonWords.counts.whitespace ?? 0);
1992
- }
1993
- function collectNonWordsAggregate(analyzed, enabled) {
1994
- if (!enabled) return;
1995
- const collection = createNonWordCollection();
1996
- for (const chunk of analyzed) {
1997
- if (!chunk.nonWords) continue;
1998
- mergeNonWordCollections(collection, chunk.nonWords);
1886
+ function aggregateWordCounterResults(results, preserveCollectorSegments) {
1887
+ if (results.length === 0) return wc_default("", { mode: "chunk" });
1888
+ const first = results[0];
1889
+ if (!first) return wc_default("", { mode: "chunk" });
1890
+ let aggregate = first;
1891
+ for (let index = 1; index < results.length; index += 1) {
1892
+ const current = results[index];
1893
+ if (!current) continue;
1894
+ aggregate = mergeWordCounterResult(aggregate, current, preserveCollectorSegments);
1999
1895
  }
2000
- return collection;
1896
+ return aggregate;
2001
1897
  }
2002
-
2003
- //#endregion
2004
- //#region src/wc/index.ts
2005
- var wc_default = wordCounter;
2006
-
2007
- //#endregion
2008
- //#region src/markdown/section-count.ts
2009
- function normalizeText(value) {
2010
- if (value == null) return "";
2011
- if (typeof value === "string") return value;
2012
- if (typeof value === "number" || typeof value === "boolean") return String(value);
2013
- try {
2014
- return JSON.stringify(value);
2015
- } catch {
2016
- return String(value);
2017
- }
1898
+ function buildSectionKey(name, source) {
1899
+ return `${source}:${name}`;
2018
1900
  }
2019
- function buildPerKeyItems(data, mode, options) {
2020
- if (!data || typeof data !== "object" || Array.isArray(data)) return [];
2021
- return Object.entries(data).map(([key, value]) => {
2022
- const valueText = normalizeText(value);
2023
- return {
2024
- name: key,
2025
- source: "frontmatter",
2026
- result: wc_default(valueText ? `${key}: ${valueText}` : key, options)
2027
- };
2028
- });
1901
+ function aggregateSectionedResults(results, preserveCollectorSegments) {
1902
+ if (results.length === 0) return {
1903
+ section: "all",
1904
+ total: 0,
1905
+ frontmatterType: null,
1906
+ items: []
1907
+ };
1908
+ const section = results[0]?.section ?? "all";
1909
+ const grouped = /* @__PURE__ */ new Map();
1910
+ let total = 0;
1911
+ let frontmatterType = results[0]?.frontmatterType ?? null;
1912
+ for (const result of results) {
1913
+ total += result.total;
1914
+ if (result.section !== section) throw new Error("Cannot aggregate section results with different section modes.");
1915
+ if (frontmatterType !== result.frontmatterType) frontmatterType = null;
1916
+ for (const item of result.items) {
1917
+ const key = buildSectionKey(item.name, item.source);
1918
+ const existing = grouped.get(key);
1919
+ if (!existing) {
1920
+ grouped.set(key, {
1921
+ name: item.name,
1922
+ source: item.source,
1923
+ items: [item.result]
1924
+ });
1925
+ continue;
1926
+ }
1927
+ existing.items.push(item.result);
1928
+ }
1929
+ }
1930
+ const sourceOrder = new Map([["frontmatter", 0], ["content", 1]]);
1931
+ const items = [...grouped.values()].sort((left, right) => {
1932
+ const sourceDiff = (sourceOrder.get(left.source) ?? 0) - (sourceOrder.get(right.source) ?? 0);
1933
+ if (sourceDiff !== 0) return sourceDiff;
1934
+ return left.name.localeCompare(right.name);
1935
+ }).map((entry) => ({
1936
+ name: entry.name,
1937
+ source: entry.source,
1938
+ result: aggregateWordCounterResults(entry.items, preserveCollectorSegments)
1939
+ }));
1940
+ return {
1941
+ section,
1942
+ total,
1943
+ frontmatterType,
1944
+ items
1945
+ };
2029
1946
  }
2030
- function buildSingleItem(name, text, mode, options, source) {
2031
- return [{
2032
- name,
2033
- source,
2034
- result: wc_default(text, options)
2035
- }];
1947
+ function stripCollectorSegmentsFromWordCounterResult(result) {
1948
+ if (result.breakdown.mode !== "collector") return;
1949
+ for (const item of result.breakdown.items) item.segments = [];
2036
1950
  }
2037
- function sumTotals(items) {
2038
- return items.reduce((sum, item) => sum + item.result.total, 0);
1951
+ function stripCollectorSegmentsFromSectionedResult(result) {
1952
+ for (const item of result.items) stripCollectorSegmentsFromWordCounterResult(item.result);
2039
1953
  }
2040
- function countSections(input, section, options = {}) {
2041
- const mode = options.mode ?? "chunk";
2042
- if (section === "all") {
2043
- const result = wc_default(input, options);
2044
- return {
1954
+ function compactCollectorSegmentsInCountResult(result) {
1955
+ if ("section" in result) {
1956
+ stripCollectorSegmentsFromSectionedResult(result);
1957
+ return;
1958
+ }
1959
+ stripCollectorSegmentsFromWordCounterResult(result);
1960
+ }
1961
+ function finalizeBatchSummaryFromFileResults(files, section, wcOptions, options = {}) {
1962
+ const preserveCollectorSegments = options.preserveCollectorSegments ?? true;
1963
+ if (!preserveCollectorSegments) for (const file of files) compactCollectorSegmentsInCountResult(file.result);
1964
+ options.onFinalizeStart?.();
1965
+ if (files.length === 0) return {
1966
+ files,
1967
+ skipped: [],
1968
+ aggregate: section === "all" ? wc_default("", wcOptions) : {
2045
1969
  section,
2046
- total: result.total,
1970
+ total: 0,
2047
1971
  frontmatterType: null,
2048
- items: [{
2049
- name: "all",
2050
- source: "content",
2051
- result
2052
- }]
2053
- };
2054
- }
2055
- const parsed = parseMarkdown(input);
2056
- const frontmatterText = parsed.frontmatter ?? "";
2057
- const contentText = parsed.content ?? "";
2058
- let items = [];
2059
- if (section === "frontmatter") items = buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter");
2060
- else if (section === "content") items = buildSingleItem("content", contentText, mode, options, "content");
2061
- else if (section === "split") items = [...buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter"), ...buildSingleItem("content", contentText, mode, options, "content")];
2062
- else if (section === "per-key") items = buildPerKeyItems(parsed.data, mode, options);
2063
- else if (section === "split-per-key") items = [...buildPerKeyItems(parsed.data, mode, options), ...buildSingleItem("content", contentText, mode, options, "content")];
1972
+ items: []
1973
+ }
1974
+ };
2064
1975
  return {
2065
- section,
2066
- total: sumTotals(items),
2067
- frontmatterType: parsed.frontmatterType,
2068
- items
1976
+ files,
1977
+ skipped: [],
1978
+ aggregate: section === "all" ? aggregateWordCounterResults(files.map((file) => file.result), preserveCollectorSegments) : aggregateSectionedResults(files.map((file) => file.result), preserveCollectorSegments)
2069
1979
  };
2070
1980
  }
2071
1981
 
2072
1982
  //#endregion
2073
- //#region src/cli/batch/aggregate.ts
2074
- function mergeWordCounterResult(left, right, preserveCollectorSegments) {
2075
- if (left.breakdown.mode !== right.breakdown.mode) throw new Error("Cannot merge different breakdown modes.");
2076
- const total = left.total + right.total;
2077
- const counts = left.counts || right.counts ? {
2078
- words: (left.counts?.words ?? 0) + (right.counts?.words ?? 0),
2079
- nonWords: (left.counts?.nonWords ?? 0) + (right.counts?.nonWords ?? 0),
2080
- total: (left.counts?.total ?? 0) + (right.counts?.total ?? 0)
2081
- } : void 0;
2082
- if (left.breakdown.mode === "chunk" && right.breakdown.mode === "chunk") return {
2083
- total,
2084
- counts,
2085
- breakdown: {
2086
- mode: "chunk",
2087
- items: [...left.breakdown.items, ...right.breakdown.items]
1983
+ //#region src/cli/batch/jobs/queue.ts
1984
+ async function runBoundedQueue(total, requestedJobs, worker) {
1985
+ if (total === 0) return [];
1986
+ const safeRequestedJobs = Number.isFinite(requestedJobs) ? Math.floor(requestedJobs) : 1;
1987
+ const concurrency = Math.max(1, Math.min(total, safeRequestedJobs));
1988
+ const results = new Array(total);
1989
+ let nextIndex = 0;
1990
+ const runWorker = async () => {
1991
+ while (true) {
1992
+ const current = nextIndex;
1993
+ nextIndex += 1;
1994
+ if (current >= total) return;
1995
+ results[current] = await worker(current);
2088
1996
  }
2089
1997
  };
2090
- if (left.breakdown.mode === "segments" && right.breakdown.mode === "segments") return {
2091
- total,
2092
- counts,
2093
- breakdown: {
2094
- mode: "segments",
2095
- items: [...left.breakdown.items, ...right.breakdown.items]
2096
- }
1998
+ await Promise.all(Array.from({ length: concurrency }, () => runWorker()));
1999
+ return results;
2000
+ }
2001
+
2002
+ //#endregion
2003
+ //#region src/cli/path/load.ts
2004
+ function isProbablyBinary(buffer) {
2005
+ if (buffer.length === 0) return false;
2006
+ const sampleSize = Math.min(buffer.length, 1024);
2007
+ let suspicious = 0;
2008
+ for (let index = 0; index < sampleSize; index += 1) {
2009
+ const byte = buffer[index] ?? 0;
2010
+ if (byte === 0) return true;
2011
+ if (byte === 9 || byte === 10 || byte === 13) continue;
2012
+ if (byte >= 32 && byte <= 126) continue;
2013
+ if (byte >= 128) continue;
2014
+ suspicious += 1;
2015
+ }
2016
+ return suspicious / sampleSize > .3;
2017
+ }
2018
+
2019
+ //#endregion
2020
+ //#region src/cli/batch/jobs/read-input.ts
2021
+ async function readBatchInput(path, options) {
2022
+ if (!path) return {
2023
+ type: "skip",
2024
+ path: "",
2025
+ reason: "not readable: missing path"
2097
2026
  };
2098
- if (left.breakdown.mode === "char" && right.breakdown.mode === "char") return {
2099
- total,
2100
- counts,
2101
- breakdown: {
2102
- mode: "char",
2103
- items: [...left.breakdown.items, ...right.breakdown.items]
2104
- }
2027
+ let buffer;
2028
+ try {
2029
+ buffer = await readFile(path);
2030
+ } catch (error) {
2031
+ if (isResourceLimitError(error)) throw createResourceLimitError(path, error, options.requestedJobs, options.limits);
2032
+ return {
2033
+ type: "skip",
2034
+ path,
2035
+ reason: `not readable: ${error instanceof Error ? error.message : String(error)}`
2036
+ };
2037
+ }
2038
+ if (isProbablyBinary(buffer)) return {
2039
+ type: "skip",
2040
+ path,
2041
+ reason: "binary file"
2105
2042
  };
2106
- if (left.breakdown.mode === "char-collector" && right.breakdown.mode === "char-collector") {
2107
- const localeOrder = [];
2108
- const mergedByLocale = /* @__PURE__ */ new Map();
2109
- const addItems = (items) => {
2110
- for (const item of items) {
2111
- const existing = mergedByLocale.get(item.locale);
2112
- if (existing) {
2113
- existing.chars += item.chars;
2114
- if (item.nonWords) {
2115
- if (!existing.nonWords) existing.nonWords = createNonWordCollection();
2116
- mergeNonWordCollections(existing.nonWords, item.nonWords);
2117
- }
2118
- continue;
2043
+ return {
2044
+ type: "file",
2045
+ path,
2046
+ content: buffer.toString("utf8")
2047
+ };
2048
+ }
2049
+
2050
+ //#endregion
2051
+ //#region src/cli/batch/jobs/load-count.ts
2052
+ async function countBatchInputsWithJobs(filePaths, options) {
2053
+ const limits = resolveBatchJobsLimit();
2054
+ const total = filePaths.length;
2055
+ let completed = 0;
2056
+ const entries = await runBoundedQueue(filePaths.length, options.jobs, async (index) => {
2057
+ const loaded = await readBatchInput(filePaths[index], {
2058
+ requestedJobs: options.jobs,
2059
+ limits
2060
+ });
2061
+ if (loaded.type === "skip") {
2062
+ completed += 1;
2063
+ options.onFileProcessed?.({
2064
+ completed,
2065
+ total
2066
+ });
2067
+ return {
2068
+ type: "skip",
2069
+ skip: {
2070
+ path: loaded.path,
2071
+ reason: loaded.reason
2119
2072
  }
2120
- localeOrder.push(item.locale);
2121
- mergedByLocale.set(item.locale, {
2122
- locale: item.locale,
2123
- chars: item.chars,
2124
- nonWords: item.nonWords ? mergeNonWordCollections(createNonWordCollection(), item.nonWords) : void 0
2125
- });
2126
- }
2127
- };
2128
- addItems(left.breakdown.items);
2129
- addItems(right.breakdown.items);
2073
+ };
2074
+ }
2075
+ const result = options.section === "all" ? wc_default(loaded.content, options.wcOptions) : countSections(loaded.content, options.section, options.wcOptions);
2076
+ if (!options.preserveCollectorSegments) compactCollectorSegmentsInCountResult(result);
2077
+ completed += 1;
2078
+ options.onFileProcessed?.({
2079
+ completed,
2080
+ total
2081
+ });
2130
2082
  return {
2131
- total,
2132
- counts,
2133
- breakdown: {
2134
- mode: "char-collector",
2135
- items: localeOrder.map((locale) => {
2136
- const value = mergedByLocale.get(locale);
2137
- if (!value) throw new Error(`Missing char-collector entry for locale: ${locale}`);
2138
- return value;
2139
- })
2083
+ type: "file",
2084
+ file: {
2085
+ path: loaded.path,
2086
+ result
2140
2087
  }
2141
2088
  };
2089
+ });
2090
+ const files = [];
2091
+ const skipped = [];
2092
+ for (const entry of entries) {
2093
+ if (entry.type === "file") {
2094
+ files.push(entry.file);
2095
+ continue;
2096
+ }
2097
+ skipped.push(entry.skip);
2142
2098
  }
2143
- if (left.breakdown.mode === "collector" && right.breakdown.mode === "collector") {
2144
- const localeOrder = [];
2145
- const mergedByLocale = /* @__PURE__ */ new Map();
2146
- const addItems = (items) => {
2147
- for (const item of items) {
2148
- const existing = mergedByLocale.get(item.locale);
2149
- if (existing) {
2150
- existing.words += item.words;
2151
- if (preserveCollectorSegments) appendAll(existing.segments, item.segments);
2152
- continue;
2153
- }
2154
- localeOrder.push(item.locale);
2155
- mergedByLocale.set(item.locale, {
2156
- locale: item.locale,
2157
- words: item.words,
2158
- segments: preserveCollectorSegments ? [...item.segments] : []
2099
+ return {
2100
+ files,
2101
+ skipped
2102
+ };
2103
+ }
2104
+
2105
+ //#endregion
2106
+ //#region src/cli/batch/jobs/load-count-worker.ts
2107
+ var WorkerRouteUnavailableError = class extends Error {};
2108
+ function isFallbackFriendlyWorkerError(error) {
2109
+ if (typeof error !== "object" || error === null) return false;
2110
+ const code = "code" in error ? String(error.code) : "";
2111
+ if (code === "ERR_WORKER_PATH" || code === "ERR_WORKER_UNSUPPORTED_EXTENSION" || code === "ERR_UNKNOWN_FILE_EXTENSION" || code === "ERR_MODULE_NOT_FOUND") return true;
2112
+ const message = error instanceof Error ? error.message : String(error);
2113
+ return message.includes("Unknown file extension") || message.includes("Cannot find module");
2114
+ }
2115
+ async function countBatchInputsWithWorkerJobs(filePaths, options) {
2116
+ if (process.env.WORD_COUNTER_DISABLE_WORKER_JOBS === "1" || process.env.WORD_COUNTER_DISABLE_EXPERIMENTAL_WORKERS === "1") throw new WorkerRouteUnavailableError("Worker route disabled by environment.");
2117
+ let workerPoolModule;
2118
+ try {
2119
+ workerPoolModule = await import("./worker-pool.mjs");
2120
+ } catch (error) {
2121
+ throw new WorkerRouteUnavailableError(`Worker route unavailable: ${error instanceof Error ? error.message : String(error)}`);
2122
+ }
2123
+ try {
2124
+ return await workerPoolModule.countBatchInputsWithWorkerPool({
2125
+ filePaths,
2126
+ jobs: options.jobs,
2127
+ section: options.section,
2128
+ wcOptions: options.wcOptions,
2129
+ preserveCollectorSegments: options.preserveCollectorSegments,
2130
+ onFileProcessed: options.onFileProcessed
2131
+ });
2132
+ } catch (error) {
2133
+ if (error instanceof workerPoolModule.WorkerPoolTaskFatalError) {
2134
+ if (error.code === "EMFILE" || error.code === "ENFILE") throw createResourceLimitError(error.path, {
2135
+ code: error.code,
2136
+ message: error.message
2137
+ }, options.jobs, resolveBatchJobsLimit());
2138
+ throw new Error(error.message);
2139
+ }
2140
+ if (error instanceof workerPoolModule.WorkerPoolUnavailableError || isFallbackFriendlyWorkerError(error)) throw new WorkerRouteUnavailableError(`Worker route unavailable: ${error instanceof Error ? error.message : String(error)}`);
2141
+ throw error;
2142
+ }
2143
+ }
2144
+
2145
+ //#endregion
2146
+ //#region src/cli/batch/jobs/render.ts
2147
+ function finalizeBatchJobsSummary(files, section, wcOptions, options = {}) {
2148
+ return finalizeBatchSummaryFromFileResults(files, section, wcOptions, {
2149
+ onFinalizeStart: options.onFinalizeStart,
2150
+ preserveCollectorSegments: options.preserveCollectorSegments
2151
+ });
2152
+ }
2153
+
2154
+ //#endregion
2155
+ //#region src/cli/path/resolve.ts
2156
+ async function expandDirectory(rootPath, directoryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats) {
2157
+ let entries;
2158
+ try {
2159
+ entries = await readdir(directoryPath, {
2160
+ withFileTypes: true,
2161
+ encoding: "utf8"
2162
+ });
2163
+ } catch (error) {
2164
+ const message = error instanceof Error ? error.message : String(error);
2165
+ skipped.push({
2166
+ path: directoryPath,
2167
+ reason: `directory read failed: ${message}`
2168
+ });
2169
+ debug.emit("path.resolve.expand.read_failed", {
2170
+ directory: directoryPath,
2171
+ reason: `directory read failed: ${message}`
2172
+ });
2173
+ return [];
2174
+ }
2175
+ const sortedEntries = entries.slice().sort((left, right) => left.name.localeCompare(right.name));
2176
+ const files = [];
2177
+ debug.emit("path.resolve.expand.start", {
2178
+ directory: directoryPath,
2179
+ entries: sortedEntries.length,
2180
+ recursive
2181
+ });
2182
+ for (const entry of sortedEntries) {
2183
+ const entryPath = resolve(directoryPath, entry.name);
2184
+ if (entry.isFile()) {
2185
+ if (!shouldIncludeFromDirectory(entryPath, extensionFilter)) {
2186
+ skipped.push({
2187
+ path: entryPath,
2188
+ reason: "extension excluded"
2159
2189
  });
2190
+ debug.emit("path.resolve.filter.excluded", {
2191
+ path: entryPath,
2192
+ reason: "extension excluded"
2193
+ }, { verbosity: "verbose" });
2194
+ stats.filterExcluded += 1;
2195
+ continue;
2160
2196
  }
2161
- };
2162
- addItems(left.breakdown.items);
2163
- addItems(right.breakdown.items);
2164
- let mergedNonWords;
2165
- if (left.breakdown.nonWords || right.breakdown.nonWords) {
2166
- mergedNonWords = createNonWordCollection();
2167
- if (left.breakdown.nonWords) mergeNonWordCollections(mergedNonWords, left.breakdown.nonWords);
2168
- if (right.breakdown.nonWords) mergeNonWordCollections(mergedNonWords, right.breakdown.nonWords);
2197
+ const relativePath = toDirectoryRelativePath(rootPath, entryPath);
2198
+ if (!shouldIncludeFromDirectoryRegex(relativePath, regexFilter)) {
2199
+ if (recordRegexExcluded(entryPath)) {
2200
+ debug.emit("path.resolve.regex.excluded", {
2201
+ path: entryPath,
2202
+ relativePath,
2203
+ pattern: regexFilter.sourcePattern,
2204
+ reason: "regex excluded"
2205
+ }, { verbosity: "verbose" });
2206
+ stats.regexExcluded += 1;
2207
+ }
2208
+ continue;
2209
+ }
2210
+ files.push(entryPath);
2211
+ stats.directoryIncluded += 1;
2212
+ debug.emit("path.resolve.expand.include", {
2213
+ path: entryPath,
2214
+ source: "directory"
2215
+ }, { verbosity: "verbose" });
2216
+ continue;
2217
+ }
2218
+ if (!entry.isDirectory() || !recursive) continue;
2219
+ appendAll(files, await expandDirectory(rootPath, entryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats));
2220
+ }
2221
+ debug.emit("path.resolve.expand.complete", {
2222
+ directory: directoryPath,
2223
+ files: files.length
2224
+ });
2225
+ return files;
2226
+ }
2227
/**
 * Turns user-supplied path inputs into a deduplicated, sorted list of files.
 *
 * Each input is resolved to an absolute path and stat'ed. Directories are
 * expanded through `expandDirectory` when `options.pathMode` is "auto";
 * regular files are accepted directly; everything else is recorded in the
 * returned `skipped` list with a reason.
 *
 * @param {string[]} pathInputs - Raw paths as given by the caller.
 * @param {object} options - Reads `pathMode`, `recursive`, `directoryRegexPattern`,
 *   and the optional `extensionFilter` and `debug` channel.
 * @returns {Promise<{files: string[], skipped: {path: string, reason: string}[]}>}
 *   Accepted files (sorted with `localeCompare`) and skipped entries.
 */
async function resolveBatchFilePaths(pathInputs, options) {
	const skipped = [];
	const regexExcludedPaths = /* @__PURE__ */ new Set();
	const resolvedFiles = /* @__PURE__ */ new Set();
	// Counters shared with expandDirectory; reported in the summary events below.
	const stats = {
		dedupeAccepted: 0,
		dedupeDuplicates: 0,
		filterExcluded: 0,
		regexExcluded: 0,
		directoryIncluded: 0
	};
	const extensionFilter = options.extensionFilter ?? buildDirectoryExtensionFilter(void 0, void 0);
	let cachedRegexFilter;
	// Fall back to a no-op channel so the emit calls below never need a guard.
	const debug = options.debug ?? {
		enabled: false,
		verbosity: "compact",
		emit() {},
		close: async () => {}
	};
	debug.emit("path.resolve.inputs", {
		inputs: pathInputs.length,
		pathMode: options.pathMode,
		recursive: options.recursive,
		hasRegex: Boolean(options.directoryRegexPattern)
	});

	// Accepts a file once; repeats are only counted and reported.
	const addResolvedFile = (filePath, details) => {
		// A file that is accepted by any input must not be reported as regex-excluded.
		regexExcludedPaths.delete(filePath);
		const isDuplicate = resolvedFiles.has(filePath);
		if (isDuplicate) stats.dedupeDuplicates += 1;
		else {
			resolvedFiles.add(filePath);
			stats.dedupeAccepted += 1;
		}
		debug.emit(isDuplicate ? "path.resolve.dedupe.duplicate" : "path.resolve.dedupe.accept", {
			path: filePath,
			source: details.source,
			input: details.input
		}, { verbosity: "verbose" });
	};

	// The regex filter is built on first use, i.e. only if a directory is expanded.
	const getRegexFilter = () => {
		if (!cachedRegexFilter) cachedRegexFilter = buildDirectoryRegexFilter(options.directoryRegexPattern);
		return cachedRegexFilter;
	};

	// Returns true when the exclusion was recorded (the file was not already accepted).
	const recordRegexExcluded = (filePath) => {
		const alreadyResolved = resolvedFiles.has(filePath);
		if (!alreadyResolved) regexExcludedPaths.add(filePath);
		return !alreadyResolved;
	};

	// Records a skipped input and mirrors it to the debug channel.
	const skipPath = (path, reason) => {
		skipped.push({
			path,
			reason
		});
		debug.emit("path.resolve.skip", {
			path,
			reason
		});
	};

	// Stats a path, converting a failure into a printable message.
	const readMetadata = async (path) => {
		try {
			return { metadata: await stat(path) };
		} catch (error) {
			return { failure: error instanceof Error ? error.message : String(error) };
		}
	};

	for (const rawPath of pathInputs) {
		const targetPath = resolve(rawPath);
		debug.emit("path.resolve.input", {
			rawPath,
			resolvedPath: targetPath
		});
		const probe = await readMetadata(targetPath);
		if ("failure" in probe) {
			skipPath(targetPath, `not readable: ${probe.failure}`);
			continue;
		}
		const { metadata } = probe;
		if (metadata.isDirectory() && options.pathMode === "auto") {
			const effectiveRegexFilter = getRegexFilter();
			debug.emit("path.resolve.root.expand", {
				root: targetPath,
				recursive: options.recursive,
				regex: effectiveRegexFilter.sourcePattern ?? null
			});
			const expanded = await expandDirectory(targetPath, targetPath, options.recursive, extensionFilter, effectiveRegexFilter, skipped, recordRegexExcluded, debug, stats);
			for (const file of expanded) addResolvedFile(file, {
				source: "directory",
				input: targetPath
			});
			continue;
		}
		if (metadata.isFile()) {
			addResolvedFile(targetPath, {
				source: "direct",
				input: targetPath
			});
			continue;
		}
		skipPath(targetPath, "not a regular file");
	}

	// Regex exclusions are appended last, after every input had a chance to accept the file.
	regexExcludedPaths.forEach((path) => {
		skipped.push({
			path,
			reason: "regex excluded"
		});
	});
	const files = [...resolvedFiles].sort((left, right) => left.localeCompare(right));
	debug.emit("path.resolve.filter.summary", {
		excluded: stats.filterExcluded + stats.regexExcluded,
		extensionExcluded: stats.filterExcluded,
		regexExcluded: stats.regexExcluded,
		included: stats.directoryIncluded
	});
	debug.emit("path.resolve.dedupe.summary", {
		accepted: stats.dedupeAccepted,
		duplicates: stats.dedupeDuplicates
	});
	debug.emit("path.resolve.complete", {
		files: files.length,
		skipped: skipped.length,
		ordering: "absolute-path-ascending"
	});
	return {
		files,
		skipped
	};
}
2190
- function aggregateWordCounterResults(results, preserveCollectorSegments) {
2191
- if (results.length === 0) return wc_default("", { mode: "chunk" });
2192
- const first = results[0];
2193
- if (!first) return wc_default("", { mode: "chunk" });
2194
- let aggregate = first;
2195
- for (let index = 1; index < results.length; index += 1) {
2196
- const current = results[index];
2197
- if (!current) continue;
2198
- aggregate = mergeWordCounterResult(aggregate, current, preserveCollectorSegments);
2199
- }
2200
- return aggregate;
2356
+
2357
+ //#endregion
2358
+ //#region src/cli/progress/reporter.ts
2359
/** Number of character cells in the rendered progress bar. */
const PROGRESS_BAR_WIDTH = 20;
/** Glyph used for the completed portion of the bar. */
const FILLED_BAR_CHAR = "█";
/** Glyph used for the remaining portion of the bar. */
const EMPTY_BAR_CHAR = "░";
2362
/**
 * Restricts a number to the range [min, max].
 *
 * The upper bound is applied first and the lower bound second, so when
 * `min` is greater than `max` the result is `min`.
 *
 * @param {number} value - Number to restrict.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @returns {number} The bounded value (NaN when `value` is NaN).
 */
function clamp(value, min, max) {
	const cappedAtMax = Math.min(max, value);
	return Math.max(min, cappedAtMax);
}
2202
- function buildSectionKey(name, source) {
2203
- return `${source}:${name}`;
2365
/**
 * Renders a fixed-width text bar for `completed` out of `total`.
 *
 * @param {number} completed - Units finished so far.
 * @param {number} total - Units expected; values below 1 are treated as 1.
 * @returns {string} `PROGRESS_BAR_WIDTH` cells: filled glyphs followed by empty glyphs.
 */
function buildProgressBar(completed, total) {
	const denominator = Math.max(total, 1);
	const fraction = clamp(completed / denominator, 0, 1);
	// Partial progress rounds down; the bar is forced full once the total is reached.
	let filledCells = Math.floor(fraction * PROGRESS_BAR_WIDTH);
	if (completed >= denominator) filledCells = PROGRESS_BAR_WIDTH;
	const filledPart = FILLED_BAR_CHAR.repeat(filledCells);
	const emptyPart = EMPTY_BAR_CHAR.repeat(PROGRESS_BAR_WIDTH - filledCells);
	return `${filledPart}${emptyPart}`;
}
2205
- function aggregateSectionedResults(results, preserveCollectorSegments) {
2206
- if (results.length === 0) return {
2207
- section: "all",
2208
- total: 0,
2209
- frontmatterType: null,
2210
- items: []
2211
- };
2212
- const section = results[0]?.section ?? "all";
2213
- const grouped = /* @__PURE__ */ new Map();
2214
- let total = 0;
2215
- let frontmatterType = results[0]?.frontmatterType ?? null;
2216
- for (const result of results) {
2217
- total += result.total;
2218
- if (result.section !== section) throw new Error("Cannot aggregate section results with different section modes.");
2219
- if (frontmatterType !== result.frontmatterType) frontmatterType = null;
2220
- for (const item of result.items) {
2221
- const key = buildSectionKey(item.name, item.source);
2222
- const existing = grouped.get(key);
2223
- if (!existing) {
2224
- grouped.set(key, {
2225
- name: item.name,
2226
- source: item.source,
2227
- items: [item.result]
2228
- });
2229
- continue;
2230
- }
2231
- existing.items.push(item.result);
2232
- }
2233
- }
2234
- const sourceOrder = new Map([["frontmatter", 0], ["content", 1]]);
2235
- const items = [...grouped.values()].sort((left, right) => {
2236
- const sourceDiff = (sourceOrder.get(left.source) ?? 0) - (sourceOrder.get(right.source) ?? 0);
2237
- if (sourceDiff !== 0) return sourceDiff;
2238
- return left.name.localeCompare(right.name);
2239
- }).map((entry) => ({
2240
- name: entry.name,
2241
- source: entry.source,
2242
- result: aggregateWordCounterResults(entry.items, preserveCollectorSegments)
2243
- }));
2244
- return {
2245
- section,
2246
- total,
2247
- frontmatterType,
2248
- items
2249
- };
2372
/**
 * Formats the time elapsed since `startedAtMs` as `MM:SS.t`.
 *
 * Minutes and seconds are zero-padded to two digits (minutes may grow past
 * two digits); `t` is tenths of a second. A start time in the future
 * yields "00:00.0".
 *
 * @param {number} startedAtMs - Start timestamp in epoch milliseconds.
 * @returns {string} The formatted elapsed time.
 */
function formatElapsed(startedAtMs) {
	const elapsedMs = Date.now() - startedAtMs;
	const wholeSeconds = Math.max(0, Math.floor(elapsedMs / 1e3));
	const minutePart = String(Math.floor(wholeSeconds / 60)).padStart(2, "0");
	const secondPart = String(wholeSeconds % 60).padStart(2, "0");
	const tenthPart = Math.floor(Math.max(0, elapsedMs) % 1e3 / 100);
	return `${minutePart}:${secondPart}.${tenthPart}`;
}
2251
- function stripCollectorSegmentsFromWordCounterResult(result) {
2252
- if (result.breakdown.mode !== "collector") return;
2253
- for (const item of result.breakdown.items) item.segments = [];
2380
/**
 * Builds the one-line progress message shown while files are being counted.
 *
 * @param {number} completed - Files processed so far.
 * @param {number} total - Files expected; values below 1 are treated as 1.
 * @param {number} startedAtMs - Start timestamp passed to `formatElapsed`.
 * @returns {string} Bar, right-aligned percentage, `completed/total` and elapsed time.
 */
function buildProgressLine(completed, total, startedAtMs) {
	const denominator = Math.max(total, 1);
	// The percentage rounds down, so 100% appears only when the total is reached.
	let percent = 100;
	if (!(completed >= denominator)) percent = Math.floor(completed / denominator * 100);
	const bar = buildProgressBar(completed, denominator);
	const percentLabel = `${String(percent).padStart(3, " ")}%`;
	// Pad the completed count to the width of the total so the line length stays stable.
	const countWidth = String(denominator).length;
	const countLabel = `${String(completed).padStart(countWidth, " ")}/${denominator}`;
	const elapsedLabel = formatElapsed(startedAtMs);
	return `Counting files [${bar}] ${percentLabel} ${countLabel} elapsed ${elapsedLabel}`;
}
2255
- function stripCollectorSegmentsFromSectionedResult(result) {
2256
- for (const item of result.items) stripCollectorSegmentsFromWordCounterResult(item.result);
2385
/**
 * Builds the status line shown while the aggregate result is being finalized.
 *
 * @param {number} startedAtMs - Start timestamp passed to `formatElapsed`.
 * @returns {string} The finalizing message including the elapsed time.
 */
function buildFinalizingLine(startedAtMs) {
	const elapsedLabel = formatElapsed(startedAtMs);
	return `Finalizing aggregate... elapsed ${elapsedLabel}`;
}
2258
- async function buildBatchSummary(inputs, section, wcOptions, options = {}) {
2259
- const preserveCollectorSegments = options.preserveCollectorSegments ?? true;
2260
- const files = [];
2261
- for (const input of inputs) {
2262
- const result = section === "all" ? wc_default(input.content, wcOptions) : countSections(input.content, section, wcOptions);
2263
- if (!preserveCollectorSegments) if ("section" in result) stripCollectorSegmentsFromSectionedResult(result);
2264
- else stripCollectorSegmentsFromWordCounterResult(result);
2265
- files.push({
2266
- path: input.path,
2267
- result
2268
- });
2269
- options.onFileCounted?.({
2270
- completed: files.length,
2271
- total: inputs.length
2272
- });
2273
- }
2274
- options.onFinalizeStart?.();
2275
- if (files.length === 0) return {
2276
- files,
2277
- skipped: [],
2278
- aggregate: section === "all" ? wc_default("", wcOptions) : {
2279
- section,
2280
- total: 0,
2281
- frontmatterType: null,
2282
- items: []
2388
/**
 * Creates a progress reporter that writes batch progress to `options.stream`.
 *
 * On a TTY stream each update rewrites the current line with `\r`; on other
 * streams each update is written as its own line, and an unchanged whole
 * percentage is not repeated while the batch is incomplete.
 *
 * @param {object} options
 * @param {boolean} options.enabled - When false, `start` never activates the reporter.
 * @param {object} options.stream - Writable target; its `isTTY` flag selects the mode.
 * @param {boolean} [options.clearOnFinish=true] - On a TTY, erase the line on finish
 *   instead of ending it with a newline.
 * @returns {{enabled: boolean, start: Function, advance: Function, startFinalizing: Function, finish: Function}}
 */
function createBatchProgressReporter(options) {
	const enabled = options.enabled;
	const isTTY = Boolean(options.stream.isTTY);
	const clearOnFinish = options.clearOnFinish ?? true;
	// Mutable reporter state, reset by start().
	const state = {
		active: false,
		total: 0,
		lastLineLength: 0,
		startedAtMs: 0,
		lastRenderedPercent: -1,
		finalizingStarted: false
	};

	const write = (text) => {
		options.stream.write(text);
	};

	// Rewrites the current TTY line, blanking leftovers from a longer previous line.
	const overwriteLine = (line) => {
		const surplus = state.lastLineLength - line.length;
		const trailingPadding = surplus > 0 ? " ".repeat(surplus) : "";
		write(`\r${line}${trailingPadding}`);
		state.lastLineLength = line.length;
	};

	// Writes a complete line for non-TTY streams.
	const appendLine = (line) => {
		state.lastLineLength = line.length;
		write(`${line}\n`);
	};

	const render = (completed) => {
		const line = buildProgressLine(completed, state.total, state.startedAtMs);
		const safeTotal = Math.max(state.total, 1);
		const percent = completed >= safeTotal ? 100 : Math.floor(completed / safeTotal * 100);
		// Non-TTY output skips repeats of the same percentage until the batch is complete.
		const isRepeat = percent === state.lastRenderedPercent && completed < safeTotal;
		if (!isTTY && isRepeat) return;
		state.lastRenderedPercent = percent;
		if (isTTY) overwriteLine(line);
		else appendLine(line);
	};

	// Blanks out the last rendered line and returns the cursor to column 0.
	const clearLine = () => {
		if (state.lastLineLength === 0) return;
		write(`\r${" ".repeat(state.lastLineLength)}\r`);
		state.lastLineLength = 0;
	};

	return {
		enabled,
		start(nextTotal, nextStartedAtMs) {
			// Progress is only shown when enabled and there is more than one item.
			if (!enabled || nextTotal <= 1) return;
			state.total = nextTotal;
			state.active = true;
			state.startedAtMs = nextStartedAtMs ?? Date.now();
			state.lastRenderedPercent = -1;
			state.finalizingStarted = false;
			render(0);
		},
		advance(snapshot) {
			if (state.active) render(snapshot.completed);
		},
		startFinalizing() {
			if (!state.active || state.finalizingStarted) return;
			state.finalizingStarted = true;
			const line = buildFinalizingLine(state.startedAtMs);
			if (!isTTY) {
				appendLine(line);
				return;
			}
			if (clearOnFinish) {
				overwriteLine(line);
				return;
			}
			// The progress line is being kept, so the finalizing message starts a new line.
			write(`\n${line}`);
			state.lastLineLength = line.length;
		},
		finish() {
			if (!state.active) return;
			if (isTTY && clearOnFinish) clearLine();
			else if (isTTY) write("\n");
			state.active = false;
		}
	};
}
2291
2460
 
@@ -2316,34 +2485,90 @@ async function runBatchCount(options) {
2316
2485
  stage: "resolve",
2317
2486
  elapsedMs: resolveElapsedMs
2318
2487
  });
2319
- const loadStartedAtMs = Date.now();
2320
- options.debug.emit("batch.load.start", { files: resolved.files.length });
2321
- const loaded = await loadBatchInputs(resolved.files);
2322
- const loadElapsedMs = Date.now() - loadStartedAtMs;
2488
+ options.debug.emit("batch.jobs.strategy", {
2489
+ strategy: options.jobsStrategy,
2490
+ jobs: options.jobs
2491
+ });
2492
+ let summary;
2493
+ let routeSkips = [];
2494
+ options.debug.emit("batch.load.start", {
2495
+ files: resolved.files.length,
2496
+ jobs: options.jobs,
2497
+ strategy: options.jobsStrategy
2498
+ });
2323
2499
  options.debug.emit("batch.load.complete", {
2324
- files: loaded.files.length,
2325
- skipped: loaded.skipped.length,
2326
- elapsedMs: loadElapsedMs
2500
+ files: 0,
2501
+ skipped: 0,
2502
+ elapsedMs: 0,
2503
+ strategy: options.jobsStrategy
2327
2504
  });
2328
2505
  options.debug.emit("batch.stage.timing", {
2329
2506
  stage: "load",
2330
- elapsedMs: loadElapsedMs
2507
+ elapsedMs: 0
2331
2508
  });
2332
- const progressEnabled = options.progressReporter.enabled && loaded.files.length > 1;
2509
+ const progressEnabled = options.progressReporter.enabled && resolved.files.length > 1;
2333
2510
  options.debug.emit("batch.progress.start", {
2334
2511
  enabled: progressEnabled,
2335
- total: loaded.files.length
2512
+ total: resolved.files.length
2336
2513
  });
2337
- if (progressEnabled) options.progressReporter.start(loaded.files.length, batchStartedAtMs);
2338
- let summary;
2514
+ if (progressEnabled) options.progressReporter.start(resolved.files.length, batchStartedAtMs);
2339
2515
  const countStartedAtMs = Date.now();
2340
2516
  let finalizeStartedAtMs = null;
2341
2517
  let emittedCountTiming = false;
2342
2518
  try {
2343
- summary = await buildBatchSummary(loaded.files, options.section, options.wcOptions, {
2344
- onFileCounted: (snapshot) => {
2345
- if (progressEnabled) options.progressReporter.advance(snapshot);
2346
- },
2519
+ let counted;
2520
+ if (options.jobs > 1) try {
2521
+ counted = await countBatchInputsWithWorkerJobs(resolved.files, {
2522
+ jobs: options.jobs,
2523
+ section: options.section,
2524
+ wcOptions: options.wcOptions,
2525
+ preserveCollectorSegments: options.preserveCollectorSegments,
2526
+ onFileProcessed: (snapshot) => {
2527
+ if (progressEnabled) options.progressReporter.advance(snapshot);
2528
+ }
2529
+ });
2530
+ options.debug.emit("batch.jobs.executor", {
2531
+ strategy: options.jobsStrategy,
2532
+ executor: "worker-pool",
2533
+ jobs: options.jobs
2534
+ });
2535
+ } catch (error) {
2536
+ if (!(error instanceof WorkerRouteUnavailableError)) throw error;
2537
+ options.emitWarning?.(`Worker executor unavailable; falling back to async load+count. (${error.message})`);
2538
+ options.debug.emit("batch.jobs.executor", {
2539
+ strategy: options.jobsStrategy,
2540
+ executor: "async-fallback",
2541
+ reason: error.message,
2542
+ jobs: options.jobs
2543
+ });
2544
+ counted = await countBatchInputsWithJobs(resolved.files, {
2545
+ jobs: options.jobs,
2546
+ section: options.section,
2547
+ wcOptions: options.wcOptions,
2548
+ preserveCollectorSegments: options.preserveCollectorSegments,
2549
+ onFileProcessed: (snapshot) => {
2550
+ if (progressEnabled) options.progressReporter.advance(snapshot);
2551
+ }
2552
+ });
2553
+ }
2554
+ else {
2555
+ counted = await countBatchInputsWithJobs(resolved.files, {
2556
+ jobs: options.jobs,
2557
+ section: options.section,
2558
+ wcOptions: options.wcOptions,
2559
+ preserveCollectorSegments: options.preserveCollectorSegments,
2560
+ onFileProcessed: (snapshot) => {
2561
+ if (progressEnabled) options.progressReporter.advance(snapshot);
2562
+ }
2563
+ });
2564
+ options.debug.emit("batch.jobs.executor", {
2565
+ strategy: options.jobsStrategy,
2566
+ executor: "async-main",
2567
+ jobs: options.jobs
2568
+ });
2569
+ }
2570
+ routeSkips = counted.skipped;
2571
+ summary = finalizeBatchJobsSummary(counted.files, options.section, options.wcOptions, {
2347
2572
  onFinalizeStart: () => {
2348
2573
  finalizeStartedAtMs = Date.now();
2349
2574
  if (progressEnabled) options.progressReporter.startFinalizing();
@@ -2360,7 +2585,7 @@ async function runBatchCount(options) {
2360
2585
  if (progressEnabled) options.progressReporter.finish();
2361
2586
  options.debug.emit("batch.progress.complete", {
2362
2587
  enabled: progressEnabled,
2363
- total: loaded.files.length
2588
+ total: resolved.files.length
2364
2589
  });
2365
2590
  }
2366
2591
  if (!emittedCountTiming) {
@@ -2376,7 +2601,7 @@ async function runBatchCount(options) {
2376
2601
  elapsedMs: finalizeElapsedMs
2377
2602
  });
2378
2603
  appendAll(summary.skipped, resolved.skipped);
2379
- appendAll(summary.skipped, loaded.skipped);
2604
+ appendAll(summary.skipped, routeSkips);
2380
2605
  options.debug.emit("batch.aggregate.complete", {
2381
2606
  files: summary.files.length,
2382
2607
  skipped: summary.skipped.length,
@@ -2385,6 +2610,12 @@ async function runBatchCount(options) {
2385
2610
  return summary;
2386
2611
  }
2387
2612
 
2613
+ //#endregion
2614
+ //#region src/cli/batch/jobs/strategy.ts
2615
/**
 * Selects the batch execution strategy for a given job count.
 *
 * @param {number} _jobs - Requested job count; accepted but not consulted.
 * @returns {string} Always "load-count".
 */
function resolveBatchJobsStrategy(_jobs) {
	const strategy = "load-count";
	return strategy;
}
2618
+
2388
2619
  //#endregion
2389
2620
  //#region src/utils/show-singular-or-plural-word.ts
2390
2621
  function showSingularOrPluralWord(count, word) {
@@ -2601,6 +2832,10 @@ function countLongOptionOccurrences(argv, optionName) {
2601
2832
  function validateSingleRegexOptionUsage(argv) {
2602
2833
  if (countLongOptionOccurrences(argv, "--regex") > 1) throw new Error("`--regex` can only be provided once.");
2603
2834
  }
2835
/**
 * Ensures `--print-jobs-limit` is the only argument on the command line.
 *
 * The first two `argv` entries are dropped before checking, and empty-string
 * tokens are ignored.
 *
 * @param {string[]} argv - Full argument vector.
 * @throws {Error} When anything other than a single `--print-jobs-limit` token remains.
 */
function validateStandalonePrintJobsLimitUsage(argv) {
	const isNonEmpty = (token) => token.length > 0;
	const userTokens = argv.slice(2).filter(isNonEmpty);
	const usedAlone = userTokens.length === 1 && userTokens[0] === "--print-jobs-limit";
	if (usedAlone) return;
	throw new Error("`--print-jobs-limit` must be used alone.");
}
2604
2839
  function resolveBatchScope(argv) {
2605
2840
  let scope = "merged";
2606
2841
  for (const token of argv) {
@@ -2701,6 +2936,12 @@ function formatInputReadError(error) {
2701
2936
  //#endregion
2702
2937
  //#region src/cli/runtime/batch.ts
2703
2938
  async function executeBatchCount({ argv, options, runtime, resolved, debug, teeEnabled }) {
2939
+ const warningsEnabled = !Boolean(options.quietWarnings);
2940
+ const emitWarning = (message) => {
2941
+ if (!warningsEnabled) return;
2942
+ const warningLine = message.startsWith("Warning:") ? message : `Warning: ${message}`;
2943
+ console.error(import_picocolors.default.yellow(warningLine));
2944
+ };
2704
2945
  const batchOptions = {
2705
2946
  scope: resolveBatchScope(argv),
2706
2947
  pathMode: options.pathMode,
@@ -2709,6 +2950,11 @@ async function executeBatchCount({ argv, options, runtime, resolved, debug, teeE
2709
2950
  directoryRegexPattern: options.regex
2710
2951
  };
2711
2952
  const extensionFilter = buildDirectoryExtensionFilter(options.includeExt, options.excludeExt);
2953
+ const requestedJobs = options.jobs;
2954
+ const jobsLimit = resolveBatchJobsLimit();
2955
+ const jobs = clampRequestedJobs(requestedJobs, jobsLimit);
2956
+ if (requestedJobs > jobsLimit.suggestedMaxJobs) emitWarning(formatJobsAdvisoryWarning(requestedJobs, jobs, jobsLimit));
2957
+ const jobsStrategy = resolveBatchJobsStrategy(jobs);
2712
2958
  const debugEnabled = Boolean(options.debug);
2713
2959
  const mirrorDebugToTerminal = debugEnabled && (!debug.reportPath || teeEnabled);
2714
2960
  const summary = await runBatchCount({
@@ -2723,16 +2969,21 @@ async function executeBatchCount({ argv, options, runtime, resolved, debug, teeE
2723
2969
  enabled: options.format === "standard" && options.progress,
2724
2970
  stream: runtime.stderr ?? process.stderr,
2725
2971
  clearOnFinish: !(mirrorDebugToTerminal || options.keepProgress)
2726
- })
2972
+ }),
2973
+ jobs,
2974
+ jobsStrategy,
2975
+ emitWarning
2727
2976
  });
2728
2977
  const showSkipDiagnostics = debugEnabled && !batchOptions.quietSkips;
2978
+ const showSkipItems = showSkipDiagnostics && Boolean(options.verbose);
2729
2979
  debug.emit("batch.skips.policy", {
2730
2980
  enabled: showSkipDiagnostics,
2981
+ items: showSkipItems,
2731
2982
  quietSkips: batchOptions.quietSkips
2732
2983
  });
2733
2984
  if (showSkipDiagnostics) {
2734
2985
  debug.emit("batch.skips.report", { count: summary.skipped.length });
2735
- if (options.verbose) for (const skip of summary.skipped) debug.emit("batch.skips.item", {
2986
+ if (showSkipItems) for (const skip of summary.skipped) debug.emit("batch.skips.item", {
2736
2987
  path: skip.path,
2737
2988
  reason: skip.reason
2738
2989
  }, { verbosity: "verbose" });
@@ -2891,6 +3142,17 @@ async function runCli(argv = process.argv, runtime = {}) {
2891
3142
  program.name("word-counter").description("Locale-aware word counting powered by Intl.Segmenter.").version(getFormattedVersionLabel(), "-v, --version", "output the version number");
2892
3143
  configureProgramOptions(program, parseMode);
2893
3144
  program.action(async (textTokens, options) => {
3145
+ if (options.printJobsLimit) {
3146
+ try {
3147
+ validateStandalonePrintJobsLimitUsage(argv);
3148
+ } catch (error) {
3149
+ const message = error instanceof Error ? error.message : String(error);
3150
+ program.error(import_picocolors.default.red(message));
3151
+ return;
3152
+ }
3153
+ console.log(JSON.stringify(resolveBatchJobsLimit()));
3154
+ return;
3155
+ }
2894
3156
  const debugEnabled = Boolean(options.debug);
2895
3157
  const debugReportPath = resolveDebugReportPathOption(options.debugReport);
2896
3158
  const debugReportEnabled = options.debugReport !== void 0 && options.debugReport !== false;