@dev-pi2pie/word-counter 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -4
- package/dist/esm/bin.mjs +1878 -1616
- package/dist/esm/bin.mjs.map +1 -1
- package/dist/esm/worker/count-worker.mjs +1370 -0
- package/dist/esm/worker/count-worker.mjs.map +1 -0
- package/dist/esm/worker-pool.mjs +187 -0
- package/dist/esm/worker-pool.mjs.map +1 -0
- package/package.json +1 -1
package/dist/esm/bin.mjs
CHANGED
|
@@ -4,8 +4,9 @@ import { Command, Option } from "commander";
|
|
|
4
4
|
import { closeSync, createWriteStream, existsSync, mkdirSync, openSync, readFileSync, statSync } from "node:fs";
|
|
5
5
|
import { basename, dirname, extname, join, relative, resolve, sep } from "node:path";
|
|
6
6
|
import { fileURLToPath } from "node:url";
|
|
7
|
-
import
|
|
7
|
+
import os from "node:os";
|
|
8
8
|
import { parseDocument } from "yaml";
|
|
9
|
+
import { readFile, readdir, stat } from "node:fs/promises";
|
|
9
10
|
|
|
10
11
|
//#region \0rolldown/runtime.js
|
|
11
12
|
var __create = Object.create;
|
|
@@ -345,8 +346,14 @@ function collectPathValue(value, previous = []) {
|
|
|
345
346
|
/**
 * Accumulator for a repeatable option value: builds a new list holding every
 * previously collected entry followed by the newest one.
 *
 * @param {*} value - Value supplied for the current occurrence of the option.
 * @param {Iterable<*>} [previous=[]] - Entries gathered so far; left unmodified.
 * @returns {Array<*>} Fresh array ending with `value`.
 */
function collectLatinHintValue(value, previous = []) {
  // Copy first so the caller-owned accumulator is never mutated.
  const collected = [...previous];
  collected.push(value);
  return collected;
}
|
|
349
|
+
/**
 * Parses the raw `--jobs` value into a job count.
 *
 * Only an unsigned run of decimal digits is accepted, so signs, decimals,
 * exponents and blank input are all rejected before conversion.
 *
 * @param {string} value - Raw option text.
 * @returns {number} Safe integer greater than or equal to 1.
 * @throws {Error} When the text is not digits-only, exceeds the safe-integer
 *   range, or is below 1.
 */
function parseJobsOption(value) {
  const invalidMessage = "`--jobs` must be an integer >= 1.";
  const isDigitsOnly = /^\d+$/.test(value);
  if (!isDigitsOnly) throw new Error(invalidMessage);
  const jobs = Number.parseInt(value, 10);
  // Very long digit strings overflow to a non-safe value and are refused here.
  const isUsable = Number.isSafeInteger(jobs) && jobs >= 1;
  if (!isUsable) throw new Error(invalidMessage);
  return jobs;
}
|
|
348
355
|
function configureProgramOptions(program, parseMode) {
|
|
349
|
-
program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--latin-hint <tag>=<pattern>", "add a custom Latin hint rule (repeatable)", collectLatinHintValue, []).option("--latin-hints-file <path>", "load custom Latin hint rules from a JSON file").option("--no-default-latin-hints", "disable built-in Latin hint rules").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies
|
|
356
|
+
program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--latin-hint <tag>=<pattern>", "add a custom Latin hint rule (repeatable)", collectLatinHintValue, []).option("--latin-hints-file <path>", "load custom Latin hint rules from a JSON file").option("--no-default-latin-hints", "disable built-in Latin hint rules").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", "mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of 
--debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--jobs <n>", "batch jobs in --path mode (1=async main-thread, >1=worker load+count)", parseJobsOption, 1).option("--print-jobs-limit", "print host jobs-limit JSON and exit (must be used alone)").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in standard mode").option("--no-recursive", "disable recursive directory traversal").option("--quiet-warnings", "suppress non-fatal warning diagnostics").option("--quiet-skips", "suppress debug skip output and per-file json skipped field").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("--regex <pattern>", "regex filter for directory-scanned paths (applies to --path directories only)").option("-p, --path <path>", "read input from file or directory (directories expand in auto mode by default)", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
|
|
350
357
|
}
|
|
351
358
|
|
|
352
359
|
//#endregion
|
|
@@ -421,7 +428,7 @@ var require_picocolors = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
|
421
428
|
//#endregion
|
|
422
429
|
//#region src/cli/program/version-embedded.ts
|
|
423
430
|
var import_picocolors = /* @__PURE__ */ __toESM(require_picocolors(), 1);
|
|
424
|
-
const EMBEDDED_PACKAGE_VERSION = "0.1.
|
|
431
|
+
// Package version string compiled into this bundle.
// NOTE(review): the code that reads this constant is outside the visible excerpt — confirm how it is used before changing the format.
const EMBEDDED_PACKAGE_VERSION = "0.1.4";
|
|
425
432
|
|
|
426
433
|
//#endregion
|
|
427
434
|
//#region src/cli/program/version.ts
|
|
@@ -474,6 +481,51 @@ function getFormattedVersionLabel() {
|
|
|
474
481
|
return import_picocolors.default.bgBlack(import_picocolors.default.bold(import_picocolors.default.italic(` word-counter ${import_picocolors.default.cyanBright(`ver.${version}`)} `)));
|
|
475
482
|
}
|
|
476
483
|
|
|
484
|
+
//#endregion
|
|
485
|
+
//#region src/cli/batch/jobs/limits.ts
|
|
486
|
+
// Thread-pool size assumed by resolveBatchJobsLimit when UV_THREADPOOL_SIZE is unset or not a positive integer.
// NOTE(review): presumably mirrors libuv's built-in default — confirm against the libuv documentation.
const DEFAULT_UV_THREADPOOL_SIZE = 4;
|
|
487
|
+
/**
 * Leniently reads a positive integer from text.
 *
 * Parsing uses `Number.parseInt` with radix 10, so leading digits are taken
 * and any trailing non-digit characters are ignored (for example "4abc" → 4).
 *
 * @param {string | undefined} value - Text to read; empty or missing input is allowed.
 * @returns {number | undefined} The parsed value when it is finite and greater
 *   than zero, otherwise `undefined`.
 */
function parsePositiveInteger(value) {
  if (value) {
    const candidate = Number.parseInt(value, 10);
    if (Number.isFinite(candidate) && candidate > 0) return candidate;
  }
  return undefined;
}
|
|
493
|
+
/**
 * Derives the advisory job limits for the current host.
 *
 * - `cpuLimit` comes from `os.availableParallelism()`.
 * - `uvThreadpool` is `UV_THREADPOOL_SIZE` from `env` when it parses as a
 *   positive integer, otherwise `DEFAULT_UV_THREADPOOL_SIZE`.
 * - `ioLimit` is twice the thread-pool size.
 * - `suggestedMaxJobs` is the smaller of the CPU and I/O limits.
 *
 * Every limit is floored at 1.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to read.
 * @returns {{suggestedMaxJobs: number, cpuLimit: number, uvThreadpool: number, ioLimit: number}}
 */
function resolveBatchJobsLimit(env = process.env) {
  const atLeastOne = (count) => Math.max(1, count);
  const cpuLimit = atLeastOne(os.availableParallelism());
  const configuredPool = parsePositiveInteger(env.UV_THREADPOOL_SIZE);
  const uvThreadpool = configuredPool ?? DEFAULT_UV_THREADPOOL_SIZE;
  const ioLimit = atLeastOne(uvThreadpool * 2);
  const suggestedMaxJobs = atLeastOne(Math.min(cpuLimit, ioLimit));
  return {
    suggestedMaxJobs,
    cpuLimit,
    uvThreadpool,
    ioLimit
  };
}
|
|
504
|
+
/**
 * Restricts a requested job count to the range 1..`limits.suggestedMaxJobs`.
 *
 * @param {number} requestedJobs - Job count asked for by the user.
 * @param {{suggestedMaxJobs: number}} limits - Host limits; only `suggestedMaxJobs` is read.
 * @returns {number} The capped job count, never below 1.
 */
function clampRequestedJobs(requestedJobs, limits) {
  const ceiling = limits.suggestedMaxJobs;
  const capped = Math.min(requestedJobs, ceiling);
  return Math.max(1, capped);
}
|
|
507
|
+
/**
 * Builds the one-line warning shown when the requested `--jobs` value is
 * above the suggested host limit.
 *
 * @param {number} requestedJobs - Value the user asked for.
 * @param {number} effectiveJobs - Value that will actually be used.
 * @param {{suggestedMaxJobs: number, cpuLimit: number, uvThreadpool: number, ioLimit: number}} limits - Host limits to report.
 * @returns {string} Three sentences joined by single spaces.
 */
function formatJobsAdvisoryWarning(requestedJobs, effectiveJobs, limits) {
  const { suggestedMaxJobs, cpuLimit, uvThreadpool, ioLimit } = limits;
  const headline = `Warning: requested --jobs=${requestedJobs} exceeds suggested host limit (${suggestedMaxJobs}).`;
  const capNotice = `Running with --jobs=${effectiveJobs} as a safety cap.`;
  const hostSummary = `Host limits: cpuLimit=${cpuLimit}, uvThreadpool=${uvThreadpool}, ioLimit=${ioLimit}.`;
  return `${headline} ${capNotice} ${hostSummary}`;
}
|
|
514
|
+
/**
 * Tells whether a thrown value carries one of the file-descriptor exhaustion
 * codes `EMFILE` or `ENFILE`.
 *
 * @param {unknown} error - Any thrown value.
 * @returns {boolean} `true` only for non-null objects whose `code` is one of
 *   the two codes above.
 */
function isResourceLimitError(error) {
  const isObject = typeof error === "object" && error !== null;
  if (!isObject || !("code" in error)) return false;
  return ["EMFILE", "ENFILE"].includes(error.code);
}
|
|
519
|
+
/**
 * Wraps a low-level failure in a user-facing error that explains the
 * resource limit and how to recover.
 *
 * The original failure is attached as `cause`, so its stack trace and
 * properties stay reachable from the wrapper instead of being flattened
 * into the message text only.
 *
 * @param {string} path - Path that was being processed when the failure occurred.
 * @param {unknown} error - The underlying thrown value.
 * @param {number} requestedJobs - The `--jobs` value that was requested.
 * @param {{suggestedMaxJobs: number}} limits - Host limits; only `suggestedMaxJobs` is read.
 * @returns {Error} New error with the explanatory message and `cause` set to `error`.
 */
function createResourceLimitError(path, error, requestedJobs, limits) {
  const message = error instanceof Error ? error.message : String(error);
  const hasCode = typeof error === "object" && error !== null && "code" in error;
  const code = hasCode ? String(error.code) : "UNKNOWN";
  const text = [
    `Resource limit reached while processing: ${path} (${code}: ${message}).`,
    `Requested --jobs=${requestedJobs}; suggested host limit is ${limits.suggestedMaxJobs}.`,
    "Reduce --jobs or raise OS file descriptor limits before retrying."
  ].join(" ");
  return new Error(text, { cause: error });
}
|
|
528
|
+
|
|
477
529
|
//#endregion
|
|
478
530
|
//#region src/utils/append-all.ts
|
|
479
531
|
function appendAll(target, source) {
|
|
@@ -481,1811 +533,1928 @@ function appendAll(target, source) {
|
|
|
481
533
|
}
|
|
482
534
|
|
|
483
535
|
//#endregion
|
|
484
|
-
//#region src/
|
|
485
|
-
function
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
if (
|
|
495
|
-
|
|
536
|
+
//#region src/markdown/toml/arrays.ts
|
|
537
|
+
/**
 * Returns the array stored at `result[key]`, installing a new empty array
 * there first when the current value is not an array.
 *
 * @param {Record<string, unknown>} result - Object that owns the container; may be mutated.
 * @param {string} key - Property name of the container.
 * @returns {Array<unknown>} The array now stored at `result[key]`.
 */
function ensureArrayContainer(result, key) {
  let container = result[key];
  if (!Array.isArray(container)) {
    // Any non-array value previously stored under the key is replaced.
    container = [];
    result[key] = container;
  }
  return container;
}
|
|
544
|
+
function flattenArrayTables(result) {
|
|
545
|
+
for (const [key, value] of Object.entries(result)) {
|
|
546
|
+
if (!Array.isArray(value)) continue;
|
|
547
|
+
result[key] = value.map((entry) => Object.entries(entry).map(([entryKey, entryValue]) => `${entryKey}=${entryValue}`).join(", ")).join(" | ");
|
|
496
548
|
}
|
|
497
|
-
return suspicious / sampleSize > .3;
|
|
498
549
|
}
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
550
|
+
|
|
551
|
+
//#endregion
|
|
552
|
+
//#region src/markdown/toml/keys.ts
|
|
553
|
+
/**
 * Trims a key and, when it both starts and ends with the same quote
 * character (`"` or `'`), drops the first and last character.
 *
 * @param {string} key - Raw key text.
 * @returns {string} Trimmed key without its surrounding quotes.
 */
function stripKeyQuotes(key) {
  const text = key.trim();
  const wrappedIn = (quote) => text.startsWith(quote) && text.endsWith(quote);
  return wrappedIn("\"") || wrappedIn("'") ? text.slice(1, -1) : text;
}
|
|
558
|
+
/**
 * Normalizes a key or dotted key path.
 *
 * - Blank input yields `null`.
 * - A key fully wrapped in matching quotes (`"` or `'`) is unwrapped and
 *   returned as-is (its dots are not treated as separators); an empty quoted
 *   key yields `null`.
 * - Otherwise the key is split on `.`, each segment is trimmed, and the path
 *   is re-joined; any empty segment yields `null`.
 *
 * @param {string} key - Raw key text.
 * @returns {string | null} Normalized key path, or `null` when invalid.
 */
function normalizeKeyPath(key) {
  const text = key.trim();
  if (text === "") return null;
  const wrappedIn = (quote) => text.startsWith(quote) && text.endsWith(quote);
  if (wrappedIn("\"") || wrappedIn("'")) {
    const inner = text.slice(1, -1);
    return inner === "" ? null : inner;
  }
  const parts = text.split(".").map((part) => part.trim());
  return parts.every((part) => part !== "") ? parts.join(".") : null;
}
|
|
569
|
+
|
|
570
|
+
//#endregion
|
|
571
|
+
//#region src/markdown/toml/strings.ts
|
|
572
|
+
function stripInlineComment(line) {
|
|
573
|
+
let inString = null;
|
|
574
|
+
let escaped = false;
|
|
575
|
+
for (let i = 0; i < line.length; i += 1) {
|
|
576
|
+
const char = line[i] ?? "";
|
|
577
|
+
if (inString) {
|
|
578
|
+
if (escaped) {
|
|
579
|
+
escaped = false;
|
|
580
|
+
continue;
|
|
581
|
+
}
|
|
582
|
+
if (char === "\\" && inString === "double") {
|
|
583
|
+
escaped = true;
|
|
584
|
+
continue;
|
|
585
|
+
}
|
|
586
|
+
if (inString === "double" && char === "\"") {
|
|
587
|
+
inString = null;
|
|
588
|
+
continue;
|
|
589
|
+
}
|
|
590
|
+
if (inString === "single" && char === "'") {
|
|
591
|
+
inString = null;
|
|
592
|
+
continue;
|
|
593
|
+
}
|
|
512
594
|
continue;
|
|
513
595
|
}
|
|
514
|
-
if (
|
|
515
|
-
|
|
516
|
-
path: filePath,
|
|
517
|
-
reason: "binary file"
|
|
518
|
-
});
|
|
596
|
+
if (char === "\"") {
|
|
597
|
+
inString = "double";
|
|
519
598
|
continue;
|
|
520
599
|
}
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
}
|
|
600
|
+
if (char === "'") {
|
|
601
|
+
inString = "single";
|
|
602
|
+
continue;
|
|
603
|
+
}
|
|
604
|
+
if (char === "#") return line.slice(0, i).trimEnd();
|
|
525
605
|
}
|
|
526
|
-
return
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
606
|
+
return line;
|
|
607
|
+
}
|
|
608
|
+
/**
 * Decodes the backslash escapes `\\`, `\"`, `\n`, `\t` and `\r` in the body
 * of a basic (double-quoted) string.
 *
 * All escapes are resolved in a single left-to-right pass. Replacing them one
 * kind at a time would re-scan already decoded output, so an escaped
 * backslash followed by `n` would wrongly turn into a newline. `\t` decodes
 * to a real tab character. Escape sequences outside this set are left
 * untouched.
 *
 * @param {string} input - String body without its surrounding quotes.
 * @returns {string} The decoded text.
 */
function unescapeBasic(input) {
  const replacements = {
    "\\": "\\",
    "\"": "\"",
    n: "\n",
    t: "\t",
    r: "\r"
  };
  return input.replace(/\\([\\"ntr])/g, (_match, escapeChar) => replacements[escapeChar]);
}
|
|
611
|
+
/**
 * Extracts the content of a quoted string literal.
 *
 * Delimiters are tried in this order: `"""`, `'''`, `"`, `'`. Double-quoted
 * forms are passed through `unescapeBasic`; single-quoted forms are returned
 * verbatim.
 *
 * @param {string} value - Candidate literal, already trimmed by the caller.
 * @returns {string | null} The string content, or `null` when `value` is not
 *   wrapped in one of the supported delimiters.
 */
function parseStringLiteral(value) {
  const wrappedIn = (delimiter) => value.startsWith(delimiter) && value.endsWith(delimiter);
  const inner = (width) => value.slice(width, -width);
  // Triple-quoted forms must be checked before their single-character counterparts.
  if (wrappedIn("\"\"\"")) return unescapeBasic(inner(3));
  if (wrappedIn("'''")) return inner(3);
  if (wrappedIn("\"")) return unescapeBasic(inner(1));
  if (wrappedIn("'")) return inner(1);
  return null;
}
|
|
531
618
|
|
|
532
619
|
//#endregion
|
|
533
|
-
//#region src/
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
for (const entry of sortedEntries) {
|
|
561
|
-
const entryPath = resolve(directoryPath, entry.name);
|
|
562
|
-
if (entry.isFile()) {
|
|
563
|
-
if (!shouldIncludeFromDirectory(entryPath, extensionFilter)) {
|
|
564
|
-
skipped.push({
|
|
565
|
-
path: entryPath,
|
|
566
|
-
reason: "extension excluded"
|
|
567
|
-
});
|
|
568
|
-
debug.emit("path.resolve.filter.excluded", {
|
|
569
|
-
path: entryPath,
|
|
570
|
-
reason: "extension excluded"
|
|
571
|
-
}, { verbosity: "verbose" });
|
|
572
|
-
stats.filterExcluded += 1;
|
|
620
|
+
//#region src/markdown/toml/values.ts
|
|
621
|
+
/**
 * Converts a raw scalar into a JavaScript value.
 *
 * Resolution order after trimming:
 * 1. blank text → `null`;
 * 2. quoted string literal → its content (via `parseStringLiteral`);
 * 3. `true` / `false` → boolean;
 * 4. decimal number with optional sign, fraction and exponent → number;
 * 5. anything else, including date-like text such as `2024-01-31`, → the
 *    trimmed text unchanged.
 *
 * @param {string} raw - Raw value text.
 * @returns {string | number | boolean | null} Parsed value, or `null` for blank input.
 */
function parsePrimitive(raw) {
  const text = raw.trim();
  if (text === "") return null;
  const literal = parseStringLiteral(text);
  if (literal !== null) return literal;
  switch (text) {
    case "true":
      return true;
    case "false":
      return false;
    default:
      break;
  }
  const numericPattern = /^[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$/;
  return numericPattern.test(text) ? Number(text) : text;
}
|
|
632
|
+
function parseArray(raw) {
|
|
633
|
+
const value = raw.trim();
|
|
634
|
+
if (!value.startsWith("[") || !value.endsWith("]")) return null;
|
|
635
|
+
const inner = value.slice(1, -1).trim();
|
|
636
|
+
if (!inner) return [];
|
|
637
|
+
const items = [];
|
|
638
|
+
let current = "";
|
|
639
|
+
let inString = null;
|
|
640
|
+
let escaped = false;
|
|
641
|
+
for (let i = 0; i < inner.length; i += 1) {
|
|
642
|
+
const char = inner[i] ?? "";
|
|
643
|
+
if (inString) {
|
|
644
|
+
current += char;
|
|
645
|
+
if (escaped) {
|
|
646
|
+
escaped = false;
|
|
573
647
|
continue;
|
|
574
648
|
}
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
if (recordRegexExcluded(entryPath)) {
|
|
578
|
-
debug.emit("path.resolve.regex.excluded", {
|
|
579
|
-
path: entryPath,
|
|
580
|
-
relativePath,
|
|
581
|
-
pattern: regexFilter.sourcePattern,
|
|
582
|
-
reason: "regex excluded"
|
|
583
|
-
}, { verbosity: "verbose" });
|
|
584
|
-
stats.regexExcluded += 1;
|
|
585
|
-
}
|
|
649
|
+
if (char === "\\" && inString === "double") {
|
|
650
|
+
escaped = true;
|
|
586
651
|
continue;
|
|
587
652
|
}
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
debug.emit("path.resolve.expand.include", {
|
|
591
|
-
path: entryPath,
|
|
592
|
-
source: "directory"
|
|
593
|
-
}, { verbosity: "verbose" });
|
|
653
|
+
if (inString === "double" && char === "\"") inString = null;
|
|
654
|
+
else if (inString === "single" && char === "'") inString = null;
|
|
594
655
|
continue;
|
|
595
656
|
}
|
|
596
|
-
if (
|
|
597
|
-
|
|
657
|
+
if (char === "\"") {
|
|
658
|
+
inString = "double";
|
|
659
|
+
current += char;
|
|
660
|
+
continue;
|
|
661
|
+
}
|
|
662
|
+
if (char === "'") {
|
|
663
|
+
inString = "single";
|
|
664
|
+
current += char;
|
|
665
|
+
continue;
|
|
666
|
+
}
|
|
667
|
+
if (char === ",") {
|
|
668
|
+
const item = parsePrimitive(current);
|
|
669
|
+
if (item === null) return null;
|
|
670
|
+
items.push(item);
|
|
671
|
+
current = "";
|
|
672
|
+
continue;
|
|
673
|
+
}
|
|
674
|
+
current += char;
|
|
598
675
|
}
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
return files;
|
|
676
|
+
const finalItem = parsePrimitive(current);
|
|
677
|
+
if (finalItem === null) return null;
|
|
678
|
+
items.push(finalItem);
|
|
679
|
+
return items;
|
|
604
680
|
}
|
|
605
|
-
|
|
606
|
-
const
|
|
607
|
-
|
|
608
|
-
const
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
regexExcludedPaths.delete(filePath);
|
|
632
|
-
if (resolvedFiles.has(filePath)) {
|
|
633
|
-
stats.dedupeDuplicates += 1;
|
|
634
|
-
debug.emit("path.resolve.dedupe.duplicate", {
|
|
635
|
-
path: filePath,
|
|
636
|
-
source: details.source,
|
|
637
|
-
input: details.input
|
|
638
|
-
}, { verbosity: "verbose" });
|
|
639
|
-
return;
|
|
681
|
+
function parseInlineTable(raw) {
|
|
682
|
+
const trimmed = raw.trim();
|
|
683
|
+
if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) return null;
|
|
684
|
+
const inner = trimmed.slice(1, -1).trim();
|
|
685
|
+
if (!inner) return {};
|
|
686
|
+
const pairs = [];
|
|
687
|
+
let current = "";
|
|
688
|
+
let inString = null;
|
|
689
|
+
let escaped = false;
|
|
690
|
+
let bracketDepth = 0;
|
|
691
|
+
let braceDepth = 0;
|
|
692
|
+
for (let i = 0; i < inner.length; i += 1) {
|
|
693
|
+
const char = inner[i] ?? "";
|
|
694
|
+
if (inString) {
|
|
695
|
+
current += char;
|
|
696
|
+
if (escaped) {
|
|
697
|
+
escaped = false;
|
|
698
|
+
continue;
|
|
699
|
+
}
|
|
700
|
+
if (char === "\\" && inString === "double") {
|
|
701
|
+
escaped = true;
|
|
702
|
+
continue;
|
|
703
|
+
}
|
|
704
|
+
if (inString === "double" && char === "\"") inString = null;
|
|
705
|
+
else if (inString === "single" && char === "'") inString = null;
|
|
706
|
+
continue;
|
|
640
707
|
}
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
path: filePath,
|
|
645
|
-
source: details.source,
|
|
646
|
-
input: details.input
|
|
647
|
-
}, { verbosity: "verbose" });
|
|
648
|
-
};
|
|
649
|
-
const getRegexFilter = () => {
|
|
650
|
-
if (!regexFilter) regexFilter = buildDirectoryRegexFilter(options.directoryRegexPattern);
|
|
651
|
-
return regexFilter;
|
|
652
|
-
};
|
|
653
|
-
const recordRegexExcluded = (filePath) => {
|
|
654
|
-
if (resolvedFiles.has(filePath)) return false;
|
|
655
|
-
regexExcludedPaths.add(filePath);
|
|
656
|
-
return true;
|
|
657
|
-
};
|
|
658
|
-
for (const rawPath of pathInputs) {
|
|
659
|
-
const targetPath = resolve(rawPath);
|
|
660
|
-
debug.emit("path.resolve.input", {
|
|
661
|
-
rawPath,
|
|
662
|
-
resolvedPath: targetPath
|
|
663
|
-
});
|
|
664
|
-
let metadata;
|
|
665
|
-
try {
|
|
666
|
-
metadata = await stat(targetPath);
|
|
667
|
-
} catch (error) {
|
|
668
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
669
|
-
skipped.push({
|
|
670
|
-
path: targetPath,
|
|
671
|
-
reason: `not readable: ${message}`
|
|
672
|
-
});
|
|
673
|
-
debug.emit("path.resolve.skip", {
|
|
674
|
-
path: targetPath,
|
|
675
|
-
reason: `not readable: ${message}`
|
|
676
|
-
});
|
|
708
|
+
if (char === "\"") {
|
|
709
|
+
inString = "double";
|
|
710
|
+
current += char;
|
|
677
711
|
continue;
|
|
678
712
|
}
|
|
679
|
-
if (
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
root: targetPath,
|
|
683
|
-
recursive: options.recursive,
|
|
684
|
-
regex: effectiveRegexFilter.sourcePattern ?? null
|
|
685
|
-
});
|
|
686
|
-
const files = await expandDirectory(targetPath, targetPath, options.recursive, extensionFilter, effectiveRegexFilter, skipped, recordRegexExcluded, debug, stats);
|
|
687
|
-
for (const file of files) addResolvedFile(file, {
|
|
688
|
-
source: "directory",
|
|
689
|
-
input: targetPath
|
|
690
|
-
});
|
|
713
|
+
if (char === "'") {
|
|
714
|
+
inString = "single";
|
|
715
|
+
current += char;
|
|
691
716
|
continue;
|
|
692
717
|
}
|
|
693
|
-
if (
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
reason: "not a regular file"
|
|
697
|
-
});
|
|
698
|
-
debug.emit("path.resolve.skip", {
|
|
699
|
-
path: targetPath,
|
|
700
|
-
reason: "not a regular file"
|
|
701
|
-
});
|
|
718
|
+
if (char === "[") {
|
|
719
|
+
bracketDepth += 1;
|
|
720
|
+
current += char;
|
|
702
721
|
continue;
|
|
703
722
|
}
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
723
|
+
if (char === "]") {
|
|
724
|
+
if (bracketDepth > 0) bracketDepth -= 1;
|
|
725
|
+
current += char;
|
|
726
|
+
continue;
|
|
727
|
+
}
|
|
728
|
+
if (char === "{") {
|
|
729
|
+
braceDepth += 1;
|
|
730
|
+
current += char;
|
|
731
|
+
continue;
|
|
732
|
+
}
|
|
733
|
+
if (char === "}") {
|
|
734
|
+
if (braceDepth > 0) braceDepth -= 1;
|
|
735
|
+
current += char;
|
|
736
|
+
continue;
|
|
737
|
+
}
|
|
738
|
+
if (char === "," && bracketDepth === 0 && braceDepth === 0) {
|
|
739
|
+
pairs.push(current);
|
|
740
|
+
current = "";
|
|
741
|
+
continue;
|
|
742
|
+
}
|
|
743
|
+
current += char;
|
|
708
744
|
}
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
}
|
|
724
|
-
|
|
725
|
-
files: files.length,
|
|
726
|
-
skipped: skipped.length,
|
|
727
|
-
ordering: "absolute-path-ascending"
|
|
728
|
-
});
|
|
729
|
-
return {
|
|
730
|
-
files,
|
|
731
|
-
skipped
|
|
732
|
-
};
|
|
733
|
-
}
|
|
734
|
-
|
|
735
|
-
//#endregion
|
|
736
|
-
//#region src/cli/progress/reporter.ts
|
|
737
|
-
const PROGRESS_BAR_WIDTH = 20;
|
|
738
|
-
const FILLED_BAR_CHAR = "█";
|
|
739
|
-
const EMPTY_BAR_CHAR = "░";
|
|
740
|
-
function clamp(value, min, max) {
|
|
741
|
-
return Math.max(min, Math.min(max, value));
|
|
742
|
-
}
|
|
743
|
-
function buildProgressBar(completed, total) {
|
|
744
|
-
const safeTotal = Math.max(total, 1);
|
|
745
|
-
const ratio = clamp(completed / safeTotal, 0, 1);
|
|
746
|
-
const filled = completed >= safeTotal ? PROGRESS_BAR_WIDTH : Math.floor(ratio * PROGRESS_BAR_WIDTH);
|
|
747
|
-
const empty = PROGRESS_BAR_WIDTH - filled;
|
|
748
|
-
return `${FILLED_BAR_CHAR.repeat(filled)}${EMPTY_BAR_CHAR.repeat(empty)}`;
|
|
749
|
-
}
|
|
750
|
-
function formatElapsed(startedAtMs) {
|
|
751
|
-
const elapsedMs = Date.now() - startedAtMs;
|
|
752
|
-
const totalSeconds = Math.max(0, Math.floor(elapsedMs / 1e3));
|
|
753
|
-
const minutes = Math.floor(totalSeconds / 60);
|
|
754
|
-
const seconds = totalSeconds % 60;
|
|
755
|
-
const tenths = Math.floor(Math.max(0, elapsedMs) % 1e3 / 100);
|
|
756
|
-
return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}.${tenths}`;
|
|
745
|
+
if (current.trim()) pairs.push(current);
|
|
746
|
+
const output = {};
|
|
747
|
+
for (const pair of pairs) {
|
|
748
|
+
const separatorIndex = pair.indexOf("=");
|
|
749
|
+
if (separatorIndex === -1) return null;
|
|
750
|
+
const key = normalizeKeyPath(pair.slice(0, separatorIndex));
|
|
751
|
+
if (!key) return null;
|
|
752
|
+
const valueRaw = pair.slice(separatorIndex + 1).trim();
|
|
753
|
+
if (!valueRaw) return null;
|
|
754
|
+
if (valueRaw.startsWith("{")) return null;
|
|
755
|
+
const normalized = normalizeValue(valueRaw);
|
|
756
|
+
if (normalized === null) return null;
|
|
757
|
+
if (typeof normalized === "object" && !Array.isArray(normalized)) return null;
|
|
758
|
+
output[key] = normalized;
|
|
759
|
+
}
|
|
760
|
+
return output;
|
|
757
761
|
}
|
|
758
|
-
function
|
|
759
|
-
|
|
760
|
-
const
|
|
761
|
-
|
|
762
|
+
/**
 * Parses a raw value into an inline table, an array, or a primitive.
 *
 * - `{ ... }` is delegated to `parseInlineTable`.
 * - Otherwise `parseArray` is tried; a truthy result is returned.
 * - Bracketed text that `parseArray` rejected yields `null` rather than
 *   falling back to a plain string.
 * - Everything else goes through `parsePrimitive`.
 *
 * @param {string} value - Raw value text.
 * @returns {object | Array | string | number | boolean | null} Parsed value,
 *   or `null` when the input is empty or cannot be parsed.
 */
function normalizeValue(value) {
  if (!value) return null;
  const text = value.trim();
  const wrappedIn = (open, close) => text.startsWith(open) && text.endsWith(close);
  if (wrappedIn("{", "}")) return parseInlineTable(text);
  const list = parseArray(text);
  if (list) return list;
  return wrappedIn("[", "]") ? null : parsePrimitive(text);
}
|
|
763
|
-
function
|
|
764
|
-
|
|
771
|
+
/**
 * Renders a parsed value as plain text.
 *
 * @param {unknown} value - Parsed value.
 * @returns {string} Empty string for `null`/`undefined`; array items
 *   stringified and joined with ", "; otherwise `String(value)`.
 */
function toPlainText(value) {
  if (value === null || value === undefined) return "";
  if (!Array.isArray(value)) return String(value);
  return value.map((item) => String(item)).join(", ");
}
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
let
|
|
773
|
-
let
|
|
774
|
-
let
|
|
775
|
-
let
|
|
776
|
-
|
|
777
|
-
const
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
776
|
+
|
|
777
|
+
//#endregion
|
|
778
|
+
//#region src/markdown/toml/parse-frontmatter.ts
|
|
779
|
+
function parseTomlFrontmatter(frontmatter) {
|
|
780
|
+
const result = {};
|
|
781
|
+
const lines = frontmatter.split("\n");
|
|
782
|
+
let tablePrefix = "";
|
|
783
|
+
let tableTarget = null;
|
|
784
|
+
let tablePrefixInList = false;
|
|
785
|
+
for (let index = 0; index < lines.length; index += 1) {
|
|
786
|
+
const rawLine = lines[index] ?? "";
|
|
787
|
+
const trimmedLine = rawLine.trim();
|
|
788
|
+
if (!trimmedLine || trimmedLine.startsWith("#")) continue;
|
|
789
|
+
if (trimmedLine.startsWith("[[")) {
|
|
790
|
+
const match = trimmedLine.match(/^\[\[([^\]]+)]]$/);
|
|
791
|
+
if (!match) return null;
|
|
792
|
+
const normalizedTable = normalizeKeyPath(match[1] ?? "");
|
|
793
|
+
if (!normalizedTable) return null;
|
|
794
|
+
const list = ensureArrayContainer(result, normalizedTable);
|
|
795
|
+
const newEntry = {};
|
|
796
|
+
list.push(newEntry);
|
|
797
|
+
tableTarget = newEntry;
|
|
798
|
+
tablePrefix = normalizedTable;
|
|
799
|
+
tablePrefixInList = true;
|
|
800
|
+
continue;
|
|
801
|
+
}
|
|
802
|
+
const tableMatch = trimmedLine.match(/^\[([^\]]+)]$/);
|
|
803
|
+
if (tableMatch) {
|
|
804
|
+
const normalizedTable = normalizeKeyPath(tableMatch[1] ?? "");
|
|
805
|
+
if (!normalizedTable) return null;
|
|
806
|
+
tablePrefix = normalizedTable;
|
|
807
|
+
tablePrefixInList = false;
|
|
808
|
+
tableTarget = null;
|
|
809
|
+
continue;
|
|
810
|
+
}
|
|
811
|
+
const lineForParsing = /("""|''')/.test(rawLine) ? rawLine : stripInlineComment(rawLine);
|
|
812
|
+
const separatorIndex = lineForParsing.indexOf("=");
|
|
813
|
+
if (separatorIndex === -1) return null;
|
|
814
|
+
const key = normalizeKeyPath(lineForParsing.slice(0, separatorIndex));
|
|
815
|
+
let valueRaw = lineForParsing.slice(separatorIndex + 1).trim();
|
|
816
|
+
if (!key) return null;
|
|
817
|
+
const tripleDelimiter = valueRaw.startsWith("\"\"\"") ? "\"\"\"" : valueRaw.startsWith("'''") ? "'''" : null;
|
|
818
|
+
if (tripleDelimiter) {
|
|
819
|
+
const closingIndex = valueRaw.indexOf(tripleDelimiter, tripleDelimiter.length);
|
|
820
|
+
if (closingIndex !== -1) {
|
|
821
|
+
const strippedAfter = stripInlineComment(valueRaw.slice(closingIndex + tripleDelimiter.length));
|
|
822
|
+
valueRaw = `${valueRaw.slice(0, closingIndex + tripleDelimiter.length)}${strippedAfter}`;
|
|
823
|
+
} else {
|
|
824
|
+
const delimiter = tripleDelimiter;
|
|
825
|
+
let combined = valueRaw;
|
|
826
|
+
let closed = false;
|
|
827
|
+
while (index + 1 < lines.length) {
|
|
828
|
+
index += 1;
|
|
829
|
+
const nextLine = lines[index] ?? "";
|
|
830
|
+
combined += `\n${nextLine}`;
|
|
831
|
+
if (new RegExp(`${delimiter}\\s*$`).test(nextLine)) {
|
|
832
|
+
closed = true;
|
|
833
|
+
break;
|
|
834
|
+
}
|
|
823
835
|
}
|
|
824
|
-
|
|
825
|
-
|
|
836
|
+
if (!closed) return null;
|
|
837
|
+
valueRaw = combined;
|
|
826
838
|
}
|
|
827
|
-
lastLineLength = line.length;
|
|
828
|
-
options.stream.write(`${line}\n`);
|
|
829
|
-
},
|
|
830
|
-
finish() {
|
|
831
|
-
if (!active) return;
|
|
832
|
-
if (isTTY) if (clearOnFinish) clearLine();
|
|
833
|
-
else options.stream.write("\n");
|
|
834
|
-
active = false;
|
|
835
839
|
}
|
|
836
|
-
|
|
840
|
+
const normalized = normalizeValue(valueRaw);
|
|
841
|
+
if (normalized === null) return null;
|
|
842
|
+
const fullKey = tablePrefix ? `${tablePrefix}.${key}` : key;
|
|
843
|
+
if (typeof normalized === "object" && !Array.isArray(normalized)) {
|
|
844
|
+
for (const [inlineKey, inlineValue] of Object.entries(normalized)) {
|
|
845
|
+
const entryKey = tablePrefixInList ? `${key}.${inlineKey}` : `${fullKey}.${inlineKey}`;
|
|
846
|
+
if (tablePrefixInList && tableTarget) tableTarget[entryKey] = toPlainText(inlineValue);
|
|
847
|
+
else result[entryKey] = toPlainText(inlineValue);
|
|
848
|
+
}
|
|
849
|
+
continue;
|
|
850
|
+
}
|
|
851
|
+
if (tablePrefixInList && tableTarget) {
|
|
852
|
+
tableTarget[key] = toPlainText(normalized);
|
|
853
|
+
continue;
|
|
854
|
+
}
|
|
855
|
+
result[fullKey] = toPlainText(normalized);
|
|
856
|
+
}
|
|
857
|
+
flattenArrayTables(result);
|
|
858
|
+
return result;
|
|
837
859
|
}
|
|
838
860
|
|
|
839
861
|
//#endregion
|
|
840
|
-
//#region src/markdown/
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
862
|
+
//#region src/markdown/parse-markdown.ts
|
|
863
|
+
// Maps each frontmatter fence marker to the format name that getFenceType reports and parseFrontmatter dispatches on.
const FENCE_TO_TYPE = {
|
|
864
|
+
"---": "yaml",
|
|
865
|
+
"+++": "toml",
|
|
866
|
+
";;;": "json"
|
|
867
|
+
};
|
|
868
|
+
/**
 * Converts every CRLF pair to a single LF. Lone carriage returns are left
 * unchanged.
 *
 * @param {string} input - Text to normalize.
 * @returns {string} Text with `\r\n` replaced by `\n`.
 */
function normalizeNewlines(input) {
  return input.split("\r\n").join("\n");
}
|
|
848
|
-
function
|
|
849
|
-
|
|
850
|
-
if (!Array.isArray(value)) continue;
|
|
851
|
-
result[key] = value.map((entry) => Object.entries(entry).map(([entryKey, entryValue]) => `${entryKey}=${entryValue}`).join(", ")).join(" | ");
|
|
852
|
-
}
|
|
871
|
+
/**
 * Removes a single leading byte-order mark (U+FEFF) from a line.
 *
 * The marker is spelled as the `\uFEFF` escape so it stays visible in the
 * source and cannot be silently lost by tooling that drops zero-width
 * characters. An empty needle would match every string and cut off the first
 * real character.
 *
 * @param {string} line - Line of text, typically the first line of a file.
 * @returns {string} The line without its leading BOM, or the line unchanged.
 */
function stripBom(line) {
  const BYTE_ORDER_MARK = "\uFEFF";
  return line.startsWith(BYTE_ORDER_MARK) ? line.slice(BYTE_ORDER_MARK.length) : line;
}
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
const trimmed = key.trim();
|
|
859
|
-
if (trimmed.startsWith("\"") && trimmed.endsWith("\"") || trimmed.startsWith("'") && trimmed.endsWith("'")) return trimmed.slice(1, -1);
|
|
860
|
-
return trimmed;
|
|
874
|
+
/**
 * Identifies the frontmatter format announced by a fence line.
 *
 * A fence line consists solely of `---`, `+++` or `;;;`, optionally
 * surrounded by tabs or spaces.
 *
 * @param {string} line - Candidate fence line.
 * @returns {string | null} The format name from `FENCE_TO_TYPE`, or `null`
 *   when the line is not a fence.
 */
function getFenceType(line) {
  const fence = line.match(/^[\t ]*(---|\+\+\+|;;;)[\t ]*$/)?.[1];
  if (fence === undefined) return null;
  return FENCE_TO_TYPE[fence] ?? null;
}
|
|
862
|
-
function
|
|
863
|
-
|
|
864
|
-
if (
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
return
|
|
879
|
+
/**
 * Parses raw frontmatter text according to its detected format.
 *
 * @param {string} frontmatter - The text between the frontmatter fences (or the
 *   JSON block itself).
 * @param {string|null} type - "json", "yaml" or "toml"; any other value yields null.
 * @returns {object|null} The parsed data, or null when the type is missing or
 *   unknown, or the text cannot be parsed. For JSON and YAML, null is also
 *   returned when the parsed value is not a plain (non-array) object.
 */
function parseFrontmatter(frontmatter, type) {
  if (!type) return null;
  if (type === "json") {
    let parsed;
    try {
      parsed = JSON.parse(frontmatter);
    } catch {
      // Malformed JSON is reported as "no data", like a YAML parse error below.
      return null;
    }
    // Same shape rule as the YAML branch: arrays and primitives are valid JSON
    // but are not frontmatter records.
    if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return null;
    return parsed;
  }
  if (type === "yaml") {
    const doc = parseDocument(frontmatter, { prettyErrors: false });
    if (doc.errors.length > 0) return null;
    const data = doc.toJSON();
    if (!data || typeof data !== "object" || Array.isArray(data)) return null;
    return data;
  }
  if (type === "toml") return parseTomlFrontmatter(frontmatter);
  return null;
}
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
function stripInlineComment(line) {
|
|
877
|
-
let inString = null;
|
|
896
|
+
/**
 * Scans forward from startIndex and returns the first span at which the curly
 * brace nesting returns to zero, ignoring braces inside double-quoted strings
 * (backslash escapes inside strings are honoured).
 *
 * @param {string} text - The text to scan.
 * @param {number} startIndex - Index at which scanning (and the returned slice) starts.
 * @returns {{jsonText: string, endIndex: number}|null} The balanced text and the
 *   index of its closing brace, or null when the braces never balance.
 */
function extractJsonBlock(text, startIndex) {
  let nesting = 0;
  let insideString = false;
  let afterBackslash = false;
  let cursor = startIndex;
  while (cursor < text.length) {
    const position = cursor;
    cursor += 1;
    const symbol = text[position] ?? "";
    if (insideString) {
      // Inside a string only escapes and the closing quote matter.
      if (afterBackslash) afterBackslash = false;
      else if (symbol === "\\") afterBackslash = true;
      else if (symbol === "\"") insideString = false;
      continue;
    }
    switch (symbol) {
      case "\"":
        insideString = true;
        break;
      case "{":
        nesting += 1;
        break;
      case "}":
        nesting -= 1;
        if (nesting === 0) {
          return {
            jsonText: text.slice(startIndex, position + 1),
            endIndex: position
          };
        }
        break;
      default:
        break;
    }
  }
  return null;
}
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
}
|
|
966
|
-
if (char === "'") {
|
|
967
|
-
inString = "single";
|
|
968
|
-
current += char;
|
|
969
|
-
continue;
|
|
970
|
-
}
|
|
971
|
-
if (char === ",") {
|
|
972
|
-
const item = parsePrimitive(current);
|
|
973
|
-
if (item === null) return null;
|
|
974
|
-
items.push(item);
|
|
975
|
-
current = "";
|
|
976
|
-
continue;
|
|
977
|
-
}
|
|
978
|
-
current += char;
|
|
932
|
+
/**
 * Splits a Markdown document into its frontmatter and body.
 *
 * Two frontmatter layouts are recognised:
 *  - a fenced block whose first line is a fence (`---`, `+++` or `;;;`) and which
 *    is closed by a later fence line of the same type;
 *  - a bare JSON object at the very start of the document (after optional
 *    spaces, tabs and newlines).
 *
 * CRLF newlines are normalised and a leading byte order mark is removed first.
 *
 * @param {string} input - Raw document text.
 * @returns {{frontmatter: string|null, content: string, data: object|null, frontmatterType: string|null}}
 *   When no frontmatter is found, `content` is the whole normalised document and
 *   the other fields are null. For a fenced block, `data` may be null (the raw
 *   text could not be parsed) while `frontmatter` and `frontmatterType` are set.
 */
function parseMarkdown(input) {
  const normalized = normalizeNewlines(input);
  // split() always yields at least one element, so lines[0] exists even for "".
  const lines = normalized.split("\n");
  lines[0] = stripBom(lines[0] ?? "");
  const normalizedWithoutBom = lines.join("\n");
  // Shared result for every "this document has no usable frontmatter" outcome.
  const withoutFrontmatter = () => ({
    frontmatter: null,
    content: normalizedWithoutBom,
    data: null,
    frontmatterType: null
  });
  const openingType = getFenceType(lines[0] ?? "");
  if (!openingType) {
    // No fence: accept a leading JSON object as frontmatter.
    const jsonStart = (normalizedWithoutBom.match(/^[\t \n]*/)?.[0] ?? "").length;
    if (normalizedWithoutBom[jsonStart] !== "{") return withoutFrontmatter();
    const jsonBlock = extractJsonBlock(normalizedWithoutBom, jsonStart);
    if (!jsonBlock) return withoutFrontmatter();
    const frontmatter = jsonBlock.jsonText;
    const data = parseFrontmatter(frontmatter, "json");
    // A balanced but unparsable block is treated as ordinary content.
    if (!data) return withoutFrontmatter();
    let content = normalizedWithoutBom.slice(jsonBlock.endIndex + 1);
    // Drop the single newline that separates the JSON block from the body.
    if (content.startsWith("\n")) content = content.slice(1);
    return {
      frontmatter,
      content,
      data,
      frontmatterType: "json"
    };
  }
  let closingIndex = -1;
  for (let i = 1; i < lines.length; i += 1) {
    if (getFenceType(lines[i] ?? "") === openingType) {
      closingIndex = i;
      break;
    }
  }
  // An opening fence without a matching closing fence is not frontmatter.
  if (closingIndex === -1) return withoutFrontmatter();
  const frontmatter = lines.slice(1, closingIndex).join("\n");
  return {
    frontmatter,
    content: lines.slice(closingIndex + 1).join("\n"),
    data: parseFrontmatter(frontmatter, openingType),
    frontmatterType: openingType
  };
}
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
995
|
+
|
|
996
|
+
//#endregion
|
|
997
|
+
//#region src/wc/segmenter.ts
|
|
998
|
+
// Word-granularity Intl.Segmenter instances keyed by locale; filled by getSegmenter().
const segmenterCache = /* @__PURE__ */ new Map();
// Grapheme-granularity Intl.Segmenter instances keyed by locale; filled by getGraphemeSegmenter().
const graphemeSegmenterCache = /* @__PURE__ */ new Map();
|
|
1000
|
+
/**
 * Returns the word-granularity segmenter for a locale, creating and caching it
 * in segmenterCache on first use.
 *
 * @param {string} locale - Locale tag passed to Intl.Segmenter.
 * @returns {Intl.Segmenter} The cached or newly created segmenter.
 */
function getSegmenter(locale) {
  let wordSegmenter = segmenterCache.get(locale);
  if (!wordSegmenter) {
    wordSegmenter = new Intl.Segmenter(locale, { granularity: "word" });
    segmenterCache.set(locale, wordSegmenter);
  }
  return wordSegmenter;
}
|
|
1007
|
+
/**
 * Returns the grapheme-granularity segmenter for a locale, creating and caching
 * it in graphemeSegmenterCache on first use.
 *
 * @param {string} locale - Locale tag passed to Intl.Segmenter.
 * @returns {Intl.Segmenter} The cached or newly created segmenter.
 */
function getGraphemeSegmenter(locale) {
  let graphemeSegmenter = graphemeSegmenterCache.get(locale);
  if (!graphemeSegmenter) {
    graphemeSegmenter = new Intl.Segmenter(locale, { granularity: "grapheme" });
    graphemeSegmenterCache.set(locale, graphemeSegmenter);
  }
  return graphemeSegmenter;
}
|
|
1014
|
+
/**
 * Reports whether the runtime provides Intl.Segmenter.
 *
 * @returns {boolean} True when `Intl` exists and `Intl.Segmenter` is a function.
 */
function supportsSegmenter() {
  if (typeof Intl === "undefined") {
    return false;
  }
  return typeof Intl.Segmenter === "function";
}
|
|
1017
|
+
/**
 * Counts the characters in a text for a locale.
 * With Intl.Segmenter available the count is the number of grapheme segments;
 * otherwise it falls back to the number of code points.
 *
 * @param {string} text - Text to measure.
 * @param {string} locale - Locale used to obtain the grapheme segmenter.
 * @returns {number} The character count.
 */
function countCharsForLocale(text, locale) {
  if (!supportsSegmenter()) {
    return Array.from(text).length;
  }
  const graphemes = getGraphemeSegmenter(locale).segment(text)[Symbol.iterator]();
  let total = 0;
  while (!graphemes.next().done) {
    total += 1;
  }
  return total;
}
|
|
1024
|
+
|
|
1025
|
+
//#endregion
|
|
1026
|
+
//#region src/wc/non-words.ts
|
|
1027
|
+
// Matches a code point that is Extended_Pictographic or has Emoji_Presentation.
const emojiRegex = /(?:\p{Extended_Pictographic}|\p{Emoji_Presentation})/u;
// Matches a code point with the Emoji_Presentation property.
const emojiPresentationRegex = /\p{Emoji_Presentation}/u;
// Matches a keycap sequence: a digit, "#" or "*", an optional U+FE0F, then U+20E3.
const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
// Matches any code point in Unicode general category S (symbols).
const symbolRegex = /\p{S}/u;
// Matches any code point in Unicode general category P (punctuation).
const punctuationRegex = /\p{P}/u;
// Matches any whitespace character; addWhitespace() uses it for its "other" bucket.
const whitespaceRegex = /\s/u;
// Characters that addWhitespace() counts as newlines (LF, CR, U+2028, U+2029).
const newlineChars = new Set([
  "\n",
  "\r",
  "\u2028",
  "\u2029"
]);
|
|
1039
|
+
/**
 * Creates an empty non-word collection: one segment list per category plus a
 * matching `counts` record initialised to zero.
 *
 * @returns {{emoji: string[], symbols: string[], punctuation: string[], counts: {emoji: number, symbols: number, punctuation: number}}}
 *   A fresh collection that shares no state with earlier ones.
 */
function createNonWordCollection() {
  const counts = { emoji: 0, symbols: 0, punctuation: 0 };
  const collection = { emoji: [], symbols: [], punctuation: [], counts };
  return collection;
}
|
|
1051
|
+
/**
 * Records a non-word segment in the collection and bumps the matching counter.
 *
 * @param {object} collection - Collection with `emoji`, `symbols`, `punctuation`
 *   lists and a `counts` record (mutated in place).
 * @param {string} category - "emoji" or "symbol"; every other value is stored
 *   as punctuation.
 * @param {string} segment - The segment text to record.
 * @returns {void}
 */
function addNonWord(collection, category, segment) {
  let bucket;
  switch (category) {
    case "emoji":
      bucket = "emoji";
      break;
    case "symbol":
      bucket = "symbols";
      break;
    default:
      bucket = "punctuation";
  }
  collection[bucket].push(segment);
  collection.counts[bucket] += 1;
}
|
|
1065
|
+
/**
 * Counts the whitespace characters of a segment into the collection.
 *
 * Each character is classified as a space, a tab, a newline (per newlineChars)
 * or other whitespace (per whitespaceRegex); non-whitespace characters are
 * ignored. `collection.whitespace` and `collection.counts.whitespace` are only
 * created or updated when at least one whitespace character was found.
 *
 * @param {object} collection - Non-word collection to update (mutated in place).
 * @param {string} segment - Segment text to scan.
 * @returns {number} Number of whitespace characters found in the segment.
 */
function addWhitespace(collection, segment) {
  let whitespace = collection.whitespace;
  let count = 0;
  for (const char of segment) {
    let bucket;
    if (char === " ") bucket = "spaces";
    // Explicit escape: a literal tab is indistinguishable from a space in most
    // renderings and would make this branch unreachable if it were one.
    else if (char === "\t") bucket = "tabs";
    else if (newlineChars.has(char)) bucket = "newlines";
    else if (whitespaceRegex.test(char)) bucket = "other";
    else continue;
    // Allocate the breakdown lazily so segments without whitespace leave the
    // collection untouched.
    whitespace = whitespace ?? createWhitespaceCounts();
    whitespace[bucket] += 1;
    count += 1;
  }
  if (count > 0) {
    collection.whitespace = whitespace ?? createWhitespaceCounts();
    collection.counts.whitespace = (collection.counts.whitespace ?? 0) + count;
  }
  return count;
}
|
|
1066
|
-
function
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
if (
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
if (trimmed.startsWith("[") && trimmed.endsWith("]")) return null;
|
|
1073
|
-
return parsePrimitive(trimmed);
|
|
1099
|
+
/**
 * Classifies a non-word segment.
 *
 * A segment is an emoji when it contains a keycap sequence, a code point with
 * Emoji_Presentation, or a pictographic code point together with the emoji
 * variation selector U+FE0F. Otherwise it is a symbol or punctuation when it
 * contains a code point of that general category.
 *
 * @param {string} segment - Segment text to classify.
 * @returns {"emoji"|"symbol"|"punctuation"|null} The category, or null when the
 *   segment matches none of them (for example plain whitespace).
 */
function classifyNonWordSegment(segment) {
  // U+FE0F is written as an escape because the literal character is invisible
  // and easily lost when the file is edited or re-encoded.
  const hasEmojiVariationSelector = segment.includes("\uFE0F");
  if (keycapEmojiRegex.test(segment) || emojiPresentationRegex.test(segment) || hasEmojiVariationSelector && emojiRegex.test(segment)) return "emoji";
  if (symbolRegex.test(segment)) return "symbol";
  if (punctuationRegex.test(segment)) return "punctuation";
  return null;
}
|
|
1075
|
-
function
|
|
1076
|
-
if (
|
|
1077
|
-
|
|
1078
|
-
|
|
1106
|
+
/**
 * Adds the contents of one non-word collection to another.
 *
 * For each category with a positive count in `source`, its segments are
 * appended to `target` and the count is added. Whitespace totals are merged
 * only when `source` has both a positive whitespace count and a breakdown.
 *
 * @param {object} target - Collection that receives the data (mutated in place).
 * @param {object} source - Collection to read from.
 * @returns {object} The `target` collection.
 */
function mergeNonWordCollections(target, source) {
  for (const bucket of ["emoji", "symbols", "punctuation"]) {
    if (source.counts[bucket] > 0) {
      appendAll(target[bucket], source[bucket]);
      target.counts[bucket] += source.counts[bucket];
    }
  }
  const incomingTotal = source.counts.whitespace;
  const incomingBreakdown = source.whitespace;
  if (incomingTotal && incomingTotal > 0 && incomingBreakdown) {
    const merged = target.whitespace ?? createWhitespaceCounts();
    for (const field of ["spaces", "tabs", "newlines", "other"]) {
      merged[field] += incomingBreakdown[field];
    }
    target.whitespace = merged;
    target.counts.whitespace = (target.counts.whitespace ?? 0) + incomingTotal;
  }
  return target;
}
|
|
1130
|
+
/**
 * Creates a zeroed whitespace breakdown.
 *
 * @returns {{spaces: number, tabs: number, newlines: number, other: number}}
 *   A fresh record with every counter set to 0.
 */
function createWhitespaceCounts() {
  const breakdown = { spaces: 0, tabs: 0, newlines: 0, other: 0 };
  return breakdown;
}
|
|
1080
1138
|
|
|
1081
1139
|
//#endregion
|
|
1082
|
-
//#region src/
|
|
1083
|
-
function
|
|
1084
|
-
const
|
|
1085
|
-
const
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
const
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
tableTarget = null;
|
|
1140
|
+
//#region src/wc/analyze.ts
|
|
1141
|
+
/**
 * Segments a chunk into words using the word segmenter for its locale.
 *
 * Word-like segments are collected in order. When `collectNonWords` is set, the
 * remaining segments are classified and stored in a non-word collection, and
 * their whitespace is counted as well when `includeWhitespace` is set.
 *
 * @param {{locale: string, text: string}} chunk - Chunk to analyse.
 * @param {boolean} collectNonWords - Whether to gather non-word segments.
 * @param {boolean} includeWhitespace - Whether to count whitespace in non-word
 *   segments (only relevant when collecting non-words).
 * @returns {{locale: string, text: string, segments: string[], words: number, nonWords: object|undefined}}
 *   `nonWords` is undefined when non-words were not collected.
 */
function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
  const { locale, text } = chunk;
  const wordSegmenter = getSegmenter(locale);
  const wordSegments = [];
  const collected = collectNonWords ? createNonWordCollection() : null;
  for (const { isWordLike, segment } of wordSegmenter.segment(text)) {
    if (isWordLike) {
      wordSegments.push(segment);
      continue;
    }
    if (!collectNonWords || !collected) continue;
    if (includeWhitespace) addWhitespace(collected, segment);
    const kind = classifyNonWordSegment(segment);
    if (kind) addNonWord(collected, kind, segment);
  }
  return {
    locale,
    text,
    segments: wordSegments,
    words: wordSegments.length,
    nonWords: collected ?? void 0
  };
}
|
|
1159
|
+
/**
 * Counts the characters of a chunk, split into word and non-word characters.
 *
 * Characters of word-like segments are always counted. Other segments are only
 * counted when `collectNonWords` is set and the segment was classified as a
 * non-word category or contributed whitespace (the latter requires
 * `includeWhitespace`).
 *
 * @param {{locale: string, text: string}} chunk - Chunk to analyse.
 * @param {boolean} collectNonWords - Whether to gather and count non-word segments.
 * @param {boolean} includeWhitespace - Whether whitespace is counted among non-words.
 * @returns {{locale: string, text: string, chars: number, wordChars: number, nonWordChars: number, nonWords: object|undefined}}
 *   `nonWords` is undefined when non-words were not collected.
 */
function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
  const { locale, text } = chunk;
  const wordSegmenter = getSegmenter(locale);
  const collected = collectNonWords ? createNonWordCollection() : null;
  let total = 0;
  let inWords = 0;
  let outsideWords = 0;
  for (const { isWordLike, segment } of wordSegmenter.segment(text)) {
    if (isWordLike) {
      const size = countCharsForLocale(segment, locale);
      total += size;
      inWords += size;
      continue;
    }
    if (!collectNonWords || !collected) continue;
    const whitespaceCount = includeWhitespace ? addWhitespace(collected, segment) : 0;
    const kind = classifyNonWordSegment(segment);
    if (kind) addNonWord(collected, kind, segment);
    // Segments that were neither classified nor counted as whitespace are skipped.
    if (!kind && !(whitespaceCount > 0)) continue;
    const size = countCharsForLocale(segment, locale);
    total += size;
    outsideWords += size;
  }
  return {
    locale,
    text,
    chars: total,
    wordChars: inWords,
    nonWordChars: outsideWords,
    nonWords: collected ?? void 0
  };
}
|
|
1193
|
+
/**
 * Sums character statistics per locale, keeping the order in which locales
 * first appear.
 *
 * Non-word collections are merged into a collection owned by the aggregate, so
 * the input chunks' collections are not modified.
 *
 * @param {Array<{locale: string, chars: number, wordChars: number, nonWordChars: number, nonWords?: object}>} chunks
 *   Per-chunk character statistics.
 * @returns {Array<{locale: string, chars: number, wordChars: number, nonWordChars: number, nonWords: object|undefined}>}
 *   One entry per locale.
 */
function aggregateCharsByLocale(chunks) {
  // A Map iterates in insertion order, which is first-appearance order here.
  const totals = /* @__PURE__ */ new Map();
  for (const chunk of chunks) {
    const { locale, chars, wordChars, nonWordChars, nonWords } = chunk;
    const entry = totals.get(locale);
    if (!entry) {
      totals.set(locale, {
        locale,
        chars,
        wordChars,
        nonWordChars,
        nonWords: nonWords ? mergeNonWordCollections(createNonWordCollection(), nonWords) : void 0
      });
      continue;
    }
    entry.chars += chars;
    entry.wordChars += wordChars;
    entry.nonWordChars += nonWordChars;
    if (nonWords) {
      if (!entry.nonWords) entry.nonWords = createNonWordCollection();
      mergeNonWordCollections(entry.nonWords, nonWords);
    }
  }
  return [...totals.values()];
}
|
|
1219
|
+
/**
 * Sums word counts and concatenates word segments per locale, keeping the
 * order in which locales first appear.
 *
 * The first chunk's segment list is copied, so the input arrays are not modified.
 *
 * @param {Array<{locale: string, words: number, segments: string[]}>} chunks
 *   Per-chunk word statistics.
 * @returns {Array<{locale: string, words: number, segments: string[]}>}
 *   One entry per locale.
 */
function aggregateByLocale(chunks) {
  // A Map iterates in insertion order, which is first-appearance order here.
  const totals = /* @__PURE__ */ new Map();
  for (const { locale, words, segments } of chunks) {
    const entry = totals.get(locale);
    if (!entry) {
      totals.set(locale, { locale, words, segments: [...segments] });
      continue;
    }
    entry.words += words;
    appendAll(entry.segments, segments);
  }
  return [...totals.values()];
}
|
|
1164
1238
|
|
|
1165
1239
|
//#endregion
|
|
1166
|
-
//#region src/
|
|
1167
|
-
const
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1240
|
+
//#region src/wc/mode.ts
|
|
1241
|
+
/**
 * Maps every accepted spelling of a mode name to its canonical mode.
 * normalizeMode() looks up the lower-cased, trimmed input here, and again after
 * removing separators.
 */
const MODE_ALIASES = {
  chunk: "chunk",
  chunks: "chunk",
  segments: "segments",
  segment: "segments",
  seg: "segments",
  collector: "collector",
  collect: "collector",
  colle: "collector",
  char: "char",
  chars: "char",
  character: "char",
  characters: "char",
  "char-collector": "char-collector"
};
// Tokens that count as the "char" half of a composed char-collector mode name.
const CHAR_MODE_ALIASES = new Set([
  "char",
  "chars",
  "character",
  "characters"
]);
// Tokens that count as the "collector" half of a composed char-collector mode name.
// Note: "col" is listed here but is not a key of MODE_ALIASES, so it is only
// recognised as part of a composed name.
const COLLECTOR_MODE_ALIASES = new Set([
  "collector",
  "collect",
  "colle",
  "col"
]);
|
|
1268
|
+
/**
 * Removes every run of hyphens, underscores and whitespace from a string.
 *
 * @param {string} value - Text to compact.
 * @returns {string} The text without those separator characters.
 */
function collapseSeparators(value) {
  const separatorRuns = /[-_\s]+/g;
  const compacted = value.replace(separatorRuns, "");
  return compacted;
}
|
|
1175
|
-
function
|
|
1176
|
-
|
|
1271
|
+
/**
 * Checks whether a separator-delimited mode name is composed solely of "char"
 * aliases and "collector" aliases, with at least one of each.
 *
 * Tokens are split on runs of hyphens, underscores and whitespace; empty tokens
 * are dropped.
 *
 * @param {string} value - Mode name to inspect.
 * @returns {boolean} True for names such as "chars_col"; false when there are
 *   fewer than two tokens, an unknown token, or one of the two halves is missing.
 */
function isComposedCharCollectorFromTokens(value) {
  const tokens = [];
  for (const piece of value.split(/[-_\s]+/)) {
    const cleaned = piece.trim();
    if (cleaned.length > 0) tokens.push(cleaned);
  }
  if (tokens.length < 2) return false;
  const isCharToken = (token) => CHAR_MODE_ALIASES.has(token);
  // The char check takes precedence, so a token only counts for the collector
  // half when it is not a char alias.
  const isCollectorToken = (token) => !isCharToken(token) && COLLECTOR_MODE_ALIASES.has(token);
  const allRecognised = tokens.every((token) => isCharToken(token) || isCollectorToken(token));
  if (!allRecognised) return false;
  return tokens.some(isCharToken) && tokens.some(isCollectorToken);
}
|
|
1178
|
-
function
|
|
1179
|
-
const
|
|
1180
|
-
|
|
1181
|
-
return FENCE_TO_TYPE[match[1] ?? ""] ?? null;
|
|
1289
|
+
/**
 * Checks whether a separator-free mode name is exactly one "char" alias joined
 * to one "collector" alias, in either order.
 *
 * @param {string} value - Compacted mode name (no separators).
 * @returns {boolean} True for names such as "charcollector" or "colchars".
 */
function isComposedCharCollectorCompact(value) {
  const collectorAliases = [...COLLECTOR_MODE_ALIASES];
  return [...CHAR_MODE_ALIASES].some((charAlias) =>
    collectorAliases.some((collectorAlias) =>
      value === `${charAlias}${collectorAlias}` || value === `${collectorAlias}${charAlias}`
    )
  );
}
|
|
1183
|
-
function
|
|
1184
|
-
if (!
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
if (
|
|
1191
|
-
|
|
1192
|
-
if (doc.errors.length > 0) return null;
|
|
1193
|
-
const data = doc.toJSON();
|
|
1194
|
-
if (!data || typeof data !== "object" || Array.isArray(data)) return null;
|
|
1195
|
-
return data;
|
|
1196
|
-
}
|
|
1197
|
-
if (type === "toml") return parseTomlFrontmatter(frontmatter);
|
|
1198
|
-
return null;
|
|
1293
|
+
/**
 * Resolves a user-supplied mode name to its canonical mode.
 *
 * The input is trimmed and lower-cased, then tried in this order: a direct
 * MODE_ALIASES entry, a composed char/collector name with separators, the same
 * in compact form, and finally a MODE_ALIASES entry for the separator-free name.
 *
 * @param {string|null|undefined} input - Raw mode name.
 * @returns {string|null} The canonical mode, or null when the input is empty or
 *   not recognised.
 */
function normalizeMode(input) {
  if (!input) return null;
  // MODE_ALIASES is a plain object, so an unguarded lookup would return inherited
  // members for inputs such as "constructor" or "__proto__". Only own entries
  // are valid aliases.
  const lookupAlias = (key) => Object.prototype.hasOwnProperty.call(MODE_ALIASES, key) ? MODE_ALIASES[key] : void 0;
  const normalized = input.trim().toLowerCase();
  const direct = lookupAlias(normalized);
  if (direct) return direct;
  if (isComposedCharCollectorFromTokens(normalized)) return "char-collector";
  const compact = collapseSeparators(normalized);
  if (isComposedCharCollectorCompact(compact)) return "char-collector";
  return lookupAlias(compact) ?? null;
}
|
|
1200
|
-
function
|
|
1201
|
-
|
|
1202
|
-
let inString = false;
|
|
1203
|
-
let escaped = false;
|
|
1204
|
-
for (let i = startIndex; i < text.length; i += 1) {
|
|
1205
|
-
const char = text[i] ?? "";
|
|
1206
|
-
if (inString) {
|
|
1207
|
-
if (escaped) {
|
|
1208
|
-
escaped = false;
|
|
1209
|
-
continue;
|
|
1210
|
-
}
|
|
1211
|
-
if (char === "\\") {
|
|
1212
|
-
escaped = true;
|
|
1213
|
-
continue;
|
|
1214
|
-
}
|
|
1215
|
-
if (char === "\"") inString = false;
|
|
1216
|
-
continue;
|
|
1217
|
-
}
|
|
1218
|
-
if (char === "\"") {
|
|
1219
|
-
inString = true;
|
|
1220
|
-
continue;
|
|
1221
|
-
}
|
|
1222
|
-
if (char === "{") {
|
|
1223
|
-
depth += 1;
|
|
1224
|
-
continue;
|
|
1225
|
-
}
|
|
1226
|
-
if (char === "}") {
|
|
1227
|
-
depth -= 1;
|
|
1228
|
-
if (depth === 0) return {
|
|
1229
|
-
jsonText: text.slice(startIndex, i + 1),
|
|
1230
|
-
endIndex: i
|
|
1231
|
-
};
|
|
1232
|
-
}
|
|
1233
|
-
}
|
|
1234
|
-
return null;
|
|
1303
|
+
/**
 * Resolves a mode name, substituting a fallback when it is not recognised.
 *
 * @param {string|null|undefined} input - Raw mode name.
 * @param {string} [fallback="chunk"] - Mode returned when normalizeMode() yields null.
 * @returns {string} The canonical mode or the fallback.
 */
function resolveMode(input, fallback = "chunk") {
  const recognised = normalizeMode(input);
  return recognised ?? fallback;
}
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
content,
|
|
1276
|
-
data,
|
|
1277
|
-
frontmatterType: "json"
|
|
1278
|
-
};
|
|
1279
|
-
}
|
|
1280
|
-
let closingIndex = -1;
|
|
1281
|
-
for (let i = 1; i < lines.length; i += 1) if (getFenceType(lines[i] ?? "") === openingType) {
|
|
1282
|
-
closingIndex = i;
|
|
1283
|
-
break;
|
|
1306
|
+
|
|
1307
|
+
//#endregion
|
|
1308
|
+
//#region src/wc/latin-hints.ts
|
|
1309
|
+
/**
 * Built-in Latin hint rules: each pairs a language tag with a character-class
 * pattern (as regex source text) of letters listed for that tag.
 */
const DEFAULT_LATIN_HINT_RULES_SOURCE = [
  {
    tag: "de",
    pattern: "[äöüÄÖÜß]"
  },
  {
    tag: "es",
    pattern: "[ñÑ¿¡]"
  },
  {
    tag: "pt",
    pattern: "[ãõÃÕ]"
  },
  {
    tag: "fr",
    pattern: "[œŒæÆ]"
  },
  {
    tag: "pl",
    pattern: "[ąćęłńśźżĄĆĘŁŃŚŹŻ]"
  },
  {
    tag: "tr",
    pattern: "[ıİğĞşŞ]"
  },
  {
    tag: "ro",
    pattern: "[ăĂâÂîÎșȘțȚ]"
  },
  {
    tag: "hu",
    pattern: "[őŐűŰ]"
  },
  {
    tag: "is",
    pattern: "[ðÐþÞ]"
  }
];
// Frozen copy of the rules above: both the list and each rule object are frozen,
// and the rule objects are shallow copies of the source entries.
const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
|
|
1299
1348
|
|
|
1300
1349
|
//#endregion
|
|
1301
|
-
//#region src/wc/
|
|
1302
|
-
const
|
|
1303
|
-
const
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1350
|
+
//#region src/wc/locale-detect.ts
|
|
1351
|
+
// Locale tag for Latin-script text of undetermined language; always a member of
// the Latin locale sets built below and in resolveLocaleDetectContext().
const DEFAULT_LOCALE = "und-Latn";
// Locale tag for Han-script text of undetermined language.
const DEFAULT_HAN_TAG = "und-Hani";
// Upper bound on the source length of a Latin hint pattern, enforced by
// compileLatinHintPattern().
const MAX_LATIN_HINT_PATTERN_LENGTH = 256;
// Single-code-point script tests, one per supported Unicode script.
const regex = {
  hiragana: /\p{Script=Hiragana}/u,
  katakana: /\p{Script=Katakana}/u,
  hangul: /\p{Script=Hangul}/u,
  han: /\p{Script=Han}/u,
  latin: /\p{Script=Latin}/u,
  arabic: /\p{Script=Arabic}/u,
  cyrillic: /\p{Script=Cyrillic}/u,
  devanagari: /\p{Script=Devanagari}/u,
  thai: /\p{Script=Thai}/u
};
// Locales treated as Latin when no detection context is supplied: the default
// locale plus the tag of every built-in hint rule.
const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
|
|
1366
|
+
/**
 * Tells whether a locale is one of the known Latin locales.
 *
 * @param {string} locale - Locale tag to test.
 * @param {{latinLocales: Set<string>}} [context] - Detection context; when
 *   omitted, the built-in defaultLatinLocales set is used.
 * @returns {boolean} True when the locale is in the applicable set.
 */
function isLatinLocale(locale, context) {
  const knownLatinLocales = context ? context.latinLocales : defaultLatinLocales;
  return knownLatinLocales.has(locale);
}
|
|
1311
|
-
function
|
|
1312
|
-
const
|
|
1313
|
-
if (
|
|
1314
|
-
const
|
|
1315
|
-
|
|
1316
|
-
|
|
1370
|
+
/**
 * Picks the Latin hint from the options.
 * The first non-empty (after trimming) value wins, in the order
 * `latinTagHint`, `latinLanguageHint`, `latinLocaleHint`.
 *
 * @param {object} options - Options that may carry the three hint fields.
 * @returns {string|undefined} The trimmed hint, or undefined when none is set.
 */
function resolveLatinHint(options) {
  const fieldsByPrecedence = ["latinTagHint", "latinLanguageHint", "latinLocaleHint"];
  for (const field of fieldsByPrecedence) {
    const hint = options[field]?.trim();
    if (hint) return hint;
  }
}
|
|
1318
|
-
function
|
|
1319
|
-
|
|
1378
|
+
/**
 * Picks the Han hint from the options.
 * The first non-empty (after trimming) value wins, in the order
 * `hanTagHint`, `hanLanguageHint`.
 *
 * @param {object} options - Options that may carry the two hint fields.
 * @returns {string|undefined} The trimmed hint, or undefined when none is set.
 */
function resolveHanHint(options) {
  const fieldsByPrecedence = ["hanTagHint", "hanLanguageHint"];
  for (const field of fieldsByPrecedence) {
    const hint = options[field]?.trim();
    if (hint) return hint;
  }
}
|
|
1321
|
-
function
|
|
1322
|
-
|
|
1323
|
-
const
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1384
|
+
/**
 * Compiles a Latin hint pattern into a Unicode-aware regular expression.
 *
 * A string is compiled with the "u" flag. A RegExp (or any object exposing
 * string `source` and `flags`) is recompiled with its own flags, adding "u"
 * unless it already uses "u" or "v".
 *
 * @param {string|RegExp} pattern - Pattern source text or an existing regex.
 * @param {string} label - Prefix for error messages, identifying the rule.
 * @returns {RegExp} The compiled regular expression.
 * @throws {Error} When the pattern has the wrong type, is empty, is longer than
 *   MAX_LATIN_HINT_PATTERN_LENGTH, or is not a valid regex.
 */
function compileLatinHintPattern(pattern, label) {
  const isString = typeof pattern === "string";
  const isRegExpLike = !isString && pattern != null && typeof pattern.source === "string" && typeof pattern.flags === "string";
  // Rules can come from user-supplied data; report a malformed pattern with the
  // rule label instead of failing on a property access further down.
  if (!isString && !isRegExpLike) throw new Error(`${label}: pattern must be a string or RegExp.`);
  const source = isString ? pattern : pattern.source;
  const hasUnicodeMode = !isString && (pattern.flags.includes("u") || pattern.flags.includes("v"));
  const flags = isString ? "u" : hasUnicodeMode ? pattern.flags : `${pattern.flags}u`;
  if (source.length === 0) throw new Error(`${label}: pattern must not be empty.`);
  if (source.length > MAX_LATIN_HINT_PATTERN_LENGTH) throw new Error(`${label}: pattern must be at most ${MAX_LATIN_HINT_PATTERN_LENGTH} characters.`);
  try {
    return new RegExp(source, flags);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    throw new Error(`${label}: invalid Unicode regex pattern (${message}).`);
  }
}
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
const
|
|
1335
|
-
|
|
1336
|
-
const whitespaceRegex = /\s/u;
|
|
1337
|
-
const newlineChars = new Set([
|
|
1338
|
-
"\n",
|
|
1339
|
-
"\r",
|
|
1340
|
-
"\u2028",
|
|
1341
|
-
"\u2029"
|
|
1342
|
-
]);
|
|
1343
|
-
function createNonWordCollection() {
|
|
1397
|
+
/**
 * Validates a rule priority, defaulting to 0 when it is not provided.
 *
 * @param {number|undefined} priority - Priority from the rule definition.
 * @param {string} label - Prefix for the error message, identifying the rule.
 * @returns {number} The priority, or 0 when it is undefined.
 * @throws {Error} When a priority is provided but is not a finite number.
 */
function normalizeLatinHintPriority(priority, label) {
  if (priority === void 0) {
    return 0;
  }
  const isFiniteNumber = typeof priority === "number" && Number.isFinite(priority);
  if (isFiniteNumber) {
    return priority;
  }
  throw new Error(`${label}: priority must be a finite number when provided.`);
}
|
|
1402
|
+
/**
 * Validates a Latin hint rule and compiles it into its resolved form.
 *
 * @param {{tag: string, pattern: string|RegExp, priority?: number}} rule - Rule definition.
 * @param {number} order - Position of the rule, stored on the result unchanged.
 * @param {string} label - Prefix for error messages, identifying the rule.
 * @returns {{tag: string, pattern: RegExp, priority: number, order: number}}
 *   The trimmed tag, compiled pattern, normalised priority and the given order.
 * @throws {Error} When the tag is not a non-empty string, or when the pattern or
 *   priority is rejected by its validator.
 */
function compileLatinHintRule(rule, order, label) {
  let tag = "";
  if (typeof rule.tag === "string") {
    tag = rule.tag.trim();
  }
  if (tag === "") {
    throw new Error(`${label}: tag must be a non-empty string.`);
  }
  const pattern = compileLatinHintPattern(rule.pattern, label);
  const priority = normalizeLatinHintPriority(rule.priority, label);
  return { tag, pattern, priority, order };
}
|
|
1355
|
-
function
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
collection.punctuation.push(segment);
|
|
1367
|
-
collection.counts.punctuation += 1;
|
|
1368
|
-
}
|
|
1369
|
-
function addWhitespace(collection, segment) {
|
|
1370
|
-
let whitespace = collection.whitespace;
|
|
1371
|
-
let count = 0;
|
|
1372
|
-
for (const char of segment) {
|
|
1373
|
-
if (char === " ") {
|
|
1374
|
-
whitespace = whitespace ?? createWhitespaceCounts();
|
|
1375
|
-
whitespace.spaces += 1;
|
|
1376
|
-
count += 1;
|
|
1377
|
-
continue;
|
|
1378
|
-
}
|
|
1379
|
-
if (char === " ") {
|
|
1380
|
-
whitespace = whitespace ?? createWhitespaceCounts();
|
|
1381
|
-
whitespace.tabs += 1;
|
|
1382
|
-
count += 1;
|
|
1383
|
-
continue;
|
|
1384
|
-
}
|
|
1385
|
-
if (newlineChars.has(char)) {
|
|
1386
|
-
whitespace = whitespace ?? createWhitespaceCounts();
|
|
1387
|
-
whitespace.newlines += 1;
|
|
1388
|
-
count += 1;
|
|
1389
|
-
continue;
|
|
1390
|
-
}
|
|
1391
|
-
if (whitespaceRegex.test(char)) {
|
|
1392
|
-
whitespace = whitespace ?? createWhitespaceCounts();
|
|
1393
|
-
whitespace.other += 1;
|
|
1394
|
-
count += 1;
|
|
1395
|
-
}
|
|
1412
|
+
function resolveLatinHintRules$1(options) {
|
|
1413
|
+
const useDefaultLatinHints = options.useDefaultLatinHints !== false;
|
|
1414
|
+
const customRules = options.latinHintRules ?? [];
|
|
1415
|
+
const combinedRules = [];
|
|
1416
|
+
for (let index = 0; index < customRules.length; index += 1) {
|
|
1417
|
+
const rule = customRules[index];
|
|
1418
|
+
if (!rule) continue;
|
|
1419
|
+
combinedRules.push({
|
|
1420
|
+
rule,
|
|
1421
|
+
label: `Invalid custom Latin hint rule at index ${index}`
|
|
1422
|
+
});
|
|
1396
1423
|
}
|
|
1397
|
-
if (
|
|
1398
|
-
|
|
1399
|
-
|
|
1424
|
+
if (useDefaultLatinHints) for (let index = 0; index < DEFAULT_LATIN_HINT_RULES.length; index += 1) {
|
|
1425
|
+
const rule = DEFAULT_LATIN_HINT_RULES[index];
|
|
1426
|
+
if (!rule) continue;
|
|
1427
|
+
combinedRules.push({
|
|
1428
|
+
rule,
|
|
1429
|
+
label: `Invalid default Latin hint rule at index ${index}`
|
|
1430
|
+
});
|
|
1400
1431
|
}
|
|
1401
|
-
|
|
1432
|
+
const resolvedRules = combinedRules.map((entry, index) => compileLatinHintRule(entry.rule, index, entry.label));
|
|
1433
|
+
resolvedRules.sort((left, right) => {
|
|
1434
|
+
if (left.priority !== right.priority) return right.priority - left.priority;
|
|
1435
|
+
return left.order - right.order;
|
|
1436
|
+
});
|
|
1437
|
+
return resolvedRules;
|
|
1402
1438
|
}
|
|
1403
|
-
function
|
|
1404
|
-
const
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1439
|
+
function resolveLocaleDetectContext(options = {}) {
|
|
1440
|
+
const latinHint = resolveLatinHint(options);
|
|
1441
|
+
const latinHintRules = resolveLatinHintRules$1(options);
|
|
1442
|
+
const latinLocales = new Set([DEFAULT_LOCALE]);
|
|
1443
|
+
for (const rule of latinHintRules) latinLocales.add(rule.tag);
|
|
1444
|
+
if (latinHint) latinLocales.add(latinHint);
|
|
1445
|
+
return {
|
|
1446
|
+
latinHint,
|
|
1447
|
+
hanHint: resolveHanHint(options),
|
|
1448
|
+
latinHintRules,
|
|
1449
|
+
latinLocales
|
|
1450
|
+
};
|
|
1409
1451
|
}
|
|
1410
|
-
function
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
}
|
|
1415
|
-
if (source.counts.symbols > 0) {
|
|
1416
|
-
appendAll(target.symbols, source.symbols);
|
|
1417
|
-
target.counts.symbols += source.counts.symbols;
|
|
1452
|
+
function detectLatinLocale(char, context) {
|
|
1453
|
+
for (const hint of context.latinHintRules) {
|
|
1454
|
+
hint.pattern.lastIndex = 0;
|
|
1455
|
+
if (hint.pattern.test(char)) return hint.tag;
|
|
1418
1456
|
}
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1457
|
+
return DEFAULT_LOCALE;
|
|
1458
|
+
}
|
|
1459
|
+
function detectLocaleForChar(char, previousLocale, options = {}, context = resolveLocaleDetectContext(options), allowLatinLocaleCarry = true, allowJapaneseHanCarry = true) {
|
|
1460
|
+
if (regex.hiragana.test(char) || regex.katakana.test(char)) return "ja";
|
|
1461
|
+
if (regex.hangul.test(char)) return "ko";
|
|
1462
|
+
if (regex.arabic.test(char)) return "ar";
|
|
1463
|
+
if (regex.cyrillic.test(char)) return "ru";
|
|
1464
|
+
if (regex.devanagari.test(char)) return "hi";
|
|
1465
|
+
if (regex.thai.test(char)) return "th";
|
|
1466
|
+
if (regex.han.test(char)) {
|
|
1467
|
+
if (allowJapaneseHanCarry && previousLocale && previousLocale.startsWith("ja")) return previousLocale;
|
|
1468
|
+
return context.hanHint ?? DEFAULT_HAN_TAG;
|
|
1422
1469
|
}
|
|
1423
|
-
if (
|
|
1424
|
-
const
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
target.whitespace = whitespace;
|
|
1430
|
-
target.counts.whitespace = (target.counts.whitespace ?? 0) + source.counts.whitespace;
|
|
1470
|
+
if (regex.latin.test(char)) {
|
|
1471
|
+
const hintedLocale = detectLatinLocale(char, context);
|
|
1472
|
+
if (hintedLocale !== DEFAULT_LOCALE) return hintedLocale;
|
|
1473
|
+
if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE) return previousLocale;
|
|
1474
|
+
if (context.latinHint) return context.latinHint;
|
|
1475
|
+
return DEFAULT_LOCALE;
|
|
1431
1476
|
}
|
|
1432
|
-
return
|
|
1433
|
-
}
|
|
1434
|
-
function createWhitespaceCounts() {
|
|
1435
|
-
return {
|
|
1436
|
-
spaces: 0,
|
|
1437
|
-
tabs: 0,
|
|
1438
|
-
newlines: 0,
|
|
1439
|
-
other: 0
|
|
1440
|
-
};
|
|
1477
|
+
return null;
|
|
1441
1478
|
}
|
|
1442
1479
|
|
|
1443
1480
|
//#endregion
|
|
1444
|
-
//#region src/wc/
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
const
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
nonWords: nonWords ?? void 0
|
|
1481
|
+
//#region src/wc/segment.ts
|
|
1482
|
+
const HARD_BOUNDARY_REGEX = /[\r\n,.!?;:,、。!?;:.。、]/u;
|
|
1483
|
+
const LATIN_PROMOTION_BREAK_REGEX = /[\s,.!?;:,、。!?;:.。、]/u;
|
|
1484
|
+
function segmentTextByLocale(text, options = {}) {
|
|
1485
|
+
const context = resolveLocaleDetectContext(options);
|
|
1486
|
+
const chunks = [];
|
|
1487
|
+
let currentLocale = DEFAULT_LOCALE;
|
|
1488
|
+
let buffer = "";
|
|
1489
|
+
let bufferHasScript = false;
|
|
1490
|
+
let sawCarryBoundary = false;
|
|
1491
|
+
const updateCarryBoundaryState = (detected, char) => {
|
|
1492
|
+
if (detected !== null) {
|
|
1493
|
+
sawCarryBoundary = false;
|
|
1494
|
+
return;
|
|
1495
|
+
}
|
|
1496
|
+
if (HARD_BOUNDARY_REGEX.test(char)) sawCarryBoundary = true;
|
|
1461
1497
|
};
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
if (part.isWordLike) {
|
|
1471
|
-
const count = countCharsForLocale(part.segment, chunk.locale);
|
|
1472
|
-
chars += count;
|
|
1473
|
-
wordChars += count;
|
|
1498
|
+
for (const char of text) {
|
|
1499
|
+
const detected = detectLocaleForChar(char, currentLocale, options, context, !sawCarryBoundary, !sawCarryBoundary);
|
|
1500
|
+
const targetLocale = detected ?? currentLocale;
|
|
1501
|
+
if (buffer === "") {
|
|
1502
|
+
currentLocale = targetLocale;
|
|
1503
|
+
buffer = char;
|
|
1504
|
+
bufferHasScript = detected !== null;
|
|
1505
|
+
updateCarryBoundaryState(detected, char);
|
|
1474
1506
|
continue;
|
|
1475
1507
|
}
|
|
1476
|
-
if (
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1508
|
+
if (detected !== null && !bufferHasScript) {
|
|
1509
|
+
currentLocale = targetLocale;
|
|
1510
|
+
buffer += char;
|
|
1511
|
+
bufferHasScript = true;
|
|
1512
|
+
updateCarryBoundaryState(detected, char);
|
|
1513
|
+
continue;
|
|
1514
|
+
}
|
|
1515
|
+
if (targetLocale !== currentLocale && detected !== null) {
|
|
1516
|
+
if (currentLocale === DEFAULT_LOCALE && isLatinLocale(targetLocale, context)) {
|
|
1517
|
+
const promotionBreakIndex = findLastLatinPromotionBreakIndex(buffer);
|
|
1518
|
+
if (promotionBreakIndex === -1) {
|
|
1519
|
+
currentLocale = targetLocale;
|
|
1520
|
+
buffer += char;
|
|
1521
|
+
bufferHasScript = true;
|
|
1522
|
+
updateCarryBoundaryState(detected, char);
|
|
1523
|
+
continue;
|
|
1524
|
+
}
|
|
1525
|
+
const prefix = buffer.slice(0, promotionBreakIndex + 1);
|
|
1526
|
+
const suffix = buffer.slice(promotionBreakIndex + 1);
|
|
1527
|
+
if (prefix.length > 0) chunks.push({
|
|
1528
|
+
locale: currentLocale,
|
|
1529
|
+
text: prefix
|
|
1530
|
+
});
|
|
1531
|
+
currentLocale = targetLocale;
|
|
1532
|
+
buffer = `${suffix}${char}`;
|
|
1533
|
+
bufferHasScript = true;
|
|
1534
|
+
updateCarryBoundaryState(detected, char);
|
|
1535
|
+
continue;
|
|
1485
1536
|
}
|
|
1537
|
+
chunks.push({
|
|
1538
|
+
locale: currentLocale,
|
|
1539
|
+
text: buffer
|
|
1540
|
+
});
|
|
1541
|
+
currentLocale = targetLocale;
|
|
1542
|
+
buffer = char;
|
|
1543
|
+
bufferHasScript = true;
|
|
1544
|
+
updateCarryBoundaryState(detected, char);
|
|
1545
|
+
continue;
|
|
1486
1546
|
}
|
|
1547
|
+
buffer += char;
|
|
1548
|
+
if (detected !== null) bufferHasScript = true;
|
|
1549
|
+
updateCarryBoundaryState(detected, char);
|
|
1487
1550
|
}
|
|
1488
|
-
|
|
1489
|
-
locale:
|
|
1490
|
-
text:
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
nonWordChars,
|
|
1494
|
-
nonWords: nonWords ?? void 0
|
|
1495
|
-
};
|
|
1551
|
+
if (buffer.length > 0) chunks.push({
|
|
1552
|
+
locale: currentLocale,
|
|
1553
|
+
text: buffer
|
|
1554
|
+
});
|
|
1555
|
+
return mergeAdjacentChunks(chunks);
|
|
1496
1556
|
}
|
|
1497
|
-
function
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
if (existing) {
|
|
1503
|
-
existing.chars += chunk.chars;
|
|
1504
|
-
existing.wordChars += chunk.wordChars;
|
|
1505
|
-
existing.nonWordChars += chunk.nonWordChars;
|
|
1506
|
-
if (chunk.nonWords) {
|
|
1507
|
-
if (!existing.nonWords) existing.nonWords = createNonWordCollection();
|
|
1508
|
-
mergeNonWordCollections(existing.nonWords, chunk.nonWords);
|
|
1509
|
-
}
|
|
1510
|
-
continue;
|
|
1511
|
-
}
|
|
1512
|
-
order.push(chunk.locale);
|
|
1513
|
-
map.set(chunk.locale, {
|
|
1514
|
-
locale: chunk.locale,
|
|
1515
|
-
chars: chunk.chars,
|
|
1516
|
-
wordChars: chunk.wordChars,
|
|
1517
|
-
nonWordChars: chunk.nonWordChars,
|
|
1518
|
-
nonWords: chunk.nonWords ? mergeNonWordCollections(createNonWordCollection(), chunk.nonWords) : void 0
|
|
1519
|
-
});
|
|
1557
|
+
function findLastLatinPromotionBreakIndex(buffer) {
|
|
1558
|
+
for (let index = buffer.length - 1; index >= 0; index -= 1) {
|
|
1559
|
+
const char = buffer[index];
|
|
1560
|
+
if (!char) continue;
|
|
1561
|
+
if (LATIN_PROMOTION_BREAK_REGEX.test(char)) return index;
|
|
1520
1562
|
}
|
|
1521
|
-
return
|
|
1563
|
+
return -1;
|
|
1522
1564
|
}
|
|
1523
|
-
function
|
|
1524
|
-
|
|
1525
|
-
const
|
|
1526
|
-
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1565
|
+
function mergeAdjacentChunks(chunks) {
|
|
1566
|
+
if (chunks.length === 0) return chunks;
|
|
1567
|
+
const merged = [];
|
|
1568
|
+
let last = chunks[0];
|
|
1569
|
+
for (let i = 1; i < chunks.length; i++) {
|
|
1570
|
+
const chunk = chunks[i];
|
|
1571
|
+
if (chunk.locale === last.locale) last = {
|
|
1572
|
+
locale: last.locale,
|
|
1573
|
+
text: last.text + chunk.text
|
|
1574
|
+
};
|
|
1575
|
+
else {
|
|
1576
|
+
merged.push(last);
|
|
1577
|
+
last = chunk;
|
|
1532
1578
|
}
|
|
1533
|
-
order.push(chunk.locale);
|
|
1534
|
-
map.set(chunk.locale, {
|
|
1535
|
-
locale: chunk.locale,
|
|
1536
|
-
words: chunk.words,
|
|
1537
|
-
segments: [...chunk.segments]
|
|
1538
|
-
});
|
|
1539
1579
|
}
|
|
1540
|
-
|
|
1580
|
+
merged.push(last);
|
|
1581
|
+
return merged;
|
|
1541
1582
|
}
|
|
1542
1583
|
|
|
1543
1584
|
//#endregion
|
|
1544
|
-
//#region src/wc/
|
|
1545
|
-
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
"char
|
|
1559
|
-
|
|
1560
|
-
const
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1585
|
+
//#region src/wc/wc.ts
|
|
1586
|
+
function wordCounter(text, options = {}) {
|
|
1587
|
+
const mode = resolveMode(options.mode, "chunk");
|
|
1588
|
+
const collectNonWords = Boolean(options.nonWords);
|
|
1589
|
+
const includeWhitespace = Boolean(options.includeWhitespace);
|
|
1590
|
+
const chunks = segmentTextByLocale(text, {
|
|
1591
|
+
latinLanguageHint: options.latinLanguageHint,
|
|
1592
|
+
latinTagHint: options.latinTagHint,
|
|
1593
|
+
latinLocaleHint: options.latinLocaleHint,
|
|
1594
|
+
latinHintRules: options.latinHintRules,
|
|
1595
|
+
useDefaultLatinHints: options.useDefaultLatinHints,
|
|
1596
|
+
hanLanguageHint: options.hanLanguageHint,
|
|
1597
|
+
hanTagHint: options.hanTagHint
|
|
1598
|
+
});
|
|
1599
|
+
if (mode === "char" || mode === "char-collector") {
|
|
1600
|
+
const analyzed = chunks.map((chunk) => analyzeCharChunk(chunk, collectNonWords, includeWhitespace));
|
|
1601
|
+
const total = analyzed.reduce((sum, chunk) => sum + chunk.chars, 0);
|
|
1602
|
+
const counts = collectNonWords ? {
|
|
1603
|
+
words: analyzed.reduce((sum, chunk) => sum + chunk.wordChars, 0),
|
|
1604
|
+
nonWords: analyzed.reduce((sum, chunk) => sum + chunk.nonWordChars, 0),
|
|
1605
|
+
total
|
|
1606
|
+
} : void 0;
|
|
1607
|
+
if (mode === "char") return {
|
|
1608
|
+
total,
|
|
1609
|
+
counts,
|
|
1610
|
+
breakdown: {
|
|
1611
|
+
mode,
|
|
1612
|
+
items: analyzed.map((chunk) => ({
|
|
1613
|
+
locale: chunk.locale,
|
|
1614
|
+
text: chunk.text,
|
|
1615
|
+
chars: chunk.chars,
|
|
1616
|
+
nonWords: chunk.nonWords
|
|
1617
|
+
}))
|
|
1618
|
+
}
|
|
1619
|
+
};
|
|
1620
|
+
return {
|
|
1621
|
+
total,
|
|
1622
|
+
counts,
|
|
1623
|
+
breakdown: {
|
|
1624
|
+
mode,
|
|
1625
|
+
items: aggregateCharsByLocale(analyzed).map((chunk) => ({
|
|
1626
|
+
locale: chunk.locale,
|
|
1627
|
+
chars: chunk.chars,
|
|
1628
|
+
nonWords: chunk.nonWords
|
|
1629
|
+
}))
|
|
1630
|
+
}
|
|
1631
|
+
};
|
|
1632
|
+
}
|
|
1633
|
+
const analyzed = chunks.map((chunk) => analyzeChunk(chunk, collectNonWords, includeWhitespace));
|
|
1634
|
+
const wordsTotal = analyzed.reduce((sum, chunk) => sum + chunk.words, 0);
|
|
1635
|
+
const nonWordsTotal = collectNonWords ? analyzed.reduce((sum, chunk) => {
|
|
1636
|
+
if (!chunk.nonWords) return sum;
|
|
1637
|
+
return sum + getNonWordTotal(chunk.nonWords);
|
|
1638
|
+
}, 0) : 0;
|
|
1639
|
+
const total = analyzed.reduce((sum, chunk) => {
|
|
1640
|
+
let chunkTotal = chunk.words;
|
|
1641
|
+
if (collectNonWords && chunk.nonWords) chunkTotal += getNonWordTotal(chunk.nonWords);
|
|
1642
|
+
return sum + chunkTotal;
|
|
1643
|
+
}, 0);
|
|
1644
|
+
const counts = collectNonWords ? {
|
|
1645
|
+
words: wordsTotal,
|
|
1646
|
+
nonWords: nonWordsTotal,
|
|
1647
|
+
total
|
|
1648
|
+
} : void 0;
|
|
1649
|
+
if (mode === "segments") return {
|
|
1650
|
+
total,
|
|
1651
|
+
counts,
|
|
1652
|
+
breakdown: {
|
|
1653
|
+
mode,
|
|
1654
|
+
items: analyzed.map((chunk) => ({
|
|
1655
|
+
locale: chunk.locale,
|
|
1656
|
+
text: chunk.text,
|
|
1657
|
+
words: chunk.words,
|
|
1658
|
+
segments: chunk.segments,
|
|
1659
|
+
nonWords: chunk.nonWords
|
|
1660
|
+
}))
|
|
1584
1661
|
}
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1662
|
+
};
|
|
1663
|
+
if (mode === "collector") return {
|
|
1664
|
+
total,
|
|
1665
|
+
counts,
|
|
1666
|
+
breakdown: {
|
|
1667
|
+
mode,
|
|
1668
|
+
items: aggregateByLocale(analyzed),
|
|
1669
|
+
nonWords: collectNonWordsAggregate(analyzed, collectNonWords)
|
|
1588
1670
|
}
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
1671
|
+
};
|
|
1672
|
+
return {
|
|
1673
|
+
total,
|
|
1674
|
+
counts,
|
|
1675
|
+
breakdown: {
|
|
1676
|
+
mode,
|
|
1677
|
+
items: analyzed.map((chunk) => ({
|
|
1678
|
+
locale: chunk.locale,
|
|
1679
|
+
text: chunk.text,
|
|
1680
|
+
words: chunk.words,
|
|
1681
|
+
nonWords: chunk.nonWords
|
|
1682
|
+
}))
|
|
1683
|
+
}
|
|
1684
|
+
};
|
|
1596
1685
|
}
|
|
1597
|
-
function
|
|
1598
|
-
|
|
1599
|
-
const normalized = input.trim().toLowerCase();
|
|
1600
|
-
const direct = MODE_ALIASES[normalized];
|
|
1601
|
-
if (direct) return direct;
|
|
1602
|
-
if (isComposedCharCollectorFromTokens(normalized)) return "char-collector";
|
|
1603
|
-
const compact = collapseSeparators(normalized);
|
|
1604
|
-
if (isComposedCharCollectorCompact(compact)) return "char-collector";
|
|
1605
|
-
return MODE_ALIASES[compact] ?? null;
|
|
1686
|
+
function getNonWordTotal(nonWords) {
|
|
1687
|
+
return nonWords.counts.emoji + nonWords.counts.symbols + nonWords.counts.punctuation + (nonWords.counts.whitespace ?? 0);
|
|
1606
1688
|
}
|
|
1607
|
-
function
|
|
1608
|
-
|
|
1689
|
+
function collectNonWordsAggregate(analyzed, enabled) {
|
|
1690
|
+
if (!enabled) return;
|
|
1691
|
+
const collection = createNonWordCollection();
|
|
1692
|
+
for (const chunk of analyzed) {
|
|
1693
|
+
if (!chunk.nonWords) continue;
|
|
1694
|
+
mergeNonWordCollections(collection, chunk.nonWords);
|
|
1695
|
+
}
|
|
1696
|
+
return collection;
|
|
1609
1697
|
}
|
|
1610
1698
|
|
|
1611
1699
|
//#endregion
|
|
1612
|
-
//#region src/wc/
|
|
1613
|
-
|
|
1614
|
-
{
|
|
1615
|
-
tag: "de",
|
|
1616
|
-
pattern: "[äöüÄÖÜß]"
|
|
1617
|
-
},
|
|
1618
|
-
{
|
|
1619
|
-
tag: "es",
|
|
1620
|
-
pattern: "[ñÑ¿¡]"
|
|
1621
|
-
},
|
|
1622
|
-
{
|
|
1623
|
-
tag: "pt",
|
|
1624
|
-
pattern: "[ãõÃÕ]"
|
|
1625
|
-
},
|
|
1626
|
-
{
|
|
1627
|
-
tag: "fr",
|
|
1628
|
-
pattern: "[œŒæÆ]"
|
|
1629
|
-
},
|
|
1630
|
-
{
|
|
1631
|
-
tag: "pl",
|
|
1632
|
-
pattern: "[ąćęłńśźżĄĆĘŁŃŚŹŻ]"
|
|
1633
|
-
},
|
|
1634
|
-
{
|
|
1635
|
-
tag: "tr",
|
|
1636
|
-
pattern: "[ıİğĞşŞ]"
|
|
1637
|
-
},
|
|
1638
|
-
{
|
|
1639
|
-
tag: "ro",
|
|
1640
|
-
pattern: "[ăĂâÂîÎșȘțȚ]"
|
|
1641
|
-
},
|
|
1642
|
-
{
|
|
1643
|
-
tag: "hu",
|
|
1644
|
-
pattern: "[őŐűŰ]"
|
|
1645
|
-
},
|
|
1646
|
-
{
|
|
1647
|
-
tag: "is",
|
|
1648
|
-
pattern: "[ðÐþÞ]"
|
|
1649
|
-
}
|
|
1650
|
-
];
|
|
1651
|
-
const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
|
|
1700
|
+
//#region src/wc/index.ts
|
|
1701
|
+
var wc_default = wordCounter;
|
|
1652
1702
|
|
|
1653
1703
|
//#endregion
|
|
1654
|
-
//#region src/
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
hiragana: /\p{Script=Hiragana}/u,
|
|
1660
|
-
katakana: /\p{Script=Katakana}/u,
|
|
1661
|
-
hangul: /\p{Script=Hangul}/u,
|
|
1662
|
-
han: /\p{Script=Han}/u,
|
|
1663
|
-
latin: /\p{Script=Latin}/u,
|
|
1664
|
-
arabic: /\p{Script=Arabic}/u,
|
|
1665
|
-
cyrillic: /\p{Script=Cyrillic}/u,
|
|
1666
|
-
devanagari: /\p{Script=Devanagari}/u,
|
|
1667
|
-
thai: /\p{Script=Thai}/u
|
|
1668
|
-
};
|
|
1669
|
-
const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
|
|
1670
|
-
function isLatinLocale(locale, context) {
|
|
1671
|
-
if (context) return context.latinLocales.has(locale);
|
|
1672
|
-
return defaultLatinLocales.has(locale);
|
|
1673
|
-
}
|
|
1674
|
-
function resolveLatinHint(options) {
|
|
1675
|
-
const latinTagHint = options.latinTagHint?.trim();
|
|
1676
|
-
if (latinTagHint) return latinTagHint;
|
|
1677
|
-
const latinLanguageHint = options.latinLanguageHint?.trim();
|
|
1678
|
-
if (latinLanguageHint) return latinLanguageHint;
|
|
1679
|
-
const latinLocaleHint = options.latinLocaleHint?.trim();
|
|
1680
|
-
if (latinLocaleHint) return latinLocaleHint;
|
|
1681
|
-
}
|
|
1682
|
-
function resolveHanHint(options) {
|
|
1683
|
-
const hanTagHint = options.hanTagHint?.trim();
|
|
1684
|
-
if (hanTagHint) return hanTagHint;
|
|
1685
|
-
const hanLanguageHint = options.hanLanguageHint?.trim();
|
|
1686
|
-
if (hanLanguageHint) return hanLanguageHint;
|
|
1687
|
-
}
|
|
1688
|
-
function compileLatinHintPattern(pattern, label) {
|
|
1689
|
-
const source = typeof pattern === "string" ? pattern : pattern.source;
|
|
1690
|
-
const hasUnicodeMode = typeof pattern !== "string" && (pattern.flags.includes("u") || pattern.flags.includes("v"));
|
|
1691
|
-
const flags = typeof pattern === "string" ? "u" : hasUnicodeMode ? pattern.flags : `${pattern.flags}u`;
|
|
1692
|
-
if (source.length === 0) throw new Error(`${label}: pattern must not be empty.`);
|
|
1693
|
-
if (source.length > MAX_LATIN_HINT_PATTERN_LENGTH) throw new Error(`${label}: pattern must be at most ${MAX_LATIN_HINT_PATTERN_LENGTH} characters.`);
|
|
1704
|
+
//#region src/markdown/section-count.ts
|
|
1705
|
+
function normalizeText(value) {
|
|
1706
|
+
if (value == null) return "";
|
|
1707
|
+
if (typeof value === "string") return value;
|
|
1708
|
+
if (typeof value === "number" || typeof value === "boolean") return String(value);
|
|
1694
1709
|
try {
|
|
1695
|
-
return
|
|
1696
|
-
} catch
|
|
1697
|
-
|
|
1698
|
-
throw new Error(`${label}: invalid Unicode regex pattern (${message}).`);
|
|
1710
|
+
return JSON.stringify(value);
|
|
1711
|
+
} catch {
|
|
1712
|
+
return String(value);
|
|
1699
1713
|
}
|
|
1700
1714
|
}
|
|
1701
|
-
function
|
|
1702
|
-
if (
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
tag,
|
|
1711
|
-
pattern: compileLatinHintPattern(rule.pattern, label),
|
|
1712
|
-
priority: normalizeLatinHintPriority(rule.priority, label),
|
|
1713
|
-
order
|
|
1714
|
-
};
|
|
1715
|
-
}
|
|
1716
|
-
function resolveLatinHintRules$1(options) {
|
|
1717
|
-
const useDefaultLatinHints = options.useDefaultLatinHints !== false;
|
|
1718
|
-
const customRules = options.latinHintRules ?? [];
|
|
1719
|
-
const combinedRules = [];
|
|
1720
|
-
for (let index = 0; index < customRules.length; index += 1) {
|
|
1721
|
-
const rule = customRules[index];
|
|
1722
|
-
if (!rule) continue;
|
|
1723
|
-
combinedRules.push({
|
|
1724
|
-
rule,
|
|
1725
|
-
label: `Invalid custom Latin hint rule at index ${index}`
|
|
1726
|
-
});
|
|
1727
|
-
}
|
|
1728
|
-
if (useDefaultLatinHints) for (let index = 0; index < DEFAULT_LATIN_HINT_RULES.length; index += 1) {
|
|
1729
|
-
const rule = DEFAULT_LATIN_HINT_RULES[index];
|
|
1730
|
-
if (!rule) continue;
|
|
1731
|
-
combinedRules.push({
|
|
1732
|
-
rule,
|
|
1733
|
-
label: `Invalid default Latin hint rule at index ${index}`
|
|
1734
|
-
});
|
|
1735
|
-
}
|
|
1736
|
-
const resolvedRules = combinedRules.map((entry, index) => compileLatinHintRule(entry.rule, index, entry.label));
|
|
1737
|
-
resolvedRules.sort((left, right) => {
|
|
1738
|
-
if (left.priority !== right.priority) return right.priority - left.priority;
|
|
1739
|
-
return left.order - right.order;
|
|
1715
|
+
function buildPerKeyItems(data, mode, options) {
|
|
1716
|
+
if (!data || typeof data !== "object" || Array.isArray(data)) return [];
|
|
1717
|
+
return Object.entries(data).map(([key, value]) => {
|
|
1718
|
+
const valueText = normalizeText(value);
|
|
1719
|
+
return {
|
|
1720
|
+
name: key,
|
|
1721
|
+
source: "frontmatter",
|
|
1722
|
+
result: wc_default(valueText ? `${key}: ${valueText}` : key, options)
|
|
1723
|
+
};
|
|
1740
1724
|
});
|
|
1741
|
-
return resolvedRules;
|
|
1742
1725
|
}
|
|
1743
|
-
function
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
return {
|
|
1750
|
-
latinHint,
|
|
1751
|
-
hanHint: resolveHanHint(options),
|
|
1752
|
-
latinHintRules,
|
|
1753
|
-
latinLocales
|
|
1754
|
-
};
|
|
1726
|
+
function buildSingleItem(name, text, mode, options, source) {
|
|
1727
|
+
return [{
|
|
1728
|
+
name,
|
|
1729
|
+
source,
|
|
1730
|
+
result: wc_default(text, options)
|
|
1731
|
+
}];
|
|
1755
1732
|
}
|
|
1756
|
-
function
|
|
1757
|
-
|
|
1758
|
-
hint.pattern.lastIndex = 0;
|
|
1759
|
-
if (hint.pattern.test(char)) return hint.tag;
|
|
1760
|
-
}
|
|
1761
|
-
return DEFAULT_LOCALE;
|
|
1733
|
+
function sumTotals(items) {
|
|
1734
|
+
return items.reduce((sum, item) => sum + item.result.total, 0);
|
|
1762
1735
|
}
|
|
1763
|
-
function
|
|
1764
|
-
|
|
1765
|
-
if (
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
|
|
1774
|
-
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE) return previousLocale;
|
|
1778
|
-
if (context.latinHint) return context.latinHint;
|
|
1779
|
-
return DEFAULT_LOCALE;
|
|
1736
|
+
function countSections(input, section, options = {}) {
|
|
1737
|
+
const mode = options.mode ?? "chunk";
|
|
1738
|
+
if (section === "all") {
|
|
1739
|
+
const result = wc_default(input, options);
|
|
1740
|
+
return {
|
|
1741
|
+
section,
|
|
1742
|
+
total: result.total,
|
|
1743
|
+
frontmatterType: null,
|
|
1744
|
+
items: [{
|
|
1745
|
+
name: "all",
|
|
1746
|
+
source: "content",
|
|
1747
|
+
result
|
|
1748
|
+
}]
|
|
1749
|
+
};
|
|
1780
1750
|
}
|
|
1781
|
-
|
|
1751
|
+
const parsed = parseMarkdown(input);
|
|
1752
|
+
const frontmatterText = parsed.frontmatter ?? "";
|
|
1753
|
+
const contentText = parsed.content ?? "";
|
|
1754
|
+
let items = [];
|
|
1755
|
+
if (section === "frontmatter") items = buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter");
|
|
1756
|
+
else if (section === "content") items = buildSingleItem("content", contentText, mode, options, "content");
|
|
1757
|
+
else if (section === "split") items = [...buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter"), ...buildSingleItem("content", contentText, mode, options, "content")];
|
|
1758
|
+
else if (section === "per-key") items = buildPerKeyItems(parsed.data, mode, options);
|
|
1759
|
+
else if (section === "split-per-key") items = [...buildPerKeyItems(parsed.data, mode, options), ...buildSingleItem("content", contentText, mode, options, "content")];
|
|
1760
|
+
return {
|
|
1761
|
+
section,
|
|
1762
|
+
total: sumTotals(items),
|
|
1763
|
+
frontmatterType: parsed.frontmatterType,
|
|
1764
|
+
items
|
|
1765
|
+
};
|
|
1782
1766
|
}
|
|
1783
1767
|
|
|
1784
1768
|
//#endregion
|
|
1785
|
-
//#region src/
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
const
|
|
1790
|
-
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1769
|
+
//#region src/cli/batch/aggregate.ts
|
|
1770
|
+
function mergeWordCounterResult(left, right, preserveCollectorSegments) {
|
|
1771
|
+
if (left.breakdown.mode !== right.breakdown.mode) throw new Error("Cannot merge different breakdown modes.");
|
|
1772
|
+
const total = left.total + right.total;
|
|
1773
|
+
const counts = left.counts || right.counts ? {
|
|
1774
|
+
words: (left.counts?.words ?? 0) + (right.counts?.words ?? 0),
|
|
1775
|
+
nonWords: (left.counts?.nonWords ?? 0) + (right.counts?.nonWords ?? 0),
|
|
1776
|
+
total: (left.counts?.total ?? 0) + (right.counts?.total ?? 0)
|
|
1777
|
+
} : void 0;
|
|
1778
|
+
if (left.breakdown.mode === "chunk" && right.breakdown.mode === "chunk") return {
|
|
1779
|
+
total,
|
|
1780
|
+
counts,
|
|
1781
|
+
breakdown: {
|
|
1782
|
+
mode: "chunk",
|
|
1783
|
+
items: [...left.breakdown.items, ...right.breakdown.items]
|
|
1799
1784
|
}
|
|
1800
|
-
if (HARD_BOUNDARY_REGEX.test(char)) sawCarryBoundary = true;
|
|
1801
1785
|
};
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
bufferHasScript = detected !== null;
|
|
1809
|
-
updateCarryBoundaryState(detected, char);
|
|
1810
|
-
continue;
|
|
1786
|
+
if (left.breakdown.mode === "segments" && right.breakdown.mode === "segments") return {
|
|
1787
|
+
total,
|
|
1788
|
+
counts,
|
|
1789
|
+
breakdown: {
|
|
1790
|
+
mode: "segments",
|
|
1791
|
+
items: [...left.breakdown.items, ...right.breakdown.items]
|
|
1811
1792
|
}
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1793
|
+
};
|
|
1794
|
+
if (left.breakdown.mode === "char" && right.breakdown.mode === "char") return {
|
|
1795
|
+
total,
|
|
1796
|
+
counts,
|
|
1797
|
+
breakdown: {
|
|
1798
|
+
mode: "char",
|
|
1799
|
+
items: [...left.breakdown.items, ...right.breakdown.items]
|
|
1818
1800
|
}
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
|
|
1801
|
+
};
|
|
1802
|
+
if (left.breakdown.mode === "char-collector" && right.breakdown.mode === "char-collector") {
|
|
1803
|
+
const localeOrder = [];
|
|
1804
|
+
const mergedByLocale = /* @__PURE__ */ new Map();
|
|
1805
|
+
const addItems = (items) => {
|
|
1806
|
+
for (const item of items) {
|
|
1807
|
+
const existing = mergedByLocale.get(item.locale);
|
|
1808
|
+
if (existing) {
|
|
1809
|
+
existing.chars += item.chars;
|
|
1810
|
+
if (item.nonWords) {
|
|
1811
|
+
if (!existing.nonWords) existing.nonWords = createNonWordCollection();
|
|
1812
|
+
mergeNonWordCollections(existing.nonWords, item.nonWords);
|
|
1813
|
+
}
|
|
1827
1814
|
continue;
|
|
1828
1815
|
}
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1816
|
+
localeOrder.push(item.locale);
|
|
1817
|
+
mergedByLocale.set(item.locale, {
|
|
1818
|
+
locale: item.locale,
|
|
1819
|
+
chars: item.chars,
|
|
1820
|
+
nonWords: item.nonWords ? mergeNonWordCollections(createNonWordCollection(), item.nonWords) : void 0
|
|
1834
1821
|
});
|
|
1835
|
-
currentLocale = targetLocale;
|
|
1836
|
-
buffer = `${suffix}${char}`;
|
|
1837
|
-
bufferHasScript = true;
|
|
1838
|
-
updateCarryBoundaryState(detected, char);
|
|
1839
|
-
continue;
|
|
1840
1822
|
}
|
|
1841
|
-
chunks.push({
|
|
1842
|
-
locale: currentLocale,
|
|
1843
|
-
text: buffer
|
|
1844
|
-
});
|
|
1845
|
-
currentLocale = targetLocale;
|
|
1846
|
-
buffer = char;
|
|
1847
|
-
bufferHasScript = true;
|
|
1848
|
-
updateCarryBoundaryState(detected, char);
|
|
1849
|
-
continue;
|
|
1850
|
-
}
|
|
1851
|
-
buffer += char;
|
|
1852
|
-
if (detected !== null) bufferHasScript = true;
|
|
1853
|
-
updateCarryBoundaryState(detected, char);
|
|
1854
|
-
}
|
|
1855
|
-
if (buffer.length > 0) chunks.push({
|
|
1856
|
-
locale: currentLocale,
|
|
1857
|
-
text: buffer
|
|
1858
|
-
});
|
|
1859
|
-
return mergeAdjacentChunks(chunks);
|
|
1860
|
-
}
|
|
1861
|
-
function findLastLatinPromotionBreakIndex(buffer) {
|
|
1862
|
-
for (let index = buffer.length - 1; index >= 0; index -= 1) {
|
|
1863
|
-
const char = buffer[index];
|
|
1864
|
-
if (!char) continue;
|
|
1865
|
-
if (LATIN_PROMOTION_BREAK_REGEX.test(char)) return index;
|
|
1866
|
-
}
|
|
1867
|
-
return -1;
|
|
1868
|
-
}
|
|
1869
|
-
function mergeAdjacentChunks(chunks) {
|
|
1870
|
-
if (chunks.length === 0) return chunks;
|
|
1871
|
-
const merged = [];
|
|
1872
|
-
let last = chunks[0];
|
|
1873
|
-
for (let i = 1; i < chunks.length; i++) {
|
|
1874
|
-
const chunk = chunks[i];
|
|
1875
|
-
if (chunk.locale === last.locale) last = {
|
|
1876
|
-
locale: last.locale,
|
|
1877
|
-
text: last.text + chunk.text
|
|
1878
1823
|
};
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
}
|
|
1883
|
-
}
|
|
1884
|
-
merged.push(last);
|
|
1885
|
-
return merged;
|
|
1886
|
-
}
|
|
1887
|
-
|
|
1888
|
-
//#endregion
|
|
1889
|
-
//#region src/wc/wc.ts
|
|
1890
|
-
function wordCounter(text, options = {}) {
|
|
1891
|
-
const mode = resolveMode(options.mode, "chunk");
|
|
1892
|
-
const collectNonWords = Boolean(options.nonWords);
|
|
1893
|
-
const includeWhitespace = Boolean(options.includeWhitespace);
|
|
1894
|
-
const chunks = segmentTextByLocale(text, {
|
|
1895
|
-
latinLanguageHint: options.latinLanguageHint,
|
|
1896
|
-
latinTagHint: options.latinTagHint,
|
|
1897
|
-
latinLocaleHint: options.latinLocaleHint,
|
|
1898
|
-
latinHintRules: options.latinHintRules,
|
|
1899
|
-
useDefaultLatinHints: options.useDefaultLatinHints,
|
|
1900
|
-
hanLanguageHint: options.hanLanguageHint,
|
|
1901
|
-
hanTagHint: options.hanTagHint
|
|
1902
|
-
});
|
|
1903
|
-
if (mode === "char" || mode === "char-collector") {
|
|
1904
|
-
const analyzed = chunks.map((chunk) => analyzeCharChunk(chunk, collectNonWords, includeWhitespace));
|
|
1905
|
-
const total = analyzed.reduce((sum, chunk) => sum + chunk.chars, 0);
|
|
1906
|
-
const counts = collectNonWords ? {
|
|
1907
|
-
words: analyzed.reduce((sum, chunk) => sum + chunk.wordChars, 0),
|
|
1908
|
-
nonWords: analyzed.reduce((sum, chunk) => sum + chunk.nonWordChars, 0),
|
|
1909
|
-
total
|
|
1910
|
-
} : void 0;
|
|
1911
|
-
if (mode === "char") return {
|
|
1824
|
+
addItems(left.breakdown.items);
|
|
1825
|
+
addItems(right.breakdown.items);
|
|
1826
|
+
return {
|
|
1912
1827
|
total,
|
|
1913
1828
|
counts,
|
|
1914
1829
|
breakdown: {
|
|
1915
|
-
mode,
|
|
1916
|
-
items:
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
|
|
1830
|
+
mode: "char-collector",
|
|
1831
|
+
items: localeOrder.map((locale) => {
|
|
1832
|
+
const value = mergedByLocale.get(locale);
|
|
1833
|
+
if (!value) throw new Error(`Missing char-collector entry for locale: ${locale}`);
|
|
1834
|
+
return value;
|
|
1835
|
+
})
|
|
1836
|
+
}
|
|
1837
|
+
};
|
|
1838
|
+
}
|
|
1839
|
+
if (left.breakdown.mode === "collector" && right.breakdown.mode === "collector") {
|
|
1840
|
+
const localeOrder = [];
|
|
1841
|
+
const mergedByLocale = /* @__PURE__ */ new Map();
|
|
1842
|
+
const addItems = (items) => {
|
|
1843
|
+
for (const item of items) {
|
|
1844
|
+
const existing = mergedByLocale.get(item.locale);
|
|
1845
|
+
if (existing) {
|
|
1846
|
+
existing.words += item.words;
|
|
1847
|
+
if (preserveCollectorSegments) appendAll(existing.segments, item.segments);
|
|
1848
|
+
continue;
|
|
1849
|
+
}
|
|
1850
|
+
localeOrder.push(item.locale);
|
|
1851
|
+
mergedByLocale.set(item.locale, {
|
|
1852
|
+
locale: item.locale,
|
|
1853
|
+
words: item.words,
|
|
1854
|
+
segments: preserveCollectorSegments ? [...item.segments] : []
|
|
1855
|
+
});
|
|
1922
1856
|
}
|
|
1923
1857
|
};
|
|
1858
|
+
addItems(left.breakdown.items);
|
|
1859
|
+
addItems(right.breakdown.items);
|
|
1860
|
+
let mergedNonWords;
|
|
1861
|
+
if (left.breakdown.nonWords || right.breakdown.nonWords) {
|
|
1862
|
+
mergedNonWords = createNonWordCollection();
|
|
1863
|
+
if (left.breakdown.nonWords) mergeNonWordCollections(mergedNonWords, left.breakdown.nonWords);
|
|
1864
|
+
if (right.breakdown.nonWords) mergeNonWordCollections(mergedNonWords, right.breakdown.nonWords);
|
|
1865
|
+
}
|
|
1924
1866
|
return {
|
|
1925
1867
|
total,
|
|
1926
1868
|
counts,
|
|
1927
1869
|
breakdown: {
|
|
1928
|
-
mode,
|
|
1929
|
-
items:
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
})
|
|
1870
|
+
mode: "collector",
|
|
1871
|
+
items: localeOrder.map((locale) => {
|
|
1872
|
+
const value = mergedByLocale.get(locale);
|
|
1873
|
+
if (!value) throw new Error(`Missing collector entry for locale: ${locale}`);
|
|
1874
|
+
return value;
|
|
1875
|
+
}),
|
|
1876
|
+
nonWords: mergedNonWords
|
|
1934
1877
|
}
|
|
1935
1878
|
};
|
|
1936
1879
|
}
|
|
1937
|
-
const analyzed = chunks.map((chunk) => analyzeChunk(chunk, collectNonWords, includeWhitespace));
|
|
1938
|
-
const wordsTotal = analyzed.reduce((sum, chunk) => sum + chunk.words, 0);
|
|
1939
|
-
const nonWordsTotal = collectNonWords ? analyzed.reduce((sum, chunk) => {
|
|
1940
|
-
if (!chunk.nonWords) return sum;
|
|
1941
|
-
return sum + getNonWordTotal(chunk.nonWords);
|
|
1942
|
-
}, 0) : 0;
|
|
1943
|
-
const total = analyzed.reduce((sum, chunk) => {
|
|
1944
|
-
let chunkTotal = chunk.words;
|
|
1945
|
-
if (collectNonWords && chunk.nonWords) chunkTotal += getNonWordTotal(chunk.nonWords);
|
|
1946
|
-
return sum + chunkTotal;
|
|
1947
|
-
}, 0);
|
|
1948
|
-
const counts = collectNonWords ? {
|
|
1949
|
-
words: wordsTotal,
|
|
1950
|
-
nonWords: nonWordsTotal,
|
|
1951
|
-
total
|
|
1952
|
-
} : void 0;
|
|
1953
|
-
if (mode === "segments") return {
|
|
1954
|
-
total,
|
|
1955
|
-
counts,
|
|
1956
|
-
breakdown: {
|
|
1957
|
-
mode,
|
|
1958
|
-
items: analyzed.map((chunk) => ({
|
|
1959
|
-
locale: chunk.locale,
|
|
1960
|
-
text: chunk.text,
|
|
1961
|
-
words: chunk.words,
|
|
1962
|
-
segments: chunk.segments,
|
|
1963
|
-
nonWords: chunk.nonWords
|
|
1964
|
-
}))
|
|
1965
|
-
}
|
|
1966
|
-
};
|
|
1967
|
-
if (mode === "collector") return {
|
|
1968
|
-
total,
|
|
1969
|
-
counts,
|
|
1970
|
-
breakdown: {
|
|
1971
|
-
mode,
|
|
1972
|
-
items: aggregateByLocale(analyzed),
|
|
1973
|
-
nonWords: collectNonWordsAggregate(analyzed, collectNonWords)
|
|
1974
|
-
}
|
|
1975
|
-
};
|
|
1976
1880
|
return {
|
|
1977
1881
|
total,
|
|
1978
1882
|
counts,
|
|
1979
|
-
breakdown:
|
|
1980
|
-
mode,
|
|
1981
|
-
items: analyzed.map((chunk) => ({
|
|
1982
|
-
locale: chunk.locale,
|
|
1983
|
-
text: chunk.text,
|
|
1984
|
-
words: chunk.words,
|
|
1985
|
-
nonWords: chunk.nonWords
|
|
1986
|
-
}))
|
|
1987
|
-
}
|
|
1883
|
+
breakdown: left.breakdown
|
|
1988
1884
|
};
|
|
1989
1885
|
}
|
|
1990
|
-
function
|
|
1991
|
-
|
|
1992
|
-
|
|
1993
|
-
|
|
1994
|
-
|
|
1995
|
-
|
|
1996
|
-
|
|
1997
|
-
if (!
|
|
1998
|
-
|
|
1886
|
+
function aggregateWordCounterResults(results, preserveCollectorSegments) {
|
|
1887
|
+
if (results.length === 0) return wc_default("", { mode: "chunk" });
|
|
1888
|
+
const first = results[0];
|
|
1889
|
+
if (!first) return wc_default("", { mode: "chunk" });
|
|
1890
|
+
let aggregate = first;
|
|
1891
|
+
for (let index = 1; index < results.length; index += 1) {
|
|
1892
|
+
const current = results[index];
|
|
1893
|
+
if (!current) continue;
|
|
1894
|
+
aggregate = mergeWordCounterResult(aggregate, current, preserveCollectorSegments);
|
|
1999
1895
|
}
|
|
2000
|
-
return
|
|
1896
|
+
return aggregate;
|
|
2001
1897
|
}
|
|
2002
|
-
|
|
2003
|
-
|
|
2004
|
-
//#region src/wc/index.ts
|
|
2005
|
-
var wc_default = wordCounter;
|
|
2006
|
-
|
|
2007
|
-
//#endregion
|
|
2008
|
-
//#region src/markdown/section-count.ts
|
|
2009
|
-
function normalizeText(value) {
|
|
2010
|
-
if (value == null) return "";
|
|
2011
|
-
if (typeof value === "string") return value;
|
|
2012
|
-
if (typeof value === "number" || typeof value === "boolean") return String(value);
|
|
2013
|
-
try {
|
|
2014
|
-
return JSON.stringify(value);
|
|
2015
|
-
} catch {
|
|
2016
|
-
return String(value);
|
|
2017
|
-
}
|
|
1898
|
+
function buildSectionKey(name, source) {
|
|
1899
|
+
return `${source}:${name}`;
|
|
2018
1900
|
}
|
|
2019
|
-
function
|
|
2020
|
-
if (
|
|
2021
|
-
|
|
2022
|
-
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
|
|
2027
|
-
|
|
2028
|
-
|
|
1901
|
+
function aggregateSectionedResults(results, preserveCollectorSegments) {
|
|
1902
|
+
if (results.length === 0) return {
|
|
1903
|
+
section: "all",
|
|
1904
|
+
total: 0,
|
|
1905
|
+
frontmatterType: null,
|
|
1906
|
+
items: []
|
|
1907
|
+
};
|
|
1908
|
+
const section = results[0]?.section ?? "all";
|
|
1909
|
+
const grouped = /* @__PURE__ */ new Map();
|
|
1910
|
+
let total = 0;
|
|
1911
|
+
let frontmatterType = results[0]?.frontmatterType ?? null;
|
|
1912
|
+
for (const result of results) {
|
|
1913
|
+
total += result.total;
|
|
1914
|
+
if (result.section !== section) throw new Error("Cannot aggregate section results with different section modes.");
|
|
1915
|
+
if (frontmatterType !== result.frontmatterType) frontmatterType = null;
|
|
1916
|
+
for (const item of result.items) {
|
|
1917
|
+
const key = buildSectionKey(item.name, item.source);
|
|
1918
|
+
const existing = grouped.get(key);
|
|
1919
|
+
if (!existing) {
|
|
1920
|
+
grouped.set(key, {
|
|
1921
|
+
name: item.name,
|
|
1922
|
+
source: item.source,
|
|
1923
|
+
items: [item.result]
|
|
1924
|
+
});
|
|
1925
|
+
continue;
|
|
1926
|
+
}
|
|
1927
|
+
existing.items.push(item.result);
|
|
1928
|
+
}
|
|
1929
|
+
}
|
|
1930
|
+
const sourceOrder = new Map([["frontmatter", 0], ["content", 1]]);
|
|
1931
|
+
const items = [...grouped.values()].sort((left, right) => {
|
|
1932
|
+
const sourceDiff = (sourceOrder.get(left.source) ?? 0) - (sourceOrder.get(right.source) ?? 0);
|
|
1933
|
+
if (sourceDiff !== 0) return sourceDiff;
|
|
1934
|
+
return left.name.localeCompare(right.name);
|
|
1935
|
+
}).map((entry) => ({
|
|
1936
|
+
name: entry.name,
|
|
1937
|
+
source: entry.source,
|
|
1938
|
+
result: aggregateWordCounterResults(entry.items, preserveCollectorSegments)
|
|
1939
|
+
}));
|
|
1940
|
+
return {
|
|
1941
|
+
section,
|
|
1942
|
+
total,
|
|
1943
|
+
frontmatterType,
|
|
1944
|
+
items
|
|
1945
|
+
};
|
|
2029
1946
|
}
|
|
2030
|
-
function
|
|
2031
|
-
return
|
|
2032
|
-
|
|
2033
|
-
source,
|
|
2034
|
-
result: wc_default(text, options)
|
|
2035
|
-
}];
|
|
1947
|
+
function stripCollectorSegmentsFromWordCounterResult(result) {
|
|
1948
|
+
if (result.breakdown.mode !== "collector") return;
|
|
1949
|
+
for (const item of result.breakdown.items) item.segments = [];
|
|
2036
1950
|
}
|
|
2037
|
-
function
|
|
2038
|
-
|
|
1951
|
+
function stripCollectorSegmentsFromSectionedResult(result) {
|
|
1952
|
+
for (const item of result.items) stripCollectorSegmentsFromWordCounterResult(item.result);
|
|
2039
1953
|
}
|
|
2040
|
-
function
|
|
2041
|
-
|
|
2042
|
-
|
|
2043
|
-
|
|
2044
|
-
|
|
1954
|
+
function compactCollectorSegmentsInCountResult(result) {
|
|
1955
|
+
if ("section" in result) {
|
|
1956
|
+
stripCollectorSegmentsFromSectionedResult(result);
|
|
1957
|
+
return;
|
|
1958
|
+
}
|
|
1959
|
+
stripCollectorSegmentsFromWordCounterResult(result);
|
|
1960
|
+
}
|
|
1961
|
+
function finalizeBatchSummaryFromFileResults(files, section, wcOptions, options = {}) {
|
|
1962
|
+
const preserveCollectorSegments = options.preserveCollectorSegments ?? true;
|
|
1963
|
+
if (!preserveCollectorSegments) for (const file of files) compactCollectorSegmentsInCountResult(file.result);
|
|
1964
|
+
options.onFinalizeStart?.();
|
|
1965
|
+
if (files.length === 0) return {
|
|
1966
|
+
files,
|
|
1967
|
+
skipped: [],
|
|
1968
|
+
aggregate: section === "all" ? wc_default("", wcOptions) : {
|
|
2045
1969
|
section,
|
|
2046
|
-
total:
|
|
1970
|
+
total: 0,
|
|
2047
1971
|
frontmatterType: null,
|
|
2048
|
-
items: [
|
|
2049
|
-
|
|
2050
|
-
|
|
2051
|
-
result
|
|
2052
|
-
}]
|
|
2053
|
-
};
|
|
2054
|
-
}
|
|
2055
|
-
const parsed = parseMarkdown(input);
|
|
2056
|
-
const frontmatterText = parsed.frontmatter ?? "";
|
|
2057
|
-
const contentText = parsed.content ?? "";
|
|
2058
|
-
let items = [];
|
|
2059
|
-
if (section === "frontmatter") items = buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter");
|
|
2060
|
-
else if (section === "content") items = buildSingleItem("content", contentText, mode, options, "content");
|
|
2061
|
-
else if (section === "split") items = [...buildSingleItem("frontmatter", frontmatterText, mode, options, "frontmatter"), ...buildSingleItem("content", contentText, mode, options, "content")];
|
|
2062
|
-
else if (section === "per-key") items = buildPerKeyItems(parsed.data, mode, options);
|
|
2063
|
-
else if (section === "split-per-key") items = [...buildPerKeyItems(parsed.data, mode, options), ...buildSingleItem("content", contentText, mode, options, "content")];
|
|
1972
|
+
items: []
|
|
1973
|
+
}
|
|
1974
|
+
};
|
|
2064
1975
|
return {
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
|
|
2068
|
-
items
|
|
1976
|
+
files,
|
|
1977
|
+
skipped: [],
|
|
1978
|
+
aggregate: section === "all" ? aggregateWordCounterResults(files.map((file) => file.result), preserveCollectorSegments) : aggregateSectionedResults(files.map((file) => file.result), preserveCollectorSegments)
|
|
2069
1979
|
};
|
|
2070
1980
|
}
|
|
2071
1981
|
|
|
2072
1982
|
//#endregion
|
|
2073
|
-
//#region src/cli/batch/
|
|
2074
|
-
function
|
|
2075
|
-
if (
|
|
2076
|
-
const
|
|
2077
|
-
const
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
mode: "chunk",
|
|
2087
|
-
items: [...left.breakdown.items, ...right.breakdown.items]
|
|
1983
|
+
//#region src/cli/batch/jobs/queue.ts
|
|
1984
|
+
async function runBoundedQueue(total, requestedJobs, worker) {
|
|
1985
|
+
if (total === 0) return [];
|
|
1986
|
+
const safeRequestedJobs = Number.isFinite(requestedJobs) ? Math.floor(requestedJobs) : 1;
|
|
1987
|
+
const concurrency = Math.max(1, Math.min(total, safeRequestedJobs));
|
|
1988
|
+
const results = new Array(total);
|
|
1989
|
+
let nextIndex = 0;
|
|
1990
|
+
const runWorker = async () => {
|
|
1991
|
+
while (true) {
|
|
1992
|
+
const current = nextIndex;
|
|
1993
|
+
nextIndex += 1;
|
|
1994
|
+
if (current >= total) return;
|
|
1995
|
+
results[current] = await worker(current);
|
|
2088
1996
|
}
|
|
2089
1997
|
};
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
1998
|
+
await Promise.all(Array.from({ length: concurrency }, () => runWorker()));
|
|
1999
|
+
return results;
|
|
2000
|
+
}
|
|
2001
|
+
|
|
2002
|
+
//#endregion
|
|
2003
|
+
//#region src/cli/path/load.ts
|
|
2004
|
+
function isProbablyBinary(buffer) {
|
|
2005
|
+
if (buffer.length === 0) return false;
|
|
2006
|
+
const sampleSize = Math.min(buffer.length, 1024);
|
|
2007
|
+
let suspicious = 0;
|
|
2008
|
+
for (let index = 0; index < sampleSize; index += 1) {
|
|
2009
|
+
const byte = buffer[index] ?? 0;
|
|
2010
|
+
if (byte === 0) return true;
|
|
2011
|
+
if (byte === 9 || byte === 10 || byte === 13) continue;
|
|
2012
|
+
if (byte >= 32 && byte <= 126) continue;
|
|
2013
|
+
if (byte >= 128) continue;
|
|
2014
|
+
suspicious += 1;
|
|
2015
|
+
}
|
|
2016
|
+
return suspicious / sampleSize > .3;
|
|
2017
|
+
}
|
|
2018
|
+
|
|
2019
|
+
//#endregion
|
|
2020
|
+
//#region src/cli/batch/jobs/read-input.ts
|
|
2021
|
+
async function readBatchInput(path, options) {
|
|
2022
|
+
if (!path) return {
|
|
2023
|
+
type: "skip",
|
|
2024
|
+
path: "",
|
|
2025
|
+
reason: "not readable: missing path"
|
|
2097
2026
|
};
|
|
2098
|
-
|
|
2099
|
-
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
|
|
2103
|
-
|
|
2104
|
-
|
|
2027
|
+
let buffer;
|
|
2028
|
+
try {
|
|
2029
|
+
buffer = await readFile(path);
|
|
2030
|
+
} catch (error) {
|
|
2031
|
+
if (isResourceLimitError(error)) throw createResourceLimitError(path, error, options.requestedJobs, options.limits);
|
|
2032
|
+
return {
|
|
2033
|
+
type: "skip",
|
|
2034
|
+
path,
|
|
2035
|
+
reason: `not readable: ${error instanceof Error ? error.message : String(error)}`
|
|
2036
|
+
};
|
|
2037
|
+
}
|
|
2038
|
+
if (isProbablyBinary(buffer)) return {
|
|
2039
|
+
type: "skip",
|
|
2040
|
+
path,
|
|
2041
|
+
reason: "binary file"
|
|
2105
2042
|
};
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
|
|
2114
|
-
|
|
2115
|
-
|
|
2116
|
-
|
|
2117
|
-
|
|
2118
|
-
|
|
2043
|
+
return {
|
|
2044
|
+
type: "file",
|
|
2045
|
+
path,
|
|
2046
|
+
content: buffer.toString("utf8")
|
|
2047
|
+
};
|
|
2048
|
+
}
|
|
2049
|
+
|
|
2050
|
+
//#endregion
|
|
2051
|
+
//#region src/cli/batch/jobs/load-count.ts
|
|
2052
|
+
async function countBatchInputsWithJobs(filePaths, options) {
|
|
2053
|
+
const limits = resolveBatchJobsLimit();
|
|
2054
|
+
const total = filePaths.length;
|
|
2055
|
+
let completed = 0;
|
|
2056
|
+
const entries = await runBoundedQueue(filePaths.length, options.jobs, async (index) => {
|
|
2057
|
+
const loaded = await readBatchInput(filePaths[index], {
|
|
2058
|
+
requestedJobs: options.jobs,
|
|
2059
|
+
limits
|
|
2060
|
+
});
|
|
2061
|
+
if (loaded.type === "skip") {
|
|
2062
|
+
completed += 1;
|
|
2063
|
+
options.onFileProcessed?.({
|
|
2064
|
+
completed,
|
|
2065
|
+
total
|
|
2066
|
+
});
|
|
2067
|
+
return {
|
|
2068
|
+
type: "skip",
|
|
2069
|
+
skip: {
|
|
2070
|
+
path: loaded.path,
|
|
2071
|
+
reason: loaded.reason
|
|
2119
2072
|
}
|
|
2120
|
-
|
|
2121
|
-
|
|
2122
|
-
|
|
2123
|
-
|
|
2124
|
-
|
|
2125
|
-
|
|
2126
|
-
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
addItems(right.breakdown.items);
|
|
2073
|
+
};
|
|
2074
|
+
}
|
|
2075
|
+
const result = options.section === "all" ? wc_default(loaded.content, options.wcOptions) : countSections(loaded.content, options.section, options.wcOptions);
|
|
2076
|
+
if (!options.preserveCollectorSegments) compactCollectorSegmentsInCountResult(result);
|
|
2077
|
+
completed += 1;
|
|
2078
|
+
options.onFileProcessed?.({
|
|
2079
|
+
completed,
|
|
2080
|
+
total
|
|
2081
|
+
});
|
|
2130
2082
|
return {
|
|
2131
|
-
|
|
2132
|
-
|
|
2133
|
-
|
|
2134
|
-
|
|
2135
|
-
items: localeOrder.map((locale) => {
|
|
2136
|
-
const value = mergedByLocale.get(locale);
|
|
2137
|
-
if (!value) throw new Error(`Missing char-collector entry for locale: ${locale}`);
|
|
2138
|
-
return value;
|
|
2139
|
-
})
|
|
2083
|
+
type: "file",
|
|
2084
|
+
file: {
|
|
2085
|
+
path: loaded.path,
|
|
2086
|
+
result
|
|
2140
2087
|
}
|
|
2141
2088
|
};
|
|
2089
|
+
});
|
|
2090
|
+
const files = [];
|
|
2091
|
+
const skipped = [];
|
|
2092
|
+
for (const entry of entries) {
|
|
2093
|
+
if (entry.type === "file") {
|
|
2094
|
+
files.push(entry.file);
|
|
2095
|
+
continue;
|
|
2096
|
+
}
|
|
2097
|
+
skipped.push(entry.skip);
|
|
2142
2098
|
}
|
|
2143
|
-
|
|
2144
|
-
|
|
2145
|
-
|
|
2146
|
-
|
|
2147
|
-
|
|
2148
|
-
|
|
2149
|
-
|
|
2150
|
-
|
|
2151
|
-
|
|
2152
|
-
|
|
2153
|
-
|
|
2154
|
-
|
|
2155
|
-
|
|
2156
|
-
|
|
2157
|
-
|
|
2158
|
-
|
|
2099
|
+
return {
|
|
2100
|
+
files,
|
|
2101
|
+
skipped
|
|
2102
|
+
};
|
|
2103
|
+
}
|
|
2104
|
+
|
|
2105
|
+
//#endregion
|
|
2106
|
+
//#region src/cli/batch/jobs/load-count-worker.ts
|
|
2107
|
+
var WorkerRouteUnavailableError = class extends Error {};
|
|
2108
|
+
function isFallbackFriendlyWorkerError(error) {
|
|
2109
|
+
if (typeof error !== "object" || error === null) return false;
|
|
2110
|
+
const code = "code" in error ? String(error.code) : "";
|
|
2111
|
+
if (code === "ERR_WORKER_PATH" || code === "ERR_WORKER_UNSUPPORTED_EXTENSION" || code === "ERR_UNKNOWN_FILE_EXTENSION" || code === "ERR_MODULE_NOT_FOUND") return true;
|
|
2112
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2113
|
+
return message.includes("Unknown file extension") || message.includes("Cannot find module");
|
|
2114
|
+
}
|
|
2115
|
+
async function countBatchInputsWithWorkerJobs(filePaths, options) {
|
|
2116
|
+
if (process.env.WORD_COUNTER_DISABLE_WORKER_JOBS === "1" || process.env.WORD_COUNTER_DISABLE_EXPERIMENTAL_WORKERS === "1") throw new WorkerRouteUnavailableError("Worker route disabled by environment.");
|
|
2117
|
+
let workerPoolModule;
|
|
2118
|
+
try {
|
|
2119
|
+
workerPoolModule = await import("./worker-pool.mjs");
|
|
2120
|
+
} catch (error) {
|
|
2121
|
+
throw new WorkerRouteUnavailableError(`Worker route unavailable: ${error instanceof Error ? error.message : String(error)}`);
|
|
2122
|
+
}
|
|
2123
|
+
try {
|
|
2124
|
+
return await workerPoolModule.countBatchInputsWithWorkerPool({
|
|
2125
|
+
filePaths,
|
|
2126
|
+
jobs: options.jobs,
|
|
2127
|
+
section: options.section,
|
|
2128
|
+
wcOptions: options.wcOptions,
|
|
2129
|
+
preserveCollectorSegments: options.preserveCollectorSegments,
|
|
2130
|
+
onFileProcessed: options.onFileProcessed
|
|
2131
|
+
});
|
|
2132
|
+
} catch (error) {
|
|
2133
|
+
if (error instanceof workerPoolModule.WorkerPoolTaskFatalError) {
|
|
2134
|
+
if (error.code === "EMFILE" || error.code === "ENFILE") throw createResourceLimitError(error.path, {
|
|
2135
|
+
code: error.code,
|
|
2136
|
+
message: error.message
|
|
2137
|
+
}, options.jobs, resolveBatchJobsLimit());
|
|
2138
|
+
throw new Error(error.message);
|
|
2139
|
+
}
|
|
2140
|
+
if (error instanceof workerPoolModule.WorkerPoolUnavailableError || isFallbackFriendlyWorkerError(error)) throw new WorkerRouteUnavailableError(`Worker route unavailable: ${error instanceof Error ? error.message : String(error)}`);
|
|
2141
|
+
throw error;
|
|
2142
|
+
}
|
|
2143
|
+
}
|
|
2144
|
+
|
|
2145
|
+
//#endregion
|
|
2146
|
+
//#region src/cli/batch/jobs/render.ts
|
|
2147
|
+
function finalizeBatchJobsSummary(files, section, wcOptions, options = {}) {
|
|
2148
|
+
return finalizeBatchSummaryFromFileResults(files, section, wcOptions, {
|
|
2149
|
+
onFinalizeStart: options.onFinalizeStart,
|
|
2150
|
+
preserveCollectorSegments: options.preserveCollectorSegments
|
|
2151
|
+
});
|
|
2152
|
+
}
|
|
2153
|
+
|
|
2154
|
+
//#endregion
|
|
2155
|
+
//#region src/cli/path/resolve.ts
|
|
2156
|
+
async function expandDirectory(rootPath, directoryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats) {
|
|
2157
|
+
let entries;
|
|
2158
|
+
try {
|
|
2159
|
+
entries = await readdir(directoryPath, {
|
|
2160
|
+
withFileTypes: true,
|
|
2161
|
+
encoding: "utf8"
|
|
2162
|
+
});
|
|
2163
|
+
} catch (error) {
|
|
2164
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2165
|
+
skipped.push({
|
|
2166
|
+
path: directoryPath,
|
|
2167
|
+
reason: `directory read failed: ${message}`
|
|
2168
|
+
});
|
|
2169
|
+
debug.emit("path.resolve.expand.read_failed", {
|
|
2170
|
+
directory: directoryPath,
|
|
2171
|
+
reason: `directory read failed: ${message}`
|
|
2172
|
+
});
|
|
2173
|
+
return [];
|
|
2174
|
+
}
|
|
2175
|
+
const sortedEntries = entries.slice().sort((left, right) => left.name.localeCompare(right.name));
|
|
2176
|
+
const files = [];
|
|
2177
|
+
debug.emit("path.resolve.expand.start", {
|
|
2178
|
+
directory: directoryPath,
|
|
2179
|
+
entries: sortedEntries.length,
|
|
2180
|
+
recursive
|
|
2181
|
+
});
|
|
2182
|
+
for (const entry of sortedEntries) {
|
|
2183
|
+
const entryPath = resolve(directoryPath, entry.name);
|
|
2184
|
+
if (entry.isFile()) {
|
|
2185
|
+
if (!shouldIncludeFromDirectory(entryPath, extensionFilter)) {
|
|
2186
|
+
skipped.push({
|
|
2187
|
+
path: entryPath,
|
|
2188
|
+
reason: "extension excluded"
|
|
2159
2189
|
});
|
|
2190
|
+
debug.emit("path.resolve.filter.excluded", {
|
|
2191
|
+
path: entryPath,
|
|
2192
|
+
reason: "extension excluded"
|
|
2193
|
+
}, { verbosity: "verbose" });
|
|
2194
|
+
stats.filterExcluded += 1;
|
|
2195
|
+
continue;
|
|
2160
2196
|
}
|
|
2161
|
-
|
|
2162
|
-
|
|
2163
|
-
|
|
2164
|
-
|
|
2165
|
-
|
|
2166
|
-
|
|
2167
|
-
|
|
2168
|
-
|
|
2197
|
+
const relativePath = toDirectoryRelativePath(rootPath, entryPath);
|
|
2198
|
+
if (!shouldIncludeFromDirectoryRegex(relativePath, regexFilter)) {
|
|
2199
|
+
if (recordRegexExcluded(entryPath)) {
|
|
2200
|
+
debug.emit("path.resolve.regex.excluded", {
|
|
2201
|
+
path: entryPath,
|
|
2202
|
+
relativePath,
|
|
2203
|
+
pattern: regexFilter.sourcePattern,
|
|
2204
|
+
reason: "regex excluded"
|
|
2205
|
+
}, { verbosity: "verbose" });
|
|
2206
|
+
stats.regexExcluded += 1;
|
|
2207
|
+
}
|
|
2208
|
+
continue;
|
|
2209
|
+
}
|
|
2210
|
+
files.push(entryPath);
|
|
2211
|
+
stats.directoryIncluded += 1;
|
|
2212
|
+
debug.emit("path.resolve.expand.include", {
|
|
2213
|
+
path: entryPath,
|
|
2214
|
+
source: "directory"
|
|
2215
|
+
}, { verbosity: "verbose" });
|
|
2216
|
+
continue;
|
|
2217
|
+
}
|
|
2218
|
+
if (!entry.isDirectory() || !recursive) continue;
|
|
2219
|
+
appendAll(files, await expandDirectory(rootPath, entryPath, recursive, extensionFilter, regexFilter, skipped, recordRegexExcluded, debug, stats));
|
|
2220
|
+
}
|
|
2221
|
+
debug.emit("path.resolve.expand.complete", {
|
|
2222
|
+
directory: directoryPath,
|
|
2223
|
+
files: files.length
|
|
2224
|
+
});
|
|
2225
|
+
return files;
|
|
2226
|
+
}
|
|
2227
|
+
async function resolveBatchFilePaths(pathInputs, options) {
|
|
2228
|
+
const skipped = [];
|
|
2229
|
+
const regexExcludedPaths = /* @__PURE__ */ new Set();
|
|
2230
|
+
const resolvedFiles = /* @__PURE__ */ new Set();
|
|
2231
|
+
const stats = {
|
|
2232
|
+
dedupeAccepted: 0,
|
|
2233
|
+
dedupeDuplicates: 0,
|
|
2234
|
+
filterExcluded: 0,
|
|
2235
|
+
regexExcluded: 0,
|
|
2236
|
+
directoryIncluded: 0
|
|
2237
|
+
};
|
|
2238
|
+
const extensionFilter = options.extensionFilter ?? buildDirectoryExtensionFilter(void 0, void 0);
|
|
2239
|
+
let regexFilter;
|
|
2240
|
+
const debug = options.debug ?? {
|
|
2241
|
+
enabled: false,
|
|
2242
|
+
verbosity: "compact",
|
|
2243
|
+
emit() {},
|
|
2244
|
+
close: async () => {}
|
|
2245
|
+
};
|
|
2246
|
+
debug.emit("path.resolve.inputs", {
|
|
2247
|
+
inputs: pathInputs.length,
|
|
2248
|
+
pathMode: options.pathMode,
|
|
2249
|
+
recursive: options.recursive,
|
|
2250
|
+
hasRegex: Boolean(options.directoryRegexPattern)
|
|
2251
|
+
});
|
|
2252
|
+
const addResolvedFile = (filePath, details) => {
|
|
2253
|
+
regexExcludedPaths.delete(filePath);
|
|
2254
|
+
if (resolvedFiles.has(filePath)) {
|
|
2255
|
+
stats.dedupeDuplicates += 1;
|
|
2256
|
+
debug.emit("path.resolve.dedupe.duplicate", {
|
|
2257
|
+
path: filePath,
|
|
2258
|
+
source: details.source,
|
|
2259
|
+
input: details.input
|
|
2260
|
+
}, { verbosity: "verbose" });
|
|
2261
|
+
return;
|
|
2262
|
+
}
|
|
2263
|
+
resolvedFiles.add(filePath);
|
|
2264
|
+
stats.dedupeAccepted += 1;
|
|
2265
|
+
debug.emit("path.resolve.dedupe.accept", {
|
|
2266
|
+
path: filePath,
|
|
2267
|
+
source: details.source,
|
|
2268
|
+
input: details.input
|
|
2269
|
+
}, { verbosity: "verbose" });
|
|
2270
|
+
};
|
|
2271
|
+
const getRegexFilter = () => {
|
|
2272
|
+
if (!regexFilter) regexFilter = buildDirectoryRegexFilter(options.directoryRegexPattern);
|
|
2273
|
+
return regexFilter;
|
|
2274
|
+
};
|
|
2275
|
+
const recordRegexExcluded = (filePath) => {
|
|
2276
|
+
if (resolvedFiles.has(filePath)) return false;
|
|
2277
|
+
regexExcludedPaths.add(filePath);
|
|
2278
|
+
return true;
|
|
2279
|
+
};
|
|
2280
|
+
for (const rawPath of pathInputs) {
|
|
2281
|
+
const targetPath = resolve(rawPath);
|
|
2282
|
+
debug.emit("path.resolve.input", {
|
|
2283
|
+
rawPath,
|
|
2284
|
+
resolvedPath: targetPath
|
|
2285
|
+
});
|
|
2286
|
+
let metadata;
|
|
2287
|
+
try {
|
|
2288
|
+
metadata = await stat(targetPath);
|
|
2289
|
+
} catch (error) {
|
|
2290
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2291
|
+
skipped.push({
|
|
2292
|
+
path: targetPath,
|
|
2293
|
+
reason: `not readable: ${message}`
|
|
2294
|
+
});
|
|
2295
|
+
debug.emit("path.resolve.skip", {
|
|
2296
|
+
path: targetPath,
|
|
2297
|
+
reason: `not readable: ${message}`
|
|
2298
|
+
});
|
|
2299
|
+
continue;
|
|
2300
|
+
}
|
|
2301
|
+
if (metadata.isDirectory() && options.pathMode === "auto") {
|
|
2302
|
+
const effectiveRegexFilter = getRegexFilter();
|
|
2303
|
+
debug.emit("path.resolve.root.expand", {
|
|
2304
|
+
root: targetPath,
|
|
2305
|
+
recursive: options.recursive,
|
|
2306
|
+
regex: effectiveRegexFilter.sourcePattern ?? null
|
|
2307
|
+
});
|
|
2308
|
+
const files = await expandDirectory(targetPath, targetPath, options.recursive, extensionFilter, effectiveRegexFilter, skipped, recordRegexExcluded, debug, stats);
|
|
2309
|
+
for (const file of files) addResolvedFile(file, {
|
|
2310
|
+
source: "directory",
|
|
2311
|
+
input: targetPath
|
|
2312
|
+
});
|
|
2313
|
+
continue;
|
|
2169
2314
|
}
|
|
2170
|
-
|
|
2171
|
-
|
|
2172
|
-
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2315
|
+
if (!metadata.isFile()) {
|
|
2316
|
+
skipped.push({
|
|
2317
|
+
path: targetPath,
|
|
2318
|
+
reason: "not a regular file"
|
|
2319
|
+
});
|
|
2320
|
+
debug.emit("path.resolve.skip", {
|
|
2321
|
+
path: targetPath,
|
|
2322
|
+
reason: "not a regular file"
|
|
2323
|
+
});
|
|
2324
|
+
continue;
|
|
2325
|
+
}
|
|
2326
|
+
addResolvedFile(targetPath, {
|
|
2327
|
+
source: "direct",
|
|
2328
|
+
input: targetPath
|
|
2329
|
+
});
|
|
2183
2330
|
}
|
|
2331
|
+
for (const path of regexExcludedPaths) skipped.push({
|
|
2332
|
+
path,
|
|
2333
|
+
reason: "regex excluded"
|
|
2334
|
+
});
|
|
2335
|
+
const files = [...resolvedFiles].sort((left, right) => left.localeCompare(right));
|
|
2336
|
+
debug.emit("path.resolve.filter.summary", {
|
|
2337
|
+
excluded: stats.filterExcluded + stats.regexExcluded,
|
|
2338
|
+
extensionExcluded: stats.filterExcluded,
|
|
2339
|
+
regexExcluded: stats.regexExcluded,
|
|
2340
|
+
included: stats.directoryIncluded
|
|
2341
|
+
});
|
|
2342
|
+
debug.emit("path.resolve.dedupe.summary", {
|
|
2343
|
+
accepted: stats.dedupeAccepted,
|
|
2344
|
+
duplicates: stats.dedupeDuplicates
|
|
2345
|
+
});
|
|
2346
|
+
debug.emit("path.resolve.complete", {
|
|
2347
|
+
files: files.length,
|
|
2348
|
+
skipped: skipped.length,
|
|
2349
|
+
ordering: "absolute-path-ascending"
|
|
2350
|
+
});
|
|
2184
2351
|
return {
|
|
2185
|
-
|
|
2186
|
-
|
|
2187
|
-
breakdown: left.breakdown
|
|
2352
|
+
files,
|
|
2353
|
+
skipped
|
|
2188
2354
|
};
|
|
2189
2355
|
}
|
|
2190
|
-
|
|
2191
|
-
|
|
2192
|
-
|
|
2193
|
-
|
|
2194
|
-
|
|
2195
|
-
|
|
2196
|
-
|
|
2197
|
-
|
|
2198
|
-
aggregate = mergeWordCounterResult(aggregate, current, preserveCollectorSegments);
|
|
2199
|
-
}
|
|
2200
|
-
return aggregate;
|
|
2356
|
+
|
|
2357
|
+
//#endregion
|
|
2358
|
+
//#region src/cli/progress/reporter.ts
|
|
2359
|
+
const PROGRESS_BAR_WIDTH = 20;
|
|
2360
|
+
const FILLED_BAR_CHAR = "█";
|
|
2361
|
+
const EMPTY_BAR_CHAR = "░";
|
|
2362
|
+
function clamp(value, min, max) {
|
|
2363
|
+
return Math.max(min, Math.min(max, value));
|
|
2201
2364
|
}
|
|
2202
|
-
function
|
|
2203
|
-
|
|
2365
|
+
function buildProgressBar(completed, total) {
|
|
2366
|
+
const safeTotal = Math.max(total, 1);
|
|
2367
|
+
const ratio = clamp(completed / safeTotal, 0, 1);
|
|
2368
|
+
const filled = completed >= safeTotal ? PROGRESS_BAR_WIDTH : Math.floor(ratio * PROGRESS_BAR_WIDTH);
|
|
2369
|
+
const empty = PROGRESS_BAR_WIDTH - filled;
|
|
2370
|
+
return `${FILLED_BAR_CHAR.repeat(filled)}${EMPTY_BAR_CHAR.repeat(empty)}`;
|
|
2204
2371
|
}
|
|
2205
|
-
function
|
|
2206
|
-
|
|
2207
|
-
|
|
2208
|
-
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
}
|
|
2212
|
-
const section = results[0]?.section ?? "all";
|
|
2213
|
-
const grouped = /* @__PURE__ */ new Map();
|
|
2214
|
-
let total = 0;
|
|
2215
|
-
let frontmatterType = results[0]?.frontmatterType ?? null;
|
|
2216
|
-
for (const result of results) {
|
|
2217
|
-
total += result.total;
|
|
2218
|
-
if (result.section !== section) throw new Error("Cannot aggregate section results with different section modes.");
|
|
2219
|
-
if (frontmatterType !== result.frontmatterType) frontmatterType = null;
|
|
2220
|
-
for (const item of result.items) {
|
|
2221
|
-
const key = buildSectionKey(item.name, item.source);
|
|
2222
|
-
const existing = grouped.get(key);
|
|
2223
|
-
if (!existing) {
|
|
2224
|
-
grouped.set(key, {
|
|
2225
|
-
name: item.name,
|
|
2226
|
-
source: item.source,
|
|
2227
|
-
items: [item.result]
|
|
2228
|
-
});
|
|
2229
|
-
continue;
|
|
2230
|
-
}
|
|
2231
|
-
existing.items.push(item.result);
|
|
2232
|
-
}
|
|
2233
|
-
}
|
|
2234
|
-
const sourceOrder = new Map([["frontmatter", 0], ["content", 1]]);
|
|
2235
|
-
const items = [...grouped.values()].sort((left, right) => {
|
|
2236
|
-
const sourceDiff = (sourceOrder.get(left.source) ?? 0) - (sourceOrder.get(right.source) ?? 0);
|
|
2237
|
-
if (sourceDiff !== 0) return sourceDiff;
|
|
2238
|
-
return left.name.localeCompare(right.name);
|
|
2239
|
-
}).map((entry) => ({
|
|
2240
|
-
name: entry.name,
|
|
2241
|
-
source: entry.source,
|
|
2242
|
-
result: aggregateWordCounterResults(entry.items, preserveCollectorSegments)
|
|
2243
|
-
}));
|
|
2244
|
-
return {
|
|
2245
|
-
section,
|
|
2246
|
-
total,
|
|
2247
|
-
frontmatterType,
|
|
2248
|
-
items
|
|
2249
|
-
};
|
|
2372
|
+
function formatElapsed(startedAtMs) {
|
|
2373
|
+
const elapsedMs = Date.now() - startedAtMs;
|
|
2374
|
+
const totalSeconds = Math.max(0, Math.floor(elapsedMs / 1e3));
|
|
2375
|
+
const minutes = Math.floor(totalSeconds / 60);
|
|
2376
|
+
const seconds = totalSeconds % 60;
|
|
2377
|
+
const tenths = Math.floor(Math.max(0, elapsedMs) % 1e3 / 100);
|
|
2378
|
+
return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}.${tenths}`;
|
|
2250
2379
|
}
|
|
2251
|
-
function
|
|
2252
|
-
|
|
2253
|
-
|
|
2380
|
+
function buildProgressLine(completed, total, startedAtMs) {
|
|
2381
|
+
const safeTotal = Math.max(total, 1);
|
|
2382
|
+
const percent = completed >= safeTotal ? 100 : Math.floor(completed / safeTotal * 100);
|
|
2383
|
+
return `Counting files [${buildProgressBar(completed, safeTotal)}] ${`${String(percent).padStart(3, " ")}%`} ${String(completed).padStart(String(safeTotal).length, " ")}/${safeTotal} elapsed ${formatElapsed(startedAtMs)}`;
|
|
2254
2384
|
}
|
|
2255
|
-
function
|
|
2256
|
-
|
|
2385
|
+
/**
 * Compose the status line shown while the batch aggregate is being finalized.
 *
 * @param {number} startedAtMs - Epoch timestamp (ms) used for the elapsed time.
 * @returns {string} The finalizing message including the elapsed time.
 */
function buildFinalizingLine(startedAtMs) {
  const elapsedLabel = formatElapsed(startedAtMs);
  return `Finalizing aggregate... elapsed ${elapsedLabel}`;
}
|
|
2258
|
-
|
|
2259
|
-
const
|
|
2260
|
-
const
|
|
2261
|
-
|
|
2262
|
-
|
|
2263
|
-
|
|
2264
|
-
|
|
2265
|
-
|
|
2266
|
-
|
|
2267
|
-
|
|
2268
|
-
|
|
2269
|
-
|
|
2270
|
-
|
|
2271
|
-
|
|
2272
|
-
|
|
2273
|
-
|
|
2274
|
-
|
|
2275
|
-
|
|
2276
|
-
|
|
2277
|
-
|
|
2278
|
-
|
|
2279
|
-
|
|
2280
|
-
|
|
2281
|
-
|
|
2282
|
-
items: []
|
|
2388
|
+
/**
 * Create a progress reporter for batch counting.
 *
 * On a TTY stream the progress line is redrawn in place using carriage
 * returns; on any other stream one line is appended each time the integer
 * percentage changes. The reporter stays inert unless `options.enabled` is
 * truthy and `start()` is given a total greater than 1.
 *
 * @param {object} options
 * @param {boolean} options.enabled - Whether progress output is wanted at all.
 * @param {{ isTTY?: boolean, write(chunk: string): unknown }} options.stream - Output stream.
 * @param {boolean} [options.clearOnFinish=true] - On a TTY, erase the progress line when done
 *   instead of leaving it on screen.
 * @returns {{ enabled: boolean, start: Function, advance: Function, startFinalizing: Function, finish: Function }}
 */
function createBatchProgressReporter(options) {
  const enabled = options.enabled;
  const interactive = Boolean(options.stream.isTTY);
  const eraseWhenDone = options.clearOnFinish ?? true;
  const state = {
    running: false,
    expected: 0,
    width: 0,
    startedAt: 0,
    lastPercent: -1,
    finalizing: false
  };

  // Always go through options.stream so the stream is looked up at write time.
  const emit = (text) => {
    options.stream.write(text);
  };

  // Redraw the current TTY line, padding with spaces to wipe leftovers of a longer previous line.
  const overwriteLine = (text) => {
    const leftover = state.width - text.length;
    const padding = leftover > 0 ? " ".repeat(leftover) : "";
    emit(`\r${text}${padding}`);
    state.width = text.length;
  };

  // Append a full line (used for non-TTY output).
  const appendLine = (text) => {
    state.width = text.length;
    emit(`${text}\n`);
  };

  const render = (completed) => {
    const text = buildProgressLine(completed, state.expected, state.startedAt);
    const denominator = Math.max(state.expected, 1);
    const done = completed >= denominator;
    const percent = done ? 100 : Math.floor(completed / denominator * 100);
    // Non-TTY output is throttled to one line per percentage step; completion is always printed.
    if (!interactive && !done && percent === state.lastPercent) return;
    state.lastPercent = percent;
    if (interactive) {
      overwriteLine(text);
    } else {
      appendLine(text);
    }
  };

  const eraseLine = () => {
    if (state.width === 0) return;
    emit(`\r${" ".repeat(state.width)}\r`);
    state.width = 0;
  };

  return {
    enabled,
    start(nextTotal, nextStartedAtMs) {
      // A single file (or none) is not worth a progress display.
      if (!enabled || nextTotal <= 1) return;
      state.expected = nextTotal;
      state.running = true;
      state.startedAt = nextStartedAtMs ?? Date.now();
      state.lastPercent = -1;
      state.finalizing = false;
      render(0);
    },
    advance(snapshot) {
      if (state.running) render(snapshot.completed);
    },
    startFinalizing() {
      if (!state.running || state.finalizing) return;
      state.finalizing = true;
      const text = buildFinalizingLine(state.startedAt);
      if (!interactive) {
        appendLine(text);
        return;
      }
      if (eraseWhenDone) {
        overwriteLine(text);
        return;
      }
      // Keep the last progress line visible and print the finalizing status below it.
      emit(`\n${text}`);
      state.width = text.length;
    },
    finish() {
      if (!state.running) return;
      if (interactive) {
        if (eraseWhenDone) {
          eraseLine();
        } else {
          emit("\n");
        }
      }
      state.running = false;
    }
  };
}
|
|
2291
2460
|
|
|
@@ -2316,34 +2485,90 @@ async function runBatchCount(options) {
|
|
|
2316
2485
|
stage: "resolve",
|
|
2317
2486
|
elapsedMs: resolveElapsedMs
|
|
2318
2487
|
});
|
|
2319
|
-
|
|
2320
|
-
|
|
2321
|
-
|
|
2322
|
-
|
|
2488
|
+
options.debug.emit("batch.jobs.strategy", {
|
|
2489
|
+
strategy: options.jobsStrategy,
|
|
2490
|
+
jobs: options.jobs
|
|
2491
|
+
});
|
|
2492
|
+
let summary;
|
|
2493
|
+
let routeSkips = [];
|
|
2494
|
+
options.debug.emit("batch.load.start", {
|
|
2495
|
+
files: resolved.files.length,
|
|
2496
|
+
jobs: options.jobs,
|
|
2497
|
+
strategy: options.jobsStrategy
|
|
2498
|
+
});
|
|
2323
2499
|
options.debug.emit("batch.load.complete", {
|
|
2324
|
-
files:
|
|
2325
|
-
skipped:
|
|
2326
|
-
elapsedMs:
|
|
2500
|
+
files: 0,
|
|
2501
|
+
skipped: 0,
|
|
2502
|
+
elapsedMs: 0,
|
|
2503
|
+
strategy: options.jobsStrategy
|
|
2327
2504
|
});
|
|
2328
2505
|
options.debug.emit("batch.stage.timing", {
|
|
2329
2506
|
stage: "load",
|
|
2330
|
-
elapsedMs:
|
|
2507
|
+
elapsedMs: 0
|
|
2331
2508
|
});
|
|
2332
|
-
const progressEnabled = options.progressReporter.enabled &&
|
|
2509
|
+
const progressEnabled = options.progressReporter.enabled && resolved.files.length > 1;
|
|
2333
2510
|
options.debug.emit("batch.progress.start", {
|
|
2334
2511
|
enabled: progressEnabled,
|
|
2335
|
-
total:
|
|
2512
|
+
total: resolved.files.length
|
|
2336
2513
|
});
|
|
2337
|
-
if (progressEnabled) options.progressReporter.start(
|
|
2338
|
-
let summary;
|
|
2514
|
+
if (progressEnabled) options.progressReporter.start(resolved.files.length, batchStartedAtMs);
|
|
2339
2515
|
const countStartedAtMs = Date.now();
|
|
2340
2516
|
let finalizeStartedAtMs = null;
|
|
2341
2517
|
let emittedCountTiming = false;
|
|
2342
2518
|
try {
|
|
2343
|
-
|
|
2344
|
-
|
|
2345
|
-
|
|
2346
|
-
|
|
2519
|
+
let counted;
|
|
2520
|
+
if (options.jobs > 1) try {
|
|
2521
|
+
counted = await countBatchInputsWithWorkerJobs(resolved.files, {
|
|
2522
|
+
jobs: options.jobs,
|
|
2523
|
+
section: options.section,
|
|
2524
|
+
wcOptions: options.wcOptions,
|
|
2525
|
+
preserveCollectorSegments: options.preserveCollectorSegments,
|
|
2526
|
+
onFileProcessed: (snapshot) => {
|
|
2527
|
+
if (progressEnabled) options.progressReporter.advance(snapshot);
|
|
2528
|
+
}
|
|
2529
|
+
});
|
|
2530
|
+
options.debug.emit("batch.jobs.executor", {
|
|
2531
|
+
strategy: options.jobsStrategy,
|
|
2532
|
+
executor: "worker-pool",
|
|
2533
|
+
jobs: options.jobs
|
|
2534
|
+
});
|
|
2535
|
+
} catch (error) {
|
|
2536
|
+
if (!(error instanceof WorkerRouteUnavailableError)) throw error;
|
|
2537
|
+
options.emitWarning?.(`Worker executor unavailable; falling back to async load+count. (${error.message})`);
|
|
2538
|
+
options.debug.emit("batch.jobs.executor", {
|
|
2539
|
+
strategy: options.jobsStrategy,
|
|
2540
|
+
executor: "async-fallback",
|
|
2541
|
+
reason: error.message,
|
|
2542
|
+
jobs: options.jobs
|
|
2543
|
+
});
|
|
2544
|
+
counted = await countBatchInputsWithJobs(resolved.files, {
|
|
2545
|
+
jobs: options.jobs,
|
|
2546
|
+
section: options.section,
|
|
2547
|
+
wcOptions: options.wcOptions,
|
|
2548
|
+
preserveCollectorSegments: options.preserveCollectorSegments,
|
|
2549
|
+
onFileProcessed: (snapshot) => {
|
|
2550
|
+
if (progressEnabled) options.progressReporter.advance(snapshot);
|
|
2551
|
+
}
|
|
2552
|
+
});
|
|
2553
|
+
}
|
|
2554
|
+
else {
|
|
2555
|
+
counted = await countBatchInputsWithJobs(resolved.files, {
|
|
2556
|
+
jobs: options.jobs,
|
|
2557
|
+
section: options.section,
|
|
2558
|
+
wcOptions: options.wcOptions,
|
|
2559
|
+
preserveCollectorSegments: options.preserveCollectorSegments,
|
|
2560
|
+
onFileProcessed: (snapshot) => {
|
|
2561
|
+
if (progressEnabled) options.progressReporter.advance(snapshot);
|
|
2562
|
+
}
|
|
2563
|
+
});
|
|
2564
|
+
options.debug.emit("batch.jobs.executor", {
|
|
2565
|
+
strategy: options.jobsStrategy,
|
|
2566
|
+
executor: "async-main",
|
|
2567
|
+
jobs: options.jobs
|
|
2568
|
+
});
|
|
2569
|
+
}
|
|
2570
|
+
routeSkips = counted.skipped;
|
|
2571
|
+
summary = finalizeBatchJobsSummary(counted.files, options.section, options.wcOptions, {
|
|
2347
2572
|
onFinalizeStart: () => {
|
|
2348
2573
|
finalizeStartedAtMs = Date.now();
|
|
2349
2574
|
if (progressEnabled) options.progressReporter.startFinalizing();
|
|
@@ -2360,7 +2585,7 @@ async function runBatchCount(options) {
|
|
|
2360
2585
|
if (progressEnabled) options.progressReporter.finish();
|
|
2361
2586
|
options.debug.emit("batch.progress.complete", {
|
|
2362
2587
|
enabled: progressEnabled,
|
|
2363
|
-
total:
|
|
2588
|
+
total: resolved.files.length
|
|
2364
2589
|
});
|
|
2365
2590
|
}
|
|
2366
2591
|
if (!emittedCountTiming) {
|
|
@@ -2376,7 +2601,7 @@ async function runBatchCount(options) {
|
|
|
2376
2601
|
elapsedMs: finalizeElapsedMs
|
|
2377
2602
|
});
|
|
2378
2603
|
appendAll(summary.skipped, resolved.skipped);
|
|
2379
|
-
appendAll(summary.skipped,
|
|
2604
|
+
appendAll(summary.skipped, routeSkips);
|
|
2380
2605
|
options.debug.emit("batch.aggregate.complete", {
|
|
2381
2606
|
files: summary.files.length,
|
|
2382
2607
|
skipped: summary.skipped.length,
|
|
@@ -2385,6 +2610,12 @@ async function runBatchCount(options) {
|
|
|
2385
2610
|
return summary;
|
|
2386
2611
|
}
|
|
2387
2612
|
|
|
2613
|
+
//#endregion
|
|
2614
|
+
//#region src/cli/batch/jobs/strategy.ts
|
|
2615
|
+
/**
 * Pick the batch execution strategy for the given job count.
 *
 * Only one strategy exists at present, so the job count is ignored and
 * `"load-count"` is always returned.
 *
 * @param {number} _jobs - Effective number of jobs (currently unused).
 * @returns {"load-count"} The strategy identifier.
 */
function resolveBatchJobsStrategy(_jobs) {
  const strategy = "load-count";
  return strategy;
}
|
|
2618
|
+
|
|
2388
2619
|
//#endregion
|
|
2389
2620
|
//#region src/utils/show-singular-or-plural-word.ts
|
|
2390
2621
|
function showSingularOrPluralWord(count, word) {
|
|
@@ -2601,6 +2832,10 @@ function countLongOptionOccurrences(argv, optionName) {
|
|
|
2601
2832
|
function validateSingleRegexOptionUsage(argv) {
|
|
2602
2833
|
if (countLongOptionOccurrences(argv, "--regex") > 1) throw new Error("`--regex` can only be provided once.");
|
|
2603
2834
|
}
|
|
2835
|
+
/**
 * Ensure `--print-jobs-limit` is the only argument on the command line.
 *
 * The first two entries of `argv` (runtime and script path) and any empty
 * tokens are ignored before checking.
 *
 * @param {string[]} argv - Full process-style argument vector.
 * @throws {Error} When anything other than a lone `--print-jobs-limit` was passed.
 */
function validateStandalonePrintJobsLimitUsage(argv) {
  const userArgs = argv.slice(2).filter((arg) => arg.length > 0);
  const isStandalone = userArgs.length === 1 && userArgs[0] === "--print-jobs-limit";
  if (isStandalone) return;
  throw new Error("`--print-jobs-limit` must be used alone.");
}
|
|
2604
2839
|
function resolveBatchScope(argv) {
|
|
2605
2840
|
let scope = "merged";
|
|
2606
2841
|
for (const token of argv) {
|
|
@@ -2701,6 +2936,12 @@ function formatInputReadError(error) {
|
|
|
2701
2936
|
//#endregion
|
|
2702
2937
|
//#region src/cli/runtime/batch.ts
|
|
2703
2938
|
async function executeBatchCount({ argv, options, runtime, resolved, debug, teeEnabled }) {
|
|
2939
|
+
const warningsEnabled = !Boolean(options.quietWarnings);
|
|
2940
|
+
const emitWarning = (message) => {
|
|
2941
|
+
if (!warningsEnabled) return;
|
|
2942
|
+
const warningLine = message.startsWith("Warning:") ? message : `Warning: ${message}`;
|
|
2943
|
+
console.error(import_picocolors.default.yellow(warningLine));
|
|
2944
|
+
};
|
|
2704
2945
|
const batchOptions = {
|
|
2705
2946
|
scope: resolveBatchScope(argv),
|
|
2706
2947
|
pathMode: options.pathMode,
|
|
@@ -2709,6 +2950,11 @@ async function executeBatchCount({ argv, options, runtime, resolved, debug, teeE
|
|
|
2709
2950
|
directoryRegexPattern: options.regex
|
|
2710
2951
|
};
|
|
2711
2952
|
const extensionFilter = buildDirectoryExtensionFilter(options.includeExt, options.excludeExt);
|
|
2953
|
+
const requestedJobs = options.jobs;
|
|
2954
|
+
const jobsLimit = resolveBatchJobsLimit();
|
|
2955
|
+
const jobs = clampRequestedJobs(requestedJobs, jobsLimit);
|
|
2956
|
+
if (requestedJobs > jobsLimit.suggestedMaxJobs) emitWarning(formatJobsAdvisoryWarning(requestedJobs, jobs, jobsLimit));
|
|
2957
|
+
const jobsStrategy = resolveBatchJobsStrategy(jobs);
|
|
2712
2958
|
const debugEnabled = Boolean(options.debug);
|
|
2713
2959
|
const mirrorDebugToTerminal = debugEnabled && (!debug.reportPath || teeEnabled);
|
|
2714
2960
|
const summary = await runBatchCount({
|
|
@@ -2723,16 +2969,21 @@ async function executeBatchCount({ argv, options, runtime, resolved, debug, teeE
|
|
|
2723
2969
|
enabled: options.format === "standard" && options.progress,
|
|
2724
2970
|
stream: runtime.stderr ?? process.stderr,
|
|
2725
2971
|
clearOnFinish: !(mirrorDebugToTerminal || options.keepProgress)
|
|
2726
|
-
})
|
|
2972
|
+
}),
|
|
2973
|
+
jobs,
|
|
2974
|
+
jobsStrategy,
|
|
2975
|
+
emitWarning
|
|
2727
2976
|
});
|
|
2728
2977
|
const showSkipDiagnostics = debugEnabled && !batchOptions.quietSkips;
|
|
2978
|
+
const showSkipItems = showSkipDiagnostics && Boolean(options.verbose);
|
|
2729
2979
|
debug.emit("batch.skips.policy", {
|
|
2730
2980
|
enabled: showSkipDiagnostics,
|
|
2981
|
+
items: showSkipItems,
|
|
2731
2982
|
quietSkips: batchOptions.quietSkips
|
|
2732
2983
|
});
|
|
2733
2984
|
if (showSkipDiagnostics) {
|
|
2734
2985
|
debug.emit("batch.skips.report", { count: summary.skipped.length });
|
|
2735
|
-
if (
|
|
2986
|
+
if (showSkipItems) for (const skip of summary.skipped) debug.emit("batch.skips.item", {
|
|
2736
2987
|
path: skip.path,
|
|
2737
2988
|
reason: skip.reason
|
|
2738
2989
|
}, { verbosity: "verbose" });
|
|
@@ -2891,6 +3142,17 @@ async function runCli(argv = process.argv, runtime = {}) {
|
|
|
2891
3142
|
program.name("word-counter").description("Locale-aware word counting powered by Intl.Segmenter.").version(getFormattedVersionLabel(), "-v, --version", "output the version number");
|
|
2892
3143
|
configureProgramOptions(program, parseMode);
|
|
2893
3144
|
program.action(async (textTokens, options) => {
|
|
3145
|
+
if (options.printJobsLimit) {
|
|
3146
|
+
try {
|
|
3147
|
+
validateStandalonePrintJobsLimitUsage(argv);
|
|
3148
|
+
} catch (error) {
|
|
3149
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
3150
|
+
program.error(import_picocolors.default.red(message));
|
|
3151
|
+
return;
|
|
3152
|
+
}
|
|
3153
|
+
console.log(JSON.stringify(resolveBatchJobsLimit()));
|
|
3154
|
+
return;
|
|
3155
|
+
}
|
|
2894
3156
|
const debugEnabled = Boolean(options.debug);
|
|
2895
3157
|
const debugReportPath = resolveDebugReportPathOption(options.debugReport);
|
|
2896
3158
|
const debugReportEnabled = options.debugReport !== void 0 && options.debugReport !== false;
|