@dev-pi2pie/word-counter 0.1.2 → 0.1.3-canary.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -4
- package/dist/cjs/index.cjs +133 -34
- package/dist/cjs/index.cjs.map +1 -1
- package/dist/esm/bin.mjs +194 -39
- package/dist/esm/bin.mjs.map +1 -1
- package/dist/esm/index.d.mts +22 -10
- package/dist/esm/index.mjs +134 -35
- package/dist/esm/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/esm/bin.mjs
CHANGED
|
@@ -146,7 +146,7 @@ function createDebugChannel(options) {
|
|
|
146
146
|
|
|
147
147
|
//#endregion
|
|
148
148
|
//#region src/cli/path/filter.ts
|
|
149
|
-
const DEFAULT_INCLUDE_EXTENSIONS = [
|
|
149
|
+
const DEFAULT_INCLUDE_EXTENSIONS = Object.freeze([
|
|
150
150
|
".md",
|
|
151
151
|
".markdown",
|
|
152
152
|
".mdx",
|
|
@@ -223,13 +223,13 @@ function shouldIncludeFromDirectoryRegex(relativePath, filter) {
|
|
|
223
223
|
|
|
224
224
|
//#endregion
|
|
225
225
|
//#region src/cli/total-of.ts
|
|
226
|
-
const TOTAL_OF_PARTS = [
|
|
226
|
+
const TOTAL_OF_PARTS = Object.freeze([
|
|
227
227
|
"words",
|
|
228
228
|
"emoji",
|
|
229
229
|
"symbols",
|
|
230
230
|
"punctuation",
|
|
231
231
|
"whitespace"
|
|
232
|
-
];
|
|
232
|
+
]);
|
|
233
233
|
const TOTAL_OF_PART_ALIASES = {
|
|
234
234
|
word: "words",
|
|
235
235
|
words: "words",
|
|
@@ -342,8 +342,11 @@ const PATH_MODE_CHOICES = ["auto", "manual"];
|
|
|
342
342
|
function collectPathValue(value, previous = []) {
|
|
343
343
|
return [...previous, value];
|
|
344
344
|
}
|
|
345
|
+
function collectLatinHintValue(value, previous = []) {
|
|
346
|
+
return [...previous, value];
|
|
347
|
+
}
|
|
345
348
|
function configureProgramOptions(program, parseMode) {
|
|
346
|
-
program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies with --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", "mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of --debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in standard mode").option("--no-recursive", "disable recursive directory traversal").option("--quiet-skips", "hide skip diagnostics (applies when --debug is enabled)").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("--regex <pattern>", "regex filter for directory-scanned paths (applies to --path directories only)").option("-p, --path <path>", "read input from file or directory (directories expand in auto mode by default)", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
|
|
349
|
+
program.addOption(new Option("-m, --mode <mode>", "breakdown mode").choices(MODE_CHOICES).argParser(parseMode).default("chunk")).addOption(new Option("-f, --format <format>", "output format").choices(FORMAT_CHOICES).default("standard")).addOption(new Option("--section <section>", "document section mode").choices(SECTION_CHOICES).default("all")).addOption(new Option("--path-mode <mode>", "path resolution mode: auto (default) expands directories; manual treats --path values as literal files").choices(PATH_MODE_CHOICES).default("auto")).option("--latin-language <language>", "hint a language tag for Latin script text").option("--latin-tag <tag>", "hint a BCP 47 tag for Latin script text").option("--latin-locale <locale>", "legacy alias of --latin-language").option("--latin-hint <tag>=<pattern>", "add a custom Latin hint rule (repeatable)", collectLatinHintValue, []).option("--latin-hints-file <path>", "load custom Latin hint rules from a JSON file").option("--no-default-latin-hints", "disable built-in Latin hint rules").option("--han-language <language>", "hint a language tag for Han script text").option("--han-tag <tag>", "hint a BCP 47 tag for Han script text").option("--non-words", "collect emoji, symbols, and punctuation (excludes whitespace)").option("--include-whitespace", "include whitespace counts (implies with --non-words; same as --misc)").option("--misc", "collect non-words plus whitespace (alias for --include-whitespace)").option("--total-of <parts>", "override total composition (comma-separated): words,emoji,symbols,punctuation,whitespace", parseTotalOfOption).option("--pretty", "pretty print JSON output", false).option("--debug", "enable debug diagnostics on stderr").option("--verbose", "emit verbose per-file debug diagnostics (requires --debug)").option("--debug-report [path]", "write debug diagnostics to a report file").option("--debug-report-tee", "mirror debug diagnostics to both report file and stderr").option("--debug-tee", "alias of --debug-report-tee").option("--merged", "show merged aggregate output (default)").option("--per-file", "show per-file output plus merged summary").option("--no-progress", "disable batch progress indicator").option("--keep-progress", "keep final batch progress line visible in standard mode").option("--no-recursive", "disable recursive directory traversal").option("--quiet-skips", "hide skip diagnostics (applies when --debug is enabled)").option("--include-ext <exts>", "comma-separated extensions to include during directory scanning", collectExtensionOption, []).option("--exclude-ext <exts>", "comma-separated extensions to exclude during directory scanning", collectExtensionOption, []).option("--regex <pattern>", "regex filter for directory-scanned paths (applies to --path directories only)").option("-p, --path <path>", "read input from file or directory (directories expand in auto mode by default)", collectPathValue, []).argument("[text...]", "text to count").showHelpAfterError();
|
|
347
350
|
}
|
|
348
351
|
|
|
349
352
|
//#endregion
|
|
@@ -418,7 +421,7 @@ var require_picocolors = /* @__PURE__ */ __commonJSMin(((exports, module) => {
|
|
|
418
421
|
//#endregion
|
|
419
422
|
//#region src/cli/program/version-embedded.ts
|
|
420
423
|
var import_picocolors = /* @__PURE__ */ __toESM(require_picocolors(), 1);
|
|
421
|
-
const EMBEDDED_PACKAGE_VERSION = "0.1.2";
|
|
424
|
+
const EMBEDDED_PACKAGE_VERSION = "0.1.3-canary.1";
|
|
422
425
|
|
|
423
426
|
//#endregion
|
|
424
427
|
//#region src/cli/program/version.ts
|
|
@@ -1605,10 +1608,53 @@ function resolveMode(input, fallback = "chunk") {
|
|
|
1605
1608
|
return normalizeMode(input) ?? fallback;
|
|
1606
1609
|
}
|
|
1607
1610
|
|
|
1611
|
+
//#endregion
|
|
1612
|
+
//#region src/wc/latin-hints.ts
|
|
1613
|
+
const DEFAULT_LATIN_HINT_RULES_SOURCE = [
|
|
1614
|
+
{
|
|
1615
|
+
tag: "de",
|
|
1616
|
+
pattern: "[äöüÄÖÜß]"
|
|
1617
|
+
},
|
|
1618
|
+
{
|
|
1619
|
+
tag: "es",
|
|
1620
|
+
pattern: "[ñÑ¿¡]"
|
|
1621
|
+
},
|
|
1622
|
+
{
|
|
1623
|
+
tag: "pt",
|
|
1624
|
+
pattern: "[ãõÃÕ]"
|
|
1625
|
+
},
|
|
1626
|
+
{
|
|
1627
|
+
tag: "fr",
|
|
1628
|
+
pattern: "[œŒæÆ]"
|
|
1629
|
+
},
|
|
1630
|
+
{
|
|
1631
|
+
tag: "pl",
|
|
1632
|
+
pattern: "[ąćęłńśźżĄĆĘŁŃŚŹŻ]"
|
|
1633
|
+
},
|
|
1634
|
+
{
|
|
1635
|
+
tag: "tr",
|
|
1636
|
+
pattern: "[ıİğĞşŞ]"
|
|
1637
|
+
},
|
|
1638
|
+
{
|
|
1639
|
+
tag: "ro",
|
|
1640
|
+
pattern: "[ăĂâÂîÎșȘțȚ]"
|
|
1641
|
+
},
|
|
1642
|
+
{
|
|
1643
|
+
tag: "hu",
|
|
1644
|
+
pattern: "[őŐűŰ]"
|
|
1645
|
+
},
|
|
1646
|
+
{
|
|
1647
|
+
tag: "is",
|
|
1648
|
+
pattern: "[ðÐþÞ]"
|
|
1649
|
+
}
|
|
1650
|
+
];
|
|
1651
|
+
const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
|
|
1652
|
+
|
|
1608
1653
|
//#endregion
|
|
1609
1654
|
//#region src/wc/locale-detect.ts
|
|
1610
1655
|
const DEFAULT_LOCALE = "und-Latn";
|
|
1611
|
-
const DEFAULT_HAN_TAG = "
|
|
1656
|
+
const DEFAULT_HAN_TAG = "und-Hani";
|
|
1657
|
+
const MAX_LATIN_HINT_PATTERN_LENGTH = 256;
|
|
1612
1658
|
const regex = {
|
|
1613
1659
|
hiragana: /\p{Script=Hiragana}/u,
|
|
1614
1660
|
katakana: /\p{Script=Katakana}/u,
|
|
@@ -1620,31 +1666,10 @@ const regex = {
|
|
|
1620
1666
|
devanagari: /\p{Script=Devanagari}/u,
|
|
1621
1667
|
thai: /\p{Script=Thai}/u
|
|
1622
1668
|
};
|
|
1623
|
-
const latinLocaleHints = [
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
|
|
1627
|
-
},
|
|
1628
|
-
{
|
|
1629
|
-
locale: "es",
|
|
1630
|
-
regex: /[ñÑ¿¡]/
|
|
1631
|
-
},
|
|
1632
|
-
{
|
|
1633
|
-
locale: "pt",
|
|
1634
|
-
regex: /[ãõÃÕ]/
|
|
1635
|
-
},
|
|
1636
|
-
{
|
|
1637
|
-
locale: "fr",
|
|
1638
|
-
regex: /[œŒæÆ]/
|
|
1639
|
-
}
|
|
1640
|
-
];
|
|
1641
|
-
const latinLocales = new Set([DEFAULT_LOCALE, ...latinLocaleHints.map((hint) => hint.locale)]);
|
|
1642
|
-
function isLatinLocale(locale) {
|
|
1643
|
-
return latinLocales.has(locale);
|
|
1644
|
-
}
|
|
1645
|
-
function detectLatinLocale(char) {
|
|
1646
|
-
for (const hint of latinLocaleHints) if (hint.regex.test(char)) return hint.locale;
|
|
1647
|
-
return DEFAULT_LOCALE;
|
|
1669
|
+
const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
|
|
1670
|
+
function isLatinLocale(locale, context) {
|
|
1671
|
+
if (context) return context.latinLocales.has(locale);
|
|
1672
|
+
return defaultLatinLocales.has(locale);
|
|
1648
1673
|
}
|
|
1649
1674
|
function resolveLatinHint(options) {
|
|
1650
1675
|
const latinTagHint = options.latinTagHint?.trim();
|
|
@@ -1660,7 +1685,82 @@ function resolveHanHint(options) {
|
|
|
1660
1685
|
const hanLanguageHint = options.hanLanguageHint?.trim();
|
|
1661
1686
|
if (hanLanguageHint) return hanLanguageHint;
|
|
1662
1687
|
}
|
|
1663
|
-
function detectLocaleForChar(char, previousLocale, options = {}) {
|
|
1688
|
+
function compileLatinHintPattern(pattern, label) {
|
|
1689
|
+
const source = typeof pattern === "string" ? pattern : pattern.source;
|
|
1690
|
+
const hasUnicodeMode = typeof pattern !== "string" && (pattern.flags.includes("u") || pattern.flags.includes("v"));
|
|
1691
|
+
const flags = typeof pattern === "string" ? "u" : hasUnicodeMode ? pattern.flags : `${pattern.flags}u`;
|
|
1692
|
+
if (source.length === 0) throw new Error(`${label}: pattern must not be empty.`);
|
|
1693
|
+
if (source.length > MAX_LATIN_HINT_PATTERN_LENGTH) throw new Error(`${label}: pattern must be at most ${MAX_LATIN_HINT_PATTERN_LENGTH} characters.`);
|
|
1694
|
+
try {
|
|
1695
|
+
return new RegExp(source, flags);
|
|
1696
|
+
} catch (error) {
|
|
1697
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1698
|
+
throw new Error(`${label}: invalid Unicode regex pattern (${message}).`);
|
|
1699
|
+
}
|
|
1700
|
+
}
|
|
1701
|
+
function normalizeLatinHintPriority(priority, label) {
|
|
1702
|
+
if (priority === void 0) return 0;
|
|
1703
|
+
if (typeof priority !== "number" || !Number.isFinite(priority)) throw new Error(`${label}: priority must be a finite number when provided.`);
|
|
1704
|
+
return priority;
|
|
1705
|
+
}
|
|
1706
|
+
function compileLatinHintRule(rule, order, label) {
|
|
1707
|
+
const tag = typeof rule.tag === "string" ? rule.tag.trim() : "";
|
|
1708
|
+
if (!tag) throw new Error(`${label}: tag must be a non-empty string.`);
|
|
1709
|
+
return {
|
|
1710
|
+
tag,
|
|
1711
|
+
pattern: compileLatinHintPattern(rule.pattern, label),
|
|
1712
|
+
priority: normalizeLatinHintPriority(rule.priority, label),
|
|
1713
|
+
order
|
|
1714
|
+
};
|
|
1715
|
+
}
|
|
1716
|
+
function resolveLatinHintRules$1(options) {
|
|
1717
|
+
const useDefaultLatinHints = options.useDefaultLatinHints !== false;
|
|
1718
|
+
const customRules = options.latinHintRules ?? [];
|
|
1719
|
+
const combinedRules = [];
|
|
1720
|
+
for (let index = 0; index < customRules.length; index += 1) {
|
|
1721
|
+
const rule = customRules[index];
|
|
1722
|
+
if (!rule) continue;
|
|
1723
|
+
combinedRules.push({
|
|
1724
|
+
rule,
|
|
1725
|
+
label: `Invalid custom Latin hint rule at index ${index}`
|
|
1726
|
+
});
|
|
1727
|
+
}
|
|
1728
|
+
if (useDefaultLatinHints) for (let index = 0; index < DEFAULT_LATIN_HINT_RULES.length; index += 1) {
|
|
1729
|
+
const rule = DEFAULT_LATIN_HINT_RULES[index];
|
|
1730
|
+
if (!rule) continue;
|
|
1731
|
+
combinedRules.push({
|
|
1732
|
+
rule,
|
|
1733
|
+
label: `Invalid default Latin hint rule at index ${index}`
|
|
1734
|
+
});
|
|
1735
|
+
}
|
|
1736
|
+
const resolvedRules = combinedRules.map((entry, index) => compileLatinHintRule(entry.rule, index, entry.label));
|
|
1737
|
+
resolvedRules.sort((left, right) => {
|
|
1738
|
+
if (left.priority !== right.priority) return right.priority - left.priority;
|
|
1739
|
+
return left.order - right.order;
|
|
1740
|
+
});
|
|
1741
|
+
return resolvedRules;
|
|
1742
|
+
}
|
|
1743
|
+
function resolveLocaleDetectContext(options = {}) {
|
|
1744
|
+
const latinHint = resolveLatinHint(options);
|
|
1745
|
+
const latinHintRules = resolveLatinHintRules$1(options);
|
|
1746
|
+
const latinLocales = new Set([DEFAULT_LOCALE]);
|
|
1747
|
+
for (const rule of latinHintRules) latinLocales.add(rule.tag);
|
|
1748
|
+
if (latinHint) latinLocales.add(latinHint);
|
|
1749
|
+
return {
|
|
1750
|
+
latinHint,
|
|
1751
|
+
hanHint: resolveHanHint(options),
|
|
1752
|
+
latinHintRules,
|
|
1753
|
+
latinLocales
|
|
1754
|
+
};
|
|
1755
|
+
}
|
|
1756
|
+
function detectLatinLocale(char, context) {
|
|
1757
|
+
for (const hint of context.latinHintRules) {
|
|
1758
|
+
hint.pattern.lastIndex = 0;
|
|
1759
|
+
if (hint.pattern.test(char)) return hint.tag;
|
|
1760
|
+
}
|
|
1761
|
+
return DEFAULT_LOCALE;
|
|
1762
|
+
}
|
|
1763
|
+
function detectLocaleForChar(char, previousLocale, options = {}, context = resolveLocaleDetectContext(options)) {
|
|
1664
1764
|
if (regex.hiragana.test(char) || regex.katakana.test(char)) return "ja";
|
|
1665
1765
|
if (regex.hangul.test(char)) return "ko";
|
|
1666
1766
|
if (regex.arabic.test(char)) return "ar";
|
|
@@ -1669,14 +1769,13 @@ function detectLocaleForChar(char, previousLocale, options = {}) {
|
|
|
1669
1769
|
if (regex.thai.test(char)) return "th";
|
|
1670
1770
|
if (regex.han.test(char)) {
|
|
1671
1771
|
if (previousLocale && previousLocale.startsWith("ja")) return previousLocale;
|
|
1672
|
-
return
|
|
1772
|
+
return context.hanHint ?? DEFAULT_HAN_TAG;
|
|
1673
1773
|
}
|
|
1674
1774
|
if (regex.latin.test(char)) {
|
|
1675
|
-
const hintedLocale = detectLatinLocale(char);
|
|
1775
|
+
const hintedLocale = detectLatinLocale(char, context);
|
|
1676
1776
|
if (hintedLocale !== DEFAULT_LOCALE) return hintedLocale;
|
|
1677
|
-
if (previousLocale && isLatinLocale(previousLocale) && previousLocale !== DEFAULT_LOCALE) return previousLocale;
|
|
1678
|
-
|
|
1679
|
-
if (latinHint) return latinHint;
|
|
1777
|
+
if (previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE) return previousLocale;
|
|
1778
|
+
if (context.latinHint) return context.latinHint;
|
|
1680
1779
|
return DEFAULT_LOCALE;
|
|
1681
1780
|
}
|
|
1682
1781
|
return null;
|
|
@@ -1685,12 +1784,13 @@ function detectLocaleForChar(char, previousLocale, options = {}) {
|
|
|
1685
1784
|
//#endregion
|
|
1686
1785
|
//#region src/wc/segment.ts
|
|
1687
1786
|
function segmentTextByLocale(text, options = {}) {
|
|
1787
|
+
const context = resolveLocaleDetectContext(options);
|
|
1688
1788
|
const chunks = [];
|
|
1689
1789
|
let currentLocale = DEFAULT_LOCALE;
|
|
1690
1790
|
let buffer = "";
|
|
1691
1791
|
let bufferHasScript = false;
|
|
1692
1792
|
for (const char of text) {
|
|
1693
|
-
const detected = detectLocaleForChar(char, currentLocale, options);
|
|
1793
|
+
const detected = detectLocaleForChar(char, currentLocale, options, context);
|
|
1694
1794
|
const targetLocale = detected ?? currentLocale;
|
|
1695
1795
|
if (buffer === "") {
|
|
1696
1796
|
currentLocale = targetLocale;
|
|
@@ -1705,7 +1805,7 @@ function segmentTextByLocale(text, options = {}) {
|
|
|
1705
1805
|
continue;
|
|
1706
1806
|
}
|
|
1707
1807
|
if (targetLocale !== currentLocale && detected !== null) {
|
|
1708
|
-
if (currentLocale === DEFAULT_LOCALE && isLatinLocale(targetLocale)) {
|
|
1808
|
+
if (currentLocale === DEFAULT_LOCALE && isLatinLocale(targetLocale, context)) {
|
|
1709
1809
|
currentLocale = targetLocale;
|
|
1710
1810
|
buffer += char;
|
|
1711
1811
|
bufferHasScript = true;
|
|
@@ -1758,6 +1858,8 @@ function wordCounter(text, options = {}) {
|
|
|
1758
1858
|
latinLanguageHint: options.latinLanguageHint,
|
|
1759
1859
|
latinTagHint: options.latinTagHint,
|
|
1760
1860
|
latinLocaleHint: options.latinLocaleHint,
|
|
1861
|
+
latinHintRules: options.latinHintRules,
|
|
1862
|
+
useDefaultLatinHints: options.useDefaultLatinHints,
|
|
1761
1863
|
hanLanguageHint: options.hanLanguageHint,
|
|
1762
1864
|
hanTagHint: options.hanTagHint
|
|
1763
1865
|
});
|
|
@@ -2477,6 +2579,57 @@ function resolveDebugReportPathOption(rawValue) {
|
|
|
2477
2579
|
if (rawValue === void 0 || rawValue === false) return;
|
|
2478
2580
|
if (typeof rawValue === "string") return rawValue;
|
|
2479
2581
|
}
|
|
2582
|
+
function parseInlineLatinHintRule(value) {
|
|
2583
|
+
const separatorIndex = value.indexOf("=");
|
|
2584
|
+
if (separatorIndex <= 0) throw new Error("`--latin-hint` must use `<tag>=<pattern>` format.");
|
|
2585
|
+
const tag = value.slice(0, separatorIndex).trim();
|
|
2586
|
+
const pattern = value.slice(separatorIndex + 1);
|
|
2587
|
+
if (!tag) throw new Error("`--latin-hint` tag must be non-empty.");
|
|
2588
|
+
if (!pattern) throw new Error("`--latin-hint` pattern must be non-empty.");
|
|
2589
|
+
return {
|
|
2590
|
+
tag,
|
|
2591
|
+
pattern
|
|
2592
|
+
};
|
|
2593
|
+
}
|
|
2594
|
+
function parseLatinHintsFileRule(value, index, sourcePath) {
|
|
2595
|
+
if (typeof value !== "object" || value === null) throw new Error(`Invalid Latin hint rule at ${sourcePath}#${index}: rule must be an object.`);
|
|
2596
|
+
const tag = "tag" in value ? value.tag : void 0;
|
|
2597
|
+
const pattern = "pattern" in value ? value.pattern : void 0;
|
|
2598
|
+
const priority = "priority" in value ? value.priority : void 0;
|
|
2599
|
+
if (typeof tag !== "string" || tag.trim().length === 0) throw new Error(`Invalid Latin hint rule at ${sourcePath}#${index}: tag must be a non-empty string.`);
|
|
2600
|
+
if (typeof pattern !== "string") throw new Error(`Invalid Latin hint rule at ${sourcePath}#${index}: pattern must be a string.`);
|
|
2601
|
+
if (priority !== void 0 && (typeof priority !== "number" || !Number.isFinite(priority))) throw new Error(`Invalid Latin hint rule at ${sourcePath}#${index}: priority must be a finite number.`);
|
|
2602
|
+
return {
|
|
2603
|
+
tag,
|
|
2604
|
+
pattern,
|
|
2605
|
+
...priority !== void 0 ? { priority } : {}
|
|
2606
|
+
};
|
|
2607
|
+
}
|
|
2608
|
+
function parseLatinHintsFile(path) {
|
|
2609
|
+
let raw;
|
|
2610
|
+
try {
|
|
2611
|
+
raw = readFileSync(path, "utf8");
|
|
2612
|
+
} catch (error) {
|
|
2613
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2614
|
+
throw new Error(`Failed to read Latin hint file (${path}): ${message}`);
|
|
2615
|
+
}
|
|
2616
|
+
let parsed;
|
|
2617
|
+
try {
|
|
2618
|
+
parsed = JSON.parse(raw);
|
|
2619
|
+
} catch (error) {
|
|
2620
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
2621
|
+
throw new Error(`Invalid JSON in Latin hint file (${path}): ${message}`);
|
|
2622
|
+
}
|
|
2623
|
+
if (!Array.isArray(parsed)) throw new Error(`Latin hint file (${path}) must contain a JSON array.`);
|
|
2624
|
+
return parsed.map((rule, index) => parseLatinHintsFileRule(rule, index, path));
|
|
2625
|
+
}
|
|
2626
|
+
function resolveLatinHintRules(options) {
|
|
2627
|
+
const inlineRules = (options.latinHint ?? []).map((value) => parseInlineLatinHintRule(value));
|
|
2628
|
+
const fileRules = typeof options.latinHintsFile === "string" && options.latinHintsFile.length > 0 ? parseLatinHintsFile(options.latinHintsFile) : [];
|
|
2629
|
+
const mergedRules = [...inlineRules, ...fileRules];
|
|
2630
|
+
if (mergedRules.length === 0) return;
|
|
2631
|
+
return mergedRules;
|
|
2632
|
+
}
|
|
2480
2633
|
function resolveCountRunOptions(options) {
|
|
2481
2634
|
const useSection = options.section !== "all";
|
|
2482
2635
|
const totalOfParts = options.totalOf;
|
|
@@ -2495,6 +2648,8 @@ function resolveCountRunOptions(options) {
|
|
|
2495
2648
|
latinLanguageHint: options.latinLanguage,
|
|
2496
2649
|
latinTagHint: options.latinTag,
|
|
2497
2650
|
latinLocaleHint: options.latinLocale,
|
|
2651
|
+
latinHintRules: resolveLatinHintRules(options),
|
|
2652
|
+
useDefaultLatinHints: options.defaultLatinHints !== false,
|
|
2498
2653
|
hanLanguageHint: options.hanLanguage,
|
|
2499
2654
|
hanTagHint: options.hanTag,
|
|
2500
2655
|
nonWords: enableNonWords,
|