@dev-pi2pie/word-counter 0.1.4-canary.1 → 0.1.5-canary.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -5
- package/dist/cjs/index.cjs +7 -30
- package/dist/cjs/index.cjs.map +1 -1
- package/dist/esm/bin.mjs +660 -514
- package/dist/esm/bin.mjs.map +1 -1
- package/dist/esm/index.mjs +7 -29
- package/dist/esm/index.mjs.map +1 -1
- package/dist/esm/worker/count-worker.mjs +8 -32
- package/dist/esm/worker/count-worker.mjs.map +1 -1
- package/dist/esm/worker-pool.mjs +5 -3
- package/dist/esm/worker-pool.mjs.map +1 -1
- package/package.json +5 -5
package/README.md
CHANGED
|
@@ -151,8 +151,10 @@ word-counter --path ./examples/test-case-multi-files-support --jobs 4
|
|
|
151
151
|
Quick policy:
|
|
152
152
|
|
|
153
153
|
- no `--jobs` and `--jobs 1` are equivalent baseline behavior.
|
|
154
|
-
- `--jobs
|
|
154
|
+
- `--jobs 1`: async main-thread `load+count` baseline.
|
|
155
|
+
- `--jobs > 1`: worker `load+count` with async fallback when workers are unavailable.
|
|
155
156
|
- if requested `--jobs` exceeds host `suggestedMaxJobs` (from `--print-jobs-limit`), the CLI warns and runs with the suggested limit as a safety cap.
|
|
157
|
+
- use `--quiet-warnings` to suppress non-fatal warning lines (for example jobs-limit advisory and worker-fallback warning).
|
|
156
158
|
|
|
157
159
|
Inspect host jobs diagnostics:
|
|
158
160
|
|
|
@@ -160,6 +162,35 @@ Inspect host jobs diagnostics:
|
|
|
160
162
|
word-counter --print-jobs-limit
|
|
161
163
|
```
|
|
162
164
|
|
|
165
|
+
`--print-jobs-limit` must be used alone (no other inputs or runtime flags).
|
|
166
|
+
|
|
167
|
+
### Doctor (`doctor`)
|
|
168
|
+
|
|
169
|
+
Use `doctor` to verify whether the current host can run `word-counter` reliably:
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
word-counter doctor
|
|
173
|
+
word-counter doctor --format json
|
|
174
|
+
word-counter doctor --format json --pretty
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
Doctor scope in v1:
|
|
178
|
+
|
|
179
|
+
- checks runtime support policy against Node.js `>=20`
|
|
180
|
+
- verifies `Intl.Segmenter` availability plus word/grapheme constructor health
|
|
181
|
+
- reports batch jobs host limits using the same heuristics as `--print-jobs-limit`
|
|
182
|
+
- reports worker-route preflight signals and the worker-disable env toggle that affects worker availability
|
|
183
|
+
|
|
184
|
+
Doctor output contract:
|
|
185
|
+
|
|
186
|
+
- default output is human-readable text
|
|
187
|
+
- `--format json` prints compact machine-readable JSON
|
|
188
|
+
- `--format json --pretty` prints indented JSON
|
|
189
|
+
- doctor exits with code `0` for `ok` / `warn`, `1` for invalid doctor usage, and `2` for runtime `fail`
|
|
190
|
+
- doctor does not accept counting inputs, `--path`, `--jobs`, or other counting/debug flags
|
|
191
|
+
|
|
192
|
+
For a field-by-field explanation of doctor text and JSON output, see [`docs/doctor-usage-guide.md`](docs/doctor-usage-guide.md).
|
|
193
|
+
|
|
163
194
|
For full policy details, JSON parity expectations (`--misc`, `--total-of whitespace,words`), and benchmark standards, see [`docs/batch-jobs-usage-guide.md`](docs/batch-jobs-usage-guide.md).
|
|
164
195
|
|
|
165
196
|
### Stable Path Resolution Contract
|
|
@@ -220,6 +251,8 @@ For additional usage details and troubleshooting, see [`docs/regex-usage-guide.m
|
|
|
220
251
|
|
|
221
252
|
### Debugging Diagnostics (`--debug`)
|
|
222
253
|
|
|
254
|
+
Noise policy: default output shows errors + warnings; `--debug` enables diagnostics; `--verbose` enables per-item diagnostics; `--quiet-warnings` suppresses warnings.
|
|
255
|
+
|
|
223
256
|
`--debug` remains the diagnostics gate and now defaults to `compact` event volume:
|
|
224
257
|
|
|
225
258
|
- lifecycle/stage timing events
|
|
@@ -252,7 +285,7 @@ word-counter --path ./examples/test-case-multi-files-support --debug --debug-rep
|
|
|
252
285
|
word-counter --path ./examples/test-case-multi-files-support --debug --debug-report ./logs/debug.jsonl --debug-tee
|
|
253
286
|
```
|
|
254
287
|
|
|
255
|
-
Skip details stay debug-gated and can
|
|
288
|
+
Skip details stay debug-gated and can be suppressed with `--quiet-skips`.
|
|
256
289
|
|
|
257
290
|
## How It Works
|
|
258
291
|
|
|
@@ -491,8 +524,9 @@ Example (trimmed):
|
|
|
491
524
|
"frontmatterType": "yaml",
|
|
492
525
|
"total": 7,
|
|
493
526
|
"items": [
|
|
494
|
-
{ "name": "content", "source": "frontmatter", "result": { "total":
|
|
495
|
-
{ "name": "content", "source": "
|
|
527
|
+
{ "name": "content", "source": "frontmatter", "result": { "total": 4 } },
|
|
528
|
+
{ "name": "content", "source": "frontmatter", "result": { "total": 2 } },
|
|
529
|
+
{ "name": "content", "source": "content", "result": { "total": 5 } }
|
|
496
530
|
]
|
|
497
531
|
}
|
|
498
532
|
```
|
|
@@ -585,7 +619,7 @@ word-counter --include-whitespace "Hi\tthere\n"
|
|
|
585
619
|
word-counter --misc "Hi\tthere\n"
|
|
586
620
|
```
|
|
587
621
|
|
|
588
|
-
In the CLI, `--include-whitespace` implies
|
|
622
|
+
In the CLI, `--include-whitespace` implies `--non-words` (same behavior as `--misc`). `--non-words` alone does not include whitespace. When enabled, whitespace counts appear under `nonWords.whitespace`, and `total = words + nonWords` (emoji + symbols + punctuation + whitespace). JSON output also includes top-level `counts` when `nonWords` is enabled. See `docs/schemas/whitespace-categories.md` for how whitespace is categorized.
|
|
589
623
|
|
|
590
624
|
Example JSON (trimmed):
|
|
591
625
|
|
package/dist/cjs/index.cjs
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
let yaml = require("yaml");
|
|
2
|
-
|
|
3
2
|
//#region src/wc/segmenter.ts
|
|
4
3
|
const segmenterCache = /* @__PURE__ */ new Map();
|
|
5
4
|
const graphemeSegmenterCache = /* @__PURE__ */ new Map();
|
|
@@ -33,13 +32,11 @@ function countCharsForLocale(text, locale) {
|
|
|
33
32
|
for (const _segment of segmenter.segment(text)) count++;
|
|
34
33
|
return count;
|
|
35
34
|
}
|
|
36
|
-
|
|
37
35
|
//#endregion
|
|
38
36
|
//#region src/utils/append-all.ts
|
|
39
37
|
function appendAll(target, source) {
|
|
40
38
|
for (const item of source) target.push(item);
|
|
41
39
|
}
|
|
42
|
-
|
|
43
40
|
//#endregion
|
|
44
41
|
//#region src/wc/non-words.ts
|
|
45
42
|
const emojiRegex = /(?:\p{Extended_Pictographic}|\p{Emoji_Presentation})/u;
|
|
@@ -153,7 +150,6 @@ function createWhitespaceCounts() {
|
|
|
153
150
|
other: 0
|
|
154
151
|
};
|
|
155
152
|
}
|
|
156
|
-
|
|
157
153
|
//#endregion
|
|
158
154
|
//#region src/wc/analyze.ts
|
|
159
155
|
function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
|
|
@@ -253,7 +249,6 @@ function aggregateByLocale(chunks) {
|
|
|
253
249
|
}
|
|
254
250
|
return order.map((locale) => map.get(locale));
|
|
255
251
|
}
|
|
256
|
-
|
|
257
252
|
//#endregion
|
|
258
253
|
//#region src/wc/mode.ts
|
|
259
254
|
const MODE_ALIASES = {
|
|
@@ -321,10 +316,7 @@ function normalizeMode(input) {
|
|
|
321
316
|
function resolveMode(input, fallback = "chunk") {
|
|
322
317
|
return normalizeMode(input) ?? fallback;
|
|
323
318
|
}
|
|
324
|
-
|
|
325
|
-
//#endregion
|
|
326
|
-
//#region src/wc/latin-hints.ts
|
|
327
|
-
const DEFAULT_LATIN_HINT_RULES_SOURCE = [
|
|
319
|
+
const DEFAULT_LATIN_HINT_RULES = Object.freeze([
|
|
328
320
|
{
|
|
329
321
|
tag: "de",
|
|
330
322
|
pattern: "[äöüÄÖÜß]"
|
|
@@ -361,13 +353,10 @@ const DEFAULT_LATIN_HINT_RULES_SOURCE = [
|
|
|
361
353
|
tag: "is",
|
|
362
354
|
pattern: "[ðÐþÞ]"
|
|
363
355
|
}
|
|
364
|
-
];
|
|
365
|
-
const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
|
|
366
|
-
|
|
356
|
+
].map((rule) => Object.freeze({ ...rule })));
|
|
367
357
|
//#endregion
|
|
368
358
|
//#region src/wc/locale-detect.ts
|
|
369
359
|
const DEFAULT_LOCALE = "und-Latn";
|
|
370
|
-
const DEFAULT_HAN_TAG = "und-Hani";
|
|
371
360
|
const MAX_LATIN_HINT_PATTERN_LENGTH = 256;
|
|
372
361
|
const regex = {
|
|
373
362
|
hiragana: /\p{Script=Hiragana}/u,
|
|
@@ -483,18 +472,17 @@ function detectLocaleForChar(char, previousLocale, options = {}, context = resol
|
|
|
483
472
|
if (regex.thai.test(char)) return "th";
|
|
484
473
|
if (regex.han.test(char)) {
|
|
485
474
|
if (allowJapaneseHanCarry && previousLocale && previousLocale.startsWith("ja")) return previousLocale;
|
|
486
|
-
return context.hanHint ?? DEFAULT_HAN_TAG;
|
|
475
|
+
return context.hanHint ?? "und-Hani";
|
|
487
476
|
}
|
|
488
477
|
if (regex.latin.test(char)) {
|
|
489
478
|
const hintedLocale = detectLatinLocale(char, context);
|
|
490
|
-
if (hintedLocale !== DEFAULT_LOCALE) return hintedLocale;
|
|
491
|
-
if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE) return previousLocale;
|
|
479
|
+
if (hintedLocale !== "und-Latn") return hintedLocale;
|
|
480
|
+
if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== "und-Latn") return previousLocale;
|
|
492
481
|
if (context.latinHint) return context.latinHint;
|
|
493
482
|
return DEFAULT_LOCALE;
|
|
494
483
|
}
|
|
495
484
|
return null;
|
|
496
485
|
}
|
|
497
|
-
|
|
498
486
|
//#endregion
|
|
499
487
|
//#region src/wc/segment.ts
|
|
500
488
|
const HARD_BOUNDARY_REGEX = /[\r\n,.!?;:,、。!?;:.。、]/u;
|
|
@@ -531,7 +519,7 @@ function segmentTextByLocale(text, options = {}) {
|
|
|
531
519
|
continue;
|
|
532
520
|
}
|
|
533
521
|
if (targetLocale !== currentLocale && detected !== null) {
|
|
534
|
-
if (currentLocale === DEFAULT_LOCALE && isLatinLocale(targetLocale, context)) {
|
|
522
|
+
if (currentLocale === "und-Latn" && isLatinLocale(targetLocale, context)) {
|
|
535
523
|
const promotionBreakIndex = findLastLatinPromotionBreakIndex(buffer);
|
|
536
524
|
if (promotionBreakIndex === -1) {
|
|
537
525
|
currentLocale = targetLocale;
|
|
@@ -598,7 +586,6 @@ function mergeAdjacentChunks(chunks) {
|
|
|
598
586
|
merged.push(last);
|
|
599
587
|
return merged;
|
|
600
588
|
}
|
|
601
|
-
|
|
602
589
|
//#endregion
|
|
603
590
|
//#region src/wc/wc.ts
|
|
604
591
|
function wordCounter(text, options = {}) {
|
|
@@ -713,11 +700,9 @@ function collectNonWordsAggregate(analyzed, enabled) {
|
|
|
713
700
|
}
|
|
714
701
|
return collection;
|
|
715
702
|
}
|
|
716
|
-
|
|
717
703
|
//#endregion
|
|
718
704
|
//#region src/wc/index.ts
|
|
719
705
|
var wc_default = wordCounter;
|
|
720
|
-
|
|
721
706
|
//#endregion
|
|
722
707
|
//#region src/markdown/toml/arrays.ts
|
|
723
708
|
function ensureArrayContainer(result, key) {
|
|
@@ -733,7 +718,6 @@ function flattenArrayTables(result) {
|
|
|
733
718
|
result[key] = value.map((entry) => Object.entries(entry).map(([entryKey, entryValue]) => `${entryKey}=${entryValue}`).join(", ")).join(" | ");
|
|
734
719
|
}
|
|
735
720
|
}
|
|
736
|
-
|
|
737
721
|
//#endregion
|
|
738
722
|
//#region src/markdown/toml/keys.ts
|
|
739
723
|
function stripKeyQuotes(key) {
|
|
@@ -752,7 +736,6 @@ function normalizeKeyPath(key) {
|
|
|
752
736
|
if (segments.some((segment) => !segment)) return null;
|
|
753
737
|
return segments.join(".");
|
|
754
738
|
}
|
|
755
|
-
|
|
756
739
|
//#endregion
|
|
757
740
|
//#region src/markdown/toml/strings.ts
|
|
758
741
|
function stripInlineComment(line) {
|
|
@@ -801,7 +784,6 @@ function parseStringLiteral(value) {
|
|
|
801
784
|
if (value.startsWith("'") && value.endsWith("'")) return value.slice(1, -1);
|
|
802
785
|
return null;
|
|
803
786
|
}
|
|
804
|
-
|
|
805
787
|
//#endregion
|
|
806
788
|
//#region src/markdown/toml/values.ts
|
|
807
789
|
function parsePrimitive(raw) {
|
|
@@ -959,7 +941,6 @@ function toPlainText(value) {
|
|
|
959
941
|
if (Array.isArray(value)) return value.map((item) => String(item)).join(", ");
|
|
960
942
|
return String(value);
|
|
961
943
|
}
|
|
962
|
-
|
|
963
944
|
//#endregion
|
|
964
945
|
//#region src/markdown/toml/parse-frontmatter.ts
|
|
965
946
|
function parseTomlFrontmatter(frontmatter) {
|
|
@@ -1043,7 +1024,6 @@ function parseTomlFrontmatter(frontmatter) {
|
|
|
1043
1024
|
flattenArrayTables(result);
|
|
1044
1025
|
return result;
|
|
1045
1026
|
}
|
|
1046
|
-
|
|
1047
1027
|
//#endregion
|
|
1048
1028
|
//#region src/markdown/parse-markdown.ts
|
|
1049
1029
|
const FENCE_TO_TYPE = {
|
|
@@ -1178,7 +1158,6 @@ function parseMarkdown(input) {
|
|
|
1178
1158
|
frontmatterType: openingType
|
|
1179
1159
|
};
|
|
1180
1160
|
}
|
|
1181
|
-
|
|
1182
1161
|
//#endregion
|
|
1183
1162
|
//#region src/markdown/section-count.ts
|
|
1184
1163
|
function normalizeText(value) {
|
|
@@ -1243,13 +1222,11 @@ function countSections(input, section, options = {}) {
|
|
|
1243
1222
|
items
|
|
1244
1223
|
};
|
|
1245
1224
|
}
|
|
1246
|
-
|
|
1247
1225
|
//#endregion
|
|
1248
1226
|
//#region src/utils/show-singular-or-plural-word.ts
|
|
1249
1227
|
function showSingularOrPluralWord(count, word) {
|
|
1250
1228
|
return `${count} ${word}${count === 1 ? "" : "s"}`;
|
|
1251
1229
|
}
|
|
1252
|
-
|
|
1253
1230
|
//#endregion
|
|
1254
1231
|
//#region src/index.cjs.ts
|
|
1255
1232
|
const cjsExports = Object.assign(wc_default, {
|
|
@@ -1263,6 +1240,6 @@ const cjsExports = Object.assign(wc_default, {
|
|
|
1263
1240
|
showSingularOrPluralWord
|
|
1264
1241
|
});
|
|
1265
1242
|
module.exports = cjsExports;
|
|
1266
|
-
|
|
1267
1243
|
//#endregion
|
|
1244
|
+
|
|
1268
1245
|
//# sourceMappingURL=index.cjs.map
|