@dev-pi2pie/word-counter 0.1.4-canary.1 → 0.1.5-canary.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -151,8 +151,10 @@ word-counter --path ./examples/test-case-multi-files-support --jobs 4
151
151
  Quick policy:
152
152
 
153
153
  - omitting `--jobs` and passing `--jobs 1` are equivalent (baseline behavior).
154
- - `--jobs > 1` enables concurrent `load+count`.
154
+ - `--jobs 1`: async main-thread `load+count` baseline.
155
+ - `--jobs > 1`: worker `load+count` with async fallback when workers are unavailable.
155
156
  - if requested `--jobs` exceeds host `suggestedMaxJobs` (from `--print-jobs-limit`), the CLI warns and runs with the suggested limit as a safety cap.
157
+ - use `--quiet-warnings` to suppress non-fatal warning lines (for example jobs-limit advisory and worker-fallback warning).
156
158
 
157
159
  Inspect host jobs diagnostics:
158
160
 
@@ -160,6 +162,35 @@ Inspect host jobs diagnostics:
160
162
  word-counter --print-jobs-limit
161
163
  ```
162
164
 
165
+ `--print-jobs-limit` must be used alone (no other inputs or runtime flags).
166
+
167
+ ### Doctor (`doctor`)
168
+
169
+ Use `doctor` to verify whether the current host can run `word-counter` reliably:
170
+
171
+ ```bash
172
+ word-counter doctor
173
+ word-counter doctor --format json
174
+ word-counter doctor --format json --pretty
175
+ ```
176
+
177
+ Doctor scope in v1:
178
+
179
+ - checks runtime support policy against Node.js `>=20`
180
+ - verifies `Intl.Segmenter` availability plus word/grapheme constructor health
181
+ - reports batch jobs host limits using the same heuristics as `--print-jobs-limit`
182
+ - reports worker-route preflight signals and the worker-disable env toggle that affects worker availability
183
+
184
+ Doctor output contract:
185
+
186
+ - default output is human-readable text
187
+ - `--format json` prints compact machine-readable JSON
188
+ - `--format json --pretty` prints indented JSON
189
+ - doctor exits with code `0` for `ok` / `warn`, `1` for invalid doctor usage, and `2` for runtime `fail`
190
+ - doctor does not accept counting inputs, `--path`, `--jobs`, or other counting/debug flags
191
+
192
+ For a field-by-field explanation of doctor text and JSON output, see [`docs/doctor-usage-guide.md`](docs/doctor-usage-guide.md).
193
+
163
194
  For full policy details, JSON parity expectations (`--misc`, `--total-of whitespace,words`), and benchmark standards, see [`docs/batch-jobs-usage-guide.md`](docs/batch-jobs-usage-guide.md).
164
195
 
165
196
  ### Stable Path Resolution Contract
@@ -220,6 +251,8 @@ For additional usage details and troubleshooting, see [`docs/regex-usage-guide.m
220
251
 
221
252
  ### Debugging Diagnostics (`--debug`)
222
253
 
254
+ Noise policy: default output shows errors + warnings; `--debug` enables diagnostics; `--verbose` enables per-item diagnostics; `--quiet-warnings` suppresses warnings.
255
+
223
256
  `--debug` remains the diagnostics gate and now defaults to `compact` event volume:
224
257
 
225
258
  - lifecycle/stage timing events
@@ -252,7 +285,7 @@ word-counter --path ./examples/test-case-multi-files-support --debug --debug-rep
252
285
  word-counter --path ./examples/test-case-multi-files-support --debug --debug-report ./logs/debug.jsonl --debug-tee
253
286
  ```
254
287
 
255
- Skip details stay debug-gated and can still be suppressed with `--quiet-skips`.
288
+ Skip details stay debug-gated and can be suppressed with `--quiet-skips`.
256
289
 
257
290
  ## How It Works
258
291
 
@@ -491,8 +524,9 @@ Example (trimmed):
491
524
  "frontmatterType": "yaml",
492
525
  "total": 7,
493
526
  "items": [
494
- { "name": "content", "source": "frontmatter", "result": { "total": 3 } },
495
- { "name": "content", "source": "content", "result": { "total": 4 } }
527
+ { "name": "content", "source": "frontmatter", "result": { "total": 4 } },
528
+ { "name": "content", "source": "frontmatter", "result": { "total": 2 } },
529
+ { "name": "content", "source": "content", "result": { "total": 5 } }
496
530
  ]
497
531
  }
498
532
  ```
@@ -585,7 +619,7 @@ word-counter --include-whitespace "Hi\tthere\n"
585
619
  word-counter --misc "Hi\tthere\n"
586
620
  ```
587
621
 
588
- In the CLI, `--include-whitespace` implies with `--non-words` (same behavior as `--misc`). `--non-words` alone does not include whitespace. When enabled, whitespace counts appear under `nonWords.whitespace`, and `total = words + nonWords` (emoji + symbols + punctuation + whitespace). JSON output also includes top-level `counts` when `nonWords` is enabled. See `docs/schemas/whitespace-categories.md` for how whitespace is categorized.
622
+ In the CLI, `--include-whitespace` implies `--non-words` (same behavior as `--misc`). `--non-words` alone does not include whitespace. When enabled, whitespace counts appear under `nonWords.whitespace`, and `total = words + nonWords` (emoji + symbols + punctuation + whitespace). JSON output also includes top-level `counts` when `nonWords` is enabled. See `docs/schemas/whitespace-categories.md` for how whitespace is categorized.
589
623
 
590
624
  Example JSON (trimmed):
591
625
 
@@ -1,5 +1,4 @@
1
1
  let yaml = require("yaml");
2
-
3
2
  //#region src/wc/segmenter.ts
4
3
  const segmenterCache = /* @__PURE__ */ new Map();
5
4
  const graphemeSegmenterCache = /* @__PURE__ */ new Map();
@@ -33,13 +32,11 @@ function countCharsForLocale(text, locale) {
33
32
  for (const _segment of segmenter.segment(text)) count++;
34
33
  return count;
35
34
  }
36
-
37
35
  //#endregion
38
36
  //#region src/utils/append-all.ts
39
37
  function appendAll(target, source) {
40
38
  for (const item of source) target.push(item);
41
39
  }
42
-
43
40
  //#endregion
44
41
  //#region src/wc/non-words.ts
45
42
  const emojiRegex = /(?:\p{Extended_Pictographic}|\p{Emoji_Presentation})/u;
@@ -153,7 +150,6 @@ function createWhitespaceCounts() {
153
150
  other: 0
154
151
  };
155
152
  }
156
-
157
153
  //#endregion
158
154
  //#region src/wc/analyze.ts
159
155
  function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
@@ -253,7 +249,6 @@ function aggregateByLocale(chunks) {
253
249
  }
254
250
  return order.map((locale) => map.get(locale));
255
251
  }
256
-
257
252
  //#endregion
258
253
  //#region src/wc/mode.ts
259
254
  const MODE_ALIASES = {
@@ -321,10 +316,7 @@ function normalizeMode(input) {
321
316
  function resolveMode(input, fallback = "chunk") {
322
317
  return normalizeMode(input) ?? fallback;
323
318
  }
324
-
325
- //#endregion
326
- //#region src/wc/latin-hints.ts
327
- const DEFAULT_LATIN_HINT_RULES_SOURCE = [
319
+ const DEFAULT_LATIN_HINT_RULES = Object.freeze([
328
320
  {
329
321
  tag: "de",
330
322
  pattern: "[äöüÄÖÜß]"
@@ -361,13 +353,10 @@ const DEFAULT_LATIN_HINT_RULES_SOURCE = [
361
353
  tag: "is",
362
354
  pattern: "[ðÐþÞ]"
363
355
  }
364
- ];
365
- const DEFAULT_LATIN_HINT_RULES = Object.freeze(DEFAULT_LATIN_HINT_RULES_SOURCE.map((rule) => Object.freeze({ ...rule })));
366
-
356
+ ].map((rule) => Object.freeze({ ...rule })));
367
357
  //#endregion
368
358
  //#region src/wc/locale-detect.ts
369
359
  const DEFAULT_LOCALE = "und-Latn";
370
- const DEFAULT_HAN_TAG = "und-Hani";
371
360
  const MAX_LATIN_HINT_PATTERN_LENGTH = 256;
372
361
  const regex = {
373
362
  hiragana: /\p{Script=Hiragana}/u,
@@ -483,18 +472,17 @@ function detectLocaleForChar(char, previousLocale, options = {}, context = resol
483
472
  if (regex.thai.test(char)) return "th";
484
473
  if (regex.han.test(char)) {
485
474
  if (allowJapaneseHanCarry && previousLocale && previousLocale.startsWith("ja")) return previousLocale;
486
- return context.hanHint ?? DEFAULT_HAN_TAG;
475
+ return context.hanHint ?? "und-Hani";
487
476
  }
488
477
  if (regex.latin.test(char)) {
489
478
  const hintedLocale = detectLatinLocale(char, context);
490
- if (hintedLocale !== DEFAULT_LOCALE) return hintedLocale;
491
- if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== DEFAULT_LOCALE) return previousLocale;
479
+ if (hintedLocale !== "und-Latn") return hintedLocale;
480
+ if (allowLatinLocaleCarry && previousLocale && isLatinLocale(previousLocale, context) && previousLocale !== "und-Latn") return previousLocale;
492
481
  if (context.latinHint) return context.latinHint;
493
482
  return DEFAULT_LOCALE;
494
483
  }
495
484
  return null;
496
485
  }
497
-
498
486
  //#endregion
499
487
  //#region src/wc/segment.ts
500
488
  const HARD_BOUNDARY_REGEX = /[\r\n,.!?;:,、。!?;:.。、]/u;
@@ -531,7 +519,7 @@ function segmentTextByLocale(text, options = {}) {
531
519
  continue;
532
520
  }
533
521
  if (targetLocale !== currentLocale && detected !== null) {
534
- if (currentLocale === DEFAULT_LOCALE && isLatinLocale(targetLocale, context)) {
522
+ if (currentLocale === "und-Latn" && isLatinLocale(targetLocale, context)) {
535
523
  const promotionBreakIndex = findLastLatinPromotionBreakIndex(buffer);
536
524
  if (promotionBreakIndex === -1) {
537
525
  currentLocale = targetLocale;
@@ -598,7 +586,6 @@ function mergeAdjacentChunks(chunks) {
598
586
  merged.push(last);
599
587
  return merged;
600
588
  }
601
-
602
589
  //#endregion
603
590
  //#region src/wc/wc.ts
604
591
  function wordCounter(text, options = {}) {
@@ -713,11 +700,9 @@ function collectNonWordsAggregate(analyzed, enabled) {
713
700
  }
714
701
  return collection;
715
702
  }
716
-
717
703
  //#endregion
718
704
  //#region src/wc/index.ts
719
705
  var wc_default = wordCounter;
720
-
721
706
  //#endregion
722
707
  //#region src/markdown/toml/arrays.ts
723
708
  function ensureArrayContainer(result, key) {
@@ -733,7 +718,6 @@ function flattenArrayTables(result) {
733
718
  result[key] = value.map((entry) => Object.entries(entry).map(([entryKey, entryValue]) => `${entryKey}=${entryValue}`).join(", ")).join(" | ");
734
719
  }
735
720
  }
736
-
737
721
  //#endregion
738
722
  //#region src/markdown/toml/keys.ts
739
723
  function stripKeyQuotes(key) {
@@ -752,7 +736,6 @@ function normalizeKeyPath(key) {
752
736
  if (segments.some((segment) => !segment)) return null;
753
737
  return segments.join(".");
754
738
  }
755
-
756
739
  //#endregion
757
740
  //#region src/markdown/toml/strings.ts
758
741
  function stripInlineComment(line) {
@@ -801,7 +784,6 @@ function parseStringLiteral(value) {
801
784
  if (value.startsWith("'") && value.endsWith("'")) return value.slice(1, -1);
802
785
  return null;
803
786
  }
804
-
805
787
  //#endregion
806
788
  //#region src/markdown/toml/values.ts
807
789
  function parsePrimitive(raw) {
@@ -959,7 +941,6 @@ function toPlainText(value) {
959
941
  if (Array.isArray(value)) return value.map((item) => String(item)).join(", ");
960
942
  return String(value);
961
943
  }
962
-
963
944
  //#endregion
964
945
  //#region src/markdown/toml/parse-frontmatter.ts
965
946
  function parseTomlFrontmatter(frontmatter) {
@@ -1043,7 +1024,6 @@ function parseTomlFrontmatter(frontmatter) {
1043
1024
  flattenArrayTables(result);
1044
1025
  return result;
1045
1026
  }
1046
-
1047
1027
  //#endregion
1048
1028
  //#region src/markdown/parse-markdown.ts
1049
1029
  const FENCE_TO_TYPE = {
@@ -1178,7 +1158,6 @@ function parseMarkdown(input) {
1178
1158
  frontmatterType: openingType
1179
1159
  };
1180
1160
  }
1181
-
1182
1161
  //#endregion
1183
1162
  //#region src/markdown/section-count.ts
1184
1163
  function normalizeText(value) {
@@ -1243,13 +1222,11 @@ function countSections(input, section, options = {}) {
1243
1222
  items
1244
1223
  };
1245
1224
  }
1246
-
1247
1225
  //#endregion
1248
1226
  //#region src/utils/show-singular-or-plural-word.ts
1249
1227
  function showSingularOrPluralWord(count, word) {
1250
1228
  return `${count} ${word}${count === 1 ? "" : "s"}`;
1251
1229
  }
1252
-
1253
1230
  //#endregion
1254
1231
  //#region src/index.cjs.ts
1255
1232
  const cjsExports = Object.assign(wc_default, {
@@ -1263,6 +1240,6 @@ const cjsExports = Object.assign(wc_default, {
1263
1240
  showSingularOrPluralWord
1264
1241
  });
1265
1242
  module.exports = cjsExports;
1266
-
1267
1243
  //#endregion
1244
+
1268
1245
  //# sourceMappingURL=index.cjs.map