@dev-pi2pie/word-counter 0.1.6-canary.1 → 0.1.7-canary.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -111,6 +111,7 @@ Inspect detector behavior without count output:
111
111
  ```bash
112
112
  word-counter inspect "こんにちは、世界!これはテストです。"
113
113
  word-counter inspect --detector wasm --view engine "This sentence should clearly be detected as English for the wasm detector path."
114
+ word-counter inspect --detector wasm --view engine --content-gate strict "Readers understand this behavior."
114
115
  word-counter inspect --detector regex -f json "こんにちは、世界!これはテストです。"
115
116
  word-counter inspect --detector regex -f json --pretty "こんにちは、世界!これはテストです。"
116
117
  word-counter inspect --detector wasm --content-gate off "mode: debug\ntee: true\npath: logs\nUse this for testing."
@@ -144,6 +145,11 @@ Detector mode notes:
144
145
  - Technical-noise-heavy Latin windows stay conservative and may remain `und-Latn` even when the detector produces a wrong-but-confident language guess.
145
146
  - inspect/debug disclosure uses `contentGate` as the canonical gate field.
146
147
  - legacy debug/evidence payloads still emit `qualityGate` as a compatibility alias derived from `contentGate.passed`.
148
+ - `inspect --view engine` stays raw:
149
+ - it shows the detector sample plus raw/normalized/remapped Whatlang output
150
+ - it does not apply `eligibility` or `contentGate` policy decisions
151
+ - if engine view uses an explicit or effective non-default content-gate mode, the CLI emits a cyan info note and points to `--view pipeline`
152
+ - `inspect --view pipeline` is the inspect surface for `eligibility`, `contentGate`, acceptance, and fallback reasoning.
147
153
  - for practical verification, use `inspect` to compare direct mode outcomes across `default`, `strict`, `loose`, and `off`; use `--debug --detector-evidence` when you specifically need counting-flow event details or legacy `qualityGate` compatibility
148
154
  - `word-counter inspect` supports:
149
155
  - positional text input
@@ -966,7 +966,8 @@ function resolveWhatlangWasmModulePath() {
966
966
  }
967
967
  async function loadWhatlangWasmModule() {
968
968
  if (!modulePromise) modulePromise = (async () => {
969
- return requireFromHere(resolveWhatlangWasmModulePath());
969
+ const modulePath = resolveWhatlangWasmModulePath();
970
+ return requireFromHere(modulePath);
970
971
  })();
971
972
  return modulePromise;
972
973
  }
@@ -45,7 +45,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
45
45
  const symbolRegex = /\p{S}/u;
46
46
  const punctuationRegex = /\p{P}/u;
47
47
  const whitespaceRegex = /\s/u;
48
- const newlineChars = new Set([
48
+ const newlineChars = /* @__PURE__ */ new Set([
49
49
  "\n",
50
50
  "\r",
51
51
  "\u2028",
@@ -156,11 +156,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
156
156
  const segmenter = getSegmenter(chunk.locale);
157
157
  const segments = [];
158
158
  const nonWords = collectNonWords ? createNonWordCollection() : null;
159
- for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
160
- else if (collectNonWords && nonWords) {
161
- if (includeWhitespace) addWhitespace(nonWords, part.segment);
159
+ for (const part of segmenter.segment(chunk.text)) {
162
160
  const category = classifyNonWordSegment(part.segment);
163
- if (category) addNonWord(nonWords, category, part.segment);
161
+ if (category) {
162
+ if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
163
+ continue;
164
+ }
165
+ if (part.isWordLike) segments.push(part.segment);
166
+ else if (collectNonWords && nonWords) {
167
+ if (includeWhitespace) addWhitespace(nonWords, part.segment);
168
+ }
164
169
  }
165
170
  return {
166
171
  locale: chunk.locale,
@@ -177,6 +182,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
177
182
  let wordChars = 0;
178
183
  let nonWordChars = 0;
179
184
  for (const part of segmenter.segment(chunk.text)) {
185
+ const category = classifyNonWordSegment(part.segment);
186
+ if (category) {
187
+ if (collectNonWords && nonWords) {
188
+ addNonWord(nonWords, category, part.segment);
189
+ const count = countCharsForLocale(part.segment, chunk.locale);
190
+ chars += count;
191
+ nonWordChars += count;
192
+ }
193
+ continue;
194
+ }
180
195
  if (part.isWordLike) {
181
196
  const count = countCharsForLocale(part.segment, chunk.locale);
182
197
  chars += count;
@@ -186,9 +201,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
186
201
  if (collectNonWords && nonWords) {
187
202
  let whitespaceCount = 0;
188
203
  if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
189
- const category = classifyNonWordSegment(part.segment);
190
- if (category) addNonWord(nonWords, category, part.segment);
191
- if (category || whitespaceCount > 0) {
204
+ if (whitespaceCount > 0) {
192
205
  const count = countCharsForLocale(part.segment, chunk.locale);
193
206
  chars += count;
194
207
  nonWordChars += count;
@@ -266,13 +279,13 @@ const MODE_ALIASES = {
266
279
  characters: "char",
267
280
  "char-collector": "char-collector"
268
281
  };
269
- const CHAR_MODE_ALIASES = new Set([
282
+ const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
270
283
  "char",
271
284
  "chars",
272
285
  "character",
273
286
  "characters"
274
287
  ]);
275
- const COLLECTOR_MODE_ALIASES = new Set([
288
+ const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
276
289
  "collector",
277
290
  "collect",
278
291
  "colle",
@@ -370,7 +383,7 @@ const regex = {
370
383
  devanagari: /\p{Script=Devanagari}/u,
371
384
  thai: /\p{Script=Thai}/u
372
385
  };
373
- const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
386
+ const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
374
387
  function isLatinLocale(locale, context) {
375
388
  if (context) return context.latinLocales.has(locale);
376
389
  return defaultLatinLocales.has(locale);
@@ -447,7 +460,7 @@ function resolveLatinHintRules(options) {
447
460
  function resolveLocaleDetectContext(options = {}) {
448
461
  const latinHint = resolveLatinHint(options);
449
462
  const latinHintRules = resolveLatinHintRules(options);
450
- const latinLocales = new Set([DEFAULT_LOCALE]);
463
+ const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
451
464
  for (const rule of latinHintRules) latinLocales.add(rule.tag);
452
465
  if (latinHint) latinLocales.add(latinHint);
453
466
  return {
package/dist/esm/bin.mjs CHANGED
@@ -15,7 +15,7 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
15
15
  var __getOwnPropNames = Object.getOwnPropertyNames;
16
16
  var __getProtoOf = Object.getPrototypeOf;
17
17
  var __hasOwnProp = Object.prototype.hasOwnProperty;
18
- var __commonJSMin = (cb, mod) => () => (mod || cb((mod = { exports: {} }).exports, mod), mod.exports);
18
+ var __commonJSMin = (cb, mod) => () => (mod || (cb((mod = { exports: {} }).exports, mod), cb = null), mod.exports);
19
19
  var __copyProps = (to, from, except, desc) => {
20
20
  if (from && typeof from === "object" || typeof from === "function") for (var keys = __getOwnPropNames(from), i = 0, n = keys.length, key; i < n; i++) {
21
21
  key = keys[i];
@@ -262,7 +262,8 @@ function collectTotalOfCounts(result) {
262
262
  return counts;
263
263
  }
264
264
  function parseTotalOfToken(token) {
265
- const canonical = TOTAL_OF_PART_ALIASES[token.trim().toLowerCase()];
265
+ const normalized = token.trim().toLowerCase();
266
+ const canonical = TOTAL_OF_PART_ALIASES[normalized];
266
267
  if (canonical) return canonical;
267
268
  throw new Error(`Invalid --total-of part: ${token}. Allowed: ${TOTAL_OF_PARTS.join(", ")}.`);
268
269
  }
@@ -1271,7 +1272,7 @@ function meetsRequiredNodeVersion(version) {
1271
1272
  return version.patch >= REQUIRED_NODE_VERSION.patch;
1272
1273
  }
1273
1274
  function resolveRuntimeSummary(overrides = {}) {
1274
- const packageVersion = normalizePackageVersion(overrides.packageVersion ?? "0.1.6-canary.1");
1275
+ const packageVersion = normalizePackageVersion(overrides.packageVersion ?? "0.1.7-canary.1");
1275
1276
  const nodeVersion = overrides.nodeVersion ?? process.version;
1276
1277
  const parsedNodeVersion = parseNodeVersion(nodeVersion);
1277
1278
  return {
@@ -2027,7 +2028,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
2027
2028
  const symbolRegex = /\p{S}/u;
2028
2029
  const punctuationRegex = /\p{P}/u;
2029
2030
  const whitespaceRegex = /\s/u;
2030
- const newlineChars = new Set([
2031
+ const newlineChars = /* @__PURE__ */ new Set([
2031
2032
  "\n",
2032
2033
  "\r",
2033
2034
  "\u2028",
@@ -2138,11 +2139,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
2138
2139
  const segmenter = getSegmenter(chunk.locale);
2139
2140
  const segments = [];
2140
2141
  const nonWords = collectNonWords ? createNonWordCollection() : null;
2141
- for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
2142
- else if (collectNonWords && nonWords) {
2143
- if (includeWhitespace) addWhitespace(nonWords, part.segment);
2142
+ for (const part of segmenter.segment(chunk.text)) {
2144
2143
  const category = classifyNonWordSegment(part.segment);
2145
- if (category) addNonWord(nonWords, category, part.segment);
2144
+ if (category) {
2145
+ if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
2146
+ continue;
2147
+ }
2148
+ if (part.isWordLike) segments.push(part.segment);
2149
+ else if (collectNonWords && nonWords) {
2150
+ if (includeWhitespace) addWhitespace(nonWords, part.segment);
2151
+ }
2146
2152
  }
2147
2153
  return {
2148
2154
  locale: chunk.locale,
@@ -2159,6 +2165,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
2159
2165
  let wordChars = 0;
2160
2166
  let nonWordChars = 0;
2161
2167
  for (const part of segmenter.segment(chunk.text)) {
2168
+ const category = classifyNonWordSegment(part.segment);
2169
+ if (category) {
2170
+ if (collectNonWords && nonWords) {
2171
+ addNonWord(nonWords, category, part.segment);
2172
+ const count = countCharsForLocale(part.segment, chunk.locale);
2173
+ chars += count;
2174
+ nonWordChars += count;
2175
+ }
2176
+ continue;
2177
+ }
2162
2178
  if (part.isWordLike) {
2163
2179
  const count = countCharsForLocale(part.segment, chunk.locale);
2164
2180
  chars += count;
@@ -2168,9 +2184,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
2168
2184
  if (collectNonWords && nonWords) {
2169
2185
  let whitespaceCount = 0;
2170
2186
  if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
2171
- const category = classifyNonWordSegment(part.segment);
2172
- if (category) addNonWord(nonWords, category, part.segment);
2173
- if (category || whitespaceCount > 0) {
2187
+ if (whitespaceCount > 0) {
2174
2188
  const count = countCharsForLocale(part.segment, chunk.locale);
2175
2189
  chars += count;
2176
2190
  nonWordChars += count;
@@ -2248,13 +2262,13 @@ const MODE_ALIASES = {
2248
2262
  characters: "char",
2249
2263
  "char-collector": "char-collector"
2250
2264
  };
2251
- const CHAR_MODE_ALIASES = new Set([
2265
+ const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
2252
2266
  "char",
2253
2267
  "chars",
2254
2268
  "character",
2255
2269
  "characters"
2256
2270
  ]);
2257
- const COLLECTOR_MODE_ALIASES = new Set([
2271
+ const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
2258
2272
  "collector",
2259
2273
  "collect",
2260
2274
  "colle",
@@ -2352,7 +2366,7 @@ const regex = {
2352
2366
  devanagari: /\p{Script=Devanagari}/u,
2353
2367
  thai: /\p{Script=Thai}/u
2354
2368
  };
2355
- const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
2369
+ const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
2356
2370
  function isLatinLocale(locale, context) {
2357
2371
  if (context) return context.latinLocales.has(locale);
2358
2372
  return defaultLatinLocales.has(locale);
@@ -2429,7 +2443,7 @@ function resolveLatinHintRules$1(options) {
2429
2443
  function resolveLocaleDetectContext(options = {}) {
2430
2444
  const latinHint = resolveLatinHint(options);
2431
2445
  const latinHintRules = resolveLatinHintRules$1(options);
2432
- const latinLocales = new Set([DEFAULT_LOCALE]);
2446
+ const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
2433
2447
  for (const rule of latinHintRules) latinLocales.add(rule.tag);
2434
2448
  if (latinHint) latinLocales.add(latinHint);
2435
2449
  return {
@@ -3766,7 +3780,8 @@ function resolveWhatlangWasmModulePath() {
3766
3780
  }
3767
3781
  async function loadWhatlangWasmModule() {
3768
3782
  if (!modulePromise) modulePromise = (async () => {
3769
- return requireFromHere(resolveWhatlangWasmModulePath());
3783
+ const modulePath = resolveWhatlangWasmModulePath();
3784
+ return requireFromHere(modulePath);
3770
3785
  })();
3771
3786
  return modulePromise;
3772
3787
  }
@@ -4968,7 +4983,7 @@ const INSPECT_HELP_LINES = [
4968
4983
  "",
4969
4984
  "Options:",
4970
4985
  " -d, --detector <mode> inspect detector mode (wasm, regex) (default: regex)",
4971
- " --content-gate <mode> content gate mode (default, strict, loose, off) (default: default)",
4986
+ " --content-gate <mode> content gate mode for pipeline policy inspection (default, strict, loose, off) (default: default)",
4972
4987
  " --view <view> inspect view (pipeline, engine) (default: pipeline)",
4973
4988
  " -f, --format <format> inspect output format (standard, json) (default: standard)",
4974
4989
  " --pretty pretty print inspect JSON output",
@@ -5223,6 +5238,13 @@ function emitConfigNotes$1(notes) {
5223
5238
  console.error(import_picocolors.default.yellow(warningLine));
5224
5239
  }
5225
5240
  }
5241
+ function shouldEmitEngineContentGateInfo(validated) {
5242
+ if (validated.view !== "engine" || validated.detector !== "wasm") return false;
5243
+ return validated.sources.contentGate || validated.contentGateMode !== "default";
5244
+ }
5245
+ function emitEngineContentGateInfo() {
5246
+ console.error(import_picocolors.default.cyan("Info: `--content-gate` does not affect `inspect --view engine`; engine view shows raw detector output. Use `--view pipeline` to inspect eligibility and content-gate restrictions."));
5247
+ }
5226
5248
  async function executeInspectCommand({ argv, runtime }) {
5227
5249
  const parsed = validateInspectInvocation(argv);
5228
5250
  if (!parsed.ok) {
@@ -5254,6 +5276,7 @@ async function executeInspectCommand({ argv, runtime }) {
5254
5276
  process.exitCode = 1;
5255
5277
  return;
5256
5278
  }
5279
+ if (shouldEmitEngineContentGateInfo(validated)) emitEngineContentGateInfo();
5257
5280
  try {
5258
5281
  if (validated.paths.length === 0) {
5259
5282
  const input = await loadSingleInspectInput(void 0, validated.textTokens, validated.section);
@@ -5362,7 +5385,7 @@ function normalizeVersion(value) {
5362
5385
  return trimmed;
5363
5386
  }
5364
5387
  function resolvePackageVersion(options = {}) {
5365
- const embeddedVersion = normalizeVersion(options.embeddedVersion ?? "0.1.6-canary.1");
5388
+ const embeddedVersion = normalizeVersion(options.embeddedVersion ?? "0.1.7-canary.1");
5366
5389
  if (embeddedVersion) return embeddedVersion;
5367
5390
  const maxLevels = options.maxLevels ?? 8;
5368
5391
  const resolveFromPath = options.resolveFromPath ?? resolveVersionFromPath;
@@ -5560,7 +5583,7 @@ function aggregateSectionedResults(results, preserveCollectorSegments) {
5560
5583
  existing.items.push(item.result);
5561
5584
  }
5562
5585
  }
5563
- const sourceOrder = new Map([["frontmatter", 0], ["content", 1]]);
5586
+ const sourceOrder = /* @__PURE__ */ new Map([["frontmatter", 0], ["content", 1]]);
5564
5587
  const items = [...grouped.values()].sort((left, right) => {
5565
5588
  const sourceDiff = (sourceOrder.get(left.source) ?? 0) - (sourceOrder.get(right.source) ?? 0);
5566
5589
  if (sourceDiff !== 0) return sourceDiff;
@@ -966,7 +966,8 @@ function resolveWhatlangWasmModulePath() {
966
966
  }
967
967
  async function loadWhatlangWasmModule() {
968
968
  if (!modulePromise) modulePromise = (async () => {
969
- return requireFromHere(resolveWhatlangWasmModulePath());
969
+ const modulePath = resolveWhatlangWasmModulePath();
970
+ return requireFromHere(modulePath);
970
971
  })();
971
972
  return modulePromise;
972
973
  }
@@ -1,2 +1,2 @@
1
1
  import { _ as NonWordCollection, a as SectionMode, b as WordCounterOptions, c as appendAll, d as countCharsForLocale, f as countWordsForLocale, h as LatinHintRule, i as ParsedMarkdown, l as wordCounter, n as parseMarkdown, o as SectionedResult, p as segmentTextByLocale, r as FrontmatterType, s as showSingularOrPluralWord, t as countSections, u as DEFAULT_LATIN_HINT_RULES, v as WordCounterBreakdown, x as WordCounterResult, y as WordCounterMode } from "./index.mjs";
2
- export { DEFAULT_LATIN_HINT_RULES, FrontmatterType, LatinHintRule, NonWordCollection, ParsedMarkdown, SectionMode, SectionedResult, WordCounterBreakdown, WordCounterMode, WordCounterOptions, WordCounterResult, appendAll, countCharsForLocale, countSections, countWordsForLocale, wordCounter as default, wordCounter, parseMarkdown, segmentTextByLocale, showSingularOrPluralWord };
2
+ export { DEFAULT_LATIN_HINT_RULES, type FrontmatterType, type LatinHintRule, type NonWordCollection, type ParsedMarkdown, type SectionMode, type SectionedResult, type WordCounterBreakdown, type WordCounterMode, type WordCounterOptions, type WordCounterResult, appendAll, countCharsForLocale, countSections, countWordsForLocale, wordCounter as default, wordCounter, parseMarkdown, segmentTextByLocale, showSingularOrPluralWord };
@@ -45,7 +45,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
45
45
  const symbolRegex = /\p{S}/u;
46
46
  const punctuationRegex = /\p{P}/u;
47
47
  const whitespaceRegex = /\s/u;
48
- const newlineChars = new Set([
48
+ const newlineChars = /* @__PURE__ */ new Set([
49
49
  "\n",
50
50
  "\r",
51
51
  "\u2028",
@@ -156,11 +156,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
156
156
  const segmenter = getSegmenter(chunk.locale);
157
157
  const segments = [];
158
158
  const nonWords = collectNonWords ? createNonWordCollection() : null;
159
- for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
160
- else if (collectNonWords && nonWords) {
161
- if (includeWhitespace) addWhitespace(nonWords, part.segment);
159
+ for (const part of segmenter.segment(chunk.text)) {
162
160
  const category = classifyNonWordSegment(part.segment);
163
- if (category) addNonWord(nonWords, category, part.segment);
161
+ if (category) {
162
+ if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
163
+ continue;
164
+ }
165
+ if (part.isWordLike) segments.push(part.segment);
166
+ else if (collectNonWords && nonWords) {
167
+ if (includeWhitespace) addWhitespace(nonWords, part.segment);
168
+ }
164
169
  }
165
170
  return {
166
171
  locale: chunk.locale,
@@ -177,6 +182,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
177
182
  let wordChars = 0;
178
183
  let nonWordChars = 0;
179
184
  for (const part of segmenter.segment(chunk.text)) {
185
+ const category = classifyNonWordSegment(part.segment);
186
+ if (category) {
187
+ if (collectNonWords && nonWords) {
188
+ addNonWord(nonWords, category, part.segment);
189
+ const count = countCharsForLocale(part.segment, chunk.locale);
190
+ chars += count;
191
+ nonWordChars += count;
192
+ }
193
+ continue;
194
+ }
180
195
  if (part.isWordLike) {
181
196
  const count = countCharsForLocale(part.segment, chunk.locale);
182
197
  chars += count;
@@ -186,9 +201,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
186
201
  if (collectNonWords && nonWords) {
187
202
  let whitespaceCount = 0;
188
203
  if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
189
- const category = classifyNonWordSegment(part.segment);
190
- if (category) addNonWord(nonWords, category, part.segment);
191
- if (category || whitespaceCount > 0) {
204
+ if (whitespaceCount > 0) {
192
205
  const count = countCharsForLocale(part.segment, chunk.locale);
193
206
  chars += count;
194
207
  nonWordChars += count;
@@ -266,13 +279,13 @@ const MODE_ALIASES = {
266
279
  characters: "char",
267
280
  "char-collector": "char-collector"
268
281
  };
269
- const CHAR_MODE_ALIASES = new Set([
282
+ const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
270
283
  "char",
271
284
  "chars",
272
285
  "character",
273
286
  "characters"
274
287
  ]);
275
- const COLLECTOR_MODE_ALIASES = new Set([
288
+ const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
276
289
  "collector",
277
290
  "collect",
278
291
  "colle",
@@ -370,7 +383,7 @@ const regex = {
370
383
  devanagari: /\p{Script=Devanagari}/u,
371
384
  thai: /\p{Script=Thai}/u
372
385
  };
373
- const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
386
+ const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
374
387
  function isLatinLocale(locale, context) {
375
388
  if (context) return context.latinLocales.has(locale);
376
389
  return defaultLatinLocales.has(locale);
@@ -447,7 +460,7 @@ function resolveLatinHintRules(options) {
447
460
  function resolveLocaleDetectContext(options = {}) {
448
461
  const latinHint = resolveLatinHint(options);
449
462
  const latinHintRules = resolveLatinHintRules(options);
450
- const latinLocales = new Set([DEFAULT_LOCALE]);
463
+ const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
451
464
  for (const rule of latinHintRules) latinLocales.add(rule.tag);
452
465
  if (latinHint) latinLocales.add(latinHint);
453
466
  return {
@@ -500,7 +500,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
500
500
  const symbolRegex = /\p{S}/u;
501
501
  const punctuationRegex = /\p{P}/u;
502
502
  const whitespaceRegex = /\s/u;
503
- const newlineChars = new Set([
503
+ const newlineChars = /* @__PURE__ */ new Set([
504
504
  "\n",
505
505
  "\r",
506
506
  "\u2028",
@@ -611,11 +611,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
611
611
  const segmenter = getSegmenter(chunk.locale);
612
612
  const segments = [];
613
613
  const nonWords = collectNonWords ? createNonWordCollection() : null;
614
- for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
615
- else if (collectNonWords && nonWords) {
616
- if (includeWhitespace) addWhitespace(nonWords, part.segment);
614
+ for (const part of segmenter.segment(chunk.text)) {
617
615
  const category = classifyNonWordSegment(part.segment);
618
- if (category) addNonWord(nonWords, category, part.segment);
616
+ if (category) {
617
+ if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
618
+ continue;
619
+ }
620
+ if (part.isWordLike) segments.push(part.segment);
621
+ else if (collectNonWords && nonWords) {
622
+ if (includeWhitespace) addWhitespace(nonWords, part.segment);
623
+ }
619
624
  }
620
625
  return {
621
626
  locale: chunk.locale,
@@ -632,6 +637,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
632
637
  let wordChars = 0;
633
638
  let nonWordChars = 0;
634
639
  for (const part of segmenter.segment(chunk.text)) {
640
+ const category = classifyNonWordSegment(part.segment);
641
+ if (category) {
642
+ if (collectNonWords && nonWords) {
643
+ addNonWord(nonWords, category, part.segment);
644
+ const count = countCharsForLocale(part.segment, chunk.locale);
645
+ chars += count;
646
+ nonWordChars += count;
647
+ }
648
+ continue;
649
+ }
635
650
  if (part.isWordLike) {
636
651
  const count = countCharsForLocale(part.segment, chunk.locale);
637
652
  chars += count;
@@ -641,9 +656,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
641
656
  if (collectNonWords && nonWords) {
642
657
  let whitespaceCount = 0;
643
658
  if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
644
- const category = classifyNonWordSegment(part.segment);
645
- if (category) addNonWord(nonWords, category, part.segment);
646
- if (category || whitespaceCount > 0) {
659
+ if (whitespaceCount > 0) {
647
660
  const count = countCharsForLocale(part.segment, chunk.locale);
648
661
  chars += count;
649
662
  nonWordChars += count;
@@ -721,13 +734,13 @@ const MODE_ALIASES = {
721
734
  characters: "char",
722
735
  "char-collector": "char-collector"
723
736
  };
724
- const CHAR_MODE_ALIASES = new Set([
737
+ const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
725
738
  "char",
726
739
  "chars",
727
740
  "character",
728
741
  "characters"
729
742
  ]);
730
- const COLLECTOR_MODE_ALIASES = new Set([
743
+ const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
731
744
  "collector",
732
745
  "collect",
733
746
  "colle",
@@ -825,7 +838,7 @@ const regex = {
825
838
  devanagari: /\p{Script=Devanagari}/u,
826
839
  thai: /\p{Script=Thai}/u
827
840
  };
828
- const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
841
+ const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
829
842
  function isLatinLocale(locale, context) {
830
843
  if (context) return context.latinLocales.has(locale);
831
844
  return defaultLatinLocales.has(locale);
@@ -902,7 +915,7 @@ function resolveLatinHintRules(options) {
902
915
  function resolveLocaleDetectContext(options = {}) {
903
916
  const latinHint = resolveLatinHint(options);
904
917
  const latinHintRules = resolveLatinHintRules(options);
905
- const latinLocales = new Set([DEFAULT_LOCALE]);
918
+ const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
906
919
  for (const rule of latinHintRules) latinLocales.add(rule.tag);
907
920
  if (latinHint) latinLocales.add(latinHint);
908
921
  return {
@@ -1921,7 +1934,8 @@ function resolveWhatlangWasmModulePath() {
1921
1934
  }
1922
1935
  async function loadWhatlangWasmModule() {
1923
1936
  if (!modulePromise) modulePromise = (async () => {
1924
- return requireFromHere(resolveWhatlangWasmModulePath());
1937
+ const modulePath = resolveWhatlangWasmModulePath();
1938
+ return requireFromHere(modulePath);
1925
1939
  })();
1926
1940
  return modulePromise;
1927
1941
  }
@@ -14,14 +14,13 @@ function detect_language(text, _route_tag) {
14
14
  return ret;
15
15
  }
16
16
  exports.detect_language = detect_language;
17
-
18
17
  function __wbg_get_imports() {
19
18
  const import0 = {
20
19
  __proto__: null,
21
- __wbg___wbindgen_throw_bd5a70920abf0236: function(arg0, arg1) {
20
+ __wbg___wbindgen_throw_344f42d3211c4765: function(arg0, arg1) {
22
21
  throw new Error(getStringFromWasm0(arg0, arg1));
23
22
  },
24
- __wbg_new_e4597c3f125a2038: function() {
23
+ __wbg_new_da52cf8fe3429cb2: function() {
25
24
  const ret = new Object();
26
25
  return ret;
27
26
  },
@@ -55,8 +54,7 @@ function __wbg_get_imports() {
55
54
  }
56
55
 
57
56
  function getStringFromWasm0(ptr, len) {
58
- ptr = ptr >>> 0;
59
- return decodeText(ptr, len);
57
+ return decodeText(ptr >>> 0, len);
60
58
  }
61
59
 
62
60
  let cachedUint8ArrayMemory0 = null;
@@ -128,5 +126,6 @@ let WASM_VECTOR_LEN = 0;
128
126
  const wasmPath = `${__dirname}/language_detector_bg.wasm`;
129
127
  const wasmBytes = require('fs').readFileSync(wasmPath);
130
128
  const wasmModule = new WebAssembly.Module(wasmBytes);
131
- let wasm = new WebAssembly.Instance(wasmModule, __wbg_get_imports()).exports;
129
+ let wasmInstance = new WebAssembly.Instance(wasmModule, __wbg_get_imports());
130
+ let wasm = wasmInstance.exports;
132
131
  wasm.__wbindgen_start();
@@ -14,4 +14,4 @@
14
14
  ],
15
15
  "main": "language_detector.js",
16
16
  "types": "language_detector.d.ts"
17
- }
17
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dev-pi2pie/word-counter",
3
- "version": "0.1.6-canary.1",
3
+ "version": "0.1.7-canary.1",
4
4
  "keywords": [
5
5
  "cli",
6
6
  "intl-segmenter",
@@ -56,24 +56,21 @@
56
56
  "format:check": "oxfmt --check src test scripts package.json tsconfig.json tsconfig.test.json .oxlintrc.json .oxfmtrc.json"
57
57
  },
58
58
  "dependencies": {
59
- "commander": "^14.0.3",
60
- "yaml": "^2.8.3"
59
+ "commander": "^15.0.0",
60
+ "yaml": "^2.9.0"
61
61
  },
62
62
  "devDependencies": {
63
- "@types/bun": "^1.3.11",
64
- "@types/node": "^25.5.0",
65
- "oxfmt": "^0.43.0",
66
- "oxlint": "^1.58.0",
63
+ "@types/bun": "^1.3.14",
64
+ "@types/node": "^26.1.0",
65
+ "oxfmt": "^0.57.0",
66
+ "oxlint": "^1.72.0",
67
67
  "picocolors": "^1.1.1",
68
- "tsdown": "^0.21.7",
69
- "typescript": "^6.0.2"
68
+ "tsdown": "^0.22.3",
69
+ "typescript": "^6.0.3"
70
70
  },
71
71
  "peerDependencies": {
72
72
  "typescript": "^5 || ^6"
73
73
  },
74
- "overrides": {
75
- "picomatch": "4.0.4"
76
- },
77
74
  "engines": {
78
75
  "node": ">=22.18.0"
79
76
  }