@dev-pi2pie/word-counter 0.1.6 → 0.1.7-canary.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -966,7 +966,8 @@ function resolveWhatlangWasmModulePath() {
966
966
  }
967
967
  async function loadWhatlangWasmModule() {
968
968
  if (!modulePromise) modulePromise = (async () => {
969
- return requireFromHere(resolveWhatlangWasmModulePath());
969
+ const modulePath = resolveWhatlangWasmModulePath();
970
+ return requireFromHere(modulePath);
970
971
  })();
971
972
  return modulePromise;
972
973
  }
@@ -45,7 +45,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
45
45
  const symbolRegex = /\p{S}/u;
46
46
  const punctuationRegex = /\p{P}/u;
47
47
  const whitespaceRegex = /\s/u;
48
- const newlineChars = new Set([
48
+ const newlineChars = /* @__PURE__ */ new Set([
49
49
  "\n",
50
50
  "\r",
51
51
  "\u2028",
@@ -156,11 +156,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
156
156
  const segmenter = getSegmenter(chunk.locale);
157
157
  const segments = [];
158
158
  const nonWords = collectNonWords ? createNonWordCollection() : null;
159
- for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
160
- else if (collectNonWords && nonWords) {
161
- if (includeWhitespace) addWhitespace(nonWords, part.segment);
159
+ for (const part of segmenter.segment(chunk.text)) {
162
160
  const category = classifyNonWordSegment(part.segment);
163
- if (category) addNonWord(nonWords, category, part.segment);
161
+ if (category) {
162
+ if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
163
+ continue;
164
+ }
165
+ if (part.isWordLike) segments.push(part.segment);
166
+ else if (collectNonWords && nonWords) {
167
+ if (includeWhitespace) addWhitespace(nonWords, part.segment);
168
+ }
164
169
  }
165
170
  return {
166
171
  locale: chunk.locale,
@@ -177,6 +182,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
177
182
  let wordChars = 0;
178
183
  let nonWordChars = 0;
179
184
  for (const part of segmenter.segment(chunk.text)) {
185
+ const category = classifyNonWordSegment(part.segment);
186
+ if (category) {
187
+ if (collectNonWords && nonWords) {
188
+ addNonWord(nonWords, category, part.segment);
189
+ const count = countCharsForLocale(part.segment, chunk.locale);
190
+ chars += count;
191
+ nonWordChars += count;
192
+ }
193
+ continue;
194
+ }
180
195
  if (part.isWordLike) {
181
196
  const count = countCharsForLocale(part.segment, chunk.locale);
182
197
  chars += count;
@@ -186,9 +201,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
186
201
  if (collectNonWords && nonWords) {
187
202
  let whitespaceCount = 0;
188
203
  if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
189
- const category = classifyNonWordSegment(part.segment);
190
- if (category) addNonWord(nonWords, category, part.segment);
191
- if (category || whitespaceCount > 0) {
204
+ if (whitespaceCount > 0) {
192
205
  const count = countCharsForLocale(part.segment, chunk.locale);
193
206
  chars += count;
194
207
  nonWordChars += count;
@@ -266,13 +279,13 @@ const MODE_ALIASES = {
266
279
  characters: "char",
267
280
  "char-collector": "char-collector"
268
281
  };
269
- const CHAR_MODE_ALIASES = new Set([
282
+ const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
270
283
  "char",
271
284
  "chars",
272
285
  "character",
273
286
  "characters"
274
287
  ]);
275
- const COLLECTOR_MODE_ALIASES = new Set([
288
+ const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
276
289
  "collector",
277
290
  "collect",
278
291
  "colle",
@@ -370,7 +383,7 @@ const regex = {
370
383
  devanagari: /\p{Script=Devanagari}/u,
371
384
  thai: /\p{Script=Thai}/u
372
385
  };
373
- const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
386
+ const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
374
387
  function isLatinLocale(locale, context) {
375
388
  if (context) return context.latinLocales.has(locale);
376
389
  return defaultLatinLocales.has(locale);
@@ -447,7 +460,7 @@ function resolveLatinHintRules(options) {
447
460
  function resolveLocaleDetectContext(options = {}) {
448
461
  const latinHint = resolveLatinHint(options);
449
462
  const latinHintRules = resolveLatinHintRules(options);
450
- const latinLocales = new Set([DEFAULT_LOCALE]);
463
+ const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
451
464
  for (const rule of latinHintRules) latinLocales.add(rule.tag);
452
465
  if (latinHint) latinLocales.add(latinHint);
453
466
  return {
package/dist/esm/bin.mjs CHANGED
@@ -15,7 +15,7 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
15
15
  var __getOwnPropNames = Object.getOwnPropertyNames;
16
16
  var __getProtoOf = Object.getPrototypeOf;
17
17
  var __hasOwnProp = Object.prototype.hasOwnProperty;
18
- var __commonJSMin = (cb, mod) => () => (mod || cb((mod = { exports: {} }).exports, mod), mod.exports);
18
+ var __commonJSMin = (cb, mod) => () => (mod || (cb((mod = { exports: {} }).exports, mod), cb = null), mod.exports);
19
19
  var __copyProps = (to, from, except, desc) => {
20
20
  if (from && typeof from === "object" || typeof from === "function") for (var keys = __getOwnPropNames(from), i = 0, n = keys.length, key; i < n; i++) {
21
21
  key = keys[i];
@@ -262,7 +262,8 @@ function collectTotalOfCounts(result) {
262
262
  return counts;
263
263
  }
264
264
  function parseTotalOfToken(token) {
265
- const canonical = TOTAL_OF_PART_ALIASES[token.trim().toLowerCase()];
265
+ const normalized = token.trim().toLowerCase();
266
+ const canonical = TOTAL_OF_PART_ALIASES[normalized];
266
267
  if (canonical) return canonical;
267
268
  throw new Error(`Invalid --total-of part: ${token}. Allowed: ${TOTAL_OF_PARTS.join(", ")}.`);
268
269
  }
@@ -1271,7 +1272,7 @@ function meetsRequiredNodeVersion(version) {
1271
1272
  return version.patch >= REQUIRED_NODE_VERSION.patch;
1272
1273
  }
1273
1274
  function resolveRuntimeSummary(overrides = {}) {
1274
- const packageVersion = normalizePackageVersion(overrides.packageVersion ?? "0.1.6");
1275
+ const packageVersion = normalizePackageVersion(overrides.packageVersion ?? "0.1.7-canary.1");
1275
1276
  const nodeVersion = overrides.nodeVersion ?? process.version;
1276
1277
  const parsedNodeVersion = parseNodeVersion(nodeVersion);
1277
1278
  return {
@@ -2027,7 +2028,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
2027
2028
  const symbolRegex = /\p{S}/u;
2028
2029
  const punctuationRegex = /\p{P}/u;
2029
2030
  const whitespaceRegex = /\s/u;
2030
- const newlineChars = new Set([
2031
+ const newlineChars = /* @__PURE__ */ new Set([
2031
2032
  "\n",
2032
2033
  "\r",
2033
2034
  "\u2028",
@@ -2138,11 +2139,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
2138
2139
  const segmenter = getSegmenter(chunk.locale);
2139
2140
  const segments = [];
2140
2141
  const nonWords = collectNonWords ? createNonWordCollection() : null;
2141
- for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
2142
- else if (collectNonWords && nonWords) {
2143
- if (includeWhitespace) addWhitespace(nonWords, part.segment);
2142
+ for (const part of segmenter.segment(chunk.text)) {
2144
2143
  const category = classifyNonWordSegment(part.segment);
2145
- if (category) addNonWord(nonWords, category, part.segment);
2144
+ if (category) {
2145
+ if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
2146
+ continue;
2147
+ }
2148
+ if (part.isWordLike) segments.push(part.segment);
2149
+ else if (collectNonWords && nonWords) {
2150
+ if (includeWhitespace) addWhitespace(nonWords, part.segment);
2151
+ }
2146
2152
  }
2147
2153
  return {
2148
2154
  locale: chunk.locale,
@@ -2159,6 +2165,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
2159
2165
  let wordChars = 0;
2160
2166
  let nonWordChars = 0;
2161
2167
  for (const part of segmenter.segment(chunk.text)) {
2168
+ const category = classifyNonWordSegment(part.segment);
2169
+ if (category) {
2170
+ if (collectNonWords && nonWords) {
2171
+ addNonWord(nonWords, category, part.segment);
2172
+ const count = countCharsForLocale(part.segment, chunk.locale);
2173
+ chars += count;
2174
+ nonWordChars += count;
2175
+ }
2176
+ continue;
2177
+ }
2162
2178
  if (part.isWordLike) {
2163
2179
  const count = countCharsForLocale(part.segment, chunk.locale);
2164
2180
  chars += count;
@@ -2168,9 +2184,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
2168
2184
  if (collectNonWords && nonWords) {
2169
2185
  let whitespaceCount = 0;
2170
2186
  if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
2171
- const category = classifyNonWordSegment(part.segment);
2172
- if (category) addNonWord(nonWords, category, part.segment);
2173
- if (category || whitespaceCount > 0) {
2187
+ if (whitespaceCount > 0) {
2174
2188
  const count = countCharsForLocale(part.segment, chunk.locale);
2175
2189
  chars += count;
2176
2190
  nonWordChars += count;
@@ -2248,13 +2262,13 @@ const MODE_ALIASES = {
2248
2262
  characters: "char",
2249
2263
  "char-collector": "char-collector"
2250
2264
  };
2251
- const CHAR_MODE_ALIASES = new Set([
2265
+ const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
2252
2266
  "char",
2253
2267
  "chars",
2254
2268
  "character",
2255
2269
  "characters"
2256
2270
  ]);
2257
- const COLLECTOR_MODE_ALIASES = new Set([
2271
+ const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
2258
2272
  "collector",
2259
2273
  "collect",
2260
2274
  "colle",
@@ -2352,7 +2366,7 @@ const regex = {
2352
2366
  devanagari: /\p{Script=Devanagari}/u,
2353
2367
  thai: /\p{Script=Thai}/u
2354
2368
  };
2355
- const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
2369
+ const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
2356
2370
  function isLatinLocale(locale, context) {
2357
2371
  if (context) return context.latinLocales.has(locale);
2358
2372
  return defaultLatinLocales.has(locale);
@@ -2429,7 +2443,7 @@ function resolveLatinHintRules$1(options) {
2429
2443
  function resolveLocaleDetectContext(options = {}) {
2430
2444
  const latinHint = resolveLatinHint(options);
2431
2445
  const latinHintRules = resolveLatinHintRules$1(options);
2432
- const latinLocales = new Set([DEFAULT_LOCALE]);
2446
+ const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
2433
2447
  for (const rule of latinHintRules) latinLocales.add(rule.tag);
2434
2448
  if (latinHint) latinLocales.add(latinHint);
2435
2449
  return {
@@ -3766,7 +3780,8 @@ function resolveWhatlangWasmModulePath() {
3766
3780
  }
3767
3781
  async function loadWhatlangWasmModule() {
3768
3782
  if (!modulePromise) modulePromise = (async () => {
3769
- return requireFromHere(resolveWhatlangWasmModulePath());
3783
+ const modulePath = resolveWhatlangWasmModulePath();
3784
+ return requireFromHere(modulePath);
3770
3785
  })();
3771
3786
  return modulePromise;
3772
3787
  }
@@ -5370,7 +5385,7 @@ function normalizeVersion(value) {
5370
5385
  return trimmed;
5371
5386
  }
5372
5387
  function resolvePackageVersion(options = {}) {
5373
- const embeddedVersion = normalizeVersion(options.embeddedVersion ?? "0.1.6");
5388
+ const embeddedVersion = normalizeVersion(options.embeddedVersion ?? "0.1.7-canary.1");
5374
5389
  if (embeddedVersion) return embeddedVersion;
5375
5390
  const maxLevels = options.maxLevels ?? 8;
5376
5391
  const resolveFromPath = options.resolveFromPath ?? resolveVersionFromPath;
@@ -5568,7 +5583,7 @@ function aggregateSectionedResults(results, preserveCollectorSegments) {
5568
5583
  existing.items.push(item.result);
5569
5584
  }
5570
5585
  }
5571
- const sourceOrder = new Map([["frontmatter", 0], ["content", 1]]);
5586
+ const sourceOrder = /* @__PURE__ */ new Map([["frontmatter", 0], ["content", 1]]);
5572
5587
  const items = [...grouped.values()].sort((left, right) => {
5573
5588
  const sourceDiff = (sourceOrder.get(left.source) ?? 0) - (sourceOrder.get(right.source) ?? 0);
5574
5589
  if (sourceDiff !== 0) return sourceDiff;
@@ -1,4 +1,4 @@
1
- import { b as WordCounterOptions, c as SectionedResult, g as LocaleChunk, m as LocaleDetectOptions, s as SectionMode, x as WordCounterResult, y as WordCounterMode } from "./index.mjs";
1
+ import { a as SectionMode, b as WordCounterOptions, g as LocaleChunk, m as LocaleDetectOptions, o as SectionedResult, x as WordCounterResult, y as WordCounterMode } from "./index.mjs";
2
2
 
3
3
  //#region src/detector/policy.d.ts
4
4
  type DetectorContentGatePolicy = "latinProse" | "none";
@@ -966,7 +966,8 @@ function resolveWhatlangWasmModulePath() {
966
966
  }
967
967
  async function loadWhatlangWasmModule() {
968
968
  if (!modulePromise) modulePromise = (async () => {
969
- return requireFromHere(resolveWhatlangWasmModulePath());
969
+ const modulePath = resolveWhatlangWasmModulePath();
970
+ return requireFromHere(modulePath);
970
971
  })();
971
972
  return modulePromise;
972
973
  }
@@ -113,6 +113,12 @@ declare const DEFAULT_LATIN_HINT_RULES: ReadonlyArray<Readonly<LatinHintRule>>;
113
113
  //#region src/wc/wc.d.ts
114
114
  declare function wordCounter(text: string, options?: WordCounterOptions): WordCounterResult;
115
115
  //#endregion
116
+ //#region src/utils/append-all.d.ts
117
+ declare function appendAll<T>(target: T[], source: readonly T[]): void;
118
+ //#endregion
119
+ //#region src/utils/show-singular-or-plural-word.d.ts
120
+ declare function showSingularOrPluralWord(count: number, word: string): string;
121
+ //#endregion
116
122
  //#region src/markdown/types.d.ts
117
123
  type FrontmatterType = "yaml" | "toml" | "json";
118
124
  interface ParsedMarkdown {
@@ -139,10 +145,4 @@ declare function parseMarkdown(input: string): ParsedMarkdown;
139
145
  //#region src/markdown/section-count.d.ts
140
146
  declare function countSections(input: string, section: SectionMode, options?: WordCounterOptions): SectionedResult;
141
147
  //#endregion
142
- //#region src/utils/append-all.d.ts
143
- declare function appendAll<T>(target: T[], source: readonly T[]): void;
144
- //#endregion
145
- //#region src/utils/show-singular-or-plural-word.d.ts
146
- declare function showSingularOrPluralWord(count: number, word: string): string;
147
- //#endregion
148
- export { NonWordCollection as _, FrontmatterType as a, WordCounterOptions as b, SectionedResult as c, countCharsForLocale as d, countWordsForLocale as f, LocaleChunk as g, LatinHintRule as h, parseMarkdown as i, wordCounter as l, LocaleDetectOptions as m, appendAll as n, ParsedMarkdown as o, segmentTextByLocale as p, countSections as r, SectionMode as s, showSingularOrPluralWord as t, DEFAULT_LATIN_HINT_RULES as u, WordCounterBreakdown as v, WordCounterResult as x, WordCounterMode as y };
148
+ export { NonWordCollection as _, SectionMode as a, WordCounterOptions as b, appendAll as c, countCharsForLocale as d, countWordsForLocale as f, LocaleChunk as g, LatinHintRule as h, ParsedMarkdown as i, wordCounter as l, LocaleDetectOptions as m, parseMarkdown as n, SectionedResult as o, segmentTextByLocale as p, FrontmatterType as r, showSingularOrPluralWord as s, countSections as t, DEFAULT_LATIN_HINT_RULES as u, WordCounterBreakdown as v, WordCounterResult as x, WordCounterMode as y };
@@ -1,2 +1,2 @@
1
- import { _ as NonWordCollection, a as FrontmatterType, b as WordCounterOptions, c as SectionedResult, d as countCharsForLocale, f as countWordsForLocale, h as LatinHintRule, i as parseMarkdown, l as wordCounter, n as appendAll, o as ParsedMarkdown, p as segmentTextByLocale, r as countSections, s as SectionMode, t as showSingularOrPluralWord, u as DEFAULT_LATIN_HINT_RULES, v as WordCounterBreakdown, x as WordCounterResult, y as WordCounterMode } from "./index.mjs";
2
- export { DEFAULT_LATIN_HINT_RULES, FrontmatterType, LatinHintRule, NonWordCollection, ParsedMarkdown, SectionMode, SectionedResult, WordCounterBreakdown, WordCounterMode, WordCounterOptions, WordCounterResult, appendAll, countCharsForLocale, countSections, countWordsForLocale, wordCounter as default, wordCounter, parseMarkdown, segmentTextByLocale, showSingularOrPluralWord };
1
+ import { _ as NonWordCollection, a as SectionMode, b as WordCounterOptions, c as appendAll, d as countCharsForLocale, f as countWordsForLocale, h as LatinHintRule, i as ParsedMarkdown, l as wordCounter, n as parseMarkdown, o as SectionedResult, p as segmentTextByLocale, r as FrontmatterType, s as showSingularOrPluralWord, t as countSections, u as DEFAULT_LATIN_HINT_RULES, v as WordCounterBreakdown, x as WordCounterResult, y as WordCounterMode } from "./index.mjs";
2
+ export { DEFAULT_LATIN_HINT_RULES, type FrontmatterType, type LatinHintRule, type NonWordCollection, type ParsedMarkdown, type SectionMode, type SectionedResult, type WordCounterBreakdown, type WordCounterMode, type WordCounterOptions, type WordCounterResult, appendAll, countCharsForLocale, countSections, countWordsForLocale, wordCounter as default, wordCounter, parseMarkdown, segmentTextByLocale, showSingularOrPluralWord };
@@ -45,7 +45,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
45
45
  const symbolRegex = /\p{S}/u;
46
46
  const punctuationRegex = /\p{P}/u;
47
47
  const whitespaceRegex = /\s/u;
48
- const newlineChars = new Set([
48
+ const newlineChars = /* @__PURE__ */ new Set([
49
49
  "\n",
50
50
  "\r",
51
51
  "\u2028",
@@ -156,11 +156,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
156
156
  const segmenter = getSegmenter(chunk.locale);
157
157
  const segments = [];
158
158
  const nonWords = collectNonWords ? createNonWordCollection() : null;
159
- for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
160
- else if (collectNonWords && nonWords) {
161
- if (includeWhitespace) addWhitespace(nonWords, part.segment);
159
+ for (const part of segmenter.segment(chunk.text)) {
162
160
  const category = classifyNonWordSegment(part.segment);
163
- if (category) addNonWord(nonWords, category, part.segment);
161
+ if (category) {
162
+ if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
163
+ continue;
164
+ }
165
+ if (part.isWordLike) segments.push(part.segment);
166
+ else if (collectNonWords && nonWords) {
167
+ if (includeWhitespace) addWhitespace(nonWords, part.segment);
168
+ }
164
169
  }
165
170
  return {
166
171
  locale: chunk.locale,
@@ -177,6 +182,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
177
182
  let wordChars = 0;
178
183
  let nonWordChars = 0;
179
184
  for (const part of segmenter.segment(chunk.text)) {
185
+ const category = classifyNonWordSegment(part.segment);
186
+ if (category) {
187
+ if (collectNonWords && nonWords) {
188
+ addNonWord(nonWords, category, part.segment);
189
+ const count = countCharsForLocale(part.segment, chunk.locale);
190
+ chars += count;
191
+ nonWordChars += count;
192
+ }
193
+ continue;
194
+ }
180
195
  if (part.isWordLike) {
181
196
  const count = countCharsForLocale(part.segment, chunk.locale);
182
197
  chars += count;
@@ -186,9 +201,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
186
201
  if (collectNonWords && nonWords) {
187
202
  let whitespaceCount = 0;
188
203
  if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
189
- const category = classifyNonWordSegment(part.segment);
190
- if (category) addNonWord(nonWords, category, part.segment);
191
- if (category || whitespaceCount > 0) {
204
+ if (whitespaceCount > 0) {
192
205
  const count = countCharsForLocale(part.segment, chunk.locale);
193
206
  chars += count;
194
207
  nonWordChars += count;
@@ -266,13 +279,13 @@ const MODE_ALIASES = {
266
279
  characters: "char",
267
280
  "char-collector": "char-collector"
268
281
  };
269
- const CHAR_MODE_ALIASES = new Set([
282
+ const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
270
283
  "char",
271
284
  "chars",
272
285
  "character",
273
286
  "characters"
274
287
  ]);
275
- const COLLECTOR_MODE_ALIASES = new Set([
288
+ const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
276
289
  "collector",
277
290
  "collect",
278
291
  "colle",
@@ -370,7 +383,7 @@ const regex = {
370
383
  devanagari: /\p{Script=Devanagari}/u,
371
384
  thai: /\p{Script=Thai}/u
372
385
  };
373
- const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
386
+ const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
374
387
  function isLatinLocale(locale, context) {
375
388
  if (context) return context.latinLocales.has(locale);
376
389
  return defaultLatinLocales.has(locale);
@@ -447,7 +460,7 @@ function resolveLatinHintRules(options) {
447
460
  function resolveLocaleDetectContext(options = {}) {
448
461
  const latinHint = resolveLatinHint(options);
449
462
  const latinHintRules = resolveLatinHintRules(options);
450
- const latinLocales = new Set([DEFAULT_LOCALE]);
463
+ const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
451
464
  for (const rule of latinHintRules) latinLocales.add(rule.tag);
452
465
  if (latinHint) latinLocales.add(latinHint);
453
466
  return {
@@ -500,7 +500,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
500
500
  const symbolRegex = /\p{S}/u;
501
501
  const punctuationRegex = /\p{P}/u;
502
502
  const whitespaceRegex = /\s/u;
503
- const newlineChars = new Set([
503
+ const newlineChars = /* @__PURE__ */ new Set([
504
504
  "\n",
505
505
  "\r",
506
506
  "\u2028",
@@ -611,11 +611,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
611
611
  const segmenter = getSegmenter(chunk.locale);
612
612
  const segments = [];
613
613
  const nonWords = collectNonWords ? createNonWordCollection() : null;
614
- for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
615
- else if (collectNonWords && nonWords) {
616
- if (includeWhitespace) addWhitespace(nonWords, part.segment);
614
+ for (const part of segmenter.segment(chunk.text)) {
617
615
  const category = classifyNonWordSegment(part.segment);
618
- if (category) addNonWord(nonWords, category, part.segment);
616
+ if (category) {
617
+ if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
618
+ continue;
619
+ }
620
+ if (part.isWordLike) segments.push(part.segment);
621
+ else if (collectNonWords && nonWords) {
622
+ if (includeWhitespace) addWhitespace(nonWords, part.segment);
623
+ }
619
624
  }
620
625
  return {
621
626
  locale: chunk.locale,
@@ -632,6 +637,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
632
637
  let wordChars = 0;
633
638
  let nonWordChars = 0;
634
639
  for (const part of segmenter.segment(chunk.text)) {
640
+ const category = classifyNonWordSegment(part.segment);
641
+ if (category) {
642
+ if (collectNonWords && nonWords) {
643
+ addNonWord(nonWords, category, part.segment);
644
+ const count = countCharsForLocale(part.segment, chunk.locale);
645
+ chars += count;
646
+ nonWordChars += count;
647
+ }
648
+ continue;
649
+ }
635
650
  if (part.isWordLike) {
636
651
  const count = countCharsForLocale(part.segment, chunk.locale);
637
652
  chars += count;
@@ -641,9 +656,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
641
656
  if (collectNonWords && nonWords) {
642
657
  let whitespaceCount = 0;
643
658
  if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
644
- const category = classifyNonWordSegment(part.segment);
645
- if (category) addNonWord(nonWords, category, part.segment);
646
- if (category || whitespaceCount > 0) {
659
+ if (whitespaceCount > 0) {
647
660
  const count = countCharsForLocale(part.segment, chunk.locale);
648
661
  chars += count;
649
662
  nonWordChars += count;
@@ -721,13 +734,13 @@ const MODE_ALIASES = {
721
734
  characters: "char",
722
735
  "char-collector": "char-collector"
723
736
  };
724
- const CHAR_MODE_ALIASES = new Set([
737
+ const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
725
738
  "char",
726
739
  "chars",
727
740
  "character",
728
741
  "characters"
729
742
  ]);
730
- const COLLECTOR_MODE_ALIASES = new Set([
743
+ const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
731
744
  "collector",
732
745
  "collect",
733
746
  "colle",
@@ -825,7 +838,7 @@ const regex = {
825
838
  devanagari: /\p{Script=Devanagari}/u,
826
839
  thai: /\p{Script=Thai}/u
827
840
  };
828
- const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
841
+ const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
829
842
  function isLatinLocale(locale, context) {
830
843
  if (context) return context.latinLocales.has(locale);
831
844
  return defaultLatinLocales.has(locale);
@@ -902,7 +915,7 @@ function resolveLatinHintRules(options) {
902
915
  function resolveLocaleDetectContext(options = {}) {
903
916
  const latinHint = resolveLatinHint(options);
904
917
  const latinHintRules = resolveLatinHintRules(options);
905
- const latinLocales = new Set([DEFAULT_LOCALE]);
918
+ const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
906
919
  for (const rule of latinHintRules) latinLocales.add(rule.tag);
907
920
  if (latinHint) latinLocales.add(latinHint);
908
921
  return {
@@ -1921,7 +1934,8 @@ function resolveWhatlangWasmModulePath() {
1921
1934
  }
1922
1935
  async function loadWhatlangWasmModule() {
1923
1936
  if (!modulePromise) modulePromise = (async () => {
1924
- return requireFromHere(resolveWhatlangWasmModulePath());
1937
+ const modulePath = resolveWhatlangWasmModulePath();
1938
+ return requireFromHere(modulePath);
1925
1939
  })();
1926
1940
  return modulePromise;
1927
1941
  }
@@ -14,14 +14,13 @@ function detect_language(text, _route_tag) {
14
14
  return ret;
15
15
  }
16
16
  exports.detect_language = detect_language;
17
-
18
17
  function __wbg_get_imports() {
19
18
  const import0 = {
20
19
  __proto__: null,
21
- __wbg___wbindgen_throw_bd5a70920abf0236: function(arg0, arg1) {
20
+ __wbg___wbindgen_throw_344f42d3211c4765: function(arg0, arg1) {
22
21
  throw new Error(getStringFromWasm0(arg0, arg1));
23
22
  },
24
- __wbg_new_e4597c3f125a2038: function() {
23
+ __wbg_new_da52cf8fe3429cb2: function() {
25
24
  const ret = new Object();
26
25
  return ret;
27
26
  },
@@ -55,8 +54,7 @@ function __wbg_get_imports() {
55
54
  }
56
55
 
57
56
  function getStringFromWasm0(ptr, len) {
58
- ptr = ptr >>> 0;
59
- return decodeText(ptr, len);
57
+ return decodeText(ptr >>> 0, len);
60
58
  }
61
59
 
62
60
  let cachedUint8ArrayMemory0 = null;
@@ -128,5 +126,6 @@ let WASM_VECTOR_LEN = 0;
128
126
  const wasmPath = `${__dirname}/language_detector_bg.wasm`;
129
127
  const wasmBytes = require('fs').readFileSync(wasmPath);
130
128
  const wasmModule = new WebAssembly.Module(wasmBytes);
131
- let wasm = new WebAssembly.Instance(wasmModule, __wbg_get_imports()).exports;
129
+ let wasmInstance = new WebAssembly.Instance(wasmModule, __wbg_get_imports());
130
+ let wasm = wasmInstance.exports;
132
131
  wasm.__wbindgen_start();
@@ -14,4 +14,4 @@
14
14
  ],
15
15
  "main": "language_detector.js",
16
16
  "types": "language_detector.d.ts"
17
- }
17
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dev-pi2pie/word-counter",
3
- "version": "0.1.6",
3
+ "version": "0.1.7-canary.1",
4
4
  "keywords": [
5
5
  "cli",
6
6
  "intl-segmenter",
@@ -56,17 +56,17 @@
56
56
  "format:check": "oxfmt --check src test scripts package.json tsconfig.json tsconfig.test.json .oxlintrc.json .oxfmtrc.json"
57
57
  },
58
58
  "dependencies": {
59
- "commander": "^14.0.3",
60
- "yaml": "^2.8.3"
59
+ "commander": "^15.0.0",
60
+ "yaml": "^2.9.0"
61
61
  },
62
62
  "devDependencies": {
63
- "@types/bun": "^1.3.11",
64
- "@types/node": "^25.5.0",
65
- "oxfmt": "^0.43.0",
66
- "oxlint": "^1.58.0",
63
+ "@types/bun": "^1.3.14",
64
+ "@types/node": "^26.1.0",
65
+ "oxfmt": "^0.57.0",
66
+ "oxlint": "^1.72.0",
67
67
  "picocolors": "^1.1.1",
68
- "tsdown": "^0.21.7",
69
- "typescript": "^6.0.2"
68
+ "tsdown": "^0.22.3",
69
+ "typescript": "^6.0.3"
70
70
  },
71
71
  "peerDependencies": {
72
72
  "typescript": "^5 || ^6"