npm - @dev-pi2pie/word-counter - Versions diffs - 0.1.6 → 0.1.7-canary.1 - Mend

@dev-pi2pie/word-counter 0.1.6 → 0.1.7-canary.1

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (13)

package/dist/cjs/detector.cjs +2 -1
package/dist/cjs/markdown.cjs +25 -12
package/dist/esm/bin.mjs +33 -18
package/dist/esm/detector.d.mts +1 -1
package/dist/esm/detector.mjs +2 -1
package/dist/esm/index.d.mts +7 -7
package/dist/esm/index2.d.mts +2 -2
package/dist/esm/markdown.mjs +25 -12
package/dist/esm/worker/count-worker.mjs +27 -13
package/dist/wasm-language-detector/language_detector.js +5 -6
package/dist/wasm-language-detector/language_detector_bg.wasm +0 -0
package/dist/wasm-language-detector/package.json +1 -1
package/package.json +9 -9

package/dist/cjs/detector.cjs CHANGED Viewed

@@ -966,7 +966,8 @@ function resolveWhatlangWasmModulePath() {
 }
 async function loadWhatlangWasmModule() {
 	if (!modulePromise) modulePromise = (async () => {
-		return requireFromHere(resolveWhatlangWasmModulePath());
+		const modulePath = resolveWhatlangWasmModulePath();
+		return requireFromHere(modulePath);
 	})();
 	return modulePromise;
 }

package/dist/cjs/markdown.cjs CHANGED Viewed

@@ -45,7 +45,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
 const symbolRegex = /\p{S}/u;
 const punctuationRegex = /\p{P}/u;
 const whitespaceRegex = /\s/u;
-const newlineChars = new Set([
+const newlineChars = /* @__PURE__ */ new Set([
 	"\n",
 	"\r",
 	"\u2028",
@@ -156,11 +156,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
 	const segmenter = getSegmenter(chunk.locale);
 	const segments = [];
 	const nonWords = collectNonWords ? createNonWordCollection() : null;
-	for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
-	else if (collectNonWords && nonWords) {
-		if (includeWhitespace) addWhitespace(nonWords, part.segment);
+	for (const part of segmenter.segment(chunk.text)) {
 		const category = classifyNonWordSegment(part.segment);
-		if (category) addNonWord(nonWords, category, part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
+			continue;
+		}
+		if (part.isWordLike) segments.push(part.segment);
+		else if (collectNonWords && nonWords) {
+			if (includeWhitespace) addWhitespace(nonWords, part.segment);
+		}
 	}
 	return {
 		locale: chunk.locale,
@@ -177,6 +182,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 	let wordChars = 0;
 	let nonWordChars = 0;
 	for (const part of segmenter.segment(chunk.text)) {
+		const category = classifyNonWordSegment(part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) {
+				addNonWord(nonWords, category, part.segment);
+				const count = countCharsForLocale(part.segment, chunk.locale);
+				chars += count;
+				nonWordChars += count;
+			}
+			continue;
+		}
 		if (part.isWordLike) {
 			const count = countCharsForLocale(part.segment, chunk.locale);
 			chars += count;
@@ -186,9 +201,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 		if (collectNonWords && nonWords) {
 			let whitespaceCount = 0;
 			if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
-			const category = classifyNonWordSegment(part.segment);
-			if (category) addNonWord(nonWords, category, part.segment);
-			if (category || whitespaceCount > 0) {
+			if (whitespaceCount > 0) {
 				const count = countCharsForLocale(part.segment, chunk.locale);
 				chars += count;
 				nonWordChars += count;
@@ -266,13 +279,13 @@ const MODE_ALIASES = {
 	characters: "char",
 	"char-collector": "char-collector"
 };
-const CHAR_MODE_ALIASES = new Set([
+const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"char",
 	"chars",
 	"character",
 	"characters"
 ]);
-const COLLECTOR_MODE_ALIASES = new Set([
+const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"collector",
 	"collect",
 	"colle",
@@ -370,7 +383,7 @@ const regex = {
 	devanagari: /\p{Script=Devanagari}/u,
 	thai: /\p{Script=Thai}/u
 };
-const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
+const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
 function isLatinLocale(locale, context) {
 	if (context) return context.latinLocales.has(locale);
 	return defaultLatinLocales.has(locale);
@@ -447,7 +460,7 @@ function resolveLatinHintRules(options) {
 function resolveLocaleDetectContext(options = {}) {
 	const latinHint = resolveLatinHint(options);
 	const latinHintRules = resolveLatinHintRules(options);
-	const latinLocales = new Set([DEFAULT_LOCALE]);
+	const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
 	for (const rule of latinHintRules) latinLocales.add(rule.tag);
 	if (latinHint) latinLocales.add(latinHint);
 	return {

package/dist/esm/bin.mjs CHANGED Viewed

@@ -15,7 +15,7 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
 var __getOwnPropNames = Object.getOwnPropertyNames;
 var __getProtoOf = Object.getPrototypeOf;
 var __hasOwnProp = Object.prototype.hasOwnProperty;
-var __commonJSMin = (cb, mod) => () => (mod || cb((mod = { exports: {} }).exports, mod), mod.exports);
+var __commonJSMin = (cb, mod) => () => (mod || (cb((mod = { exports: {} }).exports, mod), cb = null), mod.exports);
 var __copyProps = (to, from, except, desc) => {
 	if (from && typeof from === "object" || typeof from === "function") for (var keys = __getOwnPropNames(from), i = 0, n = keys.length, key; i < n; i++) {
 		key = keys[i];
@@ -262,7 +262,8 @@ function collectTotalOfCounts(result) {
 	return counts;
 }
 function parseTotalOfToken(token) {
-	const canonical = TOTAL_OF_PART_ALIASES[token.trim().toLowerCase()];
+	const normalized = token.trim().toLowerCase();
+	const canonical = TOTAL_OF_PART_ALIASES[normalized];
 	if (canonical) return canonical;
 	throw new Error(`Invalid --total-of part: ${token}. Allowed: ${TOTAL_OF_PARTS.join(", ")}.`);
 }
@@ -1271,7 +1272,7 @@ function meetsRequiredNodeVersion(version) {
 	return version.patch >= REQUIRED_NODE_VERSION.patch;
 }
 function resolveRuntimeSummary(overrides = {}) {
-	const packageVersion = normalizePackageVersion(overrides.packageVersion ?? "0.1.6");
+	const packageVersion = normalizePackageVersion(overrides.packageVersion ?? "0.1.7-canary.1");
 	const nodeVersion = overrides.nodeVersion ?? process.version;
 	const parsedNodeVersion = parseNodeVersion(nodeVersion);
 	return {
@@ -2027,7 +2028,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
 const symbolRegex = /\p{S}/u;
 const punctuationRegex = /\p{P}/u;
 const whitespaceRegex = /\s/u;
-const newlineChars = new Set([
+const newlineChars = /* @__PURE__ */ new Set([
 	"\n",
 	"\r",
 	"\u2028",
@@ -2138,11 +2139,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
 	const segmenter = getSegmenter(chunk.locale);
 	const segments = [];
 	const nonWords = collectNonWords ? createNonWordCollection() : null;
-	for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
-	else if (collectNonWords && nonWords) {
-		if (includeWhitespace) addWhitespace(nonWords, part.segment);
+	for (const part of segmenter.segment(chunk.text)) {
 		const category = classifyNonWordSegment(part.segment);
-		if (category) addNonWord(nonWords, category, part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
+			continue;
+		}
+		if (part.isWordLike) segments.push(part.segment);
+		else if (collectNonWords && nonWords) {
+			if (includeWhitespace) addWhitespace(nonWords, part.segment);
+		}
 	}
 	return {
 		locale: chunk.locale,
@@ -2159,6 +2165,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 	let wordChars = 0;
 	let nonWordChars = 0;
 	for (const part of segmenter.segment(chunk.text)) {
+		const category = classifyNonWordSegment(part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) {
+				addNonWord(nonWords, category, part.segment);
+				const count = countCharsForLocale(part.segment, chunk.locale);
+				chars += count;
+				nonWordChars += count;
+			}
+			continue;
+		}
 		if (part.isWordLike) {
 			const count = countCharsForLocale(part.segment, chunk.locale);
 			chars += count;
@@ -2168,9 +2184,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 		if (collectNonWords && nonWords) {
 			let whitespaceCount = 0;
 			if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
-			const category = classifyNonWordSegment(part.segment);
-			if (category) addNonWord(nonWords, category, part.segment);
-			if (category || whitespaceCount > 0) {
+			if (whitespaceCount > 0) {
 				const count = countCharsForLocale(part.segment, chunk.locale);
 				chars += count;
 				nonWordChars += count;
@@ -2248,13 +2262,13 @@ const MODE_ALIASES = {
 	characters: "char",
 	"char-collector": "char-collector"
 };
-const CHAR_MODE_ALIASES = new Set([
+const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"char",
 	"chars",
 	"character",
 	"characters"
 ]);
-const COLLECTOR_MODE_ALIASES = new Set([
+const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"collector",
 	"collect",
 	"colle",
@@ -2352,7 +2366,7 @@ const regex = {
 	devanagari: /\p{Script=Devanagari}/u,
 	thai: /\p{Script=Thai}/u
 };
-const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
+const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
 function isLatinLocale(locale, context) {
 	if (context) return context.latinLocales.has(locale);
 	return defaultLatinLocales.has(locale);
@@ -2429,7 +2443,7 @@ function resolveLatinHintRules$1(options) {
 function resolveLocaleDetectContext(options = {}) {
 	const latinHint = resolveLatinHint(options);
 	const latinHintRules = resolveLatinHintRules$1(options);
-	const latinLocales = new Set([DEFAULT_LOCALE]);
+	const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
 	for (const rule of latinHintRules) latinLocales.add(rule.tag);
 	if (latinHint) latinLocales.add(latinHint);
 	return {
@@ -3766,7 +3780,8 @@ function resolveWhatlangWasmModulePath() {
 }
 async function loadWhatlangWasmModule() {
 	if (!modulePromise) modulePromise = (async () => {
-		return requireFromHere(resolveWhatlangWasmModulePath());
+		const modulePath = resolveWhatlangWasmModulePath();
+		return requireFromHere(modulePath);
 	})();
 	return modulePromise;
 }
@@ -5370,7 +5385,7 @@ function normalizeVersion(value) {
 	return trimmed;
 }
 function resolvePackageVersion(options = {}) {
-	const embeddedVersion = normalizeVersion(options.embeddedVersion ?? "0.1.6");
+	const embeddedVersion = normalizeVersion(options.embeddedVersion ?? "0.1.7-canary.1");
 	if (embeddedVersion) return embeddedVersion;
 	const maxLevels = options.maxLevels ?? 8;
 	const resolveFromPath = options.resolveFromPath ?? resolveVersionFromPath;
@@ -5568,7 +5583,7 @@ function aggregateSectionedResults(results, preserveCollectorSegments) {
 			existing.items.push(item.result);
 		}
 	}
-	const sourceOrder = new Map([["frontmatter", 0], ["content", 1]]);
+	const sourceOrder = /* @__PURE__ */ new Map([["frontmatter", 0], ["content", 1]]);
 	const items = [...grouped.values()].sort((left, right) => {
 		const sourceDiff = (sourceOrder.get(left.source) ?? 0) - (sourceOrder.get(right.source) ?? 0);
 		if (sourceDiff !== 0) return sourceDiff;

package/dist/esm/detector.d.mts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { b as WordCounterOptions, c as SectionedResult, g as LocaleChunk, m as LocaleDetectOptions, s as SectionMode, x as WordCounterResult, y as WordCounterMode } from "./index.mjs";
+import { a as SectionMode, b as WordCounterOptions, g as LocaleChunk, m as LocaleDetectOptions, o as SectionedResult, x as WordCounterResult, y as WordCounterMode } from "./index.mjs";
 //#region src/detector/policy.d.ts
 type DetectorContentGatePolicy = "latinProse" | "none";

package/dist/esm/detector.mjs CHANGED Viewed

@@ -966,7 +966,8 @@ function resolveWhatlangWasmModulePath() {
 }
 async function loadWhatlangWasmModule() {
 	if (!modulePromise) modulePromise = (async () => {
-		return requireFromHere(resolveWhatlangWasmModulePath());
+		const modulePath = resolveWhatlangWasmModulePath();
+		return requireFromHere(modulePath);
 	})();
 	return modulePromise;
 }

package/dist/esm/index.d.mts CHANGED Viewed

@@ -113,6 +113,12 @@ declare const DEFAULT_LATIN_HINT_RULES: ReadonlyArray<Readonly<LatinHintRule>>;
 //#region src/wc/wc.d.ts
 declare function wordCounter(text: string, options?: WordCounterOptions): WordCounterResult;
 //#endregion
+//#region src/utils/append-all.d.ts
+declare function appendAll<T>(target: T[], source: readonly T[]): void;
+//#endregion
+//#region src/utils/show-singular-or-plural-word.d.ts
+declare function showSingularOrPluralWord(count: number, word: string): string;
+//#endregion
 //#region src/markdown/types.d.ts
 type FrontmatterType = "yaml" | "toml" | "json";
 interface ParsedMarkdown {
@@ -139,10 +145,4 @@ declare function parseMarkdown(input: string): ParsedMarkdown;
 //#region src/markdown/section-count.d.ts
 declare function countSections(input: string, section: SectionMode, options?: WordCounterOptions): SectionedResult;
 //#endregion
-//#region src/utils/append-all.d.ts
-declare function appendAll<T>(target: T[], source: readonly T[]): void;
-//#endregion
-//#region src/utils/show-singular-or-plural-word.d.ts
-declare function showSingularOrPluralWord(count: number, word: string): string;
-//#endregion
-export { NonWordCollection as _, FrontmatterType as a, WordCounterOptions as b, SectionedResult as c, countCharsForLocale as d, countWordsForLocale as f, LocaleChunk as g, LatinHintRule as h, parseMarkdown as i, wordCounter as l, LocaleDetectOptions as m, appendAll as n, ParsedMarkdown as o, segmentTextByLocale as p, countSections as r, SectionMode as s, showSingularOrPluralWord as t, DEFAULT_LATIN_HINT_RULES as u, WordCounterBreakdown as v, WordCounterResult as x, WordCounterMode as y };
+export { NonWordCollection as _, SectionMode as a, WordCounterOptions as b, appendAll as c, countCharsForLocale as d, countWordsForLocale as f, LocaleChunk as g, LatinHintRule as h, ParsedMarkdown as i, wordCounter as l, LocaleDetectOptions as m, parseMarkdown as n, SectionedResult as o, segmentTextByLocale as p, FrontmatterType as r, showSingularOrPluralWord as s, countSections as t, DEFAULT_LATIN_HINT_RULES as u, WordCounterBreakdown as v, WordCounterResult as x, WordCounterMode as y };

package/dist/esm/index2.d.mts CHANGED Viewed

@@ -1,2 +1,2 @@
-import { _ as NonWordCollection, a as FrontmatterType, b as WordCounterOptions, c as SectionedResult, d as countCharsForLocale, f as countWordsForLocale, h as LatinHintRule, i as parseMarkdown, l as wordCounter, n as appendAll, o as ParsedMarkdown, p as segmentTextByLocale, r as countSections, s as SectionMode, t as showSingularOrPluralWord, u as DEFAULT_LATIN_HINT_RULES, v as WordCounterBreakdown, x as WordCounterResult, y as WordCounterMode } from "./index.mjs";
-export { DEFAULT_LATIN_HINT_RULES, FrontmatterType, LatinHintRule, NonWordCollection, ParsedMarkdown, SectionMode, SectionedResult, WordCounterBreakdown, WordCounterMode, WordCounterOptions, WordCounterResult, appendAll, countCharsForLocale, countSections, countWordsForLocale, wordCounter as default, wordCounter, parseMarkdown, segmentTextByLocale, showSingularOrPluralWord };
+import { _ as NonWordCollection, a as SectionMode, b as WordCounterOptions, c as appendAll, d as countCharsForLocale, f as countWordsForLocale, h as LatinHintRule, i as ParsedMarkdown, l as wordCounter, n as parseMarkdown, o as SectionedResult, p as segmentTextByLocale, r as FrontmatterType, s as showSingularOrPluralWord, t as countSections, u as DEFAULT_LATIN_HINT_RULES, v as WordCounterBreakdown, x as WordCounterResult, y as WordCounterMode } from "./index.mjs";
+export { DEFAULT_LATIN_HINT_RULES, type FrontmatterType, type LatinHintRule, type NonWordCollection, type ParsedMarkdown, type SectionMode, type SectionedResult, type WordCounterBreakdown, type WordCounterMode, type WordCounterOptions, type WordCounterResult, appendAll, countCharsForLocale, countSections, countWordsForLocale, wordCounter as default, wordCounter, parseMarkdown, segmentTextByLocale, showSingularOrPluralWord };

package/dist/esm/markdown.mjs CHANGED Viewed

@@ -45,7 +45,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
 const symbolRegex = /\p{S}/u;
 const punctuationRegex = /\p{P}/u;
 const whitespaceRegex = /\s/u;
-const newlineChars = new Set([
+const newlineChars = /* @__PURE__ */ new Set([
 	"\n",
 	"\r",
 	"\u2028",
@@ -156,11 +156,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
 	const segmenter = getSegmenter(chunk.locale);
 	const segments = [];
 	const nonWords = collectNonWords ? createNonWordCollection() : null;
-	for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
-	else if (collectNonWords && nonWords) {
-		if (includeWhitespace) addWhitespace(nonWords, part.segment);
+	for (const part of segmenter.segment(chunk.text)) {
 		const category = classifyNonWordSegment(part.segment);
-		if (category) addNonWord(nonWords, category, part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
+			continue;
+		}
+		if (part.isWordLike) segments.push(part.segment);
+		else if (collectNonWords && nonWords) {
+			if (includeWhitespace) addWhitespace(nonWords, part.segment);
+		}
 	}
 	return {
 		locale: chunk.locale,
@@ -177,6 +182,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 	let wordChars = 0;
 	let nonWordChars = 0;
 	for (const part of segmenter.segment(chunk.text)) {
+		const category = classifyNonWordSegment(part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) {
+				addNonWord(nonWords, category, part.segment);
+				const count = countCharsForLocale(part.segment, chunk.locale);
+				chars += count;
+				nonWordChars += count;
+			}
+			continue;
+		}
 		if (part.isWordLike) {
 			const count = countCharsForLocale(part.segment, chunk.locale);
 			chars += count;
@@ -186,9 +201,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 		if (collectNonWords && nonWords) {
 			let whitespaceCount = 0;
 			if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
-			const category = classifyNonWordSegment(part.segment);
-			if (category) addNonWord(nonWords, category, part.segment);
-			if (category || whitespaceCount > 0) {
+			if (whitespaceCount > 0) {
 				const count = countCharsForLocale(part.segment, chunk.locale);
 				chars += count;
 				nonWordChars += count;
@@ -266,13 +279,13 @@ const MODE_ALIASES = {
 	characters: "char",
 	"char-collector": "char-collector"
 };
-const CHAR_MODE_ALIASES = new Set([
+const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"char",
 	"chars",
 	"character",
 	"characters"
 ]);
-const COLLECTOR_MODE_ALIASES = new Set([
+const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"collector",
 	"collect",
 	"colle",
@@ -370,7 +383,7 @@ const regex = {
 	devanagari: /\p{Script=Devanagari}/u,
 	thai: /\p{Script=Thai}/u
 };
-const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
+const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
 function isLatinLocale(locale, context) {
 	if (context) return context.latinLocales.has(locale);
 	return defaultLatinLocales.has(locale);
@@ -447,7 +460,7 @@ function resolveLatinHintRules(options) {
 function resolveLocaleDetectContext(options = {}) {
 	const latinHint = resolveLatinHint(options);
 	const latinHintRules = resolveLatinHintRules(options);
-	const latinLocales = new Set([DEFAULT_LOCALE]);
+	const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
 	for (const rule of latinHintRules) latinLocales.add(rule.tag);
 	if (latinHint) latinLocales.add(latinHint);
 	return {

package/dist/esm/worker/count-worker.mjs CHANGED Viewed

@@ -500,7 +500,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
 const symbolRegex = /\p{S}/u;
 const punctuationRegex = /\p{P}/u;
 const whitespaceRegex = /\s/u;
-const newlineChars = new Set([
+const newlineChars = /* @__PURE__ */ new Set([
 	"\n",
 	"\r",
 	"\u2028",
@@ -611,11 +611,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
 	const segmenter = getSegmenter(chunk.locale);
 	const segments = [];
 	const nonWords = collectNonWords ? createNonWordCollection() : null;
-	for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
-	else if (collectNonWords && nonWords) {
-		if (includeWhitespace) addWhitespace(nonWords, part.segment);
+	for (const part of segmenter.segment(chunk.text)) {
 		const category = classifyNonWordSegment(part.segment);
-		if (category) addNonWord(nonWords, category, part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
+			continue;
+		}
+		if (part.isWordLike) segments.push(part.segment);
+		else if (collectNonWords && nonWords) {
+			if (includeWhitespace) addWhitespace(nonWords, part.segment);
+		}
 	}
 	return {
 		locale: chunk.locale,
@@ -632,6 +637,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 	let wordChars = 0;
 	let nonWordChars = 0;
 	for (const part of segmenter.segment(chunk.text)) {
+		const category = classifyNonWordSegment(part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) {
+				addNonWord(nonWords, category, part.segment);
+				const count = countCharsForLocale(part.segment, chunk.locale);
+				chars += count;
+				nonWordChars += count;
+			}
+			continue;
+		}
 		if (part.isWordLike) {
 			const count = countCharsForLocale(part.segment, chunk.locale);
 			chars += count;
@@ -641,9 +656,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 		if (collectNonWords && nonWords) {
 			let whitespaceCount = 0;
 			if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
-			const category = classifyNonWordSegment(part.segment);
-			if (category) addNonWord(nonWords, category, part.segment);
-			if (category || whitespaceCount > 0) {
+			if (whitespaceCount > 0) {
 				const count = countCharsForLocale(part.segment, chunk.locale);
 				chars += count;
 				nonWordChars += count;
@@ -721,13 +734,13 @@ const MODE_ALIASES = {
 	characters: "char",
 	"char-collector": "char-collector"
 };
-const CHAR_MODE_ALIASES = new Set([
+const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"char",
 	"chars",
 	"character",
 	"characters"
 ]);
-const COLLECTOR_MODE_ALIASES = new Set([
+const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"collector",
 	"collect",
 	"colle",
@@ -825,7 +838,7 @@ const regex = {
 	devanagari: /\p{Script=Devanagari}/u,
 	thai: /\p{Script=Thai}/u
 };
-const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
+const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
 function isLatinLocale(locale, context) {
 	if (context) return context.latinLocales.has(locale);
 	return defaultLatinLocales.has(locale);
@@ -902,7 +915,7 @@ function resolveLatinHintRules(options) {
 function resolveLocaleDetectContext(options = {}) {
 	const latinHint = resolveLatinHint(options);
 	const latinHintRules = resolveLatinHintRules(options);
-	const latinLocales = new Set([DEFAULT_LOCALE]);
+	const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
 	for (const rule of latinHintRules) latinLocales.add(rule.tag);
 	if (latinHint) latinLocales.add(latinHint);
 	return {
@@ -1921,7 +1934,8 @@ function resolveWhatlangWasmModulePath() {
 }
 async function loadWhatlangWasmModule() {
 	if (!modulePromise) modulePromise = (async () => {
-		return requireFromHere(resolveWhatlangWasmModulePath());
+		const modulePath = resolveWhatlangWasmModulePath();
+		return requireFromHere(modulePath);
 	})();
 	return modulePromise;
 }

package/dist/wasm-language-detector/language_detector.js CHANGED Viewed

@@ -14,14 +14,13 @@ function detect_language(text, _route_tag) {
     return ret;
 }
 exports.detect_language = detect_language;
 function __wbg_get_imports() {
     const import0 = {
         __proto__: null,
-        __wbg___wbindgen_throw_bd5a70920abf0236: function(arg0, arg1) {
+        __wbg___wbindgen_throw_344f42d3211c4765: function(arg0, arg1) {
             throw new Error(getStringFromWasm0(arg0, arg1));
         },
-        __wbg_new_e4597c3f125a2038: function() {
+        __wbg_new_da52cf8fe3429cb2: function() {
             const ret = new Object();
             return ret;
         },
@@ -55,8 +54,7 @@ function __wbg_get_imports() {
 }
 function getStringFromWasm0(ptr, len) {
-    ptr = ptr >>> 0;
-    return decodeText(ptr, len);
+    return decodeText(ptr >>> 0, len);
 }
 let cachedUint8ArrayMemory0 = null;
@@ -128,5 +126,6 @@ let WASM_VECTOR_LEN = 0;
 const wasmPath = `${__dirname}/language_detector_bg.wasm`;
 const wasmBytes = require('fs').readFileSync(wasmPath);
 const wasmModule = new WebAssembly.Module(wasmBytes);
-let wasm = new WebAssembly.Instance(wasmModule, __wbg_get_imports()).exports;
+let wasmInstance = new WebAssembly.Instance(wasmModule, __wbg_get_imports());
+let wasm = wasmInstance.exports;
 wasm.__wbindgen_start();

package/dist/wasm-language-detector/language_detector_bg.wasm CHANGED Viewed

Binary file

package/dist/wasm-language-detector/package.json CHANGED Viewed

@@ -14,4 +14,4 @@
   ],
   "main": "language_detector.js",
   "types": "language_detector.d.ts"
-}
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@dev-pi2pie/word-counter",
-  "version": "0.1.6",
+  "version": "0.1.7-canary.1",
   "keywords": [
     "cli",
     "intl-segmenter",
@@ -56,17 +56,17 @@
     "format:check": "oxfmt --check src test scripts package.json tsconfig.json tsconfig.test.json .oxlintrc.json .oxfmtrc.json"
   },
   "dependencies": {
-    "commander": "^14.0.3",
-    "yaml": "^2.8.3"
+    "commander": "^15.0.0",
+    "yaml": "^2.9.0"
   },
   "devDependencies": {
-    "@types/bun": "^1.3.11",
-    "@types/node": "^25.5.0",
-    "oxfmt": "^0.43.0",
-    "oxlint": "^1.58.0",
+    "@types/bun": "^1.3.14",
+    "@types/node": "^26.1.0",
+    "oxfmt": "^0.57.0",
+    "oxlint": "^1.72.0",
     "picocolors": "^1.1.1",
-    "tsdown": "^0.21.7",
-    "typescript": "^6.0.2"
+    "tsdown": "^0.22.3",
+    "typescript": "^6.0.3"
   },
   "peerDependencies": {
     "typescript": "^5 || ^6"