npm - @dev-pi2pie/word-counter - Versions diffs - 0.1.6-canary.1 → 0.1.7-canary.1 - Mend

@dev-pi2pie/word-counter 0.1.6-canary.1 → 0.1.7-canary.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.md +6 -0
package/dist/cjs/detector.cjs +2 -1
package/dist/cjs/markdown.cjs +25 -12
package/dist/esm/bin.mjs +42 -19
package/dist/esm/detector.mjs +2 -1
package/dist/esm/index2.d.mts +1 -1
package/dist/esm/markdown.mjs +25 -12
package/dist/esm/worker/count-worker.mjs +27 -13
package/dist/wasm-language-detector/language_detector.js +5 -6
package/dist/wasm-language-detector/language_detector_bg.wasm +0 -0
package/dist/wasm-language-detector/package.json +1 -1
package/package.json +9 -12

package/README.md CHANGED Viewed

@@ -111,6 +111,7 @@ Inspect detector behavior without count output:
 ```bash
 word-counter inspect "こんにちは、世界！これはテストです。"
 word-counter inspect --detector wasm --view engine "This sentence should clearly be detected as English for the wasm detector path."
+word-counter inspect --detector wasm --view engine --content-gate strict "Readers understand this behavior."
 word-counter inspect --detector regex -f json "こんにちは、世界！これはテストです。"
 word-counter inspect --detector regex -f json --pretty "こんにちは、世界！これはテストです。"
 word-counter inspect --detector wasm --content-gate off "mode: debug\ntee: true\npath: logs\nUse this for testing."
@@ -144,6 +145,11 @@ Detector mode notes:
 - Technical-noise-heavy Latin windows stay conservative and may remain `und-Latn` even when the detector produces a wrong-but-confident language guess.
 - inspect/debug disclosure uses `contentGate` as the canonical gate field.
 - legacy debug/evidence payloads still emit `qualityGate` as a compatibility alias derived from `contentGate.passed`.
+- `inspect --view engine` stays raw:
+  - it shows the detector sample plus raw/normalized/remapped Whatlang output
+  - it does not apply `eligibility` or `contentGate` policy decisions
+  - if engine view uses an explicit or effective non-default content-gate mode, the CLI emits a cyan info note and points to `--view pipeline`
+- `inspect --view pipeline` is the inspect surface for `eligibility`, `contentGate`, acceptance, and fallback reasoning.
 - for practical verification, use `inspect` to compare direct mode outcomes across `default`, `strict`, `loose`, and `off`; use `--debug --detector-evidence` when you specifically need counting-flow event details or legacy `qualityGate` compatibility
 - `word-counter inspect` supports:
   - positional text input

package/dist/cjs/detector.cjs CHANGED Viewed

@@ -966,7 +966,8 @@ function resolveWhatlangWasmModulePath() {
 }
 async function loadWhatlangWasmModule() {
 	if (!modulePromise) modulePromise = (async () => {
-		return requireFromHere(resolveWhatlangWasmModulePath());
+		const modulePath = resolveWhatlangWasmModulePath();
+		return requireFromHere(modulePath);
 	})();
 	return modulePromise;
 }

package/dist/cjs/markdown.cjs CHANGED Viewed

@@ -45,7 +45,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
 const symbolRegex = /\p{S}/u;
 const punctuationRegex = /\p{P}/u;
 const whitespaceRegex = /\s/u;
-const newlineChars = new Set([
+const newlineChars = /* @__PURE__ */ new Set([
 	"\n",
 	"\r",
 	"\u2028",
@@ -156,11 +156,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
 	const segmenter = getSegmenter(chunk.locale);
 	const segments = [];
 	const nonWords = collectNonWords ? createNonWordCollection() : null;
-	for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
-	else if (collectNonWords && nonWords) {
-		if (includeWhitespace) addWhitespace(nonWords, part.segment);
+	for (const part of segmenter.segment(chunk.text)) {
 		const category = classifyNonWordSegment(part.segment);
-		if (category) addNonWord(nonWords, category, part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
+			continue;
+		}
+		if (part.isWordLike) segments.push(part.segment);
+		else if (collectNonWords && nonWords) {
+			if (includeWhitespace) addWhitespace(nonWords, part.segment);
+		}
 	}
 	return {
 		locale: chunk.locale,
@@ -177,6 +182,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 	let wordChars = 0;
 	let nonWordChars = 0;
 	for (const part of segmenter.segment(chunk.text)) {
+		const category = classifyNonWordSegment(part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) {
+				addNonWord(nonWords, category, part.segment);
+				const count = countCharsForLocale(part.segment, chunk.locale);
+				chars += count;
+				nonWordChars += count;
+			}
+			continue;
+		}
 		if (part.isWordLike) {
 			const count = countCharsForLocale(part.segment, chunk.locale);
 			chars += count;
@@ -186,9 +201,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 		if (collectNonWords && nonWords) {
 			let whitespaceCount = 0;
 			if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
-			const category = classifyNonWordSegment(part.segment);
-			if (category) addNonWord(nonWords, category, part.segment);
-			if (category || whitespaceCount > 0) {
+			if (whitespaceCount > 0) {
 				const count = countCharsForLocale(part.segment, chunk.locale);
 				chars += count;
 				nonWordChars += count;
@@ -266,13 +279,13 @@ const MODE_ALIASES = {
 	characters: "char",
 	"char-collector": "char-collector"
 };
-const CHAR_MODE_ALIASES = new Set([
+const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"char",
 	"chars",
 	"character",
 	"characters"
 ]);
-const COLLECTOR_MODE_ALIASES = new Set([
+const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"collector",
 	"collect",
 	"colle",
@@ -370,7 +383,7 @@ const regex = {
 	devanagari: /\p{Script=Devanagari}/u,
 	thai: /\p{Script=Thai}/u
 };
-const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
+const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
 function isLatinLocale(locale, context) {
 	if (context) return context.latinLocales.has(locale);
 	return defaultLatinLocales.has(locale);
@@ -447,7 +460,7 @@ function resolveLatinHintRules(options) {
 function resolveLocaleDetectContext(options = {}) {
 	const latinHint = resolveLatinHint(options);
 	const latinHintRules = resolveLatinHintRules(options);
-	const latinLocales = new Set([DEFAULT_LOCALE]);
+	const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
 	for (const rule of latinHintRules) latinLocales.add(rule.tag);
 	if (latinHint) latinLocales.add(latinHint);
 	return {

package/dist/esm/bin.mjs CHANGED Viewed

@@ -15,7 +15,7 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
 var __getOwnPropNames = Object.getOwnPropertyNames;
 var __getProtoOf = Object.getPrototypeOf;
 var __hasOwnProp = Object.prototype.hasOwnProperty;
-var __commonJSMin = (cb, mod) => () => (mod || cb((mod = { exports: {} }).exports, mod), mod.exports);
+var __commonJSMin = (cb, mod) => () => (mod || (cb((mod = { exports: {} }).exports, mod), cb = null), mod.exports);
 var __copyProps = (to, from, except, desc) => {
 	if (from && typeof from === "object" || typeof from === "function") for (var keys = __getOwnPropNames(from), i = 0, n = keys.length, key; i < n; i++) {
 		key = keys[i];
@@ -262,7 +262,8 @@ function collectTotalOfCounts(result) {
 	return counts;
 }
 function parseTotalOfToken(token) {
-	const canonical = TOTAL_OF_PART_ALIASES[token.trim().toLowerCase()];
+	const normalized = token.trim().toLowerCase();
+	const canonical = TOTAL_OF_PART_ALIASES[normalized];
 	if (canonical) return canonical;
 	throw new Error(`Invalid --total-of part: ${token}. Allowed: ${TOTAL_OF_PARTS.join(", ")}.`);
 }
@@ -1271,7 +1272,7 @@ function meetsRequiredNodeVersion(version) {
 	return version.patch >= REQUIRED_NODE_VERSION.patch;
 }
 function resolveRuntimeSummary(overrides = {}) {
-	const packageVersion = normalizePackageVersion(overrides.packageVersion ?? "0.1.6-canary.1");
+	const packageVersion = normalizePackageVersion(overrides.packageVersion ?? "0.1.7-canary.1");
 	const nodeVersion = overrides.nodeVersion ?? process.version;
 	const parsedNodeVersion = parseNodeVersion(nodeVersion);
 	return {
@@ -2027,7 +2028,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
 const symbolRegex = /\p{S}/u;
 const punctuationRegex = /\p{P}/u;
 const whitespaceRegex = /\s/u;
-const newlineChars = new Set([
+const newlineChars = /* @__PURE__ */ new Set([
 	"\n",
 	"\r",
 	"\u2028",
@@ -2138,11 +2139,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
 	const segmenter = getSegmenter(chunk.locale);
 	const segments = [];
 	const nonWords = collectNonWords ? createNonWordCollection() : null;
-	for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
-	else if (collectNonWords && nonWords) {
-		if (includeWhitespace) addWhitespace(nonWords, part.segment);
+	for (const part of segmenter.segment(chunk.text)) {
 		const category = classifyNonWordSegment(part.segment);
-		if (category) addNonWord(nonWords, category, part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
+			continue;
+		}
+		if (part.isWordLike) segments.push(part.segment);
+		else if (collectNonWords && nonWords) {
+			if (includeWhitespace) addWhitespace(nonWords, part.segment);
+		}
 	}
 	return {
 		locale: chunk.locale,
@@ -2159,6 +2165,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 	let wordChars = 0;
 	let nonWordChars = 0;
 	for (const part of segmenter.segment(chunk.text)) {
+		const category = classifyNonWordSegment(part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) {
+				addNonWord(nonWords, category, part.segment);
+				const count = countCharsForLocale(part.segment, chunk.locale);
+				chars += count;
+				nonWordChars += count;
+			}
+			continue;
+		}
 		if (part.isWordLike) {
 			const count = countCharsForLocale(part.segment, chunk.locale);
 			chars += count;
@@ -2168,9 +2184,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 		if (collectNonWords && nonWords) {
 			let whitespaceCount = 0;
 			if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
-			const category = classifyNonWordSegment(part.segment);
-			if (category) addNonWord(nonWords, category, part.segment);
-			if (category || whitespaceCount > 0) {
+			if (whitespaceCount > 0) {
 				const count = countCharsForLocale(part.segment, chunk.locale);
 				chars += count;
 				nonWordChars += count;
@@ -2248,13 +2262,13 @@ const MODE_ALIASES = {
 	characters: "char",
 	"char-collector": "char-collector"
 };
-const CHAR_MODE_ALIASES = new Set([
+const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"char",
 	"chars",
 	"character",
 	"characters"
 ]);
-const COLLECTOR_MODE_ALIASES = new Set([
+const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"collector",
 	"collect",
 	"colle",
@@ -2352,7 +2366,7 @@ const regex = {
 	devanagari: /\p{Script=Devanagari}/u,
 	thai: /\p{Script=Thai}/u
 };
-const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
+const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
 function isLatinLocale(locale, context) {
 	if (context) return context.latinLocales.has(locale);
 	return defaultLatinLocales.has(locale);
@@ -2429,7 +2443,7 @@ function resolveLatinHintRules$1(options) {
 function resolveLocaleDetectContext(options = {}) {
 	const latinHint = resolveLatinHint(options);
 	const latinHintRules = resolveLatinHintRules$1(options);
-	const latinLocales = new Set([DEFAULT_LOCALE]);
+	const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
 	for (const rule of latinHintRules) latinLocales.add(rule.tag);
 	if (latinHint) latinLocales.add(latinHint);
 	return {
@@ -3766,7 +3780,8 @@ function resolveWhatlangWasmModulePath() {
 }
 async function loadWhatlangWasmModule() {
 	if (!modulePromise) modulePromise = (async () => {
-		return requireFromHere(resolveWhatlangWasmModulePath());
+		const modulePath = resolveWhatlangWasmModulePath();
+		return requireFromHere(modulePath);
 	})();
 	return modulePromise;
 }
@@ -4968,7 +4983,7 @@ const INSPECT_HELP_LINES = [
 	"",
 	"Options:",
 	"  -d, --detector <mode>  inspect detector mode (wasm, regex) (default: regex)",
-	"  --content-gate <mode>  content gate mode (default, strict, loose, off) (default: default)",
+	"  --content-gate <mode>  content gate mode for pipeline policy inspection (default, strict, loose, off) (default: default)",
 	"  --view <view>      inspect view (pipeline, engine) (default: pipeline)",
 	"  -f, --format <format>  inspect output format (standard, json) (default: standard)",
 	"  --pretty          pretty print inspect JSON output",
@@ -5223,6 +5238,13 @@ function emitConfigNotes$1(notes) {
 		console.error(import_picocolors.default.yellow(warningLine));
 	}
 }
+function shouldEmitEngineContentGateInfo(validated) {
+	if (validated.view !== "engine" || validated.detector !== "wasm") return false;
+	return validated.sources.contentGate || validated.contentGateMode !== "default";
+}
+function emitEngineContentGateInfo() {
+	console.error(import_picocolors.default.cyan("Info: `--content-gate` does not affect `inspect --view engine`; engine view shows raw detector output. Use `--view pipeline` to inspect eligibility and content-gate restrictions."));
+}
 async function executeInspectCommand({ argv, runtime }) {
 	const parsed = validateInspectInvocation(argv);
 	if (!parsed.ok) {
@@ -5254,6 +5276,7 @@ async function executeInspectCommand({ argv, runtime }) {
 		process.exitCode = 1;
 		return;
 	}
+	if (shouldEmitEngineContentGateInfo(validated)) emitEngineContentGateInfo();
 	try {
 		if (validated.paths.length === 0) {
 			const input = await loadSingleInspectInput(void 0, validated.textTokens, validated.section);
@@ -5362,7 +5385,7 @@ function normalizeVersion(value) {
 	return trimmed;
 }
 function resolvePackageVersion(options = {}) {
-	const embeddedVersion = normalizeVersion(options.embeddedVersion ?? "0.1.6-canary.1");
+	const embeddedVersion = normalizeVersion(options.embeddedVersion ?? "0.1.7-canary.1");
 	if (embeddedVersion) return embeddedVersion;
 	const maxLevels = options.maxLevels ?? 8;
 	const resolveFromPath = options.resolveFromPath ?? resolveVersionFromPath;
@@ -5560,7 +5583,7 @@ function aggregateSectionedResults(results, preserveCollectorSegments) {
 			existing.items.push(item.result);
 		}
 	}
-	const sourceOrder = new Map([["frontmatter", 0], ["content", 1]]);
+	const sourceOrder = /* @__PURE__ */ new Map([["frontmatter", 0], ["content", 1]]);
 	const items = [...grouped.values()].sort((left, right) => {
 		const sourceDiff = (sourceOrder.get(left.source) ?? 0) - (sourceOrder.get(right.source) ?? 0);
 		if (sourceDiff !== 0) return sourceDiff;

package/dist/esm/detector.mjs CHANGED Viewed

@@ -966,7 +966,8 @@ function resolveWhatlangWasmModulePath() {
 }
 async function loadWhatlangWasmModule() {
 	if (!modulePromise) modulePromise = (async () => {
-		return requireFromHere(resolveWhatlangWasmModulePath());
+		const modulePath = resolveWhatlangWasmModulePath();
+		return requireFromHere(modulePath);
 	})();
 	return modulePromise;
 }

package/dist/esm/index2.d.mts CHANGED Viewed

@@ -1,2 +1,2 @@
 import { _ as NonWordCollection, a as SectionMode, b as WordCounterOptions, c as appendAll, d as countCharsForLocale, f as countWordsForLocale, h as LatinHintRule, i as ParsedMarkdown, l as wordCounter, n as parseMarkdown, o as SectionedResult, p as segmentTextByLocale, r as FrontmatterType, s as showSingularOrPluralWord, t as countSections, u as DEFAULT_LATIN_HINT_RULES, v as WordCounterBreakdown, x as WordCounterResult, y as WordCounterMode } from "./index.mjs";
-export { DEFAULT_LATIN_HINT_RULES, FrontmatterType, LatinHintRule, NonWordCollection, ParsedMarkdown, SectionMode, SectionedResult, WordCounterBreakdown, WordCounterMode, WordCounterOptions, WordCounterResult, appendAll, countCharsForLocale, countSections, countWordsForLocale, wordCounter as default, wordCounter, parseMarkdown, segmentTextByLocale, showSingularOrPluralWord };
+export { DEFAULT_LATIN_HINT_RULES, type FrontmatterType, type LatinHintRule, type NonWordCollection, type ParsedMarkdown, type SectionMode, type SectionedResult, type WordCounterBreakdown, type WordCounterMode, type WordCounterOptions, type WordCounterResult, appendAll, countCharsForLocale, countSections, countWordsForLocale, wordCounter as default, wordCounter, parseMarkdown, segmentTextByLocale, showSingularOrPluralWord };

package/dist/esm/markdown.mjs CHANGED Viewed

@@ -45,7 +45,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
 const symbolRegex = /\p{S}/u;
 const punctuationRegex = /\p{P}/u;
 const whitespaceRegex = /\s/u;
-const newlineChars = new Set([
+const newlineChars = /* @__PURE__ */ new Set([
 	"\n",
 	"\r",
 	"\u2028",
@@ -156,11 +156,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
 	const segmenter = getSegmenter(chunk.locale);
 	const segments = [];
 	const nonWords = collectNonWords ? createNonWordCollection() : null;
-	for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
-	else if (collectNonWords && nonWords) {
-		if (includeWhitespace) addWhitespace(nonWords, part.segment);
+	for (const part of segmenter.segment(chunk.text)) {
 		const category = classifyNonWordSegment(part.segment);
-		if (category) addNonWord(nonWords, category, part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
+			continue;
+		}
+		if (part.isWordLike) segments.push(part.segment);
+		else if (collectNonWords && nonWords) {
+			if (includeWhitespace) addWhitespace(nonWords, part.segment);
+		}
 	}
 	return {
 		locale: chunk.locale,
@@ -177,6 +182,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 	let wordChars = 0;
 	let nonWordChars = 0;
 	for (const part of segmenter.segment(chunk.text)) {
+		const category = classifyNonWordSegment(part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) {
+				addNonWord(nonWords, category, part.segment);
+				const count = countCharsForLocale(part.segment, chunk.locale);
+				chars += count;
+				nonWordChars += count;
+			}
+			continue;
+		}
 		if (part.isWordLike) {
 			const count = countCharsForLocale(part.segment, chunk.locale);
 			chars += count;
@@ -186,9 +201,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 		if (collectNonWords && nonWords) {
 			let whitespaceCount = 0;
 			if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
-			const category = classifyNonWordSegment(part.segment);
-			if (category) addNonWord(nonWords, category, part.segment);
-			if (category || whitespaceCount > 0) {
+			if (whitespaceCount > 0) {
 				const count = countCharsForLocale(part.segment, chunk.locale);
 				chars += count;
 				nonWordChars += count;
@@ -266,13 +279,13 @@ const MODE_ALIASES = {
 	characters: "char",
 	"char-collector": "char-collector"
 };
-const CHAR_MODE_ALIASES = new Set([
+const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"char",
 	"chars",
 	"character",
 	"characters"
 ]);
-const COLLECTOR_MODE_ALIASES = new Set([
+const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"collector",
 	"collect",
 	"colle",
@@ -370,7 +383,7 @@ const regex = {
 	devanagari: /\p{Script=Devanagari}/u,
 	thai: /\p{Script=Thai}/u
 };
-const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
+const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
 function isLatinLocale(locale, context) {
 	if (context) return context.latinLocales.has(locale);
 	return defaultLatinLocales.has(locale);
@@ -447,7 +460,7 @@ function resolveLatinHintRules(options) {
 function resolveLocaleDetectContext(options = {}) {
 	const latinHint = resolveLatinHint(options);
 	const latinHintRules = resolveLatinHintRules(options);
-	const latinLocales = new Set([DEFAULT_LOCALE]);
+	const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
 	for (const rule of latinHintRules) latinLocales.add(rule.tag);
 	if (latinHint) latinLocales.add(latinHint);
 	return {

package/dist/esm/worker/count-worker.mjs CHANGED Viewed

@@ -500,7 +500,7 @@ const keycapEmojiRegex = /[0-9#*]\uFE0F?\u20E3/u;
 const symbolRegex = /\p{S}/u;
 const punctuationRegex = /\p{P}/u;
 const whitespaceRegex = /\s/u;
-const newlineChars = new Set([
+const newlineChars = /* @__PURE__ */ new Set([
 	"\n",
 	"\r",
 	"\u2028",
@@ -611,11 +611,16 @@ function analyzeChunk(chunk, collectNonWords, includeWhitespace) {
 	const segmenter = getSegmenter(chunk.locale);
 	const segments = [];
 	const nonWords = collectNonWords ? createNonWordCollection() : null;
-	for (const part of segmenter.segment(chunk.text)) if (part.isWordLike) segments.push(part.segment);
-	else if (collectNonWords && nonWords) {
-		if (includeWhitespace) addWhitespace(nonWords, part.segment);
+	for (const part of segmenter.segment(chunk.text)) {
 		const category = classifyNonWordSegment(part.segment);
-		if (category) addNonWord(nonWords, category, part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) addNonWord(nonWords, category, part.segment);
+			continue;
+		}
+		if (part.isWordLike) segments.push(part.segment);
+		else if (collectNonWords && nonWords) {
+			if (includeWhitespace) addWhitespace(nonWords, part.segment);
+		}
 	}
 	return {
 		locale: chunk.locale,
@@ -632,6 +637,16 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 	let wordChars = 0;
 	let nonWordChars = 0;
 	for (const part of segmenter.segment(chunk.text)) {
+		const category = classifyNonWordSegment(part.segment);
+		if (category) {
+			if (collectNonWords && nonWords) {
+				addNonWord(nonWords, category, part.segment);
+				const count = countCharsForLocale(part.segment, chunk.locale);
+				chars += count;
+				nonWordChars += count;
+			}
+			continue;
+		}
 		if (part.isWordLike) {
 			const count = countCharsForLocale(part.segment, chunk.locale);
 			chars += count;
@@ -641,9 +656,7 @@ function analyzeCharChunk(chunk, collectNonWords, includeWhitespace) {
 		if (collectNonWords && nonWords) {
 			let whitespaceCount = 0;
 			if (includeWhitespace) whitespaceCount = addWhitespace(nonWords, part.segment);
-			const category = classifyNonWordSegment(part.segment);
-			if (category) addNonWord(nonWords, category, part.segment);
-			if (category || whitespaceCount > 0) {
+			if (whitespaceCount > 0) {
 				const count = countCharsForLocale(part.segment, chunk.locale);
 				chars += count;
 				nonWordChars += count;
@@ -721,13 +734,13 @@ const MODE_ALIASES = {
 	characters: "char",
 	"char-collector": "char-collector"
 };
-const CHAR_MODE_ALIASES = new Set([
+const CHAR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"char",
 	"chars",
 	"character",
 	"characters"
 ]);
-const COLLECTOR_MODE_ALIASES = new Set([
+const COLLECTOR_MODE_ALIASES = /* @__PURE__ */ new Set([
 	"collector",
 	"collect",
 	"colle",
@@ -825,7 +838,7 @@ const regex = {
 	devanagari: /\p{Script=Devanagari}/u,
 	thai: /\p{Script=Thai}/u
 };
-const defaultLatinLocales = new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
+const defaultLatinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE, ...DEFAULT_LATIN_HINT_RULES.map((hint) => hint.tag)]);
 function isLatinLocale(locale, context) {
 	if (context) return context.latinLocales.has(locale);
 	return defaultLatinLocales.has(locale);
@@ -902,7 +915,7 @@ function resolveLatinHintRules(options) {
 function resolveLocaleDetectContext(options = {}) {
 	const latinHint = resolveLatinHint(options);
 	const latinHintRules = resolveLatinHintRules(options);
-	const latinLocales = new Set([DEFAULT_LOCALE]);
+	const latinLocales = /* @__PURE__ */ new Set([DEFAULT_LOCALE]);
 	for (const rule of latinHintRules) latinLocales.add(rule.tag);
 	if (latinHint) latinLocales.add(latinHint);
 	return {
@@ -1921,7 +1934,8 @@ function resolveWhatlangWasmModulePath() {
 }
 async function loadWhatlangWasmModule() {
 	if (!modulePromise) modulePromise = (async () => {
-		return requireFromHere(resolveWhatlangWasmModulePath());
+		const modulePath = resolveWhatlangWasmModulePath();
+		return requireFromHere(modulePath);
 	})();
 	return modulePromise;
 }

package/dist/wasm-language-detector/language_detector.js CHANGED Viewed

@@ -14,14 +14,13 @@ function detect_language(text, _route_tag) {
     return ret;
 }
 exports.detect_language = detect_language;
 function __wbg_get_imports() {
     const import0 = {
         __proto__: null,
-        __wbg___wbindgen_throw_bd5a70920abf0236: function(arg0, arg1) {
+        __wbg___wbindgen_throw_344f42d3211c4765: function(arg0, arg1) {
             throw new Error(getStringFromWasm0(arg0, arg1));
         },
-        __wbg_new_e4597c3f125a2038: function() {
+        __wbg_new_da52cf8fe3429cb2: function() {
             const ret = new Object();
             return ret;
         },
@@ -55,8 +54,7 @@ function __wbg_get_imports() {
 }
 function getStringFromWasm0(ptr, len) {
-    ptr = ptr >>> 0;
-    return decodeText(ptr, len);
+    return decodeText(ptr >>> 0, len);
 }
 let cachedUint8ArrayMemory0 = null;
@@ -128,5 +126,6 @@ let WASM_VECTOR_LEN = 0;
 const wasmPath = `${__dirname}/language_detector_bg.wasm`;
 const wasmBytes = require('fs').readFileSync(wasmPath);
 const wasmModule = new WebAssembly.Module(wasmBytes);
-let wasm = new WebAssembly.Instance(wasmModule, __wbg_get_imports()).exports;
+let wasmInstance = new WebAssembly.Instance(wasmModule, __wbg_get_imports());
+let wasm = wasmInstance.exports;
 wasm.__wbindgen_start();

package/dist/wasm-language-detector/language_detector_bg.wasm CHANGED Viewed

Binary file

package/dist/wasm-language-detector/package.json CHANGED Viewed

@@ -14,4 +14,4 @@
   ],
   "main": "language_detector.js",
   "types": "language_detector.d.ts"
-}
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@dev-pi2pie/word-counter",
-  "version": "0.1.6-canary.1",
+  "version": "0.1.7-canary.1",
   "keywords": [
     "cli",
     "intl-segmenter",
@@ -56,24 +56,21 @@
     "format:check": "oxfmt --check src test scripts package.json tsconfig.json tsconfig.test.json .oxlintrc.json .oxfmtrc.json"
   },
   "dependencies": {
-    "commander": "^14.0.3",
-    "yaml": "^2.8.3"
+    "commander": "^15.0.0",
+    "yaml": "^2.9.0"
   },
   "devDependencies": {
-    "@types/bun": "^1.3.11",
-    "@types/node": "^25.5.0",
-    "oxfmt": "^0.43.0",
-    "oxlint": "^1.58.0",
+    "@types/bun": "^1.3.14",
+    "@types/node": "^26.1.0",
+    "oxfmt": "^0.57.0",
+    "oxlint": "^1.72.0",
     "picocolors": "^1.1.1",
-    "tsdown": "^0.21.7",
-    "typescript": "^6.0.2"
+    "tsdown": "^0.22.3",
+    "typescript": "^6.0.3"
   },
   "peerDependencies": {
     "typescript": "^5 || ^6"
   },
-  "overrides": {
-    "picomatch": "4.0.4"
-  },
   "engines": {
     "node": ">=22.18.0"
   }