npm - @dev-pi2pie/word-counter - Versions diffs - 0.1.5-canary.2 → 0.1.5-canary.3 - Mend

@dev-pi2pie/word-counter 0.1.5-canary.2 → 0.1.5-canary.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/README.md +25 -3
package/dist/cjs/detector.cjs +429 -15
package/dist/cjs/detector.cjs.map +1 -1
package/dist/cjs/markdown.cjs +6 -0
package/dist/esm/bin.mjs +788 -209
package/dist/esm/bin.mjs.map +1 -1
package/dist/esm/detector.d.mts +39 -1
package/dist/esm/detector.mjs +430 -16
package/dist/esm/detector.mjs.map +1 -1
package/dist/esm/index.mjs +1 -1
package/dist/esm/markdown.mjs +1 -1
package/dist/esm/worker/count-worker.mjs +480 -20
package/dist/esm/worker/count-worker.mjs.map +1 -1
package/dist/esm/worker-pool.mjs +16 -2
package/dist/esm/worker-pool.mjs.map +1 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -109,8 +109,10 @@ Detector mode notes:
 - `--detector wasm` only runs for ambiguous `und-Latn` and `und-Hani` chunks.
 - `--detector regex` keeps the original script/regex chunk-first detection path.
 - `--detector wasm` uses a detector-oriented ambiguous-window scoring pass before accepted tags are projected back onto the counting chunks.
+- In `--detector wasm` mode, Latin hint rules and explicit Latin hint flags are deferred until after detector evaluation and only relabel unresolved `und-Latn` output.
 - Very short chunks stay on the original `und-*` fallback.
 - Low-confidence or unsupported detector results fall back to `und-*`.
+- Technical-noise-heavy Latin windows stay conservative and may remain `und-Latn` even when the detector produces a wrong-but-confident language guess.
 Collect non-words (emoji/symbols/punctuation):
@@ -285,14 +287,24 @@ word-counter --path ./examples/test-case-multi-files-support --debug --verbose
 Use `--debug-report [path]` to route debug diagnostics to a JSONL report file:
-- no path: writes to current working directory with pattern `wc-debug-YYYYMMDD-HHmmss-<pid>.jsonl`
+- no path: writes to current working directory with pattern `wc-debug-YYYYMMDD-HHmmss-utc-<pid>.jsonl`
+- no path with `--detector-evidence`: writes with pattern `wc-detector-evidence-YYYYMMDD-HHmmss-utc-<pid>.jsonl`
 - path provided: writes to the specified location
 - default-name collision handling: appends `-<n>` suffix to avoid overwriting existing files
 - explicit path validation: existing directories are rejected (explicit paths are treated as file targets)
+- compatibility note: the autogenerated filename moved from the older local-time pattern to the new UTC `...-utc-...jsonl` pattern
 By default with `--debug-report`, debug lines are file-only (not mirrored to terminal).
 Use `--debug-report-tee` (alias: `--debug-tee`) to mirror to both file and `stderr`.
-Flag dependencies: `--verbose` requires `--debug`; `--debug-report` requires `--debug`; `--debug-report-tee`/`--debug-tee` requires `--debug-report`.
+Flag dependencies: `--verbose` requires `--debug`; `--detector-evidence` requires `--debug` and `--detector wasm`; `--debug-report` requires `--debug`; `--debug-report-tee`/`--debug-tee` requires `--debug-report`.
+Use `--detector-evidence` to add per-window detector evidence onto the same debug stream:
+- only meaningful with `--detector wasm`
+- compact mode emits bounded single-line previews plus detector decision metadata
+- verbose mode emits full raw detector windows and full normalized samples
+- evidence remains detector-window based even when output mode changes to `collector`, `char`, or another counting mode
+- fallback evidence reports the post-fallback final tag used by downstream counting output; in rare split-relabel cases it may also include `finalLocales`
 Examples:
@@ -301,17 +313,26 @@ word-counter --path ./examples/test-case-multi-files-support --debug --debug-rep
 word-counter --path ./examples/test-case-multi-files-support --debug --debug-report ./logs/debug.jsonl
 word-counter --path ./examples/test-case-multi-files-support --debug --debug-report ./logs/debug.jsonl --debug-report-tee
 word-counter --path ./examples/test-case-multi-files-support --debug --debug-report ./logs/debug.jsonl --debug-tee
+word-counter --detector wasm --debug --detector-evidence "This sentence should clearly be detected as English for the wasm detector path."
+word-counter --detector wasm --debug --verbose --detector-evidence "This sentence should clearly be detected as English for the wasm detector path."
+word-counter --detector wasm --debug --detector-evidence --debug-report
 ```
 Skip details stay debug-gated and can be suppressed with `--quiet-skips`.
+When `--format json` is combined with `--debug`, debug-only diagnostics are emitted under `debug.*`:
+- single input and merged batch may include `debug.detector`
+- per-file batch may include `debug.skipped`, `debug.detector`, and per-entry `files[i].debug.detector`
+- per-file top-level `skipped` is still emitted temporarily for compatibility
 ## How It Works
 - The runtime inspects each character's Unicode script to infer its likely locale tag (e.g., `und-Latn`, `und-Hani`, `ja`).
 - Adjacent characters that share the same locale tag are grouped into a chunk.
 - Each chunk is counted with `Intl.Segmenter` at `granularity: "word"`, caching segmenters to avoid re-instantiation.
 - Per-locale counts are summed into an overall total and printed to stdout.
-- With `--detector wasm`, ambiguous `und-Latn` and `und-Hani` chunks can be relabeled through the optional WASM detector before counting.
+- With `--detector wasm`, ambiguous `und-Latn` and `und-Hani` chunks can be relabeled through the optional WASM detector before counting; unresolved `und-Latn` chunks then fall back to the existing Latin hint rules and explicit Latin hint precedence.
 ## Locale vs Language Code
@@ -696,6 +717,7 @@ Example JSON (trimmed):
 - Detection is regex/script based, not statistical language-ID.
 - Ambiguous Latin defaults to `und-Latn`; Han fallback defaults to `und-Hani`.
 - `--detector wasm` is optional and conservative; it only runs for ambiguous chunks that meet minimum script-bearing length thresholds.
+- In `--detector wasm` mode, ambiguous Latin stays on `und-Latn` for detector eligibility first, then built-in/custom Latin rules and explicit Latin hints are applied only if the detector leaves that chunk unresolved.
 - The current first WASM engine is `whatlang`, remapped into this package's public tags.
 - The npm package ships one portable WASM artifact; users do not install per-OS detector packages.
 - Use explicit tag and hint flags when you need deterministic tagging.

package/dist/cjs/detector.cjs CHANGED Viewed

@@ -118,6 +118,41 @@ function buildWordCounterResultFromChunks(chunks, options = {}) {
 		}
 	};
 }
+function recordDetectorWindow(summary, routeTag) { // Tally one detector window on the optional debug summary, bucketed by route tag.
+	if (!summary) return; // No summary object was supplied, so there is nothing to update.
+	summary.windowsTotal += 1;
+	if (routeTag === "und-Latn") {
+		summary.routes.latin += 1;
+		return;
+	}
+	if (routeTag === "und-Hani") summary.routes.han += 1; // Any other route tag is counted in windowsTotal only.
+}
+function recordDetectorAccepted(summary, path) { // Tally one accepted detector result and the acceptance path that produced it.
+	if (!summary) return; // Summary is optional; skip bookkeeping when it is absent.
+	summary.accepted += 1;
+	if (path === "reliable") {
+		summary.acceptancePaths.reliable += 1;
+		return;
+	}
+	summary.acceptancePaths.corroborated += 1; // Every path other than "reliable" is counted as corroborated.
+}
+function recordDetectorFallback(summary, reason) { // Tally one fallback and the reason key it was attributed to.
+	if (!summary) return; // Summary is optional; skip bookkeeping when it is absent.
+	summary.fallback += 1;
+	summary.fallbackReasons[reason] += 1; // NOTE(review): assumes fallbackReasons is pre-seeded with a numeric entry per reason; an unseeded key would become NaN here.
+}
+function createDetectorEvidencePreview(text) { // Build a single-line preview of `text`, capped at 160 code points, plus a flag saying whether it was cut.
+	const collapsed = text.replace(/\s+/gu, " ").trim(); // Collapse every whitespace run (newlines included) into one space, then trim the ends.
+	const codePoints = Array.from(collapsed); // Split by code point so the cap below cannot cut a surrogate pair in half.
+	if (codePoints.length <= 160) return {
+		preview: collapsed,
+		truncated: false
+	};
+	return {
+		preview: codePoints.slice(0, 160).join(""), // Keep only the first 160 code points.
+		truncated: true
+	};
+}
 //#endregion
 //#region src/detector/sections.ts
 function normalizeText(value) {
@@ -186,6 +221,7 @@ const LATIN_WASM_MIN_CONFIDENCE = .75;
 const HANI_WASM_MIN_CONFIDENCE = .9;
 const LATIN_SCRIPT_REGEX = /\p{Script=Latin}/u;
 const HAN_SCRIPT_REGEX = /\p{Script=Han}/u;
+const LATIN_WORD_REGEX = /\p{Script=Latin}+/gu;
 const DETECTOR_ROUTE_POLICIES = {
 	[require_markdown.DEFAULT_LOCALE]: {
 		routeTag: require_markdown.DEFAULT_LOCALE,
@@ -209,10 +245,6 @@ function countScriptBearingCharsForRoute(text, routeTag) {
 	for (const char of text) if (matcher.test(char)) count += 1;
 	return count;
 }
-function shouldRunWasmDetector(text, routeTag) {
-	const policy = DETECTOR_ROUTE_POLICIES[routeTag];
-	return countScriptBearingCharsForRoute(text, routeTag) >= policy.minScriptChars;
-}
 function normalizeDetectorSampleForRoute(text, routeTag) {
 	const matcher = routeTag === "und-Hani" ? HAN_SCRIPT_REGEX : LATIN_SCRIPT_REGEX;
 	return [...text].map((char) => {
@@ -221,6 +253,57 @@ function normalizeDetectorSampleForRoute(text, routeTag) {
 		return " ";
 	}).join("").replace(/\s+/g, " ").trim();
 }
+function countLatinWords(text) { // Count the LATIN_WORD_REGEX matches in `text`, i.e. maximal runs of Latin-script characters.
+	return text.match(LATIN_WORD_REGEX)?.length ?? 0; // match() yields null when nothing matches, hence the 0 default.
+}
+function isTechnicalLikeLatinLine(line, latinWords) { // Heuristic: does this line look like technical noise (prompt, flag, code span, path, key/value) rather than prose?
+	const trimmed = line.trim();
+	if (!trimmed) return false; // Blank lines are never classified as technical.
+	if (/^[>#$]/u.test(trimmed)) return true; // Line starts with ">", "#" or "$".
+	if (/(^|\s)--[a-z0-9][a-z0-9-]*/iu.test(trimmed)) return true; // Contains a "--flag"-style token at the start or after whitespace.
+	if (/`[^`]+`/u.test(trimmed)) return true; // Contains a non-empty backtick-delimited span.
+	if (/(^|[\s"'`])(?:\.{0,2}\/|\/)?[\w./-]+\.[a-z0-9]{1,6}(?=$|[\s"'`])/iu.test(trimmed)) return true; // Contains a path- or filename-like token ending in "." plus 1-6 alphanumerics.
+	if (/^[\-\*\d.)\s]*[\p{L}\p{N}_.-]+:\s+\S/iu.test(trimmed) && latinWords <= 8) return true; // "key: value" shape (optionally after list-marker characters), only for lines with at most 8 Latin words.
+	return false;
+}
+function shouldTreatLatinProseBlockAsSentenceLike(latinWords, lineCount, hasSentencePunctuation) { // Decide whether an accumulated prose block is sentence-like enough for its words to count as prose.
+	if (latinWords < 4) return false; // Blocks under 4 Latin words never qualify, even with punctuation.
+	if (hasSentencePunctuation) return true; // With at least 4 words, sentence punctuation is sufficient.
+	return lineCount <= 1 ? latinWords >= 5 : latinWords >= 8; // Without punctuation: a single line needs 5+ words, a multi-line block needs 8+.
+}
+function shouldAcceptLatinDetectorWindow(text, normalizedSample) { // Quality gate: true when the window holds enough sentence-like Latin prose relative to technical-looking lines.
+	if (countLatinWords(normalizedSample) < 4) return false; // The normalized sample must contain at least 4 Latin words.
+	let proseWords = 0; // Latin words from prose blocks that passed the sentence-like check.
+	let technicalWords = 0; // Latin words found on technical-looking lines.
+	let proseBlockWords = 0;
+	let proseBlockLines = 0;
+	let proseBlockHasSentencePunctuation = false;
+	const flushProseBlock = () => { // Close the current prose block: credit its words if sentence-like, then reset the accumulators.
+		if (shouldTreatLatinProseBlockAsSentenceLike(proseBlockWords, proseBlockLines, proseBlockHasSentencePunctuation)) proseWords += proseBlockWords;
+		proseBlockWords = 0;
+		proseBlockLines = 0;
+		proseBlockHasSentencePunctuation = false;
+	};
+	for (const rawLine of text.split(/\r?\n/u)) {
+		const line = rawLine.trim();
+		if (!line || line === "---" || line === "```") { // Blank lines, "---" and bare "```" lines end the current prose block.
+			flushProseBlock();
+			continue;
+		}
+		const latinWords = countLatinWords(line);
+		if (latinWords === 0) continue; // Lines without Latin words are skipped and do not end the current block.
+		if (isTechnicalLikeLatinLine(line, latinWords)) { // Technical lines end the prose block and add to the technical tally.
+			flushProseBlock();
+			technicalWords += latinWords;
+			continue;
+		}
+		proseBlockWords += latinWords;
+		proseBlockLines += 1;
+		proseBlockHasSentencePunctuation ||= /[.!?]/u.test(line); // Any ".", "!" or "?" anywhere in the line counts as sentence punctuation.
+	}
+	flushProseBlock(); // Account for a prose block that runs to the end of the text.
+	return proseWords >= 4 && proseWords >= technicalWords; // Need 4+ credited prose words and at least as many as technical words.
+}
 //#endregion
 //#region src/detector/whatlang-wasm.ts
 const GENERATED_FOLDER_NAME = "wasm-language-detector";
@@ -304,12 +387,142 @@ function getDetectorFallbackTag(routeTag) {
 }
 //#endregion
 //#region src/detector/wasm.ts
+function createDeferredLatinPreSegmentOptions(options) { // Shallow-copy `options` with every Latin hint input cleared, for the segmentation pass that runs before the detector.
+	return {
+		...options,
+		latinLanguageHint: void 0,
+		latinTagHint: void 0,
+		latinLocaleHint: void 0,
+		latinHintRules: void 0,
+		useDefaultLatinHints: false // Unlike createRuleOnlyLatinOptions, this also clears the hint rules and turns off the default hints.
+	};
+}
+function createRuleOnlyLatinOptions(options) { // Shallow-copy `options` with the three explicit Latin hint fields cleared; latinHintRules and useDefaultLatinHints pass through unchanged.
+	return {
+		...options,
+		latinLanguageHint: void 0,
+		latinTagHint: void 0,
+		latinLocaleHint: void 0
+	};
+}
+function mergeAdjacentChunks(chunks) { // Merge consecutive chunks that share a locale by concatenating their text; order is preserved.
+	if (chunks.length === 0) return chunks; // Empty input is returned as-is (same array instance).
+	const merged = [];
+	let last = chunks[0]; // Chunk currently being extended; pushed once a different locale is seen.
+	for (let index = 1; index < chunks.length; index += 1) {
+		const chunk = chunks[index];
+		if (chunk.locale === last.locale) {
+			last = { // A merged chunk is a fresh object carrying only `locale` and `text`.
+				locale: last.locale,
+				text: last.text + chunk.text
+			};
+			continue;
+		}
+		merged.push(last);
+		last = chunk;
+	}
+	merged.push(last); // Flush the final pending chunk.
+	return merged;
+}
+function reapplyDeferredLatinFallback(chunks, options) { // Re-segment chunks still tagged "und-Latn" with the given options, then merge neighbours that end up sharing a locale.
+	const relabeled = [];
+	for (const chunk of chunks) {
+		if (chunk.locale !== "und-Latn") { // Chunks with any other locale pass through untouched.
+			relabeled.push(chunk);
+			continue;
+		}
+		relabeled.push(...require_markdown.segmentTextByLocale(chunk.text, options)); // One unresolved chunk may come back as several chunks.
+	}
+	return mergeAdjacentChunks(relabeled);
+}
+function reapplyResolvedLatinHintRules(resolvedChunks, originalChunks, options) { // For chunks that started as "und-Latn" and were relabeled, re-run segmentation with rule-only options and keep the resolved tag wherever the rules leave text unresolved.
+	const relabeled = [];
+	const ruleOnlyOptions = createRuleOnlyLatinOptions(options);
+	for (let index = 0; index < resolvedChunks.length; index += 1) { // The two arrays are read index-by-index as parallel lists.
+		const chunk = resolvedChunks[index];
+		const originalChunk = originalChunks[index];
+		if (!chunk || !originalChunk) continue; // Skip positions missing on either side.
+		if (originalChunk.locale !== "und-Latn" || chunk.locale === "und-Latn") { // Only chunks that began as und-Latn and now carry another locale are re-examined.
+			relabeled.push(chunk);
+			continue;
+		}
+		const hintedChunks = require_markdown.segmentTextByLocale(chunk.text, ruleOnlyOptions).map((hintedChunk) => ({
+			locale: hintedChunk.locale === "und-Latn" ? chunk.locale : hintedChunk.locale, // Sub-chunks left at und-Latn keep the chunk's resolved locale; others take the segmenter's locale.
+			text: hintedChunk.text
+		}));
+		relabeled.push(...hintedChunks);
+	}
+	return mergeAdjacentChunks(relabeled);
+}
 function shouldAcceptDetectorTag(routeTag, confidence, reliable) { // Check a detector result against the route's policy (reliability requirement and minimum confidence).
 	const policy = DETECTOR_ROUTE_POLICIES[routeTag];
 	if (policy.requireReliable && reliable !== true) return false; // When the policy requires it, only an explicit `true` counts as reliable.
 	if (confidence === void 0) return false; // A result without a confidence value is never accepted.
 	return confidence >= policy.minConfidence;
 }
+function resolveFallbackDebugOutcome(window, options) { // Work out which final tag (and, for split results, which locales) to report in debug output for a window that fell back.
+	const fallbackTag = getDetectorFallbackTag(window.routeTag);
+	if (window.routeTag !== "und-Latn") return { finalTag: fallbackTag }; // Only the Latin route has a deferred fallback pass to replay.
+	const finalLocales = reapplyDeferredLatinFallback([{ // Replay the deferred Latin fallback on the window text treated as a single chunk.
+		locale: fallbackTag,
+		text: window.text
+	}], options).map((chunk) => chunk.locale);
+	if (finalLocales.length === 1) return { finalTag: finalLocales[0] }; // Whole window resolved to one locale.
+	return finalLocales.length > 1 ? { // Split result: report the fallback tag plus the list of locales.
+		finalTag: fallbackTag,
+		finalLocales
+	} : { finalTag: fallbackTag }; // No chunks came back: report the plain fallback tag.
+}
+function buildEvidenceSample(result, remappedTag) { // Shape one detector result for evidence output; fields missing from `result` (or a null/undefined `result`) become null.
+	return {
+		lang: result?.lang ?? null,
+		script: result?.script ?? null,
+		confidence: result?.confidence ?? null,
+		reliable: result?.reliable ?? null,
+		remappedTag // Passed through as given by the caller.
+	};
+}
+function emitDetectorWindowEvidence({ window, windowIndex, normalizedSample, eligible, qualityGate, rawResult, rawRemappedTag, normalizedResult, normalizedRemappedTag, decision, debug }) { // Emit one "detector.window.evidence" debug event for a window, in verbose (full text) or compact (bounded preview) form.
+	const evidence = debug?.evidence;
+	if (!evidence || !debug.emit) return; // Evidence output needs both an evidence config and an emit function; `debug` is known to be set once `evidence` is truthy.
+	const routePolicy = DETECTOR_ROUTE_POLICIES[window.routeTag];
+	const baseDetails = { // Fields shared by the verbose and compact payloads.
+		engine: "whatlang-wasm",
+		routeTag: window.routeTag,
+		windowIndex,
+		startIndex: window.startIndex,
+		endIndex: window.endIndex,
+		mode: evidence.mode,
+		section: evidence.section,
+		textLength: window.text.length, // String .length, i.e. UTF-16 code units.
+		normalizedLength: normalizedSample.length,
+		normalizedApplied: normalizedSample !== window.text, // NOTE(review): the "detector.window.sample" event additionally requires a non-empty sample for its flag of the same name.
+		scriptChars: countScriptBearingCharsForRoute(window.text, window.routeTag),
+		minScriptChars: routePolicy.minScriptChars,
+		eligible,
+		qualityGate,
+		raw: buildEvidenceSample(rawResult, rawRemappedTag),
+		normalized: buildEvidenceSample(normalizedResult, normalizedRemappedTag),
+		decision
+	};
+	if (evidence.verbosity === "verbose") { // Verbose: attach the full window text and the full normalized sample.
+		debug.emit("detector.window.evidence", {
+			...baseDetails,
+			text: window.text,
+			normalizedText: normalizedSample
+		}, { verbosity: "verbose" });
+		return;
+	}
+	const textPreview = createDetectorEvidencePreview(window.text); // Any other verbosity: bounded single-line previews.
+	const normalizedPreview = createDetectorEvidencePreview(normalizedSample);
+	debug.emit("detector.window.evidence", {
+		...baseDetails,
+		textPreview: textPreview.preview,
+		textPreviewTruncated: textPreview.truncated,
+		normalizedPreview: normalizedPreview.preview,
+		normalizedPreviewTruncated: normalizedPreview.truncated
+	}, { verbosity: "compact" });
+}
 function buildDetectorWindows(chunks) {
 	const windows = [];
 	for (let index = 0; index < chunks.length; index += 1) {
@@ -330,31 +543,231 @@ function buildDetectorWindows(chunks) {
 	}
 	return windows;
 }
-async function resolveWindowLocale(window) {
-	if (!shouldRunWasmDetector(window.text, window.routeTag)) return window.routeTag;
+async function resolveWindowLocale(window, windowIndex, options, debug) { // Resolve one detector window to a locale tag, updating the debug summary and emitting debug events along the way.
+	recordDetectorWindow(debug?.summary, window.routeTag);
+	debug?.emit?.("detector.window.start", {
+		routeTag: window.routeTag,
+		startIndex: window.startIndex,
+		endIndex: window.endIndex,
+		textLength: window.text.length
+	}, { verbosity: "verbose" });
+	const routePolicy = DETECTOR_ROUTE_POLICIES[window.routeTag];
+	const eligible = countScriptBearingCharsForRoute(window.text, window.routeTag) >= routePolicy.minScriptChars; // Eligibility: enough script-bearing characters for this route's policy.
+	const normalizedSample = normalizeDetectorSampleForRoute(window.text, window.routeTag);
+	const passesLatinQualityGate = window.routeTag !== "und-Latn" || shouldAcceptLatinDetectorWindow(window.text, normalizedSample); // The prose-quality gate applies to the Latin route only; other routes always pass.
+	if (!eligible) { // Not enough script-bearing characters: skip the detector entirely.
+		recordDetectorFallback(debug?.summary, "notEligible");
+		const fallbackDebugOutcome = resolveFallbackDebugOutcome(window, options); // NOTE(review): used only in the debug payloads below, yet computed even when no debug sink is attached.
+		emitDetectorWindowEvidence({
+			window,
+			windowIndex,
+			normalizedSample,
+			eligible,
+			qualityGate: passesLatinQualityGate,
+			rawResult: null,
+			rawRemappedTag: null,
+			normalizedResult: null,
+			normalizedRemappedTag: null,
+			decision: {
+				accepted: false,
+				path: null,
+				finalTag: fallbackDebugOutcome.finalTag,
+				...fallbackDebugOutcome.finalLocales ? { finalLocales: fallbackDebugOutcome.finalLocales } : {},
+				fallbackReason: "notEligible"
+			},
+			debug
+		});
+		debug?.emit?.("detector.window.fallback", {
+			routeTag: window.routeTag,
+			finalTag: fallbackDebugOutcome.finalTag,
+			...fallbackDebugOutcome.finalLocales ? { finalLocales: fallbackDebugOutcome.finalLocales } : {},
+			reason: "notEligible"
+		});
+		return window.routeTag; // Ineligible windows return the route tag itself rather than going through getDetectorFallbackTag.
+	}
 	const rawResult = await detectWithWhatlangWasm(window.text, window.routeTag);
 	const rawRemapped = rawResult ? remapWhatlangResult(rawResult, window.routeTag) : null;
-	const normalizedSample = normalizeDetectorSampleForRoute(window.text, window.routeTag);
 	const normalizedResult = normalizedSample.length > 0 && normalizedSample !== window.text ? await detectWithWhatlangWasm(normalizedSample, window.routeTag) : null;
+	debug?.emit?.("detector.window.sample", {
+		routeTag: window.routeTag,
+		normalizedApplied: normalizedSample.length > 0 && normalizedSample !== window.text,
+		normalizedLength: normalizedSample.length,
+		qualityGate: passesLatinQualityGate,
+		rawTag: rawRemapped?.tag ?? null,
+		rawConfidence: rawRemapped?.confidence ?? null,
+		rawReliable: rawRemapped?.reliable ?? null
+	}, { verbosity: "verbose" });
 	const normalizedRemapped = normalizedResult ? remapWhatlangResult(normalizedResult, window.routeTag) : null;
+	debug?.emit?.("detector.window.candidates", {
+		routeTag: window.routeTag,
+		normalizedTag: normalizedRemapped?.tag ?? null,
+		normalizedConfidence: normalizedRemapped?.confidence ?? null,
+		normalizedReliable: normalizedRemapped?.reliable ?? null
+	}, { verbosity: "verbose" });
 	const candidates = [rawRemapped, normalizedRemapped].filter((value) => value !== null);
-	if (candidates.length === 0) return getDetectorFallbackTag(window.routeTag);
+	if (candidates.length === 0) { // Neither the raw text nor the normalized sample produced a remapped result.
+		recordDetectorFallback(debug?.summary, "noCandidate");
+		const fallbackDebugOutcome = resolveFallbackDebugOutcome(window, options);
+		emitDetectorWindowEvidence({
+			window,
+			windowIndex,
+			normalizedSample,
+			eligible,
+			qualityGate: passesLatinQualityGate,
+			rawResult,
+			rawRemappedTag: rawRemapped?.tag ?? null,
+			normalizedResult,
+			normalizedRemappedTag: normalizedRemapped?.tag ?? null,
+			decision: {
+				accepted: false,
+				path: null,
+				finalTag: fallbackDebugOutcome.finalTag,
+				...fallbackDebugOutcome.finalLocales ? { finalLocales: fallbackDebugOutcome.finalLocales } : {},
+				fallbackReason: "noCandidate"
+			},
+			debug
+		});
+		debug?.emit?.("detector.window.fallback", {
+			routeTag: window.routeTag,
+			finalTag: fallbackDebugOutcome.finalTag,
+			...fallbackDebugOutcome.finalLocales ? { finalLocales: fallbackDebugOutcome.finalLocales } : {},
+			reason: "noCandidate"
+		});
+		return getDetectorFallbackTag(window.routeTag);
+	}
 	const strongestCandidate = candidates.reduce((best, current) => {
 		if (!best) return current;
 		return (current.confidence ?? 0) > (best.confidence ?? 0) ? current : best;
 	}, candidates[0]);
-	if (strongestCandidate && shouldAcceptDetectorTag(window.routeTag, strongestCandidate.confidence, strongestCandidate.reliable)) return strongestCandidate.tag;
-	if (window.routeTag === "und-Latn" && rawRemapped && normalizedRemapped && rawRemapped.tag === normalizedRemapped.tag) {
-		if (Math.max(rawRemapped.confidence ?? 0, normalizedRemapped.confidence ?? 0) >= .7) return rawRemapped.tag;
+	if (strongestCandidate && passesLatinQualityGate && shouldAcceptDetectorTag(window.routeTag, strongestCandidate.confidence, strongestCandidate.reliable)) { // "reliable" path: the highest-confidence candidate satisfies the route policy and the quality gate passed.
+		recordDetectorAccepted(debug?.summary, "reliable");
+		emitDetectorWindowEvidence({
+			window,
+			windowIndex,
+			normalizedSample,
+			eligible,
+			qualityGate: passesLatinQualityGate,
+			rawResult,
+			rawRemappedTag: rawRemapped?.tag ?? null,
+			normalizedResult,
+			normalizedRemappedTag: normalizedRemapped?.tag ?? null,
+			decision: {
+				accepted: true,
+				path: "reliable",
+				finalTag: strongestCandidate.tag,
+				fallbackReason: null
+			},
+			debug
+		});
+		debug?.emit?.("detector.window.accepted", {
+			routeTag: window.routeTag,
+			finalTag: strongestCandidate.tag,
+			acceptancePath: "reliable",
+			confidence: strongestCandidate.confidence ?? null,
+			reliable: strongestCandidate.reliable ?? null
+		});
+		return strongestCandidate.tag;
+	}
+	if (window.routeTag === "und-Latn" && passesLatinQualityGate && rawRemapped && normalizedRemapped && rawRemapped.tag === normalizedRemapped.tag) { // Corroboration applies to the Latin route only: raw and normalized detection agree on the tag.
+		const corroboratedConfidence = Math.max(rawRemapped.confidence ?? 0, normalizedRemapped.confidence ?? 0);
+		const hasReliableCorroboration = rawRemapped.reliable === true || normalizedRemapped.reliable === true;
+		if (hasReliableCorroboration && corroboratedConfidence >= .7) { // "corroborated" path: at least one agreeing result is reliable and the better confidence reaches 0.7.
+			recordDetectorAccepted(debug?.summary, "corroborated");
+			emitDetectorWindowEvidence({
+				window,
+				windowIndex,
+				normalizedSample,
+				eligible,
+				qualityGate: passesLatinQualityGate,
+				rawResult,
+				rawRemappedTag: rawRemapped.tag,
+				normalizedResult,
+				normalizedRemappedTag: normalizedRemapped.tag,
+				decision: {
+					accepted: true,
+					path: "corroborated",
+					finalTag: rawRemapped.tag,
+					fallbackReason: null
+				},
+				debug
+			});
+			debug?.emit?.("detector.window.accepted", {
+				routeTag: window.routeTag,
+				finalTag: rawRemapped.tag,
+				acceptancePath: "corroborated",
+				confidence: corroboratedConfidence,
+				reliable: hasReliableCorroboration
+			});
+			return rawRemapped.tag;
+		}
+		if (!hasReliableCorroboration && corroboratedConfidence >= .7) { // Agreement at 0.7+ confidence but neither result is reliable: fall back with a dedicated reason.
+			recordDetectorFallback(debug?.summary, "corroborationUnreliable");
+			const fallbackDebugOutcome = resolveFallbackDebugOutcome(window, options);
+			emitDetectorWindowEvidence({
+				window,
+				windowIndex,
+				normalizedSample,
+				eligible,
+				qualityGate: passesLatinQualityGate,
+				rawResult,
+				rawRemappedTag: rawRemapped.tag,
+				normalizedResult,
+				normalizedRemappedTag: normalizedRemapped.tag,
+				decision: {
+					accepted: false,
+					path: null,
+					finalTag: fallbackDebugOutcome.finalTag,
+					...fallbackDebugOutcome.finalLocales ? { finalLocales: fallbackDebugOutcome.finalLocales } : {},
+					fallbackReason: "corroborationUnreliable"
+				},
+				debug
+			});
+			debug?.emit?.("detector.window.fallback", {
+				routeTag: window.routeTag,
+				finalTag: fallbackDebugOutcome.finalTag,
+				...fallbackDebugOutcome.finalLocales ? { finalLocales: fallbackDebugOutcome.finalLocales } : {},
+				reason: "corroborationUnreliable"
+			});
+			return getDetectorFallbackTag(window.routeTag);
+		}
 	}
+	const fallbackReason = passesLatinQualityGate ? "belowThreshold" : "qualityGate"; // Everything else falls back; the reason records whether the quality gate or the thresholds blocked acceptance.
+	recordDetectorFallback(debug?.summary, fallbackReason);
+	const fallbackDebugOutcome = resolveFallbackDebugOutcome(window, options);
+	emitDetectorWindowEvidence({
+		window,
+		windowIndex,
+		normalizedSample,
+		eligible,
+		qualityGate: passesLatinQualityGate,
+		rawResult,
+		rawRemappedTag: rawRemapped?.tag ?? null,
+		normalizedResult,
+		normalizedRemappedTag: normalizedRemapped?.tag ?? null,
+		decision: {
+			accepted: false,
+			path: null,
+			finalTag: fallbackDebugOutcome.finalTag,
+			...fallbackDebugOutcome.finalLocales ? { finalLocales: fallbackDebugOutcome.finalLocales } : {},
+			fallbackReason
+		},
+		debug
+	});
+	debug?.emit?.("detector.window.fallback", {
+		routeTag: window.routeTag,
+		finalTag: fallbackDebugOutcome.finalTag,
+		...fallbackDebugOutcome.finalLocales ? { finalLocales: fallbackDebugOutcome.finalLocales } : {},
+		reason: fallbackReason
+	});
 	return getDetectorFallbackTag(window.routeTag);
 }
 async function segmentTextByLocaleWithWasmDetector(text, options = {}) {
-	const chunks = require_markdown.segmentTextByLocale(text, options);
+	require_markdown.resolveLocaleDetectContext(options);
+	const chunks = require_markdown.segmentTextByLocale(text, createDeferredLatinPreSegmentOptions(options));
 	const resolved = [...chunks];
 	const windows = buildDetectorWindows(chunks);
-	for (const window of windows) {
-		const resolvedLocale = await resolveWindowLocale(window);
+	for (const [windowIndex, window] of windows.entries()) {
+		const resolvedLocale = await resolveWindowLocale(window, windowIndex, options, options.detectorDebug);
 		for (let index = window.startIndex; index <= window.endIndex; index += 1) {
 			const chunk = resolved[index];
 			if (!chunk) continue;
@@ -364,7 +777,8 @@ async function segmentTextByLocaleWithWasmDetector(text, options = {}) {
 			};
 		}
 	}
-	return resolved;
+	options.detectorDebug?.emit?.("detector.summary", options.detectorDebug.summary, { verbosity: "compact" });
+	return reapplyDeferredLatinFallback(reapplyResolvedLatinHintRules(resolved, chunks, options), options);
 }
 async function wordCounterWithWasmDetector(text, options = {}) {
 	return buildWordCounterResultFromChunks(await segmentTextByLocaleWithWasmDetector(text, options), options);