npm - flappa-doormal - Versions diffs - 2.20.0 → 2.22.0 - Mend

flappa-doormal 2.20.0 → 2.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/index.mjs CHANGED

@@ -710,7 +710,7 @@ const isArabicLetter = (ch) => /\p{Script=Arabic}/u.test(ch) && /\p{L}/u.test(ch
 const isCommonDelimiter = (ch) => /[:：\-–—ـ،؛.?!؟()[\]{}]/u.test(ch);
 //#endregion
 //#region src/analysis/line-starts.ts
-const resolveOptions$1 = (options = {}) => ({
+const resolveOptions$2 = (options = {}) => ({
 	includeFirstWordFallback: options.includeFirstWordFallback ?? true,
 	lineFilter: options.lineFilter,
 	maxExamples: options.maxExamples ?? 1,
@@ -939,7 +939,7 @@ const processPage = (page, tokenPriority, opts, acc) => {
 * Analyze pages and return the most common line-start patterns (top K).
 */
 const analyzeCommonLineStarts = (pages, options = {}) => {
-	const opts = resolveOptions$1(options);
+	const opts = resolveOptions$2(options);
 	const tokenPriority = buildTokenPriority();
 	const acc = /* @__PURE__ */ new Map();
 	for (const page of pages) processPage(page, tokenPriority, opts, acc);
@@ -952,7 +952,7 @@ const analyzeCommonLineStarts = (pages, options = {}) => {
 };
 //#endregion
 //#region src/analysis/repeating-sequences.ts
-const resolveOptions = (options) => {
+const resolveOptions$1 = (options) => {
 	const minElements = Math.max(1, options?.minElements ?? 1);
 	return {
 		contextChars: options?.contextChars ?? 50,
@@ -1106,7 +1106,7 @@ const extractPageNgrams = (page, items, opts, stats) => {
 * use `analyzeCommonLineStarts()` instead.
 */
 const analyzeRepeatingSequences = (pages, options) => {
-	const opts = resolveOptions(options);
+	const opts = resolveOptions$1(options);
 	const stats = /* @__PURE__ */ new Map();
 	for (const page of pages) {
 		if (!page.content) continue;
@@ -1119,636 +1119,821 @@ const analyzeRepeatingSequences = (pages, options) => {
 	}));
 };
 //#endregion
-//#region src/detection.ts
+//#region src/types/rules.ts
 /**
-* Token detection order - more specific patterns first to avoid partial matches.
-* Example: 'raqms' before 'raqm' so "٣٤" matches 'raqms' not just the first digit.
+* Pattern type key names for split rules.
 *
-* Tokens not in this list are appended in alphabetical order from TOKEN_PATTERNS.
+* Use this array to dynamically iterate over pattern types in UIs,
+* or use the `PatternTypeKey` type for type-safe string unions.
+*
+* @example
+* // Build a dropdown/select in UI
+* PATTERN_TYPE_KEYS.map(key => <option value={key}>{key}</option>)
+*
+* @example
+* // Type-safe pattern key validation
+* const validateKey = (k: string): k is PatternTypeKey =>
+*   (PATTERN_TYPE_KEYS as readonly string[]).includes(k);
 */
-const TOKEN_PRIORITY_ORDER = [
-	"basmalah",
-	"kitab",
-	"bab",
-	"fasl",
-	"naql",
-	"rumuz",
-	"numbered",
-	"raqms",
-	"raqm",
-	"tarqim",
-	"bullet",
-	"dash",
-	"harf"
+const PATTERN_TYPE_KEYS = [
+	"lineStartsWith",
+	"lineStartsAfter",
+	"lineEndsWith",
+	"template",
+	"regex",
+	"dictionaryEntry"
 ];
+//#endregion
+//#region src/optimization/optimize-rules.ts
+const MERGEABLE_KEYS = new Set([
+	"lineStartsWith",
+	"lineStartsAfter",
+	"lineEndsWith"
+]);
 /**
-* Gets the token detection priority order.
-* Returns tokens in priority order, with any TOKEN_PATTERNS not in the priority list appended.
+* Get the pattern type key for a rule.
 */
-const getTokenPriority = () => {
-	const allTokens = getAvailableTokens();
-	const prioritized = TOKEN_PRIORITY_ORDER.filter((t) => allTokens.includes(t));
-	const remaining = allTokens.filter((t) => !TOKEN_PRIORITY_ORDER.includes(t)).sort();
-	return [...prioritized, ...remaining];
+const getPatternKey = (rule) => PATTERN_TYPE_KEYS.find((key) => key in rule) ?? "regex";
+const getPatternArray = (rule, key) => {
+	const value = rule[key];
+	return Array.isArray(value) ? value : [];
 };
-const isRumuzStandalone = (text, startIndex, endIndex) => {
-	const before = startIndex > 0 ? text[startIndex - 1] : "";
-	const after = endIndex < text.length ? text[endIndex] : "";
-	const isWhitespace = (ch) => !!ch && /\s/u.test(ch);
-	const isOpenBracket = (ch) => !!ch && /[([{]/u.test(ch);
-	const isRightDelimiter = (ch) => !!ch && /[:：\-–—ـ،؛.?!؟)\]}]/u.test(ch);
-	const isArabicWordy = (ch) => !!ch && /[\u0600-\u06FF]/u.test(ch);
-	const leftOk = !before || isWhitespace(before) || isOpenBracket(before) || !isArabicWordy(before);
-	const rightOk = !after || isWhitespace(after) || isRightDelimiter(after) || !isArabicWordy(after);
-	return leftOk && rightOk;
+const getPatternString = (rule, key) => {
+	const value = rule[key];
+	return typeof value === "string" ? value : Array.isArray(value) ? value.join("\n") : value ? JSON.stringify(value) : "";
 };
-/**
-* Analyzes text and returns all detected token patterns with their positions.
-* Patterns are detected in priority order to avoid partial matches.
-*
-* @param text - The text to analyze for token patterns
-* @returns Array of detected patterns sorted by position
-*
-* @example
-* detectTokenPatterns("٣٤ - حدثنا")
-* // Returns: [
-* //   { token: 'raqms', match: '٣٤', index: 0, endIndex: 2 },
-* //   { token: 'dash', match: '-', index: 3, endIndex: 4 },
-* //   { token: 'naql', match: 'حدثنا', index: 5, endIndex: 10 }
-* // ]
-*/
-const detectTokenPatterns = (text) => {
-	if (!text) return [];
-	const results = [];
-	const coveredRanges = [];
-	const isPositionCovered = (start, end) => {
-		return coveredRanges.some(([s, e]) => start >= s && start < e || end > s && end <= e || start <= s && end >= e);
-	};
-	for (const tokenName of getTokenPriority()) {
-		const pattern = TOKEN_PATTERNS[tokenName];
-		if (!pattern) continue;
-		try {
-			const regex = new RegExp(`(${pattern})`, "gu");
-			let match;
-			while ((match = regex.exec(text)) !== null) {
-				const startIndex = match.index;
-				const endIndex = startIndex + match[0].length;
-				if (tokenName === "rumuz" && !isRumuzStandalone(text, startIndex, endIndex)) continue;
-				if (isPositionCovered(startIndex, endIndex)) continue;
-				results.push({
-					endIndex,
-					index: startIndex,
-					match: match[0],
-					token: tokenName
-				});
-				coveredRanges.push([startIndex, endIndex]);
-			}
-		} catch {}
-	}
-	return results.sort((a, b) => a.index - b.index);
+const normalizePatterns = (patterns) => [...new Set(patterns)].sort((a, b) => b.length - a.length || a.localeCompare(b));
+const serializePrimitive = (value) => {
+	if (value === void 0) return "undefined";
+	if (typeof value === "number") return Number.isFinite(value) ? JSON.stringify(value) : JSON.stringify(String(value));
+	if (typeof value === "bigint") return JSON.stringify(`${value}n`);
+	if (typeof value === "symbol") return JSON.stringify(value.toString());
+	return JSON.stringify(value);
+};
+const stableSerializeArray = (values, seen) => `[${values.map((value) => stableSerializeValue(value, seen)).join(",")}]`;
+const stableSerializeObject = (value, seen) => {
+	if (seen.has(value)) throw new TypeError("Cannot optimize rules with circular option values");
+	seen.add(value);
+	const serialized = Object.entries(value).filter(([, entryValue]) => entryValue !== void 0).sort(([left], [right]) => left.localeCompare(right)).map(([entryKey, entryValue]) => `${JSON.stringify(entryKey)}:${stableSerializeValue(entryValue, seen)}`).join(",");
+	seen.delete(value);
+	return `{${serialized}}`;
+};
+const stableSerializeValue = (value, seen) => {
+	if (typeof value === "function") return JSON.stringify(`[Function:${value.name || "anonymous"}]`);
+	if (!value || typeof value !== "object") return serializePrimitive(value);
+	if (Array.isArray(value)) return stableSerializeArray(value, seen);
+	if (value instanceof Date) return JSON.stringify(value.toISOString());
+	if (value instanceof RegExp) return JSON.stringify(value.toString());
+	return stableSerializeObject(value, seen);
+};
+const stableSerialize = (value) => stableSerializeValue(value, /* @__PURE__ */ new WeakSet());
+const getDictionaryEntrySpecificityScore = (rule) => {
+	if (!("dictionaryEntry" in rule) || !rule.dictionaryEntry) return 0;
+	const { allowCommaSeparated = false, allowParenthesized = false, allowWhitespaceBeforeColon = false, maxLetters = 10, midLineSubentries = true, minLetters = 2, stopWords } = rule.dictionaryEntry;
+	return minLetters * 20 + maxLetters + (allowCommaSeparated ? 0 : 120) + (allowParenthesized ? 0 : 60) + (allowWhitespaceBeforeColon ? 0 : 20) + (midLineSubentries ? 0 : 160) + Math.min(stopWords.length, 25);
 };
-/**
-* Generates a template pattern from text using detected tokens.
-* Replaces matched portions with {{token}} syntax.
-*
-* @param text - Original text
-* @param detected - Array of detected patterns from detectTokenPatterns
-* @returns Template string with tokens, e.g., "{{raqms}} {{dash}} "
-*
-* @example
-* const detected = detectTokenPatterns("٣٤ - ");
-* generateTemplateFromText("٣٤ - ", detected);
-* // Returns: "{{raqms}} {{dash}} "
-*/
-const generateTemplateFromText = (text, detected) => {
-	if (!text || detected.length === 0) return text;
-	let template = text;
-	const sortedByIndexDesc = [...detected].sort((a, b) => b.index - a.index);
-	for (const d of sortedByIndexDesc) template = `${template.slice(0, d.index)}{{${d.token}}}${template.slice(d.endIndex)}`;
-	return template;
+const getSpecificityScore = (rule) => {
+	const key = getPatternKey(rule);
+	if (key === "dictionaryEntry") return getDictionaryEntrySpecificityScore(rule);
+	return MERGEABLE_KEYS.has(key) ? getPatternArray(rule, key).reduce((max, p) => Math.max(max, p.length), 0) : getPatternString(rule, key).length;
 };
-/**
-* Determines the best pattern type for auto-generated rules based on detected patterns.
-*
-* @param detected - Array of detected patterns
-* @returns Suggested pattern type and whether to use fuzzy matching
-*/
-const suggestPatternConfig = (detected) => {
-	const hasStructuralToken = detected.some((d) => [
-		"basmalah",
-		"kitab",
-		"bab",
-		"fasl"
-	].includes(d.token));
-	const hasNumberedPattern = detected.some((d) => [
-		"raqms",
-		"raqm",
-		"numbered"
-	].includes(d.token));
-	if (hasStructuralToken) return {
-		fuzzy: true,
-		metaType: detected.find((d) => [
-			"kitab",
-			"bab",
-			"fasl"
-		].includes(d.token))?.token || "chapter",
-		patternType: "lineStartsWith"
-	};
-	if (hasNumberedPattern) return {
-		fuzzy: false,
-		metaType: "hadith",
-		patternType: "lineStartsAfter"
-	};
+const createMergeKey = (rule) => {
+	const key = getPatternKey(rule);
+	return `${key}|${stableSerialize(Object.fromEntries(Object.entries(rule).filter(([field]) => field !== key)))}`;
+};
+const optimizeRules = (rules) => {
+	const output = [];
+	const indexByMergeKey = /* @__PURE__ */ new Map();
+	let mergedCount = 0;
+	for (const rule of rules) {
+		const key = getPatternKey(rule);
+		if (!MERGEABLE_KEYS.has(key)) {
+			output.push(rule);
+			continue;
+		}
+		const mergeKey = createMergeKey(rule);
+		const existingIndex = indexByMergeKey.get(mergeKey);
+		if (existingIndex === void 0) {
+			indexByMergeKey.set(mergeKey, output.length);
+			output.push({
+				...rule,
+				[key]: normalizePatterns(getPatternArray(rule, key))
+			});
+		} else {
+			const existing = output[existingIndex];
+			existing[key] = normalizePatterns([...getPatternArray(existing, key), ...getPatternArray(rule, key)]);
+			mergedCount++;
+		}
+	}
 	return {
-		fuzzy: false,
-		patternType: "lineStartsAfter"
+		mergedCount,
+		rules: output.sort((a, b) => getSpecificityScore(b) - getSpecificityScore(a))
 	};
 };
-/**
-* Analyzes text and generates a complete suggested rule configuration.
-*
-* @param text - Highlighted text from the page
-* @returns Suggested rule configuration or null if no patterns detected
-*/
-const analyzeTextForRule = (text) => {
-	const detected = detectTokenPatterns(text);
-	if (detected.length === 0) return null;
+//#endregion
+//#region src/segmentation/pattern-validator.ts
+const KNOWN_TOKENS = new Set(getAvailableTokens());
+const TOKEN_INSIDE_BRACES = /\{\{(\w+)(?::\w+)?\}\}/g;
+const BARE_TOKEN_REGEX = (() => {
+	const tokens = [...KNOWN_TOKENS].sort((a, b) => b.length - a.length);
+	return new RegExp(`(?<!\\{\\{)(${tokens.join("|")})(?::\\w+)?(?!\\}\\})`, "g");
+})();
+const createMalformedTokenIssue = (tokenLiteral, side) => {
+	const token = tokenLiteral.split(":", 1)[0] || void 0;
 	return {
-		detected,
-		template: generateTemplateFromText(text, detected),
-		...suggestPatternConfig(detected)
+		message: `Token "${tokenLiteral || "unknown"}" appears to be missing ${side} braces.`,
+		suggestion: tokenLiteral ? `{{${tokenLiteral}}}` : void 0,
+		token,
+		type: "missing_braces"
 	};
 };
-//#endregion
-//#region src/dictionary/arabic-dictionary-rule.ts
-const uniqueCanonicalWords = (words) => {
-	const seen = /* @__PURE__ */ new Set();
-	const result = [];
-	for (const word of words) {
-		const normalized = normalizeArabicForComparison(word);
-		if (!normalized || seen.has(normalized)) continue;
-		seen.add(normalized);
-		result.push(word);
+const detectMalformedLeftToken = (pattern) => {
+	for (let index = 0; index < pattern.length - 1; index++) {
+		if (pattern.slice(index, index + 2) !== "{{") continue;
+		const closeIndex = pattern.indexOf("}}", index + 2);
+		if (closeIndex === -1) return createMalformedTokenIssue(pattern.slice(index + 2).match(/^\w+(?::\w+)?/u)?.[0] ?? "", "closing");
+		index = closeIndex + 1;
 	}
-	return result;
-};
-const buildStopAlternation = (stopWords) => {
-	const unique = uniqueCanonicalWords(stopWords);
-	if (unique.length === 0) return "";
-	return unique.map((word) => makeDiacriticInsensitive(normalizeArabicForComparison(word))).join("|");
 };
-const buildHeadwordBody = ({ allowCommaSeparated, colonPattern, stopAlternation, stopwordBody, unit }) => {
-	if (!stopAlternation) return allowCommaSeparated ? `${unit}(?:\\s*[،,]\\s*${unit})*` : unit;
-	const guardedUnit = `(?!(?:${stopwordBody})${allowCommaSeparated ? `(?:\\s*[،,]\\s*|${colonPattern})` : colonPattern})${unit}`;
-	return allowCommaSeparated ? `${guardedUnit}(?:\\s*[،,]\\s*${guardedUnit})*` : guardedUnit;
+const detectMalformedRightToken = (pattern) => {
+	for (let index = 0; index < pattern.length - 1; index++) {
+		if (pattern.slice(index, index + 2) !== "}}") continue;
+		if (pattern.lastIndexOf("{{", index) === -1) return createMalformedTokenIssue(pattern.slice(0, index).match(/(\w+(?::\w+)?)$/u)?.[1] ?? "", "opening");
+	}
 };
-const buildBalancedMarker = ({ allowParenthesized, allowWhitespaceBeforeColon, captureName, headwordBody }) => {
-	const colon = allowWhitespaceBeforeColon ? "\\s*:" : ":";
-	const withCapture = `(?<${captureName}>${headwordBody})`;
-	if (!allowParenthesized) return `${withCapture}${colon}`;
-	return `(?:\\(\\s*${withCapture}\\s*\\)|${withCapture})${colon}`;
+const detectMalformedToken = (pattern) => detectMalformedLeftToken(pattern) ?? detectMalformedRightToken(pattern);
+/**
+* Validates a single pattern for common issues.
+*/
+const validatePattern = (pattern, seenPatterns) => {
+	if (!pattern.trim()) return {
+		message: "Empty pattern is not allowed",
+		type: "empty_pattern"
+	};
+	if (seenPatterns.has(pattern)) return {
+		message: `Duplicate pattern: "${pattern}"`,
+		pattern,
+		type: "duplicate"
+	};
+	seenPatterns.add(pattern);
+	TOKEN_INSIDE_BRACES.lastIndex = 0;
+	for (const match of pattern.matchAll(TOKEN_INSIDE_BRACES)) {
+		const name = match[1];
+		if (name && !KNOWN_TOKENS.has(name)) return {
+			message: `Unknown token: {{${name}}}. Available tokens: ${[...KNOWN_TOKENS].slice(0, 5).join(", ")}...`,
+			suggestion: "Check spelling or use a known token",
+			token: name,
+			type: "unknown_token"
+		};
+	}
+	const malformed = detectMalformedToken(pattern);
+	if (malformed) return malformed;
+	for (const match of pattern.matchAll(BARE_TOKEN_REGEX)) {
+		const [full, name] = match;
+		const idx = match.index;
+		if (pattern.slice(Math.max(0, idx - 2), idx) !== "{{" || pattern.slice(idx + full.length, idx + full.length + 2) !== "}}") return {
+			message: `Token "${name}" appears to be missing {{}}. Did you mean "{{${full}}}"?`,
+			suggestion: `{{${full}}}`,
+			token: name,
+			type: "missing_braces"
+		};
+	}
 };
-const validateDictionaryEntryOptions = ({ captureName = "lemma", maxLetters = 10, minLetters = 2 }) => {
-	if (!Number.isInteger(minLetters) || minLetters < 1) throw new Error(`createArabicDictionaryEntryRule: minLetters must be an integer >= 1, got ${minLetters}`);
-	if (!Number.isInteger(maxLetters) || maxLetters < minLetters) throw new Error(`createArabicDictionaryEntryRule: maxLetters must be an integer >= minLetters, got ${maxLetters}`);
-	if (!/^[A-Za-z_]\w*$/.test(captureName)) throw new Error(`createArabicDictionaryEntryRule: invalid captureName "${captureName}"`);
+/**
+* Validates an array of patterns, returning parallel array of issues.
+*/
+const validatePatternArray = (patterns) => {
+	const seen = /* @__PURE__ */ new Set();
+	const issues = patterns.map((p) => validatePattern(p, seen));
+	return issues.some(Boolean) ? issues : void 0;
 };
-const buildArabicDictionaryEntryRegexSource = ({ allowCommaSeparated = false, allowParenthesized = false, allowWhitespaceBeforeColon = false, captureName = "lemma", maxLetters = 10, midLineSubentries = true, minLetters = 2, stopWords }, capturePrefix) => {
-	validateDictionaryEntryOptions({
-		captureName,
-		maxLetters,
-		minLetters
-	});
-	const zeroWidthPrefix = "[\\u200E\\u200F\\u061C\\u200B\\u200C\\u200D\\uFEFF]*";
-	const wawWithMarks = `و${ARABIC_MARKS_CLASS}*`;
-	const alWithMarks = `ا${ARABIC_MARKS_CLASS}*ل${ARABIC_MARKS_CLASS}*`;
-	const lemmaUnit = `(?:${wawWithMarks})?(?:${alWithMarks})?${`${ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN}(?:${ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN}){${minLetters - 1},${maxLetters - 1}}`}`;
-	const stopAlternation = buildStopAlternation(stopWords);
-	const lemmaBody = buildHeadwordBody({
-		allowCommaSeparated,
-		colonPattern: allowWhitespaceBeforeColon ? "\\s*:" : ":",
-		stopAlternation,
-		stopwordBody: stopAlternation ? `(?:${wawWithMarks})?(?:${stopAlternation})` : "",
-		unit: lemmaUnit
-	});
-	const lineStartBoundary = `(?:(?<=^)|(?<=\\n))${zeroWidthPrefix}`;
-	const midLineTrigger = allowParenthesized ? `(?<=\\s)(?=(?:\\(\\s*)?${wawWithMarks}(?:${alWithMarks})?)` : `(?<=\\s)(?=${wawWithMarks}(?:${alWithMarks})?)`;
-	const prefixedCaptureName = capturePrefix ? `${capturePrefix}${captureName}` : captureName;
-	const regex = `(?:${lineStartBoundary}${midLineSubentries ? `|${midLineTrigger}` : ""})` + buildBalancedMarker({
-		allowParenthesized,
-		allowWhitespaceBeforeColon,
-		captureName: prefixedCaptureName,
-		headwordBody: lemmaBody
-	});
-	return {
-		captureNames: [prefixedCaptureName],
-		regex
-	};
+const applyRulePatternValidation = (result, key, patterns) => {
+	if (!patterns) return false;
+	const issues = validatePatternArray(patterns);
+	if (!issues) return false;
+	result[key] = issues;
+	return true;
+};
+const validateTemplateRule = (rule, result) => {
+	if (!("template" in rule)) return false;
+	const issue = validatePattern(rule.template, /* @__PURE__ */ new Set());
+	if (!issue) return false;
+	result.template = issue;
+	return true;
+};
+const validateRegexRule = (rule, result) => {
+	if (!("regex" in rule)) return false;
+	if (!rule.regex.trim()) {
+		result.regex = {
+			message: "Empty pattern is not allowed",
+			type: "empty_pattern"
+		};
+		return true;
+	}
+	try {
+		new RegExp(rule.regex, "u");
+		return false;
+	} catch (error) {
+		result.regex = {
+			message: error instanceof Error ? error.message : String(error),
+			pattern: rule.regex,
+			type: "invalid_regex"
+		};
+		return true;
+	}
+};
+const invalidDictionaryEntryIssue = (message) => ({
+	message,
+	type: "invalid_option"
+});
+const addBooleanDictionaryEntryIssue = (issues, key, value) => {
+	if (value !== void 0 && typeof value !== "boolean") issues[key] = invalidDictionaryEntryIssue(`${key} must be a boolean`);
+};
+const addCaptureNameIssue = (issues, captureName) => {
+	if (captureName !== void 0 && !/^[A-Za-z_]\w*$/.test(captureName)) issues.captureName = invalidDictionaryEntryIssue(`captureName must match /^[A-Za-z_]\\w*$/, got "${captureName}"`);
+};
+const addMinLettersIssue = (issues, minLetters) => {
+	if (minLetters !== void 0 && (!Number.isInteger(minLetters) || minLetters < 1)) issues.minLetters = invalidDictionaryEntryIssue("minLetters must be an integer >= 1");
+};
+const addMaxLettersIssue = (issues, maxLetters, minLetters) => {
+	const min = minLetters ?? 2;
+	if (maxLetters !== void 0 && (!Number.isInteger(maxLetters) || maxLetters < min)) issues.maxLetters = invalidDictionaryEntryIssue(`maxLetters must be an integer >= ${min}`);
+};
+const validateDictionaryEntryRule = (rule, result) => {
+	if (!("dictionaryEntry" in rule) || !rule.dictionaryEntry) return false;
+	const issues = {};
+	const { allowCommaSeparated, allowParenthesized, allowWhitespaceBeforeColon, captureName, maxLetters, midLineSubentries, minLetters, stopWords } = rule.dictionaryEntry;
+	if (!Array.isArray(stopWords) || stopWords.some((word) => typeof word !== "string" || !word.trim())) issues.stopWords = invalidDictionaryEntryIssue("stopWords must be a string[] with non-empty entries");
+	addBooleanDictionaryEntryIssue(issues, "allowCommaSeparated", allowCommaSeparated);
+	addBooleanDictionaryEntryIssue(issues, "allowParenthesized", allowParenthesized);
+	addBooleanDictionaryEntryIssue(issues, "allowWhitespaceBeforeColon", allowWhitespaceBeforeColon);
+	addBooleanDictionaryEntryIssue(issues, "midLineSubentries", midLineSubentries);
+	addCaptureNameIssue(issues, captureName);
+	addMinLettersIssue(issues, minLetters);
+	addMaxLettersIssue(issues, maxLetters, minLetters);
+	if (Object.keys(issues).length === 0) return false;
+	result.dictionaryEntry = issues;
+	return true;
+};
+const formatValidationIssue = (_type, issue, loc) => {
+	if (!issue) return null;
+	if (issue.type === "missing_braces") return `${loc}: Missing {{}} around token "${issue.token}"`;
+	if (issue.type === "unknown_token") return `${loc}: Unknown token "{{${issue.token}}}"`;
+	if (issue.type === "duplicate") return `${loc}: Duplicate pattern "${issue.pattern}"`;
+	if (issue.type === "invalid_regex") return `${loc}: Invalid regex (${issue.message})`;
+	return `${loc}: ${issue.message || issue.type}`;
 };
 /**
-* Creates a reusable split rule for Arabic dictionary entries.
+* Validates split rules for common pattern issues.
 *
-* The returned rule preserves authoring intent as a serializable
-* `{ dictionaryEntry: ... }` pattern rather than eagerly compiling to a raw
-* regex string.
+* Checks for:
+* - Missing `{{}}` around known token names (e.g., `raqms:num` instead of `{{raqms:num}}`)
+* - Unknown token names inside `{{}}` (e.g., `{{nonexistent}}`)
+* - Duplicate patterns within the same rule
 *
-* @example
-* createArabicDictionaryEntryRule({
-*   stopWords: ['وقيل', 'ويقال', 'قال'],
-*   pageStartPrevWordStoplist: ['قال', 'وقيل', 'ويقال'],
-* })
+* @param rules - Array of split rules to validate
+* @returns Array parallel to input with validation results (undefined if no issues)
 *
 * @example
-* createArabicDictionaryEntryRule({
-*   allowParenthesized: true,
-*   allowWhitespaceBeforeColon: true,
-*   allowCommaSeparated: true,
-*   stopWords: ['الليث', 'العجاج'],
-* })
+* const issues = validateRules([
+*   { lineStartsAfter: ['raqms:num'] },  // Missing braces
+*   { lineStartsWith: ['{{unknown}}'] }, // Unknown token
+* ]);
+* // issues[0]?.lineStartsAfter?.[0]?.type === 'missing_braces'
+* // issues[1]?.lineStartsWith?.[0]?.type === 'unknown_token'
 */
+const validateRules = (rules) => rules.map((rule) => {
+	const result = {};
+	const startsWithIssues = applyRulePatternValidation(result, "lineStartsWith", "lineStartsWith" in rule ? rule.lineStartsWith : void 0);
+	const startsAfterIssues = applyRulePatternValidation(result, "lineStartsAfter", "lineStartsAfter" in rule ? rule.lineStartsAfter : void 0);
+	const endsWithIssues = applyRulePatternValidation(result, "lineEndsWith", "lineEndsWith" in rule ? rule.lineEndsWith : void 0);
+	const templateIssues = validateTemplateRule(rule, result);
+	const regexIssues = validateRegexRule(rule, result);
+	const dictionaryEntryIssues = validateDictionaryEntryRule(rule, result);
+	return startsWithIssues || startsAfterIssues || endsWithIssues || templateIssues || regexIssues || dictionaryEntryIssues ? result : void 0;
+});
 /**
-* @deprecated Prefer the top-level `SegmentationOptions.dictionary` profile for
-* whole-book dictionary segmentation. Keep this helper for advanced single-rule
-* composition inside a broader `SplitRule[]` pipeline.
+* Formats a validation result array into a list of human-readable error messages.
+*
+* Useful for displaying validation errors in UIs.
+*
+* @param results - The result array from `validateRules()`
+* @returns Array of formatted error strings
+*
+* @example
+* const issues = validateRules(rules);
+* const errors = formatValidationReport(issues);
+* // ["Rule 1, lineStartsWith: Missing {{}} around token..."]
 */
-const createArabicDictionaryEntryRule = ({ allowCommaSeparated = false, allowParenthesized = false, allowWhitespaceBeforeColon = false, captureName = "lemma", maxLetters = 10, meta, midLineSubentries = true, minLetters = 2, pageStartPrevWordStoplist, samePagePrevWordStoplist, stopWords }) => {
-	validateDictionaryEntryOptions({
-		captureName,
-		maxLetters,
-		minLetters
-	});
-	return {
-		dictionaryEntry: {
-			allowCommaSeparated,
-			allowParenthesized,
-			allowWhitespaceBeforeColon,
-			captureName,
-			maxLetters,
-			midLineSubentries,
-			minLetters,
-			stopWords: uniqueCanonicalWords(stopWords)
-		},
-		meta,
-		pageStartPrevWordStoplist,
-		samePagePrevWordStoplist
-	};
+const formatValidationReport = (results) => results.flatMap((result, i) => {
+	if (!result) return [];
+	return Object.entries(result).flatMap(([type, issues]) => formatValidationIssues(type, issues, i + 1));
+});
+const formatValidationIssues = (type, issues, ruleNumber) => {
+	if (type === "dictionaryEntry" && issues && typeof issues === "object" && !Array.isArray(issues)) return Object.entries(issues).map(([field, issue]) => formatValidationIssue(type, issue, `Rule ${ruleNumber}, ${type}.${field}`)).filter((msg) => msg !== null);
+	return (Array.isArray(issues) ? issues : [issues]).map((issue) => formatValidationIssue(type, issue, `Rule ${ruleNumber}, ${type}`)).filter((msg) => msg !== null);
 };
 //#endregion
-//#region src/dictionary/heading-classifier.ts
-const HEADING_PREFIX$1 = "## ";
-const CODE_LINE_PATTERN$1 = getTokenPattern("harfs").replaceAll("\\s+", "[ \\t]+");
-const ARABIC_WORD_PATTERN = ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN;
-const PLAIN_ENTRY_RE = new RegExp(`^(?<lemma>${ARABIC_WORD_PATTERN}(?:\\s+${ARABIC_WORD_PATTERN}){0,1}|[([{]${ARABIC_WORD_PATTERN}(?:\\s+${ARABIC_WORD_PATTERN}){0,1}[)\\]}])\\s*:`, "u");
-const INLINE_SUBENTRY_RE = new RegExp(`(^|[\\s،؛,:.])(?<lemma>و${ARABIC_WORD_PATTERN})\\s*:`, "gu");
-const CODE_LINE_RE = new RegExp(`^(?:[[(])?(?<codes>${CODE_LINE_PATTERN$1})(?:[)\\]])?$`, "u");
-const PAIRED_FORMS_RE = new RegExp(`^(?<forms>${ARABIC_WORD_PATTERN}(?:\\s*[،,]\\s*${ARABIC_WORD_PATTERN})+)\\s*:`, "u");
-const ARABIC_BOUNDARY_OR_PUNCTUATION = "(?=$|[\\s:،؛()\\[\\]{}\\-–—]|[^\\p{Script=Arabic}])";
-const CHAPTER_HEADING_RE = new RegExp(`^(?:[([{]\\s*)?(?:باب|فصل|كتاب|حرف|أبواب)${ARABIC_BOUNDARY_OR_PUNCTUATION}`, "u");
-const CLUSTER_HEADING_RE = new RegExp(`^(?:\\(?\\s*)?(?:أبواب|أبنية)${ARABIC_BOUNDARY_OR_PUNCTUATION}|^(?=.{1,80}$).+?[،,].+?(?:مستعمل|مهمل|مستعملة|مستعملان)(?=$|[.،,:؛\\s])`, "u");
-const STATUS_HEADING_RE = new RegExp(`^(?:${CODE_LINE_PATTERN$1}|(?:(?:${ARABIC_WORD_PATTERN}\\s+){1,3}${ARABIC_WORD_PATTERN}|${ARABIC_WORD_PATTERN}(?:\\s*[،,]\\s*${ARABIC_WORD_PATTERN})+))\\s*:?[\\s]*(?:مستعمل|مستعملة|مستعملان|مهمل|مهملة)(?=$|[.،,:؛\\s])`, "u");
-const CODE_NOTE_HEADING_RE = new RegExp(`^(?:${ARABIC_WORD_PATTERN}\\s+){1,3}\\(.+\\)$`, "u");
-const COLON_NOISE_RE = /^.+:\s*.+$/u;
-const CHAPTER_TERMS = [
-	"باب",
-	"فصل",
-	"كتاب",
-	"حرف",
-	"أبواب"
-];
-const MARKER_PREFIXES = [
-	"بسم الله",
-	"توكلت على الله",
-	"آخر كتاب",
-	"ويتلوه"
-];
-const NOISE_TOKENS = [
-	"قال",
-	"وقيل",
-	"ويقال",
-	"وفي",
-	"يعني",
-	"فإذا"
-];
-const emptyCounts = () => ({
-	chapter: 0,
-	cluster: 0,
-	codeLine: 0,
-	entry: 0,
-	inlineSubentry: 0,
-	lineEntry: 0,
-	marker: 0,
-	noise: 0,
-	pairedForms: 0
-});
-const extractWrappedLemma = (lemma) => lemma.replace(/^[[{(]+|[\])}]+$/gu, "").trim();
-const stripLeadingWrappers = (text) => text.replace(/^[[{(]+\s*/u, "").trim();
-const isDelimitedPrefixMatch$1 = (text, prefix) => {
-	if (text === prefix) return true;
-	if (!text.startsWith(prefix)) return false;
-	const nextChar = text[prefix.length];
-	return nextChar === void 0 || /[\s:،؛()[\]{}\-–—]/u.test(nextChar);
+//#region src/preprocessing/transforms.ts
+/** Helper for exhaustive switch checking - TypeScript will error if a case is missed */
+const assertNever$2 = (x) => {
+	throw new Error(`Unknown preprocess transform type: ${JSON.stringify(x)}`);
 };
-const isCodeHeading = (text) => {
-	if (CODE_LINE_RE.test(text)) return true;
-	const words = text.trim().split(/\s+/u).filter(Boolean);
-	return words.length === 1 && (words[0]?.length ?? 0) === 1;
+/** Check if a character is whitespace (space, newline, tab, etc.) */
+const isWhitespace = (char) => /\s/.test(char);
+/**
+* Check if a character code is a zero-width control character.
+*
+* Covers:
+* - U+200B–U+200F (Zero Width Space, Joiners, Direction Marks)
+* - U+202A–U+202E (Bidirectional Formatting)
+* - U+2060–U+2064 (Word Joiner, Invisible Operators)
+* - U+FEFF (Byte Order Mark / Zero Width No-Break Space)
+*/
+const isZeroWidth = (code) => code >= 8203 && code <= 8207 || code >= 8234 && code <= 8238 || code >= 8288 && code <= 8292 || code === 65279;
+/**
+* Remove zero-width control characters from text.
+*
+* @param text - Input text
+* @param mode - 'strip' (default) removes entirely, 'space' replaces with space
+* @returns Text with zero-width characters removed or replaced
+*/
+const removeZeroWidth = (text, mode = "strip") => {
+	if (mode === "space") {
+		const parts = [];
+		let lastWasWhitespace = true;
+		for (let i = 0; i < text.length; i++) if (isZeroWidth(text.charCodeAt(i))) {
+			if (!lastWasWhitespace && parts.length > 0) {
+				parts.push(" ");
+				lastWasWhitespace = true;
+			}
+		} else {
+			const char = text[i];
+			parts.push(char);
+			lastWasWhitespace = isWhitespace(char);
+		}
+		return parts.join("");
+	}
+	return text.replace(/[\u200B-\u200F\u202A-\u202E\u2060-\u2064\uFEFF]/g, "");
 };
-const looksLikeNoiseHeading = (text) => {
-	const normalized = normalizeArabicForComparison(text);
-	const wordCount = text.trim().split(/\s+/u).filter(Boolean).length;
-	if (/(?:مستعمل|مهمل|مستعملة|مستعملان)(?=$|[.،,:؛\s])/u.test(text)) return false;
-	if (wordCount >= 8 && COLON_NOISE_RE.test(text)) return true;
-	return NOISE_TOKENS.some((token) => normalized.includes(normalizeArabicForComparison(token))) && wordCount >= 4;
+/**
+* Condense multiple periods (...) into ellipsis character (…).
+*
+* Prevents `{{tarqim}}` from false-matching inside ellipsis since
+* the `.` in tarqim matches individual periods.
+*
+* @param text - Input text
+* @returns Text with period sequences replaced by ellipsis
+*/
+const condenseEllipsis = (text) => text.replace(/\.{2,}/g, "…");
+/**
+* Join trailing و (waw) to the next word.
+*
+* Fixes OCR/digitization artifacts: ' و ' → ' و' (waw joined to next word)
+*
+* @param text - Input text
+* @returns Text with trailing waw joined to following word
+*/
+const fixTrailingWaw = (text) => text.replace(/ و /g, " و");
+/**
+* Check if a page ID is within a constraint range.
+*/
+const isInRange = (pageId, constraint) => {
+	if (constraint.min !== void 0 && pageId < constraint.min) return false;
+	if (constraint.max !== void 0 && pageId > constraint.max) return false;
+	return true;
+};
+/**
+* Normalize a transform to its object form.
+*/
+const normalizeTransform = (transform) => {
+	if (typeof transform === "string") return { type: transform };
+	return transform;
+};
+/**
+* Apply preprocessing transforms to a page's content.
+*
+* Transforms run in array order. Each can be limited to specific pages
+* via `min`/`max` constraints.
+*
+* @param content - Page content to transform
+* @param pageId - Page ID for constraint checking
+* @param transforms - Array of transforms to apply
+* @returns Transformed content
+*/
+const applyPreprocessToPage = (content, pageId, transforms) => {
+	let result = content;
+	for (const transform of transforms) {
+		const rule = normalizeTransform(transform);
+		if (!isInRange(pageId, rule)) continue;
+		switch (rule.type) {
+			case "removeZeroWidth":
+				result = removeZeroWidth(result, rule.mode ?? "strip");
+				break;
+			case "condenseEllipsis":
+				result = condenseEllipsis(result);
+				break;
+			case "fixTrailingWaw":
+				result = fixTrailingWaw(result);
+				break;
+			default: assertNever$2(rule.type);
+		}
+	}
+	return result;
 };
+//#endregion
+//#region src/validation/validate-segments.ts
 /**
-* Classifies a markdown heading line produced by `convertContentToMarkdown()`.
+* Creates a short preview string of text content for error reporting.
+* Collapses whitespace runs, then truncates content exceeding PREVIEW_LIMIT (140 characters) and appends "...".
 */
-const classifyDictionaryHeading = (line) => {
-	const text = line.startsWith(HEADING_PREFIX$1) ? line.slice(3).trim() : line.trim();
-	const unwrapped = stripLeadingWrappers(text);
-	if (!text) return "noise";
-	if (CHAPTER_HEADING_RE.test(text) || CHAPTER_TERMS.some((term) => isDelimitedPrefixMatch$1(normalizeArabicForComparison(unwrapped), normalizeArabicForComparison(term)))) return "chapter";
-	if (looksLikeNoiseHeading(text)) return "noise";
-	if (isCodeHeading(text)) return "marker";
-	if (MARKER_PREFIXES.some((token) => normalizeArabicForComparison(unwrapped).startsWith(normalizeArabicForComparison(token)))) return "marker";
-	if (STATUS_HEADING_RE.test(text) || CODE_NOTE_HEADING_RE.test(text)) return "marker";
-	if (CLUSTER_HEADING_RE.test(text)) return "cluster";
-	return "entry";
+const buildPreview = (text) => {
+	const normalized = text.replace(/\s+/g, " ").trim();
+	if (normalized.length <= 140) return normalized;
+	return `${normalized.slice(0, 140)}...`;
 };
-const createHeadingMatch = (kind, page, rawLine, lineNumber) => ({
-	kind,
-	lemma: kind === "entry" ? rawLine.slice(3).trim() : void 0,
-	line: lineNumber,
-	pageId: page.id,
-	text: rawLine
-});
-const createSurfaceMatch = (kind, page, text, lineNumber, lemma) => ({
-	kind,
-	lemma,
-	line: lineNumber,
-	pageId: page.id,
-	text
+/**
+* Creates a lightweight snapshot of a segment for inclusion in validation checks.
+*/
+const buildSegmentSnapshot = (segment) => ({
+	contentPreview: buildPreview(segment.content),
+	from: segment.from,
+	to: segment.to
 });
-const scanHeadingLine = (page, rawLine, lineNumber, matches) => {
-	if (!rawLine.startsWith(HEADING_PREFIX$1)) return false;
-	const kind = classifyDictionaryHeading(rawLine);
-	matches.push(createHeadingMatch(kind, page, rawLine, lineNumber));
-	return true;
-};
-const scanLineEntry = (page, rawLine, lineNumber, matches) => {
-	const lineEntry = rawLine.match(PLAIN_ENTRY_RE);
-	if (!lineEntry?.groups?.lemma) return;
-	matches.push(createSurfaceMatch("lineEntry", page, rawLine, lineNumber, extractWrappedLemma(lineEntry.groups.lemma)));
-};
-const scanPairedForms = (page, rawLine, lineNumber, matches) => {
-	const pairedForms = rawLine.match(PAIRED_FORMS_RE);
-	if (!pairedForms?.groups?.forms) return;
-	matches.push(createSurfaceMatch("pairedForms", page, rawLine, lineNumber, pairedForms.groups.forms));
-};
-const scanCodeLine = (page, rawLine, lineNumber, matches) => {
-	const codeLine = rawLine.match(CODE_LINE_RE);
-	if (!codeLine?.groups?.codes) return;
-	matches.push(createSurfaceMatch("codeLine", page, rawLine, lineNumber, codeLine.groups.codes));
-};
-const scanInlineSubentries = (page, rawLine, lineNumber, matches) => {
-	for (const match of rawLine.matchAll(INLINE_SUBENTRY_RE)) {
-		if (!match.groups?.lemma) continue;
-		matches.push(createSurfaceMatch("inlineSubentry", page, match.groups.lemma, lineNumber, match.groups.lemma));
-	}
-};
 /**
-* Extracts dictionary surface matches from a markdown page.
+* Normalizes page content by applying preprocessing transforms and standardizing line endings.
 */
-const scanDictionaryMarkdownPage = (page) => {
-	const lines = page.content.split(/\n/u);
-	const matches = [];
-	for (let index = 0; index < lines.length; index++) {
-		const rawLine = lines[index]?.trim() ?? "";
-		if (!rawLine) continue;
-		if (scanHeadingLine(page, rawLine, index + 1, matches)) continue;
-		scanLineEntry(page, rawLine, index + 1, matches);
-		scanPairedForms(page, rawLine, index + 1, matches);
-		scanCodeLine(page, rawLine, index + 1, matches);
-		scanInlineSubentries(page, rawLine, index + 1, matches);
-	}
-	return matches;
+const normalizePages = (pages, options) => {
+	const transforms = options.preprocess ?? [];
+	return pages.map((page) => {
+		return {
+			content: normalizeLineEndings(transforms.length ? applyPreprocessToPage(page.content, page.id, transforms) : page.content),
+			id: page.id
+		};
+	});
 };
 /**
-* Aggregates dictionary surface counts across markdown pages.
+* Joins all page content into a single string with boundary tracking.
+* Returns the joined string and a list of boundary mappings (start/end indices for each page).
 */
-const analyzeDictionaryMarkdownPages = (pages) => {
-	const counts = emptyCounts();
-	const matches = [];
-	for (const page of pages) {
-		const pageMatches = scanDictionaryMarkdownPage(page);
-		for (const match of pageMatches) {
-			counts[match.kind] += 1;
-			matches.push(match);
-		}
+const buildJoinedContent = (pages, joiner) => {
+	const boundaries = [];
+	const joined = pages.map((p) => p.content).join(joiner);
+	let offset = 0;
+	for (let i = 0; i < pages.length; i++) {
+		const content = pages[i].content;
+		const start = offset;
+		const end = start + content.length;
+		boundaries.push({
+			end,
+			id: pages[i].id,
+			start
+		});
+		offset += content.length + (i < pages.length - 1 ? joiner.length : 0);
 	}
 	return {
-		counts,
-		matches
+		boundaries,
+		joined
 	};
 };
-//#endregion
-//#region src/dictionary/profile.ts
-const normalizedProfileCache = /* @__PURE__ */ new WeakMap();
-const normalizeStopLemmaWord = (word) => normalizeArabicForComparison(word).replace(/^[\s:؛،,.!?؟()[\]{}«»"'“”‘’]+/gu, "").replace(/[\s:؛،,.!?؟()[\]{}«»"'“”‘’]+$/gu, "").trim();
-const uniqueNormalizedSet = (values, normalize) => new Set(values.map(normalize).filter(Boolean));
-const assertNever$2 = (value) => {
-	throw new Error(`Unhandled dictionary profile variant: ${JSON.stringify(value)}`);
-};
-const normalizeFamily = (family) => {
-	switch (family.use) {
-		case "heading": return {
-			...family,
-			allowNextLineColon: family.allowNextLineColon ?? false,
-			allowSingleLetter: family.allowSingleLetter ?? false
-		};
-		case "lineEntry": return {
-			...family,
-			allowMultiWord: family.allowMultiWord ?? false,
-			allowWhitespaceBeforeColon: family.allowWhitespaceBeforeColon ?? false,
-			wrappers: family.wrappers ?? "none"
-		};
-		case "inlineSubentry": return {
-			...family,
-			prefixes: family.prefixes ?? ["و"],
-			stripPrefixesFromLemma: family.stripPrefixesFromLemma ?? true
-		};
-		case "codeLine": return {
-			...family,
-			wrappers: family.wrappers ?? "either"
-		};
-		case "pairedForms": return {
-			...family,
-			requireStatusTail: family.requireStatusTail ?? false,
-			separator: family.separator ?? "comma"
-		};
-		default: return assertNever$2(family);
+/**
+* Binary search to find which page ID corresponds to a character offset in the joined content.
+* Each range is inclusive of `end`; an offset beyond the last range maps to the last page, and any other unmatched offset (or an empty boundary list) yields undefined.
+*/
+const findBoundaryIdForOffset = (offset, boundaries) => {
+	let lo = 0;
+	let hi = boundaries.length - 1;
+	while (lo <= hi) {
+		const mid = lo + hi >>> 1;
+		const boundary = boundaries[mid];
+		if (offset < boundary.start) hi = mid - 1;
+		else if (offset > boundary.end) lo = mid + 1;
+		else return boundary.id;
 	}
+	if (boundaries.length === 0) return;
+	const last = boundaries.at(-1);
+	return offset > last.end ? last.id : void 0;
 };
-const normalizeBlocker = (blocker) => {
-	switch (blocker.use) {
-		case "authorityIntro": return {
-			...blocker,
-			precision: blocker.precision ?? "high"
-		};
-		case "stopLemma": return {
-			...blocker,
-			normalizedWords: uniqueNormalizedSet(blocker.words, normalizeStopLemmaWord)
+/**
+* Helper to construct a standardized validation issue object.
+*/
+const createIssue$1 = (type, segment, segmentIndex, overrides = {}, pageMap) => {
+	const segmentSnapshot = buildSegmentSnapshot(segment);
+	const page = pageMap?.get(segment.from);
+	const matchIndex = overrides.matchIndex;
+	const { matchIndex: _ignored, ...restOverrides } = overrides;
+	const base = {
+		actual: {
+			from: segment.from,
+			to: segment.to
+		},
+		segment: segmentSnapshot,
+		segmentIndex,
+		...restOverrides
+	};
+	switch (type) {
+		case "page_not_found": return {
+			...base,
+			evidence: overrides.evidence ?? `Segment.from=${segment.from} does not exist in input pages.`,
+			hint: "Check page IDs passed into segmentPages() and validateSegments().",
+			severity: "error",
+			type
 		};
-		case "previousWord": return {
-			...blocker,
-			normalizedWords: uniqueNormalizedSet(blocker.words, normalizeArabicForComparison)
+		case "content_not_found": return {
+			...base,
+			evidence: overrides.evidence ?? "Segment content not found in any page content.",
+			hint: overrides.hint ?? "Check preprocessing options, joiner settings, or whitespace normalization.",
+			pageContext: page ? {
+				pageId: page.id,
+				pagePreview: buildPreview(page.content)
+			} : void 0,
+			severity: "error",
+			type
 		};
-		case "previousChar": return {
-			...blocker,
-			charSet: new Set(blocker.chars)
+		case "page_attribution_mismatch": {
+			const matchedFromId = overrides.expected?.from ?? overrides.actual?.from ?? segment.from;
+			const actualPage = pageMap?.get(matchedFromId);
+			return {
+				...base,
+				evidence: overrides.evidence ?? `Content found in joined content at page ${matchedFromId}, but segment.from=${segment.from}.`,
+				hint: overrides.hint ?? "Check duplicate content handling and boundary detection rules.",
+				pageContext: actualPage ? {
+					matchIndex: matchIndex ?? -1,
+					pageId: actualPage.id,
+					pagePreview: buildPreview(actualPage.content)
+				} : void 0,
+				severity: "error",
+				type
+			};
+		}
+		case "max_pages_violation": return {
+			...base,
+			evidence: overrides.evidence ?? `Segment spans pages ${segment.from}-${overrides.actual?.to}.`,
+			hint: overrides.hint ?? "Check maxPages windowing in breakpoint-processor.ts and page constraints.",
+			severity: "error",
+			type
+		};
+		default: return {
+			...base,
+			severity: "error",
+			type
 		};
-		case "intro":
-		case "pageContinuation": return blocker;
-		default: return assertNever$2(blocker);
-	}
-};
-const normalizeZone = (zone) => ({
-	blockers: (zone.blockers ?? []).map(normalizeBlocker),
-	families: zone.families.map(normalizeFamily),
-	name: zone.name,
-	when: zone.when ? {
-		activateAfter: zone.when.activateAfter,
-		maxPageId: zone.when.maxPageId,
-		minPageId: zone.when.minPageId
-	} : void 0
-});
-const createIssue$1 = (code, path, message, zoneName) => ({
-	code,
-	message,
-	path,
-	...zoneName ? { zoneName } : {}
-});
-const validateGate = (gate, zone, gateIndex, seenActivateAfterKeys, issues) => {
-	const gatePath = `zones[].when.activateAfter[${gateIndex}]`.replace("[]", `[${zone.name}]`);
-	if (gate.use === "headingText") {
-		if (!gate.match.trim()) issues.push(createIssue$1("invalid_gate_match", `${gatePath}.match`, `dictionary gate match must be non-empty`, zone.name));
-		if (gate.fuzzy !== void 0 && typeof gate.fuzzy !== "boolean") issues.push(createIssue$1("invalid_gate_fuzzy", `${gatePath}.fuzzy`, `dictionary gate fuzzy must be a boolean when provided`, zone.name));
 	}
-	const dedupeKey = `${gate.use}:${JSON.stringify(gate)}`;
-	if (seenActivateAfterKeys.has(dedupeKey)) issues.push(createIssue$1("duplicate_activate_after_gate", gatePath, `dictionary zone "${zone.name}" has duplicate activateAfter gates`, zone.name));
-	seenActivateAfterKeys.add(dedupeKey);
 };
-const validateFamily = (family, zone, familyIndex, issues) => {
-	const familyPath = `zones[].families[${familyIndex}]`.replace("[]", `[${zone.name}]`);
-	switch (family.use) {
-		case "heading":
-			if (family.classes.length === 0) issues.push(createIssue$1("empty_heading_classes", `${familyPath}.classes`, `dictionary heading family in zone "${zone.name}" must include at least one class`, zone.name));
-			if (family.emit === "chapter" && !family.classes.includes("chapter")) issues.push(createIssue$1("inert_heading_family", familyPath, `dictionary heading family in zone "${zone.name}" emits "chapter" but never matches chapter headings`, zone.name));
-			if (family.emit === "marker" && !family.classes.includes("marker")) issues.push(createIssue$1("inert_heading_family", familyPath, `dictionary heading family in zone "${zone.name}" emits "marker" but never matches marker headings`, zone.name));
-			if (family.emit === "entry" && !family.classes.includes("entry")) issues.push(createIssue$1("inert_heading_family", familyPath, `dictionary heading family in zone "${zone.name}" emits "entry" but never matches entry headings`, zone.name));
-			break;
-		case "lineEntry": break;
-		case "inlineSubentry":
-			if (family.prefixes?.some((prefix) => !prefix.trim())) issues.push(createIssue$1("empty_inline_prefixes", `${familyPath}.prefixes`, `inlineSubentry prefixes must be non-empty strings`, zone.name));
-			break;
-		case "codeLine": break;
-		case "pairedForms": break;
-		default: assertNever$2(family);
+/**
+* Finds all occurrences of a content string within the joined text.
+* Respects search limits to avoid performance cliffs on highly repetitive content.
+*/
+const findJoinedMatches = (content, joined, searchStart, searchEnd, limit = Infinity) => {
+	const matches = [];
+	if (!content || searchStart >= searchEnd) return matches;
+	let idx = joined.indexOf(content, searchStart);
+	let count = 0;
+	while (idx >= 0 && idx < searchEnd && count < limit) {
+		matches.push({
+			end: idx + content.length - 1,
+			start: idx
+		});
+		idx = joined.indexOf(content, idx + 1);
+		if (idx >= searchEnd) break;
+		count++;
 	}
+	return matches;
 };
-const validateBlocker = (blocker, zone, blockerIndex, issues) => {
-	const blockerPath = `zones[].blockers[${blockerIndex}]`.replace("[]", `[${zone.name}]`);
-	switch (blocker.use) {
-		case "stopLemma":
-			if (blocker.words.length === 0 || blocker.words.some((word) => !word.trim())) issues.push(createIssue$1("invalid_stop_words", `${blockerPath}.words`, `stopLemma blocker in zone "${zone.name}" must include non-empty words`, zone.name));
-			break;
-		case "previousWord":
-			if (blocker.words.length === 0 || blocker.words.some((word) => !word.trim())) issues.push(createIssue$1("invalid_previous_words", `${blockerPath}.words`, `previousWord blocker in zone "${zone.name}" must include non-empty words`, zone.name));
-			break;
-		case "previousChar":
-			if (blocker.chars.length === 0 || blocker.chars.some((char) => !char)) issues.push(createIssue$1("invalid_previous_chars", `${blockerPath}.chars`, `previousChar blocker in zone "${zone.name}" must include chars`, zone.name));
-			break;
-		case "authorityIntro":
-		case "intro":
-		case "pageContinuation": break;
-		default: assertNever$2(blocker);
+/**
+* Verifies that a matched segment falls within the allowed maxPages constraint.
+* Checks both implicit spans (calculated from match end) and explicit segment.to claims.
+*/
+const checkMaxPagesViolation = (segment, segmentIndex, maxPages, matchEnd, _expectedBoundaryEnd, boundaries) => {
+	const actualToId = findBoundaryIdForOffset(matchEnd, boundaries);
+	if (actualToId === void 0) return [];
+	if (maxPages === 0) {
+		if (actualToId !== segment.from) return [createIssue$1("max_pages_violation", segment, segmentIndex, {
+			actual: {
+				from: segment.from,
+				to: actualToId
+			},
+			evidence: `Segment spans pages ${segment.from}-${actualToId} in joined content (maxPages=0).`,
+			expected: {
+				from: segment.from,
+				to: segment.from
+			}
+		})];
 	}
-};
-var DictionaryProfileValidationError = class extends Error {
-	issues;
-	constructor(issues) {
-		super(issues.length === 1 ? issues[0].message : `Dictionary profile validation failed with ${issues.length} issues`);
-		this.name = "DictionaryProfileValidationError";
-		this.issues = issues;
+	if (segment.to !== void 0) {
+		if (actualToId > segment.to) return [createIssue$1("max_pages_violation", segment, segmentIndex, {
+			actual: {
+				from: segment.from,
+				to: actualToId
+			},
+			evidence: `Segment content ends on page ${actualToId} but segment.to is ${segment.to}.`,
+			expected: {
+				from: segment.from,
+				to: segment.to
+			}
+		})];
+	} else if (maxPages !== void 0) {
+		const span = actualToId - segment.from;
+		if (span > maxPages) return [createIssue$1("max_pages_violation", segment, segmentIndex, {
+			actual: {
+				from: segment.from,
+				to: actualToId
+			},
+			evidence: `Segment spans ${span} pages (maxPages=${maxPages}).`,
+			expected: {
+				from: segment.from,
+				to: segment.from + maxPages
+			}
+		})];
 	}
+	return [];
 };
-const validateZone = (zone, zoneIndex, seenZoneNames, issues) => {
-	const zonePath = `zones[${zoneIndex}]`;
-	const trimmedName = zone.name.trim();
-	if (!trimmedName) issues.push(createIssue$1("empty_zone_name", `${zonePath}.name`, `dictionary zone name must be non-empty`));
-	else if (seenZoneNames.has(trimmedName)) issues.push(createIssue$1("duplicate_zone_name", `${zonePath}.name`, `dictionary zone names must be unique; duplicated "${trimmedName}"`, trimmedName));
-	else seenZoneNames.add(trimmedName);
-	if (zone.families.length === 0) issues.push(createIssue$1("empty_zone_families", `${zonePath}.families`, `dictionary zone "${zone.name}" must declare at least one family`, zone.name));
-	if (zone.when?.minPageId !== void 0 && zone.when?.maxPageId !== void 0 && zone.when.minPageId > zone.when.maxPageId) issues.push(createIssue$1("invalid_zone_page_range", `${zonePath}.when`, `dictionary zone "${zone.name}" has minPageId greater than maxPageId`, zone.name));
-	const seenActivateAfterKeys = /* @__PURE__ */ new Set();
-	for (let gateIndex = 0; gateIndex < (zone.when?.activateAfter?.length ?? 0); gateIndex++) validateGate(zone.when.activateAfter[gateIndex], zone, gateIndex, seenActivateAfterKeys, issues);
-	for (let familyIndex = 0; familyIndex < zone.families.length; familyIndex++) validateFamily(zone.families[familyIndex], zone, familyIndex, issues);
-	for (let blockerIndex = 0; blockerIndex < (zone.blockers?.length ?? 0); blockerIndex++) validateBlocker(zone.blockers[blockerIndex], zone, blockerIndex, issues);
+/**
+* Handles validation when no boundary exists for `segment.from`.
+* Fallback strategy: search the entire document and report where the content was actually found, if anywhere.
+*/
+const handleMissingBoundary = (segment, segmentIndex, joined, boundaries, pageMap) => {
+	const matches = findJoinedMatches(segment.content, joined, 0, joined.length, 1);
+	if (matches.length === 0) return [createIssue$1("content_not_found", segment, segmentIndex, { evidence: "Segment content not found in any page content." }, pageMap)];
+	const match = matches[0];
+	const actualFromId = findBoundaryIdForOffset(match.start, boundaries);
+	const actualToId = findBoundaryIdForOffset(match.end, boundaries);
+	return [createIssue$1("page_attribution_mismatch", segment, segmentIndex, {
+		actual: {
+			from: segment.from,
+			to: segment.to
+		},
+		evidence: `Content found in joined content at page ${actualFromId}, but segment.from=${segment.from}.`,
+		expected: {
+			from: actualFromId,
+			to: actualToId
+		},
+		matchIndex: match.start
+	}, pageMap)];
 };
 /**
-* Validates a dictionary profile without normalizing it.
+* Performs a widened search when the direct check fails.
+* Includes a small buffer around the expected position, and optionally a full-document search for short segments.
 */
-const validateDictionaryProfile = (profile) => {
-	const issues = [];
-	if (profile.version !== 2) issues.push(createIssue$1("invalid_version", "version", `dictionary profile version must be 2, got ${profile.version}`));
-	if (profile.zones.length === 0) {
-		issues.push(createIssue$1("missing_zones", "zones", `dictionary profile must contain at least one zone`));
-		return issues;
+const handleFallbackSearch = (segment, segmentIndex, joined, searchStart, searchEnd, expectedBoundary, boundaries, pageMap, maxPages, validationOptions) => {
+	const content = segment.content;
+	const bufferSize = 1e3;
+	const rawMatches = findJoinedMatches(content, joined, Math.max(0, searchStart - bufferSize), Math.min(joined.length, searchEnd + bufferSize), 5);
+	if (rawMatches.length === 0) {
+		const threshold = validationOptions?.fullSearchThreshold ?? 500;
+		if (content.length < threshold) {
+			const fullMatches = findJoinedMatches(content, joined, 0, joined.length, 50);
+			const validMatch = fullMatches.find((m) => {
+				return findBoundaryIdForOffset(m.start, boundaries) === segment.from;
+			});
+			if (validMatch) return checkMaxPagesViolation(segment, segmentIndex, maxPages, validMatch.end, expectedBoundary.end, boundaries);
+			if (fullMatches.length > 0) {
+				const match = fullMatches[0];
+				const actualFromId = findBoundaryIdForOffset(match.start, boundaries);
+				const actualToId = findBoundaryIdForOffset(match.end, boundaries);
+				return [createIssue$1("page_attribution_mismatch", segment, segmentIndex, {
+					actual: {
+						from: segment.from,
+						to: segment.to
+					},
+					evidence: `Content found in joined content at page ${actualFromId}, but segment.from=${segment.from}.`,
+					expected: {
+						from: actualFromId,
+						to: actualToId
+					},
+					matchIndex: match.start
+				}, pageMap)];
+			}
+		}
+		return [createIssue$1("content_not_found", segment, segmentIndex, {
+			evidence: `Segment content (${content.length} chars) not found in expected window.`,
+			hint: "Check page boundary attribution in segmenter.ts."
+		}, pageMap)];
 	}
-	const seenZoneNames = /* @__PURE__ */ new Set();
-	for (let zoneIndex = 0; zoneIndex < profile.zones.length; zoneIndex++) validateZone(profile.zones[zoneIndex], zoneIndex, seenZoneNames, issues);
-	return issues;
+	const alignedMatches = rawMatches.filter((m) => m.start >= expectedBoundary.start && m.start <= expectedBoundary.end);
+	if (alignedMatches.length > 0) {
+		const primary = alignedMatches[0];
+		return checkMaxPagesViolation(segment, segmentIndex, maxPages, primary.end, expectedBoundary.end, boundaries);
+	}
+	const primary = rawMatches[0];
+	const actualFromId = findBoundaryIdForOffset(primary.start, boundaries);
+	const actualToId = findBoundaryIdForOffset(primary.end, boundaries);
+	return [createIssue$1("page_attribution_mismatch", segment, segmentIndex, {
+		actual: {
+			from: segment.from,
+			to: segment.to
+		},
+		evidence: `Content found in joined content at page ${actualFromId}, but segment.from=${segment.from}.`,
+		expected: {
+			from: actualFromId,
+			to: actualToId
+		},
+		matchIndex: primary.start
+	}, pageMap)];
 };
 /**
-* Normalizes and validates a dictionary profile before runtime matching.
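+* Calculates the exclusive search end: just past the `segment.to` page when it is known, up to 50,000 chars past the `from` page when `segment.to` has no boundary, otherwise just past the `from` page.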
 */
-const normalizeDictionaryProfile = (profile) => {
-	const cached = normalizedProfileCache.get(profile);
-	if (cached) return cached;
-	const issues = validateDictionaryProfile(profile);
-	if (issues.length > 0) throw new DictionaryProfileValidationError(issues);
-	const normalized = {
-		version: 2,
-		zones: profile.zones.map(normalizeZone)
-	};
-	normalizedProfileCache.set(profile, normalized);
-	return normalized;
+const getSearchRange = (segment, expectedBoundary, boundaryMap, joinedLength) => {
+	let searchEnd = expectedBoundary.end + 1;
+	if (segment.to !== void 0) {
+		const endBoundary = boundaryMap.get(segment.to);
+		if (endBoundary) searchEnd = endBoundary.end + 1;
+		else searchEnd = Math.min(joinedLength, expectedBoundary.end + 5e4);
+	}
+	return searchEnd;
 };
-//#endregion
-//#region src/types/rules.ts
 /**
-* Pattern type key names for split rules.
-*
-* Use this array to dynamically iterate over pattern types in UIs,
-* or use the `PatternTypeKey` type for type-safe string unions.
-*
-* @example
-* // Build a dropdown/select in UI
-* PATTERN_TYPE_KEYS.map(key => <option value={key}>{key}</option>)
+* Validates attribution for a single segment by searching for its content in the joined text.
+* Returns issues if content is missing, mis-attributed, or violates page limits.
+*/
+const getAttributionIssues = (segment, segmentIndex, maxPages, joined, boundaries, boundaryMap, pageMap, validationOptions) => {
+	if (!segment.content) return [createIssue$1("content_not_found", segment, segmentIndex, { evidence: "Segment content is empty." }, pageMap)];
+	const expectedBoundary = boundaryMap.get(segment.from);
+	if (!expectedBoundary) return handleMissingBoundary(segment, segmentIndex, joined, boundaries, pageMap);
+	const searchEnd = getSearchRange(segment, expectedBoundary, boundaryMap, joined.length);
+	const searchStart = expectedBoundary.start;
+	const idx = joined.indexOf(segment.content, searchStart);
+	if (idx !== -1 && idx < searchEnd) return checkMaxPagesViolation(segment, segmentIndex, maxPages, idx + segment.content.length - 1, expectedBoundary.end, boundaries);
+	return handleFallbackSearch(segment, segmentIndex, joined, searchStart, searchEnd, expectedBoundary, boundaries, pageMap, maxPages, validationOptions);
+};
+/**
+* Performs purely static checks on the segment metadata (IDs and spans) before expensive content searching.
+*/
+const checkStaticMaxPages = (segment, index, maxPages) => {
+	if (maxPages === void 0 || segment.to === void 0) return null;
+	if (maxPages === 0) {
+		if (segment.to !== segment.from) return createIssue$1("max_pages_violation", segment, index, {
+			evidence: "maxPages=0 requires all segments to stay within one page.",
+			expected: {
+				from: segment.from,
+				to: segment.from
+			},
+			hint: "Check boundary detection in breakpoint-utils.ts."
+		});
+		return null;
+	}
+	const span = segment.to - segment.from;
+	if (span > maxPages) return createIssue$1("max_pages_violation", segment, index, {
+		evidence: `Segment spans ${span} pages (maxPages=${maxPages}).`,
+		expected: {
+			from: segment.from,
+			to: segment.from + maxPages
+		},
+		hint: "Check breakpoint windowing and page attribution in breakpoint-processor.ts."
+	});
+	return null;
+};
+/**
+* Validates a list of segments against the source pages.
+* Checks for:
+* - Page existence (invalid IDs)
+* - Content fidelity (content must exist in pages)
+* - Page attribution (from/to must match content location)
+* - Page constraints (maxPages violations)
 *
-* @example
-* // Type-safe pattern key validation
-* const validateKey = (k: string): k is PatternTypeKey =>
-*   (PATTERN_TYPE_KEYS as readonly string[]).includes(k);
+* @param pages Input pages used for segmentation
+* @param options Options used during segmentation (for preprocessing/joining consistency)
+* @param segments The output segments to validate
+* @param validationOptions Optional settings for validation behavior
+* @returns A detailed validation report
 */
-const PATTERN_TYPE_KEYS = [
-	"lineStartsWith",
-	"lineStartsAfter",
-	"lineEndsWith",
-	"template",
-	"regex",
-	"dictionaryEntry"
-];
+const validateSegments = (pages, options, segments, validationOptions) => {
+	const normalizedPages = normalizePages(pages, options);
+	const { boundaries, joined } = buildJoinedContent(normalizedPages, options.pageJoiner === "newline" ? "\n" : " ");
+	const boundaryMap = /* @__PURE__ */ new Map();
+	const pageMap = /* @__PURE__ */ new Map();
+	for (const b of boundaries) boundaryMap.set(b.id, b);
+	for (const p of normalizedPages) pageMap.set(p.id, p);
+	const pageIds = new Set(normalizedPages.map((p) => p.id));
+	const maxPages = options.maxPages;
+	const issues = [];
+	for (let i = 0; i < segments.length; i++) {
+		const segment = segments[i];
+		if (!pageIds.has(segment.from)) {
+			issues.push(createIssue$1("page_not_found", segment, i));
+			continue;
+		}
+		if (segment.to !== void 0 && !pageIds.has(segment.to)) issues.push(createIssue$1("page_not_found", segment, i, { evidence: `Segment.to=${segment.to} does not exist in input pages.` }));
+		const staticMaxPageIssue = checkStaticMaxPages(segment, i, maxPages);
+		if (staticMaxPageIssue) issues.push(staticMaxPageIssue);
+		const attributionIssues = getAttributionIssues(segment, i, maxPages, joined, boundaries, boundaryMap, pageMap, validationOptions);
+		issues.push(...attributionIssues);
+	}
+	const errors = issues.filter((issue) => issue.severity === "error").length;
+	const warnings = issues.filter((issue) => issue.severity === "warn").length;
+	return {
+		issues,
+		ok: issues.length === 0,
+		summary: {
+			errors,
+			issues: issues.length,
+			pageCount: pages.length,
+			segmentCount: segments.length,
+			warnings
+		}
+	};
+};
 //#endregion
 //#region src/segmentation/debug-meta.ts
 const resolveDebugConfig = (debug) => {
@@ -1843,7 +2028,14 @@ const getSegmentDebugReason = (segment, options) => {
 	return getDebugReason(segment.meta, options);
 };
 //#endregion
-//#region src/dictionary/runtime.ts
+//#region src/dictionary/constants.ts
+/**
+* Shared constants used by the dictionary runtime: phrase lists, regex patterns,
+* keyword sets, and structural-leak detection data.
+*
+* Keeping these here allows both runtime.ts and heading-classifier.ts to import
+* from a single source of truth without circular dependencies.
+*/
 const INTRO_PHRASES = [
 	"وقال",
 	"قال",
@@ -1994,7 +2186,16 @@ const CONTINUATION_PREV_WORDS = [
 	"ثم",
 	"وجل"
 ];
-const AUTHORITY_RE = /^(?:(?:و)?قال\s+(?:أبو|ابن|ثعلب|الليث|الأزهري|الجوهري|الفراء)\b|(?:أبو|ابن|ثعلب|الليث|الأزهري|الجوهري|الفراء)\s+\S+)/u;
+const NORMALIZED_AUTHORITY_INTRO_PATTERN = [
+	"أبو",
+	"ابن",
+	"ثعلب",
+	"الليث",
+	"الأزهري",
+	"الجوهري",
+	"الفراء"
+].map((term) => escapeRegex(normalizeArabicForComparison(term))).join("|");
+const NORMALIZED_AUTHORITY_RE = new RegExp(`^(?:(?:و)?قال\\s+(?:${NORMALIZED_AUTHORITY_INTRO_PATTERN})(?=$|[\\s:،؛,.])|(?:${NORMALIZED_AUTHORITY_INTRO_PATTERN})\\s+\\S+)`, "u");
 const AUTHORITY_HEAD_WORDS = [
 	"الأزهري",
 	"الأصمعي",
@@ -2015,13 +2216,22 @@ const AUTHORITY_HEAD_WORDS = [
 	"ثعلب",
 	"شمر"
 ];
+/** Aggressive-precision authority terms (subset used for fast startsWith checks). */
+const AUTHORITY_AGGRESSIVE_TERMS = [
+	"الليث",
+	"الأزهري",
+	"الأصمعي",
+	"الجوهري",
+	"الفراء",
+	"ثعلب",
+	"شمر"
+];
 const STRONG_SENTENCE_TERMINATORS$1 = /[.!?؟؛۔…]$/u;
-const TRAILING_PAGE_WRAP_NOISE$1 = /[\s\u0660-\u0669\d«»"“”'‘’()[\]{}<>]+$/u;
-const TRAILING_WORD_DELIMITERS$1 = /[\s\u0660-\u0669\d«»"“”'‘’()[\]{}<>.,!?؟؛،:]+$/u;
+const TRAILING_PAGE_WRAP_NOISE$1 = /[\s\u0660-\u0669\d«»""'''()[\]{}<>]+$/u;
+const TRAILING_WORD_DELIMITERS$1 = /[\s\u0660-\u0669\d«»""'''()[\]{}<>.,!?؟؛،:]+$/u;
 const ARABIC_WORD_REGEX$1 = new RegExp(ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN, "gu");
-const HEADING_PREFIX = "## ";
-const CODE_LINE_PATTERN = getTokenPattern("harfs").replaceAll("\\s+", "[ \\t]+");
-const BARE_CODE_LEMMA_RE = new RegExp(`^(?:${CODE_LINE_PATTERN})$`, "u");
+const CODE_LINE_PATTERN$1 = getTokenPattern("harfs").replaceAll("\\s+", "[ \\t]+");
+const BARE_CODE_LEMMA_RE = new RegExp(`^(?:${CODE_LINE_PATTERN$1})$`, "u");
 const STATUS_TAIL_PATTERN = "(?:مستعمل|مستعملة|مستعملان|مهمل|مهملة)";
 const GATE_TOKEN_MAP = {
 	bab: "باب",
@@ -2029,18 +2239,38 @@ const GATE_TOKEN_MAP = {
 	kitab: "كتاب"
 };
 const GATE_DELIMITER_RE = /[\s:،؛()[\]{}\-–—]/u;
-const assertNever$1 = (value) => {
-	throw new Error(`Unhandled dictionary runtime variant: ${JSON.stringify(value)}`);
-};
-const lineEntryRegexCache = /* @__PURE__ */ new WeakMap();
-const inlineSubentryRegexCache = /* @__PURE__ */ new WeakMap();
-const pairedFormsRegexCache = /* @__PURE__ */ new WeakMap();
+const normalizeStopLemmaWord = (text) => normalizeArabicForComparison(text).replace(/^[\s:؛،,.!?؟()[\]{}«»"'“”‘’]+/gu, "").replace(/[\s:؛،,.!?؟()[\]{}«»"'“”‘’]+$/gu, "").trim();
+/** Pre-normalized intro phrases for startsWith / endsWith checks. */
+const NORMALIZED_INTRO_PHRASES = INTRO_PHRASES.map(normalizeArabicForComparison);
+/** Pre-normalized intro tail phrases for endsWith checks. */
+const NORMALIZED_INTRO_TAIL_PHRASES = INTRO_TAIL_PHRASES.map(normalizeArabicForComparison);
+/** Pre-normalized authority head words as a Set for O(1) lookup. */
+const NORMALIZED_AUTHORITY_HEAD_WORDS_SET = new Set(AUTHORITY_HEAD_WORDS.map(normalizeStopLemmaWord));
+/** Pre-normalized aggressive authority terms for startsWith checks. */
+const NORMALIZED_AUTHORITY_AGGRESSIVE_TERMS = AUTHORITY_AGGRESSIVE_TERMS.map(normalizeArabicForComparison);
+/** Pre-normalized qualifier tail prefixes for startsWith checks. */
+const NORMALIZED_QUALIFIER_TAIL_PREFIXES = QUALIFIER_TAIL_PREFIXES.map(normalizeArabicForComparison);
+/** Pre-normalized structural lemma prefixes for startsWith checks. */
+const NORMALIZED_STRUCTURAL_LEMMA_PREFIXES = STRUCTURAL_LEMMA_PREFIXES.map(normalizeArabicForComparison);
+/** Pre-normalized structural line keywords for includes checks. */
+const NORMALIZED_STRUCTURAL_LINE_KEYWORDS = STRUCTURAL_LINE_KEYWORDS.map(normalizeArabicForComparison);
+/** Pre-normalized continuation prev words as a Set for O(1) lookup. */
+const NORMALIZED_CONTINUATION_PREV_WORDS_SET = new Set(CONTINUATION_PREV_WORDS.map(normalizeArabicForComparison));
+/** Pre-normalized 'ولل' prefix. */
+const NORMALIZED_WLAL_PREFIX = normalizeArabicForComparison("ولل");
+//#endregion
+//#region src/dictionary/dictionary-blockers.ts
+/**
+* Limit backwards scans to a small suffix; dictionary blockers only need the
+* immediate local context rather than an unbounded full-page search.
+*/
+const LAST_ARABIC_WORD_LOOKBACK_CHARS = 256;
+const MAX_INTRO_CONTEXT_CHARS = 240;
+const IGNORABLE_BOUNDARY_CHAR_RE = /(?:\s|\u200B|\u200C|\u200D|\u200E|\u200F|\u061C)/u;
 const trimTrailingPageWrapNoise$1 = (text) => text.trimEnd().replace(TRAILING_PAGE_WRAP_NOISE$1, "");
-const endsWithStrongSentenceTerminator$1 = (pageContent) => {
-	return STRONG_SENTENCE_TERMINATORS$1.test(trimTrailingPageWrapNoise$1(pageContent));
-};
+const endsWithStrongSentenceTerminator$1 = (pageContent) => STRONG_SENTENCE_TERMINATORS$1.test(trimTrailingPageWrapNoise$1(pageContent));
 const extractLastArabicWord$1 = (text, endExclusive = text.length) => {
-	const windowStart = Math.max(0, endExclusive - 256);
+	const windowStart = Math.max(0, endExclusive - LAST_ARABIC_WORD_LOOKBACK_CHARS);
 	const withoutTrailingDelimiters = trimTrailingPageWrapNoise$1(text.slice(windowStart, endExclusive)).replace(TRAILING_WORD_DELIMITERS$1, "");
 	let lastMatch = "";
 	ARABIC_WORD_REGEX$1.lastIndex = 0;
@@ -2050,105 +2280,339 @@ const extractLastArabicWord$1 = (text, endExclusive = text.length) => {
 const previousNonWhitespaceChar = (text, endExclusive = text.length) => {
 	for (let index = endExclusive - 1; index >= 0; index--) {
 		const char = text[index];
-		if (char && !/\s/u.test(char)) return char;
+		if (char && !IGNORABLE_BOUNDARY_CHAR_RE.test(char)) return char;
 	}
 	return "";
 };
-const normalizedEquals = (left, right) => normalizeArabicForComparison(left) === normalizeArabicForComparison(right);
-const normalizedStartsWith = (text, prefix) => normalizeArabicForComparison(text).startsWith(normalizeArabicForComparison(prefix));
-const normalizeStopLemma = (text) => normalizeArabicForComparison(text).replace(/^[\s:؛،,.!?؟()[\]{}«»"'“”‘’]+/gu, "").replace(/[\s:؛،,.!?؟()[\]{}«»"'“”‘’]+$/gu, "").trim();
-const getTrailingContext = (text, endExclusive, maxChars = 240) => text.slice(Math.max(0, endExclusive - maxChars), endExclusive);
-const isDelimitedPrefixMatch = (text, prefix) => {
+const isAtPageStart = (text, endExclusive) => {
+	for (let index = endExclusive - 1; index >= 0; index--) {
+		const char = text[index];
+		if (char && !IGNORABLE_BOUNDARY_CHAR_RE.test(char)) return false;
+	}
+	return true;
+};
+const normalizeStopLemma = normalizeStopLemmaWord;
+const getTrailingContext = (text, endExclusive, maxChars = MAX_INTRO_CONTEXT_CHARS) => text.slice(Math.max(0, endExclusive - maxChars), endExclusive);
+const normalizeIntroContextText = (text) => normalizeArabicForComparison(text).replace(/[/\\]+/gu, " ").replace(/[«»""'''()[\]{}]+/gu, " ").replace(/\s+/gu, " ").trim();
+const normalizeForIntroTailCheck = (text) => normalizeIntroContextText(text).replace(/[:؛،,.!?؟]+$/u, "").trimEnd();
+const isIntroCandidate = (text) => {
+	const normalized = normalizeIntroContextText(text);
+	return NORMALIZED_INTRO_PHRASES.some((phrase) => normalized.startsWith(phrase));
+};
+const endsWithIntroContext = (text) => {
+	const trimmed = text.trimEnd();
+	if (STRONG_SENTENCE_TERMINATORS$1.test(trimmed)) return false;
+	const normalized = normalizeForIntroTailCheck(trimmed);
+	if (!normalized) return false;
+	if (NORMALIZED_INTRO_PHRASES.some((phrase) => normalized.endsWith(phrase))) return true;
+	if (NORMALIZED_INTRO_TAIL_PHRASES.some((phrase) => normalized.endsWith(phrase))) return true;
+	return INTRO_TAIL_PATTERNS.some((pattern) => pattern.test(normalized));
+};
+const isAuthorityCandidate = (text, precision) => {
+	const head = normalizeStopLemma(text.split(":", 1)[0] ?? text);
+	if (head && NORMALIZED_AUTHORITY_HEAD_WORDS_SET.has(head)) return true;
+	const normalized = normalizeIntroContextText(text);
+	if (NORMALIZED_AUTHORITY_RE.test(normalized)) return true;
+	if (precision === "aggressive") return NORMALIZED_AUTHORITY_AGGRESSIVE_TERMS.some((term) => normalized.startsWith(term));
+	return false;
+};
+const hasBlockedQualifierTail = (lemma) => {
+	const parts = lemma.split(/[،,]/u).map((part) => part.trim()).filter(Boolean);
+	if (parts.length < 2) return false;
+	const tail = normalizeArabicForComparison(parts.slice(1).join(" "));
+	return NORMALIZED_QUALIFIER_TAIL_PREFIXES.some((prefix) => tail.startsWith(prefix));
+};
+const looksLikeStructuralLeak = (candidate) => {
+	if (!candidate.lemma) return false;
+	const normalizedLemma = normalizeArabicForComparison(candidate.lemma);
+	if (candidate.kind === "entry" && (/^[^\p{Script=Arabic}\d]+/u.test(candidate.lemma) || candidate.lemma.includes("{") || candidate.lemma.includes("}") || candidate.lemma.includes("##"))) return true;
+	if (candidate.kind === "entry" && BARE_CODE_LEMMA_RE.test(candidate.lemma) && (candidate.text === candidate.lemma || candidate.text === `## ${candidate.lemma}` || candidate.text.startsWith(`## ${candidate.lemma}`) || candidate.text.startsWith(`${candidate.lemma}\n## `))) return true;
+	if (candidate.family !== "pairedForms" && candidate.lemma.split(/\s+/u).filter(Boolean).length > 4) return true;
+	if (NORMALIZED_STRUCTURAL_LEMMA_PREFIXES.some((prefix) => normalizedLemma.startsWith(prefix))) return true;
+	if (normalizedLemma.startsWith(NORMALIZED_WLAL_PREFIX)) return true;
+	const structuralText = candidate.text.startsWith("## ") ? candidate.text.slice(3).trim() : candidate.text;
+	if (/^[\d\u0660-\u0669]+\s*-\s*\([^)]+\)(?:\s+##.*)?$/u.test(structuralText)) return true;
+	const normalizedText = normalizeArabicForComparison(structuralText);
+	if (STRUCTURAL_LINE_PATTERNS.some((pattern) => pattern.test(structuralText))) return NORMALIZED_STRUCTURAL_LINE_KEYWORDS.some((keyword) => normalizedText.includes(keyword));
+	return false;
+};
+const blockerApplies = (blocker, family) => !blocker.appliesTo || blocker.appliesTo.includes(family);
+const rejectsViaIntroBlocker = (candidate, blocker, localBeforeCandidate) => {
+	if (blocker.use !== "intro") return false;
+	return isIntroCandidate(candidate.probeText) || endsWithIntroContext(localBeforeCandidate);
+};
+const rejectsViaAuthorityBlocker = (candidate, blocker) => blocker.use === "authorityIntro" && isAuthorityCandidate(candidate.probeText, blocker.precision);
+const rejectsViaStopLemmaBlocker = (candidate, blocker) => {
+	if (blocker.use !== "stopLemma" || !candidate.lemma) return false;
+	const normalizedLemma = normalizeStopLemma(candidate.lemma);
+	return !!normalizedLemma && blocker.normalizedWords.has(normalizedLemma);
+};
+const previousWordIsBlocked = (blocker, word) => !!word && blocker.normalizedWords.has(normalizeArabicForComparison(word));
+const rejectsViaPageStartPreviousWord = (blocker, pageIndex, pages) => {
+	if (pageIndex === 0) return false;
+	const previousPage = pages[pageIndex - 1];
+	if (!previousPage || endsWithStrongSentenceTerminator$1(previousPage.content)) return false;
+	return previousWordIsBlocked(blocker, extractLastArabicWord$1(previousPage.content));
+};
+const rejectsViaPreviousWordBlocker = (pageContent, localIndex, blocker, pageIndex, pages) => {
+	if (blocker.use !== "previousWord") return false;
+	if (isAtPageStart(pageContent, localIndex)) {
+		if (blocker.scope === "pageStart") return rejectsViaPageStartPreviousWord(blocker, pageIndex, pages);
+		if (blocker.scope === "any" && rejectsViaPageStartPreviousWord(blocker, pageIndex, pages)) return true;
+	}
+	if (blocker.scope === "pageStart") return false;
+	return previousWordIsBlocked(blocker, extractLastArabicWord$1(pageContent, localIndex));
+};
+const rejectsViaPreviousCharBlocker = (pageContent, localIndex, blocker) => {
+	if (blocker.use !== "previousChar") return false;
+	const previousChar = previousNonWhitespaceChar(pageContent, localIndex);
+	return !!previousChar && blocker.charSet.has(previousChar);
+};
+const rejectsViaPageContinuationBlocker = (candidate, blocker, pageContent, pageIndex, pages) => {
+	if (blocker.use !== "pageContinuation") return false;
+	if (!isAtPageStart(pageContent, candidate.localIndex) || pageIndex === 0) return false;
+	const previousPage = pages[pageIndex - 1];
+	if (!previousPage || endsWithStrongSentenceTerminator$1(previousPage.content)) return false;
+	const previousWord = extractLastArabicWord$1(previousPage.content);
+	return !!previousWord && NORMALIZED_CONTINUATION_PREV_WORDS_SET.has(normalizeArabicForComparison(previousWord)) || endsWithIntroContext(previousPage.content) || isIntroCandidate(candidate.probeText) || isAuthorityCandidate(candidate.probeText, blocker.authorityPrecision);
+};
+const getBlockerRejectionReason = (blocker, candidate, localBeforeCandidate, pageContent, pageIndex, pages) => {
+	if (rejectsViaIntroBlocker(candidate, blocker, localBeforeCandidate)) return "intro";
+	if (rejectsViaAuthorityBlocker(candidate, blocker)) return "authorityIntro";
+	if (rejectsViaStopLemmaBlocker(candidate, blocker)) return "stopLemma";
+	if (rejectsViaPreviousWordBlocker(pageContent, candidate.localIndex, blocker, pageIndex, pages)) return "previousWord";
+	if (rejectsViaPreviousCharBlocker(pageContent, candidate.localIndex, blocker)) return "previousChar";
+	if (rejectsViaPageContinuationBlocker(candidate, blocker, pageContent, pageIndex, pages)) return "pageContinuation";
+	return null;
+};
+/**
+* Evaluates candidate rejection in two phases:
+*
+* Phase 1: global safety checks (not configurable per profile)
+* - `qualifierTail`: rejects comma-tail qualifier fragments such as "أي" and "قال"
+* - `structuralLeak`: rejects markdown artifacts, structural headings, and other non-lexeme leaks
+*
+* These are hard safety invariants for the Shamela-style dictionary surface,
+* so diagnostics report them alongside configurable blocker reasons.
+*
+* Phase 2: zone blockers (configurable per zone)
+* - iterates `zone.blockers` in declaration order
+* - returns the first matching rejection reason
+*/
+const getCandidateRejection = (candidate, zone, pageContext, pages) => {
+	const hasQualifierTail = hasBlockedQualifierTail(candidate.lemma ?? "");
+	if (hasQualifierTail || looksLikeStructuralLeak(candidate)) return { reason: hasQualifierTail ? "qualifierTail" : "structuralLeak" };
+	const localBeforeCandidate = getTrailingContext(pageContext.content, candidate.localIndex);
+	for (const blocker of zone.blockers) {
+		if (!blockerApplies(blocker, candidate.family)) continue;
+		const reason = getBlockerRejectionReason(blocker, candidate, localBeforeCandidate, pageContext.content, pageContext.index, pages);
+		if (reason) return { reason };
+	}
+	return null;
+};
+/**
+* Returns `true` when the candidate should be dropped (i.e. any rejection
+* reason exists).  Convenience wrapper over `getCandidateRejection`.
+*/
+const shouldRejectCandidate = (candidate, zone, pageContext, pages) => getCandidateRejection(candidate, zone, pageContext, pages) !== null;
+//#endregion
+//#region src/dictionary/heading-classifier.ts
+const HEADING_PREFIX = "## ";
+const CODE_LINE_PATTERN = getTokenPattern("harfs").replaceAll("\\s+", "[ \\t]+");
+const ARABIC_WORD_PATTERN = ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN;
+const PLAIN_ENTRY_RE = new RegExp(`^(?<lemma>${ARABIC_WORD_PATTERN}(?:\\s+${ARABIC_WORD_PATTERN}){0,1}|[([{]${ARABIC_WORD_PATTERN}(?:\\s+${ARABIC_WORD_PATTERN}){0,1}[)\\]}])\\s*:`, "u");
+const INLINE_SUBENTRY_RE = new RegExp(`(^|[\\s،؛,:.])(?<lemma>و${ARABIC_WORD_PATTERN})\\s*:`, "gu");
+const CODE_LINE_RE = new RegExp(`^(?:[[(])?(?<codes>${CODE_LINE_PATTERN})(?:[)\\]])?$`, "u");
+const PAIRED_FORMS_RE = new RegExp(`^(?<forms>${ARABIC_WORD_PATTERN}(?:\\s*[،,]\\s*${ARABIC_WORD_PATTERN})+)\\s*:`, "u");
+const ARABIC_BOUNDARY_OR_PUNCTUATION = "(?=$|[\\s:،؛()\\[\\]{}\\-–—]|[^\\p{Script=Arabic}])";
+const CHAPTER_HEADING_RE = new RegExp(`^(?:[([{]\\s*)?(?:باب|فصل|كتاب|حرف|أبواب)${ARABIC_BOUNDARY_OR_PUNCTUATION}`, "u");
+const CLUSTER_HEADING_RE = new RegExp(`^(?:\\(?\\s*)?(?:أبواب|أبنية)${ARABIC_BOUNDARY_OR_PUNCTUATION}|^(?=.{1,80}$).+?[،,].+?(?:مستعمل|مهمل|مستعملة|مستعملان)(?=$|[.،,:؛\\s])`, "u");
+const STATUS_HEADING_RE = new RegExp(`^(?:${CODE_LINE_PATTERN}|(?:(?:${ARABIC_WORD_PATTERN}\\s+){1,3}${ARABIC_WORD_PATTERN}|${ARABIC_WORD_PATTERN}(?:\\s*[،,]\\s*${ARABIC_WORD_PATTERN})+))\\s*:?[\\s]*(?:مستعمل|مستعملة|مستعملان|مهمل|مهملة)(?=$|[.،,:؛\\s])`, "u");
+const CODE_NOTE_HEADING_RE = new RegExp(`^(?:${ARABIC_WORD_PATTERN}\\s+){1,3}\\(.+\\)$`, "u");
+const COLON_NOISE_RE = /^.+:\s*.+$/u;
+const CHAPTER_TERMS = [
+	"باب",
+	"فصل",
+	"كتاب",
+	"حرف",
+	"أبواب"
+];
+const MARKER_PREFIXES = [
+	"بسم الله",
+	"توكلت على الله",
+	"آخر كتاب",
+	"ويتلوه"
+];
+const NOISE_TOKENS = [
+	"قال",
+	"وقيل",
+	"ويقال",
+	"وفي",
+	"يعني",
+	"فإذا"
+];
+const NORMALIZED_CHAPTER_TERMS = CHAPTER_TERMS.map(normalizeArabicForComparison);
+const NORMALIZED_MARKER_PREFIXES = MARKER_PREFIXES.map(normalizeArabicForComparison);
+const NORMALIZED_NOISE_TOKENS = NOISE_TOKENS.map(normalizeArabicForComparison);
+const emptyCounts = () => ({
+	chapter: 0,
+	cluster: 0,
+	codeLine: 0,
+	entry: 0,
+	inlineSubentry: 0,
+	lineEntry: 0,
+	marker: 0,
+	noise: 0,
+	pairedForms: 0
+});
+const extractWrappedLemma = (lemma) => lemma.replace(/^[[{(]+|[\])}]+$/gu, "").trim();
+const stripLeadingWrappers = (text) => text.replace(/^[[{(]+\s*/u, "").trim();
+const isDelimitedPrefixMatch$1 = (text, prefix) => {
 	if (text === prefix) return true;
 	if (!text.startsWith(prefix)) return false;
 	const nextChar = text[prefix.length];
-	return nextChar === void 0 || GATE_DELIMITER_RE.test(nextChar);
+	return nextChar === void 0 || /[\s:،؛()[\]{}\-–—]/u.test(nextChar);
+};
+const isCodeHeading = (text) => {
+	if (CODE_LINE_RE.test(text)) return true;
+	const words = text.trim().split(/\s+/u).filter(Boolean);
+	return words.length === 1 && (words[0]?.length ?? 0) === 1;
+};
+const looksLikeNoiseHeading = (text, normalizedText) => {
+	const wordCount = text.trim().split(/\s+/u).filter(Boolean).length;
+	if (/(?:مستعمل|مهمل|مستعملة|مستعملان)(?=$|[.،,:؛\s])/u.test(text)) return false;
+	if (wordCount >= 8 && COLON_NOISE_RE.test(text)) return true;
+	return NORMALIZED_NOISE_TOKENS.some((token) => normalizedText.includes(token)) && wordCount >= 4;
+};
+/**
+* Classifies a markdown heading line produced by `convertContentToMarkdown()`.
+*/
+const classifyDictionaryHeading = (line) => {
+	const text = line.startsWith(HEADING_PREFIX) ? line.slice(3).trim() : line.trim();
+	const unwrapped = stripLeadingWrappers(text);
+	const normalizedText = normalizeArabicForComparison(text);
+	const normalizedUnwrapped = normalizeArabicForComparison(unwrapped);
+	if (!text) return "noise";
+	if (CHAPTER_HEADING_RE.test(text) || NORMALIZED_CHAPTER_TERMS.some((term) => isDelimitedPrefixMatch$1(normalizedUnwrapped, term))) return "chapter";
+	if (looksLikeNoiseHeading(text, normalizedText)) return "noise";
+	if (isCodeHeading(text)) return "marker";
+	if (NORMALIZED_MARKER_PREFIXES.some((token) => normalizedUnwrapped.startsWith(token))) return "marker";
+	if (STATUS_HEADING_RE.test(text) || CODE_NOTE_HEADING_RE.test(text)) return "marker";
+	if (CLUSTER_HEADING_RE.test(text)) return "cluster";
+	return "entry";
+};
+const createHeadingMatch = (kind, page, rawLine, lineNumber) => ({
+	kind,
+	lemma: kind === "entry" ? rawLine.slice(3).trim() : void 0,
+	line: lineNumber,
+	pageId: page.id,
+	text: rawLine
+});
+const createSurfaceMatch = (kind, page, text, lineNumber, lemma) => ({
+	kind,
+	lemma,
+	line: lineNumber,
+	pageId: page.id,
+	text
+});
+const scanHeadingLine = (page, rawLine, lineNumber, matches) => {
+	if (!rawLine.startsWith(HEADING_PREFIX)) return false;
+	const kind = classifyDictionaryHeading(rawLine);
+	matches.push(createHeadingMatch(kind, page, rawLine, lineNumber));
+	return true;
+};
+const scanLineEntry = (page, rawLine, lineNumber, matches) => {
+	const lineEntry = rawLine.match(PLAIN_ENTRY_RE);
+	if (!lineEntry?.groups?.lemma) return;
+	matches.push(createSurfaceMatch("lineEntry", page, rawLine, lineNumber, extractWrappedLemma(lineEntry.groups.lemma)));
+};
+const scanPairedForms = (page, rawLine, lineNumber, matches) => {
+	const pairedForms = rawLine.match(PAIRED_FORMS_RE);
+	if (!pairedForms?.groups?.forms) return;
+	matches.push(createSurfaceMatch("pairedForms", page, rawLine, lineNumber, pairedForms.groups.forms));
 };
-const createPageContexts = (pages, pageMap, normalizedPages) => {
-	if (normalizedPages && normalizedPages.length !== pages.length) throw new Error(`Dictionary runtime expected ${pages.length} normalized pages, received ${normalizedPages.length}`);
-	if (pageMap.boundaries.length !== pages.length) throw new Error(`Dictionary runtime expected ${pages.length} page boundaries, received ${pageMap.boundaries.length}`);
-	const contexts = [];
-	for (let index = 0; index < pages.length; index++) {
-		const page = pages[index];
-		const boundary = pageMap.boundaries[index];
-		if (!page || !boundary) throw new Error(`Dictionary runtime encountered a missing page or boundary at index ${index}`);
-		const content = normalizedPages?.[index] ?? normalizeLineEndings(page.content);
-		contexts.push({
-			boundary,
-			content,
-			index,
-			lines: buildPageLines(content),
-			page
-		});
-	}
-	return contexts;
+const scanCodeLine = (page, rawLine, lineNumber, matches) => {
+	const codeLine = rawLine.match(CODE_LINE_RE);
+	if (!codeLine?.groups?.codes) return;
+	matches.push(createSurfaceMatch("codeLine", page, rawLine, lineNumber, codeLine.groups.codes));
 };
-const normalizeIntroContextText = (text) => normalizeArabicForComparison(text).replace(/[\\/]+/gu, " ").replace(/[«»"“”'‘’()[\]{}]+/gu, " ").replace(/\s+/gu, " ").trim();
-const startsWithConfiguredWord = (words, candidate) => words.some((word) => normalizedStartsWith(candidate, word));
-const buildPageLines = (content) => {
-	const parts = content.split("\n");
-	const lines = [];
-	let offset = 0;
-	for (let index = 0; index < parts.length; index++) {
-		const text = parts[index] ?? "";
-		lines.push({
-			lineNumber: index + 1,
-			start: offset,
-			text
-		});
-		offset += text.length + 1;
+const scanInlineSubentries = (page, rawLine, lineNumber, matches) => {
+	for (const match of rawLine.matchAll(INLINE_SUBENTRY_RE)) {
+		if (!match.groups?.lemma) continue;
+		matches.push(createSurfaceMatch("inlineSubentry", page, match.groups.lemma, lineNumber, match.groups.lemma));
 	}
-	return lines;
 };
-const headingMatchesGate = (headingText, gate) => {
-	if (gate.use === "headingText") {
-		const useFuzzy = gate.fuzzy ?? false;
-		const source = useFuzzy ? normalizeArabicForComparison(headingText) : headingText.trim();
-		const match = useFuzzy ? normalizeArabicForComparison(gate.match) : gate.match.trim();
-		return !!match && isDelimitedPrefixMatch(source, match);
+/**
+* Extracts dictionary surface matches from a markdown page.
+*/
+const scanDictionaryMarkdownPage = (page) => {
+	const lines = page.content.split(/\n/u);
+	const matches = [];
+	for (let index = 0; index < lines.length; index++) {
+		const rawLine = lines[index]?.trim() ?? "";
+		if (!rawLine) continue;
+		if (scanHeadingLine(page, rawLine, index + 1, matches)) continue;
+		scanLineEntry(page, rawLine, index + 1, matches);
+		scanPairedForms(page, rawLine, index + 1, matches);
+		scanCodeLine(page, rawLine, index + 1, matches);
+		scanInlineSubentries(page, rawLine, index + 1, matches);
 	}
-	return normalizedStartsWith(headingText, GATE_TOKEN_MAP[gate.token]);
-};
-const pageMatchesAnyGate = (page, gates) => page.lines.some((line) => {
-	const trimmed = line.text.trim();
-	if (!trimmed.startsWith(HEADING_PREFIX)) return false;
-	const headingText = trimmed.replace(/^##\s+/u, "").trim();
-	return gates.some((gate) => headingMatchesGate(headingText, gate));
-});
-const pageWithinZoneBounds = (zone, pageId) => {
-	if (zone.when?.minPageId !== void 0 && pageId < zone.when.minPageId) return false;
-	if (zone.when?.maxPageId !== void 0 && pageId > zone.when.maxPageId) return false;
-	return true;
+	return matches;
 };
-const findActivationPageId = (zone, pages) => {
+/**
+* Aggregates dictionary surface counts across markdown pages.
+*/
+const analyzeDictionaryMarkdownPages = (pages) => {
+	const counts = emptyCounts();
+	const matches = [];
 	for (const page of pages) {
-		if (!pageWithinZoneBounds(zone, page.page.id)) continue;
-		if (pageMatchesAnyGate(page, zone.when?.activateAfter ?? [])) return page.page.id;
-	}
-	return null;
-};
-const createZoneActivationMap = (profile, pages) => {
-	const activation = /* @__PURE__ */ new Map();
-	for (const zone of profile.zones) {
-		if (!zone.when?.activateAfter?.length) {
-			activation.set(zone.name, null);
-			continue;
+		const pageMatches = scanDictionaryMarkdownPage(page);
+		for (const match of pageMatches) {
+			counts[match.kind] += 1;
+			matches.push(match);
 		}
-		activation.set(zone.name, findActivationPageId(zone, pages));
 	}
-	return activation;
+	return {
+		counts,
+		matches
+	};
 };
-const pageMatchesZone = (zone, activationMap, pageId) => {
-	if (zone.when?.minPageId !== void 0 && pageId < zone.when.minPageId) return false;
-	if (zone.when?.maxPageId !== void 0 && pageId > zone.when.maxPageId) return false;
-	if (!zone.when?.activateAfter?.length) return true;
-	const activatedAt = activationMap.get(zone.name);
-	return activatedAt !== null && activatedAt !== void 0 && pageId >= activatedAt;
+//#endregion
+//#region src/dictionary/dictionary-candidates.ts
+const lineEntryRegexCache = /* @__PURE__ */ new WeakMap();
+const inlineSubentryRegexCache = /* @__PURE__ */ new WeakMap();
+const pairedFormsRegexCache = /* @__PURE__ */ new WeakMap();
+const STATUS_LINE_RE = new RegExp(`^(?:${CODE_LINE_PATTERN$1}|${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN}(?:\\s*[،,]\\s*${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN})+)\\s*:?[\\s]*${STATUS_TAIL_PATTERN}(?=$|[.،,:؛\\s])`, "u");
+const CODE_CORE_RE = new RegExp(`^${CODE_LINE_PATTERN$1}$`, "u");
+const STATUS_SUFFIX_RE = new RegExp(`(?:\\s*:?[\\s]*${STATUS_TAIL_PATTERN}.*)?$`, "u");
+const optionalSecondWord = (allowMultiWord) => allowMultiWord ? `(?:\\s+${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN})?` : "";
+const wrappedWordPattern = (open, close, allowMultiWord) => `${open}${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN}${optionalSecondWord(allowMultiWord)}${close}`;
+const bareWordPattern = (allowMultiWord) => `${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN}${optionalSecondWord(allowMultiWord)}`;
+const createLineEntryRegex = (family) => {
+	const cached = lineEntryRegexCache.get(family);
+	if (cached) return cached;
+	const wrapperPattern = family.wrappers === "parentheses" ? wrappedWordPattern("\\(", "\\)", family.allowMultiWord) : family.wrappers === "brackets" ? wrappedWordPattern("\\[", "\\]", family.allowMultiWord) : family.wrappers === "curly" ? wrappedWordPattern("\\{", "\\}", family.allowMultiWord) : family.wrappers === "any" ? `(?:${wrappedWordPattern("\\(", "\\)", family.allowMultiWord)}|${wrappedWordPattern("\\[", "\\]", family.allowMultiWord)}|${wrappedWordPattern("\\{", "\\}", family.allowMultiWord)})` : bareWordPattern(family.allowMultiWord);
+	const colonSpacing = family.allowWhitespaceBeforeColon ? "\\s*:" : ":";
+	const regex = new RegExp(`^(?<lemma>${wrapperPattern})${colonSpacing}`, "u");
+	lineEntryRegexCache.set(family, regex);
+	return regex;
 };
-const resolveActiveZone = (profile, activationMap, pageId) => {
-	let activeZone = null;
-	for (const zone of profile.zones) if (pageMatchesZone(zone, activationMap, pageId)) activeZone = zone;
-	return activeZone;
+const parseWrappedCode = (text) => {
+	const paired = text.match(/^(?<open>[[(])(?<inner>.+)(?<close>[)\]])$/u);
+	if (!paired?.groups?.inner || !paired.groups.open || !paired.groups.close) return null;
+	return {
+		close: paired.groups.close,
+		inner: paired.groups.inner.trim(),
+		open: paired.groups.open,
+		paired: paired.groups.open === "(" && paired.groups.close === ")" || paired.groups.open === "[" && paired.groups.close === "]"
+	};
+};
+const collectHeadingCandidates = (pageStartOffset, line, nextLine, family, trimmed) => {
+	if (!trimmed.startsWith("## ")) return [];
+	const headingClass = classifyDictionaryHeading(trimmed);
+	if (headingClass === "noise") return [];
+	const candidate = createHeadingCandidate(pageStartOffset, line, nextLine, family, headingClass);
+	return candidate ? [candidate] : [];
 };
 const createHeadingCandidate = (pageStartOffset, line, nextLine, family, headingClass) => {
 	if (!family.classes.includes(headingClass)) return null;
@@ -2168,19 +2632,6 @@ const createHeadingCandidate = (pageStartOffset, line, nextLine, family, heading
 		text: line.text.trim()
 	};
 };
-const optionalSecondWord = (allowMultiWord) => allowMultiWord ? `(?:\\s+${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN})?` : "";
-const wrappedWordPattern = (open, close, allowMultiWord) => `${open}${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN}${optionalSecondWord(allowMultiWord)}${close}`;
-const bareWordPattern = (allowMultiWord) => `${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN}${optionalSecondWord(allowMultiWord)}`;
-const STATUS_LINE_RE = new RegExp(`^(?:${CODE_LINE_PATTERN}|${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN}(?:\\s*[،,]\\s*${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN})+)\\s*:?[\\s]*${STATUS_TAIL_PATTERN}(?=$|[.،,:؛\\s])`, "u");
-const createLineEntryRegex = (family) => {
-	const cached = lineEntryRegexCache.get(family);
-	if (cached) return cached;
-	const wrapperPattern = family.wrappers === "parentheses" ? wrappedWordPattern("\\(", "\\)", family.allowMultiWord) : family.wrappers === "brackets" ? wrappedWordPattern("\\[", "\\]", family.allowMultiWord) : family.wrappers === "curly" ? wrappedWordPattern("\\{", "\\}", family.allowMultiWord) : family.wrappers === "any" ? `(?:${wrappedWordPattern("\\(", "\\)", family.allowMultiWord)}|${wrappedWordPattern("\\[", "\\]", family.allowMultiWord)}|${wrappedWordPattern("\\{", "\\}", family.allowMultiWord)})` : bareWordPattern(family.allowMultiWord);
-	const colonSpacing = family.allowWhitespaceBeforeColon ? "\\s*:" : ":";
-	const regex = new RegExp(`^(?<lemma>${wrapperPattern})${colonSpacing}`, "u");
-	lineEntryRegexCache.set(family, regex);
-	return regex;
-};
 const collectLineEntryCandidates = (pageStartOffset, line, family) => {
 	const trimmed = line.text.trim();
 	if (STATUS_LINE_RE.test(trimmed)) return [];
@@ -2198,17 +2649,22 @@ const collectLineEntryCandidates = (pageStartOffset, line, family) => {
 	}];
 };
 const collectInlineSubentryCandidates = (pageStartOffset, line, family) => {
-	const cached = inlineSubentryRegexCache.get(family);
-	const prefixes = family.prefixes.length > 0 ? family.prefixes.map(escapeRegex).join("|") : escapeRegex("و");
-	const regex = cached ?? new RegExp(`(^|[\\s،؛,:.])(?<lemma>(?:${prefixes})${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN})\\s*:`, "gu");
-	if (!cached) inlineSubentryRegexCache.set(family, regex);
+	let cached = inlineSubentryRegexCache.get(family);
+	if (!cached) {
+		const prefixes = family.prefixes.length > 0 ? family.prefixes.map(escapeRegex).join("|") : escapeRegex("و");
+		cached = {
+			matchRegex: new RegExp(`(^|[\\s،؛,:.])(?<lemma>(?:${prefixes})${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN})\\s*:`, "gu"),
+			stripPrefixRegex: new RegExp(`^(?:${prefixes})`, "u")
+		};
+		inlineSubentryRegexCache.set(family, cached);
+	}
 	const candidates = [];
-	for (const match of line.text.matchAll(regex)) {
+	for (const match of line.text.matchAll(cached.matchRegex)) {
 		if (!match.groups?.lemma || match.index === void 0) continue;
 		const lemmaIndex = match[0].indexOf(match.groups.lemma);
 		if (lemmaIndex < 0) continue;
 		const candidateStart = match.index + lemmaIndex;
-		const lemma = family.stripPrefixesFromLemma ? match.groups.lemma.replace(new RegExp(`^(?:${prefixes})`, "u"), "") : match.groups.lemma;
+		const lemma = family.stripPrefixesFromLemma ? match.groups.lemma.replace(cached.stripPrefixRegex, "") : match.groups.lemma;
 		candidates.push({
 			absoluteIndex: pageStartOffset + line.start + candidateStart,
 			family: "inlineSubentry",
@@ -2222,18 +2678,6 @@ const collectInlineSubentryCandidates = (pageStartOffset, line, family) => {
 	}
 	return candidates;
 };
-const CODE_CORE_RE = new RegExp(`^${CODE_LINE_PATTERN}$`, "u");
-const STATUS_SUFFIX_RE = new RegExp(`(?:\\s*:?[\\s]*${STATUS_TAIL_PATTERN}.*)?$`, "u");
-const parseWrappedCode = (text) => {
-	const paired = text.match(/^(?<open>[[(])(?<inner>.+)(?<close>[\])])$/u);
-	if (!paired?.groups?.inner || !paired.groups.open || !paired.groups.close) return null;
-	return {
-		close: paired.groups.close,
-		inner: paired.groups.inner.trim(),
-		open: paired.groups.open,
-		paired: paired.groups.open === "(" && paired.groups.close === ")" || paired.groups.open === "[" && paired.groups.close === "]"
-	};
-};
 const collectCodeLineCandidates = (pageStartOffset, line, family) => {
 	const trimmed = line.text.trim();
 	const bare = trimmed.replace(STATUS_SUFFIX_RE, "").trim();
@@ -2271,255 +2715,470 @@ const collectPairedFormsCandidates = (pageStartOffset, line, family) => {
 		text: line.text.trim()
 	}];
 };
-const blockerApplies = (blocker, family) => !blocker.appliesTo || blocker.appliesTo.includes(family);
-const isIntroCandidate = (text) => {
-	const normalized = normalizeIntroContextText(text);
-	return INTRO_PHRASES.some((phrase) => normalized.startsWith(normalizeArabicForComparison(phrase)));
+const assertNever$1 = (value) => {
+	throw new Error(`Unhandled dictionary candidate family: ${JSON.stringify(value)}`);
 };
-const endsWithIntroPhrase = (text) => {
-	const trimmed = text.trimEnd();
-	if (STRONG_SENTENCE_TERMINATORS$1.test(trimmed)) return false;
-	const normalized = normalizeIntroContextText(trimmed).trimEnd().replace(/[:؛،,.!?؟]+$/u, "").trimEnd();
-	return INTRO_PHRASES.some((phrase) => normalized.endsWith(normalizeArabicForComparison(phrase)));
+const collectCandidatesForFamily = (pageStartOffset, line, nextLine, family, trimmed) => {
+	switch (family.use) {
+		case "heading": return collectHeadingCandidates(pageStartOffset, line, nextLine, family, trimmed);
+		case "lineEntry": return collectLineEntryCandidates(pageStartOffset, line, family);
+		case "inlineSubentry": return collectInlineSubentryCandidates(pageStartOffset, line, family);
+		case "codeLine": return collectCodeLineCandidates(pageStartOffset, line, family);
+		case "pairedForms": return collectPairedFormsCandidates(pageStartOffset, line, family);
+		default: return assertNever$1(family);
+	}
+};
+const familyMayMatchLine = (family, trimmed) => {
+	switch (family.use) {
+		case "heading": return trimmed.startsWith("## ");
+		case "lineEntry":
+		case "inlineSubentry":
+		case "pairedForms": return trimmed.includes(":");
+		case "codeLine": return /^(?:[[(])?\p{Script=Arabic}/u.test(trimmed);
+		default: return assertNever$1(family);
+	}
+};
+/**
+* Collects all family candidates for a single dictionary line within a zone.
+*/
+const collectCandidatesForLine = (pageStartOffset, line, nextLine, zone) => {
+	const trimmed = line.text.trim();
+	if (!trimmed) return [];
+	const candidates = [];
+	for (const family of zone.families) {
+		if (!familyMayMatchLine(family, trimmed)) continue;
+		candidates.push(...collectCandidatesForFamily(pageStartOffset, line, nextLine, family, trimmed));
+	}
+	return candidates;
+};
+//#endregion
+//#region src/dictionary/dictionary-zones.ts
+const normalizedStartsWith = (text, prefix) => normalizeArabicForComparison(text).startsWith(normalizeArabicForComparison(prefix));
+const isDelimitedPrefixMatch = (text, prefix) => {
+	if (text === prefix) return true;
+	if (!text.startsWith(prefix)) return false;
+	const nextChar = text[prefix.length];
+	return nextChar === void 0 || GATE_DELIMITER_RE.test(nextChar);
+};
+const getHeadingTextGateMatch = (gate, useFuzzy) => {
+	if (useFuzzy) return "normalizedMatch" in gate ? gate.normalizedMatch : normalizeArabicForComparison(gate.match);
+	return "trimmedMatch" in gate ? gate.trimmedMatch : gate.match.trim();
+};
+const buildPageLines = (content) => {
+	const parts = content.split("\n");
+	const lines = [];
+	let offset = 0;
+	for (let index = 0; index < parts.length; index++) {
+		const text = parts[index] ?? "";
+		lines.push({
+			lineNumber: index + 1,
+			start: offset,
+			text
+		});
+		offset += text.length + 1;
+	}
+	return lines;
+};
+const headingMatchesGate = (headingText, gate) => {
+	if (gate.use === "headingText") {
+		const useFuzzy = gate.fuzzy ?? false;
+		const source = useFuzzy ? normalizeArabicForComparison(headingText) : headingText.trim();
+		const match = getHeadingTextGateMatch(gate, useFuzzy);
+		return !!match && isDelimitedPrefixMatch(source, match);
+	}
+	return normalizedStartsWith(headingText, GATE_TOKEN_MAP[gate.token]);
+};
+const createPageContext = (page, boundary, content, index) => {
+	let cachedLines;
+	const context = {
+		boundary,
+		content,
+		index,
+		page
+	};
+	Object.defineProperty(context, "lines", {
+		configurable: true,
+		enumerable: true,
+		get: () => {
+			cachedLines ??= buildPageLines(content);
+			return cachedLines;
+		}
+	});
+	return context;
+};
+const pageMatchesAnyGate = (page, gates) => page.lines.some((line) => {
+	const trimmed = line.text.trim();
+	if (!trimmed.startsWith("## ")) return false;
+	const headingText = trimmed.slice(3).trim();
+	return gates.some((gate) => headingMatchesGate(headingText, gate));
+});
+const pageWithinZoneBounds = (zone, pageId) => {
+	if (zone.when?.minPageId !== void 0 && pageId < zone.when.minPageId) return false;
+	if (zone.when?.maxPageId !== void 0 && pageId > zone.when.maxPageId) return false;
+	return true;
+};
+const findActivationPageId = (zone, pages) => {
+	for (const page of pages) {
+		if (!pageWithinZoneBounds(zone, page.page.id)) continue;
+		if (pageMatchesAnyGate(page, zone.when?.activateAfter ?? [])) return page.page.id;
+	}
+	return null;
+};
+const createZoneActivationMap = (profile, pages) => {
+	const activation = /* @__PURE__ */ new Map();
+	for (const zone of profile.zones) {
+		if (!zone.when?.activateAfter?.length) {
+			activation.set(zone.name, null);
+			continue;
+		}
+		activation.set(zone.name, findActivationPageId(zone, pages));
+	}
+	return activation;
+};
+const pageMatchesZone = (zone, activationMap, pageId) => {
+	if (zone.when?.minPageId !== void 0 && pageId < zone.when.minPageId) return false;
+	if (zone.when?.maxPageId !== void 0 && pageId > zone.when.maxPageId) return false;
+	if (!zone.when?.activateAfter?.length) return true;
+	const activatedAt = activationMap.get(zone.name);
+	return activatedAt !== null && activatedAt !== void 0 && pageId >= activatedAt;
 };
-const endsWithIntroContext = (text) => {
-	const trimmed = text.trimEnd();
-	if (STRONG_SENTENCE_TERMINATORS$1.test(trimmed)) return false;
-	const normalized = normalizeIntroContextText(trimmed).trimEnd().replace(/[:؛،,.!?؟]+$/u, "").trimEnd();
-	if (!normalized) return false;
-	if (INTRO_PHRASES.some((phrase) => normalized.endsWith(normalizeArabicForComparison(phrase)))) return true;
-	if (INTRO_TAIL_PHRASES.some((phrase) => normalized.endsWith(normalizeArabicForComparison(phrase)))) return true;
-	return INTRO_TAIL_PATTERNS.some((pattern) => pattern.test(normalized));
+const resolveActiveZone = (profile, activationMap, pageId) => {
+	let activeZone = null;
+	for (const zone of profile.zones) if (pageMatchesZone(zone, activationMap, pageId)) activeZone = zone;
+	return activeZone;
 };
-const isAuthorityCandidate = (text, precision) => {
-	const head = normalizeStopLemma(text.split(":", 1)[0] ?? text);
-	if (head && AUTHORITY_HEAD_WORDS.some((term) => normalizeStopLemma(term) === head)) return true;
-	if (AUTHORITY_RE.test(text)) return true;
-	if (precision === "aggressive") {
-		const normalized = normalizeIntroContextText(text);
-		return [
-			"الليث",
-			"الأزهري",
-			"الأصمعي",
-			"الجوهري",
-			"الفراء",
-			"ثعلب",
-			"شمر"
-		].some((term) => normalized.startsWith(normalizeArabicForComparison(term)));
+const createPageContexts = (pages, pageMap, normalizedPages) => {
+	if (normalizedPages && normalizedPages.length !== pages.length) throw new Error(`Dictionary runtime expected ${pages.length} normalized pages, received ${normalizedPages.length}`);
+	if (pageMap.boundaries.length !== pages.length) throw new Error(`Dictionary runtime expected ${pages.length} page boundaries, received ${pageMap.boundaries.length}`);
+	const contexts = [];
+	for (let index = 0; index < pages.length; index++) {
+		const page = pages[index];
+		const boundary = pageMap.boundaries[index];
+		if (!page || !boundary) throw new Error(`Dictionary runtime encountered a missing page or boundary at index ${index}`);
+		const content = normalizedPages?.[index] ?? normalizeLineEndings(page.content);
+		contexts.push(createPageContext(page, boundary, content, index));
 	}
-	return false;
+	return contexts;
 };
-const hasBlockedQualifierTail = (lemma) => {
-	const parts = lemma.split(/[،,]/u).map((part) => part.trim()).filter(Boolean);
-	if (parts.length < 2) return false;
-	return startsWithConfiguredWord(QUALIFIER_TAIL_PREFIXES, parts.slice(1).join(" "));
+//#endregion
+//#region src/dictionary/profile.ts
+const normalizedProfileCache = /* @__PURE__ */ new WeakMap();
+const PREVIOUS_WORD_SCOPES = [
+	"samePage",
+	"pageStart",
+	"any"
+];
+const BLOCKER_PRECISIONS = ["high", "aggressive"];
+const uniqueNormalizedSet = (values, normalize) => new Set(values.map(normalize).filter(Boolean));
+const assertNever = (value) => {
+	throw new Error(`Unhandled dictionary profile variant: ${JSON.stringify(value)}`);
 };
-const looksLikeStructuralLeak = (candidate) => {
-	if (!candidate.lemma) return false;
-	const normalizedLemma = normalizeArabicForComparison(candidate.lemma);
-	if (candidate.kind === "entry" && (/^[^\p{Script=Arabic}\d]+/u.test(candidate.lemma) || candidate.lemma.includes("{") || candidate.lemma.includes("}") || candidate.lemma.includes("##"))) return true;
-	if (candidate.kind === "entry" && BARE_CODE_LEMMA_RE.test(candidate.lemma) && (candidate.text === candidate.lemma || candidate.text === `${HEADING_PREFIX}${candidate.lemma}` || candidate.text.startsWith(`${HEADING_PREFIX}${candidate.lemma}`) || candidate.text.startsWith(`${candidate.lemma}\n${HEADING_PREFIX}`))) return true;
-	if (candidate.family !== "pairedForms" && candidate.lemma.split(/\s+/u).filter(Boolean).length > 4) return true;
-	if (startsWithConfiguredWord(STRUCTURAL_LEMMA_PREFIXES, candidate.lemma)) return true;
-	if (normalizedLemma.startsWith(normalizeArabicForComparison("ولل"))) return true;
-	const structuralText = candidate.text.startsWith(HEADING_PREFIX) ? candidate.text.slice(3).trim() : candidate.text;
-	if (/^[\d\u0660-\u0669]+\s*-\s*\([^)]+\)(?:\s+##.*)?$/u.test(structuralText)) return true;
-	const normalizedText = normalizeArabicForComparison(structuralText);
-	if (STRUCTURAL_LINE_PATTERNS.some((pattern) => pattern.test(structuralText))) return STRUCTURAL_LINE_KEYWORDS.some((keyword) => normalizedText.includes(normalizeArabicForComparison(keyword)));
-	return false;
+const normalizeFamily = (family) => {
+	switch (family.use) {
+		case "heading": return {
+			...family,
+			allowNextLineColon: family.allowNextLineColon ?? false,
+			allowSingleLetter: family.allowSingleLetter ?? false
+		};
+		case "lineEntry": return {
+			...family,
+			allowMultiWord: family.allowMultiWord ?? false,
+			allowWhitespaceBeforeColon: family.allowWhitespaceBeforeColon ?? false,
+			wrappers: family.wrappers ?? "none"
+		};
+		case "inlineSubentry": return {
+			...family,
+			prefixes: family.prefixes ?? ["و"],
+			stripPrefixesFromLemma: family.stripPrefixesFromLemma ?? true
+		};
+		case "codeLine": return {
+			...family,
+			wrappers: family.wrappers ?? "either"
+		};
+		case "pairedForms": return {
+			...family,
+			requireStatusTail: family.requireStatusTail ?? false,
+			separator: family.separator ?? "comma"
+		};
+		default: return assertNever(family);
+	}
 };
-const countLemma = (map, lemma) => {
-	if (!lemma) return;
-	map.set(lemma, (map.get(lemma) ?? 0) + 1);
+const normalizeBlocker = (blocker) => {
+	switch (blocker.use) {
+		case "authorityIntro": return {
+			...blocker,
+			precision: blocker.precision ?? "high"
+		};
+		case "stopLemma": return {
+			...blocker,
+			normalizedWords: uniqueNormalizedSet(blocker.words, normalizeStopLemmaWord)
+		};
+		case "previousWord": return {
+			...blocker,
+			normalizedWords: uniqueNormalizedSet(blocker.words, normalizeArabicForComparison),
+			scope: blocker.scope ?? "samePage"
+		};
+		case "previousChar": return {
+			...blocker,
+			charSet: new Set(blocker.chars)
+		};
+		case "intro": return blocker;
+		case "pageContinuation": return {
+			...blocker,
+			authorityPrecision: blocker.authorityPrecision ?? "high"
+		};
+		default: return assertNever(blocker);
+	}
 };
-const createInitialKindCounts = () => ({
-	chapter: 0,
-	entry: 0,
-	marker: 0
-});
-const createInitialReasonCounts = () => ({
-	authorityIntro: 0,
-	intro: 0,
-	pageContinuation: 0,
-	previousChar: 0,
-	previousWord: 0,
-	qualifierTail: 0,
-	stopLemma: 0,
-	structuralLeak: 0
+const normalizeGate = (gate) => {
+	if (gate.use === "headingToken") return gate;
+	const trimmedMatch = gate.match.trim();
+	return {
+		...gate,
+		normalizedMatch: normalizeArabicForComparison(trimmedMatch),
+		trimmedMatch
+	};
+};
+const normalizeZone = (zone) => ({
+	blockers: (zone.blockers ?? []).map(normalizeBlocker),
+	families: zone.families.map(normalizeFamily),
+	name: zone.name,
+	when: zone.when ? {
+		activateAfter: zone.when.activateAfter?.map(normalizeGate),
+		maxPageId: zone.when.maxPageId,
+		minPageId: zone.when.minPageId
+	} : void 0
 });
-const createInitialFamilyCounts = () => ({
-	codeLine: {
-		accepted: 0,
-		rejected: 0
-	},
-	heading: {
-		accepted: 0,
-		rejected: 0
-	},
-	inlineSubentry: {
-		accepted: 0,
-		rejected: 0
-	},
-	lineEntry: {
-		accepted: 0,
-		rejected: 0
-	},
-	pairedForms: {
-		accepted: 0,
-		rejected: 0
-	}
+const createIssue = (code, path, message, zoneName) => ({
+	code,
+	message,
+	path,
+	...zoneName ? { zoneName } : {}
 });
-const rejectsViaIntroBlocker = (candidate, blocker, localBeforeCandidate) => {
-	if (blocker.use !== "intro") return false;
-	return isIntroCandidate(candidate.probeText) || endsWithIntroPhrase(localBeforeCandidate) || endsWithIntroContext(localBeforeCandidate);
+const hasBlankString = (values) => values.length === 0 || values.some((value) => !value.trim());
+const pushBlockerIssue = (issues, code, path, message, zoneName) => {
+	issues.push(createIssue(code, path, message, zoneName));
 };
-const rejectsViaAuthorityBlocker = (candidate, blocker) => blocker.use === "authorityIntro" && isAuthorityCandidate(candidate.probeText, blocker.precision);
-const rejectsViaStopLemmaBlocker = (candidate, blocker) => blocker.use === "stopLemma" && !!candidate.lemma && !!normalizeStopLemma(candidate.lemma) && blocker.normalizedWords.has(normalizeStopLemma(candidate.lemma));
-const rejectsViaPreviousWordBlocker = (pageContent, localIndex, blocker) => {
-	if (blocker.use !== "previousWord") return false;
-	const lastWord = extractLastArabicWord$1(pageContent, localIndex);
-	return !!lastWord && blocker.normalizedWords.has(normalizeArabicForComparison(lastWord));
+const validateAuthorityPrecision = (issues, blockerPath, zoneName, code, fieldName, value, blockerUse) => {
+	if (value === void 0 || BLOCKER_PRECISIONS.includes(value)) return;
+	pushBlockerIssue(issues, code, `${blockerPath}.${fieldName}`, `${blockerUse} blocker in zone "${zoneName}" must use ${fieldName} "high" or "aggressive"`, zoneName);
 };
-const rejectsViaPreviousCharBlocker = (pageContent, localIndex, blocker) => {
-	if (blocker.use !== "previousChar") return false;
-	const previousChar = previousNonWhitespaceChar(pageContent, localIndex);
-	return !!previousChar && blocker.charSet.has(previousChar);
+const validatePreviousWordBlocker = (blocker, blockerPath, zoneName, issues) => {
+	if (hasBlankString(blocker.words)) pushBlockerIssue(issues, "invalid_previous_words", `${blockerPath}.words`, `previousWord blocker in zone "${zoneName}" must include non-empty words`, zoneName);
+	if (blocker.scope !== void 0 && !PREVIOUS_WORD_SCOPES.includes(blocker.scope)) pushBlockerIssue(issues, "invalid_previous_word_scope", `${blockerPath}.scope`, `previousWord blocker in zone "${zoneName}" must use scope "samePage", "pageStart", or "any"`, zoneName);
 };
-const rejectsViaPageContinuationBlocker = (candidate, blocker, localBeforeCandidate, pageIndex, pages) => {
-	if (blocker.use !== "pageContinuation") return false;
-	if (!(localBeforeCandidate.trim().length === 0) || pageIndex === 0) return false;
-	const previousPage = pages[pageIndex - 1];
-	if (!previousPage || endsWithStrongSentenceTerminator$1(previousPage.content)) return false;
-	const previousWord = extractLastArabicWord$1(previousPage.content);
-	return !!previousWord && CONTINUATION_PREV_WORDS.some((word) => normalizedEquals(word, previousWord)) || endsWithIntroContext(previousPage.content) || isIntroCandidate(candidate.probeText) || isAuthorityCandidate(candidate.probeText, "high");
+const validatePreviousCharBlocker = (blocker, blockerPath, zoneName, issues) => {
+	if (blocker.chars.length === 0 || blocker.chars.some((char) => !char)) pushBlockerIssue(issues, "invalid_previous_chars", `${blockerPath}.chars`, `previousChar blocker in zone "${zoneName}" must include chars`, zoneName);
 };
-const getBlockerRejectionReason = (blocker, candidate, localBeforeCandidate, pageContent, pageIndex, pages) => {
-	if (rejectsViaIntroBlocker(candidate, blocker, localBeforeCandidate)) return "intro";
-	if (rejectsViaAuthorityBlocker(candidate, blocker)) return "authorityIntro";
-	if (rejectsViaStopLemmaBlocker(candidate, blocker)) return "stopLemma";
-	if (rejectsViaPreviousWordBlocker(pageContent, candidate.localIndex, blocker)) return "previousWord";
-	if (rejectsViaPreviousCharBlocker(pageContent, candidate.localIndex, blocker)) return "previousChar";
-	if (rejectsViaPageContinuationBlocker(candidate, blocker, localBeforeCandidate, pageIndex, pages)) return "pageContinuation";
-	return null;
+const validateStopLemmaBlocker = (blocker, blockerPath, zoneName, issues) => {
+	if (hasBlankString(blocker.words)) pushBlockerIssue(issues, "invalid_stop_words", `${blockerPath}.words`, `stopLemma blocker in zone "${zoneName}" must include non-empty words`, zoneName);
 };
-const getCandidateRejection = (candidate, zone, pageContext, pages) => {
-	const hasQualifierTail = hasBlockedQualifierTail(candidate.lemma ?? "");
-	if (hasQualifierTail || looksLikeStructuralLeak(candidate)) return { reason: hasQualifierTail ? "qualifierTail" : "structuralLeak" };
-	const localBeforeCandidate = getTrailingContext(pageContext.content, candidate.localIndex);
-	for (const blocker of zone.blockers) {
-		if (!blockerApplies(blocker, candidate.family)) continue;
-		const reason = getBlockerRejectionReason(blocker, candidate, localBeforeCandidate, pageContext.content, pageContext.index, pages);
-		if (reason) return { reason };
+const validateGate = (gate, zone, gateIndex, seenActivateAfterKeys, issues) => {
+	const gatePath = `zones[].when.activateAfter[${gateIndex}]`.replace("[]", `[${zone.name}]`);
+	if (gate.use === "headingText") {
+		if (!gate.match.trim()) issues.push(createIssue("invalid_gate_match", `${gatePath}.match`, `dictionary gate match must be non-empty`, zone.name));
+		if (gate.fuzzy !== void 0 && typeof gate.fuzzy !== "boolean") issues.push(createIssue("invalid_gate_fuzzy", `${gatePath}.fuzzy`, `dictionary gate fuzzy must be a boolean when provided`, zone.name));
 	}
-	return null;
+	const dedupeKey = `${gate.use}:${JSON.stringify(gate)}`;
+	if (seenActivateAfterKeys.has(dedupeKey)) issues.push(createIssue("duplicate_activate_after_gate", gatePath, `dictionary zone "${zone.name}" has duplicate activateAfter gates`, zone.name));
+	seenActivateAfterKeys.add(dedupeKey);
 };
-const shouldRejectCandidate = (candidate, zone, pageContext, pages) => {
-	return getCandidateRejection(candidate, zone, pageContext, pages) !== null;
+const validateFamily = (family, zone, familyIndex, issues) => {
+	const familyPath = `zones[].families[${familyIndex}]`.replace("[]", `[${zone.name}]`);
+	switch (family.use) {
+		case "heading":
+			if (family.classes.length === 0) issues.push(createIssue("empty_heading_classes", `${familyPath}.classes`, `dictionary heading family in zone "${zone.name}" must include at least one class`, zone.name));
+			if (family.emit === "chapter" && !family.classes.includes("chapter")) issues.push(createIssue("inert_heading_family", familyPath, `dictionary heading family in zone "${zone.name}" emits "chapter" but never matches chapter headings`, zone.name));
+			if (family.emit === "marker" && !family.classes.includes("marker")) issues.push(createIssue("inert_heading_family", familyPath, `dictionary heading family in zone "${zone.name}" emits "marker" but never matches marker headings`, zone.name));
+			if (family.emit === "entry" && !family.classes.includes("entry")) issues.push(createIssue("inert_heading_family", familyPath, `dictionary heading family in zone "${zone.name}" emits "entry" but never matches entry headings`, zone.name));
+			break;
+		case "lineEntry": break;
+		case "inlineSubentry":
+			if (family.prefixes?.some((prefix) => !prefix.trim())) issues.push(createIssue("empty_inline_prefixes", `${familyPath}.prefixes`, `inlineSubentry prefixes must be non-empty strings`, zone.name));
+			break;
+		case "codeLine": break;
+		case "pairedForms": break;
+		default: assertNever(family);
+	}
 };
-const collectHeadingCandidates = (pageStartOffset, line, nextLine, family, trimmed) => {
-	if (!trimmed.startsWith(HEADING_PREFIX)) return [];
-	const headingClass = classifyDictionaryHeading(trimmed);
-	if (headingClass === "noise") return [];
-	const candidate = createHeadingCandidate(pageStartOffset, line, nextLine, family, headingClass);
-	return candidate ? [candidate] : [];
+const validateBlocker = (blocker, zone, blockerIndex, issues) => {
+	const blockerPath = `zones[].blockers[${blockerIndex}]`.replace("[]", `[${zone.name}]`);
+	switch (blocker.use) {
+		case "authorityIntro":
+			validateAuthorityPrecision(issues, blockerPath, zone.name, "invalid_authority_intro_precision", "precision", blocker.precision, "authorityIntro");
+			break;
+		case "stopLemma":
+			validateStopLemmaBlocker(blocker, blockerPath, zone.name, issues);
+			break;
+		case "previousWord":
+			validatePreviousWordBlocker(blocker, blockerPath, zone.name, issues);
+			break;
+		case "previousChar":
+			validatePreviousCharBlocker(blocker, blockerPath, zone.name, issues);
+			break;
+		case "intro": break;
+		case "pageContinuation":
+			validateAuthorityPrecision(issues, blockerPath, zone.name, "invalid_continuation_precision", "authorityPrecision", blocker.authorityPrecision, "pageContinuation");
+			break;
+		default: assertNever(blocker);
+	}
 };
-const collectCandidatesForFamily = (pageStartOffset, line, nextLine, family, trimmed) => {
-	switch (family.use) {
-		case "heading": return collectHeadingCandidates(pageStartOffset, line, nextLine, family, trimmed);
-		case "lineEntry": return collectLineEntryCandidates(pageStartOffset, line, family);
-		case "inlineSubentry": return collectInlineSubentryCandidates(pageStartOffset, line, family);
-		case "codeLine": return collectCodeLineCandidates(pageStartOffset, line, family);
-		case "pairedForms": return collectPairedFormsCandidates(pageStartOffset, line, family);
-		default: return assertNever$1(family);
+var DictionaryProfileValidationError = class extends Error {
+	issues;
+	constructor(issues) {
+		super(issues.length === 1 ? issues[0].message : `Dictionary profile validation failed with ${issues.length} issues`);
+		this.name = "DictionaryProfileValidationError";
+		this.issues = issues;
 	}
 };
-const collectCandidatesForLine = (pageStartOffset, line, nextLine, zone) => {
-	const trimmed = line.text.trim();
-	const candidates = [];
-	if (!trimmed) return candidates;
-	for (const family of zone.families) candidates.push(...collectCandidatesForFamily(pageStartOffset, line, nextLine, family, trimmed));
-	return candidates;
+const validateZone = (zone, zoneIndex, seenZoneNames, issues) => {
+	const zonePath = `zones[${zoneIndex}]`;
+	const trimmedName = zone.name.trim();
+	if (!trimmedName) issues.push(createIssue("empty_zone_name", `${zonePath}.name`, `dictionary zone name must be non-empty`));
+	else if (seenZoneNames.has(trimmedName)) issues.push(createIssue("duplicate_zone_name", `${zonePath}.name`, `dictionary zone names must be unique; duplicated "${trimmedName}"`, trimmedName));
+	else seenZoneNames.add(trimmedName);
+	if (zone.families.length === 0) issues.push(createIssue("empty_zone_families", `${zonePath}.families`, `dictionary zone "${zone.name}" must declare at least one family`, zone.name));
+	if (zone.when?.minPageId !== void 0 && zone.when?.maxPageId !== void 0 && zone.when.minPageId > zone.when.maxPageId) issues.push(createIssue("invalid_zone_page_range", `${zonePath}.when`, `dictionary zone "${zone.name}" has minPageId greater than maxPageId`, zone.name));
+	const seenActivateAfterKeys = /* @__PURE__ */ new Set();
+	for (let gateIndex = 0; gateIndex < (zone.when?.activateAfter?.length ?? 0); gateIndex++) validateGate(zone.when.activateAfter[gateIndex], zone, gateIndex, seenActivateAfterKeys, issues);
+	for (let familyIndex = 0; familyIndex < zone.families.length; familyIndex++) validateFamily(zone.families[familyIndex], zone, familyIndex, issues);
+	for (let blockerIndex = 0; blockerIndex < (zone.blockers?.length ?? 0); blockerIndex++) validateBlocker(zone.blockers[blockerIndex], zone, blockerIndex, issues);
 };
-const candidateToSplitPoint = (candidate, debugMetaKey) => {
-	const baseMeta = candidate.lemma ? {
-		kind: candidate.kind,
-		lemma: candidate.lemma
-	} : { kind: candidate.kind };
-	const meta = debugMetaKey === void 0 ? baseMeta : mergeDebugIntoMeta(baseMeta, debugMetaKey, { dictionary: {
-		family: candidate.family,
-		...candidate.headingClass ? { headingClass: candidate.headingClass } : {}
-	} });
-	return {
-		contentStartOffset: candidate.contentStartOffset,
-		index: candidate.absoluteIndex,
-		meta
+/**
+* Validates a dictionary profile without normalizing it.
+*/
+const validateDictionaryProfile = (profile) => {
+	const issues = [];
+	if (profile.version !== 2) issues.push(createIssue("invalid_version", "version", `dictionary profile version must be 2, got ${profile.version}`));
+	if (profile.zones.length === 0) {
+		issues.push(createIssue("missing_zones", "zones", `dictionary profile must contain at least one zone`));
+		return issues;
+	}
+	const seenZoneNames = /* @__PURE__ */ new Set();
+	for (let zoneIndex = 0; zoneIndex < profile.zones.length; zoneIndex++) validateZone(profile.zones[zoneIndex], zoneIndex, seenZoneNames, issues);
+	return issues;
+};
+/**
+* Normalizes and validates a dictionary profile before runtime matching.
+*/
+const normalizeDictionaryProfile = (profile) => {
+	const cached = normalizedProfileCache.get(profile);
+	if (cached) return cached;
+	const issues = validateDictionaryProfile(profile);
+	if (issues.length > 0) throw new DictionaryProfileValidationError(issues);
+	const normalized = {
+		version: 2,
+		zones: profile.zones.map(normalizeZone)
 	};
+	normalizedProfileCache.set(profile, normalized);
+	return normalized;
+};
+//#endregion
+//#region src/dictionary/dictionary-diagnostics.ts
+const createInitialKindCounts = () => ({
+	chapter: 0,
+	entry: 0,
+	marker: 0
+});
+const createInitialReasonCounts = () => ({
+	authorityIntro: 0,
+	intro: 0,
+	pageContinuation: 0,
+	previousChar: 0,
+	previousWord: 0,
+	qualifierTail: 0,
+	stopLemma: 0,
+	structuralLeak: 0
+});
+const createInitialFamilyCounts = () => ({
+	codeLine: {
+		accepted: 0,
+		rejected: 0
+	},
+	heading: {
+		accepted: 0,
+		rejected: 0
+	},
+	inlineSubentry: {
+		accepted: 0,
+		rejected: 0
+	},
+	lineEntry: {
+		accepted: 0,
+		rejected: 0
+	},
+	pairedForms: {
+		accepted: 0,
+		rejected: 0
+	}
+});
+const countLemma = (map, lemma) => {
+	if (!lemma) return;
+	map.set(lemma, (map.get(lemma) ?? 0) + 1);
 };
 const pushDiagnosticSample = (samples, sampleLimit, sample) => {
 	if (samples.length < sampleLimit) samples.push(sample);
 };
 /**
-* Collects dictionary-profile split points using the pages-only markdown surface.
+* Builds a minimal `PageMap` from a pages array for use inside
+* `diagnoseDictionaryProfile`, which does not receive one from the segmenter.
 */
-const collectDictionarySplitPoints = (pages, profile, pageMap, normalizedPages, logger, debugMetaKey) => {
-	const normalizedProfile = normalizeDictionaryProfile(profile);
-	const pageContexts = createPageContexts(pages, pageMap, normalizedPages);
-	const activationMap = createZoneActivationMap(normalizedProfile, pageContexts);
-	const splitPoints = [];
-	logger?.debug?.("[dictionary] collecting split points", {
-		pageCount: pages.length,
-		zoneCount: normalizedProfile.zones.length
-	});
-	for (const pageContext of pageContexts) {
-		const zone = resolveActiveZone(normalizedProfile, activationMap, pageContext.page.id);
-		if (!zone) continue;
-		for (let lineIndex = 0; lineIndex < pageContext.lines.length; lineIndex++) {
-			const line = pageContext.lines[lineIndex];
-			const nextLine = pageContext.lines[lineIndex + 1];
-			const candidates = collectCandidatesForLine(pageContext.boundary.start, line, nextLine, zone);
-			for (const candidate of candidates) {
-				if (shouldRejectCandidate(candidate, zone, pageContext, pageContexts)) continue;
-				splitPoints.push(candidateToSplitPoint(candidate, debugMetaKey));
+const buildDiagnosticsPageMap = (pages, normalizedContents) => {
+	const boundaries = [];
+	const pageBreaks = [];
+	let offset = 0;
+	for (let pageIndex = 0; pageIndex < pages.length; pageIndex++) {
+		const normalized = normalizedContents[pageIndex];
+		boundaries.push({
+			end: offset + normalized.length,
+			id: pages[pageIndex].id,
+			start: offset
+		});
+		if (pageIndex < pages.length - 1) {
+			pageBreaks.push(offset + normalized.length);
+			offset += normalized.length + 1;
+		} else offset += normalized.length;
+	}
+	const findBoundary = (off) => {
+		let lo = 0;
+		let hi = boundaries.length - 1;
+		while (lo <= hi) {
+			const mid = lo + hi >>> 1;
+			const boundary = boundaries[mid];
+			if (off < boundary.start) {
+				hi = mid - 1;
+				continue;
 			}
+			if (off > boundary.end) {
+				lo = mid + 1;
+				continue;
+			}
+			return boundary;
 		}
-	}
-	logger?.debug?.("[dictionary] collected split points", { splitPointCount: splitPoints.length });
-	return splitPoints;
+		return boundaries.at(-1);
+	};
+	return {
+		boundaries,
+		getId: (off) => findBoundary(off)?.id ?? 0,
+		pageBreaks,
+		pageIds: pages.map((page) => page.id)
+	};
 };
 /**
-* Collects authoring diagnostics for a dictionary profile without creating segments.
+* Collects tuning-oriented diagnostics for a dictionary profile without creating
+* segments. This output is intended for profile authoring workflows rather than
+* long-term compatibility guarantees.
 *
 * This is useful when tuning blockers and family choices for a new dictionary.
 */
 const diagnoseDictionaryProfile = (pages, profile, options = {}) => {
 	const normalizedProfile = normalizeDictionaryProfile(profile);
-	const pageMap = {
-		boundaries: [],
-		getId: (offset) => {
-			for (const boundary of pageMap.boundaries) if (offset >= boundary.start && offset <= boundary.end) return boundary.id;
-			return pageMap.boundaries.at(-1)?.id ?? 0;
-		},
-		pageBreaks: [],
-		pageIds: pages.map((page) => page.id)
-	};
-	let offset = 0;
-	const pageContexts = createPageContexts(pages, pageMap, pages.map((page, pageIndex) => {
-		const normalized = normalizeLineEndings(page.content);
-		pageMap.boundaries.push({
-			end: offset + normalized.length,
-			id: page.id,
-			start: offset
-		});
-		if (pageIndex < pages.length - 1) {
-			pageMap.pageBreaks.push(offset + normalized.length);
-			offset += normalized.length + 1;
-		} else offset += normalized.length;
-		return normalized;
-	}));
+	const normalizedPages = pages.map((page) => normalizeLineEndings(page.content));
+	const pageContexts = createPageContexts(pages, buildDiagnosticsPageMap(pages, normalizedPages), normalizedPages);
 	const activationMap = createZoneActivationMap(normalizedProfile, pageContexts);
 	const sampleLimit = options.sampleLimit ?? 50;
 	const acceptedKinds = createInitialKindCounts();
-	const blockerHits = createInitialReasonCounts();
+	const rejectionReasons = createInitialReasonCounts();
 	const familyCounts = createInitialFamilyCounts();
 	const zoneCounts = {};
 	const rejectedLemmaCounts = /* @__PURE__ */ new Map();
@@ -2551,7 +3210,7 @@ const diagnoseDictionaryProfile = (pages, profile, options = {}) => {
 				};
 				if (rejection) {
 					rejectedCount += 1;
-					blockerHits[rejection.reason] += 1;
+					rejectionReasons[rejection.reason] += 1;
 					familyCounts[candidate.family].rejected += 1;
 					zoneCounts[zone.name].rejected += 1;
 					countLemma(rejectedLemmaCounts, candidate.lemma);
@@ -2580,186 +3239,59 @@ const diagnoseDictionaryProfile = (pages, profile, options = {}) => {
 	return {
 		acceptedCount,
 		acceptedKinds,
-		blockerHits,
 		familyCounts,
 		pageCount: pages.length,
 		rejectedCount,
 		rejectedLemmas,
+		rejectionReasons,
 		samples,
 		zoneCounts
 	};
 };
 //#endregion
-//#region src/optimization/optimize-rules.ts
-const MERGEABLE_KEYS = new Set([
-	"lineStartsWith",
-	"lineStartsAfter",
-	"lineEndsWith"
-]);
-/**
-* Get the pattern type key for a rule.
-*/
-const getPatternKey = (rule) => PATTERN_TYPE_KEYS.find((key) => key in rule) ?? "regex";
-const getPatternArray = (rule, key) => {
-	const value = rule[key];
-	return Array.isArray(value) ? value : [];
-};
-const getPatternString = (rule, key) => {
-	const value = rule[key];
-	return typeof value === "string" ? value : Array.isArray(value) ? value.join("\n") : value ? JSON.stringify(value) : "";
-};
-const normalizePatterns = (patterns) => [...new Set(patterns)].sort((a, b) => b.length - a.length || a.localeCompare(b));
-const getDictionaryEntrySpecificityScore = (rule) => {
-	if (!("dictionaryEntry" in rule) || !rule.dictionaryEntry) return 0;
-	const { allowCommaSeparated = false, allowParenthesized = false, allowWhitespaceBeforeColon = false, maxLetters = 10, midLineSubentries = true, minLetters = 2, stopWords } = rule.dictionaryEntry;
-	return minLetters * 20 + maxLetters + (allowCommaSeparated ? 0 : 120) + (allowParenthesized ? 0 : 60) + (allowWhitespaceBeforeColon ? 0 : 20) + (midLineSubentries ? 0 : 160) + Math.min(stopWords.length, 25);
-};
-const getSpecificityScore = (rule) => {
-	const key = getPatternKey(rule);
-	if (key === "dictionaryEntry") return getDictionaryEntrySpecificityScore(rule);
-	return MERGEABLE_KEYS.has(key) ? getPatternArray(rule, key).reduce((max, p) => Math.max(max, p.length), 0) : getPatternString(rule, key).length;
-};
-const createMergeKey = (rule) => {
-	const key = getPatternKey(rule);
-	const { [key]: _, ...rest } = rule;
-	return `${key}|${JSON.stringify(rest)}`;
-};
-const optimizeRules = (rules) => {
-	const output = [];
-	const indexByMergeKey = /* @__PURE__ */ new Map();
-	let mergedCount = 0;
-	for (const rule of rules) {
-		const key = getPatternKey(rule);
-		if (!MERGEABLE_KEYS.has(key)) {
-			output.push(rule);
-			continue;
-		}
-		const mergeKey = createMergeKey(rule);
-		const existingIndex = indexByMergeKey.get(mergeKey);
-		if (existingIndex === void 0) {
-			indexByMergeKey.set(mergeKey, output.length);
-			output.push({
-				...rule,
-				[key]: normalizePatterns(getPatternArray(rule, key))
-			});
-		} else {
-			const existing = output[existingIndex];
-			existing[key] = normalizePatterns([...getPatternArray(existing, key), ...getPatternArray(rule, key)]);
-			mergedCount++;
-		}
-	}
+//#region src/dictionary/runtime.ts
+const candidateToSplitPoint = (candidate, debugMetaKey) => {
+	const baseMeta = candidate.lemma ? {
+		kind: candidate.kind,
+		lemma: candidate.lemma
+	} : { kind: candidate.kind };
+	const meta = debugMetaKey === void 0 ? baseMeta : mergeDebugIntoMeta(baseMeta, debugMetaKey, { dictionary: {
+		family: candidate.family,
+		...candidate.headingClass ? { headingClass: candidate.headingClass } : {}
+	} });
 	return {
-		mergedCount,
-		rules: output.sort((a, b) => getSpecificityScore(b) - getSpecificityScore(a))
-	};
-};
-//#endregion
-//#region src/preprocessing/transforms.ts
-/** Helper for exhaustive switch checking - TypeScript will error if a case is missed */
-const assertNever = (x) => {
-	throw new Error(`Unknown preprocess transform type: ${JSON.stringify(x)}`);
-};
-/** Check if a character is whitespace (space, newline, tab, etc.) */
-const isWhitespace = (char) => /\s/.test(char);
-/**
-* Check if a character code is a zero-width control character.
-*
-* Covers:
-* - U+200B–U+200F (Zero Width Space, Joiners, Direction Marks)
-* - U+202A–U+202E (Bidirectional Formatting)
-* - U+2060–U+2064 (Word Joiner, Invisible Operators)
-* - U+FEFF (Byte Order Mark / Zero Width No-Break Space)
-*/
-const isZeroWidth = (code) => code >= 8203 && code <= 8207 || code >= 8234 && code <= 8238 || code >= 8288 && code <= 8292 || code === 65279;
-/**
-* Remove zero-width control characters from text.
-*
-* @param text - Input text
-* @param mode - 'strip' (default) removes entirely, 'space' replaces with space
-* @returns Text with zero-width characters removed or replaced
-*/
-const removeZeroWidth = (text, mode = "strip") => {
-	if (mode === "space") {
-		const parts = [];
-		let lastWasWhitespace = true;
-		for (let i = 0; i < text.length; i++) if (isZeroWidth(text.charCodeAt(i))) {
-			if (!lastWasWhitespace && parts.length > 0) {
-				parts.push(" ");
-				lastWasWhitespace = true;
-			}
-		} else {
-			const char = text[i];
-			parts.push(char);
-			lastWasWhitespace = isWhitespace(char);
-		}
-		return parts.join("");
-	}
-	return text.replace(/[\u200B-\u200F\u202A-\u202E\u2060-\u2064\uFEFF]/g, "");
-};
-/**
-* Condense multiple periods (...) into ellipsis character (…).
-*
-* Prevents `{{tarqim}}` from false-matching inside ellipsis since
-* the `.` in tarqim matches individual periods.
-*
-* @param text - Input text
-* @returns Text with period sequences replaced by ellipsis
-*/
-const condenseEllipsis = (text) => text.replace(/\.{2,}/g, "…");
-/**
-* Join trailing و (waw) to the next word.
-*
-* Fixes OCR/digitization artifacts: ' و ' → ' و' (waw joined to next word)
-*
-* @param text - Input text
-* @returns Text with trailing waw joined to following word
-*/
-const fixTrailingWaw = (text) => text.replace(/ و /g, " و");
-/**
-* Check if a page ID is within a constraint range.
-*/
-const isInRange = (pageId, constraint) => {
-	if (constraint.min !== void 0 && pageId < constraint.min) return false;
-	if (constraint.max !== void 0 && pageId > constraint.max) return false;
-	return true;
-};
-/**
-* Normalize a transform to its object form.
-*/
-const normalizeTransform = (transform) => {
-	if (typeof transform === "string") return { type: transform };
-	return transform;
+		contentStartOffset: candidate.contentStartOffset,
+		index: candidate.absoluteIndex,
+		meta
+	};
 };
 /**
-* Apply preprocessing transforms to a page's content.
-*
-* Transforms run in array order. Each can be limited to specific pages
-* via `min`/`max` constraints.
-*
-* @param content - Page content to transform
-* @param pageId - Page ID for constraint checking
-* @param transforms - Array of transforms to apply
-* @returns Transformed content
+* Collects dictionary-profile split points using the pages-only markdown surface.
 */
-const applyPreprocessToPage = (content, pageId, transforms) => {
-	let result = content;
-	for (const transform of transforms) {
-		const rule = normalizeTransform(transform);
-		if (!isInRange(pageId, rule)) continue;
-		switch (rule.type) {
-			case "removeZeroWidth":
-				result = removeZeroWidth(result, rule.mode ?? "strip");
-				break;
-			case "condenseEllipsis":
-				result = condenseEllipsis(result);
-				break;
-			case "fixTrailingWaw":
-				result = fixTrailingWaw(result);
-				break;
-			default: assertNever(rule.type);
+const collectDictionarySplitPoints = (pages, profile, pageMap, normalizedPages, logger, debugMetaKey) => {
+	const normalizedProfile = normalizeDictionaryProfile(profile);
+	const pageContexts = createPageContexts(pages, pageMap, normalizedPages);
+	const activationMap = createZoneActivationMap(normalizedProfile, pageContexts);
+	const splitPoints = [];
+	logger?.debug?.("[dictionary] collecting split points", {
+		pageCount: pages.length,
+		zoneCount: normalizedProfile.zones.length
+	});
+	for (const pageContext of pageContexts) {
+		const zone = resolveActiveZone(normalizedProfile, activationMap, pageContext.page.id);
+		if (!zone) continue;
+		for (let lineIndex = 0; lineIndex < pageContext.lines.length; lineIndex++) {
+			const line = pageContext.lines[lineIndex];
+			const nextLine = pageContext.lines[lineIndex + 1];
+			const candidates = collectCandidatesForLine(pageContext.boundary.start, line, nextLine, zone);
+			for (const candidate of candidates) {
+				if (shouldRejectCandidate(candidate, zone, pageContext, pageContexts)) continue;
+				splitPoints.push(candidateToSplitPoint(candidate, debugMetaKey));
+			}
 		}
 	}
-	return result;
+	logger?.debug?.("[dictionary] collected split points", { splitPointCount: splitPoints.length });
+	return splitPoints;
 };
 const WINDOW_PREFIX_LENGTHS = [
 	80,
@@ -3656,219 +4188,16 @@ const findBreakPosition = (remainingContent, currentFromIdx, toIdx, windowEndIdx
 *
 * @param content The text content
 * @param targetPosition The desired split position (hard limit)
-* @param lookbackChars How far back to search for a safe break
-* @returns The new split position (index), or -1 if no safe break found
-*/
-const findSafeBreakPosition = (content, targetPosition, lookbackChars = 100) => {
-	const startSearch = Math.max(0, targetPosition - lookbackChars);
-	for (let i = targetPosition - 1; i >= startSearch; i--) {
-		const char = content[i];
-		if (STOP_CHARACTERS.test(char)) return i + 1;
-	}
-	return -1;
-};
-//#endregion
-//#region src/segmentation/pattern-validator.ts
-const KNOWN_TOKENS = new Set(getAvailableTokens());
-const TOKEN_INSIDE_BRACES = /\{\{(\w+)(?::\w+)?\}\}/g;
-const BARE_TOKEN_REGEX = (() => {
-	const tokens = [...KNOWN_TOKENS].sort((a, b) => b.length - a.length);
-	return new RegExp(`(?<!\\{\\{)(${tokens.join("|")})(?::\\w+)?(?!\\}\\})`, "g");
-})();
-const createMalformedTokenIssue = (tokenLiteral, side) => {
-	const token = tokenLiteral.split(":", 1)[0] || void 0;
-	return {
-		message: `Token "${tokenLiteral || "unknown"}" appears to be missing ${side} braces.`,
-		suggestion: tokenLiteral ? `{{${tokenLiteral}}}` : void 0,
-		token,
-		type: "missing_braces"
-	};
-};
-const detectMalformedLeftToken = (pattern) => {
-	for (let index = 0; index < pattern.length - 1; index++) {
-		if (pattern.slice(index, index + 2) !== "{{") continue;
-		const closeIndex = pattern.indexOf("}}", index + 2);
-		if (closeIndex === -1) return createMalformedTokenIssue(pattern.slice(index + 2).match(/^\w+(?::\w+)?/u)?.[0] ?? "", "closing");
-		index = closeIndex + 1;
-	}
-};
-const detectMalformedRightToken = (pattern) => {
-	for (let index = 0; index < pattern.length - 1; index++) {
-		if (pattern.slice(index, index + 2) !== "}}") continue;
-		if (pattern.lastIndexOf("{{", index) === -1) return createMalformedTokenIssue(pattern.slice(0, index).match(/(\w+(?::\w+)?)$/u)?.[1] ?? "", "opening");
-	}
-};
-const detectMalformedToken = (pattern) => detectMalformedLeftToken(pattern) ?? detectMalformedRightToken(pattern);
-/**
-* Validates a single pattern for common issues.
-*/
-const validatePattern = (pattern, seenPatterns) => {
-	if (!pattern.trim()) return {
-		message: "Empty pattern is not allowed",
-		type: "empty_pattern"
-	};
-	if (seenPatterns.has(pattern)) return {
-		message: `Duplicate pattern: "${pattern}"`,
-		pattern,
-		type: "duplicate"
-	};
-	seenPatterns.add(pattern);
-	TOKEN_INSIDE_BRACES.lastIndex = 0;
-	for (const match of pattern.matchAll(TOKEN_INSIDE_BRACES)) {
-		const name = match[1];
-		if (name && !KNOWN_TOKENS.has(name)) return {
-			message: `Unknown token: {{${name}}}. Available tokens: ${[...KNOWN_TOKENS].slice(0, 5).join(", ")}...`,
-			suggestion: "Check spelling or use a known token",
-			token: name,
-			type: "unknown_token"
-		};
-	}
-	const malformed = detectMalformedToken(pattern);
-	if (malformed) return malformed;
-	for (const match of pattern.matchAll(BARE_TOKEN_REGEX)) {
-		const [full, name] = match;
-		const idx = match.index;
-		if (pattern.slice(Math.max(0, idx - 2), idx) !== "{{" || pattern.slice(idx + full.length, idx + full.length + 2) !== "}}") return {
-			message: `Token "${name}" appears to be missing {{}}. Did you mean "{{${full}}}"?`,
-			suggestion: `{{${full}}}`,
-			token: name,
-			type: "missing_braces"
-		};
-	}
-};
-/**
-* Validates an array of patterns, returning parallel array of issues.
-*/
-const validatePatternArray = (patterns) => {
-	const seen = /* @__PURE__ */ new Set();
-	const issues = patterns.map((p) => validatePattern(p, seen));
-	return issues.some(Boolean) ? issues : void 0;
-};
-const applyRulePatternValidation = (result, key, patterns) => {
-	if (!patterns) return false;
-	const issues = validatePatternArray(patterns);
-	if (!issues) return false;
-	result[key] = issues;
-	return true;
-};
-const validateTemplateRule = (rule, result) => {
-	if (!("template" in rule)) return false;
-	const issue = validatePattern(rule.template, /* @__PURE__ */ new Set());
-	if (!issue) return false;
-	result.template = issue;
-	return true;
-};
-const validateRegexRule = (rule, result) => {
-	if (!("regex" in rule)) return false;
-	if (!rule.regex.trim()) {
-		result.regex = {
-			message: "Empty pattern is not allowed",
-			type: "empty_pattern"
-		};
-		return true;
-	}
-	try {
-		new RegExp(rule.regex, "u");
-		return false;
-	} catch (error) {
-		result.regex = {
-			message: error instanceof Error ? error.message : String(error),
-			pattern: rule.regex,
-			type: "invalid_regex"
-		};
-		return true;
-	}
-};
-const invalidDictionaryEntryIssue = (message) => ({
-	message,
-	type: "invalid_option"
-});
-const addBooleanDictionaryEntryIssue = (issues, key, value) => {
-	if (value !== void 0 && typeof value !== "boolean") issues[key] = invalidDictionaryEntryIssue(`${key} must be a boolean`);
-};
-const addCaptureNameIssue = (issues, captureName) => {
-	if (captureName !== void 0 && !/^[A-Za-z_]\w*$/.test(captureName)) issues.captureName = invalidDictionaryEntryIssue(`captureName must match /^[A-Za-z_]\\w*$/, got "${captureName}"`);
-};
-const addMinLettersIssue = (issues, minLetters) => {
-	if (minLetters !== void 0 && (!Number.isInteger(minLetters) || minLetters < 1)) issues.minLetters = invalidDictionaryEntryIssue("minLetters must be an integer >= 1");
-};
-const addMaxLettersIssue = (issues, maxLetters, minLetters) => {
-	const min = minLetters ?? 2;
-	if (maxLetters !== void 0 && (!Number.isInteger(maxLetters) || maxLetters < min)) issues.maxLetters = invalidDictionaryEntryIssue(`maxLetters must be an integer >= ${min}`);
-};
-const validateDictionaryEntryRule = (rule, result) => {
-	if (!("dictionaryEntry" in rule) || !rule.dictionaryEntry) return false;
-	const issues = {};
-	const { allowCommaSeparated, allowParenthesized, allowWhitespaceBeforeColon, captureName, maxLetters, midLineSubentries, minLetters, stopWords } = rule.dictionaryEntry;
-	if (!Array.isArray(stopWords) || stopWords.some((word) => typeof word !== "string" || !word.trim())) issues.stopWords = invalidDictionaryEntryIssue("stopWords must be a string[] with non-empty entries");
-	addBooleanDictionaryEntryIssue(issues, "allowCommaSeparated", allowCommaSeparated);
-	addBooleanDictionaryEntryIssue(issues, "allowParenthesized", allowParenthesized);
-	addBooleanDictionaryEntryIssue(issues, "allowWhitespaceBeforeColon", allowWhitespaceBeforeColon);
-	addBooleanDictionaryEntryIssue(issues, "midLineSubentries", midLineSubentries);
-	addCaptureNameIssue(issues, captureName);
-	addMinLettersIssue(issues, minLetters);
-	addMaxLettersIssue(issues, maxLetters, minLetters);
-	if (Object.keys(issues).length === 0) return false;
-	result.dictionaryEntry = issues;
-	return true;
-};
-const formatValidationIssue = (_type, issue, loc) => {
-	if (!issue) return null;
-	if (issue.type === "missing_braces") return `${loc}: Missing {{}} around token "${issue.token}"`;
-	if (issue.type === "unknown_token") return `${loc}: Unknown token "{{${issue.token}}}"`;
-	if (issue.type === "duplicate") return `${loc}: Duplicate pattern "${issue.pattern}"`;
-	if (issue.type === "invalid_regex") return `${loc}: Invalid regex (${issue.message})`;
-	return `${loc}: ${issue.message || issue.type}`;
-};
-/**
-* Validates split rules for common pattern issues.
-*
-* Checks for:
-* - Missing `{{}}` around known token names (e.g., `raqms:num` instead of `{{raqms:num}}`)
-* - Unknown token names inside `{{}}` (e.g., `{{nonexistent}}`)
-* - Duplicate patterns within the same rule
-*
-* @param rules - Array of split rules to validate
-* @returns Array parallel to input with validation results (undefined if no issues)
-*
-* @example
-* const issues = validateRules([
-*   { lineStartsAfter: ['raqms:num'] },  // Missing braces
-*   { lineStartsWith: ['{{unknown}}'] }, // Unknown token
-* ]);
-* // issues[0]?.lineStartsAfter?.[0]?.type === 'missing_braces'
-* // issues[1]?.lineStartsWith?.[0]?.type === 'unknown_token'
-*/
-const validateRules = (rules) => rules.map((rule) => {
-	const result = {};
-	const startsWithIssues = applyRulePatternValidation(result, "lineStartsWith", "lineStartsWith" in rule ? rule.lineStartsWith : void 0);
-	const startsAfterIssues = applyRulePatternValidation(result, "lineStartsAfter", "lineStartsAfter" in rule ? rule.lineStartsAfter : void 0);
-	const endsWithIssues = applyRulePatternValidation(result, "lineEndsWith", "lineEndsWith" in rule ? rule.lineEndsWith : void 0);
-	const templateIssues = validateTemplateRule(rule, result);
-	const regexIssues = validateRegexRule(rule, result);
-	const dictionaryEntryIssues = validateDictionaryEntryRule(rule, result);
-	return startsWithIssues || startsAfterIssues || endsWithIssues || templateIssues || regexIssues || dictionaryEntryIssues ? result : void 0;
-});
-/**
-* Formats a validation result array into a list of human-readable error messages.
-*
-* Useful for displaying validation errors in UIs.
-*
-* @param results - The result array from `validateRules()`
-* @returns Array of formatted error strings
-*
-* @example
-* const issues = validateRules(rules);
-* const errors = formatValidationReport(issues);
-* // ["Rule 1, lineStartsWith: Missing {{}} around token..."]
+* @param lookbackChars How far back to search for a safe break
+* @returns The new split position (index), or -1 if no safe break found
 */
-const formatValidationReport = (results) => results.flatMap((result, i) => {
-	if (!result) return [];
-	return Object.entries(result).flatMap(([type, issues]) => formatValidationIssues(type, issues, i + 1));
-});
-const formatValidationIssues = (type, issues, ruleNumber) => {
-	if (type === "dictionaryEntry" && issues && typeof issues === "object" && !Array.isArray(issues)) return Object.entries(issues).map(([field, issue]) => formatValidationIssue(type, issue, `Rule ${ruleNumber}, ${type}.${field}`)).filter((msg) => msg !== null);
-	return (Array.isArray(issues) ? issues : [issues]).map((issue) => formatValidationIssue(type, issue, `Rule ${ruleNumber}, ${type}`)).filter((msg) => msg !== null);
+const findSafeBreakPosition = (content, targetPosition, lookbackChars = 100) => {
+	const startSearch = Math.max(0, targetPosition - lookbackChars);
+	for (let i = targetPosition - 1; i >= startSearch; i--) {
+		const char = content[i];
+		if (STOP_CHARACTERS.test(char)) return i + 1;
+	}
+	return -1;
 };
 //#endregion
 //#region src/segmentation/breakpoint-processor.ts
@@ -4130,7 +4459,6 @@ const computeIterationWindow = (fullContent, cursorPos, currentFromIdx, fromIdx,
 	const sliceEnd = Math.max(cursorPos + 1, Math.min(sliceEndByPages, sliceEndByLength));
 	return {
 		remainingContent: fullContent.slice(cursorPos, sliceEnd),
-		sliceEnd,
 		windowEndIdx
 	};
 };
@@ -4161,31 +4489,87 @@ const updateLastBreakpointFromFound = (found, lastBreakpoint) => {
 	};
 	return lastBreakpoint;
 };
-const appendPieceAndAdvance = (fullContent, cursorPos, breakPos, pieceContent, currentFromIdx, fromIdx, toIdx, pageIds, boundaryPositions, normalizedPages, maxPages, isFirstPiece, debugMetaKey, originalMeta, lastBreakpoint, result, logger, contentLengthSplit) => {
-	let { actualEndIdx, actualStartIdx } = computePiecePages(cursorPos, breakPos, boundaryPositions, fromIdx, toIdx);
-	if (actualStartIdx < currentFromIdx) {
-		logger?.warn?.("[breakpoints] Page attribution drift detected; clamping actualStartIdx", {
+const buildIterativeContext = (segment, fromIdx, toIdx, pageIds, normalizedPages, cumulativeOffsets, expandedBreakpoints, maxPages, prefer, logger, debugMetaKey, maxContentLength) => {
+	const fullContent = segment.content;
+	const boundaryPositions = buildBoundaryPositions(fullContent, fromIdx, toIdx, pageIds, normalizedPages, cumulativeOffsets, logger);
+	logger?.debug?.("[breakpoints] boundaryPositions built", {
+		boundaryPositions,
+		fromIdx,
+		fullContentLength: fullContent.length,
+		toIdx
+	});
+	return {
+		boundaryPositions,
+		cumulativeOffsets,
+		debugMetaKey,
+		expandedBreakpoints,
+		fromIdx,
+		fullContent,
+		logger,
+		maxContentLength,
+		maxPages,
+		normalizedPages,
+		pageIds,
+		prefer,
+		segment,
+		toIdx
+	};
+};
+const createInitialIterativeState = (fromIdx) => ({
+	currentFromIdx: fromIdx,
+	cursorPos: 0,
+	isFirstPiece: true,
+	lastBreakpoint: null
+});
+const hasIterationWorkRemaining = (state, context) => state.cursorPos < context.fullContent.length && state.currentFromIdx <= context.toIdx;
+const prepareIteration = (context, state) => {
+	if (!hasIterationWorkRemaining(state, context)) return null;
+	const { remainingContent, windowEndIdx } = computeIterationWindow(context.fullContent, state.cursorPos, state.currentFromIdx, context.fromIdx, context.toIdx, context.pageIds, context.boundaryPositions, context.maxPages, context.maxContentLength);
+	if (!remainingContent.trim()) return null;
+	const actualRemainingContent = context.fullContent.slice(state.cursorPos);
+	const actualEndPos = Math.max(state.cursorPos, context.fullContent.length - 1);
+	return {
+		actualRemainingContent,
+		actualRemainingEndIdx: Math.min(findPageIndexForPosition(actualEndPos, context.boundaryPositions, context.fromIdx), context.toIdx),
+		remainingContent,
+		windowEndIdx,
+		windowEndPosition: computeWindowEndPositionForIteration(remainingContent, state.cursorPos, state.currentFromIdx, context.fromIdx, windowEndIdx, context.toIdx, context.pageIds, context.boundaryPositions, context.normalizedPages, context.cumulativeOffsets, context.maxPages, context.maxContentLength, context.logger)
+	};
+};
+const buildPageBoundaryBreakpoint = (context, state) => {
+	const pageBoundaryIdx = context.expandedBreakpoints.findIndex((bp) => bp.regex === null);
+	return pageBoundaryIdx >= 0 ? {
+		breakpointIndex: pageBoundaryIdx,
+		rule: { pattern: "" }
+	} : state.lastBreakpoint;
+};
+const appendPieceAndAdvance = (context, state, breakPos, pieceContent, result, contentLengthSplit) => {
+	let { actualEndIdx, actualStartIdx } = computePiecePages(state.cursorPos, breakPos, context.boundaryPositions, context.fromIdx, context.toIdx);
+	if (actualStartIdx < state.currentFromIdx) {
+		context.logger?.warn?.("[breakpoints] Page attribution drift detected; clamping actualStartIdx", {
 			actualStartIdx,
-			currentFromIdx
+			currentFromIdx: state.currentFromIdx
 		});
-		actualStartIdx = currentFromIdx;
+		actualStartIdx = state.currentFromIdx;
 	}
-	if (maxPages === 0) {
-		actualEndIdx = Math.min(actualEndIdx, currentFromIdx);
-		actualStartIdx = Math.min(actualStartIdx, currentFromIdx);
-	} else if (maxPages > 0) {
-		const maxAllowedEndIdx = computeWindowEndIdx(actualStartIdx, toIdx, pageIds, maxPages);
+	if (context.maxPages === 0) {
+		actualEndIdx = Math.min(actualEndIdx, state.currentFromIdx);
+		actualStartIdx = Math.min(actualStartIdx, state.currentFromIdx);
+	} else if (context.maxPages > 0) {
+		const maxAllowedEndIdx = computeWindowEndIdx(actualStartIdx, context.toIdx, context.pageIds, context.maxPages);
 		actualEndIdx = Math.min(actualEndIdx, maxAllowedEndIdx);
 	}
-	const meta = getSegmentMetaWithDebug(isFirstPiece, debugMetaKey, originalMeta, lastBreakpoint, contentLengthSplit);
-	const pieceSeg = createPieceSegment(pieceContent, actualStartIdx, actualEndIdx, pageIds, meta, true);
+	const meta = getSegmentMetaWithDebug(state.isFirstPiece, context.debugMetaKey, context.segment.meta, state.lastBreakpoint, contentLengthSplit);
+	const pieceSeg = createPieceSegment(pieceContent, actualStartIdx, actualEndIdx, context.pageIds, meta, true);
 	if (pieceSeg) result.push(pieceSeg);
-	const next = advanceCursorAndIndex(fullContent, breakPos, actualEndIdx, toIdx, pageIds, normalizedPages);
+	const next = advanceCursorAndIndex(context.fullContent, breakPos, actualEndIdx, context.toIdx, context.pageIds, context.normalizedPages);
 	let nextFromIdx = next.currentFromIdx;
-	if (maxPages === 0) nextFromIdx = findPageIndexForPosition(next.cursorPos, boundaryPositions, fromIdx);
+	if (context.maxPages === 0) nextFromIdx = findPageIndexForPosition(next.cursorPos, context.boundaryPositions, context.fromIdx);
 	return {
+		...state,
 		currentFromIdx: nextFromIdx,
-		cursorPos: next.cursorPos
+		cursorPos: next.cursorPos,
+		isFirstPiece: false
 	};
 };
 const tryProcessOversizedSegmentFastPath = (segment, fromIdx, toIdx, pageIds, normalizedPages, cumulativeOffsets, expandedBreakpoints, maxPages, logger, debugMetaKey, maxContentLength) => {
@@ -4201,109 +4585,84 @@ const tryProcessOversizedSegmentFastPath = (segment, fromIdx, toIdx, pageIds, no
 * For maxPages=0 with maxContentLength: if current page's remaining content fits,
 * create a segment and advance to next page without applying breakpoints.
 */
-const tryHandleCurrentPageFit = (fullContent, cursorPos, currentFromIdx, fromIdx, actualRemainingEndIdx, boundaryPositions, pageIds, expandedBreakpoints, maxPages, maxContentLength, isFirstPiece, debugMetaKey, segmentMeta, lastBreakpoint, result) => {
-	if (maxPages !== 0 || !maxContentLength || currentFromIdx >= actualRemainingEndIdx) return { handled: false };
-	const currentPageEndPos = boundaryPositions[currentFromIdx - fromIdx + 1] ?? fullContent.length;
-	const currentPageRemainingContent = fullContent.slice(cursorPos, currentPageEndPos).trim();
-	if (!currentPageRemainingContent) return { handled: false };
-	const currentPageFitsInLength = currentPageRemainingContent.length <= maxContentLength;
-	const currentPageHasExclusions = hasAnyExclusionsInRange(expandedBreakpoints, pageIds, currentFromIdx, currentFromIdx);
-	if (!currentPageFitsInLength || currentPageHasExclusions) return { handled: false };
-	const pageBoundaryIdx = expandedBreakpoints.findIndex((bp) => bp.regex === null);
-	const pageBoundaryBreakpoint = pageBoundaryIdx >= 0 ? {
-		breakpointIndex: pageBoundaryIdx,
-		rule: { pattern: "" }
-	} : lastBreakpoint;
-	const includeMeta = isFirstPiece || Boolean(debugMetaKey);
-	const meta = getSegmentMetaWithDebug(isFirstPiece, debugMetaKey, segmentMeta, pageBoundaryBreakpoint);
-	const seg = createSegment(currentPageRemainingContent, pageIds[currentFromIdx], void 0, includeMeta ? meta : void 0);
+const tryHandleCurrentPageFit = (context, state, actualRemainingEndIdx, result) => {
+	if (context.maxPages !== 0 || !context.maxContentLength || state.currentFromIdx >= actualRemainingEndIdx) return null;
+	const boundaryIdx = state.currentFromIdx - context.fromIdx + 1;
+	const currentPageEndPos = context.boundaryPositions[boundaryIdx] ?? context.fullContent.length;
+	const currentPageRemainingContent = context.fullContent.slice(state.cursorPos, currentPageEndPos).trim();
+	if (!currentPageRemainingContent) return null;
+	const currentPageFitsInLength = currentPageRemainingContent.length <= context.maxContentLength;
+	const currentPageHasExclusions = hasAnyExclusionsInRange(context.expandedBreakpoints, context.pageIds, state.currentFromIdx, state.currentFromIdx);
+	if (!currentPageFitsInLength || currentPageHasExclusions) return null;
+	const pageBoundaryBreakpoint = buildPageBoundaryBreakpoint(context, state);
+	const includeMeta = state.isFirstPiece || Boolean(context.debugMetaKey);
+	const meta = getSegmentMetaWithDebug(state.isFirstPiece, context.debugMetaKey, context.segment.meta, pageBoundaryBreakpoint);
+	const seg = createSegment(currentPageRemainingContent, context.pageIds[state.currentFromIdx], void 0, includeMeta ? meta : void 0);
 	if (seg) result.push(seg);
-	let newCursorPos = currentPageEndPos;
-	while (newCursorPos < fullContent.length && /\s/.test(fullContent[newCursorPos])) newCursorPos++;
 	return {
-		handled: true,
-		newCursorPos,
-		newFromIdx: currentFromIdx + 1,
-		newLastBreakpoint: pageBoundaryBreakpoint
+		...state,
+		currentFromIdx: state.currentFromIdx + 1,
+		cursorPos: skipWhitespace(context.fullContent, currentPageEndPos),
+		isFirstPiece: false,
+		lastBreakpoint: pageBoundaryBreakpoint
+	};
+};
+const tryFinalizeIteration = (context, state, prepared, result) => handleOversizedSegmentFit(prepared.actualRemainingContent, state.currentFromIdx, prepared.actualRemainingEndIdx, context.pageIds, context.expandedBreakpoints, context.maxPages, context.maxContentLength, state.isFirstPiece, context.debugMetaKey, context.segment.meta, state.lastBreakpoint, result);
+const applyBreakpointToIteration = (context, state, prepared, iteration, result) => {
+	context.logger?.trace?.(`[breakpoints] iteration=${iteration}`, {
+		currentFromIdx: state.currentFromIdx,
+		cursorPos: state.cursorPos,
+		windowEndIdx: prepared.windowEndIdx,
+		windowEndPosition: prepared.windowEndPosition
+	});
+	const found = findBreakOffsetForWindow(prepared.remainingContent, state.currentFromIdx, prepared.windowEndIdx, context.toIdx, prepared.windowEndPosition, context.pageIds, context.expandedBreakpoints, context.cumulativeOffsets, context.normalizedPages, context.prefer, context.maxContentLength);
+	const breakOffset = ensureProgressingBreakOffset(found.breakOffset, prepared.remainingContent, state.cursorPos, context.maxContentLength, context.logger);
+	const nextState = {
+		...state,
+		lastBreakpoint: updateLastBreakpointFromFound(found, state.lastBreakpoint)
+	};
+	const breakPos = state.cursorPos + breakOffset;
+	const pieceContent = context.fullContent.slice(state.cursorPos, breakPos).trim();
+	if (!pieceContent) return {
+		...nextState,
+		cursorPos: breakPos,
+		isFirstPiece: false
 	};
+	return appendPieceAndAdvance(context, nextState, breakPos, pieceContent, result, found.contentLengthSplit);
 };
 const processOversizedSegmentIterative = (segment, fromIdx, toIdx, pageIds, normalizedPages, cumulativeOffsets, expandedBreakpoints, maxPages, prefer, logger, debugMetaKey, maxContentLength) => {
 	const result = [];
-	const fullContent = segment.content;
 	const pageCount = toIdx - fromIdx + 1;
 	logger?.debug?.("[breakpoints] processOversizedSegment: Using iterative path", {
-		contentLength: fullContent.length,
+		contentLength: segment.content.length,
 		fromIdx,
 		maxContentLength,
 		maxPages,
 		pageCount,
 		toIdx
 	});
-	let cursorPos = 0;
-	let currentFromIdx = fromIdx;
-	let isFirstPiece = true;
-	let lastBreakpoint = null;
-	const boundaryPositions = buildBoundaryPositions(fullContent, fromIdx, toIdx, pageIds, normalizedPages, cumulativeOffsets, logger);
-	logger?.debug?.("[breakpoints] boundaryPositions built", {
-		boundaryPositions,
-		fromIdx,
-		fullContentLength: fullContent.length,
-		toIdx
-	});
-	const MAX_SAFE_ITERATIONS = 1e5;
-	let didHitMaxIterations = true;
-	for (let i = 1; i <= MAX_SAFE_ITERATIONS; i++) {
-		if (cursorPos >= fullContent.length || currentFromIdx > toIdx) {
-			didHitMaxIterations = false;
-			break;
-		}
-		const { remainingContent, windowEndIdx } = computeIterationWindow(fullContent, cursorPos, currentFromIdx, fromIdx, toIdx, pageIds, boundaryPositions, maxPages, maxContentLength);
-		if (!remainingContent.trim()) {
-			didHitMaxIterations = false;
-			break;
-		}
-		const actualRemainingContent = fullContent.slice(cursorPos);
-		const actualEndPos = Math.max(cursorPos, fullContent.length - 1);
-		const actualRemainingEndIdx = Math.min(findPageIndexForPosition(actualEndPos, boundaryPositions, fromIdx), toIdx);
-		const currentPageFit = tryHandleCurrentPageFit(fullContent, cursorPos, currentFromIdx, fromIdx, actualRemainingEndIdx, boundaryPositions, pageIds, expandedBreakpoints, maxPages, maxContentLength, isFirstPiece, debugMetaKey, segment.meta, lastBreakpoint, result);
-		if (currentPageFit.handled) {
-			cursorPos = currentPageFit.newCursorPos;
-			currentFromIdx = currentPageFit.newFromIdx;
-			lastBreakpoint = currentPageFit.newLastBreakpoint;
-			isFirstPiece = false;
+	const context = buildIterativeContext(segment, fromIdx, toIdx, pageIds, normalizedPages, cumulativeOffsets, expandedBreakpoints, maxPages, prefer, logger, debugMetaKey, maxContentLength);
+	let state = createInitialIterativeState(fromIdx);
+	for (let iteration = 1;; iteration++) {
+		const prepared = prepareIteration(context, state);
+		if (!prepared) break;
+		const currentPageFitState = tryHandleCurrentPageFit(context, state, prepared.actualRemainingEndIdx, result);
+		if (currentPageFitState) {
+			state = currentPageFitState;
 			continue;
 		}
-		if (handleOversizedSegmentFit(actualRemainingContent, currentFromIdx, actualRemainingEndIdx, pageIds, expandedBreakpoints, maxPages, maxContentLength, isFirstPiece, debugMetaKey, segment.meta, lastBreakpoint, result)) {
-			didHitMaxIterations = false;
+		if (tryFinalizeIteration(context, state, prepared, result)) break;
+		const nextState = applyBreakpointToIteration(context, state, prepared, iteration, result);
+		if (nextState.cursorPos <= state.cursorPos) {
+			context.logger?.error?.("[breakpoints] Iterative splitting stalled; aborting to avoid an infinite loop", {
+				cursorPos: state.cursorPos,
+				iteration,
+				nextCursorPos: nextState.cursorPos
+			});
 			break;
 		}
-		const windowEndPosition = computeWindowEndPositionForIteration(remainingContent, cursorPos, currentFromIdx, fromIdx, windowEndIdx, toIdx, pageIds, boundaryPositions, normalizedPages, cumulativeOffsets, maxPages, maxContentLength, logger);
-		logger?.trace?.(`[breakpoints] iteration=${i}`, {
-			currentFromIdx,
-			cursorPos,
-			windowEndIdx,
-			windowEndPosition
-		});
-		const found = findBreakOffsetForWindow(remainingContent, currentFromIdx, windowEndIdx, toIdx, windowEndPosition, pageIds, expandedBreakpoints, cumulativeOffsets, normalizedPages, prefer, maxContentLength);
-		const breakOffset = ensureProgressingBreakOffset(found.breakOffset, remainingContent, cursorPos, maxContentLength, logger);
-		lastBreakpoint = updateLastBreakpointFromFound(found, lastBreakpoint);
-		const breakPos = cursorPos + breakOffset;
-		const pieceContent = fullContent.slice(cursorPos, breakPos).trim();
-		if (!pieceContent) {
-			cursorPos = breakPos;
-			isFirstPiece = false;
-			continue;
-		}
-		const next = appendPieceAndAdvance(fullContent, cursorPos, breakPos, pieceContent, currentFromIdx, fromIdx, toIdx, pageIds, boundaryPositions, normalizedPages, maxPages, isFirstPiece, debugMetaKey, segment.meta, lastBreakpoint, result, logger, found.contentLengthSplit);
-		cursorPos = next.cursorPos;
-		currentFromIdx = next.currentFromIdx;
-		isFirstPiece = false;
+		state = nextState;
 	}
-	if (didHitMaxIterations) logger?.error?.("[breakpoints] Stopped processing oversized segment: reached MAX_SAFE_ITERATIONS", {
-		cursorPos,
-		fullContentLength: fullContent.length,
-		iterations: MAX_SAFE_ITERATIONS
-	});
 	logger?.debug?.("[breakpoints] processOversizedSegment: Complete", { resultCount: result.length });
 	return result;
 };
@@ -4377,6 +4736,120 @@ const applyBreakpoints = (segments, pages, normalizedContent, maxPages, breakpoi
 	return result;
 };
 //#endregion
+//#region src/dictionary/arabic-dictionary-rule.ts
+const uniqueCanonicalWords = (words) => {
+	const seen = /* @__PURE__ */ new Set();
+	const result = [];
+	for (const word of words) {
+		const normalized = normalizeArabicForComparison(word);
+		if (!normalized || seen.has(normalized)) continue;
+		seen.add(normalized);
+		result.push(word);
+	}
+	return result;
+};
+const buildStopAlternation = (stopWords) => {
+	const unique = uniqueCanonicalWords(stopWords);
+	if (unique.length === 0) return "";
+	return unique.map((word) => makeDiacriticInsensitive(normalizeArabicForComparison(word))).join("|");
+};
+const buildHeadwordBody = ({ allowCommaSeparated, colonPattern, stopAlternation, stopwordBody, unit }) => {
+	if (!stopAlternation) return allowCommaSeparated ? `${unit}(?:\\s*[،,]\\s*${unit})*` : unit;
+	const guardedUnit = `(?!(?:${stopwordBody})${allowCommaSeparated ? `(?:\\s*[،,]\\s*|${colonPattern})` : colonPattern})${unit}`;
+	return allowCommaSeparated ? `${guardedUnit}(?:\\s*[،,]\\s*${guardedUnit})*` : guardedUnit;
+};
+const buildBalancedMarker = ({ allowParenthesized, allowWhitespaceBeforeColon, captureName, headwordBody }) => {
+	const colon = allowWhitespaceBeforeColon ? "\\s*:" : ":";
+	const withCapture = `(?<${captureName}>${headwordBody})`;
+	if (!allowParenthesized) return `${withCapture}${colon}`;
+	return `(?:\\(\\s*${withCapture}\\s*\\)|${withCapture})${colon}`;
+};
+const validateDictionaryEntryOptions = ({ captureName = "lemma", maxLetters = 10, minLetters = 2 }) => {
+	if (!Number.isInteger(minLetters) || minLetters < 1) throw new Error(`createArabicDictionaryEntryRule: minLetters must be an integer >= 1, got ${minLetters}`);
+	if (!Number.isInteger(maxLetters) || maxLetters < minLetters) throw new Error(`createArabicDictionaryEntryRule: maxLetters must be an integer >= minLetters, got ${maxLetters}`);
+	if (!/^[A-Za-z_]\w*$/.test(captureName)) throw new Error(`createArabicDictionaryEntryRule: invalid captureName "${captureName}"`);
+};
+const buildArabicDictionaryEntryRegexSource = ({ allowCommaSeparated = false, allowParenthesized = false, allowWhitespaceBeforeColon = false, captureName = "lemma", maxLetters = 10, midLineSubentries = true, minLetters = 2, stopWords }, capturePrefix) => {
+	validateDictionaryEntryOptions({
+		captureName,
+		maxLetters,
+		minLetters
+	});
+	const zeroWidthPrefix = "[\\u200E\\u200F\\u061C\\u200B\\u200C\\u200D\\uFEFF]*";
+	const wawWithMarks = `و${ARABIC_MARKS_CLASS}*`;
+	const alWithMarks = `ا${ARABIC_MARKS_CLASS}*ل${ARABIC_MARKS_CLASS}*`;
+	const lemmaUnit = `(?:${wawWithMarks})?(?:${alWithMarks})?${`${ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN}(?:${ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN}){${minLetters - 1},${maxLetters - 1}}`}`;
+	const stopAlternation = buildStopAlternation(stopWords);
+	const lemmaBody = buildHeadwordBody({
+		allowCommaSeparated,
+		colonPattern: allowWhitespaceBeforeColon ? "\\s*:" : ":",
+		stopAlternation,
+		stopwordBody: stopAlternation ? `(?:${wawWithMarks})?(?:${stopAlternation})` : "",
+		unit: lemmaUnit
+	});
+	const lineStartBoundary = `(?:(?<=^)|(?<=\\n))${zeroWidthPrefix}`;
+	const midLineTrigger = allowParenthesized ? `(?<=\\s)(?=(?:\\(\\s*)?${wawWithMarks}(?:${alWithMarks})?)` : `(?<=\\s)(?=${wawWithMarks}(?:${alWithMarks})?)`;
+	const prefixedCaptureName = capturePrefix ? `${capturePrefix}${captureName}` : captureName;
+	const regex = `(?:${lineStartBoundary}${midLineSubentries ? `|${midLineTrigger}` : ""})` + buildBalancedMarker({
+		allowParenthesized,
+		allowWhitespaceBeforeColon,
+		captureName: prefixedCaptureName,
+		headwordBody: lemmaBody
+	});
+	return {
+		captureNames: [prefixedCaptureName],
+		regex
+	};
+};
+/**
+* Creates a reusable split rule for Arabic dictionary entries.
+*
+* The returned rule preserves authoring intent as a serializable
+* `{ dictionaryEntry: ... }` pattern rather than eagerly compiling to a raw
+* regex string.
+*
+* @example
+* createArabicDictionaryEntryRule({
+*   stopWords: ['وقيل', 'ويقال', 'قال'],
+*   pageStartPrevWordStoplist: ['قال', 'وقيل', 'ويقال'],
+* })
+*
+* @example
+* createArabicDictionaryEntryRule({
+*   allowParenthesized: true,
+*   allowWhitespaceBeforeColon: true,
+*   allowCommaSeparated: true,
+*   stopWords: ['الليث', 'العجاج'],
+* })
+*/
+/**
+* @deprecated Prefer the top-level `SegmentationOptions.dictionary` profile for
+* whole-book dictionary segmentation. Keep this helper for advanced single-rule
+* composition inside a broader `SplitRule[]` pipeline.
+*/
+const createArabicDictionaryEntryRule = ({ allowCommaSeparated = false, allowParenthesized = false, allowWhitespaceBeforeColon = false, captureName = "lemma", maxLetters = 10, meta, midLineSubentries = true, minLetters = 2, pageStartPrevWordStoplist, samePagePrevWordStoplist, stopWords }) => {
+	validateDictionaryEntryOptions({
+		captureName,
+		maxLetters,
+		minLetters
+	});
+	return {
+		dictionaryEntry: {
+			allowCommaSeparated,
+			allowParenthesized,
+			allowWhitespaceBeforeColon,
+			captureName,
+			maxLetters,
+			midLineSubentries,
+			minLetters,
+			stopWords: uniqueCanonicalWords(stopWords)
+		},
+		meta,
+		pageStartPrevWordStoplist,
+		samePagePrevWordStoplist
+	};
+};
+//#endregion
 //#region src/segmentation/rule-regex.ts
 /**
 * Checks if a regex pattern contains standard (anonymous) capturing groups.
@@ -5319,425 +5792,566 @@ const buildSegments = (splitPoints, content, pageMap, rules, pageJoiner, hasDict
 	const createSegmentsFromSplitPoints = () => {
 		const result = [];
 		for (let i = 0; i < splitPoints.length; i++) {
-			const sp = splitPoints[i];
-			const end = splitPoints[i + 1]?.index ?? content.length;
-			const s = createSegment(sp.index, end, sp.meta, sp.capturedContent, sp.namedCaptures, sp.contentStartOffset);
-			if (s) result.push(s);
-		}
-		return result;
-	};
-	const segments = [];
-	if (!splitPoints.length) {
-		const firstId = pageMap.getId(0);
-		if (hasDictionaryProfile || anyRuleAllowsId(rules, firstId)) {
-			const s = createSegment(0, content.length);
-			if (s) segments.push(s);
-		}
-		return segments;
-	}
-	if (splitPoints[0].index > 0) {
-		const firstId = pageMap.getId(0);
-		if (hasDictionaryProfile || anyRuleAllowsId(rules, firstId)) {
-			const s = createSegment(0, splitPoints[0].index);
-			if (s) segments.push(s);
-		}
-	}
-	return [...segments, ...createSegmentsFromSplitPoints()];
-};
-//#endregion
-//#region src/validation/validate-segments.ts
-/**
-* Creates a short preview string of text content for error reporting.
-* Truncates content exceeding PREVIEW_LIMIT.
-*/
-const buildPreview = (text) => {
-	const normalized = text.replace(/\s+/g, " ").trim();
-	if (normalized.length <= 140) return normalized;
-	return `${normalized.slice(0, 140)}...`;
-};
-/**
-* Creates a lightweight snapshot of a segment for inclusion in validation checks.
-*/
-const buildSegmentSnapshot = (segment) => ({
-	contentPreview: buildPreview(segment.content),
-	from: segment.from,
-	to: segment.to
-});
-/**
-* Normalizes page content by applying preprocessing transforms and standardizing line endings.
-*/
-const normalizePages = (pages, options) => {
-	const transforms = options.preprocess ?? [];
-	return pages.map((page) => {
-		return {
-			content: normalizeLineEndings(transforms.length ? applyPreprocessToPage(page.content, page.id, transforms) : page.content),
-			id: page.id
-		};
-	});
-};
-/**
-* Joins all page content into a single string with boundary tracking.
-* Returns the joined string and a list of boundary mappings (start/end indices for each page).
-*/
-const buildJoinedContent = (pages, joiner) => {
-	const boundaries = [];
-	const joined = pages.map((p) => p.content).join(joiner);
-	let offset = 0;
-	for (let i = 0; i < pages.length; i++) {
-		const content = pages[i].content;
-		const start = offset;
-		const end = start + content.length;
-		boundaries.push({
-			end,
-			id: pages[i].id,
-			start
-		});
-		offset += content.length + (i < pages.length - 1 ? joiner.length : 0);
+			const sp = splitPoints[i];
+			const end = splitPoints[i + 1]?.index ?? content.length;
+			const s = createSegment(sp.index, end, sp.meta, sp.capturedContent, sp.namedCaptures, sp.contentStartOffset);
+			if (s) result.push(s);
+		}
+		return result;
+	};
+	const segments = [];
+	if (!splitPoints.length) {
+		const firstId = pageMap.getId(0);
+		if (hasDictionaryProfile || anyRuleAllowsId(rules, firstId)) {
+			const s = createSegment(0, content.length);
+			if (s) segments.push(s);
+		}
+		return segments;
+	}
+	if (splitPoints[0].index > 0) {
+		const firstId = pageMap.getId(0);
+		if (hasDictionaryProfile || anyRuleAllowsId(rules, firstId)) {
+			const s = createSegment(0, splitPoints[0].index);
+			if (s) segments.push(s);
+		}
 	}
+	return [...segments, ...createSegmentsFromSplitPoints()];
+};
+//#endregion
+//#region src/analysis/segmentation-advisor.ts
+const ZERO_WIDTH_REGEX = /[\u061C\u200B-\u200F\u202A-\u202E\u2060-\u2064\uFEFF]/gu;
+const ELLIPSIS_REGEX = /\.{3,}/g;
+const TRAILING_WAW_REGEX = /\sو\s+(?=[\p{Script=Arabic}])/gu;
+const STRUCTURAL_META_BY_TOKEN = {
+	bab: "chapter",
+	basmalah: "basmalah",
+	fasl: "section",
+	kitab: "book"
+};
+const NUMBER_TOKENS = [
+	"numbered",
+	"raqms",
+	"raqm",
+	"nums",
+	"num"
+];
+const DEFAULT_BREAKPOINTS = [{
+	pattern: "{{tarqim}}\\s*",
+	split: "after"
+}, ""];
+const resolveOptions = (pages, options = {}) => {
+	const minCount = pages.length >= 25 ? 3 : 2;
 	return {
-		boundaries,
-		joined
+		maxRules: options.maxRules ?? 4,
+		minLineStartCount: options.minLineStartCount ?? minCount,
+		minRepeatingCount: options.minRepeatingCount ?? minCount,
+		sampleSegments: options.sampleSegments ?? 5,
+		topLineStarts: options.topLineStarts ?? 12,
+		topRepeatingSequences: options.topRepeatingSequences ?? 8
 	};
 };
-/**
-* Binary search to find which page ID corresponds to a character offset in the joined content.
-* Returns undefined if the offset falls within a joiner gap or outside bounds.
-*/
-const findBoundaryIdForOffset = (offset, boundaries) => {
-	let lo = 0;
-	let hi = boundaries.length - 1;
-	while (lo <= hi) {
-		const mid = lo + hi >>> 1;
-		const boundary = boundaries[mid];
-		if (offset < boundary.start) hi = mid - 1;
-		else if (offset > boundary.end) lo = mid + 1;
-		else return boundary.id;
+const countMatches = (text, regex) => text.match(regex)?.length ?? 0;
+const getDetections = (pages) => pages.reduce((acc, page) => ({
+	ellipsisCount: acc.ellipsisCount + countMatches(page.content, ELLIPSIS_REGEX),
+	trailingWawCount: acc.trailingWawCount + countMatches(page.content, TRAILING_WAW_REGEX),
+	zeroWidthCount: acc.zeroWidthCount + countMatches(page.content, ZERO_WIDTH_REGEX)
+}), {
+	ellipsisCount: 0,
+	trailingWawCount: 0,
+	zeroWidthCount: 0
+});
+const getPreprocessSuggestions = (detections) => {
+	const suggestions = [];
+	if (detections.zeroWidthCount > 0) suggestions.push({
+		count: detections.zeroWidthCount,
+		reason: "Invisible directional/zero-width marks can break anchors and token matching.",
+		transform: "removeZeroWidth"
+	});
+	if (detections.ellipsisCount > 0) suggestions.push({
+		count: detections.ellipsisCount,
+		reason: "Repeated periods often cause noisy punctuation breakpoints.",
+		transform: "condenseEllipsis"
+	});
+	if (detections.trailingWawCount > 0) suggestions.push({
+		count: detections.trailingWawCount,
+		reason: "Separated waw prefixes are a common digitization artifact in Arabic corpora.",
+		transform: "fixTrailingWaw"
+	});
+	return suggestions;
+};
+const extractTokenNames = (pattern) => [...pattern.matchAll(/\{\{(\w+)(?::[^}]+)?\}\}/g)].map((match) => match[1]);
+const getStructuralMeta = (tokens) => {
+	for (const token of tokens) if (token in STRUCTURAL_META_BY_TOKEN) return STRUCTURAL_META_BY_TOKEN[token];
+};
+const applyFirstTokenReplacement = (pattern, token, replacement) => {
+	const target = `{{${token}}}`;
+	return pattern.includes(target) ? pattern.replace(target, replacement) : pattern;
+};
+const addNamedCaptures = (pattern) => {
+	let next = pattern;
+	if (next.includes("{{numbered}}")) next = next.replace("{{numbered}}", "{{raqms:num}} {{dash}} ");
+	else for (const token of NUMBER_TOKENS) {
+		const replacement = token === "num" ? "{{num:num}}" : `{{${token}:num}}`;
+		const replaced = applyFirstTokenReplacement(next, token, replacement);
+		if (replaced !== next) {
+			next = replaced;
+			break;
+		}
 	}
-	if (boundaries.length === 0) return;
-	const last = boundaries.at(-1);
-	return offset > last.end ? last.id : void 0;
-};
-/**
-* Helper to construct a standardized validation issue object.
-*/
-const createIssue = (type, segment, segmentIndex, overrides = {}, pageMap) => {
-	const segmentSnapshot = buildSegmentSnapshot(segment);
-	const page = pageMap?.get(segment.from);
-	const matchIndex = overrides.matchIndex;
-	const { matchIndex: _ignored, ...restOverrides } = overrides;
-	const base = {
-		actual: {
-			from: segment.from,
-			to: segment.to
-		},
-		segment: segmentSnapshot,
-		segmentIndex,
-		...restOverrides
-	};
-	switch (type) {
-		case "page_not_found": return {
-			...base,
-			evidence: overrides.evidence ?? `Segment.from=${segment.from} does not exist in input pages.`,
-			hint: "Check page IDs passed into segmentPages() and validateSegments().",
-			severity: "error",
-			type
-		};
-		case "content_not_found": return {
-			...base,
-			evidence: overrides.evidence ?? "Segment content not found in any page content.",
-			hint: overrides.hint ?? "Check preprocessing options, joiner settings, or whitespace normalization.",
-			pageContext: page ? {
-				pageId: page.id,
-				pagePreview: buildPreview(page.content)
-			} : void 0,
-			severity: "error",
-			type
+	if (next.includes("{{rumuz}}")) next = next.replace("{{rumuz}}", "{{rumuz:source}}");
+	return next;
+};
+const findTokenIndex = (pattern, token) => {
+	const plainIndex = pattern.indexOf(`{{${token}}}`);
+	const namedIndex = pattern.indexOf(`{{${token}:`);
+	if (plainIndex === -1) return namedIndex;
+	if (namedIndex === -1) return plainIndex;
+	return Math.min(plainIndex, namedIndex);
+};
+const trimNumberBoundaryPattern = (pattern) => {
+	const stopTokens = [
+		"naql",
+		"bab",
+		"basmalah",
+		"fasl",
+		"kitab"
+	];
+	let end = pattern.length;
+	for (const token of stopTokens) {
+		const index = findTokenIndex(pattern, token);
+		if (index >= 0) end = Math.min(end, index);
+	}
+	return pattern.slice(0, end).trimEnd();
+};
+const getRuleMeta = (tokens) => {
+	const structural = getStructuralMeta(tokens);
+	if (structural) return { type: structural };
+	if (tokens.includes("naql") || tokens.some((token) => NUMBER_TOKENS.includes(token))) return { type: "entry" };
+};
+const getSuggestionConfidence = (tokens, shape) => {
+	if (getStructuralMeta(tokens)) return "high";
+	if (tokens.some((token) => NUMBER_TOKENS.includes(token)) || tokens.includes("naql")) return "high";
+	if (shape === "sequence" && tokens.includes("rumuz")) return "medium";
+	return tokens.length > 0 ? "medium" : "low";
+};
+const getSuggestionReason = (tokens, source) => {
+	const structural = getStructuralMeta(tokens);
+	if (structural) return `Repeated structural marker suggests ${structural}-style boundaries.`;
+	if (tokens.some((token) => NUMBER_TOKENS.includes(token))) return "Repeated numbering marker is a strong candidate for entry boundaries.";
+	if (tokens.includes("naql")) return source === "line-start" ? "Repeated transmission phrase appears at line starts and can anchor segments." : "Repeated transmission phrase inside prose is a good candidate for template-based splitting.";
+	return source === "line-start" ? "Frequent line-start signature is worth trying as a structural boundary." : "Frequent tokenized sequence may help split continuous prose.";
+};
+const createRule = (pattern, tokens, shape) => {
+	const fuzzy = shouldDefaultToFuzzy(pattern);
+	const meta = getRuleMeta(tokens);
+	if (shape === "line-start") {
+		if (getStructuralMeta(tokens)) return meta ? {
+			fuzzy,
+			lineStartsWith: [pattern],
+			meta,
+			split: "at"
+		} : {
+			fuzzy,
+			lineStartsWith: [pattern],
+			split: "at"
 		};
-		case "page_attribution_mismatch": {
-			const matchedFromId = overrides.expected?.from ?? overrides.actual?.from ?? segment.from;
-			const actualPage = pageMap?.get(matchedFromId);
-			return {
-				...base,
-				evidence: overrides.evidence ?? `Content found in joined content at page ${matchedFromId}, but segment.from=${segment.from}.`,
-				hint: overrides.hint ?? "Check duplicate content handling and boundary detection rules.",
-				pageContext: actualPage ? {
-					matchIndex: matchIndex ?? -1,
-					pageId: actualPage.id,
-					pagePreview: buildPreview(actualPage.content)
-				} : void 0,
-				severity: "error",
-				type
+		if (tokens.some((token) => NUMBER_TOKENS.includes(token))) {
+			const captured = addNamedCaptures(trimNumberBoundaryPattern(pattern));
+			return meta ? {
+				fuzzy,
+				lineStartsAfter: [captured],
+				meta,
+				split: "at"
+			} : {
+				fuzzy,
+				lineStartsAfter: [captured],
+				split: "at"
 			};
 		}
-		case "max_pages_violation": return {
-			...base,
-			evidence: overrides.evidence ?? `Segment spans pages ${segment.from}-${overrides.actual?.to}.`,
-			hint: overrides.hint ?? "Check maxPages windowing in breakpoint-processor.ts and page constraints.",
-			severity: "error",
-			type
-		};
-		default: return {
-			...base,
-			severity: "error",
-			type
+		return meta ? {
+			fuzzy,
+			lineStartsWith: [pattern],
+			meta,
+			split: "at"
+		} : {
+			fuzzy,
+			lineStartsWith: [pattern],
+			split: "at"
 		};
 	}
+	const captured = addNamedCaptures(pattern);
+	return meta ? {
+		fuzzy,
+		meta,
+		split: "at",
+		template: captured
+	} : {
+		fuzzy,
+		split: "at",
+		template: captured
+	};
 };
-/**
-* Finds all occurrences of a content string within the joined text.
-* Respects search limits to avoid performance cliffs on highly repetitive content.
-*/
-const findJoinedMatches = (content, joined, searchStart, searchEnd, limit = Infinity) => {
-	const matches = [];
-	if (!content || searchStart >= searchEnd) return matches;
-	let idx = joined.indexOf(content, searchStart);
-	let count = 0;
-	while (idx >= 0 && idx < searchEnd && count < limit) {
-		matches.push({
-			end: idx + content.length - 1,
-			start: idx
-		});
-		idx = joined.indexOf(content, idx + 1);
-		if (idx >= searchEnd) break;
-		count++;
-	}
-	return matches;
+const createLineStartSuggestion = (pattern) => {
+	const tokens = extractTokenNames(pattern.pattern);
+	return {
+		confidence: getSuggestionConfidence(tokens, "line-start"),
+		count: pattern.count,
+		example: {
+			pageId: pattern.examples[0]?.pageId ?? -1,
+			text: pattern.examples[0]?.line ?? ""
+		},
+		pattern: pattern.pattern,
+		reason: getSuggestionReason(tokens, "line-start"),
+		rule: createRule(pattern.pattern, tokens, "line-start"),
+		source: "line-start"
+	};
 };
-/**
-* Verifies that a matched segment falls within the allowed maxTerms/maxPages constraints.
-* Checks both implicit spans (calculated from match end) and explicit segment.to claims.
-*/
-const checkMaxPagesViolation = (segment, segmentIndex, maxPages, matchEnd, _expectedBoundaryEnd, boundaries) => {
-	const actualToId = findBoundaryIdForOffset(matchEnd, boundaries);
-	if (actualToId === void 0) return [];
-	if (maxPages === 0) {
-		if (actualToId !== segment.from) return [createIssue("max_pages_violation", segment, segmentIndex, {
-			actual: {
-				from: segment.from,
-				to: actualToId
-			},
-			evidence: `Segment spans pages ${segment.from}-${actualToId} in joined content (maxPages=0).`,
-			expected: {
-				from: segment.from,
-				to: segment.from
-			}
-		})];
+const createRepeatingSuggestion = (pattern) => {
+	const tokens = extractTokenNames(pattern.pattern);
+	return {
+		confidence: getSuggestionConfidence(tokens, "sequence"),
+		count: pattern.count,
+		example: {
+			pageId: pattern.examples[0]?.pageId ?? -1,
+			text: pattern.examples[0]?.text ?? ""
+		},
+		pattern: pattern.pattern,
+		reason: getSuggestionReason(tokens, "repeating-sequence"),
+		rule: createRule(pattern.pattern, tokens, "sequence"),
+		source: "repeating-sequence"
+	};
+};
+const confidenceScore = (confidence) => confidence === "high" ? 3 : confidence === "medium" ? 2 : 1;
+const sourceScore = (mode, source) => {
+	if (mode === "structured") return source === "line-start" ? 3 : 1;
+	if (mode === "continuous") return source === "repeating-sequence" ? 3 : 1;
+	return source === "line-start" ? 3 : 2;
+};
+const compareSuggestions = (mode, left, right) => sourceScore(mode, right.source) - sourceScore(mode, left.source) || confidenceScore(right.confidence) - confidenceScore(left.confidence) || right.count - left.count || left.pattern.localeCompare(right.pattern);
+const dedupeSuggestions = (suggestions) => {
+	const seen = /* @__PURE__ */ new Set();
+	const deduped = [];
+	for (const suggestion of suggestions) {
+		const key = JSON.stringify(suggestion.rule);
+		if (seen.has(key)) continue;
+		seen.add(key);
+		deduped.push(suggestion);
 	}
-	if (segment.to !== void 0) {
-		if (actualToId > segment.to) return [createIssue("max_pages_violation", segment, segmentIndex, {
-			actual: {
-				from: segment.from,
-				to: actualToId
-			},
-			evidence: `Segment content ends on page ${actualToId} but segment.to is ${segment.to}.`,
-			expected: {
-				from: segment.from,
-				to: segment.to
-			}
-		})];
-	} else if (maxPages !== void 0) {
-		const span = actualToId - segment.from;
-		if (span > maxPages) return [createIssue("max_pages_violation", segment, segmentIndex, {
-			actual: {
-				from: segment.from,
-				to: actualToId
+	return deduped;
+};
+const chooseAssessment = (pages, lineStarts, repeatingSequences) => {
+	const totalLines = pages.reduce((sum, page) => sum + page.content.split("\n").length, 0);
+	const topLine = lineStarts[0]?.count ?? 0;
+	const topSequence = repeatingSequences[0]?.count ?? 0;
+	const hasDenseLineBreaks = totalLines > pages.length;
+	if (topLine >= Math.max(2, topSequence) && hasDenseLineBreaks) return {
+		mode: "structured",
+		reason: "Frequent repeated line-start markers dominate and the text has strong line structure."
+	};
+	if (topSequence > topLine && !hasDenseLineBreaks) return {
+		mode: "continuous",
+		reason: "Tokenized prose sequences are stronger than line-start signals and the pages are mostly continuous text."
+	};
+	return {
+		mode: "mixed",
+		reason: "The book shows both structural line markers and inline recurring sequences."
+	};
+};
+const getRecommendedOptions = (mode, suggestions, maxRules, preprocess) => {
+	const primarySource = mode === "continuous" ? "repeating-sequence" : "line-start";
+	const sourceMatched = suggestions.filter((suggestion) => suggestion.source === primarySource);
+	const selectedRules = (sourceMatched.length > 0 ? sourceMatched : suggestions).slice(0, maxRules).map((suggestion) => suggestion.rule);
+	const optimized = optimizeRules(selectedRules);
+	const baseOptions = primarySource === "line-start" ? {
+		pageJoiner: "newline",
+		rules: optimized.rules
+	} : { rules: optimized.rules };
+	return {
+		optimization: {
+			mergedCount: optimized.mergedCount,
+			optimizedRuleCount: optimized.rules.length,
+			originalRuleCount: selectedRules.length
+		},
+		options: preprocess.length > 0 ? {
+			...baseOptions,
+			preprocess
+		} : baseOptions
+	};
+};
+const evaluateRecommendation = (pages, options, sampleSegments) => {
+	if ((options.rules?.length ?? 0) === 0) return { segmentSamples: [] };
+	try {
+		const segments = segmentPages(pages, options);
+		const validation = validateSegments(pages, options, segments);
+		const totalLength = segments.reduce((sum, segment) => sum + segment.content.length, 0);
+		const multiPageSegments = segments.filter((segment) => segment.to !== void 0 && segment.to !== segment.from).length;
+		return {
+			evaluation: {
+				averageSegmentLength: segments.length === 0 ? 0 : totalLength / segments.length,
+				maxSegmentLength: Math.max(0, ...segments.map((segment) => segment.content.length)),
+				multiPageSegments,
+				segmentCount: segments.length,
+				validation
 			},
-			evidence: `Segment spans ${span} pages (maxPages=${maxPages}).`,
-			expected: {
-				from: segment.from,
-				to: segment.from + maxPages
-			}
-		})];
+			segmentSamples: segments.slice(0, sampleSegments)
+		};
+	} catch {
+		return { segmentSamples: [] };
 	}
-	return [];
+};
+const toTemplateFallbackRule = (rule) => {
+	if (!("lineStartsAfter" in rule) || !Array.isArray(rule.lineStartsAfter) || rule.lineStartsAfter.length !== 1) return null;
+	return rule.meta ? {
+		meta: rule.meta,
+		split: rule.split,
+		template: `^${rule.lineStartsAfter[0]}`
+	} : {
+		split: rule.split,
+		template: `^${rule.lineStartsAfter[0]}`
+	};
+};
+const getTemplateFallbackOptions = (options) => {
+	if ((options.rules?.length ?? 0) === 0) return null;
+	const fallbackRules = options.rules?.map(toTemplateFallbackRule).filter((rule) => rule !== null);
+	if (!fallbackRules || fallbackRules.length !== options.rules?.length || fallbackRules.length === 0) return null;
+	return options.preprocess ? {
+		pageJoiner: "newline",
+		preprocess: options.preprocess,
+		rules: fallbackRules
+	} : {
+		pageJoiner: "newline",
+		rules: fallbackRules
+	};
+};
+const shouldUseTemplateFallback = (primary, fallback) => {
+	if (!fallback) return false;
+	if (!primary) return true;
+	return fallback.segmentCount > primary.segmentCount && fallback.validation.summary.issues <= primary.validation.summary.issues;
+};
+const getBreakpointSuggestions = (pages, evaluation) => {
+	const averagePageLength = pages.length === 0 ? 0 : pages.reduce((sum, page) => sum + page.content.length, 0) / pages.length;
+	if (!((evaluation?.multiPageSegments ?? 0) > 0 || (evaluation?.maxSegmentLength ?? 0) > 4e3 || averagePageLength > 2500)) return [];
+	return [{
+		breakpoints: DEFAULT_BREAKPOINTS,
+		maxPages: 1,
+		prefer: "longer",
+		reason: "Some segments are likely to grow large enough that sentence punctuation plus page-boundary fallback is worth testing."
+	}];
 };
 /**
-* Handles validation when content is not found in the expected boundary window.
-* Fallback strategy: search entire document if segment matches existing content elsewhere.
+* Generate a machine-readable draft segmentation report for AI agents.
+*
+* This helper is intentionally deterministic: it inspects pages, drafts
+* candidate rules, validates them, and evaluates its own recommendation.
 */
-const handleMissingBoundary = (segment, segmentIndex, joined, boundaries, pageMap) => {
-	const matches = findJoinedMatches(segment.content, joined, 0, joined.length, 1);
-	if (matches.length === 0) return [createIssue("content_not_found", segment, segmentIndex, { evidence: "Segment content not found in any page content." }, pageMap)];
-	const match = matches[0];
-	const actualFromId = findBoundaryIdForOffset(match.start, boundaries);
-	const actualToId = findBoundaryIdForOffset(match.end, boundaries);
-	return [createIssue("page_attribution_mismatch", segment, segmentIndex, {
-		actual: {
-			from: segment.from,
-			to: segment.to
-		},
-		evidence: `Content found in joined content at page ${actualFromId}, but segment.from=${segment.from}.`,
-		expected: {
-			from: actualFromId,
-			to: actualToId
+const suggestSegmentationOptions = (pages, options = {}) => {
+	const resolved = resolveOptions(pages, options);
+	const detections = getDetections(pages);
+	const preprocessSuggestions = getPreprocessSuggestions(detections);
+	const preprocess = preprocessSuggestions.map((suggestion) => suggestion.transform);
+	const lineStarts = analyzeCommonLineStarts(pages, {
+		minCount: resolved.minLineStartCount,
+		sortBy: "count",
+		topK: resolved.topLineStarts
+	});
+	const repeatingSequences = analyzeRepeatingSequences(pages, {
+		maxElements: 3,
+		minCount: resolved.minRepeatingCount,
+		minElements: 1,
+		topK: resolved.topRepeatingSequences
+	});
+	const assessment = chooseAssessment(pages, lineStarts, repeatingSequences);
+	const lineSuggestions = lineStarts.map(createLineStartSuggestion);
+	const repeatingSuggestions = repeatingSequences.map(createRepeatingSuggestion);
+	const ruleSuggestions = dedupeSuggestions([...lineSuggestions, ...repeatingSuggestions]).sort((left, right) => compareSuggestions(assessment.mode, left, right));
+	const { optimization, options: recommendedOptions } = getRecommendedOptions(assessment.mode, ruleSuggestions, resolved.maxRules, preprocess);
+	const primary = evaluateRecommendation(pages, recommendedOptions, resolved.sampleSegments);
+	const fallbackOptions = getTemplateFallbackOptions(recommendedOptions);
+	const fallback = fallbackOptions ? evaluateRecommendation(pages, fallbackOptions, resolved.sampleSegments) : void 0;
+	const finalOptions = shouldUseTemplateFallback(primary.evaluation, fallback?.evaluation) && fallbackOptions ? fallbackOptions : recommendedOptions;
+	const finalEvaluation = finalOptions === fallbackOptions && fallback ? fallback : primary;
+	const ruleValidation = validateRules(finalOptions.rules ?? []).filter((result) => result !== void 0);
+	const ruleValidationErrors = formatValidationReport(ruleValidation);
+	return {
+		assessment,
+		breakpointSuggestions: getBreakpointSuggestions(pages, finalEvaluation.evaluation),
+		evaluation: finalEvaluation.evaluation,
+		lineStarts,
+		optimization,
+		preprocess: {
+			detections,
+			suggestions: preprocessSuggestions
 		},
-		matchIndex: match.start
-	}, pageMap)];
+		recommendedOptions: finalOptions,
+		repeatingSequences,
+		ruleSuggestions,
+		ruleValidation,
+		ruleValidationErrors,
+		segmentSamples: finalEvaluation.segmentSamples
+	};
 };
+//#endregion
+//#region src/detection.ts
+/**
+* Token detection order - more specific patterns first to avoid partial matches.
+* Example: 'raqms' before 'raqm' so "٣٤" matches 'raqms' not just the first digit.
+*
+* Tokens not in this list are appended in alphabetical order from TOKEN_PATTERNS.
+*/
+const TOKEN_PRIORITY_ORDER = [
+	"basmalah",
+	"kitab",
+	"bab",
+	"fasl",
+	"naql",
+	"rumuz",
+	"numbered",
+	"raqms",
+	"raqm",
+	"tarqim",
+	"bullet",
+	"dash",
+	"harf"
+];
 /**
-* Performs a widened search when the direct check fails.
-* Includes a small buffer around the expected position, and optionally a full-document search for short segments.
+* Gets the token detection priority order.
+* Returns tokens in priority order, with any TOKEN_PATTERNS not in the priority list appended.
 */
-const handleFallbackSearch = (segment, segmentIndex, joined, searchStart, searchEnd, expectedBoundary, boundaries, pageMap, maxPages, validationOptions) => {
-	const content = segment.content;
-	const bufferSize = 1e3;
-	const rawMatches = findJoinedMatches(content, joined, Math.max(0, searchStart - bufferSize), Math.min(joined.length, searchEnd + bufferSize), 5);
-	if (rawMatches.length === 0) {
-		const threshold = validationOptions?.fullSearchThreshold ?? 500;
-		if (content.length < threshold) {
-			const fullMatches = findJoinedMatches(content, joined, 0, joined.length, 50);
-			const validMatch = fullMatches.find((m) => {
-				return findBoundaryIdForOffset(m.start, boundaries) === segment.from;
-			});
-			if (validMatch) return checkMaxPagesViolation(segment, segmentIndex, maxPages, validMatch.end, expectedBoundary.end, boundaries);
-			if (fullMatches.length > 0) {
-				const match = fullMatches[0];
-				const actualFromId = findBoundaryIdForOffset(match.start, boundaries);
-				const actualToId = findBoundaryIdForOffset(match.end, boundaries);
-				return [createIssue("page_attribution_mismatch", segment, segmentIndex, {
-					actual: {
-						from: segment.from,
-						to: segment.to
-					},
-					evidence: `Content found in joined content at page ${actualFromId}, but segment.from=${segment.from}.`,
-					expected: {
-						from: actualFromId,
-						to: actualToId
-					},
-					matchIndex: match.start
-				}, pageMap)];
-			}
-		}
-		return [createIssue("content_not_found", segment, segmentIndex, {
-			evidence: `Segment content (${content.length} chars) not found in expected window.`,
-			hint: "Check page boundary attribution in segmenter.ts."
-		}, pageMap)];
-	}
-	const alignedMatches = rawMatches.filter((m) => m.start >= expectedBoundary.start && m.start <= expectedBoundary.end);
-	if (alignedMatches.length > 0) {
-		const primary = alignedMatches[0];
-		return checkMaxPagesViolation(segment, segmentIndex, maxPages, primary.end, expectedBoundary.end, boundaries);
-	}
-	const primary = rawMatches[0];
-	const actualFromId = findBoundaryIdForOffset(primary.start, boundaries);
-	const actualToId = findBoundaryIdForOffset(primary.end, boundaries);
-	return [createIssue("page_attribution_mismatch", segment, segmentIndex, {
-		actual: {
-			from: segment.from,
-			to: segment.to
-		},
-		evidence: `Content found in joined content at page ${actualFromId}, but segment.from=${segment.from}.`,
-		expected: {
-			from: actualFromId,
-			to: actualToId
-		},
-		matchIndex: primary.start
-	}, pageMap)];
+const getTokenPriority = () => {
+	const available = getAvailableTokens();
+	// First the explicitly ranked names that actually exist, in their ranked order.
+	const ranked = [];
+	for (const name of TOKEN_PRIORITY_ORDER) {
+		if (available.includes(name)) ranked.push(name);
+	}
+	// Then every other available name, ordered by the default string sort.
+	const unranked = [];
+	for (const name of available) {
+		if (!TOKEN_PRIORITY_ORDER.includes(name)) unranked.push(name);
+	}
+	unranked.sort();
+	return ranked.concat(unranked);
+};
+/**
+* Reports whether the span [startIndex, endIndex) of `text` stands on its own,
+* i.e. is not directly attached to a character from the U+0600–U+06FF block.
+*
+* The left neighbour is acceptable when it is missing, whitespace, an opening
+* bracket, or any character outside that block. The right neighbour is acceptable
+* when it is missing, whitespace, one of the listed closing delimiters, or any
+* character outside that block.
+*
+* @param text - Text that contains the match
+* @param startIndex - Offset of the first character of the match
+* @param endIndex - Offset just past the last character of the match
+* @returns true when both neighbours are acceptable
+*/
+const isRumuzStandalone = (text, startIndex, endIndex) => {
+	const prev = startIndex > 0 ? text[startIndex - 1] : "";
+	const next = endIndex < text.length ? text[endIndex] : "";
+	const inArabicBlock = (ch) => /[\u0600-\u06FF]/u.test(ch);
+	// Left side: bail out as soon as the preceding character disqualifies the match.
+	const leftClear = !prev || /\s/u.test(prev) || /[([{]/u.test(prev) || !inArabicBlock(prev);
+	if (!leftClear) return false;
+	// Right side: end of text is always fine.
+	if (!next) return true;
+	return /\s/u.test(next) || /[:：\-–—ـ،؛.?!؟)\]}]/u.test(next) || !inArabicBlock(next);
 };
 /**
-* Calculates the search range end index based on segment.to or strict bounds.
+* Analyzes text and returns all detected token patterns with their positions.
+* Patterns are detected in priority order to avoid partial matches.
+*
+* @param text - The text to analyze for token patterns
+* @returns Array of detected patterns sorted by position
+*
+* @example
+* detectTokenPatterns("٣٤ - حدثنا")
+* // Returns: [
+* //   { token: 'raqms', match: '٣٤', index: 0, endIndex: 2 },
+* //   { token: 'dash', match: '-', index: 3, endIndex: 4 },
+* //   { token: 'naql', match: 'حدثنا', index: 5, endIndex: 10 }
+* // ]
 */
-const getSearchRange = (segment, expectedBoundary, boundaryMap, joinedLength) => {
-	let searchEnd = expectedBoundary.end + 1;
-	if (segment.to !== void 0) {
-		const endBoundary = boundaryMap.get(segment.to);
-		if (endBoundary) searchEnd = endBoundary.end + 1;
-		else searchEnd = Math.min(joinedLength, expectedBoundary.end + 5e4);
+const detectTokenPatterns = (text) => {
+	if (!text) return [];
+	const results = [];
+	const coveredRanges = [];
+	// Half-open ranges [start, end) overlap exactly when each starts before the other ends.
+	const isPositionCovered = (start, end) => coveredRanges.some(([s, e]) => start < e && end > s);
+	for (const tokenName of getTokenPriority()) {
+		const pattern = TOKEN_PATTERNS[tokenName];
+		if (!pattern) continue;
+		try {
+			const regex = new RegExp(`(${pattern})`, "gu");
+			// matchAll steps past zero-length matches by itself; a manual exec() loop on a
+			// global regex would never terminate if a pattern could match the empty string.
+			for (const match of text.matchAll(regex)) {
+				const startIndex = match.index;
+				const endIndex = startIndex + match[0].length;
+				// An empty match carries no token text, so there is nothing to report.
+				if (endIndex === startIndex) continue;
+				if (tokenName === "rumuz" && !isRumuzStandalone(text, startIndex, endIndex)) continue;
+				// Higher-priority tokens have already claimed this stretch of text.
+				if (isPositionCovered(startIndex, endIndex)) continue;
+				results.push({
+					endIndex,
+					index: startIndex,
+					match: match[0],
+					token: tokenName
+				});
+				coveredRanges.push([startIndex, endIndex]);
+			}
+		} catch {
+			// Deliberate best effort: a token whose pattern cannot be compiled or run as a
+			// standalone regex is skipped so the remaining tokens are still detected.
+		}
 	}
-	return searchEnd;
+	return results.sort((a, b) => a.index - b.index);
 };
 /**
-* Validates attribution for a single segment by searching for its content in the joined text.
-* Returns issues if content is missing, mis-attributed, or violates page limits.
+* Generates a template pattern from text using detected tokens.
+* Replaces matched portions with {{token}} syntax.
+*
+* @param text - Original text
+* @param detected - Array of detected patterns from detectTokenPatterns
+* @returns Template string with tokens, e.g., "{{raqms}} {{dash}} "
+*
+* @example
+* const detected = detectTokenPatterns("٣٤ - ");
+* generateTemplateFromText("٣٤ - ", detected);
+* // Returns: "{{raqms}} {{dash}} "
 */
-const getAttributionIssues = (segment, segmentIndex, maxPages, joined, boundaries, boundaryMap, pageMap, validationOptions) => {
-	if (!segment.content) return [createIssue("content_not_found", segment, segmentIndex, { evidence: "Segment content is empty." }, pageMap)];
-	const expectedBoundary = boundaryMap.get(segment.from);
-	if (!expectedBoundary) return handleMissingBoundary(segment, segmentIndex, joined, boundaries, pageMap);
-	const searchEnd = getSearchRange(segment, expectedBoundary, boundaryMap, joined.length);
-	const searchStart = expectedBoundary.start;
-	const idx = joined.indexOf(segment.content, searchStart);
-	if (idx !== -1 && idx < searchEnd) return checkMaxPagesViolation(segment, segmentIndex, maxPages, idx + segment.content.length - 1, expectedBoundary.end, boundaries);
-	return handleFallbackSearch(segment, segmentIndex, joined, searchStart, searchEnd, expectedBoundary, boundaries, pageMap, maxPages, validationOptions);
+const generateTemplateFromText = (text, detected) => {
+	if (!text || detected.length === 0) return text;
+	// Substitute from the rightmost match backwards so the offsets of earlier matches stay valid.
+	const rightToLeft = detected.slice().sort((x, y) => y.index - x.index);
+	return rightToLeft.reduce((acc, { index, endIndex, token }) => {
+		const head = acc.slice(0, index);
+		const tail = acc.slice(endIndex);
+		return `${head}{{${token}}}${tail}`;
+	}, text);
 };
 /**
-* Performs purely static checks on the segment metadata (Ids and spans) before expensive content searching.
+* Determines the best pattern type for auto-generated rules based on detected patterns.
+*
+* @param detected - Array of detected patterns
+* @returns Suggested pattern type and whether to use fuzzy matching
 */
-const checkStaticMaxPages = (segment, index, maxPages) => {
-	if (maxPages === void 0 || segment.to === void 0) return null;
-	if (maxPages === 0) {
-		if (segment.to !== segment.from) return createIssue("max_pages_violation", segment, index, {
-			evidence: "maxPages=0 requires all segments to stay within one page.",
-			expected: {
-				from: segment.from,
-				to: segment.from
-			},
-			hint: "Check boundary detection in breakpoint-utils.ts."
-		});
-		return null;
-	}
-	const span = segment.to - segment.from;
-	if (span > maxPages) return createIssue("max_pages_violation", segment, index, {
-		evidence: `Segment spans ${span} pages (maxPages=${maxPages}).`,
-		expected: {
-			from: segment.from,
-			to: segment.from + maxPages
-		},
-		hint: "Check breakpoint windowing and page attribution in breakpoint-processor.ts."
-	});
-	return null;
+const suggestPatternConfig = (detected) => {
+	const structuralNames = ["basmalah", "kitab", "bab", "fasl"];
+	const headingNames = ["kitab", "bab", "fasl"];
+	const numberedNames = ["raqms", "raqm", "numbered"];
+	const containsAny = (names) => detected.some(({ token }) => names.includes(token));
+	const structural = containsAny(structuralNames);
+	const numbered = containsAny(numberedNames);
+	if (structural) {
+		// The first kitab/bab/fasl match names the metaType; otherwise fall back to "chapter".
+		const heading = detected.find(({ token }) => headingNames.includes(token));
+		return {
+			fuzzy: true,
+			metaType: heading?.token || "chapter",
+			patternType: "lineStartsWith"
+		};
+	}
+	if (numbered) {
+		return {
+			fuzzy: false,
+			metaType: "hadith",
+			patternType: "lineStartsAfter"
+		};
+	}
+	// Nothing structural or numbered: generic suggestion without a metaType.
+	return {
+		fuzzy: false,
+		patternType: "lineStartsAfter"
+	};
 };
 /**
-* Validates a list of segments against the source pages.
-* checks for:
-* - Page existence (invalid IDs)
-* - Content fidelity (content must exist in pages)
-* - Page attribution (from/to must match content location)
-* - Page constraints (maxPages violations)
+* Analyzes text and generates a complete suggested rule configuration.
 *
-* @param pages Input pages used for segmentation
-* @param options Operations used during segmentation (for preprocessing/joining consistency)
-* @param segments The output segments to validate
-* @param validationOptions Optional settings for validation behavior
-* @returns A detailed validation report
+* @param text - Highlighted text from the page
+* @returns Suggested rule configuration or null if no patterns detected
 */
-const validateSegments = (pages, options, segments, validationOptions) => {
-	const normalizedPages = normalizePages(pages, options);
-	const { boundaries, joined } = buildJoinedContent(normalizedPages, options.pageJoiner === "newline" ? "\n" : " ");
-	const boundaryMap = /* @__PURE__ */ new Map();
-	const pageMap = /* @__PURE__ */ new Map();
-	for (const b of boundaries) boundaryMap.set(b.id, b);
-	for (const p of normalizedPages) pageMap.set(p.id, p);
-	const pageIds = new Set(normalizedPages.map((p) => p.id));
-	const maxPages = options.maxPages;
-	const issues = [];
-	for (let i = 0; i < segments.length; i++) {
-		const segment = segments[i];
-		if (!pageIds.has(segment.from)) {
-			issues.push(createIssue("page_not_found", segment, i));
-			continue;
-		}
-		if (segment.to !== void 0 && !pageIds.has(segment.to)) issues.push(createIssue("page_not_found", segment, i, { evidence: `Segment.to=${segment.to} does not exist in input pages.` }));
-		const staticMaxPageIssue = checkStaticMaxPages(segment, i, maxPages);
-		if (staticMaxPageIssue) issues.push(staticMaxPageIssue);
-		const attributionIssues = getAttributionIssues(segment, i, maxPages, joined, boundaries, boundaryMap, pageMap, validationOptions);
-		issues.push(...attributionIssues);
-	}
-	const errors = issues.filter((issue) => issue.severity === "error").length;
-	const warnings = issues.filter((issue) => issue.severity === "warn").length;
+const analyzeTextForRule = (text) => {
+	const detected = detectTokenPatterns(text);
+	if (detected.length === 0) return null; // no known token found, so there is nothing to suggest
 	return {
-		issues,
-		ok: issues.length === 0,
-		summary: {
-			errors,
-			issues: issues.length,
-			pageCount: pages.length,
-			segmentCount: segments.length,
-			warnings
-		}
+		detected,
+		template: generateTemplateFromText(text, detected), // `text` with every detected match replaced by {{token}}
+		...suggestPatternConfig(detected) // contributes fuzzy, patternType and, when one is suggested, metaType
 	};
 };
 //#endregion
-export { ARABIC_BASE_LETTER_CLASS, ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN, ARABIC_MARKS_CLASS, ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN, DictionaryProfileValidationError, PATTERN_TYPE_KEYS, TOKEN_PATTERNS, Token, analyzeCommonLineStarts, analyzeDictionaryMarkdownPages, analyzeRepeatingSequences, analyzeTextForRule, applyPreprocessToPage, applyTokenMappings, classifyDictionaryHeading, condenseEllipsis, containsTokens, createArabicDictionaryEntryRule, detectTokenPatterns, diagnoseDictionaryProfile, escapeRegex, escapeTemplateBrackets, escapeWordsOutsideTokens, expandCompositeTokensInTemplate, expandTokens, expandTokensWithCaptures, fixTrailingWaw, formatValidationReport, generateTemplateFromText, getAvailableTokens, getDebugReason, getSegmentDebugReason, getTokenPattern, makeDiacriticInsensitive, normalizeArabicForComparison, optimizeRules, removeZeroWidth, scanDictionaryMarkdownPage, segmentPages, shouldDefaultToFuzzy, stripTokenMappings, suggestPatternConfig, templateToRegex, validateDictionaryProfile, validateRules, validateSegments, withCapture };
+export { ARABIC_BASE_LETTER_CLASS, ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN, ARABIC_MARKS_CLASS, ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN, DictionaryProfileValidationError, PATTERN_TYPE_KEYS, TOKEN_PATTERNS, Token, analyzeCommonLineStarts, analyzeDictionaryMarkdownPages, analyzeRepeatingSequences, analyzeTextForRule, applyPreprocessToPage, applyTokenMappings, classifyDictionaryHeading, condenseEllipsis, containsTokens, createArabicDictionaryEntryRule, detectTokenPatterns, diagnoseDictionaryProfile, escapeRegex, escapeTemplateBrackets, escapeWordsOutsideTokens, expandCompositeTokensInTemplate, expandTokens, expandTokensWithCaptures, fixTrailingWaw, formatValidationReport, generateTemplateFromText, getAvailableTokens, getDebugReason, getSegmentDebugReason, getTokenPattern, makeDiacriticInsensitive, normalizeArabicForComparison, optimizeRules, removeZeroWidth, scanDictionaryMarkdownPage, segmentPages, shouldDefaultToFuzzy, stripTokenMappings, suggestPatternConfig, suggestSegmentationOptions, templateToRegex, validateDictionaryProfile, validateRules, validateSegments, withCapture };
 //# sourceMappingURL=index.mjs.map