flappa-doormal 2.18.0 → 2.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -148,7 +148,7 @@ numbered: "{{raqms}} {{dash}} " };
148
148
  const expandCompositeTokensInTemplate = (template) => {
149
149
  let out = template;
150
150
  for (let i = 0; i < 10; i++) {
151
- const next = out.replace(/\{\{(\w+)\}\}/g, (m, tokenName) => COMPOSITE_TOKENS[tokenName] ?? m);
151
+ const next = out.replace(/\{\{(\w+)\}\}/g, (m, tokenName) => tokenName in COMPOSITE_TOKENS ? COMPOSITE_TOKENS[tokenName] : m);
152
152
  if (next === out) break;
153
153
  out = next;
154
154
  }
@@ -162,7 +162,8 @@ const expandCompositeTokensInTemplate = (template) => {
162
162
  * @returns Expanded pattern with base tokens replaced
163
163
  * @internal
164
164
  */
165
- const expandBaseTokens = (template) => template.replace(/\{\{(\w+)\}\}/g, (_, tokenName) => BASE_TOKENS[tokenName] ?? `{{${tokenName}}}`);
165
+ const expandBaseTokens = (template) => template.replace(/\{\{(\w+)\}\}/g, (_, tokenName) => tokenName in BASE_TOKENS ? BASE_TOKENS[tokenName] : `{{${tokenName}}}`);
166
+ const EXPANDED_COMPOSITE_TOKENS = Object.fromEntries(Object.entries(COMPOSITE_TOKENS).map(([key, value]) => [key, expandBaseTokens(value)]));
166
167
  /**
167
168
  * Token definitions mapping human-readable token names to regex patterns.
168
169
  *
@@ -190,7 +191,7 @@ const expandBaseTokens = (template) => template.replace(/\{\{(\w+)\}\}/g, (_, to
190
191
  */
191
192
  const TOKEN_PATTERNS = {
192
193
  ...BASE_TOKENS,
193
- ...Object.fromEntries(Object.entries(COMPOSITE_TOKENS).map(([k, v]) => [k, expandBaseTokens(v)]))
194
+ ...EXPANDED_COMPOSITE_TOKENS
194
195
  };
195
196
  /**
196
197
  * Regex pattern for matching tokens with optional named capture syntax.
@@ -283,8 +284,8 @@ const expandTokenLiteral = (literal, opts) => {
283
284
  if (!parsed) return literal;
284
285
  const { tokenName, captureName } = parsed;
285
286
  if (!tokenName && captureName) return `(?<${opts.registerCapture(captureName)}>.+)`;
287
+ if (!(tokenName in TOKEN_PATTERNS)) return literal;
286
288
  let tokenPattern = TOKEN_PATTERNS[tokenName];
287
- if (!tokenPattern) return literal;
288
289
  tokenPattern = maybeApplyFuzzyToTokenPattern(tokenPattern, opts.fuzzyTransform);
289
290
  if (captureName) return `(?<${opts.registerCapture(captureName)}>${tokenPattern})`;
290
291
  return tokenPattern;
@@ -490,7 +491,7 @@ const applyTokenMappings = (template, mappings) => {
490
491
  * // → '{{raqms}} {{dash}}'
491
492
  */
492
493
  const stripTokenMappings = (template) => {
493
- return template.replace(/\{\{([^:}]+):[^}]+\}\}/g, "{{$1}}");
494
+ return template.replace(/\{\{([^:}]*)?:[^}]+\}\}/g, (_match, tokenName) => `{{${tokenName ?? ""}}}`);
494
495
  };
495
496
  //#endregion
496
497
  //#region src/utils/textUtils.ts
@@ -1279,6 +1280,451 @@ const analyzeTextForRule = (text) => {
1279
1280
  };
1280
1281
  };
1281
1282
  //#endregion
1283
+ //#region src/dictionary/arabic-dictionary-rule.ts
1284
+ const uniqueCanonicalWords = (words) => {
1285
+ const seen = /* @__PURE__ */ new Set();
1286
+ const result = [];
1287
+ for (const word of words) {
1288
+ const normalized = normalizeArabicForComparison(word);
1289
+ if (!normalized || seen.has(normalized)) continue;
1290
+ seen.add(normalized);
1291
+ result.push(word);
1292
+ }
1293
+ return result;
1294
+ };
1295
+ const buildStopAlternation = (stopWords) => {
1296
+ const unique = uniqueCanonicalWords(stopWords);
1297
+ if (unique.length === 0) return "";
1298
+ return unique.map((word) => makeDiacriticInsensitive(normalizeArabicForComparison(word))).join("|");
1299
+ };
1300
+ const buildHeadwordBody = ({ allowCommaSeparated, colonPattern, stopAlternation, stopwordBody, unit }) => {
1301
+ if (!stopAlternation) return allowCommaSeparated ? `${unit}(?:\\s*[،,]\\s*${unit})*` : unit;
1302
+ const guardedUnit = `(?!(?:${stopwordBody})${allowCommaSeparated ? `(?:\\s*[،,]\\s*|${colonPattern})` : colonPattern})${unit}`;
1303
+ return allowCommaSeparated ? `${guardedUnit}(?:\\s*[،,]\\s*${guardedUnit})*` : guardedUnit;
1304
+ };
1305
+ const buildBalancedMarker = ({ allowParenthesized, allowWhitespaceBeforeColon, captureName, headwordBody }) => {
1306
+ const colon = allowWhitespaceBeforeColon ? "\\s*:" : ":";
1307
+ const withCapture = `(?<${captureName}>${headwordBody})`;
1308
+ if (!allowParenthesized) return `${withCapture}${colon}`;
1309
+ return `(?:\\(\\s*${withCapture}\\s*\\)|${withCapture})${colon}`;
1310
+ };
1311
+ const validateDictionaryEntryOptions = ({ captureName = "lemma", maxLetters = 10, minLetters = 2 }) => {
1312
+ if (!Number.isInteger(minLetters) || minLetters < 1) throw new Error(`createArabicDictionaryEntryRule: minLetters must be an integer >= 1, got ${minLetters}`);
1313
+ if (!Number.isInteger(maxLetters) || maxLetters < minLetters) throw new Error(`createArabicDictionaryEntryRule: maxLetters must be an integer >= minLetters, got ${maxLetters}`);
1314
+ if (!/^[A-Za-z_]\w*$/.test(captureName)) throw new Error(`createArabicDictionaryEntryRule: invalid captureName "${captureName}"`);
1315
+ };
1316
+ const buildArabicDictionaryEntryRegexSource = ({ allowCommaSeparated = false, allowParenthesized = false, allowWhitespaceBeforeColon = false, captureName = "lemma", maxLetters = 10, midLineSubentries = true, minLetters = 2, stopWords }, capturePrefix) => {
1317
+ validateDictionaryEntryOptions({
1318
+ captureName,
1319
+ maxLetters,
1320
+ minLetters
1321
+ });
1322
+ const zeroWidthPrefix = "[\\u200E\\u200F\\u061C\\u200B\\u200C\\u200D\\uFEFF]*";
1323
+ const wawWithMarks = `و${ARABIC_MARKS_CLASS}*`;
1324
+ const alWithMarks = `ا${ARABIC_MARKS_CLASS}*ل${ARABIC_MARKS_CLASS}*`;
1325
+ const lemmaUnit = `(?:${wawWithMarks})?(?:${alWithMarks})?${`${ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN}(?:${ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN}){${minLetters - 1},${maxLetters - 1}}`}`;
1326
+ const stopAlternation = buildStopAlternation(stopWords);
1327
+ const lemmaBody = buildHeadwordBody({
1328
+ allowCommaSeparated,
1329
+ colonPattern: allowWhitespaceBeforeColon ? "\\s*:" : ":",
1330
+ stopAlternation,
1331
+ stopwordBody: stopAlternation ? `(?:${wawWithMarks})?(?:${stopAlternation})` : "",
1332
+ unit: lemmaUnit
1333
+ });
1334
+ const lineStartBoundary = `(?:(?<=^)|(?<=\\n))${zeroWidthPrefix}`;
1335
+ const midLineTrigger = allowParenthesized ? `(?<=\\s)(?=(?:\\(\\s*)?${wawWithMarks}(?:${alWithMarks})?)` : `(?<=\\s)(?=${wawWithMarks}(?:${alWithMarks})?)`;
1336
+ const prefixedCaptureName = capturePrefix ? `${capturePrefix}${captureName}` : captureName;
1337
+ const regex = `(?:${lineStartBoundary}${midLineSubentries ? `|${midLineTrigger}` : ""})` + buildBalancedMarker({
1338
+ allowParenthesized,
1339
+ allowWhitespaceBeforeColon,
1340
+ captureName: prefixedCaptureName,
1341
+ headwordBody: lemmaBody
1342
+ });
1343
+ return {
1344
+ captureNames: [prefixedCaptureName],
1345
+ regex
1346
+ };
1347
+ };
1348
+ /**
1349
+ * Creates a reusable split rule for Arabic dictionary entries.
1350
+ *
1351
+ * The returned rule preserves authoring intent as a serializable
1352
+ * `{ dictionaryEntry: ... }` pattern rather than eagerly compiling to a raw
1353
+ * regex string.
1354
+ *
1355
+ * @example
1356
+ * createArabicDictionaryEntryRule({
1357
+ * stopWords: ['وقيل', 'ويقال', 'قال'],
1358
+ * pageStartPrevWordStoplist: ['قال', 'وقيل', 'ويقال'],
1359
+ * })
1360
+ *
1361
+ * @example
1362
+ * createArabicDictionaryEntryRule({
1363
+ * allowParenthesized: true,
1364
+ * allowWhitespaceBeforeColon: true,
1365
+ * allowCommaSeparated: true,
1366
+ * stopWords: ['الليث', 'العجاج'],
1367
+ * })
1368
+ */
1369
+ /**
1370
+ * @deprecated Prefer the top-level `SegmentationOptions.dictionary` profile for
1371
+ * whole-book dictionary segmentation. Keep this helper for advanced single-rule
1372
+ * composition inside a broader `SplitRule[]` pipeline.
1373
+ */
1374
+ const createArabicDictionaryEntryRule = ({ allowCommaSeparated = false, allowParenthesized = false, allowWhitespaceBeforeColon = false, captureName = "lemma", maxLetters = 10, meta, midLineSubentries = true, minLetters = 2, pageStartPrevWordStoplist, samePagePrevWordStoplist, stopWords }) => {
1375
+ validateDictionaryEntryOptions({
1376
+ captureName,
1377
+ maxLetters,
1378
+ minLetters
1379
+ });
1380
+ return {
1381
+ dictionaryEntry: {
1382
+ allowCommaSeparated,
1383
+ allowParenthesized,
1384
+ allowWhitespaceBeforeColon,
1385
+ captureName,
1386
+ maxLetters,
1387
+ midLineSubentries,
1388
+ minLetters,
1389
+ stopWords: uniqueCanonicalWords(stopWords)
1390
+ },
1391
+ meta,
1392
+ pageStartPrevWordStoplist,
1393
+ samePagePrevWordStoplist
1394
+ };
1395
+ };
1396
+ //#endregion
1397
+ //#region src/dictionary/heading-classifier.ts
1398
+ const HEADING_PREFIX$1 = "## ";
1399
+ const CODE_LINE_PATTERN$1 = getTokenPattern("harfs").replaceAll("\\s+", "[ \\t]+");
1400
+ const ARABIC_WORD_PATTERN = ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN;
1401
+ const PLAIN_ENTRY_RE = new RegExp(`^(?<lemma>${ARABIC_WORD_PATTERN}(?:\\s+${ARABIC_WORD_PATTERN}){0,1}|[([{]${ARABIC_WORD_PATTERN}(?:\\s+${ARABIC_WORD_PATTERN}){0,1}[)\\]}])\\s*:`, "u");
1402
+ const INLINE_SUBENTRY_RE = new RegExp(`(^|[\\s،؛,:.])(?<lemma>و${ARABIC_WORD_PATTERN})\\s*:`, "gu");
1403
+ const CODE_LINE_RE = new RegExp(`^(?:[[(])?(?<codes>${CODE_LINE_PATTERN$1})(?:[)\\]])?$`, "u");
1404
+ const PAIRED_FORMS_RE = new RegExp(`^(?<forms>${ARABIC_WORD_PATTERN}(?:\\s*[،,]\\s*${ARABIC_WORD_PATTERN})+)\\s*:`, "u");
1405
+ const ARABIC_BOUNDARY_OR_PUNCTUATION = "(?=$|[\\s:،؛()\\[\\]{}\\-–—]|[^\\p{Script=Arabic}])";
1406
+ const CHAPTER_HEADING_RE = new RegExp(`^(?:[([{]\\s*)?(?:باب|فصل|كتاب|حرف|أبواب)${ARABIC_BOUNDARY_OR_PUNCTUATION}`, "u");
1407
+ const CLUSTER_HEADING_RE = new RegExp(`^(?:\\(?\\s*)?(?:أبواب|أبنية)${ARABIC_BOUNDARY_OR_PUNCTUATION}|^(?=.{1,80}$).+?[،,].+?(?:مستعمل|مهمل|مستعملة|مستعملان)(?=$|[.،,:؛\\s])`, "u");
1408
+ const STATUS_HEADING_RE = new RegExp(`^(?:${CODE_LINE_PATTERN$1}|(?:(?:${ARABIC_WORD_PATTERN}\\s+){1,3}${ARABIC_WORD_PATTERN}|${ARABIC_WORD_PATTERN}(?:\\s*[،,]\\s*${ARABIC_WORD_PATTERN})+))\\s*:?[\\s]*(?:مستعمل|مستعملة|مستعملان|مهمل|مهملة)(?=$|[.،,:؛\\s])`, "u");
1409
+ const CODE_NOTE_HEADING_RE = new RegExp(`^(?:${ARABIC_WORD_PATTERN}\\s+){1,3}\\(.+\\)$`, "u");
1410
+ const COLON_NOISE_RE = /^.+:\s*.+$/u;
1411
+ const CHAPTER_TERMS = [
1412
+ "باب",
1413
+ "فصل",
1414
+ "كتاب",
1415
+ "حرف",
1416
+ "أبواب"
1417
+ ];
1418
+ const MARKER_PREFIXES = [
1419
+ "بسم الله",
1420
+ "توكلت على الله",
1421
+ "آخر كتاب",
1422
+ "ويتلوه"
1423
+ ];
1424
+ const NOISE_TOKENS = [
1425
+ "قال",
1426
+ "وقيل",
1427
+ "ويقال",
1428
+ "وفي",
1429
+ "يعني",
1430
+ "فإذا"
1431
+ ];
1432
+ const emptyCounts = () => ({
1433
+ chapter: 0,
1434
+ cluster: 0,
1435
+ codeLine: 0,
1436
+ entry: 0,
1437
+ inlineSubentry: 0,
1438
+ lineEntry: 0,
1439
+ marker: 0,
1440
+ noise: 0,
1441
+ pairedForms: 0
1442
+ });
1443
+ const extractWrappedLemma = (lemma) => lemma.replace(/^[[{(]+|[\])}]+$/gu, "").trim();
1444
+ const stripLeadingWrappers = (text) => text.replace(/^[[{(]+\s*/u, "").trim();
1445
+ const isDelimitedPrefixMatch$1 = (text, prefix) => {
1446
+ if (text === prefix) return true;
1447
+ if (!text.startsWith(prefix)) return false;
1448
+ const nextChar = text[prefix.length];
1449
+ return nextChar === void 0 || /[\s:،؛()[\]{}\-–—]/u.test(nextChar);
1450
+ };
1451
+ const isCodeHeading = (text) => {
1452
+ if (CODE_LINE_RE.test(text)) return true;
1453
+ const words = text.trim().split(/\s+/u).filter(Boolean);
1454
+ return words.length === 1 && (words[0]?.length ?? 0) === 1;
1455
+ };
1456
+ const looksLikeNoiseHeading = (text) => {
1457
+ const normalized = normalizeArabicForComparison(text);
1458
+ const wordCount = text.trim().split(/\s+/u).filter(Boolean).length;
1459
+ if (/(?:مستعمل|مهمل|مستعملة|مستعملان)(?=$|[.،,:؛\s])/u.test(text)) return false;
1460
+ if (wordCount >= 8 && COLON_NOISE_RE.test(text)) return true;
1461
+ return NOISE_TOKENS.some((token) => normalized.includes(normalizeArabicForComparison(token))) && wordCount >= 4;
1462
+ };
1463
+ /**
1464
+ * Classifies a markdown heading line produced by `convertContentToMarkdown()`.
1465
+ */
1466
+ const classifyDictionaryHeading = (line) => {
1467
+ const text = line.startsWith(HEADING_PREFIX$1) ? line.slice(3).trim() : line.trim();
1468
+ const unwrapped = stripLeadingWrappers(text);
1469
+ if (!text) return "noise";
1470
+ if (CHAPTER_HEADING_RE.test(text) || CHAPTER_TERMS.some((term) => isDelimitedPrefixMatch$1(normalizeArabicForComparison(unwrapped), normalizeArabicForComparison(term)))) return "chapter";
1471
+ if (looksLikeNoiseHeading(text)) return "noise";
1472
+ if (isCodeHeading(text)) return "marker";
1473
+ if (MARKER_PREFIXES.some((token) => normalizeArabicForComparison(unwrapped).startsWith(normalizeArabicForComparison(token)))) return "marker";
1474
+ if (STATUS_HEADING_RE.test(text) || CODE_NOTE_HEADING_RE.test(text)) return "marker";
1475
+ if (CLUSTER_HEADING_RE.test(text)) return "cluster";
1476
+ return "entry";
1477
+ };
1478
+ const createHeadingMatch = (kind, page, rawLine, lineNumber) => ({
1479
+ kind,
1480
+ lemma: kind === "entry" ? rawLine.slice(3).trim() : void 0,
1481
+ line: lineNumber,
1482
+ pageId: page.id,
1483
+ text: rawLine
1484
+ });
1485
+ const createSurfaceMatch = (kind, page, text, lineNumber, lemma) => ({
1486
+ kind,
1487
+ lemma,
1488
+ line: lineNumber,
1489
+ pageId: page.id,
1490
+ text
1491
+ });
1492
+ const scanHeadingLine = (page, rawLine, lineNumber, matches) => {
1493
+ if (!rawLine.startsWith(HEADING_PREFIX$1)) return false;
1494
+ const kind = classifyDictionaryHeading(rawLine);
1495
+ matches.push(createHeadingMatch(kind, page, rawLine, lineNumber));
1496
+ return true;
1497
+ };
1498
+ const scanLineEntry = (page, rawLine, lineNumber, matches) => {
1499
+ const lineEntry = rawLine.match(PLAIN_ENTRY_RE);
1500
+ if (!lineEntry?.groups?.lemma) return;
1501
+ matches.push(createSurfaceMatch("lineEntry", page, rawLine, lineNumber, extractWrappedLemma(lineEntry.groups.lemma)));
1502
+ };
1503
+ const scanPairedForms = (page, rawLine, lineNumber, matches) => {
1504
+ const pairedForms = rawLine.match(PAIRED_FORMS_RE);
1505
+ if (!pairedForms?.groups?.forms) return;
1506
+ matches.push(createSurfaceMatch("pairedForms", page, rawLine, lineNumber, pairedForms.groups.forms));
1507
+ };
1508
+ const scanCodeLine = (page, rawLine, lineNumber, matches) => {
1509
+ const codeLine = rawLine.match(CODE_LINE_RE);
1510
+ if (!codeLine?.groups?.codes) return;
1511
+ matches.push(createSurfaceMatch("codeLine", page, rawLine, lineNumber, codeLine.groups.codes));
1512
+ };
1513
+ const scanInlineSubentries = (page, rawLine, lineNumber, matches) => {
1514
+ for (const match of rawLine.matchAll(INLINE_SUBENTRY_RE)) {
1515
+ if (!match.groups?.lemma) continue;
1516
+ matches.push(createSurfaceMatch("inlineSubentry", page, match.groups.lemma, lineNumber, match.groups.lemma));
1517
+ }
1518
+ };
1519
+ /**
1520
+ * Extracts dictionary surface matches from a markdown page.
1521
+ */
1522
+ const scanDictionaryMarkdownPage = (page) => {
1523
+ const lines = page.content.split(/\n/u);
1524
+ const matches = [];
1525
+ for (let index = 0; index < lines.length; index++) {
1526
+ const rawLine = lines[index]?.trim() ?? "";
1527
+ if (!rawLine) continue;
1528
+ if (scanHeadingLine(page, rawLine, index + 1, matches)) continue;
1529
+ scanLineEntry(page, rawLine, index + 1, matches);
1530
+ scanPairedForms(page, rawLine, index + 1, matches);
1531
+ scanCodeLine(page, rawLine, index + 1, matches);
1532
+ scanInlineSubentries(page, rawLine, index + 1, matches);
1533
+ }
1534
+ return matches;
1535
+ };
1536
+ /**
1537
+ * Aggregates dictionary surface counts across markdown pages.
1538
+ */
1539
+ const analyzeDictionaryMarkdownPages = (pages) => {
1540
+ const counts = emptyCounts();
1541
+ const matches = [];
1542
+ for (const page of pages) {
1543
+ const pageMatches = scanDictionaryMarkdownPage(page);
1544
+ for (const match of pageMatches) {
1545
+ counts[match.kind] += 1;
1546
+ matches.push(match);
1547
+ }
1548
+ }
1549
+ return {
1550
+ counts,
1551
+ matches
1552
+ };
1553
+ };
1554
+ //#endregion
1555
+ //#region src/dictionary/profile.ts
1556
+ const normalizedProfileCache = /* @__PURE__ */ new WeakMap();
1557
+ const normalizeStopLemmaWord = (word) => normalizeArabicForComparison(word).replace(/^[\s:؛،,.!?؟()[\]{}«»"'“”‘’]+/gu, "").replace(/[\s:؛،,.!?؟()[\]{}«»"'“”‘’]+$/gu, "").trim();
1558
+ const uniqueNormalizedSet = (values, normalize) => new Set(values.map(normalize).filter(Boolean));
1559
+ const assertNever$2 = (value) => {
1560
+ throw new Error(`Unhandled dictionary profile variant: ${JSON.stringify(value)}`);
1561
+ };
1562
+ const normalizeFamily = (family) => {
1563
+ switch (family.use) {
1564
+ case "heading": return {
1565
+ ...family,
1566
+ allowNextLineColon: family.allowNextLineColon ?? false,
1567
+ allowSingleLetter: family.allowSingleLetter ?? false
1568
+ };
1569
+ case "lineEntry": return {
1570
+ ...family,
1571
+ allowMultiWord: family.allowMultiWord ?? false,
1572
+ allowWhitespaceBeforeColon: family.allowWhitespaceBeforeColon ?? false,
1573
+ wrappers: family.wrappers ?? "none"
1574
+ };
1575
+ case "inlineSubentry": return {
1576
+ ...family,
1577
+ prefixes: family.prefixes ?? ["و"],
1578
+ stripPrefixesFromLemma: family.stripPrefixesFromLemma ?? true
1579
+ };
1580
+ case "codeLine": return {
1581
+ ...family,
1582
+ wrappers: family.wrappers ?? "either"
1583
+ };
1584
+ case "pairedForms": return {
1585
+ ...family,
1586
+ requireStatusTail: family.requireStatusTail ?? false,
1587
+ separator: family.separator ?? "comma"
1588
+ };
1589
+ default: return assertNever$2(family);
1590
+ }
1591
+ };
1592
+ const normalizeBlocker = (blocker) => {
1593
+ switch (blocker.use) {
1594
+ case "authorityIntro": return {
1595
+ ...blocker,
1596
+ precision: blocker.precision ?? "high"
1597
+ };
1598
+ case "stopLemma": return {
1599
+ ...blocker,
1600
+ normalizedWords: uniqueNormalizedSet(blocker.words, normalizeStopLemmaWord)
1601
+ };
1602
+ case "previousWord": return {
1603
+ ...blocker,
1604
+ normalizedWords: uniqueNormalizedSet(blocker.words, normalizeArabicForComparison)
1605
+ };
1606
+ case "previousChar": return {
1607
+ ...blocker,
1608
+ charSet: new Set(blocker.chars)
1609
+ };
1610
+ case "intro":
1611
+ case "pageContinuation": return blocker;
1612
+ default: return assertNever$2(blocker);
1613
+ }
1614
+ };
1615
+ const normalizeZone = (zone) => ({
1616
+ blockers: (zone.blockers ?? []).map(normalizeBlocker),
1617
+ families: zone.families.map(normalizeFamily),
1618
+ name: zone.name,
1619
+ when: zone.when ? {
1620
+ activateAfter: zone.when.activateAfter,
1621
+ maxPageId: zone.when.maxPageId,
1622
+ minPageId: zone.when.minPageId
1623
+ } : void 0
1624
+ });
1625
+ const createIssue$1 = (code, path, message, zoneName) => ({
1626
+ code,
1627
+ message,
1628
+ path,
1629
+ ...zoneName ? { zoneName } : {}
1630
+ });
1631
+ const validateGate = (gate, zone, gateIndex, seenActivateAfterKeys, issues) => {
1632
+ const gatePath = `zones[].when.activateAfter[${gateIndex}]`.replace("[]", `[${zone.name}]`);
1633
+ if (gate.use === "headingText") {
1634
+ if (!gate.match.trim()) issues.push(createIssue$1("invalid_gate_match", `${gatePath}.match`, `dictionary gate match must be non-empty`, zone.name));
1635
+ if (gate.fuzzy !== void 0 && typeof gate.fuzzy !== "boolean") issues.push(createIssue$1("invalid_gate_fuzzy", `${gatePath}.fuzzy`, `dictionary gate fuzzy must be a boolean when provided`, zone.name));
1636
+ }
1637
+ const dedupeKey = `${gate.use}:${JSON.stringify(gate)}`;
1638
+ if (seenActivateAfterKeys.has(dedupeKey)) issues.push(createIssue$1("duplicate_activate_after_gate", gatePath, `dictionary zone "${zone.name}" has duplicate activateAfter gates`, zone.name));
1639
+ seenActivateAfterKeys.add(dedupeKey);
1640
+ };
1641
+ const validateFamily = (family, zone, familyIndex, issues) => {
1642
+ const familyPath = `zones[].families[${familyIndex}]`.replace("[]", `[${zone.name}]`);
1643
+ switch (family.use) {
1644
+ case "heading":
1645
+ if (family.classes.length === 0) issues.push(createIssue$1("empty_heading_classes", `${familyPath}.classes`, `dictionary heading family in zone "${zone.name}" must include at least one class`, zone.name));
1646
+ if (family.emit === "chapter" && !family.classes.includes("chapter")) issues.push(createIssue$1("inert_heading_family", familyPath, `dictionary heading family in zone "${zone.name}" emits "chapter" but never matches chapter headings`, zone.name));
1647
+ if (family.emit === "marker" && !family.classes.includes("marker")) issues.push(createIssue$1("inert_heading_family", familyPath, `dictionary heading family in zone "${zone.name}" emits "marker" but never matches marker headings`, zone.name));
1648
+ if (family.emit === "entry" && !family.classes.includes("entry")) issues.push(createIssue$1("inert_heading_family", familyPath, `dictionary heading family in zone "${zone.name}" emits "entry" but never matches entry headings`, zone.name));
1649
+ break;
1650
+ case "lineEntry": break;
1651
+ case "inlineSubentry":
1652
+ if (family.prefixes?.some((prefix) => !prefix.trim())) issues.push(createIssue$1("empty_inline_prefixes", `${familyPath}.prefixes`, `inlineSubentry prefixes must be non-empty strings`, zone.name));
1653
+ break;
1654
+ case "codeLine": break;
1655
+ case "pairedForms": break;
1656
+ default: assertNever$2(family);
1657
+ }
1658
+ };
1659
+ const validateBlocker = (blocker, zone, blockerIndex, issues) => {
1660
+ const blockerPath = `zones[].blockers[${blockerIndex}]`.replace("[]", `[${zone.name}]`);
1661
+ switch (blocker.use) {
1662
+ case "stopLemma":
1663
+ if (blocker.words.length === 0 || blocker.words.some((word) => !word.trim())) issues.push(createIssue$1("invalid_stop_words", `${blockerPath}.words`, `stopLemma blocker in zone "${zone.name}" must include non-empty words`, zone.name));
1664
+ break;
1665
+ case "previousWord":
1666
+ if (blocker.words.length === 0 || blocker.words.some((word) => !word.trim())) issues.push(createIssue$1("invalid_previous_words", `${blockerPath}.words`, `previousWord blocker in zone "${zone.name}" must include non-empty words`, zone.name));
1667
+ break;
1668
+ case "previousChar":
1669
+ if (blocker.chars.length === 0 || blocker.chars.some((char) => !char)) issues.push(createIssue$1("invalid_previous_chars", `${blockerPath}.chars`, `previousChar blocker in zone "${zone.name}" must include chars`, zone.name));
1670
+ break;
1671
+ case "authorityIntro":
1672
+ case "intro":
1673
+ case "pageContinuation": break;
1674
+ default: assertNever$2(blocker);
1675
+ }
1676
+ };
1677
+ var DictionaryProfileValidationError = class extends Error {
1678
+ issues;
1679
+ constructor(issues) {
1680
+ super(issues.length === 1 ? issues[0].message : `Dictionary profile validation failed with ${issues.length} issues`);
1681
+ this.name = "DictionaryProfileValidationError";
1682
+ this.issues = issues;
1683
+ }
1684
+ };
1685
+ const validateZone = (zone, zoneIndex, seenZoneNames, issues) => {
1686
+ const zonePath = `zones[${zoneIndex}]`;
1687
+ const trimmedName = zone.name.trim();
1688
+ if (!trimmedName) issues.push(createIssue$1("empty_zone_name", `${zonePath}.name`, `dictionary zone name must be non-empty`));
1689
+ else if (seenZoneNames.has(trimmedName)) issues.push(createIssue$1("duplicate_zone_name", `${zonePath}.name`, `dictionary zone names must be unique; duplicated "${trimmedName}"`, trimmedName));
1690
+ else seenZoneNames.add(trimmedName);
1691
+ if (zone.families.length === 0) issues.push(createIssue$1("empty_zone_families", `${zonePath}.families`, `dictionary zone "${zone.name}" must declare at least one family`, zone.name));
1692
+ if (zone.when?.minPageId !== void 0 && zone.when?.maxPageId !== void 0 && zone.when.minPageId > zone.when.maxPageId) issues.push(createIssue$1("invalid_zone_page_range", `${zonePath}.when`, `dictionary zone "${zone.name}" has minPageId greater than maxPageId`, zone.name));
1693
+ const seenActivateAfterKeys = /* @__PURE__ */ new Set();
1694
+ for (let gateIndex = 0; gateIndex < (zone.when?.activateAfter?.length ?? 0); gateIndex++) validateGate(zone.when.activateAfter[gateIndex], zone, gateIndex, seenActivateAfterKeys, issues);
1695
+ for (let familyIndex = 0; familyIndex < zone.families.length; familyIndex++) validateFamily(zone.families[familyIndex], zone, familyIndex, issues);
1696
+ for (let blockerIndex = 0; blockerIndex < (zone.blockers?.length ?? 0); blockerIndex++) validateBlocker(zone.blockers[blockerIndex], zone, blockerIndex, issues);
1697
+ };
1698
+ /**
1699
+ * Validates a dictionary profile without normalizing it.
1700
+ */
1701
+ const validateDictionaryProfile = (profile) => {
1702
+ const issues = [];
1703
+ if (profile.version !== 2) issues.push(createIssue$1("invalid_version", "version", `dictionary profile version must be 2, got ${profile.version}`));
1704
+ if (profile.zones.length === 0) {
1705
+ issues.push(createIssue$1("missing_zones", "zones", `dictionary profile must contain at least one zone`));
1706
+ return issues;
1707
+ }
1708
+ const seenZoneNames = /* @__PURE__ */ new Set();
1709
+ for (let zoneIndex = 0; zoneIndex < profile.zones.length; zoneIndex++) validateZone(profile.zones[zoneIndex], zoneIndex, seenZoneNames, issues);
1710
+ return issues;
1711
+ };
1712
+ /**
1713
+ * Normalizes and validates a dictionary profile before runtime matching.
1714
+ */
1715
+ const normalizeDictionaryProfile = (profile) => {
1716
+ const cached = normalizedProfileCache.get(profile);
1717
+ if (cached) return cached;
1718
+ const issues = validateDictionaryProfile(profile);
1719
+ if (issues.length > 0) throw new DictionaryProfileValidationError(issues);
1720
+ const normalized = {
1721
+ version: 2,
1722
+ zones: profile.zones.map(normalizeZone)
1723
+ };
1724
+ normalizedProfileCache.set(profile, normalized);
1725
+ return normalized;
1726
+ };
1727
+ //#endregion
1282
1728
  //#region src/types/rules.ts
1283
1729
  /**
1284
1730
  * Pattern type key names for split rules.
@@ -1300,9 +1746,850 @@ const PATTERN_TYPE_KEYS = [
1300
1746
  "lineStartsAfter",
1301
1747
  "lineEndsWith",
1302
1748
  "template",
1303
- "regex"
1749
+ "regex",
1750
+ "dictionaryEntry"
1304
1751
  ];
1305
1752
  //#endregion
1753
+ //#region src/segmentation/debug-meta.ts
1754
+ const resolveDebugConfig = (debug) => {
1755
+ if (debug === true) return {
1756
+ includeBreakpoint: true,
1757
+ includeRule: true,
1758
+ metaKey: "_flappa"
1759
+ };
1760
+ if (!debug || typeof debug !== "object") return null;
1761
+ const { metaKey, include } = debug;
1762
+ const includeRule = Array.isArray(include) ? include.includes("rule") : true;
1763
+ return {
1764
+ includeBreakpoint: Array.isArray(include) ? include.includes("breakpoint") : true,
1765
+ includeRule,
1766
+ metaKey: typeof metaKey === "string" && metaKey ? metaKey : "_flappa"
1767
+ };
1768
+ };
1769
+ const getRulePatternType = (rule) => {
1770
+ return PATTERN_TYPE_KEYS.find((key) => key in rule) ?? "regex";
1771
+ };
1772
+ const isPlainObject$1 = (v) => Boolean(v) && typeof v === "object" && !Array.isArray(v);
1773
+ const mergeDebugIntoMeta = (meta, metaKey, patch) => {
1774
+ const out = meta ? { ...meta } : {};
1775
+ const existing = out[metaKey];
1776
+ out[metaKey] = {
1777
+ ...isPlainObject$1(existing) ? existing : {},
1778
+ ...patch
1779
+ };
1780
+ return out;
1781
+ };
1782
+ const buildRuleDebugPatch = (ruleIndex, rule, wordIndex) => {
1783
+ const patternType = getRulePatternType(rule);
1784
+ const patterns = rule[patternType];
1785
+ const word = wordIndex !== void 0 && Array.isArray(patterns) && patterns[wordIndex] !== void 0 ? patterns[wordIndex] : void 0;
1786
+ return { rule: {
1787
+ index: ruleIndex,
1788
+ patternType,
1789
+ ...wordIndex !== void 0 ? { wordIndex } : {},
1790
+ ...word !== void 0 ? { word } : {}
1791
+ } };
1792
+ };
1793
+ const buildBreakpointDebugPatch = (breakpointIndex, rule, wordIndex) => ({ breakpoint: {
1794
+ index: breakpointIndex,
1795
+ kind: rule.pattern === "" ? "pageBoundary" : rule.regex ? "regex" : "pattern",
1796
+ pattern: rule.pattern ?? rule.regex,
1797
+ ...wordIndex !== void 0 ? { wordIndex } : {},
1798
+ ...wordIndex !== void 0 && rule.words ? { word: rule.words[wordIndex] } : {}
1799
+ } });
1800
+ /**
1801
+ * Helper to format the debug info into a human-readable string.
1802
+ * @param meta - The segment metadata object
1803
+ * @param options - Formatting options
1804
+ */
1805
+ const formatRuleReason = (rule, concise) => {
1806
+ const { index, patternType, wordIndex, word } = rule;
1807
+ if (concise) return `Rule: ${word ? `"${word}"` : patternType}`;
1808
+ const wordInfo = word ? ` (Matched: "${word}")` : "";
1809
+ return `Rule #${index} (${patternType})${wordIndex !== void 0 ? ` [idx:${wordIndex}]` : ""}${wordInfo}`;
1810
+ };
1811
+ const formatBreakpointReason = (breakpoint, concise) => {
1812
+ const { index, kind, pattern, wordIndex, word } = breakpoint;
1813
+ if (kind === "pageBoundary") return concise ? "Breakpoint: <page-boundary>" : "Page Boundary (Fallback)";
1814
+ if (concise) return `Breakpoint: ${word ? `"${word}"` : `"${pattern}"`}`;
1815
+ if (word) return `Breakpoint #${index} (Words) [idx:${wordIndex}] - "${word}"`;
1816
+ return `Breakpoint #${index} (${kind}) - "${pattern}"`;
1817
+ };
1818
+ const formatContentLengthReason = (split, concise) => {
1819
+ const { maxContentLength, splitReason } = split;
1820
+ if (concise) return `> ${maxContentLength} (${splitReason})`;
1821
+ return `Safety Split (${splitReason}) > ${maxContentLength}`;
1822
+ };
1823
+ /**
1824
+ * Helper to format the debug info into a human-readable string.
1825
+ * @param meta - The segment metadata object
1826
+ * @param options - Formatting options
1827
+ */
1828
+ const getDebugReason = (meta, options) => {
1829
+ const debug = meta?._flappa;
1830
+ if (!debug) return "-";
1831
+ const concise = options?.concise;
1832
+ if (debug.rule) return formatRuleReason(debug.rule, concise);
1833
+ if (debug.breakpoint) return formatBreakpointReason(debug.breakpoint, concise);
1834
+ if (debug.contentLengthSplit) return formatContentLengthReason(debug.contentLengthSplit, concise);
1835
+ return "Unknown";
1836
+ };
1837
+ /**
1838
+ * Convenience helper to get the formatted debug reason directly from a segment.
1839
+ * @param segment - The segment object
1840
+ * @param options - Formatting options
1841
+ */
1842
+ const getSegmentDebugReason = (segment, options) => {
1843
+ return getDebugReason(segment.meta, options);
1844
+ };
1845
+ //#endregion
1846
+ //#region src/dictionary/runtime.ts
1847
// Phrases that (after Arabic normalization) introduce quoted material — "he said",
// "and in the hadith", "and in a reading", etc. Matched by isIntroCandidate at the
// start of candidate text and by endsWithIntroPhrase at the end of preceding context,
// to suppress entry candidates that merely open a citation.
const INTRO_PHRASES = [
	"وقال",
	"قال",
	"وفي الحديث",
	"في الحديث",
	"وفي حديث",
	"في حديث",
	"وفي رواية",
	"في رواية",
	"وفي قراءة",
	"في قراءة",
	"وفي قول",
	"في قول",
	"وفي كلام",
	"في كلام",
	"ومنه قول",
	"ومنها قول",
	"وقرأ",
	"قرأ",
	"قراءة",
	"حديث",
	"ويقال",
	"وقيل",
	"قلت",
	"فقال",
	"قال الشاعر",
	"أنشد",
	"وأنشد"
];
// Tail phrases: if the text immediately BEFORE a candidate ends with one of these
// (vowelling notes, Quran/hadith citations, honorifics, "Abu/Ibn …"), the candidate
// is treated as citation context rather than a new entry (see endsWithIntroContext).
const INTRO_TAIL_PHRASES = [
	"بفتح",
	"بالفتح",
	"بكسر",
	"بالكسر",
	"بضم",
	"بالضم",
	"بالتحريك",
	"حديث",
	"الحديث",
	"في التنزيل",
	"وفي التنزيل",
	"في التنزيل العزيز",
	"وفي التنزيل العزيز",
	"في مقتل",
	"وفي مقتل",
	"في المجاز",
	"وفي المجاز",
	"من المجاز",
	"ومن المجاز",
	"في رواية",
	"وفي رواية",
	"في قراءة",
	"وفي قراءة",
	"في قول",
	"وفي قول",
	"في كلام",
	"وفي كلام",
	"في صفة",
	"وفي صفة",
	"في خطبته",
	"وفي خطبته",
	"ومنه قول",
	"ومنها قول",
	"يقال لرقبة",
	"على جهتين",
	"قوله جل",
	"قوله جل وعز",
	"جل وعز",
	"ومنه حديث",
	"ومنه الحديث",
	"كرم الله",
	"صلى الله عليه",
	"رضي الله عنه",
	"رضي الله عنها",
	"رضي الله عنهما",
	"قال ابو",
	"وقال ابو",
	"عن ابي",
	"قال ابن",
	"وقال ابن",
	"عن ابن"
];
// Regex fallbacks for citation tails the fixed phrase lists cannot cover
// (an intro word followed by up to N trailing words). Applied to normalized text.
const INTRO_TAIL_PATTERNS = [
	/(?:^|\s)(?:في|وفي|ومنه|ومنها)\s+(?:حديث|الحديث|رواية|قراءة|قول|كلام|مقتل|صفة|خطبته)(?:\s+\S+){0,8}$/u,
	/(?:^|\s)(?:حديث|الحديث|رواية|قراءة|قول|كلام)(?:\s+\S+){1,8}$/u,
	/(?:^|\s)(?:قوله|قول(?:ه|هم)?|قال(?:\s+قائل)?|وقرأ|قرأ|قراءة)\s+(?:جل(?:\s+وعز)?|[^\s]+)$/u,
	/(?:^|\s)(?:ابو|ابي|ابا|ابن|بن|بنت)(?:\s+\S+){1,4}$/u,
	/(?:^|\s)(?:قال|وقال|انشد|وانشد|روي|وروي|اخبر|واخبر)(?:\s+\S+){0,4}$/u
];
// If the part of a lemma after a comma starts with one of these ("i.e.", "it is
// said", "its plural is" …), the comma tail is a gloss rather than a second form
// and the candidate is rejected (see hasBlockedQualifierTail).
const QUALIFIER_TAIL_PREFIXES = [
	"أي",
	"قال",
	"تقول",
	"يقال",
	"يقول",
	"يريد",
	"يُريد",
	"ويقال",
	"ويقول",
	"وجمعه",
	"وجمعها",
	"والجميع",
	"والجمع"
];
// Lemmas starting with these are structural artifacts (volume/section markers),
// not dictionary entries (see looksLikeStructuralLeak).
const STRUCTURAL_LEMMA_PREFIXES = [
	"لجزء",
	"جزء",
	"ومما يستدرك عليه",
	"آخر حرف",
	"كتاب حرف"
];
// Line shapes that look structural: "N - (…)", "(…)", "(…) ## …".
const STRUCTURAL_LINE_PATTERNS = [
	/^\d+\s*-\s*\(.+\)$/u,
	/^\(.+\)$/u,
	/^\(.+\)\s*##\s*/u
];
// A structural-looking line is only rejected when it also mentions one of these
// section keywords (chapter/section/letter/book …) — see looksLikeStructuralLeak.
const STRUCTURAL_LINE_KEYWORDS = [
	"باب",
	"فصل",
	"حرف",
	"أبواب",
	"كتاب",
	"المعجمة",
	"المهملة",
	"المثناة"
];
// When the previous page ends with one of these words and no sentence-final
// punctuation, the current page likely continues mid-sentence, so a candidate at
// its very top is suppressed (see rejectsViaPageContinuationBlocker).
const CONTINUATION_PREV_WORDS = [
	"بفتح",
	"بالفتح",
	"بكسر",
	"بالكسر",
	"بضم",
	"بالضم",
	"بالتحريك",
	"قال",
	"وقال",
	"وقيل",
	"ويقال",
	"يقال",
	"قلت",
	"فقال",
	"قالوا",
	"من",
	"في",
	"على",
	"إذا",
	"نحو",
	"ثم",
	"وجل"
];
1997
// Matches "(and) said Abu/Ibn/<lexicographer>…" or "<lexicographer> <word>" at the
// start of a candidate — an authority citation, not an entry.
const AUTHORITY_RE = /^(?:(?:و)?قال\s+(?:أبو|ابن|ثعلب|الليث|الأزهري|الجوهري|الفراء)\b|(?:أبو|ابن|ثعلب|الليث|الأزهري|الجوهري|الفراء)\s+\S+)/u;
// Known lexicographer names; a candidate whose pre-colon head equals one of these
// (after stop-lemma normalization) is an authority citation (see isAuthorityCandidate).
const AUTHORITY_HEAD_WORDS = [
	"الأزهري",
	"الأصمعي",
	"الأشجعي",
	"الأموي",
	"الأمويّ",
	"الجوهري",
	"الرياشي",
	"الزجاج",
	"الزجاجي",
	"الشيباني",
	"الفراء",
	"الكسائي",
	"اللحياني",
	"الليث",
	"المبرد",
	"المنذري",
	"ثعلب",
	"شمر"
];
// Sentence-final punctuation (Latin and Arabic marks, ellipsis) at end of text.
const STRONG_SENTENCE_TERMINATORS$1 = /[.!?؟؛۔…]$/u;
// Trailing page-wrap noise at a page edge: whitespace, Arabic-Indic/ASCII digits,
// quotes and brackets (typically page numbers and decorations).
const TRAILING_PAGE_WRAP_NOISE$1 = /[\s\u0660-\u0669\d«»"“”'‘’()[\]{}<>]+$/u;
// Page-wrap noise plus word punctuation — stripped before extracting the last word.
const TRAILING_WORD_DELIMITERS$1 = /[\s\u0660-\u0669\d«»"“”'‘’()[\]{}<>.,!?؟؛،:]+$/u;
// Global matcher for Arabic words with optional diacritic marks (pattern constant
// defined elsewhere in this module).
const ARABIC_WORD_REGEX$1 = new RegExp(ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN, "gu");
const HEADING_PREFIX = "## ";
// "harfs" token pattern with generic \s+ narrowed to same-line whitespace so a
// code line cannot span multiple lines.
const CODE_LINE_PATTERN = getTokenPattern("harfs").replaceAll("\\s+", "[ \\t]+");
// A lemma that is nothing but a root-letter code sequence.
const BARE_CODE_LEMMA_RE = new RegExp(`^(?:${CODE_LINE_PATTERN})$`, "u");
// "(in) use" / "unused" status markers that follow root listings.
const STATUS_TAIL_PATTERN = "(?:مستعمل|مستعملة|مستعملان|مهمل|مهملة)";
// Zone-gate token -> Arabic heading word (chapter/section/book).
const GATE_TOKEN_MAP = {
	bab: "باب",
	fasl: "فصل",
	kitab: "كتاب"
};
// Characters permitted to follow a gate prefix for a delimited prefix match.
const GATE_DELIMITER_RE = /[\s:،؛()[\]{}\-–—]/u;
2032
/**
 * Exhaustiveness guard: thrown when a switch over runtime variants receives a
 * value none of its cases handled.
 * @param value - The unexpected variant (serialized into the error message)
 */
const assertNever$1 = (value) => {
  const serialized = JSON.stringify(value);
  throw new Error(`Unhandled dictionary runtime variant: ${serialized}`);
};
2035
// Per-family compiled-regex caches, keyed by the family config object itself.
// WeakMap keys let cached regexes be collected when a profile's family objects go away.
const lineEntryRegexCache = /* @__PURE__ */ new WeakMap();
const inlineSubentryRegexCache = /* @__PURE__ */ new WeakMap();
const pairedFormsRegexCache = /* @__PURE__ */ new WeakMap();
2038
/** Strips trailing whitespace plus page-wrap noise (digits, quotes, brackets) from `text`. */
const trimTrailingPageWrapNoise$1 = (text) => {
  const withoutTrailingSpace = text.trimEnd();
  return withoutTrailingSpace.replace(TRAILING_PAGE_WRAP_NOISE$1, "");
};
/** True when the page content, after noise trimming, ends in a strong sentence terminator. */
const endsWithStrongSentenceTerminator$1 = (pageContent) => STRONG_SENTENCE_TERMINATORS$1.test(trimTrailingPageWrapNoise$1(pageContent));
2042
/**
 * Returns the last Arabic word that appears before `endExclusive` in `text`,
 * or "" when none is found. Only the final 256 characters are scanned to keep
 * this cheap on long pages; trailing noise and punctuation are stripped first.
 */
const extractLastArabicWord$1 = (text, endExclusive = text.length) => {
	const windowStart = Math.max(0, endExclusive - 256);
	const withoutTrailingDelimiters = trimTrailingPageWrapNoise$1(text.slice(windowStart, endExclusive)).replace(TRAILING_WORD_DELIMITERS$1, "");
	let lastMatch = "";
	// The regex is shared module state with the /g flag; reset lastIndex defensively
	// (matchAll works on a clone, but other callers may use .test/.exec).
	ARABIC_WORD_REGEX$1.lastIndex = 0;
	for (const match of withoutTrailingDelimiters.matchAll(ARABIC_WORD_REGEX$1)) lastMatch = match[0];
	return lastMatch;
};
2050
/**
 * Scans backwards from `endExclusive` and returns the first non-whitespace
 * character found, or "" when the prefix is empty/all whitespace.
 */
const previousNonWhitespaceChar = (text, endExclusive = text.length) => {
  let cursor = endExclusive - 1;
  while (cursor >= 0) {
    const current = text[cursor];
    if (current && !/\s/u.test(current)) return current;
    cursor -= 1;
  }
  return "";
};
2057
// Diacritic-insensitive equality/prefix checks (normalizeArabicForComparison is
// defined elsewhere in this module).
const normalizedEquals = (left, right) => normalizeArabicForComparison(left) === normalizeArabicForComparison(right);
const normalizedStartsWith = (text, prefix) => normalizeArabicForComparison(text).startsWith(normalizeArabicForComparison(prefix));
// Normalizes a stop-lemma for set lookups: normalize the Arabic, then strip
// leading and trailing punctuation/quotes/brackets.
const normalizeStopLemma = (text) => normalizeArabicForComparison(text).replace(/^[\s:؛،,.!?؟()[\]{}«»"'“”‘’]+/gu, "").replace(/[\s:؛،,.!?؟()[\]{}«»"'“”‘’]+$/gu, "").trim();
// The last `maxChars` characters preceding `endExclusive` — the context window
// inspected by blocker checks.
const getTrailingContext = (text, endExclusive, maxChars = 240) => text.slice(Math.max(0, endExclusive - maxChars), endExclusive);
/**
 * True when `text` equals `prefix`, or starts with `prefix` and the character
 * that follows is a delimiter — so a gate word matches a heading but not a
 * longer word that merely begins with the same letters.
 */
const isDelimitedPrefixMatch = (text, prefix) => {
	if (text === prefix) return true;
	if (!text.startsWith(prefix)) return false;
	const nextChar = text[prefix.length];
	return nextChar === void 0 || GATE_DELIMITER_RE.test(nextChar);
};
2067
/**
 * Builds one context record per page: its boundary, normalized content, index,
 * and pre-split lines. Throws when `normalizedPages` or `pageMap.boundaries`
 * do not line up 1:1 with `pages`.
 * @param pages - Raw page objects
 * @param pageMap - Must carry one boundary per page
 * @param normalizedPages - Optional pre-normalized content (falls back to normalizeLineEndings)
 */
const createPageContexts = (pages, pageMap, normalizedPages) => {
	if (normalizedPages && normalizedPages.length !== pages.length) throw new Error(`Dictionary runtime expected ${pages.length} normalized pages, received ${normalizedPages.length}`);
	if (pageMap.boundaries.length !== pages.length) throw new Error(`Dictionary runtime expected ${pages.length} page boundaries, received ${pageMap.boundaries.length}`);
	const contexts = [];
	for (let index = 0; index < pages.length; index++) {
		const page = pages[index];
		const boundary = pageMap.boundaries[index];
		if (!page || !boundary) throw new Error(`Dictionary runtime encountered a missing page or boundary at index ${index}`);
		const content = normalizedPages?.[index] ?? normalizeLineEndings(page.content);
		contexts.push({
			boundary,
			content,
			index,
			lines: buildPageLines(content),
			page
		});
	}
	return contexts;
};
// Normalizes text for intro-phrase comparison: Arabic normalization, then slashes,
// quotes and brackets become spaces, and whitespace collapses to single spaces.
const normalizeIntroContextText = (text) => normalizeArabicForComparison(text).replace(/[\\/]+/gu, " ").replace(/[«»"“”'‘’()[\]{}]+/gu, " ").replace(/\s+/gu, " ").trim();
// True when `candidate` starts (normalized) with any configured word.
const startsWithConfiguredWord = (words, candidate) => words.some((word) => normalizedStartsWith(candidate, word));
2088
/**
 * Splits page content into line records carrying 1-based line numbers and the
 * absolute start offset of each line within the page (each "\n" counts as one
 * character when advancing the offset).
 */
const buildPageLines = (content) => {
  let cursor = 0;
  return content.split("\n").map((text, position) => {
    const record = {
      lineNumber: position + 1,
      start: cursor,
      text
    };
    cursor += text.length + 1;
    return record;
  });
};
2103
/**
 * Tests a heading against a zone gate. "headingText" gates compare against the
 * gate's `match` string (optionally fuzzy via Arabic normalization, requiring a
 * delimited prefix); token gates compare against the mapped Arabic heading word.
 */
const headingMatchesGate = (headingText, gate) => {
	if (gate.use === "headingText") {
		const useFuzzy = gate.fuzzy ?? false;
		const source = useFuzzy ? normalizeArabicForComparison(headingText) : headingText.trim();
		const match = useFuzzy ? normalizeArabicForComparison(gate.match) : gate.match.trim();
		// An empty match string never gates.
		return !!match && isDelimitedPrefixMatch(source, match);
	}
	return normalizedStartsWith(headingText, GATE_TOKEN_MAP[gate.token]);
};
// True when any "## " heading line on the page matches any of the given gates.
const pageMatchesAnyGate = (page, gates) => page.lines.some((line) => {
	const trimmed = line.text.trim();
	if (!trimmed.startsWith(HEADING_PREFIX)) return false;
	const headingText = trimmed.replace(/^##\s+/u, "").trim();
	return gates.some((gate) => headingMatchesGate(headingText, gate));
});
2118
/**
 * True when `pageId` falls inside the zone's optional [minPageId, maxPageId]
 * window; an absent bound never excludes.
 */
const pageWithinZoneBounds = (zone, pageId) => {
  const { minPageId, maxPageId } = zone.when ?? {};
  const belowMin = minPageId !== undefined && pageId < minPageId;
  const aboveMax = maxPageId !== undefined && pageId > maxPageId;
  return !belowMin && !aboveMax;
};
2123
/**
 * Finds the first in-bounds page whose headings satisfy any of the zone's
 * `activateAfter` gates; returns its page id, or null if no page activates.
 */
const findActivationPageId = (zone, pages) => {
	for (const page of pages) {
		if (!pageWithinZoneBounds(zone, page.page.id)) continue;
		if (pageMatchesAnyGate(page, zone.when?.activateAfter ?? [])) return page.page.id;
	}
	return null;
};
/**
 * Maps each zone name to the page id where it activates. Zones without an
 * `activateAfter` gate map to null (they are never gate-blocked — see
 * pageMatchesZone, which only consults this map for gated zones).
 */
const createZoneActivationMap = (profile, pages) => {
	const activation = /* @__PURE__ */ new Map();
	for (const zone of profile.zones) {
		if (!zone.when?.activateAfter?.length) {
			activation.set(zone.name, null);
			continue;
		}
		activation.set(zone.name, findActivationPageId(zone, pages));
	}
	return activation;
};
2141
/**
 * True when `pageId` is within the zone's id bounds and, for gate-activated
 * zones, at or after the zone's recorded activation page.
 */
const pageMatchesZone = (zone, activationMap, pageId) => {
  const when = zone.when;
  if (when?.minPageId !== undefined && pageId < when.minPageId) return false;
  if (when?.maxPageId !== undefined && pageId > when.maxPageId) return false;
  const isGated = (when?.activateAfter?.length ?? 0) > 0;
  if (!isGated) return true;
  const activatedAt = activationMap.get(zone.name);
  // Null/undefined means the gate never fired, so the zone never matches.
  return activatedAt != null && pageId >= activatedAt;
};
2148
/**
 * Returns the LAST zone in profile order that matches `pageId` (later zones
 * override earlier ones), or null when none matches.
 */
const resolveActiveZone = (profile, activationMap, pageId) => profile.zones.reduce((active, zone) => pageMatchesZone(zone, activationMap, pageId) ? zone : active, null);
2153
/**
 * Builds a heading-family candidate from a "## " line, or returns null when the
 * family does not accept this heading class, the heading is a disallowed
 * single-letter entry, or the next line starts with a disallowed colon.
 */
const createHeadingCandidate = (pageStartOffset, line, nextLine, family, headingClass) => {
  if (!family.classes.includes(headingClass)) return null;
  const trimmedLine = line.text.trim();
  // Drop the "## " prefix (3 chars) to get the heading text itself.
  const headingText = trimmedLine.slice(3).trim();
  const isEntry = headingClass === "entry";
  if (isEntry && !family.allowSingleLetter && headingText.length <= 1) return null;
  if (isEntry && !family.allowNextLineColon && nextLine?.text.trimStart().startsWith(":")) return null;
  return {
    absoluteIndex: pageStartOffset + line.start,
    contentStartOffset: 3,
    family: "heading",
    headingClass,
    kind: family.emit,
    lemma: family.emit === "entry" ? headingText : void 0,
    lineNumber: line.lineNumber,
    localIndex: line.start,
    probeText: trimmedLine,
    text: trimmedLine
  };
};
2171
// Optional second-word suffix for multi-word lemma patterns.
const optionalSecondWord = (allowMultiWord) => allowMultiWord ? `(?:\\s+${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN})?` : "";
// One (or two) Arabic words wrapped in the given open/close delimiters.
const wrappedWordPattern = (open, close, allowMultiWord) => `${open}${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN}${optionalSecondWord(allowMultiWord)}${close}`;
// One (or two) Arabic words with no wrapping delimiters.
const bareWordPattern = (allowMultiWord) => `${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN}${optionalSecondWord(allowMultiWord)}`;
// A status line: a code sequence or comma-separated word list followed by an
// optional colon and a used/unused status marker. Such lines are not entries.
const STATUS_LINE_RE = new RegExp(`^(?:${CODE_LINE_PATTERN}|${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN}(?:\\s*[،,]\\s*${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN})+)\\s*:?[\\s]*${STATUS_TAIL_PATTERN}(?=$|[.،,:؛\\s])`, "u");
/**
 * Compiles (and caches per family object) the "<lemma>:" line-entry regex for a
 * family, honoring its wrapper style, multi-word allowance, and colon spacing.
 */
const createLineEntryRegex = (family) => {
	const cached = lineEntryRegexCache.get(family);
	if (cached) return cached;
	// Wrapper selection: parentheses, brackets, curly braces, any of the three, or bare.
	const wrapperPattern = family.wrappers === "parentheses" ? wrappedWordPattern("\\(", "\\)", family.allowMultiWord) : family.wrappers === "brackets" ? wrappedWordPattern("\\[", "\\]", family.allowMultiWord) : family.wrappers === "curly" ? wrappedWordPattern("\\{", "\\}", family.allowMultiWord) : family.wrappers === "any" ? `(?:${wrappedWordPattern("\\(", "\\)", family.allowMultiWord)}|${wrappedWordPattern("\\[", "\\]", family.allowMultiWord)}|${wrappedWordPattern("\\{", "\\}", family.allowMultiWord)})` : bareWordPattern(family.allowMultiWord);
	const colonSpacing = family.allowWhitespaceBeforeColon ? "\\s*:" : ":";
	const regex = new RegExp(`^(?<lemma>${wrapperPattern})${colonSpacing}`, "u");
	lineEntryRegexCache.set(family, regex);
	return regex;
};
/**
 * Emits an entry candidate for a "<lemma>:" line, unless the line is a status
 * line. The lemma is unwrapped from any surrounding brackets before use.
 */
const collectLineEntryCandidates = (pageStartOffset, line, family) => {
	const trimmed = line.text.trim();
	if (STATUS_LINE_RE.test(trimmed)) return [];
	const match = trimmed.match(createLineEntryRegex(family));
	if (!match?.groups?.lemma) return [];
	return [{
		absoluteIndex: pageStartOffset + line.start,
		family: "lineEntry",
		kind: "entry",
		lemma: match.groups.lemma.replace(/^[[{(]+|[\])}]+$/gu, "").trim(),
		lineNumber: line.lineNumber,
		localIndex: line.start,
		probeText: trimmed,
		text: trimmed
	}];
};
2200
/**
 * Finds mid-line "<prefix><word>:" subentries (e.g. conjunction-prefixed forms)
 * and emits one entry candidate per match. The compiled regex is cached per
 * family object; when `stripPrefixesFromLemma` is set, the configured prefix is
 * removed from the emitted lemma.
 */
const collectInlineSubentryCandidates = (pageStartOffset, line, family) => {
	const cached = inlineSubentryRegexCache.get(family);
	// Default prefix is the conjunction "و" when the family configures none.
	const prefixes = family.prefixes.length > 0 ? family.prefixes.map(escapeRegex).join("|") : escapeRegex("و");
	const regex = cached ?? new RegExp(`(^|[\\s،؛,:.])(?<lemma>(?:${prefixes})${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN})\\s*:`, "gu");
	if (!cached) inlineSubentryRegexCache.set(family, regex);
	const candidates = [];
	for (const match of line.text.matchAll(regex)) {
		if (!match.groups?.lemma || match.index === void 0) continue;
		// Locate the lemma within the full match (which may include a leading delimiter).
		const lemmaIndex = match[0].indexOf(match.groups.lemma);
		if (lemmaIndex < 0) continue;
		const candidateStart = match.index + lemmaIndex;
		const lemma = family.stripPrefixesFromLemma ? match.groups.lemma.replace(new RegExp(`^(?:${prefixes})`, "u"), "") : match.groups.lemma;
		candidates.push({
			absoluteIndex: pageStartOffset + line.start + candidateStart,
			family: "inlineSubentry",
			kind: "entry",
			lemma,
			lineNumber: line.lineNumber,
			localIndex: line.start + candidateStart,
			probeText: line.text.slice(candidateStart).trimStart(),
			text: line.text.trim()
		});
	}
	return candidates;
};
// A line whose entire content is a root-letter code sequence.
const CODE_CORE_RE = new RegExp(`^${CODE_LINE_PATTERN}$`, "u");
// Optional trailing ": <status> …" suffix stripped before code-line matching.
const STATUS_SUFFIX_RE = new RegExp(`(?:\\s*:?[\\s]*${STATUS_TAIL_PATTERN}.*)?$`, "u");
2227
/**
 * Parses text wrapped in a leading "(" or "[" and a trailing ")" or "]".
 * Returns { open, close, inner (trimmed), paired } — `paired` is true only for
 * a matched "(…)" or "[…]" pair — or null when the text is not wrapped.
 */
const parseWrappedCode = (text) => {
  const match = /^(?<open>[[(])(?<inner>.+)(?<close>[\])])$/u.exec(text);
  const groups = match?.groups;
  if (!groups?.inner || !groups.open || !groups.close) return null;
  const { open, close } = groups;
  const isMatchedPair = (open === "(" && close === ")") || (open === "[" && close === "]");
  return {
    close,
    inner: groups.inner.trim(),
    open,
    paired: isMatchedPair
  };
};
2237
/**
 * Emits a marker candidate for a line whose core (after stripping any status
 * suffix and optional wrapping brackets) is a root-letter code sequence, subject
 * to the family's wrapper policy: "either" accepts both, "none" requires bare,
 * "paired" requires a matched (…)/[…] pair, otherwise a mismatched pair.
 */
const collectCodeLineCandidates = (pageStartOffset, line, family) => {
	const trimmed = line.text.trim();
	const bare = trimmed.replace(STATUS_SUFFIX_RE, "").trim();
	const wrapped = parseWrappedCode(bare);
	const inner = wrapped?.inner ?? bare;
	if (!CODE_CORE_RE.test(inner)) return [];
	if (!(family.wrappers === "either" ? true : family.wrappers === "none" ? wrapped === null : family.wrappers === "paired" ? wrapped?.paired === true : wrapped !== null && !wrapped.paired)) return [];
	return [{
		absoluteIndex: pageStartOffset + line.start,
		family: "codeLine",
		kind: "marker",
		lemma: inner,
		lineNumber: line.lineNumber,
		localIndex: line.start,
		probeText: trimmed,
		text: trimmed
	}];
};
/**
 * Emits a candidate for a line of two-or-more forms joined by the family's
 * separator (space or comma) and followed by a colon — optionally requiring a
 * used/unused status tail. The compiled regex is cached per family object.
 */
const collectPairedFormsCandidates = (pageStartOffset, line, family) => {
	const cached = pairedFormsRegexCache.get(family);
	const separator = family.separator === "space" ? "\\s+" : "\\s*[،,]\\s*";
	const statusTail = family.requireStatusTail ? "\\s*:\\s*(?:مستعمل|مستعملة|مستعملان|مهمل|مهملة).*" : "\\s*:";
	const regex = cached ?? new RegExp(`^(?<forms>${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN}(?:${separator}${ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN})+)${statusTail}`, "u");
	if (!cached) pairedFormsRegexCache.set(family, regex);
	const match = line.text.trim().match(regex);
	if (!match?.groups?.forms) return [];
	return [{
		absoluteIndex: pageStartOffset + line.start,
		family: "pairedForms",
		kind: family.emit,
		// Only entry-emitting families carry the joined forms as a lemma.
		lemma: family.emit === "entry" ? match.groups.forms : void 0,
		lineNumber: line.lineNumber,
		localIndex: line.start,
		probeText: line.text.trim(),
		text: line.text.trim()
	}];
};
2274
// A blocker with no appliesTo list applies to every family.
const blockerApplies = (blocker, family) => !blocker.appliesTo || blocker.appliesTo.includes(family);
// True when the candidate's probe text begins with a known intro phrase.
const isIntroCandidate = (text) => {
	const normalized = normalizeIntroContextText(text);
	return INTRO_PHRASES.some((phrase) => normalized.startsWith(normalizeArabicForComparison(phrase)));
};
/**
 * True when `text` (ignoring trailing punctuation) ends with an intro phrase,
 * unless it ends in a strong sentence terminator — a terminated sentence cannot
 * be introducing what follows.
 */
const endsWithIntroPhrase = (text) => {
	const trimmed = text.trimEnd();
	if (STRONG_SENTENCE_TERMINATORS$1.test(trimmed)) return false;
	const normalized = normalizeIntroContextText(trimmed).trimEnd().replace(/[:؛،,.!?؟]+$/u, "").trimEnd();
	return INTRO_PHRASES.some((phrase) => normalized.endsWith(normalizeArabicForComparison(phrase)));
};
/**
 * Broader variant of endsWithIntroPhrase: also checks the tail-phrase list and
 * the tail regex patterns, so citation context several words back still counts.
 */
const endsWithIntroContext = (text) => {
	const trimmed = text.trimEnd();
	if (STRONG_SENTENCE_TERMINATORS$1.test(trimmed)) return false;
	const normalized = normalizeIntroContextText(trimmed).trimEnd().replace(/[:؛،,.!?؟]+$/u, "").trimEnd();
	if (!normalized) return false;
	if (INTRO_PHRASES.some((phrase) => normalized.endsWith(normalizeArabicForComparison(phrase)))) return true;
	if (INTRO_TAIL_PHRASES.some((phrase) => normalized.endsWith(normalizeArabicForComparison(phrase)))) return true;
	return INTRO_TAIL_PATTERNS.some((pattern) => pattern.test(normalized));
};
2294
/**
 * True when the candidate text is an authority citation rather than an entry:
 * its pre-colon head is a known lexicographer name, it matches AUTHORITY_RE, or
 * (in "aggressive" precision) it merely starts with a short list of famous names.
 */
const isAuthorityCandidate = (text, precision) => {
	const head = normalizeStopLemma(text.split(":", 1)[0] ?? text);
	if (head && AUTHORITY_HEAD_WORDS.some((term) => normalizeStopLemma(term) === head)) return true;
	if (AUTHORITY_RE.test(text)) return true;
	if (precision === "aggressive") {
		const normalized = normalizeIntroContextText(text);
		return [
			"الليث",
			"الأزهري",
			"الأصمعي",
			"الجوهري",
			"الفراء",
			"ثعلب",
			"شمر"
		].some((term) => normalized.startsWith(normalizeArabicForComparison(term)));
	}
	return false;
};
/**
 * True when a comma-separated lemma's tail (everything after the first form)
 * starts with a qualifier word ("i.e.", "it is said" …) — a gloss, not a form.
 */
const hasBlockedQualifierTail = (lemma) => {
	const parts = lemma.split(/[،,]/u).map((part) => part.trim()).filter(Boolean);
	if (parts.length < 2) return false;
	return startsWithConfiguredWord(QUALIFIER_TAIL_PREFIXES, parts.slice(1).join(" "));
};
/**
 * Heuristics for structural lines leaking through as entries: non-Arabic lead
 * characters or template braces in the lemma, a bare code sequence standing as
 * its own heading, over-long multi-word lemmas, structural prefixes, "N - (…)"
 * lines, and bracketed lines that mention section keywords.
 */
const looksLikeStructuralLeak = (candidate) => {
	if (!candidate.lemma) return false;
	const normalizedLemma = normalizeArabicForComparison(candidate.lemma);
	if (candidate.kind === "entry" && (/^[^\p{Script=Arabic}\d]+/u.test(candidate.lemma) || candidate.lemma.includes("{") || candidate.lemma.includes("}") || candidate.lemma.includes("##"))) return true;
	if (candidate.kind === "entry" && BARE_CODE_LEMMA_RE.test(candidate.lemma) && (candidate.text === candidate.lemma || candidate.text === `${HEADING_PREFIX}${candidate.lemma}` || candidate.text.startsWith(`${HEADING_PREFIX}${candidate.lemma}`) || candidate.text.startsWith(`${candidate.lemma}\n${HEADING_PREFIX}`))) return true;
	// pairedForms legitimately joins several forms; everything else caps at 4 words.
	if (candidate.family !== "pairedForms" && candidate.lemma.split(/\s+/u).filter(Boolean).length > 4) return true;
	if (startsWithConfiguredWord(STRUCTURAL_LEMMA_PREFIXES, candidate.lemma)) return true;
	if (normalizedLemma.startsWith(normalizeArabicForComparison("ولل"))) return true;
	const structuralText = candidate.text.startsWith(HEADING_PREFIX) ? candidate.text.slice(3).trim() : candidate.text;
	if (/^[\d\u0660-\u0669]+\s*-\s*\([^)]+\)(?:\s+##.*)?$/u.test(structuralText)) return true;
	const normalizedText = normalizeArabicForComparison(structuralText);
	// Structural line shapes only reject when a section keyword is also present.
	if (STRUCTURAL_LINE_PATTERNS.some((pattern) => pattern.test(structuralText))) return STRUCTURAL_LINE_KEYWORDS.some((keyword) => normalizedText.includes(normalizeArabicForComparison(keyword)));
	return false;
};
2331
/** Increments the occurrence count for `lemma` in `map`; no-op for empty/undefined lemmas. */
const countLemma = (map, lemma) => {
  if (!lemma) return;
  const previous = map.get(lemma) ?? 0;
  map.set(lemma, previous + 1);
};
/** Fresh zeroed counters for each split-point kind. */
const createInitialKindCounts = () => ({
  chapter: 0,
  entry: 0,
  marker: 0
});
2340
/** Fresh zeroed counters for each blocker-rejection reason. */
const createInitialReasonCounts = () => Object.fromEntries([
  "authorityIntro",
  "intro",
  "pageContinuation",
  "previousChar",
  "previousWord",
  "qualifierTail",
  "stopLemma",
  "structuralLeak"
].map((reason) => [reason, 0]));
/** Fresh accepted/rejected tallies for every candidate family. */
const createInitialFamilyCounts = () => Object.fromEntries([
  "codeLine",
  "heading",
  "inlineSubentry",
  "lineEntry",
  "pairedForms"
].map((familyName) => [familyName, {
  accepted: 0,
  rejected: 0
}]));
2372
// Intro blocker: reject when the candidate itself is an intro, or the text just
// before it ends with an intro phrase or broader intro context.
const rejectsViaIntroBlocker = (candidate, blocker, localBeforeCandidate) => {
	if (blocker.use !== "intro") return false;
	return isIntroCandidate(candidate.probeText) || endsWithIntroPhrase(localBeforeCandidate) || endsWithIntroContext(localBeforeCandidate);
};
// Authority blocker: reject citations of known lexicographers.
const rejectsViaAuthorityBlocker = (candidate, blocker) => blocker.use === "authorityIntro" && isAuthorityCandidate(candidate.probeText, blocker.precision);
// Stop-lemma blocker: reject lemmas found in the blocker's normalized word set.
const rejectsViaStopLemmaBlocker = (candidate, blocker) => blocker.use === "stopLemma" && !!candidate.lemma && !!normalizeStopLemma(candidate.lemma) && blocker.normalizedWords.has(normalizeStopLemma(candidate.lemma));
// Previous-word blocker: reject when the last Arabic word before the candidate
// is in the blocker's word set.
const rejectsViaPreviousWordBlocker = (pageContent, localIndex, blocker) => {
	if (blocker.use !== "previousWord") return false;
	const lastWord = extractLastArabicWord$1(pageContent, localIndex);
	return !!lastWord && blocker.normalizedWords.has(normalizeArabicForComparison(lastWord));
};
// Previous-char blocker: reject when the nearest non-whitespace character before
// the candidate is in the blocker's char set.
const rejectsViaPreviousCharBlocker = (pageContent, localIndex, blocker) => {
	if (blocker.use !== "previousChar") return false;
	const previousChar = previousNonWhitespaceChar(pageContent, localIndex);
	return !!previousChar && blocker.charSet.has(previousChar);
};
/**
 * Page-continuation blocker: only applies to a candidate at the very top of a
 * non-first page whose previous page did NOT end a sentence. Rejects when the
 * previous page ends with a continuation word, OR (regardless of that word —
 * note the && binds tighter than the || chain) ends in intro context, OR the
 * candidate itself is an intro/authority opening.
 */
const rejectsViaPageContinuationBlocker = (candidate, blocker, localBeforeCandidate, pageIndex, pages) => {
	if (blocker.use !== "pageContinuation") return false;
	if (!(localBeforeCandidate.trim().length === 0) || pageIndex === 0) return false;
	const previousPage = pages[pageIndex - 1];
	if (!previousPage || endsWithStrongSentenceTerminator$1(previousPage.content)) return false;
	const previousWord = extractLastArabicWord$1(previousPage.content);
	return !!previousWord && CONTINUATION_PREV_WORDS.some((word) => normalizedEquals(word, previousWord)) || endsWithIntroContext(previousPage.content) || isIntroCandidate(candidate.probeText) || isAuthorityCandidate(candidate.probeText, "high");
};
/**
 * Evaluates a single blocker against a candidate and returns the rejection
 * reason label, or null when the blocker does not fire. Checks run in a fixed
 * priority order, so the first matching reason wins.
 */
const getBlockerRejectionReason = (blocker, candidate, localBeforeCandidate, pageContent, pageIndex, pages) => {
	if (rejectsViaIntroBlocker(candidate, blocker, localBeforeCandidate)) return "intro";
	if (rejectsViaAuthorityBlocker(candidate, blocker)) return "authorityIntro";
	if (rejectsViaStopLemmaBlocker(candidate, blocker)) return "stopLemma";
	if (rejectsViaPreviousWordBlocker(pageContent, candidate.localIndex, blocker)) return "previousWord";
	if (rejectsViaPreviousCharBlocker(pageContent, candidate.localIndex, blocker)) return "previousChar";
	if (rejectsViaPageContinuationBlocker(candidate, blocker, localBeforeCandidate, pageIndex, pages)) return "pageContinuation";
	return null;
};
/**
 * Full rejection check for a candidate: built-in qualifier-tail and structural
 * screens run first, then each of the zone's applicable blockers. Returns
 * { reason } for the first rejection, or null when the candidate is accepted.
 */
const getCandidateRejection = (candidate, zone, pageContext, pages) => {
	const hasQualifierTail = hasBlockedQualifierTail(candidate.lemma ?? "");
	if (hasQualifierTail || looksLikeStructuralLeak(candidate)) return { reason: hasQualifierTail ? "qualifierTail" : "structuralLeak" };
	const localBeforeCandidate = getTrailingContext(pageContext.content, candidate.localIndex);
	for (const blocker of zone.blockers) {
		if (!blockerApplies(blocker, candidate.family)) continue;
		const reason = getBlockerRejectionReason(blocker, candidate, localBeforeCandidate, pageContext.content, pageContext.index, pages);
		if (reason) return { reason };
	}
	return null;
};
// Boolean wrapper used by the split-point collector (reason is discarded).
const shouldRejectCandidate = (candidate, zone, pageContext, pages) => {
	return getCandidateRejection(candidate, zone, pageContext, pages) !== null;
};
2419
/**
 * Heading-family collection: classify a "## " line and build a candidate from
 * it; "noise" headings and family-rejected classes produce nothing.
 */
const collectHeadingCandidates = (pageStartOffset, line, nextLine, family, trimmed) => {
	if (!trimmed.startsWith(HEADING_PREFIX)) return [];
	const headingClass = classifyDictionaryHeading(trimmed);
	if (headingClass === "noise") return [];
	const candidate = createHeadingCandidate(pageStartOffset, line, nextLine, family, headingClass);
	return candidate ? [candidate] : [];
};
// Dispatches a line to the collector for the family's `use` variant; unknown
// variants reach the exhaustiveness guard.
const collectCandidatesForFamily = (pageStartOffset, line, nextLine, family, trimmed) => {
	switch (family.use) {
		case "heading": return collectHeadingCandidates(pageStartOffset, line, nextLine, family, trimmed);
		case "lineEntry": return collectLineEntryCandidates(pageStartOffset, line, family);
		case "inlineSubentry": return collectInlineSubentryCandidates(pageStartOffset, line, family);
		case "codeLine": return collectCodeLineCandidates(pageStartOffset, line, family);
		case "pairedForms": return collectPairedFormsCandidates(pageStartOffset, line, family);
		default: return assertNever$1(family);
	}
};
// Runs every family in the zone over one line; blank lines yield nothing.
const collectCandidatesForLine = (pageStartOffset, line, nextLine, zone) => {
	const trimmed = line.text.trim();
	const candidates = [];
	if (!trimmed) return candidates;
	for (const family of zone.families) candidates.push(...collectCandidatesForFamily(pageStartOffset, line, nextLine, family, trimmed));
	return candidates;
};
2443
/**
 * Converts an accepted candidate into a split point. Meta always carries the
 * kind (plus the lemma when present); when `debugMetaKey` is given, dictionary
 * debug info (family, optional headingClass) is merged in under that key.
 */
const candidateToSplitPoint = (candidate, debugMetaKey) => {
	const baseMeta = candidate.lemma ? {
		kind: candidate.kind,
		lemma: candidate.lemma
	} : { kind: candidate.kind };
	const meta = debugMetaKey === void 0 ? baseMeta : mergeDebugIntoMeta(baseMeta, debugMetaKey, { dictionary: {
		family: candidate.family,
		...candidate.headingClass ? { headingClass: candidate.headingClass } : {}
	} });
	return {
		contentStartOffset: candidate.contentStartOffset,
		index: candidate.absoluteIndex,
		meta
	};
};
2458
/** Appends `sample` to the buffer unless it already holds `sampleLimit` entries. */
const pushDiagnosticSample = (samples, sampleLimit, sample) => {
  const hasCapacity = samples.length < sampleLimit;
  if (hasCapacity) samples.push(sample);
};
2461
/**
 * Collects dictionary-profile split points using the pages-only markdown surface.
 * For each page: resolve the active zone, collect candidates per line via the
 * zone's families, drop any candidate a blocker rejects, and convert the rest
 * into split points (optionally tagged with debug metadata under `debugMetaKey`).
 */
const collectDictionarySplitPoints = (pages, profile, pageMap, normalizedPages, logger, debugMetaKey) => {
	const normalizedProfile = normalizeDictionaryProfile(profile);
	const pageContexts = createPageContexts(pages, pageMap, normalizedPages);
	const activationMap = createZoneActivationMap(normalizedProfile, pageContexts);
	const splitPoints = [];
	logger?.debug?.("[dictionary] collecting split points", {
		pageCount: pages.length,
		zoneCount: normalizedProfile.zones.length
	});
	for (const pageContext of pageContexts) {
		const zone = resolveActiveZone(normalizedProfile, activationMap, pageContext.page.id);
		// Pages outside every zone produce no split points.
		if (!zone) continue;
		for (let lineIndex = 0; lineIndex < pageContext.lines.length; lineIndex++) {
			const line = pageContext.lines[lineIndex];
			const nextLine = pageContext.lines[lineIndex + 1];
			const candidates = collectCandidatesForLine(pageContext.boundary.start, line, nextLine, zone);
			for (const candidate of candidates) {
				if (shouldRejectCandidate(candidate, zone, pageContext, pageContexts)) continue;
				splitPoints.push(candidateToSplitPoint(candidate, debugMetaKey));
			}
		}
	}
	logger?.debug?.("[dictionary] collected split points", { splitPointCount: splitPoints.length });
	return splitPoints;
};
2489
/**
 * Collects authoring diagnostics for a dictionary profile without creating segments.
 *
 * This is useful when tuning blockers and family choices for a new dictionary.
 * Runs the same candidate-collection + rejection pipeline as
 * collectDictionarySplitPoints, but tallies accept/reject counts, per-family and
 * per-zone totals, rejected lemma frequencies, and up to `sampleLimit` samples.
 * @param pages - Raw pages ({ id, content })
 * @param profile - The dictionary profile to evaluate
 * @param options - `sampleLimit` caps collected samples (default 50)
 */
const diagnoseDictionaryProfile = (pages, profile, options = {}) => {
	const normalizedProfile = normalizeDictionaryProfile(profile);
	// Synthetic page map; boundaries/pageBreaks are filled in by the pass below.
	const pageMap = {
		boundaries: [],
		getId: (offset) => {
			for (const boundary of pageMap.boundaries) if (offset >= boundary.start && offset <= boundary.end) return boundary.id;
			return pageMap.boundaries.at(-1)?.id ?? 0;
		},
		pageBreaks: [],
		pageIds: pages.map((page) => page.id)
	};
	let offset = 0;
	// NOTE: the map() callback mutates pageMap as a side effect while producing the
	// normalized texts; boundaries are complete before createPageContexts validates them.
	const pageContexts = createPageContexts(pages, pageMap, pages.map((page, pageIndex) => {
		const normalized = normalizeLineEndings(page.content);
		pageMap.boundaries.push({
			end: offset + normalized.length,
			id: page.id,
			start: offset
		});
		if (pageIndex < pages.length - 1) {
			// +1 accounts for the "\n" joining consecutive pages.
			pageMap.pageBreaks.push(offset + normalized.length);
			offset += normalized.length + 1;
		} else offset += normalized.length;
		return normalized;
	}));
	const activationMap = createZoneActivationMap(normalizedProfile, pageContexts);
	const sampleLimit = options.sampleLimit ?? 50;
	const acceptedKinds = createInitialKindCounts();
	const blockerHits = createInitialReasonCounts();
	const familyCounts = createInitialFamilyCounts();
	const zoneCounts = {};
	const rejectedLemmaCounts = /* @__PURE__ */ new Map();
	const samples = [];
	let acceptedCount = 0;
	let rejectedCount = 0;
	for (const pageContext of pageContexts) {
		const zone = resolveActiveZone(normalizedProfile, activationMap, pageContext.page.id);
		if (!zone) continue;
		zoneCounts[zone.name] ??= {
			accepted: 0,
			rejected: 0
		};
		for (let lineIndex = 0; lineIndex < pageContext.lines.length; lineIndex++) {
			const line = pageContext.lines[lineIndex];
			const nextLine = pageContext.lines[lineIndex + 1];
			const candidates = collectCandidatesForLine(pageContext.boundary.start, line, nextLine, zone);
			for (const candidate of candidates) {
				const rejection = getCandidateRejection(candidate, zone, pageContext, pageContexts);
				const sampleBase = {
					absoluteIndex: candidate.absoluteIndex,
					family: candidate.family,
					kind: candidate.kind,
					lemma: candidate.lemma,
					line: candidate.lineNumber,
					pageId: pageContext.page.id,
					text: candidate.text,
					zone: zone.name
				};
				if (rejection) {
					rejectedCount += 1;
					blockerHits[rejection.reason] += 1;
					familyCounts[candidate.family].rejected += 1;
					zoneCounts[zone.name].rejected += 1;
					countLemma(rejectedLemmaCounts, candidate.lemma);
					pushDiagnosticSample(samples, sampleLimit, {
						...sampleBase,
						accepted: false,
						reason: rejection.reason
					});
					continue;
				}
				acceptedCount += 1;
				acceptedKinds[candidate.kind] += 1;
				familyCounts[candidate.family].accepted += 1;
				zoneCounts[zone.name].accepted += 1;
				pushDiagnosticSample(samples, sampleLimit, {
					...sampleBase,
					accepted: true
				});
			}
		}
	}
	// Most-frequent rejected lemmas first; ties break lexicographically.
	const rejectedLemmas = [...rejectedLemmaCounts.entries()].sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0])).map(([lemma, count]) => ({
		count,
		lemma
	}));
	return {
		acceptedCount,
		acceptedKinds,
		blockerHits,
		familyCounts,
		pageCount: pages.length,
		rejectedCount,
		rejectedLemmas,
		samples,
		zoneCounts
	};
};
2592
+ //#endregion
1306
2593
  //#region src/optimization/optimize-rules.ts
1307
2594
  const MERGEABLE_KEYS = new Set([
1308
2595
  "lineStartsWith",
@@ -1319,11 +2606,17 @@ const getPatternArray = (rule, key) => {
1319
2606
  };
1320
2607
  const getPatternString = (rule, key) => {
1321
2608
  const value = rule[key];
1322
- return typeof value === "string" ? value : Array.isArray(value) ? value.join("\n") : "";
2609
+ return typeof value === "string" ? value : Array.isArray(value) ? value.join("\n") : value ? JSON.stringify(value) : "";
1323
2610
  };
1324
2611
  const normalizePatterns = (patterns) => [...new Set(patterns)].sort((a, b) => b.length - a.length || a.localeCompare(b));
2612
+ const getDictionaryEntrySpecificityScore = (rule) => {
2613
+ if (!("dictionaryEntry" in rule) || !rule.dictionaryEntry) return 0;
2614
+ const { allowCommaSeparated = false, allowParenthesized = false, allowWhitespaceBeforeColon = false, maxLetters = 10, midLineSubentries = true, minLetters = 2, stopWords } = rule.dictionaryEntry;
2615
+ return minLetters * 20 + maxLetters + (allowCommaSeparated ? 0 : 120) + (allowParenthesized ? 0 : 60) + (allowWhitespaceBeforeColon ? 0 : 20) + (midLineSubentries ? 0 : 160) + Math.min(stopWords.length, 25);
2616
+ };
1325
2617
  const getSpecificityScore = (rule) => {
1326
2618
  const key = getPatternKey(rule);
2619
+ if (key === "dictionaryEntry") return getDictionaryEntrySpecificityScore(rule);
1327
2620
  return MERGEABLE_KEYS.has(key) ? getPatternArray(rule, key).reduce((max, p) => Math.max(max, p.length), 0) : getPatternString(rule, key).length;
1328
2621
  };
1329
2622
  const createMergeKey = (rule) => {
@@ -1468,89 +2761,6 @@ const applyPreprocessToPage = (content, pageId, transforms) => {
1468
2761
  }
1469
2762
  return result;
1470
2763
  };
1471
- //#endregion
1472
- //#region src/segmentation/arabic-dictionary-rule.ts
1473
- const uniqueNormalizedWords = (words) => {
1474
- const seen = /* @__PURE__ */ new Set();
1475
- const result = [];
1476
- for (const word of words) {
1477
- const normalized = normalizeArabicForComparison(word);
1478
- if (!normalized || seen.has(normalized)) continue;
1479
- seen.add(normalized);
1480
- result.push(normalized);
1481
- }
1482
- return result;
1483
- };
1484
- const buildStopAlternation = (stopWords) => {
1485
- const unique = uniqueNormalizedWords(stopWords);
1486
- if (unique.length === 0) return "";
1487
- return unique.map((word) => makeDiacriticInsensitive(word)).join("|");
1488
- };
1489
- const buildHeadwordBody = ({ allowCommaSeparated, colonPattern, stopAlternation, stopwordBody, unit }) => {
1490
- if (!stopAlternation) return allowCommaSeparated ? `${unit}(?:\\s*[،,]\\s*${unit})*` : unit;
1491
- const guardedUnit = `(?!(?:${stopwordBody})${allowCommaSeparated ? `(?:\\s*[،,]\\s*|${colonPattern})` : colonPattern})${unit}`;
1492
- return allowCommaSeparated ? `${guardedUnit}(?:\\s*[،,]\\s*${guardedUnit})*` : guardedUnit;
1493
- };
1494
- const buildBalancedMarker = ({ allowParenthesized, allowWhitespaceBeforeColon, captureName, headwordBody }) => {
1495
- const colon = allowWhitespaceBeforeColon ? "\\s*:" : ":";
1496
- const withCapture = captureName ? `(?<${captureName}>${headwordBody})` : `(?:${headwordBody})`;
1497
- if (!allowParenthesized) return `${withCapture}${colon}`;
1498
- return `(?:\\(\\s*${withCapture}\\s*\\)|${withCapture})${colon}`;
1499
- };
1500
- /**
1501
- * Creates a reusable split rule for Arabic dictionary entries.
1502
- *
1503
- * The generated rule:
1504
- * - keeps the lemma marker in `segment.content`
1505
- * - stores the lemma in `segment.meta[captureName]`
1506
- * - matches root entries at true line/page starts
1507
- * - matches mid-line subentries conservatively when they begin with `و`
1508
- * - can optionally support parenthesized headwords like `(عنبر) :`
1509
- * - can optionally support comma-separated headword lists like `سبد، دبس:`
1510
- *
1511
- * @example
1512
- * createArabicDictionaryEntryRule({
1513
- * stopWords: ['وقيل', 'ويقال', 'قال'],
1514
- * pageStartPrevWordStoplist: ['قال', 'وقيل', 'ويقال'],
1515
- * })
1516
- *
1517
- * @example
1518
- * createArabicDictionaryEntryRule({
1519
- * allowParenthesized: true,
1520
- * allowWhitespaceBeforeColon: true,
1521
- * allowCommaSeparated: true,
1522
- * stopWords: ['الليث', 'العجاج'],
1523
- * })
1524
- */
1525
- const createArabicDictionaryEntryRule = ({ allowCommaSeparated = false, allowParenthesized = false, allowWhitespaceBeforeColon = false, captureName = "lemma", maxLetters = 10, meta, minLetters = 2, pageStartPrevWordStoplist, samePagePrevWordStoplist, stopWords }) => {
1526
- if (!Number.isInteger(minLetters) || minLetters < 1) throw new Error(`createArabicDictionaryEntryRule: minLetters must be an integer >= 1, got ${minLetters}`);
1527
- if (!Number.isInteger(maxLetters) || maxLetters < minLetters) throw new Error(`createArabicDictionaryEntryRule: maxLetters must be an integer >= minLetters, got ${maxLetters}`);
1528
- if (!captureName.match(/^[A-Za-z_]\w*$/)) throw new Error(`createArabicDictionaryEntryRule: invalid captureName "${captureName}"`);
1529
- const zeroWidthPrefix = "[\\u200E\\u200F\\u061C\\u200B\\u200C\\u200D\\uFEFF]*";
1530
- const wawWithMarks = `و${ARABIC_MARKS_CLASS}*`;
1531
- const alWithMarks = `ا${ARABIC_MARKS_CLASS}*ل${ARABIC_MARKS_CLASS}*`;
1532
- const lemmaUnit = `(?:${wawWithMarks})?(?:${alWithMarks})?${`${ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN}(?:${ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN}){${minLetters - 1},${maxLetters - 1}}`}`;
1533
- const stopAlternation = buildStopAlternation(stopWords);
1534
- const lemmaBody = buildHeadwordBody({
1535
- allowCommaSeparated,
1536
- colonPattern: allowWhitespaceBeforeColon ? "\\s*:" : ":",
1537
- stopAlternation,
1538
- stopwordBody: stopAlternation ? `(?:${wawWithMarks})?(?:${stopAlternation})` : "",
1539
- unit: lemmaUnit
1540
- });
1541
- return {
1542
- meta,
1543
- pageStartPrevWordStoplist,
1544
- regex: `(?:${`(?:(?<=^)|(?<=\\n))${zeroWidthPrefix}`}|${allowParenthesized ? `(?<=\\s)(?=(?:\\(\\s*)?${wawWithMarks}(?:${alWithMarks})?)` : `(?<=\\s)(?=${wawWithMarks}(?:${alWithMarks})?)`})` + buildBalancedMarker({
1545
- allowParenthesized,
1546
- allowWhitespaceBeforeColon,
1547
- captureName,
1548
- headwordBody: lemmaBody
1549
- }),
1550
- samePagePrevWordStoplist,
1551
- split: "at"
1552
- };
1553
- };
1554
2764
  const WINDOW_PREFIX_LENGTHS = [
1555
2765
  80,
1556
2766
  60,
@@ -2458,106 +3668,37 @@ const findSafeBreakPosition = (content, targetPosition, lookbackChars = 100) =>
2458
3668
  return -1;
2459
3669
  };
2460
3670
  //#endregion
2461
- //#region src/segmentation/debug-meta.ts
2462
- const resolveDebugConfig = (debug) => {
2463
- if (debug === true) return {
2464
- includeBreakpoint: true,
2465
- includeRule: true,
2466
- metaKey: "_flappa"
2467
- };
2468
- if (!debug || typeof debug !== "object") return null;
2469
- const { metaKey, include } = debug;
2470
- const includeRule = Array.isArray(include) ? include.includes("rule") : true;
2471
- return {
2472
- includeBreakpoint: Array.isArray(include) ? include.includes("breakpoint") : true,
2473
- includeRule,
2474
- metaKey: typeof metaKey === "string" && metaKey ? metaKey : "_flappa"
2475
- };
2476
- };
2477
- const getRulePatternType = (rule) => {
2478
- return PATTERN_TYPE_KEYS.find((key) => key in rule) ?? "regex";
2479
- };
2480
- const isPlainObject = (v) => Boolean(v) && typeof v === "object" && !Array.isArray(v);
2481
- const mergeDebugIntoMeta = (meta, metaKey, patch) => {
2482
- const out = meta ? { ...meta } : {};
2483
- const existing = out[metaKey];
2484
- out[metaKey] = {
2485
- ...isPlainObject(existing) ? existing : {},
2486
- ...patch
2487
- };
2488
- return out;
2489
- };
2490
- const buildRuleDebugPatch = (ruleIndex, rule, wordIndex) => {
2491
- const patternType = getRulePatternType(rule);
2492
- const patterns = rule[patternType];
2493
- const word = wordIndex !== void 0 && Array.isArray(patterns) && patterns[wordIndex] !== void 0 ? patterns[wordIndex] : void 0;
2494
- return { rule: {
2495
- index: ruleIndex,
2496
- patternType,
2497
- ...wordIndex !== void 0 ? { wordIndex } : {},
2498
- ...word !== void 0 ? { word } : {}
2499
- } };
2500
- };
2501
- const buildBreakpointDebugPatch = (breakpointIndex, rule, wordIndex) => ({ breakpoint: {
2502
- index: breakpointIndex,
2503
- kind: rule.pattern === "" ? "pageBoundary" : rule.regex ? "regex" : "pattern",
2504
- pattern: rule.pattern ?? rule.regex,
2505
- ...wordIndex !== void 0 ? { wordIndex } : {},
2506
- ...wordIndex !== void 0 && rule.words ? { word: rule.words[wordIndex] } : {}
2507
- } });
2508
- /**
2509
- * Helper to format the debug info into a human-readable string.
2510
- * @param meta - The segment metadata object
2511
- * @param options - Formatting options
2512
- */
2513
- const formatRuleReason = (rule, concise) => {
2514
- const { index, patternType, wordIndex, word } = rule;
2515
- if (concise) return `Rule: ${word ? `"${word}"` : patternType}`;
2516
- const wordInfo = word ? ` (Matched: "${word}")` : "";
2517
- return `Rule #${index} (${patternType})${wordIndex !== void 0 ? ` [idx:${wordIndex}]` : ""}${wordInfo}`;
2518
- };
2519
- const formatBreakpointReason = (breakpoint, concise) => {
2520
- const { index, kind, pattern, wordIndex, word } = breakpoint;
2521
- if (kind === "pageBoundary") return concise ? "Breakpoint: <page-boundary>" : "Page Boundary (Fallback)";
2522
- if (concise) return `Breakpoint: ${word ? `"${word}"` : `"${pattern}"`}`;
2523
- if (word) return `Breakpoint #${index} (Words) [idx:${wordIndex}] - "${word}"`;
2524
- return `Breakpoint #${index} (${kind}) - "${pattern}"`;
2525
- };
2526
- const formatContentLengthReason = (split, concise) => {
2527
- const { maxContentLength, splitReason } = split;
2528
- if (concise) return `> ${maxContentLength} (${splitReason})`;
2529
- return `Safety Split (${splitReason}) > ${maxContentLength}`;
2530
- };
2531
- /**
2532
- * Helper to format the debug info into a human-readable string.
2533
- * @param meta - The segment metadata object
2534
- * @param options - Formatting options
2535
- */
2536
- const getDebugReason = (meta, options) => {
2537
- const debug = meta?._flappa;
2538
- if (!debug) return "-";
2539
- const concise = options?.concise;
2540
- if (debug.rule) return formatRuleReason(debug.rule, concise);
2541
- if (debug.breakpoint) return formatBreakpointReason(debug.breakpoint, concise);
2542
- if (debug.contentLengthSplit) return formatContentLengthReason(debug.contentLengthSplit, concise);
2543
- return "Unknown";
2544
- };
2545
- /**
2546
- * Convenience helper to get the formatted debug reason directly from a segment.
2547
- * @param segment - The segment object
2548
- * @param options - Formatting options
2549
- */
2550
- const getSegmentDebugReason = (segment, options) => {
2551
- return getDebugReason(segment.meta, options);
2552
- };
2553
- //#endregion
2554
3671
  //#region src/segmentation/pattern-validator.ts
2555
3672
  const KNOWN_TOKENS = new Set(getAvailableTokens());
2556
3673
  const TOKEN_INSIDE_BRACES = /\{\{(\w+)(?::\w+)?\}\}/g;
2557
- const buildBareTokenRegex = () => {
3674
+ const BARE_TOKEN_REGEX = (() => {
2558
3675
  const tokens = [...KNOWN_TOKENS].sort((a, b) => b.length - a.length);
2559
3676
  return new RegExp(`(?<!\\{\\{)(${tokens.join("|")})(?::\\w+)?(?!\\}\\})`, "g");
3677
+ })();
3678
+ const createMalformedTokenIssue = (tokenLiteral, side) => {
3679
+ const token = tokenLiteral.split(":", 1)[0] || void 0;
3680
+ return {
3681
+ message: `Token "${tokenLiteral || "unknown"}" appears to be missing ${side} braces.`,
3682
+ suggestion: tokenLiteral ? `{{${tokenLiteral}}}` : void 0,
3683
+ token,
3684
+ type: "missing_braces"
3685
+ };
3686
+ };
3687
+ const detectMalformedLeftToken = (pattern) => {
3688
+ for (let index = 0; index < pattern.length - 1; index++) {
3689
+ if (pattern.slice(index, index + 2) !== "{{") continue;
3690
+ const closeIndex = pattern.indexOf("}}", index + 2);
3691
+ if (closeIndex === -1) return createMalformedTokenIssue(pattern.slice(index + 2).match(/^\w+(?::\w+)?/u)?.[0] ?? "", "closing");
3692
+ index = closeIndex + 1;
3693
+ }
2560
3694
  };
3695
+ const detectMalformedRightToken = (pattern) => {
3696
+ for (let index = 0; index < pattern.length - 1; index++) {
3697
+ if (pattern.slice(index, index + 2) !== "}}") continue;
3698
+ if (pattern.lastIndexOf("{{", index) === -1) return createMalformedTokenIssue(pattern.slice(0, index).match(/(\w+(?::\w+)?)$/u)?.[1] ?? "", "opening");
3699
+ }
3700
+ };
3701
+ const detectMalformedToken = (pattern) => detectMalformedLeftToken(pattern) ?? detectMalformedRightToken(pattern);
2561
3702
  /**
2562
3703
  * Validates a single pattern for common issues.
2563
3704
  */
@@ -2575,14 +3716,16 @@ const validatePattern = (pattern, seenPatterns) => {
2575
3716
  TOKEN_INSIDE_BRACES.lastIndex = 0;
2576
3717
  for (const match of pattern.matchAll(TOKEN_INSIDE_BRACES)) {
2577
3718
  const name = match[1];
2578
- if (!KNOWN_TOKENS.has(name)) return {
3719
+ if (name && !KNOWN_TOKENS.has(name)) return {
2579
3720
  message: `Unknown token: {{${name}}}. Available tokens: ${[...KNOWN_TOKENS].slice(0, 5).join(", ")}...`,
2580
3721
  suggestion: "Check spelling or use a known token",
2581
3722
  token: name,
2582
3723
  type: "unknown_token"
2583
3724
  };
2584
3725
  }
2585
- for (const match of pattern.matchAll(buildBareTokenRegex())) {
3726
+ const malformed = detectMalformedToken(pattern);
3727
+ if (malformed) return malformed;
3728
+ for (const match of pattern.matchAll(BARE_TOKEN_REGEX)) {
2586
3729
  const [full, name] = match;
2587
3730
  const idx = match.index;
2588
3731
  if (pattern.slice(Math.max(0, idx - 2), idx) !== "{{" || pattern.slice(idx + full.length, idx + full.length + 2) !== "}}") return {
@@ -2609,14 +3752,14 @@ const applyRulePatternValidation = (result, key, patterns) => {
2609
3752
  return true;
2610
3753
  };
2611
3754
  const validateTemplateRule = (rule, result) => {
2612
- if (rule.template === void 0) return false;
3755
+ if (!("template" in rule)) return false;
2613
3756
  const issue = validatePattern(rule.template, /* @__PURE__ */ new Set());
2614
3757
  if (!issue) return false;
2615
3758
  result.template = issue;
2616
3759
  return true;
2617
3760
  };
2618
3761
  const validateRegexRule = (rule, result) => {
2619
- if (rule.regex === void 0) return false;
3762
+ if (!("regex" in rule)) return false;
2620
3763
  if (!rule.regex.trim()) {
2621
3764
  result.regex = {
2622
3765
  message: "Empty pattern is not allowed",
@@ -2636,6 +3779,39 @@ const validateRegexRule = (rule, result) => {
2636
3779
  return true;
2637
3780
  }
2638
3781
  };
3782
+ const invalidDictionaryEntryIssue = (message) => ({
3783
+ message,
3784
+ type: "invalid_option"
3785
+ });
3786
+ const addBooleanDictionaryEntryIssue = (issues, key, value) => {
3787
+ if (value !== void 0 && typeof value !== "boolean") issues[key] = invalidDictionaryEntryIssue(`${key} must be a boolean`);
3788
+ };
3789
+ const addCaptureNameIssue = (issues, captureName) => {
3790
+ if (captureName !== void 0 && !/^[A-Za-z_]\w*$/.test(captureName)) issues.captureName = invalidDictionaryEntryIssue(`captureName must match /^[A-Za-z_]\\w*$/, got "${captureName}"`);
3791
+ };
3792
+ const addMinLettersIssue = (issues, minLetters) => {
3793
+ if (minLetters !== void 0 && (!Number.isInteger(minLetters) || minLetters < 1)) issues.minLetters = invalidDictionaryEntryIssue("minLetters must be an integer >= 1");
3794
+ };
3795
+ const addMaxLettersIssue = (issues, maxLetters, minLetters) => {
3796
+ const min = minLetters ?? 2;
3797
+ if (maxLetters !== void 0 && (!Number.isInteger(maxLetters) || maxLetters < min)) issues.maxLetters = invalidDictionaryEntryIssue(`maxLetters must be an integer >= ${min}`);
3798
+ };
3799
+ const validateDictionaryEntryRule = (rule, result) => {
3800
+ if (!("dictionaryEntry" in rule) || !rule.dictionaryEntry) return false;
3801
+ const issues = {};
3802
+ const { allowCommaSeparated, allowParenthesized, allowWhitespaceBeforeColon, captureName, maxLetters, midLineSubentries, minLetters, stopWords } = rule.dictionaryEntry;
3803
+ if (!Array.isArray(stopWords) || stopWords.some((word) => typeof word !== "string" || !word.trim())) issues.stopWords = invalidDictionaryEntryIssue("stopWords must be a string[] with non-empty entries");
3804
+ addBooleanDictionaryEntryIssue(issues, "allowCommaSeparated", allowCommaSeparated);
3805
+ addBooleanDictionaryEntryIssue(issues, "allowParenthesized", allowParenthesized);
3806
+ addBooleanDictionaryEntryIssue(issues, "allowWhitespaceBeforeColon", allowWhitespaceBeforeColon);
3807
+ addBooleanDictionaryEntryIssue(issues, "midLineSubentries", midLineSubentries);
3808
+ addCaptureNameIssue(issues, captureName);
3809
+ addMinLettersIssue(issues, minLetters);
3810
+ addMaxLettersIssue(issues, maxLetters, minLetters);
3811
+ if (Object.keys(issues).length === 0) return false;
3812
+ result.dictionaryEntry = issues;
3813
+ return true;
3814
+ };
2639
3815
  const formatValidationIssue = (_type, issue, loc) => {
2640
3816
  if (!issue) return null;
2641
3817
  if (issue.type === "missing_braces") return `${loc}: Missing {{}} around token "${issue.token}"`;
@@ -2665,12 +3841,13 @@ const formatValidationIssue = (_type, issue, loc) => {
2665
3841
  */
2666
3842
  const validateRules = (rules) => rules.map((rule) => {
2667
3843
  const result = {};
2668
- const startsWithIssues = applyRulePatternValidation(result, "lineStartsWith", rule.lineStartsWith);
2669
- const startsAfterIssues = applyRulePatternValidation(result, "lineStartsAfter", rule.lineStartsAfter);
2670
- const endsWithIssues = applyRulePatternValidation(result, "lineEndsWith", rule.lineEndsWith);
3844
+ const startsWithIssues = applyRulePatternValidation(result, "lineStartsWith", "lineStartsWith" in rule ? rule.lineStartsWith : void 0);
3845
+ const startsAfterIssues = applyRulePatternValidation(result, "lineStartsAfter", "lineStartsAfter" in rule ? rule.lineStartsAfter : void 0);
3846
+ const endsWithIssues = applyRulePatternValidation(result, "lineEndsWith", "lineEndsWith" in rule ? rule.lineEndsWith : void 0);
2671
3847
  const templateIssues = validateTemplateRule(rule, result);
2672
3848
  const regexIssues = validateRegexRule(rule, result);
2673
- return startsWithIssues || startsAfterIssues || endsWithIssues || templateIssues || regexIssues ? result : void 0;
3849
+ const dictionaryEntryIssues = validateDictionaryEntryRule(rule, result);
3850
+ return startsWithIssues || startsAfterIssues || endsWithIssues || templateIssues || regexIssues || dictionaryEntryIssues ? result : void 0;
2674
3851
  });
2675
3852
  /**
2676
3853
  * Formats a validation result array into a list of human-readable error messages.
@@ -2687,8 +3864,12 @@ const validateRules = (rules) => rules.map((rule) => {
2687
3864
  */
2688
3865
  const formatValidationReport = (results) => results.flatMap((result, i) => {
2689
3866
  if (!result) return [];
2690
- return Object.entries(result).flatMap(([type, issues]) => (Array.isArray(issues) ? issues : [issues]).map((issue) => formatValidationIssue(type, issue, `Rule ${i + 1}, ${type}`)).filter((msg) => msg !== null));
3867
+ return Object.entries(result).flatMap(([type, issues]) => formatValidationIssues(type, issues, i + 1));
2691
3868
  });
3869
+ const formatValidationIssues = (type, issues, ruleNumber) => {
3870
+ if (type === "dictionaryEntry" && issues && typeof issues === "object" && !Array.isArray(issues)) return Object.entries(issues).map(([field, issue]) => formatValidationIssue(type, issue, `Rule ${ruleNumber}, ${type}.${field}`)).filter((msg) => msg !== null);
3871
+ return (Array.isArray(issues) ? issues : [issues]).map((issue) => formatValidationIssue(type, issue, `Rule ${ruleNumber}, ${type}`)).filter((msg) => msg !== null);
3872
+ };
2692
3873
  //#endregion
2693
3874
  //#region src/segmentation/breakpoint-processor.ts
2694
3875
  const buildPageIdToIndexMap = (pageIds) => new Map(pageIds.map((id, i) => [id, i]));
@@ -3336,6 +4517,7 @@ const buildLineBasedRuleRegex = (rule, fuzzy, capturePrefix) => {
3336
4517
  if ("lineStartsWith" in rule && Array.isArray(rule.lineStartsWith) && rule.lineStartsWith.length > 0) return buildLineStartsWithRegexSource(rule.lineStartsWith, fuzzy, capturePrefix);
3337
4518
  if ("lineEndsWith" in rule && Array.isArray(rule.lineEndsWith) && rule.lineEndsWith.length > 0) return buildLineEndsWithRegexSource(rule.lineEndsWith, fuzzy, capturePrefix);
3338
4519
  if ("template" in rule && typeof rule.template === "string") return buildTemplateRegexSource(rule.template, capturePrefix);
4520
+ if ("dictionaryEntry" in rule && rule.dictionaryEntry) return buildArabicDictionaryEntryRegexSource(rule.dictionaryEntry, capturePrefix);
3339
4521
  return null;
3340
4522
  };
3341
4523
  /**
@@ -3358,7 +4540,7 @@ const buildRuleRegex = (rule, capturePrefix) => {
3358
4540
  let finalRegex = ruleRegexSource?.regex;
3359
4541
  let allCaptureNames = ruleRegexSource?.captureNames ?? [];
3360
4542
  if (!finalRegex && "regex" in rule && typeof rule.regex === "string") finalRegex = rule.regex;
3361
- if (!finalRegex) throw new Error("Rule must specify exactly one pattern type: regex, template, lineStartsWith, lineStartsAfter, or lineEndsWith");
4543
+ if (!finalRegex) throw new Error("Rule must specify exactly one pattern type: regex, template, lineStartsWith, lineStartsAfter, lineEndsWith, or dictionaryEntry");
3362
4544
  if (allCaptureNames.length === 0) allCaptureNames = extractNamedCaptureNames(finalRegex);
3363
4545
  return {
3364
4546
  captureNames: allCaptureNames,
@@ -3902,14 +5084,20 @@ const mergeRecord = (existing, incoming) => existing || incoming ? {
3902
5084
  ...existing ?? {},
3903
5085
  ...incoming ?? {}
3904
5086
  } : void 0;
5087
+ const isPlainObject = (value) => typeof value === "object" && value !== null && !Array.isArray(value);
3905
5088
  const mergeSplitPoints = (existing, incoming) => {
3906
5089
  const preferred = prefersIncomingSplitPoint(existing, incoming) ? incoming : existing;
3907
5090
  const fallback = preferred === incoming ? existing : incoming;
5091
+ const meta = mergeRecord(existing.meta, incoming.meta);
5092
+ if (meta && isPlainObject(existing.meta?._flappa) && isPlainObject(incoming.meta?._flappa)) meta._flappa = {
5093
+ ...existing.meta._flappa,
5094
+ ...incoming.meta._flappa
5095
+ };
3908
5096
  return {
3909
5097
  ...fallback,
3910
5098
  ...preferred,
3911
5099
  contentStartOffset: preferred.contentStartOffset ?? fallback.contentStartOffset,
3912
- meta: mergeRecord(existing.meta, incoming.meta),
5100
+ meta,
3913
5101
  namedCaptures: mergeRecord(existing.namedCaptures, incoming.namedCaptures)
3914
5102
  };
3915
5103
  };
@@ -4035,7 +5223,7 @@ const convertPageBreaks = (content, startOffset, pageBreaks, pageJoiner) => {
4035
5223
  * });
4036
5224
  */
4037
5225
  const segmentPages = (pages, options) => {
4038
- const { rules = [], breakpoints = [], prefer = "longer", pageJoiner = "space", logger, maxContentLength, preprocess } = options;
5226
+ const { dictionary, rules = [], breakpoints = [], prefer = "longer", pageJoiner = "space", logger, maxContentLength, preprocess } = options;
4039
5227
  if (maxContentLength && maxContentLength < 50) throw new Error(`maxContentLength must be at least 50 characters.`);
4040
5228
  const maxPages = options.maxPages ?? Number.MAX_SAFE_INTEGER;
4041
5229
  const hasLimits = options.maxPages !== void 0 || maxContentLength !== void 0;
@@ -4059,13 +5247,17 @@ const segmentPages = (pages, options) => {
4059
5247
  pageIds: pageMap.pageIds,
4060
5248
  totalContentLength: matchContent.length
4061
5249
  });
4062
- const splitPoints = collectSplitPointsFromRules(rules, matchContent, pageMap, debugMetaKey, logger);
5250
+ const splitPointsFromRules = collectSplitPointsFromRules(rules, matchContent, pageMap, debugMetaKey, logger);
5251
+ const splitPointsFromDictionary = dictionary ? collectDictionarySplitPoints(preprocessedPages, dictionary, pageMap, normalizedContent, logger, debugMetaKey) : [];
5252
+ const splitPoints = [...splitPointsFromRules, ...splitPointsFromDictionary];
4063
5253
  const unique = dedupeSplitPoints(splitPoints);
4064
5254
  logger?.debug?.("[segmenter] split points collected", {
5255
+ dictionarySplitPoints: splitPointsFromDictionary.length,
4065
5256
  rawSplitPoints: splitPoints.length,
5257
+ ruleSplitPoints: splitPointsFromRules.length,
4066
5258
  uniqueSplitPoints: unique.length
4067
5259
  });
4068
- let segments = buildSegments(unique, matchContent, pageMap, rules, pageJoiner);
5260
+ let segments = buildSegments(unique, matchContent, pageMap, rules, pageJoiner, dictionary !== void 0);
4069
5261
  logger?.debug?.("[segmenter] structural segments built", { segmentCount: segments.length });
4070
5262
  segments = ensureFallbackSegment(segments, preprocessedPages, normalizedContent, pageJoiner);
4071
5263
  if (hasLimits) {
@@ -4092,7 +5284,7 @@ const segmentPages = (pages, options) => {
4092
5284
  * @param rules - Original rules (for constraint checking on first segment)
4093
5285
  * @returns Array of segment objects
4094
5286
  */
4095
- const buildSegments = (splitPoints, content, pageMap, rules, pageJoiner) => {
5287
+ const buildSegments = (splitPoints, content, pageMap, rules, pageJoiner, hasDictionaryProfile) => {
4096
5288
  const getActualStart = (start, contentStartOffset) => start + (contentStartOffset ?? 0);
4097
5289
  const trimSegmentText = (sliced, capturedContent, contentStartOffset) => capturedContent?.trim() ?? (contentStartOffset ? sliced.trim() : sliced.replace(/[\s\n]+$/, ""));
4098
5290
  const getAdjustedStart = (actualStart, sliced, contentStartOffset) => actualStart + (contentStartOffset ? sliced.length - sliced.trimStart().length : 0);
@@ -4136,14 +5328,16 @@ const buildSegments = (splitPoints, content, pageMap, rules, pageJoiner) => {
4136
5328
  };
4137
5329
  const segments = [];
4138
5330
  if (!splitPoints.length) {
4139
- if (anyRuleAllowsId(rules, pageMap.getId(0))) {
5331
+ const firstId = pageMap.getId(0);
5332
+ if (hasDictionaryProfile || anyRuleAllowsId(rules, firstId)) {
4140
5333
  const s = createSegment(0, content.length);
4141
5334
  if (s) segments.push(s);
4142
5335
  }
4143
5336
  return segments;
4144
5337
  }
4145
5338
  if (splitPoints[0].index > 0) {
4146
- if (anyRuleAllowsId(rules, pageMap.getId(0))) {
5339
+ const firstId = pageMap.getId(0);
5340
+ if (hasDictionaryProfile || anyRuleAllowsId(rules, firstId)) {
4147
5341
  const s = createSegment(0, splitPoints[0].index);
4148
5342
  if (s) segments.push(s);
4149
5343
  }
@@ -4544,6 +5738,6 @@ const validateSegments = (pages, options, segments, validationOptions) => {
4544
5738
  };
4545
5739
  };
4546
5740
  //#endregion
4547
- export { ARABIC_BASE_LETTER_CLASS, ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN, ARABIC_MARKS_CLASS, ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN, PATTERN_TYPE_KEYS, TOKEN_PATTERNS, Token, analyzeCommonLineStarts, analyzeRepeatingSequences, analyzeTextForRule, applyPreprocessToPage, applyTokenMappings, condenseEllipsis, containsTokens, createArabicDictionaryEntryRule, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, escapeWordsOutsideTokens, expandCompositeTokensInTemplate, expandTokens, expandTokensWithCaptures, fixTrailingWaw, formatValidationReport, generateTemplateFromText, getAvailableTokens, getDebugReason, getSegmentDebugReason, getTokenPattern, makeDiacriticInsensitive, normalizeArabicForComparison, optimizeRules, removeZeroWidth, segmentPages, shouldDefaultToFuzzy, stripTokenMappings, suggestPatternConfig, templateToRegex, validateRules, validateSegments, withCapture };
5741
+ export { ARABIC_BASE_LETTER_CLASS, ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN, ARABIC_MARKS_CLASS, ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN, DictionaryProfileValidationError, PATTERN_TYPE_KEYS, TOKEN_PATTERNS, Token, analyzeCommonLineStarts, analyzeDictionaryMarkdownPages, analyzeRepeatingSequences, analyzeTextForRule, applyPreprocessToPage, applyTokenMappings, classifyDictionaryHeading, condenseEllipsis, containsTokens, createArabicDictionaryEntryRule, detectTokenPatterns, diagnoseDictionaryProfile, escapeRegex, escapeTemplateBrackets, escapeWordsOutsideTokens, expandCompositeTokensInTemplate, expandTokens, expandTokensWithCaptures, fixTrailingWaw, formatValidationReport, generateTemplateFromText, getAvailableTokens, getDebugReason, getSegmentDebugReason, getTokenPattern, makeDiacriticInsensitive, normalizeArabicForComparison, optimizeRules, removeZeroWidth, scanDictionaryMarkdownPage, segmentPages, shouldDefaultToFuzzy, stripTokenMappings, suggestPatternConfig, templateToRegex, validateDictionaryProfile, validateRules, validateSegments, withCapture };
4548
5742
 
4549
5743
  //# sourceMappingURL=index.mjs.map