wobble-bibble 1.3.3 → 1.4.0
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
- package/README.md +10 -1
- package/dist/index.d.ts +16 -4
- package/dist/index.js +228 -128
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -101,10 +101,19 @@ const error: ValidationError = {
|
|
|
101
101
|
|
|
102
102
|
| Function | Description |
|
|
103
103
|
|----------|-------------|
|
|
104
|
-
| `validateTranslationResponse(segments, response, options?)` | Unified validator for LLM translation responses (IDs, Arabic leak, invented IDs, gaps, speaker-label drift, etc.) |
|
|
104
|
+
| `validateTranslationResponse(segments, response, options?)` | Unified validator for LLM translation responses (IDs, Arabic leak, invented IDs, gaps, speaker-label drift, Allah vs God usage, etc.) |
|
|
105
105
|
| `VALIDATION_ERROR_TYPE_INFO` | Human-readable descriptions for each `ValidationErrorType` (for UI/logging) |
|
|
106
106
|
| `normalizeTranslationTextWithMap(text)` | Normalize response text and return a normalized-index → raw-index map |
|
|
107
107
|
|
|
108
|
+
### Fixers
|
|
109
|
+
|
|
110
|
+
| Function | Description |
|
|
111
|
+
|----------|-------------|
|
|
112
|
+
| `fixCollapsedSpeakerLines(text, config?)` | Insert line breaks before mid-line speaker labels; infers labels if none provided |
|
|
113
|
+
| `fixAll(text, options)` | Apply fixers for selected `ValidationErrorType`s (e.g., `collapsed_speakers`) |
|
|
114
|
+
| `FixConfig` | Configuration for fixers (optional speaker labels, punctuation) |
|
|
115
|
+
| `FixResult` | Fix output (fixed text, applied fixes, counts) |
|
|
116
|
+
|
|
108
117
|
### Utilities
|
|
109
118
|
|
|
110
119
|
| Function | Description |
|
package/dist/index.d.ts
CHANGED
|
@@ -36,7 +36,7 @@ type PromptId = 'master_prompt' | 'encyclopedia_mixed' | 'fatawa' | 'fiqh' | 'ha
|
|
|
36
36
|
declare const PROMPTS: readonly [{
|
|
37
37
|
readonly id: "master_prompt";
|
|
38
38
|
readonly name: "Master Prompt";
|
|
39
|
-
readonly content: "ROLE: Expert academic translator of Classical Islamic texts; prioritize accuracy and structure over fluency.\nCRITICAL NEGATIONS: 1. NO SANITIZATION (Do not soften polemics). 2. NO META-TALK (Output translation only). 3. NO MARKDOWN (Plain text only). 4. NO EMENDATION. 5. NO INFERENCE. 6. NO RESTRUCTURING. 7. NO OPAQUE TRANSLITERATION (Must translate phrases). 8. NO INVENTED SEGMENTS (Do not create, modify, or \"continue\" segment IDs. Output IDs verbatim exactly as they appear in the source input/metadata. Alphabetic suffixes (e.g., P5511a) are allowed IF AND ONLY IF that exact ID appears in the source. Any ID not present verbatim in the source is INVENTED. EXAMPLE: If P5803b ends with a questioner line, that line stays under P5803b — do NOT invent P5803c. If an expected ID is missing from the source, output: \"ID - [MISSING]\".)\nRULES: NO ARABIC SCRIPT (Except ﷺ). Plain text only. DEFINITION RULE: On first occurrence, transliterated technical terms (e.g., bidʿah) MUST be defined: \"translit (English)\". Preserve Segment ID. Translate meaning/intent. No inference. No extra fields. Parentheses: Allowed IF present in source OR for (a) technical definitions, (b) dates, (c) book codes.\nARABIC LEAK (Hard ban):\n- SCRIPT LOCK: Output must be 100% Latin script (ASCII + ALA-LC diacritics like ā ī ū ḥ ṣ ḍ ṭ ẓ ʿ ʾ). These diacritics are allowed/required and are NOT Arabic script.\n- STRICT BAN: Arabic script codepoints (letters, Arabic-Indic numerals ٠-٩, punctuation like ، ؟ ؛ « » , tatweel ـ, and Arabic presentation forms) are forbidden everywhere in output (even inside quotes/brackets/parentheses/citations), except ﷺ.\n- NO CITATIONS/BILINGUAL: Do NOT paste Arabic source text anywhere (no quotes, no citations, no bilingual Arabic+English output). Translate into English only.\n- QUOTES/VERSES/CITATIONS: Even if the source includes Arabic Qurʾān/ḥadīth/quoted text (e.g., «...») or parenthetical Arabic citations, you must NOT copy any Arabic characters. 
Translate the meaning fully into English only.\n- NO MIXED-SCRIPT: Never output a token that mixes Latin and Arabic characters (example: ʿĪد). Rewrite contaminated names/terms fully in Latin ALA-LC.\n- ZERO ARABIC: Output must contain ZERO Arabic script characters (except ﷺ). If any Arabic appears, delete it and rewrite until none remain.\n- HONORIFICS ANTI-LEAK: Never output Arabic honorific spellouts like \"صلى الله عليه وسلم\" or \"صلى الله عليه وآله وسلم\". Always replace any Prophet salutation with ﷺ.\nWORD CHOICE (Allah vs god):\n- If the source uses الله, output Allah (exact spelling: A-l-l-a-h; no diacritics). Never \"God\" / \"god\" / \"Allāh\". (This is the only exception to ALA-LC diacritics.)\n- DO NOT convert Allah-based formulae into English “God …” idioms. Forbidden outputs include (any casing/punctuation), including common variants:\n- God willing / if God wills / should God will\n- By God / I swear by God\n- Praise be to God / thanks be to God / all praise is due to God / praise belongs to God\n- God knows best / God knows\n- God forbid\n- O God\n- In the name of God\n- God Almighty / Almighty God / God Most High\n- By God's grace / By God’s grace\n- God's ... / God’s ... / ... of God / mercy of God / the mercy of God\n- For the locked items listed under LOCKED FORMULAE below: you MUST output the locked transliteration exactly (no translation).\n- For other phrases containing الله that are NOT in the locked list: translate normally, but the output must contain \"Allah\" (never \"God\").\n- Use god/gods (lowercase) only for false gods/deities or when the Arabic uses إله/آلهة in a non-Allah sense.\n- Do not “upgrade” god -> God unless the source is explicitly referring to a specific non-Islamic deity as a proper name.\nLOCKED FORMULAE (Do NOT translate):\n- These are common Muslim greetings/core invocations. Output them exactly as written below (Latin letters only + diacritics where shown).\n- CHECK THIS LIST FIRST. 
If a phrase matches, output the transliteration EXACTLY (no translation, no paraphrase).\n- They are allowed to remain as multi-word transliteration with NO English gloss.\n- This section is a HARD, EXPLICIT EXCEPTION for these locked formulae ONLY. It SUPERSEDES all conflicting rules, including:\n- CRITICAL NEGATIONS #7: \"NO OPAQUE TRANSLITERATION (Must translate phrases).\"\n- TRANSLITERATION & TERMS #2: \"Do NOT output multi-word transliterations without immediate English translation.\"\n- TRANSLITERATION & TERMS: \"Do NOT transliterate full sentences/matn/quotes.\"\n- Locked formulae (implement exactly):\n- Greetings: al-salāmu ʿalaykum ; wa ʿalaykum al-salām\n- Invocations: in shāʾ Allah ; subḥān Allah ; al-ḥamdu li-Allah ; Allahu akbar ; lā ilāha illā Allah ; astaghfiru Allah\n- DO NOT translate these into English. Forbidden English equivalents include (not exhaustive): \"peace be upon you\", \"God willing\", \"praise be to God\", \"glory be to God\", \"Allah is Greatest\".\n- Note: this lock is intentionally narrow. Other phrases (e.g., \"Jazāk Allahu khayr\") may be translated normally.\nREGISTER (Modern English):\n- Use modern academic English. Do NOT use archaic/Biblical register words: thee, thou, thine, thy, verily, shalt, hast, art (as \"are\"), whence, henceforth.\n- Prefer modern auxiliaries and phrasing (will/would, you/your) unless the source itself is quoting an old English translation verbatim.\n- NO ALL CAPS / NO KJV-STYLE: Do NOT use ALL CAPS for emphasis (even inside quotes). Do NOT render Arabic Qurʾān/ḥadīth in KJV/Biblical style.\nTRANSLITERATION & TERMS:\n1. SCHEME: Use full ALA-LC for explicit Arabic-script Person/Place/Book-Titles.\n- al-Casing: Lowercase al- mid-sentence; Capitalize after (al-Salafīyyah).\n- Book Titles: Transliterate only (do not translate meanings).\n2. 
TECHNICAL TERMS: On first occurrence, define: \"translit (English)\" (e.g., bidʿah (innovation), isnād (chain)).\n- Do NOT output multi-word transliterations without immediate English translation.\n- Do NOT transliterate full sentences/matn/quotes. Translate into English; transliteration is for names/terms only.\n- EXCEPTION (Duʿāʾ/Supplications): If the source contains a specific duʿāʾ/supplication phrase and you choose to preserve its wording for pronunciation, you MAY output transliteration BUT you MUST also translate it immediately (same line or next) as: \"translit (English translation)\". Do NOT output Arabic script.\n- Example Allowed: Allāhumma innī asʾaluka al-ʿāfiyah (O Allah, I ask You for well-being).\n- Example Forbidden: Transliterate a long multi-sentence duʿāʾ paragraph without translating it.\n- LOCKED FORMULAE are the only exception allowed to remain multi-word transliteration with NO English gloss.\n- If you use any other multi-word transliteration (not locked), it MUST be immediately glossed: \"translit (English)\". Prefer full English translation for phrases.\n- Do NOT leave common nouns/objects/roles as transliteration (e.g., tools, foods, occupations). Translate them into English. If you must transliterate a non-name, you MUST immediately gloss it: \"translit (English)\".\n3. STANDARDIZED TERMS: Use standard academic spellings: Muḥammad, Shaykh, Qurʾān, Islām, ḥadīth.\n- Sunnah (Capitalized) = The Corpus/Prophetic Tradition. sunnah (lowercase) = legal status/recommended.\n4. PROPER NAMES: Transliterate only (no parentheses).\n5. UNICODE: Latin + Latin Extended (āīūḥʿḍṣṭẓʾ) + punctuation. NO Arabic script (except ﷺ). NO emoji.\n- DIACRITIC FALLBACK: If you cannot produce correct ALA-LC diacritics, output English only. Do NOT use substitute accents (â/ã/á).\n6. SALUTATION: Replace all Prophet salutations with ﷺ.\n7. 
HONORIFICS: Expand common phrases (do not transliterate):\n- Allah ʿazza wa-jall -> Allah, the Mighty and Majestic\n- rahimahu Allah -> may Allah have mercy on him\n8. AMBIGUITY: Use contextual meaning from tafsir for theological terms. Do not sanitise polemics (e.g. Rāfiḍah).\nOUTPUT FORMAT: Segment_ID - English translation.\nCRITICAL: You must use the ASCII hyphen separator \" - \" (space+hyphen+space) immediately after the ID. Do NOT use em-dash or en-dash. Do NOT use a newline after the ID.\nID INTEGRITY (Check First):\n- PREPASS (Silent closed set): Internally identify the exact ordered list of Segment_IDs present in the source. Treat this list as a CLOSED SET. Do not output this list.\n- REQUIRED (Exact match): Your output must contain EXACTLY those Segment_IDs, in the EXACT same order, each appearing EXACTLY ONCE as an \"ID - ...\" prefix. FORBIDDEN: re-outputting an ID prefix you already used (even in long segments).\n- BAN (No new IDs): Do NOT invent ANY IDs or ID-like labels not present verbatim in the source (including \"(continued)\", \"cont.\", \"part 2\", or invented suffixes like P123c). Suffix IDs are allowed ONLY if that exact ID appears in the source.\n- BOUNDARY (No bleed): Translate ONLY the text that belongs to the current Segment_ID (from its header to the next Segment_ID header, or to end-of-input for the last segment). Do NOT move lines across IDs and do NOT merge segments.\n- ELLIPSIS: If the source contains … or ..., translate it literally as \"...\" and continue. If the source ends mid-sentence, end the translation abruptly. NEVER output \"[INCOMPLETE]\".\nMULTI-LINE SEGMENTS (e.g., internal Q&A): Output the Segment_ID and \" - \" ONLY ONCE on the first line. Do NOT repeat the Segment_ID on subsequent lines; subsequent lines must start directly with the speaker label/text (no \"ID - \" prefix).\nSEGMENT BOUNDARIES (Anti-hallucination): Start a NEW segment ONLY when the source explicitly provides a Segment_ID. 
If the source continues with extra lines (including speaker labels like \"Questioner:\"/\"The Shaykh:\"/\"السائل:\"/\"الشيخ:\") WITHOUT a new Segment_ID, treat them as part of the CURRENT segment (multi-line under the current Segment_ID). Do NOT invent a new ID (including alphabetic suffixes like \"P5803c\") to label such continuation.\nOUTPUT COMPLETENESS: Translate ALL content in EVERY segment. Do not truncate, summarize, or skip content.\nOUTPUT UNIQUENESS: Each Segment_ID from the source must appear in your output EXACTLY ONCE as an \"ID - ...\" prefix. Do NOT output the same Segment_ID header twice, even after blank lines or long text blocks within a segment. If a segment is long or has multiple speaker turns, continue translating under that single ID header without re-stating it.\nNEGATIVE CONSTRAINTS: Do NOT output \"implicit continuation\", summaries, or extra paragraphs. Output only the text present in the source segment.\nExample: P1234 - Translation text... (Correct) vs P1234\\nTranslation... (Forbidden).\nEXAMPLE: Input: P405 - حدثنا عبد الله بن يوسف... Output: P405 - ʿAbd Allāh b. Yūsuf narrated to us...";
|
|
39
|
+
readonly content: "ROLE: Expert academic translator of Classical Islamic texts; prioritize accuracy and structure over fluency.\nCRITICAL NEGATIONS: 1. NO SANITIZATION (Do not soften polemics). 2. NO META-TALK (Output translation only). 3. NO MARKDOWN (Plain text only). 4. NO EMENDATION. 5. NO INFERENCE. 6. NO RESTRUCTURING. 7. NO OPAQUE TRANSLITERATION (Must translate phrases). 8. NO INVENTED SEGMENTS.\nRULES: NO ARABIC SCRIPT (Except ﷺ). Plain text only. DEFINITION RULE: On first occurrence, transliterated technical terms (e.g., bidʿah) MUST be defined: \"translit (English)\". Preserve Segment ID. Translate meaning/intent. No inference. No extra fields. Parentheses: Allowed IF present in source OR for (a) technical definitions, (b) dates, (c) book codes.\nARABIC LEAK (Hard ban):\n- SCRIPT LOCK: Output must be 100% Latin script (ASCII + ALA-LC diacritics like ā ī ū ḥ ṣ ḍ ṭ ẓ ʿ ʾ). These diacritics are allowed/required and are NOT Arabic script.\n- STRICT BAN: Arabic script codepoints (letters, Arabic-Indic numerals ٠-٩, punctuation like ، ؟ ؛ , tatweel ـ, and Arabic presentation forms) are forbidden everywhere in output (even inside quotes/brackets/parentheses/citations), except ﷺ.\n- NO CITATIONS/BILINGUAL: Do NOT paste Arabic source text anywhere (no quotes, no citations, no bilingual Arabic+English output). Translate into English only.\n- QUOTES/VERSES/CITATIONS: Even if the source includes Arabic Qurʾān/ḥadīth/quoted text (e.g., «...») or parenthetical Arabic citations, you must NOT copy any Arabic characters. Translate the meaning fully into English only.\n- NO MIXED-SCRIPT: Never output a token that mixes Latin and Arabic characters (example: ʿĪد). Rewrite contaminated names/terms fully in Latin ALA-LC.\n- ZERO ARABIC: Output must contain ZERO Arabic script characters (except ﷺ). 
If any Arabic appears, delete it and rewrite until none remain.\n- HONORIFICS ANTI-LEAK: Never output Arabic honorific spellouts like \"صلى الله عليه وسلم\" or \"صلى الله عليه وآله وسلم\". Always replace any Prophet salutation with ﷺ.\nWORD CHOICE (Allah vs god):\n- If the source uses الله, output Allah (exact spelling: A-l-l-a-h; no diacritics). Never \"God\" / \"god\" / \"Allāh\". (This is the only exception to ALA-LC diacritics.)\n- DO NOT convert Allah-based formulae into English “God …” idioms. Forbidden: any \"God ...\" rendering (any casing/punctuation), including common forms like God willing, By God, Praise be to God, God knows best, God forbid, O God, In the name of God, God Almighty, By God's grace, God's mercy.\n- For the locked items listed under LOCKED FORMULAE below: you MUST output the locked transliteration exactly (no translation).\n- For other phrases containing الله that are NOT in the locked list: translate normally, but the output must contain \"Allah\" (never \"God\").\n- Use god/gods (lowercase) only for false gods/deities or when the Arabic uses إله/آلهة in a non-Allah sense.\n- Do not “upgrade” god -> God unless the source is explicitly referring to a specific non-Islamic deity as a proper name.\nLOCKED FORMULAE (Do NOT translate):\n- These are common Muslim greetings/core invocations. Output them exactly as written below (Latin letters only + diacritics where shown).\n- CHECK THIS LIST FIRST. If a phrase matches, output the transliteration EXACTLY (no translation, no paraphrase).\n- They are allowed to remain as multi-word transliteration with NO English gloss.\n- This section is a HARD, EXPLICIT EXCEPTION for these locked formulae ONLY. 
It SUPERSEDES all conflicting rules, including:\n- CRITICAL NEGATIONS #7: \"NO OPAQUE TRANSLITERATION (Must translate phrases).\"\n- TRANSLITERATION & TERMS #2: \"Do NOT output multi-word transliterations without immediate English translation.\"\n- TRANSLITERATION & TERMS: \"Do NOT transliterate full sentences/matn/quotes.\"\n- Greetings: al-salāmu ʿalaykum; wa ʿalaykum al-salām\n- Invocations: in shāʾ Allah; subḥān Allah; al-ḥamdu li-Allah; Allahu akbar; lā ilāha illā Allah ;\n- DO NOT translate these into English. Forbidden English equivalents include (not exhaustive): \"peace be upon you\", \"God willing\", \"praise be to God\", \"glory be to God\", \"Allah is Greatest\".\n- Note: this lock is intentionally narrow. Other phrases (e.g., \"Jazāk Allahu khayr\") may be translated normally.\nREGISTER (Modern English):\n- Use modern academic English.\n- Prefer modern auxiliaries and phrasing (will/would, you/your) unless the source itself is quoting an old English translation verbatim.\n- NO ALL CAPS / NO KJV-STYLE: Do NOT use ALL CAPS for emphasis (even inside quotes). Do NOT render Arabic Qurʾān/ḥadīth in KJV/Biblical style.\nTRANSLITERATION & TERMS:\n1. SCHEME: Use full ALA-LC for explicit Arabic-script Person/Place/Book-Titles.\n- al-Casing: Lowercase al- mid-sentence; Capitalize after (al-Salafīyyah).\n- Book Titles: Transliterate only (do not translate meanings).\n2. TECHNICAL TERMS: On first occurrence, define: \"translit (English)\" (e.g., bidʿah (innovation), isnād (chain)).\n- Do NOT output multi-word transliterations without immediate English translation.\n- Do NOT transliterate full sentences/matn/quotes. Translate into English; transliteration is for names/terms only.\n- EXCEPTION (Duʿāʾ/Supplications): If the source contains a specific duʿāʾ/supplication phrase and you choose to preserve its wording for pronunciation, you MAY output transliteration BUT you MUST also translate it immediately (same line or next) as: \"translit (English translation)\". 
Do NOT output Arabic script.\n- Example Allowed: Allahumma innī asʾaluka al-ʿāfiyah (O Allah, I ask You for well-being).\n- Example Forbidden: Transliterate a long multi-sentence duʿāʾ paragraph without translating it.\n- LOCKED FORMULAE are the only exception allowed to remain multi-word transliteration with NO English gloss.\n- If you use any other multi-word transliteration (not locked), it MUST be immediately glossed: \"translit (English)\". Prefer full English translation for phrases.\n- Do NOT leave common nouns/objects/roles as transliteration (e.g., tools, foods, occupations). Translate them into English. If you must transliterate a non-name, you MUST immediately gloss it: \"translit (English)\".\n3. STANDARDIZED TERMS: Use standard academic spellings: Muḥammad, Shaykh, Qurʾān, Islām, ḥadīth.\n- Sunnah (Capitalized) = The Corpus/Prophetic Tradition. sunnah (lowercase) = legal status/recommended.\n4. PROPER NAMES: Transliterate only (no parentheses).\n5. UNICODE: Latin + Latin Extended (āīūḥʿḍṣṭẓʾ) + punctuation. NO Arabic script (except ﷺ). NO emoji.\n- DIACRITIC FALLBACK: If you cannot produce correct ALA-LC diacritics, output English only. Do NOT use substitute accents (â/ã/á).\n6. SALUTATION: Replace all Prophet salutations with ﷺ.\n7. AMBIGUITY: Use contextual meaning from tafsir for theological terms. Do not sanitise polemics (e.g. Rāfiḍah).\nOUTPUT FORMAT: Segment_ID - English translation.\nCRITICAL: You must use the ASCII hyphen separator \" - \" (space+hyphen+space) immediately after the ID. Do NOT use em-dash or en-dash. Do NOT use a newline after the ID.\nID INTEGRITY (Check First):\n- PREPASS (Silent closed set): Internally identify the exact ordered list of Segment_IDs present in the source. Treat this list as a CLOSED SET. Do not output this list.\n- REQUIRED (Exact match): Your output must contain EXACTLY those Segment_IDs, in the EXACT same order, each appearing EXACTLY ONCE as an \"ID - ...\" prefix. 
FORBIDDEN: re-outputting an ID prefix you already used (even in long segments).\n- BAN (No new IDs): Do NOT invent ANY IDs or ID-like labels not present verbatim in the source (including \"(continued)\", \"cont.\", \"part 2\", or invented suffixes like P123c). Suffix IDs are allowed ONLY if that exact ID appears in the source. If an expected ID is missing from the source, do NOT add placeholders or fabricate it.\n- BOUNDARY (No bleed): Translate ONLY the text that belongs to the current Segment_ID (from its header to the next Segment_ID header, or to end-of-input for the last segment). Do NOT move lines across IDs and do NOT merge segments.\n- ELLIPSIS: If the source contains … or ..., translate it literally as \"...\" and continue. If the source ends mid-sentence, end the translation abruptly. NEVER output \"[INCOMPLETE]\".\nMULTI-LINE SEGMENTS (e.g., internal Q&A): Output the Segment_ID and \" - \" ONLY ONCE on the first line. Do NOT repeat the Segment_ID on subsequent lines; subsequent lines must start directly with the speaker label/text (no \"ID - \" prefix).\nSEGMENT BOUNDARIES (Anti-hallucination): Start a NEW segment ONLY when the source explicitly provides a Segment_ID. If the source continues with extra lines (including speaker labels like \"Questioner:\"/\"The Shaykh:\"/\"السائل:\"/\"الشيخ:\") WITHOUT a new Segment_ID, treat them as part of the CURRENT segment (multi-line under the current Segment_ID). Do NOT invent a new ID (including alphabetic suffixes like \"P5803c\") to label such continuation.\nOUTPUT COMPLETENESS: Translate ALL content in EVERY segment. Do not truncate, summarize, or skip content.\nOUTPUT UNIQUENESS: Each Segment_ID from the source must appear in your output EXACTLY ONCE as an \"ID - ...\" prefix. Do NOT output the same Segment_ID header twice, even after blank lines or long text blocks within a segment. 
If a segment is long or has multiple speaker turns, continue translating under that single ID header without re-stating it.\nNEGATIVE CONSTRAINTS: Do NOT output \"implicit continuation\", summaries, or extra paragraphs. Output only the text present in the source segment.\nExample: P1234 - Translation text... (Correct) vs P1234\\nTranslation... (Forbidden).\nEXAMPLE: Input: P405 - حدثنا عبد الله بن يوسف... Output: P405 - ʿAbd Allāh b. Yūsuf narrated to us...";
|
|
40
40
|
}, {
|
|
41
41
|
readonly id: "encyclopedia_mixed";
|
|
42
42
|
readonly name: "Encyclopedia Mixed";
|
|
@@ -142,7 +142,7 @@ type Segment = {
|
|
|
142
142
|
* Machine-readable error types emitted by the validator.
|
|
143
143
|
* Keep these stable: clients may map them to UI severities.
|
|
144
144
|
*/
|
|
145
|
-
type ValidationErrorType = 'invalid_marker_format' | 'no_valid_markers' | 'newline_after_id' | 'duplicate_id' | 'invented_id' | 'missing_id_gap' | 'mismatched_colons' | 'collapsed_speakers' | 'truncated_segment' | 'implicit_continuation' | 'meta_talk' | 'arabic_leak' | 'wrong_diacritics' | 'empty_parentheses' | 'length_mismatch' | 'all_caps' | 'archaic_register' | 'multiword_translit_without_gloss';
|
|
145
|
+
type ValidationErrorType = 'invalid_marker_format' | 'no_valid_markers' | 'newline_after_id' | 'duplicate_id' | 'invented_id' | 'missing_id_gap' | 'mismatched_colons' | 'collapsed_speakers' | 'truncated_segment' | 'implicit_continuation' | 'meta_talk' | 'arabic_leak' | 'wrong_diacritics' | 'empty_parentheses' | 'length_mismatch' | 'all_caps' | 'archaic_register' | 'god_usage' | 'multiword_translit_without_gloss';
|
|
146
146
|
/**
|
|
147
147
|
* A character index range in a string. End is exclusive.
|
|
148
148
|
*/
|
|
@@ -182,6 +182,9 @@ type ValidationError = {
|
|
|
182
182
|
range: Range;
|
|
183
183
|
matchText: string;
|
|
184
184
|
id?: string;
|
|
185
|
+
/**
|
|
186
|
+
* Stable rule identifier for tooling/triage; may be more specific than type.
|
|
187
|
+
*/
|
|
185
188
|
ruleId?: string;
|
|
186
189
|
};
|
|
187
190
|
type ValidationRule = {
|
|
@@ -200,7 +203,7 @@ type FixConfig = {
|
|
|
200
203
|
* Speaker labels to recognize when fixing collapsed speaker lines.
|
|
201
204
|
* Example: ["Questioner", "The Shaykh", "Mu'adhdhin"]
|
|
202
205
|
*/
|
|
203
|
-
speakerLabels
|
|
206
|
+
speakerLabels?: string[];
|
|
204
207
|
/**
|
|
205
208
|
* Punctuation tokens that may appear before a collapsed speaker label.
|
|
206
209
|
* These are used to detect " ... The Shaykh:" and similar patterns.
|
|
@@ -246,8 +249,8 @@ declare const formatExcerptsForPrompt: (segments: Segment[], prompt: string) =>
|
|
|
246
249
|
*/
|
|
247
250
|
declare const normalizeTranslationText: (content: string) => string;
|
|
248
251
|
declare const normalizeTranslationTextWithMap: (content: string) => {
|
|
249
|
-
normalized: string;
|
|
250
252
|
indexMap: number[];
|
|
253
|
+
normalized: string;
|
|
251
254
|
};
|
|
252
255
|
/**
|
|
253
256
|
* Extract translation IDs from normalized response, in order.
|
|
@@ -289,7 +292,13 @@ declare const parseTranslationsInOrder: (rawText: string) => {
|
|
|
289
292
|
}[];
|
|
290
293
|
//#endregion
|
|
291
294
|
//#region src/fix.d.ts
|
|
295
|
+
/**
|
|
296
|
+
* Fixes collapsed speaker lines by inserting newlines before mid-line labels.
|
|
297
|
+
*/
|
|
292
298
|
declare const fixCollapsedSpeakerLines: (text: string, config?: FixConfig) => FixResult;
|
|
299
|
+
/**
|
|
300
|
+
* Apply all fixers requested by type, in order.
|
|
301
|
+
*/
|
|
293
302
|
declare const fixAll: (text: string, options: FixAllOptions) => FixResult;
|
|
294
303
|
//#endregion
|
|
295
304
|
//#region src/validation.d.ts
|
|
@@ -318,6 +327,9 @@ declare const VALIDATION_ERROR_TYPE_INFO: {
|
|
|
318
327
|
readonly empty_parentheses: {
|
|
319
328
|
readonly description: "Excessive \"()\" patterns detected, often indicating failed/empty term-pairs.";
|
|
320
329
|
};
|
|
330
|
+
readonly god_usage: {
|
|
331
|
+
readonly description: "Forbidden \"God\" usage detected where \"Allah\" should be used.";
|
|
332
|
+
};
|
|
321
333
|
readonly implicit_continuation: {
|
|
322
334
|
readonly description: "The response includes continuation/meta phrasing (e.g., \"continued:\", \"implicit continuation\").";
|
|
323
335
|
};
|
package/dist/index.js
CHANGED
|
@@ -50,10 +50,28 @@ const ARCHAIC_WORDS = [
|
|
|
50
50
|
const MAX_EMPTY_PARENTHESES = 3;
|
|
51
51
|
const MIN_ARABIC_LENGTH_FOR_TRUNCATION_CHECK = 50;
|
|
52
52
|
const MIN_TRANSLATION_RATIO = .25;
|
|
53
|
+
/**
|
|
54
|
+
* Heuristic pattern for inferring speaker labels in English translations.
|
|
55
|
+
* Matches 1-3 capitalized words ending with a colon (e.g., "Questioner:", "The Shaykh:").
|
|
56
|
+
*/
|
|
57
|
+
const SPEAKER_LABEL_GUESS_PATTERN = /(?:^|\n|\s)([A-Z][\p{L}'ʿʾāīūḥṣḍṭẓ-]*(?:\s+[A-Z][\p{L}'ʿʾāīūḥṣḍṭẓ-]*){0,2})\s*:/gu;
|
|
58
|
+
const DEFAULT_LEADING_PUNCTUATION = [
|
|
59
|
+
".",
|
|
60
|
+
"?",
|
|
61
|
+
"!",
|
|
62
|
+
"…",
|
|
63
|
+
"،",
|
|
64
|
+
"؛",
|
|
65
|
+
":",
|
|
66
|
+
":",
|
|
67
|
+
"-",
|
|
68
|
+
"–",
|
|
69
|
+
"—"
|
|
70
|
+
];
|
|
53
71
|
|
|
54
72
|
//#endregion
|
|
55
73
|
//#region .generated/prompts.ts
|
|
56
|
-
const MASTER_PROMPT = "ROLE: Expert academic translator of Classical Islamic texts; prioritize accuracy and structure over fluency.\nCRITICAL NEGATIONS: 1. NO SANITIZATION (Do not soften polemics). 2. NO META-TALK (Output translation only). 3. NO MARKDOWN (Plain text only). 4. NO EMENDATION. 5. NO INFERENCE. 6. NO RESTRUCTURING. 7. NO OPAQUE TRANSLITERATION (Must translate phrases). 8. NO INVENTED SEGMENTS (Do not create, modify, or \"continue\" segment IDs. Output IDs verbatim exactly as they appear in the source input/metadata. Alphabetic suffixes (e.g., P5511a) are allowed IF AND ONLY IF that exact ID appears in the source. Any ID not present verbatim in the source is INVENTED. EXAMPLE: If P5803b ends with a questioner line, that line stays under P5803b — do NOT invent P5803c. If an expected ID is missing from the source, output: \"ID - [MISSING]\".)\nRULES: NO ARABIC SCRIPT (Except ﷺ). Plain text only. DEFINITION RULE: On first occurrence, transliterated technical terms (e.g., bidʿah) MUST be defined: \"translit (English)\". Preserve Segment ID. Translate meaning/intent. No inference. No extra fields. Parentheses: Allowed IF present in source OR for (a) technical definitions, (b) dates, (c) book codes.\nARABIC LEAK (Hard ban):\n- SCRIPT LOCK: Output must be 100% Latin script (ASCII + ALA-LC diacritics like ā ī ū ḥ ṣ ḍ ṭ ẓ ʿ ʾ). These diacritics are allowed/required and are NOT Arabic script.\n- STRICT BAN: Arabic script codepoints (letters, Arabic-Indic numerals ٠-٩, punctuation like ، ؟ ؛ « » , tatweel ـ, and Arabic presentation forms) are forbidden everywhere in output (even inside quotes/brackets/parentheses/citations), except ﷺ.\n- NO CITATIONS/BILINGUAL: Do NOT paste Arabic source text anywhere (no quotes, no citations, no bilingual Arabic+English output). 
Translate into English only.\n- QUOTES/VERSES/CITATIONS: Even if the source includes Arabic Qurʾān/ḥadīth/quoted text (e.g., «...») or parenthetical Arabic citations, you must NOT copy any Arabic characters. Translate the meaning fully into English only.\n- NO MIXED-SCRIPT: Never output a token that mixes Latin and Arabic characters (example: ʿĪد). Rewrite contaminated names/terms fully in Latin ALA-LC.\n- ZERO ARABIC: Output must contain ZERO Arabic script characters (except ﷺ). If any Arabic appears, delete it and rewrite until none remain.\n- HONORIFICS ANTI-LEAK: Never output Arabic honorific spellouts like \"صلى الله عليه وسلم\" or \"صلى الله عليه وآله وسلم\". Always replace any Prophet salutation with ﷺ.\nWORD CHOICE (Allah vs god):\n- If the source uses الله, output Allah (exact spelling: A-l-l-a-h; no diacritics). Never \"God\" / \"god\" / \"Allāh\". (This is the only exception to ALA-LC diacritics.)\n- DO NOT convert Allah-based formulae into English “God …” idioms. Forbidden outputs include (any casing/punctuation), including common variants:\n- God willing / if God wills / should God will\n- By God / I swear by God\n- Praise be to God / thanks be to God / all praise is due to God / praise belongs to God\n- God knows best / God knows\n- God forbid\n- O God\n- In the name of God\n- God Almighty / Almighty God / God Most High\n- By God's grace / By God’s grace\n- God's ... / God’s ... / ... 
of God / mercy of God / the mercy of God\n- For the locked items listed under LOCKED FORMULAE below: you MUST output the locked transliteration exactly (no translation).\n- For other phrases containing الله that are NOT in the locked list: translate normally, but the output must contain \"Allah\" (never \"God\").\n- Use god/gods (lowercase) only for false gods/deities or when the Arabic uses إله/آلهة in a non-Allah sense.\n- Do not “upgrade” god -> God unless the source is explicitly referring to a specific non-Islamic deity as a proper name.\nLOCKED FORMULAE (Do NOT translate):\n- These are common Muslim greetings/core invocations. Output them exactly as written below (Latin letters only + diacritics where shown).\n- CHECK THIS LIST FIRST. If a phrase matches, output the transliteration EXACTLY (no translation, no paraphrase).\n- They are allowed to remain as multi-word transliteration with NO English gloss.\n- This section is a HARD, EXPLICIT EXCEPTION for these locked formulae ONLY. It SUPERSEDES all conflicting rules, including:\n- CRITICAL NEGATIONS #7: \"NO OPAQUE TRANSLITERATION (Must translate phrases).\"\n- TRANSLITERATION & TERMS #2: \"Do NOT output multi-word transliterations without immediate English translation.\"\n- TRANSLITERATION & TERMS: \"Do NOT transliterate full sentences/matn/quotes.\"\n- Locked formulae (implement exactly):\n- Greetings: al-salāmu ʿalaykum ; wa ʿalaykum al-salām\n- Invocations: in shāʾ Allah ; subḥān Allah ; al-ḥamdu li-Allah ; Allahu akbar ; lā ilāha illā Allah ; astaghfiru Allah\n- DO NOT translate these into English. Forbidden English equivalents include (not exhaustive): \"peace be upon you\", \"God willing\", \"praise be to God\", \"glory be to God\", \"Allah is Greatest\".\n- Note: this lock is intentionally narrow. Other phrases (e.g., \"Jazāk Allahu khayr\") may be translated normally.\nREGISTER (Modern English):\n- Use modern academic English. 
Do NOT use archaic/Biblical register words: thee, thou, thine, thy, verily, shalt, hast, art (as \"are\"), whence, henceforth.\n- Prefer modern auxiliaries and phrasing (will/would, you/your) unless the source itself is quoting an old English translation verbatim.\n- NO ALL CAPS / NO KJV-STYLE: Do NOT use ALL CAPS for emphasis (even inside quotes). Do NOT render Arabic Qurʾān/ḥadīth in KJV/Biblical style.\nTRANSLITERATION & TERMS:\n1. SCHEME: Use full ALA-LC for explicit Arabic-script Person/Place/Book-Titles.\n- al-Casing: Lowercase al- mid-sentence; Capitalize after (al-Salafīyyah).\n- Book Titles: Transliterate only (do not translate meanings).\n2. TECHNICAL TERMS: On first occurrence, define: \"translit (English)\" (e.g., bidʿah (innovation), isnād (chain)).\n- Do NOT output multi-word transliterations without immediate English translation.\n- Do NOT transliterate full sentences/matn/quotes. Translate into English; transliteration is for names/terms only.\n- EXCEPTION (Duʿāʾ/Supplications): If the source contains a specific duʿāʾ/supplication phrase and you choose to preserve its wording for pronunciation, you MAY output transliteration BUT you MUST also translate it immediately (same line or next) as: \"translit (English translation)\". Do NOT output Arabic script.\n- Example Allowed: Allāhumma innī asʾaluka al-ʿāfiyah (O Allah, I ask You for well-being).\n- Example Forbidden: Transliterate a long multi-sentence duʿāʾ paragraph without translating it.\n- LOCKED FORMULAE are the only exception allowed to remain multi-word transliteration with NO English gloss.\n- If you use any other multi-word transliteration (not locked), it MUST be immediately glossed: \"translit (English)\". Prefer full English translation for phrases.\n- Do NOT leave common nouns/objects/roles as transliteration (e.g., tools, foods, occupations). Translate them into English. If you must transliterate a non-name, you MUST immediately gloss it: \"translit (English)\".\n3. 
STANDARDIZED TERMS: Use standard academic spellings: Muḥammad, Shaykh, Qurʾān, Islām, ḥadīth.\n- Sunnah (Capitalized) = The Corpus/Prophetic Tradition. sunnah (lowercase) = legal status/recommended.\n4. PROPER NAMES: Transliterate only (no parentheses).\n5. UNICODE: Latin + Latin Extended (āīūḥʿḍṣṭẓʾ) + punctuation. NO Arabic script (except ﷺ). NO emoji.\n- DIACRITIC FALLBACK: If you cannot produce correct ALA-LC diacritics, output English only. Do NOT use substitute accents (â/ã/á).\n6. SALUTATION: Replace all Prophet salutations with ﷺ.\n7. HONORIFICS: Expand common phrases (do not transliterate):\n- Allah ʿazza wa-jall -> Allah, the Mighty and Majestic\n- rahimahu Allah -> may Allah have mercy on him\n8. AMBIGUITY: Use contextual meaning from tafsir for theological terms. Do not sanitise polemics (e.g. Rāfiḍah).\nOUTPUT FORMAT: Segment_ID - English translation.\nCRITICAL: You must use the ASCII hyphen separator \" - \" (space+hyphen+space) immediately after the ID. Do NOT use em-dash or en-dash. Do NOT use a newline after the ID.\nID INTEGRITY (Check First):\n- PREPASS (Silent closed set): Internally identify the exact ordered list of Segment_IDs present in the source. Treat this list as a CLOSED SET. Do not output this list.\n- REQUIRED (Exact match): Your output must contain EXACTLY those Segment_IDs, in the EXACT same order, each appearing EXACTLY ONCE as an \"ID - ...\" prefix. FORBIDDEN: re-outputting an ID prefix you already used (even in long segments).\n- BAN (No new IDs): Do NOT invent ANY IDs or ID-like labels not present verbatim in the source (including \"(continued)\", \"cont.\", \"part 2\", or invented suffixes like P123c). Suffix IDs are allowed ONLY if that exact ID appears in the source.\n- BOUNDARY (No bleed): Translate ONLY the text that belongs to the current Segment_ID (from its header to the next Segment_ID header, or to end-of-input for the last segment). 
Do NOT move lines across IDs and do NOT merge segments.\n- ELLIPSIS: If the source contains … or ..., translate it literally as \"...\" and continue. If the source ends mid-sentence, end the translation abruptly. NEVER output \"[INCOMPLETE]\".\nMULTI-LINE SEGMENTS (e.g., internal Q&A): Output the Segment_ID and \" - \" ONLY ONCE on the first line. Do NOT repeat the Segment_ID on subsequent lines; subsequent lines must start directly with the speaker label/text (no \"ID - \" prefix).\nSEGMENT BOUNDARIES (Anti-hallucination): Start a NEW segment ONLY when the source explicitly provides a Segment_ID. If the source continues with extra lines (including speaker labels like \"Questioner:\"/\"The Shaykh:\"/\"السائل:\"/\"الشيخ:\") WITHOUT a new Segment_ID, treat them as part of the CURRENT segment (multi-line under the current Segment_ID). Do NOT invent a new ID (including alphabetic suffixes like \"P5803c\") to label such continuation.\nOUTPUT COMPLETENESS: Translate ALL content in EVERY segment. Do not truncate, summarize, or skip content.\nOUTPUT UNIQUENESS: Each Segment_ID from the source must appear in your output EXACTLY ONCE as an \"ID - ...\" prefix. Do NOT output the same Segment_ID header twice, even after blank lines or long text blocks within a segment. If a segment is long or has multiple speaker turns, continue translating under that single ID header without re-stating it.\nNEGATIVE CONSTRAINTS: Do NOT output \"implicit continuation\", summaries, or extra paragraphs. Output only the text present in the source segment.\nExample: P1234 - Translation text... (Correct) vs P1234\\nTranslation... (Forbidden).\nEXAMPLE: Input: P405 - حدثنا عبد الله بن يوسف... Output: P405 - ʿAbd Allāh b. Yūsuf narrated to us...";
|
|
74
|
+
const MASTER_PROMPT = "ROLE: Expert academic translator of Classical Islamic texts; prioritize accuracy and structure over fluency.\nCRITICAL NEGATIONS: 1. NO SANITIZATION (Do not soften polemics). 2. NO META-TALK (Output translation only). 3. NO MARKDOWN (Plain text only). 4. NO EMENDATION. 5. NO INFERENCE. 6. NO RESTRUCTURING. 7. NO OPAQUE TRANSLITERATION (Must translate phrases). 8. NO INVENTED SEGMENTS.\nRULES: NO ARABIC SCRIPT (Except ﷺ). Plain text only. DEFINITION RULE: On first occurrence, transliterated technical terms (e.g., bidʿah) MUST be defined: \"translit (English)\". Preserve Segment ID. Translate meaning/intent. No inference. No extra fields. Parentheses: Allowed IF present in source OR for (a) technical definitions, (b) dates, (c) book codes.\nARABIC LEAK (Hard ban):\n- SCRIPT LOCK: Output must be 100% Latin script (ASCII + ALA-LC diacritics like ā ī ū ḥ ṣ ḍ ṭ ẓ ʿ ʾ). These diacritics are allowed/required and are NOT Arabic script.\n- STRICT BAN: Arabic script codepoints (letters, Arabic-Indic numerals ٠-٩, punctuation like ، ؟ ؛ , tatweel ـ, and Arabic presentation forms) are forbidden everywhere in output (even inside quotes/brackets/parentheses/citations), except ﷺ.\n- NO CITATIONS/BILINGUAL: Do NOT paste Arabic source text anywhere (no quotes, no citations, no bilingual Arabic+English output). Translate into English only.\n- QUOTES/VERSES/CITATIONS: Even if the source includes Arabic Qurʾān/ḥadīth/quoted text (e.g., «...») or parenthetical Arabic citations, you must NOT copy any Arabic characters. Translate the meaning fully into English only.\n- NO MIXED-SCRIPT: Never output a token that mixes Latin and Arabic characters (example: ʿĪد). Rewrite contaminated names/terms fully in Latin ALA-LC.\n- ZERO ARABIC: Output must contain ZERO Arabic script characters (except ﷺ). 
If any Arabic appears, delete it and rewrite until none remain.\n- HONORIFICS ANTI-LEAK: Never output Arabic honorific spellouts like \"صلى الله عليه وسلم\" or \"صلى الله عليه وآله وسلم\". Always replace any Prophet salutation with ﷺ.\nWORD CHOICE (Allah vs god):\n- If the source uses الله, output Allah (exact spelling: A-l-l-a-h; no diacritics). Never \"God\" / \"god\" / \"Allāh\". (This is the only exception to ALA-LC diacritics.)\n- DO NOT convert Allah-based formulae into English “God …” idioms. Forbidden: any \"God ...\" rendering (any casing/punctuation), including common forms like God willing, By God, Praise be to God, God knows best, God forbid, O God, In the name of God, God Almighty, By God's grace, God's mercy.\n- For the locked items listed under LOCKED FORMULAE below: you MUST output the locked transliteration exactly (no translation).\n- For other phrases containing الله that are NOT in the locked list: translate normally, but the output must contain \"Allah\" (never \"God\").\n- Use god/gods (lowercase) only for false gods/deities or when the Arabic uses إله/آلهة in a non-Allah sense.\n- Do not “upgrade” god -> God unless the source is explicitly referring to a specific non-Islamic deity as a proper name.\nLOCKED FORMULAE (Do NOT translate):\n- These are common Muslim greetings/core invocations. Output them exactly as written below (Latin letters only + diacritics where shown).\n- CHECK THIS LIST FIRST. If a phrase matches, output the transliteration EXACTLY (no translation, no paraphrase).\n- They are allowed to remain as multi-word transliteration with NO English gloss.\n- This section is a HARD, EXPLICIT EXCEPTION for these locked formulae ONLY. 
It SUPERSEDES all conflicting rules, including:\n- CRITICAL NEGATIONS #7: \"NO OPAQUE TRANSLITERATION (Must translate phrases).\"\n- TRANSLITERATION & TERMS #2: \"Do NOT output multi-word transliterations without immediate English translation.\"\n- TRANSLITERATION & TERMS: \"Do NOT transliterate full sentences/matn/quotes.\"\n- Greetings: al-salāmu ʿalaykum; wa ʿalaykum al-salām\n- Invocations: in shāʾ Allah; subḥān Allah; al-ḥamdu li-Allah; Allahu akbar; lā ilāha illā Allah ;\n- DO NOT translate these into English. Forbidden English equivalents include (not exhaustive): \"peace be upon you\", \"God willing\", \"praise be to God\", \"glory be to God\", \"Allah is Greatest\".\n- Note: this lock is intentionally narrow. Other phrases (e.g., \"Jazāk Allahu khayr\") may be translated normally.\nREGISTER (Modern English):\n- Use modern academic English.\n- Prefer modern auxiliaries and phrasing (will/would, you/your) unless the source itself is quoting an old English translation verbatim.\n- NO ALL CAPS / NO KJV-STYLE: Do NOT use ALL CAPS for emphasis (even inside quotes). Do NOT render Arabic Qurʾān/ḥadīth in KJV/Biblical style.\nTRANSLITERATION & TERMS:\n1. SCHEME: Use full ALA-LC for explicit Arabic-script Person/Place/Book-Titles.\n- al-Casing: Lowercase al- mid-sentence; Capitalize after (al-Salafīyyah).\n- Book Titles: Transliterate only (do not translate meanings).\n2. TECHNICAL TERMS: On first occurrence, define: \"translit (English)\" (e.g., bidʿah (innovation), isnād (chain)).\n- Do NOT output multi-word transliterations without immediate English translation.\n- Do NOT transliterate full sentences/matn/quotes. Translate into English; transliteration is for names/terms only.\n- EXCEPTION (Duʿāʾ/Supplications): If the source contains a specific duʿāʾ/supplication phrase and you choose to preserve its wording for pronunciation, you MAY output transliteration BUT you MUST also translate it immediately (same line or next) as: \"translit (English translation)\". 
Do NOT output Arabic script.\n- Example Allowed: Allahumma innī asʾaluka al-ʿāfiyah (O Allah, I ask You for well-being).\n- Example Forbidden: Transliterate a long multi-sentence duʿāʾ paragraph without translating it.\n- LOCKED FORMULAE are the only exception allowed to remain multi-word transliteration with NO English gloss.\n- If you use any other multi-word transliteration (not locked), it MUST be immediately glossed: \"translit (English)\". Prefer full English translation for phrases.\n- Do NOT leave common nouns/objects/roles as transliteration (e.g., tools, foods, occupations). Translate them into English. If you must transliterate a non-name, you MUST immediately gloss it: \"translit (English)\".\n3. STANDARDIZED TERMS: Use standard academic spellings: Muḥammad, Shaykh, Qurʾān, Islām, ḥadīth.\n- Sunnah (Capitalized) = The Corpus/Prophetic Tradition. sunnah (lowercase) = legal status/recommended.\n4. PROPER NAMES: Transliterate only (no parentheses).\n5. UNICODE: Latin + Latin Extended (āīūḥʿḍṣṭẓʾ) + punctuation. NO Arabic script (except ﷺ). NO emoji.\n- DIACRITIC FALLBACK: If you cannot produce correct ALA-LC diacritics, output English only. Do NOT use substitute accents (â/ã/á).\n6. SALUTATION: Replace all Prophet salutations with ﷺ.\n7. AMBIGUITY: Use contextual meaning from tafsir for theological terms. Do not sanitise polemics (e.g. Rāfiḍah).\nOUTPUT FORMAT: Segment_ID - English translation.\nCRITICAL: You must use the ASCII hyphen separator \" - \" (space+hyphen+space) immediately after the ID. Do NOT use em-dash or en-dash. Do NOT use a newline after the ID.\nID INTEGRITY (Check First):\n- PREPASS (Silent closed set): Internally identify the exact ordered list of Segment_IDs present in the source. Treat this list as a CLOSED SET. Do not output this list.\n- REQUIRED (Exact match): Your output must contain EXACTLY those Segment_IDs, in the EXACT same order, each appearing EXACTLY ONCE as an \"ID - ...\" prefix. 
FORBIDDEN: re-outputting an ID prefix you already used (even in long segments).\n- BAN (No new IDs): Do NOT invent ANY IDs or ID-like labels not present verbatim in the source (including \"(continued)\", \"cont.\", \"part 2\", or invented suffixes like P123c). Suffix IDs are allowed ONLY if that exact ID appears in the source. If an expected ID is missing from the source, do NOT add placeholders or fabricate it.\n- BOUNDARY (No bleed): Translate ONLY the text that belongs to the current Segment_ID (from its header to the next Segment_ID header, or to end-of-input for the last segment). Do NOT move lines across IDs and do NOT merge segments.\n- ELLIPSIS: If the source contains … or ..., translate it literally as \"...\" and continue. If the source ends mid-sentence, end the translation abruptly. NEVER output \"[INCOMPLETE]\".\nMULTI-LINE SEGMENTS (e.g., internal Q&A): Output the Segment_ID and \" - \" ONLY ONCE on the first line. Do NOT repeat the Segment_ID on subsequent lines; subsequent lines must start directly with the speaker label/text (no \"ID - \" prefix).\nSEGMENT BOUNDARIES (Anti-hallucination): Start a NEW segment ONLY when the source explicitly provides a Segment_ID. If the source continues with extra lines (including speaker labels like \"Questioner:\"/\"The Shaykh:\"/\"السائل:\"/\"الشيخ:\") WITHOUT a new Segment_ID, treat them as part of the CURRENT segment (multi-line under the current Segment_ID). Do NOT invent a new ID (including alphabetic suffixes like \"P5803c\") to label such continuation.\nOUTPUT COMPLETENESS: Translate ALL content in EVERY segment. Do not truncate, summarize, or skip content.\nOUTPUT UNIQUENESS: Each Segment_ID from the source must appear in your output EXACTLY ONCE as an \"ID - ...\" prefix. Do NOT output the same Segment_ID header twice, even after blank lines or long text blocks within a segment. 
If a segment is long or has multiple speaker turns, continue translating under that single ID header without re-stating it.\nNEGATIVE CONSTRAINTS: Do NOT output \"implicit continuation\", summaries, or extra paragraphs. Output only the text present in the source segment.\nExample: P1234 - Translation text... (Correct) vs P1234\\nTranslation... (Forbidden).\nEXAMPLE: Input: P405 - حدثنا عبد الله بن يوسف... Output: P405 - ʿAbd Allāh b. Yūsuf narrated to us...";
|
|
57
75
|
const ENCYCLOPEDIA_MIXED = "NO MODE TAGS: Do not output any mode labels or bracket tags.\nSTRUCTURE (Apply First):\n- LINE BREAKS (CRITICAL): Preserve the source line breaks around speaker turns. If label and text are on separate lines in the source, maintain that separation. Do NOT merge two source lines into one.\n- EXCEPTION: If the speaker label is the VERY FIRST token after the \"ID - \" prefix, keep it on the same line. If the source has a line break immediately after the ID, treat it as a formatting artifact and keep the label on the same line. (Correct: P5455 - Questioner: Text...) (Wrong: P5455 \\n Questioner: Text...).\n- INTERNAL Q&A: If segment has multiple turns, preserve the source line breaks between turns. Output Segment ID ONLY ONCE at the start of the first line. Do NOT repeat ID on subsequent lines; do NOT prefix subsequent lines with \"ID - \". (e.g. P5455 - Questioner: ... \\n The Shaykh: ...).\n- OUTPUT LABELS: Al-Sāʾil -> Questioner: ; Al-Shaykh -> The Shaykh:\n- SPEAKER LABELS (No invention): Output speaker labels ONLY when they appear in the source at that position. Do NOT add \"Questioner:\"/\"The Shaykh:\" to unlabeled text. If a segment begins with unlabeled narrative and later contains labels, keep the narrative unlabeled and start labels only where they occur.\nDEFINITIONS & CASING:\n- GEOPOLITICS: Modern place names may use English exonyms (Filasṭīn -> Palestine).\n- PLURALS: Do not pluralize term-pairs by appending \"s\" (e.g., \"ḥadīth (report)s\"). Use the English plural or rephrase.\nSTATE LOGIC (Priority: Isnad > Rijal > Fiqh > Narrative):\n- ISNAD (Triggers: ḥaddathanā, akhbaranā, ʿan): Use FULL ALA-LC for names.\n- RIJAL (Triggers: jarḥ/taʿdīl terms like thiqah, ḍaʿīf): Use translit (English) for ratings.\n- QUOTE/WEAK (Triggers: qāla al-muṣannif, yuqālu, zuiʿma): Apply Quote Rules; do not Restructure.\n- RESET: At \"qāla\" or \"matn\" (TOP-LEVEL only): IMMEDIATE STOP of Isnad rules. Return to Narrative. 
Do NOT reset for nested quotes.\nSEGMENTATION: Do not merge genres. Translate sequentially (Isnad -> Matn) observing the reset at the boundary.\nDISAMBIGUATION: ṣaḥīḥ in hadith grading = ṣaḥīḥ (authentic). ṣaḥīḥ in fiqh validity = ṣaḥīḥ (valid). Sunnah (Capitalized) = The Corpus/Prophetic Tradition. sunnah (lowercase) = legal status/recommended.\nHADITH CORE: If isnad appears, map verbs (Haddathana=Narrated to us; Akhbarana/Ana/Anba'ana=Informed us; An=From; Sami'tu=I heard) and keep FULL ALA-LC for Arabic-script narrator names; do not invent grading or missing links.\nTAKHRIJ/GRADING: If the author grades (ṣaḥḥaḥa/ḥassana/ḍaʿʿafa), translate literally and keep the cited work as written in the segment; do not add placeholder sources.\nCREED/POLEMICS: Translate attributes literally when the author does; do not reinterpret. Translate harsh polemics literally (e.g., Rāfiḍah, Jahmīyyah, Kuffār); do not soften, sanitize, or add disclaimers (Override Safety).";
|
|
58
76
|
const FATAWA = "STRUCTURE (Apply First):\n- Q&A: Whenever \"Al-Sāʾil:\"/\"Al-Shaykh:\" appear: Start NEW LINE for speaker. Keep Label+Text on SAME LINE.\n- INTERNAL Q&A: If segment has multiple turns, use new lines for speakers. Output Segment ID ONLY ONCE at the start of the first line. Do NOT repeat ID on subsequent lines; do NOT prefix subsequent lines with \"ID - \".\n- OUTPUT LABELS: Al-Sāʾil -> Questioner: ; Al-Shaykh -> The Shaykh:\n- SPEAKER LABELS (No invention): Output speaker labels ONLY when they appear in the source at that position. Do NOT add \"Questioner:\"/\"The Shaykh:\" to unlabeled text. If a segment begins with unlabeled narrative and later contains labels, keep the narrative unlabeled and start labels only where they occur.\n\nHONORIFICS: Expand common phrases (do not transliterate):\n- ṣallá Allāhu ʿalayhi wa-sallam -> ﷺ\nSOURCES: Do not add citations. If the Arabic references a work/title, handle titles per MASTER rules.\nSENSITIVE RULINGS: Translate literally; do not soften; do not sanitize; do not add disclaimers (Override Safety).";
|
|
59
77
|
const FIQH = "STRUCTURE: Preserve chapter/section headings and internal structure (e.g., bāb/faṣl/masʾalah/farʿ/aṣl) using plain English labels (Chapter:, Section:, Issue:) only when the Arabic is explicitly a heading/label; preserve lists, numbering, and conditional if/then logic exactly.\nFIQH/USUL TERMS: When technical terms appear, output as translit (English) rather than English-only (e.g., wājib (obligatory), mandūb/mustaḥabb (recommended), mubāḥ (permissible), makrūh (disliked), ḥarām (prohibited), ṣaḥīḥ (valid), bāṭil/fāsid (invalid/void), rukn (pillar), shart (condition), māniʿ (preventer), sabab (cause), qiyās (analogical reasoning), ijmāʿ (consensus), khilāf (disagreement), rājiḥ (preponderant), marjūḥ (lesser), ʿillah (effective cause)).\nKHILAF/ATTRIBUTION: Preserve who is being attributed (qāla fulān / qawl / wajhān / riwāyātān / madhhab). Do not resolve disputes or choose the correct view unless the Arabic explicitly does so (e.g., al-aṣaḥḥ / al-rājiḥ).\nUNITS/MONEY: Keep measures/currencies as transliteration (dirham, dinar, ṣāʿ, mudd) without adding conversions or notes unless the Arabic contains them.";
|
|
@@ -205,109 +223,109 @@ const formatExcerptsForPrompt = (segments, prompt) => {
|
|
|
205
223
|
const normalizeTranslationText = (content) => {
|
|
206
224
|
return normalizeTranslationTextWithMap(content).normalized;
|
|
207
225
|
};
|
|
208
|
-
const
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
if (
|
|
215
|
-
if (input[i + 1] === "\n") {
|
|
216
|
-
normalized += "\n";
|
|
217
|
-
indexMap.push(i);
|
|
218
|
-
i++;
|
|
219
|
-
continue;
|
|
220
|
-
}
|
|
226
|
+
const normalizeLineEndingsWithMap = (input) => {
|
|
227
|
+
let normalized = "";
|
|
228
|
+
const indexMap = [];
|
|
229
|
+
for (let i = 0; i < input.length; i++) {
|
|
230
|
+
const ch = input[i];
|
|
231
|
+
if (ch === "\r") {
|
|
232
|
+
if (input[i + 1] === "\n") {
|
|
221
233
|
normalized += "\n";
|
|
222
234
|
indexMap.push(i);
|
|
235
|
+
i++;
|
|
223
236
|
continue;
|
|
224
237
|
}
|
|
225
|
-
normalized +=
|
|
238
|
+
normalized += "\n";
|
|
226
239
|
indexMap.push(i);
|
|
240
|
+
continue;
|
|
227
241
|
}
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
242
|
+
normalized += ch;
|
|
243
|
+
indexMap.push(i);
|
|
244
|
+
}
|
|
245
|
+
return {
|
|
246
|
+
indexMap,
|
|
247
|
+
normalized
|
|
232
248
|
};
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
indexMap.push(map[i]);
|
|
243
|
-
}
|
|
244
|
-
normalized += match[1];
|
|
245
|
-
indexMap.push(map[matchIndex]);
|
|
246
|
-
normalized += "\n";
|
|
247
|
-
indexMap.push(map[matchIndex]);
|
|
248
|
-
const marker = match[2];
|
|
249
|
-
for (let j = 0; j < marker.length; j++) {
|
|
250
|
-
normalized += marker[j];
|
|
251
|
-
indexMap.push(map[matchIndex + 1 + j]);
|
|
252
|
-
}
|
|
253
|
-
lastIndex = matchIndex + match[0].length;
|
|
254
|
-
}
|
|
255
|
-
for (let i = lastIndex; i < text.length; i++) {
|
|
249
|
+
};
|
|
250
|
+
const insertNewlinesBeforeMergedMarkers = (text, map) => {
|
|
251
|
+
const mergedMarkerNoSpacePattern = new RegExp(`([^\\s\\n])(${MARKER_ID_PATTERN}${TRANSLATION_MARKER_PARTS.optionalSpace}${TRANSLATION_MARKER_PARTS.dashes})`, "g");
|
|
252
|
+
let normalized = "";
|
|
253
|
+
const indexMap = [];
|
|
254
|
+
let lastIndex = 0;
|
|
255
|
+
for (const match of text.matchAll(mergedMarkerNoSpacePattern)) {
|
|
256
|
+
const matchIndex = match.index ?? 0;
|
|
257
|
+
for (let i = lastIndex; i < matchIndex; i++) {
|
|
256
258
|
normalized += text[i];
|
|
257
259
|
indexMap.push(map[i]);
|
|
258
260
|
}
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
const indexMap = [];
|
|
268
|
-
let lastIndex = 0;
|
|
269
|
-
for (const match of text.matchAll(mergedMarkerWithSpacePattern)) {
|
|
270
|
-
const matchIndex = match.index ?? 0;
|
|
271
|
-
for (let i = lastIndex; i < matchIndex; i++) {
|
|
272
|
-
normalized += text[i];
|
|
273
|
-
indexMap.push(map[i]);
|
|
274
|
-
}
|
|
275
|
-
normalized += "\n";
|
|
276
|
-
indexMap.push(map[matchIndex]);
|
|
277
|
-
const marker = match[1];
|
|
278
|
-
for (let j = 0; j < marker.length; j++) {
|
|
279
|
-
normalized += marker[j];
|
|
280
|
-
indexMap.push(map[matchIndex + 1 + j]);
|
|
281
|
-
}
|
|
282
|
-
lastIndex = matchIndex + match[0].length;
|
|
261
|
+
normalized += match[1];
|
|
262
|
+
indexMap.push(map[matchIndex]);
|
|
263
|
+
normalized += "\n";
|
|
264
|
+
indexMap.push(map[matchIndex]);
|
|
265
|
+
const marker = match[2];
|
|
266
|
+
for (let j = 0; j < marker.length; j++) {
|
|
267
|
+
normalized += marker[j];
|
|
268
|
+
indexMap.push(map[matchIndex + 1 + j]);
|
|
283
269
|
}
|
|
284
|
-
|
|
270
|
+
lastIndex = matchIndex + match[0].length;
|
|
271
|
+
}
|
|
272
|
+
for (let i = lastIndex; i < text.length; i++) {
|
|
273
|
+
normalized += text[i];
|
|
274
|
+
indexMap.push(map[i]);
|
|
275
|
+
}
|
|
276
|
+
return {
|
|
277
|
+
indexMap,
|
|
278
|
+
normalized
|
|
279
|
+
};
|
|
280
|
+
};
|
|
281
|
+
const replaceSpaceBeforeMarkerWithNewline = (text, map) => {
|
|
282
|
+
const mergedMarkerWithSpacePattern = new RegExp(` (${MARKER_ID_PATTERN}${TRANSLATION_MARKER_PARTS.optionalSpace}${TRANSLATION_MARKER_PARTS.dashes})`, "g");
|
|
283
|
+
let normalized = "";
|
|
284
|
+
const indexMap = [];
|
|
285
|
+
let lastIndex = 0;
|
|
286
|
+
for (const match of text.matchAll(mergedMarkerWithSpacePattern)) {
|
|
287
|
+
const matchIndex = match.index ?? 0;
|
|
288
|
+
for (let i = lastIndex; i < matchIndex; i++) {
|
|
285
289
|
normalized += text[i];
|
|
286
290
|
indexMap.push(map[i]);
|
|
287
291
|
}
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
+
normalized += "\n";
|
|
293
|
+
indexMap.push(map[matchIndex]);
|
|
294
|
+
const marker = match[1];
|
|
295
|
+
for (let j = 0; j < marker.length; j++) {
|
|
296
|
+
normalized += marker[j];
|
|
297
|
+
indexMap.push(map[matchIndex + 1 + j]);
|
|
298
|
+
}
|
|
299
|
+
lastIndex = matchIndex + match[0].length;
|
|
300
|
+
}
|
|
301
|
+
for (let i = lastIndex; i < text.length; i++) {
|
|
302
|
+
normalized += text[i];
|
|
303
|
+
indexMap.push(map[i]);
|
|
304
|
+
}
|
|
305
|
+
return {
|
|
306
|
+
indexMap,
|
|
307
|
+
normalized
|
|
292
308
|
};
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
continue;
|
|
302
|
-
}
|
|
303
|
-
normalized += text[i];
|
|
309
|
+
};
|
|
310
|
+
const removeEscapedBrackets = (text, map) => {
|
|
311
|
+
let normalized = "";
|
|
312
|
+
const indexMap = [];
|
|
313
|
+
for (let i = 0; i < text.length; i++) {
|
|
314
|
+
if (text[i] === "\\" && text[i + 1] === "[") {
|
|
315
|
+
i++;
|
|
316
|
+
normalized += "[";
|
|
304
317
|
indexMap.push(map[i]);
|
|
318
|
+
continue;
|
|
305
319
|
}
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
320
|
+
normalized += text[i];
|
|
321
|
+
indexMap.push(map[i]);
|
|
322
|
+
}
|
|
323
|
+
return {
|
|
324
|
+
indexMap,
|
|
325
|
+
normalized
|
|
310
326
|
};
|
|
327
|
+
};
|
|
328
|
+
const normalizeTranslationTextWithMap = (content) => {
|
|
311
329
|
const lineEndingNormalized = normalizeLineEndingsWithMap(content);
|
|
312
330
|
const insertedNewlines = insertNewlinesBeforeMergedMarkers(lineEndingNormalized.normalized, lineEndingNormalized.indexMap);
|
|
313
331
|
const spaceReplaced = replaceSpaceBeforeMarkerWithNewline(insertedNewlines.normalized, insertedNewlines.indexMap);
|
|
@@ -387,69 +405,120 @@ const splitResponseById = (text) => {
|
|
|
387
405
|
for (const entry of parseTranslationsInOrder(text)) map.set(entry.id, entry.translation);
|
|
388
406
|
return map;
|
|
389
407
|
};
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
"!",
|
|
398
|
-
"…",
|
|
399
|
-
"،",
|
|
400
|
-
"؛",
|
|
401
|
-
":",
|
|
402
|
-
":",
|
|
403
|
-
"-",
|
|
404
|
-
"–",
|
|
405
|
-
"—"
|
|
406
|
-
];
|
|
407
|
-
const buildLabelPattern = (labels) => {
|
|
408
|
+
/**
|
|
409
|
+
* Build a regex alternation for speaker labels (with trailing colon).
|
|
410
|
+
*
|
|
411
|
+
* @param labels - Speaker labels without trailing colons
|
|
412
|
+
* @returns Regex alternation string (no flags)
|
|
413
|
+
*/
|
|
414
|
+
const buildSpeakerLabelPattern = (labels) => {
|
|
408
415
|
return `(?:${labels.map((label) => `${escapeRegExp(label)}\\s*:`).join("|")})`;
|
|
409
416
|
};
|
|
417
|
+
/**
|
|
418
|
+
* Build a regex alternation for punctuation tokens.
|
|
419
|
+
*
|
|
420
|
+
* @param punctuation - Punctuation tokens to include
|
|
421
|
+
* @returns Regex alternation string (no flags)
|
|
422
|
+
*/
|
|
410
423
|
const buildPunctuationPattern = (punctuation) => punctuation.map((token) => escapeRegExp(token)).join("|");
|
|
424
|
+
/**
|
|
425
|
+
* Build a line-start regex for speaker labels, accounting for optional "ID - " prefix.
|
|
426
|
+
*
|
|
427
|
+
* @param labels - Speaker labels without trailing colons
|
|
428
|
+
* @returns RegExp that matches line-start labels (with optional ID prefix)
|
|
429
|
+
*/
|
|
411
430
|
const buildLineStartLabelPattern = (labels) => {
|
|
412
|
-
const labelPattern =
|
|
431
|
+
const labelPattern = buildSpeakerLabelPattern(labels);
|
|
413
432
|
const { dashes, optionalSpace } = TRANSLATION_MARKER_PARTS;
|
|
414
433
|
return /* @__PURE__ */ new RegExp(`^(?:${MARKER_ID_PATTERN}${optionalSpace}${dashes}\\s*)?(${labelPattern})`);
|
|
415
434
|
};
|
|
435
|
+
/**
|
|
436
|
+
* Infer repeated speaker labels from the text (only labels that appear 2+ times).
|
|
437
|
+
*
|
|
438
|
+
* @param text - Full translation text
|
|
439
|
+
* @returns Labels ordered by first appearance
|
|
440
|
+
*/
|
|
441
|
+
const inferSpeakerLabels = (text) => {
|
|
442
|
+
const counts = /* @__PURE__ */ new Map();
|
|
443
|
+
for (const match of text.matchAll(SPEAKER_LABEL_GUESS_PATTERN)) {
|
|
444
|
+
const label = match[1];
|
|
445
|
+
if (!label) continue;
|
|
446
|
+
const entry = counts.get(label);
|
|
447
|
+
if (entry) entry.count += 1;
|
|
448
|
+
else counts.set(label, {
|
|
449
|
+
count: 1,
|
|
450
|
+
firstIndex: match.index ?? 0
|
|
451
|
+
});
|
|
452
|
+
}
|
|
453
|
+
return [...counts.entries()].filter(([, info]) => info.count >= 2).sort((a, b) => a[1].firstIndex - b[1].firstIndex).map(([label]) => label);
|
|
454
|
+
};
|
|
455
|
+
/**
|
|
456
|
+
* Escape special regex characters in a literal string.
|
|
457
|
+
*
|
|
458
|
+
* @param s - Raw string to escape
|
|
459
|
+
* @returns Escaped string safe for RegExp construction
|
|
460
|
+
*/
|
|
461
|
+
const escapeRegExp = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
462
|
+
|
|
463
|
+
//#endregion
|
|
464
|
+
//#region src/fix.ts
|
|
465
|
+
/**
|
|
466
|
+
* Fixes collapsed speaker lines by inserting newlines before mid-line labels.
|
|
467
|
+
*/
|
|
416
468
|
const fixCollapsedSpeakerLines = (text, config) => {
|
|
417
|
-
|
|
418
|
-
|
|
469
|
+
const speakerLabels = config?.speakerLabels?.length ? config.speakerLabels : inferSpeakerLabels(text);
|
|
470
|
+
if (speakerLabels.length === 0) return {
|
|
471
|
+
applied: [],
|
|
472
|
+
counts: { fixCollapsedSpeakerLines: 0 },
|
|
473
|
+
text
|
|
474
|
+
};
|
|
419
475
|
const leadingPunctuation = config?.leadingPunctuation ?? DEFAULT_LEADING_PUNCTUATION;
|
|
420
|
-
const labelPattern =
|
|
476
|
+
const labelPattern = buildSpeakerLabelPattern(speakerLabels);
|
|
421
477
|
let count = 0;
|
|
422
478
|
const punctuationPattern = buildPunctuationPattern(leadingPunctuation);
|
|
423
479
|
const trailingPunctPattern = punctuationPattern ? /* @__PURE__ */ new RegExp(`(?:${punctuationPattern})+\\s*$`) : void 0;
|
|
424
480
|
const lineStartPattern = buildLineStartLabelPattern(speakerLabels);
|
|
425
481
|
const labelRegex = new RegExp(labelPattern, "g");
|
|
426
|
-
|
|
482
|
+
/**
|
|
483
|
+
* Format a prefix + label insertion, preserving trailing punctuation.
|
|
484
|
+
*/
|
|
485
|
+
const formatInsertion = (prefix, matchText) => {
|
|
486
|
+
if (!trailingPunctPattern) return `${prefix.replace(/\s+$/, "")}\n${matchText}`;
|
|
487
|
+
const punctMatch = prefix.match(trailingPunctPattern);
|
|
488
|
+
if (!punctMatch) return `${prefix.replace(/\s+$/, "")}\n${matchText}`;
|
|
489
|
+
const punct = punctMatch[0].replace(/\s+$/, "");
|
|
490
|
+
return `${prefix.slice(0, -punctMatch[0].length)}${punct}\n${matchText}`;
|
|
491
|
+
};
|
|
492
|
+
/**
|
|
493
|
+
* Apply collapsed-speaker fixes within a single line.
|
|
494
|
+
*/
|
|
495
|
+
const updateLine = (line) => {
|
|
427
496
|
const startMatch = line.match(lineStartPattern);
|
|
428
497
|
const lineStartLabelIndex = startMatch ? startMatch[0].length - startMatch[1].length : -1;
|
|
429
498
|
let lastIndex = 0;
|
|
430
499
|
let updated = "";
|
|
500
|
+
let lineCount = 0;
|
|
431
501
|
for (const match of line.matchAll(labelRegex)) {
|
|
432
502
|
const idx = match.index ?? 0;
|
|
433
|
-
if (idx === lineStartLabelIndex) continue;
|
|
434
|
-
if (idx === 0) continue;
|
|
503
|
+
if (idx === lineStartLabelIndex || idx === 0) continue;
|
|
435
504
|
const prefix = line.slice(lastIndex, idx);
|
|
436
|
-
|
|
437
|
-
const punctMatch = prefix.match(trailingPunctPattern);
|
|
438
|
-
if (punctMatch) {
|
|
439
|
-
const punct = punctMatch[0].replace(/\s+$/, "");
|
|
440
|
-
const beforePunct = prefix.slice(0, -punctMatch[0].length);
|
|
441
|
-
updated += beforePunct + punct + "\n" + match[0];
|
|
442
|
-
lastIndex = idx + match[0].length;
|
|
443
|
-
count += 1;
|
|
444
|
-
continue;
|
|
445
|
-
}
|
|
446
|
-
}
|
|
447
|
-
updated += prefix.replace(/\s+$/, "") + "\n" + match[0];
|
|
505
|
+
updated += formatInsertion(prefix, match[0]);
|
|
448
506
|
lastIndex = idx + match[0].length;
|
|
449
|
-
|
|
507
|
+
lineCount += 1;
|
|
450
508
|
}
|
|
451
|
-
if (
|
|
452
|
-
|
|
509
|
+
if (lineCount === 0) return {
|
|
510
|
+
line,
|
|
511
|
+
lineCount
|
|
512
|
+
};
|
|
513
|
+
return {
|
|
514
|
+
line: `${updated}${line.slice(lastIndex)}`,
|
|
515
|
+
lineCount
|
|
516
|
+
};
|
|
517
|
+
};
|
|
518
|
+
const fixed = text.split("\n").map((line) => {
|
|
519
|
+
const result = updateLine(line);
|
|
520
|
+
count += result.lineCount;
|
|
521
|
+
return result.line;
|
|
453
522
|
}).join("\n");
|
|
454
523
|
return {
|
|
455
524
|
applied: count > 0 ? ["fixCollapsedSpeakerLines"] : [],
|
|
@@ -461,6 +530,9 @@ const FIXERS_BY_TYPE = {
|
|
|
461
530
|
collapsed_speakers: fixCollapsedSpeakerLines,
|
|
462
531
|
mismatched_colons: fixCollapsedSpeakerLines
|
|
463
532
|
};
|
|
533
|
+
/**
|
|
534
|
+
* Apply all fixers requested by type, in order.
|
|
535
|
+
*/
|
|
464
536
|
const fixAll = (text, options) => {
|
|
465
537
|
const requested = options.types;
|
|
466
538
|
const applied = [];
|
|
@@ -502,6 +574,7 @@ const VALIDATION_ERROR_TYPE_INFO = {
|
|
|
502
574
|
collapsed_speakers: { description: "Speaker labels appear mid-line instead of starting on a new line." },
|
|
503
575
|
duplicate_id: { description: "The same segment ID appears more than once in the response." },
|
|
504
576
|
empty_parentheses: { description: "Excessive \"()\" patterns detected, often indicating failed/empty term-pairs." },
|
|
577
|
+
god_usage: { description: "Forbidden \"God\" usage detected where \"Allah\" should be used." },
|
|
505
578
|
implicit_continuation: { description: "The response includes continuation/meta phrasing (e.g., \"continued:\", \"implicit continuation\")." },
|
|
506
579
|
invalid_marker_format: { description: "A segment marker line is malformed (e.g., wrong ID shape or missing content after the dash)." },
|
|
507
580
|
invented_id: { description: "The response contains a segment ID that does not exist in the provided source corpus." },
|
|
@@ -515,7 +588,7 @@ const VALIDATION_ERROR_TYPE_INFO = {
|
|
|
515
588
|
truncated_segment: { description: "A segment appears truncated (e.g., only \"…\", \"...\", or \"[INCOMPLETE]\")." },
|
|
516
589
|
wrong_diacritics: { description: "Wrong diacritics like â/ã/á were detected (should use macrons like ā ī ū)." }
|
|
517
590
|
};
|
|
518
|
-
const
|
|
591
|
+
const buildWordPattern = (words, flags = "gi") => new RegExp(`\\b(?:${words.map((w) => escapeRegExp(w)).join("|")})\\b`, flags);
|
|
519
592
|
const trimRange = (text, start, end) => {
|
|
520
593
|
let s = start;
|
|
521
594
|
let e = end;
|
|
@@ -996,14 +1069,36 @@ const validateAllCaps = (context) => {
|
|
|
996
1069
|
* validateArchaicRegister('verily thou shalt')[0]?.type === 'archaic_register'
|
|
997
1070
|
*/
|
|
998
1071
|
const validateArchaicRegister = (context) => {
|
|
1072
|
+
const pattern = buildWordPattern(ARCHAIC_WORDS);
|
|
999
1073
|
const errors = [];
|
|
1000
|
-
for (const
|
|
1074
|
+
for (const match of context.normalizedResponse.matchAll(pattern)) {
|
|
1001
1075
|
const matchText = match[0];
|
|
1002
1076
|
const idx = match.index ?? 0;
|
|
1003
1077
|
errors.push(makeErrorFromNormalized(context, "archaic_register", `Archaic/Biblical register word detected: "${matchText}"`, matchText, idx, idx + matchText.length));
|
|
1004
1078
|
}
|
|
1005
1079
|
return errors;
|
|
1006
1080
|
};
|
|
1081
|
+
/**
|
|
1082
|
+
* Detect forbidden "God" usage when the source Arabic includes الله.
|
|
1083
|
+
*/
|
|
1084
|
+
const validateGodUsage = (context) => {
|
|
1085
|
+
const errors = [];
|
|
1086
|
+
const godPattern = /\bGod(?:'s|’s|s)?\b/g;
|
|
1087
|
+
const allahPattern = /الله/;
|
|
1088
|
+
for (const marker of context.markers) {
|
|
1089
|
+
const seg = context.segmentById.get(marker.id);
|
|
1090
|
+
if (!seg || !allahPattern.test(seg.text)) continue;
|
|
1091
|
+
const translation = context.normalizedResponse.slice(marker.translationStart, marker.translationEnd);
|
|
1092
|
+
for (const match of translation.matchAll(godPattern)) {
|
|
1093
|
+
const matchText = match[0];
|
|
1094
|
+
const idx = match.index ?? 0;
|
|
1095
|
+
const normalizedStart = marker.translationStart + idx;
|
|
1096
|
+
const normalizedEnd = normalizedStart + matchText.length;
|
|
1097
|
+
errors.push(makeErrorFromNormalized(context, "god_usage", `Forbidden "God" usage detected in "${marker.id}" - use "Allah" when the source contains الله`, matchText, normalizedStart, normalizedEnd, marker.id));
|
|
1098
|
+
}
|
|
1099
|
+
}
|
|
1100
|
+
return errors;
|
|
1101
|
+
};
|
|
1007
1102
|
const getLineStartLabelCounts = (text) => {
|
|
1008
1103
|
const prefixes = /* @__PURE__ */ new Map();
|
|
1009
1104
|
const lines = text.split(/\n/);
|
|
@@ -1187,6 +1282,11 @@ const DEFAULT_RULES = [
|
|
|
1187
1282
|
run: validateArchaicRegister,
|
|
1188
1283
|
type: "archaic_register"
|
|
1189
1284
|
},
|
|
1285
|
+
{
|
|
1286
|
+
id: "god_usage",
|
|
1287
|
+
run: validateGodUsage,
|
|
1288
|
+
type: "god_usage"
|
|
1289
|
+
},
|
|
1190
1290
|
{
|
|
1191
1291
|
id: "mismatched_colons",
|
|
1192
1292
|
run: validateMismatchedColons,
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","names":["rawStart","matchText"],"sources":["../src/constants.ts","../.generated/prompts.ts","../src/prompts.ts","../src/textUtils.ts","../src/fix.ts","../src/validation.ts"],"sourcesContent":["/**\n * Supported marker types for segments.\n */\nexport enum Markers {\n /** B - Book reference */\n Book = 'B',\n /** F - Footnote reference */\n Footnote = 'F',\n /** T - Heading reference */\n Heading = 'T',\n /** C - Chapter reference */\n Chapter = 'C',\n /** N - Note reference */\n Note = 'N',\n /** P - Translation/Plain segment */\n Plain = 'P',\n}\n\n/**\n * Regex parts for building translation marker patterns.\n */\nexport const TRANSLATION_MARKER_PARTS = {\n /** Dash variations (hyphen, en dash, em dash) */\n dashes: '[-–—]',\n /** Numeric portion of the reference */\n digits: '\\\\d+',\n /** Valid marker prefixes (Book, Chapter, Footnote, Translation, Page) */\n markers: `[${Markers.Book}${Markers.Chapter}${Markers.Footnote}${Markers.Heading}${Markers.Plain}${Markers.Note}]`,\n /** Optional whitespace before dash */\n optionalSpace: '\\\\s?',\n /** Valid single-letter suffixes */\n suffix: '[a-z]',\n} as const;\n\n/**\n * Pattern for a segment ID (e.g., P1234, B45a).\n */\nexport const MARKER_ID_PATTERN = `${TRANSLATION_MARKER_PARTS.markers}${TRANSLATION_MARKER_PARTS.digits}${TRANSLATION_MARKER_PARTS.suffix}?`;\n\n/**\n * English tokens that indicate archaic/Biblical register and should be flagged.\n */\nexport const ARCHAIC_WORDS = [\n 'thee',\n 'thou',\n 'thine',\n 'thy',\n 'verily',\n 'shalt',\n 'hast',\n 'whence',\n 'henceforth',\n 'saith',\n 'behold',\n] as const;\n\nexport const MAX_EMPTY_PARENTHESES = 3;\nexport const MIN_ARABIC_LENGTH_FOR_TRUNCATION_CHECK = 50;\nexport const MIN_TRANSLATION_RATIO = 0.25;\n\nexport const COLON_PATTERN = /[::]/g;\n","// AUTO-GENERATED FILE - DO NOT EDIT\n// Generated from prompts/*.md by scripts/generate-prompts.ts\n\n// 
=============================================================================\n// PROMPT TYPE\n// =============================================================================\n\nexport type PromptId = 'master_prompt' | 'encyclopedia_mixed' | 'fatawa' | 'fiqh' | 'hadith' | 'jarh_wa_tadil' | 'tafsir' | 'usul_al_fiqh';\n\n// =============================================================================\n// RAW PROMPT CONTENT\n// =============================================================================\n\nexport const MASTER_PROMPT = \"ROLE: Expert academic translator of Classical Islamic texts; prioritize accuracy and structure over fluency.\\nCRITICAL NEGATIONS: 1. NO SANITIZATION (Do not soften polemics). 2. NO META-TALK (Output translation only). 3. NO MARKDOWN (Plain text only). 4. NO EMENDATION. 5. NO INFERENCE. 6. NO RESTRUCTURING. 7. NO OPAQUE TRANSLITERATION (Must translate phrases). 8. NO INVENTED SEGMENTS (Do not create, modify, or \\\"continue\\\" segment IDs. Output IDs verbatim exactly as they appear in the source input/metadata. Alphabetic suffixes (e.g., P5511a) are allowed IF AND ONLY IF that exact ID appears in the source. Any ID not present verbatim in the source is INVENTED. EXAMPLE: If P5803b ends with a questioner line, that line stays under P5803b — do NOT invent P5803c. If an expected ID is missing from the source, output: \\\"ID - [MISSING]\\\".)\\nRULES: NO ARABIC SCRIPT (Except ﷺ). Plain text only. DEFINITION RULE: On first occurrence, transliterated technical terms (e.g., bidʿah) MUST be defined: \\\"translit (English)\\\". Preserve Segment ID. Translate meaning/intent. No inference. No extra fields. Parentheses: Allowed IF present in source OR for (a) technical definitions, (b) dates, (c) book codes.\\nARABIC LEAK (Hard ban):\\n- SCRIPT LOCK: Output must be 100% Latin script (ASCII + ALA-LC diacritics like ā ī ū ḥ ṣ ḍ ṭ ẓ ʿ ʾ). 
These diacritics are allowed/required and are NOT Arabic script.\\n- STRICT BAN: Arabic script codepoints (letters, Arabic-Indic numerals ٠-٩, punctuation like ، ؟ ؛ « » , tatweel ـ, and Arabic presentation forms) are forbidden everywhere in output (even inside quotes/brackets/parentheses/citations), except ﷺ.\\n- NO CITATIONS/BILINGUAL: Do NOT paste Arabic source text anywhere (no quotes, no citations, no bilingual Arabic+English output). Translate into English only.\\n- QUOTES/VERSES/CITATIONS: Even if the source includes Arabic Qurʾān/ḥadīth/quoted text (e.g., «...») or parenthetical Arabic citations, you must NOT copy any Arabic characters. Translate the meaning fully into English only.\\n- NO MIXED-SCRIPT: Never output a token that mixes Latin and Arabic characters (example: ʿĪد). Rewrite contaminated names/terms fully in Latin ALA-LC.\\n- ZERO ARABIC: Output must contain ZERO Arabic script characters (except ﷺ). If any Arabic appears, delete it and rewrite until none remain.\\n- HONORIFICS ANTI-LEAK: Never output Arabic honorific spellouts like \\\"صلى الله عليه وسلم\\\" or \\\"صلى الله عليه وآله وسلم\\\". Always replace any Prophet salutation with ﷺ.\\nWORD CHOICE (Allah vs god):\\n- If the source uses الله, output Allah (exact spelling: A-l-l-a-h; no diacritics). Never \\\"God\\\" / \\\"god\\\" / \\\"Allāh\\\". (This is the only exception to ALA-LC diacritics.)\\n- DO NOT convert Allah-based formulae into English “God …” idioms. Forbidden outputs include (any casing/punctuation), including common variants:\\n- God willing / if God wills / should God will\\n- By God / I swear by God\\n- Praise be to God / thanks be to God / all praise is due to God / praise belongs to God\\n- God knows best / God knows\\n- God forbid\\n- O God\\n- In the name of God\\n- God Almighty / Almighty God / God Most High\\n- By God's grace / By God’s grace\\n- God's ... / God’s ... / ... 
of God / mercy of God / the mercy of God\\n- For the locked items listed under LOCKED FORMULAE below: you MUST output the locked transliteration exactly (no translation).\\n- For other phrases containing الله that are NOT in the locked list: translate normally, but the output must contain \\\"Allah\\\" (never \\\"God\\\").\\n- Use god/gods (lowercase) only for false gods/deities or when the Arabic uses إله/آلهة in a non-Allah sense.\\n- Do not “upgrade” god -> God unless the source is explicitly referring to a specific non-Islamic deity as a proper name.\\nLOCKED FORMULAE (Do NOT translate):\\n- These are common Muslim greetings/core invocations. Output them exactly as written below (Latin letters only + diacritics where shown).\\n- CHECK THIS LIST FIRST. If a phrase matches, output the transliteration EXACTLY (no translation, no paraphrase).\\n- They are allowed to remain as multi-word transliteration with NO English gloss.\\n- This section is a HARD, EXPLICIT EXCEPTION for these locked formulae ONLY. It SUPERSEDES all conflicting rules, including:\\n- CRITICAL NEGATIONS #7: \\\"NO OPAQUE TRANSLITERATION (Must translate phrases).\\\"\\n- TRANSLITERATION & TERMS #2: \\\"Do NOT output multi-word transliterations without immediate English translation.\\\"\\n- TRANSLITERATION & TERMS: \\\"Do NOT transliterate full sentences/matn/quotes.\\\"\\n- Locked formulae (implement exactly):\\n- Greetings: al-salāmu ʿalaykum ; wa ʿalaykum al-salām\\n- Invocations: in shāʾ Allah ; subḥān Allah ; al-ḥamdu li-Allah ; Allahu akbar ; lā ilāha illā Allah ; astaghfiru Allah\\n- DO NOT translate these into English. Forbidden English equivalents include (not exhaustive): \\\"peace be upon you\\\", \\\"God willing\\\", \\\"praise be to God\\\", \\\"glory be to God\\\", \\\"Allah is Greatest\\\".\\n- Note: this lock is intentionally narrow. Other phrases (e.g., \\\"Jazāk Allahu khayr\\\") may be translated normally.\\nREGISTER (Modern English):\\n- Use modern academic English. 
Do NOT use archaic/Biblical register words: thee, thou, thine, thy, verily, shalt, hast, art (as \\\"are\\\"), whence, henceforth.\\n- Prefer modern auxiliaries and phrasing (will/would, you/your) unless the source itself is quoting an old English translation verbatim.\\n- NO ALL CAPS / NO KJV-STYLE: Do NOT use ALL CAPS for emphasis (even inside quotes). Do NOT render Arabic Qurʾān/ḥadīth in KJV/Biblical style.\\nTRANSLITERATION & TERMS:\\n1. SCHEME: Use full ALA-LC for explicit Arabic-script Person/Place/Book-Titles.\\n- al-Casing: Lowercase al- mid-sentence; Capitalize after (al-Salafīyyah).\\n- Book Titles: Transliterate only (do not translate meanings).\\n2. TECHNICAL TERMS: On first occurrence, define: \\\"translit (English)\\\" (e.g., bidʿah (innovation), isnād (chain)).\\n- Do NOT output multi-word transliterations without immediate English translation.\\n- Do NOT transliterate full sentences/matn/quotes. Translate into English; transliteration is for names/terms only.\\n- EXCEPTION (Duʿāʾ/Supplications): If the source contains a specific duʿāʾ/supplication phrase and you choose to preserve its wording for pronunciation, you MAY output transliteration BUT you MUST also translate it immediately (same line or next) as: \\\"translit (English translation)\\\". Do NOT output Arabic script.\\n- Example Allowed: Allāhumma innī asʾaluka al-ʿāfiyah (O Allah, I ask You for well-being).\\n- Example Forbidden: Transliterate a long multi-sentence duʿāʾ paragraph without translating it.\\n- LOCKED FORMULAE are the only exception allowed to remain multi-word transliteration with NO English gloss.\\n- If you use any other multi-word transliteration (not locked), it MUST be immediately glossed: \\\"translit (English)\\\". Prefer full English translation for phrases.\\n- Do NOT leave common nouns/objects/roles as transliteration (e.g., tools, foods, occupations). Translate them into English. 
If you must transliterate a non-name, you MUST immediately gloss it: \\\"translit (English)\\\".\\n3. STANDARDIZED TERMS: Use standard academic spellings: Muḥammad, Shaykh, Qurʾān, Islām, ḥadīth.\\n- Sunnah (Capitalized) = The Corpus/Prophetic Tradition. sunnah (lowercase) = legal status/recommended.\\n4. PROPER NAMES: Transliterate only (no parentheses).\\n5. UNICODE: Latin + Latin Extended (āīūḥʿḍṣṭẓʾ) + punctuation. NO Arabic script (except ﷺ). NO emoji.\\n- DIACRITIC FALLBACK: If you cannot produce correct ALA-LC diacritics, output English only. Do NOT use substitute accents (â/ã/á).\\n6. SALUTATION: Replace all Prophet salutations with ﷺ.\\n7. HONORIFICS: Expand common phrases (do not transliterate):\\n- Allah ʿazza wa-jall -> Allah, the Mighty and Majestic\\n- rahimahu Allah -> may Allah have mercy on him\\n8. AMBIGUITY: Use contextual meaning from tafsir for theological terms. Do not sanitise polemics (e.g. Rāfiḍah).\\nOUTPUT FORMAT: Segment_ID - English translation.\\nCRITICAL: You must use the ASCII hyphen separator \\\" - \\\" (space+hyphen+space) immediately after the ID. Do NOT use em-dash or en-dash. Do NOT use a newline after the ID.\\nID INTEGRITY (Check First):\\n- PREPASS (Silent closed set): Internally identify the exact ordered list of Segment_IDs present in the source. Treat this list as a CLOSED SET. Do not output this list.\\n- REQUIRED (Exact match): Your output must contain EXACTLY those Segment_IDs, in the EXACT same order, each appearing EXACTLY ONCE as an \\\"ID - ...\\\" prefix. FORBIDDEN: re-outputting an ID prefix you already used (even in long segments).\\n- BAN (No new IDs): Do NOT invent ANY IDs or ID-like labels not present verbatim in the source (including \\\"(continued)\\\", \\\"cont.\\\", \\\"part 2\\\", or invented suffixes like P123c). 
Suffix IDs are allowed ONLY if that exact ID appears in the source.\\n- BOUNDARY (No bleed): Translate ONLY the text that belongs to the current Segment_ID (from its header to the next Segment_ID header, or to end-of-input for the last segment). Do NOT move lines across IDs and do NOT merge segments.\\n- ELLIPSIS: If the source contains … or ..., translate it literally as \\\"...\\\" and continue. If the source ends mid-sentence, end the translation abruptly. NEVER output \\\"[INCOMPLETE]\\\".\\nMULTI-LINE SEGMENTS (e.g., internal Q&A): Output the Segment_ID and \\\" - \\\" ONLY ONCE on the first line. Do NOT repeat the Segment_ID on subsequent lines; subsequent lines must start directly with the speaker label/text (no \\\"ID - \\\" prefix).\\nSEGMENT BOUNDARIES (Anti-hallucination): Start a NEW segment ONLY when the source explicitly provides a Segment_ID. If the source continues with extra lines (including speaker labels like \\\"Questioner:\\\"/\\\"The Shaykh:\\\"/\\\"السائل:\\\"/\\\"الشيخ:\\\") WITHOUT a new Segment_ID, treat them as part of the CURRENT segment (multi-line under the current Segment_ID). Do NOT invent a new ID (including alphabetic suffixes like \\\"P5803c\\\") to label such continuation.\\nOUTPUT COMPLETENESS: Translate ALL content in EVERY segment. Do not truncate, summarize, or skip content.\\nOUTPUT UNIQUENESS: Each Segment_ID from the source must appear in your output EXACTLY ONCE as an \\\"ID - ...\\\" prefix. Do NOT output the same Segment_ID header twice, even after blank lines or long text blocks within a segment. If a segment is long or has multiple speaker turns, continue translating under that single ID header without re-stating it.\\nNEGATIVE CONSTRAINTS: Do NOT output \\\"implicit continuation\\\", summaries, or extra paragraphs. Output only the text present in the source segment.\\nExample: P1234 - Translation text... (Correct) vs P1234\\\\nTranslation... (Forbidden).\\nEXAMPLE: Input: P405 - حدثنا عبد الله بن يوسف... 
Output: P405 - ʿAbd Allāh b. Yūsuf narrated to us...\";\n\nexport const ENCYCLOPEDIA_MIXED = \"NO MODE TAGS: Do not output any mode labels or bracket tags.\\nSTRUCTURE (Apply First):\\n- LINE BREAKS (CRITICAL): Preserve the source line breaks around speaker turns. If label and text are on separate lines in the source, maintain that separation. Do NOT merge two source lines into one.\\n- EXCEPTION: If the speaker label is the VERY FIRST token after the \\\"ID - \\\" prefix, keep it on the same line. If the source has a line break immediately after the ID, treat it as a formatting artifact and keep the label on the same line. (Correct: P5455 - Questioner: Text...) (Wrong: P5455 \\\\n Questioner: Text...).\\n- INTERNAL Q&A: If segment has multiple turns, preserve the source line breaks between turns. Output Segment ID ONLY ONCE at the start of the first line. Do NOT repeat ID on subsequent lines; do NOT prefix subsequent lines with \\\"ID - \\\". (e.g. P5455 - Questioner: ... \\\\n The Shaykh: ...).\\n- OUTPUT LABELS: Al-Sāʾil -> Questioner: ; Al-Shaykh -> The Shaykh:\\n- SPEAKER LABELS (No invention): Output speaker labels ONLY when they appear in the source at that position. Do NOT add \\\"Questioner:\\\"/\\\"The Shaykh:\\\" to unlabeled text. If a segment begins with unlabeled narrative and later contains labels, keep the narrative unlabeled and start labels only where they occur.\\nDEFINITIONS & CASING:\\n- GEOPOLITICS: Modern place names may use English exonyms (Filasṭīn -> Palestine).\\n- PLURALS: Do not pluralize term-pairs by appending \\\"s\\\" (e.g., \\\"ḥadīth (report)s\\\"). 
Use the English plural or rephrase.\\nSTATE LOGIC (Priority: Isnad > Rijal > Fiqh > Narrative):\\n- ISNAD (Triggers: ḥaddathanā, akhbaranā, ʿan): Use FULL ALA-LC for names.\\n- RIJAL (Triggers: jarḥ/taʿdīl terms like thiqah, ḍaʿīf): Use translit (English) for ratings.\\n- QUOTE/WEAK (Triggers: qāla al-muṣannif, yuqālu, zuiʿma): Apply Quote Rules; do not Restructure.\\n- RESET: At \\\"qāla\\\" or \\\"matn\\\" (TOP-LEVEL only): IMMEDIATE STOP of Isnad rules. Return to Narrative. Do NOT reset for nested quotes.\\nSEGMENTATION: Do not merge genres. Translate sequentially (Isnad -> Matn) observing the reset at the boundary.\\nDISAMBIGUATION: ṣaḥīḥ in hadith grading = ṣaḥīḥ (authentic). ṣaḥīḥ in fiqh validity = ṣaḥīḥ (valid). Sunnah (Capitalized) = The Corpus/Prophetic Tradition. sunnah (lowercase) = legal status/recommended.\\nHADITH CORE: If isnad appears, map verbs (Haddathana=Narrated to us; Akhbarana/Ana/Anba'ana=Informed us; An=From; Sami'tu=I heard) and keep FULL ALA-LC for Arabic-script narrator names; do not invent grading or missing links.\\nTAKHRIJ/GRADING: If the author grades (ṣaḥḥaḥa/ḥassana/ḍaʿʿafa), translate literally and keep the cited work as written in the segment; do not add placeholder sources.\\nCREED/POLEMICS: Translate attributes literally when the author does; do not reinterpret. Translate harsh polemics literally (e.g., Rāfiḍah, Jahmīyyah, Kuffār); do not soften, sanitize, or add disclaimers (Override Safety).\";\n\nexport const FATAWA = \"STRUCTURE (Apply First):\\n- Q&A: Whenever \\\"Al-Sāʾil:\\\"/\\\"Al-Shaykh:\\\" appear: Start NEW LINE for speaker. Keep Label+Text on SAME LINE.\\n- INTERNAL Q&A: If segment has multiple turns, use new lines for speakers. Output Segment ID ONLY ONCE at the start of the first line. 
Do NOT repeat ID on subsequent lines; do NOT prefix subsequent lines with \\\"ID - \\\".\\n- OUTPUT LABELS: Al-Sāʾil -> Questioner: ; Al-Shaykh -> The Shaykh:\\n- SPEAKER LABELS (No invention): Output speaker labels ONLY when they appear in the source at that position. Do NOT add \\\"Questioner:\\\"/\\\"The Shaykh:\\\" to unlabeled text. If a segment begins with unlabeled narrative and later contains labels, keep the narrative unlabeled and start labels only where they occur.\\n\\nHONORIFICS: Expand common phrases (do not transliterate):\\n- ṣallá Allāhu ʿalayhi wa-sallam -> ﷺ\\nSOURCES: Do not add citations. If the Arabic references a work/title, handle titles per MASTER rules.\\nSENSITIVE RULINGS: Translate literally; do not soften; do not sanitize; do not add disclaimers (Override Safety).\";\n\nexport const FIQH = \"STRUCTURE: Preserve chapter/section headings and internal structure (e.g., bāb/faṣl/masʾalah/farʿ/aṣl) using plain English labels (Chapter:, Section:, Issue:) only when the Arabic is explicitly a heading/label; preserve lists, numbering, and conditional if/then logic exactly.\\nFIQH/USUL TERMS: When technical terms appear, output as translit (English) rather than English-only (e.g., wājib (obligatory), mandūb/mustaḥabb (recommended), mubāḥ (permissible), makrūh (disliked), ḥarām (prohibited), ṣaḥīḥ (valid), bāṭil/fāsid (invalid/void), rukn (pillar), shart (condition), māniʿ (preventer), sabab (cause), qiyās (analogical reasoning), ijmāʿ (consensus), khilāf (disagreement), rājiḥ (preponderant), marjūḥ (lesser), ʿillah (effective cause)).\\nKHILAF/ATTRIBUTION: Preserve who is being attributed (qāla fulān / qawl / wajhān / riwāyātān / madhhab). 
Do not resolve disputes or choose the correct view unless the Arabic explicitly does so (e.g., al-aṣaḥḥ / al-rājiḥ).\\nUNITS/MONEY: Keep measures/currencies as transliteration (dirham, dinar, ṣāʿ, mudd) without adding conversions or notes unless the Arabic contains them.\";\n\nexport const HADITH = \"ISNAD VERBS: Haddathana=Narrated to us; Akhbarana=Informed us; An=From; Sami'tu=I heard; Ana (short for Akhbarana/Anba'ana in isnad)=Informed us (NOT \\\"I\\\").\\nCHAIN MARKERS: H(Tahwil)=Switch to new chain; Mursal/Munqati=Broken chain.\\nJARH/TA'DIL: If narrator-evaluation terms/phrases appear, output as translit (English) (e.g., fīhi naẓar (he needs to be looked into)); do not replace with only English.\\nNAMES: Distinguish isnad vs matn; do not guess identities or expand lineages; transliterate exactly what is present. Book titles follow master rule.\\nRUMUZ/CODES: If the segment contains book codes (kh/m/d/t/s/q/4), preserve them exactly; do not expand to book names.\";\n\nexport const JARH_WA_TADIL = \"GLOSSARY: When a jarh/ta'dil term/phrase appears, output as translit (English) (e.g., thiqah (trustworthy), ṣadūq (truthful), layyin (soft/lenient), ḍaʿīf (weak), matrūk (abandoned), kadhdhāb (liar), dajjāl (imposter), munkar al-ḥadīth (narrates denounced hadith)).\\nRUMUZ: Preserve book codes in Latin exactly as in the segment (e.g., (kh) (m) (d t q) (4) (a)); do not expand unless the Arabic segment itself expands them.\\nQALA: Translate as \\\"He said:\\\" and start a new line for each new critic.\\nDATES: Use (d. 256 AH) or (born 194 AH).\\nNO HARM: Translate \\\"There is no harm in him\\\"; no notes.\\nPOLEMICS: Harsh terms (e.g., dajjāl, khabīth, rāfiḍī) must be translated literally; do not soften.\";\n\nexport const TAFSIR = \"AYAH CITES: Do not output surah names unless the Arabic includes the name. Use [2:255]. 
If the segment contains quoted Qur'an text, translate it in braces: {…} [2:255].\\nATTRIBUTES: Translate Allah’s attributes as the author intends; if the author is literal, keep literal (e.g., Hand, Face); do not add metaphorical reinterpretation unless the author does; mirror the author’s theology (Ash'ari vs Salafi) exactly.\\nI'RAB TERMS: Mubtada=Subject; Khabar=Predicate; Fa'il=Agent/Doer; Maf'ul=Object.\\nPROPHET NAMES: Use Arabic equivalents with ALA-LC diacritics (e.g., Mūsá, ʿĪsá, Dāwūd, Yūsuf).\\nPOETRY: Preserve line breaks (one English line per Arabic line); no bullets; prioritize literal structure/grammar over rhyme.\";\n\nexport const USUL_AL_FIQH = \"STRUCTURE: Preserve the argument structure (claims, objections \\\"if it is said...\\\", replies \\\"we say...\\\", evidences, counter-evidences). Preserve explicit labels (faṣl, masʾalah, qāla, qīla, qulna) as plain English equivalents only when the Arabic is explicitly a label.\\nUSUL TERMS: When technical terms appear, output as translit (English) (e.g., ʿāmm (general), khāṣṣ (specific), muṭlaq (absolute), muqayyad (restricted), amr (command), nahy (prohibition), ḥaqīqah (literal), majāz (figurative), mujmal (ambiguous), mubayyan (clarified), naṣṣ (explicit text), ẓāhir (apparent), mafhūm (implication), manṭūq (stated meaning), dalīl (evidence), qiyās (analogical reasoning), ʿillah (effective cause), sabab (cause), shart (condition), māniʿ (preventer), ijmāʿ (consensus), naskh (abrogation)).\\nDISPUTE HANDLING: Do not resolve methodological disputes or harmonize schools unless the Arabic explicitly chooses (e.g., al-rājiḥ / al-aṣaḥḥ / ṣaḥīḥ). Preserve attribution to the madhhab/scholars as written.\\nQUR'AN/HADITH: Keep verse references in the segment’s style; do not invent references. 
If a hadith isnad appears, follow MASTER isnad/name rules.\";\n\n// =============================================================================\n// PROMPT METADATA\n// =============================================================================\n\nexport const PROMPTS = [\n {\n id: 'master_prompt' as const,\n name: 'Master Prompt',\n content: MASTER_PROMPT,\n },\n {\n id: 'encyclopedia_mixed' as const,\n name: 'Encyclopedia Mixed',\n content: ENCYCLOPEDIA_MIXED,\n },\n {\n id: 'fatawa' as const,\n name: 'Fatawa',\n content: FATAWA,\n },\n {\n id: 'fiqh' as const,\n name: 'Fiqh',\n content: FIQH,\n },\n {\n id: 'hadith' as const,\n name: 'Hadith',\n content: HADITH,\n },\n {\n id: 'jarh_wa_tadil' as const,\n name: 'Jarh Wa Tadil',\n content: JARH_WA_TADIL,\n },\n {\n id: 'tafsir' as const,\n name: 'Tafsir',\n content: TAFSIR,\n },\n {\n id: 'usul_al_fiqh' as const,\n name: 'Usul Al Fiqh',\n content: USUL_AL_FIQH,\n },\n] as const;\n\nexport type PromptMetadata = (typeof PROMPTS)[number];\n","import { MASTER_PROMPT, PROMPTS, type PromptId, type PromptMetadata } from '@generated/prompts';\n\nexport type { PromptId, PromptMetadata };\n\n/**\n * A stacked prompt ready for use with an LLM.\n */\nexport type StackedPrompt = {\n /** Unique identifier */\n id: PromptId;\n /** Human-readable name */\n name: string;\n /** The full prompt content (master + addon if applicable) */\n content: string;\n /** Whether this is the master prompt (not stacked) */\n isMaster: boolean;\n};\n\n/**\n * Stacks a master prompt with a specialized addon prompt.\n *\n * @param master - The master/base prompt\n * @param addon - The specialized addon prompt\n * @returns Combined prompt text\n */\nexport const stackPrompts = (master: string, addon: string): string => {\n if (!master) {\n return addon;\n }\n if (!addon) {\n return master;\n }\n return `${master}\\n${addon}`;\n};\n\n/**\n * Gets all available prompts as stacked prompts (master + addon combined).\n * Master prompt is returned 
as-is, addon prompts are stacked with master.\n *\n * @returns Array of all stacked prompts\n */\nexport const getPrompts = (): StackedPrompt[] => {\n return PROMPTS.map((prompt) => ({\n content: prompt.id === 'master_prompt' ? prompt.content : stackPrompts(MASTER_PROMPT, prompt.content),\n id: prompt.id,\n isMaster: prompt.id === 'master_prompt',\n name: prompt.name,\n }));\n};\n\n/**\n * Gets a specific prompt by ID (strongly typed).\n * Returns the stacked version (master + addon) for addon prompts.\n *\n * @param id - The prompt ID to retrieve\n * @returns The stacked prompt\n * @throws Error if prompt ID is not found\n */\nexport const getPrompt = (id: PromptId): StackedPrompt => {\n const prompt = PROMPTS.find((p) => p.id === id);\n if (!prompt) {\n throw new Error(`Prompt not found: ${id}`);\n }\n\n return {\n content: prompt.id === 'master_prompt' ? prompt.content : stackPrompts(MASTER_PROMPT, prompt.content),\n id: prompt.id,\n isMaster: prompt.id === 'master_prompt',\n name: prompt.name,\n };\n};\n\n/**\n * Gets the raw stacked prompt text for a specific prompt ID.\n * Convenience method for when you just need the text.\n *\n * @param id - The prompt ID\n * @returns The stacked prompt content string\n */\nexport const getStackedPrompt = (id: PromptId): string => {\n return getPrompt(id).content;\n};\n\n/**\n * Gets the list of available prompt IDs.\n * Useful for UI dropdowns or validation.\n *\n * @returns Array of prompt IDs\n */\nexport const getPromptIds = (): PromptId[] => {\n return PROMPTS.map((p) => p.id);\n};\n\n/**\n * Gets just the master prompt content.\n * Useful when you need to use a custom addon.\n *\n * @returns The master prompt content\n */\nexport const getMasterPrompt = (): string => {\n return MASTER_PROMPT;\n};\n","/**\n * Segment type is shared across the library.\n */\nimport { MARKER_ID_PATTERN, TRANSLATION_MARKER_PARTS } from './constants';\nimport type { Segment } from './types';\n\n/**\n * Formats excerpts for an LLM prompt by 
combining the prompt rules with the segment text.\n * Each segment is formatted as \"ID - Text\" and separated by double newlines.\n *\n * @param segments - Array of segments to format\n * @param prompt - The instruction/system prompt to prepend\n * @returns Combined prompt and formatted text\n */\nexport const formatExcerptsForPrompt = (segments: Segment[], prompt: string) => {\n const formatted = segments.map((e) => `${e.id} - ${e.text}`).join('\\n\\n');\n return [prompt, formatted].join('\\n\\n');\n};\n\n/**\n * Normalize line endings and split merged markers onto separate lines.\n *\n * @example\n * // \"helloP1 - ...\" becomes split onto a new line before \"P1 -\"\n * normalizeTranslationText('helloP1 - x').includes('\\\\nP1 -') === true\n */\nexport const normalizeTranslationText = (content: string) => {\n return normalizeTranslationTextWithMap(content).normalized;\n};\n\nexport const normalizeTranslationTextWithMap = (content: string) => {\n const normalizeLineEndingsWithMap = (input: string) => {\n let normalized = '';\n const indexMap: number[] = [];\n for (let i = 0; i < input.length; i++) {\n const ch = input[i];\n if (ch === '\\r') {\n if (input[i + 1] === '\\n') {\n normalized += '\\n';\n indexMap.push(i);\n i++;\n continue;\n }\n normalized += '\\n';\n indexMap.push(i);\n continue;\n }\n normalized += ch;\n indexMap.push(i);\n }\n return { normalized, indexMap };\n };\n\n const insertNewlinesBeforeMergedMarkers = (text: string, map: number[]) => {\n const mergedMarkerNoSpacePattern = new RegExp(\n `([^\\\\s\\\\n])(${MARKER_ID_PATTERN}${TRANSLATION_MARKER_PARTS.optionalSpace}${TRANSLATION_MARKER_PARTS.dashes})`,\n 'g',\n );\n let normalized = '';\n const indexMap: number[] = [];\n let lastIndex = 0;\n for (const match of text.matchAll(mergedMarkerNoSpacePattern)) {\n const matchIndex = match.index ?? 
0;\n for (let i = lastIndex; i < matchIndex; i++) {\n normalized += text[i];\n indexMap.push(map[i]);\n }\n normalized += match[1];\n indexMap.push(map[matchIndex]);\n normalized += '\\n';\n indexMap.push(map[matchIndex]);\n const marker = match[2];\n for (let j = 0; j < marker.length; j++) {\n normalized += marker[j];\n indexMap.push(map[matchIndex + 1 + j]);\n }\n lastIndex = matchIndex + match[0].length;\n }\n for (let i = lastIndex; i < text.length; i++) {\n normalized += text[i];\n indexMap.push(map[i]);\n }\n return { normalized, indexMap };\n };\n\n const replaceSpaceBeforeMarkerWithNewline = (text: string, map: number[]) => {\n const mergedMarkerWithSpacePattern = new RegExp(\n ` (${MARKER_ID_PATTERN}${TRANSLATION_MARKER_PARTS.optionalSpace}${TRANSLATION_MARKER_PARTS.dashes})`,\n 'g',\n );\n let normalized = '';\n const indexMap: number[] = [];\n let lastIndex = 0;\n for (const match of text.matchAll(mergedMarkerWithSpacePattern)) {\n const matchIndex = match.index ?? 0;\n for (let i = lastIndex; i < matchIndex; i++) {\n normalized += text[i];\n indexMap.push(map[i]);\n }\n normalized += '\\n';\n indexMap.push(map[matchIndex]);\n const marker = match[1];\n for (let j = 0; j < marker.length; j++) {\n normalized += marker[j];\n indexMap.push(map[matchIndex + 1 + j]);\n }\n lastIndex = matchIndex + match[0].length;\n }\n for (let i = lastIndex; i < text.length; i++) {\n normalized += text[i];\n indexMap.push(map[i]);\n }\n return { normalized, indexMap };\n };\n\n const removeEscapedBrackets = (text: string, map: number[]) => {\n let normalized = '';\n const indexMap: number[] = [];\n for (let i = 0; i < text.length; i++) {\n if (text[i] === '\\\\' && text[i + 1] === '[') {\n i++;\n normalized += '[';\n indexMap.push(map[i]);\n continue;\n }\n normalized += text[i];\n indexMap.push(map[i]);\n }\n return { normalized, indexMap };\n };\n\n const lineEndingNormalized = normalizeLineEndingsWithMap(content);\n const insertedNewlines = 
insertNewlinesBeforeMergedMarkers(lineEndingNormalized.normalized, lineEndingNormalized.indexMap);\n const spaceReplaced = replaceSpaceBeforeMarkerWithNewline(insertedNewlines.normalized, insertedNewlines.indexMap);\n return removeEscapedBrackets(spaceReplaced.normalized, spaceReplaced.indexMap);\n};\n\n/**\n * Extract translation IDs from normalized response, in order.\n *\n * @example\n * extractTranslationIds('P1 - a\\\\nP2b - b') // => ['P1', 'P2b']\n */\nexport const extractTranslationIds = (text: string) => {\n const { dashes, optionalSpace } = TRANSLATION_MARKER_PARTS;\n const pattern = new RegExp(`^(${MARKER_ID_PATTERN})${optionalSpace}${dashes}`, 'gm');\n const ids: string[] = [];\n for (const match of text.matchAll(pattern)) {\n ids.push(match[1]);\n }\n return ids;\n};\n\n/**\n * Parses bulk translation text into a Map for efficient O(1) lookup.\n *\n * Handles multi-line translations: subsequent non-marker lines belong to the previous ID.\n *\n * @param rawText - Raw text containing translations in format \"ID - Translation text\"\n * @returns An object with `count` and `translationMap`\n *\n * @example\n * parseTranslations('P1 - a\\\\nP2 - b').count === 2\n */\nexport const parseTranslations = (rawText: string) => {\n const normalized = normalizeTranslationText(rawText);\n const translationMap = splitResponseById(normalized);\n return { count: translationMap.size, translationMap };\n};\n\n/**\n * Parse translations into an ordered array (preserving the original response order).\n *\n * This differs from `parseTranslations()` which returns a Map and therefore cannot represent\n * duplicates as separate entries.\n *\n * @param rawText - Raw text containing translations in format \"ID - Translation text\"\n * @returns Array of `{ id, translation }` entries in appearance order\n *\n * @example\n * parseTranslationsInOrder('P1 - a\\\\nP2 - b').map((e) => e.id) // => ['P1', 'P2']\n */\nexport const parseTranslationsInOrder = (rawText: string) => {\n const 
normalized = normalizeTranslationText(rawText);\n const { dashes, optionalSpace } = TRANSLATION_MARKER_PARTS;\n const headerPattern = new RegExp(`^(${MARKER_ID_PATTERN})${optionalSpace}${dashes}\\\\s*`, 'gm');\n const matches = [...normalized.matchAll(headerPattern)];\n\n const entries: Array<{ id: string; translation: string }> = [];\n for (let i = 0; i < matches.length; i++) {\n const id = matches[i][1];\n const start = matches[i].index ?? 0;\n const nextStart = i + 1 < matches.length ? (matches[i + 1].index ?? normalized.length) : normalized.length;\n const chunk = normalized.slice(start, nextStart).trimEnd();\n const prefixPattern = new RegExp(`^${id}${optionalSpace}${dashes}\\\\s*`);\n const translation = chunk.replace(prefixPattern, '').trim();\n entries.push({ id, translation });\n }\n return entries;\n};\n\n/**\n * Split the response into a per-ID map. Values contain translation content only (prefix removed).\n *\n * @example\n * splitResponseById('P1 - a\\\\nP2 - b').get('P1') === 'a'\n */\nexport const splitResponseById = (text: string) => {\n const map = new Map<string, string>();\n for (const entry of parseTranslationsInOrder(text)) {\n map.set(entry.id, entry.translation);\n }\n return map;\n};\n\nexport const escapeRegExp = (s: string) => s.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n","import { MARKER_ID_PATTERN, TRANSLATION_MARKER_PARTS } from './constants';\nimport { escapeRegExp } from './textUtils';\nimport type { FixAllOptions, FixConfig, FixResult, ValidationErrorType } from './types';\n\nconst DEFAULT_LEADING_PUNCTUATION = ['.', '?', '!', '…', '،', '؛', ':', ':', '-', '–', '—'];\n\nconst buildLabelPattern = (labels: string[]) => {\n const parts = labels.map((label) => `${escapeRegExp(label)}\\\\s*:`).join('|');\n return `(?:${parts})`;\n};\n\nconst buildPunctuationPattern = (punctuation: string[]) => punctuation.map((token) => escapeRegExp(token)).join('|');\n\nconst buildLineStartLabelPattern = (labels: string[]) => {\n const labelPattern = 
buildLabelPattern(labels);\n const { dashes, optionalSpace } = TRANSLATION_MARKER_PARTS;\n return new RegExp(`^(?:${MARKER_ID_PATTERN}${optionalSpace}${dashes}\\\\s*)?(${labelPattern})`);\n};\n\nexport const fixCollapsedSpeakerLines = (text: string, config?: FixConfig): FixResult => {\n if (!config?.speakerLabels?.length) {\n throw new Error('fixCollapsedSpeakerLines requires speakerLabels in FixConfig');\n }\n const speakerLabels = config.speakerLabels;\n const leadingPunctuation = config?.leadingPunctuation ?? DEFAULT_LEADING_PUNCTUATION;\n const labelPattern = buildLabelPattern(speakerLabels);\n let count = 0;\n const punctuationPattern = buildPunctuationPattern(leadingPunctuation);\n const trailingPunctPattern = punctuationPattern ? new RegExp(`(?:${punctuationPattern})+\\\\s*$`) : undefined;\n const lineStartPattern = buildLineStartLabelPattern(speakerLabels);\n const labelRegex = new RegExp(labelPattern, 'g');\n\n const fixed = text\n .split('\\n')\n .map((line) => {\n const startMatch = line.match(lineStartPattern);\n const lineStartLabelIndex = startMatch ? startMatch[0].length - startMatch[1].length : -1;\n let lastIndex = 0;\n let updated = '';\n\n for (const match of line.matchAll(labelRegex)) {\n const idx = match.index ?? 0;\n if (idx === lineStartLabelIndex) {\n continue;\n }\n if (idx === 0) {\n continue;\n }\n const prefix = line.slice(lastIndex, idx);\n if (trailingPunctPattern) {\n const punctMatch = prefix.match(trailingPunctPattern);\n if (punctMatch) {\n const punct = punctMatch[0].replace(/\\s+$/, '');\n const beforePunct = prefix.slice(0, -punctMatch[0].length);\n updated += beforePunct + punct + '\\n' + match[0];\n lastIndex = idx + match[0].length;\n count += 1;\n continue;\n }\n }\n updated += prefix.replace(/\\s+$/, '') + '\\n' + match[0];\n lastIndex = idx + match[0].length;\n count += 1;\n }\n\n if (count === 0) {\n return line;\n }\n return updated + line.slice(lastIndex);\n })\n .join('\\n');\n return {\n applied: count > 0 ? 
['fixCollapsedSpeakerLines'] : [],\n counts: { fixCollapsedSpeakerLines: count },\n text: fixed,\n };\n};\n\nconst FIXERS_BY_TYPE: Partial<Record<ValidationErrorType, (text: string, config?: FixConfig) => FixResult>> = {\n collapsed_speakers: fixCollapsedSpeakerLines,\n mismatched_colons: fixCollapsedSpeakerLines,\n};\n\nexport const fixAll = (text: string, options: FixAllOptions): FixResult => {\n const requested = options.types;\n const applied: string[] = [];\n const skipped: string[] = [];\n let currentText = text;\n const counts: Record<string, number> = {};\n\n for (const type of requested) {\n const fixer = FIXERS_BY_TYPE[type];\n if (!fixer) {\n skipped.push(type);\n continue;\n }\n const result = fixer(currentText, options.config);\n currentText = result.text;\n applied.push(type);\n for (const [key, value] of Object.entries(result.counts)) {\n counts[key] = (counts[key] ?? 0) + value;\n }\n }\n\n return {\n applied,\n counts,\n requested,\n skipped,\n text: currentText,\n };\n};\n","import {\n ARCHAIC_WORDS,\n MARKER_ID_PATTERN,\n MAX_EMPTY_PARENTHESES,\n MIN_ARABIC_LENGTH_FOR_TRUNCATION_CHECK,\n MIN_TRANSLATION_RATIO,\n TRANSLATION_MARKER_PARTS,\n} from './constants';\nimport { escapeRegExp, normalizeTranslationTextWithMap } from './textUtils';\nimport type {\n Range,\n Segment,\n TranslationMarker,\n ValidationConfig,\n ValidationContext,\n ValidationError,\n ValidationErrorType,\n ValidationResponseResult,\n ValidationRule,\n} from './types';\n\n/**\n * Human-readable descriptions for each `ValidationErrorType`, intended for client UIs and logs.\n *\n * @example\n * VALIDATION_ERROR_TYPE_INFO.arabic_leak.description\n */\nexport const VALIDATION_ERROR_TYPE_INFO = {\n all_caps: {\n description: 'ALL CAPS “shouting” detected (run of N uppercase words).',\n },\n arabic_leak: {\n description: 'Arabic script was detected in output (except ﷺ).',\n },\n archaic_register: {\n description: 'Archaic/Biblical English detected (e.g., thou, verily, shalt).',\n },\n 
collapsed_speakers: {\n description: 'Speaker labels appear mid-line instead of starting on a new line.',\n },\n duplicate_id: {\n description: 'The same segment ID appears more than once in the response.',\n },\n empty_parentheses: {\n description: 'Excessive \"()\" patterns detected, often indicating failed/empty term-pairs.',\n },\n implicit_continuation: {\n description: 'The response includes continuation/meta phrasing (e.g., \"continued:\", \"implicit continuation\").',\n },\n invalid_marker_format: {\n description: 'A segment marker line is malformed (e.g., wrong ID shape or missing content after the dash).',\n },\n invented_id: {\n description: 'The response contains a segment ID that does not exist in the provided source corpus.',\n },\n length_mismatch: {\n description: 'Translation appears too short relative to Arabic source (heuristic truncation check).',\n },\n meta_talk: {\n description: 'The response includes translator/editor notes instead of pure translation.',\n },\n mismatched_colons: {\n description:\n 'Per-segment mismatch between Arabic and translation line-start speaker labels (detected as line-start prefixes ending in \":\").',\n },\n missing_id_gap: {\n description:\n 'A gap was detected: the response includes two IDs whose corpus order implies one or more intermediate IDs are missing.',\n },\n multiword_translit_without_gloss: {\n description: 'A multi-word transliteration phrase was detected without an immediate parenthetical gloss.',\n },\n newline_after_id: {\n description: 'The response used \"ID -\\\\nText\" instead of \"ID - Text\" (newline immediately after the marker).',\n },\n no_valid_markers: {\n description: 'No valid \"ID - ...\" markers were found anywhere in the response.',\n },\n truncated_segment: {\n description: 'A segment appears truncated (e.g., only \"…\", \"...\", or \"[INCOMPLETE]\").',\n },\n wrong_diacritics: {\n description: 'Wrong diacritics like â/ã/á were detected (should use macrons like ā ī ū).',\n },\n} as 
const satisfies Record<ValidationErrorType, { description: string }>;\n\nconst ARCHAIC_PATTERNS = ARCHAIC_WORDS.map((w) => new RegExp(`\\\\b${escapeRegExp(w)}\\\\b`, 'gi'));\n\nconst trimRange = (text: string, start: number, end: number) => {\n let s = start;\n let e = end;\n while (s < e && /\\s/.test(text[s])) {\n s++;\n }\n while (e > s && /\\s/.test(text[e - 1])) {\n e--;\n }\n return { end: e, start: s };\n};\n\nconst toRawRange = (normalizedStart: number, normalizedEnd: number, indexMap: number[], rawLength: number): Range => {\n if (normalizedEnd <= normalizedStart) {\n const rawStart = indexMap[normalizedStart] ?? 0;\n return { end: rawStart, start: rawStart };\n }\n const rawStart = indexMap[normalizedStart] ?? 0;\n const rawEndBase = indexMap[Math.max(normalizedEnd - 1, normalizedStart)] ?? rawStart;\n const rawEnd = Math.min(rawLength, rawEndBase + 1);\n return { end: rawEnd, start: rawStart };\n};\n\nconst buildMarkers = (normalized: string, indexMap: number[], rawLength: number): TranslationMarker[] => {\n const { dashes, optionalSpace } = TRANSLATION_MARKER_PARTS;\n const headerPattern = new RegExp(`^(${MARKER_ID_PATTERN})${optionalSpace}${dashes}\\\\s*`, 'gm');\n const matches = [...normalized.matchAll(headerPattern)];\n const markers: TranslationMarker[] = [];\n\n for (let i = 0; i < matches.length; i++) {\n const match = matches[i];\n const id = match[1];\n const normalizedStart = match.index ?? 0;\n const normalizedEnd = normalizedStart + match[0].length;\n const nextStart = i + 1 < matches.length ? (matches[i + 1].index ?? 
normalized.length) : normalized.length;\n const translationRange = trimRange(normalized, normalizedEnd, nextStart);\n const headerRange = toRawRange(normalizedStart, normalizedEnd, indexMap, rawLength);\n const translationRawRange = toRawRange(translationRange.start, translationRange.end, indexMap, rawLength);\n\n markers.push({\n headerText: match[0],\n id,\n normalizedEnd,\n normalizedStart,\n rawEnd: headerRange.end,\n rawStart: headerRange.start,\n rawTranslationEnd: translationRawRange.end,\n rawTranslationStart: translationRawRange.start,\n translationEnd: translationRange.end,\n translationStart: translationRange.start,\n });\n }\n\n return markers;\n};\n\nconst buildResponseById = (markers: TranslationMarker[], normalized: string) => {\n const responseById = new Map<string, string>();\n for (const marker of markers) {\n const translationText = normalized.slice(marker.translationStart, marker.translationEnd).trim();\n responseById.set(marker.id, translationText);\n }\n return responseById;\n};\n\nconst DEFAULT_VALIDATION_CONFIG: ValidationConfig = {\n allCapsWordRunThreshold: 5,\n};\n\nconst buildValidationContext = (\n segments: Segment[],\n rawResponse: string,\n config: ValidationConfig,\n): ValidationContext => {\n const { normalized, indexMap } = normalizeTranslationTextWithMap(rawResponse);\n const markers = buildMarkers(normalized, indexMap, rawResponse.length);\n const parsedIds = markers.map((m) => m.id);\n const segmentById = new Map<string, Segment>();\n for (const s of segments) {\n segmentById.set(s.id, s);\n }\n const responseById = buildResponseById(markers, normalized);\n return {\n config,\n indexMap,\n markers,\n normalizedResponse: normalized,\n parsedIds,\n rawResponse,\n responseById,\n segmentById,\n segments,\n };\n};\n\nconst makeErrorFromNormalized = (\n context: ValidationContext,\n type: ValidationErrorType,\n message: string,\n matchText: string,\n normalizedStart: number,\n normalizedEnd: number,\n id?: string,\n): 
ValidationError => {\n let resolvedId = id;\n if (!resolvedId) {\n // Try to find which marker contains this error range\n for (const marker of context.markers) {\n // Check if error falls within the translation content of a marker\n // We use loose bounds to catch errors at boundaries\n if (normalizedStart >= marker.translationStart && normalizedEnd <= marker.translationEnd) {\n resolvedId = marker.id;\n break;\n }\n }\n }\n\n return {\n id: resolvedId,\n matchText,\n message,\n range: toRawRange(normalizedStart, normalizedEnd, context.indexMap, context.rawResponse.length),\n type,\n };\n};\n\nconst makeErrorFromRawRange = (\n type: ValidationErrorType,\n message: string,\n matchText: string,\n range: Range,\n id?: string,\n): ValidationError => ({\n id,\n matchText,\n message,\n range,\n type,\n});\n\n/**\n * Validate an LLM translation response against a set of Arabic source segments.\n *\n * Rules are expressed as a list of typed errors. The caller decides severity.\n * The validator normalizes the response first (marker splitting + line endings).\n *\n * Important: `segments` may be the full corpus. The validator reduces to only\n * those IDs parsed from the response (plus detects missing-ID gaps between IDs).\n *\n * @example\n * // Pass (no errors)\n * validateTranslationResponse(\n * [{ id: 'P1', text: 'نص عربي طويل...' }],\n * 'P1 - A complete translation.'\n * ).errors.length === 0\n *\n * @example\n * // Fail (invented ID)\n * validateTranslationResponse(\n * [{ id: 'P1', text: 'نص عربي طويل...' 
}],\n * 'P2 - This ID is not in the corpus.'\n * ).errors.some(e => e.type === 'invented_id') === true\n */\nexport const validateTranslationResponse = (\n segments: Segment[],\n response: string,\n options?: { rules?: ValidationRule[]; config?: Partial<ValidationConfig> },\n): ValidationResponseResult => {\n const config = { ...DEFAULT_VALIDATION_CONFIG, ...options?.config };\n const context = buildValidationContext(segments, response, config);\n if (context.parsedIds.length === 0) {\n return {\n errors: [\n {\n matchText: response,\n message: 'No valid translation markers found',\n range: { end: response.length, start: 0 },\n ruleId: 'no_valid_markers',\n type: 'no_valid_markers',\n },\n ],\n normalizedResponse: context.normalizedResponse,\n parsedIds: [],\n };\n }\n\n const rules = options?.rules ?? DEFAULT_RULES;\n const errors = rules.flatMap((rule) => rule.run(context).map((e) => ({ ...e, ruleId: e.ruleId ?? rule.id })));\n\n return { errors, normalizedResponse: context.normalizedResponse, parsedIds: context.parsedIds };\n};\n\n/**\n * Validate translation marker format (single-line errors).\n *\n * @example\n * // Fail: malformed marker\n * validateMarkerFormat('B1234$5 - x')[0]?.type === 'invalid_marker_format'\n */\nconst validateMarkerFormat = (context: ValidationContext): ValidationError[] => {\n const text = context.normalizedResponse;\n const { markers, digits, suffix, dashes, optionalSpace } = TRANSLATION_MARKER_PARTS;\n const errors: ValidationError[] = [];\n\n const invalidRefPattern = new RegExp(\n `^${markers}(?=${digits})(?=.*${dashes})(?!${digits}${suffix}*${optionalSpace}${dashes})[^\\\\s-–—]+${optionalSpace}${dashes}`,\n 'gm',\n );\n for (const match of text.matchAll(invalidRefPattern)) {\n const matchText = match[0];\n const idx = match.index ?? 
0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'invalid_marker_format',\n `Invalid reference format \"${matchText.trim()}\" - expected format is letter + numbers + optional suffix (a-j) + dash`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n\n const spaceBeforePattern = new RegExp(` ${markers}${digits}${suffix}+${optionalSpace}${dashes}`, 'gm');\n for (const match of text.matchAll(spaceBeforePattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'invalid_marker_format',\n `Suspicious reference found: \"${matchText}\"`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n\n const suffixNoDashPattern = new RegExp(`^${markers}${digits}${suffix}(?! ${dashes})`, 'gm');\n for (const match of text.matchAll(suffixNoDashPattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'invalid_marker_format',\n `Suspicious reference found: \"${matchText}\"`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n\n const emptyAfterDashPattern = new RegExp(`^${MARKER_ID_PATTERN}${optionalSpace}${dashes}\\\\s*$`, 'gm');\n for (const match of text.matchAll(emptyAfterDashPattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'invalid_marker_format',\n `Reference \"${matchText.trim()}\" has dash but no content after it`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n\n const dollarSignPattern = new RegExp(`^${markers}${digits}\\\\$${digits}`, 'gm');\n for (const match of text.matchAll(dollarSignPattern)) {\n const matchText = match[0];\n const idx = match.index ?? 
0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'invalid_marker_format',\n `Invalid reference format \"${matchText}\" - contains $ character`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n\n return errors;\n};\n\n/**\n * Detect newline after an ID line (formatting bug).\n *\n * @example\n * // Fail: newline after \"P1 -\"\n * validateNewlineAfterId('P1 -\\\\nText')[0]?.type === 'newline_after_id'\n */\nconst validateNewlineAfterId = (context: ValidationContext): ValidationError[] => {\n const pattern = new RegExp(\n `^${MARKER_ID_PATTERN}${TRANSLATION_MARKER_PARTS.optionalSpace}${TRANSLATION_MARKER_PARTS.dashes}\\\\s*\\\\n`,\n 'gm',\n );\n const errors: ValidationError[] = [];\n for (const match of context.normalizedResponse.matchAll(pattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'newline_after_id',\n `Invalid format: newline after ID \"${matchText.trim()}\" - use \"ID - Text\" format`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n return errors;\n};\n\n/**\n * Detect duplicated IDs in the parsed ID list.\n *\n * @example\n * validateDuplicateIds(['P1','P1'])[0]?.type === 'duplicate_id'\n */\nconst validateDuplicateIds = (context: ValidationContext): ValidationError[] => {\n const seen = new Set<string>();\n const errors: ValidationError[] = [];\n for (const marker of context.markers) {\n if (seen.has(marker.id)) {\n errors.push(\n makeErrorFromRawRange(\n 'duplicate_id',\n `Duplicate ID \"${marker.id}\" detected - each segment should appear only once`,\n marker.headerText,\n { end: marker.rawEnd, start: marker.rawStart },\n marker.id,\n ),\n );\n } else {\n seen.add(marker.id);\n }\n }\n return errors;\n};\n\n/**\n * Detect IDs in the response that do not exist in the passed segment corpus.\n *\n * @example\n * validateInventedIds(['P1','P2'], new Map([['P1',{id:'P1',text:'x'}]]) )[0]?.type === 'invented_id'\n */\nconst validateInventedIds = 
(context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n for (const marker of context.markers) {\n if (!context.segmentById.has(marker.id)) {\n errors.push(\n makeErrorFromRawRange(\n 'invented_id',\n `Invented ID detected: \"${marker.id}\" - this ID does not exist in the source`,\n marker.headerText,\n { end: marker.rawEnd, start: marker.rawStart },\n marker.id,\n ),\n );\n }\n }\n return errors;\n};\n\n/**\n * Detect a “gap”: response contains IDs A and C, but the corpus order includes B between them.\n * This only checks for missing IDs between consecutive IDs within each response-ordered block.\n *\n * @example\n * // Corpus: P1, P2, P3. Response: P1, P3 => missing_id_gap includes P2\n */\nconst buildSegmentIndexById = (segments: Segment[]) => {\n const indexById = new Map<string, number>();\n for (let i = 0; i < segments.length; i++) {\n indexById.set(segments[i].id, i);\n }\n return indexById;\n};\n\nconst getGapIndices = (context: ValidationContext, indexById: Map<string, number>, aId: string, bId: string) => {\n if (!context.segmentById.has(aId) || !context.segmentById.has(bId)) {\n return;\n }\n const ia = indexById.get(aId);\n const ib = indexById.get(bId);\n if (ia == null || ib == null || ib < ia) {\n return;\n }\n return { end: ib, start: ia };\n};\n\nconst collectMissingIds = (\n context: ValidationContext,\n startIdx: number,\n endIdx: number,\n parsedIdSet: Set<string>,\n missing: Set<string>,\n) => {\n const found: string[] = [];\n for (let j = startIdx + 1; j < endIdx; j++) {\n const midId = context.segments[j]?.id;\n if (!midId) {\n continue;\n }\n if (!context.segmentById.has(midId) || parsedIdSet.has(midId) || missing.has(midId)) {\n continue;\n }\n missing.add(midId);\n found.push(midId);\n }\n return found;\n};\n\nconst validateMissingIdGaps = (context: ValidationContext): ValidationError[] => {\n const indexById = buildSegmentIndexById(context.segments);\n\n const parsedIdSet = new 
Set(context.parsedIds);\n const missing = new Set<string>();\n const errors: ValidationError[] = [];\n\n for (let i = 0; i < context.markers.length - 1; i++) {\n const a = context.markers[i];\n const b = context.markers[i + 1];\n const gap = getGapIndices(context, indexById, a.id, b.id);\n if (!gap) {\n continue;\n }\n const newlyMissing = collectMissingIds(context, gap.start, gap.end, parsedIdSet, missing);\n for (const midId of newlyMissing) {\n errors.push(\n makeErrorFromRawRange(\n 'missing_id_gap',\n `Missing segment ID detected between translated IDs: \"${midId}\"`,\n context.rawResponse.slice(b.rawStart, b.rawEnd),\n { end: b.rawEnd, start: b.rawStart },\n midId,\n ),\n );\n }\n }\n\n return errors;\n};\n\n/**\n * Detect segments that appear truncated (just \"…\" / \"...\" / \"[INCOMPLETE]\").\n *\n * @example\n * validateTruncatedSegments('P1 - …')[0]?.type === 'truncated_segment'\n */\nconst validateTruncatedSegments = (context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n for (const marker of context.markers) {\n const content = context.normalizedResponse.slice(marker.translationStart, marker.translationEnd).trim();\n const isEllipsis = content === '…' || content === '...';\n const sourceText = context.segmentById.get(marker.id)?.text ?? 
'';\n const sourceIsEllipsisOnly = /^\\s*(?:…|\\.{3})\\s*$/.test(sourceText);\n if (!content || content === '[INCOMPLETE]' || (isEllipsis && !sourceIsEllipsisOnly)) {\n errors.push(\n makeErrorFromRawRange(\n 'truncated_segment',\n `Truncated segment detected: \"${marker.id}\" - segments must be fully translated`,\n context.rawResponse.slice(marker.rawTranslationStart, marker.rawTranslationEnd),\n { end: marker.rawTranslationEnd, start: marker.rawTranslationStart },\n marker.id,\n ),\n );\n }\n }\n return errors;\n};\n\n/**\n * Detect implicit continuation markers.\n *\n * @example\n * validateImplicitContinuation('P1 - continued: ...')[0]?.type === 'implicit_continuation'\n */\nconst validateImplicitContinuation = (context: ValidationContext): ValidationError[] => {\n const patterns = [/implicit continuation/gi, /\\bcontinuation:/gi, /\\bcontinued:/gi];\n const errors: ValidationError[] = [];\n for (const pattern of patterns) {\n for (const match of context.normalizedResponse.matchAll(pattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'implicit_continuation',\n `Detected \"${matchText}\" - do not add implicit continuation text`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n }\n return errors;\n};\n\n/**\n * Detect meta-talk (translator/editor notes).\n *\n * @example\n * validateMetaTalk(\"P1 - (Translator's note: ...)\")[0]?.type === 'meta_talk'\n */\nconst validateMetaTalk = (context: ValidationContext): ValidationError[] => {\n const patterns = [/\\(note:/gi, /\\(translator'?s? note:/gi, /\\[editor:/gi, /\\[note:/gi, /\\(ed\\.:/gi, /\\(trans\\.:/gi];\n const errors: ValidationError[] = [];\n for (const pattern of patterns) {\n for (const match of context.normalizedResponse.matchAll(pattern)) {\n const matchText = match[0];\n const idx = match.index ?? 
0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'meta_talk',\n `Detected meta-talk \"${matchText}\" - output translation only, no translator/editor notes`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n }\n return errors;\n};\n\n/**\n * Detect Arabic script characters (except ﷺ).\n *\n * @example\n * validateArabicLeak('P1 - الله')[0]?.type === 'arabic_leak'\n *\n * @example\n * // Pass: ﷺ allowed\n * validateArabicLeak('P1 - Muḥammad ﷺ said...').length === 0\n */\nconst validateArabicLeak = (context: ValidationContext): ValidationError[] => {\n const arabicPattern = /[\\u0600-\\u06FF\\u0750-\\u077F\\uFB50-\\uFDF9\\uFDFB-\\uFDFF\\uFE70-\\uFEFF]+/g;\n const errors: ValidationError[] = [];\n\n for (const marker of context.markers) {\n const text = context.normalizedResponse.slice(marker.translationStart, marker.translationEnd);\n let longestMatch: RegExpMatchArray | undefined;\n for (const match of text.matchAll(arabicPattern)) {\n const matchText = match[0].replace(/ﷺ/g, '').trim();\n if (!matchText) {\n continue;\n }\n if (!longestMatch || matchText.length > longestMatch[0].replace(/ﷺ/g, '').trim().length) {\n longestMatch = match;\n }\n }\n if (!longestMatch) {\n continue;\n }\n const matchText = longestMatch[0].replace(/ﷺ/g, '').trim();\n const idx = longestMatch.index ?? 
0;\n const normalizedStart = marker.translationStart + idx;\n const normalizedEnd = normalizedStart + longestMatch[0].length;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'arabic_leak',\n `Arabic script detected: \"${matchText}\"`,\n matchText,\n normalizedStart,\n normalizedEnd,\n marker.id,\n ),\n );\n }\n\n return errors;\n};\n\n/**\n * Detect wrong diacritics (â/ã/á) that indicate failed ALA-LC macrons.\n *\n * @example\n * validateWrongDiacritics('kâfir')[0]?.type === 'wrong_diacritics'\n */\nconst validateWrongDiacritics = (context: ValidationContext): ValidationError[] => {\n const wrongPattern = /[âêîôûãñéíóú]/gi;\n const errors: ValidationError[] = [];\n for (const match of context.normalizedResponse.matchAll(wrongPattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'wrong_diacritics',\n `Wrong diacritic \"${matchText}\" detected - use macrons (ā, ī, ū) instead`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n return errors;\n};\n\n/**\n * Detect excessive empty parentheses \"()\" which often indicates failed transliterations.\n *\n * @example\n * // Fail: too many \"()\"\n * validateEmptyParentheses('() () () ()')[0]?.type === 'empty_parentheses'\n */\nconst validateEmptyParentheses = (context: ValidationContext): ValidationError[] => {\n const matches = [...context.normalizedResponse.matchAll(/\\(\\)/g)];\n if (matches.length <= MAX_EMPTY_PARENTHESES) {\n return [];\n }\n return matches.map((match) => {\n const idx = match.index ?? 0;\n return makeErrorFromNormalized(\n context,\n 'empty_parentheses',\n `Found ${matches.length} empty parentheses \"()\" - this usually indicates failed transliterations. 
Please check if the LLM omitted Arabic/transliterated terms.`,\n match[0],\n idx,\n idx + match[0].length,\n );\n });\n};\n\n/**\n * Detect truncated translation vs Arabic source (ratio-based).\n *\n * @example\n * // Fail: long Arabic + very short translation\n * detectTruncatedTranslation('نص عربي طويل ... (50+ chars)', 'Short') !== undefined\n */\nconst detectTruncatedTranslation = (arabicText: string, translationText: string) => {\n const arabic = (arabicText || '').trim();\n const translation = (translationText || '').trim();\n\n if (arabic.length < MIN_ARABIC_LENGTH_FOR_TRUNCATION_CHECK) {\n return;\n }\n if (translation.length === 0) {\n return `Translation appears empty but Arabic text has ${arabic.length} characters`;\n }\n\n const ratio = translation.length / arabic.length;\n if (ratio < MIN_TRANSLATION_RATIO) {\n const expectedMinLength = Math.round(arabic.length * MIN_TRANSLATION_RATIO);\n return `Translation appears truncated: ${translation.length} chars for ${arabic.length} char Arabic text (expected at least ~${expectedMinLength} chars)`;\n }\n};\n\n/**\n * Validate per-ID translation lengths (response subset only).\n *\n * @example\n * // Produces a length_mismatch error for the first truncated segment found\n */\nconst validateTranslationLengthsForResponse = (context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n for (const marker of context.markers) {\n const seg = context.segmentById.get(marker.id);\n if (!seg) {\n continue;\n }\n const translation = context.normalizedResponse.slice(marker.translationStart, marker.translationEnd).trim();\n const error = detectTruncatedTranslation(seg.text, translation);\n if (error) {\n errors.push(\n makeErrorFromRawRange(\n 'length_mismatch',\n `Translation for \"${marker.id}\" ${error.replace('Translation ', '').toLowerCase()}`,\n translation,\n { end: marker.rawTranslationEnd, start: marker.rawTranslationStart },\n marker.id,\n ),\n );\n }\n }\n return 
errors;\n};\n\n/**\n * Detect “shouting” ALL CAPS words.\n *\n * @example\n * validateAllCaps('THIS IS LOUD')[0]?.type === 'all_caps'\n */\nconst validateAllCaps = (context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n const runThreshold = context.config.allCapsWordRunThreshold;\n const runPattern = new RegExp(`\\\\b(?:[A-Z]{2,}\\\\b\\\\s+){${runThreshold - 1}}[A-Z]{2,}\\\\b`, 'g');\n for (const match of context.normalizedResponse.matchAll(runPattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'all_caps',\n `ALL CAPS detected: \"${matchText.trim()}\"`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n return errors;\n};\n\n/**\n * Detect archaic/Biblical register tokens.\n *\n * @example\n * validateArchaicRegister('verily thou shalt')[0]?.type === 'archaic_register'\n */\nconst validateArchaicRegister = (context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n for (const re of ARCHAIC_PATTERNS) {\n for (const match of context.normalizedResponse.matchAll(re)) {\n const matchText = match[0];\n const idx = match.index ?? 
0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'archaic_register',\n `Archaic/Biblical register word detected: \"${matchText}\"`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n }\n return errors;\n};\n\ntype LineStartLabelCounts = {\n total: number;\n prefixes: Map<string, number>;\n};\n\nconst getLineStartLabelCounts = (text: string): LineStartLabelCounts => {\n const prefixes = new Map<string, number>();\n const lines = text.split(/\\n/);\n const maxPrefixLength = 28;\n const maxWords = 3;\n\n for (const rawLine of lines) {\n const line = rawLine.trimStart();\n if (!line) {\n continue;\n }\n const colonIdx = line.search(/[::]/);\n if (colonIdx <= 0) {\n continue;\n }\n const prefix = line.slice(0, colonIdx).trim();\n if (!prefix || prefix.length > maxPrefixLength) {\n continue;\n }\n const words = prefix.split(/\\s+/);\n if (words.length > maxWords) {\n continue;\n }\n const count = prefixes.get(prefix) ?? 0;\n prefixes.set(prefix, count + 1);\n }\n\n let total = 0;\n for (const count of prefixes.values()) {\n total += count;\n }\n\n return { prefixes, total };\n};\n\n/**\n * Detect per-segment mismatch in colon counts between Arabic segment text and its translation chunk.\n *\n * This is intentionally heuristic and avoids hardcoding speaker label tokens.\n *\n * @example\n * // Arabic: \"الشيخ: ... 
السائل: ...\" => 2 colons\n * // Translation: \"The Shaykh: ...\" => 1 colon => mismatched_colons\n */\nconst validateMismatchedColons = (context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n\n for (const marker of context.markers) {\n const seg = context.segmentById.get(marker.id);\n if (!seg) {\n continue;\n }\n\n const arabicLabels = getLineStartLabelCounts(seg.text);\n const translation = context.normalizedResponse.slice(marker.translationStart, marker.translationEnd);\n const englishLabels = getLineStartLabelCounts(translation);\n\n if (arabicLabels.total !== englishLabels.total && (arabicLabels.total > 0 || englishLabels.total > 0)) {\n errors.push(\n makeErrorFromRawRange(\n 'mismatched_colons',\n `Speaker label count mismatch in \"${marker.id}\": Arabic has ${arabicLabels.total} line-start labels but translation has ${englishLabels.total}. This may indicate dropped/moved speaker turns or formatting drift.`,\n translation.trim(),\n { end: marker.rawTranslationEnd, start: marker.rawTranslationStart },\n marker.id,\n ),\n );\n }\n }\n\n return errors;\n};\n\n/**\n * Detect collapsed speaker labels that appear mid-line instead of at line start.\n *\n * This uses translation line-start labels as the reference set, then flags\n * occurrences of those labels inside the same segment's text.\n */\nconst findCollapsedSpeakerLabel = (text: string) => {\n const lineStartLabels = getLineStartLabelCounts(text).prefixes;\n if (lineStartLabels.size === 0) {\n return;\n }\n const labelPattern = [...lineStartLabels.keys()].map((label) => escapeRegExp(label)).join('|');\n if (!labelPattern) {\n return;\n }\n const lines = text.split('\\n');\n let offset = 0;\n const pattern = new RegExp(`\\\\b(${labelPattern})\\\\s*:`, 'g');\n for (const line of lines) {\n for (const match of line.matchAll(pattern)) {\n const idx = match.index ?? 
0;\n if (idx > 0) {\n return { index: offset + idx, label: match[1] };\n }\n }\n offset += line.length + 1;\n }\n};\n\nconst validateCollapsedSpeakers = (context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n\n for (const marker of context.markers) {\n const translation = context.normalizedResponse.slice(marker.translationStart, marker.translationEnd);\n if (!translation) {\n continue;\n }\n const matched = findCollapsedSpeakerLabel(translation);\n if (!matched) {\n continue;\n }\n const normalizedStart = marker.translationStart + matched.index;\n const normalizedEnd = normalizedStart + matched.label.length + 1;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'collapsed_speakers',\n `Collapsed speaker label detected in \"${marker.id}\": \"${matched.label}:\" should start on a new line`,\n `${matched.label}:`,\n normalizedStart,\n normalizedEnd,\n marker.id,\n ),\n );\n }\n\n return errors;\n};\n\n/**\n * Detect multi-word transliteration patterns without immediate parenthetical gloss.\n *\n * @example\n * // Fail: \"al-hajr fi al-madajīʿ\" without \"(English ...)\" nearby\n * // => multiword_translit_without_gloss\n */\nconst validateMultiwordTranslitWithoutGloss = (context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n const phrasePattern = /\\b(al-[a-zʿʾāīūḥṣḍṭẓ-]+)\\s+fi\\s+(al-[a-zʿʾāīūḥṣḍṭẓ-]+)\\b/gi;\n\n for (const marker of context.markers) {\n const text = context.normalizedResponse.slice(marker.translationStart, marker.translationEnd);\n if (!text) {\n continue;\n }\n\n for (const m of text.matchAll(phrasePattern)) {\n const phrase = `${m[1]} fi ${m[2]}`;\n const idx = m.index ?? 
-1;\n if (idx >= 0) {\n const after = text.slice(idx, Math.min(text.length, idx + phrase.length + 25));\n if (!after.includes('(')) {\n const normalizedStart = marker.translationStart + idx;\n const normalizedEnd = normalizedStart + phrase.length;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'multiword_translit_without_gloss',\n `Multi-word transliteration without immediate gloss in \"${marker.id}\": \"${phrase}\"`,\n phrase,\n normalizedStart,\n normalizedEnd,\n marker.id,\n ),\n );\n }\n }\n }\n }\n\n return errors;\n};\n\nconst DEFAULT_RULES: ValidationRule[] = [\n { id: 'invalid_marker_format', run: validateMarkerFormat, type: 'invalid_marker_format' },\n { id: 'newline_after_id', run: validateNewlineAfterId, type: 'newline_after_id' },\n { id: 'truncated_segment', run: validateTruncatedSegments, type: 'truncated_segment' },\n { id: 'implicit_continuation', run: validateImplicitContinuation, type: 'implicit_continuation' },\n { id: 'meta_talk', run: validateMetaTalk, type: 'meta_talk' },\n { id: 'duplicate_id', run: validateDuplicateIds, type: 'duplicate_id' },\n { id: 'invented_id', run: validateInventedIds, type: 'invented_id' },\n { id: 'missing_id_gap', run: validateMissingIdGaps, type: 'missing_id_gap' },\n { id: 'arabic_leak', run: validateArabicLeak, type: 'arabic_leak' },\n { id: 'wrong_diacritics', run: validateWrongDiacritics, type: 'wrong_diacritics' },\n { id: 'empty_parentheses', run: validateEmptyParentheses, type: 'empty_parentheses' },\n { id: 'length_mismatch', run: validateTranslationLengthsForResponse, type: 'length_mismatch' },\n { id: 'all_caps', run: validateAllCaps, type: 'all_caps' },\n { id: 'archaic_register', run: validateArchaicRegister, type: 'archaic_register' },\n { id: 'mismatched_colons', run: validateMismatchedColons, type: 'mismatched_colons' },\n { id: 'collapsed_speakers', run: validateCollapsedSpeakers, type: 'collapsed_speakers' },\n {\n id: 'multiword_translit_without_gloss',\n run: 
validateMultiwordTranslitWithoutGloss,\n type: 'multiword_translit_without_gloss',\n },\n];\n"],"mappings":";;;;AAGA,IAAY,4CAAL;;AAEH;;AAEA;;AAEA;;AAEA;;AAEA;;AAEA;;;;;;AAMJ,MAAa,2BAA2B;CAEpC,QAAQ;CAER,QAAQ;CAER,SAAS,IAAI,QAAQ,OAAO,QAAQ,UAAU,QAAQ,WAAW,QAAQ,UAAU,QAAQ,QAAQ,QAAQ,KAAK;CAEhH,eAAe;CAEf,QAAQ;CACX;;;;AAKD,MAAa,oBAAoB,GAAG,yBAAyB,UAAU,yBAAyB,SAAS,yBAAyB,OAAO;;;;AAKzI,MAAa,gBAAgB;CACzB;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACH;AAED,MAAa,wBAAwB;AACrC,MAAa,yCAAyC;AACtD,MAAa,wBAAwB;;;;AC7CrC,MAAa,gBAAgB;AAE7B,MAAa,qBAAqB;AAElC,MAAa,SAAS;AAEtB,MAAa,OAAO;AAEpB,MAAa,SAAS;AAEtB,MAAa,gBAAgB;AAE7B,MAAa,SAAS;AAEtB,MAAa,eAAe;AAM5B,MAAa,UAAU;CACnB;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACD;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACD;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACD;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACD;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACD;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACD;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACD;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACJ;;;;;;;;;;;ACjDD,MAAa,gBAAgB,QAAgB,UAA0B;AACnE,KAAI,CAAC,OACD,QAAO;AAEX,KAAI,CAAC,MACD,QAAO;AAEX,QAAO,GAAG,OAAO,IAAI;;;;;;;;AASzB,MAAa,mBAAoC;AAC7C,QAAO,QAAQ,KAAK,YAAY;EAC5B,SAAS,OAAO,OAAO,kBAAkB,OAAO,UAAU,aAAa,eAAe,OAAO,QAAQ;EACrG,IAAI,OAAO;EACX,UAAU,OAAO,OAAO;EACxB,MAAM,OAAO;EAChB,EAAE;;;;;;;;;;AAWP,MAAa,aAAa,OAAgC;CACtD,MAAM,SAAS,QAAQ,MAAM,MAAM,EAAE,OAAO,GAAG;AAC/C,KAAI,CAAC,OACD,OAAM,IAAI,MAAM,qBAAqB,KAAK;AAG9C,QAAO;EACH,SAAS,OAAO,OAAO,kBAAkB,OAAO,UAAU,aAAa,eAAe,OAAO,QAAQ;EACrG,IAAI,OAAO;EACX,UAAU,OAAO,OAAO;EACxB,MAAM,OAAO;EAChB;;;;;;;;;AAUL,MAAa,oBAAoB,OAAyB;AACtD,QAAO,UAAU,GAAG,CAAC;;;;;;;;AASzB,MAAa,qBAAiC;AAC1C,QAAO,QAAQ,KAAK,MAAM,EAAE,GAAG;;;;;;;;AASnC,MAAa,wBAAgC;AACzC,QAAO;;;;;;;;;;;;;;;;ACtFX,MAAa,2BAA2B,UAAqB,WAAmB;AAE5E,QAAO,CAAC,QADU,SAAS,KAAK,MAAM,GAAG,EAAE,GAAG,KAAK,EAAE,OAAO,CAAC,KAAK,OAAO,CAC/C,CAAC,KAAK,OAAO;;;;;;;;;AAU3C,MAAa,4BAA4B,YAAoB;AACzD,QAAO,gCAAgC,QAAQ,CAAC;;AAGpD,MAAa,mCAAmC,YAAoB;CAChE,MAAM,+BAA+B,UAAkB;EACnD,IAAI,aAAa;EACjB,MAAM,WAAqB,EAAE;AAC7B,OAAK,IAAI,IAAI,GAAG,IAAI
,MAAM,QAAQ,KAAK;GACnC,MAAM,KAAK,MAAM;AACjB,OAAI,OAAO,MAAM;AACb,QAAI,MAAM,IAAI,OAAO,MAAM;AACvB,mBAAc;AACd,cAAS,KAAK,EAAE;AAChB;AACA;;AAEJ,kBAAc;AACd,aAAS,KAAK,EAAE;AAChB;;AAEJ,iBAAc;AACd,YAAS,KAAK,EAAE;;AAEpB,SAAO;GAAE;GAAY;GAAU;;CAGnC,MAAM,qCAAqC,MAAc,QAAkB;EACvE,MAAM,6BAA6B,IAAI,OACnC,eAAe,oBAAoB,yBAAyB,gBAAgB,yBAAyB,OAAO,IAC5G,IACH;EACD,IAAI,aAAa;EACjB,MAAM,WAAqB,EAAE;EAC7B,IAAI,YAAY;AAChB,OAAK,MAAM,SAAS,KAAK,SAAS,2BAA2B,EAAE;GAC3D,MAAM,aAAa,MAAM,SAAS;AAClC,QAAK,IAAI,IAAI,WAAW,IAAI,YAAY,KAAK;AACzC,kBAAc,KAAK;AACnB,aAAS,KAAK,IAAI,GAAG;;AAEzB,iBAAc,MAAM;AACpB,YAAS,KAAK,IAAI,YAAY;AAC9B,iBAAc;AACd,YAAS,KAAK,IAAI,YAAY;GAC9B,MAAM,SAAS,MAAM;AACrB,QAAK,IAAI,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACpC,kBAAc,OAAO;AACrB,aAAS,KAAK,IAAI,aAAa,IAAI,GAAG;;AAE1C,eAAY,aAAa,MAAM,GAAG;;AAEtC,OAAK,IAAI,IAAI,WAAW,IAAI,KAAK,QAAQ,KAAK;AAC1C,iBAAc,KAAK;AACnB,YAAS,KAAK,IAAI,GAAG;;AAEzB,SAAO;GAAE;GAAY;GAAU;;CAGnC,MAAM,uCAAuC,MAAc,QAAkB;EACzE,MAAM,+BAA+B,IAAI,OACrC,KAAK,oBAAoB,yBAAyB,gBAAgB,yBAAyB,OAAO,IAClG,IACH;EACD,IAAI,aAAa;EACjB,MAAM,WAAqB,EAAE;EAC7B,IAAI,YAAY;AAChB,OAAK,MAAM,SAAS,KAAK,SAAS,6BAA6B,EAAE;GAC7D,MAAM,aAAa,MAAM,SAAS;AAClC,QAAK,IAAI,IAAI,WAAW,IAAI,YAAY,KAAK;AACzC,kBAAc,KAAK;AACnB,aAAS,KAAK,IAAI,GAAG;;AAEzB,iBAAc;AACd,YAAS,KAAK,IAAI,YAAY;GAC9B,MAAM,SAAS,MAAM;AACrB,QAAK,IAAI,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACpC,kBAAc,OAAO;AACrB,aAAS,KAAK,IAAI,aAAa,IAAI,GAAG;;AAE1C,eAAY,aAAa,MAAM,GAAG;;AAEtC,OAAK,IAAI,IAAI,WAAW,IAAI,KAAK,QAAQ,KAAK;AAC1C,iBAAc,KAAK;AACnB,YAAS,KAAK,IAAI,GAAG;;AAEzB,SAAO;GAAE;GAAY;GAAU;;CAGnC,MAAM,yBAAyB,MAAc,QAAkB;EAC3D,IAAI,aAAa;EACjB,MAAM,WAAqB,EAAE;AAC7B,OAAK,IAAI,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AAClC,OAAI,KAAK,OAAO,QAAQ,KAAK,IAAI,OAAO,KAAK;AACzC;AACA,kBAAc;AACd,aAAS,KAAK,IAAI,GAAG;AACrB;;AAEJ,iBAAc,KAAK;AACnB,YAAS,KAAK,IAAI,GAAG;;AAEzB,SAAO;GAAE;GAAY;GAAU;;CAGnC,MAAM,uBAAuB,4BAA4B,QAAQ;CACjE,MAAM,mBAAmB,kCAAkC,qBAAqB,YAAY,qBAAqB,SAAS;CAC1H,MAAM,gBAAgB,oCAAoC,iBAAiB,YAAY,iBAAiB,SAAS;AACjH,QAAO,sBAAsB,cAAc,YAAY,cAAc,SAAS;;;;;;;;AASlF,MAAa,yBAAyB,SAAiB;CACnD,MAAM,EAAE,QAAQ,
kBAAkB;CAClC,MAAM,UAAU,IAAI,OAAO,KAAK,kBAAkB,GAAG,gBAAgB,UAAU,KAAK;CACpF,MAAM,MAAgB,EAAE;AACxB,MAAK,MAAM,SAAS,KAAK,SAAS,QAAQ,CACtC,KAAI,KAAK,MAAM,GAAG;AAEtB,QAAO;;;;;;;;;;;;;AAcX,MAAa,qBAAqB,YAAoB;CAElD,MAAM,iBAAiB,kBADJ,yBAAyB,QAAQ,CACA;AACpD,QAAO;EAAE,OAAO,eAAe;EAAM;EAAgB;;;;;;;;;;;;;;AAezD,MAAa,4BAA4B,YAAoB;CACzD,MAAM,aAAa,yBAAyB,QAAQ;CACpD,MAAM,EAAE,QAAQ,kBAAkB;CAClC,MAAM,gBAAgB,IAAI,OAAO,KAAK,kBAAkB,GAAG,gBAAgB,OAAO,OAAO,KAAK;CAC9F,MAAM,UAAU,CAAC,GAAG,WAAW,SAAS,cAAc,CAAC;CAEvD,MAAM,UAAsD,EAAE;AAC9D,MAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;EACrC,MAAM,KAAK,QAAQ,GAAG;EACtB,MAAM,QAAQ,QAAQ,GAAG,SAAS;EAClC,MAAM,YAAY,IAAI,IAAI,QAAQ,SAAU,QAAQ,IAAI,GAAG,SAAS,WAAW,SAAU,WAAW;EACpG,MAAM,QAAQ,WAAW,MAAM,OAAO,UAAU,CAAC,SAAS;EAC1D,MAAM,gCAAgB,IAAI,OAAO,IAAI,KAAK,gBAAgB,OAAO,MAAM;EACvE,MAAM,cAAc,MAAM,QAAQ,eAAe,GAAG,CAAC,MAAM;AAC3D,UAAQ,KAAK;GAAE;GAAI;GAAa,CAAC;;AAErC,QAAO;;;;;;;;AASX,MAAa,qBAAqB,SAAiB;CAC/C,MAAM,sBAAM,IAAI,KAAqB;AACrC,MAAK,MAAM,SAAS,yBAAyB,KAAK,CAC9C,KAAI,IAAI,MAAM,IAAI,MAAM,YAAY;AAExC,QAAO;;AAGX,MAAa,gBAAgB,MAAc,EAAE,QAAQ,uBAAuB,OAAO;;;;ACnNnF,MAAM,8BAA8B;CAAC;CAAK;CAAK;CAAK;CAAK;CAAK;CAAK;CAAK;CAAK;CAAK;CAAK;CAAI;AAE3F,MAAM,qBAAqB,WAAqB;AAE5C,QAAO,MADO,OAAO,KAAK,UAAU,GAAG,aAAa,MAAM,CAAC,OAAO,CAAC,KAAK,IAAI,CACzD;;AAGvB,MAAM,2BAA2B,gBAA0B,YAAY,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC,KAAK,IAAI;AAEpH,MAAM,8BAA8B,WAAqB;CACrD,MAAM,eAAe,kBAAkB,OAAO;CAC9C,MAAM,EAAE,QAAQ,kBAAkB;AAClC,wBAAO,IAAI,OAAO,OAAO,oBAAoB,gBAAgB,OAAO,SAAS,aAAa,GAAG;;AAGjG,MAAa,4BAA4B,MAAc,WAAkC;AACrF,KAAI,CAAC,QAAQ,eAAe,OACxB,OAAM,IAAI,MAAM,+DAA+D;CAEnF,MAAM,gBAAgB,OAAO;CAC7B,MAAM,qBAAqB,QAAQ,sBAAsB;CACzD,MAAM,eAAe,kBAAkB,cAAc;CACrD,IAAI,QAAQ;CACZ,MAAM,qBAAqB,wBAAwB,mBAAmB;CACtE,MAAM,uBAAuB,qCAAqB,IAAI,OAAO,MAAM,mBAAmB,SAAS,GAAG;CAClG,MAAM,mBAAmB,2BAA2B,cAAc;CAClE,MAAM,aAAa,IAAI,OAAO,cAAc,IAAI;CAEhD,MAAM,QAAQ,KACT,MAAM,KAAK,CACX,KAAK,SAAS;EACX,MAAM,aAAa,KAAK,MAAM,iBAAiB;EAC/C,MAAM,sBAAsB,aAAa,WAAW,GAAG,SAAS,WAAW,GAAG,SAAS;EACvF,IAAI,YAAY;EAChB,IAAI,UAAU;AAEd,OAAK,MAAM,SAAS,KAAK,SAAS,WAAW,EAAE;GAC3C,MAAM,M
AAM,MAAM,SAAS;AAC3B,OAAI,QAAQ,oBACR;AAEJ,OAAI,QAAQ,EACR;GAEJ,MAAM,SAAS,KAAK,MAAM,WAAW,IAAI;AACzC,OAAI,sBAAsB;IACtB,MAAM,aAAa,OAAO,MAAM,qBAAqB;AACrD,QAAI,YAAY;KACZ,MAAM,QAAQ,WAAW,GAAG,QAAQ,QAAQ,GAAG;KAC/C,MAAM,cAAc,OAAO,MAAM,GAAG,CAAC,WAAW,GAAG,OAAO;AAC1D,gBAAW,cAAc,QAAQ,OAAO,MAAM;AAC9C,iBAAY,MAAM,MAAM,GAAG;AAC3B,cAAS;AACT;;;AAGR,cAAW,OAAO,QAAQ,QAAQ,GAAG,GAAG,OAAO,MAAM;AACrD,eAAY,MAAM,MAAM,GAAG;AAC3B,YAAS;;AAGb,MAAI,UAAU,EACV,QAAO;AAEX,SAAO,UAAU,KAAK,MAAM,UAAU;GACxC,CACD,KAAK,KAAK;AACf,QAAO;EACH,SAAS,QAAQ,IAAI,CAAC,2BAA2B,GAAG,EAAE;EACtD,QAAQ,EAAE,0BAA0B,OAAO;EAC3C,MAAM;EACT;;AAGL,MAAM,iBAAwG;CAC1G,oBAAoB;CACpB,mBAAmB;CACtB;AAED,MAAa,UAAU,MAAc,YAAsC;CACvE,MAAM,YAAY,QAAQ;CAC1B,MAAM,UAAoB,EAAE;CAC5B,MAAM,UAAoB,EAAE;CAC5B,IAAI,cAAc;CAClB,MAAM,SAAiC,EAAE;AAEzC,MAAK,MAAM,QAAQ,WAAW;EAC1B,MAAM,QAAQ,eAAe;AAC7B,MAAI,CAAC,OAAO;AACR,WAAQ,KAAK,KAAK;AAClB;;EAEJ,MAAM,SAAS,MAAM,aAAa,QAAQ,OAAO;AACjD,gBAAc,OAAO;AACrB,UAAQ,KAAK,KAAK;AAClB,OAAK,MAAM,CAAC,KAAK,UAAU,OAAO,QAAQ,OAAO,OAAO,CACpD,QAAO,QAAQ,OAAO,QAAQ,KAAK;;AAI3C,QAAO;EACH;EACA;EACA;EACA;EACA,MAAM;EACT;;;;;;;;;;;ACnFL,MAAa,6BAA6B;CACtC,UAAU,EACN,aAAa,4DAChB;CACD,aAAa,EACT,aAAa,oDAChB;CACD,kBAAkB,EACd,aAAa,kEAChB;CACD,oBAAoB,EAChB,aAAa,qEAChB;CACD,cAAc,EACV,aAAa,+DAChB;CACD,mBAAmB,EACf,aAAa,iFAChB;CACD,uBAAuB,EACnB,aAAa,uGAChB;CACD,uBAAuB,EACnB,aAAa,gGAChB;CACD,aAAa,EACT,aAAa,yFAChB;CACD,iBAAiB,EACb,aAAa,yFAChB;CACD,WAAW,EACP,aAAa,8EAChB;CACD,mBAAmB,EACf,aACI,oIACP;CACD,gBAAgB,EACZ,aACI,0HACP;CACD,kCAAkC,EAC9B,aAAa,8FAChB;CACD,kBAAkB,EACd,aAAa,sGAChB;CACD,kBAAkB,EACd,aAAa,sEAChB;CACD,mBAAmB,EACf,aAAa,iFAChB;CACD,kBAAkB,EACd,aAAa,8EAChB;CACJ;AAED,MAAM,mBAAmB,cAAc,KAAK,MAAM,IAAI,OAAO,MAAM,aAAa,EAAE,CAAC,MAAM,KAAK,CAAC;AAE/F,MAAM,aAAa,MAAc,OAAe,QAAgB;CAC5D,IAAI,IAAI;CACR,IAAI,IAAI;AACR,QAAO,IAAI,KAAK,KAAK,KAAK,KAAK,GAAG,CAC9B;AAEJ,QAAO,IAAI,KAAK,KAAK,KAAK,KAAK,IAAI,GAAG,CAClC;AAEJ,QAAO;EAAE,KAAK;EAAG,OAAO;EAAG;;AAG/B,MAAM,cAAc,iBAAyB,eAAuB,UAAoB,cAA6B;AACjH,KAAI,iBAAiB,iBAAiB;EAClC,MAAMA,aAAW,SAAS,oBAAoB;AAC9C,SAAO;GAAE,KAAKA;GAAU,
OAAOA;GAAU;;CAE7C,MAAM,WAAW,SAAS,oBAAoB;CAC9C,MAAM,aAAa,SAAS,KAAK,IAAI,gBAAgB,GAAG,gBAAgB,KAAK;AAE7E,QAAO;EAAE,KADM,KAAK,IAAI,WAAW,aAAa,EAAE;EAC5B,OAAO;EAAU;;AAG3C,MAAM,gBAAgB,YAAoB,UAAoB,cAA2C;CACrG,MAAM,EAAE,QAAQ,kBAAkB;CAClC,MAAM,gBAAgB,IAAI,OAAO,KAAK,kBAAkB,GAAG,gBAAgB,OAAO,OAAO,KAAK;CAC9F,MAAM,UAAU,CAAC,GAAG,WAAW,SAAS,cAAc,CAAC;CACvD,MAAM,UAA+B,EAAE;AAEvC,MAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;EACrC,MAAM,QAAQ,QAAQ;EACtB,MAAM,KAAK,MAAM;EACjB,MAAM,kBAAkB,MAAM,SAAS;EACvC,MAAM,gBAAgB,kBAAkB,MAAM,GAAG;EAEjD,MAAM,mBAAmB,UAAU,YAAY,eAD7B,IAAI,IAAI,QAAQ,SAAU,QAAQ,IAAI,GAAG,SAAS,WAAW,SAAU,WAAW,OAC5B;EACxE,MAAM,cAAc,WAAW,iBAAiB,eAAe,UAAU,UAAU;EACnF,MAAM,sBAAsB,WAAW,iBAAiB,OAAO,iBAAiB,KAAK,UAAU,UAAU;AAEzG,UAAQ,KAAK;GACT,YAAY,MAAM;GAClB;GACA;GACA;GACA,QAAQ,YAAY;GACpB,UAAU,YAAY;GACtB,mBAAmB,oBAAoB;GACvC,qBAAqB,oBAAoB;GACzC,gBAAgB,iBAAiB;GACjC,kBAAkB,iBAAiB;GACtC,CAAC;;AAGN,QAAO;;AAGX,MAAM,qBAAqB,SAA8B,eAAuB;CAC5E,MAAM,+BAAe,IAAI,KAAqB;AAC9C,MAAK,MAAM,UAAU,SAAS;EAC1B,MAAM,kBAAkB,WAAW,MAAM,OAAO,kBAAkB,OAAO,eAAe,CAAC,MAAM;AAC/F,eAAa,IAAI,OAAO,IAAI,gBAAgB;;AAEhD,QAAO;;AAGX,MAAM,4BAA8C,EAChD,yBAAyB,GAC5B;AAED,MAAM,0BACF,UACA,aACA,WACoB;CACpB,MAAM,EAAE,YAAY,aAAa,gCAAgC,YAAY;CAC7E,MAAM,UAAU,aAAa,YAAY,UAAU,YAAY,OAAO;CACtE,MAAM,YAAY,QAAQ,KAAK,MAAM,EAAE,GAAG;CAC1C,MAAM,8BAAc,IAAI,KAAsB;AAC9C,MAAK,MAAM,KAAK,SACZ,aAAY,IAAI,EAAE,IAAI,EAAE;AAG5B,QAAO;EACH;EACA;EACA;EACA,oBAAoB;EACpB;EACA;EACA,cARiB,kBAAkB,SAAS,WAAW;EASvD;EACA;EACH;;AAGL,MAAM,2BACF,SACA,MACA,SACA,WACA,iBACA,eACA,OACkB;CAClB,IAAI,aAAa;AACjB,KAAI,CAAC,YAED;OAAK,MAAM,UAAU,QAAQ,QAGzB,KAAI,mBAAmB,OAAO,oBAAoB,iBAAiB,OAAO,gBAAgB;AACtF,gBAAa,OAAO;AACpB;;;AAKZ,QAAO;EACH,IAAI;EACJ;EACA;EACA,OAAO,WAAW,iBAAiB,eAAe,QAAQ,UAAU,QAAQ,YAAY,OAAO;EAC/F;EACH;;AAGL,MAAM,yBACF,MACA,SACA,WACA,OACA,QACmB;CACnB;CACA;CACA;CACA;CACA;CACH;;;;;;;;;;;;;;;;;;;;;;;;AAyBD,MAAa,+BACT,UACA,UACA,YAC2B;CAE3B,MAAM,UAAU,uBAAuB,UAAU,UADlC;EAAE,GAAG;EAA2B,GAAG,SAAS;EAAQ,CACD;AAClE,KAAI,QAAQ,UAAU,WAAW,EAC7B,QAAO;EACH,QAAQ,CACJ;GACI,WAAW;GACX,SAAS;GACT,OAAO;IAAE,KA
AK,SAAS;IAAQ,OAAO;IAAG;GACzC,QAAQ;GACR,MAAM;GACT,CACJ;EACD,oBAAoB,QAAQ;EAC5B,WAAW,EAAE;EAChB;AAML,QAAO;EAAE,SAHK,SAAS,SAAS,eACX,SAAS,SAAS,KAAK,IAAI,QAAQ,CAAC,KAAK,OAAO;GAAE,GAAG;GAAG,QAAQ,EAAE,UAAU,KAAK;GAAI,EAAE,CAAC;EAE5F,oBAAoB,QAAQ;EAAoB,WAAW,QAAQ;EAAW;;;;;;;;;AAUnG,MAAM,wBAAwB,YAAkD;CAC5E,MAAM,OAAO,QAAQ;CACrB,MAAM,EAAE,SAAS,QAAQ,QAAQ,QAAQ,kBAAkB;CAC3D,MAAM,SAA4B,EAAE;CAEpC,MAAM,oBAAoB,IAAI,OAC1B,IAAI,QAAQ,KAAK,OAAO,QAAQ,OAAO,MAAM,SAAS,OAAO,GAAG,gBAAgB,OAAO,aAAa,gBAAgB,UACpH,KACH;AACD,MAAK,MAAM,SAAS,KAAK,SAAS,kBAAkB,EAAE;EAClD,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,yBACA,6BAA6B,UAAU,MAAM,CAAC,yEAC9C,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;CAGL,MAAM,qBAAqB,IAAI,OAAO,IAAI,UAAU,SAAS,OAAO,GAAG,gBAAgB,UAAU,KAAK;AACtG,MAAK,MAAM,SAAS,KAAK,SAAS,mBAAmB,EAAE;EACnD,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,yBACA,gCAAgC,UAAU,IAC1C,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;CAGL,MAAM,sBAAsB,IAAI,OAAO,IAAI,UAAU,SAAS,OAAO,MAAM,OAAO,IAAI,KAAK;AAC3F,MAAK,MAAM,SAAS,KAAK,SAAS,oBAAoB,EAAE;EACpD,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,yBACA,gCAAgC,UAAU,IAC1C,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;CAGL,MAAM,wBAAwB,IAAI,OAAO,IAAI,oBAAoB,gBAAgB,OAAO,QAAQ,KAAK;AACrG,MAAK,MAAM,SAAS,KAAK,SAAS,sBAAsB,EAAE;EACtD,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,yBACA,cAAc,UAAU,MAAM,CAAC,qCAC/B,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;CAGL,MAAM,oBAAoB,IAAI,OAAO,IAAI,UAAU,OAAO,KAAK,UAAU,KAAK;AAC9E,MAAK,MAAM,SAAS,KAAK,SAAS,kBAAkB,EAAE;EAClD,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,yBACA,6BAA6B,UAAU,2BACvC,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;AAGL,QAAO;;;;;;;;;AAUX,MAAM,0BAA0B,YAAkD;CAC9E,MAAM,UAAU,IAAI,OAChB,IAAI,oBAAoB,yBAAyB,gBAAgB,yBAAyB,OAAO,UACjG,KACH;CACD,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,SAAS,QAAQ,mBAAmB,SAAS,QAAQ,EAAE;EAC9D,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,oBACA,qCAAqC,UAAU,MAAM,CAAC,6BACtD,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;AAEL,QAAO;;;;;;;;AASX,MAAM,wBAAwB,YAAkD;CAC5E
,MAAM,uBAAO,IAAI,KAAa;CAC9B,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,UAAU,QAAQ,QACzB,KAAI,KAAK,IAAI,OAAO,GAAG,CACnB,QAAO,KACH,sBACI,gBACA,iBAAiB,OAAO,GAAG,oDAC3B,OAAO,YACP;EAAE,KAAK,OAAO;EAAQ,OAAO,OAAO;EAAU,EAC9C,OAAO,GACV,CACJ;KAED,MAAK,IAAI,OAAO,GAAG;AAG3B,QAAO;;;;;;;;AASX,MAAM,uBAAuB,YAAkD;CAC3E,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,UAAU,QAAQ,QACzB,KAAI,CAAC,QAAQ,YAAY,IAAI,OAAO,GAAG,CACnC,QAAO,KACH,sBACI,eACA,0BAA0B,OAAO,GAAG,2CACpC,OAAO,YACP;EAAE,KAAK,OAAO;EAAQ,OAAO,OAAO;EAAU,EAC9C,OAAO,GACV,CACJ;AAGT,QAAO;;;;;;;;;AAUX,MAAM,yBAAyB,aAAwB;CACnD,MAAM,4BAAY,IAAI,KAAqB;AAC3C,MAAK,IAAI,IAAI,GAAG,IAAI,SAAS,QAAQ,IACjC,WAAU,IAAI,SAAS,GAAG,IAAI,EAAE;AAEpC,QAAO;;AAGX,MAAM,iBAAiB,SAA4B,WAAgC,KAAa,QAAgB;AAC5G,KAAI,CAAC,QAAQ,YAAY,IAAI,IAAI,IAAI,CAAC,QAAQ,YAAY,IAAI,IAAI,CAC9D;CAEJ,MAAM,KAAK,UAAU,IAAI,IAAI;CAC7B,MAAM,KAAK,UAAU,IAAI,IAAI;AAC7B,KAAI,MAAM,QAAQ,MAAM,QAAQ,KAAK,GACjC;AAEJ,QAAO;EAAE,KAAK;EAAI,OAAO;EAAI;;AAGjC,MAAM,qBACF,SACA,UACA,QACA,aACA,YACC;CACD,MAAM,QAAkB,EAAE;AAC1B,MAAK,IAAI,IAAI,WAAW,GAAG,IAAI,QAAQ,KAAK;EACxC,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACnC,MAAI,CAAC,MACD;AAEJ,MAAI,CAAC,QAAQ,YAAY,IAAI,MAAM,IAAI,YAAY,IAAI,MAAM,IAAI,QAAQ,IAAI,MAAM,CAC/E;AAEJ,UAAQ,IAAI,MAAM;AAClB,QAAM,KAAK,MAAM;;AAErB,QAAO;;AAGX,MAAM,yBAAyB,YAAkD;CAC7E,MAAM,YAAY,sBAAsB,QAAQ,SAAS;CAEzD,MAAM,cAAc,IAAI,IAAI,QAAQ,UAAU;CAC9C,MAAM,0BAAU,IAAI,KAAa;CACjC,MAAM,SAA4B,EAAE;AAEpC,MAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,SAAS,GAAG,KAAK;EACjD,MAAM,IAAI,QAAQ,QAAQ;EAC1B,MAAM,IAAI,QAAQ,QAAQ,IAAI;EAC9B,MAAM,MAAM,cAAc,SAAS,WAAW,EAAE,IAAI,EAAE,GAAG;AACzD,MAAI,CAAC,IACD;EAEJ,MAAM,eAAe,kBAAkB,SAAS,IAAI,OAAO,IAAI,KAAK,aAAa,QAAQ;AACzF,OAAK,MAAM,SAAS,aAChB,QAAO,KACH,sBACI,kBACA,wDAAwD,MAAM,IAC9D,QAAQ,YAAY,MAAM,EAAE,UAAU,EAAE,OAAO,EAC/C;GAAE,KAAK,EAAE;GAAQ,OAAO,EAAE;GAAU,EACpC,MACH,CACJ;;AAIT,QAAO;;;;;;;;AASX,MAAM,6BAA6B,YAAkD;CACjF,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,UAAU,QAAQ,SAAS;EAClC,MAAM,UAAU,QAAQ,mBAAmB,MAAM,OAAO,kBAAkB,OAAO,eAAe,CAAC,MAAM;EACvG,MAAM,aAAa,YAAY,OAAO,YAAY;EAClD,MAAM,aAAa,QAAQ,YAAY,IAAI,OAAO,GAAG,EAAE,QAAQ;EAC/D,MAAM,uBAAuB,sBA
AsB,KAAK,WAAW;AACnE,MAAI,CAAC,WAAW,YAAY,kBAAmB,cAAc,CAAC,qBAC1D,QAAO,KACH,sBACI,qBACA,gCAAgC,OAAO,GAAG,wCAC1C,QAAQ,YAAY,MAAM,OAAO,qBAAqB,OAAO,kBAAkB,EAC/E;GAAE,KAAK,OAAO;GAAmB,OAAO,OAAO;GAAqB,EACpE,OAAO,GACV,CACJ;;AAGT,QAAO;;;;;;;;AASX,MAAM,gCAAgC,YAAkD;CACpF,MAAM,WAAW;EAAC;EAA2B;EAAqB;EAAiB;CACnF,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,WAAW,SAClB,MAAK,MAAM,SAAS,QAAQ,mBAAmB,SAAS,QAAQ,EAAE;EAC9D,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,yBACA,aAAa,UAAU,4CACvB,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;AAGT,QAAO;;;;;;;;AASX,MAAM,oBAAoB,YAAkD;CACxE,MAAM,WAAW;EAAC;EAAa;EAA4B;EAAe;EAAa;EAAa;EAAe;CACnH,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,WAAW,SAClB,MAAK,MAAM,SAAS,QAAQ,mBAAmB,SAAS,QAAQ,EAAE;EAC9D,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,aACA,uBAAuB,UAAU,0DACjC,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;AAGT,QAAO;;;;;;;;;;;;AAaX,MAAM,sBAAsB,YAAkD;CAC1E,MAAM,gBAAgB;CACtB,MAAM,SAA4B,EAAE;AAEpC,MAAK,MAAM,UAAU,QAAQ,SAAS;EAClC,MAAM,OAAO,QAAQ,mBAAmB,MAAM,OAAO,kBAAkB,OAAO,eAAe;EAC7F,IAAI;AACJ,OAAK,MAAM,SAAS,KAAK,SAAS,cAAc,EAAE;GAC9C,MAAMC,cAAY,MAAM,GAAG,QAAQ,MAAM,GAAG,CAAC,MAAM;AACnD,OAAI,CAACA,YACD;AAEJ,OAAI,CAAC,gBAAgBA,YAAU,SAAS,aAAa,GAAG,QAAQ,MAAM,GAAG,CAAC,MAAM,CAAC,OAC7E,gBAAe;;AAGvB,MAAI,CAAC,aACD;EAEJ,MAAM,YAAY,aAAa,GAAG,QAAQ,MAAM,GAAG,CAAC,MAAM;EAC1D,MAAM,MAAM,aAAa,SAAS;EAClC,MAAM,kBAAkB,OAAO,mBAAmB;EAClD,MAAM,gBAAgB,kBAAkB,aAAa,GAAG;AACxD,SAAO,KACH,wBACI,SACA,eACA,4BAA4B,UAAU,IACtC,WACA,iBACA,eACA,OAAO,GACV,CACJ;;AAGL,QAAO;;;;;;;;AASX,MAAM,2BAA2B,YAAkD;CAC/E,MAAM,eAAe;CACrB,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,SAAS,QAAQ,mBAAmB,SAAS,aAAa,EAAE;EACnE,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,oBACA,oBAAoB,UAAU,6CAC9B,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;AAEL,QAAO;;;;;;;;;AAUX,MAAM,4BAA4B,YAAkD;CAChF,MAAM,UAAU,CAAC,GAAG,QAAQ,mBAAmB,SAAS,QAAQ,CAAC;AACjE,KAAI,QAAQ,UAAU,sBAClB,QAAO,EAAE;AAEb,QAAO,QAAQ,KAAK,UAAU;EAC1B,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,wBACH,SACA,qBACA,SAAS,QAAQ,OAAO,yIACxB,MAAM,IACN,KACA,MAAM,MAAM,GAAG,OAClB;GACH;;;;;;;;;AAUN,MAAM,
8BAA8B,YAAoB,oBAA4B;CAChF,MAAM,UAAU,cAAc,IAAI,MAAM;CACxC,MAAM,eAAe,mBAAmB,IAAI,MAAM;AAElD,KAAI,OAAO,SAAS,uCAChB;AAEJ,KAAI,YAAY,WAAW,EACvB,QAAO,iDAAiD,OAAO,OAAO;AAI1E,KADc,YAAY,SAAS,OAAO,SAC9B,uBAAuB;EAC/B,MAAM,oBAAoB,KAAK,MAAM,OAAO,SAAS,sBAAsB;AAC3E,SAAO,kCAAkC,YAAY,OAAO,aAAa,OAAO,OAAO,wCAAwC,kBAAkB;;;;;;;;;AAUzJ,MAAM,yCAAyC,YAAkD;CAC7F,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,UAAU,QAAQ,SAAS;EAClC,MAAM,MAAM,QAAQ,YAAY,IAAI,OAAO,GAAG;AAC9C,MAAI,CAAC,IACD;EAEJ,MAAM,cAAc,QAAQ,mBAAmB,MAAM,OAAO,kBAAkB,OAAO,eAAe,CAAC,MAAM;EAC3G,MAAM,QAAQ,2BAA2B,IAAI,MAAM,YAAY;AAC/D,MAAI,MACA,QAAO,KACH,sBACI,mBACA,oBAAoB,OAAO,GAAG,IAAI,MAAM,QAAQ,gBAAgB,GAAG,CAAC,aAAa,IACjF,aACA;GAAE,KAAK,OAAO;GAAmB,OAAO,OAAO;GAAqB,EACpE,OAAO,GACV,CACJ;;AAGT,QAAO;;;;;;;;AASX,MAAM,mBAAmB,YAAkD;CACvE,MAAM,SAA4B,EAAE;CACpC,MAAM,eAAe,QAAQ,OAAO;CACpC,MAAM,aAAa,IAAI,OAAO,2BAA2B,eAAe,EAAE,gBAAgB,IAAI;AAC9F,MAAK,MAAM,SAAS,QAAQ,mBAAmB,SAAS,WAAW,EAAE;EACjE,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,YACA,uBAAuB,UAAU,MAAM,CAAC,IACxC,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;AAEL,QAAO;;;;;;;;AASX,MAAM,2BAA2B,YAAkD;CAC/E,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,MAAM,iBACb,MAAK,MAAM,SAAS,QAAQ,mBAAmB,SAAS,GAAG,EAAE;EACzD,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,oBACA,6CAA6C,UAAU,IACvD,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;AAGT,QAAO;;AAQX,MAAM,2BAA2B,SAAuC;CACpE,MAAM,2BAAW,IAAI,KAAqB;CAC1C,MAAM,QAAQ,KAAK,MAAM,KAAK;CAC9B,MAAM,kBAAkB;CACxB,MAAM,WAAW;AAEjB,MAAK,MAAM,WAAW,OAAO;EACzB,MAAM,OAAO,QAAQ,WAAW;AAChC,MAAI,CAAC,KACD;EAEJ,MAAM,WAAW,KAAK,OAAO,OAAO;AACpC,MAAI,YAAY,EACZ;EAEJ,MAAM,SAAS,KAAK,MAAM,GAAG,SAAS,CAAC,MAAM;AAC7C,MAAI,CAAC,UAAU,OAAO,SAAS,gBAC3B;AAGJ,MADc,OAAO,MAAM,MAAM,CACvB,SAAS,SACf;EAEJ,MAAM,QAAQ,SAAS,IAAI,OAAO,IAAI;AACtC,WAAS,IAAI,QAAQ,QAAQ,EAAE;;CAGnC,IAAI,QAAQ;AACZ,MAAK,MAAM,SAAS,SAAS,QAAQ,CACjC,UAAS;AAGb,QAAO;EAAE;EAAU;EAAO;;;;;;;;;;;AAY9B,MAAM,4BAA4B,YAAkD;CAChF,MAAM,SAA4B,EAAE;AAEpC,MAAK,MAAM,UAAU,QAAQ,SAAS;EAClC,MAAM,MAAM,QAAQ,YAAY,IAAI,OAAO,GAAG;AAC9C,MAAI,CAAC,IACD;EAGJ,MAAM,eAAe,wBAAwB,IA
AI,KAAK;EACtD,MAAM,cAAc,QAAQ,mBAAmB,MAAM,OAAO,kBAAkB,OAAO,eAAe;EACpG,MAAM,gBAAgB,wBAAwB,YAAY;AAE1D,MAAI,aAAa,UAAU,cAAc,UAAU,aAAa,QAAQ,KAAK,cAAc,QAAQ,GAC/F,QAAO,KACH,sBACI,qBACA,oCAAoC,OAAO,GAAG,gBAAgB,aAAa,MAAM,yCAAyC,cAAc,MAAM,uEAC9I,YAAY,MAAM,EAClB;GAAE,KAAK,OAAO;GAAmB,OAAO,OAAO;GAAqB,EACpE,OAAO,GACV,CACJ;;AAIT,QAAO;;;;;;;;AASX,MAAM,6BAA6B,SAAiB;CAChD,MAAM,kBAAkB,wBAAwB,KAAK,CAAC;AACtD,KAAI,gBAAgB,SAAS,EACzB;CAEJ,MAAM,eAAe,CAAC,GAAG,gBAAgB,MAAM,CAAC,CAAC,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC,KAAK,IAAI;AAC9F,KAAI,CAAC,aACD;CAEJ,MAAM,QAAQ,KAAK,MAAM,KAAK;CAC9B,IAAI,SAAS;CACb,MAAM,UAAU,IAAI,OAAO,OAAO,aAAa,SAAS,IAAI;AAC5D,MAAK,MAAM,QAAQ,OAAO;AACtB,OAAK,MAAM,SAAS,KAAK,SAAS,QAAQ,EAAE;GACxC,MAAM,MAAM,MAAM,SAAS;AAC3B,OAAI,MAAM,EACN,QAAO;IAAE,OAAO,SAAS;IAAK,OAAO,MAAM;IAAI;;AAGvD,YAAU,KAAK,SAAS;;;AAIhC,MAAM,6BAA6B,YAAkD;CACjF,MAAM,SAA4B,EAAE;AAEpC,MAAK,MAAM,UAAU,QAAQ,SAAS;EAClC,MAAM,cAAc,QAAQ,mBAAmB,MAAM,OAAO,kBAAkB,OAAO,eAAe;AACpG,MAAI,CAAC,YACD;EAEJ,MAAM,UAAU,0BAA0B,YAAY;AACtD,MAAI,CAAC,QACD;EAEJ,MAAM,kBAAkB,OAAO,mBAAmB,QAAQ;EAC1D,MAAM,gBAAgB,kBAAkB,QAAQ,MAAM,SAAS;AAC/D,SAAO,KACH,wBACI,SACA,sBACA,wCAAwC,OAAO,GAAG,MAAM,QAAQ,MAAM,gCACtE,GAAG,QAAQ,MAAM,IACjB,iBACA,eACA,OAAO,GACV,CACJ;;AAGL,QAAO;;;;;;;;;AAUX,MAAM,yCAAyC,YAAkD;CAC7F,MAAM,SAA4B,EAAE;CACpC,MAAM,gBAAgB;AAEtB,MAAK,MAAM,UAAU,QAAQ,SAAS;EAClC,MAAM,OAAO,QAAQ,mBAAmB,MAAM,OAAO,kBAAkB,OAAO,eAAe;AAC7F,MAAI,CAAC,KACD;AAGJ,OAAK,MAAM,KAAK,KAAK,SAAS,cAAc,EAAE;GAC1C,MAAM,SAAS,GAAG,EAAE,GAAG,MAAM,EAAE;GAC/B,MAAM,MAAM,EAAE,SAAS;AACvB,OAAI,OAAO,GAEP;QAAI,CADU,KAAK,MAAM,KAAK,KAAK,IAAI,KAAK,QAAQ,MAAM,OAAO,SAAS,GAAG,CAAC,CACnE,SAAS,IAAI,EAAE;KACtB,MAAM,kBAAkB,OAAO,mBAAmB;KAClD,MAAM,gBAAgB,kBAAkB,OAAO;AAC/C,YAAO,KACH,wBACI,SACA,oCACA,0DAA0D,OAAO,GAAG,MAAM,OAAO,IACjF,QACA,iBACA,eACA,OAAO,GACV,CACJ;;;;;AAMjB,QAAO;;AAGX,MAAM,gBAAkC;CACpC;EAAE,IAAI;EAAyB,KAAK;EAAsB,MAAM;EAAyB;CACzF;EAAE,IAAI;EAAoB,KAAK;EAAwB,MAAM;EAAoB;CACjF;EAAE,IAAI;EAAqB,KAAK;EAA2B,MAAM;EAAqB;CACtF;EAAE,IAAI;EAAyB,KAAK;EAA8B,MAAM;EAAyB;CACjG;EAAE,IAAI;EAAa,KAAK;EAAkB,MAAM;EAAa
;CAC7D;EAAE,IAAI;EAAgB,KAAK;EAAsB,MAAM;EAAgB;CACvE;EAAE,IAAI;EAAe,KAAK;EAAqB,MAAM;EAAe;CACpE;EAAE,IAAI;EAAkB,KAAK;EAAuB,MAAM;EAAkB;CAC5E;EAAE,IAAI;EAAe,KAAK;EAAoB,MAAM;EAAe;CACnE;EAAE,IAAI;EAAoB,KAAK;EAAyB,MAAM;EAAoB;CAClF;EAAE,IAAI;EAAqB,KAAK;EAA0B,MAAM;EAAqB;CACrF;EAAE,IAAI;EAAmB,KAAK;EAAuC,MAAM;EAAmB;CAC9F;EAAE,IAAI;EAAY,KAAK;EAAiB,MAAM;EAAY;CAC1D;EAAE,IAAI;EAAoB,KAAK;EAAyB,MAAM;EAAoB;CAClF;EAAE,IAAI;EAAqB,KAAK;EAA0B,MAAM;EAAqB;CACrF;EAAE,IAAI;EAAsB,KAAK;EAA2B,MAAM;EAAsB;CACxF;EACI,IAAI;EACJ,KAAK;EACL,MAAM;EACT;CACJ"}
|
|
1
|
+
{"version":3,"file":"index.js","names":["rawStart","matchText"],"sources":["../src/constants.ts","../.generated/prompts.ts","../src/prompts.ts","../src/textUtils.ts","../src/fix.ts","../src/validation.ts"],"sourcesContent":["/**\n * Supported marker types for segments.\n */\nexport enum Markers {\n /** B - Book reference */\n Book = 'B',\n /** F - Footnote reference */\n Footnote = 'F',\n /** T - Heading reference */\n Heading = 'T',\n /** C - Chapter reference */\n Chapter = 'C',\n /** N - Note reference */\n Note = 'N',\n /** P - Translation/Plain segment */\n Plain = 'P',\n}\n\n/**\n * Regex parts for building translation marker patterns.\n */\nexport const TRANSLATION_MARKER_PARTS = {\n /** Dash variations (hyphen, en dash, em dash) */\n dashes: '[-–—]',\n /** Numeric portion of the reference */\n digits: '\\\\d+',\n /** Valid marker prefixes (Book, Chapter, Footnote, Translation, Page) */\n markers: `[${Markers.Book}${Markers.Chapter}${Markers.Footnote}${Markers.Heading}${Markers.Plain}${Markers.Note}]`,\n /** Optional whitespace before dash */\n optionalSpace: '\\\\s?',\n /** Valid single-letter suffixes */\n suffix: '[a-z]',\n} as const;\n\n/**\n * Pattern for a segment ID (e.g., P1234, B45a).\n */\nexport const MARKER_ID_PATTERN = `${TRANSLATION_MARKER_PARTS.markers}${TRANSLATION_MARKER_PARTS.digits}${TRANSLATION_MARKER_PARTS.suffix}?`;\n\n/**\n * English tokens that indicate archaic/Biblical register and should be flagged.\n */\nexport const ARCHAIC_WORDS = [\n 'thee',\n 'thou',\n 'thine',\n 'thy',\n 'verily',\n 'shalt',\n 'hast',\n 'whence',\n 'henceforth',\n 'saith',\n 'behold',\n] as const;\n\nexport const MAX_EMPTY_PARENTHESES = 3;\nexport const MIN_ARABIC_LENGTH_FOR_TRUNCATION_CHECK = 50;\nexport const MIN_TRANSLATION_RATIO = 0.25;\n\nexport const COLON_PATTERN = /[::]/g;\n\n/**\n * Heuristic pattern for inferring speaker labels in English translations.\n * Matches 1-3 capitalized words ending with a colon (e.g., \"Questioner:\", \"The Shaykh:\").\n 
*/\nexport const SPEAKER_LABEL_GUESS_PATTERN =\n /(?:^|\\n|\\s)([A-Z][\\p{L}'ʿʾāīūḥṣḍṭẓ-]*(?:\\s+[A-Z][\\p{L}'ʿʾāīūḥṣḍṭẓ-]*){0,2})\\s*:/gu;\n\nexport const DEFAULT_LEADING_PUNCTUATION = ['.', '?', '!', '…', '،', '؛', ':', ':', '-', '–', '—'];\n","// AUTO-GENERATED FILE - DO NOT EDIT\n// Generated from prompts/*.md by scripts/generate-prompts.ts\n\n// =============================================================================\n// PROMPT TYPE\n// =============================================================================\n\nexport type PromptId = 'master_prompt' | 'encyclopedia_mixed' | 'fatawa' | 'fiqh' | 'hadith' | 'jarh_wa_tadil' | 'tafsir' | 'usul_al_fiqh';\n\n// =============================================================================\n// RAW PROMPT CONTENT\n// =============================================================================\n\nexport const MASTER_PROMPT = \"ROLE: Expert academic translator of Classical Islamic texts; prioritize accuracy and structure over fluency.\\nCRITICAL NEGATIONS: 1. NO SANITIZATION (Do not soften polemics). 2. NO META-TALK (Output translation only). 3. NO MARKDOWN (Plain text only). 4. NO EMENDATION. 5. NO INFERENCE. 6. NO RESTRUCTURING. 7. NO OPAQUE TRANSLITERATION (Must translate phrases). 8. NO INVENTED SEGMENTS.\\nRULES: NO ARABIC SCRIPT (Except ﷺ). Plain text only. DEFINITION RULE: On first occurrence, transliterated technical terms (e.g., bidʿah) MUST be defined: \\\"translit (English)\\\". Preserve Segment ID. Translate meaning/intent. No inference. No extra fields. Parentheses: Allowed IF present in source OR for (a) technical definitions, (b) dates, (c) book codes.\\nARABIC LEAK (Hard ban):\\n- SCRIPT LOCK: Output must be 100% Latin script (ASCII + ALA-LC diacritics like ā ī ū ḥ ṣ ḍ ṭ ẓ ʿ ʾ). 
These diacritics are allowed/required and are NOT Arabic script.\\n- STRICT BAN: Arabic script codepoints (letters, Arabic-Indic numerals ٠-٩, punctuation like ، ؟ ؛ , tatweel ـ, and Arabic presentation forms) are forbidden everywhere in output (even inside quotes/brackets/parentheses/citations), except ﷺ.\\n- NO CITATIONS/BILINGUAL: Do NOT paste Arabic source text anywhere (no quotes, no citations, no bilingual Arabic+English output). Translate into English only.\\n- QUOTES/VERSES/CITATIONS: Even if the source includes Arabic Qurʾān/ḥadīth/quoted text (e.g., «...») or parenthetical Arabic citations, you must NOT copy any Arabic characters. Translate the meaning fully into English only.\\n- NO MIXED-SCRIPT: Never output a token that mixes Latin and Arabic characters (example: ʿĪد). Rewrite contaminated names/terms fully in Latin ALA-LC.\\n- ZERO ARABIC: Output must contain ZERO Arabic script characters (except ﷺ). If any Arabic appears, delete it and rewrite until none remain.\\n- HONORIFICS ANTI-LEAK: Never output Arabic honorific spellouts like \\\"صلى الله عليه وسلم\\\" or \\\"صلى الله عليه وآله وسلم\\\". Always replace any Prophet salutation with ﷺ.\\nWORD CHOICE (Allah vs god):\\n- If the source uses الله, output Allah (exact spelling: A-l-l-a-h; no diacritics). Never \\\"God\\\" / \\\"god\\\" / \\\"Allāh\\\". (This is the only exception to ALA-LC diacritics.)\\n- DO NOT convert Allah-based formulae into English “God …” idioms. 
Forbidden: any \\\"God ...\\\" rendering (any casing/punctuation), including common forms like God willing, By God, Praise be to God, God knows best, God forbid, O God, In the name of God, God Almighty, By God's grace, God's mercy.\\n- For the locked items listed under LOCKED FORMULAE below: you MUST output the locked transliteration exactly (no translation).\\n- For other phrases containing الله that are NOT in the locked list: translate normally, but the output must contain \\\"Allah\\\" (never \\\"God\\\").\\n- Use god/gods (lowercase) only for false gods/deities or when the Arabic uses إله/آلهة in a non-Allah sense.\\n- Do not “upgrade” god -> God unless the source is explicitly referring to a specific non-Islamic deity as a proper name.\\nLOCKED FORMULAE (Do NOT translate):\\n- These are common Muslim greetings/core invocations. Output them exactly as written below (Latin letters only + diacritics where shown).\\n- CHECK THIS LIST FIRST. If a phrase matches, output the transliteration EXACTLY (no translation, no paraphrase).\\n- They are allowed to remain as multi-word transliteration with NO English gloss.\\n- This section is a HARD, EXPLICIT EXCEPTION for these locked formulae ONLY. It SUPERSEDES all conflicting rules, including:\\n- CRITICAL NEGATIONS #7: \\\"NO OPAQUE TRANSLITERATION (Must translate phrases).\\\"\\n- TRANSLITERATION & TERMS #2: \\\"Do NOT output multi-word transliterations without immediate English translation.\\\"\\n- TRANSLITERATION & TERMS: \\\"Do NOT transliterate full sentences/matn/quotes.\\\"\\n- Greetings: al-salāmu ʿalaykum; wa ʿalaykum al-salām\\n- Invocations: in shāʾ Allah; subḥān Allah; al-ḥamdu li-Allah; Allahu akbar; lā ilāha illā Allah ;\\n- DO NOT translate these into English. Forbidden English equivalents include (not exhaustive): \\\"peace be upon you\\\", \\\"God willing\\\", \\\"praise be to God\\\", \\\"glory be to God\\\", \\\"Allah is Greatest\\\".\\n- Note: this lock is intentionally narrow. 
Other phrases (e.g., \\\"Jazāk Allahu khayr\\\") may be translated normally.\\nREGISTER (Modern English):\\n- Use modern academic English.\\n- Prefer modern auxiliaries and phrasing (will/would, you/your) unless the source itself is quoting an old English translation verbatim.\\n- NO ALL CAPS / NO KJV-STYLE: Do NOT use ALL CAPS for emphasis (even inside quotes). Do NOT render Arabic Qurʾān/ḥadīth in KJV/Biblical style.\\nTRANSLITERATION & TERMS:\\n1. SCHEME: Use full ALA-LC for explicit Arabic-script Person/Place/Book-Titles.\\n- al-Casing: Lowercase al- mid-sentence; Capitalize after (al-Salafīyyah).\\n- Book Titles: Transliterate only (do not translate meanings).\\n2. TECHNICAL TERMS: On first occurrence, define: \\\"translit (English)\\\" (e.g., bidʿah (innovation), isnād (chain)).\\n- Do NOT output multi-word transliterations without immediate English translation.\\n- Do NOT transliterate full sentences/matn/quotes. Translate into English; transliteration is for names/terms only.\\n- EXCEPTION (Duʿāʾ/Supplications): If the source contains a specific duʿāʾ/supplication phrase and you choose to preserve its wording for pronunciation, you MAY output transliteration BUT you MUST also translate it immediately (same line or next) as: \\\"translit (English translation)\\\". Do NOT output Arabic script.\\n- Example Allowed: Allahumma innī asʾaluka al-ʿāfiyah (O Allah, I ask You for well-being).\\n- Example Forbidden: Transliterate a long multi-sentence duʿāʾ paragraph without translating it.\\n- LOCKED FORMULAE are the only exception allowed to remain multi-word transliteration with NO English gloss.\\n- If you use any other multi-word transliteration (not locked), it MUST be immediately glossed: \\\"translit (English)\\\". Prefer full English translation for phrases.\\n- Do NOT leave common nouns/objects/roles as transliteration (e.g., tools, foods, occupations). Translate them into English. 
If you must transliterate a non-name, you MUST immediately gloss it: \\\"translit (English)\\\".\\n3. STANDARDIZED TERMS: Use standard academic spellings: Muḥammad, Shaykh, Qurʾān, Islām, ḥadīth.\\n- Sunnah (Capitalized) = The Corpus/Prophetic Tradition. sunnah (lowercase) = legal status/recommended.\\n4. PROPER NAMES: Transliterate only (no parentheses).\\n5. UNICODE: Latin + Latin Extended (āīūḥʿḍṣṭẓʾ) + punctuation. NO Arabic script (except ﷺ). NO emoji.\\n- DIACRITIC FALLBACK: If you cannot produce correct ALA-LC diacritics, output English only. Do NOT use substitute accents (â/ã/á).\\n6. SALUTATION: Replace all Prophet salutations with ﷺ.\\n7. AMBIGUITY: Use contextual meaning from tafsir for theological terms. Do not sanitise polemics (e.g. Rāfiḍah).\\nOUTPUT FORMAT: Segment_ID - English translation.\\nCRITICAL: You must use the ASCII hyphen separator \\\" - \\\" (space+hyphen+space) immediately after the ID. Do NOT use em-dash or en-dash. Do NOT use a newline after the ID.\\nID INTEGRITY (Check First):\\n- PREPASS (Silent closed set): Internally identify the exact ordered list of Segment_IDs present in the source. Treat this list as a CLOSED SET. Do not output this list.\\n- REQUIRED (Exact match): Your output must contain EXACTLY those Segment_IDs, in the EXACT same order, each appearing EXACTLY ONCE as an \\\"ID - ...\\\" prefix. FORBIDDEN: re-outputting an ID prefix you already used (even in long segments).\\n- BAN (No new IDs): Do NOT invent ANY IDs or ID-like labels not present verbatim in the source (including \\\"(continued)\\\", \\\"cont.\\\", \\\"part 2\\\", or invented suffixes like P123c). Suffix IDs are allowed ONLY if that exact ID appears in the source. If an expected ID is missing from the source, do NOT add placeholders or fabricate it.\\n- BOUNDARY (No bleed): Translate ONLY the text that belongs to the current Segment_ID (from its header to the next Segment_ID header, or to end-of-input for the last segment). 
Do NOT move lines across IDs and do NOT merge segments.\\n- ELLIPSIS: If the source contains … or ..., translate it literally as \\\"...\\\" and continue. If the source ends mid-sentence, end the translation abruptly. NEVER output \\\"[INCOMPLETE]\\\".\\nMULTI-LINE SEGMENTS (e.g., internal Q&A): Output the Segment_ID and \\\" - \\\" ONLY ONCE on the first line. Do NOT repeat the Segment_ID on subsequent lines; subsequent lines must start directly with the speaker label/text (no \\\"ID - \\\" prefix).\\nSEGMENT BOUNDARIES (Anti-hallucination): Start a NEW segment ONLY when the source explicitly provides a Segment_ID. If the source continues with extra lines (including speaker labels like \\\"Questioner:\\\"/\\\"The Shaykh:\\\"/\\\"السائل:\\\"/\\\"الشيخ:\\\") WITHOUT a new Segment_ID, treat them as part of the CURRENT segment (multi-line under the current Segment_ID). Do NOT invent a new ID (including alphabetic suffixes like \\\"P5803c\\\") to label such continuation.\\nOUTPUT COMPLETENESS: Translate ALL content in EVERY segment. Do not truncate, summarize, or skip content.\\nOUTPUT UNIQUENESS: Each Segment_ID from the source must appear in your output EXACTLY ONCE as an \\\"ID - ...\\\" prefix. Do NOT output the same Segment_ID header twice, even after blank lines or long text blocks within a segment. If a segment is long or has multiple speaker turns, continue translating under that single ID header without re-stating it.\\nNEGATIVE CONSTRAINTS: Do NOT output \\\"implicit continuation\\\", summaries, or extra paragraphs. Output only the text present in the source segment.\\nExample: P1234 - Translation text... (Correct) vs P1234\\\\nTranslation... (Forbidden).\\nEXAMPLE: Input: P405 - حدثنا عبد الله بن يوسف... Output: P405 - ʿAbd Allāh b. 
Yūsuf narrated to us...\";\n\nexport const ENCYCLOPEDIA_MIXED = \"NO MODE TAGS: Do not output any mode labels or bracket tags.\\nSTRUCTURE (Apply First):\\n- LINE BREAKS (CRITICAL): Preserve the source line breaks around speaker turns. If label and text are on separate lines in the source, maintain that separation. Do NOT merge two source lines into one.\\n- EXCEPTION: If the speaker label is the VERY FIRST token after the \\\"ID - \\\" prefix, keep it on the same line. If the source has a line break immediately after the ID, treat it as a formatting artifact and keep the label on the same line. (Correct: P5455 - Questioner: Text...) (Wrong: P5455 \\\\n Questioner: Text...).\\n- INTERNAL Q&A: If segment has multiple turns, preserve the source line breaks between turns. Output Segment ID ONLY ONCE at the start of the first line. Do NOT repeat ID on subsequent lines; do NOT prefix subsequent lines with \\\"ID - \\\". (e.g. P5455 - Questioner: ... \\\\n The Shaykh: ...).\\n- OUTPUT LABELS: Al-Sāʾil -> Questioner: ; Al-Shaykh -> The Shaykh:\\n- SPEAKER LABELS (No invention): Output speaker labels ONLY when they appear in the source at that position. Do NOT add \\\"Questioner:\\\"/\\\"The Shaykh:\\\" to unlabeled text. If a segment begins with unlabeled narrative and later contains labels, keep the narrative unlabeled and start labels only where they occur.\\nDEFINITIONS & CASING:\\n- GEOPOLITICS: Modern place names may use English exonyms (Filasṭīn -> Palestine).\\n- PLURALS: Do not pluralize term-pairs by appending \\\"s\\\" (e.g., \\\"ḥadīth (report)s\\\"). 
Use the English plural or rephrase.\\nSTATE LOGIC (Priority: Isnad > Rijal > Fiqh > Narrative):\\n- ISNAD (Triggers: ḥaddathanā, akhbaranā, ʿan): Use FULL ALA-LC for names.\\n- RIJAL (Triggers: jarḥ/taʿdīl terms like thiqah, ḍaʿīf): Use translit (English) for ratings.\\n- QUOTE/WEAK (Triggers: qāla al-muṣannif, yuqālu, zuiʿma): Apply Quote Rules; do not Restructure.\\n- RESET: At \\\"qāla\\\" or \\\"matn\\\" (TOP-LEVEL only): IMMEDIATE STOP of Isnad rules. Return to Narrative. Do NOT reset for nested quotes.\\nSEGMENTATION: Do not merge genres. Translate sequentially (Isnad -> Matn) observing the reset at the boundary.\\nDISAMBIGUATION: ṣaḥīḥ in hadith grading = ṣaḥīḥ (authentic). ṣaḥīḥ in fiqh validity = ṣaḥīḥ (valid). Sunnah (Capitalized) = The Corpus/Prophetic Tradition. sunnah (lowercase) = legal status/recommended.\\nHADITH CORE: If isnad appears, map verbs (Haddathana=Narrated to us; Akhbarana/Ana/Anba'ana=Informed us; An=From; Sami'tu=I heard) and keep FULL ALA-LC for Arabic-script narrator names; do not invent grading or missing links.\\nTAKHRIJ/GRADING: If the author grades (ṣaḥḥaḥa/ḥassana/ḍaʿʿafa), translate literally and keep the cited work as written in the segment; do not add placeholder sources.\\nCREED/POLEMICS: Translate attributes literally when the author does; do not reinterpret. Translate harsh polemics literally (e.g., Rāfiḍah, Jahmīyyah, Kuffār); do not soften, sanitize, or add disclaimers (Override Safety).\";\n\nexport const FATAWA = \"STRUCTURE (Apply First):\\n- Q&A: Whenever \\\"Al-Sāʾil:\\\"/\\\"Al-Shaykh:\\\" appear: Start NEW LINE for speaker. Keep Label+Text on SAME LINE.\\n- INTERNAL Q&A: If segment has multiple turns, use new lines for speakers. Output Segment ID ONLY ONCE at the start of the first line. 
Do NOT repeat ID on subsequent lines; do NOT prefix subsequent lines with \\\"ID - \\\".\\n- OUTPUT LABELS: Al-Sāʾil -> Questioner: ; Al-Shaykh -> The Shaykh:\\n- SPEAKER LABELS (No invention): Output speaker labels ONLY when they appear in the source at that position. Do NOT add \\\"Questioner:\\\"/\\\"The Shaykh:\\\" to unlabeled text. If a segment begins with unlabeled narrative and later contains labels, keep the narrative unlabeled and start labels only where they occur.\\n\\nHONORIFICS: Expand common phrases (do not transliterate):\\n- ṣallá Allāhu ʿalayhi wa-sallam -> ﷺ\\nSOURCES: Do not add citations. If the Arabic references a work/title, handle titles per MASTER rules.\\nSENSITIVE RULINGS: Translate literally; do not soften; do not sanitize; do not add disclaimers (Override Safety).\";\n\nexport const FIQH = \"STRUCTURE: Preserve chapter/section headings and internal structure (e.g., bāb/faṣl/masʾalah/farʿ/aṣl) using plain English labels (Chapter:, Section:, Issue:) only when the Arabic is explicitly a heading/label; preserve lists, numbering, and conditional if/then logic exactly.\\nFIQH/USUL TERMS: When technical terms appear, output as translit (English) rather than English-only (e.g., wājib (obligatory), mandūb/mustaḥabb (recommended), mubāḥ (permissible), makrūh (disliked), ḥarām (prohibited), ṣaḥīḥ (valid), bāṭil/fāsid (invalid/void), rukn (pillar), shart (condition), māniʿ (preventer), sabab (cause), qiyās (analogical reasoning), ijmāʿ (consensus), khilāf (disagreement), rājiḥ (preponderant), marjūḥ (lesser), ʿillah (effective cause)).\\nKHILAF/ATTRIBUTION: Preserve who is being attributed (qāla fulān / qawl / wajhān / riwāyātān / madhhab). 
Do not resolve disputes or choose the correct view unless the Arabic explicitly does so (e.g., al-aṣaḥḥ / al-rājiḥ).\\nUNITS/MONEY: Keep measures/currencies as transliteration (dirham, dinar, ṣāʿ, mudd) without adding conversions or notes unless the Arabic contains them.\";\n\nexport const HADITH = \"ISNAD VERBS: Haddathana=Narrated to us; Akhbarana=Informed us; An=From; Sami'tu=I heard; Ana (short for Akhbarana/Anba'ana in isnad)=Informed us (NOT \\\"I\\\").\\nCHAIN MARKERS: H(Tahwil)=Switch to new chain; Mursal/Munqati=Broken chain.\\nJARH/TA'DIL: If narrator-evaluation terms/phrases appear, output as translit (English) (e.g., fīhi naẓar (he needs to be looked into)); do not replace with only English.\\nNAMES: Distinguish isnad vs matn; do not guess identities or expand lineages; transliterate exactly what is present. Book titles follow master rule.\\nRUMUZ/CODES: If the segment contains book codes (kh/m/d/t/s/q/4), preserve them exactly; do not expand to book names.\";\n\nexport const JARH_WA_TADIL = \"GLOSSARY: When a jarh/ta'dil term/phrase appears, output as translit (English) (e.g., thiqah (trustworthy), ṣadūq (truthful), layyin (soft/lenient), ḍaʿīf (weak), matrūk (abandoned), kadhdhāb (liar), dajjāl (imposter), munkar al-ḥadīth (narrates denounced hadith)).\\nRUMUZ: Preserve book codes in Latin exactly as in the segment (e.g., (kh) (m) (d t q) (4) (a)); do not expand unless the Arabic segment itself expands them.\\nQALA: Translate as \\\"He said:\\\" and start a new line for each new critic.\\nDATES: Use (d. 256 AH) or (born 194 AH).\\nNO HARM: Translate \\\"There is no harm in him\\\"; no notes.\\nPOLEMICS: Harsh terms (e.g., dajjāl, khabīth, rāfiḍī) must be translated literally; do not soften.\";\n\nexport const TAFSIR = \"AYAH CITES: Do not output surah names unless the Arabic includes the name. Use [2:255]. 
If the segment contains quoted Qur'an text, translate it in braces: {…} [2:255].\\nATTRIBUTES: Translate Allah’s attributes as the author intends; if the author is literal, keep literal (e.g., Hand, Face); do not add metaphorical reinterpretation unless the author does; mirror the author’s theology (Ash'ari vs Salafi) exactly.\\nI'RAB TERMS: Mubtada=Subject; Khabar=Predicate; Fa'il=Agent/Doer; Maf'ul=Object.\\nPROPHET NAMES: Use Arabic equivalents with ALA-LC diacritics (e.g., Mūsá, ʿĪsá, Dāwūd, Yūsuf).\\nPOETRY: Preserve line breaks (one English line per Arabic line); no bullets; prioritize literal structure/grammar over rhyme.\";\n\nexport const USUL_AL_FIQH = \"STRUCTURE: Preserve the argument structure (claims, objections \\\"if it is said...\\\", replies \\\"we say...\\\", evidences, counter-evidences). Preserve explicit labels (faṣl, masʾalah, qāla, qīla, qulna) as plain English equivalents only when the Arabic is explicitly a label.\\nUSUL TERMS: When technical terms appear, output as translit (English) (e.g., ʿāmm (general), khāṣṣ (specific), muṭlaq (absolute), muqayyad (restricted), amr (command), nahy (prohibition), ḥaqīqah (literal), majāz (figurative), mujmal (ambiguous), mubayyan (clarified), naṣṣ (explicit text), ẓāhir (apparent), mafhūm (implication), manṭūq (stated meaning), dalīl (evidence), qiyās (analogical reasoning), ʿillah (effective cause), sabab (cause), shart (condition), māniʿ (preventer), ijmāʿ (consensus), naskh (abrogation)).\\nDISPUTE HANDLING: Do not resolve methodological disputes or harmonize schools unless the Arabic explicitly chooses (e.g., al-rājiḥ / al-aṣaḥḥ / ṣaḥīḥ). Preserve attribution to the madhhab/scholars as written.\\nQUR'AN/HADITH: Keep verse references in the segment’s style; do not invent references. 
If a hadith isnad appears, follow MASTER isnad/name rules.\";\n\n// =============================================================================\n// PROMPT METADATA\n// =============================================================================\n\nexport const PROMPTS = [\n {\n id: 'master_prompt' as const,\n name: 'Master Prompt',\n content: MASTER_PROMPT,\n },\n {\n id: 'encyclopedia_mixed' as const,\n name: 'Encyclopedia Mixed',\n content: ENCYCLOPEDIA_MIXED,\n },\n {\n id: 'fatawa' as const,\n name: 'Fatawa',\n content: FATAWA,\n },\n {\n id: 'fiqh' as const,\n name: 'Fiqh',\n content: FIQH,\n },\n {\n id: 'hadith' as const,\n name: 'Hadith',\n content: HADITH,\n },\n {\n id: 'jarh_wa_tadil' as const,\n name: 'Jarh Wa Tadil',\n content: JARH_WA_TADIL,\n },\n {\n id: 'tafsir' as const,\n name: 'Tafsir',\n content: TAFSIR,\n },\n {\n id: 'usul_al_fiqh' as const,\n name: 'Usul Al Fiqh',\n content: USUL_AL_FIQH,\n },\n] as const;\n\nexport type PromptMetadata = (typeof PROMPTS)[number];\n","import { MASTER_PROMPT, PROMPTS, type PromptId, type PromptMetadata } from '@generated/prompts';\n\nexport type { PromptId, PromptMetadata };\n\n/**\n * A stacked prompt ready for use with an LLM.\n */\nexport type StackedPrompt = {\n /** Unique identifier */\n id: PromptId;\n /** Human-readable name */\n name: string;\n /** The full prompt content (master + addon if applicable) */\n content: string;\n /** Whether this is the master prompt (not stacked) */\n isMaster: boolean;\n};\n\n/**\n * Stacks a master prompt with a specialized addon prompt.\n *\n * @param master - The master/base prompt\n * @param addon - The specialized addon prompt\n * @returns Combined prompt text\n */\nexport const stackPrompts = (master: string, addon: string): string => {\n if (!master) {\n return addon;\n }\n if (!addon) {\n return master;\n }\n return `${master}\\n${addon}`;\n};\n\n/**\n * Gets all available prompts as stacked prompts (master + addon combined).\n * Master prompt is returned 
as-is, addon prompts are stacked with master.\n *\n * @returns Array of all stacked prompts\n */\nexport const getPrompts = (): StackedPrompt[] => {\n return PROMPTS.map((prompt) => ({\n content: prompt.id === 'master_prompt' ? prompt.content : stackPrompts(MASTER_PROMPT, prompt.content),\n id: prompt.id,\n isMaster: prompt.id === 'master_prompt',\n name: prompt.name,\n }));\n};\n\n/**\n * Gets a specific prompt by ID (strongly typed).\n * Returns the stacked version (master + addon) for addon prompts.\n *\n * @param id - The prompt ID to retrieve\n * @returns The stacked prompt\n * @throws Error if prompt ID is not found\n */\nexport const getPrompt = (id: PromptId): StackedPrompt => {\n const prompt = PROMPTS.find((p) => p.id === id);\n if (!prompt) {\n throw new Error(`Prompt not found: ${id}`);\n }\n\n return {\n content: prompt.id === 'master_prompt' ? prompt.content : stackPrompts(MASTER_PROMPT, prompt.content),\n id: prompt.id,\n isMaster: prompt.id === 'master_prompt',\n name: prompt.name,\n };\n};\n\n/**\n * Gets the raw stacked prompt text for a specific prompt ID.\n * Convenience method for when you just need the text.\n *\n * @param id - The prompt ID\n * @returns The stacked prompt content string\n */\nexport const getStackedPrompt = (id: PromptId): string => {\n return getPrompt(id).content;\n};\n\n/**\n * Gets the list of available prompt IDs.\n * Useful for UI dropdowns or validation.\n *\n * @returns Array of prompt IDs\n */\nexport const getPromptIds = (): PromptId[] => {\n return PROMPTS.map((p) => p.id);\n};\n\n/**\n * Gets just the master prompt content.\n * Useful when you need to use a custom addon.\n *\n * @returns The master prompt content\n */\nexport const getMasterPrompt = (): string => {\n return MASTER_PROMPT;\n};\n","/**\n * Segment type is shared across the library.\n */\nimport { MARKER_ID_PATTERN, SPEAKER_LABEL_GUESS_PATTERN, TRANSLATION_MARKER_PARTS } from './constants';\nimport type { Segment } from './types';\n\n/**\n * Formats 
excerpts for an LLM prompt by combining the prompt rules with the segment text.\n * Each segment is formatted as \"ID - Text\" and separated by double newlines.\n *\n * @param segments - Array of segments to format\n * @param prompt - The instruction/system prompt to prepend\n * @returns Combined prompt and formatted text\n */\nexport const formatExcerptsForPrompt = (segments: Segment[], prompt: string) => {\n const formatted = segments.map((e) => `${e.id} - ${e.text}`).join('\\n\\n');\n return [prompt, formatted].join('\\n\\n');\n};\n\n/**\n * Normalize line endings and split merged markers onto separate lines.\n *\n * @example\n * // \"helloP1 - ...\" becomes split onto a new line before \"P1 -\"\n * normalizeTranslationText('helloP1 - x').includes('\\\\nP1 -') === true\n */\nexport const normalizeTranslationText = (content: string) => {\n return normalizeTranslationTextWithMap(content).normalized;\n};\n\nconst normalizeLineEndingsWithMap = (input: string) => {\n let normalized = '';\n const indexMap: number[] = [];\n for (let i = 0; i < input.length; i++) {\n const ch = input[i];\n if (ch === '\\r') {\n if (input[i + 1] === '\\n') {\n normalized += '\\n';\n indexMap.push(i);\n i++;\n continue;\n }\n normalized += '\\n';\n indexMap.push(i);\n continue;\n }\n normalized += ch;\n indexMap.push(i);\n }\n return { indexMap, normalized };\n};\n\nconst insertNewlinesBeforeMergedMarkers = (text: string, map: number[]) => {\n const mergedMarkerNoSpacePattern = new RegExp(\n `([^\\\\s\\\\n])(${MARKER_ID_PATTERN}${TRANSLATION_MARKER_PARTS.optionalSpace}${TRANSLATION_MARKER_PARTS.dashes})`,\n 'g',\n );\n let normalized = '';\n const indexMap: number[] = [];\n let lastIndex = 0;\n for (const match of text.matchAll(mergedMarkerNoSpacePattern)) {\n const matchIndex = match.index ?? 
0;\n for (let i = lastIndex; i < matchIndex; i++) {\n normalized += text[i];\n indexMap.push(map[i]);\n }\n normalized += match[1];\n indexMap.push(map[matchIndex]);\n normalized += '\\n';\n indexMap.push(map[matchIndex]);\n const marker = match[2];\n for (let j = 0; j < marker.length; j++) {\n normalized += marker[j];\n indexMap.push(map[matchIndex + 1 + j]);\n }\n lastIndex = matchIndex + match[0].length;\n }\n for (let i = lastIndex; i < text.length; i++) {\n normalized += text[i];\n indexMap.push(map[i]);\n }\n return { indexMap, normalized };\n};\n\nconst replaceSpaceBeforeMarkerWithNewline = (text: string, map: number[]) => {\n const mergedMarkerWithSpacePattern = new RegExp(\n ` (${MARKER_ID_PATTERN}${TRANSLATION_MARKER_PARTS.optionalSpace}${TRANSLATION_MARKER_PARTS.dashes})`,\n 'g',\n );\n let normalized = '';\n const indexMap: number[] = [];\n let lastIndex = 0;\n for (const match of text.matchAll(mergedMarkerWithSpacePattern)) {\n const matchIndex = match.index ?? 0;\n for (let i = lastIndex; i < matchIndex; i++) {\n normalized += text[i];\n indexMap.push(map[i]);\n }\n normalized += '\\n';\n indexMap.push(map[matchIndex]);\n const marker = match[1];\n for (let j = 0; j < marker.length; j++) {\n normalized += marker[j];\n indexMap.push(map[matchIndex + 1 + j]);\n }\n lastIndex = matchIndex + match[0].length;\n }\n for (let i = lastIndex; i < text.length; i++) {\n normalized += text[i];\n indexMap.push(map[i]);\n }\n return { indexMap, normalized };\n};\n\nconst removeEscapedBrackets = (text: string, map: number[]) => {\n let normalized = '';\n const indexMap: number[] = [];\n for (let i = 0; i < text.length; i++) {\n if (text[i] === '\\\\' && text[i + 1] === '[') {\n i++;\n normalized += '[';\n indexMap.push(map[i]);\n continue;\n }\n normalized += text[i];\n indexMap.push(map[i]);\n }\n return { indexMap, normalized };\n};\n\nexport const normalizeTranslationTextWithMap = (content: string) => {\n const lineEndingNormalized = 
normalizeLineEndingsWithMap(content);\n const insertedNewlines = insertNewlinesBeforeMergedMarkers(\n lineEndingNormalized.normalized,\n lineEndingNormalized.indexMap,\n );\n const spaceReplaced = replaceSpaceBeforeMarkerWithNewline(insertedNewlines.normalized, insertedNewlines.indexMap);\n return removeEscapedBrackets(spaceReplaced.normalized, spaceReplaced.indexMap);\n};\n\n/**\n * Extract translation IDs from normalized response, in order.\n *\n * @example\n * extractTranslationIds('P1 - a\\\\nP2b - b') // => ['P1', 'P2b']\n */\nexport const extractTranslationIds = (text: string) => {\n const { dashes, optionalSpace } = TRANSLATION_MARKER_PARTS;\n const pattern = new RegExp(`^(${MARKER_ID_PATTERN})${optionalSpace}${dashes}`, 'gm');\n const ids: string[] = [];\n for (const match of text.matchAll(pattern)) {\n ids.push(match[1]);\n }\n return ids;\n};\n\n/**\n * Parses bulk translation text into a Map for efficient O(1) lookup.\n *\n * Handles multi-line translations: subsequent non-marker lines belong to the previous ID.\n *\n * @param rawText - Raw text containing translations in format \"ID - Translation text\"\n * @returns An object with `count` and `translationMap`\n *\n * @example\n * parseTranslations('P1 - a\\\\nP2 - b').count === 2\n */\nexport const parseTranslations = (rawText: string) => {\n const normalized = normalizeTranslationText(rawText);\n const translationMap = splitResponseById(normalized);\n return { count: translationMap.size, translationMap };\n};\n\n/**\n * Parse translations into an ordered array (preserving the original response order).\n *\n * This differs from `parseTranslations()` which returns a Map and therefore cannot represent\n * duplicates as separate entries.\n *\n * @param rawText - Raw text containing translations in format \"ID - Translation text\"\n * @returns Array of `{ id, translation }` entries in appearance order\n *\n * @example\n * parseTranslationsInOrder('P1 - a\\\\nP2 - b').map((e) => e.id) // => ['P1', 'P2']\n 
*/\nexport const parseTranslationsInOrder = (rawText: string) => {\n const normalized = normalizeTranslationText(rawText);\n const { dashes, optionalSpace } = TRANSLATION_MARKER_PARTS;\n const headerPattern = new RegExp(`^(${MARKER_ID_PATTERN})${optionalSpace}${dashes}\\\\s*`, 'gm');\n const matches = [...normalized.matchAll(headerPattern)];\n\n const entries: Array<{ id: string; translation: string }> = [];\n for (let i = 0; i < matches.length; i++) {\n const id = matches[i][1];\n const start = matches[i].index ?? 0;\n const nextStart = i + 1 < matches.length ? (matches[i + 1].index ?? normalized.length) : normalized.length;\n const chunk = normalized.slice(start, nextStart).trimEnd();\n const prefixPattern = new RegExp(`^${id}${optionalSpace}${dashes}\\\\s*`);\n const translation = chunk.replace(prefixPattern, '').trim();\n entries.push({ id, translation });\n }\n return entries;\n};\n\n/**\n * Split the response into a per-ID map. Values contain translation content only (prefix removed).\n *\n * @example\n * splitResponseById('P1 - a\\\\nP2 - b').get('P1') === 'a'\n */\nexport const splitResponseById = (text: string) => {\n const map = new Map<string, string>();\n for (const entry of parseTranslationsInOrder(text)) {\n map.set(entry.id, entry.translation);\n }\n return map;\n};\n\n/**\n * Build a regex alternation for speaker labels (with trailing colon).\n *\n * @param labels - Speaker labels without trailing colons\n * @returns Regex alternation string (no flags)\n */\nexport const buildSpeakerLabelPattern = (labels: string[]) => {\n const parts = labels.map((label) => `${escapeRegExp(label)}\\\\s*:`).join('|');\n return `(?:${parts})`;\n};\n\n/**\n * Build a regex alternation for punctuation tokens.\n *\n * @param punctuation - Punctuation tokens to include\n * @returns Regex alternation string (no flags)\n */\nexport const buildPunctuationPattern = (punctuation: string[]) =>\n punctuation.map((token) => escapeRegExp(token)).join('|');\n\n/**\n * Build a 
line-start regex for speaker labels, accounting for optional \"ID - \" prefix.\n *\n * @param labels - Speaker labels without trailing colons\n * @returns RegExp that matches line-start labels (with optional ID prefix)\n */\nexport const buildLineStartLabelPattern = (labels: string[]) => {\n const labelPattern = buildSpeakerLabelPattern(labels);\n const { dashes, optionalSpace } = TRANSLATION_MARKER_PARTS;\n return new RegExp(`^(?:${MARKER_ID_PATTERN}${optionalSpace}${dashes}\\\\s*)?(${labelPattern})`);\n};\n\n/**\n * Infer repeated speaker labels from the text (only labels that appear 2+ times).\n *\n * @param text - Full translation text\n * @returns Labels ordered by first appearance\n */\nexport const inferSpeakerLabels = (text: string) => {\n const counts = new Map<string, { count: number; firstIndex: number }>();\n\n for (const match of text.matchAll(SPEAKER_LABEL_GUESS_PATTERN)) {\n const label = match[1];\n if (!label) {\n continue;\n }\n const entry = counts.get(label);\n if (entry) {\n entry.count += 1;\n } else {\n counts.set(label, { count: 1, firstIndex: match.index ?? 
0 });\n }\n }\n\n return [...counts.entries()]\n .filter(([, info]) => info.count >= 2)\n .sort((a, b) => a[1].firstIndex - b[1].firstIndex)\n .map(([label]) => label);\n};\n\n/**\n * Escape special regex characters in a literal string.\n *\n * @param s - Raw string to escape\n * @returns Escaped string safe for RegExp construction\n */\nexport const escapeRegExp = (s: string) => s.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n","import { DEFAULT_LEADING_PUNCTUATION } from './constants';\nimport {\n buildLineStartLabelPattern,\n buildPunctuationPattern,\n buildSpeakerLabelPattern,\n inferSpeakerLabels,\n} from './textUtils';\nimport type { FixAllOptions, FixConfig, FixResult, ValidationErrorType } from './types';\n\n/**\n * Fixes collapsed speaker lines by inserting newlines before mid-line labels.\n */\nexport const fixCollapsedSpeakerLines = (text: string, config?: FixConfig): FixResult => {\n const speakerLabels = config?.speakerLabels?.length ? config.speakerLabels : inferSpeakerLabels(text);\n if (speakerLabels.length === 0) {\n return {\n applied: [],\n counts: { fixCollapsedSpeakerLines: 0 },\n text,\n };\n }\n const leadingPunctuation = config?.leadingPunctuation ?? DEFAULT_LEADING_PUNCTUATION;\n const labelPattern = buildSpeakerLabelPattern(speakerLabels);\n let count = 0;\n const punctuationPattern = buildPunctuationPattern(leadingPunctuation);\n const trailingPunctPattern = punctuationPattern ? 
new RegExp(`(?:${punctuationPattern})+\\\\s*$`) : undefined;\n const lineStartPattern = buildLineStartLabelPattern(speakerLabels);\n const labelRegex = new RegExp(labelPattern, 'g');\n\n /**\n * Format a prefix + label insertion, preserving trailing punctuation.\n */\n const formatInsertion = (prefix: string, matchText: string) => {\n if (!trailingPunctPattern) {\n return `${prefix.replace(/\\s+$/, '')}\\n${matchText}`;\n }\n const punctMatch = prefix.match(trailingPunctPattern);\n if (!punctMatch) {\n return `${prefix.replace(/\\s+$/, '')}\\n${matchText}`;\n }\n const punct = punctMatch[0].replace(/\\s+$/, '');\n const beforePunct = prefix.slice(0, -punctMatch[0].length);\n return `${beforePunct}${punct}\\n${matchText}`;\n };\n\n /**\n * Apply collapsed-speaker fixes within a single line.\n */\n const updateLine = (line: string) => {\n const startMatch = line.match(lineStartPattern);\n const lineStartLabelIndex = startMatch ? startMatch[0].length - startMatch[1].length : -1;\n let lastIndex = 0;\n let updated = '';\n let lineCount = 0;\n\n for (const match of line.matchAll(labelRegex)) {\n const idx = match.index ?? 0;\n if (idx === lineStartLabelIndex || idx === 0) {\n continue;\n }\n const prefix = line.slice(lastIndex, idx);\n updated += formatInsertion(prefix, match[0]);\n lastIndex = idx + match[0].length;\n lineCount += 1;\n }\n\n if (lineCount === 0) {\n return { line, lineCount };\n }\n return { line: `${updated}${line.slice(lastIndex)}`, lineCount };\n };\n\n const fixed = text\n .split('\\n')\n .map((line) => {\n const result = updateLine(line);\n count += result.lineCount;\n return result.line;\n })\n .join('\\n');\n return {\n applied: count > 0 ? 
['fixCollapsedSpeakerLines'] : [],\n counts: { fixCollapsedSpeakerLines: count },\n text: fixed,\n };\n};\n\nconst FIXERS_BY_TYPE: Partial<Record<ValidationErrorType, (text: string, config?: FixConfig) => FixResult>> = {\n collapsed_speakers: fixCollapsedSpeakerLines,\n mismatched_colons: fixCollapsedSpeakerLines,\n};\n\n/**\n * Apply all fixers requested by type, in order.\n */\nexport const fixAll = (text: string, options: FixAllOptions): FixResult => {\n const requested = options.types;\n const applied: string[] = [];\n const skipped: string[] = [];\n let currentText = text;\n const counts: Record<string, number> = {};\n\n for (const type of requested) {\n const fixer = FIXERS_BY_TYPE[type];\n if (!fixer) {\n skipped.push(type);\n continue;\n }\n const result = fixer(currentText, options.config);\n currentText = result.text;\n applied.push(type);\n for (const [key, value] of Object.entries(result.counts)) {\n counts[key] = (counts[key] ?? 0) + value;\n }\n }\n\n return {\n applied,\n counts,\n requested,\n skipped,\n text: currentText,\n };\n};\n","import {\n ARCHAIC_WORDS,\n MARKER_ID_PATTERN,\n MAX_EMPTY_PARENTHESES,\n MIN_ARABIC_LENGTH_FOR_TRUNCATION_CHECK,\n MIN_TRANSLATION_RATIO,\n TRANSLATION_MARKER_PARTS,\n} from './constants';\nimport { escapeRegExp, normalizeTranslationTextWithMap } from './textUtils';\nimport type {\n Range,\n Segment,\n TranslationMarker,\n ValidationConfig,\n ValidationContext,\n ValidationError,\n ValidationErrorType,\n ValidationResponseResult,\n ValidationRule,\n} from './types';\n\n/**\n * Human-readable descriptions for each `ValidationErrorType`, intended for client UIs and logs.\n *\n * @example\n * VALIDATION_ERROR_TYPE_INFO.arabic_leak.description\n */\nexport const VALIDATION_ERROR_TYPE_INFO = {\n all_caps: {\n description: 'ALL CAPS “shouting” detected (run of N uppercase words).',\n },\n arabic_leak: {\n description: 'Arabic script was detected in output (except ﷺ).',\n },\n archaic_register: {\n description: 
'Archaic/Biblical English detected (e.g., thou, verily, shalt).',\n },\n collapsed_speakers: {\n description: 'Speaker labels appear mid-line instead of starting on a new line.',\n },\n duplicate_id: {\n description: 'The same segment ID appears more than once in the response.',\n },\n empty_parentheses: {\n description: 'Excessive \"()\" patterns detected, often indicating failed/empty term-pairs.',\n },\n god_usage: {\n description: 'Forbidden \"God\" usage detected where \"Allah\" should be used.',\n },\n implicit_continuation: {\n description: 'The response includes continuation/meta phrasing (e.g., \"continued:\", \"implicit continuation\").',\n },\n invalid_marker_format: {\n description: 'A segment marker line is malformed (e.g., wrong ID shape or missing content after the dash).',\n },\n invented_id: {\n description: 'The response contains a segment ID that does not exist in the provided source corpus.',\n },\n length_mismatch: {\n description: 'Translation appears too short relative to Arabic source (heuristic truncation check).',\n },\n meta_talk: {\n description: 'The response includes translator/editor notes instead of pure translation.',\n },\n mismatched_colons: {\n description:\n 'Per-segment mismatch between Arabic and translation line-start speaker labels (detected as line-start prefixes ending in \":\").',\n },\n missing_id_gap: {\n description:\n 'A gap was detected: the response includes two IDs whose corpus order implies one or more intermediate IDs are missing.',\n },\n multiword_translit_without_gloss: {\n description: 'A multi-word transliteration phrase was detected without an immediate parenthetical gloss.',\n },\n newline_after_id: {\n description: 'The response used \"ID -\\\\nText\" instead of \"ID - Text\" (newline immediately after the marker).',\n },\n no_valid_markers: {\n description: 'No valid \"ID - ...\" markers were found anywhere in the response.',\n },\n truncated_segment: {\n description: 'A segment appears truncated (e.g., 
only \"…\", \"...\", or \"[INCOMPLETE]\").',\n },\n wrong_diacritics: {\n description: 'Wrong diacritics like â/ã/á were detected (should use macrons like ā ī ū).',\n },\n} as const satisfies Record<ValidationErrorType, { description: string }>;\n\nconst buildWordPattern = (words: readonly string[], flags = 'gi') =>\n new RegExp(`\\\\b(?:${words.map((w) => escapeRegExp(w)).join('|')})\\\\b`, flags);\n\nconst trimRange = (text: string, start: number, end: number) => {\n let s = start;\n let e = end;\n while (s < e && /\\s/.test(text[s])) {\n s++;\n }\n while (e > s && /\\s/.test(text[e - 1])) {\n e--;\n }\n return { end: e, start: s };\n};\n\nconst toRawRange = (normalizedStart: number, normalizedEnd: number, indexMap: number[], rawLength: number): Range => {\n if (normalizedEnd <= normalizedStart) {\n const rawStart = indexMap[normalizedStart] ?? 0;\n return { end: rawStart, start: rawStart };\n }\n const rawStart = indexMap[normalizedStart] ?? 0;\n const rawEndBase = indexMap[Math.max(normalizedEnd - 1, normalizedStart)] ?? rawStart;\n const rawEnd = Math.min(rawLength, rawEndBase + 1);\n return { end: rawEnd, start: rawStart };\n};\n\nconst buildMarkers = (normalized: string, indexMap: number[], rawLength: number): TranslationMarker[] => {\n const { dashes, optionalSpace } = TRANSLATION_MARKER_PARTS;\n const headerPattern = new RegExp(`^(${MARKER_ID_PATTERN})${optionalSpace}${dashes}\\\\s*`, 'gm');\n const matches = [...normalized.matchAll(headerPattern)];\n const markers: TranslationMarker[] = [];\n\n for (let i = 0; i < matches.length; i++) {\n const match = matches[i];\n const id = match[1];\n const normalizedStart = match.index ?? 0;\n const normalizedEnd = normalizedStart + match[0].length;\n const nextStart = i + 1 < matches.length ? (matches[i + 1].index ?? 
normalized.length) : normalized.length;\n const translationRange = trimRange(normalized, normalizedEnd, nextStart);\n const headerRange = toRawRange(normalizedStart, normalizedEnd, indexMap, rawLength);\n const translationRawRange = toRawRange(translationRange.start, translationRange.end, indexMap, rawLength);\n\n markers.push({\n headerText: match[0],\n id,\n normalizedEnd,\n normalizedStart,\n rawEnd: headerRange.end,\n rawStart: headerRange.start,\n rawTranslationEnd: translationRawRange.end,\n rawTranslationStart: translationRawRange.start,\n translationEnd: translationRange.end,\n translationStart: translationRange.start,\n });\n }\n\n return markers;\n};\n\nconst buildResponseById = (markers: TranslationMarker[], normalized: string) => {\n const responseById = new Map<string, string>();\n for (const marker of markers) {\n const translationText = normalized.slice(marker.translationStart, marker.translationEnd).trim();\n responseById.set(marker.id, translationText);\n }\n return responseById;\n};\n\nconst DEFAULT_VALIDATION_CONFIG: ValidationConfig = {\n allCapsWordRunThreshold: 5,\n};\n\nconst buildValidationContext = (\n segments: Segment[],\n rawResponse: string,\n config: ValidationConfig,\n): ValidationContext => {\n const { normalized, indexMap } = normalizeTranslationTextWithMap(rawResponse);\n const markers = buildMarkers(normalized, indexMap, rawResponse.length);\n const parsedIds = markers.map((m) => m.id);\n const segmentById = new Map<string, Segment>();\n for (const s of segments) {\n segmentById.set(s.id, s);\n }\n const responseById = buildResponseById(markers, normalized);\n return {\n config,\n indexMap,\n markers,\n normalizedResponse: normalized,\n parsedIds,\n rawResponse,\n responseById,\n segmentById,\n segments,\n };\n};\n\nconst makeErrorFromNormalized = (\n context: ValidationContext,\n type: ValidationErrorType,\n message: string,\n matchText: string,\n normalizedStart: number,\n normalizedEnd: number,\n id?: string,\n): 
ValidationError => {\n let resolvedId = id;\n if (!resolvedId) {\n // Try to find which marker contains this error range\n for (const marker of context.markers) {\n // Check if error falls within the translation content of a marker\n // We use loose bounds to catch errors at boundaries\n if (normalizedStart >= marker.translationStart && normalizedEnd <= marker.translationEnd) {\n resolvedId = marker.id;\n break;\n }\n }\n }\n\n return {\n id: resolvedId,\n matchText,\n message,\n range: toRawRange(normalizedStart, normalizedEnd, context.indexMap, context.rawResponse.length),\n type,\n };\n};\n\nconst makeErrorFromRawRange = (\n type: ValidationErrorType,\n message: string,\n matchText: string,\n range: Range,\n id?: string,\n): ValidationError => ({\n id,\n matchText,\n message,\n range,\n type,\n});\n\n/**\n * Validate an LLM translation response against a set of Arabic source segments.\n *\n * Rules are expressed as a list of typed errors. The caller decides severity.\n * The validator normalizes the response first (marker splitting + line endings).\n *\n * Important: `segments` may be the full corpus. The validator reduces to only\n * those IDs parsed from the response (plus detects missing-ID gaps between IDs).\n *\n * @example\n * // Pass (no errors)\n * validateTranslationResponse(\n * [{ id: 'P1', text: 'نص عربي طويل...' }],\n * 'P1 - A complete translation.'\n * ).errors.length === 0\n *\n * @example\n * // Fail (invented ID)\n * validateTranslationResponse(\n * [{ id: 'P1', text: 'نص عربي طويل...' 
}],\n * 'P2 - This ID is not in the corpus.'\n * ).errors.some(e => e.type === 'invented_id') === true\n */\nexport const validateTranslationResponse = (\n segments: Segment[],\n response: string,\n options?: { rules?: ValidationRule[]; config?: Partial<ValidationConfig> },\n): ValidationResponseResult => {\n const config = { ...DEFAULT_VALIDATION_CONFIG, ...options?.config };\n const context = buildValidationContext(segments, response, config);\n if (context.parsedIds.length === 0) {\n return {\n errors: [\n {\n matchText: response,\n message: 'No valid translation markers found',\n range: { end: response.length, start: 0 },\n ruleId: 'no_valid_markers',\n type: 'no_valid_markers',\n },\n ],\n normalizedResponse: context.normalizedResponse,\n parsedIds: [],\n };\n }\n\n const rules = options?.rules ?? DEFAULT_RULES;\n const errors = rules.flatMap((rule) => rule.run(context).map((e) => ({ ...e, ruleId: e.ruleId ?? rule.id })));\n\n return { errors, normalizedResponse: context.normalizedResponse, parsedIds: context.parsedIds };\n};\n\n/**\n * Validate translation marker format (single-line errors).\n *\n * @example\n * // Fail: malformed marker\n * validateMarkerFormat('B1234$5 - x')[0]?.type === 'invalid_marker_format'\n */\nconst validateMarkerFormat = (context: ValidationContext): ValidationError[] => {\n const text = context.normalizedResponse;\n const { markers, digits, suffix, dashes, optionalSpace } = TRANSLATION_MARKER_PARTS;\n const errors: ValidationError[] = [];\n\n const invalidRefPattern = new RegExp(\n `^${markers}(?=${digits})(?=.*${dashes})(?!${digits}${suffix}*${optionalSpace}${dashes})[^\\\\s-–—]+${optionalSpace}${dashes}`,\n 'gm',\n );\n for (const match of text.matchAll(invalidRefPattern)) {\n const matchText = match[0];\n const idx = match.index ?? 
0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'invalid_marker_format',\n `Invalid reference format \"${matchText.trim()}\" - expected format is letter + numbers + optional suffix (a-j) + dash`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n\n const spaceBeforePattern = new RegExp(` ${markers}${digits}${suffix}+${optionalSpace}${dashes}`, 'gm');\n for (const match of text.matchAll(spaceBeforePattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'invalid_marker_format',\n `Suspicious reference found: \"${matchText}\"`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n\n const suffixNoDashPattern = new RegExp(`^${markers}${digits}${suffix}(?! ${dashes})`, 'gm');\n for (const match of text.matchAll(suffixNoDashPattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'invalid_marker_format',\n `Suspicious reference found: \"${matchText}\"`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n\n const emptyAfterDashPattern = new RegExp(`^${MARKER_ID_PATTERN}${optionalSpace}${dashes}\\\\s*$`, 'gm');\n for (const match of text.matchAll(emptyAfterDashPattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'invalid_marker_format',\n `Reference \"${matchText.trim()}\" has dash but no content after it`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n\n const dollarSignPattern = new RegExp(`^${markers}${digits}\\\\$${digits}`, 'gm');\n for (const match of text.matchAll(dollarSignPattern)) {\n const matchText = match[0];\n const idx = match.index ?? 
0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'invalid_marker_format',\n `Invalid reference format \"${matchText}\" - contains $ character`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n\n return errors;\n};\n\n/**\n * Detect newline after an ID line (formatting bug).\n *\n * @example\n * // Fail: newline after \"P1 -\"\n * validateNewlineAfterId('P1 -\\\\nText')[0]?.type === 'newline_after_id'\n */\nconst validateNewlineAfterId = (context: ValidationContext): ValidationError[] => {\n const pattern = new RegExp(\n `^${MARKER_ID_PATTERN}${TRANSLATION_MARKER_PARTS.optionalSpace}${TRANSLATION_MARKER_PARTS.dashes}\\\\s*\\\\n`,\n 'gm',\n );\n const errors: ValidationError[] = [];\n for (const match of context.normalizedResponse.matchAll(pattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'newline_after_id',\n `Invalid format: newline after ID \"${matchText.trim()}\" - use \"ID - Text\" format`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n return errors;\n};\n\n/**\n * Detect duplicated IDs in the parsed ID list.\n *\n * @example\n * validateDuplicateIds(['P1','P1'])[0]?.type === 'duplicate_id'\n */\nconst validateDuplicateIds = (context: ValidationContext): ValidationError[] => {\n const seen = new Set<string>();\n const errors: ValidationError[] = [];\n for (const marker of context.markers) {\n if (seen.has(marker.id)) {\n errors.push(\n makeErrorFromRawRange(\n 'duplicate_id',\n `Duplicate ID \"${marker.id}\" detected - each segment should appear only once`,\n marker.headerText,\n { end: marker.rawEnd, start: marker.rawStart },\n marker.id,\n ),\n );\n } else {\n seen.add(marker.id);\n }\n }\n return errors;\n};\n\n/**\n * Detect IDs in the response that do not exist in the passed segment corpus.\n *\n * @example\n * validateInventedIds(['P1','P2'], new Map([['P1',{id:'P1',text:'x'}]]) )[0]?.type === 'invented_id'\n */\nconst validateInventedIds = 
(context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n for (const marker of context.markers) {\n if (!context.segmentById.has(marker.id)) {\n errors.push(\n makeErrorFromRawRange(\n 'invented_id',\n `Invented ID detected: \"${marker.id}\" - this ID does not exist in the source`,\n marker.headerText,\n { end: marker.rawEnd, start: marker.rawStart },\n marker.id,\n ),\n );\n }\n }\n return errors;\n};\n\n/**\n * Detect a “gap”: response contains IDs A and C, but the corpus order includes B between them.\n * This only checks for missing IDs between consecutive IDs within each response-ordered block.\n *\n * @example\n * // Corpus: P1, P2, P3. Response: P1, P3 => missing_id_gap includes P2\n */\nconst buildSegmentIndexById = (segments: Segment[]) => {\n const indexById = new Map<string, number>();\n for (let i = 0; i < segments.length; i++) {\n indexById.set(segments[i].id, i);\n }\n return indexById;\n};\n\nconst getGapIndices = (context: ValidationContext, indexById: Map<string, number>, aId: string, bId: string) => {\n if (!context.segmentById.has(aId) || !context.segmentById.has(bId)) {\n return;\n }\n const ia = indexById.get(aId);\n const ib = indexById.get(bId);\n if (ia == null || ib == null || ib < ia) {\n return;\n }\n return { end: ib, start: ia };\n};\n\nconst collectMissingIds = (\n context: ValidationContext,\n startIdx: number,\n endIdx: number,\n parsedIdSet: Set<string>,\n missing: Set<string>,\n) => {\n const found: string[] = [];\n for (let j = startIdx + 1; j < endIdx; j++) {\n const midId = context.segments[j]?.id;\n if (!midId) {\n continue;\n }\n if (!context.segmentById.has(midId) || parsedIdSet.has(midId) || missing.has(midId)) {\n continue;\n }\n missing.add(midId);\n found.push(midId);\n }\n return found;\n};\n\nconst validateMissingIdGaps = (context: ValidationContext): ValidationError[] => {\n const indexById = buildSegmentIndexById(context.segments);\n\n const parsedIdSet = new 
Set(context.parsedIds);\n const missing = new Set<string>();\n const errors: ValidationError[] = [];\n\n for (let i = 0; i < context.markers.length - 1; i++) {\n const a = context.markers[i];\n const b = context.markers[i + 1];\n const gap = getGapIndices(context, indexById, a.id, b.id);\n if (!gap) {\n continue;\n }\n const newlyMissing = collectMissingIds(context, gap.start, gap.end, parsedIdSet, missing);\n for (const midId of newlyMissing) {\n errors.push(\n makeErrorFromRawRange(\n 'missing_id_gap',\n `Missing segment ID detected between translated IDs: \"${midId}\"`,\n context.rawResponse.slice(b.rawStart, b.rawEnd),\n { end: b.rawEnd, start: b.rawStart },\n midId,\n ),\n );\n }\n }\n\n return errors;\n};\n\n/**\n * Detect segments that appear truncated (just \"…\" / \"...\" / \"[INCOMPLETE]\").\n *\n * @example\n * validateTruncatedSegments('P1 - …')[0]?.type === 'truncated_segment'\n */\nconst validateTruncatedSegments = (context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n for (const marker of context.markers) {\n const content = context.normalizedResponse.slice(marker.translationStart, marker.translationEnd).trim();\n const isEllipsis = content === '…' || content === '...';\n const sourceText = context.segmentById.get(marker.id)?.text ?? 
'';\n const sourceIsEllipsisOnly = /^\\s*(?:…|\\.{3})\\s*$/.test(sourceText);\n if (!content || content === '[INCOMPLETE]' || (isEllipsis && !sourceIsEllipsisOnly)) {\n errors.push(\n makeErrorFromRawRange(\n 'truncated_segment',\n `Truncated segment detected: \"${marker.id}\" - segments must be fully translated`,\n context.rawResponse.slice(marker.rawTranslationStart, marker.rawTranslationEnd),\n { end: marker.rawTranslationEnd, start: marker.rawTranslationStart },\n marker.id,\n ),\n );\n }\n }\n return errors;\n};\n\n/**\n * Detect implicit continuation markers.\n *\n * @example\n * validateImplicitContinuation('P1 - continued: ...')[0]?.type === 'implicit_continuation'\n */\nconst validateImplicitContinuation = (context: ValidationContext): ValidationError[] => {\n const patterns = [/implicit continuation/gi, /\\bcontinuation:/gi, /\\bcontinued:/gi];\n const errors: ValidationError[] = [];\n for (const pattern of patterns) {\n for (const match of context.normalizedResponse.matchAll(pattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'implicit_continuation',\n `Detected \"${matchText}\" - do not add implicit continuation text`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n }\n return errors;\n};\n\n/**\n * Detect meta-talk (translator/editor notes).\n *\n * @example\n * validateMetaTalk(\"P1 - (Translator's note: ...)\")[0]?.type === 'meta_talk'\n */\nconst validateMetaTalk = (context: ValidationContext): ValidationError[] => {\n const patterns = [/\\(note:/gi, /\\(translator'?s? note:/gi, /\\[editor:/gi, /\\[note:/gi, /\\(ed\\.:/gi, /\\(trans\\.:/gi];\n const errors: ValidationError[] = [];\n for (const pattern of patterns) {\n for (const match of context.normalizedResponse.matchAll(pattern)) {\n const matchText = match[0];\n const idx = match.index ?? 
0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'meta_talk',\n `Detected meta-talk \"${matchText}\" - output translation only, no translator/editor notes`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n }\n return errors;\n};\n\n/**\n * Detect Arabic script characters (except ﷺ).\n *\n * @example\n * validateArabicLeak('P1 - الله')[0]?.type === 'arabic_leak'\n *\n * @example\n * // Pass: ﷺ allowed\n * validateArabicLeak('P1 - Muḥammad ﷺ said...').length === 0\n */\nconst validateArabicLeak = (context: ValidationContext): ValidationError[] => {\n const arabicPattern = /[\\u0600-\\u06FF\\u0750-\\u077F\\uFB50-\\uFDF9\\uFDFB-\\uFDFF\\uFE70-\\uFEFF]+/g;\n const errors: ValidationError[] = [];\n\n for (const marker of context.markers) {\n const text = context.normalizedResponse.slice(marker.translationStart, marker.translationEnd);\n let longestMatch: RegExpMatchArray | undefined;\n for (const match of text.matchAll(arabicPattern)) {\n const matchText = match[0].replace(/ﷺ/g, '').trim();\n if (!matchText) {\n continue;\n }\n if (!longestMatch || matchText.length > longestMatch[0].replace(/ﷺ/g, '').trim().length) {\n longestMatch = match;\n }\n }\n if (!longestMatch) {\n continue;\n }\n const matchText = longestMatch[0].replace(/ﷺ/g, '').trim();\n const idx = longestMatch.index ?? 
0;\n const normalizedStart = marker.translationStart + idx;\n const normalizedEnd = normalizedStart + longestMatch[0].length;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'arabic_leak',\n `Arabic script detected: \"${matchText}\"`,\n matchText,\n normalizedStart,\n normalizedEnd,\n marker.id,\n ),\n );\n }\n\n return errors;\n};\n\n/**\n * Detect wrong diacritics (â/ã/á) that indicate failed ALA-LC macrons.\n *\n * @example\n * validateWrongDiacritics('kâfir')[0]?.type === 'wrong_diacritics'\n */\nconst validateWrongDiacritics = (context: ValidationContext): ValidationError[] => {\n const wrongPattern = /[âêîôûãñéíóú]/gi;\n const errors: ValidationError[] = [];\n for (const match of context.normalizedResponse.matchAll(wrongPattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'wrong_diacritics',\n `Wrong diacritic \"${matchText}\" detected - use macrons (ā, ī, ū) instead`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n return errors;\n};\n\n/**\n * Detect excessive empty parentheses \"()\" which often indicates failed transliterations.\n *\n * @example\n * // Fail: too many \"()\"\n * validateEmptyParentheses('() () () ()')[0]?.type === 'empty_parentheses'\n */\nconst validateEmptyParentheses = (context: ValidationContext): ValidationError[] => {\n const matches = [...context.normalizedResponse.matchAll(/\\(\\)/g)];\n if (matches.length <= MAX_EMPTY_PARENTHESES) {\n return [];\n }\n return matches.map((match) => {\n const idx = match.index ?? 0;\n return makeErrorFromNormalized(\n context,\n 'empty_parentheses',\n `Found ${matches.length} empty parentheses \"()\" - this usually indicates failed transliterations. 
Please check if the LLM omitted Arabic/transliterated terms.`,\n match[0],\n idx,\n idx + match[0].length,\n );\n });\n};\n\n/**\n * Detect truncated translation vs Arabic source (ratio-based).\n *\n * @example\n * // Fail: long Arabic + very short translation\n * detectTruncatedTranslation('نص عربي طويل ... (50+ chars)', 'Short') !== undefined\n */\nconst detectTruncatedTranslation = (arabicText: string, translationText: string) => {\n const arabic = (arabicText || '').trim();\n const translation = (translationText || '').trim();\n\n if (arabic.length < MIN_ARABIC_LENGTH_FOR_TRUNCATION_CHECK) {\n return;\n }\n if (translation.length === 0) {\n return `Translation appears empty but Arabic text has ${arabic.length} characters`;\n }\n\n const ratio = translation.length / arabic.length;\n if (ratio < MIN_TRANSLATION_RATIO) {\n const expectedMinLength = Math.round(arabic.length * MIN_TRANSLATION_RATIO);\n return `Translation appears truncated: ${translation.length} chars for ${arabic.length} char Arabic text (expected at least ~${expectedMinLength} chars)`;\n }\n};\n\n/**\n * Validate per-ID translation lengths (response subset only).\n *\n * @example\n * // Produces a length_mismatch error for the first truncated segment found\n */\nconst validateTranslationLengthsForResponse = (context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n for (const marker of context.markers) {\n const seg = context.segmentById.get(marker.id);\n if (!seg) {\n continue;\n }\n const translation = context.normalizedResponse.slice(marker.translationStart, marker.translationEnd).trim();\n const error = detectTruncatedTranslation(seg.text, translation);\n if (error) {\n errors.push(\n makeErrorFromRawRange(\n 'length_mismatch',\n `Translation for \"${marker.id}\" ${error.replace('Translation ', '').toLowerCase()}`,\n translation,\n { end: marker.rawTranslationEnd, start: marker.rawTranslationStart },\n marker.id,\n ),\n );\n }\n }\n return 
errors;\n};\n\n/**\n * Detect “shouting” ALL CAPS words.\n *\n * @example\n * validateAllCaps('THIS IS LOUD')[0]?.type === 'all_caps'\n */\nconst validateAllCaps = (context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n const runThreshold = context.config.allCapsWordRunThreshold;\n const runPattern = new RegExp(`\\\\b(?:[A-Z]{2,}\\\\b\\\\s+){${runThreshold - 1}}[A-Z]{2,}\\\\b`, 'g');\n for (const match of context.normalizedResponse.matchAll(runPattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'all_caps',\n `ALL CAPS detected: \"${matchText.trim()}\"`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n return errors;\n};\n\n/**\n * Detect archaic/Biblical register tokens.\n *\n * @example\n * validateArchaicRegister('verily thou shalt')[0]?.type === 'archaic_register'\n */\nconst validateArchaicRegister = (context: ValidationContext): ValidationError[] => {\n const pattern = buildWordPattern(ARCHAIC_WORDS);\n const errors: ValidationError[] = [];\n for (const match of context.normalizedResponse.matchAll(pattern)) {\n const matchText = match[0];\n const idx = match.index ?? 
0;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'archaic_register',\n `Archaic/Biblical register word detected: \"${matchText}\"`,\n matchText,\n idx,\n idx + matchText.length,\n ),\n );\n }\n return errors;\n};\n\n/**\n * Detect forbidden \"God\" usage when the source Arabic includes الله.\n */\nconst validateGodUsage = (context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n const godPattern = /\\bGod(?:'s|’s|s)?\\b/g;\n const allahPattern = /الله/;\n\n for (const marker of context.markers) {\n const seg = context.segmentById.get(marker.id);\n if (!seg || !allahPattern.test(seg.text)) {\n continue;\n }\n const translation = context.normalizedResponse.slice(marker.translationStart, marker.translationEnd);\n for (const match of translation.matchAll(godPattern)) {\n const matchText = match[0];\n const idx = match.index ?? 0;\n const normalizedStart = marker.translationStart + idx;\n const normalizedEnd = normalizedStart + matchText.length;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'god_usage',\n `Forbidden \"God\" usage detected in \"${marker.id}\" - use \"Allah\" when the source contains الله`,\n matchText,\n normalizedStart,\n normalizedEnd,\n marker.id,\n ),\n );\n }\n }\n\n return errors;\n};\n\ntype LineStartLabelCounts = {\n total: number;\n prefixes: Map<string, number>;\n};\n\nconst getLineStartLabelCounts = (text: string): LineStartLabelCounts => {\n const prefixes = new Map<string, number>();\n const lines = text.split(/\\n/);\n const maxPrefixLength = 28;\n const maxWords = 3;\n\n for (const rawLine of lines) {\n const line = rawLine.trimStart();\n if (!line) {\n continue;\n }\n const colonIdx = line.search(/[::]/);\n if (colonIdx <= 0) {\n continue;\n }\n const prefix = line.slice(0, colonIdx).trim();\n if (!prefix || prefix.length > maxPrefixLength) {\n continue;\n }\n const words = prefix.split(/\\s+/);\n if (words.length > maxWords) {\n continue;\n }\n const count = prefixes.get(prefix) ?? 
0;\n prefixes.set(prefix, count + 1);\n }\n\n let total = 0;\n for (const count of prefixes.values()) {\n total += count;\n }\n\n return { prefixes, total };\n};\n\n/**\n * Detect per-segment mismatch in colon counts between Arabic segment text and its translation chunk.\n *\n * This is intentionally heuristic and avoids hardcoding speaker label tokens.\n *\n * @example\n * // Arabic: \"الشيخ: ... السائل: ...\" => 2 colons\n * // Translation: \"The Shaykh: ...\" => 1 colon => mismatched_colons\n */\nconst validateMismatchedColons = (context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n\n for (const marker of context.markers) {\n const seg = context.segmentById.get(marker.id);\n if (!seg) {\n continue;\n }\n\n const arabicLabels = getLineStartLabelCounts(seg.text);\n const translation = context.normalizedResponse.slice(marker.translationStart, marker.translationEnd);\n const englishLabels = getLineStartLabelCounts(translation);\n\n if (arabicLabels.total !== englishLabels.total && (arabicLabels.total > 0 || englishLabels.total > 0)) {\n errors.push(\n makeErrorFromRawRange(\n 'mismatched_colons',\n `Speaker label count mismatch in \"${marker.id}\": Arabic has ${arabicLabels.total} line-start labels but translation has ${englishLabels.total}. 
This may indicate dropped/moved speaker turns or formatting drift.`,\n translation.trim(),\n { end: marker.rawTranslationEnd, start: marker.rawTranslationStart },\n marker.id,\n ),\n );\n }\n }\n\n return errors;\n};\n\n/**\n * Detect collapsed speaker labels that appear mid-line instead of at line start.\n *\n * This uses translation line-start labels as the reference set, then flags\n * occurrences of those labels inside the same segment's text.\n */\nconst findCollapsedSpeakerLabel = (text: string) => {\n const lineStartLabels = getLineStartLabelCounts(text).prefixes;\n if (lineStartLabels.size === 0) {\n return;\n }\n const labelPattern = [...lineStartLabels.keys()].map((label) => escapeRegExp(label)).join('|');\n if (!labelPattern) {\n return;\n }\n const lines = text.split('\\n');\n let offset = 0;\n const pattern = new RegExp(`\\\\b(${labelPattern})\\\\s*:`, 'g');\n for (const line of lines) {\n for (const match of line.matchAll(pattern)) {\n const idx = match.index ?? 0;\n if (idx > 0) {\n return { index: offset + idx, label: match[1] };\n }\n }\n offset += line.length + 1;\n }\n};\n\nconst validateCollapsedSpeakers = (context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n\n for (const marker of context.markers) {\n const translation = context.normalizedResponse.slice(marker.translationStart, marker.translationEnd);\n if (!translation) {\n continue;\n }\n const matched = findCollapsedSpeakerLabel(translation);\n if (!matched) {\n continue;\n }\n const normalizedStart = marker.translationStart + matched.index;\n const normalizedEnd = normalizedStart + matched.label.length + 1;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'collapsed_speakers',\n `Collapsed speaker label detected in \"${marker.id}\": \"${matched.label}:\" should start on a new line`,\n `${matched.label}:`,\n normalizedStart,\n normalizedEnd,\n marker.id,\n ),\n );\n }\n\n return errors;\n};\n\n/**\n * Detect multi-word transliteration patterns 
without immediate parenthetical gloss.\n *\n * @example\n * // Fail: \"al-hajr fi al-madajīʿ\" without \"(English ...)\" nearby\n * // => multiword_translit_without_gloss\n */\nconst validateMultiwordTranslitWithoutGloss = (context: ValidationContext): ValidationError[] => {\n const errors: ValidationError[] = [];\n const phrasePattern = /\\b(al-[a-zʿʾāīūḥṣḍṭẓ-]+)\\s+fi\\s+(al-[a-zʿʾāīūḥṣḍṭẓ-]+)\\b/gi;\n\n for (const marker of context.markers) {\n const text = context.normalizedResponse.slice(marker.translationStart, marker.translationEnd);\n if (!text) {\n continue;\n }\n\n for (const m of text.matchAll(phrasePattern)) {\n const phrase = `${m[1]} fi ${m[2]}`;\n const idx = m.index ?? -1;\n if (idx >= 0) {\n const after = text.slice(idx, Math.min(text.length, idx + phrase.length + 25));\n if (!after.includes('(')) {\n const normalizedStart = marker.translationStart + idx;\n const normalizedEnd = normalizedStart + phrase.length;\n errors.push(\n makeErrorFromNormalized(\n context,\n 'multiword_translit_without_gloss',\n `Multi-word transliteration without immediate gloss in \"${marker.id}\": \"${phrase}\"`,\n phrase,\n normalizedStart,\n normalizedEnd,\n marker.id,\n ),\n );\n }\n }\n }\n }\n\n return errors;\n};\n\nconst DEFAULT_RULES: ValidationRule[] = [\n { id: 'invalid_marker_format', run: validateMarkerFormat, type: 'invalid_marker_format' },\n { id: 'newline_after_id', run: validateNewlineAfterId, type: 'newline_after_id' },\n { id: 'truncated_segment', run: validateTruncatedSegments, type: 'truncated_segment' },\n { id: 'implicit_continuation', run: validateImplicitContinuation, type: 'implicit_continuation' },\n { id: 'meta_talk', run: validateMetaTalk, type: 'meta_talk' },\n { id: 'duplicate_id', run: validateDuplicateIds, type: 'duplicate_id' },\n { id: 'invented_id', run: validateInventedIds, type: 'invented_id' },\n { id: 'missing_id_gap', run: validateMissingIdGaps, type: 'missing_id_gap' },\n { id: 'arabic_leak', run: validateArabicLeak, type: 
'arabic_leak' },\n { id: 'wrong_diacritics', run: validateWrongDiacritics, type: 'wrong_diacritics' },\n { id: 'empty_parentheses', run: validateEmptyParentheses, type: 'empty_parentheses' },\n { id: 'length_mismatch', run: validateTranslationLengthsForResponse, type: 'length_mismatch' },\n { id: 'all_caps', run: validateAllCaps, type: 'all_caps' },\n { id: 'archaic_register', run: validateArchaicRegister, type: 'archaic_register' },\n { id: 'god_usage', run: validateGodUsage, type: 'god_usage' },\n { id: 'mismatched_colons', run: validateMismatchedColons, type: 'mismatched_colons' },\n { id: 'collapsed_speakers', run: validateCollapsedSpeakers, type: 'collapsed_speakers' },\n {\n id: 'multiword_translit_without_gloss',\n run: validateMultiwordTranslitWithoutGloss,\n type: 'multiword_translit_without_gloss',\n },\n];\n"],"mappings":";;;;AAGA,IAAY,4CAAL;;AAEH;;AAEA;;AAEA;;AAEA;;AAEA;;AAEA;;;;;;AAMJ,MAAa,2BAA2B;CAEpC,QAAQ;CAER,QAAQ;CAER,SAAS,IAAI,QAAQ,OAAO,QAAQ,UAAU,QAAQ,WAAW,QAAQ,UAAU,QAAQ,QAAQ,QAAQ,KAAK;CAEhH,eAAe;CAEf,QAAQ;CACX;;;;AAKD,MAAa,oBAAoB,GAAG,yBAAyB,UAAU,yBAAyB,SAAS,yBAAyB,OAAO;;;;AAKzI,MAAa,gBAAgB;CACzB;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACH;AAED,MAAa,wBAAwB;AACrC,MAAa,yCAAyC;AACtD,MAAa,wBAAwB;;;;;AAQrC,MAAa,8BACT;AAEJ,MAAa,8BAA8B;CAAC;CAAK;CAAK;CAAK;CAAK;CAAK;CAAK;CAAK;CAAK;CAAK;CAAK;CAAI;;;;ACxDlG,MAAa,gBAAgB;AAE7B,MAAa,qBAAqB;AAElC,MAAa,SAAS;AAEtB,MAAa,OAAO;AAEpB,MAAa,SAAS;AAEtB,MAAa,gBAAgB;AAE7B,MAAa,SAAS;AAEtB,MAAa,eAAe;AAM5B,MAAa,UAAU;CACnB;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACD;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACD;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACD;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACD;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACD;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACD;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACD;EACI,IAAI;EACJ,MAAM;EACN,SAAS;EACZ;CACJ;;;;;;;;;;;ACjDD,MAAa,gBAAgB,QAAgB,UAA0B;AACnE,KAAI,CAAC,OACD,QAAO;AAEX,KAAI,CAAC,MACD,QAAO;AAEX,QAAO,GAAG,OAAO,IAAI;;;;;;;;AASzB,MAAa,mBAAoC;AAC7C,QAAO,QAAQ,KAAK,YAAY;EAC5B,SAAS,OAA
O,OAAO,kBAAkB,OAAO,UAAU,aAAa,eAAe,OAAO,QAAQ;EACrG,IAAI,OAAO;EACX,UAAU,OAAO,OAAO;EACxB,MAAM,OAAO;EAChB,EAAE;;;;;;;;;;AAWP,MAAa,aAAa,OAAgC;CACtD,MAAM,SAAS,QAAQ,MAAM,MAAM,EAAE,OAAO,GAAG;AAC/C,KAAI,CAAC,OACD,OAAM,IAAI,MAAM,qBAAqB,KAAK;AAG9C,QAAO;EACH,SAAS,OAAO,OAAO,kBAAkB,OAAO,UAAU,aAAa,eAAe,OAAO,QAAQ;EACrG,IAAI,OAAO;EACX,UAAU,OAAO,OAAO;EACxB,MAAM,OAAO;EAChB;;;;;;;;;AAUL,MAAa,oBAAoB,OAAyB;AACtD,QAAO,UAAU,GAAG,CAAC;;;;;;;;AASzB,MAAa,qBAAiC;AAC1C,QAAO,QAAQ,KAAK,MAAM,EAAE,GAAG;;;;;;;;AASnC,MAAa,wBAAgC;AACzC,QAAO;;;;;;;;;;;;;;;;ACtFX,MAAa,2BAA2B,UAAqB,WAAmB;AAE5E,QAAO,CAAC,QADU,SAAS,KAAK,MAAM,GAAG,EAAE,GAAG,KAAK,EAAE,OAAO,CAAC,KAAK,OAAO,CAC/C,CAAC,KAAK,OAAO;;;;;;;;;AAU3C,MAAa,4BAA4B,YAAoB;AACzD,QAAO,gCAAgC,QAAQ,CAAC;;AAGpD,MAAM,+BAA+B,UAAkB;CACnD,IAAI,aAAa;CACjB,MAAM,WAAqB,EAAE;AAC7B,MAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;EACnC,MAAM,KAAK,MAAM;AACjB,MAAI,OAAO,MAAM;AACb,OAAI,MAAM,IAAI,OAAO,MAAM;AACvB,kBAAc;AACd,aAAS,KAAK,EAAE;AAChB;AACA;;AAEJ,iBAAc;AACd,YAAS,KAAK,EAAE;AAChB;;AAEJ,gBAAc;AACd,WAAS,KAAK,EAAE;;AAEpB,QAAO;EAAE;EAAU;EAAY;;AAGnC,MAAM,qCAAqC,MAAc,QAAkB;CACvE,MAAM,6BAA6B,IAAI,OACnC,eAAe,oBAAoB,yBAAyB,gBAAgB,yBAAyB,OAAO,IAC5G,IACH;CACD,IAAI,aAAa;CACjB,MAAM,WAAqB,EAAE;CAC7B,IAAI,YAAY;AAChB,MAAK,MAAM,SAAS,KAAK,SAAS,2BAA2B,EAAE;EAC3D,MAAM,aAAa,MAAM,SAAS;AAClC,OAAK,IAAI,IAAI,WAAW,IAAI,YAAY,KAAK;AACzC,iBAAc,KAAK;AACnB,YAAS,KAAK,IAAI,GAAG;;AAEzB,gBAAc,MAAM;AACpB,WAAS,KAAK,IAAI,YAAY;AAC9B,gBAAc;AACd,WAAS,KAAK,IAAI,YAAY;EAC9B,MAAM,SAAS,MAAM;AACrB,OAAK,IAAI,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACpC,iBAAc,OAAO;AACrB,YAAS,KAAK,IAAI,aAAa,IAAI,GAAG;;AAE1C,cAAY,aAAa,MAAM,GAAG;;AAEtC,MAAK,IAAI,IAAI,WAAW,IAAI,KAAK,QAAQ,KAAK;AAC1C,gBAAc,KAAK;AACnB,WAAS,KAAK,IAAI,GAAG;;AAEzB,QAAO;EAAE;EAAU;EAAY;;AAGnC,MAAM,uCAAuC,MAAc,QAAkB;CACzE,MAAM,+BAA+B,IAAI,OACrC,KAAK,oBAAoB,yBAAyB,gBAAgB,yBAAyB,OAAO,IAClG,IACH;CACD,IAAI,aAAa;CACjB,MAAM,WAAqB,EAAE;CAC7B,IAAI,YAAY;AAChB,MAAK,MAAM,SAAS,KAAK,SAAS,6BAA6B,EAAE;EAC7D,MAAM,aAAa,MAAM,SAAS;AAClC,OAAK,IAAI,IAAI,WAAW,IAAI,YAAY,KAAK;AACzC,iBAAc,KAAK;AACnB,YAAS,KAA
K,IAAI,GAAG;;AAEzB,gBAAc;AACd,WAAS,KAAK,IAAI,YAAY;EAC9B,MAAM,SAAS,MAAM;AACrB,OAAK,IAAI,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACpC,iBAAc,OAAO;AACrB,YAAS,KAAK,IAAI,aAAa,IAAI,GAAG;;AAE1C,cAAY,aAAa,MAAM,GAAG;;AAEtC,MAAK,IAAI,IAAI,WAAW,IAAI,KAAK,QAAQ,KAAK;AAC1C,gBAAc,KAAK;AACnB,WAAS,KAAK,IAAI,GAAG;;AAEzB,QAAO;EAAE;EAAU;EAAY;;AAGnC,MAAM,yBAAyB,MAAc,QAAkB;CAC3D,IAAI,aAAa;CACjB,MAAM,WAAqB,EAAE;AAC7B,MAAK,IAAI,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AAClC,MAAI,KAAK,OAAO,QAAQ,KAAK,IAAI,OAAO,KAAK;AACzC;AACA,iBAAc;AACd,YAAS,KAAK,IAAI,GAAG;AACrB;;AAEJ,gBAAc,KAAK;AACnB,WAAS,KAAK,IAAI,GAAG;;AAEzB,QAAO;EAAE;EAAU;EAAY;;AAGnC,MAAa,mCAAmC,YAAoB;CAChE,MAAM,uBAAuB,4BAA4B,QAAQ;CACjE,MAAM,mBAAmB,kCACrB,qBAAqB,YACrB,qBAAqB,SACxB;CACD,MAAM,gBAAgB,oCAAoC,iBAAiB,YAAY,iBAAiB,SAAS;AACjH,QAAO,sBAAsB,cAAc,YAAY,cAAc,SAAS;;;;;;;;AASlF,MAAa,yBAAyB,SAAiB;CACnD,MAAM,EAAE,QAAQ,kBAAkB;CAClC,MAAM,UAAU,IAAI,OAAO,KAAK,kBAAkB,GAAG,gBAAgB,UAAU,KAAK;CACpF,MAAM,MAAgB,EAAE;AACxB,MAAK,MAAM,SAAS,KAAK,SAAS,QAAQ,CACtC,KAAI,KAAK,MAAM,GAAG;AAEtB,QAAO;;;;;;;;;;;;;AAcX,MAAa,qBAAqB,YAAoB;CAElD,MAAM,iBAAiB,kBADJ,yBAAyB,QAAQ,CACA;AACpD,QAAO;EAAE,OAAO,eAAe;EAAM;EAAgB;;;;;;;;;;;;;;AAezD,MAAa,4BAA4B,YAAoB;CACzD,MAAM,aAAa,yBAAyB,QAAQ;CACpD,MAAM,EAAE,QAAQ,kBAAkB;CAClC,MAAM,gBAAgB,IAAI,OAAO,KAAK,kBAAkB,GAAG,gBAAgB,OAAO,OAAO,KAAK;CAC9F,MAAM,UAAU,CAAC,GAAG,WAAW,SAAS,cAAc,CAAC;CAEvD,MAAM,UAAsD,EAAE;AAC9D,MAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;EACrC,MAAM,KAAK,QAAQ,GAAG;EACtB,MAAM,QAAQ,QAAQ,GAAG,SAAS;EAClC,MAAM,YAAY,IAAI,IAAI,QAAQ,SAAU,QAAQ,IAAI,GAAG,SAAS,WAAW,SAAU,WAAW;EACpG,MAAM,QAAQ,WAAW,MAAM,OAAO,UAAU,CAAC,SAAS;EAC1D,MAAM,gCAAgB,IAAI,OAAO,IAAI,KAAK,gBAAgB,OAAO,MAAM;EACvE,MAAM,cAAc,MAAM,QAAQ,eAAe,GAAG,CAAC,MAAM;AAC3D,UAAQ,KAAK;GAAE;GAAI;GAAa,CAAC;;AAErC,QAAO;;;;;;;;AASX,MAAa,qBAAqB,SAAiB;CAC/C,MAAM,sBAAM,IAAI,KAAqB;AACrC,MAAK,MAAM,SAAS,yBAAyB,KAAK,CAC9C,KAAI,IAAI,MAAM,IAAI,MAAM,YAAY;AAExC,QAAO;;;;;;;;AASX,MAAa,4BAA4B,WAAqB;AAE1D,QAAO,MADO,OAAO,KAAK,UAAU,GAAG,aAAa,MAAM,CAAC,OAAO,CAAC,KAAK,IAAI,CACzD;;;;;;;;AASvB,MAAa,2BAA2B,gBACpC,YAAY,KAAK,UAAU,
aAAa,MAAM,CAAC,CAAC,KAAK,IAAI;;;;;;;AAQ7D,MAAa,8BAA8B,WAAqB;CAC5D,MAAM,eAAe,yBAAyB,OAAO;CACrD,MAAM,EAAE,QAAQ,kBAAkB;AAClC,wBAAO,IAAI,OAAO,OAAO,oBAAoB,gBAAgB,OAAO,SAAS,aAAa,GAAG;;;;;;;;AASjG,MAAa,sBAAsB,SAAiB;CAChD,MAAM,yBAAS,IAAI,KAAoD;AAEvE,MAAK,MAAM,SAAS,KAAK,SAAS,4BAA4B,EAAE;EAC5D,MAAM,QAAQ,MAAM;AACpB,MAAI,CAAC,MACD;EAEJ,MAAM,QAAQ,OAAO,IAAI,MAAM;AAC/B,MAAI,MACA,OAAM,SAAS;MAEf,QAAO,IAAI,OAAO;GAAE,OAAO;GAAG,YAAY,MAAM,SAAS;GAAG,CAAC;;AAIrE,QAAO,CAAC,GAAG,OAAO,SAAS,CAAC,CACvB,QAAQ,GAAG,UAAU,KAAK,SAAS,EAAE,CACrC,MAAM,GAAG,MAAM,EAAE,GAAG,aAAa,EAAE,GAAG,WAAW,CACjD,KAAK,CAAC,WAAW,MAAM;;;;;;;;AAShC,MAAa,gBAAgB,MAAc,EAAE,QAAQ,uBAAuB,OAAO;;;;;;;AChRnF,MAAa,4BAA4B,MAAc,WAAkC;CACrF,MAAM,gBAAgB,QAAQ,eAAe,SAAS,OAAO,gBAAgB,mBAAmB,KAAK;AACrG,KAAI,cAAc,WAAW,EACzB,QAAO;EACH,SAAS,EAAE;EACX,QAAQ,EAAE,0BAA0B,GAAG;EACvC;EACH;CAEL,MAAM,qBAAqB,QAAQ,sBAAsB;CACzD,MAAM,eAAe,yBAAyB,cAAc;CAC5D,IAAI,QAAQ;CACZ,MAAM,qBAAqB,wBAAwB,mBAAmB;CACtE,MAAM,uBAAuB,qCAAqB,IAAI,OAAO,MAAM,mBAAmB,SAAS,GAAG;CAClG,MAAM,mBAAmB,2BAA2B,cAAc;CAClE,MAAM,aAAa,IAAI,OAAO,cAAc,IAAI;;;;CAKhD,MAAM,mBAAmB,QAAgB,cAAsB;AAC3D,MAAI,CAAC,qBACD,QAAO,GAAG,OAAO,QAAQ,QAAQ,GAAG,CAAC,IAAI;EAE7C,MAAM,aAAa,OAAO,MAAM,qBAAqB;AACrD,MAAI,CAAC,WACD,QAAO,GAAG,OAAO,QAAQ,QAAQ,GAAG,CAAC,IAAI;EAE7C,MAAM,QAAQ,WAAW,GAAG,QAAQ,QAAQ,GAAG;AAE/C,SAAO,GADa,OAAO,MAAM,GAAG,CAAC,WAAW,GAAG,OAAO,GAClC,MAAM,IAAI;;;;;CAMtC,MAAM,cAAc,SAAiB;EACjC,MAAM,aAAa,KAAK,MAAM,iBAAiB;EAC/C,MAAM,sBAAsB,aAAa,WAAW,GAAG,SAAS,WAAW,GAAG,SAAS;EACvF,IAAI,YAAY;EAChB,IAAI,UAAU;EACd,IAAI,YAAY;AAEhB,OAAK,MAAM,SAAS,KAAK,SAAS,WAAW,EAAE;GAC3C,MAAM,MAAM,MAAM,SAAS;AAC3B,OAAI,QAAQ,uBAAuB,QAAQ,EACvC;GAEJ,MAAM,SAAS,KAAK,MAAM,WAAW,IAAI;AACzC,cAAW,gBAAgB,QAAQ,MAAM,GAAG;AAC5C,eAAY,MAAM,MAAM,GAAG;AAC3B,gBAAa;;AAGjB,MAAI,cAAc,EACd,QAAO;GAAE;GAAM;GAAW;AAE9B,SAAO;GAAE,MAAM,GAAG,UAAU,KAAK,MAAM,UAAU;GAAI;GAAW;;CAGpE,MAAM,QAAQ,KACT,MAAM,KAAK,CACX,KAAK,SAAS;EACX,MAAM,SAAS,WAAW,KAAK;AAC/B,WAAS,OAAO;AAChB,SAAO,OAAO;GAChB,CACD,KAAK,KAAK;AACf,QAAO;EACH,SAAS,QAAQ,IAAI,CAAC,2BAA2B,GAAG,EAAE;EACtD,QAAQ,EAAE,0B
AA0B,OAAO;EAC3C,MAAM;EACT;;AAGL,MAAM,iBAAwG;CAC1G,oBAAoB;CACpB,mBAAmB;CACtB;;;;AAKD,MAAa,UAAU,MAAc,YAAsC;CACvE,MAAM,YAAY,QAAQ;CAC1B,MAAM,UAAoB,EAAE;CAC5B,MAAM,UAAoB,EAAE;CAC5B,IAAI,cAAc;CAClB,MAAM,SAAiC,EAAE;AAEzC,MAAK,MAAM,QAAQ,WAAW;EAC1B,MAAM,QAAQ,eAAe;AAC7B,MAAI,CAAC,OAAO;AACR,WAAQ,KAAK,KAAK;AAClB;;EAEJ,MAAM,SAAS,MAAM,aAAa,QAAQ,OAAO;AACjD,gBAAc,OAAO;AACrB,UAAQ,KAAK,KAAK;AAClB,OAAK,MAAM,CAAC,KAAK,UAAU,OAAO,QAAQ,OAAO,OAAO,CACpD,QAAO,QAAQ,OAAO,QAAQ,KAAK;;AAI3C,QAAO;EACH;EACA;EACA;EACA;EACA,MAAM;EACT;;;;;;;;;;;AC/FL,MAAa,6BAA6B;CACtC,UAAU,EACN,aAAa,4DAChB;CACD,aAAa,EACT,aAAa,oDAChB;CACD,kBAAkB,EACd,aAAa,kEAChB;CACD,oBAAoB,EAChB,aAAa,qEAChB;CACD,cAAc,EACV,aAAa,+DAChB;CACD,mBAAmB,EACf,aAAa,iFAChB;CACD,WAAW,EACP,aAAa,oEAChB;CACD,uBAAuB,EACnB,aAAa,uGAChB;CACD,uBAAuB,EACnB,aAAa,gGAChB;CACD,aAAa,EACT,aAAa,yFAChB;CACD,iBAAiB,EACb,aAAa,yFAChB;CACD,WAAW,EACP,aAAa,8EAChB;CACD,mBAAmB,EACf,aACI,oIACP;CACD,gBAAgB,EACZ,aACI,0HACP;CACD,kCAAkC,EAC9B,aAAa,8FAChB;CACD,kBAAkB,EACd,aAAa,sGAChB;CACD,kBAAkB,EACd,aAAa,sEAChB;CACD,mBAAmB,EACf,aAAa,iFAChB;CACD,kBAAkB,EACd,aAAa,8EAChB;CACJ;AAED,MAAM,oBAAoB,OAA0B,QAAQ,SACxD,IAAI,OAAO,SAAS,MAAM,KAAK,MAAM,aAAa,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,OAAO,MAAM;AAEjF,MAAM,aAAa,MAAc,OAAe,QAAgB;CAC5D,IAAI,IAAI;CACR,IAAI,IAAI;AACR,QAAO,IAAI,KAAK,KAAK,KAAK,KAAK,GAAG,CAC9B;AAEJ,QAAO,IAAI,KAAK,KAAK,KAAK,KAAK,IAAI,GAAG,CAClC;AAEJ,QAAO;EAAE,KAAK;EAAG,OAAO;EAAG;;AAG/B,MAAM,cAAc,iBAAyB,eAAuB,UAAoB,cAA6B;AACjH,KAAI,iBAAiB,iBAAiB;EAClC,MAAMA,aAAW,SAAS,oBAAoB;AAC9C,SAAO;GAAE,KAAKA;GAAU,OAAOA;GAAU;;CAE7C,MAAM,WAAW,SAAS,oBAAoB;CAC9C,MAAM,aAAa,SAAS,KAAK,IAAI,gBAAgB,GAAG,gBAAgB,KAAK;AAE7E,QAAO;EAAE,KADM,KAAK,IAAI,WAAW,aAAa,EAAE;EAC5B,OAAO;EAAU;;AAG3C,MAAM,gBAAgB,YAAoB,UAAoB,cAA2C;CACrG,MAAM,EAAE,QAAQ,kBAAkB;CAClC,MAAM,gBAAgB,IAAI,OAAO,KAAK,kBAAkB,GAAG,gBAAgB,OAAO,OAAO,KAAK;CAC9F,MAAM,UAAU,CAAC,GAAG,WAAW,SAAS,cAAc,CAAC;CACvD,MAAM,UAA+B,EAAE;AAEvC,MAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;EACrC,MAAM,QAAQ,QAAQ;EACtB,MAAM,KAAK,MAAM;EACjB,MAAM,kBAAkB,MAAM,SAAS;EACvC,MAAM,gBAAgB,kB
AAkB,MAAM,GAAG;EAEjD,MAAM,mBAAmB,UAAU,YAAY,eAD7B,IAAI,IAAI,QAAQ,SAAU,QAAQ,IAAI,GAAG,SAAS,WAAW,SAAU,WAAW,OAC5B;EACxE,MAAM,cAAc,WAAW,iBAAiB,eAAe,UAAU,UAAU;EACnF,MAAM,sBAAsB,WAAW,iBAAiB,OAAO,iBAAiB,KAAK,UAAU,UAAU;AAEzG,UAAQ,KAAK;GACT,YAAY,MAAM;GAClB;GACA;GACA;GACA,QAAQ,YAAY;GACpB,UAAU,YAAY;GACtB,mBAAmB,oBAAoB;GACvC,qBAAqB,oBAAoB;GACzC,gBAAgB,iBAAiB;GACjC,kBAAkB,iBAAiB;GACtC,CAAC;;AAGN,QAAO;;AAGX,MAAM,qBAAqB,SAA8B,eAAuB;CAC5E,MAAM,+BAAe,IAAI,KAAqB;AAC9C,MAAK,MAAM,UAAU,SAAS;EAC1B,MAAM,kBAAkB,WAAW,MAAM,OAAO,kBAAkB,OAAO,eAAe,CAAC,MAAM;AAC/F,eAAa,IAAI,OAAO,IAAI,gBAAgB;;AAEhD,QAAO;;AAGX,MAAM,4BAA8C,EAChD,yBAAyB,GAC5B;AAED,MAAM,0BACF,UACA,aACA,WACoB;CACpB,MAAM,EAAE,YAAY,aAAa,gCAAgC,YAAY;CAC7E,MAAM,UAAU,aAAa,YAAY,UAAU,YAAY,OAAO;CACtE,MAAM,YAAY,QAAQ,KAAK,MAAM,EAAE,GAAG;CAC1C,MAAM,8BAAc,IAAI,KAAsB;AAC9C,MAAK,MAAM,KAAK,SACZ,aAAY,IAAI,EAAE,IAAI,EAAE;AAG5B,QAAO;EACH;EACA;EACA;EACA,oBAAoB;EACpB;EACA;EACA,cARiB,kBAAkB,SAAS,WAAW;EASvD;EACA;EACH;;AAGL,MAAM,2BACF,SACA,MACA,SACA,WACA,iBACA,eACA,OACkB;CAClB,IAAI,aAAa;AACjB,KAAI,CAAC,YAED;OAAK,MAAM,UAAU,QAAQ,QAGzB,KAAI,mBAAmB,OAAO,oBAAoB,iBAAiB,OAAO,gBAAgB;AACtF,gBAAa,OAAO;AACpB;;;AAKZ,QAAO;EACH,IAAI;EACJ;EACA;EACA,OAAO,WAAW,iBAAiB,eAAe,QAAQ,UAAU,QAAQ,YAAY,OAAO;EAC/F;EACH;;AAGL,MAAM,yBACF,MACA,SACA,WACA,OACA,QACmB;CACnB;CACA;CACA;CACA;CACA;CACH;;;;;;;;;;;;;;;;;;;;;;;;AAyBD,MAAa,+BACT,UACA,UACA,YAC2B;CAE3B,MAAM,UAAU,uBAAuB,UAAU,UADlC;EAAE,GAAG;EAA2B,GAAG,SAAS;EAAQ,CACD;AAClE,KAAI,QAAQ,UAAU,WAAW,EAC7B,QAAO;EACH,QAAQ,CACJ;GACI,WAAW;GACX,SAAS;GACT,OAAO;IAAE,KAAK,SAAS;IAAQ,OAAO;IAAG;GACzC,QAAQ;GACR,MAAM;GACT,CACJ;EACD,oBAAoB,QAAQ;EAC5B,WAAW,EAAE;EAChB;AAML,QAAO;EAAE,SAHK,SAAS,SAAS,eACX,SAAS,SAAS,KAAK,IAAI,QAAQ,CAAC,KAAK,OAAO;GAAE,GAAG;GAAG,QAAQ,EAAE,UAAU,KAAK;GAAI,EAAE,CAAC;EAE5F,oBAAoB,QAAQ;EAAoB,WAAW,QAAQ;EAAW;;;;;;;;;AAUnG,MAAM,wBAAwB,YAAkD;CAC5E,MAAM,OAAO,QAAQ;CACrB,MAAM,EAAE,SAAS,QAAQ,QAAQ,QAAQ,kBAAkB;CAC3D,MAAM,SAA4B,EAAE;CAEpC,MAAM,oBAAoB,IAAI,OAC1B,IAAI,QAAQ,KAAK,OAAO,QAAQ,OAAO,MAAM,SAAS,OAAO,GAAG,gBAAgB,OAAO,aAAa,gBAAgB,UACpH,KACH;AACD
,MAAK,MAAM,SAAS,KAAK,SAAS,kBAAkB,EAAE;EAClD,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,yBACA,6BAA6B,UAAU,MAAM,CAAC,yEAC9C,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;CAGL,MAAM,qBAAqB,IAAI,OAAO,IAAI,UAAU,SAAS,OAAO,GAAG,gBAAgB,UAAU,KAAK;AACtG,MAAK,MAAM,SAAS,KAAK,SAAS,mBAAmB,EAAE;EACnD,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,yBACA,gCAAgC,UAAU,IAC1C,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;CAGL,MAAM,sBAAsB,IAAI,OAAO,IAAI,UAAU,SAAS,OAAO,MAAM,OAAO,IAAI,KAAK;AAC3F,MAAK,MAAM,SAAS,KAAK,SAAS,oBAAoB,EAAE;EACpD,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,yBACA,gCAAgC,UAAU,IAC1C,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;CAGL,MAAM,wBAAwB,IAAI,OAAO,IAAI,oBAAoB,gBAAgB,OAAO,QAAQ,KAAK;AACrG,MAAK,MAAM,SAAS,KAAK,SAAS,sBAAsB,EAAE;EACtD,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,yBACA,cAAc,UAAU,MAAM,CAAC,qCAC/B,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;CAGL,MAAM,oBAAoB,IAAI,OAAO,IAAI,UAAU,OAAO,KAAK,UAAU,KAAK;AAC9E,MAAK,MAAM,SAAS,KAAK,SAAS,kBAAkB,EAAE;EAClD,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,yBACA,6BAA6B,UAAU,2BACvC,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;AAGL,QAAO;;;;;;;;;AAUX,MAAM,0BAA0B,YAAkD;CAC9E,MAAM,UAAU,IAAI,OAChB,IAAI,oBAAoB,yBAAyB,gBAAgB,yBAAyB,OAAO,UACjG,KACH;CACD,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,SAAS,QAAQ,mBAAmB,SAAS,QAAQ,EAAE;EAC9D,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,oBACA,qCAAqC,UAAU,MAAM,CAAC,6BACtD,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;AAEL,QAAO;;;;;;;;AASX,MAAM,wBAAwB,YAAkD;CAC5E,MAAM,uBAAO,IAAI,KAAa;CAC9B,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,UAAU,QAAQ,QACzB,KAAI,KAAK,IAAI,OAAO,GAAG,CACnB,QAAO,KACH,sBACI,gBACA,iBAAiB,OAAO,GAAG,oDAC3B,OAAO,YACP;EAAE,KAAK,OAAO;EAAQ,OAAO,OAAO;EAAU,EAC9C,OAAO,GACV,CACJ;KAED,MAAK,IAAI,OAAO,GAAG;AAG3B,QAAO;;;;;;;;AASX,MAAM,uBAAuB,YAAkD;CAC3E,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,UAAU,QAAQ,QACzB,KAAI,CAAC,QAAQ,YAAY,IAAI,OAAO,GAAG,CACnC,QAAO,KACH,sBACI,eACA,0BAA0B,OAAO,GAAG,2CACpC,OAAO,YACP;EAAE,KAAK,OAAO;EAAQ,OAAO,OAAO;EAAU,EAC9C,OAAO,GACV,CACJ;AA
GT,QAAO;;;;;;;;;AAUX,MAAM,yBAAyB,aAAwB;CACnD,MAAM,4BAAY,IAAI,KAAqB;AAC3C,MAAK,IAAI,IAAI,GAAG,IAAI,SAAS,QAAQ,IACjC,WAAU,IAAI,SAAS,GAAG,IAAI,EAAE;AAEpC,QAAO;;AAGX,MAAM,iBAAiB,SAA4B,WAAgC,KAAa,QAAgB;AAC5G,KAAI,CAAC,QAAQ,YAAY,IAAI,IAAI,IAAI,CAAC,QAAQ,YAAY,IAAI,IAAI,CAC9D;CAEJ,MAAM,KAAK,UAAU,IAAI,IAAI;CAC7B,MAAM,KAAK,UAAU,IAAI,IAAI;AAC7B,KAAI,MAAM,QAAQ,MAAM,QAAQ,KAAK,GACjC;AAEJ,QAAO;EAAE,KAAK;EAAI,OAAO;EAAI;;AAGjC,MAAM,qBACF,SACA,UACA,QACA,aACA,YACC;CACD,MAAM,QAAkB,EAAE;AAC1B,MAAK,IAAI,IAAI,WAAW,GAAG,IAAI,QAAQ,KAAK;EACxC,MAAM,QAAQ,QAAQ,SAAS,IAAI;AACnC,MAAI,CAAC,MACD;AAEJ,MAAI,CAAC,QAAQ,YAAY,IAAI,MAAM,IAAI,YAAY,IAAI,MAAM,IAAI,QAAQ,IAAI,MAAM,CAC/E;AAEJ,UAAQ,IAAI,MAAM;AAClB,QAAM,KAAK,MAAM;;AAErB,QAAO;;AAGX,MAAM,yBAAyB,YAAkD;CAC7E,MAAM,YAAY,sBAAsB,QAAQ,SAAS;CAEzD,MAAM,cAAc,IAAI,IAAI,QAAQ,UAAU;CAC9C,MAAM,0BAAU,IAAI,KAAa;CACjC,MAAM,SAA4B,EAAE;AAEpC,MAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,SAAS,GAAG,KAAK;EACjD,MAAM,IAAI,QAAQ,QAAQ;EAC1B,MAAM,IAAI,QAAQ,QAAQ,IAAI;EAC9B,MAAM,MAAM,cAAc,SAAS,WAAW,EAAE,IAAI,EAAE,GAAG;AACzD,MAAI,CAAC,IACD;EAEJ,MAAM,eAAe,kBAAkB,SAAS,IAAI,OAAO,IAAI,KAAK,aAAa,QAAQ;AACzF,OAAK,MAAM,SAAS,aAChB,QAAO,KACH,sBACI,kBACA,wDAAwD,MAAM,IAC9D,QAAQ,YAAY,MAAM,EAAE,UAAU,EAAE,OAAO,EAC/C;GAAE,KAAK,EAAE;GAAQ,OAAO,EAAE;GAAU,EACpC,MACH,CACJ;;AAIT,QAAO;;;;;;;;AASX,MAAM,6BAA6B,YAAkD;CACjF,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,UAAU,QAAQ,SAAS;EAClC,MAAM,UAAU,QAAQ,mBAAmB,MAAM,OAAO,kBAAkB,OAAO,eAAe,CAAC,MAAM;EACvG,MAAM,aAAa,YAAY,OAAO,YAAY;EAClD,MAAM,aAAa,QAAQ,YAAY,IAAI,OAAO,GAAG,EAAE,QAAQ;EAC/D,MAAM,uBAAuB,sBAAsB,KAAK,WAAW;AACnE,MAAI,CAAC,WAAW,YAAY,kBAAmB,cAAc,CAAC,qBAC1D,QAAO,KACH,sBACI,qBACA,gCAAgC,OAAO,GAAG,wCAC1C,QAAQ,YAAY,MAAM,OAAO,qBAAqB,OAAO,kBAAkB,EAC/E;GAAE,KAAK,OAAO;GAAmB,OAAO,OAAO;GAAqB,EACpE,OAAO,GACV,CACJ;;AAGT,QAAO;;;;;;;;AASX,MAAM,gCAAgC,YAAkD;CACpF,MAAM,WAAW;EAAC;EAA2B;EAAqB;EAAiB;CACnF,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,WAAW,SAClB,MAAK,MAAM,SAAS,QAAQ,mBAAmB,SAAS,QAAQ,EAAE;EAC9D,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,yBACA,aAAa,UAAU,4CACvB,WACA,KACA,MA
AM,UAAU,OACnB,CACJ;;AAGT,QAAO;;;;;;;;AASX,MAAM,oBAAoB,YAAkD;CACxE,MAAM,WAAW;EAAC;EAAa;EAA4B;EAAe;EAAa;EAAa;EAAe;CACnH,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,WAAW,SAClB,MAAK,MAAM,SAAS,QAAQ,mBAAmB,SAAS,QAAQ,EAAE;EAC9D,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,aACA,uBAAuB,UAAU,0DACjC,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;AAGT,QAAO;;;;;;;;;;;;AAaX,MAAM,sBAAsB,YAAkD;CAC1E,MAAM,gBAAgB;CACtB,MAAM,SAA4B,EAAE;AAEpC,MAAK,MAAM,UAAU,QAAQ,SAAS;EAClC,MAAM,OAAO,QAAQ,mBAAmB,MAAM,OAAO,kBAAkB,OAAO,eAAe;EAC7F,IAAI;AACJ,OAAK,MAAM,SAAS,KAAK,SAAS,cAAc,EAAE;GAC9C,MAAMC,cAAY,MAAM,GAAG,QAAQ,MAAM,GAAG,CAAC,MAAM;AACnD,OAAI,CAACA,YACD;AAEJ,OAAI,CAAC,gBAAgBA,YAAU,SAAS,aAAa,GAAG,QAAQ,MAAM,GAAG,CAAC,MAAM,CAAC,OAC7E,gBAAe;;AAGvB,MAAI,CAAC,aACD;EAEJ,MAAM,YAAY,aAAa,GAAG,QAAQ,MAAM,GAAG,CAAC,MAAM;EAC1D,MAAM,MAAM,aAAa,SAAS;EAClC,MAAM,kBAAkB,OAAO,mBAAmB;EAClD,MAAM,gBAAgB,kBAAkB,aAAa,GAAG;AACxD,SAAO,KACH,wBACI,SACA,eACA,4BAA4B,UAAU,IACtC,WACA,iBACA,eACA,OAAO,GACV,CACJ;;AAGL,QAAO;;;;;;;;AASX,MAAM,2BAA2B,YAAkD;CAC/E,MAAM,eAAe;CACrB,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,SAAS,QAAQ,mBAAmB,SAAS,aAAa,EAAE;EACnE,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,oBACA,oBAAoB,UAAU,6CAC9B,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;AAEL,QAAO;;;;;;;;;AAUX,MAAM,4BAA4B,YAAkD;CAChF,MAAM,UAAU,CAAC,GAAG,QAAQ,mBAAmB,SAAS,QAAQ,CAAC;AACjE,KAAI,QAAQ,UAAU,sBAClB,QAAO,EAAE;AAEb,QAAO,QAAQ,KAAK,UAAU;EAC1B,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,wBACH,SACA,qBACA,SAAS,QAAQ,OAAO,yIACxB,MAAM,IACN,KACA,MAAM,MAAM,GAAG,OAClB;GACH;;;;;;;;;AAUN,MAAM,8BAA8B,YAAoB,oBAA4B;CAChF,MAAM,UAAU,cAAc,IAAI,MAAM;CACxC,MAAM,eAAe,mBAAmB,IAAI,MAAM;AAElD,KAAI,OAAO,SAAS,uCAChB;AAEJ,KAAI,YAAY,WAAW,EACvB,QAAO,iDAAiD,OAAO,OAAO;AAI1E,KADc,YAAY,SAAS,OAAO,SAC9B,uBAAuB;EAC/B,MAAM,oBAAoB,KAAK,MAAM,OAAO,SAAS,sBAAsB;AAC3E,SAAO,kCAAkC,YAAY,OAAO,aAAa,OAAO,OAAO,wCAAwC,kBAAkB;;;;;;;;;AAUzJ,MAAM,yCAAyC,YAAkD;CAC7F,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,UAAU,QAAQ,SAAS;EAClC,MAAM,MAAM,QAAQ,YAAY,IAAI,OAAO,GAAG;AAC9C,MAAI,CAAC,IACD;EAEJ,MAAM,cAAc,QAAQ,mBAAmB,MAAM,OAAO,kBAAkB,O
AAO,eAAe,CAAC,MAAM;EAC3G,MAAM,QAAQ,2BAA2B,IAAI,MAAM,YAAY;AAC/D,MAAI,MACA,QAAO,KACH,sBACI,mBACA,oBAAoB,OAAO,GAAG,IAAI,MAAM,QAAQ,gBAAgB,GAAG,CAAC,aAAa,IACjF,aACA;GAAE,KAAK,OAAO;GAAmB,OAAO,OAAO;GAAqB,EACpE,OAAO,GACV,CACJ;;AAGT,QAAO;;;;;;;;AASX,MAAM,mBAAmB,YAAkD;CACvE,MAAM,SAA4B,EAAE;CACpC,MAAM,eAAe,QAAQ,OAAO;CACpC,MAAM,aAAa,IAAI,OAAO,2BAA2B,eAAe,EAAE,gBAAgB,IAAI;AAC9F,MAAK,MAAM,SAAS,QAAQ,mBAAmB,SAAS,WAAW,EAAE;EACjE,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,YACA,uBAAuB,UAAU,MAAM,CAAC,IACxC,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;AAEL,QAAO;;;;;;;;AASX,MAAM,2BAA2B,YAAkD;CAC/E,MAAM,UAAU,iBAAiB,cAAc;CAC/C,MAAM,SAA4B,EAAE;AACpC,MAAK,MAAM,SAAS,QAAQ,mBAAmB,SAAS,QAAQ,EAAE;EAC9D,MAAM,YAAY,MAAM;EACxB,MAAM,MAAM,MAAM,SAAS;AAC3B,SAAO,KACH,wBACI,SACA,oBACA,6CAA6C,UAAU,IACvD,WACA,KACA,MAAM,UAAU,OACnB,CACJ;;AAEL,QAAO;;;;;AAMX,MAAM,oBAAoB,YAAkD;CACxE,MAAM,SAA4B,EAAE;CACpC,MAAM,aAAa;CACnB,MAAM,eAAe;AAErB,MAAK,MAAM,UAAU,QAAQ,SAAS;EAClC,MAAM,MAAM,QAAQ,YAAY,IAAI,OAAO,GAAG;AAC9C,MAAI,CAAC,OAAO,CAAC,aAAa,KAAK,IAAI,KAAK,CACpC;EAEJ,MAAM,cAAc,QAAQ,mBAAmB,MAAM,OAAO,kBAAkB,OAAO,eAAe;AACpG,OAAK,MAAM,SAAS,YAAY,SAAS,WAAW,EAAE;GAClD,MAAM,YAAY,MAAM;GACxB,MAAM,MAAM,MAAM,SAAS;GAC3B,MAAM,kBAAkB,OAAO,mBAAmB;GAClD,MAAM,gBAAgB,kBAAkB,UAAU;AAClD,UAAO,KACH,wBACI,SACA,aACA,sCAAsC,OAAO,GAAG,gDAChD,WACA,iBACA,eACA,OAAO,GACV,CACJ;;;AAIT,QAAO;;AAQX,MAAM,2BAA2B,SAAuC;CACpE,MAAM,2BAAW,IAAI,KAAqB;CAC1C,MAAM,QAAQ,KAAK,MAAM,KAAK;CAC9B,MAAM,kBAAkB;CACxB,MAAM,WAAW;AAEjB,MAAK,MAAM,WAAW,OAAO;EACzB,MAAM,OAAO,QAAQ,WAAW;AAChC,MAAI,CAAC,KACD;EAEJ,MAAM,WAAW,KAAK,OAAO,OAAO;AACpC,MAAI,YAAY,EACZ;EAEJ,MAAM,SAAS,KAAK,MAAM,GAAG,SAAS,CAAC,MAAM;AAC7C,MAAI,CAAC,UAAU,OAAO,SAAS,gBAC3B;AAGJ,MADc,OAAO,MAAM,MAAM,CACvB,SAAS,SACf;EAEJ,MAAM,QAAQ,SAAS,IAAI,OAAO,IAAI;AACtC,WAAS,IAAI,QAAQ,QAAQ,EAAE;;CAGnC,IAAI,QAAQ;AACZ,MAAK,MAAM,SAAS,SAAS,QAAQ,CACjC,UAAS;AAGb,QAAO;EAAE;EAAU;EAAO;;;;;;;;;;;AAY9B,MAAM,4BAA4B,YAAkD;CAChF,MAAM,SAA4B,EAAE;AAEpC,MAAK,MAAM,UAAU,QAAQ,SAAS;EAClC,MAAM,MAAM,QAAQ,YAAY,IAAI,OAAO,GAAG;AAC9C,MAAI,CAAC,IACD;EAGJ
,MAAM,eAAe,wBAAwB,IAAI,KAAK;EACtD,MAAM,cAAc,QAAQ,mBAAmB,MAAM,OAAO,kBAAkB,OAAO,eAAe;EACpG,MAAM,gBAAgB,wBAAwB,YAAY;AAE1D,MAAI,aAAa,UAAU,cAAc,UAAU,aAAa,QAAQ,KAAK,cAAc,QAAQ,GAC/F,QAAO,KACH,sBACI,qBACA,oCAAoC,OAAO,GAAG,gBAAgB,aAAa,MAAM,yCAAyC,cAAc,MAAM,uEAC9I,YAAY,MAAM,EAClB;GAAE,KAAK,OAAO;GAAmB,OAAO,OAAO;GAAqB,EACpE,OAAO,GACV,CACJ;;AAIT,QAAO;;;;;;;;AASX,MAAM,6BAA6B,SAAiB;CAChD,MAAM,kBAAkB,wBAAwB,KAAK,CAAC;AACtD,KAAI,gBAAgB,SAAS,EACzB;CAEJ,MAAM,eAAe,CAAC,GAAG,gBAAgB,MAAM,CAAC,CAAC,KAAK,UAAU,aAAa,MAAM,CAAC,CAAC,KAAK,IAAI;AAC9F,KAAI,CAAC,aACD;CAEJ,MAAM,QAAQ,KAAK,MAAM,KAAK;CAC9B,IAAI,SAAS;CACb,MAAM,UAAU,IAAI,OAAO,OAAO,aAAa,SAAS,IAAI;AAC5D,MAAK,MAAM,QAAQ,OAAO;AACtB,OAAK,MAAM,SAAS,KAAK,SAAS,QAAQ,EAAE;GACxC,MAAM,MAAM,MAAM,SAAS;AAC3B,OAAI,MAAM,EACN,QAAO;IAAE,OAAO,SAAS;IAAK,OAAO,MAAM;IAAI;;AAGvD,YAAU,KAAK,SAAS;;;AAIhC,MAAM,6BAA6B,YAAkD;CACjF,MAAM,SAA4B,EAAE;AAEpC,MAAK,MAAM,UAAU,QAAQ,SAAS;EAClC,MAAM,cAAc,QAAQ,mBAAmB,MAAM,OAAO,kBAAkB,OAAO,eAAe;AACpG,MAAI,CAAC,YACD;EAEJ,MAAM,UAAU,0BAA0B,YAAY;AACtD,MAAI,CAAC,QACD;EAEJ,MAAM,kBAAkB,OAAO,mBAAmB,QAAQ;EAC1D,MAAM,gBAAgB,kBAAkB,QAAQ,MAAM,SAAS;AAC/D,SAAO,KACH,wBACI,SACA,sBACA,wCAAwC,OAAO,GAAG,MAAM,QAAQ,MAAM,gCACtE,GAAG,QAAQ,MAAM,IACjB,iBACA,eACA,OAAO,GACV,CACJ;;AAGL,QAAO;;;;;;;;;AAUX,MAAM,yCAAyC,YAAkD;CAC7F,MAAM,SAA4B,EAAE;CACpC,MAAM,gBAAgB;AAEtB,MAAK,MAAM,UAAU,QAAQ,SAAS;EAClC,MAAM,OAAO,QAAQ,mBAAmB,MAAM,OAAO,kBAAkB,OAAO,eAAe;AAC7F,MAAI,CAAC,KACD;AAGJ,OAAK,MAAM,KAAK,KAAK,SAAS,cAAc,EAAE;GAC1C,MAAM,SAAS,GAAG,EAAE,GAAG,MAAM,EAAE;GAC/B,MAAM,MAAM,EAAE,SAAS;AACvB,OAAI,OAAO,GAEP;QAAI,CADU,KAAK,MAAM,KAAK,KAAK,IAAI,KAAK,QAAQ,MAAM,OAAO,SAAS,GAAG,CAAC,CACnE,SAAS,IAAI,EAAE;KACtB,MAAM,kBAAkB,OAAO,mBAAmB;KAClD,MAAM,gBAAgB,kBAAkB,OAAO;AAC/C,YAAO,KACH,wBACI,SACA,oCACA,0DAA0D,OAAO,GAAG,MAAM,OAAO,IACjF,QACA,iBACA,eACA,OAAO,GACV,CACJ;;;;;AAMjB,QAAO;;AAGX,MAAM,gBAAkC;CACpC;EAAE,IAAI;EAAyB,KAAK;EAAsB,MAAM;EAAyB;CACzF;EAAE,IAAI;EAAoB,KAAK;EAAwB,MAAM;EAAoB;CACjF;EAAE,IAAI;EAAqB,KAAK;EAA2B,MAAM;EAAqB;CACtF;EAAE,IAAI;EAAyB,KAAK;EAA8B,MAAM;EAAyB;CACjG;EAAE,IAAI;EAAa,
KAAK;EAAkB,MAAM;EAAa;CAC7D;EAAE,IAAI;EAAgB,KAAK;EAAsB,MAAM;EAAgB;CACvE;EAAE,IAAI;EAAe,KAAK;EAAqB,MAAM;EAAe;CACpE;EAAE,IAAI;EAAkB,KAAK;EAAuB,MAAM;EAAkB;CAC5E;EAAE,IAAI;EAAe,KAAK;EAAoB,MAAM;EAAe;CACnE;EAAE,IAAI;EAAoB,KAAK;EAAyB,MAAM;EAAoB;CAClF;EAAE,IAAI;EAAqB,KAAK;EAA0B,MAAM;EAAqB;CACrF;EAAE,IAAI;EAAmB,KAAK;EAAuC,MAAM;EAAmB;CAC9F;EAAE,IAAI;EAAY,KAAK;EAAiB,MAAM;EAAY;CAC1D;EAAE,IAAI;EAAoB,KAAK;EAAyB,MAAM;EAAoB;CAClF;EAAE,IAAI;EAAa,KAAK;EAAkB,MAAM;EAAa;CAC7D;EAAE,IAAI;EAAqB,KAAK;EAA0B,MAAM;EAAqB;CACrF;EAAE,IAAI;EAAsB,KAAK;EAA2B,MAAM;EAAsB;CACxF;EACI,IAAI;EACJ,KAAK;EACL,MAAM;EACT;CACJ"}
|