wobble-bibble 1.3.2 → 1.4.0

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -101,10 +101,19 @@ const error: ValidationError = {
101
101
 
102
102
  | Function | Description |
103
103
  |----------|-------------|
104
- | `validateTranslationResponse(segments, response, options?)` | Unified validator for LLM translation responses (IDs, Arabic leak, invented IDs, gaps, speaker-label drift, etc.) |
104
+ | `validateTranslationResponse(segments, response, options?)` | Unified validator for LLM translation responses (IDs, Arabic leak, invented IDs, gaps, speaker-label drift, Allah vs God usage, etc.) |
105
105
  | `VALIDATION_ERROR_TYPE_INFO` | Human-readable descriptions for each `ValidationErrorType` (for UI/logging) |
106
106
  | `normalizeTranslationTextWithMap(text)` | Normalize response text and return a normalized-index → raw-index map |
107
107
 
108
+ ### Fixers
109
+
110
+ | Function | Description |
111
+ |----------|-------------|
112
+ | `fixCollapsedSpeakerLines(text, config?)` | Insert line breaks before mid-line speaker labels; infer the labels if none are provided |
113
+ | `fixAll(text, options)` | Apply fixers for selected `ValidationErrorType`s (e.g., `collapsed_speakers`) |
114
+ | `FixConfig` | Configuration for fixers (optional speaker labels, punctuation) |
115
+ | `FixResult` | Fix output (fixed text, applied fixes, counts) |
116
+
108
117
  ### Utilities
109
118
 
110
119
  | Function | Description |
package/dist/index.d.ts CHANGED
@@ -36,7 +36,7 @@ type PromptId = 'master_prompt' | 'encyclopedia_mixed' | 'fatawa' | 'fiqh' | 'ha
36
36
  declare const PROMPTS: readonly [{
37
37
  readonly id: "master_prompt";
38
38
  readonly name: "Master Prompt";
39
- readonly content: "ROLE: Expert academic translator of Classical Islamic texts; prioritize accuracy and structure over fluency.\nCRITICAL NEGATIONS: 1. NO SANITIZATION (Do not soften polemics). 2. NO META-TALK (Output translation only). 3. NO MARKDOWN (Plain text only). 4. NO EMENDATION. 5. NO INFERENCE. 6. NO RESTRUCTURING. 7. NO OPAQUE TRANSLITERATION (Must translate phrases). 8. NO INVENTED SEGMENTS (Do not create, modify, or \"continue\" segment IDs. Output IDs verbatim exactly as they appear in the source input/metadata. Alphabetic suffixes (e.g., P5511a) are allowed IF AND ONLY IF that exact ID appears in the source. Any ID not present verbatim in the source is INVENTED. EXAMPLE: If P5803b ends with a questioner line, that line stays under P5803b — do NOT invent P5803c. If an expected ID is missing from the source, output: \"ID - [MISSING]\".)\nRULES: NO ARABIC SCRIPT (Except ﷺ). Plain text only. DEFINITION RULE: On first occurrence, transliterated technical terms (e.g., bidʿah) MUST be defined: \"translit (English)\". Preserve Segment ID. Translate meaning/intent. No inference. No extra fields. Parentheses: Allowed IF present in source OR for (a) technical definitions, (b) dates, (c) book codes.\nARABIC LEAK (Hard ban):\n- SCRIPT LOCK: Output must be 100% Latin script (ASCII + ALA-LC diacritics like ā ī ū ḥ ṣ ḍ ṭ ẓ ʿ ʾ). These diacritics are allowed/required and are NOT Arabic script.\n- STRICT BAN: Arabic script codepoints (letters, Arabic-Indic numerals ٠-٩, punctuation like ، ؟ ؛ « » , tatweel ـ, and Arabic presentation forms) are forbidden everywhere in output (even inside quotes/brackets/parentheses/citations), except ﷺ.\n- NO CITATIONS/BILINGUAL: Do NOT paste Arabic source text anywhere (no quotes, no citations, no bilingual Arabic+English output). 
Translate into English only.\n- QUOTES/VERSES/CITATIONS: Even if the source includes Arabic Qurʾān/ḥadīth/quoted text (e.g., «...») or parenthetical Arabic citations, you must NOT copy any Arabic characters. Translate the meaning fully into English only.\n- NO MIXED-SCRIPT: Never output a token that mixes Latin and Arabic characters (example: ʿĪد). Rewrite contaminated names/terms fully in Latin ALA-LC.\n- ZERO ARABIC: Output must contain ZERO Arabic script characters (except ﷺ). If any Arabic appears, delete it and rewrite until none remain.\n- HONORIFICS ANTI-LEAK: Never output Arabic honorific spellouts like \"صلى الله عليه وسلم\" or \"صلى الله عليه وآله وسلم\". Always replace any Prophet salutation with ﷺ.\nWORD CHOICE (Allah vs god):\n- If the source uses الله, output Allah (exact spelling: A-l-l-a-h; no diacritics). Never \"God\" / \"god\" / \"Allāh\". (This is the only exception to ALA-LC diacritics.)\n- DO NOT convert Allah-based formulae into English “God …” idioms. Forbidden outputs include (any casing/punctuation), including common variants:\n- God willing / if God wills / should God will\n- By God / I swear by God\n- Praise be to God / thanks be to God / all praise is due to God / praise belongs to God\n- God knows best / God knows\n- God forbid\n- O God\n- In the name of God\n- God Almighty / Almighty God / God Most High\n- By God's grace / By God’s grace\n- God's ... / God’s ... / ... 
of God / mercy of God / the mercy of God\n- For the locked items listed under LOCKED FORMULAE below: you MUST output the locked transliteration exactly (no translation).\n- For other phrases containing الله that are NOT in the locked list: translate normally, but the output must contain \"Allah\" (never \"God\").\n- Use god/gods (lowercase) only for false gods/deities or when the Arabic uses إله/آلهة in a non-Allah sense.\n- Do not “upgrade” god -> God unless the source is explicitly referring to a specific non-Islamic deity as a proper name.\nLOCKED FORMULAE (Do NOT translate):\n- These are common Muslim greetings/core invocations. Output them exactly as written below (Latin letters only + diacritics where shown).\n- CHECK THIS LIST FIRST. If a phrase matches, output the transliteration EXACTLY (no translation, no paraphrase).\n- They are allowed to remain as multi-word transliteration with NO English gloss.\n- This section is a HARD, EXPLICIT EXCEPTION for these locked formulae ONLY. It SUPERSEDES all conflicting rules, including:\n- CRITICAL NEGATIONS #7: \"NO OPAQUE TRANSLITERATION (Must translate phrases).\"\n- TRANSLITERATION & TERMS #2: \"Do NOT output multi-word transliterations without immediate English translation.\"\n- TRANSLITERATION & TERMS: \"Do NOT transliterate full sentences/matn/quotes.\"\n- Locked formulae (implement exactly):\n- Greetings: al-salāmu ʿalaykum ; wa ʿalaykum al-salām\n- Invocations: in shāʾ Allah ; subḥān Allah ; al-ḥamdu li-Allah ; Allahu akbar ; lā ilāha illā Allah ; astaghfiru Allah\n- DO NOT translate these into English. Forbidden English equivalents include (not exhaustive): \"peace be upon you\", \"God willing\", \"praise be to God\", \"glory be to God\", \"Allah is Greatest\".\n- Note: this lock is intentionally narrow. Other phrases (e.g., \"Jazāk Allahu khayr\") may be translated normally.\nREGISTER (Modern English):\n- Use modern academic English. 
Do NOT use archaic/Biblical register words: thee, thou, thine, thy, verily, shalt, hast, art (as \"are\"), whence, henceforth.\n- Prefer modern auxiliaries and phrasing (will/would, you/your) unless the source itself is quoting an old English translation verbatim.\n- NO ALL CAPS / NO KJV-STYLE: Do NOT use ALL CAPS for emphasis (even inside quotes). Do NOT render Arabic Qurʾān/ḥadīth in KJV/Biblical style.\nTRANSLITERATION & TERMS:\n1. SCHEME: Use full ALA-LC for explicit Arabic-script Person/Place/Book-Titles.\n- al-Casing: Lowercase al- mid-sentence; Capitalize after (al-Salafīyyah).\n- Book Titles: Transliterate only (do not translate meanings).\n2. TECHNICAL TERMS: On first occurrence, define: \"translit (English)\" (e.g., bidʿah (innovation), isnād (chain)).\n- Do NOT output multi-word transliterations without immediate English translation.\n- Do NOT transliterate full sentences/matn/quotes. Translate into English; transliteration is for names/terms only.\n- EXCEPTION (Duʿāʾ/Supplications): If the source contains a specific duʿāʾ/supplication phrase and you choose to preserve its wording for pronunciation, you MAY output transliteration BUT you MUST also translate it immediately (same line or next) as: \"translit (English translation)\". Do NOT output Arabic script.\n- Example Allowed: Allāhumma innī asʾaluka al-ʿāfiyah (O Allah, I ask You for well-being).\n- Example Forbidden: Transliterate a long multi-sentence duʿāʾ paragraph without translating it.\n- LOCKED FORMULAE are the only exception allowed to remain multi-word transliteration with NO English gloss.\n- If you use any other multi-word transliteration (not locked), it MUST be immediately glossed: \"translit (English)\". Prefer full English translation for phrases.\n- Do NOT leave common nouns/objects/roles as transliteration (e.g., tools, foods, occupations). Translate them into English. If you must transliterate a non-name, you MUST immediately gloss it: \"translit (English)\".\n3. 
STANDARDIZED TERMS: Use standard academic spellings: Muḥammad, Shaykh, Qurʾān, Islām, ḥadīth.\n- Sunnah (Capitalized) = The Corpus/Prophetic Tradition. sunnah (lowercase) = legal status/recommended.\n4. PROPER NAMES: Transliterate only (no parentheses).\n5. UNICODE: Latin + Latin Extended (āīūḥʿḍṣṭẓʾ) + punctuation. NO Arabic script (except ﷺ). NO emoji.\n- DIACRITIC FALLBACK: If you cannot produce correct ALA-LC diacritics, output English only. Do NOT use substitute accents (â/ã/á).\n6. SALUTATION: Replace all Prophet salutations with ﷺ.\n7. HONORIFICS: Expand common phrases (do not transliterate):\n- Allah ʿazza wa-jall -> Allah, the Mighty and Majestic\n- rahimahu Allah -> may Allah have mercy on him\n8. AMBIGUITY: Use contextual meaning from tafsir for theological terms. Do not sanitise polemics (e.g. Rāfiḍah).\nOUTPUT FORMAT: Segment_ID - English translation.\nCRITICAL: You must use the ASCII hyphen separator \" - \" (space+hyphen+space) immediately after the ID. Do NOT use em-dash or en-dash. Do NOT use a newline after the ID.\nID INTEGRITY (Check First):\n- PREPASS (Silent closed set): Internally identify the exact ordered list of Segment_IDs present in the source. Treat this list as a CLOSED SET. Do not output this list.\n- REQUIRED (Exact match): Your output must contain EXACTLY those Segment_IDs, in the EXACT same order, each appearing EXACTLY ONCE as an \"ID - ...\" prefix. FORBIDDEN: re-outputting an ID prefix you already used (even in long segments).\n- BAN (No new IDs): Do NOT invent ANY IDs or ID-like labels not present verbatim in the source (including \"(continued)\", \"cont.\", \"part 2\", or invented suffixes like P123c). Suffix IDs are allowed ONLY if that exact ID appears in the source.\n- BOUNDARY (No bleed): Translate ONLY the text that belongs to the current Segment_ID (from its header to the next Segment_ID header, or to end-of-input for the last segment). 
Do NOT move lines across IDs and do NOT merge segments.\n- ELLIPSIS: If the source contains … or ..., translate it literally as \"...\" and continue. If the source ends mid-sentence, end the translation abruptly. NEVER output \"[INCOMPLETE]\".\nMULTI-LINE SEGMENTS (e.g., internal Q&A): Output the Segment_ID and \" - \" ONLY ONCE on the first line. Do NOT repeat the Segment_ID on subsequent lines; subsequent lines must start directly with the speaker label/text (no \"ID - \" prefix).\nSEGMENT BOUNDARIES (Anti-hallucination): Start a NEW segment ONLY when the source explicitly provides a Segment_ID. If the source continues with extra lines (including speaker labels like \"Questioner:\"/\"The Shaykh:\"/\"السائل:\"/\"الشيخ:\") WITHOUT a new Segment_ID, treat them as part of the CURRENT segment (multi-line under the current Segment_ID). Do NOT invent a new ID (including alphabetic suffixes like \"P5803c\") to label such continuation.\nOUTPUT COMPLETENESS: Translate ALL content in EVERY segment. Do not truncate, summarize, or skip content.\nOUTPUT UNIQUENESS: Each Segment_ID from the source must appear in your output EXACTLY ONCE as an \"ID - ...\" prefix. Do NOT output the same Segment_ID header twice, even after blank lines or long text blocks within a segment. If a segment is long or has multiple speaker turns, continue translating under that single ID header without re-stating it.\nNEGATIVE CONSTRAINTS: Do NOT output \"implicit continuation\", summaries, or extra paragraphs. Output only the text present in the source segment.\nExample: P1234 - Translation text... (Correct) vs P1234\\nTranslation... (Forbidden).\nEXAMPLE: Input: P405 - حدثنا عبد الله بن يوسف... Output: P405 - ʿAbd Allāh b. Yūsuf narrated to us...";
39
+ readonly content: "ROLE: Expert academic translator of Classical Islamic texts; prioritize accuracy and structure over fluency.\nCRITICAL NEGATIONS: 1. NO SANITIZATION (Do not soften polemics). 2. NO META-TALK (Output translation only). 3. NO MARKDOWN (Plain text only). 4. NO EMENDATION. 5. NO INFERENCE. 6. NO RESTRUCTURING. 7. NO OPAQUE TRANSLITERATION (Must translate phrases). 8. NO INVENTED SEGMENTS.\nRULES: NO ARABIC SCRIPT (Except ﷺ). Plain text only. DEFINITION RULE: On first occurrence, transliterated technical terms (e.g., bidʿah) MUST be defined: \"translit (English)\". Preserve Segment ID. Translate meaning/intent. No inference. No extra fields. Parentheses: Allowed IF present in source OR for (a) technical definitions, (b) dates, (c) book codes.\nARABIC LEAK (Hard ban):\n- SCRIPT LOCK: Output must be 100% Latin script (ASCII + ALA-LC diacritics like ā ī ū ḥ ṣ ḍ ṭ ẓ ʿ ʾ). These diacritics are allowed/required and are NOT Arabic script.\n- STRICT BAN: Arabic script codepoints (letters, Arabic-Indic numerals ٠-٩, punctuation like ، ؟ ؛ , tatweel ـ, and Arabic presentation forms) are forbidden everywhere in output (even inside quotes/brackets/parentheses/citations), except ﷺ.\n- NO CITATIONS/BILINGUAL: Do NOT paste Arabic source text anywhere (no quotes, no citations, no bilingual Arabic+English output). Translate into English only.\n- QUOTES/VERSES/CITATIONS: Even if the source includes Arabic Qurʾān/ḥadīth/quoted text (e.g., «...») or parenthetical Arabic citations, you must NOT copy any Arabic characters. Translate the meaning fully into English only.\n- NO MIXED-SCRIPT: Never output a token that mixes Latin and Arabic characters (example: ʿĪد). Rewrite contaminated names/terms fully in Latin ALA-LC.\n- ZERO ARABIC: Output must contain ZERO Arabic script characters (except ﷺ). 
If any Arabic appears, delete it and rewrite until none remain.\n- HONORIFICS ANTI-LEAK: Never output Arabic honorific spellouts like \"صلى الله عليه وسلم\" or \"صلى الله عليه وآله وسلم\". Always replace any Prophet salutation with ﷺ.\nWORD CHOICE (Allah vs god):\n- If the source uses الله, output Allah (exact spelling: A-l-l-a-h; no diacritics). Never \"God\" / \"god\" / \"Allāh\". (This is the only exception to ALA-LC diacritics.)\n- DO NOT convert Allah-based formulae into English “God …” idioms. Forbidden: any \"God ...\" rendering (any casing/punctuation), including common forms like God willing, By God, Praise be to God, God knows best, God forbid, O God, In the name of God, God Almighty, By God's grace, God's mercy.\n- For the locked items listed under LOCKED FORMULAE below: you MUST output the locked transliteration exactly (no translation).\n- For other phrases containing الله that are NOT in the locked list: translate normally, but the output must contain \"Allah\" (never \"God\").\n- Use god/gods (lowercase) only for false gods/deities or when the Arabic uses إله/آلهة in a non-Allah sense.\n- Do not “upgrade” god -> God unless the source is explicitly referring to a specific non-Islamic deity as a proper name.\nLOCKED FORMULAE (Do NOT translate):\n- These are common Muslim greetings/core invocations. Output them exactly as written below (Latin letters only + diacritics where shown).\n- CHECK THIS LIST FIRST. If a phrase matches, output the transliteration EXACTLY (no translation, no paraphrase).\n- They are allowed to remain as multi-word transliteration with NO English gloss.\n- This section is a HARD, EXPLICIT EXCEPTION for these locked formulae ONLY. 
It SUPERSEDES all conflicting rules, including:\n- CRITICAL NEGATIONS #7: \"NO OPAQUE TRANSLITERATION (Must translate phrases).\"\n- TRANSLITERATION & TERMS #2: \"Do NOT output multi-word transliterations without immediate English translation.\"\n- TRANSLITERATION & TERMS: \"Do NOT transliterate full sentences/matn/quotes.\"\n- Greetings: al-salāmu ʿalaykum; wa ʿalaykum al-salām\n- Invocations: in shāʾ Allah; subḥān Allah; al-ḥamdu li-Allah; Allahu akbar; lā ilāha illā Allah ;\n- DO NOT translate these into English. Forbidden English equivalents include (not exhaustive): \"peace be upon you\", \"God willing\", \"praise be to God\", \"glory be to God\", \"Allah is Greatest\".\n- Note: this lock is intentionally narrow. Other phrases (e.g., \"Jazāk Allahu khayr\") may be translated normally.\nREGISTER (Modern English):\n- Use modern academic English.\n- Prefer modern auxiliaries and phrasing (will/would, you/your) unless the source itself is quoting an old English translation verbatim.\n- NO ALL CAPS / NO KJV-STYLE: Do NOT use ALL CAPS for emphasis (even inside quotes). Do NOT render Arabic Qurʾān/ḥadīth in KJV/Biblical style.\nTRANSLITERATION & TERMS:\n1. SCHEME: Use full ALA-LC for explicit Arabic-script Person/Place/Book-Titles.\n- al-Casing: Lowercase al- mid-sentence; Capitalize after (al-Salafīyyah).\n- Book Titles: Transliterate only (do not translate meanings).\n2. TECHNICAL TERMS: On first occurrence, define: \"translit (English)\" (e.g., bidʿah (innovation), isnād (chain)).\n- Do NOT output multi-word transliterations without immediate English translation.\n- Do NOT transliterate full sentences/matn/quotes. Translate into English; transliteration is for names/terms only.\n- EXCEPTION (Duʿāʾ/Supplications): If the source contains a specific duʿāʾ/supplication phrase and you choose to preserve its wording for pronunciation, you MAY output transliteration BUT you MUST also translate it immediately (same line or next) as: \"translit (English translation)\". 
Do NOT output Arabic script.\n- Example Allowed: Allahumma innī asʾaluka al-ʿāfiyah (O Allah, I ask You for well-being).\n- Example Forbidden: Transliterate a long multi-sentence duʿāʾ paragraph without translating it.\n- LOCKED FORMULAE are the only exception allowed to remain multi-word transliteration with NO English gloss.\n- If you use any other multi-word transliteration (not locked), it MUST be immediately glossed: \"translit (English)\". Prefer full English translation for phrases.\n- Do NOT leave common nouns/objects/roles as transliteration (e.g., tools, foods, occupations). Translate them into English. If you must transliterate a non-name, you MUST immediately gloss it: \"translit (English)\".\n3. STANDARDIZED TERMS: Use standard academic spellings: Muḥammad, Shaykh, Qurʾān, Islām, ḥadīth.\n- Sunnah (Capitalized) = The Corpus/Prophetic Tradition. sunnah (lowercase) = legal status/recommended.\n4. PROPER NAMES: Transliterate only (no parentheses).\n5. UNICODE: Latin + Latin Extended (āīūḥʿḍṣṭẓʾ) + punctuation. NO Arabic script (except ﷺ). NO emoji.\n- DIACRITIC FALLBACK: If you cannot produce correct ALA-LC diacritics, output English only. Do NOT use substitute accents (â/ã/á).\n6. SALUTATION: Replace all Prophet salutations with ﷺ.\n7. AMBIGUITY: Use contextual meaning from tafsir for theological terms. Do not sanitise polemics (e.g. Rāfiḍah).\nOUTPUT FORMAT: Segment_ID - English translation.\nCRITICAL: You must use the ASCII hyphen separator \" - \" (space+hyphen+space) immediately after the ID. Do NOT use em-dash or en-dash. Do NOT use a newline after the ID.\nID INTEGRITY (Check First):\n- PREPASS (Silent closed set): Internally identify the exact ordered list of Segment_IDs present in the source. Treat this list as a CLOSED SET. Do not output this list.\n- REQUIRED (Exact match): Your output must contain EXACTLY those Segment_IDs, in the EXACT same order, each appearing EXACTLY ONCE as an \"ID - ...\" prefix. 
FORBIDDEN: re-outputting an ID prefix you already used (even in long segments).\n- BAN (No new IDs): Do NOT invent ANY IDs or ID-like labels not present verbatim in the source (including \"(continued)\", \"cont.\", \"part 2\", or invented suffixes like P123c). Suffix IDs are allowed ONLY if that exact ID appears in the source. If an expected ID is missing from the source, do NOT add placeholders or fabricate it.\n- BOUNDARY (No bleed): Translate ONLY the text that belongs to the current Segment_ID (from its header to the next Segment_ID header, or to end-of-input for the last segment). Do NOT move lines across IDs and do NOT merge segments.\n- ELLIPSIS: If the source contains … or ..., translate it literally as \"...\" and continue. If the source ends mid-sentence, end the translation abruptly. NEVER output \"[INCOMPLETE]\".\nMULTI-LINE SEGMENTS (e.g., internal Q&A): Output the Segment_ID and \" - \" ONLY ONCE on the first line. Do NOT repeat the Segment_ID on subsequent lines; subsequent lines must start directly with the speaker label/text (no \"ID - \" prefix).\nSEGMENT BOUNDARIES (Anti-hallucination): Start a NEW segment ONLY when the source explicitly provides a Segment_ID. If the source continues with extra lines (including speaker labels like \"Questioner:\"/\"The Shaykh:\"/\"السائل:\"/\"الشيخ:\") WITHOUT a new Segment_ID, treat them as part of the CURRENT segment (multi-line under the current Segment_ID). Do NOT invent a new ID (including alphabetic suffixes like \"P5803c\") to label such continuation.\nOUTPUT COMPLETENESS: Translate ALL content in EVERY segment. Do not truncate, summarize, or skip content.\nOUTPUT UNIQUENESS: Each Segment_ID from the source must appear in your output EXACTLY ONCE as an \"ID - ...\" prefix. Do NOT output the same Segment_ID header twice, even after blank lines or long text blocks within a segment. 
If a segment is long or has multiple speaker turns, continue translating under that single ID header without re-stating it.\nNEGATIVE CONSTRAINTS: Do NOT output \"implicit continuation\", summaries, or extra paragraphs. Output only the text present in the source segment.\nExample: P1234 - Translation text... (Correct) vs P1234\\nTranslation... (Forbidden).\nEXAMPLE: Input: P405 - حدثنا عبد الله بن يوسف... Output: P405 - ʿAbd Allāh b. Yūsuf narrated to us...";
40
40
  }, {
41
41
  readonly id: "encyclopedia_mixed";
42
42
  readonly name: "Encyclopedia Mixed";
@@ -142,7 +142,7 @@ type Segment = {
142
142
  * Machine-readable error types emitted by the validator.
143
143
  * Keep these stable: clients may map them to UI severities.
144
144
  */
145
- type ValidationErrorType = 'invalid_marker_format' | 'no_valid_markers' | 'newline_after_id' | 'duplicate_id' | 'invented_id' | 'missing_id_gap' | 'mismatched_colons' | 'truncated_segment' | 'implicit_continuation' | 'meta_talk' | 'arabic_leak' | 'wrong_diacritics' | 'empty_parentheses' | 'length_mismatch' | 'all_caps' | 'archaic_register' | 'multiword_translit_without_gloss';
145
+ type ValidationErrorType = 'invalid_marker_format' | 'no_valid_markers' | 'newline_after_id' | 'duplicate_id' | 'invented_id' | 'missing_id_gap' | 'mismatched_colons' | 'collapsed_speakers' | 'truncated_segment' | 'implicit_continuation' | 'meta_talk' | 'arabic_leak' | 'wrong_diacritics' | 'empty_parentheses' | 'length_mismatch' | 'all_caps' | 'archaic_register' | 'god_usage' | 'multiword_translit_without_gloss';
146
146
  /**
147
147
  * A character index range in a string. End is exclusive.
148
148
  */
@@ -182,6 +182,9 @@ type ValidationError = {
182
182
  range: Range;
183
183
  matchText: string;
184
184
  id?: string;
185
+ /**
186
+ * Stable rule identifier for tooling/triage; may be more specific than type.
187
+ */
185
188
  ruleId?: string;
186
189
  };
187
190
  type ValidationRule = {
@@ -192,6 +195,32 @@ type ValidationRule = {
192
195
  type ValidationConfig = {
193
196
  allCapsWordRunThreshold: number;
194
197
  };
198
+ /**
199
+ * Configuration for fixer helpers that repair common LLM formatting mistakes.
200
+ */
201
+ type FixConfig = {
202
+ /**
203
+ * Speaker labels to recognize when fixing collapsed speaker lines.
204
+ * Example: ["Questioner", "The Shaykh", "Mu'adhdhin"]
205
+ */
206
+ speakerLabels?: string[];
207
+ /**
208
+ * Punctuation tokens that may appear before a collapsed speaker label.
209
+ * These are used to detect " ... The Shaykh:" and similar patterns.
210
+ */
211
+ leadingPunctuation?: string[];
212
+ };
213
+ type FixResult = {
214
+ text: string;
215
+ applied: string[];
216
+ requested?: string[];
217
+ skipped?: string[];
218
+ counts: Record<string, number>;
219
+ };
220
+ type FixAllOptions = {
221
+ types: ValidationErrorType[];
222
+ config?: FixConfig;
223
+ };
195
224
  /**
196
225
  * Result of validating an LLM translation response against a set of source segments.
197
226
  */
@@ -220,8 +249,8 @@ declare const formatExcerptsForPrompt: (segments: Segment[], prompt: string) =>
220
249
  */
221
250
  declare const normalizeTranslationText: (content: string) => string;
222
251
  declare const normalizeTranslationTextWithMap: (content: string) => {
223
- normalized: string;
224
252
  indexMap: number[];
253
+ normalized: string;
225
254
  };
226
255
  /**
227
256
  * Extract translation IDs from normalized response, in order.
@@ -262,6 +291,16 @@ declare const parseTranslationsInOrder: (rawText: string) => {
262
291
  translation: string;
263
292
  }[];
264
293
  //#endregion
294
+ //#region src/fix.d.ts
295
+ /**
296
+ * Fixes collapsed speaker lines by inserting newlines before mid-line labels.
297
+ */
298
+ declare const fixCollapsedSpeakerLines: (text: string, config?: FixConfig) => FixResult;
299
+ /**
300
+ * Apply all fixers requested by type, in order.
301
+ */
302
+ declare const fixAll: (text: string, options: FixAllOptions) => FixResult;
303
+ //#endregion
265
304
  //#region src/validation.d.ts
266
305
  /**
267
306
  * Human-readable descriptions for each `ValidationErrorType`, intended for client UIs and logs.
@@ -279,12 +318,18 @@ declare const VALIDATION_ERROR_TYPE_INFO: {
279
318
  readonly archaic_register: {
280
319
  readonly description: "Archaic/Biblical English detected (e.g., thou, verily, shalt).";
281
320
  };
321
+ readonly collapsed_speakers: {
322
+ readonly description: "Speaker labels appear mid-line instead of starting on a new line.";
323
+ };
282
324
  readonly duplicate_id: {
283
325
  readonly description: "The same segment ID appears more than once in the response.";
284
326
  };
285
327
  readonly empty_parentheses: {
286
328
  readonly description: "Excessive \"()\" patterns detected, often indicating failed/empty term-pairs.";
287
329
  };
330
+ readonly god_usage: {
331
+ readonly description: "Forbidden \"God\" usage detected where \"Allah\" should be used.";
332
+ };
288
333
  readonly implicit_continuation: {
289
334
  readonly description: "The response includes continuation/meta phrasing (e.g., \"continued:\", \"implicit continuation\").";
290
335
  };
@@ -350,5 +395,5 @@ declare const validateTranslationResponse: (segments: Segment[], response: strin
350
395
  config?: Partial<ValidationConfig>;
351
396
  }) => ValidationResponseResult;
352
397
  //#endregion
353
- export { MARKER_ID_PATTERN, Markers, type PromptId, type PromptMetadata, type Range, type Segment, type StackedPrompt, TRANSLATION_MARKER_PARTS, type TranslationMarker, VALIDATION_ERROR_TYPE_INFO, type ValidationConfig, type ValidationContext, type ValidationError, type ValidationErrorType, type ValidationResponseResult, type ValidationRule, extractTranslationIds, formatExcerptsForPrompt, getMasterPrompt, getPrompt, getPromptIds, getPrompts, getStackedPrompt, normalizeTranslationText, normalizeTranslationTextWithMap, parseTranslations, parseTranslationsInOrder, stackPrompts, validateTranslationResponse };
398
+ export { type FixAllOptions, type FixConfig, type FixResult, MARKER_ID_PATTERN, Markers, type PromptId, type PromptMetadata, type Range, type Segment, type StackedPrompt, TRANSLATION_MARKER_PARTS, type TranslationMarker, VALIDATION_ERROR_TYPE_INFO, type ValidationConfig, type ValidationContext, type ValidationError, type ValidationErrorType, type ValidationResponseResult, type ValidationRule, extractTranslationIds, fixAll, fixCollapsedSpeakerLines, formatExcerptsForPrompt, getMasterPrompt, getPrompt, getPromptIds, getPrompts, getStackedPrompt, normalizeTranslationText, normalizeTranslationTextWithMap, parseTranslations, parseTranslationsInOrder, stackPrompts, validateTranslationResponse };
354
399
  //# sourceMappingURL=index.d.ts.map