bitaboom 1.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -789,36 +789,6 @@ unescapeSpaces('regular text');
789
789
 
790
790
  ---
791
791
 
792
-
793
- ## sanitizeArabic — unified Arabic text sanitizer
794
-
795
- `sanitizeArabic(input, optionsOrPreset)` provides fast, configurable cleanup for Arabic text and replaces older per-rule utilities.
796
- It supports presets (`"light"`, `"search"`, `"aggressive"`) and fine-grained options like `stripDiacritics`, `stripTatweel`, `normalizeAlif`,
797
- `replaceAlifMaqsurah`, `replaceTaMarbutahWithHa`, `stripZeroWidth`, `zeroWidthToSpace`, `stripLatinAndSymbols`, `lettersAndSpacesOnly`,
798
- `keepOnlyArabicLetters`, `collapseWhitespace`, `trim`, and `removeHijriMarker`. For one-off rules, use `base: 'none'` to apply only what you specify.
799
-
800
- **Examples**
801
-
802
- ```ts
803
- import { sanitizeArabic } from 'bitaboom';
804
-
805
- // Light display cleanup
806
- sanitizeArabic(' مرحبا\u200C\u200D بالعالم ', 'light'); // → 'مرحبا بالعالم'
807
-
808
- // Tolerant search normalization
809
- sanitizeArabic('اَلسَّلَامُ عَلَيْكُمْ', 'search'); // → 'السلام عليكم'
810
-
811
- // Indexing-friendly text (letters + spaces only)
812
- sanitizeArabic('اَلسَّلَامُ 1435/3/29 هـ — www', 'aggressive'); // → 'السلام'
813
-
814
- // Tatweel-only, preserving dates/list markers
815
- sanitizeArabic('أبـــتِـــكَةُ', { base: 'none', stripTatweel: true }); // → 'أبتِكَةُ'
816
-
817
- // Zero-width controls → spaces
818
- sanitizeArabic('يَخْلُوَ ‏. ‏ قَالَ غَرِيبٌ ‏. ‏', { base: 'none', stripZeroWidth: true, zeroWidthToSpace: true });
819
- // → 'يَخْلُوَ . قَالَ غَرِيبٌ . '
820
- ```
821
-
822
792
  ## makeDiacriticInsensitiveRegex — tolerant Arabic matcher
823
793
 
824
794
  `makeDiacriticInsensitiveRegex(needle, opts?)` returns a `RegExp` that matches Arabic text while ignoring diacritics,
package/dist/index.d.ts CHANGED
@@ -88,80 +88,6 @@ declare const removeSolitaryArabicLetters: (text: string) => string;
88
88
  */
89
89
  declare const replaceEnglishPunctuationWithArabic: (text: string) => string;
90
90
 
91
- /**
92
- * Ultra-fast Arabic text sanitizer for search/indexing/display.
93
- * Optimized for very high call rates: avoids per-call object spreads and minimizes allocations.
94
- * Options can merge over a base preset or `'none'` to apply exactly the rules you request.
95
- */
96
- type SanitizePreset = 'light' | 'search' | 'aggressive';
97
- type SanitizeBase = 'none' | SanitizePreset;
98
- /**
99
- * Public options for {@link sanitizeArabic}. When you pass an options object, it overlays the chosen
100
- * `base` (default `'light'`) without allocating merged objects on the hot path; flags are resolved
101
- * directly into local booleans for speed.
102
- */
103
- type SanitizeOptions = {
104
- /** Base to merge over. `'none'` applies only the options you specify. Default when passing an object: `'light'`. */
105
- base?: SanitizeBase;
106
- /** Unicode NFC normalization. Default: `true` in all presets. */
107
- nfc?: boolean;
108
- /** Strip zero-width controls (U+200B–U+200F, U+202A–U+202E, U+2060–U+2064, U+FEFF). Default: `true` in presets. */
109
- stripZeroWidth?: boolean;
110
- /** If stripping zero-width, replace them with a space instead of removing. Default: `false`. */
111
- zeroWidthToSpace?: boolean;
112
- /** Remove Arabic diacritics (tashkīl). Default: `true` in `'search'`/`'aggressive'`. */
113
- stripDiacritics?: boolean;
114
- /**
115
- * Remove tatweel (ـ).
116
- * - `true` is treated as `'safe'` (preserves tatweel after digits or 'ه' for dates/list markers)
117
- * - `'safe'` or `'all'` explicitly
118
- * - `false` to keep tatweel
119
- * Default: `'all'` in `'search'`/`'aggressive'`, `false` in `'light'`.
120
- */
121
- stripTatweel?: boolean | 'safe' | 'all';
122
- /** Normalize آ/أ/إ → ا. Default: `true` in `'search'`/`'aggressive'`. */
123
- normalizeAlif?: boolean;
124
- /** Replace ى → ي. Default: `true` in `'search'`/`'aggressive'`. */
125
- replaceAlifMaqsurah?: boolean;
126
- /** Replace ة → ه (lossy). Default: `true` in `'aggressive'` only. */
127
- replaceTaMarbutahWithHa?: boolean;
128
- /** Strip Latin letters/digits and common OCR noise into spaces. Default: `true` in `'aggressive'`. */
129
- stripLatinAndSymbols?: boolean;
130
- /** Keep only Arabic letters (no whitespace). Use for compact keys, not FTS. */
131
- keepOnlyArabicLetters?: boolean;
132
- /** Keep Arabic letters + spaces (drops digits/punct/symbols). Great for FTS. Default: `true` in `'aggressive'`. */
133
- lettersAndSpacesOnly?: boolean;
134
- /** Collapse runs of whitespace to a single space. Default: `true`. */
135
- collapseWhitespace?: boolean;
136
- /** Trim leading/trailing whitespace. Default: `true`. */
137
- trim?: boolean;
138
- /**
139
- * Remove the Hijri date marker ("هـ" or bare "ه" if tatweel already removed) when it follows a date-like token
140
- * (digits/slashes/hyphens/spaces). Example: `1435/3/29 هـ` → `1435/3/29`.
141
- * Default: `true` in `'search'`/`'aggressive'`, `false` in `'light'`.
142
- */
143
- removeHijriMarker?: boolean;
144
- };
145
- /**
146
- * Sanitizes Arabic text according to a preset or custom options.
147
- *
148
- * Presets:
149
- * - `'light'`: NFC, zero-width removal, collapse/trim spaces.
150
- * - `'search'`: removes diacritics and tatweel, normalizes Alif and ى→ي, removes Hijri marker.
151
- * - `'aggressive'`: ideal for FTS; keeps letters+spaces only and strips common noise.
152
- *
153
- * Custom options:
154
- * - Passing an options object overlays the selected `base` preset (default `'light'`).
155
- * - Use `base: 'none'` to apply **only** the rules you specify (e.g., tatweel only).
156
- *
157
- * Examples:
158
- * ```ts
159
- * sanitizeArabic('أبـــتِـــكَةُ', { base: 'none', stripTatweel: true }); // 'أبتِكَةُ'
160
- * sanitizeArabic('1435/3/29 هـ', 'aggressive'); // '1435 3 29'
161
- * sanitizeArabic('اَلسَّلَامُ عَلَيْكُمْ', 'search'); // 'السلام عليكم'
162
- * ```
163
- */
164
- declare const sanitizeArabic: (input: string, optionsOrPreset?: SanitizePreset | SanitizeOptions) => string;
165
91
  /**
166
92
  * Escape a string so it can be safely embedded into a RegExp source.
167
93
  *
@@ -730,4 +656,4 @@ declare const normalizeTransliteratedEnglish: (text: string) => string;
730
656
  */
731
657
  declare const extractInitials: (fullName: string) => string;
732
658
 
733
- export { type MakeRegexOptions, type SanitizeBase, type SanitizeOptions, type SanitizePreset, addSpaceBeforeAndAfterPunctuation, addSpaceBetweenArabicTextAndNumbers, applySmartQuotes, arabicNumeralToNumber, cleanExtremeArabicUnderscores, cleanLiteralNewLines, cleanMultilines, cleanSpacesBeforePeriod, cleanSymbolsAndPartReferences, cleanTrailingPageNumbers, condenseAsterisks, condenseColons, condenseDashes, condenseEllipsis, condensePeriods, condenseUnderscores, convertUrduSymbolsToArabic, doubleToSingleBrackets, ensureSpaceBeforeBrackets, ensureSpaceBeforeQuotes, escapeRegex, extractInitials, fixBracketTypos, fixCurlyBraces, fixMismatchedQuotationMarks, fixTrailingWow, formatStringBySentence, getArabicScore, hasWordInSingleLine, insertLineBreaksAfterPunctuation, isAllUppercase, isBalanced, isJsonStructureValid, isOnlyPunctuation, makeDiacriticInsensitive, makeDiacriticInsensitiveRegex, normalize, normalizeArabicPrefixesToAl, normalizeDoubleApostrophes, normalizeJsonSyntax, normalizeSlashInReferences, normalizeSpaces, normalizeTransliteratedEnglish, parsePageRanges, reduceMultilineBreaksToDouble, reduceMultilineBreaksToSingle, removeArabicPrefixes, removeDeathYear, removeMarkdownFormatting, removeNonIndexSignatures, removeNumbersAndDashes, removeRedundantPunctuation, removeSingleDigitReferences, removeSingularCodes, removeSolitaryArabicLetters, removeSpaceInsideBrackets, removeUrls, replaceDoubleBracketsWithArrows, replaceEnglishPunctuationWithArabic, replaceLineBreaksWithSpaces, replaceSalutationsWithSymbol, sanitizeArabic, splitByQuotes, stripAllDigits, stripBoldStyling, stripItalicsStyling, stripStyling, toTitleCase, trimSpaceInsideQuotes, truncate, truncateMiddle, unescapeSpaces };
659
+ export { type MakeRegexOptions, addSpaceBeforeAndAfterPunctuation, addSpaceBetweenArabicTextAndNumbers, applySmartQuotes, arabicNumeralToNumber, cleanExtremeArabicUnderscores, cleanLiteralNewLines, cleanMultilines, cleanSpacesBeforePeriod, cleanSymbolsAndPartReferences, cleanTrailingPageNumbers, condenseAsterisks, condenseColons, condenseDashes, condenseEllipsis, condensePeriods, condenseUnderscores, convertUrduSymbolsToArabic, doubleToSingleBrackets, ensureSpaceBeforeBrackets, ensureSpaceBeforeQuotes, escapeRegex, extractInitials, fixBracketTypos, fixCurlyBraces, fixMismatchedQuotationMarks, fixTrailingWow, formatStringBySentence, getArabicScore, hasWordInSingleLine, insertLineBreaksAfterPunctuation, isAllUppercase, isBalanced, isJsonStructureValid, isOnlyPunctuation, makeDiacriticInsensitive, makeDiacriticInsensitiveRegex, normalize, normalizeArabicPrefixesToAl, normalizeDoubleApostrophes, normalizeJsonSyntax, normalizeSlashInReferences, normalizeSpaces, normalizeTransliteratedEnglish, parsePageRanges, reduceMultilineBreaksToDouble, reduceMultilineBreaksToSingle, removeArabicPrefixes, removeDeathYear, removeMarkdownFormatting, removeNonIndexSignatures, removeNumbersAndDashes, removeRedundantPunctuation, removeSingleDigitReferences, removeSingularCodes, removeSolitaryArabicLetters, removeSpaceInsideBrackets, removeUrls, replaceDoubleBracketsWithArrows, replaceEnglishPunctuationWithArabic, replaceLineBreaksWithSpaces, replaceSalutationsWithSymbol, splitByQuotes, stripAllDigits, stripBoldStyling, stripItalicsStyling, stripStyling, toTitleCase, trimSpaceInsideQuotes, truncate, truncateMiddle, unescapeSpaces };
package/dist/index.js CHANGED
@@ -1,10 +1,10 @@
1
- var ce=e=>parseInt(e.replace(/[\u0660-\u0669]/g,r=>(r.charCodeAt(0)-1632).toString()),10),ue=e=>e.replace(/(?<!\d ?ه|اه)ـ(?=\r?$)|^ـ(?!اهـ)/gm,""),pe=e=>e.replace(/ھ/g,"\u0647").replace(/ی/g,"\u064A"),ge=e=>{if(!e)return 0;let r=/[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]/g,t=/[0-9\u0660-\u0669\u06F0-\u06F9]/g,n=/[^\s0-9\u0660-\u0669\u06F0-\u06F9]/g,s=e.replace(t,""),l=s.match(r)||[],o=s.match(n)||[];return o.length===0?0:l.length/o.length},fe=e=>e.replace(/ و /g," \u0648"),De=e=>e.replace(/([\u0600-\u06FF]+)(\d+)/g,"$1 $2"),me=e=>e.replace(/(?<![0-9] ?)-|(?<=[\u0600-\u06FF])\s?\d\s?(?=[\u0600-\u06FF])/g," ").replace(/(?<=[\u0600-\u06FF]\s)(\d+\s)+\d+(?=(\s[\u0600-\u06FF]|$))/g," "),be=e=>e.replace(/[[({][\u0621-\u064A\u0660-\u0669][\])}]/g,""),he=e=>e.replace(/(^| )[\u0621-\u064A]( |$)/g," "),Ae=e=>e.replace(/\?|؟\./g,"\u061F").replace(/(;|؛)\s*(\1\s*)*/g,"\u061B").replace(/,|-،/g,"\u060C");var y=/\s+/g,h=/\u0640/g,M=/[\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06ED]/g,z=/[أإآٱ]/g,F=/\u0649/g,W=/\u0629/g,L=/[\u200B-\u200F\u202A-\u202E\u2060-\u2064\uFEFF]/g,k=/[A-Za-z]+[0-9]*|[0-9]+|[¬§`=]|[/]{2,}|[&]|[ﷺ]/g,E=/[^\u0621-\u063A\u0641-\u064A\u0671\u067E\u0686\u06A4-\u06AF\u06CC\u06D2\u06D3]/g,B=/[^\u0621-\u063A\u0641-\u064A\u0671\u067E\u0686\u06A4-\u06AF\u06CC\u06D2\u06D3\s]/g,R=e=>e===32,_=e=>e>=48&&e<=57||e>=1632&&e<=1641,O=e=>e.replace(h,(r,t,n)=>{let s=t-1;for(;s>=0&&R(n.charCodeAt(s));)s--;if(s>=0){let l=n.charCodeAt(s);if(_(l)||l===1607)return"\u0640"}return""}),v=e=>e.replace(/([0-9\u0660-\u0669][0-9\u0660-\u0669/\-\s]*?)\s*ه(?:ـ)?(?=(?:\s|$|[^\p{L}\p{N}]))/gu,"$1"),P=(e,r)=>r&&e.normalize?e.normalize("NFC"):e,w=(e,r,t)=>r?e.replace(L,t?" ":""):e,N=(e,r,t)=>(r&&(e=e.replace(M,"")),t==="safe"?O(e):t==="all"?e.replace(h,""):e),H=(e,r,t,n)=>(r&&(e=e.replace(z,"\u0627")),t&&(e=e.replace(F,"\u064A")),n&&(e=e.replace(W,"\u0647")),e),I=(e,r)=>r?e.replace(k," "):e,j=(e,r,t)=>r?e.replace(B," "):t?e.replace(E,""):e,q=(e,r,t)=>(r&&(e=e.replace(y," ")),t&&(e=e.trim()),e),i=(e,r)=>r===void 0?e:!!r,Z=(e,r)=>r===void 0?e:r===!0?"safe":r===!1?!1:r,b={light:{nfc:!0,stripZeroWidth:!0,zeroWidthToSpace:!1,stripDiacritics:!1,stripTatweel:!1,normalizeAlif:!1,replaceAlifMaqsurah:!1,replaceTaMarbutahWithHa:!1,stripLatinAndSymbols:!1,keepOnlyArabicLetters:!1,lettersAndSpacesOnly:!1,collapseWhitespace:!0,trim:!0,removeHijriMarker:!1},search:{nfc:!0,stripZeroWidth:!0,zeroWidthToSpace:!1,stripDiacritics:!0,stripTatweel:"all",normalizeAlif:!0,replaceAlifMaqsurah:!0,replaceTaMarbutahWithHa:!1,stripLatinAndSymbols:!1,keepOnlyArabicLetters:!1,lettersAndSpacesOnly:!1,collapseWhitespace:!0,trim:!0,removeHijriMarker:!0},aggressive:{nfc:!0,stripZeroWidth:!0,zeroWidthToSpace:!1,stripDiacritics:!0,stripTatweel:"all",normalizeAlif:!0,replaceAlifMaqsurah:!0,replaceTaMarbutahWithHa:!0,stripLatinAndSymbols:!0,keepOnlyArabicLetters:!1,lettersAndSpacesOnly:!0,collapseWhitespace:!0,trim:!0,removeHijriMarker:!0}},U={nfc:!1,stripZeroWidth:!1,zeroWidthToSpace:!1,stripDiacritics:!1,stripTatweel:!1,normalizeAlif:!1,replaceAlifMaqsurah:!1,replaceTaMarbutahWithHa:!1,stripLatinAndSymbols:!1,keepOnlyArabicLetters:!1,lettersAndSpacesOnly:!1,collapseWhitespace:!1,trim:!1,removeHijriMarker:!1},Se=(e,r="search")=>{if(!e)return"";let t,n=null;if(typeof r=="string")t=b[r];else{let m=r.base??"light";t=m==="none"?U:b[m],n=r}let s=i(t.nfc,n?.nfc),l=i(t.stripZeroWidth,n?.stripZeroWidth),o=i(t.zeroWidthToSpace,n?.zeroWidthToSpace),u=i(t.stripDiacritics,n?.stripDiacritics),p=i(t.normalizeAlif,n?.normalizeAlif),g=i(t.replaceAlifMaqsurah,n?.replaceAlifMaqsurah),c=i(t.replaceTaMarbutahWithHa,n?.replaceTaMarbutahWithHa),d=i(t.stripLatinAndSymbols,n?.stripLatinAndSymbols),D=i(t.lettersAndSpacesOnly,n?.lettersAndSpacesOnly),S=i(t.keepOnlyArabicLetters,n?.keepOnlyArabicLetters),x=i(t.collapseWhitespace,n?.collapseWhitespace),C=i(t.trim,n?.trim),$=i(t.removeHijriMarker,n?.removeHijriMarker),T=Z(t.stripTatweel,n?.stripTatweel),a=e;return a=P(a,s),a=w(a,l,o),$&&(a=v(a)),a=N(a,u,T),a=H(a,p,g,c),D||(a=I(a,d)),a=j(a,D,S),a=q(a,x,C),a},X="[\\u0610-\\u061A\\u064B-\\u065F\\u0670\\u06D6-\\u06ED]",Q="\\u0640",f=e=>e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&"),xe=(e,r={})=>{let{equivalences:t={alif:!0,taMarbutahHa:!0,alifMaqsurahYa:!0},allowTatweel:n=!0,ignoreDiacritics:s=!0,flexWhitespace:l=!0,flags:o="u"}=r;if(e.length>5e3)throw new Error("makeDiacriticInsensitiveRegex: needle too long");let u=c=>{switch(c){case"\u0627":case"\u0623":case"\u0625":case"\u0622":return t.alif?"[\u0627\u0623\u0625\u0622]":"\u0627";case"\u0629":case"\u0647":return t.taMarbutahHa?"[\u0647\u0629]":f(c);case"\u0649":case"\u064A":return t.alifMaqsurahYa?"[\u0649\u064A]":f(c);default:return f(c)}},p=`${s?`${X}*`:""}${n?`${Q}*`:""}`,g="";for(let c of Array.from(e))/\s/.test(c)?g+=l?"\\s+":"\\s*":g+=`${u(c)}${p}`;return new RegExp(g,o)};var $e=e=>{let r=/([.?!؟])/g;return e.replace(r,`$1
1
+ var E=e=>parseInt(e.replace(/[\u0660-\u0669]/g,r=>(r.charCodeAt(0)-1632).toString()),10),P=e=>e.replace(/(?<!\d ?ه|اه)ـ(?=\r?$)|^ـ(?!اهـ)/gm,""),v=e=>e.replace(/ھ/g,"\u0647").replace(/ی/g,"\u064A"),z=e=>{if(!e)return 0;let r=/[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]/g,t=/[0-9\u0660-\u0669\u06F0-\u06F9]/g,s=/[^\s0-9\u0660-\u0669\u06F0-\u06F9]/g,n=e.replace(t,""),u=n.match(r)||[],a=n.match(s)||[];return a.length===0?0:u.length/a.length},L=e=>e.replace(/ و /g," \u0648"),T=e=>e.replace(/([\u0600-\u06FF]+)(\d+)/g,"$1 $2"),R=e=>e.replace(/(?<![0-9] ?)-|(?<=[\u0600-\u06FF])\s?\d\s?(?=[\u0600-\u06FF])/g," ").replace(/(?<=[\u0600-\u06FF]\s)(\d+\s)+\d+(?=(\s[\u0600-\u06FF]|$))/g," "),w=e=>e.replace(/[[({][\u0621-\u064A\u0660-\u0669][\])}]/g,""),_=e=>e.replace(/(^| )[\u0621-\u064A]( |$)/g," "),I=e=>e.replace(/\?|؟\./g,"\u061F").replace(/(;|؛)\s*(\1\s*)*/g,"\u061B").replace(/,|-،/g,"\u060C");var D="[\\u0610-\\u061A\\u064B-\\u065F\\u0670\\u06D6-\\u06ED]",m="\\u0640",c=e=>e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&"),W=(e,r={})=>{let{equivalences:t={alif:!0,taMarbutahHa:!0,alifMaqsurahYa:!0},allowTatweel:s=!0,ignoreDiacritics:n=!0,flexWhitespace:u=!0,flags:a="u"}=r;if(e.length>5e3)throw new Error("makeDiacriticInsensitiveRegex: needle too long");let i=o=>{switch(o){case"\u0627":case"\u0623":case"\u0625":case"\u0622":return t.alif?"[\u0627\u0623\u0625\u0622]":"\u0627";case"\u0629":case"\u0647":return t.taMarbutahHa?"[\u0647\u0629]":c(o);case"\u0649":case"\u064A":return t.alifMaqsurahYa?"[\u0649\u064A]":c(o);default:return c(o)}},l=`${n?`${D}*`:""}${s?`${m}*`:""}`,p="";for(let o of Array.from(e))/\s/.test(o)?p+=u?"\\s+":"\\s*":p+=`${i(o)}${l}`;return new RegExp(p,a)};var U=e=>{let r=/([.?!؟])/g;return e.replace(r,`$1
2
2
  `).replace(/\n\s+/g,`
3
- `).trim()},Te=e=>e.replace(/( ?)([.!?,،؟;؛])((?![ '”“)"\]\n])|(?=\s{2,}))/g,"$1$2 ").replace(/\s([.!?,،؟;؛])\s*([ '”“)"\]\n])/g,"$1$2").replace(/([^\s\w\d'”“)"\]]+)\s+([.!?,،؟;؛])|([.!?,،؟;؛])\s+$/g,"$1$2$3").replace(/(?<=\D)( ?: ?)(?!(\d+:)|(:\d+))|(?<=\d) ?: ?(?=\D)|(?<=\D) ?: ?(?=\d)/g,": "),ye=e=>e.replace(/[“”]/g,'"').replace(/"([^"]*)"/g,"\u201C$1\u201D").replace(/^”/g,"\u201C"),Me=e=>e.replace(/\\n|\r/g,`
4
- `),ze=e=>e.replace(/^ +| +$/gm,""),Fe=e=>/^\s*\S+\s*$/gm.test(e),We=e=>/^[\u0020-\u002f\u003a-\u0040\u005b-\u0060\u007b-\u007e0-9٠-٩]+$/.test(e),Le=e=>e.replace(/\s+([.؟!,،؛:?])/g,"$1"),ke=e=>e.replace(/(\*\s*)+/g,"*"),Ee=e=>e.replace(/[.-]?:[.-]?/g,":"),Be=e=>e.replace(/-{2,}/g,"-"),Re=e=>e.replace(/\.{2,}/g,"\u2026"),_e=e=>e.replace(/(\n\s*){3,}/g,`
3
+ `).trim()},q=e=>e.replace(/( ?)([.!?,،؟;؛])((?![ '”“)"\]\n])|(?=\s{2,}))/g,"$1$2 ").replace(/\s([.!?,،؟;؛])\s*([ '”“)"\]\n])/g,"$1$2").replace(/([^\s\w\d'”“)"\]]+)\s+([.!?,،؟;؛])|([.!?,،؟;؛])\s+$/g,"$1$2$3").replace(/(?<=\D)( ?: ?)(?!(\d+:)|(:\d+))|(?<=\d) ?: ?(?=\D)|(?<=\D) ?: ?(?=\d)/g,": "),j=e=>e.replace(/[“”]/g,'"').replace(/"([^"]*)"/g,"\u201C$1\u201D").replace(/^”/g,"\u201C"),Q=e=>e.replace(/\\n|\r/g,`
4
+ `),H=e=>e.replace(/^ +| +$/gm,""),J=e=>/^\s*\S+\s*$/gm.test(e),Y=e=>/^[\u0020-\u002f\u003a-\u0040\u005b-\u0060\u007b-\u007e0-9٠-٩]+$/.test(e),Z=e=>e.replace(/\s+([.؟!,،؛:?])/g,"$1"),K=e=>e.replace(/(\*\s*)+/g,"*"),V=e=>e.replace(/[.-]?:[.-]?/g,":"),G=e=>e.replace(/-{2,}/g,"-"),X=e=>e.replace(/\.{2,}/g,"\u2026"),ee=e=>e.replace(/(\n\s*){3,}/g,`
5
5
 
6
- `),Oe=e=>e.replace(/(\n\s*){2,}/g,`
7
- `),ve=e=>e.replace(/\. +\./g,"."),Pe=e=>e.replace(/ـ{2,}/g,"\u0640").replace(/_+/g,"_"),we=e=>e.replace(/(\(|\)){2,}|(\[|\]){2,}/g,"$1$2"),Ne=e=>e.replace(/(\S) *(\([^)]*\))/g,"$1 $2"),He=e=>e.replace(/(\S) *(«[^»]*»)/g,"$1 $2"),Ie=e=>e.replace(/\(«|\( \(/g,"\xAB").replace(/»\)|\) \)/g,"\xBB").replace(/\)([0-9\u0660-\u0669]+)\)/g,"($1)").replace(/\)([0-9\u0660-\u0669]+)\(/g,"($1)"),je=e=>{let r=e;return r=r.replace(/\(([^(){}]+)\}/g,"{$1}"),r.replace(/\{([^(){}]+)\)/g,"{$1}")},qe=e=>e.replace(/«([^»)]+)\)/g,"\xAB$1\xBB").replace(/\(([^()]+)»/g,"\xAB$1\xBB").replace(/«([^»]+)(?=\s*$|$)/g,"\xAB$1\xBB"),Ze=e=>{let r=/^\((?:\d+|۱|۲|۳|۴|۵|۶|۷|۸|۹)\)\s/,t=[],n=e.split(`
8
- `),s="";return n.forEach(l=>{let o=l.trim(),u=r.test(o),p=/^\(\d+\/\d+\)/.test(o);if(u&&!p)s&&(t.push(s.trim()),s=""),t.push(o);else{s+=`${o} `;let g=s.trim().slice(-1);/[.!؟]/.test(g)&&(t.push(s.trim()),s="")}}),s&&t.push(s.trim()),t.join(`
9
- `)},Ue=e=>{let r=e.replace(/[^\p{L}]/gu,"");return r.length===0?!1:r===r.toUpperCase()},Xe=e=>e.replace(/(\d+)\s?\/\s?(\d+)/g,"$1/$2"),A=e=>e.replace(/[ \t]+/g," "),Qe=e=>e.replace(/([؟!])[.،]/g,"$1"),Ye=e=>e.replace(/([[(])\s*(.*?)\s*([\])])/g,"$1$2$3"),Je=e=>e.replace(/\(\(\s?/g,"\xAB").replace(/\s?\)\)/g,"\xBB"),Y=e=>e.normalize("NFKD").replace(/[\u0300-\u036f]/g,"").trim(),J=e=>{let r={"\u{1D44E}":"I","\u{1D468}":"g","\u{1D63C}":"!","\u{1D44F}":"J","\u{1D469}":"h","\u{1D63D}":"?","\u{1D450}":"K","\u{1D46A}":"i","\u{1D451}":"L","\u{1D46B}":"j","\u{1D63F}":",","\u{1D452}":"M","\u{1D46C}":"k","\u{1D640}":".","\u{1D453}":"N","\u{1D46D}":"l","\u{1D454}":"O","\u{1D46E}":"m","\u{1D46F}":"n","\u{1D456}":"Q","\u{1D470}":"o","\u{1D457}":"R","\u{1D471}":"p","\u{1D458}":"S","\u{1D472}":"q","\u{1D459}":"T","\u{1D473}":"r","\u{1D647}":"-","\u{1D45A}":"U","\u{1D474}":"s","\u{1D45B}":"V","\u{1D475}":"t","\u{1D45C}":"W","\u{1D476}":"u","\u{1D45D}":"X","\u{1D477}":"v","\u{1D45E}":"Y","\u{1D478}":"w","\u{1D45F}":"Z","\u{1D479}":"x","\u{1D446}":"A","\u{1D47A}":"y","\u{1D447}":"B","\u{1D47B}":"z","\u{1D462}":"a","\u{1D448}":"C","\u{1D463}":"b","\u{1D449}":"D","\u{1D464}":"c","\u{1D44A}":"E","\u{1D465}":"d","\u{1D44B}":"F","\u{1D466}":"e","\u{1D44C}":"G","\u{1D467}":"f","\u{1D44D}":"H","\u{1D455}":"P"};return e.replace(/[\uD835\uDC62-\uD835\uDC7B\uD835\uDC46-\uD835\uDC5F\u{1D63C}-\u{1D647}]/gu,t=>r[t]||t)},Ve=e=>J(Y(e)),Ke=e=>e.toLowerCase().split(" ").map(r=>{if(r.length===0)return r;let t=r.match(/\p{L}/u);if(!t||t.index===void 0)return r;let n=t.index;return r.slice(0,n)+r.charAt(n).toUpperCase()+r.slice(n+1)}).join(" "),Ge=e=>e.replace(/([“”"]|«) *(.*?) *([“”"]|»)/g,"$1$2$3");var rr=e=>{let r=e.replace(/(\b\d+\b)(?=:)/g,'"$1"');return r=r.replace(/:\s*'([^']+)'/g,': "$1"'),r=r.replace(/:\s*"([^"]+)"/g,': "$1"'),JSON.stringify(JSON.parse(r))},tr=e=>/^{(\s*(\d+|'[^']*'|"[^"]*")\s*:\s*('|")[^'"]*\3\s*,)*(?:\s*(\d+|'[^']*'|"[^"]*")\s*:\s*('|")[^'"]*\5\s*)}$/.test(e.trim()),nr=e=>{let r=/(?:[^\s"]+|"(.*?)")+/g;return(e.match(r)||[]).map(t=>t.startsWith('"')?t.slice(1,-1):t)},V=e=>{let r=0;for(let t of e)t==='"'&&r++;return r%2===0},K={"(":")","[":"]","{":"}"},G=new Set(["(","[","{"]),ee=new Set([")","]","}"]),re=e=>{let r=[];for(let t of e)if(G.has(t))r.push(t);else if(ee.has(t)){let n=r.pop();if(!n||K[n]!==t)return!1}return r.length===0},sr=e=>V(e)&&re(e),ar=e=>{if(e.includes("-")){let[r,t]=e.split("-").map(Number);if(r>t)throw new Error("Start page cannot be greater than end page");return Array.from({length:t-r+1},(n,s)=>r+s)}else return e.split(",").map(Number)};var lr=e=>e.replace(/ *\(?:\d+(?:\/\d+){0,2}\)? *| *\[\d+(?:\/\d+)?\] *| *«\d+» *|\d+\/\d+(?:\/\d+)?|[،§{}۝؍‎﴿﴾<>;_؟»«:!،؛[\]…ـ¬.\\/*()"]/g," "),cr=e=>e.replace(/-\[\d+\]-/g,""),ur=e=>e.replace(/\s+/g," "),pr=e=>e.replace(/[0-9]/g,""),gr=e=>e.replace(/\[(d)\.\s*\d{1,4}[hH]\]\s*|\((d)\.\s*\d{1,4}[hH]\)\s*/g,""),fr=e=>e.replace(/[\d-]/g,""),Dr=e=>e.replace(/\(\d{1}\)|\[\d{1}\]|«\d»/g,""),mr=e=>e.replace(/https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/g,""),br=e=>e.replace(/\*\*([^*]+)\*\*/g,"$1").replace(/__([^_]+)__/g,"$1").replace(/\*([^*]+)\*/g,"$1").replace(/_([^_]+)_/g,"$1").replace(/~~([^~]+)~~/g,"$1").replace(/^\s*>\s?/gm,"").replace(/!\[[^\]]*]\([^)]*\)/g,"").replace(/\[([^\]]+)]\([^)]*\)/g,"$1").replace(/^#+\s*/gm,"").replace(/^\s*[-*+]\s+/gm,"").replace(/^\s*\d+\.\s+/gm,"").replace(/`/gm,""),hr=(e,r=150)=>e.length>r?`${e.substring(0,r-1)}\u2026`:e,Ar=(e,r=50,t)=>{if(e.length<=r)return e;let n=Math.max(3,Math.floor(r/3)),s=t??n,o=r-1-s;if(o<1)return`${e.substring(0,r-1)}\u2026`;let u=e.substring(0,o),p=e.substring(e.length-s);return`${u}\u2026${p}`},dr=e=>e.replace(/\\ /g," ").trim(),te="[\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652]",ne=[["\u0627","\u0622","\u0623","\u0625"],["\u0629","\u0647"],["\u0649","\u064A"]],se=e=>{for(let r of ne)if(r.includes(e))return`[${r.map(t=>f(t)).join("")}]`;return f(e)},ae=e=>e.normalize("NFC").replace(/[\u200C\u200D]/g,"").replace(/\s+/g," ").trim(),Sr=e=>{let r=`${te}*`,t=ae(e);return Array.from(t).map(n=>se(n)+r).join("")};var $r=e=>e.replace(/(\b|\W)(Al |Al-|Ar-|As-|Adh-|Ad-|Ats-|Ath |Ath-|Az |Az-|az-|adh-|as-|ar-)/g,"$1al-").replace(/(\b|\W)(Ash-S|ash-S)/g,"$1al-S").replace(/al- (.+?)\b/g,"al-$1"),Tr=e=>e.replace(/ʿʿ/g,"\u02BF").replace(/ʾʾ/g,"\u02BE"),yr=e=>e.replace(/\(peace be upon him\)|(Messenger of (Allah|Allāh)|Messenger|Prophet|Mu[hḥ]ammad) *\((s[^)]*m|peace[^)]*him|May[^)]*him|may[^)]*him)\)*/gi,"$1 \uFDFA").replace(/,\s*ﷺ\s*,/g," \uFDFA"),oe=e=>e.normalize("NFKD").replace(/[\u0300-\u036f]/g,"").replace(/`|ʾ|ʿ|-/g,""),ie=e=>A(e.replace(/(\bal-|\bli-|\bbi-|\bfī|\bwa[-\s]+|\bl-|\bliʿl|\Bʿalá|\Bʿan|\bb\.)/gi,"")),le=e=>oe(ie(e)),Mr=e=>le(e).trim().split(/[ -]/).slice(0,2).map(t=>t.charAt(0).toUpperCase()).join("");export{Te as addSpaceBeforeAndAfterPunctuation,De as addSpaceBetweenArabicTextAndNumbers,ye as applySmartQuotes,ce as arabicNumeralToNumber,ue as cleanExtremeArabicUnderscores,Me as cleanLiteralNewLines,ze as cleanMultilines,Le as cleanSpacesBeforePeriod,lr as cleanSymbolsAndPartReferences,cr as cleanTrailingPageNumbers,ke as condenseAsterisks,Ee as condenseColons,Be as condenseDashes,Re as condenseEllipsis,ve as condensePeriods,Pe as condenseUnderscores,pe as convertUrduSymbolsToArabic,we as doubleToSingleBrackets,Ne as ensureSpaceBeforeBrackets,He as ensureSpaceBeforeQuotes,f as escapeRegex,Mr as extractInitials,Ie as fixBracketTypos,je as fixCurlyBraces,qe as fixMismatchedQuotationMarks,fe as fixTrailingWow,Ze as formatStringBySentence,ge as getArabicScore,Fe as hasWordInSingleLine,$e as insertLineBreaksAfterPunctuation,Ue as isAllUppercase,sr as isBalanced,tr as isJsonStructureValid,We as isOnlyPunctuation,Sr as makeDiacriticInsensitive,xe as makeDiacriticInsensitiveRegex,oe as normalize,$r as normalizeArabicPrefixesToAl,Tr as normalizeDoubleApostrophes,rr as normalizeJsonSyntax,Xe as normalizeSlashInReferences,A as normalizeSpaces,le as normalizeTransliteratedEnglish,ar as parsePageRanges,_e as reduceMultilineBreaksToDouble,Oe as reduceMultilineBreaksToSingle,ie as removeArabicPrefixes,gr as removeDeathYear,br as removeMarkdownFormatting,me as removeNonIndexSignatures,fr as removeNumbersAndDashes,Qe as removeRedundantPunctuation,Dr as removeSingleDigitReferences,be as removeSingularCodes,he as removeSolitaryArabicLetters,Ye as removeSpaceInsideBrackets,mr as removeUrls,Je as replaceDoubleBracketsWithArrows,Ae as replaceEnglishPunctuationWithArabic,ur as replaceLineBreaksWithSpaces,yr as replaceSalutationsWithSymbol,Se as sanitizeArabic,nr as splitByQuotes,pr as stripAllDigits,Y as stripBoldStyling,J as stripItalicsStyling,Ve as stripStyling,Ke as toTitleCase,Ge as trimSpaceInsideQuotes,hr as truncate,Ar as truncateMiddle,dr as unescapeSpaces};
6
+ `),re=e=>e.replace(/(\n\s*){2,}/g,`
7
+ `),te=e=>e.replace(/\. +\./g,"."),ne=e=>e.replace(/ـ{2,}/g,"\u0640").replace(/_+/g,"_"),se=e=>e.replace(/(\(|\)){2,}|(\[|\]){2,}/g,"$1$2"),ae=e=>e.replace(/(\S) *(\([^)]*\))/g,"$1 $2"),oe=e=>e.replace(/(\S) *(«[^»]*»)/g,"$1 $2"),ue=e=>e.replace(/\(«|\( \(/g,"\xAB").replace(/»\)|\) \)/g,"\xBB").replace(/\)([0-9\u0660-\u0669]+)\)/g,"($1)").replace(/\)([0-9\u0660-\u0669]+)\(/g,"($1)"),ce=e=>{let r=e;return r=r.replace(/\(([^(){}]+)\}/g,"{$1}"),r.replace(/\{([^(){}]+)\)/g,"{$1}")},ie=e=>e.replace(/«([^»)]+)\)/g,"\xAB$1\xBB").replace(/\(([^()]+)»/g,"\xAB$1\xBB").replace(/«([^»]+)(?=\s*$|$)/g,"\xAB$1\xBB"),le=e=>{let r=/^\((?:\d+|۱|۲|۳|۴|۵|۶|۷|۸|۹)\)\s/,t=[],s=e.split(`
8
+ `),n="";return s.forEach(u=>{let a=u.trim(),i=r.test(a),l=/^\(\d+\/\d+\)/.test(a);if(i&&!l)n&&(t.push(n.trim()),n=""),t.push(a);else{n+=`${a} `;let p=n.trim().slice(-1);/[.!؟]/.test(p)&&(t.push(n.trim()),n="")}}),n&&t.push(n.trim()),t.join(`
9
+ `)},pe=e=>{let r=e.replace(/[^\p{L}]/gu,"");return r.length===0?!1:r===r.toUpperCase()},ge=e=>e.replace(/(\d+)\s?\/\s?(\d+)/g,"$1/$2"),g=e=>e.replace(/[ \t]+/g," "),De=e=>e.replace(/([؟!])[.،]/g,"$1"),me=e=>e.replace(/([[(])\s*(.*?)\s*([\])])/g,"$1$2$3"),xe=e=>e.replace(/\(\(\s?/g,"\xAB").replace(/\s?\)\)/g,"\xBB"),x=e=>e.normalize("NFKD").replace(/[\u0300-\u036f]/g,"").trim(),d=e=>{let r={"\u{1D44E}":"I","\u{1D468}":"g","\u{1D63C}":"!","\u{1D44F}":"J","\u{1D469}":"h","\u{1D63D}":"?","\u{1D450}":"K","\u{1D46A}":"i","\u{1D451}":"L","\u{1D46B}":"j","\u{1D63F}":",","\u{1D452}":"M","\u{1D46C}":"k","\u{1D640}":".","\u{1D453}":"N","\u{1D46D}":"l","\u{1D454}":"O","\u{1D46E}":"m","\u{1D46F}":"n","\u{1D456}":"Q","\u{1D470}":"o","\u{1D457}":"R","\u{1D471}":"p","\u{1D458}":"S","\u{1D472}":"q","\u{1D459}":"T","\u{1D473}":"r","\u{1D647}":"-","\u{1D45A}":"U","\u{1D474}":"s","\u{1D45B}":"V","\u{1D475}":"t","\u{1D45C}":"W","\u{1D476}":"u","\u{1D45D}":"X","\u{1D477}":"v","\u{1D45E}":"Y","\u{1D478}":"w","\u{1D45F}":"Z","\u{1D479}":"x","\u{1D446}":"A","\u{1D47A}":"y","\u{1D447}":"B","\u{1D47B}":"z","\u{1D462}":"a","\u{1D448}":"C","\u{1D463}":"b","\u{1D449}":"D","\u{1D464}":"c","\u{1D44A}":"E","\u{1D465}":"d","\u{1D44B}":"F","\u{1D466}":"e","\u{1D44C}":"G","\u{1D467}":"f","\u{1D44D}":"H","\u{1D455}":"P"};return e.replace(/[\uD835\uDC62-\uD835\uDC7B\uD835\uDC46-\uD835\uDC5F\u{1D63C}-\u{1D647}]/gu,t=>r[t]||t)},de=e=>d(x(e)),Ce=e=>e.toLowerCase().split(" ").map(r=>{if(r.length===0)return r;let t=r.match(/\p{L}/u);if(!t||t.index===void 0)return r;let s=t.index;return r.slice(0,s)+r.charAt(s).toUpperCase()+r.slice(s+1)}).join(" "),fe=e=>e.replace(/([“”"]|«) *(.*?) *([“”"]|»)/g,"$1$2$3");var $e=e=>{let r=e.replace(/(\b\d+\b)(?=:)/g,'"$1"');return r=r.replace(/:\s*'([^']+)'/g,': "$1"'),r=r.replace(/:\s*"([^"]+)"/g,': "$1"'),JSON.stringify(JSON.parse(r))},be=e=>/^{(\s*(\d+|'[^']*'|"[^"]*")\s*:\s*('|")[^'"]*\3\s*,)*(?:\s*(\d+|'[^']*'|"[^"]*")\s*:\s*('|")[^'"]*\5\s*)}$/.test(e.trim()),Ae=e=>{let r=/(?:[^\s"]+|"(.*?)")+/g;return(e.match(r)||[]).map(t=>t.startsWith('"')?t.slice(1,-1):t)},C=e=>{let r=0;for(let t of e)t==='"'&&r++;return r%2===0},f={"(":")","[":"]","{":"}"},h=new Set(["(","[","{"]),$=new Set([")","]","}"]),b=e=>{let r=[];for(let t of e)if(h.has(t))r.push(t);else if($.has(t)){let s=r.pop();if(!s||f[s]!==t)return!1}return r.length===0},Se=e=>C(e)&&b(e),Fe=e=>{if(e.includes("-")){let[r,t]=e.split("-").map(Number);if(r>t)throw new Error("Start page cannot be greater than end page");return Array.from({length:t-r+1},(s,n)=>r+n)}else return e.split(",").map(Number)};var ye=e=>e.replace(/ *\(?:\d+(?:\/\d+){0,2}\)? *| *\[\d+(?:\/\d+)?\] *| *«\d+» *|\d+\/\d+(?:\/\d+)?|[،§{}۝؍‎﴿﴾<>;_؟»«:!،؛[\]…ـ¬.\\/*()"]/g," "),Me=e=>e.replace(/-\[\d+\]-/g,""),Ee=e=>e.replace(/\s+/g," "),Pe=e=>e.replace(/[0-9]/g,""),ve=e=>e.replace(/\[(d)\.\s*\d{1,4}[hH]\]\s*|\((d)\.\s*\d{1,4}[hH]\)\s*/g,""),ze=e=>e.replace(/[\d-]/g,""),Le=e=>e.replace(/\(\d{1}\)|\[\d{1}\]|«\d»/g,""),Te=e=>e.replace(/https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/g,""),Re=e=>e.replace(/\*\*([^*]+)\*\*/g,"$1").replace(/__([^_]+)__/g,"$1").replace(/\*([^*]+)\*/g,"$1").replace(/_([^_]+)_/g,"$1").replace(/~~([^~]+)~~/g,"$1").replace(/^\s*>\s?/gm,"").replace(/!\[[^\]]*]\([^)]*\)/g,"").replace(/\[([^\]]+)]\([^)]*\)/g,"$1").replace(/^#+\s*/gm,"").replace(/^\s*[-*+]\s+/gm,"").replace(/^\s*\d+\.\s+/gm,"").replace(/`/gm,""),we=(e,r=150)=>e.length>r?`${e.substring(0,r-1)}\u2026`:e,_e=(e,r=50,t)=>{if(e.length<=r)return e;let s=Math.max(3,Math.floor(r/3)),n=t??s,a=r-1-n;if(a<1)return`${e.substring(0,r-1)}\u2026`;let i=e.substring(0,a),l=e.substring(e.length-n);return`${i}\u2026${l}`},Ie=e=>e.replace(/\\ /g," ").trim(),A="[\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652]",S=[["\u0627","\u0622","\u0623","\u0625"],["\u0629","\u0647"],["\u0649","\u064A"]],F=e=>{for(let r of S)if(r.includes(e))return`[${r.map(t=>c(t)).join("")}]`;return c(e)},B=e=>e.normalize("NFC").replace(/[\u200C\u200D]/g,"").replace(/\s+/g," ").trim(),Ne=e=>{let r=`${A}*`,t=B(e);return Array.from(t).map(s=>F(s)+r).join("")};var Ue=e=>e.replace(/(\b|\W)(Al |Al-|Ar-|As-|Adh-|Ad-|Ats-|Ath |Ath-|Az |Az-|az-|adh-|as-|ar-)/g,"$1al-").replace(/(\b|\W)(Ash-S|ash-S)/g,"$1al-S").replace(/al- (.+?)\b/g,"al-$1"),qe=e=>e.replace(/ʿʿ/g,"\u02BF").replace(/ʾʾ/g,"\u02BE"),je=e=>e.replace(/\(peace be upon him\)|(Messenger of (Allah|Allāh)|Messenger|Prophet|Mu[hḥ]ammad) *\((s[^)]*m|peace[^)]*him|May[^)]*him|may[^)]*him)\)*/gi,"$1 \uFDFA").replace(/,\s*ﷺ\s*,/g," \uFDFA"),k=e=>e.normalize("NFKD").replace(/[\u0300-\u036f]/g,"").replace(/`|ʾ|ʿ|-/g,""),y=e=>g(e.replace(/(\bal-|\bli-|\bbi-|\bfī|\bwa[-\s]+|\bl-|\bliʿl|\Bʿalá|\Bʿan|\bb\.)/gi,"")),M=e=>k(y(e)),Qe=e=>M(e).trim().split(/[ -]/).slice(0,2).map(t=>t.charAt(0).toUpperCase()).join("");export{q as addSpaceBeforeAndAfterPunctuation,T as addSpaceBetweenArabicTextAndNumbers,j as applySmartQuotes,E as arabicNumeralToNumber,P as cleanExtremeArabicUnderscores,Q as cleanLiteralNewLines,H as cleanMultilines,Z as cleanSpacesBeforePeriod,ye as cleanSymbolsAndPartReferences,Me as cleanTrailingPageNumbers,K as condenseAsterisks,V as condenseColons,G as condenseDashes,X as condenseEllipsis,te as condensePeriods,ne as condenseUnderscores,v as convertUrduSymbolsToArabic,se as doubleToSingleBrackets,ae as ensureSpaceBeforeBrackets,oe as ensureSpaceBeforeQuotes,c as escapeRegex,Qe as extractInitials,ue as fixBracketTypos,ce as fixCurlyBraces,ie as fixMismatchedQuotationMarks,L as fixTrailingWow,le as formatStringBySentence,z as getArabicScore,J as hasWordInSingleLine,U as insertLineBreaksAfterPunctuation,pe as isAllUppercase,Se as isBalanced,be as isJsonStructureValid,Y as isOnlyPunctuation,Ne as makeDiacriticInsensitive,W as makeDiacriticInsensitiveRegex,k as normalize,Ue as normalizeArabicPrefixesToAl,qe as normalizeDoubleApostrophes,$e as normalizeJsonSyntax,ge as normalizeSlashInReferences,g as normalizeSpaces,M as normalizeTransliteratedEnglish,Fe as parsePageRanges,ee as reduceMultilineBreaksToDouble,re as reduceMultilineBreaksToSingle,y as removeArabicPrefixes,ve as removeDeathYear,Re as removeMarkdownFormatting,R as removeNonIndexSignatures,ze as removeNumbersAndDashes,De as removeRedundantPunctuation,Le as removeSingleDigitReferences,w as removeSingularCodes,_ as removeSolitaryArabicLetters,me as removeSpaceInsideBrackets,Te as removeUrls,xe as replaceDoubleBracketsWithArrows,I as replaceEnglishPunctuationWithArabic,Ee as replaceLineBreaksWithSpaces,je as replaceSalutationsWithSymbol,Ae as splitByQuotes,Pe as stripAllDigits,x as stripBoldStyling,d as stripItalicsStyling,de as stripStyling,Ce as toTitleCase,fe as trimSpaceInsideQuotes,we as truncate,_e as truncateMiddle,Ie as unescapeSpaces};
10
10
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/arabic.ts","../src/cleaning.ts","../src/formatting.ts","../src/parsing.ts","../src/sanitization.ts","../src/transliteration.ts"],"sourcesContent":["/**\n * Converts Arabic-Indic numerals (٠-٩) to a JavaScript number.\n *\n * This function finds all Arabic-Indic digits in the input string and converts them\n * to their corresponding Arabic (Western) digits, then parses the result as an integer.\n *\n * Arabic-Indic digits mapping:\n * - ٠ → 0, ١ → 1, ٢ → 2, ٣ → 3, ٤ → 4\n * - ٥ → 5, ٦ → 6, ٧ → 7, ٨ → 8, ٩ → 9\n *\n * @param arabic - The string containing Arabic-Indic numerals to convert\n * @returns The parsed integer value of the converted numerals\n *\n * @example\n * ```typescript\n * arabicNumeralToNumber(\"١٢٣\"); // returns 123\n * arabicNumeralToNumber(\"٥٠\"); // returns 50\n * arabicNumeralToNumber(\"abc١٢٣xyz\"); // returns 123 (non-digits ignored)\n * arabicNumeralToNumber(\"\"); // returns NaN\n * ```\n *\n * Returns NaN if no valid Arabic-Indic digits are found\n */\nexport const arabicNumeralToNumber = (arabic: string) => {\n return parseInt(\n arabic.replace(/[\\u0660-\\u0669]/g, (c) => (c.charCodeAt(0) - 0x0660).toString()),\n 10,\n );\n};\n\n/**\n * Removes extreme Arabic underscores (ـ) that appear at the beginning or end of a line or in text.\n * Does not affect Hijri dates (e.g., 1424هـ) or specific Arabic terms.\n * Example: \"ـThis is a textـ\" will be changed to \"This is a text\".\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with extreme underscores removed.\n */\nexport const cleanExtremeArabicUnderscores = (text: string) => {\n return text.replace(/(?<!\\d ?ه|اه)ـ(?=\\r?$)|^ـ(?!اهـ)/gm, '');\n};\n\n/**\n * Converts Urdu symbols to their Arabic equivalents.\n * Example: 'ھذا' will be changed to 'هذا', 'ی' to 'ي'.\n * @param {string} text - The input text containing Urdu symbols.\n * @returns {string} - The modified text with Urdu symbols converted to Arabic symbols.\n */\nexport const convertUrduSymbolsToArabic = (text: string) => {\n return text.replace(/ھ/g, 'ه').replace(/ی/g, 'ي');\n};\n\n/**\n * Calculates the proportion of Arabic characters in text relative to total non-whitespace, non-digit characters.\n * Digits (ASCII and Arabic-Indic variants) are excluded from both numerator and denominator.\n * @param text - The input text to analyze\n * @returns A decimal between 0-1 representing the Arabic character ratio (0 = no Arabic, 1 = all Arabic)\n */\nexport const getArabicScore = (text: string) => {\n if (!text) {\n return 0;\n }\n // Arabic letters (letters/ranges only)\n const arabicLettersPattern = /[\\u0600-\\u06FF\\u0750-\\u077F\\u08A0-\\u08FF\\uFB50-\\uFDFF\\uFE70-\\uFEFF]/g;\n // ASCII digits + Arabic-Indic digits + Extended Arabic-Indic digits\n const allDigitPattern = /[0-9\\u0660-\\u0669\\u06F0-\\u06F9]/g;\n // Counted characters exclude whitespace and all listed digits\n const countedCharsPattern = /[^\\s0-9\\u0660-\\u0669\\u06F0-\\u06F9]/g;\n const cleaned = text.replace(allDigitPattern, '');\n const arabicMatches = cleaned.match(arabicLettersPattern) || [];\n const totalMatches = cleaned.match(countedCharsPattern) || [];\n return totalMatches.length === 0 ? 0 : arabicMatches.length / totalMatches.length;\n};\n\n/**\n * Fixes the trailing \"و\" (wow) in phrases such as \"عليكم و رحمة\" to \"عليكم ورحمة\".\n * This function attempts to correct phrases where \"و\" appears unnecessarily, particularly in greetings.\n * Example: 'السلام عليكم و رحمة' will be changed to 'السلام عليكم ورحمة'.\n * @param {string} text - The input text containing the \"و\" character.\n * @returns {string} - The modified text with unnecessary trailing \"و\" characters corrected.\n */\nexport const fixTrailingWow = (text: string) => {\n return text.replace(/ و /g, ' و');\n};\n\n/**\n * Inserts a space between Arabic text and numbers.\n * Example: 'الآية37' will be changed to 'الآية 37'.\n * @param {string} text - The input text containing Arabic text followed by numbers.\n * @returns {string} - The modified text with spaces inserted between Arabic text and numbers.\n */\nexport const addSpaceBetweenArabicTextAndNumbers = (text: string) => {\n return text.replace(/([\\u0600-\\u06FF]+)(\\d+)/g, '$1 $2');\n};\n\n/**\n * Removes single-digit numbers surrounded by Arabic text. Also removes dashes (-) not followed by a number.\n * For example, removes '3' from 'وهب 3 وقال' but does not remove '121' from 'لوحه 121 الجرح'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with non-index numbers and dashes removed.\n */\nexport const removeNonIndexSignatures = (text: string) => {\n return text\n .replace(/(?<![0-9] ?)-|(?<=[\\u0600-\\u06FF])\\s?\\d\\s?(?=[\\u0600-\\u06FF])/g, ' ')\n .replace(/(?<=[\\u0600-\\u06FF]\\s)(\\d+\\s)+\\d+(?=(\\s[\\u0600-\\u06FF]|$))/g, ' ');\n};\n\n/**\n * Removes characters enclosed in square brackets [] or parentheses () if they are Arabic letters or Arabic-Indic numerals.\n * Example: '[س]' or '(س)' will be removed.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with singular codes removed.\n */\nexport const removeSingularCodes = (text: string) => {\n return text.replace(/[[({][\\u0621-\\u064A\\u0660-\\u0669][\\])}]/g, '');\n};\n\n/**\n * Removes solitary Arabic letters unless they are the 'ha' letter, which is used in Hijri years.\n * Example: \"ب ا الكلمات ت\" will be changed to \"ا الكلمات\".\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with solitary Arabic letters removed.\n */\nexport const removeSolitaryArabicLetters = (text: string) => {\n return text.replace(/(^| )[\\u0621-\\u064A]( |$)/g, ' ');\n};\n\n/**\n * Replaces English punctuation (question mark and semicolon) with their Arabic equivalents.\n * Example: '?' will be replaced with '؟', and ';' with '؛'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with English punctuation replaced by Arabic punctuation.\n */\nexport const replaceEnglishPunctuationWithArabic = (text: string) => {\n return text\n .replace(/\\?|؟\\./g, '؟')\n .replace(/(;|؛)\\s*(\\1\\s*)*/g, '؛')\n .replace(/,|-،/g, '،');\n};\n","/**\n * Ultra-fast Arabic text sanitizer for search/indexing/display.\n * Optimized for very high call rates: avoids per-call object spreads and minimizes allocations.\n * Options can merge over a base preset or `'none'` to apply exactly the rules you request.\n */\nexport type SanitizePreset = 'light' | 'search' | 'aggressive';\nexport type SanitizeBase = 'none' | SanitizePreset;\n\n/**\n * Public options for {@link sanitizeArabic}. When you pass an options object, it overlays the chosen\n * `base` (default `'light'`) without allocating merged objects on the hot path; flags are resolved\n * directly into local booleans for speed.\n */\nexport type SanitizeOptions = {\n /** Base to merge over. `'none'` applies only the options you specify. Default when passing an object: `'light'`. */\n base?: SanitizeBase;\n\n /** Unicode NFC normalization. Default: `true` in all presets. */\n nfc?: boolean;\n\n /** Strip zero-width controls (U+200B–U+200F, U+202A–U+202E, U+2060–U+2064, U+FEFF). Default: `true` in presets. */\n stripZeroWidth?: boolean;\n\n /** If stripping zero-width, replace them with a space instead of removing. Default: `false`. */\n zeroWidthToSpace?: boolean;\n\n /** Remove Arabic diacritics (tashkīl). Default: `true` in `'search'`/`'aggressive'`. */\n stripDiacritics?: boolean;\n\n /**\n * Remove tatweel (ـ).\n * - `true` is treated as `'safe'` (preserves tatweel after digits or 'ه' for dates/list markers)\n * - `'safe'` or `'all'` explicitly\n * - `false` to keep tatweel\n * Default: `'all'` in `'search'`/`'aggressive'`, `false` in `'light'`.\n */\n stripTatweel?: boolean | 'safe' | 'all';\n\n /** Normalize آ/أ/إ → ا. Default: `true` in `'search'`/`'aggressive'`. */\n normalizeAlif?: boolean;\n\n /** Replace ى → ي. Default: `true` in `'search'`/`'aggressive'`. */\n replaceAlifMaqsurah?: boolean;\n\n /** Replace ة → ه (lossy). Default: `true` in `'aggressive'` only. */\n replaceTaMarbutahWithHa?: boolean;\n\n /** Strip Latin letters/digits and common OCR noise into spaces. Default: `true` in `'aggressive'`. */\n stripLatinAndSymbols?: boolean;\n\n /** Keep only Arabic letters (no whitespace). Use for compact keys, not FTS. */\n keepOnlyArabicLetters?: boolean;\n\n /** Keep Arabic letters + spaces (drops digits/punct/symbols). Great for FTS. Default: `true` in `'aggressive'`. */\n lettersAndSpacesOnly?: boolean;\n\n /** Collapse runs of whitespace to a single space. Default: `true`. */\n collapseWhitespace?: boolean;\n\n /** Trim leading/trailing whitespace. Default: `true`. */\n trim?: boolean;\n\n /**\n * Remove the Hijri date marker (\"هـ\" or bare \"ه\" if tatweel already removed) when it follows a date-like token\n * (digits/slashes/hyphens/spaces). Example: `1435/3/29 هـ` → `1435/3/29`.\n * Default: `true` in `'search'`/`'aggressive'`, `false` in `'light'`.\n */\n removeHijriMarker?: boolean;\n};\n\n/** Fully-resolved internal preset options (no `base`, and tatweel as a mode). */\ntype PresetOptions = {\n nfc: boolean;\n stripZeroWidth: boolean;\n zeroWidthToSpace: boolean;\n stripDiacritics: boolean;\n stripTatweel: false | 'safe' | 'all';\n normalizeAlif: boolean;\n replaceAlifMaqsurah: boolean;\n replaceTaMarbutahWithHa: boolean;\n stripLatinAndSymbols: boolean;\n keepOnlyArabicLetters: boolean;\n lettersAndSpacesOnly: boolean;\n collapseWhitespace: boolean;\n trim: boolean;\n removeHijriMarker: boolean;\n};\n\nconst RX_SPACES = /\\s+/g;\nconst RX_TATWEEL = /\\u0640/g;\nconst RX_DIACRITICS = /[\\u0610-\\u061A\\u064B-\\u065F\\u0670\\u06D6-\\u06ED]/g;\nconst RX_ALIF_VARIANTS = /[أإآٱ]/g;\nconst RX_ALIF_MAQSURAH = /\\u0649/g;\nconst RX_TA_MARBUTAH = /\\u0629/g;\nconst RX_ZERO_WIDTH = /[\\u200B-\\u200F\\u202A-\\u202E\\u2060-\\u2064\\uFEFF]/g;\nconst RX_LATIN_AND_SYMBOLS = /[A-Za-z]+[0-9]*|[0-9]+|[¬§`=]|[/]{2,}|[&]|[ﷺ]/g;\nconst RX_NON_ARABIC_LETTERS = /[^\\u0621-\\u063A\\u0641-\\u064A\\u0671\\u067E\\u0686\\u06A4-\\u06AF\\u06CC\\u06D2\\u06D3]/g;\nconst RX_NOT_LETTERS_OR_SPACE = /[^\\u0621-\\u063A\\u0641-\\u064A\\u0671\\u067E\\u0686\\u06A4-\\u06AF\\u06CC\\u06D2\\u06D3\\s]/g;\n\n/**\n * Returns `true` if the code point is ASCII space.\n */\nconst isAsciiSpace = (code: number): boolean => code === 32;\n\n/**\n * Returns `true` if the code point is a Western digit or Arabic-Indic digit.\n */\nconst isDigitCodePoint = (code: number): boolean => (code >= 48 && code <= 57) || (code >= 0x0660 && code <= 0x0669);\n\n/**\n * Removes tatweel while preserving a tatweel that immediately follows a digit or 'ه'.\n * This protects list markers and Hijri date forms.\n */\nconst removeTatweelSafely = (s: string): string =>\n s.replace(RX_TATWEEL, (_m, i: number, str: string) => {\n let j = i - 1;\n while (j >= 0 && isAsciiSpace(str.charCodeAt(j))) {\n j--;\n }\n if (j >= 0) {\n const prev = str.charCodeAt(j);\n if (isDigitCodePoint(prev) || prev === 0x0647) {\n return 'ـ';\n }\n }\n return '';\n });\n\n/**\n * Removes the Hijri date marker when it immediately follows a date-like token.\n */\nconst removeHijriDateMarker = (s: string): string =>\n s.replace(/([0-9\\u0660-\\u0669][0-9\\u0660-\\u0669/\\-\\s]*?)\\s*ه(?:ـ)?(?=(?:\\s|$|[^\\p{L}\\p{N}]))/gu, '$1');\n\n/**\n * Applies NFC normalization if available and requested.\n */\nconst applyNfcNormalization = (s: string, enable: boolean): string => (enable && s.normalize ? s.normalize('NFC') : s);\n\n/**\n * Removes zero-width controls, optionally replacing them with spaces.\n */\nconst removeZeroWidthControls = (s: string, enable: boolean, asSpace: boolean): string =>\n enable ? s.replace(RX_ZERO_WIDTH, asSpace ? ' ' : '') : s;\n\n/**\n * Removes diacritics and tatweel according to the selected mode.\n */\nconst removeDiacriticsAndTatweel = (\n s: string,\n removeDiacritics: boolean,\n tatweelMode: false | 'safe' | 'all',\n): string => {\n if (removeDiacritics) {\n s = s.replace(RX_DIACRITICS, '');\n }\n if (tatweelMode === 'safe') {\n return removeTatweelSafely(s);\n }\n if (tatweelMode === 'all') {\n return s.replace(RX_TATWEEL, '');\n }\n return s;\n};\n\n/**\n * Applies canonical character mappings: Alif variants, alif maqṣūrah, tāʾ marbūṭa.\n */\nconst applyCharacterMappings = (\n s: string,\n normalizeAlif: boolean,\n maqsurahToYa: boolean,\n taMarbutahToHa: boolean,\n): string => {\n if (normalizeAlif) {\n s = s.replace(RX_ALIF_VARIANTS, 'ا');\n }\n if (maqsurahToYa) {\n s = s.replace(RX_ALIF_MAQSURAH, 'ي');\n }\n if (taMarbutahToHa) {\n s = s.replace(RX_TA_MARBUTAH, 'ه');\n }\n return s;\n};\n\n/**\n * Removes Latin letters/digits and common OCR noise by converting them to spaces.\n */\nconst removeLatinAndSymbolNoise = (s: string, enable: boolean): string =>\n enable ? s.replace(RX_LATIN_AND_SYMBOLS, ' ') : s;\n\n/**\n * Applies letter filters:\n * - `lettersAndSpacesOnly`: keep Arabic letters and whitespace, drop everything else to spaces.\n * - `lettersOnly`: keep only Arabic letters, drop everything else.\n */\nconst applyLetterFilters = (s: string, lettersAndSpacesOnly: boolean, lettersOnly: boolean): string => {\n if (lettersAndSpacesOnly) {\n return s.replace(RX_NOT_LETTERS_OR_SPACE, ' ');\n }\n if (lettersOnly) {\n return s.replace(RX_NON_ARABIC_LETTERS, '');\n }\n return s;\n};\n\n/**\n * Collapses whitespace runs and trims if requested.\n */\nconst normalizeWhitespace = (s: string, collapse: boolean, doTrim: boolean): string => {\n if (collapse) {\n s = s.replace(RX_SPACES, ' ');\n }\n if (doTrim) {\n s = s.trim();\n }\n return s;\n};\n\n/**\n * Resolves a boolean by taking an optional override over a preset value.\n */\nconst resolveBoolean = (presetValue: boolean, override?: boolean): boolean =>\n override === undefined ? presetValue : !!override;\n\n/**\n * Resolves the tatweel mode by taking an optional override over a preset mode.\n * An override of `true` maps to `'safe'` for convenience.\n */\nconst resolveTatweelMode = (\n presetValue: false | 'safe' | 'all',\n override?: boolean | 'safe' | 'all',\n): false | 'safe' | 'all' => {\n if (override === undefined) {\n return presetValue;\n }\n if (override === true) {\n return 'safe';\n }\n if (override === false) {\n return false;\n }\n return override;\n};\n\nconst PRESETS: Record<SanitizePreset, PresetOptions> = {\n light: {\n nfc: true,\n stripZeroWidth: true,\n zeroWidthToSpace: false,\n stripDiacritics: false,\n stripTatweel: false,\n normalizeAlif: false,\n replaceAlifMaqsurah: false,\n replaceTaMarbutahWithHa: false,\n stripLatinAndSymbols: false,\n keepOnlyArabicLetters: false,\n lettersAndSpacesOnly: false,\n collapseWhitespace: true,\n trim: true,\n removeHijriMarker: false,\n },\n search: {\n nfc: true,\n stripZeroWidth: true,\n zeroWidthToSpace: false,\n stripDiacritics: true,\n stripTatweel: 'all',\n normalizeAlif: true,\n replaceAlifMaqsurah: true,\n replaceTaMarbutahWithHa: false,\n stripLatinAndSymbols: false,\n keepOnlyArabicLetters: false,\n lettersAndSpacesOnly: false,\n collapseWhitespace: true,\n trim: true,\n removeHijriMarker: true,\n },\n aggressive: {\n nfc: true,\n stripZeroWidth: true,\n zeroWidthToSpace: false,\n stripDiacritics: true,\n stripTatweel: 'all',\n normalizeAlif: true,\n replaceAlifMaqsurah: true,\n replaceTaMarbutahWithHa: true,\n stripLatinAndSymbols: true,\n keepOnlyArabicLetters: false,\n lettersAndSpacesOnly: true,\n collapseWhitespace: true,\n trim: true,\n removeHijriMarker: true,\n },\n} as const;\n\nconst PRESET_NONE: PresetOptions = {\n nfc: false,\n stripZeroWidth: false,\n zeroWidthToSpace: false,\n stripDiacritics: false,\n stripTatweel: false,\n normalizeAlif: false,\n replaceAlifMaqsurah: false,\n replaceTaMarbutahWithHa: false,\n stripLatinAndSymbols: false,\n keepOnlyArabicLetters: false,\n lettersAndSpacesOnly: false,\n collapseWhitespace: false,\n trim: false,\n removeHijriMarker: false,\n};\n\n/**\n * Sanitizes Arabic text according to a preset or custom options.\n *\n * Presets:\n * - `'light'`: NFC, zero-width removal, collapse/trim spaces.\n * - `'search'`: removes diacritics and tatweel, normalizes Alif and ى→ي, removes Hijri marker.\n * - `'aggressive'`: ideal for FTS; keeps letters+spaces only and strips common noise.\n *\n * Custom options:\n * - Passing an options object overlays the selected `base` preset (default `'light'`).\n * - Use `base: 'none'` to apply **only** the rules you specify (e.g., tatweel only).\n *\n * Examples:\n * ```ts\n * sanitizeArabic('أبـــتِـــكَةُ', { base: 'none', stripTatweel: true }); // 'أبتِكَةُ'\n * sanitizeArabic('1435/3/29 هـ', 'aggressive'); // '1435 3 29'\n * sanitizeArabic('اَلسَّلَامُ عَلَيْكُمْ', 'search'); // 'السلام عليكم'\n * ```\n */\nexport const sanitizeArabic = (input: string, optionsOrPreset: SanitizePreset | SanitizeOptions = 'search'): string => {\n if (!input) {\n return '';\n }\n\n let preset: PresetOptions;\n let opts: SanitizeOptions | null = null;\n\n if (typeof optionsOrPreset === 'string') {\n preset = PRESETS[optionsOrPreset];\n } else {\n const base = optionsOrPreset.base ?? 'light';\n preset = base === 'none' ? PRESET_NONE : PRESETS[base];\n opts = optionsOrPreset;\n }\n\n const nfc = resolveBoolean(preset.nfc, opts?.nfc);\n const stripZW = resolveBoolean(preset.stripZeroWidth, opts?.stripZeroWidth);\n const zwAsSpace = resolveBoolean(preset.zeroWidthToSpace, opts?.zeroWidthToSpace);\n const removeDia = resolveBoolean(preset.stripDiacritics, opts?.stripDiacritics);\n const normAlif = resolveBoolean(preset.normalizeAlif, opts?.normalizeAlif);\n const maqToYa = resolveBoolean(preset.replaceAlifMaqsurah, opts?.replaceAlifMaqsurah);\n const taToHa = resolveBoolean(preset.replaceTaMarbutahWithHa, opts?.replaceTaMarbutahWithHa);\n const stripNoise = resolveBoolean(preset.stripLatinAndSymbols, opts?.stripLatinAndSymbols);\n const lettersSpacesOnly = resolveBoolean(preset.lettersAndSpacesOnly, opts?.lettersAndSpacesOnly);\n const lettersOnly = resolveBoolean(preset.keepOnlyArabicLetters, opts?.keepOnlyArabicLetters);\n const collapseWS = resolveBoolean(preset.collapseWhitespace, opts?.collapseWhitespace);\n const doTrim = resolveBoolean(preset.trim, opts?.trim);\n const removeHijri = resolveBoolean(preset.removeHijriMarker, opts?.removeHijriMarker);\n const tatweelMode = resolveTatweelMode(preset.stripTatweel, opts?.stripTatweel);\n\n let s = input;\n s = applyNfcNormalization(s, nfc);\n s = removeZeroWidthControls(s, stripZW, zwAsSpace);\n if (removeHijri) {\n s = removeHijriDateMarker(s);\n }\n s = removeDiacriticsAndTatweel(s, removeDia, tatweelMode);\n s = applyCharacterMappings(s, normAlif, maqToYa, taToHa);\n\n if (!lettersSpacesOnly) {\n s = removeLatinAndSymbolNoise(s, stripNoise);\n }\n s = applyLetterFilters(s, lettersSpacesOnly, lettersOnly);\n\n s = normalizeWhitespace(s, collapseWS, doTrim);\n\n return s;\n};\n\n/** Character class for Arabic diacritics (tashkīl/harakāt). */\nconst DIACRITICS_CLASS = '[\\\\u0610-\\\\u061A\\\\u064B-\\\\u065F\\\\u0670\\\\u06D6-\\\\u06ED]';\n/** Tatweel (kashīda) class. */\nconst TATWEEL_CLASS = '\\\\u0640';\n\n/**\n * Escape a string so it can be safely embedded into a RegExp source.\n *\n * @param s Any string\n * @returns Escaped string\n */\nexport const escapeRegex = (s: string): string => s.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n\n/** Optional equivalence toggles for {@link makeDiacriticInsensitiveRegex}. */\ntype EquivOptions = {\n /** Treat ا/أ/إ/آ as equivalent. @default true */\n alif?: boolean;\n /** Treat ة/ه as equivalent. @default true */\n taMarbutahHa?: boolean;\n /** Treat ى/ي as equivalent. @default true */\n alifMaqsurahYa?: boolean;\n};\n\n/** Options for {@link makeDiacriticInsensitiveRegex}. */\nexport type MakeRegexOptions = {\n /**\n * Character equivalences to allow.\n * @default { alif: true, taMarbutahHa: true, alifMaqsurahYa: true }\n */\n equivalences?: EquivOptions;\n\n /**\n * Allow tatweel between letters (tolerate decorative elongation).\n * @default true\n */\n allowTatweel?: boolean;\n\n /**\n * Ignore diacritics by inserting a `DIACRITICS_CLASS*` after each letter.\n * @default true\n */\n ignoreDiacritics?: boolean;\n\n /**\n * Treat any whitespace in the needle as `\\s+` for flexible matching.\n * @default true\n */\n flexWhitespace?: boolean;\n\n /**\n * RegExp flags to use.\n * @default 'u'\n */\n flags?: string;\n};\n\n/**\n * Build a **diacritic-insensitive**, **tatweel-tolerant** RegExp for Arabic text matching.\n *\n * Features:\n * - Optional character equivalences: ا~أ~إ~آ, ة~ه, ى~ي.\n * - Optional tolerance for tatweel between characters.\n * - Optional diacritic-insensitivity (by inserting a diacritics class after each char).\n * - Optional flexible whitespace (needle whitespace becomes `\\s+`).\n *\n * @param needle The Arabic text to match\n * @param opts See {@link MakeRegexOptions}\n * @returns A `RegExp` matching the needle with the desired tolerances\n *\n * @example\n * const rx = makeDiacriticInsensitiveRegex('أنا إلى الآفاق');\n * rx.test('انا الي الافاق'); // true\n * rx.test('اَنا إلى الآفاق'); // true\n */\nexport const makeDiacriticInsensitiveRegex = (needle: string, opts: MakeRegexOptions = {}): RegExp => {\n const {\n equivalences = { alif: true, taMarbutahHa: true, alifMaqsurahYa: true },\n allowTatweel = true,\n ignoreDiacritics = true,\n flexWhitespace = true,\n flags = 'u',\n } = opts;\n\n // Safety guard against extremely large inputs causing excessive pattern sizes\n if (needle.length > 5000) {\n throw new Error('makeDiacriticInsensitiveRegex: needle too long');\n }\n\n const charClass = (ch: string): string => {\n switch (ch) {\n case 'ا':\n case 'أ':\n case 'إ':\n case 'آ':\n return equivalences.alif ? '[اأإآ]' : 'ا';\n case 'ة':\n case 'ه':\n return equivalences.taMarbutahHa ? '[هة]' : escapeRegex(ch);\n case 'ى':\n case 'ي':\n return equivalences.alifMaqsurahYa ? '[ىي]' : escapeRegex(ch);\n default:\n return escapeRegex(ch);\n }\n };\n\n const after = `${ignoreDiacritics ? `${DIACRITICS_CLASS}*` : ''}${allowTatweel ? `${TATWEEL_CLASS}*` : ''}`;\n\n let pattern = '';\n for (const ch of Array.from(needle)) {\n if (/\\s/.test(ch)) {\n pattern += flexWhitespace ? '\\\\s+' : '\\\\s*';\n } else {\n pattern += `${charClass(ch)}${after}`;\n }\n }\n\n return new RegExp(pattern, flags);\n};\n","/**\n * Adds line breaks after punctuation marks such as periods, exclamation points, and question marks.\n * Example: 'Text.' becomes 'Text.\\n'.\n * @param {string} text - The input text containing punctuation.\n * @returns {string} - The modified text with line breaks added after punctuation.\n */\nexport const insertLineBreaksAfterPunctuation = (text: string) => {\n // Define the punctuation marks that should trigger a new line\n const punctuation = /([.?!؟])/g;\n\n // Replace occurrences of punctuation marks followed by a space with the punctuation mark, a newline, and the space\n const formattedText = text.replace(punctuation, '$1\\n').replace(/\\n\\s+/g, '\\n').trim();\n\n return formattedText;\n};\n\n/**\n * Adds spaces before and after punctuation, except for certain cases like quoted text or ayah references.\n * Example: 'Text,word' becomes 'Text, word'.\n * @param {string} text - The input text containing punctuation.\n * @returns {string} - The modified text with spaces added before and after punctuation.\n */\nexport const addSpaceBeforeAndAfterPunctuation = (text: string) => {\n return text\n .replace(/( ?)([.!?,،؟;؛])((?![ '”“)\"\\]\\n])|(?=\\s{2,}))/g, '$1$2 ')\n .replace(/\\s([.!?,،؟;؛])\\s*([ '”“)\"\\]\\n])/g, '$1$2')\n .replace(/([^\\s\\w\\d'”“)\"\\]]+)\\s+([.!?,،؟;؛])|([.!?,،؟;؛])\\s+$/g, '$1$2$3')\n .replace(/(?<=\\D)( ?: ?)(?!(\\d+:)|(:\\d+))|(?<=\\d) ?: ?(?=\\D)|(?<=\\D) ?: ?(?=\\d)/g, ': ');\n};\n\n/**\n * Turns regular double quotes surrounding a body of text into smart quotes.\n * Also fixes incorrect starting quotes by ensuring the string starts with an opening quote if needed.\n * Example: 'The \"quick brown\" fox' becomes 'The “quick brown” fox'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with smart quotes applied.\n */\nexport const applySmartQuotes = (text: string) => {\n return text\n .replace(/[“”]/g, '\"')\n .replace(/\"([^\"]*)\"/g, '“$1”')\n .replace(/^”/g, '“');\n};\n\n/**\n * Replaces literal new line characters (\\n) and carriage returns (\\r) with actual line breaks.\n * Example: 'A\\\\nB' becomes 'A\\nB'.\n * @param {string} text - The input text containing literal new lines.\n * @returns {string} - The modified text with actual line breaks.\n */\nexport const cleanLiteralNewLines = (text: string) => {\n return text.replace(/\\\\n|\\r/g, '\\n');\n};\n\n/**\n * Removes trailing spaces from each line in a multiline string.\n * Example: \" This is a line \\nAnother line \" becomes \"This is a line\\nAnother line\".\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with trailing spaces removed.\n */\nexport const cleanMultilines = (text: string) => {\n return text.replace(/^ +| +$/gm, '');\n};\n\n/**\n * Detects if a word is by itself in a line.\n * @param text The text to check.\n * @returns true if there exists a word in any of the lines in the text that is by itself.\n */\nexport const hasWordInSingleLine = (text: string): boolean => {\n return /^\\s*\\S+\\s*$/gm.test(text);\n};\n\n/**\n * Checks if the input string consists of only punctuation characters.\n * @param {string} text - The input text to check.\n * @returns {boolean} - Returns true if the string contains only punctuation, false otherwise.\n */\nexport const isOnlyPunctuation = (text: string): boolean => {\n const regex = /^[\\u0020-\\u002f\\u003a-\\u0040\\u005b-\\u0060\\u007b-\\u007e0-9٠-٩]+$/;\n return regex.test(text);\n};\n\n/**\n * Cleans unnecessary spaces before punctuation marks such as periods, commas, and question marks.\n * Example: 'This is a sentence , with extra space .' becomes 'This is a sentence, with extra space.'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with cleaned spaces before punctuation.\n */\nexport const cleanSpacesBeforePeriod = (text: string) => {\n return text.replace(/\\s+([.؟!,،؛:?])/g, '$1');\n};\n\n/**\n * Condenses multiple asterisks (*) into a single one.\n * Example: '***' becomes '*'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed asterisks.\n */\nexport const condenseAsterisks = (text: string) => {\n return text.replace(/(\\*\\s*)+/g, '*');\n};\n\n/**\n * Replaces occurrences of colons surrounded by periods (e.g., '.:.' or ':') with a single colon.\n * Example: 'This.:. is a test' becomes 'This: is a test'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed colons.\n */\nexport const condenseColons = (text: string) => {\n return text.replace(/[.-]?:[.-]?/g, ':');\n};\n\n/**\n * Condenses two or more dashes (--) into a single dash (-).\n * Example: 'This is some ---- text' becomes 'This is some - text'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed dashes.\n */\nexport const condenseDashes = (text: string) => {\n return text.replace(/-{2,}/g, '-');\n};\n\n/**\n * Replaces sequences of two or more periods (e.g., '...') with an ellipsis character (…).\n * Example: 'This is a test...' becomes 'This is a test…'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with ellipses condensed.\n */\nexport const condenseEllipsis = (text: string) => {\n return text.replace(/\\.{2,}/g, '…');\n};\n\n/**\n * Reduces multiple consecutive line breaks (3 or more) to exactly 2 line breaks.\n * Example: 'This is line 1\\n\\n\\n\\nThis is line 2' becomes 'This is line 1\\n\\nThis is line 2'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed line breaks.\n */\nexport const reduceMultilineBreaksToDouble = (text: string) => {\n return text.replace(/(\\n\\s*){3,}/g, '\\n\\n');\n};\n\n/**\n * Reduces multiple consecutive line breaks (2 or more) to exactly 1 line break.\n * Example: 'This is line 1\\n\\nThis is line 2' becomes 'This is line 1\\nThis is line 2'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed line breaks.\n */\nexport const reduceMultilineBreaksToSingle = (text: string) => {\n return text.replace(/(\\n\\s*){2,}/g, '\\n');\n};\n\n/**\n * Condenses multiple periods separated by spaces (e.g., '. . .') into a single period.\n * Example: 'This . . . is a test' becomes 'This. is a test'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed periods.\n */\nexport const condensePeriods = (text: string) => {\n return text.replace(/\\. +\\./g, '.');\n};\n\n/**\n * Condenses multiple underscores (__) or Arabic Tatweel characters (ـــــ) into a single underscore or Tatweel.\n * Example: 'This is ـــ some text __' becomes 'This is ـ some text _'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed underscores.\n */\nexport const condenseUnderscores = (text: string) => {\n return text.replace(/ـ{2,}/g, 'ـ').replace(/_+/g, '_');\n};\n\n/**\n * Replaces double parentheses or brackets with single ones.\n * Example: '((text))' becomes '(text)'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed brackets.\n */\nexport const doubleToSingleBrackets = (text: string) => {\n return text.replace(/(\\(|\\)){2,}|(\\[|\\]){2,}/g, '$1$2');\n};\n\n/**\n * Ensures at most 1 space exists before any word before brackets.\n * Adds a space if there isn't one, or reduces multiple spaces to one.\n * @param {string} text - The input text to modify\n * @returns {string} - The modified text with proper spacing before brackets\n */\nexport const ensureSpaceBeforeBrackets = (text: string) => {\n return text.replace(/(\\S) *(\\([^)]*\\))/g, '$1 $2');\n};\n\n/**\n * Ensures at most 1 space exists before any word before Arabic quotation marks.\n * Adds a space if there isn't one, or reduces multiple spaces to one.\n * @param {string} text - The input text to modify\n * @returns {string} - The modified text with proper spacing before Arabic quotes\n */\nexport const ensureSpaceBeforeQuotes = (text: string) => {\n return text.replace(/(\\S) *(«[^»]*»)/g, '$1 $2');\n};\n\n/**\n * Fixes common bracket and quotation mark typos in text\n * Corrects malformed patterns like \"(«\", \"»)\", and misplaced digits in brackets\n * @param text - Input text that may contain bracket typos\n * @returns Text with corrected bracket and quotation mark combinations\n */\nexport const fixBracketTypos = (text: string) => {\n return (\n text\n .replace(/\\(«|\\( \\(/g, '«')\n .replace(/»\\)|\\) \\)/g, '»')\n // Fix \")digit)\" pattern to \"(digit)\"\n .replace(/\\)([0-9\\u0660-\\u0669]+)\\)/g, '($1)')\n // Fix \")digit(\" pattern to \"(digit)\"\n .replace(/\\)([0-9\\u0660-\\u0669]+)\\(/g, '($1)')\n );\n};\n\n/**\n * Fixes mismatched curly braces by converting incorrect bracket/brace combinations\n * to proper curly braces { }\n * @param text - Input text that may contain mismatched curly braces\n * @returns Text with corrected curly brace pairs\n */\nexport const fixCurlyBraces = (text: string) => {\n // Process each mismatch type separately to avoid interference\n let result = text;\n\n // Fix ( content } to { content }\n result = result.replace(/\\(([^(){}]+)\\}/g, '{$1}');\n\n // Fix { content ) to { content }\n return result.replace(/\\{([^(){}]+)\\)/g, '{$1}');\n};\n\n/**\n * Fixes mismatched quotation marks in Arabic text by converting various\n * incorrect bracket/quote combinations to proper Arabic quotation marks (« »)\n * @param text - Input text that may contain mismatched quotation marks\n * @returns Text with corrected Arabic quotation marks\n */\nexport const fixMismatchedQuotationMarks = (text: string) => {\n return (\n text\n // Matches mismatched quotation marks: « followed by content and closed with )\n .replace(/«([^»)]+)\\)/g, '«$1»')\n // Fix reverse mismatched ( content » to « content »\n .replace(/\\(([^()]+)»/g, '«$1»')\n // Matches any unclosed « quotation marks at end of content\n .replace(/«([^»]+)(?=\\s*$|$)/g, '«$1»')\n );\n};\n\n/**\n * Formats a multiline string by joining sentences and maintaining footnotes on their own lines.\n * Footnotes are identified by Arabic and English numerals.\n * Example: 'Sentence one.\\n(1) A footnote.\\nSentence two.' remains the same, while regular sentences are joined.\n * @param {string} input - The input text containing sentences and footnotes.\n * @returns {string} - The formatted text.\n */\nexport const formatStringBySentence = (input: string) => {\n const footnoteRegex = /^\\((?:\\d+|۱|۲|۳|۴|۵|۶|۷|۸|۹)\\)\\s/;\n const sentences: string[] = [];\n const lines = input.split('\\n');\n let currentSentence = '';\n\n lines.forEach((line) => {\n const trimmedLine = line.trim();\n const isFootnote = footnoteRegex.test(trimmedLine);\n const isNumber = /^\\(\\d+\\/\\d+\\)/.test(trimmedLine);\n\n if (isFootnote && !isNumber) {\n if (currentSentence) {\n sentences.push(currentSentence.trim());\n currentSentence = '';\n }\n sentences.push(trimmedLine);\n } else {\n currentSentence += `${trimmedLine} `;\n const lastChar = currentSentence.trim().slice(-1);\n if (/[.!؟]/.test(lastChar)) {\n sentences.push(currentSentence.trim());\n currentSentence = '';\n }\n }\n });\n\n // Add any remaining text to the output\n if (currentSentence) {\n sentences.push(currentSentence.trim());\n }\n\n return sentences.join('\\n');\n};\n\n/**\n * Detects if text is entirely in uppercase letters\n * @param text - The text to check\n * @returns true if all alphabetic characters are uppercase, false otherwise\n */\nexport const isAllUppercase = (text: string) => {\n // Remove non-letter characters (including numbers, punctuation, spaces)\n // \\p{L} matches any Unicode letter character\n const lettersOnly = text.replace(/[^\\p{L}]/gu, '');\n\n // If there are no letter characters, return false\n if (lettersOnly.length === 0) {\n return false;\n }\n\n return lettersOnly === lettersOnly.toUpperCase();\n};\n\n/**\n * Removes unnecessary spaces around slashes in references.\n * Example: '127 / 11' becomes '127/11'.\n * @param {string} text - The input text containing references.\n * @returns {string} - The modified text with spaces removed around slashes.\n */\nexport const normalizeSlashInReferences = (text: string) => {\n return text.replace(/(\\d+)\\s?\\/\\s?(\\d+)/g, '$1/$2');\n};\n\n/**\n * Reduces multiple spaces or tabs to a single space.\n * Example: 'This is a text' becomes 'This is a text'.\n * @param {string} text - The input text containing extra spaces.\n * @returns {string} - The modified text with reduced spaces.\n */\nexport const normalizeSpaces = (text: string) => {\n return text.replace(/[ \\t]+/g, ' ');\n};\n\n/**\n * Removes redundant punctuation marks that follow Arabic question marks or exclamation marks.\n * This function cleans up text by removing periods (.) or Arabic commas (،) that immediately\n * follow Arabic question marks (؟) or exclamation marks (!), as they are considered redundant\n * in proper Arabic punctuation.\n *\n * @param text - The Arabic text to clean up\n * @returns The text with redundant punctuation removed\n *\n * @example\n * ```typescript\n * removeRedundantPunctuation('كيف حالك؟.') // Returns: 'كيف حالك؟'\n * removeRedundantPunctuation('ممتاز!،') // Returns: 'ممتاز!'\n * removeRedundantPunctuation('هذا جيد.') // Returns: 'هذا جيد.' (unchanged)\n * ```\n */\nexport const removeRedundantPunctuation = (text: string) => {\n return text.replace(/([؟!])[.،]/g, '$1');\n};\n\n/**\n * Removes spaces inside brackets, parentheses, or square brackets.\n * Example: '( a b )' becomes '(a b)'.\n * @param {string} text - The input text with spaces inside brackets.\n * @returns {string} - The modified text with spaces removed inside brackets.\n */\nexport const removeSpaceInsideBrackets = (text: string) => {\n return text.replace(/([[(])\\s*(.*?)\\s*([\\])])/g, '$1$2$3');\n};\n\n/**\n * Replaces double parentheses single a single arrow variation.\n * Example: '((text))' becomes '«text»'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed brackets.\n */\nexport const replaceDoubleBracketsWithArrows = (text: string) => {\n return text.replace(/\\(\\(\\s?/g, '«').replace(/\\s?\\)\\)/g, '»');\n};\n\n/**\n * Removes bold styling from text by normalizing the string and removing stylistic characters.\n * @param {string} text - The input text containing bold characters.\n * @returns {string} - The modified text with bold styling removed.\n */\nexport const stripBoldStyling = (text: string) => {\n // Normalize the string to NFKD form\n const normalizedString = text.normalize('NFKD');\n\n // Remove combining marks (diacritics) and stylistic characters from the string\n return normalizedString.replace(/[\\u0300-\\u036f]/g, '').trim();\n};\n\n/**\n * Removes italicized characters by replacing italic Unicode characters with their normal counterparts.\n * Example: '𝘼𝘽𝘾' becomes 'ABC'.\n * @param {string} text - The input text containing italicized characters.\n * @returns {string} - The modified text with italics removed.\n */\nexport const stripItalicsStyling = (text: string) => {\n const italicMap: Record<string, string> = {\n '\\uD835\\uDC4E': 'I',\n '\\uD835\\uDC68': 'g',\n '\\u{1D63C}': '!',\n '\\uD835\\uDC4F': 'J',\n '\\uD835\\uDC69': 'h',\n '\\u{1D63D}': '?',\n '\\uD835\\uDC50': 'K',\n '\\uD835\\uDC6A': 'i',\n '\\uD835\\uDC51': 'L',\n '\\uD835\\uDC6B': 'j',\n '\\u{1D63F}': ',',\n '\\uD835\\uDC52': 'M',\n '\\uD835\\uDC6C': 'k',\n '\\u{1D640}': '.',\n '\\uD835\\uDC53': 'N',\n '\\uD835\\uDC6D': 'l',\n '\\uD835\\uDC54': 'O',\n '\\uD835\\uDC6E': 'm',\n '\\uD835\\uDC6F': 'n',\n '\\uD835\\uDC56': 'Q',\n '\\uD835\\uDC70': 'o',\n '\\uD835\\uDC57': 'R',\n '\\uD835\\uDC71': 'p',\n '\\uD835\\uDC58': 'S',\n '\\uD835\\uDC72': 'q',\n '\\uD835\\uDC59': 'T',\n '\\uD835\\uDC73': 'r',\n '\\u{1D647}': '-',\n '\\uD835\\uDC5A': 'U',\n '\\uD835\\uDC74': 's',\n '\\uD835\\uDC5B': 'V',\n '\\uD835\\uDC75': 't',\n '\\uD835\\uDC5C': 'W',\n '\\uD835\\uDC76': 'u',\n '\\uD835\\uDC5D': 'X',\n '\\uD835\\uDC77': 'v',\n '\\uD835\\uDC5E': 'Y',\n '\\uD835\\uDC78': 'w',\n '\\uD835\\uDC5F': 'Z',\n '\\uD835\\uDC79': 'x',\n '\\uD835\\uDC46': 'A',\n '\\uD835\\uDC7A': 'y',\n '\\uD835\\uDC47': 'B',\n '\\uD835\\uDC7B': 'z',\n '\\uD835\\uDC62': 'a',\n '\\uD835\\uDC48': 'C',\n '\\uD835\\uDC63': 'b',\n '\\uD835\\uDC49': 'D',\n '\\uD835\\uDC64': 'c',\n '\\uD835\\uDC4A': 'E',\n '\\uD835\\uDC65': 'd',\n '\\uD835\\uDC4B': 'F',\n '\\uD835\\uDC66': 'e',\n '\\uD835\\uDC4C': 'G',\n '\\uD835\\uDC67': 'f',\n '\\uD835\\uDC4D': 'H',\n '\\uD835\\uDC55': 'P',\n };\n\n return text.replace(/[\\uD835\\uDC62-\\uD835\\uDC7B\\uD835\\uDC46-\\uD835\\uDC5F\\u{1D63C}-\\u{1D647}]/gu, (match) => {\n return italicMap[match] || match;\n });\n};\n\n/**\n * Removes all bold and italic styling from the input text.\n * @param {string} text - The input text to remove styling from.\n * @returns {string} - The modified text with all styling removed.\n */\nexport const stripStyling = (text: string) => {\n return stripItalicsStyling(stripBoldStyling(text));\n};\n\n/**\n * Converts a string to title case (first letter of each word capitalized)\n * @param str - The input string to convert\n * @returns String with each word's first letter capitalized\n */\nexport const toTitleCase = (str: string) => {\n return str\n .toLowerCase()\n .split(' ')\n .map((word) => {\n if (word.length === 0) return word;\n // Find the first Unicode letter in the chunk\n const match = word.match(/\\p{L}/u);\n if (!match || match.index === undefined) return word;\n const i = match.index;\n return word.slice(0, i) + word.charAt(i).toUpperCase() + word.slice(i + 1);\n })\n .join(' ');\n};\n\n/**\n * Removes unnecessary spaces inside quotes.\n * Example: '“ Text ”' becomes '“Text”'.\n * @param {string} text - The input text with spaces inside quotes.\n * @returns {string} - The modified text with spaces removed inside quotes.\n */\nexport const trimSpaceInsideQuotes = (text: string) => {\n return text.replace(/([“”\"]|«) *(.*?) *([“”\"]|»)/g, '$1$2$3');\n};\n","/**\n * Converts a string that resembles JSON but with numeric keys and single-quoted values\n * into valid JSON format. This function replaces numeric keys with quoted numeric keys\n * and ensures all values are double-quoted as required by JSON.\n *\n * @param {string} str - The input string that needs to be fixed into valid JSON.\n * @returns {string} - A valid JSON string.\n *\n * @example\n * const result = normalizeJsonSyntax(\"{10: 'abc', 20: 'def'}\");\n * console.log(result); // '{\"10\": \"abc\", \"20\": \"def\"}'\n */\nexport const normalizeJsonSyntax = (str: string) => {\n let input = str.replace(/(\\b\\d+\\b)(?=:)/g, '\"$1\"');\n input = input.replace(/:\\s*'([^']+)'/g, ': \"$1\"');\n input = input.replace(/:\\s*\"([^\"]+)\"/g, ': \"$1\"');\n\n return JSON.stringify(JSON.parse(input));\n};\n\n/**\n * Checks if a given string resembles a JSON object with numeric or quoted keys and values\n * that are single or double quoted. This is useful for detecting malformed JSON-like\n * structures that can be fixed by the `normalizeJsonSyntax` function.\n *\n * @param {string} str - The input string to check.\n * @returns {boolean} - Returns true if the string is JSON-like, false otherwise.\n *\n * @example\n * const result = isJsonStructureValid(\"{10: 'abc', 'key': 'value'}\");\n * console.log(result); // true\n */\nexport const isJsonStructureValid = (str: string) => {\n // Checks for a pattern with numeric keys or quoted keys and values in quotes\n const jsonLikePattern =\n /^{(\\s*(\\d+|'[^']*'|\"[^\"]*\")\\s*:\\s*('|\")[^'\"]*\\3\\s*,)*(?:\\s*(\\d+|'[^']*'|\"[^\"]*\")\\s*:\\s*('|\")[^'\"]*\\5\\s*)}$/;\n return jsonLikePattern.test(str.trim());\n};\n\n/**\n * Splits a string by spaces and quoted substrings.\n *\n * This function takes an input string and splits it into parts where substrings\n * enclosed in double quotes are treated as a single part. Other substrings\n * separated by spaces are split normally.\n *\n * @param {string} query - The input string to be split.\n * @returns {string[]} An array of strings, with quoted substrings kept intact.\n *\n * @example\n * const result = splitByQuotes('\"This is\" \"a part of the\" \"string and\"');\n * console.log(result); // [\"This is\", \"a part of the\", \"string and\"]\n */\nexport const splitByQuotes = (query: string): string[] => {\n const regex = /(?:[^\\s\"]+|\"(.*?)\")+/g;\n return (query.match(regex) || []).map((s: string) => (s.startsWith('\"') ? s.slice(1, -1) : s));\n};\n\n/**\n * Checks if all double quotes in a string are balanced (even count).\n * A string has balanced quotes if every opening quote has a corresponding closing quote.\n *\n * @param str - The string to check for balanced quotes\n * @returns True if quotes are balanced (even count), false otherwise\n *\n * @example\n * ```typescript\n * areQuotesBalanced('Hello \"world\"') // Returns: true\n * areQuotesBalanced('Hello \"world') // Returns: false\n * areQuotesBalanced('No quotes') // Returns: true\n * ```\n */\nconst areQuotesBalanced = (str: string) => {\n let quoteCount = 0;\n for (const char of str) {\n if (char === '\"') {\n quoteCount++;\n }\n }\n return quoteCount % 2 === 0;\n};\n\nconst brackets = { '(': ')', '[': ']', '{': '}' };\nconst openBrackets = new Set(['(', '[', '{']);\nconst closeBrackets = new Set([')', ']', '}']);\n\n/**\n * Checks if all brackets in a string are properly balanced and matched.\n * This function validates that every opening bracket has a corresponding closing bracket\n * in the correct order and of the matching type.\n *\n * Supported bracket types: parentheses (), square brackets [], curly braces {}\n *\n * @param str - The string to check for balanced brackets\n * @returns True if all brackets are properly balanced and matched, false otherwise\n *\n * @example\n * ```typescript\n * areBracketsBalanced('(hello [world])') // Returns: true\n * areBracketsBalanced('(hello [world)') // Returns: false (mismatched)\n * areBracketsBalanced('((hello))') // Returns: true\n * areBracketsBalanced('(hello') // Returns: false (unclosed)\n * ```\n */\n\nconst areBracketsBalanced = (str: string) => {\n const stack: string[] = [];\n\n for (const char of str) {\n if (openBrackets.has(char)) {\n stack.push(char);\n } else if (closeBrackets.has(char)) {\n const lastOpen = stack.pop();\n if (!lastOpen || brackets[lastOpen as keyof typeof brackets] !== char) {\n return false;\n }\n }\n }\n\n return stack.length === 0;\n};\n\n/**\n * Checks if both quotes and brackets are balanced in a string.\n * This function combines quote balance checking and bracket balance checking\n * to ensure the entire string has properly balanced punctuation.\n *\n * A string is considered balanced when:\n * - All double quotes have matching pairs (even count)\n * - All brackets (parentheses, square brackets, curly braces) are properly matched and nested\n *\n * @param str - The string to check for balanced quotes and brackets\n * @returns True if both quotes and brackets are balanced, false otherwise\n *\n * @example\n * ```typescript\n * isBalanced('He said \"Hello (world)!\"') // Returns: true\n * isBalanced('He said \"Hello (world!\"') // Returns: false (unbalanced quote)\n * isBalanced('He said \"Hello (world)\"') // Returns: false (unbalanced quote)\n * isBalanced('Hello (world) [test]') // Returns: true\n * ```\n */\nexport const isBalanced = (str: string) => {\n return areQuotesBalanced(str) && areBracketsBalanced(str);\n};\n\n/**\n * Parses page input string into array of page numbers, supporting ranges and lists\n * @param pageInput - Page specification string (e.g., \"1-5\" or \"1,3,5\")\n * @returns Array of page numbers\n * @throws Error when start page exceeds end page in range\n */\nexport const parsePageRanges = (pageInput: string): number[] => {\n if (pageInput.includes('-')) {\n const [start, end] = pageInput.split('-').map(Number);\n\n if (start > end) {\n throw new Error('Start page cannot be greater than end page');\n }\n\n return Array.from({ length: end - start + 1 }, (_, i) => start + i);\n } else {\n return pageInput.split(',').map(Number);\n }\n};\n","import { escapeRegex } from './cleaning';\n\n/**\n * Removes various symbols, part references, and numerical markers from the text.\n * Example: '(1) (2/3)' becomes ''.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with symbols and part references removed.\n */\nexport const cleanSymbolsAndPartReferences = (text: string) => {\n return text.replace(\n / *\\(?:\\d+(?:\\/\\d+){0,2}\\)? *| *\\[\\d+(?:\\/\\d+)?\\] *| *«\\d+» *|\\d+\\/\\d+(?:\\/\\d+)?|[،§{}۝؍‎﴿﴾<>;_؟»«:!،؛[\\]…ـ¬.\\\\/*()\"]/g,\n ' ',\n );\n};\n\n/**\n * Removes trailing page numbers formatted as '-[46]-' from the text.\n * Example: 'This is some -[46]- text' becomes 'This is some text'.\n * @param {string} text - The input text with trailing page numbers.\n * @returns {string} - The modified text with page numbers removed.\n */\nexport const cleanTrailingPageNumbers = (text: string) => {\n return text.replace(/-\\[\\d+\\]-/g, '');\n};\n\n/**\n * Replaces consecutive line breaks and whitespace characters with a single space.\n * Example: 'a\\nb' becomes 'a b'.\n * @param {string} text - The input text containing line breaks or multiple spaces.\n * @returns {string} - The modified text with spaces.\n */\nexport const replaceLineBreaksWithSpaces = (text: string) => {\n return text.replace(/\\s+/g, ' ');\n};\n\n/**\n * Removes all numeric digits from the text.\n * Example: 'abc123' becomes 'abc'.\n * @param {string} text - The input text containing digits.\n * @returns {string} - The modified text with digits removed.\n */\nexport const stripAllDigits = (text: string) => {\n return text.replace(/[0-9]/g, '');\n};\n\n/**\n * Removes death year references like \"(d. 390H)\" and \"[d. 100h]\" from the text.\n * Example: 'Sufyān ibn ‘Uyaynah (d. 198h)' becomes 'Sufyān ibn ‘Uyaynah'.\n * @param {string} text - The input text containing death year references.\n * @returns {string} - The modified text with death years removed.\n */\nexport const removeDeathYear = (text: string) => {\n return text.replace(/\\[(d)\\.\\s*\\d{1,4}[hH]\\]\\s*|\\((d)\\.\\s*\\d{1,4}[hH]\\)\\s*/g, '');\n};\n\n/**\n * Removes numeric digits and dashes from the text.\n * Example: 'ABC 123-Xyz' becomes 'ABC Xyz'.\n * @param {string} text - The input text containing digits and dashes.\n * @returns {string} - The modified text with numbers and dashes removed.\n */\nexport const removeNumbersAndDashes = (text: string) => {\n return text.replace(/[\\d-]/g, '');\n};\n\n/**\n * Removes single digit references like (1), «2», [3] from the text.\n * Example: 'Ref (1), Ref «2», Ref [3]' becomes 'Ref , Ref , Ref '.\n * @param {string} text - The input text containing single digit references.\n * @returns {string} - The modified text with single digit references removed.\n */\nexport const removeSingleDigitReferences = (text: string) => {\n return text.replace(/\\(\\d{1}\\)|\\[\\d{1}\\]|«\\d»/g, '');\n};\n\n/**\n * Removes URLs from the text.\n * Example: 'Visit https://example.com' becomes 'Visit '.\n * @param {string} text - The input text containing URLs.\n * @returns {string} - The modified text with URLs removed.\n */\nexport const removeUrls = (text: string) => {\n return text.replace(\n /https?:\\/\\/(www\\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/g,\n '',\n );\n};\n\n/**\n * Removes common Markdown formatting syntax from text\n * @param text - The input text containing Markdown formatting\n * @returns Text with Markdown formatting removed (bold, italics, headers, lists, backticks)\n */\nexport const removeMarkdownFormatting = (text: string) => {\n return (\n text\n // Remove bold first (**text**) - must come before italics\n .replace(/\\*\\*([^*]+)\\*\\*/g, '$1')\n // Remove bold with underscores (__text__)\n .replace(/__([^_]+)__/g, '$1')\n // Remove italics (*text*)\n .replace(/\\*([^*]+)\\*/g, '$1')\n // Remove italics with underscores (_text_)\n .replace(/_([^_]+)_/g, '$1')\n // Remove strikethrough (~~text~~)\n .replace(/~~([^~]+)~~/g, '$1')\n // Remove blockquotes\n .replace(/^\\s*>\\s?/gm, '')\n // Remove images ![alt](url)\n .replace(/!\\[[^\\]]*]\\([^)]*\\)/g, '')\n // Convert links [text](url) -> text\n .replace(/\\[([^\\]]+)]\\([^)]*\\)/g, '$1')\n // Remove headers (# ## ### etc.)\n .replace(/^#+\\s*/gm, '')\n // Remove unordered list markers (- * +)\n .replace(/^\\s*[-*+]\\s+/gm, '')\n // Remove ordered list markers (1. 2. etc.)\n .replace(/^\\s*\\d+\\.\\s+/gm, '')\n // Remove backticks\n .replace(/`/gm, '')\n );\n};\n\n/**\n * Truncates a string to a specified length, adding an ellipsis if truncated.\n *\n * @param val - The string to truncate\n * @param n - Maximum length of the string (default: 150)\n * @returns The truncated string with ellipsis if needed, otherwise the original string\n *\n * @example\n * ```javascript\n * truncate('The quick brown fox jumps over the lazy dog', 20);\n * // Output: 'The quick brown fox…'\n *\n * truncate('Short text', 50);\n * // Output: 'Short text'\n * ```\n */\nexport const truncate = (val: string, n = 150): string => (val.length > n ? `${val.substring(0, n - 1)}…` : val);\n\n/**\n * Truncates a string from the middle, preserving both the beginning and end portions.\n *\n * @param text - The string to truncate\n * @param maxLength - Maximum length of the resulting string (default: 50)\n * @param endLength - Number of characters to preserve at the end (default: 1/3 of maxLength, minimum 3)\n * @returns The truncated string with ellipsis in the middle if needed, otherwise the original string\n *\n * @example\n * ```javascript\n * truncateMiddle('The quick brown fox jumps right over the lazy dog', 20);\n * // Output: 'The quick bro…zy dog'\n *\n * truncateMiddle('The quick brown fox jumps right over the lazy dog', 25, 8);\n * // Output: 'The quick brown …lazy dog'\n *\n * truncateMiddle('Short text', 50);\n * // Output: 'Short text'\n * ```\n */\nexport const truncateMiddle = (text: string, maxLength: number = 50, endLength?: number) => {\n if (text.length <= maxLength) {\n return text;\n }\n\n // Default end length is roughly 1/3 of max length, minimum 3 characters\n const defaultEndLength = Math.max(3, Math.floor(maxLength / 3));\n const actualEndLength = endLength ?? defaultEndLength;\n\n // Reserve space for the ellipsis character (1 char)\n const availableLength = maxLength - 1;\n\n // Calculate start length (remaining space after end portion)\n const startLength = availableLength - actualEndLength;\n\n // Ensure we have at least some characters at the start\n if (startLength < 1) {\n // If we can't fit both start and end, just truncate normally\n return `${text.substring(0, maxLength - 1)}…`;\n }\n\n const startPortion = text.substring(0, startLength);\n const endPortion = text.substring(text.length - actualEndLength);\n\n return `${startPortion}…${endPortion}`;\n};\n\n/**\n * Unescapes backslash-escaped spaces and trims whitespace from both ends.\n * Commonly used to clean file paths that have been escaped when pasted into terminals.\n *\n * @param input - The string to unescape and clean\n * @returns The cleaned string with escaped spaces converted to regular spaces and trimmed\n *\n * @example\n * ```javascript\n * unescapeSpaces('My\\\\ Folder\\\\ Name');\n * // Output: 'My Folder Name'\n *\n * unescapeSpaces(' /path/to/My\\\\ Document.txt ');\n * // Output: '/path/to/My Document.txt'\n *\n * unescapeSpaces('regular text');\n * // Output: 'regular text'\n * ```\n */\nexport const unescapeSpaces = (input: string) => input.replace(/\\\\ /g, ' ').trim();\n\n/**\n * Arabic diacritics (Tashkeel/Harakat).\n */\nconst DIACRITICS_CLASS = '[\\u064B\\u064C\\u064D\\u064E\\u064F\\u0650\\u0651\\u0652]';\n\n/**\n * Groups of equivalent Arabic characters — any character in a group should match\n * any other character in the same group.\n */\nconst EQUIV_GROUPS: string[][] = [\n ['\\u0627', '\\u0622', '\\u0623', '\\u0625'], // ا, آ, أ, إ\n ['\\u0629', '\\u0647'], // ة <-> ه\n ['\\u0649', '\\u064A'], // ى <-> ي\n];\n\n/** Return a character class for a char if it belongs to an equivalence group. */\nconst getEquivClass = (ch: string): string => {\n for (const group of EQUIV_GROUPS) {\n if (group.includes(ch)) {\n // join the group's members into a character class\n return `[${group.map((c) => escapeRegex(c)).join('')}]`;\n }\n }\n // not in equivalence groups -> return escaped character\n return escapeRegex(ch);\n};\n\n/** Small safe normalization: NFC, remove ZWJ/ZWNJ, collapse spaces. */\nconst normalizeArabicLight = (str: string) => {\n return str\n .normalize('NFC')\n .replace(/[\\u200C\\u200D]/g, '') // remove ZWJ/ZWNJ\n .replace(/\\s+/g, ' ')\n .trim();\n};\n\n/**\n * Creates a diacritic-insensitive regex pattern for Arabic text matching.\n * Normalizes text, handles character equivalences (ا/آ/أ/إ, ة/ه, ى/ي),\n * and makes each character tolerant of Arabic diacritics (Tashkeel/Harakat)\n * @param text - Input Arabic text to make diacritic-insensitive\n * @returns Regex pattern string that matches the text with or without diacritics and character variants\n */\nexport const makeDiacriticInsensitive = (text: string) => {\n const diacriticsMatcher = `${DIACRITICS_CLASS}*`;\n const norm = normalizeArabicLight(text);\n // Use Array.from to iterate grapheme-safe over the string (works fine for Arabic letters)\n return Array.from(norm)\n .map((ch) => getEquivClass(ch) + diacriticsMatcher)\n .join('');\n};\n","import { normalizeSpaces } from './formatting';\n\n/**\n * Replaces common Arabic prefixes (like 'Al-', 'Ar-', 'Ash-', etc.) with 'al-' in the text.\n * Handles different variations of prefixes such as Ash- and Al- but not when the second word\n * does not start with 'S'.\n * Example: 'Ash-Shafiee' becomes 'al-Shafiee'.\n *\n * @param {string} text - The input text containing Arabic prefixes.\n * @returns {string} - The modified text with standardized 'al-' prefixes.\n */\nexport const normalizeArabicPrefixesToAl = (text: string) => {\n return text\n .replace(/(\\b|\\W)(Al |Al-|Ar-|As-|Adh-|Ad-|Ats-|Ath |Ath-|Az |Az-|az-|adh-|as-|ar-)/g, '$1al-')\n .replace(/(\\b|\\W)(Ash-S|ash-S)/g, '$1al-S')\n .replace(/al- (.+?)\\b/g, 'al-$1');\n};\n\n/**\n * Removes double occurrences of Arabic apostrophes such as ʿʿ or ʾʾ in the text.\n * Example: 'ʿulamāʾʾ' becomes 'ʿulamāʾ'.\n *\n * @param {string} text - The input text containing double apostrophes.\n * @returns {string} - The modified text with condensed apostrophes.\n */\nexport const normalizeDoubleApostrophes = (text: string) => {\n return text.replace(/ʿʿ/g, 'ʿ').replace(/ʾʾ/g, 'ʾ');\n};\n\n/**\n * Replaces common salutations such as \"sallahu alayhi wasallam\" with \"ﷺ\" in the text.\n * It also handles variations of the salutation phrase, including 'peace and blessings be upon him'.\n * Example: 'Then Muḥammad (sallahu alayhi wasallam)' becomes 'Then Muḥammad ﷺ'.\n *\n * @param {string} text - The input text containing salutations.\n * @returns {string} - The modified text with salutations replaced.\n */\nexport const replaceSalutationsWithSymbol = (text: string) => {\n return text\n .replace(\n /\\(peace be upon him\\)|(Messenger of (Allah|Allāh)|Messenger|Prophet|Mu[hḥ]ammad) *\\((s[^)]*m|peace[^)]*him|May[^)]*him|may[^)]*him)\\)*/gi,\n '$1 ﷺ',\n )\n .replace(/,\\s*ﷺ\\s*,/g, ' ﷺ');\n};\n\n/**\n * Normalizes the text by removing diacritics, apostrophes, and dashes.\n * Example: 'Al-Jadwal' becomes 'AlJadwal'.\n *\n * @param {string} input - The input text to normalize.\n * @returns {string} - The normalized text.\n */\nexport const normalize = (input: string) => {\n return input\n .normalize('NFKD')\n .replace(/[\\u0300-\\u036f]/g, '')\n .replace(/`|ʾ|ʿ|-/g, '');\n};\n\n/**\n * Strips common Arabic prefixes like 'al-', 'bi-', 'fī', 'wa-', etc. from the beginning of words.\n * Example: 'al-Bukhari' becomes 'Bukhari'.\n *\n * @param {string} text - The input text containing Arabic prefixes.\n * @returns {string} - The modified text with prefixes stripped.\n */\nexport const removeArabicPrefixes = (text: string) => {\n return normalizeSpaces(text.replace(/(\\bal-|\\bli-|\\bbi-|\\bfī|\\bwa[-\\s]+|\\bl-|\\bliʿl|\\Bʿalá|\\Bʿan|\\bb\\.)/gi, ''));\n};\n\n/**\n * Simplifies English transliterations by removing diacritics, apostrophes, and common prefixes.\n * Example: 'Al-Jadwal' becomes 'Jadwal', and 'āḍġḥīṣṭū' becomes 'adghistu'.\n *\n * @param {string} text - The input text to simplify.\n * @returns {string} - The simplified text.\n */\nexport const normalizeTransliteratedEnglish = (text: string) => normalize(removeArabicPrefixes(text));\n\n/**\n * Extracts the initials from the input string, typically used for names or titles.\n * Example: 'Nayl al-Awtar' becomes 'NA'.\n *\n * @param {string} text - The input text to extract initials from.\n * @returns {string} - The extracted initials.\n */\nexport const extractInitials = (fullName: string) => {\n const initials = normalizeTransliteratedEnglish(fullName)\n .trim()\n .split(/[ -]/)\n .slice(0, 2)\n .map((word) => {\n return word.charAt(0).toUpperCase();\n })\n .join('');\n return initials;\n};\n"],"mappings":"AAuBO,IAAMA,GAAyBC,GAC3B,SACHA,EAAO,QAAQ,mBAAqBC,IAAOA,EAAE,WAAW,CAAC,EAAI,MAAQ,SAAS,CAAC,EAC/E,EACJ,EAUSC,GAAiCC,GACnCA,EAAK,QAAQ,qCAAsC,EAAE,EASnDC,GAA8BD,GAChCA,EAAK,QAAQ,KAAM,QAAG,EAAE,QAAQ,KAAM,QAAG,EASvCE,GAAkBF,GAAiB,CAC5C,GAAI,CAACA,EACD,MAAO,GAGX,IAAMG,EAAuB,uEAEvBC,EAAkB,mCAElBC,EAAsB,sCACtBC,EAAUN,EAAK,QAAQI,EAAiB,EAAE,EAC1CG,EAAgBD,EAAQ,MAAMH,CAAoB,GAAK,CAAC,EACxDK,EAAeF,EAAQ,MAAMD,CAAmB,GAAK,CAAC,EAC5D,OAAOG,EAAa,SAAW,EAAI,EAAID,EAAc,OAASC,EAAa,MAC/E,EASaC,GAAkBT,GACpBA,EAAK,QAAQ,OAAQ,SAAI,EASvBU,GAAuCV,GACzCA,EAAK,QAAQ,2BAA4B,OAAO,EAS9CW,GAA4BX,GAC9BA,EACF,QAAQ,iEAAkE,GAAG,EAC7E,QAAQ,8DAA+D,GAAG,EAStEY,GAAuBZ,GACzBA,EAAK,QAAQ,2CAA4C,EAAE,EASzDa,GAA+Bb,GACjCA,EAAK,QAAQ,6BAA8B,GAAG,EAS5Cc,GAAuCd,GACzCA,EACF,QAAQ,UAAW,QAAG,EACtB,QAAQ,oBAAqB,QAAG,EAChC,QAAQ,QAAS,QAAG,EChD7B,IAAMe,EAAY,OACZC,EAAa,UACbC,EAAgB,mDAChBC,EAAmB,UACnBC,EAAmB,UACnBC,EAAiB,UACjBC,EAAgB,mDAChBC,EAAuB,qDACvBC,EAAwB,kFACxBC,EAA0B,oFAK1BC,EAAgBC,GAA0BA,IAAS,GAKnDC,EAAoBD,GAA2BA,GAAQ,IAAMA,GAAQ,IAAQA,GAAQ,MAAUA,GAAQ,KAMvGE,EAAuBC,GACzBA,EAAE,QAAQb,EAAY,CAACc,EAAIC,EAAWC,IAAgB,CAClD,IAAIC,EAAIF,EAAI,EACZ,KAAOE,GAAK,GAAKR,EAAaO,EAAI,WAAWC,CAAC,CAAC,GAC3CA,IAEJ,GAAIA,GAAK,EAAG,CACR,IAAMC,EAAOF,EAAI,WAAWC,CAAC,EAC7B,GAAIN,EAAiBO,CAAI,GAAKA,IAAS,KACnC,MAAO,QAEf,CACA,MAAO,EACX,CAAC,EAKCC,EAAyBN,GAC3BA,EAAE,QAAQ,sFAAuF,IAAI,EAKnGO,EAAwB,CAACP,EAAWQ,IAA6BA,GAAUR,EAAE,UAAYA,EAAE,UAAU,KAAK,EAAIA,EAK9GS,EAA0B,CAACT,EAAWQ,EAAiBE,IACzDF,EAASR,EAAE,QAAQR,EAAekB,EAAU,IAAM,EAAE,EAAIV,EAKtDW,EAA6B,CAC/BX,EACAY,EACAC,KAEID,IACAZ,EAAIA,EAAE,QAAQZ,EAAe,EAAE,GAE/ByB,IAAgB,OACTd,EAAoBC,CAAC,EAE5Ba,IAAgB,MACTb,EAAE,QAAQb,EAAY,EAAE,EAE5Ba,GAMLc,EAAyB,CAC3Bd,EACAe,EACAC,EACAC,KAEIF,IACAf,EAAIA,EAAE,QAAQX,EAAkB,QAAG,GAEnC2B,IACAhB,EAAIA,EAAE,QAAQV,EAAkB,QAAG,GAEnC2B,IACAjB,EAAIA,EAAE,QAAQT,EAAgB,QAAG,GAE9BS,GAMLkB,EAA4B,CAAClB,EAAWQ,IAC1CA,EAASR,EAAE,QAAQP,EAAsB,GAAG,EAAIO,EAO9CmB,EAAqB,CAACnB,EAAWoB,EAA+BC,IAC9DD,EACOpB,EAAE,QAAQL,EAAyB,GAAG,EAE7C0B,EACOrB,EAAE,QAAQN,EAAuB,EAAE,EAEvCM,EAMLsB,EAAsB,CAACtB,EAAWuB,EAAmBC,KACnDD,IACAvB,EAAIA,EAAE,QAAQd,EAAW,GAAG,GAE5BsC,IACAxB,EAAIA,EAAE,KAAK,GAERA,GAMLyB,EAAiB,CAACC,EAAsBC,IAC1CA,IAAa,OAAYD,EAAc,CAAC,CAACC,EAMvCC,EAAqB,CACvBF,EACAC,IAEIA,IAAa,OACND,EAEPC,IAAa,GACN,OAEPA,IAAa,GACN,GAEJA,EAGLE,EAAiD,CACnD,MAAO,CACH,IAAK,GACL,eAAgB,GAChB,iBAAkB,GAClB,gBAAiB,GACjB,aAAc,GACd,cAAe,GACf,oBAAqB,GACrB,wBAAyB,GACzB,qBAAsB,GACtB,sBAAuB,GACvB,qBAAsB,GACtB,mBAAoB,GACpB,KAAM,GACN,kBAAmB,EACvB,EACA,OAAQ,CACJ,IAAK,GACL,eAAgB,GAChB,iBAAkB,GAClB,gBAAiB,GACjB,aAAc,MACd,cAAe,GACf,oBAAqB,GACrB,wBAAyB,GACzB,qBAAsB,GACtB,sBAAuB,GACvB,qBAAsB,GACtB,mBAAoB,GACpB,KAAM,GACN,kBAAmB,EACvB,EACA,WAAY,CACR,IAAK,GACL,eAAgB,GAChB,iBAAkB,GAClB,gBAAiB,GACjB,aAAc,MACd,cAAe,GACf,oBAAqB,GACrB,wBAAyB,GACzB,qBAAsB,GACtB,sBAAuB,GACvB,qBAAsB,GACtB,mBAAoB,GACpB,KAAM,GACN,kBAAmB,EACvB,CACJ,EAEMC,EAA6B,CAC/B,IAAK,GACL,eAAgB,GAChB,iBAAkB,GAClB,gBAAiB,GACjB,aAAc,GACd,cAAe,GACf,oBAAqB,GACrB,wBAAyB,GACzB,qBAAsB,GACtB,sBAAuB,GACvB,qBAAsB,GACtB,mBAAoB,GACpB,KAAM,GACN,kBAAmB,EACvB,EAqBaC,GAAiB,CAACC,EAAeC,EAAoD,WAAqB,CACnH,GAAI,CAACD,EACD,MAAO,GAGX,IAAIE,EACAC,EAA+B,KAEnC,GAAI,OAAOF,GAAoB,SAC3BC,EAASL,EAAQI,CAAe,MAC7B,CACH,IAAMG,EAAOH,EAAgB,MAAQ,QACrCC,EAASE,IAAS,OAASN,EAAcD,EAAQO,CAAI,EACrDD,EAAOF,CACX,CAEA,IAAMI,EAAMZ,EAAeS,EAAO,IAAKC,GAAM,GAAG,EAC1CG,EAAUb,EAAeS,EAAO,eAAgBC,GAAM,cAAc,EACpEI,EAAYd,EAAeS,EAAO,iBAAkBC,GAAM,gBAAgB,EAC1EK,EAAYf,EAAeS,EAAO,gBAAiBC,GAAM,eAAe,EACxEM,EAAWhB,EAAeS,EAAO,cAAeC,GAAM,aAAa,EACnEO,EAAUjB,EAAeS,EAAO,oBAAqBC,GAAM,mBAAmB,EAC9EQ,EAASlB,EAAeS,EAAO,wBAAyBC,GAAM,uBAAuB,EACrFS,EAAanB,EAAeS,EAAO,qBAAsBC,GAAM,oBAAoB,EACnFU,EAAoBpB,EAAeS,EAAO,qBAAsBC,GAAM,oBAAoB,EAC1Fd,EAAcI,EAAeS,EAAO,sBAAuBC,GAAM,qBAAqB,EACtFW,EAAarB,EAAeS,EAAO,mBAAoBC,GAAM,kBAAkB,EAC/EX,EAASC,EAAeS,EAAO,KAAMC,GAAM,IAAI,EAC/CY,EAActB,EAAeS,EAAO,kBAAmBC,GAAM,iBAAiB,EAC9EtB,EAAce,EAAmBM,EAAO,aAAcC,GAAM,YAAY,EAE1EnC,EAAIgC,EACR,OAAAhC,EAAIO,EAAsBP,EAAGqC,CAAG,EAChCrC,EAAIS,EAAwBT,EAAGsC,EAASC,CAAS,EAC7CQ,IACA/C,EAAIM,EAAsBN,CAAC,GAE/BA,EAAIW,EAA2BX,EAAGwC,EAAW3B,CAAW,EACxDb,EAAIc,EAAuBd,EAAGyC,EAAUC,EAASC,CAAM,EAElDE,IACD7C,EAAIkB,EAA0BlB,EAAG4C,CAAU,GAE/C5C,EAAImB,EAAmBnB,EAAG6C,EAAmBxB,CAAW,EAExDrB,EAAIsB,EAAoBtB,EAAG8C,EAAYtB,CAAM,EAEtCxB,CACX,EAGMgD,EAAmB,yDAEnBC,EAAgB,UAQTC,EAAelD,GAAsBA,EAAE,QAAQ,sBAAuB,MAAM,EA+D5EmD,GAAgC,CAACC,EAAgBjB,EAAyB,CAAC,IAAc,CAClG,GAAM,CACF,aAAAkB,EAAe,CAAE,KAAM,GAAM,aAAc,GAAM,eAAgB,EAAK,EACtE,aAAAC,EAAe,GACf,iBAAAC,EAAmB,GACnB,eAAAC,EAAiB,GACjB,MAAAC,EAAQ,GACZ,EAAItB,EAGJ,GAAIiB,EAAO,OAAS,IAChB,MAAM,IAAI,MAAM,gDAAgD,EAGpE,IAAMM,EAAaC,GAAuB,CACtC,OAAQA,EAAI,CACR,IAAK,SACL,IAAK,SACL,IAAK,SACL,IAAK,SACD,OAAON,EAAa,KAAO,6BAAW,SAC1C,IAAK,SACL,IAAK,SACD,OAAOA,EAAa,aAAe,iBAASH,EAAYS,CAAE,EAC9D,IAAK,SACL,IAAK,SACD,OAAON,EAAa,eAAiB,iBAASH,EAAYS,CAAE,EAChE,QACI,OAAOT,EAAYS,CAAE,CAC7B,CACJ,EAEMC,EAAQ,GAAGL,EAAmB,GAAGP,CAAgB,IAAM,EAAE,GAAGM,EAAe,GAAGL,CAAa,IAAM,EAAE,GAErGY,EAAU,GACd,QAAWF,KAAM,MAAM,KAAKP,CAAM,EAC1B,KAAK,KAAKO,CAAE,EACZE,GAAWL,EAAiB,OAAS,OAErCK,GAAW,GAAGH,EAAUC,CAAE,CAAC,GAAGC,CAAK,GAI3C,OAAO,IAAI,OAAOC,EAASJ,CAAK,CACpC,EC/eO,IAAMK,GAAoCC,GAAiB,CAE9D,IAAMC,EAAc,YAKpB,OAFsBD,EAAK,QAAQC,EAAa;AAAA,CAAM,EAAE,QAAQ,SAAU;AAAA,CAAI,EAAE,KAAK,CAGzF,EAQaC,GAAqCF,GACvCA,EACF,QAAQ,iDAAkD,OAAO,EACjE,QAAQ,mCAAoC,MAAM,EAClD,QAAQ,uDAAwD,QAAQ,EACxE,QAAQ,yEAA0E,IAAI,EAUlFG,GAAoBH,GACtBA,EACF,QAAQ,QAAS,GAAG,EACpB,QAAQ,aAAc,gBAAM,EAC5B,QAAQ,MAAO,QAAG,EASdI,GAAwBJ,GAC1BA,EAAK,QAAQ,UAAW;AAAA,CAAI,EAS1BK,GAAmBL,GACrBA,EAAK,QAAQ,YAAa,EAAE,EAQ1BM,GAAuBN,GACzB,gBAAgB,KAAKA,CAAI,EAQvBO,GAAqBP,GAChB,kEACD,KAAKA,CAAI,EASbQ,GAA2BR,GAC7BA,EAAK,QAAQ,mBAAoB,IAAI,EASnCS,GAAqBT,GACvBA,EAAK,QAAQ,YAAa,GAAG,EAS3BU,GAAkBV,GACpBA,EAAK,QAAQ,eAAgB,GAAG,EAS9BW,GAAkBX,GACpBA,EAAK,QAAQ,SAAU,GAAG,EASxBY,GAAoBZ,GACtBA,EAAK,QAAQ,UAAW,QAAG,EASzBa,GAAiCb,GACnCA,EAAK,QAAQ,eAAgB;AAAA;AAAA,CAAM,EASjCc,GAAiCd,GACnCA,EAAK,QAAQ,eAAgB;AAAA,CAAI,EAS/Be,GAAmBf,GACrBA,EAAK,QAAQ,UAAW,GAAG,EASzBgB,GAAuBhB,GACzBA,EAAK,QAAQ,SAAU,QAAG,EAAE,QAAQ,MAAO,GAAG,EAS5CiB,GAA0BjB,GAC5BA,EAAK,QAAQ,2BAA4B,MAAM,EAS7CkB,GAA6BlB,GAC/BA,EAAK,QAAQ,qBAAsB,OAAO,EASxCmB,GAA2BnB,GAC7BA,EAAK,QAAQ,mBAAoB,OAAO,EAStCoB,GAAmBpB,GAExBA,EACK,QAAQ,aAAc,MAAG,EACzB,QAAQ,aAAc,MAAG,EAEzB,QAAQ,6BAA8B,MAAM,EAE5C,QAAQ,6BAA8B,MAAM,EAU5CqB,GAAkBrB,GAAiB,CAE5C,IAAIsB,EAAStB,EAGb,OAAAsB,EAASA,EAAO,QAAQ,kBAAmB,MAAM,EAG1CA,EAAO,QAAQ,kBAAmB,MAAM,CACnD,EAQaC,GAA+BvB,GAEpCA,EAEK,QAAQ,eAAgB,YAAM,EAE9B,QAAQ,eAAgB,YAAM,EAE9B,QAAQ,sBAAuB,YAAM,EAWrCwB,GAA0BC,GAAkB,CACrD,IAAMC,EAAgB,mCAChBC,EAAsB,CAAC,EACvBC,EAAQH,EAAM,MAAM;AAAA,CAAI,EAC1BI,EAAkB,GAEtB,OAAAD,EAAM,QAASE,GAAS,CACpB,IAAMC,EAAcD,EAAK,KAAK,EACxBE,EAAaN,EAAc,KAAKK,CAAW,EAC3CE,EAAW,gBAAgB,KAAKF,CAAW,EAEjD,GAAIC,GAAc,CAACC,EACXJ,IACAF,EAAU,KAAKE,EAAgB,KAAK,CAAC,EACrCA,EAAkB,IAEtBF,EAAU,KAAKI,CAAW,MACvB,CACHF,GAAmB,GAAGE,CAAW,IACjC,IAAMG,EAAWL,EAAgB,KAAK,EAAE,MAAM,EAAE,EAC5C,QAAQ,KAAKK,CAAQ,IACrBP,EAAU,KAAKE,EAAgB,KAAK,CAAC,EACrCA,EAAkB,GAE1B,CACJ,CAAC,EAGGA,GACAF,EAAU,KAAKE,EAAgB,KAAK,CAAC,EAGlCF,EAAU,KAAK;AAAA,CAAI,CAC9B,EAOaQ,GAAkBnC,GAAiB,CAG5C,IAAMoC,EAAcpC,EAAK,QAAQ,aAAc,EAAE,EAGjD,OAAIoC,EAAY,SAAW,EAChB,GAGJA,IAAgBA,EAAY,YAAY,CACnD,EAQaC,GAA8BrC,GAChCA,EAAK,QAAQ,sBAAuB,OAAO,EASzCsC,EAAmBtC,GACrBA,EAAK,QAAQ,UAAW,GAAG,EAmBzBuC,GAA8BvC,GAChCA,EAAK,QAAQ,cAAe,IAAI,EAS9BwC,GAA6BxC,GAC/BA,EAAK,QAAQ,4BAA6B,QAAQ,EAShDyC,GAAmCzC,GACrCA,EAAK,QAAQ,WAAY,MAAG,EAAE,QAAQ,WAAY,MAAG,EAQnD0C,EAAoB1C,GAEJA,EAAK,UAAU,MAAM,EAGtB,QAAQ,mBAAoB,EAAE,EAAE,KAAK,EASpD2C,EAAuB3C,GAAiB,CACjD,IAAM4C,EAAoC,CACtC,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,GACpB,EAEA,OAAO5C,EAAK,QAAQ,4EAA8E6C,GACvFD,EAAUC,CAAK,GAAKA,CAC9B,CACL,EAOaC,GAAgB9C,GAClB2C,EAAoBD,EAAiB1C,CAAI,CAAC,EAQxC+C,GAAeC,GACjBA,EACF,YAAY,EACZ,MAAM,GAAG,EACT,IAAKC,GAAS,CACX,GAAIA,EAAK,SAAW,EAAG,OAAOA,EAE9B,IAAMJ,EAAQI,EAAK,MAAM,QAAQ,EACjC,GAAI,CAACJ,GAASA,EAAM,QAAU,OAAW,OAAOI,EAChD,IAAMC,EAAIL,EAAM,MAChB,OAAOI,EAAK,MAAM,EAAGC,CAAC,EAAID,EAAK,OAAOC,CAAC,EAAE,YAAY,EAAID,EAAK,MAAMC,EAAI,CAAC,CAC7E,CAAC,EACA,KAAK,GAAG,EASJC,GAAyBnD,GAC3BA,EAAK,QAAQ,+BAAgC,QAAQ,ECrezD,IAAMoD,GAAuBC,GAAgB,CAChD,IAAIC,EAAQD,EAAI,QAAQ,kBAAmB,MAAM,EACjD,OAAAC,EAAQA,EAAM,QAAQ,iBAAkB,QAAQ,EAChDA,EAAQA,EAAM,QAAQ,iBAAkB,QAAQ,EAEzC,KAAK,UAAU,KAAK,MAAMA,CAAK,CAAC,CAC3C,EAcaC,GAAwBF,GAG7B,6GACmB,KAAKA,EAAI,KAAK,CAAC,EAiB7BG,GAAiBC,GAA4B,CACtD,IAAMC,EAAQ,wBACd,OAAQD,EAAM,MAAMC,CAAK,GAAK,CAAC,GAAG,IAAKC,GAAeA,EAAE,WAAW,GAAG,EAAIA,EAAE,MAAM,EAAG,EAAE,EAAIA,CAAE,CACjG,EAgBMC,EAAqBP,GAAgB,CACvC,IAAIQ,EAAa,EACjB,QAAWC,KAAQT,EACXS,IAAS,KACTD,IAGR,OAAOA,EAAa,IAAM,CAC9B,EAEME,EAAW,CAAE,IAAK,IAAK,IAAK,IAAK,IAAK,GAAI,EAC1CC,EAAe,IAAI,IAAI,CAAC,IAAK,IAAK,GAAG,CAAC,EACtCC,GAAgB,IAAI,IAAI,CAAC,IAAK,IAAK,GAAG,CAAC,EAqBvCC,GAAuBb,GAAgB,CACzC,IAAMc,EAAkB,CAAC,EAEzB,QAAWL,KAAQT,EACf,GAAIW,EAAa,IAAIF,CAAI,EACrBK,EAAM,KAAKL,CAAI,UACRG,GAAc,IAAIH,CAAI,EAAG,CAChC,IAAMM,EAAWD,EAAM,IAAI,EAC3B,GAAI,CAACC,GAAYL,EAASK,CAAiC,IAAMN,EAC7D,MAAO,EAEf,CAGJ,OAAOK,EAAM,SAAW,CAC5B,EAsBaE,GAAchB,GAChBO,EAAkBP,CAAG,GAAKa,GAAoBb,CAAG,EAS/CiB,GAAmBC,GAAgC,CAC5D,GAAIA,EAAU,SAAS,GAAG,EAAG,CACzB,GAAM,CAACC,EAAOC,CAAG,EAAIF,EAAU,MAAM,GAAG,EAAE,IAAI,MAAM,EAEpD,GAAIC,EAAQC,EACR,MAAM,IAAI,MAAM,4CAA4C,EAGhE,OAAO,MAAM,KAAK,CAAE,OAAQA,EAAMD,EAAQ,CAAE,EAAG,CAACE,EAAGC,IAAMH,EAAQG,CAAC,CACtE,KACI,QAAOJ,EAAU,MAAM,GAAG,EAAE,IAAI,MAAM,CAE9C,EC5JO,IAAMK,GAAiCC,GACnCA,EAAK,QACR,wHACA,GACJ,EASSC,GAA4BD,GAC9BA,EAAK,QAAQ,aAAc,EAAE,EAS3BE,GAA+BF,GACjCA,EAAK,QAAQ,OAAQ,GAAG,EAStBG,GAAkBH,GACpBA,EAAK,QAAQ,SAAU,EAAE,EASvBI,GAAmBJ,GACrBA,EAAK,QAAQ,yDAA0D,EAAE,EASvEK,GAA0BL,GAC5BA,EAAK,QAAQ,SAAU,EAAE,EASvBM,GAA+BN,GACjCA,EAAK,QAAQ,4BAA6B,EAAE,EAS1CO,GAAcP,GAChBA,EAAK,QACR,uGACA,EACJ,EAQSQ,GAA4BR,GAEjCA,EAEK,QAAQ,mBAAoB,IAAI,EAEhC,QAAQ,eAAgB,IAAI,EAE5B,QAAQ,eAAgB,IAAI,EAE5B,QAAQ,aAAc,IAAI,EAE1B,QAAQ,eAAgB,IAAI,EAE5B,QAAQ,aAAc,EAAE,EAExB,QAAQ,uBAAwB,EAAE,EAElC,QAAQ,wBAAyB,IAAI,EAErC,QAAQ,WAAY,EAAE,EAEtB,QAAQ,iBAAkB,EAAE,EAE5B,QAAQ,iBAAkB,EAAE,EAE5B,QAAQ,MAAO,EAAE,EAoBjBS,GAAW,CAACC,EAAaC,EAAI,MAAiBD,EAAI,OAASC,EAAI,GAAGD,EAAI,UAAU,EAAGC,EAAI,CAAC,CAAC,SAAMD,EAsB/FE,GAAiB,CAACZ,EAAca,EAAoB,GAAIC,IAAuB,CACxF,GAAId,EAAK,QAAUa,EACf,OAAOb,EAIX,IAAMe,EAAmB,KAAK,IAAI,EAAG,KAAK,MAAMF,EAAY,CAAC,CAAC,EACxDG,EAAkBF,GAAaC,EAM/BE,EAHkBJ,EAAY,EAGEG,EAGtC,GAAIC,EAAc,EAEd,MAAO,GAAGjB,EAAK,UAAU,EAAGa,EAAY,CAAC,CAAC,SAG9C,IAAMK,EAAelB,EAAK,UAAU,EAAGiB,CAAW,EAC5CE,EAAanB,EAAK,UAAUA,EAAK,OAASgB,CAAe,EAE/D,MAAO,GAAGE,CAAY,SAAIC,CAAU,EACxC,EAqBaC,GAAkBC,GAAkBA,EAAM,QAAQ,OAAQ,GAAG,EAAE,KAAK,EAK3EC,GAAmB,qDAMnBC,GAA2B,CAC7B,CAAC,SAAU,SAAU,SAAU,QAAQ,EACvC,CAAC,SAAU,QAAQ,EACnB,CAAC,SAAU,QAAQ,CACvB,EAGMC,GAAiBC,GAAuB,CAC1C,QAAWC,KAASH,GAChB,GAAIG,EAAM,SAASD,CAAE,EAEjB,MAAO,IAAIC,EAAM,IAAKC,GAAMC,EAAYD,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,IAI5D,OAAOC,EAAYH,CAAE,CACzB,EAGMI,GAAwBC,GACnBA,EACF,UAAU,KAAK,EACf,QAAQ,kBAAmB,EAAE,EAC7B,QAAQ,OAAQ,GAAG,EACnB,KAAK,EAUDC,GAA4B/B,GAAiB,CACtD,IAAMgC,EAAoB,GAAGV,EAAgB,IACvCW,EAAOJ,GAAqB7B,CAAI,EAEtC,OAAO,MAAM,KAAKiC,CAAI,EACjB,IAAKR,GAAOD,GAAcC,CAAE,EAAIO,CAAiB,EACjD,KAAK,EAAE,CAChB,ECxPO,IAAME,GAA+BC,GACjCA,EACF,QAAQ,6EAA8E,OAAO,EAC7F,QAAQ,wBAAyB,QAAQ,EACzC,QAAQ,eAAgB,OAAO,EAU3BC,GAA8BD,GAChCA,EAAK,QAAQ,MAAO,QAAG,EAAE,QAAQ,MAAO,QAAG,EAWzCE,GAAgCF,GAClCA,EACF,QACG,2IACA,WACJ,EACC,QAAQ,aAAc,SAAI,EAUtBG,GAAaC,GACfA,EACF,UAAU,MAAM,EAChB,QAAQ,mBAAoB,EAAE,EAC9B,QAAQ,WAAY,EAAE,EAUlBC,GAAwBL,GAC1BM,EAAgBN,EAAK,QAAQ,uEAAwE,EAAE,CAAC,EAUtGO,GAAkCP,GAAiBG,GAAUE,GAAqBL,CAAI,CAAC,EASvFQ,GAAmBC,GACXF,GAA+BE,CAAQ,EACnD,KAAK,EACL,MAAM,MAAM,EACZ,MAAM,EAAG,CAAC,EACV,IAAKC,GACKA,EAAK,OAAO,CAAC,EAAE,YAAY,CACrC,EACA,KAAK,EAAE","names":["arabicNumeralToNumber","arabic","c","cleanExtremeArabicUnderscores","text","convertUrduSymbolsToArabic","getArabicScore","arabicLettersPattern","allDigitPattern","countedCharsPattern","cleaned","arabicMatches","totalMatches","fixTrailingWow","addSpaceBetweenArabicTextAndNumbers","removeNonIndexSignatures","removeSingularCodes","removeSolitaryArabicLetters","replaceEnglishPunctuationWithArabic","RX_SPACES","RX_TATWEEL","RX_DIACRITICS","RX_ALIF_VARIANTS","RX_ALIF_MAQSURAH","RX_TA_MARBUTAH","RX_ZERO_WIDTH","RX_LATIN_AND_SYMBOLS","RX_NON_ARABIC_LETTERS","RX_NOT_LETTERS_OR_SPACE","isAsciiSpace","code","isDigitCodePoint","removeTatweelSafely","s","_m","i","str","j","prev","removeHijriDateMarker","applyNfcNormalization","enable","removeZeroWidthControls","asSpace","removeDiacriticsAndTatweel","removeDiacritics","tatweelMode","applyCharacterMappings","normalizeAlif","maqsurahToYa","taMarbutahToHa","removeLatinAndSymbolNoise","applyLetterFilters","lettersAndSpacesOnly","lettersOnly","normalizeWhitespace","collapse","doTrim","resolveBoolean","presetValue","override","resolveTatweelMode","PRESETS","PRESET_NONE","sanitizeArabic","input","optionsOrPreset","preset","opts","base","nfc","stripZW","zwAsSpace","removeDia","normAlif","maqToYa","taToHa","stripNoise","lettersSpacesOnly","collapseWS","removeHijri","DIACRITICS_CLASS","TATWEEL_CLASS","escapeRegex","makeDiacriticInsensitiveRegex","needle","equivalences","allowTatweel","ignoreDiacritics","flexWhitespace","flags","charClass","ch","after","pattern","insertLineBreaksAfterPunctuation","text","punctuation","addSpaceBeforeAndAfterPunctuation","applySmartQuotes","cleanLiteralNewLines","cleanMultilines","hasWordInSingleLine","isOnlyPunctuation","cleanSpacesBeforePeriod","condenseAsterisks","condenseColons","condenseDashes","condenseEllipsis","reduceMultilineBreaksToDouble","reduceMultilineBreaksToSingle","condensePeriods","condenseUnderscores","doubleToSingleBrackets","ensureSpaceBeforeBrackets","ensureSpaceBeforeQuotes","fixBracketTypos","fixCurlyBraces","result","fixMismatchedQuotationMarks","formatStringBySentence","input","footnoteRegex","sentences","lines","currentSentence","line","trimmedLine","isFootnote","isNumber","lastChar","isAllUppercase","lettersOnly","normalizeSlashInReferences","normalizeSpaces","removeRedundantPunctuation","removeSpaceInsideBrackets","replaceDoubleBracketsWithArrows","stripBoldStyling","stripItalicsStyling","italicMap","match","stripStyling","toTitleCase","str","word","i","trimSpaceInsideQuotes","normalizeJsonSyntax","str","input","isJsonStructureValid","splitByQuotes","query","regex","s","areQuotesBalanced","quoteCount","char","brackets","openBrackets","closeBrackets","areBracketsBalanced","stack","lastOpen","isBalanced","parsePageRanges","pageInput","start","end","_","i","cleanSymbolsAndPartReferences","text","cleanTrailingPageNumbers","replaceLineBreaksWithSpaces","stripAllDigits","removeDeathYear","removeNumbersAndDashes","removeSingleDigitReferences","removeUrls","removeMarkdownFormatting","truncate","val","n","truncateMiddle","maxLength","endLength","defaultEndLength","actualEndLength","startLength","startPortion","endPortion","unescapeSpaces","input","DIACRITICS_CLASS","EQUIV_GROUPS","getEquivClass","ch","group","c","escapeRegex","normalizeArabicLight","str","makeDiacriticInsensitive","diacriticsMatcher","norm","normalizeArabicPrefixesToAl","text","normalizeDoubleApostrophes","replaceSalutationsWithSymbol","normalize","input","removeArabicPrefixes","normalizeSpaces","normalizeTransliteratedEnglish","extractInitials","fullName","word"]}
1
+ {"version":3,"sources":["../src/arabic.ts","../src/cleaning.ts","../src/formatting.ts","../src/parsing.ts","../src/sanitization.ts","../src/transliteration.ts"],"sourcesContent":["/**\n * Converts Arabic-Indic numerals (٠-٩) to a JavaScript number.\n *\n * This function finds all Arabic-Indic digits in the input string and converts them\n * to their corresponding Arabic (Western) digits, then parses the result as an integer.\n *\n * Arabic-Indic digits mapping:\n * - ٠ → 0, ١ → 1, ٢ → 2, ٣ → 3, ٤ → 4\n * - ٥ → 5, ٦ → 6, ٧ → 7, ٨ → 8, ٩ → 9\n *\n * @param arabic - The string containing Arabic-Indic numerals to convert\n * @returns The parsed integer value of the converted numerals\n *\n * @example\n * ```typescript\n * arabicNumeralToNumber(\"١٢٣\"); // returns 123\n * arabicNumeralToNumber(\"٥٠\"); // returns 50\n * arabicNumeralToNumber(\"abc١٢٣xyz\"); // returns 123 (non-digits ignored)\n * arabicNumeralToNumber(\"\"); // returns NaN\n * ```\n *\n * Returns NaN if no valid Arabic-Indic digits are found\n */\nexport const arabicNumeralToNumber = (arabic: string) => {\n return parseInt(\n arabic.replace(/[\\u0660-\\u0669]/g, (c) => (c.charCodeAt(0) - 0x0660).toString()),\n 10,\n );\n};\n\n/**\n * Removes extreme Arabic underscores (ـ) that appear at the beginning or end of a line or in text.\n * Does not affect Hijri dates (e.g., 1424هـ) or specific Arabic terms.\n * Example: \"ـThis is a textـ\" will be changed to \"This is a text\".\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with extreme underscores removed.\n */\nexport const cleanExtremeArabicUnderscores = (text: string) => {\n return text.replace(/(?<!\\d ?ه|اه)ـ(?=\\r?$)|^ـ(?!اهـ)/gm, '');\n};\n\n/**\n * Converts Urdu symbols to their Arabic equivalents.\n * Example: 'ھذا' will be changed to 'هذا', 'ی' to 'ي'.\n * @param {string} text - The input text containing Urdu symbols.\n * @returns {string} - The modified text with Urdu symbols converted to Arabic symbols.\n */\nexport const convertUrduSymbolsToArabic = (text: string) => {\n return text.replace(/ھ/g, 'ه').replace(/ی/g, 'ي');\n};\n\n/**\n * Calculates the proportion of Arabic characters in text relative to total non-whitespace, non-digit characters.\n * Digits (ASCII and Arabic-Indic variants) are excluded from both numerator and denominator.\n * @param text - The input text to analyze\n * @returns A decimal between 0-1 representing the Arabic character ratio (0 = no Arabic, 1 = all Arabic)\n */\nexport const getArabicScore = (text: string) => {\n if (!text) {\n return 0;\n }\n // Arabic letters (letters/ranges only)\n const arabicLettersPattern = /[\\u0600-\\u06FF\\u0750-\\u077F\\u08A0-\\u08FF\\uFB50-\\uFDFF\\uFE70-\\uFEFF]/g;\n // ASCII digits + Arabic-Indic digits + Extended Arabic-Indic digits\n const allDigitPattern = /[0-9\\u0660-\\u0669\\u06F0-\\u06F9]/g;\n // Counted characters exclude whitespace and all listed digits\n const countedCharsPattern = /[^\\s0-9\\u0660-\\u0669\\u06F0-\\u06F9]/g;\n const cleaned = text.replace(allDigitPattern, '');\n const arabicMatches = cleaned.match(arabicLettersPattern) || [];\n const totalMatches = cleaned.match(countedCharsPattern) || [];\n return totalMatches.length === 0 ? 0 : arabicMatches.length / totalMatches.length;\n};\n\n/**\n * Fixes the trailing \"و\" (wow) in phrases such as \"عليكم و رحمة\" to \"عليكم ورحمة\".\n * This function attempts to correct phrases where \"و\" appears unnecessarily, particularly in greetings.\n * Example: 'السلام عليكم و رحمة' will be changed to 'السلام عليكم ورحمة'.\n * @param {string} text - The input text containing the \"و\" character.\n * @returns {string} - The modified text with unnecessary trailing \"و\" characters corrected.\n */\nexport const fixTrailingWow = (text: string) => {\n return text.replace(/ و /g, ' و');\n};\n\n/**\n * Inserts a space between Arabic text and numbers.\n * Example: 'الآية37' will be changed to 'الآية 37'.\n * @param {string} text - The input text containing Arabic text followed by numbers.\n * @returns {string} - The modified text with spaces inserted between Arabic text and numbers.\n */\nexport const addSpaceBetweenArabicTextAndNumbers = (text: string) => {\n return text.replace(/([\\u0600-\\u06FF]+)(\\d+)/g, '$1 $2');\n};\n\n/**\n * Removes single-digit numbers surrounded by Arabic text. Also removes dashes (-) not followed by a number.\n * For example, removes '3' from 'وهب 3 وقال' but does not remove '121' from 'لوحه 121 الجرح'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with non-index numbers and dashes removed.\n */\nexport const removeNonIndexSignatures = (text: string) => {\n return text\n .replace(/(?<![0-9] ?)-|(?<=[\\u0600-\\u06FF])\\s?\\d\\s?(?=[\\u0600-\\u06FF])/g, ' ')\n .replace(/(?<=[\\u0600-\\u06FF]\\s)(\\d+\\s)+\\d+(?=(\\s[\\u0600-\\u06FF]|$))/g, ' ');\n};\n\n/**\n * Removes characters enclosed in square brackets [] or parentheses () if they are Arabic letters or Arabic-Indic numerals.\n * Example: '[س]' or '(س)' will be removed.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with singular codes removed.\n */\nexport const removeSingularCodes = (text: string) => {\n return text.replace(/[[({][\\u0621-\\u064A\\u0660-\\u0669][\\])}]/g, '');\n};\n\n/**\n * Removes solitary Arabic letters unless they are the 'ha' letter, which is used in Hijri years.\n * Example: \"ب ا الكلمات ت\" will be changed to \"ا الكلمات\".\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with solitary Arabic letters removed.\n */\nexport const removeSolitaryArabicLetters = (text: string) => {\n return text.replace(/(^| )[\\u0621-\\u064A]( |$)/g, ' ');\n};\n\n/**\n * Replaces English punctuation (question mark and semicolon) with their Arabic equivalents.\n * Example: '?' will be replaced with '؟', and ';' with '؛'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with English punctuation replaced by Arabic punctuation.\n */\nexport const replaceEnglishPunctuationWithArabic = (text: string) => {\n return text\n .replace(/\\?|؟\\./g, '؟')\n .replace(/(;|؛)\\s*(\\1\\s*)*/g, '؛')\n .replace(/,|-،/g, '،');\n};\n","/** Character class for Arabic diacritics (tashkīl/harakāt). */\nconst DIACRITICS_CLASS = '[\\\\u0610-\\\\u061A\\\\u064B-\\\\u065F\\\\u0670\\\\u06D6-\\\\u06ED]';\n/** Tatweel (kashīda) class. */\nconst TATWEEL_CLASS = '\\\\u0640';\n\n/**\n * Escape a string so it can be safely embedded into a RegExp source.\n *\n * @param s Any string\n * @returns Escaped string\n */\nexport const escapeRegex = (s: string): string => s.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n\n/** Optional equivalence toggles for {@link makeDiacriticInsensitiveRegex}. */\ntype EquivOptions = {\n /** Treat ا/أ/إ/آ as equivalent. @default true */\n alif?: boolean;\n /** Treat ة/ه as equivalent. @default true */\n taMarbutahHa?: boolean;\n /** Treat ى/ي as equivalent. @default true */\n alifMaqsurahYa?: boolean;\n};\n\n/** Options for {@link makeDiacriticInsensitiveRegex}. */\nexport type MakeRegexOptions = {\n /**\n * Character equivalences to allow.\n * @default { alif: true, taMarbutahHa: true, alifMaqsurahYa: true }\n */\n equivalences?: EquivOptions;\n\n /**\n * Allow tatweel between letters (tolerate decorative elongation).\n * @default true\n */\n allowTatweel?: boolean;\n\n /**\n * Ignore diacritics by inserting a `DIACRITICS_CLASS*` after each letter.\n * @default true\n */\n ignoreDiacritics?: boolean;\n\n /**\n * Treat any whitespace in the needle as `\\s+` for flexible matching.\n * @default true\n */\n flexWhitespace?: boolean;\n\n /**\n * RegExp flags to use.\n * @default 'u'\n */\n flags?: string;\n};\n\n/**\n * Build a **diacritic-insensitive**, **tatweel-tolerant** RegExp for Arabic text matching.\n *\n * Features:\n * - Optional character equivalences: ا~أ~إ~آ, ة~ه, ى~ي.\n * - Optional tolerance for tatweel between characters.\n * - Optional diacritic-insensitivity (by inserting a diacritics class after each char).\n * - Optional flexible whitespace (needle whitespace becomes `\\s+`).\n *\n * @param needle The Arabic text to match\n * @param opts See {@link MakeRegexOptions}\n * @returns A `RegExp` matching the needle with the desired tolerances\n *\n * @example\n * const rx = makeDiacriticInsensitiveRegex('أنا إلى الآفاق');\n * rx.test('انا الي الافاق'); // true\n * rx.test('اَنا إلى الآفاق'); // true\n */\nexport const makeDiacriticInsensitiveRegex = (needle: string, opts: MakeRegexOptions = {}): RegExp => {\n const {\n equivalences = { alif: true, taMarbutahHa: true, alifMaqsurahYa: true },\n allowTatweel = true,\n ignoreDiacritics = true,\n flexWhitespace = true,\n flags = 'u',\n } = opts;\n\n // Safety guard against extremely large inputs causing excessive pattern sizes\n if (needle.length > 5000) {\n throw new Error('makeDiacriticInsensitiveRegex: needle too long');\n }\n\n const charClass = (ch: string): string => {\n switch (ch) {\n case 'ا':\n case 'أ':\n case 'إ':\n case 'آ':\n return equivalences.alif ? '[اأإآ]' : 'ا';\n case 'ة':\n case 'ه':\n return equivalences.taMarbutahHa ? '[هة]' : escapeRegex(ch);\n case 'ى':\n case 'ي':\n return equivalences.alifMaqsurahYa ? '[ىي]' : escapeRegex(ch);\n default:\n return escapeRegex(ch);\n }\n };\n\n const after = `${ignoreDiacritics ? `${DIACRITICS_CLASS}*` : ''}${allowTatweel ? `${TATWEEL_CLASS}*` : ''}`;\n\n let pattern = '';\n for (const ch of Array.from(needle)) {\n if (/\\s/.test(ch)) {\n pattern += flexWhitespace ? '\\\\s+' : '\\\\s*';\n } else {\n pattern += `${charClass(ch)}${after}`;\n }\n }\n\n return new RegExp(pattern, flags);\n};\n","/**\n * Adds line breaks after punctuation marks such as periods, exclamation points, and question marks.\n * Example: 'Text.' becomes 'Text.\\n'.\n * @param {string} text - The input text containing punctuation.\n * @returns {string} - The modified text with line breaks added after punctuation.\n */\nexport const insertLineBreaksAfterPunctuation = (text: string) => {\n // Define the punctuation marks that should trigger a new line\n const punctuation = /([.?!؟])/g;\n\n // Replace occurrences of punctuation marks followed by a space with the punctuation mark, a newline, and the space\n const formattedText = text.replace(punctuation, '$1\\n').replace(/\\n\\s+/g, '\\n').trim();\n\n return formattedText;\n};\n\n/**\n * Adds spaces before and after punctuation, except for certain cases like quoted text or ayah references.\n * Example: 'Text,word' becomes 'Text, word'.\n * @param {string} text - The input text containing punctuation.\n * @returns {string} - The modified text with spaces added before and after punctuation.\n */\nexport const addSpaceBeforeAndAfterPunctuation = (text: string) => {\n return text\n .replace(/( ?)([.!?,،؟;؛])((?![ '”“)\"\\]\\n])|(?=\\s{2,}))/g, '$1$2 ')\n .replace(/\\s([.!?,،؟;؛])\\s*([ '”“)\"\\]\\n])/g, '$1$2')\n .replace(/([^\\s\\w\\d'”“)\"\\]]+)\\s+([.!?,،؟;؛])|([.!?,،؟;؛])\\s+$/g, '$1$2$3')\n .replace(/(?<=\\D)( ?: ?)(?!(\\d+:)|(:\\d+))|(?<=\\d) ?: ?(?=\\D)|(?<=\\D) ?: ?(?=\\d)/g, ': ');\n};\n\n/**\n * Turns regular double quotes surrounding a body of text into smart quotes.\n * Also fixes incorrect starting quotes by ensuring the string starts with an opening quote if needed.\n * Example: 'The \"quick brown\" fox' becomes 'The “quick brown” fox'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with smart quotes applied.\n */\nexport const applySmartQuotes = (text: string) => {\n return text\n .replace(/[“”]/g, '\"')\n .replace(/\"([^\"]*)\"/g, '“$1”')\n .replace(/^”/g, '“');\n};\n\n/**\n * Replaces literal new line characters (\\n) and carriage returns (\\r) with actual line breaks.\n * Example: 'A\\\\nB' becomes 'A\\nB'.\n * @param {string} text - The input text containing literal new lines.\n * @returns {string} - The modified text with actual line breaks.\n */\nexport const cleanLiteralNewLines = (text: string) => {\n return text.replace(/\\\\n|\\r/g, '\\n');\n};\n\n/**\n * Removes trailing spaces from each line in a multiline string.\n * Example: \" This is a line \\nAnother line \" becomes \"This is a line\\nAnother line\".\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with trailing spaces removed.\n */\nexport const cleanMultilines = (text: string) => {\n return text.replace(/^ +| +$/gm, '');\n};\n\n/**\n * Detects if a word is by itself in a line.\n * @param text The text to check.\n * @returns true if there exists a word in any of the lines in the text that is by itself.\n */\nexport const hasWordInSingleLine = (text: string): boolean => {\n return /^\\s*\\S+\\s*$/gm.test(text);\n};\n\n/**\n * Checks if the input string consists of only punctuation characters.\n * @param {string} text - The input text to check.\n * @returns {boolean} - Returns true if the string contains only punctuation, false otherwise.\n */\nexport const isOnlyPunctuation = (text: string): boolean => {\n const regex = /^[\\u0020-\\u002f\\u003a-\\u0040\\u005b-\\u0060\\u007b-\\u007e0-9٠-٩]+$/;\n return regex.test(text);\n};\n\n/**\n * Cleans unnecessary spaces before punctuation marks such as periods, commas, and question marks.\n * Example: 'This is a sentence , with extra space .' becomes 'This is a sentence, with extra space.'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with cleaned spaces before punctuation.\n */\nexport const cleanSpacesBeforePeriod = (text: string) => {\n return text.replace(/\\s+([.؟!,،؛:?])/g, '$1');\n};\n\n/**\n * Condenses multiple asterisks (*) into a single one.\n * Example: '***' becomes '*'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed asterisks.\n */\nexport const condenseAsterisks = (text: string) => {\n return text.replace(/(\\*\\s*)+/g, '*');\n};\n\n/**\n * Replaces occurrences of colons surrounded by periods (e.g., '.:.' or ':') with a single colon.\n * Example: 'This.:. is a test' becomes 'This: is a test'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed colons.\n */\nexport const condenseColons = (text: string) => {\n return text.replace(/[.-]?:[.-]?/g, ':');\n};\n\n/**\n * Condenses two or more dashes (--) into a single dash (-).\n * Example: 'This is some ---- text' becomes 'This is some - text'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed dashes.\n */\nexport const condenseDashes = (text: string) => {\n return text.replace(/-{2,}/g, '-');\n};\n\n/**\n * Replaces sequences of two or more periods (e.g., '...') with an ellipsis character (…).\n * Example: 'This is a test...' becomes 'This is a test…'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with ellipses condensed.\n */\nexport const condenseEllipsis = (text: string) => {\n return text.replace(/\\.{2,}/g, '…');\n};\n\n/**\n * Reduces multiple consecutive line breaks (3 or more) to exactly 2 line breaks.\n * Example: 'This is line 1\\n\\n\\n\\nThis is line 2' becomes 'This is line 1\\n\\nThis is line 2'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed line breaks.\n */\nexport const reduceMultilineBreaksToDouble = (text: string) => {\n return text.replace(/(\\n\\s*){3,}/g, '\\n\\n');\n};\n\n/**\n * Reduces multiple consecutive line breaks (2 or more) to exactly 1 line break.\n * Example: 'This is line 1\\n\\nThis is line 2' becomes 'This is line 1\\nThis is line 2'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed line breaks.\n */\nexport const reduceMultilineBreaksToSingle = (text: string) => {\n return text.replace(/(\\n\\s*){2,}/g, '\\n');\n};\n\n/**\n * Condenses multiple periods separated by spaces (e.g., '. . .') into a single period.\n * Example: 'This . . . is a test' becomes 'This. is a test'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed periods.\n */\nexport const condensePeriods = (text: string) => {\n return text.replace(/\\. +\\./g, '.');\n};\n\n/**\n * Condenses multiple underscores (__) or Arabic Tatweel characters (ـــــ) into a single underscore or Tatweel.\n * Example: 'This is ـــ some text __' becomes 'This is ـ some text _'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed underscores.\n */\nexport const condenseUnderscores = (text: string) => {\n return text.replace(/ـ{2,}/g, 'ـ').replace(/_+/g, '_');\n};\n\n/**\n * Replaces double parentheses or brackets with single ones.\n * Example: '((text))' becomes '(text)'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed brackets.\n */\nexport const doubleToSingleBrackets = (text: string) => {\n return text.replace(/(\\(|\\)){2,}|(\\[|\\]){2,}/g, '$1$2');\n};\n\n/**\n * Ensures at most 1 space exists before any word before brackets.\n * Adds a space if there isn't one, or reduces multiple spaces to one.\n * @param {string} text - The input text to modify\n * @returns {string} - The modified text with proper spacing before brackets\n */\nexport const ensureSpaceBeforeBrackets = (text: string) => {\n return text.replace(/(\\S) *(\\([^)]*\\))/g, '$1 $2');\n};\n\n/**\n * Ensures at most 1 space exists before any word before Arabic quotation marks.\n * Adds a space if there isn't one, or reduces multiple spaces to one.\n * @param {string} text - The input text to modify\n * @returns {string} - The modified text with proper spacing before Arabic quotes\n */\nexport const ensureSpaceBeforeQuotes = (text: string) => {\n return text.replace(/(\\S) *(«[^»]*»)/g, '$1 $2');\n};\n\n/**\n * Fixes common bracket and quotation mark typos in text\n * Corrects malformed patterns like \"(«\", \"»)\", and misplaced digits in brackets\n * @param text - Input text that may contain bracket typos\n * @returns Text with corrected bracket and quotation mark combinations\n */\nexport const fixBracketTypos = (text: string) => {\n return (\n text\n .replace(/\\(«|\\( \\(/g, '«')\n .replace(/»\\)|\\) \\)/g, '»')\n // Fix \")digit)\" pattern to \"(digit)\"\n .replace(/\\)([0-9\\u0660-\\u0669]+)\\)/g, '($1)')\n // Fix \")digit(\" pattern to \"(digit)\"\n .replace(/\\)([0-9\\u0660-\\u0669]+)\\(/g, '($1)')\n );\n};\n\n/**\n * Fixes mismatched curly braces by converting incorrect bracket/brace combinations\n * to proper curly braces { }\n * @param text - Input text that may contain mismatched curly braces\n * @returns Text with corrected curly brace pairs\n */\nexport const fixCurlyBraces = (text: string) => {\n // Process each mismatch type separately to avoid interference\n let result = text;\n\n // Fix ( content } to { content }\n result = result.replace(/\\(([^(){}]+)\\}/g, '{$1}');\n\n // Fix { content ) to { content }\n return result.replace(/\\{([^(){}]+)\\)/g, '{$1}');\n};\n\n/**\n * Fixes mismatched quotation marks in Arabic text by converting various\n * incorrect bracket/quote combinations to proper Arabic quotation marks (« »)\n * @param text - Input text that may contain mismatched quotation marks\n * @returns Text with corrected Arabic quotation marks\n */\nexport const fixMismatchedQuotationMarks = (text: string) => {\n return (\n text\n // Matches mismatched quotation marks: « followed by content and closed with )\n .replace(/«([^»)]+)\\)/g, '«$1»')\n // Fix reverse mismatched ( content » to « content »\n .replace(/\\(([^()]+)»/g, '«$1»')\n // Matches any unclosed « quotation marks at end of content\n .replace(/«([^»]+)(?=\\s*$|$)/g, '«$1»')\n );\n};\n\n/**\n * Formats a multiline string by joining sentences and maintaining footnotes on their own lines.\n * Footnotes are identified by Arabic and English numerals.\n * Example: 'Sentence one.\\n(1) A footnote.\\nSentence two.' remains the same, while regular sentences are joined.\n * @param {string} input - The input text containing sentences and footnotes.\n * @returns {string} - The formatted text.\n */\nexport const formatStringBySentence = (input: string) => {\n const footnoteRegex = /^\\((?:\\d+|۱|۲|۳|۴|۵|۶|۷|۸|۹)\\)\\s/;\n const sentences: string[] = [];\n const lines = input.split('\\n');\n let currentSentence = '';\n\n lines.forEach((line) => {\n const trimmedLine = line.trim();\n const isFootnote = footnoteRegex.test(trimmedLine);\n const isNumber = /^\\(\\d+\\/\\d+\\)/.test(trimmedLine);\n\n if (isFootnote && !isNumber) {\n if (currentSentence) {\n sentences.push(currentSentence.trim());\n currentSentence = '';\n }\n sentences.push(trimmedLine);\n } else {\n currentSentence += `${trimmedLine} `;\n const lastChar = currentSentence.trim().slice(-1);\n if (/[.!؟]/.test(lastChar)) {\n sentences.push(currentSentence.trim());\n currentSentence = '';\n }\n }\n });\n\n // Add any remaining text to the output\n if (currentSentence) {\n sentences.push(currentSentence.trim());\n }\n\n return sentences.join('\\n');\n};\n\n/**\n * Detects if text is entirely in uppercase letters\n * @param text - The text to check\n * @returns true if all alphabetic characters are uppercase, false otherwise\n */\nexport const isAllUppercase = (text: string) => {\n // Remove non-letter characters (including numbers, punctuation, spaces)\n // \\p{L} matches any Unicode letter character\n const lettersOnly = text.replace(/[^\\p{L}]/gu, '');\n\n // If there are no letter characters, return false\n if (lettersOnly.length === 0) {\n return false;\n }\n\n return lettersOnly === lettersOnly.toUpperCase();\n};\n\n/**\n * Removes unnecessary spaces around slashes in references.\n * Example: '127 / 11' becomes '127/11'.\n * @param {string} text - The input text containing references.\n * @returns {string} - The modified text with spaces removed around slashes.\n */\nexport const normalizeSlashInReferences = (text: string) => {\n return text.replace(/(\\d+)\\s?\\/\\s?(\\d+)/g, '$1/$2');\n};\n\n/**\n * Reduces multiple spaces or tabs to a single space.\n * Example: 'This is a text' becomes 'This is a text'.\n * @param {string} text - The input text containing extra spaces.\n * @returns {string} - The modified text with reduced spaces.\n */\nexport const normalizeSpaces = (text: string) => {\n return text.replace(/[ \\t]+/g, ' ');\n};\n\n/**\n * Removes redundant punctuation marks that follow Arabic question marks or exclamation marks.\n * This function cleans up text by removing periods (.) or Arabic commas (،) that immediately\n * follow Arabic question marks (؟) or exclamation marks (!), as they are considered redundant\n * in proper Arabic punctuation.\n *\n * @param text - The Arabic text to clean up\n * @returns The text with redundant punctuation removed\n *\n * @example\n * ```typescript\n * removeRedundantPunctuation('كيف حالك؟.') // Returns: 'كيف حالك؟'\n * removeRedundantPunctuation('ممتاز!،') // Returns: 'ممتاز!'\n * removeRedundantPunctuation('هذا جيد.') // Returns: 'هذا جيد.' (unchanged)\n * ```\n */\nexport const removeRedundantPunctuation = (text: string) => {\n return text.replace(/([؟!])[.،]/g, '$1');\n};\n\n/**\n * Removes spaces inside brackets, parentheses, or square brackets.\n * Example: '( a b )' becomes '(a b)'.\n * @param {string} text - The input text with spaces inside brackets.\n * @returns {string} - The modified text with spaces removed inside brackets.\n */\nexport const removeSpaceInsideBrackets = (text: string) => {\n return text.replace(/([[(])\\s*(.*?)\\s*([\\])])/g, '$1$2$3');\n};\n\n/**\n * Replaces double parentheses single a single arrow variation.\n * Example: '((text))' becomes '«text»'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed brackets.\n */\nexport const replaceDoubleBracketsWithArrows = (text: string) => {\n return text.replace(/\\(\\(\\s?/g, '«').replace(/\\s?\\)\\)/g, '»');\n};\n\n/**\n * Removes bold styling from text by normalizing the string and removing stylistic characters.\n * @param {string} text - The input text containing bold characters.\n * @returns {string} - The modified text with bold styling removed.\n */\nexport const stripBoldStyling = (text: string) => {\n // Normalize the string to NFKD form\n const normalizedString = text.normalize('NFKD');\n\n // Remove combining marks (diacritics) and stylistic characters from the string\n return normalizedString.replace(/[\\u0300-\\u036f]/g, '').trim();\n};\n\n/**\n * Removes italicized characters by replacing italic Unicode characters with their normal counterparts.\n * Example: '𝘼𝘽𝘾' becomes 'ABC'.\n * @param {string} text - The input text containing italicized characters.\n * @returns {string} - The modified text with italics removed.\n */\nexport const stripItalicsStyling = (text: string) => {\n const italicMap: Record<string, string> = {\n '\\uD835\\uDC4E': 'I',\n '\\uD835\\uDC68': 'g',\n '\\u{1D63C}': '!',\n '\\uD835\\uDC4F': 'J',\n '\\uD835\\uDC69': 'h',\n '\\u{1D63D}': '?',\n '\\uD835\\uDC50': 'K',\n '\\uD835\\uDC6A': 'i',\n '\\uD835\\uDC51': 'L',\n '\\uD835\\uDC6B': 'j',\n '\\u{1D63F}': ',',\n '\\uD835\\uDC52': 'M',\n '\\uD835\\uDC6C': 'k',\n '\\u{1D640}': '.',\n '\\uD835\\uDC53': 'N',\n '\\uD835\\uDC6D': 'l',\n '\\uD835\\uDC54': 'O',\n '\\uD835\\uDC6E': 'm',\n '\\uD835\\uDC6F': 'n',\n '\\uD835\\uDC56': 'Q',\n '\\uD835\\uDC70': 'o',\n '\\uD835\\uDC57': 'R',\n '\\uD835\\uDC71': 'p',\n '\\uD835\\uDC58': 'S',\n '\\uD835\\uDC72': 'q',\n '\\uD835\\uDC59': 'T',\n '\\uD835\\uDC73': 'r',\n '\\u{1D647}': '-',\n '\\uD835\\uDC5A': 'U',\n '\\uD835\\uDC74': 's',\n '\\uD835\\uDC5B': 'V',\n '\\uD835\\uDC75': 't',\n '\\uD835\\uDC5C': 'W',\n '\\uD835\\uDC76': 'u',\n '\\uD835\\uDC5D': 'X',\n '\\uD835\\uDC77': 'v',\n '\\uD835\\uDC5E': 'Y',\n '\\uD835\\uDC78': 'w',\n '\\uD835\\uDC5F': 'Z',\n '\\uD835\\uDC79': 'x',\n '\\uD835\\uDC46': 'A',\n '\\uD835\\uDC7A': 'y',\n '\\uD835\\uDC47': 'B',\n '\\uD835\\uDC7B': 'z',\n '\\uD835\\uDC62': 'a',\n '\\uD835\\uDC48': 'C',\n '\\uD835\\uDC63': 'b',\n '\\uD835\\uDC49': 'D',\n '\\uD835\\uDC64': 'c',\n '\\uD835\\uDC4A': 'E',\n '\\uD835\\uDC65': 'd',\n '\\uD835\\uDC4B': 'F',\n '\\uD835\\uDC66': 'e',\n '\\uD835\\uDC4C': 'G',\n '\\uD835\\uDC67': 'f',\n '\\uD835\\uDC4D': 'H',\n '\\uD835\\uDC55': 'P',\n };\n\n return text.replace(/[\\uD835\\uDC62-\\uD835\\uDC7B\\uD835\\uDC46-\\uD835\\uDC5F\\u{1D63C}-\\u{1D647}]/gu, (match) => {\n return italicMap[match] || match;\n });\n};\n\n/**\n * Removes all bold and italic styling from the input text.\n * @param {string} text - The input text to remove styling from.\n * @returns {string} - The modified text with all styling removed.\n */\nexport const stripStyling = (text: string) => {\n return stripItalicsStyling(stripBoldStyling(text));\n};\n\n/**\n * Converts a string to title case (first letter of each word capitalized)\n * @param str - The input string to convert\n * @returns String with each word's first letter capitalized\n */\nexport const toTitleCase = (str: string) => {\n return str\n .toLowerCase()\n .split(' ')\n .map((word) => {\n if (word.length === 0) return word;\n // Find the first Unicode letter in the chunk\n const match = word.match(/\\p{L}/u);\n if (!match || match.index === undefined) return word;\n const i = match.index;\n return word.slice(0, i) + word.charAt(i).toUpperCase() + word.slice(i + 1);\n })\n .join(' ');\n};\n\n/**\n * Removes unnecessary spaces inside quotes.\n * Example: '“ Text ”' becomes '“Text”'.\n * @param {string} text - The input text with spaces inside quotes.\n * @returns {string} - The modified text with spaces removed inside quotes.\n */\nexport const trimSpaceInsideQuotes = (text: string) => {\n return text.replace(/([“”\"]|«) *(.*?) *([“”\"]|»)/g, '$1$2$3');\n};\n","/**\n * Converts a string that resembles JSON but with numeric keys and single-quoted values\n * into valid JSON format. This function replaces numeric keys with quoted numeric keys\n * and ensures all values are double-quoted as required by JSON.\n *\n * @param {string} str - The input string that needs to be fixed into valid JSON.\n * @returns {string} - A valid JSON string.\n *\n * @example\n * const result = normalizeJsonSyntax(\"{10: 'abc', 20: 'def'}\");\n * console.log(result); // '{\"10\": \"abc\", \"20\": \"def\"}'\n */\nexport const normalizeJsonSyntax = (str: string) => {\n let input = str.replace(/(\\b\\d+\\b)(?=:)/g, '\"$1\"');\n input = input.replace(/:\\s*'([^']+)'/g, ': \"$1\"');\n input = input.replace(/:\\s*\"([^\"]+)\"/g, ': \"$1\"');\n\n return JSON.stringify(JSON.parse(input));\n};\n\n/**\n * Checks if a given string resembles a JSON object with numeric or quoted keys and values\n * that are single or double quoted. This is useful for detecting malformed JSON-like\n * structures that can be fixed by the `normalizeJsonSyntax` function.\n *\n * @param {string} str - The input string to check.\n * @returns {boolean} - Returns true if the string is JSON-like, false otherwise.\n *\n * @example\n * const result = isJsonStructureValid(\"{10: 'abc', 'key': 'value'}\");\n * console.log(result); // true\n */\nexport const isJsonStructureValid = (str: string) => {\n // Checks for a pattern with numeric keys or quoted keys and values in quotes\n const jsonLikePattern =\n /^{(\\s*(\\d+|'[^']*'|\"[^\"]*\")\\s*:\\s*('|\")[^'\"]*\\3\\s*,)*(?:\\s*(\\d+|'[^']*'|\"[^\"]*\")\\s*:\\s*('|\")[^'\"]*\\5\\s*)}$/;\n return jsonLikePattern.test(str.trim());\n};\n\n/**\n * Splits a string by spaces and quoted substrings.\n *\n * This function takes an input string and splits it into parts where substrings\n * enclosed in double quotes are treated as a single part. Other substrings\n * separated by spaces are split normally.\n *\n * @param {string} query - The input string to be split.\n * @returns {string[]} An array of strings, with quoted substrings kept intact.\n *\n * @example\n * const result = splitByQuotes('\"This is\" \"a part of the\" \"string and\"');\n * console.log(result); // [\"This is\", \"a part of the\", \"string and\"]\n */\nexport const splitByQuotes = (query: string): string[] => {\n const regex = /(?:[^\\s\"]+|\"(.*?)\")+/g;\n return (query.match(regex) || []).map((s: string) => (s.startsWith('\"') ? s.slice(1, -1) : s));\n};\n\n/**\n * Checks if all double quotes in a string are balanced (even count).\n * A string has balanced quotes if every opening quote has a corresponding closing quote.\n *\n * @param str - The string to check for balanced quotes\n * @returns True if quotes are balanced (even count), false otherwise\n *\n * @example\n * ```typescript\n * areQuotesBalanced('Hello \"world\"') // Returns: true\n * areQuotesBalanced('Hello \"world') // Returns: false\n * areQuotesBalanced('No quotes') // Returns: true\n * ```\n */\nconst areQuotesBalanced = (str: string) => {\n let quoteCount = 0;\n for (const char of str) {\n if (char === '\"') {\n quoteCount++;\n }\n }\n return quoteCount % 2 === 0;\n};\n\nconst brackets = { '(': ')', '[': ']', '{': '}' };\nconst openBrackets = new Set(['(', '[', '{']);\nconst closeBrackets = new Set([')', ']', '}']);\n\n/**\n * Checks if all brackets in a string are properly balanced and matched.\n * This function validates that every opening bracket has a corresponding closing bracket\n * in the correct order and of the matching type.\n *\n * Supported bracket types: parentheses (), square brackets [], curly braces {}\n *\n * @param str - The string to check for balanced brackets\n * @returns True if all brackets are properly balanced and matched, false otherwise\n *\n * @example\n * ```typescript\n * areBracketsBalanced('(hello [world])') // Returns: true\n * areBracketsBalanced('(hello [world)') // Returns: false (mismatched)\n * areBracketsBalanced('((hello))') // Returns: true\n * areBracketsBalanced('(hello') // Returns: false (unclosed)\n * ```\n */\n\nconst areBracketsBalanced = (str: string) => {\n const stack: string[] = [];\n\n for (const char of str) {\n if (openBrackets.has(char)) {\n stack.push(char);\n } else if (closeBrackets.has(char)) {\n const lastOpen = stack.pop();\n if (!lastOpen || brackets[lastOpen as keyof typeof brackets] !== char) {\n return false;\n }\n }\n }\n\n return stack.length === 0;\n};\n\n/**\n * Checks if both quotes and brackets are balanced in a string.\n * This function combines quote balance checking and bracket balance checking\n * to ensure the entire string has properly balanced punctuation.\n *\n * A string is considered balanced when:\n * - All double quotes have matching pairs (even count)\n * - All brackets (parentheses, square brackets, curly braces) are properly matched and nested\n *\n * @param str - The string to check for balanced quotes and brackets\n * @returns True if both quotes and brackets are balanced, false otherwise\n *\n * @example\n * ```typescript\n * isBalanced('He said \"Hello (world)!\"') // Returns: true\n * isBalanced('He said \"Hello (world!\"') // Returns: false (unbalanced quote)\n * isBalanced('He said \"Hello (world)\"') // Returns: false (unbalanced quote)\n * isBalanced('Hello (world) [test]') // Returns: true\n * ```\n */\nexport const isBalanced = (str: string) => {\n return areQuotesBalanced(str) && areBracketsBalanced(str);\n};\n\n/**\n * Parses page input string into array of page numbers, supporting ranges and lists\n * @param pageInput - Page specification string (e.g., \"1-5\" or \"1,3,5\")\n * @returns Array of page numbers\n * @throws Error when start page exceeds end page in range\n */\nexport const parsePageRanges = (pageInput: string): number[] => {\n if (pageInput.includes('-')) {\n const [start, end] = pageInput.split('-').map(Number);\n\n if (start > end) {\n throw new Error('Start page cannot be greater than end page');\n }\n\n return Array.from({ length: end - start + 1 }, (_, i) => start + i);\n } else {\n return pageInput.split(',').map(Number);\n }\n};\n","import { escapeRegex } from './cleaning';\n\n/**\n * Removes various symbols, part references, and numerical markers from the text.\n * Example: '(1) (2/3)' becomes ''.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with symbols and part references removed.\n */\nexport const cleanSymbolsAndPartReferences = (text: string) => {\n return text.replace(\n / *\\(?:\\d+(?:\\/\\d+){0,2}\\)? *| *\\[\\d+(?:\\/\\d+)?\\] *| *«\\d+» *|\\d+\\/\\d+(?:\\/\\d+)?|[،§{}۝؍‎﴿﴾<>;_؟»«:!،؛[\\]…ـ¬.\\\\/*()\"]/g,\n ' ',\n );\n};\n\n/**\n * Removes trailing page numbers formatted as '-[46]-' from the text.\n * Example: 'This is some -[46]- text' becomes 'This is some text'.\n * @param {string} text - The input text with trailing page numbers.\n * @returns {string} - The modified text with page numbers removed.\n */\nexport const cleanTrailingPageNumbers = (text: string) => {\n return text.replace(/-\\[\\d+\\]-/g, '');\n};\n\n/**\n * Replaces consecutive line breaks and whitespace characters with a single space.\n * Example: 'a\\nb' becomes 'a b'.\n * @param {string} text - The input text containing line breaks or multiple spaces.\n * @returns {string} - The modified text with spaces.\n */\nexport const replaceLineBreaksWithSpaces = (text: string) => {\n return text.replace(/\\s+/g, ' ');\n};\n\n/**\n * Removes all numeric digits from the text.\n * Example: 'abc123' becomes 'abc'.\n * @param {string} text - The input text containing digits.\n * @returns {string} - The modified text with digits removed.\n */\nexport const stripAllDigits = (text: string) => {\n return text.replace(/[0-9]/g, '');\n};\n\n/**\n * Removes death year references like \"(d. 390H)\" and \"[d. 100h]\" from the text.\n * Example: 'Sufyān ibn ‘Uyaynah (d. 198h)' becomes 'Sufyān ibn ‘Uyaynah'.\n * @param {string} text - The input text containing death year references.\n * @returns {string} - The modified text with death years removed.\n */\nexport const removeDeathYear = (text: string) => {\n return text.replace(/\\[(d)\\.\\s*\\d{1,4}[hH]\\]\\s*|\\((d)\\.\\s*\\d{1,4}[hH]\\)\\s*/g, '');\n};\n\n/**\n * Removes numeric digits and dashes from the text.\n * Example: 'ABC 123-Xyz' becomes 'ABC Xyz'.\n * @param {string} text - The input text containing digits and dashes.\n * @returns {string} - The modified text with numbers and dashes removed.\n */\nexport const removeNumbersAndDashes = (text: string) => {\n return text.replace(/[\\d-]/g, '');\n};\n\n/**\n * Removes single digit references like (1), «2», [3] from the text.\n * Example: 'Ref (1), Ref «2», Ref [3]' becomes 'Ref , Ref , Ref '.\n * @param {string} text - The input text containing single digit references.\n * @returns {string} - The modified text with single digit references removed.\n */\nexport const removeSingleDigitReferences = (text: string) => {\n return text.replace(/\\(\\d{1}\\)|\\[\\d{1}\\]|«\\d»/g, '');\n};\n\n/**\n * Removes URLs from the text.\n * Example: 'Visit https://example.com' becomes 'Visit '.\n * @param {string} text - The input text containing URLs.\n * @returns {string} - The modified text with URLs removed.\n */\nexport const removeUrls = (text: string) => {\n return text.replace(\n /https?:\\/\\/(www\\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/g,\n '',\n );\n};\n\n/**\n * Removes common Markdown formatting syntax from text\n * @param text - The input text containing Markdown formatting\n * @returns Text with Markdown formatting removed (bold, italics, headers, lists, backticks)\n */\nexport const removeMarkdownFormatting = (text: string) => {\n return (\n text\n // Remove bold first (**text**) - must come before italics\n .replace(/\\*\\*([^*]+)\\*\\*/g, '$1')\n // Remove bold with underscores (__text__)\n .replace(/__([^_]+)__/g, '$1')\n // Remove italics (*text*)\n .replace(/\\*([^*]+)\\*/g, '$1')\n // Remove italics with underscores (_text_)\n .replace(/_([^_]+)_/g, '$1')\n // Remove strikethrough (~~text~~)\n .replace(/~~([^~]+)~~/g, '$1')\n // Remove blockquotes\n .replace(/^\\s*>\\s?/gm, '')\n // Remove images ![alt](url)\n .replace(/!\\[[^\\]]*]\\([^)]*\\)/g, '')\n // Convert links [text](url) -> text\n .replace(/\\[([^\\]]+)]\\([^)]*\\)/g, '$1')\n // Remove headers (# ## ### etc.)\n .replace(/^#+\\s*/gm, '')\n // Remove unordered list markers (- * +)\n .replace(/^\\s*[-*+]\\s+/gm, '')\n // Remove ordered list markers (1. 2. etc.)\n .replace(/^\\s*\\d+\\.\\s+/gm, '')\n // Remove backticks\n .replace(/`/gm, '')\n );\n};\n\n/**\n * Truncates a string to a specified length, adding an ellipsis if truncated.\n *\n * @param val - The string to truncate\n * @param n - Maximum length of the string (default: 150)\n * @returns The truncated string with ellipsis if needed, otherwise the original string\n *\n * @example\n * ```javascript\n * truncate('The quick brown fox jumps over the lazy dog', 20);\n * // Output: 'The quick brown fox…'\n *\n * truncate('Short text', 50);\n * // Output: 'Short text'\n * ```\n */\nexport const truncate = (val: string, n = 150): string => (val.length > n ? `${val.substring(0, n - 1)}…` : val);\n\n/**\n * Truncates a string from the middle, preserving both the beginning and end portions.\n *\n * @param text - The string to truncate\n * @param maxLength - Maximum length of the resulting string (default: 50)\n * @param endLength - Number of characters to preserve at the end (default: 1/3 of maxLength, minimum 3)\n * @returns The truncated string with ellipsis in the middle if needed, otherwise the original string\n *\n * @example\n * ```javascript\n * truncateMiddle('The quick brown fox jumps right over the lazy dog', 20);\n * // Output: 'The quick bro…zy dog'\n *\n * truncateMiddle('The quick brown fox jumps right over the lazy dog', 25, 8);\n * // Output: 'The quick brown …lazy dog'\n *\n * truncateMiddle('Short text', 50);\n * // Output: 'Short text'\n * ```\n */\nexport const truncateMiddle = (text: string, maxLength: number = 50, endLength?: number) => {\n if (text.length <= maxLength) {\n return text;\n }\n\n // Default end length is roughly 1/3 of max length, minimum 3 characters\n const defaultEndLength = Math.max(3, Math.floor(maxLength / 3));\n const actualEndLength = endLength ?? defaultEndLength;\n\n // Reserve space for the ellipsis character (1 char)\n const availableLength = maxLength - 1;\n\n // Calculate start length (remaining space after end portion)\n const startLength = availableLength - actualEndLength;\n\n // Ensure we have at least some characters at the start\n if (startLength < 1) {\n // If we can't fit both start and end, just truncate normally\n return `${text.substring(0, maxLength - 1)}…`;\n }\n\n const startPortion = text.substring(0, startLength);\n const endPortion = text.substring(text.length - actualEndLength);\n\n return `${startPortion}…${endPortion}`;\n};\n\n/**\n * Unescapes backslash-escaped spaces and trims whitespace from both ends.\n * Commonly used to clean file paths that have been escaped when pasted into terminals.\n *\n * @param input - The string to unescape and clean\n * @returns The cleaned string with escaped spaces converted to regular spaces and trimmed\n *\n * @example\n * ```javascript\n * unescapeSpaces('My\\\\ Folder\\\\ Name');\n * // Output: 'My Folder Name'\n *\n * unescapeSpaces(' /path/to/My\\\\ Document.txt ');\n * // Output: '/path/to/My Document.txt'\n *\n * unescapeSpaces('regular text');\n * // Output: 'regular text'\n * ```\n */\nexport const unescapeSpaces = (input: string) => input.replace(/\\\\ /g, ' ').trim();\n\n/**\n * Arabic diacritics (Tashkeel/Harakat).\n */\nconst DIACRITICS_CLASS = '[\\u064B\\u064C\\u064D\\u064E\\u064F\\u0650\\u0651\\u0652]';\n\n/**\n * Groups of equivalent Arabic characters — any character in a group should match\n * any other character in the same group.\n */\nconst EQUIV_GROUPS: string[][] = [\n ['\\u0627', '\\u0622', '\\u0623', '\\u0625'], // ا, آ, أ, إ\n ['\\u0629', '\\u0647'], // ة <-> ه\n ['\\u0649', '\\u064A'], // ى <-> ي\n];\n\n/** Return a character class for a char if it belongs to an equivalence group. */\nconst getEquivClass = (ch: string): string => {\n for (const group of EQUIV_GROUPS) {\n if (group.includes(ch)) {\n // join the group's members into a character class\n return `[${group.map((c) => escapeRegex(c)).join('')}]`;\n }\n }\n // not in equivalence groups -> return escaped character\n return escapeRegex(ch);\n};\n\n/** Small safe normalization: NFC, remove ZWJ/ZWNJ, collapse spaces. */\nconst normalizeArabicLight = (str: string) => {\n return str\n .normalize('NFC')\n .replace(/[\\u200C\\u200D]/g, '') // remove ZWJ/ZWNJ\n .replace(/\\s+/g, ' ')\n .trim();\n};\n\n/**\n * Creates a diacritic-insensitive regex pattern for Arabic text matching.\n * Normalizes text, handles character equivalences (ا/آ/أ/إ, ة/ه, ى/ي),\n * and makes each character tolerant of Arabic diacritics (Tashkeel/Harakat)\n * @param text - Input Arabic text to make diacritic-insensitive\n * @returns Regex pattern string that matches the text with or without diacritics and character variants\n */\nexport const makeDiacriticInsensitive = (text: string) => {\n const diacriticsMatcher = `${DIACRITICS_CLASS}*`;\n const norm = normalizeArabicLight(text);\n // Use Array.from to iterate grapheme-safe over the string (works fine for Arabic letters)\n return Array.from(norm)\n .map((ch) => getEquivClass(ch) + diacriticsMatcher)\n .join('');\n};\n","import { normalizeSpaces } from './formatting';\n\n/**\n * Replaces common Arabic prefixes (like 'Al-', 'Ar-', 'Ash-', etc.) with 'al-' in the text.\n * Handles different variations of prefixes such as Ash- and Al- but not when the second word\n * does not start with 'S'.\n * Example: 'Ash-Shafiee' becomes 'al-Shafiee'.\n *\n * @param {string} text - The input text containing Arabic prefixes.\n * @returns {string} - The modified text with standardized 'al-' prefixes.\n */\nexport const normalizeArabicPrefixesToAl = (text: string) => {\n return text\n .replace(/(\\b|\\W)(Al |Al-|Ar-|As-|Adh-|Ad-|Ats-|Ath |Ath-|Az |Az-|az-|adh-|as-|ar-)/g, '$1al-')\n .replace(/(\\b|\\W)(Ash-S|ash-S)/g, '$1al-S')\n .replace(/al- (.+?)\\b/g, 'al-$1');\n};\n\n/**\n * Removes double occurrences of Arabic apostrophes such as ʿʿ or ʾʾ in the text.\n * Example: 'ʿulamāʾʾ' becomes 'ʿulamāʾ'.\n *\n * @param {string} text - The input text containing double apostrophes.\n * @returns {string} - The modified text with condensed apostrophes.\n */\nexport const normalizeDoubleApostrophes = (text: string) => {\n return text.replace(/ʿʿ/g, 'ʿ').replace(/ʾʾ/g, 'ʾ');\n};\n\n/**\n * Replaces common salutations such as \"sallahu alayhi wasallam\" with \"ﷺ\" in the text.\n * It also handles variations of the salutation phrase, including 'peace and blessings be upon him'.\n * Example: 'Then Muḥammad (sallahu alayhi wasallam)' becomes 'Then Muḥammad ﷺ'.\n *\n * @param {string} text - The input text containing salutations.\n * @returns {string} - The modified text with salutations replaced.\n */\nexport const replaceSalutationsWithSymbol = (text: string) => {\n return text\n .replace(\n /\\(peace be upon him\\)|(Messenger of (Allah|Allāh)|Messenger|Prophet|Mu[hḥ]ammad) *\\((s[^)]*m|peace[^)]*him|May[^)]*him|may[^)]*him)\\)*/gi,\n '$1 ﷺ',\n )\n .replace(/,\\s*ﷺ\\s*,/g, ' ﷺ');\n};\n\n/**\n * Normalizes the text by removing diacritics, apostrophes, and dashes.\n * Example: 'Al-Jadwal' becomes 'AlJadwal'.\n *\n * @param {string} input - The input text to normalize.\n * @returns {string} - The normalized text.\n */\nexport const normalize = (input: string) => {\n return input\n .normalize('NFKD')\n .replace(/[\\u0300-\\u036f]/g, '')\n .replace(/`|ʾ|ʿ|-/g, '');\n};\n\n/**\n * Strips common Arabic prefixes like 'al-', 'bi-', 'fī', 'wa-', etc. from the beginning of words.\n * Example: 'al-Bukhari' becomes 'Bukhari'.\n *\n * @param {string} text - The input text containing Arabic prefixes.\n * @returns {string} - The modified text with prefixes stripped.\n */\nexport const removeArabicPrefixes = (text: string) => {\n return normalizeSpaces(text.replace(/(\\bal-|\\bli-|\\bbi-|\\bfī|\\bwa[-\\s]+|\\bl-|\\bliʿl|\\Bʿalá|\\Bʿan|\\bb\\.)/gi, ''));\n};\n\n/**\n * Simplifies English transliterations by removing diacritics, apostrophes, and common prefixes.\n * Example: 'Al-Jadwal' becomes 'Jadwal', and 'āḍġḥīṣṭū' becomes 'adghistu'.\n *\n * @param {string} text - The input text to simplify.\n * @returns {string} - The simplified text.\n */\nexport const normalizeTransliteratedEnglish = (text: string) => normalize(removeArabicPrefixes(text));\n\n/**\n * Extracts the initials from the input string, typically used for names or titles.\n * Example: 'Nayl al-Awtar' becomes 'NA'.\n *\n * @param {string} text - The input text to extract initials from.\n * @returns {string} - The extracted initials.\n */\nexport const extractInitials = (fullName: string) => {\n const initials = normalizeTransliteratedEnglish(fullName)\n .trim()\n .split(/[ -]/)\n .slice(0, 2)\n .map((word) => {\n return word.charAt(0).toUpperCase();\n })\n .join('');\n return initials;\n};\n"],"mappings":"AAuBO,IAAMA,EAAyBC,GAC3B,SACHA,EAAO,QAAQ,mBAAqBC,IAAOA,EAAE,WAAW,CAAC,EAAI,MAAQ,SAAS,CAAC,EAC/E,EACJ,EAUSC,EAAiCC,GACnCA,EAAK,QAAQ,qCAAsC,EAAE,EASnDC,EAA8BD,GAChCA,EAAK,QAAQ,KAAM,QAAG,EAAE,QAAQ,KAAM,QAAG,EASvCE,EAAkBF,GAAiB,CAC5C,GAAI,CAACA,EACD,MAAO,GAGX,IAAMG,EAAuB,uEAEvBC,EAAkB,mCAElBC,EAAsB,sCACtBC,EAAUN,EAAK,QAAQI,EAAiB,EAAE,EAC1CG,EAAgBD,EAAQ,MAAMH,CAAoB,GAAK,CAAC,EACxDK,EAAeF,EAAQ,MAAMD,CAAmB,GAAK,CAAC,EAC5D,OAAOG,EAAa,SAAW,EAAI,EAAID,EAAc,OAASC,EAAa,MAC/E,EASaC,EAAkBT,GACpBA,EAAK,QAAQ,OAAQ,SAAI,EASvBU,EAAuCV,GACzCA,EAAK,QAAQ,2BAA4B,OAAO,EAS9CW,EAA4BX,GAC9BA,EACF,QAAQ,iEAAkE,GAAG,EAC7E,QAAQ,8DAA+D,GAAG,EAStEY,EAAuBZ,GACzBA,EAAK,QAAQ,2CAA4C,EAAE,EASzDa,EAA+Bb,GACjCA,EAAK,QAAQ,6BAA8B,GAAG,EAS5Cc,EAAuCd,GACzCA,EACF,QAAQ,UAAW,QAAG,EACtB,QAAQ,oBAAqB,QAAG,EAChC,QAAQ,QAAS,QAAG,ECvI7B,IAAMe,EAAmB,yDAEnBC,EAAgB,UAQTC,EAAeC,GAAsBA,EAAE,QAAQ,sBAAuB,MAAM,EA+D5EC,EAAgC,CAACC,EAAgBC,EAAyB,CAAC,IAAc,CAClG,GAAM,CACF,aAAAC,EAAe,CAAE,KAAM,GAAM,aAAc,GAAM,eAAgB,EAAK,EACtE,aAAAC,EAAe,GACf,iBAAAC,EAAmB,GACnB,eAAAC,EAAiB,GACjB,MAAAC,EAAQ,GACZ,EAAIL,EAGJ,GAAID,EAAO,OAAS,IAChB,MAAM,IAAI,MAAM,gDAAgD,EAGpE,IAAMO,EAAaC,GAAuB,CACtC,OAAQA,EAAI,CACR,IAAK,SACL,IAAK,SACL,IAAK,SACL,IAAK,SACD,OAAON,EAAa,KAAO,6BAAW,SAC1C,IAAK,SACL,IAAK,SACD,OAAOA,EAAa,aAAe,iBAASL,EAAYW,CAAE,EAC9D,IAAK,SACL,IAAK,SACD,OAAON,EAAa,eAAiB,iBAASL,EAAYW,CAAE,EAChE,QACI,OAAOX,EAAYW,CAAE,CAC7B,CACJ,EAEMC,EAAQ,GAAGL,EAAmB,GAAGT,CAAgB,IAAM,EAAE,GAAGQ,EAAe,GAAGP,CAAa,IAAM,EAAE,GAErGc,EAAU,GACd,QAAWF,KAAM,MAAM,KAAKR,CAAM,EAC1B,KAAK,KAAKQ,CAAE,EACZE,GAAWL,EAAiB,OAAS,OAErCK,GAAW,GAAGH,EAAUC,CAAE,CAAC,GAAGC,CAAK,GAI3C,OAAO,IAAI,OAAOC,EAASJ,CAAK,CACpC,EChHO,IAAMK,EAAoCC,GAAiB,CAE9D,IAAMC,EAAc,YAKpB,OAFsBD,EAAK,QAAQC,EAAa;AAAA,CAAM,EAAE,QAAQ,SAAU;AAAA,CAAI,EAAE,KAAK,CAGzF,EAQaC,EAAqCF,GACvCA,EACF,QAAQ,iDAAkD,OAAO,EACjE,QAAQ,mCAAoC,MAAM,EAClD,QAAQ,uDAAwD,QAAQ,EACxE,QAAQ,yEAA0E,IAAI,EAUlFG,EAAoBH,GACtBA,EACF,QAAQ,QAAS,GAAG,EACpB,QAAQ,aAAc,gBAAM,EAC5B,QAAQ,MAAO,QAAG,EASdI,EAAwBJ,GAC1BA,EAAK,QAAQ,UAAW;AAAA,CAAI,EAS1BK,EAAmBL,GACrBA,EAAK,QAAQ,YAAa,EAAE,EAQ1BM,EAAuBN,GACzB,gBAAgB,KAAKA,CAAI,EAQvBO,EAAqBP,GAChB,kEACD,KAAKA,CAAI,EASbQ,EAA2BR,GAC7BA,EAAK,QAAQ,mBAAoB,IAAI,EASnCS,EAAqBT,GACvBA,EAAK,QAAQ,YAAa,GAAG,EAS3BU,EAAkBV,GACpBA,EAAK,QAAQ,eAAgB,GAAG,EAS9BW,EAAkBX,GACpBA,EAAK,QAAQ,SAAU,GAAG,EASxBY,EAAoBZ,GACtBA,EAAK,QAAQ,UAAW,QAAG,EASzBa,GAAiCb,GACnCA,EAAK,QAAQ,eAAgB;AAAA;AAAA,CAAM,EASjCc,GAAiCd,GACnCA,EAAK,QAAQ,eAAgB;AAAA,CAAI,EAS/Be,GAAmBf,GACrBA,EAAK,QAAQ,UAAW,GAAG,EASzBgB,GAAuBhB,GACzBA,EAAK,QAAQ,SAAU,QAAG,EAAE,QAAQ,MAAO,GAAG,EAS5CiB,GAA0BjB,GAC5BA,EAAK,QAAQ,2BAA4B,MAAM,EAS7CkB,GAA6BlB,GAC/BA,EAAK,QAAQ,qBAAsB,OAAO,EASxCmB,GAA2BnB,GAC7BA,EAAK,QAAQ,mBAAoB,OAAO,EAStCoB,GAAmBpB,GAExBA,EACK,QAAQ,aAAc,MAAG,EACzB,QAAQ,aAAc,MAAG,EAEzB,QAAQ,6BAA8B,MAAM,EAE5C,QAAQ,6BAA8B,MAAM,EAU5CqB,GAAkBrB,GAAiB,CAE5C,IAAIsB,EAAStB,EAGb,OAAAsB,EAASA,EAAO,QAAQ,kBAAmB,MAAM,EAG1CA,EAAO,QAAQ,kBAAmB,MAAM,CACnD,EAQaC,GAA+BvB,GAEpCA,EAEK,QAAQ,eAAgB,YAAM,EAE9B,QAAQ,eAAgB,YAAM,EAE9B,QAAQ,sBAAuB,YAAM,EAWrCwB,GAA0BC,GAAkB,CACrD,IAAMC,EAAgB,mCAChBC,EAAsB,CAAC,EACvBC,EAAQH,EAAM,MAAM;AAAA,CAAI,EAC1BI,EAAkB,GAEtB,OAAAD,EAAM,QAASE,GAAS,CACpB,IAAMC,EAAcD,EAAK,KAAK,EACxBE,EAAaN,EAAc,KAAKK,CAAW,EAC3CE,EAAW,gBAAgB,KAAKF,CAAW,EAEjD,GAAIC,GAAc,CAACC,EACXJ,IACAF,EAAU,KAAKE,EAAgB,KAAK,CAAC,EACrCA,EAAkB,IAEtBF,EAAU,KAAKI,CAAW,MACvB,CACHF,GAAmB,GAAGE,CAAW,IACjC,IAAMG,EAAWL,EAAgB,KAAK,EAAE,MAAM,EAAE,EAC5C,QAAQ,KAAKK,CAAQ,IACrBP,EAAU,KAAKE,EAAgB,KAAK,CAAC,EACrCA,EAAkB,GAE1B,CACJ,CAAC,EAGGA,GACAF,EAAU,KAAKE,EAAgB,KAAK,CAAC,EAGlCF,EAAU,KAAK;AAAA,CAAI,CAC9B,EAOaQ,GAAkBnC,GAAiB,CAG5C,IAAMoC,EAAcpC,EAAK,QAAQ,aAAc,EAAE,EAGjD,OAAIoC,EAAY,SAAW,EAChB,GAGJA,IAAgBA,EAAY,YAAY,CACnD,EAQaC,GAA8BrC,GAChCA,EAAK,QAAQ,sBAAuB,OAAO,EASzCsC,EAAmBtC,GACrBA,EAAK,QAAQ,UAAW,GAAG,EAmBzBuC,GAA8BvC,GAChCA,EAAK,QAAQ,cAAe,IAAI,EAS9BwC,GAA6BxC,GAC/BA,EAAK,QAAQ,4BAA6B,QAAQ,EAShDyC,GAAmCzC,GACrCA,EAAK,QAAQ,WAAY,MAAG,EAAE,QAAQ,WAAY,MAAG,EAQnD0C,EAAoB1C,GAEJA,EAAK,UAAU,MAAM,EAGtB,QAAQ,mBAAoB,EAAE,EAAE,KAAK,EASpD2C,EAAuB3C,GAAiB,CACjD,IAAM4C,EAAoC,CACtC,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,GACpB,EAEA,OAAO5C,EAAK,QAAQ,4EAA8E6C,GACvFD,EAAUC,CAAK,GAAKA,CAC9B,CACL,EAOaC,GAAgB9C,GAClB2C,EAAoBD,EAAiB1C,CAAI,CAAC,EAQxC+C,GAAeC,GACjBA,EACF,YAAY,EACZ,MAAM,GAAG,EACT,IAAKC,GAAS,CACX,GAAIA,EAAK,SAAW,EAAG,OAAOA,EAE9B,IAAMJ,EAAQI,EAAK,MAAM,QAAQ,EACjC,GAAI,CAACJ,GAASA,EAAM,QAAU,OAAW,OAAOI,EAChD,IAAMC,EAAIL,EAAM,MAChB,OAAOI,EAAK,MAAM,EAAGC,CAAC,EAAID,EAAK,OAAOC,CAAC,EAAE,YAAY,EAAID,EAAK,MAAMC,EAAI,CAAC,CAC7E,CAAC,EACA,KAAK,GAAG,EASJC,GAAyBnD,GAC3BA,EAAK,QAAQ,+BAAgC,QAAQ,ECrezD,IAAMoD,GAAuBC,GAAgB,CAChD,IAAIC,EAAQD,EAAI,QAAQ,kBAAmB,MAAM,EACjD,OAAAC,EAAQA,EAAM,QAAQ,iBAAkB,QAAQ,EAChDA,EAAQA,EAAM,QAAQ,iBAAkB,QAAQ,EAEzC,KAAK,UAAU,KAAK,MAAMA,CAAK,CAAC,CAC3C,EAcaC,GAAwBF,GAG7B,6GACmB,KAAKA,EAAI,KAAK,CAAC,EAiB7BG,GAAiBC,GAA4B,CACtD,IAAMC,EAAQ,wBACd,OAAQD,EAAM,MAAMC,CAAK,GAAK,CAAC,GAAG,IAAKC,GAAeA,EAAE,WAAW,GAAG,EAAIA,EAAE,MAAM,EAAG,EAAE,EAAIA,CAAE,CACjG,EAgBMC,EAAqBP,GAAgB,CACvC,IAAIQ,EAAa,EACjB,QAAWC,KAAQT,EACXS,IAAS,KACTD,IAGR,OAAOA,EAAa,IAAM,CAC9B,EAEME,EAAW,CAAE,IAAK,IAAK,IAAK,IAAK,IAAK,GAAI,EAC1CC,EAAe,IAAI,IAAI,CAAC,IAAK,IAAK,GAAG,CAAC,EACtCC,EAAgB,IAAI,IAAI,CAAC,IAAK,IAAK,GAAG,CAAC,EAqBvCC,EAAuBb,GAAgB,CACzC,IAAMc,EAAkB,CAAC,EAEzB,QAAWL,KAAQT,EACf,GAAIW,EAAa,IAAIF,CAAI,EACrBK,EAAM,KAAKL,CAAI,UACRG,EAAc,IAAIH,CAAI,EAAG,CAChC,IAAMM,EAAWD,EAAM,IAAI,EAC3B,GAAI,CAACC,GAAYL,EAASK,CAAiC,IAAMN,EAC7D,MAAO,EAEf,CAGJ,OAAOK,EAAM,SAAW,CAC5B,EAsBaE,GAAchB,GAChBO,EAAkBP,CAAG,GAAKa,EAAoBb,CAAG,EAS/CiB,GAAmBC,GAAgC,CAC5D,GAAIA,EAAU,SAAS,GAAG,EAAG,CACzB,GAAM,CAACC,EAAOC,CAAG,EAAIF,EAAU,MAAM,GAAG,EAAE,IAAI,MAAM,EAEpD,GAAIC,EAAQC,EACR,MAAM,IAAI,MAAM,4CAA4C,EAGhE,OAAO,MAAM,KAAK,CAAE,OAAQA,EAAMD,EAAQ,CAAE,EAAG,CAACE,EAAGC,IAAMH,EAAQG,CAAC,CACtE,KACI,QAAOJ,EAAU,MAAM,GAAG,EAAE,IAAI,MAAM,CAE9C,EC5JO,IAAMK,GAAiCC,GACnCA,EAAK,QACR,wHACA,GACJ,EASSC,GAA4BD,GAC9BA,EAAK,QAAQ,aAAc,EAAE,EAS3BE,GAA+BF,GACjCA,EAAK,QAAQ,OAAQ,GAAG,EAStBG,GAAkBH,GACpBA,EAAK,QAAQ,SAAU,EAAE,EASvBI,GAAmBJ,GACrBA,EAAK,QAAQ,yDAA0D,EAAE,EASvEK,GAA0BL,GAC5BA,EAAK,QAAQ,SAAU,EAAE,EASvBM,GAA+BN,GACjCA,EAAK,QAAQ,4BAA6B,EAAE,EAS1CO,GAAcP,GAChBA,EAAK,QACR,uGACA,EACJ,EAQSQ,GAA4BR,GAEjCA,EAEK,QAAQ,mBAAoB,IAAI,EAEhC,QAAQ,eAAgB,IAAI,EAE5B,QAAQ,eAAgB,IAAI,EAE5B,QAAQ,aAAc,IAAI,EAE1B,QAAQ,eAAgB,IAAI,EAE5B,QAAQ,aAAc,EAAE,EAExB,QAAQ,uBAAwB,EAAE,EAElC,QAAQ,wBAAyB,IAAI,EAErC,QAAQ,WAAY,EAAE,EAEtB,QAAQ,iBAAkB,EAAE,EAE5B,QAAQ,iBAAkB,EAAE,EAE5B,QAAQ,MAAO,EAAE,EAoBjBS,GAAW,CAACC,EAAaC,EAAI,MAAiBD,EAAI,OAASC,EAAI,GAAGD,EAAI,UAAU,EAAGC,EAAI,CAAC,CAAC,SAAMD,EAsB/FE,GAAiB,CAACZ,EAAca,EAAoB,GAAIC,IAAuB,CACxF,GAAId,EAAK,QAAUa,EACf,OAAOb,EAIX,IAAMe,EAAmB,KAAK,IAAI,EAAG,KAAK,MAAMF,EAAY,CAAC,CAAC,EACxDG,EAAkBF,GAAaC,EAM/BE,EAHkBJ,EAAY,EAGEG,EAGtC,GAAIC,EAAc,EAEd,MAAO,GAAGjB,EAAK,UAAU,EAAGa,EAAY,CAAC,CAAC,SAG9C,IAAMK,EAAelB,EAAK,UAAU,EAAGiB,CAAW,EAC5CE,EAAanB,EAAK,UAAUA,EAAK,OAASgB,CAAe,EAE/D,MAAO,GAAGE,CAAY,SAAIC,CAAU,EACxC,EAqBaC,GAAkBC,GAAkBA,EAAM,QAAQ,OAAQ,GAAG,EAAE,KAAK,EAK3EC,EAAmB,qDAMnBC,EAA2B,CAC7B,CAAC,SAAU,SAAU,SAAU,QAAQ,EACvC,CAAC,SAAU,QAAQ,EACnB,CAAC,SAAU,QAAQ,CACvB,EAGMC,EAAiBC,GAAuB,CAC1C,QAAWC,KAASH,EAChB,GAAIG,EAAM,SAASD,CAAE,EAEjB,MAAO,IAAIC,EAAM,IAAKC,GAAMC,EAAYD,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,IAI5D,OAAOC,EAAYH,CAAE,CACzB,EAGMI,EAAwBC,GACnBA,EACF,UAAU,KAAK,EACf,QAAQ,kBAAmB,EAAE,EAC7B,QAAQ,OAAQ,GAAG,EACnB,KAAK,EAUDC,GAA4B/B,GAAiB,CACtD,IAAMgC,EAAoB,GAAGV,CAAgB,IACvCW,EAAOJ,EAAqB7B,CAAI,EAEtC,OAAO,MAAM,KAAKiC,CAAI,EACjB,IAAKR,GAAOD,EAAcC,CAAE,EAAIO,CAAiB,EACjD,KAAK,EAAE,CAChB,ECxPO,IAAME,GAA+BC,GACjCA,EACF,QAAQ,6EAA8E,OAAO,EAC7F,QAAQ,wBAAyB,QAAQ,EACzC,QAAQ,eAAgB,OAAO,EAU3BC,GAA8BD,GAChCA,EAAK,QAAQ,MAAO,QAAG,EAAE,QAAQ,MAAO,QAAG,EAWzCE,GAAgCF,GAClCA,EACF,QACG,2IACA,WACJ,EACC,QAAQ,aAAc,SAAI,EAUtBG,EAAaC,GACfA,EACF,UAAU,MAAM,EAChB,QAAQ,mBAAoB,EAAE,EAC9B,QAAQ,WAAY,EAAE,EAUlBC,EAAwBL,GAC1BM,EAAgBN,EAAK,QAAQ,uEAAwE,EAAE,CAAC,EAUtGO,EAAkCP,GAAiBG,EAAUE,EAAqBL,CAAI,CAAC,EASvFQ,GAAmBC,GACXF,EAA+BE,CAAQ,EACnD,KAAK,EACL,MAAM,MAAM,EACZ,MAAM,EAAG,CAAC,EACV,IAAKC,GACKA,EAAK,OAAO,CAAC,EAAE,YAAY,CACrC,EACA,KAAK,EAAE","names":["arabicNumeralToNumber","arabic","c","cleanExtremeArabicUnderscores","text","convertUrduSymbolsToArabic","getArabicScore","arabicLettersPattern","allDigitPattern","countedCharsPattern","cleaned","arabicMatches","totalMatches","fixTrailingWow","addSpaceBetweenArabicTextAndNumbers","removeNonIndexSignatures","removeSingularCodes","removeSolitaryArabicLetters","replaceEnglishPunctuationWithArabic","DIACRITICS_CLASS","TATWEEL_CLASS","escapeRegex","s","makeDiacriticInsensitiveRegex","needle","opts","equivalences","allowTatweel","ignoreDiacritics","flexWhitespace","flags","charClass","ch","after","pattern","insertLineBreaksAfterPunctuation","text","punctuation","addSpaceBeforeAndAfterPunctuation","applySmartQuotes","cleanLiteralNewLines","cleanMultilines","hasWordInSingleLine","isOnlyPunctuation","cleanSpacesBeforePeriod","condenseAsterisks","condenseColons","condenseDashes","condenseEllipsis","reduceMultilineBreaksToDouble","reduceMultilineBreaksToSingle","condensePeriods","condenseUnderscores","doubleToSingleBrackets","ensureSpaceBeforeBrackets","ensureSpaceBeforeQuotes","fixBracketTypos","fixCurlyBraces","result","fixMismatchedQuotationMarks","formatStringBySentence","input","footnoteRegex","sentences","lines","currentSentence","line","trimmedLine","isFootnote","isNumber","lastChar","isAllUppercase","lettersOnly","normalizeSlashInReferences","normalizeSpaces","removeRedundantPunctuation","removeSpaceInsideBrackets","replaceDoubleBracketsWithArrows","stripBoldStyling","stripItalicsStyling","italicMap","match","stripStyling","toTitleCase","str","word","i","trimSpaceInsideQuotes","normalizeJsonSyntax","str","input","isJsonStructureValid","splitByQuotes","query","regex","s","areQuotesBalanced","quoteCount","char","brackets","openBrackets","closeBrackets","areBracketsBalanced","stack","lastOpen","isBalanced","parsePageRanges","pageInput","start","end","_","i","cleanSymbolsAndPartReferences","text","cleanTrailingPageNumbers","replaceLineBreaksWithSpaces","stripAllDigits","removeDeathYear","removeNumbersAndDashes","removeSingleDigitReferences","removeUrls","removeMarkdownFormatting","truncate","val","n","truncateMiddle","maxLength","endLength","defaultEndLength","actualEndLength","startLength","startPortion","endPortion","unescapeSpaces","input","DIACRITICS_CLASS","EQUIV_GROUPS","getEquivClass","ch","group","c","escapeRegex","normalizeArabicLight","str","makeDiacriticInsensitive","diacriticsMatcher","norm","normalizeArabicPrefixesToAl","text","normalizeDoubleApostrophes","replaceSalutationsWithSymbol","normalize","input","removeArabicPrefixes","normalizeSpaces","normalizeTransliteratedEnglish","extractInitials","fullName","word"]}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "bitaboom",
3
3
  "description": "Use string utils library to format Arabic and English translations.",
4
- "version": "1.5.0",
4
+ "version": "2.0.0",
5
5
  "author": "Ragaeeb Haq",
6
6
  "license": "MIT",
7
7
  "private": false,
@@ -22,9 +22,7 @@
22
22
  "node": ">=22.0.0"
23
23
  },
24
24
  "files": [
25
- "dist/index.js",
26
- "dist/index.js.map",
27
- "dist/*.d.ts"
25
+ "dist/**"
28
26
  ],
29
27
  "scripts": {
30
28
  "build": "tsup",
@@ -39,7 +37,7 @@
39
37
  "devDependencies": {
40
38
  "@biomejs/biome": "^2.2.4",
41
39
  "@types/bun": "^1.2.21",
42
- "@types/node": "^24.3.1",
40
+ "@types/node": "^24.3.3",
43
41
  "semantic-release": "^24.2.8",
44
42
  "tsup": "^8.5.0"
45
43
  }