bitaboom 1.4.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -89,30 +89,38 @@ applySmartQuotes('The "quick brown" fox');
89
89
 
90
90
  ---
91
91
 
92
- ### `cleanExtremeArabicUnderscores`
92
+ ### `arabicNumeralToNumber`
93
93
 
94
- Removes extreme Arabic underscores (ـ) from the beginning or end of lines. It does not affect Hijri dates or certain Arabic terms.
94
+ Converts Arabic-Indic numerals (٠-٩) to a JavaScript number. This function finds all Arabic-Indic digits in the input string and converts them to their corresponding Arabic (Western) digits, then parses the result as an integer.
95
95
 
96
96
  #### Example
97
97
 
98
98
  ```javascript
99
- cleanExtremeArabicUnderscores('ـThis is a textـ');
100
- // Output: "This is a text"
99
+ arabicNumeralToNumber("١٢٣");
100
+ // Output: 123
101
+
102
+ arabicNumeralToNumber("٥٠");
103
+ // Output: 50
104
+
105
+ arabicNumeralToNumber("abc١٢٣xyz");
106
+ // Output: NaN (digits are converted in place; the leading non-digit "abc" is not stripped, so parseInt fails)
101
107
  ```
102
108
 
103
109
  ---
104
110
 
105
- ### `cleanJunkFromText`
111
+ ### `cleanExtremeArabicUnderscores`
106
112
 
107
- Cleans unnecessary spaces and punctuation from text.
113
+ Removes extreme Arabic underscores (ـ) from the beginning or end of lines. It does not affect Hijri dates or certain Arabic terms.
108
114
 
109
115
  #### Example
110
116
 
111
117
  ```javascript
112
- cleanJunkFromText('Some text !@#\nAnother line.');
113
- // Output: 'Some text\nAnother line.'
118
+ cleanExtremeArabicUnderscores('ـThis is a textـ');
119
+ // Output: "This is a text"
114
120
  ```
115
121
 
122
+ ---
123
+
116
124
  ### `cleanLiteralNewLines`
117
125
 
118
126
  Replaces literal new line characters (`\n`) with actual line breaks.
@@ -210,6 +218,19 @@ ensureSpaceBeforeQuotes('text «quote»');
210
218
 
211
219
  ---
212
220
 
221
+ ### `escapeRegex`
222
+
223
+ Escapes a string so it can be safely embedded into a RegExp source.
224
+
225
+ #### Example
226
+
227
+ ```javascript
228
+ escapeRegex('Hello [world]');
229
+ // Output: 'Hello \\[world\\]'
230
+ ```
231
+
232
+ ---
233
+
213
234
  ### `extractInitials`
214
235
 
215
236
  Extracts initials from the input string, typically for names or titles.
@@ -401,30 +422,6 @@ regex.test('محمد'); // true
401
422
 
402
423
  ---
403
424
 
404
- ### `normalizeAlifVariants`
405
-
406
- Simplifies all forms of 'alif' (أ, إ, and آ) to the basic 'ا'.
407
-
408
- #### Example
409
-
410
- ```javascript
411
- normalizeAlifVariants('أنا إلى الآفاق');
412
- // Output: 'انا الى الافاق'
413
- ```
414
-
415
- ### `normalizeApostrophes`
416
-
417
- Replaces various apostrophe characters like ‛, ', and ' with the standard apostrophe (').
418
-
419
- #### Example
420
-
421
- ```javascript
422
- normalizeApostrophes('‛ulama' al-su'');
423
- // Output: "'ulama' al-su'"
424
- ```
425
-
426
- ---
427
-
428
425
  ### `normalizeArabicPrefixesToAl`
429
426
 
430
427
  Replaces common Arabic prefixes like 'Al-', 'Ar-', 'Ash-', etc., with 'al-' in the text. Handles variations and lam-assimilation patterns (before sun letters), and avoids changes where assimilation rules do not apply.
@@ -632,25 +629,6 @@ removeSolitaryArabicLetters('ب ا الكلمات ت');
632
629
 
633
630
  ---
634
631
 
635
- ### `removeTatwil` (Updated)
636
-
637
- Removes tatweel characters while preserving dates references and numbered list items. Example: "1435/3/29 هـ" remains as "1435/3/29 هـ" but "أبـــتِـــكَةُ" becomes "أبتِكَةُ". Also preserves tatweels in numbered list items like "3 ـ item".
638
-
639
- #### Example
640
-
641
- ```javascript
642
- removeTatwil('أبـــتِـــكَةُ');
643
- // Output: 'أبتِكَةُ'
644
-
645
- removeTatwil('1435/3/29 هـ');
646
- // Output: '1435/3/29 هـ' (unchanged)
647
-
648
- removeTatwil('3 ـ وشريط');
649
- // Output: '3 ـ وشريط' (unchanged)
650
- ```
651
-
652
- ---
653
-
654
632
  ### `removeUrls`
655
633
 
656
634
  Removes URLs from the text.
@@ -662,19 +640,6 @@ removeUrls('Visit https://example.com');
662
640
  // Output: 'Visit '
663
641
  ```
664
642
 
665
- ### `replaceAlifMaqsurah`
666
-
667
- Replaces 'alif maqsurah' (ى) with 'ya' (ي).
668
-
669
- #### Example
670
-
671
- ```javascript
672
- replaceAlifMaqsurah('رؤيى');
673
- // Output: 'رؤيي'
674
- ```
675
-
676
- ---
677
-
678
643
  ### `replaceDoubleBracketsWithArrows`
679
644
 
680
645
  Replaces double parentheses with single arrow quotation marks. Converts `((text))` format to `«text»` format, handling optional spaces inside the brackets.
@@ -730,19 +695,6 @@ replaceSalutationsWithSymbol('Then Muḥammad (sallahu alayhi wasallam)');
730
695
 
731
696
  ---
732
697
 
733
- ### `replaceTaMarbutahWithHa`
734
-
735
- Replaces 'ta marbutah' (ة) with 'ha' (ه).
736
-
737
- #### Example
738
-
739
- ```javascript
740
- replaceTaMarbutahWithHa('مدرسة');
741
- // Output: 'مدرسه'
742
- ```
743
-
744
- ---
745
-
746
698
  ### `splitByQuotes`
747
699
 
748
700
  Splits a string by spaces but keeps quoted substrings intact. Substrings enclosed in double quotes are treated as a single part.
@@ -767,45 +719,6 @@ stripAllDigits('abc123');
767
719
 
768
720
  ---
769
721
 
770
- ### `stripDiacritics`
771
-
772
- Removes Arabic diacritics (tashkeel) and the elongation character (ـ).
773
-
774
- #### Example
775
-
776
- ```javascript
777
- stripDiacritics('مُحَمَّدٌ');
778
- // Output: 'محمد'
779
- ```
780
-
781
- ---
782
-
783
- ### `stripEnglishCharactersAndSymbols`
784
-
785
- Removes English letters and symbols from the text.
786
-
787
- #### Example
788
-
789
- ```javascript
790
- stripEnglishCharactersAndSymbols('أحب & لنفسي');
791
- // Output: 'أحب لنفسي'
792
- ```
793
-
794
- ---
795
-
796
- ### `stripZeroWidthCharacters`
797
-
798
- Removes zero-width characters like ZWJ and other invisible characters.
799
-
800
- #### Example
801
-
802
- ```javascript
803
- stripZeroWidthCharacters('يَخْلُوَ ‏.');
804
- // Output: 'يَخْلُوَ .'
805
- ```
806
-
807
- ---
808
-
809
722
  ### `toTitleCase`
810
723
 
811
724
  Converts a string to title case (first letter of each word capitalized).
@@ -875,3 +788,28 @@ unescapeSpaces('regular text');
875
788
  ```
876
789
 
877
790
  ---
791
+
792
+ ## makeDiacriticInsensitiveRegex — tolerant Arabic matcher
793
+
794
+ `makeDiacriticInsensitiveRegex(needle, opts?)` returns a `RegExp` that matches Arabic text while ignoring diacritics,
795
+ optionally tolerating tatweel, and treating common equivalents as equal (`ا~أ~إ~آ`, `ة~ه`, `ى~ي`). Whitespace in the needle
796
+ is treated as `\s+` by default, making it robust across spacing variants.
797
+
798
+ **Examples**
799
+
800
+ ```ts
801
+ import { makeDiacriticInsensitiveRegex } from 'bitaboom';
802
+
803
+ const rx = makeDiacriticInsensitiveRegex('أنا إلى الآفاق');
804
+ rx.test('انا الى الافاق'); // true
805
+ rx.test('أنا الي الآفاق'); // true
806
+ ```
807
+
808
+ **Composing tolerant heads with a literal tail**
809
+
810
+ ```ts
811
+ const heads = ['السلام', 'مرحبا'];
812
+ const pattern = heads.map(h => makeDiacriticInsensitiveRegex(h).source).join('|');
813
+ const rx2 = new RegExp(`^(?:${pattern})\\s+عليكم.*$`, 'mu');
814
+ rx2.test('اَلسَّلَامُ عليكم ورحمة'); // true
815
+ ```
package/dist/index.d.ts CHANGED
@@ -1,3 +1,27 @@
1
+ /**
2
+ * Converts Arabic-Indic numerals (٠-٩) to a JavaScript number.
3
+ *
4
+ * This function finds all Arabic-Indic digits in the input string and converts them
5
+ * to their corresponding Arabic (Western) digits, then parses the result as an integer.
6
+ *
7
+ * Arabic-Indic digits mapping:
8
+ * - ٠ → 0, ١ → 1, ٢ → 2, ٣ → 3, ٤ → 4
9
+ * - ٥ → 5, ٦ → 6, ٧ → 7, ٨ → 8, ٩ → 9
10
+ *
11
+ * @param arabic - The string containing Arabic-Indic numerals to convert
12
+ * @returns The parsed integer value of the converted numerals
13
+ *
14
+ * @example
15
+ * ```typescript
16
+ * arabicNumeralToNumber("١٢٣"); // returns 123
17
+ * arabicNumeralToNumber("٥٠"); // returns 50
18
+ * arabicNumeralToNumber("abc١٢٣xyz"); // returns NaN (leading non-digits are not stripped before parsing)
19
+ * arabicNumeralToNumber(""); // returns NaN
20
+ * ```
21
+ *
22
+ * Returns NaN if no valid Arabic-Indic digits are found
23
+ */
24
+ declare const arabicNumeralToNumber: (arabic: string) => number;
1
25
  /**
2
26
  * Removes extreme Arabic underscores (ـ) that appear at the beginning or end of a line or in text.
3
27
  * Does not affect Hijri dates (e.g., 1424هـ) or specific Arabic terms.
@@ -35,13 +59,6 @@ declare const fixTrailingWow: (text: string) => string;
35
59
  * @returns {string} - The modified text with spaces inserted between Arabic text and numbers.
36
60
  */
37
61
  declare const addSpaceBetweenArabicTextAndNumbers: (text: string) => string;
38
- /**
39
- * Removes English letters and symbols from the text, including ampersands, slashes, and other symbols.
40
- * Example: 'أحب & لنفسي' will be changed to 'أحب لنفسي'.
41
- * @param {string} text - The input text containing English letters and symbols.
42
- * @returns {string} - The modified text with English letters and symbols removed.
43
- */
44
- declare const stripEnglishCharactersAndSymbols: (text: string) => string;
45
62
  /**
46
63
  * Removes single-digit numbers surrounded by Arabic text. Also removes dashes (-) not followed by a number.
47
64
  * For example, removes '3' from 'وهب 3 وقال' but does not remove '121' from 'لوحه 121 الجرح'.
@@ -63,41 +80,6 @@ declare const removeSingularCodes: (text: string) => string;
63
80
  * @returns {string} - The modified text with solitary Arabic letters removed.
64
81
  */
65
82
  declare const removeSolitaryArabicLetters: (text: string) => string;
66
- /**
67
- * Removes tatweel characters while preserving dates references and numbered list items.
68
- * Example: "1435/3/29 هـ" remains as "1435/3/29 هـ" but "أبـــتِـــكَةُ" becomes "أبتِكَةُ"
69
- * @param text The text to format.
70
- * @returns The modified text with the tatweel characters removed.
71
- */
72
- declare const removeTatwil: (text: string) => string;
73
- /**
74
- * Replaces the 'tah marbutah' (ة) character with 'ha' (ه).
75
- * Example: 'مدرسة' will be changed to 'مدرسه'.
76
- * @param {string} text - The input text to apply the rule to.
77
- * @returns {string} - The modified text with 'ta marbutah' replaced by 'ha'.
78
- */
79
- declare const replaceTaMarbutahWithHa: (text: string) => string;
80
- /**
81
- * Removes Arabic diacritics (tashkeel) and the tatweel (elongation) character.
82
- * Example: 'مُحَمَّدٌ' will be changed to 'محمد'.
83
- * @param {string} text - The input text to apply the rule to.
84
- * @returns {string} - The modified text with diacritics and tatweel removed.
85
- */
86
- declare const stripDiacritics: (text: string) => string;
87
- /**
88
- * Removes zero-width joiners (ZWJ) and other zero-width characters from the input text.
89
- * Zero-width characters include U+200B to U+200F, U+202A to U+202E, U+2060 to U+2064, and U+FEFF.
90
- * @param {string} text - The input text to apply the rule to.
91
- * @returns {string} - The modified text with zero-width characters removed.
92
- */
93
- declare const stripZeroWidthCharacters: (text: string) => string;
94
- /**
95
- * Replaces the 'alif maqsurah' (ى) character with the regular 'ya' (ي).
96
- * Example: 'رؤيى' will be changed to 'رؤيي'.
97
- * @param {string} text - The input text to apply the rule to.
98
- * @returns {string} - The modified text with 'alif maqsurah' replaced by 'ya'.
99
- */
100
- declare const replaceAlifMaqsurah: (text: string) => string;
101
83
  /**
102
84
  * Replaces English punctuation (question mark and semicolon) with their Arabic equivalents.
103
85
  * Example: '?' will be replaced with '؟', and ';' with '؛'.
@@ -105,13 +87,70 @@ declare const replaceAlifMaqsurah: (text: string) => string;
105
87
  * @returns {string} - The modified text with English punctuation replaced by Arabic punctuation.
106
88
  */
107
89
  declare const replaceEnglishPunctuationWithArabic: (text: string) => string;
90
+
108
91
  /**
109
- * Simplifies all forms of 'alif' (أ, إ, and آ) to the basic 'ا'.
110
- * Example: 'أنا إلى الآفاق' will be changed to 'انا الى الافاق'.
111
- * @param {string} text - The input text to apply the rule to.
112
- * @returns {string} - The modified text with simplified 'alif' characters.
92
+ * Escape a string so it can be safely embedded into a RegExp source.
93
+ *
94
+ * @param s Any string
95
+ * @returns Escaped string
96
+ */
97
+ declare const escapeRegex: (s: string) => string;
98
+ /** Optional equivalence toggles for {@link makeDiacriticInsensitiveRegex}. */
99
+ type EquivOptions = {
100
+ /** Treat ا/أ/إ/آ as equivalent. @default true */
101
+ alif?: boolean;
102
+ /** Treat ة/ه as equivalent. @default true */
103
+ taMarbutahHa?: boolean;
104
+ /** Treat ى/ي as equivalent. @default true */
105
+ alifMaqsurahYa?: boolean;
106
+ };
107
+ /** Options for {@link makeDiacriticInsensitiveRegex}. */
108
+ type MakeRegexOptions = {
109
+ /**
110
+ * Character equivalences to allow.
111
+ * @default { alif: true, taMarbutahHa: true, alifMaqsurahYa: true }
112
+ */
113
+ equivalences?: EquivOptions;
114
+ /**
115
+ * Allow tatweel between letters (tolerate decorative elongation).
116
+ * @default true
117
+ */
118
+ allowTatweel?: boolean;
119
+ /**
120
+ * Ignore diacritics by inserting a `DIACRITICS_CLASS*` after each letter.
121
+ * @default true
122
+ */
123
+ ignoreDiacritics?: boolean;
124
+ /**
125
+ * Treat any whitespace in the needle as `\s+` for flexible matching.
126
+ * @default true
127
+ */
128
+ flexWhitespace?: boolean;
129
+ /**
130
+ * RegExp flags to use.
131
+ * @default 'u'
132
+ */
133
+ flags?: string;
134
+ };
135
+ /**
136
+ * Build a **diacritic-insensitive**, **tatweel-tolerant** RegExp for Arabic text matching.
137
+ *
138
+ * Features:
139
+ * - Optional character equivalences: ا~أ~إ~آ, ة~ه, ى~ي.
140
+ * - Optional tolerance for tatweel between characters.
141
+ * - Optional diacritic-insensitivity (by inserting a diacritics class after each char).
142
+ * - Optional flexible whitespace (needle whitespace becomes `\s+`).
143
+ *
144
+ * @param needle The Arabic text to match
145
+ * @param opts See {@link MakeRegexOptions}
146
+ * @returns A `RegExp` matching the needle with the desired tolerances
147
+ *
148
+ * @example
149
+ * const rx = makeDiacriticInsensitiveRegex('أنا إلى الآفاق');
150
+ * rx.test('انا الي الافاق'); // true
151
+ * rx.test('اَنا إلى الآفاق'); // true
113
152
  */
114
- declare const normalizeAlifVariants: (text: string) => string;
153
+ declare const makeDiacriticInsensitiveRegex: (needle: string, opts?: MakeRegexOptions) => RegExp;
115
154
 
116
155
  /**
117
156
  * Adds line breaks after punctuation marks such as periods, exclamation points, and question marks.
@@ -161,7 +200,6 @@ declare const hasWordInSingleLine: (text: string) => boolean;
161
200
  * @returns {boolean} - Returns true if the string contains only punctuation, false otherwise.
162
201
  */
163
202
  declare const isOnlyPunctuation: (text: string) => boolean;
164
- declare const cleanJunkFromText: (text: string) => string;
165
203
  /**
166
204
  * Cleans unnecessary spaces before punctuation marks such as periods, commas, and question marks.
167
205
  * Example: 'This is a sentence , with extra space .' becomes 'This is a sentence, with extra space.'.
@@ -593,14 +631,6 @@ declare const replaceSalutationsWithSymbol: (text: string) => string;
593
631
  * @returns {string} - The normalized text.
594
632
  */
595
633
  declare const normalize: (input: string) => string;
596
- /**
597
- * Replaces various apostrophe characters (‛, ’, ‘) with the standard apostrophe (').
598
- * Example: '‛ulama’ al-su‘' becomes ''ulama' al-su''.
599
- *
600
- * @param {string} text - The input text containing different apostrophe characters.
601
- * @returns {string} - The modified text with normalized apostrophes.
602
- */
603
- declare const normalizeApostrophes: (text: string) => string;
604
634
  /**
605
635
  * Strips common Arabic prefixes like 'al-', 'bi-', 'fī', 'wa-', etc. from the beginning of words.
606
636
  * Example: 'al-Bukhari' becomes 'Bukhari'.
@@ -626,4 +656,4 @@ declare const normalizeTransliteratedEnglish: (text: string) => string;
626
656
  */
627
657
  declare const extractInitials: (fullName: string) => string;
628
658
 
629
- export { addSpaceBeforeAndAfterPunctuation, addSpaceBetweenArabicTextAndNumbers, applySmartQuotes, cleanExtremeArabicUnderscores, cleanJunkFromText, cleanLiteralNewLines, cleanMultilines, cleanSpacesBeforePeriod, cleanSymbolsAndPartReferences, cleanTrailingPageNumbers, condenseAsterisks, condenseColons, condenseDashes, condenseEllipsis, condensePeriods, condenseUnderscores, convertUrduSymbolsToArabic, doubleToSingleBrackets, ensureSpaceBeforeBrackets, ensureSpaceBeforeQuotes, extractInitials, fixBracketTypos, fixCurlyBraces, fixMismatchedQuotationMarks, fixTrailingWow, formatStringBySentence, getArabicScore, hasWordInSingleLine, insertLineBreaksAfterPunctuation, isAllUppercase, isBalanced, isJsonStructureValid, isOnlyPunctuation, makeDiacriticInsensitive, normalize, normalizeAlifVariants, normalizeApostrophes, normalizeArabicPrefixesToAl, normalizeDoubleApostrophes, normalizeJsonSyntax, normalizeSlashInReferences, normalizeSpaces, normalizeTransliteratedEnglish, parsePageRanges, reduceMultilineBreaksToDouble, reduceMultilineBreaksToSingle, removeArabicPrefixes, removeDeathYear, removeMarkdownFormatting, removeNonIndexSignatures, removeNumbersAndDashes, removeRedundantPunctuation, removeSingleDigitReferences, removeSingularCodes, removeSolitaryArabicLetters, removeSpaceInsideBrackets, removeTatwil, removeUrls, replaceAlifMaqsurah, replaceDoubleBracketsWithArrows, replaceEnglishPunctuationWithArabic, replaceLineBreaksWithSpaces, replaceSalutationsWithSymbol, replaceTaMarbutahWithHa, splitByQuotes, stripAllDigits, stripBoldStyling, stripDiacritics, stripEnglishCharactersAndSymbols, stripItalicsStyling, stripStyling, stripZeroWidthCharacters, toTitleCase, trimSpaceInsideQuotes, truncate, truncateMiddle, unescapeSpaces };
659
+ export { type MakeRegexOptions, addSpaceBeforeAndAfterPunctuation, addSpaceBetweenArabicTextAndNumbers, applySmartQuotes, arabicNumeralToNumber, cleanExtremeArabicUnderscores, cleanLiteralNewLines, cleanMultilines, cleanSpacesBeforePeriod, cleanSymbolsAndPartReferences, cleanTrailingPageNumbers, condenseAsterisks, condenseColons, condenseDashes, condenseEllipsis, condensePeriods, condenseUnderscores, convertUrduSymbolsToArabic, doubleToSingleBrackets, ensureSpaceBeforeBrackets, ensureSpaceBeforeQuotes, escapeRegex, extractInitials, fixBracketTypos, fixCurlyBraces, fixMismatchedQuotationMarks, fixTrailingWow, formatStringBySentence, getArabicScore, hasWordInSingleLine, insertLineBreaksAfterPunctuation, isAllUppercase, isBalanced, isJsonStructureValid, isOnlyPunctuation, makeDiacriticInsensitive, makeDiacriticInsensitiveRegex, normalize, normalizeArabicPrefixesToAl, normalizeDoubleApostrophes, normalizeJsonSyntax, normalizeSlashInReferences, normalizeSpaces, normalizeTransliteratedEnglish, parsePageRanges, reduceMultilineBreaksToDouble, reduceMultilineBreaksToSingle, removeArabicPrefixes, removeDeathYear, removeMarkdownFormatting, removeNonIndexSignatures, removeNumbersAndDashes, removeRedundantPunctuation, removeSingleDigitReferences, removeSingularCodes, removeSolitaryArabicLetters, removeSpaceInsideBrackets, removeUrls, replaceDoubleBracketsWithArrows, replaceEnglishPunctuationWithArabic, replaceLineBreaksWithSpaces, replaceSalutationsWithSymbol, splitByQuotes, stripAllDigits, stripBoldStyling, stripItalicsStyling, stripStyling, toTitleCase, trimSpaceInsideQuotes, truncate, truncateMiddle, unescapeSpaces };
package/dist/index.js CHANGED
@@ -1,12 +1,10 @@
1
- var z=e=>e.replace(/(?<!\d ?ه|اه)ـ(?=\r?$)|^ـ(?!اهـ)/gm,""),E=e=>e.replace(/ھ/g,"\u0647").replace(/ی/g,"\u064A"),P=e=>{if(!e)return 0;let r=/[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]/g,t=/[0-9\u0660-\u0669\u06F0-\u06F9]/g,n=/[^\s0-9\u0660-\u0669\u06F0-\u06F9]/g,s=e.replace(t,""),o=s.match(r)||[],u=s.match(n)||[];return u.length===0?0:o.length/u.length},M=e=>e.replace(/ و /g," \u0648"),L=e=>e.replace(/([\u0600-\u06FF]+)(\d+)/g,"$1 $2"),T=e=>e.replace(/[a-zA-Z]+[0-9]*|[¬§`ﷺ=]|\/{2,}|&/g," "),v=e=>e.replace(/(?<![0-9] ?)-|(?<=[\u0600-\u06FF])\s?\d\s?(?=[\u0600-\u06FF])/g," ").replace(/(?<=[\u0600-\u06FF]\s)(\d+\s)+\d+(?=(\s[\u0600-\u06FF]|$))/g," "),_=e=>e.replace(/[[({][\u0621-\u064A\u0660-\u0669][\])}]/g,""),N=e=>e.replace(/(^| )[\u0621-\u064A]( |$)/g," "),w=e=>e.replace(/(?<![0-9ه])(?<![0-9]\s*)ـ/g,""),W=e=>e.replace(/[ة]/g,"\u0647"),I=e=>e.replace(/[\u0610\u0611\u0612\u0613\u0614\u0615\u0616\u0617\u0618\u0619\u061A\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652\u0653\u0654\u0655\u0656\u0657\u0658\u065A\u065B\u065C\u065D\u065E\u0640]/g,""),R=e=>e.replace(/[\u200B-\u200F\u202A-\u202E\u2060-\u2064\uFEFF]/g," "),U=e=>e.replace(/[ىي]/g,"\u064A"),j=e=>e.replace(/\?|؟\./g,"\u061F").replace(/(;|؛)\s*(\1\s*)*/g,"\u061B").replace(/,|-،/g,"\u060C"),Q=e=>e.replace(/[أإآ]/g,"\u0627");var J=e=>{let r=/([.?!؟])/g;return e.replace(r,`$1
1
+ var E=e=>parseInt(e.replace(/[\u0660-\u0669]/g,r=>(r.charCodeAt(0)-1632).toString()),10),P=e=>e.replace(/(?<!\d ?ه|اه)ـ(?=\r?$)|^ـ(?!اهـ)/gm,""),v=e=>e.replace(/ھ/g,"\u0647").replace(/ی/g,"\u064A"),z=e=>{if(!e)return 0;let r=/[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]/g,t=/[0-9\u0660-\u0669\u06F0-\u06F9]/g,s=/[^\s0-9\u0660-\u0669\u06F0-\u06F9]/g,n=e.replace(t,""),u=n.match(r)||[],a=n.match(s)||[];return a.length===0?0:u.length/a.length},L=e=>e.replace(/ و /g," \u0648"),T=e=>e.replace(/([\u0600-\u06FF]+)(\d+)/g,"$1 $2"),R=e=>e.replace(/(?<![0-9] ?)-|(?<=[\u0600-\u06FF])\s?\d\s?(?=[\u0600-\u06FF])/g," ").replace(/(?<=[\u0600-\u06FF]\s)(\d+\s)+\d+(?=(\s[\u0600-\u06FF]|$))/g," "),w=e=>e.replace(/[[({][\u0621-\u064A\u0660-\u0669][\])}]/g,""),_=e=>e.replace(/(^| )[\u0621-\u064A]( |$)/g," "),I=e=>e.replace(/\?|؟\./g,"\u061F").replace(/(;|؛)\s*(\1\s*)*/g,"\u061B").replace(/,|-،/g,"\u060C");var D="[\\u0610-\\u061A\\u064B-\\u065F\\u0670\\u06D6-\\u06ED]",m="\\u0640",c=e=>e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&"),W=(e,r={})=>{let{equivalences:t={alif:!0,taMarbutahHa:!0,alifMaqsurahYa:!0},allowTatweel:s=!0,ignoreDiacritics:n=!0,flexWhitespace:u=!0,flags:a="u"}=r;if(e.length>5e3)throw new Error("makeDiacriticInsensitiveRegex: needle too long");let i=o=>{switch(o){case"\u0627":case"\u0623":case"\u0625":case"\u0622":return t.alif?"[\u0627\u0623\u0625\u0622]":"\u0627";case"\u0629":case"\u0647":return t.taMarbutahHa?"[\u0647\u0629]":c(o);case"\u0649":case"\u064A":return t.alifMaqsurahYa?"[\u0649\u064A]":c(o);default:return c(o)}},l=`${n?`${D}*`:""}${s?`${m}*`:""}`,p="";for(let o of Array.from(e))/\s/.test(o)?p+=u?"\\s+":"\\s*":p+=`${i(o)}${l}`;return new RegExp(p,a)};var U=e=>{let r=/([.?!؟])/g;return e.replace(r,`$1
2
2
  `).replace(/\n\s+/g,`
3
- `).trim()},Z=e=>e.replace(/( ?)([.!?,،؟;؛])((?![ '”“)"\]\n])|(?=\s{2,}))/g,"$1$2 ").replace(/\s([.!?,،؟;؛])\s*([ '”“)"\]\n])/g,"$1$2").replace(/([^\s\w\d'”“)"\]]+)\s+([.!?,،؟;؛])|([.!?,،؟;؛])\s+$/g,"$1$2$3").replace(/(?<=\D)( ?: ?)(?!(\d+:)|(:\d+))|(?<=\d) ?: ?(?=\D)|(?<=\D) ?: ?(?=\d)/g,": "),q=e=>e.replace(/[“”]/g,'"').replace(/"([^"]*)"/g,"\u201C$1\u201D").replace(/^”/g,"\u201C"),H=e=>e.replace(/\\n|\r/g,`
4
- `),g=e=>e.replace(/^ +| +$/gm,""),V=e=>/^\s*\S+\s*$/gm.test(e),D=e=>/^[\u0020-\u002f\u003a-\u0040\u005b-\u0060\u007b-\u007e0-9٠-٩]+$/.test(e),K=e=>g(e).split(`
5
- `).filter(n=>!n||n.length>1&&!D(n)).join(`
6
- `).trim(),G=e=>e.replace(/\s+([.؟!,،؛:?])/g,"$1"),Y=e=>e.replace(/(\*\s*)+/g,"*"),X=e=>e.replace(/[.-]?:[.-]?/g,":"),ee=e=>e.replace(/-{2,}/g,"-"),re=e=>e.replace(/\.{2,}/g,"\u2026"),te=e=>e.replace(/(\n\s*){3,}/g,`
3
+ `).trim()},q=e=>e.replace(/( ?)([.!?,،؟;؛])((?![ '”“)"\]\n])|(?=\s{2,}))/g,"$1$2 ").replace(/\s([.!?,،؟;؛])\s*([ '”“)"\]\n])/g,"$1$2").replace(/([^\s\w\d'”“)"\]]+)\s+([.!?,،؟;؛])|([.!?,،؟;؛])\s+$/g,"$1$2$3").replace(/(?<=\D)( ?: ?)(?!(\d+:)|(:\d+))|(?<=\d) ?: ?(?=\D)|(?<=\D) ?: ?(?=\d)/g,": "),j=e=>e.replace(/[“”]/g,'"').replace(/"([^"]*)"/g,"\u201C$1\u201D").replace(/^”/g,"\u201C"),Q=e=>e.replace(/\\n|\r/g,`
4
+ `),H=e=>e.replace(/^ +| +$/gm,""),J=e=>/^\s*\S+\s*$/gm.test(e),Y=e=>/^[\u0020-\u002f\u003a-\u0040\u005b-\u0060\u007b-\u007e0-9٠-٩]+$/.test(e),Z=e=>e.replace(/\s+([.؟!,،؛:?])/g,"$1"),K=e=>e.replace(/(\*\s*)+/g,"*"),V=e=>e.replace(/[.-]?:[.-]?/g,":"),G=e=>e.replace(/-{2,}/g,"-"),X=e=>e.replace(/\.{2,}/g,"\u2026"),ee=e=>e.replace(/(\n\s*){3,}/g,`
7
5
 
8
- `),ne=e=>e.replace(/(\n\s*){2,}/g,`
9
- `),se=e=>e.replace(/\. +\./g,"."),ue=e=>e.replace(/ـ{2,}/g,"\u0640").replace(/_+/g,"_"),oe=e=>e.replace(/(\(|\)){2,}|(\[|\]){2,}/g,"$1$2"),ce=e=>e.replace(/(\S) *(\([^)]*\))/g,"$1 $2"),ae=e=>e.replace(/(\S) *(«[^»]*»)/g,"$1 $2"),ie=e=>e.replace(/\(«|\( \(/g,"\xAB").replace(/»\)|\) \)/g,"\xBB").replace(/\)([0-9\u0660-\u0669]+)\)/g,"($1)").replace(/\)([0-9\u0660-\u0669]+)\(/g,"($1)"),pe=e=>{let r=e;return r=r.replace(/\(([^(){}]+)\}/g,"{$1}"),r.replace(/\{([^(){}]+)\)/g,"{$1}")},le=e=>e.replace(/«([^»)]+)\)/g,"\xAB$1\xBB").replace(/\(([^()]+)»/g,"\xAB$1\xBB").replace(/«([^»]+)(?=\s*$|$)/g,"\xAB$1\xBB"),ge=e=>{let r=/^\((?:\d+|۱|۲|۳|۴|۵|۶|۷|۸|۹)\)\s/,t=[],n=e.split(`
10
- `),s="";return n.forEach(o=>{let u=o.trim(),c=r.test(u),a=/^\(\d+\/\d+\)/.test(u);if(c&&!a)s&&(t.push(s.trim()),s=""),t.push(u);else{s+=`${u} `;let l=s.trim().slice(-1);/[.!؟]/.test(l)&&(t.push(s.trim()),s="")}}),s&&t.push(s.trim()),t.join(`
11
- `)},De=e=>{let r=e.replace(/[^\p{L}]/gu,"");return r.length===0?!1:r===r.toUpperCase()},me=e=>e.replace(/(\d+)\s?\/\s?(\d+)/g,"$1/$2"),i=e=>e.replace(/[ \t]+/g," "),de=e=>e.replace(/([؟!])[.،]/g,"$1"),xe=e=>e.replace(/([[(])\s*(.*?)\s*([\])])/g,"$1$2$3"),Ce=e=>e.replace(/\(\(\s?/g,"\xAB").replace(/\s?\)\)/g,"\xBB"),m=e=>e.normalize("NFKD").replace(/[\u0300-\u036f]/g,"").trim(),d=e=>{let r={"\u{1D44E}":"I","\u{1D468}":"g","\u{1D63C}":"!","\u{1D44F}":"J","\u{1D469}":"h","\u{1D63D}":"?","\u{1D450}":"K","\u{1D46A}":"i","\u{1D451}":"L","\u{1D46B}":"j","\u{1D63F}":",","\u{1D452}":"M","\u{1D46C}":"k","\u{1D640}":".","\u{1D453}":"N","\u{1D46D}":"l","\u{1D454}":"O","\u{1D46E}":"m","\u{1D46F}":"n","\u{1D456}":"Q","\u{1D470}":"o","\u{1D457}":"R","\u{1D471}":"p","\u{1D458}":"S","\u{1D472}":"q","\u{1D459}":"T","\u{1D473}":"r","\u{1D647}":"-","\u{1D45A}":"U","\u{1D474}":"s","\u{1D45B}":"V","\u{1D475}":"t","\u{1D45C}":"W","\u{1D476}":"u","\u{1D45D}":"X","\u{1D477}":"v","\u{1D45E}":"Y","\u{1D478}":"w","\u{1D45F}":"Z","\u{1D479}":"x","\u{1D446}":"A","\u{1D47A}":"y","\u{1D447}":"B","\u{1D47B}":"z","\u{1D462}":"a","\u{1D448}":"C","\u{1D463}":"b","\u{1D449}":"D","\u{1D464}":"c","\u{1D44A}":"E","\u{1D465}":"d","\u{1D44B}":"F","\u{1D466}":"e","\u{1D44C}":"G","\u{1D467}":"f","\u{1D44D}":"H","\u{1D455}":"P"};return e.replace(/[\uD835\uDC62-\uD835\uDC7B\uD835\uDC46-\uD835\uDC5F\u{1D63C}-\u{1D647}]/gu,t=>r[t]||t)},he=e=>d(m(e)),fe=e=>e.toLowerCase().split(" ").map(r=>{if(r.length===0)return r;let t=r.match(/\p{L}/u);if(!t||t.index===void 0)return r;let n=t.index;return r.slice(0,n)+r.charAt(n).toUpperCase()+r.slice(n+1)}).join(" "),$e=e=>e.replace(/([“”"]|«) *(.*?) 
*([“”"]|»)/g,"$1$2$3");var be=e=>{let r=e.replace(/(\b\d+\b)(?=:)/g,'"$1"');return r=r.replace(/:\s*'([^']+)'/g,': "$1"'),r=r.replace(/:\s*"([^"]+)"/g,': "$1"'),JSON.stringify(JSON.parse(r))},Se=e=>/^{(\s*(\d+|'[^']*'|"[^"]*")\s*:\s*('|")[^'"]*\3\s*,)*(?:\s*(\d+|'[^']*'|"[^"]*")\s*:\s*('|")[^'"]*\5\s*)}$/.test(e.trim()),Fe=e=>{let r=/(?:[^\s"]+|"(.*?)")+/g;return(e.match(r)||[]).map(t=>t.startsWith('"')?t.slice(1,-1):t)},x=e=>{let r=0;for(let t of e)t==='"'&&r++;return r%2===0},C={"(":")","[":"]","{":"}"},h=new Set(["(","[","{"]),f=new Set([")","]","}"]),$=e=>{let r=[];for(let t of e)if(h.has(t))r.push(t);else if(f.has(t)){let n=r.pop();if(!n||C[n]!==t)return!1}return r.length===0},Be=e=>x(e)&&$(e),ye=e=>{if(e.includes("-")){let[r,t]=e.split("-").map(Number);if(r>t)throw new Error("Start page cannot be greater than end page");return Array.from({length:t-r+1},(n,s)=>r+s)}else return e.split(",").map(Number)};var ze=e=>e.replace(/ *\(?:\d+(?:\/\d+){0,2}\)? *| *\[\d+(?:\/\d+)?\] *| *«\d+» *|\d+\/\d+(?:\/\d+)?|[،§{}۝؍‎﴿﴾<>;_؟»«:!،؛[\]…ـ¬.\\/*()"]/g," "),Ee=e=>e.replace(/-\[\d+\]-/g,""),Pe=e=>e.replace(/\s+/g," "),Me=e=>e.replace(/[0-9]/g,""),Le=e=>e.replace(/\[(d)\.\s*\d{1,4}[hH]\]\s*|\((d)\.\s*\d{1,4}[hH]\)\s*/g,""),Te=e=>e.replace(/[\d-]/g,""),ve=e=>e.replace(/\(\d{1}\)|\[\d{1}\]|«\d»/g,""),_e=e=>e.replace(/https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/g,""),Ne=e=>e.replace(/\*\*([^*]+)\*\*/g,"$1").replace(/__([^_]+)__/g,"$1").replace(/\*([^*]+)\*/g,"$1").replace(/_([^_]+)_/g,"$1").replace(/~~([^~]+)~~/g,"$1").replace(/^\s*>\s?/gm,"").replace(/!\[[^\]]*]\([^)]*\)/g,"").replace(/\[([^\]]+)]\([^)]*\)/g,"$1").replace(/^#+\s*/gm,"").replace(/^\s*[-*+]\s+/gm,"").replace(/^\s*\d+\.\s+/gm,"").replace(/`/gm,""),we=(e,r=150)=>e.length>r?`${e.substring(0,r-1)}\u2026`:e,We=(e,r=50,t)=>{if(e.length<=r)return e;let n=Math.max(3,Math.floor(r/3)),s=t??n,u=r-1-s;if(u<1)return`${e.substring(0,r-1)}\u2026`;let 
c=e.substring(0,u),a=e.substring(e.length-s);return`${c}\u2026${a}`},Ie=e=>e.replace(/\\ /g," ").trim(),A="[\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652]",b=[["\u0627","\u0622","\u0623","\u0625"],["\u0629","\u0647"],["\u0649","\u064A"]],p=e=>e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&"),S=e=>{for(let r of b)if(r.includes(e))return`[${r.map(t=>p(t)).join("")}]`;return p(e)},F=e=>e.normalize("NFC").replace(/[\u200C\u200D]/g,"").replace(/\s+/g," ").trim(),Re=e=>{let r=`${A}*`,t=F(e);return Array.from(t).map(n=>S(n)+r).join("")};var Qe=e=>e.replace(/(\b|\W)(Al |Al-|Ar-|As-|Adh-|Ad-|Ats-|Ath |Ath-|Az |Az-|az-|adh-|as-|ar-)/g,"$1al-").replace(/(\b|\W)(Ash-S|ash-S)/g,"$1al-S").replace(/al- (.+?)\b/g,"al-$1"),Oe=e=>e.replace(/ʿʿ/g,"\u02BF").replace(/ʾʾ/g,"\u02BE"),Je=e=>e.replace(/\(peace be upon him\)|(Messenger of (Allah|Allāh)|Messenger|Prophet|Mu[hḥ]ammad) *\((s[^)]*m|peace[^)]*him|May[^)]*him|may[^)]*him)\)*/gi,"$1 \uFDFA").replace(/,\s*ﷺ\s*,/g," \uFDFA"),B=e=>e.normalize("NFKD").replace(/[\u0300-\u036f]/g,"").replace(/`|ʾ|ʿ|-/g,""),Ze=e=>e.replace(/‛|’|‘/g,"'"),y=e=>i(e.replace(/(\bal-|\bli-|\bbi-|\bfī|\bwa[-\s]+|\bl-|\bliʿl|\Bʿalá|\Bʿan|\bb\.)/gi,"")),k=e=>B(y(e)),qe=e=>k(e).trim().split(/[ -]/).slice(0,2).map(t=>t.charAt(0).toUpperCase()).join("");export{Z as addSpaceBeforeAndAfterPunctuation,L as addSpaceBetweenArabicTextAndNumbers,q as applySmartQuotes,z as cleanExtremeArabicUnderscores,K as cleanJunkFromText,H as cleanLiteralNewLines,g as cleanMultilines,G as cleanSpacesBeforePeriod,ze as cleanSymbolsAndPartReferences,Ee as cleanTrailingPageNumbers,Y as condenseAsterisks,X as condenseColons,ee as condenseDashes,re as condenseEllipsis,se as condensePeriods,ue as condenseUnderscores,E as convertUrduSymbolsToArabic,oe as doubleToSingleBrackets,ce as ensureSpaceBeforeBrackets,ae as ensureSpaceBeforeQuotes,qe as extractInitials,ie as fixBracketTypos,pe as fixCurlyBraces,le as fixMismatchedQuotationMarks,M as fixTrailingWow,ge as formatStringBySentence,P as 
getArabicScore,V as hasWordInSingleLine,J as insertLineBreaksAfterPunctuation,De as isAllUppercase,Be as isBalanced,Se as isJsonStructureValid,D as isOnlyPunctuation,Re as makeDiacriticInsensitive,B as normalize,Q as normalizeAlifVariants,Ze as normalizeApostrophes,Qe as normalizeArabicPrefixesToAl,Oe as normalizeDoubleApostrophes,be as normalizeJsonSyntax,me as normalizeSlashInReferences,i as normalizeSpaces,k as normalizeTransliteratedEnglish,ye as parsePageRanges,te as reduceMultilineBreaksToDouble,ne as reduceMultilineBreaksToSingle,y as removeArabicPrefixes,Le as removeDeathYear,Ne as removeMarkdownFormatting,v as removeNonIndexSignatures,Te as removeNumbersAndDashes,de as removeRedundantPunctuation,ve as removeSingleDigitReferences,_ as removeSingularCodes,N as removeSolitaryArabicLetters,xe as removeSpaceInsideBrackets,w as removeTatwil,_e as removeUrls,U as replaceAlifMaqsurah,Ce as replaceDoubleBracketsWithArrows,j as replaceEnglishPunctuationWithArabic,Pe as replaceLineBreaksWithSpaces,Je as replaceSalutationsWithSymbol,W as replaceTaMarbutahWithHa,Fe as splitByQuotes,Me as stripAllDigits,m as stripBoldStyling,I as stripDiacritics,T as stripEnglishCharactersAndSymbols,d as stripItalicsStyling,he as stripStyling,R as stripZeroWidthCharacters,fe as toTitleCase,$e as trimSpaceInsideQuotes,we as truncate,We as truncateMiddle,Ie as unescapeSpaces};
6
+ `),re=e=>e.replace(/(\n\s*){2,}/g,`
7
+ `),te=e=>e.replace(/\. +\./g,"."),ne=e=>e.replace(/ـ{2,}/g,"\u0640").replace(/_+/g,"_"),se=e=>e.replace(/(\(|\)){2,}|(\[|\]){2,}/g,"$1$2"),ae=e=>e.replace(/(\S) *(\([^)]*\))/g,"$1 $2"),oe=e=>e.replace(/(\S) *(«[^»]*»)/g,"$1 $2"),ue=e=>e.replace(/\(«|\( \(/g,"\xAB").replace(/»\)|\) \)/g,"\xBB").replace(/\)([0-9\u0660-\u0669]+)\)/g,"($1)").replace(/\)([0-9\u0660-\u0669]+)\(/g,"($1)"),ce=e=>{let r=e;return r=r.replace(/\(([^(){}]+)\}/g,"{$1}"),r.replace(/\{([^(){}]+)\)/g,"{$1}")},ie=e=>e.replace(/«([^»)]+)\)/g,"\xAB$1\xBB").replace(/\(([^()]+)»/g,"\xAB$1\xBB").replace(/«([^»]+)(?=\s*$|$)/g,"\xAB$1\xBB"),le=e=>{let r=/^\((?:\d+|۱|۲|۳|۴|۵|۶|۷|۸|۹)\)\s/,t=[],s=e.split(`
8
+ `),n="";return s.forEach(u=>{let a=u.trim(),i=r.test(a),l=/^\(\d+\/\d+\)/.test(a);if(i&&!l)n&&(t.push(n.trim()),n=""),t.push(a);else{n+=`${a} `;let p=n.trim().slice(-1);/[.!؟]/.test(p)&&(t.push(n.trim()),n="")}}),n&&t.push(n.trim()),t.join(`
9
+ `)},pe=e=>{let r=e.replace(/[^\p{L}]/gu,"");return r.length===0?!1:r===r.toUpperCase()},ge=e=>e.replace(/(\d+)\s?\/\s?(\d+)/g,"$1/$2"),g=e=>e.replace(/[ \t]+/g," "),De=e=>e.replace(/([؟!])[.،]/g,"$1"),me=e=>e.replace(/([[(])\s*(.*?)\s*([\])])/g,"$1$2$3"),xe=e=>e.replace(/\(\(\s?/g,"\xAB").replace(/\s?\)\)/g,"\xBB"),x=e=>e.normalize("NFKD").replace(/[\u0300-\u036f]/g,"").trim(),d=e=>{let r={"\u{1D44E}":"I","\u{1D468}":"g","\u{1D63C}":"!","\u{1D44F}":"J","\u{1D469}":"h","\u{1D63D}":"?","\u{1D450}":"K","\u{1D46A}":"i","\u{1D451}":"L","\u{1D46B}":"j","\u{1D63F}":",","\u{1D452}":"M","\u{1D46C}":"k","\u{1D640}":".","\u{1D453}":"N","\u{1D46D}":"l","\u{1D454}":"O","\u{1D46E}":"m","\u{1D46F}":"n","\u{1D456}":"Q","\u{1D470}":"o","\u{1D457}":"R","\u{1D471}":"p","\u{1D458}":"S","\u{1D472}":"q","\u{1D459}":"T","\u{1D473}":"r","\u{1D647}":"-","\u{1D45A}":"U","\u{1D474}":"s","\u{1D45B}":"V","\u{1D475}":"t","\u{1D45C}":"W","\u{1D476}":"u","\u{1D45D}":"X","\u{1D477}":"v","\u{1D45E}":"Y","\u{1D478}":"w","\u{1D45F}":"Z","\u{1D479}":"x","\u{1D446}":"A","\u{1D47A}":"y","\u{1D447}":"B","\u{1D47B}":"z","\u{1D462}":"a","\u{1D448}":"C","\u{1D463}":"b","\u{1D449}":"D","\u{1D464}":"c","\u{1D44A}":"E","\u{1D465}":"d","\u{1D44B}":"F","\u{1D466}":"e","\u{1D44C}":"G","\u{1D467}":"f","\u{1D44D}":"H","\u{1D455}":"P"};return e.replace(/[\uD835\uDC62-\uD835\uDC7B\uD835\uDC46-\uD835\uDC5F\u{1D63C}-\u{1D647}]/gu,t=>r[t]||t)},de=e=>d(x(e)),Ce=e=>e.toLowerCase().split(" ").map(r=>{if(r.length===0)return r;let t=r.match(/\p{L}/u);if(!t||t.index===void 0)return r;let s=t.index;return r.slice(0,s)+r.charAt(s).toUpperCase()+r.slice(s+1)}).join(" "),fe=e=>e.replace(/([“”"]|«) *(.*?) 
*([“”"]|»)/g,"$1$2$3");var $e=e=>{let r=e.replace(/(\b\d+\b)(?=:)/g,'"$1"');return r=r.replace(/:\s*'([^']+)'/g,': "$1"'),r=r.replace(/:\s*"([^"]+)"/g,': "$1"'),JSON.stringify(JSON.parse(r))},be=e=>/^{(\s*(\d+|'[^']*'|"[^"]*")\s*:\s*('|")[^'"]*\3\s*,)*(?:\s*(\d+|'[^']*'|"[^"]*")\s*:\s*('|")[^'"]*\5\s*)}$/.test(e.trim()),Ae=e=>{let r=/(?:[^\s"]+|"(.*?)")+/g;return(e.match(r)||[]).map(t=>t.startsWith('"')?t.slice(1,-1):t)},C=e=>{let r=0;for(let t of e)t==='"'&&r++;return r%2===0},f={"(":")","[":"]","{":"}"},h=new Set(["(","[","{"]),$=new Set([")","]","}"]),b=e=>{let r=[];for(let t of e)if(h.has(t))r.push(t);else if($.has(t)){let s=r.pop();if(!s||f[s]!==t)return!1}return r.length===0},Se=e=>C(e)&&b(e),Fe=e=>{if(e.includes("-")){let[r,t]=e.split("-").map(Number);if(r>t)throw new Error("Start page cannot be greater than end page");return Array.from({length:t-r+1},(s,n)=>r+n)}else return e.split(",").map(Number)};var ye=e=>e.replace(/ *\(?:\d+(?:\/\d+){0,2}\)? *| *\[\d+(?:\/\d+)?\] *| *«\d+» *|\d+\/\d+(?:\/\d+)?|[،§{}۝؍‎﴿﴾<>;_؟»«:!،؛[\]…ـ¬.\\/*()"]/g," "),Me=e=>e.replace(/-\[\d+\]-/g,""),Ee=e=>e.replace(/\s+/g," "),Pe=e=>e.replace(/[0-9]/g,""),ve=e=>e.replace(/\[(d)\.\s*\d{1,4}[hH]\]\s*|\((d)\.\s*\d{1,4}[hH]\)\s*/g,""),ze=e=>e.replace(/[\d-]/g,""),Le=e=>e.replace(/\(\d{1}\)|\[\d{1}\]|«\d»/g,""),Te=e=>e.replace(/https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/g,""),Re=e=>e.replace(/\*\*([^*]+)\*\*/g,"$1").replace(/__([^_]+)__/g,"$1").replace(/\*([^*]+)\*/g,"$1").replace(/_([^_]+)_/g,"$1").replace(/~~([^~]+)~~/g,"$1").replace(/^\s*>\s?/gm,"").replace(/!\[[^\]]*]\([^)]*\)/g,"").replace(/\[([^\]]+)]\([^)]*\)/g,"$1").replace(/^#+\s*/gm,"").replace(/^\s*[-*+]\s+/gm,"").replace(/^\s*\d+\.\s+/gm,"").replace(/`/gm,""),we=(e,r=150)=>e.length>r?`${e.substring(0,r-1)}\u2026`:e,_e=(e,r=50,t)=>{if(e.length<=r)return e;let s=Math.max(3,Math.floor(r/3)),n=t??s,a=r-1-n;if(a<1)return`${e.substring(0,r-1)}\u2026`;let 
i=e.substring(0,a),l=e.substring(e.length-n);return`${i}\u2026${l}`},Ie=e=>e.replace(/\\ /g," ").trim(),A="[\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652]",S=[["\u0627","\u0622","\u0623","\u0625"],["\u0629","\u0647"],["\u0649","\u064A"]],F=e=>{for(let r of S)if(r.includes(e))return`[${r.map(t=>c(t)).join("")}]`;return c(e)},B=e=>e.normalize("NFC").replace(/[\u200C\u200D]/g,"").replace(/\s+/g," ").trim(),Ne=e=>{let r=`${A}*`,t=B(e);return Array.from(t).map(s=>F(s)+r).join("")};var Ue=e=>e.replace(/(\b|\W)(Al |Al-|Ar-|As-|Adh-|Ad-|Ats-|Ath |Ath-|Az |Az-|az-|adh-|as-|ar-)/g,"$1al-").replace(/(\b|\W)(Ash-S|ash-S)/g,"$1al-S").replace(/al- (.+?)\b/g,"al-$1"),qe=e=>e.replace(/ʿʿ/g,"\u02BF").replace(/ʾʾ/g,"\u02BE"),je=e=>e.replace(/\(peace be upon him\)|(Messenger of (Allah|Allāh)|Messenger|Prophet|Mu[hḥ]ammad) *\((s[^)]*m|peace[^)]*him|May[^)]*him|may[^)]*him)\)*/gi,"$1 \uFDFA").replace(/,\s*ﷺ\s*,/g," \uFDFA"),k=e=>e.normalize("NFKD").replace(/[\u0300-\u036f]/g,"").replace(/`|ʾ|ʿ|-/g,""),y=e=>g(e.replace(/(\bal-|\bli-|\bbi-|\bfī|\bwa[-\s]+|\bl-|\bliʿl|\Bʿalá|\Bʿan|\bb\.)/gi,"")),M=e=>k(y(e)),Qe=e=>M(e).trim().split(/[ -]/).slice(0,2).map(t=>t.charAt(0).toUpperCase()).join("");export{q as addSpaceBeforeAndAfterPunctuation,T as addSpaceBetweenArabicTextAndNumbers,j as applySmartQuotes,E as arabicNumeralToNumber,P as cleanExtremeArabicUnderscores,Q as cleanLiteralNewLines,H as cleanMultilines,Z as cleanSpacesBeforePeriod,ye as cleanSymbolsAndPartReferences,Me as cleanTrailingPageNumbers,K as condenseAsterisks,V as condenseColons,G as condenseDashes,X as condenseEllipsis,te as condensePeriods,ne as condenseUnderscores,v as convertUrduSymbolsToArabic,se as doubleToSingleBrackets,ae as ensureSpaceBeforeBrackets,oe as ensureSpaceBeforeQuotes,c as escapeRegex,Qe as extractInitials,ue as fixBracketTypos,ce as fixCurlyBraces,ie as fixMismatchedQuotationMarks,L as fixTrailingWow,le as formatStringBySentence,z as getArabicScore,J as hasWordInSingleLine,U as 
insertLineBreaksAfterPunctuation,pe as isAllUppercase,Se as isBalanced,be as isJsonStructureValid,Y as isOnlyPunctuation,Ne as makeDiacriticInsensitive,W as makeDiacriticInsensitiveRegex,k as normalize,Ue as normalizeArabicPrefixesToAl,qe as normalizeDoubleApostrophes,$e as normalizeJsonSyntax,ge as normalizeSlashInReferences,g as normalizeSpaces,M as normalizeTransliteratedEnglish,Fe as parsePageRanges,ee as reduceMultilineBreaksToDouble,re as reduceMultilineBreaksToSingle,y as removeArabicPrefixes,ve as removeDeathYear,Re as removeMarkdownFormatting,R as removeNonIndexSignatures,ze as removeNumbersAndDashes,De as removeRedundantPunctuation,Le as removeSingleDigitReferences,w as removeSingularCodes,_ as removeSolitaryArabicLetters,me as removeSpaceInsideBrackets,Te as removeUrls,xe as replaceDoubleBracketsWithArrows,I as replaceEnglishPunctuationWithArabic,Ee as replaceLineBreaksWithSpaces,je as replaceSalutationsWithSymbol,Ae as splitByQuotes,Pe as stripAllDigits,x as stripBoldStyling,d as stripItalicsStyling,de as stripStyling,Ce as toTitleCase,fe as trimSpaceInsideQuotes,we as truncate,_e as truncateMiddle,Ie as unescapeSpaces};
12
10
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/arabic.ts","../src/formatting.ts","../src/parsing.ts","../src/sanitization.ts","../src/transliteration.ts"],"sourcesContent":["/**\n * Removes extreme Arabic underscores (ـ) that appear at the beginning or end of a line or in text.\n * Does not affect Hijri dates (e.g., 1424هـ) or specific Arabic terms.\n * Example: \"ـThis is a textـ\" will be changed to \"This is a text\".\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with extreme underscores removed.\n */\nexport const cleanExtremeArabicUnderscores = (text: string) => {\n return text.replace(/(?<!\\d ?ه|اه)ـ(?=\\r?$)|^ـ(?!اهـ)/gm, '');\n};\n\n/**\n * Converts Urdu symbols to their Arabic equivalents.\n * Example: 'ھذا' will be changed to 'هذا', 'ی' to 'ي'.\n * @param {string} text - The input text containing Urdu symbols.\n * @returns {string} - The modified text with Urdu symbols converted to Arabic symbols.\n */\nexport const convertUrduSymbolsToArabic = (text: string) => {\n return text.replace(/ھ/g, 'ه').replace(/ی/g, 'ي');\n};\n\n/**\n * Calculates the proportion of Arabic characters in text relative to total non-whitespace, non-digit characters.\n * Digits (ASCII and Arabic-Indic variants) are excluded from both numerator and denominator.\n * @param text - The input text to analyze\n * @returns A decimal between 0-1 representing the Arabic character ratio (0 = no Arabic, 1 = all Arabic)\n */\nexport const getArabicScore = (text: string) => {\n if (!text) {\n return 0;\n }\n // Arabic letters (letters/ranges only)\n const arabicLettersPattern = /[\\u0600-\\u06FF\\u0750-\\u077F\\u08A0-\\u08FF\\uFB50-\\uFDFF\\uFE70-\\uFEFF]/g;\n // ASCII digits + Arabic-Indic digits + Extended Arabic-Indic digits\n const allDigitPattern = /[0-9\\u0660-\\u0669\\u06F0-\\u06F9]/g;\n // Counted characters exclude whitespace and all listed digits\n const countedCharsPattern = /[^\\s0-9\\u0660-\\u0669\\u06F0-\\u06F9]/g;\n const cleaned = 
text.replace(allDigitPattern, '');\n const arabicMatches = cleaned.match(arabicLettersPattern) || [];\n const totalMatches = cleaned.match(countedCharsPattern) || [];\n return totalMatches.length === 0 ? 0 : arabicMatches.length / totalMatches.length;\n};\n\n/**\n * Fixes the trailing \"و\" (wow) in phrases such as \"عليكم و رحمة\" to \"عليكم ورحمة\".\n * This function attempts to correct phrases where \"و\" appears unnecessarily, particularly in greetings.\n * Example: 'السلام عليكم و رحمة' will be changed to 'السلام عليكم ورحمة'.\n * @param {string} text - The input text containing the \"و\" character.\n * @returns {string} - The modified text with unnecessary trailing \"و\" characters corrected.\n */\nexport const fixTrailingWow = (text: string) => {\n return text.replace(/ و /g, ' و');\n};\n\n/**\n * Inserts a space between Arabic text and numbers.\n * Example: 'الآية37' will be changed to 'الآية 37'.\n * @param {string} text - The input text containing Arabic text followed by numbers.\n * @returns {string} - The modified text with spaces inserted between Arabic text and numbers.\n */\nexport const addSpaceBetweenArabicTextAndNumbers = (text: string) => {\n return text.replace(/([\\u0600-\\u06FF]+)(\\d+)/g, '$1 $2');\n};\n\n/**\n * Removes English letters and symbols from the text, including ampersands, slashes, and other symbols.\n * Example: 'أحب & لنفسي' will be changed to 'أحب لنفسي'.\n * @param {string} text - The input text containing English letters and symbols.\n * @returns {string} - The modified text with English letters and symbols removed.\n */\nexport const stripEnglishCharactersAndSymbols = (text: string) => {\n return text.replace(/[a-zA-Z]+[0-9]*|[¬§`ﷺ=]|\\/{2,}|&/g, ' ');\n};\n\n/**\n * Removes single-digit numbers surrounded by Arabic text. 
Also removes dashes (-) not followed by a number.\n * For example, removes '3' from 'وهب 3 وقال' but does not remove '121' from 'لوحه 121 الجرح'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with non-index numbers and dashes removed.\n */\nexport const removeNonIndexSignatures = (text: string) => {\n return text\n .replace(/(?<![0-9] ?)-|(?<=[\\u0600-\\u06FF])\\s?\\d\\s?(?=[\\u0600-\\u06FF])/g, ' ')\n .replace(/(?<=[\\u0600-\\u06FF]\\s)(\\d+\\s)+\\d+(?=(\\s[\\u0600-\\u06FF]|$))/g, ' ');\n};\n\n/**\n * Removes characters enclosed in square brackets [] or parentheses () if they are Arabic letters or Arabic-Indic numerals.\n * Example: '[س]' or '(س)' will be removed.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with singular codes removed.\n */\nexport const removeSingularCodes = (text: string) => {\n return text.replace(/[[({][\\u0621-\\u064A\\u0660-\\u0669][\\])}]/g, '');\n};\n\n/**\n * Removes solitary Arabic letters unless they are the 'ha' letter, which is used in Hijri years.\n * Example: \"ب ا الكلمات ت\" will be changed to \"ا الكلمات\".\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with solitary Arabic letters removed.\n */\nexport const removeSolitaryArabicLetters = (text: string) => {\n return text.replace(/(^| )[\\u0621-\\u064A]( |$)/g, ' ');\n};\n\n/**\n * Removes tatweel characters while preserving dates references and numbered list items.\n * Example: \"1435/3/29 هـ\" remains as \"1435/3/29 هـ\" but \"أبـــتِـــكَةُ\" becomes \"أبتِكَةُ\"\n * @param text The text to format.\n * @returns The modified text with the tatweel characters removed.\n */\nexport const removeTatwil = (text: string) => {\n // Don't remove tatweel if:\n // 1. Immediately preceded by a number or ه (for dates like \"1435هـ\")\n // 2. 
Preceded by a number with optional spaces (for list items like \"3 ـ\")\n return text.replace(/(?<![0-9ه])(?<![0-9]\\s*)ـ/g, '');\n};\n\n/**\n * Replaces the 'tah marbutah' (ة) character with 'ha' (ه).\n * Example: 'مدرسة' will be changed to 'مدرسه'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with 'ta marbutah' replaced by 'ha'.\n */\nexport const replaceTaMarbutahWithHa = (text: string) => {\n return text.replace(/[ة]/g, 'ه');\n};\n\n/**\n * Removes Arabic diacritics (tashkeel) and the tatweel (elongation) character.\n * Example: 'مُحَمَّدٌ' will be changed to 'محمد'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with diacritics and tatweel removed.\n */\nexport const stripDiacritics = (text: string) => {\n return text.replace(\n /[\\u0610\\u0611\\u0612\\u0613\\u0614\\u0615\\u0616\\u0617\\u0618\\u0619\\u061A\\u064B\\u064C\\u064D\\u064E\\u064F\\u0650\\u0651\\u0652\\u0653\\u0654\\u0655\\u0656\\u0657\\u0658\\u065A\\u065B\\u065C\\u065D\\u065E\\u0640]/g,\n '',\n );\n};\n\n/**\n * Removes zero-width joiners (ZWJ) and other zero-width characters from the input text.\n * Zero-width characters include U+200B to U+200F, U+202A to U+202E, U+2060 to U+2064, and U+FEFF.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with zero-width characters removed.\n */\nexport const stripZeroWidthCharacters = (text: string) => {\n return text.replace(/[\\u200B-\\u200F\\u202A-\\u202E\\u2060-\\u2064\\uFEFF]/g, ' ');\n};\n\n/**\n * Replaces the 'alif maqsurah' (ى) character with the regular 'ya' (ي).\n * Example: 'رؤيى' will be changed to 'رؤيي'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with 'alif maqsurah' replaced by 'ya'.\n */\nexport const replaceAlifMaqsurah = (text: string) => {\n return text.replace(/[ىي]/g, 'ي');\n};\n\n/**\n * Replaces 
English punctuation (question mark and semicolon) with their Arabic equivalents.\n * Example: '?' will be replaced with '؟', and ';' with '؛'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with English punctuation replaced by Arabic punctuation.\n */\nexport const replaceEnglishPunctuationWithArabic = (text: string) => {\n return text\n .replace(/\\?|؟\\./g, '؟')\n .replace(/(;|؛)\\s*(\\1\\s*)*/g, '؛')\n .replace(/,|-،/g, '،');\n};\n\n/**\n * Simplifies all forms of 'alif' (أ, إ, and آ) to the basic 'ا'.\n * Example: 'أنا إلى الآفاق' will be changed to 'انا الى الافاق'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with simplified 'alif' characters.\n */\nexport const normalizeAlifVariants = (text: string) => {\n return text.replace(/[أإآ]/g, 'ا');\n};\n","/**\n * Adds line breaks after punctuation marks such as periods, exclamation points, and question marks.\n * Example: 'Text.' 
becomes 'Text.\\n'.\n * @param {string} text - The input text containing punctuation.\n * @returns {string} - The modified text with line breaks added after punctuation.\n */\nexport const insertLineBreaksAfterPunctuation = (text: string) => {\n // Define the punctuation marks that should trigger a new line\n const punctuation = /([.?!؟])/g;\n\n // Replace occurrences of punctuation marks followed by a space with the punctuation mark, a newline, and the space\n const formattedText = text.replace(punctuation, '$1\\n').replace(/\\n\\s+/g, '\\n').trim();\n\n return formattedText;\n};\n\n/**\n * Adds spaces before and after punctuation, except for certain cases like quoted text or ayah references.\n * Example: 'Text,word' becomes 'Text, word'.\n * @param {string} text - The input text containing punctuation.\n * @returns {string} - The modified text with spaces added before and after punctuation.\n */\nexport const addSpaceBeforeAndAfterPunctuation = (text: string) => {\n return text\n .replace(/( ?)([.!?,،؟;؛])((?![ '”“)\"\\]\\n])|(?=\\s{2,}))/g, '$1$2 ')\n .replace(/\\s([.!?,،؟;؛])\\s*([ '”“)\"\\]\\n])/g, '$1$2')\n .replace(/([^\\s\\w\\d'”“)\"\\]]+)\\s+([.!?,،؟;؛])|([.!?,،؟;؛])\\s+$/g, '$1$2$3')\n .replace(/(?<=\\D)( ?: ?)(?!(\\d+:)|(:\\d+))|(?<=\\d) ?: ?(?=\\D)|(?<=\\D) ?: ?(?=\\d)/g, ': ');\n};\n\n/**\n * Turns regular double quotes surrounding a body of text into smart quotes.\n * Also fixes incorrect starting quotes by ensuring the string starts with an opening quote if needed.\n * Example: 'The \"quick brown\" fox' becomes 'The “quick brown” fox'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with smart quotes applied.\n */\nexport const applySmartQuotes = (text: string) => {\n return text\n .replace(/[“”]/g, '\"')\n .replace(/\"([^\"]*)\"/g, '“$1”')\n .replace(/^”/g, '“');\n};\n\n/**\n * Replaces literal new line characters (\\n) and carriage returns (\\r) with actual line breaks.\n * Example: 
'A\\\\nB' becomes 'A\\nB'.\n * @param {string} text - The input text containing literal new lines.\n * @returns {string} - The modified text with actual line breaks.\n */\nexport const cleanLiteralNewLines = (text: string) => {\n return text.replace(/\\\\n|\\r/g, '\\n');\n};\n\n/**\n * Removes trailing spaces from each line in a multiline string.\n * Example: \" This is a line \\nAnother line \" becomes \"This is a line\\nAnother line\".\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with trailing spaces removed.\n */\nexport const cleanMultilines = (text: string) => {\n return text.replace(/^ +| +$/gm, '');\n};\n\n/**\n * Detects if a word is by itself in a line.\n * @param text The text to check.\n * @returns true if there exists a word in any of the lines in the text that is by itself.\n */\nexport const hasWordInSingleLine = (text: string): boolean => {\n return /^\\s*\\S+\\s*$/gm.test(text);\n};\n\n/**\n * Checks if the input string consists of only punctuation characters.\n * @param {string} text - The input text to check.\n * @returns {boolean} - Returns true if the string contains only punctuation, false otherwise.\n */\nexport const isOnlyPunctuation = (text: string): boolean => {\n const regex = /^[\\u0020-\\u002f\\u003a-\\u0040\\u005b-\\u0060\\u007b-\\u007e0-9٠-٩]+$/;\n return regex.test(text);\n};\n\nexport const cleanJunkFromText = (text: string) => {\n const newBody = cleanMultilines(text);\n const lines = newBody.split('\\n').filter((line) => {\n return !line || (line.length > 1 && !isOnlyPunctuation(line));\n });\n\n return lines.join('\\n').trim();\n};\n\n/**\n * Cleans unnecessary spaces before punctuation marks such as periods, commas, and question marks.\n * Example: 'This is a sentence , with extra space .' 
becomes 'This is a sentence, with extra space.'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with cleaned spaces before punctuation.\n */\nexport const cleanSpacesBeforePeriod = (text: string) => {\n return text.replace(/\\s+([.؟!,،؛:?])/g, '$1');\n};\n\n/**\n * Condenses multiple asterisks (*) into a single one.\n * Example: '***' becomes '*'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed asterisks.\n */\nexport const condenseAsterisks = (text: string) => {\n return text.replace(/(\\*\\s*)+/g, '*');\n};\n\n/**\n * Replaces occurrences of colons surrounded by periods (e.g., '.:.' or ':') with a single colon.\n * Example: 'This.:. is a test' becomes 'This: is a test'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed colons.\n */\nexport const condenseColons = (text: string) => {\n return text.replace(/[.-]?:[.-]?/g, ':');\n};\n\n/**\n * Condenses two or more dashes (--) into a single dash (-).\n * Example: 'This is some ---- text' becomes 'This is some - text'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed dashes.\n */\nexport const condenseDashes = (text: string) => {\n return text.replace(/-{2,}/g, '-');\n};\n\n/**\n * Replaces sequences of two or more periods (e.g., '...') with an ellipsis character (…).\n * Example: 'This is a test...' 
becomes 'This is a test…'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with ellipses condensed.\n */\nexport const condenseEllipsis = (text: string) => {\n return text.replace(/\\.{2,}/g, '…');\n};\n\n/**\n * Reduces multiple consecutive line breaks (3 or more) to exactly 2 line breaks.\n * Example: 'This is line 1\\n\\n\\n\\nThis is line 2' becomes 'This is line 1\\n\\nThis is line 2'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed line breaks.\n */\nexport const reduceMultilineBreaksToDouble = (text: string) => {\n return text.replace(/(\\n\\s*){3,}/g, '\\n\\n');\n};\n\n/**\n * Reduces multiple consecutive line breaks (2 or more) to exactly 1 line break.\n * Example: 'This is line 1\\n\\nThis is line 2' becomes 'This is line 1\\nThis is line 2'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed line breaks.\n */\nexport const reduceMultilineBreaksToSingle = (text: string) => {\n return text.replace(/(\\n\\s*){2,}/g, '\\n');\n};\n\n/**\n * Condenses multiple periods separated by spaces (e.g., '. . .') into a single period.\n * Example: 'This . . . is a test' becomes 'This. is a test'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed periods.\n */\nexport const condensePeriods = (text: string) => {\n return text.replace(/\\. 
+\\./g, '.');\n};\n\n/**\n * Condenses multiple underscores (__) or Arabic Tatweel characters (ـــــ) into a single underscore or Tatweel.\n * Example: 'This is ـــ some text __' becomes 'This is ـ some text _'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed underscores.\n */\nexport const condenseUnderscores = (text: string) => {\n return text.replace(/ـ{2,}/g, 'ـ').replace(/_+/g, '_');\n};\n\n/**\n * Replaces double parentheses or brackets with single ones.\n * Example: '((text))' becomes '(text)'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed brackets.\n */\nexport const doubleToSingleBrackets = (text: string) => {\n return text.replace(/(\\(|\\)){2,}|(\\[|\\]){2,}/g, '$1$2');\n};\n\n/**\n * Ensures at most 1 space exists before any word before brackets.\n * Adds a space if there isn't one, or reduces multiple spaces to one.\n * @param {string} text - The input text to modify\n * @returns {string} - The modified text with proper spacing before brackets\n */\nexport const ensureSpaceBeforeBrackets = (text: string) => {\n return text.replace(/(\\S) *(\\([^)]*\\))/g, '$1 $2');\n};\n\n/**\n * Ensures at most 1 space exists before any word before Arabic quotation marks.\n * Adds a space if there isn't one, or reduces multiple spaces to one.\n * @param {string} text - The input text to modify\n * @returns {string} - The modified text with proper spacing before Arabic quotes\n */\nexport const ensureSpaceBeforeQuotes = (text: string) => {\n return text.replace(/(\\S) *(«[^»]*»)/g, '$1 $2');\n};\n\n/**\n * Fixes common bracket and quotation mark typos in text\n * Corrects malformed patterns like \"(«\", \"»)\", and misplaced digits in brackets\n * @param text - Input text that may contain bracket typos\n * @returns Text with corrected bracket and quotation mark combinations\n */\nexport const fixBracketTypos = (text: 
string) => {\n return (\n text\n .replace(/\\(«|\\( \\(/g, '«')\n .replace(/»\\)|\\) \\)/g, '»')\n // Fix \")digit)\" pattern to \"(digit)\"\n .replace(/\\)([0-9\\u0660-\\u0669]+)\\)/g, '($1)')\n // Fix \")digit(\" pattern to \"(digit)\"\n .replace(/\\)([0-9\\u0660-\\u0669]+)\\(/g, '($1)')\n );\n};\n\n/**\n * Fixes mismatched curly braces by converting incorrect bracket/brace combinations\n * to proper curly braces { }\n * @param text - Input text that may contain mismatched curly braces\n * @returns Text with corrected curly brace pairs\n */\nexport const fixCurlyBraces = (text: string) => {\n // Process each mismatch type separately to avoid interference\n let result = text;\n\n // Fix ( content } to { content }\n result = result.replace(/\\(([^(){}]+)\\}/g, '{$1}');\n\n // Fix { content ) to { content }\n return result.replace(/\\{([^(){}]+)\\)/g, '{$1}');\n};\n\n/**\n * Fixes mismatched quotation marks in Arabic text by converting various\n * incorrect bracket/quote combinations to proper Arabic quotation marks (« »)\n * @param text - Input text that may contain mismatched quotation marks\n * @returns Text with corrected Arabic quotation marks\n */\nexport const fixMismatchedQuotationMarks = (text: string) => {\n return (\n text\n // Matches mismatched quotation marks: « followed by content and closed with )\n .replace(/«([^»)]+)\\)/g, '«$1»')\n // Fix reverse mismatched ( content » to « content »\n .replace(/\\(([^()]+)»/g, '«$1»')\n // Matches any unclosed « quotation marks at end of content\n .replace(/«([^»]+)(?=\\s*$|$)/g, '«$1»')\n );\n};\n\n/**\n * Formats a multiline string by joining sentences and maintaining footnotes on their own lines.\n * Footnotes are identified by Arabic and English numerals.\n * Example: 'Sentence one.\\n(1) A footnote.\\nSentence two.' 
remains the same, while regular sentences are joined.\n * @param {string} input - The input text containing sentences and footnotes.\n * @returns {string} - The formatted text.\n */\nexport const formatStringBySentence = (input: string) => {\n const footnoteRegex = /^\\((?:\\d+|۱|۲|۳|۴|۵|۶|۷|۸|۹)\\)\\s/;\n const sentences: string[] = [];\n const lines = input.split('\\n');\n let currentSentence = '';\n\n lines.forEach((line) => {\n const trimmedLine = line.trim();\n const isFootnote = footnoteRegex.test(trimmedLine);\n const isNumber = /^\\(\\d+\\/\\d+\\)/.test(trimmedLine);\n\n if (isFootnote && !isNumber) {\n if (currentSentence) {\n sentences.push(currentSentence.trim());\n currentSentence = '';\n }\n sentences.push(trimmedLine);\n } else {\n currentSentence += `${trimmedLine} `;\n const lastChar = currentSentence.trim().slice(-1);\n if (/[.!؟]/.test(lastChar)) {\n sentences.push(currentSentence.trim());\n currentSentence = '';\n }\n }\n });\n\n // Add any remaining text to the output\n if (currentSentence) {\n sentences.push(currentSentence.trim());\n }\n\n return sentences.join('\\n');\n};\n\n/**\n * Detects if text is entirely in uppercase letters\n * @param text - The text to check\n * @returns true if all alphabetic characters are uppercase, false otherwise\n */\nexport const isAllUppercase = (text: string) => {\n // Remove non-letter characters (including numbers, punctuation, spaces)\n // \\p{L} matches any Unicode letter character\n const lettersOnly = text.replace(/[^\\p{L}]/gu, '');\n\n // If there are no letter characters, return false\n if (lettersOnly.length === 0) {\n return false;\n }\n\n return lettersOnly === lettersOnly.toUpperCase();\n};\n\n/**\n * Removes unnecessary spaces around slashes in references.\n * Example: '127 / 11' becomes '127/11'.\n * @param {string} text - The input text containing references.\n * @returns {string} - The modified text with spaces removed around slashes.\n */\nexport const normalizeSlashInReferences = (text: 
string) => {\n return text.replace(/(\\d+)\\s?\\/\\s?(\\d+)/g, '$1/$2');\n};\n\n/**\n * Reduces multiple spaces or tabs to a single space.\n * Example: 'This is a text' becomes 'This is a text'.\n * @param {string} text - The input text containing extra spaces.\n * @returns {string} - The modified text with reduced spaces.\n */\nexport const normalizeSpaces = (text: string) => {\n return text.replace(/[ \\t]+/g, ' ');\n};\n\n/**\n * Removes redundant punctuation marks that follow Arabic question marks or exclamation marks.\n * This function cleans up text by removing periods (.) or Arabic commas (،) that immediately\n * follow Arabic question marks (؟) or exclamation marks (!), as they are considered redundant\n * in proper Arabic punctuation.\n *\n * @param text - The Arabic text to clean up\n * @returns The text with redundant punctuation removed\n *\n * @example\n * ```typescript\n * removeRedundantPunctuation('كيف حالك؟.') // Returns: 'كيف حالك؟'\n * removeRedundantPunctuation('ممتاز!،') // Returns: 'ممتاز!'\n * removeRedundantPunctuation('هذا جيد.') // Returns: 'هذا جيد.' 
(unchanged)\n * ```\n */\nexport const removeRedundantPunctuation = (text: string) => {\n return text.replace(/([؟!])[.،]/g, '$1');\n};\n\n/**\n * Removes spaces inside brackets, parentheses, or square brackets.\n * Example: '( a b )' becomes '(a b)'.\n * @param {string} text - The input text with spaces inside brackets.\n * @returns {string} - The modified text with spaces removed inside brackets.\n */\nexport const removeSpaceInsideBrackets = (text: string) => {\n return text.replace(/([[(])\\s*(.*?)\\s*([\\])])/g, '$1$2$3');\n};\n\n/**\n * Replaces double parentheses single a single arrow variation.\n * Example: '((text))' becomes '«text»'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed brackets.\n */\nexport const replaceDoubleBracketsWithArrows = (text: string) => {\n return text.replace(/\\(\\(\\s?/g, '«').replace(/\\s?\\)\\)/g, '»');\n};\n\n/**\n * Removes bold styling from text by normalizing the string and removing stylistic characters.\n * @param {string} text - The input text containing bold characters.\n * @returns {string} - The modified text with bold styling removed.\n */\nexport const stripBoldStyling = (text: string) => {\n // Normalize the string to NFKD form\n const normalizedString = text.normalize('NFKD');\n\n // Remove combining marks (diacritics) and stylistic characters from the string\n return normalizedString.replace(/[\\u0300-\\u036f]/g, '').trim();\n};\n\n/**\n * Removes italicized characters by replacing italic Unicode characters with their normal counterparts.\n * Example: '𝘼𝘽𝘾' becomes 'ABC'.\n * @param {string} text - The input text containing italicized characters.\n * @returns {string} - The modified text with italics removed.\n */\nexport const stripItalicsStyling = (text: string) => {\n const italicMap: Record<string, string> = {\n '\\uD835\\uDC4E': 'I',\n '\\uD835\\uDC68': 'g',\n '\\u{1D63C}': '!',\n '\\uD835\\uDC4F': 'J',\n '\\uD835\\uDC69': 'h',\n 
'\\u{1D63D}': '?',\n '\\uD835\\uDC50': 'K',\n '\\uD835\\uDC6A': 'i',\n '\\uD835\\uDC51': 'L',\n '\\uD835\\uDC6B': 'j',\n '\\u{1D63F}': ',',\n '\\uD835\\uDC52': 'M',\n '\\uD835\\uDC6C': 'k',\n '\\u{1D640}': '.',\n '\\uD835\\uDC53': 'N',\n '\\uD835\\uDC6D': 'l',\n '\\uD835\\uDC54': 'O',\n '\\uD835\\uDC6E': 'm',\n '\\uD835\\uDC6F': 'n',\n '\\uD835\\uDC56': 'Q',\n '\\uD835\\uDC70': 'o',\n '\\uD835\\uDC57': 'R',\n '\\uD835\\uDC71': 'p',\n '\\uD835\\uDC58': 'S',\n '\\uD835\\uDC72': 'q',\n '\\uD835\\uDC59': 'T',\n '\\uD835\\uDC73': 'r',\n '\\u{1D647}': '-',\n '\\uD835\\uDC5A': 'U',\n '\\uD835\\uDC74': 's',\n '\\uD835\\uDC5B': 'V',\n '\\uD835\\uDC75': 't',\n '\\uD835\\uDC5C': 'W',\n '\\uD835\\uDC76': 'u',\n '\\uD835\\uDC5D': 'X',\n '\\uD835\\uDC77': 'v',\n '\\uD835\\uDC5E': 'Y',\n '\\uD835\\uDC78': 'w',\n '\\uD835\\uDC5F': 'Z',\n '\\uD835\\uDC79': 'x',\n '\\uD835\\uDC46': 'A',\n '\\uD835\\uDC7A': 'y',\n '\\uD835\\uDC47': 'B',\n '\\uD835\\uDC7B': 'z',\n '\\uD835\\uDC62': 'a',\n '\\uD835\\uDC48': 'C',\n '\\uD835\\uDC63': 'b',\n '\\uD835\\uDC49': 'D',\n '\\uD835\\uDC64': 'c',\n '\\uD835\\uDC4A': 'E',\n '\\uD835\\uDC65': 'd',\n '\\uD835\\uDC4B': 'F',\n '\\uD835\\uDC66': 'e',\n '\\uD835\\uDC4C': 'G',\n '\\uD835\\uDC67': 'f',\n '\\uD835\\uDC4D': 'H',\n '\\uD835\\uDC55': 'P',\n };\n\n return text.replace(/[\\uD835\\uDC62-\\uD835\\uDC7B\\uD835\\uDC46-\\uD835\\uDC5F\\u{1D63C}-\\u{1D647}]/gu, (match) => {\n return italicMap[match] || match;\n });\n};\n\n/**\n * Removes all bold and italic styling from the input text.\n * @param {string} text - The input text to remove styling from.\n * @returns {string} - The modified text with all styling removed.\n */\nexport const stripStyling = (text: string) => {\n return stripItalicsStyling(stripBoldStyling(text));\n};\n\n/**\n * Converts a string to title case (first letter of each word capitalized)\n * @param str - The input string to convert\n * @returns String with each word's first letter capitalized\n */\nexport const toTitleCase = (str: 
string) => {\n return str\n .toLowerCase()\n .split(' ')\n .map((word) => {\n if (word.length === 0) return word;\n // Find the first Unicode letter in the chunk\n const match = word.match(/\\p{L}/u);\n if (!match || match.index === undefined) return word;\n const i = match.index;\n return word.slice(0, i) + word.charAt(i).toUpperCase() + word.slice(i + 1);\n })\n .join(' ');\n};\n\n/**\n * Removes unnecessary spaces inside quotes.\n * Example: '“ Text ”' becomes '“Text”'.\n * @param {string} text - The input text with spaces inside quotes.\n * @returns {string} - The modified text with spaces removed inside quotes.\n */\nexport const trimSpaceInsideQuotes = (text: string) => {\n return text.replace(/([“”\"]|«) *(.*?) *([“”\"]|»)/g, '$1$2$3');\n};\n","/**\n * Converts a string that resembles JSON but with numeric keys and single-quoted values\n * into valid JSON format. This function replaces numeric keys with quoted numeric keys\n * and ensures all values are double-quoted as required by JSON.\n *\n * @param {string} str - The input string that needs to be fixed into valid JSON.\n * @returns {string} - A valid JSON string.\n *\n * @example\n * const result = normalizeJsonSyntax(\"{10: 'abc', 20: 'def'}\");\n * console.log(result); // '{\"10\": \"abc\", \"20\": \"def\"}'\n */\nexport const normalizeJsonSyntax = (str: string) => {\n let input = str.replace(/(\\b\\d+\\b)(?=:)/g, '\"$1\"');\n input = input.replace(/:\\s*'([^']+)'/g, ': \"$1\"');\n input = input.replace(/:\\s*\"([^\"]+)\"/g, ': \"$1\"');\n\n return JSON.stringify(JSON.parse(input));\n};\n\n/**\n * Checks if a given string resembles a JSON object with numeric or quoted keys and values\n * that are single or double quoted. 
This is useful for detecting malformed JSON-like\n * structures that can be fixed by the `normalizeJsonSyntax` function.\n *\n * @param {string} str - The input string to check.\n * @returns {boolean} - Returns true if the string is JSON-like, false otherwise.\n *\n * @example\n * const result = isJsonStructureValid(\"{10: 'abc', 'key': 'value'}\");\n * console.log(result); // true\n */\nexport const isJsonStructureValid = (str: string) => {\n // Checks for a pattern with numeric keys or quoted keys and values in quotes\n const jsonLikePattern =\n /^{(\\s*(\\d+|'[^']*'|\"[^\"]*\")\\s*:\\s*('|\")[^'\"]*\\3\\s*,)*(?:\\s*(\\d+|'[^']*'|\"[^\"]*\")\\s*:\\s*('|\")[^'\"]*\\5\\s*)}$/;\n return jsonLikePattern.test(str.trim());\n};\n\n/**\n * Splits a string by spaces and quoted substrings.\n *\n * This function takes an input string and splits it into parts where substrings\n * enclosed in double quotes are treated as a single part. Other substrings\n * separated by spaces are split normally.\n *\n * @param {string} query - The input string to be split.\n * @returns {string[]} An array of strings, with quoted substrings kept intact.\n *\n * @example\n * const result = splitByQuotes('\"This is\" \"a part of the\" \"string and\"');\n * console.log(result); // [\"This is\", \"a part of the\", \"string and\"]\n */\nexport const splitByQuotes = (query: string): string[] => {\n const regex = /(?:[^\\s\"]+|\"(.*?)\")+/g;\n return (query.match(regex) || []).map((s: string) => (s.startsWith('\"') ? 
s.slice(1, -1) : s));\n};\n\n/**\n * Checks if all double quotes in a string are balanced (even count).\n * A string has balanced quotes if every opening quote has a corresponding closing quote.\n *\n * @param str - The string to check for balanced quotes\n * @returns True if quotes are balanced (even count), false otherwise\n *\n * @example\n * ```typescript\n * areQuotesBalanced('Hello \"world\"') // Returns: true\n * areQuotesBalanced('Hello \"world') // Returns: false\n * areQuotesBalanced('No quotes') // Returns: true\n * ```\n */\nconst areQuotesBalanced = (str: string) => {\n let quoteCount = 0;\n for (const char of str) {\n if (char === '\"') {\n quoteCount++;\n }\n }\n return quoteCount % 2 === 0;\n};\n\nconst brackets = { '(': ')', '[': ']', '{': '}' };\nconst openBrackets = new Set(['(', '[', '{']);\nconst closeBrackets = new Set([')', ']', '}']);\n\n/**\n * Checks if all brackets in a string are properly balanced and matched.\n * This function validates that every opening bracket has a corresponding closing bracket\n * in the correct order and of the matching type.\n *\n * Supported bracket types: parentheses (), square brackets [], curly braces {}\n *\n * @param str - The string to check for balanced brackets\n * @returns True if all brackets are properly balanced and matched, false otherwise\n *\n * @example\n * ```typescript\n * areBracketsBalanced('(hello [world])') // Returns: true\n * areBracketsBalanced('(hello [world)') // Returns: false (mismatched)\n * areBracketsBalanced('((hello))') // Returns: true\n * areBracketsBalanced('(hello') // Returns: false (unclosed)\n * ```\n */\n\nconst areBracketsBalanced = (str: string) => {\n const stack: string[] = [];\n\n for (const char of str) {\n if (openBrackets.has(char)) {\n stack.push(char);\n } else if (closeBrackets.has(char)) {\n const lastOpen = stack.pop();\n if (!lastOpen || brackets[lastOpen as keyof typeof brackets] !== char) {\n return false;\n }\n }\n }\n\n return stack.length === 
0;\n};\n\n/**\n * Checks if both quotes and brackets are balanced in a string.\n * This function combines quote balance checking and bracket balance checking\n * to ensure the entire string has properly balanced punctuation.\n *\n * A string is considered balanced when:\n * - All double quotes have matching pairs (even count)\n * - All brackets (parentheses, square brackets, curly braces) are properly matched and nested\n *\n * @param str - The string to check for balanced quotes and brackets\n * @returns True if both quotes and brackets are balanced, false otherwise\n *\n * @example\n * ```typescript\n * isBalanced('He said \"Hello (world)!\"') // Returns: true\n * isBalanced('He said \"Hello (world!\"') // Returns: false (unbalanced quote)\n * isBalanced('He said \"Hello (world)\"') // Returns: false (unbalanced quote)\n * isBalanced('Hello (world) [test]') // Returns: true\n * ```\n */\nexport const isBalanced = (str: string) => {\n return areQuotesBalanced(str) && areBracketsBalanced(str);\n};\n\n/**\n * Parses page input string into array of page numbers, supporting ranges and lists\n * @param pageInput - Page specification string (e.g., \"1-5\" or \"1,3,5\")\n * @returns Array of page numbers\n * @throws Error when start page exceeds end page in range\n */\nexport const parsePageRanges = (pageInput: string): number[] => {\n if (pageInput.includes('-')) {\n const [start, end] = pageInput.split('-').map(Number);\n\n if (start > end) {\n throw new Error('Start page cannot be greater than end page');\n }\n\n return Array.from({ length: end - start + 1 }, (_, i) => start + i);\n } else {\n return pageInput.split(',').map(Number);\n }\n};\n","/**\n * Removes various symbols, part references, and numerical markers from the text.\n * Example: '(1) (2/3)' becomes ''.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with symbols and part references removed.\n */\nexport const cleanSymbolsAndPartReferences = 
(text: string) => {\n return text.replace(\n / *\\(?:\\d+(?:\\/\\d+){0,2}\\)? *| *\\[\\d+(?:\\/\\d+)?\\] *| *«\\d+» *|\\d+\\/\\d+(?:\\/\\d+)?|[،§{}۝؍‎﴿﴾<>;_؟»«:!،؛[\\]…ـ¬.\\\\/*()\"]/g,\n ' ',\n );\n};\n\n/**\n * Removes trailing page numbers formatted as '-[46]-' from the text.\n * Example: 'This is some -[46]- text' becomes 'This is some text'.\n * @param {string} text - The input text with trailing page numbers.\n * @returns {string} - The modified text with page numbers removed.\n */\nexport const cleanTrailingPageNumbers = (text: string) => {\n return text.replace(/-\\[\\d+\\]-/g, '');\n};\n\n/**\n * Replaces consecutive line breaks and whitespace characters with a single space.\n * Example: 'a\\nb' becomes 'a b'.\n * @param {string} text - The input text containing line breaks or multiple spaces.\n * @returns {string} - The modified text with spaces.\n */\nexport const replaceLineBreaksWithSpaces = (text: string) => {\n return text.replace(/\\s+/g, ' ');\n};\n\n/**\n * Removes all numeric digits from the text.\n * Example: 'abc123' becomes 'abc'.\n * @param {string} text - The input text containing digits.\n * @returns {string} - The modified text with digits removed.\n */\nexport const stripAllDigits = (text: string) => {\n return text.replace(/[0-9]/g, '');\n};\n\n/**\n * Removes death year references like \"(d. 390H)\" and \"[d. 100h]\" from the text.\n * Example: 'Sufyān ibn ‘Uyaynah (d. 
198h)' becomes 'Sufyān ibn ‘Uyaynah'.\n * @param {string} text - The input text containing death year references.\n * @returns {string} - The modified text with death years removed.\n */\nexport const removeDeathYear = (text: string) => {\n return text.replace(/\\[(d)\\.\\s*\\d{1,4}[hH]\\]\\s*|\\((d)\\.\\s*\\d{1,4}[hH]\\)\\s*/g, '');\n};\n\n/**\n * Removes numeric digits and dashes from the text.\n * Example: 'ABC 123-Xyz' becomes 'ABC Xyz'.\n * @param {string} text - The input text containing digits and dashes.\n * @returns {string} - The modified text with numbers and dashes removed.\n */\nexport const removeNumbersAndDashes = (text: string) => {\n return text.replace(/[\\d-]/g, '');\n};\n\n/**\n * Removes single digit references like (1), «2», [3] from the text.\n * Example: 'Ref (1), Ref «2», Ref [3]' becomes 'Ref , Ref , Ref '.\n * @param {string} text - The input text containing single digit references.\n * @returns {string} - The modified text with single digit references removed.\n */\nexport const removeSingleDigitReferences = (text: string) => {\n return text.replace(/\\(\\d{1}\\)|\\[\\d{1}\\]|«\\d»/g, '');\n};\n\n/**\n * Removes URLs from the text.\n * Example: 'Visit https://example.com' becomes 'Visit '.\n * @param {string} text - The input text containing URLs.\n * @returns {string} - The modified text with URLs removed.\n */\nexport const removeUrls = (text: string) => {\n return text.replace(\n /https?:\\/\\/(www\\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/g,\n '',\n );\n};\n\n/**\n * Removes common Markdown formatting syntax from text\n * @param text - The input text containing Markdown formatting\n * @returns Text with Markdown formatting removed (bold, italics, headers, lists, backticks)\n */\nexport const removeMarkdownFormatting = (text: string) => {\n return (\n text\n // Remove bold first (**text**) - must come before italics\n .replace(/\\*\\*([^*]+)\\*\\*/g, '$1')\n // Remove bold with underscores 
(__text__)\n .replace(/__([^_]+)__/g, '$1')\n // Remove italics (*text*)\n .replace(/\\*([^*]+)\\*/g, '$1')\n // Remove italics with underscores (_text_)\n .replace(/_([^_]+)_/g, '$1')\n // Remove strikethrough (~~text~~)\n .replace(/~~([^~]+)~~/g, '$1')\n // Remove blockquotes\n .replace(/^\\s*>\\s?/gm, '')\n // Remove images ![alt](url)\n .replace(/!\\[[^\\]]*]\\([^)]*\\)/g, '')\n // Convert links [text](url) -> text\n .replace(/\\[([^\\]]+)]\\([^)]*\\)/g, '$1')\n // Remove headers (# ## ### etc.)\n .replace(/^#+\\s*/gm, '')\n // Remove unordered list markers (- * +)\n .replace(/^\\s*[-*+]\\s+/gm, '')\n // Remove ordered list markers (1. 2. etc.)\n .replace(/^\\s*\\d+\\.\\s+/gm, '')\n // Remove backticks\n .replace(/`/gm, '')\n );\n};\n\n/**\n * Truncates a string to a specified length, adding an ellipsis if truncated.\n *\n * @param val - The string to truncate\n * @param n - Maximum length of the string (default: 150)\n * @returns The truncated string with ellipsis if needed, otherwise the original string\n *\n * @example\n * ```javascript\n * truncate('The quick brown fox jumps over the lazy dog', 20);\n * // Output: 'The quick brown fox…'\n *\n * truncate('Short text', 50);\n * // Output: 'Short text'\n * ```\n */\nexport const truncate = (val: string, n = 150): string => (val.length > n ? 
`${val.substring(0, n - 1)}…` : val);\n\n/**\n * Truncates a string from the middle, preserving both the beginning and end portions.\n *\n * @param text - The string to truncate\n * @param maxLength - Maximum length of the resulting string (default: 50)\n * @param endLength - Number of characters to preserve at the end (default: 1/3 of maxLength, minimum 3)\n * @returns The truncated string with ellipsis in the middle if needed, otherwise the original string\n *\n * @example\n * ```javascript\n * truncateMiddle('The quick brown fox jumps right over the lazy dog', 20);\n * // Output: 'The quick bro…zy dog'\n *\n * truncateMiddle('The quick brown fox jumps right over the lazy dog', 25, 8);\n * // Output: 'The quick brown …lazy dog'\n *\n * truncateMiddle('Short text', 50);\n * // Output: 'Short text'\n * ```\n */\nexport const truncateMiddle = (text: string, maxLength: number = 50, endLength?: number) => {\n if (text.length <= maxLength) {\n return text;\n }\n\n // Default end length is roughly 1/3 of max length, minimum 3 characters\n const defaultEndLength = Math.max(3, Math.floor(maxLength / 3));\n const actualEndLength = endLength ?? 
defaultEndLength;\n\n // Reserve space for the ellipsis character (1 char)\n const availableLength = maxLength - 1;\n\n // Calculate start length (remaining space after end portion)\n const startLength = availableLength - actualEndLength;\n\n // Ensure we have at least some characters at the start\n if (startLength < 1) {\n // If we can't fit both start and end, just truncate normally\n return `${text.substring(0, maxLength - 1)}…`;\n }\n\n const startPortion = text.substring(0, startLength);\n const endPortion = text.substring(text.length - actualEndLength);\n\n return `${startPortion}…${endPortion}`;\n};\n\n/**\n * Unescapes backslash-escaped spaces and trims whitespace from both ends.\n * Commonly used to clean file paths that have been escaped when pasted into terminals.\n *\n * @param input - The string to unescape and clean\n * @returns The cleaned string with escaped spaces converted to regular spaces and trimmed\n *\n * @example\n * ```javascript\n * unescapeSpaces('My\\\\ Folder\\\\ Name');\n * // Output: 'My Folder Name'\n *\n * unescapeSpaces(' /path/to/My\\\\ Document.txt ');\n * // Output: '/path/to/My Document.txt'\n *\n * unescapeSpaces('regular text');\n * // Output: 'regular text'\n * ```\n */\nexport const unescapeSpaces = (input: string) => input.replace(/\\\\ /g, ' ').trim();\n\n/**\n * Arabic diacritics (Tashkeel/Harakat).\n */\nconst DIACRITICS_CLASS = '[\\u064B\\u064C\\u064D\\u064E\\u064F\\u0650\\u0651\\u0652]';\n\n/**\n * Groups of equivalent Arabic characters — any character in a group should match\n * any other character in the same group.\n */\nconst EQUIV_GROUPS: string[][] = [\n ['\\u0627', '\\u0622', '\\u0623', '\\u0625'], // ا, آ, أ, إ\n ['\\u0629', '\\u0647'], // ة <-> ه\n ['\\u0649', '\\u064A'], // ى <-> ي\n];\n\n/** Escape regex special characters (if the search word contains punctuation). 
*/\nconst escapeForRegex = (s: string) => s.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n\n/** Return a character class for a char if it belongs to an equivalence group. */\nconst getEquivClass = (ch: string): string => {\n for (const group of EQUIV_GROUPS) {\n if (group.includes(ch)) {\n // join the group's members into a character class\n return `[${group.map((c) => escapeForRegex(c)).join('')}]`;\n }\n }\n // not in equivalence groups -> return escaped character\n return escapeForRegex(ch);\n};\n\n/** Small safe normalization: NFC, remove ZWJ/ZWNJ, collapse spaces. */\nconst normalizeArabicLight = (str: string) => {\n return str\n .normalize('NFC')\n .replace(/[\\u200C\\u200D]/g, '') // remove ZWJ/ZWNJ\n .replace(/\\s+/g, ' ')\n .trim();\n};\n\n/**\n * Creates a diacritic-insensitive regex pattern for Arabic text matching.\n * Normalizes text, handles character equivalences (ا/آ/أ/إ, ة/ه, ى/ي),\n * and makes each character tolerant of Arabic diacritics (Tashkeel/Harakat)\n * @param text - Input Arabic text to make diacritic-insensitive\n * @returns Regex pattern string that matches the text with or without diacritics and character variants\n */\nexport const makeDiacriticInsensitive = (text: string) => {\n const diacriticsMatcher = `${DIACRITICS_CLASS}*`;\n const norm = normalizeArabicLight(text);\n // Use Array.from to iterate grapheme-safe over the string (works fine for Arabic letters)\n return Array.from(norm)\n .map((ch) => getEquivClass(ch) + diacriticsMatcher)\n .join('');\n};\n","import { normalizeSpaces } from './formatting';\n\n/**\n * Replaces common Arabic prefixes (like 'Al-', 'Ar-', 'Ash-', etc.) 
with 'al-' in the text.\n * Handles different variations of prefixes such as Ash- and Al- but not when the second word\n * does not start with 'S'.\n * Example: 'Ash-Shafiee' becomes 'al-Shafiee'.\n *\n * @param {string} text - The input text containing Arabic prefixes.\n * @returns {string} - The modified text with standardized 'al-' prefixes.\n */\nexport const normalizeArabicPrefixesToAl = (text: string) => {\n return text\n .replace(/(\\b|\\W)(Al |Al-|Ar-|As-|Adh-|Ad-|Ats-|Ath |Ath-|Az |Az-|az-|adh-|as-|ar-)/g, '$1al-')\n .replace(/(\\b|\\W)(Ash-S|ash-S)/g, '$1al-S')\n .replace(/al- (.+?)\\b/g, 'al-$1');\n};\n\n/**\n * Removes double occurrences of Arabic apostrophes such as ʿʿ or ʾʾ in the text.\n * Example: 'ʿulamāʾʾ' becomes 'ʿulamāʾ'.\n *\n * @param {string} text - The input text containing double apostrophes.\n * @returns {string} - The modified text with condensed apostrophes.\n */\nexport const normalizeDoubleApostrophes = (text: string) => {\n return text.replace(/ʿʿ/g, 'ʿ').replace(/ʾʾ/g, 'ʾ');\n};\n\n/**\n * Replaces common salutations such as \"sallahu alayhi wasallam\" with \"ﷺ\" in the text.\n * It also handles variations of the salutation phrase, including 'peace and blessings be upon him'.\n * Example: 'Then Muḥammad (sallahu alayhi wasallam)' becomes 'Then Muḥammad ﷺ'.\n *\n * @param {string} text - The input text containing salutations.\n * @returns {string} - The modified text with salutations replaced.\n */\nexport const replaceSalutationsWithSymbol = (text: string) => {\n return text\n .replace(\n /\\(peace be upon him\\)|(Messenger of (Allah|Allāh)|Messenger|Prophet|Mu[hḥ]ammad) *\\((s[^)]*m|peace[^)]*him|May[^)]*him|may[^)]*him)\\)*/gi,\n '$1 ﷺ',\n )\n .replace(/,\\s*ﷺ\\s*,/g, ' ﷺ');\n};\n\n/**\n * Normalizes the text by removing diacritics, apostrophes, and dashes.\n * Example: 'Al-Jadwal' becomes 'AlJadwal'.\n *\n * @param {string} input - The input text to normalize.\n * @returns {string} - The normalized text.\n */\nexport const 
normalize = (input: string) => {\n return input\n .normalize('NFKD')\n .replace(/[\\u0300-\\u036f]/g, '')\n .replace(/`|ʾ|ʿ|-/g, '');\n};\n\n/**\n * Replaces various apostrophe characters (‛, ’, ‘) with the standard apostrophe (').\n * Example: '‛ulama’ al-su‘' becomes ''ulama' al-su''.\n *\n * @param {string} text - The input text containing different apostrophe characters.\n * @returns {string} - The modified text with normalized apostrophes.\n */\nexport const normalizeApostrophes = (text: string) => {\n return text.replace(/‛|’|‘/g, \"'\");\n};\n\n/**\n * Strips common Arabic prefixes like 'al-', 'bi-', 'fī', 'wa-', etc. from the beginning of words.\n * Example: 'al-Bukhari' becomes 'Bukhari'.\n *\n * @param {string} text - The input text containing Arabic prefixes.\n * @returns {string} - The modified text with prefixes stripped.\n */\nexport const removeArabicPrefixes = (text: string) => {\n return normalizeSpaces(text.replace(/(\\bal-|\\bli-|\\bbi-|\\bfī|\\bwa[-\\s]+|\\bl-|\\bliʿl|\\Bʿalá|\\Bʿan|\\bb\\.)/gi, ''));\n};\n\n/**\n * Simplifies English transliterations by removing diacritics, apostrophes, and common prefixes.\n * Example: 'Al-Jadwal' becomes 'Jadwal', and 'āḍġḥīṣṭū' becomes 'adghistu'.\n *\n * @param {string} text - The input text to simplify.\n * @returns {string} - The simplified text.\n */\nexport const normalizeTransliteratedEnglish = (text: string) => normalize(removeArabicPrefixes(text));\n\n/**\n * Extracts the initials from the input string, typically used for names or titles.\n * Example: 'Nayl al-Awtar' becomes 'NA'.\n *\n * @param {string} text - The input text to extract initials from.\n * @returns {string} - The extracted initials.\n */\nexport const extractInitials = (fullName: string) => {\n const initials = normalizeTransliteratedEnglish(fullName)\n .trim()\n .split(/[ -]/)\n .slice(0, 2)\n .map((word) => {\n return word.charAt(0).toUpperCase();\n })\n .join('');\n return 
initials;\n};\n"],"mappings":"AAOO,IAAMA,EAAiCC,GACnCA,EAAK,QAAQ,qCAAsC,EAAE,EASnDC,EAA8BD,GAChCA,EAAK,QAAQ,KAAM,QAAG,EAAE,QAAQ,KAAM,QAAG,EASvCE,EAAkBF,GAAiB,CAC5C,GAAI,CAACA,EACD,MAAO,GAGX,IAAMG,EAAuB,uEAEvBC,EAAkB,mCAElBC,EAAsB,sCACtBC,EAAUN,EAAK,QAAQI,EAAiB,EAAE,EAC1CG,EAAgBD,EAAQ,MAAMH,CAAoB,GAAK,CAAC,EACxDK,EAAeF,EAAQ,MAAMD,CAAmB,GAAK,CAAC,EAC5D,OAAOG,EAAa,SAAW,EAAI,EAAID,EAAc,OAASC,EAAa,MAC/E,EASaC,EAAkBT,GACpBA,EAAK,QAAQ,OAAQ,SAAI,EASvBU,EAAuCV,GACzCA,EAAK,QAAQ,2BAA4B,OAAO,EAS9CW,EAAoCX,GACtCA,EAAK,QAAQ,wCAAyC,GAAG,EASvDY,EAA4BZ,GAC9BA,EACF,QAAQ,iEAAkE,GAAG,EAC7E,QAAQ,8DAA+D,GAAG,EAStEa,EAAuBb,GACzBA,EAAK,QAAQ,2CAA4C,EAAE,EASzDc,EAA+Bd,GACjCA,EAAK,QAAQ,6BAA8B,GAAG,EAS5Ce,EAAgBf,GAIlBA,EAAK,QAAQ,6BAA8B,EAAE,EAS3CgB,EAA2BhB,GAC7BA,EAAK,QAAQ,OAAQ,QAAG,EAStBiB,EAAmBjB,GACrBA,EAAK,QACR,gMACA,EACJ,EASSkB,EAA4BlB,GAC9BA,EAAK,QAAQ,mDAAoD,GAAG,EASlEmB,EAAuBnB,GACzBA,EAAK,QAAQ,QAAS,QAAG,EASvBoB,EAAuCpB,GACzCA,EACF,QAAQ,UAAW,QAAG,EACtB,QAAQ,oBAAqB,QAAG,EAChC,QAAQ,QAAS,QAAG,EAShBqB,EAAyBrB,GAC3BA,EAAK,QAAQ,SAAU,QAAG,EChL9B,IAAMsB,EAAoCC,GAAiB,CAE9D,IAAMC,EAAc,YAKpB,OAFsBD,EAAK,QAAQC,EAAa;AAAA,CAAM,EAAE,QAAQ,SAAU;AAAA,CAAI,EAAE,KAAK,CAGzF,EAQaC,EAAqCF,GACvCA,EACF,QAAQ,iDAAkD,OAAO,EACjE,QAAQ,mCAAoC,MAAM,EAClD,QAAQ,uDAAwD,QAAQ,EACxE,QAAQ,yEAA0E,IAAI,EAUlFG,EAAoBH,GACtBA,EACF,QAAQ,QAAS,GAAG,EACpB,QAAQ,aAAc,gBAAM,EAC5B,QAAQ,MAAO,QAAG,EASdI,EAAwBJ,GAC1BA,EAAK,QAAQ,UAAW;AAAA,CAAI,EAS1BK,EAAmBL,GACrBA,EAAK,QAAQ,YAAa,EAAE,EAQ1BM,EAAuBN,GACzB,gBAAgB,KAAKA,CAAI,EAQvBO,EAAqBP,GAChB,kEACD,KAAKA,CAAI,EAGbQ,EAAqBR,GACdK,EAAgBL,CAAI,EACd,MAAM;AAAA,CAAI,EAAE,OAAQS,GAC/B,CAACA,GAASA,EAAK,OAAS,GAAK,CAACF,EAAkBE,CAAI,CAC9D,EAEY,KAAK;AAAA,CAAI,EAAE,KAAK,EASpBC,EAA2BV,GAC7BA,EAAK,QAAQ,mBAAoB,IAAI,EASnCW,EAAqBX,GACvBA,EAAK,QAAQ,YAAa,GAAG,EAS3BY,EAAkBZ,GACpBA,EAAK,QAAQ,eAAgB,GAAG,EAS9Ba,GAAkBb,GACpBA,EAAK,QAAQ,SAAU,GAAG,EASxBc,GAAoBd,GACtBA,EAAK,QAAQ,UAAW,QAAG,EASzBe,GAAiCf,GACnCA,EAAK,QAAQ,eAAgB;AAAA;AAAA,CAAM,EASjCgB,GAAiChB,GACnCA,EAAK,QAAQ,eAAgB;AAAA,CAAI,EAS/BiB,GAAmBjB,GACrBA,EAAK,QAAQ,UA
AW,GAAG,EASzBkB,GAAuBlB,GACzBA,EAAK,QAAQ,SAAU,QAAG,EAAE,QAAQ,MAAO,GAAG,EAS5CmB,GAA0BnB,GAC5BA,EAAK,QAAQ,2BAA4B,MAAM,EAS7CoB,GAA6BpB,GAC/BA,EAAK,QAAQ,qBAAsB,OAAO,EASxCqB,GAA2BrB,GAC7BA,EAAK,QAAQ,mBAAoB,OAAO,EAStCsB,GAAmBtB,GAExBA,EACK,QAAQ,aAAc,MAAG,EACzB,QAAQ,aAAc,MAAG,EAEzB,QAAQ,6BAA8B,MAAM,EAE5C,QAAQ,6BAA8B,MAAM,EAU5CuB,GAAkBvB,GAAiB,CAE5C,IAAIwB,EAASxB,EAGb,OAAAwB,EAASA,EAAO,QAAQ,kBAAmB,MAAM,EAG1CA,EAAO,QAAQ,kBAAmB,MAAM,CACnD,EAQaC,GAA+BzB,GAEpCA,EAEK,QAAQ,eAAgB,YAAM,EAE9B,QAAQ,eAAgB,YAAM,EAE9B,QAAQ,sBAAuB,YAAM,EAWrC0B,GAA0BC,GAAkB,CACrD,IAAMC,EAAgB,mCAChBC,EAAsB,CAAC,EACvBC,EAAQH,EAAM,MAAM;AAAA,CAAI,EAC1BI,EAAkB,GAEtB,OAAAD,EAAM,QAASrB,GAAS,CACpB,IAAMuB,EAAcvB,EAAK,KAAK,EACxBwB,EAAaL,EAAc,KAAKI,CAAW,EAC3CE,EAAW,gBAAgB,KAAKF,CAAW,EAEjD,GAAIC,GAAc,CAACC,EACXH,IACAF,EAAU,KAAKE,EAAgB,KAAK,CAAC,EACrCA,EAAkB,IAEtBF,EAAU,KAAKG,CAAW,MACvB,CACHD,GAAmB,GAAGC,CAAW,IACjC,IAAMG,EAAWJ,EAAgB,KAAK,EAAE,MAAM,EAAE,EAC5C,QAAQ,KAAKI,CAAQ,IACrBN,EAAU,KAAKE,EAAgB,KAAK,CAAC,EACrCA,EAAkB,GAE1B,CACJ,CAAC,EAGGA,GACAF,EAAU,KAAKE,EAAgB,KAAK,CAAC,EAGlCF,EAAU,KAAK;AAAA,CAAI,CAC9B,EAOaO,GAAkBpC,GAAiB,CAG5C,IAAMqC,EAAcrC,EAAK,QAAQ,aAAc,EAAE,EAGjD,OAAIqC,EAAY,SAAW,EAChB,GAGJA,IAAgBA,EAAY,YAAY,CACnD,EAQaC,GAA8BtC,GAChCA,EAAK,QAAQ,sBAAuB,OAAO,EASzCuC,EAAmBvC,GACrBA,EAAK,QAAQ,UAAW,GAAG,EAmBzBwC,GAA8BxC,GAChCA,EAAK,QAAQ,cAAe,IAAI,EAS9ByC,GAA6BzC,GAC/BA,EAAK,QAAQ,4BAA6B,QAAQ,EAShD0C,GAAmC1C,GACrCA,EAAK,QAAQ,WAAY,MAAG,EAAE,QAAQ,WAAY,MAAG,EAQnD2C,EAAoB3C,GAEJA,EAAK,UAAU,MAAM,EAGtB,QAAQ,mBAAoB,EAAE,EAAE,KAAK,EASpD4C,EAAuB5C,GAAiB,CACjD,IAAM6C,EAAoC,CACtC,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IACh
B,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,GACpB,EAEA,OAAO7C,EAAK,QAAQ,4EAA8E8C,GACvFD,EAAUC,CAAK,GAAKA,CAC9B,CACL,EAOaC,GAAgB/C,GAClB4C,EAAoBD,EAAiB3C,CAAI,CAAC,EAQxCgD,GAAeC,GACjBA,EACF,YAAY,EACZ,MAAM,GAAG,EACT,IAAKC,GAAS,CACX,GAAIA,EAAK,SAAW,EAAG,OAAOA,EAE9B,IAAMJ,EAAQI,EAAK,MAAM,QAAQ,EACjC,GAAI,CAACJ,GAASA,EAAM,QAAU,OAAW,OAAOI,EAChD,IAAMC,EAAIL,EAAM,MAChB,OAAOI,EAAK,MAAM,EAAGC,CAAC,EAAID,EAAK,OAAOC,CAAC,EAAE,YAAY,EAAID,EAAK,MAAMC,EAAI,CAAC,CAC7E,CAAC,EACA,KAAK,GAAG,EASJC,GAAyBpD,GAC3BA,EAAK,QAAQ,+BAAgC,QAAQ,EC9ezD,IAAMqD,GAAuBC,GAAgB,CAChD,IAAIC,EAAQD,EAAI,QAAQ,kBAAmB,MAAM,EACjD,OAAAC,EAAQA,EAAM,QAAQ,iBAAkB,QAAQ,EAChDA,EAAQA,EAAM,QAAQ,iBAAkB,QAAQ,EAEzC,KAAK,UAAU,KAAK,MAAMA,CAAK,CAAC,CAC3C,EAcaC,GAAwBF,GAG7B,6GACmB,KAAKA,EAAI,KAAK,CAAC,EAiB7BG,GAAiBC,GAA4B,CACtD,IAAMC,EAAQ,wBACd,OAAQD,EAAM,MAAMC,CAAK,GAAK,CAAC,GAAG,IAAKC,GAAeA,EAAE,WAAW,GAAG,EAAIA,EAAE,MAAM,EAAG,EAAE,EAAIA,CAAE,CACjG,EAgBMC,EAAqBP,GAAgB,CACvC,IAAIQ,EAAa,EACjB,QAAWC,KAAQT,EACXS,IAAS,KACTD,IAGR,OAAOA,EAAa,IAAM,CAC9B,EAEME,EAAW,CAAE,IAAK,IAAK,IAAK,IAAK,IAAK,GAAI,EAC1CC,EAAe,IAAI,IAAI,CAAC,IAAK,IAAK,GAAG,CAAC,EACtCC,EAAgB,IAAI,IAAI,CAAC,IAAK,IAAK,GAAG,CAAC,EAqBvCC,EAAuBb,GAAgB,CACzC,IAAMc,EAAkB,CAAC,EAEzB,QAAWL,KAAQT,EACf,GAAIW,EAAa,IAAIF,CAAI,EACrBK,EAAM,KAAKL,CAAI,UACRG,EAAc,IAAIH,CAAI,EAAG,CAChC,IAAMM,EAAWD,EAAM,IAAI,EAC3B,GAAI,CAACC,GAAYL,EAASK,CAAiC,IAAMN,EAC7D,MAAO,EAEf,CAGJ,OAAOK,EAAM,SAAW,CAC5B,EAsBaE,GAAchB,GAChBO,EAAkBP,CAAG,GAAKa,EAAoBb,CAAG,EAS/CiB,GAAmBC,GAAgC,CAC5D,GAAIA,EAAU,SAAS,GAAG,EAAG,CACzB,GAAM,CAACC,EAAOC,CAAG,EAAIF,EAAU,MAAM,GAAG,EAAE,IAAI,MAAM,EAEpD,GAAIC,EAAQC,EACR,MAAM,IAAI,MAAM,4CAA4C,EAGhE,OAAO,MAAM,KAAK,CAAE,OAAQA,EAAMD,EAAQ,CAAE,EAAG,CAACE,EAAGC,IAAMH,EAAQG,CAAC,CACtE,KACI,QAAOJ,EAAU,MAAM,GAAG,EAAE,IAAI,MAAM,CAE9C,EC9JO,IAAMK,GAAiCC,GACnCA,EAAK,QACR,wHACA,GACJ,EASSC,GAA4BD,GAC9BA,EAAK,QAAQ,aAAc,EAAE,EAS3B
E,GAA+BF,GACjCA,EAAK,QAAQ,OAAQ,GAAG,EAStBG,GAAkBH,GACpBA,EAAK,QAAQ,SAAU,EAAE,EASvBI,GAAmBJ,GACrBA,EAAK,QAAQ,yDAA0D,EAAE,EASvEK,GAA0BL,GAC5BA,EAAK,QAAQ,SAAU,EAAE,EASvBM,GAA+BN,GACjCA,EAAK,QAAQ,4BAA6B,EAAE,EAS1CO,GAAcP,GAChBA,EAAK,QACR,uGACA,EACJ,EAQSQ,GAA4BR,GAEjCA,EAEK,QAAQ,mBAAoB,IAAI,EAEhC,QAAQ,eAAgB,IAAI,EAE5B,QAAQ,eAAgB,IAAI,EAE5B,QAAQ,aAAc,IAAI,EAE1B,QAAQ,eAAgB,IAAI,EAE5B,QAAQ,aAAc,EAAE,EAExB,QAAQ,uBAAwB,EAAE,EAElC,QAAQ,wBAAyB,IAAI,EAErC,QAAQ,WAAY,EAAE,EAEtB,QAAQ,iBAAkB,EAAE,EAE5B,QAAQ,iBAAkB,EAAE,EAE5B,QAAQ,MAAO,EAAE,EAoBjBS,GAAW,CAACC,EAAaC,EAAI,MAAiBD,EAAI,OAASC,EAAI,GAAGD,EAAI,UAAU,EAAGC,EAAI,CAAC,CAAC,SAAMD,EAsB/FE,GAAiB,CAACZ,EAAca,EAAoB,GAAIC,IAAuB,CACxF,GAAId,EAAK,QAAUa,EACf,OAAOb,EAIX,IAAMe,EAAmB,KAAK,IAAI,EAAG,KAAK,MAAMF,EAAY,CAAC,CAAC,EACxDG,EAAkBF,GAAaC,EAM/BE,EAHkBJ,EAAY,EAGEG,EAGtC,GAAIC,EAAc,EAEd,MAAO,GAAGjB,EAAK,UAAU,EAAGa,EAAY,CAAC,CAAC,SAG9C,IAAMK,EAAelB,EAAK,UAAU,EAAGiB,CAAW,EAC5CE,EAAanB,EAAK,UAAUA,EAAK,OAASgB,CAAe,EAE/D,MAAO,GAAGE,CAAY,SAAIC,CAAU,EACxC,EAqBaC,GAAkBC,GAAkBA,EAAM,QAAQ,OAAQ,GAAG,EAAE,KAAK,EAK3EC,EAAmB,qDAMnBC,EAA2B,CAC7B,CAAC,SAAU,SAAU,SAAU,QAAQ,EACvC,CAAC,SAAU,QAAQ,EACnB,CAAC,SAAU,QAAQ,CACvB,EAGMC,EAAkBC,GAAcA,EAAE,QAAQ,sBAAuB,MAAM,EAGvEC,EAAiBC,GAAuB,CAC1C,QAAWC,KAASL,EAChB,GAAIK,EAAM,SAASD,CAAE,EAEjB,MAAO,IAAIC,EAAM,IAAKC,GAAML,EAAeK,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,IAI/D,OAAOL,EAAeG,CAAE,CAC5B,EAGMG,EAAwBC,GACnBA,EACF,UAAU,KAAK,EACf,QAAQ,kBAAmB,EAAE,EAC7B,QAAQ,OAAQ,GAAG,EACnB,KAAK,EAUDC,GAA4BhC,GAAiB,CACtD,IAAMiC,EAAoB,GAAGX,CAAgB,IACvCY,EAAOJ,EAAqB9B,CAAI,EAEtC,OAAO,MAAM,KAAKkC,CAAI,EACjB,IAAKP,GAAOD,EAAcC,CAAE,EAAIM,CAAiB,EACjD,KAAK,EAAE,CAChB,ECzPO,IAAME,GAA+BC,GACjCA,EACF,QAAQ,6EAA8E,OAAO,EAC7F,QAAQ,wBAAyB,QAAQ,EACzC,QAAQ,eAAgB,OAAO,EAU3BC,GAA8BD,GAChCA,EAAK,QAAQ,MAAO,QAAG,EAAE,QAAQ,MAAO,QAAG,EAWzCE,GAAgCF,GAClCA,EACF,QACG,2IACA,WACJ,EACC,QAAQ,aAAc,SAAI,EAUtBG,EAAaC,GACfA,EACF,UAAU,MAAM,EAChB,QAAQ,mBAAoB,EAAE,EAC9B,QAAQ,WAAY,EAAE,EAUlBC,GAAwBL,GAC1BA,EAAK,QAAQ,SAAU,GAAG,EAUxBM,EAAwBN,GAC1BO,EAAgBP,EAAK,QAAQ,uEAAwE,EAAE,CA
AC,EAUtGQ,EAAkCR,GAAiBG,EAAUG,EAAqBN,CAAI,CAAC,EASvFS,GAAmBC,GACXF,EAA+BE,CAAQ,EACnD,KAAK,EACL,MAAM,MAAM,EACZ,MAAM,EAAG,CAAC,EACV,IAAKC,GACKA,EAAK,OAAO,CAAC,EAAE,YAAY,CACrC,EACA,KAAK,EAAE","names":["cleanExtremeArabicUnderscores","text","convertUrduSymbolsToArabic","getArabicScore","arabicLettersPattern","allDigitPattern","countedCharsPattern","cleaned","arabicMatches","totalMatches","fixTrailingWow","addSpaceBetweenArabicTextAndNumbers","stripEnglishCharactersAndSymbols","removeNonIndexSignatures","removeSingularCodes","removeSolitaryArabicLetters","removeTatwil","replaceTaMarbutahWithHa","stripDiacritics","stripZeroWidthCharacters","replaceAlifMaqsurah","replaceEnglishPunctuationWithArabic","normalizeAlifVariants","insertLineBreaksAfterPunctuation","text","punctuation","addSpaceBeforeAndAfterPunctuation","applySmartQuotes","cleanLiteralNewLines","cleanMultilines","hasWordInSingleLine","isOnlyPunctuation","cleanJunkFromText","line","cleanSpacesBeforePeriod","condenseAsterisks","condenseColons","condenseDashes","condenseEllipsis","reduceMultilineBreaksToDouble","reduceMultilineBreaksToSingle","condensePeriods","condenseUnderscores","doubleToSingleBrackets","ensureSpaceBeforeBrackets","ensureSpaceBeforeQuotes","fixBracketTypos","fixCurlyBraces","result","fixMismatchedQuotationMarks","formatStringBySentence","input","footnoteRegex","sentences","lines","currentSentence","trimmedLine","isFootnote","isNumber","lastChar","isAllUppercase","lettersOnly","normalizeSlashInReferences","normalizeSpaces","removeRedundantPunctuation","removeSpaceInsideBrackets","replaceDoubleBracketsWithArrows","stripBoldStyling","stripItalicsStyling","italicMap","match","stripStyling","toTitleCase","str","word","i","trimSpaceInsideQuotes","normalizeJsonSyntax","str","input","isJsonStructureValid","splitByQuotes","query","regex","s","areQuotesBalanced","quoteCount","char","brackets","openBrackets","closeBrackets","areBracketsBalanced","stack","lastOpen","isBalanced","parsePageRanges","pageInput",
"start","end","_","i","cleanSymbolsAndPartReferences","text","cleanTrailingPageNumbers","replaceLineBreaksWithSpaces","stripAllDigits","removeDeathYear","removeNumbersAndDashes","removeSingleDigitReferences","removeUrls","removeMarkdownFormatting","truncate","val","n","truncateMiddle","maxLength","endLength","defaultEndLength","actualEndLength","startLength","startPortion","endPortion","unescapeSpaces","input","DIACRITICS_CLASS","EQUIV_GROUPS","escapeForRegex","s","getEquivClass","ch","group","c","normalizeArabicLight","str","makeDiacriticInsensitive","diacriticsMatcher","norm","normalizeArabicPrefixesToAl","text","normalizeDoubleApostrophes","replaceSalutationsWithSymbol","normalize","input","normalizeApostrophes","removeArabicPrefixes","normalizeSpaces","normalizeTransliteratedEnglish","extractInitials","fullName","word"]}
1
+ {"version":3,"sources":["../src/arabic.ts","../src/cleaning.ts","../src/formatting.ts","../src/parsing.ts","../src/sanitization.ts","../src/transliteration.ts"],"sourcesContent":["/**\n * Converts Arabic-Indic numerals (٠-٩) to a JavaScript number.\n *\n * This function finds all Arabic-Indic digits in the input string and converts them\n * to their corresponding Arabic (Western) digits, then parses the result as an integer.\n *\n * Arabic-Indic digits mapping:\n * - ٠ → 0, ١ → 1, ٢ → 2, ٣ → 3, ٤ → 4\n * - ٥ → 5, ٦ → 6, ٧ → 7, ٨ → 8, ٩ → 9\n *\n * @param arabic - The string containing Arabic-Indic numerals to convert\n * @returns The parsed integer value of the converted numerals\n *\n * @example\n * ```typescript\n * arabicNumeralToNumber(\"١٢٣\"); // returns 123\n * arabicNumeralToNumber(\"٥٠\"); // returns 50\n * arabicNumeralToNumber(\"abc١٢٣xyz\"); // returns 123 (non-digits ignored)\n * arabicNumeralToNumber(\"\"); // returns NaN\n * ```\n *\n * Returns NaN if no valid Arabic-Indic digits are found\n */\nexport const arabicNumeralToNumber = (arabic: string) => {\n return parseInt(\n arabic.replace(/[\\u0660-\\u0669]/g, (c) => (c.charCodeAt(0) - 0x0660).toString()),\n 10,\n );\n};\n\n/**\n * Removes extreme Arabic underscores (ـ) that appear at the beginning or end of a line or in text.\n * Does not affect Hijri dates (e.g., 1424هـ) or specific Arabic terms.\n * Example: \"ـThis is a textـ\" will be changed to \"This is a text\".\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with extreme underscores removed.\n */\nexport const cleanExtremeArabicUnderscores = (text: string) => {\n return text.replace(/(?<!\\d ?ه|اه)ـ(?=\\r?$)|^ـ(?!اهـ)/gm, '');\n};\n\n/**\n * Converts Urdu symbols to their Arabic equivalents.\n * Example: 'ھذا' will be changed to 'هذا', 'ی' to 'ي'.\n * @param {string} text - The input text containing Urdu symbols.\n * @returns {string} - The modified text with Urdu symbols converted 
to Arabic symbols.\n */\nexport const convertUrduSymbolsToArabic = (text: string) => {\n return text.replace(/ھ/g, 'ه').replace(/ی/g, 'ي');\n};\n\n/**\n * Calculates the proportion of Arabic characters in text relative to total non-whitespace, non-digit characters.\n * Digits (ASCII and Arabic-Indic variants) are excluded from both numerator and denominator.\n * @param text - The input text to analyze\n * @returns A decimal between 0-1 representing the Arabic character ratio (0 = no Arabic, 1 = all Arabic)\n */\nexport const getArabicScore = (text: string) => {\n if (!text) {\n return 0;\n }\n // Arabic letters (letters/ranges only)\n const arabicLettersPattern = /[\\u0600-\\u06FF\\u0750-\\u077F\\u08A0-\\u08FF\\uFB50-\\uFDFF\\uFE70-\\uFEFF]/g;\n // ASCII digits + Arabic-Indic digits + Extended Arabic-Indic digits\n const allDigitPattern = /[0-9\\u0660-\\u0669\\u06F0-\\u06F9]/g;\n // Counted characters exclude whitespace and all listed digits\n const countedCharsPattern = /[^\\s0-9\\u0660-\\u0669\\u06F0-\\u06F9]/g;\n const cleaned = text.replace(allDigitPattern, '');\n const arabicMatches = cleaned.match(arabicLettersPattern) || [];\n const totalMatches = cleaned.match(countedCharsPattern) || [];\n return totalMatches.length === 0 ? 
0 : arabicMatches.length / totalMatches.length;\n};\n\n/**\n * Fixes the trailing \"و\" (wow) in phrases such as \"عليكم و رحمة\" to \"عليكم ورحمة\".\n * This function attempts to correct phrases where \"و\" appears unnecessarily, particularly in greetings.\n * Example: 'السلام عليكم و رحمة' will be changed to 'السلام عليكم ورحمة'.\n * @param {string} text - The input text containing the \"و\" character.\n * @returns {string} - The modified text with unnecessary trailing \"و\" characters corrected.\n */\nexport const fixTrailingWow = (text: string) => {\n return text.replace(/ و /g, ' و');\n};\n\n/**\n * Inserts a space between Arabic text and numbers.\n * Example: 'الآية37' will be changed to 'الآية 37'.\n * @param {string} text - The input text containing Arabic text followed by numbers.\n * @returns {string} - The modified text with spaces inserted between Arabic text and numbers.\n */\nexport const addSpaceBetweenArabicTextAndNumbers = (text: string) => {\n return text.replace(/([\\u0600-\\u06FF]+)(\\d+)/g, '$1 $2');\n};\n\n/**\n * Removes single-digit numbers surrounded by Arabic text. 
Also removes dashes (-) not followed by a number.\n * For example, removes '3' from 'وهب 3 وقال' but does not remove '121' from 'لوحه 121 الجرح'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with non-index numbers and dashes removed.\n */\nexport const removeNonIndexSignatures = (text: string) => {\n return text\n .replace(/(?<![0-9] ?)-|(?<=[\\u0600-\\u06FF])\\s?\\d\\s?(?=[\\u0600-\\u06FF])/g, ' ')\n .replace(/(?<=[\\u0600-\\u06FF]\\s)(\\d+\\s)+\\d+(?=(\\s[\\u0600-\\u06FF]|$))/g, ' ');\n};\n\n/**\n * Removes characters enclosed in square brackets [] or parentheses () if they are Arabic letters or Arabic-Indic numerals.\n * Example: '[س]' or '(س)' will be removed.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with singular codes removed.\n */\nexport const removeSingularCodes = (text: string) => {\n return text.replace(/[[({][\\u0621-\\u064A\\u0660-\\u0669][\\])}]/g, '');\n};\n\n/**\n * Removes solitary Arabic letters unless they are the 'ha' letter, which is used in Hijri years.\n * Example: \"ب ا الكلمات ت\" will be changed to \"ا الكلمات\".\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with solitary Arabic letters removed.\n */\nexport const removeSolitaryArabicLetters = (text: string) => {\n return text.replace(/(^| )[\\u0621-\\u064A]( |$)/g, ' ');\n};\n\n/**\n * Replaces English punctuation (question mark and semicolon) with their Arabic equivalents.\n * Example: '?' 
will be replaced with '؟', and ';' with '؛'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with English punctuation replaced by Arabic punctuation.\n */\nexport const replaceEnglishPunctuationWithArabic = (text: string) => {\n return text\n .replace(/\\?|؟\\./g, '؟')\n .replace(/(;|؛)\\s*(\\1\\s*)*/g, '؛')\n .replace(/,|-،/g, '،');\n};\n","/** Character class for Arabic diacritics (tashkīl/harakāt). */\nconst DIACRITICS_CLASS = '[\\\\u0610-\\\\u061A\\\\u064B-\\\\u065F\\\\u0670\\\\u06D6-\\\\u06ED]';\n/** Tatweel (kashīda) class. */\nconst TATWEEL_CLASS = '\\\\u0640';\n\n/**\n * Escape a string so it can be safely embedded into a RegExp source.\n *\n * @param s Any string\n * @returns Escaped string\n */\nexport const escapeRegex = (s: string): string => s.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n\n/** Optional equivalence toggles for {@link makeDiacriticInsensitiveRegex}. */\ntype EquivOptions = {\n /** Treat ا/أ/إ/آ as equivalent. @default true */\n alif?: boolean;\n /** Treat ة/ه as equivalent. @default true */\n taMarbutahHa?: boolean;\n /** Treat ى/ي as equivalent. @default true */\n alifMaqsurahYa?: boolean;\n};\n\n/** Options for {@link makeDiacriticInsensitiveRegex}. 
*/\nexport type MakeRegexOptions = {\n /**\n * Character equivalences to allow.\n * @default { alif: true, taMarbutahHa: true, alifMaqsurahYa: true }\n */\n equivalences?: EquivOptions;\n\n /**\n * Allow tatweel between letters (tolerate decorative elongation).\n * @default true\n */\n allowTatweel?: boolean;\n\n /**\n * Ignore diacritics by inserting a `DIACRITICS_CLASS*` after each letter.\n * @default true\n */\n ignoreDiacritics?: boolean;\n\n /**\n * Treat any whitespace in the needle as `\\s+` for flexible matching.\n * @default true\n */\n flexWhitespace?: boolean;\n\n /**\n * RegExp flags to use.\n * @default 'u'\n */\n flags?: string;\n};\n\n/**\n * Build a **diacritic-insensitive**, **tatweel-tolerant** RegExp for Arabic text matching.\n *\n * Features:\n * - Optional character equivalences: ا~أ~إ~آ, ة~ه, ى~ي.\n * - Optional tolerance for tatweel between characters.\n * - Optional diacritic-insensitivity (by inserting a diacritics class after each char).\n * - Optional flexible whitespace (needle whitespace becomes `\\s+`).\n *\n * @param needle The Arabic text to match\n * @param opts See {@link MakeRegexOptions}\n * @returns A `RegExp` matching the needle with the desired tolerances\n *\n * @example\n * const rx = makeDiacriticInsensitiveRegex('أنا إلى الآفاق');\n * rx.test('انا الي الافاق'); // true\n * rx.test('اَنا إلى الآفاق'); // true\n */\nexport const makeDiacriticInsensitiveRegex = (needle: string, opts: MakeRegexOptions = {}): RegExp => {\n const {\n equivalences = { alif: true, taMarbutahHa: true, alifMaqsurahYa: true },\n allowTatweel = true,\n ignoreDiacritics = true,\n flexWhitespace = true,\n flags = 'u',\n } = opts;\n\n // Safety guard against extremely large inputs causing excessive pattern sizes\n if (needle.length > 5000) {\n throw new Error('makeDiacriticInsensitiveRegex: needle too long');\n }\n\n const charClass = (ch: string): string => {\n switch (ch) {\n case 'ا':\n case 'أ':\n case 'إ':\n case 'آ':\n return equivalences.alif ? 
'[اأإآ]' : 'ا';\n case 'ة':\n case 'ه':\n return equivalences.taMarbutahHa ? '[هة]' : escapeRegex(ch);\n case 'ى':\n case 'ي':\n return equivalences.alifMaqsurahYa ? '[ىي]' : escapeRegex(ch);\n default:\n return escapeRegex(ch);\n }\n };\n\n const after = `${ignoreDiacritics ? `${DIACRITICS_CLASS}*` : ''}${allowTatweel ? `${TATWEEL_CLASS}*` : ''}`;\n\n let pattern = '';\n for (const ch of Array.from(needle)) {\n if (/\\s/.test(ch)) {\n pattern += flexWhitespace ? '\\\\s+' : '\\\\s*';\n } else {\n pattern += `${charClass(ch)}${after}`;\n }\n }\n\n return new RegExp(pattern, flags);\n};\n","/**\n * Adds line breaks after punctuation marks such as periods, exclamation points, and question marks.\n * Example: 'Text.' becomes 'Text.\\n'.\n * @param {string} text - The input text containing punctuation.\n * @returns {string} - The modified text with line breaks added after punctuation.\n */\nexport const insertLineBreaksAfterPunctuation = (text: string) => {\n // Define the punctuation marks that should trigger a new line\n const punctuation = /([.?!؟])/g;\n\n // Replace occurrences of punctuation marks followed by a space with the punctuation mark, a newline, and the space\n const formattedText = text.replace(punctuation, '$1\\n').replace(/\\n\\s+/g, '\\n').trim();\n\n return formattedText;\n};\n\n/**\n * Adds spaces before and after punctuation, except for certain cases like quoted text or ayah references.\n * Example: 'Text,word' becomes 'Text, word'.\n * @param {string} text - The input text containing punctuation.\n * @returns {string} - The modified text with spaces added before and after punctuation.\n */\nexport const addSpaceBeforeAndAfterPunctuation = (text: string) => {\n return text\n .replace(/( ?)([.!?,،؟;؛])((?![ '”“)\"\\]\\n])|(?=\\s{2,}))/g, '$1$2 ')\n .replace(/\\s([.!?,،؟;؛])\\s*([ '”“)\"\\]\\n])/g, '$1$2')\n .replace(/([^\\s\\w\\d'”“)\"\\]]+)\\s+([.!?,،؟;؛])|([.!?,،؟;؛])\\s+$/g, '$1$2$3')\n .replace(/(?<=\\D)( ?: ?)(?!(\\d+:)|(:\\d+))|(?<=\\d) ?: 
?(?=\\D)|(?<=\\D) ?: ?(?=\\d)/g, ': ');\n};\n\n/**\n * Turns regular double quotes surrounding a body of text into smart quotes.\n * Also fixes incorrect starting quotes by ensuring the string starts with an opening quote if needed.\n * Example: 'The \"quick brown\" fox' becomes 'The “quick brown” fox'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with smart quotes applied.\n */\nexport const applySmartQuotes = (text: string) => {\n return text\n .replace(/[“”]/g, '\"')\n .replace(/\"([^\"]*)\"/g, '“$1”')\n .replace(/^”/g, '“');\n};\n\n/**\n * Replaces literal new line characters (\\n) and carriage returns (\\r) with actual line breaks.\n * Example: 'A\\\\nB' becomes 'A\\nB'.\n * @param {string} text - The input text containing literal new lines.\n * @returns {string} - The modified text with actual line breaks.\n */\nexport const cleanLiteralNewLines = (text: string) => {\n return text.replace(/\\\\n|\\r/g, '\\n');\n};\n\n/**\n * Removes trailing spaces from each line in a multiline string.\n * Example: \" This is a line \\nAnother line \" becomes \"This is a line\\nAnother line\".\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with trailing spaces removed.\n */\nexport const cleanMultilines = (text: string) => {\n return text.replace(/^ +| +$/gm, '');\n};\n\n/**\n * Detects if a word is by itself in a line.\n * @param text The text to check.\n * @returns true if there exists a word in any of the lines in the text that is by itself.\n */\nexport const hasWordInSingleLine = (text: string): boolean => {\n return /^\\s*\\S+\\s*$/gm.test(text);\n};\n\n/**\n * Checks if the input string consists of only punctuation characters.\n * @param {string} text - The input text to check.\n * @returns {boolean} - Returns true if the string contains only punctuation, false otherwise.\n */\nexport const isOnlyPunctuation = (text: string): boolean => {\n const 
regex = /^[\\u0020-\\u002f\\u003a-\\u0040\\u005b-\\u0060\\u007b-\\u007e0-9٠-٩]+$/;\n return regex.test(text);\n};\n\n/**\n * Cleans unnecessary spaces before punctuation marks such as periods, commas, and question marks.\n * Example: 'This is a sentence , with extra space .' becomes 'This is a sentence, with extra space.'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with cleaned spaces before punctuation.\n */\nexport const cleanSpacesBeforePeriod = (text: string) => {\n return text.replace(/\\s+([.؟!,،؛:?])/g, '$1');\n};\n\n/**\n * Condenses multiple asterisks (*) into a single one.\n * Example: '***' becomes '*'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed asterisks.\n */\nexport const condenseAsterisks = (text: string) => {\n return text.replace(/(\\*\\s*)+/g, '*');\n};\n\n/**\n * Replaces occurrences of colons surrounded by periods (e.g., '.:.' or ':') with a single colon.\n * Example: 'This.:. is a test' becomes 'This: is a test'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed colons.\n */\nexport const condenseColons = (text: string) => {\n return text.replace(/[.-]?:[.-]?/g, ':');\n};\n\n/**\n * Condenses two or more dashes (--) into a single dash (-).\n * Example: 'This is some ---- text' becomes 'This is some - text'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed dashes.\n */\nexport const condenseDashes = (text: string) => {\n return text.replace(/-{2,}/g, '-');\n};\n\n/**\n * Replaces sequences of two or more periods (e.g., '...') with an ellipsis character (…).\n * Example: 'This is a test...' 
becomes 'This is a test…'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with ellipses condensed.\n */\nexport const condenseEllipsis = (text: string) => {\n return text.replace(/\\.{2,}/g, '…');\n};\n\n/**\n * Reduces multiple consecutive line breaks (3 or more) to exactly 2 line breaks.\n * Example: 'This is line 1\\n\\n\\n\\nThis is line 2' becomes 'This is line 1\\n\\nThis is line 2'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed line breaks.\n */\nexport const reduceMultilineBreaksToDouble = (text: string) => {\n return text.replace(/(\\n\\s*){3,}/g, '\\n\\n');\n};\n\n/**\n * Reduces multiple consecutive line breaks (2 or more) to exactly 1 line break.\n * Example: 'This is line 1\\n\\nThis is line 2' becomes 'This is line 1\\nThis is line 2'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed line breaks.\n */\nexport const reduceMultilineBreaksToSingle = (text: string) => {\n return text.replace(/(\\n\\s*){2,}/g, '\\n');\n};\n\n/**\n * Condenses multiple periods separated by spaces (e.g., '. . .') into a single period.\n * Example: 'This . . . is a test' becomes 'This. is a test'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed periods.\n */\nexport const condensePeriods = (text: string) => {\n return text.replace(/\\. 
+\\./g, '.');\n};\n\n/**\n * Condenses multiple underscores (__) or Arabic Tatweel characters (ـــــ) into a single underscore or Tatweel.\n * Example: 'This is ـــ some text __' becomes 'This is ـ some text _'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed underscores.\n */\nexport const condenseUnderscores = (text: string) => {\n return text.replace(/ـ{2,}/g, 'ـ').replace(/_+/g, '_');\n};\n\n/**\n * Replaces double parentheses or brackets with single ones.\n * Example: '((text))' becomes '(text)'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed brackets.\n */\nexport const doubleToSingleBrackets = (text: string) => {\n return text.replace(/(\\(|\\)){2,}|(\\[|\\]){2,}/g, '$1$2');\n};\n\n/**\n * Ensures at most 1 space exists before any word before brackets.\n * Adds a space if there isn't one, or reduces multiple spaces to one.\n * @param {string} text - The input text to modify\n * @returns {string} - The modified text with proper spacing before brackets\n */\nexport const ensureSpaceBeforeBrackets = (text: string) => {\n return text.replace(/(\\S) *(\\([^)]*\\))/g, '$1 $2');\n};\n\n/**\n * Ensures at most 1 space exists before any word before Arabic quotation marks.\n * Adds a space if there isn't one, or reduces multiple spaces to one.\n * @param {string} text - The input text to modify\n * @returns {string} - The modified text with proper spacing before Arabic quotes\n */\nexport const ensureSpaceBeforeQuotes = (text: string) => {\n return text.replace(/(\\S) *(«[^»]*»)/g, '$1 $2');\n};\n\n/**\n * Fixes common bracket and quotation mark typos in text\n * Corrects malformed patterns like \"(«\", \"»)\", and misplaced digits in brackets\n * @param text - Input text that may contain bracket typos\n * @returns Text with corrected bracket and quotation mark combinations\n */\nexport const fixBracketTypos = (text: 
string) => {\n return (\n text\n .replace(/\\(«|\\( \\(/g, '«')\n .replace(/»\\)|\\) \\)/g, '»')\n // Fix \")digit)\" pattern to \"(digit)\"\n .replace(/\\)([0-9\\u0660-\\u0669]+)\\)/g, '($1)')\n // Fix \")digit(\" pattern to \"(digit)\"\n .replace(/\\)([0-9\\u0660-\\u0669]+)\\(/g, '($1)')\n );\n};\n\n/**\n * Fixes mismatched curly braces by converting incorrect bracket/brace combinations\n * to proper curly braces { }\n * @param text - Input text that may contain mismatched curly braces\n * @returns Text with corrected curly brace pairs\n */\nexport const fixCurlyBraces = (text: string) => {\n // Process each mismatch type separately to avoid interference\n let result = text;\n\n // Fix ( content } to { content }\n result = result.replace(/\\(([^(){}]+)\\}/g, '{$1}');\n\n // Fix { content ) to { content }\n return result.replace(/\\{([^(){}]+)\\)/g, '{$1}');\n};\n\n/**\n * Fixes mismatched quotation marks in Arabic text by converting various\n * incorrect bracket/quote combinations to proper Arabic quotation marks (« »)\n * @param text - Input text that may contain mismatched quotation marks\n * @returns Text with corrected Arabic quotation marks\n */\nexport const fixMismatchedQuotationMarks = (text: string) => {\n return (\n text\n // Matches mismatched quotation marks: « followed by content and closed with )\n .replace(/«([^»)]+)\\)/g, '«$1»')\n // Fix reverse mismatched ( content » to « content »\n .replace(/\\(([^()]+)»/g, '«$1»')\n // Matches any unclosed « quotation marks at end of content\n .replace(/«([^»]+)(?=\\s*$|$)/g, '«$1»')\n );\n};\n\n/**\n * Formats a multiline string by joining sentences and maintaining footnotes on their own lines.\n * Footnotes are identified by Arabic and English numerals.\n * Example: 'Sentence one.\\n(1) A footnote.\\nSentence two.' 
remains the same, while regular sentences are joined.\n * @param {string} input - The input text containing sentences and footnotes.\n * @returns {string} - The formatted text.\n */\nexport const formatStringBySentence = (input: string) => {\n const footnoteRegex = /^\\((?:\\d+|۱|۲|۳|۴|۵|۶|۷|۸|۹)\\)\\s/;\n const sentences: string[] = [];\n const lines = input.split('\\n');\n let currentSentence = '';\n\n lines.forEach((line) => {\n const trimmedLine = line.trim();\n const isFootnote = footnoteRegex.test(trimmedLine);\n const isNumber = /^\\(\\d+\\/\\d+\\)/.test(trimmedLine);\n\n if (isFootnote && !isNumber) {\n if (currentSentence) {\n sentences.push(currentSentence.trim());\n currentSentence = '';\n }\n sentences.push(trimmedLine);\n } else {\n currentSentence += `${trimmedLine} `;\n const lastChar = currentSentence.trim().slice(-1);\n if (/[.!؟]/.test(lastChar)) {\n sentences.push(currentSentence.trim());\n currentSentence = '';\n }\n }\n });\n\n // Add any remaining text to the output\n if (currentSentence) {\n sentences.push(currentSentence.trim());\n }\n\n return sentences.join('\\n');\n};\n\n/**\n * Detects if text is entirely in uppercase letters\n * @param text - The text to check\n * @returns true if all alphabetic characters are uppercase, false otherwise\n */\nexport const isAllUppercase = (text: string) => {\n // Remove non-letter characters (including numbers, punctuation, spaces)\n // \\p{L} matches any Unicode letter character\n const lettersOnly = text.replace(/[^\\p{L}]/gu, '');\n\n // If there are no letter characters, return false\n if (lettersOnly.length === 0) {\n return false;\n }\n\n return lettersOnly === lettersOnly.toUpperCase();\n};\n\n/**\n * Removes unnecessary spaces around slashes in references.\n * Example: '127 / 11' becomes '127/11'.\n * @param {string} text - The input text containing references.\n * @returns {string} - The modified text with spaces removed around slashes.\n */\nexport const normalizeSlashInReferences = (text: 
string) => {\n return text.replace(/(\\d+)\\s?\\/\\s?(\\d+)/g, '$1/$2');\n};\n\n/**\n * Reduces multiple spaces or tabs to a single space.\n * Example: 'This is a text' becomes 'This is a text'.\n * @param {string} text - The input text containing extra spaces.\n * @returns {string} - The modified text with reduced spaces.\n */\nexport const normalizeSpaces = (text: string) => {\n return text.replace(/[ \\t]+/g, ' ');\n};\n\n/**\n * Removes redundant punctuation marks that follow Arabic question marks or exclamation marks.\n * This function cleans up text by removing periods (.) or Arabic commas (،) that immediately\n * follow Arabic question marks (؟) or exclamation marks (!), as they are considered redundant\n * in proper Arabic punctuation.\n *\n * @param text - The Arabic text to clean up\n * @returns The text with redundant punctuation removed\n *\n * @example\n * ```typescript\n * removeRedundantPunctuation('كيف حالك؟.') // Returns: 'كيف حالك؟'\n * removeRedundantPunctuation('ممتاز!،') // Returns: 'ممتاز!'\n * removeRedundantPunctuation('هذا جيد.') // Returns: 'هذا جيد.' 
(unchanged)\n * ```\n */\nexport const removeRedundantPunctuation = (text: string) => {\n return text.replace(/([؟!])[.،]/g, '$1');\n};\n\n/**\n * Removes spaces inside brackets, parentheses, or square brackets.\n * Example: '( a b )' becomes '(a b)'.\n * @param {string} text - The input text with spaces inside brackets.\n * @returns {string} - The modified text with spaces removed inside brackets.\n */\nexport const removeSpaceInsideBrackets = (text: string) => {\n return text.replace(/([[(])\\s*(.*?)\\s*([\\])])/g, '$1$2$3');\n};\n\n/**\n * Replaces double parentheses single a single arrow variation.\n * Example: '((text))' becomes '«text»'.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with condensed brackets.\n */\nexport const replaceDoubleBracketsWithArrows = (text: string) => {\n return text.replace(/\\(\\(\\s?/g, '«').replace(/\\s?\\)\\)/g, '»');\n};\n\n/**\n * Removes bold styling from text by normalizing the string and removing stylistic characters.\n * @param {string} text - The input text containing bold characters.\n * @returns {string} - The modified text with bold styling removed.\n */\nexport const stripBoldStyling = (text: string) => {\n // Normalize the string to NFKD form\n const normalizedString = text.normalize('NFKD');\n\n // Remove combining marks (diacritics) and stylistic characters from the string\n return normalizedString.replace(/[\\u0300-\\u036f]/g, '').trim();\n};\n\n/**\n * Removes italicized characters by replacing italic Unicode characters with their normal counterparts.\n * Example: '𝘼𝘽𝘾' becomes 'ABC'.\n * @param {string} text - The input text containing italicized characters.\n * @returns {string} - The modified text with italics removed.\n */\nexport const stripItalicsStyling = (text: string) => {\n const italicMap: Record<string, string> = {\n '\\uD835\\uDC4E': 'I',\n '\\uD835\\uDC68': 'g',\n '\\u{1D63C}': '!',\n '\\uD835\\uDC4F': 'J',\n '\\uD835\\uDC69': 'h',\n 
'\\u{1D63D}': '?',\n '\\uD835\\uDC50': 'K',\n '\\uD835\\uDC6A': 'i',\n '\\uD835\\uDC51': 'L',\n '\\uD835\\uDC6B': 'j',\n '\\u{1D63F}': ',',\n '\\uD835\\uDC52': 'M',\n '\\uD835\\uDC6C': 'k',\n '\\u{1D640}': '.',\n '\\uD835\\uDC53': 'N',\n '\\uD835\\uDC6D': 'l',\n '\\uD835\\uDC54': 'O',\n '\\uD835\\uDC6E': 'm',\n '\\uD835\\uDC6F': 'n',\n '\\uD835\\uDC56': 'Q',\n '\\uD835\\uDC70': 'o',\n '\\uD835\\uDC57': 'R',\n '\\uD835\\uDC71': 'p',\n '\\uD835\\uDC58': 'S',\n '\\uD835\\uDC72': 'q',\n '\\uD835\\uDC59': 'T',\n '\\uD835\\uDC73': 'r',\n '\\u{1D647}': '-',\n '\\uD835\\uDC5A': 'U',\n '\\uD835\\uDC74': 's',\n '\\uD835\\uDC5B': 'V',\n '\\uD835\\uDC75': 't',\n '\\uD835\\uDC5C': 'W',\n '\\uD835\\uDC76': 'u',\n '\\uD835\\uDC5D': 'X',\n '\\uD835\\uDC77': 'v',\n '\\uD835\\uDC5E': 'Y',\n '\\uD835\\uDC78': 'w',\n '\\uD835\\uDC5F': 'Z',\n '\\uD835\\uDC79': 'x',\n '\\uD835\\uDC46': 'A',\n '\\uD835\\uDC7A': 'y',\n '\\uD835\\uDC47': 'B',\n '\\uD835\\uDC7B': 'z',\n '\\uD835\\uDC62': 'a',\n '\\uD835\\uDC48': 'C',\n '\\uD835\\uDC63': 'b',\n '\\uD835\\uDC49': 'D',\n '\\uD835\\uDC64': 'c',\n '\\uD835\\uDC4A': 'E',\n '\\uD835\\uDC65': 'd',\n '\\uD835\\uDC4B': 'F',\n '\\uD835\\uDC66': 'e',\n '\\uD835\\uDC4C': 'G',\n '\\uD835\\uDC67': 'f',\n '\\uD835\\uDC4D': 'H',\n '\\uD835\\uDC55': 'P',\n };\n\n return text.replace(/[\\uD835\\uDC62-\\uD835\\uDC7B\\uD835\\uDC46-\\uD835\\uDC5F\\u{1D63C}-\\u{1D647}]/gu, (match) => {\n return italicMap[match] || match;\n });\n};\n\n/**\n * Removes all bold and italic styling from the input text.\n * @param {string} text - The input text to remove styling from.\n * @returns {string} - The modified text with all styling removed.\n */\nexport const stripStyling = (text: string) => {\n return stripItalicsStyling(stripBoldStyling(text));\n};\n\n/**\n * Converts a string to title case (first letter of each word capitalized)\n * @param str - The input string to convert\n * @returns String with each word's first letter capitalized\n */\nexport const toTitleCase = (str: 
string) => {\n return str\n .toLowerCase()\n .split(' ')\n .map((word) => {\n if (word.length === 0) return word;\n // Find the first Unicode letter in the chunk\n const match = word.match(/\\p{L}/u);\n if (!match || match.index === undefined) return word;\n const i = match.index;\n return word.slice(0, i) + word.charAt(i).toUpperCase() + word.slice(i + 1);\n })\n .join(' ');\n};\n\n/**\n * Removes unnecessary spaces inside quotes.\n * Example: '“ Text ”' becomes '“Text”'.\n * @param {string} text - The input text with spaces inside quotes.\n * @returns {string} - The modified text with spaces removed inside quotes.\n */\nexport const trimSpaceInsideQuotes = (text: string) => {\n return text.replace(/([“”\"]|«) *(.*?) *([“”\"]|»)/g, '$1$2$3');\n};\n","/**\n * Converts a string that resembles JSON but with numeric keys and single-quoted values\n * into valid JSON format. This function replaces numeric keys with quoted numeric keys\n * and ensures all values are double-quoted as required by JSON.\n *\n * @param {string} str - The input string that needs to be fixed into valid JSON.\n * @returns {string} - A valid JSON string.\n *\n * @example\n * const result = normalizeJsonSyntax(\"{10: 'abc', 20: 'def'}\");\n * console.log(result); // '{\"10\": \"abc\", \"20\": \"def\"}'\n */\nexport const normalizeJsonSyntax = (str: string) => {\n let input = str.replace(/(\\b\\d+\\b)(?=:)/g, '\"$1\"');\n input = input.replace(/:\\s*'([^']+)'/g, ': \"$1\"');\n input = input.replace(/:\\s*\"([^\"]+)\"/g, ': \"$1\"');\n\n return JSON.stringify(JSON.parse(input));\n};\n\n/**\n * Checks if a given string resembles a JSON object with numeric or quoted keys and values\n * that are single or double quoted. 
This is useful for detecting malformed JSON-like\n * structures that can be fixed by the `normalizeJsonSyntax` function.\n *\n * @param {string} str - The input string to check.\n * @returns {boolean} - Returns true if the string is JSON-like, false otherwise.\n *\n * @example\n * const result = isJsonStructureValid(\"{10: 'abc', 'key': 'value'}\");\n * console.log(result); // true\n */\nexport const isJsonStructureValid = (str: string) => {\n // Checks for a pattern with numeric keys or quoted keys and values in quotes\n const jsonLikePattern =\n /^{(\\s*(\\d+|'[^']*'|\"[^\"]*\")\\s*:\\s*('|\")[^'\"]*\\3\\s*,)*(?:\\s*(\\d+|'[^']*'|\"[^\"]*\")\\s*:\\s*('|\")[^'\"]*\\5\\s*)}$/;\n return jsonLikePattern.test(str.trim());\n};\n\n/**\n * Splits a string by spaces and quoted substrings.\n *\n * This function takes an input string and splits it into parts where substrings\n * enclosed in double quotes are treated as a single part. Other substrings\n * separated by spaces are split normally.\n *\n * @param {string} query - The input string to be split.\n * @returns {string[]} An array of strings, with quoted substrings kept intact.\n *\n * @example\n * const result = splitByQuotes('\"This is\" \"a part of the\" \"string and\"');\n * console.log(result); // [\"This is\", \"a part of the\", \"string and\"]\n */\nexport const splitByQuotes = (query: string): string[] => {\n const regex = /(?:[^\\s\"]+|\"(.*?)\")+/g;\n return (query.match(regex) || []).map((s: string) => (s.startsWith('\"') ? 
s.slice(1, -1) : s));\n};\n\n/**\n * Checks if all double quotes in a string are balanced (even count).\n * A string has balanced quotes if every opening quote has a corresponding closing quote.\n *\n * @param str - The string to check for balanced quotes\n * @returns True if quotes are balanced (even count), false otherwise\n *\n * @example\n * ```typescript\n * areQuotesBalanced('Hello \"world\"') // Returns: true\n * areQuotesBalanced('Hello \"world') // Returns: false\n * areQuotesBalanced('No quotes') // Returns: true\n * ```\n */\nconst areQuotesBalanced = (str: string) => {\n let quoteCount = 0;\n for (const char of str) {\n if (char === '\"') {\n quoteCount++;\n }\n }\n return quoteCount % 2 === 0;\n};\n\nconst brackets = { '(': ')', '[': ']', '{': '}' };\nconst openBrackets = new Set(['(', '[', '{']);\nconst closeBrackets = new Set([')', ']', '}']);\n\n/**\n * Checks if all brackets in a string are properly balanced and matched.\n * This function validates that every opening bracket has a corresponding closing bracket\n * in the correct order and of the matching type.\n *\n * Supported bracket types: parentheses (), square brackets [], curly braces {}\n *\n * @param str - The string to check for balanced brackets\n * @returns True if all brackets are properly balanced and matched, false otherwise\n *\n * @example\n * ```typescript\n * areBracketsBalanced('(hello [world])') // Returns: true\n * areBracketsBalanced('(hello [world)') // Returns: false (mismatched)\n * areBracketsBalanced('((hello))') // Returns: true\n * areBracketsBalanced('(hello') // Returns: false (unclosed)\n * ```\n */\n\nconst areBracketsBalanced = (str: string) => {\n const stack: string[] = [];\n\n for (const char of str) {\n if (openBrackets.has(char)) {\n stack.push(char);\n } else if (closeBrackets.has(char)) {\n const lastOpen = stack.pop();\n if (!lastOpen || brackets[lastOpen as keyof typeof brackets] !== char) {\n return false;\n }\n }\n }\n\n return stack.length === 
0;\n};\n\n/**\n * Checks if both quotes and brackets are balanced in a string.\n * This function combines quote balance checking and bracket balance checking\n * to ensure the entire string has properly balanced punctuation.\n *\n * A string is considered balanced when:\n * - All double quotes have matching pairs (even count)\n * - All brackets (parentheses, square brackets, curly braces) are properly matched and nested\n *\n * @param str - The string to check for balanced quotes and brackets\n * @returns True if both quotes and brackets are balanced, false otherwise\n *\n * @example\n * ```typescript\n * isBalanced('He said \"Hello (world)!\"') // Returns: true\n * isBalanced('He said \"Hello (world!\"') // Returns: false (unbalanced quote)\n * isBalanced('He said \"Hello (world)\"') // Returns: false (unbalanced quote)\n * isBalanced('Hello (world) [test]') // Returns: true\n * ```\n */\nexport const isBalanced = (str: string) => {\n return areQuotesBalanced(str) && areBracketsBalanced(str);\n};\n\n/**\n * Parses page input string into array of page numbers, supporting ranges and lists\n * @param pageInput - Page specification string (e.g., \"1-5\" or \"1,3,5\")\n * @returns Array of page numbers\n * @throws Error when start page exceeds end page in range\n */\nexport const parsePageRanges = (pageInput: string): number[] => {\n if (pageInput.includes('-')) {\n const [start, end] = pageInput.split('-').map(Number);\n\n if (start > end) {\n throw new Error('Start page cannot be greater than end page');\n }\n\n return Array.from({ length: end - start + 1 }, (_, i) => start + i);\n } else {\n return pageInput.split(',').map(Number);\n }\n};\n","import { escapeRegex } from './cleaning';\n\n/**\n * Removes various symbols, part references, and numerical markers from the text.\n * Example: '(1) (2/3)' becomes ''.\n * @param {string} text - The input text to apply the rule to.\n * @returns {string} - The modified text with symbols and part references removed.\n 
*/\nexport const cleanSymbolsAndPartReferences = (text: string) => {\n return text.replace(\n / *\\(?:\\d+(?:\\/\\d+){0,2}\\)? *| *\\[\\d+(?:\\/\\d+)?\\] *| *«\\d+» *|\\d+\\/\\d+(?:\\/\\d+)?|[،§{}۝؍‎﴿﴾<>;_؟»«:!،؛[\\]…ـ¬.\\\\/*()\"]/g,\n ' ',\n );\n};\n\n/**\n * Removes trailing page numbers formatted as '-[46]-' from the text.\n * Example: 'This is some -[46]- text' becomes 'This is some text'.\n * @param {string} text - The input text with trailing page numbers.\n * @returns {string} - The modified text with page numbers removed.\n */\nexport const cleanTrailingPageNumbers = (text: string) => {\n return text.replace(/-\\[\\d+\\]-/g, '');\n};\n\n/**\n * Replaces consecutive line breaks and whitespace characters with a single space.\n * Example: 'a\\nb' becomes 'a b'.\n * @param {string} text - The input text containing line breaks or multiple spaces.\n * @returns {string} - The modified text with spaces.\n */\nexport const replaceLineBreaksWithSpaces = (text: string) => {\n return text.replace(/\\s+/g, ' ');\n};\n\n/**\n * Removes all numeric digits from the text.\n * Example: 'abc123' becomes 'abc'.\n * @param {string} text - The input text containing digits.\n * @returns {string} - The modified text with digits removed.\n */\nexport const stripAllDigits = (text: string) => {\n return text.replace(/[0-9]/g, '');\n};\n\n/**\n * Removes death year references like \"(d. 390H)\" and \"[d. 100h]\" from the text.\n * Example: 'Sufyān ibn ‘Uyaynah (d. 
198h)' becomes 'Sufyān ibn ‘Uyaynah'.\n * @param {string} text - The input text containing death year references.\n * @returns {string} - The modified text with death years removed.\n */\nexport const removeDeathYear = (text: string) => {\n return text.replace(/\\[(d)\\.\\s*\\d{1,4}[hH]\\]\\s*|\\((d)\\.\\s*\\d{1,4}[hH]\\)\\s*/g, '');\n};\n\n/**\n * Removes numeric digits and dashes from the text.\n * Example: 'ABC 123-Xyz' becomes 'ABC Xyz'.\n * @param {string} text - The input text containing digits and dashes.\n * @returns {string} - The modified text with numbers and dashes removed.\n */\nexport const removeNumbersAndDashes = (text: string) => {\n return text.replace(/[\\d-]/g, '');\n};\n\n/**\n * Removes single digit references like (1), «2», [3] from the text.\n * Example: 'Ref (1), Ref «2», Ref [3]' becomes 'Ref , Ref , Ref '.\n * @param {string} text - The input text containing single digit references.\n * @returns {string} - The modified text with single digit references removed.\n */\nexport const removeSingleDigitReferences = (text: string) => {\n return text.replace(/\\(\\d{1}\\)|\\[\\d{1}\\]|«\\d»/g, '');\n};\n\n/**\n * Removes URLs from the text.\n * Example: 'Visit https://example.com' becomes 'Visit '.\n * @param {string} text - The input text containing URLs.\n * @returns {string} - The modified text with URLs removed.\n */\nexport const removeUrls = (text: string) => {\n return text.replace(\n /https?:\\/\\/(www\\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/g,\n '',\n );\n};\n\n/**\n * Removes common Markdown formatting syntax from text\n * @param text - The input text containing Markdown formatting\n * @returns Text with Markdown formatting removed (bold, italics, headers, lists, backticks)\n */\nexport const removeMarkdownFormatting = (text: string) => {\n return (\n text\n // Remove bold first (**text**) - must come before italics\n .replace(/\\*\\*([^*]+)\\*\\*/g, '$1')\n // Remove bold with underscores 
(__text__)\n .replace(/__([^_]+)__/g, '$1')\n // Remove italics (*text*)\n .replace(/\\*([^*]+)\\*/g, '$1')\n // Remove italics with underscores (_text_)\n .replace(/_([^_]+)_/g, '$1')\n // Remove strikethrough (~~text~~)\n .replace(/~~([^~]+)~~/g, '$1')\n // Remove blockquotes\n .replace(/^\\s*>\\s?/gm, '')\n // Remove images ![alt](url)\n .replace(/!\\[[^\\]]*]\\([^)]*\\)/g, '')\n // Convert links [text](url) -> text\n .replace(/\\[([^\\]]+)]\\([^)]*\\)/g, '$1')\n // Remove headers (# ## ### etc.)\n .replace(/^#+\\s*/gm, '')\n // Remove unordered list markers (- * +)\n .replace(/^\\s*[-*+]\\s+/gm, '')\n // Remove ordered list markers (1. 2. etc.)\n .replace(/^\\s*\\d+\\.\\s+/gm, '')\n // Remove backticks\n .replace(/`/gm, '')\n );\n};\n\n/**\n * Truncates a string to a specified length, adding an ellipsis if truncated.\n *\n * @param val - The string to truncate\n * @param n - Maximum length of the string (default: 150)\n * @returns The truncated string with ellipsis if needed, otherwise the original string\n *\n * @example\n * ```javascript\n * truncate('The quick brown fox jumps over the lazy dog', 20);\n * // Output: 'The quick brown fox…'\n *\n * truncate('Short text', 50);\n * // Output: 'Short text'\n * ```\n */\nexport const truncate = (val: string, n = 150): string => (val.length > n ? 
`${val.substring(0, n - 1)}…` : val);\n\n/**\n * Truncates a string from the middle, preserving both the beginning and end portions.\n *\n * @param text - The string to truncate\n * @param maxLength - Maximum length of the resulting string (default: 50)\n * @param endLength - Number of characters to preserve at the end (default: 1/3 of maxLength, minimum 3)\n * @returns The truncated string with ellipsis in the middle if needed, otherwise the original string\n *\n * @example\n * ```javascript\n * truncateMiddle('The quick brown fox jumps right over the lazy dog', 20);\n * // Output: 'The quick bro…zy dog'\n *\n * truncateMiddle('The quick brown fox jumps right over the lazy dog', 25, 8);\n * // Output: 'The quick brown …lazy dog'\n *\n * truncateMiddle('Short text', 50);\n * // Output: 'Short text'\n * ```\n */\nexport const truncateMiddle = (text: string, maxLength: number = 50, endLength?: number) => {\n if (text.length <= maxLength) {\n return text;\n }\n\n // Default end length is roughly 1/3 of max length, minimum 3 characters\n const defaultEndLength = Math.max(3, Math.floor(maxLength / 3));\n const actualEndLength = endLength ?? 
defaultEndLength;\n\n // Reserve space for the ellipsis character (1 char)\n const availableLength = maxLength - 1;\n\n // Calculate start length (remaining space after end portion)\n const startLength = availableLength - actualEndLength;\n\n // Ensure we have at least some characters at the start\n if (startLength < 1) {\n // If we can't fit both start and end, just truncate normally\n return `${text.substring(0, maxLength - 1)}…`;\n }\n\n const startPortion = text.substring(0, startLength);\n const endPortion = text.substring(text.length - actualEndLength);\n\n return `${startPortion}…${endPortion}`;\n};\n\n/**\n * Unescapes backslash-escaped spaces and trims whitespace from both ends.\n * Commonly used to clean file paths that have been escaped when pasted into terminals.\n *\n * @param input - The string to unescape and clean\n * @returns The cleaned string with escaped spaces converted to regular spaces and trimmed\n *\n * @example\n * ```javascript\n * unescapeSpaces('My\\\\ Folder\\\\ Name');\n * // Output: 'My Folder Name'\n *\n * unescapeSpaces(' /path/to/My\\\\ Document.txt ');\n * // Output: '/path/to/My Document.txt'\n *\n * unescapeSpaces('regular text');\n * // Output: 'regular text'\n * ```\n */\nexport const unescapeSpaces = (input: string) => input.replace(/\\\\ /g, ' ').trim();\n\n/**\n * Arabic diacritics (Tashkeel/Harakat).\n */\nconst DIACRITICS_CLASS = '[\\u064B\\u064C\\u064D\\u064E\\u064F\\u0650\\u0651\\u0652]';\n\n/**\n * Groups of equivalent Arabic characters — any character in a group should match\n * any other character in the same group.\n */\nconst EQUIV_GROUPS: string[][] = [\n ['\\u0627', '\\u0622', '\\u0623', '\\u0625'], // ا, آ, أ, إ\n ['\\u0629', '\\u0647'], // ة <-> ه\n ['\\u0649', '\\u064A'], // ى <-> ي\n];\n\n/** Return a character class for a char if it belongs to an equivalence group. 
*/\nconst getEquivClass = (ch: string): string => {\n for (const group of EQUIV_GROUPS) {\n if (group.includes(ch)) {\n // join the group's members into a character class\n return `[${group.map((c) => escapeRegex(c)).join('')}]`;\n }\n }\n // not in equivalence groups -> return escaped character\n return escapeRegex(ch);\n};\n\n/** Small safe normalization: NFC, remove ZWJ/ZWNJ, collapse spaces. */\nconst normalizeArabicLight = (str: string) => {\n return str\n .normalize('NFC')\n .replace(/[\\u200C\\u200D]/g, '') // remove ZWJ/ZWNJ\n .replace(/\\s+/g, ' ')\n .trim();\n};\n\n/**\n * Creates a diacritic-insensitive regex pattern for Arabic text matching.\n * Normalizes text, handles character equivalences (ا/آ/أ/إ, ة/ه, ى/ي),\n * and makes each character tolerant of Arabic diacritics (Tashkeel/Harakat)\n * @param text - Input Arabic text to make diacritic-insensitive\n * @returns Regex pattern string that matches the text with or without diacritics and character variants\n */\nexport const makeDiacriticInsensitive = (text: string) => {\n const diacriticsMatcher = `${DIACRITICS_CLASS}*`;\n const norm = normalizeArabicLight(text);\n // Use Array.from to iterate grapheme-safe over the string (works fine for Arabic letters)\n return Array.from(norm)\n .map((ch) => getEquivClass(ch) + diacriticsMatcher)\n .join('');\n};\n","import { normalizeSpaces } from './formatting';\n\n/**\n * Replaces common Arabic prefixes (like 'Al-', 'Ar-', 'Ash-', etc.) 
with 'al-' in the text.\n * Handles different variations of prefixes such as Ash- and Al- but not when the second word\n * does not start with 'S'.\n * Example: 'Ash-Shafiee' becomes 'al-Shafiee'.\n *\n * @param {string} text - The input text containing Arabic prefixes.\n * @returns {string} - The modified text with standardized 'al-' prefixes.\n */\nexport const normalizeArabicPrefixesToAl = (text: string) => {\n return text\n .replace(/(\\b|\\W)(Al |Al-|Ar-|As-|Adh-|Ad-|Ats-|Ath |Ath-|Az |Az-|az-|adh-|as-|ar-)/g, '$1al-')\n .replace(/(\\b|\\W)(Ash-S|ash-S)/g, '$1al-S')\n .replace(/al- (.+?)\\b/g, 'al-$1');\n};\n\n/**\n * Removes double occurrences of Arabic apostrophes such as ʿʿ or ʾʾ in the text.\n * Example: 'ʿulamāʾʾ' becomes 'ʿulamāʾ'.\n *\n * @param {string} text - The input text containing double apostrophes.\n * @returns {string} - The modified text with condensed apostrophes.\n */\nexport const normalizeDoubleApostrophes = (text: string) => {\n return text.replace(/ʿʿ/g, 'ʿ').replace(/ʾʾ/g, 'ʾ');\n};\n\n/**\n * Replaces common salutations such as \"sallahu alayhi wasallam\" with \"ﷺ\" in the text.\n * It also handles variations of the salutation phrase, including 'peace and blessings be upon him'.\n * Example: 'Then Muḥammad (sallahu alayhi wasallam)' becomes 'Then Muḥammad ﷺ'.\n *\n * @param {string} text - The input text containing salutations.\n * @returns {string} - The modified text with salutations replaced.\n */\nexport const replaceSalutationsWithSymbol = (text: string) => {\n return text\n .replace(\n /\\(peace be upon him\\)|(Messenger of (Allah|Allāh)|Messenger|Prophet|Mu[hḥ]ammad) *\\((s[^)]*m|peace[^)]*him|May[^)]*him|may[^)]*him)\\)*/gi,\n '$1 ﷺ',\n )\n .replace(/,\\s*ﷺ\\s*,/g, ' ﷺ');\n};\n\n/**\n * Normalizes the text by removing diacritics, apostrophes, and dashes.\n * Example: 'Al-Jadwal' becomes 'AlJadwal'.\n *\n * @param {string} input - The input text to normalize.\n * @returns {string} - The normalized text.\n */\nexport const 
normalize = (input: string) => {\n return input\n .normalize('NFKD')\n .replace(/[\\u0300-\\u036f]/g, '')\n .replace(/`|ʾ|ʿ|-/g, '');\n};\n\n/**\n * Strips common Arabic prefixes like 'al-', 'bi-', 'fī', 'wa-', etc. from the beginning of words.\n * Example: 'al-Bukhari' becomes 'Bukhari'.\n *\n * @param {string} text - The input text containing Arabic prefixes.\n * @returns {string} - The modified text with prefixes stripped.\n */\nexport const removeArabicPrefixes = (text: string) => {\n return normalizeSpaces(text.replace(/(\\bal-|\\bli-|\\bbi-|\\bfī|\\bwa[-\\s]+|\\bl-|\\bliʿl|\\Bʿalá|\\Bʿan|\\bb\\.)/gi, ''));\n};\n\n/**\n * Simplifies English transliterations by removing diacritics, apostrophes, and common prefixes.\n * Example: 'Al-Jadwal' becomes 'Jadwal', and 'āḍġḥīṣṭū' becomes 'adghistu'.\n *\n * @param {string} text - The input text to simplify.\n * @returns {string} - The simplified text.\n */\nexport const normalizeTransliteratedEnglish = (text: string) => normalize(removeArabicPrefixes(text));\n\n/**\n * Extracts the initials from the input string, typically used for names or titles.\n * Example: 'Nayl al-Awtar' becomes 'NA'.\n *\n * @param {string} text - The input text to extract initials from.\n * @returns {string} - The extracted initials.\n */\nexport const extractInitials = (fullName: string) => {\n const initials = normalizeTransliteratedEnglish(fullName)\n .trim()\n .split(/[ -]/)\n .slice(0, 2)\n .map((word) => {\n return word.charAt(0).toUpperCase();\n })\n .join('');\n return 
initials;\n};\n"],"mappings":"AAuBO,IAAMA,EAAyBC,GAC3B,SACHA,EAAO,QAAQ,mBAAqBC,IAAOA,EAAE,WAAW,CAAC,EAAI,MAAQ,SAAS,CAAC,EAC/E,EACJ,EAUSC,EAAiCC,GACnCA,EAAK,QAAQ,qCAAsC,EAAE,EASnDC,EAA8BD,GAChCA,EAAK,QAAQ,KAAM,QAAG,EAAE,QAAQ,KAAM,QAAG,EASvCE,EAAkBF,GAAiB,CAC5C,GAAI,CAACA,EACD,MAAO,GAGX,IAAMG,EAAuB,uEAEvBC,EAAkB,mCAElBC,EAAsB,sCACtBC,EAAUN,EAAK,QAAQI,EAAiB,EAAE,EAC1CG,EAAgBD,EAAQ,MAAMH,CAAoB,GAAK,CAAC,EACxDK,EAAeF,EAAQ,MAAMD,CAAmB,GAAK,CAAC,EAC5D,OAAOG,EAAa,SAAW,EAAI,EAAID,EAAc,OAASC,EAAa,MAC/E,EASaC,EAAkBT,GACpBA,EAAK,QAAQ,OAAQ,SAAI,EASvBU,EAAuCV,GACzCA,EAAK,QAAQ,2BAA4B,OAAO,EAS9CW,EAA4BX,GAC9BA,EACF,QAAQ,iEAAkE,GAAG,EAC7E,QAAQ,8DAA+D,GAAG,EAStEY,EAAuBZ,GACzBA,EAAK,QAAQ,2CAA4C,EAAE,EASzDa,EAA+Bb,GACjCA,EAAK,QAAQ,6BAA8B,GAAG,EAS5Cc,EAAuCd,GACzCA,EACF,QAAQ,UAAW,QAAG,EACtB,QAAQ,oBAAqB,QAAG,EAChC,QAAQ,QAAS,QAAG,ECvI7B,IAAMe,EAAmB,yDAEnBC,EAAgB,UAQTC,EAAeC,GAAsBA,EAAE,QAAQ,sBAAuB,MAAM,EA+D5EC,EAAgC,CAACC,EAAgBC,EAAyB,CAAC,IAAc,CAClG,GAAM,CACF,aAAAC,EAAe,CAAE,KAAM,GAAM,aAAc,GAAM,eAAgB,EAAK,EACtE,aAAAC,EAAe,GACf,iBAAAC,EAAmB,GACnB,eAAAC,EAAiB,GACjB,MAAAC,EAAQ,GACZ,EAAIL,EAGJ,GAAID,EAAO,OAAS,IAChB,MAAM,IAAI,MAAM,gDAAgD,EAGpE,IAAMO,EAAaC,GAAuB,CACtC,OAAQA,EAAI,CACR,IAAK,SACL,IAAK,SACL,IAAK,SACL,IAAK,SACD,OAAON,EAAa,KAAO,6BAAW,SAC1C,IAAK,SACL,IAAK,SACD,OAAOA,EAAa,aAAe,iBAASL,EAAYW,CAAE,EAC9D,IAAK,SACL,IAAK,SACD,OAAON,EAAa,eAAiB,iBAASL,EAAYW,CAAE,EAChE,QACI,OAAOX,EAAYW,CAAE,CAC7B,CACJ,EAEMC,EAAQ,GAAGL,EAAmB,GAAGT,CAAgB,IAAM,EAAE,GAAGQ,EAAe,GAAGP,CAAa,IAAM,EAAE,GAErGc,EAAU,GACd,QAAWF,KAAM,MAAM,KAAKR,CAAM,EAC1B,KAAK,KAAKQ,CAAE,EACZE,GAAWL,EAAiB,OAAS,OAErCK,GAAW,GAAGH,EAAUC,CAAE,CAAC,GAAGC,CAAK,GAI3C,OAAO,IAAI,OAAOC,EAASJ,CAAK,CACpC,EChHO,IAAMK,EAAoCC,GAAiB,CAE9D,IAAMC,EAAc,YAKpB,OAFsBD,EAAK,QAAQC,EAAa;AAAA,CAAM,EAAE,QAAQ,SAAU;AAAA,CAAI,EAAE,KAAK,CAGzF,EAQaC,EAAqCF,GACvCA,EACF,QAAQ,iDAAkD,OAAO,EACjE,QAAQ,mCAAoC,MAAM,EAClD,QAAQ,uDAAwD,QAAQ,EACxE,QAAQ,yEAA0E,IAAI,EAUlFG,EAAoBH,GACtBA,EACF,QAAQ,QAAS,GAAG,EACpB,QAAQ,aAAc,gBAAM,EAC5B,QAAQ,MAAO,QAAG,EASdI,EAAwBJ,GAC1BA,EAAK,QAAQ,UAAW;AA
AA,CAAI,EAS1BK,EAAmBL,GACrBA,EAAK,QAAQ,YAAa,EAAE,EAQ1BM,EAAuBN,GACzB,gBAAgB,KAAKA,CAAI,EAQvBO,EAAqBP,GAChB,kEACD,KAAKA,CAAI,EASbQ,EAA2BR,GAC7BA,EAAK,QAAQ,mBAAoB,IAAI,EASnCS,EAAqBT,GACvBA,EAAK,QAAQ,YAAa,GAAG,EAS3BU,EAAkBV,GACpBA,EAAK,QAAQ,eAAgB,GAAG,EAS9BW,EAAkBX,GACpBA,EAAK,QAAQ,SAAU,GAAG,EASxBY,EAAoBZ,GACtBA,EAAK,QAAQ,UAAW,QAAG,EASzBa,GAAiCb,GACnCA,EAAK,QAAQ,eAAgB;AAAA;AAAA,CAAM,EASjCc,GAAiCd,GACnCA,EAAK,QAAQ,eAAgB;AAAA,CAAI,EAS/Be,GAAmBf,GACrBA,EAAK,QAAQ,UAAW,GAAG,EASzBgB,GAAuBhB,GACzBA,EAAK,QAAQ,SAAU,QAAG,EAAE,QAAQ,MAAO,GAAG,EAS5CiB,GAA0BjB,GAC5BA,EAAK,QAAQ,2BAA4B,MAAM,EAS7CkB,GAA6BlB,GAC/BA,EAAK,QAAQ,qBAAsB,OAAO,EASxCmB,GAA2BnB,GAC7BA,EAAK,QAAQ,mBAAoB,OAAO,EAStCoB,GAAmBpB,GAExBA,EACK,QAAQ,aAAc,MAAG,EACzB,QAAQ,aAAc,MAAG,EAEzB,QAAQ,6BAA8B,MAAM,EAE5C,QAAQ,6BAA8B,MAAM,EAU5CqB,GAAkBrB,GAAiB,CAE5C,IAAIsB,EAAStB,EAGb,OAAAsB,EAASA,EAAO,QAAQ,kBAAmB,MAAM,EAG1CA,EAAO,QAAQ,kBAAmB,MAAM,CACnD,EAQaC,GAA+BvB,GAEpCA,EAEK,QAAQ,eAAgB,YAAM,EAE9B,QAAQ,eAAgB,YAAM,EAE9B,QAAQ,sBAAuB,YAAM,EAWrCwB,GAA0BC,GAAkB,CACrD,IAAMC,EAAgB,mCAChBC,EAAsB,CAAC,EACvBC,EAAQH,EAAM,MAAM;AAAA,CAAI,EAC1BI,EAAkB,GAEtB,OAAAD,EAAM,QAASE,GAAS,CACpB,IAAMC,EAAcD,EAAK,KAAK,EACxBE,EAAaN,EAAc,KAAKK,CAAW,EAC3CE,EAAW,gBAAgB,KAAKF,CAAW,EAEjD,GAAIC,GAAc,CAACC,EACXJ,IACAF,EAAU,KAAKE,EAAgB,KAAK,CAAC,EACrCA,EAAkB,IAEtBF,EAAU,KAAKI,CAAW,MACvB,CACHF,GAAmB,GAAGE,CAAW,IACjC,IAAMG,EAAWL,EAAgB,KAAK,EAAE,MAAM,EAAE,EAC5C,QAAQ,KAAKK,CAAQ,IACrBP,EAAU,KAAKE,EAAgB,KAAK,CAAC,EACrCA,EAAkB,GAE1B,CACJ,CAAC,EAGGA,GACAF,EAAU,KAAKE,EAAgB,KAAK,CAAC,EAGlCF,EAAU,KAAK;AAAA,CAAI,CAC9B,EAOaQ,GAAkBnC,GAAiB,CAG5C,IAAMoC,EAAcpC,EAAK,QAAQ,aAAc,EAAE,EAGjD,OAAIoC,EAAY,SAAW,EAChB,GAGJA,IAAgBA,EAAY,YAAY,CACnD,EAQaC,GAA8BrC,GAChCA,EAAK,QAAQ,sBAAuB,OAAO,EASzCsC,EAAmBtC,GACrBA,EAAK,QAAQ,UAAW,GAAG,EAmBzBuC,GAA8BvC,GAChCA,EAAK,QAAQ,cAAe,IAAI,EAS9BwC,GAA6BxC,GAC/BA,EAAK,QAAQ,4BAA6B,QAAQ,EAShDyC,GAAmCzC,GACrCA,EAAK,QAAQ,WAAY,MAAG,EAAE,QAAQ,WAAY,MAAG,EAQnD0C,EAAoB1C,GAEJA,EAAK,UAAU,MAAM,EAGtB,QAAQ,mBAAoB,EAAE,EAAE,KAAK,EASpD2C,EAAuB3C,GAAiB,CACjD,IAAM4C,EAAoC,CAC
tC,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAa,IACb,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,IAChB,YAAgB,GACpB,EAEA,OAAO5C,EAAK,QAAQ,4EAA8E6C,GACvFD,EAAUC,CAAK,GAAKA,CAC9B,CACL,EAOaC,GAAgB9C,GAClB2C,EAAoBD,EAAiB1C,CAAI,CAAC,EAQxC+C,GAAeC,GACjBA,EACF,YAAY,EACZ,MAAM,GAAG,EACT,IAAKC,GAAS,CACX,GAAIA,EAAK,SAAW,EAAG,OAAOA,EAE9B,IAAMJ,EAAQI,EAAK,MAAM,QAAQ,EACjC,GAAI,CAACJ,GAASA,EAAM,QAAU,OAAW,OAAOI,EAChD,IAAMC,EAAIL,EAAM,MAChB,OAAOI,EAAK,MAAM,EAAGC,CAAC,EAAID,EAAK,OAAOC,CAAC,EAAE,YAAY,EAAID,EAAK,MAAMC,EAAI,CAAC,CAC7E,CAAC,EACA,KAAK,GAAG,EASJC,GAAyBnD,GAC3BA,EAAK,QAAQ,+BAAgC,QAAQ,ECrezD,IAAMoD,GAAuBC,GAAgB,CAChD,IAAIC,EAAQD,EAAI,QAAQ,kBAAmB,MAAM,EACjD,OAAAC,EAAQA,EAAM,QAAQ,iBAAkB,QAAQ,EAChDA,EAAQA,EAAM,QAAQ,iBAAkB,QAAQ,EAEzC,KAAK,UAAU,KAAK,MAAMA,CAAK,CAAC,CAC3C,EAcaC,GAAwBF,GAG7B,6GACmB,KAAKA,EAAI,KAAK,CAAC,EAiB7BG,GAAiBC,GAA4B,CACtD,IAAMC,EAAQ,wBACd,OAAQD,EAAM,MAAMC,CAAK,GAAK,CAAC,GAAG,IAAKC,GAAeA,EAAE,WAAW,GAAG,EAAIA,EAAE,MAAM,EAAG,EAAE,EAAIA,CAAE,CACjG,EAgBMC,EAAqBP,GAAgB,CACvC,IAAIQ,EAAa,EACjB,QAAWC,KAAQT,EACXS,IAAS,KACTD,IAGR,OAAOA,EAAa,IAAM,CAC9B,EAEME,EAAW,CAAE,IAAK,IAAK,IAAK,IAAK,IAAK,GAAI,EAC1CC,EAAe,IAAI,IAAI,CAAC,IAAK,IAAK,GAAG,CAAC,EACtCC,EAAgB,IAAI,IAAI,CAAC,IAAK,IAAK,GAAG,CAAC,EAqBvCC,EAAuBb,GAAgB,CACzC,IAAMc,EAAkB,CAAC,EAEzB,QAAWL,KAAQT,EACf,GAAIW,EAAa,IAAIF,CAAI,EACrBK,EAAM,KAAKL,CAAI,UACRG,EAAc,IAAIH,CAAI,EAAG,CAChC,IAAMM,EAAWD,EAAM,IAAI,EAC3B,GAAI,CAACC,GAAYL,EAASK,CAAiC,IAAMN,EAC7D,MAAO,EAEf,CAGJ,OAA
OK,EAAM,SAAW,CAC5B,EAsBaE,GAAchB,GAChBO,EAAkBP,CAAG,GAAKa,EAAoBb,CAAG,EAS/CiB,GAAmBC,GAAgC,CAC5D,GAAIA,EAAU,SAAS,GAAG,EAAG,CACzB,GAAM,CAACC,EAAOC,CAAG,EAAIF,EAAU,MAAM,GAAG,EAAE,IAAI,MAAM,EAEpD,GAAIC,EAAQC,EACR,MAAM,IAAI,MAAM,4CAA4C,EAGhE,OAAO,MAAM,KAAK,CAAE,OAAQA,EAAMD,EAAQ,CAAE,EAAG,CAACE,EAAGC,IAAMH,EAAQG,CAAC,CACtE,KACI,QAAOJ,EAAU,MAAM,GAAG,EAAE,IAAI,MAAM,CAE9C,EC5JO,IAAMK,GAAiCC,GACnCA,EAAK,QACR,wHACA,GACJ,EASSC,GAA4BD,GAC9BA,EAAK,QAAQ,aAAc,EAAE,EAS3BE,GAA+BF,GACjCA,EAAK,QAAQ,OAAQ,GAAG,EAStBG,GAAkBH,GACpBA,EAAK,QAAQ,SAAU,EAAE,EASvBI,GAAmBJ,GACrBA,EAAK,QAAQ,yDAA0D,EAAE,EASvEK,GAA0BL,GAC5BA,EAAK,QAAQ,SAAU,EAAE,EASvBM,GAA+BN,GACjCA,EAAK,QAAQ,4BAA6B,EAAE,EAS1CO,GAAcP,GAChBA,EAAK,QACR,uGACA,EACJ,EAQSQ,GAA4BR,GAEjCA,EAEK,QAAQ,mBAAoB,IAAI,EAEhC,QAAQ,eAAgB,IAAI,EAE5B,QAAQ,eAAgB,IAAI,EAE5B,QAAQ,aAAc,IAAI,EAE1B,QAAQ,eAAgB,IAAI,EAE5B,QAAQ,aAAc,EAAE,EAExB,QAAQ,uBAAwB,EAAE,EAElC,QAAQ,wBAAyB,IAAI,EAErC,QAAQ,WAAY,EAAE,EAEtB,QAAQ,iBAAkB,EAAE,EAE5B,QAAQ,iBAAkB,EAAE,EAE5B,QAAQ,MAAO,EAAE,EAoBjBS,GAAW,CAACC,EAAaC,EAAI,MAAiBD,EAAI,OAASC,EAAI,GAAGD,EAAI,UAAU,EAAGC,EAAI,CAAC,CAAC,SAAMD,EAsB/FE,GAAiB,CAACZ,EAAca,EAAoB,GAAIC,IAAuB,CACxF,GAAId,EAAK,QAAUa,EACf,OAAOb,EAIX,IAAMe,EAAmB,KAAK,IAAI,EAAG,KAAK,MAAMF,EAAY,CAAC,CAAC,EACxDG,EAAkBF,GAAaC,EAM/BE,EAHkBJ,EAAY,EAGEG,EAGtC,GAAIC,EAAc,EAEd,MAAO,GAAGjB,EAAK,UAAU,EAAGa,EAAY,CAAC,CAAC,SAG9C,IAAMK,EAAelB,EAAK,UAAU,EAAGiB,CAAW,EAC5CE,EAAanB,EAAK,UAAUA,EAAK,OAASgB,CAAe,EAE/D,MAAO,GAAGE,CAAY,SAAIC,CAAU,EACxC,EAqBaC,GAAkBC,GAAkBA,EAAM,QAAQ,OAAQ,GAAG,EAAE,KAAK,EAK3EC,EAAmB,qDAMnBC,EAA2B,CAC7B,CAAC,SAAU,SAAU,SAAU,QAAQ,EACvC,CAAC,SAAU,QAAQ,EACnB,CAAC,SAAU,QAAQ,CACvB,EAGMC,EAAiBC,GAAuB,CAC1C,QAAWC,KAASH,EAChB,GAAIG,EAAM,SAASD,CAAE,EAEjB,MAAO,IAAIC,EAAM,IAAKC,GAAMC,EAAYD,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,IAI5D,OAAOC,EAAYH,CAAE,CACzB,EAGMI,EAAwBC,GACnBA,EACF,UAAU,KAAK,EACf,QAAQ,kBAAmB,EAAE,EAC7B,QAAQ,OAAQ,GAAG,EACnB,KAAK,EAUDC,GAA4B/B,GAAiB,CACtD,IAAMgC,EAAoB,GAAGV,CAAgB,IACvCW,EAAOJ,EAAqB7B,CAAI,EAEtC,OAAO,MAAM,KAAKiC,CAAI,EACjB,IAAKR,GAAOD,EAAcC,CAAE,EAAI
O,CAAiB,EACjD,KAAK,EAAE,CAChB,ECxPO,IAAME,GAA+BC,GACjCA,EACF,QAAQ,6EAA8E,OAAO,EAC7F,QAAQ,wBAAyB,QAAQ,EACzC,QAAQ,eAAgB,OAAO,EAU3BC,GAA8BD,GAChCA,EAAK,QAAQ,MAAO,QAAG,EAAE,QAAQ,MAAO,QAAG,EAWzCE,GAAgCF,GAClCA,EACF,QACG,2IACA,WACJ,EACC,QAAQ,aAAc,SAAI,EAUtBG,EAAaC,GACfA,EACF,UAAU,MAAM,EAChB,QAAQ,mBAAoB,EAAE,EAC9B,QAAQ,WAAY,EAAE,EAUlBC,EAAwBL,GAC1BM,EAAgBN,EAAK,QAAQ,uEAAwE,EAAE,CAAC,EAUtGO,EAAkCP,GAAiBG,EAAUE,EAAqBL,CAAI,CAAC,EASvFQ,GAAmBC,GACXF,EAA+BE,CAAQ,EACnD,KAAK,EACL,MAAM,MAAM,EACZ,MAAM,EAAG,CAAC,EACV,IAAKC,GACKA,EAAK,OAAO,CAAC,EAAE,YAAY,CACrC,EACA,KAAK,EAAE","names":["arabicNumeralToNumber","arabic","c","cleanExtremeArabicUnderscores","text","convertUrduSymbolsToArabic","getArabicScore","arabicLettersPattern","allDigitPattern","countedCharsPattern","cleaned","arabicMatches","totalMatches","fixTrailingWow","addSpaceBetweenArabicTextAndNumbers","removeNonIndexSignatures","removeSingularCodes","removeSolitaryArabicLetters","replaceEnglishPunctuationWithArabic","DIACRITICS_CLASS","TATWEEL_CLASS","escapeRegex","s","makeDiacriticInsensitiveRegex","needle","opts","equivalences","allowTatweel","ignoreDiacritics","flexWhitespace","flags","charClass","ch","after","pattern","insertLineBreaksAfterPunctuation","text","punctuation","addSpaceBeforeAndAfterPunctuation","applySmartQuotes","cleanLiteralNewLines","cleanMultilines","hasWordInSingleLine","isOnlyPunctuation","cleanSpacesBeforePeriod","condenseAsterisks","condenseColons","condenseDashes","condenseEllipsis","reduceMultilineBreaksToDouble","reduceMultilineBreaksToSingle","condensePeriods","condenseUnderscores","doubleToSingleBrackets","ensureSpaceBeforeBrackets","ensureSpaceBeforeQuotes","fixBracketTypos","fixCurlyBraces","result","fixMismatchedQuotationMarks","formatStringBySentence","input","footnoteRegex","sentences","lines","currentSentence","line","trimmedLine","isFootnote","isNumber","lastChar","isAllUppercase","lettersOnly","normalizeSlashInReferences","normalizeSpaces","removeRedundantPunctuation","removeSpaceInside
Brackets","replaceDoubleBracketsWithArrows","stripBoldStyling","stripItalicsStyling","italicMap","match","stripStyling","toTitleCase","str","word","i","trimSpaceInsideQuotes","normalizeJsonSyntax","str","input","isJsonStructureValid","splitByQuotes","query","regex","s","areQuotesBalanced","quoteCount","char","brackets","openBrackets","closeBrackets","areBracketsBalanced","stack","lastOpen","isBalanced","parsePageRanges","pageInput","start","end","_","i","cleanSymbolsAndPartReferences","text","cleanTrailingPageNumbers","replaceLineBreaksWithSpaces","stripAllDigits","removeDeathYear","removeNumbersAndDashes","removeSingleDigitReferences","removeUrls","removeMarkdownFormatting","truncate","val","n","truncateMiddle","maxLength","endLength","defaultEndLength","actualEndLength","startLength","startPortion","endPortion","unescapeSpaces","input","DIACRITICS_CLASS","EQUIV_GROUPS","getEquivClass","ch","group","c","escapeRegex","normalizeArabicLight","str","makeDiacriticInsensitive","diacriticsMatcher","norm","normalizeArabicPrefixesToAl","text","normalizeDoubleApostrophes","replaceSalutationsWithSymbol","normalize","input","removeArabicPrefixes","normalizeSpaces","normalizeTransliteratedEnglish","extractInitials","fullName","word"]}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "bitaboom",
3
3
  "description": "Use string utils library to format Arabic and English translations.",
4
- "version": "1.4.1",
4
+ "version": "2.0.0",
5
5
  "author": "Ragaeeb Haq",
6
6
  "license": "MIT",
7
7
  "private": false,
@@ -22,29 +22,23 @@
22
22
  "node": ">=22.0.0"
23
23
  },
24
24
  "files": [
25
- "dist/index.js",
26
- "dist/index.js.map",
27
- "dist/*.d.ts"
25
+ "dist/**"
28
26
  ],
29
27
  "scripts": {
30
28
  "build": "tsup",
31
- "test": "bun test --coverage --coverage-reporter=lcov"
29
+ "lint": "biome check --write .",
30
+ "format": "biome format --write .",
31
+ "lint:ci": "biome ci"
32
32
  },
33
33
  "keywords": [
34
34
  "formatting",
35
35
  "arabic"
36
36
  ],
37
37
  "devDependencies": {
38
- "@eslint/js": "^9.34.0",
38
+ "@biomejs/biome": "^2.2.4",
39
39
  "@types/bun": "^1.2.21",
40
- "@types/node": "^24.3.0",
41
- "eslint": "^9.34.0",
42
- "eslint-plugin-perfectionist": "^4.15.0",
43
- "eslint-plugin-prettier": "^5.5.4",
44
- "globals": "^16.3.0",
45
- "prettier": "^3.6.2",
46
- "semantic-release": "^24.2.7",
47
- "tsup": "^8.5.0",
48
- "typescript-eslint": "^8.42.0"
40
+ "@types/node": "^24.3.3",
41
+ "semantic-release": "^24.2.8",
42
+ "tsup": "^8.5.0"
49
43
  }
50
44
  }