bitaboom 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +224 -57
- package/dist/index.d.ts +150 -78
- package/dist/index.js +12 -0
- package/dist/index.js.map +1 -0
- package/package.json +23 -26
- package/dist/main.js +0 -502
- package/dist/main.js.map +0 -1
package/dist/index.d.ts
CHANGED
|
@@ -5,14 +5,21 @@
|
|
|
5
5
|
* @param {string} text - The input text to apply the rule to.
|
|
6
6
|
* @returns {string} - The modified text with extreme underscores removed.
|
|
7
7
|
*/
|
|
8
|
-
|
|
8
|
+
declare const cleanExtremeArabicUnderscores: (text: string) => string;
|
|
9
9
|
/**
|
|
10
10
|
* Converts Urdu symbols to their Arabic equivalents.
|
|
11
11
|
* Example: 'ھذا' will be changed to 'هذا', 'ی' to 'ي'.
|
|
12
12
|
* @param {string} text - The input text containing Urdu symbols.
|
|
13
13
|
* @returns {string} - The modified text with Urdu symbols converted to Arabic symbols.
|
|
14
14
|
*/
|
|
15
|
-
|
|
15
|
+
declare const convertUrduSymbolsToArabic: (text: string) => string;
|
|
16
|
+
/**
|
|
17
|
+
* Calculates the proportion of Arabic characters in text relative to total non-whitespace, non-digit characters.
|
|
18
|
+
* Digits (ASCII and Arabic-Indic variants) are excluded from both numerator and denominator.
|
|
19
|
+
* @param text - The input text to analyze
|
|
20
|
+
* @returns A decimal between 0-1 representing the Arabic character ratio (0 = no Arabic, 1 = all Arabic)
|
|
21
|
+
*/
|
|
22
|
+
declare const getArabicScore: (text: string) => number;
|
|
16
23
|
/**
|
|
17
24
|
* Fixes the trailing "و" (wow) in phrases such as "عليكم و رحمة" to "عليكم ورحمة".
|
|
18
25
|
* This function attempts to correct phrases where "و" appears unnecessarily, particularly in greetings.
|
|
@@ -20,105 +27,106 @@ export const convertUrduSymbolsToArabic: (text: string) => string;
|
|
|
20
27
|
* @param {string} text - The input text containing the "و" character.
|
|
21
28
|
* @returns {string} - The modified text with unnecessary trailing "و" characters corrected.
|
|
22
29
|
*/
|
|
23
|
-
|
|
30
|
+
declare const fixTrailingWow: (text: string) => string;
|
|
24
31
|
/**
|
|
25
32
|
* Inserts a space between Arabic text and numbers.
|
|
26
33
|
* Example: 'الآية37' will be changed to 'الآية 37'.
|
|
27
34
|
* @param {string} text - The input text containing Arabic text followed by numbers.
|
|
28
35
|
* @returns {string} - The modified text with spaces inserted between Arabic text and numbers.
|
|
29
36
|
*/
|
|
30
|
-
|
|
37
|
+
declare const addSpaceBetweenArabicTextAndNumbers: (text: string) => string;
|
|
31
38
|
/**
|
|
32
39
|
* Removes English letters and symbols from the text, including ampersands, slashes, and other symbols.
|
|
33
40
|
* Example: 'أحب & لنفسي' will be changed to 'أحب لنفسي'.
|
|
34
41
|
* @param {string} text - The input text containing English letters and symbols.
|
|
35
42
|
* @returns {string} - The modified text with English letters and symbols removed.
|
|
36
43
|
*/
|
|
37
|
-
|
|
44
|
+
declare const stripEnglishCharactersAndSymbols: (text: string) => string;
|
|
38
45
|
/**
|
|
39
46
|
* Removes single-digit numbers surrounded by Arabic text. Also removes dashes (-) not followed by a number.
|
|
40
47
|
* For example, removes '3' from 'وهب 3 وقال' but does not remove '121' from 'لوحه 121 الجرح'.
|
|
41
48
|
* @param {string} text - The input text to apply the rule to.
|
|
42
49
|
* @returns {string} - The modified text with non-index numbers and dashes removed.
|
|
43
50
|
*/
|
|
44
|
-
|
|
51
|
+
declare const removeNonIndexSignatures: (text: string) => string;
|
|
45
52
|
/**
|
|
46
53
|
* Removes characters enclosed in square brackets [] or parentheses () if they are Arabic letters or Arabic-Indic numerals.
|
|
47
54
|
* Example: '[س]' or '(س)' will be removed.
|
|
48
55
|
* @param {string} text - The input text to apply the rule to.
|
|
49
56
|
* @returns {string} - The modified text with singular codes removed.
|
|
50
57
|
*/
|
|
51
|
-
|
|
58
|
+
declare const removeSingularCodes: (text: string) => string;
|
|
52
59
|
/**
|
|
53
60
|
* Removes solitary Arabic letters unless they are the 'ha' letter, which is used in Hijri years.
|
|
54
61
|
* Example: "ب ا الكلمات ت" will be changed to "ا الكلمات".
|
|
55
62
|
* @param {string} text - The input text to apply the rule to.
|
|
56
63
|
* @returns {string} - The modified text with solitary Arabic letters removed.
|
|
57
64
|
*/
|
|
58
|
-
|
|
65
|
+
declare const removeSolitaryArabicLetters: (text: string) => string;
|
|
59
66
|
/**
|
|
60
67
|
* Removes tatweel characters while preserving date references and numbered list items.
|
|
61
68
|
* Example: "1435/3/29 هـ" remains as "1435/3/29 هـ" but "أبـــتِـــكَةُ" becomes "أبتِكَةُ"
|
|
62
69
|
* @param text The text to format.
|
|
63
70
|
* @returns The modified text with the tatweel characters removed.
|
|
64
71
|
*/
|
|
65
|
-
|
|
72
|
+
declare const removeTatwil: (text: string) => string;
|
|
66
73
|
/**
|
|
67
74
|
* Replaces the 'ta marbutah' (ة) character with 'ha' (ه).
|
|
68
75
|
* Example: 'مدرسة' will be changed to 'مدرسه'.
|
|
69
76
|
* @param {string} text - The input text to apply the rule to.
|
|
70
77
|
* @returns {string} - The modified text with 'ta marbutah' replaced by 'ha'.
|
|
71
78
|
*/
|
|
72
|
-
|
|
79
|
+
declare const replaceTaMarbutahWithHa: (text: string) => string;
|
|
73
80
|
/**
|
|
74
81
|
* Removes Arabic diacritics (tashkeel) and the tatweel (elongation) character.
|
|
75
82
|
* Example: 'مُحَمَّدٌ' will be changed to 'محمد'.
|
|
76
83
|
* @param {string} text - The input text to apply the rule to.
|
|
77
84
|
* @returns {string} - The modified text with diacritics and tatweel removed.
|
|
78
85
|
*/
|
|
79
|
-
|
|
86
|
+
declare const stripDiacritics: (text: string) => string;
|
|
80
87
|
/**
|
|
81
88
|
* Removes zero-width joiners (ZWJ) and other zero-width characters from the input text.
|
|
82
89
|
* Zero-width characters include U+200B to U+200F, U+202A to U+202E, U+2060 to U+2064, and U+FEFF.
|
|
83
90
|
* @param {string} text - The input text to apply the rule to.
|
|
84
91
|
* @returns {string} - The modified text with zero-width characters removed.
|
|
85
92
|
*/
|
|
86
|
-
|
|
93
|
+
declare const stripZeroWidthCharacters: (text: string) => string;
|
|
87
94
|
/**
|
|
88
95
|
* Replaces the 'alif maqsurah' (ى) character with the regular 'ya' (ي).
|
|
89
96
|
* Example: 'رؤيى' will be changed to 'رؤيي'.
|
|
90
97
|
* @param {string} text - The input text to apply the rule to.
|
|
91
98
|
* @returns {string} - The modified text with 'alif maqsurah' replaced by 'ya'.
|
|
92
99
|
*/
|
|
93
|
-
|
|
100
|
+
declare const replaceAlifMaqsurah: (text: string) => string;
|
|
94
101
|
/**
|
|
95
102
|
* Replaces English punctuation (question mark and semicolon) with their Arabic equivalents.
|
|
96
103
|
* Example: '?' will be replaced with '؟', and ';' with '؛'.
|
|
97
104
|
* @param {string} text - The input text to apply the rule to.
|
|
98
105
|
* @returns {string} - The modified text with English punctuation replaced by Arabic punctuation.
|
|
99
106
|
*/
|
|
100
|
-
|
|
107
|
+
declare const replaceEnglishPunctuationWithArabic: (text: string) => string;
|
|
101
108
|
/**
|
|
102
109
|
* Simplifies all forms of 'alif' (أ, إ, and آ) to the basic 'ا'.
|
|
103
110
|
* Example: 'أنا إلى الآفاق' will be changed to 'انا الى الافاق'.
|
|
104
111
|
* @param {string} text - The input text to apply the rule to.
|
|
105
112
|
* @returns {string} - The modified text with simplified 'alif' characters.
|
|
106
113
|
*/
|
|
107
|
-
|
|
114
|
+
declare const normalizeAlifVariants: (text: string) => string;
|
|
115
|
+
|
|
108
116
|
/**
|
|
109
117
|
* Adds line breaks after punctuation marks such as periods, exclamation points, and question marks.
|
|
110
118
|
* Example: 'Text.' becomes 'Text.\n'.
|
|
111
119
|
* @param {string} text - The input text containing punctuation.
|
|
112
120
|
* @returns {string} - The modified text with line breaks added after punctuation.
|
|
113
121
|
*/
|
|
114
|
-
|
|
122
|
+
declare const insertLineBreaksAfterPunctuation: (text: string) => string;
|
|
115
123
|
/**
|
|
116
124
|
* Adds spaces before and after punctuation, except for certain cases like quoted text or ayah references.
|
|
117
125
|
* Example: 'Text,word' becomes 'Text, word'.
|
|
118
126
|
* @param {string} text - The input text containing punctuation.
|
|
119
127
|
* @returns {string} - The modified text with spaces added before and after punctuation.
|
|
120
128
|
*/
|
|
121
|
-
|
|
129
|
+
declare const addSpaceBeforeAndAfterPunctuation: (text: string) => string;
|
|
122
130
|
/**
|
|
123
131
|
* Turns regular double quotes surrounding a body of text into smart quotes.
|
|
124
132
|
* Also fixes incorrect starting quotes by ensuring the string starts with an opening quote if needed.
|
|
@@ -126,111 +134,139 @@ export const addSpaceBeforeAndAfterPunctuation: (text: string) => string;
|
|
|
126
134
|
* @param {string} text - The input text to apply the rule to.
|
|
127
135
|
* @returns {string} - The modified text with smart quotes applied.
|
|
128
136
|
*/
|
|
129
|
-
|
|
137
|
+
declare const applySmartQuotes: (text: string) => string;
|
|
130
138
|
/**
|
|
131
139
|
* Replaces literal new line characters (\n) and carriage returns (\r) with actual line breaks.
|
|
132
140
|
* Example: 'A\\nB' becomes 'A\nB'.
|
|
133
141
|
* @param {string} text - The input text containing literal new lines.
|
|
134
142
|
* @returns {string} - The modified text with actual line breaks.
|
|
135
143
|
*/
|
|
136
|
-
|
|
144
|
+
declare const cleanLiteralNewLines: (text: string) => string;
|
|
137
145
|
/**
|
|
138
146
|
* Removes trailing spaces from each line in a multiline string.
|
|
139
147
|
* Example: " This is a line \nAnother line " becomes "This is a line\nAnother line".
|
|
140
148
|
* @param {string} text - The input text to apply the rule to.
|
|
141
149
|
* @returns {string} - The modified text with trailing spaces removed.
|
|
142
150
|
*/
|
|
143
|
-
|
|
151
|
+
declare const cleanMultilines: (text: string) => string;
|
|
144
152
|
/**
|
|
145
153
|
* Detects if a word is by itself in a line.
|
|
146
154
|
* @param text The text to check.
|
|
147
155
|
* @returns true if there exists a word in any of the lines in the text that is by itself.
|
|
148
156
|
*/
|
|
149
|
-
|
|
157
|
+
declare const hasWordInSingleLine: (text: string) => boolean;
|
|
150
158
|
/**
|
|
151
159
|
* Checks if the input string consists of only punctuation characters.
|
|
152
160
|
* @param {string} text - The input text to check.
|
|
153
161
|
* @returns {boolean} - Returns true if the string contains only punctuation, false otherwise.
|
|
154
162
|
*/
|
|
155
|
-
|
|
156
|
-
|
|
163
|
+
declare const isOnlyPunctuation: (text: string) => boolean;
|
|
164
|
+
declare const cleanJunkFromText: (text: string) => string;
|
|
157
165
|
/**
|
|
158
166
|
* Cleans unnecessary spaces before punctuation marks such as periods, commas, and question marks.
|
|
159
167
|
* Example: 'This is a sentence , with extra space .' becomes 'This is a sentence, with extra space.'.
|
|
160
168
|
* @param {string} text - The input text to apply the rule to.
|
|
161
169
|
* @returns {string} - The modified text with cleaned spaces before punctuation.
|
|
162
170
|
*/
|
|
163
|
-
|
|
171
|
+
declare const cleanSpacesBeforePeriod: (text: string) => string;
|
|
164
172
|
/**
|
|
165
173
|
* Condenses multiple asterisks (*) into a single one.
|
|
166
174
|
* Example: '***' becomes '*'.
|
|
167
175
|
* @param {string} text - The input text to apply the rule to.
|
|
168
176
|
* @returns {string} - The modified text with condensed asterisks.
|
|
169
177
|
*/
|
|
170
|
-
|
|
178
|
+
declare const condenseAsterisks: (text: string) => string;
|
|
171
179
|
/**
|
|
172
180
|
* Replaces occurrences of colons surrounded by periods (e.g., '.:.' or ':') with a single colon.
|
|
173
181
|
* Example: 'This.:. is a test' becomes 'This: is a test'.
|
|
174
182
|
* @param {string} text - The input text to apply the rule to.
|
|
175
183
|
* @returns {string} - The modified text with condensed colons.
|
|
176
184
|
*/
|
|
177
|
-
|
|
185
|
+
declare const condenseColons: (text: string) => string;
|
|
178
186
|
/**
|
|
179
187
|
* Condenses two or more dashes (--) into a single dash (-).
|
|
180
188
|
* Example: 'This is some ---- text' becomes 'This is some - text'.
|
|
181
189
|
* @param {string} text - The input text to apply the rule to.
|
|
182
190
|
* @returns {string} - The modified text with condensed dashes.
|
|
183
191
|
*/
|
|
184
|
-
|
|
192
|
+
declare const condenseDashes: (text: string) => string;
|
|
185
193
|
/**
|
|
186
194
|
* Replaces sequences of two or more periods (e.g., '...') with an ellipsis character (…).
|
|
187
195
|
* Example: 'This is a test...' becomes 'This is a test…'.
|
|
188
196
|
* @param {string} text - The input text to apply the rule to.
|
|
189
197
|
* @returns {string} - The modified text with ellipses condensed.
|
|
190
198
|
*/
|
|
191
|
-
|
|
199
|
+
declare const condenseEllipsis: (text: string) => string;
|
|
192
200
|
/**
|
|
193
201
|
* Reduces multiple consecutive line breaks (3 or more) to exactly 2 line breaks.
|
|
194
202
|
* Example: 'This is line 1\n\n\n\nThis is line 2' becomes 'This is line 1\n\nThis is line 2'.
|
|
195
203
|
* @param {string} text - The input text to apply the rule to.
|
|
196
204
|
* @returns {string} - The modified text with condensed line breaks.
|
|
197
205
|
*/
|
|
198
|
-
|
|
206
|
+
declare const reduceMultilineBreaksToDouble: (text: string) => string;
|
|
199
207
|
/**
|
|
200
208
|
* Reduces multiple consecutive line breaks (2 or more) to exactly 1 line break.
|
|
201
209
|
* Example: 'This is line 1\n\nThis is line 2' becomes 'This is line 1\nThis is line 2'.
|
|
202
210
|
* @param {string} text - The input text to apply the rule to.
|
|
203
211
|
* @returns {string} - The modified text with condensed line breaks.
|
|
204
212
|
*/
|
|
205
|
-
|
|
213
|
+
declare const reduceMultilineBreaksToSingle: (text: string) => string;
|
|
206
214
|
/**
|
|
207
215
|
* Condenses multiple periods separated by spaces (e.g., '. . .') into a single period.
|
|
208
216
|
* Example: 'This . . . is a test' becomes 'This. is a test'.
|
|
209
217
|
* @param {string} text - The input text to apply the rule to.
|
|
210
218
|
* @returns {string} - The modified text with condensed periods.
|
|
211
219
|
*/
|
|
212
|
-
|
|
220
|
+
declare const condensePeriods: (text: string) => string;
|
|
213
221
|
/**
|
|
214
222
|
* Condenses multiple underscores (__) or Arabic Tatweel characters (ـــــ) into a single underscore or Tatweel.
|
|
215
223
|
* Example: 'This is ـــ some text __' becomes 'This is ـ some text _'.
|
|
216
224
|
* @param {string} text - The input text to apply the rule to.
|
|
217
225
|
* @returns {string} - The modified text with condensed underscores.
|
|
218
226
|
*/
|
|
219
|
-
|
|
227
|
+
declare const condenseUnderscores: (text: string) => string;
|
|
220
228
|
/**
|
|
221
229
|
* Replaces double parentheses or brackets with single ones.
|
|
222
230
|
* Example: '((text))' becomes '(text)'.
|
|
223
231
|
* @param {string} text - The input text to apply the rule to.
|
|
224
232
|
* @returns {string} - The modified text with condensed brackets.
|
|
225
233
|
*/
|
|
226
|
-
|
|
234
|
+
declare const doubleToSingleBrackets: (text: string) => string;
|
|
227
235
|
/**
|
|
228
|
-
*
|
|
229
|
-
*
|
|
230
|
-
* @param {string} text - The input text to
|
|
231
|
-
* @returns {string} - The modified text with
|
|
236
|
+
* Ensures exactly one space separates a word from the bracket that follows it.
|
|
237
|
+
* Adds a space if there isn't one, or reduces multiple spaces to one.
|
|
238
|
+
* @param {string} text - The input text to modify
|
|
239
|
+
* @returns {string} - The modified text with proper spacing before brackets
|
|
232
240
|
*/
|
|
233
|
-
|
|
241
|
+
declare const ensureSpaceBeforeBrackets: (text: string) => string;
|
|
242
|
+
/**
|
|
243
|
+
* Ensures exactly one space separates a word from the Arabic quotation mark that follows it.
|
|
244
|
+
* Adds a space if there isn't one, or reduces multiple spaces to one.
|
|
245
|
+
* @param {string} text - The input text to modify
|
|
246
|
+
* @returns {string} - The modified text with proper spacing before Arabic quotes
|
|
247
|
+
*/
|
|
248
|
+
declare const ensureSpaceBeforeQuotes: (text: string) => string;
|
|
249
|
+
/**
|
|
250
|
+
* Fixes common bracket and quotation mark typos in text
|
|
251
|
+
* Corrects malformed patterns like "(«", "»)", and misplaced digits in brackets
|
|
252
|
+
* @param text - Input text that may contain bracket typos
|
|
253
|
+
* @returns Text with corrected bracket and quotation mark combinations
|
|
254
|
+
*/
|
|
255
|
+
declare const fixBracketTypos: (text: string) => string;
|
|
256
|
+
/**
|
|
257
|
+
* Fixes mismatched curly braces by converting incorrect bracket/brace combinations
|
|
258
|
+
* to proper curly braces { }
|
|
259
|
+
* @param text - Input text that may contain mismatched curly braces
|
|
260
|
+
* @returns Text with corrected curly brace pairs
|
|
261
|
+
*/
|
|
262
|
+
declare const fixCurlyBraces: (text: string) => string;
|
|
263
|
+
/**
|
|
264
|
+
* Fixes mismatched quotation marks in Arabic text by converting various
|
|
265
|
+
* incorrect bracket/quote combinations to proper Arabic quotation marks (« »)
|
|
266
|
+
* @param text - Input text that may contain mismatched quotation marks
|
|
267
|
+
* @returns Text with corrected Arabic quotation marks
|
|
268
|
+
*/
|
|
269
|
+
declare const fixMismatchedQuotationMarks: (text: string) => string;
|
|
234
270
|
/**
|
|
235
271
|
* Formats a multiline string by joining sentences and maintaining footnotes on their own lines.
|
|
236
272
|
* Footnotes are identified by Arabic and English numerals.
|
|
@@ -238,28 +274,27 @@ export const replaceDoubleBracketsWithArrows: (text: string) => string;
|
|
|
238
274
|
* @param {string} input - The input text containing sentences and footnotes.
|
|
239
275
|
* @returns {string} - The formatted text.
|
|
240
276
|
*/
|
|
241
|
-
|
|
277
|
+
declare const formatStringBySentence: (input: string) => string;
|
|
278
|
+
/**
|
|
279
|
+
* Detects if text is entirely in uppercase letters
|
|
280
|
+
* @param text - The text to check
|
|
281
|
+
* @returns true if all alphabetic characters are uppercase, false otherwise
|
|
282
|
+
*/
|
|
283
|
+
declare const isAllUppercase: (text: string) => boolean;
|
|
242
284
|
/**
|
|
243
285
|
* Removes unnecessary spaces around slashes in references.
|
|
244
286
|
* Example: '127 / 11' becomes '127/11'.
|
|
245
287
|
* @param {string} text - The input text containing references.
|
|
246
288
|
* @returns {string} - The modified text with spaces removed around slashes.
|
|
247
289
|
*/
|
|
248
|
-
|
|
290
|
+
declare const normalizeSlashInReferences: (text: string) => string;
|
|
249
291
|
/**
|
|
250
292
|
* Reduces multiple spaces or tabs to a single space.
|
|
251
293
|
* Example: 'This   is  a    text' becomes 'This is a text'.
|
|
252
294
|
* @param {string} text - The input text containing extra spaces.
|
|
253
295
|
* @returns {string} - The modified text with reduced spaces.
|
|
254
296
|
*/
|
|
255
|
-
|
|
256
|
-
/**
|
|
257
|
-
* Ensures at most 1 space exists before any word before brackets.
|
|
258
|
-
* Adds a space if there isn't one, or reduces multiple spaces to one.
|
|
259
|
-
* @param {string} text - The input text to modify
|
|
260
|
-
* @returns {string} - The modified text with proper spacing before brackets
|
|
261
|
-
*/
|
|
262
|
-
export const ensureSpaceBeforeBrackets: (text: string) => string;
|
|
297
|
+
declare const normalizeSpaces: (text: string) => string;
|
|
263
298
|
/**
|
|
264
299
|
* Removes redundant punctuation marks that follow Arabic question marks or exclamation marks.
|
|
265
300
|
* This function cleans up text by removing periods (.) or Arabic commas (،) that immediately
|
|
@@ -276,40 +311,54 @@ export const ensureSpaceBeforeBrackets: (text: string) => string;
|
|
|
276
311
|
* removeRedundantPunctuation('هذا جيد.') // Returns: 'هذا جيد.' (unchanged)
|
|
277
312
|
* ```
|
|
278
313
|
*/
|
|
279
|
-
|
|
314
|
+
declare const removeRedundantPunctuation: (text: string) => string;
|
|
280
315
|
/**
|
|
281
316
|
* Removes spaces inside brackets, parentheses, or square brackets.
|
|
282
317
|
* Example: '( a b )' becomes '(a b)'.
|
|
283
318
|
* @param {string} text - The input text with spaces inside brackets.
|
|
284
319
|
* @returns {string} - The modified text with spaces removed inside brackets.
|
|
285
320
|
*/
|
|
286
|
-
|
|
321
|
+
declare const removeSpaceInsideBrackets: (text: string) => string;
|
|
322
|
+
/**
|
|
323
|
+
* Replaces double parentheses with a single pair of Arabic angle quotation marks (« »).
|
|
324
|
+
* Example: '((text))' becomes '«text»'.
|
|
325
|
+
* @param {string} text - The input text to apply the rule to.
|
|
326
|
+
* @returns {string} - The modified text with double parentheses replaced by « » quotation marks.
|
|
327
|
+
*/
|
|
328
|
+
declare const replaceDoubleBracketsWithArrows: (text: string) => string;
|
|
287
329
|
/**
|
|
288
330
|
* Removes bold styling from text by normalizing the string and removing stylistic characters.
|
|
289
331
|
* @param {string} text - The input text containing bold characters.
|
|
290
332
|
* @returns {string} - The modified text with bold styling removed.
|
|
291
333
|
*/
|
|
292
|
-
|
|
334
|
+
declare const stripBoldStyling: (text: string) => string;
|
|
293
335
|
/**
|
|
294
336
|
* Removes italicized characters by replacing italic Unicode characters with their normal counterparts.
|
|
295
337
|
* Example: '𝘼𝘽𝘾' becomes 'ABC'.
|
|
296
338
|
* @param {string} text - The input text containing italicized characters.
|
|
297
339
|
* @returns {string} - The modified text with italics removed.
|
|
298
340
|
*/
|
|
299
|
-
|
|
341
|
+
declare const stripItalicsStyling: (text: string) => string;
|
|
300
342
|
/**
|
|
301
343
|
* Removes all bold and italic styling from the input text.
|
|
302
344
|
* @param {string} text - The input text to remove styling from.
|
|
303
345
|
* @returns {string} - The modified text with all styling removed.
|
|
304
346
|
*/
|
|
305
|
-
|
|
347
|
+
declare const stripStyling: (text: string) => string;
|
|
348
|
+
/**
|
|
349
|
+
* Converts a string to title case (first letter of each word capitalized)
|
|
350
|
+
* @param str - The input string to convert
|
|
351
|
+
* @returns String with each word's first letter capitalized
|
|
352
|
+
*/
|
|
353
|
+
declare const toTitleCase: (str: string) => string;
|
|
306
354
|
/**
|
|
307
355
|
* Removes unnecessary spaces inside quotes.
|
|
308
356
|
* Example: '“ Text ”' becomes '“Text”'.
|
|
309
357
|
* @param {string} text - The input text with spaces inside quotes.
|
|
310
358
|
* @returns {string} - The modified text with spaces removed inside quotes.
|
|
311
359
|
*/
|
|
312
|
-
|
|
360
|
+
declare const trimSpaceInsideQuotes: (text: string) => string;
|
|
361
|
+
|
|
313
362
|
/**
|
|
314
363
|
* Converts a string that resembles JSON but with numeric keys and single-quoted values
|
|
315
364
|
* into valid JSON format. This function replaces numeric keys with quoted numeric keys
|
|
@@ -322,7 +371,7 @@ export const trimSpaceInsideQuotes: (text: string) => string;
|
|
|
322
371
|
* const result = normalizeJsonSyntax("{10: 'abc', 20: 'def'}");
|
|
323
372
|
* console.log(result); // '{"10": "abc", "20": "def"}'
|
|
324
373
|
*/
|
|
325
|
-
|
|
374
|
+
declare const normalizeJsonSyntax: (str: string) => string;
|
|
326
375
|
/**
|
|
327
376
|
* Checks if a given string resembles a JSON object with numeric or quoted keys and values
|
|
328
377
|
* that are single or double quoted. This is useful for detecting malformed JSON-like
|
|
@@ -335,7 +384,7 @@ export const normalizeJsonSyntax: (str: string) => string;
|
|
|
335
384
|
* const result = isJsonStructureValid("{10: 'abc', 'key': 'value'}");
|
|
336
385
|
* console.log(result); // true
|
|
337
386
|
*/
|
|
338
|
-
|
|
387
|
+
declare const isJsonStructureValid: (str: string) => boolean;
|
|
339
388
|
/**
|
|
340
389
|
* Splits a string by spaces and quoted substrings.
|
|
341
390
|
*
|
|
@@ -350,7 +399,7 @@ export const isJsonStructureValid: (str: string) => boolean;
|
|
|
350
399
|
* const result = splitByQuotes('"This is" "a part of the" "string and"');
|
|
351
400
|
* console.log(result); // ["This is", "a part of the", "string and"]
|
|
352
401
|
*/
|
|
353
|
-
|
|
402
|
+
declare const splitByQuotes: (query: string) => string[];
|
|
354
403
|
/**
|
|
355
404
|
* Checks if both quotes and brackets are balanced in a string.
|
|
356
405
|
* This function combines quote balance checking and bracket balance checking
|
|
@@ -371,63 +420,77 @@ export const splitByQuotes: (query: string) => string[];
|
|
|
371
420
|
* isBalanced('Hello (world) [test]') // Returns: true
|
|
372
421
|
* ```
|
|
373
422
|
*/
|
|
374
|
-
|
|
423
|
+
declare const isBalanced: (str: string) => boolean;
|
|
424
|
+
/**
|
|
425
|
+
* Parses page input string into array of page numbers, supporting ranges and lists
|
|
426
|
+
* @param pageInput - Page specification string (e.g., "1-5" or "1,3,5")
|
|
427
|
+
* @returns Array of page numbers
|
|
428
|
+
* @throws Error when start page exceeds end page in range
|
|
429
|
+
*/
|
|
430
|
+
declare const parsePageRanges: (pageInput: string) => number[];
|
|
431
|
+
|
|
375
432
|
/**
|
|
376
433
|
* Removes various symbols, part references, and numerical markers from the text.
|
|
377
434
|
* Example: '(1) (2/3)' becomes ''.
|
|
378
435
|
* @param {string} text - The input text to apply the rule to.
|
|
379
436
|
* @returns {string} - The modified text with symbols and part references removed.
|
|
380
437
|
*/
|
|
381
|
-
|
|
438
|
+
declare const cleanSymbolsAndPartReferences: (text: string) => string;
|
|
382
439
|
/**
|
|
383
440
|
* Removes trailing page numbers formatted as '-[46]-' from the text.
|
|
384
441
|
* Example: 'This is some -[46]- text' becomes 'This is some text'.
|
|
385
442
|
* @param {string} text - The input text with trailing page numbers.
|
|
386
443
|
* @returns {string} - The modified text with page numbers removed.
|
|
387
444
|
*/
|
|
388
|
-
|
|
445
|
+
declare const cleanTrailingPageNumbers: (text: string) => string;
|
|
389
446
|
/**
|
|
390
447
|
* Replaces consecutive line breaks and whitespace characters with a single space.
|
|
391
448
|
* Example: 'a\nb' becomes 'a b'.
|
|
392
449
|
* @param {string} text - The input text containing line breaks or multiple spaces.
|
|
393
450
|
* @returns {string} - The modified text with spaces.
|
|
394
451
|
*/
|
|
395
|
-
|
|
452
|
+
declare const replaceLineBreaksWithSpaces: (text: string) => string;
|
|
396
453
|
/**
|
|
397
454
|
* Removes all numeric digits from the text.
|
|
398
455
|
* Example: 'abc123' becomes 'abc'.
|
|
399
456
|
* @param {string} text - The input text containing digits.
|
|
400
457
|
* @returns {string} - The modified text with digits removed.
|
|
401
458
|
*/
|
|
402
|
-
|
|
459
|
+
declare const stripAllDigits: (text: string) => string;
|
|
403
460
|
/**
|
|
404
461
|
* Removes death year references like "(d. 390H)" and "[d. 100h]" from the text.
|
|
405
462
|
* Example: 'Sufyān ibn ‘Uyaynah (d. 198h)' becomes 'Sufyān ibn ‘Uyaynah'.
|
|
406
463
|
* @param {string} text - The input text containing death year references.
|
|
407
464
|
* @returns {string} - The modified text with death years removed.
|
|
408
465
|
*/
|
|
409
|
-
|
|
466
|
+
declare const removeDeathYear: (text: string) => string;
|
|
410
467
|
/**
|
|
411
468
|
* Removes numeric digits and dashes from the text.
|
|
412
469
|
* Example: 'ABC 123-Xyz' becomes 'ABC Xyz'.
|
|
413
470
|
* @param {string} text - The input text containing digits and dashes.
|
|
414
471
|
* @returns {string} - The modified text with numbers and dashes removed.
|
|
415
472
|
*/
|
|
416
|
-
|
|
473
|
+
declare const removeNumbersAndDashes: (text: string) => string;
|
|
417
474
|
/**
|
|
418
475
|
* Removes single digit references like (1), «2», [3] from the text.
|
|
419
476
|
* Example: 'Ref (1), Ref «2», Ref [3]' becomes 'Ref , Ref , Ref '.
|
|
420
477
|
* @param {string} text - The input text containing single digit references.
|
|
421
478
|
* @returns {string} - The modified text with single digit references removed.
|
|
422
479
|
*/
|
|
423
|
-
|
|
480
|
+
declare const removeSingleDigitReferences: (text: string) => string;
|
|
424
481
|
/**
|
|
425
482
|
* Removes URLs from the text.
|
|
426
483
|
* Example: 'Visit https://example.com' becomes 'Visit '.
|
|
427
484
|
* @param {string} text - The input text containing URLs.
|
|
428
485
|
* @returns {string} - The modified text with URLs removed.
|
|
429
486
|
*/
|
|
430
|
-
|
|
487
|
+
declare const removeUrls: (text: string) => string;
|
|
488
|
+
/**
|
|
489
|
+
* Removes common Markdown formatting syntax from text
|
|
490
|
+
* @param text - The input text containing Markdown formatting
|
|
491
|
+
* @returns Text with Markdown formatting removed (bold, italics, headers, lists, backticks)
|
|
492
|
+
*/
|
|
493
|
+
declare const removeMarkdownFormatting: (text: string) => string;
|
|
431
494
|
/**
|
|
432
495
|
* Truncates a string to a specified length, adding an ellipsis if truncated.
|
|
433
496
|
*
|
|
@@ -444,7 +507,7 @@ export const removeUrls: (text: string) => string;
|
|
|
444
507
|
* // Output: 'Short text'
|
|
445
508
|
* ```
|
|
446
509
|
*/
|
|
447
|
-
|
|
510
|
+
declare const truncate: (val: string, n?: number) => string;
|
|
448
511
|
/**
|
|
449
512
|
* Truncates a string from the middle, preserving both the beginning and end portions.
|
|
450
513
|
*
|
|
@@ -465,7 +528,7 @@ export const truncate: (val: string, n?: number) => string;
|
|
|
465
528
|
* // Output: 'Short text'
|
|
466
529
|
* ```
|
|
467
530
|
*/
|
|
468
|
-
|
|
531
|
+
declare const truncateMiddle: (text: string, maxLength?: number, endLength?: number) => string;
|
|
469
532
|
/**
|
|
470
533
|
* Unescapes backslash-escaped spaces and trims whitespace from both ends.
|
|
471
534
|
* Commonly used to clean file paths that have been escaped when pasted into terminals.
|
|
@@ -485,7 +548,16 @@ export const truncateMiddle: (text: string, maxLength?: number, endLength?: numb
|
|
|
485
548
|
* // Output: 'regular text'
|
|
486
549
|
* ```
|
|
487
550
|
*/
|
|
488
|
-
|
|
551
|
+
declare const unescapeSpaces: (input: string) => string;
|
|
552
|
+
/**
|
|
553
|
+
* Creates a diacritic-insensitive regex pattern for Arabic text matching.
|
|
554
|
+
* Normalizes text, handles character equivalences (ا/آ/أ/إ, ة/ه, ى/ي),
|
|
555
|
+
* and makes each character tolerant of Arabic diacritics (Tashkeel/Harakat)
|
|
556
|
+
* @param text - Input Arabic text to make diacritic-insensitive
|
|
557
|
+
* @returns Regex pattern string that matches the text with or without diacritics and character variants
|
|
558
|
+
*/
|
|
559
|
+
declare const makeDiacriticInsensitive: (text: string) => string;
|
|
560
|
+
|
|
489
561
|
/**
|
|
490
562
|
* Replaces common Arabic prefixes (like 'Al-', 'Ar-', 'Ash-', etc.) with 'al-' in the text.
|
|
491
563
|
* Handles different variations of prefixes such as Ash- and Al- but not when the second word
|
|
@@ -495,7 +567,7 @@ export const unescapeSpaces: (input: string) => string;
|
|
|
495
567
|
* @param {string} text - The input text containing Arabic prefixes.
|
|
496
568
|
* @returns {string} - The modified text with standardized 'al-' prefixes.
|
|
497
569
|
*/
|
|
498
|
-
|
|
570
|
+
declare const normalizeArabicPrefixesToAl: (text: string) => string;
|
|
499
571
|
/**
|
|
500
572
|
* Removes double occurrences of Arabic apostrophes such as ʿʿ or ʾʾ in the text.
|
|
501
573
|
* Example: 'ʿulamāʾʾ' becomes 'ʿulamāʾ'.
|
|
@@ -503,7 +575,7 @@ export const normalizeArabicPrefixesToAl: (text: string) => string;
|
|
|
503
575
|
* @param {string} text - The input text containing double apostrophes.
|
|
504
576
|
* @returns {string} - The modified text with condensed apostrophes.
|
|
505
577
|
*/
|
|
506
|
-
|
|
578
|
+
declare const normalizeDoubleApostrophes: (text: string) => string;
|
|
507
579
|
/**
|
|
508
580
|
* Replaces common salutations such as "sallahu alayhi wasallam" with "ﷺ" in the text.
|
|
509
581
|
* It also handles variations of the salutation phrase, including 'peace and blessings be upon him'.
|
|
@@ -512,7 +584,7 @@ export const normalizeDoubleApostrophes: (text: string) => string;
|
|
|
512
584
|
* @param {string} text - The input text containing salutations.
|
|
513
585
|
* @returns {string} - The modified text with salutations replaced.
|
|
514
586
|
*/
|
|
515
|
-
|
|
587
|
+
declare const replaceSalutationsWithSymbol: (text: string) => string;
|
|
516
588
|
/**
|
|
517
589
|
* Normalizes the text by removing diacritics, apostrophes, and dashes.
|
|
518
590
|
* Example: 'Al-Jadwal' becomes 'AlJadwal'.
|
|
@@ -520,7 +592,7 @@ export const replaceSalutationsWithSymbol: (text: string) => string;
|
|
|
520
592
|
* @param {string} input - The input text to normalize.
|
|
521
593
|
* @returns {string} - The normalized text.
|
|
522
594
|
*/
|
|
523
|
-
|
|
595
|
+
declare const normalize: (input: string) => string;
|
|
524
596
|
/**
|
|
525
597
|
* Replaces various apostrophe characters (‛, ’, ‘) with the standard apostrophe (').
|
|
526
598
|
* Example: '‛ulama’ al-su‘' becomes ''ulama' al-su''.
|
|
@@ -528,7 +600,7 @@ export const normalize: (input: string) => string;
|
|
|
528
600
|
* @param {string} text - The input text containing different apostrophe characters.
|
|
529
601
|
* @returns {string} - The modified text with normalized apostrophes.
|
|
530
602
|
*/
|
|
531
|
-
|
|
603
|
+
declare const normalizeApostrophes: (text: string) => string;
|
|
532
604
|
/**
|
|
533
605
|
* Strips common Arabic prefixes like 'al-', 'bi-', 'fī', 'wa-', etc. from the beginning of words.
|
|
534
606
|
* Example: 'al-Bukhari' becomes 'Bukhari'.
|
|
@@ -536,7 +608,7 @@ export const normalizeApostrophes: (text: string) => string;
|
|
|
536
608
|
* @param {string} text - The input text containing Arabic prefixes.
|
|
537
609
|
* @returns {string} - The modified text with prefixes stripped.
|
|
538
610
|
*/
|
|
539
|
-
|
|
611
|
+
declare const removeArabicPrefixes: (text: string) => string;
|
|
540
612
|
/**
|
|
541
613
|
* Simplifies English transliterations by removing diacritics, apostrophes, and common prefixes.
|
|
542
614
|
* Example: 'Al-Jadwal' becomes 'Jadwal', and 'āḍġḥīṣṭū' becomes 'adghistu'.
|
|
@@ -544,7 +616,7 @@ export const removeArabicPrefixes: (text: string) => string;
|
|
|
544
616
|
* @param {string} text - The input text to simplify.
|
|
545
617
|
* @returns {string} - The simplified text.
|
|
546
618
|
*/
|
|
547
|
-
|
|
619
|
+
declare const normalizeTransliteratedEnglish: (text: string) => string;
|
|
548
620
|
/**
|
|
549
621
|
* Extracts the initials from the input string, typically used for names or titles.
|
|
550
622
|
* Example: 'Nayl al-Awtar' becomes 'NA'.
|
|
@@ -552,6 +624,6 @@ export const normalizeTransliteratedEnglish: (text: string) => string;
|
|
|
552
624
|
* @param {string} fullName - The input text to extract initials from.
|
|
553
625
|
* @returns {string} - The extracted initials.
|
|
554
626
|
*/
|
|
555
|
-
|
|
627
|
+
declare const extractInitials: (fullName: string) => string;
|
|
556
628
|
|
|
557
|
-
|
|
629
|
+
export { addSpaceBeforeAndAfterPunctuation, addSpaceBetweenArabicTextAndNumbers, applySmartQuotes, cleanExtremeArabicUnderscores, cleanJunkFromText, cleanLiteralNewLines, cleanMultilines, cleanSpacesBeforePeriod, cleanSymbolsAndPartReferences, cleanTrailingPageNumbers, condenseAsterisks, condenseColons, condenseDashes, condenseEllipsis, condensePeriods, condenseUnderscores, convertUrduSymbolsToArabic, doubleToSingleBrackets, ensureSpaceBeforeBrackets, ensureSpaceBeforeQuotes, extractInitials, fixBracketTypos, fixCurlyBraces, fixMismatchedQuotationMarks, fixTrailingWow, formatStringBySentence, getArabicScore, hasWordInSingleLine, insertLineBreaksAfterPunctuation, isAllUppercase, isBalanced, isJsonStructureValid, isOnlyPunctuation, makeDiacriticInsensitive, normalize, normalizeAlifVariants, normalizeApostrophes, normalizeArabicPrefixesToAl, normalizeDoubleApostrophes, normalizeJsonSyntax, normalizeSlashInReferences, normalizeSpaces, normalizeTransliteratedEnglish, parsePageRanges, reduceMultilineBreaksToDouble, reduceMultilineBreaksToSingle, removeArabicPrefixes, removeDeathYear, removeMarkdownFormatting, removeNonIndexSignatures, removeNumbersAndDashes, removeRedundantPunctuation, removeSingleDigitReferences, removeSingularCodes, removeSolitaryArabicLetters, removeSpaceInsideBrackets, removeTatwil, removeUrls, replaceAlifMaqsurah, replaceDoubleBracketsWithArrows, replaceEnglishPunctuationWithArabic, replaceLineBreaksWithSpaces, replaceSalutationsWithSymbol, replaceTaMarbutahWithHa, splitByQuotes, stripAllDigits, stripBoldStyling, stripDiacritics, stripEnglishCharactersAndSymbols, stripItalicsStyling, stripStyling, stripZeroWidthCharacters, toTitleCase, trimSpaceInsideQuotes, truncate, truncateMiddle, unescapeSpaces };
|