bitaboom 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +163 -784
- package/dist/index.d.ts +104 -38
- package/dist/index.js +9 -7
- package/dist/index.js.map +1 -1
- package/package.json +10 -9
package/dist/index.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
//#region src/arabic.d.ts
|
|
1
2
|
/**
|
|
2
3
|
* Converts Arabic-Indic numerals (٠-٩) to a JavaScript number.
|
|
3
4
|
*
|
|
@@ -44,6 +45,24 @@ declare const convertUrduSymbolsToArabic: (text: string) => string;
|
|
|
44
45
|
* @returns A decimal between 0-1 representing the Arabic character ratio (0 = no Arabic, 1 = all Arabic)
|
|
45
46
|
*/
|
|
46
47
|
declare const getArabicScore: (text: string) => number;
|
|
48
|
+
/**
|
|
49
|
+
* Finds the position of the last punctuation character in a string
|
|
50
|
+
*
|
|
51
|
+
* @param text - The text to search through
|
|
52
|
+
* @returns The index of the last punctuation character, or -1 if none found
|
|
53
|
+
*
|
|
54
|
+
* @example
|
|
55
|
+
* ```typescript
|
|
56
|
+
* const text = "Hello world! How are you?";
|
|
57
|
+
* const lastPuncIndex = findLastPunctuation(text);
|
|
58
|
+
* // Result: 24 (position of the last '?')
|
|
59
|
+
*
|
|
60
|
+
* const noPuncText = "Hello world";
|
|
61
|
+
* const notFound = findLastPunctuation(noPuncText);
|
|
62
|
+
* // Result: -1 (no punctuation found)
|
|
63
|
+
* ```
|
|
64
|
+
*/
|
|
65
|
+
declare const findLastPunctuation: (text: string) => number;
|
|
47
66
|
/**
|
|
48
67
|
* Fixes the trailing "و" (wow) in phrases such as "عليكم و رحمة" to "عليكم ورحمة".
|
|
49
68
|
* This function attempts to correct phrases where "و" appears unnecessarily, particularly in greetings.
|
|
@@ -87,7 +106,8 @@ declare const removeSolitaryArabicLetters: (text: string) => string;
|
|
|
87
106
|
* @returns {string} - The modified text with English punctuation replaced by Arabic punctuation.
|
|
88
107
|
*/
|
|
89
108
|
declare const replaceEnglishPunctuationWithArabic: (text: string) => string;
|
|
90
|
-
|
|
109
|
+
//#endregion
|
|
110
|
+
//#region src/cleaning.d.ts
|
|
91
111
|
/**
|
|
92
112
|
* Escape a string so it can be safely embedded into a RegExp source.
|
|
93
113
|
*
|
|
@@ -97,40 +117,40 @@ declare const replaceEnglishPunctuationWithArabic: (text: string) => string;
|
|
|
97
117
|
declare const escapeRegex: (s: string) => string;
|
|
98
118
|
/** Optional equivalence toggles for {@link makeDiacriticInsensitiveRegex}. */
|
|
99
119
|
type EquivOptions = {
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
120
|
+
/** Treat ا/أ/إ/آ as equivalent. @default true */
|
|
121
|
+
alif?: boolean;
|
|
122
|
+
/** Treat ة/ه as equivalent. @default true */
|
|
123
|
+
taMarbutahHa?: boolean;
|
|
124
|
+
/** Treat ى/ي as equivalent. @default true */
|
|
125
|
+
alifMaqsurahYa?: boolean;
|
|
106
126
|
};
|
|
107
127
|
/** Options for {@link makeDiacriticInsensitiveRegex}. */
|
|
108
128
|
type MakeRegexOptions = {
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
129
|
+
/**
|
|
130
|
+
* Character equivalences to allow.
|
|
131
|
+
* @default { alif: true, taMarbutahHa: true, alifMaqsurahYa: true }
|
|
132
|
+
*/
|
|
133
|
+
equivalences?: EquivOptions;
|
|
134
|
+
/**
|
|
135
|
+
* Allow tatweel between letters (tolerate decorative elongation).
|
|
136
|
+
* @default true
|
|
137
|
+
*/
|
|
138
|
+
allowTatweel?: boolean;
|
|
139
|
+
/**
|
|
140
|
+
* Ignore diacritics by inserting a `DIACRITICS_CLASS*` after each letter.
|
|
141
|
+
* @default true
|
|
142
|
+
*/
|
|
143
|
+
ignoreDiacritics?: boolean;
|
|
144
|
+
/**
|
|
145
|
+
* Treat any whitespace in the needle as `\s+` for flexible matching.
|
|
146
|
+
* @default true
|
|
147
|
+
*/
|
|
148
|
+
flexWhitespace?: boolean;
|
|
149
|
+
/**
|
|
150
|
+
* RegExp flags to use.
|
|
151
|
+
* @default 'u'
|
|
152
|
+
*/
|
|
153
|
+
flags?: string;
|
|
134
154
|
};
|
|
135
155
|
/**
|
|
136
156
|
* Build a **diacritic-insensitive**, **tatweel-tolerant** RegExp for Arabic text matching.
|
|
@@ -151,10 +171,28 @@ type MakeRegexOptions = {
|
|
|
151
171
|
* rx.test('اَنا إلى الآفاق'); // true
|
|
152
172
|
*/
|
|
153
173
|
declare const makeDiacriticInsensitiveRegex: (needle: string, opts?: MakeRegexOptions) => RegExp;
|
|
154
|
-
|
|
174
|
+
/**
|
|
175
|
+
* Remove simple HTML/XML-like tags from a string.
|
|
176
|
+
*
|
|
177
|
+
* This is intentionally lightweight and does not attempt to parse HTML; it simply drops
|
|
178
|
+
* substrings that look like `<...>`.
|
|
179
|
+
*
|
|
180
|
+
* @param content Input string
|
|
181
|
+
* @returns String with tags removed
|
|
182
|
+
*/
|
|
183
|
+
declare const removeAllTags: (content: string) => string;
|
|
184
|
+
//#endregion
|
|
185
|
+
//#region src/constants.d.ts
|
|
186
|
+
/** Matches text ending with common punctuation marks */
|
|
187
|
+
declare const PATTERN_ENDS_WITH_PUNCTUATION: RegExp;
|
|
188
|
+
//#endregion
|
|
189
|
+
//#region src/formatting.d.ts
|
|
155
190
|
/**
|
|
156
191
|
* Adds line breaks after punctuation marks such as periods, exclamation points, and question marks.
|
|
157
192
|
* Example: 'Text.' becomes 'Text.\n'.
|
|
193
|
+
*
|
|
194
|
+
* Note: For the full preformatting pipeline in one pass (significantly faster and more memory-friendly
|
|
195
|
+
* on very large inputs), use `preformatArabicText` from `src/preformat.ts`.
|
|
158
196
|
* @param {string} text - The input text containing punctuation.
|
|
159
197
|
* @returns {string} - The modified text with line breaks added after punctuation.
|
|
160
198
|
*/
|
|
@@ -396,7 +434,8 @@ declare const toTitleCase: (str: string) => string;
|
|
|
396
434
|
* @returns {string} - The modified text with spaces removed inside quotes.
|
|
397
435
|
*/
|
|
398
436
|
declare const trimSpaceInsideQuotes: (text: string) => string;
|
|
399
|
-
|
|
437
|
+
//#endregion
|
|
438
|
+
//#region src/parsing.d.ts
|
|
400
439
|
/**
|
|
401
440
|
* Converts a string that resembles JSON but with numeric keys and single-quoted values
|
|
402
441
|
* into valid JSON format. This function replaces numeric keys with quoted numeric keys
|
|
@@ -466,7 +505,32 @@ declare const isBalanced: (str: string) => boolean;
|
|
|
466
505
|
* @throws Error when start page exceeds end page in range
|
|
467
506
|
*/
|
|
468
507
|
declare const parsePageRanges: (pageInput: string) => number[];
|
|
469
|
-
|
|
508
|
+
//#endregion
|
|
509
|
+
//#region src/preformat.d.ts
|
|
510
|
+
/**
|
|
511
|
+
* Hyperoptimized Arabic text preformatting entry point.
|
|
512
|
+
*
|
|
513
|
+
* The implementation lives in `src/preformat-core.ts` to keep the public surface
|
|
514
|
+
* area small while allowing internal benchmarking (buffer vs concat builders).
|
|
515
|
+
*
|
|
516
|
+
* @module preformat
|
|
517
|
+
*/
|
|
518
|
+
type PreformatArabicText = {
|
|
519
|
+
(text: string): string;
|
|
520
|
+
(texts: string[]): string[];
|
|
521
|
+
};
|
|
522
|
+
/**
|
|
523
|
+
* High-performance Arabic preformatting pipeline.
|
|
524
|
+
*
|
|
525
|
+
* Consolidates common formatting steps (spacing, punctuation normalization, reference formatting,
|
|
526
|
+
* bracket/quote cleanup, ellipsis condensation, newline normalization) into a single-pass formatter.
|
|
527
|
+
*
|
|
528
|
+
* @param text Input string or an array of strings
|
|
529
|
+
* @returns Preformatted string or array of strings (matching input shape)
|
|
530
|
+
*/
|
|
531
|
+
declare const preformatArabicText: PreformatArabicText;
|
|
532
|
+
//#endregion
|
|
533
|
+
//#region src/sanitization.d.ts
|
|
470
534
|
/**
|
|
471
535
|
* Removes various symbols, part references, and numerical markers from the text.
|
|
472
536
|
* Example: '(1) (2/3)' becomes ''.
|
|
@@ -595,7 +659,8 @@ declare const unescapeSpaces: (input: string) => string;
|
|
|
595
659
|
* @returns Regex pattern string that matches the text with or without diacritics and character variants
|
|
596
660
|
*/
|
|
597
661
|
declare const makeDiacriticInsensitive: (text: string) => string;
|
|
598
|
-
|
|
662
|
+
//#endregion
|
|
663
|
+
//#region src/transliteration.d.ts
|
|
599
664
|
/**
|
|
600
665
|
* Replaces common Arabic prefixes (like 'Al-', 'Ar-', 'Ash-', etc.) with 'al-' in the text.
|
|
601
666
|
* Handles different variations of prefixes such as Ash- and Al- but not when the second word
|
|
@@ -655,5 +720,6 @@ declare const normalizeTransliteratedEnglish: (text: string) => string;
|
|
|
655
720
|
* @returns {string} - The extracted initials.
|
|
656
721
|
*/
|
|
657
722
|
declare const extractInitials: (fullName: string) => string;
|
|
658
|
-
|
|
659
|
-
export {
|
|
723
|
+
//#endregion
|
|
724
|
+
export { MakeRegexOptions, PATTERN_ENDS_WITH_PUNCTUATION, addSpaceBeforeAndAfterPunctuation, addSpaceBetweenArabicTextAndNumbers, applySmartQuotes, arabicNumeralToNumber, cleanExtremeArabicUnderscores, cleanLiteralNewLines, cleanMultilines, cleanSpacesBeforePeriod, cleanSymbolsAndPartReferences, cleanTrailingPageNumbers, condenseAsterisks, condenseColons, condenseDashes, condenseEllipsis, condensePeriods, condenseUnderscores, convertUrduSymbolsToArabic, doubleToSingleBrackets, ensureSpaceBeforeBrackets, ensureSpaceBeforeQuotes, escapeRegex, extractInitials, findLastPunctuation, fixBracketTypos, fixCurlyBraces, fixMismatchedQuotationMarks, fixTrailingWow, formatStringBySentence, getArabicScore, hasWordInSingleLine, insertLineBreaksAfterPunctuation, isAllUppercase, isBalanced, isJsonStructureValid, isOnlyPunctuation, makeDiacriticInsensitive, makeDiacriticInsensitiveRegex, normalize, normalizeArabicPrefixesToAl, normalizeDoubleApostrophes, normalizeJsonSyntax, normalizeSlashInReferences, normalizeSpaces, normalizeTransliteratedEnglish, parsePageRanges, preformatArabicText, reduceMultilineBreaksToDouble, reduceMultilineBreaksToSingle, removeAllTags, removeArabicPrefixes, removeDeathYear, removeMarkdownFormatting, removeNonIndexSignatures, removeNumbersAndDashes, removeRedundantPunctuation, removeSingleDigitReferences, removeSingularCodes, removeSolitaryArabicLetters, removeSpaceInsideBrackets, removeUrls, replaceDoubleBracketsWithArrows, replaceEnglishPunctuationWithArabic, replaceLineBreaksWithSpaces, replaceSalutationsWithSymbol, splitByQuotes, stripAllDigits, stripBoldStyling, stripItalicsStyling, stripStyling, toTitleCase, trimSpaceInsideQuotes, truncate, truncateMiddle, unescapeSpaces };
|
|
725
|
+
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.js
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
|
-
|
|
1
|
+
const e=/[.!?؟؛]$/,t=e=>parseInt(e.replace(/[\u0660-\u0669]/g,e=>(e.charCodeAt(0)-1632).toString()),10),n=e=>e.replace(/(?<!\d ?ه|اه)ـ(?=\r?$)|^ـ(?!اهـ)/gm,``),r=e=>e.replace(/ھ/g,`ه`).replace(/ی/g,`ي`),i=e=>{if(!e)return 0;let t=/[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]/g,n=/[0-9\u0660-\u0669\u06F0-\u06F9]/g,r=/[^\s0-9\u0660-\u0669\u06F0-\u06F9]/g,i=e.replace(n,``),a=i.match(t)||[],o=i.match(r)||[];return o.length===0?0:a.length/o.length},a=t=>{for(let n=t.length-1;n>=0;n--)if(e.test(t[n]))return n;return-1},o=e=>e.replace(/ و /g,` و`),s=e=>e.replace(/([\u0600-\u06FF]+)(\d+)/g,`$1 $2`),c=e=>e.replace(/(?<![0-9] ?)-|(?<=[\u0600-\u06FF])\s?\d\s?(?=[\u0600-\u06FF])/g,` `).replace(/(?<=[\u0600-\u06FF]\s)(\d+\s)+\d+(?=(\s[\u0600-\u06FF]|$))/g,` `),l=e=>e.replace(/[[({][\u0621-\u064A\u0660-\u0669][\])}]/g,``),u=e=>e.replace(/(^| )[\u0621-\u064A]( |$)/g,` `),d=e=>e.replace(/\?|؟\./g,`؟`).replace(/(;|؛)\s*(\1\s*)*/g,`؛`).replace(/,|-،/g,`،`),f=e=>e.replace(/[.*+?^${}()|[\]\\]/g,`\\$&`),p=(e,t={})=>{let{equivalences:n={alif:!0,taMarbutahHa:!0,alifMaqsurahYa:!0},allowTatweel:r=!0,ignoreDiacritics:i=!0,flexWhitespace:a=!0,flags:o=`u`}=t;if(e.length>5e3)throw Error(`makeDiacriticInsensitiveRegex: needle too long`);let s=e=>{switch(e){case`ا`:case`أ`:case`إ`:case`آ`:return n.alif?`[اأإآ]`:`ا`;case`ة`:case`ه`:return n.taMarbutahHa?`[هة]`:f(e);case`ى`:case`ي`:return n.alifMaqsurahYa?`[ىي]`:f(e);default:return f(e)}},c=`${i?`[\\u0610-\\u061A\\u064B-\\u065F\\u0670\\u06D6-\\u06ED]*`:``}${r?`\\u0640*`:``}`,l=``;for(let t of Array.from(e))/\s/.test(t)?l+=a?`\\s+`:`\\s*`:l+=`${s(t)}${c}`;return new RegExp(l,o)},ee=e=>e.replace(/<[^>]*>/g,``),te=e=>e.replace(/([.?!؟])/g,`$1
|
|
2
2
|
`).replace(/\n\s+/g,`
|
|
3
|
-
`).trim()
|
|
4
|
-
`),
|
|
3
|
+
`).trim(),ne=e=>e.replace(/( ?)([.!?,،؟;؛])((?![ '”“)"\]\n])|(?=\s{2,}))/g,`$1$2 `).replace(/\s([.!?,،؟;؛])\s*([ '”“)"\]\n])/g,`$1$2`).replace(/([^\s\w\d'”“)"\]]+)\s+([.!?,،؟;؛])|([.!?,،؟;؛])\s+$/g,`$1$2$3`).replace(/(?<=\D)( ?: ?)(?!(\d+:)|(:\d+))|(?<=\d) ?: ?(?=\D)|(?<=\D) ?: ?(?=\d)/g,`: `),re=e=>e.replace(/[“”]/g,`"`).replace(/"([^"]*)"/g,`“$1”`).replace(/^”/g,`“`),ie=e=>e.replace(/\\n|\r/g,`
|
|
4
|
+
`),ae=e=>e.replace(/^ +| +$/gm,``),oe=e=>/^\s*\S+\s*$/gm.test(e),se=e=>/^[\u0020-\u002f\u003a-\u0040\u005b-\u0060\u007b-\u007e0-9٠-٩]+$/.test(e),ce=e=>e.replace(/\s+([.؟!,،؛:?])/g,`$1`),le=e=>e.replace(/(\*\s*)+/g,`*`),ue=e=>e.replace(/[.-]?:[.-]?/g,`:`),de=e=>e.replace(/-{2,}/g,`-`),fe=e=>e.replace(/\.{2,}/g,`…`),pe=e=>e.replace(/(\n\s*){3,}/g,`
|
|
5
5
|
|
|
6
|
-
`),
|
|
7
|
-
`),
|
|
8
|
-
`),
|
|
9
|
-
`)},
|
|
6
|
+
`),me=e=>e.replace(/(\n\s*){2,}/g,`
|
|
7
|
+
`),he=e=>e.replace(/\. +\./g,`.`),ge=e=>e.replace(/ـ{2,}/g,`ـ`).replace(/_+/g,`_`),m=e=>e.replace(/(\(|\)){2,}|(\[|\]){2,}/g,`$1$2`),h=e=>e.replace(/(\S) *(\([^)]*\))/g,`$1 $2`),g=e=>e.replace(/(\S) *(«[^»]*»)/g,`$1 $2`),_=e=>e.replace(/\(«|\( \(/g,`«`).replace(/»\)|\) \)/g,`»`).replace(/\)([0-9\u0660-\u0669]+)\)/g,`($1)`).replace(/\)([0-9\u0660-\u0669]+)\(/g,`($1)`),v=e=>{let t=e;return t=t.replace(/\(([^(){}]+)\}/g,`{$1}`),t.replace(/\{([^(){}]+)\)/g,`{$1}`)},y=e=>e.replace(/«([^»)]+)\)/g,`«$1»`).replace(/\(([^()]+)»/g,`«$1»`).replace(/«([^»]+)(?=\s*$|$)/g,`«$1»`),b=e=>{let t=/^\((?:\d+|۱|۲|۳|۴|۵|۶|۷|۸|۹)\)\s/,n=[],r=e.split(`
|
|
8
|
+
`),i=``;return r.forEach(e=>{let r=e.trim(),a=t.test(r),o=/^\(\d+\/\d+\)/.test(r);if(a&&!o)i&&=(n.push(i.trim()),``),n.push(r);else{i+=`${r} `;let e=i.trim().slice(-1);/[.!؟]/.test(e)&&(n.push(i.trim()),i=``)}}),i&&n.push(i.trim()),n.join(`
|
|
9
|
+
`)},x=e=>{let t=e.replace(/[^\p{L}]/gu,``);return t.length===0?!1:t===t.toUpperCase()},S=e=>e.replace(/(\d+)\s?\/\s?(\d+)/g,`$1/$2`),C=e=>e.replace(/[ \t]+/g,` `),w=e=>e.replace(/([؟!])[.،]/g,`$1`),T=e=>e.replace(/([[(])\s*(.*?)\s*([\])])/g,`$1$2$3`),E=e=>e.replace(/\(\(\s?/g,`«`).replace(/\s?\)\)/g,`»`),D=e=>e.normalize(`NFKD`).replace(/[\u0300-\u036f]/g,``).trim(),O=e=>{let t={𝑎:`I`,𝑨:`g`,𝘼:`!`,𝑏:`J`,𝑩:`h`,𝘽:`?`,𝑐:`K`,𝑪:`i`,𝑑:`L`,𝑫:`j`,𝘿:`,`,𝑒:`M`,𝑬:`k`,𝙀:`.`,𝑓:`N`,𝑭:`l`,𝑔:`O`,𝑮:`m`,𝑯:`n`,𝑖:`Q`,𝑰:`o`,𝑗:`R`,𝑱:`p`,𝑘:`S`,𝑲:`q`,𝑙:`T`,𝑳:`r`,𝙇:`-`,𝑚:`U`,𝑴:`s`,𝑛:`V`,𝑵:`t`,𝑜:`W`,𝑶:`u`,𝑝:`X`,𝑷:`v`,𝑞:`Y`,𝑸:`w`,𝑟:`Z`,𝑹:`x`,𝑆:`A`,𝑺:`y`,𝑇:`B`,𝑻:`z`,𝑢:`a`,𝑈:`C`,𝑣:`b`,𝑉:`D`,𝑤:`c`,𝑊:`E`,𝑥:`d`,𝑋:`F`,𝑦:`e`,𝑌:`G`,𝑧:`f`,𝑍:`H`,"":`P`};return e.replace(/[\uD835\uDC62-\uD835\uDC7B\uD835\uDC46-\uD835\uDC5F\u{1D63C}-\u{1D647}]/gu,e=>t[e]||e)},k=e=>O(D(e)),A=e=>e.toLowerCase().split(` `).map(e=>{if(e.length===0)return e;let t=e.match(/\p{L}/u);if(!t||t.index===void 0)return e;let n=t.index;return e.slice(0,n)+e.charAt(n).toUpperCase()+e.slice(n+1)}).join(` `),j=e=>e.replace(/([“”"]|«) *(.*?) *([“”"]|»)/g,`$1$2$3`),M=e=>{let t=e.replace(/(\b\d+\b)(?=:)/g,`"$1"`);return t=t.replace(/:\s*'([^']+)'/g,`: "$1"`),t=t.replace(/:\s*"([^"]+)"/g,`: "$1"`),JSON.stringify(JSON.parse(t))},N=e=>/^{(\s*(\d+|'[^']*'|"[^"]*")\s*:\s*('|")[^'"]*\3\s*,)*(?:\s*(\d+|'[^']*'|"[^"]*")\s*:\s*('|")[^'"]*\5\s*)}$/.test(e.trim()),P=e=>(e.match(/(?:[^\s"]+|"(.*?)")+/g)||[]).map(e=>e.startsWith(`"`)?e.slice(1,-1):e),F=e=>{let t=0;for(let n of e)n===`"`&&t++;return t%2==0},I={"(":`)`,"[":`]`,"{":`}`},L=new Set([`(`,`[`,`{`]),_e=new Set([`)`,`]`,`}`]),ve=e=>{let t=[];for(let n of e)if(L.has(n))t.push(n);else if(_e.has(n)){let e=t.pop();if(!e||I[e]!==n)return!1}return t.length===0},ye=e=>F(e)&&ve(e),be=e=>{if(e.includes(`-`)){let[t,n]=e.split(`-`).map(Number);if(t>n)throw Error(`Start page cannot be greater than end page`);return Array.from({length:n-t+1},(e,n)=>t+n)}else return e.split(`,`).map(Number)},R=new Uint8Array(65536),z=(e,t)=>{for(let n=0;n<e.length;n++)R[e.charCodeAt(n)]|=t};z(` `,1),z(`
|
|
10
|
+
\r`,64);for(let e=48;e<=57;e++)R[e]|=16;for(let e=1536;e<=1791;e++)R[e]|=32;for(let e=1872;e<=1919;e++)R[e]|=32;z(`.!?,:;`,194),z(`،؛؟`,194),z(`([{"'«“`,68),z(`)]}"'»”`,74),z(`-_*و/`,64);const B=8221,V=1608,H=1548,U=1563,W=1567,G=1600,K=8230,q=e=>e===32||e===9||e===10||e===13;var xe=class{buffer;length;constructor(e){this.buffer=new Uint16Array(Math.max(16,e)),this.length=0}last(){return this.length>0?this.buffer[this.length-1]:0}secondLast(){return this.length>1?this.buffer[this.length-2]:0}push(e){this.ensureCapacity(1),this.buffer[this.length]=e,this.length++}pop(){this.length>0&&this.length--}ensureCapacity(e){let t=this.length+e;if(t<=this.buffer.length)return;let n=this.buffer.length*2;n<t&&(n=t);let r=new Uint16Array(n);r.set(this.buffer.subarray(0,this.length)),this.buffer=r}toStringTrimmed(){let e=0,t=this.length;for(;e<t&&q(this.buffer[e]);)e++;for(;t>e&&q(this.buffer[t-1]);)t--;if(t<=e)return``;let n=32768,r=``;for(let i=e;i<t;i+=n){let e=this.buffer.subarray(i,Math.min(t,i+n));r+=String.fromCharCode(...e)}return r}};const Se=e=>{if(!e)return``;let t=``,n=e.length,r=0,i=0,a=0;for(;r<n;){let o=e.charCodeAt(r),s=o,c=R[o];if(c&1){i++,r++;continue}if(c&64){if(o===10||o===13){for(i=0,a!==10&&(t+=`
|
|
11
|
+
`,a=10),r++;r<n;){let t=e.charCodeAt(r);if(t===32||t===9||t===10||t===13)r++;else break}continue}if(o===63)o=W;else if(o===59)o=U;else if(o===44)o=H;else if(o===58){(a===46||a===45)&&(t=t.slice(0,-1),a=t.charCodeAt(t.length-1)||0);let n=e.charCodeAt(r+1);(n===46||n===45)&&r++}else if(o===40&&e.charCodeAt(r+1)===40)o=171,r++;else if(o===41&&e.charCodeAt(r+1)===41)o=187,r++;else if(o===46){if(a===K){r++;continue}if(a===46){t=t.slice(0,-1)+`…`,a=K,r++;continue}}else if(o===V&&a===32){for(t+=`و`,a=V,r++;r<n;){let t=e.charCodeAt(r);if(t===32||t===9)r++;else break}continue}else if(o===G&&a===G||o===95&&a===95||o===45&&a===45||o===42&&a===42){r++;continue}if((a===W||a===33)&&(o===46||o===H)){r++;continue}}if(i>0){let s=!0,c=R[o];if(c&2&&(s=!1),R[a]&4&&(s=!1),o===47){let t=0;for(let i=r+1;i<n;i++){let n=e.charCodeAt(i);if(n!==32&&n!==9&&n!==10&&n!==13){t=n;break}}R[a]&16&&R[t]&16&&(s=!1)}a===47&&c&16&&t.length>=2&&R[t.charCodeAt(t.length-2)]&16&&(s=!1),s&&(t+=` `,a=32),i=0}let l=R[o];l&16&&R[a]&32&&(t+=` `,a=32),l&4&&a!==32&&a!==10&&!(R[a]&4)&&a!==0&&(t+=` `,a=32),R[a]&128&&(l&13||o===32||o===10||o===13||l&8||o===34||o===39||o===187||o===B||o===a||(a===W||a===33)&&(o===46||o===H)||(t+=` `,a=32)),o===s?t+=e[r]:t+=String.fromCharCode(o),a=o,r++}return t.trim()},Ce=e=>{if(!e)return``;let t=e.length,n=0,r=new xe(t+(t>>3)+64),i=0,a=0;for(;n<t;){let o=e.charCodeAt(n),s=o,c=R[o];if(c&1){i++,n++;continue}if(c&64){if(o===10||o===13){for(i=0,a!==10&&(r.push(10),a=10),n++;n<t;){let t=e.charCodeAt(n);if(t===32||t===9||t===10||t===13)n++;else break}continue}if(o===63)o=W;else if(o===59)o=U;else if(o===44)o=H;else if(o===58){(a===46||a===45)&&(r.pop(),a=r.last());let t=e.charCodeAt(n+1);(t===46||t===45)&&n++}else if(o===40&&e.charCodeAt(n+1)===40)o=171,n++;else if(o===41&&e.charCodeAt(n+1)===41)o=187,n++;else if(o===46){if(a===K){n++;continue}if(a===46){r.pop(),r.push(K),a=K,n++;continue}}else if(o===V&&a===32){for(r.push(V),a=V,n++;n<t;){let t=e.charCodeAt(n);if(t===32||t===9)n++;else break}continue}else if(o===G&&a===G||o===95&&a===95||o===45&&a===45||o===42&&a===42){n++;continue}if((a===W||a===33)&&(o===46||o===H)){n++;continue}}if(i>0){let s=!0,c=R[o];if(c&2&&(s=!1),R[a]&4&&(s=!1),o===47){let r=0;for(let i=n+1;i<t;i++){let t=e.charCodeAt(i);if(t!==32&&t!==9&&t!==10&&t!==13){r=t;break}}R[a]&16&&R[r]&16&&(s=!1)}a===47&&c&16&&r.length>=2&&R[r.secondLast()]&16&&(s=!1),s&&(r.push(32),a=32),i=0}let l=R[o];l&16&&R[a]&32&&(r.push(32),a=32),l&4&&a!==32&&a!==10&&!(R[a]&4)&&a!==0&&(r.push(32),a=32),R[a]&128&&(l&13||o===32||o===10||o===13||l&8||o===34||o===39||o===187||o===B||o===a||(a===W||a===33)&&(o===46||o===H)||(r.push(32),a=32)),o===s?r.push(s):r.push(o),a=o,n++}return r.toStringTrimmed()},J=e=>Se(e),we=e=>Ce(e),Y=e=>{let t=process.env.BITABOOM_PREFORMAT_BUILDER;return t===`concat`?J(e):t===`buffer`?we(e):J(e)},Te=e=>Array.isArray(e)?e.map(Y):Y(e),Ee=e=>e.replace(/ *\(?:\d+(?:\/\d+){0,2}\)? *| *\[\d+(?:\/\d+)?\] *| *«\d+» *|\d+\/\d+(?:\/\d+)?|[،§{}؍﴿﴾<>;_؟»«:!،؛[\]…ـ¬.\\/*()"]/g,` `),De=e=>e.replace(/-\[\d+\]-/g,``),Oe=e=>e.replace(/\s+/g,` `),ke=e=>e.replace(/[0-9]/g,``),Ae=e=>e.replace(/\[(d)\.\s*\d{1,4}[hH]\]\s*|\((d)\.\s*\d{1,4}[hH]\)\s*/g,``),je=e=>e.replace(/[\d-]/g,``),Me=e=>e.replace(/\(\d{1}\)|\[\d{1}\]|«\d»/g,``),Ne=e=>e.replace(/https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/g,``),Pe=e=>e.replace(/\*\*([^*]+)\*\*/g,`$1`).replace(/__([^_]+)__/g,`$1`).replace(/\*([^*]+)\*/g,`$1`).replace(/_([^_]+)_/g,`$1`).replace(/~~([^~]+)~~/g,`$1`).replace(/^\s*>\s?/gm,``).replace(/!\[[^\]]*]\([^)]*\)/g,``).replace(/\[([^\]]+)]\([^)]*\)/g,`$1`).replace(/^#+\s*/gm,``).replace(/^\s*[-*+]\s+/gm,``).replace(/^\s*\d+\.\s+/gm,``).replace(/`/gm,``),Fe=(e,t=150)=>e.length>t?`${e.substring(0,t-1)}…`:e,Ie=(e,t=50,n)=>{if(e.length<=t)return e;let r=Math.max(3,Math.floor(t/3)),i=n??r,a=t-1-i;return a<1?`${e.substring(0,t-1)}…`:`${e.substring(0,a)}…${e.substring(e.length-i)}`},Le=e=>e.replace(/\\ /g,` `).trim(),X=[[`ا`,`آ`,`أ`,`إ`],[`ة`,`ه`],[`ى`,`ي`]],Re=e=>{for(let t of X)if(t.includes(e))return`[${t.map(e=>f(e)).join(``)}]`;return f(e)},ze=e=>e.normalize(`NFC`).replace(/[\u200C\u200D]/g,``).replace(/\s+/g,` `).trim(),Be=e=>{let t=ze(e);return Array.from(t).map(e=>Re(e)+`[ًٌٍَُِّْ]*`).join(``)},Ve=e=>e.replace(/(\b|\W)(Al |Al-|Ar-|As-|Adh-|Ad-|Ats-|Ath |Ath-|Az |Az-|az-|adh-|as-|ar-)/g,`$1al-`).replace(/(\b|\W)(Ash-S|ash-S)/g,`$1al-S`).replace(/al- (.+?)\b/g,`al-$1`),He=e=>e.replace(/ʿʿ/g,`ʿ`).replace(/ʾʾ/g,`ʾ`),Ue=e=>e.replace(/\(peace be upon him\)|(Messenger of (Allah|Allāh)|Messenger|Prophet|Mu[hḥ]ammad) *\((s[^)]*m|peace[^)]*him|May[^)]*him|may[^)]*him)\)*/gi,`$1 ﷺ`).replace(/,\s*ﷺ\s*,/g,` ﷺ`),Z=e=>e.normalize(`NFKD`).replace(/[\u0300-\u036f]/g,``).replace(/`|ʾ|ʿ|-/g,``),Q=e=>C(e.replace(/(\bal-|\bli-|\bbi-|\bfī|\bwa[-\s]+|\bl-|\bliʿl|\Bʿalá|\Bʿan|\bb\.)/gi,``)),$=e=>Z(Q(e)),We=e=>$(e).trim().split(/[ -]/).slice(0,2).map(e=>e.charAt(0).toUpperCase()).join(``);export{e as PATTERN_ENDS_WITH_PUNCTUATION,ne as addSpaceBeforeAndAfterPunctuation,s as addSpaceBetweenArabicTextAndNumbers,re as applySmartQuotes,t as arabicNumeralToNumber,n as cleanExtremeArabicUnderscores,ie as cleanLiteralNewLines,ae as cleanMultilines,ce as cleanSpacesBeforePeriod,Ee as cleanSymbolsAndPartReferences,De as cleanTrailingPageNumbers,le as condenseAsterisks,ue as condenseColons,de as condenseDashes,fe as condenseEllipsis,he as condensePeriods,ge as condenseUnderscores,r as convertUrduSymbolsToArabic,m as doubleToSingleBrackets,h as ensureSpaceBeforeBrackets,g as ensureSpaceBeforeQuotes,f as escapeRegex,We as extractInitials,a as findLastPunctuation,_ as fixBracketTypos,v as fixCurlyBraces,y as fixMismatchedQuotationMarks,o as fixTrailingWow,b as formatStringBySentence,i as getArabicScore,oe as hasWordInSingleLine,te as insertLineBreaksAfterPunctuation,x as isAllUppercase,ye as isBalanced,N as isJsonStructureValid,se as isOnlyPunctuation,Be as makeDiacriticInsensitive,p as makeDiacriticInsensitiveRegex,Z as normalize,Ve as normalizeArabicPrefixesToAl,He as normalizeDoubleApostrophes,M as normalizeJsonSyntax,S as normalizeSlashInReferences,C as normalizeSpaces,$ as normalizeTransliteratedEnglish,be as parsePageRanges,Te as preformatArabicText,pe as reduceMultilineBreaksToDouble,me as reduceMultilineBreaksToSingle,ee as removeAllTags,Q as removeArabicPrefixes,Ae as removeDeathYear,Pe as removeMarkdownFormatting,c as removeNonIndexSignatures,je as removeNumbersAndDashes,w as removeRedundantPunctuation,Me as removeSingleDigitReferences,l as removeSingularCodes,u as removeSolitaryArabicLetters,T as removeSpaceInsideBrackets,Ne as removeUrls,E as replaceDoubleBracketsWithArrows,d as replaceEnglishPunctuationWithArabic,Oe as replaceLineBreaksWithSpaces,Ue as replaceSalutationsWithSymbol,P as splitByQuotes,ke as stripAllDigits,D as stripBoldStyling,O as stripItalicsStyling,k as stripStyling,A as toTitleCase,j as trimSpaceInsideQuotes,Fe as truncate,Ie as truncateMiddle,Le as unescapeSpaces};
|
|
10
12
|
//# sourceMappingURL=index.js.map
|