@danielhaim/titlecaser 1.7.10 → 1.7.13

This diff shows the changes between two publicly available versions of the package, each released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@danielhaim/titlecaser",
3
- "version": "1.7.10",
3
+ "version": "1.7.13",
4
4
  "description": "A powerful utility for transforming text to title case with support for multiple style guides and extensive customization options.",
5
5
  "keywords": [
6
6
  "title case",
@@ -61,25 +61,27 @@
61
61
  "tree": "tree -a -I 'node_modules|.git|.DS_Store'"
62
62
  },
63
63
  "devDependencies": {
64
- "@babel/cli": "^7.28.3",
65
- "@babel/core": "^7.28.4",
66
- "@babel/plugin-transform-class-properties": "7.25.9",
67
- "@babel/plugin-transform-modules-commonjs": "^7.27.1",
68
- "@babel/plugin-transform-object-rest-spread": "7.25.9",
69
- "@babel/preset-env": "^7.28.3",
70
- "@babel/runtime-corejs3": "^7.28.4",
71
- "@jest/expect": "^29.7.0",
72
- "babel-jest": "29.7.0",
64
+ "@babel/cli": "^7.28.6",
65
+ "@babel/core": "^7.29.0",
66
+ "@babel/plugin-transform-class-properties": "7.28.6",
67
+ "@babel/plugin-transform-modules-commonjs": "^7.28.6",
68
+ "@babel/plugin-transform-object-rest-spread": "7.28.6",
69
+ "@babel/preset-env": "^7.29.0",
70
+ "@babel/runtime-corejs3": "^7.29.0",
71
+ "@jest/expect": "^30.2.0",
72
+ "@types/jest": "30.0.0",
73
+ "babel-jest": "30.2.0",
73
74
  "babel-loader": "^10.0.0",
74
75
  "exports-loader": "^5.0.0",
75
- "jest": "^29.7.0",
76
- "jest-environment-jsdom": "^29.7.0",
76
+ "jest": "^30.2.0",
77
+ "jest-environment-jsdom": "^30.2.0",
77
78
  "jest-environment-puppeteer": "^11.0.0",
78
79
  "jest-puppeteer": "^11.0.0",
79
- "puppeteer": "^24.22.0",
80
- "puppeteer-core": "^24.22.0",
81
- "terser-webpack-plugin": "^5.3.14",
82
- "webpack": "^5.101.3",
80
+ "puppeteer": "^24.37.3",
81
+ "puppeteer-core": "^24.37.3",
82
+ "terser-webpack-plugin": "^5.3.16",
83
+ "ts-jest": "29.4.6",
84
+ "webpack": "^5.105.2",
83
85
  "webpack-cli": "6.0.1",
84
86
  "webpack-node-externals": "^3.0.0"
85
87
  },
package/src/TitleCaser.js CHANGED
@@ -1,9 +1,10 @@
1
1
  import {
2
- commonShortWords,
3
- correctTitleCasingList,
4
- correctPhraseCasingList,
2
+ shortWordsList,
3
+ specialTermsList,
4
+ phraseReplacementMap,
5
5
  wordReplacementsList,
6
- titleCaseDefaultOptionsList,
6
+ styleConfigMap,
7
+ REGEX_PATTERNS,
7
8
  } from "./TitleCaserConsts.js";
8
9
 
9
10
  import { TitleCaserUtils } from "./TitleCaserUtils.js";
@@ -13,7 +14,7 @@ export class TitleCaser {
13
14
  this.options = options;
14
15
  this.debug = options.debug || false;
15
16
  this.wordReplacementsList = wordReplacementsList;
16
- this.correctPhraseCasingList = correctPhraseCasingList;
17
+ this.phraseReplacementMap = phraseReplacementMap;
17
18
  }
18
19
 
19
20
  logWarning(message) {
@@ -30,6 +31,9 @@ export class TitleCaser {
30
31
  // ! If input is not a string, throw an error.
31
32
  if (typeof str !== "string") throw new TypeError("Invalid input: input must be a string.");
32
33
 
34
+ // ! Input sanitization: limit length to prevent performance issues
35
+ if (str.length > 100000) throw new TypeError("Invalid input: input exceeds maximum length of 100,000 characters.");
36
+
33
37
  // ! If options is not an object, throw an error.
34
38
  if (typeof this.options !== "undefined" && typeof this.options !== "object")
35
39
  throw new TypeError("Invalid options: options must be an object.");
@@ -37,11 +41,11 @@ export class TitleCaser {
37
41
  const {
38
42
  style = "ap",
39
43
  neverCapitalize = [],
40
- replaceTermList = this.wordReplacementsList,
44
+ wordReplacementsList = this.wordReplacementsList,
41
45
  smartQuotes = false, // Set to false by default
42
46
  } = this.options;
43
47
 
44
- const styleConfig = titleCaseDefaultOptionsList[style] || {};
48
+ const styleConfig = styleConfigMap[style] || {};
45
49
 
46
50
  const ignoreList = ["nl2br", ...neverCapitalize];
47
51
  const {
@@ -51,36 +55,26 @@ export class TitleCaser {
51
55
  neverCapitalizedList,
52
56
  replaceTerms,
53
57
  smartQuotes: mergedSmartQuotes,
54
- } = TitleCaserUtils.getTitleCaseOptions(this.options, commonShortWords, wordReplacementsList);
58
+ } = TitleCaserUtils.getTitleCaseOptions(this.options, shortWordsList, wordReplacementsList);
55
59
 
56
- // Prerocess the replaceTerms array to make it easier to search for.
57
- const replaceTermsArray = replaceTermList.map((term) => Object.keys(term)[0].toLowerCase());
60
+ // Preprocess the replaceTerms array to make it easier to search for.
61
+ const replaceTermsArray = wordReplacementsList.map((term) => Object.keys(term)[0].toLowerCase());
58
62
  // Create an object from the replaceTerms array to make it easier to search for.
59
63
  const replaceTermObj = Object.fromEntries(
60
- replaceTermList.map((term) => [Object.keys(term)[0].toLowerCase(), Object.values(term)[0]]),
64
+ wordReplacementsList.map((term) => [Object.keys(term)[0].toLowerCase(), Object.values(term)[0]]),
61
65
  );
62
66
 
63
67
  this.logWarning(`replaceTermsArray: ${replaceTermsArray}`);
64
68
  this.logWarning(`this.wordReplacementsList: ${this.wordReplacementsList}`);
65
69
 
66
- const map = {
67
- "&": "&amp;",
68
- "<": "&lt;",
69
- ">": "&gt;",
70
- // '\u2018': '\u2019', // Smart single quote
71
- // '\u201C': '\u201D', // Smart double quote
72
- '"': "&quot;",
73
- "'": "&#039;",
74
- };
75
-
76
70
  // Remove extra spaces and replace <br> tags with a placeholder.
77
71
  let inputString = str.trim();
78
72
 
79
73
  // Replace <br> and <br /> tags with a placeholder.
80
- inputString = inputString.replace(/<\s*br\s*\/?\s*>/gi, " nl2br ");
74
+ inputString = inputString.replace(REGEX_PATTERNS.HTML_BREAK, " nl2br ");
81
75
 
82
- // Remove extra spaces and replace <br> tags with a placeholder.
83
- inputString = inputString.replace(/ {2,}/g, (match) => match.slice(0, 1));
76
+ // Remove extra spaces
77
+ inputString = inputString.replace(REGEX_PATTERNS.MULTIPLE_SPACES, ' ');
84
78
 
85
79
  // Check if the entire input string is uppercase and normalize it to lowercase
86
80
  // before processing if it is. This ensures consistent handling for all-caps text.
@@ -107,9 +101,9 @@ export class TitleCaser {
107
101
  case replaceTermsArray.includes(word.toLowerCase()):
108
102
  // ! If the word is in the replaceTerms array, return the replacement.
109
103
  return replaceTermObj[word.toLowerCase()];
110
- case TitleCaserUtils.isWordInArray(word, correctTitleCasingList):
111
- // ! If the word is in the correctTitleCasingList array, return the correct casing.
112
- return TitleCaserUtils.correctTerm(word, correctTitleCasingList);
104
+ case TitleCaserUtils.isWordInArray(word, specialTermsList):
105
+ // ! If the word is in the specialTermsList array, return the correct casing.
106
+ return TitleCaserUtils.correctTerm(word, specialTermsList);
113
107
  case TitleCaserUtils.isElidedWord(word):
114
108
  // ! If the word is an elided word, return the correct casing.
115
109
  return TitleCaserUtils.normalizeElidedWord(word);
@@ -136,7 +130,7 @@ export class TitleCaser {
136
130
  return processedWord.endsWith(trailingPunctuation) ? processedWord : processedWord + trailingPunctuation;
137
131
  case TitleCaserUtils.hasSuffix(word, style):
138
132
  // ! If the word has a suffix, return the correct casing.
139
- return TitleCaserUtils.correctSuffix(word, correctTitleCasingList);
133
+ return TitleCaserUtils.correctSuffix(word, specialTermsList);
140
134
  case TitleCaserUtils.hasUppercaseIntentional(word):
141
135
  // ! If the word has an intentional uppercase letter, return the correct casing.
142
136
  return word;
@@ -152,7 +146,7 @@ export class TitleCaser {
152
146
  case TitleCaserUtils.endsWithSymbol(word):
153
147
  this.logWarning(`Check if the word ends with a symbol: ${word}`);
154
148
  // ! If the word ends with a symbol, return the correct casing.
155
- const splitWord = word.split(/([.,\/#!$%\^&\*;:{}=\-_`~()?])/g);
149
+ const splitWord = word.split(REGEX_PATTERNS.SPLIT_AT_PUNCTUATION);
156
150
  this.logWarning(`Splitting word at symbols, result: ${splitWord}`);
157
151
  // Process each part for correct casing
158
152
  const processedWords = splitWord.map((part) => {
@@ -164,9 +158,9 @@ export class TitleCaser {
164
158
  } else {
165
159
  this.logWarning(`Part is a word: ${part}`);
166
160
  // ! If it's a word, process it for correct casing
167
- if (TitleCaserUtils.isWordInArray(part, correctTitleCasingList)) {
168
- const correctedTerm = TitleCaserUtils.correctTerm(part, correctTitleCasingList);
169
- this.logWarning(`Word is in correctTitleCasingList, corrected term: ${correctedTerm}`);
161
+ if (TitleCaserUtils.isWordInArray(part, specialTermsList)) {
162
+ const correctedTerm = TitleCaserUtils.correctTerm(part, specialTermsList);
163
+ this.logWarning(`Word is in specialTermsList, corrected term: ${correctedTerm}`);
170
164
  return correctedTerm;
171
165
  } else if (replaceTermsArray.includes(part)) {
172
166
  const replacement = replaceTermObj[part];
@@ -184,7 +178,7 @@ export class TitleCaser {
184
178
  return processedWords.join("");
185
179
  case TitleCaserUtils.startsWithSymbol(word):
186
180
  // ! If the word starts with a symbol, return the correct casing.
187
- return !TitleCaserUtils.isWordInArray(word, correctTitleCasingList)
181
+ return !TitleCaserUtils.isWordInArray(word, specialTermsList)
188
182
  ? word
189
183
  : TitleCaserUtils.correctTerm(word);
190
184
  case TitleCaserUtils.hasRomanNumeral(word):
@@ -211,23 +205,22 @@ export class TitleCaser {
211
205
  inputString = TitleCaserUtils.convertQuotesToCurly(inputString);
212
206
  }
213
207
 
214
- const newWords = inputString.split(" ");
215
- let firstWord = newWords[0];
216
- let secondWord = newWords[1] || null;
217
- let lastWord = newWords[newWords.length - 1];
218
-
219
- for (let i = 0; i < newWords.length; i++) {
220
- const prevWord = i > 0 ? newWords[i - 1] : null;
221
- let currentWord = newWords[i];
222
- const nextWord = i < newWords.length - 1 ? newWords[i + 1] : null;
208
+ const wordsForAcronyms = inputString.split(" ");
209
+ let firstWord = wordsForAcronyms[0];
210
+ let secondWord = wordsForAcronyms[1] || null;
211
+
212
+ for (let i = 0; i < wordsForAcronyms.length; i++) {
213
+ const prevWord = i > 0 ? wordsForAcronyms[i - 1] : null;
214
+ let currentWord = wordsForAcronyms[i];
215
+ const nextWord = i < wordsForAcronyms.length - 1 ? wordsForAcronyms[i + 1] : null;
223
216
 
224
217
  // Capture punctuation at the end of the word
225
- const punctuationMatch = currentWord.match(/[.,!?;:]+$/);
218
+ const punctuationMatch = currentWord.match(REGEX_PATTERNS.TRAILING_PUNCTUATION);
226
219
  let punctuation = "";
227
220
 
228
221
  if (punctuationMatch) {
229
222
  punctuation = punctuationMatch[0];
230
- currentWord = currentWord.replace(/[.,!?;:]+$/, ""); // Remove punctuation at the end
223
+ currentWord = currentWord.replace(REGEX_PATTERNS.TRAILING_PUNCTUATION, ""); // Remove punctuation at the end
231
224
  }
232
225
 
233
226
  if (TitleCaserUtils.isRegionalAcronym(currentWord)) {
@@ -242,15 +235,20 @@ export class TitleCaser {
242
235
  if (punctuation !== "") {
243
236
  currentWord = currentWord + punctuation;
244
237
  }
238
+
239
+ // NOTE: Deliberately NOT writing back to wordsForAcronyms[i] here.
240
+ // This first pass does naive acronym detection that creates false positives
241
+ // (e.g., pronoun "us" detected as country "US"). Later loops use more
242
+ // sophisticated context-aware logic to correctly identify regional acronyms.
245
243
  }
246
244
 
247
- inputString = newWords.join(" ");
245
+ inputString = wordsForAcronyms.join(" ");
248
246
 
249
- const newSplit = inputString.split(" ");
250
- for (let i = 1; i < newSplit.length - 1; i++) {
251
- const currentWord = newSplit[i];
252
- const prevWord = newSplit[i - 1];
253
- const nextWord = newSplit[i + 1];
247
+ const wordsForShortWords = inputString.split(" ");
248
+ for (let i = 1; i < wordsForShortWords.length - 1; i++) {
249
+ const currentWord = wordsForShortWords[i];
250
+ const prevWord = wordsForShortWords[i - 1];
251
+ const nextWord = wordsForShortWords[i + 1];
254
252
 
255
253
  if (
256
254
  currentWord === currentWord.toUpperCase() ||
@@ -259,66 +257,52 @@ export class TitleCaser {
259
257
  continue;
260
258
  }
261
259
 
262
- if (TitleCaserUtils.isWordInArray(currentWord, commonShortWords)) {
263
- newSplit[i] =
260
+ if (TitleCaserUtils.isWordInArray(currentWord, shortWordsList)) {
261
+ wordsForShortWords[i] =
264
262
  currentWord.length <= 3
265
263
  ? currentWord.toLowerCase()
266
264
  : currentWord;
267
265
  }
268
266
  }
269
267
 
270
- inputString = newSplit.join(" ");
268
+ inputString = wordsForShortWords.join(" ");
271
269
 
272
- const newSplit2 = inputString.split(" ");
273
- for (let i = 0; i < newSplit2.length; i++) {
274
- let currentWord = newSplit2[i];
275
- let nextWord = newSplit2[i + 1];
276
- let prevWord = newSplit2[i - 1];
277
- if (nextWord !== null && TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord, prevWord)) {
278
- newSplit2[i] = currentWord.toUpperCase();
270
+ const wordsForFinalPass = inputString.split(" ");
271
+ for (let i = 0; i < wordsForFinalPass.length; i++) {
272
+ let currentWord = wordsForFinalPass[i];
273
+ let nextWord = wordsForFinalPass[i + 1];
274
+ let prevWord = wordsForFinalPass[i - 1];
275
+ if (nextWord && TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord, prevWord)) {
276
+ wordsForFinalPass[i] = currentWord.toUpperCase();
279
277
  }
280
278
  }
281
279
 
282
-
283
-
284
- let finalWord = newSplit2[newSplit2.length - 1];
285
- let wordBeforeFinal = newSplit2[newSplit2.length - 2];
286
- let twoWordsBeforeFinal = newSplit2[newSplit2.length - 3];
280
+ let finalWord = wordsForFinalPass[wordsForFinalPass.length - 1];
281
+ let wordBeforeFinal = wordsForFinalPass[wordsForFinalPass.length - 2];
282
+ let twoWordsBeforeFinal = wordsForFinalPass[wordsForFinalPass.length - 3];
287
283
 
288
284
  if (TitleCaserUtils.isRegionalAcronym(firstWord)) {
289
- console.log("firstWord is a regional acronym, proof: ", firstWord);
290
- newSplit2[0] = firstWord.toUpperCase();
285
+ this.logWarning(`firstWord is a regional acronym: ${firstWord}`);
286
+ wordsForFinalPass[0] = firstWord.toUpperCase();
291
287
  }
292
288
 
293
289
  if (TitleCaserUtils.isRegionalAcronymNoDot(firstWord, secondWord)) {
294
- newSplit2[0] = firstWord.toUpperCase();
290
+ wordsForFinalPass[0] = firstWord.toUpperCase();
295
291
  }
296
292
 
297
293
  if (TitleCaserUtils.isFinalWordRegionalAcronym(finalWord, wordBeforeFinal, twoWordsBeforeFinal)) {
298
- newSplit2[newSplit2.length - 1] = finalWord.toUpperCase();
294
+ wordsForFinalPass[wordsForFinalPass.length - 1] = finalWord.toUpperCase();
299
295
  }
300
296
 
301
- inputString = newSplit2.join(" ");
297
+ inputString = wordsForFinalPass.join(" ");
302
298
 
303
- for (const [phrase, replacement] of Object.entries(this.correctPhraseCasingList)) {
299
+ for (const [phrase, replacement] of Object.entries(this.phraseReplacementMap)) {
304
300
  // Create a regular expression for case-insensitive matching of the phrase
305
- const regex = new RegExp(phrase.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "gi");
301
+ const regex = new RegExp(phrase.replace(REGEX_PATTERNS.REGEX_ESCAPE, "\\$&"), "gi");
306
302
 
307
303
  // Replace the phrase in the input string with its corresponding replacement
308
304
  inputString = inputString.replace(regex, replacement);
309
305
  }
310
-
311
- function shouldKeepCasing(word) {
312
- // If it's an acronym
313
- if (TitleCaserUtils.isRegionalAcronym(word)) return true;
314
- // If it has known “intentional uppercase” patterns
315
- if (TitleCaserUtils.hasUppercaseIntentional(word)) return true;
316
- // If it’s in the brand/correctTitleCasingList
317
- if (TitleCaserUtils.isWordInArray(word, correctTitleCasingList)) return true;
318
-
319
- // Otherwise, no. It's safe to lowercase.
320
- return false;
321
- }
322
306
 
323
307
  // ! Handle sentence case
324
308
  if (styleConfig.caseStyle === "sentence") {
@@ -331,17 +315,17 @@ export class TitleCaser {
331
315
  // 1) The first word: Capitalize first letter only, preserve existing brand/case in the rest
332
316
  if (!firstWordFound && /[A-Za-z]/.test(word)) {
333
317
  // If you want to skip altering brand or acronym, do one more check:
334
- if (!shouldKeepCasing(word)) {
318
+ if (!TitleCaser.shouldKeepCasing(word, specialTermsList)) {
335
319
  // "Normal" first word
336
320
  words[i] = word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();
337
321
  }
338
- // Otherwise, its a brand/acronym, so leave it
322
+ // Otherwise, it's a brand/acronym, so leave it
339
323
  firstWordFound = true;
340
324
  continue;
341
325
  }
342
326
 
343
327
  // 2) For subsequent words, only force-lowercase if we do NOT want to preserve uppercase
344
- if (!shouldKeepCasing(word)) {
328
+ if (!TitleCaser.shouldKeepCasing(word, specialTermsList)) {
345
329
  words[i] = word.toLowerCase();
346
330
  }
347
331
  // else, we keep it exactly as is
@@ -353,7 +337,12 @@ export class TitleCaser {
353
337
  return inputString;
354
338
 
355
339
  } catch (error) {
356
- throw new Error(error);
340
+ // Preserve original error information
341
+ if (error instanceof Error) {
342
+ throw error;
343
+ } else {
344
+ throw new Error(String(error));
345
+ }
357
346
  }
358
347
  }
359
348
 
@@ -433,7 +422,7 @@ export class TitleCaser {
433
422
  const key = Object.keys(item)[0];
434
423
  const value = item[key];
435
424
  if (typeof key === "string" && typeof value === "string") {
436
- this.correctPhraseCasingList[key] = value;
425
+ this.phraseReplacementMap[key] = value;
437
426
  } else {
438
427
  throw new TypeError("Invalid argument: Each key-value pair must contain strings.");
439
428
  }
@@ -442,7 +431,7 @@ export class TitleCaser {
442
431
  else if (typeof item === "object" && !Array.isArray(item)) {
443
432
  Object.entries(item).forEach(([key, value]) => {
444
433
  if (typeof key === "string" && typeof value === "string") {
445
- this.correctPhraseCasingList[key] = value;
434
+ this.phraseReplacementMap[key] = value;
446
435
  } else {
447
436
  throw new TypeError("Invalid argument: Each key-value pair must contain strings.");
448
437
  }
@@ -454,7 +443,7 @@ export class TitleCaser {
454
443
  }
455
444
  });
456
445
 
457
- this.logWarning(`Log the this.correctPhraseCasingList: ${this.correctPhraseCasingList}`);
446
+ this.logWarning(`Log the this.phraseReplacementMap: ${this.phraseReplacementMap}`);
458
447
  }
459
448
 
460
449
  setStyle(style) {
@@ -464,4 +453,22 @@ export class TitleCaser {
464
453
 
465
454
  this.options.style = style;
466
455
  }
456
+
457
+ /**
458
+ * Determines if a word should keep its existing casing
459
+ * @param {string} word - The word to check
460
+ * @param {Array<string>} specialTermsList - List of terms to preserve
461
+ * @returns {boolean} True if word should keep its casing
462
+ */
463
+ static shouldKeepCasing(word, specialTermsList) {
464
+ // If it's an acronym
465
+ if (TitleCaserUtils.isRegionalAcronym(word)) return true;
466
+ // If it has known "intentional uppercase" patterns
467
+ if (TitleCaserUtils.hasUppercaseIntentional(word)) return true;
468
+ // If it's in the brand/specialTermsList
469
+ if (TitleCaserUtils.isWordInArray(word, specialTermsList)) return true;
470
+
471
+ // Otherwise, no. It's safe to lowercase.
472
+ return false;
473
+ }
467
474
  }
package/src/TitleCaserConsts.js CHANGED
@@ -42,9 +42,9 @@ const mergedArray = mergeArrays(
42
42
  militaryTerms,
43
43
  );
44
44
 
45
- export const correctTitleCasingList = mergedArray;
45
+ export const specialTermsList = mergedArray;
46
46
 
47
- export const commonShortWords = [
47
+ export const shortWordsList = [
48
48
  "the",
49
49
  "in",
50
50
  "to",
@@ -91,7 +91,7 @@ export const wordReplacementsList = [
91
91
  // * ! Title Case Styles
92
92
  // * ! ===============================================
93
93
 
94
- export const titleCaseStylesList = Object.freeze({
94
+ export const TITLE_CASE_STYLES = Object.freeze({
95
95
  AP: "ap",
96
96
  APA: "apa",
97
97
  BRITISH: "british",
@@ -100,9 +100,9 @@ export const titleCaseStylesList = Object.freeze({
100
100
  WIKIPEDIA: "wikipedia",
101
101
  });
102
102
 
103
- export const allowedTitleCaseStylesList = Object.values(titleCaseStylesList);
103
+ export const allowedStylesList = Object.values(TITLE_CASE_STYLES);
104
104
 
105
- export const titleCaseDefaultOptionsList = Object.freeze({
105
+ export const styleConfigMap = Object.freeze({
106
106
  ap: {
107
107
  caseStyle: "title",
108
108
  // AP: Capitalize all words 4+ letters and all verbs/adverbs
@@ -246,13 +246,30 @@ export const ignoredWordList = [];
246
246
  // * ! Correct Phrase Casing
247
247
  // * ! ===============================================
248
248
 
249
- export const correctPhraseCasingList = {
249
+ export const phraseReplacementMap = {
250
250
  'the cybersmile foundation': 'The Cybersmile Foundation',
251
251
  'co. by colgate': 'CO. by Colgate',
252
252
  "on & off": "On & Off",
253
253
  "on and off": "On and Off",
254
254
  };
255
255
 
256
+ // * ! ===============================================
257
+ // * ! Regex Patterns
258
+ // * ! ===============================================
259
+
260
+ export const REGEX_PATTERNS = Object.freeze({
261
+ // Punctuation at end of word
262
+ TRAILING_PUNCTUATION: /[.,!?;:]+$/,
263
+ // Split word at punctuation while preserving delimiters
264
+ SPLIT_AT_PUNCTUATION: /([.,\/#!$%\^&\*;:{}=\-_`~()?])/g,
265
+ // Match HTML break tags
266
+ HTML_BREAK: /<\s*br\s*\/?\s*>/gi,
267
+ // Multiple consecutive spaces
268
+ MULTIPLE_SPACES: / {2,}/g,
269
+ // Regex escape special characters
270
+ REGEX_ESCAPE: /[.*+?^${}()|[\]\\]/g,
271
+ });
272
+
256
273
  // * ! ===============================================
257
274
  // * ! Acronym Replacements
258
275
  // * ! ===============================================
@@ -272,12 +289,12 @@ export const regionalAcronymList = [
272
289
  "u.k",
273
290
  ];
274
291
 
275
- export const regionalAcronymPrecedingWords = [
292
+ export const regionalAcronymPrecedingWordsList = [
276
293
  "the", "via", "among", "across", "beyond", "outside",
277
294
  "alongside", "throughout", "despite", "unlike", "upon"
278
295
  ];
279
296
 
280
- export const directFollowingIndicatorsRegionalAcronym = [
297
+ export const regionalAcronymFollowingWordsList = [
281
298
  "act", "acts",
282
299
  "administration", "administrations",
283
300
  "agency", "agencies",