@danielhaim/titlecaser 1.7.13 → 1.7.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/TitleCaser.js CHANGED
@@ -13,8 +13,8 @@ export class TitleCaser {
13
13
  constructor (options = {}) {
14
14
  this.options = options;
15
15
  this.debug = options.debug || false;
16
- this.wordReplacementsList = wordReplacementsList;
17
- this.phraseReplacementMap = phraseReplacementMap;
16
+ this.wordReplacementsList = JSON.parse(JSON.stringify(wordReplacementsList));
17
+ this.phraseReplacementMap = JSON.parse(JSON.stringify(phraseReplacementMap));
18
18
  }
19
19
 
20
20
  logWarning(message) {
@@ -25,12 +25,12 @@ export class TitleCaser {
25
25
 
26
26
  toTitleCase(str) {
27
27
  try {
28
- // ! If input is empty, throw an error.
29
- if (str.trim().length === 0) throw new TypeError("Invalid input: input must not be empty.");
30
-
31
28
  // ! If input is not a string, throw an error.
32
29
  if (typeof str !== "string") throw new TypeError("Invalid input: input must be a string.");
33
30
 
31
+ // ! If input is empty, throw an error.
32
+ if (str.length === 0) throw new TypeError("Invalid input: input must not be empty.");
33
+
34
34
  // ! Input sanitization: limit length to prevent performance issues
35
35
  if (str.length > 100000) throw new TypeError("Invalid input: input exceeds maximum length of 100,000 characters.");
36
36
 
@@ -43,6 +43,7 @@ export class TitleCaser {
43
43
  neverCapitalize = [],
44
44
  wordReplacementsList = this.wordReplacementsList,
45
45
  smartQuotes = false, // Set to false by default
46
+ normalizeWhitespace = true,
46
47
  } = this.options;
47
48
 
48
49
  const styleConfig = styleConfigMap[style] || {};
@@ -67,15 +68,12 @@ export class TitleCaser {
67
68
  this.logWarning(`replaceTermsArray: ${replaceTermsArray}`);
68
69
  this.logWarning(`this.wordReplacementsList: ${this.wordReplacementsList}`);
69
70
 
70
- // Remove extra spaces and replace <br> tags with a placeholder.
71
- let inputString = str.trim();
71
+ // Normalize HTML breaks and optionally normalize whitespace (see normalizeWhitespace option).
72
+ let inputString = str;
72
73
 
73
74
  // Replace <br> and <br /> tags with a placeholder.
74
75
  inputString = inputString.replace(REGEX_PATTERNS.HTML_BREAK, " nl2br ");
75
76
 
76
- // Remove extra spaces
77
- inputString = inputString.replace(REGEX_PATTERNS.MULTIPLE_SPACES, ' ');
78
-
79
77
  // Check if the entire input string is uppercase and normalize it to lowercase
80
78
  // before processing if it is. This ensures consistent handling for all-caps text.
81
79
  const isEntireStringUppercase = TitleCaserUtils.isEntirelyUppercase(inputString.replace(/[^a-zA-Z]/g, ''));
@@ -84,10 +82,14 @@ export class TitleCaser {
84
82
  inputString = inputString.toLowerCase();
85
83
  }
86
84
 
87
- // Split the string into an array of words.
88
- const words = inputString.split(" ");
85
+ // Tokenize preserving whitespace
86
+ const tokens = inputString.split(/(\s+)/);
87
+
88
+ const wordsInTitleCase = tokens.map((token, i) => {
89
+ if (!token || /^\s+$/.test(token)) return token;
90
+
91
+ const word = token;
89
92
 
90
- const wordsInTitleCase = words.map((word, i) => {
91
93
  switch (true) {
92
94
  case TitleCaserUtils.isWordAmpersand(word):
93
95
  // ! if the word is an ampersand, return it as is.
@@ -135,8 +137,18 @@ export class TitleCaser {
135
137
  // ! If the word has an intentional uppercase letter, return the correct casing.
136
138
  return word;
137
139
  case TitleCaserUtils.isShortWord(word, style) && i !== 0:
138
- // ! If the word is a short word, return the correct casing.
139
- const isAtEndOfSentence = i > 0 && TitleCaserUtils.endsWithSymbol(words[i - 1], [":", "?", "!", "."]);
140
+ // Find previous non-whitespace token
141
+ let prevToken = null;
142
+ for (let j = i - 1; j >= 0; j--) {
143
+ if (!/^\s+$/.test(tokens[j])) {
144
+ prevToken = tokens[j];
145
+ break;
146
+ }
147
+ }
148
+
149
+ const isAtEndOfSentence =
150
+ prevToken && TitleCaserUtils.endsWithSymbol(prevToken, [":", "?", "!", "."]);
151
+
140
152
  if (isAtEndOfSentence) {
141
153
  return word.charAt(0).toUpperCase() + word.slice(1);
142
154
  }
@@ -194,7 +206,7 @@ export class TitleCaser {
194
206
  });
195
207
 
196
208
  // Join the words in the array into a string.
197
- inputString = wordsInTitleCase.join(" ");
209
+ inputString = wordsInTitleCase.join("");
198
210
 
199
211
  // Replace the nl2br placeholder with <br> tags.
200
212
  inputString = inputString.replace(/nl2br/gi, "<br>");
@@ -205,46 +217,61 @@ export class TitleCaser {
205
217
  inputString = TitleCaserUtils.convertQuotesToCurly(inputString);
206
218
  }
207
219
 
208
- const wordsForAcronyms = inputString.split(" ");
209
- let firstWord = wordsForAcronyms[0];
210
- let secondWord = wordsForAcronyms[1] || null;
211
-
220
+ const wordsForAcronyms = inputString.split(/(\s+)/);
221
+
222
+ // Extract non-whitespace words for first/second detection
223
+
224
+ // Extract non-whitespace words for first/second detection
225
+ const nonWhitespaceWords = wordsForAcronyms.filter(t => !/^\s+$/.test(t));
226
+ let firstWord = nonWhitespaceWords[0] || null;
227
+ let secondWord = nonWhitespaceWords[1] || null;
228
+
212
229
  for (let i = 0; i < wordsForAcronyms.length; i++) {
213
- const prevWord = i > 0 ? wordsForAcronyms[i - 1] : null;
230
+
231
+ if (/^\s+$/.test(wordsForAcronyms[i])) continue;
232
+
233
+ // Find previous non-whitespace word
234
+ let prevWord = null;
235
+ for (let j = i - 1; j >= 0; j--) {
236
+ if (!/^\s+$/.test(wordsForAcronyms[j])) {
237
+ prevWord = wordsForAcronyms[j];
238
+ break;
239
+ }
240
+ }
241
+
242
+ // Find next non-whitespace word
243
+ let nextWord = null;
244
+ for (let j = i + 1; j < wordsForAcronyms.length; j++) {
245
+ if (!/^\s+$/.test(wordsForAcronyms[j])) {
246
+ nextWord = wordsForAcronyms[j];
247
+ break;
248
+ }
249
+ }
250
+
214
251
  let currentWord = wordsForAcronyms[i];
215
- const nextWord = i < wordsForAcronyms.length - 1 ? wordsForAcronyms[i + 1] : null;
216
252
 
217
- // Capture punctuation at the end of the word
218
253
  const punctuationMatch = currentWord.match(REGEX_PATTERNS.TRAILING_PUNCTUATION);
219
254
  let punctuation = "";
220
255
 
221
256
  if (punctuationMatch) {
222
257
  punctuation = punctuationMatch[0];
223
- currentWord = currentWord.replace(REGEX_PATTERNS.TRAILING_PUNCTUATION, ""); // Remove punctuation at the end
224
- }
225
-
226
- if (TitleCaserUtils.isRegionalAcronym(currentWord)) {
227
- currentWord = TitleCaserUtils.normalizeRegionalAcronym(currentWord);
258
+ currentWord = currentWord.replace(REGEX_PATTERNS.TRAILING_PUNCTUATION, "");
228
259
  }
229
260
 
230
- if (TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord)) {
261
+ if (TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord, prevWord)) {
231
262
  currentWord = TitleCaserUtils.normalizeRegionalAcronym(currentWord);
232
263
  }
233
264
 
234
- // if punctuation is not empty, add it to the end of the word
235
265
  if (punctuation !== "") {
236
266
  currentWord = currentWord + punctuation;
237
267
  }
238
-
239
- // NOTE: Deliberately NOT writing back to wordsForAcronyms[i] here.
240
- // This first pass does naive acronym detection that creates false positives
241
- // (e.g., pronoun "us" detected as country "US"). Later loops use more
242
- // sophisticated context-aware logic to correctly identify regional acronyms.
268
+
269
+ wordsForAcronyms[i] = currentWord;
243
270
  }
244
271
 
245
- inputString = wordsForAcronyms.join(" ");
272
+ inputString = wordsForAcronyms.join("");
246
273
 
247
- const wordsForShortWords = inputString.split(" ");
274
+ const wordsForShortWords = inputString.split(/(\s+)/);
248
275
  for (let i = 1; i < wordsForShortWords.length - 1; i++) {
249
276
  const currentWord = wordsForShortWords[i];
250
277
  const prevWord = wordsForShortWords[i - 1];
@@ -265,36 +292,62 @@ export class TitleCaser {
265
292
  }
266
293
  }
267
294
 
268
- inputString = wordsForShortWords.join(" ");
295
+ inputString = wordsForShortWords.join("");
269
296
 
270
- const wordsForFinalPass = inputString.split(" ");
297
+ const wordsForFinalPass = inputString.split(/(\s+)/);
271
298
  for (let i = 0; i < wordsForFinalPass.length; i++) {
299
+
300
+ if (/^\s+$/.test(wordsForFinalPass[i])) continue;
301
+
272
302
  let currentWord = wordsForFinalPass[i];
273
- let nextWord = wordsForFinalPass[i + 1];
274
- let prevWord = wordsForFinalPass[i - 1];
303
+
304
+ // Find previous non-whitespace word
305
+ let prevWord = null;
306
+ for (let j = i - 1; j >= 0; j--) {
307
+ if (!/^\s+$/.test(wordsForFinalPass[j])) {
308
+ prevWord = wordsForFinalPass[j];
309
+ break;
310
+ }
311
+ }
312
+
313
+ // Find next non-whitespace word
314
+ let nextWord = null;
315
+ for (let j = i + 1; j < wordsForFinalPass.length; j++) {
316
+ if (!/^\s+$/.test(wordsForFinalPass[j])) {
317
+ nextWord = wordsForFinalPass[j];
318
+ break;
319
+ }
320
+ }
321
+
275
322
  if (nextWord && TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord, prevWord)) {
276
323
  wordsForFinalPass[i] = currentWord.toUpperCase();
277
324
  }
278
325
  }
279
326
 
280
- let finalWord = wordsForFinalPass[wordsForFinalPass.length - 1];
281
- let wordBeforeFinal = wordsForFinalPass[wordsForFinalPass.length - 2];
282
- let twoWordsBeforeFinal = wordsForFinalPass[wordsForFinalPass.length - 3];
283
-
284
- if (TitleCaserUtils.isRegionalAcronym(firstWord)) {
327
+ const nonWhitespaceFinal = wordsForFinalPass.filter(t => !/^\s+$/.test(t));
328
+
329
+ let finalWord = nonWhitespaceFinal[nonWhitespaceFinal.length - 1];
330
+ let wordBeforeFinal = nonWhitespaceFinal[nonWhitespaceFinal.length - 2];
331
+ let twoWordsBeforeFinal = nonWhitespaceFinal[nonWhitespaceFinal.length - 3];
332
+
333
+ if (firstWord && TitleCaserUtils.isRegionalAcronym(firstWord)) {
285
334
  this.logWarning(`firstWord is a regional acronym: ${firstWord}`);
286
335
  wordsForFinalPass[0] = firstWord.toUpperCase();
287
336
  }
288
337
 
289
- if (TitleCaserUtils.isRegionalAcronymNoDot(firstWord, secondWord)) {
338
+ if (firstWord && secondWord && TitleCaserUtils.isRegionalAcronymNoDot(firstWord, secondWord)) {
290
339
  wordsForFinalPass[0] = firstWord.toUpperCase();
291
340
  }
292
341
 
293
- if (TitleCaserUtils.isFinalWordRegionalAcronym(finalWord, wordBeforeFinal, twoWordsBeforeFinal)) {
342
+ if (
343
+ finalWord &&
344
+ wordBeforeFinal &&
345
+ TitleCaserUtils.isFinalWordRegionalAcronym(finalWord, wordBeforeFinal, twoWordsBeforeFinal)
346
+ ) {
294
347
  wordsForFinalPass[wordsForFinalPass.length - 1] = finalWord.toUpperCase();
295
348
  }
296
349
 
297
- inputString = wordsForFinalPass.join(" ");
350
+ inputString = wordsForFinalPass.join("");
298
351
 
299
352
  for (const [phrase, replacement] of Object.entries(this.phraseReplacementMap)) {
300
353
  // Create a regular expression for case-insensitive matching of the phrase
@@ -303,15 +356,15 @@ export class TitleCaser {
303
356
  // Replace the phrase in the input string with its corresponding replacement
304
357
  inputString = inputString.replace(regex, replacement);
305
358
  }
306
-
359
+
307
360
  // ! Handle sentence case
308
361
  if (styleConfig.caseStyle === "sentence") {
309
- const words = inputString.split(" ");
362
+ const words = inputString.split(/(\s+)/);
310
363
  let firstWordFound = false;
311
-
364
+
312
365
  for (let i = 0; i < words.length; i++) {
313
366
  let word = words[i];
314
-
367
+
315
368
  // 1) The first word: Capitalize first letter only, preserve existing brand/case in the rest
316
369
  if (!firstWordFound && /[A-Za-z]/.test(word)) {
317
370
  // If you want to skip altering brand or acronym, do one more check:
@@ -323,15 +376,21 @@ export class TitleCaser {
323
376
  firstWordFound = true;
324
377
  continue;
325
378
  }
326
-
379
+
327
380
  // 2) For subsequent words, only force-lowercase if we do NOT want to preserve uppercase
328
381
  if (!TitleCaser.shouldKeepCasing(word, specialTermsList)) {
329
382
  words[i] = word.toLowerCase();
330
383
  }
331
384
  // else, we keep it exactly as is
332
385
  }
333
-
334
- inputString = words.join(" ");
386
+
387
+ inputString = words.join("");
388
+ }
389
+
390
+ if (normalizeWhitespace) {
391
+ inputString = inputString
392
+ .replace(/\s+/g, " ")
393
+ .trim();
335
394
  }
336
395
 
337
396
  return inputString;
@@ -368,6 +427,11 @@ export class TitleCaser {
368
427
  }
369
428
  });
370
429
 
430
+ // Added check to prevent excessive number of replacement rules which could lead to performance issues
431
+ if (this.wordReplacementsList.length > 2000) {
432
+ throw new Error("Too many replacement rules.");
433
+ }
434
+
371
435
  this.options.wordReplacementsList = this.wordReplacementsList;
372
436
 
373
437
  this.logWarning(`Log the updated this.wordReplacementsList: ${this.wordReplacementsList}`);
@@ -386,6 +450,10 @@ export class TitleCaser {
386
450
  this.wordReplacementsList.push({ [term]: replacement });
387
451
  }
388
452
 
453
+ if (this.wordReplacementsList.length > 2000) {
454
+ throw new Error("Too many replacement rules.");
455
+ }
456
+
389
457
  this.options.wordReplacementsList = this.wordReplacementsList;
390
458
  }
391
459
 
@@ -467,7 +535,7 @@ export class TitleCaser {
467
535
  if (TitleCaserUtils.hasUppercaseIntentional(word)) return true;
468
536
  // If it's in the brand/specialTermsList
469
537
  if (TitleCaserUtils.isWordInArray(word, specialTermsList)) return true;
470
-
538
+
471
539
  // Otherwise, no. It's safe to lowercase.
472
540
  return false;
473
541
  }