@danielhaim/titlecaser 1.7.12 → 1.7.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/TitleCaser.js CHANGED
@@ -13,8 +13,8 @@ export class TitleCaser {
13
13
  constructor (options = {}) {
14
14
  this.options = options;
15
15
  this.debug = options.debug || false;
16
- this.wordReplacementsList = wordReplacementsList;
17
- this.phraseReplacementMap = phraseReplacementMap;
16
+ this.wordReplacementsList = JSON.parse(JSON.stringify(wordReplacementsList));
17
+ this.phraseReplacementMap = JSON.parse(JSON.stringify(phraseReplacementMap));
18
18
  }
19
19
 
20
20
  logWarning(message) {
@@ -25,12 +25,12 @@ export class TitleCaser {
25
25
 
26
26
  toTitleCase(str) {
27
27
  try {
28
- // ! If input is empty, throw an error.
29
- if (str.trim().length === 0) throw new TypeError("Invalid input: input must not be empty.");
30
-
31
28
  // ! If input is not a string, throw an error.
32
29
  if (typeof str !== "string") throw new TypeError("Invalid input: input must be a string.");
33
30
 
31
+ // ! If input is empty, throw an error.
32
+ if (str.length === 0) throw new TypeError("Invalid input: input must not be empty.");
33
+
34
34
  // ! Input sanitization: limit length to prevent performance issues
35
35
  if (str.length > 100000) throw new TypeError("Invalid input: input exceeds maximum length of 100,000 characters.");
36
36
 
@@ -41,8 +41,9 @@ export class TitleCaser {
41
41
  const {
42
42
  style = "ap",
43
43
  neverCapitalize = [],
44
- replaceTermList = this.wordReplacementsList,
44
+ wordReplacementsList = this.wordReplacementsList,
45
45
  smartQuotes = false, // Set to false by default
46
+ normalizeWhitespace = true,
46
47
  } = this.options;
47
48
 
48
49
  const styleConfig = styleConfigMap[style] || {};
@@ -58,24 +59,21 @@ export class TitleCaser {
58
59
  } = TitleCaserUtils.getTitleCaseOptions(this.options, shortWordsList, wordReplacementsList);
59
60
 
60
61
  // Preprocess the replaceTerms array to make it easier to search for.
61
- const replaceTermsArray = replaceTermList.map((term) => Object.keys(term)[0].toLowerCase());
62
+ const replaceTermsArray = wordReplacementsList.map((term) => Object.keys(term)[0].toLowerCase());
62
63
  // Create an object from the replaceTerms array to make it easier to search for.
63
64
  const replaceTermObj = Object.fromEntries(
64
- replaceTermList.map((term) => [Object.keys(term)[0].toLowerCase(), Object.values(term)[0]]),
65
+ wordReplacementsList.map((term) => [Object.keys(term)[0].toLowerCase(), Object.values(term)[0]]),
65
66
  );
66
67
 
67
68
  this.logWarning(`replaceTermsArray: ${replaceTermsArray}`);
68
69
  this.logWarning(`this.wordReplacementsList: ${this.wordReplacementsList}`);
69
70
 
70
- // Remove extra spaces and replace <br> tags with a placeholder.
71
- let inputString = str.trim();
71
+ // Normalize HTML breaks and optionally normalize whitespace (see normalizeWhitespace option).
72
+ let inputString = str;
72
73
 
73
74
  // Replace <br> and <br /> tags with a placeholder.
74
75
  inputString = inputString.replace(REGEX_PATTERNS.HTML_BREAK, " nl2br ");
75
76
 
76
- // Remove extra spaces
77
- inputString = inputString.replace(REGEX_PATTERNS.MULTIPLE_SPACES, ' ');
78
-
79
77
  // Check if the entire input string is uppercase and normalize it to lowercase
80
78
  // before processing if it is. This ensures consistent handling for all-caps text.
81
79
  const isEntireStringUppercase = TitleCaserUtils.isEntirelyUppercase(inputString.replace(/[^a-zA-Z]/g, ''));
@@ -84,10 +82,14 @@ export class TitleCaser {
84
82
  inputString = inputString.toLowerCase();
85
83
  }
86
84
 
87
- // Split the string into an array of words.
88
- const words = inputString.split(" ");
85
+ // Tokenize preserving whitespace
86
+ const tokens = inputString.split(/(\s+)/);
87
+
88
+ const wordsInTitleCase = tokens.map((token, i) => {
89
+ if (!token || /^\s+$/.test(token)) return token;
90
+
91
+ const word = token;
89
92
 
90
- const wordsInTitleCase = words.map((word, i) => {
91
93
  switch (true) {
92
94
  case TitleCaserUtils.isWordAmpersand(word):
93
95
  // ! if the word is an ampersand, return it as is.
@@ -135,8 +137,18 @@ export class TitleCaser {
135
137
  // ! If the word has an intentional uppercase letter, return the correct casing.
136
138
  return word;
137
139
  case TitleCaserUtils.isShortWord(word, style) && i !== 0:
138
- // ! If the word is a short word, return the correct casing.
139
- const isAtEndOfSentence = i > 0 && TitleCaserUtils.endsWithSymbol(words[i - 1], [":", "?", "!", "."]);
140
+ // Find previous non-whitespace token
141
+ let prevToken = null;
142
+ for (let j = i - 1; j >= 0; j--) {
143
+ if (!/^\s+$/.test(tokens[j])) {
144
+ prevToken = tokens[j];
145
+ break;
146
+ }
147
+ }
148
+
149
+ const isAtEndOfSentence =
150
+ prevToken && TitleCaserUtils.endsWithSymbol(prevToken, [":", "?", "!", "."]);
151
+
140
152
  if (isAtEndOfSentence) {
141
153
  return word.charAt(0).toUpperCase() + word.slice(1);
142
154
  }
@@ -194,7 +206,7 @@ export class TitleCaser {
194
206
  });
195
207
 
196
208
  // Join the words in the array into a string.
197
- inputString = wordsInTitleCase.join(" ");
209
+ inputString = wordsInTitleCase.join("");
198
210
 
199
211
  // Replace the nl2br placeholder with <br> tags.
200
212
  inputString = inputString.replace(/nl2br/gi, "<br>");
@@ -205,46 +217,61 @@ export class TitleCaser {
205
217
  inputString = TitleCaserUtils.convertQuotesToCurly(inputString);
206
218
  }
207
219
 
208
- const wordsForAcronyms = inputString.split(" ");
209
- let firstWord = wordsForAcronyms[0];
210
- let secondWord = wordsForAcronyms[1] || null;
211
-
220
+ const wordsForAcronyms = inputString.split(/(\s+)/);
221
+
222
+ // Extract non-whitespace words for first/second detection
223
+
224
+ // Extract non-whitespace words for first/second detection
225
+ const nonWhitespaceWords = wordsForAcronyms.filter(t => !/^\s+$/.test(t));
226
+ let firstWord = nonWhitespaceWords[0] || null;
227
+ let secondWord = nonWhitespaceWords[1] || null;
228
+
212
229
  for (let i = 0; i < wordsForAcronyms.length; i++) {
213
- const prevWord = i > 0 ? wordsForAcronyms[i - 1] : null;
230
+
231
+ if (/^\s+$/.test(wordsForAcronyms[i])) continue;
232
+
233
+ // Find previous non-whitespace word
234
+ let prevWord = null;
235
+ for (let j = i - 1; j >= 0; j--) {
236
+ if (!/^\s+$/.test(wordsForAcronyms[j])) {
237
+ prevWord = wordsForAcronyms[j];
238
+ break;
239
+ }
240
+ }
241
+
242
+ // Find next non-whitespace word
243
+ let nextWord = null;
244
+ for (let j = i + 1; j < wordsForAcronyms.length; j++) {
245
+ if (!/^\s+$/.test(wordsForAcronyms[j])) {
246
+ nextWord = wordsForAcronyms[j];
247
+ break;
248
+ }
249
+ }
250
+
214
251
  let currentWord = wordsForAcronyms[i];
215
- const nextWord = i < wordsForAcronyms.length - 1 ? wordsForAcronyms[i + 1] : null;
216
252
 
217
- // Capture punctuation at the end of the word
218
253
  const punctuationMatch = currentWord.match(REGEX_PATTERNS.TRAILING_PUNCTUATION);
219
254
  let punctuation = "";
220
255
 
221
256
  if (punctuationMatch) {
222
257
  punctuation = punctuationMatch[0];
223
- currentWord = currentWord.replace(REGEX_PATTERNS.TRAILING_PUNCTUATION, ""); // Remove punctuation at the end
224
- }
225
-
226
- if (TitleCaserUtils.isRegionalAcronym(currentWord)) {
227
- currentWord = TitleCaserUtils.normalizeRegionalAcronym(currentWord);
258
+ currentWord = currentWord.replace(REGEX_PATTERNS.TRAILING_PUNCTUATION, "");
228
259
  }
229
260
 
230
- if (TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord)) {
261
+ if (TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord, prevWord)) {
231
262
  currentWord = TitleCaserUtils.normalizeRegionalAcronym(currentWord);
232
263
  }
233
264
 
234
- // if punctuation is not empty, add it to the end of the word
235
265
  if (punctuation !== "") {
236
266
  currentWord = currentWord + punctuation;
237
267
  }
238
-
239
- // NOTE: Deliberately NOT writing back to wordsForAcronyms[i] here.
240
- // This first pass does naive acronym detection that creates false positives
241
- // (e.g., pronoun "us" detected as country "US"). Later loops use more
242
- // sophisticated context-aware logic to correctly identify regional acronyms.
268
+
269
+ wordsForAcronyms[i] = currentWord;
243
270
  }
244
271
 
245
- inputString = wordsForAcronyms.join(" ");
272
+ inputString = wordsForAcronyms.join("");
246
273
 
247
- const wordsForShortWords = inputString.split(" ");
274
+ const wordsForShortWords = inputString.split(/(\s+)/);
248
275
  for (let i = 1; i < wordsForShortWords.length - 1; i++) {
249
276
  const currentWord = wordsForShortWords[i];
250
277
  const prevWord = wordsForShortWords[i - 1];
@@ -265,36 +292,62 @@ export class TitleCaser {
265
292
  }
266
293
  }
267
294
 
268
- inputString = wordsForShortWords.join(" ");
295
+ inputString = wordsForShortWords.join("");
269
296
 
270
- const wordsForFinalPass = inputString.split(" ");
297
+ const wordsForFinalPass = inputString.split(/(\s+)/);
271
298
  for (let i = 0; i < wordsForFinalPass.length; i++) {
299
+
300
+ if (/^\s+$/.test(wordsForFinalPass[i])) continue;
301
+
272
302
  let currentWord = wordsForFinalPass[i];
273
- let nextWord = wordsForFinalPass[i + 1];
274
- let prevWord = wordsForFinalPass[i - 1];
303
+
304
+ // Find previous non-whitespace word
305
+ let prevWord = null;
306
+ for (let j = i - 1; j >= 0; j--) {
307
+ if (!/^\s+$/.test(wordsForFinalPass[j])) {
308
+ prevWord = wordsForFinalPass[j];
309
+ break;
310
+ }
311
+ }
312
+
313
+ // Find next non-whitespace word
314
+ let nextWord = null;
315
+ for (let j = i + 1; j < wordsForFinalPass.length; j++) {
316
+ if (!/^\s+$/.test(wordsForFinalPass[j])) {
317
+ nextWord = wordsForFinalPass[j];
318
+ break;
319
+ }
320
+ }
321
+
275
322
  if (nextWord && TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord, prevWord)) {
276
323
  wordsForFinalPass[i] = currentWord.toUpperCase();
277
324
  }
278
325
  }
279
326
 
280
- let finalWord = wordsForFinalPass[wordsForFinalPass.length - 1];
281
- let wordBeforeFinal = wordsForFinalPass[wordsForFinalPass.length - 2];
282
- let twoWordsBeforeFinal = wordsForFinalPass[wordsForFinalPass.length - 3];
283
-
284
- if (TitleCaserUtils.isRegionalAcronym(firstWord)) {
327
+ const nonWhitespaceFinal = wordsForFinalPass.filter(t => !/^\s+$/.test(t));
328
+
329
+ let finalWord = nonWhitespaceFinal[nonWhitespaceFinal.length - 1];
330
+ let wordBeforeFinal = nonWhitespaceFinal[nonWhitespaceFinal.length - 2];
331
+ let twoWordsBeforeFinal = nonWhitespaceFinal[nonWhitespaceFinal.length - 3];
332
+
333
+ if (firstWord && TitleCaserUtils.isRegionalAcronym(firstWord)) {
285
334
  this.logWarning(`firstWord is a regional acronym: ${firstWord}`);
286
335
  wordsForFinalPass[0] = firstWord.toUpperCase();
287
336
  }
288
337
 
289
- if (TitleCaserUtils.isRegionalAcronymNoDot(firstWord, secondWord)) {
338
+ if (firstWord && secondWord && TitleCaserUtils.isRegionalAcronymNoDot(firstWord, secondWord)) {
290
339
  wordsForFinalPass[0] = firstWord.toUpperCase();
291
340
  }
292
341
 
293
- if (TitleCaserUtils.isFinalWordRegionalAcronym(finalWord, wordBeforeFinal, twoWordsBeforeFinal)) {
342
+ if (
343
+ finalWord &&
344
+ wordBeforeFinal &&
345
+ TitleCaserUtils.isFinalWordRegionalAcronym(finalWord, wordBeforeFinal, twoWordsBeforeFinal)
346
+ ) {
294
347
  wordsForFinalPass[wordsForFinalPass.length - 1] = finalWord.toUpperCase();
295
348
  }
296
349
 
297
- inputString = wordsForFinalPass.join(" ");
350
+ inputString = wordsForFinalPass.join("");
298
351
 
299
352
  for (const [phrase, replacement] of Object.entries(this.phraseReplacementMap)) {
300
353
  // Create a regular expression for case-insensitive matching of the phrase
@@ -303,15 +356,15 @@ export class TitleCaser {
303
356
  // Replace the phrase in the input string with its corresponding replacement
304
357
  inputString = inputString.replace(regex, replacement);
305
358
  }
306
-
359
+
307
360
  // ! Handle sentence case
308
361
  if (styleConfig.caseStyle === "sentence") {
309
- const words = inputString.split(" ");
362
+ const words = inputString.split(/(\s+)/);
310
363
  let firstWordFound = false;
311
-
364
+
312
365
  for (let i = 0; i < words.length; i++) {
313
366
  let word = words[i];
314
-
367
+
315
368
  // 1) The first word: Capitalize first letter only, preserve existing brand/case in the rest
316
369
  if (!firstWordFound && /[A-Za-z]/.test(word)) {
317
370
  // If you want to skip altering brand or acronym, do one more check:
@@ -323,15 +376,21 @@ export class TitleCaser {
323
376
  firstWordFound = true;
324
377
  continue;
325
378
  }
326
-
379
+
327
380
  // 2) For subsequent words, only force-lowercase if we do NOT want to preserve uppercase
328
381
  if (!TitleCaser.shouldKeepCasing(word, specialTermsList)) {
329
382
  words[i] = word.toLowerCase();
330
383
  }
331
384
  // else, we keep it exactly as is
332
385
  }
333
-
334
- inputString = words.join(" ");
386
+
387
+ inputString = words.join("");
388
+ }
389
+
390
+ if (normalizeWhitespace) {
391
+ inputString = inputString
392
+ .replace(/\s+/g, " ")
393
+ .trim();
335
394
  }
336
395
 
337
396
  return inputString;
@@ -368,6 +427,11 @@ export class TitleCaser {
368
427
  }
369
428
  });
370
429
 
430
+ // Added check to prevent excessive number of replacement rules which could lead to performance issues
431
+ if (this.wordReplacementsList.length > 2000) {
432
+ throw new Error("Too many replacement rules.");
433
+ }
434
+
371
435
  this.options.wordReplacementsList = this.wordReplacementsList;
372
436
 
373
437
  this.logWarning(`Log the updated this.wordReplacementsList: ${this.wordReplacementsList}`);
@@ -386,6 +450,10 @@ export class TitleCaser {
386
450
  this.wordReplacementsList.push({ [term]: replacement });
387
451
  }
388
452
 
453
+ if (this.wordReplacementsList.length > 2000) {
454
+ throw new Error("Too many replacement rules.");
455
+ }
456
+
389
457
  this.options.wordReplacementsList = this.wordReplacementsList;
390
458
  }
391
459
 
@@ -467,7 +535,7 @@ export class TitleCaser {
467
535
  if (TitleCaserUtils.hasUppercaseIntentional(word)) return true;
468
536
  // If it's in the brand/specialTermsList
469
537
  if (TitleCaserUtils.isWordInArray(word, specialTermsList)) return true;
470
-
538
+
471
539
  // Otherwise, no. It's safe to lowercase.
472
540
  return false;
473
541
  }
@@ -7,7 +7,7 @@ import {
7
7
  shortWordsList,
8
8
  regionalAcronymList,
9
9
  regionalAcronymPrecedingWordsList,
10
- regionalAcronymFollowingWordsList
10
+ regionalAcronymFollowingWordsList,
11
11
  } from "./TitleCaserConsts.js";
12
12
 
13
13
  export class TitleCaserUtils {
@@ -65,7 +65,7 @@ export class TitleCaserUtils {
65
65
  // Create a unique key for the cache using a faster approach than JSON.stringify
66
66
  const style = options.style || "ap";
67
67
  const smartQuotes = options.hasOwnProperty("smartQuotes") ? options.smartQuotes : false;
68
- const cacheKey = `${style}|${smartQuotes}|${lowercaseWords.length > 0 ? lowercaseWords.sort().join(',') : ''}`;
68
+ const cacheKey = `${style}|${smartQuotes}|${lowercaseWords.length > 0 ? lowercaseWords.sort().join(",") : ""}`;
69
69
 
70
70
  // If the cache already has an entry for this key, return the cached options
71
71
  if (TitleCaserUtils.titleCaseOptionsCache.has(cacheKey)) {
@@ -80,22 +80,13 @@ export class TitleCaserUtils {
80
80
 
81
81
  // Merge the default articles with user-provided articles and lowercase words
82
82
  // Using Set for O(n) deduplication instead of O(n²) filter+indexOf
83
- const mergedArticles = [...new Set([
84
- ...mergedOptions.articlesList,
85
- ...lowercaseWords
86
- ])];
83
+ const mergedArticles = [...new Set([...mergedOptions.articlesList, ...lowercaseWords])];
87
84
 
88
85
  // Merge the default short conjunctions with user-provided conjunctions and lowercase words
89
- const mergedShortConjunctions = [...new Set([
90
- ...mergedOptions.shortConjunctionsList,
91
- ...lowercaseWords
92
- ])];
86
+ const mergedShortConjunctions = [...new Set([...mergedOptions.shortConjunctionsList, ...lowercaseWords])];
93
87
 
94
88
  // Merge the default short prepositions with user-provided prepositions and lowercase words
95
- const mergedShortPrepositions = [...new Set([
96
- ...mergedOptions.shortPrepositionsList,
97
- ...lowercaseWords
98
- ])];
89
+ const mergedShortPrepositions = [...new Set([...mergedOptions.shortPrepositionsList, ...lowercaseWords])];
99
90
 
100
91
  // Merge the default word replacements with the user-provided replacements
101
92
  const mergedReplaceTerms = [
@@ -240,9 +231,7 @@ export class TitleCaserUtils {
240
231
 
241
232
  // Check if the entire input string is uppercase
242
233
  static isEntirelyUppercase(str) {
243
- return str === str.toUpperCase() &&
244
- str !== str.toLowerCase() &&
245
- str.length > 1;
234
+ return str === str.toUpperCase() && str !== str.toLowerCase() && str.length > 1;
246
235
  }
247
236
 
248
237
  static isRegionalAcronym(word) {
@@ -259,27 +248,25 @@ export class TitleCaserUtils {
259
248
  }
260
249
 
261
250
  static isRegionalAcronymNoDot(word, nextWord, prevWord = null) {
262
- if (typeof word !== 'string' || typeof nextWord !== 'string') {
251
+ if (typeof word !== "string" || typeof nextWord !== "string") {
263
252
  return false;
264
253
  }
265
254
 
266
255
  const firstWordStripped = word.toLowerCase().replace(/[^\w\s]/g, "");
267
256
  const nextWordStripped = nextWord.toLowerCase().replace(/[^\w\s]/g, "");
268
257
 
269
- const smallDirectPrecedingIndicators = [
270
- "the",
271
- ];
258
+ const smallDirectPrecedingIndicators = ["the"];
272
259
 
273
- if (prevWord &&
260
+ if (
261
+ prevWord &&
274
262
  regionalAcronymList.includes(firstWordStripped) &&
275
- smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())) {
276
-
277
- return true;
263
+ smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())
264
+ ) {
265
+ return true;
278
266
  }
279
267
 
280
268
  return (
281
- regionalAcronymList.includes(firstWordStripped) &&
282
- regionalAcronymFollowingWordsList.includes(nextWordStripped)
269
+ regionalAcronymList.includes(firstWordStripped) && regionalAcronymFollowingWordsList.includes(nextWordStripped)
283
270
  );
284
271
  }
285
272
 
@@ -288,9 +275,7 @@ export class TitleCaserUtils {
288
275
 
289
276
  const current = word.toLowerCase().replace(/[^\w]/g, "");
290
277
  const prev = prevWord.toLowerCase().replace(/[^\w]/g, "");
291
- const prevPrev = typeof prevPrevWord === "string"
292
- ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "")
293
- : null;
278
+ const prevPrev = typeof prevPrevWord === "string" ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "") : null;
294
279
 
295
280
  if (!regionalAcronymList.includes(current)) return false;
296
281
 
@@ -321,19 +306,9 @@ export class TitleCaserUtils {
321
306
  if (!word || !style || !styleConfigMap[style]) return false;
322
307
 
323
308
  const lowerWord = word.toLowerCase();
324
- const {
325
- shortConjunctionsList,
326
- articlesList,
327
- shortPrepositionsList,
328
- neverCapitalizedList
329
- } = styleConfigMap[style];
330
-
331
- const combinedList = [
332
- ...shortConjunctionsList,
333
- ...articlesList,
334
- ...shortPrepositionsList,
335
- ...neverCapitalizedList
336
- ];
309
+ const { shortConjunctionsList, articlesList, shortPrepositionsList, neverCapitalizedList } = styleConfigMap[style];
310
+
311
+ const combinedList = [...shortConjunctionsList, ...articlesList, ...shortPrepositionsList, ...neverCapitalizedList];
337
312
 
338
313
  return combinedList.includes(lowerWord) ? word : false;
339
314
  }
@@ -585,9 +560,7 @@ export class TitleCaserUtils {
585
560
  throw new TypeError("Invalid input: word must be a non-empty string.");
586
561
  }
587
562
 
588
- const knownElidedPrefixes = new Set([
589
- "o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
590
- ]);
563
+ const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
591
564
 
592
565
  const normalized = word.trim().toLowerCase().replace(/'/g, "’");
593
566
 
@@ -606,9 +579,7 @@ export class TitleCaserUtils {
606
579
  throw new TypeError("Invalid input: word must be a non-empty string.");
607
580
  }
608
581
 
609
- const knownElidedPrefixes = new Set([
610
- "o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
611
- ]);
582
+ const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
612
583
 
613
584
  const original = word.trim();
614
585
  const normalized = original.replace(/'/g, "’").toLowerCase();
@@ -619,9 +590,7 @@ export class TitleCaserUtils {
619
590
  const rest = original.slice(prefixLength);
620
591
 
621
592
  const fixedPrefix = prefix.charAt(0).toUpperCase() + prefix.slice(1);
622
- const fixedRest = rest.length > 0
623
- ? rest.charAt(0).toUpperCase() + rest.slice(1)
624
- : "";
593
+ const fixedRest = rest.length > 0 ? rest.charAt(0).toUpperCase() + rest.slice(1) : "";
625
594
 
626
595
  return fixedPrefix + fixedRest;
627
596
  }
@@ -716,88 +685,88 @@ export class TitleCaserUtils {
716
685
  return parts.join(joiner);
717
686
  }
718
687
 
719
- // This function is used to check if a word is in the correct terms list
720
- static correctTermHyphenated(word, style) {
721
- // Split the word into an array of words
722
- const hyphenatedWords = word.split("-");
723
-
724
- // Define functions to process words
725
- const capitalizeFirst = (word) => word.charAt(0).toUpperCase() + word.slice(1);
726
- const lowercaseRest = (word) => word.charAt(0) + word.slice(1).toLowerCase();
727
-
728
- // Define the style-specific processing functions
729
- const styleFunctions = {
730
- ap: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
731
- chicago: capitalizeFirst,
732
- apa: (word, index, length) => {
733
- if (TitleCaserUtils.isShortWord(word, style) && index > 0 && index < length - 1) {
734
- return word.toLowerCase();
735
- } else {
736
- return capitalizeFirst(word);
737
- }
738
- },
739
- nyt: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
740
- wikipedia: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
741
- };
688
+ // This function is used to check if a word is in the correct terms list
689
+ static correctTermHyphenated(word, style) {
690
+ // Split the word into an array of words (supports -, –, —)
691
+ const dashMatch = word.match(/[-–—]/);
692
+ if (!dashMatch) return word;
693
+
694
+ const dash = dashMatch[0];
695
+ const hyphenatedWords = word.split(/[-–—]/);
696
+
697
+ // Detect if ANY segment is a regional acronym
698
+ const containsRegionalAcronym = hyphenatedWords.some((segment) =>
699
+ regionalAcronymList.includes(
700
+ segment.toLowerCase().replace(/[^\w]/g, "")
701
+ )
702
+ );
703
+
704
+ // Define functions to process words
705
+ const capitalizeFirst = (w) => w.charAt(0).toUpperCase() + w.slice(1);
706
+ const lowercaseRest = (w) => w.charAt(0) + w.slice(1).toLowerCase();
707
+
708
+ // Define the style-specific processing functions
709
+ const styleFunctions = {
710
+ ap: (w, index) => {
711
+ // If compound contains acronym → headline-style compound
712
+ if (containsRegionalAcronym) {
713
+ return capitalizeFirst(w);
714
+ }
715
+ return index === 0 ? capitalizeFirst(w) : lowercaseRest(w);
716
+ },
717
+ chicago: capitalizeFirst,
718
+ apa: (w, index, length) => {
719
+ if (
720
+ !containsRegionalAcronym &&
721
+ TitleCaserUtils.isShortWord(w, style) &&
722
+ index > 0 &&
723
+ index < length - 1
724
+ ) {
725
+ return w.toLowerCase();
726
+ }
727
+ return capitalizeFirst(w);
728
+ },
729
+ nyt: capitalizeFirst,
730
+ wikipedia: (w, index) =>
731
+ index === 0 ? capitalizeFirst(w) : lowercaseRest(w),
732
+ };
742
733
 
743
- // Get the style-specific processing function
744
- const processWord = styleFunctions[style] || lowercaseRest;
734
+ const processWord = styleFunctions[style] || lowercaseRest;
745
735
 
746
- // Process each word
747
- const processedWords = hyphenatedWords.map((word, i) => {
748
- let correctedWord = word;
736
+ const processedWords = hyphenatedWords.map((segment, i) => {
737
+ let correctedWord = segment;
749
738
 
750
- const romanNumeralApostropheSRegex = /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})'s$/i;
751
- if (romanNumeralApostropheSRegex.test(word)) {
752
- const updatedWord = correctedWord.toUpperCase().replace(/'S$/, "'s");
753
- // Uppercase the Roman numeral part and concatenate back with 's
754
- return updatedWord;
755
- }
739
+ const normalizedSegment = segment
740
+ .toLowerCase()
741
+ .replace(/[^\w]/g, "");
756
742
 
757
- // Check if the word is a Roman numeral
758
- const romanNumeralRegex = /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i;
759
- if (romanNumeralRegex.test(word)) {
760
- return word.toUpperCase();
761
- }
743
+ // Normalize acronym casing
744
+ if (regionalAcronymList.includes(normalizedSegment)) {
745
+ return segment.toUpperCase();
746
+ }
762
747
 
763
- // Preserve the original word
764
-
765
- // Check if the word contains an apostrophe
766
- const hasApostrophe = word.includes("'");
767
- if (hasApostrophe) {
768
- // Split the word at the apostrophe
769
- const wordParts = word.split("'");
770
- // Check each part for Roman numerals
771
- const isRomanNumeral = wordParts.every((part) => romanNumeralRegex.test(part));
772
- if (isRomanNumeral) {
773
- // Uppercase each Roman numeral part and join back with apostrophe
774
- correctedWord = wordParts.map((part) => part.toUpperCase()).join("'");
775
- return correctedWord;
776
- } else {
777
- return processWord(correctedWord, i, hyphenatedWords.length);
778
- }
779
- }
748
+ // Roman numeral logic
749
+ const romanNumeralRegex =
750
+ /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i;
780
751
 
781
- // Check if the word is in the list of words to preserve
782
- const lowerCaseWord = word.toLowerCase();
783
- const uniqueTermsIndex = specialTermsList.findIndex((w) => w.toLowerCase() === lowerCaseWord);
784
- if (uniqueTermsIndex >= 0) {
785
- correctedWord = specialTermsList[uniqueTermsIndex];
786
- }
787
- // Check if the word is a possessive form
788
- else if (lowerCaseWord.endsWith("'s")) {
789
- const rootWord = lowerCaseWord.substring(0, lowerCaseWord.length - 2);
790
- const rootWordIndex = specialTermsList.findIndex((w) => w.toLowerCase() === rootWord);
791
- if (rootWordIndex >= 0) {
792
- correctedWord = `${specialTermsList[rootWordIndex]}'s`;
793
- }
794
- }
752
+ if (romanNumeralRegex.test(segment)) {
753
+ return segment.toUpperCase();
754
+ }
795
755
 
796
- // Process the word
797
- return processWord(correctedWord, i, hyphenatedWords.length);
798
- });
756
+ // Preserve special terms
757
+ const lowerCaseWord = segment.toLowerCase();
758
+ const uniqueTermsIndex = specialTermsList.findIndex(
759
+ (w) => w.toLowerCase() === lowerCaseWord
760
+ );
761
+
762
+ if (uniqueTermsIndex >= 0) {
763
+ correctedWord = specialTermsList[uniqueTermsIndex];
764
+ }
765
+
766
+ return processWord(correctedWord, i, hyphenatedWords.length);
767
+ });
768
+
769
+ return processedWords.join(dash);
770
+ }
799
771
 
800
- // Rejoin the words
801
- return processedWords.join("-");
802
- }
803
772
  }