@danielhaim/titlecaser 1.7.10 → 1.7.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,13 @@
1
1
  import {
2
- allowedTitleCaseStylesList,
3
- titleCaseDefaultOptionsList,
2
+ allowedStylesList,
3
+ styleConfigMap,
4
4
  wordReplacementsList,
5
- correctTitleCasingList,
5
+ specialTermsList,
6
6
  ignoredWordList,
7
- commonShortWords,
7
+ shortWordsList,
8
8
  regionalAcronymList,
9
- regionalAcronymPrecedingWords,
10
- directFollowingIndicatorsRegionalAcronym
9
+ regionalAcronymPrecedingWordsList,
10
+ regionalAcronymFollowingWordsList,
11
11
  } from "./TitleCaserConsts.js";
12
12
 
13
13
  export class TitleCaserUtils {
@@ -32,7 +32,7 @@ export class TitleCaserUtils {
32
32
  if (key === "style") {
33
33
  if (typeof options.style !== "string") {
34
34
  throw new TypeError(`Invalid option: ${key} must be a string`);
35
- } else if (!allowedTitleCaseStylesList.includes(options.style)) {
35
+ } else if (!allowedStylesList.includes(options.style)) {
36
36
  throw new TypeError(`Invalid option: ${key} must be a string`);
37
37
  }
38
38
  continue;
@@ -51,22 +51,21 @@ export class TitleCaserUtils {
51
51
  continue;
52
52
  }
53
53
 
54
- if (!titleCaseDefaultOptionsList.hasOwnProperty(key)) {
54
+ if (!styleConfigMap.hasOwnProperty(key)) {
55
55
  throw new TypeError(`Invalid option: ${key}`);
56
56
  }
57
57
 
58
- this.TitleCaseValidator.validateOption(key, options[key]);
58
+ TitleCaserUtils.validateOption(key, options[key]);
59
59
  }
60
60
  }
61
61
 
62
62
  static titleCaseOptionsCache = new Map();
63
63
 
64
64
  static getTitleCaseOptions(options = {}, lowercaseWords = []) {
65
- // Create a unique key for the cache that combines the options and the lowercase words
66
- const cacheKey = JSON.stringify({
67
- options,
68
- lowercaseWords,
69
- });
65
+ // Create a unique key for the cache using a faster approach than JSON.stringify
66
+ const style = options.style || "ap";
67
+ const smartQuotes = options.hasOwnProperty("smartQuotes") ? options.smartQuotes : false;
68
+ const cacheKey = `${style}|${smartQuotes}|${lowercaseWords.length > 0 ? lowercaseWords.sort().join(",") : ""}`;
70
69
 
71
70
  // If the cache already has an entry for this key, return the cached options
72
71
  if (TitleCaserUtils.titleCaseOptionsCache.has(cacheKey)) {
@@ -74,25 +73,20 @@ export class TitleCaserUtils {
74
73
  }
75
74
 
76
75
  const mergedOptions = {
77
- ...titleCaseDefaultOptionsList[options.style || "ap"],
76
+ ...styleConfigMap[options.style || "ap"],
78
77
  ...options,
79
78
  smartQuotes: options.hasOwnProperty("smartQuotes") ? options.smartQuotes : false,
80
79
  };
81
80
 
82
81
  // Merge the default articles with user-provided articles and lowercase words
83
- const mergedArticles = mergedOptions.articlesList
84
- .concat(lowercaseWords)
85
- .filter((word, index, array) => array.indexOf(word) === index);
82
+ // Using Set for O(n) deduplication instead of O(n²) filter+indexOf
83
+ const mergedArticles = [...new Set([...mergedOptions.articlesList, ...lowercaseWords])];
86
84
 
87
85
  // Merge the default short conjunctions with user-provided conjunctions and lowercase words
88
- const mergedShortConjunctions = mergedOptions.shortConjunctionsList
89
- .concat(lowercaseWords)
90
- .filter((word, index, array) => array.indexOf(word) === index);
86
+ const mergedShortConjunctions = [...new Set([...mergedOptions.shortConjunctionsList, ...lowercaseWords])];
91
87
 
92
88
  // Merge the default short prepositions with user-provided prepositions and lowercase words
93
- const mergedShortPrepositions = mergedOptions.shortPrepositionsList
94
- .concat(lowercaseWords)
95
- .filter((word, index, array) => array.indexOf(word) === index);
89
+ const mergedShortPrepositions = [...new Set([...mergedOptions.shortPrepositionsList, ...lowercaseWords])];
96
90
 
97
91
  // Merge the default word replacements with the user-provided replacements
98
92
  const mergedReplaceTerms = [
@@ -186,8 +180,8 @@ export class TitleCaserUtils {
186
180
  }
187
181
 
188
182
  // If the style is not one of the allowed styles, throw an Error.
189
- if (!allowedTitleCaseStylesList.includes(style)) {
190
- throw new Error(`Invalid option: style must be one of ${allowedTitleCaseStylesList.join(", ")}.`);
183
+ if (!allowedStylesList.includes(style)) {
184
+ throw new Error(`Invalid option: style must be one of ${allowedStylesList.join(", ")}.`);
191
185
  }
192
186
 
193
187
  // If the word is a short conjunction, article, preposition, or is in the never-capitalized list, return true.
@@ -237,9 +231,7 @@ export class TitleCaserUtils {
237
231
 
238
232
  // Check if the entire input string is uppercase
239
233
  static isEntirelyUppercase(str) {
240
- return str === str.toUpperCase() &&
241
- str !== str.toLowerCase() &&
242
- str.length > 1;
234
+ return str === str.toUpperCase() && str !== str.toLowerCase() && str.length > 1;
243
235
  }
244
236
 
245
237
  static isRegionalAcronym(word) {
@@ -256,27 +248,25 @@ export class TitleCaserUtils {
256
248
  }
257
249
 
258
250
  static isRegionalAcronymNoDot(word, nextWord, prevWord = null) {
259
- if (typeof word !== 'string' || typeof nextWord !== 'string') {
251
+ if (typeof word !== "string" || typeof nextWord !== "string") {
260
252
  return false;
261
253
  }
262
254
 
263
255
  const firstWordStripped = word.toLowerCase().replace(/[^\w\s]/g, "");
264
256
  const nextWordStripped = nextWord.toLowerCase().replace(/[^\w\s]/g, "");
265
257
 
266
- const smallDirectPrecedingIndicators = [
267
- "the",
268
- ];
258
+ const smallDirectPrecedingIndicators = ["the"];
269
259
 
270
- if (prevWord &&
260
+ if (
261
+ prevWord &&
271
262
  regionalAcronymList.includes(firstWordStripped) &&
272
- smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())) {
273
-
274
- return true;
263
+ smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())
264
+ ) {
265
+ return true;
275
266
  }
276
267
 
277
268
  return (
278
- regionalAcronymList.includes(firstWordStripped) &&
279
- directFollowingIndicatorsRegionalAcronym.includes(nextWordStripped)
269
+ regionalAcronymList.includes(firstWordStripped) && regionalAcronymFollowingWordsList.includes(nextWordStripped)
280
270
  );
281
271
  }
282
272
 
@@ -285,17 +275,15 @@ export class TitleCaserUtils {
285
275
 
286
276
  const current = word.toLowerCase().replace(/[^\w]/g, "");
287
277
  const prev = prevWord.toLowerCase().replace(/[^\w]/g, "");
288
- const prevPrev = typeof prevPrevWord === "string"
289
- ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "")
290
- : null;
278
+ const prevPrev = typeof prevPrevWord === "string" ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "") : null;
291
279
 
292
280
  if (!regionalAcronymList.includes(current)) return false;
293
281
 
294
282
  // Direct 100% safe word before the acronym
295
- if (regionalAcronymPrecedingWords.includes(prev)) return true;
283
+ if (regionalAcronymPrecedingWordsList.includes(prev)) return true;
296
284
 
297
285
  // Extended pattern: e.g., "from the US"
298
- if (prev === "the" && prevPrev && regionalAcronymPrecedingWords.includes(prevPrev)) {
286
+ if (prev === "the" && prevPrev && regionalAcronymPrecedingWordsList.includes(prevPrev)) {
299
287
  return true;
300
288
  }
301
289
 
@@ -315,22 +303,12 @@ export class TitleCaserUtils {
315
303
  }
316
304
 
317
305
  static normalizeCasingForWordByStyle(word, style) {
318
- if (!word || !style || !titleCaseDefaultOptionsList[style]) return false;
306
+ if (!word || !style || !styleConfigMap[style]) return false;
319
307
 
320
308
  const lowerWord = word.toLowerCase();
321
- const {
322
- shortConjunctionsList,
323
- articlesList,
324
- shortPrepositionsList,
325
- neverCapitalizedList
326
- } = titleCaseDefaultOptionsList[style];
327
-
328
- const combinedList = [
329
- ...shortConjunctionsList,
330
- ...articlesList,
331
- ...shortPrepositionsList,
332
- ...neverCapitalizedList
333
- ];
309
+ const { shortConjunctionsList, articlesList, shortPrepositionsList, neverCapitalizedList } = styleConfigMap[style];
310
+
311
+ const combinedList = [...shortConjunctionsList, ...articlesList, ...shortPrepositionsList, ...neverCapitalizedList];
334
312
 
335
313
  return combinedList.includes(lowerWord) ? word : false;
336
314
  }
@@ -582,9 +560,7 @@ export class TitleCaserUtils {
582
560
  throw new TypeError("Invalid input: word must be a non-empty string.");
583
561
  }
584
562
 
585
- const knownElidedPrefixes = new Set([
586
- "o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
587
- ]);
563
+ const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
588
564
 
589
565
  const normalized = word.trim().toLowerCase().replace(/'/g, "’");
590
566
 
@@ -603,9 +579,7 @@ export class TitleCaserUtils {
603
579
  throw new TypeError("Invalid input: word must be a non-empty string.");
604
580
  }
605
581
 
606
- const knownElidedPrefixes = new Set([
607
- "o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
608
- ]);
582
+ const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
609
583
 
610
584
  const original = word.trim();
611
585
  const normalized = original.replace(/'/g, "’").toLowerCase();
@@ -616,9 +590,7 @@ export class TitleCaserUtils {
616
590
  const rest = original.slice(prefixLength);
617
591
 
618
592
  const fixedPrefix = prefix.charAt(0).toUpperCase() + prefix.slice(1);
619
- const fixedRest = rest.length > 0
620
- ? rest.charAt(0).toUpperCase() + rest.slice(1)
621
- : "";
593
+ const fixedRest = rest.length > 0 ? rest.charAt(0).toUpperCase() + rest.slice(1) : "";
622
594
 
623
595
  return fixedPrefix + fixedRest;
624
596
  }
@@ -713,88 +685,88 @@ export class TitleCaserUtils {
713
685
  return parts.join(joiner);
714
686
  }
715
687
 
716
- // This function is used to check if a word is in the correct terms list
717
- static correctTermHyphenated(word, style) {
718
- // Split the word into an array of words
719
- const hyphenatedWords = word.split("-");
720
-
721
- // Define functions to process words
722
- const capitalizeFirst = (word) => word.charAt(0).toUpperCase() + word.slice(1);
723
- const lowercaseRest = (word) => word.charAt(0) + word.slice(1).toLowerCase();
724
-
725
- // Define the style-specific processing functions
726
- const styleFunctions = {
727
- ap: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
728
- chicago: capitalizeFirst,
729
- apa: (word, index, length) => {
730
- if (TitleCaserUtils.isShortWord(word, style) && index > 0 && index < length - 1) {
731
- return word.toLowerCase();
732
- } else {
733
- return capitalizeFirst(word);
734
- }
735
- },
736
- nyt: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
737
- wikipedia: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
738
- };
688
+ // This function is used to check if a word is in the correct terms list
689
+ static correctTermHyphenated(word, style) {
690
+ // Split the word into an array of words (supports -, –, —)
691
+ const dashMatch = word.match(/[-–—]/);
692
+ if (!dashMatch) return word;
693
+
694
+ const dash = dashMatch[0];
695
+ const hyphenatedWords = word.split(/[-–—]/);
696
+
697
+ // Detect if ANY segment is a regional acronym
698
+ const containsRegionalAcronym = hyphenatedWords.some((segment) =>
699
+ regionalAcronymList.includes(
700
+ segment.toLowerCase().replace(/[^\w]/g, "")
701
+ )
702
+ );
703
+
704
+ // Define functions to process words
705
+ const capitalizeFirst = (w) => w.charAt(0).toUpperCase() + w.slice(1);
706
+ const lowercaseRest = (w) => w.charAt(0) + w.slice(1).toLowerCase();
707
+
708
+ // Define the style-specific processing functions
709
+ const styleFunctions = {
710
+ ap: (w, index) => {
711
+ // If compound contains acronym → headline-style compound
712
+ if (containsRegionalAcronym) {
713
+ return capitalizeFirst(w);
714
+ }
715
+ return index === 0 ? capitalizeFirst(w) : lowercaseRest(w);
716
+ },
717
+ chicago: capitalizeFirst,
718
+ apa: (w, index, length) => {
719
+ if (
720
+ !containsRegionalAcronym &&
721
+ TitleCaserUtils.isShortWord(w, style) &&
722
+ index > 0 &&
723
+ index < length - 1
724
+ ) {
725
+ return w.toLowerCase();
726
+ }
727
+ return capitalizeFirst(w);
728
+ },
729
+ nyt: capitalizeFirst,
730
+ wikipedia: (w, index) =>
731
+ index === 0 ? capitalizeFirst(w) : lowercaseRest(w),
732
+ };
739
733
 
740
- // Get the style-specific processing function
741
- const processWord = styleFunctions[style] || lowercaseRest;
734
+ const processWord = styleFunctions[style] || lowercaseRest;
742
735
 
743
- // Process each word
744
- const processedWords = hyphenatedWords.map((word, i) => {
745
- let correctedWord = word;
736
+ const processedWords = hyphenatedWords.map((segment, i) => {
737
+ let correctedWord = segment;
746
738
 
747
- const romanNumeralApostropheSRegex = /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})'s$/i;
748
- if (romanNumeralApostropheSRegex.test(word)) {
749
- const updatedWord = correctedWord.toUpperCase().replace(/'S$/, "'s");
750
- // Uppercase the Roman numeral part and concatenate back with 's
751
- return updatedWord;
752
- }
739
+ const normalizedSegment = segment
740
+ .toLowerCase()
741
+ .replace(/[^\w]/g, "");
753
742
 
754
- // Check if the word is a Roman numeral
755
- const romanNumeralRegex = /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i;
756
- if (romanNumeralRegex.test(word)) {
757
- return word.toUpperCase();
758
- }
743
+ // Normalize acronym casing
744
+ if (regionalAcronymList.includes(normalizedSegment)) {
745
+ return segment.toUpperCase();
746
+ }
759
747
 
760
- // Preserve the original word
761
-
762
- // Check if the word contains an apostrophe
763
- const hasApostrophe = word.includes("'");
764
- if (hasApostrophe) {
765
- // Split the word at the apostrophe
766
- const wordParts = word.split("'");
767
- // Check each part for Roman numerals
768
- const isRomanNumeral = wordParts.every((part) => romanNumeralRegex.test(part));
769
- if (isRomanNumeral) {
770
- // Uppercase each Roman numeral part and join back with apostrophe
771
- correctedWord = wordParts.map((part) => part.toUpperCase()).join("'");
772
- return correctedWord;
773
- } else {
774
- return processWord(correctedWord, i, hyphenatedWords.length);
775
- }
776
- }
748
+ // Roman numeral logic
749
+ const romanNumeralRegex =
750
+ /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i;
777
751
 
778
- // Check if the word is in the list of words to preserve
779
- const lowerCaseWord = word.toLowerCase();
780
- const uniqueTermsIndex = correctTitleCasingList.findIndex((w) => w.toLowerCase() === lowerCaseWord);
781
- if (uniqueTermsIndex >= 0) {
782
- correctedWord = correctTitleCasingList[uniqueTermsIndex];
783
- }
784
- // Check if the word is a possessive form
785
- else if (lowerCaseWord.endsWith("'s")) {
786
- const rootWord = lowerCaseWord.substring(0, lowerCaseWord.length - 2);
787
- const rootWordIndex = correctTitleCasingList.findIndex((w) => w.toLowerCase() === rootWord);
788
- if (rootWordIndex >= 0) {
789
- correctedWord = `${correctTitleCasingList[rootWordIndex]}'s`;
790
- }
791
- }
752
+ if (romanNumeralRegex.test(segment)) {
753
+ return segment.toUpperCase();
754
+ }
792
755
 
793
- // Process the word
794
- return processWord(correctedWord, i, hyphenatedWords.length);
795
- });
756
+ // Preserve special terms
757
+ const lowerCaseWord = segment.toLowerCase();
758
+ const uniqueTermsIndex = specialTermsList.findIndex(
759
+ (w) => w.toLowerCase() === lowerCaseWord
760
+ );
761
+
762
+ if (uniqueTermsIndex >= 0) {
763
+ correctedWord = specialTermsList[uniqueTermsIndex];
764
+ }
765
+
766
+ return processWord(correctedWord, i, hyphenatedWords.length);
767
+ });
768
+
769
+ return processedWords.join(dash);
770
+ }
796
771
 
797
- // Rejoin the words
798
- return processedWords.join("-");
799
- }
800
772
  }