@danielhaim/titlecaser 1.7.12 → 1.7.13

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
@@ -7,7 +7,7 @@ import {
7
7
  shortWordsList,
8
8
  regionalAcronymList,
9
9
  regionalAcronymPrecedingWordsList,
10
- regionalAcronymFollowingWordsList
10
+ regionalAcronymFollowingWordsList,
11
11
  } from "./TitleCaserConsts.js";
12
12
 
13
13
  export class TitleCaserUtils {
@@ -65,7 +65,7 @@ export class TitleCaserUtils {
65
65
  // Create a unique key for the cache using a faster approach than JSON.stringify
66
66
  const style = options.style || "ap";
67
67
  const smartQuotes = options.hasOwnProperty("smartQuotes") ? options.smartQuotes : false;
68
- const cacheKey = `${style}|${smartQuotes}|${lowercaseWords.length > 0 ? lowercaseWords.sort().join(',') : ''}`;
68
+ const cacheKey = `${style}|${smartQuotes}|${lowercaseWords.length > 0 ? lowercaseWords.sort().join(",") : ""}`;
69
69
 
70
70
  // If the cache already has an entry for this key, return the cached options
71
71
  if (TitleCaserUtils.titleCaseOptionsCache.has(cacheKey)) {
@@ -80,22 +80,13 @@ export class TitleCaserUtils {
80
80
 
81
81
  // Merge the default articles with user-provided articles and lowercase words
82
82
  // Using Set for O(n) deduplication instead of O(n²) filter+indexOf
83
- const mergedArticles = [...new Set([
84
- ...mergedOptions.articlesList,
85
- ...lowercaseWords
86
- ])];
83
+ const mergedArticles = [...new Set([...mergedOptions.articlesList, ...lowercaseWords])];
87
84
 
88
85
  // Merge the default short conjunctions with user-provided conjunctions and lowercase words
89
- const mergedShortConjunctions = [...new Set([
90
- ...mergedOptions.shortConjunctionsList,
91
- ...lowercaseWords
92
- ])];
86
+ const mergedShortConjunctions = [...new Set([...mergedOptions.shortConjunctionsList, ...lowercaseWords])];
93
87
 
94
88
  // Merge the default short prepositions with user-provided prepositions and lowercase words
95
- const mergedShortPrepositions = [...new Set([
96
- ...mergedOptions.shortPrepositionsList,
97
- ...lowercaseWords
98
- ])];
89
+ const mergedShortPrepositions = [...new Set([...mergedOptions.shortPrepositionsList, ...lowercaseWords])];
99
90
 
100
91
  // Merge the default word replacements with the user-provided replacements
101
92
  const mergedReplaceTerms = [
@@ -240,9 +231,7 @@ export class TitleCaserUtils {
240
231
 
241
232
  // Check if the entire input string is uppercase
242
233
  static isEntirelyUppercase(str) {
243
- return str === str.toUpperCase() &&
244
- str !== str.toLowerCase() &&
245
- str.length > 1;
234
+ return str === str.toUpperCase() && str !== str.toLowerCase() && str.length > 1;
246
235
  }
247
236
 
248
237
  static isRegionalAcronym(word) {
@@ -259,27 +248,25 @@ export class TitleCaserUtils {
259
248
  }
260
249
 
261
250
  static isRegionalAcronymNoDot(word, nextWord, prevWord = null) {
262
- if (typeof word !== 'string' || typeof nextWord !== 'string') {
251
+ if (typeof word !== "string" || typeof nextWord !== "string") {
263
252
  return false;
264
253
  }
265
254
 
266
255
  const firstWordStripped = word.toLowerCase().replace(/[^\w\s]/g, "");
267
256
  const nextWordStripped = nextWord.toLowerCase().replace(/[^\w\s]/g, "");
268
257
 
269
- const smallDirectPrecedingIndicators = [
270
- "the",
271
- ];
258
+ const smallDirectPrecedingIndicators = ["the"];
272
259
 
273
- if (prevWord &&
260
+ if (
261
+ prevWord &&
274
262
  regionalAcronymList.includes(firstWordStripped) &&
275
- smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())) {
276
-
277
- return true;
263
+ smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())
264
+ ) {
265
+ return true;
278
266
  }
279
267
 
280
268
  return (
281
- regionalAcronymList.includes(firstWordStripped) &&
282
- regionalAcronymFollowingWordsList.includes(nextWordStripped)
269
+ regionalAcronymList.includes(firstWordStripped) && regionalAcronymFollowingWordsList.includes(nextWordStripped)
283
270
  );
284
271
  }
285
272
 
@@ -288,9 +275,7 @@ export class TitleCaserUtils {
288
275
 
289
276
  const current = word.toLowerCase().replace(/[^\w]/g, "");
290
277
  const prev = prevWord.toLowerCase().replace(/[^\w]/g, "");
291
- const prevPrev = typeof prevPrevWord === "string"
292
- ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "")
293
- : null;
278
+ const prevPrev = typeof prevPrevWord === "string" ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "") : null;
294
279
 
295
280
  if (!regionalAcronymList.includes(current)) return false;
296
281
 
@@ -321,19 +306,9 @@ export class TitleCaserUtils {
321
306
  if (!word || !style || !styleConfigMap[style]) return false;
322
307
 
323
308
  const lowerWord = word.toLowerCase();
324
- const {
325
- shortConjunctionsList,
326
- articlesList,
327
- shortPrepositionsList,
328
- neverCapitalizedList
329
- } = styleConfigMap[style];
330
-
331
- const combinedList = [
332
- ...shortConjunctionsList,
333
- ...articlesList,
334
- ...shortPrepositionsList,
335
- ...neverCapitalizedList
336
- ];
309
+ const { shortConjunctionsList, articlesList, shortPrepositionsList, neverCapitalizedList } = styleConfigMap[style];
310
+
311
+ const combinedList = [...shortConjunctionsList, ...articlesList, ...shortPrepositionsList, ...neverCapitalizedList];
337
312
 
338
313
  return combinedList.includes(lowerWord) ? word : false;
339
314
  }
@@ -585,9 +560,7 @@ export class TitleCaserUtils {
585
560
  throw new TypeError("Invalid input: word must be a non-empty string.");
586
561
  }
587
562
 
588
- const knownElidedPrefixes = new Set([
589
- "o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
590
- ]);
563
+ const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
591
564
 
592
565
  const normalized = word.trim().toLowerCase().replace(/'/g, "’");
593
566
 
@@ -606,9 +579,7 @@ export class TitleCaserUtils {
606
579
  throw new TypeError("Invalid input: word must be a non-empty string.");
607
580
  }
608
581
 
609
- const knownElidedPrefixes = new Set([
610
- "o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
611
- ]);
582
+ const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
612
583
 
613
584
  const original = word.trim();
614
585
  const normalized = original.replace(/'/g, "’").toLowerCase();
@@ -619,9 +590,7 @@ export class TitleCaserUtils {
619
590
  const rest = original.slice(prefixLength);
620
591
 
621
592
  const fixedPrefix = prefix.charAt(0).toUpperCase() + prefix.slice(1);
622
- const fixedRest = rest.length > 0
623
- ? rest.charAt(0).toUpperCase() + rest.slice(1)
624
- : "";
593
+ const fixedRest = rest.length > 0 ? rest.charAt(0).toUpperCase() + rest.slice(1) : "";
625
594
 
626
595
  return fixedPrefix + fixedRest;
627
596
  }
@@ -716,88 +685,88 @@ export class TitleCaserUtils {
716
685
  return parts.join(joiner);
717
686
  }
718
687
 
719
- // This function is used to check if a word is in the correct terms list
720
- static correctTermHyphenated(word, style) {
721
- // Split the word into an array of words
722
- const hyphenatedWords = word.split("-");
723
-
724
- // Define functions to process words
725
- const capitalizeFirst = (word) => word.charAt(0).toUpperCase() + word.slice(1);
726
- const lowercaseRest = (word) => word.charAt(0) + word.slice(1).toLowerCase();
727
-
728
- // Define the style-specific processing functions
729
- const styleFunctions = {
730
- ap: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
731
- chicago: capitalizeFirst,
732
- apa: (word, index, length) => {
733
- if (TitleCaserUtils.isShortWord(word, style) && index > 0 && index < length - 1) {
734
- return word.toLowerCase();
735
- } else {
736
- return capitalizeFirst(word);
737
- }
738
- },
739
- nyt: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
740
- wikipedia: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
741
- };
688
+ // This function is used to check if a word is in the correct terms list
689
+ static correctTermHyphenated(word, style) {
690
+ // Split the word into an array of words (supports -, –, —)
691
+ const dashMatch = word.match(/[-–—]/);
692
+ if (!dashMatch) return word;
693
+
694
+ const dash = dashMatch[0];
695
+ const hyphenatedWords = word.split(/[-–—]/);
696
+
697
+ // Detect if ANY segment is a regional acronym
698
+ const containsRegionalAcronym = hyphenatedWords.some((segment) =>
699
+ regionalAcronymList.includes(
700
+ segment.toLowerCase().replace(/[^\w]/g, "")
701
+ )
702
+ );
703
+
704
+ // Define functions to process words
705
+ const capitalizeFirst = (w) => w.charAt(0).toUpperCase() + w.slice(1);
706
+ const lowercaseRest = (w) => w.charAt(0) + w.slice(1).toLowerCase();
707
+
708
+ // Define the style-specific processing functions
709
+ const styleFunctions = {
710
+ ap: (w, index) => {
711
+ // If compound contains acronym → headline-style compound
712
+ if (containsRegionalAcronym) {
713
+ return capitalizeFirst(w);
714
+ }
715
+ return index === 0 ? capitalizeFirst(w) : lowercaseRest(w);
716
+ },
717
+ chicago: capitalizeFirst,
718
+ apa: (w, index, length) => {
719
+ if (
720
+ !containsRegionalAcronym &&
721
+ TitleCaserUtils.isShortWord(w, style) &&
722
+ index > 0 &&
723
+ index < length - 1
724
+ ) {
725
+ return w.toLowerCase();
726
+ }
727
+ return capitalizeFirst(w);
728
+ },
729
+ nyt: capitalizeFirst,
730
+ wikipedia: (w, index) =>
731
+ index === 0 ? capitalizeFirst(w) : lowercaseRest(w),
732
+ };
742
733
 
743
- // Get the style-specific processing function
744
- const processWord = styleFunctions[style] || lowercaseRest;
734
+ const processWord = styleFunctions[style] || lowercaseRest;
745
735
 
746
- // Process each word
747
- const processedWords = hyphenatedWords.map((word, i) => {
748
- let correctedWord = word;
736
+ const processedWords = hyphenatedWords.map((segment, i) => {
737
+ let correctedWord = segment;
749
738
 
750
- const romanNumeralApostropheSRegex = /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})'s$/i;
751
- if (romanNumeralApostropheSRegex.test(word)) {
752
- const updatedWord = correctedWord.toUpperCase().replace(/'S$/, "'s");
753
- // Uppercase the Roman numeral part and concatenate back with 's
754
- return updatedWord;
755
- }
739
+ const normalizedSegment = segment
740
+ .toLowerCase()
741
+ .replace(/[^\w]/g, "");
756
742
 
757
- // Check if the word is a Roman numeral
758
- const romanNumeralRegex = /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i;
759
- if (romanNumeralRegex.test(word)) {
760
- return word.toUpperCase();
761
- }
743
+ // Normalize acronym casing
744
+ if (regionalAcronymList.includes(normalizedSegment)) {
745
+ return segment.toUpperCase();
746
+ }
762
747
 
763
- // Preserve the original word
764
-
765
- // Check if the word contains an apostrophe
766
- const hasApostrophe = word.includes("'");
767
- if (hasApostrophe) {
768
- // Split the word at the apostrophe
769
- const wordParts = word.split("'");
770
- // Check each part for Roman numerals
771
- const isRomanNumeral = wordParts.every((part) => romanNumeralRegex.test(part));
772
- if (isRomanNumeral) {
773
- // Uppercase each Roman numeral part and join back with apostrophe
774
- correctedWord = wordParts.map((part) => part.toUpperCase()).join("'");
775
- return correctedWord;
776
- } else {
777
- return processWord(correctedWord, i, hyphenatedWords.length);
778
- }
779
- }
748
+ // Roman numeral logic
749
+ const romanNumeralRegex =
750
+ /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i;
780
751
 
781
- // Check if the word is in the list of words to preserve
782
- const lowerCaseWord = word.toLowerCase();
783
- const uniqueTermsIndex = specialTermsList.findIndex((w) => w.toLowerCase() === lowerCaseWord);
784
- if (uniqueTermsIndex >= 0) {
785
- correctedWord = specialTermsList[uniqueTermsIndex];
786
- }
787
- // Check if the word is a possessive form
788
- else if (lowerCaseWord.endsWith("'s")) {
789
- const rootWord = lowerCaseWord.substring(0, lowerCaseWord.length - 2);
790
- const rootWordIndex = specialTermsList.findIndex((w) => w.toLowerCase() === rootWord);
791
- if (rootWordIndex >= 0) {
792
- correctedWord = `${specialTermsList[rootWordIndex]}'s`;
793
- }
794
- }
752
+ if (romanNumeralRegex.test(segment)) {
753
+ return segment.toUpperCase();
754
+ }
795
755
 
796
- // Process the word
797
- return processWord(correctedWord, i, hyphenatedWords.length);
798
- });
756
+ // Preserve special terms
757
+ const lowerCaseWord = segment.toLowerCase();
758
+ const uniqueTermsIndex = specialTermsList.findIndex(
759
+ (w) => w.toLowerCase() === lowerCaseWord
760
+ );
761
+
762
+ if (uniqueTermsIndex >= 0) {
763
+ correctedWord = specialTermsList[uniqueTermsIndex];
764
+ }
765
+
766
+ return processWord(correctedWord, i, hyphenatedWords.length);
767
+ });
768
+
769
+ return processedWords.join(dash);
770
+ }
799
771
 
800
- // Rejoin the words
801
- return processedWords.join("-");
802
- }
803
772
  }