npm - @danielhaim/titlecaser - Versions diffs - 1.7.10 → 1.7.13 - Mend

@danielhaim/titlecaser 1.7.10 → 1.7.13

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.

Files changed (9)

package/README.md +174 -407
package/dist/titlecaser.amd.js +3 -3
package/dist/titlecaser.esm.js +3 -3
package/dist/titlecaser.module.js +3 -3
package/index.d.ts +22 -22
package/package.json +18 -16
package/src/TitleCaser.js +98 -91
package/src/TitleCaserConsts.js +25 -8
package/src/TitleCaserUtils.js +115 -143

package/src/TitleCaserUtils.js CHANGED Viewed

@@ -1,13 +1,13 @@
 import {
-  allowedTitleCaseStylesList,
-  titleCaseDefaultOptionsList,
+  allowedStylesList,
+  styleConfigMap,
   wordReplacementsList,
-  correctTitleCasingList,
+  specialTermsList,
   ignoredWordList,
-  commonShortWords,
+  shortWordsList,
   regionalAcronymList,
-  regionalAcronymPrecedingWords,
-  directFollowingIndicatorsRegionalAcronym
+  regionalAcronymPrecedingWordsList,
+  regionalAcronymFollowingWordsList,
 } from "./TitleCaserConsts.js";
 export class TitleCaserUtils {
@@ -32,7 +32,7 @@ export class TitleCaserUtils {
       if (key === "style") {
         if (typeof options.style !== "string") {
           throw new TypeError(`Invalid option: ${key} must be a string`);
-        } else if (!allowedTitleCaseStylesList.includes(options.style)) {
+        } else if (!allowedStylesList.includes(options.style)) {
           throw new TypeError(`Invalid option: ${key} must be a string`);
         }
         continue;
@@ -51,22 +51,21 @@ export class TitleCaserUtils {
         continue;
       }
-      if (!titleCaseDefaultOptionsList.hasOwnProperty(key)) {
+      if (!styleConfigMap.hasOwnProperty(key)) {
         throw new TypeError(`Invalid option: ${key}`);
       }
-      this.TitleCaseValidator.validateOption(key, options[key]);
+      TitleCaserUtils.validateOption(key, options[key]);
     }
   }
   static titleCaseOptionsCache = new Map();
   static getTitleCaseOptions(options = {}, lowercaseWords = []) {
-    // Create a unique key for the cache that combines the options and the lowercase words
-    const cacheKey = JSON.stringify({
-      options,
-      lowercaseWords,
-    });
+    // Create a unique key for the cache using a faster approach than JSON.stringify
+    const style = options.style || "ap";
+    const smartQuotes = options.hasOwnProperty("smartQuotes") ? options.smartQuotes : false;
+    const cacheKey = `${style}|${smartQuotes}|${lowercaseWords.length > 0 ? lowercaseWords.sort().join(",") : ""}`;
     // If the cache already has an entry for this key, return the cached options
     if (TitleCaserUtils.titleCaseOptionsCache.has(cacheKey)) {
@@ -74,25 +73,20 @@ export class TitleCaserUtils {
     }
     const mergedOptions = {
-      ...titleCaseDefaultOptionsList[options.style || "ap"],
+      ...styleConfigMap[options.style || "ap"],
       ...options,
       smartQuotes: options.hasOwnProperty("smartQuotes") ? options.smartQuotes : false,
     };
     // Merge the default articles with user-provided articles and lowercase words
-    const mergedArticles = mergedOptions.articlesList
-      .concat(lowercaseWords)
-      .filter((word, index, array) => array.indexOf(word) === index);
+    // Using Set for O(n) deduplication instead of O(n²) filter+indexOf
+    const mergedArticles = [...new Set([...mergedOptions.articlesList, ...lowercaseWords])];
     // Merge the default short conjunctions with user-provided conjunctions and lowercase words
-    const mergedShortConjunctions = mergedOptions.shortConjunctionsList
-      .concat(lowercaseWords)
-      .filter((word, index, array) => array.indexOf(word) === index);
+    const mergedShortConjunctions = [...new Set([...mergedOptions.shortConjunctionsList, ...lowercaseWords])];
     // Merge the default short prepositions with user-provided prepositions and lowercase words
-    const mergedShortPrepositions = mergedOptions.shortPrepositionsList
-      .concat(lowercaseWords)
-      .filter((word, index, array) => array.indexOf(word) === index);
+    const mergedShortPrepositions = [...new Set([...mergedOptions.shortPrepositionsList, ...lowercaseWords])];
     // Merge the default word replacements with the user-provided replacements
     const mergedReplaceTerms = [
@@ -186,8 +180,8 @@ export class TitleCaserUtils {
     }
     // If the style is not one of the allowed styles, throw an Error.
-    if (!allowedTitleCaseStylesList.includes(style)) {
-      throw new Error(`Invalid option: style must be one of ${allowedTitleCaseStylesList.join(", ")}.`);
+    if (!allowedStylesList.includes(style)) {
+      throw new Error(`Invalid option: style must be one of ${allowedStylesList.join(", ")}.`);
     }
     // If the word is a short conjunction, article, preposition, or is in the never-capitalized list, return true.
@@ -237,9 +231,7 @@ export class TitleCaserUtils {
   // Check if the entire input string is uppercase
   static isEntirelyUppercase(str) {
-    return str === str.toUpperCase() &&
-      str !== str.toLowerCase() &&
-      str.length > 1;
+    return str === str.toUpperCase() && str !== str.toLowerCase() && str.length > 1;
   }
   static isRegionalAcronym(word) {
@@ -256,27 +248,25 @@ export class TitleCaserUtils {
   }
   static isRegionalAcronymNoDot(word, nextWord, prevWord = null) {
-    if (typeof word !== 'string' || typeof nextWord !== 'string') {
+    if (typeof word !== "string" || typeof nextWord !== "string") {
       return false;
     }
     const firstWordStripped = word.toLowerCase().replace(/[^\w\s]/g, "");
     const nextWordStripped = nextWord.toLowerCase().replace(/[^\w\s]/g, "");
-    const smallDirectPrecedingIndicators = [
-      "the",
-    ];
+    const smallDirectPrecedingIndicators = ["the"];
-    if (prevWord &&
+    if (
+      prevWord &&
       regionalAcronymList.includes(firstWordStripped) &&
-      smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())) {
-        return true;
+      smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())
+    ) {
+      return true;
     }
     return (
-      regionalAcronymList.includes(firstWordStripped) &&
-      directFollowingIndicatorsRegionalAcronym.includes(nextWordStripped)
+      regionalAcronymList.includes(firstWordStripped) && regionalAcronymFollowingWordsList.includes(nextWordStripped)
     );
   }
@@ -285,17 +275,15 @@ export class TitleCaserUtils {
     const current = word.toLowerCase().replace(/[^\w]/g, "");
     const prev = prevWord.toLowerCase().replace(/[^\w]/g, "");
-    const prevPrev = typeof prevPrevWord === "string"
-      ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "")
-      : null;
+    const prevPrev = typeof prevPrevWord === "string" ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "") : null;
     if (!regionalAcronymList.includes(current)) return false;
     // Direct 100% safe word before the acronym
-    if (regionalAcronymPrecedingWords.includes(prev)) return true;
+    if (regionalAcronymPrecedingWordsList.includes(prev)) return true;
     // Extended pattern: e.g., "from the US"
-    if (prev === "the" && prevPrev && regionalAcronymPrecedingWords.includes(prevPrev)) {
+    if (prev === "the" && prevPrev && regionalAcronymPrecedingWordsList.includes(prevPrev)) {
       return true;
     }
@@ -315,22 +303,12 @@ export class TitleCaserUtils {
   }
   static normalizeCasingForWordByStyle(word, style) {
-    if (!word || !style || !titleCaseDefaultOptionsList[style]) return false;
+    if (!word || !style || !styleConfigMap[style]) return false;
     const lowerWord = word.toLowerCase();
-    const {
-      shortConjunctionsList,
-      articlesList,
-      shortPrepositionsList,
-      neverCapitalizedList
-    } = titleCaseDefaultOptionsList[style];
-    const combinedList = [
-      ...shortConjunctionsList,
-      ...articlesList,
-      ...shortPrepositionsList,
-      ...neverCapitalizedList
-    ];
+    const { shortConjunctionsList, articlesList, shortPrepositionsList, neverCapitalizedList } = styleConfigMap[style];
+    const combinedList = [...shortConjunctionsList, ...articlesList, ...shortPrepositionsList, ...neverCapitalizedList];
     return combinedList.includes(lowerWord) ? word : false;
   }
@@ -582,9 +560,7 @@ export class TitleCaserUtils {
       throw new TypeError("Invalid input: word must be a non-empty string.");
     }
-    const knownElidedPrefixes = new Set([
-      "o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
-    ]);
+    const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
     const normalized = word.trim().toLowerCase().replace(/'/g, "’");
@@ -603,9 +579,7 @@ export class TitleCaserUtils {
       throw new TypeError("Invalid input: word must be a non-empty string.");
     }
-    const knownElidedPrefixes = new Set([
-      "o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
-    ]);
+    const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
     const original = word.trim();
     const normalized = original.replace(/'/g, "’").toLowerCase();
@@ -616,9 +590,7 @@ export class TitleCaserUtils {
         const rest = original.slice(prefixLength);
         const fixedPrefix = prefix.charAt(0).toUpperCase() + prefix.slice(1);
-        const fixedRest = rest.length > 0
-          ? rest.charAt(0).toUpperCase() + rest.slice(1)
-          : "";
+        const fixedRest = rest.length > 0 ? rest.charAt(0).toUpperCase() + rest.slice(1) : "";
         return fixedPrefix + fixedRest;
       }
@@ -713,88 +685,88 @@ export class TitleCaserUtils {
     return parts.join(joiner);
   }
-  // This function is used to check if a word is in the correct terms list
-  static correctTermHyphenated(word, style) {
-    // Split the word into an array of words
-    const hyphenatedWords = word.split("-");
-    // Define functions to process words
-    const capitalizeFirst = (word) => word.charAt(0).toUpperCase() + word.slice(1);
-    const lowercaseRest = (word) => word.charAt(0) + word.slice(1).toLowerCase();
-    // Define the style-specific processing functions
-    const styleFunctions = {
-      ap: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
-      chicago: capitalizeFirst,
-      apa: (word, index, length) => {
-        if (TitleCaserUtils.isShortWord(word, style) && index > 0 && index < length - 1) {
-          return word.toLowerCase();
-        } else {
-          return capitalizeFirst(word);
-        }
-      },
-      nyt: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
-      wikipedia: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
-    };
+// This function is used to check if a word is in the correct terms list
+static correctTermHyphenated(word, style) {
+  // Split the word into an array of words (supports -, –, —)
+  const dashMatch = word.match(/[-–—]/);
+  if (!dashMatch) return word;
+  const dash = dashMatch[0];
+  const hyphenatedWords = word.split(/[-–—]/);
+  // Detect if ANY segment is a regional acronym
+  const containsRegionalAcronym = hyphenatedWords.some((segment) =>
+    regionalAcronymList.includes(
+      segment.toLowerCase().replace(/[^\w]/g, "")
+    )
+  );
+  // Define functions to process words
+  const capitalizeFirst = (w) => w.charAt(0).toUpperCase() + w.slice(1);
+  const lowercaseRest = (w) => w.charAt(0) + w.slice(1).toLowerCase();
+  // Define the style-specific processing functions
+  const styleFunctions = {
+    ap: (w, index) => {
+      // If compound contains acronym → headline-style compound
+      if (containsRegionalAcronym) {
+        return capitalizeFirst(w);
+      }
+      return index === 0 ? capitalizeFirst(w) : lowercaseRest(w);
+    },
+    chicago: capitalizeFirst,
+    apa: (w, index, length) => {
+      if (
+        !containsRegionalAcronym &&
+        TitleCaserUtils.isShortWord(w, style) &&
+        index > 0 &&
+        index < length - 1
+      ) {
+        return w.toLowerCase();
+      }
+      return capitalizeFirst(w);
+    },
+    nyt: capitalizeFirst,
+    wikipedia: (w, index) =>
+      index === 0 ? capitalizeFirst(w) : lowercaseRest(w),
+  };
-    // Get the style-specific processing function
-    const processWord = styleFunctions[style] || lowercaseRest;
+  const processWord = styleFunctions[style] || lowercaseRest;
-    // Process each word
-    const processedWords = hyphenatedWords.map((word, i) => {
-      let correctedWord = word;
+  const processedWords = hyphenatedWords.map((segment, i) => {
+    let correctedWord = segment;
-      const romanNumeralApostropheSRegex = /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})'s$/i;
-      if (romanNumeralApostropheSRegex.test(word)) {
-        const updatedWord = correctedWord.toUpperCase().replace(/'S$/, "'s");
-        // Uppercase the Roman numeral part and concatenate back with 's
-        return updatedWord;
-      }
+    const normalizedSegment = segment
+      .toLowerCase()
+      .replace(/[^\w]/g, "");
-      // Check if the word is a Roman numeral
-      const romanNumeralRegex = /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i;
-      if (romanNumeralRegex.test(word)) {
-        return word.toUpperCase();
-      }
+    // Normalize acronym casing
+    if (regionalAcronymList.includes(normalizedSegment)) {
+      return segment.toUpperCase();
+    }
-      // Preserve the original word
-      // Check if the word contains an apostrophe
-      const hasApostrophe = word.includes("'");
-      if (hasApostrophe) {
-        // Split the word at the apostrophe
-        const wordParts = word.split("'");
-        // Check each part for Roman numerals
-        const isRomanNumeral = wordParts.every((part) => romanNumeralRegex.test(part));
-        if (isRomanNumeral) {
-          // Uppercase each Roman numeral part and join back with apostrophe
-          correctedWord = wordParts.map((part) => part.toUpperCase()).join("'");
-          return correctedWord;
-        } else {
-          return processWord(correctedWord, i, hyphenatedWords.length);
-        }
-      }
+    // Roman numeral logic
+    const romanNumeralRegex =
+      /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i;
-      // Check if the word is in the list of words to preserve
-      const lowerCaseWord = word.toLowerCase();
-      const uniqueTermsIndex = correctTitleCasingList.findIndex((w) => w.toLowerCase() === lowerCaseWord);
-      if (uniqueTermsIndex >= 0) {
-        correctedWord = correctTitleCasingList[uniqueTermsIndex];
-      }
-      // Check if the word is a possessive form
-      else if (lowerCaseWord.endsWith("'s")) {
-        const rootWord = lowerCaseWord.substring(0, lowerCaseWord.length - 2);
-        const rootWordIndex = correctTitleCasingList.findIndex((w) => w.toLowerCase() === rootWord);
-        if (rootWordIndex >= 0) {
-          correctedWord = `${correctTitleCasingList[rootWordIndex]}'s`;
-        }
-      }
+    if (romanNumeralRegex.test(segment)) {
+      return segment.toUpperCase();
+    }
-      // Process the word
-      return processWord(correctedWord, i, hyphenatedWords.length);
-    });
+    // Preserve special terms
+    const lowerCaseWord = segment.toLowerCase();
+    const uniqueTermsIndex = specialTermsList.findIndex(
+      (w) => w.toLowerCase() === lowerCaseWord
+    );
+    if (uniqueTermsIndex >= 0) {
+      correctedWord = specialTermsList[uniqueTermsIndex];
+    }
+    return processWord(correctedWord, i, hyphenatedWords.length);
+  });
+  return processedWords.join(dash);
+}
-    // Rejoin the words
-    return processedWords.join("-");
-  }
 }