npm - @danielhaim/titlecaser - Versions diffs - 1.7.12 → 1.7.14 - Mend

@danielhaim/titlecaser 1.7.12 → 1.7.14

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (8)

package/src/TitleCaser.js CHANGED Viewed

@@ -13,8 +13,8 @@ export class TitleCaser {
   constructor (options = {}) {
     this.options = options;
     this.debug = options.debug || false;
-    this.wordReplacementsList = wordReplacementsList;
-    this.phraseReplacementMap = phraseReplacementMap;
+    this.wordReplacementsList = JSON.parse(JSON.stringify(wordReplacementsList));
+    this.phraseReplacementMap = JSON.parse(JSON.stringify(phraseReplacementMap));
   }
   logWarning(message) {
@@ -25,12 +25,12 @@ export class TitleCaser {
   toTitleCase(str) {
     try {
-      // ! If input is empty, throw an error.
-      if (str.trim().length === 0) throw new TypeError("Invalid input: input must not be empty.");
       // ! If input is not a string, throw an error.
       if (typeof str !== "string") throw new TypeError("Invalid input: input must be a string.");
+      // ! If input is empty, throw an error.
+      if (str.length === 0) throw new TypeError("Invalid input: input must not be empty.");
       // ! Input sanitization: limit length to prevent performance issues
       if (str.length > 100000) throw new TypeError("Invalid input: input exceeds maximum length of 100,000 characters.");
@@ -41,8 +41,9 @@ export class TitleCaser {
       const {
         style = "ap",
         neverCapitalize = [],
-        replaceTermList = this.wordReplacementsList,
+        wordReplacementsList = this.wordReplacementsList,
         smartQuotes = false, // Set to false by default
+        normalizeWhitespace = true,
       } = this.options;
       const styleConfig = styleConfigMap[style] || {};
@@ -58,24 +59,21 @@ export class TitleCaser {
       } = TitleCaserUtils.getTitleCaseOptions(this.options, shortWordsList, wordReplacementsList);
       // Preprocess the replaceTerms array to make it easier to search for.
-      const replaceTermsArray = replaceTermList.map((term) => Object.keys(term)[0].toLowerCase());
+      const replaceTermsArray = wordReplacementsList.map((term) => Object.keys(term)[0].toLowerCase());
       // Create an object from the replaceTerms array to make it easier to search for.
       const replaceTermObj = Object.fromEntries(
-        replaceTermList.map((term) => [Object.keys(term)[0].toLowerCase(), Object.values(term)[0]]),
+        wordReplacementsList.map((term) => [Object.keys(term)[0].toLowerCase(), Object.values(term)[0]]),
       );
       this.logWarning(`replaceTermsArray: ${replaceTermsArray}`);
       this.logWarning(`this.wordReplacementsList: ${this.wordReplacementsList}`);
-      // Remove extra spaces and replace <br> tags with a placeholder.
-      let inputString = str.trim();
+      // Normalize HTML breaks and optionally normalize whitespace (see normalizeWhitespace option).
+      let inputString = str;
       // Replace <br> and <br /> tags with a placeholder.
       inputString = inputString.replace(REGEX_PATTERNS.HTML_BREAK, " nl2br ");
-      // Remove extra spaces
-      inputString = inputString.replace(REGEX_PATTERNS.MULTIPLE_SPACES, ' ');
       // Check if the entire input string is uppercase and normalize it to lowercase
       // before processing if it is. This ensures consistent handling for all-caps text.
       const isEntireStringUppercase = TitleCaserUtils.isEntirelyUppercase(inputString.replace(/[^a-zA-Z]/g, ''));
@@ -84,10 +82,14 @@ export class TitleCaser {
         inputString = inputString.toLowerCase();
       }
-      // Split the string into an array of words.
-      const words = inputString.split(" ");
+      // Tokenize preserving whitespace
+      const tokens = inputString.split(/(\s+)/);
+      const wordsInTitleCase = tokens.map((token, i) => {
+        if (!token || /^\s+$/.test(token)) return token;
+        const word = token;
-      const wordsInTitleCase = words.map((word, i) => {
         switch (true) {
           case TitleCaserUtils.isWordAmpersand(word):
             // ! if the word is an ampersand, return it as is.
@@ -135,8 +137,18 @@ export class TitleCaser {
             // ! If the word has an intentional uppercase letter, return the correct casing.
             return word;
           case TitleCaserUtils.isShortWord(word, style) && i !== 0:
-            // ! If the word is a short word, return the correct casing.
-            const isAtEndOfSentence = i > 0 && TitleCaserUtils.endsWithSymbol(words[i - 1], [":", "?", "!", "."]);
+            // Find previous non-whitespace token
+            let prevToken = null;
+            for (let j = i - 1; j >= 0; j--) {
+              if (!/^\s+$/.test(tokens[j])) {
+                prevToken = tokens[j];
+                break;
+              }
+            }
+            const isAtEndOfSentence =
+              prevToken && TitleCaserUtils.endsWithSymbol(prevToken, [":", "?", "!", "."]);
             if (isAtEndOfSentence) {
               return word.charAt(0).toUpperCase() + word.slice(1);
             }
@@ -194,7 +206,7 @@ export class TitleCaser {
       });
       // Join the words in the array into a string.
-      inputString = wordsInTitleCase.join(" ");
+      inputString = wordsInTitleCase.join("");
       // Replace the nl2br placeholder with <br> tags.
       inputString = inputString.replace(/nl2br/gi, "<br>");
@@ -205,46 +217,61 @@ export class TitleCaser {
         inputString = TitleCaserUtils.convertQuotesToCurly(inputString);
       }
-      const wordsForAcronyms = inputString.split(" ");
-      let firstWord = wordsForAcronyms[0];
-      let secondWord = wordsForAcronyms[1] || null;
+      const wordsForAcronyms = inputString.split(/(\s+)/);
+      // Extract non-whitespace words for first/second detection
+      // Extract non-whitespace words for first/second detection
+      const nonWhitespaceWords = wordsForAcronyms.filter(t => !/^\s+$/.test(t));
+      let firstWord = nonWhitespaceWords[0] || null;
+      let secondWord = nonWhitespaceWords[1] || null;
       for (let i = 0; i < wordsForAcronyms.length; i++) {
-        const prevWord = i > 0 ? wordsForAcronyms[i - 1] : null;
+        if (/^\s+$/.test(wordsForAcronyms[i])) continue;
+        // Find previous non-whitespace word
+        let prevWord = null;
+        for (let j = i - 1; j >= 0; j--) {
+          if (!/^\s+$/.test(wordsForAcronyms[j])) {
+            prevWord = wordsForAcronyms[j];
+            break;
+          }
+        }
+        // Find next non-whitespace word
+        let nextWord = null;
+        for (let j = i + 1; j < wordsForAcronyms.length; j++) {
+          if (!/^\s+$/.test(wordsForAcronyms[j])) {
+            nextWord = wordsForAcronyms[j];
+            break;
+          }
+        }
         let currentWord = wordsForAcronyms[i];
-        const nextWord = i < wordsForAcronyms.length - 1 ? wordsForAcronyms[i + 1] : null;
-        // Capture punctuation at the end of the word
         const punctuationMatch = currentWord.match(REGEX_PATTERNS.TRAILING_PUNCTUATION);
         let punctuation = "";
         if (punctuationMatch) {
           punctuation = punctuationMatch[0];
-          currentWord = currentWord.replace(REGEX_PATTERNS.TRAILING_PUNCTUATION, ""); // Remove punctuation at the end
-        }
-        if (TitleCaserUtils.isRegionalAcronym(currentWord)) {
-          currentWord = TitleCaserUtils.normalizeRegionalAcronym(currentWord);
+          currentWord = currentWord.replace(REGEX_PATTERNS.TRAILING_PUNCTUATION, "");
         }
-        if (TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord)) {
+        if (TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord, prevWord)) {
           currentWord = TitleCaserUtils.normalizeRegionalAcronym(currentWord);
         }
-        // if punctuation is not empty, add it to the end of the word
         if (punctuation !== "") {
           currentWord = currentWord + punctuation;
         }
-        // NOTE: Deliberately NOT writing back to wordsForAcronyms[i] here.
-        // This first pass does naive acronym detection that creates false positives
-        // (e.g., pronoun "us" detected as country "US"). Later loops use more
-        // sophisticated context-aware logic to correctly identify regional acronyms.
+        wordsForAcronyms[i] = currentWord;
       }
-      inputString = wordsForAcronyms.join(" ");
+      inputString = wordsForAcronyms.join("");
-      const wordsForShortWords = inputString.split(" ");
+      const wordsForShortWords = inputString.split(/(\s+)/);
       for (let i = 1; i < wordsForShortWords.length - 1; i++) {
         const currentWord = wordsForShortWords[i];
         const prevWord = wordsForShortWords[i - 1];
@@ -265,36 +292,62 @@ export class TitleCaser {
         }
       }
-      inputString = wordsForShortWords.join(" ");
+      inputString = wordsForShortWords.join("");
-      const wordsForFinalPass = inputString.split(" ");
+      const wordsForFinalPass = inputString.split(/(\s+)/);
       for (let i = 0; i < wordsForFinalPass.length; i++) {
+        if (/^\s+$/.test(wordsForFinalPass[i])) continue;
         let currentWord = wordsForFinalPass[i];
-        let nextWord = wordsForFinalPass[i + 1];
-        let prevWord = wordsForFinalPass[i - 1];
+        // Find previous non-whitespace word
+        let prevWord = null;
+        for (let j = i - 1; j >= 0; j--) {
+          if (!/^\s+$/.test(wordsForFinalPass[j])) {
+            prevWord = wordsForFinalPass[j];
+            break;
+          }
+        }
+        // Find next non-whitespace word
+        let nextWord = null;
+        for (let j = i + 1; j < wordsForFinalPass.length; j++) {
+          if (!/^\s+$/.test(wordsForFinalPass[j])) {
+            nextWord = wordsForFinalPass[j];
+            break;
+          }
+        }
         if (nextWord && TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord, prevWord)) {
           wordsForFinalPass[i] = currentWord.toUpperCase();
         }
       }
-      let finalWord = wordsForFinalPass[wordsForFinalPass.length - 1];
-      let wordBeforeFinal = wordsForFinalPass[wordsForFinalPass.length - 2];
-      let twoWordsBeforeFinal = wordsForFinalPass[wordsForFinalPass.length - 3];
-      if (TitleCaserUtils.isRegionalAcronym(firstWord)) {
+      const nonWhitespaceFinal = wordsForFinalPass.filter(t => !/^\s+$/.test(t));
+      let finalWord = nonWhitespaceFinal[nonWhitespaceFinal.length - 1];
+      let wordBeforeFinal = nonWhitespaceFinal[nonWhitespaceFinal.length - 2];
+      let twoWordsBeforeFinal = nonWhitespaceFinal[nonWhitespaceFinal.length - 3];
+      if (firstWord && TitleCaserUtils.isRegionalAcronym(firstWord)) {
         this.logWarning(`firstWord is a regional acronym: ${firstWord}`);
         wordsForFinalPass[0] = firstWord.toUpperCase();
       }
-      if (TitleCaserUtils.isRegionalAcronymNoDot(firstWord, secondWord)) {
+      if (firstWord && secondWord && TitleCaserUtils.isRegionalAcronymNoDot(firstWord, secondWord)) {
         wordsForFinalPass[0] = firstWord.toUpperCase();
       }
-      if (TitleCaserUtils.isFinalWordRegionalAcronym(finalWord, wordBeforeFinal, twoWordsBeforeFinal)) {
+      if (
+        finalWord &&
+        wordBeforeFinal &&
+        TitleCaserUtils.isFinalWordRegionalAcronym(finalWord, wordBeforeFinal, twoWordsBeforeFinal)
+      ) {
         wordsForFinalPass[wordsForFinalPass.length - 1] = finalWord.toUpperCase();
       }
-      inputString = wordsForFinalPass.join(" ");
+      inputString = wordsForFinalPass.join("");
       for (const [phrase, replacement] of Object.entries(this.phraseReplacementMap)) {
         // Create a regular expression for case-insensitive matching of the phrase
@@ -303,15 +356,15 @@ export class TitleCaser {
         // Replace the phrase in the input string with its corresponding replacement
         inputString = inputString.replace(regex, replacement);
       }
       // ! Handle sentence case
       if (styleConfig.caseStyle === "sentence") {
-        const words = inputString.split(" ");
+        const words = inputString.split(/(\s+)/);
         let firstWordFound = false;
         for (let i = 0; i < words.length; i++) {
           let word = words[i];
           // 1) The first word: Capitalize first letter only, preserve existing brand/case in the rest
           if (!firstWordFound && /[A-Za-z]/.test(word)) {
             // If you want to skip altering brand or acronym, do one more check:
@@ -323,15 +376,21 @@ export class TitleCaser {
             firstWordFound = true;
             continue;
           }
           // 2) For subsequent words, only force-lowercase if we do NOT want to preserve uppercase
           if (!TitleCaser.shouldKeepCasing(word, specialTermsList)) {
             words[i] = word.toLowerCase();
           }
           // else, we keep it exactly as is
         }
-        inputString = words.join(" ");
+        inputString = words.join("");
+      }
+      if (normalizeWhitespace) {
+        inputString = inputString
+          .replace(/\s+/g, " ")
+          .trim();
       }
       return inputString;
@@ -368,6 +427,11 @@ export class TitleCaser {
       }
     });
+    // Added check to prevent excessive number of replacement rules which could lead to performance issues
+    if (this.wordReplacementsList.length > 2000) {
+      throw new Error("Too many replacement rules.");
+    }
     this.options.wordReplacementsList = this.wordReplacementsList;
     this.logWarning(`Log the updated this.wordReplacementsList: ${this.wordReplacementsList}`);
@@ -386,6 +450,10 @@ export class TitleCaser {
       this.wordReplacementsList.push({ [term]: replacement });
     }
+    if (this.wordReplacementsList.length > 2000) {
+      throw new Error("Too many replacement rules.");
+    }
     this.options.wordReplacementsList = this.wordReplacementsList;
   }
@@ -467,7 +535,7 @@ export class TitleCaser {
     if (TitleCaserUtils.hasUppercaseIntentional(word)) return true;
     // If it's in the brand/specialTermsList
     if (TitleCaserUtils.isWordInArray(word, specialTermsList)) return true;
     // Otherwise, no. It's safe to lowercase.
     return false;
   }

package/src/TitleCaserUtils.js CHANGED Viewed

@@ -7,7 +7,7 @@ import {
   shortWordsList,
   regionalAcronymList,
   regionalAcronymPrecedingWordsList,
-  regionalAcronymFollowingWordsList
+  regionalAcronymFollowingWordsList,
 } from "./TitleCaserConsts.js";
 export class TitleCaserUtils {
@@ -65,7 +65,7 @@ export class TitleCaserUtils {
     // Create a unique key for the cache using a faster approach than JSON.stringify
     const style = options.style || "ap";
     const smartQuotes = options.hasOwnProperty("smartQuotes") ? options.smartQuotes : false;
-    const cacheKey = `${style}|${smartQuotes}|${lowercaseWords.length > 0 ? lowercaseWords.sort().join(',') : ''}`;
+    const cacheKey = `${style}|${smartQuotes}|${lowercaseWords.length > 0 ? lowercaseWords.sort().join(",") : ""}`;
     // If the cache already has an entry for this key, return the cached options
     if (TitleCaserUtils.titleCaseOptionsCache.has(cacheKey)) {
@@ -80,22 +80,13 @@ export class TitleCaserUtils {
     // Merge the default articles with user-provided articles and lowercase words
     // Using Set for O(n) deduplication instead of O(n²) filter+indexOf
-    const mergedArticles = [...new Set([
-      ...mergedOptions.articlesList,
-      ...lowercaseWords
-    ])];
+    const mergedArticles = [...new Set([...mergedOptions.articlesList, ...lowercaseWords])];
     // Merge the default short conjunctions with user-provided conjunctions and lowercase words
-    const mergedShortConjunctions = [...new Set([
-      ...mergedOptions.shortConjunctionsList,
-      ...lowercaseWords
-    ])];
+    const mergedShortConjunctions = [...new Set([...mergedOptions.shortConjunctionsList, ...lowercaseWords])];
     // Merge the default short prepositions with user-provided prepositions and lowercase words
-    const mergedShortPrepositions = [...new Set([
-      ...mergedOptions.shortPrepositionsList,
-      ...lowercaseWords
-    ])];
+    const mergedShortPrepositions = [...new Set([...mergedOptions.shortPrepositionsList, ...lowercaseWords])];
     // Merge the default word replacements with the user-provided replacements
     const mergedReplaceTerms = [
@@ -240,9 +231,7 @@ export class TitleCaserUtils {
   // Check if the entire input string is uppercase
   static isEntirelyUppercase(str) {
-    return str === str.toUpperCase() &&
-      str !== str.toLowerCase() &&
-      str.length > 1;
+    return str === str.toUpperCase() && str !== str.toLowerCase() && str.length > 1;
   }
   static isRegionalAcronym(word) {
@@ -259,27 +248,25 @@ export class TitleCaserUtils {
   }
   static isRegionalAcronymNoDot(word, nextWord, prevWord = null) {
-    if (typeof word !== 'string' || typeof nextWord !== 'string') {
+    if (typeof word !== "string" || typeof nextWord !== "string") {
       return false;
     }
     const firstWordStripped = word.toLowerCase().replace(/[^\w\s]/g, "");
     const nextWordStripped = nextWord.toLowerCase().replace(/[^\w\s]/g, "");
-    const smallDirectPrecedingIndicators = [
-      "the",
-    ];
+    const smallDirectPrecedingIndicators = ["the"];
-    if (prevWord &&
+    if (
+      prevWord &&
       regionalAcronymList.includes(firstWordStripped) &&
-      smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())) {
-        return true;
+      smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())
+    ) {
+      return true;
     }
     return (
-      regionalAcronymList.includes(firstWordStripped) &&
-      regionalAcronymFollowingWordsList.includes(nextWordStripped)
+      regionalAcronymList.includes(firstWordStripped) && regionalAcronymFollowingWordsList.includes(nextWordStripped)
     );
   }
@@ -288,9 +275,7 @@ export class TitleCaserUtils {
     const current = word.toLowerCase().replace(/[^\w]/g, "");
     const prev = prevWord.toLowerCase().replace(/[^\w]/g, "");
-    const prevPrev = typeof prevPrevWord === "string"
-      ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "")
-      : null;
+    const prevPrev = typeof prevPrevWord === "string" ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "") : null;
     if (!regionalAcronymList.includes(current)) return false;
@@ -321,19 +306,9 @@ export class TitleCaserUtils {
     if (!word || !style || !styleConfigMap[style]) return false;
     const lowerWord = word.toLowerCase();
-    const {
-      shortConjunctionsList,
-      articlesList,
-      shortPrepositionsList,
-      neverCapitalizedList
-    } = styleConfigMap[style];
-    const combinedList = [
-      ...shortConjunctionsList,
-      ...articlesList,
-      ...shortPrepositionsList,
-      ...neverCapitalizedList
-    ];
+    const { shortConjunctionsList, articlesList, shortPrepositionsList, neverCapitalizedList } = styleConfigMap[style];
+    const combinedList = [...shortConjunctionsList, ...articlesList, ...shortPrepositionsList, ...neverCapitalizedList];
     return combinedList.includes(lowerWord) ? word : false;
   }
@@ -585,9 +560,7 @@ export class TitleCaserUtils {
       throw new TypeError("Invalid input: word must be a non-empty string.");
     }
-    const knownElidedPrefixes = new Set([
-      "o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
-    ]);
+    const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
     const normalized = word.trim().toLowerCase().replace(/'/g, "’");
@@ -606,9 +579,7 @@ export class TitleCaserUtils {
       throw new TypeError("Invalid input: word must be a non-empty string.");
     }
-    const knownElidedPrefixes = new Set([
-      "o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
-    ]);
+    const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
     const original = word.trim();
     const normalized = original.replace(/'/g, "’").toLowerCase();
@@ -619,9 +590,7 @@ export class TitleCaserUtils {
         const rest = original.slice(prefixLength);
         const fixedPrefix = prefix.charAt(0).toUpperCase() + prefix.slice(1);
-        const fixedRest = rest.length > 0
-          ? rest.charAt(0).toUpperCase() + rest.slice(1)
-          : "";
+        const fixedRest = rest.length > 0 ? rest.charAt(0).toUpperCase() + rest.slice(1) : "";
         return fixedPrefix + fixedRest;
       }
@@ -716,88 +685,88 @@ export class TitleCaserUtils {
     return parts.join(joiner);
   }
-  // This function is used to check if a word is in the correct terms list
-  static correctTermHyphenated(word, style) {
-    // Split the word into an array of words
-    const hyphenatedWords = word.split("-");
-    // Define functions to process words
-    const capitalizeFirst = (word) => word.charAt(0).toUpperCase() + word.slice(1);
-    const lowercaseRest = (word) => word.charAt(0) + word.slice(1).toLowerCase();
-    // Define the style-specific processing functions
-    const styleFunctions = {
-      ap: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
-      chicago: capitalizeFirst,
-      apa: (word, index, length) => {
-        if (TitleCaserUtils.isShortWord(word, style) && index > 0 && index < length - 1) {
-          return word.toLowerCase();
-        } else {
-          return capitalizeFirst(word);
-        }
-      },
-      nyt: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
-      wikipedia: (word, index) => (index === 0 ? capitalizeFirst(word) : lowercaseRest(word)),
-    };
+// This function is used to check if a word is in the correct terms list
+static correctTermHyphenated(word, style) {
+  // Split the word into an array of words (supports -, –, —)
+  const dashMatch = word.match(/[-–—]/);
+  if (!dashMatch) return word;
+  const dash = dashMatch[0];
+  const hyphenatedWords = word.split(/[-–—]/);
+  // Detect if ANY segment is a regional acronym
+  const containsRegionalAcronym = hyphenatedWords.some((segment) =>
+    regionalAcronymList.includes(
+      segment.toLowerCase().replace(/[^\w]/g, "")
+    )
+  );
+  // Define functions to process words
+  const capitalizeFirst = (w) => w.charAt(0).toUpperCase() + w.slice(1);
+  const lowercaseRest = (w) => w.charAt(0) + w.slice(1).toLowerCase();
+  // Define the style-specific processing functions
+  const styleFunctions = {
+    ap: (w, index) => {
+      // If compound contains acronym → headline-style compound
+      if (containsRegionalAcronym) {
+        return capitalizeFirst(w);
+      }
+      return index === 0 ? capitalizeFirst(w) : lowercaseRest(w);
+    },
+    chicago: capitalizeFirst,
+    apa: (w, index, length) => {
+      if (
+        !containsRegionalAcronym &&
+        TitleCaserUtils.isShortWord(w, style) &&
+        index > 0 &&
+        index < length - 1
+      ) {
+        return w.toLowerCase();
+      }
+      return capitalizeFirst(w);
+    },
+    nyt: capitalizeFirst,
+    wikipedia: (w, index) =>
+      index === 0 ? capitalizeFirst(w) : lowercaseRest(w),
+  };
-    // Get the style-specific processing function
-    const processWord = styleFunctions[style] || lowercaseRest;
+  const processWord = styleFunctions[style] || lowercaseRest;
-    // Process each word
-    const processedWords = hyphenatedWords.map((word, i) => {
-      let correctedWord = word;
+  const processedWords = hyphenatedWords.map((segment, i) => {
+    let correctedWord = segment;
-      const romanNumeralApostropheSRegex = /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})'s$/i;
-      if (romanNumeralApostropheSRegex.test(word)) {
-        const updatedWord = correctedWord.toUpperCase().replace(/'S$/, "'s");
-        // Uppercase the Roman numeral part and concatenate back with 's
-        return updatedWord;
-      }
+    const normalizedSegment = segment
+      .toLowerCase()
+      .replace(/[^\w]/g, "");
-      // Check if the word is a Roman numeral
-      const romanNumeralRegex = /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i;
-      if (romanNumeralRegex.test(word)) {
-        return word.toUpperCase();
-      }
+    // Normalize acronym casing
+    if (regionalAcronymList.includes(normalizedSegment)) {
+      return segment.toUpperCase();
+    }
-      // Preserve the original word
-      // Check if the word contains an apostrophe
-      const hasApostrophe = word.includes("'");
-      if (hasApostrophe) {
-        // Split the word at the apostrophe
-        const wordParts = word.split("'");
-        // Check each part for Roman numerals
-        const isRomanNumeral = wordParts.every((part) => romanNumeralRegex.test(part));
-        if (isRomanNumeral) {
-          // Uppercase each Roman numeral part and join back with apostrophe
-          correctedWord = wordParts.map((part) => part.toUpperCase()).join("'");
-          return correctedWord;
-        } else {
-          return processWord(correctedWord, i, hyphenatedWords.length);
-        }
-      }
+    // Roman numeral logic
+    const romanNumeralRegex =
+      /^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i;
-      // Check if the word is in the list of words to preserve
-      const lowerCaseWord = word.toLowerCase();
-      const uniqueTermsIndex = specialTermsList.findIndex((w) => w.toLowerCase() === lowerCaseWord);
-      if (uniqueTermsIndex >= 0) {
-        correctedWord = specialTermsList[uniqueTermsIndex];
-      }
-      // Check if the word is a possessive form
-      else if (lowerCaseWord.endsWith("'s")) {
-        const rootWord = lowerCaseWord.substring(0, lowerCaseWord.length - 2);
-        const rootWordIndex = specialTermsList.findIndex((w) => w.toLowerCase() === rootWord);
-        if (rootWordIndex >= 0) {
-          correctedWord = `${specialTermsList[rootWordIndex]}'s`;
-        }
-      }
+    if (romanNumeralRegex.test(segment)) {
+      return segment.toUpperCase();
+    }
-      // Process the word
-      return processWord(correctedWord, i, hyphenatedWords.length);
-    });
+    // Preserve special terms
+    const lowerCaseWord = segment.toLowerCase();
+    const uniqueTermsIndex = specialTermsList.findIndex(
+      (w) => w.toLowerCase() === lowerCaseWord
+    );
+    if (uniqueTermsIndex >= 0) {
+      correctedWord = specialTermsList[uniqueTermsIndex];
+    }
+    return processWord(correctedWord, i, hyphenatedWords.length);
+  });
+  return processedWords.join(dash);
+}
-    // Rejoin the words
-    return processedWords.join("-");
-  }
 }