npm - @danielhaim/titlecaser - Versions diffs - 1.7.0 → 1.7.2 - Mend

@danielhaim/titlecaser 1.7.0 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/README.md +231 -87
package/dist/titlecaser.amd.js +5 -0
package/dist/titlecaser.module.js +5 -0
package/index.d.ts +23 -0
package/package.json +16 -14
package/src/TitleCaser.js +120 -53
package/src/TitleCaserConsts.js +188 -33
package/src/TitleCaserUtils.js +166 -202
package/src/data/brandList.json +532 -89
package/src/data/businessFinanceLegalTerms.json +108 -14
package/src/data/eCommerceDigitalTerms.json +13 -3
package/src/data/globalGeography.json +197 -41
package/src/data/marketingMediaTerms.json +37 -6
package/src/data/militaryTerms.json +153 -0
package/src/data/miscSpecializedTerms.json +12 -3
package/src/data/techComputingConcepts.json +184 -26
package/src/data/timeAcademicTerms.json +32 -5

package/src/TitleCaserUtils.js CHANGED Viewed

@@ -4,6 +4,10 @@ import {
   wordReplacementsList,
   correctTitleCasingList,
   ignoredWordList,
+  commonShortWords,
+  regionalAcronymList,
+  regionalAcronymPrecedingWords,
+  directFollowingIndicatorsRegionalAcronym
 } from "./TitleCaserConsts.js";
 export class TitleCaserUtils {
@@ -113,6 +117,10 @@ export class TitleCaserUtils {
   static isNeverCapitalizedCache = new Map();
+  // Uppercase the first character of `word` and leave the remaining
+  // characters untouched. No type check is performed: `word` is assumed to
+  // be a string (an empty string is returned as an empty string).
+  static capitalizeFirstLetter(word) {
+    return word.charAt(0).toUpperCase() + word.slice(1);
+  }
   // Check if the word is a short conjunction
   static isShortConjunction(word, style) {
     // Get the list of short conjunctions from the TitleCaseHelper
@@ -142,6 +150,7 @@ export class TitleCaserUtils {
   // Check if the word is a short preposition
   static isShortPreposition(word, style) {
     // Get the list of short prepositions from the Title Case Helper.
+    // Debug note: log the word here, before checking whether it is in the list.
     const { shortPrepositionsList } = TitleCaserUtils.getTitleCaseOptions({
       style: style,
     });
@@ -183,6 +192,7 @@ export class TitleCaserUtils {
     // If the word is a short conjunction, article, preposition, or is in the never-capitalized list, return true.
     // Otherwise, return false.
     return (
       TitleCaserUtils.isShortConjunction(word, style) ||
       TitleCaserUtils.isArticle(word, style) ||
@@ -225,211 +235,106 @@ export class TitleCaserUtils {
     return hasUppercase && hasLowercase;
   }
-  // Check if a word is an acronym
-  // (i.e. 'the', 'to', 'within')
-  static isAcronym(word, prevWord, nextWord) {
-    try {
-      if (typeof word !== "string") {
-        throw new Error("Input word must be a string.");
-      }
+  // Check whether the input string is written entirely in uppercase.
+  // Returns true only when all three conditions hold:
+  //   - uppercasing the string changes nothing,
+  //   - lowercasing it does change something (so it contains at least one
+  //     cased letter; digits or punctuation alone do not qualify),
+  //   - it is longer than one character.
+  static isEntirelyUppercase(str) {
+    return str === str.toUpperCase() &&
+      str !== str.toLowerCase() &&
+      str.length > 1;
+  }
-      const countryCodes = new Set(["us", "usa"]);
-      const commonShortWords = new Set([
-        "the",
-        "in",
-        "to",
-        "within",
-        "towards",
-        "into",
-        "at",
-      ]);
-      const directFollowingIndicators = new Set([
-        "policies",
-        "government",
-        "military",
-        "embassy",
-        "administration",
-        "senate",
-        "congress",
-        "parliament",
-        "cabinet",
-        "federation",
-        "republic",
-        "democracy",
-        "law",
-        "act",
-        "treaty",
-        "court",
-        "legislation",
-        "statute",
-        "bill",
-        "agency",
-        "department",
-        "bureau",
-        "service",
-        "office",
-        "council",
-        "commission",
-        "division",
-        "alliance",
-        "union",
-        "confederation",
-        "bloc",
-        "zone",
-        "territory",
-        "province",
-        "state",
-        "army",
-        "navy",
-        "forces",
-        "marines",
-        "airforce",
-        "defense",
-        "intelligence",
-        "security",
-        "economy",
-        "budget",
-        "finance",
-        "treasury",
-        "trade",
-        "sanctions",
-        "aid",
-        "strategy",
-        "plan",
-        "policy",
-        "program",
-        "initiative",
-        "project",
-        "reform",
-        "relations",
-        "ambassador",
-        "diplomacy",
-        "summit",
-        "conference",
-        "talks",
-        "negotiations",
-      ]);
-      const removePunctuation = (word) => word.replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, "");
-      // Remove trailing punctuation from the word
-      const removeTrailingPunctuation = (word) => {
-        const match = word.match(/^(.*?)([.,\/#!$%\^&\*;:{}=\-_`~()]+)$/);
-        if (match && match[1]) {
-          return match[1];
-        }
-        return word;
-      };
-      word = word ? removePunctuation(word.toLowerCase()) : "";
-      word = removeTrailingPunctuation(word);
-      prevWord = prevWord ? removePunctuation(prevWord.toLowerCase()) : "";
-      nextWord = nextWord ? removePunctuation(nextWord.toLowerCase()) : "";
-      // Check if it's an acronym with direct following indicators
-      const isDirectAcronym =
-        countryCodes.has(word) &&
-        (!prevWord || commonShortWords.has(prevWord)) &&
-        (!nextWord || directFollowingIndicators.has(nextWord));
-      // Check if it's an acronym based on the previous word
-      const isPreviousAcronym = countryCodes.has(prevWord) && (!nextWord || directFollowingIndicators.has(nextWord));
-      return isDirectAcronym || isPreviousAcronym;
-    } catch (error) {
-      console.error(`An error occurred: ${error.message}`);
-      return false; // Return false in case of errors to indicate failure.
-    }
-  }
-  static checkIfWordIsAcronym(commonShortWords, prevWord, currentWord, nextWord) {
-    const countryCodes = ["us", "usa"];
-    const directPrecedingIndicators = ["the", "in", "to", "from", "against", "with", "within", "towards", "into", "at"];
-    const directFollowingIndicators = [
-      "policies",
-      "government",
-      "military",
-      "embassy",
-      "administration",
-      "senate",
-      "congress",
-      "parliament",
-      "cabinet",
-      "federation",
-      "republic",
-      "democracy",
-      "law",
-      "act",
-      "treaty",
-      "court",
-      "legislation",
-      "statute",
-      "bill",
-      "agency",
-      "department",
-      "bureau",
-      "service",
-      "office",
-      "council",
-      "commission",
-      "division",
-      "alliance",
-      "union",
-      "confederation",
-      "bloc",
-      "zone",
-      "territory",
-      "province",
-      "state",
-      "army",
-      "navy",
-      "forces",
-      "marines",
-      "airforce",
-      "defense",
-      "intelligence",
-      "security",
-      "economy",
-      "budget",
-      "finance",
-      "treasury",
-      "trade",
-      "sanctions",
-      "aid",
-      "strategy",
-      "plan",
-      "policy",
-      "program",
-      "initiative",
-      "project",
-      "reform",
-      "relations",
-      "ambassador",
-      "diplomacy",
-      "summit",
-      "conference",
-      "talks",
-      "negotiations",
+  // Check whether `word` is one of the entries in `regionalAcronymList`
+  // (imported from TitleCaserConsts.js).
+  // Throws a TypeError when `word` is not a string; returns false for
+  // strings shorter than two characters.
+  static isRegionalAcronym(word) {
+    if (typeof word !== "string") {
+      throw new TypeError("Invalid input: word must be a string.");
+    }
+    if (word.length < 2) {
+      return false;
+    }
+    // The lookup is done on the lowercased word with punctuation left in
+    // place. NOTE(review): presumably the list entries are stored in
+    // lowercase; confirm against TitleCaserConsts.js.
+    const lowercasedWord = word.toLowerCase();
+    return regionalAcronymList.includes(lowercasedWord);
+  }
+  // Decide from the neighbouring words whether `word` should be treated as
+  // a regional acronym.
+  // Returns false when `word` or `nextWord` is not a string. Otherwise
+  // returns true when the stripped, lowercased `word` is in
+  // `regionalAcronymList` and either
+  //   - `prevWord` (lowercased) is "the", or
+  //   - the stripped, lowercased `nextWord` is in
+  //     `directFollowingIndicatorsRegionalAcronym`.
+  static isRegionalAcronymNoDot(word, nextWord, prevWord = null) {
+    if (typeof word !== 'string' || typeof nextWord !== 'string') {
+      return false;
+    }
+    // Lowercase both words and drop every character that is neither a word
+    // character nor whitespace (dots, commas, quotes, ...).
+    const firstWordStripped = word.toLowerCase().replace(/[^\w\s]/g, "");
+    const nextWordStripped = nextWord.toLowerCase().replace(/[^\w\s]/g, "");
+    const smallDirectPrecedingIndicators = [
+      "the",
     ];
-    const removePunctuation = (word) => word.replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, "");
+    // `prevWord` is only lowercased here, not stripped of punctuation, so a
+    // value such as "the," does not match. It is also not type-checked: a
+    // truthy non-string value reaches `.toLowerCase()` once the acronym
+    // check has passed.
+    if (prevWord &&
+      regionalAcronymList.includes(firstWordStripped) &&
+      smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())) {
+        return true;
+    }
+    return (
+      regionalAcronymList.includes(firstWordStripped) &&
+      directFollowingIndicatorsRegionalAcronym.includes(nextWordStripped)
+    );
+  }
-    currentWord = currentWord ? removePunctuation(currentWord.toLowerCase()) : "";
-    prevWord = prevWord ? removePunctuation(prevWord.toLowerCase()) : "";
-    nextWord = nextWord ? removePunctuation(nextWord.toLowerCase()) : "";
+  // Decide from the one or two preceding words whether `word` should be
+  // treated as a regional acronym.
+  // Returns false when `word` or `prevWord` is not a string, or when the
+  // normalized `word` is not in `regionalAcronymList`.
+  // NOTE(review): the name suggests `word` is the last word of the title,
+  // but this method does not check position; presumably the caller does.
+  static isFinalWordRegionalAcronym(word, prevWord, prevPrevWord = null) {
+    if (typeof word !== "string" || typeof prevWord !== "string") return false;
-    if (
-      countryCodes.includes(currentWord.toLowerCase()) &&
-      (prevWord === null || commonShortWords.includes(prevWord.toLowerCase())) &&
-      (nextWord === null || directFollowingIndicators.includes(nextWord.toLowerCase()))
-    ) {
+    // Normalize every word: lowercase, then remove all non-word characters.
+    const current = word.toLowerCase().replace(/[^\w]/g, "");
+    const prev = prevWord.toLowerCase().replace(/[^\w]/g, "");
+    // `prevPrevWord` is optional; anything that is not a string becomes null.
+    const prevPrev = typeof prevPrevWord === "string"
+      ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "")
+      : null;
+    if (!regionalAcronymList.includes(current)) return false;
+    // Direct 100% safe word before the acronym
+    if (regionalAcronymPrecedingWords.includes(prev)) return true;
+    // Extended pattern: e.g., "from the US"
+    if (prev === "the" && prevPrev && regionalAcronymPrecedingWords.includes(prevPrev)) {
       return true;
     }
     return false;
   }
+  // Return `word` converted to uppercase.
+  // Throws a TypeError when `word` is not a string. The method does not
+  // check that `word` actually is a regional acronym, and punctuation is
+  // left in place.
+  static normalizeRegionalAcronym(word) {
+    if (typeof word !== "string") {
+      throw new TypeError("Invalid input: word must be a string.");
+    }
+    return word.toUpperCase();
+  }
+  // Build a lookup key for an acronym: lowercase the word and remove every
+  // period. No type check is performed; `word` is assumed to be a string.
+  static normalizeAcronymKey(word) {
+    return word.toLowerCase().replace(/\./g, ""); // "U.S." → "us"
+  }
+  // Look `word` up in the minor-word lists of the given style.
+  // Returns `word` exactly as it was passed in (no re-casing is applied)
+  // when its lowercased form is a short conjunction, article, short
+  // preposition or never-capitalized word for `style`; returns false
+  // otherwise, and also when `word` or `style` is falsy or `style` is not a
+  // key of `titleCaseDefaultOptionsList`.
+  // NOTE(review): `titleCaseDefaultOptionsList` is not among the imports
+  // visible in this diff; presumably it is imported earlier in the file.
+  static normalizeCasingForWordByStyle(word, style) {
+    if (!word || !style || !titleCaseDefaultOptionsList[style]) return false;
+    const lowerWord = word.toLowerCase();
+    const {
+      shortConjunctionsList,
+      articlesList,
+      shortPrepositionsList,
+      neverCapitalizedList
+    } = titleCaseDefaultOptionsList[style];
+    // Merge the four lists so a single membership test covers all of them.
+    const combinedList = [
+      ...shortConjunctionsList,
+      ...articlesList,
+      ...shortPrepositionsList,
+      ...neverCapitalizedList
+    ];
+    return combinedList.includes(lowerWord) ? word : false;
+  }
   // Check if a word has a suffix
   static hasSuffix(word) {
     // Test if word is longer than suffix
@@ -671,6 +576,57 @@ export class TitleCaserUtils {
     return word;
   }
+  // Check whether a word begins with one of the known elided forms
+  // (o’, fo’, ne’er, e’er, ’tis, ’twas, ’n’).
+  // Throws a TypeError when `word` is not a string or is empty after
+  // trimming. This is a prefix test, so any word that merely starts with
+  // one of these forms (e.g. "o’clock") also returns true.
+  static isElidedWord(word) {
+    if (typeof word !== "string" || word.trim() === "") {
+      throw new TypeError("Invalid input: word must be a non-empty string.");
+    }
+    const knownElidedPrefixes = new Set([
+      "o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
+    ]);
+    // Trim, lowercase, and turn straight apostrophes (') into typographic
+    // ones (’) so both spellings match the prefixes above.
+    const normalized = word.trim().toLowerCase().replace(/'/g, "’");
+    for (const prefix of knownElidedPrefixes) {
+      if (normalized.startsWith(prefix)) {
+        return true;
+      }
+    }
+    return false;
+  }
+  // Normalize the casing of a word that begins with a known elided form.
+  // Returns the known prefix with its first character uppercased, followed
+  // by the remainder of the trimmed input with its first character
+  // uppercased. Returns false when no known prefix matches.
+  // Throws a TypeError when `word` is not a string or is empty after
+  // trimming.
+  static normalizeElidedWord(word) {
+    if (typeof word !== "string" || word.trim() === "") {
+      throw new TypeError("Invalid input: word must be a non-empty string.");
+    }
+    const knownElidedPrefixes = new Set([
+      "o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
+    ]);
+    const original = word.trim();
+    // Matching is done on a lowercased copy in which straight apostrophes (')
+    // are replaced with typographic ones (’).
+    const normalized = original.replace(/'/g, "’").toLowerCase();
+    for (const prefix of knownElidedPrefixes) {
+      if (normalized.startsWith(prefix)) {
+        const prefixLength = prefix.length;
+        // The remainder is sliced from the trimmed input, so it keeps the
+        // caller's casing apart from its first character.
+        const rest = original.slice(prefixLength);
+        // The prefix is rebuilt from the table entry, not from the input, so
+        // it always uses the typographic apostrophe and lowercase letters
+        // after the first character. For entries that start with ’ the
+        // uppercased character is the apostrophe itself, which is unchanged.
+        const fixedPrefix = prefix.charAt(0).toUpperCase() + prefix.slice(1);
+        const fixedRest = rest.length > 0
+          ? rest.charAt(0).toUpperCase() + rest.slice(1)
+          : "";
+        return fixedPrefix + fixedRest;
+      }
+    }
+    return false;
+  }
   // This function is used to check if a suffix is present in a word that is in the correct terms list
   static correctSuffix(word, correctTerms) {
     // Validate input
@@ -731,24 +687,30 @@ export class TitleCaserUtils {
     // Split the word into parts delimited by the specified delimiters
     const parts = word.split(delimiters);
-    // Count the number of parts
     const numParts = parts.length;
-    // For each part
+    // For each part, replace it with the correct term if found or title-case it if not found
     for (let i = 0; i < numParts; i++) {
-      // Lowercase the part
       const lowercasedPart = parts[i].toLowerCase();
-      // Search for the part in the list of correct terms
       const index = correctTerms.findIndex((t) => t.toLowerCase() === lowercasedPart);
-      // If the part is found in the list of correct terms
       if (index >= 0) {
-        // Replace the part with the correct term
         parts[i] = correctTerms[index];
+      } else {
+        // Capitalize first letter and lowercase the rest if no replacement is found
+        parts[i] = parts[i].charAt(0).toUpperCase() + parts[i].slice(1).toLowerCase();
       }
     }
-    // Join the parts back together using the first delimiter as the default delimiter
-    return parts.join(delimiters.source.charAt(0));
+    // Determine the joiner based on the original word
+    let joiner = delimiters.source.charAt(0);
+    if (word.includes("-")) {
+      joiner = "-";
+    } else if (word.includes("'")) {
+      joiner = "'";
+    }
+    // Join the parts back together using the determined joiner
+    return parts.join(joiner);
   }
   // This function is used to check if a word is in the correct terms list
@@ -835,4 +797,6 @@ export class TitleCaserUtils {
     // Rejoin the words
     return processedWords.join("-");
   }
 }