@danielhaim/titlecaser 1.7.12 → 1.7.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +174 -407
- package/dist/titlecaser.amd.js +2 -2
- package/dist/titlecaser.esm.js +2 -2
- package/dist/titlecaser.module.js +2 -2
- package/index.d.ts +1 -3
- package/package.json +1 -1
- package/src/TitleCaser.js +3 -3
- package/src/TitleCaserUtils.js +97 -128
package/src/TitleCaserUtils.js
CHANGED
|
@@ -7,7 +7,7 @@ import {
|
|
|
7
7
|
shortWordsList,
|
|
8
8
|
regionalAcronymList,
|
|
9
9
|
regionalAcronymPrecedingWordsList,
|
|
10
|
-
regionalAcronymFollowingWordsList
|
|
10
|
+
regionalAcronymFollowingWordsList,
|
|
11
11
|
} from "./TitleCaserConsts.js";
|
|
12
12
|
|
|
13
13
|
export class TitleCaserUtils {
|
|
@@ -65,7 +65,7 @@ export class TitleCaserUtils {
|
|
|
65
65
|
// Create a unique key for the cache using a faster approach than JSON.stringify
|
|
66
66
|
const style = options.style || "ap";
|
|
67
67
|
const smartQuotes = options.hasOwnProperty("smartQuotes") ? options.smartQuotes : false;
|
|
68
|
-
const cacheKey = `${style}|${smartQuotes}|${lowercaseWords.length > 0 ? lowercaseWords.sort().join(
|
|
68
|
+
const cacheKey = `${style}|${smartQuotes}|${lowercaseWords.length > 0 ? lowercaseWords.sort().join(",") : ""}`;
|
|
69
69
|
|
|
70
70
|
// If the cache already has an entry for this key, return the cached options
|
|
71
71
|
if (TitleCaserUtils.titleCaseOptionsCache.has(cacheKey)) {
|
|
@@ -80,22 +80,13 @@ export class TitleCaserUtils {
|
|
|
80
80
|
|
|
81
81
|
// Merge the default articles with user-provided articles and lowercase words
|
|
82
82
|
// Using Set for O(n) deduplication instead of O(n²) filter+indexOf
|
|
83
|
-
const mergedArticles = [...new Set([
|
|
84
|
-
...mergedOptions.articlesList,
|
|
85
|
-
...lowercaseWords
|
|
86
|
-
])];
|
|
83
|
+
const mergedArticles = [...new Set([...mergedOptions.articlesList, ...lowercaseWords])];
|
|
87
84
|
|
|
88
85
|
// Merge the default short conjunctions with user-provided conjunctions and lowercase words
|
|
89
|
-
const mergedShortConjunctions = [...new Set([
|
|
90
|
-
...mergedOptions.shortConjunctionsList,
|
|
91
|
-
...lowercaseWords
|
|
92
|
-
])];
|
|
86
|
+
const mergedShortConjunctions = [...new Set([...mergedOptions.shortConjunctionsList, ...lowercaseWords])];
|
|
93
87
|
|
|
94
88
|
// Merge the default short prepositions with user-provided prepositions and lowercase words
|
|
95
|
-
const mergedShortPrepositions = [...new Set([
|
|
96
|
-
...mergedOptions.shortPrepositionsList,
|
|
97
|
-
...lowercaseWords
|
|
98
|
-
])];
|
|
89
|
+
const mergedShortPrepositions = [...new Set([...mergedOptions.shortPrepositionsList, ...lowercaseWords])];
|
|
99
90
|
|
|
100
91
|
// Merge the default word replacements with the user-provided replacements
|
|
101
92
|
const mergedReplaceTerms = [
|
|
@@ -240,9 +231,7 @@ export class TitleCaserUtils {
|
|
|
240
231
|
|
|
241
232
|
// Check if the entire input string is uppercase
|
|
242
233
|
static isEntirelyUppercase(str) {
|
|
243
|
-
return str === str.toUpperCase() &&
|
|
244
|
-
str !== str.toLowerCase() &&
|
|
245
|
-
str.length > 1;
|
|
234
|
+
return str === str.toUpperCase() && str !== str.toLowerCase() && str.length > 1;
|
|
246
235
|
}
|
|
247
236
|
|
|
248
237
|
static isRegionalAcronym(word) {
|
|
@@ -259,27 +248,25 @@ export class TitleCaserUtils {
|
|
|
259
248
|
}
|
|
260
249
|
|
|
261
250
|
static isRegionalAcronymNoDot(word, nextWord, prevWord = null) {
|
|
262
|
-
if (typeof word !==
|
|
251
|
+
if (typeof word !== "string" || typeof nextWord !== "string") {
|
|
263
252
|
return false;
|
|
264
253
|
}
|
|
265
254
|
|
|
266
255
|
const firstWordStripped = word.toLowerCase().replace(/[^\w\s]/g, "");
|
|
267
256
|
const nextWordStripped = nextWord.toLowerCase().replace(/[^\w\s]/g, "");
|
|
268
257
|
|
|
269
|
-
const smallDirectPrecedingIndicators = [
|
|
270
|
-
"the",
|
|
271
|
-
];
|
|
258
|
+
const smallDirectPrecedingIndicators = ["the"];
|
|
272
259
|
|
|
273
|
-
if (
|
|
260
|
+
if (
|
|
261
|
+
prevWord &&
|
|
274
262
|
regionalAcronymList.includes(firstWordStripped) &&
|
|
275
|
-
smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())
|
|
276
|
-
|
|
277
|
-
|
|
263
|
+
smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())
|
|
264
|
+
) {
|
|
265
|
+
return true;
|
|
278
266
|
}
|
|
279
267
|
|
|
280
268
|
return (
|
|
281
|
-
regionalAcronymList.includes(firstWordStripped) &&
|
|
282
|
-
regionalAcronymFollowingWordsList.includes(nextWordStripped)
|
|
269
|
+
regionalAcronymList.includes(firstWordStripped) && regionalAcronymFollowingWordsList.includes(nextWordStripped)
|
|
283
270
|
);
|
|
284
271
|
}
|
|
285
272
|
|
|
@@ -288,9 +275,7 @@ export class TitleCaserUtils {
|
|
|
288
275
|
|
|
289
276
|
const current = word.toLowerCase().replace(/[^\w]/g, "");
|
|
290
277
|
const prev = prevWord.toLowerCase().replace(/[^\w]/g, "");
|
|
291
|
-
const prevPrev = typeof prevPrevWord === "string"
|
|
292
|
-
? prevPrevWord.toLowerCase().replace(/[^\w]/g, "")
|
|
293
|
-
: null;
|
|
278
|
+
const prevPrev = typeof prevPrevWord === "string" ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "") : null;
|
|
294
279
|
|
|
295
280
|
if (!regionalAcronymList.includes(current)) return false;
|
|
296
281
|
|
|
@@ -321,19 +306,9 @@ export class TitleCaserUtils {
|
|
|
321
306
|
if (!word || !style || !styleConfigMap[style]) return false;
|
|
322
307
|
|
|
323
308
|
const lowerWord = word.toLowerCase();
|
|
324
|
-
const {
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
shortPrepositionsList,
|
|
328
|
-
neverCapitalizedList
|
|
329
|
-
} = styleConfigMap[style];
|
|
330
|
-
|
|
331
|
-
const combinedList = [
|
|
332
|
-
...shortConjunctionsList,
|
|
333
|
-
...articlesList,
|
|
334
|
-
...shortPrepositionsList,
|
|
335
|
-
...neverCapitalizedList
|
|
336
|
-
];
|
|
309
|
+
const { shortConjunctionsList, articlesList, shortPrepositionsList, neverCapitalizedList } = styleConfigMap[style];
|
|
310
|
+
|
|
311
|
+
const combinedList = [...shortConjunctionsList, ...articlesList, ...shortPrepositionsList, ...neverCapitalizedList];
|
|
337
312
|
|
|
338
313
|
return combinedList.includes(lowerWord) ? word : false;
|
|
339
314
|
}
|
|
@@ -585,9 +560,7 @@ export class TitleCaserUtils {
|
|
|
585
560
|
throw new TypeError("Invalid input: word must be a non-empty string.");
|
|
586
561
|
}
|
|
587
562
|
|
|
588
|
-
const knownElidedPrefixes = new Set([
|
|
589
|
-
"o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
|
|
590
|
-
]);
|
|
563
|
+
const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
|
|
591
564
|
|
|
592
565
|
const normalized = word.trim().toLowerCase().replace(/'/g, "’");
|
|
593
566
|
|
|
@@ -606,9 +579,7 @@ export class TitleCaserUtils {
|
|
|
606
579
|
throw new TypeError("Invalid input: word must be a non-empty string.");
|
|
607
580
|
}
|
|
608
581
|
|
|
609
|
-
const knownElidedPrefixes = new Set([
|
|
610
|
-
"o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
|
|
611
|
-
]);
|
|
582
|
+
const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
|
|
612
583
|
|
|
613
584
|
const original = word.trim();
|
|
614
585
|
const normalized = original.replace(/'/g, "’").toLowerCase();
|
|
@@ -619,9 +590,7 @@ export class TitleCaserUtils {
|
|
|
619
590
|
const rest = original.slice(prefixLength);
|
|
620
591
|
|
|
621
592
|
const fixedPrefix = prefix.charAt(0).toUpperCase() + prefix.slice(1);
|
|
622
|
-
const fixedRest = rest.length > 0
|
|
623
|
-
? rest.charAt(0).toUpperCase() + rest.slice(1)
|
|
624
|
-
: "";
|
|
593
|
+
const fixedRest = rest.length > 0 ? rest.charAt(0).toUpperCase() + rest.slice(1) : "";
|
|
625
594
|
|
|
626
595
|
return fixedPrefix + fixedRest;
|
|
627
596
|
}
|
|
@@ -716,88 +685,88 @@ export class TitleCaserUtils {
|
|
|
716
685
|
return parts.join(joiner);
|
|
717
686
|
}
|
|
718
687
|
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
688
|
+
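// Re-cases each segment of a dash-separated word according to the given style; regional acronyms and Roman numerals are uppercased, and special terms are swapped for their listed spelling before casing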
|
|
689
|
+
static correctTermHyphenated(word, style) {
|
|
690
|
+
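// Find the first dash (-, –, or —); a word without one is returned unchanged, otherwise it is split on any of the three and later rejoined with that first dash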
|
|
691
|
+
const dashMatch = word.match(/[-–—]/);
|
|
692
|
+
if (!dashMatch) return word;
|
|
693
|
+
|
|
694
|
+
const dash = dashMatch[0];
|
|
695
|
+
const hyphenatedWords = word.split(/[-–—]/);
|
|
696
|
+
|
|
697
|
+
// Detect if ANY segment is a regional acronym
|
|
698
|
+
const containsRegionalAcronym = hyphenatedWords.some((segment) =>
|
|
699
|
+
regionalAcronymList.includes(
|
|
700
|
+
segment.toLowerCase().replace(/[^\w]/g, "")
|
|
701
|
+
)
|
|
702
|
+
);
|
|
703
|
+
|
|
704
|
+
// Define functions to process words
|
|
705
|
+
const capitalizeFirst = (w) => w.charAt(0).toUpperCase() + w.slice(1);
|
|
706
|
+
const lowercaseRest = (w) => w.charAt(0) + w.slice(1).toLowerCase();
|
|
707
|
+
|
|
708
|
+
// Define the style-specific processing functions
|
|
709
|
+
const styleFunctions = {
|
|
710
|
+
ap: (w, index) => {
|
|
711
|
+
// If compound contains acronym → headline-style compound
|
|
712
|
+
if (containsRegionalAcronym) {
|
|
713
|
+
return capitalizeFirst(w);
|
|
714
|
+
}
|
|
715
|
+
return index === 0 ? capitalizeFirst(w) : lowercaseRest(w);
|
|
716
|
+
},
|
|
717
|
+
chicago: capitalizeFirst,
|
|
718
|
+
apa: (w, index, length) => {
|
|
719
|
+
if (
|
|
720
|
+
!containsRegionalAcronym &&
|
|
721
|
+
TitleCaserUtils.isShortWord(w, style) &&
|
|
722
|
+
index > 0 &&
|
|
723
|
+
index < length - 1
|
|
724
|
+
) {
|
|
725
|
+
return w.toLowerCase();
|
|
726
|
+
}
|
|
727
|
+
return capitalizeFirst(w);
|
|
728
|
+
},
|
|
729
|
+
nyt: capitalizeFirst,
|
|
730
|
+
wikipedia: (w, index) =>
|
|
731
|
+
index === 0 ? capitalizeFirst(w) : lowercaseRest(w),
|
|
732
|
+
};
|
|
742
733
|
|
|
743
|
-
|
|
744
|
-
const processWord = styleFunctions[style] || lowercaseRest;
|
|
734
|
+
const processWord = styleFunctions[style] || lowercaseRest;
|
|
745
735
|
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
let correctedWord = word;
|
|
736
|
+
const processedWords = hyphenatedWords.map((segment, i) => {
|
|
737
|
+
let correctedWord = segment;
|
|
749
738
|
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
// Uppercase the Roman numeral part and concatenate back with 's
|
|
754
|
-
return updatedWord;
|
|
755
|
-
}
|
|
739
|
+
const normalizedSegment = segment
|
|
740
|
+
.toLowerCase()
|
|
741
|
+
.replace(/[^\w]/g, "");
|
|
756
742
|
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
}
|
|
743
|
+
// Normalize acronym casing
|
|
744
|
+
if (regionalAcronymList.includes(normalizedSegment)) {
|
|
745
|
+
return segment.toUpperCase();
|
|
746
|
+
}
|
|
762
747
|
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
const hasApostrophe = word.includes("'");
|
|
767
|
-
if (hasApostrophe) {
|
|
768
|
-
// Split the word at the apostrophe
|
|
769
|
-
const wordParts = word.split("'");
|
|
770
|
-
// Check each part for Roman numerals
|
|
771
|
-
const isRomanNumeral = wordParts.every((part) => romanNumeralRegex.test(part));
|
|
772
|
-
if (isRomanNumeral) {
|
|
773
|
-
// Uppercase each Roman numeral part and join back with apostrophe
|
|
774
|
-
correctedWord = wordParts.map((part) => part.toUpperCase()).join("'");
|
|
775
|
-
return correctedWord;
|
|
776
|
-
} else {
|
|
777
|
-
return processWord(correctedWord, i, hyphenatedWords.length);
|
|
778
|
-
}
|
|
779
|
-
}
|
|
748
|
+
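// Uppercase segments that match the Roman numeral pattern (NOTE(review): every group is optional, so it also matches the empty string and ordinary words such as "mix")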
|
|
749
|
+
const romanNumeralRegex =
|
|
750
|
+
/^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i;
|
|
780
751
|
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
if (uniqueTermsIndex >= 0) {
|
|
785
|
-
correctedWord = specialTermsList[uniqueTermsIndex];
|
|
786
|
-
}
|
|
787
|
-
// Check if the word is a possessive form
|
|
788
|
-
else if (lowerCaseWord.endsWith("'s")) {
|
|
789
|
-
const rootWord = lowerCaseWord.substring(0, lowerCaseWord.length - 2);
|
|
790
|
-
const rootWordIndex = specialTermsList.findIndex((w) => w.toLowerCase() === rootWord);
|
|
791
|
-
if (rootWordIndex >= 0) {
|
|
792
|
-
correctedWord = `${specialTermsList[rootWordIndex]}'s`;
|
|
793
|
-
}
|
|
794
|
-
}
|
|
752
|
+
if (romanNumeralRegex.test(segment)) {
|
|
753
|
+
return segment.toUpperCase();
|
|
754
|
+
}
|
|
795
755
|
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
756
|
+
// Preserve special terms
|
|
757
|
+
const lowerCaseWord = segment.toLowerCase();
|
|
758
|
+
const uniqueTermsIndex = specialTermsList.findIndex(
|
|
759
|
+
(w) => w.toLowerCase() === lowerCaseWord
|
|
760
|
+
);
|
|
761
|
+
|
|
762
|
+
if (uniqueTermsIndex >= 0) {
|
|
763
|
+
correctedWord = specialTermsList[uniqueTermsIndex];
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
return processWord(correctedWord, i, hyphenatedWords.length);
|
|
767
|
+
});
|
|
768
|
+
|
|
769
|
+
return processedWords.join(dash);
|
|
770
|
+
}
|
|
799
771
|
|
|
800
|
-
// Rejoin the words
|
|
801
|
-
return processedWords.join("-");
|
|
802
|
-
}
|
|
803
772
|
}
|