@danielhaim/titlecaser 1.7.10 → 1.7.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +174 -407
- package/dist/titlecaser.amd.js +3 -3
- package/dist/titlecaser.esm.js +3 -3
- package/dist/titlecaser.module.js +3 -3
- package/index.d.ts +22 -22
- package/package.json +18 -16
- package/src/TitleCaser.js +98 -91
- package/src/TitleCaserConsts.js +25 -8
- package/src/TitleCaserUtils.js +115 -143
package/src/TitleCaserUtils.js
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
3
|
-
|
|
2
|
+
allowedStylesList,
|
|
3
|
+
styleConfigMap,
|
|
4
4
|
wordReplacementsList,
|
|
5
|
-
|
|
5
|
+
specialTermsList,
|
|
6
6
|
ignoredWordList,
|
|
7
|
-
|
|
7
|
+
shortWordsList,
|
|
8
8
|
regionalAcronymList,
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
regionalAcronymPrecedingWordsList,
|
|
10
|
+
regionalAcronymFollowingWordsList,
|
|
11
11
|
} from "./TitleCaserConsts.js";
|
|
12
12
|
|
|
13
13
|
export class TitleCaserUtils {
|
|
@@ -32,7 +32,7 @@ export class TitleCaserUtils {
|
|
|
32
32
|
if (key === "style") {
|
|
33
33
|
if (typeof options.style !== "string") {
|
|
34
34
|
throw new TypeError(`Invalid option: ${key} must be a string`);
|
|
35
|
-
} else if (!
|
|
35
|
+
} else if (!allowedStylesList.includes(options.style)) {
|
|
36
36
|
throw new TypeError(`Invalid option: ${key} must be a string`);
|
|
37
37
|
}
|
|
38
38
|
continue;
|
|
@@ -51,22 +51,21 @@ export class TitleCaserUtils {
|
|
|
51
51
|
continue;
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
-
if (!
|
|
54
|
+
if (!styleConfigMap.hasOwnProperty(key)) {
|
|
55
55
|
throw new TypeError(`Invalid option: ${key}`);
|
|
56
56
|
}
|
|
57
57
|
|
|
58
|
-
|
|
58
|
+
TitleCaserUtils.validateOption(key, options[key]);
|
|
59
59
|
}
|
|
60
60
|
}
|
|
61
61
|
|
|
62
62
|
static titleCaseOptionsCache = new Map();
|
|
63
63
|
|
|
64
64
|
static getTitleCaseOptions(options = {}, lowercaseWords = []) {
|
|
65
|
-
// Create a unique key for the cache
|
|
66
|
-
const
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
});
|
|
65
|
+
// Create a unique key for the cache using a faster approach than JSON.stringify
|
|
66
|
+
const style = options.style || "ap";
|
|
67
|
+
const smartQuotes = options.hasOwnProperty("smartQuotes") ? options.smartQuotes : false;
|
|
68
|
+
const cacheKey = `${style}|${smartQuotes}|${lowercaseWords.length > 0 ? lowercaseWords.sort().join(",") : ""}`;
|
|
70
69
|
|
|
71
70
|
// If the cache already has an entry for this key, return the cached options
|
|
72
71
|
if (TitleCaserUtils.titleCaseOptionsCache.has(cacheKey)) {
|
|
@@ -74,25 +73,20 @@ export class TitleCaserUtils {
|
|
|
74
73
|
}
|
|
75
74
|
|
|
76
75
|
const mergedOptions = {
|
|
77
|
-
...
|
|
76
|
+
...styleConfigMap[options.style || "ap"],
|
|
78
77
|
...options,
|
|
79
78
|
smartQuotes: options.hasOwnProperty("smartQuotes") ? options.smartQuotes : false,
|
|
80
79
|
};
|
|
81
80
|
|
|
82
81
|
// Merge the default articles with user-provided articles and lowercase words
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
.filter((word, index, array) => array.indexOf(word) === index);
|
|
82
|
+
// Using Set for O(n) deduplication instead of O(n²) filter+indexOf
|
|
83
|
+
const mergedArticles = [...new Set([...mergedOptions.articlesList, ...lowercaseWords])];
|
|
86
84
|
|
|
87
85
|
// Merge the default short conjunctions with user-provided conjunctions and lowercase words
|
|
88
|
-
const mergedShortConjunctions = mergedOptions.shortConjunctionsList
|
|
89
|
-
.concat(lowercaseWords)
|
|
90
|
-
.filter((word, index, array) => array.indexOf(word) === index);
|
|
86
|
+
const mergedShortConjunctions = [...new Set([...mergedOptions.shortConjunctionsList, ...lowercaseWords])];
|
|
91
87
|
|
|
92
88
|
// Merge the default short prepositions with user-provided prepositions and lowercase words
|
|
93
|
-
const mergedShortPrepositions = mergedOptions.shortPrepositionsList
|
|
94
|
-
.concat(lowercaseWords)
|
|
95
|
-
.filter((word, index, array) => array.indexOf(word) === index);
|
|
89
|
+
const mergedShortPrepositions = [...new Set([...mergedOptions.shortPrepositionsList, ...lowercaseWords])];
|
|
96
90
|
|
|
97
91
|
// Merge the default word replacements with the user-provided replacements
|
|
98
92
|
const mergedReplaceTerms = [
|
|
@@ -186,8 +180,8 @@ export class TitleCaserUtils {
|
|
|
186
180
|
}
|
|
187
181
|
|
|
188
182
|
// If the style is not one of the allowed styles, throw an Error.
|
|
189
|
-
if (!
|
|
190
|
-
throw new Error(`Invalid option: style must be one of ${
|
|
183
|
+
if (!allowedStylesList.includes(style)) {
|
|
184
|
+
throw new Error(`Invalid option: style must be one of ${allowedStylesList.join(", ")}.`);
|
|
191
185
|
}
|
|
192
186
|
|
|
193
187
|
// If the word is a short conjunction, article, preposition, or is in the never-capitalized list, return true.
|
|
@@ -237,9 +231,7 @@ export class TitleCaserUtils {
|
|
|
237
231
|
|
|
238
232
|
// Check if the entire input string is uppercase
|
|
239
233
|
static isEntirelyUppercase(str) {
|
|
240
|
-
return str === str.toUpperCase() &&
|
|
241
|
-
str !== str.toLowerCase() &&
|
|
242
|
-
str.length > 1;
|
|
234
|
+
return str === str.toUpperCase() && str !== str.toLowerCase() && str.length > 1;
|
|
243
235
|
}
|
|
244
236
|
|
|
245
237
|
static isRegionalAcronym(word) {
|
|
@@ -256,27 +248,25 @@ export class TitleCaserUtils {
|
|
|
256
248
|
}
|
|
257
249
|
|
|
258
250
|
static isRegionalAcronymNoDot(word, nextWord, prevWord = null) {
|
|
259
|
-
if (typeof word !==
|
|
251
|
+
if (typeof word !== "string" || typeof nextWord !== "string") {
|
|
260
252
|
return false;
|
|
261
253
|
}
|
|
262
254
|
|
|
263
255
|
const firstWordStripped = word.toLowerCase().replace(/[^\w\s]/g, "");
|
|
264
256
|
const nextWordStripped = nextWord.toLowerCase().replace(/[^\w\s]/g, "");
|
|
265
257
|
|
|
266
|
-
const smallDirectPrecedingIndicators = [
|
|
267
|
-
"the",
|
|
268
|
-
];
|
|
258
|
+
const smallDirectPrecedingIndicators = ["the"];
|
|
269
259
|
|
|
270
|
-
if (
|
|
260
|
+
if (
|
|
261
|
+
prevWord &&
|
|
271
262
|
regionalAcronymList.includes(firstWordStripped) &&
|
|
272
|
-
smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())
|
|
273
|
-
|
|
274
|
-
|
|
263
|
+
smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())
|
|
264
|
+
) {
|
|
265
|
+
return true;
|
|
275
266
|
}
|
|
276
267
|
|
|
277
268
|
return (
|
|
278
|
-
regionalAcronymList.includes(firstWordStripped) &&
|
|
279
|
-
directFollowingIndicatorsRegionalAcronym.includes(nextWordStripped)
|
|
269
|
+
regionalAcronymList.includes(firstWordStripped) && regionalAcronymFollowingWordsList.includes(nextWordStripped)
|
|
280
270
|
);
|
|
281
271
|
}
|
|
282
272
|
|
|
@@ -285,17 +275,15 @@ export class TitleCaserUtils {
|
|
|
285
275
|
|
|
286
276
|
const current = word.toLowerCase().replace(/[^\w]/g, "");
|
|
287
277
|
const prev = prevWord.toLowerCase().replace(/[^\w]/g, "");
|
|
288
|
-
const prevPrev = typeof prevPrevWord === "string"
|
|
289
|
-
? prevPrevWord.toLowerCase().replace(/[^\w]/g, "")
|
|
290
|
-
: null;
|
|
278
|
+
const prevPrev = typeof prevPrevWord === "string" ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "") : null;
|
|
291
279
|
|
|
292
280
|
if (!regionalAcronymList.includes(current)) return false;
|
|
293
281
|
|
|
294
282
|
// Direct 100% safe word before the acronym
|
|
295
|
-
if (
|
|
283
|
+
if (regionalAcronymPrecedingWordsList.includes(prev)) return true;
|
|
296
284
|
|
|
297
285
|
// Extended pattern: e.g., "from the US"
|
|
298
|
-
if (prev === "the" && prevPrev &&
|
|
286
|
+
if (prev === "the" && prevPrev && regionalAcronymPrecedingWordsList.includes(prevPrev)) {
|
|
299
287
|
return true;
|
|
300
288
|
}
|
|
301
289
|
|
|
@@ -315,22 +303,12 @@ export class TitleCaserUtils {
|
|
|
315
303
|
}
|
|
316
304
|
|
|
317
305
|
static normalizeCasingForWordByStyle(word, style) {
|
|
318
|
-
if (!word || !style || !
|
|
306
|
+
if (!word || !style || !styleConfigMap[style]) return false;
|
|
319
307
|
|
|
320
308
|
const lowerWord = word.toLowerCase();
|
|
321
|
-
const {
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
shortPrepositionsList,
|
|
325
|
-
neverCapitalizedList
|
|
326
|
-
} = titleCaseDefaultOptionsList[style];
|
|
327
|
-
|
|
328
|
-
const combinedList = [
|
|
329
|
-
...shortConjunctionsList,
|
|
330
|
-
...articlesList,
|
|
331
|
-
...shortPrepositionsList,
|
|
332
|
-
...neverCapitalizedList
|
|
333
|
-
];
|
|
309
|
+
const { shortConjunctionsList, articlesList, shortPrepositionsList, neverCapitalizedList } = styleConfigMap[style];
|
|
310
|
+
|
|
311
|
+
const combinedList = [...shortConjunctionsList, ...articlesList, ...shortPrepositionsList, ...neverCapitalizedList];
|
|
334
312
|
|
|
335
313
|
return combinedList.includes(lowerWord) ? word : false;
|
|
336
314
|
}
|
|
@@ -582,9 +560,7 @@ export class TitleCaserUtils {
|
|
|
582
560
|
throw new TypeError("Invalid input: word must be a non-empty string.");
|
|
583
561
|
}
|
|
584
562
|
|
|
585
|
-
const knownElidedPrefixes = new Set([
|
|
586
|
-
"o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
|
|
587
|
-
]);
|
|
563
|
+
const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
|
|
588
564
|
|
|
589
565
|
const normalized = word.trim().toLowerCase().replace(/'/g, "’");
|
|
590
566
|
|
|
@@ -603,9 +579,7 @@ export class TitleCaserUtils {
|
|
|
603
579
|
throw new TypeError("Invalid input: word must be a non-empty string.");
|
|
604
580
|
}
|
|
605
581
|
|
|
606
|
-
const knownElidedPrefixes = new Set([
|
|
607
|
-
"o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
|
|
608
|
-
]);
|
|
582
|
+
const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
|
|
609
583
|
|
|
610
584
|
const original = word.trim();
|
|
611
585
|
const normalized = original.replace(/'/g, "’").toLowerCase();
|
|
@@ -616,9 +590,7 @@ export class TitleCaserUtils {
|
|
|
616
590
|
const rest = original.slice(prefixLength);
|
|
617
591
|
|
|
618
592
|
const fixedPrefix = prefix.charAt(0).toUpperCase() + prefix.slice(1);
|
|
619
|
-
const fixedRest = rest.length > 0
|
|
620
|
-
? rest.charAt(0).toUpperCase() + rest.slice(1)
|
|
621
|
-
: "";
|
|
593
|
+
const fixedRest = rest.length > 0 ? rest.charAt(0).toUpperCase() + rest.slice(1) : "";
|
|
622
594
|
|
|
623
595
|
return fixedPrefix + fixedRest;
|
|
624
596
|
}
|
|
@@ -713,88 +685,88 @@ export class TitleCaserUtils {
|
|
|
713
685
|
return parts.join(joiner);
|
|
714
686
|
}
|
|
715
687
|
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
688
|
+
// This function is used to check if a word is in the correct terms list
|
|
689
|
+
static correctTermHyphenated(word, style) {
|
|
690
|
+
// Split the word into an array of words (supports -, –, —)
|
|
691
|
+
const dashMatch = word.match(/[-–—]/);
|
|
692
|
+
if (!dashMatch) return word;
|
|
693
|
+
|
|
694
|
+
const dash = dashMatch[0];
|
|
695
|
+
const hyphenatedWords = word.split(/[-–—]/);
|
|
696
|
+
|
|
697
|
+
// Detect if ANY segment is a regional acronym
|
|
698
|
+
const containsRegionalAcronym = hyphenatedWords.some((segment) =>
|
|
699
|
+
regionalAcronymList.includes(
|
|
700
|
+
segment.toLowerCase().replace(/[^\w]/g, "")
|
|
701
|
+
)
|
|
702
|
+
);
|
|
703
|
+
|
|
704
|
+
// Define functions to process words
|
|
705
|
+
const capitalizeFirst = (w) => w.charAt(0).toUpperCase() + w.slice(1);
|
|
706
|
+
const lowercaseRest = (w) => w.charAt(0) + w.slice(1).toLowerCase();
|
|
707
|
+
|
|
708
|
+
// Define the style-specific processing functions
|
|
709
|
+
const styleFunctions = {
|
|
710
|
+
ap: (w, index) => {
|
|
711
|
+
// If compound contains acronym → headline-style compound
|
|
712
|
+
if (containsRegionalAcronym) {
|
|
713
|
+
return capitalizeFirst(w);
|
|
714
|
+
}
|
|
715
|
+
return index === 0 ? capitalizeFirst(w) : lowercaseRest(w);
|
|
716
|
+
},
|
|
717
|
+
chicago: capitalizeFirst,
|
|
718
|
+
apa: (w, index, length) => {
|
|
719
|
+
if (
|
|
720
|
+
!containsRegionalAcronym &&
|
|
721
|
+
TitleCaserUtils.isShortWord(w, style) &&
|
|
722
|
+
index > 0 &&
|
|
723
|
+
index < length - 1
|
|
724
|
+
) {
|
|
725
|
+
return w.toLowerCase();
|
|
726
|
+
}
|
|
727
|
+
return capitalizeFirst(w);
|
|
728
|
+
},
|
|
729
|
+
nyt: capitalizeFirst,
|
|
730
|
+
wikipedia: (w, index) =>
|
|
731
|
+
index === 0 ? capitalizeFirst(w) : lowercaseRest(w),
|
|
732
|
+
};
|
|
739
733
|
|
|
740
|
-
|
|
741
|
-
const processWord = styleFunctions[style] || lowercaseRest;
|
|
734
|
+
const processWord = styleFunctions[style] || lowercaseRest;
|
|
742
735
|
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
let correctedWord = word;
|
|
736
|
+
const processedWords = hyphenatedWords.map((segment, i) => {
|
|
737
|
+
let correctedWord = segment;
|
|
746
738
|
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
// Uppercase the Roman numeral part and concatenate back with 's
|
|
751
|
-
return updatedWord;
|
|
752
|
-
}
|
|
739
|
+
const normalizedSegment = segment
|
|
740
|
+
.toLowerCase()
|
|
741
|
+
.replace(/[^\w]/g, "");
|
|
753
742
|
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
}
|
|
743
|
+
// Normalize acronym casing
|
|
744
|
+
if (regionalAcronymList.includes(normalizedSegment)) {
|
|
745
|
+
return segment.toUpperCase();
|
|
746
|
+
}
|
|
759
747
|
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
const hasApostrophe = word.includes("'");
|
|
764
|
-
if (hasApostrophe) {
|
|
765
|
-
// Split the word at the apostrophe
|
|
766
|
-
const wordParts = word.split("'");
|
|
767
|
-
// Check each part for Roman numerals
|
|
768
|
-
const isRomanNumeral = wordParts.every((part) => romanNumeralRegex.test(part));
|
|
769
|
-
if (isRomanNumeral) {
|
|
770
|
-
// Uppercase each Roman numeral part and join back with apostrophe
|
|
771
|
-
correctedWord = wordParts.map((part) => part.toUpperCase()).join("'");
|
|
772
|
-
return correctedWord;
|
|
773
|
-
} else {
|
|
774
|
-
return processWord(correctedWord, i, hyphenatedWords.length);
|
|
775
|
-
}
|
|
776
|
-
}
|
|
748
|
+
// Roman numeral logic
|
|
749
|
+
const romanNumeralRegex =
|
|
750
|
+
/^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i;
|
|
777
751
|
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
if (uniqueTermsIndex >= 0) {
|
|
782
|
-
correctedWord = correctTitleCasingList[uniqueTermsIndex];
|
|
783
|
-
}
|
|
784
|
-
// Check if the word is a possessive form
|
|
785
|
-
else if (lowerCaseWord.endsWith("'s")) {
|
|
786
|
-
const rootWord = lowerCaseWord.substring(0, lowerCaseWord.length - 2);
|
|
787
|
-
const rootWordIndex = correctTitleCasingList.findIndex((w) => w.toLowerCase() === rootWord);
|
|
788
|
-
if (rootWordIndex >= 0) {
|
|
789
|
-
correctedWord = `${correctTitleCasingList[rootWordIndex]}'s`;
|
|
790
|
-
}
|
|
791
|
-
}
|
|
752
|
+
if (romanNumeralRegex.test(segment)) {
|
|
753
|
+
return segment.toUpperCase();
|
|
754
|
+
}
|
|
792
755
|
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
756
|
+
// Preserve special terms
|
|
757
|
+
const lowerCaseWord = segment.toLowerCase();
|
|
758
|
+
const uniqueTermsIndex = specialTermsList.findIndex(
|
|
759
|
+
(w) => w.toLowerCase() === lowerCaseWord
|
|
760
|
+
);
|
|
761
|
+
|
|
762
|
+
if (uniqueTermsIndex >= 0) {
|
|
763
|
+
correctedWord = specialTermsList[uniqueTermsIndex];
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
return processWord(correctedWord, i, hyphenatedWords.length);
|
|
767
|
+
});
|
|
768
|
+
|
|
769
|
+
return processedWords.join(dash);
|
|
770
|
+
}
|
|
796
771
|
|
|
797
|
-
// Rejoin the words
|
|
798
|
-
return processedWords.join("-");
|
|
799
|
-
}
|
|
800
772
|
}
|