@danielhaim/titlecaser 1.7.12 → 1.7.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +196 -414
- package/dist/titlecaser.amd.js +2 -2
- package/dist/titlecaser.esm.js +2 -2
- package/dist/titlecaser.module.js +2 -2
- package/index.d.ts +1 -3
- package/package.json +1 -1
- package/src/TitleCaser.js +128 -60
- package/src/TitleCaserUtils.js +97 -128
package/src/TitleCaser.js
CHANGED
|
@@ -13,8 +13,8 @@ export class TitleCaser {
|
|
|
13
13
|
constructor (options = {}) {
|
|
14
14
|
this.options = options;
|
|
15
15
|
this.debug = options.debug || false;
|
|
16
|
-
this.wordReplacementsList = wordReplacementsList;
|
|
17
|
-
this.phraseReplacementMap = phraseReplacementMap;
|
|
16
|
+
this.wordReplacementsList = JSON.parse(JSON.stringify(wordReplacementsList));
|
|
17
|
+
this.phraseReplacementMap = JSON.parse(JSON.stringify(phraseReplacementMap));
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
logWarning(message) {
|
|
@@ -25,12 +25,12 @@ export class TitleCaser {
|
|
|
25
25
|
|
|
26
26
|
toTitleCase(str) {
|
|
27
27
|
try {
|
|
28
|
-
// ! If input is empty, throw an error.
|
|
29
|
-
if (str.trim().length === 0) throw new TypeError("Invalid input: input must not be empty.");
|
|
30
|
-
|
|
31
28
|
// ! If input is not a string, throw an error.
|
|
32
29
|
if (typeof str !== "string") throw new TypeError("Invalid input: input must be a string.");
|
|
33
30
|
|
|
31
|
+
// ! If input is empty, throw an error.
|
|
32
|
+
if (str.length === 0) throw new TypeError("Invalid input: input must not be empty.");
|
|
33
|
+
|
|
34
34
|
// ! Input sanitization: limit length to prevent performance issues
|
|
35
35
|
if (str.length > 100000) throw new TypeError("Invalid input: input exceeds maximum length of 100,000 characters.");
|
|
36
36
|
|
|
@@ -41,8 +41,9 @@ export class TitleCaser {
|
|
|
41
41
|
const {
|
|
42
42
|
style = "ap",
|
|
43
43
|
neverCapitalize = [],
|
|
44
|
-
|
|
44
|
+
wordReplacementsList = this.wordReplacementsList,
|
|
45
45
|
smartQuotes = false, // Set to false by default
|
|
46
|
+
normalizeWhitespace = true,
|
|
46
47
|
} = this.options;
|
|
47
48
|
|
|
48
49
|
const styleConfig = styleConfigMap[style] || {};
|
|
@@ -58,24 +59,21 @@ export class TitleCaser {
|
|
|
58
59
|
} = TitleCaserUtils.getTitleCaseOptions(this.options, shortWordsList, wordReplacementsList);
|
|
59
60
|
|
|
60
61
|
// Preprocess the replaceTerms array to make it easier to search for.
|
|
61
|
-
const replaceTermsArray =
|
|
62
|
+
const replaceTermsArray = wordReplacementsList.map((term) => Object.keys(term)[0].toLowerCase());
|
|
62
63
|
// Create an object from the replaceTerms array to make it easier to search for.
|
|
63
64
|
const replaceTermObj = Object.fromEntries(
|
|
64
|
-
|
|
65
|
+
wordReplacementsList.map((term) => [Object.keys(term)[0].toLowerCase(), Object.values(term)[0]]),
|
|
65
66
|
);
|
|
66
67
|
|
|
67
68
|
this.logWarning(`replaceTermsArray: ${replaceTermsArray}`);
|
|
68
69
|
this.logWarning(`this.wordReplacementsList: ${this.wordReplacementsList}`);
|
|
69
70
|
|
|
70
|
-
//
|
|
71
|
-
let inputString = str
|
|
71
|
+
// Normalize HTML breaks and optionally normalize whitespace (see normalizeWhitespace option).
|
|
72
|
+
let inputString = str;
|
|
72
73
|
|
|
73
74
|
// Replace <br> and <br /> tags with a placeholder.
|
|
74
75
|
inputString = inputString.replace(REGEX_PATTERNS.HTML_BREAK, " nl2br ");
|
|
75
76
|
|
|
76
|
-
// Remove extra spaces
|
|
77
|
-
inputString = inputString.replace(REGEX_PATTERNS.MULTIPLE_SPACES, ' ');
|
|
78
|
-
|
|
79
77
|
// Check if the entire input string is uppercase and normalize it to lowercase
|
|
80
78
|
// before processing if it is. This ensures consistent handling for all-caps text.
|
|
81
79
|
const isEntireStringUppercase = TitleCaserUtils.isEntirelyUppercase(inputString.replace(/[^a-zA-Z]/g, ''));
|
|
@@ -84,10 +82,14 @@ export class TitleCaser {
|
|
|
84
82
|
inputString = inputString.toLowerCase();
|
|
85
83
|
}
|
|
86
84
|
|
|
87
|
-
//
|
|
88
|
-
const
|
|
85
|
+
// Tokenize preserving whitespace
|
|
86
|
+
const tokens = inputString.split(/(\s+)/);
|
|
87
|
+
|
|
88
|
+
const wordsInTitleCase = tokens.map((token, i) => {
|
|
89
|
+
if (!token || /^\s+$/.test(token)) return token;
|
|
90
|
+
|
|
91
|
+
const word = token;
|
|
89
92
|
|
|
90
|
-
const wordsInTitleCase = words.map((word, i) => {
|
|
91
93
|
switch (true) {
|
|
92
94
|
case TitleCaserUtils.isWordAmpersand(word):
|
|
93
95
|
// ! if the word is an ampersand, return it as is.
|
|
@@ -135,8 +137,18 @@ export class TitleCaser {
|
|
|
135
137
|
// ! If the word has an intentional uppercase letter, return the correct casing.
|
|
136
138
|
return word;
|
|
137
139
|
case TitleCaserUtils.isShortWord(word, style) && i !== 0:
|
|
138
|
-
//
|
|
139
|
-
|
|
140
|
+
// Find previous non-whitespace token
|
|
141
|
+
let prevToken = null;
|
|
142
|
+
for (let j = i - 1; j >= 0; j--) {
|
|
143
|
+
if (!/^\s+$/.test(tokens[j])) {
|
|
144
|
+
prevToken = tokens[j];
|
|
145
|
+
break;
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
const isAtEndOfSentence =
|
|
150
|
+
prevToken && TitleCaserUtils.endsWithSymbol(prevToken, [":", "?", "!", "."]);
|
|
151
|
+
|
|
140
152
|
if (isAtEndOfSentence) {
|
|
141
153
|
return word.charAt(0).toUpperCase() + word.slice(1);
|
|
142
154
|
}
|
|
@@ -194,7 +206,7 @@ export class TitleCaser {
|
|
|
194
206
|
});
|
|
195
207
|
|
|
196
208
|
// Join the words in the array into a string.
|
|
197
|
-
inputString = wordsInTitleCase.join("
|
|
209
|
+
inputString = wordsInTitleCase.join("");
|
|
198
210
|
|
|
199
211
|
// Replace the nl2br placeholder with <br> tags.
|
|
200
212
|
inputString = inputString.replace(/nl2br/gi, "<br>");
|
|
@@ -205,46 +217,61 @@ export class TitleCaser {
|
|
|
205
217
|
inputString = TitleCaserUtils.convertQuotesToCurly(inputString);
|
|
206
218
|
}
|
|
207
219
|
|
|
208
|
-
const wordsForAcronyms = inputString.split(
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
220
|
+
const wordsForAcronyms = inputString.split(/(\s+)/);
|
|
221
|
+
|
|
222
|
+
// Extract non-whitespace words for first/second detection
|
|
223
|
+
|
|
224
|
+
// Extract non-whitespace words for first/second detection
|
|
225
|
+
const nonWhitespaceWords = wordsForAcronyms.filter(t => !/^\s+$/.test(t));
|
|
226
|
+
let firstWord = nonWhitespaceWords[0] || null;
|
|
227
|
+
let secondWord = nonWhitespaceWords[1] || null;
|
|
228
|
+
|
|
212
229
|
for (let i = 0; i < wordsForAcronyms.length; i++) {
|
|
213
|
-
|
|
230
|
+
|
|
231
|
+
if (/^\s+$/.test(wordsForAcronyms[i])) continue;
|
|
232
|
+
|
|
233
|
+
// Find previous non-whitespace word
|
|
234
|
+
let prevWord = null;
|
|
235
|
+
for (let j = i - 1; j >= 0; j--) {
|
|
236
|
+
if (!/^\s+$/.test(wordsForAcronyms[j])) {
|
|
237
|
+
prevWord = wordsForAcronyms[j];
|
|
238
|
+
break;
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
// Find next non-whitespace word
|
|
243
|
+
let nextWord = null;
|
|
244
|
+
for (let j = i + 1; j < wordsForAcronyms.length; j++) {
|
|
245
|
+
if (!/^\s+$/.test(wordsForAcronyms[j])) {
|
|
246
|
+
nextWord = wordsForAcronyms[j];
|
|
247
|
+
break;
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
|
|
214
251
|
let currentWord = wordsForAcronyms[i];
|
|
215
|
-
const nextWord = i < wordsForAcronyms.length - 1 ? wordsForAcronyms[i + 1] : null;
|
|
216
252
|
|
|
217
|
-
// Capture punctuation at the end of the word
|
|
218
253
|
const punctuationMatch = currentWord.match(REGEX_PATTERNS.TRAILING_PUNCTUATION);
|
|
219
254
|
let punctuation = "";
|
|
220
255
|
|
|
221
256
|
if (punctuationMatch) {
|
|
222
257
|
punctuation = punctuationMatch[0];
|
|
223
|
-
currentWord = currentWord.replace(REGEX_PATTERNS.TRAILING_PUNCTUATION, "");
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
if (TitleCaserUtils.isRegionalAcronym(currentWord)) {
|
|
227
|
-
currentWord = TitleCaserUtils.normalizeRegionalAcronym(currentWord);
|
|
258
|
+
currentWord = currentWord.replace(REGEX_PATTERNS.TRAILING_PUNCTUATION, "");
|
|
228
259
|
}
|
|
229
260
|
|
|
230
|
-
if (TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord)) {
|
|
261
|
+
if (TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord, prevWord)) {
|
|
231
262
|
currentWord = TitleCaserUtils.normalizeRegionalAcronym(currentWord);
|
|
232
263
|
}
|
|
233
264
|
|
|
234
|
-
// if punctuation is not empty, add it to the end of the word
|
|
235
265
|
if (punctuation !== "") {
|
|
236
266
|
currentWord = currentWord + punctuation;
|
|
237
267
|
}
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
// This first pass does naive acronym detection that creates false positives
|
|
241
|
-
// (e.g., pronoun "us" detected as country "US"). Later loops use more
|
|
242
|
-
// sophisticated context-aware logic to correctly identify regional acronyms.
|
|
268
|
+
|
|
269
|
+
wordsForAcronyms[i] = currentWord;
|
|
243
270
|
}
|
|
244
271
|
|
|
245
|
-
inputString = wordsForAcronyms.join("
|
|
272
|
+
inputString = wordsForAcronyms.join("");
|
|
246
273
|
|
|
247
|
-
const wordsForShortWords = inputString.split(
|
|
274
|
+
const wordsForShortWords = inputString.split(/(\s+)/);
|
|
248
275
|
for (let i = 1; i < wordsForShortWords.length - 1; i++) {
|
|
249
276
|
const currentWord = wordsForShortWords[i];
|
|
250
277
|
const prevWord = wordsForShortWords[i - 1];
|
|
@@ -265,36 +292,62 @@ export class TitleCaser {
|
|
|
265
292
|
}
|
|
266
293
|
}
|
|
267
294
|
|
|
268
|
-
inputString = wordsForShortWords.join("
|
|
295
|
+
inputString = wordsForShortWords.join("");
|
|
269
296
|
|
|
270
|
-
const wordsForFinalPass = inputString.split(
|
|
297
|
+
const wordsForFinalPass = inputString.split(/(\s+)/);
|
|
271
298
|
for (let i = 0; i < wordsForFinalPass.length; i++) {
|
|
299
|
+
|
|
300
|
+
if (/^\s+$/.test(wordsForFinalPass[i])) continue;
|
|
301
|
+
|
|
272
302
|
let currentWord = wordsForFinalPass[i];
|
|
273
|
-
|
|
274
|
-
|
|
303
|
+
|
|
304
|
+
// Find previous non-whitespace word
|
|
305
|
+
let prevWord = null;
|
|
306
|
+
for (let j = i - 1; j >= 0; j--) {
|
|
307
|
+
if (!/^\s+$/.test(wordsForFinalPass[j])) {
|
|
308
|
+
prevWord = wordsForFinalPass[j];
|
|
309
|
+
break;
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
// Find next non-whitespace word
|
|
314
|
+
let nextWord = null;
|
|
315
|
+
for (let j = i + 1; j < wordsForFinalPass.length; j++) {
|
|
316
|
+
if (!/^\s+$/.test(wordsForFinalPass[j])) {
|
|
317
|
+
nextWord = wordsForFinalPass[j];
|
|
318
|
+
break;
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
|
|
275
322
|
if (nextWord && TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord, prevWord)) {
|
|
276
323
|
wordsForFinalPass[i] = currentWord.toUpperCase();
|
|
277
324
|
}
|
|
278
325
|
}
|
|
279
326
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
let
|
|
283
|
-
|
|
284
|
-
|
|
327
|
+
const nonWhitespaceFinal = wordsForFinalPass.filter(t => !/^\s+$/.test(t));
|
|
328
|
+
|
|
329
|
+
let finalWord = nonWhitespaceFinal[nonWhitespaceFinal.length - 1];
|
|
330
|
+
let wordBeforeFinal = nonWhitespaceFinal[nonWhitespaceFinal.length - 2];
|
|
331
|
+
let twoWordsBeforeFinal = nonWhitespaceFinal[nonWhitespaceFinal.length - 3];
|
|
332
|
+
|
|
333
|
+
if (firstWord && TitleCaserUtils.isRegionalAcronym(firstWord)) {
|
|
285
334
|
this.logWarning(`firstWord is a regional acronym: ${firstWord}`);
|
|
286
335
|
wordsForFinalPass[0] = firstWord.toUpperCase();
|
|
287
336
|
}
|
|
288
337
|
|
|
289
|
-
if (TitleCaserUtils.isRegionalAcronymNoDot(firstWord, secondWord)) {
|
|
338
|
+
if (firstWord && secondWord && TitleCaserUtils.isRegionalAcronymNoDot(firstWord, secondWord)) {
|
|
290
339
|
wordsForFinalPass[0] = firstWord.toUpperCase();
|
|
291
340
|
}
|
|
292
341
|
|
|
293
|
-
if (
|
|
342
|
+
if (
|
|
343
|
+
finalWord &&
|
|
344
|
+
wordBeforeFinal &&
|
|
345
|
+
TitleCaserUtils.isFinalWordRegionalAcronym(finalWord, wordBeforeFinal, twoWordsBeforeFinal)
|
|
346
|
+
) {
|
|
294
347
|
wordsForFinalPass[wordsForFinalPass.length - 1] = finalWord.toUpperCase();
|
|
295
348
|
}
|
|
296
349
|
|
|
297
|
-
inputString = wordsForFinalPass.join("
|
|
350
|
+
inputString = wordsForFinalPass.join("");
|
|
298
351
|
|
|
299
352
|
for (const [phrase, replacement] of Object.entries(this.phraseReplacementMap)) {
|
|
300
353
|
// Create a regular expression for case-insensitive matching of the phrase
|
|
@@ -303,15 +356,15 @@ export class TitleCaser {
|
|
|
303
356
|
// Replace the phrase in the input string with its corresponding replacement
|
|
304
357
|
inputString = inputString.replace(regex, replacement);
|
|
305
358
|
}
|
|
306
|
-
|
|
359
|
+
|
|
307
360
|
// ! Handle sentence case
|
|
308
361
|
if (styleConfig.caseStyle === "sentence") {
|
|
309
|
-
const words = inputString.split(
|
|
362
|
+
const words = inputString.split(/(\s+)/);
|
|
310
363
|
let firstWordFound = false;
|
|
311
|
-
|
|
364
|
+
|
|
312
365
|
for (let i = 0; i < words.length; i++) {
|
|
313
366
|
let word = words[i];
|
|
314
|
-
|
|
367
|
+
|
|
315
368
|
// 1) The first word: Capitalize first letter only, preserve existing brand/case in the rest
|
|
316
369
|
if (!firstWordFound && /[A-Za-z]/.test(word)) {
|
|
317
370
|
// If you want to skip altering brand or acronym, do one more check:
|
|
@@ -323,15 +376,21 @@ export class TitleCaser {
|
|
|
323
376
|
firstWordFound = true;
|
|
324
377
|
continue;
|
|
325
378
|
}
|
|
326
|
-
|
|
379
|
+
|
|
327
380
|
// 2) For subsequent words, only force-lowercase if we do NOT want to preserve uppercase
|
|
328
381
|
if (!TitleCaser.shouldKeepCasing(word, specialTermsList)) {
|
|
329
382
|
words[i] = word.toLowerCase();
|
|
330
383
|
}
|
|
331
384
|
// else, we keep it exactly as is
|
|
332
385
|
}
|
|
333
|
-
|
|
334
|
-
inputString = words.join("
|
|
386
|
+
|
|
387
|
+
inputString = words.join("");
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
if (normalizeWhitespace) {
|
|
391
|
+
inputString = inputString
|
|
392
|
+
.replace(/\s+/g, " ")
|
|
393
|
+
.trim();
|
|
335
394
|
}
|
|
336
395
|
|
|
337
396
|
return inputString;
|
|
@@ -368,6 +427,11 @@ export class TitleCaser {
|
|
|
368
427
|
}
|
|
369
428
|
});
|
|
370
429
|
|
|
430
|
+
// Added check to prevent excessive number of replacement rules which could lead to performance issues
|
|
431
|
+
if (this.wordReplacementsList.length > 2000) {
|
|
432
|
+
throw new Error("Too many replacement rules.");
|
|
433
|
+
}
|
|
434
|
+
|
|
371
435
|
this.options.wordReplacementsList = this.wordReplacementsList;
|
|
372
436
|
|
|
373
437
|
this.logWarning(`Log the updated this.wordReplacementsList: ${this.wordReplacementsList}`);
|
|
@@ -386,6 +450,10 @@ export class TitleCaser {
|
|
|
386
450
|
this.wordReplacementsList.push({ [term]: replacement });
|
|
387
451
|
}
|
|
388
452
|
|
|
453
|
+
if (this.wordReplacementsList.length > 2000) {
|
|
454
|
+
throw new Error("Too many replacement rules.");
|
|
455
|
+
}
|
|
456
|
+
|
|
389
457
|
this.options.wordReplacementsList = this.wordReplacementsList;
|
|
390
458
|
}
|
|
391
459
|
|
|
@@ -467,7 +535,7 @@ export class TitleCaser {
|
|
|
467
535
|
if (TitleCaserUtils.hasUppercaseIntentional(word)) return true;
|
|
468
536
|
// If it's in the brand/specialTermsList
|
|
469
537
|
if (TitleCaserUtils.isWordInArray(word, specialTermsList)) return true;
|
|
470
|
-
|
|
538
|
+
|
|
471
539
|
// Otherwise, no. It's safe to lowercase.
|
|
472
540
|
return false;
|
|
473
541
|
}
|
package/src/TitleCaserUtils.js
CHANGED
|
@@ -7,7 +7,7 @@ import {
|
|
|
7
7
|
shortWordsList,
|
|
8
8
|
regionalAcronymList,
|
|
9
9
|
regionalAcronymPrecedingWordsList,
|
|
10
|
-
regionalAcronymFollowingWordsList
|
|
10
|
+
regionalAcronymFollowingWordsList,
|
|
11
11
|
} from "./TitleCaserConsts.js";
|
|
12
12
|
|
|
13
13
|
export class TitleCaserUtils {
|
|
@@ -65,7 +65,7 @@ export class TitleCaserUtils {
|
|
|
65
65
|
// Create a unique key for the cache using a faster approach than JSON.stringify
|
|
66
66
|
const style = options.style || "ap";
|
|
67
67
|
const smartQuotes = options.hasOwnProperty("smartQuotes") ? options.smartQuotes : false;
|
|
68
|
-
const cacheKey = `${style}|${smartQuotes}|${lowercaseWords.length > 0 ? lowercaseWords.sort().join(
|
|
68
|
+
const cacheKey = `${style}|${smartQuotes}|${lowercaseWords.length > 0 ? lowercaseWords.sort().join(",") : ""}`;
|
|
69
69
|
|
|
70
70
|
// If the cache already has an entry for this key, return the cached options
|
|
71
71
|
if (TitleCaserUtils.titleCaseOptionsCache.has(cacheKey)) {
|
|
@@ -80,22 +80,13 @@ export class TitleCaserUtils {
|
|
|
80
80
|
|
|
81
81
|
// Merge the default articles with user-provided articles and lowercase words
|
|
82
82
|
// Using Set for O(n) deduplication instead of O(n²) filter+indexOf
|
|
83
|
-
const mergedArticles = [...new Set([
|
|
84
|
-
...mergedOptions.articlesList,
|
|
85
|
-
...lowercaseWords
|
|
86
|
-
])];
|
|
83
|
+
const mergedArticles = [...new Set([...mergedOptions.articlesList, ...lowercaseWords])];
|
|
87
84
|
|
|
88
85
|
// Merge the default short conjunctions with user-provided conjunctions and lowercase words
|
|
89
|
-
const mergedShortConjunctions = [...new Set([
|
|
90
|
-
...mergedOptions.shortConjunctionsList,
|
|
91
|
-
...lowercaseWords
|
|
92
|
-
])];
|
|
86
|
+
const mergedShortConjunctions = [...new Set([...mergedOptions.shortConjunctionsList, ...lowercaseWords])];
|
|
93
87
|
|
|
94
88
|
// Merge the default short prepositions with user-provided prepositions and lowercase words
|
|
95
|
-
const mergedShortPrepositions = [...new Set([
|
|
96
|
-
...mergedOptions.shortPrepositionsList,
|
|
97
|
-
...lowercaseWords
|
|
98
|
-
])];
|
|
89
|
+
const mergedShortPrepositions = [...new Set([...mergedOptions.shortPrepositionsList, ...lowercaseWords])];
|
|
99
90
|
|
|
100
91
|
// Merge the default word replacements with the user-provided replacements
|
|
101
92
|
const mergedReplaceTerms = [
|
|
@@ -240,9 +231,7 @@ export class TitleCaserUtils {
|
|
|
240
231
|
|
|
241
232
|
// Check if the entire input string is uppercase
|
|
242
233
|
static isEntirelyUppercase(str) {
|
|
243
|
-
return str === str.toUpperCase() &&
|
|
244
|
-
str !== str.toLowerCase() &&
|
|
245
|
-
str.length > 1;
|
|
234
|
+
return str === str.toUpperCase() && str !== str.toLowerCase() && str.length > 1;
|
|
246
235
|
}
|
|
247
236
|
|
|
248
237
|
static isRegionalAcronym(word) {
|
|
@@ -259,27 +248,25 @@ export class TitleCaserUtils {
|
|
|
259
248
|
}
|
|
260
249
|
|
|
261
250
|
static isRegionalAcronymNoDot(word, nextWord, prevWord = null) {
|
|
262
|
-
if (typeof word !==
|
|
251
|
+
if (typeof word !== "string" || typeof nextWord !== "string") {
|
|
263
252
|
return false;
|
|
264
253
|
}
|
|
265
254
|
|
|
266
255
|
const firstWordStripped = word.toLowerCase().replace(/[^\w\s]/g, "");
|
|
267
256
|
const nextWordStripped = nextWord.toLowerCase().replace(/[^\w\s]/g, "");
|
|
268
257
|
|
|
269
|
-
const smallDirectPrecedingIndicators = [
|
|
270
|
-
"the",
|
|
271
|
-
];
|
|
258
|
+
const smallDirectPrecedingIndicators = ["the"];
|
|
272
259
|
|
|
273
|
-
if (
|
|
260
|
+
if (
|
|
261
|
+
prevWord &&
|
|
274
262
|
regionalAcronymList.includes(firstWordStripped) &&
|
|
275
|
-
smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())
|
|
276
|
-
|
|
277
|
-
|
|
263
|
+
smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())
|
|
264
|
+
) {
|
|
265
|
+
return true;
|
|
278
266
|
}
|
|
279
267
|
|
|
280
268
|
return (
|
|
281
|
-
regionalAcronymList.includes(firstWordStripped) &&
|
|
282
|
-
regionalAcronymFollowingWordsList.includes(nextWordStripped)
|
|
269
|
+
regionalAcronymList.includes(firstWordStripped) && regionalAcronymFollowingWordsList.includes(nextWordStripped)
|
|
283
270
|
);
|
|
284
271
|
}
|
|
285
272
|
|
|
@@ -288,9 +275,7 @@ export class TitleCaserUtils {
|
|
|
288
275
|
|
|
289
276
|
const current = word.toLowerCase().replace(/[^\w]/g, "");
|
|
290
277
|
const prev = prevWord.toLowerCase().replace(/[^\w]/g, "");
|
|
291
|
-
const prevPrev = typeof prevPrevWord === "string"
|
|
292
|
-
? prevPrevWord.toLowerCase().replace(/[^\w]/g, "")
|
|
293
|
-
: null;
|
|
278
|
+
const prevPrev = typeof prevPrevWord === "string" ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "") : null;
|
|
294
279
|
|
|
295
280
|
if (!regionalAcronymList.includes(current)) return false;
|
|
296
281
|
|
|
@@ -321,19 +306,9 @@ export class TitleCaserUtils {
|
|
|
321
306
|
if (!word || !style || !styleConfigMap[style]) return false;
|
|
322
307
|
|
|
323
308
|
const lowerWord = word.toLowerCase();
|
|
324
|
-
const {
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
shortPrepositionsList,
|
|
328
|
-
neverCapitalizedList
|
|
329
|
-
} = styleConfigMap[style];
|
|
330
|
-
|
|
331
|
-
const combinedList = [
|
|
332
|
-
...shortConjunctionsList,
|
|
333
|
-
...articlesList,
|
|
334
|
-
...shortPrepositionsList,
|
|
335
|
-
...neverCapitalizedList
|
|
336
|
-
];
|
|
309
|
+
const { shortConjunctionsList, articlesList, shortPrepositionsList, neverCapitalizedList } = styleConfigMap[style];
|
|
310
|
+
|
|
311
|
+
const combinedList = [...shortConjunctionsList, ...articlesList, ...shortPrepositionsList, ...neverCapitalizedList];
|
|
337
312
|
|
|
338
313
|
return combinedList.includes(lowerWord) ? word : false;
|
|
339
314
|
}
|
|
@@ -585,9 +560,7 @@ export class TitleCaserUtils {
|
|
|
585
560
|
throw new TypeError("Invalid input: word must be a non-empty string.");
|
|
586
561
|
}
|
|
587
562
|
|
|
588
|
-
const knownElidedPrefixes = new Set([
|
|
589
|
-
"o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
|
|
590
|
-
]);
|
|
563
|
+
const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
|
|
591
564
|
|
|
592
565
|
const normalized = word.trim().toLowerCase().replace(/'/g, "’");
|
|
593
566
|
|
|
@@ -606,9 +579,7 @@ export class TitleCaserUtils {
|
|
|
606
579
|
throw new TypeError("Invalid input: word must be a non-empty string.");
|
|
607
580
|
}
|
|
608
581
|
|
|
609
|
-
const knownElidedPrefixes = new Set([
|
|
610
|
-
"o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
|
|
611
|
-
]);
|
|
582
|
+
const knownElidedPrefixes = new Set(["o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"]);
|
|
612
583
|
|
|
613
584
|
const original = word.trim();
|
|
614
585
|
const normalized = original.replace(/'/g, "’").toLowerCase();
|
|
@@ -619,9 +590,7 @@ export class TitleCaserUtils {
|
|
|
619
590
|
const rest = original.slice(prefixLength);
|
|
620
591
|
|
|
621
592
|
const fixedPrefix = prefix.charAt(0).toUpperCase() + prefix.slice(1);
|
|
622
|
-
const fixedRest = rest.length > 0
|
|
623
|
-
? rest.charAt(0).toUpperCase() + rest.slice(1)
|
|
624
|
-
: "";
|
|
593
|
+
const fixedRest = rest.length > 0 ? rest.charAt(0).toUpperCase() + rest.slice(1) : "";
|
|
625
594
|
|
|
626
595
|
return fixedPrefix + fixedRest;
|
|
627
596
|
}
|
|
@@ -716,88 +685,88 @@ export class TitleCaserUtils {
|
|
|
716
685
|
return parts.join(joiner);
|
|
717
686
|
}
|
|
718
687
|
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
688
|
+
// This function is used to check if a word is in the correct terms list
|
|
689
|
+
static correctTermHyphenated(word, style) {
|
|
690
|
+
// Split the word into an array of words (supports -, –, —)
|
|
691
|
+
const dashMatch = word.match(/[-–—]/);
|
|
692
|
+
if (!dashMatch) return word;
|
|
693
|
+
|
|
694
|
+
const dash = dashMatch[0];
|
|
695
|
+
const hyphenatedWords = word.split(/[-–—]/);
|
|
696
|
+
|
|
697
|
+
// Detect if ANY segment is a regional acronym
|
|
698
|
+
const containsRegionalAcronym = hyphenatedWords.some((segment) =>
|
|
699
|
+
regionalAcronymList.includes(
|
|
700
|
+
segment.toLowerCase().replace(/[^\w]/g, "")
|
|
701
|
+
)
|
|
702
|
+
);
|
|
703
|
+
|
|
704
|
+
// Define functions to process words
|
|
705
|
+
const capitalizeFirst = (w) => w.charAt(0).toUpperCase() + w.slice(1);
|
|
706
|
+
const lowercaseRest = (w) => w.charAt(0) + w.slice(1).toLowerCase();
|
|
707
|
+
|
|
708
|
+
// Define the style-specific processing functions
|
|
709
|
+
const styleFunctions = {
|
|
710
|
+
ap: (w, index) => {
|
|
711
|
+
// If compound contains acronym → headline-style compound
|
|
712
|
+
if (containsRegionalAcronym) {
|
|
713
|
+
return capitalizeFirst(w);
|
|
714
|
+
}
|
|
715
|
+
return index === 0 ? capitalizeFirst(w) : lowercaseRest(w);
|
|
716
|
+
},
|
|
717
|
+
chicago: capitalizeFirst,
|
|
718
|
+
apa: (w, index, length) => {
|
|
719
|
+
if (
|
|
720
|
+
!containsRegionalAcronym &&
|
|
721
|
+
TitleCaserUtils.isShortWord(w, style) &&
|
|
722
|
+
index > 0 &&
|
|
723
|
+
index < length - 1
|
|
724
|
+
) {
|
|
725
|
+
return w.toLowerCase();
|
|
726
|
+
}
|
|
727
|
+
return capitalizeFirst(w);
|
|
728
|
+
},
|
|
729
|
+
nyt: capitalizeFirst,
|
|
730
|
+
wikipedia: (w, index) =>
|
|
731
|
+
index === 0 ? capitalizeFirst(w) : lowercaseRest(w),
|
|
732
|
+
};
|
|
742
733
|
|
|
743
|
-
|
|
744
|
-
const processWord = styleFunctions[style] || lowercaseRest;
|
|
734
|
+
const processWord = styleFunctions[style] || lowercaseRest;
|
|
745
735
|
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
let correctedWord = word;
|
|
736
|
+
const processedWords = hyphenatedWords.map((segment, i) => {
|
|
737
|
+
let correctedWord = segment;
|
|
749
738
|
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
// Uppercase the Roman numeral part and concatenate back with 's
|
|
754
|
-
return updatedWord;
|
|
755
|
-
}
|
|
739
|
+
const normalizedSegment = segment
|
|
740
|
+
.toLowerCase()
|
|
741
|
+
.replace(/[^\w]/g, "");
|
|
756
742
|
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
}
|
|
743
|
+
// Normalize acronym casing
|
|
744
|
+
if (regionalAcronymList.includes(normalizedSegment)) {
|
|
745
|
+
return segment.toUpperCase();
|
|
746
|
+
}
|
|
762
747
|
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
const hasApostrophe = word.includes("'");
|
|
767
|
-
if (hasApostrophe) {
|
|
768
|
-
// Split the word at the apostrophe
|
|
769
|
-
const wordParts = word.split("'");
|
|
770
|
-
// Check each part for Roman numerals
|
|
771
|
-
const isRomanNumeral = wordParts.every((part) => romanNumeralRegex.test(part));
|
|
772
|
-
if (isRomanNumeral) {
|
|
773
|
-
// Uppercase each Roman numeral part and join back with apostrophe
|
|
774
|
-
correctedWord = wordParts.map((part) => part.toUpperCase()).join("'");
|
|
775
|
-
return correctedWord;
|
|
776
|
-
} else {
|
|
777
|
-
return processWord(correctedWord, i, hyphenatedWords.length);
|
|
778
|
-
}
|
|
779
|
-
}
|
|
748
|
+
// Roman numeral logic
|
|
749
|
+
const romanNumeralRegex =
|
|
750
|
+
/^(M{0,3})(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/i;
|
|
780
751
|
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
if (uniqueTermsIndex >= 0) {
|
|
785
|
-
correctedWord = specialTermsList[uniqueTermsIndex];
|
|
786
|
-
}
|
|
787
|
-
// Check if the word is a possessive form
|
|
788
|
-
else if (lowerCaseWord.endsWith("'s")) {
|
|
789
|
-
const rootWord = lowerCaseWord.substring(0, lowerCaseWord.length - 2);
|
|
790
|
-
const rootWordIndex = specialTermsList.findIndex((w) => w.toLowerCase() === rootWord);
|
|
791
|
-
if (rootWordIndex >= 0) {
|
|
792
|
-
correctedWord = `${specialTermsList[rootWordIndex]}'s`;
|
|
793
|
-
}
|
|
794
|
-
}
|
|
752
|
+
if (romanNumeralRegex.test(segment)) {
|
|
753
|
+
return segment.toUpperCase();
|
|
754
|
+
}
|
|
795
755
|
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
756
|
+
// Preserve special terms
|
|
757
|
+
const lowerCaseWord = segment.toLowerCase();
|
|
758
|
+
const uniqueTermsIndex = specialTermsList.findIndex(
|
|
759
|
+
(w) => w.toLowerCase() === lowerCaseWord
|
|
760
|
+
);
|
|
761
|
+
|
|
762
|
+
if (uniqueTermsIndex >= 0) {
|
|
763
|
+
correctedWord = specialTermsList[uniqueTermsIndex];
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
return processWord(correctedWord, i, hyphenatedWords.length);
|
|
767
|
+
});
|
|
768
|
+
|
|
769
|
+
return processedWords.join(dash);
|
|
770
|
+
}
|
|
799
771
|
|
|
800
|
-
// Rejoin the words
|
|
801
|
-
return processedWords.join("-");
|
|
802
|
-
}
|
|
803
772
|
}
|