@danielhaim/titlecaser 1.7.13 → 1.7.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -38
- package/dist/titlecaser.amd.js +2 -2
- package/dist/titlecaser.esm.js +2 -2
- package/dist/titlecaser.module.js +2 -2
- package/package.json +1 -1
- package/src/TitleCaser.js +125 -57
package/src/TitleCaser.js
CHANGED
|
@@ -13,8 +13,8 @@ export class TitleCaser {
|
|
|
13
13
|
constructor (options = {}) {
|
|
14
14
|
this.options = options;
|
|
15
15
|
this.debug = options.debug || false;
|
|
16
|
-
this.wordReplacementsList = wordReplacementsList;
|
|
17
|
-
this.phraseReplacementMap = phraseReplacementMap;
|
|
16
|
+
this.wordReplacementsList = JSON.parse(JSON.stringify(wordReplacementsList));
|
|
17
|
+
this.phraseReplacementMap = JSON.parse(JSON.stringify(phraseReplacementMap));
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
logWarning(message) {
|
|
@@ -25,12 +25,12 @@ export class TitleCaser {
|
|
|
25
25
|
|
|
26
26
|
toTitleCase(str) {
|
|
27
27
|
try {
|
|
28
|
-
// ! If input is empty, throw an error.
|
|
29
|
-
if (str.trim().length === 0) throw new TypeError("Invalid input: input must not be empty.");
|
|
30
|
-
|
|
31
28
|
// ! If input is not a string, throw an error.
|
|
32
29
|
if (typeof str !== "string") throw new TypeError("Invalid input: input must be a string.");
|
|
33
30
|
|
|
31
|
+
// ! If input is empty, throw an error.
|
|
32
|
+
if (str.length === 0) throw new TypeError("Invalid input: input must not be empty.");
|
|
33
|
+
|
|
34
34
|
// ! Input sanitization: limit length to prevent performance issues
|
|
35
35
|
if (str.length > 100000) throw new TypeError("Invalid input: input exceeds maximum length of 100,000 characters.");
|
|
36
36
|
|
|
@@ -43,6 +43,7 @@ export class TitleCaser {
|
|
|
43
43
|
neverCapitalize = [],
|
|
44
44
|
wordReplacementsList = this.wordReplacementsList,
|
|
45
45
|
smartQuotes = false, // Set to false by default
|
|
46
|
+
normalizeWhitespace = true,
|
|
46
47
|
} = this.options;
|
|
47
48
|
|
|
48
49
|
const styleConfig = styleConfigMap[style] || {};
|
|
@@ -67,15 +68,12 @@ export class TitleCaser {
|
|
|
67
68
|
this.logWarning(`replaceTermsArray: ${replaceTermsArray}`);
|
|
68
69
|
this.logWarning(`this.wordReplacementsList: ${this.wordReplacementsList}`);
|
|
69
70
|
|
|
70
|
-
//
|
|
71
|
-
let inputString = str
|
|
71
|
+
// Normalize HTML breaks and optionally normalize whitespace (see normalizeWhitespace option).
|
|
72
|
+
let inputString = str;
|
|
72
73
|
|
|
73
74
|
// Replace <br> and <br /> tags with a placeholder.
|
|
74
75
|
inputString = inputString.replace(REGEX_PATTERNS.HTML_BREAK, " nl2br ");
|
|
75
76
|
|
|
76
|
-
// Remove extra spaces
|
|
77
|
-
inputString = inputString.replace(REGEX_PATTERNS.MULTIPLE_SPACES, ' ');
|
|
78
|
-
|
|
79
77
|
// Check if the entire input string is uppercase and normalize it to lowercase
|
|
80
78
|
// before processing if it is. This ensures consistent handling for all-caps text.
|
|
81
79
|
const isEntireStringUppercase = TitleCaserUtils.isEntirelyUppercase(inputString.replace(/[^a-zA-Z]/g, ''));
|
|
@@ -84,10 +82,14 @@ export class TitleCaser {
|
|
|
84
82
|
inputString = inputString.toLowerCase();
|
|
85
83
|
}
|
|
86
84
|
|
|
87
|
-
//
|
|
88
|
-
const
|
|
85
|
+
// Tokenize preserving whitespace
|
|
86
|
+
const tokens = inputString.split(/(\s+)/);
|
|
87
|
+
|
|
88
|
+
const wordsInTitleCase = tokens.map((token, i) => {
|
|
89
|
+
if (!token || /^\s+$/.test(token)) return token;
|
|
90
|
+
|
|
91
|
+
const word = token;
|
|
89
92
|
|
|
90
|
-
const wordsInTitleCase = words.map((word, i) => {
|
|
91
93
|
switch (true) {
|
|
92
94
|
case TitleCaserUtils.isWordAmpersand(word):
|
|
93
95
|
// ! if the word is an ampersand, return it as is.
|
|
@@ -135,8 +137,18 @@ export class TitleCaser {
|
|
|
135
137
|
// ! If the word has an intentional uppercase letter, return the correct casing.
|
|
136
138
|
return word;
|
|
137
139
|
case TitleCaserUtils.isShortWord(word, style) && i !== 0:
|
|
138
|
-
//
|
|
139
|
-
|
|
140
|
+
// Find previous non-whitespace token
|
|
141
|
+
let prevToken = null;
|
|
142
|
+
for (let j = i - 1; j >= 0; j--) {
|
|
143
|
+
if (!/^\s+$/.test(tokens[j])) {
|
|
144
|
+
prevToken = tokens[j];
|
|
145
|
+
break;
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
const isAtEndOfSentence =
|
|
150
|
+
prevToken && TitleCaserUtils.endsWithSymbol(prevToken, [":", "?", "!", "."]);
|
|
151
|
+
|
|
140
152
|
if (isAtEndOfSentence) {
|
|
141
153
|
return word.charAt(0).toUpperCase() + word.slice(1);
|
|
142
154
|
}
|
|
@@ -194,7 +206,7 @@ export class TitleCaser {
|
|
|
194
206
|
});
|
|
195
207
|
|
|
196
208
|
// Join the words in the array into a string.
|
|
197
|
-
inputString = wordsInTitleCase.join(" ");
|
|
209
|
+
inputString = wordsInTitleCase.join("");
|
|
198
210
|
|
|
199
211
|
// Replace the nl2br placeholder with <br> tags.
|
|
200
212
|
inputString = inputString.replace(/nl2br/gi, "<br>");
|
|
@@ -205,46 +217,61 @@ export class TitleCaser {
|
|
|
205
217
|
inputString = TitleCaserUtils.convertQuotesToCurly(inputString);
|
|
206
218
|
}
|
|
207
219
|
|
|
208
|
-
const wordsForAcronyms = inputString.split(" ");
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
220
|
+
const wordsForAcronyms = inputString.split(/(\s+)/);
|
|
221
|
+
|
|
222
|
+
// Extract non-whitespace words for first/second detection
|
|
223
|
+
|
|
224
|
+
// Extract non-whitespace words for first/second detection
|
|
225
|
+
const nonWhitespaceWords = wordsForAcronyms.filter(t => !/^\s+$/.test(t));
|
|
226
|
+
let firstWord = nonWhitespaceWords[0] || null;
|
|
227
|
+
let secondWord = nonWhitespaceWords[1] || null;
|
|
228
|
+
|
|
212
229
|
for (let i = 0; i < wordsForAcronyms.length; i++) {
|
|
213
|
-
|
|
230
|
+
|
|
231
|
+
if (/^\s+$/.test(wordsForAcronyms[i])) continue;
|
|
232
|
+
|
|
233
|
+
// Find previous non-whitespace word
|
|
234
|
+
let prevWord = null;
|
|
235
|
+
for (let j = i - 1; j >= 0; j--) {
|
|
236
|
+
if (!/^\s+$/.test(wordsForAcronyms[j])) {
|
|
237
|
+
prevWord = wordsForAcronyms[j];
|
|
238
|
+
break;
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
// Find next non-whitespace word
|
|
243
|
+
let nextWord = null;
|
|
244
|
+
for (let j = i + 1; j < wordsForAcronyms.length; j++) {
|
|
245
|
+
if (!/^\s+$/.test(wordsForAcronyms[j])) {
|
|
246
|
+
nextWord = wordsForAcronyms[j];
|
|
247
|
+
break;
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
|
|
214
251
|
let currentWord = wordsForAcronyms[i];
|
|
215
|
-
const nextWord = i < wordsForAcronyms.length - 1 ? wordsForAcronyms[i + 1] : null;
|
|
216
252
|
|
|
217
|
-
// Capture punctuation at the end of the word
|
|
218
253
|
const punctuationMatch = currentWord.match(REGEX_PATTERNS.TRAILING_PUNCTUATION);
|
|
219
254
|
let punctuation = "";
|
|
220
255
|
|
|
221
256
|
if (punctuationMatch) {
|
|
222
257
|
punctuation = punctuationMatch[0];
|
|
223
|
-
currentWord = currentWord.replace(REGEX_PATTERNS.TRAILING_PUNCTUATION, "");
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
if (TitleCaserUtils.isRegionalAcronym(currentWord)) {
|
|
227
|
-
currentWord = TitleCaserUtils.normalizeRegionalAcronym(currentWord);
|
|
258
|
+
currentWord = currentWord.replace(REGEX_PATTERNS.TRAILING_PUNCTUATION, "");
|
|
228
259
|
}
|
|
229
260
|
|
|
230
|
-
if (TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord)) {
|
|
261
|
+
if (TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord, prevWord)) {
|
|
231
262
|
currentWord = TitleCaserUtils.normalizeRegionalAcronym(currentWord);
|
|
232
263
|
}
|
|
233
264
|
|
|
234
|
-
// if punctuation is not empty, add it to the end of the word
|
|
235
265
|
if (punctuation !== "") {
|
|
236
266
|
currentWord = currentWord + punctuation;
|
|
237
267
|
}
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
// This first pass does naive acronym detection that creates false positives
|
|
241
|
-
// (e.g., pronoun "us" detected as country "US"). Later loops use more
|
|
242
|
-
// sophisticated context-aware logic to correctly identify regional acronyms.
|
|
268
|
+
|
|
269
|
+
wordsForAcronyms[i] = currentWord;
|
|
243
270
|
}
|
|
244
271
|
|
|
245
|
-
inputString = wordsForAcronyms.join(" ");
|
|
272
|
+
inputString = wordsForAcronyms.join("");
|
|
246
273
|
|
|
247
|
-
const wordsForShortWords = inputString.split(" ");
|
|
274
|
+
const wordsForShortWords = inputString.split(/(\s+)/);
|
|
248
275
|
for (let i = 1; i < wordsForShortWords.length - 1; i++) {
|
|
249
276
|
const currentWord = wordsForShortWords[i];
|
|
250
277
|
const prevWord = wordsForShortWords[i - 1];
|
|
@@ -265,36 +292,62 @@ export class TitleCaser {
|
|
|
265
292
|
}
|
|
266
293
|
}
|
|
267
294
|
|
|
268
|
-
inputString = wordsForShortWords.join(" ");
|
|
295
|
+
inputString = wordsForShortWords.join("");
|
|
269
296
|
|
|
270
|
-
const wordsForFinalPass = inputString.split(" ");
|
|
297
|
+
const wordsForFinalPass = inputString.split(/(\s+)/);
|
|
271
298
|
for (let i = 0; i < wordsForFinalPass.length; i++) {
|
|
299
|
+
|
|
300
|
+
if (/^\s+$/.test(wordsForFinalPass[i])) continue;
|
|
301
|
+
|
|
272
302
|
let currentWord = wordsForFinalPass[i];
|
|
273
|
-
|
|
274
|
-
|
|
303
|
+
|
|
304
|
+
// Find previous non-whitespace word
|
|
305
|
+
let prevWord = null;
|
|
306
|
+
for (let j = i - 1; j >= 0; j--) {
|
|
307
|
+
if (!/^\s+$/.test(wordsForFinalPass[j])) {
|
|
308
|
+
prevWord = wordsForFinalPass[j];
|
|
309
|
+
break;
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
// Find next non-whitespace word
|
|
314
|
+
let nextWord = null;
|
|
315
|
+
for (let j = i + 1; j < wordsForFinalPass.length; j++) {
|
|
316
|
+
if (!/^\s+$/.test(wordsForFinalPass[j])) {
|
|
317
|
+
nextWord = wordsForFinalPass[j];
|
|
318
|
+
break;
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
|
|
275
322
|
if (nextWord && TitleCaserUtils.isRegionalAcronymNoDot(currentWord, nextWord, prevWord)) {
|
|
276
323
|
wordsForFinalPass[i] = currentWord.toUpperCase();
|
|
277
324
|
}
|
|
278
325
|
}
|
|
279
326
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
let
|
|
283
|
-
|
|
284
|
-
|
|
327
|
+
const nonWhitespaceFinal = wordsForFinalPass.filter(t => !/^\s+$/.test(t));
|
|
328
|
+
|
|
329
|
+
let finalWord = nonWhitespaceFinal[nonWhitespaceFinal.length - 1];
|
|
330
|
+
let wordBeforeFinal = nonWhitespaceFinal[nonWhitespaceFinal.length - 2];
|
|
331
|
+
let twoWordsBeforeFinal = nonWhitespaceFinal[nonWhitespaceFinal.length - 3];
|
|
332
|
+
|
|
333
|
+
if (firstWord && TitleCaserUtils.isRegionalAcronym(firstWord)) {
|
|
285
334
|
this.logWarning(`firstWord is a regional acronym: ${firstWord}`);
|
|
286
335
|
wordsForFinalPass[0] = firstWord.toUpperCase();
|
|
287
336
|
}
|
|
288
337
|
|
|
289
|
-
if (TitleCaserUtils.isRegionalAcronymNoDot(firstWord, secondWord)) {
|
|
338
|
+
if (firstWord && secondWord && TitleCaserUtils.isRegionalAcronymNoDot(firstWord, secondWord)) {
|
|
290
339
|
wordsForFinalPass[0] = firstWord.toUpperCase();
|
|
291
340
|
}
|
|
292
341
|
|
|
293
|
-
if (
|
|
342
|
+
if (
|
|
343
|
+
finalWord &&
|
|
344
|
+
wordBeforeFinal &&
|
|
345
|
+
TitleCaserUtils.isFinalWordRegionalAcronym(finalWord, wordBeforeFinal, twoWordsBeforeFinal)
|
|
346
|
+
) {
|
|
294
347
|
wordsForFinalPass[wordsForFinalPass.length - 1] = finalWord.toUpperCase();
|
|
295
348
|
}
|
|
296
349
|
|
|
297
|
-
inputString = wordsForFinalPass.join(" ");
|
|
350
|
+
inputString = wordsForFinalPass.join("");
|
|
298
351
|
|
|
299
352
|
for (const [phrase, replacement] of Object.entries(this.phraseReplacementMap)) {
|
|
300
353
|
// Create a regular expression for case-insensitive matching of the phrase
|
|
@@ -303,15 +356,15 @@ export class TitleCaser {
|
|
|
303
356
|
// Replace the phrase in the input string with its corresponding replacement
|
|
304
357
|
inputString = inputString.replace(regex, replacement);
|
|
305
358
|
}
|
|
306
|
-
|
|
359
|
+
|
|
307
360
|
// ! Handle sentence case
|
|
308
361
|
if (styleConfig.caseStyle === "sentence") {
|
|
309
|
-
const words = inputString.split(" ");
|
|
362
|
+
const words = inputString.split(/(\s+)/);
|
|
310
363
|
let firstWordFound = false;
|
|
311
|
-
|
|
364
|
+
|
|
312
365
|
for (let i = 0; i < words.length; i++) {
|
|
313
366
|
let word = words[i];
|
|
314
|
-
|
|
367
|
+
|
|
315
368
|
// 1) The first word: Capitalize first letter only, preserve existing brand/case in the rest
|
|
316
369
|
if (!firstWordFound && /[A-Za-z]/.test(word)) {
|
|
317
370
|
// If you want to skip altering brand or acronym, do one more check:
|
|
@@ -323,15 +376,21 @@ export class TitleCaser {
|
|
|
323
376
|
firstWordFound = true;
|
|
324
377
|
continue;
|
|
325
378
|
}
|
|
326
|
-
|
|
379
|
+
|
|
327
380
|
// 2) For subsequent words, only force-lowercase if we do NOT want to preserve uppercase
|
|
328
381
|
if (!TitleCaser.shouldKeepCasing(word, specialTermsList)) {
|
|
329
382
|
words[i] = word.toLowerCase();
|
|
330
383
|
}
|
|
331
384
|
// else, we keep it exactly as is
|
|
332
385
|
}
|
|
333
|
-
|
|
334
|
-
inputString = words.join(" ");
|
|
386
|
+
|
|
387
|
+
inputString = words.join("");
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
if (normalizeWhitespace) {
|
|
391
|
+
inputString = inputString
|
|
392
|
+
.replace(/\s+/g, " ")
|
|
393
|
+
.trim();
|
|
335
394
|
}
|
|
336
395
|
|
|
337
396
|
return inputString;
|
|
@@ -368,6 +427,11 @@ export class TitleCaser {
|
|
|
368
427
|
}
|
|
369
428
|
});
|
|
370
429
|
|
|
430
|
+
// Added check to prevent excessive number of replacement rules which could lead to performance issues
|
|
431
|
+
if (this.wordReplacementsList.length > 2000) {
|
|
432
|
+
throw new Error("Too many replacement rules.");
|
|
433
|
+
}
|
|
434
|
+
|
|
371
435
|
this.options.wordReplacementsList = this.wordReplacementsList;
|
|
372
436
|
|
|
373
437
|
this.logWarning(`Log the updated this.wordReplacementsList: ${this.wordReplacementsList}`);
|
|
@@ -386,6 +450,10 @@ export class TitleCaser {
|
|
|
386
450
|
this.wordReplacementsList.push({ [term]: replacement });
|
|
387
451
|
}
|
|
388
452
|
|
|
453
|
+
if (this.wordReplacementsList.length > 2000) {
|
|
454
|
+
throw new Error("Too many replacement rules.");
|
|
455
|
+
}
|
|
456
|
+
|
|
389
457
|
this.options.wordReplacementsList = this.wordReplacementsList;
|
|
390
458
|
}
|
|
391
459
|
|
|
@@ -467,7 +535,7 @@ export class TitleCaser {
|
|
|
467
535
|
if (TitleCaserUtils.hasUppercaseIntentional(word)) return true;
|
|
468
536
|
// If it's in the brand/specialTermsList
|
|
469
537
|
if (TitleCaserUtils.isWordInArray(word, specialTermsList)) return true;
|
|
470
|
-
|
|
538
|
+
|
|
471
539
|
// Otherwise, no. It's safe to lowercase.
|
|
472
540
|
return false;
|
|
473
541
|
}
|