@danielhaim/titlecaser 1.7.0 → 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +231 -87
- package/dist/titlecaser.amd.js +5 -0
- package/dist/titlecaser.module.js +5 -0
- package/index.d.ts +23 -0
- package/package.json +16 -14
- package/src/TitleCaser.js +120 -53
- package/src/TitleCaserConsts.js +188 -33
- package/src/TitleCaserUtils.js +166 -202
- package/src/data/brandList.json +532 -89
- package/src/data/businessFinanceLegalTerms.json +108 -14
- package/src/data/eCommerceDigitalTerms.json +13 -3
- package/src/data/globalGeography.json +197 -41
- package/src/data/marketingMediaTerms.json +37 -6
- package/src/data/militaryTerms.json +153 -0
- package/src/data/miscSpecializedTerms.json +12 -3
- package/src/data/techComputingConcepts.json +184 -26
- package/src/data/timeAcademicTerms.json +32 -5
package/src/TitleCaserUtils.js
CHANGED
|
@@ -4,6 +4,10 @@ import {
|
|
|
4
4
|
wordReplacementsList,
|
|
5
5
|
correctTitleCasingList,
|
|
6
6
|
ignoredWordList,
|
|
7
|
+
commonShortWords,
|
|
8
|
+
regionalAcronymList,
|
|
9
|
+
regionalAcronymPrecedingWords,
|
|
10
|
+
directFollowingIndicatorsRegionalAcronym
|
|
7
11
|
} from "./TitleCaserConsts.js";
|
|
8
12
|
|
|
9
13
|
export class TitleCaserUtils {
|
|
@@ -113,6 +117,10 @@ export class TitleCaserUtils {
|
|
|
113
117
|
|
|
114
118
|
static isNeverCapitalizedCache = new Map();
|
|
115
119
|
|
|
120
|
+
static capitalizeFirstLetter(word) {
|
|
121
|
+
return word.charAt(0).toUpperCase() + word.slice(1);
|
|
122
|
+
}
|
|
123
|
+
|
|
116
124
|
// Check if the word is a short conjunction
|
|
117
125
|
static isShortConjunction(word, style) {
|
|
118
126
|
// Get the list of short conjunctions from the TitleCaseHelper
|
|
@@ -142,6 +150,7 @@ export class TitleCaserUtils {
|
|
|
142
150
|
// Check if the word is a short preposition
|
|
143
151
|
static isShortPreposition(word, style) {
|
|
144
152
|
// Get the list of short prepositions from the Title Case Helper.
|
|
153
|
+
// CONSOLE LOG THE WORD BEFORE CHECKING IF IT IS IN THE LIST
|
|
145
154
|
const { shortPrepositionsList } = TitleCaserUtils.getTitleCaseOptions({
|
|
146
155
|
style: style,
|
|
147
156
|
});
|
|
@@ -183,6 +192,7 @@ export class TitleCaserUtils {
|
|
|
183
192
|
|
|
184
193
|
// If the word is a short conjunction, article, preposition, or is in the never-capitalized list, return true.
|
|
185
194
|
// Otherwise, return false.
|
|
195
|
+
|
|
186
196
|
return (
|
|
187
197
|
TitleCaserUtils.isShortConjunction(word, style) ||
|
|
188
198
|
TitleCaserUtils.isArticle(word, style) ||
|
|
@@ -225,211 +235,106 @@ export class TitleCaserUtils {
|
|
|
225
235
|
return hasUppercase && hasLowercase;
|
|
226
236
|
}
|
|
227
237
|
|
|
228
|
-
// Check if
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
}
|
|
238
|
+
// Check if the entire input string is uppercase
|
|
239
|
+
static isEntirelyUppercase(str) {
|
|
240
|
+
return str === str.toUpperCase() &&
|
|
241
|
+
str !== str.toLowerCase() &&
|
|
242
|
+
str.length > 1;
|
|
243
|
+
}
|
|
235
244
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
"law",
|
|
260
|
-
"act",
|
|
261
|
-
"treaty",
|
|
262
|
-
"court",
|
|
263
|
-
"legislation",
|
|
264
|
-
"statute",
|
|
265
|
-
"bill",
|
|
266
|
-
"agency",
|
|
267
|
-
"department",
|
|
268
|
-
"bureau",
|
|
269
|
-
"service",
|
|
270
|
-
"office",
|
|
271
|
-
"council",
|
|
272
|
-
"commission",
|
|
273
|
-
"division",
|
|
274
|
-
"alliance",
|
|
275
|
-
"union",
|
|
276
|
-
"confederation",
|
|
277
|
-
"bloc",
|
|
278
|
-
"zone",
|
|
279
|
-
"territory",
|
|
280
|
-
"province",
|
|
281
|
-
"state",
|
|
282
|
-
"army",
|
|
283
|
-
"navy",
|
|
284
|
-
"forces",
|
|
285
|
-
"marines",
|
|
286
|
-
"airforce",
|
|
287
|
-
"defense",
|
|
288
|
-
"intelligence",
|
|
289
|
-
"security",
|
|
290
|
-
"economy",
|
|
291
|
-
"budget",
|
|
292
|
-
"finance",
|
|
293
|
-
"treasury",
|
|
294
|
-
"trade",
|
|
295
|
-
"sanctions",
|
|
296
|
-
"aid",
|
|
297
|
-
"strategy",
|
|
298
|
-
"plan",
|
|
299
|
-
"policy",
|
|
300
|
-
"program",
|
|
301
|
-
"initiative",
|
|
302
|
-
"project",
|
|
303
|
-
"reform",
|
|
304
|
-
"relations",
|
|
305
|
-
"ambassador",
|
|
306
|
-
"diplomacy",
|
|
307
|
-
"summit",
|
|
308
|
-
"conference",
|
|
309
|
-
"talks",
|
|
310
|
-
"negotiations",
|
|
311
|
-
]);
|
|
312
|
-
|
|
313
|
-
const removePunctuation = (word) => word.replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, "");
|
|
314
|
-
|
|
315
|
-
// Remove trailing punctuation from the word
|
|
316
|
-
const removeTrailingPunctuation = (word) => {
|
|
317
|
-
const match = word.match(/^(.*?)([.,\/#!$%\^&\*;:{}=\-_`~()]+)$/);
|
|
318
|
-
if (match && match[1]) {
|
|
319
|
-
return match[1];
|
|
320
|
-
}
|
|
321
|
-
return word;
|
|
322
|
-
};
|
|
323
|
-
|
|
324
|
-
word = word ? removePunctuation(word.toLowerCase()) : "";
|
|
325
|
-
word = removeTrailingPunctuation(word);
|
|
326
|
-
|
|
327
|
-
prevWord = prevWord ? removePunctuation(prevWord.toLowerCase()) : "";
|
|
328
|
-
nextWord = nextWord ? removePunctuation(nextWord.toLowerCase()) : "";
|
|
329
|
-
|
|
330
|
-
// Check if it's an acronym with direct following indicators
|
|
331
|
-
const isDirectAcronym =
|
|
332
|
-
countryCodes.has(word) &&
|
|
333
|
-
(!prevWord || commonShortWords.has(prevWord)) &&
|
|
334
|
-
(!nextWord || directFollowingIndicators.has(nextWord));
|
|
335
|
-
|
|
336
|
-
// Check if it's an acronym based on the previous word
|
|
337
|
-
const isPreviousAcronym = countryCodes.has(prevWord) && (!nextWord || directFollowingIndicators.has(nextWord));
|
|
338
|
-
|
|
339
|
-
return isDirectAcronym || isPreviousAcronym;
|
|
340
|
-
} catch (error) {
|
|
341
|
-
console.error(`An error occurred: ${error.message}`);
|
|
342
|
-
return false; // Return false in case of errors to indicate failure.
|
|
343
|
-
}
|
|
344
|
-
}
|
|
345
|
-
|
|
346
|
-
static checkIfWordIsAcronym(commonShortWords, prevWord, currentWord, nextWord) {
|
|
347
|
-
const countryCodes = ["us", "usa"];
|
|
348
|
-
const directPrecedingIndicators = ["the", "in", "to", "from", "against", "with", "within", "towards", "into", "at"];
|
|
349
|
-
const directFollowingIndicators = [
|
|
350
|
-
"policies",
|
|
351
|
-
"government",
|
|
352
|
-
"military",
|
|
353
|
-
"embassy",
|
|
354
|
-
"administration",
|
|
355
|
-
"senate",
|
|
356
|
-
"congress",
|
|
357
|
-
"parliament",
|
|
358
|
-
"cabinet",
|
|
359
|
-
"federation",
|
|
360
|
-
"republic",
|
|
361
|
-
"democracy",
|
|
362
|
-
"law",
|
|
363
|
-
"act",
|
|
364
|
-
"treaty",
|
|
365
|
-
"court",
|
|
366
|
-
"legislation",
|
|
367
|
-
"statute",
|
|
368
|
-
"bill",
|
|
369
|
-
"agency",
|
|
370
|
-
"department",
|
|
371
|
-
"bureau",
|
|
372
|
-
"service",
|
|
373
|
-
"office",
|
|
374
|
-
"council",
|
|
375
|
-
"commission",
|
|
376
|
-
"division",
|
|
377
|
-
"alliance",
|
|
378
|
-
"union",
|
|
379
|
-
"confederation",
|
|
380
|
-
"bloc",
|
|
381
|
-
"zone",
|
|
382
|
-
"territory",
|
|
383
|
-
"province",
|
|
384
|
-
"state",
|
|
385
|
-
"army",
|
|
386
|
-
"navy",
|
|
387
|
-
"forces",
|
|
388
|
-
"marines",
|
|
389
|
-
"airforce",
|
|
390
|
-
"defense",
|
|
391
|
-
"intelligence",
|
|
392
|
-
"security",
|
|
393
|
-
"economy",
|
|
394
|
-
"budget",
|
|
395
|
-
"finance",
|
|
396
|
-
"treasury",
|
|
397
|
-
"trade",
|
|
398
|
-
"sanctions",
|
|
399
|
-
"aid",
|
|
400
|
-
"strategy",
|
|
401
|
-
"plan",
|
|
402
|
-
"policy",
|
|
403
|
-
"program",
|
|
404
|
-
"initiative",
|
|
405
|
-
"project",
|
|
406
|
-
"reform",
|
|
407
|
-
"relations",
|
|
408
|
-
"ambassador",
|
|
409
|
-
"diplomacy",
|
|
410
|
-
"summit",
|
|
411
|
-
"conference",
|
|
412
|
-
"talks",
|
|
413
|
-
"negotiations",
|
|
245
|
+
static isRegionalAcronym(word) {
|
|
246
|
+
if (typeof word !== "string") {
|
|
247
|
+
throw new TypeError("Invalid input: word must be a string.");
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
if (word.length < 2) {
|
|
251
|
+
return false;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
const lowercasedWord = word.toLowerCase();
|
|
255
|
+
return regionalAcronymList.includes(lowercasedWord);
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
static isRegionalAcronymNoDot(word, nextWord, prevWord = null) {
|
|
259
|
+
if (typeof word !== 'string' || typeof nextWord !== 'string') {
|
|
260
|
+
return false;
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
const firstWordStripped = word.toLowerCase().replace(/[^\w\s]/g, "");
|
|
264
|
+
const nextWordStripped = nextWord.toLowerCase().replace(/[^\w\s]/g, "");
|
|
265
|
+
|
|
266
|
+
const smallDirectPrecedingIndicators = [
|
|
267
|
+
"the",
|
|
414
268
|
];
|
|
415
269
|
|
|
416
|
-
|
|
270
|
+
if (prevWord &&
|
|
271
|
+
regionalAcronymList.includes(firstWordStripped) &&
|
|
272
|
+
smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())) {
|
|
273
|
+
|
|
274
|
+
return true;
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
return (
|
|
278
|
+
regionalAcronymList.includes(firstWordStripped) &&
|
|
279
|
+
directFollowingIndicatorsRegionalAcronym.includes(nextWordStripped)
|
|
280
|
+
);
|
|
281
|
+
}
|
|
417
282
|
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
nextWord = nextWord ? removePunctuation(nextWord.toLowerCase()) : "";
|
|
283
|
+
static isFinalWordRegionalAcronym(word, prevWord, prevPrevWord = null) {
|
|
284
|
+
if (typeof word !== "string" || typeof prevWord !== "string") return false;
|
|
421
285
|
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
286
|
+
const current = word.toLowerCase().replace(/[^\w]/g, "");
|
|
287
|
+
const prev = prevWord.toLowerCase().replace(/[^\w]/g, "");
|
|
288
|
+
const prevPrev = typeof prevPrevWord === "string"
|
|
289
|
+
? prevPrevWord.toLowerCase().replace(/[^\w]/g, "")
|
|
290
|
+
: null;
|
|
291
|
+
|
|
292
|
+
if (!regionalAcronymList.includes(current)) return false;
|
|
293
|
+
|
|
294
|
+
// Direct 100% safe word before the acronym
|
|
295
|
+
if (regionalAcronymPrecedingWords.includes(prev)) return true;
|
|
296
|
+
|
|
297
|
+
// Extended pattern: e.g., "from the US"
|
|
298
|
+
if (prev === "the" && prevPrev && regionalAcronymPrecedingWords.includes(prevPrev)) {
|
|
427
299
|
return true;
|
|
428
300
|
}
|
|
429
301
|
|
|
430
302
|
return false;
|
|
431
303
|
}
|
|
432
304
|
|
|
305
|
+
static normalizeRegionalAcronym(word) {
|
|
306
|
+
if (typeof word !== "string") {
|
|
307
|
+
throw new TypeError("Invalid input: word must be a string.");
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
return word.toUpperCase();
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
static normalizeAcronymKey(word) {
|
|
314
|
+
return word.toLowerCase().replace(/\./g, ""); // "U.S." → "us"
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
static normalizeCasingForWordByStyle(word, style) {
|
|
318
|
+
if (!word || !style || !titleCaseDefaultOptionsList[style]) return false;
|
|
319
|
+
|
|
320
|
+
const lowerWord = word.toLowerCase();
|
|
321
|
+
const {
|
|
322
|
+
shortConjunctionsList,
|
|
323
|
+
articlesList,
|
|
324
|
+
shortPrepositionsList,
|
|
325
|
+
neverCapitalizedList
|
|
326
|
+
} = titleCaseDefaultOptionsList[style];
|
|
327
|
+
|
|
328
|
+
const combinedList = [
|
|
329
|
+
...shortConjunctionsList,
|
|
330
|
+
...articlesList,
|
|
331
|
+
...shortPrepositionsList,
|
|
332
|
+
...neverCapitalizedList
|
|
333
|
+
];
|
|
334
|
+
|
|
335
|
+
return combinedList.includes(lowerWord) ? word : false;
|
|
336
|
+
}
|
|
337
|
+
|
|
433
338
|
// Check if a word has a suffix
|
|
434
339
|
static hasSuffix(word) {
|
|
435
340
|
// Test if word is longer than suffix
|
|
@@ -671,6 +576,57 @@ export class TitleCaserUtils {
|
|
|
671
576
|
return word;
|
|
672
577
|
}
|
|
673
578
|
|
|
579
|
+
// This function is used to check if a word is an elided word
|
|
580
|
+
static isElidedWord(word) {
|
|
581
|
+
if (typeof word !== "string" || word.trim() === "") {
|
|
582
|
+
throw new TypeError("Invalid input: word must be a non-empty string.");
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
const knownElidedPrefixes = new Set([
|
|
586
|
+
"o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
|
|
587
|
+
]);
|
|
588
|
+
|
|
589
|
+
const normalized = word.trim().toLowerCase().replace(/'/g, "’");
|
|
590
|
+
|
|
591
|
+
for (const prefix of knownElidedPrefixes) {
|
|
592
|
+
if (normalized.startsWith(prefix)) {
|
|
593
|
+
return true;
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
return false;
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
// This function is used to normalize an elided word
|
|
601
|
+
static normalizeElidedWord(word) {
|
|
602
|
+
if (typeof word !== "string" || word.trim() === "") {
|
|
603
|
+
throw new TypeError("Invalid input: word must be a non-empty string.");
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
const knownElidedPrefixes = new Set([
|
|
607
|
+
"o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
|
|
608
|
+
]);
|
|
609
|
+
|
|
610
|
+
const original = word.trim();
|
|
611
|
+
const normalized = original.replace(/'/g, "’").toLowerCase();
|
|
612
|
+
|
|
613
|
+
for (const prefix of knownElidedPrefixes) {
|
|
614
|
+
if (normalized.startsWith(prefix)) {
|
|
615
|
+
const prefixLength = prefix.length;
|
|
616
|
+
const rest = original.slice(prefixLength);
|
|
617
|
+
|
|
618
|
+
const fixedPrefix = prefix.charAt(0).toUpperCase() + prefix.slice(1);
|
|
619
|
+
const fixedRest = rest.length > 0
|
|
620
|
+
? rest.charAt(0).toUpperCase() + rest.slice(1)
|
|
621
|
+
: "";
|
|
622
|
+
|
|
623
|
+
return fixedPrefix + fixedRest;
|
|
624
|
+
}
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
return false;
|
|
628
|
+
}
|
|
629
|
+
|
|
674
630
|
// This function is used to check if a suffix is present in a word that is in the correct terms list
|
|
675
631
|
static correctSuffix(word, correctTerms) {
|
|
676
632
|
// Validate input
|
|
@@ -731,24 +687,30 @@ export class TitleCaserUtils {
|
|
|
731
687
|
|
|
732
688
|
// Split the word into parts delimited by the specified delimiters
|
|
733
689
|
const parts = word.split(delimiters);
|
|
734
|
-
// Count the number of parts
|
|
735
690
|
const numParts = parts.length;
|
|
736
691
|
|
|
737
|
-
// For each part
|
|
692
|
+
// For each part, replace it with the correct term if found or title-case it if not found
|
|
738
693
|
for (let i = 0; i < numParts; i++) {
|
|
739
|
-
// Lowercase the part
|
|
740
694
|
const lowercasedPart = parts[i].toLowerCase();
|
|
741
|
-
// Search for the part in the list of correct terms
|
|
742
695
|
const index = correctTerms.findIndex((t) => t.toLowerCase() === lowercasedPart);
|
|
743
|
-
// If the part is found in the list of correct terms
|
|
744
696
|
if (index >= 0) {
|
|
745
|
-
// Replace the part with the correct term
|
|
746
697
|
parts[i] = correctTerms[index];
|
|
698
|
+
} else {
|
|
699
|
+
// Capitalize first letter and lowercase the rest if no replacement is found
|
|
700
|
+
parts[i] = parts[i].charAt(0).toUpperCase() + parts[i].slice(1).toLowerCase();
|
|
747
701
|
}
|
|
748
702
|
}
|
|
749
703
|
|
|
750
|
-
//
|
|
751
|
-
|
|
704
|
+
// Determine the joiner based on the original word
|
|
705
|
+
let joiner = delimiters.source.charAt(0);
|
|
706
|
+
if (word.includes("-")) {
|
|
707
|
+
joiner = "-";
|
|
708
|
+
} else if (word.includes("'")) {
|
|
709
|
+
joiner = "'";
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
// Join the parts back together using the determined joiner
|
|
713
|
+
return parts.join(joiner);
|
|
752
714
|
}
|
|
753
715
|
|
|
754
716
|
// This function is used to check if a word is in the correct terms list
|
|
@@ -835,4 +797,6 @@ export class TitleCaserUtils {
|
|
|
835
797
|
// Rejoin the words
|
|
836
798
|
return processedWords.join("-");
|
|
837
799
|
}
|
|
800
|
+
|
|
801
|
+
|
|
838
802
|
}
|