@danielhaim/titlecaser 1.7.0 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,10 @@ import {
4
4
  wordReplacementsList,
5
5
  correctTitleCasingList,
6
6
  ignoredWordList,
7
+ commonShortWords,
8
+ regionalAcronymList,
9
+ regionalAcronymPrecedingWords,
10
+ directFollowingIndicatorsRegionalAcronym
7
11
  } from "./TitleCaserConsts.js";
8
12
 
9
13
  export class TitleCaserUtils {
@@ -113,6 +117,10 @@ export class TitleCaserUtils {
113
117
 
114
118
  static isNeverCapitalizedCache = new Map();
115
119
 
// capitalizeFirstLetter(word): returns `word` with its first character upper-cased.
// Only charAt(0) is transformed; the remainder (slice(1)) is appended unchanged, so existing casing after the first character is preserved.
// An empty string yields "". There is no type guard: an argument without charAt/slice (null, undefined, a number) throws.
// NOTE(review): charAt(0) reads a single UTF-16 code unit, so a leading astral character (surrogate pair) is not upper-cased as a unit.
120
+ static capitalizeFirstLetter(word) {
121
+ return word.charAt(0).toUpperCase() + word.slice(1);
122
+ }
123
+
116
124
  // Check if the word is a short conjunction
117
125
  static isShortConjunction(word, style) {
118
126
  // Get the list of short conjunctions from the TitleCaseHelper
@@ -142,6 +150,7 @@ export class TitleCaserUtils {
142
150
  // Check if the word is a short preposition
143
151
  static isShortPreposition(word, style) {
144
152
  // Get the list of short prepositions from the Title Case Helper.
153
+ // CONSOLE LOG THE WORD BEFORE CHECKING IF IT IS IN THE LIST
145
154
  const { shortPrepositionsList } = TitleCaserUtils.getTitleCaseOptions({
146
155
  style: style,
147
156
  });
@@ -183,6 +192,7 @@ export class TitleCaserUtils {
183
192
 
184
193
  // If the word is a short conjunction, article, preposition, or is in the never-capitalized list, return true.
185
194
  // Otherwise, return false.
195
+
186
196
  return (
187
197
  TitleCaserUtils.isShortConjunction(word, style) ||
188
198
  TitleCaserUtils.isArticle(word, style) ||
@@ -225,211 +235,106 @@ export class TitleCaserUtils {
225
235
  return hasUppercase && hasLowercase;
226
236
  }
227
237
 
228
- // Check if a word is an acronym
229
- // (i.e. 'the', 'to', 'within')
230
- static isAcronym(word, prevWord, nextWord) {
231
- try {
232
- if (typeof word !== "string") {
233
- throw new Error("Input word must be a string.");
234
- }
// isEntirelyUppercase(str): true only when all three conditions hold:
//   1. upper-casing leaves `str` unchanged (no lowercase letters present),
//   2. lower-casing changes `str` (at least one cased letter is present),
//   3. `str` is longer than one character.
// Consequently the empty string, single characters, and digit/punctuation-only strings return false.
// There is no type guard: a non-string `str` without toUpperCase throws.
238
+ // Check if the entire input string is uppercase
239
+ static isEntirelyUppercase(str) {
240
+ return str === str.toUpperCase() &&
241
+ str !== str.toLowerCase() &&
242
+ str.length > 1;
243
+ }
235
244
 
236
- const countryCodes = new Set(["us", "usa"]);
237
- const commonShortWords = new Set([
238
- "the",
239
- "in",
240
- "to",
241
- "within",
242
- "towards",
243
- "into",
244
- "at",
245
- ]);
246
- const directFollowingIndicators = new Set([
247
- "policies",
248
- "government",
249
- "military",
250
- "embassy",
251
- "administration",
252
- "senate",
253
- "congress",
254
- "parliament",
255
- "cabinet",
256
- "federation",
257
- "republic",
258
- "democracy",
259
- "law",
260
- "act",
261
- "treaty",
262
- "court",
263
- "legislation",
264
- "statute",
265
- "bill",
266
- "agency",
267
- "department",
268
- "bureau",
269
- "service",
270
- "office",
271
- "council",
272
- "commission",
273
- "division",
274
- "alliance",
275
- "union",
276
- "confederation",
277
- "bloc",
278
- "zone",
279
- "territory",
280
- "province",
281
- "state",
282
- "army",
283
- "navy",
284
- "forces",
285
- "marines",
286
- "airforce",
287
- "defense",
288
- "intelligence",
289
- "security",
290
- "economy",
291
- "budget",
292
- "finance",
293
- "treasury",
294
- "trade",
295
- "sanctions",
296
- "aid",
297
- "strategy",
298
- "plan",
299
- "policy",
300
- "program",
301
- "initiative",
302
- "project",
303
- "reform",
304
- "relations",
305
- "ambassador",
306
- "diplomacy",
307
- "summit",
308
- "conference",
309
- "talks",
310
- "negotiations",
311
- ]);
312
-
313
- const removePunctuation = (word) => word.replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, "");
314
-
315
- // Remove trailing punctuation from the word
316
- const removeTrailingPunctuation = (word) => {
317
- const match = word.match(/^(.*?)([.,\/#!$%\^&\*;:{}=\-_`~()]+)$/);
318
- if (match && match[1]) {
319
- return match[1];
320
- }
321
- return word;
322
- };
323
-
324
- word = word ? removePunctuation(word.toLowerCase()) : "";
325
- word = removeTrailingPunctuation(word);
326
-
327
- prevWord = prevWord ? removePunctuation(prevWord.toLowerCase()) : "";
328
- nextWord = nextWord ? removePunctuation(nextWord.toLowerCase()) : "";
329
-
330
- // Check if it's an acronym with direct following indicators
331
- const isDirectAcronym =
332
- countryCodes.has(word) &&
333
- (!prevWord || commonShortWords.has(prevWord)) &&
334
- (!nextWord || directFollowingIndicators.has(nextWord));
335
-
336
- // Check if it's an acronym based on the previous word
337
- const isPreviousAcronym = countryCodes.has(prevWord) && (!nextWord || directFollowingIndicators.has(nextWord));
338
-
339
- return isDirectAcronym || isPreviousAcronym;
340
- } catch (error) {
341
- console.error(`An error occurred: ${error.message}`);
342
- return false; // Return false in case of errors to indicate failure.
343
- }
344
- }
345
-
346
- static checkIfWordIsAcronym(commonShortWords, prevWord, currentWord, nextWord) {
347
- const countryCodes = ["us", "usa"];
348
- const directPrecedingIndicators = ["the", "in", "to", "from", "against", "with", "within", "towards", "into", "at"];
349
- const directFollowingIndicators = [
350
- "policies",
351
- "government",
352
- "military",
353
- "embassy",
354
- "administration",
355
- "senate",
356
- "congress",
357
- "parliament",
358
- "cabinet",
359
- "federation",
360
- "republic",
361
- "democracy",
362
- "law",
363
- "act",
364
- "treaty",
365
- "court",
366
- "legislation",
367
- "statute",
368
- "bill",
369
- "agency",
370
- "department",
371
- "bureau",
372
- "service",
373
- "office",
374
- "council",
375
- "commission",
376
- "division",
377
- "alliance",
378
- "union",
379
- "confederation",
380
- "bloc",
381
- "zone",
382
- "territory",
383
- "province",
384
- "state",
385
- "army",
386
- "navy",
387
- "forces",
388
- "marines",
389
- "airforce",
390
- "defense",
391
- "intelligence",
392
- "security",
393
- "economy",
394
- "budget",
395
- "finance",
396
- "treasury",
397
- "trade",
398
- "sanctions",
399
- "aid",
400
- "strategy",
401
- "plan",
402
- "policy",
403
- "program",
404
- "initiative",
405
- "project",
406
- "reform",
407
- "relations",
408
- "ambassador",
409
- "diplomacy",
410
- "summit",
411
- "conference",
412
- "talks",
413
- "negotiations",
// isRegionalAcronym(word): membership test against regionalAcronymList (imported from ./TitleCaserConsts.js).
// Throws TypeError when `word` is not a string; returns false for strings shorter than two characters.
// Otherwise the word is lower-cased and looked up with Array.prototype.includes.
// NOTE(review): unlike isRegionalAcronymNoDot, punctuation is NOT stripped before the lookup, so a dotted or
// comma-suffixed form only matches if the list itself contains that exact form — confirm against TitleCaserConsts.js.
245
+ static isRegionalAcronym(word) {
246
+ if (typeof word !== "string") {
247
+ throw new TypeError("Invalid input: word must be a string.");
248
+ }
249
+
250
+ if (word.length < 2) {
251
+ return false;
252
+ }
253
+
254
+ const lowercasedWord = word.toLowerCase();
255
+ return regionalAcronymList.includes(lowercasedWord);
256
+ }
257
+
// isRegionalAcronymNoDot(word, nextWord, prevWord = null): context check for an undotted regional acronym.
// Returns false unless both `word` and `nextWord` are strings. Both are lower-cased and stripped of every
// character that is neither a word character nor whitespace before being looked up.
// Returns true when the stripped word is in regionalAcronymList AND either
//   (a) prevWord is truthy and its lower-cased form is "the" (the only entry of smallDirectPrecedingIndicators), or
//   (b) the stripped nextWord is in directFollowingIndicatorsRegionalAcronym.
// NOTE(review): prevWord is lower-cased but not punctuation-stripped, so a preceding word with attached
// punctuation or quotes will not match; a truthy non-string prevWord throws on toLowerCase once the word is in the list.
// NOTE(review): because nextWord must be a string, this returns false for a word with no following word even
// when prevWord is "the" — presumably isFinalWordRegionalAcronym covers that case; confirm against the caller.
258
+ static isRegionalAcronymNoDot(word, nextWord, prevWord = null) {
259
+ if (typeof word !== 'string' || typeof nextWord !== 'string') {
260
+ return false;
261
+ }
262
+
263
+ const firstWordStripped = word.toLowerCase().replace(/[^\w\s]/g, "");
264
+ const nextWordStripped = nextWord.toLowerCase().replace(/[^\w\s]/g, "");
265
+
266
+ const smallDirectPrecedingIndicators = [
267
+ "the",
414
268
  ];
415
269
 
416
- const removePunctuation = (word) => word.replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, "");
270
+ if (prevWord &&
271
+ regionalAcronymList.includes(firstWordStripped) &&
272
+ smallDirectPrecedingIndicators.includes(prevWord.toLowerCase())) {
273
+
274
+ return true;
275
+ }
276
+
277
+ return (
278
+ regionalAcronymList.includes(firstWordStripped) &&
279
+ directFollowingIndicatorsRegionalAcronym.includes(nextWordStripped)
280
+ );
281
+ }
417
282
 
418
- currentWord = currentWord ? removePunctuation(currentWord.toLowerCase()) : "";
419
- prevWord = prevWord ? removePunctuation(prevWord.toLowerCase()) : "";
420
- nextWord = nextWord ? removePunctuation(nextWord.toLowerCase()) : "";
// isFinalWordRegionalAcronym(word, prevWord, prevPrevWord = null): context check for a regional acronym using
// the one or two words that precede it (the name suggests it is meant for the last word of a title — confirm against the caller).
// Returns false unless `word` and `prevWord` are strings. Each is lower-cased and stripped of all non-word
// characters (dots, commas, quotes, whitespace); prevPrevWord is normalised the same way only when it is a string, otherwise it is null.
// Returns true when the normalised word is in regionalAcronymList AND either
//   - the normalised prevWord is in regionalAcronymPrecedingWords, or
//   - the normalised prevWord is "the" and the normalised prevPrevWord is in regionalAcronymPrecedingWords.
// Returns false in every other case.
283
+ static isFinalWordRegionalAcronym(word, prevWord, prevPrevWord = null) {
284
+ if (typeof word !== "string" || typeof prevWord !== "string") return false;
421
285
 
422
- if (
423
- countryCodes.includes(currentWord.toLowerCase()) &&
424
- (prevWord === null || commonShortWords.includes(prevWord.toLowerCase())) &&
425
- (nextWord === null || directFollowingIndicators.includes(nextWord.toLowerCase()))
426
- ) {
286
+ const current = word.toLowerCase().replace(/[^\w]/g, "");
287
+ const prev = prevWord.toLowerCase().replace(/[^\w]/g, "");
288
+ const prevPrev = typeof prevPrevWord === "string"
289
+ ? prevPrevWord.toLowerCase().replace(/[^\w]/g, "")
290
+ : null;
291
+
292
+ if (!regionalAcronymList.includes(current)) return false;
293
+
294
+ // Direct 100% safe word before the acronym
295
+ if (regionalAcronymPrecedingWords.includes(prev)) return true;
296
+
297
+ // Extended pattern: e.g., "from the US"
298
+ if (prev === "the" && prevPrev && regionalAcronymPrecedingWords.includes(prevPrev)) {
427
299
  return true;
428
300
  }
429
301
 
430
302
  return false;
431
303
  }
432
304
 
// normalizeRegionalAcronym(word): returns `word` fully upper-cased.
// Throws TypeError when `word` is not a string. Nothing is stripped: dots and other punctuation are kept as-is,
// and no check is made that the word is actually in regionalAcronymList.
305
+ static normalizeRegionalAcronym(word) {
306
+ if (typeof word !== "string") {
307
+ throw new TypeError("Invalid input: word must be a string.");
308
+ }
309
+
310
+ return word.toUpperCase();
311
+ }
312
+
// normalizeAcronymKey(word): builds a lookup key by lower-casing `word` and removing every "." character.
// Other punctuation (commas, apostrophes, hyphens) is left in place.
// NOTE(review): there is no type guard here, unlike normalizeRegionalAcronym; a non-string argument throws on toLowerCase.
313
+ static normalizeAcronymKey(word) {
314
+ return word.toLowerCase().replace(/\./g, ""); // "U.S." → "us"
315
+ }
316
+
// normalizeCasingForWordByStyle(word, style): reports whether `word` is one of the style's "minor" words.
// Returns false when `word` or `style` is falsy, or when titleCaseDefaultOptionsList has no entry for `style`.
// Otherwise it merges the style's shortConjunctionsList, articlesList, shortPrepositionsList and
// neverCapitalizedList and returns the ORIGINAL `word` (unchanged) if its lower-cased form is in the merged list, else false.
// NOTE(review): despite the name, no casing is altered here — the result is either the input word or false.
// NOTE(review): titleCaseDefaultOptionsList is not among the imports visible in this diff hunk; presumably it is
// imported on a line above the shown context — confirm, otherwise this throws a ReferenceError.
// NOTE(review): the spread assumes all four lists exist and are iterable for every style; a missing list would throw — confirm against TitleCaserConsts.js.
317
+ static normalizeCasingForWordByStyle(word, style) {
318
+ if (!word || !style || !titleCaseDefaultOptionsList[style]) return false;
319
+
320
+ const lowerWord = word.toLowerCase();
321
+ const {
322
+ shortConjunctionsList,
323
+ articlesList,
324
+ shortPrepositionsList,
325
+ neverCapitalizedList
326
+ } = titleCaseDefaultOptionsList[style];
327
+
328
+ const combinedList = [
329
+ ...shortConjunctionsList,
330
+ ...articlesList,
331
+ ...shortPrepositionsList,
332
+ ...neverCapitalizedList
333
+ ];
334
+
335
+ return combinedList.includes(lowerWord) ? word : false;
336
+ }
337
+
433
338
  // Check if a word has a suffix
434
339
  static hasSuffix(word) {
435
340
  // Test if word is longer than suffix
@@ -671,6 +576,57 @@ export class TitleCaserUtils {
671
576
  return word;
672
577
  }
673
578
 
// isElidedWord(word): true when the word begins with one of a fixed set of elision prefixes.
// Throws TypeError when `word` is not a string or is empty/whitespace-only.
// The word is trimmed, lower-cased, and straight apostrophes (') are converted to typographic ones (’) so that
// both spellings match the prefix set, which is written with typographic apostrophes.
// This is a startsWith test, so any word beginning with a listed prefix matches — e.g. "o’" also matches
// "o’clock" and "O’Brien", and "ne’er" matches "ne’er-do-well".
// NOTE(review): the same prefix set is declared again in normalizeElidedWord; the two copies must be kept in sync.
579
+ // This function is used to check if a word is an elided word
580
+ static isElidedWord(word) {
581
+ if (typeof word !== "string" || word.trim() === "") {
582
+ throw new TypeError("Invalid input: word must be a non-empty string.");
583
+ }
584
+
585
+ const knownElidedPrefixes = new Set([
586
+ "o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
587
+ ]);
588
+
589
+ const normalized = word.trim().toLowerCase().replace(/'/g, "’");
590
+
591
+ for (const prefix of knownElidedPrefixes) {
592
+ if (normalized.startsWith(prefix)) {
593
+ return true;
594
+ }
595
+ }
596
+
597
+ return false;
598
+ }
599
+
// normalizeElidedWord(word): rebuilds an elided word with a capitalised prefix and a capitalised remainder.
// Throws TypeError when `word` is not a string or is empty/whitespace-only; returns false when no prefix matches.
// Matching is done on a trimmed, lower-cased copy with straight apostrophes converted to typographic ones.
// On a match the result is: the canonical prefix from the set (typographic apostrophe, first character upper-cased)
// followed by the rest of the trimmed ORIGINAL word with its first character upper-cased, e.g. "o'clock" -> "O’Clock".
// Characters after the first one of the remainder keep whatever casing the caller passed in.
// Slicing the original by prefix.length relies on ' and ’ each being one UTF-16 code unit.
// NOTE(review): for prefixes that start with an apostrophe (’tis, ’twas, ’n’), charAt(0).toUpperCase() is a no-op,
// so the prefix itself comes out entirely lower-case — confirm this is the intended result.
// NOTE(review): the prefix set duplicates the one in isElidedWord; the two copies must be kept in sync.
600
+ // This function is used to normalize an elided word
601
+ static normalizeElidedWord(word) {
602
+ if (typeof word !== "string" || word.trim() === "") {
603
+ throw new TypeError("Invalid input: word must be a non-empty string.");
604
+ }
605
+
606
+ const knownElidedPrefixes = new Set([
607
+ "o’", "fo’", "ne’er", "e’er", "’tis", "’twas", "’n’"
608
+ ]);
609
+
610
+ const original = word.trim();
611
+ const normalized = original.replace(/'/g, "’").toLowerCase();
612
+
613
+ for (const prefix of knownElidedPrefixes) {
614
+ if (normalized.startsWith(prefix)) {
615
+ const prefixLength = prefix.length;
616
+ const rest = original.slice(prefixLength);
617
+
618
+ const fixedPrefix = prefix.charAt(0).toUpperCase() + prefix.slice(1);
619
+ const fixedRest = rest.length > 0
620
+ ? rest.charAt(0).toUpperCase() + rest.slice(1)
621
+ : "";
622
+
623
+ return fixedPrefix + fixedRest;
624
+ }
625
+ }
626
+
627
+ return false;
628
+ }
629
+
674
630
  // This function is used to check if a suffix is present in a word that is in the correct terms list
675
631
  static correctSuffix(word, correctTerms) {
676
632
  // Validate input
@@ -731,24 +687,30 @@ export class TitleCaserUtils {
731
687
 
732
688
  // Split the word into parts delimited by the specified delimiters
733
689
  const parts = word.split(delimiters);
734
- // Count the number of parts
735
690
  const numParts = parts.length;
736
691
 
737
- // For each part
692
+ // For each part, replace it with the correct term if found or title-case it if not found
738
693
  for (let i = 0; i < numParts; i++) {
739
- // Lowercase the part
740
694
  const lowercasedPart = parts[i].toLowerCase();
741
- // Search for the part in the list of correct terms
742
695
  const index = correctTerms.findIndex((t) => t.toLowerCase() === lowercasedPart);
743
- // If the part is found in the list of correct terms
744
696
  if (index >= 0) {
745
- // Replace the part with the correct term
746
697
  parts[i] = correctTerms[index];
698
+ } else {
699
+ // Capitalize first letter and lowercase the rest if no replacement is found
700
+ parts[i] = parts[i].charAt(0).toUpperCase() + parts[i].slice(1).toLowerCase();
747
701
  }
748
702
  }
749
703
 
750
- // Join the parts back together using the first delimiter as the default delimiter
751
- return parts.join(delimiters.source.charAt(0));
704
+ // Determine the joiner based on the original word
705
+ let joiner = delimiters.source.charAt(0);
706
+ if (word.includes("-")) {
707
+ joiner = "-";
708
+ } else if (word.includes("'")) {
709
+ joiner = "'";
710
+ }
711
+
712
+ // Join the parts back together using the determined joiner
713
+ return parts.join(joiner);
752
714
  }
753
715
 
754
716
  // This function is used to check if a word is in the correct terms list
@@ -835,4 +797,6 @@ export class TitleCaserUtils {
835
797
  // Rejoin the words
836
798
  return processedWords.join("-");
837
799
  }
800
+
801
+
838
802
  }