cmpstr 3.0.1 → 3.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. package/README.md +7 -1
  2. package/dist/CmpStr.esm.js +257 -25
  3. package/dist/CmpStr.esm.js.map +1 -1
  4. package/dist/CmpStr.esm.min.js +2 -2
  5. package/dist/CmpStr.esm.min.js.map +1 -1
  6. package/dist/CmpStr.umd.js +257 -25
  7. package/dist/CmpStr.umd.js.map +1 -1
  8. package/dist/CmpStr.umd.min.js +2 -2
  9. package/dist/CmpStr.umd.min.js.map +1 -1
  10. package/dist/cjs/CmpStr.cjs +12 -1
  11. package/dist/cjs/CmpStr.cjs.map +1 -1
  12. package/dist/cjs/CmpStrAsync.cjs +11 -1
  13. package/dist/cjs/CmpStrAsync.cjs.map +1 -1
  14. package/dist/cjs/index.cjs +1 -1
  15. package/dist/cjs/metric/Cosine.cjs +1 -1
  16. package/dist/cjs/metric/Cosine.cjs.map +1 -1
  17. package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -1
  18. package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -1
  19. package/dist/cjs/metric/DiceSorensen.cjs +1 -1
  20. package/dist/cjs/metric/DiceSorensen.cjs.map +1 -1
  21. package/dist/cjs/metric/Hamming.cjs +2 -2
  22. package/dist/cjs/metric/Hamming.cjs.map +1 -1
  23. package/dist/cjs/metric/Jaccard.cjs +1 -1
  24. package/dist/cjs/metric/Jaccard.cjs.map +1 -1
  25. package/dist/cjs/metric/JaroWinkler.cjs +1 -1
  26. package/dist/cjs/metric/JaroWinkler.cjs.map +1 -1
  27. package/dist/cjs/metric/LCS.cjs +1 -1
  28. package/dist/cjs/metric/LCS.cjs.map +1 -1
  29. package/dist/cjs/metric/Levenshtein.cjs +1 -1
  30. package/dist/cjs/metric/Levenshtein.cjs.map +1 -1
  31. package/dist/cjs/metric/Metric.cjs +6 -6
  32. package/dist/cjs/metric/Metric.cjs.map +1 -1
  33. package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -1
  34. package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -1
  35. package/dist/cjs/metric/SmithWaterman.cjs +1 -1
  36. package/dist/cjs/metric/SmithWaterman.cjs.map +1 -1
  37. package/dist/cjs/metric/qGram.cjs +1 -1
  38. package/dist/cjs/metric/qGram.cjs.map +1 -1
  39. package/dist/cjs/phonetic/Caverphone.cjs +199 -0
  40. package/dist/cjs/phonetic/Caverphone.cjs.map +1 -0
  41. package/dist/cjs/phonetic/Cologne.cjs +1 -1
  42. package/dist/cjs/phonetic/Cologne.cjs.map +1 -1
  43. package/dist/cjs/phonetic/Metaphone.cjs +1 -1
  44. package/dist/cjs/phonetic/Metaphone.cjs.map +1 -1
  45. package/dist/cjs/phonetic/Phonetic.cjs +50 -16
  46. package/dist/cjs/phonetic/Phonetic.cjs.map +1 -1
  47. package/dist/cjs/phonetic/Soundex.cjs +1 -1
  48. package/dist/cjs/phonetic/Soundex.cjs.map +1 -1
  49. package/dist/cjs/utils/DeepMerge.cjs +2 -2
  50. package/dist/cjs/utils/DeepMerge.cjs.map +1 -1
  51. package/dist/cjs/utils/DiffChecker.cjs +10 -10
  52. package/dist/cjs/utils/DiffChecker.cjs.map +1 -1
  53. package/dist/cjs/utils/Filter.cjs +1 -1
  54. package/dist/cjs/utils/Filter.cjs.map +1 -1
  55. package/dist/cjs/utils/HashTable.cjs +1 -1
  56. package/dist/cjs/utils/HashTable.cjs.map +1 -1
  57. package/dist/cjs/utils/Normalizer.cjs +1 -1
  58. package/dist/cjs/utils/Normalizer.cjs.map +1 -1
  59. package/dist/cjs/utils/Pool.cjs +1 -1
  60. package/dist/cjs/utils/Pool.cjs.map +1 -1
  61. package/dist/cjs/utils/Profiler.cjs +3 -3
  62. package/dist/cjs/utils/Profiler.cjs.map +1 -1
  63. package/dist/cjs/utils/Registry.cjs +7 -7
  64. package/dist/cjs/utils/Registry.cjs.map +1 -1
  65. package/dist/cjs/utils/TextAnalyzer.cjs +2 -2
  66. package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -1
  67. package/dist/esm/{CmpStr.js → CmpStr.mjs} +36 -25
  68. package/dist/esm/CmpStr.mjs.map +1 -0
  69. package/dist/esm/{CmpStrAsync.js → CmpStrAsync.mjs} +16 -6
  70. package/dist/esm/CmpStrAsync.mjs.map +1 -0
  71. package/dist/esm/index.mjs +7 -0
  72. package/dist/esm/index.mjs.map +1 -0
  73. package/dist/esm/metric/{Cosine.js → Cosine.mjs} +4 -4
  74. package/dist/esm/metric/Cosine.mjs.map +1 -0
  75. package/dist/esm/metric/{DamerauLevenshtein.js → DamerauLevenshtein.mjs} +4 -4
  76. package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -0
  77. package/dist/esm/metric/{DiceSorensen.js → DiceSorensen.mjs} +4 -4
  78. package/dist/esm/metric/DiceSorensen.mjs.map +1 -0
  79. package/dist/esm/metric/{Hamming.js → Hamming.mjs} +4 -4
  80. package/dist/esm/metric/Hamming.mjs.map +1 -0
  81. package/dist/esm/metric/{Jaccard.js → Jaccard.mjs} +4 -4
  82. package/dist/esm/metric/Jaccard.mjs.map +1 -0
  83. package/dist/esm/metric/{JaroWinkler.js → JaroWinkler.mjs} +4 -4
  84. package/dist/esm/metric/JaroWinkler.mjs.map +1 -0
  85. package/dist/esm/metric/{LCS.js → LCS.mjs} +4 -4
  86. package/dist/esm/metric/LCS.mjs.map +1 -0
  87. package/dist/esm/metric/{Levenshtein.js → Levenshtein.mjs} +4 -4
  88. package/dist/esm/metric/Levenshtein.mjs.map +1 -0
  89. package/dist/esm/metric/{Metric.js → Metric.mjs} +10 -10
  90. package/dist/esm/metric/Metric.mjs.map +1 -0
  91. package/dist/esm/metric/{NeedlemanWunsch.js → NeedlemanWunsch.mjs} +4 -4
  92. package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -0
  93. package/dist/esm/metric/{SmithWaterman.js → SmithWaterman.mjs} +4 -4
  94. package/dist/esm/metric/SmithWaterman.mjs.map +1 -0
  95. package/dist/esm/metric/{qGram.js → qGram.mjs} +4 -4
  96. package/dist/esm/metric/qGram.mjs.map +1 -0
  97. package/dist/esm/phonetic/Caverphone.mjs +201 -0
  98. package/dist/esm/phonetic/Caverphone.mjs.map +1 -0
  99. package/dist/esm/phonetic/{Cologne.js → Cologne.mjs} +3 -3
  100. package/dist/esm/phonetic/Cologne.mjs.map +1 -0
  101. package/dist/esm/phonetic/{Metaphone.js → Metaphone.mjs} +3 -3
  102. package/dist/esm/phonetic/Metaphone.mjs.map +1 -0
  103. package/dist/esm/phonetic/{Phonetic.js → Phonetic.mjs} +52 -21
  104. package/dist/esm/phonetic/Phonetic.mjs.map +1 -0
  105. package/dist/esm/phonetic/{Soundex.js → Soundex.mjs} +3 -3
  106. package/dist/esm/phonetic/Soundex.mjs.map +1 -0
  107. package/dist/esm/utils/{DeepMerge.js → DeepMerge.mjs} +3 -3
  108. package/dist/esm/utils/DeepMerge.mjs.map +1 -0
  109. package/dist/esm/utils/{DiffChecker.js → DiffChecker.mjs} +11 -11
  110. package/dist/esm/utils/DiffChecker.mjs.map +1 -0
  111. package/dist/esm/utils/{Filter.js → Filter.mjs} +2 -2
  112. package/dist/esm/utils/Filter.mjs.map +1 -0
  113. package/dist/esm/utils/{HashTable.js → HashTable.mjs} +2 -2
  114. package/dist/esm/utils/HashTable.mjs.map +1 -0
  115. package/dist/esm/utils/{Normalizer.js → Normalizer.mjs} +3 -3
  116. package/dist/esm/utils/Normalizer.mjs.map +1 -0
  117. package/dist/esm/utils/{Pool.js → Pool.mjs} +2 -2
  118. package/dist/esm/utils/Pool.mjs.map +1 -0
  119. package/dist/esm/utils/{Profiler.js → Profiler.mjs} +4 -4
  120. package/dist/esm/utils/Profiler.mjs.map +1 -0
  121. package/dist/esm/utils/{Registry.js → Registry.mjs} +8 -8
  122. package/dist/esm/utils/Registry.mjs.map +1 -0
  123. package/dist/esm/utils/{TextAnalyzer.js → TextAnalyzer.mjs} +3 -3
  124. package/dist/esm/utils/TextAnalyzer.mjs.map +1 -0
  125. package/dist/types/index.d.ts +3 -2
  126. package/dist/types/phonetic/Caverphone.d.ts +55 -0
  127. package/dist/types/phonetic/Phonetic.d.ts +14 -2
  128. package/dist/types/phonetic/index.d.ts +1 -0
  129. package/dist/types/utils/Types.d.ts +12 -0
  130. package/package.json +15 -13
  131. package/dist/esm/CmpStr.js.map +0 -1
  132. package/dist/esm/CmpStrAsync.js.map +0 -1
  133. package/dist/esm/index.js +0 -7
  134. package/dist/esm/index.js.map +0 -1
  135. package/dist/esm/metric/Cosine.js.map +0 -1
  136. package/dist/esm/metric/DamerauLevenshtein.js.map +0 -1
  137. package/dist/esm/metric/DiceSorensen.js.map +0 -1
  138. package/dist/esm/metric/Hamming.js.map +0 -1
  139. package/dist/esm/metric/Jaccard.js.map +0 -1
  140. package/dist/esm/metric/JaroWinkler.js.map +0 -1
  141. package/dist/esm/metric/LCS.js.map +0 -1
  142. package/dist/esm/metric/Levenshtein.js.map +0 -1
  143. package/dist/esm/metric/Metric.js.map +0 -1
  144. package/dist/esm/metric/NeedlemanWunsch.js.map +0 -1
  145. package/dist/esm/metric/SmithWaterman.js.map +0 -1
  146. package/dist/esm/metric/qGram.js.map +0 -1
  147. package/dist/esm/phonetic/Cologne.js.map +0 -1
  148. package/dist/esm/phonetic/Metaphone.js.map +0 -1
  149. package/dist/esm/phonetic/Phonetic.js.map +0 -1
  150. package/dist/esm/phonetic/Soundex.js.map +0 -1
  151. package/dist/esm/utils/DeepMerge.js.map +0 -1
  152. package/dist/esm/utils/DiffChecker.js.map +0 -1
  153. package/dist/esm/utils/Filter.js.map +0 -1
  154. package/dist/esm/utils/HashTable.js.map +0 -1
  155. package/dist/esm/utils/Normalizer.js.map +0 -1
  156. package/dist/esm/utils/Pool.js.map +0 -1
  157. package/dist/esm/utils/Profiler.js.map +0 -1
  158. package/dist/esm/utils/Registry.js.map +0 -1
  159. package/dist/esm/utils/TextAnalyzer.js.map +0 -1
package/README.md CHANGED
@@ -68,8 +68,14 @@ console.log( result );
68
68
  // [ 'Meyer', 'Meier' ]
69
69
  ```
70
70
 
71
+ _Try with [OneCompiler](https://onecompiler.com/nodejs/43qr6trny)._
72
+
73
+ ## CLI Tool
74
+
75
+ Try out or use CmpStr in the terminal. Install the **[cmpstr-cli](https://npmjs.com/package/cmpstr-cli)** package to use many features of CmpStr directly from the console via the `cmpstr` command. Its many options and parameters also make the command suitable for scripts and automated processing.
76
+
71
77
  ## Documentation
72
78
 
73
79
  The full documentation, API reference and advanced usage examples are available in the [GitHub Wiki](https://github.com/komed3/cmpstr/wiki).
74
80
 
75
- **LICENSE MIT © 2023-2025 PAUL KÖHLER (KOMED3)**
81
+ **LICENSE MIT © 2023-2025 PAUL KÖHLER (KOMED3)**
@@ -1,5 +1,5 @@
1
1
  /**
2
- * CmpStr v3.0.1 dev-052fa0c-250614
2
+ * CmpStr v3.0.3 build-462b952-250813
3
3
  * This is a lightweight, fast and well performing library for calculating string similarity.
4
4
  * (c) 2023-2025 Paul Köhler @komed3 / MIT License
5
5
  * Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
@@ -61,7 +61,7 @@ function set(t, path, value) {
61
61
  const [k, ...r] = parse(path);
62
62
  // Throw an error if the target is defined but is not a non-null object
63
63
  if (t !== undefined && (typeof t !== 'object' || t === null))
64
- throw Error(`cannot set property <${k}> of <${JSON.stringify(t)}>`);
64
+ throw Error(`Cannot set property <${k}> of <${JSON.stringify(t)}>`);
65
65
  // Assign the value to the specified key in the object
66
66
  return Object.assign(t ?? (typeof k === 'number' ? [] : Object.create(null)), {
67
67
  [k]: set(t?.[k], r.join('.'), value)
@@ -1680,7 +1680,7 @@ const factory = Object.create(null);
1680
1680
  function Registry(reg, ctor) {
1681
1681
  // Throws an error if the registry already exists
1682
1682
  if (reg in registry || reg in factory)
1683
- throw new Error(`registry <${reg}> already exists / overwriting is forbidden`);
1683
+ throw new Error(`Registry <${reg}> already exists / overwriting is forbidden`);
1684
1684
  // Create a registry object to hold class constructors
1685
1685
  const classes = Object.create(null);
1686
1686
  const service = {
@@ -1695,9 +1695,9 @@ function Registry(reg, ctor) {
1695
1695
  */
1696
1696
  add(name, cls, update = false) {
1697
1697
  if (!(cls.prototype instanceof ctor))
1698
- throw new TypeError(`class must extend <${reg}>`);
1698
+ throw new TypeError(`Class must extend <${reg}>`);
1699
1699
  if (!update && name in classes)
1700
- throw new Error(`entry <${name}> already exists / use <update=true> to overwrite`);
1700
+ throw new Error(`Entry <${name}> already exists / use <update=true> to overwrite`);
1701
1701
  classes[name] = cls;
1702
1702
  },
1703
1703
  /**
@@ -1728,7 +1728,7 @@ function Registry(reg, ctor) {
1728
1728
  */
1729
1729
  get(name) {
1730
1730
  if (!(name in classes))
1731
- throw new Error(`class <${name}> not registered for <${reg}>`);
1731
+ throw new Error(`Class <${name}> not registered for <${reg}>`);
1732
1732
  return classes[name];
1733
1733
  }
1734
1734
  };
@@ -1749,7 +1749,7 @@ function Registry(reg, ctor) {
1749
1749
  */
1750
1750
  function resolveCls(reg, cls) {
1751
1751
  if (!(reg in registry))
1752
- throw new ReferenceError(`registry <${reg}> does not exist`);
1752
+ throw new ReferenceError(`Registry <${reg}> does not exist`);
1753
1753
  return (typeof cls === 'string' ? registry[reg]?.get(cls) : cls);
1754
1754
  }
1755
1755
  /**
@@ -1767,7 +1767,9 @@ function createFromRegistry(reg, cls, ...args) {
1767
1767
  return new cls(...args);
1768
1768
  }
1769
1769
  catch (err) {
1770
- throw new Error(`cannot instantiate class <${cls}>`);
1770
+ throw new Error(`Cannot instantiate class <${cls}>`, {
1771
+ cause: err
1772
+ });
1771
1773
  }
1772
1774
  }
1773
1775
 
@@ -1865,7 +1867,7 @@ class Metric {
1865
1867
  this.b = Array.isArray(b) ? b : [b];
1866
1868
  // Validate inputs: ensure they are not empty
1867
1869
  if (this.a.length === 0 || this.b.length === 0)
1868
- throw new Error(`inputs <a> and <b> must not be empty`);
1870
+ throw new Error(`Inputs <a> and <b> must not be empty`);
1869
1871
  // Set options
1870
1872
  this.options = opt;
1871
1873
  this.symmetric = symmetric;
@@ -1902,7 +1904,7 @@ class Metric {
1902
1904
  * @throws {Error} - If not overridden in a subclass
1903
1905
  */
1904
1906
  compute(a, b, m, n, maxLen) {
1905
- throw new Error(`method compute() must be overridden in a subclass`);
1907
+ throw new Error(`Method compute() must be overridden in a subclass`);
1906
1908
  }
1907
1909
  /**
1908
1910
  * Run the metric computation for single inputs (two strings).
@@ -2059,7 +2061,7 @@ class Metric {
2059
2061
  */
2060
2062
  isPairwise(safe = false) {
2061
2063
  return this.isBatch() && this.a.length === this.b.length ? true : !safe && (() => {
2062
- throw new Error(`mode <pairwise> requires arrays of equal length`);
2064
+ throw new Error(`Mode <pairwise> requires arrays of equal length`);
2063
2065
  })();
2064
2066
  }
2065
2067
  /**
@@ -2120,7 +2122,7 @@ class Metric {
2120
2122
  this.runPairwise();
2121
2123
  break;
2122
2124
  // Unsupported mode
2123
- default: throw new Error(`unsupported mode <${mode}>`);
2125
+ default: throw new Error(`Unsupported mode <${mode}>`);
2124
2126
  }
2125
2127
  }
2126
2128
  /**
@@ -2155,7 +2157,7 @@ class Metric {
2155
2157
  await this.runPairwiseAsync();
2156
2158
  break;
2157
2159
  // Unsupported mode
2158
- default: throw new Error(`unsupported async mode <${mode}>`);
2160
+ default: throw new Error(`Unsupported async mode <${mode}>`);
2159
2161
  }
2160
2162
  }
2161
2163
  /**
@@ -2698,7 +2700,7 @@ class HammingDistance extends Metric {
2698
2700
  }
2699
2701
  // Standard: Error for unequal length
2700
2702
  else
2701
- throw new Error(`strings must be of equal length for Hamming Distance, a=${m} and b=${n} given, ` +
2703
+ throw new Error(`Strings must be of equal length for Hamming Distance, a=${m} and b=${n} given, ` +
2702
2704
  `use option.pad for automatic adjustment`);
2703
2705
  }
2704
2706
  // Calculate the Hamming distance
@@ -3347,8 +3349,8 @@ MetricRegistry.add('smithWaterman', SmithWatermanDistance);
3347
3349
  * pose a risk of infringing upon existing trademarks due to their pronunciation.
3348
3350
  *
3349
3351
  * This module provides an abstract class for generating phonetic indices based
3350
- * on mappings and rules. It allows for the implementation of various phonetic
3351
- * algorithms by extending the abstract class.
3352
+ * on mappings, patterns and rules. It allows for the implementation of various
3353
+ * phonetic algorithms by extending the abstract class.
3352
3354
  *
3353
3355
  * @module Phonetic
3354
3356
  * @author Paul Köhler (komed3)
@@ -3388,22 +3390,55 @@ class Phonetic {
3388
3390
  * Constructor for the Phonetic class.
3389
3391
  *
3390
3392
  * Initializes the phonetic algorithm with the specified options and mapping.
3393
+ * Options hierarchy: User input > mapping options > default
3391
3394
  *
3392
3395
  * @param {string} algo - The name of the algorithm (e.g. 'soundex')
3393
3396
  * @param {PhoneticOptions} [opt] - Options for the phonetic algorithm
3394
3397
  * @throws {Error} - If the requested mapping is not declared
3395
3398
  */
3396
3399
  constructor(algo, opt = {}) {
3397
- // Set the options by merging the default options with the provided ones
3398
- this.options = merge(this.constructor.default ?? {}, opt);
3399
- // Get the mapping based on the provided options
3400
- const map = PhoneticMappingRegistry.get(algo, this.options.map);
3400
+ // Get the phonetic default options
3401
+ const defaults = this.constructor.default ?? {};
3402
+ // Determine phonetic map ID from options or use defaults
3403
+ const mapId = opt.map ?? defaults.map;
3404
+ // If no mapping is specified, throw an error
3405
+ if (!mapId)
3406
+ throw new Error(`No mapping specified for phonetic algorithm`);
3407
+ // Get the mapping based on the determined map ID
3408
+ const map = PhoneticMappingRegistry.get(algo, mapId);
3401
3409
  // If the mapping is not defined, throw an error
3402
3410
  if (map === undefined)
3403
- throw new Error(`requested mapping <${this.options.map}> is not declared`);
3411
+ throw new Error(`Requested mapping <${mapId}> is not declared`);
3412
+ // Set the options by merging the defaults, the mapping options and the provided ones
3413
+ this.options = merge(merge(defaults, map.options ?? {}), opt);
3414
+ // Set the algorithm name and mapping
3404
3415
  this.algo = algo;
3405
3416
  this.map = map;
3406
3417
  }
3418
+ /**
3419
+ * Applies patterns to a word based on the phonetic map.
3420
+ *
3421
+ * This method processes the word by applying all defined patterns in the
3422
+ * phonetic map. It replaces occurrences of specified patterns with their
3423
+ * corresponding replacements.
3424
+ *
3425
+ * @param {string} word - The input word to be processed
3426
+ * @returns {string} - The modified word after applying all patterns
3427
+ */
3428
+ applyPattern(word) {
3429
+ const { patterns = [] } = this.map;
3430
+ // If no patterns are provided, return the input
3431
+ if (!patterns || !patterns.length)
3432
+ return word;
3433
+ // Iterate over the patterns and replace all matches
3434
+ for (const { pattern, replace, all = false } of patterns) {
3435
+ // Search for the pattern in the word and replace it
3436
+ // Use replaceAll if 'all' is true, otherwise use replace
3437
+ word = word[all ? 'replaceAll' : 'replace'](pattern, replace);
3438
+ }
3439
+ // Return the modified word after applying all patterns
3440
+ return word;
3441
+ }
3407
3442
  /**
3408
3443
  * Applies phonetic rules to a character in a word context.
3409
3444
  *
@@ -3481,6 +3516,9 @@ class Phonetic {
3481
3516
  */
3482
3517
  encode(word) {
3483
3518
  const { map = {}, ignore = [] } = this.map;
3519
+ // Apply patterns to the word before processing
3520
+ // This allows for pre-processing of the word based on defined patterns
3521
+ word = this.applyPattern(word);
3484
3522
  // Get the characters of the word and its length
3485
3523
  const chars = this.word2Chars(word);
3486
3524
  const charLen = chars.length;
@@ -3517,11 +3555,11 @@ class Phonetic {
3517
3555
  * @returns {string|undefined} - The phonetic code or undefined if no code applies
3518
3556
  */
3519
3557
  mapChar(char, i, chars, charLen, lastCode, map) {
3520
- const { dedupe = true } = this.options;
3558
+ const { dedupe = true, fallback = undefined } = this.options;
3521
3559
  // Apply phonetic rules to the character
3522
3560
  // If no rules apply, use the mapping
3523
- // If the character is not in the mapping, return undefined
3524
- const c = this.applyRules(char, i, chars, charLen) ?? map[char] ?? undefined;
3561
+ // If the character is not in the mapping, return the fallback
3562
+ const c = this.applyRules(char, i, chars, charLen) ?? map[char] ?? fallback;
3525
3563
  // De-duplicate the code if necessary
3526
3564
  return dedupe && c === lastCode ? undefined : c;
3527
3565
  }
@@ -3674,7 +3712,7 @@ const PhoneticMappingRegistry = (() => {
3674
3712
  add(algo, id, map, update = false) {
3675
3713
  const mappings = maps(algo);
3676
3714
  if (!update && id in mappings)
3677
- throw new Error(`entry <${id}> already exists / use <update=true> to overwrite`);
3715
+ throw new Error(`Entry <${id}> already exists / use <update=true> to overwrite`);
3678
3716
  mappings[id] = map;
3679
3717
  },
3680
3718
  /**
@@ -3710,6 +3748,188 @@ const PhoneticMappingRegistry = (() => {
3710
3748
  };
3711
3749
  })();
3712
3750
 
3751
+ /**
3752
+ * Caverphone Phonetic Algorithm
3753
+ * src/phonetic/Caverphone.ts
3754
+ *
3755
+ * @see https://en.wikipedia.org/wiki/Caverphone
3756
+ *
3757
+ * This module implements the Caverphone phonetic algorithm, which is designed
3758
+ * to encode words into a phonetic representation. The Caverphone algorithm is
3759
+ * used primarily in New Zealand and was developed to assist in the indexing of
3760
+ * names in genealogical databases.
3761
+ *
3762
+ * It converts words into a standardized phonetic code, allowing for variations
3763
+ * in spelling and pronunciation to be matched.
3764
+ *
3765
+ * @module Phonetic/Caverphone
3766
+ * @author Paul Köhler (komed3)
3767
+ * @license MIT
3768
+ */
3769
+ /**
3770
+ * Caverphone class extends the Phonetic class to implement the Caverphone phonetic algorithm.
3771
+ */
3772
+ class Caverphone extends Phonetic {
3773
+ // Default options for the Caverphone phonetic algorithm
3774
+ static default = {
3775
+ map: 'en2', delimiter: ' ', length: -1, pad: '', dedupe: false
3776
+ };
3777
+ /**
3778
+ * Constructor for the Caverphone class.
3779
+ *
3780
+ * Initializes the Caverphone phonetic algorithm with the mapping and options.
3781
+ *
3782
+ * @param {PhoneticOptions} [opt] - Options for the Caverphone phonetic algorithm
3783
+ */
3784
+ constructor(opt = {}) { super('caverphone', opt); }
3785
+ /**
3786
+ * Generates the Caverphone code for a given word.
3787
+ *
3788
+ * @param {string} word - The input word to be converted into a Caverphone code
3789
+ * @returns {string} - The generated Caverphone code
3790
+ */
3791
+ encode(word) {
3792
+ // Remove anything not A-Z and convert to lowercase
3793
+ word = word.replace(/[^A-Z]/gi, '').toLowerCase();
3794
+ // Use the base implementation for rule/mapping application
3795
+ return super.encode(word);
3796
+ }
3797
+ /**
3798
+ * Overrides the mapChar method to skip character mapping.
3799
+ *
3800
+ * @param {string} char - The character to be mapped
3801
+ * @returns {string} - The mapped character
3802
+ */
3803
+ mapChar(char) { return char; }
3804
+ /**
3805
+ * Adjusts the phonetic code to uppercase.
3806
+ *
3807
+ * @param {string} code - The phonetic code to adjust
3808
+ * @returns {string} - The adjusted phonetic code
3809
+ */
3810
+ adjustCode(code) { return code.toUpperCase(); }
3811
+ }
3812
+ // Register the Caverphone algorithm in the phonetic registry
3813
+ PhoneticRegistry.add('caverphone', Caverphone);
3814
+ // Register the Caverphone 1.0 phonetic mapping for English
3815
+ PhoneticMappingRegistry.add('caverphone', 'en1', {
3816
+ options: { length: 6, pad: '1' },
3817
+ map: {},
3818
+ patterns: [
3819
+ // Special word-initial replacements
3820
+ { pattern: /^(c|r|t|en)ough/, replace: '$1ou2f' },
3821
+ { pattern: /^gn/, replace: '2n' },
3822
+ // Special word-final replacement
3823
+ { pattern: /mb$/, replace: 'm2' },
3824
+ // Character group replacements
3825
+ { pattern: /cq/g, replace: '2q' },
3826
+ { pattern: /c(e|i|y)/g, replace: 's$1' },
3827
+ { pattern: /tch/g, replace: '2ch' },
3828
+ { pattern: /[cqx]/g, replace: 'k' },
3829
+ { pattern: /v/g, replace: 'f' },
3830
+ { pattern: /dg/g, replace: '2g' },
3831
+ { pattern: /ti(a|o)/g, replace: 'si$1' },
3832
+ { pattern: /d/g, replace: 't' },
3833
+ { pattern: /ph/g, replace: 'fh' },
3834
+ { pattern: /b/g, replace: 'p' },
3835
+ { pattern: /sh/g, replace: 's2' },
3836
+ { pattern: /z/g, replace: 's' },
3837
+ // Vowel handling
3838
+ { pattern: /^[aeiou]/, replace: 'A' },
3839
+ { pattern: /[aeiou]/g, replace: '3' },
3840
+ // Special gh handling
3841
+ { pattern: /3gh3/g, replace: '3kh3' },
3842
+ { pattern: /gh/g, replace: '22' },
3843
+ // Single character replacements
3844
+ { pattern: /g/g, replace: 'k' },
3845
+ // Collapse repeated consonants
3846
+ { pattern: /s+/g, replace: 'S' },
3847
+ { pattern: /t+/g, replace: 'T' },
3848
+ { pattern: /p+/g, replace: 'P' },
3849
+ { pattern: /k+/g, replace: 'K' },
3850
+ { pattern: /f+/g, replace: 'F' },
3851
+ { pattern: /m+/g, replace: 'M' },
3852
+ { pattern: /n+/g, replace: 'N' },
3853
+ // Y and other single-letter handling
3854
+ { pattern: /j/g, replace: 'y' },
3855
+ // L/R/W/Y3 handling
3856
+ { pattern: /l3/g, replace: 'L3' },
3857
+ { pattern: /r3/g, replace: 'R3' },
3858
+ { pattern: /w3/g, replace: 'W3' },
3859
+ { pattern: /y3/g, replace: 'Y3' },
3860
+ // L/R/W followed by y
3861
+ { pattern: /ly/g, replace: 'Ly' },
3862
+ { pattern: /ry/g, replace: 'Ry' },
3863
+ { pattern: /wy/g, replace: 'Wy' },
3864
+ // WH handling
3865
+ { pattern: /wh3/g, replace: 'Wh3' },
3866
+ { pattern: /why/g, replace: 'Why' },
3867
+ // H at start
3868
+ { pattern: /^h/, replace: 'A' },
3869
+ // Remove certain letters
3870
+ { pattern: /[hlrwy23]/g, replace: '' }
3871
+ ]
3872
+ });
3873
+ // Register the Caverphone 2.0 phonetic mapping for English
3874
+ PhoneticMappingRegistry.add('caverphone', 'en2', {
3875
+ options: { length: 10, pad: '1' },
3876
+ map: {},
3877
+ patterns: [
3878
+ // Remove trailing 'e'
3879
+ { pattern: /e$/, replace: '' },
3880
+ // Special word-initial replacements
3881
+ { pattern: /^(c|r|t|en|tr)ough/, replace: '$1ou2f' },
3882
+ { pattern: /^gn/, replace: '2n' },
3883
+ // Special word-final replacement
3884
+ { pattern: /mb$/, replace: 'm2' },
3885
+ // Character group replacements
3886
+ { pattern: /cq/g, replace: '2q' },
3887
+ { pattern: /c(e|i|y)/g, replace: 's$1' },
3888
+ { pattern: /tch/g, replace: '2ch' },
3889
+ { pattern: /[cqx]/g, replace: 'k' },
3890
+ { pattern: /v/g, replace: 'f' },
3891
+ { pattern: /dg/g, replace: '2g' },
3892
+ { pattern: /ti(a|o)/g, replace: 'si$1' },
3893
+ { pattern: /d/g, replace: 't' },
3894
+ { pattern: /ph/g, replace: 'fh' },
3895
+ { pattern: /b/g, replace: 'p' },
3896
+ { pattern: /sh/g, replace: 's2' },
3897
+ { pattern: /z/g, replace: 's' },
3898
+ // Vowel handling
3899
+ { pattern: /^[aeiou]/, replace: 'A' },
3900
+ { pattern: /[aeiou]/g, replace: '3' },
3901
+ // Y handling
3902
+ { pattern: /j/g, replace: 'y' },
3903
+ { pattern: /^y3/, replace: 'Y3' },
3904
+ { pattern: /^y/, replace: 'A' },
3905
+ { pattern: /y/g, replace: '3' },
3906
+ // Special gh handling
3907
+ { pattern: /3gh3/g, replace: '3kh3' },
3908
+ { pattern: /gh/g, replace: '22' },
3909
+ // Single character replacements
3910
+ { pattern: /g/g, replace: 'k' },
3911
+ // Collapse repeated consonants
3912
+ { pattern: /s+/g, replace: 'S' },
3913
+ { pattern: /t+/g, replace: 'T' },
3914
+ { pattern: /p+/g, replace: 'P' },
3915
+ { pattern: /k+/g, replace: 'K' },
3916
+ { pattern: /f+/g, replace: 'F' },
3917
+ { pattern: /m+/g, replace: 'M' },
3918
+ { pattern: /n+/g, replace: 'N' },
3919
+ // L/R/W3 handling
3920
+ { pattern: /l3/g, replace: 'L3' },
3921
+ { pattern: /r3/g, replace: 'R3' },
3922
+ { pattern: /w3/g, replace: 'W3' },
3923
+ { pattern: /wh3/g, replace: 'Wh3' },
3924
+ { pattern: /[lrw]$/, replace: '3' },
3925
+ // H at start and final 3 handling
3926
+ { pattern: /^h/, replace: 'A' },
3927
+ { pattern: /3$/, replace: 'A' },
3928
+ // Remove certain letters
3929
+ { pattern: /[hlrw23]/g, replace: '' }
3930
+ ]
3931
+ });
3932
+
3713
3933
  /**
3714
3934
  * Cologne Phonetic Algorithm
3715
3935
  * src/phonetic/Cologne.ts
@@ -4258,6 +4478,12 @@ class CmpStr {
4258
4478
  // Prepare the input
4259
4479
  const A = skip ? a : this.prepare(a, resolved);
4260
4480
  const B = skip ? b : this.prepare(b, resolved);
4481
+ // If either input is empty and safeEmpty is enabled, return an empty array
4482
+ if (resolved.safeEmpty && ((Array.isArray(A) && A.length === 0) ||
4483
+ (Array.isArray(B) && B.length === 0) ||
4484
+ A === '' || B === '')) {
4485
+ return [];
4486
+ }
4261
4487
  // Get the metric class
4262
4488
  const metric = factory.metric(resolved.metric, A, B, resolved.opt);
4263
4489
  // Pass the original inputs to the metric
@@ -4688,6 +4914,12 @@ class CmpStrAsync extends CmpStr {
4688
4914
  // Prepare the input
4689
4915
  const A = skip ? a : await this.prepareAsync(a, resolved);
4690
4916
  const B = skip ? b : await this.prepareAsync(b, resolved);
4917
+ // If either input is empty and safeEmpty is enabled, return an empty array
4918
+ if (resolved.safeEmpty && ((Array.isArray(A) && A.length === 0) ||
4919
+ (Array.isArray(B) && B.length === 0) ||
4920
+ A === '' || B === '')) {
4921
+ return [];
4922
+ }
4691
4923
  // Get the metric class
4692
4924
  const metric = factory.metric(resolved.metric, A, B, resolved.opt);
4693
4925
  // Pass the original inputs to the metric