cmpstr 3.0.0 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. package/README.md +10 -4
  2. package/dist/CmpStr.esm.js +242 -24
  3. package/dist/CmpStr.esm.js.map +1 -1
  4. package/dist/CmpStr.esm.min.js +2 -2
  5. package/dist/CmpStr.esm.min.js.map +1 -1
  6. package/dist/CmpStr.umd.js +242 -24
  7. package/dist/CmpStr.umd.js.map +1 -1
  8. package/dist/CmpStr.umd.min.js +2 -2
  9. package/dist/CmpStr.umd.min.js.map +1 -1
  10. package/dist/cjs/{CmpStr.js → CmpStr.cjs} +26 -25
  11. package/dist/cjs/CmpStr.cjs.map +1 -0
  12. package/dist/cjs/{CmpStrAsync.js → CmpStrAsync.cjs} +6 -6
  13. package/dist/cjs/CmpStrAsync.cjs.map +1 -0
  14. package/dist/cjs/index.cjs +15 -0
  15. package/dist/cjs/index.cjs.map +1 -0
  16. package/dist/cjs/metric/{Cosine.js → Cosine.cjs} +4 -4
  17. package/dist/cjs/metric/Cosine.cjs.map +1 -0
  18. package/dist/cjs/metric/{DamerauLevenshtein.js → DamerauLevenshtein.cjs} +4 -4
  19. package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -0
  20. package/dist/cjs/metric/{DiceSorensen.js → DiceSorensen.cjs} +4 -4
  21. package/dist/cjs/metric/DiceSorensen.cjs.map +1 -0
  22. package/dist/cjs/metric/{Hamming.js → Hamming.cjs} +4 -4
  23. package/dist/cjs/metric/Hamming.cjs.map +1 -0
  24. package/dist/cjs/metric/{Jaccard.js → Jaccard.cjs} +4 -4
  25. package/dist/cjs/metric/Jaccard.cjs.map +1 -0
  26. package/dist/cjs/metric/{JaroWinkler.js → JaroWinkler.cjs} +4 -4
  27. package/dist/cjs/metric/JaroWinkler.cjs.map +1 -0
  28. package/dist/cjs/metric/{LCS.js → LCS.cjs} +4 -4
  29. package/dist/cjs/metric/LCS.cjs.map +1 -0
  30. package/dist/cjs/metric/{Levenshtein.js → Levenshtein.cjs} +4 -4
  31. package/dist/cjs/metric/Levenshtein.cjs.map +1 -0
  32. package/dist/cjs/metric/{Metric.js → Metric.cjs} +9 -9
  33. package/dist/cjs/metric/Metric.cjs.map +1 -0
  34. package/dist/cjs/metric/{NeedlemanWunsch.js → NeedlemanWunsch.cjs} +4 -4
  35. package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -0
  36. package/dist/cjs/metric/{SmithWaterman.js → SmithWaterman.cjs} +4 -4
  37. package/dist/cjs/metric/SmithWaterman.cjs.map +1 -0
  38. package/dist/cjs/metric/{qGram.js → qGram.cjs} +4 -4
  39. package/dist/cjs/metric/qGram.cjs.map +1 -0
  40. package/dist/cjs/phonetic/Caverphone.cjs +199 -0
  41. package/dist/cjs/phonetic/Caverphone.cjs.map +1 -0
  42. package/dist/cjs/phonetic/{Cologne.js → Cologne.cjs} +3 -3
  43. package/dist/cjs/phonetic/Cologne.cjs.map +1 -0
  44. package/dist/cjs/phonetic/{Metaphone.js → Metaphone.cjs} +3 -3
  45. package/dist/cjs/phonetic/Metaphone.cjs.map +1 -0
  46. package/dist/cjs/phonetic/{Phonetic.js → Phonetic.cjs} +55 -21
  47. package/dist/cjs/phonetic/Phonetic.cjs.map +1 -0
  48. package/dist/cjs/phonetic/{Soundex.js → Soundex.cjs} +3 -3
  49. package/dist/cjs/phonetic/Soundex.cjs.map +1 -0
  50. package/dist/cjs/utils/{DeepMerge.js → DeepMerge.cjs} +3 -3
  51. package/dist/cjs/utils/DeepMerge.cjs.map +1 -0
  52. package/dist/cjs/utils/{DiffChecker.js → DiffChecker.cjs} +11 -11
  53. package/dist/cjs/utils/DiffChecker.cjs.map +1 -0
  54. package/dist/cjs/utils/{Filter.js → Filter.cjs} +2 -2
  55. package/dist/cjs/utils/Filter.cjs.map +1 -0
  56. package/dist/cjs/utils/{HashTable.js → HashTable.cjs} +2 -2
  57. package/dist/cjs/utils/HashTable.cjs.map +1 -0
  58. package/dist/cjs/utils/{Normalizer.js → Normalizer.cjs} +3 -3
  59. package/dist/cjs/utils/Normalizer.cjs.map +1 -0
  60. package/dist/cjs/utils/{Pool.js → Pool.cjs} +2 -2
  61. package/dist/cjs/utils/Pool.cjs.map +1 -0
  62. package/dist/cjs/utils/{Profiler.js → Profiler.cjs} +4 -4
  63. package/dist/cjs/utils/Profiler.cjs.map +1 -0
  64. package/dist/cjs/utils/{Registry.js → Registry.cjs} +8 -8
  65. package/dist/cjs/utils/Registry.cjs.map +1 -0
  66. package/dist/cjs/utils/{TextAnalyzer.js → TextAnalyzer.cjs} +3 -3
  67. package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -0
  68. package/dist/esm/{CmpStr.js → CmpStr.mjs} +26 -25
  69. package/dist/esm/CmpStr.mjs.map +1 -0
  70. package/dist/esm/{CmpStrAsync.js → CmpStrAsync.mjs} +6 -6
  71. package/dist/esm/CmpStrAsync.mjs.map +1 -0
  72. package/dist/esm/index.mjs +7 -0
  73. package/dist/esm/index.mjs.map +1 -0
  74. package/dist/esm/metric/{Cosine.js → Cosine.mjs} +4 -4
  75. package/dist/esm/metric/Cosine.mjs.map +1 -0
  76. package/dist/esm/metric/{DamerauLevenshtein.js → DamerauLevenshtein.mjs} +4 -4
  77. package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -0
  78. package/dist/esm/metric/{DiceSorensen.js → DiceSorensen.mjs} +4 -4
  79. package/dist/esm/metric/DiceSorensen.mjs.map +1 -0
  80. package/dist/esm/metric/{Hamming.js → Hamming.mjs} +4 -4
  81. package/dist/esm/metric/Hamming.mjs.map +1 -0
  82. package/dist/esm/metric/{Jaccard.js → Jaccard.mjs} +4 -4
  83. package/dist/esm/metric/Jaccard.mjs.map +1 -0
  84. package/dist/esm/metric/{JaroWinkler.js → JaroWinkler.mjs} +4 -4
  85. package/dist/esm/metric/JaroWinkler.mjs.map +1 -0
  86. package/dist/esm/metric/{LCS.js → LCS.mjs} +4 -4
  87. package/dist/esm/metric/LCS.mjs.map +1 -0
  88. package/dist/esm/metric/{Levenshtein.js → Levenshtein.mjs} +4 -4
  89. package/dist/esm/metric/Levenshtein.mjs.map +1 -0
  90. package/dist/esm/metric/{Metric.js → Metric.mjs} +9 -9
  91. package/dist/esm/metric/Metric.mjs.map +1 -0
  92. package/dist/esm/metric/{NeedlemanWunsch.js → NeedlemanWunsch.mjs} +4 -4
  93. package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -0
  94. package/dist/esm/metric/{SmithWaterman.js → SmithWaterman.mjs} +4 -4
  95. package/dist/esm/metric/SmithWaterman.mjs.map +1 -0
  96. package/dist/esm/metric/{qGram.js → qGram.mjs} +4 -4
  97. package/dist/esm/metric/qGram.mjs.map +1 -0
  98. package/dist/esm/phonetic/Caverphone.mjs +201 -0
  99. package/dist/esm/phonetic/Caverphone.mjs.map +1 -0
  100. package/dist/esm/phonetic/{Cologne.js → Cologne.mjs} +3 -3
  101. package/dist/esm/phonetic/Cologne.mjs.map +1 -0
  102. package/dist/esm/phonetic/{Metaphone.js → Metaphone.mjs} +3 -3
  103. package/dist/esm/phonetic/Metaphone.mjs.map +1 -0
  104. package/dist/esm/phonetic/{Phonetic.js → Phonetic.mjs} +52 -21
  105. package/dist/esm/phonetic/Phonetic.mjs.map +1 -0
  106. package/dist/esm/phonetic/{Soundex.js → Soundex.mjs} +3 -3
  107. package/dist/esm/phonetic/Soundex.mjs.map +1 -0
  108. package/dist/esm/utils/{DeepMerge.js → DeepMerge.mjs} +3 -3
  109. package/dist/esm/utils/DeepMerge.mjs.map +1 -0
  110. package/dist/esm/utils/{DiffChecker.js → DiffChecker.mjs} +11 -11
  111. package/dist/esm/utils/DiffChecker.mjs.map +1 -0
  112. package/dist/esm/utils/{Filter.js → Filter.mjs} +2 -2
  113. package/dist/esm/utils/Filter.mjs.map +1 -0
  114. package/dist/esm/utils/{HashTable.js → HashTable.mjs} +2 -2
  115. package/dist/esm/utils/HashTable.mjs.map +1 -0
  116. package/dist/esm/utils/{Normalizer.js → Normalizer.mjs} +3 -3
  117. package/dist/esm/utils/Normalizer.mjs.map +1 -0
  118. package/dist/esm/utils/{Pool.js → Pool.mjs} +2 -2
  119. package/dist/esm/utils/Pool.mjs.map +1 -0
  120. package/dist/esm/utils/{Profiler.js → Profiler.mjs} +4 -4
  121. package/dist/esm/utils/Profiler.mjs.map +1 -0
  122. package/dist/esm/utils/{Registry.js → Registry.mjs} +8 -8
  123. package/dist/esm/utils/Registry.mjs.map +1 -0
  124. package/dist/esm/utils/{TextAnalyzer.js → TextAnalyzer.mjs} +3 -3
  125. package/dist/esm/utils/TextAnalyzer.mjs.map +1 -0
  126. package/dist/types/index.d.ts +2 -2
  127. package/dist/types/phonetic/Caverphone.d.ts +55 -0
  128. package/dist/types/phonetic/Phonetic.d.ts +14 -2
  129. package/dist/types/phonetic/index.d.ts +1 -0
  130. package/dist/types/utils/Types.d.ts +11 -0
  131. package/package.json +21 -16
  132. package/dist/cjs/CmpStr.js.map +0 -1
  133. package/dist/cjs/CmpStrAsync.js.map +0 -1
  134. package/dist/cjs/index.js +0 -15
  135. package/dist/cjs/index.js.map +0 -1
  136. package/dist/cjs/metric/Cosine.js.map +0 -1
  137. package/dist/cjs/metric/DamerauLevenshtein.js.map +0 -1
  138. package/dist/cjs/metric/DiceSorensen.js.map +0 -1
  139. package/dist/cjs/metric/Hamming.js.map +0 -1
  140. package/dist/cjs/metric/Jaccard.js.map +0 -1
  141. package/dist/cjs/metric/JaroWinkler.js.map +0 -1
  142. package/dist/cjs/metric/LCS.js.map +0 -1
  143. package/dist/cjs/metric/Levenshtein.js.map +0 -1
  144. package/dist/cjs/metric/Metric.js.map +0 -1
  145. package/dist/cjs/metric/NeedlemanWunsch.js.map +0 -1
  146. package/dist/cjs/metric/SmithWaterman.js.map +0 -1
  147. package/dist/cjs/metric/qGram.js.map +0 -1
  148. package/dist/cjs/phonetic/Cologne.js.map +0 -1
  149. package/dist/cjs/phonetic/Metaphone.js.map +0 -1
  150. package/dist/cjs/phonetic/Phonetic.js.map +0 -1
  151. package/dist/cjs/phonetic/Soundex.js.map +0 -1
  152. package/dist/cjs/utils/DeepMerge.js.map +0 -1
  153. package/dist/cjs/utils/DiffChecker.js.map +0 -1
  154. package/dist/cjs/utils/Filter.js.map +0 -1
  155. package/dist/cjs/utils/HashTable.js.map +0 -1
  156. package/dist/cjs/utils/Normalizer.js.map +0 -1
  157. package/dist/cjs/utils/Pool.js.map +0 -1
  158. package/dist/cjs/utils/Profiler.js.map +0 -1
  159. package/dist/cjs/utils/Registry.js.map +0 -1
  160. package/dist/cjs/utils/TextAnalyzer.js.map +0 -1
  161. package/dist/esm/CmpStr.js.map +0 -1
  162. package/dist/esm/CmpStrAsync.js.map +0 -1
  163. package/dist/esm/index.js +0 -7
  164. package/dist/esm/index.js.map +0 -1
  165. package/dist/esm/metric/Cosine.js.map +0 -1
  166. package/dist/esm/metric/DamerauLevenshtein.js.map +0 -1
  167. package/dist/esm/metric/DiceSorensen.js.map +0 -1
  168. package/dist/esm/metric/Hamming.js.map +0 -1
  169. package/dist/esm/metric/Jaccard.js.map +0 -1
  170. package/dist/esm/metric/JaroWinkler.js.map +0 -1
  171. package/dist/esm/metric/LCS.js.map +0 -1
  172. package/dist/esm/metric/Levenshtein.js.map +0 -1
  173. package/dist/esm/metric/Metric.js.map +0 -1
  174. package/dist/esm/metric/NeedlemanWunsch.js.map +0 -1
  175. package/dist/esm/metric/SmithWaterman.js.map +0 -1
  176. package/dist/esm/metric/qGram.js.map +0 -1
  177. package/dist/esm/phonetic/Cologne.js.map +0 -1
  178. package/dist/esm/phonetic/Metaphone.js.map +0 -1
  179. package/dist/esm/phonetic/Phonetic.js.map +0 -1
  180. package/dist/esm/phonetic/Soundex.js.map +0 -1
  181. package/dist/esm/utils/DeepMerge.js.map +0 -1
  182. package/dist/esm/utils/DiffChecker.js.map +0 -1
  183. package/dist/esm/utils/Filter.js.map +0 -1
  184. package/dist/esm/utils/HashTable.js.map +0 -1
  185. package/dist/esm/utils/Normalizer.js.map +0 -1
  186. package/dist/esm/utils/Pool.js.map +0 -1
  187. package/dist/esm/utils/Profiler.js.map +0 -1
  188. package/dist/esm/utils/Registry.js.map +0 -1
  189. package/dist/esm/utils/TextAnalyzer.js.map +0 -1
package/README.md CHANGED
@@ -2,13 +2,13 @@
2
2
 
3
3
  [![GitHub License](https://img.shields.io/github/license/komed3/cmpstr?style=for-the-badge&logo=unlicense&logoColor=fff)](LICENSE)
4
4
  [![Static Badge](https://img.shields.io/badge/docs-docs?style=for-the-badge&logo=readthedocs&logoColor=fff&color=blue)](https://github.com/komed3/cmpstr/wiki)
5
- [![Static Badge](https://img.shields.io/badge/Typescript-support?style=for-the-badge&logo=typescript&logoColor=fff&color=blue)]()
5
+ [![Static Badge](https://img.shields.io/badge/Typescript-support?style=for-the-badge&logo=typescript&logoColor=fff&color=blue)](https://www.typescriptlang.org)
6
6
  [![GitHub package.json version](https://img.shields.io/github/package-json/v/komed3/cmpstr?style=for-the-badge&logo=npm&logoColor=fff)](https://npmjs.com/package/cmpstr)
7
7
  [![npm bundle size](https://img.shields.io/bundlephobia/min/cmpstr?style=for-the-badge&logo=gitlfs&logoColor=fff)](https://bundlephobia.com/package/cmpstr)
8
8
  [![NPM Downloads](https://img.shields.io/npm/dy/cmpstr?style=for-the-badge&logo=transmission&logoColor=fff)](https://npmpackage.info/package/cmpstr?t=downloads)
9
9
  [![GitHub Actions Workflow Status](https://img.shields.io/github/actions/workflow/status/komed3/cmpstr/build.yml?style=for-the-badge&logo=educative&logoColor=fff)](https://github.com/komed3/cmpstr/actions/workflows/build.yml)
10
- [![Static Badge](https://img.shields.io/badge/ESM_%26_CJS-TypeScript?style=for-the-badge&logo=nodedotjs&logoColor=fff&color=purple)]()
11
- [![Static Badge](https://img.shields.io/badge/UMD_%26_ESM-JavaScript?style=for-the-badge&logo=javascript&logoColor=fff&color=orange)]()
10
+ [![Static Badge](https://img.shields.io/badge/ESM_%26_CJS-TypeScript?style=for-the-badge&logo=nodedotjs&logoColor=fff&color=purple)](https://github.com/komed3/cmpstr/wiki/Installation-&-Setup#import-in-your-project)
11
+ [![Static Badge](https://img.shields.io/badge/UMD_%26_ESM-JavaScript?style=for-the-badge&logo=javascript&logoColor=fff&color=orange)](https://github.com/komed3/cmpstr/wiki/Installation-&-Setup#browser)
12
12
 
13
13
  **CmpStr** is a TypeScript library for advanced string comparison, similarity measurement, phonetic indexing, and text analysis. It includes implementations of several established algorithms such as Levenshtein, Dice–Sørensen, Damerau–Levenshtein and Soundex. The library has no external dependencies and allows for the integration of custom metrics, phonetic mappings, and normalization filters.
14
14
 
@@ -68,8 +68,14 @@ console.log( result );
68
68
  // [ 'Meyer', 'Meier' ]
69
69
  ```
70
70
 
71
+ _Try with [OneCompiler](https://onecompiler.com/nodejs/43qr6trny)._
72
+
73
+ ## CLI Tool
74
+
75
+ Try out or use CmpStr on the terminal. Install the **[cmpstr-cli](https://npmjs.com/package/cmpstr-cli)** package and use many features of CmpStr directly on the console via the cmpstr command. Many options and parameters also make the command suitable for scripts and automatic processing.
76
+
71
77
  ## Documentation
72
78
 
73
79
  The full documentation, API reference and advanced usage examples are available in the [GitHub Wiki](https://github.com/komed3/cmpstr/wiki).
74
80
 
75
- **LICENSE MIT © 2023-2025 PAUL KÖHLER (KOMED3)**
81
+ **LICENSE MIT © 2023-2025 PAUL KÖHLER (KOMED3)**
@@ -1,5 +1,5 @@
1
1
  /**
2
- * CmpStr v3.0.0 dev-1a82e20-250612
2
+ * CmpStr v3.0.2 build-522ae69-250720
3
3
  * This is a lightweight, fast and well performing library for calculating string similarity.
4
4
  * (c) 2023-2025 Paul Köhler @komed3 / MIT License
5
5
  * Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
@@ -61,7 +61,7 @@ function set(t, path, value) {
61
61
  const [k, ...r] = parse(path);
62
62
  // Throw an error if the key is not a valid identifier
63
63
  if (t !== undefined && (typeof t !== 'object' || t === null))
64
- throw Error(`cannot set property <${k}> of <${JSON.stringify(t)}>`);
64
+ throw Error(`Cannot set property <${k}> of <${JSON.stringify(t)}>`);
65
65
  // Assign the value to the specified key in the object
66
66
  return Object.assign(t ?? (typeof k === 'number' ? [] : Object.create(null)), {
67
67
  [k]: set(t?.[k], r.join('.'), value)
@@ -1680,7 +1680,7 @@ const factory = Object.create(null);
1680
1680
  function Registry(reg, ctor) {
1681
1681
  // Throws an error if the registry already exists
1682
1682
  if (reg in registry || reg in factory)
1683
- throw new Error(`registry <${reg}> already exists / overwriting is forbidden`);
1683
+ throw new Error(`Registry <${reg}> already exists / overwriting is forbidden`);
1684
1684
  // Create a registry object to hold class constructors
1685
1685
  const classes = Object.create(null);
1686
1686
  const service = {
@@ -1695,9 +1695,9 @@ function Registry(reg, ctor) {
1695
1695
  */
1696
1696
  add(name, cls, update = false) {
1697
1697
  if (!(cls.prototype instanceof ctor))
1698
- throw new TypeError(`class must extend <${reg}>`);
1698
+ throw new TypeError(`Class must extend <${reg}>`);
1699
1699
  if (!update && name in classes)
1700
- throw new Error(`entry <${name}> already exists / use <update=true> to overwrite`);
1700
+ throw new Error(`Entry <${name}> already exists / use <update=true> to overwrite`);
1701
1701
  classes[name] = cls;
1702
1702
  },
1703
1703
  /**
@@ -1728,7 +1728,7 @@ function Registry(reg, ctor) {
1728
1728
  */
1729
1729
  get(name) {
1730
1730
  if (!(name in classes))
1731
- throw new Error(`class <${name}> not registered for <${reg}>`);
1731
+ throw new Error(`Class <${name}> not registered for <${reg}>`);
1732
1732
  return classes[name];
1733
1733
  }
1734
1734
  };
@@ -1749,7 +1749,7 @@ function Registry(reg, ctor) {
1749
1749
  */
1750
1750
  function resolveCls(reg, cls) {
1751
1751
  if (!(reg in registry))
1752
- throw new ReferenceError(`registry <${reg}> does not exist`);
1752
+ throw new ReferenceError(`Registry <${reg}> does not exist`);
1753
1753
  return (typeof cls === 'string' ? registry[reg]?.get(cls) : cls);
1754
1754
  }
1755
1755
  /**
@@ -1767,7 +1767,7 @@ function createFromRegistry(reg, cls, ...args) {
1767
1767
  return new cls(...args);
1768
1768
  }
1769
1769
  catch (err) {
1770
- throw new Error(`cannot instantiate class <${cls}>`);
1770
+ throw new Error(`Cannot instantiate class <${cls}>`);
1771
1771
  }
1772
1772
  }
1773
1773
 
@@ -1902,7 +1902,7 @@ class Metric {
1902
1902
  * @throws {Error} - If not overridden in a subclass
1903
1903
  */
1904
1904
  compute(a, b, m, n, maxLen) {
1905
- throw new Error(`method compute() must be overridden in a subclass`);
1905
+ throw new Error(`Method compute() must be overridden in a subclass`);
1906
1906
  }
1907
1907
  /**
1908
1908
  * Run the metric computation for single inputs (two strings).
@@ -2059,7 +2059,7 @@ class Metric {
2059
2059
  */
2060
2060
  isPairwise(safe = false) {
2061
2061
  return this.isBatch() && this.a.length === this.b.length ? true : !safe && (() => {
2062
- throw new Error(`mode <pairwise> requires arrays of equal length`);
2062
+ throw new Error(`Mode <pairwise> requires arrays of equal length`);
2063
2063
  })();
2064
2064
  }
2065
2065
  /**
@@ -2120,7 +2120,7 @@ class Metric {
2120
2120
  this.runPairwise();
2121
2121
  break;
2122
2122
  // Unsupported mode
2123
- default: throw new Error(`unsupported mode <${mode}>`);
2123
+ default: throw new Error(`Unsupported mode <${mode}>`);
2124
2124
  }
2125
2125
  }
2126
2126
  /**
@@ -2155,7 +2155,7 @@ class Metric {
2155
2155
  await this.runPairwiseAsync();
2156
2156
  break;
2157
2157
  // Unsupported mode
2158
- default: throw new Error(`unsupported async mode <${mode}>`);
2158
+ default: throw new Error(`Unsupported async mode <${mode}>`);
2159
2159
  }
2160
2160
  }
2161
2161
  /**
@@ -2698,7 +2698,7 @@ class HammingDistance extends Metric {
2698
2698
  }
2699
2699
  // Standard: Error for unequal length
2700
2700
  else
2701
- throw new Error(`strings must be of equal length for Hamming Distance, a=${m} and b=${n} given, ` +
2701
+ throw new Error(`Strings must be of equal length for Hamming Distance, a=${m} and b=${n} given, ` +
2702
2702
  `use option.pad for automatic adjustment`);
2703
2703
  }
2704
2704
  // Calculate the Hamming distance
@@ -3347,8 +3347,8 @@ MetricRegistry.add('smithWaterman', SmithWatermanDistance);
3347
3347
  * pose a risk of infringing upon existing trademarks due to their pronunciation.
3348
3348
  *
3349
3349
  * This module provides an abstract class for generating phonetic indices based
3350
- * on mappings and rules. It allows for the implementation of various phonetic
3351
- * algorithms by extending the abstract class.
3350
+ * on mappings, patterns and rules. It allows for the implementation of various
3351
+ * phonetic algorithms by extending the abstract class.
3352
3352
  *
3353
3353
  * @module Phonetic
3354
3354
  * @author Paul Köhler (komed3)
@@ -3388,22 +3388,55 @@ class Phonetic {
3388
3388
  * Constructor for the Phonetic class.
3389
3389
  *
3390
3390
  * Initializes the phonetic algorithm with the specified options and mapping.
3391
+ * Options hierarchy: User input > mapping options > default
3391
3392
  *
3392
3393
  * @param {string} algo - The name of the algorithm (e.g. 'soundex')
3393
3394
  * @param {PhoneticOptions} [opt] - Options for the phonetic algorithm
3394
3395
  * @throws {Error} - If the requested mapping is not declared
3395
3396
  */
3396
3397
  constructor(algo, opt = {}) {
3397
- // Set the options by merging the default options with the provided ones
3398
- this.options = merge(this.constructor.default ?? {}, opt);
3399
- // Get the mapping based on the provided options
3400
- const map = PhoneticMappingRegistry.get(algo, this.options.map);
3398
+ // Get the phonetic default options
3399
+ const defaults = this.constructor.default ?? {};
3400
+ // Determine phonetic map ID from options or use defaults
3401
+ const mapId = opt.map ?? defaults.map;
3402
+ // If no mapping is specified, throw an error
3403
+ if (!mapId)
3404
+ throw new Error(`No mapping specified for phonetic algorithm`);
3405
+ // Get the mapping based on the determined map ID
3406
+ const map = PhoneticMappingRegistry.get(algo, mapId);
3401
3407
  // If the mapping is not defined, throw an error
3402
3408
  if (map === undefined)
3403
- throw new Error(`requested mapping <${this.options.map}> is not declared`);
3409
+ throw new Error(`Requested mapping <${mapId}> is not declared`);
3410
+ // Set the options by merging the defaults, the mapping options and the provided ones
3411
+ this.options = merge(merge(defaults, map.options ?? {}), opt);
3412
+ // Set the algorithm name and mapping
3404
3413
  this.algo = algo;
3405
3414
  this.map = map;
3406
3415
  }
3416
+ /**
3417
+ * Applies patterns to a word based on the phonetic map.
3418
+ *
3419
+ * This method processes the word by applying all defined patterns in the
3420
+ * phonetic map. It replaces occurrences of specified patterns with their
3421
+ * corresponding replacements.
3422
+ *
3423
+ * @param {string} word - The input word to be processed
3424
+ * @returns {string} - The modified word after applying all patterns
3425
+ */
3426
+ applyPattern(word) {
3427
+ const { patterns = [] } = this.map;
3428
+ // If no patterns are provided, return the input
3429
+ if (!patterns || !patterns.length)
3430
+ return word;
3431
+ // Iterate over the patterns and replace all matches
3432
+ for (const { pattern, replace, all = false } of patterns) {
3433
+ // Search for the pattern in the word and replace it
3434
+ // Use replaceAll if 'all' is true, otherwise use replace
3435
+ word = word[all ? 'replaceAll' : 'replace'](pattern, replace);
3436
+ }
3437
+ // Return the modified word after applying all patterns
3438
+ return word;
3439
+ }
3407
3440
  /**
3408
3441
  * Applies phonetic rules to a character in a word context.
3409
3442
  *
@@ -3481,6 +3514,9 @@ class Phonetic {
3481
3514
  */
3482
3515
  encode(word) {
3483
3516
  const { map = {}, ignore = [] } = this.map;
3517
+ // Apply patterns to the word before processing
3518
+ // This allows for pre-processing of the word based on defined patterns
3519
+ word = this.applyPattern(word);
3484
3520
  // Get the characters of the word and its length
3485
3521
  const chars = this.word2Chars(word);
3486
3522
  const charLen = chars.length;
@@ -3517,11 +3553,11 @@ class Phonetic {
3517
3553
  * @returns {string|undefined} - The phonetic code or undefined if no code applies
3518
3554
  */
3519
3555
  mapChar(char, i, chars, charLen, lastCode, map) {
3520
- const { dedupe = true } = this.options;
3556
+ const { dedupe = true, fallback = undefined } = this.options;
3521
3557
  // Apply phonetic rules to the character
3522
3558
  // If no rules apply, use the mapping
3523
- // If the character is not in the mapping, return undefined
3524
- const c = this.applyRules(char, i, chars, charLen) ?? map[char] ?? undefined;
3559
+ // If the character is not in the mapping, return the fallback
3560
+ const c = this.applyRules(char, i, chars, charLen) ?? map[char] ?? fallback;
3525
3561
  // De-duplicate the code if necessary
3526
3562
  return dedupe && c === lastCode ? undefined : c;
3527
3563
  }
@@ -3674,7 +3710,7 @@ const PhoneticMappingRegistry = (() => {
3674
3710
  add(algo, id, map, update = false) {
3675
3711
  const mappings = maps(algo);
3676
3712
  if (!update && id in mappings)
3677
- throw new Error(`entry <${id}> already exists / use <update=true> to overwrite`);
3713
+ throw new Error(`Entry <${id}> already exists / use <update=true> to overwrite`);
3678
3714
  mappings[id] = map;
3679
3715
  },
3680
3716
  /**
@@ -3710,6 +3746,188 @@ const PhoneticMappingRegistry = (() => {
3710
3746
  };
3711
3747
  })();
3712
3748
 
3749
+ /**
3750
+ * Caverphone Phonetic Algorithm
3751
+ * src/phonetic/Caverphone.ts
3752
+ *
3753
+ * @see https://en.wikipedia.org/wiki/Caverphone
3754
+ *
3755
+ * This module implements the Caverphone phonetic algorithm, which is designed
3756
+ * to encode words into a phonetic representation. The Caverphone algorithm is
3757
+ * used primarily in New Zealand and was developed to assist in the indexing of
3758
+ * names in genealogical databases.
3759
+ *
3760
+ * It converts words into a standardized phonetic code, allowing for variations
3761
+ * in spelling and pronunciation to be matched.
3762
+ *
3763
+ * @module Phonetic/Caverphone
3764
+ * @author Paul Köhler (komed3)
3765
+ * @license MIT
3766
+ */
3767
+ /**
3768
+ * Caverphone class extends the Phonetic class to implement the Caverphone phonetic algorithm.
3769
+ */
3770
+ class Caverphone extends Phonetic {
3771
+ // Default options for the Caverphone phonetic algorithm
3772
+ static default = {
3773
+ map: 'en2', delimiter: ' ', length: -1, pad: '', dedupe: false
3774
+ };
3775
+ /**
3776
+ * Constructor for the Caverphone class.
3777
+ *
3778
+ * Initializes the Caverphone phonetic algorithm with the mapping and options.
3779
+ *
3780
+ * @param {PhoneticOptions} [opt] - Options for the Caverphone phonetic algorithm
3781
+ */
3782
+ constructor(opt = {}) { super('caverphone', opt); }
3783
+ /**
3784
+ * Generates the Caverphone code for a given word.
3785
+ *
3786
+ * @param {string} word - The input word to be converted into a Caverphone code
3787
+ * @returns {string} - The generated Caverphone code
3788
+ */
3789
+ encode(word) {
3790
+ // Remove anything not A-Z and convert to lowercase
3791
+ word = word.replace(/[^A-Z]/gi, '').toLowerCase();
3792
+ // Use the base implementation for rule/mapping application
3793
+ return super.encode(word);
3794
+ }
3795
+ /**
3796
+ * Overrides the mapChar method to skip character mapping.
3797
+ *
3798
+ * @param {string} char - The character to be mapped
3799
+ * @returns {string} - The mapped character
3800
+ */
3801
+ mapChar(char) { return char; }
3802
+ /**
3803
+ * Adjusts the phonetic code to uppercase.
3804
+ *
3805
+ * @param {string} code - The phonetic code to adjust
3806
+ * @returns {string} - The adjusted phonetic code
3807
+ */
3808
+ adjustCode(code) { return code.toUpperCase(); }
3809
+ }
3810
+ // Register the Caverphone algorithm in the phonetic registry
3811
+ PhoneticRegistry.add('caverphone', Caverphone);
3812
+ // Register the Caverphone 1.0 phonetic mapping for English
3813
+ PhoneticMappingRegistry.add('caverphone', 'en1', {
3814
+ options: { length: 6, pad: '1' },
3815
+ map: {},
3816
+ patterns: [
3817
+ // Special word-initial replacements
3818
+ { pattern: /^(c|r|t|en)ough/, replace: '$1ou2f' },
3819
+ { pattern: /^gn/, replace: '2n' },
3820
+ // Special word-final replacement
3821
+ { pattern: /mb$/, replace: 'm2' },
3822
+ // Character group replacements
3823
+ { pattern: /cq/g, replace: '2q' },
3824
+ { pattern: /c(e|i|y)/g, replace: 's$1' },
3825
+ { pattern: /tch/g, replace: '2ch' },
3826
+ { pattern: /[cqx]/g, replace: 'k' },
3827
+ { pattern: /v/g, replace: 'f' },
3828
+ { pattern: /dg/g, replace: '2g' },
3829
+ { pattern: /ti(a|o)/g, replace: 'si$1' },
3830
+ { pattern: /d/g, replace: 't' },
3831
+ { pattern: /ph/g, replace: 'fh' },
3832
+ { pattern: /b/g, replace: 'p' },
3833
+ { pattern: /sh/g, replace: 's2' },
3834
+ { pattern: /z/g, replace: 's' },
3835
+ // Vowel handling
3836
+ { pattern: /^[aeiou]/, replace: 'A' },
3837
+ { pattern: /[aeiou]/g, replace: '3' },
3838
+ // Special gh handling
3839
+ { pattern: /3gh3/g, replace: '3kh3' },
3840
+ { pattern: /gh/g, replace: '22' },
3841
+ // Single character replacements
3842
+ { pattern: /g/g, replace: 'k' },
3843
+ // Collapse repeated consonants
3844
+ { pattern: /s+/g, replace: 'S' },
3845
+ { pattern: /t+/g, replace: 'T' },
3846
+ { pattern: /p+/g, replace: 'P' },
3847
+ { pattern: /k+/g, replace: 'K' },
3848
+ { pattern: /f+/g, replace: 'F' },
3849
+ { pattern: /m+/g, replace: 'M' },
3850
+ { pattern: /n+/g, replace: 'N' },
3851
+ // Y and other single-letter handling
3852
+ { pattern: /j/g, replace: 'y' },
3853
+ // L/R/W/Y3 handling
3854
+ { pattern: /l3/g, replace: 'L3' },
3855
+ { pattern: /r3/g, replace: 'R3' },
3856
+ { pattern: /w3/g, replace: 'W3' },
3857
+ { pattern: /y3/g, replace: 'Y3' },
3858
+ // L/R/W followed by y
3859
+ { pattern: /ly/g, replace: 'Ly' },
3860
+ { pattern: /ry/g, replace: 'Ry' },
3861
+ { pattern: /wy/g, replace: 'Wy' },
3862
+ // WH handling
3863
+ { pattern: /wh3/g, replace: 'Wh3' },
3864
+ { pattern: /why/g, replace: 'Why' },
3865
+ // H at start
3866
+ { pattern: /^h/, replace: 'A' },
3867
+ // Remove certain letters
3868
+ { pattern: /[hlrwy23]/g, replace: '' }
3869
+ ]
3870
+ });
3871
+ // Register the Caverphone 2.0 phonetic mapping for English
3872
+ PhoneticMappingRegistry.add('caverphone', 'en2', {
3873
+ options: { length: 10, pad: '1' },
3874
+ map: {},
3875
+ patterns: [
3876
+ // Remove trailing 'e'
3877
+ { pattern: /e$/, replace: '' },
3878
+ // Special word-initial replacements
3879
+ { pattern: /^(c|r|t|en|tr)ough/, replace: '$1ou2f' },
3880
+ { pattern: /^gn/, replace: '2n' },
3881
+ // Special word-final replacement
3882
+ { pattern: /mb$/, replace: 'm2' },
3883
+ // Character group replacements
3884
+ { pattern: /cq/g, replace: '2q' },
3885
+ { pattern: /c(e|i|y)/g, replace: 's$1' },
3886
+ { pattern: /tch/g, replace: '2ch' },
3887
+ { pattern: /[cqx]/g, replace: 'k' },
3888
+ { pattern: /v/g, replace: 'f' },
3889
+ { pattern: /dg/g, replace: '2g' },
3890
+ { pattern: /ti(a|o)/g, replace: 'si$1' },
3891
+ { pattern: /d/g, replace: 't' },
3892
+ { pattern: /ph/g, replace: 'fh' },
3893
+ { pattern: /b/g, replace: 'p' },
3894
+ { pattern: /sh/g, replace: 's2' },
3895
+ { pattern: /z/g, replace: 's' },
3896
+ // Vowel handling
3897
+ { pattern: /^[aeiou]/, replace: 'A' },
3898
+ { pattern: /[aeiou]/g, replace: '3' },
3899
+ // Y handling
3900
+ { pattern: /j/g, replace: 'y' },
3901
+ { pattern: /^y3/, replace: 'Y3' },
3902
+ { pattern: /^y/, replace: 'A' },
3903
+ { pattern: /y/g, replace: '3' },
3904
+ // Special gh handling
3905
+ { pattern: /3gh3/g, replace: '3kh3' },
3906
+ { pattern: /gh/g, replace: '22' },
3907
+ // Single character replacements
3908
+ { pattern: /g/g, replace: 'k' },
3909
+ // Collapse repeated consonants
3910
+ { pattern: /s+/g, replace: 'S' },
3911
+ { pattern: /t+/g, replace: 'T' },
3912
+ { pattern: /p+/g, replace: 'P' },
3913
+ { pattern: /k+/g, replace: 'K' },
3914
+ { pattern: /f+/g, replace: 'F' },
3915
+ { pattern: /m+/g, replace: 'M' },
3916
+ { pattern: /n+/g, replace: 'N' },
3917
+ // L/R/W3 handling
3918
+ { pattern: /l3/g, replace: 'L3' },
3919
+ { pattern: /r3/g, replace: 'R3' },
3920
+ { pattern: /w3/g, replace: 'W3' },
3921
+ { pattern: /wh3/g, replace: 'Wh3' },
3922
+ { pattern: /[lrw]$/, replace: '3' },
3923
+ // H at start and final 3 handling
3924
+ { pattern: /^h/, replace: 'A' },
3925
+ { pattern: /3$/, replace: 'A' },
3926
+ // Remove certain letters
3927
+ { pattern: /[hlrw23]/g, replace: '' }
3928
+ ]
3929
+ });
3930
+
3713
3931
  /**
3714
3932
  * Cologne Phonetic Algorithm
3715
3933
  * src/phonetic/Cologne.ts