cmpstr 2.0.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +75 -499
  3. package/dist/CmpStr.esm.js +4863 -0
  4. package/dist/CmpStr.esm.js.map +1 -0
  5. package/dist/CmpStr.esm.min.js +8 -0
  6. package/dist/CmpStr.esm.min.js.map +1 -0
  7. package/dist/CmpStr.umd.js +4875 -0
  8. package/dist/CmpStr.umd.js.map +1 -0
  9. package/dist/CmpStr.umd.min.js +8 -0
  10. package/dist/CmpStr.umd.min.js.map +1 -0
  11. package/dist/cjs/CmpStr.js +663 -0
  12. package/dist/cjs/CmpStr.js.map +1 -0
  13. package/dist/cjs/CmpStrAsync.js +336 -0
  14. package/dist/cjs/CmpStrAsync.js.map +1 -0
  15. package/dist/cjs/index.js +15 -0
  16. package/dist/cjs/index.js.map +1 -0
  17. package/dist/cjs/metric/Cosine.js +101 -0
  18. package/dist/cjs/metric/Cosine.js.map +1 -0
  19. package/dist/cjs/metric/DamerauLevenshtein.js +110 -0
  20. package/dist/cjs/metric/DamerauLevenshtein.js.map +1 -0
  21. package/dist/cjs/metric/DiceSorensen.js +91 -0
  22. package/dist/cjs/metric/DiceSorensen.js.map +1 -0
  23. package/dist/cjs/metric/Hamming.js +82 -0
  24. package/dist/cjs/metric/Hamming.js.map +1 -0
  25. package/dist/cjs/metric/Jaccard.js +76 -0
  26. package/dist/cjs/metric/Jaccard.js.map +1 -0
  27. package/dist/cjs/metric/JaroWinkler.js +114 -0
  28. package/dist/cjs/metric/JaroWinkler.js.map +1 -0
  29. package/dist/cjs/metric/LCS.js +89 -0
  30. package/dist/cjs/metric/LCS.js.map +1 -0
  31. package/dist/cjs/metric/Levenshtein.js +94 -0
  32. package/dist/cjs/metric/Levenshtein.js.map +1 -0
  33. package/dist/cjs/metric/Metric.js +445 -0
  34. package/dist/cjs/metric/Metric.js.map +1 -0
  35. package/dist/cjs/metric/NeedlemanWunsch.js +95 -0
  36. package/dist/cjs/metric/NeedlemanWunsch.js.map +1 -0
  37. package/dist/cjs/metric/SmithWaterman.js +98 -0
  38. package/dist/cjs/metric/SmithWaterman.js.map +1 -0
  39. package/dist/cjs/metric/qGram.js +91 -0
  40. package/dist/cjs/metric/qGram.js.map +1 -0
  41. package/dist/cjs/phonetic/Cologne.js +112 -0
  42. package/dist/cjs/phonetic/Cologne.js.map +1 -0
  43. package/dist/cjs/phonetic/Metaphone.js +172 -0
  44. package/dist/cjs/phonetic/Metaphone.js.map +1 -0
  45. package/dist/cjs/phonetic/Phonetic.js +413 -0
  46. package/dist/cjs/phonetic/Phonetic.js.map +1 -0
  47. package/dist/cjs/phonetic/Soundex.js +135 -0
  48. package/dist/cjs/phonetic/Soundex.js.map +1 -0
  49. package/dist/cjs/utils/DeepMerge.js +144 -0
  50. package/dist/cjs/utils/DeepMerge.js.map +1 -0
  51. package/dist/cjs/utils/DiffChecker.js +500 -0
  52. package/dist/cjs/utils/DiffChecker.js.map +1 -0
  53. package/dist/cjs/utils/Filter.js +189 -0
  54. package/dist/cjs/utils/Filter.js.map +1 -0
  55. package/dist/cjs/utils/HashTable.js +175 -0
  56. package/dist/cjs/utils/HashTable.js.map +1 -0
  57. package/dist/cjs/utils/Normalizer.js +144 -0
  58. package/dist/cjs/utils/Normalizer.js.map +1 -0
  59. package/dist/cjs/utils/Pool.js +196 -0
  60. package/dist/cjs/utils/Pool.js.map +1 -0
  61. package/dist/cjs/utils/Profiler.js +229 -0
  62. package/dist/cjs/utils/Profiler.js.map +1 -0
  63. package/dist/cjs/utils/Registry.js +148 -0
  64. package/dist/cjs/utils/Registry.js.map +1 -0
  65. package/dist/cjs/utils/TextAnalyzer.js +358 -0
  66. package/dist/cjs/utils/TextAnalyzer.js.map +1 -0
  67. package/dist/esm/CmpStr.js +662 -0
  68. package/dist/esm/CmpStr.js.map +1 -0
  69. package/dist/esm/CmpStrAsync.js +331 -0
  70. package/dist/esm/CmpStrAsync.js.map +1 -0
  71. package/dist/esm/index.js +7 -0
  72. package/dist/esm/index.js.map +1 -0
  73. package/dist/esm/metric/Cosine.js +99 -0
  74. package/dist/esm/metric/Cosine.js.map +1 -0
  75. package/dist/esm/metric/DamerauLevenshtein.js +108 -0
  76. package/dist/esm/metric/DamerauLevenshtein.js.map +1 -0
  77. package/dist/esm/metric/DiceSorensen.js +89 -0
  78. package/dist/esm/metric/DiceSorensen.js.map +1 -0
  79. package/dist/esm/metric/Hamming.js +77 -0
  80. package/dist/esm/metric/Hamming.js.map +1 -0
  81. package/dist/esm/metric/Jaccard.js +74 -0
  82. package/dist/esm/metric/Jaccard.js.map +1 -0
  83. package/dist/esm/metric/JaroWinkler.js +112 -0
  84. package/dist/esm/metric/JaroWinkler.js.map +1 -0
  85. package/dist/esm/metric/LCS.js +87 -0
  86. package/dist/esm/metric/LCS.js.map +1 -0
  87. package/dist/esm/metric/Levenshtein.js +92 -0
  88. package/dist/esm/metric/Levenshtein.js.map +1 -0
  89. package/dist/esm/metric/Metric.js +442 -0
  90. package/dist/esm/metric/Metric.js.map +1 -0
  91. package/dist/esm/metric/NeedlemanWunsch.js +93 -0
  92. package/dist/esm/metric/NeedlemanWunsch.js.map +1 -0
  93. package/dist/esm/metric/SmithWaterman.js +96 -0
  94. package/dist/esm/metric/SmithWaterman.js.map +1 -0
  95. package/dist/esm/metric/qGram.js +89 -0
  96. package/dist/esm/metric/qGram.js.map +1 -0
  97. package/dist/esm/phonetic/Cologne.js +114 -0
  98. package/dist/esm/phonetic/Cologne.js.map +1 -0
  99. package/dist/esm/phonetic/Metaphone.js +174 -0
  100. package/dist/esm/phonetic/Metaphone.js.map +1 -0
  101. package/dist/esm/phonetic/Phonetic.js +409 -0
  102. package/dist/esm/phonetic/Phonetic.js.map +1 -0
  103. package/dist/esm/phonetic/Soundex.js +137 -0
  104. package/dist/esm/phonetic/Soundex.js.map +1 -0
  105. package/dist/esm/utils/DeepMerge.js +139 -0
  106. package/dist/esm/utils/DeepMerge.js.map +1 -0
  107. package/dist/esm/utils/DiffChecker.js +498 -0
  108. package/dist/esm/utils/DiffChecker.js.map +1 -0
  109. package/dist/esm/utils/Filter.js +187 -0
  110. package/dist/esm/utils/Filter.js.map +1 -0
  111. package/dist/esm/utils/HashTable.js +173 -0
  112. package/dist/esm/utils/HashTable.js.map +1 -0
  113. package/dist/esm/utils/Normalizer.js +142 -0
  114. package/dist/esm/utils/Normalizer.js.map +1 -0
  115. package/dist/esm/utils/Pool.js +194 -0
  116. package/dist/esm/utils/Pool.js.map +1 -0
  117. package/dist/esm/utils/Profiler.js +227 -0
  118. package/dist/esm/utils/Profiler.js.map +1 -0
  119. package/dist/esm/utils/Registry.js +142 -0
  120. package/dist/esm/utils/Registry.js.map +1 -0
  121. package/dist/esm/utils/TextAnalyzer.js +356 -0
  122. package/dist/esm/utils/TextAnalyzer.js.map +1 -0
  123. package/dist/types/CmpStr.d.ts +472 -0
  124. package/dist/types/CmpStrAsync.d.ts +233 -0
  125. package/dist/types/index.d.ts +51 -0
  126. package/dist/types/metric/Cosine.d.ts +57 -0
  127. package/dist/types/metric/DamerauLevenshtein.d.ts +50 -0
  128. package/dist/types/metric/DiceSorensen.d.ts +57 -0
  129. package/dist/types/metric/Hamming.d.ts +49 -0
  130. package/dist/types/metric/Jaccard.d.ts +48 -0
  131. package/dist/types/metric/JaroWinkler.d.ts +50 -0
  132. package/dist/types/metric/LCS.d.ts +50 -0
  133. package/dist/types/metric/Levenshtein.d.ts +50 -0
  134. package/dist/types/metric/Metric.d.ts +261 -0
  135. package/dist/types/metric/NeedlemanWunsch.d.ts +47 -0
  136. package/dist/types/metric/SmithWaterman.d.ts +48 -0
  137. package/dist/types/metric/index.d.ts +41 -0
  138. package/dist/types/metric/qGram.d.ts +56 -0
  139. package/dist/types/phonetic/Cologne.d.ts +46 -0
  140. package/dist/types/phonetic/Metaphone.d.ts +50 -0
  141. package/dist/types/phonetic/Phonetic.d.ts +189 -0
  142. package/dist/types/phonetic/Soundex.d.ts +49 -0
  143. package/dist/types/phonetic/index.d.ts +30 -0
  144. package/dist/types/utils/DeepMerge.d.ts +70 -0
  145. package/dist/types/utils/DiffChecker.d.ts +137 -0
  146. package/dist/types/utils/Filter.d.ts +97 -0
  147. package/dist/types/utils/HashTable.d.ts +86 -0
  148. package/dist/types/utils/Normalizer.d.ts +76 -0
  149. package/dist/types/utils/Pool.d.ts +63 -0
  150. package/dist/types/utils/Profiler.d.ts +129 -0
  151. package/dist/types/utils/Registry.d.ts +57 -0
  152. package/dist/types/utils/TextAnalyzer.d.ts +199 -0
  153. package/dist/types/utils/Types.d.ts +313 -0
  154. package/package.json +62 -49
  155. package/src/CmpStr.d.ts +0 -70
  156. package/src/CmpStr.js +0 -912
  157. package/src/CmpStrAsync.d.ts +0 -19
  158. package/src/CmpStrAsync.js +0 -204
  159. package/src/algorithms/cosine.js +0 -86
  160. package/src/algorithms/damerau.js +0 -78
  161. package/src/algorithms/dice.js +0 -65
  162. package/src/algorithms/hamming.js +0 -44
  163. package/src/algorithms/jaccard.js +0 -34
  164. package/src/algorithms/jaroWinkler.js +0 -106
  165. package/src/algorithms/lcs.js +0 -58
  166. package/src/algorithms/levenshtein.js +0 -70
  167. package/src/algorithms/needlemanWunsch.js +0 -72
  168. package/src/algorithms/qGram.js +0 -63
  169. package/src/algorithms/smithWaterman.js +0 -78
  170. package/src/algorithms/soundex.js +0 -152
  171. package/src/index.d.ts +0 -3
  172. package/src/index.js +0 -47
@@ -0,0 +1,4863 @@
1
+ /**
2
+ * CmpStr v3.0.0 dev-1a82e20-250612
3
+ * This is a lightweight, fast and well performing library for calculating string similarity.
4
+ * (c) 2023-2025 Paul Köhler @komed3 / MIT License
5
+ * Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
6
+ */
7
+ /**
8
+ * Deep Merge Utility
9
+ * src/utils/DeepMerge.ts
10
+ *
11
+ * This module provides utility functions for deep merging objects, getting values by path,
12
+ * and setting values by path in a deeply nested object structure.
13
+ *
14
+ * It supports dot and bracket notation (e.g. `a.b[0].c`) as well as escaped keys.
15
+ *
16
+ * Included functions:
17
+ * - `get`: Retrieve a deeply nested value by path
18
+ * - `set`: Assign a value to a nested path
19
+ * - `merge`: Deeply merge two objects
20
+ * - `has`: Check whether a path exists
21
+ * - `rmv`: Delete a value at a path
22
+ *
23
+ * @module Utils/DeepMerge
24
+ * @author Paul Köhler
25
+ * @license MIT
26
+ */
27
/**
 * Parse a path string into an array of keys.
 *
 * Bracketed numeric indices are rewritten to dot notation before splitting, so
 * `a[0].b` and `a.0.b` yield the same keys. Segments consisting only of digits
 * are converted to numbers. A path that starts with an index (e.g. `[0].a`)
 * does not produce a leading empty key.
 *
 * @param {string} p - The path string, e.g. `a.b.c` or `a[0].b`
 * @returns {(string|number)[]} - An array of keys, e.g. `['a', 'b', 'c']` or `['a', 0, 'b']`
 */
const parse = (p) => p
    .replace(/\[(\d+)]/g, '.$1')
    // A leading index such as `[0]` becomes `.0`; drop that dot so the first key is not ''
    .replace(/^\./, '')
    .split('.')
    .map((s) => (/^\d+$/.test(s) ? +s : s));
34
/**
 * Deeply get a value from an object by a path string.
 *
 * The path is resolved key by key. If any intermediate value is `null` or
 * `undefined`, or the final value is, the fallback is returned as-is. The
 * fallback is never used as a base for resolving the remaining keys.
 *
 * @template T - The type of the object to get the value from
 * @param {T} t - The object to get the value from
 * @param {string} path - The path string, e.g. `a.b.c`
 * @param {any} fallback - The value to return if the path does not resolve
 * @returns {any} - The value at the specified path, otherwise the fallback
 */
function get(t, path, fallback) {
    let current = t;
    for (const key of parse(path)) {
        // The chain is broken: stop here instead of indexing into the fallback
        if (current == null)
            return fallback;
        current = current[key];
    }
    return current ?? fallback;
}
46
/**
 * Deeply set a value in an object by a path string.
 *
 * Missing containers along the path are created: an array when the key is a
 * number, otherwise an object without prototype. The target is modified in place.
 *
 * @template T - The type of the object to set the value in
 * @param {T} t - The object to set the value in
 * @param {string} path - The path string, e.g. `a.b.c`
 * @param {any} value - The value to set at the specified path
 * @returns {T} - The modified object (or `value` itself when the path is empty)
 * @throws {Error} - If a path segment is `__proto__` or `constructor`, or if the
 *                   path runs through a value that is not an object or array
 */
function set(t, path, value) {
    // An empty path addresses the target itself, so the value replaces it
    if (path === '')
        return value;
    // Split the path into the first key and the remaining keys
    const [k, ...r] = parse(path);
    // Refuse keys that would reach into the prototype chain (prototype pollution)
    if (k === '__proto__' || k === 'constructor')
        throw new Error(`cannot set unsafe property <${k}>`);
    // Only objects and arrays can hold the key; `undefined` is created below
    if (t !== undefined && (typeof t !== 'object' || t === null))
        throw new Error(`cannot set property <${k}> of <${JSON.stringify(t)}>`);
    const target = t ?? (typeof k === 'number' ? [] : Object.create(null));
    // Recurse with the rest of the path; the empty path ends the recursion
    target[k] = set(target[k], r.join('.'), value);
    return target;
}
70
/**
 * Deeply merge two objects, where the second object overrides the first.
 *
 * Non-array, non-null object values are merged recursively; every other value
 * (including arrays and `null`) replaces the value in the target. The keys
 * `__proto__` and `constructor` are skipped. The target is modified in place.
 *
 * @template T - The type of the objects to merge
 * @param {T} t - The target object to merge into
 * @param {T} o - The source object to merge from
 * @param {boolean} [mergeUndefined=false] - Whether to copy `undefined` values (applies at every depth)
 * @returns {T} - The merged target object
 */
function merge(t = Object.create(null), o = Object.create(null), mergeUndefined = false) {
    // `typeof null === 'object'`, so null has to be excluded explicitly
    const isMergeable = (v) => typeof v === 'object' && v !== null && !Array.isArray(v);
    for (const k of Object.keys(o)) {
        const val = o[k];
        // Skip undefined values unless the caller asked to merge them
        if (!mergeUndefined && val === undefined)
            continue;
        // Skip dangerous property names to prevent prototype pollution
        if (k === '__proto__' || k === 'constructor')
            continue;
        // Merge into the existing object if there is one, otherwise into a fresh one
        t[k] = isMergeable(val)
            ? merge(isMergeable(t[k]) ? t[k] : Object.create(null), val, mergeUndefined)
            : val;
    }
    return t;
}
96
/**
 * Delete a value at a specified path in an object.
 *
 * The object is modified in place. Unless `preserveEmpty` is set, containers
 * along the path that end up empty (an object without keys, or an array whose
 * elements are all `null`/`undefined`) are removed from their parent as well.
 * Intermediate keys are only followed when they are own properties, so a path
 * such as `__proto__.x` cannot delete from a prototype.
 *
 * @template T - The type of the object to delete the value from
 * @param {T} t - The object to delete the value from
 * @param {string} path - The path string, e.g. `a.b.c`
 * @param {boolean} [preserveEmpty=false] - Whether to preserve empty objects/arrays
 * @returns {T} - The modified object with the value deleted at the specified path
 */
function rmv(t, path, preserveEmpty = false) {
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    // Returns true when the deletion reached the end of the path
    const prune = (node, keys, i = 0) => {
        // Nothing to delete from primitives, null or undefined
        if (!node || typeof node !== 'object')
            return false;
        const key = keys[i];
        // Last key of the path: remove it from this container
        if (i === keys.length - 1)
            return delete node[key];
        // Never walk into inherited properties (e.g. `__proto__`)
        if (!hasOwn(node, key))
            return false;
        if (!prune(node[key], keys, i + 1))
            return false;
        // Clean up the child container if the deletion left it empty
        if (!preserveEmpty) {
            const child = node[key];
            if (typeof child === 'object' && child !== null) {
                const isEmpty = Array.isArray(child)
                    ? child.every((v) => v == null)
                    : Object.keys(child).length === 0;
                if (isEmpty)
                    delete node[key];
            }
        }
        return true;
    };
    prune(t, parse(path));
    return t;
}
128
+
129
+ /**
130
+ * Profiler Utility
131
+ * src/utils/profiler.ts
132
+ *
133
+ * @see https://en.wikipedia.org/wiki/Profiling_(computer_programming)
134
+ *
135
+ * This class provides methods to run synchronous and asynchronous functions, capturing
136
+ * their execution time and memory usage, and storing the results in a set of profiler
137
+ * entries. It supports both Node.js and browser environments, detecting the environment
138
+ * automatically.
139
+ *
140
+ * The class is optimized for minimal overhead and can be used for fine-grained
141
+ * performance profiling.
142
+ *
143
+ * @module Utils/Profiler
144
+ * @author Paul Köhler (komed3)
145
+ * @license MIT
146
+ */
147
+ /**
148
+ * Profiler class for measuring execution time and memory usage of functions.
149
+ */
150
+ class Profiler {
151
+ // Environment detection
152
+ static ENV;
153
+ // Singleton instance
154
+ static instance;
155
+ // Store for profiler entries
156
+ store = new Set();
157
+ // Total time and memory consumption
158
+ totalTime = 0;
159
+ totalMem = 0;
160
+ // The Profiler active state
161
+ active;
162
+ /**
163
+ * Sets the environment based on the available global objects.
164
+ * Detects if running in Node.js or browser and sets the ENV property accordingly.
165
+ */
166
+ static detectEnv() {
167
+ // Check for Node.js environment
168
+ if (typeof process !== 'undefined')
169
+ Profiler.ENV = 'nodejs';
170
+ // Check for browser environment
171
+ else if (typeof performance !== 'undefined')
172
+ Profiler.ENV = 'browser';
173
+ // If neither, set ENV to unknown
174
+ else
175
+ Profiler.ENV = 'unknown';
176
+ }
177
+ /**
178
+ * Returns the singleton instance of the Perf class.
179
+ * If the instance does not exist, it creates a new one.
180
+ *
181
+ * @param {boolean} [enable=false] - Optional parameter to enable the profiler upon instantiation
182
+ * @returns {Profiler} - Singleton Profiler instance
183
+ */
184
+ static getInstance(enable) {
185
+ // Ensure the environment is detected
186
+ if (!Profiler.ENV)
187
+ Profiler.detectEnv();
188
+ // If instance does not exist, create a new one
189
+ if (!Profiler.instance)
190
+ Profiler.instance = new Profiler(enable);
191
+ // Return singleton instance
192
+ return Profiler.instance;
193
+ }
194
+ /**
195
+ * Private constructor to enforce singleton pattern.
196
+ * Initializes the store for profiler entries.
197
+ *
198
+ * @param {boolean} [enable=false] - Optional parameter to enable the profiler
199
+ */
200
+ constructor(enable) { this.active = enable ?? false; }
201
+ /**
202
+ * Gets the current time based on the environment.
203
+ *
204
+ * Uses process.hrtime.bigint() for Node.js, performance.now() for browsers,
205
+ * and Date.now() as a fallback.
206
+ *
207
+ * @returns {number} - Current time in milliseconds
208
+ */
209
+ now() {
210
+ switch (Profiler.ENV) {
211
+ // Node.js environment
212
+ case 'nodejs': return Number(process.hrtime.bigint()) / 1e6;
213
+ // Browser environment
214
+ case 'browser': return performance.now();
215
+ // Fallback
216
+ default: return Date.now();
217
+ }
218
+ }
219
+ /**
220
+ * Gets the current memory usage based on the environment.
221
+ *
222
+ * Uses process.memoryUsage().heapUsed for Node.js, performance.memory.usedJSHeapSize
223
+ * for browsers, and returns 0 as a fallback.
224
+ *
225
+ * @returns {number} - Current memory usage in bytes
226
+ */
227
+ mem() {
228
+ switch (Profiler.ENV) {
229
+ // Node.js environment
230
+ case 'nodejs': return process.memoryUsage().heapUsed;
231
+ // Browser environment
232
+ case 'browser': return performance.memory?.usedJSHeapSize ?? 0;
233
+ // Fallback
234
+ default: return 0;
235
+ }
236
+ }
237
+ /**
238
+ * Enables the profiler.
239
+ * Sets the active state to true, allowing profiling to occur.
240
+ */
241
+ enable() { this.active = true; }
242
+ /**
243
+ * Disables the profiler.
244
+ * Sets the active state to false, preventing further profiling.
245
+ */
246
+ disable() { this.active = false; }
247
+ /**
248
+ * Resets the profiler by clearing the store, total time and memory consumption.
249
+ * This method is useful for starting a new profiling session.
250
+ */
251
+ clear() {
252
+ this.store.clear();
253
+ this.totalTime = 0;
254
+ this.totalMem = 0;
255
+ }
256
+ /**
257
+ * Runs a synchronous function and profiles its execution time and memory usage.
258
+ * If the profiler is not active, it simply executes the function without profiling.
259
+ *
260
+ * @param {() => T} fn - Function to be executed and profiled
261
+ * @param {Record<string, any>} meta - Metadata to be associated with the profiling entry
262
+ * @returns {T} - The result of the executed function
263
+ */
264
+ run(fn, meta = {}) {
265
+ // If the profiler is not active, simply execute the function without profiling
266
+ if (!this.active)
267
+ return fn();
268
+ // Capture the start time and memory usage
269
+ const startTime = this.now(), startMem = this.mem();
270
+ // Execute the function and capture the result
271
+ const res = fn();
272
+ // Calculate the time and memory consumption
273
+ const deltaTime = this.now() - startTime;
274
+ const deltaMem = this.mem() - startMem;
275
+ // Add the profiling entry to the store
276
+ this.store.add({ time: deltaTime, mem: deltaMem, res, meta });
277
+ this.totalTime += deltaTime, this.totalMem += deltaMem;
278
+ // Return the result of the function
279
+ return res;
280
+ }
281
+ /**
282
+ * Runs an asynchronous function and profiles its execution time and memory usage.
283
+ * If the profiler is not active, it simply executes the function without profiling.
284
+ *
285
+ * @param {() => Promise<T>} fn - Asynchronous function to be executed and profiled
286
+ * @param {Record<string, any>} meta - Metadata to be associated with the profiling entry
287
+ * @returns {Promise<T>} - A promise that resolves to the result of the executed function
288
+ */
289
+ async runAsync(fn, meta = {}) {
290
+ // If the profiler is not active, simply execute the function without profiling
291
+ if (!this.active)
292
+ return await fn();
293
+ // Capture the start time and memory usage
294
+ const startTime = this.now(), startMem = this.mem();
295
+ // Execute the asynchronous function and wait for its result
296
+ const res = await fn();
297
+ // Calculate the time and memory consumption
298
+ const deltaTime = this.now() - startTime;
299
+ const deltaMem = this.mem() - startMem;
300
+ // Add the profiling entry to the store
301
+ this.store.add({ time: deltaTime, mem: deltaMem, res, meta });
302
+ this.totalTime += deltaTime, this.totalMem += deltaMem;
303
+ // Return the result of the function
304
+ return res;
305
+ }
306
+ /**
307
+ * Retrieves all profiler entries stored in the profiler.
308
+ *
309
+ * @returns {ProfilerEntry<any>[]} - An array of profiler entries
310
+ */
311
+ getAll() { return [...this.store]; }
312
+ /**
313
+ * Retrieves the last profiler entry stored in the profiler.
314
+ *
315
+ * @returns {ProfilerEntry<any> | undefined} - The last profiler entry or undefined if no entries exist
316
+ */
317
+ getLast() { return this.getAll().pop(); }
318
+ /**
319
+ * Retrieves the total time and memory consumption recorded by the profiler.
320
+ *
321
+ * @returns {{ time: number, mem: number }} - An object containing total time and memory usage
322
+ */
323
+ getTotal() {
324
+ return {
325
+ time: this.totalTime, mem: this.totalMem
326
+ };
327
+ }
328
+ /**
329
+ * Returns the services provided by the Profiler class.
330
+ * This allows for easy access to the profiler's methods.
331
+ *
332
+ * @returns {ProfilerService<any>} - An object containing methods to control the profiler
333
+ */
334
+ services = {
335
+ enable: this.enable.bind(this),
336
+ disable: this.disable.bind(this),
337
+ clear: this.clear.bind(this),
338
+ report: this.getAll.bind(this),
339
+ last: this.getLast.bind(this),
340
+ total: this.getTotal.bind(this)
341
+ };
342
+ }
343
+
344
+ /**
345
+ * TextAnalyzer Utility
346
+ * src/utils/TextAnalyzer.ts
347
+ *
348
+ * The TextAnalyzer class provides a comprehensive set of methods for analyzing and
349
+ * extracting statistics from a given text. It supports word and sentence tokenization,
350
+ * character and word frequency analysis, syllable estimation, readability metrics
351
+ * (Flesch, Kincaid, LIX, WSTF), and various ratios and histograms. Designed for
352
+ * efficiency and flexibility, it is suitable for linguistic research, readability
353
+ * scoring, and text preprocessing tasks.
354
+ *
355
+ * @module Utils/TextAnalyzer
356
+ * @author Paul Köhler (komed3)
357
+ * @license MIT
358
+ */
359
+ class TextAnalyzer {
360
+ // The original text to analyze
361
+ text;
362
+ // Tokenized words and sentences
363
+ words = [];
364
+ sentences = [];
365
+ // Frequency maps for characters and words
366
+ charFrequency = new Map();
367
+ wordHistogram = new Map();
368
+ syllableCache = new Map();
369
+ /**
370
+ * Constructs a new TextAnalyzer instance with the provided input text.
371
+ *
372
+ * @param {string} input - The text to analyze
373
+ */
374
+ constructor(input) {
375
+ this.text = input.trim();
376
+ this.tokenize();
377
+ this.computeFrequencies();
378
+ }
379
+ /**
380
+ * Tokenizes the input text into words and sentences.
381
+ */
382
+ tokenize() {
383
+ this.words = [], this.sentences = [];
384
+ const text = this.text;
385
+ const wordRegex = /\p{L}+/gu;
386
+ let match;
387
+ // Tokenize words using Unicode property escapes for letters
388
+ while ((match = wordRegex.exec(text)) !== null) {
389
+ this.words.push(match[0].toLowerCase());
390
+ }
391
+ // Tokenize sentences using punctuation marks as delimiters
392
+ this.sentences = text.split(/(?<=[.!?])\s+/).filter(Boolean);
393
+ }
394
+ /**
395
+ * Computes character and word frequencies from the tokenized text.
396
+ */
397
+ computeFrequencies() {
398
+ // Compute character frequencies
399
+ for (const char of this.text)
400
+ this.charFrequency.set(char, (this.charFrequency.get(char) ?? 0) + 1);
401
+ // Compute word frequencies
402
+ for (const word of this.words)
403
+ this.wordHistogram.set(word, (this.wordHistogram.get(word) ?? 0) + 1);
404
+ }
405
+ /**
406
+ * Estimates the number of syllables in a word using a simple heuristic.
407
+ *
408
+ * @param {string} word - The word to estimate syllables for
409
+ * @returns {number} - Estimated syllable count
410
+ */
411
+ estimateSyllables(word) {
412
+ // Check cache first to avoid redundant calculations
413
+ if (this.syllableCache.has(word))
414
+ return this.syllableCache.get(word);
415
+ // Normalize the word: lowercase and remove non-letter characters
416
+ const clean = word.toLowerCase().replace(/[^a-zäöüß]/g, '');
417
+ const matches = clean.match(/[aeiouyäöü]+/g);
418
+ // Count syllables based on vowel groups
419
+ const count = matches ? matches.length : 1;
420
+ this.syllableCache.set(word, count);
421
+ return count;
422
+ }
423
+ /**
424
+ * Gets the original text length in characters.
425
+ *
426
+ * @return {number} - Length of the text
427
+ */
428
+ getLength() { return this.text.length; }
429
+ /**
430
+ * Gets the number of words in the text.
431
+ *
432
+ * @return {number} - Count of words
433
+ */
434
+ getWordCount() { return this.words.length; }
435
+ /**
436
+ * Gets the number of sentences in the text.
437
+ *
438
+ * @return {number} - Count of sentences
439
+ */
440
+ getSentenceCount() { return this.sentences.length; }
441
+ /**
442
+ * Gets the average word length in the text.
443
+ *
444
+ * @return {number} - Average length of words
445
+ */
446
+ getAvgWordLength() {
447
+ let totalLen = 0;
448
+ for (const w of this.words)
449
+ totalLen += w.length;
450
+ return this.words.length ? totalLen / this.words.length : 0;
451
+ }
452
+ /**
453
+ * Gets the average sentence length in words.
454
+ *
455
+ * @return {number} - Average length of sentences
456
+ */
457
+ getAvgSentenceLength() {
458
+ return this.sentences.length ? this.words.length / this.sentences.length : 0;
459
+ }
460
+ /**
461
+ * Gets a histogram of word frequencies in the text.
462
+ *
463
+ * @returns {Record<string, number>} - A histogram of word frequencies
464
+ */
465
+ getWordHistogram() {
466
+ return Object.fromEntries(this.wordHistogram);
467
+ }
468
+ /**
469
+ * Gets the most common words in the text, limited to a specified number.
470
+ *
471
+ * @param {number} [limit=5] - Maximum number of common words to return
472
+ * @returns {string[]} - Array of the most common words
473
+ */
474
+ getMostCommonWords(limit = 5) {
475
+ return [...this.wordHistogram.entries()]
476
+ .sort((a, b) => b[1] - a[1])
477
+ .slice(0, limit).map(e => e[0]);
478
+ }
479
+ /**
480
+ * Gets the least common words (hapax legomena) in the text.
481
+ *
482
+ * Hapax legomena are words that occur only once in the text.
483
+ *
484
+ * @returns {string[]} - Array of hapax legomena
485
+ */
486
+ getHapaxLegomena() {
487
+ return [...this.wordHistogram.entries()]
488
+ .filter(([, c]) => c === 1)
489
+ .map(e => e[0]);
490
+ }
491
+ /**
492
+ * Checks if the text contains any numbers.
493
+ *
494
+ * @returns {boolean} - True if numbers are present, false otherwise
495
+ */
496
+ hasNumbers() { return /\d/.test(this.text); }
497
+ /**
498
+ * Calculates the ratio of uppercase letters to total letters in the text.
499
+ *
500
+ * @return {number} - Ratio of uppercase letters to total letters
501
+ */
502
+ getUpperCaseRatio() {
503
+ let upper = 0, letters = 0;
504
+ for (let i = 0, len = this.text.length; i < len; i++) {
505
+ const c = this.text[i];
506
+ if (/[A-Za-zÄÖÜäöüß]/.test(c)) {
507
+ letters++;
508
+ if (/[A-ZÄÖÜ]/.test(c))
509
+ upper++;
510
+ }
511
+ }
512
+ return letters ? upper / letters : 0;
513
+ }
514
+ /**
515
+ * Gets the frequency of each character in the text.
516
+ *
517
+ * @returns {Record<string, number>} - A record of character frequencies
518
+ */
519
+ getCharFrequency() {
520
+ return Object.fromEntries(this.charFrequency);
521
+ }
522
+ /**
523
+ * Gets the frequency of each Unicode block in the text.
524
+ *
525
+ * @returns {Record<string, number>} - A record of Unicode block frequencies
526
+ */
527
+ getUnicodeStats() {
528
+ const result = {};
529
+ for (const [char, count] of this.charFrequency) {
530
+ // Get the Unicode block for the character
531
+ const block = char
532
+ .charCodeAt(0).toString(16)
533
+ .padStart(4, '0').toUpperCase();
534
+ // Increment the count for the block
535
+ result[block] = (result[block] ?? 0) + count;
536
+ }
537
+ return result;
538
+ }
539
+ /**
540
+ * Gets the ratio of long words (words with length >= len) to total words.
541
+ *
542
+ * @param {number} [len=7] - Minimum length for a word to be considered long
543
+ * @returns {number} - Ratio of long words to total words
544
+ */
545
+ getLongWordRatio(len = 7) {
546
+ let long = 0;
547
+ for (const w of this.words)
548
+ if (w.length >= len)
549
+ long++;
550
+ return this.words.length ? long / this.words.length : 0;
551
+ }
552
+ /**
553
+ * Gets the ratio of short words (words with length <= len) to total words.
554
+ *
555
+ * @param {number} [len=3] - Maximum length for a word to be considered short
556
+ * @returns {number} - Ratio of short words to total words
557
+ */
558
+ getShortWordRatio(len = 3) {
559
+ let short = 0;
560
+ for (const w of this.words)
561
+ if (w.length <= len)
562
+ short++;
563
+ return this.words.length ? short / this.words.length : 0;
564
+ }
565
+ /**
566
+ * Estimates the number of syllables in the text.
567
+ *
568
+ * @returns {number} - Total estimated syllable count
569
+ */
570
+ getSyllablesCount() {
571
+ let count = 0;
572
+ for (const w of this.words)
573
+ count += this.estimateSyllables(w);
574
+ return count;
575
+ }
576
+ /**
577
+ * Gets the number of monosyllabic words (words with exactly one syllable).
578
+ *
579
+ * @returns {number} - Count of monosyllabic words
580
+ */
581
+ getMonosyllabicWordCount() {
582
+ let count = 0;
583
+ for (const w of this.words)
584
+ if (this.estimateSyllables(w) === 1)
585
+ count++;
586
+ return count;
587
+ }
588
+ /**
589
+ * Gets the number of words with at least a specified minimum syllable count.
590
+ *
591
+ * @param {number} min - Minimum syllable count for a word to be included
592
+ * @returns {number} - Count of words meeting the syllable criteria
593
+ */
594
+ getMinSyllablesWordCount(min) {
595
+ let count = 0;
596
+ for (const w of this.words)
597
+ if (this.estimateSyllables(w) >= min)
598
+ count++;
599
+ return count;
600
+ }
601
+ /**
602
+ * Gets the number of words with at most a specified maximum syllable count.
603
+ *
604
+ * @param {number} max - Maximum syllable count for a word to be included
605
+ * @returns {number} - Count of words meeting the syllable criteria
606
+ */
607
+ getMaxSyllablesWordCount(max) {
608
+ let count = 0;
609
+ for (const w of this.words)
610
+ if (this.estimateSyllables(w) <= max)
611
+ count++;
612
+ return count;
613
+ }
614
+ /**
615
+ * Calculates the Honore's R statistic for the text as a measure of lexical richness.
616
+ *
617
+ * @returns {number} - The Honore's R statistic
618
+ */
619
+ getHonoresR() {
620
+ return (100 * Math.log(this.words.length)) / (1 - (this.getHapaxLegomena().length / (this.wordHistogram.size ?? 1)));
621
+ }
622
+ /**
623
+ * Estimates the reading time for the text based on words per minute (WPM).
624
+ *
625
+ * @param {number} [wpm=200] - Words per minute for the calculation
626
+ * @returns {number} - Estimated reading time in minutes
627
+ */
628
+ getReadingTime(wpm = 200) {
629
+ return Math.max(1, this.words.length / (wpm ?? 1));
630
+ }
631
+ /**
632
+ * Calculates various readability scores based on the text.
633
+ *
634
+ * This method supports multiple readability metrics:
635
+ * - Flesch Reading Ease
636
+ * - Flesch-Kincaid Grade Level
637
+ *
638
+ * @param {'flesch'|'fleschde'|'kincaid'} [metric='flesch'] - The readability metric to calculate
639
+ * @returns {number} - The calculated readability score
640
+ */
641
+ getReadabilityScore(metric = 'flesch') {
642
+ const w = this.words.length || 1;
643
+ const s = this.sentences.length || 1;
644
+ const y = this.getSyllablesCount() || 1;
645
+ const asl = w / s;
646
+ const asw = y / w;
647
+ switch (metric) {
648
+ // Flesch Reading Ease formula
649
+ case 'flesch': return 206.835 - (1.015 * asl) - (84.6 * asw);
650
+ // Flesch Reading Ease formula for German texts
651
+ case 'fleschde': return 180 - asl - (58.5 * asw);
652
+ // Flesch-Kincaid Grade Level formula
653
+ case 'kincaid': return (0.39 * asl) + (11.8 * asw) - 15.59;
654
+ }
655
+ }
656
+ /**
657
+ * Calculates the LIX (Läsbarhetsindex) score for the text.
658
+ *
659
+ * The LIX score is a readability index that combines average word length and sentence length.
660
+ *
661
+ * @returns {number} - The LIX score
662
+ */
663
+ getLIXScore() {
664
+ const w = this.words.length || 1;
665
+ const s = this.sentences.length || 1;
666
+ const l = this.getLongWordRatio() * w;
667
+ return (w / s) + (l / w * 100);
668
+ }
669
+ /**
670
+ * Calculates the Wiener Sachtextformel (WSTF) scores for the text.
671
+ *
672
+ * The WSTF scores are a set of readability metrics based on word and sentence characteristics.
673
+ *
674
+ * @returns {[number, number, number, number]} - An array of WSTF scores
675
+ */
676
+ getWSTFScore() {
677
+ const w = this.words.length || 1;
678
+ const h = this.getMinSyllablesWordCount(3) / w * 100;
679
+ const s = this.getAvgSentenceLength();
680
+ const l = this.getLongWordRatio() * 100;
681
+ const m = this.getMonosyllabicWordCount() / w * 100;
682
+ return [
683
+ 0.1935 * h + 0.1672 * s + 0.1297 * l - 0.0327 * m - 0.8750,
684
+ 0.2007 * h + 0.1682 * s + 0.1373 * l - 2.7790,
685
+ 0.2963 * h + 0.1905 * s - 1.1144,
686
+ 0.2744 * h + 0.2656 * s - 1.6930
687
+ ];
688
+ }
689
+ }
690
+
691
+ /**
692
+ * DiffChecker Utility
693
+ * src/utils/DiffChecker.ts
694
+ *
695
+ * The DiffChecker class provides a robust and efficient utility for comparing two
696
+ * texts and extracting their differences (full lines or word mode). It supports
697
+ * context-aware grouping of changes, unified diff output (with CLI color or ASCII
698
+ * markup), and detailed change magnitude metrics. The class is highly configurable,
699
+ * allowing users to choose the diff granularity, case sensitivity, context lines,
700
+ * grouping, and output style. It is suitable for text comparison, code review
701
+ * tools, document versioning, and any application requiring precise and human-
702
+ * readable difference reporting.
703
+ *
704
+ * Features:
705
+ * - Line and word-based diffing
706
+ * - Case-insensitive comparison option
707
+ * - Context lines and grouping of adjacent changes
708
+ * - Unified diff output (ASCII or colored CLI)
709
+ * - Highlighting of changed segments within lines
710
+ * - Change magnitude calculation (relative to group or line)
711
+ * - Expand-all mode for full file context
712
+ *
713
+ * @module Utils/DiffChecker
714
+ * @author Paul Köhler (komed3)
715
+ * @license MIT
716
+ */
717
+ /**
718
+ * The DiffChecker class provides methods to compare two texts and generate
719
+ * structured diffs, grouped diffs, and unified diff outputs.
720
+ */
721
class DiffChecker {
    // Original input texts and the effective options (defaults + overrides)
    a;
    b;
    options;
    // Computed per-line diff entries and the groups built from them
    entries = [];
    grouped = [];
    // Set once the diff has been computed so that it is never computed twice
    diffRun = false;
    /**
     * Constructs a new DiffChecker instance for comparing two texts.
     * The diff is computed immediately.
     *
     * @param {string} a - The first (original) text
     * @param {string} b - The second (modified) text
     * @param {DiffOptions} [opt] - Optional diff configuration
     */
    constructor(a, b, opt = {}) {
        this.a = a;
        this.b = b;
        // Defaults first, user-provided options win
        this.options = {
            mode: 'word',
            caseInsensitive: false,
            contextLines: 1,
            groupedLines: true,
            expandLines: false,
            showChangeMagnitude: true,
            maxMagnitudeSymbols: 5,
            lineBreak: '\n',
            ...opt
        };
        this.computeDiff();
    }
    /**
     * Trims both input texts, splits them into lines and returns them
     * together with the larger of the two line counts.
     *
     * @returns { linesA: string[], linesB: string[], maxLen: number }
     */
    text2lines() {
        const linesA = this.a.trim().split(/\r?\n/);
        const linesB = this.b.trim().split(/\r?\n/);
        return { linesA, linesB, maxLen: Math.max(linesA.length, linesB.length) };
    }
    /**
     * Tokenizes a string according to the current diff mode (line or word).
     *
     * @param {string} input - The string to tokenize
     * @returns {string[]} - Array of tokens
     */
    tokenize(input) {
        const { mode } = this.options;
        switch (mode) {
            // The whole line is a single token
            case 'line': return [input];
            // Whitespace-separated words
            case 'word': return input.split(/\s+/);
        }
    }
    /**
     * Concatenates an array of tokens back into a string, respecting the diff mode.
     *
     * @param {string[]} input - Array of tokens
     * @returns {string} - Concatenated string
     */
    concat(input) {
        const { mode } = this.options;
        return input.join(mode === 'word' ? ' ' : '');
    }
    /**
     * Computes the diff between the two input texts and populates the
     * entries and grouped arrays. Does nothing if the diff has already run.
     */
    computeDiff() {
        if (this.diffRun)
            return;
        const { linesA, linesB, maxLen } = this.text2lines();
        // Compare the texts line by line; a missing line counts as empty
        for (let i = 0; i < maxLen; i++)
            this.lineDiff(linesA[i] || '', linesB[i] || '', i);
        // Cluster neighbouring changes
        this.findGroups();
        this.diffRun = true;
    }
    /**
     * Compares two lines and records their differences at the configured granularity.
     *
     * @param {string} a - Line from the first text
     * @param {string} b - Line from the second text
     * @param {number} line - Line number (zero-based)
     */
    lineDiff(a, b, line) {
        const { mode, caseInsensitive } = this.options;
        const baseLen = Math.max(a.length, b.length);
        // Comparison happens on A/B, reporting on the case-preserved a/b
        let A = a;
        let B = b;
        if (caseInsensitive) {
            A = a.toLowerCase();
            B = b.toLowerCase();
        }
        let diffs = [];
        let delSize = 0;
        let insSize = 0;
        if (mode === 'line') {
            // Line mode: the whole line is either equal or replaced
            if (A !== B) {
                diffs.push({
                    posA: 0, posB: 0,
                    del: a, ins: b,
                    size: b.length - a.length
                });
                delSize = a.length;
                insSize = b.length;
            }
        }
        else {
            // Word mode: find the changed token runs and total their sizes
            diffs = this.preciseDiff(a, A, b, B);
            for (const d of diffs) {
                delSize += d.del.length;
                insSize += d.ins.length;
            }
        }
        if (diffs.length) {
            this.entries.push({
                line, diffs, delSize, insSize, baseLen,
                totalSize: insSize - delSize,
                magnitude: this.magnitude(delSize, insSize, baseLen)
            });
        }
    }
    /**
     * Finds the diff blocks between two tokenized strings, returning the
     * original (case-preserved) text and its character positions.
     *
     * @param {string} a - Original line (case preserved)
     * @param {string} A - Original line (possibly lowercased)
     * @param {string} b - Modified line (case preserved)
     * @param {string} B - Modified line (possibly lowercased)
     * @returns {DiffEntry[]} - Array of diff entries for this line
     */
    preciseDiff(a, A, b, B) {
        // Locate every token inside its source line. Searching forward from the
        // end of the previous token yields the true character offset even when
        // tokens are separated by several whitespace characters or the line is
        // indented (assuming a single separating space misplaces the markers).
        const posIndex = (text, tokens) => {
            let cursor = 0;
            return tokens.map(token => {
                const at = text.indexOf(token, cursor);
                cursor = at + token.length;
                return at;
            });
        };
        // Case-preserved tokens (for reporting) and comparison tokens
        const origA = this.tokenize(a);
        const origB = this.tokenize(b);
        const tokenA = this.tokenize(A);
        const tokenB = this.tokenize(B);
        const lenA = tokenA.length;
        const lenB = tokenB.length;
        const posArrA = posIndex(a, origA);
        const posArrB = posIndex(b, origB);
        // Collect matching token runs with a greedy forward scan
        const matches = [];
        let ai = 0;
        let bi = 0;
        while (ai < lenA && bi < lenB) {
            if (tokenA[ai] === tokenB[bi]) {
                // Extend the match for as long as the tokens keep agreeing
                let len = 1;
                while (ai + len < lenA && bi + len < lenB &&
                    tokenA[ai + len] === tokenB[bi + len])
                    len++;
                matches.push({ ai, bi, len });
                ai += len;
                bi += len;
            }
            else {
                let found = false;
                // Look up to three tokens ahead on either side for a sync point
                for (let offset = 1; offset <= 3 && !found; offset++) {
                    // A later token of A equals the current token of B
                    if (ai + offset < lenA && tokenA[ai + offset] === tokenB[bi]) {
                        matches.push({ ai: ai + offset, bi, len: 1 });
                        ai += offset + 1;
                        bi += 1;
                        found = true;
                    }
                    // A later token of B equals the current token of A
                    else if (bi + offset < lenB && tokenA[ai] === tokenB[bi + offset]) {
                        matches.push({ ai, bi: bi + offset, len: 1 });
                        ai += 1;
                        bi += offset + 1;
                        found = true;
                    }
                }
                // No sync point nearby: treat both tokens as changed
                if (!found) {
                    ai++;
                    bi++;
                }
            }
        }
        // Everything between two matches (and after the last one) is a change
        const diffs = [];
        const emit = (i, j, delArr, insArr) => diffs.push({
            posA: posArrA[i] ?? 0,
            posB: posArrB[j] ?? 0,
            del: this.concat(delArr),
            ins: this.concat(insArr),
            size: insArr.join('').length - delArr.join('').length
        });
        let i = 0;
        let j = 0;
        for (const m of matches) {
            // Unmatched tokens in front of this match
            if (i < m.ai || j < m.bi)
                emit(i, j, origA.slice(i, m.ai), origB.slice(j, m.bi));
            // Continue behind the match
            i = m.ai + m.len;
            j = m.bi + m.len;
        }
        // Unmatched tail behind the last match
        if (i < lenA || j < lenB)
            emit(i, j, origA.slice(i), origB.slice(j));
        // Drop blocks that neither delete nor insert anything
        return diffs.filter(d => d.del.length > 0 || d.ins.length > 0);
    }
    /**
     * Groups adjacent changed lines together, including context lines,
     * and calculates group metrics.
     */
    findGroups() {
        const { contextLines } = this.options;
        // Finalizes a group: sums up the sizes of its entries and stores it
        const addGroup = (group, start, end) => {
            const [delSize, insSize, totalSize, baseLen] = [
                'delSize', 'insSize', 'totalSize', 'baseLen'
            ].map(k => group.reduce((sum, e) => sum + e[k], 0));
            this.grouped.push({
                start, end, delSize, insSize, totalSize,
                line: group[0].line, entries: group,
                magnitude: this.magnitude(delSize, insSize, baseLen)
            });
        };
        let group = [];
        let start = 0;
        let end = 0;
        for (const entry of this.entries) {
            // Line range covered by this entry including its context
            const s = Math.max(0, entry.line - contextLines);
            const e = entry.line + contextLines;
            if (!group.length || s <= end + 1) {
                // First entry of a group, or its range touches the current group
                if (!group.length)
                    start = s;
                end = Math.max(end, e);
                group.push(entry);
            }
            else {
                // Gap to the current group: close it and open a new one
                addGroup(group, start, end);
                group = [entry];
                start = s;
                end = e;
            }
        }
        // Close the last open group
        if (group.length)
            addGroup(group, start, end);
    }
    /**
     * Calculates the change magnitude string for a group or line.
     *
     * @param {number} del - Number of deleted characters
     * @param {number} ins - Number of inserted characters
     * @param {number} baseLen - Base length for normalization
     * @returns {string} - Magnitude string (e.g. "++-")
     */
    magnitude(del, ins, baseLen) {
        const { maxMagnitudeSymbols } = this.options;
        const total = del + ins;
        // Nothing changed or nothing to normalize against
        if (total === 0 || baseLen === 0)
            return '';
        // Number of symbols: changed share of the base length, at least 1,
        // at most maxMagnitudeSymbols
        const magLen = Math.min(maxMagnitudeSymbols, Math.max(Math.round(total / baseLen * maxMagnitudeSymbols), 1));
        // Split the symbols between insertions and deletions
        const plus = Math.round((ins / total) * magLen);
        const minus = magLen - plus;
        return '+'.repeat(plus) + '-'.repeat(minus);
    }
    /**
     * Generates a unified diff output as a string, with optional CLI coloring.
     *
     * @param {boolean} cli - If true, use CLI colors; otherwise, ASCII markup
     * @returns {string} - Unified diff output
     */
    output(cli) {
        const { mode, contextLines, groupedLines, expandLines, showChangeMagnitude, lineBreak } = this.options;
        const { linesA, linesB, maxLen } = this.text2lines();
        const linePad = Math.max(4, maxLen.toString().length);
        const out = [''];
        // Index the entries by line once instead of scanning them for every line
        const entryByLine = new Map();
        for (const e of this.entries) {
            if (!entryByLine.has(e.line))
                entryByLine.set(e.line, e);
        }
        // Coloring helpers: ANSI escape sequences for CLI, plain text otherwise
        const highlight = (s, ansi) => cli ? `\x1b[${ansi}m${s}\x1b[0m` : s;
        const cy = (s) => highlight(s, '36');
        const gy = (s) => highlight(s, '90');
        const gn = (s) => highlight(s, '32');
        const rd = (s) => highlight(s, '31');
        const ye = (s) => highlight(s, '33');
        const del = (s) => cli ? `\x1b[37;41m${s}\x1b[31;49m` : `-[${s}]`;
        const ins = (s) => cli ? `\x1b[37;42m${s}\x1b[32;49m` : `+[${s}]`;
        // Outputs the lines start..end (inclusive), preceded by an optional header
        const block = (start, end, forced, headerEntry) => {
            if (headerEntry)
                header(headerEntry);
            for (let i = start; i <= end; i++)
                line(i, forced ?? i);
            out.push('');
        };
        // Outputs the "@@ -line,del +line,ins @@" header of a group or entry
        const header = (e) => {
            out.push(`${(' '.repeat(linePad))} ${(cy(`@@ -${(e.line + 1)},${e.delSize} +${(e.line + 1)},${e.insSize} @@`))} ${(showChangeMagnitude ? ye(e.magnitude) : '')}`);
        };
        // Outputs a single line, either as a -/+ pair or as plain context
        const line = (i, forced) => {
            const textA = linesA[i];
            const textB = linesB[i];
            // Lines that are missing or empty in both texts are not printed
            if (!textA && !textB)
                return;
            const entry = entryByLine.get(i);
            const lineNo = (i + 1).toString().padStart(linePad, ' ');
            if (entry && forced === i) {
                // One of the texts may be shorter than the other, in which case
                // its line does not exist; treat it as empty instead of passing
                // undefined on (which crashed in mark() or printed "undefined").
                out.push(`${lineNo} ${rd(`- ${mark(textA ?? '', entry.diffs, 'del')}`)}`);
                out.push(`${' '.repeat(linePad)} ${gn(`+ ${mark(textB ?? '', entry.diffs, 'ins')}`)}`);
            }
            else {
                // Context line; fall back to the second text if the first has no such line
                out.push(`${lineNo} ${gy(textA ?? textB ?? '')}`);
            }
        };
        // Wraps the changed segments of a line into deletion / insertion markup
        const mark = (text, diffs, type) => {
            // Line mode (or nothing to mark): the line is shown as is
            if (!diffs.length || mode === 'line')
                return text;
            let res = '';
            let idx = 0;
            for (const d of diffs) {
                const pos = type === 'del' ? d.posA : d.posB;
                const val = type === 'del' ? d.del : d.ins;
                // Nothing deleted / inserted on this side
                if (!val)
                    continue;
                // Unchanged text in front of the change
                if (pos > idx)
                    res += text.slice(idx, pos);
                res += (type === 'del' ? del(val) : ins(val));
                idx = pos + val.length;
            }
            // Unchanged remainder of the line
            return res + text.slice(idx);
        };
        switch (true) {
            // Expand-all: the whole file in a single block
            case expandLines:
                block(0, maxLen);
                break;
            // One block per group of adjacent changes
            case groupedLines:
                for (const group of this.grouped)
                    block(group.start, group.end, undefined, group);
                break;
            // One block per changed line, surrounded by its context lines
            default:
                for (const entry of this.entries)
                    block(entry.line - contextLines, entry.line + contextLines, entry.line, entry);
                break;
        }
        return out.join(lineBreak);
    }
    /**
     * Returns the structured diff as an array of DiffLine objects.
     *
     * @returns {DiffLine[]} - Array of line-level diffs
     */
    getStructuredDiff() { return this.entries; }
    /**
     * Returns the grouped diff as an array of DiffGroup objects.
     *
     * @returns {DiffGroup[]} - Array of grouped diffs
     */
    getGroupedDiff() { return this.grouped; }
    /**
     * Returns the unified diff as a plain ASCII string.
     *
     * @returns {string} - Unified diff (ASCII)
     */
    getASCIIDiff() { return this.output(false); }
    /**
     * Returns the unified diff as a CLI-colored string.
     *
     * @returns {string} - Unified diff (CLI colors)
     */
    getCLIDiff() { return this.output(true); }
}
1138
+
1139
+ /**
1140
+ * Hash Table Utility
1141
+ * src/utils/HashTable.ts
1142
+ *
1143
+ * @see https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function
1144
+ * @see https://en.wikipedia.org/wiki/Hash_table
1145
+ *
1146
+ * This module implements an instantiable hash table/cache using the FNV-1a hash algorithm.
1147
+ * It allows for multiple independent caches (e.g. for metrics, normalization, etc.) with
1148
+ * type safety and high performance. The FNV-1a algorithm is factored out into its own
1149
+ * static utility class to avoid code duplication and memory overhead.
1150
+ *
1151
+ * The key() method supports any number of string arguments, enabling flexible cache keys
1152
+ * for different use cases (e.g. normalization, metrics, etc.).
1153
+ *
1154
+ * @module Utils/HashTable
1155
+ * @author Paul Köhler (komed3)
1156
+ * @license MIT
1157
+ */
1158
+ /**
1159
+ * Hasher Utility
1160
+ * Static class for FNV-1a hash calculation.
1161
+ */
1162
class Hasher {
    // Constants for the FNV-1a hash algorithm (32-bit prime and offset basis)
    static FNV_PRIME = 0x01000193;
    static HASH_OFFSET = 0x811c9dc5;
    /**
     * Computes a hash value for a given string, based on the FNV-1a scheme
     * (XOR, then multiply by the FNV prime) followed by a final mixing step.
     * The string is consumed in chunks of 4 UTF-16 code units; the remaining
     * 0-3 code units are folded in one at a time.
     *
     * All multiplications use Math.imul so they wrap at 32 bits. A plain `*`
     * on two 32-bit values can exceed 2^53, where doubles silently round away
     * the low bits; those lost bits reduced the entropy of the result and made
     * collisions between different strings far more likely.
     *
     * @param {string} str - The string to hash
     * @return {number} - The computed hash value as an unsigned 32-bit integer
     */
    static fnv1a(str) {
        const len = str.length;
        let hash = this.HASH_OFFSET;
        // Process 4 code units at a time
        const chunks = Math.floor(len / 4);
        for (let i = 0; i < chunks; i++) {
            const pos = i * 4;
            // Pack 4 code units into one 32-bit value
            const chunk = ((str.charCodeAt(pos)) |
                (str.charCodeAt(pos + 1) << 8) |
                (str.charCodeAt(pos + 2) << 16) |
                (str.charCodeAt(pos + 3) << 24));
            hash ^= chunk;
            hash = Math.imul(hash, this.FNV_PRIME);
        }
        // Fold in the code units that did not fill a whole chunk
        for (let pos = chunks * 4; pos < len; pos++) {
            hash ^= str.charCodeAt(pos);
            hash = Math.imul(hash, this.FNV_PRIME);
        }
        // Final mixing to improve distribution
        hash ^= hash >>> 16;
        hash = Math.imul(hash, 0x85ebca6b);
        hash ^= hash >>> 13;
        hash = Math.imul(hash, 0xc2b2ae35);
        hash ^= hash >>> 16;
        // Convert to unsigned 32-bit integer
        return hash >>> 0;
    }
}
1207
+ /**
1208
+ * HashTable class implements an instantiable hash table/cache.
1209
+ * Allows for multiple independent caches with type safety and high performance.
1210
+ *
1211
+ * @template K - The type of the label for the key (e.g. string, MetricName, …)
1212
+ * @template T - The type of value to be stored in the hash table (e.g. MetricCompute, string, …)
1213
+ */
1214
class HashTable {
    // The max. length of a string to hash, which is set to 2048 characters.
    static MAX_LEN = 2048;
    // The max. size of the hash table, which is set to 10,000.
    static TABLE_SIZE = 10_000;
    /**
     * The internal map to store entries.
     * The key is a string generated from the label and any number of hashed strings.
     * The value is of type T.
     */
    table = new Map();
    /**
     * Generates a hash key for any number of string arguments.
     * The key is in the format "label-H1-H2-H3-..."
     *
     * @param {K} label - Label for this key (e.g. metric name, normalization flags, …)
     * @param {string[]} strs - Array of strings to hash (e.g. input, params, …)
     * @param {boolean} [sorted=false] - Whether to sort the hashes before creating the key
     * @returns {string|false} - The hash key or false if any string is too long
     */
    key(label, strs, sorted = false) {
        // Refuse to build a key if any string exceeds the maximum length
        if (strs.some(str => str.length > HashTable.MAX_LEN))
            return false;
        // Hash all strings
        const hashes = strs.map(s => Hasher.fnv1a(s));
        // Sort numerically in ascending order. Without a comparator,
        // Array.prototype.sort compares the numbers as strings (10 before 9).
        if (sorted)
            hashes.sort((x, y) => x - y);
        // Build key: label-H1-H2-H3-...
        return [label, ...hashes].join('-');
    }
    /**
     * Checks if a key exists in the hash table.
     *
     * @param {string} key - The key to check
     * @returns {boolean} - True if the key exists, false otherwise
     */
    has(key) { return this.table.has(key); }
    /**
     * Retrieves the entry from the hash table by its key.
     *
     * @param {string} key - The key to look up
     * @returns {T|undefined} - The entry if found, undefined otherwise
     */
    get(key) { return this.table.get(key); }
    /**
     * Adds an entry to the hash table or updates an existing one.
     *
     * @param {string} key - The hashed key for the entry
     * @param {T} entry - The entry itself to add
     * @param {boolean} [update=true] - Whether to update the entry if it already exists
     * @returns {boolean} - True if the entry was stored; false if the table is
     *                      full (new key) or the key exists and update is false
     */
    set(key, entry, update = true) {
        if (this.table.has(key)) {
            // Overwriting an existing key does not grow the table, so it is
            // allowed even when the table has reached its maximum size
            if (!update)
                return false;
        }
        else if (this.table.size >= HashTable.TABLE_SIZE) {
            // No room left for a new key
            return false;
        }
        this.table.set(key, entry);
        return true;
    }
    /**
     * Deletes an entry from the hash table by its key.
     *
     * @param {string} key - The key of the entry to delete
     */
    delete(key) { this.table.delete(key); }
    /**
     * Clears the hash table.
     * This method removes all entries from the hash table.
     */
    clear() { this.table.clear(); }
    /**
     * Returns the current size of the hash table.
     *
     * @returns {number} - The number of entries in the hash table
     */
    size() { return this.table.size; }
}
1296
+
1297
+ /**
1298
+ * Normalizer Utility
1299
+ * src/utils/Normalizer.ts
1300
+ *
1301
+ * @see https://en.wikipedia.org/wiki/Text_normalization
1302
+ * @see https://en.wikipedia.org/wiki/Unicode_equivalence
1303
+ *
1304
+ * This module provides a Normalizer class that allows for string normalization based
1305
+ * on various flags. It uses a pipeline of normalization functions that can be reused
1306
+ * and cached for efficiency. The Normalizer can handle both single strings and arrays
1307
+ * of strings, and supports synchronous and asynchronous normalization.
1308
+ *
1309
+ * Supported flags:
1310
+ * 'd' :: Normalize to NFD (Normalization Form Decomposed)
1311
+ * 'u' :: Normalize to NFC (Normalization Form Composed)
1312
+ * 'x' :: Normalize to NFKC (Normalization Form Compatibility Composed)
1313
+ * 'w' :: Collapse whitespace
1314
+ * 't' :: Remove leading and trailing whitespace
1315
+ * 'r' :: Remove double characters
1316
+ * 's' :: Remove punctuation / special characters
1317
+ * 'k' :: Remove non-letter characters
1318
+ * 'n' :: Remove non-number characters
1319
+ * 'i' :: Case insensitive (convert to lowercase)
1320
+ *
1321
+ * @module Utils/Normalizer
1322
+ * @author Paul Köhler (komed3)
1323
+ * @license MIT
1324
+ */
1325
+ /**
1326
+ * The Normalizer class providing methods to normalize strings based on various flags.
1327
+ */
1328
class Normalizer {
    /**
     * A map that holds the compiled normalization function per flag string.
     * This allows for reusing normalization logic without recompiling it.
     */
    static pipeline = new Map();
    /**
     * A cache to store normalized strings based on the flags and input.
     * This helps avoid recomputing normalization for the same input and flags.
     */
    static cache = new HashTable();
    /**
     * Returns a normalization function based on the provided flags.
     * The flags are a string of characters that define the normalization steps.
     * The steps always run in the fixed order below, regardless of the order
     * of the characters inside the flag string.
     *
     * @param {NormalizeFlags} flags - A string of characters representing the normalization steps
     * @returns {NormalizerFn} - A function that normalizes a string based on the provided flags
     */
    static getPipeline(flags) {
        // Return the cached pipeline if it exists
        if (Normalizer.pipeline.has(flags))
            return Normalizer.pipeline.get(flags);
        // Define the normalization steps based on the flags
        const steps = [];
        // Normalize to NFD (Normalization Form Decomposed)
        if (flags.includes('d'))
            steps.push(str => str.normalize('NFD'));
        // Normalize to NFC (Normalization Form Composed)
        if (flags.includes('u'))
            steps.push(str => str.normalize('NFC'));
        // Normalize to NFKC (Normalization Form Compatibility Composed)
        if (flags.includes('x'))
            steps.push(str => str.normalize('NFKC'));
        // Collapse whitespace
        if (flags.includes('w'))
            steps.push(str => str.replace(/\s+/g, ' '));
        // Remove leading and trailing whitespace
        if (flags.includes('t'))
            steps.push(str => str.trim());
        // Remove double characters
        if (flags.includes('r'))
            steps.push(str => str.replace(/(.)\1+/g, '$1'));
        // Remove punctuation / special characters
        if (flags.includes('s'))
            steps.push(str => str.replace(/[^\p{L}\p{N}\s]/gu, ''));
        // Remove non-letter characters
        if (flags.includes('k'))
            steps.push(str => str.replace(/[^\p{L}]/gu, ''));
        // Remove non-number characters. The character class must be negated
        // (as for 'k'); without the negation the numbers themselves were
        // removed, which is the opposite of what the flag is documented to do.
        if (flags.includes('n'))
            steps.push(str => str.replace(/[^\p{N}]/gu, ''));
        // Case insensitive
        if (flags.includes('i'))
            steps.push(str => str.toLowerCase());
        // Build the normalization function: feed the input through every step
        const compiled = (input) => steps.reduce((res, step) => step(res), input);
        // Cache the compiled function for the given flags
        Normalizer.pipeline.set(flags, compiled);
        return compiled;
    }
    /**
     * Normalizes the input string or array of strings based on the provided flags.
     * The flags are a string of characters that define the normalization steps.
     *
     * @param {string|string[]} input - The string or array of strings to normalize
     * @param {NormalizeFlags} flags - A string of characters representing the normalization steps
     * @returns {string|string[]} - The normalized string(s)
     */
    static normalize(input, flags) {
        // If input is an array, normalize each string in the array
        if (Array.isArray(input))
            return input.map(s => Normalizer.normalize(s, flags));
        // Without usable flags or input there is nothing to do
        if (!flags || typeof flags !== 'string' || !input)
            return input;
        // Generate a cache key based on the flags and input
        // (false if the input is too long to be cached)
        const key = Normalizer.cache.key(flags, [input]);
        // If the key exists in the cache, return the cached result
        if (key && Normalizer.cache.has(key))
            return Normalizer.cache.get(key);
        // Normalize the input using the pipeline for the given flags
        const res = Normalizer.getPipeline(flags)(input);
        // If a key was generated, store the result in the cache
        if (key)
            Normalizer.cache.set(key, res);
        return res;
    }
    /**
     * Promise-returning variant of normalize() for a string or an array of strings.
     *
     * @param {string|string[]} input - The string or array of strings to normalize
     * @param {NormalizeFlags} flags - A string of characters representing the normalization steps
     * @returns {Promise<string|string[]>} - A promise that resolves to the normalized string(s)
     */
    static async normalizeAsync(input, flags) {
        return await (Array.isArray(input)
            // If input is an array, normalize each string in the array
            ? Promise.all(input.map(s => Normalizer.normalize(s, flags)))
            // If input is a single string, normalize it directly
            : Promise.resolve(Normalizer.normalize(input, flags)));
    }
    /**
     * Clears the normalization pipeline and cache.
     * This is useful for resetting the state of the Normalizer.
     */
    static clear() {
        Normalizer.pipeline.clear();
        Normalizer.cache.clear();
    }
}
1447
+
1448
+ /**
1449
+ * Filter Utility
1450
+ * src/utils/Filter.ts
1451
+ *
1452
+ * This module provides a Filter class that allows for the management and application of
1453
+ * filters to strings based on hooks. Filters can be added, removed, paused, resumed, and
1454
+ * applied to input strings. Each filter has an id, a function, a priority, and options
1455
+ * for activation and overrideability.
1456
+ *
1457
+ * @module Utils/Filter
1458
+ * @author Paul Köhler (komed3)
1459
+ * @license MIT
1460
+ */
1461
/**
 * The Filter class manages named filter functions that are grouped by hook and
 * applied to strings in priority order.
 *
 * All state lives in the static `filters` map, so every caller shares the same
 * set of filters. Each entry has the shape
 * `{ id, fn, priority, active, overrideable }`.
 */
class Filter {
    /**
     * A static map to hold all filters.
     * The key is the hook name, and the value is an array of FilterEntry objects
     * kept sorted by ascending priority.
     */
    static filters = new Map();

    /**
     * Finds a filter by its hook and id.
     *
     * @param {string} hook - The name of the hook
     * @param {string} id - The id of the filter
     * @returns {FilterEntry|undefined} - The FilterEntry if found, otherwise undefined
     */
    static find(hook, id) {
        return Filter.filters.get(hook)?.find((entry) => entry.id === id);
    }

    /**
     * Adds a filter to the specified hook.
     *
     * An existing filter with the same id is replaced, unless it was registered
     * with `overrideable: false`.
     *
     * @param {string} hook - The name of the hook
     * @param {string} id - The id of the filter
     * @param {FilterFn} fn - The filter function
     * @param {FilterOptions} [opt] - Additional options for the filter
     *   (`priority` defaults to 10, `active` and `overrideable` default to true)
     * @returns {boolean} - Returns true if the filter was added, false if it was not added due to override restrictions
     */
    static add(hook, id, fn, opt = {}) {
        const { priority = 10, active = true, overrideable = true } = opt;
        const entries = Filter.filters.get(hook) ?? [];
        const index = entries.findIndex((entry) => entry.id === id);
        if (index >= 0) {
            // A protected filter must not be replaced
            if (!entries[index].overrideable) {
                return false;
            }
            entries.splice(index, 1);
        }
        entries.push({ id, fn, priority, active, overrideable });
        // Lower priority values run first
        entries.sort((a, b) => a.priority - b.priority);
        Filter.filters.set(hook, entries);
        return true;
    }

    /**
     * Removes a filter by its hook and id.
     *
     * @param {string} hook - The name of the hook
     * @param {string} id - The id of the filter
     * @returns {boolean} - Returns true if the filter was removed, false if it was not found
     */
    static remove(hook, id) {
        const entries = Filter.filters.get(hook);
        if (!entries) {
            return false;
        }
        const index = entries.findIndex((entry) => entry.id === id);
        if (index < 0) {
            return false;
        }
        entries.splice(index, 1);
        return true;
    }

    /**
     * Pauses a filter by its hook and id.
     *
     * @param {string} hook - The name of the hook
     * @param {string} id - The id of the filter
     * @returns {boolean} - Returns true if the filter was paused, false if it was not found
     */
    static pause(hook, id) {
        const entry = Filter.find(hook, id);
        if (!entry) {
            return false;
        }
        entry.active = false;
        return true;
    }

    /**
     * Resumes a filter by its hook and id.
     *
     * @param {string} hook - The name of the hook
     * @param {string} id - The id of the filter
     * @returns {boolean} - Returns true if the filter was resumed, false if it was not found
     */
    static resume(hook, id) {
        const entry = Filter.find(hook, id);
        if (!entry) {
            return false;
        }
        entry.active = true;
        return true;
    }

    /**
     * Lists all filters for a given hook.
     *
     * @param {string} hook - The name of the hook
     * @param {boolean} active - If true, only list active filters
     * @returns {string[]} - An array of filter ids (in priority order)
     */
    static list(hook, active = false) {
        const entries = Filter.filters.get(hook) ?? [];
        const ids = [];
        for (const entry of entries) {
            if (!active || entry.active) {
                ids.push(entry.id);
            }
        }
        return ids;
    }

    /**
     * Applies all active filters for a given hook to the input string(s).
     *
     * @param {string} hook - The name of the hook
     * @param {string|string[]} input - The input string(s) to be filtered
     * @returns {string|string[]} - The filtered string(s)
     */
    static apply(hook, input) {
        const entries = Filter.filters.get(hook);
        // Unknown hook or nothing active: hand the input back untouched
        if (!entries || entries.every((entry) => !entry.active)) {
            return input;
        }
        // Feeds one string through every active filter, in priority order
        const applyOne = (s) => {
            for (const entry of entries) {
                if (entry.active) {
                    s = entry.fn(s);
                }
            }
            return s;
        };
        return Array.isArray(input) ? input.map(applyOne) : applyOne(input);
    }

    /**
     * Applies all active filters for a given hook to the input string(s) asynchronously.
     * Each filter function may return a Promise or a plain string; all are awaited in order.
     *
     * @param {string} hook - The name of the hook
     * @param {string|string[]} input - The input string(s) to be filtered
     * @returns {Promise<string|string[]>} - The filtered string(s)
     */
    static async applyAsync(hook, input) {
        const entries = Filter.filters.get(hook);
        // Unknown hook or nothing active: hand the input back untouched
        if (!entries || entries.every((entry) => !entry.active)) {
            return input;
        }
        // Filters of one string run strictly one after another, since each one
        // consumes the output of its predecessor
        const applyOne = async (s) => {
            for (const entry of entries) {
                if (entry.active) {
                    s = await Promise.resolve(entry.fn(s));
                }
            }
            return s;
        };
        return Array.isArray(input) ? Promise.all(input.map(applyOne)) : applyOne(input);
    }

    /**
     * Clears all filters or filters for a specific hook.
     *
     * Only an omitted (`undefined` / `null`) hook clears everything. Any other
     * value, including the empty string, is treated as a hook name, so that
     * `Filter.clear('')` cannot accidentally wipe every hook.
     *
     * @param {string} [hook] - Optional name of the hook to clear filters for
     */
    static clear(hook) {
        if (hook === undefined || hook === null) {
            Filter.filters.clear();
        } else {
            Filter.filters.delete(hook);
        }
    }
}
1643
+
1644
+ /**
1645
+ * Registry Utility
1646
+ * src/utils/Registry.ts
1647
+ *
1648
+ * This module provides a Registry function that allows for registering,
1649
+ * removing, checking, getting, and listing class constructors.
1650
+ *
1651
+ * It is designed to manage class extensions, ensuring that all registered
1652
+ * classes extend a specified base constructor.
1653
+ *
1654
+ * @module Utils/Registry
1655
+ * @author Paul Köhler (komed3)
1656
+ * @license MIT
1657
+ */
1658
+ /**
1659
+ * Global registry object to hold multiple registries.
1660
+ * Each registry is keyed by a string identifier.
+ *
+ * The object is created with `Object.create(null)`, so it has no prototype and the
+ * `in` checks performed by `Registry()` and `resolveCls()` only ever match names
+ * that were actually stored here (never inherited keys such as `toString`).
1661
+ *
1662
+ * @type {Record<string, RegistryService<any>>}
1663
+ */
1664
+ const registry = Object.create(null);
1665
+ /**
1666
+ * Factory object to hold factory functions for creating instances.
1667
+ * This is used to create instances of registered classes.
+ *
+ * `Registry()` stores one function per registry name here; that function forwards
+ * its arguments to `createFromRegistry()` for the same registry. Like `registry`,
+ * the object is prototype-less so `in` checks only match stored names.
1668
+ *
1669
+ * @type {Record<string, ( cls: string, ...args: any[] ) => InstanceType<any>>}
1670
+ */
1671
+ const factory = Object.create(null);
1672
/**
 * Registry function to create a service for managing class constructors.
 *
 * The created service is stored in the module-level `registry` object under `reg`,
 * and a matching factory function (forwarding to `createFromRegistry`) is stored
 * in the module-level `factory` object.
 *
 * @param {string} reg - The name of the registry
 * @param {RegistryConstructor<T>} ctor - The base constructor that all registered classes must extend
 * @returns {RegistryService<T>} - An object with methods to register, remove, check, get, and list classes
 * @throws {Error} If the registry already exists (overwriting is forbidden)
 */
function Registry(reg, ctor) {
    // Throws an error if the registry already exists
    if (reg in registry || reg in factory) {
        throw new Error(`registry <${reg}> already exists / overwriting is forbidden`);
    }
    // Prototype-less dictionary, so `in` checks only see registered names
    const classes = Object.create(null);
    const service = {
        /**
         * Register a new extension of the base class.
         *
         * Note that the base class itself does not qualify: only classes whose
         * prototype chain contains `ctor.prototype` are accepted.
         *
         * @param {string} name - The name of the class to register
         * @param {RegistryConstructor<T>} cls - The class constructor
         * @param {boolean} [update=false] - Whether to allow overwriting an existing entry
         * @throws {TypeError} If the class does not extend the base constructor
         * @throws {Error} If the class name already exists and update is false
         */
        add(name, cls, update = false) {
            // `?.` lets a missing class (undefined / null) fail with the same
            // descriptive TypeError as any other non-matching value
            if (!(cls?.prototype instanceof ctor)) {
                throw new TypeError(`class must extend <${reg}>`);
            }
            if (!update && name in classes) {
                throw new Error(`entry <${name}> already exists / use <update=true> to overwrite`);
            }
            classes[name] = cls;
        },
        /**
         * Remove a class from the registry.
         * Removing a name that is not registered is a no-op.
         *
         * @param {string} name - The name of the class to remove
         */
        remove(name) { delete classes[name]; },
        /**
         * Check if a class is registered.
         *
         * @param {string} name - The name of the class to check
         * @returns {boolean} - True if the class is registered, false otherwise
         */
        has(name) { return name in classes; },
        /**
         * List all registered class names.
         *
         * @returns {string[]} - An array of registered class names
         */
        list() { return Object.keys(classes); },
        /**
         * Get a registered class by name.
         *
         * @param {string} name - The name of the class to retrieve
         * @returns {RegistryConstructor<T>} - The class constructor
         * @throws {Error} If the class is not registered
         */
        get(name) {
            if (!(name in classes)) {
                throw new Error(`class <${name}> not registered for <${reg}>`);
            }
            return classes[name];
        }
    };
    // Register the service in the global registry
    registry[reg] = service;
    // Create a factory function for creating instances from the registry
    factory[reg] = (cls, ...args) => createFromRegistry(reg, cls, ...args);
    return service;
}
1742
/**
 * Look up a class constructor in one of the registries.
 *
 * A string is treated as a registered name and resolved through the registry's
 * `get()` method (which may throw for unknown names); any other value is
 * assumed to already be the class and is returned unchanged.
 *
 * @param {string} reg - The name of the registry
 * @param {T|string} cls - The class itself or name of the class to resolve
 * @returns {T|undefined} - The class constructor if found, otherwise undefined
 * @throws {ReferenceError} If the registry does not exist
 */
function resolveCls(reg, cls) {
    const registryExists = reg in registry;
    if (!registryExists) {
        throw new ReferenceError(`registry <${reg}> does not exist`);
    }
    // Anything that is not a name is passed through as-is
    if (typeof cls !== 'string') {
        return cls;
    }
    return registry[reg]?.get(cls);
}
1755
/**
 * Create an instance of a class from a specific registry.
 *
 * The class is resolved through `resolveCls()`; errors raised during resolution
 * (unknown registry or unknown class name) propagate unchanged. Any error thrown
 * while constructing the instance is wrapped in a new `Error` whose message names
 * the class and whose `cause` holds the original error.
 *
 * @param {string} reg - The name of the registry
 * @param {T|string} cls - The class itself or name of the class to instantiate
 * @param {...any} args - Arguments to pass to the class constructor
 * @returns {T} - An instance of the class
 * @throws {Error} If the class cannot be instantiated
 */
function createFromRegistry(reg, cls, ...args) {
    const ctor = resolveCls(reg, cls);
    try {
        return new ctor(...args);
    }
    catch (err) {
        // Report a readable identifier: interpolating the constructor itself
        // would paste the entire class source into the message
        let label;
        if (typeof cls === 'string') {
            label = cls;
        } else if (typeof ctor === 'function') {
            label = ctor.name || 'anonymous';
        } else {
            label = String(ctor);
        }
        throw new Error(`cannot instantiate class <${label}>`, { cause: err });
    }
}
1773
+
1774
+ /**
1775
+ * Abstract Metric
1776
+ * src/metric/Metric.ts
1777
+ *
1778
+ * This module defines an abstract class for string metrics, providing a framework for
1779
+ * computing various string similarity metrics. It includes methods for running metrics
1780
+ * in different modes (single, batch, pairwise), synchronously or asynchronously, and caching
1781
+ * results to optimize performance. The class is designed to be extended by specific
1782
+ * metric implementations like the Levenshtein distance or Jaro-Winkler similarity.
1783
+ *
1784
+ * It provides:
1785
+ * - A base class for string metrics with common functionality
1786
+ * - Methods for running metrics in different modes
1787
+ * - Pre-computation for trivial cases to optimize performance
1788
+ * - Caching of metric computations to avoid redundant calculations
1789
+ * - Support for symmetrical metrics (same result for inputs in any order)
1790
+ * - Performance tracking capabilities (Profiler)
1791
+ * - Asynchronous execution support for metrics
1792
+ *
1793
+ * This class is intended to be extended by specific metric implementations that will
1794
+ * implement the `compute` method to define the specific metric computation logic.
1795
+ *
1796
+ * @module Metric
1797
+ * @author Paul Köhler (komed3)
1798
+ * @license MIT
1799
+ */
1800
+ // Get the singleton profiler instance for performance monitoring; Metric.runSingle() passes every non-trivial computation (cache lookup plus compute) to profiler$2.run()
1801
+ const profiler$2 = Profiler.getInstance();
1802
+ /**
1803
+ * Abstract class representing a generic string metric.
1804
+ *
1805
+ * @abstract
1806
+ * @template R - The type of the raw result, defaulting to `MetricRaw`.
1807
+ */
1808
+ class Metric {
1809
+ // Cache for metric computations to avoid redundant calculations
1810
+ static cache = new HashTable();
1811
+ // Metric name for identification
1812
+ metric;
1813
+ // Inputs for the metric computation, transformed into arrays
1814
+ a;
1815
+ b;
1816
+ // Store original inputs for result mapping
1817
+ origA = [];
1818
+ origB = [];
1819
+ // Options for the metric computation, such as performance tracking
1820
+ options;
1821
+ // Indicates whether the metric is symmetric (same result for inputs in any order)
1822
+ symmetric;
1823
+ /**
1824
+ * Result of the metric computation, which can be a single result or an array of results.
1825
+ * This will be populated after running the metric.
1826
+ */
1827
+ results;
1828
+ /**
1829
+ * Static method to clear the cache of metric computations.
1830
+ */
1831
+ static clear() { this.cache.clear(); }
1832
+ /**
1833
+ * Swaps two strings and their lengths if the first is longer than the second.
1834
+ *
1835
+ * @param {string} a - First string
1836
+ * @param {string} b - Second string
1837
+ * @param {number} m - Length of the first string
1838
+ * @param {number} n - Length of the second string
1839
+ * @returns {[string, string, number, number]} - Swapped strings and lengths
1840
+ */
1841
+ static swap(a, b, m, n) { return m > n ? [b, a, n, m] : [a, b, m, n]; }
1842
+ /**
1843
+ * Clamps the similarity result between 0 and 1.
1844
+ *
1845
+ * @param {number} res - The input similarity to clamp
1846
+ * @returns {number} - The clamped similarity (0 to 1)
1847
+ */
1848
+ static clamp(res) { return Math.max(0, Math.min(1, res)); }
1849
+ /**
1850
+ * Constructor for the Metric class.
1851
+ * Initializes the metric with two inputs (strings or arrays of strings) and options.
1852
+ *
1853
+ * @param {string} metric - The name of the metric (e.g. 'levenshtein')
1854
+ * @param {MetricInput} a - First input string or array of strings
1855
+ * @param {MetricInput} b - Second input string or array of strings
1856
+ * @param {MetricOptions} [opt] - Options for the metric computation
1857
+ * @param {boolean} [symmetric=false] - Whether the metric is symmetric (same result for inputs in any order)
1858
+ * @throws {Error} - If inputs `a` or `b` are empty
1859
+ */
1860
+ constructor(metric, a, b, opt = {}, symmetric = false) {
1861
+ // Set the metric name
1862
+ this.metric = metric;
1863
+ // Set the inputs
1864
+ this.a = Array.isArray(a) ? a : [a];
1865
+ this.b = Array.isArray(b) ? b : [b];
1866
+ // Validate inputs: ensure they are not empty
1867
+ if (this.a.length === 0 || this.b.length === 0)
1868
+ throw new Error(`inputs <a> and <b> must not be empty`);
1869
+ // Set options
1870
+ this.options = opt;
1871
+ this.symmetric = symmetric;
1872
+ }
1873
+ /**
1874
+ * Pre-compute the metric for two strings.
1875
+ * This method is called before the actual computation to handle trivial cases.
1876
+ *
1877
+ * @param {string} a - First string
1878
+ * @param {string} b - Second string
1879
+ * @param {number} m - Length of the first string
1880
+ * @param {number} n - Length of the second string
1881
+ * @returns {MetricCompute<R>|undefined} - Pre-computed result or undefined if not applicable
1882
+ */
1883
+ preCompute(a, b, m, n) {
1884
+ // If strings are identical, return a similarity of 1
1885
+ if (a === b)
1886
+ return { res: 1 };
1887
+ // If the lengths of both strings is less than 2, return a similarity of 0
1888
+ if (m == 0 || n == 0 || (m < 2 && n < 2))
1889
+ return { res: 0 };
1890
+ return undefined;
1891
+ }
1892
+ /**
1893
+ * Abstract method to be implemented by subclasses to perform the metric computation.
1894
+ * This method should contain the logic for computing the metric between two strings.
1895
+ *
1896
+ * @param {string} a - First string
1897
+ * @param {string} b - Second string
1898
+ * @param {number} m - Length of the first string
1899
+ * @param {number} n - Length of the second string
1900
+ * @param {number} maxLen - Maximum length of the strings
1901
+ * @returns {MetricCompute<R>} - The result of the metric computation
1902
+ * @throws {Error} - If not overridden in a subclass
1903
+ */
1904
+ compute(a, b, m, n, maxLen) {
1905
+ throw new Error(`method compute() must be overridden in a subclass`);
1906
+ }
1907
+ /**
1908
+ * Run the metric computation for single inputs (two strings).
1909
+ * Applies preCompute for trivial cases before cache lookup and computation.
1910
+ *
1911
+ * If the profiler is active, it will measure time and memory usage.
1912
+ *
1913
+ * @param {number} i - Pointer to the first string
1914
+ * @param {number} j - Pointer to the second string
1915
+ * @returns {MetricResultSingle<R>} - The result of the metric computation
1916
+ */
1917
+ runSingle(i, j) {
1918
+ // Type safety: convert inputs to strings
1919
+ let a = String(this.a[i]), A = a;
1920
+ let b = String(this.b[j]), B = b;
1921
+ // Get lengths
1922
+ let m = A.length, n = B.length;
1923
+ // Pre-compute trivial cases (identical, empty, etc.)
1924
+ let result = this.preCompute(A, B, m, n);
1925
+ if (!result) {
1926
+ // If the profiler is enabled, measure; else, just run
1927
+ result = profiler$2.run(() => {
1928
+ // Generate a cache key based on the metric and pair of strings `a` and `b`
1929
+ const key = Metric.cache.key(this.metric, [A, B], this.symmetric);
1930
+ // If the key exists in the cache, return the cached result
1931
+ // Otherwise, compute the metric using the algorithm
1932
+ return Metric.cache.get(key || '') ?? (() => {
1933
+ // If the metric is symmetrical, swap `a` and `b` (shorter string first)
1934
+ if (this.symmetric)
1935
+ [A, B, m, n] = Metric.swap(A, B, m, n);
1936
+ // Compute the similarity using the algorithm
1937
+ const res = this.compute(A, B, m, n, Math.max(m, n));
1938
+ // If a key was generated, store the result in the cache
1939
+ if (key)
1940
+ Metric.cache.set(key, res);
1941
+ return res;
1942
+ })();
1943
+ });
1944
+ }
1945
+ // Build metric result object
1946
+ return {
1947
+ metric: this.metric,
1948
+ a: this.origA[i] ?? a,
1949
+ b: this.origB[j] ?? b,
1950
+ ...result
1951
+ };
1952
+ }
1953
+ /**
1954
+ * Run the metric computation for single inputs (two strings) asynchronously.
1955
+ *
1956
+ * @param {number} i - Pointer to the first string
1957
+ * @param {number} j - Pointer to the second string
1958
+ * @returns {Promise<MetricResultSingle<R>>} - Promise resolving the result of the metric computation
1959
+ */
1960
+ async runSingleAsync(i, j) {
1961
+ return Promise.resolve(this.runSingle(i, j));
1962
+ }
1963
+ /**
1964
+ * Run the metric computation for batch inputs (arrays of strings).
1965
+ *
1966
+ * It iterates through each string in the first array and computes the metric
1967
+ * against each string in the second array.
1968
+ */
1969
+ runBatch() {
1970
+ const results = [];
1971
+ // Loop through each combination of strings in a[] and b[]
1972
+ for (let i = 0; i < this.a.length; i++)
1973
+ for (let j = 0; j < this.b.length; j++)
1974
+ results.push(this.runSingle(i, j));
1975
+ // Populate the results
1976
+ // `this.results` will be an array of MetricResultSingle
1977
+ this.results = results;
1978
+ }
1979
+ /**
1980
+ * Run the metric computation for batch inputs (arrays of strings) asynchronously.
1981
+ */
1982
+ async runBatchAsync() {
1983
+ const results = [];
1984
+ // Loop through each combination of strings in a[] and b[]
1985
+ for (let i = 0; i < this.a.length; i++)
1986
+ for (let j = 0; j < this.b.length; j++)
1987
+ results.push(await this.runSingleAsync(i, j));
1988
+ // Populate the results
1989
+ // `this.results` will be an array of MetricResultSingle
1990
+ this.results = results;
1991
+ }
1992
+ /**
1993
+ * Run the metric computation for pairwise inputs (A[i] vs B[i]).
1994
+ *
1995
+ * This method assumes that both `a` and `b` are arrays of equal length
1996
+ * and computes the metric only for corresponding index pairs.
1997
+ */
1998
+ runPairwise() {
1999
+ const results = [];
2000
+ // Compute metric for each corresponding pair
2001
+ for (let i = 0; i < this.a.length; i++)
2002
+ results.push(this.runSingle(i, i));
2003
+ // Populate the results
2004
+ // `this.results` will be an array of MetricResultSingle
2005
+ this.results = results;
2006
+ }
2007
+ /**
2008
+ * Run the metric computation for pairwise inputs (A[i] vs B[i]) asynchronously.
2009
+ */
2010
+ async runPairwiseAsync() {
2011
+ const results = [];
2012
+ // Compute metric for each corresponding pair
2013
+ for (let i = 0; i < this.a.length; i++)
2014
+ results.push(await this.runSingleAsync(i, i));
2015
+ // Populate the results
2016
+ // `this.results` will be an array of MetricResultSingle
2017
+ this.results = results;
2018
+ }
2019
+ /**
2020
+ * Set the original inputs to which the results of the metric calculation will refer.
2021
+ *
2022
+ * @param {MetricInput} [a] - original input(s) for a
2023
+ * @param {MetricInput} [b] - original input(s) for b
2024
+ */
2025
+ setOriginal(a, b) {
2026
+ if (a)
2027
+ this.origA = Array.isArray(a) ? a : [a];
2028
+ if (b)
2029
+ this.origB = Array.isArray(b) ? b : [b];
2030
+ return this;
2031
+ }
2032
+ /**
2033
+ * Check if the inputs are in batch mode.
2034
+ *
2035
+ * This method checks if either `a` or `b` contains more than one string,
2036
+ * indicating that the metric is being run in batch mode.
2037
+ *
2038
+ * @returns {boolean} - True if either input is an array with more than one element
2039
+ */
2040
+ isBatch() { return this.a.length > 1 || this.b.length > 1; }
2041
+ /**
2042
+ * Check if the inputs are in single mode.
2043
+ *
2044
+ * This method checks if both `a` and `b` are single strings (not arrays),
2045
+ * indicating that the metric is being run on a single pair of strings.
2046
+ *
2047
+ * @returns {boolean} - True if both inputs are single strings
2048
+ */
2049
+ isSingle() { return !this.isBatch(); }
2050
+ /**
2051
+ * Check if the inputs are in pairwise mode.
2052
+ *
2053
+ * This method checks if both `a` and `b` are arrays of the same length,
2054
+ * indicating that the metric is being run on corresponding pairs of strings.
2055
+ *
2056
+ * @returns {boolean} - True if both inputs are arrays of equal length
2057
+ * @param {boolean} [safe=false] - If true, does not throw an error if lengths are not equal
2058
+ * @throws {Error} - If `safe` is false and the lengths of `a` and `b` are not equal
2059
+ */
2060
+ isPairwise(safe = false) {
2061
+ return this.isBatch() && this.a.length === this.b.length ? true : !safe && (() => {
2062
+ throw new Error(`mode <pairwise> requires arrays of equal length`);
2063
+ })();
2064
+ }
2065
+ /**
2066
+ * Check if the metric is symmetrical.
2067
+ *
2068
+ * This method returns whether the metric is symmetric, meaning it produces the same
2069
+ * result regardless of the order of inputs (e.g., Levenshtein distance).
2070
+ *
2071
+ * @returns {boolean} - True if the metric is symmetric
2072
+ */
2073
+ isSymmetrical() { return this.symmetric; }
2074
+ /**
2075
+ * Determine which mode to run the metric in.
2076
+ *
2077
+ * This method checks the provided mode or defaults to the mode specified in options.
2078
+ * If no mode is specified, it defaults to 'default'.
2079
+ *
2080
+ * @param {MetricMode} [mode] - The mode to run the metric in (optional)
2081
+ * @returns {MetricMode} - The determined mode
2082
+ */
2083
+ whichMode(mode) { return mode ?? this.options?.mode ?? 'default'; }
2084
+ /**
2085
+ * Clear the cached results of the metric.
2086
+ *
2087
+ * This method resets the `results` property to `undefined`, effectively clearing
2088
+ * any previously computed results. It can be useful for re-running the metric
2089
+ * with new inputs or options.
2090
+ */
2091
+ clear() { this.results = undefined; }
2092
+ /**
2093
+ * Run the metric computation based on the specified mode.
2094
+ *
2095
+ * @param {MetricMode} [mode] - The mode to run the metric in (optional)
2096
+ * @param {boolean} [clear=true] - Whether to clear previous results before running
2097
+ * @throws {Error} - If an unsupported mode is specified
2098
+ */
2099
+ run(mode, clear = true) {
2100
+ // Clear previous results if requested
2101
+ if (clear)
2102
+ this.clear();
2103
+ switch (this.whichMode(mode)) {
2104
+ // Default mode runs the metric on single inputs or falls back to batch mode
2105
+ case 'default': if (this.isSingle()) {
2106
+ this.results = this.runSingle(0, 0);
2107
+ break;
2108
+ }
2109
+ // Batch mode runs the metric on all combinations of a[] and b[]
2110
+ case 'batch':
2111
+ this.runBatch();
2112
+ break;
2113
+ // Single mode runs the metric on the first elements of a[] and b[]
2114
+ case 'single':
2115
+ this.results = this.runSingle(0, 0);
2116
+ break;
2117
+ // Pairwise mode runs the metric on corresponding pairs of a[] and b[]
2118
+ case 'pairwise':
2119
+ if (this.isPairwise())
2120
+ this.runPairwise();
2121
+ break;
2122
+ // Unsupported mode
2123
+ default: throw new Error(`unsupported mode <${mode}>`);
2124
+ }
2125
+ }
2126
+ /**
2127
+ * Run the metric computation based on the specified mode asynchronously.
2128
+ *
2129
+ * @param {MetricMode} [mode] - The mode to run the metric in (optional)
2130
+ * @param {boolean} [clear=true] - Whether to clear previous results before running
2131
+ * @returns {Promise<void>} - A promise that resolves when the metric computation is complete
2132
+ * @throws {Error} - If an unsupported mode is specified
2133
+ */
2134
+ async runAsync(mode, clear = true) {
2135
+ // Clear previous results if requested
2136
+ if (clear)
2137
+ this.clear();
2138
+ switch (this.whichMode(mode)) {
2139
+ // Default mode runs the metric on single inputs or falls back to batch mode
2140
+ case 'default': if (this.isSingle()) {
2141
+ this.results = await this.runSingleAsync(0, 0);
2142
+ break;
2143
+ }
2144
+ // Batch mode runs the metric on all combinations of a[] and b[]
2145
+ case 'batch':
2146
+ await this.runBatchAsync();
2147
+ break;
2148
+ // Single mode runs the metric on the first elements of a[] and b[]
2149
+ case 'single':
2150
+ this.results = await this.runSingleAsync(0, 0);
2151
+ break;
2152
+ // Pairwise mode runs the metric on corresponding pairs of a[] and b[]
2153
+ case 'pairwise':
2154
+ if (this.isPairwise())
2155
+ await this.runPairwiseAsync();
2156
+ break;
2157
+ // Unsupported mode
2158
+ default: throw new Error(`unsupported async mode <${mode}>`);
2159
+ }
2160
+ }
2161
+ /**
2162
+ * Get the name of the metric.
2163
+ *
2164
+ * @returns {string} - The name of the metric
2165
+ */
2166
+ getMetricName() { return this.metric; }
2167
+ /**
2168
+ * Get the result of the metric computation.
2169
+ *
2170
+ * @returns {MetricResult<R>} - The result of the metric computation
2171
+ * @throws {Error} - If `run()` has not been called before this method
2172
+ */
2173
+ getResults() {
2174
+ // Ensure that the metric has been run before getting the result
2175
+ if (this.results === undefined)
2176
+ throw new Error(`run() must be called before getResult()`);
2177
+ // Return the result(s)
2178
+ return this.results;
2179
+ }
2180
+ }
2181
+ /**
2182
+ * Metric registry service for managing metric implementations.
2183
+ *
2184
+ * This registry allows for dynamic registration and retrieval of metric classes,
2185
+ * enabling the use of various string similarity metrics in a consistent manner.
+ *
+ * It is created through `Registry('metric', Metric)`: its `add()` method rejects any
+ * class whose prototype is not an instance of `Metric`, and creating a second
+ * registry under the name 'metric' throws.
2186
+ */
2187
+ const MetricRegistry = Registry('metric', Metric);
2188
+
2189
+ /**
2190
+ * Pool Utility
2191
+ * src/utils/Pool.ts
2192
+ *
2193
+ * @see https://en.wikipedia.org/wiki/Circular_buffer
2194
+ *
2195
+ * The Pool class provides a simple and efficient buffer pool for dynamic programming
2196
+ * algorithms that require temporary arrays (such as Levenshtein, LCS, etc.).
2197
+ * By reusing pre-allocated typed arrays, it reduces memory allocations and garbage
2198
+ * collection overhead, especially for repeated or batch computations.
2199
+ *
2200
+ * It supports different types of buffers (Uint16Array, number[], Set, Map) and allows
2201
+ * for acquiring buffers of specific sizes while managing a maximum pool size.
2202
+ *
2203
+ * @module Utils/Pool
2204
+ * @author Paul Köhler (komed3)
2205
+ * @license MIT
2206
+ */
2207
/**
 * RingPool is a circular buffer implementation that manages a pool of buffers.
 *
 * Buffers handed in through `release()` are kept in a list of at most `maxSize`
 * entries; once the list is full, new buffers overwrite existing slots in ring
 * order. `acquire()` searches the ring starting at the current pointer. Acquired
 * buffers are not removed from the pool.
 *
 * @template T - The type of buffers managed by the pool
 */
class RingPool {
    // Maximum number of buffers held by the pool
    maxSize;
    // The buffers in the pool
    buffers = [];
    // Ring position: where the next search starts / the next overwrite happens
    pointer = 0;

    /**
     * Creates a new RingPool with a specified maximum size.
     *
     * @param {number} maxSize - The maximum number of buffers that can be stored in the pool
     */
    constructor(maxSize) {
        this.maxSize = maxSize;
    }

    /**
     * Acquires a buffer of at least the specified minimum size from the pool.
     *
     * With `allowOversize` the first buffer whose size is >= `minSize` is returned;
     * without it, the search continues until a buffer of exactly `minSize` is found.
     * The pointer only advances when a buffer is actually returned.
     *
     * @param {number} minSize - The minimum size of the buffer to acquire
     * @param {boolean} allowOversize - Whether to allow buffers larger than minSize
     * @return {PoolBuffer<T>|null} - The acquired buffer or null if no suitable buffer is found
     */
    acquire(minSize, allowOversize) {
        const len = this.buffers.length;
        for (let i = 0; i < len; i++) {
            const idx = (this.pointer + i) % len;
            const item = this.buffers[idx];
            const fits = allowOversize ? item.size >= minSize : item.size === minSize;
            if (fits) {
                // Continue the next search after the buffer just handed out
                this.pointer = (idx + 1) % len;
                return item;
            }
        }
        // If no suitable buffer is found, return null
        return null;
    }

    /**
     * Releases a buffer back to the pool.
     * If the pool is full, the slot at the current pointer is overwritten and the
     * pointer moves on. A pool with a non-positive `maxSize` stores nothing.
     *
     * @param {PoolBuffer<T>} item - The buffer to release back to the pool
     */
    release(item) {
        // Without capacity there is no slot to write to (and `% 0` would yield NaN)
        if (!(this.maxSize > 0)) {
            return;
        }
        if (this.buffers.length < this.maxSize) {
            // If the pool is not full, simply add the item
            this.buffers.push(item);
        }
        else {
            // If the pool is full, overwrite the slot at the pointer
            this.buffers[this.pointer] = item;
            this.pointer = (this.pointer + 1) % this.maxSize;
        }
    }

    /**
     * Clears the pool, removing all buffers.
     * This resets the pointer and empties the buffer list.
     */
    clear() {
        this.buffers = [];
        this.pointer = 0;
    }
}
2279
/**
 * Static buffer pool used by the dynamic programming metrics.
 *
 * Buffers of four kinds (Uint16Array, number[], Set, Map) are kept in one
 * RingPool per kind so that repeated computations can reuse them instead of
 * allocating new ones.
 */
class Pool {
    // Per-type settings: ring capacity, largest poolable size and oversize policy.
    // With maxItemSize 0, 'set' and 'map' only pass through the ring for size 0.
    static CONFIG = {
        'uint16': { type: 'uint16', maxSize: 32, maxItemSize: 2048, allowOversize: true },
        'number[]': { type: 'number[]', maxSize: 16, maxItemSize: 1024, allowOversize: false },
        'set': { type: 'set', maxSize: 8, maxItemSize: 0, allowOversize: false },
        'map': { type: 'map', maxSize: 8, maxItemSize: 0, allowOversize: false }
    };
    // One ring of pooled buffers per type
    static POOLS = {
        'uint16': new RingPool(32),
        'number[]': new RingPool(16),
        'set': new RingPool(8),
        'map': new RingPool(8)
    };
    /**
     * Creates a fresh buffer of the given type.
     *
     * @param {PoolType} type - The type of buffer to allocate
     * @param {number} size - The size of the buffer (ignored for 'set' and 'map')
     * @return {any} - The new buffer, or undefined for an unknown type
     */
    static allocate(type, size) {
        if (type === 'uint16')
            return new Uint16Array(size);
        if (type === 'number[]')
            return new Array(size).fill(0);
        if (type === 'set')
            return new Set();
        if (type === 'map')
            return new Map();
        return undefined;
    }
    /**
     * Hands out a buffer of the given type and size.
     * Requests above the type's maxItemSize bypass the ring; otherwise a pooled
     * buffer is used when the ring offers one, and a new one is allocated if not.
     *
     * @param {PoolType} type - The type of buffer to acquire (e.g., 'uint16', 'number[]', 'set', 'map')
     * @param {number} size - The size of the buffer to acquire
     * @return {T} - The acquired buffer of the specified type
     */
    static acquire(type, size) {
        const { maxItemSize, allowOversize } = this.CONFIG[type];
        if (!(size > maxItemSize)) {
            const pooled = this.POOLS[type].acquire(size, allowOversize);
            if (pooled) {
                // Typed arrays may be larger than requested, so expose only the first `size` slots
                if (type !== 'uint16')
                    return pooled.buffer;
                return pooled.buffer.subarray(0, size);
            }
        }
        // Too large for pooling or nothing suitable pooled
        return this.allocate(type, size);
    }
    /**
     * Hands out one buffer per requested size, all of the same type.
     *
     * @param {PoolType} type - The type of buffers to acquire
     * @param {number[]} sizes - An array of sizes for each buffer to acquire
     * @return {T[]} - An array of acquired buffers of the specified type
     */
    static acquireMany(type, sizes) {
        const acquireOne = (size) => this.acquire(type, size);
        return sizes.map((size) => acquireOne(size));
    }
    /**
     * Gives a buffer back to the ring of its type.
     * Buffers whose size exceeds the type's maxItemSize are not stored.
     *
     * @param {PoolType} type - The type of buffer to release
     * @param {T} buffer - The buffer to release
     * @param {number} size - The size of the buffer
     */
    static release(type, buffer, size) {
        const { maxItemSize } = this.CONFIG[type];
        if (!(size <= maxItemSize))
            return;
        this.POOLS[type].release({ buffer, size });
    }
}
2367
+
2368
+ /**
2369
+ * Cosine Similarity
2370
+ * src/metric/Cosine.ts
2371
+ *
2372
+ * @see https://en.wikipedia.org/wiki/Cosine_similarity
2373
+ *
2374
+ * Cosine similarity is a metric used to measure how similar two vectors are, regardless
2375
+ * of their magnitude. In text analysis, it is commonly used to compare documents or
2376
+ * strings by representing them as term frequency vectors and computing the cosine of
2377
+ * the angle between these vectors.
2378
+ *
2379
+ * The result is a value between 0 and 1, where 1 means the vectors are identical and
2380
+ * 0 means they are orthogonal (no similarity).
2381
+ *
2382
+ * @module Metric/CosineSimilarity
2383
+ * @author Paul Köhler (komed3)
2384
+ * @license MIT
2385
+ */
2386
/**
 * Cosine similarity metric: compares two strings by the angle between
 * their term frequency vectors.
 */
class CosineSimilarity extends Metric {
    /**
     * Creates the metric for two inputs (strings or arrays of strings).
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Registered under the name 'cosine'; the trailing `true` marks the metric as symmetrical
        super('cosine', a, b, opt, true);
    }
    /**
     * Counts how often each term occurs in a string.
     *
     * @param {string} str - The input string
     * @param {string} delimiter - The delimiter to split terms
     * @return {Map<string, number>} - Map from term to its number of occurrences
     */
    _termFreq(str, delimiter) {
        const tokens = str.split(delimiter);
        const counts = Pool.acquire('map', tokens.length);
        tokens.forEach((token) => {
            counts.set(token, (counts.get(token) || 0) + 1);
        });
        return counts;
    }
    /**
     * Computes the cosine similarity of two strings.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @return {MetricCompute<CosineRaw>} - Object containing the similarity result and raw values
     */
    compute(a, b) {
        // Terms are separated by options.delimiter, a single space if not set
        const configured = this.options.delimiter;
        const delimiter = configured === undefined ? ' ' : configured;
        const vecA = this._termFreq(a, delimiter);
        const vecB = this._termFreq(b, delimiter);
        let dotProduct = 0;
        let squaresA = 0;
        let squaresB = 0;
        // One pass over A yields the dot product and A's squared length
        vecA.forEach((countA, term) => {
            const countB = vecB.get(term) || 0;
            dotProduct += countA * countB;
            squaresA += countA * countA;
        });
        // B's squared length needs its own pass
        vecB.forEach((countB) => {
            squaresB += countB * countB;
        });
        const magnitudeA = Math.sqrt(squaresA);
        const magnitudeB = Math.sqrt(squaresB);
        // Hand the maps back to the pool before building the result
        Pool.release('map', vecA, vecA.size);
        Pool.release('map', vecB, vecB.size);
        // A zero-length vector has no direction, so the similarity is 0
        let res = 0;
        if (magnitudeA && magnitudeB)
            res = Metric.clamp(dotProduct / (magnitudeA * magnitudeB));
        return { res, raw: { dotProduct, magnitudeA, magnitudeB } };
    }
}
// Make the metric available under the key 'cosine'
MetricRegistry.add('cosine', CosineSimilarity);
2457
+
2458
+ /**
2459
+ * Damerau-Levenshtein Distance
2460
+ * src/metric/DamerauLevenshtein.ts
2461
+ *
2462
+ * @see https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
2463
+ *
2464
+ * The Damerau-Levenshtein distance extends the classical Levenshtein algorithm by
2465
+ * including transpositions (swapping of two adjacent characters) as a single edit
2466
+ * operation, in addition to insertions, deletions, and substitutions.
2467
+ *
2468
+ * This metric is particularly useful for detecting and correcting common
2469
+ * typographical errors.
2470
+ *
2471
+ * @module Metric/DamerauLevenshtein
2472
+ * @author Paul Köhler (komed3)
2473
+ * @license MIT
2474
+ */
2475
/**
 * Damerau-Levenshtein metric: edit distance with insertions, deletions,
 * substitutions and swaps of two adjacent characters.
 */
class DamerauLevenshteinDistance extends Metric {
    /**
     * Creates the metric for two inputs (strings or arrays of strings).
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Registered under the name 'damerau'; the trailing `true` marks the metric as symmetrical
        super('damerau', a, b, opt, true);
    }
    /**
     * Computes the normalized Damerau-Levenshtein similarity of two strings.
     *
     * Only three rows of the DP matrix are kept: the row being filled and
     * the two rows before it (the second one is needed for transpositions).
     *
     * @param {string} a - First string (always the shorter string for memory efficiency)
     * @param {string} b - Second string
     * @param {number} m - Length of the first string (a)
     * @param {number} n - Length of the second string (b)
     * @param {number} maxLen - Maximum length of the strings
     * @return {MetricCompute<DamerauRaw>} - Object containing the similarity result and raw distance
     */
    compute(a, b, m, n, maxLen) {
        const width = m + 1;
        const [rowBeforeLast, rowLast, rowNow] = Pool.acquireMany('uint16', [width, width, width]);
        // Row 0: turning the empty string into a[0..col] takes col edits
        for (let col = 0; col < width; col++)
            rowLast[col] = col;
        for (let row = 1; row <= n; row++) {
            const codeB = b.charCodeAt(row - 1);
            // Column 0: turning b[0..row] into the empty string takes row edits
            rowNow[0] = row;
            for (let col = 1; col <= m; col++) {
                const codeA = a.charCodeAt(col - 1);
                const cost = codeA === codeB ? 0 : 1;
                let best = Math.min(
                    rowNow[col - 1] + 1, // Insertion
                    rowLast[col] + 1, // Deletion
                    rowLast[col - 1] + cost // Substitution
                );
                // The last two characters of both prefixes are each other's mirror image
                const swapped = col > 1 && row > 1 &&
                    codeA === b.charCodeAt(row - 2) &&
                    codeB === a.charCodeAt(col - 2);
                if (swapped)
                    best = Math.min(best, rowBeforeLast[col - 2] + cost);
                rowNow[col] = best;
            }
            // Shift the window by copying: row-1 becomes row-2, the finished row becomes row-1
            rowBeforeLast.set(rowLast);
            rowLast.set(rowNow);
        }
        // After the final shift the distance sits at the end of rowLast
        const dist = rowLast[m];
        Pool.release('uint16', rowBeforeLast, width);
        Pool.release('uint16', rowLast, width);
        Pool.release('uint16', rowNow, width);
        // Two empty strings are identical; otherwise scale by the longer length
        const res = maxLen === 0 ? 1 : Metric.clamp(1 - (dist / maxLen));
        return { res, raw: { dist, maxLen } };
    }
}
// Make the metric available under the key 'damerau'
MetricRegistry.add('damerau', DamerauLevenshteinDistance);
2556
+
2557
+ /**
2558
+ * Dice-Sørensen Coefficient
2559
+ * src/metric/DiceSorensen.ts
2560
+ *
2561
+ * @see https://en.wikipedia.org/wiki/Dice-S%C3%B8rensen_coefficient
2562
+ *
2563
+ * This module implements the Dice-Sørensen coefficient, a statistic used to gauge
2564
+ * the similarity of two samples. It is commonly used in natural language processing
2565
+ * and information retrieval to compare the similarity between two sets of data,
2566
+ * such as text documents. The coefficient is defined as twice the size of the
2567
+ * intersection divided by the sum of the sizes of the two sets.
2568
+ *
2569
+ * The implementation includes methods to compute bigrams from strings and calculate
2570
+ * the coefficient based on these bigrams. It handles edge cases, such as empty
2571
+ * strings and identical strings, to ensure accurate results.
2572
+ *
2573
+ * @module Metric/DiceSorensenCoefficient
2574
+ * @author Paul Köhler (komed3)
2575
+ * @license MIT
2576
+ */
2577
/**
 * DiceSorensenCoefficient class extends the Metric class to implement the Dice-Sørensen coefficient
 * on the sets of character bigrams of two strings.
 */
class DiceSorensenCoefficient extends Metric {
    /**
     * Constructor for the DiceSorensen class.
     *
     * Initializes the DiceSorensen metric with two input strings or
     * arrays of strings and optional options.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Call the parent Metric constructor with the metric name and inputs
        // Metric is symmetrical
        super('dice', a, b, opt, true);
    }
    /**
     * Computes the bigrams of a given string.
     * Strings with fewer than two characters have no bigrams and yield an empty set.
     *
     * @param {string} str - The input string
     * @return {Set<string>} - A set of bigrams (two-character sequences) from the string
     */
    _bigrams(str) {
        // Never request a negative size from the pool (the empty string has length - 1 === -1)
        const count = Math.max(0, str.length - 1);
        const bigrams = Pool.acquire('set', count);
        // Generate bigrams by iterating through the string
        for (let i = 0; i < count; i++)
            bigrams.add(str.substring(i, i + 2));
        return bigrams;
    }
    /**
     * Calculates the Dice-Sørensen coefficient between two strings.
     *
     * When neither string has a bigram (both are shorter than two characters) the
     * coefficient is undefined; the result is then 1 for equal strings and 0 otherwise,
     * so that e.g. 'a' and 'b' are not reported as identical.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @return {MetricCompute<DiceRaw>} - Object containing the similarity result and raw values
     */
    compute(a, b) {
        // Generate bigrams for both strings
        const setA = this._bigrams(a);
        const setB = this._bigrams(b);
        // Count the bigrams shared by both sets
        let intersection = 0;
        for (const bigram of setA)
            if (setB.has(bigram))
                intersection++;
        // Sum of both set sizes (the denominator of the coefficient)
        const sizeA = setA.size, sizeB = setB.size;
        const size = sizeA + sizeB;
        // Release sets back to the pool
        Pool.release('set', setA, sizeA);
        Pool.release('set', setB, sizeB);
        // Without any bigram fall back to plain equality instead of always reporting 1
        let res;
        if (size === 0)
            res = a === b ? 1 : 0;
        else
            res = Metric.clamp((2 * intersection) / size);
        // Return the result as a MetricCompute object
        return {
            res,
            raw: { intersection, size }
        };
    }
}
// Register the Dice-Sørensen coefficient in the metric registry
MetricRegistry.add('dice', DiceSorensenCoefficient);
2641
+
2642
+ /**
2643
+ * Hamming Distance
2644
+ * src/metric/Hamming.ts
2645
+ *
2646
+ * @see https://en.wikipedia.org/wiki/Hamming_distance
2647
+ *
2648
+ * The Hamming distance is a metric for comparing two strings of equal length. It
2649
+ * measures the number of positions at which the corresponding symbols are different.
2650
+ *
2651
+ * This implementation allows for optional padding of the shorter string to equalize
2652
+ * lengths, otherwise it throws an error if the strings are of unequal length.
2653
+ *
2654
+ * @module Metric/HammingDistance
2655
+ * @author Paul Köhler (komed3)
2656
+ * @license MIT
2657
+ */
2658
/**
 * HammingDistance class extends the Metric class to implement the Hamming distance.
 */
class HammingDistance extends Metric {
    /**
     * Constructor for the Hamming class.
     *
     * Initializes the Hamming distance metric with two input strings or
     * arrays of strings and optional options.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} opt - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Call the parent Metric constructor with the metric name and inputs
        // Metric is symmetrical
        super('hamming', a, b, opt, true);
    }
    /**
     * Calculates the Hamming distance between two strings.
     *
     * Strings of unequal length are padded at the end with `options.pad` up to
     * `maxLen`. If the padding cannot equalize the lengths (an empty pad string
     * leaves the strings unchanged), every position that exists in only one of
     * the strings counts as a difference.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @param {number} maxLen - Maximum length of the strings
     * @return {MetricCompute<HammingRaw>} - Object containing the similarity result and raw distance
     * @throws {Error} - If strings are of unequal length and padding is not specified
     */
    compute(a, b, m, n, maxLen) {
        // Check for equal string length
        if (m !== n) {
            const { pad } = this.options;
            // Standard: Error for unequal length
            if (pad === undefined)
                throw new Error(`strings must be of equal length for Hamming Distance, a=${m} and b=${n} given, ` +
                    `use option.pad for automatic adjustment`);
            // Optional: use padding to equalize string length
            if (m < maxLen)
                a = a.padEnd(maxLen, pad);
            if (n < maxLen)
                b = b.padEnd(maxLen, pad);
            m = n = maxLen;
        }
        // Compare over the longer of both strings: a missing character reads as
        // undefined and therefore differs from any real character
        const span = Math.max(a.length, b.length);
        let dist = 0;
        for (let i = 0; i < span; i++)
            if (a[i] !== b[i])
                dist++;
        // Return the result as a MetricCompute object
        return {
            res: m === 0 ? 1 : Metric.clamp(1 - dist / m),
            raw: { dist }
        };
    }
}
// Register the Hamming distance in the metric registry
MetricRegistry.add('hamming', HammingDistance);
2718
+
2719
+ /**
2720
+ * Jaccard Index
2721
+ * src/metric/Jaccard.ts
2722
+ *
2723
+ * @see https://en.wikipedia.org/wiki/Jaccard_index
2724
+ *
2725
+ * The Jaccard Index (or Jaccard similarity coefficient) measures the similarity
2726
+ * between two sets by dividing the size of their intersection by the size of
2727
+ * their union. In string similarity, it is often used to compare sets of characters,
2728
+ * tokens, or n-grams. The result is a value between 0 and 1, where 1 means the
2729
+ * sets are identical and 0 means they have no elements in common.
2730
+ *
2731
+ * @module Metric/JaccardIndex
2732
+ * @author Paul Köhler (komed3)
2733
+ * @license MIT
2734
+ */
2735
/**
 * Jaccard index metric: size of the intersection of the two character sets
 * divided by the size of their union.
 */
class JaccardIndex extends Metric {
    /**
     * Creates the metric for two inputs (strings or arrays of strings).
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Registered under the name 'jaccard'; the trailing `true` marks the metric as symmetrical
        super('jaccard', a, b, opt, true);
    }
    /**
     * Computes the Jaccard index of the character sets of two strings.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @return {MetricCompute<JaccardRaw>} - Object containing the similarity result and raw values
     */
    compute(a, b, m, n) {
        const [charsA, charsB] = Pool.acquireMany('set', [m, n]);
        // Collect the distinct characters of each string
        for (const ch of a)
            charsA.add(ch);
        for (const ch of b)
            charsB.add(ch);
        // Characters present in both sets
        let intersection = 0;
        charsA.forEach((ch) => {
            if (charsB.has(ch))
                intersection += 1;
        });
        // Shared characters are contained in both sizes, so subtract them once
        const union = charsA.size + charsB.size - intersection;
        Pool.release('set', charsA, m);
        Pool.release('set', charsB, n);
        // Two empty sets are treated as identical
        const res = union === 0 ? 1 : Metric.clamp(intersection / union);
        return { res, raw: { intersection, union } };
    }
}
// Make the metric available under the key 'jaccard'
MetricRegistry.add('jaccard', JaccardIndex);
2790
+
2791
+ /**
2792
+ * Jaro-Winkler Distance
2793
+ * src/metric/JaroWinkler.ts
2794
+ *
2795
+ * @see https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
2796
+ *
2797
+ * The Jaro-Winkler distance is a string similarity metric that gives more weight
2798
+ * to matching characters at the start of the strings. It is especially effective
2799
+ * for short strings and typographical errors, and is widely used in record linkage
2800
+ * and duplicate detection.
2801
+ *
2802
+ * @module Metric/JaroWinkler
2803
+ * @author Paul Köhler (komed3)
2804
+ * @license MIT
2805
+ */
2806
/**
 * Jaro-Winkler metric: Jaro similarity boosted by the length of the
 * common prefix (at most four characters).
 */
class JaroWinklerDistance extends Metric {
    /**
     * Creates the metric for two inputs (strings or arrays of strings).
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Named 'jaro-winkler'; the trailing `true` marks the metric as symmetrical
        super('jaro-winkler', a, b, opt, true);
    }
    /**
     * Computes the Jaro-Winkler similarity of two strings.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @return {MetricCompute<JaroWinklerRaw>} - Object containing the similarity result and raw values
     */
    compute(a, b, m, n) {
        // Characters only match if their positions differ by at most this much
        const matchWindow = Math.max(0, Math.floor(n / 2) - 1);
        // Flag arrays (0 = unmatched, 1 = matched), one slot per character
        const matchA = Pool.acquire('uint16', m);
        const matchB = Pool.acquire('uint16', n);
        // Pooled buffers may hold stale data, so reset the slots in use
        matchA.fill(0, 0, m);
        matchB.fill(0, 0, n);
        // Pair every character of a with the first free equal character of b in its window
        let matches = 0;
        for (let i = 0; i < m; i++) {
            const from = Math.max(0, i - matchWindow);
            const to = Math.min(i + matchWindow + 1, n);
            let j = from;
            while (j < to && (matchB[j] || a[i] !== b[j]))
                j++;
            if (j < to) {
                matchA[i] = 1;
                matchB[j] = 1;
                matches++;
            }
        }
        let transpos = 0;
        let jaro = 0;
        let prefix = 0;
        let res = 0;
        // Without a single match every value stays 0
        if (matches > 0) {
            // Walk both strings over their matched characters in order; each pair
            // that differs is half a transposition
            let cursor = 0;
            for (let i = 0; i < m; i++) {
                if (!matchA[i])
                    continue;
                while (!matchB[cursor])
                    cursor++;
                if (a[i] !== b[cursor])
                    transpos++;
                cursor++;
            }
            transpos /= 2;
            // Jaro similarity
            jaro = ((matches / m) + (matches / n) +
                (matches - transpos) / matches) / 3;
            // Length of the common prefix, capped at four characters
            const cap = Math.min(4, m, n);
            while (prefix < cap && a[prefix] === b[prefix])
                prefix++;
            // Winkler adjustment with a scaling factor of 0.1
            res = jaro + prefix * 0.1 * (1 - jaro);
        }
        Pool.release('uint16', matchA, m);
        Pool.release('uint16', matchB, n);
        return {
            res: Metric.clamp(res),
            raw: { matchWindow, matches, transpos, jaro, prefix }
        };
    }
}
// Make the metric available under the key 'jaroWinkler'
// NOTE(review): this key differs from the name 'jaro-winkler' passed to the
// Metric constructor above — confirm that the mismatch is intentional
MetricRegistry.add('jaroWinkler', JaroWinklerDistance);
2900
+
2901
+ /**
2902
+ * Longest Common Subsequence (LCS)
2903
+ * src/metric/LCS.ts
2904
+ *
2905
+ * @see https://en.wikipedia.org/wiki/Longest_common_subsequence
2906
+ *
2907
+ * The Longest Common Subsequence (LCS) metric measures the length of the longest
2908
+ * subsequence common to both strings. Unlike substrings, the characters of a
2909
+ * subsequence do not need to be contiguous, but must appear in the same order.
2910
+ *
2911
+ * The LCS is widely used in diff tools, bioinformatics, and approximate string
2912
+ * matching.
2913
+ *
2914
+ * @module Metric/LCS
2915
+ * @author Paul Köhler (komed3)
2916
+ * @license MIT
2917
+ */
2918
/**
 * LCS metric: length of the longest common subsequence of two strings,
 * normalized by the length of the longer string.
 */
class LCSMetric extends Metric {
    /**
     * Creates the metric for two inputs (strings or arrays of strings).
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Registered under the name 'lcs'; the trailing `true` marks the metric as symmetrical
        super('lcs', a, b, opt, true);
    }
    /**
     * Computes the normalized LCS similarity of two strings.
     *
     * Only two rows of the DP matrix are kept: the row being filled and the one above it.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @param {number} maxLen - Maximum length of the strings
     * @return {MetricCompute<LCSRaw>} - Object containing the similarity result and raw LCS length
     */
    compute(a, b, m, n, maxLen) {
        const width = m + 1;
        const [above, row] = Pool.acquireMany('uint16', [width, width]);
        // Row 0: nothing in common with the empty string
        above.fill(0, 0, width);
        for (let j = 1; j <= n; j++) {
            const codeB = b.charCodeAt(j - 1);
            row[0] = 0;
            for (let i = 1; i <= m; i++) {
                // Equal characters extend the diagonal; otherwise keep the better neighbour
                row[i] = a.charCodeAt(i - 1) === codeB
                    ? above[i - 1] + 1
                    : Math.max(above[i], row[i - 1]);
            }
            // The finished row becomes the reference for the next one
            above.set(row);
        }
        // After the final copy the LCS length sits at the end of the reference row
        const lcs = above[m];
        Pool.release('uint16', above, width);
        Pool.release('uint16', row, width);
        // Two empty strings are identical; otherwise scale by the longer length
        const res = maxLen === 0 ? 1 : Metric.clamp(lcs / maxLen);
        return { res, raw: { lcs, maxLen } };
    }
}
// Make the metric available under the key 'lcs'
MetricRegistry.add('lcs', LCSMetric);
2984
+
2985
+ /**
2986
+ * Levenshtein Distance
2987
+ * src/metric/Levenshtein.ts
2988
+ *
2989
+ * @see https://en.wikipedia.org/wiki/Levenshtein_distance
2990
+ *
2991
+ * The Levenshtein distance is a classic metric for measuring the minimum number
2992
+ * of single-character edits (insertions, deletions, or substitutions) required
2993
+ * to change one string into another.
2994
+ *
2995
+ * It is widely used in approximate string matching, spell checking, and natural
2996
+ * language processing.
2997
+ *
2998
+ * @module Metric/LevenshteinDistance
2999
+ * @author Paul Köhler (komed3)
3000
+ * @license MIT
3001
+ */
3002
/**
 * Levenshtein metric: minimum number of single-character insertions,
 * deletions and substitutions, normalized by the length of the longer string.
 */
class LevenshteinDistance extends Metric {
    /**
     * Creates the metric for two inputs (strings or arrays of strings).
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Registered under the name 'levenshtein'; the trailing `true` marks the metric as symmetrical
        super('levenshtein', a, b, opt, true);
    }
    /**
     * Computes the normalized Levenshtein similarity of two strings.
     *
     * Only two rows of the DP matrix are kept: the row being filled and the one above it.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @param {number} maxLen - Maximum length of the strings
     * @return {MetricCompute<LevenshteinRaw>} - Object containing the similarity result and raw distance
     */
    compute(a, b, m, n, maxLen) {
        const width = m + 1;
        const [above, row] = Pool.acquireMany('uint16', [width, width]);
        // Row 0: turning the empty string into a[0..col] takes col edits
        for (let col = 0; col < width; col++)
            above[col] = col;
        for (let j = 1; j <= n; j++) {
            const codeB = b.charCodeAt(j - 1);
            // Column 0: turning b[0..j] into the empty string takes j edits
            row[0] = j;
            for (let i = 1; i <= m; i++) {
                const insertion = row[i - 1] + 1;
                const deletion = above[i] + 1;
                // Substituting is free when the characters already agree
                const substitution = above[i - 1] + (a.charCodeAt(i - 1) === codeB ? 0 : 1);
                row[i] = Math.min(insertion, deletion, substitution);
            }
            // The finished row becomes the reference for the next one
            above.set(row);
        }
        // After the final copy the distance sits at the end of the reference row
        const dist = above[m];
        Pool.release('uint16', above, width);
        Pool.release('uint16', row, width);
        // Two empty strings are identical; otherwise scale by the longer length
        const res = maxLen === 0 ? 1 : Metric.clamp(1 - dist / maxLen);
        return { res, raw: { dist, maxLen } };
    }
}
// Make the metric available under the key 'levenshtein'
MetricRegistry.add('levenshtein', LevenshteinDistance);
3070
+
3071
+ /**
3072
+ * Needleman-Wunsch Algorithm
3073
+ * src/metric/NeedlemanWunsch.ts
3074
+ *
3075
+ * @see https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
3076
+ *
3077
+ * The Needleman-Wunsch algorithm performs global alignment, aligning two strings
3078
+ * entirely, including gaps. It is commonly used in bioinformatics for sequence
3079
+ * alignment.
3080
+ *
3081
+ * @module Metric/NeedlemanWunsch
3082
+ * @author Paul Köhler (komed3)
3083
+ * @license MIT
3084
+ */
3085
+ /**
3086
+ * NeedlemanWunschDistance class extends the Metric class to implement the Needleman-Wunsch algorithm.
3087
+ */
3088
class NeedlemanWunschDistance extends Metric {
    /**
     * Constructor for the NeedlemanWunsch class.
     *
     * Initializes the Needleman-Wunsch metric with two input strings or
     * arrays of strings and optional options.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation;
     *   `match`, `mismatch` and `gap` override the default scores 1 / -1 / -1
     */
    constructor(a, b, opt = {}) {
        // Call the parent Metric constructor with the metric name and inputs
        // Metric is symmetrical
        super('needlemanWunsch', a, b, opt, true);
    }
    /**
     * Calculates the Needleman-Wunsch global alignment score between two strings.
     *
     * The score is accumulated in two rolling rows of `m + 1` cells. The rows
     * must be able to hold negative (and, with custom weights, fractional)
     * numbers: gap and mismatch penalties are negative by default, and an
     * unsigned integer buffer would wrap them around to large positive values
     * and make every pair look like a perfect match.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @param {number} maxLen - Maximum length of the strings
     * @return {MetricCompute<NeedlemanRaw>} - `res` is `score / (maxLen * match)`
     *   passed through `Metric.clamp` (0 when the denominator is 0); `raw`
     *   carries the signed alignment `score` and the denominator `denum`
     */
    compute(a, b, m, n, maxLen) {
        // Scoring parameters (can be customized via options if needed)
        const { match = 1, mismatch = -1, gap = -1 } = this.options;
        // Signed floating-point rows keep penalties exactly as JS numbers
        const len = m + 1;
        let prev = new Float64Array(len);
        let curr = new Float64Array(len);
        // First row: aligning a[0..i] against the empty string costs i gaps
        prev[0] = 0;
        for (let i = 1; i <= m; i++) {
            prev[i] = prev[i - 1] + gap;
        }
        // Fill the DP matrix row by row, one row per character of b
        for (let j = 1; j <= n; j++) {
            // First column: aligning b[0..j] against the empty string costs j gaps
            curr[0] = prev[0] + gap;
            // Get the character code of the current character in b
            const cb = b.charCodeAt(j - 1);
            for (let i = 1; i <= m; i++) {
                // Score for match / mismatch
                const score = a.charCodeAt(i - 1) === cb ? match : mismatch;
                // Best of the three ways to reach the current cell
                curr[i] = Math.max(
                    prev[i - 1] + score, // Diagonal (match/mismatch)
                    prev[i] + gap, // Up (gap)
                    curr[i - 1] + gap // Left (gap)
                );
            }
            // Swap the rows; every cell of the new `curr` is rewritten before it is read
            [prev, curr] = [curr, prev];
        }
        // After the final swap `prev` holds the last completed row
        const score = prev[m];
        // Use the maximum possible score for the longer string (global alignment)
        const denum = maxLen * match;
        // Return the result as a MetricCompute object
        return {
            res: denum === 0 ? 0 : Metric.clamp(score / denum),
            raw: { score, denum }
        };
    }
}
3155
+ // Register the Needleman-Wunsch algorithm in the metric registry
3156
+ MetricRegistry.add('needlemanWunsch', NeedlemanWunschDistance);
3157
+
3158
+ /**
3159
+ * q-Gram Similarity
3160
+ * src/metric/QGram.ts
3161
+ *
3162
+ * @see https://en.wikipedia.org/wiki/Q-gram
3163
+ *
3164
+ * Q-gram similarity is a string-matching algorithm that compares two strings by
3165
+ * breaking them into substrings (q-grams) of length Q. The similarity is computed
3166
+ * as the size of the intersection of q-gram sets divided by the size of the larger
3167
+ * set.
3168
+ *
3169
+ * This metric is widely used in approximate string matching, information retrieval,
3170
+ * and computational linguistics.
3171
+ *
3172
+ * @module Metric/QGramSimilarity
3173
+ * @author Paul Köhler (komed3)
3174
+ * @license MIT
3175
+ */
3176
+ /**
3177
+ * QGramSimilarity class extends the Metric class to implement the q-Gram similarity algorithm.
3178
+ */
3179
class QGramSimilarity extends Metric {
    /**
     * Creates a q-gram similarity metric for a pair of inputs.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation (reads `q`)
     */
    constructor(a, b, opt = {}) {
        // The trailing `true` flags the metric as symmetrical for the base class
        // NOTE(review): the name passed here is 'qgram' while the registry key is
        // 'qGram' — confirm whether the different casing is intentional
        super('qgram', a, b, opt, true);
    }
    /**
     * Collects the distinct substrings of length `q` of a string.
     *
     * @param {string} str - The input string
     * @param {number} q - The length of each q-gram
     * @return {Set<string>} - Pooled set holding the q-grams; the caller releases it
     */
    _qGrams(str, q) {
        // A string shorter than q yields no q-gram at all
        const count = Math.max(0, str.length - q + 1);
        const bag = Pool.acquire('set', count);
        let start = 0;
        while (start < count) {
            bag.add(str.slice(start, start + q));
            start++;
        }
        return bag;
    }
    /**
     * Computes the q-gram similarity: shared q-grams divided by the size
     * of the larger q-gram set.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @return {MetricCompute<QGramRaw>} - `res` is the clamped ratio (1 when both
     *   sets are empty); `raw` carries `intersection` and `size`
     */
    compute(a, b) {
        // q defaults to 2 (bigrams) when the option is absent
        const { q = 2 } = this.options;
        const gramsA = this._qGrams(a, q);
        const gramsB = this._qGrams(b, q);
        // Count the q-grams of a that also occur in b
        let intersection = 0;
        gramsA.forEach((gram) => {
            if (gramsB.has(gram)) {
                intersection += 1;
            }
        });
        // Remember the sizes before the sets go back to the pool
        const sizeA = gramsA.size;
        const sizeB = gramsB.size;
        const size = Math.max(sizeA, sizeB);
        Pool.release('set', gramsA, sizeA);
        Pool.release('set', gramsB, sizeB);
        const res = size === 0 ? 1 : Metric.clamp(intersection / size);
        return { res, raw: { intersection, size } };
    }
}
3240
+ // Register the q-Gram similarity in the metric registry
3241
+ MetricRegistry.add('qGram', QGramSimilarity);
3242
+
3243
+ /**
3244
+ * Smith-Waterman Algorithm
3245
+ * src/metric/SmithWaterman.ts
3246
+ *
3247
+ * @see https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm
3248
+ *
3249
+ * The Smith-Waterman algorithm performs local alignment, finding the best matching
3250
+ * subsequence between two strings. It is commonly used in bioinformatics for local
3251
+ * sequence alignment. Instead of looking at the entire sequence, the Smith–Waterman
3252
+ * algorithm compares segments of all possible lengths and optimizes the similarity
3253
+ * measure.
3254
+ *
3255
+ * @module Metric/SmithWatermanDistance
3256
+ * @author Paul Köhler (komed3)
3257
+ * @license MIT
3258
+ */
3259
+ /**
3260
+ * SmithWatermanDistance class extends the Metric class to implement the Smith-Waterman algorithm.
3261
+ */
3262
class SmithWatermanDistance extends Metric {
    /**
     * Creates a Smith-Waterman metric for a pair of inputs.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation;
     *   `match`, `mismatch` and `gap` override the default scores 2 / -1 / -2
     */
    constructor(a, b, opt = {}) {
        // The trailing `true` flags the metric as symmetrical for the base class
        super('smithWaterman', a, b, opt, true);
    }
    /**
     * Computes the best local alignment score between `a` and `b` and
     * normalizes it by the best score the shorter string could reach.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @return {MetricCompute<SmithWatermanRaw>} - `res` is `score / denum` passed
     *   through `Metric.clamp` (0 when `denum` is 0); `raw` carries `score` and `denum`
     */
    compute(a, b, m, n) {
        const { match = 2, mismatch = -1, gap = -2 } = this.options;
        // Two working rows of m + 1 cells, borrowed from the pool under the 'uint16' type
        const width = m + 1;
        const [above, row] = Pool.acquireMany('uint16', [width, width]);
        // Local alignment starts from an all-zero first row
        above.fill(0, 0, width);
        let best = 0;
        // One pass per character of b
        for (let line = 1; line <= n; line++) {
            const target = b.charCodeAt(line - 1);
            // The first column is always zero
            row[0] = 0;
            for (let c = 1; c <= m; c++) {
                const pairScore = a.charCodeAt(c - 1) === target ? match : mismatch;
                const diagonal = above[c - 1] + pairScore;
                const up = above[c] + gap;
                const left = row[c - 1] + gap;
                // A cell never drops below zero: an alignment may restart anywhere
                row[c] = Math.max(0, diagonal, up, left);
                // Keep the highest cell value seen so far (read back from the row)
                best = Math.max(best, row[c]);
            }
            // The finished row becomes the reference row of the next pass
            above.set(row);
        }
        Pool.release('uint16', above, width);
        Pool.release('uint16', row, width);
        // Highest score reachable when the shorter string matches completely
        const denum = Math.min(m * match, n * match);
        const res = denum === 0 ? 0 : Metric.clamp(best / denum);
        return { res, raw: { score: best, denum } };
    }
}
3330
+ // Register the Smith-Waterman algorithm in the metric registry
3331
+ MetricRegistry.add('smithWaterman', SmithWatermanDistance);
3332
+
3333
+ /**
3334
+ * Abstract Phonetic
3335
+ * src/phonetic/Phonetic.ts
3336
+ *
3337
+ * @see https://en.wikipedia.org/wiki/Phonetic_algorithm
3338
+ *
3339
+ * A phonetic algorithm refers to a method for indexing words according to their
3340
+ * pronunciation. When the algorithm relies on orthography, it is significantly
3341
+ * influenced by the spelling conventions of the language for which it is intended:
3342
+ * since the majority of phonetic algorithms were created for English, they tend
3343
+ * to be less effective for indexing words in other languages.
3344
+ *
3345
+ * Phonetic search has numerous applications, and one of the initial use cases has
3346
+ * been in trademark searches to verify that newly registered trademarks do not
3347
+ * pose a risk of infringing upon existing trademarks due to their pronunciation.
3348
+ *
3349
+ * This module provides an abstract class for generating phonetic indices based
3350
+ * on mappings and rules. It allows for the implementation of various phonetic
3351
+ * algorithms by extending the abstract class.
3352
+ *
3353
+ * @module Phonetic
3354
+ * @author Paul Köhler (komed3)
3355
+ * @license MIT
3356
+ */
3357
+ // Get the singleton profiler instance for performance monitoring
3358
+ const profiler$1 = Profiler.getInstance();
3359
+ /**
3360
+ * Abstract class representing a phonetic algorithm.
3361
+ *
3362
+ * The protected methods `applyRules`, `encode`, `mapChar`, `equalLen`, `word2Chars`,
3363
+ * `exitEarly`, `adjustCode`, `loop` and `loopAsync` can be overridden in subclasses
3364
+ * to implement specific phonetic algorithms.
3365
+ *
3366
+ * @abstract
3367
+ */
3368
class Phonetic {
    // Cache for indexed words to avoid redundant calculations
    static cache = new HashTable();
    /**
     * Default phonetic options.
     *
     * This object contains default settings for phonetic algorithms,
     * implemented in the subclass.
     */
    static default;
    // Phonetic algorithm name for identification
    algo;
    // Phonetic map and options for the algorithm
    options;
    map;
    /**
     * Static method to clear the cache of indexed words.
     */
    static clear() { this.cache.clear(); }
    /**
     * Constructor for the Phonetic class.
     *
     * Initializes the phonetic algorithm with the specified options and mapping.
     *
     * @param {string} algo - The name of the algorithm (e.g. 'soundex')
     * @param {PhoneticOptions} [opt] - Options for the phonetic algorithm
     * @throws {Error} - If the requested mapping is not declared
     */
    constructor(algo, opt = {}) {
        // Merge the subclass defaults (static `default`) with the provided options
        this.options = merge(this.constructor.default ?? {}, opt);
        // Look up the mapping selected by `options.map` for this algorithm
        const map = PhoneticMappingRegistry.get(algo, this.options.map);
        // If the mapping is not defined, throw an error
        if (map === undefined)
            throw new Error(`requested mapping <${this.options.map}> is not declared`);
        this.algo = algo;
        this.map = map;
    }
    /**
     * Applies phonetic rules to a character in a word context.
     *
     * Rules are tested in the order of the ruleset; the first rule whose
     * conditions (char, position, prev/next context, leading/trailing,
     * match) all hold determines the result.
     *
     * Positions are interpreted against the word length: `start` is index 0,
     * `end` is index `charLen - 1`, and `middle` is every index strictly
     * between the two.
     *
     * @param {string} char - The current character
     * @param {number} i - The current position within the word
     * @param {string[]} chars - The word as an array of characters
     * @param {number} charLen - The total length of the word
     * @returns {string|undefined} - The rule code or undefined if no rule applies
     */
    applyRules(char, i, chars, charLen) {
        const { ruleset = [] } = this.map;
        // If no rules are provided, return undefined
        if (!ruleset || !ruleset.length)
            return undefined;
        // Surrounding characters; positions outside the word read as ''
        const prev = chars[i - 1] || '';
        const prev2 = chars[i - 2] || '';
        const next = chars[i + 1] || '';
        const next2 = chars[i + 2] || '';
        // Index of the last character of the word
        const last = charLen - 1;
        // Iterate over the rules to find a matching rule for the current character
        for (const rule of ruleset) {
            // Skip if the rule does not match the current character
            if (rule.char && rule.char !== char)
                continue;
            // Position in the word (start, middle, end)
            if (rule.position === 'start' && i !== 0)
                continue;
            if (rule.position === 'middle' && (i <= 0 || i >= last))
                continue;
            if (rule.position === 'end' && i !== last)
                continue;
            // Previous character(s)
            if (rule.prev && !rule.prev.includes(prev))
                continue;
            if (rule.prevNot && rule.prevNot.includes(prev))
                continue;
            if (rule.prev2 && !rule.prev2.includes(prev2))
                continue;
            if (rule.prev2Not && rule.prev2Not.includes(prev2))
                continue;
            // Following character(s)
            if (rule.next && !rule.next.includes(next))
                continue;
            if (rule.nextNot && rule.nextNot.includes(next))
                continue;
            if (rule.next2 && !rule.next2.includes(next2))
                continue;
            if (rule.next2Not && rule.next2Not.includes(next2))
                continue;
            // Special case: Beginning of a word (e.g. chars.slice(0, n))
            if (rule.leading && !rule.leading.includes(chars.slice(0, rule.leading.length).join('')))
                continue;
            // Special case: end of word (e.g. chars.slice(-n))
            if (rule.trailing && !rule.trailing.includes(chars.slice(-rule.trailing.length).join('')))
                continue;
            // Check multiple characters (e.g. bigram/trigram)
            if (rule.match && !rule.match.every((c, j) => chars[i + j] === c))
                continue;
            // If all conditions met, return the rule code
            return rule.code;
        }
        // If no rule matched, return undefined
        return undefined;
    }
    /**
     * Generates the phonetic code for a given word.
     *
     * This method processes the word character by character, applying phonetic rules
     * and mappings to generate a phonetic code.
     *
     * @param {string} word - The input word to be converted into a phonetic code
     * @returns {string} - The generated phonetic code
     */
    encode(word) {
        const { map = {}, ignore = [] } = this.map;
        // Get the characters of the word and its length
        const chars = this.word2Chars(word);
        const charLen = chars.length;
        let code = '';
        let lastCode = null;
        // Iterate over each character in the word
        for (let i = 0; i < charLen; i++) {
            const char = chars[i];
            // Skip characters that are in the ignore list
            if (ignore.includes(char))
                continue;
            // Convert the character to its phonetic code
            const mapped = this.mapChar(char, i, chars, charLen, lastCode, map);
            // If no code is generated, skip to the next character
            if (mapped === undefined)
                continue;
            // Append the generated code and remember it for de-duplication
            code += mapped;
            lastCode = mapped;
            // If the code length reaches the specified limit, exit early
            if (this.exitEarly(code, i))
                break;
        }
        // Return the adjusted phonetic code
        return this.adjustCode(code, chars);
    }
    /**
     * Converts a character to its phonetic code based on the mapping and rules.
     *
     * @param {string} char - The current character
     * @param {number} i - The current position within the word
     * @param {string[]} chars - The word as an array of characters
     * @param {number} charLen - The total length of the word
     * @param {string|null} lastCode - The last code generated (to avoid duplicates)
     * @param {Record<string, string>} map - The phonetic mapping
     * @returns {string|undefined} - The phonetic code or undefined if no code applies
     */
    mapChar(char, i, chars, charLen, lastCode, map) {
        const { dedupe = true } = this.options;
        // A matching rule wins over the plain mapping; a character that has
        // neither a rule nor a map entry yields undefined
        const c = this.applyRules(char, i, chars, charLen) ?? map[char] ?? undefined;
        // De-duplicate the code if necessary
        return dedupe && c === lastCode ? undefined : c;
    }
    /**
     * Ensures the phonetic code has a fixed length by padding or truncating.
     *
     * A `length` option of -1 (the default) leaves the input untouched.
     *
     * @param {string} input - The input string to be adjusted
     * @returns {string} - The adjusted string with fixed length
     */
    equalLen(input) {
        const { length = -1, pad = '0' } = this.options;
        return length === -1 ? input : (input + pad.repeat(length)).slice(0, length);
    }
    /**
     * Converts a word into an array of lower-cased characters.
     *
     * @param {string} word - The input word to be converted
     * @returns {string[]} - An array of characters from the input word
     */
    word2Chars(word) { return word.toLowerCase().split(''); }
    /**
     * Determines whether to exit early based on the current phonetic code length.
     *
     * @param {string} code - The current phonetic code
     * @param {number} i - The current index in the word (unused here, available to overrides)
     * @returns {boolean} - True if a positive `length` option is set and the code has reached it
     */
    exitEarly(code, i) {
        const { length = -1 } = this.options;
        return length > 0 && code.length >= length;
    }
    /**
     * Adjusts the phonetic code. The base implementation returns it unchanged.
     *
     * @param {string} code - The phonetic code to be adjusted
     * @param {string[]} chars - The characters of the encoded word (unused here, available to overrides)
     * @returns {string} - The adjusted phonetic code
     */
    adjustCode(code, chars) { return code; }
    /**
     * Processes an array of words to generate their phonetic indices.
     *
     * This method iterates over each word, generates its phonetic code,
     * and ensures that the resulting codes are of equal length.
     *
     * @param {string[]} words - An array of words to be processed
     * @returns {string[]} - An array of phonetic indices for the input words
     */
    loop(words) {
        const index = [];
        // Loop over each word in the input array
        for (const word of words) {
            // Generate a cache key based on the algorithm and word
            // NOTE(review): the key is built from the algorithm name and the word
            // only; instances that differ in options (map, dedupe, length) would
            // presumably share cached codes — confirm against HashTable.key
            const key = Phonetic.cache.key(this.algo, [word]);
            // Use the cached code if there is one, otherwise encode the word
            const code = Phonetic.cache.get(key || '') ?? (() => {
                // Get the phonetic code for the word
                const res = this.encode(word);
                // If a key was generated, store the result in the cache
                if (key)
                    Phonetic.cache.set(key, res);
                return res;
            })();
            // Only non-empty codes are added to the index
            if (code && code.length)
                index.push(this.equalLen(code));
        }
        return index;
    }
    /**
     * Asynchronously processes an array of words to generate their phonetic indices.
     *
     * This method iterates over each word, generates its phonetic code asynchronously,
     * and ensures that the resulting codes are of equal length. Unlike `loop`,
     * it does not consult the word cache.
     *
     * @param {string[]} words - An array of words to be processed
     * @returns {Promise<string[]>} - A promise that resolves to an array of phonetic indices for the input words
     */
    async loopAsync(words) {
        const index = [];
        // Loop over each word in the input array
        for (const word of words) {
            // Get the phonetic code for the word asynchronously
            const code = await Promise.resolve(this.encode(word));
            // Only non-empty codes are added to the index
            if (code && code.length)
                index.push(this.equalLen(code));
        }
        return index;
    }
    /**
     * Get the name of the phonetic algorithm.
     *
     * @returns {string} - The name of the algorithm
     */
    getAlgoName() { return this.algo; }
    /**
     * Generates a phonetic index for the given input string.
     *
     * @param {string} input - The input string to be indexed
     * @returns {string[]} - An array of phonetic indices for the input words
     */
    getIndex(input) {
        const { delimiter = ' ' } = this.options;
        // Split the input string by the specified delimiter and loop over it
        return profiler$1.run(() => this.loop(input.split(delimiter).filter(Boolean)).filter(Boolean));
    }
    /**
     * Asynchronously generates a phonetic index for the given input string.
     *
     * @param {string} input - The input string to be indexed
     * @returns {Promise<string[]>} - A promise that resolves to an array of phonetic indices for the input words
     */
    async getIndexAsync(input) {
        const { delimiter = ' ' } = this.options;
        // Split the input string by the specified delimiter and loop over it asynchronously
        return (await profiler$1.runAsync(async () => await this.loopAsync(input.split(delimiter).filter(Boolean)))).filter(Boolean);
    }
}
3645
/**
 * Phonetic registry service for managing phonetic implementations.
 *
 * This registry allows for dynamic registration and retrieval of phonetic classes,
 * enabling the use of various phonetic algorithms in a consistent manner.
 *
 * It is created by the `Registry` factory with the label 'phonetic' and the
 * abstract `Phonetic` class as its second argument; algorithms add themselves
 * through `PhoneticRegistry.add(name, Class)`.
 */
const PhoneticRegistry = Registry('phonetic', Phonetic);
3652
+ /**
3653
+ * Phonetic Mapping Service
3654
+ *
3655
+ * This service provides a simple interface to manage phonetic mappings across
3656
+ * different phonetic algorithms. It allows adding, removing, checking existence,
3657
+ * retrieving, and listing phonetic mappings for specified algorithms.
3658
+ */
3659
const PhoneticMappingRegistry = (() => {
    // Prototype-less store: algorithm name -> (mapping id -> phonetic map)
    const mappings = Object.create(null);
    // Returns the bucket of an algorithm, creating it on first use.
    // Only the write path (`add`) goes through this helper, so lookups
    // for unknown algorithms never leave empty buckets behind.
    const bucketFor = (algo) => (mappings[algo] ||= Object.create(null));
    return {
        /**
         * Adds a phonetic mapping for a specific algorithm and ID.
         *
         * @param {string} algo - The phonetic algorithm identifier (e.g., 'soundex', 'metaphone')
         * @param {string} id - The unique identifier for the mapping
         * @param {PhoneticMap} map - The phonetic map to be added, containing rules and mappings
         * @param {boolean} [update=false] - Whether to allow overwriting an existing entry
         * @throws {Error} If the mapping name already exists and update is false
         */
        add(algo, id, map, update = false) {
            const bucket = bucketFor(algo);
            if (!update && id in bucket)
                throw new Error(`entry <${id}> already exists / use <update=true> to overwrite`);
            bucket[id] = map;
        },
        /**
         * Removes a phonetic mapping for a specific algorithm and ID.
         * Removing an unknown algorithm or ID is a no-op.
         *
         * @param {string} algo - The phonetic algorithm identifier
         * @param {string} id - The unique identifier for the mapping to be removed
         */
        remove(algo, id) {
            const bucket = mappings[algo];
            if (bucket !== undefined)
                delete bucket[id];
        },
        /**
         * Checks if a phonetic mapping exists for a specific algorithm and ID.
         *
         * @param {string} algo - The phonetic algorithm identifier
         * @param {string} id - The unique identifier for the mapping to check
         * @returns {boolean} - Returns true if the mapping exists, false otherwise
         */
        has(algo, id) {
            const bucket = mappings[algo];
            return bucket !== undefined && id in bucket;
        },
        /**
         * Retrieves a phonetic mapping for a specific algorithm and ID.
         *
         * @param {string} algo - The phonetic algorithm identifier
         * @param {string} id - The unique identifier for the mapping to retrieve
         * @returns {PhoneticMap | undefined} - Returns the phonetic map if found, otherwise undefined
         */
        get(algo, id) { return mappings[algo]?.[id]; },
        /**
         * Lists all phonetic mappings for a specific algorithm.
         *
         * @param {string} algo - The phonetic algorithm identifier
         * @returns {string[]} - Returns an array of mapping IDs (empty for an unknown algorithm)
         */
        list(algo) {
            const bucket = mappings[algo];
            return bucket === undefined ? [] : Object.keys(bucket);
        }
    };
})();
3712
+
3713
+ /**
3714
+ * Cologne Phonetic Algorithm
3715
+ * src/phonetic/Cologne.ts
3716
+ *
3717
+ * @see https://en.wikipedia.org/wiki/Cologne_phonetics
3718
+ *
3719
+ * Cologne phonetics, also known as `Kölner Phonetik` or the `Cologne process`,
3720
+ * is a phonetic algorithm that assigns a sequence of digits, referred to as the
3721
+ * phonetic code, to words. The purpose of this method is to ensure that words
3722
+ * with identical sounds receive the same code. This algorithm can facilitate a
3723
+ * similarity search among words.
3724
+ *
3725
+ * Cologne phonetics is associated with the well-known Soundex phonetic algorithm,
3726
+ * yet it is specifically optimized for the German language. This algorithm was
3727
+ * introduced by Hans Joachim Postel in 1969.
3728
+ *
3729
+ * The Cologne phonetic algorithm works by mapping letters to digits, ignoring
3730
+ * certain letters, and applying specific rules to handle character combinations.
3731
+ *
3732
+ * @module Phonetic/Cologne
3733
+ * @author Paul Köhler (komed3)
3734
+ * @license MIT
3735
+ */
3736
+ /**
3737
+ * Cologne class extends the Phonetic class to implement the Cologne phonetic algorithm.
3738
+ */
3739
class Cologne extends Phonetic {
    // Option defaults picked up by the Phonetic constructor through `this.constructor.default`
    static default = {
        map: 'default', delimiter: ' ', length: -1, dedupe: true
    };
    /**
     * Creates a Cologne phonetics encoder.
     *
     * @param {PhoneticOptions} [opt] - Options for the Cologne phonetic algorithm
     */
    constructor(opt = {}) {
        super('cologne', opt);
    }
    /**
     * Drops every '0' from the code except one in the leading position.
     *
     * @param {string} code - The phonetic code to adjust
     * @returns {string} - The adjusted phonetic code
     */
    adjustCode(code) {
        // The first character is kept as it is, whatever it is
        const head = code.slice(0, 1);
        // Splitting on '0' and re-joining removes all remaining zeros
        const tail = code.slice(1).split('0').join('');
        return head + tail;
    }
}
3762
// Register the Cologne algorithm in the phonetic registry
PhoneticRegistry.add('cologne', Cologne);
// Register the Cologne phonetic mapping under the id 'default'
// (the id the Cologne class selects through its default `map` option)
PhoneticMappingRegistry.add('cologne', 'default', {
    // Plain letter -> digit table, used when no rule of the ruleset matches.
    // Vowels (and j, y) map to '0'; 'x' is the only letter that maps to two digits.
    map: {
        a: '0', ä: '0', e: '0', i: '0', j: '0', o: '0', ö: '0', u: '0', ü: '0', y: '0',
        b: '1', p: '1', d: '2', t: '2', f: '3', v: '3', w: '3', g: '4', k: '4', q: '4',
        l: '5', m: '6', n: '6', r: '7', c: '8', s: '8', ß: '8', z: '8', x: '48'
    },
    // 'h' produces no code of its own; it still counts as a neighbour
    // for the `next` conditions of the rules below
    ignore: ['h'],
    // Context rules, tested in order; the first matching rule wins
    ruleset: [
        // 'p' before 'h' gets '3', the code the map assigns to f/v/w
        { char: 'p', next: ['h'], code: '3' },
        // Word-initial 'c' before one of the listed letters gets '4' (the code of g/k/q)
        { char: 'c', position: 'start', next: ['a', 'h', 'k', 'l', 'o', 'q', 'r', 'u', 'x'], code: '4' },
        // 'c' before one of the listed letters gets '4', unless it follows 's' or 'z'
        { char: 'c', next: ['a', 'h', 'k', 'o', 'q', 'u', 'x'], prevNot: ['s', 'z'], code: '4' },
        // 'd' and 't' before 'c', 's' or 'z' get '8' instead of their map value '2'
        { char: 'd', next: ['c', 's', 'z'], code: '8' },
        { char: 't', next: ['c', 's', 'z'], code: '8' },
        // 'x' after 'c', 'k' or 'q' gets '8' instead of its map value '48'
        { char: 'x', prev: ['c', 'k', 'q'], code: '8' }
    ]
});
3781
+
3782
+ /**
3783
+ * Metaphone Phonetic Algorithm
3784
+ * src/phonetic/Metaphone.ts
3785
+ *
3786
+ * @see https://en.wikipedia.org/wiki/Metaphone
3787
+ *
3788
+ * Metaphone is a phonetic algorithm for indexing words by their English pronunciation.
3789
+ * It encodes words into a string of consonant symbols, allowing for the comparison of
3790
+ * words based on their pronunciation rather than their spelling. Metaphone is more
3791
+ * accurate than Soundex for English and is widely used in search, spell-checking,
3792
+ * and fuzzy matching.
3793
+ *
3794
+ * This implementation uses a mapping and a comprehensive ruleset to efficiently
3795
+ * transform input words into their Metaphone code. The algorithm drops or transforms
3796
+ * letters according to context-sensitive rules, and only retains vowels at the start.
3797
+ *
3798
+ * @module Phonetic/Metaphone
3799
+ * @author Paul Köhler (komed3)
3800
+ * @license MIT
3801
+ */
3802
/**
 * Metaphone class extends the Phonetic class to implement the Metaphone phonetic algorithm.
 */
class Metaphone extends Phonetic {

    // Default options for the Metaphone phonetic algorithm
    static default = {
        map: 'en90', delimiter: ' ', length: -1, pad: '', dedupe: false
    };

    /**
     * Constructor for the Metaphone class.
     *
     * Passes the algorithm name 'metaphone' and the given options on to the
     * Phonetic base constructor.
     *
     * @param {PhoneticOptions} [opt] - Options for the Metaphone phonetic algorithm
     */
    constructor(opt = {}) { super('metaphone', opt); }

    /**
     * Generates the Metaphone code for a given word.
     *
     * Runs of the same letter are collapsed to a single letter before the
     * base implementation applies the mapping and ruleset.
     *
     * @param {string} word - The input word to be converted into a Metaphone code
     * @returns {string} - The generated Metaphone code
     */
    encode(word) {
        // Collapse adjacent duplicate letters (case-insensitive). `C` is left out of
        // the character class, so `CC` is never matched and stays as it is; the first
        // letter of each run is what remains.
        word = word.replace(/([A-BD-Z])\1+/gi, '$1');
        // Use the base implementation for rule/mapping application
        return super.encode(word);
    }

    /**
     * Adjusts the Metaphone code by removing the vowels `A`, `E`, `I`, `O`, `U`
     * everywhere except in first position.
     *
     * @param {string} code - The Metaphone code to be adjusted
     * @returns {string} - The adjusted Metaphone code
     */
    adjustCode(code) {
        // Keep the first symbol untouched, strip vowels from the remainder
        return code.slice(0, 1) + code.slice(1).replace(/[AEIOU]/g, '');
    }

}
3841
// Make the Metaphone encoder available under the name 'metaphone'
PhoneticRegistry.add('metaphone', Metaphone);

/**
 * English Metaphone mapping, registered as 'en90'.
 *
 * It follows the original BASIC implementation from 1990,
 * written by Lawrence Philips.
 *
 * @see https://gist.github.com/Rostepher/b688f709587ac145a0b3
 */
{
    // Vowel list used by several rules; it is spread so that each rule owns its own array
    const vowels = ['a', 'e', 'i', 'o', 'u'];

    PhoneticMappingRegistry.add('metaphone', 'en90', {
        map: {
            a: 'A', b: 'B', c: 'K', d: 'T', e: 'E', f: 'F',
            g: 'K', h: 'H', i: 'I', j: 'J', k: 'K',
            l: 'L', m: 'M', n: 'N', o: 'O', p: 'P', q: 'K',
            r: 'R', s: 'S', t: 'T', u: 'U', v: 'F', w: 'W',
            x: 'KS', y: 'Y', z: 'S'
        },
        // Context rules, kept in their original order
        ruleset: [
            // A leading `AE`, `GN`, `KN`, `PN` or `WR` loses its first letter
            { char: 'a', position: 'start', next: ['e'], code: '' },
            { char: 'g', position: 'start', next: ['n'], code: '' },
            { char: 'k', position: 'start', next: ['n'], code: '' },
            { char: 'p', position: 'start', next: ['n'], code: '' },
            { char: 'w', position: 'start', next: ['r'], code: '' },
            // A final `B` directly after `M` is dropped
            { char: 'b', position: 'end', prev: ['m'], code: '' },
            // `C` becomes `X` before `H` (unless preceded by `S`) or before `IA`
            { char: 'c', next: ['h'], prevNot: ['s'], code: 'X' },
            { char: 'c', next: ['i'], next2: ['a'], code: 'X' },
            // `C` becomes `S` before `E`, `I` or `Y`
            { char: 'c', next: ['e', 'i', 'y'], code: 'S' },
            // `D` becomes `J` before `GE`, `GI` or `GY`
            { char: 'd', next: ['g'], next2: ['e', 'i', 'y'], code: 'J' },
            // `G` is dropped before `H` unless that `H` ends the string or precedes a vowel
            { char: 'g', next: ['h'], next2Not: ['', ...vowels], code: '' },
            // `G` is dropped when the string ends in `GN` or `GNED`
            { char: 'g', trailing: 'n', code: '' },
            { char: 'g', trailing: 'ned', code: '' },
            // `G` becomes `J` before `E`, `I` or `Y`, except as the second letter of `GG`
            { char: 'g', next: ['e', 'i', 'y'], prevNot: ['g'], code: 'J' },
            // `H` is dropped after a vowel when no vowel follows
            { char: 'h', prev: [...vowels], nextNot: [...vowels], code: '' },
            // `H` is dropped after `C`, `G`, `P`, `S` or `T`
            { char: 'h', prev: ['c', 'g', 'p', 's', 't'], code: '' },
            // `K` is dropped after `C`
            { char: 'k', prev: ['c'], code: '' },
            // `PH` becomes `F`
            { char: 'p', next: ['h'], code: 'F' },
            // `S` becomes `X` before `H`, `IA` or `IO`
            { char: 's', next: ['h'], code: 'X' },
            { char: 's', next: ['i'], next2: ['a', 'o'], code: 'X' },
            // `T` becomes `X` before `IA` or `IO`
            { char: 't', next: ['i'], next2: ['a', 'o'], code: 'X' },
            // `TH` becomes `0` (zero)
            { char: 't', next: ['h'], code: '0' },
            // `T` is dropped before `CH`
            { char: 't', next: ['c'], next2: ['h'], code: '' },
            // `W` is dropped unless a vowel follows
            { char: 'w', nextNot: [...vowels], code: '' },
            // A leading `WH` becomes `W` (the `H` is dropped)
            { char: 'h', leading: 'w', code: '' },
            // A leading `X` becomes `S`
            { char: 'x', position: 'start', code: 'S' },
            // `Y` is dropped unless a vowel follows
            { char: 'y', nextNot: [...vowels], code: '' }
        ]
    });
}
3909
+
3910
+ /**
3911
+ * Soundex Phonetic Algorithm
3912
+ * src/phonetic/Soundex.ts
3913
+ *
3914
+ * @see https://en.wikipedia.org/wiki/Soundex
3915
+ *
3916
+ * Soundex is a phonetic algorithm for indexing names by sound. It is used to
3917
+ * encode words into a phonetic representation, allowing for the comparison of
3918
+ * words based on their pronunciation rather than their spelling. This works
3919
+ * by mapping letters to digits, ignoring certain letters, and applying specific
3920
+ * rules to handle character combinations.
3921
+ *
3922
+ * It is particularly useful for matching names that may be spelled differently
3923
+ * but sound similar, and is commonly used in genealogical research and databases
3924
+ * to find similar-sounding names.
3925
+ *
3926
+ * The Soundex algorithm is not case-sensitive and ignores vowels and certain
3927
+ * consonants. It outputs an array of strings that represents the phonetic code
3928
+ * of the input, typically limited to the length of four characters.
3929
+ *
3930
+ * @module Phonetic/Soundex
3931
+ * @author Paul Köhler (komed3)
3932
+ * @license MIT
3933
+ */
3934
/**
 * Phonetic encoder for the Soundex algorithm, implemented as a thin
 * specialisation of the Phonetic base class.
 */
class Soundex extends Phonetic {

    // Option defaults used by the Soundex algorithm
    static default = {
        map: 'en',
        delimiter: ' ',
        length: 4,
        pad: '0',
        dedupe: true
    };

    /**
     * Builds a Soundex encoder by handing the algorithm name and the
     * caller's options to the Phonetic base constructor.
     *
     * @param {PhoneticOptions} [opt] - Options for the Soundex phonetic algorithm
     */
    constructor(opt = {}) {
        super('soundex', opt);
    }

    /**
     * Replaces the first symbol of the code with the upper-cased first input
     * character and strips every '0' from the rest of the code.
     *
     * @param {string} code - The phonetic code to adjust
     * @param {string[]} chars - The characters used in the phonetic code
     * @returns {string} - The adjusted phonetic code
     */
    adjustCode(code, chars) {
        const initial = chars[0].toUpperCase();
        const digits = code.slice(1).replace(/0/g, '');
        return initial + digits;
    }

}
3962
// Make the Soundex encoder available under the name 'soundex'
PhoneticRegistry.add('soundex', Soundex);

// English Soundex mapping, registered as 'en'
PhoneticMappingRegistry.add('soundex', 'en', {
    map: {
        // code 0
        a: '0', e: '0', h: '0', i: '0', o: '0', u: '0', w: '0', y: '0',
        // code 1
        b: '1', f: '1', p: '1', v: '1',
        // code 2
        c: '2', g: '2', j: '2', k: '2', q: '2', s: '2', x: '2', z: '2',
        // code 3
        d: '3', t: '3',
        // code 4
        l: '4',
        // code 5
        m: '5', n: '5',
        // code 6
        r: '6'
    }
});

// German Soundex mapping, registered as 'de'
PhoneticMappingRegistry.add('soundex', 'de', {
    map: {
        // code 0
        a: '0', ä: '0', e: '0', h: '0', i: '0', j: '0', o: '0', ö: '0', u: '0', ü: '0', y: '0',
        // code 1
        b: '1', f: '1', p: '1', v: '1', w: '1',
        // code 2
        c: '2', g: '2', k: '2', q: '2', s: '2', ß: '2', x: '2', z: '2',
        // code 3
        d: '3', t: '3',
        // code 4
        l: '4',
        // code 5
        m: '5', n: '5',
        // code 6
        r: '6'
    },
    ruleset: [
        // `C` before `H` gets its own code 7
        { char: 'c', next: ['h'], code: '7' }
    ]
});
3985
+
3986
+ /**
3987
+ * CmpStr Main API
3988
+ * src/CmpStr.ts
3989
+ *
3990
+ * The CmpStr class provides a comprehensive, highly abstracted, and type-safe interface
3991
+ * for string comparison, similarity measurement, phonetic indexing, filtering, normalization,
3992
+ * and text analysis. It unifies all core features of the CmpStr package and exposes a
3993
+ * consistent, user-friendly API for both single and batch operations.
3994
+ *
3995
+ * Features:
3996
+ * - Centralized management of metrics, phonetic algorithms, and filters
3997
+ * - Flexible normalization and filtering pipeline for all inputs
3998
+ * - Batch, pairwise, and single string comparison with detailed results
3999
+ * - Phonetic indexing and phonetic-aware search and comparison
4000
+ * - Text analysis and unified diff utilities
4001
+ * - Full TypeScript type safety and extensibility
4002
+ *
4003
+ * @module CmpStr
4004
+ * @author Paul Köhler (komed3)
4005
+ * @license MIT
4006
+ */
4007
// Fetch the Profiler instance once; its services are exposed as the static CmpStr.profiler
const profiler = Profiler.getInstance();
4009
/**
 * The main CmpStr class that provides a unified interface for string comparison,
 * phonetic indexing, filtering, and text analysis.
 *
 * @template R - The type of the metric result, defaults to MetricRaw
 */
class CmpStr {

    /**
     * --------------------------------------------------------------------------------
     * Static methods and properties for global access to CmpStr features
     * --------------------------------------------------------------------------------
     *
     * These static members give access to the core features of the CmpStr package
     * without needing to instantiate a CmpStr object.
     */

    /**
     * Adds, removes, pauses, resumes, lists, or clears global filters.
     *
     * @see Filter
     */
    static filter = {
        add: Filter.add,
        remove: Filter.remove,
        pause: Filter.pause,
        resume: Filter.resume,
        list: Filter.list,
        clear: Filter.clear
    };

    /**
     * Adds, removes, checks, or lists available metrics.
     *
     * @see MetricRegistry
     */
    static metric = {
        add: MetricRegistry.add,
        remove: MetricRegistry.remove,
        has: MetricRegistry.has,
        list: MetricRegistry.list
    };

    /**
     * Adds, removes, checks, or lists available phonetic algorithms and mappings.
     *
     * @see PhoneticRegistry
     */
    static phonetic = {
        add: PhoneticRegistry.add,
        remove: PhoneticRegistry.remove,
        has: PhoneticRegistry.has,
        list: PhoneticRegistry.list,
        map: {
            add: PhoneticMappingRegistry.add,
            remove: PhoneticMappingRegistry.remove,
            has: PhoneticMappingRegistry.has,
            list: PhoneticMappingRegistry.list
        }
    };

    /**
     * Provides access to the global profiler services.
     *
     * @see Profiler
     */
    static profiler = profiler.services;

    /**
     * Clears the caches for normalizer, metric, and phonetic modules.
     */
    static clearCache = {
        normalizer: Normalizer.clear,
        metric: Metric.clear,
        phonetic: Phonetic.clear
    };

    /**
     * Returns a TextAnalyzer instance for the given input string.
     *
     * @param {string} [input] - The input string
     * @returns {TextAnalyzer} - The text analyzer
     */
    static analyze(input) { return new TextAnalyzer(input); }

    /**
     * Returns a DiffChecker instance for computing the unified diff between two texts.
     *
     * @param {string} a - The first (original) text
     * @param {string} b - The second (modified) text
     * @param {DiffOptions} [opt] - Optional diff configuration
     * @returns {DiffChecker} - The diff checker instance
     */
    static diff(a, b, opt) { return new DiffChecker(a, b, opt); }

    /**
     * --------------------------------------------------------------------------------
     * Instantiate the CmpStr class
     * --------------------------------------------------------------------------------
     *
     * Methods to create a new CmpStr instance with the given options.
     * Using the static `create` method is recommended to ensure proper instantiation.
     */

    /**
     * Creates a new CmpStr instance with the given options.
     *
     * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
     * @returns {CmpStr<R>} - A new CmpStr instance
     */
    static create(opt) { return new CmpStr(opt); }

    // The options object that holds the configuration for this CmpStr instance
    options = Object.create(null);

    /**
     * Creates a new CmpStr instance with the given options.
     *
     * A string is treated as serialized (JSON) options, anything else truthy
     * as an options object. Prefer the static `create` method.
     *
     * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
     */
    constructor(opt) {
        if (opt)
            typeof opt === 'string'
                ? this.setSerializedOptions(opt)
                : this.setOptions(opt);
    }

    /**
     * ---------------------------------------------------------------------------------
     * Protected utility methods for internal use
     * ---------------------------------------------------------------------------------
     *
     * These methods provide utility functions for converting inputs, merging options,
     * normalizing inputs, filtering, and preparing inputs for comparison.
     */

    /**
     * Asserts a condition and throws if the condition is not met.
     *
     * @param {string} cond - The condition to meet ('metric' or 'phonetic')
     * @param {any} [test] - Value to test for
     * @throws {Error} If the condition is not met or the condition name is unknown
     */
    assert(cond, test) {
        switch (cond) {
            // Check if the metric exists
            case 'metric':
                if (!CmpStr.metric.has(test))
                    throw new Error(`CmpStr <metric> must be set, call .setMetric(), ` +
                        `use CmpStr.metric.list() for available metrics`);
                break;
            // Check if the phonetic algorithm exists
            case 'phonetic':
                if (!CmpStr.phonetic.has(test))
                    throw new Error(`CmpStr <phonetic> must be set, call .setPhonetic(), ` +
                        `use CmpStr.phonetic.list() for available phonetic algorithms`);
                break;
            // Throw an error for unknown conditions
            default: throw new Error(`Cmpstr condition <${cond}> unknown`);
        }
    }

    /**
     * Asserts multiple conditions, stopping at the first one that throws.
     *
     * @param {[ string, any? ][]} cond - Array of [ condition, value ] pairs
     * @throws {Error} If any condition is not met
     */
    assertMany(...cond) {
        for (const [c, test] of cond)
            this.assert(c, test);
    }

    /**
     * Resolves the options for the CmpStr instance, merging the provided options
     * into a copy of the instance options.
     *
     * @param {CmpStrOptions} [opt] - Optional options to merge
     * @returns {CmpStrOptions} - The resolved options
     */
    resolveOptions(opt) {
        // NOTE(review): the copy is shallow, so nested option objects are still shared
        // with this.options; whether `merge` writes into them is not visible here — verify.
        return merge({ ...(this.options ?? Object.create(null)) }, opt);
    }

    /**
     * Normalizes the input string or array using the provided flags, falling back
     * to the configured flags and then to an empty flag string.
     *
     * @param {MetricInput} input - The input string or array
     * @param {NormalizeFlags} [flags] - Normalization flags
     * @returns {MetricInput} - The normalized input
     */
    normalize(input, flags) {
        return Normalizer.normalize(input, flags ?? this.options.flags ?? '');
    }

    /**
     * Applies the filters registered for the given hook to the input string or array.
     *
     * @param {MetricInput} input - The input string or array
     * @param {string} [hook='input'] - The filter hook
     * @returns {MetricInput} - The filtered string(s)
     */
    filter(input, hook = 'input') {
        return Filter.apply(hook, input);
    }

    /**
     * Prepares the input by normalizing, filtering and (if configured) phonetic indexing.
     *
     * @param {MetricInput} [input] - The input string or array
     * @param {CmpStrOptions} [opt] - Options to use instead of the instance options
     * @returns {MetricInput} - The prepared input
     */
    prepare(input, opt) {
        const { flags, processors } = opt ?? this.options;
        // Normalize the input using flags (i.e., 'itw')
        if (flags?.length)
            input = this.normalize(input, flags);
        // Filter the input using hooked up filters
        input = this.filter(input, 'input');
        // Apply phonetic processors if configured
        if (processors?.phonetic)
            input = this.index(input, processors.phonetic);
        return input;
    }

    /**
     * Post-processes the results of the metric computation.
     *
     * @param {MetricResult<R>} result - The metric result
     * @param {CmpStrOptions} [opt] - Options; only `removeZero` is read here
     * @returns {MetricResult<R>} - The post-processed results
     */
    postProcess(result, opt) {
        // Remove "zero similarity" from batch results if configured
        if (opt?.removeZero && Array.isArray(result))
            result = result.filter(r => r.res > 0);
        return result;
    }

    /**
     * Computes the phonetic index for the given input using the specified phonetic algorithm.
     *
     * @param {MetricInput} input - The input string or array
     * @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
     * @returns {MetricInput} - The phonetic index for the given input
     * @throws {Error} If the phonetic algorithm is not registered
     */
    index(input, { algo, opt }) {
        this.assert('phonetic', algo);
        const phonetic = factory.phonetic(algo, opt);
        // The index parts returned by getIndex() are joined back into one string
        const delimiter = opt?.delimiter ?? ' ';
        return Array.isArray(input)
            ? input.map(s => phonetic.getIndex(s).join(delimiter))
            : phonetic.getIndex(input).join(delimiter);
    }

    /**
     * Computes the metric result for the given inputs, applying normalization and
     * filtering as configured.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The first input string or array
     * @param {MetricInput} b - The second input string or array
     * @param {CmpStrOptions} [opt] - Optional options to use
     * @param {MetricMode} [mode] - The metric mode, forwarded to the metric's run()
     * @param {boolean} [raw] - Whether to return raw results; falls back to the resolved `raw` option
     * @param {boolean} [skip] - Whether to skip input preparation (normalization, filtering, indexing)
     * @returns {T} - The computed metric result
     * @throws {Error} If the resolved metric is not registered
     */
    compute(a, b, opt, mode, raw, skip) {
        const resolved = this.resolveOptions(opt);
        this.assert('metric', resolved.metric);
        // Prepare the input
        const A = skip ? a : this.prepare(a, resolved);
        const B = skip ? b : this.prepare(b, resolved);
        // Get the metric class
        const metric = factory.metric(resolved.metric, A, B, resolved.opt);
        // Pass the original inputs to the metric
        if (resolved.output !== 'prep')
            metric.setOriginal(a, b);
        // Compute the metric result
        metric.run(mode);
        // Post-process the results
        const result = this.postProcess(metric.getResults(), resolved);
        // Resolve and return the result based on the raw flag
        return this.output(result, raw ?? resolved.raw);
    }

    /**
     * Resolves the result format (raw or formatted).
     *
     * Formatted results are `{ source, target, match }` objects built from the
     * `a`, `b` and `res` fields of the raw result(s).
     *
     * @template T - The type of the metric result
     * @param {MetricResult<R>} result - The metric result
     * @param {boolean} [raw] - Whether to return raw results; falls back to the `raw` option
     * @returns {T} - The resolved result
     */
    output(result, raw) {
        if (raw ?? this.options.raw)
            return result;
        return Array.isArray(result)
            ? result.map(r => ({ source: r.a, target: r.b, match: r.res }))
            : { source: result.a, target: result.b, match: result.res };
    }

    /**
     * ---------------------------------------------------------------------------------
     * Managing methods for CmpStr
     * ---------------------------------------------------------------------------------
     *
     * These methods provide an interface to set and get properties of the CmpStr
     * instance, such as options, metric, phonetic algorithm, and more.
     */

    /**
     * Creates a shallow clone of the current instance.
     *
     * Own properties are copied by reference, so the clone shares the same
     * options object with this instance.
     *
     * @returns {CmpStr<R>} - The cloned instance
     */
    clone() { return Object.assign(Object.create(Object.getPrototypeOf(this)), this); }

    /**
     * Resets the instance by deleting every key from the current options object.
     *
     * The object is emptied in place; an object handed to setOptions() is the
     * same object and is therefore emptied as well.
     *
     * @returns {this}
     */
    reset() {
        for (const k in this.options)
            delete this.options[k];
        return this;
    }

    /**
     * Sets / replaces the full options object (stored by reference).
     *
     * @param {CmpStrOptions} opt - The options
     * @returns {this}
     */
    setOptions(opt) { this.options = opt; return this; }

    /**
     * Merges the given options into the current options object.
     *
     * @param {CmpStrOptions} opt - The options to merge
     * @returns {this}
     */
    mergeOptions(opt) { merge(this.options, opt); return this; }

    /**
     * Sets the options from a JSON string.
     *
     * @param {string} opt - The serialized options
     * @returns {this}
     * @throws {SyntaxError} If the string is not valid JSON
     */
    setSerializedOptions(opt) { this.options = JSON.parse(opt); return this; }

    /**
     * Sets a specific option at the given path.
     *
     * @param {string} path - The path to the option
     * @param {any} value - The value to set
     * @returns {this}
     */
    setOption(path, value) { set(this.options, path, value); return this; }

    /**
     * Removes an option at the given path.
     *
     * @param {string} path - The path to the option
     * @returns {this}
     */
    rmvOption(path) { rmv(this.options, path); return this; }

    /**
     * Enables or disables raw output.
     *
     * @param {boolean} enable - Whether to enable or disable raw output
     * @returns {this}
     */
    setRaw(enable) { return this.setOption('raw', enable); }

    /**
     * Sets the similarity metric to use (e.g., 'levenshtein', 'dice').
     *
     * @param {string} name - The metric name
     * @returns {this}
     */
    setMetric(name) { return this.setOption('metric', name); }

    /**
     * Sets the normalization flags (e.g., 'itw', 'nfc').
     *
     * @param {NormalizeFlags} flags - The normalization flags
     * @returns {this}
     */
    setFlags(flags) { return this.setOption('flags', flags); }

    /**
     * Removes the normalization flags entirely.
     *
     * @returns {this}
     */
    rmvFlags() { return this.rmvOption('flags'); }

    /**
     * Sets the pre-processors to use for preparing the input.
     *
     * @param {CmpStrProcessors} opt - The processors to set
     * @returns {this}
     */
    setProcessors(opt) { return this.setOption('processors', opt); }

    /**
     * Removes the processors entirely.
     *
     * @returns {this}
     */
    rmvProcessors() { return this.rmvOption('processors'); }

    /**
     * Returns the current options object (the live object, not a copy).
     *
     * @returns {CmpStrOptions} - The options
     */
    getOptions() { return this.options; }

    /**
     * Returns the options as a JSON string.
     *
     * @returns {string} - The serialized options
     */
    getSerializedOptions() { return JSON.stringify(this.options); }

    /**
     * Returns a specific option value by path.
     *
     * @param {string} path - The path to the option
     * @returns {any} - The option value
     */
    getOption(path) { return get(this.options, path); }

    /**
     * ---------------------------------------------------------------------------------
     * Public core methods for string comparison
     * ---------------------------------------------------------------------------------
     *
     * These methods provide the core functionality of the CmpStr class, allowing for
     * string comparison, phonetic indexing, filtering, and text search.
     */

    /**
     * Performs a single metric comparison between the source and target.
     *
     * @template T - The type of the metric result
     * @param {string} a - The source string
     * @param {string} b - The target string
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The metric result
     */
    test(a, b, opt) {
        return this.compute(a, b, opt, 'single');
    }

    /**
     * Performs a single metric comparison and returns only the numeric score.
     *
     * @param {string} a - The source string
     * @param {string} b - The target string
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {number} - The similarity score (0..1)
     */
    compare(a, b, opt) {
        // Raw output is forced so that `res` is available regardless of the `raw` option
        return this.compute(a, b, opt, 'single', true).res;
    }

    /**
     * Performs a batch metric comparison between source and target strings
     * or array of strings.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The batch metric results
     */
    batchTest(a, b, opt) {
        return this.compute(a, b, opt, 'batch');
    }

    /**
     * Performs a batch metric comparison and returns results sorted by score.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The sorted batch results
     */
    batchSorted(a, b, dir = 'desc', opt) {
        // Sort on the raw results, then format according to the effective `raw` setting
        const sorted = this.compute(a, b, opt, 'batch', true)
            .sort((x, y) => dir === 'asc' ? x.res - y.res : y.res - x.res);
        return this.output(sorted, opt?.raw ?? this.options.raw);
    }

    /**
     * Performs a pairwise metric comparison between source and target strings
     * or array of strings.
     *
     * Input arrays need to be of the same length to perform a pairwise comparison,
     * otherwise the method will throw an error.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The pairwise metric results
     */
    pairs(a, b, opt) {
        return this.compute(a, b, opt, 'pairwise');
    }

    /**
     * Performs a batch comparison and returns only results at or above the
     * threshold, sorted by descending score.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} threshold - The similarity threshold (0..1)
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The filtered batch results
     */
    match(a, b, threshold, opt) {
        const matches = this.compute(a, b, opt, 'batch', true)
            .filter(r => r.res >= threshold)
            .sort((x, y) => y.res - x.res);
        return this.output(matches, opt?.raw ?? this.options.raw);
    }

    /**
     * Returns the n closest matches from a batch comparison.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} [n=1] - Number of closest matches
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The closest matches
     */
    closest(a, b, n = 1, opt) {
        return this.batchSorted(a, b, 'desc', opt).slice(0, n);
    }

    /**
     * Returns the n furthest matches from a batch comparison.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} [n=1] - Number of furthest matches
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The furthest matches
     */
    furthest(a, b, n = 1, opt) {
        return this.batchSorted(a, b, 'asc', opt).slice(0, n);
    }

    /**
     * Performs a normalized and filtered substring search.
     *
     * Needle and haystack are prepared with the same options; the returned
     * entries are taken from the original (unprepared) haystack.
     *
     * @param {string} needle - The search string
     * @param {string[]} haystack - The array to search in
     * @param {NormalizeFlags} [flags] - Normalization flags
     * @param {CmpStrProcessors} [processors] - Pre-processors to apply
     * @returns {string[]} - Array of matching entries
     */
    search(needle, haystack, flags, processors) {
        const resolved = this.resolveOptions({ flags, processors });
        // Prepare the needle and haystack, normalizing and filtering them
        const test = this.prepare(needle, resolved);
        const hstk = this.prepare(haystack, resolved);
        // Filter the haystack based on the normalized test string
        return haystack.filter((_, i) => hstk[i].includes(test));
    }

    /**
     * Computes a similarity matrix for the given input array.
     *
     * The input is prepared once; every entry is then batch-compared against the
     * whole prepared input, using the same per-call options.
     *
     * @param {string[]} input - The input array
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {number[][]} - The similarity matrix
     */
    matrix(input, opt) {
        const prepared = this.prepare(input, this.resolveOptions(opt));
        // Forward `opt` so a per-call metric / metric options are honoured; preparation
        // is skipped because the input has already been prepared above.
        // NOTE(review): with `removeZero` enabled, postProcess drops zero scores and
        // rows may end up shorter than the input — confirm whether that is intended.
        return prepared.map(a => this.compute(a, prepared, opt, 'batch', true, true).map(b => b.res ?? 0));
    }

    /**
     * Computes the phonetic index for a string using the configured
     * or given algorithm.
     *
     * @param {string} [input] - The input string
     * @param {string} [algo] - The phonetic algorithm to use
     * @param {PhoneticOptions} [opt] - Optional phonetic options
     * @returns {string} - The phonetic index as a string
     * @throws {Error} If no registered phonetic algorithm is given or configured
     */
    phoneticIndex(input, algo, opt) {
        // Fall back to the phonetic processor configured on the instance, if any
        const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
        return this.index(input, { algo: (algo ?? a), opt: opt ?? o });
    }

}
4558
+
4559
+ /**
4560
+ * CmpStrAsync Asynchronous API
4561
+ * src/CmpStrAsync.ts
4562
+ *
4563
+ * The CmpStrAsync class provides a fully asynchronous, Promise-based interface for
4564
+ * advanced string comparison, similarity measurement, phonetic indexing, filtering
4565
+ * and normalization. It extends the CmpStr class and overrides all relevant methods
4566
+ * to support non-blocking, scalable, and I/O-friendly workloads.
4567
+ *
4568
+ * Features:
4569
+ * - Asynchronous normalization, filtering, and metric computation
4570
+ * - Async batch, pairwise, and single string comparison with detailed results
4571
+ * - Async phonetic indexing and phonetic-aware search and comparison
4572
+ * - Full compatibility with the synchronous CmpStr API
4573
+ * - Designed for large-scale, high-performance, and server-side applications
4574
+ *
4575
+ * @module CmpStrAsync
4576
+ * @author Paul Köhler (komed3)
4577
+ * @license MIT
4578
+ */
4579
+ /**
4580
+ * The CmpStrAsync class provides a fully asynchronous API for string comparison,
4581
+ * phonetic indexing, filtering and normalization.
4582
+ *
4583
+ * @template R - The type of the metric result, defaults to MetricRaw
4584
+ */
4585
+ class CmpStrAsync extends CmpStr {
4586
+ /**
4587
+ * --------------------------------------------------------------------------------
4588
+ * Instantiate the CmpStrAsync class
4589
+ * --------------------------------------------------------------------------------
4590
+ *
4591
+ * Methods to create a new CmpStrAsync instance with the given options.
4592
+ * Using the static `create` method is recommended to ensure proper instantiation.
4593
+ */
4594
+ /**
4595
+ * Creates a new CmpStrAsync instance with the given options.
4596
+ *
4597
+ * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
4598
+ * @returns {CmpStrAsync<R>} - A new CmpStrAsync instance
4599
+ */
4600
+ static create(opt) {
4601
+ return new CmpStrAsync(opt);
4602
+ }
4603
+ /**
4604
+ * Creates a new CmpStrAsync instance calling the super constructor.
4605
+ *
4606
+ * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
4607
+ */
4608
+ constructor(opt) { super(opt); }
4609
+ /**
4610
+ * ---------------------------------------------------------------------------------
4611
+ * Protected asynchronous utility methods for internal use
4612
+ * ---------------------------------------------------------------------------------
4613
+ *
4614
+ * These methods provide asynchronous normalization, filtering, and metric
4615
+ * computation capabilities, allowing for non-blocking operations.
4616
+ */
4617
+ /**
4618
+ * Asynchronously normalizes the input string or array using the configured or provided flags.
4619
+ *
4620
+ * @param {MetricInput} input - The input string or array
4621
+ * @param {NormalizeFlags} [flags] - Normalization flags (falls back to this.options.flags, then to an empty string)
4622
+ * @returns {Promise<MetricInput>} - The normalized input
4623
+ */
4624
+ async normalizeAsync(input, flags) {
4625
+ return Normalizer.normalizeAsync(input, flags ?? this.options.flags ?? '');
4626
+ }
4627
+ /**
4628
+ * Asynchronously applies all active filters to the input string or array.
4629
+ *
4630
+ * @param {MetricInput} input - The input string or array
4631
+ * @param {string} hook - The filter hook, forwarded to Filter.applyAsync (no default is applied here)
4632
+ * @returns {Promise<MetricInput>} - The filtered string(s)
4633
+ */
4634
+ async filterAsync(input, hook) {
4635
+ return Filter.applyAsync(hook, input);
4636
+ }
4637
+ /**
4638
+ * Asynchronously prepares the input by normalizing, filtering and, if configured, phonetic indexing.
4639
+ *
4640
+ * @param {MetricInput} [input] - The input string or array
4641
+ * @param {CmpStrOptions} [opt] - Optional options to use (defaults to this.options)
4642
+ * @returns {Promise<MetricInput>} - The prepared input
4643
+ */
4644
+ async prepareAsync(input, opt) {
4645
+ const { flags, processors } = opt ?? this.options;
4646
+ // Normalize the input only if flags are set (e.g., 'itw')
4647
+ if (flags?.length)
4648
+ input = await this.normalizeAsync(input, flags);
4649
+ // Always run the filters hooked up to the 'input' hook
4650
+ input = await this.filterAsync(input, 'input');
4651
+ // Apply the phonetic processor if one is configured
4652
+ if (processors?.phonetic)
4653
+ input = await this.indexAsync(input, processors.phonetic);
4654
+ return input;
4655
+ }
4656
+ /**
4657
+ * Asynchronously computes the phonetic index for the given input using
4658
+ * the specified phonetic algorithm.
4659
+ *
4660
+ * @param {MetricInput} input - The input string or array
4661
+ * @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
4662
+ * @returns {Promise<MetricInput>} - The phonetic index, its parts joined by opt.delimiter (a single space by default)
4663
+ */
4664
+ async indexAsync(input, { algo, opt }) {
4665
+ this.assert('phonetic', algo);
4666
+ const phonetic = factory.phonetic(algo, opt);
4667
+ const delimiter = opt?.delimiter ?? ' ';
4668
+ return Array.isArray(input)
4669
+ ? Promise.all(input.map(s => phonetic.getIndexAsync(s).then(r => r.join(delimiter))))
4670
+ : phonetic.getIndexAsync(input).then(r => r.join(delimiter));
4671
+ }
4672
+ /**
4673
+ * Asynchronously computes the metric result for the given inputs, applying
4674
+ * normalization and filtering as configured.
4675
+ *
4676
+ * @template T - The type of the metric result
4677
+ * @param {MetricInput} a - The first input string or array
4678
+ * @param {MetricInput} b - The second input string or array
4679
+ * @param {CmpStrOptions} [opt] - Optional options to use
4680
+ * @param {MetricMode} [mode] - The metric mode, forwarded unchanged to metric.runAsync()
4681
+ * @param {boolean} [raw] - Whether to return raw results (falls back to the resolved raw option)
4682
+ * @param {boolean} [skip=false] - Whether to skip normalization and filtering
4683
+ * @returns {Promise<T>} - The computed metric result
4684
+ */
4685
+ async computeAsync(a, b, opt, mode, raw, skip) {
4686
+ const resolved = this.resolveOptions(opt);
4687
+ this.assert('metric', resolved.metric);
4688
+ // Prepare the inputs unless the caller asked to skip preparation
4689
+ const A = skip ? a : await this.prepareAsync(a, resolved);
4690
+ const B = skip ? b : await this.prepareAsync(b, resolved);
4691
+ // Get the metric class
4692
+ const metric = factory.metric(resolved.metric, A, B, resolved.opt);
4693
+ // Pass the original (unprepared) inputs to the metric unless output is 'prep'
4694
+ if (resolved.output !== 'prep')
4695
+ metric.setOriginal(a, b);
4696
+ // Compute the metric result
4697
+ await metric.runAsync(mode);
4698
+ // Post-process the results using the resolved options
4699
+ const result = this.postProcess(metric.getResults(), resolved);
4700
+ // Resolve and return the result; an explicit raw argument wins over the resolved option
4701
+ return this.output(result, raw ?? resolved.raw);
4702
+ }
4703
+ /**
4704
+ * ---------------------------------------------------------------------------------
4705
+ * Public asynchronous core methods for string comparison
4706
+ * ---------------------------------------------------------------------------------
4707
+ *
4708
+ * These methods provide the asynchronous core functionality for string comparison,
4709
+ * phonetic indexing and text search, allowing for non-blocking operations.
4710
+ */
4711
+ /**
4712
+ * Asynchronously performs a single metric comparison.
4713
+ *
4714
+ * @template T - The type of the metric result
4715
+ * @param {string} a - The source string
4716
+ * @param {string} b - The target string
4717
+ * @param {CmpStrOptions} [opt] - Optional options
4718
+ * @returns {Promise<T>} - The metric result
4719
+ */
4720
+ async testAsync(a, b, opt) {
4721
+ return this.computeAsync(a, b, opt, 'single');
4722
+ }
4723
+ /**
4724
+ * Asynchronously performs a single metric comparison returning the numeric score.
4725
+ *
4726
+ * @param {string} a - The source string
4727
+ * @param {string} b - The target string
4728
+ * @param {CmpStrOptions} [opt] - Optional options
4729
+ * @returns {Promise<number>} - The similarity score (0..1)
4730
+ */
4731
+ async compareAsync(a, b, opt) {
4732
+ return (await this.computeAsync(a, b, opt, 'single', true)).res;
4733
+ }
4734
+ /**
4735
+ * Asynchronously performs a batch metric comparison between source and target
4736
+ * strings or array of strings.
4737
+ *
4738
+ * @template T - The type of the metric result
4739
+ * @param {MetricInput} a - The source string or array of strings
4740
+ * @param {MetricInput} b - The target string or array of strings
4741
+ * @param {CmpStrOptions} [opt] - Optional options
4742
+ * @returns {Promise<T>} - The batch metric results
4743
+ */
4744
+ async batchTestAsync(a, b, opt) {
4745
+ return this.computeAsync(a, b, opt, 'batch');
4746
+ }
4747
+ /**
4748
+ * Asynchronously performs a batch metric comparison and returns results sorted by score.
4749
+ *
4750
+ * @template T - The type of the metric result
4751
+ * @param {MetricInput} a - The source string or array of strings
4752
+ * @param {MetricInput} b - The target string or array of strings
4753
+ * @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
4754
+ * @param {CmpStrOptions} [opt] - Optional options
4755
+ * @returns {Promise<T>} - The sorted batch results
4756
+ */
4757
+ async batchSortedAsync(a, b, dir = 'desc', opt) {
4758
+ const res = await this.computeAsync(a, b, opt, 'batch', true);
4759
+ return this.output(res.sort((a, b) => dir === 'asc' ? a.res - b.res : b.res - a.res), opt?.raw ?? this.options.raw);
4760
+ }
4761
+ /**
4762
+ * Asynchronously performs a pairwise metric comparison between source and target
4763
+ * strings or array of strings.
4764
+ *
4765
+ * Input arrays need to be of the same length to perform pairwise comparison;
4766
+ * otherwise an error is expected to be thrown (not raised in this method itself).
4767
+ *
4768
+ * @template T - The type of the metric result
4769
+ * @param {MetricInput} a - The source string or array of strings
4770
+ * @param {MetricInput} b - The target string or array of strings
4771
+ * @param {CmpStrOptions} [opt] - Optional options
4772
+ * @returns {Promise<T>} - The pairwise metric results
4773
+ */
4774
+ async pairsAsync(a, b, opt) {
4775
+ return this.computeAsync(a, b, opt, 'pairwise');
4776
+ }
4777
+ /**
4778
+ * Asynchronously performs a batch comparison and returns only results at or above the threshold.
4779
+ *
4780
+ * @template T - The type of the metric result
4781
+ * @param {MetricInput} a - The source string or array of strings
4782
+ * @param {MetricInput} b - The target string or array of strings
4783
+ * @param {number} threshold - The similarity threshold (0..1)
4784
+ * @param {CmpStrOptions} [opt] - Optional options
4785
+ * @returns {Promise<T>} - The filtered batch results, sorted by descending score
4786
+ */
4787
+ async matchAsync(a, b, threshold, opt) {
4788
+ const res = await this.computeAsync(a, b, opt, 'batch', true);
4789
+ return this.output(res.filter(r => r.res >= threshold).sort((a, b) => b.res - a.res), opt?.raw ?? this.options.raw);
4790
+ }
4791
+ /**
4792
+ * Asynchronously returns the n closest matches from a batch comparison.
4793
+ *
4794
+ * @template T - The type of the metric result
4795
+ * @param {MetricInput} a - The source string or array of strings
4796
+ * @param {MetricInput} b - The target string or array of strings
4797
+ * @param {number} [n=1] - Number of closest matches
4798
+ * @param {CmpStrOptions} [opt] - Optional options
4799
+ * @returns {Promise<T>} - The closest matches
4800
+ */
4801
+ async closestAsync(a, b, n = 1, opt) {
4802
+ return (await this.batchSortedAsync(a, b, 'desc', opt)).slice(0, n);
4803
+ }
4804
+ /**
4805
+ * Asynchronously returns the n furthest matches from a batch comparison.
4806
+ *
4807
+ * @template T - The type of the metric result
4808
+ * @param {MetricInput} a - The source string or array of strings
4809
+ * @param {MetricInput} b - The target string or array of strings
4810
+ * @param {number} [n=1] - Number of furthest matches
4811
+ * @param {CmpStrOptions} [opt] - Optional options
4812
+ * @returns {Promise<T>} - The furthest matches
4813
+ */
4814
+ async furthestAsync(a, b, n = 1, opt) {
4815
+ return (await this.batchSortedAsync(a, b, 'asc', opt)).slice(0, n);
4816
+ }
4817
+ /**
4818
+ * Asynchronously performs a normalized and filtered substring search.
4819
+ *
4820
+ * @param {string} needle - The search string
4821
+ * @param {string[]} haystack - The array to search in
4822
+ * @param {NormalizeFlags} [flags] - Normalization flags
4823
+ * @param {CmpStrProcessors} [processors] - Pre-processors to apply
4824
+ * @returns {Promise<string[]>} - Array of matching entries
4825
+ */
4826
+ async searchAsync(needle, haystack, flags, processors) {
4827
+ const resolved = this.resolveOptions({ flags, processors });
4828
+ // Run needle and haystack through the same preparation with identical resolved options
4829
+ const test = await this.prepareAsync(needle, resolved);
4830
+ const hstk = await this.prepareAsync(haystack, resolved);
4831
+ // Return the original (unprepared) haystack entries whose prepared form contains the prepared needle
4832
+ return haystack.filter((_, i) => hstk[i].includes(test));
4833
+ }
4834
+ /**
4835
+ * Asynchronously computes a similarity matrix for the given input array.
4836
+ *
4837
+ * @param {string[]} input - The input array
4838
+ * @param {CmpStrOptions} [opt] - Optional options
4839
+ * @returns {Promise<number[][]>} - The similarity matrix (a missing score is reported as 0)
4840
+ */
4841
+ async matrixAsync(input, opt) {
4842
+ input = await this.prepareAsync(input, this.resolveOptions(opt)); // prepare the whole array once; the row computations below pass skip=true
4843
+ return Promise.all(input.map(async (a) => (await this.computeAsync(a, input, undefined, 'batch', true, true).then(r => r.map(b => b.res ?? 0))))); // NOTE(review): opt is only used for preparation; computeAsync receives undefined options - confirm this is intended
4844
+ }
4845
+ /**
4846
+ * Asynchronously computes the phonetic index for a string using the
4847
+ * configured or given algorithm.
4848
+ *
4849
+ * @param {string} [input] - The input string
4850
+ * @param {string} [algo] - The phonetic algorithm to use (falls back to options.processors.phonetic.algo)
4851
+ * @param {PhoneticOptions} [opt] - Optional phonetic options (fall back to options.processors.phonetic.opt)
4852
+ * @returns {Promise<string>} - The phonetic index as a string
4853
+ */
4854
+ async phoneticIndexAsync(input, algo, opt) {
4855
+ const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
4856
+ return this.indexAsync(input, {
4857
+ algo: (algo ?? a), opt: opt ?? o
4858
+ });
4859
+ }
4860
+ }
4861
+
4862
+ export { CmpStr, CmpStrAsync, DiffChecker, Normalizer, TextAnalyzer };
4863
+ //# sourceMappingURL=CmpStr.esm.js.map