cmpstr 2.0.3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +75 -503
  3. package/dist/CmpStr.esm.js +4863 -0
  4. package/dist/CmpStr.esm.js.map +1 -0
  5. package/dist/CmpStr.esm.min.js +8 -0
  6. package/dist/CmpStr.esm.min.js.map +1 -0
  7. package/dist/CmpStr.umd.js +4875 -0
  8. package/dist/CmpStr.umd.js.map +1 -0
  9. package/dist/CmpStr.umd.min.js +8 -0
  10. package/dist/CmpStr.umd.min.js.map +1 -0
  11. package/dist/cjs/CmpStr.js +663 -0
  12. package/dist/cjs/CmpStr.js.map +1 -0
  13. package/dist/cjs/CmpStrAsync.js +336 -0
  14. package/dist/cjs/CmpStrAsync.js.map +1 -0
  15. package/dist/cjs/index.js +15 -0
  16. package/dist/cjs/index.js.map +1 -0
  17. package/dist/cjs/metric/Cosine.js +101 -0
  18. package/dist/cjs/metric/Cosine.js.map +1 -0
  19. package/dist/cjs/metric/DamerauLevenshtein.js +110 -0
  20. package/dist/cjs/metric/DamerauLevenshtein.js.map +1 -0
  21. package/dist/cjs/metric/DiceSorensen.js +91 -0
  22. package/dist/cjs/metric/DiceSorensen.js.map +1 -0
  23. package/dist/cjs/metric/Hamming.js +82 -0
  24. package/dist/cjs/metric/Hamming.js.map +1 -0
  25. package/dist/cjs/metric/Jaccard.js +76 -0
  26. package/dist/cjs/metric/Jaccard.js.map +1 -0
  27. package/dist/cjs/metric/JaroWinkler.js +114 -0
  28. package/dist/cjs/metric/JaroWinkler.js.map +1 -0
  29. package/dist/cjs/metric/LCS.js +89 -0
  30. package/dist/cjs/metric/LCS.js.map +1 -0
  31. package/dist/cjs/metric/Levenshtein.js +94 -0
  32. package/dist/cjs/metric/Levenshtein.js.map +1 -0
  33. package/dist/cjs/metric/Metric.js +445 -0
  34. package/dist/cjs/metric/Metric.js.map +1 -0
  35. package/dist/cjs/metric/NeedlemanWunsch.js +95 -0
  36. package/dist/cjs/metric/NeedlemanWunsch.js.map +1 -0
  37. package/dist/cjs/metric/SmithWaterman.js +98 -0
  38. package/dist/cjs/metric/SmithWaterman.js.map +1 -0
  39. package/dist/cjs/metric/qGram.js +91 -0
  40. package/dist/cjs/metric/qGram.js.map +1 -0
  41. package/dist/cjs/phonetic/Cologne.js +112 -0
  42. package/dist/cjs/phonetic/Cologne.js.map +1 -0
  43. package/dist/cjs/phonetic/Metaphone.js +172 -0
  44. package/dist/cjs/phonetic/Metaphone.js.map +1 -0
  45. package/dist/cjs/phonetic/Phonetic.js +413 -0
  46. package/dist/cjs/phonetic/Phonetic.js.map +1 -0
  47. package/dist/cjs/phonetic/Soundex.js +135 -0
  48. package/dist/cjs/phonetic/Soundex.js.map +1 -0
  49. package/dist/cjs/utils/DeepMerge.js +144 -0
  50. package/dist/cjs/utils/DeepMerge.js.map +1 -0
  51. package/dist/cjs/utils/DiffChecker.js +500 -0
  52. package/dist/cjs/utils/DiffChecker.js.map +1 -0
  53. package/dist/cjs/utils/Filter.js +189 -0
  54. package/dist/cjs/utils/Filter.js.map +1 -0
  55. package/dist/cjs/utils/HashTable.js +175 -0
  56. package/dist/cjs/utils/HashTable.js.map +1 -0
  57. package/dist/cjs/utils/Normalizer.js +144 -0
  58. package/dist/cjs/utils/Normalizer.js.map +1 -0
  59. package/dist/cjs/utils/Pool.js +196 -0
  60. package/dist/cjs/utils/Pool.js.map +1 -0
  61. package/dist/cjs/utils/Profiler.js +229 -0
  62. package/dist/cjs/utils/Profiler.js.map +1 -0
  63. package/dist/cjs/utils/Registry.js +148 -0
  64. package/dist/cjs/utils/Registry.js.map +1 -0
  65. package/dist/cjs/utils/TextAnalyzer.js +358 -0
  66. package/dist/cjs/utils/TextAnalyzer.js.map +1 -0
  67. package/dist/esm/CmpStr.js +662 -0
  68. package/dist/esm/CmpStr.js.map +1 -0
  69. package/dist/esm/CmpStrAsync.js +331 -0
  70. package/dist/esm/CmpStrAsync.js.map +1 -0
  71. package/dist/esm/index.js +7 -0
  72. package/dist/esm/index.js.map +1 -0
  73. package/dist/esm/metric/Cosine.js +99 -0
  74. package/dist/esm/metric/Cosine.js.map +1 -0
  75. package/dist/esm/metric/DamerauLevenshtein.js +108 -0
  76. package/dist/esm/metric/DamerauLevenshtein.js.map +1 -0
  77. package/dist/esm/metric/DiceSorensen.js +89 -0
  78. package/dist/esm/metric/DiceSorensen.js.map +1 -0
  79. package/dist/esm/metric/Hamming.js +77 -0
  80. package/dist/esm/metric/Hamming.js.map +1 -0
  81. package/dist/esm/metric/Jaccard.js +74 -0
  82. package/dist/esm/metric/Jaccard.js.map +1 -0
  83. package/dist/esm/metric/JaroWinkler.js +112 -0
  84. package/dist/esm/metric/JaroWinkler.js.map +1 -0
  85. package/dist/esm/metric/LCS.js +87 -0
  86. package/dist/esm/metric/LCS.js.map +1 -0
  87. package/dist/esm/metric/Levenshtein.js +92 -0
  88. package/dist/esm/metric/Levenshtein.js.map +1 -0
  89. package/dist/esm/metric/Metric.js +442 -0
  90. package/dist/esm/metric/Metric.js.map +1 -0
  91. package/dist/esm/metric/NeedlemanWunsch.js +93 -0
  92. package/dist/esm/metric/NeedlemanWunsch.js.map +1 -0
  93. package/dist/esm/metric/SmithWaterman.js +96 -0
  94. package/dist/esm/metric/SmithWaterman.js.map +1 -0
  95. package/dist/esm/metric/qGram.js +89 -0
  96. package/dist/esm/metric/qGram.js.map +1 -0
  97. package/dist/esm/phonetic/Cologne.js +114 -0
  98. package/dist/esm/phonetic/Cologne.js.map +1 -0
  99. package/dist/esm/phonetic/Metaphone.js +174 -0
  100. package/dist/esm/phonetic/Metaphone.js.map +1 -0
  101. package/dist/esm/phonetic/Phonetic.js +409 -0
  102. package/dist/esm/phonetic/Phonetic.js.map +1 -0
  103. package/dist/esm/phonetic/Soundex.js +137 -0
  104. package/dist/esm/phonetic/Soundex.js.map +1 -0
  105. package/dist/esm/utils/DeepMerge.js +139 -0
  106. package/dist/esm/utils/DeepMerge.js.map +1 -0
  107. package/dist/esm/utils/DiffChecker.js +498 -0
  108. package/dist/esm/utils/DiffChecker.js.map +1 -0
  109. package/dist/esm/utils/Filter.js +187 -0
  110. package/dist/esm/utils/Filter.js.map +1 -0
  111. package/dist/esm/utils/HashTable.js +173 -0
  112. package/dist/esm/utils/HashTable.js.map +1 -0
  113. package/dist/esm/utils/Normalizer.js +142 -0
  114. package/dist/esm/utils/Normalizer.js.map +1 -0
  115. package/dist/esm/utils/Pool.js +194 -0
  116. package/dist/esm/utils/Pool.js.map +1 -0
  117. package/dist/esm/utils/Profiler.js +227 -0
  118. package/dist/esm/utils/Profiler.js.map +1 -0
  119. package/dist/esm/utils/Registry.js +142 -0
  120. package/dist/esm/utils/Registry.js.map +1 -0
  121. package/dist/esm/utils/TextAnalyzer.js +356 -0
  122. package/dist/esm/utils/TextAnalyzer.js.map +1 -0
  123. package/dist/types/CmpStr.d.ts +472 -0
  124. package/dist/types/CmpStrAsync.d.ts +233 -0
  125. package/dist/types/index.d.ts +51 -0
  126. package/dist/types/metric/Cosine.d.ts +57 -0
  127. package/dist/types/metric/DamerauLevenshtein.d.ts +50 -0
  128. package/dist/types/metric/DiceSorensen.d.ts +57 -0
  129. package/dist/types/metric/Hamming.d.ts +49 -0
  130. package/dist/types/metric/Jaccard.d.ts +48 -0
  131. package/dist/types/metric/JaroWinkler.d.ts +50 -0
  132. package/dist/types/metric/LCS.d.ts +50 -0
  133. package/dist/types/metric/Levenshtein.d.ts +50 -0
  134. package/dist/types/metric/Metric.d.ts +261 -0
  135. package/dist/types/metric/NeedlemanWunsch.d.ts +47 -0
  136. package/dist/types/metric/SmithWaterman.d.ts +48 -0
  137. package/dist/types/metric/index.d.ts +41 -0
  138. package/dist/types/metric/qGram.d.ts +56 -0
  139. package/dist/types/phonetic/Cologne.d.ts +46 -0
  140. package/dist/types/phonetic/Metaphone.d.ts +50 -0
  141. package/dist/types/phonetic/Phonetic.d.ts +189 -0
  142. package/dist/types/phonetic/Soundex.d.ts +49 -0
  143. package/dist/types/phonetic/index.d.ts +30 -0
  144. package/dist/types/utils/DeepMerge.d.ts +70 -0
  145. package/dist/types/utils/DiffChecker.d.ts +137 -0
  146. package/dist/types/utils/Filter.d.ts +97 -0
  147. package/dist/types/utils/HashTable.d.ts +86 -0
  148. package/dist/types/utils/Normalizer.d.ts +76 -0
  149. package/dist/types/utils/Pool.d.ts +63 -0
  150. package/dist/types/utils/Profiler.d.ts +129 -0
  151. package/dist/types/utils/Registry.d.ts +57 -0
  152. package/dist/types/utils/TextAnalyzer.d.ts +199 -0
  153. package/dist/types/utils/Types.d.ts +313 -0
  154. package/package.json +62 -49
  155. package/src/CmpStr.d.ts +0 -70
  156. package/src/CmpStr.js +0 -917
  157. package/src/CmpStrAsync.d.ts +0 -19
  158. package/src/CmpStrAsync.js +0 -197
  159. package/src/algorithms/cosine.js +0 -86
  160. package/src/algorithms/damerau.js +0 -78
  161. package/src/algorithms/dice.js +0 -65
  162. package/src/algorithms/hamming.js +0 -44
  163. package/src/algorithms/jaccard.js +0 -34
  164. package/src/algorithms/jaroWinkler.js +0 -106
  165. package/src/algorithms/lcs.js +0 -58
  166. package/src/algorithms/levenshtein.js +0 -70
  167. package/src/algorithms/needlemanWunsch.js +0 -72
  168. package/src/algorithms/qGram.js +0 -63
  169. package/src/algorithms/smithWaterman.js +0 -78
  170. package/src/algorithms/soundex.js +0 -152
  171. package/src/index.d.ts +0 -3
  172. package/src/index.js +0 -47
@@ -0,0 +1,4875 @@
1
+ /**
2
+ * CmpStr v3.0.0 dev-1a82e20-250612
3
+ * This is a lightweight, fast and well performing library for calculating string similarity.
4
+ * (c) 2023-2025 Paul Köhler @komed3 / MIT License
5
+ * Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
6
+ */
7
+ (function (global, factory) {
8
+ typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
9
+ typeof define === 'function' && define.amd ? define(['exports'], factory) :
10
+ (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.CmpStr = {}));
11
+ })(this, (function (exports) { 'use strict';
12
+
13
+ /**
14
+ * Deep Merge Utility
15
+ * src/utils/DeepMerge.ts
16
+ *
17
+ * This module provides utility functions for deep merging objects, getting values by path,
18
+ * and setting values by path in a deeply nested object structure.
19
+ *
20
+ * It supports dot and bracket notation (e.g. `a.b[0].c`) as well as escaped keys.
21
+ *
22
+ * Included functions:
23
+ * - `get`: Retrieve a deeply nested value by path
24
+ * - `set`: Assign a value to a nested path
25
+ * - `merge`: Deeply merge two objects
26
+ * - `has`: Check whether a path exists
27
+ * - `rmv`: Delete a value at a path
28
+ *
29
+ * @module Utils/DeepMerge
30
+ * @author Paul Köhler
31
+ * @license MIT
32
+ */
33
/**
 * Parse a path string into an array of keys.
 *
 * Bracket indices are rewritten to dot notation before splitting, so `a[0].b`
 * and `a.0.b` produce the same keys. Purely numeric segments are converted to
 * numbers; every other segment stays a string.
 *
 * @param {string} p - The path string, e.g. `a.b.c` or `a[0].b`
 * @returns {(string|number)[]} - An array of keys, e.g. `['a', 'b', 'c']` or `['a', 0, 'b']`
 */
const parse = (p) => {
    const dotted = p.replace(/\[(\d+)]/g, '.$1');
    // A path that starts with a bracket index (`[0].a`) gets a dot inserted in
    // front of the index; drop it so no empty first key is produced.
    const normalized = p.startsWith('[') && dotted.startsWith('.')
        ? dotted.slice(1)
        : dotted;
    return normalized.split('.').map(s => /^\d+$/.test(s) ? +s : s);
};
40
/**
 * Deeply get a value from an object by a path string.
 *
 * The path is walked key by key. As soon as an intermediate value is `null`
 * or `undefined` the fallback is returned as-is; it is never used as a
 * container for the remaining keys. A final value of `null` or `undefined`
 * also yields the fallback.
 *
 * @template T - The type of the object to get the value from
 * @param {T} t - The object to get the value from
 * @param {string} path - The path string, e.g. `a.b.c`
 * @param {any} fallback - The default value to return if the path does not exist
 * @returns {T|R|undefined} - The value at the specified path, otherwise the default value
 */
function get(t, path, fallback) {
    let current = t;
    for (const key of parse(path)) {
        // Stop at the first missing link instead of indexing into the fallback
        if (current == null)
            return fallback;
        current = current[key];
    }
    return current ?? fallback;
}
52
/**
 * Deeply set a value in an object by a path string.
 *
 * Missing intermediate containers are created on the way: an array when the
 * next key is numeric, otherwise a prototype-less object. The target is
 * modified in place and returned.
 *
 * @template T - The type of the object to get the value from
 * @param {T} t - The object to set the value in
 * @param {string} path - The path string, e.g. `a.b.c`
 * @param {any} value - The value to set at the specified path
 * @returns {T} - The modified object with the value set at the specified path
 * @throws {Error} - Throws an error if a key would touch the prototype chain
 *                   (`__proto__`, `constructor`) or if a value on the path is
 *                   a primitive or `null` and therefore cannot hold properties
 */
function set(t, path, value) {
    // Recursion base case: no keys left, the value itself is the result
    if (path === '')
        return value;
    // Split the path into the first key and the rest of the path
    const [k, ...r] = parse(path);
    // Refuse the same dangerous keys that merge() skips; assigning through
    // `__proto__` would otherwise write to (or replace) the prototype
    if (k === '__proto__' || k === 'constructor')
        throw Error(`cannot set unsafe property <${k}>`);
    // An existing non-object target (primitive or null) cannot take properties
    if (t !== undefined && (typeof t !== 'object' || t === null))
        throw Error(`cannot set property <${k}> of <${JSON.stringify(t)}>`);
    // Assign the (recursively built) value to the key, creating the container if needed
    return Object.assign(t ?? (typeof k === 'number' ? [] : Object.create(null)), {
        [k]: set(t?.[k], r.join('.'), value)
    });
}
76
/**
 * Deeply merge two objects, where the second object overrides the first.
 *
 * Non-array objects are merged recursively; arrays, primitives and `null`
 * are copied over as-is. The target is modified in place. The keys
 * `__proto__` and `constructor` are never merged to prevent prototype
 * pollution.
 *
 * @template T - The type of the object to get the value from
 * @param {T} t - The target object to merge into
 * @param {T} o - The source object to merge from
 * @param {boolean} [mergeUndefined=false] - Whether to merge undefined values
 * @returns {T} - The merged object
 */
function merge(t = Object.create(null), o = Object.create(null), mergeUndefined = false) {
    // `typeof null === 'object'`, so null has to be excluded explicitly
    const isMergeable = (v) => typeof v === 'object' && v !== null && !Array.isArray(v);
    for (const k of Object.keys(o)) {
        const val = o[k];
        // If the value is undefined and mergeUndefined is false, skip it
        if (!mergeUndefined && val === undefined)
            continue;
        // Skip dangerous property names to prevent prototype pollution
        if (k === '__proto__' || k === 'constructor')
            continue;
        // Recurse into nested objects (passing the flag along); everything else is assigned
        t[k] = isMergeable(val)
            ? merge(isMergeable(t[k]) ? t[k] : Object.create(null), val, mergeUndefined)
            : val;
    }
    return t;
}
102
/**
 * Delete a value at a specified path in an object.
 *
 * The object is modified in place. Unless `preserveEmpty` is set, containers
 * along the path that end up empty after the deletion (objects without keys,
 * arrays holding only `null`/`undefined`) are removed from their parent too.
 *
 * @template T - The type of the object to get the value from
 * @param {T} t - The object to delete the value from
 * @param {string} path - The path string, e.g. `a.b.c`
 * @param {boolean} [preserveEmpty=false] - Whether to preserve empty objects/arrays
 * @returns {T} - The modified object with the value deleted at the specified path
 */
function rmv(t, path, preserveEmpty = false) {
    const keys = parse(path);
    const lastDepth = keys.length - 1;
    // True for an array without non-null entries or an object without own keys
    const isEmptyContainer = (v) => typeof v === 'object' && (Array.isArray(v)
        ? v.every(entry => entry == null)
        : Object.keys(v).length === 0);
    // Walks down the path; returns whether the deletion was carried out
    const prune = (node, depth) => {
        // Nothing to delete from a missing or primitive node
        if (!node || typeof node !== 'object')
            return false;
        const key = keys[depth];
        // Last key of the path: remove the property itself
        if (depth === lastDepth)
            return delete node[key];
        if (!prune(node[key], depth + 1))
            return false;
        // Clean up the child container if the deletion left it empty
        if (!preserveEmpty && isEmptyContainer(node[key]))
            delete node[key];
        return true;
    };
    prune(t, 0);
    return t;
}
134
+
135
+ /**
136
+ * Profiler Utility
137
+ * src/utils/profiler.ts
138
+ *
139
+ * @see https://en.wikipedia.org/wiki/Profiling_(computer_programming)
140
+ *
141
+ * This class provides methods to run synchronous and asynchronous functions, capturing
142
+ * their execution time and memory usage, and storing the results in a set of profiler
143
+ * entries. It supports both Node.js and browser environments, detecting the environment
144
+ * automatically.
145
+ *
146
+ * The class is optimized for minimal overhead and can be used for fine-grained
147
+ * performance profiling.
148
+ *
149
+ * @module Utils/Profiler
150
+ * @author Paul Köhler (komed3)
151
+ * @license MIT
152
+ */
153
/**
 * Profiler class for measuring execution time and memory usage of functions.
 *
 * Each profiled call is stored as an entry `{ time, mem, res, meta }`, and the
 * time and memory deltas are accumulated into running totals. The class is
 * meant to be used through `Profiler.getInstance()`.
 */
class Profiler {
    // Detected runtime: 'nodejs', 'browser' or 'unknown'
    static ENV;
    // Singleton instance
    static instance;
    // Store for profiler entries
    store = new Set();
    // Total time and memory consumption
    totalTime = 0;
    totalMem = 0;
    // The Profiler active state
    active;
    /**
     * Sets the environment based on the available global objects.
     *
     * The runtime is only classified as Node.js when the APIs used by `now()`
     * and `mem()` actually exist, so that a bare `process` shim (as injected by
     * some browser bundlers) does not cause those methods to throw.
     */
    static detectEnv() {
        // Check for Node.js environment (by capability, not just by name)
        if (typeof process !== 'undefined'
            && typeof process?.hrtime?.bigint === 'function'
            && typeof process?.memoryUsage === 'function')
            Profiler.ENV = 'nodejs';
        // Check for browser environment
        else if (typeof performance !== 'undefined' && typeof performance?.now === 'function')
            Profiler.ENV = 'browser';
        // If neither, set ENV to unknown
        else
            Profiler.ENV = 'unknown';
    }
    /**
     * Returns the singleton instance of the Profiler class.
     * If the instance does not exist, it creates a new one.
     *
     * Note that `enable` only takes effect on the call that creates the instance.
     *
     * @param {boolean} [enable=false] - Optional parameter to enable the profiler upon instantiation
     * @returns {Profiler} - Singleton Profiler instance
     */
    static getInstance(enable) {
        // Ensure the environment is detected
        if (!Profiler.ENV)
            Profiler.detectEnv();
        // If instance does not exist, create a new one
        if (!Profiler.instance)
            Profiler.instance = new Profiler(enable);
        // Return singleton instance
        return Profiler.instance;
    }
    /**
     * Creates a profiler; intended to be called via `getInstance()` only.
     *
     * @param {boolean} [enable=false] - Optional parameter to enable the profiler
     */
    constructor(enable) { this.active = enable ?? false; }
    /**
     * Gets the current time based on the environment.
     *
     * Uses process.hrtime.bigint() for Node.js, performance.now() for browsers,
     * and Date.now() as a fallback.
     *
     * @returns {number} - Current time in milliseconds
     */
    now() {
        switch (Profiler.ENV) {
            // Node.js environment (nanoseconds converted to milliseconds)
            case 'nodejs': return Number(process.hrtime.bigint()) / 1e6;
            // Browser environment
            case 'browser': return performance.now();
            // Fallback
            default: return Date.now();
        }
    }
    /**
     * Gets the current memory usage based on the environment.
     *
     * Uses process.memoryUsage().heapUsed for Node.js, performance.memory.usedJSHeapSize
     * for browsers (0 if unavailable), and returns 0 as a fallback.
     *
     * @returns {number} - Current memory usage in bytes
     */
    mem() {
        switch (Profiler.ENV) {
            // Node.js environment
            case 'nodejs': return process.memoryUsage().heapUsed;
            // Browser environment
            case 'browser': return performance.memory?.usedJSHeapSize ?? 0;
            // Fallback
            default: return 0;
        }
    }
    /**
     * Enables the profiler.
     * Sets the active state to true, allowing profiling to occur.
     */
    enable() { this.active = true; }
    /**
     * Disables the profiler.
     * Sets the active state to false, preventing further profiling.
     */
    disable() { this.active = false; }
    /**
     * Resets the profiler by clearing the store, total time and memory consumption.
     * This method is useful for starting a new profiling session.
     */
    clear() {
        this.store.clear();
        this.totalTime = 0;
        this.totalMem = 0;
    }
    /**
     * Runs a synchronous function and profiles its execution time and memory usage.
     * If the profiler is not active, it simply executes the function without profiling.
     * If the function throws, the error propagates and no entry is recorded.
     *
     * @param {() => T} fn - Function to be executed and profiled
     * @param {Record<string, any>} meta - Metadata to be associated with the profiling entry
     * @returns {T} - The result of the executed function
     */
    run(fn, meta = {}) {
        // If the profiler is not active, simply execute the function without profiling
        if (!this.active)
            return fn();
        // Capture the start time and memory usage
        const startTime = this.now();
        const startMem = this.mem();
        // Execute the function and capture the result
        const res = fn();
        // Calculate the time and memory consumption
        const deltaTime = this.now() - startTime;
        const deltaMem = this.mem() - startMem;
        // Add the profiling entry to the store and update the totals
        this.store.add({ time: deltaTime, mem: deltaMem, res, meta });
        this.totalTime += deltaTime;
        this.totalMem += deltaMem;
        // Return the result of the function
        return res;
    }
    /**
     * Runs an asynchronous function and profiles its execution time and memory usage.
     * If the profiler is not active, it simply executes the function without profiling.
     * If the function rejects, the rejection propagates and no entry is recorded.
     *
     * @param {() => Promise<T>} fn - Asynchronous function to be executed and profiled
     * @param {Record<string, any>} meta - Metadata to be associated with the profiling entry
     * @returns {Promise<T>} - A promise that resolves to the result of the executed function
     */
    async runAsync(fn, meta = {}) {
        // If the profiler is not active, simply execute the function without profiling
        if (!this.active)
            return await fn();
        // Capture the start time and memory usage
        const startTime = this.now();
        const startMem = this.mem();
        // Execute the asynchronous function and wait for its result
        const res = await fn();
        // Calculate the time and memory consumption
        const deltaTime = this.now() - startTime;
        const deltaMem = this.mem() - startMem;
        // Add the profiling entry to the store and update the totals
        this.store.add({ time: deltaTime, mem: deltaMem, res, meta });
        this.totalTime += deltaTime;
        this.totalMem += deltaMem;
        // Return the result of the function
        return res;
    }
    /**
     * Retrieves all profiler entries stored in the profiler.
     *
     * @returns {ProfilerEntry<any>[]} - A new array of profiler entries in insertion order
     */
    getAll() { return [...this.store]; }
    /**
     * Retrieves the last profiler entry stored in the profiler.
     *
     * @returns {ProfilerEntry<any> | undefined} - The last profiler entry or undefined if no entries exist
     */
    getLast() { return this.getAll().pop(); }
    /**
     * Retrieves the total time and memory consumption recorded by the profiler.
     *
     * @returns {{ time: number, mem: number }} - An object containing total time and memory usage
     */
    getTotal() {
        return {
            time: this.totalTime, mem: this.totalMem
        };
    }
    /**
     * Bound shortcuts to the profiler's control and reporting methods, so they
     * can be handed out without exposing the instance itself.
     *
     * @type {ProfilerService<any>}
     */
    services = {
        enable: this.enable.bind(this),
        disable: this.disable.bind(this),
        clear: this.clear.bind(this),
        report: this.getAll.bind(this),
        last: this.getLast.bind(this),
        total: this.getTotal.bind(this)
    };
}
349
+
350
+ /**
351
+ * TextAnalyzer Utility
352
+ * src/utils/TextAnalyzer.ts
353
+ *
354
+ * The TextAnalyzer class provides a comprehensive set of methods for analyzing and
355
+ * extracting statistics from a given text. It supports word and sentence tokenization,
356
+ * character and word frequency analysis, syllable estimation, readability metrics
357
+ * (Flesch, Kincaid, LIX, WSTF), and various ratios and histograms. Designed for
358
+ * efficiency and flexibility, it is suitable for linguistic research, readability
359
+ * scoring, and text preprocessing tasks.
360
+ *
361
+ * @module Utils/TextAnalyzer
362
+ * @author Paul Köhler (komed3)
363
+ * @license MIT
364
+ */
365
+ class TextAnalyzer {
366
+ // The original text to analyze
367
+ text;
368
+ // Tokenized words and sentences
369
+ words = [];
370
+ sentences = [];
371
+ // Frequency maps for characters and words
372
+ charFrequency = new Map();
373
+ wordHistogram = new Map();
374
+ syllableCache = new Map();
375
+ /**
376
+ * Constructs a new TextAnalyzer instance with the provided input text.
377
+ *
378
+ * @param {string} input - The text to analyze
379
+ */
380
+ constructor(input) {
381
+ this.text = input.trim();
382
+ this.tokenize();
383
+ this.computeFrequencies();
384
+ }
385
+ /**
386
+ * Tokenizes the input text into words and sentences.
387
+ */
388
+ tokenize() {
389
+ this.words = [], this.sentences = [];
390
+ const text = this.text;
391
+ const wordRegex = /\p{L}+/gu;
392
+ let match;
393
+ // Tokenize words using Unicode property escapes for letters
394
+ while ((match = wordRegex.exec(text)) !== null) {
395
+ this.words.push(match[0].toLowerCase());
396
+ }
397
+ // Tokenize sentences using punctuation marks as delimiters
398
+ this.sentences = text.split(/(?<=[.!?])\s+/).filter(Boolean);
399
+ }
400
+ /**
401
+ * Computes character and word frequencies from the tokenized text.
402
+ */
403
+ computeFrequencies() {
404
+ // Compute character frequencies
405
+ for (const char of this.text)
406
+ this.charFrequency.set(char, (this.charFrequency.get(char) ?? 0) + 1);
407
+ // Compute word frequencies
408
+ for (const word of this.words)
409
+ this.wordHistogram.set(word, (this.wordHistogram.get(word) ?? 0) + 1);
410
+ }
411
+ /**
412
+ * Estimates the number of syllables in a word using a simple heuristic.
413
+ *
414
+ * @param {string} word - The word to estimate syllables for
415
+ * @returns {number} - Estimated syllable count
416
+ */
417
+ estimateSyllables(word) {
418
+ // Check cache first to avoid redundant calculations
419
+ if (this.syllableCache.has(word))
420
+ return this.syllableCache.get(word);
421
+ // Normalize the word: lowercase and remove non-letter characters
422
+ const clean = word.toLowerCase().replace(/[^a-zäöüß]/g, '');
423
+ const matches = clean.match(/[aeiouyäöü]+/g);
424
+ // Count syllables based on vowel groups
425
+ const count = matches ? matches.length : 1;
426
+ this.syllableCache.set(word, count);
427
+ return count;
428
+ }
429
+ /**
430
+ * Gets the original text length in characters.
431
+ *
432
+ * @return {number} - Length of the text
433
+ */
434
+ getLength() { return this.text.length; }
435
+ /**
436
+ * Gets the number of words in the text.
437
+ *
438
+ * @return {number} - Count of words
439
+ */
440
+ getWordCount() { return this.words.length; }
441
+ /**
442
+ * Gets the number of sentences in the text.
443
+ *
444
+ * @return {number} - Count of sentences
445
+ */
446
+ getSentenceCount() { return this.sentences.length; }
447
+ /**
448
+ * Gets the average word length in the text.
449
+ *
450
+ * @return {number} - Average length of words
451
+ */
452
+ getAvgWordLength() {
453
+ let totalLen = 0;
454
+ for (const w of this.words)
455
+ totalLen += w.length;
456
+ return this.words.length ? totalLen / this.words.length : 0;
457
+ }
458
+ /**
459
+ * Gets the average sentence length in words.
460
+ *
461
+ * @return {number} - Average length of sentences
462
+ */
463
+ getAvgSentenceLength() {
464
+ return this.sentences.length ? this.words.length / this.sentences.length : 0;
465
+ }
466
+ /**
467
+ * Gets a histogram of word frequencies in the text.
468
+ *
469
+ * @returns {Record<string, number>} - A histogram of word frequencies
470
+ */
471
+ getWordHistogram() {
472
+ return Object.fromEntries(this.wordHistogram);
473
+ }
474
+ /**
475
+ * Gets the most common words in the text, limited to a specified number.
476
+ *
477
+ * @param {number} [limit=5] - Maximum number of common words to return
478
+ * @returns {string[]} - Array of the most common words
479
+ */
480
+ getMostCommonWords(limit = 5) {
481
+ return [...this.wordHistogram.entries()]
482
+ .sort((a, b) => b[1] - a[1])
483
+ .slice(0, limit).map(e => e[0]);
484
+ }
485
+ /**
486
+ * Gets the least common words (hapax legomena) in the text.
487
+ *
488
+ * Hapax legomena are words that occur only once in the text.
489
+ *
490
+ * @returns {string[]} - Array of hapax legomena
491
+ */
492
+ getHapaxLegomena() {
493
+ return [...this.wordHistogram.entries()]
494
+ .filter(([, c]) => c === 1)
495
+ .map(e => e[0]);
496
+ }
497
+ /**
498
+ * Checks if the text contains any numbers.
499
+ *
500
+ * @returns {boolean} - True if numbers are present, false otherwise
501
+ */
502
+ hasNumbers() { return /\d/.test(this.text); }
503
+ /**
504
+ * Calculates the ratio of uppercase letters to total letters in the text.
505
+ *
506
+ * @return {number} - Ratio of uppercase letters to total letters
507
+ */
508
+ getUpperCaseRatio() {
509
+ let upper = 0, letters = 0;
510
+ for (let i = 0, len = this.text.length; i < len; i++) {
511
+ const c = this.text[i];
512
+ if (/[A-Za-zÄÖÜäöüß]/.test(c)) {
513
+ letters++;
514
+ if (/[A-ZÄÖÜ]/.test(c))
515
+ upper++;
516
+ }
517
+ }
518
+ return letters ? upper / letters : 0;
519
+ }
520
+ /**
521
+ * Gets the frequency of each character in the text.
522
+ *
523
+ * @returns {Record<string, number>} - A record of character frequencies
524
+ */
525
+ getCharFrequency() {
526
+ return Object.fromEntries(this.charFrequency);
527
+ }
528
+ /**
529
+ * Gets the frequency of each Unicode block in the text.
530
+ *
531
+ * @returns {Record<string, number>} - A record of Unicode block frequencies
532
+ */
533
+ getUnicodeStats() {
534
+ const result = {};
535
+ for (const [char, count] of this.charFrequency) {
536
+ // Get the Unicode block for the character
537
+ const block = char
538
+ .charCodeAt(0).toString(16)
539
+ .padStart(4, '0').toUpperCase();
540
+ // Increment the count for the block
541
+ result[block] = (result[block] ?? 0) + count;
542
+ }
543
+ return result;
544
+ }
545
+ /**
546
+ * Gets the ratio of long words (words with length >= len) to total words.
547
+ *
548
+ * @param {number} [len=7] - Minimum length for a word to be considered long
549
+ * @returns {number} - Ratio of long words to total words
550
+ */
551
+ getLongWordRatio(len = 7) {
552
+ let long = 0;
553
+ for (const w of this.words)
554
+ if (w.length >= len)
555
+ long++;
556
+ return this.words.length ? long / this.words.length : 0;
557
+ }
558
+ /**
559
+ * Gets the ratio of short words (words with length <= len) to total words.
560
+ *
561
+ * @param {number} [len=3] - Maximum length for a word to be considered short
562
+ * @returns {number} - Ratio of short words to total words
563
+ */
564
+ getShortWordRatio(len = 3) {
565
+ let short = 0;
566
+ for (const w of this.words)
567
+ if (w.length <= len)
568
+ short++;
569
+ return this.words.length ? short / this.words.length : 0;
570
+ }
571
+ /**
572
+ * Estimates the number of syllables in the text.
573
+ *
574
+ * @returns {number} - Total estimated syllable count
575
+ */
576
+ getSyllablesCount() {
577
+ let count = 0;
578
+ for (const w of this.words)
579
+ count += this.estimateSyllables(w);
580
+ return count;
581
+ }
582
+ /**
583
+ * Gets the number of monosyllabic words (words with exactly one syllable).
584
+ *
585
+ * @returns {number} - Count of monosyllabic words
586
+ */
587
+ getMonosyllabicWordCount() {
588
+ let count = 0;
589
+ for (const w of this.words)
590
+ if (this.estimateSyllables(w) === 1)
591
+ count++;
592
+ return count;
593
+ }
594
+ /**
595
+ * Gets the number of words with at least a specified minimum syllable count.
596
+ *
597
+ * @param {number} min - Minimum syllable count for a word to be included
598
+ * @returns {number} - Count of words meeting the syllable criteria
599
+ */
600
+ getMinSyllablesWordCount(min) {
601
+ let count = 0;
602
+ for (const w of this.words)
603
+ if (this.estimateSyllables(w) >= min)
604
+ count++;
605
+ return count;
606
+ }
607
+ /**
608
+ * Gets the number of words with at most a specified maximum syllable count.
609
+ *
610
+ * @param {number} max - Maximum syllable count for a word to be included
611
+ * @returns {number} - Count of words meeting the syllable criteria
612
+ */
613
+ getMaxSyllablesWordCount(max) {
614
+ let count = 0;
615
+ for (const w of this.words)
616
+ if (this.estimateSyllables(w) <= max)
617
+ count++;
618
+ return count;
619
+ }
620
+ /**
621
+ * Calculates the Honore's R statistic for the text as a measure of lexical richness.
622
+ *
623
+ * @returns {number} - The Honore's R statistic
624
+ */
625
+ getHonoresR() {
626
+ return (100 * Math.log(this.words.length)) / (1 - (this.getHapaxLegomena().length / (this.wordHistogram.size ?? 1)));
627
+ }
628
+ /**
629
+ * Estimates the reading time for the text based on words per minute (WPM).
630
+ *
631
+ * @param {number} [wpm=200] - Words per minute for the calculation
632
+ * @returns {number} - Estimated reading time in minutes
633
+ */
634
+ getReadingTime(wpm = 200) {
635
+ return Math.max(1, this.words.length / (wpm ?? 1));
636
+ }
637
+ /**
638
+ * Calculates various readability scores based on the text.
639
+ *
640
+ * This method supports multiple readability metrics:
641
+ * - Flesch Reading Ease
642
+ * - Flesch-Kincaid Grade Level
643
+ *
644
+ * @param {'flesch'|'fleschde'|'kincaid'} [metric='flesch'] - The readability metric to calculate
645
+ * @returns {number} - The calculated readability score
646
+ */
647
+ getReadabilityScore(metric = 'flesch') {
648
+ const w = this.words.length || 1;
649
+ const s = this.sentences.length || 1;
650
+ const y = this.getSyllablesCount() || 1;
651
+ const asl = w / s;
652
+ const asw = y / w;
653
+ switch (metric) {
654
+ // Flesch Reading Ease formula
655
+ case 'flesch': return 206.835 - (1.015 * asl) - (84.6 * asw);
656
+ // Flesch Reading Ease formula for German texts
657
+ case 'fleschde': return 180 - asl - (58.5 * asw);
658
+ // Flesch-Kincaid Grade Level formula
659
+ case 'kincaid': return (0.39 * asl) + (11.8 * asw) - 15.59;
660
+ }
661
+ }
662
+ /**
663
+ * Calculates the LIX (Lesbarhetsindex) score for the text.
664
+ *
665
+ * The LIX score is a readability index that combines average word length and sentence length.
666
+ *
667
+ * @returns {number} - The LIX score
668
+ */
669
+ getLIXScore() {
670
+ const w = this.words.length || 1;
671
+ const s = this.sentences.length || 1;
672
+ const l = this.getLongWordRatio() * w;
673
+ return (w / s) + (l / w * 100);
674
+ }
675
+ /**
676
+ * Calculates the Wiener Sachtextformel (WSTF) scores for the text.
677
+ *
678
+ * The WSTF scores are a set of readability metrics based on word and sentence characteristics.
679
+ *
680
+ * @returns {[number, number, number, number]} - An array of WSTF scores
681
+ */
682
+ getWSTFScore() {
683
+ const w = this.words.length || 1;
684
+ const h = this.getMinSyllablesWordCount(3) / w * 100;
685
+ const s = this.getAvgSentenceLength();
686
+ const l = this.getLongWordRatio() * 100;
687
+ const m = this.getMonosyllabicWordCount() / w * 100;
688
+ return [
689
+ 0.1935 * h + 0.1672 * s + 0.1297 * l - 0.0327 * m - 0.8750,
690
+ 0.2007 * h + 0.1682 * s + 0.1373 * l - 2.7790,
691
+ 0.2963 * h + 0.1905 * s - 1.1144,
692
+ 0.2744 * h + 0.2656 * s - 1.6930
693
+ ];
694
+ }
695
+ }
696
+
697
+ /**
698
+ * DiffChecker Utility
699
+ * src/utils/DiffChecker.ts
700
+ *
701
+ * The DiffChecker class provides a robust and efficient utility for comparing two
702
+ * texts and extracting their differences (full lines or word mode). It supports
703
+ * context-aware grouping of changes, unified diff output (with CLI color or ASCII
704
+ * markup), and detailed change magnitude metrics. The class is highly configurable,
705
+ * allowing users to choose the diff granularity, case sensitivity, context lines,
706
+ * grouping, and output style. It is suitable for text comparison, code review
707
+ * tools, document versioning, and any application requiring precise and human-
708
+ * readable difference reporting.
709
+ *
710
+ * Features:
711
+ * - Line and word-based diffing
712
+ * - Case-insensitive comparison option
713
+ * - Context lines and grouping of adjacent changes
714
+ * - Unified diff output (ASCII or colored CLI)
715
+ * - Highlighting of changed segments within lines
716
+ * - Change magnitude calculation (relative to group or line)
717
+ * - Expand-all mode for full file context
718
+ *
719
+ * @module Utils/DiffChecker
720
+ * @author Paul Köhler (komed3)
721
+ * @license MIT
722
+ */
723
/**
 * The DiffChecker class compares two texts line by line and exposes the result
 * as structured entries, grouped entries and a unified-style text rendering.
 *
 * The diff is computed once, inside the constructor. The getters either return
 * the stored result or render it as a string.
 */
class DiffChecker {
    // Original input texts and the merged options
    a;
    b;
    options;
    // Line-level diff entries and groups of adjacent entries
    entries = [];
    grouped = [];
    // Guards computeDiff() against running more than once
    diffRun = false;
    /**
     * Constructs a new DiffChecker instance and immediately computes the diff.
     *
     * @param {string} a - The first (original) text
     * @param {string} b - The second (modified) text
     * @param {DiffOptions} [opt] - Optional diff configuration, merged over the defaults
     */
    constructor(a, b, opt = {}) {
        this.a = a;
        this.b = b;
        // Defaults first, so that user-provided options win
        this.options = {
            mode: 'word',
            caseInsensitive: false,
            contextLines: 1,
            groupedLines: true,
            expandLines: false,
            showChangeMagnitude: true,
            maxMagnitudeSymbols: 5,
            lineBreak: '\n',
            ...opt
        };
        this.computeDiff();
    }
    /**
     * Trims both input texts, splits them into lines and returns them
     * together with the larger of the two line counts.
     *
     * @returns { linesA: string[], linesB: string[], maxLen: number }
     */
    text2lines() {
        const linesA = this.a.trim().split(/\r?\n/);
        const linesB = this.b.trim().split(/\r?\n/);
        return { linesA, linesB, maxLen: Math.max(linesA.length, linesB.length) };
    }
    /**
     * Tokenizes a string according to the current diff mode.
     *
     * In 'line' mode the whole input is a single token; in 'word' mode the
     * input is split on runs of whitespace. Any other mode yields `undefined`.
     *
     * @param {string} input - The string to tokenize
     * @returns {string[]} - Array of tokens
     */
    tokenize(input) {
        const { mode } = this.options;
        switch (mode) {
            // Tokenize by lines
            case 'line': return [input];
            // Tokenize by words
            case 'word': return input.split(/\s+/);
        }
    }
    /**
     * Joins tokens back into a string: with a single space in 'word' mode,
     * without any separator otherwise.
     *
     * @param {string[]} input - Array of tokens
     * @returns {string} - Concatenated string
     */
    concat(input) {
        const { mode } = this.options;
        return input.join(mode === 'word' ? ' ' : '');
    }
    /**
     * Computes the diff between the two input texts and populates the
     * entries and grouped arrays. Subsequent calls are no-ops.
     */
    computeDiff() {
        if (this.diffRun)
            return;
        const { linesA, linesB, maxLen } = this.text2lines();
        // Compare the texts line by line; a missing line counts as empty
        for (let i = 0; i < maxLen; i++)
            this.lineDiff(linesA[i] || '', linesB[i] || '', i);
        // Find groups of adjacent changes
        this.findGroups();
        this.diffRun = true;
    }
    /**
     * Compares two lines and, if they differ, records a diff entry for them.
     *
     * @param {string} a - Line from the first text
     * @param {string} b - Line from the second text
     * @param {number} line - Zero-based line index
     */
    lineDiff(a, b, line) {
        const { mode, caseInsensitive } = this.options;
        const baseLen = Math.max(a.length, b.length);
        // The comparison runs on lowercased copies when case is ignored;
        // the recorded del/ins text always comes from the original lines
        const A = caseInsensitive ? a.toLowerCase() : a;
        const B = caseInsensitive ? b.toLowerCase() : b;
        let diffs = [];
        let delSize = 0;
        let insSize = 0;
        if (mode === 'line') {
            // For line mode, compare the entire lines directly
            if (A !== B) {
                diffs.push({
                    posA: 0, posB: 0,
                    del: a, ins: b,
                    size: b.length - a.length
                });
                delSize = a.length;
                insSize = b.length;
            }
        }
        else {
            // For word mode, find precise diffs between tokenized lines
            diffs = this.preciseDiff(a, A, b, B);
            // Calculate total sizes of deletions and insertions
            for (const d of diffs) {
                delSize += d.del.length;
                insSize += d.ins.length;
            }
        }
        if (diffs.length) {
            this.entries.push({
                line, diffs, delSize, insSize, baseLen,
                totalSize: insSize - delSize,
                magnitude: this.magnitude(delSize, insSize, baseLen)
            });
        }
    }
    /**
     * Finds the differing token runs between two tokenized lines using a
     * greedy scan with a look-ahead of up to three tokens, and returns them
     * with their original text and positions.
     *
     * @param {string} a - Original line (case preserved)
     * @param {string} A - Original line (possibly lowercased)
     * @param {string} b - Modified line (case preserved)
     * @param {string} B - Modified line (possibly lowercased)
     * @returns {DiffEntry[]} - Array of diff entries for this line
     */
    preciseDiff(a, A, b, B) {
        // Character offset of each token, computed as if tokens were
        // separated by exactly one character
        const posIndex = (tokens) => {
            const positions = [];
            let offset = 0;
            for (const token of tokens) {
                positions.push(offset);
                offset += token.length + 1;
            }
            return positions;
        };
        // Original and comparison token arrays, their lengths and positions
        const origA = this.tokenize(a);
        const origB = this.tokenize(b);
        const tokenA = this.tokenize(A);
        const tokenB = this.tokenize(B);
        const lenA = tokenA.length;
        const lenB = tokenB.length;
        const posArrA = posIndex(origA);
        const posArrB = posIndex(origB);
        // Find matching blocks
        const matches = [];
        let ai = 0;
        let bi = 0;
        while (ai < lenA && bi < lenB) {
            if (tokenA[ai] === tokenB[bi]) {
                let len = 1;
                // Extend the match as long as tokens continue to match
                while (ai + len < lenA && bi + len < lenB &&
                    tokenA[ai + len] === tokenB[bi + len])
                    len++;
                matches.push({ ai, bi, len });
                ai += len;
                bi += len;
            }
            else {
                let found = false;
                // Look ahead for next sync point (greedy, but avoids long tails)
                for (let offset = 1; offset <= 3 && !found; offset++) {
                    // A token further ahead in A matches the current token in B
                    if (ai + offset < lenA && tokenA[ai + offset] === tokenB[bi]) {
                        matches.push({ ai: ai + offset, bi, len: 1 });
                        ai += offset + 1;
                        bi += 1;
                        found = true;
                    }
                    // A token further ahead in B matches the current token in A
                    else if (bi + offset < lenB && tokenA[ai] === tokenB[bi + offset]) {
                        matches.push({ ai, bi: bi + offset, len: 1 });
                        ai += 1;
                        bi += offset + 1;
                        found = true;
                    }
                }
                // If no sync point was found, advance both pointers by one
                if (!found) {
                    ai++;
                    bi++;
                }
            }
        }
        const diffs = [];
        // Records the unmatched tokens origA[fromA, toA) / origB[fromB, toB)
        const record = (fromA, toA, fromB, toB) => {
            const delArr = origA.slice(fromA, toA);
            const insArr = origB.slice(fromB, toB);
            diffs.push({
                posA: posArrA[fromA] ?? 0,
                posB: posArrB[fromB] ?? 0,
                del: this.concat(delArr),
                ins: this.concat(insArr),
                size: insArr.join('').length - delArr.join('').length
            });
        };
        // Walk through the matches and emit the gaps between them
        let i = 0;
        let j = 0;
        for (const m of matches) {
            if (i < m.ai || j < m.bi)
                record(i, m.ai, j, m.bi);
            // Advance to after the match
            i = m.ai + m.len;
            j = m.bi + m.len;
        }
        // Tail diffs after the last match
        if (i < lenA || j < lenB)
            record(i, undefined, j, undefined);
        // Remove empty diffs
        return diffs.filter(d => d.del.length > 0 || d.ins.length > 0);
    }
    /**
     * Groups changed lines whose context ranges touch or overlap and
     * calculates the summed metrics of each group.
     */
    findGroups() {
        const { contextLines } = this.options;
        // Sums the entry metrics and appends the finished group
        const addGroup = (group, start, end) => {
            const [delSize, insSize, totalSize, baseLen] = [
                'delSize', 'insSize', 'totalSize', 'baseLen'
            ].map(k => group.reduce((sum, e) => sum + e[k], 0));
            this.grouped.push({
                start, end, delSize, insSize, totalSize,
                line: group[0].line, entries: group,
                magnitude: this.magnitude(delSize, insSize, baseLen)
            });
        };
        let group = [];
        let start = 0;
        let end = 0;
        for (const entry of this.entries) {
            // Context range of this entry, clamped at the first line
            const s = Math.max(0, entry.line - contextLines);
            const e = entry.line + contextLines;
            if (!group.length || s <= end + 1) {
                // The first entry of a group defines where the group starts
                if (!group.length)
                    start = s;
                end = Math.max(end, e);
                group.push(entry);
            }
            else {
                // Gap to the previous group: finalize it and start a new one
                addGroup(group, start, end);
                group = [entry];
                start = s;
                end = e;
            }
        }
        // If there is a remaining group, finalize it
        if (group.length)
            addGroup(group, start, end);
    }
    /**
     * Calculates the change magnitude string for a group or line.
     *
     * @param {number} del - Number of deleted characters
     * @param {number} ins - Number of inserted characters
     * @param {number} baseLen - Base length for normalization
     * @returns {string} - Magnitude string (e.g. "++-"), empty if nothing changed or baseLen is 0
     */
    magnitude(del, ins, baseLen) {
        const { maxMagnitudeSymbols } = this.options;
        const total = del + ins;
        if (total === 0 || baseLen === 0)
            return '';
        // Between 1 and maxMagnitudeSymbols symbols, scaled by the relative change
        const magLen = Math.min(maxMagnitudeSymbols, Math.max(Math.round(total / baseLen * maxMagnitudeSymbols), 1));
        // Split the symbols between insertions and deletions
        const plus = Math.round((ins / total) * magLen);
        const minus = magLen - plus;
        return '+'.repeat(plus) + '-'.repeat(minus);
    }
    /**
     * Generates a unified diff output as a string, with optional CLI coloring.
     *
     * Lines that exist in only one of the two texts are rendered with an
     * empty counterpart.
     *
     * @param {boolean} cli - If true, use CLI colors; otherwise, ASCII markup
     * @returns {string} - Unified diff output
     */
    output(cli) {
        const { mode, contextLines, groupedLines, expandLines, showChangeMagnitude, lineBreak } = this.options;
        const { linesA, linesB, maxLen } = this.text2lines();
        const linePad = Math.max(4, maxLen.toString().length);
        const out = [''];
        // Helper functions for coloring and formatting (ASCII or CLI colored)
        const highlight = (s, ansi) => cli ? `\x1b[${ansi}m${s}\x1b[0m` : s;
        const cy = (s) => highlight(s, '36');
        const gy = (s) => highlight(s, '90');
        const gn = (s) => highlight(s, '32');
        const rd = (s) => highlight(s, '31');
        const ye = (s) => highlight(s, '33');
        const del = (s) => cli ? `\x1b[37;41m${s}\x1b[31;49m` : `-[${s}]`;
        const ins = (s) => cli ? `\x1b[37;42m${s}\x1b[32;49m` : `+[${s}]`;
        // Marks the changed segments of one side of a line
        const mark = (line, diffs, type) => {
            // Nothing to highlight without diffs, and line mode shows whole lines
            if (!diffs.length || mode === 'line')
                return line;
            let res = '';
            let idx = 0;
            for (const d of diffs) {
                const pos = type === 'del' ? d.posA : d.posB;
                const val = type === 'del' ? d.del : d.ins;
                // This side has nothing to show for the diff
                if (!val)
                    continue;
                // Unchanged part before the change
                if (pos > idx)
                    res += line.slice(idx, pos);
                res += (type === 'del' ? del(val) : ins(val));
                idx = pos + val.length;
            }
            // Remaining unchanged part
            return res + line.slice(idx);
        };
        // Outputs the header for a group or a single entry
        const header = (e) => {
            out.push(`${' '.repeat(linePad)} ${cy(`@@ -${e.line + 1},${e.delSize} +${e.line + 1},${e.insSize} @@`)} ${showChangeMagnitude ? ye(e.magnitude) : ''}`);
        };
        // Outputs a single line, with diff highlighting if it is the forced line
        const line = (i, forced) => {
            // Skip indices where neither text has a non-empty line
            if (!(linesA[i] || linesB[i]))
                return;
            // One text may be shorter than the other; treat its missing
            // line as empty instead of passing undefined to mark()
            const textA = linesA[i] ?? '';
            const textB = linesB[i] ?? '';
            const entry = this.entries.find(e => e.line === i);
            const lineNo = (i + 1).toString().padStart(linePad, ' ');
            if (entry && forced === i) {
                out.push(`${lineNo} ${rd(`- ${mark(textA, entry.diffs, 'del')}`)}`);
                out.push(`${' '.repeat(linePad)} ${gn(`+ ${mark(textB, entry.diffs, 'ins')}`)}`);
            }
            else {
                // Context line; fall back to the second text if the first has no such line
                out.push(`${lineNo} ${gy(linesA[i] ?? textB)}`);
            }
        };
        // Outputs a block of lines with optional header, followed by an empty line
        const block = (start, end, forced, headerEntry) => {
            if (headerEntry)
                header(headerEntry);
            for (let i = start; i <= end; i++)
                line(i, forced ?? i);
            out.push('');
        };
        switch (true) {
            // For expandLines, output the entire file context
            case expandLines:
                block(0, maxLen);
                break;
            // For groupedLines, output each group with its start and end
            case groupedLines:
                for (const group of this.grouped)
                    block(group.start, group.end, undefined, group);
                break;
            // For individual lines, output each entry with context lines
            default:
                for (const entry of this.entries)
                    block(entry.line - contextLines, entry.line + contextLines, entry.line, entry);
                break;
        }
        return out.join(lineBreak);
    }
    /**
     * Returns the structured diff as an array of DiffLine objects.
     *
     * @returns {DiffLine[]} - Array of line-level diffs
     */
    getStructuredDiff() { return this.entries; }
    /**
     * Returns the grouped diff as an array of DiffGroup objects.
     *
     * @returns {DiffGroup[]} - Array of grouped diffs
     */
    getGroupedDiff() { return this.grouped; }
    /**
     * Returns the unified diff as a plain ASCII string.
     *
     * @returns {string} - Unified diff (ASCII)
     */
    getASCIIDiff() { return this.output(false); }
    /**
     * Returns the unified diff as a CLI-colored string.
     *
     * @returns {string} - Unified diff (CLI colors)
     */
    getCLIDiff() { return this.output(true); }
}
1144
+
1145
+ /**
1146
+ * Hash Table Utility
1147
+ * src/utils/HashTable.ts
1148
+ *
1149
+ * @see https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function
1150
+ * @see https://en.wikipedia.org/wiki/Hash_table
1151
+ *
1152
+ * This module implements an instantiable hash table/cache using the FNV-1a hash algorithm.
1153
+ * It allows for multiple independent caches (e.g. for metrics, normalization, etc.) with
1154
+ * type safety and high performance. The FNV-1a algorithm is factored out into its own
1155
+ * static utility class to avoid code duplication and memory overhead.
1156
+ *
1157
+ * The key() method supports any number of string arguments, enabling flexible cache keys
1158
+ * for different use cases (e.g. normalization, metrics, etc.).
1159
+ *
1160
+ * @module Utils/HashTable
1161
+ * @author Paul Köhler (komed3)
1162
+ * @license MIT
1163
+ */
1164
/**
 * Hasher Utility
 * Static class computing a 32-bit, FNV-1a style string hash.
 */
class Hasher {
    // Multiplier and start value of the FNV-1a hash algorithm (32-bit)
    static FNV_PRIME = 0x01000193;
    static HASH_OFFSET = 0x811c9dc5;
    /**
     * Computes a hash value for a given string.
     *
     * The string is consumed in chunks of four UTF-16 code units that are
     * packed into one number, XOR-ed into the hash and multiplied by the
     * FNV prime; left-over code units are mixed in one by one. A final
     * mixing step improves the distribution.
     *
     * All multiplications use Math.imul so they wrap around exactly at
     * 32 bits. A plain `*` would produce values beyond 2^53 and silently
     * drop the low bits of the product.
     *
     * @param {string} str - The string to hash
     * @return {number} - The computed hash value as an unsigned 32-bit integer
     */
    static fnv1a(str) {
        const len = str.length;
        const chunks = Math.floor(len / 4);
        let hash = Hasher.HASH_OFFSET;
        // Process 4 code units at a time
        for (let i = 0; i < chunks; i++) {
            const pos = i * 4;
            const chunk = ((str.charCodeAt(pos)) |
                (str.charCodeAt(pos + 1) << 8) |
                (str.charCodeAt(pos + 2) << 16) |
                (str.charCodeAt(pos + 3) << 24));
            hash = Math.imul(hash ^ chunk, Hasher.FNV_PRIME);
        }
        // Handle the remaining (up to three) code units
        for (let pos = chunks * 4; pos < len; pos++)
            hash = Math.imul(hash ^ str.charCodeAt(pos), Hasher.FNV_PRIME);
        // Final mixing to improve distribution
        hash ^= hash >>> 16;
        hash = Math.imul(hash, 0x85ebca6b);
        hash ^= hash >>> 13;
        hash = Math.imul(hash, 0xc2b2ae35);
        hash ^= hash >>> 16;
        // Convert to unsigned 32-bit integer
        return hash >>> 0;
    }
}
1213
/**
 * HashTable class implements an instantiable, size-limited hash table/cache.
 * Each instance owns its own Map, so multiple independent caches can coexist.
 *
 * @template K - The type of the label for the key (e.g. string, MetricName, …)
 * @template T - The type of value to be stored in the hash table (e.g. MetricCompute, string, …)
 */
class HashTable {
    // The max. length of a string to hash, which is set to 2048 characters.
    static MAX_LEN = 2048;
    // The max. number of entries per table, which is set to 10,000.
    static TABLE_SIZE = 10_000;
    /**
     * The internal map to store entries.
     * The key is a string generated from the label and any number of hashed strings.
     * The value is of type T.
     */
    table = new Map();
    /**
     * Generates a hash key for any number of string arguments.
     * The key is in the format "label-H1-H2-H3-..."
     *
     * @param {K} label - Label for this key (e.g. metric name, normalization flags, …)
     * @param {string[]} strs - Array of strings to hash (e.g. input, params, …)
     * @param {boolean} [sorted=false] - Whether to sort the hashes before creating the key,
     *   which makes the key independent of the order of `strs`
     * @returns {string|false} - The hash key or false if any string is too long
     */
    key(label, strs, sorted = false) {
        // Strings above the length limit are not hashed at all
        if (strs.some(str => str.length > HashTable.MAX_LEN))
            return false;
        const hashes = strs.map(s => Hasher.fnv1a(s));
        // Numeric comparator: the default sort would compare the hashes as strings
        if (sorted)
            hashes.sort((x, y) => x - y);
        // Build key: label-H1-H2-H3-...
        return [label, ...hashes].join('-');
    }
    /**
     * Checks if a key exists in the hash table.
     *
     * @param {string} key - The key to check
     * @returns {boolean} - True if the key exists, false otherwise
     */
    has(key) { return this.table.has(key); }
    /**
     * Retrieves the entry from the hash table by its key.
     *
     * @param {string} key - The key to look up
     * @returns {T|undefined} - The entry if found, undefined otherwise
     */
    get(key) { return this.table.get(key); }
    /**
     * Adds an entry to the hash table or updates an existing one.
     *
     * Updating an existing key does not grow the table and is therefore
     * allowed even when the table has reached TABLE_SIZE.
     *
     * @param {string} key - The hashed key for the entry
     * @param {T} entry - The entry itself to add
     * @param {boolean} [update=true] - Whether to update the entry if it already exists
     * @returns {boolean} - True if the entry was stored; false if the table is full
     *   or the key exists and `update` is false
     */
    set(key, entry, update = true) {
        const exists = this.table.has(key);
        // Existing keys depend only on `update`; new keys need a free slot
        if (exists ? !update : this.table.size >= HashTable.TABLE_SIZE)
            return false;
        this.table.set(key, entry);
        return true;
    }
    /**
     * Deletes an entry from the hash table by its key.
     *
     * @param {string} key - The key of the entry to delete
     */
    delete(key) { this.table.delete(key); }
    /**
     * Removes all entries from the hash table.
     */
    clear() { this.table.clear(); }
    /**
     * Returns the current size of the hash table.
     *
     * @returns {number} - The number of entries in the hash table
     */
    size() { return this.table.size; }
}
1302
+
1303
+ /**
1304
+ * Normalizer Utility
1305
+ * src/utils/Normalizer.ts
1306
+ *
1307
+ * @see https://en.wikipedia.org/wiki/Text_normalization
1308
+ * @see https://en.wikipedia.org/wiki/Unicode_equivalence
1309
+ *
1310
+ * This module provides a Normalizer class that allows for string normalization based
1311
+ * on various flags. It uses a pipeline of normalization functions that can be reused
1312
+ * and cached for efficiency. The Normalizer can handle both single strings and arrays
1313
+ * of strings, and supports synchronous and asynchronous normalization.
1314
+ *
1315
+ * Supported flags:
1316
+ * 'd' :: Normalize to NFD (Normalization Form Decomposed)
1317
+ * 'u' :: Normalize to NFC (Normalization Form Composed)
1318
+ * 'x' :: Normalize to NFKC (Normalization Form Compatibility Composed)
1319
+ * 'w' :: Collapse whitespace
1320
+ * 't' :: Remove leading and trailing whitespace
1321
+ * 'r' :: Remove double characters
1322
+ * 's' :: Remove punctuation / special characters
1323
+ * 'k' :: Remove non-letter characters
1324
+ * 'n' :: Remove non-number characters
1325
+ * 'i' :: Case insensitive (convert to lowercase)
1326
+ *
1327
+ * @module Utils/Normalizer
1328
+ * @author Paul Köhler (komed3)
1329
+ * @license MIT
1330
+ */
1331
/**
 * The Normalizer class turns a string of flag characters into a chain of
 * string transformations and applies it to single strings or arrays of strings.
 * Compiled chains and normalized results are cached in static members.
 */
class Normalizer {
    /**
     * Compiled normalization functions, keyed by their flags string.
     */
    static pipeline = new Map();
    /**
     * Normalized results, keyed by a hash of flags and input.
     */
    static cache = new HashTable();
    /**
     * Returns the normalization function for the provided flags, compiling
     * and caching it on first use. The steps always run in the fixed order
     * d, u, x, w, t, r, s, k, n, i, regardless of the order of the flags.
     *
     * @param {NormalizeFlags} flags - A string of characters representing the normalization steps
     * @returns {NormalizerFn} - A function that normalizes a string based on the provided flags
     */
    static getPipeline(flags) {
        // Reuse an already compiled pipeline
        const known = Normalizer.pipeline.get(flags);
        if (known !== undefined)
            return known;
        // Every supported flag with its transformation, in execution order
        const available = [
            // Normalize to NFD (Normalization Form Decomposed)
            ['d', str => str.normalize('NFD')],
            // Normalize to NFC (Normalization Form Composed)
            ['u', str => str.normalize('NFC')],
            // Normalize to NFKC (Normalization Form Compatibility Composed)
            ['x', str => str.normalize('NFKC')],
            // Collapse runs of whitespace into a single space
            ['w', str => str.replace(/\s+/g, ' ')],
            // Remove leading and trailing whitespace
            ['t', str => str.trim()],
            // Reduce runs of the same character to a single occurrence
            ['r', str => str.replace(/(.)\1+/g, '$1')],
            // Remove everything that is not a letter, number or whitespace
            ['s', str => str.replace(/[^\p{L}\p{N}\s]/gu, '')],
            // Remove everything that is not a letter
            ['k', str => str.replace(/[^\p{L}]/gu, '')],
            // Removes every number character (\p{N}).
            // NOTE(review): the module header describes 'n' as "remove
            // non-number characters", which is the opposite of what this
            // pattern does; confirm the intended behavior.
            ['n', str => str.replace(/\p{N}/gu, '')],
            // Case insensitive (convert to lowercase)
            ['i', str => str.toLowerCase()]
        ];
        // Keep only the steps whose flag was requested
        const steps = available
            .filter(([flag]) => flags.includes(flag))
            .map(([, step]) => step);
        // Feed the input through all selected steps, one after another
        const compiled = (input) => steps.reduce((res, step) => step(res), input);
        Normalizer.pipeline.set(flags, compiled);
        return compiled;
    }
    /**
     * Normalizes the input string or array of strings based on the provided flags.
     * Falsy input, or flags that are missing or not a string, return the input unchanged.
     *
     * @param {string|string[]} input - The string or array of strings to normalize
     * @param {NormalizeFlags} flags - A string of characters representing the normalization steps
     * @returns {string|string[]} - The normalized string(s)
     */
    static normalize(input, flags) {
        // Arrays are normalized element by element
        if (Array.isArray(input))
            return input.map(item => Normalizer.normalize(item, flags));
        // Nothing to do without input or usable flags
        if (!input || !flags || typeof flags !== 'string')
            return input;
        // The key is false when the input is too long to be cached
        const key = Normalizer.cache.key(flags, [input]);
        if (key && Normalizer.cache.has(key))
            return Normalizer.cache.get(key);
        const normalized = Normalizer.getPipeline(flags)(input);
        // Remember the result for the next identical request
        if (key)
            Normalizer.cache.set(key, normalized);
        return normalized;
    }
    /**
     * Promise-returning variant of normalize(). The normalization itself is
     * performed by the synchronous normalize() method.
     *
     * @param {string|string[]} input - The string or array of strings to normalize
     * @param {NormalizeFlags} flags - A string of characters representing the normalization steps
     * @returns {Promise<string|string[]>} - A promise that resolves to the normalized string(s)
     */
    static async normalizeAsync(input, flags) {
        if (Array.isArray(input))
            return await Promise.all(input.map(item => Normalizer.normalize(item, flags)));
        return await Promise.resolve(Normalizer.normalize(input, flags));
    }
    /**
     * Clears both the compiled pipelines and the result cache.
     */
    static clear() {
        Normalizer.pipeline.clear();
        Normalizer.cache.clear();
    }
}
1453
+
1454
+ /**
1455
+ * Filter Utility
1456
+ * src/utils/Filter.ts
1457
+ *
1458
+ * This module provides a Filter class that allows for the management and application of
1459
+ * filters to strings based on hooks. Filters can be added, removed, paused, resumed, and
1460
+ * applied to input strings. Each filter has an id, a function, a priority, and options
1461
+ * for activation and overrideability.
1462
+ *
1463
+ * @module Utils/Filter
1464
+ * @author Paul Köhler (komed3)
1465
+ * @license MIT
1466
+ */
1467
+ /**
1468
+ * The Filter class provides a way to manage and apply filters to strings based on hooks.
1469
+ */
1470
class Filter {
    /**
     * A static map to hold all filters.
     * The key is the hook name, and the value is an array of filter entries
     * (`{ id, fn, priority, active, overrideable }`) kept sorted by ascending priority.
     */
    static filters = new Map();

    /**
     * Finds a filter by its hook and id.
     *
     * @param {string} hook - The name of the hook
     * @param {string} id - The id of the filter
     * @returns {FilterEntry|undefined} - The FilterEntry if found, otherwise undefined
     */
    static find(hook, id) {
        return Filter.filters.get(hook)?.find((entry) => entry.id === id);
    }

    /**
     * Adds a filter to the specified hook.
     * An existing filter with the same id is replaced, unless it was registered
     * with `overrideable: false`.
     *
     * @param {string} hook - The name of the hook
     * @param {string} id - The id of the filter
     * @param {FilterFn} fn - The filter function
     * @param {FilterOptions} [opt] - Additional options for the filter
     * @param {number} [opt.priority=10] - Sort key; lower values run first
     * @param {boolean} [opt.active=true] - Whether the filter is applied
     * @param {boolean} [opt.overrideable=true] - Whether a later add() may replace it
     * @returns {boolean} - Returns true if the filter was added, false if it was not added due to override restrictions
     */
    static add(hook, id, fn, opt = {}) {
        const { priority = 10, active = true, overrideable = true } = opt;
        // Reuse the hook's entry list, or start a new one
        const entries = Filter.filters.get(hook) ?? [];
        const index = entries.findIndex((entry) => entry.id === id);
        if (index >= 0) {
            // A protected filter must not be replaced
            if (!entries[index].overrideable) {
                return false;
            }
            entries.splice(index, 1);
        }
        entries.push({ id, fn, priority, active, overrideable });
        // Keep the list ordered so apply() can simply iterate it
        entries.sort((x, y) => x.priority - y.priority);
        Filter.filters.set(hook, entries);
        return true;
    }

    /**
     * Removes a filter by its hook and id.
     *
     * @param {string} hook - The name of the hook
     * @param {string} id - The id of the filter
     * @returns {boolean} - Returns true if the filter was removed, false if it was not found
     */
    static remove(hook, id) {
        const entries = Filter.filters.get(hook);
        if (!entries) {
            return false;
        }
        const index = entries.findIndex((entry) => entry.id === id);
        if (index < 0) {
            return false;
        }
        entries.splice(index, 1);
        return true;
    }

    /**
     * Pauses a filter by its hook and id.
     *
     * @param {string} hook - The name of the hook
     * @param {string} id - The id of the filter
     * @returns {boolean} - Returns true if the filter was paused, false if it was not found
     */
    static pause(hook, id) {
        const entry = Filter.find(hook, id);
        if (!entry) {
            return false;
        }
        // A paused filter stays registered but is skipped by apply()/applyAsync()
        entry.active = false;
        return true;
    }

    /**
     * Resumes a filter by its hook and id.
     *
     * @param {string} hook - The name of the hook
     * @param {string} id - The id of the filter
     * @returns {boolean} - Returns true if the filter was resumed, false if it was not found
     */
    static resume(hook, id) {
        const entry = Filter.find(hook, id);
        if (!entry) {
            return false;
        }
        entry.active = true;
        return true;
    }

    /**
     * Lists all filters for a given hook.
     *
     * @param {string} hook - The name of the hook
     * @param {boolean} active - If true, only list active filters
     * @returns {string[]} - An array of filter ids, in priority order
     */
    static list(hook, active = false) {
        const entries = Filter.filters.get(hook) ?? [];
        const ids = [];
        for (const entry of entries) {
            if (!active || entry.active) {
                ids.push(entry.id);
            }
        }
        return ids;
    }

    /**
     * Applies all active filters for a given hook to the input string(s).
     *
     * @param {string} hook - The name of the hook
     * @param {string|string[]} input - The input string(s) to be filtered
     * @returns {string|string[]} - The filtered string(s)
     */
    static apply(hook, input) {
        const entries = Filter.filters.get(hook);
        // Unknown hook or nothing active: hand the input back untouched
        if (!entries || entries.every((entry) => !entry.active)) {
            return input;
        }
        // Feed the value through every active filter in priority order
        const applyOne = (value) => {
            for (const entry of entries) {
                if (entry.active) {
                    value = entry.fn(value);
                }
            }
            return value;
        };
        return Array.isArray(input) ? input.map(applyOne) : applyOne(input);
    }

    /**
     * Applies all active filters for a given hook to the input string(s) asynchronously.
     * Each filter function may return a Promise or a plain string; all are awaited in order.
     *
     * @param {string} hook - The name of the hook
     * @param {string|string[]} input - The input string(s) to be filtered
     * @returns {Promise<string|string[]>} - The filtered string(s)
     */
    static async applyAsync(hook, input) {
        const entries = Filter.filters.get(hook);
        // Unknown hook or nothing active: hand the input back untouched
        if (!entries || entries.every((entry) => !entry.active)) {
            return input;
        }
        // Filters run one after another per value; sync results are wrapped so both kinds work
        const applyOne = async (value) => {
            for (const entry of entries) {
                if (entry.active) {
                    value = await Promise.resolve(entry.fn(value));
                }
            }
            return value;
        };
        // Array elements are processed concurrently and collected with Promise.all
        return Array.isArray(input) ? Promise.all(input.map(applyOne)) : applyOne(input);
    }

    /**
     * Clears all filters or filters for a specific hook.
     *
     * The empty string is a valid hook name (add() accepts it as a Map key), so
     * `clear('')` removes only that hook rather than wiping every hook.
     *
     * @param {string} [hook] - Optional name of the hook to clear filters for
     */
    static clear(hook) {
        if (hook || hook === '') {
            // A specific hook was named: drop only its filters
            Filter.filters.delete(hook);
        }
        else {
            // No hook given: drop everything
            Filter.filters.clear();
        }
    }
}
1649
+
1650
+ /**
1651
+ * Registry Utility
1652
+ * src/utils/Registry.ts
1653
+ *
1654
+ * This module provides a Registry function that allows for registering,
1655
+ * removing, checking, getting, and listing class constructors.
1656
+ *
1657
+ * It is designed to manage class extensions, ensuring that all registered
1658
+ * classes extend a specified base constructor.
1659
+ *
1660
+ * @module Utils/Registry
1661
+ * @author Paul Köhler (komed3)
1662
+ * @license MIT
1663
+ */
1664
/**
 * Global registry object to hold multiple registries.
 * Each registry is keyed by a string identifier.
 * Created without a prototype so that `in` checks only see registered names.
 *
 * @type {Record<string, RegistryService<any>>}
 */
const registry = Object.create(null);
/**
 * Factory object to hold factory functions for creating instances.
 * Each entry forwards to `createFromRegistry()` for the registry of the same name.
 *
 * @type {Record<string, ( cls: string, ...args: any[] ) => InstanceType<any>>}
 */
const factory = Object.create(null);
/**
 * Registry function to create a service for managing class constructors.
 *
 * The created service is stored in the global `registry` object and a matching
 * factory function is stored in `factory`, both under the name `reg`.
 *
 * @param {string} reg - The name of the registry
 * @param {RegistryConstructor<T>} ctor - The base constructor that all registered classes must extend
 * @returns {RegistryService<T>} - An object with methods to register, remove, check, get, and list classes
 * @throws {Error} If the registry already exists (overwriting is forbidden)
 */
function Registry(reg, ctor) {
    // Throws an error if the registry already exists
    if (reg in registry || reg in factory) {
        throw new Error(`registry <${reg}> already exists / overwriting is forbidden`);
    }
    // Prototype-less store for the registered class constructors
    const classes = Object.create(null);
    const service = {
        /**
         * Register a new extension of the base class.
         *
         * @param {string} name - The name of the class to register
         * @param {RegistryConstructor<T>} cls - The class constructor
         * @param {boolean} [update=false] - Whether to allow overwriting an existing entry
         * @throws {TypeError} If the class does not extend the base constructor
         * @throws {Error} If the class name already exists and update is false
         */
        add(name, cls, update = false) {
            // Optional chaining makes null/undefined fail with the documented
            // message instead of an engine "cannot read properties" error
            if (!(cls?.prototype instanceof ctor)) {
                throw new TypeError(`class must extend <${reg}>`);
            }
            if (!update && name in classes) {
                throw new Error(`entry <${name}> already exists / use <update=true> to overwrite`);
            }
            classes[name] = cls;
        },
        /**
         * Remove a class from the registry.
         *
         * @param {string} name - The name of the class to remove
         */
        remove(name) { delete classes[name]; },
        /**
         * Check if a class is registered.
         *
         * @param {string} name - The name of the class to check
         * @returns {boolean} - True if the class is registered, false otherwise
         */
        has(name) { return name in classes; },
        /**
         * List all registered class names.
         *
         * @returns {string[]} - An array of registered class names
         */
        list() { return Object.keys(classes); },
        /**
         * Get a registered class by name.
         *
         * @param {string} name - The name of the class to retrieve
         * @returns {RegistryConstructor<T>} - The class constructor
         * @throws {Error} If the class is not registered
         */
        get(name) {
            if (!(name in classes)) {
                throw new Error(`class <${name}> not registered for <${reg}>`);
            }
            return classes[name];
        }
    };
    // Register the service in the global registry
    registry[reg] = service;
    // Create a factory function for creating instances from the registry
    factory[reg] = (cls, ...args) => (createFromRegistry(reg, cls, ...args));
    // Return the service object
    return service;
}
1748
/**
 * Resolve a class constructor from a specific registry.
 *
 * A string is looked up by name through the `get()` method of the registry
 * service stored under `reg`; any other value is handed back as given.
 *
 * @param {string} reg - The name of the registry
 * @param {T|string} cls - The class itself or name of the class to resolve
 * @returns {T} - The given class, or the result of the registry lookup for a name
 * @throws {ReferenceError} If the registry does not exist
 */
function resolveCls(reg, cls) {
    // The registry must have been created before anything can be resolved from it
    const known = reg in registry;
    if (!known) {
        throw new ReferenceError(`registry <${reg}> does not exist`);
    }
    // Non-string values are treated as an already resolved class
    if (typeof cls !== 'string') {
        return cls;
    }
    // Names are delegated to the registry service
    return registry[reg]?.get(cls);
}
1761
/**
 * Create an instance of a class from a specific registry.
 *
 * The class is resolved through `resolveCls()` first; errors raised during
 * resolution are not caught here and reach the caller unchanged.
 *
 * @param {string} reg - The name of the registry
 * @param {T|string} cls - The class itself or name of the class to instantiate
 * @param {...any} args - Arguments to pass to the class constructor
 * @returns {T} - An instance of the class
 * @throws {Error} If the class cannot be instantiated; the underlying error is attached as `cause`
 */
function createFromRegistry(reg, cls, ...args) {
    const ctor = resolveCls(reg, cls);
    // Build a short label for error reporting. Interpolating the constructor
    // itself would dump the complete class source text into the message.
    let label;
    if (typeof cls === 'string') {
        label = cls;
    }
    else if (typeof ctor === 'function') {
        label = ctor.name || 'anonymous';
    }
    else {
        label = String(ctor);
    }
    try {
        return new ctor(...args);
    }
    catch (err) {
        // Keep the original failure reachable for debugging
        throw new Error(`cannot instantiate class <${label}>`, { cause: err });
    }
}
1779
+
1780
+ /**
1781
+ * Abstract Metric
1782
+ * src/metric/Metric.ts
1783
+ *
1784
+ * This module defines an abstract class for string metrics, providing a framework for
1785
+ * computing various string similarity metrics. It includes methods for running metrics
1786
+ * in different modes (single, batch, pairwise) synchronous or asynchronous and caching
1787
+ * results to optimize performance. The class is designed to be extended by specific
1788
+ * metric implementations like the Levenshtein distance or Jaro-Winkler similarity.
1789
+ *
1790
+ * It provides:
1791
+ * - A base class for string metrics with common functionality
1792
+ * - Methods for running metrics in different modes
1793
+ * - Pre-computation for trivial cases to optimize performance
1794
+ * - Caching of metric computations to avoid redundant calculations
1795
+ * - Support for symmetrical metrics (same result for inputs in any order)
1796
+ * - Performance tracking capabilities (Profiler)
1797
+ * - Asynchronous execution support for metrics
1798
+ *
1799
+ * This class is intended to be extended by specific metric implementations that will
1800
+ * implement the `compute` method to define the specific metric computation logic.
1801
+ *
1802
+ * @module Metric
1803
+ * @author Paul Köhler (komed3)
1804
+ * @license MIT
1805
+ */
1806
// Get the singleton profiler instance for performance monitoring
const profiler$2 = Profiler.getInstance();
/**
 * Abstract class representing a generic string metric.
 *
 * Subclasses override `compute()`; this base class supplies input handling,
 * trivial-case shortcuts, caching and the single / batch / pairwise run modes.
 *
 * @abstract
 * @template R - The type of the raw result, defaulting to `MetricRaw`.
 */
class Metric {
    // Cache for metric computations to avoid redundant calculations
    static cache = new HashTable();
    // Metric name for identification
    metric;
    // Inputs for the metric computation, transformed into arrays
    a;
    b;
    // Store original inputs for result mapping
    origA = [];
    origB = [];
    // Options for the metric computation, such as performance tracking
    options;
    // Indicates whether the metric is symmetric (same result for inputs in any order)
    symmetric;
    /**
     * Result of the metric computation, which can be a single result or an array of results.
     * This will be populated after running the metric.
     */
    results;

    /**
     * Static method to clear the cache of metric computations.
     */
    static clear() { this.cache.clear(); }

    /**
     * Swaps two strings and their lengths if the first is longer than the second.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @returns {[string, string, number, number]} - Strings and lengths, shorter string first
     */
    static swap(a, b, m, n) { return m > n ? [b, a, n, m] : [a, b, m, n]; }

    /**
     * Clamps the similarity result between 0 and 1.
     *
     * @param {number} res - The input similarity to clamp
     * @returns {number} - The clamped similarity (0 to 1)
     */
    static clamp(res) { return Math.max(0, Math.min(1, res)); }

    /**
     * Constructor for the Metric class.
     * Initializes the metric with two inputs (strings or arrays of strings) and options.
     *
     * @param {string} metric - The name of the metric (e.g. 'levenshtein')
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     * @param {boolean} [symmetric=false] - Whether the metric is symmetric (same result for inputs in any order)
     * @throws {Error} - If `a` or `b` is an empty array
     */
    constructor(metric, a, b, opt = {}, symmetric = false) {
        // Set the metric name
        this.metric = metric;
        // Wrap single values so both inputs are always arrays
        this.a = Array.isArray(a) ? a : [a];
        this.b = Array.isArray(b) ? b : [b];
        // Validate inputs: ensure they are not empty
        if (this.a.length === 0 || this.b.length === 0) {
            throw new Error(`inputs <a> and <b> must not be empty`);
        }
        // Set options
        this.options = opt;
        this.symmetric = symmetric;
    }

    /**
     * Pre-compute the metric for two strings.
     * This method is called before the actual computation to handle trivial cases.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @returns {MetricCompute<R>|undefined} - Pre-computed result or undefined if not applicable
     */
    preCompute(a, b, m, n) {
        // Identical strings have a similarity of 1
        if (a === b) {
            return { res: 1 };
        }
        // Differing strings where one is empty, or both are shorter than
        // two characters, have a similarity of 0
        if (m === 0 || n === 0 || (m < 2 && n < 2)) {
            return { res: 0 };
        }
        return undefined;
    }

    /**
     * Abstract method to be implemented by subclasses to perform the metric computation.
     * This method should contain the logic for computing the metric between two strings.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @param {number} maxLen - Maximum length of the strings
     * @returns {MetricCompute<R>} - The result of the metric computation
     * @throws {Error} - If not overridden in a subclass
     */
    compute(a, b, m, n, maxLen) {
        throw new Error(`method compute() must be overridden in a subclass`);
    }

    /**
     * Run the metric computation for single inputs (two strings).
     * Applies preCompute for trivial cases before cache lookup and computation.
     *
     * Non-trivial pairs are evaluated inside `profiler.run()`.
     *
     * @param {number} i - Pointer to the first string
     * @param {number} j - Pointer to the second string
     * @returns {MetricResultSingle<R>} - The result of the metric computation
     */
    runSingle(i, j) {
        // Type safety: convert inputs to strings
        const a = String(this.a[i]);
        const b = String(this.b[j]);
        // Working copies; these may be swapped for symmetric metrics
        let A = a;
        let B = b;
        let m = A.length;
        let n = B.length;
        // Pre-compute trivial cases (identical, empty, etc.)
        let result = this.preCompute(A, B, m, n);
        if (!result) {
            // NOTE(review): per the original comment the profiler measures when
            // enabled and otherwise just runs the callback - confirm in Profiler
            result = profiler$2.run(() => {
                // Generate a cache key based on the metric and pair of strings
                const key = Metric.cache.key(this.metric, [A, B], this.symmetric);
                // Reuse a cached result when there is one
                const cached = Metric.cache.get(key || '');
                if (cached !== undefined && cached !== null) {
                    return cached;
                }
                // If the metric is symmetrical, put the shorter string first
                if (this.symmetric) {
                    [A, B, m, n] = Metric.swap(A, B, m, n);
                }
                // Compute the similarity using the algorithm
                const res = this.compute(A, B, m, n, Math.max(m, n));
                // If a key was generated, store the result in the cache
                if (key) {
                    Metric.cache.set(key, res);
                }
                return res;
            });
        }
        // Build metric result object, preferring the registered original inputs
        return {
            metric: this.metric,
            a: this.origA[i] ?? a,
            b: this.origB[j] ?? b,
            ...result
        };
    }

    /**
     * Run the metric computation for single inputs (two strings) asynchronously.
     *
     * @param {number} i - Pointer to the first string
     * @param {number} j - Pointer to the second string
     * @returns {Promise<MetricResultSingle<R>>} - Promise resolving the result of the metric computation
     */
    async runSingleAsync(i, j) {
        return Promise.resolve(this.runSingle(i, j));
    }

    /**
     * Run the metric computation for batch inputs (arrays of strings).
     *
     * It iterates through each string in the first array and computes the metric
     * against each string in the second array.
     */
    runBatch() {
        const results = [];
        // Loop through each combination of strings in a[] and b[]
        for (let i = 0; i < this.a.length; i++) {
            for (let j = 0; j < this.b.length; j++) {
                results.push(this.runSingle(i, j));
            }
        }
        // `this.results` will be an array of MetricResultSingle
        this.results = results;
    }

    /**
     * Run the metric computation for batch inputs (arrays of strings) asynchronously.
     * Pairs are awaited one after another, in the same order as `runBatch()`.
     */
    async runBatchAsync() {
        const results = [];
        // Loop through each combination of strings in a[] and b[]
        for (let i = 0; i < this.a.length; i++) {
            for (let j = 0; j < this.b.length; j++) {
                results.push(await this.runSingleAsync(i, j));
            }
        }
        // `this.results` will be an array of MetricResultSingle
        this.results = results;
    }

    /**
     * Run the metric computation for pairwise inputs (A[i] vs B[i]).
     *
     * This method assumes that both `a` and `b` are arrays of equal length
     * and computes the metric only for corresponding index pairs.
     */
    runPairwise() {
        const results = [];
        // Compute metric for each corresponding pair
        for (let i = 0; i < this.a.length; i++) {
            results.push(this.runSingle(i, i));
        }
        // `this.results` will be an array of MetricResultSingle
        this.results = results;
    }

    /**
     * Run the metric computation for pairwise inputs (A[i] vs B[i]) asynchronously.
     */
    async runPairwiseAsync() {
        const results = [];
        // Compute metric for each corresponding pair
        for (let i = 0; i < this.a.length; i++) {
            results.push(await this.runSingleAsync(i, i));
        }
        // `this.results` will be an array of MetricResultSingle
        this.results = results;
    }

    /**
     * Set the original inputs to which the results of the metric calculation will refer.
     * Falsy arguments leave the corresponding stored originals untouched.
     *
     * @param {MetricInput} [a] - original input(s) for a
     * @param {MetricInput} [b] - original input(s) for b
     * @returns {this} - The metric instance, for chaining
     */
    setOriginal(a, b) {
        if (a) {
            this.origA = Array.isArray(a) ? a : [a];
        }
        if (b) {
            this.origB = Array.isArray(b) ? b : [b];
        }
        return this;
    }

    /**
     * Check if the inputs are in batch mode.
     *
     * @returns {boolean} - True if either input holds more than one element
     */
    isBatch() { return this.a.length > 1 || this.b.length > 1; }

    /**
     * Check if the inputs are in single mode, i.e. not in batch mode:
     * neither input holds more than one element.
     *
     * @returns {boolean} - True if neither input holds more than one element
     */
    isSingle() { return !this.isBatch(); }

    /**
     * Check if the inputs are in pairwise mode.
     *
     * The inputs qualify when they are in batch mode and both arrays have the
     * same length.
     *
     * @param {boolean} [safe=false] - If true, return false instead of throwing when the inputs do not qualify
     * @returns {boolean} - True if the inputs qualify for pairwise mode
     * @throws {Error} - If `safe` is false and the inputs do not qualify
     */
    isPairwise(safe = false) {
        if (this.isBatch() && this.a.length === this.b.length) {
            return true;
        }
        if (safe) {
            return false;
        }
        throw new Error(`mode <pairwise> requires arrays of equal length`);
    }

    /**
     * Check if the metric is symmetrical.
     *
     * @returns {boolean} - The `symmetric` flag passed to the constructor
     */
    isSymmetrical() { return this.symmetric; }

    /**
     * Determine which mode to run the metric in.
     *
     * The explicit argument wins, then `options.mode`, then 'default'.
     *
     * @param {MetricMode} [mode] - The mode to run the metric in (optional)
     * @returns {MetricMode} - The determined mode
     */
    whichMode(mode) { return mode ?? this.options?.mode ?? 'default'; }

    /**
     * Clear the stored results of the metric by resetting `results` to `undefined`.
     */
    clear() { this.results = undefined; }

    /**
     * Run the metric computation based on the specified mode.
     *
     * @param {MetricMode} [mode] - The mode to run the metric in (optional)
     * @param {boolean} [clear=true] - Whether to clear previous results before running
     * @throws {Error} - If an unsupported mode is specified
     */
    run(mode, clear = true) {
        // Clear previous results if requested
        if (clear) {
            this.clear();
        }
        // Resolve once so the error below reports the mode that was actually
        // selected (it may come from the options rather than the argument)
        const selected = this.whichMode(mode);
        switch (selected) {
            // Default mode runs the metric on single inputs or falls back to batch mode
            case 'default':
                if (this.isSingle()) {
                    this.results = this.runSingle(0, 0);
                    break;
                }
            // falls through: non-single inputs in default mode are handled as batch
            // Batch mode runs the metric on all combinations of a[] and b[]
            case 'batch':
                this.runBatch();
                break;
            // Single mode runs the metric on the first elements of a[] and b[]
            case 'single':
                this.results = this.runSingle(0, 0);
                break;
            // Pairwise mode runs the metric on corresponding pairs of a[] and b[]
            case 'pairwise':
                if (this.isPairwise()) {
                    this.runPairwise();
                }
                break;
            // Unsupported mode
            default: throw new Error(`unsupported mode <${selected}>`);
        }
    }

    /**
     * Run the metric computation based on the specified mode asynchronously.
     *
     * @param {MetricMode} [mode] - The mode to run the metric in (optional)
     * @param {boolean} [clear=true] - Whether to clear previous results before running
     * @returns {Promise<void>} - A promise that resolves when the metric computation is complete
     * @throws {Error} - If an unsupported mode is specified
     */
    async runAsync(mode, clear = true) {
        // Clear previous results if requested
        if (clear) {
            this.clear();
        }
        // Resolve once so the error below reports the mode that was actually selected
        const selected = this.whichMode(mode);
        switch (selected) {
            // Default mode runs the metric on single inputs or falls back to batch mode
            case 'default':
                if (this.isSingle()) {
                    this.results = await this.runSingleAsync(0, 0);
                    break;
                }
            // falls through: non-single inputs in default mode are handled as batch
            // Batch mode runs the metric on all combinations of a[] and b[]
            case 'batch':
                await this.runBatchAsync();
                break;
            // Single mode runs the metric on the first elements of a[] and b[]
            case 'single':
                this.results = await this.runSingleAsync(0, 0);
                break;
            // Pairwise mode runs the metric on corresponding pairs of a[] and b[]
            case 'pairwise':
                if (this.isPairwise()) {
                    await this.runPairwiseAsync();
                }
                break;
            // Unsupported mode
            default: throw new Error(`unsupported async mode <${selected}>`);
        }
    }

    /**
     * Get the name of the metric.
     *
     * @returns {string} - The name of the metric
     */
    getMetricName() { return this.metric; }

    /**
     * Get the result of the metric computation.
     *
     * @returns {MetricResult<R>} - The result of the metric computation
     * @throws {Error} - If no results are stored, i.e. the metric has not been run
     */
    getResults() {
        // Ensure that the metric has been run before getting the result
        if (this.results === undefined) {
            throw new Error(`run() must be called before getResults()`);
        }
        // Return the result(s)
        return this.results;
    }
}
/**
 * Metric registry service for managing metric implementations.
 *
 * Created through `Registry('metric', Metric)`, so every registered class
 * must extend `Metric`.
 */
const MetricRegistry = Registry('metric', Metric);
2194
+
2195
+ /**
2196
+ * Pool Utility
2197
+ * src/utils/Pool.ts
2198
+ *
2199
+ * @see https://en.wikipedia.org/wiki/Circular_buffer
2200
+ *
2201
+ * The Pool class provides a simple and efficient buffer pool for dynamic programming
2202
+ * algorithms that require temporary arrays (such as Levenshtein, LCS, etc.).
2203
+ * By reusing pre-allocated typed arrays, it reduces memory allocations and garbage
2204
+ * collection overhead, especially for repeated or batch computations.
2205
+ *
2206
+ * It supports different types of buffers (Uint16Array, number[], Set, Map) and allows
2207
+ * for acquiring buffers of specific sizes while managing a maximum pool size.
2208
+ *
2209
+ * @module Utils/Pool
2210
+ * @author Paul Köhler (komed3)
2211
+ * @license MIT
2212
+ */
2213
+ /**
2214
+ * RingPool is a circular buffer implementation that manages a pool of buffers.
2215
+ *
2216
+ * It allows for efficient acquisition and release of buffers, ensuring that
2217
+ * buffers are reused without unnecessary allocations.
2218
+ *
2219
+ * @template T - The type of buffers managed by the pool
2220
+ */
2221
class RingPool {
    // Upper bound on the number of items kept in the pool
    maxSize;
    // Items currently available for reuse; each item exposes a numeric `size`
    buffers = [];
    // Ring position: where the next search starts and which slot is overwritten when full
    pointer = 0;
    /**
     * Creates a new RingPool with a specified maximum size.
     *
     * @param {number} maxSize - The maximum number of buffers that can be stored in the pool
     */
    constructor(maxSize) {
        this.maxSize = maxSize;
    }
    /**
     * Takes a suitable buffer out of the pool.
     *
     * The returned item is removed from the pool, so the same buffer can never be
     * handed to two callers at once; it only becomes available again after `release()`.
     * The search starts at the ring pointer and inspects every pooled item once.
     *
     * @param {number} minSize - The minimum size of the buffer to acquire
     * @param {boolean} allowOversize - Whether to accept buffers larger than minSize;
     *   when false only an item whose size equals minSize is accepted
     * @return {PoolBuffer<T>|null} - The acquired buffer or null if no suitable buffer is found
     */
    acquire(minSize, allowOversize) {
        const len = this.buffers.length;
        for (let i = 0; i < len; i++) {
            const idx = (this.pointer + i) % len;
            const item = this.buffers[idx];
            const fits = allowOversize ? item.size >= minSize : item.size === minSize;
            // Keep looking: a later item may still match (e.g. an exact-size one)
            if (!fits)
                continue;
            // Remove the item so it cannot be acquired a second time while in use
            this.buffers.splice(idx, 1);
            // After the removal `idx` addresses the following item; wrap at the end
            this.pointer = idx < len - 1 ? idx : 0;
            return item;
        }
        // No suitable buffer available
        return null;
    }
    /**
     * Releases a buffer back to the pool.
     * If the pool is full, the slot at the ring pointer is overwritten and the pointer advances.
     *
     * @param {PoolBuffer<T>} item - The buffer to release back to the pool
     */
    release(item) {
        // A pool without capacity cannot store anything
        if (!(this.maxSize > 0))
            return;
        if (this.buffers.length < this.maxSize) {
            // Room left: simply append
            this.buffers.push(item);
        }
        else {
            // Full: replace the item at the ring pointer and move on
            this.buffers[this.pointer] = item;
            this.pointer = (this.pointer + 1) % this.maxSize;
        }
    }
    /**
     * Clears the pool, removing all buffers.
     * This resets the pointer and empties the buffer list.
     */
    clear() {
        this.buffers = [];
        this.pointer = 0;
    }
}
2285
+ /**
2286
+ * The Pool class provides a buffer pool for dynamic programming algorithms.
2287
+ *
2288
+ * It allows for efficient reuse of buffers (Uint16Array, number[], Set, Map)
2289
+ * to reduce memory allocations and garbage collection overhead.
2290
+ */
2291
class Pool {
    // Per-type settings:
    //   maxSize       - capacity of the matching ring in POOLS
    //   maxItemSize   - requests/releases above this size bypass the pool
    //   allowOversize - forwarded to RingPool.acquire()
    static CONFIG = {
        'uint16': { type: 'uint16', maxSize: 32, maxItemSize: 2048, allowOversize: true },
        'number[]': { type: 'number[]', maxSize: 16, maxItemSize: 1024, allowOversize: false },
        'set': { type: 'set', maxSize: 8, maxItemSize: 0, allowOversize: false },
        'map': { type: 'map', maxSize: 8, maxItemSize: 0, allowOversize: false }
    };
    // One ring of reusable buffers per type
    static POOLS = {
        'uint16': new RingPool(32),
        'number[]': new RingPool(16),
        'set': new RingPool(8),
        'map': new RingPool(8)
    };
    /**
     * Creates a brand-new buffer of the requested type.
     *
     * @param {PoolType} type - The type of buffer to allocate
     * @param {number} size - The size of the buffer (ignored for 'set' and 'map')
     * @return {any} - The new buffer; undefined for a type not listed here
     */
    static allocate(type, size) {
        if (type === 'uint16')
            return new Uint16Array(size);
        if (type === 'number[]')
            return new Array(size).fill(0);
        if (type === 'set')
            return new Set();
        if (type === 'map')
            return new Map();
    }
    /**
     * Returns a buffer of the given type and size, reusing a pooled one when possible.
     *
     * @param {PoolType} type - The type of buffer to acquire (e.g., 'uint16', 'number[]', 'set', 'map')
     * @param {number} size - The size of the buffer to acquire
     * @return {T} - A pooled buffer (a `subarray(0, size)` view for 'uint16') or a fresh allocation
     */
    static acquire(type, size) {
        const settings = this.CONFIG[type];
        // Requests above the per-type limit never touch the ring
        const tooLarge = size > settings.maxItemSize;
        const pooled = tooLarge ? null : this.POOLS[type].acquire(size, settings.allowOversize);
        // Nothing reusable: fall back to a fresh allocation
        if (!pooled)
            return this.allocate(type, size);
        // Typed arrays may be larger than requested, so expose only the first `size` entries
        if (type === 'uint16')
            return pooled.buffer.subarray(0, size);
        return pooled.buffer;
    }
    /**
     * Acquires one buffer per entry of `sizes`.
     *
     * @param {PoolType} type - The type of buffers to acquire
     * @param {number[]} sizes - An array of sizes for each buffer to acquire
     * @return {T[]} - The acquired buffers, in the order of `sizes`
     */
    static acquireMany(type, sizes) {
        const acquireOne = (size) => this.acquire(type, size);
        return sizes.map(acquireOne);
    }
    /**
     * Offers a buffer back to the pool for later reuse.
     * Buffers whose size exceeds the type's `maxItemSize` are not stored.
     *
     * @param {PoolType} type - The type of buffer to release
     * @param {T} buffer - The buffer to release
     * @param {number} size - The size of the buffer
     */
    static release(type, buffer, size) {
        const limit = this.CONFIG[type].maxItemSize;
        if (size <= limit)
            this.POOLS[type].release({ buffer, size });
    }
}
2373
+
2374
+ /**
2375
+ * Cosine Similarity
2376
+ * src/metric/Cosine.ts
2377
+ *
2378
+ * @see https://en.wikipedia.org/wiki/Cosine_similarity
2379
+ *
2380
+ * Cosine similarity is a metric used to measure how similar two vectors are, regardless
2381
+ * of their magnitude. In text analysis, it is commonly used to compare documents or
2382
+ * strings by representing them as term frequency vectors and computing the cosine of
2383
+ * the angle between these vectors.
2384
+ *
2385
+ * The result is a value between 0 and 1, where 1 means the vectors are identical and
2386
+ * 0 means they are orthogonal (no similarity).
2387
+ *
2388
+ * @module Metric/CosineSimilarity
2389
+ * @author Paul Köhler (komed3)
2390
+ * @license MIT
2391
+ */
2392
+ /**
2393
+ * CosineSimilarity class extends the Metric class to implement the Cosine similarity algorithm.
2394
+ */
2395
class CosineSimilarity extends Metric {
    /**
     * Creates the Cosine similarity metric for two inputs.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Registered under the name 'cosine'; the trailing `true` marks the metric as symmetrical
        super('cosine', a, b, opt, true);
    }
    /**
     * Counts how often each term occurs in a string.
     *
     * @param {string} str - The input string
     * @param {string} delimiter - The delimiter to split terms
     * @return {Map<string, number>} - Map from term to its number of occurrences
     */
    _termFreq(str, delimiter) {
        const tokens = str.split(delimiter);
        const counts = Pool.acquire('map', tokens.length);
        tokens.forEach((token) => {
            counts.set(token, (counts.get(token) || 0) + 1);
        });
        return counts;
    }
    /**
     * Calculates the Cosine similarity between two strings.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @return {MetricCompute<CosineRaw>} - Object containing the similarity result and raw values
     */
    compute(a, b) {
        // Terms are separated by `options.delimiter`, a single space by default
        const { delimiter = ' ' } = this.options;
        const termsA = this._termFreq(a, delimiter);
        const termsB = this._termFreq(b, delimiter);
        let dotProduct = 0;
        let squaresA = 0;
        let squaresB = 0;
        // One pass over A yields the dot product and A's squared length
        termsA.forEach((countA, term) => {
            dotProduct += countA * (termsB.get(term) || 0);
            squaresA += countA * countA;
        });
        // B's squared length needs its own pass
        termsB.forEach((countB) => {
            squaresB += countB * countB;
        });
        const magnitudeA = Math.sqrt(squaresA);
        const magnitudeB = Math.sqrt(squaresB);
        // Give both maps back to the pool
        Pool.release('map', termsA, termsA.size);
        Pool.release('map', termsB, termsB.size);
        // A zero-length vector has no direction, so the similarity is reported as 0
        const res = (magnitudeA && magnitudeB)
            ? Metric.clamp(dotProduct / (magnitudeA * magnitudeB))
            : 0;
        return { res, raw: { dotProduct, magnitudeA, magnitudeB } };
    }
}
2461
+ // Register the CosineSimilarity class in the metric registry under the key 'cosine'
2462
+ MetricRegistry.add('cosine', CosineSimilarity);
2463
+
2464
+ /**
2465
+ * Damerau-Levenshtein Distance
2466
+ * src/metric/DamerauLevenshtein.ts
2467
+ *
2468
+ * @see https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
2469
+ *
2470
+ * The Damerau-Levenshtein distance extends the classical Levenshtein algorithm by
2471
+ * including transpositions (swapping of two adjacent characters) as a single edit
2472
+ * operation, in addition to insertions, deletions, and substitutions.
2473
+ *
2474
+ * This metric is particularly useful for detecting and correcting common
2475
+ * typographical errors.
2476
+ *
2477
+ * @module Metric/DamerauLevenshtein
2478
+ * @author Paul Köhler (komed3)
2479
+ * @license MIT
2480
+ */
2481
+ /**
2482
+ * DamerauLevenshteinDistance class extends the Metric class to implement the Damerau-Levenshtein algorithm.
2483
+ */
2484
class DamerauLevenshteinDistance extends Metric {
    /**
     * Creates the Damerau-Levenshtein metric for two inputs.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Registered under the name 'damerau'; the trailing `true` marks the metric as symmetrical
        super('damerau', a, b, opt, true);
    }
    /**
     * Calculates the normalized Damerau-Levenshtein distance between two strings.
     *
     * Only three rows of the DP matrix are kept: the row being filled, the row
     * before it and the row before that (needed for transpositions).
     *
     * @param {string} a - First string (indexes the columns of the DP rows)
     * @param {string} b - Second string (one DP row per character)
     * @param {number} m - Length of the first string (a)
     * @param {number} n - Length of the second string (b)
     * @param {number} maxLen - Maximum length of the strings
     * @return {MetricCompute<DamerauRaw>} - Object containing the similarity result and raw distance
     */
    compute(a, b, m, n, maxLen) {
        const width = m + 1;
        const [older, prev, curr] = Pool.acquireMany('uint16', [width, width, width]);
        // Row 0: distance between the empty string and each prefix of a
        for (let col = 0; col < width; col++)
            prev[col] = col;
        for (let row = 1; row <= n; row++) {
            // Column 0: distance between b[0..row) and the empty string
            curr[0] = row;
            const charB = b.charCodeAt(row - 1);
            for (let col = 1; col <= m; col++) {
                const charA = a.charCodeAt(col - 1);
                // Substituting equal characters is free
                const cost = charA === charB ? 0 : 1;
                let best = Math.min(
                    curr[col - 1] + 1, // Insertion
                    prev[col] + 1, // Deletion
                    prev[col - 1] + cost // Substitution
                );
                // Adjacent characters swapped between a and b count as one transposition
                const swapped = col > 1 && row > 1 &&
                    charA === b.charCodeAt(row - 2) &&
                    charB === a.charCodeAt(col - 2);
                if (swapped)
                    best = Math.min(best, older[col - 2] + cost);
                curr[col] = best;
            }
            // Shift the window by copying: older <- prev, prev <- curr
            older.set(prev);
            prev.set(curr);
        }
        // After the final copy the distance sits in the last cell of `prev`
        const dist = prev[m];
        // Give the rows back to the pool
        Pool.release('uint16', older, width);
        Pool.release('uint16', prev, width);
        Pool.release('uint16', curr, width);
        // Normalize by the length of the longer string
        const res = maxLen === 0 ? 1 : Metric.clamp(1 - (dist / maxLen));
        return { res, raw: { dist, maxLen } };
    }
}
2560
+ // Register the DamerauLevenshteinDistance class in the metric registry under the key 'damerau'
2561
+ MetricRegistry.add('damerau', DamerauLevenshteinDistance);
2562
+
2563
+ /**
2564
+ * Dice-Sørensen Coefficient
2565
+ * src/metric/DiceSorensen.ts
2566
+ *
2567
+ * @see https://en.wikipedia.org/wiki/Dice-S%C3%B8rensen_coefficient
2568
+ *
2569
+ * This module implements the Dice-Sørensen coefficient, a statistic used to gauge
2570
+ * the similarity of two samples. It is commonly used in natural language processing
2571
+ * and information retrieval to compare the similarity between two sets of data,
2572
+ * such as text documents. The coefficient is defined as twice the size of the
2573
+ * intersection divided by the sum of the sizes of the two sets.
2574
+ *
2575
+ * The implementation includes methods to compute bigrams from strings and calculate
2576
+ * the coefficient based on these bigrams. It handles edge cases, such as empty
2577
+ * strings and identical strings, to ensure accurate results.
2578
+ *
2579
+ * @module Metric/DiceSorensenCoefficient
2580
+ * @author Paul Köhler (komed3)
2581
+ * @license MIT
2582
+ */
2583
+ /**
2584
+ * DiceSorensenCoefficient class extends the Metric class to implement the Dice-Sørensen coefficient.
2585
+ */
2586
class DiceSorensenCoefficient extends Metric {
    /**
     * Creates the Dice-Sørensen metric for two inputs.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Registered under the name 'dice'; the trailing `true` marks the metric as symmetrical
        super('dice', a, b, opt, true);
    }
    /**
     * Computes the bigrams of a given string.
     *
     * @param {string} str - The input string
     * @return {Set<string>} - The distinct two-character sequences of the string
     *   (empty for strings shorter than two characters)
     */
    _bigrams(str) {
        const len = str.length - 1;
        const bigrams = Pool.acquire('set', len);
        // Slide a two-character window over the string
        for (let i = 0; i < len; i++)
            bigrams.add(str.substring(i, i + 2));
        return bigrams;
    }
    /**
     * Calculates the Dice-Sørensen coefficient between two strings.
     *
     * When neither string yields a bigram the coefficient is undefined (0/0); in that
     * case the result is 1 only if both strings are equal, and 0 otherwise, so that
     * two different one-character strings are not reported as identical.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @return {MetricCompute<DiceRaw>} - Object containing the similarity result and the
     *   raw values (`intersection`: shared bigrams, `size`: sum of both set sizes)
     */
    compute(a, b) {
        const setA = this._bigrams(a);
        const setB = this._bigrams(b);
        // Count the bigrams present in both sets
        let intersection = 0;
        for (const bigram of setA)
            if (setB.has(bigram))
                intersection++;
        // Denominator of the coefficient: |A| + |B| (not the size of the union)
        const sizeA = setA.size;
        const sizeB = setB.size;
        const size = sizeA + sizeB;
        // Give both sets back to the pool
        Pool.release('set', setA, sizeA);
        Pool.release('set', setB, sizeB);
        let res;
        if (size === 0) {
            // No bigrams on either side: fall back to plain equality
            res = a === b ? 1 : 0;
        }
        else {
            res = Metric.clamp((2 * intersection) / size);
        }
        return { res, raw: { intersection, size } };
    }
}
2645
+ // Register the DiceSorensenCoefficient class in the metric registry under the key 'dice'
2646
+ MetricRegistry.add('dice', DiceSorensenCoefficient);
2647
+
2648
+ /**
2649
+ * Hamming Distance
2650
+ * src/metric/Hamming.ts
2651
+ *
2652
+ * @see https://en.wikipedia.org/wiki/Hamming_distance
2653
+ *
2654
+ * The Hamming distance is a metric for comparing two strings of equal length. It
2655
+ * measures the number of positions at which the corresponding symbols are different.
2656
+ *
2657
+ * This implementation allows for optional padding of the shorter string to equalize
2658
+ * lengths, otherwise it throws an error if the strings are of unequal length.
2659
+ *
2660
+ * @module Metric/HammingDistance
2661
+ * @author Paul Köhler (komed3)
2662
+ * @license MIT
2663
+ */
2664
+ /**
2665
+ * HammingDistance class extends the Metric class to implement the Hamming distance.
2666
+ */
2667
class HammingDistance extends Metric {
    /**
     * Creates the Hamming distance metric for two inputs.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} opt - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Registered under the name 'hamming'; the trailing `true` marks the metric as symmetrical
        super('hamming', a, b, opt, true);
    }
    /**
     * Calculates the Hamming distance between two strings.
     *
     * Strings of unequal length are only accepted when `options.pad` is set; the
     * shorter string is then padded at the end up to `maxLen`. Every position up to
     * the common length is compared, so a position that is still missing in one
     * string (e.g. because `pad` is an empty string) counts as a mismatch.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @param {number} maxLen - Maximum length of the strings
     * @return {MetricCompute<HammingRaw>} - Object containing the similarity result and raw distance
     * @throws {Error} - If strings are of unequal length and padding is not specified
     */
    compute(a, b, m, n, maxLen) {
        let left = a;
        let right = b;
        // Number of positions to compare
        let len = m;
        if (m !== n) {
            const { pad } = this.options;
            // Without a padding option unequal lengths are an error
            if (pad === undefined) {
                throw new Error(`strings must be of equal length for Hamming Distance, a=${m} and b=${n} given, ` +
                    `use option.pad for automatic adjustment`);
            }
            if (m < maxLen)
                left = left.padEnd(maxLen, pad);
            if (n < maxLen)
                right = right.padEnd(maxLen, pad);
            len = maxLen;
        }
        // Count differing positions over the full common length; an index beyond the end
        // of either string reads as undefined and therefore differs from any character
        let dist = 0;
        for (let i = 0; i < len; i++)
            if (left[i] !== right[i])
                dist++;
        return {
            res: len === 0 ? 1 : Metric.clamp(1 - dist / len),
            raw: { dist }
        };
    }
}
2722
+ // Register the HammingDistance class in the metric registry under the key 'hamming'
2723
+ MetricRegistry.add('hamming', HammingDistance);
2724
+
2725
+ /**
2726
+ * Jaccard Index
2727
+ * src/metric/Jaccard.ts
2728
+ *
2729
+ * @see https://en.wikipedia.org/wiki/Jaccard_index
2730
+ *
2731
+ * The Jaccard Index (or Jaccard similarity coefficient) measures the similarity
2732
+ * between two sets by dividing the size of their intersection by the size of
2733
+ * their union. In string similarity, it is often used to compare sets of characters,
2734
+ * tokens, or n-grams. The result is a value between 0 and 1, where 1 means the
2735
+ * sets are identical and 0 means they have no elements in common.
2736
+ *
2737
+ * @module Metric/JaccardIndex
2738
+ * @author Paul Köhler (komed3)
2739
+ * @license MIT
2740
+ */
2741
+ /**
2742
+ * JaccardIndex class extends the Metric class to implement the Jaccard Index algorithm.
2743
+ */
2744
class JaccardIndex extends Metric {
    /**
     * Creates the Jaccard Index metric for two inputs.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Registered under the name 'jaccard'; the trailing `true` marks the metric as symmetrical
        super('jaccard', a, b, opt, true);
    }
    /**
     * Calculates the Jaccard Index between the character sets of two strings.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @return {MetricCompute<JaccardRaw>} - Object containing the similarity result and raw values
     */
    compute(a, b, m, n) {
        const [charsA, charsB] = Pool.acquireMany('set', [m, n]);
        // Collect the distinct characters of each string
        for (const ch of a)
            charsA.add(ch);
        for (const ch of b)
            charsB.add(ch);
        // Characters present in both sets
        let intersection = 0;
        charsA.forEach((ch) => {
            if (charsB.has(ch))
                intersection++;
        });
        // |A ∪ B| = |A| + |B| - |A ∩ B|
        const union = charsA.size + charsB.size - intersection;
        // Give both sets back to the pool
        Pool.release('set', charsA, m);
        Pool.release('set', charsB, n);
        // Two empty sets are treated as identical
        const res = union === 0 ? 1 : Metric.clamp(intersection / union);
        return { res, raw: { intersection, union } };
    }
}
2794
+ // Register the JaccardIndex class in the metric registry under the key 'jaccard'
2795
+ MetricRegistry.add('jaccard', JaccardIndex);
2796
+
2797
+ /**
2798
+ * Jaro-Winkler Distance
2799
+ * src/metric/JaroWinkler.ts
2800
+ *
2801
+ * @see https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
2802
+ *
2803
+ * The Jaro-Winkler distance is a string similarity metric that gives more weight
2804
+ * to matching characters at the start of the strings. It is especially effective
2805
+ * for short strings and typographical errors, and is widely used in record linkage
2806
+ * and duplicate detection.
2807
+ *
2808
+ * @module Metric/JaroWinkler
2809
+ * @author Paul Köhler (komed3)
2810
+ * @license MIT
2811
+ */
2812
+ /**
2813
+ * JaroWinklerDistance class extends the Metric class to implement the Jaro-Winkler algorithm.
2814
+ */
2815
class JaroWinklerDistance extends Metric {
    /**
     * Creates the Jaro-Winkler metric for two inputs.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Metric name is 'jaro-winkler'; the trailing `true` marks the metric as symmetrical
        super('jaro-winkler', a, b, opt, true);
    }
    /**
     * Calculates the Jaro-Winkler similarity between two strings.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @return {MetricCompute<JaroWinklerRaw>} - Object containing the similarity result and raw values
     */
    compute(a, b, m, n) {
        // Characters only match if their positions are at most this far apart
        const matchWindow = Math.max(0, Math.floor(n / 2) - 1);
        // Flag arrays (0/1) marking which positions of a and b are already matched
        const usedA = Pool.acquire('uint16', m);
        const usedB = Pool.acquire('uint16', n);
        // Pooled buffers may hold old data, so reset the flags first
        usedA.fill(0, 0, m);
        usedB.fill(0, 0, n);
        // Pair every character of a with the first free, equal character of b in its window
        let matches = 0;
        for (let i = 0; i < m; i++) {
            const from = Math.max(0, i - matchWindow);
            const to = Math.min(i + matchWindow + 1, n);
            for (let j = from; j < to; j++) {
                if (usedB[j] || a[i] !== b[j])
                    continue;
                usedA[i] = 1;
                usedB[j] = 1;
                matches++;
                break;
            }
        }
        let transpos = 0;
        let jaro = 0;
        let prefix = 0;
        let res = 0;
        // Without any match every value stays 0
        if (matches > 0) {
            // Walk the matched characters of both strings in order; pairs that differ
            // are half-transpositions
            let k = 0;
            for (let i = 0; i < m; i++) {
                if (!usedA[i])
                    continue;
                while (!usedB[k])
                    k++;
                if (a[i] !== b[k])
                    transpos++;
                k++;
            }
            transpos /= 2;
            // Jaro similarity
            jaro = ((matches / m) + (matches / n) +
                (matches - transpos) / matches) / 3;
            // Length of the common prefix, capped at 4 characters
            const prefixLimit = Math.min(4, m, n);
            while (prefix < prefixLimit && a[prefix] === b[prefix])
                prefix++;
            // Winkler adjustment: boost by 0.1 per common prefix character
            res = jaro + prefix * 0.1 * (1 - jaro);
        }
        // Give the flag arrays back to the pool
        Pool.release('uint16', usedA, m);
        Pool.release('uint16', usedB, n);
        return {
            res: Metric.clamp(res),
            raw: { matchWindow, matches, transpos, jaro, prefix }
        };
    }
}
2904
+ // Register the Jaro-Winkler distance in the metric registry
2905
+ MetricRegistry.add('jaroWinkler', JaroWinklerDistance);
2906
+
2907
+ /**
2908
+ * Longest Common Subsequence (LCS)
2909
+ * src/metric/LCS.ts
2910
+ *
2911
+ * @see https://en.wikipedia.org/wiki/Longest_common_subsequence
2912
+ *
2913
+ * The Longest Common Subsequence (LCS) metric measures the length of the longest
2914
+ * subsequence common to both strings. Unlike substrings, the characters of a
2915
+ * subsequence do not need to be contiguous, but must appear in the same order.
2916
+ *
2917
+ * The LCS is widely used in diff tools, bioinformatics, and approximate string
2918
+ * matching.
2919
+ *
2920
+ * @module Metric/LCS
2921
+ * @author Paul Köhler (komed3)
2922
+ * @license MIT
2923
+ */
2924
+ /**
2925
+ * LCSMetric class extends the Metric class to implement the Longest Common Subsequence algorithm.
2926
+ */
2927
class LCSMetric extends Metric {
    /**
     * Creates the LCS metric for two inputs.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Registered under the name 'lcs'; the trailing `true` marks the metric as symmetrical
        super('lcs', a, b, opt, true);
    }
    /**
     * Calculates the normalized LCS similarity between two strings.
     *
     * Only two rows of the DP matrix are kept: the row being filled and the one above it.
     *
     * @param {string} a - First string (indexes the columns of the DP rows)
     * @param {string} b - Second string (one DP row per character)
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @param {number} maxLen - Maximum length of the strings
     * @return {MetricCompute<LCSRaw>} - Object containing the similarity result and raw LCS length
     */
    compute(a, b, m, n, maxLen) {
        const width = m + 1;
        const [above, row] = Pool.acquireMany('uint16', [width, width]);
        // Row 0: the LCS with an empty string has length 0
        above.fill(0, 0, width);
        for (let line = 1; line <= n; line++) {
            const charB = b.charCodeAt(line - 1);
            row[0] = 0;
            for (let col = 1; col <= m; col++) {
                const same = a.charCodeAt(col - 1) === charB;
                // A match extends the diagonal; otherwise keep the better neighbour
                row[col] = same
                    ? above[col - 1] + 1
                    : Math.max(above[col], row[col - 1]);
            }
            // The finished row becomes the "above" row of the next iteration
            above.set(row);
        }
        // After the final copy the LCS length sits in the last cell of `above`
        const lcs = above[m];
        // Give the rows back to the pool
        Pool.release('uint16', above, width);
        Pool.release('uint16', row, width);
        // Normalize by the length of the longer string
        const res = maxLen === 0 ? 1 : Metric.clamp(lcs / maxLen);
        return { res, raw: { lcs, maxLen } };
    }
}
2988
+ // Register the LCSMetric class (Longest Common Subsequence) in the metric registry under the key 'lcs'
2989
+ MetricRegistry.add('lcs', LCSMetric);
2990
+
2991
+ /**
2992
+ * Levenshtein Distance
2993
+ * src/metric/Levenshtein.ts
2994
+ *
2995
+ * @see https://en.wikipedia.org/wiki/Levenshtein_distance
2996
+ *
2997
+ * The Levenshtein distance is a classic metric for measuring the minimum number
2998
+ * of single-character edits (insertions, deletions, or substitutions) required
2999
+ * to change one string into another.
3000
+ *
3001
+ * It is widely used in approximate string matching, spell checking, and natural
3002
+ * language processing.
3003
+ *
3004
+ * @module Metric/LevenshteinDistance
3005
+ * @author Paul Köhler (komed3)
3006
+ * @license MIT
3007
+ */
3008
+ /**
3009
+ * LevenshteinDistance class extends the Metric class to implement the Levenshtein distance algorithm.
3010
+ */
3011
class LevenshteinDistance extends Metric {
    /**
     * Creates the Levenshtein metric for two inputs.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Registered under the name 'levenshtein'; the trailing `true` marks the metric as symmetrical
        super('levenshtein', a, b, opt, true);
    }
    /**
     * Calculates the Levenshtein distance between two strings.
     *
     * Only two rows of the DP matrix are kept: the row being filled and the one above it.
     *
     * @param {string} a - First string (indexes the columns of the DP rows)
     * @param {string} b - Second string (one DP row per character)
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @param {number} maxLen - Maximum length of the strings
     * @return {MetricCompute<LevenshteinRaw>} - Object containing the similarity result and raw distance
     */
    compute(a, b, m, n, maxLen) {
        const width = m + 1;
        const [above, row] = Pool.acquireMany('uint16', [width, width]);
        // Row 0: distance between the empty string and each prefix of a
        for (let col = 0; col < width; col++)
            above[col] = col;
        for (let line = 1; line <= n; line++) {
            const charB = b.charCodeAt(line - 1);
            // Column 0: distance between b[0..line) and the empty string
            row[0] = line;
            for (let col = 1; col <= m; col++) {
                const insertion = row[col - 1] + 1;
                const deletion = above[col] + 1;
                // Substituting equal characters is free
                const substitution = above[col - 1] + (a.charCodeAt(col - 1) === charB ? 0 : 1);
                row[col] = Math.min(insertion, deletion, substitution);
            }
            // The finished row becomes the "above" row of the next iteration
            above.set(row);
        }
        // After the final copy the distance sits in the last cell of `above`
        const dist = above[m];
        // Give the rows back to the pool
        Pool.release('uint16', above, width);
        Pool.release('uint16', row, width);
        // Normalize by the length of the longer string
        const res = maxLen === 0 ? 1 : Metric.clamp(1 - dist / maxLen);
        return { res, raw: { dist, maxLen } };
    }
}
3074
+ // Register the Levenshtein distance in the metric registry
3075
+ MetricRegistry.add('levenshtein', LevenshteinDistance);
3076
+
3077
+ /**
3078
+ * Needleman-Wunsch Algorithm
3079
+ * src/metric/NeedlemanWunsch.ts
3080
+ *
3081
+ * @see https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
3082
+ *
3083
+ * The Needleman-Wunsch algorithm performs global alignment, aligning two strings
3084
+ * entirely, including gaps. It is commonly used in bioinformatics for sequence
3085
+ * alignment.
3086
+ *
3087
+ * @module Metric/NeedlemanWunsch
3088
+ * @author Paul Köhler (komed3)
3089
+ * @license MIT
3090
+ */
3091
/**
 * NeedlemanWunschDistance class extends the Metric class to implement the
 * Needleman-Wunsch global alignment algorithm.
 */
class NeedlemanWunschDistance extends Metric {
    /**
     * Constructor for the NeedlemanWunsch class.
     *
     * Initializes the Needleman-Wunsch metric with two input strings or
     * arrays of strings and optional options.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Call the parent Metric constructor with the metric name and inputs
        // Metric is symmetrical
        super('needlemanWunsch', a, b, opt, true);
    }
    /**
     * Calculates the Needleman-Wunsch global alignment score between two strings.
     *
     * The scoring parameters `match`, `mismatch` and `gap` are read from the
     * metric options and default to 1, -1 and -1.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @param {number} maxLen - Maximum length of the strings
     * @return {MetricCompute<NeedlemanRaw>} - Object containing the similarity result and raw score
     */
    compute(a, b, m, n, maxLen) {
        // Scoring parameters (can be customized via options if needed)
        const { match = 1, mismatch = -1, gap = -1 } = this.options;
        // Alignment scores are signed: gap and mismatch penalties are negative
        // by default. Unsigned 16-bit storage would wrap every negative cell
        // (-1 becomes 65535) and Math.max would then select the wrapped
        // values, so the rows are kept in signed Float64Array buffers.
        // NOTE(review): if the Pool offers a signed buffer type, it could be used here instead.
        const len = m + 1;
        let prev = new Float64Array(len);
        let curr = new Float64Array(len);
        // Initialize the first row (accumulated gap penalties)
        prev[0] = 0;
        for (let i = 1; i <= m; i++)
            prev[i] = prev[i - 1] + gap;
        // Fill the DP matrix row by row, one row per character of b
        for (let j = 1; j <= n; j++) {
            // First column: aligning b[0..j] against nothing costs j gaps
            curr[0] = prev[0] + gap;
            // Get the character code of the current character in b
            const cb = b.charCodeAt(j - 1);
            for (let i = 1; i <= m; i++) {
                // Score for match / mismatch
                const score = a.charCodeAt(i - 1) === cb ? match : mismatch;
                // Calculate the maximum score for current cell
                curr[i] = Math.max(
                    prev[i - 1] + score, // Diagonal (match/mismatch)
                    prev[i] + gap, // Up (gap)
                    curr[i - 1] + gap // Left (gap)
                );
            }
            // Swap the rows; every cell of the new `curr` is overwritten in
            // the next iteration, so no copy is required
            [prev, curr] = [curr, prev];
        }
        // The last value in prev is the Needleman-Wunsch score
        const score = prev[m];
        // Use the maximum possible score for the longer string (global alignment)
        const denum = maxLen * match;
        // Return the result as a MetricCompute object
        return {
            res: denum === 0 ? 0 : Metric.clamp(score / denum),
            raw: { score, denum }
        };
    }
}
// Register the Needleman-Wunsch algorithm in the metric registry
MetricRegistry.add('needlemanWunsch', NeedlemanWunschDistance);
3163
+
3164
+ /**
3165
+ * q-Gram Similarity
3166
+ * src/metric/QGram.ts
3167
+ *
3168
+ * @see https://en.wikipedia.org/wiki/Q-gram
3169
+ *
3170
+ * Q-gram similarity is a string-matching algorithm that compares two strings by
3171
+ * breaking them into substrings (q-grams) of length Q. The similarity is computed
3172
+ * as the size of the intersection of q-gram sets divided by the size of the larger
3173
+ * set.
3174
+ *
3175
+ * This metric is widely used in approximate string matching, information retrieval,
3176
+ * and computational linguistics.
3177
+ *
3178
+ * @module Metric/QGramSimilarity
3179
+ * @author Paul Köhler (komed3)
3180
+ * @license MIT
3181
+ */
3182
/**
 * q-gram similarity metric: compares the sets of length-q substrings of two strings.
 */
class QGramSimilarity extends Metric {
    /**
     * Creates a q-gram similarity metric for the given pair of inputs.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Hand name, inputs and options to the Metric base class; the metric is symmetrical
        // NOTE(review): constructed as 'qgram' but registered below as 'qGram' — confirm the casing difference is intended
        super('qgram', a, b, opt, true);
    }
    /**
     * Collects every substring of length q of a string into a pooled set.
     *
     * @param {string} str - The input string
     * @param {number} q - The length of each q-gram
     * @return {Set<string>} - Set of q-grams
     */
    _qGrams(str, q) {
        // Number of start positions that still leave room for q characters
        const count = Math.max(0, str.length - q + 1);
        const bag = Pool.acquire('set', count);
        let start = 0;
        while (start < count) {
            bag.add(str.slice(start, start + q));
            start++;
        }
        return bag;
    }
    /**
     * Computes the q-gram similarity: shared q-grams divided by the size of
     * the larger q-gram set.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @return {MetricCompute<QGramRaw>} - Similarity result plus the raw counts
     */
    compute(a, b) {
        // q comes from the options and falls back to bigrams
        const { q = 2 } = this.options;
        const gramsA = this._qGrams(a, q);
        const gramsB = this._qGrams(b, q);
        // Count the q-grams of a that also occur in b
        let intersection = 0;
        for (const gram of gramsA)
            intersection += gramsB.has(gram) ? 1 : 0;
        // Remember the sizes before the sets go back to the pool
        const sizeA = gramsA.size;
        const sizeB = gramsB.size;
        const size = Math.max(sizeA, sizeB);
        Pool.release('set', gramsA, sizeA);
        Pool.release('set', gramsB, sizeB);
        // No q-grams on either side counts as a perfect match
        if (size === 0)
            return { res: 1, raw: { intersection, size } };
        return { res: Metric.clamp(intersection / size), raw: { intersection, size } };
    }
}
// Register the q-gram similarity in the metric registry
MetricRegistry.add('qGram', QGramSimilarity);
3248
+
3249
+ /**
3250
+ * Smith-Waterman Algorithm
3251
+ * src/metric/SmithWaterman.ts
3252
+ *
3253
+ * @see https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm
3254
+ *
3255
+ * The Smith-Waterman algorithm performs local alignment, finding the best matching
3256
+ * subsequence between two strings. It is commonly used in bioinformatics for local
3257
+ * sequence alignment. Instead of looking at the entire sequence, the Smith–Waterman
3258
+ * algorithm compares segments of all possible lengths and optimizes the similarity
3259
+ * measure.
3260
+ *
3261
+ * @module Metric/SmithWatermanDistance
3262
+ * @author Paul Köhler (komed3)
3263
+ * @license MIT
3264
+ */
3265
/**
 * Smith-Waterman local alignment, exposed as a normalized similarity metric.
 */
class SmithWatermanDistance extends Metric {
    /**
     * Creates a Smith-Waterman metric for the given pair of inputs.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // Hand name, inputs and options to the Metric base class; the metric is symmetrical
        super('smithWaterman', a, b, opt, true);
    }
    /**
     * Computes the best local alignment score with a two-row dynamic program
     * and normalizes it by the best score the shorter string could reach.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @return {MetricCompute<SmithWatermanRaw>} - Similarity result plus the raw score
     */
    compute(a, b, m, n) {
        // Scoring parameters, overridable through the metric options
        const { match = 2, mismatch = -1, gap = -2 } = this.options;
        // Both DP rows have one cell per prefix of a (including the empty prefix)
        const size = m + 1;
        const [above, row] = Pool.acquireMany('uint16', [size, size]);
        // Local alignment starts from an all-zero first row
        for (let col = 0; col < size; col++)
            above[col] = 0;
        // Highest cell value seen anywhere in the matrix
        let maxScore = 0;
        // One DP row per character of b
        for (let line = 1; line <= n; line++) {
            const codeB = b.charCodeAt(line - 1);
            // The first column is always zero
            row[0] = 0;
            for (let col = 1; col <= m; col++) {
                const pairScore = a.charCodeAt(col - 1) === codeB ? match : mismatch;
                const diagonal = above[col - 1] + pairScore;
                const up = above[col] + gap;
                const left = row[col - 1] + gap;
                // Cells never drop below zero (an alignment may restart anywhere)
                row[col] = Math.max(0, diagonal, up, left);
                // Compare against the value as stored in the row buffer
                maxScore = Math.max(maxScore, row[col]);
            }
            // The finished row becomes the reference row for the next character
            above.set(row);
        }
        // Hand both buffers back to the pool
        Pool.release('uint16', above, size);
        Pool.release('uint16', row, size);
        // Best score reachable by the shorter string (local alignment)
        const denum = Math.min(m * match, n * match);
        if (denum === 0)
            return { res: 0, raw: { score: maxScore, denum } };
        return { res: Metric.clamp(maxScore / denum), raw: { score: maxScore, denum } };
    }
}
// Make the metric available under the name 'smithWaterman'
MetricRegistry.add('smithWaterman', SmithWatermanDistance);
3338
+
3339
+ /**
3340
+ * Abstract Phonetic
3341
+ * src/phonetic/Phonetic.ts
3342
+ *
3343
+ * @see https://en.wikipedia.org/wiki/Phonetic_algorithm
3344
+ *
3345
+ * A phonetic algorithm refers to a method for indexing words according to their
3346
+ * pronunciation. When the algorithm relies on orthography, it is significantly
3347
+ * influenced by the spelling conventions of the language for which it is intended:
3348
+ * since the majority of phonetic algorithms were created for English, they tend
3349
+ * to be less effective for indexing words in other languages.
3350
+ *
3351
+ * Phonetic search has numerous applications, and one of the initial use cases has
3352
+ * been in trademark searches to verify that newly registered trademarks do not
3353
+ * pose a risk of infringing upon existing trademarks due to their pronunciation.
3354
+ *
3355
+ * This module provides an abstract class for generating phonetic indices based
3356
+ * on mappings and rules. It allows for the implementation of various phonetic
3357
+ * algorithms by extending the abstract class.
3358
+ *
3359
+ * @module Phonetic
3360
+ * @author Paul Köhler (komed3)
3361
+ * @license MIT
3362
+ */
3363
// Get the singleton profiler instance for performance monitoring
const profiler$1 = Profiler.getInstance();
/**
 * Abstract class representing a phonetic algorithm.
 *
 * The protected methods `applyRules`, `encode`, `mapChar`, `equalLen`, `word2Chars`,
 * `exitEarly`, `adjustCode`, `loop` and `loopAsync` can be overridden in subclasses
 * to implement specific phonetic algorithms.
 *
 * @abstract
 */
class Phonetic {
    // Cache for indexed words to avoid redundant calculations
    static cache = new HashTable();
    /**
     * Default phonetic options.
     *
     * This object contains default settings for phonetic algorithms,
     * implemented in the subclass.
     */
    static default;
    // Phonetic algorithm name for identification
    algo;
    // Phonetic map and options for the algorithm
    options;
    map;
    /**
     * Static method to clear the cache of indexed words.
     */
    static clear() { this.cache.clear(); }
    /**
     * Constructor for the Phonetic class.
     *
     * Merges the subclass defaults with the provided options and looks up the
     * mapping named by `options.map` for the given algorithm.
     *
     * @param {string} algo - The name of the algorithm (e.g. 'soundex')
     * @param {PhoneticOptions} [opt] - Options for the phonetic algorithm
     * @throws {Error} - If the requested mapping is not declared
     */
    constructor(algo, opt = {}) {
        // Set the options by merging the default options with the provided ones
        this.options = merge(this.constructor.default ?? {}, opt);
        // Get the mapping based on the provided options
        const map = PhoneticMappingRegistry.get(algo, this.options.map);
        // If the mapping is not defined, throw an error
        if (map === undefined)
            throw new Error(`requested mapping <${this.options.map}> is not declared`);
        this.algo = algo;
        this.map = map;
    }
    /**
     * Applies phonetic rules to a character in a word context.
     *
     * Rules are tested in ruleset order; the code of the first rule whose
     * conditions (char, position, prev/next context, leading/trailing, match)
     * all hold is returned.
     *
     * @param {string} char - The current character
     * @param {number} i - The current position within the word
     * @param {string[]} chars - The word as an array of characters
     * @param {number} charLen - The total length of the word
     * @returns {string|undefined} - The rule code or undefined if no rule applies
     */
    applyRules(char, i, chars, charLen) {
        const { ruleset = [] } = this.map;
        // If no rules are provided, return undefined
        if (!ruleset || !ruleset.length)
            return undefined;
        // Index of the last character; `charLen` is a length, so the final
        // position is charLen - 1 (comparing against charLen never matches)
        const last = charLen - 1;
        // Get the surrounding characters (empty string beyond the word edges)
        const prev = chars[i - 1] || '', prev2 = chars[i - 2] || '';
        const next = chars[i + 1] || '', next2 = chars[i + 2] || '';
        // Iterate over the rules to find a matching rule for the current character
        for (const rule of ruleset) {
            // Skip if the rule does not match the current character
            if (rule.char && rule.char !== char)
                continue;
            // Position in the word: 'start' is the first character, 'end' the
            // last one, 'middle' everything strictly in between
            if (rule.position === 'start' && i !== 0)
                continue;
            if (rule.position === 'middle' && (i === 0 || i === last))
                continue;
            if (rule.position === 'end' && i !== last)
                continue;
            // Previous character(s)
            if (rule.prev && !rule.prev.includes(prev))
                continue;
            if (rule.prevNot && rule.prevNot.includes(prev))
                continue;
            if (rule.prev2 && !rule.prev2.includes(prev2))
                continue;
            if (rule.prev2Not && rule.prev2Not.includes(prev2))
                continue;
            // Following character(s)
            if (rule.next && !rule.next.includes(next))
                continue;
            if (rule.nextNot && rule.nextNot.includes(next))
                continue;
            if (rule.next2 && !rule.next2.includes(next2))
                continue;
            if (rule.next2Not && rule.next2Not.includes(next2))
                continue;
            // Special case: beginning of a word (e.g. chars.slice(0, n))
            if (rule.leading && !rule.leading.includes(chars.slice(0, rule.leading.length).join('')))
                continue;
            // Special case: end of word (e.g. chars.slice(-n))
            if (rule.trailing && !rule.trailing.includes(chars.slice(-rule.trailing.length).join('')))
                continue;
            // Check multiple characters (e.g. bigram/trigram) starting at i
            if (rule.match && !rule.match.every((c, j) => chars[i + j] === c))
                continue;
            // If all conditions met, return the rule code
            return rule.code;
        }
        // If no rule matched, return undefined
        return undefined;
    }
    /**
     * Generates the phonetic code for a given word.
     *
     * This method processes the word character by character, applying phonetic rules
     * and mappings to generate a phonetic code.
     *
     * @param {string} word - The input word to be converted into a phonetic code
     * @returns {string} - The generated phonetic code
     */
    encode(word) {
        const { map = {}, ignore = [] } = this.map;
        // Get the characters of the word and its length
        const chars = this.word2Chars(word);
        const charLen = chars.length;
        let code = '';
        let lastCode = null;
        // Iterate over each character in the word
        for (let i = 0; i < charLen; i++) {
            const char = chars[i];
            // Skip characters that are in the ignore list
            if (ignore.includes(char))
                continue;
            // Convert the character to its phonetic code
            const mapped = this.mapChar(char, i, chars, charLen, lastCode, map);
            // If no code is generated, skip to the next character
            if (mapped === undefined)
                continue;
            // Append the generated code and remember it for de-duplication
            code += mapped;
            lastCode = mapped;
            // If the code length reaches the specified limit, exit early
            if (this.exitEarly(code, i))
                break;
        }
        // Return the adjusted phonetic code
        return this.adjustCode(code, chars);
    }
    /**
     * Converts a character to its phonetic code based on the mapping and rules.
     *
     * @param {string} char - The current character
     * @param {number} i - The current position within the word
     * @param {string[]} chars - The word as an array of characters
     * @param {number} charLen - The total length of the word
     * @param {string|null} lastCode - The last code generated (to avoid duplicates)
     * @param {Record<string, string>} map - The phonetic mapping
     * @returns {string|undefined} - The phonetic code or undefined if no code applies
     */
    mapChar(char, i, chars, charLen, lastCode, map) {
        const { dedupe = true } = this.options;
        // A matching rule wins over the plain mapping; characters covered by
        // neither yield undefined
        const c = this.applyRules(char, i, chars, charLen) ?? map[char] ?? undefined;
        // De-duplicate the code if necessary
        return dedupe && c === lastCode ? undefined : c;
    }
    /**
     * Ensures the phonetic code has a fixed length by padding or truncating.
     *
     * A `length` option of -1 leaves the input unchanged.
     *
     * @param {string} input - The input string to be adjusted
     * @returns {string} - The adjusted string with fixed length
     */
    equalLen(input) {
        const { length = -1, pad = '0' } = this.options;
        return length === -1 ? input : (input + pad.repeat(length)).slice(0, length);
    }
    /**
     * Converts a word into an array of lower-cased characters.
     *
     * @param {string} word - The input word to be converted
     * @returns {string[]} - An array of characters from the input word
     */
    word2Chars(word) { return word.toLowerCase().split(''); }
    /**
     * Determines whether to exit early based on the current phonetic code length.
     *
     * @param {string} code - The current phonetic code
     * @param {number} i - The current index in the word (unused here, available to overrides)
     * @returns {boolean} - True if a positive length limit is set and has been reached
     */
    exitEarly(code, i) {
        const { length = -1 } = this.options;
        return length > 0 && code.length >= length;
    }
    /**
     * Adjusts the phonetic code. The base implementation returns it unchanged.
     *
     * @param {string} code - The phonetic code to be adjusted
     * @param {string[]} chars - The characters of the encoded word
     * @returns {string} - The adjusted phonetic code
     */
    adjustCode(code, chars) { return code; }
    /**
     * Processes an array of words to generate their phonetic indices.
     *
     * This method iterates over each word, generates its phonetic code
     * (served from the shared cache when available), and ensures that the
     * resulting codes are of equal length.
     *
     * @param {string[]} words - An array of words to be processed
     * @returns {string[]} - An array of phonetic indices for the input words
     */
    loop(words) {
        const index = [];
        // Loop over each word in the input array
        for (const word of words) {
            // Generate a cache key based on the algorithm and word
            // NOTE(review): the key is built from algo and word only; confirm
            // that instances with different options cannot share stale entries
            const key = Phonetic.cache.key(this.algo, [word]);
            // If the key exists in the cache, use the cached result
            // Otherwise, encode the word using the algorithm
            const code = Phonetic.cache.get(key || '') ?? (() => {
                // Get the phonetic code for the word
                const res = this.encode(word);
                // If a key was generated, store the result in the cache
                if (key)
                    Phonetic.cache.set(key, res);
                return res;
            })();
            // If a code is generated, add it to the index
            if (code && code.length)
                index.push(this.equalLen(code));
        }
        return index;
    }
    /**
     * Asynchronously processes an array of words to generate their phonetic indices.
     *
     * Each word is encoded and awaited in turn; unlike `loop`, this path does
     * not consult the shared cache.
     *
     * @param {string[]} words - An array of words to be processed
     * @returns {Promise<string[]>} - A promise that resolves to an array of phonetic indices for the input words
     */
    async loopAsync(words) {
        const index = [];
        // Loop over each word in the input array
        for (const word of words) {
            // Get the phonetic code for the word asynchronously
            const code = await Promise.resolve(this.encode(word));
            // If a code is generated, add it to the index
            if (code && code.length)
                index.push(this.equalLen(code));
        }
        return index;
    }
    /**
     * Get the name of the phonetic algorithm.
     *
     * @returns {string} - The name of the algorithm
     */
    getAlgoName() { return this.algo; }
    /**
     * Generates a phonetic index for the given input string.
     *
     * @param {string} input - The input string to be indexed
     * @returns {string[]} - An array of phonetic indices for the input words
     */
    getIndex(input) {
        const { delimiter = ' ' } = this.options;
        // Split the input string by the specified delimiter and loop over it
        return profiler$1.run(() => this.loop(input.split(delimiter).filter(Boolean)).filter(Boolean));
    }
    /**
     * Asynchronously generates a phonetic index for the given input string.
     *
     * @param {string} input - The input string to be indexed
     * @returns {Promise<string[]>} - A promise that resolves to an array of phonetic indices for the input words
     */
    async getIndexAsync(input) {
        const { delimiter = ' ' } = this.options;
        // Split the input string by the specified delimiter and loop over it asynchronously
        return (await profiler$1.runAsync(async () => await this.loopAsync(input.split(delimiter).filter(Boolean)))).filter(Boolean);
    }
}
/**
 * Phonetic registry service for managing phonetic implementations.
 *
 * This registry allows for dynamic registration and retrieval of phonetic classes,
 * enabling the use of various phonetic algorithms in a consistent manner.
 */
const PhoneticRegistry = Registry('phonetic', Phonetic);
3658
/**
 * Phonetic Mapping Service
 *
 * Keeps the phonetic mappings of all algorithms in one place. Mappings are
 * stored per algorithm under a mapping ID and can be added, removed, checked,
 * fetched and listed.
 */
const PhoneticMappingRegistry = (() => {
    // Prototype-less store: algorithm name -> (mapping ID -> mapping)
    const store = Object.create(null);
    // Returns the mapping table of an algorithm, creating it on first access
    function tableFor(algo) {
        if (!store[algo])
            store[algo] = Object.create(null);
        return store[algo];
    }
    return {
        /**
         * Stores a phonetic mapping under the given algorithm and ID.
         *
         * @param {string} algo - The phonetic algorithm identifier (e.g., 'soundex', 'metaphone')
         * @param {string} id - The unique identifier for the mapping
         * @param {PhoneticMap} map - The phonetic map to be added, containing rules and mappings
         * @param {boolean} [update=false] - Whether to allow overwriting an existing entry
         * @throws {Error} If the mapping name already exists and update is false
         */
        add(algo, id, map, update = false) {
            const table = tableFor(algo);
            // Existing entries are only replaced when explicitly requested
            if (!update) {
                if (id in table)
                    throw new Error(`entry <${id}> already exists / use <update=true> to overwrite`);
            }
            table[id] = map;
        },
        /**
         * Deletes the mapping stored under the given algorithm and ID.
         *
         * @param {string} algo - The phonetic algorithm identifier
         * @param {string} id - The unique identifier for the mapping to be removed
         */
        remove(algo, id) {
            const table = tableFor(algo);
            delete table[id];
        },
        /**
         * Tells whether a mapping is stored under the given algorithm and ID.
         *
         * @param {string} algo - The phonetic algorithm identifier
         * @param {string} id - The unique identifier for the mapping to check
         * @returns {boolean} - Returns true if the mapping exists, false otherwise
         */
        has(algo, id) {
            const table = tableFor(algo);
            return id in table;
        },
        /**
         * Fetches the mapping stored under the given algorithm and ID.
         *
         * @param {string} algo - The phonetic algorithm identifier
         * @param {string} id - The unique identifier for the mapping to retrieve
         * @returns {PhoneticMap | undefined} - Returns the phonetic map if found, otherwise undefined
         */
        get(algo, id) {
            const table = tableFor(algo);
            return table[id];
        },
        /**
         * Lists the IDs of all mappings stored for an algorithm.
         *
         * @param {string} algo - The phonetic algorithm identifier
         * @returns {string[]} - Returns an array of mapping IDs for the specified algorithm
         */
        list(algo) {
            const table = tableFor(algo);
            return Object.keys(table);
        }
    };
})();
3718
+
3719
+ /**
3720
+ * Cologne Phonetic Algorithm
3721
+ * src/phonetic/Cologne.ts
3722
+ *
3723
+ * @see https://en.wikipedia.org/wiki/Cologne_phonetics
3724
+ *
3725
+ * Cologne phonetics, also known as `Kölner Phonetik` or the `Cologne process`,
3726
+ * is a phonetic algorithm that assigns a sequence of digits, referred to as the
3727
+ * phonetic code, to words. The purpose of this method is to ensure that words
3728
+ * with identical sounds receive the same code. This algorithm can facilitate a
3729
+ * similarity search among words.
3730
+ *
3731
+ * Cologne phonetics is associated with the well-known Soundex phonetic algorithm,
3732
+ * yet it is specifically optimized for the German language. This algorithm was
3733
+ * introduced by Hans Joachim Postel in 1969.
3734
+ *
3735
+ * The Cologne phonetic algorithm works by mapping letters to digits, ignoring
3736
+ * certain letters, and applying specific rules to handle character combinations.
3737
+ *
3738
+ * @module Phonetic/Cologne
3739
+ * @author Paul Köhler (komed3)
3740
+ * @license MIT
3741
+ */
3742
/**
 * Cologne phonetics (Kölner Phonetik), built on top of the Phonetic base class.
 */
class Cologne extends Phonetic {
    // Option defaults used when the caller does not override them
    static default = {
        map: 'default',
        delimiter: ' ',
        length: -1,
        dedupe: true
    };
    /**
     * Creates a Cologne encoder with the given options.
     *
     * @param {PhoneticOptions} [opt] - Options for the Cologne phonetic algorithm
     */
    constructor(opt = {}) {
        super('cologne', opt);
    }
    /**
     * Strips every '0' from the code except one in the leading position.
     *
     * @param {string} code - The phonetic code to adjust
     * @returns {string} - The adjusted phonetic code
     */
    adjustCode(code) {
        const head = code.slice(0, 1);
        const tail = code.slice(1);
        return `${head}${tail.replaceAll('0', '')}`;
    }
}
// Make the algorithm available under the name 'cologne'
PhoneticRegistry.add('cologne', Cologne);
// Default Cologne mapping: letter-to-digit table, ignored letters and context rules
PhoneticMappingRegistry.add('cologne', 'default', {
    map: {
        // Vowels (and j, y)
        a: '0', ä: '0', e: '0', i: '0', j: '0', o: '0', ö: '0', u: '0', ü: '0', y: '0',
        // Consonant groups
        b: '1', p: '1',
        d: '2', t: '2',
        f: '3', v: '3', w: '3',
        g: '4', k: '4', q: '4',
        l: '5',
        m: '6', n: '6',
        r: '7',
        c: '8', s: '8', ß: '8', z: '8',
        x: '48'
    },
    ignore: ['h'],
    // Context rules; they take precedence over the plain table above
    ruleset: [
        { char: 'p', next: ['h'], code: '3' },
        {
            char: 'c',
            position: 'start',
            next: ['a', 'h', 'k', 'l', 'o', 'q', 'r', 'u', 'x'],
            code: '4'
        },
        {
            char: 'c',
            next: ['a', 'h', 'k', 'o', 'q', 'u', 'x'],
            prevNot: ['s', 'z'],
            code: '4'
        },
        { char: 'd', next: ['c', 's', 'z'], code: '8' },
        { char: 't', next: ['c', 's', 'z'], code: '8' },
        { char: 'x', prev: ['c', 'k', 'q'], code: '8' }
    ]
});
3787
+
3788
+ /**
3789
+ * Metaphone Phonetic Algorithm
3790
+ * src/phonetic/Metaphone.ts
3791
+ *
3792
+ * @see https://en.wikipedia.org/wiki/Metaphone
3793
+ *
3794
+ * Metaphone is a phonetic algorithm for indexing words by their English pronunciation.
3795
+ * It encodes words into a string of consonant symbols, allowing for the comparison of
3796
+ * words based on their pronunciation rather than their spelling. Metaphone is more
3797
+ * accurate than Soundex for English and is widely used in search, spell-checking,
3798
+ * and fuzzy matching.
3799
+ *
3800
+ * This implementation uses a mapping and a comprehensive ruleset to efficiently
3801
+ * transform input words into their Metaphone code. The algorithm drops or transforms
3802
+ * letters according to context-sensitive rules, and only retains vowels at the start.
3803
+ *
3804
+ * @module Phonetic/Metaphone
3805
+ * @author Paul Köhler (komed3)
3806
+ * @license MIT
3807
+ */
/**
 * Phonetic encoder implementing the Metaphone algorithm.
 *
 * Adds a pre-processing step (collapsing doubled letters) and a post-processing
 * step (dropping non-leading vowels) around the Phonetic base implementation.
 */
class Metaphone extends Phonetic {

    // Default options for the Metaphone phonetic algorithm
    static default = {
        map: 'en90', delimiter: ' ', length: -1, pad: '', dedupe: false
    };

    /**
     * Constructor for the Metaphone class.
     *
     * Forwards the algorithm id 'metaphone' together with the given options
     * to the Phonetic constructor.
     *
     * @param {PhoneticOptions} [opt] - Options for the Metaphone phonetic algorithm
     */
    constructor(opt = {}) { super('metaphone', opt); }

    /**
     * Generates the Metaphone code for a given word.
     *
     * Runs of the same letter (compared case-insensitively) are collapsed to
     * the first letter of the run before encoding. The letter `C` is excluded
     * from the character class, so `CC` is left untouched.
     *
     * @param {string} word - The input word to be converted into a Metaphone code
     * @returns {string} - The generated Metaphone code
     */
    encode(word) {
        // `[A-BD-Z]` never matches `C`/`c`, so no special case for `C` is
        // needed in the replacement: `$1` always is the letter to keep.
        const collapsed = word.replace(/([A-BD-Z])\1+/gi, '$1');
        // Use the base implementation for rule/mapping application
        return super.encode(collapsed);
    }

    /**
     * Adjusts the Metaphone code by removing the vowels `A`, `E`, `I`, `O`
     * and `U` everywhere except in the first position.
     *
     * @param {string} code - The Metaphone code to be adjusted
     * @returns {string} - The adjusted Metaphone code
     */
    adjustCode(code) {
        return code.slice(0, 1) + code.slice(1).replace(/[AEIOU]/g, '');
    }

}
// Expose the Metaphone encoder under the key 'metaphone'
PhoneticRegistry.add('metaphone', Metaphone);

/**
 * English Metaphone mapping, registered as 'en90'.
 *
 * This version is based on the original BASIC implementation from 1990,
 * written by Lawrence Philips.
 *
 * @see https://gist.github.com/Rostepher/b688f709587ac145a0b3
 */
{
    // Returns a fresh vowel list on every call so that no rule shares an array
    const vowels = () => ['a', 'e', 'i', 'o', 'u'];

    PhoneticMappingRegistry.add('metaphone', 'en90', {
        map: {
            a: 'A', b: 'B', c: 'K', d: 'T', e: 'E', f: 'F',
            g: 'K', h: 'H', i: 'I', j: 'J', k: 'K',
            l: 'L', m: 'M', n: 'N', o: 'O', p: 'P', q: 'K',
            r: 'R', s: 'S', t: 'T', u: 'U', v: 'F', w: 'W',
            x: 'KS', y: 'Y', z: 'S'
        },
        ruleset: [
            // Initial `AE`, `GN`, `KN`, `PN`, `WR`: the first letter is dropped
            { char: 'a', position: 'start', next: ['e'], code: '' },
            { char: 'g', position: 'start', next: ['n'], code: '' },
            { char: 'k', position: 'start', next: ['n'], code: '' },
            { char: 'p', position: 'start', next: ['n'], code: '' },
            { char: 'w', position: 'start', next: ['r'], code: '' },
            // Final `B` after `M` is dropped
            { char: 'b', position: 'end', prev: ['m'], code: '' },
            // `CH` (not after `S`) and `CIA`: `C` becomes `X`
            { char: 'c', next: ['h'], prevNot: ['s'], code: 'X' },
            { char: 'c', next: ['i'], next2: ['a'], code: 'X' },
            // `C` before `E`, `I` or `Y` becomes `S`
            { char: 'c', next: ['e', 'i', 'y'], code: 'S' },
            // `DGE`, `DGI`, `DGY`: `D` becomes `J`
            { char: 'd', next: ['g'], next2: ['e', 'i', 'y'], code: 'J' },
            // `GH`: `G` is dropped unless the `H` is last or stands before a vowel
            { char: 'g', next: ['h'], next2Not: ['', ...vowels()], code: '' },
            // `G` is dropped in a trailing `GN` or `GNED`
            { char: 'g', trailing: 'n', code: '' },
            { char: 'g', trailing: 'ned', code: '' },
            // `G` before `E`, `I` or `Y` becomes `J`, except in `GG`
            { char: 'g', next: ['e', 'i', 'y'], prevNot: ['g'], code: 'J' },
            // `H` after a vowel and not before a vowel is dropped
            { char: 'h', prev: vowels(), nextNot: vowels(), code: '' },
            // `H` after `C`, `G`, `P`, `S` or `T` is dropped
            { char: 'h', prev: ['c', 'g', 'p', 's', 't'], code: '' },
            // `K` after `C` is dropped
            { char: 'k', prev: ['c'], code: '' },
            // `PH` becomes `F`
            { char: 'p', next: ['h'], code: 'F' },
            // `SH`, `SIA`, `SIO`: `S` becomes `X`
            { char: 's', next: ['h'], code: 'X' },
            { char: 's', next: ['i'], next2: ['a', 'o'], code: 'X' },
            // `TIA`, `TIO`: `T` becomes `X`
            { char: 't', next: ['i'], next2: ['a', 'o'], code: 'X' },
            // `TH` becomes `0` (zero)
            { char: 't', next: ['h'], code: '0' },
            // `TCH`: `T` is dropped
            { char: 't', next: ['c'], next2: ['h'], code: '' },
            // `W` not followed by a vowel is dropped
            { char: 'w', nextNot: vowels(), code: '' },
            // Initial `WH`: the `H` is dropped
            { char: 'h', leading: 'w', code: '' },
            // Initial `X` becomes `S`
            { char: 'x', position: 'start', code: 'S' },
            // `Y` not followed by a vowel is dropped
            { char: 'y', nextNot: vowels(), code: '' }
        ]
    });
}
3915
+
3916
+ /**
3917
+ * Soundex Phonetic Algorithm
3918
+ * src/phonetic/Soundex.ts
3919
+ *
3920
+ * @see https://en.wikipedia.org/wiki/Soundex
3921
+ *
3922
+ * Soundex is a phonetic algorithm for indexing names by sound. It is used to
3923
+ * encode words into a phonetic representation, allowing for the comparison of
3924
+ * words based on their pronunciation rather than their spelling. This works
3925
+ * by mapping letters to digits, ignoring certain letters, and applying specific
3926
+ * rules to handle character combinations.
3927
+ *
3928
+ * It is particularly useful for matching names that may be spelled differently
3929
+ * but sound similar, and is commonly used in genealogical research and databases
3930
+ * to find similar-sounding names.
3931
+ *
3932
+ * The Soundex algorithm is not case-sensitive and ignores vowels and certain
3933
+ * consonants. It outputs an array of strings that represents the phonetic code
3934
+ * of the input, typically limited to the length of four characters.
3935
+ *
3936
+ * @module Phonetic/Soundex
3937
+ * @author Paul Köhler (komed3)
3938
+ * @license MIT
3939
+ */
/**
 * Phonetic encoder implementing the Soundex algorithm.
 *
 * Only the default options and the final code adjustment are defined here;
 * everything else is inherited from the Phonetic base class.
 */
class Soundex extends Phonetic {

    // Default options for the Soundex phonetic algorithm
    static default = {
        map: 'en', delimiter: ' ', length: 4, pad: '0', dedupe: true
    };

    /**
     * Constructor for the Soundex class.
     *
     * Forwards the algorithm id 'soundex' together with the given options
     * to the Phonetic constructor.
     *
     * @param {PhoneticOptions} [opt] - Options for the Soundex phonetic algorithm
     */
    constructor(opt = {}) { super('soundex', opt); }

    /**
     * Adjusts the phonetic code: the first code character is replaced by the
     * uppercased first entry of `chars`, and every '0' is removed from the rest.
     *
     * An empty or missing `chars` list contributes an empty leading letter
     * instead of raising a TypeError.
     *
     * @param {string} code - The phonetic code to adjust
     * @param {string[]} chars - Character list supplied by the caller; only its
     *                           first entry is used
     * @returns {string} - The adjusted phonetic code
     */
    adjustCode(code, chars) {
        // Guard against empty input, where there is no first character
        const initial = (chars?.[0] ?? '').toUpperCase();
        return initial + code.slice(1).replaceAll('0', '');
    }

}
// Expose the Soundex encoder under the key 'soundex'
PhoneticRegistry.add('soundex', Soundex);

// English Soundex mapping, registered as 'en'
PhoneticMappingRegistry.add('soundex', 'en', {
    map: {
        // Vowels plus `h`, `w` and `y` carry the code 0
        a: '0', e: '0', h: '0', i: '0', o: '0', u: '0', w: '0', y: '0',
        b: '1', f: '1', p: '1', v: '1',
        c: '2', g: '2', j: '2', k: '2', q: '2', s: '2', x: '2', z: '2',
        d: '3', t: '3',
        l: '4',
        m: '5', n: '5',
        r: '6'
    }
});

// German Soundex mapping, registered as 'de'
PhoneticMappingRegistry.add('soundex', 'de', {
    map: {
        // Vowels (including umlauts) plus `h`, `j` and `y` carry the code 0
        a: '0', ä: '0', e: '0', h: '0', i: '0', j: '0', o: '0', ö: '0', u: '0', ü: '0', y: '0',
        b: '1', f: '1', p: '1', v: '1', w: '1',
        c: '2', g: '2', k: '2', q: '2', s: '2', ß: '2', x: '2', z: '2',
        d: '3', t: '3',
        l: '4',
        m: '5', n: '5',
        r: '6'
    },
    ruleset: [
        // `C` followed by `H` becomes 7
        {
            char: 'c',
            next: ['h'],
            code: '7'
        }
    ]
});
3991
+
3992
+ /**
3993
+ * CmpStr Main API
3994
+ * src/CmpStr.ts
3995
+ *
3996
+ * The CmpStr class provides a comprehensive, highly abstracted, and type-safe interface
3997
+ * for string comparison, similarity measurement, phonetic indexing, filtering, normalization,
3998
+ * and text analysis. It unifies all core features of the CmpStr package and exposes a
3999
+ * consistent, user-friendly API for both single and batch operations.
4000
+ *
4001
+ * Features:
4002
+ * - Centralized management of metrics, phonetic algorithms, and filters
4003
+ * - Flexible normalization and filtering pipeline for all inputs
4004
+ * - Batch, pairwise, and single string comparison with detailed results
4005
+ * - Phonetic indexing and phonetic-aware search and comparison
4006
+ * - Text analysis and unified diff utilities
4007
+ * - Full TypeScript type safety and extensibility
4008
+ *
4009
+ * @module CmpStr
4010
+ * @author Paul Köhler (komed3)
4011
+ * @license MIT
4012
+ */
4013
+ // Profiler instance obtained via Profiler.getInstance(); its `services` are exposed as the static CmpStr.profiler
4014
+ const profiler = Profiler.getInstance();
/**
 * The main CmpStr class that provides a unified interface for string comparison,
 * phonetic indexing, filtering, and text analysis.
 *
 * @template R - The type of the metric result, defaults to MetricRaw
 */
class CmpStr {

    /**
     * --------------------------------------------------------------------------------
     * Static methods and properties for global access to CmpStr features
     * --------------------------------------------------------------------------------
     *
     * These static members provide access to the core features of the CmpStr
     * package without needing to instantiate a CmpStr object.
     */

    /**
     * Adds, removes, pauses, resumes, lists, or clears global filters.
     *
     * NOTE(review): the methods are referenced unbound here and in the static
     * objects below; this assumes they do not rely on `this` — confirm against
     * the respective classes.
     *
     * @see Filter
     */
    static filter = {
        add: Filter.add,
        remove: Filter.remove,
        pause: Filter.pause,
        resume: Filter.resume,
        list: Filter.list,
        clear: Filter.clear
    };

    /**
     * Adds, removes, checks, or lists available metrics.
     *
     * @see MetricRegistry
     */
    static metric = {
        add: MetricRegistry.add,
        remove: MetricRegistry.remove,
        has: MetricRegistry.has,
        list: MetricRegistry.list
    };

    /**
     * Adds, removes, checks, or lists available phonetic algorithms and mappings.
     *
     * @see PhoneticRegistry
     */
    static phonetic = {
        add: PhoneticRegistry.add,
        remove: PhoneticRegistry.remove,
        has: PhoneticRegistry.has,
        list: PhoneticRegistry.list,
        map: {
            add: PhoneticMappingRegistry.add,
            remove: PhoneticMappingRegistry.remove,
            has: PhoneticMappingRegistry.has,
            list: PhoneticMappingRegistry.list
        }
    };

    /**
     * Provides access to the global profiler services.
     *
     * @see Profiler
     */
    static profiler = profiler.services;

    /**
     * Clears the caches for normalizer, metric, and phonetic modules.
     */
    static clearCache = {
        normalizer: Normalizer.clear,
        metric: Metric.clear,
        phonetic: Phonetic.clear
    };

    /**
     * Returns a TextAnalyzer instance for the given input string.
     *
     * @param {string} [input] - The input string
     * @returns {TextAnalyzer} - The text analyzer
     */
    static analyze(input) { return new TextAnalyzer(input); }

    /**
     * Returns a DiffChecker instance for computing the unified diff between two texts.
     *
     * @param {string} a - The first (original) text
     * @param {string} b - The second (modified) text
     * @param {DiffOptions} [opt] - Optional diff configuration
     * @returns {DiffChecker} - The diff checker instance
     */
    static diff(a, b, opt) { return new DiffChecker(a, b, opt); }

    /**
     * --------------------------------------------------------------------------------
     * Instantiate the CmpStr class
     * --------------------------------------------------------------------------------
     *
     * Methods to create a new CmpStr instance with the given options.
     * Using the static `create` method is recommended to ensure proper instantiation.
     */

    /**
     * Creates a new CmpStr instance with the given options.
     *
     * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
     * @returns {CmpStr<R>} - A new CmpStr instance
     */
    static create(opt) { return new CmpStr(opt); }

    // The options object that holds the configuration for this CmpStr instance
    options = Object.create(null);

    /**
     * Creates a new CmpStr instance with the given options.
     *
     * A string is treated as serialized (JSON) options, anything else truthy
     * as an options object. Falsy values leave the options empty.
     *
     * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
     */
    constructor(opt) {
        if (!opt) return;
        if (typeof opt === 'string') this.setSerializedOptions(opt);
        else this.setOptions(opt);
    }

    /**
     * ---------------------------------------------------------------------------------
     * Utility methods for internal use
     * ---------------------------------------------------------------------------------
     *
     * These methods provide utility functions for asserting conditions, merging
     * options, normalizing inputs, filtering, and preparing inputs for comparison.
     */

    /**
     * Asserts a condition and throws if the condition is not met.
     *
     * @param {string} cond - The condition to meet ('metric' or 'phonetic')
     * @param {any} [test] - Value to test for
     * @throws {Error} If the condition is not met or the condition is unknown
     */
    assert(cond, test) {
        switch (cond) {
            // Check if the metric exists
            case 'metric':
                if (!CmpStr.metric.has(test))
                    throw new Error(`CmpStr <metric> must be set, call .setMetric(), ` +
                        `use CmpStr.metric.list() for available metrics`);
                break;
            // Check if the phonetic algorithm exists
            case 'phonetic':
                if (!CmpStr.phonetic.has(test))
                    throw new Error(`CmpStr <phonetic> must be set, call .setPhonetic(), ` +
                        `use CmpStr.phonetic.list() for available phonetic algorithms`);
                break;
            // Throw an error for unknown conditions
            default: throw new Error(`Cmpstr condition <${cond}> unknown`);
        }
    }

    /**
     * Asserts multiple conditions.
     *
     * @param {[ string, any? ][]} cond - Array of [ condition, value ] pairs
     * @throws {Error} If one of the conditions is not met
     */
    assertMany(...cond) {
        for (const [c, test] of cond)
            this.assert(c, test);
    }

    /**
     * Resolves the options for a single call by merging the provided options
     * on top of a copy of the instance options.
     *
     * The instance options are first merged into a fresh object instead of
     * being spread shallowly: `merge()` works in place on its target (see
     * `mergeOptions`), so a shallow copy would let nested per-call options
     * leak into the instance. NOTE(review): this relies on `merge()` creating
     * new nested objects for keys missing in the target — confirm against
     * its implementation.
     *
     * @param {CmpStrOptions} [opt] - Optional options to merge
     * @returns {CmpStrOptions} - The resolved options
     */
    resolveOptions(opt) {
        const base = merge(Object.create(null), this.options ?? Object.create(null));
        return merge(base, opt);
    }

    /**
     * Normalizes the input string or array using the provided flags, falling
     * back to the configured flags and finally to no flags at all.
     *
     * @param {MetricInput} input - The input string or array
     * @param {NormalizeFlags} [flags] - Normalization flags
     * @returns {MetricInput} - The normalized input
     */
    normalize(input, flags) {
        return Normalizer.normalize(input, flags ?? this.options.flags ?? '');
    }

    /**
     * Applies the filters registered for the given hook to the input.
     *
     * @param {MetricInput} input - The input string or array
     * @param {string} [hook='input'] - The filter hook
     * @returns {MetricInput} - The filtered string(s)
     */
    filter(input, hook = 'input') {
        return Filter.apply(hook, input);
    }

    /**
     * Prepares the input by normalizing, filtering and, if configured,
     * phonetic indexing.
     *
     * @param {MetricInput} [input] - The input string or array
     * @param {CmpStrOptions} [opt] - Optional options to use instead of the instance options
     * @returns {MetricInput} - The prepared input
     */
    prepare(input, opt) {
        const { flags, processors } = opt ?? this.options;
        // Normalize the input using flags (i.e., 'itw')
        if (flags?.length)
            input = this.normalize(input, flags);
        // Filter the input using hooked up filters
        input = this.filter(input, 'input');
        // Apply phonetic processors if configured
        if (processors?.phonetic)
            input = this.index(input, processors.phonetic);
        return input;
    }

    /**
     * Post-processes the results of the metric computation.
     *
     * @param {MetricResult<R>} result - The metric result
     * @param {CmpStrOptions} [opt] - Options; `removeZero` drops batch entries with `res <= 0`
     * @returns {MetricResult<R>} - The post-processed results
     */
    postProcess(result, opt) {
        // Remove "zero similarity" from batch results if configured
        if (opt?.removeZero && Array.isArray(result))
            result = result.filter(r => r.res > 0);
        return result;
    }

    /**
     * Computes the phonetic index for the given input using the specified phonetic algorithm.
     *
     * @param {MetricInput} input - The input string or array
     * @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
     * @returns {MetricInput} - The phonetic index for the given input
     * @throws {Error} If the phonetic algorithm is not registered
     */
    index(input, { algo, opt }) {
        this.assert('phonetic', algo);
        const phonetic = factory.phonetic(algo, opt);
        // The index parts of each input are joined into one string
        const delimiter = opt?.delimiter ?? ' ';
        return Array.isArray(input)
            ? input.map(s => phonetic.getIndex(s).join(delimiter))
            : phonetic.getIndex(input).join(delimiter);
    }

    /**
     * Computes the metric result for the given inputs, applying normalization and
     * filtering as configured.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The first input string or array
     * @param {MetricInput} b - The second input string or array
     * @param {CmpStrOptions} [opt] - Optional options to use
     * @param {MetricMode} [mode] - The metric mode, passed on to the metric's `run()`
     * @param {boolean} [raw] - Whether to return raw results; falls back to the resolved `raw` option
     * @param {boolean} [skip] - Whether to skip normalization and filtering
     * @returns {T} - The computed metric result
     * @throws {Error} If no registered metric is configured
     */
    compute(a, b, opt, mode, raw, skip) {
        const resolved = this.resolveOptions(opt);
        this.assert('metric', resolved.metric);
        // Prepare the input
        const A = skip ? a : this.prepare(a, resolved);
        const B = skip ? b : this.prepare(b, resolved);
        // Get the metric class
        const metric = factory.metric(resolved.metric, A, B, resolved.opt);
        // Pass the original inputs to the metric unless prepared output is requested
        if (resolved.output !== 'prep')
            metric.setOriginal(a, b);
        // Compute the metric result
        metric.run(mode);
        // Post-process the results
        const result = this.postProcess(metric.getResults(), resolved);
        // Resolve and return the result based on the raw flag
        return this.output(result, raw ?? resolved.raw);
    }

    /**
     * Resolves the result format (raw or formatted).
     *
     * Formatted results map `a`, `b` and `res` to `source`, `target` and `match`.
     *
     * @template T - The type of the metric result
     * @param {MetricResult<R>} result - The metric result
     * @param {boolean} [raw] - Whether to return raw results; falls back to the instance option
     * @returns {T} - The resolved result
     */
    output(result, raw) {
        if (raw ?? this.options.raw) return result;
        const format = r => ({ source: r.a, target: r.b, match: r.res });
        return Array.isArray(result) ? result.map(r => format(r)) : format(result);
    }

    /**
     * ---------------------------------------------------------------------------------
     * Managing methods for CmpStr
     * ---------------------------------------------------------------------------------
     *
     * These methods provide an interface to set and get properties of the CmpStr
     * instance, such as options, metric, phonetic algorithm, and more.
     */

    /**
     * Creates a shallow clone of the current instance.
     *
     * Because the copy is shallow, the clone references the same `options`
     * object as the original.
     *
     * @returns {CmpStr<R>} - The cloned instance
     */
    clone() { return Object.assign(Object.create(Object.getPrototypeOf(this)), this); }

    /**
     * Resets the instance by deleting every key of the options object.
     *
     * @returns {this}
     */
    reset() {
        for (const k in this.options)
            delete this.options[k];
        return this;
    }

    /**
     * Sets / replaces the full options object. The given object is stored
     * as is, without copying.
     *
     * @param {CmpStrOptions} opt - The options
     * @returns {this}
     */
    setOptions(opt) { this.options = opt; return this; }

    /**
     * Deep merges the given options into the current options.
     *
     * @param {CmpStrOptions} opt - The options to merge
     * @returns {this}
     */
    mergeOptions(opt) { merge(this.options, opt); return this; }

    /**
     * Sets the options from a JSON string.
     *
     * @param {string} opt - The serialized options
     * @returns {this}
     * @throws {SyntaxError} If the string is not valid JSON
     */
    setSerializedOptions(opt) { this.options = JSON.parse(opt); return this; }

    /**
     * Sets a specific option at the given path.
     *
     * @param {string} path - The path to the option
     * @param {any} value - The value to set
     * @returns {this}
     */
    setOption(path, value) { set(this.options, path, value); return this; }

    /**
     * Removes an option at the given path.
     *
     * @param {string} path - The path to the option
     * @returns {this}
     */
    rmvOption(path) { rmv(this.options, path); return this; }

    /**
     * Enables or disables raw output.
     *
     * @param {boolean} enable - Whether to enable or disable raw output
     * @returns {this}
     */
    setRaw(enable) { return this.setOption('raw', enable); }

    /**
     * Sets the similarity metric to use (e.g., 'levenshtein', 'dice').
     *
     * @param {string} name - The metric name
     * @returns {this}
     */
    setMetric(name) { return this.setOption('metric', name); }

    /**
     * Sets the normalization flags (e.g., 'itw', 'nfc').
     *
     * @param {NormalizeFlags} flags - The normalization flags
     * @returns {this}
     */
    setFlags(flags) { return this.setOption('flags', flags); }

    /**
     * Removes the normalization flags entirely.
     *
     * @returns {this}
     */
    rmvFlags() { return this.rmvOption('flags'); }

    /**
     * Sets the pre-processors to use for preparing the input.
     *
     * @param {CmpStrProcessors} opt - The processors to set
     * @returns {this}
     */
    setProcessors(opt) { return this.setOption('processors', opt); }

    /**
     * Removes the processors entirely.
     *
     * @returns {this}
     */
    rmvProcessors() { return this.rmvOption('processors'); }

    /**
     * Returns the current options object (not a copy).
     *
     * @returns {CmpStrOptions} - The options
     */
    getOptions() { return this.options; }

    /**
     * Returns the options as a JSON string.
     *
     * @returns {string} - The serialized options
     */
    getSerializedOptions() { return JSON.stringify(this.options); }

    /**
     * Returns a specific option value by path.
     *
     * @param {string} path - The path to the option
     * @returns {any} - The option value
     */
    getOption(path) { return get(this.options, path); }

    /**
     * ---------------------------------------------------------------------------------
     * Public core methods for string comparison
     * ---------------------------------------------------------------------------------
     *
     * These methods provide the core functionality of the CmpStr class, allowing for
     * string comparison, phonetic indexing, filtering, and text search.
     */

    /**
     * Performs a single metric comparison between the source and target.
     *
     * @template T - The type of the metric result
     * @param {string} a - The source string
     * @param {string} b - The target string
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The metric result
     */
    test(a, b, opt) {
        return this.compute(a, b, opt, 'single');
    }

    /**
     * Performs a single metric comparison and returns only the numeric score.
     *
     * @param {string} a - The source string
     * @param {string} b - The target string
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {number} - The similarity score (0..1)
     */
    compare(a, b, opt) {
        // Raw output is forced so that `res` is available
        return this.compute(a, b, opt, 'single', true).res;
    }

    /**
     * Performs a batch metric comparison between source and target strings
     * or array of strings.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The batch metric results
     */
    batchTest(a, b, opt) {
        return this.compute(a, b, opt, 'batch');
    }

    /**
     * Performs a batch metric comparison and returns results sorted by score.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The sorted batch results
     */
    batchSorted(a, b, dir = 'desc', opt) {
        const ascending = dir === 'asc';
        // Sort on the raw results, then format them according to the raw option
        const sorted = this.compute(a, b, opt, 'batch', true)
            .sort((x, y) => ascending ? x.res - y.res : y.res - x.res);
        return this.output(sorted, opt?.raw ?? this.options.raw);
    }

    /**
     * Performs a pairwise metric comparison between source and target strings
     * or array of strings.
     *
     * Input arrays need to be of the same length to perform pairwise comparison,
     * otherwise the method will throw an error.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The pairwise metric results
     */
    pairs(a, b, opt) {
        return this.compute(a, b, opt, 'pairwise');
    }

    /**
     * Performs a batch comparison and returns only results at or above the
     * threshold, sorted by descending score.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} threshold - The similarity threshold (0..1)
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The filtered batch results
     */
    match(a, b, threshold, opt) {
        const hits = this.compute(a, b, opt, 'batch', true)
            .filter(r => r.res >= threshold)
            .sort((x, y) => y.res - x.res);
        return this.output(hits, opt?.raw ?? this.options.raw);
    }

    /**
     * Returns the n closest matches from a batch comparison.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} [n=1] - Number of closest matches
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The closest matches
     */
    closest(a, b, n = 1, opt) {
        return this.batchSorted(a, b, 'desc', opt).slice(0, n);
    }

    /**
     * Returns the n furthest matches from a batch comparison.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} [n=1] - Number of furthest matches
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The furthest matches
     */
    furthest(a, b, n = 1, opt) {
        return this.batchSorted(a, b, 'asc', opt).slice(0, n);
    }

    /**
     * Performs a normalized and filtered substring search.
     *
     * @param {string} needle - The search string
     * @param {string[]} haystack - The array to search in
     * @param {NormalizeFlags} [flags] - Normalization flags
     * @param {CmpStrProcessors} [processors] - Pre-processors to apply
     * @returns {string[]} - The original haystack entries whose prepared form contains the prepared needle
     */
    search(needle, haystack, flags, processors) {
        const resolved = this.resolveOptions({ flags, processors });
        // Prepare the needle and haystack, normalizing and filtering them
        const test = this.prepare(needle, resolved);
        const hstk = this.prepare(haystack, resolved);
        // Filter the haystack based on the normalized test string
        return haystack.filter((_, i) => hstk[i].includes(test));
    }

    /**
     * Computes a similarity matrix for the given input array.
     *
     * The given options are used for preparing the input as well as for the
     * comparison itself, so a per-call `metric` is honored. `removeZero` is
     * switched off for the row computation, as dropping entries would shift
     * the columns of the matrix.
     *
     * @param {string[]} input - The input array
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {number[][]} - The similarity matrix
     * @throws {Error} If no registered metric is configured
     */
    matrix(input, opt) {
        const prepared = this.prepare(input, this.resolveOptions(opt));
        const rowOpt = { ...opt, removeZero: false };
        // The input is already prepared, so normalization and filtering are skipped
        return prepared.map(a => this.compute(a, prepared, rowOpt, 'batch', true, true)
            .map(r => r.res ?? 0));
    }

    /**
     * Computes the phonetic index for a string using the configured
     * or given algorithm.
     *
     * @param {string} [input] - The input string
     * @param {string} [algo] - The phonetic algorithm to use
     * @param {PhoneticOptions} [opt] - Optional phonetic options
     * @returns {string} - The phonetic index as a string
     * @throws {Error} If the phonetic algorithm is not registered
     */
    phoneticIndex(input, algo, opt) {
        // Fall back to the phonetic processor configured on the instance
        const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
        return this.index(input, { algo: (algo ?? a), opt: opt ?? o });
    }

}
4564
+
4565
+ /**
4566
+ * CmpStrAsync Asynchronous API
4567
+ * src/CmpStrAsync.ts
4568
+ *
4569
+ * The CmpStrAsync class provides a fully asynchronous, Promise-based interface for
4570
+ * advanced string comparison, similarity measurement, phonetic indexing, filtering
4571
+ * and normalization. It extends the CmpStr class and overrides all relevant methods
4572
+ * to support non-blocking, scalable, and I/O-friendly workloads.
4573
+ *
4574
+ * Features:
4575
+ * - Asynchronous normalization, filtering, and metric computation
4576
+ * - Async batch, pairwise, and single string comparison with detailed results
4577
+ * - Async phonetic indexing and phonetic-aware search and comparison
4578
+ * - Full compatibility with the synchronous CmpStr API
4579
+ * - Designed for large-scale, high-performance, and server-side applications
4580
+ *
4581
+ * @module CmpStrAsync
4582
+ * @author Paul Köhler (komed3)
4583
+ * @license MIT
4584
+ */
4585
+ /**
4586
+ * The CmpStrAsync class provides a fully asynchronous API for string comparison,
4587
+ * phonetic indexing, filtering and normalization.
4588
+ *
4589
+ * @template R - The type of the metric result, defaults to MetricRaw
4590
+ */
4591
class CmpStrAsync extends CmpStr {
    /**
     * --------------------------------------------------------------------------------
     * Instantiate the CmpStrAsync class
     * --------------------------------------------------------------------------------
     *
     * Methods to create a new CmpStrAsync instance with the given options.
     * Using the static `create` method is recommended to ensure proper instantiation.
     */
    /**
     * Creates a new CmpStrAsync instance with the given options.
     *
     * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
     * @returns {CmpStrAsync<R>} - A new CmpStrAsync instance
     */
    static create(opt) {
        return new CmpStrAsync(opt);
    }
    /**
     * Creates a new CmpStrAsync instance calling the super constructor.
     *
     * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
     */
    constructor(opt) { super(opt); }
    /**
     * ---------------------------------------------------------------------------------
     * Protected asynchronous utility methods for internal use
     * ---------------------------------------------------------------------------------
     *
     * These methods provide asynchronous normalization, filtering, and metric
     * computation capabilities, allowing for non-blocking operations.
     */
    /**
     * Asynchronously normalizes the input string or array using the configured or provided flags.
     *
     * @param {MetricInput} input - The input string or array
     * @param {NormalizeFlags} [flags] - Normalization flags
     * @returns {Promise<MetricInput>} - The normalized input
     */
    async normalizeAsync(input, flags) {
        // Explicit flags win over the instance flags; fall back to an empty flag string
        return Normalizer.normalizeAsync(input, flags ?? this.options.flags ?? '');
    }
    /**
     * Asynchronously applies all active filters to the input string or array.
     *
     * @param {MetricInput} input - The input string or array
     * @param {string} [hook='input'] - The filter hook
     * @returns {Promise<MetricInput>} - The filtered string(s)
     */
    async filterAsync(input, hook = 'input') {
        // The default matches the documented contract; previously an omitted
        // hook was forwarded as `undefined`.
        return Filter.applyAsync(hook, input);
    }
    /**
     * Asynchronously prepares the input by normalizing and filtering.
     *
     * @param {MetricInput} [input] - The input string or array
     * @param {CmpStrOptions} [opt] - Optional options to use
     * @returns {Promise<MetricInput>} - The prepared input
     */
    async prepareAsync(input, opt) {
        const { flags, processors } = opt ?? this.options;
        let prepared = input;
        // Normalize the input using flags (i.e., 'itw')
        if (flags?.length) {
            prepared = await this.normalizeAsync(prepared, flags);
        }
        // Filter the input using hooked up filters
        prepared = await this.filterAsync(prepared, 'input');
        // Apply phonetic processors if configured
        if (processors?.phonetic) {
            prepared = await this.indexAsync(prepared, processors.phonetic);
        }
        return prepared;
    }
    /**
     * Asynchronously computes the phonetic index for the given input using
     * the specified phonetic algorithm.
     *
     * @param {MetricInput} input - The input string or array
     * @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
     * @returns {Promise<MetricInput>} - The phonetic index for the given input
     */
    async indexAsync(input, { algo, opt }) {
        this.assert('phonetic', algo);
        const phonetic = factory.phonetic(algo, opt);
        const delimiter = opt?.delimiter ?? ' ';
        // Index a single string and join the resulting parts with the delimiter
        const toIndex = async (s) => (await phonetic.getIndexAsync(s)).join(delimiter);
        return Array.isArray(input)
            ? Promise.all(input.map(s => toIndex(s)))
            : toIndex(input);
    }
    /**
     * Asynchronously computes the metric result for the given inputs, applying
     * normalization and filtering as configured.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The first input string or array
     * @param {MetricInput} b - The second input string or array
     * @param {CmpStrOptions} [opt] - Optional options to use
     * @param {MetricMode} [mode] - The metric mode to use; passed to the metric as given
     * @param {boolean} [raw] - Whether to return raw results; falls back to the resolved `raw` option
     * @param {boolean} [skip=false] - Whether to skip normalization and filtering
     * @returns {Promise<T>} - The computed metric result
     */
    async computeAsync(a, b, opt, mode, raw, skip) {
        const resolved = this.resolveOptions(opt);
        this.assert('metric', resolved.metric);
        // Prepare the input (kept sequential: a is fully prepared before b)
        const A = skip ? a : await this.prepareAsync(a, resolved);
        const B = skip ? b : await this.prepareAsync(b, resolved);
        // Get the metric class
        const metric = factory.metric(resolved.metric, A, B, resolved.opt);
        // Pass the original inputs to the metric
        if (resolved.output !== 'prep') {
            metric.setOriginal(a, b);
        }
        // Compute the metric result
        await metric.runAsync(mode);
        // Post-process the results and concat the original inputs
        const result = this.postProcess(metric.getResults(), resolved);
        // Resolve and return the result based on the raw flag
        return this.output(result, raw ?? resolved.raw);
    }
    /**
     * ---------------------------------------------------------------------------------
     * Public asynchronous core methods for string comparison
     * ---------------------------------------------------------------------------------
     *
     * These methods provide the asynchronous core functionality for string comparison,
     * phonetic indexing and text search, allowing for non-blocking operations.
     */
    /**
     * Asynchronously performs a single metric comparison.
     *
     * @template T - The type of the metric result
     * @param {string} a - The source string
     * @param {string} b - The target string
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<T>} - The metric result
     */
    async testAsync(a, b, opt) {
        return this.computeAsync(a, b, opt, 'single');
    }
    /**
     * Asynchronously performs a single metric comparison returning the numeric score.
     *
     * @param {string} a - The source string
     * @param {string} b - The target string
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<number>} - The similarity score (0..1)
     */
    async compareAsync(a, b, opt) {
        // Raw output is forced so that the numeric `res` field can be read
        const result = await this.computeAsync(a, b, opt, 'single', true);
        return result.res;
    }
    /**
     * Asynchronously performs a batch metric comparison between source and target
     * strings or array of strings.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<T>} - The batch metric results
     */
    async batchTestAsync(a, b, opt) {
        return this.computeAsync(a, b, opt, 'batch');
    }
    /**
     * Asynchronously performs a batch metric comparison and returns results sorted by score.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<T>} - The sorted batch results
     */
    async batchSortedAsync(a, b, dir = 'desc', opt) {
        // Raw results are needed for sorting; the output format is applied afterwards
        const res = await this.computeAsync(a, b, opt, 'batch', true);
        const sorted = res.sort((x, y) => dir === 'asc' ? x.res - y.res : y.res - x.res);
        return this.output(sorted, opt?.raw ?? this.options.raw);
    }
    /**
     * Asynchronously performs a pairwise metric comparison between source and target
     * strings or array of strings.
     *
     * Input arrays need to be of the same length to perform pairwise comparison,
     * otherwise the method will throw an error.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<T>} - The pairwise metric results
     */
    async pairsAsync(a, b, opt) {
        return this.computeAsync(a, b, opt, 'pairwise');
    }
    /**
     * Asynchronously performs a batch comparison and returns only results above the threshold.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} threshold - The similarity threshold (0..1)
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<T>} - The filtered batch results
     */
    async matchAsync(a, b, threshold, opt) {
        const res = await this.computeAsync(a, b, opt, 'batch', true);
        // Keep results at or above the threshold, best score first
        const matches = res
            .filter(r => r.res >= threshold)
            .sort((x, y) => y.res - x.res);
        return this.output(matches, opt?.raw ?? this.options.raw);
    }
    /**
     * Asynchronously returns the n closest matches from a batch comparison.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} [n=1] - Number of closest matches
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<T>} - The closest matches
     */
    async closestAsync(a, b, n = 1, opt) {
        const sorted = await this.batchSortedAsync(a, b, 'desc', opt);
        return sorted.slice(0, n);
    }
    /**
     * Asynchronously returns the n furthest matches from a batch comparison.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} [n=1] - Number of furthest matches
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<T>} - The furthest matches
     */
    async furthestAsync(a, b, n = 1, opt) {
        const sorted = await this.batchSortedAsync(a, b, 'asc', opt);
        return sorted.slice(0, n);
    }
    /**
     * Asynchronously performs a normalized and filtered substring search.
     *
     * @param {string} needle - The search string
     * @param {string[]} haystack - The array to search in
     * @param {NormalizeFlags} [flags] - Normalization flags
     * @param {CmpStrProcessors} [processors] - Pre-processors to apply
     * @returns {Promise<string[]>} - Array of matching entries
     */
    async searchAsync(needle, haystack, flags, processors) {
        const resolved = this.resolveOptions({ flags, processors });
        // Prepare the needle and haystack, normalizing and filtering them
        const test = await this.prepareAsync(needle, resolved);
        const hstk = await this.prepareAsync(haystack, resolved);
        // Filter the haystack based on the normalized test string
        return haystack.filter((_, i) => hstk[i].includes(test));
    }
    /**
     * Asynchronously computes a similarity matrix for the given input array.
     *
     * @param {string[]} input - The input array
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<number[][]>} - The similarity matrix
     */
    async matrixAsync(input, opt) {
        // Prepare all entries once; computeAsync() is then called with skip = true
        const prepared = await this.prepareAsync(input, this.resolveOptions(opt));
        // NOTE(review): `opt` is only used for preparation; computeAsync() receives
        // `undefined` options here — confirm that this is intended.
        return Promise.all(prepared.map(async (a) => {
            const row = await this.computeAsync(a, prepared, undefined, 'batch', true, true);
            // Reduce every raw result to its numeric score, using 0 when it is missing
            return row.map(b => b.res ?? 0);
        }));
    }
    /**
     * Asynchronously computes the phonetic index for a string using the
     * configured or given algorithm.
     *
     * @param {string} [input] - The input string
     * @param {string} [algo] - The phonetic algorithm to use
     * @param {PhoneticOptions} [opt] - Optional phonetic options
     * @returns {Promise<string>} - The phonetic index as a string
     */
    async phoneticIndexAsync(input, algo, opt) {
        // Explicit arguments take precedence over the configured phonetic processor
        const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
        return this.indexAsync(input, {
            algo: (algo ?? a), opt: opt ?? o
        });
    }
}
4867
+
4868
+ exports.CmpStr = CmpStr;
4869
+ exports.CmpStrAsync = CmpStrAsync;
4870
+ exports.DiffChecker = DiffChecker;
4871
+ exports.Normalizer = Normalizer;
4872
+ exports.TextAnalyzer = TextAnalyzer;
4873
+
4874
+ }));
4875
+ //# sourceMappingURL=CmpStr.umd.js.map