cmpstr 2.0.3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +75 -503
  3. package/dist/CmpStr.esm.js +4863 -0
  4. package/dist/CmpStr.esm.js.map +1 -0
  5. package/dist/CmpStr.esm.min.js +8 -0
  6. package/dist/CmpStr.esm.min.js.map +1 -0
  7. package/dist/CmpStr.umd.js +4875 -0
  8. package/dist/CmpStr.umd.js.map +1 -0
  9. package/dist/CmpStr.umd.min.js +8 -0
  10. package/dist/CmpStr.umd.min.js.map +1 -0
  11. package/dist/cjs/CmpStr.js +663 -0
  12. package/dist/cjs/CmpStr.js.map +1 -0
  13. package/dist/cjs/CmpStrAsync.js +336 -0
  14. package/dist/cjs/CmpStrAsync.js.map +1 -0
  15. package/dist/cjs/index.js +15 -0
  16. package/dist/cjs/index.js.map +1 -0
  17. package/dist/cjs/metric/Cosine.js +101 -0
  18. package/dist/cjs/metric/Cosine.js.map +1 -0
  19. package/dist/cjs/metric/DamerauLevenshtein.js +110 -0
  20. package/dist/cjs/metric/DamerauLevenshtein.js.map +1 -0
  21. package/dist/cjs/metric/DiceSorensen.js +91 -0
  22. package/dist/cjs/metric/DiceSorensen.js.map +1 -0
  23. package/dist/cjs/metric/Hamming.js +82 -0
  24. package/dist/cjs/metric/Hamming.js.map +1 -0
  25. package/dist/cjs/metric/Jaccard.js +76 -0
  26. package/dist/cjs/metric/Jaccard.js.map +1 -0
  27. package/dist/cjs/metric/JaroWinkler.js +114 -0
  28. package/dist/cjs/metric/JaroWinkler.js.map +1 -0
  29. package/dist/cjs/metric/LCS.js +89 -0
  30. package/dist/cjs/metric/LCS.js.map +1 -0
  31. package/dist/cjs/metric/Levenshtein.js +94 -0
  32. package/dist/cjs/metric/Levenshtein.js.map +1 -0
  33. package/dist/cjs/metric/Metric.js +445 -0
  34. package/dist/cjs/metric/Metric.js.map +1 -0
  35. package/dist/cjs/metric/NeedlemanWunsch.js +95 -0
  36. package/dist/cjs/metric/NeedlemanWunsch.js.map +1 -0
  37. package/dist/cjs/metric/SmithWaterman.js +98 -0
  38. package/dist/cjs/metric/SmithWaterman.js.map +1 -0
  39. package/dist/cjs/metric/qGram.js +91 -0
  40. package/dist/cjs/metric/qGram.js.map +1 -0
  41. package/dist/cjs/phonetic/Cologne.js +112 -0
  42. package/dist/cjs/phonetic/Cologne.js.map +1 -0
  43. package/dist/cjs/phonetic/Metaphone.js +172 -0
  44. package/dist/cjs/phonetic/Metaphone.js.map +1 -0
  45. package/dist/cjs/phonetic/Phonetic.js +413 -0
  46. package/dist/cjs/phonetic/Phonetic.js.map +1 -0
  47. package/dist/cjs/phonetic/Soundex.js +135 -0
  48. package/dist/cjs/phonetic/Soundex.js.map +1 -0
  49. package/dist/cjs/utils/DeepMerge.js +144 -0
  50. package/dist/cjs/utils/DeepMerge.js.map +1 -0
  51. package/dist/cjs/utils/DiffChecker.js +500 -0
  52. package/dist/cjs/utils/DiffChecker.js.map +1 -0
  53. package/dist/cjs/utils/Filter.js +189 -0
  54. package/dist/cjs/utils/Filter.js.map +1 -0
  55. package/dist/cjs/utils/HashTable.js +175 -0
  56. package/dist/cjs/utils/HashTable.js.map +1 -0
  57. package/dist/cjs/utils/Normalizer.js +144 -0
  58. package/dist/cjs/utils/Normalizer.js.map +1 -0
  59. package/dist/cjs/utils/Pool.js +196 -0
  60. package/dist/cjs/utils/Pool.js.map +1 -0
  61. package/dist/cjs/utils/Profiler.js +229 -0
  62. package/dist/cjs/utils/Profiler.js.map +1 -0
  63. package/dist/cjs/utils/Registry.js +148 -0
  64. package/dist/cjs/utils/Registry.js.map +1 -0
  65. package/dist/cjs/utils/TextAnalyzer.js +358 -0
  66. package/dist/cjs/utils/TextAnalyzer.js.map +1 -0
  67. package/dist/esm/CmpStr.js +662 -0
  68. package/dist/esm/CmpStr.js.map +1 -0
  69. package/dist/esm/CmpStrAsync.js +331 -0
  70. package/dist/esm/CmpStrAsync.js.map +1 -0
  71. package/dist/esm/index.js +7 -0
  72. package/dist/esm/index.js.map +1 -0
  73. package/dist/esm/metric/Cosine.js +99 -0
  74. package/dist/esm/metric/Cosine.js.map +1 -0
  75. package/dist/esm/metric/DamerauLevenshtein.js +108 -0
  76. package/dist/esm/metric/DamerauLevenshtein.js.map +1 -0
  77. package/dist/esm/metric/DiceSorensen.js +89 -0
  78. package/dist/esm/metric/DiceSorensen.js.map +1 -0
  79. package/dist/esm/metric/Hamming.js +77 -0
  80. package/dist/esm/metric/Hamming.js.map +1 -0
  81. package/dist/esm/metric/Jaccard.js +74 -0
  82. package/dist/esm/metric/Jaccard.js.map +1 -0
  83. package/dist/esm/metric/JaroWinkler.js +112 -0
  84. package/dist/esm/metric/JaroWinkler.js.map +1 -0
  85. package/dist/esm/metric/LCS.js +87 -0
  86. package/dist/esm/metric/LCS.js.map +1 -0
  87. package/dist/esm/metric/Levenshtein.js +92 -0
  88. package/dist/esm/metric/Levenshtein.js.map +1 -0
  89. package/dist/esm/metric/Metric.js +442 -0
  90. package/dist/esm/metric/Metric.js.map +1 -0
  91. package/dist/esm/metric/NeedlemanWunsch.js +93 -0
  92. package/dist/esm/metric/NeedlemanWunsch.js.map +1 -0
  93. package/dist/esm/metric/SmithWaterman.js +96 -0
  94. package/dist/esm/metric/SmithWaterman.js.map +1 -0
  95. package/dist/esm/metric/qGram.js +89 -0
  96. package/dist/esm/metric/qGram.js.map +1 -0
  97. package/dist/esm/phonetic/Cologne.js +114 -0
  98. package/dist/esm/phonetic/Cologne.js.map +1 -0
  99. package/dist/esm/phonetic/Metaphone.js +174 -0
  100. package/dist/esm/phonetic/Metaphone.js.map +1 -0
  101. package/dist/esm/phonetic/Phonetic.js +409 -0
  102. package/dist/esm/phonetic/Phonetic.js.map +1 -0
  103. package/dist/esm/phonetic/Soundex.js +137 -0
  104. package/dist/esm/phonetic/Soundex.js.map +1 -0
  105. package/dist/esm/utils/DeepMerge.js +139 -0
  106. package/dist/esm/utils/DeepMerge.js.map +1 -0
  107. package/dist/esm/utils/DiffChecker.js +498 -0
  108. package/dist/esm/utils/DiffChecker.js.map +1 -0
  109. package/dist/esm/utils/Filter.js +187 -0
  110. package/dist/esm/utils/Filter.js.map +1 -0
  111. package/dist/esm/utils/HashTable.js +173 -0
  112. package/dist/esm/utils/HashTable.js.map +1 -0
  113. package/dist/esm/utils/Normalizer.js +142 -0
  114. package/dist/esm/utils/Normalizer.js.map +1 -0
  115. package/dist/esm/utils/Pool.js +194 -0
  116. package/dist/esm/utils/Pool.js.map +1 -0
  117. package/dist/esm/utils/Profiler.js +227 -0
  118. package/dist/esm/utils/Profiler.js.map +1 -0
  119. package/dist/esm/utils/Registry.js +142 -0
  120. package/dist/esm/utils/Registry.js.map +1 -0
  121. package/dist/esm/utils/TextAnalyzer.js +356 -0
  122. package/dist/esm/utils/TextAnalyzer.js.map +1 -0
  123. package/dist/types/CmpStr.d.ts +472 -0
  124. package/dist/types/CmpStrAsync.d.ts +233 -0
  125. package/dist/types/index.d.ts +51 -0
  126. package/dist/types/metric/Cosine.d.ts +57 -0
  127. package/dist/types/metric/DamerauLevenshtein.d.ts +50 -0
  128. package/dist/types/metric/DiceSorensen.d.ts +57 -0
  129. package/dist/types/metric/Hamming.d.ts +49 -0
  130. package/dist/types/metric/Jaccard.d.ts +48 -0
  131. package/dist/types/metric/JaroWinkler.d.ts +50 -0
  132. package/dist/types/metric/LCS.d.ts +50 -0
  133. package/dist/types/metric/Levenshtein.d.ts +50 -0
  134. package/dist/types/metric/Metric.d.ts +261 -0
  135. package/dist/types/metric/NeedlemanWunsch.d.ts +47 -0
  136. package/dist/types/metric/SmithWaterman.d.ts +48 -0
  137. package/dist/types/metric/index.d.ts +41 -0
  138. package/dist/types/metric/qGram.d.ts +56 -0
  139. package/dist/types/phonetic/Cologne.d.ts +46 -0
  140. package/dist/types/phonetic/Metaphone.d.ts +50 -0
  141. package/dist/types/phonetic/Phonetic.d.ts +189 -0
  142. package/dist/types/phonetic/Soundex.d.ts +49 -0
  143. package/dist/types/phonetic/index.d.ts +30 -0
  144. package/dist/types/utils/DeepMerge.d.ts +70 -0
  145. package/dist/types/utils/DiffChecker.d.ts +137 -0
  146. package/dist/types/utils/Filter.d.ts +97 -0
  147. package/dist/types/utils/HashTable.d.ts +86 -0
  148. package/dist/types/utils/Normalizer.d.ts +76 -0
  149. package/dist/types/utils/Pool.d.ts +63 -0
  150. package/dist/types/utils/Profiler.d.ts +129 -0
  151. package/dist/types/utils/Registry.d.ts +57 -0
  152. package/dist/types/utils/TextAnalyzer.d.ts +199 -0
  153. package/dist/types/utils/Types.d.ts +313 -0
  154. package/package.json +62 -49
  155. package/src/CmpStr.d.ts +0 -70
  156. package/src/CmpStr.js +0 -917
  157. package/src/CmpStrAsync.d.ts +0 -19
  158. package/src/CmpStrAsync.js +0 -197
  159. package/src/algorithms/cosine.js +0 -86
  160. package/src/algorithms/damerau.js +0 -78
  161. package/src/algorithms/dice.js +0 -65
  162. package/src/algorithms/hamming.js +0 -44
  163. package/src/algorithms/jaccard.js +0 -34
  164. package/src/algorithms/jaroWinkler.js +0 -106
  165. package/src/algorithms/lcs.js +0 -58
  166. package/src/algorithms/levenshtein.js +0 -70
  167. package/src/algorithms/needlemanWunsch.js +0 -72
  168. package/src/algorithms/qGram.js +0 -63
  169. package/src/algorithms/smithWaterman.js +0 -78
  170. package/src/algorithms/soundex.js +0 -152
  171. package/src/index.d.ts +0 -3
  172. package/src/index.js +0 -47
package/src/CmpStr.js DELETED
@@ -1,917 +0,0 @@
1
- /**
2
- * class CmpStr
3
- *
4
- * The CmpStr class is the core of the cmpstr package. It provides methods to calculate
5
- * string similarity, find the closest matches in arrays, and generate similarity
6
- * matrices. The class supports built-in algorithms (e.g., Levenshtein, Dice-Sørensen)
7
- * and allows users to add custom algorithms. It also includes features like string
8
- * normalization, caching, and extensibility.
9
- *
10
- * @author komed3 (Paul Köhler)
11
- * @license MIT
12
- */
13
-
14
- 'use strict';
15
-
16
- /**
17
- * module exports
18
- * @public
19
- */
20
-
21
- module.exports = class CmpStr {
22
-
23
- /**
24
- * --------------------------------------------------
25
- * Global Variables
26
- * --------------------------------------------------
27
- */
28
-
29
- /**
30
- * all pre-defined similarity algorithms
31
- *
32
- * @private
33
- * @type {Object}
34
- */
35
- #algorithms = {
36
- cosine: './algorithms/cosine',
37
- damerau: './algorithms/damerau',
38
- dice: './algorithms/dice',
39
- hamming: './algorithms/hamming',
40
- jaccard: './algorithms/jaccard',
41
- jaro: './algorithms/jaroWinkler',
42
- lcs: './algorithms/lcs',
43
- levenshtein: './algorithms/levenshtein',
44
- needlemanWunsch: './algorithms/needlemanWunsch',
45
- qGram: './algorithms/qGram',
46
- smithWaterman: './algorithms/smithWaterman',
47
- soundex: './algorithms/soundex'
48
- };
49
-
50
- /**
51
- * stores the names of loaded algorithms
52
- *
53
- * @since 2.0.2
54
- * @private
55
- * @type {Set<String>}
56
- */
57
- #loadedAlgo = new Set ();
58
-
59
- /**
60
- * normalized strings cache
61
- *
62
- * @private
63
- * @type {Map<String, String>}
64
- */
65
- #cache = new Map ();
66
-
67
- /**
68
- * added filters for string normalization
69
- *
70
- * @private
71
- * @type {Map<String, Object[]>}
72
- */
73
- #filter = new Map ();
74
-
75
- /**
76
- * default normalization flags
77
- * set by setFlags()
78
- *
79
- * @private
80
- * @type {String}
81
- */
82
- #flags = '';
83
-
84
- /**
85
- * current algorithm to use for similarity calculations
86
- * set by setAlgo(), addAlgo() or constructor()
87
- *
88
- * @private
89
- * @type {String}
90
- */
91
- #algo;
92
-
93
- /**
94
- * base string for comparison
95
- * set by setStr or constructor()
96
- *
97
- * @private
98
- * @type {String}
99
- */
100
- #str;
101
-
102
- /**
103
- * stores the current ready state
104
- *
105
- * @since 2.0.2
106
- * @private
107
- * @type {Boolean}
108
- */
109
- #readyState = false;
110
-
111
- /**
112
- * --------------------------------------------------
113
- * Constructor
114
- * --------------------------------------------------
115
- */
116
-
117
- /**
118
- * initializes a CmpStr instance
119
- * algorithm and base string can be set by initialization
120
- *
121
- * @param {String} algo name of the algorithm to use for calculation
122
- * @param {String} str string to set as the base
123
- */
124
- constructor ( algo = undefined, str = undefined ) {
125
-
126
- if ( algo !== undefined ) {
127
-
128
- this.setAlgo( algo );
129
-
130
- }
131
-
132
- if ( str !== undefined ) {
133
-
134
- this.setStr( str );
135
-
136
- }
137
-
138
- };
139
-
140
- /**
141
- * --------------------------------------------------
142
- * Ready State
143
- * --------------------------------------------------
144
- */
145
-
146
- /**
147
- * checks whether string and algorithm are set correctly
148
- *
149
- * @returns {Boolean} true if ready, false otherwise
150
- */
151
- isReady () {
152
-
153
- return this.#readyState;
154
-
155
- };
156
-
157
- /**
158
- * updates the readiness state
159
- *
160
- * @since 2.0.2
161
- * @private
162
- */
163
- #updateReadyState () {
164
-
165
- this.#readyState = (
166
- typeof this.#algo === 'string' &&
167
- this.isAlgo( this.#algo ) &&
168
- typeof this.#str === 'string' &&
169
- this.#str.length !== 0
170
- );
171
-
172
- };
173
-
174
- /**
175
- * checks ready state and throws an error if not
176
- *
177
- * @private
178
- * @returns {Boolean} true if ready
179
- * @throws {Error} if CmpStr is not ready
180
- */
181
- #checkReady () {
182
-
183
- if ( !this.#readyState ) {
184
-
185
- throw new Error(
186
- `CmpStr instance is not ready. Ensure the algorithm and base string are set.`
187
- );
188
-
189
- }
190
-
191
- return true;
192
-
193
- };
194
-
195
- /**
196
- * --------------------------------------------------
197
- * Base String
198
- * --------------------------------------------------
199
- */
200
-
201
- /**
202
- * sets the base string for comparison
203
- *
204
- * @param {String} str string to set as the base
205
- * @returns {Boolean} always returns true
206
- */
207
- setStr ( str ) {
208
-
209
- this.#str = String ( str );
210
-
211
- this.#updateReadyState();
212
-
213
- return true;
214
-
215
- };
216
-
217
- /**
218
- * gets the base string for comparison
219
- *
220
- * @since 2.0.2
221
- * @returns {String} base string
222
- */
223
- getStr () {
224
-
225
- return this.#str;
226
-
227
- };
228
-
229
- /**
230
- * --------------------------------------------------
231
- * Algorithms
232
- * --------------------------------------------------
233
- */
234
-
235
- /**
236
- * list all registered or loaded similarity algorithms
237
- *
238
- * @param {Boolean} [loadedOnly=false] it true, only loaded algorithm names are returned
239
- * @returns {String[]} array of algorithm names
240
- */
241
- listAlgo ( loadedOnly = false ) {
242
-
243
- return loadedOnly
244
- ? [ ...this.#loadedAlgo ]
245
- : [ ...Object.keys( this.#algorithms ) ];
246
-
247
- };
248
-
249
- /**
250
- * checks if an algorithm is registered
251
- *
252
- * @param {String} algo name of the algorithm
253
- * @returns {Boolean} true if the algorithm is registered, false otherwise
254
- */
255
- isAlgo ( algo ) {
256
-
257
- return algo in this.#algorithms;
258
-
259
- };
260
-
261
- /**
262
- * sets the current algorithm to use for similarity calculations
263
- *
264
- * @param {String} algo name of the algorithm
265
- * @returns {Boolean} true if the algorithm has been set
266
- */
267
- setAlgo ( algo ) {
268
-
269
- if ( this.#loadAlgo( algo ) ) {
270
-
271
- this.#algo = algo;
272
-
273
- this.#updateReadyState();
274
-
275
- return true;
276
-
277
- }
278
-
279
- };
280
-
281
- /**
282
- * gets the current algorithm to use for similarity calculations
283
- *
284
- * @since 2.0.2
285
- * @returns {String} name of the algorithm
286
- */
287
- getAlgo () {
288
-
289
- return this.#algo;
290
-
291
- };
292
-
293
- /**
294
- * adds a new similarity algorithm
295
- *
296
- * @param {String} algo name of the algorithm
297
- * @param {Function} callback function implementing the algorithm (must accept two strings and return a number)
298
- * @param {Boolean} [useIt=true] whether to set this algorithm as the current one
299
- * @returns {Boolean} returns true if the algorithms was added successfully
300
- * @throws {Error} if the algorithm cannot be added
301
- */
302
- addAlgo ( algo, callback, useIt = true ) {
303
-
304
- if (
305
- !this.isAlgo( algo ) &&
306
- typeof callback === 'function' &&
307
- callback.length >= 2 &&
308
- typeof callback.apply( null, [ 'abc', 'abc' ] ) === 'number'
309
- ) {
310
-
311
- this.#algorithms[ algo ] = callback;
312
-
313
- if ( useIt ) {
314
-
315
- this.setAlgo( algo );
316
-
317
- }
318
-
319
- return true;
320
-
321
- } else {
322
-
323
- throw new Error (
324
- `Algorithm "${algo}" cannot be added.`
325
- );
326
-
327
- }
328
-
329
- };
330
-
331
- /**
332
- * removes a registered similarity algorithm
333
- *
334
- * @param {String} algo name of the algorithm
335
- * @returns {Boolean} true if the algorithm was removed successfully
336
- * @throws {Error} if the algorithm is not defined
337
- */
338
- rmvAlgo ( algo ) {
339
-
340
- if ( this.isAlgo( algo ) ) {
341
-
342
- delete this.#algorithms[ algo ];
343
-
344
- this.#loadedAlgo.delete( algo );
345
-
346
- if ( this.#algo === algo ) {
347
-
348
- /* reset current algorithm if it was removed */
349
-
350
- this.#algo = undefined;
351
-
352
- this.#updateReadyState();
353
-
354
- }
355
-
356
- return true;
357
-
358
- } else {
359
-
360
- throw new Error (
361
- `Algorithm "${algo}" is not defined.`
362
- );
363
-
364
- }
365
-
366
- };
367
-
368
- /**
369
- * lazy-loads the specified algorithm module
370
- *
371
- * @private
372
- * @param {String} algo name of the similarity algorithm
373
- * @returns {Boolean} true if the algorithm is loaded
374
- * @throws {Error} if the algorithm cannot be loaded or is not defined
375
- */
376
- #loadAlgo ( algo ) {
377
-
378
- if ( this.#loadedAlgo.has( algo ) ) {
379
-
380
- return true;
381
-
382
- } else if ( this.isAlgo( algo ) ) {
383
-
384
- let typeOf = typeof this.#algorithms[ algo ];
385
-
386
- if ( typeOf === 'function' ) {
387
-
388
- this.#loadedAlgo.add( algo );
389
-
390
- return true;
391
-
392
- } else if ( typeOf === 'string' ) {
393
-
394
- try {
395
-
396
- /* lazy-load algorithm module */
397
-
398
- this.#algorithms[ algo ] = require(
399
- this.#algorithms[ algo ]
400
- );
401
-
402
- this.#loadedAlgo.add( algo );
403
-
404
- return true;
405
-
406
- } catch ( err ) {
407
-
408
- throw new Error (
409
- `Failed to load algorithm "${algo}".`,
410
- { cause: err }
411
- );
412
-
413
- }
414
-
415
- } else {
416
-
417
- throw new Error (
418
- `Algorithm "${algo}" cannot be loaded.`
419
- );
420
-
421
- }
422
-
423
- } else {
424
-
425
- throw new Error (
426
- `Algorithm "${algo}" is not defined.`
427
- );
428
-
429
- }
430
-
431
- };
432
-
433
- /**
434
- * --------------------------------------------------
435
- * Custom Filters
436
- * --------------------------------------------------
437
- */
438
-
439
- /**
440
- * list all added or artice filter names
441
- *
442
- * @param {Boolean} [activeOnly=false] if true, only names of active filters are returned
443
- * @returns {String[]} array of filter names
444
- */
445
- listFilter ( activeOnly = false ) {
446
-
447
- return activeOnly
448
- ? Array.from( this.#filter.entries() )
449
- .filter( ( [ _, filter ] ) => filter.active )
450
- .map( ( [ name ] ) => name )
451
- : [ ...this.#filter.keys() ];
452
-
453
- };
454
-
455
- /**
456
- * adds a custom normalization filter
457
- *
458
- * @param {String} name filter name
459
- * @param {Function} callback function implementing the filter (must accept a string and returns a normalized one)
460
- * @param {Int} [priority=10] priority of the filter (lower numbers are processed first)
461
- * @returns {Boolean} returns true if the filter was added successfully
462
- * @throws {Error} if the filter cannot be added
463
- */
464
- addFilter ( name, callback, priority = 10 ) {
465
-
466
- if (
467
- !this.#filter.has( name ) &&
468
- typeof callback === 'function' &&
469
- callback.length == 1 &&
470
- typeof callback.apply( null, [ 'abc' ] ) === 'string'
471
- ) {
472
-
473
- this.#filter.set( name, {
474
- callback, priority,
475
- active: true
476
- } );
477
-
478
- this.clearCache();
479
-
480
- return true;
481
-
482
- } else {
483
-
484
- throw new Error (
485
- `Filter "${filter}" cannot be added.`
486
- );
487
-
488
- }
489
-
490
- };
491
-
492
- /**
493
- * removes a custom normalization filter
494
- *
495
- * @param {String} name filter name
496
- * @returns {Boolean} true if the filter was removed successfully
497
- * @throws {Error} if the filter does not exists
498
- */
499
- rmvFilter ( name ) {
500
-
501
- if ( this.#filter.delete( name ) ) {
502
-
503
- this.clearCache();
504
-
505
- return true;
506
-
507
- } else {
508
-
509
- throw new Error (
510
- `Filter "${filter}" does not exists.`
511
- );
512
-
513
- }
514
-
515
- };
516
-
517
- /**
518
- * pauses a custom normalization filter
519
- *
520
- * @param {String} name filter name
521
- * @returns {Boolean} true if the filter was paused successfully
522
- * @throws {Error} if the filter does not exists
523
- */
524
- pauseFilter ( name ) {
525
-
526
- if ( this.#filter.has( name ) ) {
527
-
528
- this.#filter.get( name ).active = false;
529
-
530
- this.clearCache();
531
-
532
- return true;
533
-
534
- } else {
535
-
536
- throw new Error (
537
- `Filter "${filter}" does not exists.`
538
- );
539
-
540
- }
541
-
542
- };
543
-
544
- /**
545
- * resumes a custom normalization filter
546
- *
547
- * @param {String} name filter name
548
- * @returns {Boolean} true if the filter was resumed successfully
549
- * @throws {Error} if the filter does not exists
550
- */
551
- resumeFilter ( name ) {
552
-
553
- if ( this.#filter.has( name ) ) {
554
-
555
- this.#filter.get( name ).active = true;
556
-
557
- this.clearCache();
558
-
559
- return true;
560
-
561
- } else {
562
-
563
- throw new Error (
564
- `Filter "${filter}" does not exists.`
565
- );
566
-
567
- }
568
-
569
- };
570
-
571
- /**
572
- * clears normalization filters (remove all of them)
573
- *
574
- * @returns {Boolean} always returns true
575
- */
576
- clearFilter () {
577
-
578
- this.#filter.clear();
579
-
580
- this.clearCache();
581
-
582
- return true;
583
-
584
- };
585
-
586
- /**
587
- * applies all active filters to a string
588
- *
589
- * @private
590
- * @param {String} str string to process
591
- * @returns {String} filtered string
592
- * @throws {Error} if applying filters cause an error
593
- */
594
- #applyFilters ( str ) {
595
-
596
- try {
597
-
598
- return Array.from( this.#filter.values() ).flat().filter(
599
- ( filter ) => filter.active
600
- ).sort(
601
- ( a, b ) => a.priority - b.priority
602
- ).reduce(
603
- ( res, filter ) => filter.callback.apply( null, [ res ] ),
604
- String ( str )
605
- );
606
-
607
- } catch ( err ) {
608
-
609
- throw new Error (
610
- `Error while applying filters.`,
611
- { cause: err }
612
- );
613
-
614
- }
615
-
616
- };
617
-
618
- /**
619
- * --------------------------------------------------
620
- * Normalization
621
- * --------------------------------------------------
622
- */
623
-
624
- /**
625
- * set default normalization flags
626
- *
627
- * @param {String} [flags=''] normalization flags
628
- * @returns {Boolean} always returns true
629
- */
630
- setFlags ( flags = '' ) {
631
-
632
- this.#flags = String ( flags );
633
-
634
- };
635
-
636
- /**
637
- * get default normalization flags
638
- *
639
- * @since 2.0.2
640
- * @returns {String} normalization flags
641
- */
642
- getFlags () {
643
-
644
- return this.#flags;
645
-
646
- };
647
-
648
- /**
649
- * normalizes a string by chainable options; uses cache to increase
650
- * performance and custom filters for advanced behavior
651
- *
652
- * list of all supported flags:
653
- *
654
- * s :: remove special chars
655
- * w :: collapse whitespaces
656
- * r :: remove repeated chars
657
- * k :: keep only letters
658
- * n :: ignore numbers
659
- * t :: trim whitespaces
660
- * i :: case insensitivity
661
- * d :: decompose unicode
662
- * u :: normalize unicode
663
- *
664
- * @param {String|String[]} string string(s) to normalize
665
- * @param {String} [flags=''] normalization flags
666
- * @returns {String|String[]} normalized string(s)
667
- * @throws {Error} if normalization cause an error
668
- */
669
- normalize ( input, flags = '' ) {
670
-
671
- const processStr = ( str ) => {
672
-
673
- let res = String ( str );
674
-
675
- /* use normalized string from cache to increase performance */
676
-
677
- let key = `${res}::${flags}`;
678
-
679
- if ( this.#cache.has( key ) ) {
680
-
681
- return this.#cache.get( key );
682
-
683
- }
684
-
685
- /* apply custom filters */
686
-
687
- res = this.#applyFilters( res );
688
-
689
- /* normalize using flags */
690
-
691
- try {
692
-
693
- if ( flags.includes( 's' ) ) res = res.replace( /[^a-z0-9]/gi, '' );
694
- if ( flags.includes( 'w' ) ) res = res.replace( /\s+/g, ' ' );
695
- if ( flags.includes( 'r' ) ) res = res.replace( /(.)\1+/g, '$1' );
696
- if ( flags.includes( 'k' ) ) res = res.replace( /[^a-z]/gi, '' );
697
- if ( flags.includes( 'n' ) ) res = res.replace( /[0-9]/g, '' );
698
- if ( flags.includes( 't' ) ) res = res.trim();
699
- if ( flags.includes( 'i' ) ) res = res.toLowerCase();
700
- if ( flags.includes( 'd' ) ) res = res.normalize( 'NFD' ).replace( /[\u0300-\u036f]/g, '' );
701
- if ( flags.includes( 'u' ) ) res = res.normalize( 'NFC' );
702
-
703
- } catch ( err ) {
704
-
705
- throw new Error (
706
- `Error while normalization.`,
707
- { cause: err }
708
- );
709
-
710
- }
711
-
712
- /* store the normalized string in the cache */
713
-
714
- this.#cache.set( key, res );
715
-
716
- return res;
717
-
718
- }
719
-
720
- /* processing multiple string */
721
-
722
- if ( Array.isArray( input ) ) {
723
-
724
- return input.map(
725
- ( str ) => processStr( str )
726
- );
727
-
728
- }
729
-
730
- return processStr( input );
731
-
732
- };
733
-
734
- /**
735
- * clears the normalization cache
736
- *
737
- * @returns {Boolean} always returns true
738
- */
739
- clearCache () {
740
-
741
- this.#cache.clear();
742
-
743
- return true;
744
-
745
- };
746
-
747
- /**
748
- * --------------------------------------------------
749
- * Similarity Comparison
750
- * --------------------------------------------------
751
- */
752
-
753
- /**
754
- * compares two string a and b using the passed algorithm
755
- *
756
- * @param {String} algo name of the algorithm
757
- * @param {String} a string a
758
- * @param {String} b string b
759
- * @param {Object} [config={}] config (flags, args)
760
- * @returns {Mixed} similarity score (0..1) or raw output
761
- * @throws {Error} if algorithm cause an error
762
- */
763
- compare ( algo, a, b, config = {} ) {
764
-
765
- if ( this.#loadAlgo( algo ) ) {
766
-
767
- /* handle trivial cases */
768
-
769
- if ( a === b ) return 1; // strings are identical
770
- if ( a.length < 2 || b.length < 2 ) return 0; // too short to compare
771
-
772
- /* apply similarity algorithm */
773
-
774
- const {
775
- flags = this.#flags,
776
- options = {}
777
- } = config;
778
-
779
- try {
780
-
781
- return this.#algorithms[ algo ].apply( null, [
782
- this.normalize( a, flags ),
783
- this.normalize( b, flags ),
784
- options
785
- ] );
786
-
787
- } catch ( err ) {
788
-
789
- throw new Error (
790
- `Error in algorithm "${algo}".`,
791
- { cause: err }
792
- );
793
-
794
- }
795
-
796
- }
797
-
798
- };
799
-
800
- /**
801
- * tests the similarity between the base string and a target string
802
- * using the current algorithm
803
- *
804
- * @param {String} str target string
805
- * @param {Object} [config={}] config (flags, args)
806
- * @returns {Mixed} similarity score (0..1) or raw output
807
- */
808
- test ( str, config = {} ) {
809
-
810
- if ( this.#checkReady() ) {
811
-
812
- return this.compare(
813
- this.#algo,
814
- this.#str, str,
815
- config
816
- );
817
-
818
- }
819
-
820
- };
821
-
822
- /**
823
- * tests the similarity of multiple strings against the base string
824
- *
825
- * @param {String[]} arr array of strings
826
- * @param {Object} [config={}] config (flags, args)
827
- * @returns {Object[]} array of objects, each containing the target string and its similarity score / raw output
828
- */
829
- batchTest ( arr, config = {} ) {
830
-
831
- if ( this.#checkReady() ) {
832
-
833
- return [ ...arr ].map( ( str ) => ( {
834
- target: str,
835
- match: this.compare(
836
- this.#algo,
837
- this.#str, str,
838
- config
839
- )
840
- } ) );
841
-
842
- }
843
-
844
- };
845
-
846
- /**
847
- * finds strings in an array that exceed a similarity threshold
848
- * returns the array sorted by highest similarity
849
- *
850
- * @param {String[]} arr array of strings
851
- * @param {Object} [config={}] config (flags, threshold, args)
852
- * @returns {Object[]} array of objects, sorted by highest similarity
853
- */
854
- match ( arr, config = {} ) {
855
-
856
- const { threshold = 0 } = config;
857
-
858
- delete config?.options?.raw;
859
-
860
- return this.batchTest(
861
- arr, config
862
- ).filter(
863
- ( r ) => r.match >= threshold
864
- ).sort(
865
- ( a, b ) => b.match - a.match
866
- );
867
-
868
- };
869
-
870
- /**
871
- * finds the closest matching string from an array
872
- *
873
- * @param {String[]} arr array of strings
874
- * @param {Object} [config={}] config (flags, args)
875
- * @returns {String} closest matching string
876
- */
877
- closest ( arr, config = {} ) {
878
-
879
- let res = this.match(
880
- arr, config
881
- );
882
-
883
- return res.length && res[ 0 ].match > 0
884
- ? res[ 0 ].target
885
- : undefined;
886
-
887
- };
888
-
889
- /**
890
- * generate a similarity matrix for an array of strings
891
- *
892
- * @param {String} algo name of the algorithm
893
- * @param {String[]} arr array of strings to cross-compare
894
- * @param {Object} [config={}] config (flags, args)
895
- * @returns {Number[][]} 2D array representing the similarity matrix
896
- */
897
- similarityMatrix ( algo, arr, config = {} ) {
898
-
899
- if ( this.#loadAlgo( algo ) ) {
900
-
901
- delete config?.options?.raw;
902
-
903
- return [ ...arr ].map( ( a, i ) => {
904
-
905
- return [ ...arr ].map(
906
- ( b, j ) => i === j ? 1 : this.compare(
907
- algo, a, b, config
908
- )
909
- );
910
-
911
- } );
912
-
913
- }
914
-
915
- };
916
-
917
- };