cmpstr 2.0.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +75 -499
  3. package/dist/CmpStr.esm.js +4863 -0
  4. package/dist/CmpStr.esm.js.map +1 -0
  5. package/dist/CmpStr.esm.min.js +8 -0
  6. package/dist/CmpStr.esm.min.js.map +1 -0
  7. package/dist/CmpStr.umd.js +4875 -0
  8. package/dist/CmpStr.umd.js.map +1 -0
  9. package/dist/CmpStr.umd.min.js +8 -0
  10. package/dist/CmpStr.umd.min.js.map +1 -0
  11. package/dist/cjs/CmpStr.js +663 -0
  12. package/dist/cjs/CmpStr.js.map +1 -0
  13. package/dist/cjs/CmpStrAsync.js +336 -0
  14. package/dist/cjs/CmpStrAsync.js.map +1 -0
  15. package/dist/cjs/index.js +15 -0
  16. package/dist/cjs/index.js.map +1 -0
  17. package/dist/cjs/metric/Cosine.js +101 -0
  18. package/dist/cjs/metric/Cosine.js.map +1 -0
  19. package/dist/cjs/metric/DamerauLevenshtein.js +110 -0
  20. package/dist/cjs/metric/DamerauLevenshtein.js.map +1 -0
  21. package/dist/cjs/metric/DiceSorensen.js +91 -0
  22. package/dist/cjs/metric/DiceSorensen.js.map +1 -0
  23. package/dist/cjs/metric/Hamming.js +82 -0
  24. package/dist/cjs/metric/Hamming.js.map +1 -0
  25. package/dist/cjs/metric/Jaccard.js +76 -0
  26. package/dist/cjs/metric/Jaccard.js.map +1 -0
  27. package/dist/cjs/metric/JaroWinkler.js +114 -0
  28. package/dist/cjs/metric/JaroWinkler.js.map +1 -0
  29. package/dist/cjs/metric/LCS.js +89 -0
  30. package/dist/cjs/metric/LCS.js.map +1 -0
  31. package/dist/cjs/metric/Levenshtein.js +94 -0
  32. package/dist/cjs/metric/Levenshtein.js.map +1 -0
  33. package/dist/cjs/metric/Metric.js +445 -0
  34. package/dist/cjs/metric/Metric.js.map +1 -0
  35. package/dist/cjs/metric/NeedlemanWunsch.js +95 -0
  36. package/dist/cjs/metric/NeedlemanWunsch.js.map +1 -0
  37. package/dist/cjs/metric/SmithWaterman.js +98 -0
  38. package/dist/cjs/metric/SmithWaterman.js.map +1 -0
  39. package/dist/cjs/metric/qGram.js +91 -0
  40. package/dist/cjs/metric/qGram.js.map +1 -0
  41. package/dist/cjs/phonetic/Cologne.js +112 -0
  42. package/dist/cjs/phonetic/Cologne.js.map +1 -0
  43. package/dist/cjs/phonetic/Metaphone.js +172 -0
  44. package/dist/cjs/phonetic/Metaphone.js.map +1 -0
  45. package/dist/cjs/phonetic/Phonetic.js +413 -0
  46. package/dist/cjs/phonetic/Phonetic.js.map +1 -0
  47. package/dist/cjs/phonetic/Soundex.js +135 -0
  48. package/dist/cjs/phonetic/Soundex.js.map +1 -0
  49. package/dist/cjs/utils/DeepMerge.js +144 -0
  50. package/dist/cjs/utils/DeepMerge.js.map +1 -0
  51. package/dist/cjs/utils/DiffChecker.js +500 -0
  52. package/dist/cjs/utils/DiffChecker.js.map +1 -0
  53. package/dist/cjs/utils/Filter.js +189 -0
  54. package/dist/cjs/utils/Filter.js.map +1 -0
  55. package/dist/cjs/utils/HashTable.js +175 -0
  56. package/dist/cjs/utils/HashTable.js.map +1 -0
  57. package/dist/cjs/utils/Normalizer.js +144 -0
  58. package/dist/cjs/utils/Normalizer.js.map +1 -0
  59. package/dist/cjs/utils/Pool.js +196 -0
  60. package/dist/cjs/utils/Pool.js.map +1 -0
  61. package/dist/cjs/utils/Profiler.js +229 -0
  62. package/dist/cjs/utils/Profiler.js.map +1 -0
  63. package/dist/cjs/utils/Registry.js +148 -0
  64. package/dist/cjs/utils/Registry.js.map +1 -0
  65. package/dist/cjs/utils/TextAnalyzer.js +358 -0
  66. package/dist/cjs/utils/TextAnalyzer.js.map +1 -0
  67. package/dist/esm/CmpStr.js +662 -0
  68. package/dist/esm/CmpStr.js.map +1 -0
  69. package/dist/esm/CmpStrAsync.js +331 -0
  70. package/dist/esm/CmpStrAsync.js.map +1 -0
  71. package/dist/esm/index.js +7 -0
  72. package/dist/esm/index.js.map +1 -0
  73. package/dist/esm/metric/Cosine.js +99 -0
  74. package/dist/esm/metric/Cosine.js.map +1 -0
  75. package/dist/esm/metric/DamerauLevenshtein.js +108 -0
  76. package/dist/esm/metric/DamerauLevenshtein.js.map +1 -0
  77. package/dist/esm/metric/DiceSorensen.js +89 -0
  78. package/dist/esm/metric/DiceSorensen.js.map +1 -0
  79. package/dist/esm/metric/Hamming.js +77 -0
  80. package/dist/esm/metric/Hamming.js.map +1 -0
  81. package/dist/esm/metric/Jaccard.js +74 -0
  82. package/dist/esm/metric/Jaccard.js.map +1 -0
  83. package/dist/esm/metric/JaroWinkler.js +112 -0
  84. package/dist/esm/metric/JaroWinkler.js.map +1 -0
  85. package/dist/esm/metric/LCS.js +87 -0
  86. package/dist/esm/metric/LCS.js.map +1 -0
  87. package/dist/esm/metric/Levenshtein.js +92 -0
  88. package/dist/esm/metric/Levenshtein.js.map +1 -0
  89. package/dist/esm/metric/Metric.js +442 -0
  90. package/dist/esm/metric/Metric.js.map +1 -0
  91. package/dist/esm/metric/NeedlemanWunsch.js +93 -0
  92. package/dist/esm/metric/NeedlemanWunsch.js.map +1 -0
  93. package/dist/esm/metric/SmithWaterman.js +96 -0
  94. package/dist/esm/metric/SmithWaterman.js.map +1 -0
  95. package/dist/esm/metric/qGram.js +89 -0
  96. package/dist/esm/metric/qGram.js.map +1 -0
  97. package/dist/esm/phonetic/Cologne.js +114 -0
  98. package/dist/esm/phonetic/Cologne.js.map +1 -0
  99. package/dist/esm/phonetic/Metaphone.js +174 -0
  100. package/dist/esm/phonetic/Metaphone.js.map +1 -0
  101. package/dist/esm/phonetic/Phonetic.js +409 -0
  102. package/dist/esm/phonetic/Phonetic.js.map +1 -0
  103. package/dist/esm/phonetic/Soundex.js +137 -0
  104. package/dist/esm/phonetic/Soundex.js.map +1 -0
  105. package/dist/esm/utils/DeepMerge.js +139 -0
  106. package/dist/esm/utils/DeepMerge.js.map +1 -0
  107. package/dist/esm/utils/DiffChecker.js +498 -0
  108. package/dist/esm/utils/DiffChecker.js.map +1 -0
  109. package/dist/esm/utils/Filter.js +187 -0
  110. package/dist/esm/utils/Filter.js.map +1 -0
  111. package/dist/esm/utils/HashTable.js +173 -0
  112. package/dist/esm/utils/HashTable.js.map +1 -0
  113. package/dist/esm/utils/Normalizer.js +142 -0
  114. package/dist/esm/utils/Normalizer.js.map +1 -0
  115. package/dist/esm/utils/Pool.js +194 -0
  116. package/dist/esm/utils/Pool.js.map +1 -0
  117. package/dist/esm/utils/Profiler.js +227 -0
  118. package/dist/esm/utils/Profiler.js.map +1 -0
  119. package/dist/esm/utils/Registry.js +142 -0
  120. package/dist/esm/utils/Registry.js.map +1 -0
  121. package/dist/esm/utils/TextAnalyzer.js +356 -0
  122. package/dist/esm/utils/TextAnalyzer.js.map +1 -0
  123. package/dist/types/CmpStr.d.ts +472 -0
  124. package/dist/types/CmpStrAsync.d.ts +233 -0
  125. package/dist/types/index.d.ts +51 -0
  126. package/dist/types/metric/Cosine.d.ts +57 -0
  127. package/dist/types/metric/DamerauLevenshtein.d.ts +50 -0
  128. package/dist/types/metric/DiceSorensen.d.ts +57 -0
  129. package/dist/types/metric/Hamming.d.ts +49 -0
  130. package/dist/types/metric/Jaccard.d.ts +48 -0
  131. package/dist/types/metric/JaroWinkler.d.ts +50 -0
  132. package/dist/types/metric/LCS.d.ts +50 -0
  133. package/dist/types/metric/Levenshtein.d.ts +50 -0
  134. package/dist/types/metric/Metric.d.ts +261 -0
  135. package/dist/types/metric/NeedlemanWunsch.d.ts +47 -0
  136. package/dist/types/metric/SmithWaterman.d.ts +48 -0
  137. package/dist/types/metric/index.d.ts +41 -0
  138. package/dist/types/metric/qGram.d.ts +56 -0
  139. package/dist/types/phonetic/Cologne.d.ts +46 -0
  140. package/dist/types/phonetic/Metaphone.d.ts +50 -0
  141. package/dist/types/phonetic/Phonetic.d.ts +189 -0
  142. package/dist/types/phonetic/Soundex.d.ts +49 -0
  143. package/dist/types/phonetic/index.d.ts +30 -0
  144. package/dist/types/utils/DeepMerge.d.ts +70 -0
  145. package/dist/types/utils/DiffChecker.d.ts +137 -0
  146. package/dist/types/utils/Filter.d.ts +97 -0
  147. package/dist/types/utils/HashTable.d.ts +86 -0
  148. package/dist/types/utils/Normalizer.d.ts +76 -0
  149. package/dist/types/utils/Pool.d.ts +63 -0
  150. package/dist/types/utils/Profiler.d.ts +129 -0
  151. package/dist/types/utils/Registry.d.ts +57 -0
  152. package/dist/types/utils/TextAnalyzer.d.ts +199 -0
  153. package/dist/types/utils/Types.d.ts +313 -0
  154. package/package.json +62 -49
  155. package/src/CmpStr.d.ts +0 -70
  156. package/src/CmpStr.js +0 -912
  157. package/src/CmpStrAsync.d.ts +0 -19
  158. package/src/CmpStrAsync.js +0 -204
  159. package/src/algorithms/cosine.js +0 -86
  160. package/src/algorithms/damerau.js +0 -78
  161. package/src/algorithms/dice.js +0 -65
  162. package/src/algorithms/hamming.js +0 -44
  163. package/src/algorithms/jaccard.js +0 -34
  164. package/src/algorithms/jaroWinkler.js +0 -106
  165. package/src/algorithms/lcs.js +0 -58
  166. package/src/algorithms/levenshtein.js +0 -70
  167. package/src/algorithms/needlemanWunsch.js +0 -72
  168. package/src/algorithms/qGram.js +0 -63
  169. package/src/algorithms/smithWaterman.js +0 -78
  170. package/src/algorithms/soundex.js +0 -152
  171. package/src/index.d.ts +0 -3
  172. package/src/index.js +0 -47
package/src/CmpStr.js DELETED
@@ -1,912 +0,0 @@
1
- /**
2
- * class CmpStr
3
- *
4
- * The CmpStr class is the core of the cmpstr package. It provides methods to calculate
5
- * string similarity, find the closest matches in arrays, and generate similarity
6
- * matrices. The class supports built-in algorithms (e.g., Levenshtein, Dice-Sørensen)
7
- * and allows users to add custom algorithms. It also includes features like string
8
- * normalization, caching, and extensibility.
9
- *
10
- * @author komed3 (Paul Köhler)
11
- * @license MIT
12
- */
13
-
14
- 'use strict';
15
-
16
- /**
17
- * module exports
18
- * @public
19
- */
20
-
21
- module.exports = class CmpStr {
22
-
23
- /**
24
- * --------------------------------------------------
25
- * Global Variables
26
- * --------------------------------------------------
27
- */
28
-
29
- /**
30
- * all pre-defined similarity algorithms
31
- *
32
- * @private
33
- * @type {Object}
34
- */
35
- #algorithms = {
36
- cosine: './algorithms/cosine',
37
- damerau: './algorithms/damerau',
38
- dice: './algorithms/dice',
39
- hamming: './algorithms/hamming',
40
- jaccard: './algorithms/jaccard',
41
- jaro: './algorithms/jaroWinkler',
42
- lcs: './algorithms/lcs',
43
- levenshtein: './algorithms/levenshtein',
44
- needlemanWunsch: './algorithms/needlemanWunsch',
45
- qGram: './algorithms/qGram',
46
- smithWaterman: './algorithms/smithWaterman',
47
- soundex: './algorithms/soundex'
48
- };
49
-
50
- /**
51
- * stores the names of loaded algorithms
52
- *
53
- * @since 2.0.2
54
- * @private
55
- * @type {Set<String>}
56
- */
57
- #loadedAlgo = new Set ();
58
-
59
- /**
60
- * normalized strings cache
61
- *
62
- * @private
63
- * @type {Map<String, String>}
64
- */
65
- #cache = new Map ();
66
-
67
- /**
68
- * added filters for string normalization
69
- *
70
- * @private
71
- * @type {Map<String, Object[]>}
72
- */
73
- #filter = new Map ();
74
-
75
- /**
76
- * default normalization flags
77
- * set by setFlags()
78
- *
79
- * @private
80
- * @type {String}
81
- */
82
- #flags = '';
83
-
84
- /**
85
- * current algorithm to use for similarity calculations
86
- * set by setAlgo(), addAlgo() or constructor()
87
- *
88
- * @private
89
- * @type {String}
90
- */
91
- #algo;
92
-
93
- /**
94
- * base string for comparison
95
- * set by setStr or constructor()
96
- *
97
- * @private
98
- * @type {String}
99
- */
100
- #str;
101
-
102
- /**
103
- * stores the current ready state
104
- *
105
- * @since 2.0.2
106
- * @private
107
- * @type {Boolean}
108
- */
109
- #readyState = false;
110
-
111
- /**
112
- * --------------------------------------------------
113
- * Constructor
114
- * --------------------------------------------------
115
- */
116
-
117
- /**
118
- * initializes a CmpStr instance
119
- * algorithm and base string can be set by initialization
120
- *
121
- * @param {String} algo name of the algorithm to use for calculation
122
- * @param {String} str string to set as the base
123
- */
124
- constructor ( algo = undefined, str = undefined ) {
125
-
126
- if ( algo !== undefined ) {
127
-
128
- this.setAlgo( algo );
129
-
130
- }
131
-
132
- if ( str !== undefined ) {
133
-
134
- this.setStr( str );
135
-
136
- }
137
-
138
- };
139
-
140
- /**
141
- * --------------------------------------------------
142
- * Ready State
143
- * --------------------------------------------------
144
- */
145
-
146
- /**
147
- * checks whether string and algorithm are set correctly
148
- *
149
- * @returns {Boolean} true if ready, false otherwise
150
- */
151
- isReady () {
152
-
153
- return this.#readyState;
154
-
155
- };
156
-
157
- /**
158
- * updates the readiness state
159
- *
160
- * @since 2.0.2
161
- * @private
162
- */
163
- #updateReadyState () {
164
-
165
- this.#readyState = (
166
- typeof this.#algo === 'string' &&
167
- this.isAlgo( this.#algo ) &&
168
- typeof this.#str === 'string' &&
169
- this.#str.length !== 0
170
- );
171
-
172
- };
173
-
174
- /**
175
- * checks ready state and throws an error if not
176
- *
177
- * @private
178
- * @returns {Boolean} true if ready
179
- * @throws {Error} if CmpStr is not ready
180
- */
181
- #checkReady () {
182
-
183
- if ( !this.#readyState ) {
184
-
185
- throw new Error(
186
- `CmpStr instance is not ready. Ensure the algorithm and base string are set.`
187
- );
188
-
189
- }
190
-
191
- return true;
192
-
193
- };
194
-
195
- /**
196
- * --------------------------------------------------
197
- * Base String
198
- * --------------------------------------------------
199
- */
200
-
201
- /**
202
- * sets the base string for comparison
203
- *
204
- * @param {String} str string to set as the base
205
- * @returns {Boolean} always returns true
206
- */
207
- setStr ( str ) {
208
-
209
- this.#str = String ( str );
210
-
211
- this.#updateReadyState();
212
-
213
- return true;
214
-
215
- };
216
-
217
- /**
218
- * gets the base string for comparison
219
- *
220
- * @since 2.0.2
221
- * @returns {String} base string
222
- */
223
- getStr () {
224
-
225
- return this.#str;
226
-
227
- };
228
-
229
- /**
230
- * --------------------------------------------------
231
- * Algorithms
232
- * --------------------------------------------------
233
- */
234
-
235
- /**
236
- * list all registered similarity algorithms
237
- *
238
- * @param {Boolean} [loadedOnly=false] it true, only loaded algorithm names are returned
239
- * @returns {String[]} array of algorithm names
240
- */
241
- listAlgo ( loadedOnly = false ) {
242
-
243
- return loadedOnly
244
- ? [ ...this.#loadedAlgo ]
245
- : [ ...Object.keys( this.#algorithms ) ];
246
-
247
- };
248
-
249
- /**
250
- * checks if an algorithm is registered
251
- *
252
- * @param {String} algo name of the algorithm
253
- * @returns {Boolean} true if the algorithm is registered, false otherwise
254
- */
255
- isAlgo ( algo ) {
256
-
257
- return algo in this.#algorithms;
258
-
259
- };
260
-
261
- /**
262
- * sets the current algorithm to use for similarity calculations
263
- *
264
- * @param {String} algo name of the algorithm
265
- * @returns {Boolean} true if the algorithm has been set
266
- */
267
- setAlgo ( algo ) {
268
-
269
- if ( this.#loadAlgo( algo ) ) {
270
-
271
- this.#algo = algo;
272
-
273
- this.#updateReadyState();
274
-
275
- return true;
276
-
277
- }
278
-
279
- };
280
-
281
- /**
282
- * gets the current algorithm to use for similarity calculations
283
- *
284
- * @since 2.0.2
285
- * @returns {String} name of the algorithm
286
- */
287
- getAlgo () {
288
-
289
- return this.#algo;
290
-
291
- };
292
-
293
- /**
294
- * adds a new similarity algorithm
295
- *
296
- * @param {String} algo name of the algorithm
297
- * @param {Function} callback function implementing the algorithm (must accept two strings and return a number)
298
- * @param {Boolean} [useIt=true] whether to set this algorithm as the current one
299
- * @returns {Boolean} returns true if the algorithms was added successfully
300
- * @throws {Error} if the algorithm cannot be added
301
- */
302
- addAlgo ( algo, callback, useIt = true ) {
303
-
304
- if (
305
- !this.isAlgo( algo ) &&
306
- typeof callback === 'function' &&
307
- callback.length >= 2 &&
308
- typeof callback.apply( null, [ 'abc', 'abc' ] ) === 'number'
309
- ) {
310
-
311
- this.#algorithms[ algo ] = callback;
312
-
313
- if ( useIt ) {
314
-
315
- this.setAlgo( algo );
316
-
317
- }
318
-
319
- return true;
320
-
321
- } else {
322
-
323
- throw new Error (
324
- `Algorithm "${algo}" cannot be added.`
325
- );
326
-
327
- }
328
-
329
- };
330
-
331
- /**
332
- * removes a registered similarity algorithm
333
- *
334
- * @param {String} algo name of the algorithm
335
- * @returns {Boolean} true if the algorithm was removed successfully
336
- * @throws {Error} if the algorithm is not defined
337
- */
338
- rmvAlgo ( algo ) {
339
-
340
- if ( this.isAlgo( algo ) ) {
341
-
342
- delete this.#algorithms[ algo ];
343
-
344
- this.#loadedAlgo.delete( algo );
345
-
346
- if ( this.#algo === algo ) {
347
-
348
- /* reset current algorithm if it was removed */
349
-
350
- this.#algo = undefined;
351
-
352
- this.#updateReadyState();
353
-
354
- }
355
-
356
- return true;
357
-
358
- } else {
359
-
360
- throw new Error (
361
- `Algorithm "${algo}" is not defined.`
362
- );
363
-
364
- }
365
-
366
- };
367
-
368
- /**
369
- * lazy-loads the specified algorithm module
370
- *
371
- * @private
372
- * @param {String} algo name of the similarity algorithm
373
- * @returns {Boolean} true if the algorithm is loaded
374
- * @throws {Error} if the algorithm cannot be loaded or is not defined
375
- */
376
- #loadAlgo ( algo ) {
377
-
378
- if ( this.#loadedAlgo.has( algo ) ) {
379
-
380
- return true;
381
-
382
- } else if ( this.isAlgo( algo ) ) {
383
-
384
- let typeOf = typeof this.#algorithms[ algo ];
385
-
386
- if ( typeOf === 'function' ) {
387
-
388
- this.#loadedAlgo.add( algo );
389
-
390
- return true;
391
-
392
- } else if ( typeOf === 'string' ) {
393
-
394
- try {
395
-
396
- /* lazy-load algorithm module */
397
-
398
- this.#algorithms[ algo ] = require(
399
- this.#algorithms[ algo ]
400
- );
401
-
402
- this.#loadedAlgo.add( algo );
403
-
404
- return true;
405
-
406
- } catch ( err ) {
407
-
408
- throw new Error (
409
- `Failed to load algorithm "${algo}".`,
410
- { cause: err }
411
- );
412
-
413
- }
414
-
415
- } else {
416
-
417
- throw new Error (
418
- `Algorithm "${algo}" cannot be loaded.`
419
- );
420
-
421
- }
422
-
423
- } else {
424
-
425
- throw new Error (
426
- `Algorithm "${algo}" is not defined.`
427
- );
428
-
429
- }
430
-
431
- };
432
-
433
- /**
434
- * --------------------------------------------------
435
- * Custom Filters
436
- * --------------------------------------------------
437
- */
438
-
439
- /**
440
- * list all added filters
441
- *
442
- * @returns {String[]} array of filter names
443
- */
444
- listFilter () {
445
-
446
- return [ ...this.#filter.keys() ];
447
-
448
- };
449
-
450
- /**
451
- * adds a custom normalization filter
452
- *
453
- * @param {String} name filter name
454
- * @param {Function} callback function implementing the filter (must accept a string and returns a normalized one)
455
- * @param {Int} [priority=10] priority of the filter (lower numbers are processed first)
456
- * @returns {Boolean} returns true if the filter was added successfully
457
- * @throws {Error} if the filter cannot be added
458
- */
459
- addFilter ( name, callback, priority = 10 ) {
460
-
461
- if (
462
- !this.#filter.has( name ) &&
463
- typeof callback === 'function' &&
464
- callback.length == 1 &&
465
- typeof callback.apply( null, [ 'abc' ] ) === 'string'
466
- ) {
467
-
468
- this.#filter.set( name, {
469
- callback, priority,
470
- active: true
471
- } );
472
-
473
- this.clearCache();
474
-
475
- return true;
476
-
477
- } else {
478
-
479
- throw new Error (
480
- `Filter "${filter}" cannot be added.`
481
- );
482
-
483
- }
484
-
485
- };
486
-
487
- /**
488
- * removes a custom normalization filter
489
- *
490
- * @param {String} name filter name
491
- * @returns {Boolean} true if the filter was removed successfully
492
- * @throws {Error} if the filter does not exists
493
- */
494
- rmvFilter ( name ) {
495
-
496
- if ( this.#filter.delete( name ) ) {
497
-
498
- this.clearCache();
499
-
500
- return true;
501
-
502
- } else {
503
-
504
- throw new Error (
505
- `Filter "${filter}" does not exists.`
506
- );
507
-
508
- }
509
-
510
- };
511
-
512
- /**
513
- * pauses a custom normalization filter
514
- *
515
- * @param {String} name filter name
516
- * @returns {Boolean} true if the filter was paused successfully
517
- * @throws {Error} if the filter does not exists
518
- */
519
- pauseFilter ( name ) {
520
-
521
- if ( this.#filter.has( name ) ) {
522
-
523
- this.#filter.get( name ).active = false;
524
-
525
- this.clearCache();
526
-
527
- return true;
528
-
529
- } else {
530
-
531
- throw new Error (
532
- `Filter "${filter}" does not exists.`
533
- );
534
-
535
- }
536
-
537
- };
538
-
539
- /**
540
- * resumes a custom normalization filter
541
- *
542
- * @param {String} name filter name
543
- * @returns {Boolean} true if the filter was resumed successfully
544
- * @throws {Error} if the filter does not exists
545
- */
546
- resumeFilter ( name ) {
547
-
548
- if ( this.#filter.has( name ) ) {
549
-
550
- this.#filter.get( name ).active = true;
551
-
552
- this.clearCache();
553
-
554
- return true;
555
-
556
- } else {
557
-
558
- throw new Error (
559
- `Filter "${filter}" does not exists.`
560
- );
561
-
562
- }
563
-
564
- };
565
-
566
- /**
567
- * clears normalization filters (remove all of them)
568
- *
569
- * @returns {Boolean} always returns true
570
- */
571
- clearFilter () {
572
-
573
- this.#filter.clear();
574
-
575
- this.clearCache();
576
-
577
- return true;
578
-
579
- };
580
-
581
- /**
582
- * applies all active filters to a string
583
- *
584
- * @private
585
- * @param {String} str string to process
586
- * @returns {String} filtered string
587
- * @throws {Error} if applying filters cause an error
588
- */
589
- #applyFilters ( str ) {
590
-
591
- try {
592
-
593
- return Array.from( this.#filter.values() ).flat().filter(
594
- ( filter ) => filter.active
595
- ).sort(
596
- ( a, b ) => a.priority - b.priority
597
- ).reduce(
598
- ( res, filter ) => filter.callback.apply( null, [ res ] ),
599
- String ( str )
600
- );
601
-
602
- } catch ( err ) {
603
-
604
- throw new Error (
605
- `Error while applying filters.`,
606
- { cause: err }
607
- );
608
-
609
- }
610
-
611
- };
612
-
613
- /**
614
- * --------------------------------------------------
615
- * Normalization
616
- * --------------------------------------------------
617
- */
618
-
619
- /**
620
- * set default normalization flags
621
- *
622
- * @param {String} [flags=''] normalization flags
623
- * @returns {Boolean} always returns true
624
- */
625
- setFlags ( flags = '' ) {
626
-
627
- this.#flags = String ( flags );
628
-
629
- };
630
-
631
- /**
632
- * get default normalization flags
633
- *
634
- * @since 2.0.2
635
- * @returns {String} normalization flags
636
- */
637
- getFlags () {
638
-
639
- return this.#flags;
640
-
641
- };
642
-
643
- /**
644
- * normalizes a string by chainable options; uses cache to increase
645
- * performance and custom filters for advanced behavior
646
- *
647
- * list of all supported flags:
648
- *
649
- * s :: remove special chars
650
- * w :: collapse whitespaces
651
- * r :: remove repeated chars
652
- * k :: keep only letters
653
- * n :: ignore numbers
654
- * t :: trim whitespaces
655
- * i :: case insensitivity
656
- * d :: decompose unicode
657
- * u :: normalize unicode
658
- *
659
- * @param {String|String[]} string string(s) to normalize
660
- * @param {String} [flags=''] normalization flags
661
- * @returns {String|String[]} normalized string(s)
662
- * @throws {Error} if normalization cause an error
663
- */
664
- normalize ( input, flags = '' ) {
665
-
666
- const processStr = ( str ) => {
667
-
668
- let res = String ( str );
669
-
670
- /* use normalized string from cache to increase performance */
671
-
672
- let key = `${res}::${flags}`;
673
-
674
- if ( this.#cache.has( key ) ) {
675
-
676
- return this.#cache.get( key );
677
-
678
- }
679
-
680
- /* apply custom filters */
681
-
682
- res = this.#applyFilters( res );
683
-
684
- /* normalize using flags */
685
-
686
- try {
687
-
688
- if ( flags.includes( 's' ) ) res = res.replace( /[^a-z0-9]/gi, '' );
689
- if ( flags.includes( 'w' ) ) res = res.replace( /\s+/g, ' ' );
690
- if ( flags.includes( 'r' ) ) res = res.replace( /(.)\1+/g, '$1' );
691
- if ( flags.includes( 'k' ) ) res = res.replace( /[^a-z]/gi, '' );
692
- if ( flags.includes( 'n' ) ) res = res.replace( /[0-9]/g, '' );
693
- if ( flags.includes( 't' ) ) res = res.trim();
694
- if ( flags.includes( 'i' ) ) res = res.toLowerCase();
695
- if ( flags.includes( 'd' ) ) res = res.normalize( 'NFD' ).replace( /[\u0300-\u036f]/g, '' );
696
- if ( flags.includes( 'u' ) ) res = res.normalize( 'NFC' );
697
-
698
- } catch ( err ) {
699
-
700
- throw new Error (
701
- `Error while normalization.`,
702
- { cause: err }
703
- );
704
-
705
- }
706
-
707
- /* store the normalized string in the cache */
708
-
709
- this.#cache.set( key, res );
710
-
711
- return res;
712
-
713
- }
714
-
715
- /* processing multiple string */
716
-
717
- if ( Array.isArray( input ) ) {
718
-
719
- return input.map(
720
- ( str ) => processStr( str )
721
- );
722
-
723
- }
724
-
725
- return processStr( input );
726
-
727
- };
728
-
729
- /**
730
- * clears the normalization cache
731
- *
732
- * @returns {Boolean} always returns true
733
- */
734
- clearCache () {
735
-
736
- this.#cache.clear();
737
-
738
- return true;
739
-
740
- };
741
-
742
- /**
743
- * --------------------------------------------------
744
- * Similarity Comparison
745
- * --------------------------------------------------
746
- */
747
-
748
- /**
749
- * compares two string a and b using the passed algorithm
750
- *
751
- * @param {String} algo name of the algorithm
752
- * @param {String} a string a
753
- * @param {String} b string b
754
- * @param {Object} [config={}] config (flags, args)
755
- * @returns {Mixed} similarity score (0..1) or raw output
756
- * @throws {Error} if algorithm cause an error
757
- */
758
- compare ( algo, a, b, config = {} ) {
759
-
760
- if ( this.#loadAlgo( algo ) ) {
761
-
762
- /* handle trivial cases */
763
-
764
- if ( a === b ) return 1; // strings are identical
765
- if ( a.length < 2 || b.length < 2 ) return 0; // too short to compare
766
-
767
- /* apply similarity algorithm */
768
-
769
- const {
770
- flags = this.#flags,
771
- options = {}
772
- } = config;
773
-
774
- try {
775
-
776
- return this.#algorithms[ algo ].apply( null, [
777
- this.normalize( a, flags ),
778
- this.normalize( b, flags ),
779
- options
780
- ] );
781
-
782
- } catch ( err ) {
783
-
784
- throw new Error (
785
- `Error in algorithm "${algo}".`,
786
- { cause: err }
787
- );
788
-
789
- }
790
-
791
- }
792
-
793
- };
794
-
795
- /**
796
- * tests the similarity between the base string and a target string
797
- * using the current algorithm
798
- *
799
- * @param {String} str target string
800
- * @param {Object} [config={}] config (flags, args)
801
- * @returns {Mixed} similarity score (0..1) or raw output
802
- */
803
- test ( str, config = {} ) {
804
-
805
- if ( this.#checkReady() ) {
806
-
807
- return this.compare(
808
- this.#algo,
809
- this.#str, str,
810
- config
811
- );
812
-
813
- }
814
-
815
- };
816
-
817
- /**
818
- * tests the similarity of multiple strings against the base string
819
- *
820
- * @param {String[]} arr array of strings
821
- * @param {Object} [config={}] config (flags, args)
822
- * @returns {Object[]} array of objects, each containing the target string and its similarity score / raw output
823
- */
824
- batchTest ( arr, config = {} ) {
825
-
826
- if ( this.#checkReady() ) {
827
-
828
- return [ ...arr ].map( ( str ) => ( {
829
- target: str,
830
- match: this.compare(
831
- this.#algo,
832
- this.#str, str,
833
- config
834
- )
835
- } ) );
836
-
837
- }
838
-
839
- };
840
-
841
- /**
842
- * finds strings in an array that exceed a similarity threshold
843
- * returns the array sorted by highest similarity
844
- *
845
- * @param {String[]} arr array of strings
846
- * @param {Object} [config={}] config (flags, threshold, args)
847
- * @returns {Object[]} array of objects, sorted by highest similarity
848
- */
849
- match ( arr, config = {} ) {
850
-
851
- const { threshold = 0 } = config;
852
-
853
- delete config?.options?.raw;
854
-
855
- return this.batchTest(
856
- arr, config
857
- ).filter(
858
- ( r ) => r.match >= threshold
859
- ).sort(
860
- ( a, b ) => b.match - a.match
861
- );
862
-
863
- };
864
-
865
- /**
866
- * finds the closest matching string from an array
867
- *
868
- * @param {String[]} arr array of strings
869
- * @param {Object} [config={}] config (flags, args)
870
- * @returns {String} closest matching string
871
- */
872
- closest ( arr, config = {} ) {
873
-
874
- let res = this.match(
875
- arr, config
876
- );
877
-
878
- return res.length && res[ 0 ].match > 0
879
- ? res[ 0 ].target
880
- : undefined;
881
-
882
- };
883
-
884
- /**
885
- * generate a similarity matrix for an array of strings
886
- *
887
- * @param {String} algo name of the algorithm
888
- * @param {String[]} arr array of strings to cross-compare
889
- * @param {Object} [config={}] config (flags, args)
890
- * @returns {Number[][]} 2D array representing the similarity matrix
891
- */
892
- similarityMatrix ( algo, arr, config = {} ) {
893
-
894
- if ( this.#loadAlgo( algo ) ) {
895
-
896
- delete config?.options?.raw;
897
-
898
- return [ ...arr ].map( ( a, i ) => {
899
-
900
- return [ ...arr ].map(
901
- ( b, j ) => i === j ? 1 : this.compare(
902
- algo, a, b, config
903
- )
904
- );
905
-
906
- } );
907
-
908
- }
909
-
910
- };
911
-
912
- };