cmpstr 1.0.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +438 -79
- package/package.json +47 -25
- package/src/CmpStr.js +784 -0
- package/src/CmpStrAsync.js +191 -0
- package/src/algorithms/cosine.js +86 -0
- package/src/algorithms/damerau.js +78 -0
- package/src/algorithms/dice.js +65 -0
- package/src/algorithms/hamming.js +44 -0
- package/src/algorithms/jaccard.js +34 -0
- package/src/algorithms/jaroWinkler.js +106 -0
- package/src/algorithms/lcs.js +58 -0
- package/src/algorithms/levenshtein.js +70 -0
- package/src/algorithms/needlemanWunsch.js +72 -0
- package/src/algorithms/qGram.js +63 -0
- package/src/algorithms/smithWaterman.js +78 -0
- package/src/algorithms/soundex.js +152 -0
- package/src/index.js +47 -0
- package/index.js +0 -425
package/src/CmpStr.js
ADDED
|
@@ -0,0 +1,784 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* class CmpStr
|
|
3
|
+
*
|
|
4
|
+
* The CmpStr class is the core of the cmpstr package. It provides methods to calculate
|
|
5
|
+
* string similarity, find the closest matches in arrays, and generate similarity
|
|
6
|
+
* matrices. The class supports built-in algorithms (e.g., Levenshtein, Dice-Sørensen)
|
|
7
|
+
* and allows users to add custom algorithms. It also includes features like string
|
|
8
|
+
* normalization, caching, and extensibility.
|
|
9
|
+
*
|
|
10
|
+
* @author komed3 (Paul Köhler)
|
|
11
|
+
* @license MIT
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
'use strict';
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* module exports
|
|
18
|
+
* @public
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
module.exports = class CmpStr {
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* all pre-defined similarity algorithms
|
|
25
|
+
*
|
|
26
|
+
* @private
|
|
27
|
+
* @type {Object}
|
|
28
|
+
*/
|
|
29
|
+
#algorithms = {
|
|
30
|
+
cosine: './algorithms/cosine',
|
|
31
|
+
damerau: './algorithms/damerau',
|
|
32
|
+
dice: './algorithms/dice',
|
|
33
|
+
hamming: './algorithms/hamming',
|
|
34
|
+
jaccard: './algorithms/jaccard',
|
|
35
|
+
jaro: './algorithms/jaroWinkler',
|
|
36
|
+
lcs: './algorithms/lcs',
|
|
37
|
+
levenshtein: './algorithms/levenshtein',
|
|
38
|
+
needlemanWunsch: './algorithms/needlemanWunsch',
|
|
39
|
+
qGram: './algorithms/qGram',
|
|
40
|
+
smithWaterman: './algorithms/smithWaterman',
|
|
41
|
+
soundex: './algorithms/soundex'
|
|
42
|
+
};
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* normalized strings cache
|
|
46
|
+
*
|
|
47
|
+
* @private
|
|
48
|
+
* @type {Map<String, String>}
|
|
49
|
+
*/
|
|
50
|
+
#cache = new Map ();
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* added filters for string normalization
|
|
54
|
+
*
|
|
55
|
+
* @private
|
|
56
|
+
* @type {Map<String, Object[]>}
|
|
57
|
+
*/
|
|
58
|
+
#filter = new Map ();
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* default normalization flags
|
|
62
|
+
* set by setFlags()
|
|
63
|
+
*
|
|
64
|
+
* @public
|
|
65
|
+
* @type {String}
|
|
66
|
+
*/
|
|
67
|
+
flags = '';
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* base string for comparison
|
|
71
|
+
* set by setStr or constructor()
|
|
72
|
+
*
|
|
73
|
+
* @public
|
|
74
|
+
* @type {String}
|
|
75
|
+
*/
|
|
76
|
+
str;
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* current algorithm to use for similarity calculations
|
|
80
|
+
* set by setAlgo(), addAlgo() or constructor()
|
|
81
|
+
*
|
|
82
|
+
* @public
|
|
83
|
+
* @type {String}
|
|
84
|
+
*/
|
|
85
|
+
algo;
|
|
86
|
+
|
|
87
|
+
/**
|
|
88
|
+
* initializes a CmpStr instance
|
|
89
|
+
* algorithm and base string can be set by initialization
|
|
90
|
+
*
|
|
91
|
+
* @param {String} algo name of the algorithm to use for calculation
|
|
92
|
+
* @param {String} str string to set as the base
|
|
93
|
+
*/
|
|
94
|
+
constructor ( algo = undefined, str = undefined ) {
|
|
95
|
+
|
|
96
|
+
if ( algo !== undefined ) {
|
|
97
|
+
|
|
98
|
+
this.setAlgo( algo );
|
|
99
|
+
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
if ( str !== undefined ) {
|
|
103
|
+
|
|
104
|
+
this.setStr( str );
|
|
105
|
+
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
};
|
|
109
|
+
|
|
110
|
+
/**
|
|
111
|
+
* checks whether string and algorithm are set correctly
|
|
112
|
+
*
|
|
113
|
+
* @returns {Boolean} true if ready, false otherwise
|
|
114
|
+
*/
|
|
115
|
+
isReady () {
|
|
116
|
+
|
|
117
|
+
return (
|
|
118
|
+
typeof this.algo === 'string' &&
|
|
119
|
+
this.isAlgo( this.algo ) &&
|
|
120
|
+
typeof this.str === 'string' &&
|
|
121
|
+
this.str.length != 0
|
|
122
|
+
);
|
|
123
|
+
|
|
124
|
+
};
|
|
125
|
+
|
|
126
|
+
/**
|
|
127
|
+
* checks ready state and throws an error if not
|
|
128
|
+
*
|
|
129
|
+
* @returns {Boolean} true if ready
|
|
130
|
+
* @throws {Error} if CmpStr is not ready
|
|
131
|
+
*/
|
|
132
|
+
_checkReady () {
|
|
133
|
+
|
|
134
|
+
if ( !this.isReady() ) {
|
|
135
|
+
|
|
136
|
+
throw new Error(
|
|
137
|
+
`CmpStr instance is not ready. Ensure the algorithm and base string are set.`
|
|
138
|
+
);
|
|
139
|
+
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
return true;
|
|
143
|
+
|
|
144
|
+
};
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* sets the base string for comparison
|
|
148
|
+
*
|
|
149
|
+
* @param {String} str string to set as the base
|
|
150
|
+
* @returns {Boolean} always returns true
|
|
151
|
+
*/
|
|
152
|
+
setStr ( str ) {
|
|
153
|
+
|
|
154
|
+
this.str = String ( str );
|
|
155
|
+
|
|
156
|
+
return true;
|
|
157
|
+
|
|
158
|
+
};
|
|
159
|
+
|
|
160
|
+
/**
|
|
161
|
+
* --------------------------------------------------
|
|
162
|
+
* Algorithms
|
|
163
|
+
* --------------------------------------------------
|
|
164
|
+
*/
|
|
165
|
+
|
|
166
|
+
/**
|
|
167
|
+
* list all registered similarity algorithms
|
|
168
|
+
*
|
|
169
|
+
* @returns {String[]} array of algorithm names
|
|
170
|
+
*/
|
|
171
|
+
listAlgo () {
|
|
172
|
+
|
|
173
|
+
return [ ...Object.keys( this.#algorithms ) ];
|
|
174
|
+
|
|
175
|
+
};
|
|
176
|
+
|
|
177
|
+
/**
|
|
178
|
+
* checks if an algorithm is registered
|
|
179
|
+
*
|
|
180
|
+
* @param {String} algo name of the algorithm
|
|
181
|
+
* @returns {Boolean} true if the algorithm is registered, false otherwise
|
|
182
|
+
*/
|
|
183
|
+
isAlgo ( algo ) {
|
|
184
|
+
|
|
185
|
+
return algo in this.#algorithms;
|
|
186
|
+
|
|
187
|
+
};
|
|
188
|
+
|
|
189
|
+
/**
|
|
190
|
+
* sets the current algorithm to use for similarity calculations
|
|
191
|
+
*
|
|
192
|
+
* @param {String} algo name of the algorithm
|
|
193
|
+
* @returns {Boolean} true if the algorithm has been set
|
|
194
|
+
*/
|
|
195
|
+
setAlgo ( algo ) {
|
|
196
|
+
|
|
197
|
+
if ( this._loadAlgo( algo ) ) {
|
|
198
|
+
|
|
199
|
+
this.algo = algo;
|
|
200
|
+
|
|
201
|
+
return true;
|
|
202
|
+
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
};
|
|
206
|
+
|
|
207
|
+
/**
|
|
208
|
+
* adds a new similarity algorithm
|
|
209
|
+
*
|
|
210
|
+
* @param {String} algo name of the algorithm
|
|
211
|
+
* @param {Function} callback function implementing the algorithm (must accept two strings and return a number)
|
|
212
|
+
* @param {Boolean} [useIt=true] whether to set this algorithm as the current one
|
|
213
|
+
* @returns {Boolean} returns true if the algorithms was added successfully
|
|
214
|
+
* @throws {Error} if the algorithm cannot be added
|
|
215
|
+
*/
|
|
216
|
+
addAlgo ( algo, callback, useIt = true ) {
|
|
217
|
+
|
|
218
|
+
if (
|
|
219
|
+
!this.isAlgo( algo ) &&
|
|
220
|
+
typeof callback === 'function' &&
|
|
221
|
+
callback.length >= 2 &&
|
|
222
|
+
typeof callback.apply( null, [ 'abc', 'abc' ] ) === 'number'
|
|
223
|
+
) {
|
|
224
|
+
|
|
225
|
+
this.#algorithms[ algo ] = callback;
|
|
226
|
+
|
|
227
|
+
if ( useIt ) {
|
|
228
|
+
|
|
229
|
+
this.setAlgo( algo );
|
|
230
|
+
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
return true;
|
|
234
|
+
|
|
235
|
+
} else {
|
|
236
|
+
|
|
237
|
+
throw new Error (
|
|
238
|
+
`Algorithm "${algo}" cannot be added.`
|
|
239
|
+
);
|
|
240
|
+
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
};
|
|
244
|
+
|
|
245
|
+
/**
|
|
246
|
+
* removes a registered similarity algorithm
|
|
247
|
+
*
|
|
248
|
+
* @param {String} algo name of the algorithm
|
|
249
|
+
* @returns {Boolean} true if the algorithm was removed successfully
|
|
250
|
+
* @throws {Error} if the algorithm is not defined
|
|
251
|
+
*/
|
|
252
|
+
rmvAlgo ( algo ) {
|
|
253
|
+
|
|
254
|
+
if ( this.isAlgo( algo ) ) {
|
|
255
|
+
|
|
256
|
+
delete this.#algorithms[ algo ];
|
|
257
|
+
|
|
258
|
+
if ( this.algo === algo ) {
|
|
259
|
+
|
|
260
|
+
/* reset current algorithm if it was removed */
|
|
261
|
+
|
|
262
|
+
this.algo = undefined;
|
|
263
|
+
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
return true;
|
|
267
|
+
|
|
268
|
+
} else {
|
|
269
|
+
|
|
270
|
+
throw new Error (
|
|
271
|
+
`Algorithm "${algo}" is not defined.`
|
|
272
|
+
);
|
|
273
|
+
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
};
|
|
277
|
+
|
|
278
|
+
/**
|
|
279
|
+
* lazy-loads the specified algorithm module
|
|
280
|
+
*
|
|
281
|
+
* @param {String} algo name of the similarity algorithm
|
|
282
|
+
* @returns {Boolean} true if the algorithm is loaded
|
|
283
|
+
* @throws {Error} if the algorithm cannot be loaded or is not defined
|
|
284
|
+
*/
|
|
285
|
+
_loadAlgo ( algo ) {
|
|
286
|
+
|
|
287
|
+
if ( this.isAlgo( algo ) ) {
|
|
288
|
+
|
|
289
|
+
let typeOf = typeof this.#algorithms[ algo ];
|
|
290
|
+
|
|
291
|
+
if ( typeOf === 'function' ) {
|
|
292
|
+
|
|
293
|
+
return true;
|
|
294
|
+
|
|
295
|
+
} else if ( typeOf === 'string' ) {
|
|
296
|
+
|
|
297
|
+
try {
|
|
298
|
+
|
|
299
|
+
/* lazy-load algorithm module */
|
|
300
|
+
|
|
301
|
+
this.#algorithms[ algo ] = require(
|
|
302
|
+
this.#algorithms[ algo ]
|
|
303
|
+
);
|
|
304
|
+
|
|
305
|
+
return true;
|
|
306
|
+
|
|
307
|
+
} catch ( err ) {
|
|
308
|
+
|
|
309
|
+
throw new Error (
|
|
310
|
+
`Failed to load algorithm "${algo}".`,
|
|
311
|
+
{ cause: err }
|
|
312
|
+
);
|
|
313
|
+
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
} else {
|
|
317
|
+
|
|
318
|
+
throw new Error (
|
|
319
|
+
`Algorithm "${algo}" cannot be loaded.`
|
|
320
|
+
);
|
|
321
|
+
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
} else {
|
|
325
|
+
|
|
326
|
+
throw new Error (
|
|
327
|
+
`Algorithm "${algo}" is not defined.`
|
|
328
|
+
);
|
|
329
|
+
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
};
|
|
333
|
+
|
|
334
|
+
/**
|
|
335
|
+
* --------------------------------------------------
|
|
336
|
+
* Custom Filters
|
|
337
|
+
* --------------------------------------------------
|
|
338
|
+
*/
|
|
339
|
+
|
|
340
|
+
/**
|
|
341
|
+
* list all added filters
|
|
342
|
+
*
|
|
343
|
+
* @returns {String[]} array of filter names
|
|
344
|
+
*/
|
|
345
|
+
listFilter () {
|
|
346
|
+
|
|
347
|
+
return [ ...this.#filter.keys() ];
|
|
348
|
+
|
|
349
|
+
};
|
|
350
|
+
|
|
351
|
+
/**
|
|
352
|
+
* adds a custom normalization filter
|
|
353
|
+
*
|
|
354
|
+
* @param {String} name filter name
|
|
355
|
+
* @param {Function} callback function implementing the filter (must accept a string and returns a normalized one)
|
|
356
|
+
* @param {Int} [priority=10] priority of the filter (lower numbers are processed first)
|
|
357
|
+
* @returns {Boolean} returns true if the filter was added successfully
|
|
358
|
+
* @throws {Error} if the filter cannot be added
|
|
359
|
+
*/
|
|
360
|
+
addFilter ( name, callback, priority = 10 ) {
|
|
361
|
+
|
|
362
|
+
if (
|
|
363
|
+
!this.#filter.has( name ) &&
|
|
364
|
+
typeof callback === 'function' &&
|
|
365
|
+
callback.length == 1 &&
|
|
366
|
+
typeof callback.apply( null, [ 'abc' ] ) === 'string'
|
|
367
|
+
) {
|
|
368
|
+
|
|
369
|
+
this.#filter.set( name, {
|
|
370
|
+
callback, priority,
|
|
371
|
+
active: true
|
|
372
|
+
} );
|
|
373
|
+
|
|
374
|
+
this.clearCache();
|
|
375
|
+
|
|
376
|
+
return true;
|
|
377
|
+
|
|
378
|
+
} else {
|
|
379
|
+
|
|
380
|
+
throw new Error (
|
|
381
|
+
`Filter "${filter}" cannot be added.`
|
|
382
|
+
);
|
|
383
|
+
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
};
|
|
387
|
+
|
|
388
|
+
/**
|
|
389
|
+
* removes a custom normalization filter
|
|
390
|
+
*
|
|
391
|
+
* @param {String} name filter name
|
|
392
|
+
* @returns {Boolean} true if the filter was removed successfully
|
|
393
|
+
* @throws {Error} if the filter does not exists
|
|
394
|
+
*/
|
|
395
|
+
rmvFilter ( name ) {
|
|
396
|
+
|
|
397
|
+
if ( this.#filter.delete( name ) ) {
|
|
398
|
+
|
|
399
|
+
this.clearCache();
|
|
400
|
+
|
|
401
|
+
return true;
|
|
402
|
+
|
|
403
|
+
} else {
|
|
404
|
+
|
|
405
|
+
throw new Error (
|
|
406
|
+
`Filter "${filter}" does not exists.`
|
|
407
|
+
);
|
|
408
|
+
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
};
|
|
412
|
+
|
|
413
|
+
/**
|
|
414
|
+
* pauses a custom normalization filter
|
|
415
|
+
*
|
|
416
|
+
* @param {String} name filter name
|
|
417
|
+
* @returns {Boolean} true if the filter was paused successfully
|
|
418
|
+
* @throws {Error} if the filter does not exists
|
|
419
|
+
*/
|
|
420
|
+
pauseFilter ( name ) {
|
|
421
|
+
|
|
422
|
+
if ( this.#filter.has( name ) ) {
|
|
423
|
+
|
|
424
|
+
this.#filter.get( name ).active = false;
|
|
425
|
+
|
|
426
|
+
this.clearCache();
|
|
427
|
+
|
|
428
|
+
return true;
|
|
429
|
+
|
|
430
|
+
} else {
|
|
431
|
+
|
|
432
|
+
throw new Error (
|
|
433
|
+
`Filter "${filter}" does not exists.`
|
|
434
|
+
);
|
|
435
|
+
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
};
|
|
439
|
+
|
|
440
|
+
/**
|
|
441
|
+
* resumes a custom normalization filter
|
|
442
|
+
*
|
|
443
|
+
* @param {String} name filter name
|
|
444
|
+
* @returns {Boolean} true if the filter was resumed successfully
|
|
445
|
+
* @throws {Error} if the filter does not exists
|
|
446
|
+
*/
|
|
447
|
+
resumeFilter ( name ) {
|
|
448
|
+
|
|
449
|
+
if ( this.#filter.has( name ) ) {
|
|
450
|
+
|
|
451
|
+
this.#filter.get( name ).active = true;
|
|
452
|
+
|
|
453
|
+
this.clearCache();
|
|
454
|
+
|
|
455
|
+
return true;
|
|
456
|
+
|
|
457
|
+
} else {
|
|
458
|
+
|
|
459
|
+
throw new Error (
|
|
460
|
+
`Filter "${filter}" does not exists.`
|
|
461
|
+
);
|
|
462
|
+
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
};
|
|
466
|
+
|
|
467
|
+
/**
|
|
468
|
+
* clears normalization filters (remove all of them)
|
|
469
|
+
*
|
|
470
|
+
* @returns {Boolean} always returns true
|
|
471
|
+
*/
|
|
472
|
+
clearFilter () {
|
|
473
|
+
|
|
474
|
+
this.#filter.clear();
|
|
475
|
+
|
|
476
|
+
this.clearCache();
|
|
477
|
+
|
|
478
|
+
return true;
|
|
479
|
+
|
|
480
|
+
};
|
|
481
|
+
|
|
482
|
+
/**
|
|
483
|
+
* applies all active filters to a string
|
|
484
|
+
*
|
|
485
|
+
* @param {String} str string to process
|
|
486
|
+
* @returns {String} filtered string
|
|
487
|
+
* @throws {Error} if applying filters cause an error
|
|
488
|
+
*/
|
|
489
|
+
_applyFilters ( str ) {
|
|
490
|
+
|
|
491
|
+
try {
|
|
492
|
+
|
|
493
|
+
return Array.from( this.#filter.values() ).flat().filter(
|
|
494
|
+
( filter ) => filter.active
|
|
495
|
+
).sort(
|
|
496
|
+
( a, b ) => a.priority - b.priority
|
|
497
|
+
).reduce(
|
|
498
|
+
( res, filter ) => filter.callback.apply( null, [ res ] ),
|
|
499
|
+
String ( str )
|
|
500
|
+
);
|
|
501
|
+
|
|
502
|
+
} catch ( err ) {
|
|
503
|
+
|
|
504
|
+
throw new Error (
|
|
505
|
+
`Error while applying filters.`,
|
|
506
|
+
{ cause: err }
|
|
507
|
+
);
|
|
508
|
+
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
};
|
|
512
|
+
|
|
513
|
+
/**
|
|
514
|
+
* --------------------------------------------------
|
|
515
|
+
* Normalization
|
|
516
|
+
* --------------------------------------------------
|
|
517
|
+
*/
|
|
518
|
+
|
|
519
|
+
/**
|
|
520
|
+
* set default normalization flags
|
|
521
|
+
*
|
|
522
|
+
* @param {String} [flags=''] normalization flags
|
|
523
|
+
* @returns {Boolean} always returns true
|
|
524
|
+
*/
|
|
525
|
+
setFlags ( flags = '' ) {
|
|
526
|
+
|
|
527
|
+
this.flags = String ( flags );
|
|
528
|
+
|
|
529
|
+
};
|
|
530
|
+
|
|
531
|
+
/**
|
|
532
|
+
* normalizes a string by chainable options; uses cache to increase
|
|
533
|
+
* performance and custom filters for advanced behavior
|
|
534
|
+
*
|
|
535
|
+
* list of all supported flags:
|
|
536
|
+
*
|
|
537
|
+
* s :: remove special chars
|
|
538
|
+
* w :: collapse whitespaces
|
|
539
|
+
* r :: remove repeated chars
|
|
540
|
+
* k :: keep only letters
|
|
541
|
+
* n :: ignore numbers
|
|
542
|
+
* t :: trim whitespaces
|
|
543
|
+
* i :: case insensitivity
|
|
544
|
+
* d :: decompose unicode
|
|
545
|
+
* u :: normalize unicode
|
|
546
|
+
*
|
|
547
|
+
* @param {String} string string to normalize
|
|
548
|
+
* @param {String} [flags=''] normalization flags
|
|
549
|
+
* @returns {String} normalized string
|
|
550
|
+
* @throws {Error} if normalization cause an error
|
|
551
|
+
*/
|
|
552
|
+
normalize ( str, flags = '' ) {
|
|
553
|
+
|
|
554
|
+
let res = String ( str );
|
|
555
|
+
|
|
556
|
+
/* use normalized string from cache to increase performance */
|
|
557
|
+
|
|
558
|
+
let key = `${res}::${flags}`;
|
|
559
|
+
|
|
560
|
+
if ( this.#cache.has( key ) ) {
|
|
561
|
+
|
|
562
|
+
return this.#cache.get( key );
|
|
563
|
+
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
/* apply custom filters */
|
|
567
|
+
|
|
568
|
+
res = this._applyFilters( res );
|
|
569
|
+
|
|
570
|
+
/* normalize using flags */
|
|
571
|
+
|
|
572
|
+
try {
|
|
573
|
+
|
|
574
|
+
if ( flags.includes( 's' ) ) res = res.replace( /[^a-z0-9]/gi, '' );
|
|
575
|
+
if ( flags.includes( 'w' ) ) res = res.replace( /\s+/g, ' ' );
|
|
576
|
+
if ( flags.includes( 'r' ) ) res = res.replace( /(.)\1+/g, '$1' );
|
|
577
|
+
if ( flags.includes( 'k' ) ) res = res.replace( /[^a-z]/gi, '' );
|
|
578
|
+
if ( flags.includes( 'n' ) ) res = res.replace( /[0-9]/g, '' );
|
|
579
|
+
if ( flags.includes( 't' ) ) res = res.trim();
|
|
580
|
+
if ( flags.includes( 'i' ) ) res = res.toLowerCase();
|
|
581
|
+
if ( flags.includes( 'd' ) ) res = res.normalize( 'NFD' ).replace( /[\u0300-\u036f]/g, '' );
|
|
582
|
+
if ( flags.includes( 'u' ) ) res = res.normalize( 'NFC' );
|
|
583
|
+
|
|
584
|
+
} catch ( err ) {
|
|
585
|
+
|
|
586
|
+
throw new Error (
|
|
587
|
+
`Error while normalization.`,
|
|
588
|
+
{ cause: err }
|
|
589
|
+
);
|
|
590
|
+
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
/* store the normalized string in the cache */
|
|
594
|
+
|
|
595
|
+
this.#cache.set( key, res );
|
|
596
|
+
|
|
597
|
+
return res;
|
|
598
|
+
|
|
599
|
+
};
|
|
600
|
+
|
|
601
|
+
/**
|
|
602
|
+
* clears the normalization cache
|
|
603
|
+
*
|
|
604
|
+
* @returns {Boolean} always returns true
|
|
605
|
+
*/
|
|
606
|
+
clearCache () {
|
|
607
|
+
|
|
608
|
+
this.#cache.clear();
|
|
609
|
+
|
|
610
|
+
return true;
|
|
611
|
+
|
|
612
|
+
};
|
|
613
|
+
|
|
614
|
+
/**
|
|
615
|
+
* --------------------------------------------------
|
|
616
|
+
* Similarity Comparison
|
|
617
|
+
* --------------------------------------------------
|
|
618
|
+
*/
|
|
619
|
+
|
|
620
|
+
/**
|
|
621
|
+
* compares two string a and b using the passed algorithm
|
|
622
|
+
*
|
|
623
|
+
* @param {String} algo name of the algorithm
|
|
624
|
+
* @param {String} a string a
|
|
625
|
+
* @param {String} b string b
|
|
626
|
+
* @param {Object} [config={}] config (flags, args)
|
|
627
|
+
* @returns {Mixed} similarity score (0..1) or raw output
|
|
628
|
+
* @throws {Error} if algorithm cause an error
|
|
629
|
+
*/
|
|
630
|
+
compare ( algo, a, b, config = {} ) {
|
|
631
|
+
|
|
632
|
+
if ( this._loadAlgo( algo ) ) {
|
|
633
|
+
|
|
634
|
+
/* handle trivial cases */
|
|
635
|
+
|
|
636
|
+
if ( a === b ) return 1; // strings are identical
|
|
637
|
+
if ( a.length < 2 || b.length < 2 ) return 0; // too short to compare
|
|
638
|
+
|
|
639
|
+
/* apply similarity algorithm */
|
|
640
|
+
|
|
641
|
+
const {
|
|
642
|
+
flags = this.flags,
|
|
643
|
+
options = {}
|
|
644
|
+
} = config;
|
|
645
|
+
|
|
646
|
+
try {
|
|
647
|
+
|
|
648
|
+
return this.#algorithms[ algo ].apply( null, [
|
|
649
|
+
this.normalize( a, flags ),
|
|
650
|
+
this.normalize( b, flags ),
|
|
651
|
+
options
|
|
652
|
+
] );
|
|
653
|
+
|
|
654
|
+
} catch ( err ) {
|
|
655
|
+
|
|
656
|
+
throw new Error (
|
|
657
|
+
`Error in algorithm "${algo}".`,
|
|
658
|
+
{ cause: err }
|
|
659
|
+
);
|
|
660
|
+
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
};
|
|
666
|
+
|
|
667
|
+
/**
|
|
668
|
+
* tests the similarity between the base string and a target string
|
|
669
|
+
* using the current algorithm
|
|
670
|
+
*
|
|
671
|
+
* @param {String} str target string
|
|
672
|
+
* @param {Object} [config={}] config (flags, args)
|
|
673
|
+
* @returns {Mixed} similarity score (0..1) or raw output
|
|
674
|
+
*/
|
|
675
|
+
test ( str, config = {} ) {
|
|
676
|
+
|
|
677
|
+
if ( this._checkReady() ) {
|
|
678
|
+
|
|
679
|
+
return this.compare(
|
|
680
|
+
this.algo,
|
|
681
|
+
this.str, str,
|
|
682
|
+
config
|
|
683
|
+
);
|
|
684
|
+
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
};
|
|
688
|
+
|
|
689
|
+
/**
|
|
690
|
+
* tests the similarity of multiple strings against the base string
|
|
691
|
+
*
|
|
692
|
+
* @param {String[]} arr array of strings
|
|
693
|
+
* @param {Object} [config={}] config (flags, args)
|
|
694
|
+
* @returns {Object[]} array of objects, each containing the target string and its similarity score / raw output
|
|
695
|
+
*/
|
|
696
|
+
batchTest ( arr, config = {} ) {
|
|
697
|
+
|
|
698
|
+
if ( this._checkReady() ) {
|
|
699
|
+
|
|
700
|
+
return [ ...arr ].map( ( str ) => ( {
|
|
701
|
+
target: str,
|
|
702
|
+
match: this.compare(
|
|
703
|
+
this.algo,
|
|
704
|
+
this.str, str,
|
|
705
|
+
config
|
|
706
|
+
)
|
|
707
|
+
} ) );
|
|
708
|
+
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
};
|
|
712
|
+
|
|
713
|
+
/**
|
|
714
|
+
* finds strings in an array that exceed a similarity threshold
|
|
715
|
+
* returns the array sorted by highest similarity
|
|
716
|
+
*
|
|
717
|
+
* @param {String[]} arr array of strings
|
|
718
|
+
* @param {Object} [config={}] config (flags, threshold, args)
|
|
719
|
+
* @returns {Object[]} array of objects, sorted by highest similarity
|
|
720
|
+
*/
|
|
721
|
+
match ( arr, config = {} ) {
|
|
722
|
+
|
|
723
|
+
const { threshold = 0 } = config;
|
|
724
|
+
|
|
725
|
+
delete config?.options?.raw;
|
|
726
|
+
|
|
727
|
+
return this.batchTest(
|
|
728
|
+
arr, config
|
|
729
|
+
).filter(
|
|
730
|
+
( r ) => r.match >= threshold
|
|
731
|
+
).sort(
|
|
732
|
+
( a, b ) => b.match - a.match
|
|
733
|
+
);
|
|
734
|
+
|
|
735
|
+
};
|
|
736
|
+
|
|
737
|
+
/**
|
|
738
|
+
* finds the closest matching string from an array
|
|
739
|
+
*
|
|
740
|
+
* @param {String[]} arr array of strings
|
|
741
|
+
* @param {Object} [config={}] config (flags, args)
|
|
742
|
+
* @returns {String} closest matching string
|
|
743
|
+
*/
|
|
744
|
+
closest ( arr, config = {} ) {
|
|
745
|
+
|
|
746
|
+
let res = this.match(
|
|
747
|
+
arr, config
|
|
748
|
+
);
|
|
749
|
+
|
|
750
|
+
return res.length && res[ 0 ].match > 0
|
|
751
|
+
? res[ 0 ].target
|
|
752
|
+
: undefined;
|
|
753
|
+
|
|
754
|
+
};
|
|
755
|
+
|
|
756
|
+
/**
|
|
757
|
+
* generate a similarity matrix for an array of strings
|
|
758
|
+
*
|
|
759
|
+
* @param {String} algo name of the algorithm
|
|
760
|
+
* @param {String[]} arr array of strings to cross-compare
|
|
761
|
+
* @param {Object} [config={}] config (flags, args)
|
|
762
|
+
* @returns {Number[][]} 2D array representing the similarity matrix
|
|
763
|
+
*/
|
|
764
|
+
similarityMatrix ( algo, arr, config = {} ) {
|
|
765
|
+
|
|
766
|
+
if ( this._loadAlgo( algo ) ) {
|
|
767
|
+
|
|
768
|
+
delete config?.options?.raw;
|
|
769
|
+
|
|
770
|
+
return [ ...arr ].map( ( a, i ) => {
|
|
771
|
+
|
|
772
|
+
return [ ...arr ].map(
|
|
773
|
+
( b, j ) => i === j ? 1 : this.compare(
|
|
774
|
+
algo, a, b, config
|
|
775
|
+
)
|
|
776
|
+
);
|
|
777
|
+
|
|
778
|
+
} );
|
|
779
|
+
|
|
780
|
+
}
|
|
781
|
+
|
|
782
|
+
};
|
|
783
|
+
|
|
784
|
+
};
|