cmpstr 1.0.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js DELETED
@@ -1,432 +0,0 @@
1
- /**
2
- * cmpstr
3
- * lightweight npm package to calculate string similarity
4
- *
5
- * @author komed3 (Paul Köhler)
6
- * @version 1.0.3
7
- * @license MIT
8
- */
9
-
10
- 'use strict';
11
-
12
- /**
13
- * basic functions
14
- * @private
15
- */
16
-
17
/**
 * Normalize a value before it is compared.
 *
 * The value is converted with `toString()`, then each character of
 * `flags` is applied in order (flag characters are case-insensitive):
 *   i  convert the string to lower case
 *   s  remove every run of whitespace
 * Any other flag character is ignored.
 *
 * @param {String} str value to normalize
 * @param {Null|String} flags normalization options
 * @returns {String} normalized string
 */
const normalize = ( str, flags = null ) => {

    let result = str.toString();

    /* a missing flag string is treated like an empty one */
    const options = ( flags || '' ).toString().split( '' );

    for ( const flag of options ) {

        const option = flag.toLowerCase();

        if ( option === 'i' ) {

            result = result.toLowerCase();

        } else if ( option === 's' ) {

            result = result.replace( /[^\S]+/g, '' );

        }

    }

    return result;

};
56
-
57
/**
 * Collect the distinct bigrams (pairs of adjacent characters) of a string.
 *
 * Strings shorter than two characters produce an empty set.
 *
 * @param {String} str string to split
 * @returns {Set} set of two-character substrings
 */
const str2bigrams = ( str ) => {

    const pairs = new Set();

    let start = 0;

    /* stop once fewer than two characters remain */
    while ( start < str.length - 1 ) {

        pairs.add( str.substring( start, start + 2 ) );

        start += 1;

    }

    return pairs;

};
77
-
78
/**
 * Dispatch a comparison to the similarity function named by `algo`.
 *
 * Supported names are 'levenshtein' and 'diceCoefficient'; any other
 * name yields 0.
 *
 * @param {String} algo algorithm to use
 * @param {String} a string 1
 * @param {String} b string 2
 * @param {Null|String} flags normalization options
 * @returns {Number} similarity, or 0 for an unknown algorithm
 */
const cpmByAlgo = ( algo, a, b, flags = null ) => {

    if ( algo === 'levenshtein' ) {

        return levenshtein( a, b, flags );

    }

    if ( algo === 'diceCoefficient' ) {

        return diceCoefficient( a, b, flags );

    }

    /* unknown algorithm */

    return 0;

};
102
-
103
/**
 * Find the target that is most similar to the test string.
 *
 * The targets are copied into an array once and the result is read from
 * that copy, so any iterable (array, Set, generator, ...) can be passed.
 * When several targets share the highest similarity the first one wins.
 *
 * @param {String} algo algorithm to use
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @param {Null|String} flags normalization options
 * @returns closest target, or undefined when there are no targets
 */
const findClosest = ( algo, test, arr, flags = null ) => {

    const targets = [ ...arr ];

    let bestScore = -Infinity;
    let bestIndex = 0;

    targets.forEach( ( target, index ) => {

        const score = cpmByAlgo( algo, test, target, flags );

        /* strict comparison keeps the earliest target on ties */

        if ( score > bestScore ) {

            bestScore = score;
            bestIndex = index;

        }

    } );

    /* index into the copy: the original may not support indexing */

    return targets[ bestIndex ];

};
139
-
140
/**
 * Score every target against the test string and rank the results.
 *
 * Targets whose similarity is below `threshold` are dropped; the
 * remaining entries are ordered from highest to lowest similarity.
 *
 * @param {String} algo algorithm to use
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @param {Null|String} flags normalization options
 * @param {Float} threshold required similarity
 * @returns {Array} list of `{ target, match }` objects, best match first
 */
const bestMatch = ( algo, test, arr, flags = null, threshold = 0 ) => {

    return [ ...arr ]
        .map( ( target ) => ( {
            target: target,
            match: cpmByAlgo( algo, test, target, flags )
        } ) )
        .filter( ( entry ) => entry.match >= threshold )
        .sort( ( left, right ) => right.match - left.match );

};
182
-
183
- /**
184
- * similarity calculations
185
- * @public
186
- */
187
-
188
/**
 * Calculate the Levenshtein similarity of two strings.
 *
 * The similarity is `1 - distance / max( length a, length b )`, so it is
 * 1 for identical strings and 0 when every position has to be edited.
 * Strings of any length are supported; for example 'a' compared with
 * 'ab' gives 0.5.
 *
 * @param {String} a string 1
 * @param {String} b string 2
 * @param {Null|String} flags normalization options
 * @returns {Number} similarity 0..1
 */
const levenshtein = ( a, b, flags = null ) => {

    const left = normalize( a, flags );
    const right = normalize( b, flags );

    /* identical (or both empty) strings */

    if ( left === right ) {

        return 1;

    }

    /* the inputs are already normalized, so no flags are passed on */

    const distance = levenshteinDistance( left, right );

    /* the strings differ, so the longer one has at least one character */

    return 1 - distance / Math.max( left.length, right.length );

};
232
-
233
/**
 * Calculate the Levenshtein distance between two strings: the number of
 * single-character insertions, deletions and substitutions needed to
 * turn one string into the other.
 *
 * Both strings are normalized with `flags` before they are compared.
 *
 * @param {String} a string 1
 * @param {String} b string 2
 * @param {Null|String} flags normalization options
 * @returns {Number} distance
 */
const levenshteinDistance = ( a, b, flags = null ) => {

    const source = normalize( a, flags );
    const target = normalize( b, flags );

    /* identical (or both empty) strings */

    if ( source === target ) {

        return 0;

    }

    /* one side empty: every character of the other side is an edit */

    if ( source.length === 0 ) {

        return target.length;

    }

    if ( target.length === 0 ) {

        return source.length;

    }

    const rows = source.length;
    const cols = target.length;

    /*
     * grid[ r ][ c ] holds the distance between the first r characters
     * of source and the first c characters of target; the first column
     * is seeded with r and every other cell with c
     */

    const grid = Array.from( { length: rows + 1 }, ( _row, r ) => {
        return Array.from( { length: cols + 1 }, ( _cell, c ) => {
            return c === 0 ? r : c;
        } );
    } );

    for ( let r = 1; r <= rows; r++ ) {

        for ( let c = 1; c <= cols; c++ ) {

            if ( source[ r - 1 ] === target[ c - 1 ] ) {

                /* matching characters cost nothing */

                grid[ r ][ c ] = grid[ r - 1 ][ c - 1 ];

                continue;

            }

            /* cheapest of the three neighbouring cells, plus one edit */

            grid[ r ][ c ] = 1 + Math.min(
                grid[ r ][ c - 1 ],
                grid[ r - 1 ][ c - 1 ],
                grid[ r - 1 ][ c ]
            );

        }

    }

    return grid[ rows ][ cols ];

};
318
-
319
/**
 * Find the target closest to the test string by Levenshtein similarity.
 *
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @param {Null|String} flags normalization options
 * @returns closest target
 */
const levenshteinClosest = ( test, arr, flags = null ) =>
    findClosest( 'levenshtein', test, arr, flags );
331
-
332
/**
 * Rank targets against the test string by Levenshtein similarity.
 *
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @param {Null|String} flags normalization options
 * @param {Float} threshold required similarity
 * @returns {Array} matches sorted by descending similarity
 */
const levenshteinMatch = ( test, arr, flags = null, threshold = 0 ) =>
    bestMatch( 'levenshtein', test, arr, flags, threshold );
345
-
346
/**
 * Calculate the Sørensen–Dice coefficient of two strings, based on the
 * sets of distinct bigrams of each (normalized) string.
 *
 * Identical strings score 1. Otherwise, if either string has fewer than
 * two characters (and therefore no bigram) the score is 0.
 *
 * @param {String} a string 1
 * @param {String} b string 2
 * @param {Null|String} flags normalization options
 * @returns {Number} dice coefficient 0..1
 */
const diceCoefficient = ( a, b, flags = null ) => {

    const left = normalize( a, flags );
    const right = normalize( b, flags );

    /* identical (or both empty) strings */

    if ( left === right ) {

        return 1;

    }

    /* no bigram can be built from 0-letter or 1-letter strings */

    if ( left.length < 2 || right.length < 2 ) {

        return 0;

    }

    const leftPairs = str2bigrams( left );
    const rightPairs = str2bigrams( right );

    /* count the bigrams that occur in both sets */

    let shared = 0;

    for ( const pair of leftPairs ) {

        if ( rightPairs.has( pair ) ) {

            shared += 1;

        }

    }

    return ( shared * 2 ) / ( leftPairs.size + rightPairs.size );

};
393
-
394
/**
 * Find the target closest to the test string by dice coefficient.
 *
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @param {Null|String} flags normalization options
 * @returns closest target
 */
const diceClosest = ( test, arr, flags = null ) =>
    findClosest( 'diceCoefficient', test, arr, flags );
406
-
407
/**
 * Rank targets against the test string by dice coefficient.
 *
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @param {Null|String} flags normalization options
 * @param {Float} threshold required similarity
 * @returns {Array} matches sorted by descending similarity
 */
const diceMatch = ( test, arr, flags = null, threshold = 0 ) =>
    bestMatch( 'diceCoefficient', test, arr, flags, threshold );
420
-
421
- /**
422
- * export module functions
423
- */
424
- module.exports = {
425
- levenshtein,
426
- levenshteinDistance,
427
- levenshteinClosest,
428
- levenshteinMatch,
429
- diceCoefficient,
430
- diceClosest,
431
- diceMatch
432
- };