cmpstr 1.0.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js DELETED
@@ -1,425 +0,0 @@
1
- /**
2
- * cmpstr
3
- * lightweight npm package to calculate string similarity
4
- *
5
- * @author komed3 (Paul Köhler)
6
- * @version 1.0.2
7
- * @license MIT
8
- */
9
-
10
- 'use strict'
11
-
12
- /**
13
- * basic functions
14
- * @private
15
- */
16
-
17
/**
 * normalize string
 *
 * Converts the input to a string and applies every normalization flag
 * found in `flags`. Each flag is a single character, matched
 * case-insensitively; characters that are not a known flag are ignored.
 *
 *   i   case insensitive (lower-cases the string)
 *   s   strip all whitespace
 *
 * @param {*} str value to normalize; null / undefined become ''
 * @param {Null|String} flags options
 * @returns {String} normalized string
 */
const normalize = ( str, flags = null ) => {

    /* String() with a nullish guard, so null / undefined no longer throw */

    let result = str == null ? '' : String( str );

    for( const flag of String( flags || '' ) ) {

        switch( flag.toLowerCase() ) {

            case 'i':
                result = result.toLowerCase();
                break;

            case 's':
                result = result.replace( /\s+/g, '' );
                break;

            default:
                /* unknown flag: do nothing */
                break;

        }

    }

    return result;

};
56
-
57
/**
 * collect the distinct bigrams of a string
 *
 * A bigram is every pair of adjacent characters (UTF-16 code units);
 * strings shorter than two characters produce an empty set.
 *
 * @param {String} str string
 * @returns {Set} set of unique bigrams in order of first occurrence
 */
const str2bigrams = ( str ) => {

    const pairs = new Set();

    let pos = 0;

    /* stop once there is no second character left to pair with */

    while( pos + 1 < str.length ) {

        pairs.add( str.slice( pos, pos + 2 ) );

        pos += 1;

    }

    return pairs;

};
77
-
78
/**
 * dispatch a comparison to the algorithm selected by name
 *
 * Supported names are 'levenshtein' and 'diceCoefficient'; any other
 * name yields a similarity of 0.
 *
 * @param {String} algo algorithm to use
 * @param {String} a string 1
 * @param {String} b string 2
 * @param {Null|String} flags options
 * @returns similarity as returned by the chosen algorithm, or 0
 */
const cpmByAlgo = ( algo, a, b, flags ) => {

    if( algo === 'levenshtein' ) {

        return levenshtein( a, b, flags );

    }

    if( algo === 'diceCoefficient' ) {

        return diceCoefficient( a, b, flags );

    }

    /* unknown algorithm */

    return 0;

};
102
-
103
/**
 * search for closest string
 *
 * Scores every target against the test string with the chosen algorithm
 * and returns the target with the highest similarity. On a tie the first
 * such target wins. An empty target list yields `undefined`.
 *
 * @param {String} algo algorithm to use
 * @param {String} test test string
 * @param {Array} arr targets to test (any iterable is accepted)
 * @param {Null|String} flags options
 * @returns closest target
 */
const findClosest = ( algo, test, arr, flags ) => {

    /*
     * work on one snapshot of the targets and index that same snapshot
     * for the result, so non-indexable iterables (e.g. a Set) work too
     */

    const targets = [ ...arr ];

    let best = -Infinity,
        idx = 0;

    /* search for closest element */

    targets.forEach( ( str, i ) => {

        const pct = cpmByAlgo( algo, test, str, flags );

        if( pct > best ) {

            /* save closest target */

            best = pct;
            idx = i;

        }

    } );

    /* return closest target */

    return targets[ idx ];

};
139
-
140
/**
 * rank all targets by their similarity to the test string
 *
 * Every target is scored with the chosen algorithm; the result is a list
 * of `{ target, match }` records ordered from highest to lowest match.
 *
 * @param {String} algo algorithm to use
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @param {Null|String} flags options
 * @returns sorted matches
 */
const bestMatch = ( algo, test, arr, flags = null ) => {

    /* score each target */

    const scored = [ ...arr ].map( ( target ) => ( {
        target,
        match: cpmByAlgo( algo, test, target, flags )
    } ) );

    /* highest similarity first */

    scored.sort( ( x, y ) => y.match - x.match );

    return scored;

};
177
-
178
- /**
179
- * similarity calculations
180
- * @public
181
- */
182
-
183
/**
 * calculate levenshtein similarity (in percent)
 *
 * The similarity is `1 - distance / max( length a, length b )`, computed
 * on the normalized strings. Identical strings (including two empty
 * strings) give 1; strings with nothing in common give 0.
 *
 * @param {String} a string 1
 * @param {String} b string 2
 * @param {Null|String} flags options
 * @returns similarity 0..1
 */
const levenshtein = ( a, b, flags = null ) => {

    /* normalize strings */

    const left = normalize( a, flags );
    const right = normalize( b, flags );

    if( left === right ) {

        /* both strings are equal or empty */

        return 1;

    }

    /*
     * No special case for 0-letter / 1-letter strings: the formula is
     * well defined for them ('a' vs 'ab' is 0.5, not 0). The strings
     * differ here, so the longer one has at least one character and the
     * division below never divides by zero.
     */

    /* strings are already normalized, so no flags are passed on */

    const distance = levenshteinDistance( left, right );

    /* return percentage */

    return 1 - distance / Math.max( left.length, right.length );

};
227
-
228
/**
 * get levenshtein distance
 *
 * Number of single-character insertions, deletions and substitutions
 * needed to turn one normalized string into the other. Characters are
 * compared as UTF-16 code units.
 *
 * @param {String} a string 1
 * @param {String} b string 2
 * @param {Null|String} flags options
 * @returns distance
 */
const levenshteinDistance = ( a, b, flags = null ) => {

    /* normalize strings */

    const left = normalize( a, flags );
    const right = normalize( b, flags );

    if( left === right ) {

        /* both strings are equal or empty */

        return 0;

    }

    if( left.length === 0 ) {

        /* empty string 1 */

        return right.length;

    }

    if( right.length === 0 ) {

        /* empty string 2 */

        return left.length;

    }

    /*
     * Each cell only depends on the current and the previous row, so two
     * rows are kept instead of the full matrix. `prev` starts as the
     * first row: the distance from '' to each prefix of string 2.
     */

    let prev = Array.from( { length: right.length + 1 }, ( _, j ) => j );

    for( let i = 1; i <= left.length; i++ ) {

        /* first column: distance from the prefix of string 1 to '' */

        const curr = [ i ];

        for( let j = 1; j <= right.length; j++ ) {

            curr[ j ] = left[ i - 1 ] === right[ j - 1 ]
                ? prev[ j - 1 ]
                : 1 + Math.min(
                    curr[ j - 1 ],
                    prev[ j - 1 ],
                    prev[ j ]
                );

        }

        prev = curr;

    }

    /* return levenshtein distance */

    return prev[ right.length ];

};
313
-
314
/**
 * find the target closest to the test string (levenshtein similarity)
 *
 * Shortcut for `findClosest` with the 'levenshtein' algorithm.
 *
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @param {Null|String} flags options
 * @returns closest target
 */
const levenshteinClosest = ( test, arr, flags = null ) =>
    findClosest( 'levenshtein', test, arr, flags );
326
-
327
/**
 * rank targets by levenshtein similarity to the test string
 *
 * Shortcut for `bestMatch` with the 'levenshtein' algorithm.
 *
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @param {Null|String} flags options
 * @returns sorted matches
 */
const levenshteinMatch = ( test, arr, flags = null ) =>
    bestMatch( 'levenshtein', test, arr, flags );
339
-
340
/**
 * calculate dice coefficient
 *
 * Twice the number of distinct bigrams the two normalized strings share,
 * divided by the total number of distinct bigrams of both strings.
 * Equal strings give 1; if they differ and either string has fewer than
 * two characters the result is 0.
 *
 * @param {String} a string 1
 * @param {String} b string 2
 * @param {Null|String} flags options
 * @returns dice coefficient
 */
const diceCoefficient = ( a, b, flags = null ) => {

    /* normalize strings */

    const left = normalize( a, flags );
    const right = normalize( b, flags );

    /* both strings are equal or empty */

    if( left == right ) {

        return 1;

    }

    /* 0-letter or 1-letter strings have no bigrams */

    if( left.length < 2 || right.length < 2 ) {

        return 0;

    }

    /* get bigrams */

    const gramsLeft = str2bigrams( left );
    const gramsRight = str2bigrams( right );

    /* count bigrams present in both sets */

    let shared = 0;

    for( const gram of gramsLeft ) {

        if( gramsRight.has( gram ) ) {

            shared += 1;

        }

    }

    /* calculate dice coefficient */

    return ( shared * 2 ) / ( gramsLeft.size + gramsRight.size );

};
387
-
388
/**
 * find the target closest to the test string (dice coefficient)
 *
 * Shortcut for `findClosest` with the 'diceCoefficient' algorithm.
 *
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @param {Null|String} flags options
 * @returns closest target
 */
const diceClosest = ( test, arr, flags = null ) =>
    findClosest( 'diceCoefficient', test, arr, flags );
400
-
401
/**
 * rank targets by dice coefficient against the test string
 *
 * Shortcut for `bestMatch` with the 'diceCoefficient' algorithm.
 *
 * @param {String} test test string
 * @param {Array} arr targets to test
 * @param {Null|String} flags options
 * @returns sorted matches
 */
const diceMatch = ( test, arr, flags = null ) =>
    bestMatch( 'diceCoefficient', test, arr, flags );
413
-
414
/**
 * export module functions
 *
 * Public (CommonJS) API of the package: the levenshtein and dice
 * coefficient calculations together with their "closest" and "match"
 * shortcuts. The helpers normalize, str2bigrams, cpmByAlgo, findClosest
 * and bestMatch are not exported.
 */
module.exports = {
    levenshtein,
    levenshteinDistance,
    levenshteinClosest,
    levenshteinMatch,
    diceCoefficient,
    diceClosest,
    diceMatch
};