cmpstr 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -3
- package/index.js +96 -23
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -24,16 +24,38 @@ Sample of how to use the package in your code:
|
|
|
24
24
|
let str1 = 'kitten';
|
|
25
25
|
let str2 = 'sitting';
|
|
26
26
|
|
|
27
|
+
/**
|
|
28
|
+
* levenshteinDistance
|
|
29
|
+
* expected: 3
|
|
30
|
+
*/
|
|
27
31
|
let distance = cmpstr.levenshteinDistance( str1, str2 );
|
|
28
|
-
// expected 3
|
|
29
32
|
|
|
33
|
+
/**
|
|
34
|
+
* diceCoefficient
|
|
35
|
+
* expected: 0.3636363636363636
|
|
36
|
+
*/
|
|
30
37
|
let dice = cmpstr.diceCoefficient( str1, str2 );
|
|
31
|
-
// expected 0.3636363636363636
|
|
32
38
|
|
|
39
|
+
/**
|
|
40
|
+
* diceClosest
|
|
41
|
+
* expected: bestest
|
|
42
|
+
*/
|
|
33
43
|
let closest = cmpstr.diceClosest( 'best', [
|
|
34
44
|
'better', 'bestest', 'well', 'good'
|
|
35
45
|
] );
|
|
36
|
-
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* levenshteinMatch
|
|
49
|
+
* expected: [
|
|
50
|
+
* { target: 'bestest', match: 0.5714285714285714 },
|
|
51
|
+
* { target: 'better', match: 0.5 },
|
|
52
|
+
* { target: 'well', match: 0.25 },
|
|
53
|
+
* { target: 'good', match: 0 }
|
|
54
|
+
* ]
|
|
55
|
+
*/
|
|
56
|
+
let matches = cmpstr.levenshteinMatch( 'best', [
|
|
57
|
+
'better', 'bestest', 'well', 'good'
|
|
58
|
+
] );
|
|
37
59
|
```
|
|
38
60
|
|
|
39
61
|
## API
|
|
@@ -59,6 +81,10 @@ Returns the match percentage of two strings ``a`` and ``b``. The output value is
|
|
|
59
81
|
|
|
60
82
|
Returns the best match of the string ``str`` against the array ``arr`` of passed strings. The function returns the most closely matched string found in the array.
|
|
61
83
|
|
|
84
|
+
#### ``levenshteinMatch( str, arr )``
|
|
85
|
+
|
|
86
|
+
Calculates the Levenshtein similarity between ``str`` and each string in the array ``arr``, and returns an array of all samples sorted by match value in descending order. Each entry is an object of the form ``{ target, match }``.
|
|
87
|
+
|
|
62
88
|
### Sørensen-Dice coefficient
|
|
63
89
|
|
|
64
90
|
#### ``diceCoefficient( a, b )``
|
|
@@ -68,3 +94,7 @@ This function evaluates the similarity of two given strings ``a`` and ``b`` as p
|
|
|
68
94
|
#### ``diceClosest( str, arr )``
|
|
69
95
|
|
|
70
96
|
As another way to find the best match between the string ``str`` and a given array ``arr`` of samples, this function uses the Sørensen-Dice coefficient. It also returns the best-matching string.
|
|
97
|
+
|
|
98
|
+
#### ``diceMatch( str, arr )``
|
|
99
|
+
|
|
100
|
+
Calculates the Sørensen-Dice coefficient between ``str`` and each string in the array ``arr``, and returns an array of all samples sorted by match value in descending order. Each entry is an object of the form ``{ target, match }``.
|
package/index.js
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* lightweight npm package to calculate string similarity
|
|
4
4
|
*
|
|
5
5
|
* @author komed3 (Paul Köhler)
|
|
6
|
-
* @version 1.0.0
|
|
6
|
+
* @version 1.0.1
|
|
7
7
|
* @license MIT
|
|
8
8
|
*/
|
|
9
9
|
|
|
@@ -30,7 +30,7 @@ const normalize = ( str ) => {
|
|
|
30
30
|
* @param {String} str string
|
|
31
31
|
* @returns bigrams
|
|
32
32
|
*/
|
|
33
|
-
const bbigrams = ( str ) => {
|
|
33
|
+
const str2bigrams = ( str ) => {
|
|
34
34
|
|
|
35
35
|
let bigrams = new Set();
|
|
36
36
|
|
|
@@ -46,11 +46,35 @@ const bbigrams = ( str ) => {
|
|
|
46
46
|
|
|
47
47
|
};
|
|
48
48
|
|
|
49
|
+
/**
|
|
50
|
+
* compute the similarity of two strings using the given algorithm
|
|
51
|
+
* @param {String} algo algorithm to use
|
|
52
|
+
* @param {String} a string 1
|
|
53
|
+
* @param {String} b string 2
|
|
54
|
+
* @returns similarity
|
|
55
|
+
*/
|
|
56
|
+
const cpmByAlgo = ( algo, a, b ) => {
|
|
57
|
+
|
|
58
|
+
switch( algo ) {
|
|
59
|
+
|
|
60
|
+
case 'levenshtein':
|
|
61
|
+
return levenshtein( a, b );
|
|
62
|
+
|
|
63
|
+
case 'diceCoefficient':
|
|
64
|
+
return diceCoefficient( a, b );
|
|
65
|
+
|
|
66
|
+
default:
|
|
67
|
+
return 0;
|
|
68
|
+
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
};
|
|
72
|
+
|
|
49
73
|
/**
|
|
50
74
|
* search for closest string
|
|
51
75
|
* @param {String} algo algorithm to use
|
|
52
76
|
* @param {String} test test string
|
|
53
|
-
* @param
|
|
77
|
+
* @param {Array} arr targets to test
|
|
54
78
|
* @returns closest target
|
|
55
79
|
*/
|
|
56
80
|
const findClosest = ( algo, test, arr ) => {
|
|
@@ -63,21 +87,7 @@ const findClosest = ( algo, test, arr ) => {
|
|
|
63
87
|
|
|
64
88
|
arr.forEach( ( str, i ) => {
|
|
65
89
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
case 'levenshtein':
|
|
69
|
-
pct = levenshtein( test, str );
|
|
70
|
-
break;
|
|
71
|
-
|
|
72
|
-
case 'diceCoefficient':
|
|
73
|
-
pct = diceCoefficient( test, str );
|
|
74
|
-
break;
|
|
75
|
-
|
|
76
|
-
default:
|
|
77
|
-
pct = 0;
|
|
78
|
-
break;
|
|
79
|
-
|
|
80
|
-
}
|
|
90
|
+
pct = cpmByAlgo( algo, test, str );
|
|
81
91
|
|
|
82
92
|
if( pct > best ) {
|
|
83
93
|
|
|
@@ -96,6 +106,43 @@ const findClosest = ( algo, test, arr ) => {
|
|
|
96
106
|
|
|
97
107
|
};
|
|
98
108
|
|
|
109
|
+
/**
|
|
110
|
+
* sort best matches to test string
|
|
111
|
+
* @param {String} algo algorithm to use
|
|
112
|
+
* @param {String} test test string
|
|
113
|
+
* @param {Array} arr targets to test
|
|
114
|
+
* @returns sorted matches
|
|
115
|
+
*/
|
|
116
|
+
const bestMatch = ( algo, test, arr ) => {
|
|
117
|
+
|
|
118
|
+
let matches = [],
|
|
119
|
+
pct;
|
|
120
|
+
|
|
121
|
+
/* calculate similarity for each arr items */
|
|
122
|
+
|
|
123
|
+
arr.forEach( ( str ) => {
|
|
124
|
+
|
|
125
|
+
pct = cpmByAlgo( algo, test, str );
|
|
126
|
+
|
|
127
|
+
matches.push( {
|
|
128
|
+
target: str,
|
|
129
|
+
match: pct
|
|
130
|
+
} );
|
|
131
|
+
|
|
132
|
+
} );
|
|
133
|
+
|
|
134
|
+
/* sort by highest similarity */
|
|
135
|
+
|
|
136
|
+
let sorted = matches.sort( ( a, b ) => {
|
|
137
|
+
return b.match - a.match;
|
|
138
|
+
} );
|
|
139
|
+
|
|
140
|
+
/* return sorted matches */
|
|
141
|
+
|
|
142
|
+
return sorted;
|
|
143
|
+
|
|
144
|
+
};
|
|
145
|
+
|
|
99
146
|
/**
|
|
100
147
|
* similarity calculations
|
|
101
148
|
* @public
|
|
@@ -233,7 +280,7 @@ const levenshteinDistance = ( a, b ) => {
|
|
|
233
280
|
/**
|
|
234
281
|
* search for closest target to test string
|
|
235
282
|
* @param {String} test test string
|
|
236
|
-
* @param
|
|
283
|
+
* @param {Array} arr targets to test
|
|
237
284
|
* @returns closest target
|
|
238
285
|
*/
|
|
239
286
|
const levenshteinClosest = ( test, arr ) => {
|
|
@@ -242,6 +289,18 @@ const levenshteinClosest = ( test, arr ) => {
|
|
|
242
289
|
|
|
243
290
|
};
|
|
244
291
|
|
|
292
|
+
/**
|
|
293
|
+
* sort best matches to test string
|
|
294
|
+
* @param {String} test test string
|
|
295
|
+
* @param {Array} arr targets to test
|
|
296
|
+
* @returns sorted matches
|
|
297
|
+
*/
|
|
298
|
+
const levenshteinMatch = ( test, arr ) => {
|
|
299
|
+
|
|
300
|
+
return bestMatch( 'levenshtein', test, arr );
|
|
301
|
+
|
|
302
|
+
};
|
|
303
|
+
|
|
245
304
|
/**
|
|
246
305
|
* calculate dice coefficient
|
|
247
306
|
* @param {String} a string 1
|
|
@@ -271,8 +330,8 @@ const diceCoefficient = ( a, b ) => {
|
|
|
271
330
|
|
|
272
331
|
/* get bigrams */
|
|
273
332
|
|
|
274
|
-
let setA = bbigrams( a ),
|
|
275
|
-
setB = bbigrams( b );
|
|
333
|
+
let setA = str2bigrams( a ),
|
|
334
|
+
setB = str2bigrams( b );
|
|
276
335
|
|
|
277
336
|
/* calculate dice coefficient */
|
|
278
337
|
|
|
@@ -292,7 +351,7 @@ const diceCoefficient = ( a, b ) => {
|
|
|
292
351
|
/**
|
|
293
352
|
* search for closest target to test string
|
|
294
353
|
* @param {String} test test string
|
|
295
|
-
* @param
|
|
354
|
+
* @param {Array} arr targets to test
|
|
296
355
|
* @returns closest target
|
|
297
356
|
*/
|
|
298
357
|
const diceClosest = ( test, arr ) => {
|
|
@@ -301,6 +360,18 @@ const diceClosest = ( test, arr ) => {
|
|
|
301
360
|
|
|
302
361
|
};
|
|
303
362
|
|
|
363
|
+
/**
|
|
364
|
+
* sort best matches to test string
|
|
365
|
+
* @param {String} test test string
|
|
366
|
+
* @param {Array} arr targets to test
|
|
367
|
+
* @returns sorted matches
|
|
368
|
+
*/
|
|
369
|
+
const diceMatch = ( test, arr ) => {
|
|
370
|
+
|
|
371
|
+
return bestMatch( 'diceCoefficient', test, arr );
|
|
372
|
+
|
|
373
|
+
};
|
|
374
|
+
|
|
304
375
|
/**
|
|
305
376
|
* export module functions
|
|
306
377
|
*/
|
|
@@ -308,6 +379,8 @@ module.exports = {
|
|
|
308
379
|
levenshtein,
|
|
309
380
|
levenshteinDistance,
|
|
310
381
|
levenshteinClosest,
|
|
382
|
+
levenshteinMatch,
|
|
311
383
|
diceCoefficient,
|
|
312
|
-
diceClosest
|
|
384
|
+
diceClosest,
|
|
385
|
+
diceMatch
|
|
313
386
|
};
|