cmpstr 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -8
- package/index.js +69 -30
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -69,32 +69,56 @@ Learn more about both by visiting these links:
|
|
|
69
69
|
|
|
70
70
|
### Levenshtein distance
|
|
71
71
|
|
|
72
|
-
#### ``levenshteinDistance( a, b )``
|
|
72
|
+
#### ``levenshteinDistance( a, b [, flags = null ] )``
|
|
73
73
|
|
|
74
74
|
Calculates the difference between two strings ``a`` and ``b`` and returns the Levenshtein distance as an integer value.
|
|
75
75
|
|
|
76
|
-
#### ``levenshtein( a, b )``
|
|
76
|
+
#### ``levenshtein( a, b [, flags = null ] )``
|
|
77
77
|
|
|
78
78
|
Returns the match percentage of two strings ``a`` and ``b``. The output value is in the range ``0..1`` as a floating point number.
|
|
79
79
|
|
|
80
|
-
#### ``levenshteinClosest( str, arr )``
|
|
80
|
+
#### ``levenshteinClosest( str, arr [, flags = null ] )``
|
|
81
81
|
|
|
82
82
|
Returns the best match of the string ``str`` against the array ``arr`` of passed strings. The function returns the most closely matched string found in the array.
|
|
83
83
|
|
|
84
|
-
#### ``levenshteinMatch( str, arr )``
|
|
84
|
+
#### ``levenshteinMatch( str, arr [, flags = null ] )``
|
|
85
85
|
|
|
86
86
|
Calculates the similarity of all strings contained in the array ``arr`` according to Levenshtein compared to ``str`` and returns an array of all samples sorted by matching in descending order.
|
|
87
87
|
|
|
88
88
|
### Sørensen-Dice coefficient
|
|
89
89
|
|
|
90
|
-
#### ``diceCoefficient( a, b )``
|
|
90
|
+
#### ``diceCoefficient( a, b [, flags = null ] )``
|
|
91
91
|
|
|
92
92
|
This function evaluates the similarity of two given strings ``a`` and ``b`` as a percentage value according to the Sørensen-Dice coefficient and returns the result as a floating-point number.
|
|
93
93
|
|
|
94
|
-
#### ``diceClosest( str, arr )``
|
|
94
|
+
#### ``diceClosest( str, arr [, flags = null ] )``
|
|
95
95
|
|
|
96
96
|
As another way to find the best match between the string ``str`` and a given array ``arr`` of samples, this function uses the Sørensen-Dice coefficient. It likewise returns the best-matching string.
|
|
97
97
|
|
|
98
|
-
#### ``diceMatch( str, arr )``
|
|
98
|
+
#### ``diceMatch( str, arr [, flags = null ] )``
|
|
99
99
|
|
|
100
|
-
Calculates the similarity of all strings contained in the array ``arr`` according to Sørensen-Dice coefficient compared to ``str`` and returns an array of all samples sorted by matching in descending order.
|
|
100
|
+
Calculates the similarity of all strings contained in the array ``arr`` according to Sørensen-Dice coefficient compared to ``str`` and returns an array of all samples sorted by matching in descending order.
|
|
101
|
+
|
|
102
|
+
### Flags
|
|
103
|
+
|
|
104
|
+
Each method can be passed the ``flags`` options listed below:
|
|
105
|
+
|
|
106
|
+
| Flag | Option |
|
|
107
|
+
| ----- | ------------------------------ |
|
|
108
|
+
| ``i`` | case insensitive |
|
|
109
|
+
| ``s`` | non-whitespace characters only |
|
|
110
|
+
|
|
111
|
+
## Patch notes
|
|
112
|
+
|
|
113
|
+
### 1.0.2
|
|
114
|
+
|
|
115
|
+
* Add normalize options ``i`` and ``s``
|
|
116
|
+
* Minor fixes
|
|
117
|
+
|
|
118
|
+
### 1.0.1
|
|
119
|
+
|
|
120
|
+
* Minor fixes
|
|
121
|
+
|
|
122
|
+
### 1.0.0
|
|
123
|
+
|
|
124
|
+
* Initial release
|
package/index.js
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* lightweight npm package to calculate string similarity
|
|
4
4
|
*
|
|
5
5
|
* @author komed3 (Paul Köhler)
|
|
6
|
-
* @version 1.0.
|
|
6
|
+
* @version 1.0.2
|
|
7
7
|
* @license MIT
|
|
8
8
|
*/
|
|
9
9
|
|
|
@@ -17,11 +17,40 @@
|
|
|
17
17
|
/**
|
|
18
18
|
* normalize string
|
|
19
19
|
* @param {String} str string
|
|
20
|
+
* @param {Null|String} flags options
|
|
20
21
|
* @returns normalized string
|
|
21
22
|
*/
|
|
22
|
-
const normalize = ( str ) => {
|
|
23
|
+
const normalize = ( str, flags = null ) => {
|
|
23
24
|
|
|
24
|
-
|
|
25
|
+
str = str.toString();
|
|
26
|
+
|
|
27
|
+
( flags || '' ).toString().split( '' ).forEach( ( f ) => {
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* normalize options
|
|
31
|
+
* i case insensitive
|
|
32
|
+
* s non-whitespace
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
switch( f.toLowerCase() ) {
|
|
36
|
+
|
|
37
|
+
case 'i':
|
|
38
|
+
str = str.toLowerCase();
|
|
39
|
+
break;
|
|
40
|
+
|
|
41
|
+
case 's':
|
|
42
|
+
str = str.replace( /[^\S]+/g, '' );
|
|
43
|
+
break;
|
|
44
|
+
|
|
45
|
+
default:
|
|
46
|
+
/* do nothing */
|
|
47
|
+
break;
|
|
48
|
+
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
} );
|
|
52
|
+
|
|
53
|
+
return str;
|
|
25
54
|
|
|
26
55
|
};
|
|
27
56
|
|
|
@@ -47,21 +76,22 @@ const str2bigrams = ( str ) => {
|
|
|
47
76
|
};
|
|
48
77
|
|
|
49
78
|
/**
|
|
50
|
-
*
|
|
79
|
+
* compare strings by given algorithm
|
|
51
80
|
* @param {String} algo algorithm to use
|
|
52
81
|
* @param {String} a string 1
|
|
53
82
|
* @param {String} b string 2
|
|
83
|
+
* @param {Null|String} flags options
|
|
54
84
|
* @returns similarity
|
|
55
85
|
*/
|
|
56
|
-
const cpmByAlgo = ( algo, a, b ) => {
|
|
86
|
+
const cpmByAlgo = ( algo, a, b, flags ) => {
|
|
57
87
|
|
|
58
88
|
switch( algo ) {
|
|
59
89
|
|
|
60
90
|
case 'levenshtein':
|
|
61
|
-
return levenshtein( a, b );
|
|
91
|
+
return levenshtein( a, b, flags );
|
|
62
92
|
|
|
63
93
|
case 'diceCoefficient':
|
|
64
|
-
return diceCoefficient( a, b );
|
|
94
|
+
return diceCoefficient( a, b, flags );
|
|
65
95
|
|
|
66
96
|
default:
|
|
67
97
|
return 0;
|
|
@@ -75,9 +105,10 @@ const cpmByAlgo = ( algo, a, b ) => {
|
|
|
75
105
|
* @param {String} algo algorithm to use
|
|
76
106
|
* @param {String} test test string
|
|
77
107
|
* @param {Array} arr targets to test
|
|
108
|
+
* @param {Null|String} flags options
|
|
78
109
|
* @returns closest target
|
|
79
110
|
*/
|
|
80
|
-
const findClosest = ( algo, test, arr ) => {
|
|
111
|
+
const findClosest = ( algo, test, arr, flags ) => {
|
|
81
112
|
|
|
82
113
|
let best = -Infinity,
|
|
83
114
|
idx = 0,
|
|
@@ -85,9 +116,9 @@ const findClosest = ( algo, test, arr ) => {
|
|
|
85
116
|
|
|
86
117
|
/* search for closest element in arr */
|
|
87
118
|
|
|
88
|
-
arr.forEach( ( str, i ) => {
|
|
119
|
+
[ ...arr ].forEach( ( str, i ) => {
|
|
89
120
|
|
|
90
|
-
pct = cpmByAlgo( algo, test, str );
|
|
121
|
+
pct = cpmByAlgo( algo, test, str, flags );
|
|
91
122
|
|
|
92
123
|
if( pct > best ) {
|
|
93
124
|
|
|
@@ -111,18 +142,19 @@ const findClosest = ( algo, test, arr ) => {
|
|
|
111
142
|
* @param {String} algo algorithm to use
|
|
112
143
|
* @param {String} test test string
|
|
113
144
|
* @param {Array} arr targets to test
|
|
145
|
+
* @param {Null|String} flags options
|
|
114
146
|
* @returns sorted matches
|
|
115
147
|
*/
|
|
116
|
-
const bestMatch = ( algo, test, arr ) => {
|
|
148
|
+
const bestMatch = ( algo, test, arr, flags = null ) => {
|
|
117
149
|
|
|
118
150
|
let matches = [],
|
|
119
151
|
pct;
|
|
120
152
|
|
|
121
153
|
/* calculate similarity for each arr items */
|
|
122
154
|
|
|
123
|
-
arr.forEach( ( str ) => {
|
|
155
|
+
[ ...arr ].forEach( ( str ) => {
|
|
124
156
|
|
|
125
|
-
pct = cpmByAlgo( algo, test, str );
|
|
157
|
+
pct = cpmByAlgo( algo, test, str, flags );
|
|
126
158
|
|
|
127
159
|
matches.push( {
|
|
128
160
|
target: str,
|
|
@@ -152,14 +184,15 @@ const bestMatch = ( algo, test, arr ) => {
|
|
|
152
184
|
* calculate levenshtein similarity (in percent)
|
|
153
185
|
* @param {String} a string 1
|
|
154
186
|
* @param {String} b string 2
|
|
187
|
+
* @param {Null|String} flags options
|
|
155
188
|
* @returns similarity 0..1
|
|
156
189
|
*/
|
|
157
|
-
const levenshtein = ( a, b ) => {
|
|
190
|
+
const levenshtein = ( a, b, flags = null ) => {
|
|
158
191
|
|
|
159
192
|
/* normalize string */
|
|
160
193
|
|
|
161
|
-
a = normalize( a );
|
|
162
|
-
b = normalize( b );
|
|
194
|
+
a = normalize( a, flags );
|
|
195
|
+
b = normalize( b, flags );
|
|
163
196
|
|
|
164
197
|
if( a == b ) {
|
|
165
198
|
|
|
@@ -196,14 +229,15 @@ const levenshtein = ( a, b ) => {
|
|
|
196
229
|
* get levenshtein distance
|
|
197
230
|
* @param {String} a string 1
|
|
198
231
|
* @param {String} b string 2
|
|
232
|
+
* @param {Null|String} flags options
|
|
199
233
|
* @returns distance
|
|
200
234
|
*/
|
|
201
|
-
const levenshteinDistance = ( a, b ) => {
|
|
235
|
+
const levenshteinDistance = ( a, b, flags = null ) => {
|
|
202
236
|
|
|
203
237
|
/* normalize string */
|
|
204
238
|
|
|
205
|
-
a = normalize( a );
|
|
206
|
-
b = normalize( b );
|
|
239
|
+
a = normalize( a, flags );
|
|
240
|
+
b = normalize( b, flags );
|
|
207
241
|
|
|
208
242
|
if( a == b ) {
|
|
209
243
|
|
|
@@ -281,11 +315,12 @@ const levenshteinDistance = ( a, b ) => {
|
|
|
281
315
|
* search for closest target to test string
|
|
282
316
|
* @param {String} test test string
|
|
283
317
|
* @param {Array} arr targets to test
|
|
318
|
+
* @param {Null|String} flags options
|
|
284
319
|
* @returns closest target
|
|
285
320
|
*/
|
|
286
|
-
const levenshteinClosest = ( test, arr ) => {
|
|
321
|
+
const levenshteinClosest = ( test, arr, flags = null ) => {
|
|
287
322
|
|
|
288
|
-
return findClosest( 'levenshtein', test, arr );
|
|
323
|
+
return findClosest( 'levenshtein', test, arr, flags );
|
|
289
324
|
|
|
290
325
|
};
|
|
291
326
|
|
|
@@ -293,11 +328,12 @@ const levenshteinClosest = ( test, arr ) => {
|
|
|
293
328
|
* sort best matches to test string
|
|
294
329
|
* @param {String} test test string
|
|
295
330
|
* @param {Array} arr targets to test
|
|
331
|
+
* @param {Null|String} flags options
|
|
296
332
|
* @returns sorted matches
|
|
297
333
|
*/
|
|
298
|
-
const levenshteinMatch = ( test, arr ) => {
|
|
334
|
+
const levenshteinMatch = ( test, arr, flags = null ) => {
|
|
299
335
|
|
|
300
|
-
return bestMatch( 'levenshtein', test, arr );
|
|
336
|
+
return bestMatch( 'levenshtein', test, arr, flags );
|
|
301
337
|
|
|
302
338
|
};
|
|
303
339
|
|
|
@@ -305,14 +341,15 @@ const levenshteinMatch = ( test, arr ) => {
|
|
|
305
341
|
* calculate dice coefficient
|
|
306
342
|
* @param {String} a string 1
|
|
307
343
|
* @param {String} b string 2
|
|
344
|
+
* @param {Null|String} flags options
|
|
308
345
|
* @returns dice coefficient
|
|
309
346
|
*/
|
|
310
|
-
const diceCoefficient = ( a, b ) => {
|
|
347
|
+
const diceCoefficient = ( a, b, flags = null ) => {
|
|
311
348
|
|
|
312
349
|
/* normalize string */
|
|
313
350
|
|
|
314
|
-
a = normalize( a );
|
|
315
|
-
b = normalize( b );
|
|
351
|
+
a = normalize( a, flags );
|
|
352
|
+
b = normalize( b, flags );
|
|
316
353
|
|
|
317
354
|
if( a == b ) {
|
|
318
355
|
|
|
@@ -352,11 +389,12 @@ const diceCoefficient = ( a, b ) => {
|
|
|
352
389
|
* search for closest target to test string
|
|
353
390
|
* @param {String} test test string
|
|
354
391
|
* @param {Array} arr targets to test
|
|
392
|
+
* @param {Null|String} flags options
|
|
355
393
|
* @returns closest target
|
|
356
394
|
*/
|
|
357
|
-
const diceClosest = ( test, arr ) => {
|
|
395
|
+
const diceClosest = ( test, arr, flags = null ) => {
|
|
358
396
|
|
|
359
|
-
return findClosest( 'diceCoefficient', test, arr );
|
|
397
|
+
return findClosest( 'diceCoefficient', test, arr, flags );
|
|
360
398
|
|
|
361
399
|
};
|
|
362
400
|
|
|
@@ -364,11 +402,12 @@ const diceClosest = ( test, arr ) => {
|
|
|
364
402
|
* sort best matches to test string
|
|
365
403
|
* @param {String} test test string
|
|
366
404
|
* @param {Array} arr targets to test
|
|
405
|
+
* @param {Null|String} flags options
|
|
367
406
|
* @returns sorted matches
|
|
368
407
|
*/
|
|
369
|
-
const diceMatch = ( test, arr ) => {
|
|
408
|
+
const diceMatch = ( test, arr, flags = null ) => {
|
|
370
409
|
|
|
371
|
-
return bestMatch( 'diceCoefficient', test, arr );
|
|
410
|
+
return bestMatch( 'diceCoefficient', test, arr, flags );
|
|
372
411
|
|
|
373
412
|
};
|
|
374
413
|
|