cmpstr 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +62 -8
  2. package/index.js +152 -40
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -24,16 +24,38 @@ Sample of how to use the package in your code:
24
24
  let str1 = 'kitten';
25
25
  let str2 = 'sitting';
26
26
 
27
+ /**
28
+ * levenshteinDistance
29
+ * expected: 3
30
+ */
27
31
  let distance = cmpstr.levenshteinDistance( str1, str2 );
28
- // expected 3
29
32
 
33
+ /**
34
+ * diceCoefficient
35
+ * expected: 0.3636363636363636
36
+ */
30
37
  let dice = cmpstr.diceCoefficient( str1, str2 );
31
- // expected 0.3636363636363636
32
38
 
39
+ /**
40
+ * diceClosest
41
+ * expected: bestest
42
+ */
33
43
  let closest = cmpstr.diceClosest( 'best', [
34
44
  'better', 'bestest', 'well', 'good'
35
45
  ] );
36
- // expected bestest
46
+
47
+ /**
48
+ * levenshteinMatch
49
+ * expected: [
50
+ * { target: 'bestest', match: 0.5714285714285714 },
51
+ * { target: 'better', match: 0.5 },
52
+ * { target: 'well', match: 0.25 },
53
+ * { target: 'good', match: 0 }
54
+ * ]
55
+ */
56
+ let matches = cmpstr.levenshteinMatch( 'best', [
57
+ 'better', 'bestest', 'well', 'good'
58
+ ] );
37
59
  ```
38
60
 
39
61
  ## API
@@ -47,24 +69,56 @@ Learn more about both by visiting these links:
47
69
 
48
70
  ### Levenshtein distance
49
71
 
50
- #### ``levenshteinDistance( a, b )``
72
+ #### ``levenshteinDistance( a, b [, flags = null ] )``
51
73
 
52
74
  Calculates the difference between two strings ``a`` and ``b`` and returns the Levenshtein distance as an integer value.
53
75
 
54
- #### ``levenshtein( a, b )``
76
+ #### ``levenshtein( a, b [, flags = null ] )``
55
77
 
56
78
  Returns the match percentage of two strings ``a`` and ``b``. The output value is in the range ``0..1`` as a floating point number.
57
79
 
58
- #### ``levenshteinClosest( str, arr )``
80
+ #### ``levenshteinClosest( str, arr [, flags = null ] )``
59
81
 
60
82
  Returns the best match of the string ``str`` against the array ``arr`` of passed strings. The function returns the most closely matched string found in the array.
61
83
 
84
+ #### ``levenshteinMatch( str, arr [, flags = null ] )``
85
+
86
+ Calculates the Levenshtein similarity between ``str`` and each string contained in the array ``arr`` and returns an array of all samples, sorted by similarity in descending order.
87
+
62
88
  ### Sørensen-Dice coefficient
63
89
 
64
- #### ``diceCoefficient( a, b )``
90
+ #### ``diceCoefficient( a, b [, flags = null ] )``
65
91
 
66
92
  This function evaluates the similarity of two given strings ``a`` and ``b`` as percentage value according to the Sørensen-Dice coefficient and returns the result as floating point number.
67
93
 
68
- #### ``diceClosest( str, arr )``
94
+ #### ``diceClosest( str, arr [, flags = null ] )``
69
95
 
70
96
  As another way to find the best match between the string ``str`` and a given array ``arr`` of samples, this function uses the Sørensen-Dice coefficient. It returns the most matching string as well.
97
+
98
+ #### ``diceMatch( str, arr [, flags = null ] )``
99
+
100
+ Calculates the Sørensen-Dice similarity between ``str`` and each string contained in the array ``arr`` and returns an array of all samples, sorted by similarity in descending order.
101
+
102
+ ### Flags
103
+
104
+ Each method accepts an optional ``flags`` string; it may combine any of the single-letter options listed below:
105
+
106
+ | Flag | Option |
107
+ | ----- | ------------------------------ |
108
+ | ``i`` | case insensitive |
109
+ | ``s`` | non-whitespace characters only |
110
+
111
+ ## Patch notes
112
+
113
+ ### 1.0.2
114
+
115
+ * Add normalize options ``i`` and ``s``
116
+ * Minor fixes
117
+
118
+ ### 1.0.1
119
+
120
+ * Minor fixes
121
+
122
+ ### 1.0.0
123
+
124
+ * Initial release
package/index.js CHANGED
@@ -3,7 +3,7 @@
3
3
  * lightweight npm package to calculate string similarity
4
4
  *
5
5
  * @author komed3 (Paul Köhler)
6
- * @version 1.0.0
6
+ * @version 1.0.2
7
7
  * @license MIT
8
8
  */
9
9
 
@@ -17,11 +17,40 @@
17
17
  /**
18
18
  * normalize string
19
19
  * @param {String} str string
20
+ * @param {Null|String} flags options
20
21
  * @returns normalized string
21
22
  */
22
- const normalize = ( str ) => {
23
+ const normalize = ( str, flags = null ) => {
23
24
 
24
- return str.toString();
25
+ str = str.toString();
26
+
27
+ ( flags || '' ).toString().split( '' ).forEach( ( f ) => {
28
+
29
+ /**
30
+ * normalize options
31
+ * i case insensitive
32
+ * s non-whitespace
33
+ */
34
+
35
+ switch( f.toLowerCase() ) {
36
+
37
+ case 'i':
38
+ str = str.toLowerCase();
39
+ break;
40
+
41
+ case 's':
42
+ str = str.replace( /[^\S]+/g, '' );
43
+ break;
44
+
45
+ default:
46
+ /* do nothing */
47
+ break;
48
+
49
+ }
50
+
51
+ } );
52
+
53
+ return str;
25
54
 
26
55
  };
27
56
 
@@ -30,7 +59,7 @@ const normalize = ( str ) => {
30
59
  * @param {String} str string
31
60
  * @returns bigrams
32
61
  */
33
- const bbigrams = ( str ) => {
62
+ const str2bigrams = ( str ) => {
34
63
 
35
64
  let bigrams = new Set();
36
65
 
@@ -46,14 +75,40 @@ const bbigrams = ( str ) => {
46
75
 
47
76
  };
48
77
 
78
+ /**
79
+ * compare strings by given algorithm
80
+ * @param {String} algo algorithm to use
81
+ * @param {String} a string 1
82
+ * @param {String} b string 2
83
+ * @param {Null|String} flags options
84
+ * @returns similarity
85
+ */
86
+ const cpmByAlgo = ( algo, a, b, flags ) => {
87
+
88
+ switch( algo ) {
89
+
90
+ case 'levenshtein':
91
+ return levenshtein( a, b, flags );
92
+
93
+ case 'diceCoefficient':
94
+ return diceCoefficient( a, b, flags );
95
+
96
+ default:
97
+ return 0;
98
+
99
+ }
100
+
101
+ };
102
+
49
103
  /**
50
104
  * search for closest string
51
105
  * @param {String} algo algorithm to use
52
106
  * @param {String} test test string
53
- * @param {Array} arr targets to test
107
+ * @param {Array} arr targets to test
108
+ * @param {Null|String} flags options
54
109
  * @returns closest target
55
110
  */
56
- const findClosest = ( algo, test, arr ) => {
111
+ const findClosest = ( algo, test, arr, flags ) => {
57
112
 
58
113
  let best = -Infinity,
59
114
  idx = 0,
@@ -61,23 +116,9 @@ const findClosest = ( algo, test, arr ) => {
61
116
 
62
117
  /* search for closest element in arr */
63
118
 
64
- arr.forEach( ( str, i ) => {
65
-
66
- switch( algo ) {
67
-
68
- case 'levenshtein':
69
- pct = levenshtein( test, str );
70
- break;
71
-
72
- case 'diceCoefficient':
73
- pct = diceCoefficient( test, str );
74
- break;
75
-
76
- default:
77
- pct = 0;
78
- break;
119
+ [ ...arr ].forEach( ( str, i ) => {
79
120
 
80
- }
121
+ pct = cpmByAlgo( algo, test, str, flags );
81
122
 
82
123
  if( pct > best ) {
83
124
 
@@ -96,6 +137,44 @@ const findClosest = ( algo, test, arr ) => {
96
137
 
97
138
  };
98
139
 
140
+ /**
141
+ * sort best matches to test string
142
+ * @param {String} algo algorithm to use
143
+ * @param {String} test test string
144
+ * @param {Array} arr targets to test
145
+ * @param {Null|String} flags options
146
+ * @returns sorted matches
147
+ */
148
+ const bestMatch = ( algo, test, arr, flags = null ) => {
149
+
150
+ let matches = [],
151
+ pct;
152
+
153
+ /* calculate similarity for each item in arr */
154
+
155
+ [ ...arr ].forEach( ( str ) => {
156
+
157
+ pct = cpmByAlgo( algo, test, str, flags );
158
+
159
+ matches.push( {
160
+ target: str,
161
+ match: pct
162
+ } );
163
+
164
+ } );
165
+
166
+ /* sort by highest similarity */
167
+
168
+ let sorted = matches.sort( ( a, b ) => {
169
+ return b.match - a.match;
170
+ } );
171
+
172
+ /* return sorted matches */
173
+
174
+ return sorted;
175
+
176
+ };
177
+
99
178
  /**
100
179
  * similarity calculations
101
180
  * @public
@@ -105,14 +184,15 @@ const findClosest = ( algo, test, arr ) => {
105
184
  * calculate levenshtein similarity (in percent)
106
185
  * @param {String} a string 1
107
186
  * @param {String} b string 2
187
+ * @param {Null|String} flags options
108
188
  * @returns similarity 0..1
109
189
  */
110
- const levenshtein = ( a, b ) => {
190
+ const levenshtein = ( a, b, flags = null ) => {
111
191
 
112
192
  /* normalize string */
113
193
 
114
- a = normalize( a );
115
- b = normalize( b );
194
+ a = normalize( a, flags );
195
+ b = normalize( b, flags );
116
196
 
117
197
  if( a == b ) {
118
198
 
@@ -149,14 +229,15 @@ const levenshtein = ( a, b ) => {
149
229
  * get levenshtein distance
150
230
  * @param {String} a string 1
151
231
  * @param {String} b string 2
232
+ * @param {Null|String} flags options
152
233
  * @returns distance
153
234
  */
154
- const levenshteinDistance = ( a, b ) => {
235
+ const levenshteinDistance = ( a, b, flags = null ) => {
155
236
 
156
237
  /* normalize string */
157
238
 
158
- a = normalize( a );
159
- b = normalize( b );
239
+ a = normalize( a, flags );
240
+ b = normalize( b, flags );
160
241
 
161
242
  if( a == b ) {
162
243
 
@@ -233,12 +314,26 @@ const levenshteinDistance = ( a, b ) => {
233
314
  /**
234
315
  * search for closest target to test string
235
316
  * @param {String} test test string
236
- * @param {Array} arr targets to test
317
+ * @param {Array} arr targets to test
318
+ * @param {Null|String} flags options
237
319
  * @returns closest target
238
320
  */
239
- const levenshteinClosest = ( test, arr ) => {
321
+ const levenshteinClosest = ( test, arr, flags = null ) => {
322
+
323
+ return findClosest( 'levenshtein', test, arr, flags );
324
+
325
+ };
326
+
327
+ /**
328
+ * sort best matches to test string
329
+ * @param {String} test test string
330
+ * @param {Array} arr targets to test
331
+ * @param {Null|String} flags options
332
+ * @returns sorted matches
333
+ */
334
+ const levenshteinMatch = ( test, arr, flags = null ) => {
240
335
 
241
- return findClosest( 'levenshtein', test, arr );
336
+ return bestMatch( 'levenshtein', test, arr, flags );
242
337
 
243
338
  };
244
339
 
@@ -246,14 +341,15 @@ const levenshteinClosest = ( test, arr ) => {
246
341
  * calculate dice coefficient
247
342
  * @param {String} a string 1
248
343
  * @param {String} b string 2
344
+ * @param {Null|String} flags options
249
345
  * @returns dice coefficient
250
346
  */
251
- const diceCoefficient = ( a, b ) => {
347
+ const diceCoefficient = ( a, b, flags = null ) => {
252
348
 
253
349
  /* normalize string */
254
350
 
255
- a = normalize( a );
256
- b = normalize( b );
351
+ a = normalize( a, flags );
352
+ b = normalize( b, flags );
257
353
 
258
354
  if( a == b ) {
259
355
 
@@ -271,8 +367,8 @@ const diceCoefficient = ( a, b ) => {
271
367
 
272
368
  /* get bigrams */
273
369
 
274
- let setA = bbigrams( a ),
275
- setB = bbigrams( b );
370
+ let setA = str2bigrams( a ),
371
+ setB = str2bigrams( b );
276
372
 
277
373
  /* calculate dice coefficient */
278
374
 
@@ -292,12 +388,26 @@ const diceCoefficient = ( a, b ) => {
292
388
  /**
293
389
  * search for closest target to test string
294
390
  * @param {String} test test string
295
- * @param {Array} arr targets to test
391
+ * @param {Array} arr targets to test
392
+ * @param {Null|String} flags options
296
393
  * @returns closest target
297
394
  */
298
- const diceClosest = ( test, arr ) => {
395
+ const diceClosest = ( test, arr, flags = null ) => {
396
+
397
+ return findClosest( 'diceCoefficient', test, arr, flags );
398
+
399
+ };
400
+
401
+ /**
402
+ * sort best matches to test string
403
+ * @param {String} test test string
404
+ * @param {Array} arr targets to test
405
+ * @param {Null|String} flags options
406
+ * @returns sorted matches
407
+ */
408
+ const diceMatch = ( test, arr, flags = null ) => {
299
409
 
300
- return findClosest( 'diceCoefficient', test, arr );
410
+ return bestMatch( 'diceCoefficient', test, arr, flags );
301
411
 
302
412
  };
303
413
 
@@ -308,6 +418,8 @@ module.exports = {
308
418
  levenshtein,
309
419
  levenshteinDistance,
310
420
  levenshteinClosest,
421
+ levenshteinMatch,
311
422
  diceCoefficient,
312
- diceClosest
423
+ diceClosest,
424
+ diceMatch
313
425
  };
package/package.json CHANGED
@@ -7,7 +7,7 @@
7
7
  "url" : "https://komed3.de"
8
8
  },
9
9
  "homepage": "https://github.com/komed3/cmpstr#readme",
10
- "version": "1.0.0",
10
+ "version": "1.0.2",
11
11
  "license": "MIT",
12
12
  "keywords": [
13
13
  "string",