cmpstr 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +33 -3
  2. package/index.js +96 -23
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -24,16 +24,38 @@ Sample of how to use the package in your code:
24
24
  let str1 = 'kitten';
25
25
  let str2 = 'sitting';
26
26
 
27
+ /**
28
+ * levenshteinDistance
29
+ * expected: 3
30
+ */
27
31
  let distance = cmpstr.levenshteinDistance( str1, str2 );
28
- // expected 3
29
32
 
33
+ /**
34
+ * diceCoefficient
35
+ * expected: 0.3636363636363636
36
+ */
30
37
  let dice = cmpstr.diceCoefficient( str1, str2 );
31
- // expected 0.3636363636363636
32
38
 
39
+ /**
40
+ * diceClosest
41
+ * expected: bestest
42
+ */
33
43
  let closest = cmpstr.diceClosest( 'best', [
34
44
  'better', 'bestest', 'well', 'good'
35
45
  ] );
36
- // expected bestest
46
+
47
+ /**
48
+ * levenshteinMatch
49
+ * expected: [
50
+ * { target: 'bestest', match: 0.5714285714285714 },
51
+ * { target: 'better', match: 0.5 },
52
+ * { target: 'well', match: 0.25 },
53
+ * { target: 'good', match: 0 }
54
+ * ]
55
+ */
56
+ let matches = cmpstr.levenshteinMatch( 'best', [
57
+ 'better', 'bestest', 'well', 'good'
58
+ ] );
37
59
  ```
38
60
 
39
61
  ## API
@@ -59,6 +81,10 @@ Returns the match percentage of two strings ``a`` and ``b``. The output value is
59
81
 
60
82
  Returns the best match of the string ``str`` against the array ``arr`` of passed strings. The function returns the most closely matched string found in the array.
61
83
 
84
+ #### ``levenshteinMatch( str, arr )``
85
+
86
+ Calculates the Levenshtein similarity between ``str`` and each string contained in the array ``arr``, and returns an array with one ``{ target, match }`` entry per sample, sorted by ``match`` in descending order.
87
+
62
88
  ### Sørensen-Dice coefficient
63
89
 
64
90
  #### ``diceCoefficient( a, b )``
@@ -68,3 +94,7 @@ This function evaluates the similarity of two given strings ``a`` and ``b`` as p
68
94
  #### ``diceClosest( str, arr )``
69
95
 
70
96
  As another way to find the best match between the string ``str`` and a given array ``arr`` of samples, this function uses the Sørensen-Dice coefficient. It returns the most matching string as well.
97
+
98
+ #### ``diceMatch( str, arr )``
99
+
100
+ Calculates the Sørensen-Dice coefficient between ``str`` and each string contained in the array ``arr``, and returns an array with one ``{ target, match }`` entry per sample, sorted by ``match`` in descending order.
package/index.js CHANGED
@@ -3,7 +3,7 @@
3
3
  * lightweight npm package to calculate string similarity
4
4
  *
5
5
  * @author komed3 (Paul Köhler)
6
- * @version 1.0.0
6
+ * @version 1.0.1
7
7
  * @license MIT
8
8
  */
9
9
 
@@ -30,7 +30,7 @@ const normalize = ( str ) => {
30
30
  * @param {String} str string
31
31
  * @returns bigrams
32
32
  */
33
- const bbigrams = ( str ) => {
33
+ const str2bigrams = ( str ) => {
34
34
 
35
35
  let bigrams = new Set();
36
36
 
@@ -46,11 +46,35 @@ const bbigrams = ( str ) => {
46
46
 
47
47
  };
48
48
 
49
+ /**
50
+ * calculate similarity of two strings using the given algorithm
51
+ * @param {String} algo algorithm to use
52
+ * @param {String} a string 1
53
+ * @param {String} b string 2
54
+ * @returns similarity
55
+ */
56
+ const cpmByAlgo = ( algo, a, b ) => {
57
+
58
+ switch( algo ) {
59
+
60
+ case 'levenshtein':
61
+ return levenshtein( a, b );
62
+
63
+ case 'diceCoefficient':
64
+ return diceCoefficient( a, b );
65
+
66
+ default:
67
+ return 0;
68
+
69
+ }
70
+
71
+ };
72
+
49
73
  /**
50
74
  * search for closest string
51
75
  * @param {String} algo algorithm to use
52
76
  * @param {String} test test string
53
- * @param {Array} arr targets to test
77
+ * @param {Array} arr targets to test
54
78
  * @returns closest target
55
79
  */
56
80
  const findClosest = ( algo, test, arr ) => {
@@ -63,21 +87,7 @@ const findClosest = ( algo, test, arr ) => {
63
87
 
64
88
  arr.forEach( ( str, i ) => {
65
89
 
66
- switch( algo ) {
67
-
68
- case 'levenshtein':
69
- pct = levenshtein( test, str );
70
- break;
71
-
72
- case 'diceCoefficient':
73
- pct = diceCoefficient( test, str );
74
- break;
75
-
76
- default:
77
- pct = 0;
78
- break;
79
-
80
- }
90
+ pct = cpmByAlgo( algo, test, str );
81
91
 
82
92
  if( pct > best ) {
83
93
 
@@ -96,6 +106,43 @@ const findClosest = ( algo, test, arr ) => {
96
106
 
97
107
  };
98
108
 
109
+ /**
110
+ * sort best matches to test string
111
+ * @param {String} algo algorithm to use
112
+ * @param {String} test test string
113
+ * @param {Array} arr targets to test
114
+ * @returns sorted matches
115
+ */
116
+ const bestMatch = ( algo, test, arr ) => {
117
+
118
+ let matches = [],
119
+ pct;
120
+
121
+ /* calculate similarity for each item in arr */
122
+
123
+ arr.forEach( ( str ) => {
124
+
125
+ pct = cpmByAlgo( algo, test, str );
126
+
127
+ matches.push( {
128
+ target: str,
129
+ match: pct
130
+ } );
131
+
132
+ } );
133
+
134
+ /* sort by highest similarity */
135
+
136
+ let sorted = matches.sort( ( a, b ) => {
137
+ return b.match - a.match;
138
+ } );
139
+
140
+ /* return sorted matches */
141
+
142
+ return sorted;
143
+
144
+ };
145
+
99
146
  /**
100
147
  * similarity calculations
101
148
  * @public
@@ -233,7 +280,7 @@ const levenshteinDistance = ( a, b ) => {
233
280
  /**
234
281
  * search for closest target to test string
235
282
  * @param {String} test test string
236
- * @param {Array} arr targets to test
283
+ * @param {Array} arr targets to test
237
284
  * @returns closest target
238
285
  */
239
286
  const levenshteinClosest = ( test, arr ) => {
@@ -242,6 +289,18 @@ const levenshteinClosest = ( test, arr ) => {
242
289
 
243
290
  };
244
291
 
292
+ /**
293
+ * sort best matches to test string
294
+ * @param {String} test test string
295
+ * @param {Array} arr targets to test
296
+ * @returns sorted matches
297
+ */
298
+ const levenshteinMatch = ( test, arr ) => {
299
+
300
+ return bestMatch( 'levenshtein', test, arr );
301
+
302
+ };
303
+
245
304
  /**
246
305
  * calculate dice coefficient
247
306
  * @param {String} a string 1
@@ -271,8 +330,8 @@ const diceCoefficient = ( a, b ) => {
271
330
 
272
331
  /* get bigrams */
273
332
 
274
- let setA = bbigrams( a ),
275
- setB = bbigrams( b );
333
+ let setA = str2bigrams( a ),
334
+ setB = str2bigrams( b );
276
335
 
277
336
  /* calculate dice coefficient */
278
337
 
@@ -292,7 +351,7 @@ const diceCoefficient = ( a, b ) => {
292
351
  /**
293
352
  * search for closest target to test string
294
353
  * @param {String} test test string
295
- * @param {Array} arr targets to test
354
+ * @param {Array} arr targets to test
296
355
  * @returns closest target
297
356
  */
298
357
  const diceClosest = ( test, arr ) => {
@@ -301,6 +360,18 @@ const diceClosest = ( test, arr ) => {
301
360
 
302
361
  };
303
362
 
363
+ /**
364
+ * sort best matches to test string
365
+ * @param {String} test test string
366
+ * @param {Array} arr targets to test
367
+ * @returns sorted matches
368
+ */
369
+ const diceMatch = ( test, arr ) => {
370
+
371
+ return bestMatch( 'diceCoefficient', test, arr );
372
+
373
+ };
374
+
304
375
  /**
305
376
  * export module functions
306
377
  */
@@ -308,6 +379,8 @@ module.exports = {
308
379
  levenshtein,
309
380
  levenshteinDistance,
310
381
  levenshteinClosest,
382
+ levenshteinMatch,
311
383
  diceCoefficient,
312
- diceClosest
384
+ diceClosest,
385
+ diceMatch
313
386
  };
package/package.json CHANGED
@@ -7,7 +7,7 @@
7
7
  "url" : "https://komed3.de"
8
8
  },
9
9
  "homepage": "https://github.com/komed3/cmpstr#readme",
10
- "version": "1.0.0",
10
+ "version": "1.0.1",
11
11
  "license": "MIT",
12
12
  "keywords": [
13
13
  "string",