cmpstr 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +32 -8
  2. package/index.js +69 -30
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -69,32 +69,56 @@ Learn more about both by visiting these links:
69
69
 
70
70
  ### Levenshtein distance
71
71
 
72
- #### ``levenshteinDistance( a, b )``
72
+ #### ``levenshteinDistance( a, b [, flags = null ] )``
73
73
 
74
74
  Calculates the difference between two strings ``a`` and ``b`` and returns the Levenshtein distance as an integer value.
75
75
 
76
- #### ``levenshtein( a, b )``
76
+ #### ``levenshtein( a, b [, flags = null ] )``
77
77
 
78
78
  Returns the match percentage of two strings ``a`` and ``b``. The output value is in the range ``0..1`` as a floating point number.
79
79
 
80
- #### ``levenshteinClosest( str, arr )``
80
+ #### ``levenshteinClosest( str, arr [, flags = null ] )``
81
81
 
82
82
  Returns the best match of the string ``str`` against the array ``arr`` of passed strings. The function returns the most closely matched string found in the array.
83
83
 
84
- #### ``levenshteinMatch( str, arr )``
84
+ #### ``levenshteinMatch( str, arr [, flags = null ] )``
85
85
 
86
86
  Calculates the similarity of all strings contained in the array ``arr`` according to Levenshtein compared to ``str`` and returns an array of all samples sorted by matching in descending order.
87
87
 
88
88
  ### Sørensen-Dice coefficient
89
89
 
90
- #### ``diceCoefficient( a, b )``
90
+ #### ``diceCoefficient( a, b [, flags = null ] )``
91
91
 
92
92
  This function evaluates the similarity of two given strings ``a`` and ``b`` as percentage value according to the Sørensen-Dice coefficient and returns the result as floating point number.
93
93
 
94
- #### ``diceClosest( str, arr )``
94
+ #### ``diceClosest( str, arr [, flags = null ] )``
95
95
 
96
96
  As another way to find the best match between the string ``str`` and a given array ``arr`` of samples, this function uses the Sørensen-Dice coefficient. It returns the most matching string as well.
97
97
 
98
- #### ``diceMatch( str, arr )``
98
+ #### ``diceMatch( str, arr [, flags = null ] )``
99
99
 
100
- Calculates the similarity of all strings contained in the array ``arr`` according to Sørensen-Dice coefficient compared to ``str`` and returns an array of all samples sorted by matching in descending order.
100
+ Calculates the similarity of all strings contained in the array ``arr`` according to Sørensen-Dice coefficient compared to ``str`` and returns an array of all samples sorted by matching in descending order.
101
+
102
+ ### Flags
103
+
104
+ Each method can be passed the ``flags`` options listed below:
105
+
106
+ | Flag | Option |
107
+ | ----- | ------------------------------ |
108
+ | ``i`` | case insensitive |
109
+ | ``s`` | non-whitespace characters only |
110
+
111
+ ## Patch notes
112
+
113
+ ### 1.0.2
114
+
115
+ * Add normalize options ``i`` and ``s``
116
+ * Minor fixes
117
+
118
+ ### 1.0.1
119
+
120
+ * Minor fixes
121
+
122
+ ### 1.0.0
123
+
124
+ * Initial release
package/index.js CHANGED
@@ -3,7 +3,7 @@
3
3
  * lightweight npm package to calculate string similarity
4
4
  *
5
5
  * @author komed3 (Paul Köhler)
6
- * @version 1.0.1
6
+ * @version 1.0.2
7
7
  * @license MIT
8
8
  */
9
9
 
@@ -17,11 +17,40 @@
17
17
  /**
18
18
  * normalize string
19
19
  * @param {String} str string
20
+ * @param {Null|String} flags options
20
21
  * @returns normalized string
21
22
  */
22
- const normalize = ( str ) => {
23
+ const normalize = ( str, flags = null ) => {
23
24
 
24
- return str.toString();
25
+ str = str.toString();
26
+
27
+ ( flags || '' ).toString().split( '' ).forEach( ( f ) => {
28
+
29
+ /**
30
+ * normalize options
31
+ * i case insensitive
32
+ * s non-whitespace
33
+ */
34
+
35
+ switch( f.toLowerCase() ) {
36
+
37
+ case 'i':
38
+ str = str.toLowerCase();
39
+ break;
40
+
41
+ case 's':
42
+ str = str.replace( /[^\S]+/g, '' );
43
+ break;
44
+
45
+ default:
46
+ /* do nothing */
47
+ break;
48
+
49
+ }
50
+
51
+ } );
52
+
53
+ return str;
25
54
 
26
55
  };
27
56
 
@@ -47,21 +76,22 @@ const str2bigrams = ( str ) => {
47
76
  };
48
77
 
49
78
  /**
50
- *
79
+ * compare strings by given algorithm
51
80
  * @param {String} algo algorithm to use
52
81
  * @param {String} a string 1
53
82
  * @param {String} b string 2
83
+ * @param {Null|String} flags options
54
84
  * @returns similarity
55
85
  */
56
- const cpmByAlgo = ( algo, a, b ) => {
86
+ const cpmByAlgo = ( algo, a, b, flags ) => {
57
87
 
58
88
  switch( algo ) {
59
89
 
60
90
  case 'levenshtein':
61
- return levenshtein( a, b );
91
+ return levenshtein( a, b, flags );
62
92
 
63
93
  case 'diceCoefficient':
64
- return diceCoefficient( a, b );
94
+ return diceCoefficient( a, b, flags );
65
95
 
66
96
  default:
67
97
  return 0;
@@ -75,9 +105,10 @@ const cpmByAlgo = ( algo, a, b ) => {
75
105
  * @param {String} algo algorithm to use
76
106
  * @param {String} test test string
77
107
  * @param {Array} arr targets to test
108
+ * @param {Null|String} flags options
78
109
  * @returns closest target
79
110
  */
80
- const findClosest = ( algo, test, arr ) => {
111
+ const findClosest = ( algo, test, arr, flags ) => {
81
112
 
82
113
  let best = -Infinity,
83
114
  idx = 0,
@@ -85,9 +116,9 @@ const findClosest = ( algo, test, arr ) => {
85
116
 
86
117
  /* search for closest element in arr */
87
118
 
88
- arr.forEach( ( str, i ) => {
119
+ [ ...arr ].forEach( ( str, i ) => {
89
120
 
90
- pct = cpmByAlgo( algo, test, str );
121
+ pct = cpmByAlgo( algo, test, str, flags );
91
122
 
92
123
  if( pct > best ) {
93
124
 
@@ -111,18 +142,19 @@ const findClosest = ( algo, test, arr ) => {
111
142
  * @param {String} algo algorithm to use
112
143
  * @param {String} test test string
113
144
  * @param {Array} arr targets to test
145
+ * @param {Null|String} flags options
114
146
  * @returns sorted matches
115
147
  */
116
- const bestMatch = ( algo, test, arr ) => {
148
+ const bestMatch = ( algo, test, arr, flags = null ) => {
117
149
 
118
150
  let matches = [],
119
151
  pct;
120
152
 
121
153
  /* calculate similarity for each arr items */
122
154
 
123
- arr.forEach( ( str ) => {
155
+ [ ...arr ].forEach( ( str ) => {
124
156
 
125
- pct = cpmByAlgo( algo, test, str );
157
+ pct = cpmByAlgo( algo, test, str, flags );
126
158
 
127
159
  matches.push( {
128
160
  target: str,
@@ -152,14 +184,15 @@ const bestMatch = ( algo, test, arr ) => {
152
184
  * calculate levenshtein similarity (in percent)
153
185
  * @param {String} a string 1
154
186
  * @param {String} b string 2
187
+ * @param {Null|String} flags options
155
188
  * @returns similarity 0..1
156
189
  */
157
- const levenshtein = ( a, b ) => {
190
+ const levenshtein = ( a, b, flags = null ) => {
158
191
 
159
192
  /* normalize string */
160
193
 
161
- a = normalize( a );
162
- b = normalize( b );
194
+ a = normalize( a, flags );
195
+ b = normalize( b, flags );
163
196
 
164
197
  if( a == b ) {
165
198
 
@@ -196,14 +229,15 @@ const levenshtein = ( a, b ) => {
196
229
  * get levenshtein distance
197
230
  * @param {String} a string 1
198
231
  * @param {String} b string 2
232
+ * @param {Null|String} flags options
199
233
  * @returns distance
200
234
  */
201
- const levenshteinDistance = ( a, b ) => {
235
+ const levenshteinDistance = ( a, b, flags = null ) => {
202
236
 
203
237
  /* normalize string */
204
238
 
205
- a = normalize( a );
206
- b = normalize( b );
239
+ a = normalize( a, flags );
240
+ b = normalize( b, flags );
207
241
 
208
242
  if( a == b ) {
209
243
 
@@ -281,11 +315,12 @@ const levenshteinDistance = ( a, b ) => {
281
315
  * search for closest target to test string
282
316
  * @param {String} test test string
283
317
  * @param {Array} arr targets to test
318
+ * @param {Null|String} flags options
284
319
  * @returns closest target
285
320
  */
286
- const levenshteinClosest = ( test, arr ) => {
321
+ const levenshteinClosest = ( test, arr, flags = null ) => {
287
322
 
288
- return findClosest( 'levenshtein', test, arr );
323
+ return findClosest( 'levenshtein', test, arr, flags );
289
324
 
290
325
  };
291
326
 
@@ -293,11 +328,12 @@ const levenshteinClosest = ( test, arr ) => {
293
328
  * sort best matches to test string
294
329
  * @param {String} test test string
295
330
  * @param {Array} arr targets to test
331
+ * @param {Null|String} flags options
296
332
  * @returns sorted matches
297
333
  */
298
- const levenshteinMatch = ( test, arr ) => {
334
+ const levenshteinMatch = ( test, arr, flags = null ) => {
299
335
 
300
- return bestMatch( 'levenshtein', test, arr );
336
+ return bestMatch( 'levenshtein', test, arr, flags );
301
337
 
302
338
  };
303
339
 
@@ -305,14 +341,15 @@ const levenshteinMatch = ( test, arr ) => {
305
341
  * calculate dice coefficient
306
342
  * @param {String} a string 1
307
343
  * @param {String} b string 2
344
+ * @param {Null|String} flags options
308
345
  * @returns dice coefficient
309
346
  */
310
- const diceCoefficient = ( a, b ) => {
347
+ const diceCoefficient = ( a, b, flags = null ) => {
311
348
 
312
349
  /* normalize string */
313
350
 
314
- a = normalize( a );
315
- b = normalize( b );
351
+ a = normalize( a, flags );
352
+ b = normalize( b, flags );
316
353
 
317
354
  if( a == b ) {
318
355
 
@@ -352,11 +389,12 @@ const diceCoefficient = ( a, b ) => {
352
389
  * search for closest target to test string
353
390
  * @param {String} test test string
354
391
  * @param {Array} arr targets to test
392
+ * @param {Null|String} flags options
355
393
  * @returns closest target
356
394
  */
357
- const diceClosest = ( test, arr ) => {
395
+ const diceClosest = ( test, arr, flags = null ) => {
358
396
 
359
- return findClosest( 'diceCoefficient', test, arr );
397
+ return findClosest( 'diceCoefficient', test, arr, flags );
360
398
 
361
399
  };
362
400
 
@@ -364,11 +402,12 @@ const diceClosest = ( test, arr ) => {
364
402
  * sort best matches to test string
365
403
  * @param {String} test test string
366
404
  * @param {Array} arr targets to test
405
+ * @param {Null|String} flags options
367
406
  * @returns sorted matches
368
407
  */
369
- const diceMatch = ( test, arr ) => {
408
+ const diceMatch = ( test, arr, flags = null ) => {
370
409
 
371
- return bestMatch( 'diceCoefficient', test, arr );
410
+ return bestMatch( 'diceCoefficient', test, arr, flags );
372
411
 
373
412
  };
374
413
 
package/package.json CHANGED
@@ -7,7 +7,7 @@
7
7
  "url" : "https://komed3.de"
8
8
  },
9
9
  "homepage": "https://github.com/komed3/cmpstr#readme",
10
- "version": "1.0.1",
10
+ "version": "1.0.2",
11
11
  "license": "MIT",
12
12
  "keywords": [
13
13
  "string",