cmpstr 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -5
- package/index.js +20 -13
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@ This lightweight npm package can be used to __calculate the similarity of string
|
|
|
4
4
|
|
|
5
5
|
## Install
|
|
6
6
|
|
|
7
|
-
Using
|
|
7
|
+
Using __Node.js__, install the package with the following shell command:
|
|
8
8
|
|
|
9
9
|
```sh
|
|
10
10
|
npm install cmpstr
|
|
@@ -58,6 +58,16 @@ let matches = cmpstr.levenshteinMatch( 'best', [
|
|
|
58
58
|
] );
|
|
59
59
|
```
|
|
60
60
|
|
|
61
|
+
### JavaScript
|
|
62
|
+
|
|
63
|
+
To use this package in plain JavaScript, load it as an ES module via [jsDelivr](https://www.jsdelivr.com/package/npm/cmpstr):
|
|
64
|
+
|
|
65
|
+
```js
|
|
66
|
+
import cmpstr from "https://cdn.jsdelivr.net/npm/cmpstr@1.0.3/+esm";
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Remember: to use ``import``, your JavaScript file must be loaded as a module, i.e. via a script tag with ``type="module"``.
|
|
70
|
+
|
|
61
71
|
## API
|
|
62
72
|
|
|
63
73
|
The npm package ``cmpstr`` supports two different methods for determining the similarity of two strings. The __Levenshtein distance__, as the minimum number of inserting, deleting and replacing operations to convert one string into another, and the __Sørensen-Dice coefficient__ to measure the similarity of two samples.
|
|
@@ -81,9 +91,9 @@ Returns the match percentage of two strings ``a`` and ``b``. The output value is
|
|
|
81
91
|
|
|
82
92
|
Returns the best match of the string ``str`` against the array ``arr`` of passed strings. The function returns the most closely matched string found in the array.
|
|
83
93
|
|
|
84
|
-
#### ``levenshteinMatch( str, arr [, flags = null ] )``
|
|
94
|
+
#### ``levenshteinMatch( str, arr [, flags = null [, threshold = 0 ] ] )``
|
|
85
95
|
|
|
86
|
-
Calculates the similarity of all strings contained in the array ``arr`` according to Levenshtein compared to ``str`` and returns an array of all samples sorted by matching in descending order.
|
|
96
|
+
Calculates the Levenshtein similarity between ``str`` and every string contained in the array ``arr`` and returns an array of all samples sorted by similarity in descending order. The optional ``threshold`` specifies the minimum required similarity: samples whose similarity is below ``threshold`` are omitted from the result.
|
|
87
97
|
|
|
88
98
|
### Sørensen-Dice coefficient
|
|
89
99
|
|
|
@@ -95,9 +105,9 @@ This function evaluates the similarity of two given strings ``a`` and ``b`` as p
|
|
|
95
105
|
|
|
96
106
|
As another way to find the best match between the string ``str`` and a given array ``arr`` of samples, this function uses the Sørensen-Dice coefficient. It returns the most matching string as well.
|
|
97
107
|
|
|
98
|
-
#### ``diceMatch( str, arr [, flags = null ] )``
|
|
108
|
+
#### ``diceMatch( str, arr [, flags = null [, threshold = 0 ] ] )``
|
|
99
109
|
|
|
100
|
-
Calculates the similarity of all strings contained in the array ``arr`` according to Sørensen-Dice coefficient compared to ``str`` and returns an array of all samples sorted by matching in descending order.
|
|
110
|
+
Calculates the Sørensen-Dice similarity between ``str`` and every string contained in the array ``arr`` and returns an array of all samples sorted by similarity in descending order. The optional ``threshold`` specifies the minimum required similarity: samples whose similarity is below ``threshold`` are omitted from the result.
|
|
101
111
|
|
|
102
112
|
### Flags
|
|
103
113
|
|
|
@@ -110,6 +120,10 @@ Each method can be passed the ``flags`` options listed below:
|
|
|
110
120
|
|
|
111
121
|
## Patch notes
|
|
112
122
|
|
|
123
|
+
### 1.0.3
|
|
124
|
+
|
|
125
|
+
* Add ``threshold`` to specify the minimum required similarity
|
|
126
|
+
|
|
113
127
|
### 1.0.2
|
|
114
128
|
|
|
115
129
|
* Add normalize options ``i`` and ``s``
|
package/index.js
CHANGED
|
@@ -3,11 +3,11 @@
|
|
|
3
3
|
* lightweight npm package to calculate string similarity
|
|
4
4
|
*
|
|
5
5
|
* @author komed3 (Paul Köhler)
|
|
6
|
-
* @version 1.0.
|
|
6
|
+
* @version 1.0.3
|
|
7
7
|
* @license MIT
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
-
'use strict'
|
|
10
|
+
'use strict';
|
|
11
11
|
|
|
12
12
|
/**
|
|
13
13
|
* basic functions
|
|
@@ -83,7 +83,7 @@ const str2bigrams = ( str ) => {
|
|
|
83
83
|
* @param {Null|String} flags options
|
|
84
84
|
* @returns similarity
|
|
85
85
|
*/
|
|
86
|
-
const cpmByAlgo = ( algo, a, b, flags ) => {
|
|
86
|
+
const cpmByAlgo = ( algo, a, b, flags = null ) => {
|
|
87
87
|
|
|
88
88
|
switch( algo ) {
|
|
89
89
|
|
|
@@ -108,7 +108,7 @@ const cpmByAlgo = ( algo, a, b, flags ) => {
|
|
|
108
108
|
* @param {Null|String} flags options
|
|
109
109
|
* @returns closest target
|
|
110
110
|
*/
|
|
111
|
-
const findClosest = ( algo, test, arr, flags ) => {
|
|
111
|
+
const findClosest = ( algo, test, arr, flags = null ) => {
|
|
112
112
|
|
|
113
113
|
let best = -Infinity,
|
|
114
114
|
idx = 0,
|
|
@@ -143,9 +143,10 @@ const findClosest = ( algo, test, arr, flags ) => {
|
|
|
143
143
|
* @param {String} test test string
|
|
144
144
|
* @param {Array} arr targets to test
|
|
145
145
|
* @param {Null|String} flags options
|
|
146
|
+
* @param {Float} threshold required similarity
|
|
146
147
|
* @returns sorted matches
|
|
147
148
|
*/
|
|
148
|
-
const bestMatch = ( algo, test, arr, flags = null ) => {
|
|
149
|
+
const bestMatch = ( algo, test, arr, flags = null, threshold = 0 ) => {
|
|
149
150
|
|
|
150
151
|
let matches = [],
|
|
151
152
|
pct;
|
|
@@ -156,10 +157,14 @@ const bestMatch = ( algo, test, arr, flags = null ) => {
|
|
|
156
157
|
|
|
157
158
|
pct = cpmByAlgo( algo, test, str, flags );
|
|
158
159
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
160
|
+
if( pct >= threshold ) {
|
|
161
|
+
|
|
162
|
+
matches.push( {
|
|
163
|
+
target: str,
|
|
164
|
+
match: pct
|
|
165
|
+
} );
|
|
166
|
+
|
|
167
|
+
}
|
|
163
168
|
|
|
164
169
|
} );
|
|
165
170
|
|
|
@@ -329,11 +334,12 @@ const levenshteinClosest = ( test, arr, flags = null ) => {
|
|
|
329
334
|
* @param {String} test test string
|
|
330
335
|
* @param {Array} arr targets to test
|
|
331
336
|
* @param {Null|String} flags options
|
|
337
|
+
* @param {Float} threshold required similarity
|
|
332
338
|
* @returns sorted matches
|
|
333
339
|
*/
|
|
334
|
-
const levenshteinMatch = ( test, arr, flags = null ) => {
|
|
340
|
+
const levenshteinMatch = ( test, arr, flags = null, threshold = 0 ) => {
|
|
335
341
|
|
|
336
|
-
return bestMatch( 'levenshtein', test, arr, flags );
|
|
342
|
+
return bestMatch( 'levenshtein', test, arr, flags, threshold );
|
|
337
343
|
|
|
338
344
|
};
|
|
339
345
|
|
|
@@ -403,11 +409,12 @@ const diceClosest = ( test, arr, flags = null ) => {
|
|
|
403
409
|
* @param {String} test test string
|
|
404
410
|
* @param {Array} arr targets to test
|
|
405
411
|
* @param {Null|String} flags options
|
|
412
|
+
* @param {Float} threshold required similarity
|
|
406
413
|
* @returns sorted matches
|
|
407
414
|
*/
|
|
408
|
-
const diceMatch = ( test, arr, flags = null ) => {
|
|
415
|
+
const diceMatch = ( test, arr, flags = null, threshold = 0 ) => {
|
|
409
416
|
|
|
410
|
-
return bestMatch( 'diceCoefficient', test, arr, flags );
|
|
417
|
+
return bestMatch( 'diceCoefficient', test, arr, flags, threshold );
|
|
411
418
|
|
|
412
419
|
};
|
|
413
420
|
|