cmpstr 2.0.3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +75 -503
  3. package/dist/CmpStr.esm.js +4863 -0
  4. package/dist/CmpStr.esm.js.map +1 -0
  5. package/dist/CmpStr.esm.min.js +8 -0
  6. package/dist/CmpStr.esm.min.js.map +1 -0
  7. package/dist/CmpStr.umd.js +4875 -0
  8. package/dist/CmpStr.umd.js.map +1 -0
  9. package/dist/CmpStr.umd.min.js +8 -0
  10. package/dist/CmpStr.umd.min.js.map +1 -0
  11. package/dist/cjs/CmpStr.js +663 -0
  12. package/dist/cjs/CmpStr.js.map +1 -0
  13. package/dist/cjs/CmpStrAsync.js +336 -0
  14. package/dist/cjs/CmpStrAsync.js.map +1 -0
  15. package/dist/cjs/index.js +15 -0
  16. package/dist/cjs/index.js.map +1 -0
  17. package/dist/cjs/metric/Cosine.js +101 -0
  18. package/dist/cjs/metric/Cosine.js.map +1 -0
  19. package/dist/cjs/metric/DamerauLevenshtein.js +110 -0
  20. package/dist/cjs/metric/DamerauLevenshtein.js.map +1 -0
  21. package/dist/cjs/metric/DiceSorensen.js +91 -0
  22. package/dist/cjs/metric/DiceSorensen.js.map +1 -0
  23. package/dist/cjs/metric/Hamming.js +82 -0
  24. package/dist/cjs/metric/Hamming.js.map +1 -0
  25. package/dist/cjs/metric/Jaccard.js +76 -0
  26. package/dist/cjs/metric/Jaccard.js.map +1 -0
  27. package/dist/cjs/metric/JaroWinkler.js +114 -0
  28. package/dist/cjs/metric/JaroWinkler.js.map +1 -0
  29. package/dist/cjs/metric/LCS.js +89 -0
  30. package/dist/cjs/metric/LCS.js.map +1 -0
  31. package/dist/cjs/metric/Levenshtein.js +94 -0
  32. package/dist/cjs/metric/Levenshtein.js.map +1 -0
  33. package/dist/cjs/metric/Metric.js +445 -0
  34. package/dist/cjs/metric/Metric.js.map +1 -0
  35. package/dist/cjs/metric/NeedlemanWunsch.js +95 -0
  36. package/dist/cjs/metric/NeedlemanWunsch.js.map +1 -0
  37. package/dist/cjs/metric/SmithWaterman.js +98 -0
  38. package/dist/cjs/metric/SmithWaterman.js.map +1 -0
  39. package/dist/cjs/metric/qGram.js +91 -0
  40. package/dist/cjs/metric/qGram.js.map +1 -0
  41. package/dist/cjs/phonetic/Cologne.js +112 -0
  42. package/dist/cjs/phonetic/Cologne.js.map +1 -0
  43. package/dist/cjs/phonetic/Metaphone.js +172 -0
  44. package/dist/cjs/phonetic/Metaphone.js.map +1 -0
  45. package/dist/cjs/phonetic/Phonetic.js +413 -0
  46. package/dist/cjs/phonetic/Phonetic.js.map +1 -0
  47. package/dist/cjs/phonetic/Soundex.js +135 -0
  48. package/dist/cjs/phonetic/Soundex.js.map +1 -0
  49. package/dist/cjs/utils/DeepMerge.js +144 -0
  50. package/dist/cjs/utils/DeepMerge.js.map +1 -0
  51. package/dist/cjs/utils/DiffChecker.js +500 -0
  52. package/dist/cjs/utils/DiffChecker.js.map +1 -0
  53. package/dist/cjs/utils/Filter.js +189 -0
  54. package/dist/cjs/utils/Filter.js.map +1 -0
  55. package/dist/cjs/utils/HashTable.js +175 -0
  56. package/dist/cjs/utils/HashTable.js.map +1 -0
  57. package/dist/cjs/utils/Normalizer.js +144 -0
  58. package/dist/cjs/utils/Normalizer.js.map +1 -0
  59. package/dist/cjs/utils/Pool.js +196 -0
  60. package/dist/cjs/utils/Pool.js.map +1 -0
  61. package/dist/cjs/utils/Profiler.js +229 -0
  62. package/dist/cjs/utils/Profiler.js.map +1 -0
  63. package/dist/cjs/utils/Registry.js +148 -0
  64. package/dist/cjs/utils/Registry.js.map +1 -0
  65. package/dist/cjs/utils/TextAnalyzer.js +358 -0
  66. package/dist/cjs/utils/TextAnalyzer.js.map +1 -0
  67. package/dist/esm/CmpStr.js +662 -0
  68. package/dist/esm/CmpStr.js.map +1 -0
  69. package/dist/esm/CmpStrAsync.js +331 -0
  70. package/dist/esm/CmpStrAsync.js.map +1 -0
  71. package/dist/esm/index.js +7 -0
  72. package/dist/esm/index.js.map +1 -0
  73. package/dist/esm/metric/Cosine.js +99 -0
  74. package/dist/esm/metric/Cosine.js.map +1 -0
  75. package/dist/esm/metric/DamerauLevenshtein.js +108 -0
  76. package/dist/esm/metric/DamerauLevenshtein.js.map +1 -0
  77. package/dist/esm/metric/DiceSorensen.js +89 -0
  78. package/dist/esm/metric/DiceSorensen.js.map +1 -0
  79. package/dist/esm/metric/Hamming.js +77 -0
  80. package/dist/esm/metric/Hamming.js.map +1 -0
  81. package/dist/esm/metric/Jaccard.js +74 -0
  82. package/dist/esm/metric/Jaccard.js.map +1 -0
  83. package/dist/esm/metric/JaroWinkler.js +112 -0
  84. package/dist/esm/metric/JaroWinkler.js.map +1 -0
  85. package/dist/esm/metric/LCS.js +87 -0
  86. package/dist/esm/metric/LCS.js.map +1 -0
  87. package/dist/esm/metric/Levenshtein.js +92 -0
  88. package/dist/esm/metric/Levenshtein.js.map +1 -0
  89. package/dist/esm/metric/Metric.js +442 -0
  90. package/dist/esm/metric/Metric.js.map +1 -0
  91. package/dist/esm/metric/NeedlemanWunsch.js +93 -0
  92. package/dist/esm/metric/NeedlemanWunsch.js.map +1 -0
  93. package/dist/esm/metric/SmithWaterman.js +96 -0
  94. package/dist/esm/metric/SmithWaterman.js.map +1 -0
  95. package/dist/esm/metric/qGram.js +89 -0
  96. package/dist/esm/metric/qGram.js.map +1 -0
  97. package/dist/esm/phonetic/Cologne.js +114 -0
  98. package/dist/esm/phonetic/Cologne.js.map +1 -0
  99. package/dist/esm/phonetic/Metaphone.js +174 -0
  100. package/dist/esm/phonetic/Metaphone.js.map +1 -0
  101. package/dist/esm/phonetic/Phonetic.js +409 -0
  102. package/dist/esm/phonetic/Phonetic.js.map +1 -0
  103. package/dist/esm/phonetic/Soundex.js +137 -0
  104. package/dist/esm/phonetic/Soundex.js.map +1 -0
  105. package/dist/esm/utils/DeepMerge.js +139 -0
  106. package/dist/esm/utils/DeepMerge.js.map +1 -0
  107. package/dist/esm/utils/DiffChecker.js +498 -0
  108. package/dist/esm/utils/DiffChecker.js.map +1 -0
  109. package/dist/esm/utils/Filter.js +187 -0
  110. package/dist/esm/utils/Filter.js.map +1 -0
  111. package/dist/esm/utils/HashTable.js +173 -0
  112. package/dist/esm/utils/HashTable.js.map +1 -0
  113. package/dist/esm/utils/Normalizer.js +142 -0
  114. package/dist/esm/utils/Normalizer.js.map +1 -0
  115. package/dist/esm/utils/Pool.js +194 -0
  116. package/dist/esm/utils/Pool.js.map +1 -0
  117. package/dist/esm/utils/Profiler.js +227 -0
  118. package/dist/esm/utils/Profiler.js.map +1 -0
  119. package/dist/esm/utils/Registry.js +142 -0
  120. package/dist/esm/utils/Registry.js.map +1 -0
  121. package/dist/esm/utils/TextAnalyzer.js +356 -0
  122. package/dist/esm/utils/TextAnalyzer.js.map +1 -0
  123. package/dist/types/CmpStr.d.ts +472 -0
  124. package/dist/types/CmpStrAsync.d.ts +233 -0
  125. package/dist/types/index.d.ts +51 -0
  126. package/dist/types/metric/Cosine.d.ts +57 -0
  127. package/dist/types/metric/DamerauLevenshtein.d.ts +50 -0
  128. package/dist/types/metric/DiceSorensen.d.ts +57 -0
  129. package/dist/types/metric/Hamming.d.ts +49 -0
  130. package/dist/types/metric/Jaccard.d.ts +48 -0
  131. package/dist/types/metric/JaroWinkler.d.ts +50 -0
  132. package/dist/types/metric/LCS.d.ts +50 -0
  133. package/dist/types/metric/Levenshtein.d.ts +50 -0
  134. package/dist/types/metric/Metric.d.ts +261 -0
  135. package/dist/types/metric/NeedlemanWunsch.d.ts +47 -0
  136. package/dist/types/metric/SmithWaterman.d.ts +48 -0
  137. package/dist/types/metric/index.d.ts +41 -0
  138. package/dist/types/metric/qGram.d.ts +56 -0
  139. package/dist/types/phonetic/Cologne.d.ts +46 -0
  140. package/dist/types/phonetic/Metaphone.d.ts +50 -0
  141. package/dist/types/phonetic/Phonetic.d.ts +189 -0
  142. package/dist/types/phonetic/Soundex.d.ts +49 -0
  143. package/dist/types/phonetic/index.d.ts +30 -0
  144. package/dist/types/utils/DeepMerge.d.ts +70 -0
  145. package/dist/types/utils/DiffChecker.d.ts +137 -0
  146. package/dist/types/utils/Filter.d.ts +97 -0
  147. package/dist/types/utils/HashTable.d.ts +86 -0
  148. package/dist/types/utils/Normalizer.d.ts +76 -0
  149. package/dist/types/utils/Pool.d.ts +63 -0
  150. package/dist/types/utils/Profiler.d.ts +129 -0
  151. package/dist/types/utils/Registry.d.ts +57 -0
  152. package/dist/types/utils/TextAnalyzer.d.ts +199 -0
  153. package/dist/types/utils/Types.d.ts +313 -0
  154. package/package.json +62 -49
  155. package/src/CmpStr.d.ts +0 -70
  156. package/src/CmpStr.js +0 -917
  157. package/src/CmpStrAsync.d.ts +0 -19
  158. package/src/CmpStrAsync.js +0 -197
  159. package/src/algorithms/cosine.js +0 -86
  160. package/src/algorithms/damerau.js +0 -78
  161. package/src/algorithms/dice.js +0 -65
  162. package/src/algorithms/hamming.js +0 -44
  163. package/src/algorithms/jaccard.js +0 -34
  164. package/src/algorithms/jaroWinkler.js +0 -106
  165. package/src/algorithms/lcs.js +0 -58
  166. package/src/algorithms/levenshtein.js +0 -70
  167. package/src/algorithms/needlemanWunsch.js +0 -72
  168. package/src/algorithms/qGram.js +0 -63
  169. package/src/algorithms/smithWaterman.js +0 -78
  170. package/src/algorithms/soundex.js +0 -152
  171. package/src/index.d.ts +0 -3
  172. package/src/index.js +0 -47
@@ -1,19 +0,0 @@
1
- import { CmpStr, Config, BatchResult } from './CmpStr';
2
-
/**
 * Promise-based variant of CmpStr.
 *
 * Every method declared here returns a Promise for the value described by
 * its type signature.
 */
export declare class CmpStrAsync extends CmpStr {

    /** Normalizes a single string or an array of strings. */
    normalizeAsync (
        input: string | string[],
        flags?: string
    ) : Promise<string | string[]>;

    /** Compares the strings a and b using the named algorithm. */
    compareAsync (
        algo: string,
        a: string,
        b: string,
        config?: Config
    ) : Promise<number | any>;

    /** Tests a target string against the base string. */
    testAsync (
        str: string,
        config?: Config
    ) : Promise<number | any>;

    /** Tests every string of an array against the base string. */
    batchTestAsync (
        arr: string[],
        config?: Config
    ) : Promise<BatchResult[]>;

    /** Resolves the matching entries of an array as batch results. */
    matchAsync (
        arr: string[],
        config?: Config
    ) : Promise<BatchResult[]>;

    /** Resolves the closest matching string, or undefined. */
    closestAsync (
        arr: string[],
        config?: Config
    ) : Promise<string | undefined>;

    /** Resolves a 2D similarity matrix for an array of strings. */
    similarityMatrixAsync (
        algo: string,
        arr: string[],
        config?: Config
    ) : Promise<number[][]>;

}
@@ -1,197 +0,0 @@
1
- /**
2
- * class CmpStrAsync
3
- * extends CmpStr
4
- *
5
- * The CmpStrAsync class extends the CmpStr class and provides asynchronous
6
- * versions of its methods. It uses Promises and setImmediate to ensure
7
- * non-blocking execution, making it suitable for use in asynchronous workflows.
8
- *
9
- * @author Paul Köhler (komed3)
10
- * @license MIT
11
- */
12
-
13
- 'use strict';
14
-
15
- /**
16
- * module dependencies
17
- * @private
18
- */
19
-
20
- const CmpStr = require( './CmpStr' );
21
-
22
- /**
23
- * module exports
24
- * @public
25
- */
26
-
27
- module.exports = class CmpStrAsync extends CmpStr {
28
-
29
- /**
30
- * initializes a CmpStrAsync instance
31
- * algorithm and base string can be set by initialization
32
- *
33
- * @param {String} algo name of the algorithm to use for calculation
34
- * @param {String} str string to set as the base
35
- */
36
- constructor ( algo = undefined, str = undefined ) {
37
-
38
- super ( algo, str );
39
-
40
- };
41
-
42
- /**
43
- * generic async wrapper for methods
44
- *
45
- * @private
46
- * @param {Function} method method to call
47
- * @param {...any} args arguments to pass to the method
48
- * @returns {Promise} Promise resolving the result of the method
49
- */
50
- #asyncWrapper ( method, ...args ) {
51
-
52
- return new Promise ( ( resolve, reject ) => {
53
-
54
- setImmediate( () => {
55
-
56
- try {
57
-
58
- resolve( method.apply( this, args ) );
59
-
60
- } catch ( err ) {
61
-
62
- reject( err );
63
-
64
- }
65
-
66
- } );
67
-
68
- } );
69
-
70
- };
71
-
72
- /**
73
- * --------------------------------------------------
74
- * Asynchronous Methods
75
- * --------------------------------------------------
76
- */
77
-
78
- /**
79
- * normalizes a string by chainable options; uses cache to increase
80
- * performance and custom filters for advanced behavior
81
- *
82
- * @since 2.0.2
83
- * @param {String|String[]} input string(s) to normalize
84
- * @param {String} [flags=''] normalization flags
85
- * @returns {Promise} Promise resolving string normalization
86
- */
87
- normalizeAsync ( input, flags = '' ) {
88
-
89
- return this.#asyncWrapper(
90
- this.normalize,
91
- input, flags
92
- );
93
-
94
- };
95
-
96
- /**
97
- * compares two string a and b using the passed algorithm
98
- *
99
- * @param {String} algo name of the algorithm
100
- * @param {String} a string a
101
- * @param {String} b string b
102
- * @param {Object} [config={}] config (flags, args)
103
- * @returns {Promise} Promise resolving similarity between a and b
104
- */
105
- compareAsync ( algo, a, b, config = {} ) {
106
-
107
- return this.#asyncWrapper(
108
- this.compare,
109
- algo, a, b, config
110
- );
111
-
112
- };
113
-
114
- /**
115
- * tests the similarity between the base string and a target string
116
- * using the current algorithm
117
- *
118
- * @param {String} str target string
119
- * @param {Object} [config={}] config (flags, args)
120
- * @returns {Promise} Promise resolving similarity to base string
121
- */
122
- testAsync ( str, config = {} ) {
123
-
124
- return this.#asyncWrapper(
125
- this.test,
126
- str, config
127
- );
128
-
129
- };
130
-
131
- /**
132
- * tests the similarity of multiple strings against the base string
133
- *
134
- * @param {String[]} arr array of strings
135
- * @param {Object} [config={}] config (flags, args)
136
- * @returns {Promise} Promise resolving an array of objects, each containing target string and similarity score
137
- */
138
- batchTestAsync ( arr, config = {} ) {
139
-
140
- return this.#asyncWrapper(
141
- this.batchTest,
142
- arr, config
143
- );
144
-
145
- };
146
-
147
- /**
148
- * finds strings in an array that exceed a similarity threshold
149
- * returns the array sorted by highest similarity
150
- *
151
- * @param {String[]} arr array of strings
152
- * @param {Object} [config={}] config (flags, threshold, args)
153
- * @returns {Promise} Promise resolving an array of objects, sorted by highest similarity
154
- */
155
- matchAsync ( arr, config = {} ) {
156
-
157
- return this.#asyncWrapper(
158
- this.match,
159
- arr, config
160
- );
161
-
162
- };
163
-
164
- /**
165
- * finds the closest matching string from an array
166
- *
167
- * @param {String[]} arr array of strings
168
- * @param {Object} [config={}] config (flags, args)
169
- * @returns {Promise} Promise resolving the closest matching string
170
- */
171
- closestAsync ( arr, config = {} ) {
172
-
173
- return this.#asyncWrapper(
174
- this.closest,
175
- arr, config
176
- );
177
-
178
- };
179
-
180
- /**
181
- * generate a similarity matrix for an array of strings
182
- *
183
- * @param {String} algo name of the algorithm
184
- * @param {String[]} arr array of strings to cross-compare
185
- * @param {Object} [config={}] config (flags, args)
186
- * @returns {Promise} Promise resolving an 2D array representing the similarity matrix
187
- */
188
- similarityMatrixAsync ( algo, arr, config = {} ) {
189
-
190
- return this.#asyncWrapper(
191
- this.similarityMatrix,
192
- algo, arr, config
193
- );
194
-
195
- };
196
-
197
- };
@@ -1,86 +0,0 @@
1
- /**
2
- * Cosine Similarity
3
- * CmpStr module
4
- *
5
- * Cosine similarity is a measure of how similar two vectors are. It's often used
6
- * in text analysis to compare texts based on the words they contain.
7
- *
8
- * @author Paul Köhler (komed3)
9
- * @license MIT
10
- */
11
-
12
- 'use strict';
13
-
14
- /**
15
- * private helper function
16
- * get term frequency from string
17
- * @private
18
- *
19
- * @param {String} str string
20
- * @param {String} delimiter term delimiter
21
- * @returns {Object} term frequency
22
- */
23
- const _termFreq = ( str, delimiter ) => {
24
-
25
- let freq = {};
26
-
27
- str.split( delimiter ).forEach( ( term ) => {
28
-
29
- freq[ term ] = ( freq[ term ] || 0 ) + 1;
30
-
31
- } );
32
-
33
- return freq;
34
-
35
- };
36
-
37
- /**
38
- * module exports
39
- * @public
40
- *
41
- * @param {String} a string a
42
- * @param {String} b string b
43
- * @param {Object} options having {
44
- * @param {String} [delimiter=' '] term delimiter
45
- * }
46
- * @returns {Number} similarity score (0..1)
47
- */
48
-
49
- module.exports = ( a, b, { delimiter = ' ' } = {} ) => {
50
-
51
- /* step 1: count the frequency of chars per string */
52
-
53
- let termsA = _termFreq( a, delimiter ),
54
- termsB = _termFreq( b, delimiter );
55
-
56
- let allTerms = new Set ( [
57
- ...Object.keys( termsA ),
58
- ...Object.keys( termsB )
59
- ] );
60
-
61
- /* step 2: calculate the dot product */
62
-
63
- let dotProduct = [ ...allTerms ].reduce(
64
- ( sum, char ) => sum + ( termsA[ char ] || 0 ) * ( termsB[ char ] || 0 ),
65
- 0
66
- );
67
-
68
- /* step 3: calculate the vector magnitudes */
69
-
70
- let magnitudeA = Math.sqrt( [ ...allTerms ].reduce(
71
- ( sum, char ) => sum + ( termsA[ char ] || 0 ) ** 2,
72
- 0
73
- ) );
74
-
75
- let magnitudeB = Math.sqrt( [ ...allTerms ].reduce(
76
- ( sum, char ) => sum + ( termsB[ char ] || 0 ) ** 2,
77
- 0
78
- ) );
79
-
80
- /* step 4: calculate Cosine similarity */
81
-
82
- return magnitudeA && magnitudeB
83
- ? dotProduct / ( magnitudeA * magnitudeB )
84
- : 0;
85
-
86
- };
@@ -1,78 +0,0 @@
1
- /**
2
- * Damerau-Levenshtein Distance
3
- * CmpStr module
4
- *
5
- * The Damerau-Levenshtein distance differs from the classical Levenshtein
6
- * distance by including transpositions among its allowable operations in
7
- * addition to the three classical single-character edit operations
8
- * (insertions, deletions and substitutions). Useful for correcting typos.
9
- *
10
- * @author Paul Köhler (komed3)
11
- * @license MIT
12
- */
13
-
14
- 'use strict';
15
-
16
- /**
17
- * module exports
18
- * @public
19
- *
20
- * @param {String} a string a
21
- * @param {String} b string b
22
- * @param {Object} options having {
23
- * @param {Boolean} [raw=false] if true the raw distance is returned
24
- * }
25
- * @returns {Number} similarity score (0..1) or distance
26
- */
27
-
28
- module.exports = ( a, b, { raw = false } = {} ) => {
29
-
30
- /* step 1: initialize scoring matrix */
31
-
32
- let matrix = Array.from(
33
- { length: a.length + 1 },
34
- ( _, i ) => Array.from(
35
- { length: b.length + 1 },
36
- ( _, j ) => i && j ? 0 : i || j
37
- )
38
- );
39
-
40
- /* step 2: calculate Damerau-Levenshtein distance */
41
-
42
- for ( let i = 1; i <= a.length; i++ ) {
43
-
44
- for ( let j = 1; j <= b.length; j++ ) {
45
-
46
- let cost = a[ i - 1 ] === b[ j - 1 ] ? 0 : 1;
47
-
48
- matrix[ i ][ j ] = Math.min(
49
- matrix[ i - 1 ][ j ] + 1,
50
- matrix[ i ][ j - 1 ] + 1,
51
- matrix[ i - 1 ][ j - 1 ] + cost
52
- );
53
-
54
- if (
55
- i > 1 && j > 1 &&
56
- a[ i - 1 ] === b[ j - 2 ] &&
57
- a[ i - 2 ] === b[ j - 1 ]
58
- ) {
59
-
60
- matrix[ i ][ j ] = Math.min(
61
- matrix[ i ][ j ],
62
- matrix[ i - 2 ][ j - 2 ] + cost
63
- );
64
-
65
- }
66
-
67
- }
68
-
69
- }
70
-
71
- /* step 3: get Damerau-Levenshtein distance as value between 0..1 */
72
-
73
- return raw ? matrix[ a.length ][ b.length ] : 1 - (
74
- matrix[ a.length ][ b.length ] /
75
- Math.max( a.length, b.length )
76
- );
77
-
78
- };
@@ -1,65 +0,0 @@
1
- /**
2
- * Dice-Sørensen Coefficient
3
- * CmpStr module
4
- *
5
- * The Dice-Sørensen index equals twice the number of elements common to
6
- * both sets divided by the sum of the number of elements in each set.
7
- * Equivalently, the index is the size of the intersection as a fraction
8
- * of the average size of the two sets.
9
- *
10
- * @author Paul Köhler (komed3)
11
- * @license MIT
12
- */
13
-
14
- 'use strict';
15
-
16
- /**
17
- * private helper function
18
- * get bigrams from string
19
- * @private
20
- *
21
- * @param {String} str string
22
- * @returns {Set} set of bigrams
23
- */
24
- const _str2bigrams = ( str ) => {
25
-
26
- let bigrams = new Set ();
27
-
28
- for ( let i = 0; i < str.length - 1; i++ ) {
29
-
30
- bigrams.add( str.substring( i, i + 2 ) );
31
-
32
- }
33
-
34
- return bigrams;
35
-
36
- };
37
-
38
- /**
39
- * module exports
40
- * @public
41
- *
42
- * @param {String} a string a
43
- * @param {String} b string b
44
- * @returns {Number} similarity score (0..1)
45
- */
46
-
47
- module.exports = ( a, b ) => {
48
-
49
- /* step 1: generate bigrams from strings */
50
-
51
- let setA = _str2bigrams( a ),
52
- setB = _str2bigrams( b );
53
-
54
- /* step 2: calculate coefficient */
55
-
56
- return (
57
- ( new Set ( [ ...setA ].filter( ( test ) => {
58
- return setB.has( test );
59
- } ) ) ).size * 2
60
- ) / (
61
- setA.size +
62
- setB.size
63
- );
64
-
65
- };
@@ -1,44 +0,0 @@
1
- /**
2
- * Hamming Distance
3
- * CmpStr module
4
- *
5
- * The Hamming distance between two equal-length strings of symbols is the
6
- * number of positions at which the corresponding symbols are different.
7
- *
8
- * @author Paul Köhler (komed3)
9
- * @license MIT
10
- */
11
-
12
- 'use strict';
13
-
14
- /**
15
- * module exports
16
- * @public
17
- *
18
- * @param {String} a string a
19
- * @param {String} b string b
20
- * @returns {Number} similarity score (0..1)
21
- * @throws {Error} if string not of equal length
22
- */
23
-
24
- module.exports = ( a, b ) => {
25
-
26
- if ( a.length !== b.length ) {
27
-
28
- /* strings must be of equal length for this calculation */
29
-
30
- throw new Error (
31
- `Strings must be of equal length for Hamming Distance`
32
- );
33
-
34
- }
35
-
36
- return 1 - (
37
- [ ...a ].reduce(
38
- ( sum, char, i ) => sum + ( char !== b[ i ] ? 1 : 0 ),
39
- 0
40
- ) /
41
- a.length
42
- );
43
-
44
- };
@@ -1,34 +0,0 @@
1
- /**
2
- * Jaccard Index
3
- * CmpStr module
4
- *
5
- * The Jaccard Index measures the similarity between two sets by dividing
6
- * the size of their intersection by the size of their union.
7
- *
8
- * @author Paul Köhler (komed3)
9
- * @license MIT
10
- */
11
-
12
- 'use strict';
13
-
14
- /**
15
- * module exports
16
- * @public
17
- *
18
- * @param {String} a string a
19
- * @param {String} b string b
20
- * @returns {Number} similarity score (0..1)
21
- */
22
-
23
- module.exports = ( a, b ) => {
24
-
25
- let setA = new Set ( a ),
26
- setB = new Set ( b );
27
-
28
- return (
29
- new Set ( [ ...setA ].filter( x => setB.has( x ) ) )
30
- ).size / (
31
- new Set ( [ ...setA, ...setB ] )
32
- ).size;
33
-
34
- };
@@ -1,106 +0,0 @@
1
- /**
2
- * Jaro-Winkler Distance
3
- * CmpStr module
4
- *
5
- * Jaro-Winkler is a string similarity metric that gives more weight to
6
- * matching characters at the start of the strings.
7
- *
8
- * @author Paul Köhler (komed3)
9
- * @license MIT
10
- */
11
-
12
- 'use strict';
13
-
14
- /**
15
- * module exports
16
- * @public
17
- *
18
- * @param {String} a string a
19
- * @param {String} b string b
20
- * @param {Object} options having {
21
- * @param {Boolean} [raw=false] if true the raw distance is returned
22
- * }
23
- * @returns {Number} similarity score (0..1) or distance
24
- */
25
-
26
- module.exports = ( a, b, { raw = false } = {} ) => {
27
-
28
- /* step 1: check for matches between strings */
29
-
30
- let matchWindow = Math.floor(
31
- Math.max( a.length, b.length ) / 2
32
- ) - 1;
33
-
34
- let aMatches = Array( a.length ).fill( false ),
35
- bMatches = Array( b.length ).fill( false );
36
-
37
- let matches = 0;
38
-
39
- for ( let i = 0; i < a.length; i++ ) {
40
-
41
- for (
42
- let j = Math.max( 0, i - matchWindow );
43
- j < Math.min( i + matchWindow + 1, b.length );
44
- j++
45
- ) {
46
-
47
- if ( !bMatches[ j ] && a[ i ] === b[ j ] ) {
48
-
49
- aMatches[ i ] = true;
50
- bMatches[ j ] = true;
51
-
52
- matches++;
53
-
54
- break;
55
-
56
- }
57
-
58
- }
59
-
60
- }
61
-
62
- if ( matches === 0 ) {
63
-
64
- /* if no matches found, return 0 */
65
-
66
- return 0;
67
-
68
- }
69
-
70
- /* step 2: calculate transpositions */
71
-
72
- let transpos = 0,
73
- k = 0;
74
-
75
- for ( let i = 0; i < a.length; i++ ) {
76
-
77
- if ( aMatches[ i ] ) {
78
-
79
- while ( !bMatches[ k ] ) k++;
80
-
81
- if ( a[ i ] !== b[ k ] ) transpos++;
82
-
83
- k++;
84
-
85
- }
86
-
87
- }
88
-
89
- /* step 3: calculate Jaro-Winkler distance */
90
-
91
- let jaro = (
92
- ( matches / a.length ) +
93
- ( matches / b.length ) +
94
- ( matches - ( transpos / 2 ) ) /
95
- matches
96
- ) / 3;
97
-
98
- /* step 4: get Jaro-Winkler as value between 0..1 */
99
-
100
- return raw ? jaro : jaro + Math.min(
101
- 4, [ ...a ].findIndex(
102
- ( char, i ) => char !== b[ i ]
103
- ) || 0
104
- ) * 0.1 * ( 1 - jaro );
105
-
106
- };
@@ -1,58 +0,0 @@
1
- /**
2
- * Longest Common Subsequence (LCS)
3
- * CmpStr module
4
- *
5
- * LCS measures the length of the longest subsequence common to both strings.
6
- *
7
- * @author Paul Köhler (komed3)
8
- * @license MIT
9
- */
10
-
11
- 'use strict';
12
-
13
- /**
14
- * module exports
15
- * @public
16
- *
17
- * @param {String} a string a
18
- * @param {String} b string b
19
- * @returns {Number} similarity score (0..1)
20
- */
21
-
22
- module.exports = ( a, b ) => {
23
-
24
- /* step 1: initialize scoring matrix */
25
-
26
- let matrix = Array( a.length + 1 ).fill( null ).map(
27
- () => Array( b.length + 1 ).fill( 0 )
28
- );
29
-
30
- for ( let i = 1; i <= a.length; i++ ) {
31
-
32
- for ( let j = 1; j <= b.length; j++ ) {
33
-
34
- if ( a[ i - 1 ] === b[ j - 1 ] ) {
35
-
36
- matrix[ i ][ j ] = matrix[ i - 1 ][ j - 1 ] + 1;
37
-
38
- } else {
39
-
40
- matrix[ i ][ j ] = Math.max(
41
- matrix[ i - 1 ][ j ],
42
- matrix[ i ][ j - 1 ]
43
- );
44
-
45
- }
46
-
47
- }
48
-
49
- }
50
-
51
- /* step 2: calculate LCS */
52
-
53
- return (
54
- matrix[ a.length ][ b.length ] /
55
- Math.max( a.length, b.length )
56
- );
57
-
58
- };