cmpstr 3.0.3 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/CmpStr.esm.js +2228 -4944
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +1 -1
- package/dist/CmpStr.umd.js +2348 -5040
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +1 -1
- package/dist/cjs/CmpStr.cjs +1 -405
- package/dist/cjs/CmpStr.cjs.map +1 -1
- package/dist/cjs/CmpStrAsync.cjs +1 -221
- package/dist/cjs/CmpStrAsync.cjs.map +1 -1
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +1 -56
- package/dist/cjs/metric/Cosine.cjs.map +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +2 -64
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +1 -56
- package/dist/cjs/metric/DiceSorensen.cjs.map +1 -1
- package/dist/cjs/metric/Hamming.cjs +2 -51
- package/dist/cjs/metric/Hamming.cjs.map +1 -1
- package/dist/cjs/metric/Jaccard.cjs +1 -48
- package/dist/cjs/metric/Jaccard.cjs.map +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +1 -53
- package/dist/cjs/metric/JaroWinkler.cjs.map +1 -1
- package/dist/cjs/metric/LCS.cjs +1 -54
- package/dist/cjs/metric/LCS.cjs.map +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +2 -60
- package/dist/cjs/metric/Levenshtein.cjs.map +1 -1
- package/dist/cjs/metric/Metric.cjs +1 -261
- package/dist/cjs/metric/Metric.cjs.map +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs +4 -56
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs +4 -58
- package/dist/cjs/metric/SmithWaterman.cjs.map +1 -1
- package/dist/cjs/metric/qGram.cjs +1 -55
- package/dist/cjs/metric/qGram.cjs.map +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +1 -78
- package/dist/cjs/phonetic/Caverphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Cologne.cjs +1 -43
- package/dist/cjs/phonetic/Cologne.cjs.map +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +1 -76
- package/dist/cjs/phonetic/Metaphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +1 -261
- package/dist/cjs/phonetic/Phonetic.cjs.map +1 -1
- package/dist/cjs/phonetic/Soundex.cjs +1 -47
- package/dist/cjs/phonetic/Soundex.cjs.map +1 -1
- package/dist/cjs/root.cjs +37 -0
- package/dist/cjs/root.cjs.map +1 -0
- package/dist/cjs/utils/DeepMerge.cjs +8 -75
- package/dist/cjs/utils/DeepMerge.cjs.map +1 -1
- package/dist/cjs/utils/DiffChecker.cjs +2 -190
- package/dist/cjs/utils/DiffChecker.cjs.map +1 -1
- package/dist/cjs/utils/Filter.cjs +1 -112
- package/dist/cjs/utils/Filter.cjs.map +1 -1
- package/dist/cjs/utils/HashTable.cjs +1 -99
- package/dist/cjs/utils/HashTable.cjs.map +1 -1
- package/dist/cjs/utils/Normalizer.cjs +3 -94
- package/dist/cjs/utils/Normalizer.cjs.map +1 -1
- package/dist/cjs/utils/Pool.cjs +1 -105
- package/dist/cjs/utils/Pool.cjs.map +1 -1
- package/dist/cjs/utils/Profiler.cjs +1 -133
- package/dist/cjs/utils/Profiler.cjs.map +1 -1
- package/dist/cjs/utils/Registry.cjs +1 -89
- package/dist/cjs/utils/Registry.cjs.map +1 -1
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -180
- package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -1
- package/dist/esm/CmpStr.mjs +1 -405
- package/dist/esm/CmpStr.mjs.map +1 -1
- package/dist/esm/CmpStrAsync.mjs +1 -221
- package/dist/esm/CmpStrAsync.mjs.map +1 -1
- package/dist/esm/index.mjs +1 -1
- package/dist/esm/metric/Cosine.mjs +1 -56
- package/dist/esm/metric/Cosine.mjs.map +1 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs +2 -64
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -1
- package/dist/esm/metric/DiceSorensen.mjs +1 -56
- package/dist/esm/metric/DiceSorensen.mjs.map +1 -1
- package/dist/esm/metric/Hamming.mjs +2 -51
- package/dist/esm/metric/Hamming.mjs.map +1 -1
- package/dist/esm/metric/Jaccard.mjs +1 -48
- package/dist/esm/metric/Jaccard.mjs.map +1 -1
- package/dist/esm/metric/JaroWinkler.mjs +1 -53
- package/dist/esm/metric/JaroWinkler.mjs.map +1 -1
- package/dist/esm/metric/LCS.mjs +1 -54
- package/dist/esm/metric/LCS.mjs.map +1 -1
- package/dist/esm/metric/Levenshtein.mjs +2 -60
- package/dist/esm/metric/Levenshtein.mjs.map +1 -1
- package/dist/esm/metric/Metric.mjs +1 -261
- package/dist/esm/metric/Metric.mjs.map +1 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs +4 -56
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -1
- package/dist/esm/metric/SmithWaterman.mjs +4 -58
- package/dist/esm/metric/SmithWaterman.mjs.map +1 -1
- package/dist/esm/metric/qGram.mjs +1 -55
- package/dist/esm/metric/qGram.mjs.map +1 -1
- package/dist/esm/phonetic/Caverphone.mjs +1 -78
- package/dist/esm/phonetic/Caverphone.mjs.map +1 -1
- package/dist/esm/phonetic/Cologne.mjs +1 -43
- package/dist/esm/phonetic/Cologne.mjs.map +1 -1
- package/dist/esm/phonetic/Metaphone.mjs +1 -76
- package/dist/esm/phonetic/Metaphone.mjs.map +1 -1
- package/dist/esm/phonetic/Phonetic.mjs +1 -261
- package/dist/esm/phonetic/Phonetic.mjs.map +1 -1
- package/dist/esm/phonetic/Soundex.mjs +1 -47
- package/dist/esm/phonetic/Soundex.mjs.map +1 -1
- package/dist/esm/root.mjs +29 -0
- package/dist/esm/root.mjs.map +1 -0
- package/dist/esm/utils/DeepMerge.mjs +8 -76
- package/dist/esm/utils/DeepMerge.mjs.map +1 -1
- package/dist/esm/utils/DiffChecker.mjs +2 -190
- package/dist/esm/utils/DiffChecker.mjs.map +1 -1
- package/dist/esm/utils/Filter.mjs +1 -112
- package/dist/esm/utils/Filter.mjs.map +1 -1
- package/dist/esm/utils/HashTable.mjs +1 -99
- package/dist/esm/utils/HashTable.mjs.map +1 -1
- package/dist/esm/utils/Normalizer.mjs +3 -94
- package/dist/esm/utils/Normalizer.mjs.map +1 -1
- package/dist/esm/utils/Pool.mjs +1 -105
- package/dist/esm/utils/Pool.mjs.map +1 -1
- package/dist/esm/utils/Profiler.mjs +1 -133
- package/dist/esm/utils/Profiler.mjs.map +1 -1
- package/dist/esm/utils/Registry.mjs +1 -89
- package/dist/esm/utils/Registry.mjs.map +1 -1
- package/dist/esm/utils/TextAnalyzer.mjs +1 -180
- package/dist/esm/utils/TextAnalyzer.mjs.map +1 -1
- package/dist/types/index.d.ts +2 -2
- package/dist/types/root.d.ts +38 -0
- package/package.json +14 -8
package/dist/esm/CmpStr.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.0.
|
|
1
|
+
// CmpStr v3.0.4 build-74e65a5-250915 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
import { merge, set, rmv, get } from './utils/DeepMerge.mjs';
|
|
3
3
|
import { Profiler } from './utils/Profiler.mjs';
|
|
4
4
|
import { TextAnalyzer } from './utils/TextAnalyzer.mjs';
|
|
@@ -28,49 +28,8 @@ import {
|
|
|
28
28
|
Phonetic
|
|
29
29
|
} from './phonetic/Phonetic.mjs';
|
|
30
30
|
|
|
31
|
-
/**
|
|
32
|
-
* CmpStr Main API
|
|
33
|
-
* src/CmpStr.ts
|
|
34
|
-
*
|
|
35
|
-
* The CmpStr class provides a comprehensive, highly abstracted, and type-safe interface
|
|
36
|
-
* for string comparison, similarity measurement, phonetic indexing, filtering, normalization,
|
|
37
|
-
* and text analysis. It unifies all core features of the CmpStr package and exposes a
|
|
38
|
-
* consistent, user-friendly API for both single and batch operations.
|
|
39
|
-
*
|
|
40
|
-
* Features:
|
|
41
|
-
* - Centralized management of metrics, phonetic algorithms, and filters
|
|
42
|
-
* - Flexible normalization and filtering pipeline for all inputs
|
|
43
|
-
* - Batch, pairwise, and single string comparison with detailed results
|
|
44
|
-
* - Phonetic indexing and phonetic-aware search and comparison
|
|
45
|
-
* - Text analysis and unified diff utilities
|
|
46
|
-
* - Full TypeScript type safety and extensibility
|
|
47
|
-
*
|
|
48
|
-
* @module CmpStr
|
|
49
|
-
* @author Paul Köhler (komed3)
|
|
50
|
-
* @license MIT
|
|
51
|
-
*/
|
|
52
|
-
// Import the Profiler instance for global profiling
|
|
53
31
|
const profiler = Profiler.getInstance();
|
|
54
|
-
/**
|
|
55
|
-
* The main CmpStr class that provides a unified interface for string comparison,
|
|
56
|
-
* phonetic indexing, filtering, and text analysis.
|
|
57
|
-
*
|
|
58
|
-
* @template R - The type of the metric result, defaults to MetricRaw
|
|
59
|
-
*/
|
|
60
32
|
class CmpStr {
|
|
61
|
-
/**
|
|
62
|
-
* --------------------------------------------------------------------------------
|
|
63
|
-
* Static methods and properties for global access to CmpStr features
|
|
64
|
-
* --------------------------------------------------------------------------------
|
|
65
|
-
*
|
|
66
|
-
* These static methods provide a convenient way to access the core features of
|
|
67
|
-
* the CmpStr package without needing to instantiate a CmpStr object.
|
|
68
|
-
*/
|
|
69
|
-
/**
|
|
70
|
-
* Adds, removes, pauses, resumes, lists, or clears global filters.
|
|
71
|
-
*
|
|
72
|
-
* @see Filter
|
|
73
|
-
*/
|
|
74
33
|
static filter = {
|
|
75
34
|
add: Filter.add,
|
|
76
35
|
remove: Filter.remove,
|
|
@@ -79,22 +38,12 @@ class CmpStr {
|
|
|
79
38
|
list: Filter.list,
|
|
80
39
|
clear: Filter.clear
|
|
81
40
|
};
|
|
82
|
-
/**
|
|
83
|
-
* Adds, removes, checks, or lists available metrics.
|
|
84
|
-
*
|
|
85
|
-
* @see MetricRegistry
|
|
86
|
-
*/
|
|
87
41
|
static metric = {
|
|
88
42
|
add: MetricRegistry.add,
|
|
89
43
|
remove: MetricRegistry.remove,
|
|
90
44
|
has: MetricRegistry.has,
|
|
91
45
|
list: MetricRegistry.list
|
|
92
46
|
};
|
|
93
|
-
/**
|
|
94
|
-
* Adds, removes, checks, or lists available phonetic algorithms and mappings.
|
|
95
|
-
*
|
|
96
|
-
* @see PhoneticRegistry
|
|
97
|
-
*/
|
|
98
47
|
static phonetic = {
|
|
99
48
|
add: PhoneticRegistry.add,
|
|
100
49
|
remove: PhoneticRegistry.remove,
|
|
@@ -107,89 +56,30 @@ class CmpStr {
|
|
|
107
56
|
list: PhoneticMappingRegistry.list
|
|
108
57
|
}
|
|
109
58
|
};
|
|
110
|
-
/**
|
|
111
|
-
* Provides access to the global profiler services.
|
|
112
|
-
*
|
|
113
|
-
* @see Profiler
|
|
114
|
-
*/
|
|
115
59
|
static profiler = profiler.services;
|
|
116
|
-
/**
|
|
117
|
-
* Clears the caches for normalizer, metric, and phonetic modules.
|
|
118
|
-
*/
|
|
119
60
|
static clearCache = {
|
|
120
61
|
normalizer: Normalizer.clear,
|
|
121
62
|
metric: Metric.clear,
|
|
122
63
|
phonetic: Phonetic.clear
|
|
123
64
|
};
|
|
124
|
-
/**
|
|
125
|
-
* Returns a TextAnalyzer instance for the given input string.
|
|
126
|
-
*
|
|
127
|
-
* @param {string} [input] - The input string
|
|
128
|
-
* @returns {TextAnalyzer} - The text analyzer
|
|
129
|
-
*/
|
|
130
65
|
static analyze(input) {
|
|
131
66
|
return new TextAnalyzer(input);
|
|
132
67
|
}
|
|
133
|
-
/**
|
|
134
|
-
* Returns a DiffChecker instance for computing the unified diff between two texts.
|
|
135
|
-
*
|
|
136
|
-
* @param {string} a - The first (original) text
|
|
137
|
-
* @param {string} b - The second (modified) text
|
|
138
|
-
* @param {DiffOptions} [opt] - Optional diff configuration
|
|
139
|
-
* @returns {DiffChecker} - The diff checker instance
|
|
140
|
-
*/
|
|
141
68
|
static diff(a, b, opt) {
|
|
142
69
|
return new DiffChecker(a, b, opt);
|
|
143
70
|
}
|
|
144
|
-
/**
|
|
145
|
-
* --------------------------------------------------------------------------------
|
|
146
|
-
* Instantiate the CmpStr class
|
|
147
|
-
* --------------------------------------------------------------------------------
|
|
148
|
-
*
|
|
149
|
-
* Methods to create a new CmpStr instance with the given options.
|
|
150
|
-
* Using the static `create` method is recommended to ensure proper instantiation.
|
|
151
|
-
*/
|
|
152
|
-
/**
|
|
153
|
-
* Creates a new CmpStr instance with the given options.
|
|
154
|
-
*
|
|
155
|
-
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
|
|
156
|
-
* @returns {CmpStr<R>} - A new CmpStr instance
|
|
157
|
-
*/
|
|
158
71
|
static create(opt) {
|
|
159
72
|
return new CmpStr(opt);
|
|
160
73
|
}
|
|
161
|
-
// The options object that holds the configuration for this CmpStr instance
|
|
162
74
|
options = Object.create(null);
|
|
163
|
-
/**
|
|
164
|
-
* Creates a new CmpStr instance with the given options.
|
|
165
|
-
* The constructor is protected to enforce the use of the static `create` method.
|
|
166
|
-
*
|
|
167
|
-
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
|
|
168
|
-
*/
|
|
169
75
|
constructor(opt) {
|
|
170
76
|
if (opt)
|
|
171
77
|
typeof opt === 'string'
|
|
172
78
|
? this.setSerializedOptions(opt)
|
|
173
79
|
: this.setOptions(opt);
|
|
174
80
|
}
|
|
175
|
-
/**
|
|
176
|
-
* ---------------------------------------------------------------------------------
|
|
177
|
-
* Protected utility methods for internal use
|
|
178
|
-
* ---------------------------------------------------------------------------------
|
|
179
|
-
*
|
|
180
|
-
* These methods provide utility functions for converting inputs, merging options,
|
|
181
|
-
* normalizing inputs, filtering, and preparing inputs for comparison.
|
|
182
|
-
*/
|
|
183
|
-
/**
|
|
184
|
-
* Asserts a condition and throws if the condition is not met.
|
|
185
|
-
*
|
|
186
|
-
* @param {string} cond - The condition to meet
|
|
187
|
-
* @param {any} [test] - Value to test for
|
|
188
|
-
* @throws {Error} If the condition is not met
|
|
189
|
-
*/
|
|
190
81
|
assert(cond, test) {
|
|
191
82
|
switch (cond) {
|
|
192
|
-
// Check if the metric exists
|
|
193
83
|
case 'metric':
|
|
194
84
|
if (!CmpStr.metric.has(test))
|
|
195
85
|
throw new Error(
|
|
@@ -197,7 +87,6 @@ class CmpStr {
|
|
|
197
87
|
`use CmpStr.metric.list() for available metrics`
|
|
198
88
|
);
|
|
199
89
|
break;
|
|
200
|
-
// Check if the phonetic algorithm exists
|
|
201
90
|
case 'phonetic':
|
|
202
91
|
if (!CmpStr.phonetic.has(test))
|
|
203
92
|
throw new Error(
|
|
@@ -205,85 +94,34 @@ class CmpStr {
|
|
|
205
94
|
`use CmpStr.phonetic.list() for available phonetic algorithms`
|
|
206
95
|
);
|
|
207
96
|
break;
|
|
208
|
-
// Throw an error for unknown conditions
|
|
209
97
|
default:
|
|
210
98
|
throw new Error(`Cmpstr condition <${cond}> unknown`);
|
|
211
99
|
}
|
|
212
100
|
}
|
|
213
|
-
/**
|
|
214
|
-
* Assert multiple conditions.
|
|
215
|
-
*
|
|
216
|
-
* @param {[ string, any? ][]} cond - Array of [ condition, value ] pairs
|
|
217
|
-
*/
|
|
218
101
|
assertMany(...cond) {
|
|
219
102
|
for (const [c, test] of cond) this.assert(c, test);
|
|
220
103
|
}
|
|
221
|
-
/**
|
|
222
|
-
* Resolves the options for the CmpStr instance, merging the provided options with
|
|
223
|
-
* the existing options.
|
|
224
|
-
*
|
|
225
|
-
* @param {CmpStrOptions} [opt] - Optional options to merge
|
|
226
|
-
* @returns {CmpStrOptions} - The resolved options
|
|
227
|
-
*/
|
|
228
104
|
resolveOptions(opt) {
|
|
229
105
|
return merge({ ...(this.options ?? Object.create(null)) }, opt);
|
|
230
106
|
}
|
|
231
|
-
/**
|
|
232
|
-
* Normalizes the input string or array using the configured or provided flags.
|
|
233
|
-
*
|
|
234
|
-
* @param {MetricInput} input - The input string or array
|
|
235
|
-
* @param {NormalizeFlags} [flags] - Normalization flags
|
|
236
|
-
* @returns {MetricInput} - The normalized input
|
|
237
|
-
*/
|
|
238
107
|
normalize(input, flags) {
|
|
239
108
|
return Normalizer.normalize(input, flags ?? this.options.flags ?? '');
|
|
240
109
|
}
|
|
241
|
-
/**
|
|
242
|
-
* Applies all active filters to the input string or array.
|
|
243
|
-
*
|
|
244
|
-
* @param {MetricInput} input - The input string or array
|
|
245
|
-
* @param {string} [hook='input'] - The filter hook
|
|
246
|
-
* @returns {MetricInput} - The filtered string(s)
|
|
247
|
-
*/
|
|
248
110
|
filter(input, hook) {
|
|
249
111
|
return Filter.apply(hook, input);
|
|
250
112
|
}
|
|
251
|
-
/**
|
|
252
|
-
* Prepares the input by normalizing and filtering.
|
|
253
|
-
*
|
|
254
|
-
* @param {MetricInput} [input] - The input string or array
|
|
255
|
-
* @param {CmpStrOptions} [opt] - Optional options to use
|
|
256
|
-
* @returns {MetricInput} - The prepared input
|
|
257
|
-
*/
|
|
258
113
|
prepare(input, opt) {
|
|
259
114
|
const { flags, processors } = opt ?? this.options;
|
|
260
|
-
// Normalize the input using flags (i.e., 'itw')
|
|
261
115
|
if (flags?.length) input = this.normalize(input, flags);
|
|
262
|
-
// Filter the input using hooked up filters
|
|
263
116
|
input = this.filter(input, 'input');
|
|
264
|
-
// Apply phonetic processors if configured
|
|
265
117
|
if (processors?.phonetic) input = this.index(input, processors.phonetic);
|
|
266
118
|
return input;
|
|
267
119
|
}
|
|
268
|
-
/**
|
|
269
|
-
* Post-process the results of the metric computation.
|
|
270
|
-
*
|
|
271
|
-
* @param {MetricResult<R>} result - The metric result
|
|
272
|
-
* @returns {MetricResult<R>} - The post-processed results
|
|
273
|
-
*/
|
|
274
120
|
postProcess(result, opt) {
|
|
275
|
-
// Remove "zero similarity" from batch results if configured
|
|
276
121
|
if (opt?.removeZero && Array.isArray(result))
|
|
277
122
|
result = result.filter((r) => r.res > 0);
|
|
278
123
|
return result;
|
|
279
124
|
}
|
|
280
|
-
/**
|
|
281
|
-
* Computes the phonetic index for the given input using the specified phonetic algorithm.
|
|
282
|
-
*
|
|
283
|
-
* @param {MetricInput} input - The input string or array
|
|
284
|
-
* @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
|
|
285
|
-
* @returns {MetricInput} - The phonetic index for the given input
|
|
286
|
-
*/
|
|
287
125
|
index(input, { algo, opt }) {
|
|
288
126
|
this.assert('phonetic', algo);
|
|
289
127
|
const phonetic = factory.phonetic(algo, opt);
|
|
@@ -292,26 +130,11 @@ class CmpStr {
|
|
|
292
130
|
? input.map((s) => phonetic.getIndex(s).join(delimiter))
|
|
293
131
|
: phonetic.getIndex(input).join(delimiter);
|
|
294
132
|
}
|
|
295
|
-
/**
|
|
296
|
-
* Computes the metric result for the given inputs, applying normalization and
|
|
297
|
-
* filtering as configured.
|
|
298
|
-
*
|
|
299
|
-
* @template T - The type of the metric result
|
|
300
|
-
* @param {MetricInput} a - The first input string or array
|
|
301
|
-
* @param {MetricInput} b - The second input string or array
|
|
302
|
-
* @param {CmpStrOptions} [opt] - Optional options to use
|
|
303
|
-
* @param {MetricMode} [mode='single'] - The metric mode to use
|
|
304
|
-
* @param {boolean} [raw=false] - Whether to return raw results
|
|
305
|
-
* @param {boolean} [skip=false] - Whether to skip normalization and filtering
|
|
306
|
-
* @returns {T} - The computed metric result
|
|
307
|
-
*/
|
|
308
133
|
compute(a, b, opt, mode, raw, skip) {
|
|
309
134
|
const resolved = this.resolveOptions(opt);
|
|
310
135
|
this.assert('metric', resolved.metric);
|
|
311
|
-
// Prepare the input
|
|
312
136
|
const A = skip ? a : this.prepare(a, resolved);
|
|
313
137
|
const B = skip ? b : this.prepare(b, resolved);
|
|
314
|
-
// If the inputs are empty and safeEmpty is enabled, return an empty array
|
|
315
138
|
if (
|
|
316
139
|
resolved.safeEmpty &&
|
|
317
140
|
((Array.isArray(A) && A.length === 0) ||
|
|
@@ -321,25 +144,12 @@ class CmpStr {
|
|
|
321
144
|
) {
|
|
322
145
|
return [];
|
|
323
146
|
}
|
|
324
|
-
// Get the metric class
|
|
325
147
|
const metric = factory.metric(resolved.metric, A, B, resolved.opt);
|
|
326
|
-
// Pass the original inputs to the metric
|
|
327
148
|
if (resolved.output !== 'prep') metric.setOriginal(a, b);
|
|
328
|
-
// Compute the metric result
|
|
329
149
|
metric.run(mode);
|
|
330
|
-
// Post-process the results
|
|
331
150
|
const result = this.postProcess(metric.getResults(), resolved);
|
|
332
|
-
// Resolve and return the result based on the raw flag
|
|
333
151
|
return this.output(result, raw ?? resolved.raw);
|
|
334
152
|
}
|
|
335
|
-
/**
|
|
336
|
-
* Resolves the result format (raw or formatted).
|
|
337
|
-
*
|
|
338
|
-
* @template T - The type of the metric result
|
|
339
|
-
* @param {MetricResult<R>} result - The metric result
|
|
340
|
-
* @param {boolean} [raw] - Whether to return raw results
|
|
341
|
-
* @returns {T} - The resolved result
|
|
342
|
-
*/
|
|
343
153
|
output(result, raw) {
|
|
344
154
|
return (raw ?? this.options.raw)
|
|
345
155
|
? result
|
|
@@ -347,213 +157,69 @@ class CmpStr {
|
|
|
347
157
|
? result.map((r) => ({ source: r.a, target: r.b, match: r.res }))
|
|
348
158
|
: { source: result.a, target: result.b, match: result.res };
|
|
349
159
|
}
|
|
350
|
-
/**
|
|
351
|
-
* ---------------------------------------------------------------------------------
|
|
352
|
-
* Managing methods for CmpStr
|
|
353
|
-
* ---------------------------------------------------------------------------------
|
|
354
|
-
*
|
|
355
|
-
* These methods provide an interface to set and get properties of the CmpStr
|
|
356
|
-
* instance, such as options, metric, phonetic algorithm, and more.
|
|
357
|
-
*/
|
|
358
|
-
/**
|
|
359
|
-
* Creates a shallow clone of the current instance.
|
|
360
|
-
*
|
|
361
|
-
* @returns {CmpStr<R>} - The cloned instance
|
|
362
|
-
*/
|
|
363
160
|
clone() {
|
|
364
161
|
return Object.assign(Object.create(Object.getPrototypeOf(this)), this);
|
|
365
162
|
}
|
|
366
|
-
/**
|
|
367
|
-
* Resets the instance, clearing all data and options.
|
|
368
|
-
*
|
|
369
|
-
* @returns {this}
|
|
370
|
-
*/
|
|
371
163
|
reset() {
|
|
372
164
|
for (const k in this.options) delete this.options[k];
|
|
373
165
|
return this;
|
|
374
166
|
}
|
|
375
|
-
/**
|
|
376
|
-
* Sets / replaces the full options object.
|
|
377
|
-
*
|
|
378
|
-
* @param {CmpStrOptions} opt - The options
|
|
379
|
-
* @returns {this}
|
|
380
|
-
*/
|
|
381
167
|
setOptions(opt) {
|
|
382
168
|
this.options = opt;
|
|
383
169
|
return this;
|
|
384
170
|
}
|
|
385
|
-
/**
|
|
386
|
-
* Deep merges and sets new options.
|
|
387
|
-
*
|
|
388
|
-
* @param {CmpStrOptions} opt - The options to merge
|
|
389
|
-
* @returns {this}
|
|
390
|
-
*/
|
|
391
171
|
mergeOptions(opt) {
|
|
392
172
|
merge(this.options, opt);
|
|
393
173
|
return this;
|
|
394
174
|
}
|
|
395
|
-
/**
|
|
396
|
-
* Sets the serialized options from a JSON string.
|
|
397
|
-
*
|
|
398
|
-
* @param {string} opt - The serialized options
|
|
399
|
-
* @returns {this}
|
|
400
|
-
*/
|
|
401
175
|
setSerializedOptions(opt) {
|
|
402
176
|
this.options = JSON.parse(opt);
|
|
403
177
|
return this;
|
|
404
178
|
}
|
|
405
|
-
/**
|
|
406
|
-
* Sets a specific option at the given path.
|
|
407
|
-
*
|
|
408
|
-
* @param {string} path - The path to the option
|
|
409
|
-
* @param {any} value - The value to set
|
|
410
|
-
* @returns {this}
|
|
411
|
-
*/
|
|
412
179
|
setOption(path, value) {
|
|
413
180
|
set(this.options, path, value);
|
|
414
181
|
return this;
|
|
415
182
|
}
|
|
416
|
-
/**
|
|
417
|
-
* Removes an option at the given path.
|
|
418
|
-
*
|
|
419
|
-
* @param {string} path - The path to the option
|
|
420
|
-
* @returns {this}
|
|
421
|
-
*/
|
|
422
183
|
rmvOption(path) {
|
|
423
184
|
rmv(this.options, path);
|
|
424
185
|
return this;
|
|
425
186
|
}
|
|
426
|
-
/**
|
|
427
|
-
* Enable or disable raw output.
|
|
428
|
-
*
|
|
429
|
-
* @param {boolean} enable - Whether to enable or disable raw output
|
|
430
|
-
* @returns {this}
|
|
431
|
-
*/
|
|
432
187
|
setRaw(enable) {
|
|
433
188
|
return this.setOption('raw', enable);
|
|
434
189
|
}
|
|
435
|
-
/**
|
|
436
|
-
* Sets the similarity metric to use (e.g., 'levenshtein', 'dice').
|
|
437
|
-
*
|
|
438
|
-
* @param {string} name - The metric name
|
|
439
|
-
* @returns {this}
|
|
440
|
-
*/
|
|
441
190
|
setMetric(name) {
|
|
442
191
|
return this.setOption('metric', name);
|
|
443
192
|
}
|
|
444
|
-
/**
|
|
445
|
-
* Sets the normalization flags (e.g., 'itw', 'nfc').
|
|
446
|
-
*
|
|
447
|
-
* @param {NormalizeFlags} flags - The normalization flags
|
|
448
|
-
* @returns {this}
|
|
449
|
-
*/
|
|
450
193
|
setFlags(flags) {
|
|
451
194
|
return this.setOption('flags', flags);
|
|
452
195
|
}
|
|
453
|
-
/**
|
|
454
|
-
* Removes the normalization flags entirely.
|
|
455
|
-
*
|
|
456
|
-
* @return {this}
|
|
457
|
-
*/
|
|
458
196
|
rmvFlags() {
|
|
459
197
|
return this.rmvOption('flags');
|
|
460
198
|
}
|
|
461
|
-
/**
|
|
462
|
-
* Sets the pre-processors to use for preparing the input.
|
|
463
|
-
*
|
|
464
|
-
* @param {CmpStrProcessors} opt - The processors to set
|
|
465
|
-
* @returns {this}
|
|
466
|
-
*/
|
|
467
199
|
setProcessors(opt) {
|
|
468
200
|
return this.setOption('processors', opt);
|
|
469
201
|
}
|
|
470
|
-
/**
|
|
471
|
-
* Removes the processors entirely.
|
|
472
|
-
*
|
|
473
|
-
* @returns {this}
|
|
474
|
-
*/
|
|
475
202
|
rmvProcessors() {
|
|
476
203
|
return this.rmvOption('processors');
|
|
477
204
|
}
|
|
478
|
-
/**
|
|
479
|
-
* Returns the current options object.
|
|
480
|
-
*
|
|
481
|
-
* @returns {CmpStrOptions} - The options
|
|
482
|
-
*/
|
|
483
205
|
getOptions() {
|
|
484
206
|
return this.options;
|
|
485
207
|
}
|
|
486
|
-
/**
|
|
487
|
-
* Returns the options as a JSON string.
|
|
488
|
-
*
|
|
489
|
-
* @returns {string} - The serialized options
|
|
490
|
-
*/
|
|
491
208
|
getSerializedOptions() {
|
|
492
209
|
return JSON.stringify(this.options);
|
|
493
210
|
}
|
|
494
|
-
/**
|
|
495
|
-
* Returns a specific option value by path.
|
|
496
|
-
*
|
|
497
|
-
* @param {string} path - The path to the option
|
|
498
|
-
* @returns {any} - The option value
|
|
499
|
-
*/
|
|
500
211
|
getOption(path) {
|
|
501
212
|
return get(this.options, path);
|
|
502
213
|
}
|
|
503
|
-
/**
|
|
504
|
-
* ---------------------------------------------------------------------------------
|
|
505
|
-
* Public core methods for string comparison
|
|
506
|
-
* ---------------------------------------------------------------------------------
|
|
507
|
-
*
|
|
508
|
-
* These methods provide the core functionality of the CmpStr class, allowing for
|
|
509
|
-
* string comparison, phonetic indexing, filtering, and text search.
|
|
510
|
-
*/
|
|
511
|
-
/**
|
|
512
|
-
* Performs a single metric comparison between the source and target.
|
|
513
|
-
*
|
|
514
|
-
* @template T - The type of the metric result
|
|
515
|
-
* @param {string} a - The source string
|
|
516
|
-
* @param {string} b - The target string
|
|
517
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
518
|
-
* @returns {T} - The metric result
|
|
519
|
-
*/
|
|
520
214
|
test(a, b, opt) {
|
|
521
215
|
return this.compute(a, b, opt, 'single');
|
|
522
216
|
}
|
|
523
|
-
/**
|
|
524
|
-
* Performs a single metric comparison and returns only the numeric score.
|
|
525
|
-
*
|
|
526
|
-
* @param {string} a - The source string
|
|
527
|
-
* @param {string} b - The target string
|
|
528
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
529
|
-
* @returns {number} - The similarity score (0..1)
|
|
530
|
-
*/
|
|
531
217
|
compare(a, b, opt) {
|
|
532
218
|
return this.compute(a, b, opt, 'single', true).res;
|
|
533
219
|
}
|
|
534
|
-
/**
|
|
535
|
-
* Performs a batch metric comparison between source and target strings
|
|
536
|
-
* or array of strings.
|
|
537
|
-
*
|
|
538
|
-
* @template T - The type of the metric result
|
|
539
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
540
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
541
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
542
|
-
* @returns {T} - The batch metric results
|
|
543
|
-
*/
|
|
544
220
|
batchTest(a, b, opt) {
|
|
545
221
|
return this.compute(a, b, opt, 'batch');
|
|
546
222
|
}
|
|
547
|
-
/**
|
|
548
|
-
* Performs a batch metric comparison and returns results sorted by score.
|
|
549
|
-
*
|
|
550
|
-
* @template T - The type of the metric result
|
|
551
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
552
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
553
|
-
* @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
|
|
554
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
555
|
-
* @returns {T} - The sorted batch results
|
|
556
|
-
*/
|
|
557
223
|
batchSorted(a, b, dir = 'desc', opt) {
|
|
558
224
|
return this.output(
|
|
559
225
|
this.compute(a, b, opt, 'batch', true).sort((a, b) =>
|
|
@@ -562,32 +228,9 @@ class CmpStr {
|
|
|
562
228
|
opt?.raw ?? this.options.raw
|
|
563
229
|
);
|
|
564
230
|
}
|
|
565
|
-
/**
|
|
566
|
-
* Performs a pairwise metric comparison between source and target strings
|
|
567
|
-
* or array of strings.
|
|
568
|
-
*
|
|
569
|
-
* Input arrays need to be of the same length to perform pairwise comparison,
|
|
570
|
-
* otherwise the method will throw an error.
|
|
571
|
-
*
|
|
572
|
-
* @template T - The type of the metric result
|
|
573
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
574
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
575
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
576
|
-
* @returns {T} - The pairwise metric results
|
|
577
|
-
*/
|
|
578
231
|
pairs(a, b, opt) {
|
|
579
232
|
return this.compute(a, b, opt, 'pairwise');
|
|
580
233
|
}
|
|
581
|
-
/**
|
|
582
|
-
* Performs a batch comparison and returns only results above the threshold.
|
|
583
|
-
*
|
|
584
|
-
* @template T - The type of the metric result
|
|
585
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
586
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
587
|
-
* @param {number} threshold - The similarity threshold (0..1)
|
|
588
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
589
|
-
* @returns {T} - The filtered batch results
|
|
590
|
-
*/
|
|
591
234
|
match(a, b, threshold, opt) {
|
|
592
235
|
return this.output(
|
|
593
236
|
this.compute(a, b, opt, 'batch', true)
|
|
@@ -596,56 +239,18 @@ class CmpStr {
|
|
|
596
239
|
opt?.raw ?? this.options.raw
|
|
597
240
|
);
|
|
598
241
|
}
|
|
599
|
-
/**
|
|
600
|
-
* Returns the n closest matches from a batch comparison.
|
|
601
|
-
*
|
|
602
|
-
* @template T - The type of the metric result
|
|
603
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
604
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
605
|
-
* @param {number} [n=1] - Number of closest matches
|
|
606
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
607
|
-
* @returns {T} - The closest matches
|
|
608
|
-
*/
|
|
609
242
|
closest(a, b, n = 1, opt) {
|
|
610
243
|
return this.batchSorted(a, b, 'desc', opt).slice(0, n);
|
|
611
244
|
}
|
|
612
|
-
/**
|
|
613
|
-
* Returns the n furthest matches from a batch comparison.
|
|
614
|
-
*
|
|
615
|
-
* @template T - The type of the metric result
|
|
616
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
617
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
618
|
-
* @param {number} [n=1] - Number of furthest matches
|
|
619
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
620
|
-
* @returns {T} - The furthest matches
|
|
621
|
-
*/
|
|
622
245
|
furthest(a, b, n = 1, opt) {
|
|
623
246
|
return this.batchSorted(a, b, 'asc', opt).slice(0, n);
|
|
624
247
|
}
|
|
625
|
-
/**
|
|
626
|
-
* Performs a normalized and filtered substring search.
|
|
627
|
-
*
|
|
628
|
-
* @param {string} needle - The search string
|
|
629
|
-
* @param {string[]} haystack - The array to search in
|
|
630
|
-
* @param {NormalizeFlags} [flags] - Normalization flags
|
|
631
|
-
* @param {CmpStrProcessors} [processors] - Pre-processors to apply
|
|
632
|
-
* @returns {string[]} - Array of matching entries
|
|
633
|
-
*/
|
|
634
248
|
search(needle, haystack, flags, processors) {
|
|
635
249
|
const resolved = this.resolveOptions({ flags, processors });
|
|
636
|
-
// Prepare the needle and haystack, normalizing and filtering them
|
|
637
250
|
const test = this.prepare(needle, resolved);
|
|
638
251
|
const hstk = this.prepare(haystack, resolved);
|
|
639
|
-
// Filter the haystack based on the normalized test string
|
|
640
252
|
return haystack.filter((_, i) => hstk[i].includes(test));
|
|
641
253
|
}
|
|
642
|
-
/**
|
|
643
|
-
* Computes a similarity matrix for the given input array.
|
|
644
|
-
*
|
|
645
|
-
* @param {string[]} input - The input array
|
|
646
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
647
|
-
* @returns {number[][]} - The similarity matrix
|
|
648
|
-
*/
|
|
649
254
|
matrix(input, opt) {
|
|
650
255
|
input = this.prepare(input, this.resolveOptions(opt));
|
|
651
256
|
return input.map((a) =>
|
|
@@ -654,15 +259,6 @@ class CmpStr {
|
|
|
654
259
|
)
|
|
655
260
|
);
|
|
656
261
|
}
|
|
657
|
-
/**
|
|
658
|
-
* Computes the phonetic index for a string using the configured
|
|
659
|
-
* or given algorithm.
|
|
660
|
-
*
|
|
661
|
-
* @param {string} [input] - The input string
|
|
662
|
-
* @param {string} [algo] - The phonetic algorithm to use
|
|
663
|
-
* @param {PhoneticOptions} [opt] - Optional phonetic options
|
|
664
|
-
* @returns {string} - The phonetic index as a string
|
|
665
|
-
*/
|
|
666
262
|
phoneticIndex(input, algo, opt) {
|
|
667
263
|
const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
|
|
668
264
|
return this.index(input, { algo: algo ?? a, opt: opt ?? o });
|