cmpstr 3.0.2 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/CmpStr.esm.js +2228 -4930
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +2 -2
- package/dist/CmpStr.esm.min.js.map +1 -1
- package/dist/CmpStr.umd.js +2348 -5026
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +2 -2
- package/dist/CmpStr.umd.min.js.map +1 -1
- package/dist/cjs/CmpStr.cjs +10 -404
- package/dist/cjs/CmpStr.cjs.map +1 -1
- package/dist/cjs/CmpStrAsync.cjs +10 -220
- package/dist/cjs/CmpStrAsync.cjs.map +1 -1
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +1 -56
- package/dist/cjs/metric/Cosine.cjs.map +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +2 -64
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +1 -56
- package/dist/cjs/metric/DiceSorensen.cjs.map +1 -1
- package/dist/cjs/metric/Hamming.cjs +2 -51
- package/dist/cjs/metric/Hamming.cjs.map +1 -1
- package/dist/cjs/metric/Jaccard.cjs +1 -48
- package/dist/cjs/metric/Jaccard.cjs.map +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +1 -53
- package/dist/cjs/metric/JaroWinkler.cjs.map +1 -1
- package/dist/cjs/metric/LCS.cjs +1 -54
- package/dist/cjs/metric/LCS.cjs.map +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +2 -60
- package/dist/cjs/metric/Levenshtein.cjs.map +1 -1
- package/dist/cjs/metric/Metric.cjs +2 -262
- package/dist/cjs/metric/Metric.cjs.map +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs +4 -56
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs +4 -58
- package/dist/cjs/metric/SmithWaterman.cjs.map +1 -1
- package/dist/cjs/metric/qGram.cjs +1 -55
- package/dist/cjs/metric/qGram.cjs.map +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +1 -78
- package/dist/cjs/phonetic/Caverphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Cologne.cjs +1 -43
- package/dist/cjs/phonetic/Cologne.cjs.map +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +1 -76
- package/dist/cjs/phonetic/Metaphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +1 -261
- package/dist/cjs/phonetic/Phonetic.cjs.map +1 -1
- package/dist/cjs/phonetic/Soundex.cjs +1 -47
- package/dist/cjs/phonetic/Soundex.cjs.map +1 -1
- package/dist/cjs/root.cjs +37 -0
- package/dist/cjs/root.cjs.map +1 -0
- package/dist/cjs/utils/DeepMerge.cjs +8 -75
- package/dist/cjs/utils/DeepMerge.cjs.map +1 -1
- package/dist/cjs/utils/DiffChecker.cjs +2 -190
- package/dist/cjs/utils/DiffChecker.cjs.map +1 -1
- package/dist/cjs/utils/Filter.cjs +1 -112
- package/dist/cjs/utils/Filter.cjs.map +1 -1
- package/dist/cjs/utils/HashTable.cjs +1 -99
- package/dist/cjs/utils/HashTable.cjs.map +1 -1
- package/dist/cjs/utils/Normalizer.cjs +3 -94
- package/dist/cjs/utils/Normalizer.cjs.map +1 -1
- package/dist/cjs/utils/Pool.cjs +1 -105
- package/dist/cjs/utils/Pool.cjs.map +1 -1
- package/dist/cjs/utils/Profiler.cjs +1 -133
- package/dist/cjs/utils/Profiler.cjs.map +1 -1
- package/dist/cjs/utils/Registry.cjs +2 -90
- package/dist/cjs/utils/Registry.cjs.map +1 -1
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -180
- package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -1
- package/dist/esm/CmpStr.mjs +10 -404
- package/dist/esm/CmpStr.mjs.map +1 -1
- package/dist/esm/CmpStrAsync.mjs +10 -220
- package/dist/esm/CmpStrAsync.mjs.map +1 -1
- package/dist/esm/index.mjs +1 -1
- package/dist/esm/metric/Cosine.mjs +1 -56
- package/dist/esm/metric/Cosine.mjs.map +1 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs +2 -64
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -1
- package/dist/esm/metric/DiceSorensen.mjs +1 -56
- package/dist/esm/metric/DiceSorensen.mjs.map +1 -1
- package/dist/esm/metric/Hamming.mjs +2 -51
- package/dist/esm/metric/Hamming.mjs.map +1 -1
- package/dist/esm/metric/Jaccard.mjs +1 -48
- package/dist/esm/metric/Jaccard.mjs.map +1 -1
- package/dist/esm/metric/JaroWinkler.mjs +1 -53
- package/dist/esm/metric/JaroWinkler.mjs.map +1 -1
- package/dist/esm/metric/LCS.mjs +1 -54
- package/dist/esm/metric/LCS.mjs.map +1 -1
- package/dist/esm/metric/Levenshtein.mjs +2 -60
- package/dist/esm/metric/Levenshtein.mjs.map +1 -1
- package/dist/esm/metric/Metric.mjs +2 -262
- package/dist/esm/metric/Metric.mjs.map +1 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs +4 -56
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -1
- package/dist/esm/metric/SmithWaterman.mjs +4 -58
- package/dist/esm/metric/SmithWaterman.mjs.map +1 -1
- package/dist/esm/metric/qGram.mjs +1 -55
- package/dist/esm/metric/qGram.mjs.map +1 -1
- package/dist/esm/phonetic/Caverphone.mjs +1 -78
- package/dist/esm/phonetic/Caverphone.mjs.map +1 -1
- package/dist/esm/phonetic/Cologne.mjs +1 -43
- package/dist/esm/phonetic/Cologne.mjs.map +1 -1
- package/dist/esm/phonetic/Metaphone.mjs +1 -76
- package/dist/esm/phonetic/Metaphone.mjs.map +1 -1
- package/dist/esm/phonetic/Phonetic.mjs +1 -261
- package/dist/esm/phonetic/Phonetic.mjs.map +1 -1
- package/dist/esm/phonetic/Soundex.mjs +1 -47
- package/dist/esm/phonetic/Soundex.mjs.map +1 -1
- package/dist/esm/root.mjs +29 -0
- package/dist/esm/root.mjs.map +1 -0
- package/dist/esm/utils/DeepMerge.mjs +8 -76
- package/dist/esm/utils/DeepMerge.mjs.map +1 -1
- package/dist/esm/utils/DiffChecker.mjs +2 -190
- package/dist/esm/utils/DiffChecker.mjs.map +1 -1
- package/dist/esm/utils/Filter.mjs +1 -112
- package/dist/esm/utils/Filter.mjs.map +1 -1
- package/dist/esm/utils/HashTable.mjs +1 -99
- package/dist/esm/utils/HashTable.mjs.map +1 -1
- package/dist/esm/utils/Normalizer.mjs +3 -94
- package/dist/esm/utils/Normalizer.mjs.map +1 -1
- package/dist/esm/utils/Pool.mjs +1 -105
- package/dist/esm/utils/Pool.mjs.map +1 -1
- package/dist/esm/utils/Profiler.mjs +1 -133
- package/dist/esm/utils/Profiler.mjs.map +1 -1
- package/dist/esm/utils/Registry.mjs +2 -90
- package/dist/esm/utils/Registry.mjs.map +1 -1
- package/dist/esm/utils/TextAnalyzer.mjs +1 -180
- package/dist/esm/utils/TextAnalyzer.mjs.map +1 -1
- package/dist/types/index.d.ts +3 -2
- package/dist/types/root.d.ts +38 -0
- package/dist/types/utils/Types.d.ts +1 -0
- package/package.json +15 -9
package/dist/esm/CmpStr.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.0.
|
|
1
|
+
// CmpStr v3.0.4 build-74e65a5-250915 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
import { merge, set, rmv, get } from './utils/DeepMerge.mjs';
|
|
3
3
|
import { Profiler } from './utils/Profiler.mjs';
|
|
4
4
|
import { TextAnalyzer } from './utils/TextAnalyzer.mjs';
|
|
@@ -28,49 +28,8 @@ import {
|
|
|
28
28
|
Phonetic
|
|
29
29
|
} from './phonetic/Phonetic.mjs';
|
|
30
30
|
|
|
31
|
-
/**
|
|
32
|
-
* CmpStr Main API
|
|
33
|
-
* src/CmpStr.ts
|
|
34
|
-
*
|
|
35
|
-
* The CmpStr class provides a comprehensive, highly abstracted, and type-safe interface
|
|
36
|
-
* for string comparison, similarity measurement, phonetic indexing, filtering, normalization,
|
|
37
|
-
* and text analysis. It unifies all core features of the CmpStr package and exposes a
|
|
38
|
-
* consistent, user-friendly API for both single and batch operations.
|
|
39
|
-
*
|
|
40
|
-
* Features:
|
|
41
|
-
* - Centralized management of metrics, phonetic algorithms, and filters
|
|
42
|
-
* - Flexible normalization and filtering pipeline for all inputs
|
|
43
|
-
* - Batch, pairwise, and single string comparison with detailed results
|
|
44
|
-
* - Phonetic indexing and phonetic-aware search and comparison
|
|
45
|
-
* - Text analysis and unified diff utilities
|
|
46
|
-
* - Full TypeScript type safety and extensibility
|
|
47
|
-
*
|
|
48
|
-
* @module CmpStr
|
|
49
|
-
* @author Paul Köhler (komed3)
|
|
50
|
-
* @license MIT
|
|
51
|
-
*/
|
|
52
|
-
// Import the Profiler instance for global profiling
|
|
53
31
|
const profiler = Profiler.getInstance();
|
|
54
|
-
/**
|
|
55
|
-
* The main CmpStr class that provides a unified interface for string comparison,
|
|
56
|
-
* phonetic indexing, filtering, and text analysis.
|
|
57
|
-
*
|
|
58
|
-
* @template R - The type of the metric result, defaults to MetricRaw
|
|
59
|
-
*/
|
|
60
32
|
class CmpStr {
|
|
61
|
-
/**
|
|
62
|
-
* --------------------------------------------------------------------------------
|
|
63
|
-
* Static methods and properties for global access to CmpStr features
|
|
64
|
-
* --------------------------------------------------------------------------------
|
|
65
|
-
*
|
|
66
|
-
* These static methods provide a convenient way to access the core features of
|
|
67
|
-
* the CmpStr package without needing to instantiate a CmpStr object.
|
|
68
|
-
*/
|
|
69
|
-
/**
|
|
70
|
-
* Adds, removes, pauses, resumes, lists, or clears global filters.
|
|
71
|
-
*
|
|
72
|
-
* @see Filter
|
|
73
|
-
*/
|
|
74
33
|
static filter = {
|
|
75
34
|
add: Filter.add,
|
|
76
35
|
remove: Filter.remove,
|
|
@@ -79,22 +38,12 @@ class CmpStr {
|
|
|
79
38
|
list: Filter.list,
|
|
80
39
|
clear: Filter.clear
|
|
81
40
|
};
|
|
82
|
-
/**
|
|
83
|
-
* Adds, removes, checks, or lists available metrics.
|
|
84
|
-
*
|
|
85
|
-
* @see MetricRegistry
|
|
86
|
-
*/
|
|
87
41
|
static metric = {
|
|
88
42
|
add: MetricRegistry.add,
|
|
89
43
|
remove: MetricRegistry.remove,
|
|
90
44
|
has: MetricRegistry.has,
|
|
91
45
|
list: MetricRegistry.list
|
|
92
46
|
};
|
|
93
|
-
/**
|
|
94
|
-
* Adds, removes, checks, or lists available phonetic algorithms and mappings.
|
|
95
|
-
*
|
|
96
|
-
* @see PhoneticRegistry
|
|
97
|
-
*/
|
|
98
47
|
static phonetic = {
|
|
99
48
|
add: PhoneticRegistry.add,
|
|
100
49
|
remove: PhoneticRegistry.remove,
|
|
@@ -107,89 +56,30 @@ class CmpStr {
|
|
|
107
56
|
list: PhoneticMappingRegistry.list
|
|
108
57
|
}
|
|
109
58
|
};
|
|
110
|
-
/**
|
|
111
|
-
* Provides access to the global profiler services.
|
|
112
|
-
*
|
|
113
|
-
* @see Profiler
|
|
114
|
-
*/
|
|
115
59
|
static profiler = profiler.services;
|
|
116
|
-
/**
|
|
117
|
-
* Clears the caches for normalizer, metric, and phonetic modules.
|
|
118
|
-
*/
|
|
119
60
|
static clearCache = {
|
|
120
61
|
normalizer: Normalizer.clear,
|
|
121
62
|
metric: Metric.clear,
|
|
122
63
|
phonetic: Phonetic.clear
|
|
123
64
|
};
|
|
124
|
-
/**
|
|
125
|
-
* Returns a TextAnalyzer instance for the given input string.
|
|
126
|
-
*
|
|
127
|
-
* @param {string} [input] - The input string
|
|
128
|
-
* @returns {TextAnalyzer} - The text analyzer
|
|
129
|
-
*/
|
|
130
65
|
static analyze(input) {
|
|
131
66
|
return new TextAnalyzer(input);
|
|
132
67
|
}
|
|
133
|
-
/**
|
|
134
|
-
* Returns a DiffChecker instance for computing the unified diff between two texts.
|
|
135
|
-
*
|
|
136
|
-
* @param {string} a - The first (original) text
|
|
137
|
-
* @param {string} b - The second (modified) text
|
|
138
|
-
* @param {DiffOptions} [opt] - Optional diff configuration
|
|
139
|
-
* @returns {DiffChecker} - The diff checker instance
|
|
140
|
-
*/
|
|
141
68
|
static diff(a, b, opt) {
|
|
142
69
|
return new DiffChecker(a, b, opt);
|
|
143
70
|
}
|
|
144
|
-
/**
|
|
145
|
-
* --------------------------------------------------------------------------------
|
|
146
|
-
* Instantiate the CmpStr class
|
|
147
|
-
* --------------------------------------------------------------------------------
|
|
148
|
-
*
|
|
149
|
-
* Methods to create a new CmpStr instance with the given options.
|
|
150
|
-
* Using the static `create` method is recommended to ensure proper instantiation.
|
|
151
|
-
*/
|
|
152
|
-
/**
|
|
153
|
-
* Creates a new CmpStr instance with the given options.
|
|
154
|
-
*
|
|
155
|
-
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
|
|
156
|
-
* @returns {CmpStr<R>} - A new CmpStr instance
|
|
157
|
-
*/
|
|
158
71
|
static create(opt) {
|
|
159
72
|
return new CmpStr(opt);
|
|
160
73
|
}
|
|
161
|
-
// The options object that holds the configuration for this CmpStr instance
|
|
162
74
|
options = Object.create(null);
|
|
163
|
-
/**
|
|
164
|
-
* Creates a new CmpStr instance with the given options.
|
|
165
|
-
* The constructor is protected to enforce the use of the static `create` method.
|
|
166
|
-
*
|
|
167
|
-
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
|
|
168
|
-
*/
|
|
169
75
|
constructor(opt) {
|
|
170
76
|
if (opt)
|
|
171
77
|
typeof opt === 'string'
|
|
172
78
|
? this.setSerializedOptions(opt)
|
|
173
79
|
: this.setOptions(opt);
|
|
174
80
|
}
|
|
175
|
-
/**
|
|
176
|
-
* ---------------------------------------------------------------------------------
|
|
177
|
-
* Protected utility methods for internal use
|
|
178
|
-
* ---------------------------------------------------------------------------------
|
|
179
|
-
*
|
|
180
|
-
* These methods provide utility functions for converting inputs, merging options,
|
|
181
|
-
* normalizing inputs, filtering, and preparing inputs for comparison.
|
|
182
|
-
*/
|
|
183
|
-
/**
|
|
184
|
-
* Asserts a condition and throws if the condition is not met.
|
|
185
|
-
*
|
|
186
|
-
* @param {string} cond - The condition to be met
|
|
187
|
-
* @param {any} [test] - Value to test for
|
|
188
|
-
* @throws {Error} If the condition is not met
|
|
189
|
-
*/
|
|
190
81
|
assert(cond, test) {
|
|
191
82
|
switch (cond) {
|
|
192
|
-
// Check if the metric exists
|
|
193
83
|
case 'metric':
|
|
194
84
|
if (!CmpStr.metric.has(test))
|
|
195
85
|
throw new Error(
|
|
@@ -197,7 +87,6 @@ class CmpStr {
|
|
|
197
87
|
`use CmpStr.metric.list() for available metrics`
|
|
198
88
|
);
|
|
199
89
|
break;
|
|
200
|
-
// Check if the phonetic algorithm exists
|
|
201
90
|
case 'phonetic':
|
|
202
91
|
if (!CmpStr.phonetic.has(test))
|
|
203
92
|
throw new Error(
|
|
@@ -205,85 +94,34 @@ class CmpStr {
|
|
|
205
94
|
`use CmpStr.phonetic.list() for available phonetic algorithms`
|
|
206
95
|
);
|
|
207
96
|
break;
|
|
208
|
-
// Throw an error for unknown conditions
|
|
209
97
|
default:
|
|
210
98
|
throw new Error(`Cmpstr condition <${cond}> unknown`);
|
|
211
99
|
}
|
|
212
100
|
}
|
|
213
|
-
/**
|
|
214
|
-
* Assert multiple conditions.
|
|
215
|
-
*
|
|
216
|
-
* @param {[ string, any? ][]} cond - Array of [ condition, value ] pairs
|
|
217
|
-
*/
|
|
218
101
|
assertMany(...cond) {
|
|
219
102
|
for (const [c, test] of cond) this.assert(c, test);
|
|
220
103
|
}
|
|
221
|
-
/**
|
|
222
|
-
* Resolves the options for the CmpStr instance, merging the provided options with
|
|
223
|
-
* the existing options.
|
|
224
|
-
*
|
|
225
|
-
* @param {CmpStrOptions} [opt] - Optional options to merge
|
|
226
|
-
* @returns {CmpStrOptions} - The resolved options
|
|
227
|
-
*/
|
|
228
104
|
resolveOptions(opt) {
|
|
229
105
|
return merge({ ...(this.options ?? Object.create(null)) }, opt);
|
|
230
106
|
}
|
|
231
|
-
/**
|
|
232
|
-
* Normalizes the input string or array using the configured or provided flags.
|
|
233
|
-
*
|
|
234
|
-
* @param {MetricInput} input - The input string or array
|
|
235
|
-
* @param {NormalizeFlags} [flags] - Normalization flags
|
|
236
|
-
* @returns {MetricInput} - The normalized input
|
|
237
|
-
*/
|
|
238
107
|
normalize(input, flags) {
|
|
239
108
|
return Normalizer.normalize(input, flags ?? this.options.flags ?? '');
|
|
240
109
|
}
|
|
241
|
-
/**
|
|
242
|
-
* Applies all active filters to the input string or array.
|
|
243
|
-
*
|
|
244
|
-
* @param {MetricInput} input - The input string or array
|
|
245
|
-
* @param {string} [hook='input'] - The filter hook
|
|
246
|
-
* @returns {MetricInput} - The filtered string(s)
|
|
247
|
-
*/
|
|
248
110
|
filter(input, hook) {
|
|
249
111
|
return Filter.apply(hook, input);
|
|
250
112
|
}
|
|
251
|
-
/**
|
|
252
|
-
* Prepares the input by normalizing and filtering.
|
|
253
|
-
*
|
|
254
|
-
* @param {MetricInput} [input] - The input string or array
|
|
255
|
-
* @param {CmpStrOptions} [opt] - Optional options to use
|
|
256
|
-
* @returns {MetricInput} - The prepared input
|
|
257
|
-
*/
|
|
258
113
|
prepare(input, opt) {
|
|
259
114
|
const { flags, processors } = opt ?? this.options;
|
|
260
|
-
// Normalize the input using flags (i.e., 'itw')
|
|
261
115
|
if (flags?.length) input = this.normalize(input, flags);
|
|
262
|
-
// Filter the input using hooked up filters
|
|
263
116
|
input = this.filter(input, 'input');
|
|
264
|
-
// Apply phonetic processors if configured
|
|
265
117
|
if (processors?.phonetic) input = this.index(input, processors.phonetic);
|
|
266
118
|
return input;
|
|
267
119
|
}
|
|
268
|
-
/**
|
|
269
|
-
* Post-process the results of the metric computation.
|
|
270
|
-
*
|
|
271
|
-
* @param {MetricResult<R>} result - The metric result
|
|
272
|
-
* @returns {MetricResult<R>} - The post-processed results
|
|
273
|
-
*/
|
|
274
120
|
postProcess(result, opt) {
|
|
275
|
-
// Remove "zero similarity" from batch results if configured
|
|
276
121
|
if (opt?.removeZero && Array.isArray(result))
|
|
277
122
|
result = result.filter((r) => r.res > 0);
|
|
278
123
|
return result;
|
|
279
124
|
}
|
|
280
|
-
/**
|
|
281
|
-
* Computes the phonetic index for the given input using the specified phonetic algorithm.
|
|
282
|
-
*
|
|
283
|
-
* @param {MetricInput} input - The input string or array
|
|
284
|
-
* @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
|
|
285
|
-
* @returns {MetricInput} - The phonetic index for the given input
|
|
286
|
-
*/
|
|
287
125
|
index(input, { algo, opt }) {
|
|
288
126
|
this.assert('phonetic', algo);
|
|
289
127
|
const phonetic = factory.phonetic(algo, opt);
|
|
@@ -292,44 +130,26 @@ class CmpStr {
|
|
|
292
130
|
? input.map((s) => phonetic.getIndex(s).join(delimiter))
|
|
293
131
|
: phonetic.getIndex(input).join(delimiter);
|
|
294
132
|
}
|
|
295
|
-
/**
|
|
296
|
-
* Computes the metric result for the given inputs, applying normalization and
|
|
297
|
-
* filtering as configured.
|
|
298
|
-
*
|
|
299
|
-
* @template T - The type of the metric result
|
|
300
|
-
* @param {MetricInput} a - The first input string or array
|
|
301
|
-
* @param {MetricInput} b - The second input string or array
|
|
302
|
-
* @param {CmpStrOptions} [opt] - Optional options to use
|
|
303
|
-
* @param {MetricMode} [mode='single'] - The metric mode to use
|
|
304
|
-
* @param {boolean} [raw=false] - Whether to return raw results
|
|
305
|
-
* @param {boolean} [skip=false] - Whether to skip normalization and filtering
|
|
306
|
-
* @returns {T} - The computed metric result
|
|
307
|
-
*/
|
|
308
133
|
compute(a, b, opt, mode, raw, skip) {
|
|
309
134
|
const resolved = this.resolveOptions(opt);
|
|
310
135
|
this.assert('metric', resolved.metric);
|
|
311
|
-
// Prepare the input
|
|
312
136
|
const A = skip ? a : this.prepare(a, resolved);
|
|
313
137
|
const B = skip ? b : this.prepare(b, resolved);
|
|
314
|
-
|
|
138
|
+
if (
|
|
139
|
+
resolved.safeEmpty &&
|
|
140
|
+
((Array.isArray(A) && A.length === 0) ||
|
|
141
|
+
(Array.isArray(B) && B.length === 0) ||
|
|
142
|
+
A === '' ||
|
|
143
|
+
B === '')
|
|
144
|
+
) {
|
|
145
|
+
return [];
|
|
146
|
+
}
|
|
315
147
|
const metric = factory.metric(resolved.metric, A, B, resolved.opt);
|
|
316
|
-
// Pass the original inputs to the metric
|
|
317
148
|
if (resolved.output !== 'prep') metric.setOriginal(a, b);
|
|
318
|
-
// Compute the metric result
|
|
319
149
|
metric.run(mode);
|
|
320
|
-
// Post-process the results
|
|
321
150
|
const result = this.postProcess(metric.getResults(), resolved);
|
|
322
|
-
// Resolve and return the result based on the raw flag
|
|
323
151
|
return this.output(result, raw ?? resolved.raw);
|
|
324
152
|
}
|
|
325
|
-
/**
|
|
326
|
-
* Resolves the result format (raw or formatted).
|
|
327
|
-
*
|
|
328
|
-
* @template T - The type of the metric result
|
|
329
|
-
* @param {MetricResult<R>} result - The metric result
|
|
330
|
-
* @param {boolean} [raw] - Whether to return raw results
|
|
331
|
-
* @returns {T} - The resolved result
|
|
332
|
-
*/
|
|
333
153
|
output(result, raw) {
|
|
334
154
|
return (raw ?? this.options.raw)
|
|
335
155
|
? result
|
|
@@ -337,213 +157,69 @@ class CmpStr {
|
|
|
337
157
|
? result.map((r) => ({ source: r.a, target: r.b, match: r.res }))
|
|
338
158
|
: { source: result.a, target: result.b, match: result.res };
|
|
339
159
|
}
|
|
340
|
-
/**
|
|
341
|
-
* ---------------------------------------------------------------------------------
|
|
342
|
-
* Managing methods for CmpStr
|
|
343
|
-
* ---------------------------------------------------------------------------------
|
|
344
|
-
*
|
|
345
|
-
* These methods provide an interface to set and get properties of the CmpStr
|
|
346
|
-
* instance, such as options, metric, phonetic algorithm, and more.
|
|
347
|
-
*/
|
|
348
|
-
/**
|
|
349
|
-
* Creates a shallow clone of the current instance.
|
|
350
|
-
*
|
|
351
|
-
* @returns {CmpStr<R>} - The cloned instance
|
|
352
|
-
*/
|
|
353
160
|
clone() {
|
|
354
161
|
return Object.assign(Object.create(Object.getPrototypeOf(this)), this);
|
|
355
162
|
}
|
|
356
|
-
/**
|
|
357
|
-
* Resets the instance, clearing all data and options.
|
|
358
|
-
*
|
|
359
|
-
* @returns {this}
|
|
360
|
-
*/
|
|
361
163
|
reset() {
|
|
362
164
|
for (const k in this.options) delete this.options[k];
|
|
363
165
|
return this;
|
|
364
166
|
}
|
|
365
|
-
/**
|
|
366
|
-
* Sets / replaces the full options object.
|
|
367
|
-
*
|
|
368
|
-
* @param {CmpStrOptions} opt - The options
|
|
369
|
-
* @returns {this}
|
|
370
|
-
*/
|
|
371
167
|
setOptions(opt) {
|
|
372
168
|
this.options = opt;
|
|
373
169
|
return this;
|
|
374
170
|
}
|
|
375
|
-
/**
|
|
376
|
-
* Deep merges and sets new options.
|
|
377
|
-
*
|
|
378
|
-
* @param {CmpStrOptions} opt - The options to merge
|
|
379
|
-
* @returns {this}
|
|
380
|
-
*/
|
|
381
171
|
mergeOptions(opt) {
|
|
382
172
|
merge(this.options, opt);
|
|
383
173
|
return this;
|
|
384
174
|
}
|
|
385
|
-
/**
|
|
386
|
-
* Sets the serialized options from a JSON string.
|
|
387
|
-
*
|
|
388
|
-
* @param {string} opt - The serialized options
|
|
389
|
-
* @returns {this}
|
|
390
|
-
*/
|
|
391
175
|
setSerializedOptions(opt) {
|
|
392
176
|
this.options = JSON.parse(opt);
|
|
393
177
|
return this;
|
|
394
178
|
}
|
|
395
|
-
/**
|
|
396
|
-
* Sets a specific option at the given path.
|
|
397
|
-
*
|
|
398
|
-
* @param {string} path - The path to the option
|
|
399
|
-
* @param {any} value - The value to set
|
|
400
|
-
* @returns {this}
|
|
401
|
-
*/
|
|
402
179
|
setOption(path, value) {
|
|
403
180
|
set(this.options, path, value);
|
|
404
181
|
return this;
|
|
405
182
|
}
|
|
406
|
-
/**
|
|
407
|
-
* Removes an option at the given path.
|
|
408
|
-
*
|
|
409
|
-
* @param {string} path - The path to the option
|
|
410
|
-
* @returns {this}
|
|
411
|
-
*/
|
|
412
183
|
rmvOption(path) {
|
|
413
184
|
rmv(this.options, path);
|
|
414
185
|
return this;
|
|
415
186
|
}
|
|
416
|
-
/**
|
|
417
|
-
* Enable or disable raw output.
|
|
418
|
-
*
|
|
419
|
-
* @param {boolean} enable - Whether to enable or disable raw output
|
|
420
|
-
* @returns {this}
|
|
421
|
-
*/
|
|
422
187
|
setRaw(enable) {
|
|
423
188
|
return this.setOption('raw', enable);
|
|
424
189
|
}
|
|
425
|
-
/**
|
|
426
|
-
* Sets the similarity metric to use (e.g., 'levenshtein', 'dice').
|
|
427
|
-
*
|
|
428
|
-
* @param {string} name - The metric name
|
|
429
|
-
* @returns {this}
|
|
430
|
-
*/
|
|
431
190
|
setMetric(name) {
|
|
432
191
|
return this.setOption('metric', name);
|
|
433
192
|
}
|
|
434
|
-
/**
|
|
435
|
-
* Sets the normalization flags (e.g., 'itw', 'nfc').
|
|
436
|
-
*
|
|
437
|
-
* @param {NormalizeFlags} flags - The normalization flags
|
|
438
|
-
* @returns {this}
|
|
439
|
-
*/
|
|
440
193
|
setFlags(flags) {
|
|
441
194
|
return this.setOption('flags', flags);
|
|
442
195
|
}
|
|
443
|
-
/**
|
|
444
|
-
* Removes the normalization flags entirely.
|
|
445
|
-
*
|
|
446
|
-
* @return {this}
|
|
447
|
-
*/
|
|
448
196
|
rmvFlags() {
|
|
449
197
|
return this.rmvOption('flags');
|
|
450
198
|
}
|
|
451
|
-
/**
|
|
452
|
-
* Sets the pre-processors to use for preparing the input.
|
|
453
|
-
*
|
|
454
|
-
* @param {CmpStrProcessors} opt - The processors to set
|
|
455
|
-
* @returns {this}
|
|
456
|
-
*/
|
|
457
199
|
setProcessors(opt) {
|
|
458
200
|
return this.setOption('processors', opt);
|
|
459
201
|
}
|
|
460
|
-
/**
|
|
461
|
-
* Removes the processors entirely.
|
|
462
|
-
*
|
|
463
|
-
* @returns {this}
|
|
464
|
-
*/
|
|
465
202
|
rmvProcessors() {
|
|
466
203
|
return this.rmvOption('processors');
|
|
467
204
|
}
|
|
468
|
-
/**
|
|
469
|
-
* Returns the current options object.
|
|
470
|
-
*
|
|
471
|
-
* @returns {CmpStrOptions} - The options
|
|
472
|
-
*/
|
|
473
205
|
getOptions() {
|
|
474
206
|
return this.options;
|
|
475
207
|
}
|
|
476
|
-
/**
|
|
477
|
-
* Returns the options as a JSON string.
|
|
478
|
-
*
|
|
479
|
-
* @returns {string} - The serialized options
|
|
480
|
-
*/
|
|
481
208
|
getSerializedOptions() {
|
|
482
209
|
return JSON.stringify(this.options);
|
|
483
210
|
}
|
|
484
|
-
/**
|
|
485
|
-
* Returns a specific option value by path.
|
|
486
|
-
*
|
|
487
|
-
* @param {string} path - The path to the option
|
|
488
|
-
* @returns {any} - The option value
|
|
489
|
-
*/
|
|
490
211
|
getOption(path) {
|
|
491
212
|
return get(this.options, path);
|
|
492
213
|
}
|
|
493
|
-
/**
|
|
494
|
-
* ---------------------------------------------------------------------------------
|
|
495
|
-
* Public core methods for string comparison
|
|
496
|
-
* ---------------------------------------------------------------------------------
|
|
497
|
-
*
|
|
498
|
-
* These methods provide the core functionality of the CmpStr class, allowing for
|
|
499
|
-
* string comparison, phonetic indexing, filtering, and text search.
|
|
500
|
-
*/
|
|
501
|
-
/**
|
|
502
|
-
* Performs a single metric comparison between the source and target.
|
|
503
|
-
*
|
|
504
|
-
* @template T - The type of the metric result
|
|
505
|
-
* @param {string} a - The source string
|
|
506
|
-
* @param {string} b - The target string
|
|
507
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
508
|
-
* @returns {T} - The metric result
|
|
509
|
-
*/
|
|
510
214
|
test(a, b, opt) {
|
|
511
215
|
return this.compute(a, b, opt, 'single');
|
|
512
216
|
}
|
|
513
|
-
/**
|
|
514
|
-
* Performs a single metric comparison and returns only the numeric score.
|
|
515
|
-
*
|
|
516
|
-
* @param {string} a - The source string
|
|
517
|
-
* @param {string} b - The target string
|
|
518
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
519
|
-
* @returns {number} - The similarity score (0..1)
|
|
520
|
-
*/
|
|
521
217
|
compare(a, b, opt) {
|
|
522
218
|
return this.compute(a, b, opt, 'single', true).res;
|
|
523
219
|
}
|
|
524
|
-
/**
|
|
525
|
-
* Performs a batch metric comparison between source and target strings
|
|
526
|
-
* or array of strings.
|
|
527
|
-
*
|
|
528
|
-
* @template T - The type of the metric result
|
|
529
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
530
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
531
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
532
|
-
* @returns {T} - The batch metric results
|
|
533
|
-
*/
|
|
534
220
|
batchTest(a, b, opt) {
|
|
535
221
|
return this.compute(a, b, opt, 'batch');
|
|
536
222
|
}
|
|
537
|
-
/**
|
|
538
|
-
* Performs a batch metric comparison and returns results sorted by score.
|
|
539
|
-
*
|
|
540
|
-
* @template T - The type of the metric result
|
|
541
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
542
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
543
|
-
* @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
|
|
544
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
545
|
-
* @returns {T} - The sorted batch results
|
|
546
|
-
*/
|
|
547
223
|
batchSorted(a, b, dir = 'desc', opt) {
|
|
548
224
|
return this.output(
|
|
549
225
|
this.compute(a, b, opt, 'batch', true).sort((a, b) =>
|
|
@@ -552,32 +228,9 @@ class CmpStr {
|
|
|
552
228
|
opt?.raw ?? this.options.raw
|
|
553
229
|
);
|
|
554
230
|
}
|
|
555
|
-
/**
|
|
556
|
-
* Performs a pairwise metric comparison between source and target strings
|
|
557
|
-
* or array of strings.
|
|
558
|
-
*
|
|
559
|
-
* Input arrays need to be of the same length to perform pairwise comparison,
|
|
560
|
-
* otherwise the method will throw an error.
|
|
561
|
-
*
|
|
562
|
-
* @template T - The type of the metric result
|
|
563
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
564
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
565
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
566
|
-
* @returns {T} - The pairwise metric results
|
|
567
|
-
*/
|
|
568
231
|
pairs(a, b, opt) {
|
|
569
232
|
return this.compute(a, b, opt, 'pairwise');
|
|
570
233
|
}
|
|
571
|
-
/**
|
|
572
|
-
* Performs a batch comparison and returns only results above the threshold.
|
|
573
|
-
*
|
|
574
|
-
* @template T - The type of the metric result
|
|
575
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
576
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
577
|
-
* @param {number} threshold - The similarity threshold (0..1)
|
|
578
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
579
|
-
* @returns {T} - The filtered batch results
|
|
580
|
-
*/
|
|
581
234
|
match(a, b, threshold, opt) {
|
|
582
235
|
return this.output(
|
|
583
236
|
this.compute(a, b, opt, 'batch', true)
|
|
@@ -586,56 +239,18 @@ class CmpStr {
|
|
|
586
239
|
opt?.raw ?? this.options.raw
|
|
587
240
|
);
|
|
588
241
|
}
|
|
589
|
-
/**
|
|
590
|
-
* Returns the n closest matches from a batch comparison.
|
|
591
|
-
*
|
|
592
|
-
* @template T - The type of the metric result
|
|
593
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
594
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
595
|
-
* @param {number} [n=1] - Number of closest matches
|
|
596
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
597
|
-
* @returns {T} - The closest matches
|
|
598
|
-
*/
|
|
599
242
|
closest(a, b, n = 1, opt) {
|
|
600
243
|
return this.batchSorted(a, b, 'desc', opt).slice(0, n);
|
|
601
244
|
}
|
|
602
|
-
/**
|
|
603
|
-
* Returns the n furthest matches from a batch comparison.
|
|
604
|
-
*
|
|
605
|
-
* @template T - The type of the metric result
|
|
606
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
607
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
608
|
-
* @param {number} [n=1] - Number of furthest matches
|
|
609
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
610
|
-
* @returns {T} - The furthest matches
|
|
611
|
-
*/
|
|
612
245
|
furthest(a, b, n = 1, opt) {
|
|
613
246
|
return this.batchSorted(a, b, 'asc', opt).slice(0, n);
|
|
614
247
|
}
|
|
615
|
-
/**
|
|
616
|
-
* Performs a normalized and filtered substring search.
|
|
617
|
-
*
|
|
618
|
-
* @param {string} needle - The search string
|
|
619
|
-
* @param {string[]} haystack - The array to search in
|
|
620
|
-
* @param {NormalizeFlags} [flags] - Normalization flags
|
|
621
|
-
* @param {CmpStrProcessors} [processors] - Pre-processors to apply
|
|
622
|
-
* @returns {string[]} - Array of matching entries
|
|
623
|
-
*/
|
|
624
248
|
search(needle, haystack, flags, processors) {
|
|
625
249
|
const resolved = this.resolveOptions({ flags, processors });
|
|
626
|
-
// Prepare the needle and haystack, normalizing and filtering them
|
|
627
250
|
const test = this.prepare(needle, resolved);
|
|
628
251
|
const hstk = this.prepare(haystack, resolved);
|
|
629
|
-
// Filter the haystack based on the normalized test string
|
|
630
252
|
return haystack.filter((_, i) => hstk[i].includes(test));
|
|
631
253
|
}
|
|
632
|
-
/**
|
|
633
|
-
* Computes a similarity matrix for the given input array.
|
|
634
|
-
*
|
|
635
|
-
* @param {string[]} input - The input array
|
|
636
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
637
|
-
* @returns {number[][]} - The similarity matrix
|
|
638
|
-
*/
|
|
639
254
|
matrix(input, opt) {
|
|
640
255
|
input = this.prepare(input, this.resolveOptions(opt));
|
|
641
256
|
return input.map((a) =>
|
|
@@ -644,15 +259,6 @@ class CmpStr {
|
|
|
644
259
|
)
|
|
645
260
|
);
|
|
646
261
|
}
|
|
647
|
-
/**
|
|
648
|
-
* Computes the phonetic index for a string using the configured
|
|
649
|
-
* or given algorithm.
|
|
650
|
-
*
|
|
651
|
-
* @param {string} [input] - The input string
|
|
652
|
-
* @param {string} [algo] - The phonetic algorithm to use
|
|
653
|
-
* @param {PhoneticOptions} [opt] - Optional phonetic options
|
|
654
|
-
* @returns {string} - The phonetic index as a string
|
|
655
|
-
*/
|
|
656
262
|
phoneticIndex(input, algo, opt) {
|
|
657
263
|
const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
|
|
658
264
|
return this.index(input, { algo: algo ?? a, opt: opt ?? o });
|