cmpstr 3.0.2 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/CmpStr.esm.js +2228 -4930
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +2 -2
- package/dist/CmpStr.esm.min.js.map +1 -1
- package/dist/CmpStr.umd.js +2348 -5026
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +2 -2
- package/dist/CmpStr.umd.min.js.map +1 -1
- package/dist/cjs/CmpStr.cjs +10 -404
- package/dist/cjs/CmpStr.cjs.map +1 -1
- package/dist/cjs/CmpStrAsync.cjs +10 -220
- package/dist/cjs/CmpStrAsync.cjs.map +1 -1
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +1 -56
- package/dist/cjs/metric/Cosine.cjs.map +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +2 -64
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +1 -56
- package/dist/cjs/metric/DiceSorensen.cjs.map +1 -1
- package/dist/cjs/metric/Hamming.cjs +2 -51
- package/dist/cjs/metric/Hamming.cjs.map +1 -1
- package/dist/cjs/metric/Jaccard.cjs +1 -48
- package/dist/cjs/metric/Jaccard.cjs.map +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +1 -53
- package/dist/cjs/metric/JaroWinkler.cjs.map +1 -1
- package/dist/cjs/metric/LCS.cjs +1 -54
- package/dist/cjs/metric/LCS.cjs.map +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +2 -60
- package/dist/cjs/metric/Levenshtein.cjs.map +1 -1
- package/dist/cjs/metric/Metric.cjs +2 -262
- package/dist/cjs/metric/Metric.cjs.map +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs +4 -56
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs +4 -58
- package/dist/cjs/metric/SmithWaterman.cjs.map +1 -1
- package/dist/cjs/metric/qGram.cjs +1 -55
- package/dist/cjs/metric/qGram.cjs.map +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +1 -78
- package/dist/cjs/phonetic/Caverphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Cologne.cjs +1 -43
- package/dist/cjs/phonetic/Cologne.cjs.map +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +1 -76
- package/dist/cjs/phonetic/Metaphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +1 -261
- package/dist/cjs/phonetic/Phonetic.cjs.map +1 -1
- package/dist/cjs/phonetic/Soundex.cjs +1 -47
- package/dist/cjs/phonetic/Soundex.cjs.map +1 -1
- package/dist/cjs/root.cjs +37 -0
- package/dist/cjs/root.cjs.map +1 -0
- package/dist/cjs/utils/DeepMerge.cjs +8 -75
- package/dist/cjs/utils/DeepMerge.cjs.map +1 -1
- package/dist/cjs/utils/DiffChecker.cjs +2 -190
- package/dist/cjs/utils/DiffChecker.cjs.map +1 -1
- package/dist/cjs/utils/Filter.cjs +1 -112
- package/dist/cjs/utils/Filter.cjs.map +1 -1
- package/dist/cjs/utils/HashTable.cjs +1 -99
- package/dist/cjs/utils/HashTable.cjs.map +1 -1
- package/dist/cjs/utils/Normalizer.cjs +3 -94
- package/dist/cjs/utils/Normalizer.cjs.map +1 -1
- package/dist/cjs/utils/Pool.cjs +1 -105
- package/dist/cjs/utils/Pool.cjs.map +1 -1
- package/dist/cjs/utils/Profiler.cjs +1 -133
- package/dist/cjs/utils/Profiler.cjs.map +1 -1
- package/dist/cjs/utils/Registry.cjs +2 -90
- package/dist/cjs/utils/Registry.cjs.map +1 -1
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -180
- package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -1
- package/dist/esm/CmpStr.mjs +10 -404
- package/dist/esm/CmpStr.mjs.map +1 -1
- package/dist/esm/CmpStrAsync.mjs +10 -220
- package/dist/esm/CmpStrAsync.mjs.map +1 -1
- package/dist/esm/index.mjs +1 -1
- package/dist/esm/metric/Cosine.mjs +1 -56
- package/dist/esm/metric/Cosine.mjs.map +1 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs +2 -64
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -1
- package/dist/esm/metric/DiceSorensen.mjs +1 -56
- package/dist/esm/metric/DiceSorensen.mjs.map +1 -1
- package/dist/esm/metric/Hamming.mjs +2 -51
- package/dist/esm/metric/Hamming.mjs.map +1 -1
- package/dist/esm/metric/Jaccard.mjs +1 -48
- package/dist/esm/metric/Jaccard.mjs.map +1 -1
- package/dist/esm/metric/JaroWinkler.mjs +1 -53
- package/dist/esm/metric/JaroWinkler.mjs.map +1 -1
- package/dist/esm/metric/LCS.mjs +1 -54
- package/dist/esm/metric/LCS.mjs.map +1 -1
- package/dist/esm/metric/Levenshtein.mjs +2 -60
- package/dist/esm/metric/Levenshtein.mjs.map +1 -1
- package/dist/esm/metric/Metric.mjs +2 -262
- package/dist/esm/metric/Metric.mjs.map +1 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs +4 -56
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -1
- package/dist/esm/metric/SmithWaterman.mjs +4 -58
- package/dist/esm/metric/SmithWaterman.mjs.map +1 -1
- package/dist/esm/metric/qGram.mjs +1 -55
- package/dist/esm/metric/qGram.mjs.map +1 -1
- package/dist/esm/phonetic/Caverphone.mjs +1 -78
- package/dist/esm/phonetic/Caverphone.mjs.map +1 -1
- package/dist/esm/phonetic/Cologne.mjs +1 -43
- package/dist/esm/phonetic/Cologne.mjs.map +1 -1
- package/dist/esm/phonetic/Metaphone.mjs +1 -76
- package/dist/esm/phonetic/Metaphone.mjs.map +1 -1
- package/dist/esm/phonetic/Phonetic.mjs +1 -261
- package/dist/esm/phonetic/Phonetic.mjs.map +1 -1
- package/dist/esm/phonetic/Soundex.mjs +1 -47
- package/dist/esm/phonetic/Soundex.mjs.map +1 -1
- package/dist/esm/root.mjs +29 -0
- package/dist/esm/root.mjs.map +1 -0
- package/dist/esm/utils/DeepMerge.mjs +8 -76
- package/dist/esm/utils/DeepMerge.mjs.map +1 -1
- package/dist/esm/utils/DiffChecker.mjs +2 -190
- package/dist/esm/utils/DiffChecker.mjs.map +1 -1
- package/dist/esm/utils/Filter.mjs +1 -112
- package/dist/esm/utils/Filter.mjs.map +1 -1
- package/dist/esm/utils/HashTable.mjs +1 -99
- package/dist/esm/utils/HashTable.mjs.map +1 -1
- package/dist/esm/utils/Normalizer.mjs +3 -94
- package/dist/esm/utils/Normalizer.mjs.map +1 -1
- package/dist/esm/utils/Pool.mjs +1 -105
- package/dist/esm/utils/Pool.mjs.map +1 -1
- package/dist/esm/utils/Profiler.mjs +1 -133
- package/dist/esm/utils/Profiler.mjs.map +1 -1
- package/dist/esm/utils/Registry.mjs +2 -90
- package/dist/esm/utils/Registry.mjs.map +1 -1
- package/dist/esm/utils/TextAnalyzer.mjs +1 -180
- package/dist/esm/utils/TextAnalyzer.mjs.map +1 -1
- package/dist/types/index.d.ts +3 -2
- package/dist/types/root.d.ts +38 -0
- package/dist/types/utils/Types.d.ts +1 -0
- package/package.json +15 -9
package/dist/cjs/CmpStr.cjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.0.
|
|
1
|
+
// CmpStr v3.0.4 build-74e65a5-250915 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
var DeepMerge = require('./utils/DeepMerge.cjs');
|
|
@@ -26,49 +26,8 @@ require('./phonetic/Metaphone.cjs');
|
|
|
26
26
|
require('./phonetic/Soundex.cjs');
|
|
27
27
|
var Phonetic = require('./phonetic/Phonetic.cjs');
|
|
28
28
|
|
|
29
|
-
/**
|
|
30
|
-
* CmpStr Main API
|
|
31
|
-
* src/CmpStr.ts
|
|
32
|
-
*
|
|
33
|
-
* The CmpStr class provides a comprehensive, highly abstracted, and type-safe interface
|
|
34
|
-
* for string comparison, similarity measurement, phonetic indexing, filtering, normalization,
|
|
35
|
-
* and text analysis. It unifies all core features of the CmpStr package and exposes a
|
|
36
|
-
* consistent, user-friendly API for both single and batch operations.
|
|
37
|
-
*
|
|
38
|
-
* Features:
|
|
39
|
-
* - Centralized management of metrics, phonetic algorithms, and filters
|
|
40
|
-
* - Flexible normalization and filtering pipeline for all inputs
|
|
41
|
-
* - Batch, pairwise, and single string comparison with detailed results
|
|
42
|
-
* - Phonetic indexing and phonetic-aware search and comparison
|
|
43
|
-
* - Text analysis and unified diff utilities
|
|
44
|
-
* - Full TypeScript type safety and extensibility
|
|
45
|
-
*
|
|
46
|
-
* @module CmpStr
|
|
47
|
-
* @author Paul Köhler (komed3)
|
|
48
|
-
* @license MIT
|
|
49
|
-
*/
|
|
50
|
-
// Import the Profiler instance for global profiling
|
|
51
29
|
const profiler = Profiler.Profiler.getInstance();
|
|
52
|
-
/**
|
|
53
|
-
* The main CmpStr class that provides a unified interface for string comparison,
|
|
54
|
-
* phonetic indexing, filtering, and text analysis.
|
|
55
|
-
*
|
|
56
|
-
* @template R - The type of the metric result, defaults to MetricRaw
|
|
57
|
-
*/
|
|
58
30
|
class CmpStr {
|
|
59
|
-
/**
|
|
60
|
-
* --------------------------------------------------------------------------------
|
|
61
|
-
* Static methods and properties for global access to CmpStr features
|
|
62
|
-
* --------------------------------------------------------------------------------
|
|
63
|
-
*
|
|
64
|
-
* These static methods provide a convenient way to access the core features of
|
|
65
|
-
* the CmpStr package without needing to instantiate a CmpStr object.
|
|
66
|
-
*/
|
|
67
|
-
/**
|
|
68
|
-
* Adds, removes, pauses, resumes, lists, or clears global filters.
|
|
69
|
-
*
|
|
70
|
-
* @see Filter
|
|
71
|
-
*/
|
|
72
31
|
static filter = {
|
|
73
32
|
add: Filter.Filter.add,
|
|
74
33
|
remove: Filter.Filter.remove,
|
|
@@ -77,22 +36,12 @@ class CmpStr {
|
|
|
77
36
|
list: Filter.Filter.list,
|
|
78
37
|
clear: Filter.Filter.clear
|
|
79
38
|
};
|
|
80
|
-
/**
|
|
81
|
-
* Adds, removes, checks, or lists available metrics.
|
|
82
|
-
*
|
|
83
|
-
* @see MetricRegistry
|
|
84
|
-
*/
|
|
85
39
|
static metric = {
|
|
86
40
|
add: Metric.MetricRegistry.add,
|
|
87
41
|
remove: Metric.MetricRegistry.remove,
|
|
88
42
|
has: Metric.MetricRegistry.has,
|
|
89
43
|
list: Metric.MetricRegistry.list
|
|
90
44
|
};
|
|
91
|
-
/**
|
|
92
|
-
* Adds, removes, checks, or lists available phonetic algorithms and mappings.
|
|
93
|
-
*
|
|
94
|
-
* @see PhoneticRegistry
|
|
95
|
-
*/
|
|
96
45
|
static phonetic = {
|
|
97
46
|
add: Phonetic.PhoneticRegistry.add,
|
|
98
47
|
remove: Phonetic.PhoneticRegistry.remove,
|
|
@@ -105,89 +54,30 @@ class CmpStr {
|
|
|
105
54
|
list: Phonetic.PhoneticMappingRegistry.list
|
|
106
55
|
}
|
|
107
56
|
};
|
|
108
|
-
/**
|
|
109
|
-
* Provides access to the global profiler services.
|
|
110
|
-
*
|
|
111
|
-
* @see Profiler
|
|
112
|
-
*/
|
|
113
57
|
static profiler = profiler.services;
|
|
114
|
-
/**
|
|
115
|
-
* Clears the caches for normalizer, metric, and phonetic modules.
|
|
116
|
-
*/
|
|
117
58
|
static clearCache = {
|
|
118
59
|
normalizer: Normalizer.Normalizer.clear,
|
|
119
60
|
metric: Metric.Metric.clear,
|
|
120
61
|
phonetic: Phonetic.Phonetic.clear
|
|
121
62
|
};
|
|
122
|
-
/**
|
|
123
|
-
* Returns a TextAnalyzer instance for the given input string.
|
|
124
|
-
*
|
|
125
|
-
* @param {string} [input] - The input string
|
|
126
|
-
* @returns {TextAnalyzer} - The text analyzer
|
|
127
|
-
*/
|
|
128
63
|
static analyze(input) {
|
|
129
64
|
return new TextAnalyzer.TextAnalyzer(input);
|
|
130
65
|
}
|
|
131
|
-
/**
|
|
132
|
-
* Returns a DiffChecker instance for computing the unified diff between two texts.
|
|
133
|
-
*
|
|
134
|
-
* @param {string} a - The first (original) text
|
|
135
|
-
* @param {string} b - The second (modified) text
|
|
136
|
-
* @param {DiffOptions} [opt] - Optional diff configuration
|
|
137
|
-
* @returns {DiffChecker} - The diff checker instance
|
|
138
|
-
*/
|
|
139
66
|
static diff(a, b, opt) {
|
|
140
67
|
return new DiffChecker.DiffChecker(a, b, opt);
|
|
141
68
|
}
|
|
142
|
-
/**
|
|
143
|
-
* --------------------------------------------------------------------------------
|
|
144
|
-
* Instanciate the CmpStr class
|
|
145
|
-
* --------------------------------------------------------------------------------
|
|
146
|
-
*
|
|
147
|
-
* Methods to create a new CmpStr instance with the given options.
|
|
148
|
-
* Using the static `create` method is recommended to ensure proper instantiation.
|
|
149
|
-
*/
|
|
150
|
-
/**
|
|
151
|
-
* Creates a new CmpStr instance with the given options.
|
|
152
|
-
*
|
|
153
|
-
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
|
|
154
|
-
* @returns {CmpStr<R>} - A new CmpStr instance
|
|
155
|
-
*/
|
|
156
69
|
static create(opt) {
|
|
157
70
|
return new CmpStr(opt);
|
|
158
71
|
}
|
|
159
|
-
// The options object that holds the configuration for this CmpStr instance
|
|
160
72
|
options = Object.create(null);
|
|
161
|
-
/**
|
|
162
|
-
* Creates a new CmpStr instance with the given options.
|
|
163
|
-
* The constructor is protected to enforce the use of the static `create` method.
|
|
164
|
-
*
|
|
165
|
-
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
|
|
166
|
-
*/
|
|
167
73
|
constructor(opt) {
|
|
168
74
|
if (opt)
|
|
169
75
|
typeof opt === 'string'
|
|
170
76
|
? this.setSerializedOptions(opt)
|
|
171
77
|
: this.setOptions(opt);
|
|
172
78
|
}
|
|
173
|
-
/**
|
|
174
|
-
* ---------------------------------------------------------------------------------
|
|
175
|
-
* Protected utility methods for internal use
|
|
176
|
-
* ---------------------------------------------------------------------------------
|
|
177
|
-
*
|
|
178
|
-
* These methods provide utility functions for converting inputs, merging options,
|
|
179
|
-
* normalizing inputs, filtering, and preparing inputs for comparison.
|
|
180
|
-
*/
|
|
181
|
-
/**
|
|
182
|
-
* Assert a condition and throws if the condition is not met.
|
|
183
|
-
*
|
|
184
|
-
* @param {string} cond - The condition to met
|
|
185
|
-
* @param {any} [test] - Value to test for
|
|
186
|
-
* @throws {Error} If the condition is not met
|
|
187
|
-
*/
|
|
188
79
|
assert(cond, test) {
|
|
189
80
|
switch (cond) {
|
|
190
|
-
// Check if the metric exists
|
|
191
81
|
case 'metric':
|
|
192
82
|
if (!CmpStr.metric.has(test))
|
|
193
83
|
throw new Error(
|
|
@@ -195,7 +85,6 @@ class CmpStr {
|
|
|
195
85
|
`use CmpStr.metric.list() for available metrics`
|
|
196
86
|
);
|
|
197
87
|
break;
|
|
198
|
-
// Check if the phonetic algorithm exists
|
|
199
88
|
case 'phonetic':
|
|
200
89
|
if (!CmpStr.phonetic.has(test))
|
|
201
90
|
throw new Error(
|
|
@@ -203,88 +92,37 @@ class CmpStr {
|
|
|
203
92
|
`use CmpStr.phonetic.list() for available phonetic algorithms`
|
|
204
93
|
);
|
|
205
94
|
break;
|
|
206
|
-
// Throw an error for unknown conditions
|
|
207
95
|
default:
|
|
208
96
|
throw new Error(`Cmpstr condition <${cond}> unknown`);
|
|
209
97
|
}
|
|
210
98
|
}
|
|
211
|
-
/**
|
|
212
|
-
* Assert multiple conditions.
|
|
213
|
-
*
|
|
214
|
-
* @param {[ string, any? ][]} cond - Array of [ condition, value ] pairs
|
|
215
|
-
*/
|
|
216
99
|
assertMany(...cond) {
|
|
217
100
|
for (const [c, test] of cond) this.assert(c, test);
|
|
218
101
|
}
|
|
219
|
-
/**
|
|
220
|
-
* Resolves the options for the CmpStr instance, merging the provided options with
|
|
221
|
-
* the existing options.
|
|
222
|
-
*
|
|
223
|
-
* @param {CmpStrOptions} [opt] - Optional options to merge
|
|
224
|
-
* @returns {CmpStrOptions} - The resolved options
|
|
225
|
-
*/
|
|
226
102
|
resolveOptions(opt) {
|
|
227
103
|
return DeepMerge.merge({ ...(this.options ?? Object.create(null)) }, opt);
|
|
228
104
|
}
|
|
229
|
-
/**
|
|
230
|
-
* Normalizes the input string or array using the configured or provided flags.
|
|
231
|
-
*
|
|
232
|
-
* @param {MetricInput} input - The input string or array
|
|
233
|
-
* @param {NormalizeFlags} [flags] - Normalization flags
|
|
234
|
-
* @returns {MetricInput} - The normalized input
|
|
235
|
-
*/
|
|
236
105
|
normalize(input, flags) {
|
|
237
106
|
return Normalizer.Normalizer.normalize(
|
|
238
107
|
input,
|
|
239
108
|
flags ?? this.options.flags ?? ''
|
|
240
109
|
);
|
|
241
110
|
}
|
|
242
|
-
/**
|
|
243
|
-
* Applies all active filters to the input string or array.
|
|
244
|
-
*
|
|
245
|
-
* @param {MetricInput} input - The input string or array
|
|
246
|
-
* @param {string} [hook='input'] - The filter hook
|
|
247
|
-
* @returns {MetricInput} - The filtered string(s)
|
|
248
|
-
*/
|
|
249
111
|
filter(input, hook) {
|
|
250
112
|
return Filter.Filter.apply(hook, input);
|
|
251
113
|
}
|
|
252
|
-
/**
|
|
253
|
-
* Prepares the input by normalizing and filtering.
|
|
254
|
-
*
|
|
255
|
-
* @param {MetricInput} [input] - The input string or array
|
|
256
|
-
* @param {CmpStrOptions} [opt] - Optional options to use
|
|
257
|
-
* @returns {MetricInput} - The prepared input
|
|
258
|
-
*/
|
|
259
114
|
prepare(input, opt) {
|
|
260
115
|
const { flags, processors } = opt ?? this.options;
|
|
261
|
-
// Normalize the input using flags (i.e., 'itw')
|
|
262
116
|
if (flags?.length) input = this.normalize(input, flags);
|
|
263
|
-
// Filter the input using hooked up filters
|
|
264
117
|
input = this.filter(input, 'input');
|
|
265
|
-
// Apply phonetic processors if configured
|
|
266
118
|
if (processors?.phonetic) input = this.index(input, processors.phonetic);
|
|
267
119
|
return input;
|
|
268
120
|
}
|
|
269
|
-
/**
|
|
270
|
-
* Post-process the results of the metric computation.
|
|
271
|
-
*
|
|
272
|
-
* @param {MetricResult<R>} result - The metric result
|
|
273
|
-
* @returns {MetricResult<R>} - The post-processed results
|
|
274
|
-
*/
|
|
275
121
|
postProcess(result, opt) {
|
|
276
|
-
// Remove "zero similarity" from batch results if configured
|
|
277
122
|
if (opt?.removeZero && Array.isArray(result))
|
|
278
123
|
result = result.filter((r) => r.res > 0);
|
|
279
124
|
return result;
|
|
280
125
|
}
|
|
281
|
-
/**
|
|
282
|
-
* Computes the phonetic index for the given input using the specified phonetic algorithm.
|
|
283
|
-
*
|
|
284
|
-
* @param {MetricInput} input - The input string or array
|
|
285
|
-
* @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
|
|
286
|
-
* @returns {MetricInput} - The phonetic index for the given input
|
|
287
|
-
*/
|
|
288
126
|
index(input, { algo, opt }) {
|
|
289
127
|
this.assert('phonetic', algo);
|
|
290
128
|
const phonetic = Registry.factory.phonetic(algo, opt);
|
|
@@ -293,44 +131,26 @@ class CmpStr {
|
|
|
293
131
|
? input.map((s) => phonetic.getIndex(s).join(delimiter))
|
|
294
132
|
: phonetic.getIndex(input).join(delimiter);
|
|
295
133
|
}
|
|
296
|
-
/**
|
|
297
|
-
* Computes the metric result for the given inputs, applying normalization and
|
|
298
|
-
* filtering as configured.
|
|
299
|
-
*
|
|
300
|
-
* @template T - The type of the metric result
|
|
301
|
-
* @param {MetricInput} a - The first input string or array
|
|
302
|
-
* @param {MetricInput} b - The second input string or array
|
|
303
|
-
* @param {CmpStrOptions} [opt] - Optional options to use
|
|
304
|
-
* @param {MetricMode} [mode='single'] - The metric mode to use
|
|
305
|
-
* @param {boolean} [raw=false] - Whether to return raw results
|
|
306
|
-
* @param {boolean} [skip=false] - Whether to skip normalization and filtering
|
|
307
|
-
* @returns {T} - The computed metric result
|
|
308
|
-
*/
|
|
309
134
|
compute(a, b, opt, mode, raw, skip) {
|
|
310
135
|
const resolved = this.resolveOptions(opt);
|
|
311
136
|
this.assert('metric', resolved.metric);
|
|
312
|
-
// Prepare the input
|
|
313
137
|
const A = skip ? a : this.prepare(a, resolved);
|
|
314
138
|
const B = skip ? b : this.prepare(b, resolved);
|
|
315
|
-
|
|
139
|
+
if (
|
|
140
|
+
resolved.safeEmpty &&
|
|
141
|
+
((Array.isArray(A) && A.length === 0) ||
|
|
142
|
+
(Array.isArray(B) && B.length === 0) ||
|
|
143
|
+
A === '' ||
|
|
144
|
+
B === '')
|
|
145
|
+
) {
|
|
146
|
+
return [];
|
|
147
|
+
}
|
|
316
148
|
const metric = Registry.factory.metric(resolved.metric, A, B, resolved.opt);
|
|
317
|
-
// Pass the original inputs to the metric
|
|
318
149
|
if (resolved.output !== 'prep') metric.setOriginal(a, b);
|
|
319
|
-
// Compute the metric result
|
|
320
150
|
metric.run(mode);
|
|
321
|
-
// Post-process the results
|
|
322
151
|
const result = this.postProcess(metric.getResults(), resolved);
|
|
323
|
-
// Resolve and return the result based on the raw flag
|
|
324
152
|
return this.output(result, raw ?? resolved.raw);
|
|
325
153
|
}
|
|
326
|
-
/**
|
|
327
|
-
* Resolves the result format (raw or formatted).
|
|
328
|
-
*
|
|
329
|
-
* @template T - The type of the metric result
|
|
330
|
-
* @param {MetricResult<R>} result - The metric result
|
|
331
|
-
* @param {boolean} [raw] - Whether to return raw results
|
|
332
|
-
* @returns {T} - The resolved result
|
|
333
|
-
*/
|
|
334
154
|
output(result, raw) {
|
|
335
155
|
return (raw ?? this.options.raw)
|
|
336
156
|
? result
|
|
@@ -338,213 +158,69 @@ class CmpStr {
|
|
|
338
158
|
? result.map((r) => ({ source: r.a, target: r.b, match: r.res }))
|
|
339
159
|
: { source: result.a, target: result.b, match: result.res };
|
|
340
160
|
}
|
|
341
|
-
/**
|
|
342
|
-
* ---------------------------------------------------------------------------------
|
|
343
|
-
* Managing methods for CmpStr
|
|
344
|
-
* ---------------------------------------------------------------------------------
|
|
345
|
-
*
|
|
346
|
-
* These methods provides an interface to set and get properties of the CmpStr
|
|
347
|
-
* instance, such as options, metric, phonetic algorithm, and more.
|
|
348
|
-
*/
|
|
349
|
-
/**
|
|
350
|
-
* Creates a shallow clone of the current instance.
|
|
351
|
-
*
|
|
352
|
-
* @returns {CmpStr<R>} - The cloned instance
|
|
353
|
-
*/
|
|
354
161
|
clone() {
|
|
355
162
|
return Object.assign(Object.create(Object.getPrototypeOf(this)), this);
|
|
356
163
|
}
|
|
357
|
-
/**
|
|
358
|
-
* Resets the instance, clearing all data and options.
|
|
359
|
-
*
|
|
360
|
-
* @returns {this}
|
|
361
|
-
*/
|
|
362
164
|
reset() {
|
|
363
165
|
for (const k in this.options) delete this.options[k];
|
|
364
166
|
return this;
|
|
365
167
|
}
|
|
366
|
-
/**
|
|
367
|
-
* Sets / replaces the full options object.
|
|
368
|
-
*
|
|
369
|
-
* @param {CmpStrOptions} opt - The options
|
|
370
|
-
* @returns {this}
|
|
371
|
-
*/
|
|
372
168
|
setOptions(opt) {
|
|
373
169
|
this.options = opt;
|
|
374
170
|
return this;
|
|
375
171
|
}
|
|
376
|
-
/**
|
|
377
|
-
* Deep merges and sets new options.
|
|
378
|
-
*
|
|
379
|
-
* @param {CmpStrOptions} opt - The options to merge
|
|
380
|
-
* @returns {this}
|
|
381
|
-
*/
|
|
382
172
|
mergeOptions(opt) {
|
|
383
173
|
DeepMerge.merge(this.options, opt);
|
|
384
174
|
return this;
|
|
385
175
|
}
|
|
386
|
-
/**
|
|
387
|
-
* Sets the serialized options from a JSON string.
|
|
388
|
-
*
|
|
389
|
-
* @param {string} opt - The serialized options
|
|
390
|
-
* @returns {this}
|
|
391
|
-
*/
|
|
392
176
|
setSerializedOptions(opt) {
|
|
393
177
|
this.options = JSON.parse(opt);
|
|
394
178
|
return this;
|
|
395
179
|
}
|
|
396
|
-
/**
|
|
397
|
-
* Sets a specific option at the given path.
|
|
398
|
-
*
|
|
399
|
-
* @param {string} path - The path to the option
|
|
400
|
-
* @param {any} value - The value to set
|
|
401
|
-
* @returns {this}
|
|
402
|
-
*/
|
|
403
180
|
setOption(path, value) {
|
|
404
181
|
DeepMerge.set(this.options, path, value);
|
|
405
182
|
return this;
|
|
406
183
|
}
|
|
407
|
-
/**
|
|
408
|
-
* Removes an option at the given path.
|
|
409
|
-
*
|
|
410
|
-
* @param {string} path - The path to the option
|
|
411
|
-
* @returns {this}
|
|
412
|
-
*/
|
|
413
184
|
rmvOption(path) {
|
|
414
185
|
DeepMerge.rmv(this.options, path);
|
|
415
186
|
return this;
|
|
416
187
|
}
|
|
417
|
-
/**
|
|
418
|
-
* Enable or disable raw output.
|
|
419
|
-
*
|
|
420
|
-
* @param {boolean} enable - Whether to enable or disable raw output
|
|
421
|
-
* @returns {this}
|
|
422
|
-
*/
|
|
423
188
|
setRaw(enable) {
|
|
424
189
|
return this.setOption('raw', enable);
|
|
425
190
|
}
|
|
426
|
-
/**
|
|
427
|
-
* Sets the similatity metric to use (e.g., 'levenshtein', 'dice').
|
|
428
|
-
*
|
|
429
|
-
* @param {string} name - The metric name
|
|
430
|
-
* @returns {this}
|
|
431
|
-
*/
|
|
432
191
|
setMetric(name) {
|
|
433
192
|
return this.setOption('metric', name);
|
|
434
193
|
}
|
|
435
|
-
/**
|
|
436
|
-
* Sets the normalization flags (e.g., 'itw', 'nfc').
|
|
437
|
-
*
|
|
438
|
-
* @param {NormalizeFlags} flags - The normalization flags
|
|
439
|
-
* @returns {this}
|
|
440
|
-
*/
|
|
441
194
|
setFlags(flags) {
|
|
442
195
|
return this.setOption('flags', flags);
|
|
443
196
|
}
|
|
444
|
-
/**
|
|
445
|
-
* Removes the normalization flags entirely.
|
|
446
|
-
*
|
|
447
|
-
* @return {this}
|
|
448
|
-
*/
|
|
449
197
|
rmvFlags() {
|
|
450
198
|
return this.rmvOption('flags');
|
|
451
199
|
}
|
|
452
|
-
/**
|
|
453
|
-
* Sets the pre-processors to use for preparing the input.
|
|
454
|
-
*
|
|
455
|
-
* @param {CmpStrProcessors} opt - The processors to set
|
|
456
|
-
* @returns {this}
|
|
457
|
-
*/
|
|
458
200
|
setProcessors(opt) {
|
|
459
201
|
return this.setOption('processors', opt);
|
|
460
202
|
}
|
|
461
|
-
/**
|
|
462
|
-
* Removes the processors entirely.
|
|
463
|
-
*
|
|
464
|
-
* @returns {this}
|
|
465
|
-
*/
|
|
466
203
|
rmvProcessors() {
|
|
467
204
|
return this.rmvOption('processors');
|
|
468
205
|
}
|
|
469
|
-
/**
|
|
470
|
-
* Returns the current options object.
|
|
471
|
-
*
|
|
472
|
-
* @returns {CmpStrOptions} - The options
|
|
473
|
-
*/
|
|
474
206
|
getOptions() {
|
|
475
207
|
return this.options;
|
|
476
208
|
}
|
|
477
|
-
/**
|
|
478
|
-
* Returns the options as a JSON string.
|
|
479
|
-
*
|
|
480
|
-
* @returns {string} - The serialized options
|
|
481
|
-
*/
|
|
482
209
|
getSerializedOptions() {
|
|
483
210
|
return JSON.stringify(this.options);
|
|
484
211
|
}
|
|
485
|
-
/**
|
|
486
|
-
* Returns a specific option value by path.
|
|
487
|
-
*
|
|
488
|
-
* @param {string} path - The path to the option
|
|
489
|
-
* @returns {any} - The option value
|
|
490
|
-
*/
|
|
491
212
|
getOption(path) {
|
|
492
213
|
return DeepMerge.get(this.options, path);
|
|
493
214
|
}
|
|
494
|
-
/**
|
|
495
|
-
* ---------------------------------------------------------------------------------
|
|
496
|
-
* Public core methods for string comparison
|
|
497
|
-
* ---------------------------------------------------------------------------------
|
|
498
|
-
*
|
|
499
|
-
* These methods provide the core functionality of the CmpStr class, allowing for
|
|
500
|
-
* string comparison, phonetic indexing, filtering, and text search.
|
|
501
|
-
*/
|
|
502
|
-
/**
|
|
503
|
-
* Performs a single metric comparison between the source and target.
|
|
504
|
-
*
|
|
505
|
-
* @template T - The type of the metric result
|
|
506
|
-
* @param {string} a - The source string
|
|
507
|
-
* @param {string} b - The target string
|
|
508
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
509
|
-
* @returns {T} - The metric result
|
|
510
|
-
*/
|
|
511
215
|
test(a, b, opt) {
|
|
512
216
|
return this.compute(a, b, opt, 'single');
|
|
513
217
|
}
|
|
514
|
-
/**
|
|
515
|
-
* Performs a single metric comparison and returns only the numeric score.
|
|
516
|
-
*
|
|
517
|
-
* @param {string} a - The source string
|
|
518
|
-
* @param {string} b - The target string
|
|
519
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
520
|
-
* @returns {number} - The similarity score (0..1)
|
|
521
|
-
*/
|
|
522
218
|
compare(a, b, opt) {
|
|
523
219
|
return this.compute(a, b, opt, 'single', true).res;
|
|
524
220
|
}
|
|
525
|
-
/**
|
|
526
|
-
* Performs a batch metric comparison between source and target strings
|
|
527
|
-
* or array of strings.
|
|
528
|
-
*
|
|
529
|
-
* @template T - The type of the metric result
|
|
530
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
531
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
532
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
533
|
-
* @returns {T} - The batch metric results
|
|
534
|
-
*/
|
|
535
221
|
batchTest(a, b, opt) {
|
|
536
222
|
return this.compute(a, b, opt, 'batch');
|
|
537
223
|
}
|
|
538
|
-
/**
|
|
539
|
-
* Performs a batch metric comparison and returns results sorted by score.
|
|
540
|
-
*
|
|
541
|
-
* @template T - The type of the metric result
|
|
542
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
543
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
544
|
-
* @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
|
|
545
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
546
|
-
* @returns {T} - The sorted batch results
|
|
547
|
-
*/
|
|
548
224
|
batchSorted(a, b, dir = 'desc', opt) {
|
|
549
225
|
return this.output(
|
|
550
226
|
this.compute(a, b, opt, 'batch', true).sort((a, b) =>
|
|
@@ -553,32 +229,9 @@ class CmpStr {
|
|
|
553
229
|
opt?.raw ?? this.options.raw
|
|
554
230
|
);
|
|
555
231
|
}
|
|
556
|
-
/**
|
|
557
|
-
* Performs a pairwise metric comparison between source and target strings
|
|
558
|
-
* or array of strings.
|
|
559
|
-
*
|
|
560
|
-
* Input arrays needs of the same length to perform pairwise comparison,
|
|
561
|
-
* otherwise the method will throw an error.
|
|
562
|
-
*
|
|
563
|
-
* @template T - The type of the metric result
|
|
564
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
565
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
566
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
567
|
-
* @returns {T} - The pairwise metric results
|
|
568
|
-
*/
|
|
569
232
|
pairs(a, b, opt) {
|
|
570
233
|
return this.compute(a, b, opt, 'pairwise');
|
|
571
234
|
}
|
|
572
|
-
/**
|
|
573
|
-
* Performs a batch comparison and returns only results above the threshold.
|
|
574
|
-
*
|
|
575
|
-
* @template T - The type of the metric result
|
|
576
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
577
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
578
|
-
* @param {number} threshold - The similarity threshold (0..1)
|
|
579
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
580
|
-
* @returns {T} - The filtered batch results
|
|
581
|
-
*/
|
|
582
235
|
match(a, b, threshold, opt) {
|
|
583
236
|
return this.output(
|
|
584
237
|
this.compute(a, b, opt, 'batch', true)
|
|
@@ -587,56 +240,18 @@ class CmpStr {
|
|
|
587
240
|
opt?.raw ?? this.options.raw
|
|
588
241
|
);
|
|
589
242
|
}
|
|
590
|
-
/**
|
|
591
|
-
* Returns the n closest matches from a batch comparison.
|
|
592
|
-
*
|
|
593
|
-
* @template T - The type of the metric result
|
|
594
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
595
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
596
|
-
* @param {number} [n=1] - Number of closest matches
|
|
597
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
598
|
-
* @returns {T} - The closest matches
|
|
599
|
-
*/
|
|
600
243
|
closest(a, b, n = 1, opt) {
|
|
601
244
|
return this.batchSorted(a, b, 'desc', opt).slice(0, n);
|
|
602
245
|
}
|
|
603
|
-
/**
|
|
604
|
-
* Returns the n furthest matches from a batch comparison.
|
|
605
|
-
*
|
|
606
|
-
* @template T - The type of the metric result
|
|
607
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
608
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
609
|
-
* @param {number} [n=1] - Number of furthest matches
|
|
610
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
611
|
-
* @returns {T} - The furthest matches
|
|
612
|
-
*/
|
|
613
246
|
furthest(a, b, n = 1, opt) {
|
|
614
247
|
return this.batchSorted(a, b, 'asc', opt).slice(0, n);
|
|
615
248
|
}
|
|
616
|
-
/**
|
|
617
|
-
* Performs a normalized and filtered substring search.
|
|
618
|
-
*
|
|
619
|
-
* @param {string} needle - The search string
|
|
620
|
-
* @param {string[]} haystack - The array to search in
|
|
621
|
-
* @param {NormalizeFlags} [flags] - Normalization flags
|
|
622
|
-
* @param {CmpStrProcessors} [processors] - Pre-processors to apply
|
|
623
|
-
* @returns {string[]} - Array of matching entries
|
|
624
|
-
*/
|
|
625
249
|
search(needle, haystack, flags, processors) {
|
|
626
250
|
const resolved = this.resolveOptions({ flags, processors });
|
|
627
|
-
// Prepare the needle and haystack, normalizing and filtering them
|
|
628
251
|
const test = this.prepare(needle, resolved);
|
|
629
252
|
const hstk = this.prepare(haystack, resolved);
|
|
630
|
-
// Filter the haystack based on the normalized test string
|
|
631
253
|
return haystack.filter((_, i) => hstk[i].includes(test));
|
|
632
254
|
}
|
|
633
|
-
/**
|
|
634
|
-
* Computes a similarity matrix for the given input array.
|
|
635
|
-
*
|
|
636
|
-
* @param {string[]} input - The input array
|
|
637
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
638
|
-
* @returns {number[][]} - The similarity matrix
|
|
639
|
-
*/
|
|
640
255
|
matrix(input, opt) {
|
|
641
256
|
input = this.prepare(input, this.resolveOptions(opt));
|
|
642
257
|
return input.map((a) =>
|
|
@@ -645,15 +260,6 @@ class CmpStr {
|
|
|
645
260
|
)
|
|
646
261
|
);
|
|
647
262
|
}
|
|
648
|
-
/**
|
|
649
|
-
* Computes the phonetic index for a string using the configured
|
|
650
|
-
* or given algorithm.
|
|
651
|
-
*
|
|
652
|
-
* @param {string} [input] - The input string
|
|
653
|
-
* @param {string} [algo] - The phonetic algorithm to use
|
|
654
|
-
* @param {PhoneticOptions} [opt] - Optional phonetic options
|
|
655
|
-
* @returns {string} - The phonetic index as a string
|
|
656
|
-
*/
|
|
657
263
|
phoneticIndex(input, algo, opt) {
|
|
658
264
|
const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
|
|
659
265
|
return this.index(input, { algo: algo ?? a, opt: opt ?? o });
|