cmpstr 3.0.3 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/CmpStr.esm.js +2228 -4944
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +1 -1
- package/dist/CmpStr.umd.js +2348 -5040
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +1 -1
- package/dist/cjs/CmpStr.cjs +1 -405
- package/dist/cjs/CmpStr.cjs.map +1 -1
- package/dist/cjs/CmpStrAsync.cjs +1 -221
- package/dist/cjs/CmpStrAsync.cjs.map +1 -1
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +1 -56
- package/dist/cjs/metric/Cosine.cjs.map +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +2 -64
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +1 -56
- package/dist/cjs/metric/DiceSorensen.cjs.map +1 -1
- package/dist/cjs/metric/Hamming.cjs +2 -51
- package/dist/cjs/metric/Hamming.cjs.map +1 -1
- package/dist/cjs/metric/Jaccard.cjs +1 -48
- package/dist/cjs/metric/Jaccard.cjs.map +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +1 -53
- package/dist/cjs/metric/JaroWinkler.cjs.map +1 -1
- package/dist/cjs/metric/LCS.cjs +1 -54
- package/dist/cjs/metric/LCS.cjs.map +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +2 -60
- package/dist/cjs/metric/Levenshtein.cjs.map +1 -1
- package/dist/cjs/metric/Metric.cjs +1 -261
- package/dist/cjs/metric/Metric.cjs.map +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs +4 -56
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs +4 -58
- package/dist/cjs/metric/SmithWaterman.cjs.map +1 -1
- package/dist/cjs/metric/qGram.cjs +1 -55
- package/dist/cjs/metric/qGram.cjs.map +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +1 -78
- package/dist/cjs/phonetic/Caverphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Cologne.cjs +1 -43
- package/dist/cjs/phonetic/Cologne.cjs.map +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +1 -76
- package/dist/cjs/phonetic/Metaphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +1 -261
- package/dist/cjs/phonetic/Phonetic.cjs.map +1 -1
- package/dist/cjs/phonetic/Soundex.cjs +1 -47
- package/dist/cjs/phonetic/Soundex.cjs.map +1 -1
- package/dist/cjs/root.cjs +37 -0
- package/dist/cjs/root.cjs.map +1 -0
- package/dist/cjs/utils/DeepMerge.cjs +8 -75
- package/dist/cjs/utils/DeepMerge.cjs.map +1 -1
- package/dist/cjs/utils/DiffChecker.cjs +2 -190
- package/dist/cjs/utils/DiffChecker.cjs.map +1 -1
- package/dist/cjs/utils/Filter.cjs +1 -112
- package/dist/cjs/utils/Filter.cjs.map +1 -1
- package/dist/cjs/utils/HashTable.cjs +1 -99
- package/dist/cjs/utils/HashTable.cjs.map +1 -1
- package/dist/cjs/utils/Normalizer.cjs +3 -94
- package/dist/cjs/utils/Normalizer.cjs.map +1 -1
- package/dist/cjs/utils/Pool.cjs +1 -105
- package/dist/cjs/utils/Pool.cjs.map +1 -1
- package/dist/cjs/utils/Profiler.cjs +1 -133
- package/dist/cjs/utils/Profiler.cjs.map +1 -1
- package/dist/cjs/utils/Registry.cjs +1 -89
- package/dist/cjs/utils/Registry.cjs.map +1 -1
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -180
- package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -1
- package/dist/esm/CmpStr.mjs +1 -405
- package/dist/esm/CmpStr.mjs.map +1 -1
- package/dist/esm/CmpStrAsync.mjs +1 -221
- package/dist/esm/CmpStrAsync.mjs.map +1 -1
- package/dist/esm/index.mjs +1 -1
- package/dist/esm/metric/Cosine.mjs +1 -56
- package/dist/esm/metric/Cosine.mjs.map +1 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs +2 -64
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -1
- package/dist/esm/metric/DiceSorensen.mjs +1 -56
- package/dist/esm/metric/DiceSorensen.mjs.map +1 -1
- package/dist/esm/metric/Hamming.mjs +2 -51
- package/dist/esm/metric/Hamming.mjs.map +1 -1
- package/dist/esm/metric/Jaccard.mjs +1 -48
- package/dist/esm/metric/Jaccard.mjs.map +1 -1
- package/dist/esm/metric/JaroWinkler.mjs +1 -53
- package/dist/esm/metric/JaroWinkler.mjs.map +1 -1
- package/dist/esm/metric/LCS.mjs +1 -54
- package/dist/esm/metric/LCS.mjs.map +1 -1
- package/dist/esm/metric/Levenshtein.mjs +2 -60
- package/dist/esm/metric/Levenshtein.mjs.map +1 -1
- package/dist/esm/metric/Metric.mjs +1 -261
- package/dist/esm/metric/Metric.mjs.map +1 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs +4 -56
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -1
- package/dist/esm/metric/SmithWaterman.mjs +4 -58
- package/dist/esm/metric/SmithWaterman.mjs.map +1 -1
- package/dist/esm/metric/qGram.mjs +1 -55
- package/dist/esm/metric/qGram.mjs.map +1 -1
- package/dist/esm/phonetic/Caverphone.mjs +1 -78
- package/dist/esm/phonetic/Caverphone.mjs.map +1 -1
- package/dist/esm/phonetic/Cologne.mjs +1 -43
- package/dist/esm/phonetic/Cologne.mjs.map +1 -1
- package/dist/esm/phonetic/Metaphone.mjs +1 -76
- package/dist/esm/phonetic/Metaphone.mjs.map +1 -1
- package/dist/esm/phonetic/Phonetic.mjs +1 -261
- package/dist/esm/phonetic/Phonetic.mjs.map +1 -1
- package/dist/esm/phonetic/Soundex.mjs +1 -47
- package/dist/esm/phonetic/Soundex.mjs.map +1 -1
- package/dist/esm/root.mjs +29 -0
- package/dist/esm/root.mjs.map +1 -0
- package/dist/esm/utils/DeepMerge.mjs +8 -76
- package/dist/esm/utils/DeepMerge.mjs.map +1 -1
- package/dist/esm/utils/DiffChecker.mjs +2 -190
- package/dist/esm/utils/DiffChecker.mjs.map +1 -1
- package/dist/esm/utils/Filter.mjs +1 -112
- package/dist/esm/utils/Filter.mjs.map +1 -1
- package/dist/esm/utils/HashTable.mjs +1 -99
- package/dist/esm/utils/HashTable.mjs.map +1 -1
- package/dist/esm/utils/Normalizer.mjs +3 -94
- package/dist/esm/utils/Normalizer.mjs.map +1 -1
- package/dist/esm/utils/Pool.mjs +1 -105
- package/dist/esm/utils/Pool.mjs.map +1 -1
- package/dist/esm/utils/Profiler.mjs +1 -133
- package/dist/esm/utils/Profiler.mjs.map +1 -1
- package/dist/esm/utils/Registry.mjs +1 -89
- package/dist/esm/utils/Registry.mjs.map +1 -1
- package/dist/esm/utils/TextAnalyzer.mjs +1 -180
- package/dist/esm/utils/TextAnalyzer.mjs.map +1 -1
- package/dist/types/index.d.ts +2 -2
- package/dist/types/root.d.ts +38 -0
- package/package.json +14 -8
package/dist/CmpStr.umd.min.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* CmpStr v3.0.
|
|
2
|
+
* CmpStr v3.0.4 build-74e65a5-250915
|
|
3
3
|
* This is a lightweight, fast and well performing library for calculating string similarity.
|
|
4
4
|
* (c) 2023-2025 Paul Köhler @komed3 / MIT License
|
|
5
5
|
* Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
|
package/dist/cjs/CmpStr.cjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.0.
|
|
1
|
+
// CmpStr v3.0.4 build-74e65a5-250915 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
var DeepMerge = require('./utils/DeepMerge.cjs');
|
|
@@ -26,49 +26,8 @@ require('./phonetic/Metaphone.cjs');
|
|
|
26
26
|
require('./phonetic/Soundex.cjs');
|
|
27
27
|
var Phonetic = require('./phonetic/Phonetic.cjs');
|
|
28
28
|
|
|
29
|
-
/**
|
|
30
|
-
* CmpStr Main API
|
|
31
|
-
* src/CmpStr.ts
|
|
32
|
-
*
|
|
33
|
-
* The CmpStr class provides a comprehensive, highly abstracted, and type-safe interface
|
|
34
|
-
* for string comparison, similarity measurement, phonetic indexing, filtering, normalization,
|
|
35
|
-
* and text analysis. It unifies all core features of the CmpStr package and exposes a
|
|
36
|
-
* consistent, user-friendly API for both single and batch operations.
|
|
37
|
-
*
|
|
38
|
-
* Features:
|
|
39
|
-
* - Centralized management of metrics, phonetic algorithms, and filters
|
|
40
|
-
* - Flexible normalization and filtering pipeline for all inputs
|
|
41
|
-
* - Batch, pairwise, and single string comparison with detailed results
|
|
42
|
-
* - Phonetic indexing and phonetic-aware search and comparison
|
|
43
|
-
* - Text analysis and unified diff utilities
|
|
44
|
-
* - Full TypeScript type safety and extensibility
|
|
45
|
-
*
|
|
46
|
-
* @module CmpStr
|
|
47
|
-
* @author Paul Köhler (komed3)
|
|
48
|
-
* @license MIT
|
|
49
|
-
*/
|
|
50
|
-
// Import the Profiler instance for global profiling
|
|
51
29
|
const profiler = Profiler.Profiler.getInstance();
|
|
52
|
-
/**
|
|
53
|
-
* The main CmpStr class that provides a unified interface for string comparison,
|
|
54
|
-
* phonetic indexing, filtering, and text analysis.
|
|
55
|
-
*
|
|
56
|
-
* @template R - The type of the metric result, defaults to MetricRaw
|
|
57
|
-
*/
|
|
58
30
|
class CmpStr {
|
|
59
|
-
/**
|
|
60
|
-
* --------------------------------------------------------------------------------
|
|
61
|
-
* Static methods and properties for global access to CmpStr features
|
|
62
|
-
* --------------------------------------------------------------------------------
|
|
63
|
-
*
|
|
64
|
-
* These static methods provide a convenient way to access the core features of
|
|
65
|
-
* the CmpStr package without needing to instantiate a CmpStr object.
|
|
66
|
-
*/
|
|
67
|
-
/**
|
|
68
|
-
* Adds, removes, pauses, resumes, lists, or clears global filters.
|
|
69
|
-
*
|
|
70
|
-
* @see Filter
|
|
71
|
-
*/
|
|
72
31
|
static filter = {
|
|
73
32
|
add: Filter.Filter.add,
|
|
74
33
|
remove: Filter.Filter.remove,
|
|
@@ -77,22 +36,12 @@ class CmpStr {
|
|
|
77
36
|
list: Filter.Filter.list,
|
|
78
37
|
clear: Filter.Filter.clear
|
|
79
38
|
};
|
|
80
|
-
/**
|
|
81
|
-
* Adds, removes, checks, or lists available metrics.
|
|
82
|
-
*
|
|
83
|
-
* @see MetricRegistry
|
|
84
|
-
*/
|
|
85
39
|
static metric = {
|
|
86
40
|
add: Metric.MetricRegistry.add,
|
|
87
41
|
remove: Metric.MetricRegistry.remove,
|
|
88
42
|
has: Metric.MetricRegistry.has,
|
|
89
43
|
list: Metric.MetricRegistry.list
|
|
90
44
|
};
|
|
91
|
-
/**
|
|
92
|
-
* Adds, removes, checks, or lists available phonetic algorithms and mappings.
|
|
93
|
-
*
|
|
94
|
-
* @see PhoneticRegistry
|
|
95
|
-
*/
|
|
96
45
|
static phonetic = {
|
|
97
46
|
add: Phonetic.PhoneticRegistry.add,
|
|
98
47
|
remove: Phonetic.PhoneticRegistry.remove,
|
|
@@ -105,89 +54,30 @@ class CmpStr {
|
|
|
105
54
|
list: Phonetic.PhoneticMappingRegistry.list
|
|
106
55
|
}
|
|
107
56
|
};
|
|
108
|
-
/**
|
|
109
|
-
* Provides access to the global profiler services.
|
|
110
|
-
*
|
|
111
|
-
* @see Profiler
|
|
112
|
-
*/
|
|
113
57
|
static profiler = profiler.services;
|
|
114
|
-
/**
|
|
115
|
-
* Clears the caches for normalizer, metric, and phonetic modules.
|
|
116
|
-
*/
|
|
117
58
|
static clearCache = {
|
|
118
59
|
normalizer: Normalizer.Normalizer.clear,
|
|
119
60
|
metric: Metric.Metric.clear,
|
|
120
61
|
phonetic: Phonetic.Phonetic.clear
|
|
121
62
|
};
|
|
122
|
-
/**
|
|
123
|
-
* Returns a TextAnalyzer instance for the given input string.
|
|
124
|
-
*
|
|
125
|
-
* @param {string} [input] - The input string
|
|
126
|
-
* @returns {TextAnalyzer} - The text analyzer
|
|
127
|
-
*/
|
|
128
63
|
static analyze(input) {
|
|
129
64
|
return new TextAnalyzer.TextAnalyzer(input);
|
|
130
65
|
}
|
|
131
|
-
/**
|
|
132
|
-
* Returns a DiffChecker instance for computing the unified diff between two texts.
|
|
133
|
-
*
|
|
134
|
-
* @param {string} a - The first (original) text
|
|
135
|
-
* @param {string} b - The second (modified) text
|
|
136
|
-
* @param {DiffOptions} [opt] - Optional diff configuration
|
|
137
|
-
* @returns {DiffChecker} - The diff checker instance
|
|
138
|
-
*/
|
|
139
66
|
static diff(a, b, opt) {
|
|
140
67
|
return new DiffChecker.DiffChecker(a, b, opt);
|
|
141
68
|
}
|
|
142
|
-
/**
|
|
143
|
-
* --------------------------------------------------------------------------------
|
|
144
|
-
* Instanciate the CmpStr class
|
|
145
|
-
* --------------------------------------------------------------------------------
|
|
146
|
-
*
|
|
147
|
-
* Methods to create a new CmpStr instance with the given options.
|
|
148
|
-
* Using the static `create` method is recommended to ensure proper instantiation.
|
|
149
|
-
*/
|
|
150
|
-
/**
|
|
151
|
-
* Creates a new CmpStr instance with the given options.
|
|
152
|
-
*
|
|
153
|
-
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
|
|
154
|
-
* @returns {CmpStr<R>} - A new CmpStr instance
|
|
155
|
-
*/
|
|
156
69
|
static create(opt) {
|
|
157
70
|
return new CmpStr(opt);
|
|
158
71
|
}
|
|
159
|
-
// The options object that holds the configuration for this CmpStr instance
|
|
160
72
|
options = Object.create(null);
|
|
161
|
-
/**
|
|
162
|
-
* Creates a new CmpStr instance with the given options.
|
|
163
|
-
* The constructor is protected to enforce the use of the static `create` method.
|
|
164
|
-
*
|
|
165
|
-
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
|
|
166
|
-
*/
|
|
167
73
|
constructor(opt) {
|
|
168
74
|
if (opt)
|
|
169
75
|
typeof opt === 'string'
|
|
170
76
|
? this.setSerializedOptions(opt)
|
|
171
77
|
: this.setOptions(opt);
|
|
172
78
|
}
|
|
173
|
-
/**
|
|
174
|
-
* ---------------------------------------------------------------------------------
|
|
175
|
-
* Protected utility methods for internal use
|
|
176
|
-
* ---------------------------------------------------------------------------------
|
|
177
|
-
*
|
|
178
|
-
* These methods provide utility functions for converting inputs, merging options,
|
|
179
|
-
* normalizing inputs, filtering, and preparing inputs for comparison.
|
|
180
|
-
*/
|
|
181
|
-
/**
|
|
182
|
-
* Assert a condition and throws if the condition is not met.
|
|
183
|
-
*
|
|
184
|
-
* @param {string} cond - The condition to met
|
|
185
|
-
* @param {any} [test] - Value to test for
|
|
186
|
-
* @throws {Error} If the condition is not met
|
|
187
|
-
*/
|
|
188
79
|
assert(cond, test) {
|
|
189
80
|
switch (cond) {
|
|
190
|
-
// Check if the metric exists
|
|
191
81
|
case 'metric':
|
|
192
82
|
if (!CmpStr.metric.has(test))
|
|
193
83
|
throw new Error(
|
|
@@ -195,7 +85,6 @@ class CmpStr {
|
|
|
195
85
|
`use CmpStr.metric.list() for available metrics`
|
|
196
86
|
);
|
|
197
87
|
break;
|
|
198
|
-
// Check if the phonetic algorithm exists
|
|
199
88
|
case 'phonetic':
|
|
200
89
|
if (!CmpStr.phonetic.has(test))
|
|
201
90
|
throw new Error(
|
|
@@ -203,88 +92,37 @@ class CmpStr {
|
|
|
203
92
|
`use CmpStr.phonetic.list() for available phonetic algorithms`
|
|
204
93
|
);
|
|
205
94
|
break;
|
|
206
|
-
// Throw an error for unknown conditions
|
|
207
95
|
default:
|
|
208
96
|
throw new Error(`Cmpstr condition <${cond}> unknown`);
|
|
209
97
|
}
|
|
210
98
|
}
|
|
211
|
-
/**
|
|
212
|
-
* Assert multiple conditions.
|
|
213
|
-
*
|
|
214
|
-
* @param {[ string, any? ][]} cond - Array of [ condition, value ] pairs
|
|
215
|
-
*/
|
|
216
99
|
assertMany(...cond) {
|
|
217
100
|
for (const [c, test] of cond) this.assert(c, test);
|
|
218
101
|
}
|
|
219
|
-
/**
|
|
220
|
-
* Resolves the options for the CmpStr instance, merging the provided options with
|
|
221
|
-
* the existing options.
|
|
222
|
-
*
|
|
223
|
-
* @param {CmpStrOptions} [opt] - Optional options to merge
|
|
224
|
-
* @returns {CmpStrOptions} - The resolved options
|
|
225
|
-
*/
|
|
226
102
|
resolveOptions(opt) {
|
|
227
103
|
return DeepMerge.merge({ ...(this.options ?? Object.create(null)) }, opt);
|
|
228
104
|
}
|
|
229
|
-
/**
|
|
230
|
-
* Normalizes the input string or array using the configured or provided flags.
|
|
231
|
-
*
|
|
232
|
-
* @param {MetricInput} input - The input string or array
|
|
233
|
-
* @param {NormalizeFlags} [flags] - Normalization flags
|
|
234
|
-
* @returns {MetricInput} - The normalized input
|
|
235
|
-
*/
|
|
236
105
|
normalize(input, flags) {
|
|
237
106
|
return Normalizer.Normalizer.normalize(
|
|
238
107
|
input,
|
|
239
108
|
flags ?? this.options.flags ?? ''
|
|
240
109
|
);
|
|
241
110
|
}
|
|
242
|
-
/**
|
|
243
|
-
* Applies all active filters to the input string or array.
|
|
244
|
-
*
|
|
245
|
-
* @param {MetricInput} input - The input string or array
|
|
246
|
-
* @param {string} [hook='input'] - The filter hook
|
|
247
|
-
* @returns {MetricInput} - The filtered string(s)
|
|
248
|
-
*/
|
|
249
111
|
filter(input, hook) {
|
|
250
112
|
return Filter.Filter.apply(hook, input);
|
|
251
113
|
}
|
|
252
|
-
/**
|
|
253
|
-
* Prepares the input by normalizing and filtering.
|
|
254
|
-
*
|
|
255
|
-
* @param {MetricInput} [input] - The input string or array
|
|
256
|
-
* @param {CmpStrOptions} [opt] - Optional options to use
|
|
257
|
-
* @returns {MetricInput} - The prepared input
|
|
258
|
-
*/
|
|
259
114
|
prepare(input, opt) {
|
|
260
115
|
const { flags, processors } = opt ?? this.options;
|
|
261
|
-
// Normalize the input using flags (i.e., 'itw')
|
|
262
116
|
if (flags?.length) input = this.normalize(input, flags);
|
|
263
|
-
// Filter the input using hooked up filters
|
|
264
117
|
input = this.filter(input, 'input');
|
|
265
|
-
// Apply phonetic processors if configured
|
|
266
118
|
if (processors?.phonetic) input = this.index(input, processors.phonetic);
|
|
267
119
|
return input;
|
|
268
120
|
}
|
|
269
|
-
/**
|
|
270
|
-
* Post-process the results of the metric computation.
|
|
271
|
-
*
|
|
272
|
-
* @param {MetricResult<R>} result - The metric result
|
|
273
|
-
* @returns {MetricResult<R>} - The post-processed results
|
|
274
|
-
*/
|
|
275
121
|
postProcess(result, opt) {
|
|
276
|
-
// Remove "zero similarity" from batch results if configured
|
|
277
122
|
if (opt?.removeZero && Array.isArray(result))
|
|
278
123
|
result = result.filter((r) => r.res > 0);
|
|
279
124
|
return result;
|
|
280
125
|
}
|
|
281
|
-
/**
|
|
282
|
-
* Computes the phonetic index for the given input using the specified phonetic algorithm.
|
|
283
|
-
*
|
|
284
|
-
* @param {MetricInput} input - The input string or array
|
|
285
|
-
* @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
|
|
286
|
-
* @returns {MetricInput} - The phonetic index for the given input
|
|
287
|
-
*/
|
|
288
126
|
index(input, { algo, opt }) {
|
|
289
127
|
this.assert('phonetic', algo);
|
|
290
128
|
const phonetic = Registry.factory.phonetic(algo, opt);
|
|
@@ -293,26 +131,11 @@ class CmpStr {
|
|
|
293
131
|
? input.map((s) => phonetic.getIndex(s).join(delimiter))
|
|
294
132
|
: phonetic.getIndex(input).join(delimiter);
|
|
295
133
|
}
|
|
296
|
-
/**
|
|
297
|
-
* Computes the metric result for the given inputs, applying normalization and
|
|
298
|
-
* filtering as configured.
|
|
299
|
-
*
|
|
300
|
-
* @template T - The type of the metric result
|
|
301
|
-
* @param {MetricInput} a - The first input string or array
|
|
302
|
-
* @param {MetricInput} b - The second input string or array
|
|
303
|
-
* @param {CmpStrOptions} [opt] - Optional options to use
|
|
304
|
-
* @param {MetricMode} [mode='single'] - The metric mode to use
|
|
305
|
-
* @param {boolean} [raw=false] - Whether to return raw results
|
|
306
|
-
* @param {boolean} [skip=false] - Whether to skip normalization and filtering
|
|
307
|
-
* @returns {T} - The computed metric result
|
|
308
|
-
*/
|
|
309
134
|
compute(a, b, opt, mode, raw, skip) {
|
|
310
135
|
const resolved = this.resolveOptions(opt);
|
|
311
136
|
this.assert('metric', resolved.metric);
|
|
312
|
-
// Prepare the input
|
|
313
137
|
const A = skip ? a : this.prepare(a, resolved);
|
|
314
138
|
const B = skip ? b : this.prepare(b, resolved);
|
|
315
|
-
// If the inputs are empty and safeEmpty is enabled, return an empty array
|
|
316
139
|
if (
|
|
317
140
|
resolved.safeEmpty &&
|
|
318
141
|
((Array.isArray(A) && A.length === 0) ||
|
|
@@ -322,25 +145,12 @@ class CmpStr {
|
|
|
322
145
|
) {
|
|
323
146
|
return [];
|
|
324
147
|
}
|
|
325
|
-
// Get the metric class
|
|
326
148
|
const metric = Registry.factory.metric(resolved.metric, A, B, resolved.opt);
|
|
327
|
-
// Pass the original inputs to the metric
|
|
328
149
|
if (resolved.output !== 'prep') metric.setOriginal(a, b);
|
|
329
|
-
// Compute the metric result
|
|
330
150
|
metric.run(mode);
|
|
331
|
-
// Post-process the results
|
|
332
151
|
const result = this.postProcess(metric.getResults(), resolved);
|
|
333
|
-
// Resolve and return the result based on the raw flag
|
|
334
152
|
return this.output(result, raw ?? resolved.raw);
|
|
335
153
|
}
|
|
336
|
-
/**
|
|
337
|
-
* Resolves the result format (raw or formatted).
|
|
338
|
-
*
|
|
339
|
-
* @template T - The type of the metric result
|
|
340
|
-
* @param {MetricResult<R>} result - The metric result
|
|
341
|
-
* @param {boolean} [raw] - Whether to return raw results
|
|
342
|
-
* @returns {T} - The resolved result
|
|
343
|
-
*/
|
|
344
154
|
output(result, raw) {
|
|
345
155
|
return (raw ?? this.options.raw)
|
|
346
156
|
? result
|
|
@@ -348,213 +158,69 @@ class CmpStr {
|
|
|
348
158
|
? result.map((r) => ({ source: r.a, target: r.b, match: r.res }))
|
|
349
159
|
: { source: result.a, target: result.b, match: result.res };
|
|
350
160
|
}
|
|
351
|
-
/**
|
|
352
|
-
* ---------------------------------------------------------------------------------
|
|
353
|
-
* Managing methods for CmpStr
|
|
354
|
-
* ---------------------------------------------------------------------------------
|
|
355
|
-
*
|
|
356
|
-
* These methods provides an interface to set and get properties of the CmpStr
|
|
357
|
-
* instance, such as options, metric, phonetic algorithm, and more.
|
|
358
|
-
*/
|
|
359
|
-
/**
|
|
360
|
-
* Creates a shallow clone of the current instance.
|
|
361
|
-
*
|
|
362
|
-
* @returns {CmpStr<R>} - The cloned instance
|
|
363
|
-
*/
|
|
364
161
|
clone() {
|
|
365
162
|
return Object.assign(Object.create(Object.getPrototypeOf(this)), this);
|
|
366
163
|
}
|
|
367
|
-
/**
|
|
368
|
-
* Resets the instance, clearing all data and options.
|
|
369
|
-
*
|
|
370
|
-
* @returns {this}
|
|
371
|
-
*/
|
|
372
164
|
reset() {
|
|
373
165
|
for (const k in this.options) delete this.options[k];
|
|
374
166
|
return this;
|
|
375
167
|
}
|
|
376
|
-
/**
|
|
377
|
-
* Sets / replaces the full options object.
|
|
378
|
-
*
|
|
379
|
-
* @param {CmpStrOptions} opt - The options
|
|
380
|
-
* @returns {this}
|
|
381
|
-
*/
|
|
382
168
|
setOptions(opt) {
|
|
383
169
|
this.options = opt;
|
|
384
170
|
return this;
|
|
385
171
|
}
|
|
386
|
-
/**
|
|
387
|
-
* Deep merges and sets new options.
|
|
388
|
-
*
|
|
389
|
-
* @param {CmpStrOptions} opt - The options to merge
|
|
390
|
-
* @returns {this}
|
|
391
|
-
*/
|
|
392
172
|
mergeOptions(opt) {
|
|
393
173
|
DeepMerge.merge(this.options, opt);
|
|
394
174
|
return this;
|
|
395
175
|
}
|
|
396
|
-
/**
|
|
397
|
-
* Sets the serialized options from a JSON string.
|
|
398
|
-
*
|
|
399
|
-
* @param {string} opt - The serialized options
|
|
400
|
-
* @returns {this}
|
|
401
|
-
*/
|
|
402
176
|
setSerializedOptions(opt) {
|
|
403
177
|
this.options = JSON.parse(opt);
|
|
404
178
|
return this;
|
|
405
179
|
}
|
|
406
|
-
/**
|
|
407
|
-
* Sets a specific option at the given path.
|
|
408
|
-
*
|
|
409
|
-
* @param {string} path - The path to the option
|
|
410
|
-
* @param {any} value - The value to set
|
|
411
|
-
* @returns {this}
|
|
412
|
-
*/
|
|
413
180
|
setOption(path, value) {
|
|
414
181
|
DeepMerge.set(this.options, path, value);
|
|
415
182
|
return this;
|
|
416
183
|
}
|
|
417
|
-
/**
|
|
418
|
-
* Removes an option at the given path.
|
|
419
|
-
*
|
|
420
|
-
* @param {string} path - The path to the option
|
|
421
|
-
* @returns {this}
|
|
422
|
-
*/
|
|
423
184
|
rmvOption(path) {
|
|
424
185
|
DeepMerge.rmv(this.options, path);
|
|
425
186
|
return this;
|
|
426
187
|
}
|
|
427
|
-
/**
|
|
428
|
-
* Enable or disable raw output.
|
|
429
|
-
*
|
|
430
|
-
* @param {boolean} enable - Whether to enable or disable raw output
|
|
431
|
-
* @returns {this}
|
|
432
|
-
*/
|
|
433
188
|
setRaw(enable) {
|
|
434
189
|
return this.setOption('raw', enable);
|
|
435
190
|
}
|
|
436
|
-
/**
|
|
437
|
-
* Sets the similatity metric to use (e.g., 'levenshtein', 'dice').
|
|
438
|
-
*
|
|
439
|
-
* @param {string} name - The metric name
|
|
440
|
-
* @returns {this}
|
|
441
|
-
*/
|
|
442
191
|
setMetric(name) {
|
|
443
192
|
return this.setOption('metric', name);
|
|
444
193
|
}
|
|
445
|
-
/**
|
|
446
|
-
* Sets the normalization flags (e.g., 'itw', 'nfc').
|
|
447
|
-
*
|
|
448
|
-
* @param {NormalizeFlags} flags - The normalization flags
|
|
449
|
-
* @returns {this}
|
|
450
|
-
*/
|
|
451
194
|
setFlags(flags) {
|
|
452
195
|
return this.setOption('flags', flags);
|
|
453
196
|
}
|
|
454
|
-
/**
|
|
455
|
-
* Removes the normalization flags entirely.
|
|
456
|
-
*
|
|
457
|
-
* @return {this}
|
|
458
|
-
*/
|
|
459
197
|
rmvFlags() {
|
|
460
198
|
return this.rmvOption('flags');
|
|
461
199
|
}
|
|
462
|
-
/**
|
|
463
|
-
* Sets the pre-processors to use for preparing the input.
|
|
464
|
-
*
|
|
465
|
-
* @param {CmpStrProcessors} opt - The processors to set
|
|
466
|
-
* @returns {this}
|
|
467
|
-
*/
|
|
468
200
|
setProcessors(opt) {
|
|
469
201
|
return this.setOption('processors', opt);
|
|
470
202
|
}
|
|
471
|
-
/**
|
|
472
|
-
* Removes the processors entirely.
|
|
473
|
-
*
|
|
474
|
-
* @returns {this}
|
|
475
|
-
*/
|
|
476
203
|
rmvProcessors() {
|
|
477
204
|
return this.rmvOption('processors');
|
|
478
205
|
}
|
|
479
|
-
/**
|
|
480
|
-
* Returns the current options object.
|
|
481
|
-
*
|
|
482
|
-
* @returns {CmpStrOptions} - The options
|
|
483
|
-
*/
|
|
484
206
|
getOptions() {
|
|
485
207
|
return this.options;
|
|
486
208
|
}
|
|
487
|
-
/**
|
|
488
|
-
* Returns the options as a JSON string.
|
|
489
|
-
*
|
|
490
|
-
* @returns {string} - The serialized options
|
|
491
|
-
*/
|
|
492
209
|
getSerializedOptions() {
|
|
493
210
|
return JSON.stringify(this.options);
|
|
494
211
|
}
|
|
495
|
-
/**
|
|
496
|
-
* Returns a specific option value by path.
|
|
497
|
-
*
|
|
498
|
-
* @param {string} path - The path to the option
|
|
499
|
-
* @returns {any} - The option value
|
|
500
|
-
*/
|
|
501
212
|
getOption(path) {
|
|
502
213
|
return DeepMerge.get(this.options, path);
|
|
503
214
|
}
|
|
504
|
-
/**
|
|
505
|
-
* ---------------------------------------------------------------------------------
|
|
506
|
-
* Public core methods for string comparison
|
|
507
|
-
* ---------------------------------------------------------------------------------
|
|
508
|
-
*
|
|
509
|
-
* These methods provide the core functionality of the CmpStr class, allowing for
|
|
510
|
-
* string comparison, phonetic indexing, filtering, and text search.
|
|
511
|
-
*/
|
|
512
|
-
/**
|
|
513
|
-
* Performs a single metric comparison between the source and target.
|
|
514
|
-
*
|
|
515
|
-
* @template T - The type of the metric result
|
|
516
|
-
* @param {string} a - The source string
|
|
517
|
-
* @param {string} b - The target string
|
|
518
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
519
|
-
* @returns {T} - The metric result
|
|
520
|
-
*/
|
|
521
215
|
test(a, b, opt) {
|
|
522
216
|
return this.compute(a, b, opt, 'single');
|
|
523
217
|
}
|
|
524
|
-
/**
|
|
525
|
-
* Performs a single metric comparison and returns only the numeric score.
|
|
526
|
-
*
|
|
527
|
-
* @param {string} a - The source string
|
|
528
|
-
* @param {string} b - The target string
|
|
529
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
530
|
-
* @returns {number} - The similarity score (0..1)
|
|
531
|
-
*/
|
|
532
218
|
compare(a, b, opt) {
|
|
533
219
|
return this.compute(a, b, opt, 'single', true).res;
|
|
534
220
|
}
|
|
535
|
-
/**
|
|
536
|
-
* Performs a batch metric comparison between source and target strings
|
|
537
|
-
* or array of strings.
|
|
538
|
-
*
|
|
539
|
-
* @template T - The type of the metric result
|
|
540
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
541
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
542
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
543
|
-
* @returns {T} - The batch metric results
|
|
544
|
-
*/
|
|
545
221
|
batchTest(a, b, opt) {
|
|
546
222
|
return this.compute(a, b, opt, 'batch');
|
|
547
223
|
}
|
|
548
|
-
/**
|
|
549
|
-
* Performs a batch metric comparison and returns results sorted by score.
|
|
550
|
-
*
|
|
551
|
-
* @template T - The type of the metric result
|
|
552
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
553
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
554
|
-
* @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
|
|
555
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
556
|
-
* @returns {T} - The sorted batch results
|
|
557
|
-
*/
|
|
558
224
|
batchSorted(a, b, dir = 'desc', opt) {
|
|
559
225
|
return this.output(
|
|
560
226
|
this.compute(a, b, opt, 'batch', true).sort((a, b) =>
|
|
@@ -563,32 +229,9 @@ class CmpStr {
|
|
|
563
229
|
opt?.raw ?? this.options.raw
|
|
564
230
|
);
|
|
565
231
|
}
|
|
566
|
-
/**
|
|
567
|
-
* Performs a pairwise metric comparison between source and target strings
|
|
568
|
-
* or array of strings.
|
|
569
|
-
*
|
|
570
|
-
* Input arrays needs of the same length to perform pairwise comparison,
|
|
571
|
-
* otherwise the method will throw an error.
|
|
572
|
-
*
|
|
573
|
-
* @template T - The type of the metric result
|
|
574
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
575
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
576
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
577
|
-
* @returns {T} - The pairwise metric results
|
|
578
|
-
*/
|
|
579
232
|
pairs(a, b, opt) {
|
|
580
233
|
return this.compute(a, b, opt, 'pairwise');
|
|
581
234
|
}
|
|
582
|
-
/**
|
|
583
|
-
* Performs a batch comparison and returns only results above the threshold.
|
|
584
|
-
*
|
|
585
|
-
* @template T - The type of the metric result
|
|
586
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
587
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
588
|
-
* @param {number} threshold - The similarity threshold (0..1)
|
|
589
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
590
|
-
* @returns {T} - The filtered batch results
|
|
591
|
-
*/
|
|
592
235
|
match(a, b, threshold, opt) {
|
|
593
236
|
return this.output(
|
|
594
237
|
this.compute(a, b, opt, 'batch', true)
|
|
@@ -597,56 +240,18 @@ class CmpStr {
|
|
|
597
240
|
opt?.raw ?? this.options.raw
|
|
598
241
|
);
|
|
599
242
|
}
|
|
600
|
-
/**
|
|
601
|
-
* Returns the n closest matches from a batch comparison.
|
|
602
|
-
*
|
|
603
|
-
* @template T - The type of the metric result
|
|
604
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
605
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
606
|
-
* @param {number} [n=1] - Number of closest matches
|
|
607
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
608
|
-
* @returns {T} - The closest matches
|
|
609
|
-
*/
|
|
610
243
|
closest(a, b, n = 1, opt) {
|
|
611
244
|
return this.batchSorted(a, b, 'desc', opt).slice(0, n);
|
|
612
245
|
}
|
|
613
|
-
/**
|
|
614
|
-
* Returns the n furthest matches from a batch comparison.
|
|
615
|
-
*
|
|
616
|
-
* @template T - The type of the metric result
|
|
617
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
618
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
619
|
-
* @param {number} [n=1] - Number of furthest matches
|
|
620
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
621
|
-
* @returns {T} - The furthest matches
|
|
622
|
-
*/
|
|
623
246
|
furthest(a, b, n = 1, opt) {
|
|
624
247
|
return this.batchSorted(a, b, 'asc', opt).slice(0, n);
|
|
625
248
|
}
|
|
626
|
-
/**
|
|
627
|
-
* Performs a normalized and filtered substring search.
|
|
628
|
-
*
|
|
629
|
-
* @param {string} needle - The search string
|
|
630
|
-
* @param {string[]} haystack - The array to search in
|
|
631
|
-
* @param {NormalizeFlags} [flags] - Normalization flags
|
|
632
|
-
* @param {CmpStrProcessors} [processors] - Pre-processors to apply
|
|
633
|
-
* @returns {string[]} - Array of matching entries
|
|
634
|
-
*/
|
|
635
249
|
search(needle, haystack, flags, processors) {
|
|
636
250
|
const resolved = this.resolveOptions({ flags, processors });
|
|
637
|
-
// Prepare the needle and haystack, normalizing and filtering them
|
|
638
251
|
const test = this.prepare(needle, resolved);
|
|
639
252
|
const hstk = this.prepare(haystack, resolved);
|
|
640
|
-
// Filter the haystack based on the normalized test string
|
|
641
253
|
return haystack.filter((_, i) => hstk[i].includes(test));
|
|
642
254
|
}
|
|
643
|
-
/**
|
|
644
|
-
* Computes a similarity matrix for the given input array.
|
|
645
|
-
*
|
|
646
|
-
* @param {string[]} input - The input array
|
|
647
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
648
|
-
* @returns {number[][]} - The similarity matrix
|
|
649
|
-
*/
|
|
650
255
|
matrix(input, opt) {
|
|
651
256
|
input = this.prepare(input, this.resolveOptions(opt));
|
|
652
257
|
return input.map((a) =>
|
|
@@ -655,15 +260,6 @@ class CmpStr {
|
|
|
655
260
|
)
|
|
656
261
|
);
|
|
657
262
|
}
|
|
658
|
-
/**
|
|
659
|
-
* Computes the phonetic index for a string using the configured
|
|
660
|
-
* or given algorithm.
|
|
661
|
-
*
|
|
662
|
-
* @param {string} [input] - The input string
|
|
663
|
-
* @param {string} [algo] - The phonetic algorithm to use
|
|
664
|
-
* @param {PhoneticOptions} [opt] - Optional phonetic options
|
|
665
|
-
* @returns {string} - The phonetic index as a string
|
|
666
|
-
*/
|
|
667
263
|
phoneticIndex(input, algo, opt) {
|
|
668
264
|
const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
|
|
669
265
|
return this.index(input, { algo: algo ?? a, opt: opt ?? o });
|