cmpstr 3.0.3 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/dist/CmpStr.esm.js +2487 -4948
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +3 -3
- package/dist/CmpStr.esm.min.js.map +1 -1
- package/dist/CmpStr.umd.js +2601 -5040
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +3 -3
- package/dist/CmpStr.umd.min.js.map +1 -1
- package/dist/cjs/CmpStr.cjs +41 -405
- package/dist/cjs/CmpStr.cjs.map +1 -1
- package/dist/cjs/CmpStrAsync.cjs +38 -221
- package/dist/cjs/CmpStrAsync.cjs.map +1 -1
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +1 -56
- package/dist/cjs/metric/Cosine.cjs.map +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +2 -64
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +1 -56
- package/dist/cjs/metric/DiceSorensen.cjs.map +1 -1
- package/dist/cjs/metric/Hamming.cjs +2 -51
- package/dist/cjs/metric/Hamming.cjs.map +1 -1
- package/dist/cjs/metric/Jaccard.cjs +1 -48
- package/dist/cjs/metric/Jaccard.cjs.map +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +1 -53
- package/dist/cjs/metric/JaroWinkler.cjs.map +1 -1
- package/dist/cjs/metric/LCS.cjs +1 -54
- package/dist/cjs/metric/LCS.cjs.map +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +2 -60
- package/dist/cjs/metric/Levenshtein.cjs.map +1 -1
- package/dist/cjs/metric/Metric.cjs +1 -261
- package/dist/cjs/metric/Metric.cjs.map +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs +4 -56
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs +4 -58
- package/dist/cjs/metric/SmithWaterman.cjs.map +1 -1
- package/dist/cjs/metric/qGram.cjs +1 -55
- package/dist/cjs/metric/qGram.cjs.map +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +1 -78
- package/dist/cjs/phonetic/Caverphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Cologne.cjs +1 -43
- package/dist/cjs/phonetic/Cologne.cjs.map +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +1 -76
- package/dist/cjs/phonetic/Metaphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +1 -261
- package/dist/cjs/phonetic/Phonetic.cjs.map +1 -1
- package/dist/cjs/phonetic/Soundex.cjs +1 -47
- package/dist/cjs/phonetic/Soundex.cjs.map +1 -1
- package/dist/cjs/root.cjs +49 -0
- package/dist/cjs/root.cjs.map +1 -0
- package/dist/cjs/utils/DeepMerge.cjs +8 -75
- package/dist/cjs/utils/DeepMerge.cjs.map +1 -1
- package/dist/cjs/utils/DiffChecker.cjs +2 -190
- package/dist/cjs/utils/DiffChecker.cjs.map +1 -1
- package/dist/cjs/utils/Filter.cjs +1 -112
- package/dist/cjs/utils/Filter.cjs.map +1 -1
- package/dist/cjs/utils/HashTable.cjs +1 -99
- package/dist/cjs/utils/HashTable.cjs.map +1 -1
- package/dist/cjs/utils/Normalizer.cjs +3 -94
- package/dist/cjs/utils/Normalizer.cjs.map +1 -1
- package/dist/cjs/utils/Pool.cjs +10 -105
- package/dist/cjs/utils/Pool.cjs.map +1 -1
- package/dist/cjs/utils/Profiler.cjs +1 -133
- package/dist/cjs/utils/Profiler.cjs.map +1 -1
- package/dist/cjs/utils/Registry.cjs +1 -89
- package/dist/cjs/utils/Registry.cjs.map +1 -1
- package/dist/cjs/utils/StructuredData.cjs +145 -0
- package/dist/cjs/utils/StructuredData.cjs.map +1 -0
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -180
- package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -1
- package/dist/esm/CmpStr.mjs +41 -405
- package/dist/esm/CmpStr.mjs.map +1 -1
- package/dist/esm/CmpStrAsync.mjs +38 -221
- package/dist/esm/CmpStrAsync.mjs.map +1 -1
- package/dist/esm/index.mjs +1 -1
- package/dist/esm/metric/Cosine.mjs +1 -56
- package/dist/esm/metric/Cosine.mjs.map +1 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs +2 -64
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -1
- package/dist/esm/metric/DiceSorensen.mjs +1 -56
- package/dist/esm/metric/DiceSorensen.mjs.map +1 -1
- package/dist/esm/metric/Hamming.mjs +2 -51
- package/dist/esm/metric/Hamming.mjs.map +1 -1
- package/dist/esm/metric/Jaccard.mjs +1 -48
- package/dist/esm/metric/Jaccard.mjs.map +1 -1
- package/dist/esm/metric/JaroWinkler.mjs +1 -53
- package/dist/esm/metric/JaroWinkler.mjs.map +1 -1
- package/dist/esm/metric/LCS.mjs +1 -54
- package/dist/esm/metric/LCS.mjs.map +1 -1
- package/dist/esm/metric/Levenshtein.mjs +2 -60
- package/dist/esm/metric/Levenshtein.mjs.map +1 -1
- package/dist/esm/metric/Metric.mjs +1 -261
- package/dist/esm/metric/Metric.mjs.map +1 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs +4 -56
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -1
- package/dist/esm/metric/SmithWaterman.mjs +4 -58
- package/dist/esm/metric/SmithWaterman.mjs.map +1 -1
- package/dist/esm/metric/qGram.mjs +1 -55
- package/dist/esm/metric/qGram.mjs.map +1 -1
- package/dist/esm/phonetic/Caverphone.mjs +1 -78
- package/dist/esm/phonetic/Caverphone.mjs.map +1 -1
- package/dist/esm/phonetic/Cologne.mjs +1 -43
- package/dist/esm/phonetic/Cologne.mjs.map +1 -1
- package/dist/esm/phonetic/Metaphone.mjs +1 -76
- package/dist/esm/phonetic/Metaphone.mjs.map +1 -1
- package/dist/esm/phonetic/Phonetic.mjs +1 -261
- package/dist/esm/phonetic/Phonetic.mjs.map +1 -1
- package/dist/esm/phonetic/Soundex.mjs +1 -47
- package/dist/esm/phonetic/Soundex.mjs.map +1 -1
- package/dist/esm/root.mjs +35 -0
- package/dist/esm/root.mjs.map +1 -0
- package/dist/esm/utils/DeepMerge.mjs +8 -76
- package/dist/esm/utils/DeepMerge.mjs.map +1 -1
- package/dist/esm/utils/DiffChecker.mjs +2 -190
- package/dist/esm/utils/DiffChecker.mjs.map +1 -1
- package/dist/esm/utils/Filter.mjs +1 -112
- package/dist/esm/utils/Filter.mjs.map +1 -1
- package/dist/esm/utils/HashTable.mjs +1 -99
- package/dist/esm/utils/HashTable.mjs.map +1 -1
- package/dist/esm/utils/Normalizer.mjs +3 -94
- package/dist/esm/utils/Normalizer.mjs.map +1 -1
- package/dist/esm/utils/Pool.mjs +10 -105
- package/dist/esm/utils/Pool.mjs.map +1 -1
- package/dist/esm/utils/Profiler.mjs +1 -133
- package/dist/esm/utils/Profiler.mjs.map +1 -1
- package/dist/esm/utils/Registry.mjs +1 -89
- package/dist/esm/utils/Registry.mjs.map +1 -1
- package/dist/esm/utils/StructuredData.mjs +143 -0
- package/dist/esm/utils/StructuredData.mjs.map +1 -0
- package/dist/esm/utils/TextAnalyzer.mjs +1 -180
- package/dist/esm/utils/TextAnalyzer.mjs.map +1 -1
- package/dist/types/CmpStr.d.ts +90 -8
- package/dist/types/CmpStrAsync.d.ts +82 -8
- package/dist/types/index.d.ts +3 -2
- package/dist/types/root.d.ts +39 -0
- package/dist/types/utils/Pool.d.ts +2 -2
- package/dist/types/utils/StructuredData.d.ts +162 -0
- package/dist/types/utils/Types.d.ts +35 -1
- package/package.json +63 -22
package/dist/esm/CmpStr.mjs
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
// CmpStr v3.0
|
|
1
|
+
// CmpStr v3.1.0 build-76aadb9-260117 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
import { merge, set, rmv, get } from './utils/DeepMerge.mjs';
|
|
3
3
|
import { Profiler } from './utils/Profiler.mjs';
|
|
4
4
|
import { TextAnalyzer } from './utils/TextAnalyzer.mjs';
|
|
5
5
|
import { DiffChecker } from './utils/DiffChecker.mjs';
|
|
6
6
|
import { Normalizer } from './utils/Normalizer.mjs';
|
|
7
7
|
import { Filter } from './utils/Filter.mjs';
|
|
8
|
+
import { StructuredData } from './utils/StructuredData.mjs';
|
|
8
9
|
import { factory } from './utils/Registry.mjs';
|
|
9
10
|
import './metric/Cosine.mjs';
|
|
10
11
|
import './metric/DamerauLevenshtein.mjs';
|
|
@@ -28,49 +29,8 @@ import {
|
|
|
28
29
|
Phonetic
|
|
29
30
|
} from './phonetic/Phonetic.mjs';
|
|
30
31
|
|
|
31
|
-
/**
|
|
32
|
-
* CmpStr Main API
|
|
33
|
-
* src/CmpStr.ts
|
|
34
|
-
*
|
|
35
|
-
* The CmpStr class provides a comprehensive, highly abstracted, and type-safe interface
|
|
36
|
-
* for string comparison, similarity measurement, phonetic indexing, filtering, normalization,
|
|
37
|
-
* and text analysis. It unifies all core features of the CmpStr package and exposes a
|
|
38
|
-
* consistent, user-friendly API for both single and batch operations.
|
|
39
|
-
*
|
|
40
|
-
* Features:
|
|
41
|
-
* - Centralized management of metrics, phonetic algorithms, and filters
|
|
42
|
-
* - Flexible normalization and filtering pipeline for all inputs
|
|
43
|
-
* - Batch, pairwise, and single string comparison with detailed results
|
|
44
|
-
* - Phonetic indexing and phonetic-aware search and comparison
|
|
45
|
-
* - Text analysis and unified diff utilities
|
|
46
|
-
* - Full TypeScript type safety and extensibility
|
|
47
|
-
*
|
|
48
|
-
* @module CmpStr
|
|
49
|
-
* @author Paul Köhler (komed3)
|
|
50
|
-
* @license MIT
|
|
51
|
-
*/
|
|
52
|
-
// Import the Profiler instance for global profiling
|
|
53
32
|
const profiler = Profiler.getInstance();
|
|
54
|
-
/**
|
|
55
|
-
* The main CmpStr class that provides a unified interface for string comparison,
|
|
56
|
-
* phonetic indexing, filtering, and text analysis.
|
|
57
|
-
*
|
|
58
|
-
* @template R - The type of the metric result, defaults to MetricRaw
|
|
59
|
-
*/
|
|
60
33
|
class CmpStr {
|
|
61
|
-
/**
|
|
62
|
-
* --------------------------------------------------------------------------------
|
|
63
|
-
* Static methods and properties for global access to CmpStr features
|
|
64
|
-
* --------------------------------------------------------------------------------
|
|
65
|
-
*
|
|
66
|
-
* These static methods provide a convenient way to access the core features of
|
|
67
|
-
* the CmpStr package without needing to instantiate a CmpStr object.
|
|
68
|
-
*/
|
|
69
|
-
/**
|
|
70
|
-
* Adds, removes, pauses, resumes, lists, or clears global filters.
|
|
71
|
-
*
|
|
72
|
-
* @see Filter
|
|
73
|
-
*/
|
|
74
34
|
static filter = {
|
|
75
35
|
add: Filter.add,
|
|
76
36
|
remove: Filter.remove,
|
|
@@ -79,22 +39,12 @@ class CmpStr {
|
|
|
79
39
|
list: Filter.list,
|
|
80
40
|
clear: Filter.clear
|
|
81
41
|
};
|
|
82
|
-
/**
|
|
83
|
-
* Adds, removes, checks, or lists available metrics.
|
|
84
|
-
*
|
|
85
|
-
* @see MetricRegistry
|
|
86
|
-
*/
|
|
87
42
|
static metric = {
|
|
88
43
|
add: MetricRegistry.add,
|
|
89
44
|
remove: MetricRegistry.remove,
|
|
90
45
|
has: MetricRegistry.has,
|
|
91
46
|
list: MetricRegistry.list
|
|
92
47
|
};
|
|
93
|
-
/**
|
|
94
|
-
* Adds, removes, checks, or lists available phonetic algorithms and mappings.
|
|
95
|
-
*
|
|
96
|
-
* @see PhoneticRegistry
|
|
97
|
-
*/
|
|
98
48
|
static phonetic = {
|
|
99
49
|
add: PhoneticRegistry.add,
|
|
100
50
|
remove: PhoneticRegistry.remove,
|
|
@@ -107,89 +57,30 @@ class CmpStr {
|
|
|
107
57
|
list: PhoneticMappingRegistry.list
|
|
108
58
|
}
|
|
109
59
|
};
|
|
110
|
-
/**
|
|
111
|
-
* Provides access to the global profiler services.
|
|
112
|
-
*
|
|
113
|
-
* @see Profiler
|
|
114
|
-
*/
|
|
115
60
|
static profiler = profiler.services;
|
|
116
|
-
/**
|
|
117
|
-
* Clears the caches for normalizer, metric, and phonetic modules.
|
|
118
|
-
*/
|
|
119
61
|
static clearCache = {
|
|
120
62
|
normalizer: Normalizer.clear,
|
|
121
63
|
metric: Metric.clear,
|
|
122
64
|
phonetic: Phonetic.clear
|
|
123
65
|
};
|
|
124
|
-
/**
|
|
125
|
-
* Returns a TextAnalyzer instance for the given input string.
|
|
126
|
-
*
|
|
127
|
-
* @param {string} [input] - The input string
|
|
128
|
-
* @returns {TextAnalyzer} - The text analyzer
|
|
129
|
-
*/
|
|
130
66
|
static analyze(input) {
|
|
131
67
|
return new TextAnalyzer(input);
|
|
132
68
|
}
|
|
133
|
-
/**
|
|
134
|
-
* Returns a DiffChecker instance for computing the unified diff between two texts.
|
|
135
|
-
*
|
|
136
|
-
* @param {string} a - The first (original) text
|
|
137
|
-
* @param {string} b - The second (modified) text
|
|
138
|
-
* @param {DiffOptions} [opt] - Optional diff configuration
|
|
139
|
-
* @returns {DiffChecker} - The diff checker instance
|
|
140
|
-
*/
|
|
141
69
|
static diff(a, b, opt) {
|
|
142
70
|
return new DiffChecker(a, b, opt);
|
|
143
71
|
}
|
|
144
|
-
/**
|
|
145
|
-
* --------------------------------------------------------------------------------
|
|
146
|
-
* Instanciate the CmpStr class
|
|
147
|
-
* --------------------------------------------------------------------------------
|
|
148
|
-
*
|
|
149
|
-
* Methods to create a new CmpStr instance with the given options.
|
|
150
|
-
* Using the static `create` method is recommended to ensure proper instantiation.
|
|
151
|
-
*/
|
|
152
|
-
/**
|
|
153
|
-
* Creates a new CmpStr instance with the given options.
|
|
154
|
-
*
|
|
155
|
-
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
|
|
156
|
-
* @returns {CmpStr<R>} - A new CmpStr instance
|
|
157
|
-
*/
|
|
158
72
|
static create(opt) {
|
|
159
73
|
return new CmpStr(opt);
|
|
160
74
|
}
|
|
161
|
-
// The options object that holds the configuration for this CmpStr instance
|
|
162
75
|
options = Object.create(null);
|
|
163
|
-
/**
|
|
164
|
-
* Creates a new CmpStr instance with the given options.
|
|
165
|
-
* The constructor is protected to enforce the use of the static `create` method.
|
|
166
|
-
*
|
|
167
|
-
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
|
|
168
|
-
*/
|
|
169
76
|
constructor(opt) {
|
|
170
77
|
if (opt)
|
|
171
78
|
typeof opt === 'string'
|
|
172
79
|
? this.setSerializedOptions(opt)
|
|
173
80
|
: this.setOptions(opt);
|
|
174
81
|
}
|
|
175
|
-
/**
|
|
176
|
-
* ---------------------------------------------------------------------------------
|
|
177
|
-
* Protected utility methods for internal use
|
|
178
|
-
* ---------------------------------------------------------------------------------
|
|
179
|
-
*
|
|
180
|
-
* These methods provide utility functions for converting inputs, merging options,
|
|
181
|
-
* normalizing inputs, filtering, and preparing inputs for comparison.
|
|
182
|
-
*/
|
|
183
|
-
/**
|
|
184
|
-
* Assert a condition and throws if the condition is not met.
|
|
185
|
-
*
|
|
186
|
-
* @param {string} cond - The condition to met
|
|
187
|
-
* @param {any} [test] - Value to test for
|
|
188
|
-
* @throws {Error} If the condition is not met
|
|
189
|
-
*/
|
|
190
82
|
assert(cond, test) {
|
|
191
83
|
switch (cond) {
|
|
192
|
-
// Check if the metric exists
|
|
193
84
|
case 'metric':
|
|
194
85
|
if (!CmpStr.metric.has(test))
|
|
195
86
|
throw new Error(
|
|
@@ -197,7 +88,6 @@ class CmpStr {
|
|
|
197
88
|
`use CmpStr.metric.list() for available metrics`
|
|
198
89
|
);
|
|
199
90
|
break;
|
|
200
|
-
// Check if the phonetic algorithm exists
|
|
201
91
|
case 'phonetic':
|
|
202
92
|
if (!CmpStr.phonetic.has(test))
|
|
203
93
|
throw new Error(
|
|
@@ -205,85 +95,34 @@ class CmpStr {
|
|
|
205
95
|
`use CmpStr.phonetic.list() for available phonetic algorithms`
|
|
206
96
|
);
|
|
207
97
|
break;
|
|
208
|
-
// Throw an error for unknown conditions
|
|
209
98
|
default:
|
|
210
99
|
throw new Error(`Cmpstr condition <${cond}> unknown`);
|
|
211
100
|
}
|
|
212
101
|
}
|
|
213
|
-
/**
|
|
214
|
-
* Assert multiple conditions.
|
|
215
|
-
*
|
|
216
|
-
* @param {[ string, any? ][]} cond - Array of [ condition, value ] pairs
|
|
217
|
-
*/
|
|
218
102
|
assertMany(...cond) {
|
|
219
103
|
for (const [c, test] of cond) this.assert(c, test);
|
|
220
104
|
}
|
|
221
|
-
/**
|
|
222
|
-
* Resolves the options for the CmpStr instance, merging the provided options with
|
|
223
|
-
* the existing options.
|
|
224
|
-
*
|
|
225
|
-
* @param {CmpStrOptions} [opt] - Optional options to merge
|
|
226
|
-
* @returns {CmpStrOptions} - The resolved options
|
|
227
|
-
*/
|
|
228
105
|
resolveOptions(opt) {
|
|
229
106
|
return merge({ ...(this.options ?? Object.create(null)) }, opt);
|
|
230
107
|
}
|
|
231
|
-
/**
|
|
232
|
-
* Normalizes the input string or array using the configured or provided flags.
|
|
233
|
-
*
|
|
234
|
-
* @param {MetricInput} input - The input string or array
|
|
235
|
-
* @param {NormalizeFlags} [flags] - Normalization flags
|
|
236
|
-
* @returns {MetricInput} - The normalized input
|
|
237
|
-
*/
|
|
238
108
|
normalize(input, flags) {
|
|
239
109
|
return Normalizer.normalize(input, flags ?? this.options.flags ?? '');
|
|
240
110
|
}
|
|
241
|
-
/**
|
|
242
|
-
* Applies all active filters to the input string or array.
|
|
243
|
-
*
|
|
244
|
-
* @param {MetricInput} input - The input string or array
|
|
245
|
-
* @param {string} [hook='input'] - The filter hook
|
|
246
|
-
* @returns {MetricInput} - The filtered string(s)
|
|
247
|
-
*/
|
|
248
111
|
filter(input, hook) {
|
|
249
112
|
return Filter.apply(hook, input);
|
|
250
113
|
}
|
|
251
|
-
/**
|
|
252
|
-
* Prepares the input by normalizing and filtering.
|
|
253
|
-
*
|
|
254
|
-
* @param {MetricInput} [input] - The input string or array
|
|
255
|
-
* @param {CmpStrOptions} [opt] - Optional options to use
|
|
256
|
-
* @returns {MetricInput} - The prepared input
|
|
257
|
-
*/
|
|
258
114
|
prepare(input, opt) {
|
|
259
115
|
const { flags, processors } = opt ?? this.options;
|
|
260
|
-
// Normalize the input using flags (i.e., 'itw')
|
|
261
116
|
if (flags?.length) input = this.normalize(input, flags);
|
|
262
|
-
// Filter the input using hooked up filters
|
|
263
117
|
input = this.filter(input, 'input');
|
|
264
|
-
// Apply phonetic processors if configured
|
|
265
118
|
if (processors?.phonetic) input = this.index(input, processors.phonetic);
|
|
266
119
|
return input;
|
|
267
120
|
}
|
|
268
|
-
/**
|
|
269
|
-
* Post-process the results of the metric computation.
|
|
270
|
-
*
|
|
271
|
-
* @param {MetricResult<R>} result - The metric result
|
|
272
|
-
* @returns {MetricResult<R>} - The post-processed results
|
|
273
|
-
*/
|
|
274
121
|
postProcess(result, opt) {
|
|
275
|
-
// Remove "zero similarity" from batch results if configured
|
|
276
122
|
if (opt?.removeZero && Array.isArray(result))
|
|
277
123
|
result = result.filter((r) => r.res > 0);
|
|
278
124
|
return result;
|
|
279
125
|
}
|
|
280
|
-
/**
|
|
281
|
-
* Computes the phonetic index for the given input using the specified phonetic algorithm.
|
|
282
|
-
*
|
|
283
|
-
* @param {MetricInput} input - The input string or array
|
|
284
|
-
* @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
|
|
285
|
-
* @returns {MetricInput} - The phonetic index for the given input
|
|
286
|
-
*/
|
|
287
126
|
index(input, { algo, opt }) {
|
|
288
127
|
this.assert('phonetic', algo);
|
|
289
128
|
const phonetic = factory.phonetic(algo, opt);
|
|
@@ -292,26 +131,14 @@ class CmpStr {
|
|
|
292
131
|
? input.map((s) => phonetic.getIndex(s).join(delimiter))
|
|
293
132
|
: phonetic.getIndex(input).join(delimiter);
|
|
294
133
|
}
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
*
|
|
299
|
-
* @template T - The type of the metric result
|
|
300
|
-
* @param {MetricInput} a - The first input string or array
|
|
301
|
-
* @param {MetricInput} b - The second input string or array
|
|
302
|
-
* @param {CmpStrOptions} [opt] - Optional options to use
|
|
303
|
-
* @param {MetricMode} [mode='single'] - The metric mode to use
|
|
304
|
-
* @param {boolean} [raw=false] - Whether to return raw results
|
|
305
|
-
* @param {boolean} [skip=false] - Whether to skip normalization and filtering
|
|
306
|
-
* @returns {T} - The computed metric result
|
|
307
|
-
*/
|
|
134
|
+
structured(data, key) {
|
|
135
|
+
return StructuredData.create(data, key);
|
|
136
|
+
}
|
|
308
137
|
compute(a, b, opt, mode, raw, skip) {
|
|
309
138
|
const resolved = this.resolveOptions(opt);
|
|
310
139
|
this.assert('metric', resolved.metric);
|
|
311
|
-
// Prepare the input
|
|
312
140
|
const A = skip ? a : this.prepare(a, resolved);
|
|
313
141
|
const B = skip ? b : this.prepare(b, resolved);
|
|
314
|
-
// If the inputs are empty and safeEmpty is enabled, return an empty array
|
|
315
142
|
if (
|
|
316
143
|
resolved.safeEmpty &&
|
|
317
144
|
((Array.isArray(A) && A.length === 0) ||
|
|
@@ -321,25 +148,12 @@ class CmpStr {
|
|
|
321
148
|
) {
|
|
322
149
|
return [];
|
|
323
150
|
}
|
|
324
|
-
// Get the metric class
|
|
325
151
|
const metric = factory.metric(resolved.metric, A, B, resolved.opt);
|
|
326
|
-
// Pass the original inputs to the metric
|
|
327
152
|
if (resolved.output !== 'prep') metric.setOriginal(a, b);
|
|
328
|
-
// Compute the metric result
|
|
329
153
|
metric.run(mode);
|
|
330
|
-
// Post-process the results
|
|
331
154
|
const result = this.postProcess(metric.getResults(), resolved);
|
|
332
|
-
// Resolve and return the result based on the raw flag
|
|
333
155
|
return this.output(result, raw ?? resolved.raw);
|
|
334
156
|
}
|
|
335
|
-
/**
|
|
336
|
-
* Resolves the result format (raw or formatted).
|
|
337
|
-
*
|
|
338
|
-
* @template T - The type of the metric result
|
|
339
|
-
* @param {MetricResult<R>} result - The metric result
|
|
340
|
-
* @param {boolean} [raw] - Whether to return raw results
|
|
341
|
-
* @returns {T} - The resolved result
|
|
342
|
-
*/
|
|
343
157
|
output(result, raw) {
|
|
344
158
|
return (raw ?? this.options.raw)
|
|
345
159
|
? result
|
|
@@ -347,213 +161,69 @@ class CmpStr {
|
|
|
347
161
|
? result.map((r) => ({ source: r.a, target: r.b, match: r.res }))
|
|
348
162
|
: { source: result.a, target: result.b, match: result.res };
|
|
349
163
|
}
|
|
350
|
-
/**
|
|
351
|
-
* ---------------------------------------------------------------------------------
|
|
352
|
-
* Managing methods for CmpStr
|
|
353
|
-
* ---------------------------------------------------------------------------------
|
|
354
|
-
*
|
|
355
|
-
* These methods provides an interface to set and get properties of the CmpStr
|
|
356
|
-
* instance, such as options, metric, phonetic algorithm, and more.
|
|
357
|
-
*/
|
|
358
|
-
/**
|
|
359
|
-
* Creates a shallow clone of the current instance.
|
|
360
|
-
*
|
|
361
|
-
* @returns {CmpStr<R>} - The cloned instance
|
|
362
|
-
*/
|
|
363
164
|
clone() {
|
|
364
165
|
return Object.assign(Object.create(Object.getPrototypeOf(this)), this);
|
|
365
166
|
}
|
|
366
|
-
/**
|
|
367
|
-
* Resets the instance, clearing all data and options.
|
|
368
|
-
*
|
|
369
|
-
* @returns {this}
|
|
370
|
-
*/
|
|
371
167
|
reset() {
|
|
372
168
|
for (const k in this.options) delete this.options[k];
|
|
373
169
|
return this;
|
|
374
170
|
}
|
|
375
|
-
/**
|
|
376
|
-
* Sets / replaces the full options object.
|
|
377
|
-
*
|
|
378
|
-
* @param {CmpStrOptions} opt - The options
|
|
379
|
-
* @returns {this}
|
|
380
|
-
*/
|
|
381
171
|
setOptions(opt) {
|
|
382
172
|
this.options = opt;
|
|
383
173
|
return this;
|
|
384
174
|
}
|
|
385
|
-
/**
|
|
386
|
-
* Deep merges and sets new options.
|
|
387
|
-
*
|
|
388
|
-
* @param {CmpStrOptions} opt - The options to merge
|
|
389
|
-
* @returns {this}
|
|
390
|
-
*/
|
|
391
175
|
mergeOptions(opt) {
|
|
392
176
|
merge(this.options, opt);
|
|
393
177
|
return this;
|
|
394
178
|
}
|
|
395
|
-
/**
|
|
396
|
-
* Sets the serialized options from a JSON string.
|
|
397
|
-
*
|
|
398
|
-
* @param {string} opt - The serialized options
|
|
399
|
-
* @returns {this}
|
|
400
|
-
*/
|
|
401
179
|
setSerializedOptions(opt) {
|
|
402
180
|
this.options = JSON.parse(opt);
|
|
403
181
|
return this;
|
|
404
182
|
}
|
|
405
|
-
/**
|
|
406
|
-
* Sets a specific option at the given path.
|
|
407
|
-
*
|
|
408
|
-
* @param {string} path - The path to the option
|
|
409
|
-
* @param {any} value - The value to set
|
|
410
|
-
* @returns {this}
|
|
411
|
-
*/
|
|
412
183
|
setOption(path, value) {
|
|
413
184
|
set(this.options, path, value);
|
|
414
185
|
return this;
|
|
415
186
|
}
|
|
416
|
-
/**
|
|
417
|
-
* Removes an option at the given path.
|
|
418
|
-
*
|
|
419
|
-
* @param {string} path - The path to the option
|
|
420
|
-
* @returns {this}
|
|
421
|
-
*/
|
|
422
187
|
rmvOption(path) {
|
|
423
188
|
rmv(this.options, path);
|
|
424
189
|
return this;
|
|
425
190
|
}
|
|
426
|
-
/**
|
|
427
|
-
* Enable or disable raw output.
|
|
428
|
-
*
|
|
429
|
-
* @param {boolean} enable - Whether to enable or disable raw output
|
|
430
|
-
* @returns {this}
|
|
431
|
-
*/
|
|
432
191
|
setRaw(enable) {
|
|
433
192
|
return this.setOption('raw', enable);
|
|
434
193
|
}
|
|
435
|
-
/**
|
|
436
|
-
* Sets the similatity metric to use (e.g., 'levenshtein', 'dice').
|
|
437
|
-
*
|
|
438
|
-
* @param {string} name - The metric name
|
|
439
|
-
* @returns {this}
|
|
440
|
-
*/
|
|
441
194
|
setMetric(name) {
|
|
442
195
|
return this.setOption('metric', name);
|
|
443
196
|
}
|
|
444
|
-
/**
|
|
445
|
-
* Sets the normalization flags (e.g., 'itw', 'nfc').
|
|
446
|
-
*
|
|
447
|
-
* @param {NormalizeFlags} flags - The normalization flags
|
|
448
|
-
* @returns {this}
|
|
449
|
-
*/
|
|
450
197
|
setFlags(flags) {
|
|
451
198
|
return this.setOption('flags', flags);
|
|
452
199
|
}
|
|
453
|
-
/**
|
|
454
|
-
* Removes the normalization flags entirely.
|
|
455
|
-
*
|
|
456
|
-
* @return {this}
|
|
457
|
-
*/
|
|
458
200
|
rmvFlags() {
|
|
459
201
|
return this.rmvOption('flags');
|
|
460
202
|
}
|
|
461
|
-
/**
|
|
462
|
-
* Sets the pre-processors to use for preparing the input.
|
|
463
|
-
*
|
|
464
|
-
* @param {CmpStrProcessors} opt - The processors to set
|
|
465
|
-
* @returns {this}
|
|
466
|
-
*/
|
|
467
203
|
setProcessors(opt) {
|
|
468
204
|
return this.setOption('processors', opt);
|
|
469
205
|
}
|
|
470
|
-
/**
|
|
471
|
-
* Removes the processors entirely.
|
|
472
|
-
*
|
|
473
|
-
* @returns {this}
|
|
474
|
-
*/
|
|
475
206
|
rmvProcessors() {
|
|
476
207
|
return this.rmvOption('processors');
|
|
477
208
|
}
|
|
478
|
-
/**
|
|
479
|
-
* Returns the current options object.
|
|
480
|
-
*
|
|
481
|
-
* @returns {CmpStrOptions} - The options
|
|
482
|
-
*/
|
|
483
209
|
getOptions() {
|
|
484
210
|
return this.options;
|
|
485
211
|
}
|
|
486
|
-
/**
|
|
487
|
-
* Returns the options as a JSON string.
|
|
488
|
-
*
|
|
489
|
-
* @returns {string} - The serialized options
|
|
490
|
-
*/
|
|
491
212
|
getSerializedOptions() {
|
|
492
213
|
return JSON.stringify(this.options);
|
|
493
214
|
}
|
|
494
|
-
/**
|
|
495
|
-
* Returns a specific option value by path.
|
|
496
|
-
*
|
|
497
|
-
* @param {string} path - The path to the option
|
|
498
|
-
* @returns {any} - The option value
|
|
499
|
-
*/
|
|
500
215
|
getOption(path) {
|
|
501
216
|
return get(this.options, path);
|
|
502
217
|
}
|
|
503
|
-
/**
|
|
504
|
-
* ---------------------------------------------------------------------------------
|
|
505
|
-
* Public core methods for string comparison
|
|
506
|
-
* ---------------------------------------------------------------------------------
|
|
507
|
-
*
|
|
508
|
-
* These methods provide the core functionality of the CmpStr class, allowing for
|
|
509
|
-
* string comparison, phonetic indexing, filtering, and text search.
|
|
510
|
-
*/
|
|
511
|
-
/**
|
|
512
|
-
* Performs a single metric comparison between the source and target.
|
|
513
|
-
*
|
|
514
|
-
* @template T - The type of the metric result
|
|
515
|
-
* @param {string} a - The source string
|
|
516
|
-
* @param {string} b - The target string
|
|
517
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
518
|
-
* @returns {T} - The metric result
|
|
519
|
-
*/
|
|
520
218
|
test(a, b, opt) {
|
|
521
219
|
return this.compute(a, b, opt, 'single');
|
|
522
220
|
}
|
|
523
|
-
/**
|
|
524
|
-
* Performs a single metric comparison and returns only the numeric score.
|
|
525
|
-
*
|
|
526
|
-
* @param {string} a - The source string
|
|
527
|
-
* @param {string} b - The target string
|
|
528
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
529
|
-
* @returns {number} - The similarity score (0..1)
|
|
530
|
-
*/
|
|
531
221
|
compare(a, b, opt) {
|
|
532
222
|
return this.compute(a, b, opt, 'single', true).res;
|
|
533
223
|
}
|
|
534
|
-
/**
|
|
535
|
-
* Performs a batch metric comparison between source and target strings
|
|
536
|
-
* or array of strings.
|
|
537
|
-
*
|
|
538
|
-
* @template T - The type of the metric result
|
|
539
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
540
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
541
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
542
|
-
* @returns {T} - The batch metric results
|
|
543
|
-
*/
|
|
544
224
|
batchTest(a, b, opt) {
|
|
545
225
|
return this.compute(a, b, opt, 'batch');
|
|
546
226
|
}
|
|
547
|
-
/**
|
|
548
|
-
* Performs a batch metric comparison and returns results sorted by score.
|
|
549
|
-
*
|
|
550
|
-
* @template T - The type of the metric result
|
|
551
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
552
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
553
|
-
* @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
|
|
554
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
555
|
-
* @returns {T} - The sorted batch results
|
|
556
|
-
*/
|
|
557
227
|
batchSorted(a, b, dir = 'desc', opt) {
|
|
558
228
|
return this.output(
|
|
559
229
|
this.compute(a, b, opt, 'batch', true).sort((a, b) =>
|
|
@@ -562,32 +232,9 @@ class CmpStr {
|
|
|
562
232
|
opt?.raw ?? this.options.raw
|
|
563
233
|
);
|
|
564
234
|
}
|
|
565
|
-
/**
|
|
566
|
-
* Performs a pairwise metric comparison between source and target strings
|
|
567
|
-
* or array of strings.
|
|
568
|
-
*
|
|
569
|
-
* Input arrays needs of the same length to perform pairwise comparison,
|
|
570
|
-
* otherwise the method will throw an error.
|
|
571
|
-
*
|
|
572
|
-
* @template T - The type of the metric result
|
|
573
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
574
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
575
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
576
|
-
* @returns {T} - The pairwise metric results
|
|
577
|
-
*/
|
|
578
235
|
pairs(a, b, opt) {
|
|
579
236
|
return this.compute(a, b, opt, 'pairwise');
|
|
580
237
|
}
|
|
581
|
-
/**
|
|
582
|
-
* Performs a batch comparison and returns only results above the threshold.
|
|
583
|
-
*
|
|
584
|
-
* @template T - The type of the metric result
|
|
585
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
586
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
587
|
-
* @param {number} threshold - The similarity threshold (0..1)
|
|
588
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
589
|
-
* @returns {T} - The filtered batch results
|
|
590
|
-
*/
|
|
591
238
|
match(a, b, threshold, opt) {
|
|
592
239
|
return this.output(
|
|
593
240
|
this.compute(a, b, opt, 'batch', true)
|
|
@@ -596,56 +243,18 @@ class CmpStr {
|
|
|
596
243
|
opt?.raw ?? this.options.raw
|
|
597
244
|
);
|
|
598
245
|
}
|
|
599
|
-
/**
|
|
600
|
-
* Returns the n closest matches from a batch comparison.
|
|
601
|
-
*
|
|
602
|
-
* @template T - The type of the metric result
|
|
603
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
604
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
605
|
-
* @param {number} [n=1] - Number of closest matches
|
|
606
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
607
|
-
* @returns {T} - The closest matches
|
|
608
|
-
*/
|
|
609
246
|
closest(a, b, n = 1, opt) {
|
|
610
247
|
return this.batchSorted(a, b, 'desc', opt).slice(0, n);
|
|
611
248
|
}
|
|
612
|
-
/**
|
|
613
|
-
* Returns the n furthest matches from a batch comparison.
|
|
614
|
-
*
|
|
615
|
-
* @template T - The type of the metric result
|
|
616
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
617
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
618
|
-
* @param {number} [n=1] - Number of furthest matches
|
|
619
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
620
|
-
* @returns {T} - The furthest matches
|
|
621
|
-
*/
|
|
622
249
|
furthest(a, b, n = 1, opt) {
|
|
623
250
|
return this.batchSorted(a, b, 'asc', opt).slice(0, n);
|
|
624
251
|
}
|
|
625
|
-
/**
|
|
626
|
-
* Performs a normalized and filtered substring search.
|
|
627
|
-
*
|
|
628
|
-
* @param {string} needle - The search string
|
|
629
|
-
* @param {string[]} haystack - The array to search in
|
|
630
|
-
* @param {NormalizeFlags} [flags] - Normalization flags
|
|
631
|
-
* @param {CmpStrProcessors} [processors] - Pre-processors to apply
|
|
632
|
-
* @returns {string[]} - Array of matching entries
|
|
633
|
-
*/
|
|
634
252
|
search(needle, haystack, flags, processors) {
|
|
635
253
|
const resolved = this.resolveOptions({ flags, processors });
|
|
636
|
-
// Prepare the needle and haystack, normalizing and filtering them
|
|
637
254
|
const test = this.prepare(needle, resolved);
|
|
638
255
|
const hstk = this.prepare(haystack, resolved);
|
|
639
|
-
// Filter the haystack based on the normalized test string
|
|
640
256
|
return haystack.filter((_, i) => hstk[i].includes(test));
|
|
641
257
|
}
|
|
642
|
-
/**
|
|
643
|
-
* Computes a similarity matrix for the given input array.
|
|
644
|
-
*
|
|
645
|
-
* @param {string[]} input - The input array
|
|
646
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
647
|
-
* @returns {number[][]} - The similarity matrix
|
|
648
|
-
*/
|
|
649
258
|
matrix(input, opt) {
|
|
650
259
|
input = this.prepare(input, this.resolveOptions(opt));
|
|
651
260
|
return input.map((a) =>
|
|
@@ -654,19 +263,46 @@ class CmpStr {
|
|
|
654
263
|
)
|
|
655
264
|
);
|
|
656
265
|
}
|
|
657
|
-
/**
|
|
658
|
-
* Computes the phonetic index for a string using the configured
|
|
659
|
-
* or given algorithm.
|
|
660
|
-
*
|
|
661
|
-
* @param {string} [input] - The input string
|
|
662
|
-
* @param {string} [algo] - The phonetic algorithm to use
|
|
663
|
-
* @param {PhoneticOptions} [opt] - Optional phonetic options
|
|
664
|
-
* @returns {string} - The phonetic index as a string
|
|
665
|
-
*/
|
|
666
266
|
phoneticIndex(input, algo, opt) {
|
|
667
267
|
const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
|
|
668
268
|
return this.index(input, { algo: algo ?? a, opt: opt ?? o });
|
|
669
269
|
}
|
|
270
|
+
structuredLookup(query, data, key, opt) {
|
|
271
|
+
return this.structured(data, key).lookup(
|
|
272
|
+
(q, items, options) => this.batchTest(q, items, options),
|
|
273
|
+
query,
|
|
274
|
+
opt
|
|
275
|
+
);
|
|
276
|
+
}
|
|
277
|
+
structuredMatch(query, data, key, threshold, opt) {
|
|
278
|
+
return this.structured(data, key).lookup(
|
|
279
|
+
(q, items, options) => this.match(q, items, threshold, options),
|
|
280
|
+
query,
|
|
281
|
+
{ ...opt, sort: 'desc' }
|
|
282
|
+
);
|
|
283
|
+
}
|
|
284
|
+
structuredClosest(query, data, key, n = 1, opt) {
|
|
285
|
+
return this.structured(data, key).lookup(
|
|
286
|
+
(q, items, options) => this.closest(q, items, n, options),
|
|
287
|
+
query,
|
|
288
|
+
{ ...opt, sort: 'desc' }
|
|
289
|
+
);
|
|
290
|
+
}
|
|
291
|
+
structuredFurthest(query, data, key, n = 1, opt) {
|
|
292
|
+
return this.structured(data, key).lookup(
|
|
293
|
+
(q, items, options) => this.furthest(q, items, n, options),
|
|
294
|
+
query,
|
|
295
|
+
{ ...opt, sort: 'asc' }
|
|
296
|
+
);
|
|
297
|
+
}
|
|
298
|
+
structuredPairs(data, key, other, otherKey, opt) {
|
|
299
|
+
return this.structured(data, key).lookupPairs(
|
|
300
|
+
(items, otherItems, options) => this.pairs(items, otherItems, options),
|
|
301
|
+
other,
|
|
302
|
+
otherKey,
|
|
303
|
+
opt
|
|
304
|
+
);
|
|
305
|
+
}
|
|
670
306
|
}
|
|
671
307
|
|
|
672
308
|
export { CmpStr };
|