cmpstr 3.0.3 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/dist/CmpStr.esm.js +2487 -4948
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +3 -3
- package/dist/CmpStr.esm.min.js.map +1 -1
- package/dist/CmpStr.umd.js +2601 -5040
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +3 -3
- package/dist/CmpStr.umd.min.js.map +1 -1
- package/dist/cjs/CmpStr.cjs +41 -405
- package/dist/cjs/CmpStr.cjs.map +1 -1
- package/dist/cjs/CmpStrAsync.cjs +38 -221
- package/dist/cjs/CmpStrAsync.cjs.map +1 -1
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +1 -56
- package/dist/cjs/metric/Cosine.cjs.map +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +2 -64
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +1 -56
- package/dist/cjs/metric/DiceSorensen.cjs.map +1 -1
- package/dist/cjs/metric/Hamming.cjs +2 -51
- package/dist/cjs/metric/Hamming.cjs.map +1 -1
- package/dist/cjs/metric/Jaccard.cjs +1 -48
- package/dist/cjs/metric/Jaccard.cjs.map +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +1 -53
- package/dist/cjs/metric/JaroWinkler.cjs.map +1 -1
- package/dist/cjs/metric/LCS.cjs +1 -54
- package/dist/cjs/metric/LCS.cjs.map +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +2 -60
- package/dist/cjs/metric/Levenshtein.cjs.map +1 -1
- package/dist/cjs/metric/Metric.cjs +1 -261
- package/dist/cjs/metric/Metric.cjs.map +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs +4 -56
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs +4 -58
- package/dist/cjs/metric/SmithWaterman.cjs.map +1 -1
- package/dist/cjs/metric/qGram.cjs +1 -55
- package/dist/cjs/metric/qGram.cjs.map +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +1 -78
- package/dist/cjs/phonetic/Caverphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Cologne.cjs +1 -43
- package/dist/cjs/phonetic/Cologne.cjs.map +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +1 -76
- package/dist/cjs/phonetic/Metaphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +1 -261
- package/dist/cjs/phonetic/Phonetic.cjs.map +1 -1
- package/dist/cjs/phonetic/Soundex.cjs +1 -47
- package/dist/cjs/phonetic/Soundex.cjs.map +1 -1
- package/dist/cjs/root.cjs +49 -0
- package/dist/cjs/root.cjs.map +1 -0
- package/dist/cjs/utils/DeepMerge.cjs +8 -75
- package/dist/cjs/utils/DeepMerge.cjs.map +1 -1
- package/dist/cjs/utils/DiffChecker.cjs +2 -190
- package/dist/cjs/utils/DiffChecker.cjs.map +1 -1
- package/dist/cjs/utils/Filter.cjs +1 -112
- package/dist/cjs/utils/Filter.cjs.map +1 -1
- package/dist/cjs/utils/HashTable.cjs +1 -99
- package/dist/cjs/utils/HashTable.cjs.map +1 -1
- package/dist/cjs/utils/Normalizer.cjs +3 -94
- package/dist/cjs/utils/Normalizer.cjs.map +1 -1
- package/dist/cjs/utils/Pool.cjs +10 -105
- package/dist/cjs/utils/Pool.cjs.map +1 -1
- package/dist/cjs/utils/Profiler.cjs +1 -133
- package/dist/cjs/utils/Profiler.cjs.map +1 -1
- package/dist/cjs/utils/Registry.cjs +1 -89
- package/dist/cjs/utils/Registry.cjs.map +1 -1
- package/dist/cjs/utils/StructuredData.cjs +145 -0
- package/dist/cjs/utils/StructuredData.cjs.map +1 -0
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -180
- package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -1
- package/dist/esm/CmpStr.mjs +41 -405
- package/dist/esm/CmpStr.mjs.map +1 -1
- package/dist/esm/CmpStrAsync.mjs +38 -221
- package/dist/esm/CmpStrAsync.mjs.map +1 -1
- package/dist/esm/index.mjs +1 -1
- package/dist/esm/metric/Cosine.mjs +1 -56
- package/dist/esm/metric/Cosine.mjs.map +1 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs +2 -64
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -1
- package/dist/esm/metric/DiceSorensen.mjs +1 -56
- package/dist/esm/metric/DiceSorensen.mjs.map +1 -1
- package/dist/esm/metric/Hamming.mjs +2 -51
- package/dist/esm/metric/Hamming.mjs.map +1 -1
- package/dist/esm/metric/Jaccard.mjs +1 -48
- package/dist/esm/metric/Jaccard.mjs.map +1 -1
- package/dist/esm/metric/JaroWinkler.mjs +1 -53
- package/dist/esm/metric/JaroWinkler.mjs.map +1 -1
- package/dist/esm/metric/LCS.mjs +1 -54
- package/dist/esm/metric/LCS.mjs.map +1 -1
- package/dist/esm/metric/Levenshtein.mjs +2 -60
- package/dist/esm/metric/Levenshtein.mjs.map +1 -1
- package/dist/esm/metric/Metric.mjs +1 -261
- package/dist/esm/metric/Metric.mjs.map +1 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs +4 -56
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -1
- package/dist/esm/metric/SmithWaterman.mjs +4 -58
- package/dist/esm/metric/SmithWaterman.mjs.map +1 -1
- package/dist/esm/metric/qGram.mjs +1 -55
- package/dist/esm/metric/qGram.mjs.map +1 -1
- package/dist/esm/phonetic/Caverphone.mjs +1 -78
- package/dist/esm/phonetic/Caverphone.mjs.map +1 -1
- package/dist/esm/phonetic/Cologne.mjs +1 -43
- package/dist/esm/phonetic/Cologne.mjs.map +1 -1
- package/dist/esm/phonetic/Metaphone.mjs +1 -76
- package/dist/esm/phonetic/Metaphone.mjs.map +1 -1
- package/dist/esm/phonetic/Phonetic.mjs +1 -261
- package/dist/esm/phonetic/Phonetic.mjs.map +1 -1
- package/dist/esm/phonetic/Soundex.mjs +1 -47
- package/dist/esm/phonetic/Soundex.mjs.map +1 -1
- package/dist/esm/root.mjs +35 -0
- package/dist/esm/root.mjs.map +1 -0
- package/dist/esm/utils/DeepMerge.mjs +8 -76
- package/dist/esm/utils/DeepMerge.mjs.map +1 -1
- package/dist/esm/utils/DiffChecker.mjs +2 -190
- package/dist/esm/utils/DiffChecker.mjs.map +1 -1
- package/dist/esm/utils/Filter.mjs +1 -112
- package/dist/esm/utils/Filter.mjs.map +1 -1
- package/dist/esm/utils/HashTable.mjs +1 -99
- package/dist/esm/utils/HashTable.mjs.map +1 -1
- package/dist/esm/utils/Normalizer.mjs +3 -94
- package/dist/esm/utils/Normalizer.mjs.map +1 -1
- package/dist/esm/utils/Pool.mjs +10 -105
- package/dist/esm/utils/Pool.mjs.map +1 -1
- package/dist/esm/utils/Profiler.mjs +1 -133
- package/dist/esm/utils/Profiler.mjs.map +1 -1
- package/dist/esm/utils/Registry.mjs +1 -89
- package/dist/esm/utils/Registry.mjs.map +1 -1
- package/dist/esm/utils/StructuredData.mjs +143 -0
- package/dist/esm/utils/StructuredData.mjs.map +1 -0
- package/dist/esm/utils/TextAnalyzer.mjs +1 -180
- package/dist/esm/utils/TextAnalyzer.mjs.map +1 -1
- package/dist/types/CmpStr.d.ts +90 -8
- package/dist/types/CmpStrAsync.d.ts +82 -8
- package/dist/types/index.d.ts +3 -2
- package/dist/types/root.d.ts +39 -0
- package/dist/types/utils/Pool.d.ts +2 -2
- package/dist/types/utils/StructuredData.d.ts +162 -0
- package/dist/types/utils/Types.d.ts +35 -1
- package/package.json +63 -22
package/dist/cjs/CmpStr.cjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.0
|
|
1
|
+
// CmpStr v3.1.0 build-76aadb9-260117 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
var DeepMerge = require('./utils/DeepMerge.cjs');
|
|
@@ -7,6 +7,7 @@ var TextAnalyzer = require('./utils/TextAnalyzer.cjs');
|
|
|
7
7
|
var DiffChecker = require('./utils/DiffChecker.cjs');
|
|
8
8
|
var Normalizer = require('./utils/Normalizer.cjs');
|
|
9
9
|
var Filter = require('./utils/Filter.cjs');
|
|
10
|
+
var StructuredData = require('./utils/StructuredData.cjs');
|
|
10
11
|
var Registry = require('./utils/Registry.cjs');
|
|
11
12
|
require('./metric/Cosine.cjs');
|
|
12
13
|
require('./metric/DamerauLevenshtein.cjs');
|
|
@@ -26,49 +27,8 @@ require('./phonetic/Metaphone.cjs');
|
|
|
26
27
|
require('./phonetic/Soundex.cjs');
|
|
27
28
|
var Phonetic = require('./phonetic/Phonetic.cjs');
|
|
28
29
|
|
|
29
|
-
/**
|
|
30
|
-
* CmpStr Main API
|
|
31
|
-
* src/CmpStr.ts
|
|
32
|
-
*
|
|
33
|
-
* The CmpStr class provides a comprehensive, highly abstracted, and type-safe interface
|
|
34
|
-
* for string comparison, similarity measurement, phonetic indexing, filtering, normalization,
|
|
35
|
-
* and text analysis. It unifies all core features of the CmpStr package and exposes a
|
|
36
|
-
* consistent, user-friendly API for both single and batch operations.
|
|
37
|
-
*
|
|
38
|
-
* Features:
|
|
39
|
-
* - Centralized management of metrics, phonetic algorithms, and filters
|
|
40
|
-
* - Flexible normalization and filtering pipeline for all inputs
|
|
41
|
-
* - Batch, pairwise, and single string comparison with detailed results
|
|
42
|
-
* - Phonetic indexing and phonetic-aware search and comparison
|
|
43
|
-
* - Text analysis and unified diff utilities
|
|
44
|
-
* - Full TypeScript type safety and extensibility
|
|
45
|
-
*
|
|
46
|
-
* @module CmpStr
|
|
47
|
-
* @author Paul Köhler (komed3)
|
|
48
|
-
* @license MIT
|
|
49
|
-
*/
|
|
50
|
-
// Import the Profiler instance for global profiling
|
|
51
30
|
const profiler = Profiler.Profiler.getInstance();
|
|
52
|
-
/**
|
|
53
|
-
* The main CmpStr class that provides a unified interface for string comparison,
|
|
54
|
-
* phonetic indexing, filtering, and text analysis.
|
|
55
|
-
*
|
|
56
|
-
* @template R - The type of the metric result, defaults to MetricRaw
|
|
57
|
-
*/
|
|
58
31
|
class CmpStr {
|
|
59
|
-
/**
|
|
60
|
-
* --------------------------------------------------------------------------------
|
|
61
|
-
* Static methods and properties for global access to CmpStr features
|
|
62
|
-
* --------------------------------------------------------------------------------
|
|
63
|
-
*
|
|
64
|
-
* These static methods provide a convenient way to access the core features of
|
|
65
|
-
* the CmpStr package without needing to instantiate a CmpStr object.
|
|
66
|
-
*/
|
|
67
|
-
/**
|
|
68
|
-
* Adds, removes, pauses, resumes, lists, or clears global filters.
|
|
69
|
-
*
|
|
70
|
-
* @see Filter
|
|
71
|
-
*/
|
|
72
32
|
static filter = {
|
|
73
33
|
add: Filter.Filter.add,
|
|
74
34
|
remove: Filter.Filter.remove,
|
|
@@ -77,22 +37,12 @@ class CmpStr {
|
|
|
77
37
|
list: Filter.Filter.list,
|
|
78
38
|
clear: Filter.Filter.clear
|
|
79
39
|
};
|
|
80
|
-
/**
|
|
81
|
-
* Adds, removes, checks, or lists available metrics.
|
|
82
|
-
*
|
|
83
|
-
* @see MetricRegistry
|
|
84
|
-
*/
|
|
85
40
|
static metric = {
|
|
86
41
|
add: Metric.MetricRegistry.add,
|
|
87
42
|
remove: Metric.MetricRegistry.remove,
|
|
88
43
|
has: Metric.MetricRegistry.has,
|
|
89
44
|
list: Metric.MetricRegistry.list
|
|
90
45
|
};
|
|
91
|
-
/**
|
|
92
|
-
* Adds, removes, checks, or lists available phonetic algorithms and mappings.
|
|
93
|
-
*
|
|
94
|
-
* @see PhoneticRegistry
|
|
95
|
-
*/
|
|
96
46
|
static phonetic = {
|
|
97
47
|
add: Phonetic.PhoneticRegistry.add,
|
|
98
48
|
remove: Phonetic.PhoneticRegistry.remove,
|
|
@@ -105,89 +55,30 @@ class CmpStr {
|
|
|
105
55
|
list: Phonetic.PhoneticMappingRegistry.list
|
|
106
56
|
}
|
|
107
57
|
};
|
|
108
|
-
/**
|
|
109
|
-
* Provides access to the global profiler services.
|
|
110
|
-
*
|
|
111
|
-
* @see Profiler
|
|
112
|
-
*/
|
|
113
58
|
static profiler = profiler.services;
|
|
114
|
-
/**
|
|
115
|
-
* Clears the caches for normalizer, metric, and phonetic modules.
|
|
116
|
-
*/
|
|
117
59
|
static clearCache = {
|
|
118
60
|
normalizer: Normalizer.Normalizer.clear,
|
|
119
61
|
metric: Metric.Metric.clear,
|
|
120
62
|
phonetic: Phonetic.Phonetic.clear
|
|
121
63
|
};
|
|
122
|
-
/**
|
|
123
|
-
* Returns a TextAnalyzer instance for the given input string.
|
|
124
|
-
*
|
|
125
|
-
* @param {string} [input] - The input string
|
|
126
|
-
* @returns {TextAnalyzer} - The text analyzer
|
|
127
|
-
*/
|
|
128
64
|
static analyze(input) {
|
|
129
65
|
return new TextAnalyzer.TextAnalyzer(input);
|
|
130
66
|
}
|
|
131
|
-
/**
|
|
132
|
-
* Returns a DiffChecker instance for computing the unified diff between two texts.
|
|
133
|
-
*
|
|
134
|
-
* @param {string} a - The first (original) text
|
|
135
|
-
* @param {string} b - The second (modified) text
|
|
136
|
-
* @param {DiffOptions} [opt] - Optional diff configuration
|
|
137
|
-
* @returns {DiffChecker} - The diff checker instance
|
|
138
|
-
*/
|
|
139
67
|
static diff(a, b, opt) {
|
|
140
68
|
return new DiffChecker.DiffChecker(a, b, opt);
|
|
141
69
|
}
|
|
142
|
-
/**
|
|
143
|
-
* --------------------------------------------------------------------------------
|
|
144
|
-
* Instanciate the CmpStr class
|
|
145
|
-
* --------------------------------------------------------------------------------
|
|
146
|
-
*
|
|
147
|
-
* Methods to create a new CmpStr instance with the given options.
|
|
148
|
-
* Using the static `create` method is recommended to ensure proper instantiation.
|
|
149
|
-
*/
|
|
150
|
-
/**
|
|
151
|
-
* Creates a new CmpStr instance with the given options.
|
|
152
|
-
*
|
|
153
|
-
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
|
|
154
|
-
* @returns {CmpStr<R>} - A new CmpStr instance
|
|
155
|
-
*/
|
|
156
70
|
static create(opt) {
|
|
157
71
|
return new CmpStr(opt);
|
|
158
72
|
}
|
|
159
|
-
// The options object that holds the configuration for this CmpStr instance
|
|
160
73
|
options = Object.create(null);
|
|
161
|
-
/**
|
|
162
|
-
* Creates a new CmpStr instance with the given options.
|
|
163
|
-
* The constructor is protected to enforce the use of the static `create` method.
|
|
164
|
-
*
|
|
165
|
-
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
|
|
166
|
-
*/
|
|
167
74
|
constructor(opt) {
|
|
168
75
|
if (opt)
|
|
169
76
|
typeof opt === 'string'
|
|
170
77
|
? this.setSerializedOptions(opt)
|
|
171
78
|
: this.setOptions(opt);
|
|
172
79
|
}
|
|
173
|
-
/**
|
|
174
|
-
* ---------------------------------------------------------------------------------
|
|
175
|
-
* Protected utility methods for internal use
|
|
176
|
-
* ---------------------------------------------------------------------------------
|
|
177
|
-
*
|
|
178
|
-
* These methods provide utility functions for converting inputs, merging options,
|
|
179
|
-
* normalizing inputs, filtering, and preparing inputs for comparison.
|
|
180
|
-
*/
|
|
181
|
-
/**
|
|
182
|
-
* Assert a condition and throws if the condition is not met.
|
|
183
|
-
*
|
|
184
|
-
* @param {string} cond - The condition to met
|
|
185
|
-
* @param {any} [test] - Value to test for
|
|
186
|
-
* @throws {Error} If the condition is not met
|
|
187
|
-
*/
|
|
188
80
|
assert(cond, test) {
|
|
189
81
|
switch (cond) {
|
|
190
|
-
// Check if the metric exists
|
|
191
82
|
case 'metric':
|
|
192
83
|
if (!CmpStr.metric.has(test))
|
|
193
84
|
throw new Error(
|
|
@@ -195,7 +86,6 @@ class CmpStr {
|
|
|
195
86
|
`use CmpStr.metric.list() for available metrics`
|
|
196
87
|
);
|
|
197
88
|
break;
|
|
198
|
-
// Check if the phonetic algorithm exists
|
|
199
89
|
case 'phonetic':
|
|
200
90
|
if (!CmpStr.phonetic.has(test))
|
|
201
91
|
throw new Error(
|
|
@@ -203,88 +93,37 @@ class CmpStr {
|
|
|
203
93
|
`use CmpStr.phonetic.list() for available phonetic algorithms`
|
|
204
94
|
);
|
|
205
95
|
break;
|
|
206
|
-
// Throw an error for unknown conditions
|
|
207
96
|
default:
|
|
208
97
|
throw new Error(`Cmpstr condition <${cond}> unknown`);
|
|
209
98
|
}
|
|
210
99
|
}
|
|
211
|
-
/**
|
|
212
|
-
* Assert multiple conditions.
|
|
213
|
-
*
|
|
214
|
-
* @param {[ string, any? ][]} cond - Array of [ condition, value ] pairs
|
|
215
|
-
*/
|
|
216
100
|
assertMany(...cond) {
|
|
217
101
|
for (const [c, test] of cond) this.assert(c, test);
|
|
218
102
|
}
|
|
219
|
-
/**
|
|
220
|
-
* Resolves the options for the CmpStr instance, merging the provided options with
|
|
221
|
-
* the existing options.
|
|
222
|
-
*
|
|
223
|
-
* @param {CmpStrOptions} [opt] - Optional options to merge
|
|
224
|
-
* @returns {CmpStrOptions} - The resolved options
|
|
225
|
-
*/
|
|
226
103
|
resolveOptions(opt) {
|
|
227
104
|
return DeepMerge.merge({ ...(this.options ?? Object.create(null)) }, opt);
|
|
228
105
|
}
|
|
229
|
-
/**
|
|
230
|
-
* Normalizes the input string or array using the configured or provided flags.
|
|
231
|
-
*
|
|
232
|
-
* @param {MetricInput} input - The input string or array
|
|
233
|
-
* @param {NormalizeFlags} [flags] - Normalization flags
|
|
234
|
-
* @returns {MetricInput} - The normalized input
|
|
235
|
-
*/
|
|
236
106
|
normalize(input, flags) {
|
|
237
107
|
return Normalizer.Normalizer.normalize(
|
|
238
108
|
input,
|
|
239
109
|
flags ?? this.options.flags ?? ''
|
|
240
110
|
);
|
|
241
111
|
}
|
|
242
|
-
/**
|
|
243
|
-
* Applies all active filters to the input string or array.
|
|
244
|
-
*
|
|
245
|
-
* @param {MetricInput} input - The input string or array
|
|
246
|
-
* @param {string} [hook='input'] - The filter hook
|
|
247
|
-
* @returns {MetricInput} - The filtered string(s)
|
|
248
|
-
*/
|
|
249
112
|
filter(input, hook) {
|
|
250
113
|
return Filter.Filter.apply(hook, input);
|
|
251
114
|
}
|
|
252
|
-
/**
|
|
253
|
-
* Prepares the input by normalizing and filtering.
|
|
254
|
-
*
|
|
255
|
-
* @param {MetricInput} [input] - The input string or array
|
|
256
|
-
* @param {CmpStrOptions} [opt] - Optional options to use
|
|
257
|
-
* @returns {MetricInput} - The prepared input
|
|
258
|
-
*/
|
|
259
115
|
prepare(input, opt) {
|
|
260
116
|
const { flags, processors } = opt ?? this.options;
|
|
261
|
-
// Normalize the input using flags (i.e., 'itw')
|
|
262
117
|
if (flags?.length) input = this.normalize(input, flags);
|
|
263
|
-
// Filter the input using hooked up filters
|
|
264
118
|
input = this.filter(input, 'input');
|
|
265
|
-
// Apply phonetic processors if configured
|
|
266
119
|
if (processors?.phonetic) input = this.index(input, processors.phonetic);
|
|
267
120
|
return input;
|
|
268
121
|
}
|
|
269
|
-
/**
|
|
270
|
-
* Post-process the results of the metric computation.
|
|
271
|
-
*
|
|
272
|
-
* @param {MetricResult<R>} result - The metric result
|
|
273
|
-
* @returns {MetricResult<R>} - The post-processed results
|
|
274
|
-
*/
|
|
275
122
|
postProcess(result, opt) {
|
|
276
|
-
// Remove "zero similarity" from batch results if configured
|
|
277
123
|
if (opt?.removeZero && Array.isArray(result))
|
|
278
124
|
result = result.filter((r) => r.res > 0);
|
|
279
125
|
return result;
|
|
280
126
|
}
|
|
281
|
-
/**
|
|
282
|
-
* Computes the phonetic index for the given input using the specified phonetic algorithm.
|
|
283
|
-
*
|
|
284
|
-
* @param {MetricInput} input - The input string or array
|
|
285
|
-
* @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
|
|
286
|
-
* @returns {MetricInput} - The phonetic index for the given input
|
|
287
|
-
*/
|
|
288
127
|
index(input, { algo, opt }) {
|
|
289
128
|
this.assert('phonetic', algo);
|
|
290
129
|
const phonetic = Registry.factory.phonetic(algo, opt);
|
|
@@ -293,26 +132,14 @@ class CmpStr {
|
|
|
293
132
|
? input.map((s) => phonetic.getIndex(s).join(delimiter))
|
|
294
133
|
: phonetic.getIndex(input).join(delimiter);
|
|
295
134
|
}
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
*
|
|
300
|
-
* @template T - The type of the metric result
|
|
301
|
-
* @param {MetricInput} a - The first input string or array
|
|
302
|
-
* @param {MetricInput} b - The second input string or array
|
|
303
|
-
* @param {CmpStrOptions} [opt] - Optional options to use
|
|
304
|
-
* @param {MetricMode} [mode='single'] - The metric mode to use
|
|
305
|
-
* @param {boolean} [raw=false] - Whether to return raw results
|
|
306
|
-
* @param {boolean} [skip=false] - Whether to skip normalization and filtering
|
|
307
|
-
* @returns {T} - The computed metric result
|
|
308
|
-
*/
|
|
135
|
+
structured(data, key) {
|
|
136
|
+
return StructuredData.StructuredData.create(data, key);
|
|
137
|
+
}
|
|
309
138
|
compute(a, b, opt, mode, raw, skip) {
|
|
310
139
|
const resolved = this.resolveOptions(opt);
|
|
311
140
|
this.assert('metric', resolved.metric);
|
|
312
|
-
// Prepare the input
|
|
313
141
|
const A = skip ? a : this.prepare(a, resolved);
|
|
314
142
|
const B = skip ? b : this.prepare(b, resolved);
|
|
315
|
-
// If the inputs are empty and safeEmpty is enabled, return an empty array
|
|
316
143
|
if (
|
|
317
144
|
resolved.safeEmpty &&
|
|
318
145
|
((Array.isArray(A) && A.length === 0) ||
|
|
@@ -322,25 +149,12 @@ class CmpStr {
|
|
|
322
149
|
) {
|
|
323
150
|
return [];
|
|
324
151
|
}
|
|
325
|
-
// Get the metric class
|
|
326
152
|
const metric = Registry.factory.metric(resolved.metric, A, B, resolved.opt);
|
|
327
|
-
// Pass the original inputs to the metric
|
|
328
153
|
if (resolved.output !== 'prep') metric.setOriginal(a, b);
|
|
329
|
-
// Compute the metric result
|
|
330
154
|
metric.run(mode);
|
|
331
|
-
// Post-process the results
|
|
332
155
|
const result = this.postProcess(metric.getResults(), resolved);
|
|
333
|
-
// Resolve and return the result based on the raw flag
|
|
334
156
|
return this.output(result, raw ?? resolved.raw);
|
|
335
157
|
}
|
|
336
|
-
/**
|
|
337
|
-
* Resolves the result format (raw or formatted).
|
|
338
|
-
*
|
|
339
|
-
* @template T - The type of the metric result
|
|
340
|
-
* @param {MetricResult<R>} result - The metric result
|
|
341
|
-
* @param {boolean} [raw] - Whether to return raw results
|
|
342
|
-
* @returns {T} - The resolved result
|
|
343
|
-
*/
|
|
344
158
|
output(result, raw) {
|
|
345
159
|
return (raw ?? this.options.raw)
|
|
346
160
|
? result
|
|
@@ -348,213 +162,69 @@ class CmpStr {
|
|
|
348
162
|
? result.map((r) => ({ source: r.a, target: r.b, match: r.res }))
|
|
349
163
|
: { source: result.a, target: result.b, match: result.res };
|
|
350
164
|
}
|
|
351
|
-
/**
|
|
352
|
-
* ---------------------------------------------------------------------------------
|
|
353
|
-
* Managing methods for CmpStr
|
|
354
|
-
* ---------------------------------------------------------------------------------
|
|
355
|
-
*
|
|
356
|
-
* These methods provides an interface to set and get properties of the CmpStr
|
|
357
|
-
* instance, such as options, metric, phonetic algorithm, and more.
|
|
358
|
-
*/
|
|
359
|
-
/**
|
|
360
|
-
* Creates a shallow clone of the current instance.
|
|
361
|
-
*
|
|
362
|
-
* @returns {CmpStr<R>} - The cloned instance
|
|
363
|
-
*/
|
|
364
165
|
clone() {
|
|
365
166
|
return Object.assign(Object.create(Object.getPrototypeOf(this)), this);
|
|
366
167
|
}
|
|
367
|
-
/**
|
|
368
|
-
* Resets the instance, clearing all data and options.
|
|
369
|
-
*
|
|
370
|
-
* @returns {this}
|
|
371
|
-
*/
|
|
372
168
|
reset() {
|
|
373
169
|
for (const k in this.options) delete this.options[k];
|
|
374
170
|
return this;
|
|
375
171
|
}
|
|
376
|
-
/**
|
|
377
|
-
* Sets / replaces the full options object.
|
|
378
|
-
*
|
|
379
|
-
* @param {CmpStrOptions} opt - The options
|
|
380
|
-
* @returns {this}
|
|
381
|
-
*/
|
|
382
172
|
setOptions(opt) {
|
|
383
173
|
this.options = opt;
|
|
384
174
|
return this;
|
|
385
175
|
}
|
|
386
|
-
/**
|
|
387
|
-
* Deep merges and sets new options.
|
|
388
|
-
*
|
|
389
|
-
* @param {CmpStrOptions} opt - The options to merge
|
|
390
|
-
* @returns {this}
|
|
391
|
-
*/
|
|
392
176
|
mergeOptions(opt) {
|
|
393
177
|
DeepMerge.merge(this.options, opt);
|
|
394
178
|
return this;
|
|
395
179
|
}
|
|
396
|
-
/**
|
|
397
|
-
* Sets the serialized options from a JSON string.
|
|
398
|
-
*
|
|
399
|
-
* @param {string} opt - The serialized options
|
|
400
|
-
* @returns {this}
|
|
401
|
-
*/
|
|
402
180
|
setSerializedOptions(opt) {
|
|
403
181
|
this.options = JSON.parse(opt);
|
|
404
182
|
return this;
|
|
405
183
|
}
|
|
406
|
-
/**
|
|
407
|
-
* Sets a specific option at the given path.
|
|
408
|
-
*
|
|
409
|
-
* @param {string} path - The path to the option
|
|
410
|
-
* @param {any} value - The value to set
|
|
411
|
-
* @returns {this}
|
|
412
|
-
*/
|
|
413
184
|
setOption(path, value) {
|
|
414
185
|
DeepMerge.set(this.options, path, value);
|
|
415
186
|
return this;
|
|
416
187
|
}
|
|
417
|
-
/**
|
|
418
|
-
* Removes an option at the given path.
|
|
419
|
-
*
|
|
420
|
-
* @param {string} path - The path to the option
|
|
421
|
-
* @returns {this}
|
|
422
|
-
*/
|
|
423
188
|
rmvOption(path) {
|
|
424
189
|
DeepMerge.rmv(this.options, path);
|
|
425
190
|
return this;
|
|
426
191
|
}
|
|
427
|
-
/**
|
|
428
|
-
* Enable or disable raw output.
|
|
429
|
-
*
|
|
430
|
-
* @param {boolean} enable - Whether to enable or disable raw output
|
|
431
|
-
* @returns {this}
|
|
432
|
-
*/
|
|
433
192
|
setRaw(enable) {
|
|
434
193
|
return this.setOption('raw', enable);
|
|
435
194
|
}
|
|
436
|
-
/**
|
|
437
|
-
* Sets the similatity metric to use (e.g., 'levenshtein', 'dice').
|
|
438
|
-
*
|
|
439
|
-
* @param {string} name - The metric name
|
|
440
|
-
* @returns {this}
|
|
441
|
-
*/
|
|
442
195
|
setMetric(name) {
|
|
443
196
|
return this.setOption('metric', name);
|
|
444
197
|
}
|
|
445
|
-
/**
|
|
446
|
-
* Sets the normalization flags (e.g., 'itw', 'nfc').
|
|
447
|
-
*
|
|
448
|
-
* @param {NormalizeFlags} flags - The normalization flags
|
|
449
|
-
* @returns {this}
|
|
450
|
-
*/
|
|
451
198
|
setFlags(flags) {
|
|
452
199
|
return this.setOption('flags', flags);
|
|
453
200
|
}
|
|
454
|
-
/**
|
|
455
|
-
* Removes the normalization flags entirely.
|
|
456
|
-
*
|
|
457
|
-
* @return {this}
|
|
458
|
-
*/
|
|
459
201
|
rmvFlags() {
|
|
460
202
|
return this.rmvOption('flags');
|
|
461
203
|
}
|
|
462
|
-
/**
|
|
463
|
-
* Sets the pre-processors to use for preparing the input.
|
|
464
|
-
*
|
|
465
|
-
* @param {CmpStrProcessors} opt - The processors to set
|
|
466
|
-
* @returns {this}
|
|
467
|
-
*/
|
|
468
204
|
setProcessors(opt) {
|
|
469
205
|
return this.setOption('processors', opt);
|
|
470
206
|
}
|
|
471
|
-
/**
|
|
472
|
-
* Removes the processors entirely.
|
|
473
|
-
*
|
|
474
|
-
* @returns {this}
|
|
475
|
-
*/
|
|
476
207
|
rmvProcessors() {
|
|
477
208
|
return this.rmvOption('processors');
|
|
478
209
|
}
|
|
479
|
-
/**
|
|
480
|
-
* Returns the current options object.
|
|
481
|
-
*
|
|
482
|
-
* @returns {CmpStrOptions} - The options
|
|
483
|
-
*/
|
|
484
210
|
getOptions() {
|
|
485
211
|
return this.options;
|
|
486
212
|
}
|
|
487
|
-
/**
|
|
488
|
-
* Returns the options as a JSON string.
|
|
489
|
-
*
|
|
490
|
-
* @returns {string} - The serialized options
|
|
491
|
-
*/
|
|
492
213
|
getSerializedOptions() {
|
|
493
214
|
return JSON.stringify(this.options);
|
|
494
215
|
}
|
|
495
|
-
/**
|
|
496
|
-
* Returns a specific option value by path.
|
|
497
|
-
*
|
|
498
|
-
* @param {string} path - The path to the option
|
|
499
|
-
* @returns {any} - The option value
|
|
500
|
-
*/
|
|
501
216
|
getOption(path) {
|
|
502
217
|
return DeepMerge.get(this.options, path);
|
|
503
218
|
}
|
|
504
|
-
/**
|
|
505
|
-
* ---------------------------------------------------------------------------------
|
|
506
|
-
* Public core methods for string comparison
|
|
507
|
-
* ---------------------------------------------------------------------------------
|
|
508
|
-
*
|
|
509
|
-
* These methods provide the core functionality of the CmpStr class, allowing for
|
|
510
|
-
* string comparison, phonetic indexing, filtering, and text search.
|
|
511
|
-
*/
|
|
512
|
-
/**
|
|
513
|
-
* Performs a single metric comparison between the source and target.
|
|
514
|
-
*
|
|
515
|
-
* @template T - The type of the metric result
|
|
516
|
-
* @param {string} a - The source string
|
|
517
|
-
* @param {string} b - The target string
|
|
518
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
519
|
-
* @returns {T} - The metric result
|
|
520
|
-
*/
|
|
521
219
|
test(a, b, opt) {
|
|
522
220
|
return this.compute(a, b, opt, 'single');
|
|
523
221
|
}
|
|
524
|
-
/**
|
|
525
|
-
* Performs a single metric comparison and returns only the numeric score.
|
|
526
|
-
*
|
|
527
|
-
* @param {string} a - The source string
|
|
528
|
-
* @param {string} b - The target string
|
|
529
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
530
|
-
* @returns {number} - The similarity score (0..1)
|
|
531
|
-
*/
|
|
532
222
|
compare(a, b, opt) {
|
|
533
223
|
return this.compute(a, b, opt, 'single', true).res;
|
|
534
224
|
}
|
|
535
|
-
/**
|
|
536
|
-
* Performs a batch metric comparison between source and target strings
|
|
537
|
-
* or array of strings.
|
|
538
|
-
*
|
|
539
|
-
* @template T - The type of the metric result
|
|
540
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
541
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
542
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
543
|
-
* @returns {T} - The batch metric results
|
|
544
|
-
*/
|
|
545
225
|
batchTest(a, b, opt) {
|
|
546
226
|
return this.compute(a, b, opt, 'batch');
|
|
547
227
|
}
|
|
548
|
-
/**
|
|
549
|
-
* Performs a batch metric comparison and returns results sorted by score.
|
|
550
|
-
*
|
|
551
|
-
* @template T - The type of the metric result
|
|
552
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
553
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
554
|
-
* @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
|
|
555
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
556
|
-
* @returns {T} - The sorted batch results
|
|
557
|
-
*/
|
|
558
228
|
batchSorted(a, b, dir = 'desc', opt) {
|
|
559
229
|
return this.output(
|
|
560
230
|
this.compute(a, b, opt, 'batch', true).sort((a, b) =>
|
|
@@ -563,32 +233,9 @@ class CmpStr {
|
|
|
563
233
|
opt?.raw ?? this.options.raw
|
|
564
234
|
);
|
|
565
235
|
}
|
|
566
|
-
/**
|
|
567
|
-
* Performs a pairwise metric comparison between source and target strings
|
|
568
|
-
* or array of strings.
|
|
569
|
-
*
|
|
570
|
-
* Input arrays needs of the same length to perform pairwise comparison,
|
|
571
|
-
* otherwise the method will throw an error.
|
|
572
|
-
*
|
|
573
|
-
* @template T - The type of the metric result
|
|
574
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
575
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
576
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
577
|
-
* @returns {T} - The pairwise metric results
|
|
578
|
-
*/
|
|
579
236
|
pairs(a, b, opt) {
|
|
580
237
|
return this.compute(a, b, opt, 'pairwise');
|
|
581
238
|
}
|
|
582
|
-
/**
|
|
583
|
-
* Performs a batch comparison and returns only results above the threshold.
|
|
584
|
-
*
|
|
585
|
-
* @template T - The type of the metric result
|
|
586
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
587
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
588
|
-
* @param {number} threshold - The similarity threshold (0..1)
|
|
589
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
590
|
-
* @returns {T} - The filtered batch results
|
|
591
|
-
*/
|
|
592
239
|
match(a, b, threshold, opt) {
|
|
593
240
|
return this.output(
|
|
594
241
|
this.compute(a, b, opt, 'batch', true)
|
|
@@ -597,56 +244,18 @@ class CmpStr {
|
|
|
597
244
|
opt?.raw ?? this.options.raw
|
|
598
245
|
);
|
|
599
246
|
}
|
|
600
|
-
/**
|
|
601
|
-
* Returns the n closest matches from a batch comparison.
|
|
602
|
-
*
|
|
603
|
-
* @template T - The type of the metric result
|
|
604
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
605
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
606
|
-
* @param {number} [n=1] - Number of closest matches
|
|
607
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
608
|
-
* @returns {T} - The closest matches
|
|
609
|
-
*/
|
|
610
247
|
closest(a, b, n = 1, opt) {
|
|
611
248
|
return this.batchSorted(a, b, 'desc', opt).slice(0, n);
|
|
612
249
|
}
|
|
613
|
-
/**
|
|
614
|
-
* Returns the n furthest matches from a batch comparison.
|
|
615
|
-
*
|
|
616
|
-
* @template T - The type of the metric result
|
|
617
|
-
* @param {MetricInput} a - The source string or array of strings
|
|
618
|
-
* @param {MetricInput} b - The target string or array of strings
|
|
619
|
-
* @param {number} [n=1] - Number of furthest matches
|
|
620
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
621
|
-
* @returns {T} - The furthest matches
|
|
622
|
-
*/
|
|
623
250
|
furthest(a, b, n = 1, opt) {
|
|
624
251
|
return this.batchSorted(a, b, 'asc', opt).slice(0, n);
|
|
625
252
|
}
|
|
626
|
-
/**
|
|
627
|
-
* Performs a normalized and filtered substring search.
|
|
628
|
-
*
|
|
629
|
-
* @param {string} needle - The search string
|
|
630
|
-
* @param {string[]} haystack - The array to search in
|
|
631
|
-
* @param {NormalizeFlags} [flags] - Normalization flags
|
|
632
|
-
* @param {CmpStrProcessors} [processors] - Pre-processors to apply
|
|
633
|
-
* @returns {string[]} - Array of matching entries
|
|
634
|
-
*/
|
|
635
253
|
search(needle, haystack, flags, processors) {
|
|
636
254
|
const resolved = this.resolveOptions({ flags, processors });
|
|
637
|
-
// Prepare the needle and haystack, normalizing and filtering them
|
|
638
255
|
const test = this.prepare(needle, resolved);
|
|
639
256
|
const hstk = this.prepare(haystack, resolved);
|
|
640
|
-
// Filter the haystack based on the normalized test string
|
|
641
257
|
return haystack.filter((_, i) => hstk[i].includes(test));
|
|
642
258
|
}
|
|
643
|
-
/**
|
|
644
|
-
* Computes a similarity matrix for the given input array.
|
|
645
|
-
*
|
|
646
|
-
* @param {string[]} input - The input array
|
|
647
|
-
* @param {CmpStrOptions} [opt] - Optional options
|
|
648
|
-
* @returns {number[][]} - The similarity matrix
|
|
649
|
-
*/
|
|
650
259
|
matrix(input, opt) {
|
|
651
260
|
input = this.prepare(input, this.resolveOptions(opt));
|
|
652
261
|
return input.map((a) =>
|
|
@@ -655,19 +264,46 @@ class CmpStr {
|
|
|
655
264
|
)
|
|
656
265
|
);
|
|
657
266
|
}
|
|
658
|
-
/**
|
|
659
|
-
* Computes the phonetic index for a string using the configured
|
|
660
|
-
* or given algorithm.
|
|
661
|
-
*
|
|
662
|
-
* @param {string} [input] - The input string
|
|
663
|
-
* @param {string} [algo] - The phonetic algorithm to use
|
|
664
|
-
* @param {PhoneticOptions} [opt] - Optional phonetic options
|
|
665
|
-
* @returns {string} - The phonetic index as a string
|
|
666
|
-
*/
|
|
667
267
|
phoneticIndex(input, algo, opt) {
|
|
668
268
|
const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
|
|
669
269
|
return this.index(input, { algo: algo ?? a, opt: opt ?? o });
|
|
670
270
|
}
|
|
271
|
+
structuredLookup(query, data, key, opt) {
|
|
272
|
+
return this.structured(data, key).lookup(
|
|
273
|
+
(q, items, options) => this.batchTest(q, items, options),
|
|
274
|
+
query,
|
|
275
|
+
opt
|
|
276
|
+
);
|
|
277
|
+
}
|
|
278
|
+
structuredMatch(query, data, key, threshold, opt) {
|
|
279
|
+
return this.structured(data, key).lookup(
|
|
280
|
+
(q, items, options) => this.match(q, items, threshold, options),
|
|
281
|
+
query,
|
|
282
|
+
{ ...opt, sort: 'desc' }
|
|
283
|
+
);
|
|
284
|
+
}
|
|
285
|
+
structuredClosest(query, data, key, n = 1, opt) {
|
|
286
|
+
return this.structured(data, key).lookup(
|
|
287
|
+
(q, items, options) => this.closest(q, items, n, options),
|
|
288
|
+
query,
|
|
289
|
+
{ ...opt, sort: 'desc' }
|
|
290
|
+
);
|
|
291
|
+
}
|
|
292
|
+
structuredFurthest(query, data, key, n = 1, opt) {
|
|
293
|
+
return this.structured(data, key).lookup(
|
|
294
|
+
(q, items, options) => this.furthest(q, items, n, options),
|
|
295
|
+
query,
|
|
296
|
+
{ ...opt, sort: 'asc' }
|
|
297
|
+
);
|
|
298
|
+
}
|
|
299
|
+
structuredPairs(data, key, other, otherKey, opt) {
|
|
300
|
+
return this.structured(data, key).lookupPairs(
|
|
301
|
+
(items, otherItems, options) => this.pairs(items, otherItems, options),
|
|
302
|
+
other,
|
|
303
|
+
otherKey,
|
|
304
|
+
opt
|
|
305
|
+
);
|
|
306
|
+
}
|
|
671
307
|
}
|
|
672
308
|
|
|
673
309
|
exports.CmpStr = CmpStr;
|