cmpstr 2.0.3 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +75 -503
- package/dist/CmpStr.esm.js +4863 -0
- package/dist/CmpStr.esm.js.map +1 -0
- package/dist/CmpStr.esm.min.js +8 -0
- package/dist/CmpStr.esm.min.js.map +1 -0
- package/dist/CmpStr.umd.js +4875 -0
- package/dist/CmpStr.umd.js.map +1 -0
- package/dist/CmpStr.umd.min.js +8 -0
- package/dist/CmpStr.umd.min.js.map +1 -0
- package/dist/cjs/CmpStr.js +663 -0
- package/dist/cjs/CmpStr.js.map +1 -0
- package/dist/cjs/CmpStrAsync.js +336 -0
- package/dist/cjs/CmpStrAsync.js.map +1 -0
- package/dist/cjs/index.js +15 -0
- package/dist/cjs/index.js.map +1 -0
- package/dist/cjs/metric/Cosine.js +101 -0
- package/dist/cjs/metric/Cosine.js.map +1 -0
- package/dist/cjs/metric/DamerauLevenshtein.js +110 -0
- package/dist/cjs/metric/DamerauLevenshtein.js.map +1 -0
- package/dist/cjs/metric/DiceSorensen.js +91 -0
- package/dist/cjs/metric/DiceSorensen.js.map +1 -0
- package/dist/cjs/metric/Hamming.js +82 -0
- package/dist/cjs/metric/Hamming.js.map +1 -0
- package/dist/cjs/metric/Jaccard.js +76 -0
- package/dist/cjs/metric/Jaccard.js.map +1 -0
- package/dist/cjs/metric/JaroWinkler.js +114 -0
- package/dist/cjs/metric/JaroWinkler.js.map +1 -0
- package/dist/cjs/metric/LCS.js +89 -0
- package/dist/cjs/metric/LCS.js.map +1 -0
- package/dist/cjs/metric/Levenshtein.js +94 -0
- package/dist/cjs/metric/Levenshtein.js.map +1 -0
- package/dist/cjs/metric/Metric.js +445 -0
- package/dist/cjs/metric/Metric.js.map +1 -0
- package/dist/cjs/metric/NeedlemanWunsch.js +95 -0
- package/dist/cjs/metric/NeedlemanWunsch.js.map +1 -0
- package/dist/cjs/metric/SmithWaterman.js +98 -0
- package/dist/cjs/metric/SmithWaterman.js.map +1 -0
- package/dist/cjs/metric/qGram.js +91 -0
- package/dist/cjs/metric/qGram.js.map +1 -0
- package/dist/cjs/phonetic/Cologne.js +112 -0
- package/dist/cjs/phonetic/Cologne.js.map +1 -0
- package/dist/cjs/phonetic/Metaphone.js +172 -0
- package/dist/cjs/phonetic/Metaphone.js.map +1 -0
- package/dist/cjs/phonetic/Phonetic.js +413 -0
- package/dist/cjs/phonetic/Phonetic.js.map +1 -0
- package/dist/cjs/phonetic/Soundex.js +135 -0
- package/dist/cjs/phonetic/Soundex.js.map +1 -0
- package/dist/cjs/utils/DeepMerge.js +144 -0
- package/dist/cjs/utils/DeepMerge.js.map +1 -0
- package/dist/cjs/utils/DiffChecker.js +500 -0
- package/dist/cjs/utils/DiffChecker.js.map +1 -0
- package/dist/cjs/utils/Filter.js +189 -0
- package/dist/cjs/utils/Filter.js.map +1 -0
- package/dist/cjs/utils/HashTable.js +175 -0
- package/dist/cjs/utils/HashTable.js.map +1 -0
- package/dist/cjs/utils/Normalizer.js +144 -0
- package/dist/cjs/utils/Normalizer.js.map +1 -0
- package/dist/cjs/utils/Pool.js +196 -0
- package/dist/cjs/utils/Pool.js.map +1 -0
- package/dist/cjs/utils/Profiler.js +229 -0
- package/dist/cjs/utils/Profiler.js.map +1 -0
- package/dist/cjs/utils/Registry.js +148 -0
- package/dist/cjs/utils/Registry.js.map +1 -0
- package/dist/cjs/utils/TextAnalyzer.js +358 -0
- package/dist/cjs/utils/TextAnalyzer.js.map +1 -0
- package/dist/esm/CmpStr.js +662 -0
- package/dist/esm/CmpStr.js.map +1 -0
- package/dist/esm/CmpStrAsync.js +331 -0
- package/dist/esm/CmpStrAsync.js.map +1 -0
- package/dist/esm/index.js +7 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/metric/Cosine.js +99 -0
- package/dist/esm/metric/Cosine.js.map +1 -0
- package/dist/esm/metric/DamerauLevenshtein.js +108 -0
- package/dist/esm/metric/DamerauLevenshtein.js.map +1 -0
- package/dist/esm/metric/DiceSorensen.js +89 -0
- package/dist/esm/metric/DiceSorensen.js.map +1 -0
- package/dist/esm/metric/Hamming.js +77 -0
- package/dist/esm/metric/Hamming.js.map +1 -0
- package/dist/esm/metric/Jaccard.js +74 -0
- package/dist/esm/metric/Jaccard.js.map +1 -0
- package/dist/esm/metric/JaroWinkler.js +112 -0
- package/dist/esm/metric/JaroWinkler.js.map +1 -0
- package/dist/esm/metric/LCS.js +87 -0
- package/dist/esm/metric/LCS.js.map +1 -0
- package/dist/esm/metric/Levenshtein.js +92 -0
- package/dist/esm/metric/Levenshtein.js.map +1 -0
- package/dist/esm/metric/Metric.js +442 -0
- package/dist/esm/metric/Metric.js.map +1 -0
- package/dist/esm/metric/NeedlemanWunsch.js +93 -0
- package/dist/esm/metric/NeedlemanWunsch.js.map +1 -0
- package/dist/esm/metric/SmithWaterman.js +96 -0
- package/dist/esm/metric/SmithWaterman.js.map +1 -0
- package/dist/esm/metric/qGram.js +89 -0
- package/dist/esm/metric/qGram.js.map +1 -0
- package/dist/esm/phonetic/Cologne.js +114 -0
- package/dist/esm/phonetic/Cologne.js.map +1 -0
- package/dist/esm/phonetic/Metaphone.js +174 -0
- package/dist/esm/phonetic/Metaphone.js.map +1 -0
- package/dist/esm/phonetic/Phonetic.js +409 -0
- package/dist/esm/phonetic/Phonetic.js.map +1 -0
- package/dist/esm/phonetic/Soundex.js +137 -0
- package/dist/esm/phonetic/Soundex.js.map +1 -0
- package/dist/esm/utils/DeepMerge.js +139 -0
- package/dist/esm/utils/DeepMerge.js.map +1 -0
- package/dist/esm/utils/DiffChecker.js +498 -0
- package/dist/esm/utils/DiffChecker.js.map +1 -0
- package/dist/esm/utils/Filter.js +187 -0
- package/dist/esm/utils/Filter.js.map +1 -0
- package/dist/esm/utils/HashTable.js +173 -0
- package/dist/esm/utils/HashTable.js.map +1 -0
- package/dist/esm/utils/Normalizer.js +142 -0
- package/dist/esm/utils/Normalizer.js.map +1 -0
- package/dist/esm/utils/Pool.js +194 -0
- package/dist/esm/utils/Pool.js.map +1 -0
- package/dist/esm/utils/Profiler.js +227 -0
- package/dist/esm/utils/Profiler.js.map +1 -0
- package/dist/esm/utils/Registry.js +142 -0
- package/dist/esm/utils/Registry.js.map +1 -0
- package/dist/esm/utils/TextAnalyzer.js +356 -0
- package/dist/esm/utils/TextAnalyzer.js.map +1 -0
- package/dist/types/CmpStr.d.ts +472 -0
- package/dist/types/CmpStrAsync.d.ts +233 -0
- package/dist/types/index.d.ts +51 -0
- package/dist/types/metric/Cosine.d.ts +57 -0
- package/dist/types/metric/DamerauLevenshtein.d.ts +50 -0
- package/dist/types/metric/DiceSorensen.d.ts +57 -0
- package/dist/types/metric/Hamming.d.ts +49 -0
- package/dist/types/metric/Jaccard.d.ts +48 -0
- package/dist/types/metric/JaroWinkler.d.ts +50 -0
- package/dist/types/metric/LCS.d.ts +50 -0
- package/dist/types/metric/Levenshtein.d.ts +50 -0
- package/dist/types/metric/Metric.d.ts +261 -0
- package/dist/types/metric/NeedlemanWunsch.d.ts +47 -0
- package/dist/types/metric/SmithWaterman.d.ts +48 -0
- package/dist/types/metric/index.d.ts +41 -0
- package/dist/types/metric/qGram.d.ts +56 -0
- package/dist/types/phonetic/Cologne.d.ts +46 -0
- package/dist/types/phonetic/Metaphone.d.ts +50 -0
- package/dist/types/phonetic/Phonetic.d.ts +189 -0
- package/dist/types/phonetic/Soundex.d.ts +49 -0
- package/dist/types/phonetic/index.d.ts +30 -0
- package/dist/types/utils/DeepMerge.d.ts +70 -0
- package/dist/types/utils/DiffChecker.d.ts +137 -0
- package/dist/types/utils/Filter.d.ts +97 -0
- package/dist/types/utils/HashTable.d.ts +86 -0
- package/dist/types/utils/Normalizer.d.ts +76 -0
- package/dist/types/utils/Pool.d.ts +63 -0
- package/dist/types/utils/Profiler.d.ts +129 -0
- package/dist/types/utils/Registry.d.ts +57 -0
- package/dist/types/utils/TextAnalyzer.d.ts +199 -0
- package/dist/types/utils/Types.d.ts +313 -0
- package/package.json +62 -49
- package/src/CmpStr.d.ts +0 -70
- package/src/CmpStr.js +0 -917
- package/src/CmpStrAsync.d.ts +0 -19
- package/src/CmpStrAsync.js +0 -197
- package/src/algorithms/cosine.js +0 -86
- package/src/algorithms/damerau.js +0 -78
- package/src/algorithms/dice.js +0 -65
- package/src/algorithms/hamming.js +0 -44
- package/src/algorithms/jaccard.js +0 -34
- package/src/algorithms/jaroWinkler.js +0 -106
- package/src/algorithms/lcs.js +0 -58
- package/src/algorithms/levenshtein.js +0 -70
- package/src/algorithms/needlemanWunsch.js +0 -72
- package/src/algorithms/qGram.js +0 -63
- package/src/algorithms/smithWaterman.js +0 -78
- package/src/algorithms/soundex.js +0 -152
- package/src/index.d.ts +0 -3
- package/src/index.js +0 -47
|
@@ -0,0 +1,663 @@
|
|
|
1
|
+
// CmpStr v3.0.0 dev-1a82e20-250612 by Paul Köhler @komed3 / MIT License
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
var DeepMerge = require('./utils/DeepMerge.js');
|
|
5
|
+
var Profiler = require('./utils/Profiler.js');
|
|
6
|
+
var TextAnalyzer = require('./utils/TextAnalyzer.js');
|
|
7
|
+
var DiffChecker = require('./utils/DiffChecker.js');
|
|
8
|
+
var Normalizer = require('./utils/Normalizer.js');
|
|
9
|
+
var Filter = require('./utils/Filter.js');
|
|
10
|
+
var Registry = require('./utils/Registry.js');
|
|
11
|
+
require('./metric/Cosine.js');
|
|
12
|
+
require('./metric/DamerauLevenshtein.js');
|
|
13
|
+
require('./metric/DiceSorensen.js');
|
|
14
|
+
require('./metric/Hamming.js');
|
|
15
|
+
require('./metric/Jaccard.js');
|
|
16
|
+
require('./metric/JaroWinkler.js');
|
|
17
|
+
require('./metric/LCS.js');
|
|
18
|
+
require('./metric/Levenshtein.js');
|
|
19
|
+
require('./metric/NeedlemanWunsch.js');
|
|
20
|
+
require('./metric/qGram.js');
|
|
21
|
+
require('./metric/SmithWaterman.js');
|
|
22
|
+
var Metric = require('./metric/Metric.js');
|
|
23
|
+
require('./phonetic/Cologne.js');
|
|
24
|
+
require('./phonetic/Metaphone.js');
|
|
25
|
+
require('./phonetic/Soundex.js');
|
|
26
|
+
var Phonetic = require('./phonetic/Phonetic.js');
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* CmpStr Main API
|
|
30
|
+
* src/CmpStr.ts
|
|
31
|
+
*
|
|
32
|
+
* The CmpStr class provides a comprehensive, highly abstracted, and type-safe interface
|
|
33
|
+
* for string comparison, similarity measurement, phonetic indexing, filtering, normalization,
|
|
34
|
+
* and text analysis. It unifies all core features of the CmpStr package and exposes a
|
|
35
|
+
* consistent, user-friendly API for both single and batch operations.
|
|
36
|
+
*
|
|
37
|
+
* Features:
|
|
38
|
+
* - Centralized management of metrics, phonetic algorithms, and filters
|
|
39
|
+
* - Flexible normalization and filtering pipeline for all inputs
|
|
40
|
+
* - Batch, pairwise, and single string comparison with detailed results
|
|
41
|
+
* - Phonetic indexing and phonetic-aware search and comparison
|
|
42
|
+
* - Text analysis and unified diff utilities
|
|
43
|
+
* - Full TypeScript type safety and extensibility
|
|
44
|
+
*
|
|
45
|
+
* @module CmpStr
|
|
46
|
+
* @author Paul Köhler (komed3)
|
|
47
|
+
* @license MIT
|
|
48
|
+
*/
|
|
49
|
+
// Obtain the Profiler instance through its static getInstance() accessor.
// Its `services` member is exposed further down as the static CmpStr.profiler.
|
|
50
|
+
const profiler = Profiler.Profiler.getInstance();
|
|
51
|
+
/**
 * The main CmpStr class that provides a unified interface for string comparison,
 * phonetic indexing, filtering, and text analysis.
 *
 * @template R - The type of the metric result, defaults to MetricRaw
 */
class CmpStr {
    /**
     * --------------------------------------------------------------------------------
     * Static methods and properties for global access to CmpStr features
     * --------------------------------------------------------------------------------
     *
     * These static methods provide a convenient way to access the core features of
     * the CmpStr package without needing to instantiate a CmpStr object.
     *
     * NOTE(review): the registry/filter/cache functions below are exposed as plain
     * (unbound) references. This presumably relies on them not using `this`;
     * confirm against the respective modules.
     */

    /**
     * Adds, removes, pauses, resumes, lists, or clears global filters.
     *
     * @see Filter
     */
    static filter = {
        add: Filter.Filter.add,
        remove: Filter.Filter.remove,
        pause: Filter.Filter.pause,
        resume: Filter.Filter.resume,
        list: Filter.Filter.list,
        clear: Filter.Filter.clear
    };

    /**
     * Adds, removes, checks, or lists available metrics.
     *
     * @see MetricRegistry
     */
    static metric = {
        add: Metric.MetricRegistry.add,
        remove: Metric.MetricRegistry.remove,
        has: Metric.MetricRegistry.has,
        list: Metric.MetricRegistry.list
    };

    /**
     * Adds, removes, checks, or lists available phonetic algorithms and mappings.
     *
     * @see PhoneticRegistry
     */
    static phonetic = {
        add: Phonetic.PhoneticRegistry.add,
        remove: Phonetic.PhoneticRegistry.remove,
        has: Phonetic.PhoneticRegistry.has,
        list: Phonetic.PhoneticRegistry.list,
        map: {
            add: Phonetic.PhoneticMappingRegistry.add,
            remove: Phonetic.PhoneticMappingRegistry.remove,
            has: Phonetic.PhoneticMappingRegistry.has,
            list: Phonetic.PhoneticMappingRegistry.list
        }
    };

    /**
     * Provides access to the global profiler services.
     *
     * @see Profiler
     */
    static profiler = profiler.services;

    /**
     * Clears the caches for normalizer, metric, and phonetic modules.
     */
    static clearCache = {
        normalizer: Normalizer.Normalizer.clear,
        metric: Metric.Metric.clear,
        phonetic: Phonetic.Phonetic.clear
    };

    /**
     * Returns a TextAnalyzer instance for the given input string.
     *
     * @param {string} [input] - The input string
     * @returns {TextAnalyzer} - The text analyzer
     */
    static analyze(input) {
        return new TextAnalyzer.TextAnalyzer(input);
    }

    /**
     * Returns a DiffChecker instance for computing the unified diff between two texts.
     *
     * @param {string} a - The first (original) text
     * @param {string} b - The second (modified) text
     * @param {DiffOptions} [opt] - Optional diff configuration
     * @returns {DiffChecker} - The diff checker instance
     */
    static diff(a, b, opt) {
        return new DiffChecker.DiffChecker(a, b, opt);
    }

    /**
     * --------------------------------------------------------------------------------
     * Instantiate the CmpStr class
     * --------------------------------------------------------------------------------
     *
     * Methods to create a new CmpStr instance with the given options.
     * Using the static `create` method is recommended to ensure proper instantiation.
     */

    /**
     * Creates a new CmpStr instance with the given options.
     *
     * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
     * @returns {CmpStr<R>} - A new CmpStr instance
     */
    static create(opt) {
        return new CmpStr(opt);
    }

    // The options object that holds the configuration for this CmpStr instance.
    // Created without a prototype, so it carries no inherited keys.
    options = Object.create(null);

    /**
     * Creates a new CmpStr instance with the given options.
     *
     * The original documentation describes the constructor as protected (use the
     * static `create` method); nothing in this compiled output enforces that.
     *
     * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
     */
    constructor(opt) {
        // Nothing to apply: keep the empty default options
        if (!opt) return;

        // A string is treated as serialized (JSON) options, anything else as an object
        if (typeof opt === 'string') this.setSerializedOptions(opt);
        else this.setOptions(opt);
    }

    /**
     * ---------------------------------------------------------------------------------
     * Protected utility methods for internal use
     * ---------------------------------------------------------------------------------
     *
     * These methods provide utility functions for converting inputs, merging options,
     * normalizing inputs, filtering, and preparing inputs for comparison.
     */

    /**
     * Asserts a condition and throws if the condition is not met.
     *
     * @param {string} cond - The condition to be met ('metric' or 'phonetic')
     * @param {any} [test] - Value to test for
     * @throws {Error} If the condition is not met or the condition name is unknown
     */
    assert(cond, test) {
        switch (cond) {
            // Check if the metric exists
            case 'metric':
                if (!CmpStr.metric.has(test))
                    throw new Error(
                        `CmpStr <metric> must be set, call .setMetric(), ` +
                            `use CmpStr.metric.list() for available metrics`
                    );
                break;

            // Check if the phonetic algorithm exists
            case 'phonetic':
                if (!CmpStr.phonetic.has(test))
                    throw new Error(
                        `CmpStr <phonetic> must be set, call .setPhonetic(), ` +
                            `use CmpStr.phonetic.list() for available phonetic algorithms`
                    );
                break;

            // Throw an error for unknown conditions
            default:
                throw new Error(`Cmpstr condition <${cond}> unknown`);
        }
    }

    /**
     * Asserts multiple conditions, in the order given.
     *
     * @param {[ string, any? ][]} cond - Array of [ condition, value ] pairs
     * @throws {Error} If any condition is not met
     */
    assertMany(...cond) {
        for (const [c, test] of cond) this.assert(c, test);
    }

    /**
     * Resolves the options for the CmpStr instance, merging the provided options with
     * the existing options.
     *
     * NOTE(review): only the top level of `this.options` is copied before merging, so
     * nested option objects are still shared with the instance. Whether the deep
     * merge writes into them depends on DeepMerge.merge (not visible here) — confirm.
     *
     * @param {CmpStrOptions} [opt] - Optional options to merge
     * @returns {CmpStrOptions} - The resolved options
     */
    resolveOptions(opt) {
        return DeepMerge.merge({ ...(this.options ?? Object.create(null)) }, opt);
    }

    /**
     * Normalizes the input string or array using the configured or provided flags.
     *
     * @param {MetricInput} input - The input string or array
     * @param {NormalizeFlags} [flags] - Normalization flags; falls back to the
     *   instance flags and finally to an empty string
     * @returns {MetricInput} - The normalized input
     */
    normalize(input, flags) {
        return Normalizer.Normalizer.normalize(
            input,
            flags ?? this.options.flags ?? ''
        );
    }

    /**
     * Applies all active filters to the input string or array.
     *
     * @param {MetricInput} input - The input string or array
     * @param {string} [hook='input'] - The filter hook
     * @returns {MetricInput} - The filtered string(s)
     */
    filter(input, hook = 'input') {
        return Filter.Filter.apply(hook, input);
    }

    /**
     * Prepares the input by normalizing, filtering and, if configured, replacing it
     * with its phonetic index.
     *
     * @param {MetricInput} [input] - The input string or array
     * @param {CmpStrOptions} [opt] - Optional options to use instead of the instance options
     * @returns {MetricInput} - The prepared input
     */
    prepare(input, opt) {
        const { flags, processors } = opt ?? this.options;

        // Normalize the input using flags (i.e., 'itw')
        if (flags?.length) input = this.normalize(input, flags);

        // Filter the input using hooked up filters
        input = this.filter(input, 'input');

        // Apply phonetic processors if configured
        if (processors?.phonetic) input = this.index(input, processors.phonetic);

        return input;
    }

    /**
     * Post-process the results of the metric computation.
     *
     * @param {MetricResult<R>} result - The metric result
     * @param {CmpStrOptions} [opt] - Options; only `removeZero` is read here
     * @returns {MetricResult<R>} - The post-processed results
     */
    postProcess(result, opt) {
        // Remove "zero similarity" from batch results if configured
        if (opt?.removeZero && Array.isArray(result))
            result = result.filter((r) => r.res > 0);

        return result;
    }

    /**
     * Computes the phonetic index for the given input using the specified phonetic algorithm.
     *
     * @param {MetricInput} input - The input string or array
     * @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
     * @returns {MetricInput} - The phonetic index for the given input
     * @throws {Error} If the phonetic algorithm is not registered
     */
    index(input, { algo, opt }) {
        this.assert('phonetic', algo);

        const phonetic = Registry.factory.phonetic(algo, opt);
        // Index parts are joined into a single string per input
        const delimiter = opt?.delimiter ?? ' ';

        return Array.isArray(input)
            ? input.map((s) => phonetic.getIndex(s).join(delimiter))
            : phonetic.getIndex(input).join(delimiter);
    }

    /**
     * Computes the metric result for the given inputs, applying normalization and
     * filtering as configured.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The first input string or array
     * @param {MetricInput} b - The second input string or array
     * @param {CmpStrOptions} [opt] - Optional options to use
     * @param {MetricMode} [mode='single'] - The metric mode to use
     * @param {boolean} [raw=false] - Whether to return raw results
     * @param {boolean} [skip=false] - Whether to skip normalization and filtering
     * @returns {T} - The computed metric result
     * @throws {Error} If the resolved metric is not registered
     */
    compute(a, b, opt, mode, raw, skip) {
        const resolved = this.resolveOptions(opt);
        this.assert('metric', resolved.metric);

        // Prepare the input
        const A = skip ? a : this.prepare(a, resolved);
        const B = skip ? b : this.prepare(b, resolved);

        // Get the metric class
        const metric = Registry.factory.metric(resolved.metric, A, B, resolved.opt);

        // Pass the original inputs to the metric
        if (resolved.output !== 'prep') metric.setOriginal(a, b);

        // Compute the metric result
        metric.run(mode);

        // Post-process the results
        const result = this.postProcess(metric.getResults(), resolved);

        // Resolve and return the result based on the raw flag
        return this.output(result, raw ?? resolved.raw);
    }

    /**
     * Resolves the result format (raw or formatted).
     *
     * @template T - The type of the metric result
     * @param {MetricResult<R>} result - The metric result
     * @param {boolean} [raw] - Whether to return raw results; falls back to the instance option
     * @returns {T} - The resolved result
     */
    output(result, raw) {
        // Raw mode hands the metric result through untouched
        if (raw ?? this.options.raw) return result;

        // Formatted mode reduces every entry to { source, target, match }
        const format = (r) => ({ source: r.a, target: r.b, match: r.res });

        return Array.isArray(result) ? result.map(format) : format(result);
    }

    /**
     * ---------------------------------------------------------------------------------
     * Managing methods for CmpStr
     * ---------------------------------------------------------------------------------
     *
     * These methods provide an interface to set and get properties of the CmpStr
     * instance, such as options, metric, phonetic algorithm, and more.
     */

    /**
     * Creates a shallow clone of the current instance.
     *
     * Own properties are copied by reference, so the clone shares the same
     * `options` object with the original.
     *
     * @returns {CmpStr<R>} - The cloned instance
     */
    clone() {
        return Object.assign(Object.create(Object.getPrototypeOf(this)), this);
    }

    /**
     * Resets the instance, clearing all data and options.
     *
     * The keys are deleted from the current options object in place; an object that
     * was handed to `setOptions` is stored by reference and is therefore emptied too.
     *
     * @returns {this}
     */
    reset() {
        for (const k in this.options) delete this.options[k];
        return this;
    }

    /**
     * Sets / replaces the full options object.
     *
     * The given object is stored by reference, not copied.
     *
     * @param {CmpStrOptions} opt - The options
     * @returns {this}
     */
    setOptions(opt) {
        this.options = opt;
        return this;
    }

    /**
     * Deep merges and sets new options.
     *
     * @param {CmpStrOptions} opt - The options to merge
     * @returns {this}
     */
    mergeOptions(opt) {
        DeepMerge.merge(this.options, opt);
        return this;
    }

    /**
     * Sets the serialized options from a JSON string.
     *
     * @param {string} opt - The serialized options
     * @returns {this}
     * @throws {SyntaxError} If the string is not valid JSON
     */
    setSerializedOptions(opt) {
        this.options = JSON.parse(opt);
        return this;
    }

    /**
     * Sets a specific option at the given path.
     *
     * @param {string} path - The path to the option
     * @param {any} value - The value to set
     * @returns {this}
     */
    setOption(path, value) {
        DeepMerge.set(this.options, path, value);
        return this;
    }

    /**
     * Removes an option at the given path.
     *
     * @param {string} path - The path to the option
     * @returns {this}
     */
    rmvOption(path) {
        DeepMerge.rmv(this.options, path);
        return this;
    }

    /**
     * Enable or disable raw output.
     *
     * @param {boolean} enable - Whether to enable or disable raw output
     * @returns {this}
     */
    setRaw(enable) {
        return this.setOption('raw', enable);
    }

    /**
     * Sets the similarity metric to use (e.g., 'levenshtein', 'dice').
     *
     * @param {string} name - The metric name
     * @returns {this}
     */
    setMetric(name) {
        return this.setOption('metric', name);
    }

    /**
     * Sets the normalization flags (e.g., 'itw', 'nfc').
     *
     * @param {NormalizeFlags} flags - The normalization flags
     * @returns {this}
     */
    setFlags(flags) {
        return this.setOption('flags', flags);
    }

    /**
     * Removes the normalization flags entirely.
     *
     * @returns {this}
     */
    rmvFlags() {
        return this.rmvOption('flags');
    }

    /**
     * Sets the pre-processors to use for preparing the input.
     *
     * @param {CmpStrProcessors} opt - The processors to set
     * @returns {this}
     */
    setProcessors(opt) {
        return this.setOption('processors', opt);
    }

    /**
     * Removes the processors entirely.
     *
     * @returns {this}
     */
    rmvProcessors() {
        return this.rmvOption('processors');
    }

    /**
     * Returns the current options object (the live object, not a copy).
     *
     * @returns {CmpStrOptions} - The options
     */
    getOptions() {
        return this.options;
    }

    /**
     * Returns the options as a JSON string.
     *
     * @returns {string} - The serialized options
     */
    getSerializedOptions() {
        return JSON.stringify(this.options);
    }

    /**
     * Returns a specific option value by path.
     *
     * @param {string} path - The path to the option
     * @returns {any} - The option value
     */
    getOption(path) {
        return DeepMerge.get(this.options, path);
    }

    /**
     * ---------------------------------------------------------------------------------
     * Public core methods for string comparison
     * ---------------------------------------------------------------------------------
     *
     * These methods provide the core functionality of the CmpStr class, allowing for
     * string comparison, phonetic indexing, filtering, and text search.
     */

    /**
     * Performs a single metric comparison between the source and target.
     *
     * @template T - The type of the metric result
     * @param {string} a - The source string
     * @param {string} b - The target string
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The metric result
     */
    test(a, b, opt) {
        return this.compute(a, b, opt, 'single');
    }

    /**
     * Performs a single metric comparison and returns only the numeric score.
     *
     * @param {string} a - The source string
     * @param {string} b - The target string
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {number} - The similarity score (0..1)
     */
    compare(a, b, opt) {
        // Raw output is forced so that `.res` is available
        return this.compute(a, b, opt, 'single', true).res;
    }

    /**
     * Performs a batch metric comparison between source and target strings
     * or array of strings.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The batch metric results
     */
    batchTest(a, b, opt) {
        return this.compute(a, b, opt, 'batch');
    }

    /**
     * Performs a batch metric comparison and returns results sorted by score.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The sorted batch results
     */
    batchSorted(a, b, dir = 'desc', opt) {
        // Compute raw so the entries can be ordered by `.res`, format afterwards
        const sorted = this.compute(a, b, opt, 'batch', true).sort((x, y) =>
            dir === 'asc' ? x.res - y.res : y.res - x.res
        );

        return this.output(sorted, opt?.raw ?? this.options.raw);
    }

    /**
     * Performs a pairwise metric comparison between source and target strings
     * or array of strings.
     *
     * Input arrays need to be of the same length to perform pairwise comparison;
     * otherwise the metric is documented to throw an error (raised outside this class).
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The pairwise metric results
     */
    pairs(a, b, opt) {
        return this.compute(a, b, opt, 'pairwise');
    }

    /**
     * Performs a batch comparison and returns only results at or above the
     * threshold, sorted by descending score.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} threshold - The similarity threshold (0..1)
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The filtered batch results
     */
    match(a, b, threshold, opt) {
        const hits = this.compute(a, b, opt, 'batch', true)
            .filter((r) => r.res >= threshold)
            .sort((x, y) => y.res - x.res);

        return this.output(hits, opt?.raw ?? this.options.raw);
    }

    /**
     * Returns the n closest matches from a batch comparison.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} [n=1] - Number of closest matches
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The closest matches
     */
    closest(a, b, n = 1, opt) {
        return this.batchSorted(a, b, 'desc', opt).slice(0, n);
    }

    /**
     * Returns the n furthest matches from a batch comparison.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} [n=1] - Number of furthest matches
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The furthest matches
     */
    furthest(a, b, n = 1, opt) {
        return this.batchSorted(a, b, 'asc', opt).slice(0, n);
    }

    /**
     * Performs a normalized and filtered substring search.
     *
     * @param {string} needle - The search string
     * @param {string[]} haystack - The array to search in
     * @param {NormalizeFlags} [flags] - Normalization flags
     * @param {CmpStrProcessors} [processors] - Pre-processors to apply
     * @returns {string[]} - Array of matching entries (the original, unprepared strings)
     */
    search(needle, haystack, flags, processors) {
        const resolved = this.resolveOptions({ flags, processors });

        // Prepare the needle and haystack, normalizing and filtering them
        const test = this.prepare(needle, resolved);
        const hstk = this.prepare(haystack, resolved);

        // Match on the prepared entries, but return the entries as they were passed in
        return haystack.filter((_, i) => hstk[i].includes(test));
    }

    /**
     * Computes a similarity matrix for the given input array.
     *
     * The input is prepared once up front; the per-row computations then skip
     * preparation but still receive `opt`, so per-call options such as the metric
     * are honoured.
     *
     * @param {string[]} input - The input array
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {number[][]} - The similarity matrix
     */
    matrix(input, opt) {
        const prepared = this.prepare(input, this.resolveOptions(opt));

        return prepared.map((a) =>
            this.compute(a, prepared, opt, 'batch', true, true).map(
                (b) => b.res ?? 0
            )
        );
    }

    /**
     * Computes the phonetic index for a string using the configured
     * or given algorithm.
     *
     * @param {string} [input] - The input string
     * @param {string} [algo] - The phonetic algorithm to use
     * @param {PhoneticOptions} [opt] - Optional phonetic options
     * @returns {string} - The phonetic index as a string
     * @throws {Error} If neither the argument nor the instance options name a
     *   registered phonetic algorithm
     */
    phoneticIndex(input, algo, opt) {
        // Fall back to the phonetic processor configured on the instance
        const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
        return this.index(input, { algo: algo ?? a, opt: opt ?? o });
    }
}
|
|
661
|
+
|
|
662
|
+
exports.CmpStr = CmpStr;
|
|
663
|
+
//# sourceMappingURL=CmpStr.js.map
|