cmpstr 3.2.1 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -18
- package/dist/CmpStr.esm.js +1904 -1211
- package/dist/CmpStr.esm.min.js +2 -3
- package/dist/CmpStr.umd.js +1924 -1236
- package/dist/CmpStr.umd.min.js +2 -3
- package/dist/cjs/CmpStr.cjs +134 -64
- package/dist/cjs/CmpStrAsync.cjs +60 -37
- package/dist/cjs/index.cjs +1 -2
- package/dist/cjs/metric/Cosine.cjs +1 -2
- package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -2
- package/dist/cjs/metric/DiceSorensen.cjs +1 -2
- package/dist/cjs/metric/Hamming.cjs +5 -4
- package/dist/cjs/metric/Jaccard.cjs +1 -2
- package/dist/cjs/metric/JaroWinkler.cjs +1 -2
- package/dist/cjs/metric/LCS.cjs +1 -2
- package/dist/cjs/metric/Levenshtein.cjs +1 -2
- package/dist/cjs/metric/Metric.cjs +90 -53
- package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -2
- package/dist/cjs/metric/QGram.cjs +1 -2
- package/dist/cjs/metric/SmithWaterman.cjs +1 -2
- package/dist/cjs/phonetic/Caverphone.cjs +1 -2
- package/dist/cjs/phonetic/Cologne.cjs +1 -2
- package/dist/cjs/phonetic/Metaphone.cjs +1 -2
- package/dist/cjs/phonetic/Phonetic.cjs +80 -48
- package/dist/cjs/phonetic/Soundex.cjs +1 -2
- package/dist/cjs/root.cjs +6 -3
- package/dist/cjs/utils/DeepMerge.cjs +109 -99
- package/dist/cjs/utils/DiffChecker.cjs +1 -2
- package/dist/cjs/utils/Errors.cjs +106 -0
- package/dist/cjs/utils/Filter.cjs +97 -37
- package/dist/cjs/utils/HashTable.cjs +44 -30
- package/dist/cjs/utils/Normalizer.cjs +84 -35
- package/dist/cjs/utils/OptionsValidator.cjs +211 -0
- package/dist/cjs/utils/Pool.cjs +57 -19
- package/dist/cjs/utils/Profiler.cjs +41 -28
- package/dist/cjs/utils/Registry.cjs +48 -24
- package/dist/cjs/utils/StructuredData.cjs +95 -57
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -2
- package/dist/esm/CmpStr.mjs +133 -61
- package/dist/esm/CmpStrAsync.mjs +56 -33
- package/dist/esm/index.mjs +1 -2
- package/dist/esm/metric/Cosine.mjs +1 -2
- package/dist/esm/metric/DamerauLevenshtein.mjs +1 -2
- package/dist/esm/metric/DiceSorensen.mjs +1 -2
- package/dist/esm/metric/Hamming.mjs +5 -4
- package/dist/esm/metric/Jaccard.mjs +1 -2
- package/dist/esm/metric/JaroWinkler.mjs +1 -2
- package/dist/esm/metric/LCS.mjs +1 -2
- package/dist/esm/metric/Levenshtein.mjs +1 -2
- package/dist/esm/metric/Metric.mjs +92 -53
- package/dist/esm/metric/NeedlemanWunsch.mjs +1 -2
- package/dist/esm/metric/QGram.mjs +1 -2
- package/dist/esm/metric/SmithWaterman.mjs +1 -2
- package/dist/esm/phonetic/Caverphone.mjs +1 -2
- package/dist/esm/phonetic/Cologne.mjs +1 -2
- package/dist/esm/phonetic/Metaphone.mjs +1 -2
- package/dist/esm/phonetic/Phonetic.mjs +83 -48
- package/dist/esm/phonetic/Soundex.mjs +1 -2
- package/dist/esm/root.mjs +5 -4
- package/dist/esm/utils/DeepMerge.mjs +109 -95
- package/dist/esm/utils/DiffChecker.mjs +1 -2
- package/dist/esm/utils/Errors.mjs +106 -0
- package/dist/esm/utils/Filter.mjs +97 -37
- package/dist/esm/utils/HashTable.mjs +44 -30
- package/dist/esm/utils/Normalizer.mjs +84 -35
- package/dist/esm/utils/OptionsValidator.mjs +210 -0
- package/dist/esm/utils/Pool.mjs +53 -19
- package/dist/esm/utils/Profiler.mjs +41 -28
- package/dist/esm/utils/Registry.mjs +48 -24
- package/dist/esm/utils/StructuredData.mjs +95 -57
- package/dist/esm/utils/TextAnalyzer.mjs +1 -2
- package/dist/types/CmpStr.d.ts +25 -14
- package/dist/types/CmpStrAsync.d.ts +4 -0
- package/dist/types/index.d.ts +3 -2
- package/dist/types/metric/Metric.d.ts +15 -14
- package/dist/types/phonetic/Phonetic.d.ts +7 -4
- package/dist/types/root.d.ts +4 -2
- package/dist/types/utils/DeepMerge.d.ts +80 -58
- package/dist/types/utils/Errors.d.ts +154 -0
- package/dist/types/utils/Filter.d.ts +8 -1
- package/dist/types/utils/HashTable.d.ts +12 -11
- package/dist/types/utils/Normalizer.d.ts +5 -1
- package/dist/types/utils/OptionsValidator.d.ts +193 -0
- package/dist/types/utils/Pool.d.ts +2 -0
- package/dist/types/utils/Profiler.d.ts +9 -28
- package/dist/types/utils/Registry.d.ts +3 -3
- package/dist/types/utils/StructuredData.d.ts +6 -1
- package/dist/types/utils/Types.d.ts +39 -1
- package/package.json +20 -11
- package/dist/CmpStr.esm.js.map +0 -1
- package/dist/CmpStr.esm.min.js.map +0 -1
- package/dist/CmpStr.umd.js.map +0 -1
- package/dist/CmpStr.umd.min.js.map +0 -1
- package/dist/cjs/CmpStr.cjs.map +0 -1
- package/dist/cjs/CmpStrAsync.cjs.map +0 -1
- package/dist/cjs/index.cjs.map +0 -1
- package/dist/cjs/metric/Cosine.cjs.map +0 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +0 -1
- package/dist/cjs/metric/DiceSorensen.cjs.map +0 -1
- package/dist/cjs/metric/Hamming.cjs.map +0 -1
- package/dist/cjs/metric/Jaccard.cjs.map +0 -1
- package/dist/cjs/metric/JaroWinkler.cjs.map +0 -1
- package/dist/cjs/metric/LCS.cjs.map +0 -1
- package/dist/cjs/metric/Levenshtein.cjs.map +0 -1
- package/dist/cjs/metric/Metric.cjs.map +0 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +0 -1
- package/dist/cjs/metric/QGram.cjs.map +0 -1
- package/dist/cjs/metric/SmithWaterman.cjs.map +0 -1
- package/dist/cjs/phonetic/Caverphone.cjs.map +0 -1
- package/dist/cjs/phonetic/Cologne.cjs.map +0 -1
- package/dist/cjs/phonetic/Metaphone.cjs.map +0 -1
- package/dist/cjs/phonetic/Phonetic.cjs.map +0 -1
- package/dist/cjs/phonetic/Soundex.cjs.map +0 -1
- package/dist/cjs/root.cjs.map +0 -1
- package/dist/cjs/utils/DeepMerge.cjs.map +0 -1
- package/dist/cjs/utils/DiffChecker.cjs.map +0 -1
- package/dist/cjs/utils/Filter.cjs.map +0 -1
- package/dist/cjs/utils/HashTable.cjs.map +0 -1
- package/dist/cjs/utils/Normalizer.cjs.map +0 -1
- package/dist/cjs/utils/Pool.cjs.map +0 -1
- package/dist/cjs/utils/Profiler.cjs.map +0 -1
- package/dist/cjs/utils/Registry.cjs.map +0 -1
- package/dist/cjs/utils/StructuredData.cjs.map +0 -1
- package/dist/cjs/utils/TextAnalyzer.cjs.map +0 -1
- package/dist/esm/CmpStr.mjs.map +0 -1
- package/dist/esm/CmpStrAsync.mjs.map +0 -1
- package/dist/esm/index.mjs.map +0 -1
- package/dist/esm/metric/Cosine.mjs.map +0 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +0 -1
- package/dist/esm/metric/DiceSorensen.mjs.map +0 -1
- package/dist/esm/metric/Hamming.mjs.map +0 -1
- package/dist/esm/metric/Jaccard.mjs.map +0 -1
- package/dist/esm/metric/JaroWinkler.mjs.map +0 -1
- package/dist/esm/metric/LCS.mjs.map +0 -1
- package/dist/esm/metric/Levenshtein.mjs.map +0 -1
- package/dist/esm/metric/Metric.mjs.map +0 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +0 -1
- package/dist/esm/metric/QGram.mjs.map +0 -1
- package/dist/esm/metric/SmithWaterman.mjs.map +0 -1
- package/dist/esm/phonetic/Caverphone.mjs.map +0 -1
- package/dist/esm/phonetic/Cologne.mjs.map +0 -1
- package/dist/esm/phonetic/Metaphone.mjs.map +0 -1
- package/dist/esm/phonetic/Phonetic.mjs.map +0 -1
- package/dist/esm/phonetic/Soundex.mjs.map +0 -1
- package/dist/esm/root.mjs.map +0 -1
- package/dist/esm/utils/DeepMerge.mjs.map +0 -1
- package/dist/esm/utils/DiffChecker.mjs.map +0 -1
- package/dist/esm/utils/Filter.mjs.map +0 -1
- package/dist/esm/utils/HashTable.mjs.map +0 -1
- package/dist/esm/utils/Normalizer.mjs.map +0 -1
- package/dist/esm/utils/Pool.mjs.map +0 -1
- package/dist/esm/utils/Profiler.mjs.map +0 -1
- package/dist/esm/utils/Registry.mjs.map +0 -1
- package/dist/esm/utils/StructuredData.mjs.map +0 -1
- package/dist/esm/utils/TextAnalyzer.mjs.map +0 -1
|
@@ -1,9 +1,12 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
|
+
import { CmpStrValidationError, ErrorUtil } from './Errors.mjs';
|
|
2
3
|
import { Pool } from './Pool.mjs';
|
|
3
4
|
|
|
4
5
|
class StructuredData {
|
|
5
6
|
data;
|
|
6
7
|
key;
|
|
8
|
+
static SORT_ASC = (a, b) => a.res - b.res;
|
|
9
|
+
static SORT_DESC = (a, b) => b.res - a.res;
|
|
7
10
|
static create(data, key) {
|
|
8
11
|
return new StructuredData(data, key);
|
|
9
12
|
}
|
|
@@ -12,14 +15,17 @@ class StructuredData {
|
|
|
12
15
|
this.key = key;
|
|
13
16
|
}
|
|
14
17
|
extractFrom(arr, key) {
|
|
15
|
-
const
|
|
16
|
-
|
|
18
|
+
const n = arr.length;
|
|
19
|
+
const result = new Array(n);
|
|
20
|
+
for (let i = 0; i < n; i++) {
|
|
17
21
|
const val = arr[i][key];
|
|
18
|
-
result[i] =
|
|
22
|
+
result[i] = val != null ? String(val) : '';
|
|
19
23
|
}
|
|
20
24
|
return result;
|
|
21
25
|
}
|
|
22
|
-
extract
|
|
26
|
+
extract() {
|
|
27
|
+
return this.extractFrom(this.data, this.key);
|
|
28
|
+
}
|
|
23
29
|
isMetricResult(v) {
|
|
24
30
|
return (
|
|
25
31
|
typeof v === 'object' && v !== null && 'a' in v && 'b' in v && 'res' in v
|
|
@@ -37,64 +43,89 @@ class StructuredData {
|
|
|
37
43
|
normalizeResults(results) {
|
|
38
44
|
if (!Array.isArray(results) || results.length === 0) return [];
|
|
39
45
|
const first = results[0];
|
|
40
|
-
let
|
|
41
|
-
if (this.isMetricResult(first))
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
46
|
+
let out = new Array(results.length);
|
|
47
|
+
if (this.isMetricResult(first)) {
|
|
48
|
+
const src = results;
|
|
49
|
+
for (let i = 0; i < src.length; i++) out[i] = { ...src[i], __idx: i };
|
|
50
|
+
} else if (this.isCmpStrResult(first)) {
|
|
51
|
+
const src = results;
|
|
52
|
+
for (let i = 0; i < src.length; i++) {
|
|
53
|
+
const r = src[i];
|
|
54
|
+
out[i] = {
|
|
55
|
+
metric: 'unknown',
|
|
56
|
+
a: r.source,
|
|
57
|
+
b: r.target,
|
|
58
|
+
res: r.match,
|
|
59
|
+
raw: r.raw,
|
|
60
|
+
__idx: i
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
} else
|
|
64
|
+
throw new CmpStrValidationError(
|
|
52
65
|
'Unsupported result format for StructuredData normalization.'
|
|
53
66
|
);
|
|
54
|
-
return
|
|
67
|
+
return out;
|
|
55
68
|
}
|
|
56
69
|
rebuild(results, sourceData, extractedStrings, removeZero, objectsOnly) {
|
|
57
|
-
const
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
const occurrence = occurrenceCount.get(targetStr) ?? 0;
|
|
74
|
-
occurrenceCount.set(targetStr, occurrence + 1);
|
|
75
|
-
dataIndex = indices[occurrence % indices.length];
|
|
76
|
-
} else {
|
|
77
|
-
dataIndex = result.__idx ?? i;
|
|
70
|
+
const m = extractedStrings.length,
|
|
71
|
+
n = results.length;
|
|
72
|
+
const stringToIndices = Pool.acquire('map', m);
|
|
73
|
+
const occurrenceCount = Pool.acquire('map', n);
|
|
74
|
+
const output = new Array(n);
|
|
75
|
+
stringToIndices.clear();
|
|
76
|
+
occurrenceCount.clear();
|
|
77
|
+
try {
|
|
78
|
+
for (let i = 0; i < m; i++) {
|
|
79
|
+
const str = extractedStrings[i];
|
|
80
|
+
let arr = stringToIndices.get(str);
|
|
81
|
+
if (!arr) {
|
|
82
|
+
arr = [];
|
|
83
|
+
stringToIndices.set(str, arr);
|
|
84
|
+
}
|
|
85
|
+
arr.push(i);
|
|
78
86
|
}
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
87
|
+
let out = 0;
|
|
88
|
+
for (let i = 0; i < n; i++) {
|
|
89
|
+
const result = results[i];
|
|
90
|
+
if (removeZero && result.res === 0) continue;
|
|
91
|
+
const targetStr = result.b || '';
|
|
92
|
+
const indices = stringToIndices.get(targetStr);
|
|
93
|
+
let dataIndex;
|
|
94
|
+
if (indices && indices.length > 0) {
|
|
95
|
+
const occurrence = occurrenceCount.get(targetStr) ?? 0;
|
|
96
|
+
occurrenceCount.set(targetStr, occurrence + 1);
|
|
97
|
+
dataIndex = indices[occurrence % indices.length];
|
|
98
|
+
} else {
|
|
99
|
+
dataIndex = result.__idx ?? i;
|
|
100
|
+
}
|
|
101
|
+
if (dataIndex < 0 || dataIndex >= sourceData.length) continue;
|
|
102
|
+
const sourceObj = sourceData[dataIndex];
|
|
103
|
+
const mappedTarget = extractedStrings[dataIndex] || targetStr;
|
|
104
|
+
if (objectsOnly) output[out++] = sourceObj;
|
|
105
|
+
else
|
|
106
|
+
output[out++] = {
|
|
107
|
+
obj: sourceObj,
|
|
108
|
+
key: this.key,
|
|
109
|
+
result: {
|
|
110
|
+
source: result.a,
|
|
111
|
+
target: mappedTarget,
|
|
112
|
+
match: result.res
|
|
113
|
+
},
|
|
114
|
+
...(result.raw ? { raw: result.raw } : null)
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
output.length = out;
|
|
118
|
+
return output;
|
|
119
|
+
} finally {
|
|
120
|
+
Pool.release('map', stringToIndices, m);
|
|
121
|
+
Pool.release('map', occurrenceCount, n);
|
|
90
122
|
}
|
|
91
|
-
output.length = out;
|
|
92
|
-
return output;
|
|
93
123
|
}
|
|
94
124
|
sort(results, sort) {
|
|
95
125
|
if (!sort || results.length <= 1) return results;
|
|
96
|
-
|
|
97
|
-
|
|
126
|
+
return results.sort(
|
|
127
|
+
sort === 'asc' ? StructuredData.SORT_ASC : StructuredData.SORT_DESC
|
|
128
|
+
);
|
|
98
129
|
}
|
|
99
130
|
finalizeLookup(results, extractedStrings, opt) {
|
|
100
131
|
return this.rebuild(
|
|
@@ -106,10 +137,18 @@ class StructuredData {
|
|
|
106
137
|
);
|
|
107
138
|
}
|
|
108
139
|
performLookup(fn, extractedStrings, opt) {
|
|
109
|
-
return
|
|
140
|
+
return ErrorUtil.wrap(
|
|
141
|
+
() => this.finalizeLookup(fn(), extractedStrings, opt),
|
|
142
|
+
'StructuredData lookup failed',
|
|
143
|
+
{ key: this.key }
|
|
144
|
+
);
|
|
110
145
|
}
|
|
111
146
|
async performLookupAsync(fn, extractedStrings, opt) {
|
|
112
|
-
return
|
|
147
|
+
return await ErrorUtil.wrapAsync(
|
|
148
|
+
async () => this.finalizeLookup(await fn(), extractedStrings, opt),
|
|
149
|
+
'StructuredData async lookup failed',
|
|
150
|
+
{ key: this.key }
|
|
151
|
+
);
|
|
113
152
|
}
|
|
114
153
|
lookup(fn, query, opt) {
|
|
115
154
|
const b = this.extract();
|
|
@@ -150,4 +189,3 @@ class StructuredData {
|
|
|
150
189
|
}
|
|
151
190
|
|
|
152
191
|
export { StructuredData };
|
|
153
|
-
//# sourceMappingURL=StructuredData.mjs.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
class TextAnalyzer {
|
|
3
3
|
static REGEX = {
|
|
4
4
|
number: /\d/,
|
|
@@ -194,4 +194,3 @@ class TextAnalyzer {
|
|
|
194
194
|
}
|
|
195
195
|
|
|
196
196
|
export { TextAnalyzer };
|
|
197
|
-
//# sourceMappingURL=TextAnalyzer.mjs.map
|
package/dist/types/CmpStr.d.ts
CHANGED
|
@@ -98,8 +98,8 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
98
98
|
static readonly clearCache: {
|
|
99
99
|
normalizer: typeof Normalizer.clear;
|
|
100
100
|
filter: typeof Filter.clearPipeline;
|
|
101
|
-
metric:
|
|
102
|
-
phonetic:
|
|
101
|
+
metric: typeof Metric.clear;
|
|
102
|
+
phonetic: typeof Phonetic.clear;
|
|
103
103
|
};
|
|
104
104
|
/**
|
|
105
105
|
* Returns a TextAnalyzer instance for the given input string.
|
|
@@ -153,7 +153,8 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
153
153
|
*
|
|
154
154
|
* @param {string} cond - The condition to met
|
|
155
155
|
* @param {any} [test] - Value to test for
|
|
156
|
-
* @throws {
|
|
156
|
+
* @throws {CmpStrValidationError} - If the specified metric or phonetic algorithm is not found
|
|
157
|
+
* @throws {CmpStrInternalError} - If an unknown condition is specified
|
|
157
158
|
*/
|
|
158
159
|
protected assert(cond: string, test?: any): void;
|
|
159
160
|
/**
|
|
@@ -164,10 +165,11 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
164
165
|
protected assertMany(...cond: [string, any?][]): void;
|
|
165
166
|
/**
|
|
166
167
|
* Resolves the options for the CmpStr instance, merging the provided options with
|
|
167
|
-
* the existing options.
|
|
168
|
+
* the existing options. Validates them and throws if the options are invalid.
|
|
168
169
|
*
|
|
169
170
|
* @param {CmpStrOptions} [opt] - Optional options to merge
|
|
170
171
|
* @returns {CmpStrOptions} - The resolved options
|
|
172
|
+
* @throws {CmpStrValidationError} - If the merged options are invalid
|
|
171
173
|
*/
|
|
172
174
|
protected resolveOptions(opt?: CmpStrOptions): CmpStrOptions;
|
|
173
175
|
/**
|
|
@@ -233,6 +235,8 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
233
235
|
* @param {boolean} [raw=false] - Whether to return raw results
|
|
234
236
|
* @param {boolean} [skip=false] - Whether to skip normalization and filtering
|
|
235
237
|
* @returns {T} - The computed metric result
|
|
238
|
+
* @throws {CmpStrValidationError} - If the options are invalid
|
|
239
|
+
* @throws {CmpStrInternalError} - If the computation fails due to internal errors
|
|
236
240
|
*/
|
|
237
241
|
protected compute<T extends MetricResult<R> | CmpStrResult | CmpStrResult[]>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions, mode?: MetricMode, raw?: boolean, skip?: boolean): T;
|
|
238
242
|
/**
|
|
@@ -242,6 +246,7 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
242
246
|
* @param {MetricResult< R >} result - The metric result
|
|
243
247
|
* @param {boolean} [raw] - Whether to return raw results
|
|
244
248
|
* @returns {T} - The resolved result
|
|
249
|
+
* @throws {CmpStrInternalError} - If the output format cannot be resolved
|
|
245
250
|
*/
|
|
246
251
|
protected output<T extends MetricResult<R> | CmpStrResult | CmpStrResult[]>(result: MetricResult<R>, raw?: boolean): T;
|
|
247
252
|
/**
|
|
@@ -257,7 +262,7 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
257
262
|
*
|
|
258
263
|
* @returns {CmpStr< R >} - The cloned instance
|
|
259
264
|
*/
|
|
260
|
-
clone
|
|
265
|
+
clone(): CmpStr<R>;
|
|
261
266
|
/**
|
|
262
267
|
* Resets the instance, clearing all data and options.
|
|
263
268
|
*
|
|
@@ -269,6 +274,7 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
269
274
|
*
|
|
270
275
|
* @param {CmpStrOptions} opt - The options
|
|
271
276
|
* @returns {this}
|
|
277
|
+
* @throws {CmpStrValidationError} - If the provided options are invalid
|
|
272
278
|
*/
|
|
273
279
|
setOptions(opt: CmpStrOptions): this;
|
|
274
280
|
/**
|
|
@@ -276,6 +282,7 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
276
282
|
*
|
|
277
283
|
* @param {CmpStrOptions} opt - The options to merge
|
|
278
284
|
* @returns {this}
|
|
285
|
+
* @throws {CmpStrValidationError} - If the merged options are invalid
|
|
279
286
|
*/
|
|
280
287
|
mergeOptions(opt: CmpStrOptions): this;
|
|
281
288
|
/**
|
|
@@ -283,6 +290,7 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
283
290
|
*
|
|
284
291
|
* @param {string} opt - The serialized options
|
|
285
292
|
* @returns {this}
|
|
293
|
+
* @throws {CmpStrValidationError} - If the provided string is not valid JSON or the options are invalid
|
|
286
294
|
*/
|
|
287
295
|
setSerializedOptions(opt: string): this;
|
|
288
296
|
/**
|
|
@@ -291,6 +299,7 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
291
299
|
* @param {string} path - The path to the option
|
|
292
300
|
* @param {any} value - The value to set
|
|
293
301
|
* @returns {this}
|
|
302
|
+
* @throws {CmpStrValidationError} - If the updated options are invalid
|
|
294
303
|
*/
|
|
295
304
|
setOption(path: string, value: any): this;
|
|
296
305
|
/**
|
|
@@ -306,59 +315,59 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
306
315
|
* @param {boolean} enable - Whether to enable or disable raw output
|
|
307
316
|
* @returns {this}
|
|
308
317
|
*/
|
|
309
|
-
setRaw
|
|
318
|
+
setRaw(enable: boolean): this;
|
|
310
319
|
/**
|
|
311
320
|
* Sets the similatity metric to use (e.g., 'levenshtein', 'dice').
|
|
312
321
|
*
|
|
313
322
|
* @param {string} name - The metric name
|
|
314
323
|
* @returns {this}
|
|
315
324
|
*/
|
|
316
|
-
setMetric
|
|
325
|
+
setMetric(name: string): this;
|
|
317
326
|
/**
|
|
318
327
|
* Sets the normalization flags (e.g., 'itw', 'nfc').
|
|
319
328
|
*
|
|
320
329
|
* @param {NormalizeFlags} flags - The normalization flags
|
|
321
330
|
* @returns {this}
|
|
322
331
|
*/
|
|
323
|
-
setFlags
|
|
332
|
+
setFlags(flags: NormalizeFlags): this;
|
|
324
333
|
/**
|
|
325
334
|
* Removes the normalization flags entirely.
|
|
326
335
|
*
|
|
327
336
|
* @return {this}
|
|
328
337
|
*/
|
|
329
|
-
rmvFlags
|
|
338
|
+
rmvFlags(): this;
|
|
330
339
|
/**
|
|
331
340
|
* Sets the pre-processors to use for preparing the input.
|
|
332
341
|
*
|
|
333
342
|
* @param {CmpStrProcessors} opt - The processors to set
|
|
334
343
|
* @returns {this}
|
|
335
344
|
*/
|
|
336
|
-
setProcessors
|
|
345
|
+
setProcessors(opt: CmpStrProcessors): this;
|
|
337
346
|
/**
|
|
338
347
|
* Removes the processors entirely.
|
|
339
348
|
*
|
|
340
349
|
* @returns {this}
|
|
341
350
|
*/
|
|
342
|
-
rmvProcessors
|
|
351
|
+
rmvProcessors(): this;
|
|
343
352
|
/**
|
|
344
353
|
* Returns the current options object.
|
|
345
354
|
*
|
|
346
355
|
* @returns {CmpStrOptions} - The options
|
|
347
356
|
*/
|
|
348
|
-
getOptions
|
|
357
|
+
getOptions(): CmpStrOptions;
|
|
349
358
|
/**
|
|
350
359
|
* Returns the options as a JSON string.
|
|
351
360
|
*
|
|
352
361
|
* @returns {string} - The serialized options
|
|
353
362
|
*/
|
|
354
|
-
getSerializedOptions
|
|
363
|
+
getSerializedOptions(): string;
|
|
355
364
|
/**
|
|
356
365
|
* Returns a specific option value by path.
|
|
357
366
|
*
|
|
358
367
|
* @param {string} path - The path to the option
|
|
359
368
|
* @returns {any} - The option value
|
|
360
369
|
*/
|
|
361
|
-
getOption
|
|
370
|
+
getOption(path: string): any;
|
|
362
371
|
/**
|
|
363
372
|
* ================================================================================-
|
|
364
373
|
* Public core methods for string comparison
|
|
@@ -468,6 +477,8 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
468
477
|
/**
|
|
469
478
|
* Computes a similarity matrix for the given input array.
|
|
470
479
|
*
|
|
480
|
+
* Only works for symmetric metrics.
|
|
481
|
+
*
|
|
471
482
|
* @param {string[]} input - The input array
|
|
472
483
|
* @param {CmpStrOptions} [opt] - Optional options
|
|
473
484
|
* @returns {number[][]} - The similarity matrix
|
|
@@ -106,6 +106,8 @@ export declare class CmpStrAsync<R = MetricRaw> extends CmpStr<R> {
|
|
|
106
106
|
* @param {boolean} [raw=false] - Whether to return raw results
|
|
107
107
|
* @param {boolean} [skip=false] - Whether to skip normalization and filtering
|
|
108
108
|
* @returns {Promise< T >} - The computed metric result
|
|
109
|
+
* @throws {CmpStrValidationError} - If the options are invalid
|
|
110
|
+
* @throws {CmpStrInternalError} - If the computation fails due to internal errors
|
|
109
111
|
*/
|
|
110
112
|
protected computeAsync<T extends MetricResult<R> | CmpStrResult | CmpStrResult[]>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions, mode?: MetricMode, raw?: boolean, skip?: boolean): Promise<T>;
|
|
111
113
|
/**
|
|
@@ -217,6 +219,8 @@ export declare class CmpStrAsync<R = MetricRaw> extends CmpStr<R> {
|
|
|
217
219
|
/**
|
|
218
220
|
* Asynchronously computes a similarity matrix for the given input array.
|
|
219
221
|
*
|
|
222
|
+
* Only works for symmetric metrics.
|
|
223
|
+
*
|
|
220
224
|
* @param {string[]} input - The input array
|
|
221
225
|
* @param {CmpStrOptions} [opt] - Optional options
|
|
222
226
|
* @returns {Promise< number[][] >} - The similarity matrix
|
package/dist/types/index.d.ts
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* filtering, and text analysis. It is designed for both high-level application development
|
|
8
8
|
* and research, offering a unified API for single, batch, and pairwise operations.
|
|
9
9
|
*
|
|
10
|
-
* Version: 3.
|
|
10
|
+
* Version: 3.3.0
|
|
11
11
|
* Author: Paul Köhler (komed3)
|
|
12
12
|
* License: MIT
|
|
13
13
|
*
|
|
@@ -41,11 +41,12 @@
|
|
|
41
41
|
* For asynchronous workloads, use `CmpStrAsync`, which provides the same API with
|
|
42
42
|
* Promise-based, non-blocking methods for large-scale or I/O-bound operations.
|
|
43
43
|
*
|
|
44
|
-
* @version 3.
|
|
44
|
+
* @version 3.3.0
|
|
45
45
|
* @author Paul Köhler (komed3)
|
|
46
46
|
* @license MIT
|
|
47
47
|
*/
|
|
48
48
|
export * from './utils/Types';
|
|
49
|
+
export type { CmpStrError, CmpStrValidationError, CmpStrNotFoundError, CmpStrUsageError, CmpStrInternalError, ErrorCode } from './utils/Errors';
|
|
49
50
|
export { CmpStr } from './CmpStr';
|
|
50
51
|
export { CmpStrAsync } from './CmpStrAsync';
|
|
51
52
|
export { DiffChecker } from './utils/DiffChecker';
|
|
@@ -56,7 +56,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
56
56
|
/**
|
|
57
57
|
* Static method to clear the cache of metric computations.
|
|
58
58
|
*/
|
|
59
|
-
static clear
|
|
59
|
+
static clear(): void;
|
|
60
60
|
/**
|
|
61
61
|
* Swaps two strings and their lengths if the first is longer than the second.
|
|
62
62
|
*
|
|
@@ -66,14 +66,14 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
66
66
|
* @param {number} n - Length of the second string
|
|
67
67
|
* @returns {[ string, string, number, number ]} - Swapped strings and lengths
|
|
68
68
|
*/
|
|
69
|
-
protected static swap
|
|
69
|
+
protected static swap(a: string, b: string, m: number, n: number): [string, string, number, number];
|
|
70
70
|
/**
|
|
71
71
|
* Clamps the similarity result between 0 and 1.
|
|
72
72
|
*
|
|
73
73
|
* @param {number} res - The input similarity to clamp
|
|
74
74
|
* @returns {number} - The clamped similarity (0 to 1)
|
|
75
75
|
*/
|
|
76
|
-
protected static clamp
|
|
76
|
+
protected static clamp(res: number): number;
|
|
77
77
|
/**
|
|
78
78
|
* Constructor for the Metric class.
|
|
79
79
|
* Initializes the metric with two inputs (strings or arrays of strings) and options.
|
|
@@ -83,7 +83,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
83
83
|
* @param {MetricInput} b - Second input string or array of strings
|
|
84
84
|
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
85
85
|
* @param {boolean} [symmetric=false] - Whether the metric is symmetric (same result for inputs in any order)
|
|
86
|
-
* @throws {
|
|
86
|
+
* @throws {CmpStrUsageError} - If the inputs are empty or invalid
|
|
87
87
|
*/
|
|
88
88
|
constructor(metric: string, a: MetricInput, b: MetricInput, opt?: MetricOptions, symmetric?: boolean);
|
|
89
89
|
/**
|
|
@@ -107,7 +107,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
107
107
|
* @param {number} n - Length of the second string
|
|
108
108
|
* @param {number} maxLen - Maximum length of the strings
|
|
109
109
|
* @returns {MetricCompute< R >} - The result of the metric computation
|
|
110
|
-
* @throws {
|
|
110
|
+
* @throws {CmpStrInternalError} - If the method is not overridden in a subclass
|
|
111
111
|
*/
|
|
112
112
|
protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<R>;
|
|
113
113
|
/**
|
|
@@ -119,6 +119,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
119
119
|
* @param {number} i - Pointer to the first string
|
|
120
120
|
* @param {number} j - Pointer to the second string
|
|
121
121
|
* @returns {MetricResultSingle< R >} - The result of the metric computation
|
|
122
|
+
* @throws {CmpStrInternalError} - If the metric computation fails for the given inputs
|
|
122
123
|
*/
|
|
123
124
|
private runSingle;
|
|
124
125
|
/**
|
|
@@ -166,7 +167,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
166
167
|
*
|
|
167
168
|
* @returns {boolean} - True if either input is an array with more than one element
|
|
168
169
|
*/
|
|
169
|
-
isBatch
|
|
170
|
+
isBatch(): boolean;
|
|
170
171
|
/**
|
|
171
172
|
* Check if the inputs are in single mode.
|
|
172
173
|
*
|
|
@@ -175,7 +176,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
175
176
|
*
|
|
176
177
|
* @returns {boolean} - True if both inputs are single strings
|
|
177
178
|
*/
|
|
178
|
-
isSingle
|
|
179
|
+
isSingle(): boolean;
|
|
179
180
|
/**
|
|
180
181
|
* Check if the inputs are in pairwise mode.
|
|
181
182
|
*
|
|
@@ -184,7 +185,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
184
185
|
*
|
|
185
186
|
* @returns {boolean} - True if both inputs are arrays of equal length
|
|
186
187
|
* @param {boolean} [safe=false] - If true, does not throw an error if lengths are not equal
|
|
187
|
-
* @throws {
|
|
188
|
+
* @throws {CmpStrUsageError} - If `safe` is false and the lengths of `a` and `b` are not equal
|
|
188
189
|
*/
|
|
189
190
|
isPairwise(safe?: boolean): boolean;
|
|
190
191
|
/**
|
|
@@ -195,7 +196,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
195
196
|
*
|
|
196
197
|
* @returns {boolean} - True if the metric is symmetric
|
|
197
198
|
*/
|
|
198
|
-
isSymmetrical
|
|
199
|
+
isSymmetrical(): boolean;
|
|
199
200
|
/**
|
|
200
201
|
* Determine which mode to run the metric in.
|
|
201
202
|
*
|
|
@@ -205,7 +206,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
205
206
|
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
|
|
206
207
|
* @returns {MetricMode} - The determined mode
|
|
207
208
|
*/
|
|
208
|
-
whichMode
|
|
209
|
+
whichMode(mode?: MetricMode): MetricMode;
|
|
209
210
|
/**
|
|
210
211
|
* Clear the cached results of the metric.
|
|
211
212
|
*
|
|
@@ -213,13 +214,13 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
213
214
|
* any previously computed results. It can be useful for re-running the metric
|
|
214
215
|
* with new inputs or options.
|
|
215
216
|
*/
|
|
216
|
-
clear
|
|
217
|
+
clear(): void;
|
|
217
218
|
/**
|
|
218
219
|
* Run the metric computation based on the specified mode.
|
|
219
220
|
*
|
|
220
221
|
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
|
|
221
222
|
* @param {boolean} [clear=true] - Whether to clear previous results before running
|
|
222
|
-
* @throws {
|
|
223
|
+
* @throws {CmpStrInternalError} - If an unsupported mode is specified
|
|
223
224
|
*/
|
|
224
225
|
run(mode?: MetricMode, clear?: boolean): void;
|
|
225
226
|
/**
|
|
@@ -228,7 +229,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
228
229
|
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
|
|
229
230
|
* @param {boolean} [clear=true] - Whether to clear previous results before running
|
|
230
231
|
* @returns {Promise<void>} - A promise that resolves when the metric computation is complete
|
|
231
|
-
* @throws {
|
|
232
|
+
* @throws {CmpStrInternalError} - If an unsupported mode is specified
|
|
232
233
|
*/
|
|
233
234
|
runAsync(mode?: MetricMode, clear?: boolean): Promise<void>;
|
|
234
235
|
/**
|
|
@@ -236,7 +237,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
236
237
|
*
|
|
237
238
|
* @returns {string} - The name of the metric
|
|
238
239
|
*/
|
|
239
|
-
getMetricName
|
|
240
|
+
getMetricName(): string;
|
|
240
241
|
/**
|
|
241
242
|
* Get the result of the metric computation.
|
|
242
243
|
*
|
|
@@ -49,10 +49,11 @@ export declare abstract class Phonetic {
|
|
|
49
49
|
protected readonly options: PhoneticOptions;
|
|
50
50
|
protected readonly optKey: string;
|
|
51
51
|
protected readonly map: PhoneticMap;
|
|
52
|
+
protected readonly ignoreSet: Set<string>;
|
|
52
53
|
/**
|
|
53
54
|
* Static method to clear the cache of indexed words.
|
|
54
55
|
*/
|
|
55
|
-
static clear
|
|
56
|
+
static clear(): void;
|
|
56
57
|
/**
|
|
57
58
|
* Constructor for the Phonetic class.
|
|
58
59
|
*
|
|
@@ -61,7 +62,7 @@ export declare abstract class Phonetic {
|
|
|
61
62
|
*
|
|
62
63
|
* @param {string} algo - The name of the algorithm (e.g. 'soundex')
|
|
63
64
|
* @param {PhoneticOptions} [opt] - Options for the phonetic algorithm
|
|
64
|
-
* @throws {
|
|
65
|
+
* @throws {CmpStrNotFoundError} - If no mapping is specified or if the requested mapping is not declared
|
|
65
66
|
*/
|
|
66
67
|
constructor(algo: string, opt?: PhoneticOptions);
|
|
67
68
|
/**
|
|
@@ -124,7 +125,7 @@ export declare abstract class Phonetic {
|
|
|
124
125
|
* @param {string} word - The input word to be converted
|
|
125
126
|
* @returns {string[]} - An array of characters from the input word
|
|
126
127
|
*/
|
|
127
|
-
protected word2Chars
|
|
128
|
+
protected word2Chars(word: string): string[];
|
|
128
129
|
/**
|
|
129
130
|
* Determines whether to exit early based on the current phonetic code length.
|
|
130
131
|
*
|
|
@@ -149,6 +150,7 @@ export declare abstract class Phonetic {
|
|
|
149
150
|
*
|
|
150
151
|
* @param {string[]} words - An array of words to be processed
|
|
151
152
|
* @returns {string[]} - An array of phonetic indices for the input words
|
|
153
|
+
* @throws {CmpStrInternalError} - If the phonetic index generation fails
|
|
152
154
|
*/
|
|
153
155
|
protected loop(words: string[]): string[];
|
|
154
156
|
/**
|
|
@@ -159,6 +161,7 @@ export declare abstract class Phonetic {
|
|
|
159
161
|
*
|
|
160
162
|
* @param {string[]} words - An array of words to be processed
|
|
161
163
|
* @returns {Promise< string[] >} - A promise that resolves to an array of phonetic indices for the input words
|
|
164
|
+
* @throws {CmpStrInternalError} - If the asynchronous phonetic index generation fails
|
|
162
165
|
*/
|
|
163
166
|
protected loopAsync(words: string[]): Promise<string[]>;
|
|
164
167
|
/**
|
|
@@ -166,7 +169,7 @@ export declare abstract class Phonetic {
|
|
|
166
169
|
*
|
|
167
170
|
* @returns {string} - The name of the algorithm
|
|
168
171
|
*/
|
|
169
|
-
getAlgoName
|
|
172
|
+
getAlgoName(): string;
|
|
170
173
|
/**
|
|
171
174
|
* Generates a phonetic index for the given input string.
|
|
172
175
|
*
|
package/dist/types/root.d.ts
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* Please visit CmpStr's documentation for more information:
|
|
10
10
|
* https://github.com/komed3/cmpstr/wiki/Extending-CmpStr
|
|
11
11
|
*
|
|
12
|
-
* @version 3.
|
|
12
|
+
* @version 3.3.0
|
|
13
13
|
* @author Paul Köhler (komed3)
|
|
14
14
|
* @license MIT
|
|
15
15
|
*/
|
|
@@ -31,9 +31,11 @@ export { Metric, MetricCls, MetricRegistry } from './metric';
|
|
|
31
31
|
* - PhoneticRegistry: Phonetic registry service for managing phonetic algorithm implementations.
|
|
32
32
|
*/
|
|
33
33
|
export { Phonetic, PhoneticCls, PhoneticMappingRegistry, PhoneticRegistry } from './phonetic';
|
|
34
|
-
export
|
|
34
|
+
export { DeepMerge } from './utils/DeepMerge';
|
|
35
|
+
export * as CmpStrError from './utils/Errors';
|
|
35
36
|
export { Filter } from './utils/Filter';
|
|
36
37
|
export { Hasher, HashTable } from './utils/HashTable';
|
|
38
|
+
export { OptionsValidator } from './utils/OptionsValidator';
|
|
37
39
|
export { Pool } from './utils/Pool';
|
|
38
40
|
export { Profiler } from './utils/Profiler';
|
|
39
41
|
export { StructuredData } from './utils/StructuredData';
|