cmpstr 3.2.1 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -18
- package/dist/CmpStr.esm.js +1904 -1211
- package/dist/CmpStr.esm.min.js +2 -3
- package/dist/CmpStr.umd.js +1924 -1236
- package/dist/CmpStr.umd.min.js +2 -3
- package/dist/cjs/CmpStr.cjs +134 -64
- package/dist/cjs/CmpStrAsync.cjs +60 -37
- package/dist/cjs/index.cjs +1 -2
- package/dist/cjs/metric/Cosine.cjs +1 -2
- package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -2
- package/dist/cjs/metric/DiceSorensen.cjs +1 -2
- package/dist/cjs/metric/Hamming.cjs +5 -4
- package/dist/cjs/metric/Jaccard.cjs +1 -2
- package/dist/cjs/metric/JaroWinkler.cjs +1 -2
- package/dist/cjs/metric/LCS.cjs +1 -2
- package/dist/cjs/metric/Levenshtein.cjs +1 -2
- package/dist/cjs/metric/Metric.cjs +90 -53
- package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -2
- package/dist/cjs/metric/QGram.cjs +1 -2
- package/dist/cjs/metric/SmithWaterman.cjs +1 -2
- package/dist/cjs/phonetic/Caverphone.cjs +1 -2
- package/dist/cjs/phonetic/Cologne.cjs +1 -2
- package/dist/cjs/phonetic/Metaphone.cjs +1 -2
- package/dist/cjs/phonetic/Phonetic.cjs +80 -48
- package/dist/cjs/phonetic/Soundex.cjs +1 -2
- package/dist/cjs/root.cjs +6 -3
- package/dist/cjs/utils/DeepMerge.cjs +109 -99
- package/dist/cjs/utils/DiffChecker.cjs +1 -2
- package/dist/cjs/utils/Errors.cjs +106 -0
- package/dist/cjs/utils/Filter.cjs +97 -37
- package/dist/cjs/utils/HashTable.cjs +44 -30
- package/dist/cjs/utils/Normalizer.cjs +84 -35
- package/dist/cjs/utils/OptionsValidator.cjs +211 -0
- package/dist/cjs/utils/Pool.cjs +57 -19
- package/dist/cjs/utils/Profiler.cjs +41 -28
- package/dist/cjs/utils/Registry.cjs +48 -24
- package/dist/cjs/utils/StructuredData.cjs +95 -57
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -2
- package/dist/esm/CmpStr.mjs +133 -61
- package/dist/esm/CmpStrAsync.mjs +56 -33
- package/dist/esm/index.mjs +1 -2
- package/dist/esm/metric/Cosine.mjs +1 -2
- package/dist/esm/metric/DamerauLevenshtein.mjs +1 -2
- package/dist/esm/metric/DiceSorensen.mjs +1 -2
- package/dist/esm/metric/Hamming.mjs +5 -4
- package/dist/esm/metric/Jaccard.mjs +1 -2
- package/dist/esm/metric/JaroWinkler.mjs +1 -2
- package/dist/esm/metric/LCS.mjs +1 -2
- package/dist/esm/metric/Levenshtein.mjs +1 -2
- package/dist/esm/metric/Metric.mjs +92 -53
- package/dist/esm/metric/NeedlemanWunsch.mjs +1 -2
- package/dist/esm/metric/QGram.mjs +1 -2
- package/dist/esm/metric/SmithWaterman.mjs +1 -2
- package/dist/esm/phonetic/Caverphone.mjs +1 -2
- package/dist/esm/phonetic/Cologne.mjs +1 -2
- package/dist/esm/phonetic/Metaphone.mjs +1 -2
- package/dist/esm/phonetic/Phonetic.mjs +83 -48
- package/dist/esm/phonetic/Soundex.mjs +1 -2
- package/dist/esm/root.mjs +5 -4
- package/dist/esm/utils/DeepMerge.mjs +109 -95
- package/dist/esm/utils/DiffChecker.mjs +1 -2
- package/dist/esm/utils/Errors.mjs +106 -0
- package/dist/esm/utils/Filter.mjs +97 -37
- package/dist/esm/utils/HashTable.mjs +44 -30
- package/dist/esm/utils/Normalizer.mjs +84 -35
- package/dist/esm/utils/OptionsValidator.mjs +210 -0
- package/dist/esm/utils/Pool.mjs +53 -19
- package/dist/esm/utils/Profiler.mjs +41 -28
- package/dist/esm/utils/Registry.mjs +48 -24
- package/dist/esm/utils/StructuredData.mjs +95 -57
- package/dist/esm/utils/TextAnalyzer.mjs +1 -2
- package/dist/types/CmpStr.d.ts +25 -14
- package/dist/types/CmpStrAsync.d.ts +4 -0
- package/dist/types/index.d.ts +3 -2
- package/dist/types/metric/Metric.d.ts +15 -14
- package/dist/types/phonetic/Phonetic.d.ts +7 -4
- package/dist/types/root.d.ts +4 -2
- package/dist/types/utils/DeepMerge.d.ts +80 -58
- package/dist/types/utils/Errors.d.ts +154 -0
- package/dist/types/utils/Filter.d.ts +8 -1
- package/dist/types/utils/HashTable.d.ts +12 -11
- package/dist/types/utils/Normalizer.d.ts +5 -1
- package/dist/types/utils/OptionsValidator.d.ts +193 -0
- package/dist/types/utils/Pool.d.ts +2 -0
- package/dist/types/utils/Profiler.d.ts +9 -28
- package/dist/types/utils/Registry.d.ts +3 -3
- package/dist/types/utils/StructuredData.d.ts +6 -1
- package/dist/types/utils/Types.d.ts +39 -1
- package/package.json +20 -11
- package/dist/CmpStr.esm.js.map +0 -1
- package/dist/CmpStr.esm.min.js.map +0 -1
- package/dist/CmpStr.umd.js.map +0 -1
- package/dist/CmpStr.umd.min.js.map +0 -1
- package/dist/cjs/CmpStr.cjs.map +0 -1
- package/dist/cjs/CmpStrAsync.cjs.map +0 -1
- package/dist/cjs/index.cjs.map +0 -1
- package/dist/cjs/metric/Cosine.cjs.map +0 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +0 -1
- package/dist/cjs/metric/DiceSorensen.cjs.map +0 -1
- package/dist/cjs/metric/Hamming.cjs.map +0 -1
- package/dist/cjs/metric/Jaccard.cjs.map +0 -1
- package/dist/cjs/metric/JaroWinkler.cjs.map +0 -1
- package/dist/cjs/metric/LCS.cjs.map +0 -1
- package/dist/cjs/metric/Levenshtein.cjs.map +0 -1
- package/dist/cjs/metric/Metric.cjs.map +0 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +0 -1
- package/dist/cjs/metric/QGram.cjs.map +0 -1
- package/dist/cjs/metric/SmithWaterman.cjs.map +0 -1
- package/dist/cjs/phonetic/Caverphone.cjs.map +0 -1
- package/dist/cjs/phonetic/Cologne.cjs.map +0 -1
- package/dist/cjs/phonetic/Metaphone.cjs.map +0 -1
- package/dist/cjs/phonetic/Phonetic.cjs.map +0 -1
- package/dist/cjs/phonetic/Soundex.cjs.map +0 -1
- package/dist/cjs/root.cjs.map +0 -1
- package/dist/cjs/utils/DeepMerge.cjs.map +0 -1
- package/dist/cjs/utils/DiffChecker.cjs.map +0 -1
- package/dist/cjs/utils/Filter.cjs.map +0 -1
- package/dist/cjs/utils/HashTable.cjs.map +0 -1
- package/dist/cjs/utils/Normalizer.cjs.map +0 -1
- package/dist/cjs/utils/Pool.cjs.map +0 -1
- package/dist/cjs/utils/Profiler.cjs.map +0 -1
- package/dist/cjs/utils/Registry.cjs.map +0 -1
- package/dist/cjs/utils/StructuredData.cjs.map +0 -1
- package/dist/cjs/utils/TextAnalyzer.cjs.map +0 -1
- package/dist/esm/CmpStr.mjs.map +0 -1
- package/dist/esm/CmpStrAsync.mjs.map +0 -1
- package/dist/esm/index.mjs.map +0 -1
- package/dist/esm/metric/Cosine.mjs.map +0 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +0 -1
- package/dist/esm/metric/DiceSorensen.mjs.map +0 -1
- package/dist/esm/metric/Hamming.mjs.map +0 -1
- package/dist/esm/metric/Jaccard.mjs.map +0 -1
- package/dist/esm/metric/JaroWinkler.mjs.map +0 -1
- package/dist/esm/metric/LCS.mjs.map +0 -1
- package/dist/esm/metric/Levenshtein.mjs.map +0 -1
- package/dist/esm/metric/Metric.mjs.map +0 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +0 -1
- package/dist/esm/metric/QGram.mjs.map +0 -1
- package/dist/esm/metric/SmithWaterman.mjs.map +0 -1
- package/dist/esm/phonetic/Caverphone.mjs.map +0 -1
- package/dist/esm/phonetic/Cologne.mjs.map +0 -1
- package/dist/esm/phonetic/Metaphone.mjs.map +0 -1
- package/dist/esm/phonetic/Phonetic.mjs.map +0 -1
- package/dist/esm/phonetic/Soundex.mjs.map +0 -1
- package/dist/esm/root.mjs.map +0 -1
- package/dist/esm/utils/DeepMerge.mjs.map +0 -1
- package/dist/esm/utils/DiffChecker.mjs.map +0 -1
- package/dist/esm/utils/Filter.mjs.map +0 -1
- package/dist/esm/utils/HashTable.mjs.map +0 -1
- package/dist/esm/utils/Normalizer.mjs.map +0 -1
- package/dist/esm/utils/Pool.mjs.map +0 -1
- package/dist/esm/utils/Profiler.mjs.map +0 -1
- package/dist/esm/utils/Registry.mjs.map +0 -1
- package/dist/esm/utils/StructuredData.mjs.map +0 -1
- package/dist/esm/utils/TextAnalyzer.mjs.map +0 -1
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
|
+
import {
|
|
3
|
+
ErrorUtil,
|
|
4
|
+
CmpStrInternalError,
|
|
5
|
+
CmpStrUsageError
|
|
6
|
+
} from '../utils/Errors.mjs';
|
|
2
7
|
import { HashTable, Hasher } from '../utils/HashTable.mjs';
|
|
3
8
|
import { Profiler } from '../utils/Profiler.mjs';
|
|
4
9
|
import { Registry } from '../utils/Registry.mjs';
|
|
@@ -15,15 +20,24 @@ class Metric {
|
|
|
15
20
|
optKey;
|
|
16
21
|
symmetric;
|
|
17
22
|
results;
|
|
18
|
-
static clear
|
|
19
|
-
|
|
20
|
-
|
|
23
|
+
static clear() {
|
|
24
|
+
this.cache.clear();
|
|
25
|
+
}
|
|
26
|
+
static swap(a, b, m, n) {
|
|
27
|
+
return m > n ? [b, a, n, m] : [a, b, m, n];
|
|
28
|
+
}
|
|
29
|
+
static clamp(res) {
|
|
30
|
+
return Math.max(0, Math.min(1, res));
|
|
31
|
+
}
|
|
21
32
|
constructor(metric, a, b, opt = {}, symmetric = false) {
|
|
22
33
|
this.metric = metric;
|
|
23
34
|
this.a = Array.isArray(a) ? a : [a];
|
|
24
35
|
this.b = Array.isArray(b) ? b : [b];
|
|
25
|
-
|
|
26
|
-
|
|
36
|
+
ErrorUtil.assert(
|
|
37
|
+
this.a.length > 0 && this.b.length > 0,
|
|
38
|
+
`Inputs <a> and <b> must not be empty`,
|
|
39
|
+
{ a: this.a, b: this.b }
|
|
40
|
+
);
|
|
27
41
|
this.options = opt;
|
|
28
42
|
this.optKey = Hasher.fastFNV1a(
|
|
29
43
|
JSON.stringify(opt, Object.keys(opt).sort())
|
|
@@ -36,37 +50,46 @@ class Metric {
|
|
|
36
50
|
return undefined;
|
|
37
51
|
}
|
|
38
52
|
compute(a, b, m, n, maxLen) {
|
|
39
|
-
throw new
|
|
53
|
+
throw new CmpStrInternalError(
|
|
54
|
+
`Method compute() must be overridden in a subclass`
|
|
55
|
+
);
|
|
40
56
|
}
|
|
41
57
|
runSingle(i, j) {
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
if (
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
58
|
+
return ErrorUtil.wrap(
|
|
59
|
+
() => {
|
|
60
|
+
let a = String(this.a[i]),
|
|
61
|
+
A = a;
|
|
62
|
+
let b = String(this.b[j]),
|
|
63
|
+
B = b;
|
|
64
|
+
let m = A.length,
|
|
65
|
+
n = B.length;
|
|
66
|
+
let result = this.preCompute(A, B, m, n);
|
|
67
|
+
if (!result) {
|
|
68
|
+
result = profiler.run(() => {
|
|
69
|
+
if (this.symmetric) [A, B, m, n] = Metric.swap(A, B, m, n);
|
|
70
|
+
let key = Metric.cache.key(this.metric, [A, B], this.symmetric);
|
|
71
|
+
if (key) key += this.optKey;
|
|
72
|
+
return (
|
|
73
|
+
Metric.cache.get(key || '') ??
|
|
74
|
+
(() => {
|
|
75
|
+
const maxLen = m > n ? m : n;
|
|
76
|
+
const res = this.compute(A, B, m, n, maxLen);
|
|
77
|
+
if (key) Metric.cache.set(key, res);
|
|
78
|
+
return res;
|
|
79
|
+
})()
|
|
80
|
+
);
|
|
81
|
+
});
|
|
82
|
+
}
|
|
83
|
+
return {
|
|
84
|
+
metric: this.metric,
|
|
85
|
+
a: this.origA.length > i ? this.origA[i] : a,
|
|
86
|
+
b: this.origB.length > j ? this.origB[j] : b,
|
|
87
|
+
...result
|
|
88
|
+
};
|
|
89
|
+
},
|
|
90
|
+
`Failed to compute metric for inputs at indices a[${i}] and b[${j}]`,
|
|
91
|
+
{ i, j }
|
|
92
|
+
);
|
|
70
93
|
}
|
|
71
94
|
async runSingleAsync(i, j) {
|
|
72
95
|
return Promise.resolve(this.runSingle(i, j));
|
|
@@ -79,11 +102,11 @@ class Metric {
|
|
|
79
102
|
this.results = results;
|
|
80
103
|
}
|
|
81
104
|
async runBatchAsync() {
|
|
82
|
-
const
|
|
105
|
+
const tasks = [];
|
|
83
106
|
for (let i = 0; i < this.a.length; i++)
|
|
84
107
|
for (let j = 0; j < this.b.length; j++)
|
|
85
|
-
|
|
86
|
-
this.results =
|
|
108
|
+
tasks.push(this.runSingleAsync(i, j));
|
|
109
|
+
this.results = await Promise.all(tasks);
|
|
87
110
|
}
|
|
88
111
|
runPairwise() {
|
|
89
112
|
const results = [];
|
|
@@ -91,29 +114,42 @@ class Metric {
|
|
|
91
114
|
this.results = results;
|
|
92
115
|
}
|
|
93
116
|
async runPairwiseAsync() {
|
|
94
|
-
const
|
|
117
|
+
const tasks = [];
|
|
95
118
|
for (let i = 0; i < this.a.length; i++)
|
|
96
|
-
|
|
97
|
-
this.results =
|
|
119
|
+
tasks.push(this.runSingleAsync(i, i));
|
|
120
|
+
this.results = await Promise.all(tasks);
|
|
98
121
|
}
|
|
99
122
|
setOriginal(a, b) {
|
|
100
123
|
if (a) this.origA = Array.isArray(a) ? a : [a];
|
|
101
124
|
if (b) this.origB = Array.isArray(b) ? b : [b];
|
|
102
125
|
return this;
|
|
103
126
|
}
|
|
104
|
-
isBatch
|
|
105
|
-
|
|
127
|
+
isBatch() {
|
|
128
|
+
return this.a.length > 1 || this.b.length > 1;
|
|
129
|
+
}
|
|
130
|
+
isSingle() {
|
|
131
|
+
return !this.isBatch();
|
|
132
|
+
}
|
|
106
133
|
isPairwise(safe = false) {
|
|
107
134
|
return this.isBatch() && this.a.length === this.b.length
|
|
108
135
|
? true
|
|
109
136
|
: !safe &&
|
|
110
137
|
(() => {
|
|
111
|
-
throw new
|
|
138
|
+
throw new CmpStrUsageError(
|
|
139
|
+
`Mode <pairwise> requires arrays of equal length`,
|
|
140
|
+
{ a: this.a, b: this.b }
|
|
141
|
+
);
|
|
112
142
|
})();
|
|
113
143
|
}
|
|
114
|
-
isSymmetrical
|
|
115
|
-
|
|
116
|
-
|
|
144
|
+
isSymmetrical() {
|
|
145
|
+
return this.symmetric;
|
|
146
|
+
}
|
|
147
|
+
whichMode(mode) {
|
|
148
|
+
return mode ?? this.options.mode ?? 'default';
|
|
149
|
+
}
|
|
150
|
+
clear() {
|
|
151
|
+
this.results = undefined;
|
|
152
|
+
}
|
|
117
153
|
run(mode, clear = true) {
|
|
118
154
|
if (clear) this.clear();
|
|
119
155
|
switch (this.whichMode(mode)) {
|
|
@@ -132,7 +168,7 @@ class Metric {
|
|
|
132
168
|
if (this.isPairwise()) this.runPairwise();
|
|
133
169
|
break;
|
|
134
170
|
default:
|
|
135
|
-
throw new
|
|
171
|
+
throw new CmpStrInternalError(`Unsupported mode <${mode}>`);
|
|
136
172
|
}
|
|
137
173
|
}
|
|
138
174
|
async runAsync(mode, clear = true) {
|
|
@@ -153,17 +189,20 @@ class Metric {
|
|
|
153
189
|
if (this.isPairwise()) await this.runPairwiseAsync();
|
|
154
190
|
break;
|
|
155
191
|
default:
|
|
156
|
-
throw new
|
|
192
|
+
throw new CmpStrInternalError(`Unsupported async mode <${mode}>`);
|
|
157
193
|
}
|
|
158
194
|
}
|
|
159
|
-
getMetricName
|
|
195
|
+
getMetricName() {
|
|
196
|
+
return this.metric;
|
|
197
|
+
}
|
|
160
198
|
getResults() {
|
|
161
|
-
|
|
162
|
-
|
|
199
|
+
ErrorUtil.assert(
|
|
200
|
+
this.results !== undefined,
|
|
201
|
+
`run() must be called before getResults()`
|
|
202
|
+
);
|
|
163
203
|
return this.results;
|
|
164
204
|
}
|
|
165
205
|
}
|
|
166
206
|
const MetricRegistry = Registry('metric', Metric);
|
|
167
207
|
|
|
168
208
|
export { Metric, MetricRegistry };
|
|
169
|
-
//# sourceMappingURL=Metric.mjs.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
import { Pool } from '../utils/Pool.mjs';
|
|
3
3
|
import { MetricRegistry, Metric } from './Metric.mjs';
|
|
4
4
|
|
|
@@ -41,4 +41,3 @@ class NeedlemanWunschDistance extends Metric {
|
|
|
41
41
|
MetricRegistry.add('needlemanWunsch', NeedlemanWunschDistance);
|
|
42
42
|
|
|
43
43
|
export { NeedlemanWunschDistance };
|
|
44
|
-
//# sourceMappingURL=NeedlemanWunsch.mjs.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
import { Pool } from '../utils/Pool.mjs';
|
|
3
3
|
import { MetricRegistry, Metric } from './Metric.mjs';
|
|
4
4
|
|
|
@@ -35,4 +35,3 @@ class QGramSimilarity extends Metric {
|
|
|
35
35
|
MetricRegistry.add('qGram', QGramSimilarity);
|
|
36
36
|
|
|
37
37
|
export { QGramSimilarity };
|
|
38
|
-
//# sourceMappingURL=QGram.mjs.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
import { Pool } from '../utils/Pool.mjs';
|
|
3
3
|
import { MetricRegistry, Metric } from './Metric.mjs';
|
|
4
4
|
|
|
@@ -42,4 +42,3 @@ class SmithWatermanDistance extends Metric {
|
|
|
42
42
|
MetricRegistry.add('smithWaterman', SmithWatermanDistance);
|
|
43
43
|
|
|
44
44
|
export { SmithWatermanDistance };
|
|
45
|
-
//# sourceMappingURL=SmithWaterman.mjs.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
import {
|
|
3
3
|
PhoneticRegistry,
|
|
4
4
|
PhoneticMappingRegistry,
|
|
@@ -118,4 +118,3 @@ PhoneticMappingRegistry.add('caverphone', 'en2', {
|
|
|
118
118
|
});
|
|
119
119
|
|
|
120
120
|
export { Caverphone };
|
|
121
|
-
//# sourceMappingURL=Caverphone.mjs.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
import {
|
|
3
3
|
PhoneticRegistry,
|
|
4
4
|
PhoneticMappingRegistry,
|
|
@@ -69,4 +69,3 @@ PhoneticMappingRegistry.add('cologne', 'default', {
|
|
|
69
69
|
});
|
|
70
70
|
|
|
71
71
|
export { Cologne };
|
|
72
|
-
//# sourceMappingURL=Cologne.mjs.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
import {
|
|
3
3
|
PhoneticRegistry,
|
|
4
4
|
PhoneticMappingRegistry,
|
|
@@ -99,4 +99,3 @@ PhoneticMappingRegistry.add('metaphone', 'en90', {
|
|
|
99
99
|
});
|
|
100
100
|
|
|
101
101
|
export { Metaphone };
|
|
102
|
-
//# sourceMappingURL=Metaphone.mjs.map
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
2
|
-
import {
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
|
+
import { DeepMerge } from '../utils/DeepMerge.mjs';
|
|
3
|
+
import { CmpStrNotFoundError, ErrorUtil } from '../utils/Errors.mjs';
|
|
3
4
|
import { HashTable, Hasher } from '../utils/HashTable.mjs';
|
|
4
5
|
import { Profiler } from '../utils/Profiler.mjs';
|
|
5
6
|
import { Registry } from '../utils/Registry.mjs';
|
|
@@ -12,42 +13,59 @@ class Phonetic {
|
|
|
12
13
|
options;
|
|
13
14
|
optKey;
|
|
14
15
|
map;
|
|
15
|
-
|
|
16
|
+
ignoreSet;
|
|
17
|
+
static clear() {
|
|
18
|
+
this.cache.clear();
|
|
19
|
+
}
|
|
16
20
|
constructor(algo, opt = {}) {
|
|
17
21
|
const defaults = this.constructor.default ?? {};
|
|
18
22
|
const mapId = opt.map ?? defaults.map;
|
|
19
|
-
if (!mapId)
|
|
23
|
+
if (!mapId)
|
|
24
|
+
throw new CmpStrNotFoundError(
|
|
25
|
+
`No mapping specified for phonetic algorithm`,
|
|
26
|
+
{ algo }
|
|
27
|
+
);
|
|
20
28
|
const map = PhoneticMappingRegistry.get(algo, mapId);
|
|
21
29
|
if (map === undefined)
|
|
22
|
-
throw new
|
|
23
|
-
|
|
30
|
+
throw new CmpStrNotFoundError(
|
|
31
|
+
`Requested mapping <${mapId}> is not declared`,
|
|
32
|
+
{ algo, mapId }
|
|
33
|
+
);
|
|
34
|
+
this.options = DeepMerge.merge(
|
|
35
|
+
DeepMerge.merge(defaults, map.options ?? {}),
|
|
36
|
+
opt
|
|
37
|
+
);
|
|
24
38
|
this.optKey = Hasher.fastFNV1a(
|
|
25
39
|
JSON.stringify(this.options, Object.keys(this.options).sort())
|
|
26
40
|
).toString();
|
|
27
41
|
this.algo = algo;
|
|
28
42
|
this.map = map;
|
|
43
|
+
this.ignoreSet = new Set(map.ignore ?? []);
|
|
29
44
|
}
|
|
30
45
|
applyPattern(word) {
|
|
31
46
|
const { patterns = [] } = this.map;
|
|
32
|
-
if (!patterns
|
|
47
|
+
if (!patterns.length) return word;
|
|
33
48
|
for (const { pattern, replace, all = false } of patterns) {
|
|
34
|
-
word =
|
|
49
|
+
word = all
|
|
50
|
+
? word.replaceAll(pattern, replace)
|
|
51
|
+
: word.replace(pattern, replace);
|
|
35
52
|
}
|
|
36
53
|
return word;
|
|
37
54
|
}
|
|
38
55
|
applyRules(char, i, chars, charLen) {
|
|
39
56
|
const { ruleset = [] } = this.map;
|
|
40
|
-
if (!ruleset
|
|
57
|
+
if (!ruleset.length) return undefined;
|
|
41
58
|
const prev = chars[i - 1] || '',
|
|
42
59
|
prev2 = chars[i - 2] || '';
|
|
43
60
|
const next = chars[i + 1] || '',
|
|
44
61
|
next2 = chars[i + 2] || '';
|
|
62
|
+
const str = chars.join('');
|
|
45
63
|
for (const rule of ruleset) {
|
|
46
64
|
if (rule.char && rule.char !== char) continue;
|
|
47
65
|
if (rule.position === 'start' && i !== 0) continue;
|
|
48
66
|
if (rule.position === 'middle' && (i === 0 || i === charLen - 1))
|
|
49
67
|
continue;
|
|
50
|
-
if (rule.position === 'end' && i !== charLen) continue;
|
|
68
|
+
if (rule.position === 'end' && i !== charLen - 1) continue;
|
|
51
69
|
if (rule.prev && !rule.prev.includes(prev)) continue;
|
|
52
70
|
if (rule.prevNot && rule.prevNot.includes(prev)) continue;
|
|
53
71
|
if (rule.prev2 && !rule.prev2.includes(prev2)) continue;
|
|
@@ -58,12 +76,12 @@ class Phonetic {
|
|
|
58
76
|
if (rule.next2Not && rule.next2Not.includes(next2)) continue;
|
|
59
77
|
if (
|
|
60
78
|
rule.leading &&
|
|
61
|
-
!rule.leading.includes(
|
|
79
|
+
!rule.leading.includes(str.slice(0, rule.leading.length))
|
|
62
80
|
)
|
|
63
81
|
continue;
|
|
64
82
|
if (
|
|
65
83
|
rule.trailing &&
|
|
66
|
-
!rule.trailing.includes(
|
|
84
|
+
!rule.trailing.includes(str.slice(-rule.trailing.length))
|
|
67
85
|
)
|
|
68
86
|
continue;
|
|
69
87
|
if (rule.match && !rule.match.every((c, j) => chars[i + j] === c))
|
|
@@ -73,7 +91,7 @@ class Phonetic {
|
|
|
73
91
|
return undefined;
|
|
74
92
|
}
|
|
75
93
|
encode(word) {
|
|
76
|
-
const { map = {}
|
|
94
|
+
const { map = {} } = this.map;
|
|
77
95
|
word = this.applyPattern(word);
|
|
78
96
|
const chars = this.word2Chars(word);
|
|
79
97
|
const charLen = chars.length;
|
|
@@ -81,7 +99,7 @@ class Phonetic {
|
|
|
81
99
|
lastCode = null;
|
|
82
100
|
for (let i = 0; i < charLen; i++) {
|
|
83
101
|
const char = chars[i];
|
|
84
|
-
if (
|
|
102
|
+
if (this.ignoreSet.has(char)) continue;
|
|
85
103
|
const mapped = this.mapChar(char, i, chars, charLen, lastCode, map);
|
|
86
104
|
if (mapped === undefined) continue;
|
|
87
105
|
((code += mapped), (lastCode = mapped));
|
|
@@ -100,7 +118,9 @@ class Phonetic {
|
|
|
100
118
|
? input
|
|
101
119
|
: (input + pad.repeat(length)).slice(0, length);
|
|
102
120
|
}
|
|
103
|
-
word2Chars
|
|
121
|
+
word2Chars(word) {
|
|
122
|
+
return Array.from(word.toLowerCase());
|
|
123
|
+
}
|
|
104
124
|
exitEarly(code, i) {
|
|
105
125
|
const { length = -1 } = this.options;
|
|
106
126
|
return length > 0 && code.length >= length;
|
|
@@ -109,37 +129,52 @@ class Phonetic {
|
|
|
109
129
|
return code;
|
|
110
130
|
}
|
|
111
131
|
loop(words) {
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
const
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
132
|
+
return ErrorUtil.wrap(
|
|
133
|
+
() => {
|
|
134
|
+
const index = [];
|
|
135
|
+
for (const word of words) {
|
|
136
|
+
let key = Phonetic.cache.key(this.algo, [word]);
|
|
137
|
+
if (key) key += this.optKey;
|
|
138
|
+
const code =
|
|
139
|
+
Phonetic.cache.get(key || '') ??
|
|
140
|
+
(() => {
|
|
141
|
+
const res = this.encode(word);
|
|
142
|
+
if (key) Phonetic.cache.set(key, res);
|
|
143
|
+
return res;
|
|
144
|
+
})();
|
|
145
|
+
if (code && code.length) index.push(this.equalLen(code));
|
|
146
|
+
}
|
|
147
|
+
return index;
|
|
148
|
+
},
|
|
149
|
+
`Failed to generate phonetic index`,
|
|
150
|
+
{ algo: this.algo, words }
|
|
151
|
+
);
|
|
125
152
|
}
|
|
126
153
|
async loopAsync(words) {
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
154
|
+
return ErrorUtil.wrapAsync(
|
|
155
|
+
async () => {
|
|
156
|
+
const index = [];
|
|
157
|
+
for (const word of words) {
|
|
158
|
+
const key = Phonetic.cache.key(this.algo, [word]) + this.optKey;
|
|
159
|
+
const code = await Promise.resolve(
|
|
160
|
+
Phonetic.cache.get(key || '') ??
|
|
161
|
+
(() => {
|
|
162
|
+
const res = this.encode(word);
|
|
163
|
+
if (key) Phonetic.cache.set(key, res);
|
|
164
|
+
return res;
|
|
165
|
+
})()
|
|
166
|
+
);
|
|
167
|
+
if (code && code.length) index.push(this.equalLen(code));
|
|
168
|
+
}
|
|
169
|
+
return index;
|
|
170
|
+
},
|
|
171
|
+
`Failed to generate phonetic index asynchronously`,
|
|
172
|
+
{ algo: this.algo, words }
|
|
173
|
+
);
|
|
174
|
+
}
|
|
175
|
+
getAlgoName() {
|
|
176
|
+
return this.algo;
|
|
141
177
|
}
|
|
142
|
-
getAlgoName = () => this.algo;
|
|
143
178
|
getIndex(input) {
|
|
144
179
|
const { delimiter = ' ' } = this.options;
|
|
145
180
|
return profiler.run(() =>
|
|
@@ -162,10 +197,11 @@ const PhoneticMappingRegistry = (() => {
|
|
|
162
197
|
return Object.freeze({
|
|
163
198
|
add(algo, id, map, update = false) {
|
|
164
199
|
const mappings = maps(algo);
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
200
|
+
ErrorUtil.assert(
|
|
201
|
+
!(!id || id in mappings) || update,
|
|
202
|
+
`Entry <${id}> already exists / use <update=true> to overwrite`,
|
|
203
|
+
{ algo, id }
|
|
204
|
+
);
|
|
169
205
|
mappings[id] = map;
|
|
170
206
|
},
|
|
171
207
|
remove(algo, id) {
|
|
@@ -184,4 +220,3 @@ const PhoneticMappingRegistry = (() => {
|
|
|
184
220
|
})();
|
|
185
221
|
|
|
186
222
|
export { Phonetic, PhoneticMappingRegistry, PhoneticRegistry };
|
|
187
|
-
//# sourceMappingURL=Phonetic.mjs.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
import {
|
|
3
3
|
PhoneticRegistry,
|
|
4
4
|
PhoneticMappingRegistry,
|
|
@@ -88,4 +88,3 @@ PhoneticMappingRegistry.add('soundex', 'de', {
|
|
|
88
88
|
});
|
|
89
89
|
|
|
90
90
|
export { Soundex };
|
|
91
|
-
//# sourceMappingURL=Soundex.mjs.map
|
package/dist/esm/root.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
export { CmpStr } from './CmpStr.mjs';
|
|
3
3
|
export { CmpStrAsync } from './CmpStrAsync.mjs';
|
|
4
4
|
export { DiffChecker } from './utils/DiffChecker.mjs';
|
|
@@ -25,11 +25,12 @@ export {
|
|
|
25
25
|
PhoneticMappingRegistry,
|
|
26
26
|
PhoneticRegistry
|
|
27
27
|
} from './phonetic/Phonetic.mjs';
|
|
28
|
-
|
|
29
|
-
|
|
28
|
+
export { DeepMerge } from './utils/DeepMerge.mjs';
|
|
29
|
+
import * as Errors from './utils/Errors.mjs';
|
|
30
|
+
export { Errors as CmpStrError };
|
|
30
31
|
export { Filter } from './utils/Filter.mjs';
|
|
31
32
|
export { HashTable, Hasher } from './utils/HashTable.mjs';
|
|
33
|
+
export { OptionsValidator } from './utils/OptionsValidator.mjs';
|
|
32
34
|
export { Pool } from './utils/Pool.mjs';
|
|
33
35
|
export { Profiler } from './utils/Profiler.mjs';
|
|
34
36
|
export { StructuredData } from './utils/StructuredData.mjs';
|
|
35
|
-
//# sourceMappingURL=root.mjs.map
|