cmpstr 3.2.1 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -18
- package/dist/CmpStr.esm.js +1904 -1211
- package/dist/CmpStr.esm.min.js +2 -3
- package/dist/CmpStr.umd.js +1924 -1236
- package/dist/CmpStr.umd.min.js +2 -3
- package/dist/cjs/CmpStr.cjs +134 -64
- package/dist/cjs/CmpStrAsync.cjs +60 -37
- package/dist/cjs/index.cjs +1 -2
- package/dist/cjs/metric/Cosine.cjs +1 -2
- package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -2
- package/dist/cjs/metric/DiceSorensen.cjs +1 -2
- package/dist/cjs/metric/Hamming.cjs +5 -4
- package/dist/cjs/metric/Jaccard.cjs +1 -2
- package/dist/cjs/metric/JaroWinkler.cjs +1 -2
- package/dist/cjs/metric/LCS.cjs +1 -2
- package/dist/cjs/metric/Levenshtein.cjs +1 -2
- package/dist/cjs/metric/Metric.cjs +90 -53
- package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -2
- package/dist/cjs/metric/QGram.cjs +1 -2
- package/dist/cjs/metric/SmithWaterman.cjs +1 -2
- package/dist/cjs/phonetic/Caverphone.cjs +1 -2
- package/dist/cjs/phonetic/Cologne.cjs +1 -2
- package/dist/cjs/phonetic/Metaphone.cjs +1 -2
- package/dist/cjs/phonetic/Phonetic.cjs +80 -48
- package/dist/cjs/phonetic/Soundex.cjs +1 -2
- package/dist/cjs/root.cjs +6 -3
- package/dist/cjs/utils/DeepMerge.cjs +109 -99
- package/dist/cjs/utils/DiffChecker.cjs +1 -2
- package/dist/cjs/utils/Errors.cjs +106 -0
- package/dist/cjs/utils/Filter.cjs +97 -37
- package/dist/cjs/utils/HashTable.cjs +44 -30
- package/dist/cjs/utils/Normalizer.cjs +84 -35
- package/dist/cjs/utils/OptionsValidator.cjs +211 -0
- package/dist/cjs/utils/Pool.cjs +57 -19
- package/dist/cjs/utils/Profiler.cjs +41 -28
- package/dist/cjs/utils/Registry.cjs +48 -24
- package/dist/cjs/utils/StructuredData.cjs +95 -57
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -2
- package/dist/esm/CmpStr.mjs +133 -61
- package/dist/esm/CmpStrAsync.mjs +56 -33
- package/dist/esm/index.mjs +1 -2
- package/dist/esm/metric/Cosine.mjs +1 -2
- package/dist/esm/metric/DamerauLevenshtein.mjs +1 -2
- package/dist/esm/metric/DiceSorensen.mjs +1 -2
- package/dist/esm/metric/Hamming.mjs +5 -4
- package/dist/esm/metric/Jaccard.mjs +1 -2
- package/dist/esm/metric/JaroWinkler.mjs +1 -2
- package/dist/esm/metric/LCS.mjs +1 -2
- package/dist/esm/metric/Levenshtein.mjs +1 -2
- package/dist/esm/metric/Metric.mjs +92 -53
- package/dist/esm/metric/NeedlemanWunsch.mjs +1 -2
- package/dist/esm/metric/QGram.mjs +1 -2
- package/dist/esm/metric/SmithWaterman.mjs +1 -2
- package/dist/esm/phonetic/Caverphone.mjs +1 -2
- package/dist/esm/phonetic/Cologne.mjs +1 -2
- package/dist/esm/phonetic/Metaphone.mjs +1 -2
- package/dist/esm/phonetic/Phonetic.mjs +83 -48
- package/dist/esm/phonetic/Soundex.mjs +1 -2
- package/dist/esm/root.mjs +5 -4
- package/dist/esm/utils/DeepMerge.mjs +109 -95
- package/dist/esm/utils/DiffChecker.mjs +1 -2
- package/dist/esm/utils/Errors.mjs +106 -0
- package/dist/esm/utils/Filter.mjs +97 -37
- package/dist/esm/utils/HashTable.mjs +44 -30
- package/dist/esm/utils/Normalizer.mjs +84 -35
- package/dist/esm/utils/OptionsValidator.mjs +210 -0
- package/dist/esm/utils/Pool.mjs +53 -19
- package/dist/esm/utils/Profiler.mjs +41 -28
- package/dist/esm/utils/Registry.mjs +48 -24
- package/dist/esm/utils/StructuredData.mjs +95 -57
- package/dist/esm/utils/TextAnalyzer.mjs +1 -2
- package/dist/types/CmpStr.d.ts +25 -14
- package/dist/types/CmpStrAsync.d.ts +4 -0
- package/dist/types/index.d.ts +3 -2
- package/dist/types/metric/Metric.d.ts +15 -14
- package/dist/types/phonetic/Phonetic.d.ts +7 -4
- package/dist/types/root.d.ts +4 -2
- package/dist/types/utils/DeepMerge.d.ts +80 -58
- package/dist/types/utils/Errors.d.ts +154 -0
- package/dist/types/utils/Filter.d.ts +8 -1
- package/dist/types/utils/HashTable.d.ts +12 -11
- package/dist/types/utils/Normalizer.d.ts +5 -1
- package/dist/types/utils/OptionsValidator.d.ts +193 -0
- package/dist/types/utils/Pool.d.ts +2 -0
- package/dist/types/utils/Profiler.d.ts +9 -28
- package/dist/types/utils/Registry.d.ts +3 -3
- package/dist/types/utils/StructuredData.d.ts +6 -1
- package/dist/types/utils/Types.d.ts +39 -1
- package/package.json +20 -11
- package/dist/CmpStr.esm.js.map +0 -1
- package/dist/CmpStr.esm.min.js.map +0 -1
- package/dist/CmpStr.umd.js.map +0 -1
- package/dist/CmpStr.umd.min.js.map +0 -1
- package/dist/cjs/CmpStr.cjs.map +0 -1
- package/dist/cjs/CmpStrAsync.cjs.map +0 -1
- package/dist/cjs/index.cjs.map +0 -1
- package/dist/cjs/metric/Cosine.cjs.map +0 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +0 -1
- package/dist/cjs/metric/DiceSorensen.cjs.map +0 -1
- package/dist/cjs/metric/Hamming.cjs.map +0 -1
- package/dist/cjs/metric/Jaccard.cjs.map +0 -1
- package/dist/cjs/metric/JaroWinkler.cjs.map +0 -1
- package/dist/cjs/metric/LCS.cjs.map +0 -1
- package/dist/cjs/metric/Levenshtein.cjs.map +0 -1
- package/dist/cjs/metric/Metric.cjs.map +0 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +0 -1
- package/dist/cjs/metric/QGram.cjs.map +0 -1
- package/dist/cjs/metric/SmithWaterman.cjs.map +0 -1
- package/dist/cjs/phonetic/Caverphone.cjs.map +0 -1
- package/dist/cjs/phonetic/Cologne.cjs.map +0 -1
- package/dist/cjs/phonetic/Metaphone.cjs.map +0 -1
- package/dist/cjs/phonetic/Phonetic.cjs.map +0 -1
- package/dist/cjs/phonetic/Soundex.cjs.map +0 -1
- package/dist/cjs/root.cjs.map +0 -1
- package/dist/cjs/utils/DeepMerge.cjs.map +0 -1
- package/dist/cjs/utils/DiffChecker.cjs.map +0 -1
- package/dist/cjs/utils/Filter.cjs.map +0 -1
- package/dist/cjs/utils/HashTable.cjs.map +0 -1
- package/dist/cjs/utils/Normalizer.cjs.map +0 -1
- package/dist/cjs/utils/Pool.cjs.map +0 -1
- package/dist/cjs/utils/Profiler.cjs.map +0 -1
- package/dist/cjs/utils/Registry.cjs.map +0 -1
- package/dist/cjs/utils/StructuredData.cjs.map +0 -1
- package/dist/cjs/utils/TextAnalyzer.cjs.map +0 -1
- package/dist/esm/CmpStr.mjs.map +0 -1
- package/dist/esm/CmpStrAsync.mjs.map +0 -1
- package/dist/esm/index.mjs.map +0 -1
- package/dist/esm/metric/Cosine.mjs.map +0 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +0 -1
- package/dist/esm/metric/DiceSorensen.mjs.map +0 -1
- package/dist/esm/metric/Hamming.mjs.map +0 -1
- package/dist/esm/metric/Jaccard.mjs.map +0 -1
- package/dist/esm/metric/JaroWinkler.mjs.map +0 -1
- package/dist/esm/metric/LCS.mjs.map +0 -1
- package/dist/esm/metric/Levenshtein.mjs.map +0 -1
- package/dist/esm/metric/Metric.mjs.map +0 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +0 -1
- package/dist/esm/metric/QGram.mjs.map +0 -1
- package/dist/esm/metric/SmithWaterman.mjs.map +0 -1
- package/dist/esm/phonetic/Caverphone.mjs.map +0 -1
- package/dist/esm/phonetic/Cologne.mjs.map +0 -1
- package/dist/esm/phonetic/Metaphone.mjs.map +0 -1
- package/dist/esm/phonetic/Phonetic.mjs.map +0 -1
- package/dist/esm/phonetic/Soundex.mjs.map +0 -1
- package/dist/esm/root.mjs.map +0 -1
- package/dist/esm/utils/DeepMerge.mjs.map +0 -1
- package/dist/esm/utils/DiffChecker.mjs.map +0 -1
- package/dist/esm/utils/Filter.mjs.map +0 -1
- package/dist/esm/utils/HashTable.mjs.map +0 -1
- package/dist/esm/utils/Normalizer.mjs.map +0 -1
- package/dist/esm/utils/Pool.mjs.map +0 -1
- package/dist/esm/utils/Profiler.mjs.map +0 -1
- package/dist/esm/utils/Registry.mjs.map +0 -1
- package/dist/esm/utils/StructuredData.mjs.map +0 -1
- package/dist/esm/utils/TextAnalyzer.mjs.map +0 -1
|
@@ -1,49 +1,91 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
|
+
var Errors = require('./Errors.cjs');
|
|
5
|
+
|
|
4
6
|
class Filter {
|
|
7
|
+
static IDENTITY = (s) => s;
|
|
5
8
|
static filters = new Map();
|
|
6
9
|
static pipeline = new Map();
|
|
7
|
-
static getPipeline(hook) {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
10
|
+
static getPipeline(hook, force = false) {
|
|
11
|
+
return Errors.ErrorUtil.wrap(
|
|
12
|
+
() => {
|
|
13
|
+
if (!force) {
|
|
14
|
+
const cached = Filter.pipeline.get(hook);
|
|
15
|
+
if (cached) return cached;
|
|
16
|
+
}
|
|
17
|
+
const filter = Filter.filters.get(hook);
|
|
18
|
+
if (!filter) {
|
|
19
|
+
Filter.pipeline.set(hook, Filter.IDENTITY);
|
|
20
|
+
return Filter.IDENTITY;
|
|
21
|
+
}
|
|
22
|
+
const pipeline = [];
|
|
23
|
+
for (const f of filter.values()) if (f.active) pipeline.push(f);
|
|
24
|
+
pipeline.sort((a, b) => a.priority - b.priority);
|
|
25
|
+
const fn =
|
|
26
|
+
pipeline.length === 0
|
|
27
|
+
? Filter.IDENTITY
|
|
28
|
+
: (input) => {
|
|
29
|
+
let v = input;
|
|
30
|
+
for (let i = 0; i < pipeline.length; i++) v = pipeline[i].fn(v);
|
|
31
|
+
return v;
|
|
32
|
+
};
|
|
33
|
+
Filter.pipeline.set(hook, fn);
|
|
34
|
+
return fn;
|
|
35
|
+
},
|
|
36
|
+
`Error compiling filter pipeline for hook <${hook}>`,
|
|
37
|
+
{ hook }
|
|
38
|
+
);
|
|
19
39
|
}
|
|
20
40
|
static has(hook, id) {
|
|
21
41
|
return !!Filter.filters.get(hook)?.has(id);
|
|
22
42
|
}
|
|
23
43
|
static add(hook, id, fn, opt = {}) {
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
44
|
+
return Errors.ErrorUtil.wrap(
|
|
45
|
+
() => {
|
|
46
|
+
const { priority = 10, active = true, overrideable = true } = opt;
|
|
47
|
+
const filter = Filter.filters.get(hook) ?? new Map();
|
|
48
|
+
const index = filter.get(id);
|
|
49
|
+
if (index && !index.overrideable) return false;
|
|
50
|
+
if (
|
|
51
|
+
index &&
|
|
52
|
+
index.fn === fn &&
|
|
53
|
+
index.priority === priority &&
|
|
54
|
+
index.active === active
|
|
55
|
+
)
|
|
56
|
+
return true;
|
|
57
|
+
filter.set(id, { id, fn, priority, active, overrideable });
|
|
58
|
+
Filter.filters.set(hook, filter);
|
|
59
|
+
Filter.getPipeline(hook, true);
|
|
60
|
+
return true;
|
|
61
|
+
},
|
|
62
|
+
`Error adding filter <${id}> to hook <${hook}>`,
|
|
63
|
+
{ hook, id, opt }
|
|
64
|
+
);
|
|
32
65
|
}
|
|
33
66
|
static remove(hook, id) {
|
|
34
|
-
Filter.pipeline.delete(hook);
|
|
35
67
|
const filter = Filter.filters.get(hook);
|
|
36
|
-
|
|
68
|
+
if (!filter || !filter.delete(id)) return false;
|
|
69
|
+
Filter.getPipeline(hook, true);
|
|
70
|
+
return true;
|
|
37
71
|
}
|
|
38
72
|
static pause(hook, id) {
|
|
39
|
-
Filter.
|
|
40
|
-
|
|
41
|
-
|
|
73
|
+
const filter = Filter.filters.get(hook);
|
|
74
|
+
if (!filter) return false;
|
|
75
|
+
const f = filter.get(id);
|
|
76
|
+
if (!f || !f.active) return false;
|
|
77
|
+
f.active = false;
|
|
78
|
+
Filter.getPipeline(hook, true);
|
|
79
|
+
return true;
|
|
42
80
|
}
|
|
43
81
|
static resume(hook, id) {
|
|
44
|
-
Filter.
|
|
45
|
-
|
|
46
|
-
|
|
82
|
+
const filter = Filter.filters.get(hook);
|
|
83
|
+
if (!filter) return false;
|
|
84
|
+
const f = filter.get(id);
|
|
85
|
+
if (!f || f.active) return false;
|
|
86
|
+
f.active = true;
|
|
87
|
+
Filter.getPipeline(hook, true);
|
|
88
|
+
return true;
|
|
47
89
|
}
|
|
48
90
|
static list(hook, active = false) {
|
|
49
91
|
const filter = Filter.filters.get(hook);
|
|
@@ -53,17 +95,36 @@ class Filter {
|
|
|
53
95
|
return out;
|
|
54
96
|
}
|
|
55
97
|
static apply(hook, input) {
|
|
56
|
-
|
|
57
|
-
|
|
98
|
+
return Errors.ErrorUtil.wrap(
|
|
99
|
+
() => {
|
|
100
|
+
const fn = Filter.getPipeline(hook);
|
|
101
|
+
if (typeof input === 'string') return fn(input);
|
|
102
|
+
const arr = input;
|
|
103
|
+
const out = new Array(arr.length);
|
|
104
|
+
for (let i = 0; i < arr.length; i++) out[i] = fn(arr[i]);
|
|
105
|
+
return out;
|
|
106
|
+
},
|
|
107
|
+
`Error applying filters for hook <${hook}>`,
|
|
108
|
+
{ hook, input }
|
|
109
|
+
);
|
|
58
110
|
}
|
|
59
111
|
static async applyAsync(hook, input) {
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
112
|
+
return Errors.ErrorUtil.wrapAsync(
|
|
113
|
+
async () => {
|
|
114
|
+
const fn = Filter.getPipeline(hook);
|
|
115
|
+
if (typeof input === 'string') return Promise.resolve(fn(input));
|
|
116
|
+
const arr = input;
|
|
117
|
+
const out = new Array(arr.length);
|
|
118
|
+
for (let i = 0; i < arr.length; i++)
|
|
119
|
+
out[i] = Promise.resolve(fn(arr[i]));
|
|
120
|
+
return Promise.all(out);
|
|
121
|
+
},
|
|
122
|
+
`Error applying filters for hook <${hook}>`,
|
|
123
|
+
{ hook, input }
|
|
124
|
+
);
|
|
64
125
|
}
|
|
65
126
|
static clear(hook) {
|
|
66
|
-
Filter.
|
|
127
|
+
Filter.clearPipeline();
|
|
67
128
|
if (hook) Filter.filters.delete(hook);
|
|
68
129
|
else Filter.filters.clear();
|
|
69
130
|
}
|
|
@@ -73,4 +134,3 @@ class Filter {
|
|
|
73
134
|
}
|
|
74
135
|
|
|
75
136
|
exports.Filter = Filter;
|
|
76
|
-
//# sourceMappingURL=Filter.cjs.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
class Hasher {
|
|
@@ -6,25 +6,21 @@ class Hasher {
|
|
|
6
6
|
static HASH_OFFSET = 0x811c9dc5;
|
|
7
7
|
static fastFNV1a(str) {
|
|
8
8
|
const len = str.length;
|
|
9
|
+
const limit = len & -4;
|
|
9
10
|
let hash = this.HASH_OFFSET;
|
|
10
|
-
|
|
11
|
-
for (
|
|
12
|
-
const pos = i * 4;
|
|
11
|
+
let i = 0;
|
|
12
|
+
for (; i < limit; i += 4) {
|
|
13
13
|
const chunk =
|
|
14
|
-
str.charCodeAt(
|
|
15
|
-
(str.charCodeAt(
|
|
16
|
-
(str.charCodeAt(
|
|
17
|
-
(str.charCodeAt(
|
|
14
|
+
str.charCodeAt(i) |
|
|
15
|
+
(str.charCodeAt(i + 1) << 8) |
|
|
16
|
+
(str.charCodeAt(i + 2) << 16) |
|
|
17
|
+
(str.charCodeAt(i + 3) << 24);
|
|
18
18
|
hash ^= chunk;
|
|
19
19
|
hash = Math.imul(hash, this.FNV_PRIME);
|
|
20
20
|
}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
for (let i = 0; i < remaining; i++) {
|
|
25
|
-
hash ^= str.charCodeAt(pos + i);
|
|
26
|
-
hash = Math.imul(hash, this.FNV_PRIME);
|
|
27
|
-
}
|
|
21
|
+
for (; i < len; i++) {
|
|
22
|
+
hash ^= str.charCodeAt(i);
|
|
23
|
+
hash = Math.imul(hash, this.FNV_PRIME);
|
|
28
24
|
}
|
|
29
25
|
hash ^= hash >>> 16;
|
|
30
26
|
hash *= 0x85ebca6b;
|
|
@@ -35,34 +31,52 @@ class Hasher {
|
|
|
35
31
|
}
|
|
36
32
|
}
|
|
37
33
|
class HashTable {
|
|
38
|
-
|
|
34
|
+
FIFO;
|
|
35
|
+
maxSize;
|
|
39
36
|
static MAX_LEN = 2048;
|
|
40
|
-
static TABLE_SIZE = 10_000;
|
|
41
37
|
table = new Map();
|
|
42
|
-
constructor(
|
|
43
|
-
this.
|
|
38
|
+
constructor(FIFO = true, maxSize = 10000) {
|
|
39
|
+
this.FIFO = FIFO;
|
|
40
|
+
this.maxSize = maxSize;
|
|
44
41
|
}
|
|
45
42
|
key(label, strs, sorted = false) {
|
|
46
|
-
|
|
47
|
-
const hashes =
|
|
48
|
-
|
|
43
|
+
const n = strs.length;
|
|
44
|
+
const hashes = new Array(n);
|
|
45
|
+
for (let i = 0; i < n; i++) {
|
|
46
|
+
const s = strs[i];
|
|
47
|
+
if (s.length > HashTable.MAX_LEN) return false;
|
|
48
|
+
hashes[i] = Hasher.fastFNV1a(s);
|
|
49
|
+
}
|
|
50
|
+
if (sorted) hashes.sort((a, b) => a - b);
|
|
51
|
+
let key = label;
|
|
52
|
+
for (let i = 0; i < hashes.length; i++) key += '-' + hashes[i];
|
|
53
|
+
return key;
|
|
54
|
+
}
|
|
55
|
+
has(key) {
|
|
56
|
+
return this.table.has(key);
|
|
57
|
+
}
|
|
58
|
+
get(key) {
|
|
59
|
+
return this.table.get(key);
|
|
49
60
|
}
|
|
50
|
-
has = (key) => this.table.has(key);
|
|
51
|
-
get = (key) => this.table.get(key);
|
|
52
61
|
set(key, entry, update = true) {
|
|
53
62
|
if (!update && this.table.has(key)) return false;
|
|
54
|
-
|
|
55
|
-
if (!this.
|
|
63
|
+
if (!this.table.has(key) && this.table.size >= this.maxSize) {
|
|
64
|
+
if (!this.FIFO) return false;
|
|
56
65
|
this.table.delete(this.table.keys().next().value);
|
|
57
66
|
}
|
|
58
67
|
this.table.set(key, entry);
|
|
59
68
|
return true;
|
|
60
69
|
}
|
|
61
|
-
delete
|
|
62
|
-
|
|
63
|
-
|
|
70
|
+
delete(key) {
|
|
71
|
+
return this.table.delete(key);
|
|
72
|
+
}
|
|
73
|
+
clear() {
|
|
74
|
+
this.table.clear();
|
|
75
|
+
}
|
|
76
|
+
size() {
|
|
77
|
+
return this.table.size;
|
|
78
|
+
}
|
|
64
79
|
}
|
|
65
80
|
|
|
66
81
|
exports.HashTable = HashTable;
|
|
67
82
|
exports.Hasher = Hasher;
|
|
68
|
-
//# sourceMappingURL=HashTable.cjs.map
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
|
+
var Errors = require('./Errors.cjs');
|
|
4
5
|
var HashTable = require('./HashTable.cjs');
|
|
5
6
|
|
|
6
7
|
class Normalizer {
|
|
@@ -17,42 +18,91 @@ class Normalizer {
|
|
|
17
18
|
return Array.from(new Set(flags)).sort().join('');
|
|
18
19
|
}
|
|
19
20
|
static getPipeline(flags) {
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
21
|
+
return Errors.ErrorUtil.wrap(
|
|
22
|
+
() => {
|
|
23
|
+
const cached = Normalizer.pipeline.get(flags);
|
|
24
|
+
if (cached) return cached;
|
|
25
|
+
const { REGEX } = Normalizer;
|
|
26
|
+
const steps = [];
|
|
27
|
+
for (let i = 0; i < flags.length; i++) {
|
|
28
|
+
switch (flags[i]) {
|
|
29
|
+
case 'd':
|
|
30
|
+
steps.push((s) => s.normalize('NFD'));
|
|
31
|
+
break;
|
|
32
|
+
case 'i':
|
|
33
|
+
steps.push((s) => s.toLowerCase());
|
|
34
|
+
break;
|
|
35
|
+
case 'k':
|
|
36
|
+
steps.push((s) => s.replace(REGEX.nonLetters, ''));
|
|
37
|
+
break;
|
|
38
|
+
case 'n':
|
|
39
|
+
steps.push((s) => s.replace(REGEX.nonNumbers, ''));
|
|
40
|
+
break;
|
|
41
|
+
case 'r':
|
|
42
|
+
steps.push((s) => s.replace(REGEX.doubleChars, '$1'));
|
|
43
|
+
break;
|
|
44
|
+
case 's':
|
|
45
|
+
steps.push((s) => s.replace(REGEX.specialChars, ''));
|
|
46
|
+
break;
|
|
47
|
+
case 't':
|
|
48
|
+
steps.push((s) => s.trim());
|
|
49
|
+
break;
|
|
50
|
+
case 'u':
|
|
51
|
+
steps.push((s) => s.normalize('NFC'));
|
|
52
|
+
break;
|
|
53
|
+
case 'w':
|
|
54
|
+
steps.push((s) => s.replace(REGEX.whitespace, ' '));
|
|
55
|
+
break;
|
|
56
|
+
case 'x':
|
|
57
|
+
steps.push((s) => s.normalize('NFKC'));
|
|
58
|
+
break;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
const fn = (input) => {
|
|
62
|
+
let v = input;
|
|
63
|
+
for (let i = 0; i < steps.length; i++) v = steps[i](v);
|
|
64
|
+
return v;
|
|
65
|
+
};
|
|
66
|
+
Normalizer.pipeline.set(flags, fn);
|
|
67
|
+
return fn;
|
|
68
|
+
},
|
|
69
|
+
`Failed to create normalization pipeline for flags: ${flags}`,
|
|
70
|
+
{ flags }
|
|
71
|
+
);
|
|
40
72
|
}
|
|
41
|
-
static normalize(input, flags) {
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
73
|
+
static normalize(input, flags, normalizedFlags) {
|
|
74
|
+
return Errors.ErrorUtil.wrap(
|
|
75
|
+
() => {
|
|
76
|
+
if (!flags || typeof flags !== 'string' || !input) return input;
|
|
77
|
+
flags = normalizedFlags ?? this.canonicalFlags(flags);
|
|
78
|
+
const pipeline = Normalizer.getPipeline(flags);
|
|
79
|
+
const normalizeOne = (s) => {
|
|
80
|
+
const key = Normalizer.cache.key(flags, [s]);
|
|
81
|
+
if (key && Normalizer.cache.has(key))
|
|
82
|
+
return Normalizer.cache.get(key);
|
|
83
|
+
const res = pipeline(s);
|
|
84
|
+
if (key) Normalizer.cache.set(key, res);
|
|
85
|
+
return res;
|
|
86
|
+
};
|
|
87
|
+
return Array.isArray(input)
|
|
88
|
+
? input.map(normalizeOne)
|
|
89
|
+
: normalizeOne(input);
|
|
90
|
+
},
|
|
91
|
+
`Failed to normalize input with flags: ${flags}`,
|
|
92
|
+
{ input, flags }
|
|
93
|
+
);
|
|
51
94
|
}
|
|
52
95
|
static async normalizeAsync(input, flags) {
|
|
53
|
-
return await
|
|
54
|
-
|
|
55
|
-
|
|
96
|
+
return await Errors.ErrorUtil.wrapAsync(
|
|
97
|
+
async () => {
|
|
98
|
+
if (!flags || typeof flags !== 'string' || !input) return input;
|
|
99
|
+
return await (Array.isArray(input)
|
|
100
|
+
? Promise.all(input.map((s) => Normalizer.normalize(s, flags)))
|
|
101
|
+
: Promise.resolve(Normalizer.normalize(input, flags)));
|
|
102
|
+
},
|
|
103
|
+
`Failed to asynchronously normalize input with flags: ${flags}`,
|
|
104
|
+
{ input, flags }
|
|
105
|
+
);
|
|
56
106
|
}
|
|
57
107
|
static clear() {
|
|
58
108
|
Normalizer.pipeline.clear();
|
|
@@ -61,4 +111,3 @@ class Normalizer {
|
|
|
61
111
|
}
|
|
62
112
|
|
|
63
113
|
exports.Normalizer = Normalizer;
|
|
64
|
-
//# sourceMappingURL=Normalizer.cjs.map
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
var Errors = require('./Errors.cjs');
|
|
5
|
+
require('../metric/Cosine.cjs');
|
|
6
|
+
require('../metric/DamerauLevenshtein.cjs');
|
|
7
|
+
require('../metric/DiceSorensen.cjs');
|
|
8
|
+
require('../metric/Hamming.cjs');
|
|
9
|
+
require('../metric/Jaccard.cjs');
|
|
10
|
+
require('../metric/JaroWinkler.cjs');
|
|
11
|
+
require('../metric/LCS.cjs');
|
|
12
|
+
require('../metric/Levenshtein.cjs');
|
|
13
|
+
require('../metric/NeedlemanWunsch.cjs');
|
|
14
|
+
require('../metric/QGram.cjs');
|
|
15
|
+
require('../metric/SmithWaterman.cjs');
|
|
16
|
+
var Metric = require('../metric/Metric.cjs');
|
|
17
|
+
require('../phonetic/Caverphone.cjs');
|
|
18
|
+
require('../phonetic/Cologne.cjs');
|
|
19
|
+
require('../phonetic/Metaphone.cjs');
|
|
20
|
+
require('../phonetic/Soundex.cjs');
|
|
21
|
+
var Phonetic = require('../phonetic/Phonetic.cjs');
|
|
22
|
+
|
|
23
|
+
class OptionsValidator {
|
|
24
|
+
static ALLOWED_FLAGS = new Set([
|
|
25
|
+
'd',
|
|
26
|
+
'u',
|
|
27
|
+
'x',
|
|
28
|
+
'w',
|
|
29
|
+
't',
|
|
30
|
+
'r',
|
|
31
|
+
's',
|
|
32
|
+
'k',
|
|
33
|
+
'n',
|
|
34
|
+
'i'
|
|
35
|
+
]);
|
|
36
|
+
static ALLOWED_OUTPUT = new Set(['orig', 'prep']);
|
|
37
|
+
static ALLOWED_MODES = new Set(['default', 'batch', 'single', 'pairwise']);
|
|
38
|
+
static ALLOWED_SORT = new Set(['asc', 'desc']);
|
|
39
|
+
static PROCESSORS = {
|
|
40
|
+
phonetic: (opt) => {
|
|
41
|
+
if (!opt) return;
|
|
42
|
+
OptionsValidator.validatePhoneticName(opt.algo);
|
|
43
|
+
OptionsValidator.validatePhoneticOptions(opt.opt);
|
|
44
|
+
}
|
|
45
|
+
};
|
|
46
|
+
static METRIC_OPT_MAP = {
|
|
47
|
+
mode: (v) => OptionsValidator.validateMode(v),
|
|
48
|
+
delimiter: (v) => OptionsValidator.validateString(v, 'opt.delimiter'),
|
|
49
|
+
pad: (v) => OptionsValidator.validateString(v, 'opt.pad'),
|
|
50
|
+
q: (v) => OptionsValidator.validateNumber(v, 'opt.q'),
|
|
51
|
+
match: (v) => OptionsValidator.validateNumber(v, 'opt.match'),
|
|
52
|
+
mismatch: (v) => OptionsValidator.validateNumber(v, 'opt.mismatch'),
|
|
53
|
+
gap: (v) => OptionsValidator.validateNumber(v, 'opt.gap')
|
|
54
|
+
};
|
|
55
|
+
static PHONETIC_OPT_MAP = {
|
|
56
|
+
map: (v) =>
|
|
57
|
+
OptionsValidator.validateString(v, 'processors.phonetic.opt.map'),
|
|
58
|
+
delimiter: (v) =>
|
|
59
|
+
OptionsValidator.validateString(v, 'processors.phonetic.opt.delimiter'),
|
|
60
|
+
length: (v) =>
|
|
61
|
+
OptionsValidator.validateNumber(v, 'processors.phonetic.opt.length'),
|
|
62
|
+
pad: (v) =>
|
|
63
|
+
OptionsValidator.validateString(v, 'processors.phonetic.opt.pad'),
|
|
64
|
+
dedupe: (v) =>
|
|
65
|
+
OptionsValidator.validateBoolean(v, 'processors.phonetic.opt.dedupe'),
|
|
66
|
+
fallback: (v) =>
|
|
67
|
+
OptionsValidator.validateString(v, 'processors.phonetic.opt.fallback')
|
|
68
|
+
};
|
|
69
|
+
static CMPSTR_OPT_MAP = {
|
|
70
|
+
raw: (v) => OptionsValidator.validateBoolean(v, 'raw'),
|
|
71
|
+
removeZero: (v) => OptionsValidator.validateBoolean(v, 'removeZero'),
|
|
72
|
+
safeEmpty: (v) => OptionsValidator.validateBoolean(v, 'safeEmpty'),
|
|
73
|
+
flags: (v) => OptionsValidator.validateFlags(v),
|
|
74
|
+
metric: (v) => OptionsValidator.validateMetricName(v),
|
|
75
|
+
output: (v) => OptionsValidator.validateOutput(v),
|
|
76
|
+
opt: (v) => OptionsValidator.validateMetricOptions(v),
|
|
77
|
+
processors: (v) => OptionsValidator.validateProcessors(v),
|
|
78
|
+
sort: (v) => OptionsValidator.validateSort(v, 'sort'),
|
|
79
|
+
objectsOnly: (v) => OptionsValidator.validateBoolean(v, 'objectsOnly')
|
|
80
|
+
};
|
|
81
|
+
static set2string(set) {
|
|
82
|
+
return Array.from(set).join(' | ');
|
|
83
|
+
}
|
|
84
|
+
static validateType(value, name, type) {
|
|
85
|
+
if (value === undefined) return;
|
|
86
|
+
if (typeof value !== type || (type === 'number' && Number.isNaN(value))) {
|
|
87
|
+
throw new Errors.CmpStrValidationError(
|
|
88
|
+
`Invalid option <${name}>: expected ${type}`,
|
|
89
|
+
{ name, value }
|
|
90
|
+
);
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
static validateEnum(value, name, set) {
|
|
94
|
+
if (value === undefined) return;
|
|
95
|
+
if (typeof value !== 'string' || !set.has(value)) {
|
|
96
|
+
throw new Errors.CmpStrValidationError(
|
|
97
|
+
`Invalid option <${name}>: expected ${OptionsValidator.set2string(set)}`,
|
|
98
|
+
{ name, value }
|
|
99
|
+
);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
static validateMap(opt, map) {
|
|
103
|
+
if (!opt) return;
|
|
104
|
+
for (const k in opt) {
|
|
105
|
+
const fn = map[k];
|
|
106
|
+
if (!fn)
|
|
107
|
+
throw new Errors.CmpStrValidationError(`Invalid option <${k}>`, {
|
|
108
|
+
option: k,
|
|
109
|
+
value: map[k]
|
|
110
|
+
});
|
|
111
|
+
fn(opt[k]);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
static validateRegistryName(value, name, label, has, list) {
|
|
115
|
+
if (value === undefined) return;
|
|
116
|
+
if (typeof value !== 'string' || value.length === 0)
|
|
117
|
+
throw new Errors.CmpStrValidationError(
|
|
118
|
+
`Invalid option <${name}>: expected non-empty string`,
|
|
119
|
+
{ name, value }
|
|
120
|
+
);
|
|
121
|
+
if (!has(value))
|
|
122
|
+
throw new Errors.CmpStrValidationError(
|
|
123
|
+
`${label} <${value}> is not registered`,
|
|
124
|
+
{ name, value, available: list() }
|
|
125
|
+
);
|
|
126
|
+
}
|
|
127
|
+
static validateBoolean(value, name) {
|
|
128
|
+
OptionsValidator.validateType(value, name, 'boolean');
|
|
129
|
+
}
|
|
130
|
+
static validateNumber(value, name) {
|
|
131
|
+
OptionsValidator.validateType(value, name, 'number');
|
|
132
|
+
}
|
|
133
|
+
static validateString(value, name) {
|
|
134
|
+
OptionsValidator.validateType(value, name, 'string');
|
|
135
|
+
}
|
|
136
|
+
static validateFlags(value) {
|
|
137
|
+
if (value === undefined) return;
|
|
138
|
+
if (typeof value !== 'string')
|
|
139
|
+
throw new Errors.CmpStrValidationError(
|
|
140
|
+
`Invalid option <flags>: expected string`,
|
|
141
|
+
{ flags: value }
|
|
142
|
+
);
|
|
143
|
+
for (let i = 0; i < value.length; i++) {
|
|
144
|
+
const ch = value[i];
|
|
145
|
+
if (!OptionsValidator.ALLOWED_FLAGS.has(ch))
|
|
146
|
+
throw new Errors.CmpStrValidationError(
|
|
147
|
+
`Invalid normalization flag <${ch}> in <flags>: expected ${OptionsValidator.set2string(OptionsValidator.ALLOWED_FLAGS)}`,
|
|
148
|
+
{ flags: value, invalid: ch }
|
|
149
|
+
);
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
static validateOutput(value) {
|
|
153
|
+
OptionsValidator.validateEnum(
|
|
154
|
+
value,
|
|
155
|
+
'output',
|
|
156
|
+
OptionsValidator.ALLOWED_OUTPUT
|
|
157
|
+
);
|
|
158
|
+
}
|
|
159
|
+
static validateMode(value) {
|
|
160
|
+
OptionsValidator.validateEnum(
|
|
161
|
+
value,
|
|
162
|
+
'mode',
|
|
163
|
+
OptionsValidator.ALLOWED_MODES
|
|
164
|
+
);
|
|
165
|
+
}
|
|
166
|
+
static validateSort(value, name) {
|
|
167
|
+
if (value === undefined || typeof value === 'boolean') return;
|
|
168
|
+
OptionsValidator.validateEnum(value, name, OptionsValidator.ALLOWED_SORT);
|
|
169
|
+
}
|
|
170
|
+
static validateMetricName(value) {
|
|
171
|
+
OptionsValidator.validateRegistryName(
|
|
172
|
+
value,
|
|
173
|
+
'metric',
|
|
174
|
+
'Comparison metric',
|
|
175
|
+
Metric.MetricRegistry.has,
|
|
176
|
+
Metric.MetricRegistry.list
|
|
177
|
+
);
|
|
178
|
+
}
|
|
179
|
+
static validatePhoneticName(value) {
|
|
180
|
+
OptionsValidator.validateRegistryName(
|
|
181
|
+
value,
|
|
182
|
+
'phonetic',
|
|
183
|
+
'Phonetic algorithm',
|
|
184
|
+
Phonetic.PhoneticRegistry.has,
|
|
185
|
+
Phonetic.PhoneticRegistry.list
|
|
186
|
+
);
|
|
187
|
+
}
|
|
188
|
+
static validateMetricOptions(opt) {
|
|
189
|
+
OptionsValidator.validateMap(opt, OptionsValidator.METRIC_OPT_MAP);
|
|
190
|
+
}
|
|
191
|
+
static validatePhoneticOptions(opt) {
|
|
192
|
+
OptionsValidator.validateMap(opt, OptionsValidator.PHONETIC_OPT_MAP);
|
|
193
|
+
}
|
|
194
|
+
static validateProcessors(opt) {
|
|
195
|
+
if (!opt) return;
|
|
196
|
+
for (const key in opt) {
|
|
197
|
+
const fn = OptionsValidator.PROCESSORS[key];
|
|
198
|
+
if (!fn)
|
|
199
|
+
throw new Errors.CmpStrValidationError(
|
|
200
|
+
`Invalid processor type <${key}> in <processors>: expected ${Object.keys(OptionsValidator.PROCESSORS).join(' | ')}`,
|
|
201
|
+
{ processors: opt, invalid: key }
|
|
202
|
+
);
|
|
203
|
+
fn(opt[key]);
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
static validateOptions(opt) {
|
|
207
|
+
OptionsValidator.validateMap(opt, OptionsValidator.CMPSTR_OPT_MAP);
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
exports.OptionsValidator = OptionsValidator;
|