cmpstr 3.1.1 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -40
- package/dist/CmpStr.esm.js +899 -886
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +2 -2
- package/dist/CmpStr.esm.min.js.map +1 -1
- package/dist/CmpStr.umd.js +912 -889
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +2 -2
- package/dist/CmpStr.umd.min.js.map +1 -1
- package/dist/cjs/CmpStr.cjs +26 -44
- package/dist/cjs/CmpStr.cjs.map +1 -1
- package/dist/cjs/CmpStrAsync.cjs +10 -5
- package/dist/cjs/CmpStrAsync.cjs.map +1 -1
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +21 -21
- package/dist/cjs/metric/Cosine.cjs.map +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +31 -29
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +16 -13
- package/dist/cjs/metric/DiceSorensen.cjs.map +1 -1
- package/dist/cjs/metric/Hamming.cjs +2 -2
- package/dist/cjs/metric/Hamming.cjs.map +1 -1
- package/dist/cjs/metric/Jaccard.cjs +16 -13
- package/dist/cjs/metric/Jaccard.cjs.map +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +45 -43
- package/dist/cjs/metric/JaroWinkler.cjs.map +1 -1
- package/dist/cjs/metric/LCS.cjs +21 -18
- package/dist/cjs/metric/LCS.cjs.map +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +21 -18
- package/dist/cjs/metric/Levenshtein.cjs.map +1 -1
- package/dist/cjs/metric/Metric.cjs +18 -31
- package/dist/cjs/metric/Metric.cjs.map +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs +27 -24
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -1
- package/dist/cjs/metric/{qGram.cjs → QGram.cjs} +18 -15
- package/dist/cjs/metric/QGram.cjs.map +1 -0
- package/dist/cjs/metric/SmithWaterman.cjs +27 -24
- package/dist/cjs/metric/SmithWaterman.cjs.map +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +5 -8
- package/dist/cjs/phonetic/Caverphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Cologne.cjs +1 -1
- package/dist/cjs/phonetic/Cologne.cjs.map +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +6 -3
- package/dist/cjs/phonetic/Metaphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +23 -16
- package/dist/cjs/phonetic/Phonetic.cjs.map +1 -1
- package/dist/cjs/phonetic/Soundex.cjs +1 -1
- package/dist/cjs/phonetic/Soundex.cjs.map +1 -1
- package/dist/cjs/root.cjs +3 -2
- package/dist/cjs/root.cjs.map +1 -1
- package/dist/cjs/utils/DeepMerge.cjs +73 -42
- package/dist/cjs/utils/DeepMerge.cjs.map +1 -1
- package/dist/cjs/utils/DiffChecker.cjs +33 -45
- package/dist/cjs/utils/DiffChecker.cjs.map +1 -1
- package/dist/cjs/utils/Filter.cjs +40 -46
- package/dist/cjs/utils/Filter.cjs.map +1 -1
- package/dist/cjs/utils/HashTable.cjs +28 -37
- package/dist/cjs/utils/HashTable.cjs.map +1 -1
- package/dist/cjs/utils/Normalizer.cjs +32 -21
- package/dist/cjs/utils/Normalizer.cjs.map +1 -1
- package/dist/cjs/utils/Pool.cjs +17 -22
- package/dist/cjs/utils/Pool.cjs.map +1 -1
- package/dist/cjs/utils/Profiler.cjs +40 -53
- package/dist/cjs/utils/Profiler.cjs.map +1 -1
- package/dist/cjs/utils/Registry.cjs +6 -4
- package/dist/cjs/utils/Registry.cjs.map +1 -1
- package/dist/cjs/utils/StructuredData.cjs +23 -25
- package/dist/cjs/utils/StructuredData.cjs.map +1 -1
- package/dist/cjs/utils/TextAnalyzer.cjs +76 -56
- package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -1
- package/dist/esm/CmpStr.mjs +21 -44
- package/dist/esm/CmpStr.mjs.map +1 -1
- package/dist/esm/CmpStrAsync.mjs +5 -5
- package/dist/esm/CmpStrAsync.mjs.map +1 -1
- package/dist/esm/index.mjs +1 -1
- package/dist/esm/metric/Cosine.mjs +21 -21
- package/dist/esm/metric/Cosine.mjs.map +1 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs +31 -29
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -1
- package/dist/esm/metric/DiceSorensen.mjs +16 -13
- package/dist/esm/metric/DiceSorensen.mjs.map +1 -1
- package/dist/esm/metric/Hamming.mjs +2 -2
- package/dist/esm/metric/Hamming.mjs.map +1 -1
- package/dist/esm/metric/Jaccard.mjs +16 -13
- package/dist/esm/metric/Jaccard.mjs.map +1 -1
- package/dist/esm/metric/JaroWinkler.mjs +45 -43
- package/dist/esm/metric/JaroWinkler.mjs.map +1 -1
- package/dist/esm/metric/LCS.mjs +21 -18
- package/dist/esm/metric/LCS.mjs.map +1 -1
- package/dist/esm/metric/Levenshtein.mjs +21 -18
- package/dist/esm/metric/Levenshtein.mjs.map +1 -1
- package/dist/esm/metric/Metric.mjs +19 -32
- package/dist/esm/metric/Metric.mjs.map +1 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs +27 -24
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -1
- package/dist/esm/metric/QGram.mjs +38 -0
- package/dist/esm/metric/QGram.mjs.map +1 -0
- package/dist/esm/metric/SmithWaterman.mjs +27 -24
- package/dist/esm/metric/SmithWaterman.mjs.map +1 -1
- package/dist/esm/phonetic/Caverphone.mjs +5 -8
- package/dist/esm/phonetic/Caverphone.mjs.map +1 -1
- package/dist/esm/phonetic/Cologne.mjs +1 -1
- package/dist/esm/phonetic/Cologne.mjs.map +1 -1
- package/dist/esm/phonetic/Metaphone.mjs +6 -3
- package/dist/esm/phonetic/Metaphone.mjs.map +1 -1
- package/dist/esm/phonetic/Phonetic.mjs +24 -17
- package/dist/esm/phonetic/Phonetic.mjs.map +1 -1
- package/dist/esm/phonetic/Soundex.mjs +1 -1
- package/dist/esm/phonetic/Soundex.mjs.map +1 -1
- package/dist/esm/root.mjs +3 -3
- package/dist/esm/utils/DeepMerge.mjs +73 -42
- package/dist/esm/utils/DeepMerge.mjs.map +1 -1
- package/dist/esm/utils/DiffChecker.mjs +33 -45
- package/dist/esm/utils/DiffChecker.mjs.map +1 -1
- package/dist/esm/utils/Filter.mjs +40 -46
- package/dist/esm/utils/Filter.mjs.map +1 -1
- package/dist/esm/utils/HashTable.mjs +28 -38
- package/dist/esm/utils/HashTable.mjs.map +1 -1
- package/dist/esm/utils/Normalizer.mjs +32 -21
- package/dist/esm/utils/Normalizer.mjs.map +1 -1
- package/dist/esm/utils/Pool.mjs +17 -22
- package/dist/esm/utils/Pool.mjs.map +1 -1
- package/dist/esm/utils/Profiler.mjs +40 -53
- package/dist/esm/utils/Profiler.mjs.map +1 -1
- package/dist/esm/utils/Registry.mjs +6 -4
- package/dist/esm/utils/Registry.mjs.map +1 -1
- package/dist/esm/utils/StructuredData.mjs +23 -25
- package/dist/esm/utils/StructuredData.mjs.map +1 -1
- package/dist/esm/utils/TextAnalyzer.mjs +76 -56
- package/dist/esm/utils/TextAnalyzer.mjs.map +1 -1
- package/dist/types/CmpStr.d.ts +56 -55
- package/dist/types/CmpStrAsync.d.ts +45 -45
- package/dist/types/index.d.ts +2 -2
- package/dist/types/metric/Cosine.d.ts +7 -5
- package/dist/types/metric/DamerauLevenshtein.d.ts +4 -2
- package/dist/types/metric/DiceSorensen.d.ts +5 -3
- package/dist/types/metric/Hamming.d.ts +4 -2
- package/dist/types/metric/Jaccard.d.ts +3 -1
- package/dist/types/metric/JaroWinkler.d.ts +4 -2
- package/dist/types/metric/LCS.d.ts +3 -1
- package/dist/types/metric/Levenshtein.d.ts +4 -2
- package/dist/types/metric/Metric.d.ts +23 -21
- package/dist/types/metric/NeedlemanWunsch.d.ts +4 -2
- package/dist/types/metric/{qGram.d.ts → QGram.d.ts} +5 -3
- package/dist/types/metric/SmithWaterman.d.ts +4 -2
- package/dist/types/metric/index.d.ts +2 -2
- package/dist/types/phonetic/Caverphone.d.ts +5 -2
- package/dist/types/phonetic/Cologne.d.ts +1 -0
- package/dist/types/phonetic/Metaphone.d.ts +3 -0
- package/dist/types/phonetic/Phonetic.d.ts +14 -10
- package/dist/types/phonetic/Soundex.d.ts +1 -0
- package/dist/types/phonetic/index.d.ts +1 -1
- package/dist/types/root.d.ts +7 -7
- package/dist/types/utils/DeepMerge.d.ts +4 -3
- package/dist/types/utils/DiffChecker.d.ts +9 -7
- package/dist/types/utils/Filter.d.ts +22 -8
- package/dist/types/utils/HashTable.d.ts +44 -10
- package/dist/types/utils/Normalizer.d.ts +13 -4
- package/dist/types/utils/Pool.d.ts +5 -3
- package/dist/types/utils/Profiler.d.ts +30 -15
- package/dist/types/utils/Registry.d.ts +11 -8
- package/dist/types/utils/StructuredData.d.ts +38 -28
- package/dist/types/utils/TextAnalyzer.d.ts +38 -14
- package/dist/types/utils/Types.d.ts +97 -15
- package/package.json +7 -3
- package/dist/cjs/metric/qGram.cjs.map +0 -1
- package/dist/esm/metric/qGram.mjs +0 -35
- package/dist/esm/metric/qGram.mjs.map +0 -1
package/dist/CmpStr.esm.js
CHANGED
|
@@ -1,74 +1,105 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* CmpStr v3.
|
|
2
|
+
* CmpStr v3.2.0 build-6929b12-260122
|
|
3
3
|
* This is a lightweight, fast and well performing library for calculating string similarity.
|
|
4
4
|
* (c) 2023-2026 Paul Köhler @komed3 / MIT License
|
|
5
5
|
* Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
|
|
6
6
|
*/
|
|
7
|
-
const
|
|
8
|
-
|
|
9
|
-
|
|
7
|
+
const BRACKET_PATTERN = /\[(\d+)]/g;
|
|
8
|
+
const PATH_CACHE = new Map();
|
|
9
|
+
function parse(p) {
|
|
10
|
+
let cached = PATH_CACHE.get(p);
|
|
11
|
+
if (cached) return cached;
|
|
12
|
+
const parsed = p
|
|
13
|
+
.replace(BRACKET_PATTERN, '.$1')
|
|
10
14
|
.split('.')
|
|
11
|
-
.map((s) =>
|
|
12
|
-
|
|
13
|
-
|
|
15
|
+
.map((s) => {
|
|
16
|
+
const n = Number(s);
|
|
17
|
+
return Number.isInteger(n) && String(n) === s ? n : s;
|
|
18
|
+
});
|
|
19
|
+
PATH_CACHE.set(p, parsed);
|
|
20
|
+
return parsed;
|
|
21
|
+
}
|
|
22
|
+
function get(t, path, fb) {
|
|
23
|
+
let o = t;
|
|
24
|
+
for (const k of parse(path)) {
|
|
25
|
+
if (o == null || !(k in o)) return fb;
|
|
26
|
+
o = o[k];
|
|
27
|
+
}
|
|
28
|
+
return o;
|
|
14
29
|
}
|
|
15
30
|
function has(t, path) {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
31
|
+
let o = t;
|
|
32
|
+
for (const k of parse(path)) {
|
|
33
|
+
if (o == null || !(k in o)) return false;
|
|
34
|
+
o = o[k];
|
|
35
|
+
}
|
|
36
|
+
return true;
|
|
20
37
|
}
|
|
21
38
|
function set(t, path, value) {
|
|
22
39
|
if (path === '') return value;
|
|
23
|
-
const
|
|
40
|
+
const keys = parse(path);
|
|
24
41
|
if (t !== undefined && (typeof t !== 'object' || t === null))
|
|
25
|
-
throw Error(`Cannot set property <${
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
42
|
+
throw Error(`Cannot set property <${keys[0]}> of <${JSON.stringify(t)}>`);
|
|
43
|
+
const root = t ?? (typeof keys[0] === 'number' ? [] : Object.create(null));
|
|
44
|
+
let cur = root;
|
|
45
|
+
for (let i = 0; i < keys.length - 1; i++) {
|
|
46
|
+
const k = keys[i];
|
|
47
|
+
let n = cur[k];
|
|
48
|
+
if (n != null && typeof n !== 'object')
|
|
49
|
+
throw Error(
|
|
50
|
+
`Cannot set property <${keys[i + 1]}> of <${JSON.stringify(n)}>`
|
|
51
|
+
);
|
|
52
|
+
if (n == null)
|
|
53
|
+
n = cur[k] = typeof keys[i + 1] === 'number' ? [] : Object.create(null);
|
|
54
|
+
cur = n;
|
|
55
|
+
}
|
|
56
|
+
cur[keys[keys.length - 1]] = value;
|
|
57
|
+
return root;
|
|
30
58
|
}
|
|
31
59
|
function merge(
|
|
32
60
|
t = Object.create(null),
|
|
33
61
|
o = Object.create(null),
|
|
34
62
|
mergeUndefined = false
|
|
35
63
|
) {
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
64
|
+
const target = t ?? Object.create(null);
|
|
65
|
+
Object.keys(o).forEach((k) => {
|
|
66
|
+
const val = o[k];
|
|
67
|
+
if (!mergeUndefined && val === undefined) return;
|
|
68
|
+
if (k === '__proto__' || k === 'constructor') return;
|
|
69
|
+
if (val !== null && typeof val === 'object' && !Array.isArray(val)) {
|
|
70
|
+
const existing = target[k];
|
|
71
|
+
target[k] = merge(
|
|
72
|
+
existing !== null &&
|
|
73
|
+
typeof existing === 'object' &&
|
|
74
|
+
!Array.isArray(existing)
|
|
75
|
+
? existing
|
|
76
|
+
: Object.create(null),
|
|
77
|
+
val,
|
|
78
|
+
mergeUndefined
|
|
79
|
+
);
|
|
80
|
+
} else target[k] = val;
|
|
81
|
+
});
|
|
82
|
+
return target;
|
|
53
83
|
}
|
|
54
84
|
function rmv(t, path, preserveEmpty = false) {
|
|
55
|
-
const
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
if (
|
|
59
|
-
if (
|
|
85
|
+
const keys = parse(path);
|
|
86
|
+
const remove = (obj, i = 0) => {
|
|
87
|
+
const key = keys[i];
|
|
88
|
+
if (!obj || typeof obj !== 'object') return false;
|
|
89
|
+
if (i === keys.length - 1) return delete obj[key];
|
|
90
|
+
if (!remove(obj[key], i + 1)) return false;
|
|
60
91
|
if (!preserveEmpty) {
|
|
61
|
-
const val =
|
|
92
|
+
const val = obj[key];
|
|
62
93
|
if (
|
|
63
94
|
typeof val === 'object' &&
|
|
64
95
|
((Array.isArray(val) && val.every((v) => v == null)) ||
|
|
65
96
|
(!Array.isArray(val) && Object.keys(val).length === 0))
|
|
66
97
|
)
|
|
67
|
-
delete
|
|
98
|
+
delete obj[key];
|
|
68
99
|
}
|
|
69
100
|
return true;
|
|
70
101
|
};
|
|
71
|
-
|
|
102
|
+
remove(t);
|
|
72
103
|
return t;
|
|
73
104
|
}
|
|
74
105
|
|
|
@@ -81,272 +112,6 @@ var DeepMerge = /*#__PURE__*/ Object.freeze({
|
|
|
81
112
|
set: set
|
|
82
113
|
});
|
|
83
114
|
|
|
84
|
-
class Profiler {
|
|
85
|
-
static ENV;
|
|
86
|
-
static instance;
|
|
87
|
-
store = new Set();
|
|
88
|
-
totalTime = 0;
|
|
89
|
-
totalMem = 0;
|
|
90
|
-
active;
|
|
91
|
-
static detectEnv() {
|
|
92
|
-
if (typeof process !== 'undefined') Profiler.ENV = 'nodejs';
|
|
93
|
-
else if (typeof performance !== 'undefined') Profiler.ENV = 'browser';
|
|
94
|
-
else Profiler.ENV = 'unknown';
|
|
95
|
-
}
|
|
96
|
-
static getInstance(enable) {
|
|
97
|
-
if (!Profiler.ENV) Profiler.detectEnv();
|
|
98
|
-
if (!Profiler.instance) Profiler.instance = new Profiler(enable);
|
|
99
|
-
return Profiler.instance;
|
|
100
|
-
}
|
|
101
|
-
constructor(enable) {
|
|
102
|
-
this.active = enable ?? false;
|
|
103
|
-
}
|
|
104
|
-
now() {
|
|
105
|
-
switch (Profiler.ENV) {
|
|
106
|
-
case 'nodejs':
|
|
107
|
-
return Number(process.hrtime.bigint()) / 1e6;
|
|
108
|
-
case 'browser':
|
|
109
|
-
return performance.now();
|
|
110
|
-
default:
|
|
111
|
-
return Date.now();
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
mem() {
|
|
115
|
-
switch (Profiler.ENV) {
|
|
116
|
-
case 'nodejs':
|
|
117
|
-
return process.memoryUsage().heapUsed;
|
|
118
|
-
case 'browser':
|
|
119
|
-
return performance.memory?.usedJSHeapSize ?? 0;
|
|
120
|
-
default:
|
|
121
|
-
return 0;
|
|
122
|
-
}
|
|
123
|
-
}
|
|
124
|
-
enable() {
|
|
125
|
-
this.active = true;
|
|
126
|
-
}
|
|
127
|
-
disable() {
|
|
128
|
-
this.active = false;
|
|
129
|
-
}
|
|
130
|
-
clear() {
|
|
131
|
-
this.store.clear();
|
|
132
|
-
this.totalTime = 0;
|
|
133
|
-
this.totalMem = 0;
|
|
134
|
-
}
|
|
135
|
-
run(fn, meta = {}) {
|
|
136
|
-
if (!this.active) return fn();
|
|
137
|
-
const startTime = this.now(),
|
|
138
|
-
startMem = this.mem();
|
|
139
|
-
const res = fn();
|
|
140
|
-
const deltaTime = this.now() - startTime;
|
|
141
|
-
const deltaMem = this.mem() - startMem;
|
|
142
|
-
this.store.add({ time: deltaTime, mem: deltaMem, res, meta });
|
|
143
|
-
((this.totalTime += deltaTime), (this.totalMem += deltaMem));
|
|
144
|
-
return res;
|
|
145
|
-
}
|
|
146
|
-
async runAsync(fn, meta = {}) {
|
|
147
|
-
if (!this.active) return await fn();
|
|
148
|
-
const startTime = this.now(),
|
|
149
|
-
startMem = this.mem();
|
|
150
|
-
const res = await fn();
|
|
151
|
-
const deltaTime = this.now() - startTime;
|
|
152
|
-
const deltaMem = this.mem() - startMem;
|
|
153
|
-
this.store.add({ time: deltaTime, mem: deltaMem, res, meta });
|
|
154
|
-
((this.totalTime += deltaTime), (this.totalMem += deltaMem));
|
|
155
|
-
return res;
|
|
156
|
-
}
|
|
157
|
-
getAll() {
|
|
158
|
-
return [...this.store];
|
|
159
|
-
}
|
|
160
|
-
getLast() {
|
|
161
|
-
return this.getAll().pop();
|
|
162
|
-
}
|
|
163
|
-
getTotal() {
|
|
164
|
-
return { time: this.totalTime, mem: this.totalMem };
|
|
165
|
-
}
|
|
166
|
-
services = {
|
|
167
|
-
enable: this.enable.bind(this),
|
|
168
|
-
disable: this.disable.bind(this),
|
|
169
|
-
clear: this.clear.bind(this),
|
|
170
|
-
report: this.getAll.bind(this),
|
|
171
|
-
last: this.getLast.bind(this),
|
|
172
|
-
total: this.getTotal.bind(this)
|
|
173
|
-
};
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
class TextAnalyzer {
|
|
177
|
-
text;
|
|
178
|
-
words = [];
|
|
179
|
-
sentences = [];
|
|
180
|
-
charFrequency = new Map();
|
|
181
|
-
wordHistogram = new Map();
|
|
182
|
-
syllableCache = new Map();
|
|
183
|
-
constructor(input) {
|
|
184
|
-
this.text = input.trim();
|
|
185
|
-
this.tokenize();
|
|
186
|
-
this.computeFrequencies();
|
|
187
|
-
}
|
|
188
|
-
tokenize() {
|
|
189
|
-
((this.words = []), (this.sentences = []));
|
|
190
|
-
const text = this.text;
|
|
191
|
-
const wordRegex = /\p{L}+/gu;
|
|
192
|
-
let match;
|
|
193
|
-
while ((match = wordRegex.exec(text)) !== null) {
|
|
194
|
-
this.words.push(match[0].toLowerCase());
|
|
195
|
-
}
|
|
196
|
-
this.sentences = text.split(/(?<=[.!?])\s+/).filter(Boolean);
|
|
197
|
-
}
|
|
198
|
-
computeFrequencies() {
|
|
199
|
-
for (const char of this.text)
|
|
200
|
-
this.charFrequency.set(char, (this.charFrequency.get(char) ?? 0) + 1);
|
|
201
|
-
for (const word of this.words)
|
|
202
|
-
this.wordHistogram.set(word, (this.wordHistogram.get(word) ?? 0) + 1);
|
|
203
|
-
}
|
|
204
|
-
estimateSyllables(word) {
|
|
205
|
-
if (this.syllableCache.has(word)) return this.syllableCache.get(word);
|
|
206
|
-
const clean = word.toLowerCase().replace(/[^a-zäöüß]/g, '');
|
|
207
|
-
const matches = clean.match(/[aeiouyäöü]+/g);
|
|
208
|
-
const count = matches ? matches.length : 1;
|
|
209
|
-
this.syllableCache.set(word, count);
|
|
210
|
-
return count;
|
|
211
|
-
}
|
|
212
|
-
getLength() {
|
|
213
|
-
return this.text.length;
|
|
214
|
-
}
|
|
215
|
-
getWordCount() {
|
|
216
|
-
return this.words.length;
|
|
217
|
-
}
|
|
218
|
-
getSentenceCount() {
|
|
219
|
-
return this.sentences.length;
|
|
220
|
-
}
|
|
221
|
-
getAvgWordLength() {
|
|
222
|
-
let totalLen = 0;
|
|
223
|
-
for (const w of this.words) totalLen += w.length;
|
|
224
|
-
return this.words.length ? totalLen / this.words.length : 0;
|
|
225
|
-
}
|
|
226
|
-
getAvgSentenceLength() {
|
|
227
|
-
return this.sentences.length
|
|
228
|
-
? this.words.length / this.sentences.length
|
|
229
|
-
: 0;
|
|
230
|
-
}
|
|
231
|
-
getWordHistogram() {
|
|
232
|
-
return Object.fromEntries(this.wordHistogram);
|
|
233
|
-
}
|
|
234
|
-
getMostCommonWords(limit = 5) {
|
|
235
|
-
return [...this.wordHistogram.entries()]
|
|
236
|
-
.sort((a, b) => b[1] - a[1])
|
|
237
|
-
.slice(0, limit)
|
|
238
|
-
.map((e) => e[0]);
|
|
239
|
-
}
|
|
240
|
-
getHapaxLegomena() {
|
|
241
|
-
return [...this.wordHistogram.entries()]
|
|
242
|
-
.filter(([, c]) => c === 1)
|
|
243
|
-
.map((e) => e[0]);
|
|
244
|
-
}
|
|
245
|
-
hasNumbers() {
|
|
246
|
-
return /\d/.test(this.text);
|
|
247
|
-
}
|
|
248
|
-
getUpperCaseRatio() {
|
|
249
|
-
let upper = 0,
|
|
250
|
-
letters = 0;
|
|
251
|
-
for (let i = 0, len = this.text.length; i < len; i++) {
|
|
252
|
-
const c = this.text[i];
|
|
253
|
-
if (/[A-Za-zÄÖÜäöüß]/.test(c)) {
|
|
254
|
-
letters++;
|
|
255
|
-
if (/[A-ZÄÖÜ]/.test(c)) upper++;
|
|
256
|
-
}
|
|
257
|
-
}
|
|
258
|
-
return letters ? upper / letters : 0;
|
|
259
|
-
}
|
|
260
|
-
getCharFrequency() {
|
|
261
|
-
return Object.fromEntries(this.charFrequency);
|
|
262
|
-
}
|
|
263
|
-
getUnicodeStats() {
|
|
264
|
-
const result = {};
|
|
265
|
-
for (const [char, count] of this.charFrequency) {
|
|
266
|
-
const block = char
|
|
267
|
-
.charCodeAt(0)
|
|
268
|
-
.toString(16)
|
|
269
|
-
.padStart(4, '0')
|
|
270
|
-
.toUpperCase();
|
|
271
|
-
result[block] = (result[block] ?? 0) + count;
|
|
272
|
-
}
|
|
273
|
-
return result;
|
|
274
|
-
}
|
|
275
|
-
getLongWordRatio(len = 7) {
|
|
276
|
-
let long = 0;
|
|
277
|
-
for (const w of this.words) if (w.length >= len) long++;
|
|
278
|
-
return this.words.length ? long / this.words.length : 0;
|
|
279
|
-
}
|
|
280
|
-
getShortWordRatio(len = 3) {
|
|
281
|
-
let short = 0;
|
|
282
|
-
for (const w of this.words) if (w.length <= len) short++;
|
|
283
|
-
return this.words.length ? short / this.words.length : 0;
|
|
284
|
-
}
|
|
285
|
-
getSyllablesCount() {
|
|
286
|
-
let count = 0;
|
|
287
|
-
for (const w of this.words) count += this.estimateSyllables(w);
|
|
288
|
-
return count;
|
|
289
|
-
}
|
|
290
|
-
getMonosyllabicWordCount() {
|
|
291
|
-
let count = 0;
|
|
292
|
-
for (const w of this.words) if (this.estimateSyllables(w) === 1) count++;
|
|
293
|
-
return count;
|
|
294
|
-
}
|
|
295
|
-
getMinSyllablesWordCount(min) {
|
|
296
|
-
let count = 0;
|
|
297
|
-
for (const w of this.words) if (this.estimateSyllables(w) >= min) count++;
|
|
298
|
-
return count;
|
|
299
|
-
}
|
|
300
|
-
getMaxSyllablesWordCount(max) {
|
|
301
|
-
let count = 0;
|
|
302
|
-
for (const w of this.words) if (this.estimateSyllables(w) <= max) count++;
|
|
303
|
-
return count;
|
|
304
|
-
}
|
|
305
|
-
getHonoresR() {
|
|
306
|
-
return (
|
|
307
|
-
(100 * Math.log(this.words.length)) /
|
|
308
|
-
(1 - this.getHapaxLegomena().length / (this.wordHistogram.size ?? 1))
|
|
309
|
-
);
|
|
310
|
-
}
|
|
311
|
-
getReadingTime(wpm = 200) {
|
|
312
|
-
return Math.max(1, this.words.length / (wpm ?? 1));
|
|
313
|
-
}
|
|
314
|
-
getReadabilityScore(metric = 'flesch') {
|
|
315
|
-
const w = this.words.length || 1;
|
|
316
|
-
const s = this.sentences.length || 1;
|
|
317
|
-
const y = this.getSyllablesCount() || 1;
|
|
318
|
-
const asl = w / s;
|
|
319
|
-
const asw = y / w;
|
|
320
|
-
switch (metric) {
|
|
321
|
-
case 'flesch':
|
|
322
|
-
return 206.835 - 1.015 * asl - 84.6 * asw;
|
|
323
|
-
case 'fleschde':
|
|
324
|
-
return 180 - asl - 58.5 * asw;
|
|
325
|
-
case 'kincaid':
|
|
326
|
-
return 0.39 * asl + 11.8 * asw - 15.59;
|
|
327
|
-
}
|
|
328
|
-
}
|
|
329
|
-
getLIXScore() {
|
|
330
|
-
const w = this.words.length || 1;
|
|
331
|
-
const s = this.sentences.length || 1;
|
|
332
|
-
const l = this.getLongWordRatio() * w;
|
|
333
|
-
return w / s + (l / w) * 100;
|
|
334
|
-
}
|
|
335
|
-
getWSTFScore() {
|
|
336
|
-
const w = this.words.length || 1;
|
|
337
|
-
const h = (this.getMinSyllablesWordCount(3) / w) * 100;
|
|
338
|
-
const s = this.getAvgSentenceLength();
|
|
339
|
-
const l = this.getLongWordRatio() * 100;
|
|
340
|
-
const m = (this.getMonosyllabicWordCount() / w) * 100;
|
|
341
|
-
return [
|
|
342
|
-
0.1935 * h + 0.1672 * s + 0.1297 * l - 0.0327 * m - 0.875,
|
|
343
|
-
0.2007 * h + 0.1682 * s + 0.1373 * l - 2.779,
|
|
344
|
-
0.2963 * h + 0.1905 * s - 1.1144,
|
|
345
|
-
0.2744 * h + 0.2656 * s - 1.693
|
|
346
|
-
];
|
|
347
|
-
}
|
|
348
|
-
}
|
|
349
|
-
|
|
350
115
|
class DiffChecker {
|
|
351
116
|
a;
|
|
352
117
|
b;
|
|
@@ -377,8 +142,7 @@ class DiffChecker {
|
|
|
377
142
|
return { linesA, linesB, maxLen: Math.max(linesA.length, linesB.length) };
|
|
378
143
|
}
|
|
379
144
|
tokenize(input) {
|
|
380
|
-
|
|
381
|
-
switch (mode) {
|
|
145
|
+
switch (this.options.mode) {
|
|
382
146
|
case 'line':
|
|
383
147
|
return [input];
|
|
384
148
|
case 'word':
|
|
@@ -386,20 +150,15 @@ class DiffChecker {
|
|
|
386
150
|
}
|
|
387
151
|
}
|
|
388
152
|
concat(input) {
|
|
389
|
-
|
|
390
|
-
return input.join(mode === 'word' ? ' ' : '');
|
|
153
|
+
return input.join(this.options.mode === 'word' ? ' ' : '');
|
|
391
154
|
}
|
|
392
155
|
computeDiff() {
|
|
393
|
-
if (
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
}
|
|
400
|
-
this.findGroups();
|
|
401
|
-
this.diffRun = true;
|
|
402
|
-
}
|
|
156
|
+
if (this.diffRun) return;
|
|
157
|
+
const { linesA, linesB, maxLen } = this.text2lines();
|
|
158
|
+
for (let i = 0; i < maxLen; i++)
|
|
159
|
+
this.lineDiff(linesA[i] || '', linesB[i] || '', i);
|
|
160
|
+
this.findGroups();
|
|
161
|
+
this.diffRun = true;
|
|
403
162
|
}
|
|
404
163
|
lineDiff(a, b, line) {
|
|
405
164
|
const { mode, caseInsensitive } = this.options;
|
|
@@ -410,24 +169,27 @@ class DiffChecker {
|
|
|
410
169
|
let diffs = [];
|
|
411
170
|
let delSize = 0,
|
|
412
171
|
insSize = 0;
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
172
|
+
switch (mode) {
|
|
173
|
+
case 'line':
|
|
174
|
+
if (A !== B) {
|
|
175
|
+
diffs.push({
|
|
176
|
+
posA: 0,
|
|
177
|
+
posB: 0,
|
|
178
|
+
del: a,
|
|
179
|
+
ins: b,
|
|
180
|
+
size: b.length - a.length
|
|
181
|
+
});
|
|
182
|
+
delSize = a.length;
|
|
183
|
+
insSize = b.length;
|
|
184
|
+
}
|
|
185
|
+
break;
|
|
186
|
+
case 'word':
|
|
187
|
+
diffs = this.preciseDiff(a, A, b, B);
|
|
188
|
+
for (const d of diffs)
|
|
189
|
+
((delSize += d.del.length), (insSize += d.ins.length));
|
|
190
|
+
break;
|
|
429
191
|
}
|
|
430
|
-
if (diffs.length)
|
|
192
|
+
if (diffs.length)
|
|
431
193
|
this.entries.push({
|
|
432
194
|
line,
|
|
433
195
|
diffs,
|
|
@@ -437,7 +199,6 @@ class DiffChecker {
|
|
|
437
199
|
totalSize: insSize - delSize,
|
|
438
200
|
magnitude: this.magnitude(delSize, insSize, baseLen)
|
|
439
201
|
});
|
|
440
|
-
}
|
|
441
202
|
}
|
|
442
203
|
preciseDiff(a, A, b, B) {
|
|
443
204
|
const posIndex = (t) =>
|
|
@@ -640,43 +401,102 @@ class DiffChecker {
|
|
|
640
401
|
}
|
|
641
402
|
return out.join(lineBreak);
|
|
642
403
|
}
|
|
643
|
-
getStructuredDiff()
|
|
644
|
-
|
|
404
|
+
getStructuredDiff = () => this.entries;
|
|
405
|
+
getGroupedDiff = () => this.grouped;
|
|
406
|
+
getASCIIDiff = () => this.output(false);
|
|
407
|
+
getCLIDiff = () => this.output(true);
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
class Filter {
|
|
411
|
+
static filters = new Map();
|
|
412
|
+
static pipeline = new Map();
|
|
413
|
+
static getPipeline(hook) {
|
|
414
|
+
const cached = Filter.pipeline.get(hook);
|
|
415
|
+
if (cached) return cached;
|
|
416
|
+
const filter = Filter.filters.get(hook);
|
|
417
|
+
if (!filter) return (s) => s;
|
|
418
|
+
const pipeline = Array.from(filter.values())
|
|
419
|
+
.filter((f) => f.active)
|
|
420
|
+
.sort((a, b) => a.priority - b.priority)
|
|
421
|
+
.map((f) => f.fn);
|
|
422
|
+
const fn = (input) => pipeline.reduce((v, f) => f(v), input);
|
|
423
|
+
Filter.pipeline.set(hook, fn);
|
|
424
|
+
return fn;
|
|
425
|
+
}
|
|
426
|
+
static has(hook, id) {
|
|
427
|
+
return !!Filter.filters.get(hook)?.has(id);
|
|
428
|
+
}
|
|
429
|
+
static add(hook, id, fn, opt = {}) {
|
|
430
|
+
const { priority = 10, active = true, overrideable = true } = opt;
|
|
431
|
+
const filter = Filter.filters.get(hook) ?? new Map();
|
|
432
|
+
const index = filter.get(id);
|
|
433
|
+
if (index && !index.overrideable) return false;
|
|
434
|
+
filter.set(id, { id, fn, priority, active, overrideable });
|
|
435
|
+
Filter.filters.set(hook, filter);
|
|
436
|
+
Filter.pipeline.delete(hook);
|
|
437
|
+
return true;
|
|
645
438
|
}
|
|
646
|
-
|
|
647
|
-
|
|
439
|
+
static remove(hook, id) {
|
|
440
|
+
Filter.pipeline.delete(hook);
|
|
441
|
+
const filter = Filter.filters.get(hook);
|
|
442
|
+
return filter ? filter.delete(id) : false;
|
|
648
443
|
}
|
|
649
|
-
|
|
650
|
-
|
|
444
|
+
static pause(hook, id) {
|
|
445
|
+
Filter.pipeline.delete(hook);
|
|
446
|
+
const f = Filter.filters.get(hook)?.get(id);
|
|
447
|
+
return !!(f && ((f.active = false), true));
|
|
651
448
|
}
|
|
652
|
-
|
|
653
|
-
|
|
449
|
+
static resume(hook, id) {
|
|
450
|
+
Filter.pipeline.delete(hook);
|
|
451
|
+
const f = Filter.filters.get(hook)?.get(id);
|
|
452
|
+
return !!(f && ((f.active = true), true));
|
|
453
|
+
}
|
|
454
|
+
static list(hook, active = false) {
|
|
455
|
+
const filter = Filter.filters.get(hook);
|
|
456
|
+
if (!filter) return [];
|
|
457
|
+
const out = [];
|
|
458
|
+
for (const f of filter.values()) if (!active || f.active) out.push(f.id);
|
|
459
|
+
return out;
|
|
460
|
+
}
|
|
461
|
+
static apply(hook, input) {
|
|
462
|
+
const fn = Filter.getPipeline(hook);
|
|
463
|
+
return Array.isArray(input) ? input.map(fn) : fn(input);
|
|
464
|
+
}
|
|
465
|
+
static async applyAsync(hook, input) {
|
|
466
|
+
const fn = Filter.getPipeline(hook);
|
|
467
|
+
return Array.isArray(input)
|
|
468
|
+
? Promise.all(input.map(fn))
|
|
469
|
+
: Promise.resolve(fn(input));
|
|
470
|
+
}
|
|
471
|
+
static clear(hook) {
|
|
472
|
+
if (hook) Filter.filters.delete(hook);
|
|
473
|
+
else Filter.filters.clear();
|
|
654
474
|
}
|
|
655
475
|
}
|
|
656
476
|
|
|
657
477
|
class Hasher {
|
|
658
478
|
static FNV_PRIME = 0x01000193;
|
|
659
479
|
static HASH_OFFSET = 0x811c9dc5;
|
|
660
|
-
static
|
|
480
|
+
static fastFNV1a(str) {
|
|
661
481
|
const len = str.length;
|
|
662
482
|
let hash = this.HASH_OFFSET;
|
|
663
483
|
const chunks = Math.floor(len / 4);
|
|
664
484
|
for (let i = 0; i < chunks; i++) {
|
|
665
485
|
const pos = i * 4;
|
|
666
|
-
const
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
486
|
+
const chunk =
|
|
487
|
+
str.charCodeAt(pos) |
|
|
488
|
+
(str.charCodeAt(pos + 1) << 8) |
|
|
489
|
+
(str.charCodeAt(pos + 2) << 16) |
|
|
490
|
+
(str.charCodeAt(pos + 3) << 24);
|
|
671
491
|
hash ^= chunk;
|
|
672
|
-
hash
|
|
492
|
+
hash = Math.imul(hash, this.FNV_PRIME);
|
|
673
493
|
}
|
|
674
494
|
const remaining = len % 4;
|
|
675
495
|
if (remaining > 0) {
|
|
676
496
|
const pos = chunks * 4;
|
|
677
497
|
for (let i = 0; i < remaining; i++) {
|
|
678
498
|
hash ^= str.charCodeAt(pos + i);
|
|
679
|
-
hash
|
|
499
|
+
hash = Math.imul(hash, this.FNV_PRIME);
|
|
680
500
|
}
|
|
681
501
|
}
|
|
682
502
|
hash ^= hash >>> 16;
|
|
@@ -688,73 +508,74 @@ class Hasher {
|
|
|
688
508
|
}
|
|
689
509
|
}
|
|
690
510
|
class HashTable {
|
|
511
|
+
LRU;
|
|
691
512
|
static MAX_LEN = 2048;
|
|
692
513
|
static TABLE_SIZE = 10_000;
|
|
693
514
|
table = new Map();
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
if (str.length > HashTable.MAX_LEN) return false;
|
|
697
|
-
}
|
|
698
|
-
const hashes = strs.map((s) => Hasher.fnv1a(s));
|
|
699
|
-
if (sorted) hashes.sort();
|
|
700
|
-
return [label, ...hashes].join('-');
|
|
515
|
+
constructor(LRU = true) {
|
|
516
|
+
this.LRU = LRU;
|
|
701
517
|
}
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
return this.table.get(key);
|
|
518
|
+
key(label, strs, sorted = false) {
|
|
519
|
+
for (const str of strs) if (str.length > HashTable.MAX_LEN) return false;
|
|
520
|
+
const hashes = strs.map((s) => Hasher.fastFNV1a(s));
|
|
521
|
+
return [label, ...(sorted ? hashes.sort() : hashes)].join('-');
|
|
707
522
|
}
|
|
523
|
+
has = (key) => this.table.has(key);
|
|
524
|
+
get = (key) => this.table.get(key);
|
|
708
525
|
set(key, entry, update = true) {
|
|
709
|
-
if (
|
|
710
|
-
|
|
711
|
-
(
|
|
712
|
-
|
|
713
|
-
this.table.set(key, entry);
|
|
714
|
-
return true;
|
|
526
|
+
if (!update && this.table.has(key)) return false;
|
|
527
|
+
while (!this.table.has(key) && this.table.size >= HashTable.TABLE_SIZE) {
|
|
528
|
+
if (!this.LRU) return false;
|
|
529
|
+
this.table.delete(this.table.keys().next().value);
|
|
715
530
|
}
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
delete(key) {
|
|
719
|
-
this.table.delete(key);
|
|
720
|
-
}
|
|
721
|
-
clear() {
|
|
722
|
-
this.table.clear();
|
|
723
|
-
}
|
|
724
|
-
size() {
|
|
725
|
-
return this.table.size;
|
|
531
|
+
this.table.set(key, entry);
|
|
532
|
+
return true;
|
|
726
533
|
}
|
|
534
|
+
delete = (key) => this.table.delete(key);
|
|
535
|
+
clear = () => this.table.clear();
|
|
536
|
+
size = () => this.table.size;
|
|
727
537
|
}
|
|
728
538
|
|
|
729
539
|
class Normalizer {
|
|
730
540
|
static pipeline = new Map();
|
|
731
541
|
static cache = new HashTable();
|
|
542
|
+
static REGEX = {
|
|
543
|
+
whitespace: /\s+/g,
|
|
544
|
+
doubleChars: /(.)\1+/g,
|
|
545
|
+
specialChars: /[^\p{L}\p{N}\s]/gu,
|
|
546
|
+
nonLetters: /[^\p{L}]/gu,
|
|
547
|
+
nonNumbers: /\p{N}/gu
|
|
548
|
+
};
|
|
549
|
+
static canonicalFlags(flags) {
|
|
550
|
+
return Array.from(new Set(flags)).sort().join('');
|
|
551
|
+
}
|
|
732
552
|
static getPipeline(flags) {
|
|
733
553
|
if (Normalizer.pipeline.has(flags)) return Normalizer.pipeline.get(flags);
|
|
734
|
-
const
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
Normalizer.pipeline.set(flags,
|
|
752
|
-
return
|
|
554
|
+
const { REGEX } = Normalizer;
|
|
555
|
+
const steps = [
|
|
556
|
+
['d', (s) => s.normalize('NFD')],
|
|
557
|
+
['i', (s) => s.toLowerCase()],
|
|
558
|
+
['k', (s) => s.replace(REGEX.nonLetters, '')],
|
|
559
|
+
['n', (s) => s.replace(REGEX.nonNumbers, '')],
|
|
560
|
+
['r', (s) => s.replace(REGEX.doubleChars, '$1')],
|
|
561
|
+
['s', (s) => s.replace(REGEX.specialChars, '')],
|
|
562
|
+
['t', (s) => s.trim()],
|
|
563
|
+
['u', (s) => s.normalize('NFC')],
|
|
564
|
+
['w', (s) => s.replace(REGEX.whitespace, ' ')],
|
|
565
|
+
['x', (s) => s.normalize('NFKC')]
|
|
566
|
+
];
|
|
567
|
+
const pipeline = steps
|
|
568
|
+
.filter(([f]) => flags.includes(f))
|
|
569
|
+
.map(([, fn]) => fn);
|
|
570
|
+
const fn = (s) => pipeline.reduce((v, f) => f(v), s);
|
|
571
|
+
Normalizer.pipeline.set(flags, fn);
|
|
572
|
+
return fn;
|
|
753
573
|
}
|
|
754
574
|
static normalize(input, flags) {
|
|
575
|
+
if (!flags || typeof flags !== 'string' || !input) return input;
|
|
576
|
+
flags = this.canonicalFlags(flags);
|
|
755
577
|
if (Array.isArray(input))
|
|
756
578
|
return input.map((s) => Normalizer.normalize(s, flags));
|
|
757
|
-
if (!flags || typeof flags !== 'string' || !input) return input;
|
|
758
579
|
const key = Normalizer.cache.key(flags, [input]);
|
|
759
580
|
if (key && Normalizer.cache.has(key)) return Normalizer.cache.get(key);
|
|
760
581
|
const res = Normalizer.getPipeline(flags)(input);
|
|
@@ -772,76 +593,135 @@ class Normalizer {
|
|
|
772
593
|
}
|
|
773
594
|
}
|
|
774
595
|
|
|
775
|
-
class
|
|
776
|
-
|
|
777
|
-
static
|
|
778
|
-
|
|
596
|
+
class Profiler {
|
|
597
|
+
active;
|
|
598
|
+
static ENV;
|
|
599
|
+
static instance;
|
|
600
|
+
nowFn;
|
|
601
|
+
memFn;
|
|
602
|
+
store = new Set();
|
|
603
|
+
totalTime = 0;
|
|
604
|
+
totalMem = 0;
|
|
605
|
+
static detectEnv() {
|
|
606
|
+
if (typeof process !== 'undefined') Profiler.ENV = 'nodejs';
|
|
607
|
+
else if (typeof performance !== 'undefined') Profiler.ENV = 'browser';
|
|
608
|
+
else Profiler.ENV = 'unknown';
|
|
779
609
|
}
|
|
780
|
-
static
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
const index = filter.findIndex((f) => f.id === id);
|
|
784
|
-
if (index >= 0) {
|
|
785
|
-
const f = filter[index];
|
|
786
|
-
if (!f.overrideable) return false;
|
|
787
|
-
filter.splice(index, 1);
|
|
788
|
-
}
|
|
789
|
-
filter.push({ id, fn, priority, active, overrideable });
|
|
790
|
-
filter.sort((a, b) => a.priority - b.priority);
|
|
791
|
-
Filter.filters.set(hook, filter);
|
|
792
|
-
return true;
|
|
610
|
+
static getInstance(enable) {
|
|
611
|
+
if (!Profiler.ENV) Profiler.detectEnv();
|
|
612
|
+
return (Profiler.instance ||= new Profiler(enable));
|
|
793
613
|
}
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
614
|
+
constructor(active = false) {
|
|
615
|
+
this.active = active;
|
|
616
|
+
switch (Profiler.ENV) {
|
|
617
|
+
case 'nodejs':
|
|
618
|
+
this.nowFn = () => Number(process.hrtime.bigint()) / 1e6;
|
|
619
|
+
this.memFn = () => process.memoryUsage().heapUsed;
|
|
620
|
+
break;
|
|
621
|
+
case 'browser':
|
|
622
|
+
this.nowFn = () => performance.now();
|
|
623
|
+
this.memFn = () => performance.memory?.usedJSHeapSize ?? 0;
|
|
624
|
+
break;
|
|
625
|
+
default:
|
|
626
|
+
this.nowFn = () => Date.now();
|
|
627
|
+
this.memFn = () => 0;
|
|
628
|
+
break;
|
|
801
629
|
}
|
|
802
|
-
return false;
|
|
803
|
-
}
|
|
804
|
-
static pause(hook, id) {
|
|
805
|
-
const f = Filter.find(hook, id);
|
|
806
|
-
if (!f) return false;
|
|
807
|
-
f.active = false;
|
|
808
|
-
return true;
|
|
809
|
-
}
|
|
810
|
-
static resume(hook, id) {
|
|
811
|
-
const f = Filter.find(hook, id);
|
|
812
|
-
if (!f) return false;
|
|
813
|
-
f.active = true;
|
|
814
|
-
return true;
|
|
815
630
|
}
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
631
|
+
now = () => this.nowFn();
|
|
632
|
+
mem = () => this.memFn();
|
|
633
|
+
profile(fn, meta) {
|
|
634
|
+
const startTime = this.now(),
|
|
635
|
+
startMem = this.mem();
|
|
636
|
+
const res = fn();
|
|
637
|
+
const deltaTime = this.now() - startTime,
|
|
638
|
+
deltaMem = this.mem() - startMem;
|
|
639
|
+
this.store.add({ time: deltaTime, mem: deltaMem, res, meta });
|
|
640
|
+
((this.totalTime += deltaTime), (this.totalMem += deltaMem));
|
|
641
|
+
return res;
|
|
821
642
|
}
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
643
|
+
enable = () => {
|
|
644
|
+
this.active = true;
|
|
645
|
+
};
|
|
646
|
+
disable = () => {
|
|
647
|
+
this.active = false;
|
|
648
|
+
};
|
|
649
|
+
clear() {
|
|
650
|
+
this.store.clear();
|
|
651
|
+
this.totalTime = 0;
|
|
652
|
+
this.totalMem = 0;
|
|
830
653
|
}
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
if (!filter || filter.every((f) => !f.active)) return input;
|
|
834
|
-
const applyOne = async (s) => {
|
|
835
|
-
for (const f of filter) if (f.active) s = await Promise.resolve(f.fn(s));
|
|
836
|
-
return s;
|
|
837
|
-
};
|
|
838
|
-
return Array.isArray(input)
|
|
839
|
-
? Promise.all(input.map(applyOne))
|
|
840
|
-
: applyOne(input);
|
|
654
|
+
run(fn, meta = {}) {
|
|
655
|
+
return this.active ? this.profile(fn, meta) : fn();
|
|
841
656
|
}
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
657
|
+
async runAsync(fn, meta = {}) {
|
|
658
|
+
return this.active
|
|
659
|
+
? this.profile(async () => await fn(), meta)
|
|
660
|
+
: await fn();
|
|
661
|
+
}
|
|
662
|
+
getAll = () => [...this.store];
|
|
663
|
+
getLast = () => this.getAll().pop();
|
|
664
|
+
getTotal = () => ({ time: this.totalTime, mem: this.totalMem });
|
|
665
|
+
services = Object.freeze({
|
|
666
|
+
enable: this.enable.bind(this),
|
|
667
|
+
disable: this.disable.bind(this),
|
|
668
|
+
clear: this.clear.bind(this),
|
|
669
|
+
report: this.getAll.bind(this),
|
|
670
|
+
last: this.getLast.bind(this),
|
|
671
|
+
total: this.getTotal.bind(this)
|
|
672
|
+
});
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
const registry = Object.create(null);
|
|
676
|
+
const factory = Object.create(null);
|
|
677
|
+
function Registry(reg, ctor) {
|
|
678
|
+
if (reg in registry || reg in factory)
|
|
679
|
+
throw new Error(
|
|
680
|
+
`Registry <${reg}> already exists / overwriting is forbidden`
|
|
681
|
+
);
|
|
682
|
+
const classes = Object.create(null);
|
|
683
|
+
const service = Object.freeze({
|
|
684
|
+
add(name, cls, update = false) {
|
|
685
|
+
if (!(cls.prototype instanceof ctor))
|
|
686
|
+
throw new TypeError(`Class must extend <${reg}>`);
|
|
687
|
+
if (!update && name in classes)
|
|
688
|
+
throw new Error(
|
|
689
|
+
`Entry <${name}> already exists / use <update=true> to overwrite`
|
|
690
|
+
);
|
|
691
|
+
classes[name] = cls;
|
|
692
|
+
},
|
|
693
|
+
remove(name) {
|
|
694
|
+
delete classes[name];
|
|
695
|
+
},
|
|
696
|
+
has(name) {
|
|
697
|
+
return name in classes;
|
|
698
|
+
},
|
|
699
|
+
list() {
|
|
700
|
+
return Object.keys(classes);
|
|
701
|
+
},
|
|
702
|
+
get(name) {
|
|
703
|
+
if (!(name in classes))
|
|
704
|
+
throw new Error(`Class <${name}> not registered for <${reg}>`);
|
|
705
|
+
return classes[name];
|
|
706
|
+
}
|
|
707
|
+
});
|
|
708
|
+
registry[reg] = service;
|
|
709
|
+
factory[reg] = (cls, ...args) => createFromRegistry(reg, cls, ...args);
|
|
710
|
+
return service;
|
|
711
|
+
}
|
|
712
|
+
function resolveCls(reg, cls) {
|
|
713
|
+
if (!(reg in registry))
|
|
714
|
+
throw new ReferenceError(`Registry <${reg}> does not exist`);
|
|
715
|
+
return typeof cls === 'string' ? registry[reg]?.get(cls) : cls;
|
|
716
|
+
}
|
|
717
|
+
function createFromRegistry(reg, cls, ...args) {
|
|
718
|
+
cls = resolveCls(reg, cls);
|
|
719
|
+
try {
|
|
720
|
+
return new cls(...args);
|
|
721
|
+
} catch (err) {
|
|
722
|
+
throw new Error(`Cannot instantiate class <${cls.name ?? cls}>`, {
|
|
723
|
+
cause: err
|
|
724
|
+
});
|
|
845
725
|
}
|
|
846
726
|
}
|
|
847
727
|
|
|
@@ -855,22 +735,20 @@ class RingPool {
|
|
|
855
735
|
acquire(minSize, allowOversize) {
|
|
856
736
|
const len = this.buffers.length;
|
|
857
737
|
for (let i = 0; i < len; i++) {
|
|
858
|
-
const idx = (this.pointer + i)
|
|
738
|
+
const idx = (this.pointer + i) & (len - 1);
|
|
859
739
|
const item = this.buffers[idx];
|
|
860
|
-
if (item.size >= minSize) {
|
|
861
|
-
this.pointer = (idx + 1)
|
|
862
|
-
return
|
|
740
|
+
if (item.size >= minSize && (allowOversize || item.size === minSize)) {
|
|
741
|
+
this.pointer = (idx + 1) & (len - 1);
|
|
742
|
+
return item;
|
|
863
743
|
}
|
|
864
744
|
}
|
|
865
745
|
return null;
|
|
866
746
|
}
|
|
867
747
|
release(item) {
|
|
868
|
-
if (this.buffers.length < this.maxSize)
|
|
869
|
-
this.buffers.push(item);
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
this.pointer = (this.pointer + 1) % this.maxSize;
|
|
873
|
-
}
|
|
748
|
+
if (this.buffers.length < this.maxSize)
|
|
749
|
+
return void [this.buffers.push(item)];
|
|
750
|
+
this.buffers[this.pointer] = item;
|
|
751
|
+
this.pointer = (this.pointer + 1) % this.maxSize;
|
|
874
752
|
}
|
|
875
753
|
clear() {
|
|
876
754
|
this.buffers = [];
|
|
@@ -879,8 +757,8 @@ class RingPool {
|
|
|
879
757
|
}
|
|
880
758
|
class Pool {
|
|
881
759
|
static CONFIG = {
|
|
882
|
-
|
|
883
|
-
type: '
|
|
760
|
+
int32: {
|
|
761
|
+
type: 'int32',
|
|
884
762
|
maxSize: 64,
|
|
885
763
|
maxItemSize: 2048,
|
|
886
764
|
allowOversize: true
|
|
@@ -901,7 +779,7 @@ class Pool {
|
|
|
901
779
|
map: { type: 'map', maxSize: 8, maxItemSize: 0, allowOversize: false }
|
|
902
780
|
};
|
|
903
781
|
static POOLS = {
|
|
904
|
-
|
|
782
|
+
int32: new RingPool(64),
|
|
905
783
|
'number[]': new RingPool(16),
|
|
906
784
|
'string[]': new RingPool(2),
|
|
907
785
|
set: new RingPool(8),
|
|
@@ -909,8 +787,8 @@ class Pool {
|
|
|
909
787
|
};
|
|
910
788
|
static allocate(type, size) {
|
|
911
789
|
switch (type) {
|
|
912
|
-
case '
|
|
913
|
-
return new
|
|
790
|
+
case 'int32':
|
|
791
|
+
return new Int32Array(size);
|
|
914
792
|
case 'number[]':
|
|
915
793
|
return new Float64Array(size);
|
|
916
794
|
case 'string[]':
|
|
@@ -925,28 +803,25 @@ class Pool {
|
|
|
925
803
|
const CONFIG = this.CONFIG[type];
|
|
926
804
|
if (size > CONFIG.maxItemSize) return this.allocate(type, size);
|
|
927
805
|
const item = this.POOLS[type].acquire(size, CONFIG.allowOversize);
|
|
928
|
-
if (item)
|
|
929
|
-
return type === '
|
|
930
|
-
}
|
|
806
|
+
if (item)
|
|
807
|
+
return type === 'int32' ? item.buffer.subarray(0, size) : item.buffer;
|
|
931
808
|
return this.allocate(type, size);
|
|
932
809
|
}
|
|
933
810
|
static acquireMany(type, sizes) {
|
|
934
811
|
return sizes.map((size) => this.acquire(type, size));
|
|
935
812
|
}
|
|
936
813
|
static release(type, buffer, size) {
|
|
937
|
-
|
|
938
|
-
if (size <= CONFIG.maxItemSize) {
|
|
814
|
+
if (size <= this.CONFIG[type].maxItemSize)
|
|
939
815
|
this.POOLS[type].release({ buffer, size });
|
|
940
|
-
}
|
|
941
816
|
}
|
|
942
817
|
}
|
|
943
818
|
|
|
944
819
|
class StructuredData {
|
|
820
|
+
data;
|
|
821
|
+
key;
|
|
945
822
|
static create(data, key) {
|
|
946
823
|
return new StructuredData(data, key);
|
|
947
824
|
}
|
|
948
|
-
data;
|
|
949
|
-
key;
|
|
950
825
|
constructor(data, key) {
|
|
951
826
|
this.data = data;
|
|
952
827
|
this.key = key;
|
|
@@ -959,9 +834,7 @@ class StructuredData {
|
|
|
959
834
|
}
|
|
960
835
|
return result;
|
|
961
836
|
}
|
|
962
|
-
extract()
|
|
963
|
-
return this.extractFrom(this.data, this.key);
|
|
964
|
-
}
|
|
837
|
+
extract = () => this.extractFrom(this.data, this.key);
|
|
965
838
|
isMetricResult(v) {
|
|
966
839
|
return (
|
|
967
840
|
typeof v === 'object' && v !== null && 'a' in v && 'b' in v && 'res' in v
|
|
@@ -981,7 +854,7 @@ class StructuredData {
|
|
|
981
854
|
const first = results[0];
|
|
982
855
|
let normalized = [];
|
|
983
856
|
if (this.isMetricResult(first)) normalized = results;
|
|
984
|
-
else if (this.isCmpStrResult(first))
|
|
857
|
+
else if (this.isCmpStrResult(first))
|
|
985
858
|
normalized = results.map((r) => ({
|
|
986
859
|
metric: 'unknown',
|
|
987
860
|
a: r.source,
|
|
@@ -989,7 +862,10 @@ class StructuredData {
|
|
|
989
862
|
res: r.match,
|
|
990
863
|
raw: r.raw
|
|
991
864
|
}));
|
|
992
|
-
|
|
865
|
+
else
|
|
866
|
+
throw new TypeError(
|
|
867
|
+
'Unsupported result format for StructuredData normalization.'
|
|
868
|
+
);
|
|
993
869
|
return normalized.map((r, idx) => ({ ...r, __idx: idx }));
|
|
994
870
|
}
|
|
995
871
|
rebuild(results, sourceData, extractedStrings, removeZero, objectsOnly) {
|
|
@@ -1027,118 +903,258 @@ class StructuredData {
|
|
|
1027
903
|
...(result.raw ? { raw: result.raw } : null)
|
|
1028
904
|
};
|
|
1029
905
|
}
|
|
1030
|
-
output.length = out;
|
|
1031
|
-
return output;
|
|
906
|
+
output.length = out;
|
|
907
|
+
return output;
|
|
908
|
+
}
|
|
909
|
+
sort(results, sort) {
|
|
910
|
+
if (!sort || results.length <= 1) return results;
|
|
911
|
+
const asc = sort === 'asc';
|
|
912
|
+
return results.sort((a, b) => (asc ? a.res - b.res : b.res - a.res));
|
|
913
|
+
}
|
|
914
|
+
finalizeLookup(results, extractedStrings, opt) {
|
|
915
|
+
return this.rebuild(
|
|
916
|
+
this.sort(this.normalizeResults(results), opt?.sort),
|
|
917
|
+
this.data,
|
|
918
|
+
extractedStrings,
|
|
919
|
+
opt?.removeZero,
|
|
920
|
+
opt?.objectsOnly
|
|
921
|
+
);
|
|
922
|
+
}
|
|
923
|
+
performLookup(fn, extractedStrings, opt) {
|
|
924
|
+
return this.finalizeLookup(fn(), extractedStrings, opt);
|
|
925
|
+
}
|
|
926
|
+
async performLookupAsync(fn, extractedStrings, opt) {
|
|
927
|
+
return this.finalizeLookup(await fn(), extractedStrings, opt);
|
|
928
|
+
}
|
|
929
|
+
lookup(fn, query, opt) {
|
|
930
|
+
const b = this.extract();
|
|
931
|
+
try {
|
|
932
|
+
return this.performLookup(() => fn(query, b, opt), b, opt);
|
|
933
|
+
} finally {
|
|
934
|
+
Pool.release('string[]', b, b.length);
|
|
935
|
+
}
|
|
936
|
+
}
|
|
937
|
+
async lookupAsync(fn, query, opt) {
|
|
938
|
+
const b = this.extract();
|
|
939
|
+
try {
|
|
940
|
+
return await this.performLookupAsync(() => fn(query, b, opt), b, opt);
|
|
941
|
+
} finally {
|
|
942
|
+
Pool.release('string[]', b, b.length);
|
|
943
|
+
}
|
|
944
|
+
}
|
|
945
|
+
lookupPairs(fn, other, otherKey, opt) {
|
|
946
|
+
const a = this.extract();
|
|
947
|
+
const b = this.extractFrom(other, otherKey);
|
|
948
|
+
try {
|
|
949
|
+
return this.performLookup(() => fn(a, b, opt), a, opt);
|
|
950
|
+
} finally {
|
|
951
|
+
Pool.release('string[]', a, a.length);
|
|
952
|
+
Pool.release('string[]', b, b.length);
|
|
953
|
+
}
|
|
954
|
+
}
|
|
955
|
+
async lookupPairsAsync(fn, other, otherKey, opt) {
|
|
956
|
+
const a = this.extract();
|
|
957
|
+
const b = this.extractFrom(other, otherKey);
|
|
958
|
+
try {
|
|
959
|
+
return await this.performLookupAsync(() => fn(a, b, opt), a, opt);
|
|
960
|
+
} finally {
|
|
961
|
+
Pool.release('string[]', a, a.length);
|
|
962
|
+
Pool.release('string[]', b, b.length);
|
|
963
|
+
}
|
|
964
|
+
}
|
|
965
|
+
}
|
|
966
|
+
|
|
967
|
+
class TextAnalyzer {
|
|
968
|
+
static REGEX = {
|
|
969
|
+
number: /\d/,
|
|
970
|
+
sentence: /(?<=[.!?])\s+/,
|
|
971
|
+
word: /\p{L}+/gu,
|
|
972
|
+
nonWord: /[^\p{L}]/gu,
|
|
973
|
+
vowelGroup: /[aeiouy]+/g,
|
|
974
|
+
letter: /\p{L}/gu,
|
|
975
|
+
ucLetter: /\p{Lu}/gu
|
|
976
|
+
};
|
|
977
|
+
text;
|
|
978
|
+
words = [];
|
|
979
|
+
sentences = [];
|
|
980
|
+
charFrequency = new Map();
|
|
981
|
+
wordHistogram = new Map();
|
|
982
|
+
syllableCache = new Map();
|
|
983
|
+
syllableStats;
|
|
984
|
+
constructor(input) {
|
|
985
|
+
this.text = input.trim();
|
|
986
|
+
this.tokenize();
|
|
987
|
+
this.computeFrequencies();
|
|
988
|
+
}
|
|
989
|
+
tokenize() {
|
|
990
|
+
let match;
|
|
991
|
+
const lcText = this.text.toLowerCase();
|
|
992
|
+
while ((match = TextAnalyzer.REGEX.word.exec(lcText)) !== null)
|
|
993
|
+
this.words.push(match[0]);
|
|
994
|
+
this.sentences = this.text
|
|
995
|
+
.split(TextAnalyzer.REGEX.sentence)
|
|
996
|
+
.filter(Boolean);
|
|
997
|
+
}
|
|
998
|
+
computeFrequencies() {
|
|
999
|
+
for (const char of this.text)
|
|
1000
|
+
this.charFrequency.set(char, (this.charFrequency.get(char) ?? 0) + 1);
|
|
1001
|
+
for (const word of this.words)
|
|
1002
|
+
this.wordHistogram.set(word, (this.wordHistogram.get(word) ?? 0) + 1);
|
|
1003
|
+
}
|
|
1004
|
+
estimateSyllables(word) {
|
|
1005
|
+
const clean = word
|
|
1006
|
+
.normalize('NFC')
|
|
1007
|
+
.toLowerCase()
|
|
1008
|
+
.replace(TextAnalyzer.REGEX.nonWord, '');
|
|
1009
|
+
if (this.syllableCache.has(clean)) return this.syllableCache.get(clean);
|
|
1010
|
+
const matches = clean.match(TextAnalyzer.REGEX.vowelGroup);
|
|
1011
|
+
const count = matches ? matches.length : 1;
|
|
1012
|
+
this.syllableCache.set(clean, count);
|
|
1013
|
+
return count;
|
|
1014
|
+
}
|
|
1015
|
+
computeSyllableStats() {
|
|
1016
|
+
return (this.syllableStats ||= (() => {
|
|
1017
|
+
const perWord = this.words
|
|
1018
|
+
.map((w) => this.estimateSyllables(w))
|
|
1019
|
+
.sort((a, b) => a - b);
|
|
1020
|
+
const total = perWord.reduce((sum, s) => sum + s, 0);
|
|
1021
|
+
const mono = perWord.filter((s) => s === 1).length;
|
|
1022
|
+
const median = !perWord.length
|
|
1023
|
+
? 0
|
|
1024
|
+
: perWord.length % 2 === 0
|
|
1025
|
+
? (perWord[perWord.length / 2 - 1] + perWord[perWord.length / 2]) / 2
|
|
1026
|
+
: perWord[Math.floor(perWord.length / 2)];
|
|
1027
|
+
return {
|
|
1028
|
+
total,
|
|
1029
|
+
mono,
|
|
1030
|
+
perWord,
|
|
1031
|
+
avg: perWord.length ? total / perWord.length : 0,
|
|
1032
|
+
median
|
|
1033
|
+
};
|
|
1034
|
+
})());
|
|
1035
|
+
}
|
|
1036
|
+
getLength = () => this.text.length;
|
|
1037
|
+
getWordCount = () => this.words.length;
|
|
1038
|
+
getSentenceCount = () => this.sentences.length;
|
|
1039
|
+
getAvgWordLength() {
|
|
1040
|
+
return this.words.length
|
|
1041
|
+
? this.words.join('').length / this.words.length
|
|
1042
|
+
: 0;
|
|
1043
|
+
}
|
|
1044
|
+
getAvgSentenceLength() {
|
|
1045
|
+
return this.sentences.length
|
|
1046
|
+
? this.words.length / this.sentences.length
|
|
1047
|
+
: 0;
|
|
1048
|
+
}
|
|
1049
|
+
getWordHistogram() {
|
|
1050
|
+
return Object.fromEntries(this.wordHistogram);
|
|
1051
|
+
}
|
|
1052
|
+
getMostCommonWords(limit = 5) {
|
|
1053
|
+
return [...this.wordHistogram.entries()]
|
|
1054
|
+
.sort((a, b) => b[1] - a[1])
|
|
1055
|
+
.slice(0, limit)
|
|
1056
|
+
.map((e) => e[0]);
|
|
1057
|
+
}
|
|
1058
|
+
getHapaxLegomena() {
|
|
1059
|
+
return [...this.wordHistogram.entries()]
|
|
1060
|
+
.filter(([, c]) => c === 1)
|
|
1061
|
+
.map((e) => e[0]);
|
|
1062
|
+
}
|
|
1063
|
+
hasNumbers = () => TextAnalyzer.REGEX.number.test(this.text);
|
|
1064
|
+
getUpperCaseRatio() {
|
|
1065
|
+
const matches = this.text.match(TextAnalyzer.REGEX.letter) || [];
|
|
1066
|
+
const upper = this.text.match(TextAnalyzer.REGEX.ucLetter)?.length || 0;
|
|
1067
|
+
return matches.length ? upper / matches.length : 0;
|
|
1068
|
+
}
|
|
1069
|
+
getCharFrequency() {
|
|
1070
|
+
return Object.fromEntries(this.charFrequency);
|
|
1071
|
+
}
|
|
1072
|
+
getUnicodeCodepoints() {
|
|
1073
|
+
const result = {};
|
|
1074
|
+
for (const [char, count] of this.charFrequency) {
|
|
1075
|
+
const block = char
|
|
1076
|
+
.charCodeAt(0)
|
|
1077
|
+
.toString(16)
|
|
1078
|
+
.padStart(4, '0')
|
|
1079
|
+
.toUpperCase();
|
|
1080
|
+
result[block] = (result[block] || 0) + count;
|
|
1081
|
+
}
|
|
1082
|
+
return result;
|
|
1032
1083
|
}
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
const
|
|
1036
|
-
return
|
|
1084
|
+
getLongWordRatio(len = 7) {
|
|
1085
|
+
let long = 0;
|
|
1086
|
+
for (const w of this.words) if (w.length >= len) long++;
|
|
1087
|
+
return this.words.length ? long / this.words.length : 0;
|
|
1037
1088
|
}
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
extractedStrings,
|
|
1043
|
-
opt?.removeZero,
|
|
1044
|
-
opt?.objectsOnly
|
|
1045
|
-
);
|
|
1089
|
+
getShortWordRatio(len = 3) {
|
|
1090
|
+
let short = 0;
|
|
1091
|
+
for (const w of this.words) if (w.length <= len) short++;
|
|
1092
|
+
return this.words.length ? short / this.words.length : 0;
|
|
1046
1093
|
}
|
|
1047
|
-
|
|
1048
|
-
return this.
|
|
1049
|
-
this.sort(this.normalizeResults(await fn()), opt?.sort),
|
|
1050
|
-
this.data,
|
|
1051
|
-
extractedStrings,
|
|
1052
|
-
opt?.removeZero,
|
|
1053
|
-
opt?.objectsOnly
|
|
1054
|
-
);
|
|
1094
|
+
getSyllablesCount() {
|
|
1095
|
+
return this.computeSyllableStats().total;
|
|
1055
1096
|
}
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
try {
|
|
1059
|
-
return this.performLookup(() => fn(query, b, opt), b, opt);
|
|
1060
|
-
} finally {
|
|
1061
|
-
Pool.release('string[]', b, b.length);
|
|
1062
|
-
}
|
|
1097
|
+
getMonosyllabicWordCount() {
|
|
1098
|
+
return this.computeSyllableStats().mono;
|
|
1063
1099
|
}
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
const b = this.extractFrom(other, otherKey);
|
|
1067
|
-
try {
|
|
1068
|
-
return this.performLookup(() => fn(a, b, opt), a, opt);
|
|
1069
|
-
} finally {
|
|
1070
|
-
Pool.release('string[]', a, a.length);
|
|
1071
|
-
Pool.release('string[]', b, b.length);
|
|
1072
|
-
}
|
|
1100
|
+
getMinSyllablesWordCount(min) {
|
|
1101
|
+
return this.computeSyllableStats().perWord.filter((w) => w >= min).length;
|
|
1073
1102
|
}
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
try {
|
|
1077
|
-
return await this.performLookupAsync(() => fn(query, b, opt), b, opt);
|
|
1078
|
-
} finally {
|
|
1079
|
-
Pool.release('string[]', b, b.length);
|
|
1080
|
-
}
|
|
1103
|
+
getMaxSyllablesWordCount(max) {
|
|
1104
|
+
return this.computeSyllableStats().perWord.filter((w) => w <= max).length;
|
|
1081
1105
|
}
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1106
|
+
getAvgSyllablesPerWord() {
|
|
1107
|
+
return this.computeSyllableStats().avg;
|
|
1108
|
+
}
|
|
1109
|
+
getMedianSyllablesPerWord() {
|
|
1110
|
+
return this.computeSyllableStats().median;
|
|
1111
|
+
}
|
|
1112
|
+
getHonoresR() {
|
|
1085
1113
|
try {
|
|
1086
|
-
return
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1114
|
+
return (
|
|
1115
|
+
(100 * Math.log(this.words.length)) /
|
|
1116
|
+
(1 - this.getHapaxLegomena().length / (this.wordHistogram.size ?? 1))
|
|
1117
|
+
);
|
|
1118
|
+
} catch {
|
|
1119
|
+
return 0;
|
|
1090
1120
|
}
|
|
1091
1121
|
}
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
`Entry <${name}> already exists / use <update=true> to overwrite`
|
|
1109
|
-
);
|
|
1110
|
-
classes[name] = cls;
|
|
1111
|
-
},
|
|
1112
|
-
remove(name) {
|
|
1113
|
-
delete classes[name];
|
|
1114
|
-
},
|
|
1115
|
-
has(name) {
|
|
1116
|
-
return name in classes;
|
|
1117
|
-
},
|
|
1118
|
-
list() {
|
|
1119
|
-
return Object.keys(classes);
|
|
1120
|
-
},
|
|
1121
|
-
get(name) {
|
|
1122
|
-
if (!(name in classes))
|
|
1123
|
-
throw new Error(`Class <${name}> not registered for <${reg}>`);
|
|
1124
|
-
return classes[name];
|
|
1122
|
+
getReadingTime(wpm = 200) {
|
|
1123
|
+
return this.words.length / (wpm ?? 1);
|
|
1124
|
+
}
|
|
1125
|
+
getReadabilityScore(metric = 'flesch') {
|
|
1126
|
+
const w = this.words.length || 1;
|
|
1127
|
+
const s = this.sentences.length || 1;
|
|
1128
|
+
const y = this.getSyllablesCount() || 1;
|
|
1129
|
+
const asl = w / s;
|
|
1130
|
+
const asw = y / w;
|
|
1131
|
+
switch (metric) {
|
|
1132
|
+
case 'flesch':
|
|
1133
|
+
return 206.835 - 1.015 * asl - 84.6 * asw;
|
|
1134
|
+
case 'fleschde':
|
|
1135
|
+
return 180 - asl - 58.5 * asw;
|
|
1136
|
+
case 'kincaid':
|
|
1137
|
+
return 0.39 * asl + 11.8 * asw - 15.59;
|
|
1125
1138
|
}
|
|
1126
|
-
}
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
return
|
|
1140
|
-
|
|
1141
|
-
|
|
1139
|
+
}
|
|
1140
|
+
getLIXScore() {
|
|
1141
|
+
const w = this.words.length || 1;
|
|
1142
|
+
const s = this.sentences.length || 1;
|
|
1143
|
+
const l = this.getLongWordRatio() * w;
|
|
1144
|
+
return w / s + (l / w) * 100;
|
|
1145
|
+
}
|
|
1146
|
+
getWSTFScore() {
|
|
1147
|
+
const w = this.words.length || 1;
|
|
1148
|
+
const h = (this.getMinSyllablesWordCount(3) / w) * 100;
|
|
1149
|
+
const s = this.getAvgSentenceLength();
|
|
1150
|
+
const l = this.getLongWordRatio() * 100;
|
|
1151
|
+
const m = (this.getMonosyllabicWordCount() / w) * 100;
|
|
1152
|
+
return [
|
|
1153
|
+
0.1935 * h + 0.1672 * s + 0.1297 * l - 0.0327 * m - 0.875,
|
|
1154
|
+
0.2007 * h + 0.1682 * s + 0.1373 * l - 2.779,
|
|
1155
|
+
0.2963 * h + 0.1905 * s - 1.1144,
|
|
1156
|
+
0.2744 * h + 0.2656 * s - 1.693
|
|
1157
|
+
];
|
|
1142
1158
|
}
|
|
1143
1159
|
}
|
|
1144
1160
|
|
|
@@ -1151,17 +1167,12 @@ class Metric {
|
|
|
1151
1167
|
origA = [];
|
|
1152
1168
|
origB = [];
|
|
1153
1169
|
options;
|
|
1170
|
+
optKey;
|
|
1154
1171
|
symmetric;
|
|
1155
1172
|
results;
|
|
1156
|
-
static clear()
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
static swap(a, b, m, n) {
|
|
1160
|
-
return m > n ? [b, a, n, m] : [a, b, m, n];
|
|
1161
|
-
}
|
|
1162
|
-
static clamp(res) {
|
|
1163
|
-
return Math.max(0, Math.min(1, res));
|
|
1164
|
-
}
|
|
1173
|
+
static clear = () => this.cache.clear();
|
|
1174
|
+
static swap = (a, b, m, n) => (m > n ? [b, a, n, m] : [a, b, m, n]);
|
|
1175
|
+
static clamp = (res) => Math.max(0, Math.min(1, res));
|
|
1165
1176
|
constructor(metric, a, b, opt = {}, symmetric = false) {
|
|
1166
1177
|
this.metric = metric;
|
|
1167
1178
|
this.a = Array.isArray(a) ? a : [a];
|
|
@@ -1169,6 +1180,9 @@ class Metric {
|
|
|
1169
1180
|
if (this.a.length === 0 || this.b.length === 0)
|
|
1170
1181
|
throw new Error(`Inputs <a> and <b> must not be empty`);
|
|
1171
1182
|
this.options = opt;
|
|
1183
|
+
this.optKey = Hasher.fastFNV1a(
|
|
1184
|
+
JSON.stringify(opt, Object.keys(opt).sort())
|
|
1185
|
+
).toString();
|
|
1172
1186
|
this.symmetric = symmetric;
|
|
1173
1187
|
}
|
|
1174
1188
|
preCompute(a, b, m, n) {
|
|
@@ -1189,11 +1203,12 @@ class Metric {
|
|
|
1189
1203
|
let result = this.preCompute(A, B, m, n);
|
|
1190
1204
|
if (!result) {
|
|
1191
1205
|
result = profiler$2.run(() => {
|
|
1192
|
-
|
|
1206
|
+
if (this.symmetric) [A, B, m, n] = Metric.swap(A, B, m, n);
|
|
1207
|
+
const key =
|
|
1208
|
+
Metric.cache.key(this.metric, [A, B], this.symmetric) + this.optKey;
|
|
1193
1209
|
return (
|
|
1194
1210
|
Metric.cache.get(key || '') ??
|
|
1195
1211
|
(() => {
|
|
1196
|
-
if (this.symmetric) [A, B, m, n] = Metric.swap(A, B, m, n);
|
|
1197
1212
|
const res = this.compute(A, B, m, n, Math.max(m, n));
|
|
1198
1213
|
if (key) Metric.cache.set(key, res);
|
|
1199
1214
|
return res;
|
|
@@ -1241,12 +1256,8 @@ class Metric {
|
|
|
1241
1256
|
if (b) this.origB = Array.isArray(b) ? b : [b];
|
|
1242
1257
|
return this;
|
|
1243
1258
|
}
|
|
1244
|
-
isBatch()
|
|
1245
|
-
|
|
1246
|
-
}
|
|
1247
|
-
isSingle() {
|
|
1248
|
-
return !this.isBatch();
|
|
1249
|
-
}
|
|
1259
|
+
isBatch = () => this.a.length > 1 || this.b.length > 1;
|
|
1260
|
+
isSingle = () => !this.isBatch();
|
|
1250
1261
|
isPairwise(safe = false) {
|
|
1251
1262
|
return this.isBatch() && this.a.length === this.b.length
|
|
1252
1263
|
? true
|
|
@@ -1255,15 +1266,9 @@ class Metric {
|
|
|
1255
1266
|
throw new Error(`Mode <pairwise> requires arrays of equal length`);
|
|
1256
1267
|
})();
|
|
1257
1268
|
}
|
|
1258
|
-
isSymmetrical()
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
whichMode(mode) {
|
|
1262
|
-
return mode ?? this.options?.mode ?? 'default';
|
|
1263
|
-
}
|
|
1264
|
-
clear() {
|
|
1265
|
-
this.results = undefined;
|
|
1266
|
-
}
|
|
1269
|
+
isSymmetrical = () => this.symmetric;
|
|
1270
|
+
whichMode = (mode) => mode ?? this.options?.mode ?? 'default';
|
|
1271
|
+
clear = () => (this.results = undefined);
|
|
1267
1272
|
run(mode, clear = true) {
|
|
1268
1273
|
if (clear) this.clear();
|
|
1269
1274
|
switch (this.whichMode(mode)) {
|
|
@@ -1306,9 +1311,7 @@ class Metric {
|
|
|
1306
1311
|
throw new Error(`Unsupported async mode <${mode}>`);
|
|
1307
1312
|
}
|
|
1308
1313
|
}
|
|
1309
|
-
getMetricName()
|
|
1310
|
-
return this.metric;
|
|
1311
|
-
}
|
|
1314
|
+
getMetricName = () => this.metric;
|
|
1312
1315
|
getResults() {
|
|
1313
1316
|
if (this.results === undefined)
|
|
1314
1317
|
throw new Error(`run() must be called before getResult()`);
|
|
@@ -1331,26 +1334,26 @@ class CosineSimilarity extends Metric {
|
|
|
1331
1334
|
const { delimiter = ' ' } = this.options;
|
|
1332
1335
|
const termsA = this._termFreq(a, delimiter);
|
|
1333
1336
|
const termsB = this._termFreq(b, delimiter);
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
const
|
|
1339
|
-
|
|
1340
|
-
|
|
1337
|
+
try {
|
|
1338
|
+
let dotP = 0,
|
|
1339
|
+
magA = 0,
|
|
1340
|
+
magB = 0;
|
|
1341
|
+
for (const [term, freqA] of termsA) {
|
|
1342
|
+
const freqB = termsB.get(term) || 0;
|
|
1343
|
+
dotP += freqA * freqB;
|
|
1344
|
+
magA += freqA * freqA;
|
|
1345
|
+
}
|
|
1346
|
+
for (const freqB of termsB.values()) magB += freqB * freqB;
|
|
1347
|
+
magA = Math.sqrt(magA);
|
|
1348
|
+
magB = Math.sqrt(magB);
|
|
1349
|
+
return {
|
|
1350
|
+
res: magA && magB ? Metric.clamp(dotP / (magA * magB)) : 0,
|
|
1351
|
+
raw: { dotProduct: dotP, magnitudeA: magA, magnitudeB: magB }
|
|
1352
|
+
};
|
|
1353
|
+
} finally {
|
|
1354
|
+
Pool.release('map', termsA, termsA.size);
|
|
1355
|
+
Pool.release('map', termsB, termsB.size);
|
|
1341
1356
|
}
|
|
1342
|
-
for (const freqB of termsB.values()) magnitudeB += freqB * freqB;
|
|
1343
|
-
magnitudeA = Math.sqrt(magnitudeA);
|
|
1344
|
-
magnitudeB = Math.sqrt(magnitudeB);
|
|
1345
|
-
Pool.release('map', termsA, termsA.size);
|
|
1346
|
-
Pool.release('map', termsB, termsB.size);
|
|
1347
|
-
return {
|
|
1348
|
-
res:
|
|
1349
|
-
magnitudeA && magnitudeB
|
|
1350
|
-
? Metric.clamp(dotProduct / (magnitudeA * magnitudeB))
|
|
1351
|
-
: 0,
|
|
1352
|
-
raw: { dotProduct, magnitudeA, magnitudeB }
|
|
1353
|
-
};
|
|
1354
1357
|
}
|
|
1355
1358
|
}
|
|
1356
1359
|
MetricRegistry.add('cosine', CosineSimilarity);
|
|
@@ -1361,36 +1364,38 @@ class DamerauLevenshteinDistance extends Metric {
|
|
|
1361
1364
|
}
|
|
1362
1365
|
compute(a, b, m, n, maxLen) {
|
|
1363
1366
|
const len = m + 1;
|
|
1364
|
-
const [test, prev, curr] = Pool.acquireMany('
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1367
|
+
const [test, prev, curr] = Pool.acquireMany('int32', [len, len, len]);
|
|
1368
|
+
try {
|
|
1369
|
+
for (let i = 0; i <= m; i++) prev[i] = i;
|
|
1370
|
+
for (let j = 1; j <= n; j++) {
|
|
1371
|
+
curr[0] = j;
|
|
1372
|
+
const cb = b.charCodeAt(j - 1);
|
|
1373
|
+
for (let i = 1; i <= m; i++) {
|
|
1374
|
+
const ca = a.charCodeAt(i - 1);
|
|
1375
|
+
const cost = ca === cb ? 0 : 1;
|
|
1376
|
+
let val = Math.min(curr[i - 1] + 1, prev[i] + 1, prev[i - 1] + cost);
|
|
1377
|
+
if (
|
|
1378
|
+
i > 1 &&
|
|
1379
|
+
j > 1 &&
|
|
1380
|
+
ca === b.charCodeAt(j - 2) &&
|
|
1381
|
+
cb === a.charCodeAt(i - 2)
|
|
1382
|
+
)
|
|
1383
|
+
val = Math.min(val, test[i - 2] + cost);
|
|
1384
|
+
curr[i] = val;
|
|
1380
1385
|
}
|
|
1381
|
-
|
|
1386
|
+
test.set(prev);
|
|
1387
|
+
prev.set(curr);
|
|
1382
1388
|
}
|
|
1383
|
-
|
|
1384
|
-
|
|
1389
|
+
const dist = prev[m];
|
|
1390
|
+
return {
|
|
1391
|
+
res: maxLen === 0 ? 1 : Metric.clamp(1 - dist / maxLen),
|
|
1392
|
+
raw: { dist, maxLen }
|
|
1393
|
+
};
|
|
1394
|
+
} finally {
|
|
1395
|
+
Pool.release('int32', test, len);
|
|
1396
|
+
Pool.release('int32', prev, len);
|
|
1397
|
+
Pool.release('int32', curr, len);
|
|
1385
1398
|
}
|
|
1386
|
-
const dist = prev[m];
|
|
1387
|
-
Pool.release('uint16', test, len);
|
|
1388
|
-
Pool.release('uint16', prev, len);
|
|
1389
|
-
Pool.release('uint16', curr, len);
|
|
1390
|
-
return {
|
|
1391
|
-
res: maxLen === 0 ? 1 : Metric.clamp(1 - dist / maxLen),
|
|
1392
|
-
raw: { dist, maxLen }
|
|
1393
|
-
};
|
|
1394
1399
|
}
|
|
1395
1400
|
}
|
|
1396
1401
|
MetricRegistry.add('damerau', DamerauLevenshteinDistance);
|
|
@@ -1406,19 +1411,22 @@ class DiceSorensenCoefficient extends Metric {
|
|
|
1406
1411
|
return bigrams;
|
|
1407
1412
|
}
|
|
1408
1413
|
compute(a, b) {
|
|
1409
|
-
const setA = this._bigrams(a)
|
|
1410
|
-
|
|
1411
|
-
let intersection = 0;
|
|
1412
|
-
for (const bigram of setA) if (setB.has(bigram)) intersection++;
|
|
1414
|
+
const setA = this._bigrams(a),
|
|
1415
|
+
setB = this._bigrams(b);
|
|
1413
1416
|
const sizeA = setA.size,
|
|
1414
1417
|
sizeB = setB.size;
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1418
|
+
try {
|
|
1419
|
+
let intersection = 0;
|
|
1420
|
+
for (const bigram of setA) if (setB.has(bigram)) intersection++;
|
|
1421
|
+
const size = sizeA + sizeB;
|
|
1422
|
+
return {
|
|
1423
|
+
res: size === 0 ? 1 : Metric.clamp((2 * intersection) / size),
|
|
1424
|
+
raw: { intersection, size }
|
|
1425
|
+
};
|
|
1426
|
+
} finally {
|
|
1427
|
+
Pool.release('set', setA, sizeA);
|
|
1428
|
+
Pool.release('set', setB, sizeB);
|
|
1429
|
+
}
|
|
1422
1430
|
}
|
|
1423
1431
|
}
|
|
1424
1432
|
MetricRegistry.add('dice', DiceSorensenCoefficient);
|
|
@@ -1440,7 +1448,7 @@ class HammingDistance extends Metric {
|
|
|
1440
1448
|
);
|
|
1441
1449
|
}
|
|
1442
1450
|
let dist = 0;
|
|
1443
|
-
for (let i = 0; i <
|
|
1451
|
+
for (let i = 0; i < m; i++) if (a[i] !== b[i]) dist++;
|
|
1444
1452
|
return { res: m === 0 ? 1 : Metric.clamp(1 - dist / m), raw: { dist } };
|
|
1445
1453
|
}
|
|
1446
1454
|
}
|
|
@@ -1452,71 +1460,76 @@ class JaccardIndex extends Metric {
|
|
|
1452
1460
|
}
|
|
1453
1461
|
compute(a, b, m, n) {
|
|
1454
1462
|
const [setA, setB] = Pool.acquireMany('set', [m, n]);
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
}
|
|
1463
|
+
try {
|
|
1464
|
+
for (const A of a) setA.add(A);
|
|
1465
|
+
for (const B of b) setB.add(B);
|
|
1466
|
+
let intersection = 0;
|
|
1467
|
+
for (const c of setA) if (setB.has(c)) intersection++;
|
|
1468
|
+
const union = setA.size + setB.size - intersection;
|
|
1469
|
+
return {
|
|
1470
|
+
res: union === 0 ? 1 : Metric.clamp(intersection / union),
|
|
1471
|
+
raw: { intersection, union }
|
|
1472
|
+
};
|
|
1473
|
+
} finally {
|
|
1474
|
+
Pool.release('set', setA, m);
|
|
1475
|
+
Pool.release('set', setB, n);
|
|
1476
|
+
}
|
|
1466
1477
|
}
|
|
1467
1478
|
}
|
|
1468
1479
|
MetricRegistry.add('jaccard', JaccardIndex);
|
|
1469
1480
|
|
|
1470
1481
|
class JaroWinklerDistance extends Metric {
|
|
1471
1482
|
constructor(a, b, opt = {}) {
|
|
1472
|
-
super('
|
|
1483
|
+
super('jaroWinkler', a, b, opt, true);
|
|
1473
1484
|
}
|
|
1474
1485
|
compute(a, b, m, n) {
|
|
1475
|
-
const
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
for (let i = 0; i < m; i++) {
|
|
1482
|
-
const start = Math.max(0, i - matchWindow);
|
|
1483
|
-
const end = Math.min(i + matchWindow + 1, n);
|
|
1484
|
-
for (let j = start; j < end; j++) {
|
|
1485
|
-
if (!matchB[j] && a[i] === b[j]) {
|
|
1486
|
-
matchA[i] = 1;
|
|
1487
|
-
matchB[j] = 1;
|
|
1488
|
-
matches++;
|
|
1489
|
-
break;
|
|
1490
|
-
}
|
|
1491
|
-
}
|
|
1492
|
-
}
|
|
1493
|
-
let transpos = 0,
|
|
1494
|
-
jaro = 0,
|
|
1495
|
-
prefix = 0,
|
|
1496
|
-
res = 0;
|
|
1497
|
-
if (matches > 0) {
|
|
1498
|
-
let k = 0;
|
|
1486
|
+
const [matchA, matchB] = Pool.acquireMany('int32', [m, n]);
|
|
1487
|
+
try {
|
|
1488
|
+
for (let i = 0; i < m; i++) matchA[i] = 0;
|
|
1489
|
+
for (let i = 0; i < n; i++) matchB[i] = 0;
|
|
1490
|
+
const matchWindow = Math.max(0, Math.floor(n / 2) - 1);
|
|
1491
|
+
let matches = 0;
|
|
1499
1492
|
for (let i = 0; i < m; i++) {
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1493
|
+
const start = Math.max(0, i - matchWindow);
|
|
1494
|
+
const end = Math.min(i + matchWindow + 1, n);
|
|
1495
|
+
for (let j = start; j < end; j++) {
|
|
1496
|
+
if (!matchB[j] && a[i] === b[j]) {
|
|
1497
|
+
matchA[i] = 1;
|
|
1498
|
+
matchB[j] = 1;
|
|
1499
|
+
matches++;
|
|
1500
|
+
break;
|
|
1501
|
+
}
|
|
1504
1502
|
}
|
|
1505
1503
|
}
|
|
1506
|
-
transpos
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1504
|
+
let transpos = 0,
|
|
1505
|
+
jaro = 0,
|
|
1506
|
+
prefix = 0,
|
|
1507
|
+
res = 0;
|
|
1508
|
+
if (matches > 0) {
|
|
1509
|
+
let k = 0;
|
|
1510
|
+
for (let i = 0; i < m; i++) {
|
|
1511
|
+
if (matchA[i]) {
|
|
1512
|
+
while (!matchB[k]) k++;
|
|
1513
|
+
if (a[i] !== b[k]) transpos++;
|
|
1514
|
+
k++;
|
|
1515
|
+
}
|
|
1516
|
+
}
|
|
1517
|
+
transpos /= 2;
|
|
1518
|
+
jaro = (matches / m + matches / n + (matches - transpos) / matches) / 3;
|
|
1519
|
+
for (let i = 0; i < Math.min(4, m, n); i++) {
|
|
1520
|
+
if (a[i] === b[i]) prefix++;
|
|
1521
|
+
else break;
|
|
1522
|
+
}
|
|
1523
|
+
res = jaro + prefix * 0.1 * (1 - jaro);
|
|
1511
1524
|
}
|
|
1512
|
-
|
|
1525
|
+
return {
|
|
1526
|
+
res: Metric.clamp(res),
|
|
1527
|
+
raw: { matchWindow, matches, transpos, jaro, prefix }
|
|
1528
|
+
};
|
|
1529
|
+
} finally {
|
|
1530
|
+
Pool.release('int32', matchA, m);
|
|
1531
|
+
Pool.release('int32', matchB, n);
|
|
1513
1532
|
}
|
|
1514
|
-
Pool.release('uint16', matchA, m);
|
|
1515
|
-
Pool.release('uint16', matchB, n);
|
|
1516
|
-
return {
|
|
1517
|
-
res: Metric.clamp(res),
|
|
1518
|
-
raw: { matchWindow, matches, transpos, jaro, prefix }
|
|
1519
|
-
};
|
|
1520
1533
|
}
|
|
1521
1534
|
}
|
|
1522
1535
|
MetricRegistry.add('jaroWinkler', JaroWinklerDistance);
|
|
@@ -1527,24 +1540,27 @@ class LCSMetric extends Metric {
|
|
|
1527
1540
|
}
|
|
1528
1541
|
compute(a, b, m, n, maxLen) {
|
|
1529
1542
|
const len = m + 1;
|
|
1530
|
-
const [prev, curr] = Pool.acquireMany('
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1543
|
+
const [prev, curr] = Pool.acquireMany('int32', [len, len]);
|
|
1544
|
+
try {
|
|
1545
|
+
for (let i = 0; i <= m; i++) prev[i] = 0;
|
|
1546
|
+
for (let j = 1; j <= n; j++) {
|
|
1547
|
+
curr[0] = 0;
|
|
1548
|
+
const cb = b.charCodeAt(j - 1);
|
|
1549
|
+
for (let i = 1; i <= m; i++) {
|
|
1550
|
+
if (a.charCodeAt(i - 1) === cb) curr[i] = prev[i - 1] + 1;
|
|
1551
|
+
else curr[i] = Math.max(prev[i], curr[i - 1]);
|
|
1552
|
+
}
|
|
1553
|
+
prev.set(curr);
|
|
1538
1554
|
}
|
|
1539
|
-
prev
|
|
1555
|
+
const lcs = prev[m];
|
|
1556
|
+
return {
|
|
1557
|
+
res: maxLen === 0 ? 1 : Metric.clamp(lcs / maxLen),
|
|
1558
|
+
raw: { lcs, maxLen }
|
|
1559
|
+
};
|
|
1560
|
+
} finally {
|
|
1561
|
+
Pool.release('int32', prev, len);
|
|
1562
|
+
Pool.release('int32', curr, len);
|
|
1540
1563
|
}
|
|
1541
|
-
const lcs = prev[m];
|
|
1542
|
-
Pool.release('uint16', prev, len);
|
|
1543
|
-
Pool.release('uint16', curr, len);
|
|
1544
|
-
return {
|
|
1545
|
-
res: maxLen === 0 ? 1 : Metric.clamp(lcs / maxLen),
|
|
1546
|
-
raw: { lcs, maxLen }
|
|
1547
|
-
};
|
|
1548
1564
|
}
|
|
1549
1565
|
}
|
|
1550
1566
|
MetricRegistry.add('lcs', LCSMetric);
|
|
@@ -1555,24 +1571,27 @@ class LevenshteinDistance extends Metric {
|
|
|
1555
1571
|
}
|
|
1556
1572
|
compute(a, b, m, n, maxLen) {
|
|
1557
1573
|
const len = m + 1;
|
|
1558
|
-
const [prev, curr] = Pool.acquireMany('
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1574
|
+
const [prev, curr] = Pool.acquireMany('int32', [len, len]);
|
|
1575
|
+
try {
|
|
1576
|
+
for (let i = 0; i <= m; i++) prev[i] = i;
|
|
1577
|
+
for (let j = 1; j <= n; j++) {
|
|
1578
|
+
curr[0] = j;
|
|
1579
|
+
const cb = b.charCodeAt(j - 1);
|
|
1580
|
+
for (let i = 1; i <= m; i++) {
|
|
1581
|
+
const cost = a.charCodeAt(i - 1) === cb ? 0 : 1;
|
|
1582
|
+
curr[i] = Math.min(curr[i - 1] + 1, prev[i] + 1, prev[i - 1] + cost);
|
|
1583
|
+
}
|
|
1584
|
+
prev.set(curr);
|
|
1566
1585
|
}
|
|
1567
|
-
prev
|
|
1586
|
+
const dist = prev[m];
|
|
1587
|
+
return {
|
|
1588
|
+
res: maxLen === 0 ? 1 : Metric.clamp(1 - dist / maxLen),
|
|
1589
|
+
raw: { dist, maxLen }
|
|
1590
|
+
};
|
|
1591
|
+
} finally {
|
|
1592
|
+
Pool.release('int32', prev, len);
|
|
1593
|
+
Pool.release('int32', curr, len);
|
|
1568
1594
|
}
|
|
1569
|
-
const dist = prev[m];
|
|
1570
|
-
Pool.release('uint16', prev, len);
|
|
1571
|
-
Pool.release('uint16', curr, len);
|
|
1572
|
-
return {
|
|
1573
|
-
res: maxLen === 0 ? 1 : Metric.clamp(1 - dist / maxLen),
|
|
1574
|
-
raw: { dist, maxLen }
|
|
1575
|
-
};
|
|
1576
1595
|
}
|
|
1577
1596
|
}
|
|
1578
1597
|
MetricRegistry.add('levenshtein', LevenshteinDistance);
|
|
@@ -1584,37 +1603,40 @@ class NeedlemanWunschDistance extends Metric {
|
|
|
1584
1603
|
compute(a, b, m, n, maxLen) {
|
|
1585
1604
|
const { match = 1, mismatch = -1, gap = -1 } = this.options;
|
|
1586
1605
|
const len = m + 1;
|
|
1587
|
-
const [prev, curr] = Pool.acquireMany('
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
|
|
1606
|
+
const [prev, curr] = Pool.acquireMany('int32', [len, len]);
|
|
1607
|
+
try {
|
|
1608
|
+
prev[0] = 0;
|
|
1609
|
+
for (let i = 1; i <= m; i++) prev[i] = prev[i - 1] + gap;
|
|
1610
|
+
for (let j = 1; j <= n; j++) {
|
|
1611
|
+
curr[0] = prev[0] + gap;
|
|
1612
|
+
const cb = b.charCodeAt(j - 1);
|
|
1613
|
+
for (let i = 1; i <= m; i++) {
|
|
1614
|
+
const score = a.charCodeAt(i - 1) === cb ? match : mismatch;
|
|
1615
|
+
curr[i] = Math.max(
|
|
1616
|
+
prev[i - 1] + score,
|
|
1617
|
+
prev[i] + gap,
|
|
1618
|
+
curr[i - 1] + gap
|
|
1619
|
+
);
|
|
1620
|
+
}
|
|
1621
|
+
prev.set(curr);
|
|
1600
1622
|
}
|
|
1601
|
-
prev
|
|
1623
|
+
const score = prev[m];
|
|
1624
|
+
const denum = maxLen * match;
|
|
1625
|
+
return {
|
|
1626
|
+
res: denum === 0 ? 0 : Metric.clamp(score / denum),
|
|
1627
|
+
raw: { score, denum }
|
|
1628
|
+
};
|
|
1629
|
+
} finally {
|
|
1630
|
+
Pool.release('int32', prev, len);
|
|
1631
|
+
Pool.release('int32', curr, len);
|
|
1602
1632
|
}
|
|
1603
|
-
const score = prev[m];
|
|
1604
|
-
Pool.release('uint16', prev, len);
|
|
1605
|
-
Pool.release('uint16', curr, len);
|
|
1606
|
-
const denum = maxLen * match;
|
|
1607
|
-
return {
|
|
1608
|
-
res: denum === 0 ? 0 : Metric.clamp(score / denum),
|
|
1609
|
-
raw: { score, denum }
|
|
1610
|
-
};
|
|
1611
1633
|
}
|
|
1612
1634
|
}
|
|
1613
1635
|
MetricRegistry.add('needlemanWunsch', NeedlemanWunschDistance);
|
|
1614
1636
|
|
|
1615
1637
|
class QGramSimilarity extends Metric {
|
|
1616
1638
|
constructor(a, b, opt = {}) {
|
|
1617
|
-
super('
|
|
1639
|
+
super('qGram', a, b, opt, true);
|
|
1618
1640
|
}
|
|
1619
1641
|
_qGrams(str, q) {
|
|
1620
1642
|
const len = Math.max(0, str.length - q + 1);
|
|
@@ -1624,19 +1646,22 @@ class QGramSimilarity extends Metric {
|
|
|
1624
1646
|
}
|
|
1625
1647
|
compute(a, b) {
|
|
1626
1648
|
const { q = 2 } = this.options;
|
|
1627
|
-
const setA = this._qGrams(a, q)
|
|
1628
|
-
|
|
1629
|
-
let intersection = 0;
|
|
1630
|
-
for (const gram of setA) if (setB.has(gram)) intersection++;
|
|
1649
|
+
const setA = this._qGrams(a, q),
|
|
1650
|
+
setB = this._qGrams(b, q);
|
|
1631
1651
|
const sizeA = setA.size,
|
|
1632
1652
|
sizeB = setB.size;
|
|
1633
|
-
|
|
1634
|
-
|
|
1635
|
-
|
|
1636
|
-
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
|
|
1653
|
+
try {
|
|
1654
|
+
let intersection = 0;
|
|
1655
|
+
for (const gram of setA) if (setB.has(gram)) intersection++;
|
|
1656
|
+
const size = Math.max(sizeA, sizeB);
|
|
1657
|
+
return {
|
|
1658
|
+
res: size === 0 ? 1 : Metric.clamp(intersection / size),
|
|
1659
|
+
raw: { intersection, size }
|
|
1660
|
+
};
|
|
1661
|
+
} finally {
|
|
1662
|
+
Pool.release('set', setA, sizeA);
|
|
1663
|
+
Pool.release('set', setB, sizeB);
|
|
1664
|
+
}
|
|
1640
1665
|
}
|
|
1641
1666
|
}
|
|
1642
1667
|
MetricRegistry.add('qGram', QGramSimilarity);
|
|
@@ -1648,31 +1673,34 @@ class SmithWatermanDistance extends Metric {
|
|
|
1648
1673
|
compute(a, b, m, n) {
|
|
1649
1674
|
const { match = 2, mismatch = -1, gap = -2 } = this.options;
|
|
1650
1675
|
const len = m + 1;
|
|
1651
|
-
const [prev, curr] = Pool.acquireMany('
|
|
1652
|
-
for (let i = 0; i <= m; i++) prev[i] = 0;
|
|
1676
|
+
const [prev, curr] = Pool.acquireMany('int32', [len, len]);
|
|
1653
1677
|
let maxScore = 0;
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
const
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
|
|
1678
|
+
try {
|
|
1679
|
+
for (let i = 0; i <= m; i++) prev[i] = 0;
|
|
1680
|
+
for (let j = 1; j <= n; j++) {
|
|
1681
|
+
curr[0] = 0;
|
|
1682
|
+
const cb = b.charCodeAt(j - 1);
|
|
1683
|
+
for (let i = 1; i <= m; i++) {
|
|
1684
|
+
const score = a.charCodeAt(i - 1) === cb ? match : mismatch;
|
|
1685
|
+
curr[i] = Math.max(
|
|
1686
|
+
0,
|
|
1687
|
+
prev[i - 1] + score,
|
|
1688
|
+
prev[i] + gap,
|
|
1689
|
+
curr[i - 1] + gap
|
|
1690
|
+
);
|
|
1691
|
+
if (curr[i] > maxScore) maxScore = curr[i];
|
|
1692
|
+
}
|
|
1693
|
+
prev.set(curr);
|
|
1666
1694
|
}
|
|
1667
|
-
|
|
1695
|
+
const denum = Math.min(m * match, n * match);
|
|
1696
|
+
return {
|
|
1697
|
+
res: denum === 0 ? 0 : Metric.clamp(maxScore / denum),
|
|
1698
|
+
raw: { score: maxScore, denum }
|
|
1699
|
+
};
|
|
1700
|
+
} finally {
|
|
1701
|
+
Pool.release('int32', prev, len);
|
|
1702
|
+
Pool.release('int32', curr, len);
|
|
1668
1703
|
}
|
|
1669
|
-
Pool.release('uint16', prev, len);
|
|
1670
|
-
Pool.release('uint16', curr, len);
|
|
1671
|
-
const denum = Math.min(m * match, n * match);
|
|
1672
|
-
return {
|
|
1673
|
-
res: denum === 0 ? 0 : Metric.clamp(maxScore / denum),
|
|
1674
|
-
raw: { score: maxScore, denum }
|
|
1675
|
-
};
|
|
1676
1704
|
}
|
|
1677
1705
|
}
|
|
1678
1706
|
MetricRegistry.add('smithWaterman', SmithWatermanDistance);
|
|
@@ -1683,10 +1711,9 @@ class Phonetic {
|
|
|
1683
1711
|
static default;
|
|
1684
1712
|
algo;
|
|
1685
1713
|
options;
|
|
1714
|
+
optKey;
|
|
1686
1715
|
map;
|
|
1687
|
-
static clear()
|
|
1688
|
-
this.cache.clear();
|
|
1689
|
-
}
|
|
1716
|
+
static clear = () => this.cache.clear();
|
|
1690
1717
|
constructor(algo, opt = {}) {
|
|
1691
1718
|
const defaults = this.constructor.default ?? {};
|
|
1692
1719
|
const mapId = opt.map ?? defaults.map;
|
|
@@ -1695,6 +1722,9 @@ class Phonetic {
|
|
|
1695
1722
|
if (map === undefined)
|
|
1696
1723
|
throw new Error(`Requested mapping <${mapId}> is not declared`);
|
|
1697
1724
|
this.options = merge(merge(defaults, map.options ?? {}), opt);
|
|
1725
|
+
this.optKey = Hasher.fastFNV1a(
|
|
1726
|
+
JSON.stringify(this.options, Object.keys(this.options).sort())
|
|
1727
|
+
).toString();
|
|
1698
1728
|
this.algo = algo;
|
|
1699
1729
|
this.map = map;
|
|
1700
1730
|
}
|
|
@@ -1716,7 +1746,8 @@ class Phonetic {
|
|
|
1716
1746
|
for (const rule of ruleset) {
|
|
1717
1747
|
if (rule.char && rule.char !== char) continue;
|
|
1718
1748
|
if (rule.position === 'start' && i !== 0) continue;
|
|
1719
|
-
if (rule.position === 'middle' && i
|
|
1749
|
+
if (rule.position === 'middle' && (i === 0 || i === charLen - 1))
|
|
1750
|
+
continue;
|
|
1720
1751
|
if (rule.position === 'end' && i !== charLen) continue;
|
|
1721
1752
|
if (rule.prev && !rule.prev.includes(prev)) continue;
|
|
1722
1753
|
if (rule.prevNot && rule.prevNot.includes(prev)) continue;
|
|
@@ -1770,9 +1801,7 @@ class Phonetic {
|
|
|
1770
1801
|
? input
|
|
1771
1802
|
: (input + pad.repeat(length)).slice(0, length);
|
|
1772
1803
|
}
|
|
1773
|
-
word2Chars(word)
|
|
1774
|
-
return word.toLowerCase().split('');
|
|
1775
|
-
}
|
|
1804
|
+
word2Chars = (word) => word.toLowerCase().split('');
|
|
1776
1805
|
exitEarly(code, i) {
|
|
1777
1806
|
const { length = -1 } = this.options;
|
|
1778
1807
|
return length > 0 && code.length >= length;
|
|
@@ -1783,7 +1812,7 @@ class Phonetic {
|
|
|
1783
1812
|
loop(words) {
|
|
1784
1813
|
const index = [];
|
|
1785
1814
|
for (const word of words) {
|
|
1786
|
-
const key = Phonetic.cache.key(this.algo, [word]);
|
|
1815
|
+
const key = Phonetic.cache.key(this.algo, [word]) + this.optKey;
|
|
1787
1816
|
const code =
|
|
1788
1817
|
Phonetic.cache.get(key || '') ??
|
|
1789
1818
|
(() => {
|
|
@@ -1798,14 +1827,20 @@ class Phonetic {
|
|
|
1798
1827
|
async loopAsync(words) {
|
|
1799
1828
|
const index = [];
|
|
1800
1829
|
for (const word of words) {
|
|
1801
|
-
const
|
|
1830
|
+
const key = Phonetic.cache.key(this.algo, [word]) + this.optKey;
|
|
1831
|
+
const code = await Promise.resolve(
|
|
1832
|
+
Phonetic.cache.get(key || '') ??
|
|
1833
|
+
(() => {
|
|
1834
|
+
const res = this.encode(word);
|
|
1835
|
+
if (key) Phonetic.cache.set(key, res);
|
|
1836
|
+
return res;
|
|
1837
|
+
})()
|
|
1838
|
+
);
|
|
1802
1839
|
if (code && code.length) index.push(this.equalLen(code));
|
|
1803
1840
|
}
|
|
1804
1841
|
return index;
|
|
1805
1842
|
}
|
|
1806
|
-
getAlgoName()
|
|
1807
|
-
return this.algo;
|
|
1808
|
-
}
|
|
1843
|
+
getAlgoName = () => this.algo;
|
|
1809
1844
|
getIndex(input) {
|
|
1810
1845
|
const { delimiter = ' ' } = this.options;
|
|
1811
1846
|
return profiler$1.run(() =>
|
|
@@ -1825,7 +1860,7 @@ const PhoneticRegistry = Registry('phonetic', Phonetic);
|
|
|
1825
1860
|
const PhoneticMappingRegistry = (() => {
|
|
1826
1861
|
const mappings = Object.create(null);
|
|
1827
1862
|
const maps = (algo) => (mappings[algo] ||= Object.create(null));
|
|
1828
|
-
return {
|
|
1863
|
+
return Object.freeze({
|
|
1829
1864
|
add(algo, id, map, update = false) {
|
|
1830
1865
|
const mappings = maps(algo);
|
|
1831
1866
|
if (!update && id in mappings)
|
|
@@ -1846,10 +1881,11 @@ const PhoneticMappingRegistry = (() => {
|
|
|
1846
1881
|
list(algo) {
|
|
1847
1882
|
return Object.keys(maps(algo));
|
|
1848
1883
|
}
|
|
1849
|
-
};
|
|
1884
|
+
});
|
|
1850
1885
|
})();
|
|
1851
1886
|
|
|
1852
1887
|
class Caverphone extends Phonetic {
|
|
1888
|
+
static REGEX = { uppercase: /[^A-Z]/gi };
|
|
1853
1889
|
static default = {
|
|
1854
1890
|
map: 'en2',
|
|
1855
1891
|
delimiter: ' ',
|
|
@@ -1861,15 +1897,11 @@ class Caverphone extends Phonetic {
|
|
|
1861
1897
|
super('caverphone', opt);
|
|
1862
1898
|
}
|
|
1863
1899
|
encode(word) {
|
|
1864
|
-
word = word.replace(
|
|
1900
|
+
word = word.replace(Caverphone.REGEX.uppercase, '').toLowerCase();
|
|
1865
1901
|
return super.encode(word);
|
|
1866
1902
|
}
|
|
1867
|
-
mapChar(char)
|
|
1868
|
-
|
|
1869
|
-
}
|
|
1870
|
-
adjustCode(code) {
|
|
1871
|
-
return code.toUpperCase();
|
|
1872
|
-
}
|
|
1903
|
+
mapChar = (char) => char;
|
|
1904
|
+
adjustCode = (code) => code.toUpperCase();
|
|
1873
1905
|
}
|
|
1874
1906
|
PhoneticRegistry.add('caverphone', Caverphone);
|
|
1875
1907
|
PhoneticMappingRegistry.add('caverphone', 'en1', {
|
|
@@ -2028,6 +2060,7 @@ PhoneticMappingRegistry.add('cologne', 'default', {
|
|
|
2028
2060
|
});
|
|
2029
2061
|
|
|
2030
2062
|
class Metaphone extends Phonetic {
|
|
2063
|
+
static REGEX = { adjacent: /([A-BD-Z])\1+/gi, vowel: /[AEIOU]/g };
|
|
2031
2064
|
static default = {
|
|
2032
2065
|
map: 'en90',
|
|
2033
2066
|
delimiter: ' ',
|
|
@@ -2039,11 +2072,13 @@ class Metaphone extends Phonetic {
|
|
|
2039
2072
|
super('metaphone', opt);
|
|
2040
2073
|
}
|
|
2041
2074
|
encode(word) {
|
|
2042
|
-
word = word.replace(
|
|
2075
|
+
word = word.replace(Metaphone.REGEX.adjacent, (m, c) =>
|
|
2076
|
+
c === 'C' ? m : c
|
|
2077
|
+
);
|
|
2043
2078
|
return super.encode(word);
|
|
2044
2079
|
}
|
|
2045
2080
|
adjustCode(code) {
|
|
2046
|
-
return code.slice(0, 1) + code.slice(1).replace(
|
|
2081
|
+
return code.slice(0, 1) + code.slice(1).replace(Metaphone.REGEX.vowel, '');
|
|
2047
2082
|
}
|
|
2048
2083
|
}
|
|
2049
2084
|
PhoneticRegistry.add('metaphone', Metaphone);
|
|
@@ -2202,6 +2237,7 @@ PhoneticMappingRegistry.add('soundex', 'de', {
|
|
|
2202
2237
|
const profiler = Profiler.getInstance();
|
|
2203
2238
|
class CmpStr {
|
|
2204
2239
|
static filter = {
|
|
2240
|
+
has: Filter.has,
|
|
2205
2241
|
add: Filter.add,
|
|
2206
2242
|
remove: Filter.remove,
|
|
2207
2243
|
pause: Filter.pause,
|
|
@@ -2233,12 +2269,8 @@ class CmpStr {
|
|
|
2233
2269
|
metric: Metric.clear,
|
|
2234
2270
|
phonetic: Phonetic.clear
|
|
2235
2271
|
};
|
|
2236
|
-
static analyze(input)
|
|
2237
|
-
|
|
2238
|
-
}
|
|
2239
|
-
static diff(a, b, opt) {
|
|
2240
|
-
return new DiffChecker(a, b, opt);
|
|
2241
|
-
}
|
|
2272
|
+
static analyze = (input) => new TextAnalyzer(input);
|
|
2273
|
+
static diff = (a, b, opt) => new DiffChecker(a, b, opt);
|
|
2242
2274
|
static create(opt) {
|
|
2243
2275
|
return new CmpStr(opt);
|
|
2244
2276
|
}
|
|
@@ -2295,7 +2327,7 @@ class CmpStr {
|
|
|
2295
2327
|
}
|
|
2296
2328
|
index(input, { algo, opt }) {
|
|
2297
2329
|
this.assert('phonetic', algo);
|
|
2298
|
-
const phonetic = factory
|
|
2330
|
+
const phonetic = factory['phonetic'](algo, opt);
|
|
2299
2331
|
const delimiter = opt?.delimiter ?? ' ';
|
|
2300
2332
|
return Array.isArray(input)
|
|
2301
2333
|
? input.map((s) => phonetic.getIndex(s).join(delimiter))
|
|
@@ -2318,7 +2350,7 @@ class CmpStr {
|
|
|
2318
2350
|
) {
|
|
2319
2351
|
return [];
|
|
2320
2352
|
}
|
|
2321
|
-
const metric = factory
|
|
2353
|
+
const metric = factory['metric'](resolved.metric, A, B, resolved.opt);
|
|
2322
2354
|
if (resolved.output !== 'prep') metric.setOriginal(a, b);
|
|
2323
2355
|
metric.run(mode);
|
|
2324
2356
|
const result = this.postProcess(metric.getResults(), resolved);
|
|
@@ -2331,9 +2363,7 @@ class CmpStr {
|
|
|
2331
2363
|
? result.map((r) => ({ source: r.a, target: r.b, match: r.res }))
|
|
2332
2364
|
: { source: result.a, target: result.b, match: result.res };
|
|
2333
2365
|
}
|
|
2334
|
-
clone()
|
|
2335
|
-
return Object.assign(Object.create(Object.getPrototypeOf(this)), this);
|
|
2336
|
-
}
|
|
2366
|
+
clone = () => Object.assign(Object.create(Object.getPrototypeOf(this)), this);
|
|
2337
2367
|
reset() {
|
|
2338
2368
|
for (const k in this.options) delete this.options[k];
|
|
2339
2369
|
return this;
|
|
@@ -2358,33 +2388,15 @@ class CmpStr {
|
|
|
2358
2388
|
rmv(this.options, path);
|
|
2359
2389
|
return this;
|
|
2360
2390
|
}
|
|
2361
|
-
setRaw(enable)
|
|
2362
|
-
|
|
2363
|
-
|
|
2364
|
-
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
|
|
2370
|
-
rmvFlags() {
|
|
2371
|
-
return this.rmvOption('flags');
|
|
2372
|
-
}
|
|
2373
|
-
setProcessors(opt) {
|
|
2374
|
-
return this.setOption('processors', opt);
|
|
2375
|
-
}
|
|
2376
|
-
rmvProcessors() {
|
|
2377
|
-
return this.rmvOption('processors');
|
|
2378
|
-
}
|
|
2379
|
-
getOptions() {
|
|
2380
|
-
return this.options;
|
|
2381
|
-
}
|
|
2382
|
-
getSerializedOptions() {
|
|
2383
|
-
return JSON.stringify(this.options);
|
|
2384
|
-
}
|
|
2385
|
-
getOption(path) {
|
|
2386
|
-
return get(this.options, path);
|
|
2387
|
-
}
|
|
2391
|
+
setRaw = (enable) => this.setOption('raw', enable);
|
|
2392
|
+
setMetric = (name) => this.setOption('metric', name);
|
|
2393
|
+
setFlags = (flags) => this.setOption('flags', flags);
|
|
2394
|
+
rmvFlags = () => this.rmvOption('flags');
|
|
2395
|
+
setProcessors = (opt) => this.setOption('processors', opt);
|
|
2396
|
+
rmvProcessors = () => this.rmvOption('processors');
|
|
2397
|
+
getOptions = () => this.options;
|
|
2398
|
+
getSerializedOptions = () => JSON.stringify(this.options);
|
|
2399
|
+
getOption = (path) => get(this.options, path);
|
|
2388
2400
|
test(a, b, opt) {
|
|
2389
2401
|
return this.compute(a, b, opt, 'single');
|
|
2390
2402
|
}
|
|
@@ -2498,7 +2510,7 @@ class CmpStrAsync extends CmpStr {
|
|
|
2498
2510
|
}
|
|
2499
2511
|
async indexAsync(input, { algo, opt }) {
|
|
2500
2512
|
this.assert('phonetic', algo);
|
|
2501
|
-
const phonetic = factory
|
|
2513
|
+
const phonetic = factory['phonetic'](algo, opt);
|
|
2502
2514
|
const delimiter = opt?.delimiter ?? ' ';
|
|
2503
2515
|
return Array.isArray(input)
|
|
2504
2516
|
? Promise.all(
|
|
@@ -2522,7 +2534,7 @@ class CmpStrAsync extends CmpStr {
|
|
|
2522
2534
|
) {
|
|
2523
2535
|
return [];
|
|
2524
2536
|
}
|
|
2525
|
-
const metric = factory
|
|
2537
|
+
const metric = factory['metric'](resolved.metric, A, B, resolved.opt);
|
|
2526
2538
|
if (resolved.output !== 'prep') metric.setOriginal(a, b);
|
|
2527
2539
|
await metric.runAsync(mode);
|
|
2528
2540
|
const result = this.postProcess(metric.getResults(), resolved);
|
|
@@ -2632,6 +2644,7 @@ export {
|
|
|
2632
2644
|
DiffChecker,
|
|
2633
2645
|
Filter,
|
|
2634
2646
|
HashTable,
|
|
2647
|
+
Hasher,
|
|
2635
2648
|
Metric,
|
|
2636
2649
|
MetricRegistry,
|
|
2637
2650
|
Normalizer,
|