cmpstr 3.2.2 → 3.3.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/dist/CmpStr.esm.js +2149 -1721
- package/dist/CmpStr.esm.min.js +2 -2
- package/dist/CmpStr.umd.js +2028 -1604
- package/dist/CmpStr.umd.min.js +2 -2
- package/dist/cjs/CmpStr.cjs +100 -51
- package/dist/cjs/CmpStrAsync.cjs +35 -18
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +1 -1
- package/dist/cjs/metric/Hamming.cjs +1 -1
- package/dist/cjs/metric/Jaccard.cjs +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +1 -1
- package/dist/cjs/metric/LCS.cjs +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +1 -1
- package/dist/cjs/metric/Metric.cjs +40 -22
- package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -1
- package/dist/cjs/metric/QGram.cjs +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +1 -1
- package/dist/cjs/phonetic/Cologne.cjs +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +27 -15
- package/dist/cjs/phonetic/Soundex.cjs +1 -1
- package/dist/cjs/root.cjs +4 -2
- package/dist/cjs/utils/DeepMerge.cjs +102 -97
- package/dist/cjs/utils/DiffChecker.cjs +1 -1
- package/dist/cjs/utils/Errors.cjs +22 -19
- package/dist/cjs/utils/Filter.cjs +59 -24
- package/dist/cjs/utils/HashTable.cjs +44 -29
- package/dist/cjs/utils/Normalizer.cjs +57 -28
- package/dist/cjs/utils/OptionsValidator.cjs +211 -0
- package/dist/cjs/utils/Pool.cjs +27 -13
- package/dist/cjs/utils/Profiler.cjs +41 -27
- package/dist/cjs/utils/Registry.cjs +5 -5
- package/dist/cjs/utils/StructuredData.cjs +83 -53
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -1
- package/dist/esm/CmpStr.mjs +101 -52
- package/dist/esm/CmpStrAsync.mjs +35 -18
- package/dist/esm/index.mjs +1 -1
- package/dist/esm/metric/Cosine.mjs +1 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs +1 -1
- package/dist/esm/metric/DiceSorensen.mjs +1 -1
- package/dist/esm/metric/Hamming.mjs +1 -1
- package/dist/esm/metric/Jaccard.mjs +1 -1
- package/dist/esm/metric/JaroWinkler.mjs +1 -1
- package/dist/esm/metric/LCS.mjs +1 -1
- package/dist/esm/metric/Levenshtein.mjs +1 -1
- package/dist/esm/metric/Metric.mjs +40 -22
- package/dist/esm/metric/NeedlemanWunsch.mjs +1 -1
- package/dist/esm/metric/QGram.mjs +1 -1
- package/dist/esm/metric/SmithWaterman.mjs +1 -1
- package/dist/esm/phonetic/Caverphone.mjs +1 -1
- package/dist/esm/phonetic/Cologne.mjs +1 -1
- package/dist/esm/phonetic/Metaphone.mjs +1 -1
- package/dist/esm/phonetic/Phonetic.mjs +30 -15
- package/dist/esm/phonetic/Soundex.mjs +1 -1
- package/dist/esm/root.mjs +3 -3
- package/dist/esm/utils/DeepMerge.mjs +103 -94
- package/dist/esm/utils/DiffChecker.mjs +1 -1
- package/dist/esm/utils/Errors.mjs +22 -19
- package/dist/esm/utils/Filter.mjs +59 -24
- package/dist/esm/utils/HashTable.mjs +44 -29
- package/dist/esm/utils/Normalizer.mjs +57 -28
- package/dist/esm/utils/OptionsValidator.mjs +210 -0
- package/dist/esm/utils/Pool.mjs +27 -13
- package/dist/esm/utils/Profiler.mjs +41 -27
- package/dist/esm/utils/Registry.mjs +5 -5
- package/dist/esm/utils/StructuredData.mjs +83 -53
- package/dist/esm/utils/TextAnalyzer.mjs +1 -1
- package/dist/types/CmpStr.d.ts +22 -15
- package/dist/types/CmpStrAsync.d.ts +3 -0
- package/dist/types/index.d.ts +3 -3
- package/dist/types/metric/Metric.d.ts +9 -9
- package/dist/types/phonetic/Phonetic.d.ts +4 -3
- package/dist/types/root.d.ts +3 -2
- package/dist/types/utils/DeepMerge.d.ts +80 -58
- package/dist/types/utils/Errors.d.ts +25 -8
- package/dist/types/utils/Filter.d.ts +4 -1
- package/dist/types/utils/HashTable.d.ts +12 -11
- package/dist/types/utils/Normalizer.d.ts +2 -1
- package/dist/types/utils/OptionsValidator.d.ts +193 -0
- package/dist/types/utils/Profiler.d.ts +9 -28
- package/dist/types/utils/StructuredData.d.ts +3 -0
- package/dist/types/utils/Types.d.ts +13 -1
- package/package.json +14 -5
package/dist/cjs/utils/Pool.cjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
var Errors = require('./Errors.cjs');
|
|
@@ -13,15 +13,14 @@ class RingPool {
|
|
|
13
13
|
acquire(minSize, allowOversize) {
|
|
14
14
|
return Errors.ErrorUtil.wrap(
|
|
15
15
|
() => {
|
|
16
|
-
const
|
|
16
|
+
const buffers = this.buffers;
|
|
17
|
+
const len = buffers.length;
|
|
17
18
|
for (let i = 0; i < len; i++) {
|
|
18
|
-
const idx = (this.pointer + i)
|
|
19
|
-
const item =
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
(
|
|
23
|
-
) {
|
|
24
|
-
this.pointer = (idx + 1) & (len - 1);
|
|
19
|
+
const idx = (this.pointer + i) % len;
|
|
20
|
+
const item = buffers[idx];
|
|
21
|
+
const size = item.size;
|
|
22
|
+
if (size >= minSize && (allowOversize || size === minSize)) {
|
|
23
|
+
this.pointer = (idx + 1) % len;
|
|
25
24
|
return item;
|
|
26
25
|
}
|
|
27
26
|
}
|
|
@@ -34,9 +33,12 @@ class RingPool {
|
|
|
34
33
|
release(item) {
|
|
35
34
|
Errors.ErrorUtil.wrap(
|
|
36
35
|
() => {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
36
|
+
const buffers = this.buffers;
|
|
37
|
+
if (buffers.length < this.maxSize) {
|
|
38
|
+
buffers.push(item);
|
|
39
|
+
return;
|
|
40
|
+
}
|
|
41
|
+
buffers[this.pointer] = item;
|
|
40
42
|
this.pointer = (this.pointer + 1) % this.maxSize;
|
|
41
43
|
},
|
|
42
44
|
`Failed to release buffer back to pool`,
|
|
@@ -56,6 +58,12 @@ class Pool {
|
|
|
56
58
|
maxItemSize: 2048,
|
|
57
59
|
allowOversize: true
|
|
58
60
|
},
|
|
61
|
+
'arr[]': {
|
|
62
|
+
type: 'arr[]',
|
|
63
|
+
maxSize: 4,
|
|
64
|
+
maxItemSize: 1024,
|
|
65
|
+
allowOversize: false
|
|
66
|
+
},
|
|
59
67
|
'number[]': {
|
|
60
68
|
type: 'number[]',
|
|
61
69
|
maxSize: 16,
|
|
@@ -73,6 +81,7 @@ class Pool {
|
|
|
73
81
|
};
|
|
74
82
|
static POOLS = {
|
|
75
83
|
int32: new RingPool(64),
|
|
84
|
+
'arr[]': new RingPool(4),
|
|
76
85
|
'number[]': new RingPool(16),
|
|
77
86
|
'string[]': new RingPool(2),
|
|
78
87
|
set: new RingPool(8),
|
|
@@ -82,6 +91,8 @@ class Pool {
|
|
|
82
91
|
switch (type) {
|
|
83
92
|
case 'int32':
|
|
84
93
|
return new Int32Array(size);
|
|
94
|
+
case 'arr[]':
|
|
95
|
+
return new Array(size);
|
|
85
96
|
case 'number[]':
|
|
86
97
|
return new Float64Array(size);
|
|
87
98
|
case 'string[]':
|
|
@@ -105,7 +116,10 @@ class Pool {
|
|
|
105
116
|
return this.allocate(type, size);
|
|
106
117
|
}
|
|
107
118
|
static acquireMany(type, sizes) {
|
|
108
|
-
|
|
119
|
+
const out = new Array(sizes.length);
|
|
120
|
+
for (let i = 0; i < sizes.length; i++)
|
|
121
|
+
out[i] = this.acquire(type, sizes[i]);
|
|
122
|
+
return out;
|
|
109
123
|
}
|
|
110
124
|
static release(type, buffer, size) {
|
|
111
125
|
const CONFIG = this.CONFIG[type];
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
class Profiler {
|
|
@@ -7,11 +7,13 @@ class Profiler {
|
|
|
7
7
|
static instance;
|
|
8
8
|
nowFn;
|
|
9
9
|
memFn;
|
|
10
|
-
store =
|
|
10
|
+
store = [];
|
|
11
|
+
last;
|
|
11
12
|
totalTime = 0;
|
|
12
13
|
totalMem = 0;
|
|
13
14
|
static detectEnv() {
|
|
14
|
-
if (typeof process !== 'undefined'
|
|
15
|
+
if (typeof process !== 'undefined' && process.versions?.node)
|
|
16
|
+
Profiler.ENV = 'nodejs';
|
|
15
17
|
else if (typeof performance !== 'undefined') Profiler.ENV = 'browser';
|
|
16
18
|
else Profiler.ENV = 'unknown';
|
|
17
19
|
}
|
|
@@ -23,7 +25,7 @@ class Profiler {
|
|
|
23
25
|
this.active = active;
|
|
24
26
|
switch (Profiler.ENV) {
|
|
25
27
|
case 'nodejs':
|
|
26
|
-
this.nowFn = () => Number(process.hrtime.bigint())
|
|
28
|
+
this.nowFn = () => Number(process.hrtime.bigint()) * 1e-6;
|
|
27
29
|
this.memFn = () => process.memoryUsage().heapUsed;
|
|
28
30
|
break;
|
|
29
31
|
case 'browser':
|
|
@@ -36,40 +38,52 @@ class Profiler {
|
|
|
36
38
|
break;
|
|
37
39
|
}
|
|
38
40
|
}
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
startMem = this.mem();
|
|
44
|
-
const res = fn();
|
|
45
|
-
const deltaTime = this.now() - startTime,
|
|
46
|
-
deltaMem = this.mem() - startMem;
|
|
47
|
-
this.store.add({ time: deltaTime, mem: deltaMem, res, meta });
|
|
48
|
-
((this.totalTime += deltaTime), (this.totalMem += deltaMem));
|
|
49
|
-
return res;
|
|
41
|
+
storeRes(entry) {
|
|
42
|
+
this.store.push((this.last = entry));
|
|
43
|
+
this.totalTime += entry.time;
|
|
44
|
+
this.totalMem += entry.mem;
|
|
50
45
|
}
|
|
51
|
-
enable
|
|
46
|
+
enable() {
|
|
52
47
|
this.active = true;
|
|
53
|
-
}
|
|
54
|
-
disable
|
|
48
|
+
}
|
|
49
|
+
disable() {
|
|
55
50
|
this.active = false;
|
|
56
|
-
}
|
|
51
|
+
}
|
|
57
52
|
clear() {
|
|
58
|
-
this.store.
|
|
53
|
+
this.store.length = 0;
|
|
54
|
+
this.last = undefined;
|
|
59
55
|
this.totalTime = 0;
|
|
60
56
|
this.totalMem = 0;
|
|
61
57
|
}
|
|
62
58
|
run(fn, meta = {}) {
|
|
63
|
-
|
|
59
|
+
if (!this.active) return fn();
|
|
60
|
+
const startTime = this.nowFn(),
|
|
61
|
+
startMem = this.memFn();
|
|
62
|
+
const res = fn();
|
|
63
|
+
const deltaTime = this.nowFn() - startTime,
|
|
64
|
+
deltaMem = this.memFn() - startMem;
|
|
65
|
+
this.storeRes({ time: deltaTime, mem: deltaMem, res, meta });
|
|
66
|
+
return res;
|
|
64
67
|
}
|
|
65
68
|
async runAsync(fn, meta = {}) {
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
+
if (!this.active) return fn();
|
|
70
|
+
const startTime = this.nowFn(),
|
|
71
|
+
startMem = this.memFn();
|
|
72
|
+
const res = await fn();
|
|
73
|
+
const deltaTime = this.nowFn() - startTime,
|
|
74
|
+
deltaMem = this.memFn() - startMem;
|
|
75
|
+
this.storeRes({ time: deltaTime, mem: deltaMem, res, meta });
|
|
76
|
+
return res;
|
|
77
|
+
}
|
|
78
|
+
getAll() {
|
|
79
|
+
return [...this.store];
|
|
80
|
+
}
|
|
81
|
+
getLast() {
|
|
82
|
+
return this.last;
|
|
83
|
+
}
|
|
84
|
+
getTotal() {
|
|
85
|
+
return { time: this.totalTime, mem: this.totalMem };
|
|
69
86
|
}
|
|
70
|
-
getAll = () => [...this.store];
|
|
71
|
-
getLast = () => this.getAll().pop();
|
|
72
|
-
getTotal = () => ({ time: this.totalTime, mem: this.totalMem });
|
|
73
87
|
services = Object.freeze({
|
|
74
88
|
enable: this.enable.bind(this),
|
|
75
89
|
disable: this.disable.bind(this),
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
var Errors = require('./Errors.cjs');
|
|
@@ -68,13 +68,13 @@ function resolveCls(reg, cls) {
|
|
|
68
68
|
throw new Errors.CmpStrNotFoundError(`Registry <${reg}> does not exist`, {
|
|
69
69
|
registry: reg
|
|
70
70
|
});
|
|
71
|
-
return typeof cls === 'string' ? registry[reg]
|
|
71
|
+
return typeof cls === 'string' ? registry[reg].get(cls) : cls;
|
|
72
72
|
}
|
|
73
73
|
function createFromRegistry(reg, cls, ...args) {
|
|
74
|
-
|
|
74
|
+
const ctor = resolveCls(reg, cls);
|
|
75
75
|
return Errors.ErrorUtil.wrap(
|
|
76
|
-
() => new
|
|
77
|
-
`Failed to create instance of class <${
|
|
76
|
+
() => new ctor(...args),
|
|
77
|
+
`Failed to create instance of class <${ctor.name ?? cls}> from registry <${reg}>`,
|
|
78
78
|
{ registry: reg, class: cls, args }
|
|
79
79
|
);
|
|
80
80
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
var Errors = require('./Errors.cjs');
|
|
@@ -7,6 +7,8 @@ var Pool = require('./Pool.cjs');
|
|
|
7
7
|
class StructuredData {
|
|
8
8
|
data;
|
|
9
9
|
key;
|
|
10
|
+
static SORT_ASC = (a, b) => a.res - b.res;
|
|
11
|
+
static SORT_DESC = (a, b) => b.res - a.res;
|
|
10
12
|
static create(data, key) {
|
|
11
13
|
return new StructuredData(data, key);
|
|
12
14
|
}
|
|
@@ -15,14 +17,17 @@ class StructuredData {
|
|
|
15
17
|
this.key = key;
|
|
16
18
|
}
|
|
17
19
|
extractFrom(arr, key) {
|
|
18
|
-
const
|
|
19
|
-
|
|
20
|
+
const n = arr.length;
|
|
21
|
+
const result = new Array(n);
|
|
22
|
+
for (let i = 0; i < n; i++) {
|
|
20
23
|
const val = arr[i][key];
|
|
21
|
-
result[i] =
|
|
24
|
+
result[i] = val != null ? String(val) : '';
|
|
22
25
|
}
|
|
23
26
|
return result;
|
|
24
27
|
}
|
|
25
|
-
extract
|
|
28
|
+
extract() {
|
|
29
|
+
return this.extractFrom(this.data, this.key);
|
|
30
|
+
}
|
|
26
31
|
isMetricResult(v) {
|
|
27
32
|
return (
|
|
28
33
|
typeof v === 'object' && v !== null && 'a' in v && 'b' in v && 'res' in v
|
|
@@ -40,64 +45,89 @@ class StructuredData {
|
|
|
40
45
|
normalizeResults(results) {
|
|
41
46
|
if (!Array.isArray(results) || results.length === 0) return [];
|
|
42
47
|
const first = results[0];
|
|
43
|
-
let
|
|
44
|
-
if (this.isMetricResult(first))
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
48
|
+
let out = new Array(results.length);
|
|
49
|
+
if (this.isMetricResult(first)) {
|
|
50
|
+
const src = results;
|
|
51
|
+
for (let i = 0; i < src.length; i++) out[i] = { ...src[i], __idx: i };
|
|
52
|
+
} else if (this.isCmpStrResult(first)) {
|
|
53
|
+
const src = results;
|
|
54
|
+
for (let i = 0; i < src.length; i++) {
|
|
55
|
+
const r = src[i];
|
|
56
|
+
out[i] = {
|
|
57
|
+
metric: 'unknown',
|
|
58
|
+
a: r.source,
|
|
59
|
+
b: r.target,
|
|
60
|
+
res: r.match,
|
|
61
|
+
raw: r.raw,
|
|
62
|
+
__idx: i
|
|
63
|
+
};
|
|
64
|
+
}
|
|
65
|
+
} else
|
|
54
66
|
throw new Errors.CmpStrValidationError(
|
|
55
67
|
'Unsupported result format for StructuredData normalization.'
|
|
56
68
|
);
|
|
57
|
-
return
|
|
69
|
+
return out;
|
|
58
70
|
}
|
|
59
71
|
rebuild(results, sourceData, extractedStrings, removeZero, objectsOnly) {
|
|
60
|
-
const
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
const occurrence = occurrenceCount.get(targetStr) ?? 0;
|
|
77
|
-
occurrenceCount.set(targetStr, occurrence + 1);
|
|
78
|
-
dataIndex = indices[occurrence % indices.length];
|
|
79
|
-
} else {
|
|
80
|
-
dataIndex = result.__idx ?? i;
|
|
72
|
+
const m = extractedStrings.length,
|
|
73
|
+
n = results.length;
|
|
74
|
+
const stringToIndices = Pool.Pool.acquire('map', m);
|
|
75
|
+
const occurrenceCount = Pool.Pool.acquire('map', n);
|
|
76
|
+
const output = new Array(n);
|
|
77
|
+
stringToIndices.clear();
|
|
78
|
+
occurrenceCount.clear();
|
|
79
|
+
try {
|
|
80
|
+
for (let i = 0; i < m; i++) {
|
|
81
|
+
const str = extractedStrings[i];
|
|
82
|
+
let arr = stringToIndices.get(str);
|
|
83
|
+
if (!arr) {
|
|
84
|
+
arr = [];
|
|
85
|
+
stringToIndices.set(str, arr);
|
|
86
|
+
}
|
|
87
|
+
arr.push(i);
|
|
81
88
|
}
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
89
|
+
let out = 0;
|
|
90
|
+
for (let i = 0; i < n; i++) {
|
|
91
|
+
const result = results[i];
|
|
92
|
+
if (removeZero && result.res === 0) continue;
|
|
93
|
+
const targetStr = result.b || '';
|
|
94
|
+
const indices = stringToIndices.get(targetStr);
|
|
95
|
+
let dataIndex;
|
|
96
|
+
if (indices && indices.length > 0) {
|
|
97
|
+
const occurrence = occurrenceCount.get(targetStr) ?? 0;
|
|
98
|
+
occurrenceCount.set(targetStr, occurrence + 1);
|
|
99
|
+
dataIndex = indices[occurrence % indices.length];
|
|
100
|
+
} else {
|
|
101
|
+
dataIndex = result.__idx ?? i;
|
|
102
|
+
}
|
|
103
|
+
if (dataIndex < 0 || dataIndex >= sourceData.length) continue;
|
|
104
|
+
const sourceObj = sourceData[dataIndex];
|
|
105
|
+
const mappedTarget = extractedStrings[dataIndex] || targetStr;
|
|
106
|
+
if (objectsOnly) output[out++] = sourceObj;
|
|
107
|
+
else
|
|
108
|
+
output[out++] = {
|
|
109
|
+
obj: sourceObj,
|
|
110
|
+
key: this.key,
|
|
111
|
+
result: {
|
|
112
|
+
source: result.a,
|
|
113
|
+
target: mappedTarget,
|
|
114
|
+
match: result.res
|
|
115
|
+
},
|
|
116
|
+
...(result.raw ? { raw: result.raw } : null)
|
|
117
|
+
};
|
|
118
|
+
}
|
|
119
|
+
output.length = out;
|
|
120
|
+
return output;
|
|
121
|
+
} finally {
|
|
122
|
+
Pool.Pool.release('map', stringToIndices, m);
|
|
123
|
+
Pool.Pool.release('map', occurrenceCount, n);
|
|
93
124
|
}
|
|
94
|
-
output.length = out;
|
|
95
|
-
return output;
|
|
96
125
|
}
|
|
97
126
|
sort(results, sort) {
|
|
98
127
|
if (!sort || results.length <= 1) return results;
|
|
99
|
-
|
|
100
|
-
|
|
128
|
+
return results.sort(
|
|
129
|
+
sort === 'asc' ? StructuredData.SORT_ASC : StructuredData.SORT_DESC
|
|
130
|
+
);
|
|
101
131
|
}
|
|
102
132
|
finalizeLookup(results, extractedStrings, opt) {
|
|
103
133
|
return this.rebuild(
|
package/dist/esm/CmpStr.mjs
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
|
-
// CmpStr v3.
|
|
2
|
-
import {
|
|
1
|
+
// CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
|
|
2
|
+
import { DeepMerge } from './utils/DeepMerge.mjs';
|
|
3
3
|
import { DiffChecker } from './utils/DiffChecker.mjs';
|
|
4
4
|
import {
|
|
5
5
|
CmpStrInternalError,
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
ErrorUtil,
|
|
7
|
+
CmpStrValidationError
|
|
8
8
|
} from './utils/Errors.mjs';
|
|
9
9
|
import { Filter } from './utils/Filter.mjs';
|
|
10
10
|
import { Normalizer } from './utils/Normalizer.mjs';
|
|
11
|
+
import { OptionsValidator } from './utils/OptionsValidator.mjs';
|
|
11
12
|
import { Profiler } from './utils/Profiler.mjs';
|
|
12
13
|
import { factory } from './utils/Registry.mjs';
|
|
13
14
|
import { StructuredData } from './utils/StructuredData.mjs';
|
|
@@ -84,31 +85,26 @@ class CmpStr {
|
|
|
84
85
|
}
|
|
85
86
|
assert(cond, test) {
|
|
86
87
|
switch (cond) {
|
|
88
|
+
default:
|
|
89
|
+
throw new CmpStrInternalError(`Cmpstr condition <${cond}> unknown`);
|
|
87
90
|
case 'metric':
|
|
88
|
-
|
|
89
|
-
throw new CmpStrNotFoundError(
|
|
90
|
-
`CmpStr <metric> must be set, call .setMetric(), ` +
|
|
91
|
-
`use CmpStr.metric.list() for available metrics`,
|
|
92
|
-
{ metric: test }
|
|
93
|
-
);
|
|
91
|
+
OptionsValidator.validateMetricName(test);
|
|
94
92
|
break;
|
|
95
93
|
case 'phonetic':
|
|
96
|
-
|
|
97
|
-
throw new CmpStrNotFoundError(
|
|
98
|
-
`CmpStr <phonetic> must be set, call .setPhonetic(), ` +
|
|
99
|
-
`use CmpStr.phonetic.list() for available phonetic algorithms`,
|
|
100
|
-
{ phonetic: test }
|
|
101
|
-
);
|
|
94
|
+
OptionsValidator.validatePhoneticName(test);
|
|
102
95
|
break;
|
|
103
|
-
default:
|
|
104
|
-
throw new CmpStrInternalError(`Cmpstr condition <${cond}> unknown`);
|
|
105
96
|
}
|
|
106
97
|
}
|
|
107
98
|
assertMany(...cond) {
|
|
108
99
|
for (const [c, test] of cond) this.assert(c, test);
|
|
109
100
|
}
|
|
110
101
|
resolveOptions(opt) {
|
|
111
|
-
|
|
102
|
+
const merged = DeepMerge.merge(
|
|
103
|
+
{ ...(this.options ?? Object.create(null)) },
|
|
104
|
+
opt
|
|
105
|
+
);
|
|
106
|
+
OptionsValidator.validateOptions(merged);
|
|
107
|
+
return merged;
|
|
112
108
|
}
|
|
113
109
|
normalize(input, flags) {
|
|
114
110
|
return Normalizer.normalize(input, flags ?? this.options.flags ?? '');
|
|
@@ -124,7 +120,7 @@ class CmpStr {
|
|
|
124
120
|
return input;
|
|
125
121
|
}
|
|
126
122
|
postProcess(result, opt) {
|
|
127
|
-
if (
|
|
123
|
+
if (Array.isArray(result) && opt?.removeZero)
|
|
128
124
|
result = result.filter((r) => r.res > 0);
|
|
129
125
|
return result;
|
|
130
126
|
}
|
|
@@ -140,10 +136,10 @@ class CmpStr {
|
|
|
140
136
|
return StructuredData.create(data, key);
|
|
141
137
|
}
|
|
142
138
|
compute(a, b, opt, mode, raw, skip) {
|
|
139
|
+
const resolved = this.resolveOptions(opt);
|
|
140
|
+
this.assert('metric', resolved.metric);
|
|
143
141
|
return ErrorUtil.wrap(
|
|
144
142
|
() => {
|
|
145
|
-
const resolved = this.resolveOptions(opt);
|
|
146
|
-
this.assert('metric', resolved.metric);
|
|
147
143
|
const A = skip ? a : this.prepare(a, resolved);
|
|
148
144
|
const B = skip ? b : this.prepare(b, resolved);
|
|
149
145
|
if (
|
|
@@ -161,7 +157,7 @@ class CmpStr {
|
|
|
161
157
|
const result = this.postProcess(metric.getResults(), resolved);
|
|
162
158
|
return this.output(result, raw ?? resolved.raw);
|
|
163
159
|
},
|
|
164
|
-
`Failed to compute metric <${
|
|
160
|
+
`Failed to compute metric <${resolved.metric}> for the given inputs`,
|
|
165
161
|
{ a, b, options: opt }
|
|
166
162
|
);
|
|
167
163
|
}
|
|
@@ -177,46 +173,79 @@ class CmpStr {
|
|
|
177
173
|
{ result, raw }
|
|
178
174
|
);
|
|
179
175
|
}
|
|
180
|
-
clone
|
|
176
|
+
clone() {
|
|
177
|
+
const inst = Object.assign(
|
|
178
|
+
Object.create(Object.getPrototypeOf(this)),
|
|
179
|
+
this
|
|
180
|
+
);
|
|
181
|
+
inst.options = DeepMerge.merge(Object.create(null), this.options);
|
|
182
|
+
return inst;
|
|
183
|
+
}
|
|
181
184
|
reset() {
|
|
182
|
-
|
|
185
|
+
this.options = Object.create(null);
|
|
183
186
|
return this;
|
|
184
187
|
}
|
|
185
188
|
setOptions(opt) {
|
|
189
|
+
OptionsValidator.validateOptions(opt);
|
|
186
190
|
this.options = opt;
|
|
187
191
|
return this;
|
|
188
192
|
}
|
|
189
193
|
mergeOptions(opt) {
|
|
190
|
-
merge(this.options, opt);
|
|
194
|
+
DeepMerge.merge(this.options, opt);
|
|
195
|
+
OptionsValidator.validateOptions(this.options);
|
|
191
196
|
return this;
|
|
192
197
|
}
|
|
193
198
|
setSerializedOptions(opt) {
|
|
194
|
-
|
|
195
|
-
()
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
199
|
+
try {
|
|
200
|
+
const parsed = JSON.parse(opt);
|
|
201
|
+
OptionsValidator.validateOptions(parsed);
|
|
202
|
+
this.options = parsed;
|
|
203
|
+
return this;
|
|
204
|
+
} catch (err) {
|
|
205
|
+
if (err instanceof SyntaxError)
|
|
206
|
+
throw new CmpStrValidationError(
|
|
207
|
+
`Failed to parse serialized options, invalid JSON string`,
|
|
208
|
+
{ opt, error: err instanceof Error ? err.message : String(err) }
|
|
209
|
+
);
|
|
210
|
+
throw err;
|
|
211
|
+
}
|
|
202
212
|
}
|
|
203
213
|
setOption(path, value) {
|
|
204
|
-
set(this.options, path, value);
|
|
214
|
+
DeepMerge.set(this.options, path, value);
|
|
215
|
+
OptionsValidator.validateOptions(this.options);
|
|
205
216
|
return this;
|
|
206
217
|
}
|
|
207
218
|
rmvOption(path) {
|
|
208
|
-
rmv(this.options, path);
|
|
219
|
+
DeepMerge.rmv(this.options, path);
|
|
209
220
|
return this;
|
|
210
221
|
}
|
|
211
|
-
setRaw
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
222
|
+
setRaw(enable) {
|
|
223
|
+
return this.setOption('raw', enable);
|
|
224
|
+
}
|
|
225
|
+
setMetric(name) {
|
|
226
|
+
return this.setOption('metric', name);
|
|
227
|
+
}
|
|
228
|
+
setFlags(flags) {
|
|
229
|
+
return this.setOption('flags', flags);
|
|
230
|
+
}
|
|
231
|
+
rmvFlags() {
|
|
232
|
+
return this.rmvOption('flags');
|
|
233
|
+
}
|
|
234
|
+
setProcessors(opt) {
|
|
235
|
+
return this.setOption('processors', opt);
|
|
236
|
+
}
|
|
237
|
+
rmvProcessors() {
|
|
238
|
+
return this.rmvOption('processors');
|
|
239
|
+
}
|
|
240
|
+
getOptions() {
|
|
241
|
+
return this.options;
|
|
242
|
+
}
|
|
243
|
+
getSerializedOptions() {
|
|
244
|
+
return JSON.stringify(this.options);
|
|
245
|
+
}
|
|
246
|
+
getOption(path) {
|
|
247
|
+
return DeepMerge.get(this.options, path);
|
|
248
|
+
}
|
|
220
249
|
test(a, b, opt) {
|
|
221
250
|
return this.compute(a, b, opt, 'single');
|
|
222
251
|
}
|
|
@@ -255,15 +284,35 @@ class CmpStr {
|
|
|
255
284
|
const resolved = this.resolveOptions({ flags, processors });
|
|
256
285
|
const test = this.prepare(needle, resolved);
|
|
257
286
|
const hstk = this.prepare(haystack, resolved);
|
|
258
|
-
|
|
287
|
+
const out = [];
|
|
288
|
+
for (let i = 0, len = hstk.length; i < len; i++) {
|
|
289
|
+
if (hstk[i].includes(test)) out.push(haystack[i]);
|
|
290
|
+
}
|
|
291
|
+
return out;
|
|
259
292
|
}
|
|
260
293
|
matrix(input, opt) {
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
294
|
+
const resolved = this.resolveOptions(opt);
|
|
295
|
+
const arr = this.prepare(input, resolved);
|
|
296
|
+
const n = arr.length;
|
|
297
|
+
const out = Array.from({ length: n }, () => new Array(n).fill(0));
|
|
298
|
+
for (let i = 0; i < n; i++)
|
|
299
|
+
for (let j = i; j < n; j++) {
|
|
300
|
+
if (i === j) {
|
|
301
|
+
out[i][j] = 1;
|
|
302
|
+
} else {
|
|
303
|
+
const score = this.compute(
|
|
304
|
+
arr[i],
|
|
305
|
+
arr[j],
|
|
306
|
+
resolved,
|
|
307
|
+
'single',
|
|
308
|
+
true,
|
|
309
|
+
true
|
|
310
|
+
).res;
|
|
311
|
+
out[i][j] = score;
|
|
312
|
+
out[j][i] = score;
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
return out;
|
|
267
316
|
}
|
|
268
317
|
phoneticIndex(input, algo, opt) {
|
|
269
318
|
const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
|