cmpstr 3.2.0 → 3.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -18
- package/dist/CmpStr.esm.js +490 -220
- package/dist/CmpStr.esm.min.js +2 -3
- package/dist/CmpStr.umd.js +489 -220
- package/dist/CmpStr.umd.min.js +2 -3
- package/dist/cjs/CmpStr.cjs +58 -36
- package/dist/cjs/CmpStrAsync.cjs +30 -24
- package/dist/cjs/index.cjs +1 -2
- package/dist/cjs/metric/Cosine.cjs +1 -2
- package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -2
- package/dist/cjs/metric/DiceSorensen.cjs +1 -2
- package/dist/cjs/metric/Hamming.cjs +5 -4
- package/dist/cjs/metric/Jaccard.cjs +1 -2
- package/dist/cjs/metric/JaroWinkler.cjs +1 -2
- package/dist/cjs/metric/LCS.cjs +1 -2
- package/dist/cjs/metric/Levenshtein.cjs +1 -2
- package/dist/cjs/metric/Metric.cjs +57 -38
- package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -2
- package/dist/cjs/metric/QGram.cjs +1 -2
- package/dist/cjs/metric/SmithWaterman.cjs +1 -2
- package/dist/cjs/phonetic/Caverphone.cjs +1 -2
- package/dist/cjs/phonetic/Cologne.cjs +1 -2
- package/dist/cjs/phonetic/Metaphone.cjs +1 -2
- package/dist/cjs/phonetic/Phonetic.cjs +55 -35
- package/dist/cjs/phonetic/Soundex.cjs +1 -2
- package/dist/cjs/root.cjs +3 -2
- package/dist/cjs/utils/DeepMerge.cjs +10 -5
- package/dist/cjs/utils/DiffChecker.cjs +1 -2
- package/dist/cjs/utils/Errors.cjs +103 -0
- package/dist/cjs/utils/Filter.cjs +56 -27
- package/dist/cjs/utils/HashTable.cjs +1 -2
- package/dist/cjs/utils/Normalizer.cjs +54 -34
- package/dist/cjs/utils/Pool.cjs +42 -18
- package/dist/cjs/utils/Profiler.cjs +1 -2
- package/dist/cjs/utils/Registry.cjs +46 -22
- package/dist/cjs/utils/StructuredData.cjs +13 -5
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -2
- package/dist/esm/CmpStr.mjs +56 -32
- package/dist/esm/CmpStrAsync.mjs +26 -20
- package/dist/esm/index.mjs +1 -2
- package/dist/esm/metric/Cosine.mjs +1 -2
- package/dist/esm/metric/DamerauLevenshtein.mjs +1 -2
- package/dist/esm/metric/DiceSorensen.mjs +1 -2
- package/dist/esm/metric/Hamming.mjs +5 -4
- package/dist/esm/metric/Jaccard.mjs +1 -2
- package/dist/esm/metric/JaroWinkler.mjs +1 -2
- package/dist/esm/metric/LCS.mjs +1 -2
- package/dist/esm/metric/Levenshtein.mjs +1 -2
- package/dist/esm/metric/Metric.mjs +59 -38
- package/dist/esm/metric/NeedlemanWunsch.mjs +1 -2
- package/dist/esm/metric/QGram.mjs +1 -2
- package/dist/esm/metric/SmithWaterman.mjs +1 -2
- package/dist/esm/phonetic/Caverphone.mjs +1 -2
- package/dist/esm/phonetic/Cologne.mjs +1 -2
- package/dist/esm/phonetic/Metaphone.mjs +1 -2
- package/dist/esm/phonetic/Phonetic.mjs +55 -35
- package/dist/esm/phonetic/Soundex.mjs +1 -2
- package/dist/esm/root.mjs +3 -2
- package/dist/esm/utils/DeepMerge.mjs +10 -5
- package/dist/esm/utils/DiffChecker.mjs +1 -2
- package/dist/esm/utils/Errors.mjs +103 -0
- package/dist/esm/utils/Filter.mjs +56 -27
- package/dist/esm/utils/HashTable.mjs +1 -2
- package/dist/esm/utils/Normalizer.mjs +54 -34
- package/dist/esm/utils/Pool.mjs +38 -18
- package/dist/esm/utils/Profiler.mjs +1 -2
- package/dist/esm/utils/Registry.mjs +46 -22
- package/dist/esm/utils/StructuredData.mjs +13 -5
- package/dist/esm/utils/TextAnalyzer.mjs +1 -2
- package/dist/types/CmpStr.d.ts +12 -6
- package/dist/types/CmpStrAsync.d.ts +6 -4
- package/dist/types/index.d.ts +3 -2
- package/dist/types/metric/Cosine.d.ts +2 -1
- package/dist/types/metric/DamerauLevenshtein.d.ts +2 -1
- package/dist/types/metric/DiceSorensen.d.ts +2 -1
- package/dist/types/metric/Hamming.d.ts +2 -1
- package/dist/types/metric/Jaccard.d.ts +2 -1
- package/dist/types/metric/JaroWinkler.d.ts +2 -1
- package/dist/types/metric/LCS.d.ts +2 -1
- package/dist/types/metric/Levenshtein.d.ts +2 -1
- package/dist/types/metric/Metric.d.ts +7 -5
- package/dist/types/metric/NeedlemanWunsch.d.ts +2 -1
- package/dist/types/metric/QGram.d.ts +2 -1
- package/dist/types/metric/SmithWaterman.d.ts +2 -1
- package/dist/types/metric/index.d.ts +1 -0
- package/dist/types/phonetic/Caverphone.d.ts +2 -1
- package/dist/types/phonetic/Cologne.d.ts +2 -1
- package/dist/types/phonetic/Metaphone.d.ts +2 -1
- package/dist/types/phonetic/Phonetic.d.ts +4 -1
- package/dist/types/phonetic/Soundex.d.ts +2 -1
- package/dist/types/phonetic/index.d.ts +1 -0
- package/dist/types/root.d.ts +2 -1
- package/dist/types/utils/DeepMerge.d.ts +3 -2
- package/dist/types/utils/DiffChecker.d.ts +2 -1
- package/dist/types/utils/Errors.d.ts +137 -0
- package/dist/types/utils/Filter.d.ts +33 -22
- package/dist/types/utils/HashTable.d.ts +2 -1
- package/dist/types/utils/Normalizer.d.ts +5 -1
- package/dist/types/utils/Pool.d.ts +4 -1
- package/dist/types/utils/Profiler.d.ts +3 -2
- package/dist/types/utils/Registry.d.ts +5 -4
- package/dist/types/utils/StructuredData.d.ts +5 -2
- package/dist/types/utils/TextAnalyzer.d.ts +2 -1
- package/dist/types/utils/Types.d.ts +34 -2
- package/package.json +10 -7
- package/dist/CmpStr.esm.js.map +0 -1
- package/dist/CmpStr.esm.min.js.map +0 -1
- package/dist/CmpStr.umd.js.map +0 -1
- package/dist/CmpStr.umd.min.js.map +0 -1
- package/dist/cjs/CmpStr.cjs.map +0 -1
- package/dist/cjs/CmpStrAsync.cjs.map +0 -1
- package/dist/cjs/index.cjs.map +0 -1
- package/dist/cjs/metric/Cosine.cjs.map +0 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +0 -1
- package/dist/cjs/metric/DiceSorensen.cjs.map +0 -1
- package/dist/cjs/metric/Hamming.cjs.map +0 -1
- package/dist/cjs/metric/Jaccard.cjs.map +0 -1
- package/dist/cjs/metric/JaroWinkler.cjs.map +0 -1
- package/dist/cjs/metric/LCS.cjs.map +0 -1
- package/dist/cjs/metric/Levenshtein.cjs.map +0 -1
- package/dist/cjs/metric/Metric.cjs.map +0 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +0 -1
- package/dist/cjs/metric/QGram.cjs.map +0 -1
- package/dist/cjs/metric/SmithWaterman.cjs.map +0 -1
- package/dist/cjs/phonetic/Caverphone.cjs.map +0 -1
- package/dist/cjs/phonetic/Cologne.cjs.map +0 -1
- package/dist/cjs/phonetic/Metaphone.cjs.map +0 -1
- package/dist/cjs/phonetic/Phonetic.cjs.map +0 -1
- package/dist/cjs/phonetic/Soundex.cjs.map +0 -1
- package/dist/cjs/root.cjs.map +0 -1
- package/dist/cjs/utils/DeepMerge.cjs.map +0 -1
- package/dist/cjs/utils/DiffChecker.cjs.map +0 -1
- package/dist/cjs/utils/Filter.cjs.map +0 -1
- package/dist/cjs/utils/HashTable.cjs.map +0 -1
- package/dist/cjs/utils/Normalizer.cjs.map +0 -1
- package/dist/cjs/utils/Pool.cjs.map +0 -1
- package/dist/cjs/utils/Profiler.cjs.map +0 -1
- package/dist/cjs/utils/Registry.cjs.map +0 -1
- package/dist/cjs/utils/StructuredData.cjs.map +0 -1
- package/dist/cjs/utils/TextAnalyzer.cjs.map +0 -1
- package/dist/esm/CmpStr.mjs.map +0 -1
- package/dist/esm/CmpStrAsync.mjs.map +0 -1
- package/dist/esm/index.mjs.map +0 -1
- package/dist/esm/metric/Cosine.mjs.map +0 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +0 -1
- package/dist/esm/metric/DiceSorensen.mjs.map +0 -1
- package/dist/esm/metric/Hamming.mjs.map +0 -1
- package/dist/esm/metric/Jaccard.mjs.map +0 -1
- package/dist/esm/metric/JaroWinkler.mjs.map +0 -1
- package/dist/esm/metric/LCS.mjs.map +0 -1
- package/dist/esm/metric/Levenshtein.mjs.map +0 -1
- package/dist/esm/metric/Metric.mjs.map +0 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +0 -1
- package/dist/esm/metric/QGram.mjs.map +0 -1
- package/dist/esm/metric/SmithWaterman.mjs.map +0 -1
- package/dist/esm/phonetic/Caverphone.mjs.map +0 -1
- package/dist/esm/phonetic/Cologne.mjs.map +0 -1
- package/dist/esm/phonetic/Metaphone.mjs.map +0 -1
- package/dist/esm/phonetic/Phonetic.mjs.map +0 -1
- package/dist/esm/phonetic/Soundex.mjs.map +0 -1
- package/dist/esm/root.mjs.map +0 -1
- package/dist/esm/utils/DeepMerge.mjs.map +0 -1
- package/dist/esm/utils/DiffChecker.mjs.map +0 -1
- package/dist/esm/utils/Filter.mjs.map +0 -1
- package/dist/esm/utils/HashTable.mjs.map +0 -1
- package/dist/esm/utils/Normalizer.mjs.map +0 -1
- package/dist/esm/utils/Pool.mjs.map +0 -1
- package/dist/esm/utils/Profiler.mjs.map +0 -1
- package/dist/esm/utils/Registry.mjs.map +0 -1
- package/dist/esm/utils/StructuredData.mjs.map +0 -1
- package/dist/esm/utils/TextAnalyzer.mjs.map +0 -1
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.2.
|
|
1
|
+
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
class Hasher {
|
|
3
3
|
static FNV_PRIME = 0x01000193;
|
|
4
4
|
static HASH_OFFSET = 0x811c9dc5;
|
|
@@ -62,4 +62,3 @@ class HashTable {
|
|
|
62
62
|
}
|
|
63
63
|
|
|
64
64
|
export { HashTable, Hasher };
|
|
65
|
-
//# sourceMappingURL=HashTable.mjs.map
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
// CmpStr v3.2.
|
|
1
|
+
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
|
|
2
|
+
import { ErrorUtil } from './Errors.mjs';
|
|
2
3
|
import { HashTable } from './HashTable.mjs';
|
|
3
4
|
|
|
4
5
|
class Normalizer {
|
|
@@ -15,42 +16,62 @@ class Normalizer {
|
|
|
15
16
|
return Array.from(new Set(flags)).sort().join('');
|
|
16
17
|
}
|
|
17
18
|
static getPipeline(flags) {
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
19
|
+
return ErrorUtil.wrap(
|
|
20
|
+
() => {
|
|
21
|
+
if (Normalizer.pipeline.has(flags))
|
|
22
|
+
return Normalizer.pipeline.get(flags);
|
|
23
|
+
const { REGEX } = Normalizer;
|
|
24
|
+
const steps = [
|
|
25
|
+
['d', (s) => s.normalize('NFD')],
|
|
26
|
+
['i', (s) => s.toLowerCase()],
|
|
27
|
+
['k', (s) => s.replace(REGEX.nonLetters, '')],
|
|
28
|
+
['n', (s) => s.replace(REGEX.nonNumbers, '')],
|
|
29
|
+
['r', (s) => s.replace(REGEX.doubleChars, '$1')],
|
|
30
|
+
['s', (s) => s.replace(REGEX.specialChars, '')],
|
|
31
|
+
['t', (s) => s.trim()],
|
|
32
|
+
['u', (s) => s.normalize('NFC')],
|
|
33
|
+
['w', (s) => s.replace(REGEX.whitespace, ' ')],
|
|
34
|
+
['x', (s) => s.normalize('NFKC')]
|
|
35
|
+
];
|
|
36
|
+
const pipeline = steps
|
|
37
|
+
.filter(([f]) => flags.includes(f))
|
|
38
|
+
.map(([, fn]) => fn);
|
|
39
|
+
const fn = (s) => pipeline.reduce((v, f) => f(v), s);
|
|
40
|
+
Normalizer.pipeline.set(flags, fn);
|
|
41
|
+
return fn;
|
|
42
|
+
},
|
|
43
|
+
`Failed to create normalization pipeline for flags: ${flags}`,
|
|
44
|
+
{ flags }
|
|
45
|
+
);
|
|
38
46
|
}
|
|
39
47
|
static normalize(input, flags) {
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
48
|
+
return ErrorUtil.wrap(
|
|
49
|
+
() => {
|
|
50
|
+
if (!flags || typeof flags !== 'string' || !input) return input;
|
|
51
|
+
flags = this.canonicalFlags(flags);
|
|
52
|
+
if (Array.isArray(input))
|
|
53
|
+
return input.map((s) => Normalizer.normalize(s, flags));
|
|
54
|
+
const key = Normalizer.cache.key(flags, [input]);
|
|
55
|
+
if (key && Normalizer.cache.has(key)) return Normalizer.cache.get(key);
|
|
56
|
+
const res = Normalizer.getPipeline(flags)(input);
|
|
57
|
+
if (key) Normalizer.cache.set(key, res);
|
|
58
|
+
return res;
|
|
59
|
+
},
|
|
60
|
+
`Failed to normalize input with flags: ${flags}`,
|
|
61
|
+
{ input, flags }
|
|
62
|
+
);
|
|
49
63
|
}
|
|
50
64
|
static async normalizeAsync(input, flags) {
|
|
51
|
-
return await
|
|
52
|
-
|
|
53
|
-
|
|
65
|
+
return await ErrorUtil.wrapAsync(
|
|
66
|
+
async () => {
|
|
67
|
+
if (!flags || typeof flags !== 'string' || !input) return input;
|
|
68
|
+
return await (Array.isArray(input)
|
|
69
|
+
? Promise.all(input.map((s) => Normalizer.normalize(s, flags)))
|
|
70
|
+
: Promise.resolve(Normalizer.normalize(input, flags)));
|
|
71
|
+
},
|
|
72
|
+
`Failed to asynchronously normalize input with flags: ${flags}`,
|
|
73
|
+
{ input, flags }
|
|
74
|
+
);
|
|
54
75
|
}
|
|
55
76
|
static clear() {
|
|
56
77
|
Normalizer.pipeline.clear();
|
|
@@ -59,4 +80,3 @@ class Normalizer {
|
|
|
59
80
|
}
|
|
60
81
|
|
|
61
82
|
export { Normalizer };
|
|
62
|
-
//# sourceMappingURL=Normalizer.mjs.map
|
package/dist/esm/utils/Pool.mjs
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
// CmpStr v3.2.
|
|
1
|
+
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
|
|
2
|
+
import { CmpStrUsageError, ErrorUtil } from './Errors.mjs';
|
|
3
|
+
|
|
2
4
|
class RingPool {
|
|
3
5
|
maxSize;
|
|
4
6
|
buffers = [];
|
|
@@ -7,22 +9,37 @@ class RingPool {
|
|
|
7
9
|
this.maxSize = maxSize;
|
|
8
10
|
}
|
|
9
11
|
acquire(minSize, allowOversize) {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
12
|
+
return ErrorUtil.wrap(
|
|
13
|
+
() => {
|
|
14
|
+
const len = this.buffers.length;
|
|
15
|
+
for (let i = 0; i < len; i++) {
|
|
16
|
+
const idx = (this.pointer + i) & (len - 1);
|
|
17
|
+
const item = this.buffers[idx];
|
|
18
|
+
if (
|
|
19
|
+
item.size >= minSize &&
|
|
20
|
+
(allowOversize || item.size === minSize)
|
|
21
|
+
) {
|
|
22
|
+
this.pointer = (idx + 1) & (len - 1);
|
|
23
|
+
return item;
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
return null;
|
|
27
|
+
},
|
|
28
|
+
`Failed to acquire buffer of size >= ${minSize} from pool`,
|
|
29
|
+
{ minSize, allowOversize }
|
|
30
|
+
);
|
|
20
31
|
}
|
|
21
32
|
release(item) {
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
33
|
+
ErrorUtil.wrap(
|
|
34
|
+
() => {
|
|
35
|
+
if (this.buffers.length < this.maxSize)
|
|
36
|
+
return void [this.buffers.push(item)];
|
|
37
|
+
this.buffers[this.pointer] = item;
|
|
38
|
+
this.pointer = (this.pointer + 1) % this.maxSize;
|
|
39
|
+
},
|
|
40
|
+
`Failed to release buffer back to pool`,
|
|
41
|
+
{ item }
|
|
42
|
+
);
|
|
26
43
|
}
|
|
27
44
|
clear() {
|
|
28
45
|
this.buffers = [];
|
|
@@ -75,6 +92,8 @@ class Pool {
|
|
|
75
92
|
}
|
|
76
93
|
static acquire(type, size) {
|
|
77
94
|
const CONFIG = this.CONFIG[type];
|
|
95
|
+
if (!CONFIG)
|
|
96
|
+
throw new CmpStrUsageError(`Unsupported pool type <${type}>`, { type });
|
|
78
97
|
if (size > CONFIG.maxItemSize) return this.allocate(type, size);
|
|
79
98
|
const item = this.POOLS[type].acquire(size, CONFIG.allowOversize);
|
|
80
99
|
if (item)
|
|
@@ -85,10 +104,11 @@ class Pool {
|
|
|
85
104
|
return sizes.map((size) => this.acquire(type, size));
|
|
86
105
|
}
|
|
87
106
|
static release(type, buffer, size) {
|
|
88
|
-
|
|
89
|
-
|
|
107
|
+
const CONFIG = this.CONFIG[type];
|
|
108
|
+
if (!CONFIG)
|
|
109
|
+
throw new CmpStrUsageError(`Unsupported pool type <${type}>`, { type });
|
|
110
|
+
if (size <= CONFIG.maxItemSize) this.POOLS[type].release({ buffer, size });
|
|
90
111
|
}
|
|
91
112
|
}
|
|
92
113
|
|
|
93
114
|
export { Pool };
|
|
94
|
-
//# sourceMappingURL=Pool.mjs.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.2.
|
|
1
|
+
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
class Profiler {
|
|
3
3
|
active;
|
|
4
4
|
static ENV;
|
|
@@ -79,4 +79,3 @@ class Profiler {
|
|
|
79
79
|
}
|
|
80
80
|
|
|
81
81
|
export { Profiler };
|
|
82
|
-
//# sourceMappingURL=Profiler.mjs.map
|
|
@@ -1,20 +1,37 @@
|
|
|
1
|
-
// CmpStr v3.2.
|
|
1
|
+
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
|
|
2
|
+
import { ErrorUtil, CmpStrNotFoundError } from './Errors.mjs';
|
|
3
|
+
|
|
2
4
|
const registry = Object.create(null);
|
|
3
5
|
const factory = Object.create(null);
|
|
4
6
|
function Registry(reg, ctor) {
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
ErrorUtil.assert(
|
|
8
|
+
!(reg in registry || reg in factory),
|
|
9
|
+
`Registry <${reg}> already exists / overwriting is forbidden`,
|
|
10
|
+
{ registry: reg }
|
|
11
|
+
);
|
|
9
12
|
const classes = Object.create(null);
|
|
10
13
|
const service = Object.freeze({
|
|
11
14
|
add(name, cls, update = false) {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
15
|
+
ErrorUtil.assert(
|
|
16
|
+
typeof name === 'string' && name.length > 0,
|
|
17
|
+
`Class name must be a non-empty string`,
|
|
18
|
+
{ registry: reg, name }
|
|
19
|
+
);
|
|
20
|
+
ErrorUtil.assert(
|
|
21
|
+
typeof cls === 'function',
|
|
22
|
+
`Class must be a constructor function`,
|
|
23
|
+
{ registry: reg, class: cls }
|
|
24
|
+
);
|
|
25
|
+
ErrorUtil.assert(
|
|
26
|
+
cls.prototype instanceof ctor,
|
|
27
|
+
`Class must extend <${reg}>`,
|
|
28
|
+
{ registry: reg, class: cls }
|
|
29
|
+
);
|
|
30
|
+
ErrorUtil.assert(
|
|
31
|
+
update || !(name in classes),
|
|
32
|
+
`Class <${name}> already exists / use <update=true> to overwrite`,
|
|
33
|
+
{ registry: reg, name }
|
|
34
|
+
);
|
|
18
35
|
classes[name] = cls;
|
|
19
36
|
},
|
|
20
37
|
remove(name) {
|
|
@@ -27,8 +44,16 @@ function Registry(reg, ctor) {
|
|
|
27
44
|
return Object.keys(classes);
|
|
28
45
|
},
|
|
29
46
|
get(name) {
|
|
30
|
-
|
|
31
|
-
|
|
47
|
+
ErrorUtil.assert(
|
|
48
|
+
typeof name === 'string' && name.length > 0,
|
|
49
|
+
`Class name must be a non-empty string`,
|
|
50
|
+
{ registry: reg, name }
|
|
51
|
+
);
|
|
52
|
+
ErrorUtil.assert(
|
|
53
|
+
name in classes,
|
|
54
|
+
`Class <${name}> not registered for <${reg}>`,
|
|
55
|
+
{ registry: reg, name }
|
|
56
|
+
);
|
|
32
57
|
return classes[name];
|
|
33
58
|
}
|
|
34
59
|
});
|
|
@@ -38,19 +63,18 @@ function Registry(reg, ctor) {
|
|
|
38
63
|
}
|
|
39
64
|
function resolveCls(reg, cls) {
|
|
40
65
|
if (!(reg in registry))
|
|
41
|
-
throw new
|
|
66
|
+
throw new CmpStrNotFoundError(`Registry <${reg}> does not exist`, {
|
|
67
|
+
registry: reg
|
|
68
|
+
});
|
|
42
69
|
return typeof cls === 'string' ? registry[reg]?.get(cls) : cls;
|
|
43
70
|
}
|
|
44
71
|
function createFromRegistry(reg, cls, ...args) {
|
|
45
72
|
cls = resolveCls(reg, cls);
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
});
|
|
52
|
-
}
|
|
73
|
+
return ErrorUtil.wrap(
|
|
74
|
+
() => new cls(...args),
|
|
75
|
+
`Failed to create instance of class <${cls.name ?? cls}> from registry <${reg}>`,
|
|
76
|
+
{ registry: reg, class: cls, args }
|
|
77
|
+
);
|
|
53
78
|
}
|
|
54
79
|
|
|
55
80
|
export { Registry, createFromRegistry, factory, registry, resolveCls };
|
|
56
|
-
//# sourceMappingURL=Registry.mjs.map
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
// CmpStr v3.2.
|
|
1
|
+
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
|
|
2
|
+
import { CmpStrValidationError, ErrorUtil } from './Errors.mjs';
|
|
2
3
|
import { Pool } from './Pool.mjs';
|
|
3
4
|
|
|
4
5
|
class StructuredData {
|
|
@@ -48,7 +49,7 @@ class StructuredData {
|
|
|
48
49
|
raw: r.raw
|
|
49
50
|
}));
|
|
50
51
|
else
|
|
51
|
-
throw new
|
|
52
|
+
throw new CmpStrValidationError(
|
|
52
53
|
'Unsupported result format for StructuredData normalization.'
|
|
53
54
|
);
|
|
54
55
|
return normalized.map((r, idx) => ({ ...r, __idx: idx }));
|
|
@@ -106,10 +107,18 @@ class StructuredData {
|
|
|
106
107
|
);
|
|
107
108
|
}
|
|
108
109
|
performLookup(fn, extractedStrings, opt) {
|
|
109
|
-
return
|
|
110
|
+
return ErrorUtil.wrap(
|
|
111
|
+
() => this.finalizeLookup(fn(), extractedStrings, opt),
|
|
112
|
+
'StructuredData lookup failed',
|
|
113
|
+
{ key: this.key }
|
|
114
|
+
);
|
|
110
115
|
}
|
|
111
116
|
async performLookupAsync(fn, extractedStrings, opt) {
|
|
112
|
-
return
|
|
117
|
+
return await ErrorUtil.wrapAsync(
|
|
118
|
+
async () => this.finalizeLookup(await fn(), extractedStrings, opt),
|
|
119
|
+
'StructuredData async lookup failed',
|
|
120
|
+
{ key: this.key }
|
|
121
|
+
);
|
|
113
122
|
}
|
|
114
123
|
lookup(fn, query, opt) {
|
|
115
124
|
const b = this.extract();
|
|
@@ -150,4 +159,3 @@ class StructuredData {
|
|
|
150
159
|
}
|
|
151
160
|
|
|
152
161
|
export { StructuredData };
|
|
153
|
-
//# sourceMappingURL=StructuredData.mjs.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// CmpStr v3.2.
|
|
1
|
+
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
class TextAnalyzer {
|
|
3
3
|
static REGEX = {
|
|
4
4
|
number: /\d/,
|
|
@@ -194,4 +194,3 @@ class TextAnalyzer {
|
|
|
194
194
|
}
|
|
195
195
|
|
|
196
196
|
export { TextAnalyzer };
|
|
197
|
-
//# sourceMappingURL=TextAnalyzer.mjs.map
|
package/dist/types/CmpStr.d.ts
CHANGED
|
@@ -16,11 +16,12 @@
|
|
|
16
16
|
* - Text analysis and unified diff utilities
|
|
17
17
|
* - Full TypeScript type safety and extensibility
|
|
18
18
|
*
|
|
19
|
-
* @module
|
|
19
|
+
* @module Main
|
|
20
|
+
* @name CmpStr
|
|
20
21
|
* @author Paul Köhler (komed3)
|
|
21
22
|
* @license MIT
|
|
22
23
|
*/
|
|
23
|
-
import type { BatchResultLike, CmpStrOptions, CmpStrProcessors, CmpStrResult, DiffOptions, MetricInput, MetricMode, MetricRaw, MetricResult, NormalizeFlags, PhoneticOptions, ResultLike, StructuredDataOptions, StructuredResultLike } from './utils/Types';
|
|
24
|
+
import type { BatchResultLike, CmpStrOptions, CmpStrProcessors, CmpStrResult, DiffOptions, FilterHooks, MetricInput, MetricMode, MetricRaw, MetricResult, NormalizeFlags, PhoneticOptions, ResultLike, StructuredDataOptions, StructuredResultLike } from './utils/Types';
|
|
24
25
|
import { DiffChecker } from './utils/DiffChecker';
|
|
25
26
|
import { Filter } from './utils/Filter';
|
|
26
27
|
import { Normalizer } from './utils/Normalizer';
|
|
@@ -92,10 +93,11 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
92
93
|
*/
|
|
93
94
|
static readonly profiler: import("./utils/Types").ProfilerService<any>;
|
|
94
95
|
/**
|
|
95
|
-
* Clears the caches for normalizer, metric, and phonetic modules.
|
|
96
|
+
* Clears the caches for normalizer, filter pipelines, metric, and phonetic modules.
|
|
96
97
|
*/
|
|
97
98
|
static readonly clearCache: {
|
|
98
99
|
normalizer: typeof Normalizer.clear;
|
|
100
|
+
filter: typeof Filter.clearPipeline;
|
|
99
101
|
metric: () => void;
|
|
100
102
|
phonetic: () => void;
|
|
101
103
|
};
|
|
@@ -151,7 +153,8 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
151
153
|
*
|
|
152
154
|
* @param {string} cond - The condition to met
|
|
153
155
|
* @param {any} [test] - Value to test for
|
|
154
|
-
* @throws {
|
|
156
|
+
* @throws {CmpStrNotFoundError} - If the specified metric or phonetic algorithm is not found
|
|
157
|
+
* @throws {CmpStrInternalError} - If an unknown condition is specified
|
|
155
158
|
*/
|
|
156
159
|
protected assert(cond: string, test?: any): void;
|
|
157
160
|
/**
|
|
@@ -180,10 +183,10 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
180
183
|
* Applies all active filters to the input string or array.
|
|
181
184
|
*
|
|
182
185
|
* @param {MetricInput} input - The input string or array
|
|
183
|
-
* @param {
|
|
186
|
+
* @param {FilterHooks} [hook] - The filter hook
|
|
184
187
|
* @returns {MetricInput} - The filtered string(s)
|
|
185
188
|
*/
|
|
186
|
-
protected filter(input: MetricInput, hook:
|
|
189
|
+
protected filter(input: MetricInput, hook: FilterHooks): MetricInput;
|
|
187
190
|
/**
|
|
188
191
|
* Prepares the input by normalizing and filtering.
|
|
189
192
|
*
|
|
@@ -231,6 +234,7 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
231
234
|
* @param {boolean} [raw=false] - Whether to return raw results
|
|
232
235
|
* @param {boolean} [skip=false] - Whether to skip normalization and filtering
|
|
233
236
|
* @returns {T} - The computed metric result
|
|
237
|
+
* @throws {CmpStrInternalError} - If the computation fails due to internal errors
|
|
234
238
|
*/
|
|
235
239
|
protected compute<T extends MetricResult<R> | CmpStrResult | CmpStrResult[]>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions, mode?: MetricMode, raw?: boolean, skip?: boolean): T;
|
|
236
240
|
/**
|
|
@@ -240,6 +244,7 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
240
244
|
* @param {MetricResult< R >} result - The metric result
|
|
241
245
|
* @param {boolean} [raw] - Whether to return raw results
|
|
242
246
|
* @returns {T} - The resolved result
|
|
247
|
+
* @throws {CmpStrInternalError} - If the output format cannot be resolved
|
|
243
248
|
*/
|
|
244
249
|
protected output<T extends MetricResult<R> | CmpStrResult | CmpStrResult[]>(result: MetricResult<R>, raw?: boolean): T;
|
|
245
250
|
/**
|
|
@@ -281,6 +286,7 @@ export declare class CmpStr<R = MetricRaw> {
|
|
|
281
286
|
*
|
|
282
287
|
* @param {string} opt - The serialized options
|
|
283
288
|
* @returns {this}
|
|
289
|
+
* @throws {CmpStrValidationError} - If the provided string is not valid JSON
|
|
284
290
|
*/
|
|
285
291
|
setSerializedOptions(opt: string): this;
|
|
286
292
|
/**
|
|
@@ -15,11 +15,12 @@
|
|
|
15
15
|
* - Full compatibility with the synchronous CmpStr API
|
|
16
16
|
* - Designed for large-scale, high-performance, and server-side applications
|
|
17
17
|
*
|
|
18
|
-
* @module
|
|
18
|
+
* @module Main
|
|
19
|
+
* @name CmpStrAsync
|
|
19
20
|
* @author Paul Köhler (komed3)
|
|
20
21
|
* @license MIT
|
|
21
22
|
*/
|
|
22
|
-
import type { BatchResultLike, CmpStrOptions, CmpStrProcessors, CmpStrResult, MetricInput, MetricMode, MetricRaw, MetricResult, NormalizeFlags, PhoneticOptions, ResultLike, StructuredDataOptions, StructuredResultLike } from './utils/Types';
|
|
23
|
+
import type { BatchResultLike, CmpStrOptions, CmpStrProcessors, CmpStrResult, FilterHooks, MetricInput, MetricMode, MetricRaw, MetricResult, NormalizeFlags, PhoneticOptions, ResultLike, StructuredDataOptions, StructuredResultLike } from './utils/Types';
|
|
23
24
|
import { CmpStr } from './CmpStr';
|
|
24
25
|
/**
|
|
25
26
|
* The CmpStrAsync class provides a fully asynchronous API for string comparison,
|
|
@@ -69,10 +70,10 @@ export declare class CmpStrAsync<R = MetricRaw> extends CmpStr<R> {
|
|
|
69
70
|
* Asynchronously applies all active filters to the input string or array.
|
|
70
71
|
*
|
|
71
72
|
* @param {MetricInput} input - The input string or array
|
|
72
|
-
* @param {
|
|
73
|
+
* @param {FilterHooks} [hook='input'] - The filter hook
|
|
73
74
|
* @returns {Promise< MetricInput >} - The filtered string(s)
|
|
74
75
|
*/
|
|
75
|
-
protected filterAsync(input: MetricInput, hook:
|
|
76
|
+
protected filterAsync(input: MetricInput, hook: FilterHooks): Promise<MetricInput>;
|
|
76
77
|
/**
|
|
77
78
|
* Asynchronously prepares the input by normalizing and filtering.
|
|
78
79
|
*
|
|
@@ -105,6 +106,7 @@ export declare class CmpStrAsync<R = MetricRaw> extends CmpStr<R> {
|
|
|
105
106
|
* @param {boolean} [raw=false] - Whether to return raw results
|
|
106
107
|
* @param {boolean} [skip=false] - Whether to skip normalization and filtering
|
|
107
108
|
* @returns {Promise< T >} - The computed metric result
|
|
109
|
+
* @throws {CmpStrInternalError} - If the computation fails due to internal errors
|
|
108
110
|
*/
|
|
109
111
|
protected computeAsync<T extends MetricResult<R> | CmpStrResult | CmpStrResult[]>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions, mode?: MetricMode, raw?: boolean, skip?: boolean): Promise<T>;
|
|
110
112
|
/**
|
package/dist/types/index.d.ts
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* filtering, and text analysis. It is designed for both high-level application development
|
|
8
8
|
* and research, offering a unified API for single, batch, and pairwise operations.
|
|
9
9
|
*
|
|
10
|
-
* Version: 3.2.
|
|
10
|
+
* Version: 3.2.2
|
|
11
11
|
* Author: Paul Köhler (komed3)
|
|
12
12
|
* License: MIT
|
|
13
13
|
*
|
|
@@ -41,11 +41,12 @@
|
|
|
41
41
|
* For asynchronous workloads, use `CmpStrAsync`, which provides the same API with
|
|
42
42
|
* Promise-based, non-blocking methods for large-scale or I/O-bound operations.
|
|
43
43
|
*
|
|
44
|
-
* @version 3.2.
|
|
44
|
+
* @version 3.2.2
|
|
45
45
|
* @author Paul Köhler (komed3)
|
|
46
46
|
* @license MIT
|
|
47
47
|
*/
|
|
48
48
|
export * from './utils/Types';
|
|
49
|
+
export type { CmpStrError, CmpStrValidationError, CmpStrNotFoundError, CmpStrUsageError, CmpStrInternalError } from './utils/Errors';
|
|
49
50
|
export { CmpStr } from './CmpStr';
|
|
50
51
|
export { CmpStrAsync } from './CmpStrAsync';
|
|
51
52
|
export { DiffChecker } from './utils/DiffChecker';
|
|
@@ -12,7 +12,8 @@
|
|
|
12
12
|
* The result is a value between 0 and 1, where 1 means the vectors are identical and 0
|
|
13
13
|
* means they are orthogonal (no similarity).
|
|
14
14
|
*
|
|
15
|
-
* @module Metric
|
|
15
|
+
* @module Metric
|
|
16
|
+
* @name CosineSimilarity
|
|
16
17
|
* @author Paul Köhler (komed3)
|
|
17
18
|
* @license MIT
|
|
18
19
|
*/
|
|
@@ -14,7 +14,8 @@
|
|
|
14
14
|
* the coefficient based on these bigrams. It handles edge cases, such as empty
|
|
15
15
|
* strings and identical strings, to ensure accurate results.
|
|
16
16
|
*
|
|
17
|
-
* @module Metric
|
|
17
|
+
* @module Metric
|
|
18
|
+
* @name DiceSorensenCoefficient
|
|
18
19
|
* @author Paul Köhler (komed3)
|
|
19
20
|
* @license MIT
|
|
20
21
|
*/
|
|
@@ -10,7 +10,8 @@
|
|
|
10
10
|
* This implementation allows for optional padding of the shorter string to equalize
|
|
11
11
|
* lengths, otherwise it throws an error if the strings are of unequal length.
|
|
12
12
|
*
|
|
13
|
-
* @module Metric
|
|
13
|
+
* @module Metric
|
|
14
|
+
* @name HammingDistance
|
|
14
15
|
* @author Paul Köhler (komed3)
|
|
15
16
|
* @license MIT
|
|
16
17
|
*/
|
|
@@ -10,7 +10,8 @@
|
|
|
10
10
|
* tokens, or n-grams. The result is a value between 0 and 1, where 1 means the
|
|
11
11
|
* sets are identical and 0 means they have no elements in common.
|
|
12
12
|
*
|
|
13
|
-
* @module Metric
|
|
13
|
+
* @module Metric
|
|
14
|
+
* @name JaccardIndex
|
|
14
15
|
* @author Paul Köhler (komed3)
|
|
15
16
|
* @license MIT
|
|
16
17
|
*/
|
|
@@ -11,7 +11,8 @@
|
|
|
11
11
|
* It is widely used in approximate string matching, spell checking, and natural
|
|
12
12
|
* language processing.
|
|
13
13
|
*
|
|
14
|
-
* @module Metric
|
|
14
|
+
* @module Metric
|
|
15
|
+
* @name LevenshteinDistance
|
|
15
16
|
* @author Paul Köhler (komed3)
|
|
16
17
|
* @license MIT
|
|
17
18
|
*/
|
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
* implement the `compute` method to define the specific metric computation logic.
|
|
22
22
|
*
|
|
23
23
|
* @module Metric
|
|
24
|
+
* @name Metric
|
|
24
25
|
* @author Paul Köhler (komed3)
|
|
25
26
|
* @license MIT
|
|
26
27
|
*/
|
|
@@ -82,7 +83,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
82
83
|
* @param {MetricInput} b - Second input string or array of strings
|
|
83
84
|
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
84
85
|
* @param {boolean} [symmetric=false] - Whether the metric is symmetric (same result for inputs in any order)
|
|
85
|
-
* @throws {
|
|
86
|
+
* @throws {CmpStrUsageError} - If the inputs are empty or invalid
|
|
86
87
|
*/
|
|
87
88
|
constructor(metric: string, a: MetricInput, b: MetricInput, opt?: MetricOptions, symmetric?: boolean);
|
|
88
89
|
/**
|
|
@@ -106,7 +107,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
106
107
|
* @param {number} n - Length of the second string
|
|
107
108
|
* @param {number} maxLen - Maximum length of the strings
|
|
108
109
|
* @returns {MetricCompute< R >} - The result of the metric computation
|
|
109
|
-
* @throws {
|
|
110
|
+
* @throws {CmpStrInternalError} - If the method is not overridden in a subclass
|
|
110
111
|
*/
|
|
111
112
|
protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<R>;
|
|
112
113
|
/**
|
|
@@ -118,6 +119,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
118
119
|
* @param {number} i - Pointer to the first string
|
|
119
120
|
* @param {number} j - Pointer to the second string
|
|
120
121
|
* @returns {MetricResultSingle< R >} - The result of the metric computation
|
|
122
|
+
* @throws {CmpStrInternalError} - If the metric computation fails for the given inputs
|
|
121
123
|
*/
|
|
122
124
|
private runSingle;
|
|
123
125
|
/**
|
|
@@ -183,7 +185,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
183
185
|
*
|
|
184
186
|
* @returns {boolean} - True if both inputs are arrays of equal length
|
|
185
187
|
* @param {boolean} [safe=false] - If true, does not throw an error if lengths are not equal
|
|
186
|
-
* @throws {
|
|
188
|
+
* @throws {CmpStrUsageError} - If `safe` is false and the lengths of `a` and `b` are not equal
|
|
187
189
|
*/
|
|
188
190
|
isPairwise(safe?: boolean): boolean;
|
|
189
191
|
/**
|
|
@@ -218,7 +220,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
218
220
|
*
|
|
219
221
|
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
|
|
220
222
|
* @param {boolean} [clear=true] - Whether to clear previous results before running
|
|
221
|
-
* @throws {
|
|
223
|
+
* @throws {CmpStrInternalError} - If an unsupported mode is specified
|
|
222
224
|
*/
|
|
223
225
|
run(mode?: MetricMode, clear?: boolean): void;
|
|
224
226
|
/**
|
|
@@ -227,7 +229,7 @@ export declare abstract class Metric<R = MetricRaw> {
|
|
|
227
229
|
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
|
|
228
230
|
* @param {boolean} [clear=true] - Whether to clear previous results before running
|
|
229
231
|
* @returns {Promise<void>} - A promise that resolves when the metric computation is complete
|
|
230
|
-
* @throws {
|
|
232
|
+
* @throws {CmpStrInternalError} - If an unsupported mode is specified
|
|
231
233
|
*/
|
|
232
234
|
runAsync(mode?: MetricMode, clear?: boolean): Promise<void>;
|
|
233
235
|
/**
|