cmpstr 3.0.4 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +2 -2
- package/README.md +3 -2
- package/dist/CmpStr.esm.js +362 -95
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +3 -3
- package/dist/CmpStr.esm.min.js.map +1 -1
- package/dist/CmpStr.umd.js +363 -94
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +3 -3
- package/dist/CmpStr.umd.min.js.map +1 -1
- package/dist/cjs/CmpStr.cjs +41 -1
- package/dist/cjs/CmpStr.cjs.map +1 -1
- package/dist/cjs/CmpStrAsync.cjs +38 -1
- package/dist/cjs/CmpStrAsync.cjs.map +1 -1
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +1 -1
- package/dist/cjs/metric/Hamming.cjs +1 -1
- package/dist/cjs/metric/Jaccard.cjs +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +1 -1
- package/dist/cjs/metric/LCS.cjs +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +1 -1
- package/dist/cjs/metric/Metric.cjs +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs +1 -1
- package/dist/cjs/metric/qGram.cjs +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +1 -1
- package/dist/cjs/phonetic/Cologne.cjs +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +1 -1
- package/dist/cjs/phonetic/Soundex.cjs +1 -1
- package/dist/cjs/root.cjs +13 -1
- package/dist/cjs/root.cjs.map +1 -1
- package/dist/cjs/utils/DeepMerge.cjs +1 -1
- package/dist/cjs/utils/DiffChecker.cjs +1 -1
- package/dist/cjs/utils/Filter.cjs +1 -1
- package/dist/cjs/utils/HashTable.cjs +6 -6
- package/dist/cjs/utils/HashTable.cjs.map +1 -1
- package/dist/cjs/utils/Normalizer.cjs +1 -1
- package/dist/cjs/utils/Pool.cjs +13 -4
- package/dist/cjs/utils/Pool.cjs.map +1 -1
- package/dist/cjs/utils/Profiler.cjs +1 -1
- package/dist/cjs/utils/Registry.cjs +1 -1
- package/dist/cjs/utils/StructuredData.cjs +157 -0
- package/dist/cjs/utils/StructuredData.cjs.map +1 -0
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -1
- package/dist/esm/CmpStr.mjs +41 -1
- package/dist/esm/CmpStr.mjs.map +1 -1
- package/dist/esm/CmpStrAsync.mjs +38 -1
- package/dist/esm/CmpStrAsync.mjs.map +1 -1
- package/dist/esm/index.mjs +1 -1
- package/dist/esm/metric/Cosine.mjs +1 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs +1 -1
- package/dist/esm/metric/DiceSorensen.mjs +1 -1
- package/dist/esm/metric/Hamming.mjs +1 -1
- package/dist/esm/metric/Jaccard.mjs +1 -1
- package/dist/esm/metric/JaroWinkler.mjs +1 -1
- package/dist/esm/metric/LCS.mjs +1 -1
- package/dist/esm/metric/Levenshtein.mjs +1 -1
- package/dist/esm/metric/Metric.mjs +1 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs +1 -1
- package/dist/esm/metric/SmithWaterman.mjs +1 -1
- package/dist/esm/metric/qGram.mjs +1 -1
- package/dist/esm/phonetic/Caverphone.mjs +1 -1
- package/dist/esm/phonetic/Cologne.mjs +1 -1
- package/dist/esm/phonetic/Metaphone.mjs +1 -1
- package/dist/esm/phonetic/Phonetic.mjs +1 -1
- package/dist/esm/phonetic/Soundex.mjs +1 -1
- package/dist/esm/root.mjs +7 -1
- package/dist/esm/root.mjs.map +1 -1
- package/dist/esm/utils/DeepMerge.mjs +1 -1
- package/dist/esm/utils/DiffChecker.mjs +1 -1
- package/dist/esm/utils/Filter.mjs +1 -1
- package/dist/esm/utils/HashTable.mjs +6 -6
- package/dist/esm/utils/HashTable.mjs.map +1 -1
- package/dist/esm/utils/Normalizer.mjs +1 -1
- package/dist/esm/utils/Pool.mjs +13 -4
- package/dist/esm/utils/Pool.mjs.map +1 -1
- package/dist/esm/utils/Profiler.mjs +1 -1
- package/dist/esm/utils/Registry.mjs +1 -1
- package/dist/esm/utils/StructuredData.mjs +155 -0
- package/dist/esm/utils/StructuredData.mjs.map +1 -0
- package/dist/esm/utils/TextAnalyzer.mjs +1 -1
- package/dist/types/CmpStr.d.ts +90 -8
- package/dist/types/CmpStrAsync.d.ts +82 -8
- package/dist/types/index.d.ts +3 -2
- package/dist/types/root.d.ts +3 -2
- package/dist/types/utils/Pool.d.ts +2 -2
- package/dist/types/utils/StructuredData.d.ts +164 -0
- package/dist/types/utils/Types.d.ts +43 -1
- package/package.json +53 -17
package/LICENSE
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
|
-
Copyright (c) 2023-
|
|
3
|
+
Copyright (c) 2023-2026 Paul Köhler (komed3)
|
|
4
4
|
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
|
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
18
18
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
19
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
20
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
[](https://www.typescriptlang.org)
|
|
6
6
|
[](https://npmjs.com/package/cmpstr)
|
|
7
7
|
[](https://bundlephobia.com/package/cmpstr)
|
|
8
|
-
[](https://
|
|
8
|
+
[](https://npm-stat.com/charts.html?package=cmpstr)
|
|
9
9
|
[](https://github.com/komed3/cmpstr/actions/workflows/build.yml)
|
|
10
10
|
[](https://github.com/komed3/cmpstr/wiki/Installation-&-Setup#import-in-your-project)
|
|
11
11
|
[](https://github.com/komed3/cmpstr/wiki/Installation-&-Setup#browser)
|
|
@@ -24,6 +24,7 @@ Originally launched in 2023 with a minimal feature set, the library was redesign
|
|
|
24
24
|
- Normalization and filtering pipeline for consistent input processing
|
|
25
25
|
- Single, batch and pairwise comparisons with structured, type-safe results
|
|
26
26
|
- Phonetic-aware search and comparison
|
|
27
|
+
- Structured data comparison by extracting object properties
|
|
27
28
|
- Utilities for text structure and readability analysis (e.g., syllables, word statistics)
|
|
28
29
|
- Diffing tools with CLI-friendly formatting
|
|
29
30
|
- TypeScript-native with full type declarations and extensibility
|
|
@@ -78,4 +79,4 @@ Try out or use CmpStr on the terminal. Install the **[cmpstr-cli](https://npmjs.
|
|
|
78
79
|
|
|
79
80
|
The full documentation, API reference and advanced usage examples are available in the [GitHub Wiki](https://github.com/komed3/cmpstr/wiki).
|
|
80
81
|
|
|
81
|
-
**LICENSE MIT © 2023-
|
|
82
|
+
**LICENSE MIT © 2023-2026 PAUL KÖHLER (KOMED3)**
|
package/dist/CmpStr.esm.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* CmpStr v3.
|
|
2
|
+
* CmpStr v3.1.1 build-a140f52-260119
|
|
3
3
|
* This is a lightweight, fast and well performing library for calculating string similarity.
|
|
4
|
-
* (c) 2023-
|
|
4
|
+
* (c) 2023-2026 Paul Köhler @komed3 / MIT License
|
|
5
5
|
* Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
|
|
6
6
|
*/
|
|
7
7
|
const parse = (p) =>
|
|
@@ -12,6 +12,12 @@ const parse = (p) =>
|
|
|
12
12
|
function get(t, path, fallback) {
|
|
13
13
|
return parse(path).reduce((o, k) => o?.[k] ?? fallback, t);
|
|
14
14
|
}
|
|
15
|
+
function has(t, path) {
|
|
16
|
+
return (
|
|
17
|
+
parse(path).reduce((o, k) => (o && k in o ? o[k] : undefined), t) !==
|
|
18
|
+
undefined
|
|
19
|
+
);
|
|
20
|
+
}
|
|
15
21
|
function set(t, path, value) {
|
|
16
22
|
if (path === '') return value;
|
|
17
23
|
const [k, ...r] = parse(path);
|
|
@@ -66,6 +72,15 @@ function rmv(t, path, preserveEmpty = false) {
|
|
|
66
72
|
return t;
|
|
67
73
|
}
|
|
68
74
|
|
|
75
|
+
var DeepMerge = /*#__PURE__*/ Object.freeze({
|
|
76
|
+
__proto__: null,
|
|
77
|
+
get: get,
|
|
78
|
+
has: has,
|
|
79
|
+
merge: merge,
|
|
80
|
+
rmv: rmv,
|
|
81
|
+
set: set
|
|
82
|
+
});
|
|
83
|
+
|
|
69
84
|
class Profiler {
|
|
70
85
|
static ENV;
|
|
71
86
|
static instance;
|
|
@@ -648,11 +663,11 @@ class Hasher {
|
|
|
648
663
|
const chunks = Math.floor(len / 4);
|
|
649
664
|
for (let i = 0; i < chunks; i++) {
|
|
650
665
|
const pos = i * 4;
|
|
651
|
-
const
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
666
|
+
const c0 = str.charCodeAt(pos);
|
|
667
|
+
const c1 = str.charCodeAt(pos + 1);
|
|
668
|
+
const c2 = str.charCodeAt(pos + 2);
|
|
669
|
+
const c3 = str.charCodeAt(pos + 3);
|
|
670
|
+
const chunk = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
|
|
656
671
|
hash ^= chunk;
|
|
657
672
|
hash *= this.FNV_PRIME;
|
|
658
673
|
}
|
|
@@ -830,6 +845,252 @@ class Filter {
|
|
|
830
845
|
}
|
|
831
846
|
}
|
|
832
847
|
|
|
848
|
+
class RingPool {
|
|
849
|
+
maxSize;
|
|
850
|
+
buffers = [];
|
|
851
|
+
pointer = 0;
|
|
852
|
+
constructor(maxSize) {
|
|
853
|
+
this.maxSize = maxSize;
|
|
854
|
+
}
|
|
855
|
+
acquire(minSize, allowOversize) {
|
|
856
|
+
const len = this.buffers.length;
|
|
857
|
+
for (let i = 0; i < len; i++) {
|
|
858
|
+
const idx = (this.pointer + i) % len;
|
|
859
|
+
const item = this.buffers[idx];
|
|
860
|
+
if (item.size >= minSize) {
|
|
861
|
+
this.pointer = (idx + 1) % len;
|
|
862
|
+
return allowOversize || item.size === minSize ? item : null;
|
|
863
|
+
}
|
|
864
|
+
}
|
|
865
|
+
return null;
|
|
866
|
+
}
|
|
867
|
+
release(item) {
|
|
868
|
+
if (this.buffers.length < this.maxSize) {
|
|
869
|
+
this.buffers.push(item);
|
|
870
|
+
} else {
|
|
871
|
+
this.buffers[this.pointer] = item;
|
|
872
|
+
this.pointer = (this.pointer + 1) % this.maxSize;
|
|
873
|
+
}
|
|
874
|
+
}
|
|
875
|
+
clear() {
|
|
876
|
+
this.buffers = [];
|
|
877
|
+
this.pointer = 0;
|
|
878
|
+
}
|
|
879
|
+
}
|
|
880
|
+
class Pool {
|
|
881
|
+
static CONFIG = {
|
|
882
|
+
uint16: {
|
|
883
|
+
type: 'uint16',
|
|
884
|
+
maxSize: 64,
|
|
885
|
+
maxItemSize: 2048,
|
|
886
|
+
allowOversize: true
|
|
887
|
+
},
|
|
888
|
+
'number[]': {
|
|
889
|
+
type: 'number[]',
|
|
890
|
+
maxSize: 16,
|
|
891
|
+
maxItemSize: 1024,
|
|
892
|
+
allowOversize: false
|
|
893
|
+
},
|
|
894
|
+
'string[]': {
|
|
895
|
+
type: 'string[]',
|
|
896
|
+
maxSize: 2,
|
|
897
|
+
maxItemSize: 1024,
|
|
898
|
+
allowOversize: false
|
|
899
|
+
},
|
|
900
|
+
set: { type: 'set', maxSize: 8, maxItemSize: 0, allowOversize: false },
|
|
901
|
+
map: { type: 'map', maxSize: 8, maxItemSize: 0, allowOversize: false }
|
|
902
|
+
};
|
|
903
|
+
static POOLS = {
|
|
904
|
+
uint16: new RingPool(64),
|
|
905
|
+
'number[]': new RingPool(16),
|
|
906
|
+
'string[]': new RingPool(2),
|
|
907
|
+
set: new RingPool(8),
|
|
908
|
+
map: new RingPool(8)
|
|
909
|
+
};
|
|
910
|
+
static allocate(type, size) {
|
|
911
|
+
switch (type) {
|
|
912
|
+
case 'uint16':
|
|
913
|
+
return new Uint16Array(size);
|
|
914
|
+
case 'number[]':
|
|
915
|
+
return new Float64Array(size);
|
|
916
|
+
case 'string[]':
|
|
917
|
+
return new Array(size);
|
|
918
|
+
case 'set':
|
|
919
|
+
return new Set();
|
|
920
|
+
case 'map':
|
|
921
|
+
return new Map();
|
|
922
|
+
}
|
|
923
|
+
}
|
|
924
|
+
static acquire(type, size) {
|
|
925
|
+
const CONFIG = this.CONFIG[type];
|
|
926
|
+
if (size > CONFIG.maxItemSize) return this.allocate(type, size);
|
|
927
|
+
const item = this.POOLS[type].acquire(size, CONFIG.allowOversize);
|
|
928
|
+
if (item) {
|
|
929
|
+
return type === 'uint16' ? item.buffer.subarray(0, size) : item.buffer;
|
|
930
|
+
}
|
|
931
|
+
return this.allocate(type, size);
|
|
932
|
+
}
|
|
933
|
+
static acquireMany(type, sizes) {
|
|
934
|
+
return sizes.map((size) => this.acquire(type, size));
|
|
935
|
+
}
|
|
936
|
+
static release(type, buffer, size) {
|
|
937
|
+
const CONFIG = this.CONFIG[type];
|
|
938
|
+
if (size <= CONFIG.maxItemSize) {
|
|
939
|
+
this.POOLS[type].release({ buffer, size });
|
|
940
|
+
}
|
|
941
|
+
}
|
|
942
|
+
}
|
|
943
|
+
|
|
944
|
+
class StructuredData {
|
|
945
|
+
static create(data, key) {
|
|
946
|
+
return new StructuredData(data, key);
|
|
947
|
+
}
|
|
948
|
+
data;
|
|
949
|
+
key;
|
|
950
|
+
constructor(data, key) {
|
|
951
|
+
this.data = data;
|
|
952
|
+
this.key = key;
|
|
953
|
+
}
|
|
954
|
+
extractFrom(arr, key) {
|
|
955
|
+
const result = Pool.acquire('string[]', arr.length);
|
|
956
|
+
for (let i = 0; i < arr.length; i++) {
|
|
957
|
+
const val = arr[i][key];
|
|
958
|
+
result[i] = typeof val === 'string' ? val : String(val ?? '');
|
|
959
|
+
}
|
|
960
|
+
return result;
|
|
961
|
+
}
|
|
962
|
+
extract() {
|
|
963
|
+
return this.extractFrom(this.data, this.key);
|
|
964
|
+
}
|
|
965
|
+
isMetricResult(v) {
|
|
966
|
+
return (
|
|
967
|
+
typeof v === 'object' && v !== null && 'a' in v && 'b' in v && 'res' in v
|
|
968
|
+
);
|
|
969
|
+
}
|
|
970
|
+
isCmpStrResult(v) {
|
|
971
|
+
return (
|
|
972
|
+
typeof v === 'object' &&
|
|
973
|
+
v !== null &&
|
|
974
|
+
'source' in v &&
|
|
975
|
+
'target' in v &&
|
|
976
|
+
'match' in v
|
|
977
|
+
);
|
|
978
|
+
}
|
|
979
|
+
normalizeResults(results) {
|
|
980
|
+
if (!Array.isArray(results) || results.length === 0) return [];
|
|
981
|
+
const first = results[0];
|
|
982
|
+
let normalized = [];
|
|
983
|
+
if (this.isMetricResult(first)) normalized = results;
|
|
984
|
+
else if (this.isCmpStrResult(first)) {
|
|
985
|
+
normalized = results.map((r) => ({
|
|
986
|
+
metric: 'unknown',
|
|
987
|
+
a: r.source,
|
|
988
|
+
b: r.target,
|
|
989
|
+
res: r.match,
|
|
990
|
+
raw: r.raw
|
|
991
|
+
}));
|
|
992
|
+
}
|
|
993
|
+
return normalized.map((r, idx) => ({ ...r, __idx: idx }));
|
|
994
|
+
}
|
|
995
|
+
rebuild(results, sourceData, extractedStrings, removeZero, objectsOnly) {
|
|
996
|
+
const stringToIndices = new Map();
|
|
997
|
+
for (let i = 0; i < extractedStrings.length; i++) {
|
|
998
|
+
const str = extractedStrings[i];
|
|
999
|
+
if (!stringToIndices.has(str)) stringToIndices.set(str, []);
|
|
1000
|
+
stringToIndices.get(str).push(i);
|
|
1001
|
+
}
|
|
1002
|
+
const output = new Array(results.length);
|
|
1003
|
+
const occurrenceCount = new Map();
|
|
1004
|
+
let out = 0;
|
|
1005
|
+
for (let i = 0; i < results.length; i++) {
|
|
1006
|
+
const result = results[i];
|
|
1007
|
+
if (removeZero && result.res === 0) continue;
|
|
1008
|
+
const targetStr = result.b || '';
|
|
1009
|
+
const indices = stringToIndices.get(targetStr);
|
|
1010
|
+
let dataIndex;
|
|
1011
|
+
if (indices && indices.length > 0) {
|
|
1012
|
+
const occurrence = occurrenceCount.get(targetStr) ?? 0;
|
|
1013
|
+
occurrenceCount.set(targetStr, occurrence + 1);
|
|
1014
|
+
dataIndex = indices[occurrence % indices.length];
|
|
1015
|
+
} else {
|
|
1016
|
+
dataIndex = result.__idx ?? i;
|
|
1017
|
+
}
|
|
1018
|
+
if (dataIndex < 0 || dataIndex >= sourceData.length) continue;
|
|
1019
|
+
const sourceObj = sourceData[dataIndex];
|
|
1020
|
+
const mappedTarget = extractedStrings[dataIndex] || targetStr;
|
|
1021
|
+
if (objectsOnly) output[out++] = sourceObj;
|
|
1022
|
+
else
|
|
1023
|
+
output[out++] = {
|
|
1024
|
+
obj: sourceObj,
|
|
1025
|
+
key: this.key,
|
|
1026
|
+
result: { source: result.a, target: mappedTarget, match: result.res },
|
|
1027
|
+
...(result.raw ? { raw: result.raw } : null)
|
|
1028
|
+
};
|
|
1029
|
+
}
|
|
1030
|
+
output.length = out;
|
|
1031
|
+
return output;
|
|
1032
|
+
}
|
|
1033
|
+
sort(results, sort) {
|
|
1034
|
+
if (!sort || results.length <= 1) return results;
|
|
1035
|
+
const asc = sort === 'asc';
|
|
1036
|
+
return results.sort((a, b) => (asc ? a.res - b.res : b.res - a.res));
|
|
1037
|
+
}
|
|
1038
|
+
performLookup(fn, extractedStrings, opt) {
|
|
1039
|
+
return this.rebuild(
|
|
1040
|
+
this.sort(this.normalizeResults(fn()), opt?.sort),
|
|
1041
|
+
this.data,
|
|
1042
|
+
extractedStrings,
|
|
1043
|
+
opt?.removeZero,
|
|
1044
|
+
opt?.objectsOnly
|
|
1045
|
+
);
|
|
1046
|
+
}
|
|
1047
|
+
async performLookupAsync(fn, extractedStrings, opt) {
|
|
1048
|
+
return this.rebuild(
|
|
1049
|
+
this.sort(this.normalizeResults(await fn()), opt?.sort),
|
|
1050
|
+
this.data,
|
|
1051
|
+
extractedStrings,
|
|
1052
|
+
opt?.removeZero,
|
|
1053
|
+
opt?.objectsOnly
|
|
1054
|
+
);
|
|
1055
|
+
}
|
|
1056
|
+
lookup(fn, query, opt) {
|
|
1057
|
+
const b = this.extract();
|
|
1058
|
+
try {
|
|
1059
|
+
return this.performLookup(() => fn(query, b, opt), b, opt);
|
|
1060
|
+
} finally {
|
|
1061
|
+
Pool.release('string[]', b, b.length);
|
|
1062
|
+
}
|
|
1063
|
+
}
|
|
1064
|
+
lookupPairs(fn, other, otherKey, opt) {
|
|
1065
|
+
const a = this.extract();
|
|
1066
|
+
const b = this.extractFrom(other, otherKey);
|
|
1067
|
+
try {
|
|
1068
|
+
return this.performLookup(() => fn(a, b, opt), a, opt);
|
|
1069
|
+
} finally {
|
|
1070
|
+
Pool.release('string[]', a, a.length);
|
|
1071
|
+
Pool.release('string[]', b, b.length);
|
|
1072
|
+
}
|
|
1073
|
+
}
|
|
1074
|
+
async lookupAsync(fn, query, opt) {
|
|
1075
|
+
const b = this.extract();
|
|
1076
|
+
try {
|
|
1077
|
+
return await this.performLookupAsync(() => fn(query, b, opt), b, opt);
|
|
1078
|
+
} finally {
|
|
1079
|
+
Pool.release('string[]', b, b.length);
|
|
1080
|
+
}
|
|
1081
|
+
}
|
|
1082
|
+
async lookupPairsAsync(fn, other, otherKey, opt) {
|
|
1083
|
+
const a = this.extract();
|
|
1084
|
+
const b = this.extractFrom(other, otherKey);
|
|
1085
|
+
try {
|
|
1086
|
+
return await this.performLookupAsync(() => fn(a, b, opt), a, opt);
|
|
1087
|
+
} finally {
|
|
1088
|
+
Pool.release('string[]', a, a.length);
|
|
1089
|
+
Pool.release('string[]', b, b.length);
|
|
1090
|
+
}
|
|
1091
|
+
}
|
|
1092
|
+
}
|
|
1093
|
+
|
|
833
1094
|
const registry = Object.create(null);
|
|
834
1095
|
const factory = Object.create(null);
|
|
835
1096
|
function Registry(reg, ctor) {
|
|
@@ -1056,93 +1317,6 @@ class Metric {
|
|
|
1056
1317
|
}
|
|
1057
1318
|
const MetricRegistry = Registry('metric', Metric);
|
|
1058
1319
|
|
|
1059
|
-
class RingPool {
|
|
1060
|
-
maxSize;
|
|
1061
|
-
buffers = [];
|
|
1062
|
-
pointer = 0;
|
|
1063
|
-
constructor(maxSize) {
|
|
1064
|
-
this.maxSize = maxSize;
|
|
1065
|
-
}
|
|
1066
|
-
acquire(minSize, allowOversize) {
|
|
1067
|
-
const len = this.buffers.length;
|
|
1068
|
-
for (let i = 0; i < len; i++) {
|
|
1069
|
-
const idx = (this.pointer + i) % len;
|
|
1070
|
-
const item = this.buffers[idx];
|
|
1071
|
-
if (item.size >= minSize) {
|
|
1072
|
-
this.pointer = (idx + 1) % len;
|
|
1073
|
-
return allowOversize || item.size === minSize ? item : null;
|
|
1074
|
-
}
|
|
1075
|
-
}
|
|
1076
|
-
return null;
|
|
1077
|
-
}
|
|
1078
|
-
release(item) {
|
|
1079
|
-
if (this.buffers.length < this.maxSize) {
|
|
1080
|
-
this.buffers.push(item);
|
|
1081
|
-
} else {
|
|
1082
|
-
this.buffers[this.pointer] = item;
|
|
1083
|
-
this.pointer = (this.pointer + 1) % this.maxSize;
|
|
1084
|
-
}
|
|
1085
|
-
}
|
|
1086
|
-
clear() {
|
|
1087
|
-
this.buffers = [];
|
|
1088
|
-
this.pointer = 0;
|
|
1089
|
-
}
|
|
1090
|
-
}
|
|
1091
|
-
class Pool {
|
|
1092
|
-
static CONFIG = {
|
|
1093
|
-
uint16: {
|
|
1094
|
-
type: 'uint16',
|
|
1095
|
-
maxSize: 32,
|
|
1096
|
-
maxItemSize: 2048,
|
|
1097
|
-
allowOversize: true
|
|
1098
|
-
},
|
|
1099
|
-
'number[]': {
|
|
1100
|
-
type: 'number[]',
|
|
1101
|
-
maxSize: 16,
|
|
1102
|
-
maxItemSize: 1024,
|
|
1103
|
-
allowOversize: false
|
|
1104
|
-
},
|
|
1105
|
-
set: { type: 'set', maxSize: 8, maxItemSize: 0, allowOversize: false },
|
|
1106
|
-
map: { type: 'map', maxSize: 8, maxItemSize: 0, allowOversize: false }
|
|
1107
|
-
};
|
|
1108
|
-
static POOLS = {
|
|
1109
|
-
uint16: new RingPool(32),
|
|
1110
|
-
'number[]': new RingPool(16),
|
|
1111
|
-
set: new RingPool(8),
|
|
1112
|
-
map: new RingPool(8)
|
|
1113
|
-
};
|
|
1114
|
-
static allocate(type, size) {
|
|
1115
|
-
switch (type) {
|
|
1116
|
-
case 'uint16':
|
|
1117
|
-
return new Uint16Array(size);
|
|
1118
|
-
case 'number[]':
|
|
1119
|
-
return new Array(size).fill(0);
|
|
1120
|
-
case 'set':
|
|
1121
|
-
return new Set();
|
|
1122
|
-
case 'map':
|
|
1123
|
-
return new Map();
|
|
1124
|
-
}
|
|
1125
|
-
}
|
|
1126
|
-
static acquire(type, size) {
|
|
1127
|
-
const CONFIG = this.CONFIG[type];
|
|
1128
|
-
if (size > CONFIG.maxItemSize) return this.allocate(type, size);
|
|
1129
|
-
const item = this.POOLS[type].acquire(size, CONFIG.allowOversize);
|
|
1130
|
-
if (item) {
|
|
1131
|
-
return type === 'uint16' ? item.buffer.subarray(0, size) : item.buffer;
|
|
1132
|
-
}
|
|
1133
|
-
return this.allocate(type, size);
|
|
1134
|
-
}
|
|
1135
|
-
static acquireMany(type, sizes) {
|
|
1136
|
-
return sizes.map((size) => this.acquire(type, size));
|
|
1137
|
-
}
|
|
1138
|
-
static release(type, buffer, size) {
|
|
1139
|
-
const CONFIG = this.CONFIG[type];
|
|
1140
|
-
if (size <= CONFIG.maxItemSize) {
|
|
1141
|
-
this.POOLS[type].release({ buffer, size });
|
|
1142
|
-
}
|
|
1143
|
-
}
|
|
1144
|
-
}
|
|
1145
|
-
|
|
1146
1320
|
class CosineSimilarity extends Metric {
|
|
1147
1321
|
constructor(a, b, opt = {}) {
|
|
1148
1322
|
super('cosine', a, b, opt, true);
|
|
@@ -2127,6 +2301,9 @@ class CmpStr {
|
|
|
2127
2301
|
? input.map((s) => phonetic.getIndex(s).join(delimiter))
|
|
2128
2302
|
: phonetic.getIndex(input).join(delimiter);
|
|
2129
2303
|
}
|
|
2304
|
+
structured(data, key) {
|
|
2305
|
+
return StructuredData.create(data, key);
|
|
2306
|
+
}
|
|
2130
2307
|
compute(a, b, opt, mode, raw, skip) {
|
|
2131
2308
|
const resolved = this.resolveOptions(opt);
|
|
2132
2309
|
this.assert('metric', resolved.metric);
|
|
@@ -2260,6 +2437,42 @@ class CmpStr {
|
|
|
2260
2437
|
const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
|
|
2261
2438
|
return this.index(input, { algo: algo ?? a, opt: opt ?? o });
|
|
2262
2439
|
}
|
|
2440
|
+
structuredLookup(query, data, key, opt) {
|
|
2441
|
+
return this.structured(data, key).lookup(
|
|
2442
|
+
(q, items, options) => this.batchTest(q, items, options),
|
|
2443
|
+
query,
|
|
2444
|
+
opt
|
|
2445
|
+
);
|
|
2446
|
+
}
|
|
2447
|
+
structuredMatch(query, data, key, threshold, opt) {
|
|
2448
|
+
return this.structured(data, key).lookup(
|
|
2449
|
+
(q, items, options) => this.match(q, items, threshold, options),
|
|
2450
|
+
query,
|
|
2451
|
+
{ ...opt, sort: 'desc' }
|
|
2452
|
+
);
|
|
2453
|
+
}
|
|
2454
|
+
structuredClosest(query, data, key, n = 1, opt) {
|
|
2455
|
+
return this.structured(data, key).lookup(
|
|
2456
|
+
(q, items, options) => this.closest(q, items, n, options),
|
|
2457
|
+
query,
|
|
2458
|
+
{ ...opt, sort: 'desc' }
|
|
2459
|
+
);
|
|
2460
|
+
}
|
|
2461
|
+
structuredFurthest(query, data, key, n = 1, opt) {
|
|
2462
|
+
return this.structured(data, key).lookup(
|
|
2463
|
+
(q, items, options) => this.furthest(q, items, n, options),
|
|
2464
|
+
query,
|
|
2465
|
+
{ ...opt, sort: 'asc' }
|
|
2466
|
+
);
|
|
2467
|
+
}
|
|
2468
|
+
structuredPairs(data, key, other, otherKey, opt) {
|
|
2469
|
+
return this.structured(data, key).lookupPairs(
|
|
2470
|
+
(items, otherItems, options) => this.pairs(items, otherItems, options),
|
|
2471
|
+
other,
|
|
2472
|
+
otherKey,
|
|
2473
|
+
opt
|
|
2474
|
+
);
|
|
2475
|
+
}
|
|
2263
2476
|
}
|
|
2264
2477
|
|
|
2265
2478
|
class CmpStrAsync extends CmpStr {
|
|
@@ -2373,7 +2586,61 @@ class CmpStrAsync extends CmpStr {
|
|
|
2373
2586
|
const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
|
|
2374
2587
|
return this.indexAsync(input, { algo: algo ?? a, opt: opt ?? o });
|
|
2375
2588
|
}
|
|
2589
|
+
async structuredLookupAsync(query, data, key, opt) {
|
|
2590
|
+
return await this.structured(data, key).lookupAsync(
|
|
2591
|
+
(q, items, options) => this.batchTestAsync(q, items, options),
|
|
2592
|
+
query,
|
|
2593
|
+
opt
|
|
2594
|
+
);
|
|
2595
|
+
}
|
|
2596
|
+
async structuredMatchAsync(query, data, key, threshold, opt) {
|
|
2597
|
+
return await this.structured(data, key).lookupAsync(
|
|
2598
|
+
(q, items, options) => this.matchAsync(q, items, threshold, options),
|
|
2599
|
+
query,
|
|
2600
|
+
{ ...opt, sort: 'desc' }
|
|
2601
|
+
);
|
|
2602
|
+
}
|
|
2603
|
+
async structuredClosestAsync(query, data, key, n = 1, opt) {
|
|
2604
|
+
return await this.structured(data, key).lookupAsync(
|
|
2605
|
+
(q, items, options) => this.closestAsync(q, items, n, options),
|
|
2606
|
+
query,
|
|
2607
|
+
{ ...opt, sort: 'desc' }
|
|
2608
|
+
);
|
|
2609
|
+
}
|
|
2610
|
+
async structuredFurthestAsync(query, data, key, n = 1, opt) {
|
|
2611
|
+
return await this.structured(data, key).lookupAsync(
|
|
2612
|
+
(q, items, options) => this.furthestAsync(q, items, n, options),
|
|
2613
|
+
query,
|
|
2614
|
+
{ ...opt, sort: 'asc' }
|
|
2615
|
+
);
|
|
2616
|
+
}
|
|
2617
|
+
async structuredPairsAsync(data, key, other, otherKey, opt) {
|
|
2618
|
+
return await this.structured(data, key).lookupPairsAsync(
|
|
2619
|
+
(items, otherItems, options) =>
|
|
2620
|
+
this.pairsAsync(items, otherItems, options),
|
|
2621
|
+
other,
|
|
2622
|
+
otherKey,
|
|
2623
|
+
opt
|
|
2624
|
+
);
|
|
2625
|
+
}
|
|
2376
2626
|
}
|
|
2377
2627
|
|
|
2378
|
-
export {
|
|
2628
|
+
export {
|
|
2629
|
+
CmpStr,
|
|
2630
|
+
CmpStrAsync,
|
|
2631
|
+
DeepMerge,
|
|
2632
|
+
DiffChecker,
|
|
2633
|
+
Filter,
|
|
2634
|
+
HashTable,
|
|
2635
|
+
Metric,
|
|
2636
|
+
MetricRegistry,
|
|
2637
|
+
Normalizer,
|
|
2638
|
+
Phonetic,
|
|
2639
|
+
PhoneticMappingRegistry,
|
|
2640
|
+
PhoneticRegistry,
|
|
2641
|
+
Pool,
|
|
2642
|
+
Profiler,
|
|
2643
|
+
StructuredData,
|
|
2644
|
+
TextAnalyzer
|
|
2645
|
+
};
|
|
2379
2646
|
//# sourceMappingURL=CmpStr.esm.js.map
|