cmpstr 3.1.0 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +2 -2
- package/README.md +1 -1
- package/dist/CmpStr.esm.js +37 -25
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +2 -2
- package/dist/CmpStr.esm.min.js.map +1 -1
- package/dist/CmpStr.umd.js +37 -21
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +2 -2
- package/dist/CmpStr.umd.min.js.map +1 -1
- package/dist/cjs/CmpStr.cjs +1 -1
- package/dist/cjs/CmpStrAsync.cjs +1 -1
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +1 -1
- package/dist/cjs/metric/Hamming.cjs +1 -1
- package/dist/cjs/metric/Jaccard.cjs +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +1 -1
- package/dist/cjs/metric/LCS.cjs +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +1 -1
- package/dist/cjs/metric/Metric.cjs +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs +1 -1
- package/dist/cjs/metric/qGram.cjs +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +1 -1
- package/dist/cjs/phonetic/Cologne.cjs +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +1 -1
- package/dist/cjs/phonetic/Soundex.cjs +1 -1
- package/dist/cjs/root.cjs +1 -1
- package/dist/cjs/utils/DeepMerge.cjs +1 -1
- package/dist/cjs/utils/DiffChecker.cjs +1 -1
- package/dist/cjs/utils/Filter.cjs +1 -1
- package/dist/cjs/utils/HashTable.cjs +6 -6
- package/dist/cjs/utils/HashTable.cjs.map +1 -1
- package/dist/cjs/utils/Normalizer.cjs +1 -1
- package/dist/cjs/utils/Pool.cjs +6 -6
- package/dist/cjs/utils/Pool.cjs.map +1 -1
- package/dist/cjs/utils/Profiler.cjs +1 -1
- package/dist/cjs/utils/Registry.cjs +1 -1
- package/dist/cjs/utils/StructuredData.cjs +27 -15
- package/dist/cjs/utils/StructuredData.cjs.map +1 -1
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -1
- package/dist/esm/CmpStr.mjs +1 -1
- package/dist/esm/CmpStrAsync.mjs +1 -1
- package/dist/esm/index.mjs +1 -1
- package/dist/esm/metric/Cosine.mjs +1 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs +1 -1
- package/dist/esm/metric/DiceSorensen.mjs +1 -1
- package/dist/esm/metric/Hamming.mjs +1 -1
- package/dist/esm/metric/Jaccard.mjs +1 -1
- package/dist/esm/metric/JaroWinkler.mjs +1 -1
- package/dist/esm/metric/LCS.mjs +1 -1
- package/dist/esm/metric/Levenshtein.mjs +1 -1
- package/dist/esm/metric/Metric.mjs +1 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs +1 -1
- package/dist/esm/metric/SmithWaterman.mjs +1 -1
- package/dist/esm/metric/qGram.mjs +1 -1
- package/dist/esm/phonetic/Caverphone.mjs +1 -1
- package/dist/esm/phonetic/Cologne.mjs +1 -1
- package/dist/esm/phonetic/Metaphone.mjs +1 -1
- package/dist/esm/phonetic/Phonetic.mjs +1 -1
- package/dist/esm/phonetic/Soundex.mjs +1 -1
- package/dist/esm/root.mjs +1 -1
- package/dist/esm/utils/DeepMerge.mjs +1 -1
- package/dist/esm/utils/DiffChecker.mjs +1 -1
- package/dist/esm/utils/Filter.mjs +1 -1
- package/dist/esm/utils/HashTable.mjs +6 -6
- package/dist/esm/utils/HashTable.mjs.map +1 -1
- package/dist/esm/utils/Normalizer.mjs +1 -1
- package/dist/esm/utils/Pool.mjs +6 -6
- package/dist/esm/utils/Pool.mjs.map +1 -1
- package/dist/esm/utils/Profiler.mjs +1 -1
- package/dist/esm/utils/Registry.mjs +1 -1
- package/dist/esm/utils/StructuredData.mjs +27 -15
- package/dist/esm/utils/StructuredData.mjs.map +1 -1
- package/dist/esm/utils/TextAnalyzer.mjs +1 -1
- package/dist/types/index.d.ts +2 -2
- package/dist/types/root.d.ts +1 -1
- package/dist/types/utils/StructuredData.d.ts +13 -11
- package/dist/types/utils/Types.d.ts +8 -0
- package/package.json +3 -2
package/LICENSE
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
|
-
Copyright (c) 2023-
|
|
3
|
+
Copyright (c) 2023-2026 Paul Köhler (komed3)
|
|
4
4
|
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
|
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
18
18
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
19
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
20
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -79,4 +79,4 @@ Try out or use CmpStr on the terminal. Install the **[cmpstr-cli](https://npmjs.
|
|
|
79
79
|
|
|
80
80
|
The full documentation, API reference and advanced usage examples are available in the [GitHub Wiki](https://github.com/komed3/cmpstr/wiki).
|
|
81
81
|
|
|
82
|
-
**LICENSE MIT © 2023-
|
|
82
|
+
**LICENSE MIT © 2023-2026 PAUL KÖHLER (KOMED3)**
|
package/dist/CmpStr.esm.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* CmpStr v3.1.
|
|
2
|
+
* CmpStr v3.1.1 build-a140f52-260119
|
|
3
3
|
* This is a lightweight, fast and well performing library for calculating string similarity.
|
|
4
4
|
* (c) 2023-2026 Paul Köhler @komed3 / MIT License
|
|
5
5
|
* Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
|
|
@@ -663,11 +663,11 @@ class Hasher {
|
|
|
663
663
|
const chunks = Math.floor(len / 4);
|
|
664
664
|
for (let i = 0; i < chunks; i++) {
|
|
665
665
|
const pos = i * 4;
|
|
666
|
-
const
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
666
|
+
const c0 = str.charCodeAt(pos);
|
|
667
|
+
const c1 = str.charCodeAt(pos + 1);
|
|
668
|
+
const c2 = str.charCodeAt(pos + 2);
|
|
669
|
+
const c3 = str.charCodeAt(pos + 3);
|
|
670
|
+
const chunk = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
|
|
671
671
|
hash ^= chunk;
|
|
672
672
|
hash *= this.FNV_PRIME;
|
|
673
673
|
}
|
|
@@ -881,7 +881,7 @@ class Pool {
|
|
|
881
881
|
static CONFIG = {
|
|
882
882
|
uint16: {
|
|
883
883
|
type: 'uint16',
|
|
884
|
-
maxSize:
|
|
884
|
+
maxSize: 64,
|
|
885
885
|
maxItemSize: 2048,
|
|
886
886
|
allowOversize: true
|
|
887
887
|
},
|
|
@@ -893,7 +893,7 @@ class Pool {
|
|
|
893
893
|
},
|
|
894
894
|
'string[]': {
|
|
895
895
|
type: 'string[]',
|
|
896
|
-
maxSize:
|
|
896
|
+
maxSize: 2,
|
|
897
897
|
maxItemSize: 1024,
|
|
898
898
|
allowOversize: false
|
|
899
899
|
},
|
|
@@ -901,9 +901,9 @@ class Pool {
|
|
|
901
901
|
map: { type: 'map', maxSize: 8, maxItemSize: 0, allowOversize: false }
|
|
902
902
|
};
|
|
903
903
|
static POOLS = {
|
|
904
|
-
uint16: new RingPool(
|
|
904
|
+
uint16: new RingPool(64),
|
|
905
905
|
'number[]': new RingPool(16),
|
|
906
|
-
'string[]': new RingPool(
|
|
906
|
+
'string[]': new RingPool(2),
|
|
907
907
|
set: new RingPool(8),
|
|
908
908
|
map: new RingPool(8)
|
|
909
909
|
};
|
|
@@ -912,7 +912,7 @@ class Pool {
|
|
|
912
912
|
case 'uint16':
|
|
913
913
|
return new Uint16Array(size);
|
|
914
914
|
case 'number[]':
|
|
915
|
-
return new
|
|
915
|
+
return new Float64Array(size);
|
|
916
916
|
case 'string[]':
|
|
917
917
|
return new Array(size);
|
|
918
918
|
case 'set':
|
|
@@ -979,39 +979,51 @@ class StructuredData {
|
|
|
979
979
|
normalizeResults(results) {
|
|
980
980
|
if (!Array.isArray(results) || results.length === 0) return [];
|
|
981
981
|
const first = results[0];
|
|
982
|
-
|
|
983
|
-
if (this.
|
|
984
|
-
|
|
982
|
+
let normalized = [];
|
|
983
|
+
if (this.isMetricResult(first)) normalized = results;
|
|
984
|
+
else if (this.isCmpStrResult(first)) {
|
|
985
|
+
normalized = results.map((r) => ({
|
|
985
986
|
metric: 'unknown',
|
|
986
987
|
a: r.source,
|
|
987
988
|
b: r.target,
|
|
988
989
|
res: r.match,
|
|
989
990
|
raw: r.raw
|
|
990
991
|
}));
|
|
991
|
-
|
|
992
|
+
}
|
|
993
|
+
return normalized.map((r, idx) => ({ ...r, __idx: idx }));
|
|
992
994
|
}
|
|
993
995
|
rebuild(results, sourceData, extractedStrings, removeZero, objectsOnly) {
|
|
996
|
+
const stringToIndices = new Map();
|
|
997
|
+
for (let i = 0; i < extractedStrings.length; i++) {
|
|
998
|
+
const str = extractedStrings[i];
|
|
999
|
+
if (!stringToIndices.has(str)) stringToIndices.set(str, []);
|
|
1000
|
+
stringToIndices.get(str).push(i);
|
|
1001
|
+
}
|
|
994
1002
|
const output = new Array(results.length);
|
|
1003
|
+
const occurrenceCount = new Map();
|
|
995
1004
|
let out = 0;
|
|
996
1005
|
for (let i = 0; i < results.length; i++) {
|
|
997
1006
|
const result = results[i];
|
|
998
1007
|
if (removeZero && result.res === 0) continue;
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1008
|
+
const targetStr = result.b || '';
|
|
1009
|
+
const indices = stringToIndices.get(targetStr);
|
|
1010
|
+
let dataIndex;
|
|
1011
|
+
if (indices && indices.length > 0) {
|
|
1012
|
+
const occurrence = occurrenceCount.get(targetStr) ?? 0;
|
|
1013
|
+
occurrenceCount.set(targetStr, occurrence + 1);
|
|
1014
|
+
dataIndex = indices[occurrence % indices.length];
|
|
1015
|
+
} else {
|
|
1016
|
+
dataIndex = result.__idx ?? i;
|
|
1017
|
+
}
|
|
1018
|
+
if (dataIndex < 0 || dataIndex >= sourceData.length) continue;
|
|
1004
1019
|
const sourceObj = sourceData[dataIndex];
|
|
1020
|
+
const mappedTarget = extractedStrings[dataIndex] || targetStr;
|
|
1005
1021
|
if (objectsOnly) output[out++] = sourceObj;
|
|
1006
1022
|
else
|
|
1007
1023
|
output[out++] = {
|
|
1008
1024
|
obj: sourceObj,
|
|
1009
1025
|
key: this.key,
|
|
1010
|
-
result: {
|
|
1011
|
-
source: result.a,
|
|
1012
|
-
target: extractedStrings[dataIndex] || result.b,
|
|
1013
|
-
match: result.res
|
|
1014
|
-
},
|
|
1026
|
+
result: { source: result.a, target: mappedTarget, match: result.res },
|
|
1015
1027
|
...(result.raw ? { raw: result.raw } : null)
|
|
1016
1028
|
};
|
|
1017
1029
|
}
|