cmpstr 3.1.0 → 3.1.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/LICENSE +2 -2
  2. package/README.md +1 -1
  3. package/dist/CmpStr.esm.js +37 -25
  4. package/dist/CmpStr.esm.js.map +1 -1
  5. package/dist/CmpStr.esm.min.js +2 -2
  6. package/dist/CmpStr.esm.min.js.map +1 -1
  7. package/dist/CmpStr.umd.js +37 -21
  8. package/dist/CmpStr.umd.js.map +1 -1
  9. package/dist/CmpStr.umd.min.js +2 -2
  10. package/dist/CmpStr.umd.min.js.map +1 -1
  11. package/dist/cjs/CmpStr.cjs +1 -1
  12. package/dist/cjs/CmpStrAsync.cjs +1 -1
  13. package/dist/cjs/index.cjs +1 -1
  14. package/dist/cjs/metric/Cosine.cjs +1 -1
  15. package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -1
  16. package/dist/cjs/metric/DiceSorensen.cjs +1 -1
  17. package/dist/cjs/metric/Hamming.cjs +1 -1
  18. package/dist/cjs/metric/Jaccard.cjs +1 -1
  19. package/dist/cjs/metric/JaroWinkler.cjs +1 -1
  20. package/dist/cjs/metric/LCS.cjs +1 -1
  21. package/dist/cjs/metric/Levenshtein.cjs +1 -1
  22. package/dist/cjs/metric/Metric.cjs +1 -1
  23. package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -1
  24. package/dist/cjs/metric/SmithWaterman.cjs +1 -1
  25. package/dist/cjs/metric/qGram.cjs +1 -1
  26. package/dist/cjs/phonetic/Caverphone.cjs +1 -1
  27. package/dist/cjs/phonetic/Cologne.cjs +1 -1
  28. package/dist/cjs/phonetic/Metaphone.cjs +1 -1
  29. package/dist/cjs/phonetic/Phonetic.cjs +1 -1
  30. package/dist/cjs/phonetic/Soundex.cjs +1 -1
  31. package/dist/cjs/root.cjs +1 -1
  32. package/dist/cjs/utils/DeepMerge.cjs +1 -1
  33. package/dist/cjs/utils/DiffChecker.cjs +1 -1
  34. package/dist/cjs/utils/Filter.cjs +1 -1
  35. package/dist/cjs/utils/HashTable.cjs +6 -6
  36. package/dist/cjs/utils/HashTable.cjs.map +1 -1
  37. package/dist/cjs/utils/Normalizer.cjs +1 -1
  38. package/dist/cjs/utils/Pool.cjs +6 -6
  39. package/dist/cjs/utils/Pool.cjs.map +1 -1
  40. package/dist/cjs/utils/Profiler.cjs +1 -1
  41. package/dist/cjs/utils/Registry.cjs +1 -1
  42. package/dist/cjs/utils/StructuredData.cjs +27 -15
  43. package/dist/cjs/utils/StructuredData.cjs.map +1 -1
  44. package/dist/cjs/utils/TextAnalyzer.cjs +1 -1
  45. package/dist/esm/CmpStr.mjs +1 -1
  46. package/dist/esm/CmpStrAsync.mjs +1 -1
  47. package/dist/esm/index.mjs +1 -1
  48. package/dist/esm/metric/Cosine.mjs +1 -1
  49. package/dist/esm/metric/DamerauLevenshtein.mjs +1 -1
  50. package/dist/esm/metric/DiceSorensen.mjs +1 -1
  51. package/dist/esm/metric/Hamming.mjs +1 -1
  52. package/dist/esm/metric/Jaccard.mjs +1 -1
  53. package/dist/esm/metric/JaroWinkler.mjs +1 -1
  54. package/dist/esm/metric/LCS.mjs +1 -1
  55. package/dist/esm/metric/Levenshtein.mjs +1 -1
  56. package/dist/esm/metric/Metric.mjs +1 -1
  57. package/dist/esm/metric/NeedlemanWunsch.mjs +1 -1
  58. package/dist/esm/metric/SmithWaterman.mjs +1 -1
  59. package/dist/esm/metric/qGram.mjs +1 -1
  60. package/dist/esm/phonetic/Caverphone.mjs +1 -1
  61. package/dist/esm/phonetic/Cologne.mjs +1 -1
  62. package/dist/esm/phonetic/Metaphone.mjs +1 -1
  63. package/dist/esm/phonetic/Phonetic.mjs +1 -1
  64. package/dist/esm/phonetic/Soundex.mjs +1 -1
  65. package/dist/esm/root.mjs +1 -1
  66. package/dist/esm/utils/DeepMerge.mjs +1 -1
  67. package/dist/esm/utils/DiffChecker.mjs +1 -1
  68. package/dist/esm/utils/Filter.mjs +1 -1
  69. package/dist/esm/utils/HashTable.mjs +6 -6
  70. package/dist/esm/utils/HashTable.mjs.map +1 -1
  71. package/dist/esm/utils/Normalizer.mjs +1 -1
  72. package/dist/esm/utils/Pool.mjs +6 -6
  73. package/dist/esm/utils/Pool.mjs.map +1 -1
  74. package/dist/esm/utils/Profiler.mjs +1 -1
  75. package/dist/esm/utils/Registry.mjs +1 -1
  76. package/dist/esm/utils/StructuredData.mjs +27 -15
  77. package/dist/esm/utils/StructuredData.mjs.map +1 -1
  78. package/dist/esm/utils/TextAnalyzer.mjs +1 -1
  79. package/dist/types/index.d.ts +2 -2
  80. package/dist/types/root.d.ts +1 -1
  81. package/dist/types/utils/StructuredData.d.ts +13 -11
  82. package/dist/types/utils/Types.d.ts +8 -0
  83. package/package.json +3 -2
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2023-2025 Paul Köhler (komed3)
3
+ Copyright (c) 2023-2026 Paul Köhler (komed3)
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
18
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
19
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
21
+ SOFTWARE.
package/README.md CHANGED
@@ -79,4 +79,4 @@ Try out or use CmpStr on the terminal. Install the **[cmpstr-cli](https://npmjs.
79
79
 
80
80
  The full documentation, API reference and advanced usage examples are available in the [GitHub Wiki](https://github.com/komed3/cmpstr/wiki).
81
81
 
82
- **LICENSE MIT © 2023-2025 PAUL KÖHLER (KOMED3)**
82
+ **LICENSE MIT © 2023-2026 PAUL KÖHLER (KOMED3)**
@@ -1,5 +1,5 @@
1
1
  /**
2
- * CmpStr v3.1.0 build-76aadb9-260117
2
+ * CmpStr v3.1.1 build-a140f52-260119
3
3
  * This is a lightweight, fast and well performing library for calculating string similarity.
4
4
  * (c) 2023-2026 Paul Köhler @komed3 / MIT License
5
5
  * Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
@@ -663,11 +663,11 @@ class Hasher {
663
663
  const chunks = Math.floor(len / 4);
664
664
  for (let i = 0; i < chunks; i++) {
665
665
  const pos = i * 4;
666
- const chunk =
667
- str.charCodeAt(pos) |
668
- (str.charCodeAt(pos + 1) << 8) |
669
- (str.charCodeAt(pos + 2) << 16) |
670
- (str.charCodeAt(pos + 3) << 24);
666
+ const c0 = str.charCodeAt(pos);
667
+ const c1 = str.charCodeAt(pos + 1);
668
+ const c2 = str.charCodeAt(pos + 2);
669
+ const c3 = str.charCodeAt(pos + 3);
670
+ const chunk = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
671
671
  hash ^= chunk;
672
672
  hash *= this.FNV_PRIME;
673
673
  }
@@ -881,7 +881,7 @@ class Pool {
881
881
  static CONFIG = {
882
882
  uint16: {
883
883
  type: 'uint16',
884
- maxSize: 32,
884
+ maxSize: 64,
885
885
  maxItemSize: 2048,
886
886
  allowOversize: true
887
887
  },
@@ -893,7 +893,7 @@ class Pool {
893
893
  },
894
894
  'string[]': {
895
895
  type: 'string[]',
896
- maxSize: 4,
896
+ maxSize: 2,
897
897
  maxItemSize: 1024,
898
898
  allowOversize: false
899
899
  },
@@ -901,9 +901,9 @@ class Pool {
901
901
  map: { type: 'map', maxSize: 8, maxItemSize: 0, allowOversize: false }
902
902
  };
903
903
  static POOLS = {
904
- uint16: new RingPool(32),
904
+ uint16: new RingPool(64),
905
905
  'number[]': new RingPool(16),
906
- 'string[]': new RingPool(4),
906
+ 'string[]': new RingPool(2),
907
907
  set: new RingPool(8),
908
908
  map: new RingPool(8)
909
909
  };
@@ -912,7 +912,7 @@ class Pool {
912
912
  case 'uint16':
913
913
  return new Uint16Array(size);
914
914
  case 'number[]':
915
- return new Array(size).fill(0);
915
+ return new Float64Array(size);
916
916
  case 'string[]':
917
917
  return new Array(size);
918
918
  case 'set':
@@ -979,39 +979,51 @@ class StructuredData {
979
979
  normalizeResults(results) {
980
980
  if (!Array.isArray(results) || results.length === 0) return [];
981
981
  const first = results[0];
982
- if (this.isMetricResult(first)) return results;
983
- if (this.isCmpStrResult(first))
984
- return results.map((r) => ({
982
+ let normalized = [];
983
+ if (this.isMetricResult(first)) normalized = results;
984
+ else if (this.isCmpStrResult(first)) {
985
+ normalized = results.map((r) => ({
985
986
  metric: 'unknown',
986
987
  a: r.source,
987
988
  b: r.target,
988
989
  res: r.match,
989
990
  raw: r.raw
990
991
  }));
991
- return [];
992
+ }
993
+ return normalized.map((r, idx) => ({ ...r, __idx: idx }));
992
994
  }
993
995
  rebuild(results, sourceData, extractedStrings, removeZero, objectsOnly) {
996
+ const stringToIndices = new Map();
997
+ for (let i = 0; i < extractedStrings.length; i++) {
998
+ const str = extractedStrings[i];
999
+ if (!stringToIndices.has(str)) stringToIndices.set(str, []);
1000
+ stringToIndices.get(str).push(i);
1001
+ }
994
1002
  const output = new Array(results.length);
1003
+ const occurrenceCount = new Map();
995
1004
  let out = 0;
996
1005
  for (let i = 0; i < results.length; i++) {
997
1006
  const result = results[i];
998
1007
  if (removeZero && result.res === 0) continue;
999
- let dataIndex =
1000
- result.b && extractedStrings.length
1001
- ? extractedStrings.indexOf(result.b)
1002
- : i;
1003
- if (dataIndex < 0 || dataIndex >= sourceData.length) dataIndex = i;
1008
+ const targetStr = result.b || '';
1009
+ const indices = stringToIndices.get(targetStr);
1010
+ let dataIndex;
1011
+ if (indices && indices.length > 0) {
1012
+ const occurrence = occurrenceCount.get(targetStr) ?? 0;
1013
+ occurrenceCount.set(targetStr, occurrence + 1);
1014
+ dataIndex = indices[occurrence % indices.length];
1015
+ } else {
1016
+ dataIndex = result.__idx ?? i;
1017
+ }
1018
+ if (dataIndex < 0 || dataIndex >= sourceData.length) continue;
1004
1019
  const sourceObj = sourceData[dataIndex];
1020
+ const mappedTarget = extractedStrings[dataIndex] || targetStr;
1005
1021
  if (objectsOnly) output[out++] = sourceObj;
1006
1022
  else
1007
1023
  output[out++] = {
1008
1024
  obj: sourceObj,
1009
1025
  key: this.key,
1010
- result: {
1011
- source: result.a,
1012
- target: extractedStrings[dataIndex] || result.b,
1013
- match: result.res
1014
- },
1026
+ result: { source: result.a, target: mappedTarget, match: result.res },
1015
1027
  ...(result.raw ? { raw: result.raw } : null)
1016
1028
  };
1017
1029
  }