cmpstr 3.2.2 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. package/dist/CmpStr.esm.js +2149 -1721
  2. package/dist/CmpStr.esm.min.js +2 -2
  3. package/dist/CmpStr.umd.js +2028 -1604
  4. package/dist/CmpStr.umd.min.js +2 -2
  5. package/dist/cjs/CmpStr.cjs +100 -51
  6. package/dist/cjs/CmpStrAsync.cjs +35 -18
  7. package/dist/cjs/index.cjs +1 -1
  8. package/dist/cjs/metric/Cosine.cjs +1 -1
  9. package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -1
  10. package/dist/cjs/metric/DiceSorensen.cjs +1 -1
  11. package/dist/cjs/metric/Hamming.cjs +1 -1
  12. package/dist/cjs/metric/Jaccard.cjs +1 -1
  13. package/dist/cjs/metric/JaroWinkler.cjs +1 -1
  14. package/dist/cjs/metric/LCS.cjs +1 -1
  15. package/dist/cjs/metric/Levenshtein.cjs +1 -1
  16. package/dist/cjs/metric/Metric.cjs +40 -22
  17. package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -1
  18. package/dist/cjs/metric/QGram.cjs +1 -1
  19. package/dist/cjs/metric/SmithWaterman.cjs +1 -1
  20. package/dist/cjs/phonetic/Caverphone.cjs +1 -1
  21. package/dist/cjs/phonetic/Cologne.cjs +1 -1
  22. package/dist/cjs/phonetic/Metaphone.cjs +1 -1
  23. package/dist/cjs/phonetic/Phonetic.cjs +27 -15
  24. package/dist/cjs/phonetic/Soundex.cjs +1 -1
  25. package/dist/cjs/root.cjs +4 -2
  26. package/dist/cjs/utils/DeepMerge.cjs +102 -97
  27. package/dist/cjs/utils/DiffChecker.cjs +1 -1
  28. package/dist/cjs/utils/Errors.cjs +22 -19
  29. package/dist/cjs/utils/Filter.cjs +59 -24
  30. package/dist/cjs/utils/HashTable.cjs +44 -29
  31. package/dist/cjs/utils/Normalizer.cjs +57 -28
  32. package/dist/cjs/utils/OptionsValidator.cjs +211 -0
  33. package/dist/cjs/utils/Pool.cjs +27 -13
  34. package/dist/cjs/utils/Profiler.cjs +41 -27
  35. package/dist/cjs/utils/Registry.cjs +5 -5
  36. package/dist/cjs/utils/StructuredData.cjs +83 -53
  37. package/dist/cjs/utils/TextAnalyzer.cjs +1 -1
  38. package/dist/esm/CmpStr.mjs +101 -52
  39. package/dist/esm/CmpStrAsync.mjs +35 -18
  40. package/dist/esm/index.mjs +1 -1
  41. package/dist/esm/metric/Cosine.mjs +1 -1
  42. package/dist/esm/metric/DamerauLevenshtein.mjs +1 -1
  43. package/dist/esm/metric/DiceSorensen.mjs +1 -1
  44. package/dist/esm/metric/Hamming.mjs +1 -1
  45. package/dist/esm/metric/Jaccard.mjs +1 -1
  46. package/dist/esm/metric/JaroWinkler.mjs +1 -1
  47. package/dist/esm/metric/LCS.mjs +1 -1
  48. package/dist/esm/metric/Levenshtein.mjs +1 -1
  49. package/dist/esm/metric/Metric.mjs +40 -22
  50. package/dist/esm/metric/NeedlemanWunsch.mjs +1 -1
  51. package/dist/esm/metric/QGram.mjs +1 -1
  52. package/dist/esm/metric/SmithWaterman.mjs +1 -1
  53. package/dist/esm/phonetic/Caverphone.mjs +1 -1
  54. package/dist/esm/phonetic/Cologne.mjs +1 -1
  55. package/dist/esm/phonetic/Metaphone.mjs +1 -1
  56. package/dist/esm/phonetic/Phonetic.mjs +30 -15
  57. package/dist/esm/phonetic/Soundex.mjs +1 -1
  58. package/dist/esm/root.mjs +3 -3
  59. package/dist/esm/utils/DeepMerge.mjs +103 -94
  60. package/dist/esm/utils/DiffChecker.mjs +1 -1
  61. package/dist/esm/utils/Errors.mjs +22 -19
  62. package/dist/esm/utils/Filter.mjs +59 -24
  63. package/dist/esm/utils/HashTable.mjs +44 -29
  64. package/dist/esm/utils/Normalizer.mjs +57 -28
  65. package/dist/esm/utils/OptionsValidator.mjs +210 -0
  66. package/dist/esm/utils/Pool.mjs +27 -13
  67. package/dist/esm/utils/Profiler.mjs +41 -27
  68. package/dist/esm/utils/Registry.mjs +5 -5
  69. package/dist/esm/utils/StructuredData.mjs +83 -53
  70. package/dist/esm/utils/TextAnalyzer.mjs +1 -1
  71. package/dist/types/CmpStr.d.ts +22 -15
  72. package/dist/types/CmpStrAsync.d.ts +3 -0
  73. package/dist/types/index.d.ts +3 -3
  74. package/dist/types/metric/Metric.d.ts +9 -9
  75. package/dist/types/phonetic/Phonetic.d.ts +4 -3
  76. package/dist/types/root.d.ts +3 -2
  77. package/dist/types/utils/DeepMerge.d.ts +80 -58
  78. package/dist/types/utils/Errors.d.ts +25 -8
  79. package/dist/types/utils/Filter.d.ts +4 -1
  80. package/dist/types/utils/HashTable.d.ts +12 -11
  81. package/dist/types/utils/Normalizer.d.ts +2 -1
  82. package/dist/types/utils/OptionsValidator.d.ts +193 -0
  83. package/dist/types/utils/Profiler.d.ts +9 -28
  84. package/dist/types/utils/StructuredData.d.ts +3 -0
  85. package/dist/types/utils/Types.d.ts +13 -1
  86. package/package.json +14 -5
@@ -0,0 +1,210 @@
1
+ // CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
2
+ import { CmpStrValidationError } from './Errors.mjs';
3
+ import '../metric/Cosine.mjs';
4
+ import '../metric/DamerauLevenshtein.mjs';
5
+ import '../metric/DiceSorensen.mjs';
6
+ import '../metric/Hamming.mjs';
7
+ import '../metric/Jaccard.mjs';
8
+ import '../metric/JaroWinkler.mjs';
9
+ import '../metric/LCS.mjs';
10
+ import '../metric/Levenshtein.mjs';
11
+ import '../metric/NeedlemanWunsch.mjs';
12
+ import '../metric/QGram.mjs';
13
+ import '../metric/SmithWaterman.mjs';
14
+ import { MetricRegistry } from '../metric/Metric.mjs';
15
+ import '../phonetic/Caverphone.mjs';
16
+ import '../phonetic/Cologne.mjs';
17
+ import '../phonetic/Metaphone.mjs';
18
+ import '../phonetic/Soundex.mjs';
19
+ import { PhoneticRegistry } from '../phonetic/Phonetic.mjs';
20
+
/**
 * Static validation helpers for CmpStr option objects.
 *
 * Every validator treats `undefined` as "option not provided" and returns
 * silently; any other invalid value raises a `CmpStrValidationError` whose
 * second constructor argument carries metadata about the offending option.
 */
class OptionsValidator {
  /** Single-character normalization flags accepted in the `flags` option. */
  static ALLOWED_FLAGS = new Set([
    'd',
    'u',
    'x',
    'w',
    't',
    'r',
    's',
    'k',
    'n',
    'i'
  ]);

  /** Values accepted for the `output` option. */
  static ALLOWED_OUTPUT = new Set(['orig', 'prep']);

  /** Values accepted for the metric `mode` option. */
  static ALLOWED_MODES = new Set(['default', 'batch', 'single', 'pairwise']);

  /** String values accepted for the `sort` option (booleans are accepted too). */
  static ALLOWED_SORT = new Set(['asc', 'desc']);

  /** Validators keyed by processor type, used by `validateProcessors`. */
  static PROCESSORS = {
    phonetic: (opt) => {
      if (!opt) return;
      OptionsValidator.validatePhoneticName(opt.algo);
      OptionsValidator.validatePhoneticOptions(opt.opt);
    }
  };

  /** Validators keyed by metric option name, used by `validateMetricOptions`. */
  static METRIC_OPT_MAP = {
    mode: (v) => OptionsValidator.validateMode(v),
    delimiter: (v) => OptionsValidator.validateString(v, 'opt.delimiter'),
    pad: (v) => OptionsValidator.validateString(v, 'opt.pad'),
    q: (v) => OptionsValidator.validateNumber(v, 'opt.q'),
    match: (v) => OptionsValidator.validateNumber(v, 'opt.match'),
    mismatch: (v) => OptionsValidator.validateNumber(v, 'opt.mismatch'),
    gap: (v) => OptionsValidator.validateNumber(v, 'opt.gap')
  };

  /** Validators keyed by phonetic option name, used by `validatePhoneticOptions`. */
  static PHONETIC_OPT_MAP = {
    map: (v) =>
      OptionsValidator.validateString(v, 'processors.phonetic.opt.map'),
    delimiter: (v) =>
      OptionsValidator.validateString(v, 'processors.phonetic.opt.delimiter'),
    length: (v) =>
      OptionsValidator.validateNumber(v, 'processors.phonetic.opt.length'),
    pad: (v) =>
      OptionsValidator.validateString(v, 'processors.phonetic.opt.pad'),
    dedupe: (v) =>
      OptionsValidator.validateBoolean(v, 'processors.phonetic.opt.dedupe'),
    fallback: (v) =>
      OptionsValidator.validateString(v, 'processors.phonetic.opt.fallback')
  };

  /** Validators keyed by top-level option name, used by `validateOptions`. */
  static CMPSTR_OPT_MAP = {
    raw: (v) => OptionsValidator.validateBoolean(v, 'raw'),
    removeZero: (v) => OptionsValidator.validateBoolean(v, 'removeZero'),
    safeEmpty: (v) => OptionsValidator.validateBoolean(v, 'safeEmpty'),
    flags: (v) => OptionsValidator.validateFlags(v),
    metric: (v) => OptionsValidator.validateMetricName(v),
    output: (v) => OptionsValidator.validateOutput(v),
    opt: (v) => OptionsValidator.validateMetricOptions(v),
    processors: (v) => OptionsValidator.validateProcessors(v),
    sort: (v) => OptionsValidator.validateSort(v, 'sort'),
    objectsOnly: (v) => OptionsValidator.validateBoolean(v, 'objectsOnly')
  };

  /**
   * Joins the members of a set into a ` | `-separated list for error messages.
   *
   * @param {Set<string>} set - Allowed values
   * @returns {string} e.g. `asc | desc`
   */
  static set2string(set) {
    return Array.from(set).join(' | ');
  }

  /**
   * Checks that a value has the given `typeof` type; `NaN` is rejected when
   * the expected type is `number`.
   *
   * @param {unknown} value - Value to check (`undefined` is skipped)
   * @param {string} name - Option name used in the error message
   * @param {string} type - Expected `typeof` result
   * @throws {CmpStrValidationError} If the type does not match
   */
  static validateType(value, name, type) {
    if (value === undefined) return;
    if (typeof value !== type || (type === 'number' && Number.isNaN(value))) {
      throw new CmpStrValidationError(
        `Invalid option <${name}>: expected ${type}`,
        { name, value }
      );
    }
  }

  /**
   * Checks that a value is a string contained in the given set.
   *
   * @param {unknown} value - Value to check (`undefined` is skipped)
   * @param {string} name - Option name used in the error message
   * @param {Set<string>} set - Allowed values
   * @throws {CmpStrValidationError} If the value is not an allowed string
   */
  static validateEnum(value, name, set) {
    if (value === undefined) return;
    if (typeof value !== 'string' || !set.has(value)) {
      throw new CmpStrValidationError(
        `Invalid option <${name}>: expected ${OptionsValidator.set2string(set)}`,
        { name, value }
      );
    }
  }

  /**
   * Runs the matching validator from `map` for every enumerable key of `opt`.
   *
   * @param {object} [opt] - Options object; falsy values are skipped
   * @param {Record<string, Function>} map - Validators keyed by option name
   * @throws {CmpStrValidationError} If `opt` has a key without a validator
   */
  static validateMap(opt, map) {
    if (!opt) return;
    for (const k in opt) {
      // Own-property lookup only: a plain `map[k]` would also resolve
      // inherited members such as `toString` or `constructor`, letting those
      // keys slip through as if they were known options.
      const fn = Object.prototype.hasOwnProperty.call(map, k)
        ? map[k]
        : undefined;
      if (typeof fn !== 'function')
        throw new CmpStrValidationError(`Invalid option <${k}>`, {
          option: k,
          // Report the value the caller supplied, not the missing validator.
          value: opt[k]
        });
      fn(opt[k]);
    }
  }

  /**
   * Checks that a value is a non-empty string known to a registry.
   *
   * @param {unknown} value - Name to check (`undefined` is skipped)
   * @param {string} name - Option name used in the error message
   * @param {string} label - Human-readable registry label for the error message
   * @param {(name: string) => boolean} has - Membership test of the registry
   * @param {() => unknown} list - Returns the registered names; only called
   *   to populate the error metadata
   * @throws {CmpStrValidationError} If the value is not a non-empty string or
   *   is not registered
   */
  static validateRegistryName(value, name, label, has, list) {
    if (value === undefined) return;
    if (typeof value !== 'string' || value.length === 0)
      throw new CmpStrValidationError(
        `Invalid option <${name}>: expected non-empty string`,
        { name, value }
      );
    if (!has(value))
      throw new CmpStrValidationError(`${label} <${value}> is not registered`, {
        name,
        value,
        available: list()
      });
  }

  /** Validates that `value` is a boolean (or `undefined`). */
  static validateBoolean(value, name) {
    OptionsValidator.validateType(value, name, 'boolean');
  }

  /** Validates that `value` is a non-NaN number (or `undefined`). */
  static validateNumber(value, name) {
    OptionsValidator.validateType(value, name, 'number');
  }

  /** Validates that `value` is a string (or `undefined`). */
  static validateString(value, name) {
    OptionsValidator.validateType(value, name, 'string');
  }

  /**
   * Validates the `flags` option: a string made up only of characters from
   * `ALLOWED_FLAGS`. An empty string is valid.
   *
   * @param {unknown} value - Flags string (`undefined` is skipped)
   * @throws {CmpStrValidationError} If not a string or on the first unknown flag
   */
  static validateFlags(value) {
    if (value === undefined) return;
    if (typeof value !== 'string')
      throw new CmpStrValidationError(
        `Invalid option <flags>: expected string`,
        { flags: value }
      );
    for (let i = 0; i < value.length; i++) {
      const ch = value[i];
      if (!OptionsValidator.ALLOWED_FLAGS.has(ch))
        throw new CmpStrValidationError(
          `Invalid normalization flag <${ch}> in <flags>: expected ${OptionsValidator.set2string(OptionsValidator.ALLOWED_FLAGS)}`,
          { flags: value, invalid: ch }
        );
    }
  }

  /** Validates the `output` option against `ALLOWED_OUTPUT`. */
  static validateOutput(value) {
    OptionsValidator.validateEnum(
      value,
      'output',
      OptionsValidator.ALLOWED_OUTPUT
    );
  }

  /** Validates the `mode` option against `ALLOWED_MODES`. */
  static validateMode(value) {
    OptionsValidator.validateEnum(
      value,
      'mode',
      OptionsValidator.ALLOWED_MODES
    );
  }

  /**
   * Validates a sort option: any boolean, or one of `ALLOWED_SORT`.
   *
   * @param {unknown} value - Sort option (`undefined` is skipped)
   * @param {string} name - Option name used in the error message
   */
  static validateSort(value, name) {
    if (value === undefined || typeof value === 'boolean') return;
    OptionsValidator.validateEnum(value, name, OptionsValidator.ALLOWED_SORT);
  }

  /** Validates that `value` names a metric known to `MetricRegistry`. */
  static validateMetricName(value) {
    // Wrapped in arrows so the registry methods keep their receiver.
    // NOTE(review): whether `has`/`list` rely on `this` is not visible here.
    OptionsValidator.validateRegistryName(
      value,
      'metric',
      'Comparison metric',
      (name) => MetricRegistry.has(name),
      () => MetricRegistry.list()
    );
  }

  /** Validates that `value` names an algorithm known to `PhoneticRegistry`. */
  static validatePhoneticName(value) {
    // Wrapped in arrows for the same reason as in `validateMetricName`.
    OptionsValidator.validateRegistryName(
      value,
      'phonetic',
      'Phonetic algorithm',
      (name) => PhoneticRegistry.has(name),
      () => PhoneticRegistry.list()
    );
  }

  /** Validates a metric options object against `METRIC_OPT_MAP`. */
  static validateMetricOptions(opt) {
    OptionsValidator.validateMap(opt, OptionsValidator.METRIC_OPT_MAP);
  }

  /** Validates a phonetic options object against `PHONETIC_OPT_MAP`. */
  static validatePhoneticOptions(opt) {
    OptionsValidator.validateMap(opt, OptionsValidator.PHONETIC_OPT_MAP);
  }

  /**
   * Validates the `processors` option: every key must be a processor type
   * listed in `PROCESSORS`, and its value must pass that processor's validator.
   *
   * @param {object} [opt] - Processors object; falsy values are skipped
   * @throws {CmpStrValidationError} On an unknown processor type
   */
  static validateProcessors(opt) {
    if (!opt) return;
    const known = OptionsValidator.PROCESSORS;
    for (const key in opt) {
      // Own-property lookup only, so inherited names such as `constructor`
      // are rejected instead of being invoked as validators.
      const fn = Object.prototype.hasOwnProperty.call(known, key)
        ? known[key]
        : undefined;
      if (typeof fn !== 'function')
        throw new CmpStrValidationError(
          `Invalid processor type <${key}> in <processors>: expected ${Object.keys(known).join(' | ')}`,
          { processors: opt, invalid: key }
        );
      fn(opt[key]);
    }
  }

  /**
   * Validates a full CmpStr options object against `CMPSTR_OPT_MAP`.
   *
   * @param {object} [opt] - Options object; falsy values are skipped
   * @throws {CmpStrValidationError} On the first invalid or unknown option
   */
  static validateOptions(opt) {
    OptionsValidator.validateMap(opt, OptionsValidator.CMPSTR_OPT_MAP);
  }
}
209
+
210
+ export { OptionsValidator };
@@ -1,4 +1,4 @@
1
- // CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
1
+ // CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
2
2
  import { CmpStrUsageError, ErrorUtil } from './Errors.mjs';
3
3
 
4
4
  class RingPool {
@@ -11,15 +11,14 @@ class RingPool {
11
11
  acquire(minSize, allowOversize) {
12
12
  return ErrorUtil.wrap(
13
13
  () => {
14
- const len = this.buffers.length;
14
+ const buffers = this.buffers;
15
+ const len = buffers.length;
15
16
  for (let i = 0; i < len; i++) {
16
- const idx = (this.pointer + i) & (len - 1);
17
- const item = this.buffers[idx];
18
- if (
19
- item.size >= minSize &&
20
- (allowOversize || item.size === minSize)
21
- ) {
22
- this.pointer = (idx + 1) & (len - 1);
17
+ const idx = (this.pointer + i) % len;
18
+ const item = buffers[idx];
19
+ const size = item.size;
20
+ if (size >= minSize && (allowOversize || size === minSize)) {
21
+ this.pointer = (idx + 1) % len;
23
22
  return item;
24
23
  }
25
24
  }
@@ -32,9 +31,12 @@ class RingPool {
32
31
  release(item) {
33
32
  ErrorUtil.wrap(
34
33
  () => {
35
- if (this.buffers.length < this.maxSize)
36
- return void [this.buffers.push(item)];
37
- this.buffers[this.pointer] = item;
34
+ const buffers = this.buffers;
35
+ if (buffers.length < this.maxSize) {
36
+ buffers.push(item);
37
+ return;
38
+ }
39
+ buffers[this.pointer] = item;
38
40
  this.pointer = (this.pointer + 1) % this.maxSize;
39
41
  },
40
42
  `Failed to release buffer back to pool`,
@@ -54,6 +56,12 @@ class Pool {
54
56
  maxItemSize: 2048,
55
57
  allowOversize: true
56
58
  },
59
+ 'arr[]': {
60
+ type: 'arr[]',
61
+ maxSize: 4,
62
+ maxItemSize: 1024,
63
+ allowOversize: false
64
+ },
57
65
  'number[]': {
58
66
  type: 'number[]',
59
67
  maxSize: 16,
@@ -71,6 +79,7 @@ class Pool {
71
79
  };
72
80
  static POOLS = {
73
81
  int32: new RingPool(64),
82
+ 'arr[]': new RingPool(4),
74
83
  'number[]': new RingPool(16),
75
84
  'string[]': new RingPool(2),
76
85
  set: new RingPool(8),
@@ -80,6 +89,8 @@ class Pool {
80
89
  switch (type) {
81
90
  case 'int32':
82
91
  return new Int32Array(size);
92
+ case 'arr[]':
93
+ return new Array(size);
83
94
  case 'number[]':
84
95
  return new Float64Array(size);
85
96
  case 'string[]':
@@ -101,7 +112,10 @@ class Pool {
101
112
  return this.allocate(type, size);
102
113
  }
103
114
  static acquireMany(type, sizes) {
104
- return sizes.map((size) => this.acquire(type, size));
115
+ const out = new Array(sizes.length);
116
+ for (let i = 0; i < sizes.length; i++)
117
+ out[i] = this.acquire(type, sizes[i]);
118
+ return out;
105
119
  }
106
120
  static release(type, buffer, size) {
107
121
  const CONFIG = this.CONFIG[type];
@@ -1,15 +1,17 @@
1
- // CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
1
+ // CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
2
2
  class Profiler {
3
3
  active;
4
4
  static ENV;
5
5
  static instance;
6
6
  nowFn;
7
7
  memFn;
8
- store = new Set();
8
+ store = [];
9
+ last;
9
10
  totalTime = 0;
10
11
  totalMem = 0;
11
12
  static detectEnv() {
12
- if (typeof process !== 'undefined') Profiler.ENV = 'nodejs';
13
+ if (typeof process !== 'undefined' && process.versions?.node)
14
+ Profiler.ENV = 'nodejs';
13
15
  else if (typeof performance !== 'undefined') Profiler.ENV = 'browser';
14
16
  else Profiler.ENV = 'unknown';
15
17
  }
@@ -21,7 +23,7 @@ class Profiler {
21
23
  this.active = active;
22
24
  switch (Profiler.ENV) {
23
25
  case 'nodejs':
24
- this.nowFn = () => Number(process.hrtime.bigint()) / 1e6;
26
+ this.nowFn = () => Number(process.hrtime.bigint()) * 1e-6;
25
27
  this.memFn = () => process.memoryUsage().heapUsed;
26
28
  break;
27
29
  case 'browser':
@@ -34,40 +36,52 @@ class Profiler {
34
36
  break;
35
37
  }
36
38
  }
37
- now = () => this.nowFn();
38
- mem = () => this.memFn();
39
- profile(fn, meta) {
40
- const startTime = this.now(),
41
- startMem = this.mem();
42
- const res = fn();
43
- const deltaTime = this.now() - startTime,
44
- deltaMem = this.mem() - startMem;
45
- this.store.add({ time: deltaTime, mem: deltaMem, res, meta });
46
- ((this.totalTime += deltaTime), (this.totalMem += deltaMem));
47
- return res;
39
+ storeRes(entry) {
40
+ this.store.push((this.last = entry));
41
+ this.totalTime += entry.time;
42
+ this.totalMem += entry.mem;
48
43
  }
49
- enable = () => {
44
+ enable() {
50
45
  this.active = true;
51
- };
52
- disable = () => {
46
+ }
47
+ disable() {
53
48
  this.active = false;
54
- };
49
+ }
55
50
  clear() {
56
- this.store.clear();
51
+ this.store.length = 0;
52
+ this.last = undefined;
57
53
  this.totalTime = 0;
58
54
  this.totalMem = 0;
59
55
  }
60
56
  run(fn, meta = {}) {
61
- return this.active ? this.profile(fn, meta) : fn();
57
+ if (!this.active) return fn();
58
+ const startTime = this.nowFn(),
59
+ startMem = this.memFn();
60
+ const res = fn();
61
+ const deltaTime = this.nowFn() - startTime,
62
+ deltaMem = this.memFn() - startMem;
63
+ this.storeRes({ time: deltaTime, mem: deltaMem, res, meta });
64
+ return res;
62
65
  }
63
66
  async runAsync(fn, meta = {}) {
64
- return this.active
65
- ? this.profile(async () => await fn(), meta)
66
- : await fn();
67
+ if (!this.active) return fn();
68
+ const startTime = this.nowFn(),
69
+ startMem = this.memFn();
70
+ const res = await fn();
71
+ const deltaTime = this.nowFn() - startTime,
72
+ deltaMem = this.memFn() - startMem;
73
+ this.storeRes({ time: deltaTime, mem: deltaMem, res, meta });
74
+ return res;
75
+ }
76
+ getAll() {
77
+ return [...this.store];
78
+ }
79
+ getLast() {
80
+ return this.last;
81
+ }
82
+ getTotal() {
83
+ return { time: this.totalTime, mem: this.totalMem };
67
84
  }
68
- getAll = () => [...this.store];
69
- getLast = () => this.getAll().pop();
70
- getTotal = () => ({ time: this.totalTime, mem: this.totalMem });
71
85
  services = Object.freeze({
72
86
  enable: this.enable.bind(this),
73
87
  disable: this.disable.bind(this),
@@ -1,4 +1,4 @@
1
- // CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
1
+ // CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
2
2
  import { ErrorUtil, CmpStrNotFoundError } from './Errors.mjs';
3
3
 
4
4
  const registry = Object.create(null);
@@ -66,13 +66,13 @@ function resolveCls(reg, cls) {
66
66
  throw new CmpStrNotFoundError(`Registry <${reg}> does not exist`, {
67
67
  registry: reg
68
68
  });
69
- return typeof cls === 'string' ? registry[reg]?.get(cls) : cls;
69
+ return typeof cls === 'string' ? registry[reg].get(cls) : cls;
70
70
  }
71
71
  function createFromRegistry(reg, cls, ...args) {
72
- cls = resolveCls(reg, cls);
72
+ const ctor = resolveCls(reg, cls);
73
73
  return ErrorUtil.wrap(
74
- () => new cls(...args),
75
- `Failed to create instance of class <${cls.name ?? cls}> from registry <${reg}>`,
74
+ () => new ctor(...args),
75
+ `Failed to create instance of class <${ctor.name ?? cls}> from registry <${reg}>`,
76
76
  { registry: reg, class: cls, args }
77
77
  );
78
78
  }
@@ -1,10 +1,12 @@
1
- // CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
1
+ // CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
2
2
  import { CmpStrValidationError, ErrorUtil } from './Errors.mjs';
3
3
  import { Pool } from './Pool.mjs';
4
4
 
5
5
  class StructuredData {
6
6
  data;
7
7
  key;
8
+ static SORT_ASC = (a, b) => a.res - b.res;
9
+ static SORT_DESC = (a, b) => b.res - a.res;
8
10
  static create(data, key) {
9
11
  return new StructuredData(data, key);
10
12
  }
@@ -13,14 +15,17 @@ class StructuredData {
13
15
  this.key = key;
14
16
  }
15
17
  extractFrom(arr, key) {
16
- const result = Pool.acquire('string[]', arr.length);
17
- for (let i = 0; i < arr.length; i++) {
18
+ const n = arr.length;
19
+ const result = new Array(n);
20
+ for (let i = 0; i < n; i++) {
18
21
  const val = arr[i][key];
19
- result[i] = typeof val === 'string' ? val : String(val ?? '');
22
+ result[i] = val != null ? String(val) : '';
20
23
  }
21
24
  return result;
22
25
  }
23
- extract = () => this.extractFrom(this.data, this.key);
26
+ extract() {
27
+ return this.extractFrom(this.data, this.key);
28
+ }
24
29
  isMetricResult(v) {
25
30
  return (
26
31
  typeof v === 'object' && v !== null && 'a' in v && 'b' in v && 'res' in v
@@ -38,64 +43,89 @@ class StructuredData {
38
43
  normalizeResults(results) {
39
44
  if (!Array.isArray(results) || results.length === 0) return [];
40
45
  const first = results[0];
41
- let normalized = [];
42
- if (this.isMetricResult(first)) normalized = results;
43
- else if (this.isCmpStrResult(first))
44
- normalized = results.map((r) => ({
45
- metric: 'unknown',
46
- a: r.source,
47
- b: r.target,
48
- res: r.match,
49
- raw: r.raw
50
- }));
51
- else
46
+ let out = new Array(results.length);
47
+ if (this.isMetricResult(first)) {
48
+ const src = results;
49
+ for (let i = 0; i < src.length; i++) out[i] = { ...src[i], __idx: i };
50
+ } else if (this.isCmpStrResult(first)) {
51
+ const src = results;
52
+ for (let i = 0; i < src.length; i++) {
53
+ const r = src[i];
54
+ out[i] = {
55
+ metric: 'unknown',
56
+ a: r.source,
57
+ b: r.target,
58
+ res: r.match,
59
+ raw: r.raw,
60
+ __idx: i
61
+ };
62
+ }
63
+ } else
52
64
  throw new CmpStrValidationError(
53
65
  'Unsupported result format for StructuredData normalization.'
54
66
  );
55
- return normalized.map((r, idx) => ({ ...r, __idx: idx }));
67
+ return out;
56
68
  }
57
69
  rebuild(results, sourceData, extractedStrings, removeZero, objectsOnly) {
58
- const stringToIndices = new Map();
59
- for (let i = 0; i < extractedStrings.length; i++) {
60
- const str = extractedStrings[i];
61
- if (!stringToIndices.has(str)) stringToIndices.set(str, []);
62
- stringToIndices.get(str).push(i);
63
- }
64
- const output = new Array(results.length);
65
- const occurrenceCount = new Map();
66
- let out = 0;
67
- for (let i = 0; i < results.length; i++) {
68
- const result = results[i];
69
- if (removeZero && result.res === 0) continue;
70
- const targetStr = result.b || '';
71
- const indices = stringToIndices.get(targetStr);
72
- let dataIndex;
73
- if (indices && indices.length > 0) {
74
- const occurrence = occurrenceCount.get(targetStr) ?? 0;
75
- occurrenceCount.set(targetStr, occurrence + 1);
76
- dataIndex = indices[occurrence % indices.length];
77
- } else {
78
- dataIndex = result.__idx ?? i;
70
+ const m = extractedStrings.length,
71
+ n = results.length;
72
+ const stringToIndices = Pool.acquire('map', m);
73
+ const occurrenceCount = Pool.acquire('map', n);
74
+ const output = new Array(n);
75
+ stringToIndices.clear();
76
+ occurrenceCount.clear();
77
+ try {
78
+ for (let i = 0; i < m; i++) {
79
+ const str = extractedStrings[i];
80
+ let arr = stringToIndices.get(str);
81
+ if (!arr) {
82
+ arr = [];
83
+ stringToIndices.set(str, arr);
84
+ }
85
+ arr.push(i);
79
86
  }
80
- if (dataIndex < 0 || dataIndex >= sourceData.length) continue;
81
- const sourceObj = sourceData[dataIndex];
82
- const mappedTarget = extractedStrings[dataIndex] || targetStr;
83
- if (objectsOnly) output[out++] = sourceObj;
84
- else
85
- output[out++] = {
86
- obj: sourceObj,
87
- key: this.key,
88
- result: { source: result.a, target: mappedTarget, match: result.res },
89
- ...(result.raw ? { raw: result.raw } : null)
90
- };
87
+ let out = 0;
88
+ for (let i = 0; i < n; i++) {
89
+ const result = results[i];
90
+ if (removeZero && result.res === 0) continue;
91
+ const targetStr = result.b || '';
92
+ const indices = stringToIndices.get(targetStr);
93
+ let dataIndex;
94
+ if (indices && indices.length > 0) {
95
+ const occurrence = occurrenceCount.get(targetStr) ?? 0;
96
+ occurrenceCount.set(targetStr, occurrence + 1);
97
+ dataIndex = indices[occurrence % indices.length];
98
+ } else {
99
+ dataIndex = result.__idx ?? i;
100
+ }
101
+ if (dataIndex < 0 || dataIndex >= sourceData.length) continue;
102
+ const sourceObj = sourceData[dataIndex];
103
+ const mappedTarget = extractedStrings[dataIndex] || targetStr;
104
+ if (objectsOnly) output[out++] = sourceObj;
105
+ else
106
+ output[out++] = {
107
+ obj: sourceObj,
108
+ key: this.key,
109
+ result: {
110
+ source: result.a,
111
+ target: mappedTarget,
112
+ match: result.res
113
+ },
114
+ ...(result.raw ? { raw: result.raw } : null)
115
+ };
116
+ }
117
+ output.length = out;
118
+ return output;
119
+ } finally {
120
+ Pool.release('map', stringToIndices, m);
121
+ Pool.release('map', occurrenceCount, n);
91
122
  }
92
- output.length = out;
93
- return output;
94
123
  }
95
124
  sort(results, sort) {
96
125
  if (!sort || results.length <= 1) return results;
97
- const asc = sort === 'asc';
98
- return results.sort((a, b) => (asc ? a.res - b.res : b.res - a.res));
126
+ return results.sort(
127
+ sort === 'asc' ? StructuredData.SORT_ASC : StructuredData.SORT_DESC
128
+ );
99
129
  }
100
130
  finalizeLookup(results, extractedStrings, opt) {
101
131
  return this.rebuild(
@@ -1,4 +1,4 @@
1
- // CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
1
+ // CmpStr v3.3.0 build-3699f85-260318 by Paul Köhler @komed3 / MIT License
2
2
  class TextAnalyzer {
3
3
  static REGEX = {
4
4
  number: /\d/,