cmpstr 3.2.1 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155):
  1. package/README.md +24 -18
  2. package/dist/CmpStr.esm.js +1904 -1211
  3. package/dist/CmpStr.esm.min.js +2 -3
  4. package/dist/CmpStr.umd.js +1924 -1236
  5. package/dist/CmpStr.umd.min.js +2 -3
  6. package/dist/cjs/CmpStr.cjs +134 -64
  7. package/dist/cjs/CmpStrAsync.cjs +60 -37
  8. package/dist/cjs/index.cjs +1 -2
  9. package/dist/cjs/metric/Cosine.cjs +1 -2
  10. package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -2
  11. package/dist/cjs/metric/DiceSorensen.cjs +1 -2
  12. package/dist/cjs/metric/Hamming.cjs +5 -4
  13. package/dist/cjs/metric/Jaccard.cjs +1 -2
  14. package/dist/cjs/metric/JaroWinkler.cjs +1 -2
  15. package/dist/cjs/metric/LCS.cjs +1 -2
  16. package/dist/cjs/metric/Levenshtein.cjs +1 -2
  17. package/dist/cjs/metric/Metric.cjs +90 -53
  18. package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -2
  19. package/dist/cjs/metric/QGram.cjs +1 -2
  20. package/dist/cjs/metric/SmithWaterman.cjs +1 -2
  21. package/dist/cjs/phonetic/Caverphone.cjs +1 -2
  22. package/dist/cjs/phonetic/Cologne.cjs +1 -2
  23. package/dist/cjs/phonetic/Metaphone.cjs +1 -2
  24. package/dist/cjs/phonetic/Phonetic.cjs +80 -48
  25. package/dist/cjs/phonetic/Soundex.cjs +1 -2
  26. package/dist/cjs/root.cjs +6 -3
  27. package/dist/cjs/utils/DeepMerge.cjs +109 -99
  28. package/dist/cjs/utils/DiffChecker.cjs +1 -2
  29. package/dist/cjs/utils/Errors.cjs +106 -0
  30. package/dist/cjs/utils/Filter.cjs +97 -37
  31. package/dist/cjs/utils/HashTable.cjs +44 -30
  32. package/dist/cjs/utils/Normalizer.cjs +84 -35
  33. package/dist/cjs/utils/OptionsValidator.cjs +211 -0
  34. package/dist/cjs/utils/Pool.cjs +57 -19
  35. package/dist/cjs/utils/Profiler.cjs +41 -28
  36. package/dist/cjs/utils/Registry.cjs +48 -24
  37. package/dist/cjs/utils/StructuredData.cjs +95 -57
  38. package/dist/cjs/utils/TextAnalyzer.cjs +1 -2
  39. package/dist/esm/CmpStr.mjs +133 -61
  40. package/dist/esm/CmpStrAsync.mjs +56 -33
  41. package/dist/esm/index.mjs +1 -2
  42. package/dist/esm/metric/Cosine.mjs +1 -2
  43. package/dist/esm/metric/DamerauLevenshtein.mjs +1 -2
  44. package/dist/esm/metric/DiceSorensen.mjs +1 -2
  45. package/dist/esm/metric/Hamming.mjs +5 -4
  46. package/dist/esm/metric/Jaccard.mjs +1 -2
  47. package/dist/esm/metric/JaroWinkler.mjs +1 -2
  48. package/dist/esm/metric/LCS.mjs +1 -2
  49. package/dist/esm/metric/Levenshtein.mjs +1 -2
  50. package/dist/esm/metric/Metric.mjs +92 -53
  51. package/dist/esm/metric/NeedlemanWunsch.mjs +1 -2
  52. package/dist/esm/metric/QGram.mjs +1 -2
  53. package/dist/esm/metric/SmithWaterman.mjs +1 -2
  54. package/dist/esm/phonetic/Caverphone.mjs +1 -2
  55. package/dist/esm/phonetic/Cologne.mjs +1 -2
  56. package/dist/esm/phonetic/Metaphone.mjs +1 -2
  57. package/dist/esm/phonetic/Phonetic.mjs +83 -48
  58. package/dist/esm/phonetic/Soundex.mjs +1 -2
  59. package/dist/esm/root.mjs +5 -4
  60. package/dist/esm/utils/DeepMerge.mjs +109 -95
  61. package/dist/esm/utils/DiffChecker.mjs +1 -2
  62. package/dist/esm/utils/Errors.mjs +106 -0
  63. package/dist/esm/utils/Filter.mjs +97 -37
  64. package/dist/esm/utils/HashTable.mjs +44 -30
  65. package/dist/esm/utils/Normalizer.mjs +84 -35
  66. package/dist/esm/utils/OptionsValidator.mjs +210 -0
  67. package/dist/esm/utils/Pool.mjs +53 -19
  68. package/dist/esm/utils/Profiler.mjs +41 -28
  69. package/dist/esm/utils/Registry.mjs +48 -24
  70. package/dist/esm/utils/StructuredData.mjs +95 -57
  71. package/dist/esm/utils/TextAnalyzer.mjs +1 -2
  72. package/dist/types/CmpStr.d.ts +25 -14
  73. package/dist/types/CmpStrAsync.d.ts +4 -0
  74. package/dist/types/index.d.ts +3 -2
  75. package/dist/types/metric/Metric.d.ts +15 -14
  76. package/dist/types/phonetic/Phonetic.d.ts +7 -4
  77. package/dist/types/root.d.ts +4 -2
  78. package/dist/types/utils/DeepMerge.d.ts +80 -58
  79. package/dist/types/utils/Errors.d.ts +154 -0
  80. package/dist/types/utils/Filter.d.ts +8 -1
  81. package/dist/types/utils/HashTable.d.ts +12 -11
  82. package/dist/types/utils/Normalizer.d.ts +5 -1
  83. package/dist/types/utils/OptionsValidator.d.ts +193 -0
  84. package/dist/types/utils/Pool.d.ts +2 -0
  85. package/dist/types/utils/Profiler.d.ts +9 -28
  86. package/dist/types/utils/Registry.d.ts +3 -3
  87. package/dist/types/utils/StructuredData.d.ts +6 -1
  88. package/dist/types/utils/Types.d.ts +39 -1
  89. package/package.json +20 -11
  90. package/dist/CmpStr.esm.js.map +0 -1
  91. package/dist/CmpStr.esm.min.js.map +0 -1
  92. package/dist/CmpStr.umd.js.map +0 -1
  93. package/dist/CmpStr.umd.min.js.map +0 -1
  94. package/dist/cjs/CmpStr.cjs.map +0 -1
  95. package/dist/cjs/CmpStrAsync.cjs.map +0 -1
  96. package/dist/cjs/index.cjs.map +0 -1
  97. package/dist/cjs/metric/Cosine.cjs.map +0 -1
  98. package/dist/cjs/metric/DamerauLevenshtein.cjs.map +0 -1
  99. package/dist/cjs/metric/DiceSorensen.cjs.map +0 -1
  100. package/dist/cjs/metric/Hamming.cjs.map +0 -1
  101. package/dist/cjs/metric/Jaccard.cjs.map +0 -1
  102. package/dist/cjs/metric/JaroWinkler.cjs.map +0 -1
  103. package/dist/cjs/metric/LCS.cjs.map +0 -1
  104. package/dist/cjs/metric/Levenshtein.cjs.map +0 -1
  105. package/dist/cjs/metric/Metric.cjs.map +0 -1
  106. package/dist/cjs/metric/NeedlemanWunsch.cjs.map +0 -1
  107. package/dist/cjs/metric/QGram.cjs.map +0 -1
  108. package/dist/cjs/metric/SmithWaterman.cjs.map +0 -1
  109. package/dist/cjs/phonetic/Caverphone.cjs.map +0 -1
  110. package/dist/cjs/phonetic/Cologne.cjs.map +0 -1
  111. package/dist/cjs/phonetic/Metaphone.cjs.map +0 -1
  112. package/dist/cjs/phonetic/Phonetic.cjs.map +0 -1
  113. package/dist/cjs/phonetic/Soundex.cjs.map +0 -1
  114. package/dist/cjs/root.cjs.map +0 -1
  115. package/dist/cjs/utils/DeepMerge.cjs.map +0 -1
  116. package/dist/cjs/utils/DiffChecker.cjs.map +0 -1
  117. package/dist/cjs/utils/Filter.cjs.map +0 -1
  118. package/dist/cjs/utils/HashTable.cjs.map +0 -1
  119. package/dist/cjs/utils/Normalizer.cjs.map +0 -1
  120. package/dist/cjs/utils/Pool.cjs.map +0 -1
  121. package/dist/cjs/utils/Profiler.cjs.map +0 -1
  122. package/dist/cjs/utils/Registry.cjs.map +0 -1
  123. package/dist/cjs/utils/StructuredData.cjs.map +0 -1
  124. package/dist/cjs/utils/TextAnalyzer.cjs.map +0 -1
  125. package/dist/esm/CmpStr.mjs.map +0 -1
  126. package/dist/esm/CmpStrAsync.mjs.map +0 -1
  127. package/dist/esm/index.mjs.map +0 -1
  128. package/dist/esm/metric/Cosine.mjs.map +0 -1
  129. package/dist/esm/metric/DamerauLevenshtein.mjs.map +0 -1
  130. package/dist/esm/metric/DiceSorensen.mjs.map +0 -1
  131. package/dist/esm/metric/Hamming.mjs.map +0 -1
  132. package/dist/esm/metric/Jaccard.mjs.map +0 -1
  133. package/dist/esm/metric/JaroWinkler.mjs.map +0 -1
  134. package/dist/esm/metric/LCS.mjs.map +0 -1
  135. package/dist/esm/metric/Levenshtein.mjs.map +0 -1
  136. package/dist/esm/metric/Metric.mjs.map +0 -1
  137. package/dist/esm/metric/NeedlemanWunsch.mjs.map +0 -1
  138. package/dist/esm/metric/QGram.mjs.map +0 -1
  139. package/dist/esm/metric/SmithWaterman.mjs.map +0 -1
  140. package/dist/esm/phonetic/Caverphone.mjs.map +0 -1
  141. package/dist/esm/phonetic/Cologne.mjs.map +0 -1
  142. package/dist/esm/phonetic/Metaphone.mjs.map +0 -1
  143. package/dist/esm/phonetic/Phonetic.mjs.map +0 -1
  144. package/dist/esm/phonetic/Soundex.mjs.map +0 -1
  145. package/dist/esm/root.mjs.map +0 -1
  146. package/dist/esm/utils/DeepMerge.mjs.map +0 -1
  147. package/dist/esm/utils/DiffChecker.mjs.map +0 -1
  148. package/dist/esm/utils/Filter.mjs.map +0 -1
  149. package/dist/esm/utils/HashTable.mjs.map +0 -1
  150. package/dist/esm/utils/Normalizer.mjs.map +0 -1
  151. package/dist/esm/utils/Pool.mjs.map +0 -1
  152. package/dist/esm/utils/Profiler.mjs.map +0 -1
  153. package/dist/esm/utils/Registry.mjs.map +0 -1
  154. package/dist/esm/utils/StructuredData.mjs.map +0 -1
  155. package/dist/esm/utils/TextAnalyzer.mjs.map +0 -1
@@ -1,5 +1,5 @@
1
1
  /**
2
- * CmpStr v3.2.1 build-3439ccb-260130
2
+ * CmpStr v3.3.0 build-3699f85-260318
3
3
  * This is a lightweight, fast and well performing library for calculating string similarity.
4
4
  * (c) 2023-2026 Paul Köhler @komed3 / MIT License
5
5
  * Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
@@ -15,114 +15,227 @@
15
15
  })(this, function (exports) {
16
16
  'use strict';
17
17
 
18
- const BRACKET_PATTERN = /\[(\d+)]/g;
19
- const PATH_CACHE = new Map();
20
- function parse(p) {
21
- let cached = PATH_CACHE.get(p);
22
- if (cached) return cached;
23
- const parsed = p
24
- .replace(BRACKET_PATTERN, '.$1')
25
- .split('.')
26
- .map((s) => {
27
- const n = Number(s);
28
- return Number.isInteger(n) && String(n) === s ? n : s;
29
- });
30
- PATH_CACHE.set(p, parsed);
31
- return parsed;
18
+ class CmpStrError extends Error {
19
+ code;
20
+ meta;
21
+ when = new Date().toISOString();
22
+ constructor(code, message, meta, cause) {
23
+ super(message, cause !== undefined ? { cause } : undefined);
24
+ this.name = this.constructor.name;
25
+ this.code = code;
26
+ this.meta = meta;
27
+ if (typeof Error.captureStackTrace === 'function') {
28
+ Error.captureStackTrace(this, this.constructor);
29
+ }
30
+ }
31
+ format(stack = false) {
32
+ const parts = [`${this.name} [${this.code}]`, this.message];
33
+ if (this.meta)
34
+ for (const _ in this.meta) {
35
+ parts.push(JSON.stringify(this.meta));
36
+ break;
37
+ }
38
+ return (
39
+ parts.join(' - ') +
40
+ (stack && this.stack ? `\nStack Trace:\n${this.stack}` : '')
41
+ );
42
+ }
43
+ toString() {
44
+ return this.format(false);
45
+ }
46
+ toJSON(stack = false) {
47
+ return {
48
+ name: this.name,
49
+ code: this.code,
50
+ message: this.message,
51
+ meta: this.meta,
52
+ when: this.when,
53
+ cause:
54
+ this.cause instanceof Error
55
+ ? {
56
+ name: this.cause.name,
57
+ message: this.cause.message,
58
+ stack: stack && this.cause.stack
59
+ }
60
+ : this.cause
61
+ };
62
+ }
32
63
  }
33
- function get(t, path, fb) {
34
- let o = t;
35
- for (const k of parse(path)) {
36
- if (o == null || !(k in o)) return fb;
37
- o = o[k];
64
+ class CmpStrValidationError extends CmpStrError {
65
+ constructor(message, meta, cause) {
66
+ super('E_VALIDATION', message, meta, cause);
38
67
  }
39
- return o;
40
68
  }
41
- function has(t, path) {
42
- let o = t;
43
- for (const k of parse(path)) {
44
- if (o == null || !(k in o)) return false;
45
- o = o[k];
69
+ class CmpStrNotFoundError extends CmpStrError {
70
+ constructor(message, meta, cause) {
71
+ super('E_NOT_FOUND', message, meta, cause);
46
72
  }
47
- return true;
48
73
  }
49
- function set(t, path, value) {
50
- if (path === '') return value;
51
- const keys = parse(path);
52
- if (t !== undefined && (typeof t !== 'object' || t === null))
53
- throw Error(`Cannot set property <${keys[0]}> of <${JSON.stringify(t)}>`);
54
- const root = t ?? (typeof keys[0] === 'number' ? [] : Object.create(null));
55
- let cur = root;
56
- for (let i = 0; i < keys.length - 1; i++) {
57
- const k = keys[i];
58
- let n = cur[k];
59
- if (n != null && typeof n !== 'object')
60
- throw Error(
61
- `Cannot set property <${keys[i + 1]}> of <${JSON.stringify(n)}>`
62
- );
63
- if (n == null)
64
- n = cur[k] = typeof keys[i + 1] === 'number' ? [] : Object.create(null);
65
- cur = n;
74
+ class CmpStrUsageError extends CmpStrError {
75
+ constructor(message, meta, cause) {
76
+ super('E_USAGE', message, meta, cause);
66
77
  }
67
- cur[keys[keys.length - 1]] = value;
68
- return root;
69
78
  }
70
- function merge(
71
- t = Object.create(null),
72
- o = Object.create(null),
73
- mergeUndefined = false
74
- ) {
75
- const target = t ?? Object.create(null);
76
- Object.keys(o).forEach((k) => {
77
- const val = o[k];
78
- if (!mergeUndefined && val === undefined) return;
79
- if (k === '__proto__' || k === 'constructor') return;
80
- if (val !== null && typeof val === 'object' && !Array.isArray(val)) {
81
- const existing = target[k];
82
- target[k] = merge(
83
- existing !== null &&
84
- typeof existing === 'object' &&
85
- !Array.isArray(existing)
86
- ? existing
87
- : Object.create(null),
88
- val,
89
- mergeUndefined
90
- );
91
- } else target[k] = val;
92
- });
93
- return target;
79
+ class CmpStrInternalError extends CmpStrError {
80
+ constructor(message, meta, cause) {
81
+ super('E_INTERNAL', message, meta, cause);
82
+ }
94
83
  }
95
- function rmv(t, path, preserveEmpty = false) {
96
- const keys = parse(path);
97
- const remove = (obj, i = 0) => {
98
- const key = keys[i];
99
- if (!obj || typeof obj !== 'object') return false;
100
- if (i === keys.length - 1) return delete obj[key];
101
- if (!remove(obj[key], i + 1)) return false;
102
- if (!preserveEmpty) {
103
- const val = obj[key];
104
- if (
105
- typeof val === 'object' &&
106
- ((Array.isArray(val) && val.every((v) => v == null)) ||
107
- (!Array.isArray(val) && Object.keys(val).length === 0))
108
- )
109
- delete obj[key];
84
+ class ErrorUtil {
85
+ static assert(condition, message, meta) {
86
+ if (!condition) throw new CmpStrUsageError(message, meta);
87
+ }
88
+ static rethrow(err, message, meta) {
89
+ if (err instanceof CmpStrError) throw err;
90
+ throw new CmpStrInternalError(message, meta, err);
91
+ }
92
+ static format(err) {
93
+ if (err instanceof CmpStrError) return err.toString();
94
+ if (err instanceof Error) return `${err.name}: ${err.message}`;
95
+ return String(err);
96
+ }
97
+ static wrap(fn, message, meta) {
98
+ try {
99
+ return fn();
100
+ } catch (err) {
101
+ if (err instanceof CmpStrError) throw err;
102
+ throw new CmpStrInternalError(message, meta, err);
110
103
  }
111
- return true;
112
- };
113
- remove(t);
114
- return t;
104
+ }
105
+ static async wrapAsync(fn, message, meta) {
106
+ try {
107
+ return await fn();
108
+ } catch (err) {
109
+ if (err instanceof CmpStrError) throw err;
110
+ throw new CmpStrInternalError(message, meta, err);
111
+ }
112
+ }
115
113
  }
116
114
 
117
- var DeepMerge = /*#__PURE__*/ Object.freeze({
115
+ var Errors = /*#__PURE__*/ Object.freeze({
118
116
  __proto__: null,
119
- get: get,
120
- has: has,
121
- merge: merge,
122
- rmv: rmv,
123
- set: set
117
+ CmpStrError: CmpStrError,
118
+ CmpStrInternalError: CmpStrInternalError,
119
+ CmpStrNotFoundError: CmpStrNotFoundError,
120
+ CmpStrUsageError: CmpStrUsageError,
121
+ CmpStrValidationError: CmpStrValidationError,
122
+ ErrorUtil: ErrorUtil
124
123
  });
125
124
 
125
+ class DeepMerge {
126
+ static BRACKET_PATTERN = /\[(\d+)]/g;
127
+ static PATH_CACHE = new Map();
128
+ static walk(obj, keys) {
129
+ let o = obj;
130
+ for (let i = 0; i < keys.length; i++) {
131
+ const k = keys[i];
132
+ if (o == null || !(k in o)) return { exists: false };
133
+ o = o[k];
134
+ }
135
+ return { exists: true, value: o };
136
+ }
137
+ static parse(p) {
138
+ const cached = DeepMerge.PATH_CACHE.get(p);
139
+ if (cached) return cached;
140
+ const parsed = p
141
+ .replace(DeepMerge.BRACKET_PATTERN, '.$1')
142
+ .split('.')
143
+ .map((s) => {
144
+ const n = Number(s);
145
+ return Number.isInteger(n) && String(n) === s ? n : s;
146
+ });
147
+ if (DeepMerge.PATH_CACHE.size > 2000) DeepMerge.PATH_CACHE.clear();
148
+ DeepMerge.PATH_CACHE.set(p, parsed);
149
+ return parsed;
150
+ }
151
+ static has(t, path) {
152
+ return DeepMerge.walk(t, DeepMerge.parse(path)).exists;
153
+ }
154
+ static get(t, path, fb) {
155
+ const r = DeepMerge.walk(t, DeepMerge.parse(path));
156
+ return r.exists ? r.value : fb;
157
+ }
158
+ static set(t, path, value) {
159
+ if (path === '') return value;
160
+ const keys = DeepMerge.parse(path);
161
+ ErrorUtil.assert(
162
+ t === undefined || (typeof t === 'object' && t !== null),
163
+ `Cannot set property <${keys[0]}> of <${JSON.stringify(t)}>`,
164
+ { path: keys[0], target: t }
165
+ );
166
+ const root =
167
+ t ?? (typeof keys[0] === 'number' ? [] : Object.create(null));
168
+ let cur = root;
169
+ for (let i = 0; i < keys.length - 1; i++) {
170
+ const k = keys[i];
171
+ let n = cur[k];
172
+ ErrorUtil.assert(
173
+ n == null || typeof n === 'object',
174
+ `Cannot set property <${keys[i + 1]}> of <${JSON.stringify(n)}>`,
175
+ { path: keys.slice(0, i + 2), value: n }
176
+ );
177
+ if (n == null)
178
+ n = cur[k] =
179
+ typeof keys[i + 1] === 'number' ? [] : Object.create(null);
180
+ cur = n;
181
+ }
182
+ cur[keys[keys.length - 1]] = value;
183
+ return root;
184
+ }
185
+ static rmv(t, path, preserveEmpty = false) {
186
+ const keys = DeepMerge.parse(path);
187
+ const remove = (obj, i = 0) => {
188
+ const key = keys[i];
189
+ if (!obj || typeof obj !== 'object') return false;
190
+ if (i === keys.length - 1) return delete obj[key];
191
+ if (!remove(obj[key], i + 1)) return false;
192
+ if (!preserveEmpty) {
193
+ const val = obj[key];
194
+ let empty = true;
195
+ if (typeof val === 'object') {
196
+ if (Array.isArray(val))
197
+ for (let i = 0; i < val.length; i++) {
198
+ if (val[i] != null) {
199
+ empty = false;
200
+ break;
201
+ }
202
+ }
203
+ else empty = false;
204
+ }
205
+ if (empty) delete obj[key];
206
+ }
207
+ return true;
208
+ };
209
+ remove(t);
210
+ return t;
211
+ }
212
+ static merge(
213
+ t = Object.create(null),
214
+ o = Object.create(null),
215
+ mergeUndefined = false
216
+ ) {
217
+ const target = t ?? Object.create(null);
218
+ for (const k in o) {
219
+ const val = o[k];
220
+ if (!mergeUndefined && val === undefined) continue;
221
+ if (k === '__proto__' || k === 'constructor') continue;
222
+ if (val !== null && typeof val === 'object' && !Array.isArray(val)) {
223
+ const existing = target[k];
224
+ target[k] = DeepMerge.merge(
225
+ existing !== null &&
226
+ typeof existing === 'object' &&
227
+ !Array.isArray(existing)
228
+ ? existing
229
+ : Object.create(null),
230
+ val,
231
+ mergeUndefined
232
+ );
233
+ } else target[k] = val;
234
+ }
235
+ return target;
236
+ }
237
+ }
238
+
126
239
  class DiffChecker {
127
240
  a;
128
241
  b;
@@ -422,48 +535,89 @@
422
535
  }
423
536
 
424
537
  class Filter {
538
+ static IDENTITY = (s) => s;
425
539
  static filters = new Map();
426
540
  static pipeline = new Map();
427
- static getPipeline(hook) {
428
- const cached = Filter.pipeline.get(hook);
429
- if (cached) return cached;
430
- const filter = Filter.filters.get(hook);
431
- if (!filter) return (s) => s;
432
- const pipeline = Array.from(filter.values())
433
- .filter((f) => f.active)
434
- .sort((a, b) => a.priority - b.priority)
435
- .map((f) => f.fn);
436
- const fn = (input) => pipeline.reduce((v, f) => f(v), input);
437
- Filter.pipeline.set(hook, fn);
438
- return fn;
541
+ static getPipeline(hook, force = false) {
542
+ return ErrorUtil.wrap(
543
+ () => {
544
+ if (!force) {
545
+ const cached = Filter.pipeline.get(hook);
546
+ if (cached) return cached;
547
+ }
548
+ const filter = Filter.filters.get(hook);
549
+ if (!filter) {
550
+ Filter.pipeline.set(hook, Filter.IDENTITY);
551
+ return Filter.IDENTITY;
552
+ }
553
+ const pipeline = [];
554
+ for (const f of filter.values()) if (f.active) pipeline.push(f);
555
+ pipeline.sort((a, b) => a.priority - b.priority);
556
+ const fn =
557
+ pipeline.length === 0
558
+ ? Filter.IDENTITY
559
+ : (input) => {
560
+ let v = input;
561
+ for (let i = 0; i < pipeline.length; i++)
562
+ v = pipeline[i].fn(v);
563
+ return v;
564
+ };
565
+ Filter.pipeline.set(hook, fn);
566
+ return fn;
567
+ },
568
+ `Error compiling filter pipeline for hook <${hook}>`,
569
+ { hook }
570
+ );
439
571
  }
440
572
  static has(hook, id) {
441
573
  return !!Filter.filters.get(hook)?.has(id);
442
574
  }
443
575
  static add(hook, id, fn, opt = {}) {
444
- const { priority = 10, active = true, overrideable = true } = opt;
445
- const filter = Filter.filters.get(hook) ?? new Map();
446
- const index = filter.get(id);
447
- if (index && !index.overrideable) return false;
448
- filter.set(id, { id, fn, priority, active, overrideable });
449
- Filter.filters.set(hook, filter);
450
- Filter.pipeline.delete(hook);
451
- return true;
576
+ return ErrorUtil.wrap(
577
+ () => {
578
+ const { priority = 10, active = true, overrideable = true } = opt;
579
+ const filter = Filter.filters.get(hook) ?? new Map();
580
+ const index = filter.get(id);
581
+ if (index && !index.overrideable) return false;
582
+ if (
583
+ index &&
584
+ index.fn === fn &&
585
+ index.priority === priority &&
586
+ index.active === active
587
+ )
588
+ return true;
589
+ filter.set(id, { id, fn, priority, active, overrideable });
590
+ Filter.filters.set(hook, filter);
591
+ Filter.getPipeline(hook, true);
592
+ return true;
593
+ },
594
+ `Error adding filter <${id}> to hook <${hook}>`,
595
+ { hook, id, opt }
596
+ );
452
597
  }
453
598
  static remove(hook, id) {
454
- Filter.pipeline.delete(hook);
455
599
  const filter = Filter.filters.get(hook);
456
- return filter ? filter.delete(id) : false;
600
+ if (!filter || !filter.delete(id)) return false;
601
+ Filter.getPipeline(hook, true);
602
+ return true;
457
603
  }
458
604
  static pause(hook, id) {
459
- Filter.pipeline.delete(hook);
460
- const f = Filter.filters.get(hook)?.get(id);
461
- return !!(f && ((f.active = false), true));
605
+ const filter = Filter.filters.get(hook);
606
+ if (!filter) return false;
607
+ const f = filter.get(id);
608
+ if (!f || !f.active) return false;
609
+ f.active = false;
610
+ Filter.getPipeline(hook, true);
611
+ return true;
462
612
  }
463
613
  static resume(hook, id) {
464
- Filter.pipeline.delete(hook);
465
- const f = Filter.filters.get(hook)?.get(id);
466
- return !!(f && ((f.active = true), true));
614
+ const filter = Filter.filters.get(hook);
615
+ if (!filter) return false;
616
+ const f = filter.get(id);
617
+ if (!f || f.active) return false;
618
+ f.active = true;
619
+ Filter.getPipeline(hook, true);
620
+ return true;
467
621
  }
468
622
  static list(hook, active = false) {
469
623
  const filter = Filter.filters.get(hook);
@@ -473,17 +627,36 @@
473
627
  return out;
474
628
  }
475
629
  static apply(hook, input) {
476
- const fn = Filter.getPipeline(hook);
477
- return Array.isArray(input) ? input.map(fn) : fn(input);
630
+ return ErrorUtil.wrap(
631
+ () => {
632
+ const fn = Filter.getPipeline(hook);
633
+ if (typeof input === 'string') return fn(input);
634
+ const arr = input;
635
+ const out = new Array(arr.length);
636
+ for (let i = 0; i < arr.length; i++) out[i] = fn(arr[i]);
637
+ return out;
638
+ },
639
+ `Error applying filters for hook <${hook}>`,
640
+ { hook, input }
641
+ );
478
642
  }
479
643
  static async applyAsync(hook, input) {
480
- const fn = Filter.getPipeline(hook);
481
- return Array.isArray(input)
482
- ? Promise.all(input.map(fn))
483
- : Promise.resolve(fn(input));
644
+ return ErrorUtil.wrapAsync(
645
+ async () => {
646
+ const fn = Filter.getPipeline(hook);
647
+ if (typeof input === 'string') return Promise.resolve(fn(input));
648
+ const arr = input;
649
+ const out = new Array(arr.length);
650
+ for (let i = 0; i < arr.length; i++)
651
+ out[i] = Promise.resolve(fn(arr[i]));
652
+ return Promise.all(out);
653
+ },
654
+ `Error applying filters for hook <${hook}>`,
655
+ { hook, input }
656
+ );
484
657
  }
485
658
  static clear(hook) {
486
- Filter.pipeline.clear();
659
+ Filter.clearPipeline();
487
660
  if (hook) Filter.filters.delete(hook);
488
661
  else Filter.filters.clear();
489
662
  }
@@ -497,25 +670,21 @@
497
670
  static HASH_OFFSET = 0x811c9dc5;
498
671
  static fastFNV1a(str) {
499
672
  const len = str.length;
673
+ const limit = len & -4;
500
674
  let hash = this.HASH_OFFSET;
501
- const chunks = Math.floor(len / 4);
502
- for (let i = 0; i < chunks; i++) {
503
- const pos = i * 4;
675
+ let i = 0;
676
+ for (; i < limit; i += 4) {
504
677
  const chunk =
505
- str.charCodeAt(pos) |
506
- (str.charCodeAt(pos + 1) << 8) |
507
- (str.charCodeAt(pos + 2) << 16) |
508
- (str.charCodeAt(pos + 3) << 24);
678
+ str.charCodeAt(i) |
679
+ (str.charCodeAt(i + 1) << 8) |
680
+ (str.charCodeAt(i + 2) << 16) |
681
+ (str.charCodeAt(i + 3) << 24);
509
682
  hash ^= chunk;
510
683
  hash = Math.imul(hash, this.FNV_PRIME);
511
684
  }
512
- const remaining = len % 4;
513
- if (remaining > 0) {
514
- const pos = chunks * 4;
515
- for (let i = 0; i < remaining; i++) {
516
- hash ^= str.charCodeAt(pos + i);
517
- hash = Math.imul(hash, this.FNV_PRIME);
518
- }
685
+ for (; i < len; i++) {
686
+ hash ^= str.charCodeAt(i);
687
+ hash = Math.imul(hash, this.FNV_PRIME);
519
688
  }
520
689
  hash ^= hash >>> 16;
521
690
  hash *= 0x85ebca6b;
@@ -526,32 +695,51 @@
526
695
  }
527
696
  }
528
697
  class HashTable {
529
- LRU;
698
+ FIFO;
699
+ maxSize;
530
700
  static MAX_LEN = 2048;
531
- static TABLE_SIZE = 10_000;
532
701
  table = new Map();
533
- constructor(LRU = true) {
534
- this.LRU = LRU;
702
+ constructor(FIFO = true, maxSize = 10000) {
703
+ this.FIFO = FIFO;
704
+ this.maxSize = maxSize;
535
705
  }
536
706
  key(label, strs, sorted = false) {
537
- for (const str of strs) if (str.length > HashTable.MAX_LEN) return false;
538
- const hashes = strs.map((s) => Hasher.fastFNV1a(s));
539
- return [label, ...(sorted ? hashes.sort() : hashes)].join('-');
707
+ const n = strs.length;
708
+ const hashes = new Array(n);
709
+ for (let i = 0; i < n; i++) {
710
+ const s = strs[i];
711
+ if (s.length > HashTable.MAX_LEN) return false;
712
+ hashes[i] = Hasher.fastFNV1a(s);
713
+ }
714
+ if (sorted) hashes.sort((a, b) => a - b);
715
+ let key = label;
716
+ for (let i = 0; i < hashes.length; i++) key += '-' + hashes[i];
717
+ return key;
718
+ }
719
+ has(key) {
720
+ return this.table.has(key);
721
+ }
722
+ get(key) {
723
+ return this.table.get(key);
540
724
  }
541
- has = (key) => this.table.has(key);
542
- get = (key) => this.table.get(key);
543
725
  set(key, entry, update = true) {
544
726
  if (!update && this.table.has(key)) return false;
545
- while (!this.table.has(key) && this.table.size >= HashTable.TABLE_SIZE) {
546
- if (!this.LRU) return false;
727
+ if (!this.table.has(key) && this.table.size >= this.maxSize) {
728
+ if (!this.FIFO) return false;
547
729
  this.table.delete(this.table.keys().next().value);
548
730
  }
549
731
  this.table.set(key, entry);
550
732
  return true;
551
733
  }
552
- delete = (key) => this.table.delete(key);
553
- clear = () => this.table.clear();
554
- size = () => this.table.size;
734
+ delete(key) {
735
+ return this.table.delete(key);
736
+ }
737
+ clear() {
738
+ this.table.clear();
739
+ }
740
+ size() {
741
+ return this.table.size;
742
+ }
555
743
  }
556
744
 
557
745
  class Normalizer {
@@ -568,42 +756,91 @@
568
756
  return Array.from(new Set(flags)).sort().join('');
569
757
  }
570
758
  static getPipeline(flags) {
571
- if (Normalizer.pipeline.has(flags)) return Normalizer.pipeline.get(flags);
572
- const { REGEX } = Normalizer;
573
- const steps = [
574
- ['d', (s) => s.normalize('NFD')],
575
- ['i', (s) => s.toLowerCase()],
576
- ['k', (s) => s.replace(REGEX.nonLetters, '')],
577
- ['n', (s) => s.replace(REGEX.nonNumbers, '')],
578
- ['r', (s) => s.replace(REGEX.doubleChars, '$1')],
579
- ['s', (s) => s.replace(REGEX.specialChars, '')],
580
- ['t', (s) => s.trim()],
581
- ['u', (s) => s.normalize('NFC')],
582
- ['w', (s) => s.replace(REGEX.whitespace, ' ')],
583
- ['x', (s) => s.normalize('NFKC')]
584
- ];
585
- const pipeline = steps
586
- .filter(([f]) => flags.includes(f))
587
- .map(([, fn]) => fn);
588
- const fn = (s) => pipeline.reduce((v, f) => f(v), s);
589
- Normalizer.pipeline.set(flags, fn);
590
- return fn;
591
- }
592
- static normalize(input, flags) {
593
- if (!flags || typeof flags !== 'string' || !input) return input;
594
- flags = this.canonicalFlags(flags);
595
- if (Array.isArray(input))
596
- return input.map((s) => Normalizer.normalize(s, flags));
597
- const key = Normalizer.cache.key(flags, [input]);
598
- if (key && Normalizer.cache.has(key)) return Normalizer.cache.get(key);
599
- const res = Normalizer.getPipeline(flags)(input);
600
- if (key) Normalizer.cache.set(key, res);
601
- return res;
759
+ return ErrorUtil.wrap(
760
+ () => {
761
+ const cached = Normalizer.pipeline.get(flags);
762
+ if (cached) return cached;
763
+ const { REGEX } = Normalizer;
764
+ const steps = [];
765
+ for (let i = 0; i < flags.length; i++) {
766
+ switch (flags[i]) {
767
+ case 'd':
768
+ steps.push((s) => s.normalize('NFD'));
769
+ break;
770
+ case 'i':
771
+ steps.push((s) => s.toLowerCase());
772
+ break;
773
+ case 'k':
774
+ steps.push((s) => s.replace(REGEX.nonLetters, ''));
775
+ break;
776
+ case 'n':
777
+ steps.push((s) => s.replace(REGEX.nonNumbers, ''));
778
+ break;
779
+ case 'r':
780
+ steps.push((s) => s.replace(REGEX.doubleChars, '$1'));
781
+ break;
782
+ case 's':
783
+ steps.push((s) => s.replace(REGEX.specialChars, ''));
784
+ break;
785
+ case 't':
786
+ steps.push((s) => s.trim());
787
+ break;
788
+ case 'u':
789
+ steps.push((s) => s.normalize('NFC'));
790
+ break;
791
+ case 'w':
792
+ steps.push((s) => s.replace(REGEX.whitespace, ' '));
793
+ break;
794
+ case 'x':
795
+ steps.push((s) => s.normalize('NFKC'));
796
+ break;
797
+ }
798
+ }
799
+ const fn = (input) => {
800
+ let v = input;
801
+ for (let i = 0; i < steps.length; i++) v = steps[i](v);
802
+ return v;
803
+ };
804
+ Normalizer.pipeline.set(flags, fn);
805
+ return fn;
806
+ },
807
+ `Failed to create normalization pipeline for flags: ${flags}`,
808
+ { flags }
809
+ );
810
+ }
811
+ static normalize(input, flags, normalizedFlags) {
812
+ return ErrorUtil.wrap(
813
+ () => {
814
+ if (!flags || typeof flags !== 'string' || !input) return input;
815
+ flags = normalizedFlags ?? this.canonicalFlags(flags);
816
+ const pipeline = Normalizer.getPipeline(flags);
817
+ const normalizeOne = (s) => {
818
+ const key = Normalizer.cache.key(flags, [s]);
819
+ if (key && Normalizer.cache.has(key))
820
+ return Normalizer.cache.get(key);
821
+ const res = pipeline(s);
822
+ if (key) Normalizer.cache.set(key, res);
823
+ return res;
824
+ };
825
+ return Array.isArray(input)
826
+ ? input.map(normalizeOne)
827
+ : normalizeOne(input);
828
+ },
829
+ `Failed to normalize input with flags: ${flags}`,
830
+ { input, flags }
831
+ );
602
832
  }
603
833
  static async normalizeAsync(input, flags) {
604
- return await (Array.isArray(input)
605
- ? Promise.all(input.map((s) => Normalizer.normalize(s, flags)))
606
- : Promise.resolve(Normalizer.normalize(input, flags)));
834
+ return await ErrorUtil.wrapAsync(
835
+ async () => {
836
+ if (!flags || typeof flags !== 'string' || !input) return input;
837
+ return await (Array.isArray(input)
838
+ ? Promise.all(input.map((s) => Normalizer.normalize(s, flags)))
839
+ : Promise.resolve(Normalizer.normalize(input, flags)));
840
+ },
841
+ `Failed to asynchronously normalize input with flags: ${flags}`,
842
+ { input, flags }
843
+ );
607
844
  }
608
845
  static clear() {
609
846
  Normalizer.pipeline.clear();
@@ -611,17 +848,144 @@
611
848
  }
612
849
  }
613
850
 
851
+ class RingPool {
852
+ maxSize;
853
+ buffers = [];
854
+ pointer = 0;
855
+ constructor(maxSize) {
856
+ this.maxSize = maxSize;
857
+ }
858
+ acquire(minSize, allowOversize) {
859
+ return ErrorUtil.wrap(
860
+ () => {
861
+ const buffers = this.buffers;
862
+ const len = buffers.length;
863
+ for (let i = 0; i < len; i++) {
864
+ const idx = (this.pointer + i) % len;
865
+ const item = buffers[idx];
866
+ const size = item.size;
867
+ if (size >= minSize && (allowOversize || size === minSize)) {
868
+ this.pointer = (idx + 1) % len;
869
+ return item;
870
+ }
871
+ }
872
+ return null;
873
+ },
874
+ `Failed to acquire buffer of size >= ${minSize} from pool`,
875
+ { minSize, allowOversize }
876
+ );
877
+ }
878
+ release(item) {
879
+ ErrorUtil.wrap(
880
+ () => {
881
+ const buffers = this.buffers;
882
+ if (buffers.length < this.maxSize) {
883
+ buffers.push(item);
884
+ return;
885
+ }
886
+ buffers[this.pointer] = item;
887
+ this.pointer = (this.pointer + 1) % this.maxSize;
888
+ },
889
+ `Failed to release buffer back to pool`,
890
+ { item }
891
+ );
892
+ }
893
+ clear() {
894
+ this.buffers = [];
895
+ this.pointer = 0;
896
+ }
897
+ }
898
+ class Pool {
899
+ static CONFIG = {
900
+ int32: {
901
+ type: 'int32',
902
+ maxSize: 64,
903
+ maxItemSize: 2048,
904
+ allowOversize: true
905
+ },
906
+ 'arr[]': {
907
+ type: 'arr[]',
908
+ maxSize: 4,
909
+ maxItemSize: 1024,
910
+ allowOversize: false
911
+ },
912
+ 'number[]': {
913
+ type: 'number[]',
914
+ maxSize: 16,
915
+ maxItemSize: 1024,
916
+ allowOversize: false
917
+ },
918
+ 'string[]': {
919
+ type: 'string[]',
920
+ maxSize: 2,
921
+ maxItemSize: 1024,
922
+ allowOversize: false
923
+ },
924
+ set: { type: 'set', maxSize: 8, maxItemSize: 0, allowOversize: false },
925
+ map: { type: 'map', maxSize: 8, maxItemSize: 0, allowOversize: false }
926
+ };
927
+ static POOLS = {
928
+ int32: new RingPool(64),
929
+ 'arr[]': new RingPool(4),
930
+ 'number[]': new RingPool(16),
931
+ 'string[]': new RingPool(2),
932
+ set: new RingPool(8),
933
+ map: new RingPool(8)
934
+ };
935
+ static allocate(type, size) {
936
+ switch (type) {
937
+ case 'int32':
938
+ return new Int32Array(size);
939
+ case 'arr[]':
940
+ return new Array(size);
941
+ case 'number[]':
942
+ return new Float64Array(size);
943
+ case 'string[]':
944
+ return new Array(size);
945
+ case 'set':
946
+ return new Set();
947
+ case 'map':
948
+ return new Map();
949
+ }
950
+ }
951
+ static acquire(type, size) {
952
+ const CONFIG = this.CONFIG[type];
953
+ if (!CONFIG)
954
+ throw new CmpStrUsageError(`Unsupported pool type <${type}>`, { type });
955
+ if (size > CONFIG.maxItemSize) return this.allocate(type, size);
956
+ const item = this.POOLS[type].acquire(size, CONFIG.allowOversize);
957
+ if (item)
958
+ return type === 'int32' ? item.buffer.subarray(0, size) : item.buffer;
959
+ return this.allocate(type, size);
960
+ }
961
+ static acquireMany(type, sizes) {
962
+ const out = new Array(sizes.length);
963
+ for (let i = 0; i < sizes.length; i++)
964
+ out[i] = this.acquire(type, sizes[i]);
965
+ return out;
966
+ }
967
+ static release(type, buffer, size) {
968
+ const CONFIG = this.CONFIG[type];
969
+ if (!CONFIG)
970
+ throw new CmpStrUsageError(`Unsupported pool type <${type}>`, { type });
971
+ if (size <= CONFIG.maxItemSize)
972
+ this.POOLS[type].release({ buffer, size });
973
+ }
974
+ }
975
+
614
976
  class Profiler {
615
977
  active;
616
978
  static ENV;
617
979
  static instance;
618
980
  nowFn;
619
981
  memFn;
620
- store = new Set();
982
+ store = [];
983
+ last;
621
984
  totalTime = 0;
622
985
  totalMem = 0;
623
986
  static detectEnv() {
624
- if (typeof process !== 'undefined') Profiler.ENV = 'nodejs';
987
+ if (typeof process !== 'undefined' && process.versions?.node)
988
+ Profiler.ENV = 'nodejs';
625
989
  else if (typeof performance !== 'undefined') Profiler.ENV = 'browser';
626
990
  else Profiler.ENV = 'unknown';
627
991
  }
@@ -633,7 +997,7 @@
633
997
  this.active = active;
634
998
  switch (Profiler.ENV) {
635
999
  case 'nodejs':
636
- this.nowFn = () => Number(process.hrtime.bigint()) / 1e6;
1000
+ this.nowFn = () => Number(process.hrtime.bigint()) * 1e-6;
637
1001
  this.memFn = () => process.memoryUsage().heapUsed;
638
1002
  break;
639
1003
  case 'browser':
@@ -646,40 +1010,52 @@
646
1010
  break;
647
1011
  }
648
1012
  }
649
- now = () => this.nowFn();
650
- mem = () => this.memFn();
651
- profile(fn, meta) {
652
- const startTime = this.now(),
653
- startMem = this.mem();
654
- const res = fn();
655
- const deltaTime = this.now() - startTime,
656
- deltaMem = this.mem() - startMem;
657
- this.store.add({ time: deltaTime, mem: deltaMem, res, meta });
658
- ((this.totalTime += deltaTime), (this.totalMem += deltaMem));
659
- return res;
1013
+ storeRes(entry) {
1014
+ this.store.push((this.last = entry));
1015
+ this.totalTime += entry.time;
1016
+ this.totalMem += entry.mem;
660
1017
  }
661
- enable = () => {
1018
+ enable() {
662
1019
  this.active = true;
663
- };
664
- disable = () => {
1020
+ }
1021
+ disable() {
665
1022
  this.active = false;
666
- };
1023
+ }
667
1024
  clear() {
668
- this.store.clear();
1025
+ this.store.length = 0;
1026
+ this.last = undefined;
669
1027
  this.totalTime = 0;
670
1028
  this.totalMem = 0;
671
1029
  }
672
1030
  run(fn, meta = {}) {
673
- return this.active ? this.profile(fn, meta) : fn();
1031
+ if (!this.active) return fn();
1032
+ const startTime = this.nowFn(),
1033
+ startMem = this.memFn();
1034
+ const res = fn();
1035
+ const deltaTime = this.nowFn() - startTime,
1036
+ deltaMem = this.memFn() - startMem;
1037
+ this.storeRes({ time: deltaTime, mem: deltaMem, res, meta });
1038
+ return res;
674
1039
  }
675
1040
  async runAsync(fn, meta = {}) {
676
- return this.active
677
- ? this.profile(async () => await fn(), meta)
678
- : await fn();
1041
+ if (!this.active) return fn();
1042
+ const startTime = this.nowFn(),
1043
+ startMem = this.memFn();
1044
+ const res = await fn();
1045
+ const deltaTime = this.nowFn() - startTime,
1046
+ deltaMem = this.memFn() - startMem;
1047
+ this.storeRes({ time: deltaTime, mem: deltaMem, res, meta });
1048
+ return res;
1049
+ }
1050
+ getAll() {
1051
+ return [...this.store];
1052
+ }
1053
+ getLast() {
1054
+ return this.last;
1055
+ }
1056
+ getTotal() {
1057
+ return { time: this.totalTime, mem: this.totalMem };
679
1058
  }
680
- getAll = () => [...this.store];
681
- getLast = () => this.getAll().pop();
682
- getTotal = () => ({ time: this.totalTime, mem: this.totalMem });
683
1059
  services = Object.freeze({
684
1060
  enable: this.enable.bind(this),
685
1061
  disable: this.disable.bind(this),
@@ -693,19 +1069,34 @@
693
1069
  const registry = Object.create(null);
694
1070
  const factory = Object.create(null);
695
1071
  function Registry(reg, ctor) {
696
- if (reg in registry || reg in factory)
697
- throw new Error(
698
- `Registry <${reg}> already exists / overwriting is forbidden`
699
- );
1072
+ ErrorUtil.assert(
1073
+ !(reg in registry || reg in factory),
1074
+ `Registry <${reg}> already exists / overwriting is forbidden`,
1075
+ { registry: reg }
1076
+ );
700
1077
  const classes = Object.create(null);
701
1078
  const service = Object.freeze({
702
1079
  add(name, cls, update = false) {
703
- if (!(cls.prototype instanceof ctor))
704
- throw new TypeError(`Class must extend <${reg}>`);
705
- if (!update && name in classes)
706
- throw new Error(
707
- `Entry <${name}> already exists / use <update=true> to overwrite`
708
- );
1080
+ ErrorUtil.assert(
1081
+ typeof name === 'string' && name.length > 0,
1082
+ `Class name must be a non-empty string`,
1083
+ { registry: reg, name }
1084
+ );
1085
+ ErrorUtil.assert(
1086
+ typeof cls === 'function',
1087
+ `Class must be a constructor function`,
1088
+ { registry: reg, class: cls }
1089
+ );
1090
+ ErrorUtil.assert(
1091
+ cls.prototype instanceof ctor,
1092
+ `Class must extend <${reg}>`,
1093
+ { registry: reg, class: cls }
1094
+ );
1095
+ ErrorUtil.assert(
1096
+ update || !(name in classes),
1097
+ `Class <${name}> already exists / use <update=true> to overwrite`,
1098
+ { registry: reg, name }
1099
+ );
709
1100
  classes[name] = cls;
710
1101
  },
711
1102
  remove(name) {
@@ -718,8 +1109,16 @@
718
1109
  return Object.keys(classes);
719
1110
  },
720
1111
  get(name) {
721
- if (!(name in classes))
722
- throw new Error(`Class <${name}> not registered for <${reg}>`);
1112
+ ErrorUtil.assert(
1113
+ typeof name === 'string' && name.length > 0,
1114
+ `Class name must be a non-empty string`,
1115
+ { registry: reg, name }
1116
+ );
1117
+ ErrorUtil.assert(
1118
+ name in classes,
1119
+ `Class <${name}> not registered for <${reg}>`,
1120
+ { registry: reg, name }
1121
+ );
723
1122
  return classes[name];
724
1123
  }
725
1124
  });
@@ -729,764 +1128,356 @@
729
1128
  }
730
1129
  function resolveCls(reg, cls) {
731
1130
  if (!(reg in registry))
732
- throw new ReferenceError(`Registry <${reg}> does not exist`);
733
- return typeof cls === 'string' ? registry[reg]?.get(cls) : cls;
1131
+ throw new CmpStrNotFoundError(`Registry <${reg}> does not exist`, {
1132
+ registry: reg
1133
+ });
1134
+ return typeof cls === 'string' ? registry[reg].get(cls) : cls;
734
1135
  }
735
1136
  function createFromRegistry(reg, cls, ...args) {
736
- cls = resolveCls(reg, cls);
737
- try {
738
- return new cls(...args);
739
- } catch (err) {
740
- throw new Error(`Cannot instantiate class <${cls.name ?? cls}>`, {
741
- cause: err
742
- });
743
- }
1137
+ const ctor = resolveCls(reg, cls);
1138
+ return ErrorUtil.wrap(
1139
+ () => new ctor(...args),
1140
+ `Failed to create instance of class <${ctor.name ?? cls}> from registry <${reg}>`,
1141
+ { registry: reg, class: cls, args }
1142
+ );
744
1143
  }
745
1144
 
746
- class RingPool {
747
- maxSize;
748
- buffers = [];
749
- pointer = 0;
750
- constructor(maxSize) {
751
- this.maxSize = maxSize;
1145
+ const profiler$2 = Profiler.getInstance();
1146
+ class Metric {
1147
+ static cache = new HashTable();
1148
+ metric;
1149
+ a;
1150
+ b;
1151
+ origA = [];
1152
+ origB = [];
1153
+ options;
1154
+ optKey;
1155
+ symmetric;
1156
+ results;
1157
+ static clear() {
1158
+ this.cache.clear();
752
1159
  }
753
- acquire(minSize, allowOversize) {
754
- const len = this.buffers.length;
755
- for (let i = 0; i < len; i++) {
756
- const idx = (this.pointer + i) & (len - 1);
757
- const item = this.buffers[idx];
758
- if (item.size >= minSize && (allowOversize || item.size === minSize)) {
759
- this.pointer = (idx + 1) & (len - 1);
760
- return item;
761
- }
762
- }
763
- return null;
1160
+ static swap(a, b, m, n) {
1161
+ return m > n ? [b, a, n, m] : [a, b, m, n];
764
1162
  }
765
- release(item) {
766
- if (this.buffers.length < this.maxSize)
767
- return void [this.buffers.push(item)];
768
- this.buffers[this.pointer] = item;
769
- this.pointer = (this.pointer + 1) % this.maxSize;
1163
+ static clamp(res) {
1164
+ return Math.max(0, Math.min(1, res));
770
1165
  }
771
- clear() {
772
- this.buffers = [];
773
- this.pointer = 0;
1166
+ constructor(metric, a, b, opt = {}, symmetric = false) {
1167
+ this.metric = metric;
1168
+ this.a = Array.isArray(a) ? a : [a];
1169
+ this.b = Array.isArray(b) ? b : [b];
1170
+ ErrorUtil.assert(
1171
+ this.a.length > 0 && this.b.length > 0,
1172
+ `Inputs <a> and <b> must not be empty`,
1173
+ { a: this.a, b: this.b }
1174
+ );
1175
+ this.options = opt;
1176
+ this.optKey = Hasher.fastFNV1a(
1177
+ JSON.stringify(opt, Object.keys(opt).sort())
1178
+ ).toString();
1179
+ this.symmetric = symmetric;
774
1180
  }
775
- }
776
- class Pool {
777
- static CONFIG = {
778
- int32: {
779
- type: 'int32',
780
- maxSize: 64,
781
- maxItemSize: 2048,
782
- allowOversize: true
783
- },
784
- 'number[]': {
785
- type: 'number[]',
786
- maxSize: 16,
787
- maxItemSize: 1024,
788
- allowOversize: false
789
- },
790
- 'string[]': {
791
- type: 'string[]',
792
- maxSize: 2,
793
- maxItemSize: 1024,
794
- allowOversize: false
795
- },
796
- set: { type: 'set', maxSize: 8, maxItemSize: 0, allowOversize: false },
797
- map: { type: 'map', maxSize: 8, maxItemSize: 0, allowOversize: false }
798
- };
799
- static POOLS = {
800
- int32: new RingPool(64),
801
- 'number[]': new RingPool(16),
802
- 'string[]': new RingPool(2),
803
- set: new RingPool(8),
804
- map: new RingPool(8)
805
- };
806
- static allocate(type, size) {
807
- switch (type) {
808
- case 'int32':
809
- return new Int32Array(size);
810
- case 'number[]':
811
- return new Float64Array(size);
812
- case 'string[]':
813
- return new Array(size);
814
- case 'set':
815
- return new Set();
816
- case 'map':
817
- return new Map();
818
- }
1181
+ preCompute(a, b, m, n) {
1182
+ if (a === b) return { res: 1 };
1183
+ if (m == 0 || n == 0 || (m < 2 && n < 2)) return { res: 0 };
1184
+ return undefined;
819
1185
  }
820
- static acquire(type, size) {
821
- const CONFIG = this.CONFIG[type];
822
- if (size > CONFIG.maxItemSize) return this.allocate(type, size);
823
- const item = this.POOLS[type].acquire(size, CONFIG.allowOversize);
824
- if (item)
825
- return type === 'int32' ? item.buffer.subarray(0, size) : item.buffer;
826
- return this.allocate(type, size);
1186
+ compute(a, b, m, n, maxLen) {
1187
+ throw new CmpStrInternalError(
1188
+ `Method compute() must be overridden in a subclass`
1189
+ );
827
1190
  }
828
- static acquireMany(type, sizes) {
829
- return sizes.map((size) => this.acquire(type, size));
1191
+ runSingle(i, j) {
1192
+ return ErrorUtil.wrap(
1193
+ () => {
1194
+ let a = String(this.a[i]),
1195
+ A = a;
1196
+ let b = String(this.b[j]),
1197
+ B = b;
1198
+ let m = A.length,
1199
+ n = B.length;
1200
+ let result = this.preCompute(A, B, m, n);
1201
+ if (!result) {
1202
+ result = profiler$2.run(() => {
1203
+ if (this.symmetric) [A, B, m, n] = Metric.swap(A, B, m, n);
1204
+ let key = Metric.cache.key(this.metric, [A, B], this.symmetric);
1205
+ if (key) key += this.optKey;
1206
+ return (
1207
+ Metric.cache.get(key || '') ??
1208
+ (() => {
1209
+ const maxLen = m > n ? m : n;
1210
+ const res = this.compute(A, B, m, n, maxLen);
1211
+ if (key) Metric.cache.set(key, res);
1212
+ return res;
1213
+ })()
1214
+ );
1215
+ });
1216
+ }
1217
+ return {
1218
+ metric: this.metric,
1219
+ a: this.origA.length > i ? this.origA[i] : a,
1220
+ b: this.origB.length > j ? this.origB[j] : b,
1221
+ ...result
1222
+ };
1223
+ },
1224
+ `Failed to compute metric for inputs at indices a[${i}] and b[${j}]`,
1225
+ { i, j }
1226
+ );
830
1227
  }
831
- static release(type, buffer, size) {
832
- if (size <= this.CONFIG[type].maxItemSize)
833
- this.POOLS[type].release({ buffer, size });
1228
+ async runSingleAsync(i, j) {
1229
+ return Promise.resolve(this.runSingle(i, j));
834
1230
  }
835
- }
836
-
837
- class StructuredData {
838
- data;
839
- key;
840
- static create(data, key) {
841
- return new StructuredData(data, key);
1231
+ runBatch() {
1232
+ const results = [];
1233
+ for (let i = 0; i < this.a.length; i++)
1234
+ for (let j = 0; j < this.b.length; j++)
1235
+ results.push(this.runSingle(i, j));
1236
+ this.results = results;
842
1237
  }
843
- constructor(data, key) {
844
- this.data = data;
845
- this.key = key;
1238
+ async runBatchAsync() {
1239
+ const tasks = [];
1240
+ for (let i = 0; i < this.a.length; i++)
1241
+ for (let j = 0; j < this.b.length; j++)
1242
+ tasks.push(this.runSingleAsync(i, j));
1243
+ this.results = await Promise.all(tasks);
846
1244
  }
847
- extractFrom(arr, key) {
848
- const result = Pool.acquire('string[]', arr.length);
849
- for (let i = 0; i < arr.length; i++) {
850
- const val = arr[i][key];
851
- result[i] = typeof val === 'string' ? val : String(val ?? '');
852
- }
853
- return result;
1245
+ runPairwise() {
1246
+ const results = [];
1247
+ for (let i = 0; i < this.a.length; i++)
1248
+ results.push(this.runSingle(i, i));
1249
+ this.results = results;
854
1250
  }
855
- extract = () => this.extractFrom(this.data, this.key);
856
- isMetricResult(v) {
857
- return (
858
- typeof v === 'object' &&
859
- v !== null &&
860
- 'a' in v &&
861
- 'b' in v &&
862
- 'res' in v
863
- );
1251
+ async runPairwiseAsync() {
1252
+ const tasks = [];
1253
+ for (let i = 0; i < this.a.length; i++)
1254
+ tasks.push(this.runSingleAsync(i, i));
1255
+ this.results = await Promise.all(tasks);
864
1256
  }
865
- isCmpStrResult(v) {
866
- return (
867
- typeof v === 'object' &&
868
- v !== null &&
869
- 'source' in v &&
870
- 'target' in v &&
871
- 'match' in v
872
- );
1257
+ setOriginal(a, b) {
1258
+ if (a) this.origA = Array.isArray(a) ? a : [a];
1259
+ if (b) this.origB = Array.isArray(b) ? b : [b];
1260
+ return this;
873
1261
  }
874
- normalizeResults(results) {
875
- if (!Array.isArray(results) || results.length === 0) return [];
876
- const first = results[0];
877
- let normalized = [];
878
- if (this.isMetricResult(first)) normalized = results;
879
- else if (this.isCmpStrResult(first))
880
- normalized = results.map((r) => ({
881
- metric: 'unknown',
882
- a: r.source,
883
- b: r.target,
884
- res: r.match,
885
- raw: r.raw
886
- }));
887
- else
888
- throw new TypeError(
889
- 'Unsupported result format for StructuredData normalization.'
890
- );
891
- return normalized.map((r, idx) => ({ ...r, __idx: idx }));
1262
+ isBatch() {
1263
+ return this.a.length > 1 || this.b.length > 1;
892
1264
  }
893
- rebuild(results, sourceData, extractedStrings, removeZero, objectsOnly) {
894
- const stringToIndices = new Map();
895
- for (let i = 0; i < extractedStrings.length; i++) {
896
- const str = extractedStrings[i];
897
- if (!stringToIndices.has(str)) stringToIndices.set(str, []);
898
- stringToIndices.get(str).push(i);
899
- }
900
- const output = new Array(results.length);
901
- const occurrenceCount = new Map();
902
- let out = 0;
903
- for (let i = 0; i < results.length; i++) {
904
- const result = results[i];
905
- if (removeZero && result.res === 0) continue;
906
- const targetStr = result.b || '';
907
- const indices = stringToIndices.get(targetStr);
908
- let dataIndex;
909
- if (indices && indices.length > 0) {
910
- const occurrence = occurrenceCount.get(targetStr) ?? 0;
911
- occurrenceCount.set(targetStr, occurrence + 1);
912
- dataIndex = indices[occurrence % indices.length];
913
- } else {
914
- dataIndex = result.__idx ?? i;
915
- }
916
- if (dataIndex < 0 || dataIndex >= sourceData.length) continue;
917
- const sourceObj = sourceData[dataIndex];
918
- const mappedTarget = extractedStrings[dataIndex] || targetStr;
919
- if (objectsOnly) output[out++] = sourceObj;
920
- else
921
- output[out++] = {
922
- obj: sourceObj,
923
- key: this.key,
924
- result: {
925
- source: result.a,
926
- target: mappedTarget,
927
- match: result.res
928
- },
929
- ...(result.raw ? { raw: result.raw } : null)
930
- };
931
- }
932
- output.length = out;
933
- return output;
1265
+ isSingle() {
1266
+ return !this.isBatch();
934
1267
  }
935
- sort(results, sort) {
936
- if (!sort || results.length <= 1) return results;
937
- const asc = sort === 'asc';
938
- return results.sort((a, b) => (asc ? a.res - b.res : b.res - a.res));
1268
+ isPairwise(safe = false) {
1269
+ return this.isBatch() && this.a.length === this.b.length
1270
+ ? true
1271
+ : !safe &&
1272
+ (() => {
1273
+ throw new CmpStrUsageError(
1274
+ `Mode <pairwise> requires arrays of equal length`,
1275
+ { a: this.a, b: this.b }
1276
+ );
1277
+ })();
939
1278
  }
940
- finalizeLookup(results, extractedStrings, opt) {
941
- return this.rebuild(
942
- this.sort(this.normalizeResults(results), opt?.sort),
943
- this.data,
944
- extractedStrings,
945
- opt?.removeZero,
946
- opt?.objectsOnly
947
- );
1279
+ isSymmetrical() {
1280
+ return this.symmetric;
948
1281
  }
949
- performLookup(fn, extractedStrings, opt) {
950
- return this.finalizeLookup(fn(), extractedStrings, opt);
1282
+ whichMode(mode) {
1283
+ return mode ?? this.options.mode ?? 'default';
951
1284
  }
952
- async performLookupAsync(fn, extractedStrings, opt) {
953
- return this.finalizeLookup(await fn(), extractedStrings, opt);
1285
+ clear() {
1286
+ this.results = undefined;
954
1287
  }
955
- lookup(fn, query, opt) {
956
- const b = this.extract();
957
- try {
958
- return this.performLookup(() => fn(query, b, opt), b, opt);
959
- } finally {
960
- Pool.release('string[]', b, b.length);
1288
+ run(mode, clear = true) {
1289
+ if (clear) this.clear();
1290
+ switch (this.whichMode(mode)) {
1291
+ case 'default':
1292
+ if (this.isSingle()) {
1293
+ this.results = this.runSingle(0, 0);
1294
+ break;
1295
+ }
1296
+ case 'batch':
1297
+ this.runBatch();
1298
+ break;
1299
+ case 'single':
1300
+ this.results = this.runSingle(0, 0);
1301
+ break;
1302
+ case 'pairwise':
1303
+ if (this.isPairwise()) this.runPairwise();
1304
+ break;
1305
+ default:
1306
+ throw new CmpStrInternalError(`Unsupported mode <${mode}>`);
961
1307
  }
962
1308
  }
963
- async lookupAsync(fn, query, opt) {
964
- const b = this.extract();
965
- try {
966
- return await this.performLookupAsync(() => fn(query, b, opt), b, opt);
967
- } finally {
968
- Pool.release('string[]', b, b.length);
1309
+ async runAsync(mode, clear = true) {
1310
+ if (clear) this.clear();
1311
+ switch (this.whichMode(mode)) {
1312
+ case 'default':
1313
+ if (this.isSingle()) {
1314
+ this.results = await this.runSingleAsync(0, 0);
1315
+ break;
1316
+ }
1317
+ case 'batch':
1318
+ await this.runBatchAsync();
1319
+ break;
1320
+ case 'single':
1321
+ this.results = await this.runSingleAsync(0, 0);
1322
+ break;
1323
+ case 'pairwise':
1324
+ if (this.isPairwise()) await this.runPairwiseAsync();
1325
+ break;
1326
+ default:
1327
+ throw new CmpStrInternalError(`Unsupported async mode <${mode}>`);
969
1328
  }
970
1329
  }
971
- lookupPairs(fn, other, otherKey, opt) {
972
- const a = this.extract();
973
- const b = this.extractFrom(other, otherKey);
974
- try {
975
- return this.performLookup(() => fn(a, b, opt), a, opt);
976
- } finally {
977
- Pool.release('string[]', a, a.length);
978
- Pool.release('string[]', b, b.length);
979
- }
1330
+ getMetricName() {
1331
+ return this.metric;
980
1332
  }
981
- async lookupPairsAsync(fn, other, otherKey, opt) {
982
- const a = this.extract();
983
- const b = this.extractFrom(other, otherKey);
1333
+ getResults() {
1334
+ ErrorUtil.assert(
1335
+ this.results !== undefined,
1336
+ `run() must be called before getResults()`
1337
+ );
1338
+ return this.results;
1339
+ }
1340
+ }
1341
+ const MetricRegistry = Registry('metric', Metric);
1342
+
1343
+ class CosineSimilarity extends Metric {
1344
+ constructor(a, b, opt = {}) {
1345
+ super('cosine', a, b, opt, true);
1346
+ }
1347
+ _termFreq(str, delimiter) {
1348
+ const terms = str.split(delimiter);
1349
+ const freq = Pool.acquire('map', terms.length);
1350
+ for (const term of terms) freq.set(term, (freq.get(term) || 0) + 1);
1351
+ return freq;
1352
+ }
1353
+ compute(a, b) {
1354
+ const { delimiter = ' ' } = this.options;
1355
+ const termsA = this._termFreq(a, delimiter);
1356
+ const termsB = this._termFreq(b, delimiter);
984
1357
  try {
985
- return await this.performLookupAsync(() => fn(a, b, opt), a, opt);
1358
+ let dotP = 0,
1359
+ magA = 0,
1360
+ magB = 0;
1361
+ for (const [term, freqA] of termsA) {
1362
+ const freqB = termsB.get(term) || 0;
1363
+ dotP += freqA * freqB;
1364
+ magA += freqA * freqA;
1365
+ }
1366
+ for (const freqB of termsB.values()) magB += freqB * freqB;
1367
+ magA = Math.sqrt(magA);
1368
+ magB = Math.sqrt(magB);
1369
+ return {
1370
+ res: magA && magB ? Metric.clamp(dotP / (magA * magB)) : 0,
1371
+ raw: { dotProduct: dotP, magnitudeA: magA, magnitudeB: magB }
1372
+ };
986
1373
  } finally {
987
- Pool.release('string[]', a, a.length);
988
- Pool.release('string[]', b, b.length);
1374
+ Pool.release('map', termsA, termsA.size);
1375
+ Pool.release('map', termsB, termsB.size);
989
1376
  }
990
1377
  }
991
1378
  }
1379
+ MetricRegistry.add('cosine', CosineSimilarity);
992
1380
 
993
- class TextAnalyzer {
994
- static REGEX = {
995
- number: /\d/,
996
- sentence: /(?<=[.!?])\s+/,
997
- word: /\p{L}+/gu,
998
- nonWord: /[^\p{L}]/gu,
999
- vowelGroup: /[aeiouy]+/g,
1000
- letter: /\p{L}/gu,
1001
- ucLetter: /\p{Lu}/gu
1002
- };
1003
- text;
1004
- words = [];
1005
- sentences = [];
1006
- charFrequency = new Map();
1007
- wordHistogram = new Map();
1008
- syllableCache = new Map();
1009
- syllableStats;
1010
- constructor(input) {
1011
- this.text = input.trim();
1012
- this.tokenize();
1013
- this.computeFrequencies();
1381
+ class DamerauLevenshteinDistance extends Metric {
1382
+ constructor(a, b, opt = {}) {
1383
+ super('damerau', a, b, opt, true);
1014
1384
  }
1015
- tokenize() {
1016
- let match;
1017
- const lcText = this.text.toLowerCase();
1018
- while ((match = TextAnalyzer.REGEX.word.exec(lcText)) !== null)
1019
- this.words.push(match[0]);
1020
- this.sentences = this.text
1021
- .split(TextAnalyzer.REGEX.sentence)
1022
- .filter(Boolean);
1023
- }
1024
- computeFrequencies() {
1025
- for (const char of this.text)
1026
- this.charFrequency.set(char, (this.charFrequency.get(char) ?? 0) + 1);
1027
- for (const word of this.words)
1028
- this.wordHistogram.set(word, (this.wordHistogram.get(word) ?? 0) + 1);
1029
- }
1030
- estimateSyllables(word) {
1031
- const clean = word
1032
- .normalize('NFC')
1033
- .toLowerCase()
1034
- .replace(TextAnalyzer.REGEX.nonWord, '');
1035
- if (this.syllableCache.has(clean)) return this.syllableCache.get(clean);
1036
- const matches = clean.match(TextAnalyzer.REGEX.vowelGroup);
1037
- const count = matches ? matches.length : 1;
1038
- this.syllableCache.set(clean, count);
1039
- return count;
1040
- }
1041
- computeSyllableStats() {
1042
- return (this.syllableStats ||= (() => {
1043
- const perWord = this.words
1044
- .map((w) => this.estimateSyllables(w))
1045
- .sort((a, b) => a - b);
1046
- const total = perWord.reduce((sum, s) => sum + s, 0);
1047
- const mono = perWord.filter((s) => s === 1).length;
1048
- const median = !perWord.length
1049
- ? 0
1050
- : perWord.length % 2 === 0
1051
- ? (perWord[perWord.length / 2 - 1] + perWord[perWord.length / 2]) /
1052
- 2
1053
- : perWord[Math.floor(perWord.length / 2)];
1385
+ compute(a, b, m, n, maxLen) {
1386
+ const len = m + 1;
1387
+ const [test, prev, curr] = Pool.acquireMany('int32', [len, len, len]);
1388
+ try {
1389
+ for (let i = 0; i <= m; i++) prev[i] = i;
1390
+ for (let j = 1; j <= n; j++) {
1391
+ curr[0] = j;
1392
+ const cb = b.charCodeAt(j - 1);
1393
+ for (let i = 1; i <= m; i++) {
1394
+ const ca = a.charCodeAt(i - 1);
1395
+ const cost = ca === cb ? 0 : 1;
1396
+ let val = Math.min(
1397
+ curr[i - 1] + 1,
1398
+ prev[i] + 1,
1399
+ prev[i - 1] + cost
1400
+ );
1401
+ if (
1402
+ i > 1 &&
1403
+ j > 1 &&
1404
+ ca === b.charCodeAt(j - 2) &&
1405
+ cb === a.charCodeAt(i - 2)
1406
+ )
1407
+ val = Math.min(val, test[i - 2] + cost);
1408
+ curr[i] = val;
1409
+ }
1410
+ test.set(prev);
1411
+ prev.set(curr);
1412
+ }
1413
+ const dist = prev[m];
1054
1414
  return {
1055
- total,
1056
- mono,
1057
- perWord,
1058
- avg: perWord.length ? total / perWord.length : 0,
1059
- median
1415
+ res: maxLen === 0 ? 1 : Metric.clamp(1 - dist / maxLen),
1416
+ raw: { dist, maxLen }
1060
1417
  };
1061
- })());
1062
- }
1063
- getLength = () => this.text.length;
1064
- getWordCount = () => this.words.length;
1065
- getSentenceCount = () => this.sentences.length;
1066
- getAvgWordLength() {
1067
- return this.words.length
1068
- ? this.words.join('').length / this.words.length
1069
- : 0;
1070
- }
1071
- getAvgSentenceLength() {
1072
- return this.sentences.length
1073
- ? this.words.length / this.sentences.length
1074
- : 0;
1075
- }
1076
- getWordHistogram() {
1077
- return Object.fromEntries(this.wordHistogram);
1078
- }
1079
- getMostCommonWords(limit = 5) {
1080
- return [...this.wordHistogram.entries()]
1081
- .sort((a, b) => b[1] - a[1])
1082
- .slice(0, limit)
1083
- .map((e) => e[0]);
1084
- }
1085
- getHapaxLegomena() {
1086
- return [...this.wordHistogram.entries()]
1087
- .filter(([, c]) => c === 1)
1088
- .map((e) => e[0]);
1089
- }
1090
- hasNumbers = () => TextAnalyzer.REGEX.number.test(this.text);
1091
- getUpperCaseRatio() {
1092
- const matches = this.text.match(TextAnalyzer.REGEX.letter) || [];
1093
- const upper = this.text.match(TextAnalyzer.REGEX.ucLetter)?.length || 0;
1094
- return matches.length ? upper / matches.length : 0;
1095
- }
1096
- getCharFrequency() {
1097
- return Object.fromEntries(this.charFrequency);
1098
- }
1099
- getUnicodeCodepoints() {
1100
- const result = {};
1101
- for (const [char, count] of this.charFrequency) {
1102
- const block = char
1103
- .charCodeAt(0)
1104
- .toString(16)
1105
- .padStart(4, '0')
1106
- .toUpperCase();
1107
- result[block] = (result[block] || 0) + count;
1418
+ } finally {
1419
+ Pool.release('int32', test, len);
1420
+ Pool.release('int32', prev, len);
1421
+ Pool.release('int32', curr, len);
1108
1422
  }
1109
- return result;
1110
- }
1111
- getLongWordRatio(len = 7) {
1112
- let long = 0;
1113
- for (const w of this.words) if (w.length >= len) long++;
1114
- return this.words.length ? long / this.words.length : 0;
1115
- }
1116
- getShortWordRatio(len = 3) {
1117
- let short = 0;
1118
- for (const w of this.words) if (w.length <= len) short++;
1119
- return this.words.length ? short / this.words.length : 0;
1120
- }
1121
- getSyllablesCount() {
1122
- return this.computeSyllableStats().total;
1123
- }
1124
- getMonosyllabicWordCount() {
1125
- return this.computeSyllableStats().mono;
1126
- }
1127
- getMinSyllablesWordCount(min) {
1128
- return this.computeSyllableStats().perWord.filter((w) => w >= min).length;
1129
- }
1130
- getMaxSyllablesWordCount(max) {
1131
- return this.computeSyllableStats().perWord.filter((w) => w <= max).length;
1132
1423
  }
1133
- getAvgSyllablesPerWord() {
1134
- return this.computeSyllableStats().avg;
1424
+ }
1425
+ MetricRegistry.add('damerau', DamerauLevenshteinDistance);
1426
+
1427
+ class DiceSorensenCoefficient extends Metric {
1428
+ constructor(a, b, opt = {}) {
1429
+ super('dice', a, b, opt, true);
1135
1430
  }
1136
- getMedianSyllablesPerWord() {
1137
- return this.computeSyllableStats().median;
1431
+ _bigrams(str) {
1432
+ const len = str.length - 1;
1433
+ const bigrams = Pool.acquire('set', len);
1434
+ for (let i = 0; i < len; i++) bigrams.add(str.substring(i, i + 2));
1435
+ return bigrams;
1138
1436
  }
1139
- getHonoresR() {
1437
+ compute(a, b) {
1438
+ const setA = this._bigrams(a),
1439
+ setB = this._bigrams(b);
1440
+ const sizeA = setA.size,
1441
+ sizeB = setB.size;
1140
1442
  try {
1141
- return (
1142
- (100 * Math.log(this.words.length)) /
1143
- (1 - this.getHapaxLegomena().length / (this.wordHistogram.size ?? 1))
1144
- );
1145
- } catch {
1146
- return 0;
1443
+ let intersection = 0;
1444
+ for (const bigram of setA) if (setB.has(bigram)) intersection++;
1445
+ const size = sizeA + sizeB;
1446
+ return {
1447
+ res: size === 0 ? 1 : Metric.clamp((2 * intersection) / size),
1448
+ raw: { intersection, size }
1449
+ };
1450
+ } finally {
1451
+ Pool.release('set', setA, sizeA);
1452
+ Pool.release('set', setB, sizeB);
1147
1453
  }
1148
1454
  }
1149
- getReadingTime(wpm = 200) {
1150
- return this.words.length / (wpm ?? 1);
1455
+ }
1456
+ MetricRegistry.add('dice', DiceSorensenCoefficient);
1457
+
1458
+ class HammingDistance extends Metric {
1459
+ constructor(a, b, opt = {}) {
1460
+ super('hamming', a, b, opt, true);
1151
1461
  }
1152
- getReadabilityScore(metric = 'flesch') {
1153
- const w = this.words.length || 1;
1154
- const s = this.sentences.length || 1;
1155
- const y = this.getSyllablesCount() || 1;
1156
- const asl = w / s;
1157
- const asw = y / w;
1158
- switch (metric) {
1159
- case 'flesch':
1160
- return 206.835 - 1.015 * asl - 84.6 * asw;
1161
- case 'fleschde':
1162
- return 180 - asl - 58.5 * asw;
1163
- case 'kincaid':
1164
- return 0.39 * asl + 11.8 * asw - 15.59;
1462
+ compute(a, b, m, n, maxLen) {
1463
+ if (m !== n) {
1464
+ if (this.options.pad !== undefined) {
1465
+ if (m < maxLen) a = a.padEnd(maxLen, this.options.pad);
1466
+ if (n < maxLen) b = b.padEnd(maxLen, this.options.pad);
1467
+ m = n = maxLen;
1468
+ } else
1469
+ throw new CmpStrUsageError(
1470
+ `Strings must be of equal length for Hamming Distance, a=${m} and b=${n} given, ` +
1471
+ `use option.pad for automatic adjustment`,
1472
+ { a: m, b: n }
1473
+ );
1165
1474
  }
1166
- }
1167
- getLIXScore() {
1168
- const w = this.words.length || 1;
1169
- const s = this.sentences.length || 1;
1170
- const l = this.getLongWordRatio() * w;
1171
- return w / s + (l / w) * 100;
1172
- }
1173
- getWSTFScore() {
1174
- const w = this.words.length || 1;
1175
- const h = (this.getMinSyllablesWordCount(3) / w) * 100;
1176
- const s = this.getAvgSentenceLength();
1177
- const l = this.getLongWordRatio() * 100;
1178
- const m = (this.getMonosyllabicWordCount() / w) * 100;
1179
- return [
1180
- 0.1935 * h + 0.1672 * s + 0.1297 * l - 0.0327 * m - 0.875,
1181
- 0.2007 * h + 0.1682 * s + 0.1373 * l - 2.779,
1182
- 0.2963 * h + 0.1905 * s - 1.1144,
1183
- 0.2744 * h + 0.2656 * s - 1.693
1184
- ];
1475
+ let dist = 0;
1476
+ for (let i = 0; i < m; i++) if (a[i] !== b[i]) dist++;
1477
+ return { res: m === 0 ? 1 : Metric.clamp(1 - dist / m), raw: { dist } };
1185
1478
  }
1186
1479
  }
1187
-
1188
- const profiler$2 = Profiler.getInstance();
1189
- class Metric {
1190
- static cache = new HashTable();
1191
- metric;
1192
- a;
1193
- b;
1194
- origA = [];
1195
- origB = [];
1196
- options;
1197
- optKey;
1198
- symmetric;
1199
- results;
1200
- static clear = () => this.cache.clear();
1201
- static swap = (a, b, m, n) => (m > n ? [b, a, n, m] : [a, b, m, n]);
1202
- static clamp = (res) => Math.max(0, Math.min(1, res));
1203
- constructor(metric, a, b, opt = {}, symmetric = false) {
1204
- this.metric = metric;
1205
- this.a = Array.isArray(a) ? a : [a];
1206
- this.b = Array.isArray(b) ? b : [b];
1207
- if (this.a.length === 0 || this.b.length === 0)
1208
- throw new Error(`Inputs <a> and <b> must not be empty`);
1209
- this.options = opt;
1210
- this.optKey = Hasher.fastFNV1a(
1211
- JSON.stringify(opt, Object.keys(opt).sort())
1212
- ).toString();
1213
- this.symmetric = symmetric;
1214
- }
1215
- preCompute(a, b, m, n) {
1216
- if (a === b) return { res: 1 };
1217
- if (m == 0 || n == 0 || (m < 2 && n < 2)) return { res: 0 };
1218
- return undefined;
1219
- }
1220
- compute(a, b, m, n, maxLen) {
1221
- throw new Error(`Method compute() must be overridden in a subclass`);
1222
- }
1223
- runSingle(i, j) {
1224
- let a = String(this.a[i]),
1225
- A = a;
1226
- let b = String(this.b[j]),
1227
- B = b;
1228
- let m = A.length,
1229
- n = B.length;
1230
- let result = this.preCompute(A, B, m, n);
1231
- if (!result) {
1232
- result = profiler$2.run(() => {
1233
- if (this.symmetric) [A, B, m, n] = Metric.swap(A, B, m, n);
1234
- const key =
1235
- Metric.cache.key(this.metric, [A, B], this.symmetric) + this.optKey;
1236
- return (
1237
- Metric.cache.get(key || '') ??
1238
- (() => {
1239
- const res = this.compute(A, B, m, n, Math.max(m, n));
1240
- if (key) Metric.cache.set(key, res);
1241
- return res;
1242
- })()
1243
- );
1244
- });
1245
- }
1246
- return {
1247
- metric: this.metric,
1248
- a: this.origA[i] ?? a,
1249
- b: this.origB[j] ?? b,
1250
- ...result
1251
- };
1252
- }
1253
- async runSingleAsync(i, j) {
1254
- return Promise.resolve(this.runSingle(i, j));
1255
- }
1256
- runBatch() {
1257
- const results = [];
1258
- for (let i = 0; i < this.a.length; i++)
1259
- for (let j = 0; j < this.b.length; j++)
1260
- results.push(this.runSingle(i, j));
1261
- this.results = results;
1262
- }
1263
- async runBatchAsync() {
1264
- const results = [];
1265
- for (let i = 0; i < this.a.length; i++)
1266
- for (let j = 0; j < this.b.length; j++)
1267
- results.push(await this.runSingleAsync(i, j));
1268
- this.results = results;
1269
- }
1270
- runPairwise() {
1271
- const results = [];
1272
- for (let i = 0; i < this.a.length; i++)
1273
- results.push(this.runSingle(i, i));
1274
- this.results = results;
1275
- }
1276
- async runPairwiseAsync() {
1277
- const results = [];
1278
- for (let i = 0; i < this.a.length; i++)
1279
- results.push(await this.runSingleAsync(i, i));
1280
- this.results = results;
1281
- }
1282
- setOriginal(a, b) {
1283
- if (a) this.origA = Array.isArray(a) ? a : [a];
1284
- if (b) this.origB = Array.isArray(b) ? b : [b];
1285
- return this;
1286
- }
1287
- isBatch = () => this.a.length > 1 || this.b.length > 1;
1288
- isSingle = () => !this.isBatch();
1289
- isPairwise(safe = false) {
1290
- return this.isBatch() && this.a.length === this.b.length
1291
- ? true
1292
- : !safe &&
1293
- (() => {
1294
- throw new Error(
1295
- `Mode <pairwise> requires arrays of equal length`
1296
- );
1297
- })();
1298
- }
1299
- isSymmetrical = () => this.symmetric;
1300
- whichMode = (mode) => mode ?? this.options?.mode ?? 'default';
1301
- clear = () => (this.results = undefined);
1302
- run(mode, clear = true) {
1303
- if (clear) this.clear();
1304
- switch (this.whichMode(mode)) {
1305
- case 'default':
1306
- if (this.isSingle()) {
1307
- this.results = this.runSingle(0, 0);
1308
- break;
1309
- }
1310
- case 'batch':
1311
- this.runBatch();
1312
- break;
1313
- case 'single':
1314
- this.results = this.runSingle(0, 0);
1315
- break;
1316
- case 'pairwise':
1317
- if (this.isPairwise()) this.runPairwise();
1318
- break;
1319
- default:
1320
- throw new Error(`Unsupported mode <${mode}>`);
1321
- }
1322
- }
1323
- async runAsync(mode, clear = true) {
1324
- if (clear) this.clear();
1325
- switch (this.whichMode(mode)) {
1326
- case 'default':
1327
- if (this.isSingle()) {
1328
- this.results = await this.runSingleAsync(0, 0);
1329
- break;
1330
- }
1331
- case 'batch':
1332
- await this.runBatchAsync();
1333
- break;
1334
- case 'single':
1335
- this.results = await this.runSingleAsync(0, 0);
1336
- break;
1337
- case 'pairwise':
1338
- if (this.isPairwise()) await this.runPairwiseAsync();
1339
- break;
1340
- default:
1341
- throw new Error(`Unsupported async mode <${mode}>`);
1342
- }
1343
- }
1344
- getMetricName = () => this.metric;
1345
- getResults() {
1346
- if (this.results === undefined)
1347
- throw new Error(`run() must be called before getResult()`);
1348
- return this.results;
1349
- }
1350
- }
1351
- const MetricRegistry = Registry('metric', Metric);
1352
-
1353
- class CosineSimilarity extends Metric {
1354
- constructor(a, b, opt = {}) {
1355
- super('cosine', a, b, opt, true);
1356
- }
1357
- _termFreq(str, delimiter) {
1358
- const terms = str.split(delimiter);
1359
- const freq = Pool.acquire('map', terms.length);
1360
- for (const term of terms) freq.set(term, (freq.get(term) || 0) + 1);
1361
- return freq;
1362
- }
1363
- compute(a, b) {
1364
- const { delimiter = ' ' } = this.options;
1365
- const termsA = this._termFreq(a, delimiter);
1366
- const termsB = this._termFreq(b, delimiter);
1367
- try {
1368
- let dotP = 0,
1369
- magA = 0,
1370
- magB = 0;
1371
- for (const [term, freqA] of termsA) {
1372
- const freqB = termsB.get(term) || 0;
1373
- dotP += freqA * freqB;
1374
- magA += freqA * freqA;
1375
- }
1376
- for (const freqB of termsB.values()) magB += freqB * freqB;
1377
- magA = Math.sqrt(magA);
1378
- magB = Math.sqrt(magB);
1379
- return {
1380
- res: magA && magB ? Metric.clamp(dotP / (magA * magB)) : 0,
1381
- raw: { dotProduct: dotP, magnitudeA: magA, magnitudeB: magB }
1382
- };
1383
- } finally {
1384
- Pool.release('map', termsA, termsA.size);
1385
- Pool.release('map', termsB, termsB.size);
1386
- }
1387
- }
1388
- }
1389
- MetricRegistry.add('cosine', CosineSimilarity);
1390
-
1391
- class DamerauLevenshteinDistance extends Metric {
1392
- constructor(a, b, opt = {}) {
1393
- super('damerau', a, b, opt, true);
1394
- }
1395
- compute(a, b, m, n, maxLen) {
1396
- const len = m + 1;
1397
- const [test, prev, curr] = Pool.acquireMany('int32', [len, len, len]);
1398
- try {
1399
- for (let i = 0; i <= m; i++) prev[i] = i;
1400
- for (let j = 1; j <= n; j++) {
1401
- curr[0] = j;
1402
- const cb = b.charCodeAt(j - 1);
1403
- for (let i = 1; i <= m; i++) {
1404
- const ca = a.charCodeAt(i - 1);
1405
- const cost = ca === cb ? 0 : 1;
1406
- let val = Math.min(
1407
- curr[i - 1] + 1,
1408
- prev[i] + 1,
1409
- prev[i - 1] + cost
1410
- );
1411
- if (
1412
- i > 1 &&
1413
- j > 1 &&
1414
- ca === b.charCodeAt(j - 2) &&
1415
- cb === a.charCodeAt(i - 2)
1416
- )
1417
- val = Math.min(val, test[i - 2] + cost);
1418
- curr[i] = val;
1419
- }
1420
- test.set(prev);
1421
- prev.set(curr);
1422
- }
1423
- const dist = prev[m];
1424
- return {
1425
- res: maxLen === 0 ? 1 : Metric.clamp(1 - dist / maxLen),
1426
- raw: { dist, maxLen }
1427
- };
1428
- } finally {
1429
- Pool.release('int32', test, len);
1430
- Pool.release('int32', prev, len);
1431
- Pool.release('int32', curr, len);
1432
- }
1433
- }
1434
- }
1435
- MetricRegistry.add('damerau', DamerauLevenshteinDistance);
1436
-
1437
- class DiceSorensenCoefficient extends Metric {
1438
- constructor(a, b, opt = {}) {
1439
- super('dice', a, b, opt, true);
1440
- }
1441
- _bigrams(str) {
1442
- const len = str.length - 1;
1443
- const bigrams = Pool.acquire('set', len);
1444
- for (let i = 0; i < len; i++) bigrams.add(str.substring(i, i + 2));
1445
- return bigrams;
1446
- }
1447
- compute(a, b) {
1448
- const setA = this._bigrams(a),
1449
- setB = this._bigrams(b);
1450
- const sizeA = setA.size,
1451
- sizeB = setB.size;
1452
- try {
1453
- let intersection = 0;
1454
- for (const bigram of setA) if (setB.has(bigram)) intersection++;
1455
- const size = sizeA + sizeB;
1456
- return {
1457
- res: size === 0 ? 1 : Metric.clamp((2 * intersection) / size),
1458
- raw: { intersection, size }
1459
- };
1460
- } finally {
1461
- Pool.release('set', setA, sizeA);
1462
- Pool.release('set', setB, sizeB);
1463
- }
1464
- }
1465
- }
1466
- MetricRegistry.add('dice', DiceSorensenCoefficient);
1467
-
1468
- class HammingDistance extends Metric {
1469
- constructor(a, b, opt = {}) {
1470
- super('hamming', a, b, opt, true);
1471
- }
1472
- compute(a, b, m, n, maxLen) {
1473
- if (m !== n) {
1474
- if (this.options.pad !== undefined) {
1475
- if (m < maxLen) a = a.padEnd(maxLen, this.options.pad);
1476
- if (n < maxLen) b = b.padEnd(maxLen, this.options.pad);
1477
- m = n = maxLen;
1478
- } else
1479
- throw new Error(
1480
- `Strings must be of equal length for Hamming Distance, a=${m} and b=${n} given, ` +
1481
- `use option.pad for automatic adjustment`
1482
- );
1483
- }
1484
- let dist = 0;
1485
- for (let i = 0; i < m; i++) if (a[i] !== b[i]) dist++;
1486
- return { res: m === 0 ? 1 : Metric.clamp(1 - dist / m), raw: { dist } };
1487
- }
1488
- }
1489
- MetricRegistry.add('hamming', HammingDistance);
1480
+ MetricRegistry.add('hamming', HammingDistance);
1490
1481
 
1491
1482
  class JaccardIndex extends Metric {
1492
1483
  constructor(a, b, opt = {}) {
@@ -1752,43 +1743,59 @@
1752
1743
  options;
1753
1744
  optKey;
1754
1745
  map;
1755
- static clear = () => this.cache.clear();
1746
+ ignoreSet;
1747
+ static clear() {
1748
+ this.cache.clear();
1749
+ }
1756
1750
  constructor(algo, opt = {}) {
1757
1751
  const defaults = this.constructor.default ?? {};
1758
1752
  const mapId = opt.map ?? defaults.map;
1759
1753
  if (!mapId)
1760
- throw new Error(`No mapping specified for phonetic algorithm`);
1754
+ throw new CmpStrNotFoundError(
1755
+ `No mapping specified for phonetic algorithm`,
1756
+ { algo }
1757
+ );
1761
1758
  const map = PhoneticMappingRegistry.get(algo, mapId);
1762
1759
  if (map === undefined)
1763
- throw new Error(`Requested mapping <${mapId}> is not declared`);
1764
- this.options = merge(merge(defaults, map.options ?? {}), opt);
1760
+ throw new CmpStrNotFoundError(
1761
+ `Requested mapping <${mapId}> is not declared`,
1762
+ { algo, mapId }
1763
+ );
1764
+ this.options = DeepMerge.merge(
1765
+ DeepMerge.merge(defaults, map.options ?? {}),
1766
+ opt
1767
+ );
1765
1768
  this.optKey = Hasher.fastFNV1a(
1766
1769
  JSON.stringify(this.options, Object.keys(this.options).sort())
1767
1770
  ).toString();
1768
1771
  this.algo = algo;
1769
1772
  this.map = map;
1773
+ this.ignoreSet = new Set(map.ignore ?? []);
1770
1774
  }
1771
1775
  applyPattern(word) {
1772
1776
  const { patterns = [] } = this.map;
1773
- if (!patterns || !patterns.length) return word;
1777
+ if (!patterns.length) return word;
1774
1778
  for (const { pattern, replace, all = false } of patterns) {
1775
- word = word[all ? 'replaceAll' : 'replace'](pattern, replace);
1779
+ word = all
1780
+ ? word.replaceAll(pattern, replace)
1781
+ : word.replace(pattern, replace);
1776
1782
  }
1777
1783
  return word;
1778
1784
  }
1779
1785
  applyRules(char, i, chars, charLen) {
1780
1786
  const { ruleset = [] } = this.map;
1781
- if (!ruleset || !ruleset.length) return undefined;
1787
+ if (!ruleset.length) return undefined;
1782
1788
  const prev = chars[i - 1] || '',
1783
1789
  prev2 = chars[i - 2] || '';
1784
1790
  const next = chars[i + 1] || '',
1785
1791
  next2 = chars[i + 2] || '';
1792
+ const str = chars.join('');
1786
1793
  for (const rule of ruleset) {
1787
1794
  if (rule.char && rule.char !== char) continue;
1788
1795
  if (rule.position === 'start' && i !== 0) continue;
1789
1796
  if (rule.position === 'middle' && (i === 0 || i === charLen - 1))
1790
1797
  continue;
1791
- if (rule.position === 'end' && i !== charLen) continue;
1798
+ if (rule.position === 'end' && i !== charLen - 1) continue;
1792
1799
  if (rule.prev && !rule.prev.includes(prev)) continue;
1793
1800
  if (rule.prevNot && rule.prevNot.includes(prev)) continue;
1794
1801
  if (rule.prev2 && !rule.prev2.includes(prev2)) continue;
@@ -1799,12 +1806,12 @@
1799
1806
  if (rule.next2Not && rule.next2Not.includes(next2)) continue;
1800
1807
  if (
1801
1808
  rule.leading &&
1802
- !rule.leading.includes(chars.slice(0, rule.leading.length).join(''))
1809
+ !rule.leading.includes(str.slice(0, rule.leading.length))
1803
1810
  )
1804
1811
  continue;
1805
1812
  if (
1806
1813
  rule.trailing &&
1807
- !rule.trailing.includes(chars.slice(-rule.trailing.length).join(''))
1814
+ !rule.trailing.includes(str.slice(-rule.trailing.length))
1808
1815
  )
1809
1816
  continue;
1810
1817
  if (rule.match && !rule.match.every((c, j) => chars[i + j] === c))
@@ -1814,7 +1821,7 @@
1814
1821
  return undefined;
1815
1822
  }
1816
1823
  encode(word) {
1817
- const { map = {}, ignore = [] } = this.map;
1824
+ const { map = {} } = this.map;
1818
1825
  word = this.applyPattern(word);
1819
1826
  const chars = this.word2Chars(word);
1820
1827
  const charLen = chars.length;
@@ -1822,7 +1829,7 @@
1822
1829
  lastCode = null;
1823
1830
  for (let i = 0; i < charLen; i++) {
1824
1831
  const char = chars[i];
1825
- if (ignore.includes(char)) continue;
1832
+ if (this.ignoreSet.has(char)) continue;
1826
1833
  const mapped = this.mapChar(char, i, chars, charLen, lastCode, map);
1827
1834
  if (mapped === undefined) continue;
1828
1835
  ((code += mapped), (lastCode = mapped));
@@ -1842,7 +1849,9 @@
1842
1849
  ? input
1843
1850
  : (input + pad.repeat(length)).slice(0, length);
1844
1851
  }
1845
- word2Chars = (word) => word.toLowerCase().split('');
1852
+ word2Chars(word) {
1853
+ return Array.from(word.toLowerCase());
1854
+ }
1846
1855
  exitEarly(code, i) {
1847
1856
  const { length = -1 } = this.options;
1848
1857
  return length > 0 && code.length >= length;
@@ -1851,37 +1860,52 @@
1851
1860
  return code;
1852
1861
  }
1853
1862
  loop(words) {
1854
- const index = [];
1855
- for (const word of words) {
1856
- const key = Phonetic.cache.key(this.algo, [word]) + this.optKey;
1857
- const code =
1858
- Phonetic.cache.get(key || '') ??
1859
- (() => {
1860
- const res = this.encode(word);
1861
- if (key) Phonetic.cache.set(key, res);
1862
- return res;
1863
- })();
1864
- if (code && code.length) index.push(this.equalLen(code));
1865
- }
1866
- return index;
1863
+ return ErrorUtil.wrap(
1864
+ () => {
1865
+ const index = [];
1866
+ for (const word of words) {
1867
+ let key = Phonetic.cache.key(this.algo, [word]);
1868
+ if (key) key += this.optKey;
1869
+ const code =
1870
+ Phonetic.cache.get(key || '') ??
1871
+ (() => {
1872
+ const res = this.encode(word);
1873
+ if (key) Phonetic.cache.set(key, res);
1874
+ return res;
1875
+ })();
1876
+ if (code && code.length) index.push(this.equalLen(code));
1877
+ }
1878
+ return index;
1879
+ },
1880
+ `Failed to generate phonetic index`,
1881
+ { algo: this.algo, words }
1882
+ );
1867
1883
  }
1868
1884
  async loopAsync(words) {
1869
- const index = [];
1870
- for (const word of words) {
1871
- const key = Phonetic.cache.key(this.algo, [word]) + this.optKey;
1872
- const code = await Promise.resolve(
1873
- Phonetic.cache.get(key || '') ??
1874
- (() => {
1875
- const res = this.encode(word);
1876
- if (key) Phonetic.cache.set(key, res);
1877
- return res;
1878
- })()
1879
- );
1880
- if (code && code.length) index.push(this.equalLen(code));
1881
- }
1882
- return index;
1885
+ return ErrorUtil.wrapAsync(
1886
+ async () => {
1887
+ const index = [];
1888
+ for (const word of words) {
1889
+ const key = Phonetic.cache.key(this.algo, [word]) + this.optKey;
1890
+ const code = await Promise.resolve(
1891
+ Phonetic.cache.get(key || '') ??
1892
+ (() => {
1893
+ const res = this.encode(word);
1894
+ if (key) Phonetic.cache.set(key, res);
1895
+ return res;
1896
+ })()
1897
+ );
1898
+ if (code && code.length) index.push(this.equalLen(code));
1899
+ }
1900
+ return index;
1901
+ },
1902
+ `Failed to generate phonetic index asynchronously`,
1903
+ { algo: this.algo, words }
1904
+ );
1905
+ }
1906
+ getAlgoName() {
1907
+ return this.algo;
1883
1908
  }
1884
- getAlgoName = () => this.algo;
1885
1909
  getIndex(input) {
1886
1910
  const { delimiter = ' ' } = this.options;
1887
1911
  return profiler$1.run(() =>
@@ -1905,10 +1929,11 @@
1905
1929
  return Object.freeze({
1906
1930
  add(algo, id, map, update = false) {
1907
1931
  const mappings = maps(algo);
1908
- if (!update && id in mappings)
1909
- throw new Error(
1910
- `Entry <${id}> already exists / use <update=true> to overwrite`
1911
- );
1932
+ ErrorUtil.assert(
1933
+ !(!id || id in mappings) || update,
1934
+ `Entry <${id}> already exists / use <update=true> to overwrite`,
1935
+ { algo, id }
1936
+ );
1912
1937
  mappings[id] = map;
1913
1938
  },
1914
1939
  remove(algo, id) {
@@ -2118,170 +2143,743 @@
2118
2143
  constructor(opt = {}) {
2119
2144
  super('metaphone', opt);
2120
2145
  }
2121
- encode(word) {
2122
- word = word.replace(Metaphone.REGEX.adjacent, (m, c) =>
2123
- c === 'C' ? m : c
2124
- );
2125
- return super.encode(word);
2146
+ encode(word) {
2147
+ word = word.replace(Metaphone.REGEX.adjacent, (m, c) =>
2148
+ c === 'C' ? m : c
2149
+ );
2150
+ return super.encode(word);
2151
+ }
2152
+ adjustCode(code) {
2153
+ return (
2154
+ code.slice(0, 1) + code.slice(1).replace(Metaphone.REGEX.vowel, '')
2155
+ );
2156
+ }
2157
+ }
2158
+ PhoneticRegistry.add('metaphone', Metaphone);
2159
+ PhoneticMappingRegistry.add('metaphone', 'en90', {
2160
+ map: {
2161
+ a: 'A',
2162
+ b: 'B',
2163
+ c: 'K',
2164
+ d: 'T',
2165
+ e: 'E',
2166
+ f: 'F',
2167
+ g: 'K',
2168
+ h: 'H',
2169
+ i: 'I',
2170
+ j: 'J',
2171
+ k: 'K',
2172
+ l: 'L',
2173
+ m: 'M',
2174
+ n: 'N',
2175
+ o: 'O',
2176
+ p: 'P',
2177
+ q: 'K',
2178
+ r: 'R',
2179
+ s: 'S',
2180
+ t: 'T',
2181
+ u: 'U',
2182
+ v: 'F',
2183
+ w: 'W',
2184
+ x: 'KS',
2185
+ y: 'Y',
2186
+ z: 'S'
2187
+ },
2188
+ ruleset: [
2189
+ { char: 'a', position: 'start', next: ['e'], code: '' },
2190
+ { char: 'g', position: 'start', next: ['n'], code: '' },
2191
+ { char: 'k', position: 'start', next: ['n'], code: '' },
2192
+ { char: 'p', position: 'start', next: ['n'], code: '' },
2193
+ { char: 'w', position: 'start', next: ['r'], code: '' },
2194
+ { char: 'b', position: 'end', prev: ['m'], code: '' },
2195
+ { char: 'c', next: ['h'], prevNot: ['s'], code: 'X' },
2196
+ { char: 'c', next: ['i'], next2: ['a'], code: 'X' },
2197
+ { char: 'c', next: ['e', 'i', 'y'], code: 'S' },
2198
+ { char: 'd', next: ['g'], next2: ['e', 'i', 'y'], code: 'J' },
2199
+ {
2200
+ char: 'g',
2201
+ next: ['h'],
2202
+ next2Not: ['', 'a', 'e', 'i', 'o', 'u'],
2203
+ code: ''
2204
+ },
2205
+ { char: 'g', trailing: 'n', code: '' },
2206
+ { char: 'g', trailing: 'ned', code: '' },
2207
+ { char: 'g', next: ['e', 'i', 'y'], prevNot: ['g'], code: 'J' },
2208
+ {
2209
+ char: 'h',
2210
+ prev: ['a', 'e', 'i', 'o', 'u'],
2211
+ nextNot: ['a', 'e', 'i', 'o', 'u'],
2212
+ code: ''
2213
+ },
2214
+ { char: 'h', prev: ['c', 'g', 'p', 's', 't'], code: '' },
2215
+ { char: 'k', prev: ['c'], code: '' },
2216
+ { char: 'p', next: ['h'], code: 'F' },
2217
+ { char: 's', next: ['h'], code: 'X' },
2218
+ { char: 's', next: ['i'], next2: ['a', 'o'], code: 'X' },
2219
+ { char: 't', next: ['i'], next2: ['a', 'o'], code: 'X' },
2220
+ { char: 't', next: ['h'], code: '0' },
2221
+ { char: 't', next: ['c'], next2: ['h'], code: '' },
2222
+ { char: 'w', nextNot: ['a', 'e', 'i', 'o', 'u'], code: '' },
2223
+ { char: 'h', leading: 'w', code: '' },
2224
+ { char: 'x', position: 'start', code: 'S' },
2225
+ { char: 'y', nextNot: ['a', 'e', 'i', 'o', 'u'], code: '' }
2226
+ ]
2227
+ });
2228
+
2229
+ class Soundex extends Phonetic {
2230
+ static default = {
2231
+ map: 'en',
2232
+ delimiter: ' ',
2233
+ length: 4,
2234
+ pad: '0',
2235
+ dedupe: true
2236
+ };
2237
+ constructor(opt = {}) {
2238
+ super('soundex', opt);
2239
+ }
2240
+ adjustCode(code, chars) {
2241
+ return chars[0].toUpperCase() + code.slice(1).replaceAll('0', '');
2242
+ }
2243
+ }
2244
+ PhoneticRegistry.add('soundex', Soundex);
2245
+ PhoneticMappingRegistry.add('soundex', 'en', {
2246
+ map: {
2247
+ a: '0',
2248
+ e: '0',
2249
+ h: '0',
2250
+ i: '0',
2251
+ o: '0',
2252
+ u: '0',
2253
+ w: '0',
2254
+ y: '0',
2255
+ b: '1',
2256
+ f: '1',
2257
+ p: '1',
2258
+ v: '1',
2259
+ c: '2',
2260
+ g: '2',
2261
+ j: '2',
2262
+ k: '2',
2263
+ q: '2',
2264
+ s: '2',
2265
+ x: '2',
2266
+ z: '2',
2267
+ d: '3',
2268
+ t: '3',
2269
+ l: '4',
2270
+ m: '5',
2271
+ n: '5',
2272
+ r: '6'
2273
+ }
2274
+ });
2275
+ PhoneticMappingRegistry.add('soundex', 'de', {
2276
+ map: {
2277
+ a: '0',
2278
+ ä: '0',
2279
+ e: '0',
2280
+ h: '0',
2281
+ i: '0',
2282
+ j: '0',
2283
+ o: '0',
2284
+ ö: '0',
2285
+ u: '0',
2286
+ ü: '0',
2287
+ y: '0',
2288
+ b: '1',
2289
+ f: '1',
2290
+ p: '1',
2291
+ v: '1',
2292
+ w: '1',
2293
+ c: '2',
2294
+ g: '2',
2295
+ k: '2',
2296
+ q: '2',
2297
+ s: '2',
2298
+ ß: '2',
2299
+ x: '2',
2300
+ z: '2',
2301
+ d: '3',
2302
+ t: '3',
2303
+ l: '4',
2304
+ m: '5',
2305
+ n: '5',
2306
+ r: '6'
2307
+ },
2308
+ ruleset: [{ char: 'c', next: ['h'], code: '7' }]
2309
+ });
2310
+
2311
+ class OptionsValidator {
2312
+ static ALLOWED_FLAGS = new Set([
2313
+ 'd',
2314
+ 'u',
2315
+ 'x',
2316
+ 'w',
2317
+ 't',
2318
+ 'r',
2319
+ 's',
2320
+ 'k',
2321
+ 'n',
2322
+ 'i'
2323
+ ]);
2324
+ static ALLOWED_OUTPUT = new Set(['orig', 'prep']);
2325
+ static ALLOWED_MODES = new Set(['default', 'batch', 'single', 'pairwise']);
2326
+ static ALLOWED_SORT = new Set(['asc', 'desc']);
2327
+ static PROCESSORS = {
2328
+ phonetic: (opt) => {
2329
+ if (!opt) return;
2330
+ OptionsValidator.validatePhoneticName(opt.algo);
2331
+ OptionsValidator.validatePhoneticOptions(opt.opt);
2332
+ }
2333
+ };
2334
+ static METRIC_OPT_MAP = {
2335
+ mode: (v) => OptionsValidator.validateMode(v),
2336
+ delimiter: (v) => OptionsValidator.validateString(v, 'opt.delimiter'),
2337
+ pad: (v) => OptionsValidator.validateString(v, 'opt.pad'),
2338
+ q: (v) => OptionsValidator.validateNumber(v, 'opt.q'),
2339
+ match: (v) => OptionsValidator.validateNumber(v, 'opt.match'),
2340
+ mismatch: (v) => OptionsValidator.validateNumber(v, 'opt.mismatch'),
2341
+ gap: (v) => OptionsValidator.validateNumber(v, 'opt.gap')
2342
+ };
2343
+ static PHONETIC_OPT_MAP = {
2344
+ map: (v) =>
2345
+ OptionsValidator.validateString(v, 'processors.phonetic.opt.map'),
2346
+ delimiter: (v) =>
2347
+ OptionsValidator.validateString(v, 'processors.phonetic.opt.delimiter'),
2348
+ length: (v) =>
2349
+ OptionsValidator.validateNumber(v, 'processors.phonetic.opt.length'),
2350
+ pad: (v) =>
2351
+ OptionsValidator.validateString(v, 'processors.phonetic.opt.pad'),
2352
+ dedupe: (v) =>
2353
+ OptionsValidator.validateBoolean(v, 'processors.phonetic.opt.dedupe'),
2354
+ fallback: (v) =>
2355
+ OptionsValidator.validateString(v, 'processors.phonetic.opt.fallback')
2356
+ };
2357
+ static CMPSTR_OPT_MAP = {
2358
+ raw: (v) => OptionsValidator.validateBoolean(v, 'raw'),
2359
+ removeZero: (v) => OptionsValidator.validateBoolean(v, 'removeZero'),
2360
+ safeEmpty: (v) => OptionsValidator.validateBoolean(v, 'safeEmpty'),
2361
+ flags: (v) => OptionsValidator.validateFlags(v),
2362
+ metric: (v) => OptionsValidator.validateMetricName(v),
2363
+ output: (v) => OptionsValidator.validateOutput(v),
2364
+ opt: (v) => OptionsValidator.validateMetricOptions(v),
2365
+ processors: (v) => OptionsValidator.validateProcessors(v),
2366
+ sort: (v) => OptionsValidator.validateSort(v, 'sort'),
2367
+ objectsOnly: (v) => OptionsValidator.validateBoolean(v, 'objectsOnly')
2368
+ };
2369
+ static set2string(set) {
2370
+ return Array.from(set).join(' | ');
2371
+ }
2372
+ static validateType(value, name, type) {
2373
+ if (value === undefined) return;
2374
+ if (typeof value !== type || (type === 'number' && Number.isNaN(value))) {
2375
+ throw new CmpStrValidationError(
2376
+ `Invalid option <${name}>: expected ${type}`,
2377
+ { name, value }
2378
+ );
2379
+ }
2380
+ }
2381
+ static validateEnum(value, name, set) {
2382
+ if (value === undefined) return;
2383
+ if (typeof value !== 'string' || !set.has(value)) {
2384
+ throw new CmpStrValidationError(
2385
+ `Invalid option <${name}>: expected ${OptionsValidator.set2string(set)}`,
2386
+ { name, value }
2387
+ );
2388
+ }
2389
+ }
2390
+ static validateMap(opt, map) {
2391
+ if (!opt) return;
2392
+ for (const k in opt) {
2393
+ const fn = map[k];
2394
+ if (!fn)
2395
+ throw new CmpStrValidationError(`Invalid option <${k}>`, {
2396
+ option: k,
2397
+ value: map[k]
2398
+ });
2399
+ fn(opt[k]);
2400
+ }
2401
+ }
2402
+ static validateRegistryName(value, name, label, has, list) {
2403
+ if (value === undefined) return;
2404
+ if (typeof value !== 'string' || value.length === 0)
2405
+ throw new CmpStrValidationError(
2406
+ `Invalid option <${name}>: expected non-empty string`,
2407
+ { name, value }
2408
+ );
2409
+ if (!has(value))
2410
+ throw new CmpStrValidationError(
2411
+ `${label} <${value}> is not registered`,
2412
+ { name, value, available: list() }
2413
+ );
2414
+ }
2415
+ static validateBoolean(value, name) {
2416
+ OptionsValidator.validateType(value, name, 'boolean');
2417
+ }
2418
+ static validateNumber(value, name) {
2419
+ OptionsValidator.validateType(value, name, 'number');
2420
+ }
2421
+ static validateString(value, name) {
2422
+ OptionsValidator.validateType(value, name, 'string');
2423
+ }
2424
+ static validateFlags(value) {
2425
+ if (value === undefined) return;
2426
+ if (typeof value !== 'string')
2427
+ throw new CmpStrValidationError(
2428
+ `Invalid option <flags>: expected string`,
2429
+ { flags: value }
2430
+ );
2431
+ for (let i = 0; i < value.length; i++) {
2432
+ const ch = value[i];
2433
+ if (!OptionsValidator.ALLOWED_FLAGS.has(ch))
2434
+ throw new CmpStrValidationError(
2435
+ `Invalid normalization flag <${ch}> in <flags>: expected ${OptionsValidator.set2string(OptionsValidator.ALLOWED_FLAGS)}`,
2436
+ { flags: value, invalid: ch }
2437
+ );
2438
+ }
2439
+ }
2440
+ static validateOutput(value) {
2441
+ OptionsValidator.validateEnum(
2442
+ value,
2443
+ 'output',
2444
+ OptionsValidator.ALLOWED_OUTPUT
2445
+ );
2446
+ }
2447
+ static validateMode(value) {
2448
+ OptionsValidator.validateEnum(
2449
+ value,
2450
+ 'mode',
2451
+ OptionsValidator.ALLOWED_MODES
2452
+ );
2453
+ }
2454
+ static validateSort(value, name) {
2455
+ if (value === undefined || typeof value === 'boolean') return;
2456
+ OptionsValidator.validateEnum(value, name, OptionsValidator.ALLOWED_SORT);
2457
+ }
2458
+ static validateMetricName(value) {
2459
+ OptionsValidator.validateRegistryName(
2460
+ value,
2461
+ 'metric',
2462
+ 'Comparison metric',
2463
+ MetricRegistry.has,
2464
+ MetricRegistry.list
2465
+ );
2466
+ }
2467
+ static validatePhoneticName(value) {
2468
+ OptionsValidator.validateRegistryName(
2469
+ value,
2470
+ 'phonetic',
2471
+ 'Phonetic algorithm',
2472
+ PhoneticRegistry.has,
2473
+ PhoneticRegistry.list
2474
+ );
2475
+ }
2476
+ static validateMetricOptions(opt) {
2477
+ OptionsValidator.validateMap(opt, OptionsValidator.METRIC_OPT_MAP);
2478
+ }
2479
+ static validatePhoneticOptions(opt) {
2480
+ OptionsValidator.validateMap(opt, OptionsValidator.PHONETIC_OPT_MAP);
2481
+ }
2482
+ static validateProcessors(opt) {
2483
+ if (!opt) return;
2484
+ for (const key in opt) {
2485
+ const fn = OptionsValidator.PROCESSORS[key];
2486
+ if (!fn)
2487
+ throw new CmpStrValidationError(
2488
+ `Invalid processor type <${key}> in <processors>: expected ${Object.keys(OptionsValidator.PROCESSORS).join(' | ')}`,
2489
+ { processors: opt, invalid: key }
2490
+ );
2491
+ fn(opt[key]);
2492
+ }
2493
+ }
2494
+ static validateOptions(opt) {
2495
+ OptionsValidator.validateMap(opt, OptionsValidator.CMPSTR_OPT_MAP);
2496
+ }
2497
+ }
2498
+
2499
+ class StructuredData {
2500
+ data;
2501
+ key;
2502
+ static SORT_ASC = (a, b) => a.res - b.res;
2503
+ static SORT_DESC = (a, b) => b.res - a.res;
2504
+ static create(data, key) {
2505
+ return new StructuredData(data, key);
2506
+ }
2507
+ constructor(data, key) {
2508
+ this.data = data;
2509
+ this.key = key;
2510
+ }
2511
+ extractFrom(arr, key) {
2512
+ const n = arr.length;
2513
+ const result = new Array(n);
2514
+ for (let i = 0; i < n; i++) {
2515
+ const val = arr[i][key];
2516
+ result[i] = val != null ? String(val) : '';
2517
+ }
2518
+ return result;
2519
+ }
2520
+ extract() {
2521
+ return this.extractFrom(this.data, this.key);
2522
+ }
2523
+ isMetricResult(v) {
2524
+ return (
2525
+ typeof v === 'object' &&
2526
+ v !== null &&
2527
+ 'a' in v &&
2528
+ 'b' in v &&
2529
+ 'res' in v
2530
+ );
2531
+ }
2532
+ isCmpStrResult(v) {
2533
+ return (
2534
+ typeof v === 'object' &&
2535
+ v !== null &&
2536
+ 'source' in v &&
2537
+ 'target' in v &&
2538
+ 'match' in v
2539
+ );
2540
+ }
2541
+ normalizeResults(results) {
2542
+ if (!Array.isArray(results) || results.length === 0) return [];
2543
+ const first = results[0];
2544
+ let out = new Array(results.length);
2545
+ if (this.isMetricResult(first)) {
2546
+ const src = results;
2547
+ for (let i = 0; i < src.length; i++) out[i] = { ...src[i], __idx: i };
2548
+ } else if (this.isCmpStrResult(first)) {
2549
+ const src = results;
2550
+ for (let i = 0; i < src.length; i++) {
2551
+ const r = src[i];
2552
+ out[i] = {
2553
+ metric: 'unknown',
2554
+ a: r.source,
2555
+ b: r.target,
2556
+ res: r.match,
2557
+ raw: r.raw,
2558
+ __idx: i
2559
+ };
2560
+ }
2561
+ } else
2562
+ throw new CmpStrValidationError(
2563
+ 'Unsupported result format for StructuredData normalization.'
2564
+ );
2565
+ return out;
2566
+ }
2567
+ rebuild(results, sourceData, extractedStrings, removeZero, objectsOnly) {
2568
+ const m = extractedStrings.length,
2569
+ n = results.length;
2570
+ const stringToIndices = Pool.acquire('map', m);
2571
+ const occurrenceCount = Pool.acquire('map', n);
2572
+ const output = new Array(n);
2573
+ stringToIndices.clear();
2574
+ occurrenceCount.clear();
2575
+ try {
2576
+ for (let i = 0; i < m; i++) {
2577
+ const str = extractedStrings[i];
2578
+ let arr = stringToIndices.get(str);
2579
+ if (!arr) {
2580
+ arr = [];
2581
+ stringToIndices.set(str, arr);
2582
+ }
2583
+ arr.push(i);
2584
+ }
2585
+ let out = 0;
2586
+ for (let i = 0; i < n; i++) {
2587
+ const result = results[i];
2588
+ if (removeZero && result.res === 0) continue;
2589
+ const targetStr = result.b || '';
2590
+ const indices = stringToIndices.get(targetStr);
2591
+ let dataIndex;
2592
+ if (indices && indices.length > 0) {
2593
+ const occurrence = occurrenceCount.get(targetStr) ?? 0;
2594
+ occurrenceCount.set(targetStr, occurrence + 1);
2595
+ dataIndex = indices[occurrence % indices.length];
2596
+ } else {
2597
+ dataIndex = result.__idx ?? i;
2598
+ }
2599
+ if (dataIndex < 0 || dataIndex >= sourceData.length) continue;
2600
+ const sourceObj = sourceData[dataIndex];
2601
+ const mappedTarget = extractedStrings[dataIndex] || targetStr;
2602
+ if (objectsOnly) output[out++] = sourceObj;
2603
+ else
2604
+ output[out++] = {
2605
+ obj: sourceObj,
2606
+ key: this.key,
2607
+ result: {
2608
+ source: result.a,
2609
+ target: mappedTarget,
2610
+ match: result.res
2611
+ },
2612
+ ...(result.raw ? { raw: result.raw } : null)
2613
+ };
2614
+ }
2615
+ output.length = out;
2616
+ return output;
2617
+ } finally {
2618
+ Pool.release('map', stringToIndices, m);
2619
+ Pool.release('map', occurrenceCount, n);
2620
+ }
2621
+ }
2622
+ sort(results, sort) {
2623
+ if (!sort || results.length <= 1) return results;
2624
+ return results.sort(
2625
+ sort === 'asc' ? StructuredData.SORT_ASC : StructuredData.SORT_DESC
2626
+ );
2627
+ }
2628
+ finalizeLookup(results, extractedStrings, opt) {
2629
+ return this.rebuild(
2630
+ this.sort(this.normalizeResults(results), opt?.sort),
2631
+ this.data,
2632
+ extractedStrings,
2633
+ opt?.removeZero,
2634
+ opt?.objectsOnly
2635
+ );
2636
+ }
2637
+ performLookup(fn, extractedStrings, opt) {
2638
+ return ErrorUtil.wrap(
2639
+ () => this.finalizeLookup(fn(), extractedStrings, opt),
2640
+ 'StructuredData lookup failed',
2641
+ { key: this.key }
2642
+ );
2643
+ }
2644
+ async performLookupAsync(fn, extractedStrings, opt) {
2645
+ return await ErrorUtil.wrapAsync(
2646
+ async () => this.finalizeLookup(await fn(), extractedStrings, opt),
2647
+ 'StructuredData async lookup failed',
2648
+ { key: this.key }
2649
+ );
2650
+ }
2651
+ lookup(fn, query, opt) {
2652
+ const b = this.extract();
2653
+ try {
2654
+ return this.performLookup(() => fn(query, b, opt), b, opt);
2655
+ } finally {
2656
+ Pool.release('string[]', b, b.length);
2657
+ }
2658
+ }
2659
+ async lookupAsync(fn, query, opt) {
2660
+ const b = this.extract();
2661
+ try {
2662
+ return await this.performLookupAsync(() => fn(query, b, opt), b, opt);
2663
+ } finally {
2664
+ Pool.release('string[]', b, b.length);
2665
+ }
2666
+ }
2667
+ lookupPairs(fn, other, otherKey, opt) {
2668
+ const a = this.extract();
2669
+ const b = this.extractFrom(other, otherKey);
2670
+ try {
2671
+ return this.performLookup(() => fn(a, b, opt), a, opt);
2672
+ } finally {
2673
+ Pool.release('string[]', a, a.length);
2674
+ Pool.release('string[]', b, b.length);
2675
+ }
2676
+ }
2677
+ async lookupPairsAsync(fn, other, otherKey, opt) {
2678
+ const a = this.extract();
2679
+ const b = this.extractFrom(other, otherKey);
2680
+ try {
2681
+ return await this.performLookupAsync(() => fn(a, b, opt), a, opt);
2682
+ } finally {
2683
+ Pool.release('string[]', a, a.length);
2684
+ Pool.release('string[]', b, b.length);
2685
+ }
2686
+ }
2687
+ }
2688
+
2689
/**
 * Collects descriptive statistics for a piece of text: word and sentence
 * counts, character and word frequencies, estimated syllables and several
 * readability scores.
 *
 * The text is tokenized once in the constructor; syllable statistics are
 * computed lazily on first use and then cached.
 */
class TextAnalyzer {
  /**
   * Shared patterns. The `g`-flagged ones are only used with `match()` and
   * `replace()`, which do not depend on a stale `lastIndex`.
   */
  static REGEX = {
    number: /\d/,
    sentence: /(?<=[.!?])\s+/,
    word: /\p{L}+/gu,
    nonWord: /[^\p{L}]/gu,
    vowelGroup: /[aeiouy]+/g,
    letter: /\p{L}/gu,
    ucLetter: /\p{Lu}/gu
  };

  /** Trimmed input text. */
  text;
  /** Lower-cased words (runs of Unicode letters) in order of appearance. */
  words = [];
  /** Sentences, split on whitespace that follows `.`, `!` or `?`. */
  sentences = [];
  /** Occurrences per character (iterated by code point) of `text`. */
  charFrequency = new Map();
  /** Occurrences per lower-cased word. */
  wordHistogram = new Map();
  /** Memoized syllable estimates keyed by the cleaned word. */
  syllableCache = new Map();
  /** Lazily computed result of `computeSyllableStats()`. */
  syllableStats;

  /**
   * @param {string} input - Text to analyse; surrounding whitespace is trimmed
   */
  constructor(input) {
    this.text = input.trim();
    this.tokenize();
    this.computeFrequencies();
  }

  /** Fills `words` and `sentences` from `text`. */
  tokenize() {
    this.words = this.text.toLowerCase().match(TextAnalyzer.REGEX.word) ?? [];
    this.sentences = this.text
      .split(TextAnalyzer.REGEX.sentence)
      .filter(Boolean);
  }

  /** Fills `charFrequency` and `wordHistogram`. */
  computeFrequencies() {
    for (const char of this.text)
      this.charFrequency.set(char, (this.charFrequency.get(char) ?? 0) + 1);
    for (const word of this.words)
      this.wordHistogram.set(word, (this.wordHistogram.get(word) ?? 0) + 1);
  }

  /**
   * Estimates syllables as the number of vowel groups (`[aeiouy]+`).
   *
   * @param {string} word - Word to estimate
   * @returns {number} Number of vowel groups, or 1 when there is none
   */
  estimateSyllables(word) {
    const clean = word
      .normalize('NFC')
      .toLowerCase()
      .replace(TextAnalyzer.REGEX.nonWord, '');
    if (this.syllableCache.has(clean)) return this.syllableCache.get(clean);
    const groups = clean.match(TextAnalyzer.REGEX.vowelGroup);
    const count = groups ? groups.length : 1;
    this.syllableCache.set(clean, count);
    return count;
  }

  /**
   * Computes (once) the syllable statistics over all words.
   *
   * @returns {{total: number, mono: number, perWord: number[], avg: number, median: number}}
   *   `perWord` is sorted ascending
   */
  computeSyllableStats() {
    return (this.syllableStats ||= (() => {
      const perWord = this.words
        .map((w) => this.estimateSyllables(w))
        .sort((a, b) => a - b);
      const size = perWord.length;
      const total = perWord.reduce((sum, s) => sum + s, 0);
      const mono = perWord.filter((s) => s === 1).length;
      let median = 0;
      if (size > 0) {
        const mid = Math.floor(size / 2);
        median =
          size % 2 === 0 ? (perWord[mid - 1] + perWord[mid]) / 2 : perWord[mid];
      }
      return { total, mono, perWord, avg: size ? total / size : 0, median };
    })());
  }

  /** @returns {number} Length of the trimmed text in UTF-16 code units */
  getLength = () => this.text.length;
  /** @returns {number} Number of words */
  getWordCount = () => this.words.length;
  /** @returns {number} Number of sentences */
  getSentenceCount = () => this.sentences.length;

  /** @returns {number} Mean word length, 0 for text without words */
  getAvgWordLength() {
    return this.words.length
      ? this.words.join('').length / this.words.length
      : 0;
  }

  /** @returns {number} Mean number of words per sentence, 0 without sentences */
  getAvgSentenceLength() {
    return this.sentences.length
      ? this.words.length / this.sentences.length
      : 0;
  }

  /** @returns {Object<string, number>} Word → occurrences */
  getWordHistogram() {
    return Object.fromEntries(this.wordHistogram);
  }

  /**
   * @param {number} [limit=5] - Maximum number of words to return
   * @returns {string[]} Words ordered by descending frequency
   */
  getMostCommonWords(limit = 5) {
    return [...this.wordHistogram.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, limit)
      .map((e) => e[0]);
  }

  /** @returns {string[]} Words that occur exactly once */
  getHapaxLegomena() {
    return [...this.wordHistogram.entries()]
      .filter(([, c]) => c === 1)
      .map((e) => e[0]);
  }

  /** @returns {boolean} Whether the text contains a digit */
  hasNumbers = () => TextAnalyzer.REGEX.number.test(this.text);

  /** @returns {number} Upper-case letters divided by all letters, 0 without letters */
  getUpperCaseRatio() {
    const letters = this.text.match(TextAnalyzer.REGEX.letter) || [];
    const upper = this.text.match(TextAnalyzer.REGEX.ucLetter)?.length || 0;
    return letters.length ? upper / letters.length : 0;
  }

  /** @returns {Object<string, number>} Character → occurrences */
  getCharFrequency() {
    return Object.fromEntries(this.charFrequency);
  }

  /**
   * @returns {Object<string, number>} Upper-case hex code point (at least
   *   four digits) → occurrences
   */
  getUnicodeCodepoints() {
    const result = {};
    for (const [char, count] of this.charFrequency) {
      // charFrequency is keyed by whole code points, so read the full code
      // point; charCodeAt(0) would report only the high surrogate of
      // characters outside the BMP.
      const block = char
        .codePointAt(0)
        .toString(16)
        .padStart(4, '0')
        .toUpperCase();
      result[block] = (result[block] || 0) + count;
    }
    return result;
  }

  /**
   * @param {number} [len=7] - Minimum length of a "long" word
   * @returns {number} Share of words with at least `len` characters
   */
  getLongWordRatio(len = 7) {
    let long = 0;
    for (const w of this.words) if (w.length >= len) long++;
    return this.words.length ? long / this.words.length : 0;
  }

  /**
   * @param {number} [len=3] - Maximum length of a "short" word
   * @returns {number} Share of words with at most `len` characters
   */
  getShortWordRatio(len = 3) {
    let short = 0;
    for (const w of this.words) if (w.length <= len) short++;
    return this.words.length ? short / this.words.length : 0;
  }

  /** @returns {number} Estimated syllables over all words */
  getSyllablesCount() {
    return this.computeSyllableStats().total;
  }

  /** @returns {number} Number of words estimated at one syllable */
  getMonosyllabicWordCount() {
    return this.computeSyllableStats().mono;
  }

  /**
   * @param {number} min - Lower syllable bound (inclusive)
   * @returns {number} Number of words with at least `min` syllables
   */
  getMinSyllablesWordCount(min) {
    return this.computeSyllableStats().perWord.filter((w) => w >= min).length;
  }

  /**
   * @param {number} max - Upper syllable bound (inclusive)
   * @returns {number} Number of words with at most `max` syllables
   */
  getMaxSyllablesWordCount(max) {
    return this.computeSyllableStats().perWord.filter((w) => w <= max).length;
  }

  /** @returns {number} Mean estimated syllables per word */
  getAvgSyllablesPerWord() {
    return this.computeSyllableStats().avg;
  }

  /** @returns {number} Median estimated syllables per word */
  getMedianSyllablesPerWord() {
    return this.computeSyllableStats().median;
  }

  /**
   * Honoré's R: `100 * ln(N) / (1 - V1 / V)` with N words, V distinct words
   * and V1 hapax legomena.
   *
   * @returns {number} The statistic, or 0 when it is undefined (no words, or
   *   every distinct word occurs once so the denominator is 0)
   */
  getHonoresR() {
    const distinct = this.wordHistogram.size;
    // Division by zero does not throw in JavaScript, so the degenerate
    // cases have to be detected explicitly.
    if (distinct === 0) return 0;
    const denominator = 1 - this.getHapaxLegomena().length / distinct;
    const r = (100 * Math.log(this.words.length)) / denominator;
    return Number.isFinite(r) ? r : 0;
  }

  /**
   * @param {number} [wpm=200] - Reading speed in words per minute
   * @returns {number} Word count divided by `wpm`; a `wpm` of 0 or `null`
   *   is treated as 1
   */
  getReadingTime(wpm = 200) {
    return this.words.length / (wpm || 1);
  }

  /**
   * @param {string} [metric='flesch'] - 'flesch', 'fleschde' or 'kincaid'
   * @returns {number|undefined} The score, or `undefined` for any other metric name
   */
  getReadabilityScore(metric = 'flesch') {
    const w = this.words.length || 1;
    const s = this.sentences.length || 1;
    const y = this.getSyllablesCount() || 1;
    const asl = w / s;
    const asw = y / w;
    switch (metric) {
      case 'flesch':
        return 206.835 - 1.015 * asl - 84.6 * asw;
      case 'fleschde':
        return 180 - asl - 58.5 * asw;
      case 'kincaid':
        return 0.39 * asl + 11.8 * asw - 15.59;
      default:
        return undefined;
    }
  }

  /** @returns {number} Words per sentence plus the percentage of long words */
  getLIXScore() {
    const w = this.words.length || 1;
    const s = this.sentences.length || 1;
    const l = this.getLongWordRatio() * w;
    return w / s + (l / w) * 100;
  }

  /** @returns {number[]} The four linear formulas evaluated below, in order */
  getWSTFScore() {
    const w = this.words.length || 1;
    const h = (this.getMinSyllablesWordCount(3) / w) * 100;
    const s = this.getAvgSentenceLength();
    const l = this.getLongWordRatio() * 100;
    const m = (this.getMonosyllabicWordCount() / w) * 100;
    return [
      0.1935 * h + 0.1672 * s + 0.1297 * l - 0.0327 * m - 0.875,
      0.2007 * h + 0.1682 * s + 0.1373 * l - 2.779,
      0.2963 * h + 0.1905 * s - 1.1144,
      0.2744 * h + 0.2656 * s - 1.693
    ];
  }
}
2219
- PhoneticRegistry.add('soundex', Soundex);
2220
- PhoneticMappingRegistry.add('soundex', 'en', {
2221
- map: {
2222
- a: '0',
2223
- e: '0',
2224
- h: '0',
2225
- i: '0',
2226
- o: '0',
2227
- u: '0',
2228
- w: '0',
2229
- y: '0',
2230
- b: '1',
2231
- f: '1',
2232
- p: '1',
2233
- v: '1',
2234
- c: '2',
2235
- g: '2',
2236
- j: '2',
2237
- k: '2',
2238
- q: '2',
2239
- s: '2',
2240
- x: '2',
2241
- z: '2',
2242
- d: '3',
2243
- t: '3',
2244
- l: '4',
2245
- m: '5',
2246
- n: '5',
2247
- r: '6'
2248
- }
2249
- });
2250
- PhoneticMappingRegistry.add('soundex', 'de', {
2251
- map: {
2252
- a: '0',
2253
- ä: '0',
2254
- e: '0',
2255
- h: '0',
2256
- i: '0',
2257
- j: '0',
2258
- o: '0',
2259
- ö: '0',
2260
- u: '0',
2261
- ü: '0',
2262
- y: '0',
2263
- b: '1',
2264
- f: '1',
2265
- p: '1',
2266
- v: '1',
2267
- w: '1',
2268
- c: '2',
2269
- g: '2',
2270
- k: '2',
2271
- q: '2',
2272
- s: '2',
2273
- ß: '2',
2274
- x: '2',
2275
- z: '2',
2276
- d: '3',
2277
- t: '3',
2278
- l: '4',
2279
- m: '5',
2280
- n: '5',
2281
- r: '6'
2282
- },
2283
- ruleset: [{ char: 'c', next: ['h'], code: '7' }]
2284
- });
2285
2883
 
2286
2884
  const profiler = Profiler.getInstance();
2287
2885
  class CmpStr {
@@ -2333,29 +2931,26 @@
2333
2931
  }
2334
2932
  assert(cond, test) {
2335
2933
  switch (cond) {
2934
+ default:
2935
+ throw new CmpStrInternalError(`Cmpstr condition <${cond}> unknown`);
2336
2936
  case 'metric':
2337
- if (!CmpStr.metric.has(test))
2338
- throw new Error(
2339
- `CmpStr <metric> must be set, call .setMetric(), ` +
2340
- `use CmpStr.metric.list() for available metrics`
2341
- );
2937
+ OptionsValidator.validateMetricName(test);
2342
2938
  break;
2343
2939
  case 'phonetic':
2344
- if (!CmpStr.phonetic.has(test))
2345
- throw new Error(
2346
- `CmpStr <phonetic> must be set, call .setPhonetic(), ` +
2347
- `use CmpStr.phonetic.list() for available phonetic algorithms`
2348
- );
2940
+ OptionsValidator.validatePhoneticName(test);
2349
2941
  break;
2350
- default:
2351
- throw new Error(`Cmpstr condition <${cond}> unknown`);
2352
2942
  }
2353
2943
  }
2354
2944
  assertMany(...cond) {
2355
2945
  for (const [c, test] of cond) this.assert(c, test);
2356
2946
  }
2357
2947
  resolveOptions(opt) {
2358
- return merge({ ...(this.options ?? Object.create(null)) }, opt);
2948
+ const merged = DeepMerge.merge(
2949
+ { ...(this.options ?? Object.create(null)) },
2950
+ opt
2951
+ );
2952
+ OptionsValidator.validateOptions(merged);
2953
+ return merged;
2359
2954
  }
2360
2955
  normalize(input, flags) {
2361
2956
  return Normalizer.normalize(input, flags ?? this.options.flags ?? '');
@@ -2371,7 +2966,7 @@
2371
2966
  return input;
2372
2967
  }
2373
2968
  postProcess(result, opt) {
2374
- if (opt?.removeZero && Array.isArray(result))
2969
+ if (Array.isArray(result) && opt?.removeZero)
2375
2970
  result = result.filter((r) => r.res > 0);
2376
2971
  return result;
2377
2972
  }
@@ -2389,65 +2984,114 @@
2389
2984
  compute(a, b, opt, mode, raw, skip) {
2390
2985
  const resolved = this.resolveOptions(opt);
2391
2986
  this.assert('metric', resolved.metric);
2392
- const A = skip ? a : this.prepare(a, resolved);
2393
- const B = skip ? b : this.prepare(b, resolved);
2394
- if (
2395
- resolved.safeEmpty &&
2396
- ((Array.isArray(A) && A.length === 0) ||
2397
- (Array.isArray(B) && B.length === 0) ||
2398
- A === '' ||
2399
- B === '')
2400
- ) {
2401
- return [];
2402
- }
2403
- const metric = factory['metric'](resolved.metric, A, B, resolved.opt);
2404
- if (resolved.output !== 'prep') metric.setOriginal(a, b);
2405
- metric.run(mode);
2406
- const result = this.postProcess(metric.getResults(), resolved);
2407
- return this.output(result, raw ?? resolved.raw);
2987
+ return ErrorUtil.wrap(
2988
+ () => {
2989
+ const A = skip ? a : this.prepare(a, resolved);
2990
+ const B = skip ? b : this.prepare(b, resolved);
2991
+ if (
2992
+ resolved.safeEmpty &&
2993
+ ((Array.isArray(A) && A.length === 0) ||
2994
+ (Array.isArray(B) && B.length === 0) ||
2995
+ A === '' ||
2996
+ B === '')
2997
+ ) {
2998
+ return [];
2999
+ }
3000
+ const metric = factory['metric'](resolved.metric, A, B, resolved.opt);
3001
+ if (resolved.output !== 'prep') metric.setOriginal(a, b);
3002
+ metric.run(mode);
3003
+ const result = this.postProcess(metric.getResults(), resolved);
3004
+ return this.output(result, raw ?? resolved.raw);
3005
+ },
3006
+ `Failed to compute metric <${resolved.metric}> for the given inputs`,
3007
+ { a, b, options: opt }
3008
+ );
2408
3009
  }
2409
3010
  output(result, raw) {
2410
- return (raw ?? this.options.raw)
2411
- ? result
2412
- : Array.isArray(result)
2413
- ? result.map((r) => ({ source: r.a, target: r.b, match: r.res }))
2414
- : { source: result.a, target: result.b, match: result.res };
2415
- }
2416
- clone = () =>
2417
- Object.assign(Object.create(Object.getPrototypeOf(this)), this);
3011
+ return ErrorUtil.wrap(
3012
+ () =>
3013
+ (raw ?? this.options.raw)
3014
+ ? result
3015
+ : Array.isArray(result)
3016
+ ? result.map((r) => ({ source: r.a, target: r.b, match: r.res }))
3017
+ : { source: result.a, target: result.b, match: result.res },
3018
+ `Failed to resolve output format for the metric result`,
3019
+ { result, raw }
3020
+ );
3021
+ }
3022
+ clone() {
3023
+ const inst = Object.assign(
3024
+ Object.create(Object.getPrototypeOf(this)),
3025
+ this
3026
+ );
3027
+ inst.options = DeepMerge.merge(Object.create(null), this.options);
3028
+ return inst;
3029
+ }
2418
3030
  reset() {
2419
- for (const k in this.options) delete this.options[k];
3031
+ this.options = Object.create(null);
2420
3032
  return this;
2421
3033
  }
2422
3034
  setOptions(opt) {
3035
+ OptionsValidator.validateOptions(opt);
2423
3036
  this.options = opt;
2424
3037
  return this;
2425
3038
  }
2426
3039
  mergeOptions(opt) {
2427
- merge(this.options, opt);
3040
+ DeepMerge.merge(this.options, opt);
3041
+ OptionsValidator.validateOptions(this.options);
2428
3042
  return this;
2429
3043
  }
2430
3044
  setSerializedOptions(opt) {
2431
- this.options = JSON.parse(opt);
2432
- return this;
3045
+ try {
3046
+ const parsed = JSON.parse(opt);
3047
+ OptionsValidator.validateOptions(parsed);
3048
+ this.options = parsed;
3049
+ return this;
3050
+ } catch (err) {
3051
+ if (err instanceof SyntaxError)
3052
+ throw new CmpStrValidationError(
3053
+ `Failed to parse serialized options, invalid JSON string`,
3054
+ { opt, error: err instanceof Error ? err.message : String(err) }
3055
+ );
3056
+ throw err;
3057
+ }
2433
3058
  }
2434
3059
  setOption(path, value) {
2435
- set(this.options, path, value);
3060
+ DeepMerge.set(this.options, path, value);
3061
+ OptionsValidator.validateOptions(this.options);
2436
3062
  return this;
2437
3063
  }
2438
3064
  rmvOption(path) {
2439
- rmv(this.options, path);
3065
+ DeepMerge.rmv(this.options, path);
2440
3066
  return this;
2441
3067
  }
2442
- setRaw = (enable) => this.setOption('raw', enable);
2443
- setMetric = (name) => this.setOption('metric', name);
2444
- setFlags = (flags) => this.setOption('flags', flags);
2445
- rmvFlags = () => this.rmvOption('flags');
2446
- setProcessors = (opt) => this.setOption('processors', opt);
2447
- rmvProcessors = () => this.rmvOption('processors');
2448
- getOptions = () => this.options;
2449
- getSerializedOptions = () => JSON.stringify(this.options);
2450
- getOption = (path) => get(this.options, path);
3068
+ setRaw(enable) {
3069
+ return this.setOption('raw', enable);
3070
+ }
3071
+ setMetric(name) {
3072
+ return this.setOption('metric', name);
3073
+ }
3074
+ setFlags(flags) {
3075
+ return this.setOption('flags', flags);
3076
+ }
3077
+ rmvFlags() {
3078
+ return this.rmvOption('flags');
3079
+ }
3080
+ setProcessors(opt) {
3081
+ return this.setOption('processors', opt);
3082
+ }
3083
+ rmvProcessors() {
3084
+ return this.rmvOption('processors');
3085
+ }
3086
+ getOptions() {
3087
+ return this.options;
3088
+ }
3089
+ getSerializedOptions() {
3090
+ return JSON.stringify(this.options);
3091
+ }
3092
+ getOption(path) {
3093
+ return DeepMerge.get(this.options, path);
3094
+ }
2451
3095
  test(a, b, opt) {
2452
3096
  return this.compute(a, b, opt, 'single');
2453
3097
  }
@@ -2486,15 +3130,35 @@
2486
3130
  const resolved = this.resolveOptions({ flags, processors });
2487
3131
  const test = this.prepare(needle, resolved);
2488
3132
  const hstk = this.prepare(haystack, resolved);
2489
- return haystack.filter((_, i) => hstk[i].includes(test));
3133
+ const out = [];
3134
+ for (let i = 0, len = hstk.length; i < len; i++) {
3135
+ if (hstk[i].includes(test)) out.push(haystack[i]);
3136
+ }
3137
+ return out;
2490
3138
  }
2491
3139
  matrix(input, opt) {
2492
- input = this.prepare(input, this.resolveOptions(opt));
2493
- return input.map((a) =>
2494
- this.compute(a, input, undefined, 'batch', true, true).map(
2495
- (b) => b.res ?? 0
2496
- )
2497
- );
3140
+ const resolved = this.resolveOptions(opt);
3141
+ const arr = this.prepare(input, resolved);
3142
+ const n = arr.length;
3143
+ const out = Array.from({ length: n }, () => new Array(n).fill(0));
3144
+ for (let i = 0; i < n; i++)
3145
+ for (let j = i; j < n; j++) {
3146
+ if (i === j) {
3147
+ out[i][j] = 1;
3148
+ } else {
3149
+ const score = this.compute(
3150
+ arr[i],
3151
+ arr[j],
3152
+ resolved,
3153
+ 'single',
3154
+ true,
3155
+ true
3156
+ ).res;
3157
+ out[i][j] = score;
3158
+ out[j][i] = score;
3159
+ }
3160
+ }
3161
+ return out;
2498
3162
  }
2499
3163
  phoneticIndex(input, algo, opt) {
2500
3164
  const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
@@ -2577,22 +3241,28 @@
2577
3241
  async computeAsync(a, b, opt, mode, raw, skip) {
2578
3242
  const resolved = this.resolveOptions(opt);
2579
3243
  this.assert('metric', resolved.metric);
2580
- const A = skip ? a : await this.prepareAsync(a, resolved);
2581
- const B = skip ? b : await this.prepareAsync(b, resolved);
2582
- if (
2583
- resolved.safeEmpty &&
2584
- ((Array.isArray(A) && A.length === 0) ||
2585
- (Array.isArray(B) && B.length === 0) ||
2586
- A === '' ||
2587
- B === '')
2588
- ) {
2589
- return [];
2590
- }
2591
- const metric = factory['metric'](resolved.metric, A, B, resolved.opt);
2592
- if (resolved.output !== 'prep') metric.setOriginal(a, b);
2593
- await metric.runAsync(mode);
2594
- const result = this.postProcess(metric.getResults(), resolved);
2595
- return this.output(result, raw ?? resolved.raw);
3244
+ return ErrorUtil.wrapAsync(
3245
+ async () => {
3246
+ const A = skip ? a : await this.prepareAsync(a, resolved);
3247
+ const B = skip ? b : await this.prepareAsync(b, resolved);
3248
+ if (
3249
+ resolved.safeEmpty &&
3250
+ ((Array.isArray(A) && A.length === 0) ||
3251
+ (Array.isArray(B) && B.length === 0) ||
3252
+ A === '' ||
3253
+ B === '')
3254
+ ) {
3255
+ return [];
3256
+ }
3257
+ const metric = factory['metric'](resolved.metric, A, B, resolved.opt);
3258
+ if (resolved.output !== 'prep') metric.setOriginal(a, b);
3259
+ await metric.runAsync(mode);
3260
+ const result = this.postProcess(metric.getResults(), resolved);
3261
+ return this.output(result, raw ?? resolved.raw);
3262
+ },
3263
+ `Failed to compute metric <${opt?.metric ?? this.options.metric}> for the given inputs`,
3264
+ { a, b, opt }
3265
+ );
2596
3266
  }
2597
3267
  async testAsync(a, b, opt) {
2598
3268
  return this.computeAsync(a, b, opt, 'single');
@@ -2630,23 +3300,40 @@
2630
3300
  const resolved = this.resolveOptions({ flags, processors });
2631
3301
  const test = await this.prepareAsync(needle, resolved);
2632
3302
  const hstk = await this.prepareAsync(haystack, resolved);
2633
- return haystack.filter((_, i) => hstk[i].includes(test));
3303
+ const out = [];
3304
+ for (let i = 0; i < hstk.length; i++) {
3305
+ if (hstk[i].includes(test)) out.push(haystack[i]);
3306
+ }
3307
+ return out;
2634
3308
  }
2635
3309
  async matrixAsync(input, opt) {
2636
- input = await this.prepareAsync(input, this.resolveOptions(opt));
2637
- return Promise.all(
2638
- input.map(
2639
- async (a) =>
2640
- await this.computeAsync(
2641
- a,
2642
- input,
2643
- undefined,
2644
- 'batch',
2645
- true,
2646
- true
2647
- ).then((r) => r.map((b) => b.res ?? 0))
2648
- )
2649
- );
3310
+ const resolved = this.resolveOptions(opt);
3311
+ const arr = await this.prepareAsync(input, resolved);
3312
+ const n = arr.length;
3313
+ const out = Array.from({ length: n }, () => new Array(n).fill(0));
3314
+ for (let i = 0; i < n; i++) {
3315
+ await Promise.all(
3316
+ Array.from({ length: n - i }, (_, k) => i + k).map(async (j) => {
3317
+ if (i === j) {
3318
+ out[i][j] = 1;
3319
+ } else {
3320
+ const score = (
3321
+ await this.computeAsync(
3322
+ arr[i],
3323
+ arr[j],
3324
+ resolved,
3325
+ 'single',
3326
+ true,
3327
+ true
3328
+ )
3329
+ ).res;
3330
+ out[i][j] = score;
3331
+ out[j][i] = score;
3332
+ }
3333
+ })
3334
+ );
3335
+ }
3336
+ return out;
2650
3337
  }
2651
3338
  async phoneticIndexAsync(input, algo, opt) {
2652
3339
  const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
@@ -2693,6 +3380,7 @@
2693
3380
 
2694
3381
  exports.CmpStr = CmpStr;
2695
3382
  exports.CmpStrAsync = CmpStrAsync;
3383
+ exports.CmpStrError = Errors;
2696
3384
  exports.DeepMerge = DeepMerge;
2697
3385
  exports.DiffChecker = DiffChecker;
2698
3386
  exports.Filter = Filter;
@@ -2701,6 +3389,7 @@
2701
3389
  exports.Metric = Metric;
2702
3390
  exports.MetricRegistry = MetricRegistry;
2703
3391
  exports.Normalizer = Normalizer;
3392
+ exports.OptionsValidator = OptionsValidator;
2704
3393
  exports.Phonetic = Phonetic;
2705
3394
  exports.PhoneticMappingRegistry = PhoneticMappingRegistry;
2706
3395
  exports.PhoneticRegistry = PhoneticRegistry;
@@ -2709,4 +3398,3 @@
2709
3398
  exports.StructuredData = StructuredData;
2710
3399
  exports.TextAnalyzer = TextAnalyzer;
2711
3400
  });
2712
- //# sourceMappingURL=CmpStr.umd.js.map