cmpstr 3.2.1 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. package/README.md +24 -18
  2. package/dist/CmpStr.esm.js +1904 -1211
  3. package/dist/CmpStr.esm.min.js +2 -3
  4. package/dist/CmpStr.umd.js +1924 -1236
  5. package/dist/CmpStr.umd.min.js +2 -3
  6. package/dist/cjs/CmpStr.cjs +134 -64
  7. package/dist/cjs/CmpStrAsync.cjs +60 -37
  8. package/dist/cjs/index.cjs +1 -2
  9. package/dist/cjs/metric/Cosine.cjs +1 -2
  10. package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -2
  11. package/dist/cjs/metric/DiceSorensen.cjs +1 -2
  12. package/dist/cjs/metric/Hamming.cjs +5 -4
  13. package/dist/cjs/metric/Jaccard.cjs +1 -2
  14. package/dist/cjs/metric/JaroWinkler.cjs +1 -2
  15. package/dist/cjs/metric/LCS.cjs +1 -2
  16. package/dist/cjs/metric/Levenshtein.cjs +1 -2
  17. package/dist/cjs/metric/Metric.cjs +90 -53
  18. package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -2
  19. package/dist/cjs/metric/QGram.cjs +1 -2
  20. package/dist/cjs/metric/SmithWaterman.cjs +1 -2
  21. package/dist/cjs/phonetic/Caverphone.cjs +1 -2
  22. package/dist/cjs/phonetic/Cologne.cjs +1 -2
  23. package/dist/cjs/phonetic/Metaphone.cjs +1 -2
  24. package/dist/cjs/phonetic/Phonetic.cjs +80 -48
  25. package/dist/cjs/phonetic/Soundex.cjs +1 -2
  26. package/dist/cjs/root.cjs +6 -3
  27. package/dist/cjs/utils/DeepMerge.cjs +109 -99
  28. package/dist/cjs/utils/DiffChecker.cjs +1 -2
  29. package/dist/cjs/utils/Errors.cjs +106 -0
  30. package/dist/cjs/utils/Filter.cjs +97 -37
  31. package/dist/cjs/utils/HashTable.cjs +44 -30
  32. package/dist/cjs/utils/Normalizer.cjs +84 -35
  33. package/dist/cjs/utils/OptionsValidator.cjs +211 -0
  34. package/dist/cjs/utils/Pool.cjs +57 -19
  35. package/dist/cjs/utils/Profiler.cjs +41 -28
  36. package/dist/cjs/utils/Registry.cjs +48 -24
  37. package/dist/cjs/utils/StructuredData.cjs +95 -57
  38. package/dist/cjs/utils/TextAnalyzer.cjs +1 -2
  39. package/dist/esm/CmpStr.mjs +133 -61
  40. package/dist/esm/CmpStrAsync.mjs +56 -33
  41. package/dist/esm/index.mjs +1 -2
  42. package/dist/esm/metric/Cosine.mjs +1 -2
  43. package/dist/esm/metric/DamerauLevenshtein.mjs +1 -2
  44. package/dist/esm/metric/DiceSorensen.mjs +1 -2
  45. package/dist/esm/metric/Hamming.mjs +5 -4
  46. package/dist/esm/metric/Jaccard.mjs +1 -2
  47. package/dist/esm/metric/JaroWinkler.mjs +1 -2
  48. package/dist/esm/metric/LCS.mjs +1 -2
  49. package/dist/esm/metric/Levenshtein.mjs +1 -2
  50. package/dist/esm/metric/Metric.mjs +92 -53
  51. package/dist/esm/metric/NeedlemanWunsch.mjs +1 -2
  52. package/dist/esm/metric/QGram.mjs +1 -2
  53. package/dist/esm/metric/SmithWaterman.mjs +1 -2
  54. package/dist/esm/phonetic/Caverphone.mjs +1 -2
  55. package/dist/esm/phonetic/Cologne.mjs +1 -2
  56. package/dist/esm/phonetic/Metaphone.mjs +1 -2
  57. package/dist/esm/phonetic/Phonetic.mjs +83 -48
  58. package/dist/esm/phonetic/Soundex.mjs +1 -2
  59. package/dist/esm/root.mjs +5 -4
  60. package/dist/esm/utils/DeepMerge.mjs +109 -95
  61. package/dist/esm/utils/DiffChecker.mjs +1 -2
  62. package/dist/esm/utils/Errors.mjs +106 -0
  63. package/dist/esm/utils/Filter.mjs +97 -37
  64. package/dist/esm/utils/HashTable.mjs +44 -30
  65. package/dist/esm/utils/Normalizer.mjs +84 -35
  66. package/dist/esm/utils/OptionsValidator.mjs +210 -0
  67. package/dist/esm/utils/Pool.mjs +53 -19
  68. package/dist/esm/utils/Profiler.mjs +41 -28
  69. package/dist/esm/utils/Registry.mjs +48 -24
  70. package/dist/esm/utils/StructuredData.mjs +95 -57
  71. package/dist/esm/utils/TextAnalyzer.mjs +1 -2
  72. package/dist/types/CmpStr.d.ts +25 -14
  73. package/dist/types/CmpStrAsync.d.ts +4 -0
  74. package/dist/types/index.d.ts +3 -2
  75. package/dist/types/metric/Metric.d.ts +15 -14
  76. package/dist/types/phonetic/Phonetic.d.ts +7 -4
  77. package/dist/types/root.d.ts +4 -2
  78. package/dist/types/utils/DeepMerge.d.ts +80 -58
  79. package/dist/types/utils/Errors.d.ts +154 -0
  80. package/dist/types/utils/Filter.d.ts +8 -1
  81. package/dist/types/utils/HashTable.d.ts +12 -11
  82. package/dist/types/utils/Normalizer.d.ts +5 -1
  83. package/dist/types/utils/OptionsValidator.d.ts +193 -0
  84. package/dist/types/utils/Pool.d.ts +2 -0
  85. package/dist/types/utils/Profiler.d.ts +9 -28
  86. package/dist/types/utils/Registry.d.ts +3 -3
  87. package/dist/types/utils/StructuredData.d.ts +6 -1
  88. package/dist/types/utils/Types.d.ts +39 -1
  89. package/package.json +20 -11
  90. package/dist/CmpStr.esm.js.map +0 -1
  91. package/dist/CmpStr.esm.min.js.map +0 -1
  92. package/dist/CmpStr.umd.js.map +0 -1
  93. package/dist/CmpStr.umd.min.js.map +0 -1
  94. package/dist/cjs/CmpStr.cjs.map +0 -1
  95. package/dist/cjs/CmpStrAsync.cjs.map +0 -1
  96. package/dist/cjs/index.cjs.map +0 -1
  97. package/dist/cjs/metric/Cosine.cjs.map +0 -1
  98. package/dist/cjs/metric/DamerauLevenshtein.cjs.map +0 -1
  99. package/dist/cjs/metric/DiceSorensen.cjs.map +0 -1
  100. package/dist/cjs/metric/Hamming.cjs.map +0 -1
  101. package/dist/cjs/metric/Jaccard.cjs.map +0 -1
  102. package/dist/cjs/metric/JaroWinkler.cjs.map +0 -1
  103. package/dist/cjs/metric/LCS.cjs.map +0 -1
  104. package/dist/cjs/metric/Levenshtein.cjs.map +0 -1
  105. package/dist/cjs/metric/Metric.cjs.map +0 -1
  106. package/dist/cjs/metric/NeedlemanWunsch.cjs.map +0 -1
  107. package/dist/cjs/metric/QGram.cjs.map +0 -1
  108. package/dist/cjs/metric/SmithWaterman.cjs.map +0 -1
  109. package/dist/cjs/phonetic/Caverphone.cjs.map +0 -1
  110. package/dist/cjs/phonetic/Cologne.cjs.map +0 -1
  111. package/dist/cjs/phonetic/Metaphone.cjs.map +0 -1
  112. package/dist/cjs/phonetic/Phonetic.cjs.map +0 -1
  113. package/dist/cjs/phonetic/Soundex.cjs.map +0 -1
  114. package/dist/cjs/root.cjs.map +0 -1
  115. package/dist/cjs/utils/DeepMerge.cjs.map +0 -1
  116. package/dist/cjs/utils/DiffChecker.cjs.map +0 -1
  117. package/dist/cjs/utils/Filter.cjs.map +0 -1
  118. package/dist/cjs/utils/HashTable.cjs.map +0 -1
  119. package/dist/cjs/utils/Normalizer.cjs.map +0 -1
  120. package/dist/cjs/utils/Pool.cjs.map +0 -1
  121. package/dist/cjs/utils/Profiler.cjs.map +0 -1
  122. package/dist/cjs/utils/Registry.cjs.map +0 -1
  123. package/dist/cjs/utils/StructuredData.cjs.map +0 -1
  124. package/dist/cjs/utils/TextAnalyzer.cjs.map +0 -1
  125. package/dist/esm/CmpStr.mjs.map +0 -1
  126. package/dist/esm/CmpStrAsync.mjs.map +0 -1
  127. package/dist/esm/index.mjs.map +0 -1
  128. package/dist/esm/metric/Cosine.mjs.map +0 -1
  129. package/dist/esm/metric/DamerauLevenshtein.mjs.map +0 -1
  130. package/dist/esm/metric/DiceSorensen.mjs.map +0 -1
  131. package/dist/esm/metric/Hamming.mjs.map +0 -1
  132. package/dist/esm/metric/Jaccard.mjs.map +0 -1
  133. package/dist/esm/metric/JaroWinkler.mjs.map +0 -1
  134. package/dist/esm/metric/LCS.mjs.map +0 -1
  135. package/dist/esm/metric/Levenshtein.mjs.map +0 -1
  136. package/dist/esm/metric/Metric.mjs.map +0 -1
  137. package/dist/esm/metric/NeedlemanWunsch.mjs.map +0 -1
  138. package/dist/esm/metric/QGram.mjs.map +0 -1
  139. package/dist/esm/metric/SmithWaterman.mjs.map +0 -1
  140. package/dist/esm/phonetic/Caverphone.mjs.map +0 -1
  141. package/dist/esm/phonetic/Cologne.mjs.map +0 -1
  142. package/dist/esm/phonetic/Metaphone.mjs.map +0 -1
  143. package/dist/esm/phonetic/Phonetic.mjs.map +0 -1
  144. package/dist/esm/phonetic/Soundex.mjs.map +0 -1
  145. package/dist/esm/root.mjs.map +0 -1
  146. package/dist/esm/utils/DeepMerge.mjs.map +0 -1
  147. package/dist/esm/utils/DiffChecker.mjs.map +0 -1
  148. package/dist/esm/utils/Filter.mjs.map +0 -1
  149. package/dist/esm/utils/HashTable.mjs.map +0 -1
  150. package/dist/esm/utils/Normalizer.mjs.map +0 -1
  151. package/dist/esm/utils/Pool.mjs.map +0 -1
  152. package/dist/esm/utils/Profiler.mjs.map +0 -1
  153. package/dist/esm/utils/Registry.mjs.map +0 -1
  154. package/dist/esm/utils/StructuredData.mjs.map +0 -1
  155. package/dist/esm/utils/TextAnalyzer.mjs.map +0 -1
@@ -1,117 +1,228 @@
1
1
  /**
2
- * CmpStr v3.2.1 build-3439ccb-260130
2
+ * CmpStr v3.3.0 build-3699f85-260318
3
3
  * This is a lightweight, fast and well performing library for calculating string similarity.
4
4
  * (c) 2023-2026 Paul Köhler @komed3 / MIT License
5
5
  * Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
6
6
  */
7
- const BRACKET_PATTERN = /\[(\d+)]/g;
8
- const PATH_CACHE = new Map();
9
- function parse(p) {
10
- let cached = PATH_CACHE.get(p);
11
- if (cached) return cached;
12
- const parsed = p
13
- .replace(BRACKET_PATTERN, '.$1')
14
- .split('.')
15
- .map((s) => {
16
- const n = Number(s);
17
- return Number.isInteger(n) && String(n) === s ? n : s;
18
- });
19
- PATH_CACHE.set(p, parsed);
20
- return parsed;
7
+ class CmpStrError extends Error {
8
+ code;
9
+ meta;
10
+ when = new Date().toISOString();
11
+ constructor(code, message, meta, cause) {
12
+ super(message, cause !== undefined ? { cause } : undefined);
13
+ this.name = this.constructor.name;
14
+ this.code = code;
15
+ this.meta = meta;
16
+ if (typeof Error.captureStackTrace === 'function') {
17
+ Error.captureStackTrace(this, this.constructor);
18
+ }
19
+ }
20
+ format(stack = false) {
21
+ const parts = [`${this.name} [${this.code}]`, this.message];
22
+ if (this.meta)
23
+ for (const _ in this.meta) {
24
+ parts.push(JSON.stringify(this.meta));
25
+ break;
26
+ }
27
+ return (
28
+ parts.join(' - ') +
29
+ (stack && this.stack ? `\nStack Trace:\n${this.stack}` : '')
30
+ );
31
+ }
32
+ toString() {
33
+ return this.format(false);
34
+ }
35
+ toJSON(stack = false) {
36
+ return {
37
+ name: this.name,
38
+ code: this.code,
39
+ message: this.message,
40
+ meta: this.meta,
41
+ when: this.when,
42
+ cause:
43
+ this.cause instanceof Error
44
+ ? {
45
+ name: this.cause.name,
46
+ message: this.cause.message,
47
+ stack: stack && this.cause.stack
48
+ }
49
+ : this.cause
50
+ };
51
+ }
21
52
  }
22
- function get(t, path, fb) {
23
- let o = t;
24
- for (const k of parse(path)) {
25
- if (o == null || !(k in o)) return fb;
26
- o = o[k];
53
+ class CmpStrValidationError extends CmpStrError {
54
+ constructor(message, meta, cause) {
55
+ super('E_VALIDATION', message, meta, cause);
27
56
  }
28
- return o;
29
57
  }
30
- function has(t, path) {
31
- let o = t;
32
- for (const k of parse(path)) {
33
- if (o == null || !(k in o)) return false;
34
- o = o[k];
58
+ class CmpStrNotFoundError extends CmpStrError {
59
+ constructor(message, meta, cause) {
60
+ super('E_NOT_FOUND', message, meta, cause);
35
61
  }
36
- return true;
37
62
  }
38
- function set(t, path, value) {
39
- if (path === '') return value;
40
- const keys = parse(path);
41
- if (t !== undefined && (typeof t !== 'object' || t === null))
42
- throw Error(`Cannot set property <${keys[0]}> of <${JSON.stringify(t)}>`);
43
- const root = t ?? (typeof keys[0] === 'number' ? [] : Object.create(null));
44
- let cur = root;
45
- for (let i = 0; i < keys.length - 1; i++) {
46
- const k = keys[i];
47
- let n = cur[k];
48
- if (n != null && typeof n !== 'object')
49
- throw Error(
50
- `Cannot set property <${keys[i + 1]}> of <${JSON.stringify(n)}>`
51
- );
52
- if (n == null)
53
- n = cur[k] = typeof keys[i + 1] === 'number' ? [] : Object.create(null);
54
- cur = n;
63
+ class CmpStrUsageError extends CmpStrError {
64
+ constructor(message, meta, cause) {
65
+ super('E_USAGE', message, meta, cause);
55
66
  }
56
- cur[keys[keys.length - 1]] = value;
57
- return root;
58
67
  }
59
- function merge(
60
- t = Object.create(null),
61
- o = Object.create(null),
62
- mergeUndefined = false
63
- ) {
64
- const target = t ?? Object.create(null);
65
- Object.keys(o).forEach((k) => {
66
- const val = o[k];
67
- if (!mergeUndefined && val === undefined) return;
68
- if (k === '__proto__' || k === 'constructor') return;
69
- if (val !== null && typeof val === 'object' && !Array.isArray(val)) {
70
- const existing = target[k];
71
- target[k] = merge(
72
- existing !== null &&
73
- typeof existing === 'object' &&
74
- !Array.isArray(existing)
75
- ? existing
76
- : Object.create(null),
77
- val,
78
- mergeUndefined
79
- );
80
- } else target[k] = val;
81
- });
82
- return target;
68
+ class CmpStrInternalError extends CmpStrError {
69
+ constructor(message, meta, cause) {
70
+ super('E_INTERNAL', message, meta, cause);
71
+ }
83
72
  }
84
- function rmv(t, path, preserveEmpty = false) {
85
- const keys = parse(path);
86
- const remove = (obj, i = 0) => {
87
- const key = keys[i];
88
- if (!obj || typeof obj !== 'object') return false;
89
- if (i === keys.length - 1) return delete obj[key];
90
- if (!remove(obj[key], i + 1)) return false;
91
- if (!preserveEmpty) {
92
- const val = obj[key];
93
- if (
94
- typeof val === 'object' &&
95
- ((Array.isArray(val) && val.every((v) => v == null)) ||
96
- (!Array.isArray(val) && Object.keys(val).length === 0))
97
- )
98
- delete obj[key];
73
+ class ErrorUtil {
74
+ static assert(condition, message, meta) {
75
+ if (!condition) throw new CmpStrUsageError(message, meta);
76
+ }
77
+ static rethrow(err, message, meta) {
78
+ if (err instanceof CmpStrError) throw err;
79
+ throw new CmpStrInternalError(message, meta, err);
80
+ }
81
+ static format(err) {
82
+ if (err instanceof CmpStrError) return err.toString();
83
+ if (err instanceof Error) return `${err.name}: ${err.message}`;
84
+ return String(err);
85
+ }
86
+ static wrap(fn, message, meta) {
87
+ try {
88
+ return fn();
89
+ } catch (err) {
90
+ if (err instanceof CmpStrError) throw err;
91
+ throw new CmpStrInternalError(message, meta, err);
99
92
  }
100
- return true;
101
- };
102
- remove(t);
103
- return t;
93
+ }
94
+ static async wrapAsync(fn, message, meta) {
95
+ try {
96
+ return await fn();
97
+ } catch (err) {
98
+ if (err instanceof CmpStrError) throw err;
99
+ throw new CmpStrInternalError(message, meta, err);
100
+ }
101
+ }
104
102
  }
105
103
 
106
- var DeepMerge = /*#__PURE__*/ Object.freeze({
104
+ var Errors = /*#__PURE__*/ Object.freeze({
107
105
  __proto__: null,
108
- get: get,
109
- has: has,
110
- merge: merge,
111
- rmv: rmv,
112
- set: set
106
+ CmpStrError: CmpStrError,
107
+ CmpStrInternalError: CmpStrInternalError,
108
+ CmpStrNotFoundError: CmpStrNotFoundError,
109
+ CmpStrUsageError: CmpStrUsageError,
110
+ CmpStrValidationError: CmpStrValidationError,
111
+ ErrorUtil: ErrorUtil
113
112
  });
114
113
 
114
+ class DeepMerge {
115
+ static BRACKET_PATTERN = /\[(\d+)]/g;
116
+ static PATH_CACHE = new Map();
117
+ static walk(obj, keys) {
118
+ let o = obj;
119
+ for (let i = 0; i < keys.length; i++) {
120
+ const k = keys[i];
121
+ if (o == null || !(k in o)) return { exists: false };
122
+ o = o[k];
123
+ }
124
+ return { exists: true, value: o };
125
+ }
126
+ static parse(p) {
127
+ const cached = DeepMerge.PATH_CACHE.get(p);
128
+ if (cached) return cached;
129
+ const parsed = p
130
+ .replace(DeepMerge.BRACKET_PATTERN, '.$1')
131
+ .split('.')
132
+ .map((s) => {
133
+ const n = Number(s);
134
+ return Number.isInteger(n) && String(n) === s ? n : s;
135
+ });
136
+ if (DeepMerge.PATH_CACHE.size > 2000) DeepMerge.PATH_CACHE.clear();
137
+ DeepMerge.PATH_CACHE.set(p, parsed);
138
+ return parsed;
139
+ }
140
+ static has(t, path) {
141
+ return DeepMerge.walk(t, DeepMerge.parse(path)).exists;
142
+ }
143
+ static get(t, path, fb) {
144
+ const r = DeepMerge.walk(t, DeepMerge.parse(path));
145
+ return r.exists ? r.value : fb;
146
+ }
147
+ static set(t, path, value) {
148
+ if (path === '') return value;
149
+ const keys = DeepMerge.parse(path);
150
+ ErrorUtil.assert(
151
+ t === undefined || (typeof t === 'object' && t !== null),
152
+ `Cannot set property <${keys[0]}> of <${JSON.stringify(t)}>`,
153
+ { path: keys[0], target: t }
154
+ );
155
+ const root = t ?? (typeof keys[0] === 'number' ? [] : Object.create(null));
156
+ let cur = root;
157
+ for (let i = 0; i < keys.length - 1; i++) {
158
+ const k = keys[i];
159
+ let n = cur[k];
160
+ ErrorUtil.assert(
161
+ n == null || typeof n === 'object',
162
+ `Cannot set property <${keys[i + 1]}> of <${JSON.stringify(n)}>`,
163
+ { path: keys.slice(0, i + 2), value: n }
164
+ );
165
+ if (n == null)
166
+ n = cur[k] = typeof keys[i + 1] === 'number' ? [] : Object.create(null);
167
+ cur = n;
168
+ }
169
+ cur[keys[keys.length - 1]] = value;
170
+ return root;
171
+ }
172
+ static rmv(t, path, preserveEmpty = false) {
173
+ const keys = DeepMerge.parse(path);
174
+ const remove = (obj, i = 0) => {
175
+ const key = keys[i];
176
+ if (!obj || typeof obj !== 'object') return false;
177
+ if (i === keys.length - 1) return delete obj[key];
178
+ if (!remove(obj[key], i + 1)) return false;
179
+ if (!preserveEmpty) {
180
+ const val = obj[key];
181
+ let empty = true;
182
+ if (typeof val === 'object') {
183
+ if (Array.isArray(val))
184
+ for (let i = 0; i < val.length; i++) {
185
+ if (val[i] != null) {
186
+ empty = false;
187
+ break;
188
+ }
189
+ }
190
+ else empty = false;
191
+ }
192
+ if (empty) delete obj[key];
193
+ }
194
+ return true;
195
+ };
196
+ remove(t);
197
+ return t;
198
+ }
199
+ static merge(
200
+ t = Object.create(null),
201
+ o = Object.create(null),
202
+ mergeUndefined = false
203
+ ) {
204
+ const target = t ?? Object.create(null);
205
+ for (const k in o) {
206
+ const val = o[k];
207
+ if (!mergeUndefined && val === undefined) continue;
208
+ if (k === '__proto__' || k === 'constructor') continue;
209
+ if (val !== null && typeof val === 'object' && !Array.isArray(val)) {
210
+ const existing = target[k];
211
+ target[k] = DeepMerge.merge(
212
+ existing !== null &&
213
+ typeof existing === 'object' &&
214
+ !Array.isArray(existing)
215
+ ? existing
216
+ : Object.create(null),
217
+ val,
218
+ mergeUndefined
219
+ );
220
+ } else target[k] = val;
221
+ }
222
+ return target;
223
+ }
224
+ }
225
+
115
226
  class DiffChecker {
116
227
  a;
117
228
  b;
@@ -408,48 +519,88 @@ class DiffChecker {
408
519
  }
409
520
 
410
521
  class Filter {
522
+ static IDENTITY = (s) => s;
411
523
  static filters = new Map();
412
524
  static pipeline = new Map();
413
- static getPipeline(hook) {
414
- const cached = Filter.pipeline.get(hook);
415
- if (cached) return cached;
416
- const filter = Filter.filters.get(hook);
417
- if (!filter) return (s) => s;
418
- const pipeline = Array.from(filter.values())
419
- .filter((f) => f.active)
420
- .sort((a, b) => a.priority - b.priority)
421
- .map((f) => f.fn);
422
- const fn = (input) => pipeline.reduce((v, f) => f(v), input);
423
- Filter.pipeline.set(hook, fn);
424
- return fn;
525
+ static getPipeline(hook, force = false) {
526
+ return ErrorUtil.wrap(
527
+ () => {
528
+ if (!force) {
529
+ const cached = Filter.pipeline.get(hook);
530
+ if (cached) return cached;
531
+ }
532
+ const filter = Filter.filters.get(hook);
533
+ if (!filter) {
534
+ Filter.pipeline.set(hook, Filter.IDENTITY);
535
+ return Filter.IDENTITY;
536
+ }
537
+ const pipeline = [];
538
+ for (const f of filter.values()) if (f.active) pipeline.push(f);
539
+ pipeline.sort((a, b) => a.priority - b.priority);
540
+ const fn =
541
+ pipeline.length === 0
542
+ ? Filter.IDENTITY
543
+ : (input) => {
544
+ let v = input;
545
+ for (let i = 0; i < pipeline.length; i++) v = pipeline[i].fn(v);
546
+ return v;
547
+ };
548
+ Filter.pipeline.set(hook, fn);
549
+ return fn;
550
+ },
551
+ `Error compiling filter pipeline for hook <${hook}>`,
552
+ { hook }
553
+ );
425
554
  }
426
555
  static has(hook, id) {
427
556
  return !!Filter.filters.get(hook)?.has(id);
428
557
  }
429
558
  static add(hook, id, fn, opt = {}) {
430
- const { priority = 10, active = true, overrideable = true } = opt;
431
- const filter = Filter.filters.get(hook) ?? new Map();
432
- const index = filter.get(id);
433
- if (index && !index.overrideable) return false;
434
- filter.set(id, { id, fn, priority, active, overrideable });
435
- Filter.filters.set(hook, filter);
436
- Filter.pipeline.delete(hook);
437
- return true;
559
+ return ErrorUtil.wrap(
560
+ () => {
561
+ const { priority = 10, active = true, overrideable = true } = opt;
562
+ const filter = Filter.filters.get(hook) ?? new Map();
563
+ const index = filter.get(id);
564
+ if (index && !index.overrideable) return false;
565
+ if (
566
+ index &&
567
+ index.fn === fn &&
568
+ index.priority === priority &&
569
+ index.active === active
570
+ )
571
+ return true;
572
+ filter.set(id, { id, fn, priority, active, overrideable });
573
+ Filter.filters.set(hook, filter);
574
+ Filter.getPipeline(hook, true);
575
+ return true;
576
+ },
577
+ `Error adding filter <${id}> to hook <${hook}>`,
578
+ { hook, id, opt }
579
+ );
438
580
  }
439
581
  static remove(hook, id) {
440
- Filter.pipeline.delete(hook);
441
582
  const filter = Filter.filters.get(hook);
442
- return filter ? filter.delete(id) : false;
583
+ if (!filter || !filter.delete(id)) return false;
584
+ Filter.getPipeline(hook, true);
585
+ return true;
443
586
  }
444
587
  static pause(hook, id) {
445
- Filter.pipeline.delete(hook);
446
- const f = Filter.filters.get(hook)?.get(id);
447
- return !!(f && ((f.active = false), true));
588
+ const filter = Filter.filters.get(hook);
589
+ if (!filter) return false;
590
+ const f = filter.get(id);
591
+ if (!f || !f.active) return false;
592
+ f.active = false;
593
+ Filter.getPipeline(hook, true);
594
+ return true;
448
595
  }
449
596
  static resume(hook, id) {
450
- Filter.pipeline.delete(hook);
451
- const f = Filter.filters.get(hook)?.get(id);
452
- return !!(f && ((f.active = true), true));
597
+ const filter = Filter.filters.get(hook);
598
+ if (!filter) return false;
599
+ const f = filter.get(id);
600
+ if (!f || f.active) return false;
601
+ f.active = true;
602
+ Filter.getPipeline(hook, true);
603
+ return true;
453
604
  }
454
605
  static list(hook, active = false) {
455
606
  const filter = Filter.filters.get(hook);
@@ -459,17 +610,36 @@ class Filter {
459
610
  return out;
460
611
  }
461
612
  static apply(hook, input) {
462
- const fn = Filter.getPipeline(hook);
463
- return Array.isArray(input) ? input.map(fn) : fn(input);
613
+ return ErrorUtil.wrap(
614
+ () => {
615
+ const fn = Filter.getPipeline(hook);
616
+ if (typeof input === 'string') return fn(input);
617
+ const arr = input;
618
+ const out = new Array(arr.length);
619
+ for (let i = 0; i < arr.length; i++) out[i] = fn(arr[i]);
620
+ return out;
621
+ },
622
+ `Error applying filters for hook <${hook}>`,
623
+ { hook, input }
624
+ );
464
625
  }
465
626
  static async applyAsync(hook, input) {
466
- const fn = Filter.getPipeline(hook);
467
- return Array.isArray(input)
468
- ? Promise.all(input.map(fn))
469
- : Promise.resolve(fn(input));
627
+ return ErrorUtil.wrapAsync(
628
+ async () => {
629
+ const fn = Filter.getPipeline(hook);
630
+ if (typeof input === 'string') return Promise.resolve(fn(input));
631
+ const arr = input;
632
+ const out = new Array(arr.length);
633
+ for (let i = 0; i < arr.length; i++)
634
+ out[i] = Promise.resolve(fn(arr[i]));
635
+ return Promise.all(out);
636
+ },
637
+ `Error applying filters for hook <${hook}>`,
638
+ { hook, input }
639
+ );
470
640
  }
471
641
  static clear(hook) {
472
- Filter.pipeline.clear();
642
+ Filter.clearPipeline();
473
643
  if (hook) Filter.filters.delete(hook);
474
644
  else Filter.filters.clear();
475
645
  }
@@ -483,25 +653,21 @@ class Hasher {
483
653
  static HASH_OFFSET = 0x811c9dc5;
484
654
  static fastFNV1a(str) {
485
655
  const len = str.length;
656
+ const limit = len & -4;
486
657
  let hash = this.HASH_OFFSET;
487
- const chunks = Math.floor(len / 4);
488
- for (let i = 0; i < chunks; i++) {
489
- const pos = i * 4;
658
+ let i = 0;
659
+ for (; i < limit; i += 4) {
490
660
  const chunk =
491
- str.charCodeAt(pos) |
492
- (str.charCodeAt(pos + 1) << 8) |
493
- (str.charCodeAt(pos + 2) << 16) |
494
- (str.charCodeAt(pos + 3) << 24);
661
+ str.charCodeAt(i) |
662
+ (str.charCodeAt(i + 1) << 8) |
663
+ (str.charCodeAt(i + 2) << 16) |
664
+ (str.charCodeAt(i + 3) << 24);
495
665
  hash ^= chunk;
496
666
  hash = Math.imul(hash, this.FNV_PRIME);
497
667
  }
498
- const remaining = len % 4;
499
- if (remaining > 0) {
500
- const pos = chunks * 4;
501
- for (let i = 0; i < remaining; i++) {
502
- hash ^= str.charCodeAt(pos + i);
503
- hash = Math.imul(hash, this.FNV_PRIME);
504
- }
668
+ for (; i < len; i++) {
669
+ hash ^= str.charCodeAt(i);
670
+ hash = Math.imul(hash, this.FNV_PRIME);
505
671
  }
506
672
  hash ^= hash >>> 16;
507
673
  hash *= 0x85ebca6b;
@@ -512,32 +678,51 @@ class Hasher {
512
678
  }
513
679
  }
514
680
  class HashTable {
515
- LRU;
681
+ FIFO;
682
+ maxSize;
516
683
  static MAX_LEN = 2048;
517
- static TABLE_SIZE = 10_000;
518
684
  table = new Map();
519
- constructor(LRU = true) {
520
- this.LRU = LRU;
685
+ constructor(FIFO = true, maxSize = 10000) {
686
+ this.FIFO = FIFO;
687
+ this.maxSize = maxSize;
521
688
  }
522
689
  key(label, strs, sorted = false) {
523
- for (const str of strs) if (str.length > HashTable.MAX_LEN) return false;
524
- const hashes = strs.map((s) => Hasher.fastFNV1a(s));
525
- return [label, ...(sorted ? hashes.sort() : hashes)].join('-');
690
+ const n = strs.length;
691
+ const hashes = new Array(n);
692
+ for (let i = 0; i < n; i++) {
693
+ const s = strs[i];
694
+ if (s.length > HashTable.MAX_LEN) return false;
695
+ hashes[i] = Hasher.fastFNV1a(s);
696
+ }
697
+ if (sorted) hashes.sort((a, b) => a - b);
698
+ let key = label;
699
+ for (let i = 0; i < hashes.length; i++) key += '-' + hashes[i];
700
+ return key;
701
+ }
702
+ has(key) {
703
+ return this.table.has(key);
704
+ }
705
+ get(key) {
706
+ return this.table.get(key);
526
707
  }
527
- has = (key) => this.table.has(key);
528
- get = (key) => this.table.get(key);
529
708
  set(key, entry, update = true) {
530
709
  if (!update && this.table.has(key)) return false;
531
- while (!this.table.has(key) && this.table.size >= HashTable.TABLE_SIZE) {
532
- if (!this.LRU) return false;
710
+ if (!this.table.has(key) && this.table.size >= this.maxSize) {
711
+ if (!this.FIFO) return false;
533
712
  this.table.delete(this.table.keys().next().value);
534
713
  }
535
714
  this.table.set(key, entry);
536
715
  return true;
537
716
  }
538
- delete = (key) => this.table.delete(key);
539
- clear = () => this.table.clear();
540
- size = () => this.table.size;
717
+ delete(key) {
718
+ return this.table.delete(key);
719
+ }
720
+ clear() {
721
+ this.table.clear();
722
+ }
723
+ size() {
724
+ return this.table.size;
725
+ }
541
726
  }
542
727
 
543
728
  class Normalizer {
@@ -554,42 +739,91 @@ class Normalizer {
554
739
  return Array.from(new Set(flags)).sort().join('');
555
740
  }
556
741
  static getPipeline(flags) {
557
- if (Normalizer.pipeline.has(flags)) return Normalizer.pipeline.get(flags);
558
- const { REGEX } = Normalizer;
559
- const steps = [
560
- ['d', (s) => s.normalize('NFD')],
561
- ['i', (s) => s.toLowerCase()],
562
- ['k', (s) => s.replace(REGEX.nonLetters, '')],
563
- ['n', (s) => s.replace(REGEX.nonNumbers, '')],
564
- ['r', (s) => s.replace(REGEX.doubleChars, '$1')],
565
- ['s', (s) => s.replace(REGEX.specialChars, '')],
566
- ['t', (s) => s.trim()],
567
- ['u', (s) => s.normalize('NFC')],
568
- ['w', (s) => s.replace(REGEX.whitespace, ' ')],
569
- ['x', (s) => s.normalize('NFKC')]
570
- ];
571
- const pipeline = steps
572
- .filter(([f]) => flags.includes(f))
573
- .map(([, fn]) => fn);
574
- const fn = (s) => pipeline.reduce((v, f) => f(v), s);
575
- Normalizer.pipeline.set(flags, fn);
576
- return fn;
577
- }
578
- static normalize(input, flags) {
579
- if (!flags || typeof flags !== 'string' || !input) return input;
580
- flags = this.canonicalFlags(flags);
581
- if (Array.isArray(input))
582
- return input.map((s) => Normalizer.normalize(s, flags));
583
- const key = Normalizer.cache.key(flags, [input]);
584
- if (key && Normalizer.cache.has(key)) return Normalizer.cache.get(key);
585
- const res = Normalizer.getPipeline(flags)(input);
586
- if (key) Normalizer.cache.set(key, res);
587
- return res;
742
+ return ErrorUtil.wrap(
743
+ () => {
744
+ const cached = Normalizer.pipeline.get(flags);
745
+ if (cached) return cached;
746
+ const { REGEX } = Normalizer;
747
+ const steps = [];
748
+ for (let i = 0; i < flags.length; i++) {
749
+ switch (flags[i]) {
750
+ case 'd':
751
+ steps.push((s) => s.normalize('NFD'));
752
+ break;
753
+ case 'i':
754
+ steps.push((s) => s.toLowerCase());
755
+ break;
756
+ case 'k':
757
+ steps.push((s) => s.replace(REGEX.nonLetters, ''));
758
+ break;
759
+ case 'n':
760
+ steps.push((s) => s.replace(REGEX.nonNumbers, ''));
761
+ break;
762
+ case 'r':
763
+ steps.push((s) => s.replace(REGEX.doubleChars, '$1'));
764
+ break;
765
+ case 's':
766
+ steps.push((s) => s.replace(REGEX.specialChars, ''));
767
+ break;
768
+ case 't':
769
+ steps.push((s) => s.trim());
770
+ break;
771
+ case 'u':
772
+ steps.push((s) => s.normalize('NFC'));
773
+ break;
774
+ case 'w':
775
+ steps.push((s) => s.replace(REGEX.whitespace, ' '));
776
+ break;
777
+ case 'x':
778
+ steps.push((s) => s.normalize('NFKC'));
779
+ break;
780
+ }
781
+ }
782
+ const fn = (input) => {
783
+ let v = input;
784
+ for (let i = 0; i < steps.length; i++) v = steps[i](v);
785
+ return v;
786
+ };
787
+ Normalizer.pipeline.set(flags, fn);
788
+ return fn;
789
+ },
790
+ `Failed to create normalization pipeline for flags: ${flags}`,
791
+ { flags }
792
+ );
793
+ }
794
+ static normalize(input, flags, normalizedFlags) {
795
+ return ErrorUtil.wrap(
796
+ () => {
797
+ if (!flags || typeof flags !== 'string' || !input) return input;
798
+ flags = normalizedFlags ?? this.canonicalFlags(flags);
799
+ const pipeline = Normalizer.getPipeline(flags);
800
+ const normalizeOne = (s) => {
801
+ const key = Normalizer.cache.key(flags, [s]);
802
+ if (key && Normalizer.cache.has(key))
803
+ return Normalizer.cache.get(key);
804
+ const res = pipeline(s);
805
+ if (key) Normalizer.cache.set(key, res);
806
+ return res;
807
+ };
808
+ return Array.isArray(input)
809
+ ? input.map(normalizeOne)
810
+ : normalizeOne(input);
811
+ },
812
+ `Failed to normalize input with flags: ${flags}`,
813
+ { input, flags }
814
+ );
588
815
  }
589
816
  static async normalizeAsync(input, flags) {
590
- return await (Array.isArray(input)
591
- ? Promise.all(input.map((s) => Normalizer.normalize(s, flags)))
592
- : Promise.resolve(Normalizer.normalize(input, flags)));
817
+ return await ErrorUtil.wrapAsync(
818
+ async () => {
819
+ if (!flags || typeof flags !== 'string' || !input) return input;
820
+ return await (Array.isArray(input)
821
+ ? Promise.all(input.map((s) => Normalizer.normalize(s, flags)))
822
+ : Promise.resolve(Normalizer.normalize(input, flags)));
823
+ },
824
+ `Failed to asynchronously normalize input with flags: ${flags}`,
825
+ { input, flags }
826
+ );
593
827
  }
594
828
  static clear() {
595
829
  Normalizer.pipeline.clear();
@@ -597,17 +831,143 @@ class Normalizer {
597
831
  }
598
832
  }
599
833
 
834
+ class RingPool {
835
+ maxSize;
836
+ buffers = [];
837
+ pointer = 0;
838
+ constructor(maxSize) {
839
+ this.maxSize = maxSize;
840
+ }
841
+ acquire(minSize, allowOversize) {
842
+ return ErrorUtil.wrap(
843
+ () => {
844
+ const buffers = this.buffers;
845
+ const len = buffers.length;
846
+ for (let i = 0; i < len; i++) {
847
+ const idx = (this.pointer + i) % len;
848
+ const item = buffers[idx];
849
+ const size = item.size;
850
+ if (size >= minSize && (allowOversize || size === minSize)) {
851
+ this.pointer = (idx + 1) % len;
852
+ return item;
853
+ }
854
+ }
855
+ return null;
856
+ },
857
+ `Failed to acquire buffer of size >= ${minSize} from pool`,
858
+ { minSize, allowOversize }
859
+ );
860
+ }
861
+ release(item) {
862
+ ErrorUtil.wrap(
863
+ () => {
864
+ const buffers = this.buffers;
865
+ if (buffers.length < this.maxSize) {
866
+ buffers.push(item);
867
+ return;
868
+ }
869
+ buffers[this.pointer] = item;
870
+ this.pointer = (this.pointer + 1) % this.maxSize;
871
+ },
872
+ `Failed to release buffer back to pool`,
873
+ { item }
874
+ );
875
+ }
876
+ clear() {
877
+ this.buffers = [];
878
+ this.pointer = 0;
879
+ }
880
+ }
881
+ class Pool {
882
+ static CONFIG = {
883
+ int32: {
884
+ type: 'int32',
885
+ maxSize: 64,
886
+ maxItemSize: 2048,
887
+ allowOversize: true
888
+ },
889
+ 'arr[]': {
890
+ type: 'arr[]',
891
+ maxSize: 4,
892
+ maxItemSize: 1024,
893
+ allowOversize: false
894
+ },
895
+ 'number[]': {
896
+ type: 'number[]',
897
+ maxSize: 16,
898
+ maxItemSize: 1024,
899
+ allowOversize: false
900
+ },
901
+ 'string[]': {
902
+ type: 'string[]',
903
+ maxSize: 2,
904
+ maxItemSize: 1024,
905
+ allowOversize: false
906
+ },
907
+ set: { type: 'set', maxSize: 8, maxItemSize: 0, allowOversize: false },
908
+ map: { type: 'map', maxSize: 8, maxItemSize: 0, allowOversize: false }
909
+ };
910
+ static POOLS = {
911
+ int32: new RingPool(64),
912
+ 'arr[]': new RingPool(4),
913
+ 'number[]': new RingPool(16),
914
+ 'string[]': new RingPool(2),
915
+ set: new RingPool(8),
916
+ map: new RingPool(8)
917
+ };
918
+ static allocate(type, size) {
919
+ switch (type) {
920
+ case 'int32':
921
+ return new Int32Array(size);
922
+ case 'arr[]':
923
+ return new Array(size);
924
+ case 'number[]':
925
+ return new Float64Array(size);
926
+ case 'string[]':
927
+ return new Array(size);
928
+ case 'set':
929
+ return new Set();
930
+ case 'map':
931
+ return new Map();
932
+ }
933
+ }
934
+ static acquire(type, size) {
935
+ const CONFIG = this.CONFIG[type];
936
+ if (!CONFIG)
937
+ throw new CmpStrUsageError(`Unsupported pool type <${type}>`, { type });
938
+ if (size > CONFIG.maxItemSize) return this.allocate(type, size);
939
+ const item = this.POOLS[type].acquire(size, CONFIG.allowOversize);
940
+ if (item)
941
+ return type === 'int32' ? item.buffer.subarray(0, size) : item.buffer;
942
+ return this.allocate(type, size);
943
+ }
944
+ static acquireMany(type, sizes) {
945
+ const out = new Array(sizes.length);
946
+ for (let i = 0; i < sizes.length; i++)
947
+ out[i] = this.acquire(type, sizes[i]);
948
+ return out;
949
+ }
950
+ static release(type, buffer, size) {
951
+ const CONFIG = this.CONFIG[type];
952
+ if (!CONFIG)
953
+ throw new CmpStrUsageError(`Unsupported pool type <${type}>`, { type });
954
+ if (size <= CONFIG.maxItemSize) this.POOLS[type].release({ buffer, size });
955
+ }
956
+ }
957
+
600
958
  class Profiler {
601
959
  active;
602
960
  static ENV;
603
961
  static instance;
604
962
  nowFn;
605
963
  memFn;
606
- store = new Set();
964
+ store = [];
965
+ last;
607
966
  totalTime = 0;
608
967
  totalMem = 0;
609
968
  static detectEnv() {
610
- if (typeof process !== 'undefined') Profiler.ENV = 'nodejs';
969
+ if (typeof process !== 'undefined' && process.versions?.node)
970
+ Profiler.ENV = 'nodejs';
611
971
  else if (typeof performance !== 'undefined') Profiler.ENV = 'browser';
612
972
  else Profiler.ENV = 'unknown';
613
973
  }
@@ -619,7 +979,7 @@ class Profiler {
619
979
  this.active = active;
620
980
  switch (Profiler.ENV) {
621
981
  case 'nodejs':
622
- this.nowFn = () => Number(process.hrtime.bigint()) / 1e6;
982
+ this.nowFn = () => Number(process.hrtime.bigint()) * 1e-6;
623
983
  this.memFn = () => process.memoryUsage().heapUsed;
624
984
  break;
625
985
  case 'browser':
@@ -632,40 +992,52 @@ class Profiler {
632
992
  break;
633
993
  }
634
994
  }
635
- now = () => this.nowFn();
636
- mem = () => this.memFn();
637
- profile(fn, meta) {
638
- const startTime = this.now(),
639
- startMem = this.mem();
640
- const res = fn();
641
- const deltaTime = this.now() - startTime,
642
- deltaMem = this.mem() - startMem;
643
- this.store.add({ time: deltaTime, mem: deltaMem, res, meta });
644
- ((this.totalTime += deltaTime), (this.totalMem += deltaMem));
645
- return res;
995
+ storeRes(entry) {
996
+ this.store.push((this.last = entry));
997
+ this.totalTime += entry.time;
998
+ this.totalMem += entry.mem;
646
999
  }
647
- enable = () => {
1000
+ enable() {
648
1001
  this.active = true;
649
- };
650
- disable = () => {
1002
+ }
1003
+ disable() {
651
1004
  this.active = false;
652
- };
1005
+ }
653
1006
  clear() {
654
- this.store.clear();
1007
+ this.store.length = 0;
1008
+ this.last = undefined;
655
1009
  this.totalTime = 0;
656
1010
  this.totalMem = 0;
657
1011
  }
658
1012
  run(fn, meta = {}) {
659
- return this.active ? this.profile(fn, meta) : fn();
1013
+ if (!this.active) return fn();
1014
+ const startTime = this.nowFn(),
1015
+ startMem = this.memFn();
1016
+ const res = fn();
1017
+ const deltaTime = this.nowFn() - startTime,
1018
+ deltaMem = this.memFn() - startMem;
1019
+ this.storeRes({ time: deltaTime, mem: deltaMem, res, meta });
1020
+ return res;
660
1021
  }
661
1022
  async runAsync(fn, meta = {}) {
662
- return this.active
663
- ? this.profile(async () => await fn(), meta)
664
- : await fn();
1023
+ if (!this.active) return fn();
1024
+ const startTime = this.nowFn(),
1025
+ startMem = this.memFn();
1026
+ const res = await fn();
1027
+ const deltaTime = this.nowFn() - startTime,
1028
+ deltaMem = this.memFn() - startMem;
1029
+ this.storeRes({ time: deltaTime, mem: deltaMem, res, meta });
1030
+ return res;
1031
+ }
1032
+ getAll() {
1033
+ return [...this.store];
1034
+ }
1035
+ getLast() {
1036
+ return this.last;
1037
+ }
1038
+ getTotal() {
1039
+ return { time: this.totalTime, mem: this.totalMem };
665
1040
  }
666
- getAll = () => [...this.store];
667
- getLast = () => this.getAll().pop();
668
- getTotal = () => ({ time: this.totalTime, mem: this.totalMem });
669
1041
  services = Object.freeze({
670
1042
  enable: this.enable.bind(this),
671
1043
  disable: this.disable.bind(this),
@@ -679,19 +1051,34 @@ class Profiler {
679
1051
  const registry = Object.create(null);
680
1052
  const factory = Object.create(null);
681
1053
  function Registry(reg, ctor) {
682
- if (reg in registry || reg in factory)
683
- throw new Error(
684
- `Registry <${reg}> already exists / overwriting is forbidden`
685
- );
1054
+ ErrorUtil.assert(
1055
+ !(reg in registry || reg in factory),
1056
+ `Registry <${reg}> already exists / overwriting is forbidden`,
1057
+ { registry: reg }
1058
+ );
686
1059
  const classes = Object.create(null);
687
1060
  const service = Object.freeze({
688
1061
  add(name, cls, update = false) {
689
- if (!(cls.prototype instanceof ctor))
690
- throw new TypeError(`Class must extend <${reg}>`);
691
- if (!update && name in classes)
692
- throw new Error(
693
- `Entry <${name}> already exists / use <update=true> to overwrite`
694
- );
1062
+ ErrorUtil.assert(
1063
+ typeof name === 'string' && name.length > 0,
1064
+ `Class name must be a non-empty string`,
1065
+ { registry: reg, name }
1066
+ );
1067
+ ErrorUtil.assert(
1068
+ typeof cls === 'function',
1069
+ `Class must be a constructor function`,
1070
+ { registry: reg, class: cls }
1071
+ );
1072
+ ErrorUtil.assert(
1073
+ cls.prototype instanceof ctor,
1074
+ `Class must extend <${reg}>`,
1075
+ { registry: reg, class: cls }
1076
+ );
1077
+ ErrorUtil.assert(
1078
+ update || !(name in classes),
1079
+ `Class <${name}> already exists / use <update=true> to overwrite`,
1080
+ { registry: reg, name }
1081
+ );
695
1082
  classes[name] = cls;
696
1083
  },
697
1084
  remove(name) {
@@ -704,8 +1091,16 @@ function Registry(reg, ctor) {
704
1091
  return Object.keys(classes);
705
1092
  },
706
1093
  get(name) {
707
- if (!(name in classes))
708
- throw new Error(`Class <${name}> not registered for <${reg}>`);
1094
+ ErrorUtil.assert(
1095
+ typeof name === 'string' && name.length > 0,
1096
+ `Class name must be a non-empty string`,
1097
+ { registry: reg, name }
1098
+ );
1099
+ ErrorUtil.assert(
1100
+ name in classes,
1101
+ `Class <${name}> not registered for <${reg}>`,
1102
+ { registry: reg, name }
1103
+ );
709
1104
  return classes[name];
710
1105
  }
711
1106
  });
@@ -715,745 +1110,348 @@ function Registry(reg, ctor) {
715
1110
  }
716
1111
  function resolveCls(reg, cls) {
717
1112
  if (!(reg in registry))
718
- throw new ReferenceError(`Registry <${reg}> does not exist`);
719
- return typeof cls === 'string' ? registry[reg]?.get(cls) : cls;
1113
+ throw new CmpStrNotFoundError(`Registry <${reg}> does not exist`, {
1114
+ registry: reg
1115
+ });
1116
+ return typeof cls === 'string' ? registry[reg].get(cls) : cls;
720
1117
  }
721
1118
  function createFromRegistry(reg, cls, ...args) {
722
- cls = resolveCls(reg, cls);
723
- try {
724
- return new cls(...args);
725
- } catch (err) {
726
- throw new Error(`Cannot instantiate class <${cls.name ?? cls}>`, {
727
- cause: err
728
- });
729
- }
1119
+ const ctor = resolveCls(reg, cls);
1120
+ return ErrorUtil.wrap(
1121
+ () => new ctor(...args),
1122
+ `Failed to create instance of class <${ctor.name ?? cls}> from registry <${reg}>`,
1123
+ { registry: reg, class: cls, args }
1124
+ );
730
1125
  }
731
1126
 
732
- class RingPool {
733
- maxSize;
734
- buffers = [];
735
- pointer = 0;
736
- constructor(maxSize) {
737
- this.maxSize = maxSize;
1127
+ const profiler$2 = Profiler.getInstance();
1128
+ class Metric {
1129
+ static cache = new HashTable();
1130
+ metric;
1131
+ a;
1132
+ b;
1133
+ origA = [];
1134
+ origB = [];
1135
+ options;
1136
+ optKey;
1137
+ symmetric;
1138
+ results;
1139
+ static clear() {
1140
+ this.cache.clear();
738
1141
  }
739
- acquire(minSize, allowOversize) {
740
- const len = this.buffers.length;
741
- for (let i = 0; i < len; i++) {
742
- const idx = (this.pointer + i) & (len - 1);
743
- const item = this.buffers[idx];
744
- if (item.size >= minSize && (allowOversize || item.size === minSize)) {
745
- this.pointer = (idx + 1) & (len - 1);
746
- return item;
747
- }
748
- }
749
- return null;
1142
+ static swap(a, b, m, n) {
1143
+ return m > n ? [b, a, n, m] : [a, b, m, n];
750
1144
  }
751
- release(item) {
752
- if (this.buffers.length < this.maxSize)
753
- return void [this.buffers.push(item)];
754
- this.buffers[this.pointer] = item;
755
- this.pointer = (this.pointer + 1) % this.maxSize;
1145
+ static clamp(res) {
1146
+ return Math.max(0, Math.min(1, res));
756
1147
  }
757
- clear() {
758
- this.buffers = [];
759
- this.pointer = 0;
1148
+ constructor(metric, a, b, opt = {}, symmetric = false) {
1149
+ this.metric = metric;
1150
+ this.a = Array.isArray(a) ? a : [a];
1151
+ this.b = Array.isArray(b) ? b : [b];
1152
+ ErrorUtil.assert(
1153
+ this.a.length > 0 && this.b.length > 0,
1154
+ `Inputs <a> and <b> must not be empty`,
1155
+ { a: this.a, b: this.b }
1156
+ );
1157
+ this.options = opt;
1158
+ this.optKey = Hasher.fastFNV1a(
1159
+ JSON.stringify(opt, Object.keys(opt).sort())
1160
+ ).toString();
1161
+ this.symmetric = symmetric;
760
1162
  }
761
- }
762
- class Pool {
763
- static CONFIG = {
764
- int32: {
765
- type: 'int32',
766
- maxSize: 64,
767
- maxItemSize: 2048,
768
- allowOversize: true
769
- },
770
- 'number[]': {
771
- type: 'number[]',
772
- maxSize: 16,
773
- maxItemSize: 1024,
774
- allowOversize: false
775
- },
776
- 'string[]': {
777
- type: 'string[]',
778
- maxSize: 2,
779
- maxItemSize: 1024,
780
- allowOversize: false
781
- },
782
- set: { type: 'set', maxSize: 8, maxItemSize: 0, allowOversize: false },
783
- map: { type: 'map', maxSize: 8, maxItemSize: 0, allowOversize: false }
784
- };
785
- static POOLS = {
786
- int32: new RingPool(64),
787
- 'number[]': new RingPool(16),
788
- 'string[]': new RingPool(2),
789
- set: new RingPool(8),
790
- map: new RingPool(8)
791
- };
792
- static allocate(type, size) {
793
- switch (type) {
794
- case 'int32':
795
- return new Int32Array(size);
796
- case 'number[]':
797
- return new Float64Array(size);
798
- case 'string[]':
799
- return new Array(size);
800
- case 'set':
801
- return new Set();
802
- case 'map':
803
- return new Map();
804
- }
1163
+ preCompute(a, b, m, n) {
1164
+ if (a === b) return { res: 1 };
1165
+ if (m == 0 || n == 0 || (m < 2 && n < 2)) return { res: 0 };
1166
+ return undefined;
805
1167
  }
806
- static acquire(type, size) {
807
- const CONFIG = this.CONFIG[type];
808
- if (size > CONFIG.maxItemSize) return this.allocate(type, size);
809
- const item = this.POOLS[type].acquire(size, CONFIG.allowOversize);
810
- if (item)
811
- return type === 'int32' ? item.buffer.subarray(0, size) : item.buffer;
812
- return this.allocate(type, size);
1168
+ compute(a, b, m, n, maxLen) {
1169
+ throw new CmpStrInternalError(
1170
+ `Method compute() must be overridden in a subclass`
1171
+ );
813
1172
  }
814
- static acquireMany(type, sizes) {
815
- return sizes.map((size) => this.acquire(type, size));
1173
+ runSingle(i, j) {
1174
+ return ErrorUtil.wrap(
1175
+ () => {
1176
+ let a = String(this.a[i]),
1177
+ A = a;
1178
+ let b = String(this.b[j]),
1179
+ B = b;
1180
+ let m = A.length,
1181
+ n = B.length;
1182
+ let result = this.preCompute(A, B, m, n);
1183
+ if (!result) {
1184
+ result = profiler$2.run(() => {
1185
+ if (this.symmetric) [A, B, m, n] = Metric.swap(A, B, m, n);
1186
+ let key = Metric.cache.key(this.metric, [A, B], this.symmetric);
1187
+ if (key) key += this.optKey;
1188
+ return (
1189
+ Metric.cache.get(key || '') ??
1190
+ (() => {
1191
+ const maxLen = m > n ? m : n;
1192
+ const res = this.compute(A, B, m, n, maxLen);
1193
+ if (key) Metric.cache.set(key, res);
1194
+ return res;
1195
+ })()
1196
+ );
1197
+ });
1198
+ }
1199
+ return {
1200
+ metric: this.metric,
1201
+ a: this.origA.length > i ? this.origA[i] : a,
1202
+ b: this.origB.length > j ? this.origB[j] : b,
1203
+ ...result
1204
+ };
1205
+ },
1206
+ `Failed to compute metric for inputs at indices a[${i}] and b[${j}]`,
1207
+ { i, j }
1208
+ );
816
1209
  }
817
- static release(type, buffer, size) {
818
- if (size <= this.CONFIG[type].maxItemSize)
819
- this.POOLS[type].release({ buffer, size });
1210
+ async runSingleAsync(i, j) {
1211
+ return Promise.resolve(this.runSingle(i, j));
820
1212
  }
821
- }
822
-
823
- class StructuredData {
824
- data;
825
- key;
826
- static create(data, key) {
827
- return new StructuredData(data, key);
1213
+ runBatch() {
1214
+ const results = [];
1215
+ for (let i = 0; i < this.a.length; i++)
1216
+ for (let j = 0; j < this.b.length; j++)
1217
+ results.push(this.runSingle(i, j));
1218
+ this.results = results;
828
1219
  }
829
- constructor(data, key) {
830
- this.data = data;
831
- this.key = key;
1220
+ async runBatchAsync() {
1221
+ const tasks = [];
1222
+ for (let i = 0; i < this.a.length; i++)
1223
+ for (let j = 0; j < this.b.length; j++)
1224
+ tasks.push(this.runSingleAsync(i, j));
1225
+ this.results = await Promise.all(tasks);
832
1226
  }
833
- extractFrom(arr, key) {
834
- const result = Pool.acquire('string[]', arr.length);
835
- for (let i = 0; i < arr.length; i++) {
836
- const val = arr[i][key];
837
- result[i] = typeof val === 'string' ? val : String(val ?? '');
838
- }
839
- return result;
1227
+ runPairwise() {
1228
+ const results = [];
1229
+ for (let i = 0; i < this.a.length; i++) results.push(this.runSingle(i, i));
1230
+ this.results = results;
840
1231
  }
841
- extract = () => this.extractFrom(this.data, this.key);
842
- isMetricResult(v) {
843
- return (
844
- typeof v === 'object' && v !== null && 'a' in v && 'b' in v && 'res' in v
845
- );
1232
+ async runPairwiseAsync() {
1233
+ const tasks = [];
1234
+ for (let i = 0; i < this.a.length; i++)
1235
+ tasks.push(this.runSingleAsync(i, i));
1236
+ this.results = await Promise.all(tasks);
846
1237
  }
847
- isCmpStrResult(v) {
848
- return (
849
- typeof v === 'object' &&
850
- v !== null &&
851
- 'source' in v &&
852
- 'target' in v &&
853
- 'match' in v
854
- );
1238
+ setOriginal(a, b) {
1239
+ if (a) this.origA = Array.isArray(a) ? a : [a];
1240
+ if (b) this.origB = Array.isArray(b) ? b : [b];
1241
+ return this;
855
1242
  }
856
- normalizeResults(results) {
857
- if (!Array.isArray(results) || results.length === 0) return [];
858
- const first = results[0];
859
- let normalized = [];
860
- if (this.isMetricResult(first)) normalized = results;
861
- else if (this.isCmpStrResult(first))
862
- normalized = results.map((r) => ({
863
- metric: 'unknown',
864
- a: r.source,
865
- b: r.target,
866
- res: r.match,
867
- raw: r.raw
868
- }));
869
- else
870
- throw new TypeError(
871
- 'Unsupported result format for StructuredData normalization.'
872
- );
873
- return normalized.map((r, idx) => ({ ...r, __idx: idx }));
1243
+ isBatch() {
1244
+ return this.a.length > 1 || this.b.length > 1;
874
1245
  }
875
- rebuild(results, sourceData, extractedStrings, removeZero, objectsOnly) {
876
- const stringToIndices = new Map();
877
- for (let i = 0; i < extractedStrings.length; i++) {
878
- const str = extractedStrings[i];
879
- if (!stringToIndices.has(str)) stringToIndices.set(str, []);
880
- stringToIndices.get(str).push(i);
881
- }
882
- const output = new Array(results.length);
883
- const occurrenceCount = new Map();
884
- let out = 0;
885
- for (let i = 0; i < results.length; i++) {
886
- const result = results[i];
887
- if (removeZero && result.res === 0) continue;
888
- const targetStr = result.b || '';
889
- const indices = stringToIndices.get(targetStr);
890
- let dataIndex;
891
- if (indices && indices.length > 0) {
892
- const occurrence = occurrenceCount.get(targetStr) ?? 0;
893
- occurrenceCount.set(targetStr, occurrence + 1);
894
- dataIndex = indices[occurrence % indices.length];
895
- } else {
896
- dataIndex = result.__idx ?? i;
897
- }
898
- if (dataIndex < 0 || dataIndex >= sourceData.length) continue;
899
- const sourceObj = sourceData[dataIndex];
900
- const mappedTarget = extractedStrings[dataIndex] || targetStr;
901
- if (objectsOnly) output[out++] = sourceObj;
902
- else
903
- output[out++] = {
904
- obj: sourceObj,
905
- key: this.key,
906
- result: { source: result.a, target: mappedTarget, match: result.res },
907
- ...(result.raw ? { raw: result.raw } : null)
908
- };
909
- }
910
- output.length = out;
911
- return output;
1246
+ isSingle() {
1247
+ return !this.isBatch();
912
1248
  }
913
- sort(results, sort) {
914
- if (!sort || results.length <= 1) return results;
915
- const asc = sort === 'asc';
916
- return results.sort((a, b) => (asc ? a.res - b.res : b.res - a.res));
1249
+ isPairwise(safe = false) {
1250
+ return this.isBatch() && this.a.length === this.b.length
1251
+ ? true
1252
+ : !safe &&
1253
+ (() => {
1254
+ throw new CmpStrUsageError(
1255
+ `Mode <pairwise> requires arrays of equal length`,
1256
+ { a: this.a, b: this.b }
1257
+ );
1258
+ })();
917
1259
  }
918
- finalizeLookup(results, extractedStrings, opt) {
919
- return this.rebuild(
920
- this.sort(this.normalizeResults(results), opt?.sort),
921
- this.data,
922
- extractedStrings,
923
- opt?.removeZero,
924
- opt?.objectsOnly
925
- );
1260
+ isSymmetrical() {
1261
+ return this.symmetric;
926
1262
  }
927
- performLookup(fn, extractedStrings, opt) {
928
- return this.finalizeLookup(fn(), extractedStrings, opt);
1263
+ whichMode(mode) {
1264
+ return mode ?? this.options.mode ?? 'default';
929
1265
  }
930
- async performLookupAsync(fn, extractedStrings, opt) {
931
- return this.finalizeLookup(await fn(), extractedStrings, opt);
1266
+ clear() {
1267
+ this.results = undefined;
932
1268
  }
933
- lookup(fn, query, opt) {
934
- const b = this.extract();
935
- try {
936
- return this.performLookup(() => fn(query, b, opt), b, opt);
937
- } finally {
938
- Pool.release('string[]', b, b.length);
1269
+ run(mode, clear = true) {
1270
+ if (clear) this.clear();
1271
+ switch (this.whichMode(mode)) {
1272
+ case 'default':
1273
+ if (this.isSingle()) {
1274
+ this.results = this.runSingle(0, 0);
1275
+ break;
1276
+ }
1277
+ case 'batch':
1278
+ this.runBatch();
1279
+ break;
1280
+ case 'single':
1281
+ this.results = this.runSingle(0, 0);
1282
+ break;
1283
+ case 'pairwise':
1284
+ if (this.isPairwise()) this.runPairwise();
1285
+ break;
1286
+ default:
1287
+ throw new CmpStrInternalError(`Unsupported mode <${mode}>`);
939
1288
  }
940
1289
  }
941
- async lookupAsync(fn, query, opt) {
942
- const b = this.extract();
943
- try {
944
- return await this.performLookupAsync(() => fn(query, b, opt), b, opt);
945
- } finally {
946
- Pool.release('string[]', b, b.length);
1290
+ async runAsync(mode, clear = true) {
1291
+ if (clear) this.clear();
1292
+ switch (this.whichMode(mode)) {
1293
+ case 'default':
1294
+ if (this.isSingle()) {
1295
+ this.results = await this.runSingleAsync(0, 0);
1296
+ break;
1297
+ }
1298
+ case 'batch':
1299
+ await this.runBatchAsync();
1300
+ break;
1301
+ case 'single':
1302
+ this.results = await this.runSingleAsync(0, 0);
1303
+ break;
1304
+ case 'pairwise':
1305
+ if (this.isPairwise()) await this.runPairwiseAsync();
1306
+ break;
1307
+ default:
1308
+ throw new CmpStrInternalError(`Unsupported async mode <${mode}>`);
947
1309
  }
948
1310
  }
949
- lookupPairs(fn, other, otherKey, opt) {
950
- const a = this.extract();
951
- const b = this.extractFrom(other, otherKey);
952
- try {
953
- return this.performLookup(() => fn(a, b, opt), a, opt);
954
- } finally {
955
- Pool.release('string[]', a, a.length);
956
- Pool.release('string[]', b, b.length);
957
- }
1311
+ getMetricName() {
1312
+ return this.metric;
958
1313
  }
959
- async lookupPairsAsync(fn, other, otherKey, opt) {
960
- const a = this.extract();
961
- const b = this.extractFrom(other, otherKey);
1314
+ getResults() {
1315
+ ErrorUtil.assert(
1316
+ this.results !== undefined,
1317
+ `run() must be called before getResults()`
1318
+ );
1319
+ return this.results;
1320
+ }
1321
+ }
1322
+ const MetricRegistry = Registry('metric', Metric);
1323
+
1324
+ class CosineSimilarity extends Metric {
1325
+ constructor(a, b, opt = {}) {
1326
+ super('cosine', a, b, opt, true);
1327
+ }
1328
+ _termFreq(str, delimiter) {
1329
+ const terms = str.split(delimiter);
1330
+ const freq = Pool.acquire('map', terms.length);
1331
+ for (const term of terms) freq.set(term, (freq.get(term) || 0) + 1);
1332
+ return freq;
1333
+ }
1334
+ compute(a, b) {
1335
+ const { delimiter = ' ' } = this.options;
1336
+ const termsA = this._termFreq(a, delimiter);
1337
+ const termsB = this._termFreq(b, delimiter);
962
1338
  try {
963
- return await this.performLookupAsync(() => fn(a, b, opt), a, opt);
1339
+ let dotP = 0,
1340
+ magA = 0,
1341
+ magB = 0;
1342
+ for (const [term, freqA] of termsA) {
1343
+ const freqB = termsB.get(term) || 0;
1344
+ dotP += freqA * freqB;
1345
+ magA += freqA * freqA;
1346
+ }
1347
+ for (const freqB of termsB.values()) magB += freqB * freqB;
1348
+ magA = Math.sqrt(magA);
1349
+ magB = Math.sqrt(magB);
1350
+ return {
1351
+ res: magA && magB ? Metric.clamp(dotP / (magA * magB)) : 0,
1352
+ raw: { dotProduct: dotP, magnitudeA: magA, magnitudeB: magB }
1353
+ };
964
1354
  } finally {
965
- Pool.release('string[]', a, a.length);
966
- Pool.release('string[]', b, b.length);
1355
+ Pool.release('map', termsA, termsA.size);
1356
+ Pool.release('map', termsB, termsB.size);
967
1357
  }
968
1358
  }
969
1359
  }
1360
+ MetricRegistry.add('cosine', CosineSimilarity);
970
1361
 
971
- class TextAnalyzer {
972
- static REGEX = {
973
- number: /\d/,
974
- sentence: /(?<=[.!?])\s+/,
975
- word: /\p{L}+/gu,
976
- nonWord: /[^\p{L}]/gu,
977
- vowelGroup: /[aeiouy]+/g,
978
- letter: /\p{L}/gu,
979
- ucLetter: /\p{Lu}/gu
980
- };
981
- text;
982
- words = [];
983
- sentences = [];
984
- charFrequency = new Map();
985
- wordHistogram = new Map();
986
- syllableCache = new Map();
987
- syllableStats;
988
- constructor(input) {
989
- this.text = input.trim();
990
- this.tokenize();
991
- this.computeFrequencies();
992
- }
993
- tokenize() {
994
- let match;
995
- const lcText = this.text.toLowerCase();
996
- while ((match = TextAnalyzer.REGEX.word.exec(lcText)) !== null)
997
- this.words.push(match[0]);
998
- this.sentences = this.text
999
- .split(TextAnalyzer.REGEX.sentence)
1000
- .filter(Boolean);
1362
+ class DamerauLevenshteinDistance extends Metric {
1363
+ constructor(a, b, opt = {}) {
1364
+ super('damerau', a, b, opt, true);
1001
1365
  }
1002
- computeFrequencies() {
1003
- for (const char of this.text)
1004
- this.charFrequency.set(char, (this.charFrequency.get(char) ?? 0) + 1);
1005
- for (const word of this.words)
1006
- this.wordHistogram.set(word, (this.wordHistogram.get(word) ?? 0) + 1);
1007
- }
1008
- estimateSyllables(word) {
1009
- const clean = word
1010
- .normalize('NFC')
1011
- .toLowerCase()
1012
- .replace(TextAnalyzer.REGEX.nonWord, '');
1013
- if (this.syllableCache.has(clean)) return this.syllableCache.get(clean);
1014
- const matches = clean.match(TextAnalyzer.REGEX.vowelGroup);
1015
- const count = matches ? matches.length : 1;
1016
- this.syllableCache.set(clean, count);
1017
- return count;
1018
- }
1019
- computeSyllableStats() {
1020
- return (this.syllableStats ||= (() => {
1021
- const perWord = this.words
1022
- .map((w) => this.estimateSyllables(w))
1023
- .sort((a, b) => a - b);
1024
- const total = perWord.reduce((sum, s) => sum + s, 0);
1025
- const mono = perWord.filter((s) => s === 1).length;
1026
- const median = !perWord.length
1027
- ? 0
1028
- : perWord.length % 2 === 0
1029
- ? (perWord[perWord.length / 2 - 1] + perWord[perWord.length / 2]) / 2
1030
- : perWord[Math.floor(perWord.length / 2)];
1366
+ compute(a, b, m, n, maxLen) {
1367
+ const len = m + 1;
1368
+ const [test, prev, curr] = Pool.acquireMany('int32', [len, len, len]);
1369
+ try {
1370
+ for (let i = 0; i <= m; i++) prev[i] = i;
1371
+ for (let j = 1; j <= n; j++) {
1372
+ curr[0] = j;
1373
+ const cb = b.charCodeAt(j - 1);
1374
+ for (let i = 1; i <= m; i++) {
1375
+ const ca = a.charCodeAt(i - 1);
1376
+ const cost = ca === cb ? 0 : 1;
1377
+ let val = Math.min(curr[i - 1] + 1, prev[i] + 1, prev[i - 1] + cost);
1378
+ if (
1379
+ i > 1 &&
1380
+ j > 1 &&
1381
+ ca === b.charCodeAt(j - 2) &&
1382
+ cb === a.charCodeAt(i - 2)
1383
+ )
1384
+ val = Math.min(val, test[i - 2] + cost);
1385
+ curr[i] = val;
1386
+ }
1387
+ test.set(prev);
1388
+ prev.set(curr);
1389
+ }
1390
+ const dist = prev[m];
1031
1391
  return {
1032
- total,
1033
- mono,
1034
- perWord,
1035
- avg: perWord.length ? total / perWord.length : 0,
1036
- median
1392
+ res: maxLen === 0 ? 1 : Metric.clamp(1 - dist / maxLen),
1393
+ raw: { dist, maxLen }
1037
1394
  };
1038
- })());
1039
- }
1040
- getLength = () => this.text.length;
1041
- getWordCount = () => this.words.length;
1042
- getSentenceCount = () => this.sentences.length;
1043
- getAvgWordLength() {
1044
- return this.words.length
1045
- ? this.words.join('').length / this.words.length
1046
- : 0;
1047
- }
1048
- getAvgSentenceLength() {
1049
- return this.sentences.length
1050
- ? this.words.length / this.sentences.length
1051
- : 0;
1052
- }
1053
- getWordHistogram() {
1054
- return Object.fromEntries(this.wordHistogram);
1055
- }
1056
- getMostCommonWords(limit = 5) {
1057
- return [...this.wordHistogram.entries()]
1058
- .sort((a, b) => b[1] - a[1])
1059
- .slice(0, limit)
1060
- .map((e) => e[0]);
1061
- }
1062
- getHapaxLegomena() {
1063
- return [...this.wordHistogram.entries()]
1064
- .filter(([, c]) => c === 1)
1065
- .map((e) => e[0]);
1066
- }
1067
- hasNumbers = () => TextAnalyzer.REGEX.number.test(this.text);
1068
- getUpperCaseRatio() {
1069
- const matches = this.text.match(TextAnalyzer.REGEX.letter) || [];
1070
- const upper = this.text.match(TextAnalyzer.REGEX.ucLetter)?.length || 0;
1071
- return matches.length ? upper / matches.length : 0;
1072
- }
1073
- getCharFrequency() {
1074
- return Object.fromEntries(this.charFrequency);
1075
- }
1076
- getUnicodeCodepoints() {
1077
- const result = {};
1078
- for (const [char, count] of this.charFrequency) {
1079
- const block = char
1080
- .charCodeAt(0)
1081
- .toString(16)
1082
- .padStart(4, '0')
1083
- .toUpperCase();
1084
- result[block] = (result[block] || 0) + count;
1395
+ } finally {
1396
+ Pool.release('int32', test, len);
1397
+ Pool.release('int32', prev, len);
1398
+ Pool.release('int32', curr, len);
1085
1399
  }
1086
- return result;
1087
- }
1088
- getLongWordRatio(len = 7) {
1089
- let long = 0;
1090
- for (const w of this.words) if (w.length >= len) long++;
1091
- return this.words.length ? long / this.words.length : 0;
1092
- }
1093
- getShortWordRatio(len = 3) {
1094
- let short = 0;
1095
- for (const w of this.words) if (w.length <= len) short++;
1096
- return this.words.length ? short / this.words.length : 0;
1097
- }
1098
- getSyllablesCount() {
1099
- return this.computeSyllableStats().total;
1100
- }
1101
- getMonosyllabicWordCount() {
1102
- return this.computeSyllableStats().mono;
1103
- }
1104
- getMinSyllablesWordCount(min) {
1105
- return this.computeSyllableStats().perWord.filter((w) => w >= min).length;
1106
1400
  }
1107
- getMaxSyllablesWordCount(max) {
1108
- return this.computeSyllableStats().perWord.filter((w) => w <= max).length;
1109
- }
1110
- getAvgSyllablesPerWord() {
1111
- return this.computeSyllableStats().avg;
1401
+ }
1402
+ MetricRegistry.add('damerau', DamerauLevenshteinDistance);
1403
+
1404
+ class DiceSorensenCoefficient extends Metric {
1405
+ constructor(a, b, opt = {}) {
1406
+ super('dice', a, b, opt, true);
1112
1407
  }
1113
- getMedianSyllablesPerWord() {
1114
- return this.computeSyllableStats().median;
1408
+ _bigrams(str) {
1409
+ const len = str.length - 1;
1410
+ const bigrams = Pool.acquire('set', len);
1411
+ for (let i = 0; i < len; i++) bigrams.add(str.substring(i, i + 2));
1412
+ return bigrams;
1115
1413
  }
1116
- getHonoresR() {
1414
+ compute(a, b) {
1415
+ const setA = this._bigrams(a),
1416
+ setB = this._bigrams(b);
1417
+ const sizeA = setA.size,
1418
+ sizeB = setB.size;
1117
1419
  try {
1118
- return (
1119
- (100 * Math.log(this.words.length)) /
1120
- (1 - this.getHapaxLegomena().length / (this.wordHistogram.size ?? 1))
1121
- );
1122
- } catch {
1123
- return 0;
1124
- }
1125
- }
1126
- getReadingTime(wpm = 200) {
1127
- return this.words.length / (wpm ?? 1);
1128
- }
1129
- getReadabilityScore(metric = 'flesch') {
1130
- const w = this.words.length || 1;
1131
- const s = this.sentences.length || 1;
1132
- const y = this.getSyllablesCount() || 1;
1133
- const asl = w / s;
1134
- const asw = y / w;
1135
- switch (metric) {
1136
- case 'flesch':
1137
- return 206.835 - 1.015 * asl - 84.6 * asw;
1138
- case 'fleschde':
1139
- return 180 - asl - 58.5 * asw;
1140
- case 'kincaid':
1141
- return 0.39 * asl + 11.8 * asw - 15.59;
1420
+ let intersection = 0;
1421
+ for (const bigram of setA) if (setB.has(bigram)) intersection++;
1422
+ const size = sizeA + sizeB;
1423
+ return {
1424
+ res: size === 0 ? 1 : Metric.clamp((2 * intersection) / size),
1425
+ raw: { intersection, size }
1426
+ };
1427
+ } finally {
1428
+ Pool.release('set', setA, sizeA);
1429
+ Pool.release('set', setB, sizeB);
1142
1430
  }
1143
1431
  }
1144
- getLIXScore() {
1145
- const w = this.words.length || 1;
1146
- const s = this.sentences.length || 1;
1147
- const l = this.getLongWordRatio() * w;
1148
- return w / s + (l / w) * 100;
1149
- }
1150
- getWSTFScore() {
1151
- const w = this.words.length || 1;
1152
- const h = (this.getMinSyllablesWordCount(3) / w) * 100;
1153
- const s = this.getAvgSentenceLength();
1154
- const l = this.getLongWordRatio() * 100;
1155
- const m = (this.getMonosyllabicWordCount() / w) * 100;
1156
- return [
1157
- 0.1935 * h + 0.1672 * s + 0.1297 * l - 0.0327 * m - 0.875,
1158
- 0.2007 * h + 0.1682 * s + 0.1373 * l - 2.779,
1159
- 0.2963 * h + 0.1905 * s - 1.1144,
1160
- 0.2744 * h + 0.2656 * s - 1.693
1161
- ];
1162
- }
1163
1432
  }
1433
+ MetricRegistry.add('dice', DiceSorensenCoefficient);
1164
1434
 
1165
- const profiler$2 = Profiler.getInstance();
1166
- class Metric {
1167
- static cache = new HashTable();
1168
- metric;
1169
- a;
1170
- b;
1171
- origA = [];
1172
- origB = [];
1173
- options;
1174
- optKey;
1175
- symmetric;
1176
- results;
1177
- static clear = () => this.cache.clear();
1178
- static swap = (a, b, m, n) => (m > n ? [b, a, n, m] : [a, b, m, n]);
1179
- static clamp = (res) => Math.max(0, Math.min(1, res));
1180
- constructor(metric, a, b, opt = {}, symmetric = false) {
1181
- this.metric = metric;
1182
- this.a = Array.isArray(a) ? a : [a];
1183
- this.b = Array.isArray(b) ? b : [b];
1184
- if (this.a.length === 0 || this.b.length === 0)
1185
- throw new Error(`Inputs <a> and <b> must not be empty`);
1186
- this.options = opt;
1187
- this.optKey = Hasher.fastFNV1a(
1188
- JSON.stringify(opt, Object.keys(opt).sort())
1189
- ).toString();
1190
- this.symmetric = symmetric;
1191
- }
1192
- preCompute(a, b, m, n) {
1193
- if (a === b) return { res: 1 };
1194
- if (m == 0 || n == 0 || (m < 2 && n < 2)) return { res: 0 };
1195
- return undefined;
1435
+ class HammingDistance extends Metric {
1436
+ constructor(a, b, opt = {}) {
1437
+ super('hamming', a, b, opt, true);
1196
1438
  }
1197
1439
  compute(a, b, m, n, maxLen) {
1198
- throw new Error(`Method compute() must be overridden in a subclass`);
1199
- }
1200
- runSingle(i, j) {
1201
- let a = String(this.a[i]),
1202
- A = a;
1203
- let b = String(this.b[j]),
1204
- B = b;
1205
- let m = A.length,
1206
- n = B.length;
1207
- let result = this.preCompute(A, B, m, n);
1208
- if (!result) {
1209
- result = profiler$2.run(() => {
1210
- if (this.symmetric) [A, B, m, n] = Metric.swap(A, B, m, n);
1211
- const key =
1212
- Metric.cache.key(this.metric, [A, B], this.symmetric) + this.optKey;
1213
- return (
1214
- Metric.cache.get(key || '') ??
1215
- (() => {
1216
- const res = this.compute(A, B, m, n, Math.max(m, n));
1217
- if (key) Metric.cache.set(key, res);
1218
- return res;
1219
- })()
1440
+ if (m !== n) {
1441
+ if (this.options.pad !== undefined) {
1442
+ if (m < maxLen) a = a.padEnd(maxLen, this.options.pad);
1443
+ if (n < maxLen) b = b.padEnd(maxLen, this.options.pad);
1444
+ m = n = maxLen;
1445
+ } else
1446
+ throw new CmpStrUsageError(
1447
+ `Strings must be of equal length for Hamming Distance, a=${m} and b=${n} given, ` +
1448
+ `use option.pad for automatic adjustment`,
1449
+ { a: m, b: n }
1220
1450
  );
1221
- });
1222
- }
1223
- return {
1224
- metric: this.metric,
1225
- a: this.origA[i] ?? a,
1226
- b: this.origB[j] ?? b,
1227
- ...result
1228
- };
1229
- }
1230
- async runSingleAsync(i, j) {
1231
- return Promise.resolve(this.runSingle(i, j));
1232
- }
1233
- runBatch() {
1234
- const results = [];
1235
- for (let i = 0; i < this.a.length; i++)
1236
- for (let j = 0; j < this.b.length; j++)
1237
- results.push(this.runSingle(i, j));
1238
- this.results = results;
1239
- }
1240
- async runBatchAsync() {
1241
- const results = [];
1242
- for (let i = 0; i < this.a.length; i++)
1243
- for (let j = 0; j < this.b.length; j++)
1244
- results.push(await this.runSingleAsync(i, j));
1245
- this.results = results;
1246
- }
1247
- runPairwise() {
1248
- const results = [];
1249
- for (let i = 0; i < this.a.length; i++) results.push(this.runSingle(i, i));
1250
- this.results = results;
1251
- }
1252
- async runPairwiseAsync() {
1253
- const results = [];
1254
- for (let i = 0; i < this.a.length; i++)
1255
- results.push(await this.runSingleAsync(i, i));
1256
- this.results = results;
1257
- }
1258
- setOriginal(a, b) {
1259
- if (a) this.origA = Array.isArray(a) ? a : [a];
1260
- if (b) this.origB = Array.isArray(b) ? b : [b];
1261
- return this;
1262
- }
1263
- isBatch = () => this.a.length > 1 || this.b.length > 1;
1264
- isSingle = () => !this.isBatch();
1265
- isPairwise(safe = false) {
1266
- return this.isBatch() && this.a.length === this.b.length
1267
- ? true
1268
- : !safe &&
1269
- (() => {
1270
- throw new Error(`Mode <pairwise> requires arrays of equal length`);
1271
- })();
1272
- }
1273
- isSymmetrical = () => this.symmetric;
1274
- whichMode = (mode) => mode ?? this.options?.mode ?? 'default';
1275
- clear = () => (this.results = undefined);
1276
- run(mode, clear = true) {
1277
- if (clear) this.clear();
1278
- switch (this.whichMode(mode)) {
1279
- case 'default':
1280
- if (this.isSingle()) {
1281
- this.results = this.runSingle(0, 0);
1282
- break;
1283
- }
1284
- case 'batch':
1285
- this.runBatch();
1286
- break;
1287
- case 'single':
1288
- this.results = this.runSingle(0, 0);
1289
- break;
1290
- case 'pairwise':
1291
- if (this.isPairwise()) this.runPairwise();
1292
- break;
1293
- default:
1294
- throw new Error(`Unsupported mode <${mode}>`);
1295
- }
1296
- }
1297
- async runAsync(mode, clear = true) {
1298
- if (clear) this.clear();
1299
- switch (this.whichMode(mode)) {
1300
- case 'default':
1301
- if (this.isSingle()) {
1302
- this.results = await this.runSingleAsync(0, 0);
1303
- break;
1304
- }
1305
- case 'batch':
1306
- await this.runBatchAsync();
1307
- break;
1308
- case 'single':
1309
- this.results = await this.runSingleAsync(0, 0);
1310
- break;
1311
- case 'pairwise':
1312
- if (this.isPairwise()) await this.runPairwiseAsync();
1313
- break;
1314
- default:
1315
- throw new Error(`Unsupported async mode <${mode}>`);
1316
- }
1317
- }
1318
- getMetricName = () => this.metric;
1319
- getResults() {
1320
- if (this.results === undefined)
1321
- throw new Error(`run() must be called before getResult()`);
1322
- return this.results;
1323
- }
1324
- }
1325
- const MetricRegistry = Registry('metric', Metric);
1326
-
1327
- class CosineSimilarity extends Metric {
1328
- constructor(a, b, opt = {}) {
1329
- super('cosine', a, b, opt, true);
1330
- }
1331
- _termFreq(str, delimiter) {
1332
- const terms = str.split(delimiter);
1333
- const freq = Pool.acquire('map', terms.length);
1334
- for (const term of terms) freq.set(term, (freq.get(term) || 0) + 1);
1335
- return freq;
1336
- }
1337
- compute(a, b) {
1338
- const { delimiter = ' ' } = this.options;
1339
- const termsA = this._termFreq(a, delimiter);
1340
- const termsB = this._termFreq(b, delimiter);
1341
- try {
1342
- let dotP = 0,
1343
- magA = 0,
1344
- magB = 0;
1345
- for (const [term, freqA] of termsA) {
1346
- const freqB = termsB.get(term) || 0;
1347
- dotP += freqA * freqB;
1348
- magA += freqA * freqA;
1349
- }
1350
- for (const freqB of termsB.values()) magB += freqB * freqB;
1351
- magA = Math.sqrt(magA);
1352
- magB = Math.sqrt(magB);
1353
- return {
1354
- res: magA && magB ? Metric.clamp(dotP / (magA * magB)) : 0,
1355
- raw: { dotProduct: dotP, magnitudeA: magA, magnitudeB: magB }
1356
- };
1357
- } finally {
1358
- Pool.release('map', termsA, termsA.size);
1359
- Pool.release('map', termsB, termsB.size);
1360
- }
1361
- }
1362
- }
1363
- MetricRegistry.add('cosine', CosineSimilarity);
1364
-
1365
- class DamerauLevenshteinDistance extends Metric {
1366
- constructor(a, b, opt = {}) {
1367
- super('damerau', a, b, opt, true);
1368
- }
1369
- compute(a, b, m, n, maxLen) {
1370
- const len = m + 1;
1371
- const [test, prev, curr] = Pool.acquireMany('int32', [len, len, len]);
1372
- try {
1373
- for (let i = 0; i <= m; i++) prev[i] = i;
1374
- for (let j = 1; j <= n; j++) {
1375
- curr[0] = j;
1376
- const cb = b.charCodeAt(j - 1);
1377
- for (let i = 1; i <= m; i++) {
1378
- const ca = a.charCodeAt(i - 1);
1379
- const cost = ca === cb ? 0 : 1;
1380
- let val = Math.min(curr[i - 1] + 1, prev[i] + 1, prev[i - 1] + cost);
1381
- if (
1382
- i > 1 &&
1383
- j > 1 &&
1384
- ca === b.charCodeAt(j - 2) &&
1385
- cb === a.charCodeAt(i - 2)
1386
- )
1387
- val = Math.min(val, test[i - 2] + cost);
1388
- curr[i] = val;
1389
- }
1390
- test.set(prev);
1391
- prev.set(curr);
1392
- }
1393
- const dist = prev[m];
1394
- return {
1395
- res: maxLen === 0 ? 1 : Metric.clamp(1 - dist / maxLen),
1396
- raw: { dist, maxLen }
1397
- };
1398
- } finally {
1399
- Pool.release('int32', test, len);
1400
- Pool.release('int32', prev, len);
1401
- Pool.release('int32', curr, len);
1402
- }
1403
- }
1404
- }
1405
- MetricRegistry.add('damerau', DamerauLevenshteinDistance);
1406
-
1407
- class DiceSorensenCoefficient extends Metric {
1408
- constructor(a, b, opt = {}) {
1409
- super('dice', a, b, opt, true);
1410
- }
1411
- _bigrams(str) {
1412
- const len = str.length - 1;
1413
- const bigrams = Pool.acquire('set', len);
1414
- for (let i = 0; i < len; i++) bigrams.add(str.substring(i, i + 2));
1415
- return bigrams;
1416
- }
1417
- compute(a, b) {
1418
- const setA = this._bigrams(a),
1419
- setB = this._bigrams(b);
1420
- const sizeA = setA.size,
1421
- sizeB = setB.size;
1422
- try {
1423
- let intersection = 0;
1424
- for (const bigram of setA) if (setB.has(bigram)) intersection++;
1425
- const size = sizeA + sizeB;
1426
- return {
1427
- res: size === 0 ? 1 : Metric.clamp((2 * intersection) / size),
1428
- raw: { intersection, size }
1429
- };
1430
- } finally {
1431
- Pool.release('set', setA, sizeA);
1432
- Pool.release('set', setB, sizeB);
1433
1451
  }
1434
- }
1435
- }
1436
- MetricRegistry.add('dice', DiceSorensenCoefficient);
1437
-
1438
- class HammingDistance extends Metric {
1439
- constructor(a, b, opt = {}) {
1440
- super('hamming', a, b, opt, true);
1441
- }
1442
- compute(a, b, m, n, maxLen) {
1443
- if (m !== n) {
1444
- if (this.options.pad !== undefined) {
1445
- if (m < maxLen) a = a.padEnd(maxLen, this.options.pad);
1446
- if (n < maxLen) b = b.padEnd(maxLen, this.options.pad);
1447
- m = n = maxLen;
1448
- } else
1449
- throw new Error(
1450
- `Strings must be of equal length for Hamming Distance, a=${m} and b=${n} given, ` +
1451
- `use option.pad for automatic adjustment`
1452
- );
1453
- }
1454
- let dist = 0;
1455
- for (let i = 0; i < m; i++) if (a[i] !== b[i]) dist++;
1456
- return { res: m === 0 ? 1 : Metric.clamp(1 - dist / m), raw: { dist } };
1452
+ let dist = 0;
1453
+ for (let i = 0; i < m; i++) if (a[i] !== b[i]) dist++;
1454
+ return { res: m === 0 ? 1 : Metric.clamp(1 - dist / m), raw: { dist } };
1457
1455
  }
1458
1456
  }
1459
1457
  MetricRegistry.add('hamming', HammingDistance);
@@ -1717,42 +1715,59 @@ class Phonetic {
1717
1715
  options;
1718
1716
  optKey;
1719
1717
  map;
1720
- static clear = () => this.cache.clear();
1718
+ ignoreSet;
1719
+ static clear() {
1720
+ this.cache.clear();
1721
+ }
1721
1722
  constructor(algo, opt = {}) {
1722
1723
  const defaults = this.constructor.default ?? {};
1723
1724
  const mapId = opt.map ?? defaults.map;
1724
- if (!mapId) throw new Error(`No mapping specified for phonetic algorithm`);
1725
+ if (!mapId)
1726
+ throw new CmpStrNotFoundError(
1727
+ `No mapping specified for phonetic algorithm`,
1728
+ { algo }
1729
+ );
1725
1730
  const map = PhoneticMappingRegistry.get(algo, mapId);
1726
1731
  if (map === undefined)
1727
- throw new Error(`Requested mapping <${mapId}> is not declared`);
1728
- this.options = merge(merge(defaults, map.options ?? {}), opt);
1732
+ throw new CmpStrNotFoundError(
1733
+ `Requested mapping <${mapId}> is not declared`,
1734
+ { algo, mapId }
1735
+ );
1736
+ this.options = DeepMerge.merge(
1737
+ DeepMerge.merge(defaults, map.options ?? {}),
1738
+ opt
1739
+ );
1729
1740
  this.optKey = Hasher.fastFNV1a(
1730
1741
  JSON.stringify(this.options, Object.keys(this.options).sort())
1731
1742
  ).toString();
1732
1743
  this.algo = algo;
1733
1744
  this.map = map;
1745
+ this.ignoreSet = new Set(map.ignore ?? []);
1734
1746
  }
1735
1747
  applyPattern(word) {
1736
1748
  const { patterns = [] } = this.map;
1737
- if (!patterns || !patterns.length) return word;
1749
+ if (!patterns.length) return word;
1738
1750
  for (const { pattern, replace, all = false } of patterns) {
1739
- word = word[all ? 'replaceAll' : 'replace'](pattern, replace);
1751
+ word = all
1752
+ ? word.replaceAll(pattern, replace)
1753
+ : word.replace(pattern, replace);
1740
1754
  }
1741
1755
  return word;
1742
1756
  }
1743
1757
  applyRules(char, i, chars, charLen) {
1744
1758
  const { ruleset = [] } = this.map;
1745
- if (!ruleset || !ruleset.length) return undefined;
1759
+ if (!ruleset.length) return undefined;
1746
1760
  const prev = chars[i - 1] || '',
1747
1761
  prev2 = chars[i - 2] || '';
1748
1762
  const next = chars[i + 1] || '',
1749
1763
  next2 = chars[i + 2] || '';
1764
+ const str = chars.join('');
1750
1765
  for (const rule of ruleset) {
1751
1766
  if (rule.char && rule.char !== char) continue;
1752
1767
  if (rule.position === 'start' && i !== 0) continue;
1753
1768
  if (rule.position === 'middle' && (i === 0 || i === charLen - 1))
1754
1769
  continue;
1755
- if (rule.position === 'end' && i !== charLen) continue;
1770
+ if (rule.position === 'end' && i !== charLen - 1) continue;
1756
1771
  if (rule.prev && !rule.prev.includes(prev)) continue;
1757
1772
  if (rule.prevNot && rule.prevNot.includes(prev)) continue;
1758
1773
  if (rule.prev2 && !rule.prev2.includes(prev2)) continue;
@@ -1763,12 +1778,12 @@ class Phonetic {
1763
1778
  if (rule.next2Not && rule.next2Not.includes(next2)) continue;
1764
1779
  if (
1765
1780
  rule.leading &&
1766
- !rule.leading.includes(chars.slice(0, rule.leading.length).join(''))
1781
+ !rule.leading.includes(str.slice(0, rule.leading.length))
1767
1782
  )
1768
1783
  continue;
1769
1784
  if (
1770
1785
  rule.trailing &&
1771
- !rule.trailing.includes(chars.slice(-rule.trailing.length).join(''))
1786
+ !rule.trailing.includes(str.slice(-rule.trailing.length))
1772
1787
  )
1773
1788
  continue;
1774
1789
  if (rule.match && !rule.match.every((c, j) => chars[i + j] === c))
@@ -1778,7 +1793,7 @@ class Phonetic {
1778
1793
  return undefined;
1779
1794
  }
1780
1795
  encode(word) {
1781
- const { map = {}, ignore = [] } = this.map;
1796
+ const { map = {} } = this.map;
1782
1797
  word = this.applyPattern(word);
1783
1798
  const chars = this.word2Chars(word);
1784
1799
  const charLen = chars.length;
@@ -1786,7 +1801,7 @@ class Phonetic {
1786
1801
  lastCode = null;
1787
1802
  for (let i = 0; i < charLen; i++) {
1788
1803
  const char = chars[i];
1789
- if (ignore.includes(char)) continue;
1804
+ if (this.ignoreSet.has(char)) continue;
1790
1805
  const mapped = this.mapChar(char, i, chars, charLen, lastCode, map);
1791
1806
  if (mapped === undefined) continue;
1792
1807
  ((code += mapped), (lastCode = mapped));
@@ -1805,7 +1820,9 @@ class Phonetic {
1805
1820
  ? input
1806
1821
  : (input + pad.repeat(length)).slice(0, length);
1807
1822
  }
1808
- word2Chars = (word) => word.toLowerCase().split('');
1823
+ word2Chars(word) {
1824
+ return Array.from(word.toLowerCase());
1825
+ }
1809
1826
  exitEarly(code, i) {
1810
1827
  const { length = -1 } = this.options;
1811
1828
  return length > 0 && code.length >= length;
@@ -1814,37 +1831,52 @@ class Phonetic {
1814
1831
  return code;
1815
1832
  }
1816
1833
  loop(words) {
1817
- const index = [];
1818
- for (const word of words) {
1819
- const key = Phonetic.cache.key(this.algo, [word]) + this.optKey;
1820
- const code =
1821
- Phonetic.cache.get(key || '') ??
1822
- (() => {
1823
- const res = this.encode(word);
1824
- if (key) Phonetic.cache.set(key, res);
1825
- return res;
1826
- })();
1827
- if (code && code.length) index.push(this.equalLen(code));
1828
- }
1829
- return index;
1834
+ return ErrorUtil.wrap(
1835
+ () => {
1836
+ const index = [];
1837
+ for (const word of words) {
1838
+ let key = Phonetic.cache.key(this.algo, [word]);
1839
+ if (key) key += this.optKey;
1840
+ const code =
1841
+ Phonetic.cache.get(key || '') ??
1842
+ (() => {
1843
+ const res = this.encode(word);
1844
+ if (key) Phonetic.cache.set(key, res);
1845
+ return res;
1846
+ })();
1847
+ if (code && code.length) index.push(this.equalLen(code));
1848
+ }
1849
+ return index;
1850
+ },
1851
+ `Failed to generate phonetic index`,
1852
+ { algo: this.algo, words }
1853
+ );
1830
1854
  }
1831
1855
  async loopAsync(words) {
1832
- const index = [];
1833
- for (const word of words) {
1834
- const key = Phonetic.cache.key(this.algo, [word]) + this.optKey;
1835
- const code = await Promise.resolve(
1836
- Phonetic.cache.get(key || '') ??
1837
- (() => {
1838
- const res = this.encode(word);
1839
- if (key) Phonetic.cache.set(key, res);
1840
- return res;
1841
- })()
1842
- );
1843
- if (code && code.length) index.push(this.equalLen(code));
1844
- }
1845
- return index;
1856
+ return ErrorUtil.wrapAsync(
1857
+ async () => {
1858
+ const index = [];
1859
+ for (const word of words) {
1860
+ const key = Phonetic.cache.key(this.algo, [word]) + this.optKey;
1861
+ const code = await Promise.resolve(
1862
+ Phonetic.cache.get(key || '') ??
1863
+ (() => {
1864
+ const res = this.encode(word);
1865
+ if (key) Phonetic.cache.set(key, res);
1866
+ return res;
1867
+ })()
1868
+ );
1869
+ if (code && code.length) index.push(this.equalLen(code));
1870
+ }
1871
+ return index;
1872
+ },
1873
+ `Failed to generate phonetic index asynchronously`,
1874
+ { algo: this.algo, words }
1875
+ );
1876
+ }
1877
+ getAlgoName() {
1878
+ return this.algo;
1846
1879
  }
1847
- getAlgoName = () => this.algo;
1848
1880
  getIndex(input) {
1849
1881
  const { delimiter = ' ' } = this.options;
1850
1882
  return profiler$1.run(() =>
@@ -1867,10 +1899,11 @@ const PhoneticMappingRegistry = (() => {
1867
1899
  return Object.freeze({
1868
1900
  add(algo, id, map, update = false) {
1869
1901
  const mappings = maps(algo);
1870
- if (!update && id in mappings)
1871
- throw new Error(
1872
- `Entry <${id}> already exists / use <update=true> to overwrite`
1873
- );
1902
+ ErrorUtil.assert(
1903
+ !(!id || id in mappings) || update,
1904
+ `Entry <${id}> already exists / use <update=true> to overwrite`,
1905
+ { algo, id }
1906
+ );
1874
1907
  mappings[id] = map;
1875
1908
  },
1876
1909
  remove(algo, id) {
@@ -2075,168 +2108,737 @@ class Metaphone extends Phonetic {
2075
2108
  constructor(opt = {}) {
2076
2109
  super('metaphone', opt);
2077
2110
  }
2078
- encode(word) {
2079
- word = word.replace(Metaphone.REGEX.adjacent, (m, c) =>
2080
- c === 'C' ? m : c
2081
- );
2082
- return super.encode(word);
2111
+ encode(word) {
2112
+ word = word.replace(Metaphone.REGEX.adjacent, (m, c) =>
2113
+ c === 'C' ? m : c
2114
+ );
2115
+ return super.encode(word);
2116
+ }
2117
+ adjustCode(code) {
2118
+ return code.slice(0, 1) + code.slice(1).replace(Metaphone.REGEX.vowel, '');
2119
+ }
2120
+ }
2121
+ PhoneticRegistry.add('metaphone', Metaphone);
2122
+ PhoneticMappingRegistry.add('metaphone', 'en90', {
2123
+ map: {
2124
+ a: 'A',
2125
+ b: 'B',
2126
+ c: 'K',
2127
+ d: 'T',
2128
+ e: 'E',
2129
+ f: 'F',
2130
+ g: 'K',
2131
+ h: 'H',
2132
+ i: 'I',
2133
+ j: 'J',
2134
+ k: 'K',
2135
+ l: 'L',
2136
+ m: 'M',
2137
+ n: 'N',
2138
+ o: 'O',
2139
+ p: 'P',
2140
+ q: 'K',
2141
+ r: 'R',
2142
+ s: 'S',
2143
+ t: 'T',
2144
+ u: 'U',
2145
+ v: 'F',
2146
+ w: 'W',
2147
+ x: 'KS',
2148
+ y: 'Y',
2149
+ z: 'S'
2150
+ },
2151
+ ruleset: [
2152
+ { char: 'a', position: 'start', next: ['e'], code: '' },
2153
+ { char: 'g', position: 'start', next: ['n'], code: '' },
2154
+ { char: 'k', position: 'start', next: ['n'], code: '' },
2155
+ { char: 'p', position: 'start', next: ['n'], code: '' },
2156
+ { char: 'w', position: 'start', next: ['r'], code: '' },
2157
+ { char: 'b', position: 'end', prev: ['m'], code: '' },
2158
+ { char: 'c', next: ['h'], prevNot: ['s'], code: 'X' },
2159
+ { char: 'c', next: ['i'], next2: ['a'], code: 'X' },
2160
+ { char: 'c', next: ['e', 'i', 'y'], code: 'S' },
2161
+ { char: 'd', next: ['g'], next2: ['e', 'i', 'y'], code: 'J' },
2162
+ {
2163
+ char: 'g',
2164
+ next: ['h'],
2165
+ next2Not: ['', 'a', 'e', 'i', 'o', 'u'],
2166
+ code: ''
2167
+ },
2168
+ { char: 'g', trailing: 'n', code: '' },
2169
+ { char: 'g', trailing: 'ned', code: '' },
2170
+ { char: 'g', next: ['e', 'i', 'y'], prevNot: ['g'], code: 'J' },
2171
+ {
2172
+ char: 'h',
2173
+ prev: ['a', 'e', 'i', 'o', 'u'],
2174
+ nextNot: ['a', 'e', 'i', 'o', 'u'],
2175
+ code: ''
2176
+ },
2177
+ { char: 'h', prev: ['c', 'g', 'p', 's', 't'], code: '' },
2178
+ { char: 'k', prev: ['c'], code: '' },
2179
+ { char: 'p', next: ['h'], code: 'F' },
2180
+ { char: 's', next: ['h'], code: 'X' },
2181
+ { char: 's', next: ['i'], next2: ['a', 'o'], code: 'X' },
2182
+ { char: 't', next: ['i'], next2: ['a', 'o'], code: 'X' },
2183
+ { char: 't', next: ['h'], code: '0' },
2184
+ { char: 't', next: ['c'], next2: ['h'], code: '' },
2185
+ { char: 'w', nextNot: ['a', 'e', 'i', 'o', 'u'], code: '' },
2186
+ { char: 'h', leading: 'w', code: '' },
2187
+ { char: 'x', position: 'start', code: 'S' },
2188
+ { char: 'y', nextNot: ['a', 'e', 'i', 'o', 'u'], code: '' }
2189
+ ]
2190
+ });
2191
+
2192
/**
 * Soundex phonetic algorithm built on the generic Phonetic base class.
 *
 * The character-to-digit tables are not part of this class; they are looked
 * up by the base class through the mapping id given in the options.
 */
class Soundex extends Phonetic {
  /** Default options: 'en' mapping, 4-character codes padded with '0', de-duplicated. */
  static default = {
    map: 'en',
    delimiter: ' ',
    length: 4,
    pad: '0',
    dedupe: true
  };

  /**
   * @param {object} [opt={}] - Options overriding the defaults above.
   */
  constructor(opt = {}) {
    super('soundex', opt);
  }

  /**
   * Builds the final code: the first character of the word (upper-cased)
   * followed by the generated code without its first symbol and with every
   * '0' removed.
   *
   * @param {string} code - Raw code produced by the encoder.
   * @param {string[]} chars - Characters of the encoded word.
   * @returns {string} The adjusted code; an empty string for an empty word.
   */
  adjustCode(code, chars) {
    // An empty word yields no characters; fall back to an empty prefix
    // instead of throwing on `undefined.toUpperCase()`.
    const first = chars?.[0] ?? '';
    return first.toUpperCase() + code.slice(1).replaceAll('0', '');
  }
}
2207
+ PhoneticRegistry.add('soundex', Soundex);
2208
+ PhoneticMappingRegistry.add('soundex', 'en', {
2209
+ map: {
2210
+ a: '0',
2211
+ e: '0',
2212
+ h: '0',
2213
+ i: '0',
2214
+ o: '0',
2215
+ u: '0',
2216
+ w: '0',
2217
+ y: '0',
2218
+ b: '1',
2219
+ f: '1',
2220
+ p: '1',
2221
+ v: '1',
2222
+ c: '2',
2223
+ g: '2',
2224
+ j: '2',
2225
+ k: '2',
2226
+ q: '2',
2227
+ s: '2',
2228
+ x: '2',
2229
+ z: '2',
2230
+ d: '3',
2231
+ t: '3',
2232
+ l: '4',
2233
+ m: '5',
2234
+ n: '5',
2235
+ r: '6'
2236
+ }
2237
+ });
2238
+ PhoneticMappingRegistry.add('soundex', 'de', {
2239
+ map: {
2240
+ a: '0',
2241
+ ä: '0',
2242
+ e: '0',
2243
+ h: '0',
2244
+ i: '0',
2245
+ j: '0',
2246
+ o: '0',
2247
+ ö: '0',
2248
+ u: '0',
2249
+ ü: '0',
2250
+ y: '0',
2251
+ b: '1',
2252
+ f: '1',
2253
+ p: '1',
2254
+ v: '1',
2255
+ w: '1',
2256
+ c: '2',
2257
+ g: '2',
2258
+ k: '2',
2259
+ q: '2',
2260
+ s: '2',
2261
+ ß: '2',
2262
+ x: '2',
2263
+ z: '2',
2264
+ d: '3',
2265
+ t: '3',
2266
+ l: '4',
2267
+ m: '5',
2268
+ n: '5',
2269
+ r: '6'
2270
+ },
2271
+ ruleset: [{ char: 'c', next: ['h'], code: '7' }]
2272
+ });
2273
+
2274
/**
 * Static validation helpers for user supplied options.
 *
 * Every validator returns nothing on success and throws a
 * CmpStrValidationError on the first problem it finds. `undefined` values
 * are always accepted, i.e. every option is optional.
 */
class OptionsValidator {
  /** Single-character normalization flags accepted in the `flags` option. */
  static ALLOWED_FLAGS = new Set([
    'd',
    'u',
    'x',
    'w',
    't',
    'r',
    's',
    'k',
    'n',
    'i'
  ]);
  /** Accepted values of the `output` option. */
  static ALLOWED_OUTPUT = new Set(['orig', 'prep']);
  /** Accepted values of the metric `mode` option. */
  static ALLOWED_MODES = new Set(['default', 'batch', 'single', 'pairwise']);
  /** Accepted string values of the `sort` option. */
  static ALLOWED_SORT = new Set(['asc', 'desc']);

  /** Validators per processor type allowed inside `processors`. */
  static PROCESSORS = {
    phonetic: (opt) => {
      if (!opt) return;
      OptionsValidator.validatePhoneticName(opt.algo);
      OptionsValidator.validatePhoneticOptions(opt.opt);
    }
  };

  /** Validators per metric option (`opt`). */
  static METRIC_OPT_MAP = {
    mode: (v) => OptionsValidator.validateMode(v),
    delimiter: (v) => OptionsValidator.validateString(v, 'opt.delimiter'),
    pad: (v) => OptionsValidator.validateString(v, 'opt.pad'),
    q: (v) => OptionsValidator.validateNumber(v, 'opt.q'),
    match: (v) => OptionsValidator.validateNumber(v, 'opt.match'),
    mismatch: (v) => OptionsValidator.validateNumber(v, 'opt.mismatch'),
    gap: (v) => OptionsValidator.validateNumber(v, 'opt.gap')
  };

  /** Validators per phonetic processor option (`processors.phonetic.opt`). */
  static PHONETIC_OPT_MAP = {
    map: (v) =>
      OptionsValidator.validateString(v, 'processors.phonetic.opt.map'),
    delimiter: (v) =>
      OptionsValidator.validateString(v, 'processors.phonetic.opt.delimiter'),
    length: (v) =>
      OptionsValidator.validateNumber(v, 'processors.phonetic.opt.length'),
    pad: (v) =>
      OptionsValidator.validateString(v, 'processors.phonetic.opt.pad'),
    dedupe: (v) =>
      OptionsValidator.validateBoolean(v, 'processors.phonetic.opt.dedupe'),
    fallback: (v) =>
      OptionsValidator.validateString(v, 'processors.phonetic.opt.fallback')
  };

  /** Validators per top-level option. */
  static CMPSTR_OPT_MAP = {
    raw: (v) => OptionsValidator.validateBoolean(v, 'raw'),
    removeZero: (v) => OptionsValidator.validateBoolean(v, 'removeZero'),
    safeEmpty: (v) => OptionsValidator.validateBoolean(v, 'safeEmpty'),
    flags: (v) => OptionsValidator.validateFlags(v),
    metric: (v) => OptionsValidator.validateMetricName(v),
    output: (v) => OptionsValidator.validateOutput(v),
    opt: (v) => OptionsValidator.validateMetricOptions(v),
    processors: (v) => OptionsValidator.validateProcessors(v),
    sort: (v) => OptionsValidator.validateSort(v, 'sort'),
    objectsOnly: (v) => OptionsValidator.validateBoolean(v, 'objectsOnly')
  };

  /**
   * Renders a set as `a | b | c` for use in error messages.
   *
   * @param {Set<string>} set - Allowed values.
   * @returns {string} The joined values.
   */
  static set2string(set) {
    return Array.from(set).join(' | ');
  }

  /**
   * Ensures `value` has the given `typeof` type; NaN is rejected for numbers.
   *
   * @param {*} value - Value to check (`undefined` is accepted).
   * @param {string} name - Option name used in the error message.
   * @param {string} type - Expected `typeof` result.
   * @throws {CmpStrValidationError} If the type does not match.
   */
  static validateType(value, name, type) {
    if (value === undefined) return;
    if (typeof value !== type || (type === 'number' && Number.isNaN(value))) {
      throw new CmpStrValidationError(
        `Invalid option <${name}>: expected ${type}`,
        { name, value }
      );
    }
  }

  /**
   * Ensures `value` is a string contained in `set`.
   *
   * @param {*} value - Value to check (`undefined` is accepted).
   * @param {string} name - Option name used in the error message.
   * @param {Set<string>} set - Allowed values.
   * @throws {CmpStrValidationError} If the value is not an allowed string.
   */
  static validateEnum(value, name, set) {
    if (value === undefined) return;
    if (typeof value !== 'string' || !set.has(value)) {
      throw new CmpStrValidationError(
        `Invalid option <${name}>: expected ${OptionsValidator.set2string(set)}`,
        { name, value }
      );
    }
  }

  /**
   * Validates every enumerable key of `opt` with the matching validator
   * from `map`; keys without a validator are rejected.
   *
   * @param {object} [opt] - Options object (falsy values are accepted).
   * @param {Object<string, function(*): void>} map - Validators by option name.
   * @throws {CmpStrValidationError} On an unknown option or an invalid value.
   */
  static validateMap(opt, map) {
    if (!opt) return;
    for (const k in opt) {
      // Only own entries of the validator table count: names such as
      // "constructor" or "toString" must not resolve to inherited
      // Object.prototype members and thereby skip validation.
      const fn = Object.prototype.hasOwnProperty.call(map, k)
        ? map[k]
        : undefined;
      if (!fn)
        throw new CmpStrValidationError(`Invalid option <${k}>`, {
          option: k,
          // Report the offending value supplied by the caller.
          value: opt[k]
        });
      fn(opt[k]);
    }
  }

  /**
   * Ensures `value` is a non-empty string known to a registry.
   *
   * @param {*} value - Name to check (`undefined` is accepted).
   * @param {string} name - Option name used in the error message.
   * @param {string} label - Human readable registry label for the error message.
   * @param {function(string): boolean} has - Tells whether a name is registered.
   * @param {function(): string[]} list - Lists the registered names.
   * @throws {CmpStrValidationError} If the name is invalid or not registered.
   */
  static validateRegistryName(value, name, label, has, list) {
    if (value === undefined) return;
    if (typeof value !== 'string' || value.length === 0)
      throw new CmpStrValidationError(
        `Invalid option <${name}>: expected non-empty string`,
        { name, value }
      );
    if (!has(value))
      throw new CmpStrValidationError(`${label} <${value}> is not registered`, {
        name,
        value,
        available: list()
      });
  }

  /** Ensures `value` is a boolean (or undefined). */
  static validateBoolean(value, name) {
    OptionsValidator.validateType(value, name, 'boolean');
  }

  /** Ensures `value` is a non-NaN number (or undefined). */
  static validateNumber(value, name) {
    OptionsValidator.validateType(value, name, 'number');
  }

  /** Ensures `value` is a string (or undefined). */
  static validateString(value, name) {
    OptionsValidator.validateType(value, name, 'string');
  }

  /**
   * Ensures `value` is a string made up of allowed normalization flags only.
   *
   * @param {*} value - Flags string (`undefined` is accepted).
   * @throws {CmpStrValidationError} On a non-string or an unknown flag.
   */
  static validateFlags(value) {
    if (value === undefined) return;
    if (typeof value !== 'string')
      throw new CmpStrValidationError(
        `Invalid option <flags>: expected string`,
        { flags: value }
      );
    for (let i = 0; i < value.length; i++) {
      const ch = value[i];
      if (!OptionsValidator.ALLOWED_FLAGS.has(ch))
        throw new CmpStrValidationError(
          `Invalid normalization flag <${ch}> in <flags>: expected ${OptionsValidator.set2string(OptionsValidator.ALLOWED_FLAGS)}`,
          { flags: value, invalid: ch }
        );
    }
  }

  /** Ensures `value` is one of ALLOWED_OUTPUT (or undefined). */
  static validateOutput(value) {
    OptionsValidator.validateEnum(
      value,
      'output',
      OptionsValidator.ALLOWED_OUTPUT
    );
  }

  /** Ensures `value` is one of ALLOWED_MODES (or undefined). */
  static validateMode(value) {
    OptionsValidator.validateEnum(
      value,
      'mode',
      OptionsValidator.ALLOWED_MODES
    );
  }

  /** Accepts undefined, any boolean, or one of ALLOWED_SORT. */
  static validateSort(value, name) {
    if (value === undefined || typeof value === 'boolean') return;
    OptionsValidator.validateEnum(value, name, OptionsValidator.ALLOWED_SORT);
  }

  /** Ensures `value` names a metric known to MetricRegistry. */
  static validateMetricName(value) {
    OptionsValidator.validateRegistryName(
      value,
      'metric',
      'Comparison metric',
      MetricRegistry.has,
      MetricRegistry.list
    );
  }

  /** Ensures `value` names an algorithm known to PhoneticRegistry. */
  static validatePhoneticName(value) {
    OptionsValidator.validateRegistryName(
      value,
      'phonetic',
      'Phonetic algorithm',
      PhoneticRegistry.has,
      PhoneticRegistry.list
    );
  }

  /** Validates a metric options object against METRIC_OPT_MAP. */
  static validateMetricOptions(opt) {
    OptionsValidator.validateMap(opt, OptionsValidator.METRIC_OPT_MAP);
  }

  /** Validates a phonetic options object against PHONETIC_OPT_MAP. */
  static validatePhoneticOptions(opt) {
    OptionsValidator.validateMap(opt, OptionsValidator.PHONETIC_OPT_MAP);
  }

  /**
   * Validates the `processors` option: every key must be a known processor
   * type, and its value must pass that processor's validator.
   *
   * @param {object} [opt] - Processors object (falsy values are accepted).
   * @throws {CmpStrValidationError} On an unknown processor or invalid settings.
   */
  static validateProcessors(opt) {
    if (!opt) return;
    for (const key in opt) {
      // Own-property lookup for the same reason as in validateMap().
      const fn = Object.prototype.hasOwnProperty.call(
        OptionsValidator.PROCESSORS,
        key
      )
        ? OptionsValidator.PROCESSORS[key]
        : undefined;
      if (!fn)
        throw new CmpStrValidationError(
          `Invalid processor type <${key}> in <processors>: expected ${Object.keys(OptionsValidator.PROCESSORS).join(' | ')}`,
          { processors: opt, invalid: key }
        );
      fn(opt[key]);
    }
  }

  /** Validates a complete top-level options object against CMPSTR_OPT_MAP. */
  static validateOptions(opt) {
    OptionsValidator.validateMap(opt, OptionsValidator.CMPSTR_OPT_MAP);
  }
}
2462
+
2463
+ class StructuredData {
2464
+ data;
2465
+ key;
2466
+ static SORT_ASC = (a, b) => a.res - b.res;
2467
+ static SORT_DESC = (a, b) => b.res - a.res;
2468
+ static create(data, key) {
2469
+ return new StructuredData(data, key);
2470
+ }
2471
+ constructor(data, key) {
2472
+ this.data = data;
2473
+ this.key = key;
2474
+ }
2475
+ extractFrom(arr, key) {
2476
+ const n = arr.length;
2477
+ const result = new Array(n);
2478
+ for (let i = 0; i < n; i++) {
2479
+ const val = arr[i][key];
2480
+ result[i] = val != null ? String(val) : '';
2481
+ }
2482
+ return result;
2483
+ }
2484
+ extract() {
2485
+ return this.extractFrom(this.data, this.key);
2486
+ }
2487
+ isMetricResult(v) {
2488
+ return (
2489
+ typeof v === 'object' && v !== null && 'a' in v && 'b' in v && 'res' in v
2490
+ );
2491
+ }
2492
+ isCmpStrResult(v) {
2493
+ return (
2494
+ typeof v === 'object' &&
2495
+ v !== null &&
2496
+ 'source' in v &&
2497
+ 'target' in v &&
2498
+ 'match' in v
2499
+ );
2500
+ }
2501
+ normalizeResults(results) {
2502
+ if (!Array.isArray(results) || results.length === 0) return [];
2503
+ const first = results[0];
2504
+ let out = new Array(results.length);
2505
+ if (this.isMetricResult(first)) {
2506
+ const src = results;
2507
+ for (let i = 0; i < src.length; i++) out[i] = { ...src[i], __idx: i };
2508
+ } else if (this.isCmpStrResult(first)) {
2509
+ const src = results;
2510
+ for (let i = 0; i < src.length; i++) {
2511
+ const r = src[i];
2512
+ out[i] = {
2513
+ metric: 'unknown',
2514
+ a: r.source,
2515
+ b: r.target,
2516
+ res: r.match,
2517
+ raw: r.raw,
2518
+ __idx: i
2519
+ };
2520
+ }
2521
+ } else
2522
+ throw new CmpStrValidationError(
2523
+ 'Unsupported result format for StructuredData normalization.'
2524
+ );
2525
+ return out;
2526
+ }
2527
+ rebuild(results, sourceData, extractedStrings, removeZero, objectsOnly) {
2528
+ const m = extractedStrings.length,
2529
+ n = results.length;
2530
+ const stringToIndices = Pool.acquire('map', m);
2531
+ const occurrenceCount = Pool.acquire('map', n);
2532
+ const output = new Array(n);
2533
+ stringToIndices.clear();
2534
+ occurrenceCount.clear();
2535
+ try {
2536
+ for (let i = 0; i < m; i++) {
2537
+ const str = extractedStrings[i];
2538
+ let arr = stringToIndices.get(str);
2539
+ if (!arr) {
2540
+ arr = [];
2541
+ stringToIndices.set(str, arr);
2542
+ }
2543
+ arr.push(i);
2544
+ }
2545
+ let out = 0;
2546
+ for (let i = 0; i < n; i++) {
2547
+ const result = results[i];
2548
+ if (removeZero && result.res === 0) continue;
2549
+ const targetStr = result.b || '';
2550
+ const indices = stringToIndices.get(targetStr);
2551
+ let dataIndex;
2552
+ if (indices && indices.length > 0) {
2553
+ const occurrence = occurrenceCount.get(targetStr) ?? 0;
2554
+ occurrenceCount.set(targetStr, occurrence + 1);
2555
+ dataIndex = indices[occurrence % indices.length];
2556
+ } else {
2557
+ dataIndex = result.__idx ?? i;
2558
+ }
2559
+ if (dataIndex < 0 || dataIndex >= sourceData.length) continue;
2560
+ const sourceObj = sourceData[dataIndex];
2561
+ const mappedTarget = extractedStrings[dataIndex] || targetStr;
2562
+ if (objectsOnly) output[out++] = sourceObj;
2563
+ else
2564
+ output[out++] = {
2565
+ obj: sourceObj,
2566
+ key: this.key,
2567
+ result: {
2568
+ source: result.a,
2569
+ target: mappedTarget,
2570
+ match: result.res
2571
+ },
2572
+ ...(result.raw ? { raw: result.raw } : null)
2573
+ };
2574
+ }
2575
+ output.length = out;
2576
+ return output;
2577
+ } finally {
2578
+ Pool.release('map', stringToIndices, m);
2579
+ Pool.release('map', occurrenceCount, n);
2580
+ }
2581
+ }
2582
+ sort(results, sort) {
2583
+ if (!sort || results.length <= 1) return results;
2584
+ return results.sort(
2585
+ sort === 'asc' ? StructuredData.SORT_ASC : StructuredData.SORT_DESC
2586
+ );
2587
+ }
2588
+ finalizeLookup(results, extractedStrings, opt) {
2589
+ return this.rebuild(
2590
+ this.sort(this.normalizeResults(results), opt?.sort),
2591
+ this.data,
2592
+ extractedStrings,
2593
+ opt?.removeZero,
2594
+ opt?.objectsOnly
2595
+ );
2596
+ }
2597
+ performLookup(fn, extractedStrings, opt) {
2598
+ return ErrorUtil.wrap(
2599
+ () => this.finalizeLookup(fn(), extractedStrings, opt),
2600
+ 'StructuredData lookup failed',
2601
+ { key: this.key }
2602
+ );
2603
+ }
2604
+ async performLookupAsync(fn, extractedStrings, opt) {
2605
+ return await ErrorUtil.wrapAsync(
2606
+ async () => this.finalizeLookup(await fn(), extractedStrings, opt),
2607
+ 'StructuredData async lookup failed',
2608
+ { key: this.key }
2609
+ );
2610
+ }
2611
+ lookup(fn, query, opt) {
2612
+ const b = this.extract();
2613
+ try {
2614
+ return this.performLookup(() => fn(query, b, opt), b, opt);
2615
+ } finally {
2616
+ Pool.release('string[]', b, b.length);
2617
+ }
2618
+ }
2619
+ async lookupAsync(fn, query, opt) {
2620
+ const b = this.extract();
2621
+ try {
2622
+ return await this.performLookupAsync(() => fn(query, b, opt), b, opt);
2623
+ } finally {
2624
+ Pool.release('string[]', b, b.length);
2625
+ }
2626
+ }
2627
+ lookupPairs(fn, other, otherKey, opt) {
2628
+ const a = this.extract();
2629
+ const b = this.extractFrom(other, otherKey);
2630
+ try {
2631
+ return this.performLookup(() => fn(a, b, opt), a, opt);
2632
+ } finally {
2633
+ Pool.release('string[]', a, a.length);
2634
+ Pool.release('string[]', b, b.length);
2635
+ }
2636
+ }
2637
+ async lookupPairsAsync(fn, other, otherKey, opt) {
2638
+ const a = this.extract();
2639
+ const b = this.extractFrom(other, otherKey);
2640
+ try {
2641
+ return await this.performLookupAsync(() => fn(a, b, opt), a, opt);
2642
+ } finally {
2643
+ Pool.release('string[]', a, a.length);
2644
+ Pool.release('string[]', b, b.length);
2645
+ }
2646
+ }
2647
+ }
2648
+
2649
/**
 * Statistical text analysis helper.
 *
 * The input is trimmed and tokenized once in the constructor; all getters work
 * on the cached words, sentences and frequency maps. Words are lower-cased runs
 * of Unicode letters; syllables are estimated by counting groups of the ASCII
 * vowels `a e i o u y`.
 */
class TextAnalyzer {
  /**
   * Shared patterns. The global ones are only used through `match`/`replace`,
   * which reset `lastIndex`, so no state leaks between instances.
   */
  static REGEX = {
    number: /\d/,
    sentence: /(?<=[.!?])\s+/,
    word: /\p{L}+/gu,
    nonWord: /[^\p{L}]/gu,
    vowelGroup: /[aeiouy]+/g,
    letter: /\p{L}/gu,
    ucLetter: /\p{Lu}/gu
  };

  /** Trimmed input text. */
  text;
  /** Lower-cased words in order of appearance. */
  words = [];
  /** Sentences split after `.`, `!` or `?` followed by whitespace. */
  sentences = [];
  /** Code point -> occurrence count over `text`. */
  charFrequency = new Map();
  /** Word -> occurrence count. */
  wordHistogram = new Map();
  /** Cleaned word -> estimated syllable count. */
  syllableCache = new Map();
  /** Lazily computed result of `computeSyllableStats`. */
  syllableStats;

  /**
   * @param {string} input - Text to analyze; surrounding whitespace is removed.
   */
  constructor(input) {
    this.text = input.trim();
    this.tokenize();
    this.computeFrequencies();
  }

  /** Fills `words` and `sentences` from `text`. */
  tokenize() {
    // `match` with a global regex is stateless, unlike an `exec` loop on the shared static pattern.
    this.words = this.text.toLowerCase().match(TextAnalyzer.REGEX.word) ?? [];
    this.sentences = this.text
      .split(TextAnalyzer.REGEX.sentence)
      .filter(Boolean);
  }

  /** Fills `charFrequency` (per code point) and `wordHistogram`. */
  computeFrequencies() {
    for (const char of this.text)
      this.charFrequency.set(char, (this.charFrequency.get(char) ?? 0) + 1);
    for (const word of this.words)
      this.wordHistogram.set(word, (this.wordHistogram.get(word) ?? 0) + 1);
  }

  /**
   * Estimates the syllables of a word as its number of vowel groups.
   *
   * @param {string} word - Word to inspect.
   * @returns {number} Vowel-group count, or 1 when the word has none.
   */
  estimateSyllables(word) {
    const clean = word
      .normalize('NFC')
      .toLowerCase()
      .replace(TextAnalyzer.REGEX.nonWord, '');
    if (this.syllableCache.has(clean)) return this.syllableCache.get(clean);
    const matches = clean.match(TextAnalyzer.REGEX.vowelGroup);
    const count = matches ? matches.length : 1;
    this.syllableCache.set(clean, count);
    return count;
  }

  /**
   * Computes (once) and returns the syllable statistics over all words.
   *
   * @returns {{total: number, mono: number, perWord: number[], avg: number, median: number}}
   *   `perWord` is sorted ascending.
   */
  computeSyllableStats() {
    return (this.syllableStats ||= (() => {
      const perWord = this.words
        .map((w) => this.estimateSyllables(w))
        .sort((a, b) => a - b);
      const total = perWord.reduce((sum, s) => sum + s, 0);
      const mono = perWord.filter((s) => s === 1).length;
      const median = !perWord.length
        ? 0
        : perWord.length % 2 === 0
          ? (perWord[perWord.length / 2 - 1] + perWord[perWord.length / 2]) / 2
          : perWord[Math.floor(perWord.length / 2)];
      return {
        total,
        mono,
        perWord,
        avg: perWord.length ? total / perWord.length : 0,
        median
      };
    })());
  }

  /** @returns {number} Length of the trimmed text in UTF-16 code units. */
  getLength = () => this.text.length;

  /** @returns {number} Number of words. */
  getWordCount = () => this.words.length;

  /** @returns {number} Number of sentences. */
  getSentenceCount = () => this.sentences.length;

  /** @returns {number} Mean word length, 0 without words. */
  getAvgWordLength() {
    return this.words.length
      ? this.words.join('').length / this.words.length
      : 0;
  }

  /** @returns {number} Mean words per sentence, 0 without sentences. */
  getAvgSentenceLength() {
    return this.sentences.length
      ? this.words.length / this.sentences.length
      : 0;
  }

  /** @returns {Object<string, number>} Word -> count as a plain object. */
  getWordHistogram() {
    return Object.fromEntries(this.wordHistogram);
  }

  /**
   * @param {number} [limit=5] - Maximum number of words to return.
   * @returns {string[]} Words ordered by descending frequency.
   */
  getMostCommonWords(limit = 5) {
    return [...this.wordHistogram.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, limit)
      .map((e) => e[0]);
  }

  /** @returns {string[]} Words that occur exactly once. */
  getHapaxLegomena() {
    return [...this.wordHistogram.entries()]
      .filter(([, c]) => c === 1)
      .map((e) => e[0]);
  }

  /** @returns {boolean} True when the text contains at least one digit. */
  hasNumbers = () => TextAnalyzer.REGEX.number.test(this.text);

  /** @returns {number} Upper-case letters divided by all letters, 0 without letters. */
  getUpperCaseRatio() {
    const matches = this.text.match(TextAnalyzer.REGEX.letter) || [];
    const upper = this.text.match(TextAnalyzer.REGEX.ucLetter)?.length || 0;
    return matches.length ? upper / matches.length : 0;
  }

  /** @returns {Object<string, number>} Character -> count as a plain object. */
  getCharFrequency() {
    return Object.fromEntries(this.charFrequency);
  }

  /**
   * @returns {Object<string, number>} Upper-case hex code point (at least four
   *   digits) -> occurrence count.
   */
  getUnicodeCodepoints() {
    const result = {};
    for (const [char, count] of this.charFrequency) {
      // Keys of charFrequency are whole code points, so codePointAt is required:
      // charCodeAt would report only the high surrogate of astral characters.
      const hex = char
        .codePointAt(0)
        .toString(16)
        .padStart(4, '0')
        .toUpperCase();
      result[hex] = (result[hex] || 0) + count;
    }
    return result;
  }

  /**
   * @param {number} [len=7] - Minimum length of a "long" word.
   * @returns {number} Share of words with at least `len` characters.
   */
  getLongWordRatio(len = 7) {
    let long = 0;
    for (const w of this.words) if (w.length >= len) long++;
    return this.words.length ? long / this.words.length : 0;
  }

  /**
   * @param {number} [len=3] - Maximum length of a "short" word.
   * @returns {number} Share of words with at most `len` characters.
   */
  getShortWordRatio(len = 3) {
    let short = 0;
    for (const w of this.words) if (w.length <= len) short++;
    return this.words.length ? short / this.words.length : 0;
  }

  /** @returns {number} Estimated syllables over all words. */
  getSyllablesCount() {
    return this.computeSyllableStats().total;
  }

  /** @returns {number} Number of words estimated at one syllable. */
  getMonosyllabicWordCount() {
    return this.computeSyllableStats().mono;
  }

  /**
   * @param {number} min - Lower syllable bound (inclusive).
   * @returns {number} Number of words with at least `min` syllables.
   */
  getMinSyllablesWordCount(min) {
    return this.computeSyllableStats().perWord.filter((w) => w >= min).length;
  }

  /**
   * @param {number} max - Upper syllable bound (inclusive).
   * @returns {number} Number of words with at most `max` syllables.
   */
  getMaxSyllablesWordCount(max) {
    return this.computeSyllableStats().perWord.filter((w) => w <= max).length;
  }

  /** @returns {number} Mean syllables per word. */
  getAvgSyllablesPerWord() {
    return this.computeSyllableStats().avg;
  }

  /** @returns {number} Median syllables per word. */
  getMedianSyllablesPerWord() {
    return this.computeSyllableStats().median;
  }

  /**
   * Honoré's R: `100 * ln(N) / (1 - V1 / V)` with N words, V distinct words
   * and V1 words occurring once.
   *
   * @returns {number} The statistic, or 0 when it is undefined (no words, or
   *   every distinct word occurs exactly once so the denominator is 0).
   */
  getHonoresR() {
    const tokens = this.words.length;
    const types = this.wordHistogram.size;
    if (tokens === 0 || types === 0) return 0;
    const r =
      (100 * Math.log(tokens)) /
      (1 - this.getHapaxLegomena().length / types);
    // Division never throws in JavaScript; the degenerate cases surface as NaN/Infinity.
    return Number.isFinite(r) ? r : 0;
  }

  /**
   * @param {number} [wpm=200] - Reading speed in words per minute; values that
   *   are not positive numbers fall back to 1.
   * @returns {number} Estimated reading time in minutes.
   */
  getReadingTime(wpm = 200) {
    const rate = Number(wpm);
    return this.words.length / (rate > 0 ? rate : 1);
  }

  /**
   * Readability score from average sentence length and average syllables per word.
   *
   * @param {'flesch'|'fleschde'|'kincaid'} [metric='flesch'] - Formula to apply
   *   (the constants match Flesch Reading Ease, its German adaptation and the
   *   Flesch-Kincaid grade level).
   * @returns {number|undefined} The score; `undefined` for an unknown metric.
   */
  getReadabilityScore(metric = 'flesch') {
    const w = this.words.length || 1;
    const s = this.sentences.length || 1;
    const y = this.getSyllablesCount() || 1;
    const asl = w / s;
    const asw = y / w;
    switch (metric) {
      case 'flesch':
        return 206.835 - 1.015 * asl - 84.6 * asw;
      case 'fleschde':
        return 180 - asl - 58.5 * asw;
      case 'kincaid':
        return 0.39 * asl + 11.8 * asw - 15.59;
    }
  }

  /** @returns {number} LIX score: words per sentence plus percentage of long words. */
  getLIXScore() {
    const w = this.words.length || 1;
    const s = this.sentences.length || 1;
    const l = this.getLongWordRatio() * w;
    return w / s + (l / w) * 100;
  }

  /**
   * @returns {number[]} Four scores computed from the share of words with three
   *   or more syllables, sentence length, share of long words and share of
   *   monosyllabic words (the coefficients match the four Wiener Sachtextformel variants).
   */
  getWSTFScore() {
    const w = this.words.length || 1;
    const h = (this.getMinSyllablesWordCount(3) / w) * 100;
    const s = this.getAvgSentenceLength();
    const l = this.getLongWordRatio() * 100;
    const m = (this.getMonosyllabicWordCount() / w) * 100;
    return [
      0.1935 * h + 0.1672 * s + 0.1297 * l - 0.0327 * m - 0.875,
      0.2007 * h + 0.1682 * s + 0.1373 * l - 2.779,
      0.2963 * h + 0.1905 * s - 1.1144,
      0.2744 * h + 0.2656 * s - 1.693
    ];
  }
}
2240
2842
 
2241
2843
  const profiler = Profiler.getInstance();
2242
2844
  class CmpStr {
@@ -2288,29 +2890,26 @@ class CmpStr {
2288
2890
  }
2289
2891
  assert(cond, test) {
2290
2892
  switch (cond) {
2893
+ default:
2894
+ throw new CmpStrInternalError(`Cmpstr condition <${cond}> unknown`);
2291
2895
  case 'metric':
2292
- if (!CmpStr.metric.has(test))
2293
- throw new Error(
2294
- `CmpStr <metric> must be set, call .setMetric(), ` +
2295
- `use CmpStr.metric.list() for available metrics`
2296
- );
2896
+ OptionsValidator.validateMetricName(test);
2297
2897
  break;
2298
2898
  case 'phonetic':
2299
- if (!CmpStr.phonetic.has(test))
2300
- throw new Error(
2301
- `CmpStr <phonetic> must be set, call .setPhonetic(), ` +
2302
- `use CmpStr.phonetic.list() for available phonetic algorithms`
2303
- );
2899
+ OptionsValidator.validatePhoneticName(test);
2304
2900
  break;
2305
- default:
2306
- throw new Error(`Cmpstr condition <${cond}> unknown`);
2307
2901
  }
2308
2902
  }
2309
2903
  assertMany(...cond) {
2310
2904
  for (const [c, test] of cond) this.assert(c, test);
2311
2905
  }
2312
2906
  resolveOptions(opt) {
2313
- return merge({ ...(this.options ?? Object.create(null)) }, opt);
2907
+ const merged = DeepMerge.merge(
2908
+ { ...(this.options ?? Object.create(null)) },
2909
+ opt
2910
+ );
2911
+ OptionsValidator.validateOptions(merged);
2912
+ return merged;
2314
2913
  }
2315
2914
  normalize(input, flags) {
2316
2915
  return Normalizer.normalize(input, flags ?? this.options.flags ?? '');
@@ -2326,7 +2925,7 @@ class CmpStr {
2326
2925
  return input;
2327
2926
  }
2328
2927
  postProcess(result, opt) {
2329
- if (opt?.removeZero && Array.isArray(result))
2928
+ if (Array.isArray(result) && opt?.removeZero)
2330
2929
  result = result.filter((r) => r.res > 0);
2331
2930
  return result;
2332
2931
  }
@@ -2344,64 +2943,114 @@ class CmpStr {
2344
2943
  compute(a, b, opt, mode, raw, skip) {
2345
2944
  const resolved = this.resolveOptions(opt);
2346
2945
  this.assert('metric', resolved.metric);
2347
- const A = skip ? a : this.prepare(a, resolved);
2348
- const B = skip ? b : this.prepare(b, resolved);
2349
- if (
2350
- resolved.safeEmpty &&
2351
- ((Array.isArray(A) && A.length === 0) ||
2352
- (Array.isArray(B) && B.length === 0) ||
2353
- A === '' ||
2354
- B === '')
2355
- ) {
2356
- return [];
2357
- }
2358
- const metric = factory['metric'](resolved.metric, A, B, resolved.opt);
2359
- if (resolved.output !== 'prep') metric.setOriginal(a, b);
2360
- metric.run(mode);
2361
- const result = this.postProcess(metric.getResults(), resolved);
2362
- return this.output(result, raw ?? resolved.raw);
2946
+ return ErrorUtil.wrap(
2947
+ () => {
2948
+ const A = skip ? a : this.prepare(a, resolved);
2949
+ const B = skip ? b : this.prepare(b, resolved);
2950
+ if (
2951
+ resolved.safeEmpty &&
2952
+ ((Array.isArray(A) && A.length === 0) ||
2953
+ (Array.isArray(B) && B.length === 0) ||
2954
+ A === '' ||
2955
+ B === '')
2956
+ ) {
2957
+ return [];
2958
+ }
2959
+ const metric = factory['metric'](resolved.metric, A, B, resolved.opt);
2960
+ if (resolved.output !== 'prep') metric.setOriginal(a, b);
2961
+ metric.run(mode);
2962
+ const result = this.postProcess(metric.getResults(), resolved);
2963
+ return this.output(result, raw ?? resolved.raw);
2964
+ },
2965
+ `Failed to compute metric <${resolved.metric}> for the given inputs`,
2966
+ { a, b, options: opt }
2967
+ );
2363
2968
  }
2364
2969
  output(result, raw) {
2365
- return (raw ?? this.options.raw)
2366
- ? result
2367
- : Array.isArray(result)
2368
- ? result.map((r) => ({ source: r.a, target: r.b, match: r.res }))
2369
- : { source: result.a, target: result.b, match: result.res };
2970
+ return ErrorUtil.wrap(
2971
+ () =>
2972
+ (raw ?? this.options.raw)
2973
+ ? result
2974
+ : Array.isArray(result)
2975
+ ? result.map((r) => ({ source: r.a, target: r.b, match: r.res }))
2976
+ : { source: result.a, target: result.b, match: result.res },
2977
+ `Failed to resolve output format for the metric result`,
2978
+ { result, raw }
2979
+ );
2980
+ }
2981
+ clone() {
2982
+ const inst = Object.assign(
2983
+ Object.create(Object.getPrototypeOf(this)),
2984
+ this
2985
+ );
2986
+ inst.options = DeepMerge.merge(Object.create(null), this.options);
2987
+ return inst;
2370
2988
  }
2371
- clone = () => Object.assign(Object.create(Object.getPrototypeOf(this)), this);
2372
2989
  reset() {
2373
- for (const k in this.options) delete this.options[k];
2990
+ this.options = Object.create(null);
2374
2991
  return this;
2375
2992
  }
2376
2993
  setOptions(opt) {
2994
+ OptionsValidator.validateOptions(opt);
2377
2995
  this.options = opt;
2378
2996
  return this;
2379
2997
  }
2380
2998
  mergeOptions(opt) {
2381
- merge(this.options, opt);
2999
+ DeepMerge.merge(this.options, opt);
3000
+ OptionsValidator.validateOptions(this.options);
2382
3001
  return this;
2383
3002
  }
2384
3003
  setSerializedOptions(opt) {
2385
- this.options = JSON.parse(opt);
2386
- return this;
3004
+ try {
3005
+ const parsed = JSON.parse(opt);
3006
+ OptionsValidator.validateOptions(parsed);
3007
+ this.options = parsed;
3008
+ return this;
3009
+ } catch (err) {
3010
+ if (err instanceof SyntaxError)
3011
+ throw new CmpStrValidationError(
3012
+ `Failed to parse serialized options, invalid JSON string`,
3013
+ { opt, error: err instanceof Error ? err.message : String(err) }
3014
+ );
3015
+ throw err;
3016
+ }
2387
3017
  }
2388
3018
  setOption(path, value) {
2389
- set(this.options, path, value);
3019
+ DeepMerge.set(this.options, path, value);
3020
+ OptionsValidator.validateOptions(this.options);
2390
3021
  return this;
2391
3022
  }
2392
3023
  rmvOption(path) {
2393
- rmv(this.options, path);
3024
+ DeepMerge.rmv(this.options, path);
2394
3025
  return this;
2395
3026
  }
2396
- setRaw = (enable) => this.setOption('raw', enable);
2397
- setMetric = (name) => this.setOption('metric', name);
2398
- setFlags = (flags) => this.setOption('flags', flags);
2399
- rmvFlags = () => this.rmvOption('flags');
2400
- setProcessors = (opt) => this.setOption('processors', opt);
2401
- rmvProcessors = () => this.rmvOption('processors');
2402
- getOptions = () => this.options;
2403
- getSerializedOptions = () => JSON.stringify(this.options);
2404
- getOption = (path) => get(this.options, path);
3027
+ setRaw(enable) {
3028
+ return this.setOption('raw', enable);
3029
+ }
3030
+ setMetric(name) {
3031
+ return this.setOption('metric', name);
3032
+ }
3033
+ setFlags(flags) {
3034
+ return this.setOption('flags', flags);
3035
+ }
3036
+ rmvFlags() {
3037
+ return this.rmvOption('flags');
3038
+ }
3039
+ setProcessors(opt) {
3040
+ return this.setOption('processors', opt);
3041
+ }
3042
+ rmvProcessors() {
3043
+ return this.rmvOption('processors');
3044
+ }
3045
+ getOptions() {
3046
+ return this.options;
3047
+ }
3048
+ getSerializedOptions() {
3049
+ return JSON.stringify(this.options);
3050
+ }
3051
+ getOption(path) {
3052
+ return DeepMerge.get(this.options, path);
3053
+ }
2405
3054
  test(a, b, opt) {
2406
3055
  return this.compute(a, b, opt, 'single');
2407
3056
  }
@@ -2440,15 +3089,35 @@ class CmpStr {
2440
3089
  const resolved = this.resolveOptions({ flags, processors });
2441
3090
  const test = this.prepare(needle, resolved);
2442
3091
  const hstk = this.prepare(haystack, resolved);
2443
- return haystack.filter((_, i) => hstk[i].includes(test));
3092
+ const out = [];
3093
+ for (let i = 0, len = hstk.length; i < len; i++) {
3094
+ if (hstk[i].includes(test)) out.push(haystack[i]);
3095
+ }
3096
+ return out;
2444
3097
  }
2445
3098
  matrix(input, opt) {
2446
- input = this.prepare(input, this.resolveOptions(opt));
2447
- return input.map((a) =>
2448
- this.compute(a, input, undefined, 'batch', true, true).map(
2449
- (b) => b.res ?? 0
2450
- )
2451
- );
3099
+ const resolved = this.resolveOptions(opt);
3100
+ const arr = this.prepare(input, resolved);
3101
+ const n = arr.length;
3102
+ const out = Array.from({ length: n }, () => new Array(n).fill(0));
3103
+ for (let i = 0; i < n; i++)
3104
+ for (let j = i; j < n; j++) {
3105
+ if (i === j) {
3106
+ out[i][j] = 1;
3107
+ } else {
3108
+ const score = this.compute(
3109
+ arr[i],
3110
+ arr[j],
3111
+ resolved,
3112
+ 'single',
3113
+ true,
3114
+ true
3115
+ ).res;
3116
+ out[i][j] = score;
3117
+ out[j][i] = score;
3118
+ }
3119
+ }
3120
+ return out;
2452
3121
  }
2453
3122
  phoneticIndex(input, algo, opt) {
2454
3123
  const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
@@ -2528,22 +3197,28 @@ class CmpStrAsync extends CmpStr {
2528
3197
  async computeAsync(a, b, opt, mode, raw, skip) {
2529
3198
  const resolved = this.resolveOptions(opt);
2530
3199
  this.assert('metric', resolved.metric);
2531
- const A = skip ? a : await this.prepareAsync(a, resolved);
2532
- const B = skip ? b : await this.prepareAsync(b, resolved);
2533
- if (
2534
- resolved.safeEmpty &&
2535
- ((Array.isArray(A) && A.length === 0) ||
2536
- (Array.isArray(B) && B.length === 0) ||
2537
- A === '' ||
2538
- B === '')
2539
- ) {
2540
- return [];
2541
- }
2542
- const metric = factory['metric'](resolved.metric, A, B, resolved.opt);
2543
- if (resolved.output !== 'prep') metric.setOriginal(a, b);
2544
- await metric.runAsync(mode);
2545
- const result = this.postProcess(metric.getResults(), resolved);
2546
- return this.output(result, raw ?? resolved.raw);
3200
+ return ErrorUtil.wrapAsync(
3201
+ async () => {
3202
+ const A = skip ? a : await this.prepareAsync(a, resolved);
3203
+ const B = skip ? b : await this.prepareAsync(b, resolved);
3204
+ if (
3205
+ resolved.safeEmpty &&
3206
+ ((Array.isArray(A) && A.length === 0) ||
3207
+ (Array.isArray(B) && B.length === 0) ||
3208
+ A === '' ||
3209
+ B === '')
3210
+ ) {
3211
+ return [];
3212
+ }
3213
+ const metric = factory['metric'](resolved.metric, A, B, resolved.opt);
3214
+ if (resolved.output !== 'prep') metric.setOriginal(a, b);
3215
+ await metric.runAsync(mode);
3216
+ const result = this.postProcess(metric.getResults(), resolved);
3217
+ return this.output(result, raw ?? resolved.raw);
3218
+ },
3219
+ `Failed to compute metric <${opt?.metric ?? this.options.metric}> for the given inputs`,
3220
+ { a, b, opt }
3221
+ );
2547
3222
  }
2548
3223
  async testAsync(a, b, opt) {
2549
3224
  return this.computeAsync(a, b, opt, 'single');
@@ -2581,23 +3256,40 @@ class CmpStrAsync extends CmpStr {
2581
3256
  const resolved = this.resolveOptions({ flags, processors });
2582
3257
  const test = await this.prepareAsync(needle, resolved);
2583
3258
  const hstk = await this.prepareAsync(haystack, resolved);
2584
- return haystack.filter((_, i) => hstk[i].includes(test));
3259
+ const out = [];
3260
+ for (let i = 0; i < hstk.length; i++) {
3261
+ if (hstk[i].includes(test)) out.push(haystack[i]);
3262
+ }
3263
+ return out;
2585
3264
  }
2586
3265
  async matrixAsync(input, opt) {
2587
- input = await this.prepareAsync(input, this.resolveOptions(opt));
2588
- return Promise.all(
2589
- input.map(
2590
- async (a) =>
2591
- await this.computeAsync(
2592
- a,
2593
- input,
2594
- undefined,
2595
- 'batch',
2596
- true,
2597
- true
2598
- ).then((r) => r.map((b) => b.res ?? 0))
2599
- )
2600
- );
3266
+ const resolved = this.resolveOptions(opt);
3267
+ const arr = await this.prepareAsync(input, resolved);
3268
+ const n = arr.length;
3269
+ const out = Array.from({ length: n }, () => new Array(n).fill(0));
3270
+ for (let i = 0; i < n; i++) {
3271
+ await Promise.all(
3272
+ Array.from({ length: n - i }, (_, k) => i + k).map(async (j) => {
3273
+ if (i === j) {
3274
+ out[i][j] = 1;
3275
+ } else {
3276
+ const score = (
3277
+ await this.computeAsync(
3278
+ arr[i],
3279
+ arr[j],
3280
+ resolved,
3281
+ 'single',
3282
+ true,
3283
+ true
3284
+ )
3285
+ ).res;
3286
+ out[i][j] = score;
3287
+ out[j][i] = score;
3288
+ }
3289
+ })
3290
+ );
3291
+ }
3292
+ return out;
2601
3293
  }
2602
3294
  async phoneticIndexAsync(input, algo, opt) {
2603
3295
  const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
@@ -2645,6 +3337,7 @@ class CmpStrAsync extends CmpStr {
2645
3337
  export {
2646
3338
  CmpStr,
2647
3339
  CmpStrAsync,
3340
+ Errors as CmpStrError,
2648
3341
  DeepMerge,
2649
3342
  DiffChecker,
2650
3343
  Filter,
@@ -2653,6 +3346,7 @@ export {
2653
3346
  Metric,
2654
3347
  MetricRegistry,
2655
3348
  Normalizer,
3349
+ OptionsValidator,
2656
3350
  Phonetic,
2657
3351
  PhoneticMappingRegistry,
2658
3352
  PhoneticRegistry,
@@ -2661,4 +3355,3 @@ export {
2661
3355
  StructuredData,
2662
3356
  TextAnalyzer
2663
3357
  };
2664
- //# sourceMappingURL=CmpStr.esm.js.map