cmpstr 3.0.2 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/CmpStr.esm.js +2228 -4930
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +2 -2
- package/dist/CmpStr.esm.min.js.map +1 -1
- package/dist/CmpStr.umd.js +2348 -5026
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +2 -2
- package/dist/CmpStr.umd.min.js.map +1 -1
- package/dist/cjs/CmpStr.cjs +10 -404
- package/dist/cjs/CmpStr.cjs.map +1 -1
- package/dist/cjs/CmpStrAsync.cjs +10 -220
- package/dist/cjs/CmpStrAsync.cjs.map +1 -1
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +1 -56
- package/dist/cjs/metric/Cosine.cjs.map +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +2 -64
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +1 -56
- package/dist/cjs/metric/DiceSorensen.cjs.map +1 -1
- package/dist/cjs/metric/Hamming.cjs +2 -51
- package/dist/cjs/metric/Hamming.cjs.map +1 -1
- package/dist/cjs/metric/Jaccard.cjs +1 -48
- package/dist/cjs/metric/Jaccard.cjs.map +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +1 -53
- package/dist/cjs/metric/JaroWinkler.cjs.map +1 -1
- package/dist/cjs/metric/LCS.cjs +1 -54
- package/dist/cjs/metric/LCS.cjs.map +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +2 -60
- package/dist/cjs/metric/Levenshtein.cjs.map +1 -1
- package/dist/cjs/metric/Metric.cjs +2 -262
- package/dist/cjs/metric/Metric.cjs.map +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs +4 -56
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs +4 -58
- package/dist/cjs/metric/SmithWaterman.cjs.map +1 -1
- package/dist/cjs/metric/qGram.cjs +1 -55
- package/dist/cjs/metric/qGram.cjs.map +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +1 -78
- package/dist/cjs/phonetic/Caverphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Cologne.cjs +1 -43
- package/dist/cjs/phonetic/Cologne.cjs.map +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +1 -76
- package/dist/cjs/phonetic/Metaphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +1 -261
- package/dist/cjs/phonetic/Phonetic.cjs.map +1 -1
- package/dist/cjs/phonetic/Soundex.cjs +1 -47
- package/dist/cjs/phonetic/Soundex.cjs.map +1 -1
- package/dist/cjs/root.cjs +37 -0
- package/dist/cjs/root.cjs.map +1 -0
- package/dist/cjs/utils/DeepMerge.cjs +8 -75
- package/dist/cjs/utils/DeepMerge.cjs.map +1 -1
- package/dist/cjs/utils/DiffChecker.cjs +2 -190
- package/dist/cjs/utils/DiffChecker.cjs.map +1 -1
- package/dist/cjs/utils/Filter.cjs +1 -112
- package/dist/cjs/utils/Filter.cjs.map +1 -1
- package/dist/cjs/utils/HashTable.cjs +1 -99
- package/dist/cjs/utils/HashTable.cjs.map +1 -1
- package/dist/cjs/utils/Normalizer.cjs +3 -94
- package/dist/cjs/utils/Normalizer.cjs.map +1 -1
- package/dist/cjs/utils/Pool.cjs +1 -105
- package/dist/cjs/utils/Pool.cjs.map +1 -1
- package/dist/cjs/utils/Profiler.cjs +1 -133
- package/dist/cjs/utils/Profiler.cjs.map +1 -1
- package/dist/cjs/utils/Registry.cjs +2 -90
- package/dist/cjs/utils/Registry.cjs.map +1 -1
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -180
- package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -1
- package/dist/esm/CmpStr.mjs +10 -404
- package/dist/esm/CmpStr.mjs.map +1 -1
- package/dist/esm/CmpStrAsync.mjs +10 -220
- package/dist/esm/CmpStrAsync.mjs.map +1 -1
- package/dist/esm/index.mjs +1 -1
- package/dist/esm/metric/Cosine.mjs +1 -56
- package/dist/esm/metric/Cosine.mjs.map +1 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs +2 -64
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -1
- package/dist/esm/metric/DiceSorensen.mjs +1 -56
- package/dist/esm/metric/DiceSorensen.mjs.map +1 -1
- package/dist/esm/metric/Hamming.mjs +2 -51
- package/dist/esm/metric/Hamming.mjs.map +1 -1
- package/dist/esm/metric/Jaccard.mjs +1 -48
- package/dist/esm/metric/Jaccard.mjs.map +1 -1
- package/dist/esm/metric/JaroWinkler.mjs +1 -53
- package/dist/esm/metric/JaroWinkler.mjs.map +1 -1
- package/dist/esm/metric/LCS.mjs +1 -54
- package/dist/esm/metric/LCS.mjs.map +1 -1
- package/dist/esm/metric/Levenshtein.mjs +2 -60
- package/dist/esm/metric/Levenshtein.mjs.map +1 -1
- package/dist/esm/metric/Metric.mjs +2 -262
- package/dist/esm/metric/Metric.mjs.map +1 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs +4 -56
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -1
- package/dist/esm/metric/SmithWaterman.mjs +4 -58
- package/dist/esm/metric/SmithWaterman.mjs.map +1 -1
- package/dist/esm/metric/qGram.mjs +1 -55
- package/dist/esm/metric/qGram.mjs.map +1 -1
- package/dist/esm/phonetic/Caverphone.mjs +1 -78
- package/dist/esm/phonetic/Caverphone.mjs.map +1 -1
- package/dist/esm/phonetic/Cologne.mjs +1 -43
- package/dist/esm/phonetic/Cologne.mjs.map +1 -1
- package/dist/esm/phonetic/Metaphone.mjs +1 -76
- package/dist/esm/phonetic/Metaphone.mjs.map +1 -1
- package/dist/esm/phonetic/Phonetic.mjs +1 -261
- package/dist/esm/phonetic/Phonetic.mjs.map +1 -1
- package/dist/esm/phonetic/Soundex.mjs +1 -47
- package/dist/esm/phonetic/Soundex.mjs.map +1 -1
- package/dist/esm/root.mjs +29 -0
- package/dist/esm/root.mjs.map +1 -0
- package/dist/esm/utils/DeepMerge.mjs +8 -76
- package/dist/esm/utils/DeepMerge.mjs.map +1 -1
- package/dist/esm/utils/DiffChecker.mjs +2 -190
- package/dist/esm/utils/DiffChecker.mjs.map +1 -1
- package/dist/esm/utils/Filter.mjs +1 -112
- package/dist/esm/utils/Filter.mjs.map +1 -1
- package/dist/esm/utils/HashTable.mjs +1 -99
- package/dist/esm/utils/HashTable.mjs.map +1 -1
- package/dist/esm/utils/Normalizer.mjs +3 -94
- package/dist/esm/utils/Normalizer.mjs.map +1 -1
- package/dist/esm/utils/Pool.mjs +1 -105
- package/dist/esm/utils/Pool.mjs.map +1 -1
- package/dist/esm/utils/Profiler.mjs +1 -133
- package/dist/esm/utils/Profiler.mjs.map +1 -1
- package/dist/esm/utils/Registry.mjs +2 -90
- package/dist/esm/utils/Registry.mjs.map +1 -1
- package/dist/esm/utils/TextAnalyzer.mjs +1 -180
- package/dist/esm/utils/TextAnalyzer.mjs.map +1 -1
- package/dist/types/index.d.ts +3 -2
- package/dist/types/root.d.ts +38 -0
- package/dist/types/utils/Types.d.ts +1 -0
- package/package.json +15 -9
package/dist/esm/metric/LCS.mjs
CHANGED
|
@@ -1,86 +1,33 @@
|
|
|
1
|
-
// CmpStr v3.0.
|
|
1
|
+
// CmpStr v3.0.4 build-74e65a5-250915 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
import { MetricRegistry, Metric } from './Metric.mjs';
|
|
3
3
|
import { Pool } from '../utils/Pool.mjs';
|
|
4
4
|
|
|
5
|
-
/**
|
|
6
|
-
* Longest Common Subsequence (LCS)
|
|
7
|
-
* src/metric/LCS.ts
|
|
8
|
-
*
|
|
9
|
-
* @see https://en.wikipedia.org/wiki/Longest_common_subsequence
|
|
10
|
-
*
|
|
11
|
-
* The Longest Common Subsequence (LCS) metric measures the length of the longest
|
|
12
|
-
* subsequence common to both strings. Unlike substrings, the characters of a
|
|
13
|
-
* subsequence do not need to be contiguous, but must appear in the same order.
|
|
14
|
-
*
|
|
15
|
-
* The LCS is widely used in diff tools, bioinformatics, and approximate string
|
|
16
|
-
* matching.
|
|
17
|
-
*
|
|
18
|
-
* @module Metric/LCS
|
|
19
|
-
* @author Paul Köhler (komed3)
|
|
20
|
-
* @license MIT
|
|
21
|
-
*/
|
|
22
|
-
/**
|
|
23
|
-
* LCSMetric class extends the Metric class to implement the Longest Common Subsequence algorithm.
|
|
24
|
-
*/
|
|
25
5
|
class LCSMetric extends Metric {
|
|
26
|
-
/**
|
|
27
|
-
* Constructor for the LCSMetric class.
|
|
28
|
-
*
|
|
29
|
-
* Initializes the LCS metric with two input strings or
|
|
30
|
-
* arrays of strings and optional options.
|
|
31
|
-
*
|
|
32
|
-
* @param {MetricInput} a - First input string or array of strings
|
|
33
|
-
* @param {MetricInput} b - Second input string or array of strings
|
|
34
|
-
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
35
|
-
*/
|
|
36
6
|
constructor(a, b, opt = {}) {
|
|
37
|
-
// Call the parent Metric constructor with the metric name and inputs
|
|
38
|
-
// Metric is symmetrical
|
|
39
7
|
super('lcs', a, b, opt, true);
|
|
40
8
|
}
|
|
41
|
-
/**
|
|
42
|
-
* Calculates the normalized LCS similarity between two strings.
|
|
43
|
-
*
|
|
44
|
-
* @param {string} a - First string
|
|
45
|
-
* @param {string} b - Second string
|
|
46
|
-
* @param {number} m - Length of the first string
|
|
47
|
-
* @param {number} n - Length of the second string
|
|
48
|
-
* @param {number} maxLen - Maximum length of the strings
|
|
49
|
-
* @return {MetricCompute<LCSRaw>} - Object containing the similarity result and raw LCS length
|
|
50
|
-
*/
|
|
51
9
|
compute(a, b, m, n, maxLen) {
|
|
52
|
-
// Get two reusable arrays from the Pool for the DP rows
|
|
53
10
|
const len = m + 1;
|
|
54
11
|
const [prev, curr] = Pool.acquireMany('uint16', [len, len]);
|
|
55
|
-
// Initialize the first row to zeros
|
|
56
12
|
for (let i = 0; i <= m; i++) prev[i] = 0;
|
|
57
|
-
// Fill the DP matrix row by row (over the longer string)
|
|
58
13
|
for (let j = 1; j <= n; j++) {
|
|
59
14
|
curr[0] = 0;
|
|
60
|
-
// Get the character code of the current character in b
|
|
61
15
|
const cb = b.charCodeAt(j - 1);
|
|
62
16
|
for (let i = 1; i <= m; i++) {
|
|
63
|
-
// If characters match, increment the LCS length
|
|
64
17
|
if (a.charCodeAt(i - 1) === cb) curr[i] = prev[i - 1] + 1;
|
|
65
|
-
// Otherwise, take the maximum of the left or above cell
|
|
66
18
|
else curr[i] = Math.max(prev[i], curr[i - 1]);
|
|
67
19
|
}
|
|
68
|
-
// Copy current row to previous for next iteration
|
|
69
20
|
prev.set(curr);
|
|
70
21
|
}
|
|
71
|
-
// The last value in prev is the LCS length
|
|
72
22
|
const lcs = prev[m];
|
|
73
|
-
// Release arrays back to the pool
|
|
74
23
|
Pool.release('uint16', prev, len);
|
|
75
24
|
Pool.release('uint16', curr, len);
|
|
76
|
-
// Normalize by the length of the longer string
|
|
77
25
|
return {
|
|
78
26
|
res: maxLen === 0 ? 1 : Metric.clamp(lcs / maxLen),
|
|
79
27
|
raw: { lcs, maxLen }
|
|
80
28
|
};
|
|
81
29
|
}
|
|
82
30
|
}
|
|
83
|
-
// Register the Longest Common Subsequence (LCS) in the metric registry
|
|
84
31
|
MetricRegistry.add('lcs', LCSMetric);
|
|
85
32
|
|
|
86
33
|
export { LCSMetric };
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LCS.mjs","sources":["../../../src/metric/LCS.ts"],"sourcesContent":[null],"names":[],"mappings":";;;;
|
|
1
|
+
{"version":3,"file":"LCS.mjs","sources":["../../../src/metric/LCS.ts"],"sourcesContent":[null],"names":[],"mappings":";;;;AAgCM,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAU,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAQ,CAAA,CAAA,CAAA,CAAA,CAAA,CAAc,CAAA;AAYzC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAc,CAAc,CAAA,CAAE,CAAc,CAAA,CAAE,CAAA,CAAA,IAAqB,CAAA,CAAE,CAAA,CAAA;IAIjE,CAAA,CAAA,CAAA,CAAA,CAAK,CAAG,CAAA,CAAA,CAAA,CAAA,CAAK,CAAA,CAAE,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAA,CAAA,CAAG,CAAA,CAAE,CAAA,CAAA,CAAA,CAAI,CAAE;EAEpC;EAYmB,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CACtB,CAAS,CAAA,CAAE,CAAS,CAAA,CAAE,CAAS,CAAA,CAAE,CAAS,CAAA,CAC1C,CAAA,CAAA,CAAA,CAAA,CAAA,CAAc,CAAA,CAAA;AAId,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAG,CAAA,CAAA,CAAW,CAAC,GAAG,CAAC;AACzB,CAAA,CAAA,CAAA,CAAA,MAAM,CAAE,CAAA,CAAA,CAAA,CAAI,EAAE,CAAA,CAAA,CAAA,CAAI,CAAE,GAAG,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAW,CAAE,QAAQ,CAAA,CAAE,CAAE,GAAG,CAAA,CAAE,CAAA,CAAA,CAAG,CAAE,CAAE;IAGjE,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAC,CAAA,CAAE,CAAC,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAG,CAAA,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,GAAG,CAAC;AAG5C,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAC,CAAA,CAAE,CAAC,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAA,CAAG;AAE3B,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,GAAG,CAAC;MAGb,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAE,CAAA,CAAA,CAAW,CAAC,CAAC,UAAU,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE;AAExC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAC,CAAA,CAAE,CAAC,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAA,CAAG;QAG3B,CAAA,CAAA,CAAA,CAAK,CAAC,CAAC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAU,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE,CAAA,CAAA,CAAA,CAAA,CAAK,CAAA,CAAE,CAAG,CAAA,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,CAAA,CAAA,CAAG,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE,CAAA,CAAA,CAAG,CAAC;aAG5D,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,GAAG,CAAA,CAAA,CAAA,CAAI,CAAC,GAAG,CAAE,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,EAAE,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE,CAAE;MAEzD;AAGA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAE,IAAI,CAAE;IAEpB;AAGA,CAAA,CAAA,CAAA,CAAA,MAAM,CAAA,CAAA,CAAG,CAAA,CAAA,CAAW,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE;IAG7B,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CAAE,QAAQ,CAAA,CAAE,CAAA,CAAA,CAAA,CAAI,CAAA,CAAE,CAAA,CAAA,CAAG,CAAE;IACnC,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CAAE,QAAQ,CAAA,CAAE,CAAA,CAAA,CAAA,CAAI,CAAA,CAAE,CAAA,CAAA,CAAG,CAAE;IAGnC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO;AACH,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAG,CAAA,CAAE,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAA,CAAK,CAAC,CAAA,CAAA,CAAG,CAAC,CAAA,CAAA,CAAG,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAC,CAAA,CAAA,CAAA,CAAA,CAAK,CAAE,CAAA,CAAA,CAAG,CAAA,CAAA,CAAG,MAAM,CAAE;AACpD,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAG,CAAA,CAAE,CAAA,CAAE,CAAA,CAAA,CAAG,EAAE,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA;KACrB;EAEL;AAEH;AAGD,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAc,CAAC,CAAA,CAAA,CAAG,CAAE,CAAA,CAAA,CAAA,CAAA,CAAK,CAAA,CAAE,SAAS,CAAE;;;"}
|
|
@@ -1,91 +1,33 @@
|
|
|
1
|
-
// CmpStr v3.0.
|
|
1
|
+
// CmpStr v3.0.4 build-74e65a5-250915 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
import { MetricRegistry, Metric } from './Metric.mjs';
|
|
3
3
|
import { Pool } from '../utils/Pool.mjs';
|
|
4
4
|
|
|
5
|
-
/**
|
|
6
|
-
* Levenshtein Distance
|
|
7
|
-
* src/metric/Levenshtein.ts
|
|
8
|
-
*
|
|
9
|
-
* @see https://en.wikipedia.org/wiki/Levenshtein_distance
|
|
10
|
-
*
|
|
11
|
-
* The Levenshtein distance is a classic metric for measuring the minimum number
|
|
12
|
-
* of single-character edits (insertions, deletions, or substitutions) required
|
|
13
|
-
* to change one string into another.
|
|
14
|
-
*
|
|
15
|
-
* It is widely used in approximate string matching, spell checking, and natural
|
|
16
|
-
* language processing.
|
|
17
|
-
*
|
|
18
|
-
* @module Metric/LevenshteinDistance
|
|
19
|
-
* @author Paul Köhler (komed3)
|
|
20
|
-
* @license MIT
|
|
21
|
-
*/
|
|
22
|
-
/**
|
|
23
|
-
* LevenshteinDistance class extends the Metric class to implement the Levenshtein distance algorithm.
|
|
24
|
-
*/
|
|
25
5
|
class LevenshteinDistance extends Metric {
|
|
26
|
-
/**
|
|
27
|
-
* Constructor for the Levenshtein class.
|
|
28
|
-
*
|
|
29
|
-
* Initializes the Levenshtein metric with two input strings
|
|
30
|
-
* or arrays of strings and optional options.
|
|
31
|
-
*
|
|
32
|
-
* @param {MetricInput} a - First input string or array of strings
|
|
33
|
-
* @param {MetricInput} b - Second input string or array of strings
|
|
34
|
-
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
35
|
-
*/
|
|
36
6
|
constructor(a, b, opt = {}) {
|
|
37
|
-
// Call the parent Metric constructor with the metric name and inputs
|
|
38
|
-
// Metric is symmetrical
|
|
39
7
|
super('levenshtein', a, b, opt, true);
|
|
40
8
|
}
|
|
41
|
-
/**
|
|
42
|
-
* Calculates the Levenshtein distance between two strings.
|
|
43
|
-
*
|
|
44
|
-
* @param {string} a - First string
|
|
45
|
-
* @param {string} b - Second string
|
|
46
|
-
* @param {number} m - Length of the first string
|
|
47
|
-
* @param {number} n - Length of the second string
|
|
48
|
-
* @param {number} maxLen - Maximum length of the strings
|
|
49
|
-
* @return {MetricCompute<LevenshteinRaw>} - Object containing the similarity result and raw distance
|
|
50
|
-
*/
|
|
51
9
|
compute(a, b, m, n, maxLen) {
|
|
52
|
-
// Get two reusable arrays from the Pool for the DP rows
|
|
53
10
|
const len = m + 1;
|
|
54
11
|
const [prev, curr] = Pool.acquireMany('uint16', [len, len]);
|
|
55
|
-
// Initialize the first row (edit distances from empty string to a)
|
|
56
12
|
for (let i = 0; i <= m; i++) prev[i] = i;
|
|
57
|
-
// Fill the DP matrix row by row (over the longer string)
|
|
58
13
|
for (let j = 1; j <= n; j++) {
|
|
59
|
-
// Cost of transforming empty string to b[0..j]
|
|
60
14
|
curr[0] = j;
|
|
61
|
-
// Get the character code of the current character in b
|
|
62
15
|
const cb = b.charCodeAt(j - 1);
|
|
63
16
|
for (let i = 1; i <= m; i++) {
|
|
64
|
-
// Cost is 0 if characters match, 1 otherwise
|
|
65
17
|
const cost = a.charCodeAt(i - 1) === cb ? 0 : 1;
|
|
66
|
-
|
|
67
|
-
curr[i] = Math.min(
|
|
68
|
-
curr[i - 1] + 1, // Insertion
|
|
69
|
-
prev[i] + 1, // Deletion
|
|
70
|
-
prev[i - 1] + cost // Substitution
|
|
71
|
-
);
|
|
18
|
+
curr[i] = Math.min(curr[i - 1] + 1, prev[i] + 1, prev[i - 1] + cost);
|
|
72
19
|
}
|
|
73
|
-
// Copy current row to previous for next iteration
|
|
74
20
|
prev.set(curr);
|
|
75
21
|
}
|
|
76
|
-
// The last value in prev is the Levenshtein distance
|
|
77
22
|
const dist = prev[m];
|
|
78
|
-
// Release arrays back to the pool
|
|
79
23
|
Pool.release('uint16', prev, len);
|
|
80
24
|
Pool.release('uint16', curr, len);
|
|
81
|
-
// Return the result as a MetricCompute object
|
|
82
25
|
return {
|
|
83
26
|
res: maxLen === 0 ? 1 : Metric.clamp(1 - dist / maxLen),
|
|
84
27
|
raw: { dist, maxLen }
|
|
85
28
|
};
|
|
86
29
|
}
|
|
87
30
|
}
|
|
88
|
-
// Register the Levenshtein distance in the metric registry
|
|
89
31
|
MetricRegistry.add('levenshtein', LevenshteinDistance);
|
|
90
32
|
|
|
91
33
|
export { LevenshteinDistance };
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"Levenshtein.mjs","sources":["../../../src/metric/Levenshtein.ts"],"sourcesContent":[null],"names":[],"mappings":";;;;
|
|
1
|
+
{"version":3,"file":"Levenshtein.mjs","sources":["../../../src/metric/Levenshtein.ts"],"sourcesContent":[null],"names":[],"mappings":";;;;AAgCM,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAoB,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAQ,CAAA,CAAA,CAAA,CAAA,CAAA,CAAsB,CAAA;AAY3D,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAc,CAAc,CAAA,CAAE,CAAc,CAAA,CAAE,CAAA,CAAA,IAAqB,CAAA,CAAE,CAAA,CAAA;IAIjE,CAAA,CAAA,CAAA,CAAA,CAAK,CAAG,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAa,CAAA,CAAE,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAA,CAAA,CAAG,CAAA,CAAE,CAAA,CAAA,CAAA,CAAI,CAAE;EAE5C;EAYmB,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CACtB,CAAS,CAAA,CAAE,CAAS,CAAA,CAAE,CAAS,CAAA,CAAE,CAAS,CAAA,CAC1C,CAAA,CAAA,CAAA,CAAA,CAAA,CAAc,CAAA,CAAA;AAId,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAG,CAAA,CAAA,CAAW,CAAC,GAAG,CAAC;AACzB,CAAA,CAAA,CAAA,CAAA,MAAM,CAAE,CAAA,CAAA,CAAA,CAAI,EAAE,CAAA,CAAA,CAAA,CAAI,CAAE,GAAG,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAW,CAAE,QAAQ,CAAA,CAAE,CAAE,GAAG,CAAA,CAAE,CAAA,CAAA,CAAG,CAAE,CAAE;IAGjE,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAC,CAAA,CAAE,CAAC,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAG,CAAA,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,GAAG,CAAC;AAG5C,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAC,CAAA,CAAE,CAAC,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAA,CAAG;AAG3B,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,GAAG,CAAC;MAGb,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAE,CAAA,CAAA,CAAW,CAAC,CAAC,UAAU,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE;AAExC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAC,CAAA,CAAE,CAAC,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAA,CAAG;QAG3B,MAAM,CAAA,CAAA,CAAA,CAAI,CAAA,CAAA,CAAW,CAAC,CAAC,UAAU,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE,CAAA,CAAA,CAAA,CAAA,CAAK,CAAA,CAAE,CAAA,CAAA,CAAG,CAAC,GAAG,CAAC;AAGzD,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,IAAI,CAAE,CAAC,CAAE,CAAA,CAAA,CAAG,IAAI,CAAC,CAAA,CAAA,CAAG,CAChB,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE,GAAG,CAAC,CACjB,CAAA,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,GAAG,CAAC,EACb,IAAI,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE,CAAA,CAAA,CAAG,CAAA,CAAA,CAAA,EACnB;MAEL;AAGA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAE,IAAI,CAAE;IAEpB;AAGA,CAAA,CAAA,CAAA,CAAA,MAAM,CAAA,CAAA,CAAA,CAAI,CAAA,CAAA,CAAW,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE;IAG9B,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CAAE,QAAQ,CAAA,CAAE,CAAA,CAAA,CAAA,CAAI,CAAA,CAAE,CAAA,CAAA,CAAG,CAAE;IACnC,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CAAE,QAAQ,CAAA,CAAE,CAAA,CAAA,CAAA,CAAI,CAAA,CAAE,CAAA,CAAA,CAAG,CAAE;IAGnC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO;MACH,GAAG,CAAA,CAAE,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAA,CAAK,CAAC,GAAG,CAAC,CAAA,CAAA,CAAG,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAC,KAAK,CAAE,CAAC,GAAG,CAAA,CAAA,CAAA,CAAI,CAAA,CAAA,CAAG,MAAM,CAAE;AACzD,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAG,CAAA,CAAE,CAAA,CAAE,CAAA,CAAA,CAAA,CAAI,EAAE,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA;KACtB;EAEL;AAEH;AAGD,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAc,CAAC,CAAA,CAAA,CAAG,CAAE,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAa,CAAA,CAAE,mBAAmB,CAAE;;;"}
|
|
@@ -1,188 +1,67 @@
|
|
|
1
|
-
// CmpStr v3.0.
|
|
1
|
+
// CmpStr v3.0.4 build-74e65a5-250915 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
import { Registry } from '../utils/Registry.mjs';
|
|
3
3
|
import { HashTable } from '../utils/HashTable.mjs';
|
|
4
4
|
import { Profiler } from '../utils/Profiler.mjs';
|
|
5
5
|
|
|
6
|
-
/**
|
|
7
|
-
* Abstract Metric
|
|
8
|
-
* src/metric/Metric.ts
|
|
9
|
-
*
|
|
10
|
-
* This module defines an abstract class for string metrics, providing a framework for
|
|
11
|
-
* computing various string similarity metrics. It includes methods for running metrics
|
|
12
|
-
* in different modes (single, batch, pairwise) synchronous or asynchronous and caching
|
|
13
|
-
* results to optimize performance. The class is designed to be extended by specific
|
|
14
|
-
* metric implementations like the Levenshtein distance or Jaro-Winkler similarity.
|
|
15
|
-
*
|
|
16
|
-
* It provides:
|
|
17
|
-
* - A base class for string metrics with common functionality
|
|
18
|
-
* - Methods for running metrics in different modes
|
|
19
|
-
* - Pre-computation for trivial cases to optimize performance
|
|
20
|
-
* - Caching of metric computations to avoid redundant calculations
|
|
21
|
-
* - Support for symmetrical metrics (same result for inputs in any order)
|
|
22
|
-
* - Performance tracking capabilities (Profiler)
|
|
23
|
-
* - Asynchronous execution support for metrics
|
|
24
|
-
*
|
|
25
|
-
* This class is intended to be extended by specific metric implementations that will
|
|
26
|
-
* implement the `compute` method to define the specific metric computation logic.
|
|
27
|
-
*
|
|
28
|
-
* @module Metric
|
|
29
|
-
* @author Paul Köhler (komed3)
|
|
30
|
-
* @license MIT
|
|
31
|
-
*/
|
|
32
|
-
// Get the singleton profiler instance for performance monitoring
|
|
33
6
|
const profiler = Profiler.getInstance();
|
|
34
|
-
/**
|
|
35
|
-
* Abstract class representing a generic string metric.
|
|
36
|
-
*
|
|
37
|
-
* @abstract
|
|
38
|
-
* @template R - The type of the raw result, defaulting to `MetricRaw`.
|
|
39
|
-
*/
|
|
40
7
|
class Metric {
|
|
41
|
-
// Cache for metric computations to avoid redundant calculations
|
|
42
8
|
static cache = new HashTable();
|
|
43
|
-
// Metric name for identification
|
|
44
9
|
metric;
|
|
45
|
-
// Inputs for the metric computation, transformed into arrays
|
|
46
10
|
a;
|
|
47
11
|
b;
|
|
48
|
-
// Store original inputs for result mapping
|
|
49
12
|
origA = [];
|
|
50
13
|
origB = [];
|
|
51
|
-
// Options for the metric computation, such as performance tracking
|
|
52
14
|
options;
|
|
53
|
-
// Indicates whether the metric is symmetric (same result for inputs in any order)
|
|
54
15
|
symmetric;
|
|
55
|
-
/**
|
|
56
|
-
* Result of the metric computation, which can be a single result or an array of results.
|
|
57
|
-
* This will be populated after running the metric.
|
|
58
|
-
*/
|
|
59
16
|
results;
|
|
60
|
-
/**
|
|
61
|
-
* Static method to clear the cache of metric computations.
|
|
62
|
-
*/
|
|
63
17
|
static clear() {
|
|
64
18
|
this.cache.clear();
|
|
65
19
|
}
|
|
66
|
-
/**
|
|
67
|
-
* Swaps two strings and their lengths if the first is longer than the second.
|
|
68
|
-
*
|
|
69
|
-
* @param {string} a - First string
|
|
70
|
-
* @param {string} b - Second string
|
|
71
|
-
* @param {number} m - Length of the first string
|
|
72
|
-
* @param {number} n - Length of the second string
|
|
73
|
-
* @returns {[string, string, number, number]} - Swapped strings and lengths
|
|
74
|
-
*/
|
|
75
20
|
static swap(a, b, m, n) {
|
|
76
21
|
return m > n ? [b, a, n, m] : [a, b, m, n];
|
|
77
22
|
}
|
|
78
|
-
/**
|
|
79
|
-
* Clamps the similarity result between 0 and 1.
|
|
80
|
-
*
|
|
81
|
-
* @param {number} res - The input similarity to clamp
|
|
82
|
-
* @returns {number} - The clamped similarity (0 to 1)
|
|
83
|
-
*/
|
|
84
23
|
static clamp(res) {
|
|
85
24
|
return Math.max(0, Math.min(1, res));
|
|
86
25
|
}
|
|
87
|
-
/**
|
|
88
|
-
* Constructor for the Metric class.
|
|
89
|
-
* Initializes the metric with two inputs (strings or arrays of strings) and options.
|
|
90
|
-
*
|
|
91
|
-
* @param {string} metric - The name of the metric (e.g. 'levenshtein')
|
|
92
|
-
* @param {MetricInput} a - First input string or array of strings
|
|
93
|
-
* @param {MetricInput} b - Second input string or array of strings
|
|
94
|
-
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
95
|
-
* @param {boolean} [symmetric=false] - Whether the metric is symmetric (same result for inputs in any order)
|
|
96
|
-
* @throws {Error} - If inputs `a` or `b` are empty
|
|
97
|
-
*/
|
|
98
26
|
constructor(metric, a, b, opt = {}, symmetric = false) {
|
|
99
|
-
// Set the metric name
|
|
100
27
|
this.metric = metric;
|
|
101
|
-
// Set the inputs
|
|
102
28
|
this.a = Array.isArray(a) ? a : [a];
|
|
103
29
|
this.b = Array.isArray(b) ? b : [b];
|
|
104
|
-
// Validate inputs: ensure they are not empty
|
|
105
30
|
if (this.a.length === 0 || this.b.length === 0)
|
|
106
|
-
throw new Error(`
|
|
107
|
-
// Set options
|
|
31
|
+
throw new Error(`Inputs <a> and <b> must not be empty`);
|
|
108
32
|
this.options = opt;
|
|
109
33
|
this.symmetric = symmetric;
|
|
110
34
|
}
|
|
111
|
-
/**
|
|
112
|
-
* Pre-compute the metric for two strings.
|
|
113
|
-
* This method is called before the actual computation to handle trivial cases.
|
|
114
|
-
*
|
|
115
|
-
* @param {string} a - First string
|
|
116
|
-
* @param {string} b - Second string
|
|
117
|
-
* @param {number} m - Length of the first string
|
|
118
|
-
* @param {number} n - Length of the second string
|
|
119
|
-
* @returns {MetricCompute<R>|undefined} - Pre-computed result or undefined if not applicable
|
|
120
|
-
*/
|
|
121
35
|
preCompute(a, b, m, n) {
|
|
122
|
-
// If strings are identical, return a similarity of 1
|
|
123
36
|
if (a === b) return { res: 1 };
|
|
124
|
-
// If the lengths of both strings is less than 2, return a similarity of 0
|
|
125
37
|
if (m == 0 || n == 0 || (m < 2 && n < 2)) return { res: 0 };
|
|
126
38
|
return undefined;
|
|
127
39
|
}
|
|
128
|
-
/**
|
|
129
|
-
* Abstract method to be implemented by subclasses to perform the metric computation.
|
|
130
|
-
* This method should contain the logic for computing the metric between two strings.
|
|
131
|
-
*
|
|
132
|
-
* @param {string} a - First string
|
|
133
|
-
* @param {string} b - Second string
|
|
134
|
-
* @param {number} m - Length of the first string
|
|
135
|
-
* @param {number} n - Length of the second string
|
|
136
|
-
* @param {number} maxLen - Maximum length of the strings
|
|
137
|
-
* @returns {MetricCompute<R>} - The result of the metric computation
|
|
138
|
-
* @throws {Error} - If not overridden in a subclass
|
|
139
|
-
*/
|
|
140
40
|
compute(a, b, m, n, maxLen) {
|
|
141
41
|
throw new Error(`Method compute() must be overridden in a subclass`);
|
|
142
42
|
}
|
|
143
|
-
/**
|
|
144
|
-
* Run the metric computation for single inputs (two strings).
|
|
145
|
-
* Applies preCompute for trivial cases before cache lookup and computation.
|
|
146
|
-
*
|
|
147
|
-
* If the profiler is active, it will measure time and memory usage.
|
|
148
|
-
*
|
|
149
|
-
* @param {number} i - Pointer to the first string
|
|
150
|
-
* @param {number} j - Pointer to the second string
|
|
151
|
-
* @returns {MetricResultSingle<R>} - The result of the metric computation
|
|
152
|
-
*/
|
|
153
43
|
runSingle(i, j) {
|
|
154
|
-
// Type safety: convert inputs to strings
|
|
155
44
|
let a = String(this.a[i]),
|
|
156
45
|
A = a;
|
|
157
46
|
let b = String(this.b[j]),
|
|
158
47
|
B = b;
|
|
159
|
-
// Get lengths
|
|
160
48
|
let m = A.length,
|
|
161
49
|
n = B.length;
|
|
162
|
-
// Pre-compute trivial cases (identical, empty, etc.)
|
|
163
50
|
let result = this.preCompute(A, B, m, n);
|
|
164
51
|
if (!result) {
|
|
165
|
-
// If the profiler is enabled, measure; else, just run
|
|
166
52
|
result = profiler.run(() => {
|
|
167
|
-
// Generate a cache key based on the metric and pair of strings `a` and `b`
|
|
168
53
|
const key = Metric.cache.key(this.metric, [A, B], this.symmetric);
|
|
169
|
-
// If the key exists in the cache, return the cached result
|
|
170
|
-
// Otherwise, compute the metric using the algorithm
|
|
171
54
|
return (
|
|
172
55
|
Metric.cache.get(key || '') ??
|
|
173
56
|
(() => {
|
|
174
|
-
// If the metric is symmetrical, swap `a` and `b` (shorter string first)
|
|
175
57
|
if (this.symmetric) [A, B, m, n] = Metric.swap(A, B, m, n);
|
|
176
|
-
// Compute the similarity using the algorithm
|
|
177
58
|
const res = this.compute(A, B, m, n, Math.max(m, n));
|
|
178
|
-
// If a key was generated, store the result in the cache
|
|
179
59
|
if (key) Metric.cache.set(key, res);
|
|
180
60
|
return res;
|
|
181
61
|
})()
|
|
182
62
|
);
|
|
183
63
|
});
|
|
184
64
|
}
|
|
185
|
-
// Build metric result object
|
|
186
65
|
return {
|
|
187
66
|
metric: this.metric,
|
|
188
67
|
a: this.origA[i] ?? a,
|
|
@@ -190,114 +69,45 @@ class Metric {
|
|
|
190
69
|
...result
|
|
191
70
|
};
|
|
192
71
|
}
|
|
193
|
-
/**
|
|
194
|
-
* Run the metric computation for single inputs (two strings) asynchronously.
|
|
195
|
-
*
|
|
196
|
-
* @param {number} i - Pointer to the first string
|
|
197
|
-
* @param {number} j - Pointer to the second string
|
|
198
|
-
* @returns {Promise<MetricResultSingle<R>>} - Promise resolving the result of the metric computation
|
|
199
|
-
*/
|
|
200
72
|
async runSingleAsync(i, j) {
|
|
201
73
|
return Promise.resolve(this.runSingle(i, j));
|
|
202
74
|
}
|
|
203
|
-
/**
|
|
204
|
-
* Run the metric computation for batch inputs (arrays of strings).
|
|
205
|
-
*
|
|
206
|
-
* It iterates through each string in the first array and computes the metric
|
|
207
|
-
* against each string in the second array.
|
|
208
|
-
*/
|
|
209
75
|
runBatch() {
|
|
210
76
|
const results = [];
|
|
211
|
-
// Loop through each combination of strings in a[] and b[]
|
|
212
77
|
for (let i = 0; i < this.a.length; i++)
|
|
213
78
|
for (let j = 0; j < this.b.length; j++)
|
|
214
79
|
results.push(this.runSingle(i, j));
|
|
215
|
-
// Populate the results
|
|
216
|
-
// `this.results` will be an array of MetricResultSingle
|
|
217
80
|
this.results = results;
|
|
218
81
|
}
|
|
219
|
-
/**
|
|
220
|
-
* Run the metric computation for batch inputs (arrays of strings) asynchronously.
|
|
221
|
-
*/
|
|
222
82
|
async runBatchAsync() {
|
|
223
83
|
const results = [];
|
|
224
|
-
// Loop through each combination of strings in a[] and b[]
|
|
225
84
|
for (let i = 0; i < this.a.length; i++)
|
|
226
85
|
for (let j = 0; j < this.b.length; j++)
|
|
227
86
|
results.push(await this.runSingleAsync(i, j));
|
|
228
|
-
// Populate the results
|
|
229
|
-
// `this.results` will be an array of MetricResultSingle
|
|
230
87
|
this.results = results;
|
|
231
88
|
}
|
|
232
|
-
/**
|
|
233
|
-
* Run the metric computation for pairwise inputs (A[i] vs B[i]).
|
|
234
|
-
*
|
|
235
|
-
* This method assumes that both `a` and `b` are arrays of equal length
|
|
236
|
-
* and computes the metric only for corresponding index pairs.
|
|
237
|
-
*/
|
|
238
89
|
runPairwise() {
|
|
239
90
|
const results = [];
|
|
240
|
-
// Compute metric for each corresponding pair
|
|
241
91
|
for (let i = 0; i < this.a.length; i++) results.push(this.runSingle(i, i));
|
|
242
|
-
// Populate the results
|
|
243
|
-
// `this.results` will be an array of MetricResultSingle
|
|
244
92
|
this.results = results;
|
|
245
93
|
}
|
|
246
|
-
/**
|
|
247
|
-
* Run the metric computation for pairwise inputs (A[i] vs B[i]) asynchronously.
|
|
248
|
-
*/
|
|
249
94
|
async runPairwiseAsync() {
|
|
250
95
|
const results = [];
|
|
251
|
-
// Compute metric for each corresponding pair
|
|
252
96
|
for (let i = 0; i < this.a.length; i++)
|
|
253
97
|
results.push(await this.runSingleAsync(i, i));
|
|
254
|
-
// Populate the results
|
|
255
|
-
// `this.results` will be an array of MetricResultSingle
|
|
256
98
|
this.results = results;
|
|
257
99
|
}
|
|
258
|
-
/**
|
|
259
|
-
* Set the original inputs to which the results of the metric calculation will refer.
|
|
260
|
-
*
|
|
261
|
-
* @param {MetricInput} [a] - original input(s) for a
|
|
262
|
-
* @param {MetricInput} [b] - original input(s) for b
|
|
263
|
-
*/
|
|
264
100
|
setOriginal(a, b) {
|
|
265
101
|
if (a) this.origA = Array.isArray(a) ? a : [a];
|
|
266
102
|
if (b) this.origB = Array.isArray(b) ? b : [b];
|
|
267
103
|
return this;
|
|
268
104
|
}
|
|
269
|
-
/**
|
|
270
|
-
* Check if the inputs are in batch mode.
|
|
271
|
-
*
|
|
272
|
-
* This method checks if either `a` or `b` contains more than one string,
|
|
273
|
-
* indicating that the metric is being run in batch mode.
|
|
274
|
-
*
|
|
275
|
-
* @returns {boolean} - True if either input is an array with more than one element
|
|
276
|
-
*/
|
|
277
105
|
isBatch() {
|
|
278
106
|
return this.a.length > 1 || this.b.length > 1;
|
|
279
107
|
}
|
|
280
|
-
/**
|
|
281
|
-
* Check if the inputs are in single mode.
|
|
282
|
-
*
|
|
283
|
-
* This method checks if both `a` and `b` are single strings (not arrays),
|
|
284
|
-
* indicating that the metric is being run on a single pair of strings.
|
|
285
|
-
*
|
|
286
|
-
* @returns {boolean} - True if both inputs are single strings
|
|
287
|
-
*/
|
|
288
108
|
isSingle() {
|
|
289
109
|
return !this.isBatch();
|
|
290
110
|
}
|
|
291
|
-
/**
|
|
292
|
-
* Check if the inputs are in pairwise mode.
|
|
293
|
-
*
|
|
294
|
-
* This method checks if both `a` and `b` are arrays of the same length,
|
|
295
|
-
* indicating that the metric is being run on corresponding pairs of strings.
|
|
296
|
-
*
|
|
297
|
-
* @returns {boolean} - True if both inputs are arrays of equal length
|
|
298
|
-
* @param {boolean} [safe=false] - If true, does not throw an error if lengths are not equal
|
|
299
|
-
* @throws {Error} - If `safe` is false and the lengths of `a` and `b` are not equal
|
|
300
|
-
*/
|
|
301
111
|
isPairwise(safe = false) {
|
|
302
112
|
return this.isBatch() && this.a.length === this.b.length
|
|
303
113
|
? true
|
|
@@ -306,136 +116,66 @@ class Metric {
|
|
|
306
116
|
throw new Error(`Mode <pairwise> requires arrays of equal length`);
|
|
307
117
|
})();
|
|
308
118
|
}
|
|
309
|
-
/**
|
|
310
|
-
* Check if the metric is symmetrical.
|
|
311
|
-
*
|
|
312
|
-
* This method returns whether the metric is symmetric, meaning it produces the same
|
|
313
|
-
* result regardless of the order of inputs (e.g., Levenshtein distance).
|
|
314
|
-
*
|
|
315
|
-
* @returns {boolean} - True if the metric is symmetric
|
|
316
|
-
*/
|
|
317
119
|
isSymmetrical() {
|
|
318
120
|
return this.symmetric;
|
|
319
121
|
}
|
|
320
|
-
/**
|
|
321
|
-
* Determine which mode to run the metric in.
|
|
322
|
-
*
|
|
323
|
-
* This method checks the provided mode or defaults to the mode specified in options.
|
|
324
|
-
* If no mode is specified, it defaults to 'default'.
|
|
325
|
-
*
|
|
326
|
-
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
|
|
327
|
-
* @returns {MetricMode} - The determined mode
|
|
328
|
-
*/
|
|
329
122
|
whichMode(mode) {
|
|
330
123
|
return mode ?? this.options?.mode ?? 'default';
|
|
331
124
|
}
|
|
332
|
-
/**
|
|
333
|
-
* Clear the cached results of the metric.
|
|
334
|
-
*
|
|
335
|
-
* This method resets the `results` property to `undefined`, effectively clearing
|
|
336
|
-
* any previously computed results. It can be useful for re-running the metric
|
|
337
|
-
* with new inputs or options.
|
|
338
|
-
*/
|
|
339
125
|
clear() {
|
|
340
126
|
this.results = undefined;
|
|
341
127
|
}
|
|
342
|
-
/**
|
|
343
|
-
* Run the metric computation based on the specified mode.
|
|
344
|
-
*
|
|
345
|
-
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
|
|
346
|
-
* @param {boolean} [clear=true] - Whether to clear previous results before running
|
|
347
|
-
* @throws {Error} - If an unsupported mode is specified
|
|
348
|
-
*/
|
|
349
128
|
run(mode, clear = true) {
|
|
350
|
-
// Clear previous results if requested
|
|
351
129
|
if (clear) this.clear();
|
|
352
130
|
switch (this.whichMode(mode)) {
|
|
353
|
-
// Default mode runs the metric on single inputs or falls back to batch mode
|
|
354
131
|
case 'default':
|
|
355
132
|
if (this.isSingle()) {
|
|
356
133
|
this.results = this.runSingle(0, 0);
|
|
357
134
|
break;
|
|
358
135
|
}
|
|
359
|
-
// Batch mode runs the metric on all combinations of a[] and b[]
|
|
360
136
|
case 'batch':
|
|
361
137
|
this.runBatch();
|
|
362
138
|
break;
|
|
363
|
-
// Single mode runs the metric on the first elements of a[] and b[]
|
|
364
139
|
case 'single':
|
|
365
140
|
this.results = this.runSingle(0, 0);
|
|
366
141
|
break;
|
|
367
|
-
// Pairwise mode runs the metric on corresponding pairs of a[] and b[]
|
|
368
142
|
case 'pairwise':
|
|
369
143
|
if (this.isPairwise()) this.runPairwise();
|
|
370
144
|
break;
|
|
371
|
-
// Unsupported mode
|
|
372
145
|
default:
|
|
373
146
|
throw new Error(`Unsupported mode <${mode}>`);
|
|
374
147
|
}
|
|
375
148
|
}
|
|
376
|
-
/**
|
|
377
|
-
* Run the metric computation based on the specified mode asynchronously.
|
|
378
|
-
*
|
|
379
|
-
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
|
|
380
|
-
* @param {boolean} [clear=true] - Whether to clear previous results before running
|
|
381
|
-
* @returns {Promise<void>} - A promise that resolves when the metric computation is complete
|
|
382
|
-
* @throws {Error} - If an unsupported mode is specified
|
|
383
|
-
*/
|
|
384
149
|
async runAsync(mode, clear = true) {
|
|
385
|
-
// Clear previous results if requested
|
|
386
150
|
if (clear) this.clear();
|
|
387
151
|
switch (this.whichMode(mode)) {
|
|
388
|
-
// Default mode runs the metric on single inputs or falls back to batch mode
|
|
389
152
|
case 'default':
|
|
390
153
|
if (this.isSingle()) {
|
|
391
154
|
this.results = await this.runSingleAsync(0, 0);
|
|
392
155
|
break;
|
|
393
156
|
}
|
|
394
|
-
// Batch mode runs the metric on all combinations of a[] and b[]
|
|
395
157
|
case 'batch':
|
|
396
158
|
await this.runBatchAsync();
|
|
397
159
|
break;
|
|
398
|
-
// Single mode runs the metric on the first elements of a[] and b[]
|
|
399
160
|
case 'single':
|
|
400
161
|
this.results = await this.runSingleAsync(0, 0);
|
|
401
162
|
break;
|
|
402
|
-
// Pairwise mode runs the metric on corresponding pairs of a[] and b[]
|
|
403
163
|
case 'pairwise':
|
|
404
164
|
if (this.isPairwise()) await this.runPairwiseAsync();
|
|
405
165
|
break;
|
|
406
|
-
// Unsupported mode
|
|
407
166
|
default:
|
|
408
167
|
throw new Error(`Unsupported async mode <${mode}>`);
|
|
409
168
|
}
|
|
410
169
|
}
|
|
411
|
-
/**
|
|
412
|
-
* Get the name of the metric.
|
|
413
|
-
*
|
|
414
|
-
* @returns {string} - The name of the metric
|
|
415
|
-
*/
|
|
416
170
|
getMetricName() {
|
|
417
171
|
return this.metric;
|
|
418
172
|
}
|
|
419
|
-
/**
|
|
420
|
-
* Get the result of the metric computation.
|
|
421
|
-
*
|
|
422
|
-
* @returns {MetricResult<R>} - The result of the metric computation
|
|
423
|
-
* @throws {Error} - If `run()` has not been called before this method
|
|
424
|
-
*/
|
|
425
173
|
getResults() {
|
|
426
|
-
// Ensure that the metric has been run before getting the result
|
|
427
174
|
if (this.results === undefined)
|
|
428
175
|
throw new Error(`run() must be called before getResult()`);
|
|
429
|
-
// Return the result(s)
|
|
430
176
|
return this.results;
|
|
431
177
|
}
|
|
432
178
|
}
|
|
433
|
-
/**
|
|
434
|
-
* Metric registry service for managing metric implementations.
|
|
435
|
-
*
|
|
436
|
-
* This registry allows for dynamic registration and retrieval of metric classes,
|
|
437
|
-
* enabling the use of various string similarity metrics in a consistent manner.
|
|
438
|
-
*/
|
|
439
179
|
const MetricRegistry = Registry('metric', Metric);
|
|
440
180
|
|
|
441
181
|
export { Metric, MetricRegistry };
|