cmpstr 3.0.2 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/CmpStr.esm.js +2228 -4930
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +2 -2
- package/dist/CmpStr.esm.min.js.map +1 -1
- package/dist/CmpStr.umd.js +2348 -5026
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +2 -2
- package/dist/CmpStr.umd.min.js.map +1 -1
- package/dist/cjs/CmpStr.cjs +10 -404
- package/dist/cjs/CmpStr.cjs.map +1 -1
- package/dist/cjs/CmpStrAsync.cjs +10 -220
- package/dist/cjs/CmpStrAsync.cjs.map +1 -1
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +1 -56
- package/dist/cjs/metric/Cosine.cjs.map +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +2 -64
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +1 -56
- package/dist/cjs/metric/DiceSorensen.cjs.map +1 -1
- package/dist/cjs/metric/Hamming.cjs +2 -51
- package/dist/cjs/metric/Hamming.cjs.map +1 -1
- package/dist/cjs/metric/Jaccard.cjs +1 -48
- package/dist/cjs/metric/Jaccard.cjs.map +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +1 -53
- package/dist/cjs/metric/JaroWinkler.cjs.map +1 -1
- package/dist/cjs/metric/LCS.cjs +1 -54
- package/dist/cjs/metric/LCS.cjs.map +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +2 -60
- package/dist/cjs/metric/Levenshtein.cjs.map +1 -1
- package/dist/cjs/metric/Metric.cjs +2 -262
- package/dist/cjs/metric/Metric.cjs.map +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs +4 -56
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs +4 -58
- package/dist/cjs/metric/SmithWaterman.cjs.map +1 -1
- package/dist/cjs/metric/qGram.cjs +1 -55
- package/dist/cjs/metric/qGram.cjs.map +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +1 -78
- package/dist/cjs/phonetic/Caverphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Cologne.cjs +1 -43
- package/dist/cjs/phonetic/Cologne.cjs.map +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +1 -76
- package/dist/cjs/phonetic/Metaphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +1 -261
- package/dist/cjs/phonetic/Phonetic.cjs.map +1 -1
- package/dist/cjs/phonetic/Soundex.cjs +1 -47
- package/dist/cjs/phonetic/Soundex.cjs.map +1 -1
- package/dist/cjs/root.cjs +37 -0
- package/dist/cjs/root.cjs.map +1 -0
- package/dist/cjs/utils/DeepMerge.cjs +8 -75
- package/dist/cjs/utils/DeepMerge.cjs.map +1 -1
- package/dist/cjs/utils/DiffChecker.cjs +2 -190
- package/dist/cjs/utils/DiffChecker.cjs.map +1 -1
- package/dist/cjs/utils/Filter.cjs +1 -112
- package/dist/cjs/utils/Filter.cjs.map +1 -1
- package/dist/cjs/utils/HashTable.cjs +1 -99
- package/dist/cjs/utils/HashTable.cjs.map +1 -1
- package/dist/cjs/utils/Normalizer.cjs +3 -94
- package/dist/cjs/utils/Normalizer.cjs.map +1 -1
- package/dist/cjs/utils/Pool.cjs +1 -105
- package/dist/cjs/utils/Pool.cjs.map +1 -1
- package/dist/cjs/utils/Profiler.cjs +1 -133
- package/dist/cjs/utils/Profiler.cjs.map +1 -1
- package/dist/cjs/utils/Registry.cjs +2 -90
- package/dist/cjs/utils/Registry.cjs.map +1 -1
- package/dist/cjs/utils/TextAnalyzer.cjs +1 -180
- package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -1
- package/dist/esm/CmpStr.mjs +10 -404
- package/dist/esm/CmpStr.mjs.map +1 -1
- package/dist/esm/CmpStrAsync.mjs +10 -220
- package/dist/esm/CmpStrAsync.mjs.map +1 -1
- package/dist/esm/index.mjs +1 -1
- package/dist/esm/metric/Cosine.mjs +1 -56
- package/dist/esm/metric/Cosine.mjs.map +1 -1
- package/dist/esm/metric/DamerauLevenshtein.mjs +2 -64
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -1
- package/dist/esm/metric/DiceSorensen.mjs +1 -56
- package/dist/esm/metric/DiceSorensen.mjs.map +1 -1
- package/dist/esm/metric/Hamming.mjs +2 -51
- package/dist/esm/metric/Hamming.mjs.map +1 -1
- package/dist/esm/metric/Jaccard.mjs +1 -48
- package/dist/esm/metric/Jaccard.mjs.map +1 -1
- package/dist/esm/metric/JaroWinkler.mjs +1 -53
- package/dist/esm/metric/JaroWinkler.mjs.map +1 -1
- package/dist/esm/metric/LCS.mjs +1 -54
- package/dist/esm/metric/LCS.mjs.map +1 -1
- package/dist/esm/metric/Levenshtein.mjs +2 -60
- package/dist/esm/metric/Levenshtein.mjs.map +1 -1
- package/dist/esm/metric/Metric.mjs +2 -262
- package/dist/esm/metric/Metric.mjs.map +1 -1
- package/dist/esm/metric/NeedlemanWunsch.mjs +4 -56
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -1
- package/dist/esm/metric/SmithWaterman.mjs +4 -58
- package/dist/esm/metric/SmithWaterman.mjs.map +1 -1
- package/dist/esm/metric/qGram.mjs +1 -55
- package/dist/esm/metric/qGram.mjs.map +1 -1
- package/dist/esm/phonetic/Caverphone.mjs +1 -78
- package/dist/esm/phonetic/Caverphone.mjs.map +1 -1
- package/dist/esm/phonetic/Cologne.mjs +1 -43
- package/dist/esm/phonetic/Cologne.mjs.map +1 -1
- package/dist/esm/phonetic/Metaphone.mjs +1 -76
- package/dist/esm/phonetic/Metaphone.mjs.map +1 -1
- package/dist/esm/phonetic/Phonetic.mjs +1 -261
- package/dist/esm/phonetic/Phonetic.mjs.map +1 -1
- package/dist/esm/phonetic/Soundex.mjs +1 -47
- package/dist/esm/phonetic/Soundex.mjs.map +1 -1
- package/dist/esm/root.mjs +29 -0
- package/dist/esm/root.mjs.map +1 -0
- package/dist/esm/utils/DeepMerge.mjs +8 -76
- package/dist/esm/utils/DeepMerge.mjs.map +1 -1
- package/dist/esm/utils/DiffChecker.mjs +2 -190
- package/dist/esm/utils/DiffChecker.mjs.map +1 -1
- package/dist/esm/utils/Filter.mjs +1 -112
- package/dist/esm/utils/Filter.mjs.map +1 -1
- package/dist/esm/utils/HashTable.mjs +1 -99
- package/dist/esm/utils/HashTable.mjs.map +1 -1
- package/dist/esm/utils/Normalizer.mjs +3 -94
- package/dist/esm/utils/Normalizer.mjs.map +1 -1
- package/dist/esm/utils/Pool.mjs +1 -105
- package/dist/esm/utils/Pool.mjs.map +1 -1
- package/dist/esm/utils/Profiler.mjs +1 -133
- package/dist/esm/utils/Profiler.mjs.map +1 -1
- package/dist/esm/utils/Registry.mjs +2 -90
- package/dist/esm/utils/Registry.mjs.map +1 -1
- package/dist/esm/utils/TextAnalyzer.mjs +1 -180
- package/dist/esm/utils/TextAnalyzer.mjs.map +1 -1
- package/dist/types/index.d.ts +3 -2
- package/dist/types/root.d.ts +38 -0
- package/dist/types/utils/Types.d.ts +1 -0
- package/package.json +15 -9
package/dist/cjs/metric/LCS.cjs
CHANGED
|
@@ -1,88 +1,35 @@
|
|
|
1
|
-
// CmpStr v3.0.
|
|
1
|
+
// CmpStr v3.0.4 build-74e65a5-250915 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
var Metric = require('./Metric.cjs');
|
|
5
5
|
var Pool = require('../utils/Pool.cjs');
|
|
6
6
|
|
|
7
|
-
/**
|
|
8
|
-
* Longest Common Subsequence (LCS)
|
|
9
|
-
* src/metric/LCS.ts
|
|
10
|
-
*
|
|
11
|
-
* @see https://en.wikipedia.org/wiki/Longest_common_subsequence
|
|
12
|
-
*
|
|
13
|
-
* The Longest Common Subsequence (LCS) metric measures the length of the longest
|
|
14
|
-
* subsequence common to both strings. Unlike substrings, the characters of a
|
|
15
|
-
* subsequence do not need to be contiguous, but must appear in the same order.
|
|
16
|
-
*
|
|
17
|
-
* The LCS is widely used in diff tools, bioinformatics, and approximate string
|
|
18
|
-
* matching.
|
|
19
|
-
*
|
|
20
|
-
* @module Metric/LCS
|
|
21
|
-
* @author Paul Köhler (komed3)
|
|
22
|
-
* @license MIT
|
|
23
|
-
*/
|
|
24
|
-
/**
|
|
25
|
-
* LCSMetric class extends the Metric class to implement the Longest Common Subsequence algorithm.
|
|
26
|
-
*/
|
|
27
7
|
class LCSMetric extends Metric.Metric {
|
|
28
|
-
/**
|
|
29
|
-
* Constructor for the LCSMetric class.
|
|
30
|
-
*
|
|
31
|
-
* Initializes the LCS metric with two input strings or
|
|
32
|
-
* arrays of strings and optional options.
|
|
33
|
-
*
|
|
34
|
-
* @param {MetricInput} a - First input string or array of strings
|
|
35
|
-
* @param {MetricInput} b - Second input string or array of strings
|
|
36
|
-
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
37
|
-
*/
|
|
38
8
|
constructor(a, b, opt = {}) {
|
|
39
|
-
// Call the parent Metric constructor with the metric name and inputs
|
|
40
|
-
// Metric is symmetrical
|
|
41
9
|
super('lcs', a, b, opt, true);
|
|
42
10
|
}
|
|
43
|
-
/**
|
|
44
|
-
* Calculates the normalized LCS similarity between two strings.
|
|
45
|
-
*
|
|
46
|
-
* @param {string} a - First string
|
|
47
|
-
* @param {string} b - Second string
|
|
48
|
-
* @param {number} m - Length of the first string
|
|
49
|
-
* @param {number} n - Length of the second string
|
|
50
|
-
* @param {number} maxLen - Maximum length of the strings
|
|
51
|
-
* @return {MetricCompute<LCSRaw>} - Object containing the similarity result and raw LCS length
|
|
52
|
-
*/
|
|
53
11
|
compute(a, b, m, n, maxLen) {
|
|
54
|
-
// Get two reusable arrays from the Pool for the DP rows
|
|
55
12
|
const len = m + 1;
|
|
56
13
|
const [prev, curr] = Pool.Pool.acquireMany('uint16', [len, len]);
|
|
57
|
-
// Initialize the first row to zeros
|
|
58
14
|
for (let i = 0; i <= m; i++) prev[i] = 0;
|
|
59
|
-
// Fill the DP matrix row by row (over the longer string)
|
|
60
15
|
for (let j = 1; j <= n; j++) {
|
|
61
16
|
curr[0] = 0;
|
|
62
|
-
// Get the character code of the current character in b
|
|
63
17
|
const cb = b.charCodeAt(j - 1);
|
|
64
18
|
for (let i = 1; i <= m; i++) {
|
|
65
|
-
// If characters match, increment the LCS length
|
|
66
19
|
if (a.charCodeAt(i - 1) === cb) curr[i] = prev[i - 1] + 1;
|
|
67
|
-
// Otherwise, take the maximum of the left or above cell
|
|
68
20
|
else curr[i] = Math.max(prev[i], curr[i - 1]);
|
|
69
21
|
}
|
|
70
|
-
// Copy current row to previous for next iteration
|
|
71
22
|
prev.set(curr);
|
|
72
23
|
}
|
|
73
|
-
// The last value in prev is the LCS length
|
|
74
24
|
const lcs = prev[m];
|
|
75
|
-
// Release arrays back to the pool
|
|
76
25
|
Pool.Pool.release('uint16', prev, len);
|
|
77
26
|
Pool.Pool.release('uint16', curr, len);
|
|
78
|
-
// Normalize by the length of the longer string
|
|
79
27
|
return {
|
|
80
28
|
res: maxLen === 0 ? 1 : Metric.Metric.clamp(lcs / maxLen),
|
|
81
29
|
raw: { lcs, maxLen }
|
|
82
30
|
};
|
|
83
31
|
}
|
|
84
32
|
}
|
|
85
|
-
// Register the Longest Common Subsequence (LCS) in the metric registry
|
|
86
33
|
Metric.MetricRegistry.add('lcs', LCSMetric);
|
|
87
34
|
|
|
88
35
|
exports.LCSMetric = LCSMetric;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LCS.cjs","sources":["../../../src/metric/LCS.ts"],"sourcesContent":[null],"names":["Metric","Pool","MetricRegistry"],"mappings":";;;;;;
|
|
1
|
+
{"version":3,"file":"LCS.cjs","sources":["../../../src/metric/LCS.ts"],"sourcesContent":[null],"names":["Metric","Pool","MetricRegistry"],"mappings":";;;;;;AAgCM,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAU,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAQA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAc,CAAA;AAYzC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAc,CAAc,CAAA,CAAE,CAAc,CAAA,CAAE,CAAA,CAAA,IAAqB,CAAA,CAAE,CAAA,CAAA;IAIjE,CAAA,CAAA,CAAA,CAAA,CAAK,CAAG,CAAA,CAAA,CAAA,CAAA,CAAK,CAAA,CAAE,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAA,CAAA,CAAG,CAAA,CAAE,CAAA,CAAA,CAAA,CAAI,CAAE;EAEpC;EAYmB,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CACtB,CAAS,CAAA,CAAE,CAAS,CAAA,CAAE,CAAS,CAAA,CAAE,CAAS,CAAA,CAC1C,CAAA,CAAA,CAAA,CAAA,CAAA,CAAc,CAAA,CAAA;AAId,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAG,CAAA,CAAA,CAAW,CAAC,GAAG,CAAC;AACzB,CAAA,CAAA,CAAA,CAAA,MAAM,CAAE,CAAA,CAAA,CAAA,CAAI,EAAE,CAAA,CAAA,CAAA,CAAI,CAAE,GAAGC,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAI,CAAC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAW,CAAE,QAAQ,CAAA,CAAE,CAAE,GAAG,CAAA,CAAE,CAAA,CAAA,CAAG,CAAE,CAAE;IAGjE,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAC,CAAA,CAAE,CAAC,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAG,CAAA,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,GAAG,CAAC;AAG5C,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAC,CAAA,CAAE,CAAC,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAA,CAAG;AAE3B,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,GAAG,CAAC;MAGb,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAE,CAAA,CAAA,CAAW,CAAC,CAAC,UAAU,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE;AAExC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAC,CAAA,CAAE,CAAC,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAA,CAAG;QAG3B,CAAA,CAAA,CAAA,CAAK,CAAC,CAAC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAA
A,CAAU,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE,CAAA,CAAA,CAAA,CAAA,CAAK,CAAA,CAAE,CAAG,CAAA,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,CAAA,CAAA,CAAG,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE,CAAA,CAAA,CAAG,CAAC;aAG5D,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,GAAG,CAAA,CAAA,CAAA,CAAI,CAAC,GAAG,CAAE,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,EAAE,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE,CAAE;MAEzD;AAGA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAE,IAAI,CAAE;IAEpB;AAGA,CAAA,CAAA,CAAA,CAAA,MAAM,CAAA,CAAA,CAAG,CAAA,CAAA,CAAW,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE;IAG7BA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAI,CAAC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CAAE,QAAQ,CAAA,CAAE,CAAA,CAAA,CAAA,CAAI,CAAA,CAAE,CAAA,CAAA,CAAG,CAAE;IACnCA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAI,CAAC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CAAE,QAAQ,CAAA,CAAE,CAAA,CAAA,CAAA,CAAI,CAAA,CAAE,CAAA,CAAA,CAAG,CAAE;IAGnC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO;AACH,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAG,CAAA,CAAE,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAA,CAAK,CAAC,CAAA,CAAA,CAAG,CAAC,CAAA,CAAA,CAAGD,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAM,CAAC,CAAA,CAAA,CAAA,CAAA,CAAK,CAAE,CAAA,CAAA,CAAG,CAAA,CAAA,CAAG,MAAM,CAAE;AACpD,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAG,CAAA,CAAE,CAAA,CAAE,CAAA,CAAA,CAAG,EAAE,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA;KACrB;EAEL;AAEH;AAGDE,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAc,CAAC,CAAA,CAAA,CAAG,CAAE,CAAA,CAAA,CAAA,CAAA,CAAK,CAAA,CAAE,SAAS,CAAE;;;"}
|
|
@@ -1,93 +1,35 @@
|
|
|
1
|
-
// CmpStr v3.0.
|
|
1
|
+
// CmpStr v3.0.4 build-74e65a5-250915 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
var Metric = require('./Metric.cjs');
|
|
5
5
|
var Pool = require('../utils/Pool.cjs');
|
|
6
6
|
|
|
7
|
-
/**
|
|
8
|
-
* Levenshtein Distance
|
|
9
|
-
* src/metric/Levenshtein.ts
|
|
10
|
-
*
|
|
11
|
-
* @see https://en.wikipedia.org/wiki/Levenshtein_distance
|
|
12
|
-
*
|
|
13
|
-
* The Levenshtein distance is a classic metric for measuring the minimum number
|
|
14
|
-
* of single-character edits (insertions, deletions, or substitutions) required
|
|
15
|
-
* to change one string into another.
|
|
16
|
-
*
|
|
17
|
-
* It is widely used in approximate string matching, spell checking, and natural
|
|
18
|
-
* language processing.
|
|
19
|
-
*
|
|
20
|
-
* @module Metric/LevenshteinDistance
|
|
21
|
-
* @author Paul Köhler (komed3)
|
|
22
|
-
* @license MIT
|
|
23
|
-
*/
|
|
24
|
-
/**
|
|
25
|
-
* LevenshteinDistance class extends the Metric class to implement the Levenshtein distance algorithm.
|
|
26
|
-
*/
|
|
27
7
|
class LevenshteinDistance extends Metric.Metric {
|
|
28
|
-
/**
|
|
29
|
-
* Constructor for the Levenshtein class.
|
|
30
|
-
*
|
|
31
|
-
* Initializes the Levenshtein metric with two input strings
|
|
32
|
-
* or arrays of strings and optional options.
|
|
33
|
-
*
|
|
34
|
-
* @param {MetricInput} a - First input string or array of strings
|
|
35
|
-
* @param {MetricInput} b - Second input string or array of strings
|
|
36
|
-
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
37
|
-
*/
|
|
38
8
|
constructor(a, b, opt = {}) {
|
|
39
|
-
// Call the parent Metric constructor with the metric name and inputs
|
|
40
|
-
// Metric is symmetrical
|
|
41
9
|
super('levenshtein', a, b, opt, true);
|
|
42
10
|
}
|
|
43
|
-
/**
|
|
44
|
-
* Calculates the Levenshtein distance between two strings.
|
|
45
|
-
*
|
|
46
|
-
* @param {string} a - First string
|
|
47
|
-
* @param {string} b - Second string
|
|
48
|
-
* @param {number} m - Length of the first string
|
|
49
|
-
* @param {number} n - Length of the second string
|
|
50
|
-
* @param {number} maxLen - Maximum length of the strings
|
|
51
|
-
* @return {MetricCompute<LevenshteinRaw>} - Object containing the similarity result and raw distance
|
|
52
|
-
*/
|
|
53
11
|
compute(a, b, m, n, maxLen) {
|
|
54
|
-
// Get two reusable arrays from the Pool for the DP rows
|
|
55
12
|
const len = m + 1;
|
|
56
13
|
const [prev, curr] = Pool.Pool.acquireMany('uint16', [len, len]);
|
|
57
|
-
// Initialize the first row (edit distances from empty string to a)
|
|
58
14
|
for (let i = 0; i <= m; i++) prev[i] = i;
|
|
59
|
-
// Fill the DP matrix row by row (over the longer string)
|
|
60
15
|
for (let j = 1; j <= n; j++) {
|
|
61
|
-
// Cost of transforming empty string to b[0..j]
|
|
62
16
|
curr[0] = j;
|
|
63
|
-
// Get the character code of the current character in b
|
|
64
17
|
const cb = b.charCodeAt(j - 1);
|
|
65
18
|
for (let i = 1; i <= m; i++) {
|
|
66
|
-
// Cost is 0 if characters match, 1 otherwise
|
|
67
19
|
const cost = a.charCodeAt(i - 1) === cb ? 0 : 1;
|
|
68
|
-
|
|
69
|
-
curr[i] = Math.min(
|
|
70
|
-
curr[i - 1] + 1, // Insertion
|
|
71
|
-
prev[i] + 1, // Deletion
|
|
72
|
-
prev[i - 1] + cost // Substitution
|
|
73
|
-
);
|
|
20
|
+
curr[i] = Math.min(curr[i - 1] + 1, prev[i] + 1, prev[i - 1] + cost);
|
|
74
21
|
}
|
|
75
|
-
// Copy current row to previous for next iteration
|
|
76
22
|
prev.set(curr);
|
|
77
23
|
}
|
|
78
|
-
// The last value in prev is the Levenshtein distance
|
|
79
24
|
const dist = prev[m];
|
|
80
|
-
// Release arrays back to the pool
|
|
81
25
|
Pool.Pool.release('uint16', prev, len);
|
|
82
26
|
Pool.Pool.release('uint16', curr, len);
|
|
83
|
-
// Return the result as a MetricCompute object
|
|
84
27
|
return {
|
|
85
28
|
res: maxLen === 0 ? 1 : Metric.Metric.clamp(1 - dist / maxLen),
|
|
86
29
|
raw: { dist, maxLen }
|
|
87
30
|
};
|
|
88
31
|
}
|
|
89
32
|
}
|
|
90
|
-
// Register the Levenshtein distance in the metric registry
|
|
91
33
|
Metric.MetricRegistry.add('levenshtein', LevenshteinDistance);
|
|
92
34
|
|
|
93
35
|
exports.LevenshteinDistance = LevenshteinDistance;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"Levenshtein.cjs","sources":["../../../src/metric/Levenshtein.ts"],"sourcesContent":[null],"names":["Metric","Pool","MetricRegistry"],"mappings":";;;;;;
|
|
1
|
+
{"version":3,"file":"Levenshtein.cjs","sources":["../../../src/metric/Levenshtein.ts"],"sourcesContent":[null],"names":["Metric","Pool","MetricRegistry"],"mappings":";;;;;;AAgCM,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAoB,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAQA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAsB,CAAA;AAY3D,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAc,CAAc,CAAA,CAAE,CAAc,CAAA,CAAE,CAAA,CAAA,IAAqB,CAAA,CAAE,CAAA,CAAA;IAIjE,CAAA,CAAA,CAAA,CAAA,CAAK,CAAG,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAa,CAAA,CAAE,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAA,CAAA,CAAG,CAAA,CAAE,CAAA,CAAA,CAAA,CAAI,CAAE;EAE5C;EAYmB,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CACtB,CAAS,CAAA,CAAE,CAAS,CAAA,CAAE,CAAS,CAAA,CAAE,CAAS,CAAA,CAC1C,CAAA,CAAA,CAAA,CAAA,CAAA,CAAc,CAAA,CAAA;AAId,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAG,CAAA,CAAA,CAAW,CAAC,GAAG,CAAC;AACzB,CAAA,CAAA,CAAA,CAAA,MAAM,CAAE,CAAA,CAAA,CAAA,CAAI,EAAE,CAAA,CAAA,CAAA,CAAI,CAAE,GAAGC,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAI,CAAC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAW,CAAE,QAAQ,CAAA,CAAE,CAAE,GAAG,CAAA,CAAE,CAAA,CAAA,CAAG,CAAE,CAAE;IAGjE,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAC,CAAA,CAAE,CAAC,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAG,CAAA,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,GAAG,CAAC;AAG5C,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAC,CAAA,CAAE,CAAC,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAE,CAAC,CAAA,CAAE,CAAA,CAAG;AAG3B,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,GAAG,CAAC;MAGb,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAE,CAAA,CAAA,CAAW,CAAC,CAAC,UAAU,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE;AAExC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAC,CAAA,CAAE,CAAC,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,C
AAE,CAAC,CAAA,CAAE,CAAA,CAAG;QAG3B,MAAM,CAAA,CAAA,CAAA,CAAI,CAAA,CAAA,CAAW,CAAC,CAAC,UAAU,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE,CAAA,CAAA,CAAA,CAAA,CAAK,CAAA,CAAE,CAAA,CAAA,CAAG,CAAC,GAAG,CAAC;AAGzD,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,IAAI,CAAE,CAAC,CAAE,CAAA,CAAA,CAAG,IAAI,CAAC,CAAA,CAAA,CAAG,CAChB,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE,GAAG,CAAC,CACjB,CAAA,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE,GAAG,CAAC,EACb,IAAI,CAAE,CAAC,CAAA,CAAA,CAAG,CAAC,CAAE,CAAA,CAAA,CAAG,CAAA,CAAA,CAAA,EACnB;MAEL;AAGA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAI,CAAC,CAAA,CAAA,CAAG,CAAE,IAAI,CAAE;IAEpB;AAGA,CAAA,CAAA,CAAA,CAAA,MAAM,CAAA,CAAA,CAAA,CAAI,CAAA,CAAA,CAAW,CAAA,CAAA,CAAA,CAAI,CAAE,CAAC,CAAE;IAG9BA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAI,CAAC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CAAE,QAAQ,CAAA,CAAE,CAAA,CAAA,CAAA,CAAI,CAAA,CAAE,CAAA,CAAA,CAAG,CAAE;IACnCA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAI,CAAC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO,CAAE,QAAQ,CAAA,CAAE,CAAA,CAAA,CAAA,CAAI,CAAA,CAAE,CAAA,CAAA,CAAG,CAAE;IAGnC,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAO;MACH,GAAG,CAAA,CAAE,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA,CAAA,CAAA,CAAA,CAAK,CAAC,GAAG,CAAC,CAAA,CAAA,CAAGD,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAM,CAAC,KAAK,CAAE,CAAC,GAAG,CAAA,CAAA,CAAA,CAAI,CAAA,CAAA,CAAG,MAAM,CAAE;AACzD,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAG,CAAA,CAAE,CAAA,CAAE,CAAA,CAAA,CAAA,CAAI,EAAE,CAAA,CAAA,CAAA,CAAA,CAAA,CAAM,CAAA;KACtB;EAEL;AAEH;AAGDE,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAAA,CAAc,CAAC,CAAA,CAAA,CAAG,CAAE,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAA,CAAa,CAAA,CAAE,mBAAmB,CAAE;;;"}
|
|
@@ -1,190 +1,69 @@
|
|
|
1
|
-
// CmpStr v3.0.
|
|
1
|
+
// CmpStr v3.0.4 build-74e65a5-250915 by Paul Köhler @komed3 / MIT License
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
var Registry = require('../utils/Registry.cjs');
|
|
5
5
|
var HashTable = require('../utils/HashTable.cjs');
|
|
6
6
|
var Profiler = require('../utils/Profiler.cjs');
|
|
7
7
|
|
|
8
|
-
/**
|
|
9
|
-
* Abstract Metric
|
|
10
|
-
* src/metric/Metric.ts
|
|
11
|
-
*
|
|
12
|
-
* This module defines an abstract class for string metrics, providing a framework for
|
|
13
|
-
* computing various string similarity metrics. It includes methods for running metrics
|
|
14
|
-
* in different modes (single, batch, pairwise) synchronous or asynchronous and caching
|
|
15
|
-
* results to optimize performance. The class is designed to be extended by specific
|
|
16
|
-
* metric implementations like the Levenshtein distance or Jaro-Winkler similarity.
|
|
17
|
-
*
|
|
18
|
-
* It provides:
|
|
19
|
-
* - A base class for string metrics with common functionality
|
|
20
|
-
* - Methods for running metrics in different modes
|
|
21
|
-
* - Pre-computation for trivial cases to optimize performance
|
|
22
|
-
* - Caching of metric computations to avoid redundant calculations
|
|
23
|
-
* - Support for symmetrical metrics (same result for inputs in any order)
|
|
24
|
-
* - Performance tracking capabilities (Profiler)
|
|
25
|
-
* - Asynchronous execution support for metrics
|
|
26
|
-
*
|
|
27
|
-
* This class is intended to be extended by specific metric implementations that will
|
|
28
|
-
* implement the `compute` method to define the specific metric computation logic.
|
|
29
|
-
*
|
|
30
|
-
* @module Metric
|
|
31
|
-
* @author Paul Köhler (komed3)
|
|
32
|
-
* @license MIT
|
|
33
|
-
*/
|
|
34
|
-
// Get the singleton profiler instance for performance monitoring
|
|
35
8
|
const profiler = Profiler.Profiler.getInstance();
|
|
36
|
-
/**
|
|
37
|
-
* Abstract class representing a generic string metric.
|
|
38
|
-
*
|
|
39
|
-
* @abstract
|
|
40
|
-
* @template R - The type of the raw result, defaulting to `MetricRaw`.
|
|
41
|
-
*/
|
|
42
9
|
class Metric {
|
|
43
|
-
// Cache for metric computations to avoid redundant calculations
|
|
44
10
|
static cache = new HashTable.HashTable();
|
|
45
|
-
// Metric name for identification
|
|
46
11
|
metric;
|
|
47
|
-
// Inputs for the metric computation, transformed into arrays
|
|
48
12
|
a;
|
|
49
13
|
b;
|
|
50
|
-
// Store original inputs for result mapping
|
|
51
14
|
origA = [];
|
|
52
15
|
origB = [];
|
|
53
|
-
// Options for the metric computation, such as performance tracking
|
|
54
16
|
options;
|
|
55
|
-
// Indicates whether the metric is symmetric (same result for inputs in any order)
|
|
56
17
|
symmetric;
|
|
57
|
-
/**
|
|
58
|
-
* Result of the metric computation, which can be a single result or an array of results.
|
|
59
|
-
* This will be populated after running the metric.
|
|
60
|
-
*/
|
|
61
18
|
results;
|
|
62
|
-
/**
|
|
63
|
-
* Static method to clear the cache of metric computations.
|
|
64
|
-
*/
|
|
65
19
|
static clear() {
|
|
66
20
|
this.cache.clear();
|
|
67
21
|
}
|
|
68
|
-
/**
|
|
69
|
-
* Swaps two strings and their lengths if the first is longer than the second.
|
|
70
|
-
*
|
|
71
|
-
* @param {string} a - First string
|
|
72
|
-
* @param {string} b - Second string
|
|
73
|
-
* @param {number} m - Length of the first string
|
|
74
|
-
* @param {number} n - Length of the second string
|
|
75
|
-
* @returns {[string, string, number, number]} - Swapped strings and lengths
|
|
76
|
-
*/
|
|
77
22
|
static swap(a, b, m, n) {
|
|
78
23
|
return m > n ? [b, a, n, m] : [a, b, m, n];
|
|
79
24
|
}
|
|
80
|
-
/**
|
|
81
|
-
* Clamps the similarity result between 0 and 1.
|
|
82
|
-
*
|
|
83
|
-
* @param {number} res - The input similarity to clamp
|
|
84
|
-
* @returns {number} - The clamped similarity (0 to 1)
|
|
85
|
-
*/
|
|
86
25
|
static clamp(res) {
|
|
87
26
|
return Math.max(0, Math.min(1, res));
|
|
88
27
|
}
|
|
89
|
-
/**
|
|
90
|
-
* Constructor for the Metric class.
|
|
91
|
-
* Initializes the metric with two inputs (strings or arrays of strings) and options.
|
|
92
|
-
*
|
|
93
|
-
* @param {string} metric - The name of the metric (e.g. 'levenshtein')
|
|
94
|
-
* @param {MetricInput} a - First input string or array of strings
|
|
95
|
-
* @param {MetricInput} b - Second input string or array of strings
|
|
96
|
-
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
97
|
-
* @param {boolean} [symmetric=false] - Whether the metric is symmetric (same result for inputs in any order)
|
|
98
|
-
* @throws {Error} - If inputs `a` or `b` are empty
|
|
99
|
-
*/
|
|
100
28
|
constructor(metric, a, b, opt = {}, symmetric = false) {
|
|
101
|
-
// Set the metric name
|
|
102
29
|
this.metric = metric;
|
|
103
|
-
// Set the inputs
|
|
104
30
|
this.a = Array.isArray(a) ? a : [a];
|
|
105
31
|
this.b = Array.isArray(b) ? b : [b];
|
|
106
|
-
// Validate inputs: ensure they are not empty
|
|
107
32
|
if (this.a.length === 0 || this.b.length === 0)
|
|
108
|
-
throw new Error(`
|
|
109
|
-
// Set options
|
|
33
|
+
throw new Error(`Inputs <a> and <b> must not be empty`);
|
|
110
34
|
this.options = opt;
|
|
111
35
|
this.symmetric = symmetric;
|
|
112
36
|
}
|
|
113
|
-
/**
|
|
114
|
-
* Pre-compute the metric for two strings.
|
|
115
|
-
* This method is called before the actual computation to handle trivial cases.
|
|
116
|
-
*
|
|
117
|
-
* @param {string} a - First string
|
|
118
|
-
* @param {string} b - Second string
|
|
119
|
-
* @param {number} m - Length of the first string
|
|
120
|
-
* @param {number} n - Length of the second string
|
|
121
|
-
* @returns {MetricCompute<R>|undefined} - Pre-computed result or undefined if not applicable
|
|
122
|
-
*/
|
|
123
37
|
preCompute(a, b, m, n) {
|
|
124
|
-
// If strings are identical, return a similarity of 1
|
|
125
38
|
if (a === b) return { res: 1 };
|
|
126
|
-
// If the lengths of both strings is less than 2, return a similarity of 0
|
|
127
39
|
if (m == 0 || n == 0 || (m < 2 && n < 2)) return { res: 0 };
|
|
128
40
|
return undefined;
|
|
129
41
|
}
|
|
130
|
-
/**
|
|
131
|
-
* Abstract method to be implemented by subclasses to perform the metric computation.
|
|
132
|
-
* This method should contain the logic for computing the metric between two strings.
|
|
133
|
-
*
|
|
134
|
-
* @param {string} a - First string
|
|
135
|
-
* @param {string} b - Second string
|
|
136
|
-
* @param {number} m - Length of the first string
|
|
137
|
-
* @param {number} n - Length of the second string
|
|
138
|
-
* @param {number} maxLen - Maximum length of the strings
|
|
139
|
-
* @returns {MetricCompute<R>} - The result of the metric computation
|
|
140
|
-
* @throws {Error} - If not overridden in a subclass
|
|
141
|
-
*/
|
|
142
42
|
compute(a, b, m, n, maxLen) {
|
|
143
43
|
throw new Error(`Method compute() must be overridden in a subclass`);
|
|
144
44
|
}
|
|
145
|
-
/**
|
|
146
|
-
* Run the metric computation for single inputs (two strings).
|
|
147
|
-
* Applies preCompute for trivial cases before cache lookup and computation.
|
|
148
|
-
*
|
|
149
|
-
* If the profiler is active, it will measure time and memory usage.
|
|
150
|
-
*
|
|
151
|
-
* @param {number} i - Pointer to the first string
|
|
152
|
-
* @param {number} j - Pointer to the second string
|
|
153
|
-
* @returns {MetricResultSingle<R>} - The result of the metric computation
|
|
154
|
-
*/
|
|
155
45
|
runSingle(i, j) {
|
|
156
|
-
// Type safety: convert inputs to strings
|
|
157
46
|
let a = String(this.a[i]),
|
|
158
47
|
A = a;
|
|
159
48
|
let b = String(this.b[j]),
|
|
160
49
|
B = b;
|
|
161
|
-
// Get lengths
|
|
162
50
|
let m = A.length,
|
|
163
51
|
n = B.length;
|
|
164
|
-
// Pre-compute trivial cases (identical, empty, etc.)
|
|
165
52
|
let result = this.preCompute(A, B, m, n);
|
|
166
53
|
if (!result) {
|
|
167
|
-
// If the profiler is enabled, measure; else, just run
|
|
168
54
|
result = profiler.run(() => {
|
|
169
|
-
// Generate a cache key based on the metric and pair of strings `a` and `b`
|
|
170
55
|
const key = Metric.cache.key(this.metric, [A, B], this.symmetric);
|
|
171
|
-
// If the key exists in the cache, return the cached result
|
|
172
|
-
// Otherwise, compute the metric using the algorithm
|
|
173
56
|
return (
|
|
174
57
|
Metric.cache.get(key || '') ??
|
|
175
58
|
(() => {
|
|
176
|
-
// If the metric is symmetrical, swap `a` and `b` (shorter string first)
|
|
177
59
|
if (this.symmetric) [A, B, m, n] = Metric.swap(A, B, m, n);
|
|
178
|
-
// Compute the similarity using the algorithm
|
|
179
60
|
const res = this.compute(A, B, m, n, Math.max(m, n));
|
|
180
|
-
// If a key was generated, store the result in the cache
|
|
181
61
|
if (key) Metric.cache.set(key, res);
|
|
182
62
|
return res;
|
|
183
63
|
})()
|
|
184
64
|
);
|
|
185
65
|
});
|
|
186
66
|
}
|
|
187
|
-
// Build metric result object
|
|
188
67
|
return {
|
|
189
68
|
metric: this.metric,
|
|
190
69
|
a: this.origA[i] ?? a,
|
|
@@ -192,114 +71,45 @@ class Metric {
|
|
|
192
71
|
...result
|
|
193
72
|
};
|
|
194
73
|
}
|
|
195
|
-
/**
|
|
196
|
-
* Run the metric computation for single inputs (two strings) asynchronously.
|
|
197
|
-
*
|
|
198
|
-
* @param {number} i - Pointer to the first string
|
|
199
|
-
* @param {number} j - Pointer to the second string
|
|
200
|
-
* @returns {Promise<MetricResultSingle<R>>} - Promise resolving the result of the metric computation
|
|
201
|
-
*/
|
|
202
74
|
async runSingleAsync(i, j) {
|
|
203
75
|
return Promise.resolve(this.runSingle(i, j));
|
|
204
76
|
}
|
|
205
|
-
/**
|
|
206
|
-
* Run the metric computation for batch inputs (arrays of strings).
|
|
207
|
-
*
|
|
208
|
-
* It iterates through each string in the first array and computes the metric
|
|
209
|
-
* against each string in the second array.
|
|
210
|
-
*/
|
|
211
77
|
runBatch() {
|
|
212
78
|
const results = [];
|
|
213
|
-
// Loop through each combination of strings in a[] and b[]
|
|
214
79
|
for (let i = 0; i < this.a.length; i++)
|
|
215
80
|
for (let j = 0; j < this.b.length; j++)
|
|
216
81
|
results.push(this.runSingle(i, j));
|
|
217
|
-
// Populate the results
|
|
218
|
-
// `this.results` will be an array of MetricResultSingle
|
|
219
82
|
this.results = results;
|
|
220
83
|
}
|
|
221
|
-
/**
|
|
222
|
-
* Run the metric computation for batch inputs (arrays of strings) asynchronously.
|
|
223
|
-
*/
|
|
224
84
|
async runBatchAsync() {
|
|
225
85
|
const results = [];
|
|
226
|
-
// Loop through each combination of strings in a[] and b[]
|
|
227
86
|
for (let i = 0; i < this.a.length; i++)
|
|
228
87
|
for (let j = 0; j < this.b.length; j++)
|
|
229
88
|
results.push(await this.runSingleAsync(i, j));
|
|
230
|
-
// Populate the results
|
|
231
|
-
// `this.results` will be an array of MetricResultSingle
|
|
232
89
|
this.results = results;
|
|
233
90
|
}
|
|
234
|
-
/**
|
|
235
|
-
* Run the metric computation for pairwise inputs (A[i] vs B[i]).
|
|
236
|
-
*
|
|
237
|
-
* This method assumes that both `a` and `b` are arrays of equal length
|
|
238
|
-
* and computes the metric only for corresponding index pairs.
|
|
239
|
-
*/
|
|
240
91
|
runPairwise() {
|
|
241
92
|
const results = [];
|
|
242
|
-
// Compute metric for each corresponding pair
|
|
243
93
|
for (let i = 0; i < this.a.length; i++) results.push(this.runSingle(i, i));
|
|
244
|
-
// Populate the results
|
|
245
|
-
// `this.results` will be an array of MetricResultSingle
|
|
246
94
|
this.results = results;
|
|
247
95
|
}
|
|
248
|
-
/**
|
|
249
|
-
* Run the metric computation for pairwise inputs (A[i] vs B[i]) asynchronously.
|
|
250
|
-
*/
|
|
251
96
|
async runPairwiseAsync() {
|
|
252
97
|
const results = [];
|
|
253
|
-
// Compute metric for each corresponding pair
|
|
254
98
|
for (let i = 0; i < this.a.length; i++)
|
|
255
99
|
results.push(await this.runSingleAsync(i, i));
|
|
256
|
-
// Populate the results
|
|
257
|
-
// `this.results` will be an array of MetricResultSingle
|
|
258
100
|
this.results = results;
|
|
259
101
|
}
|
|
260
|
-
/**
|
|
261
|
-
* Set the original inputs to which the results of the metric calculation will refer.
|
|
262
|
-
*
|
|
263
|
-
* @param {MetricInput} [a] - original input(s) for a
|
|
264
|
-
* @param {MetricInput} [b] - original input(s) for b
|
|
265
|
-
*/
|
|
266
102
|
setOriginal(a, b) {
|
|
267
103
|
if (a) this.origA = Array.isArray(a) ? a : [a];
|
|
268
104
|
if (b) this.origB = Array.isArray(b) ? b : [b];
|
|
269
105
|
return this;
|
|
270
106
|
}
|
|
271
|
-
/**
|
|
272
|
-
* Check if the inputs are in batch mode.
|
|
273
|
-
*
|
|
274
|
-
* This method checks if either `a` or `b` contains more than one string,
|
|
275
|
-
* indicating that the metric is being run in batch mode.
|
|
276
|
-
*
|
|
277
|
-
* @returns {boolean} - True if either input is an array with more than one element
|
|
278
|
-
*/
|
|
279
107
|
isBatch() {
|
|
280
108
|
return this.a.length > 1 || this.b.length > 1;
|
|
281
109
|
}
|
|
282
|
-
/**
|
|
283
|
-
* Check if the inputs are in single mode.
|
|
284
|
-
*
|
|
285
|
-
* This method checks if both `a` and `b` are single strings (not arrays),
|
|
286
|
-
* indicating that the metric is being run on a single pair of strings.
|
|
287
|
-
*
|
|
288
|
-
* @returns {boolean} - True if both inputs are single strings
|
|
289
|
-
*/
|
|
290
110
|
isSingle() {
|
|
291
111
|
return !this.isBatch();
|
|
292
112
|
}
|
|
293
|
-
/**
|
|
294
|
-
* Check if the inputs are in pairwise mode.
|
|
295
|
-
*
|
|
296
|
-
* This method checks if both `a` and `b` are arrays of the same length,
|
|
297
|
-
* indicating that the metric is being run on corresponding pairs of strings.
|
|
298
|
-
*
|
|
299
|
-
* @returns {boolean} - True if both inputs are arrays of equal length
|
|
300
|
-
* @param {boolean} [safe=false] - If true, does not throw an error if lengths are not equal
|
|
301
|
-
* @throws {Error} - If `safe` is false and the lengths of `a` and `b` are not equal
|
|
302
|
-
*/
|
|
303
113
|
isPairwise(safe = false) {
|
|
304
114
|
return this.isBatch() && this.a.length === this.b.length
|
|
305
115
|
? true
|
|
@@ -308,136 +118,66 @@ class Metric {
|
|
|
308
118
|
throw new Error(`Mode <pairwise> requires arrays of equal length`);
|
|
309
119
|
})();
|
|
310
120
|
}
|
|
311
|
-
/**
|
|
312
|
-
* Check if the metric is symmetrical.
|
|
313
|
-
*
|
|
314
|
-
* This method returns whether the metric is symmetric, meaning it produces the same
|
|
315
|
-
* result regardless of the order of inputs (e.g., Levenshtein distance).
|
|
316
|
-
*
|
|
317
|
-
* @returns {boolean} - True if the metric is symmetric
|
|
318
|
-
*/
|
|
319
121
|
isSymmetrical() {
|
|
320
122
|
return this.symmetric;
|
|
321
123
|
}
|
|
322
|
-
/**
|
|
323
|
-
* Determine which mode to run the metric in.
|
|
324
|
-
*
|
|
325
|
-
* This method checks the provided mode or defaults to the mode specified in options.
|
|
326
|
-
* If no mode is specified, it defaults to 'default'.
|
|
327
|
-
*
|
|
328
|
-
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
|
|
329
|
-
* @returns {MetricMode} - The determined mode
|
|
330
|
-
*/
|
|
331
124
|
whichMode(mode) {
|
|
332
125
|
return mode ?? this.options?.mode ?? 'default';
|
|
333
126
|
}
|
|
334
|
-
/**
|
|
335
|
-
* Clear the cached results of the metric.
|
|
336
|
-
*
|
|
337
|
-
* This method resets the `results` property to `undefined`, effectively clearing
|
|
338
|
-
* any previously computed results. It can be useful for re-running the metric
|
|
339
|
-
* with new inputs or options.
|
|
340
|
-
*/
|
|
341
127
|
clear() {
|
|
342
128
|
this.results = undefined;
|
|
343
129
|
}
|
|
344
|
-
/**
|
|
345
|
-
* Run the metric computation based on the specified mode.
|
|
346
|
-
*
|
|
347
|
-
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
|
|
348
|
-
* @param {boolean} [clear=true] - Whether to clear previous results before running
|
|
349
|
-
* @throws {Error} - If an unsupported mode is specified
|
|
350
|
-
*/
|
|
351
130
|
run(mode, clear = true) {
|
|
352
|
-
// Clear previous results if requested
|
|
353
131
|
if (clear) this.clear();
|
|
354
132
|
switch (this.whichMode(mode)) {
|
|
355
|
-
// Default mode runs the metric on single inputs or falls back to batch mode
|
|
356
133
|
case 'default':
|
|
357
134
|
if (this.isSingle()) {
|
|
358
135
|
this.results = this.runSingle(0, 0);
|
|
359
136
|
break;
|
|
360
137
|
}
|
|
361
|
-
// Batch mode runs the metric on all combinations of a[] and b[]
|
|
362
138
|
case 'batch':
|
|
363
139
|
this.runBatch();
|
|
364
140
|
break;
|
|
365
|
-
// Single mode runs the metric on the first elements of a[] and b[]
|
|
366
141
|
case 'single':
|
|
367
142
|
this.results = this.runSingle(0, 0);
|
|
368
143
|
break;
|
|
369
|
-
// Pairwise mode runs the metric on corresponding pairs of a[] and b[]
|
|
370
144
|
case 'pairwise':
|
|
371
145
|
if (this.isPairwise()) this.runPairwise();
|
|
372
146
|
break;
|
|
373
|
-
// Unsupported mode
|
|
374
147
|
default:
|
|
375
148
|
throw new Error(`Unsupported mode <${mode}>`);
|
|
376
149
|
}
|
|
377
150
|
}
|
|
378
|
-
/**
|
|
379
|
-
* Run the metric computation based on the specified mode asynchronously.
|
|
380
|
-
*
|
|
381
|
-
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
|
|
382
|
-
* @param {boolean} [clear=true] - Whether to clear previous results before running
|
|
383
|
-
* @returns {Promise<void>} - A promise that resolves when the metric computation is complete
|
|
384
|
-
* @throws {Error} - If an unsupported mode is specified
|
|
385
|
-
*/
|
|
386
151
|
async runAsync(mode, clear = true) {
|
|
387
|
-
// Clear previous results if requested
|
|
388
152
|
if (clear) this.clear();
|
|
389
153
|
switch (this.whichMode(mode)) {
|
|
390
|
-
// Default mode runs the metric on single inputs or falls back to batch mode
|
|
391
154
|
case 'default':
|
|
392
155
|
if (this.isSingle()) {
|
|
393
156
|
this.results = await this.runSingleAsync(0, 0);
|
|
394
157
|
break;
|
|
395
158
|
}
|
|
396
|
-
// Batch mode runs the metric on all combinations of a[] and b[]
|
|
397
159
|
case 'batch':
|
|
398
160
|
await this.runBatchAsync();
|
|
399
161
|
break;
|
|
400
|
-
// Single mode runs the metric on the first elements of a[] and b[]
|
|
401
162
|
case 'single':
|
|
402
163
|
this.results = await this.runSingleAsync(0, 0);
|
|
403
164
|
break;
|
|
404
|
-
// Pairwise mode runs the metric on corresponding pairs of a[] and b[]
|
|
405
165
|
case 'pairwise':
|
|
406
166
|
if (this.isPairwise()) await this.runPairwiseAsync();
|
|
407
167
|
break;
|
|
408
|
-
// Unsupported mode
|
|
409
168
|
default:
|
|
410
169
|
throw new Error(`Unsupported async mode <${mode}>`);
|
|
411
170
|
}
|
|
412
171
|
}
|
|
413
|
-
/**
|
|
414
|
-
* Get the name of the metric.
|
|
415
|
-
*
|
|
416
|
-
* @returns {string} - The name of the metric
|
|
417
|
-
*/
|
|
418
172
|
getMetricName() {
|
|
419
173
|
return this.metric;
|
|
420
174
|
}
|
|
421
|
-
/**
|
|
422
|
-
* Get the result of the metric computation.
|
|
423
|
-
*
|
|
424
|
-
* @returns {MetricResult<R>} - The result of the metric computation
|
|
425
|
-
* @throws {Error} - If `run()` has not been called before this method
|
|
426
|
-
*/
|
|
427
175
|
getResults() {
|
|
428
|
-
// Ensure that the metric has been run before getting the result
|
|
429
176
|
if (this.results === undefined)
|
|
430
177
|
throw new Error(`run() must be called before getResult()`);
|
|
431
|
-
// Return the result(s)
|
|
432
178
|
return this.results;
|
|
433
179
|
}
|
|
434
180
|
}
|
|
435
|
-
/**
|
|
436
|
-
* Metric registry service for managing metric implementations.
|
|
437
|
-
*
|
|
438
|
-
* This registry allows for dynamic registration and retrieval of metric classes,
|
|
439
|
-
* enabling the use of various string similarity metrics in a consistent manner.
|
|
440
|
-
*/
|
|
441
181
|
/**
 * Registry for metric implementations, created under the name 'metric'
 * with `Metric` passed as the class argument.
 * NOTE(review): presumably used to register and look up Metric subclasses
 * by name — confirm against the Registry module.
 */
const MetricRegistry = Registry.Registry('metric', Metric);
|
|
442
182
|
|
|
443
183
|
exports.Metric = Metric;
|