cmpstr 2.0.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +75 -499
- package/dist/CmpStr.esm.js +4863 -0
- package/dist/CmpStr.esm.js.map +1 -0
- package/dist/CmpStr.esm.min.js +8 -0
- package/dist/CmpStr.esm.min.js.map +1 -0
- package/dist/CmpStr.umd.js +4875 -0
- package/dist/CmpStr.umd.js.map +1 -0
- package/dist/CmpStr.umd.min.js +8 -0
- package/dist/CmpStr.umd.min.js.map +1 -0
- package/dist/cjs/CmpStr.js +663 -0
- package/dist/cjs/CmpStr.js.map +1 -0
- package/dist/cjs/CmpStrAsync.js +336 -0
- package/dist/cjs/CmpStrAsync.js.map +1 -0
- package/dist/cjs/index.js +15 -0
- package/dist/cjs/index.js.map +1 -0
- package/dist/cjs/metric/Cosine.js +101 -0
- package/dist/cjs/metric/Cosine.js.map +1 -0
- package/dist/cjs/metric/DamerauLevenshtein.js +110 -0
- package/dist/cjs/metric/DamerauLevenshtein.js.map +1 -0
- package/dist/cjs/metric/DiceSorensen.js +91 -0
- package/dist/cjs/metric/DiceSorensen.js.map +1 -0
- package/dist/cjs/metric/Hamming.js +82 -0
- package/dist/cjs/metric/Hamming.js.map +1 -0
- package/dist/cjs/metric/Jaccard.js +76 -0
- package/dist/cjs/metric/Jaccard.js.map +1 -0
- package/dist/cjs/metric/JaroWinkler.js +114 -0
- package/dist/cjs/metric/JaroWinkler.js.map +1 -0
- package/dist/cjs/metric/LCS.js +89 -0
- package/dist/cjs/metric/LCS.js.map +1 -0
- package/dist/cjs/metric/Levenshtein.js +94 -0
- package/dist/cjs/metric/Levenshtein.js.map +1 -0
- package/dist/cjs/metric/Metric.js +445 -0
- package/dist/cjs/metric/Metric.js.map +1 -0
- package/dist/cjs/metric/NeedlemanWunsch.js +95 -0
- package/dist/cjs/metric/NeedlemanWunsch.js.map +1 -0
- package/dist/cjs/metric/SmithWaterman.js +98 -0
- package/dist/cjs/metric/SmithWaterman.js.map +1 -0
- package/dist/cjs/metric/qGram.js +91 -0
- package/dist/cjs/metric/qGram.js.map +1 -0
- package/dist/cjs/phonetic/Cologne.js +112 -0
- package/dist/cjs/phonetic/Cologne.js.map +1 -0
- package/dist/cjs/phonetic/Metaphone.js +172 -0
- package/dist/cjs/phonetic/Metaphone.js.map +1 -0
- package/dist/cjs/phonetic/Phonetic.js +413 -0
- package/dist/cjs/phonetic/Phonetic.js.map +1 -0
- package/dist/cjs/phonetic/Soundex.js +135 -0
- package/dist/cjs/phonetic/Soundex.js.map +1 -0
- package/dist/cjs/utils/DeepMerge.js +144 -0
- package/dist/cjs/utils/DeepMerge.js.map +1 -0
- package/dist/cjs/utils/DiffChecker.js +500 -0
- package/dist/cjs/utils/DiffChecker.js.map +1 -0
- package/dist/cjs/utils/Filter.js +189 -0
- package/dist/cjs/utils/Filter.js.map +1 -0
- package/dist/cjs/utils/HashTable.js +175 -0
- package/dist/cjs/utils/HashTable.js.map +1 -0
- package/dist/cjs/utils/Normalizer.js +144 -0
- package/dist/cjs/utils/Normalizer.js.map +1 -0
- package/dist/cjs/utils/Pool.js +196 -0
- package/dist/cjs/utils/Pool.js.map +1 -0
- package/dist/cjs/utils/Profiler.js +229 -0
- package/dist/cjs/utils/Profiler.js.map +1 -0
- package/dist/cjs/utils/Registry.js +148 -0
- package/dist/cjs/utils/Registry.js.map +1 -0
- package/dist/cjs/utils/TextAnalyzer.js +358 -0
- package/dist/cjs/utils/TextAnalyzer.js.map +1 -0
- package/dist/esm/CmpStr.js +662 -0
- package/dist/esm/CmpStr.js.map +1 -0
- package/dist/esm/CmpStrAsync.js +331 -0
- package/dist/esm/CmpStrAsync.js.map +1 -0
- package/dist/esm/index.js +7 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/metric/Cosine.js +99 -0
- package/dist/esm/metric/Cosine.js.map +1 -0
- package/dist/esm/metric/DamerauLevenshtein.js +108 -0
- package/dist/esm/metric/DamerauLevenshtein.js.map +1 -0
- package/dist/esm/metric/DiceSorensen.js +89 -0
- package/dist/esm/metric/DiceSorensen.js.map +1 -0
- package/dist/esm/metric/Hamming.js +77 -0
- package/dist/esm/metric/Hamming.js.map +1 -0
- package/dist/esm/metric/Jaccard.js +74 -0
- package/dist/esm/metric/Jaccard.js.map +1 -0
- package/dist/esm/metric/JaroWinkler.js +112 -0
- package/dist/esm/metric/JaroWinkler.js.map +1 -0
- package/dist/esm/metric/LCS.js +87 -0
- package/dist/esm/metric/LCS.js.map +1 -0
- package/dist/esm/metric/Levenshtein.js +92 -0
- package/dist/esm/metric/Levenshtein.js.map +1 -0
- package/dist/esm/metric/Metric.js +442 -0
- package/dist/esm/metric/Metric.js.map +1 -0
- package/dist/esm/metric/NeedlemanWunsch.js +93 -0
- package/dist/esm/metric/NeedlemanWunsch.js.map +1 -0
- package/dist/esm/metric/SmithWaterman.js +96 -0
- package/dist/esm/metric/SmithWaterman.js.map +1 -0
- package/dist/esm/metric/qGram.js +89 -0
- package/dist/esm/metric/qGram.js.map +1 -0
- package/dist/esm/phonetic/Cologne.js +114 -0
- package/dist/esm/phonetic/Cologne.js.map +1 -0
- package/dist/esm/phonetic/Metaphone.js +174 -0
- package/dist/esm/phonetic/Metaphone.js.map +1 -0
- package/dist/esm/phonetic/Phonetic.js +409 -0
- package/dist/esm/phonetic/Phonetic.js.map +1 -0
- package/dist/esm/phonetic/Soundex.js +137 -0
- package/dist/esm/phonetic/Soundex.js.map +1 -0
- package/dist/esm/utils/DeepMerge.js +139 -0
- package/dist/esm/utils/DeepMerge.js.map +1 -0
- package/dist/esm/utils/DiffChecker.js +498 -0
- package/dist/esm/utils/DiffChecker.js.map +1 -0
- package/dist/esm/utils/Filter.js +187 -0
- package/dist/esm/utils/Filter.js.map +1 -0
- package/dist/esm/utils/HashTable.js +173 -0
- package/dist/esm/utils/HashTable.js.map +1 -0
- package/dist/esm/utils/Normalizer.js +142 -0
- package/dist/esm/utils/Normalizer.js.map +1 -0
- package/dist/esm/utils/Pool.js +194 -0
- package/dist/esm/utils/Pool.js.map +1 -0
- package/dist/esm/utils/Profiler.js +227 -0
- package/dist/esm/utils/Profiler.js.map +1 -0
- package/dist/esm/utils/Registry.js +142 -0
- package/dist/esm/utils/Registry.js.map +1 -0
- package/dist/esm/utils/TextAnalyzer.js +356 -0
- package/dist/esm/utils/TextAnalyzer.js.map +1 -0
- package/dist/types/CmpStr.d.ts +472 -0
- package/dist/types/CmpStrAsync.d.ts +233 -0
- package/dist/types/index.d.ts +51 -0
- package/dist/types/metric/Cosine.d.ts +57 -0
- package/dist/types/metric/DamerauLevenshtein.d.ts +50 -0
- package/dist/types/metric/DiceSorensen.d.ts +57 -0
- package/dist/types/metric/Hamming.d.ts +49 -0
- package/dist/types/metric/Jaccard.d.ts +48 -0
- package/dist/types/metric/JaroWinkler.d.ts +50 -0
- package/dist/types/metric/LCS.d.ts +50 -0
- package/dist/types/metric/Levenshtein.d.ts +50 -0
- package/dist/types/metric/Metric.d.ts +261 -0
- package/dist/types/metric/NeedlemanWunsch.d.ts +47 -0
- package/dist/types/metric/SmithWaterman.d.ts +48 -0
- package/dist/types/metric/index.d.ts +41 -0
- package/dist/types/metric/qGram.d.ts +56 -0
- package/dist/types/phonetic/Cologne.d.ts +46 -0
- package/dist/types/phonetic/Metaphone.d.ts +50 -0
- package/dist/types/phonetic/Phonetic.d.ts +189 -0
- package/dist/types/phonetic/Soundex.d.ts +49 -0
- package/dist/types/phonetic/index.d.ts +30 -0
- package/dist/types/utils/DeepMerge.d.ts +70 -0
- package/dist/types/utils/DiffChecker.d.ts +137 -0
- package/dist/types/utils/Filter.d.ts +97 -0
- package/dist/types/utils/HashTable.d.ts +86 -0
- package/dist/types/utils/Normalizer.d.ts +76 -0
- package/dist/types/utils/Pool.d.ts +63 -0
- package/dist/types/utils/Profiler.d.ts +129 -0
- package/dist/types/utils/Registry.d.ts +57 -0
- package/dist/types/utils/TextAnalyzer.d.ts +199 -0
- package/dist/types/utils/Types.d.ts +313 -0
- package/package.json +62 -49
- package/src/CmpStr.d.ts +0 -70
- package/src/CmpStr.js +0 -912
- package/src/CmpStrAsync.d.ts +0 -19
- package/src/CmpStrAsync.js +0 -204
- package/src/algorithms/cosine.js +0 -86
- package/src/algorithms/damerau.js +0 -78
- package/src/algorithms/dice.js +0 -65
- package/src/algorithms/hamming.js +0 -44
- package/src/algorithms/jaccard.js +0 -34
- package/src/algorithms/jaroWinkler.js +0 -106
- package/src/algorithms/lcs.js +0 -58
- package/src/algorithms/levenshtein.js +0 -70
- package/src/algorithms/needlemanWunsch.js +0 -72
- package/src/algorithms/qGram.js +0 -63
- package/src/algorithms/smithWaterman.js +0 -78
- package/src/algorithms/soundex.js +0 -152
- package/src/index.d.ts +0 -3
- package/src/index.js +0 -47
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Levenshtein Distance
|
|
3
|
+
* src/metric/Levenshtein.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Levenshtein_distance
|
|
6
|
+
*
|
|
7
|
+
* The Levenshtein distance is a classic metric for measuring the minimum number
|
|
8
|
+
* of single-character edits (insertions, deletions, or substitutions) required
|
|
9
|
+
* to change one string into another.
|
|
10
|
+
*
|
|
11
|
+
* It is widely used in approximate string matching, spell checking, and natural
|
|
12
|
+
* language processing.
|
|
13
|
+
*
|
|
14
|
+
* @module Metric/LevenshteinDistance
|
|
15
|
+
* @author Paul Köhler (komed3)
|
|
16
|
+
* @license MIT
|
|
17
|
+
*/
|
|
18
|
+
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
|
|
19
|
+
import { Metric } from './Metric';
|
|
20
|
+
export interface LevenshteinRaw {
|
|
21
|
+
dist: number;
|
|
22
|
+
maxLen: number;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* LevenshteinDistance class extends the Metric class to implement the Levenshtein distance algorithm.
|
|
26
|
+
*/
|
|
27
|
+
export declare class LevenshteinDistance extends Metric<LevenshteinRaw> {
|
|
28
|
+
/**
|
|
29
|
+
* Constructor for the LevenshteinDistance class.
|
|
30
|
+
*
|
|
31
|
+
* Initializes the Levenshtein metric with two input strings
|
|
32
|
+
* or arrays of strings and optional options.
|
|
33
|
+
*
|
|
34
|
+
* @param {MetricInput} a - First input string or array of strings
|
|
35
|
+
* @param {MetricInput} b - Second input string or array of strings
|
|
36
|
+
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
37
|
+
*/
|
|
38
|
+
constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
|
|
39
|
+
/**
|
|
40
|
+
* Calculates the Levenshtein distance between two strings.
|
|
41
|
+
*
|
|
42
|
+
* @param {string} a - First string
|
|
43
|
+
* @param {string} b - Second string
|
|
44
|
+
* @param {number} m - Length of the first string
|
|
45
|
+
* @param {number} n - Length of the second string
|
|
46
|
+
* @param {number} maxLen - Maximum length of the strings
|
|
47
|
+
* @return {MetricCompute<LevenshteinRaw>} - Object containing the similarity result and raw distance
|
|
48
|
+
*/
|
|
49
|
+
protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<LevenshteinRaw>;
|
|
50
|
+
}
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Abstract Metric
|
|
3
|
+
* src/metric/Metric.ts
|
|
4
|
+
*
|
|
5
|
+
* This module defines an abstract class for string metrics, providing a framework for
|
|
6
|
+
* computing various string similarity metrics. It includes methods for running metrics
|
|
7
|
+
* in different modes (single, batch, pairwise), synchronously or asynchronously, and caching
|
|
8
|
+
* results to optimize performance. The class is designed to be extended by specific
|
|
9
|
+
* metric implementations like the Levenshtein distance or Jaro-Winkler similarity.
|
|
10
|
+
*
|
|
11
|
+
* It provides:
|
|
12
|
+
* - A base class for string metrics with common functionality
|
|
13
|
+
* - Methods for running metrics in different modes
|
|
14
|
+
* - Pre-computation for trivial cases to optimize performance
|
|
15
|
+
* - Caching of metric computations to avoid redundant calculations
|
|
16
|
+
* - Support for symmetrical metrics (same result for inputs in any order)
|
|
17
|
+
* - Performance tracking capabilities (Profiler)
|
|
18
|
+
* - Asynchronous execution support for metrics
|
|
19
|
+
*
|
|
20
|
+
* This class is intended to be extended by specific metric implementations that will
|
|
21
|
+
* implement the `compute` method to define the specific metric computation logic.
|
|
22
|
+
*
|
|
23
|
+
* @module Metric
|
|
24
|
+
* @author Paul Köhler (komed3)
|
|
25
|
+
* @license MIT
|
|
26
|
+
*/
|
|
27
|
+
import type { MetricMode, MetricInput, MetricOptions, MetricCompute, MetricRaw, MetricResult, RegistryService } from '../utils/Types';
|
|
28
|
+
/**
|
|
29
|
+
* Abstract class representing a generic string metric.
|
|
30
|
+
*
|
|
31
|
+
* @abstract
|
|
32
|
+
* @template R - The type of the raw result, defaulting to `MetricRaw`.
|
|
33
|
+
*/
|
|
34
|
+
export declare abstract class Metric<R = MetricRaw> {
|
|
35
|
+
private static cache;
|
|
36
|
+
private readonly metric;
|
|
37
|
+
private readonly a;
|
|
38
|
+
private readonly b;
|
|
39
|
+
private origA;
|
|
40
|
+
private origB;
|
|
41
|
+
protected readonly options: MetricOptions;
|
|
42
|
+
protected readonly symmetric: boolean;
|
|
43
|
+
/**
|
|
44
|
+
* Result of the metric computation, which can be a single result or an array of results.
|
|
45
|
+
* This will be populated after running the metric.
|
|
46
|
+
*/
|
|
47
|
+
private results;
|
|
48
|
+
/**
|
|
49
|
+
* Static method to clear the cache of metric computations.
|
|
50
|
+
*/
|
|
51
|
+
static clear(): void;
|
|
52
|
+
/**
|
|
53
|
+
* Swaps two strings and their lengths if the first is longer than the second.
|
|
54
|
+
*
|
|
55
|
+
* @param {string} a - First string
|
|
56
|
+
* @param {string} b - Second string
|
|
57
|
+
* @param {number} m - Length of the first string
|
|
58
|
+
* @param {number} n - Length of the second string
|
|
59
|
+
* @returns {[string, string, number, number]} - Swapped strings and lengths
|
|
60
|
+
*/
|
|
61
|
+
protected static swap(a: string, b: string, m: number, n: number): [
|
|
62
|
+
string,
|
|
63
|
+
string,
|
|
64
|
+
number,
|
|
65
|
+
number
|
|
66
|
+
];
|
|
67
|
+
/**
|
|
68
|
+
* Clamps the similarity result between 0 and 1.
|
|
69
|
+
*
|
|
70
|
+
* @param {number} res - The input similarity to clamp
|
|
71
|
+
* @returns {number} - The clamped similarity (0 to 1)
|
|
72
|
+
*/
|
|
73
|
+
protected static clamp(res: number): number;
|
|
74
|
+
/**
|
|
75
|
+
* Constructor for the Metric class.
|
|
76
|
+
* Initializes the metric with two inputs (strings or arrays of strings) and options.
|
|
77
|
+
*
|
|
78
|
+
* @param {string} metric - The name of the metric (e.g. 'levenshtein')
|
|
79
|
+
* @param {MetricInput} a - First input string or array of strings
|
|
80
|
+
* @param {MetricInput} b - Second input string or array of strings
|
|
81
|
+
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
82
|
+
* @param {boolean} [symmetric=false] - Whether the metric is symmetric (same result for inputs in any order)
|
|
83
|
+
* @throws {Error} - If inputs `a` or `b` are empty
|
|
84
|
+
*/
|
|
85
|
+
constructor(metric: string, a: MetricInput, b: MetricInput, opt?: MetricOptions, symmetric?: boolean);
|
|
86
|
+
/**
|
|
87
|
+
* Pre-compute the metric for two strings.
|
|
88
|
+
* This method is called before the actual computation to handle trivial cases.
|
|
89
|
+
*
|
|
90
|
+
* @param {string} a - First string
|
|
91
|
+
* @param {string} b - Second string
|
|
92
|
+
* @param {number} m - Length of the first string
|
|
93
|
+
* @param {number} n - Length of the second string
|
|
94
|
+
* @returns {MetricCompute<R>|undefined} - Pre-computed result or undefined if not applicable
|
|
95
|
+
*/
|
|
96
|
+
protected preCompute(a: string, b: string, m: number, n: number): MetricCompute<R> | undefined;
|
|
97
|
+
/**
|
|
98
|
+
* Abstract method to be implemented by subclasses to perform the metric computation.
|
|
99
|
+
* This method should contain the logic for computing the metric between two strings.
|
|
100
|
+
*
|
|
101
|
+
* @param {string} a - First string
|
|
102
|
+
* @param {string} b - Second string
|
|
103
|
+
* @param {number} m - Length of the first string
|
|
104
|
+
* @param {number} n - Length of the second string
|
|
105
|
+
* @param {number} maxLen - Maximum length of the strings
|
|
106
|
+
* @returns {MetricCompute<R>} - The result of the metric computation
|
|
107
|
+
* @throws {Error} - If not overridden in a subclass
|
|
108
|
+
*/
|
|
109
|
+
protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<R>;
|
|
110
|
+
/**
|
|
111
|
+
* Run the metric computation for single inputs (two strings).
|
|
112
|
+
* Applies preCompute for trivial cases before cache lookup and computation.
|
|
113
|
+
*
|
|
114
|
+
* If the profiler is active, it will measure time and memory usage.
|
|
115
|
+
*
|
|
116
|
+
* @param {number} i - Pointer to the first string
|
|
117
|
+
* @param {number} j - Pointer to the second string
|
|
118
|
+
* @returns {MetricResultSingle<R>} - The result of the metric computation
|
|
119
|
+
*/
|
|
120
|
+
private runSingle;
|
|
121
|
+
/**
|
|
122
|
+
* Run the metric computation for single inputs (two strings) asynchronously.
|
|
123
|
+
*
|
|
124
|
+
* @param {number} i - Pointer to the first string
|
|
125
|
+
* @param {number} j - Pointer to the second string
|
|
126
|
+
* @returns {Promise<MetricResultSingle<R>>} - Promise resolving the result of the metric computation
|
|
127
|
+
*/
|
|
128
|
+
private runSingleAsync;
|
|
129
|
+
/**
|
|
130
|
+
* Run the metric computation for batch inputs (arrays of strings).
|
|
131
|
+
*
|
|
132
|
+
* It iterates through each string in the first array and computes the metric
|
|
133
|
+
* against each string in the second array.
|
|
134
|
+
*/
|
|
135
|
+
private runBatch;
|
|
136
|
+
/**
|
|
137
|
+
* Run the metric computation for batch inputs (arrays of strings) asynchronously.
|
|
138
|
+
*/
|
|
139
|
+
private runBatchAsync;
|
|
140
|
+
/**
|
|
141
|
+
* Run the metric computation for pairwise inputs (A[i] vs B[i]).
|
|
142
|
+
*
|
|
143
|
+
* This method assumes that both `a` and `b` are arrays of equal length
|
|
144
|
+
* and computes the metric only for corresponding index pairs.
|
|
145
|
+
*/
|
|
146
|
+
private runPairwise;
|
|
147
|
+
/**
|
|
148
|
+
* Run the metric computation for pairwise inputs (A[i] vs B[i]) asynchronously.
|
|
149
|
+
*/
|
|
150
|
+
private runPairwiseAsync;
|
|
151
|
+
/**
|
|
152
|
+
* Set the original inputs to which the results of the metric calculation will refer.
|
|
153
|
+
*
|
|
154
|
+
* @param {MetricInput} [a] - original input(s) for a
|
|
155
|
+
* @param {MetricInput} [b] - original input(s) for b
|
|
156
|
+
*/
|
|
157
|
+
setOriginal(a?: MetricInput, b?: MetricInput): this;
|
|
158
|
+
/**
|
|
159
|
+
* Check if the inputs are in batch mode.
|
|
160
|
+
*
|
|
161
|
+
* This method checks if either `a` or `b` contains more than one string,
|
|
162
|
+
* indicating that the metric is being run in batch mode.
|
|
163
|
+
*
|
|
164
|
+
* @returns {boolean} - True if either input is an array with more than one element
|
|
165
|
+
*/
|
|
166
|
+
isBatch(): boolean;
|
|
167
|
+
/**
|
|
168
|
+
* Check if the inputs are in single mode.
|
|
169
|
+
*
|
|
170
|
+
* This method checks if both `a` and `b` are single strings (not arrays),
|
|
171
|
+
* indicating that the metric is being run on a single pair of strings.
|
|
172
|
+
*
|
|
173
|
+
* @returns {boolean} - True if both inputs are single strings
|
|
174
|
+
*/
|
|
175
|
+
isSingle(): boolean;
|
|
176
|
+
/**
|
|
177
|
+
* Check if the inputs are in pairwise mode.
|
|
178
|
+
*
|
|
179
|
+
* This method checks if both `a` and `b` are arrays of the same length,
|
|
180
|
+
* indicating that the metric is being run on corresponding pairs of strings.
|
|
181
|
+
*
|
|
182
|
+
* @returns {boolean} - True if both inputs are arrays of equal length
|
|
183
|
+
* @param {boolean} [safe=false] - If true, does not throw an error if lengths are not equal
|
|
184
|
+
* @throws {Error} - If `safe` is false and the lengths of `a` and `b` are not equal
|
|
185
|
+
*/
|
|
186
|
+
isPairwise(safe?: boolean): boolean;
|
|
187
|
+
/**
|
|
188
|
+
* Check if the metric is symmetrical.
|
|
189
|
+
*
|
|
190
|
+
* This method returns whether the metric is symmetric, meaning it produces the same
|
|
191
|
+
* result regardless of the order of inputs (e.g., Levenshtein distance).
|
|
192
|
+
*
|
|
193
|
+
* @returns {boolean} - True if the metric is symmetric
|
|
194
|
+
*/
|
|
195
|
+
isSymmetrical(): boolean;
|
|
196
|
+
/**
|
|
197
|
+
* Determine which mode to run the metric in.
|
|
198
|
+
*
|
|
199
|
+
* This method checks the provided mode or defaults to the mode specified in options.
|
|
200
|
+
* If no mode is specified, it defaults to 'default'.
|
|
201
|
+
*
|
|
202
|
+
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
|
|
203
|
+
* @returns {MetricMode} - The determined mode
|
|
204
|
+
*/
|
|
205
|
+
whichMode(mode?: MetricMode): MetricMode;
|
|
206
|
+
/**
|
|
207
|
+
* Clear the cached results of the metric.
|
|
208
|
+
*
|
|
209
|
+
* This method resets the `results` property to `undefined`, effectively clearing
|
|
210
|
+
* any previously computed results. It can be useful for re-running the metric
|
|
211
|
+
* with new inputs or options.
|
|
212
|
+
*/
|
|
213
|
+
clear(): void;
|
|
214
|
+
/**
|
|
215
|
+
* Run the metric computation based on the specified mode.
|
|
216
|
+
*
|
|
217
|
+
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
|
|
218
|
+
* @param {boolean} [clear=true] - Whether to clear previous results before running
|
|
219
|
+
* @throws {Error} - If an unsupported mode is specified
|
|
220
|
+
*/
|
|
221
|
+
run(mode?: MetricMode, clear?: boolean): void;
|
|
222
|
+
/**
|
|
223
|
+
* Run the metric computation based on the specified mode asynchronously.
|
|
224
|
+
*
|
|
225
|
+
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
|
|
226
|
+
* @param {boolean} [clear=true] - Whether to clear previous results before running
|
|
227
|
+
* @returns {Promise<void>} - A promise that resolves when the metric computation is complete
|
|
228
|
+
* @throws {Error} - If an unsupported mode is specified
|
|
229
|
+
*/
|
|
230
|
+
runAsync(mode?: MetricMode, clear?: boolean): Promise<void>;
|
|
231
|
+
/**
|
|
232
|
+
* Get the name of the metric.
|
|
233
|
+
*
|
|
234
|
+
* @returns {string} - The name of the metric
|
|
235
|
+
*/
|
|
236
|
+
getMetricName(): string;
|
|
237
|
+
/**
|
|
238
|
+
* Get the result of the metric computation.
|
|
239
|
+
*
|
|
240
|
+
* @returns {MetricResult<R>} - The result of the metric computation
|
|
241
|
+
* @throws {Error} - If `run()` has not been called before this method
|
|
242
|
+
*/
|
|
243
|
+
getResults(): MetricResult<R>;
|
|
244
|
+
}
|
|
245
|
+
/**
|
|
246
|
+
* Metric registry service for managing metric implementations.
|
|
247
|
+
*
|
|
248
|
+
* This registry allows for dynamic registration and retrieval of metric classes,
|
|
249
|
+
* enabling the use of various string similarity metrics in a consistent manner.
|
|
250
|
+
*/
|
|
251
|
+
export declare const MetricRegistry: RegistryService<Metric<MetricRaw>>;
|
|
252
|
+
/**
|
|
253
|
+
* Type definition for a class constructor that extends the Metric class.
|
|
254
|
+
*
|
|
255
|
+
* This type represents a constructor function for a class that extends the Metric
|
|
256
|
+
* class. It can be used to create instances of specific metric implementations,
|
|
257
|
+
* such as Levenshtein or Jaro-Winkler.
|
|
258
|
+
*
|
|
259
|
+
* @template R - The type of the raw result, defaulting to `MetricRaw`.
|
|
260
|
+
*/
|
|
261
|
+
export type MetricCls<R = MetricRaw> = new (...args: any[]) => Metric<R>;
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Needleman-Wunsch Algorithm
|
|
3
|
+
* src/metric/NeedlemanWunsch.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
|
|
6
|
+
*
|
|
7
|
+
* The Needleman-Wunsch algorithm performs global alignment, aligning two strings
|
|
8
|
+
* entirely, including gaps. It is commonly used in bioinformatics for sequence
|
|
9
|
+
* alignment.
|
|
10
|
+
*
|
|
11
|
+
* @module Metric/NeedlemanWunsch
|
|
12
|
+
* @author Paul Köhler (komed3)
|
|
13
|
+
* @license MIT
|
|
14
|
+
*/
|
|
15
|
+
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
|
|
16
|
+
import { Metric } from './Metric';
|
|
17
|
+
export interface NeedlemanRaw {
|
|
18
|
+
score: number;
|
|
19
|
+
denum: number;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* NeedlemanWunschDistance class extends the Metric class to implement the Needleman-Wunsch algorithm.
|
|
23
|
+
*/
|
|
24
|
+
export declare class NeedlemanWunschDistance extends Metric<NeedlemanRaw> {
|
|
25
|
+
/**
|
|
26
|
+
* Constructor for the NeedlemanWunschDistance class.
|
|
27
|
+
*
|
|
28
|
+
* Initializes the Needleman-Wunsch metric with two input strings or
|
|
29
|
+
* arrays of strings and optional options.
|
|
30
|
+
*
|
|
31
|
+
* @param {MetricInput} a - First input string or array of strings
|
|
32
|
+
* @param {MetricInput} b - Second input string or array of strings
|
|
33
|
+
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
34
|
+
*/
|
|
35
|
+
constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
|
|
36
|
+
/**
|
|
37
|
+
* Calculates the Needleman-Wunsch global alignment score between two strings.
|
|
38
|
+
*
|
|
39
|
+
* @param {string} a - First string
|
|
40
|
+
* @param {string} b - Second string
|
|
41
|
+
* @param {number} m - Length of the first string
|
|
42
|
+
* @param {number} n - Length of the second string
|
|
43
|
+
* @param {number} maxLen - Maximum length of the strings
|
|
44
|
+
* @return {MetricCompute<NeedlemanRaw>} - Object containing the similarity result and raw score
|
|
45
|
+
*/
|
|
46
|
+
protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<NeedlemanRaw>;
|
|
47
|
+
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smith-Waterman Algorithm
|
|
3
|
+
* src/metric/SmithWaterman.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm
|
|
6
|
+
*
|
|
7
|
+
* The Smith-Waterman algorithm performs local alignment, finding the best matching
|
|
8
|
+
* subsequence between two strings. It is commonly used in bioinformatics for local
|
|
9
|
+
* sequence alignment. Instead of looking at the entire sequence, the Smith–Waterman
|
|
10
|
+
* algorithm compares segments of all possible lengths and optimizes the similarity
|
|
11
|
+
* measure.
|
|
12
|
+
*
|
|
13
|
+
* @module Metric/SmithWatermanDistance
|
|
14
|
+
* @author Paul Köhler (komed3)
|
|
15
|
+
* @license MIT
|
|
16
|
+
*/
|
|
17
|
+
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
|
|
18
|
+
import { Metric } from './Metric';
|
|
19
|
+
export interface SmithWatermanRaw {
|
|
20
|
+
score: number;
|
|
21
|
+
denum: number;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* SmithWatermanDistance class extends the Metric class to implement the Smith-Waterman algorithm.
|
|
25
|
+
*/
|
|
26
|
+
export declare class SmithWatermanDistance extends Metric<SmithWatermanRaw> {
|
|
27
|
+
/**
|
|
28
|
+
* Constructor for the SmithWatermanDistance class.
|
|
29
|
+
*
|
|
30
|
+
* Initializes the Smith-Waterman metric with two input strings or
|
|
31
|
+
* arrays of strings and optional options.
|
|
32
|
+
*
|
|
33
|
+
* @param {MetricInput} a - First input string or array of strings
|
|
34
|
+
* @param {MetricInput} b - Second input string or array of strings
|
|
35
|
+
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
36
|
+
*/
|
|
37
|
+
constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
|
|
38
|
+
/**
|
|
39
|
+
* Calculates the Smith-Waterman local alignment score between two strings.
|
|
40
|
+
*
|
|
41
|
+
* @param {string} a - First string
|
|
42
|
+
* @param {string} b - Second string
|
|
43
|
+
* @param {number} m - Length of the first string
|
|
44
|
+
* @param {number} n - Length of the second string
|
|
45
|
+
* @return {MetricCompute<SmithWatermanRaw>} - Object containing the similarity result and raw score
|
|
46
|
+
*/
|
|
47
|
+
protected compute(a: string, b: string, m: number, n: number): MetricCompute<SmithWatermanRaw>;
|
|
48
|
+
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metric Registry Loader
|
|
3
|
+
* src/metric/index.ts
|
|
4
|
+
*
|
|
5
|
+
* This module serves as the central loader and registry for all string similarity metrics
|
|
6
|
+
* available in the CmpStr library. It ensures that all metric implementations are
|
|
7
|
+
* registered with the MetricRegistry and available for use throughout the application.
|
|
8
|
+
*
|
|
9
|
+
* Each metric algorithm (such as Levenshtein, Jaccard, Dice-Sørensen, etc.) is defined
|
|
10
|
+
* in its own module and is automatically registered with the MetricRegistry upon import.
|
|
11
|
+
* This design allows for easy extensibility: new metrics can be added simply by creating
|
|
12
|
+
* a new module and importing it here. The registry pattern enables dynamic lookup,
|
|
13
|
+
* instantiation, and management of all available metrics at runtime.
|
|
14
|
+
*
|
|
15
|
+
* Features:
|
|
16
|
+
* - Centralized registration of all built-in string similarity metrics
|
|
17
|
+
* - Automatic registration via side-effect imports
|
|
18
|
+
* - Extensible: custom metrics can be registered at runtime via the MetricRegistry API
|
|
19
|
+
* - Consistent interface for accessing, listing, and managing metrics
|
|
20
|
+
* - Ensures that all metrics are available for use in the CmpStr API and utilities
|
|
21
|
+
*
|
|
22
|
+
* Natively implemented metrics are highly optimized for performance and efficiency,
|
|
23
|
+
* providing fast and reliable string similarity calculations. They will use CmpStr's
|
|
24
|
+
* pooling system to manage resources effectively, ensuring minimal overhead
|
|
25
|
+
* and maximum performance.
|
|
26
|
+
*
|
|
27
|
+
* @author Paul Köhler (komed3)
|
|
28
|
+
* @license MIT
|
|
29
|
+
*/
|
|
30
|
+
import './Cosine';
|
|
31
|
+
import './DamerauLevenshtein';
|
|
32
|
+
import './DiceSorensen';
|
|
33
|
+
import './Hamming';
|
|
34
|
+
import './Jaccard';
|
|
35
|
+
import './JaroWinkler';
|
|
36
|
+
import './LCS';
|
|
37
|
+
import './Levenshtein';
|
|
38
|
+
import './NeedlemanWunsch';
|
|
39
|
+
import './qGram';
|
|
40
|
+
import './SmithWaterman';
|
|
41
|
+
export { MetricRegistry, Metric, MetricCls } from './Metric';
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* q-Gram Similarity
|
|
3
|
+
* src/metric/qGram.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Q-gram
|
|
6
|
+
*
|
|
7
|
+
* Q-gram similarity is a string-matching algorithm that compares two strings by
|
|
8
|
+
* breaking them into substrings (q-grams) of length Q. The similarity is computed
|
|
9
|
+
* as the size of the intersection of q-gram sets divided by the size of the larger
|
|
10
|
+
* set.
|
|
11
|
+
*
|
|
12
|
+
* This metric is widely used in approximate string matching, information retrieval,
|
|
13
|
+
* and computational linguistics.
|
|
14
|
+
*
|
|
15
|
+
* @module Metric/QGramSimilarity
|
|
16
|
+
* @author Paul Köhler (komed3)
|
|
17
|
+
* @license MIT
|
|
18
|
+
*/
|
|
19
|
+
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
|
|
20
|
+
import { Metric } from './Metric';
|
|
21
|
+
export interface QGramRaw {
|
|
22
|
+
intersection: number;
|
|
23
|
+
size: number;
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* QGramSimilarity class extends the Metric class to implement the q-Gram similarity algorithm.
|
|
27
|
+
*/
|
|
28
|
+
export declare class QGramSimilarity extends Metric<QGramRaw> {
|
|
29
|
+
/**
|
|
30
|
+
* Constructor for the QGramSimilarity class.
|
|
31
|
+
*
|
|
32
|
+
* Initializes the q-Gram similarity metric with two input strings or
|
|
33
|
+
* arrays of strings and optional options.
|
|
34
|
+
*
|
|
35
|
+
* @param {MetricInput} a - First input string or array of strings
|
|
36
|
+
* @param {MetricInput} b - Second input string or array of strings
|
|
37
|
+
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
38
|
+
*/
|
|
39
|
+
constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
|
|
40
|
+
/**
|
|
41
|
+
* Converts a string into a set of q-grams (substrings of length q).
|
|
42
|
+
*
|
|
43
|
+
* @param {string} str - The input string
|
|
44
|
+
* @param {number} q - The length of each q-gram
|
|
45
|
+
* @return {Set<string>} - Set of q-grams
|
|
46
|
+
*/
|
|
47
|
+
private _qGrams;
|
|
48
|
+
/**
|
|
49
|
+
* Calculates the q-Gram similarity between two strings.
|
|
50
|
+
*
|
|
51
|
+
* @param {string} a - First string
|
|
52
|
+
* @param {string} b - Second string
|
|
53
|
+
* @return {MetricCompute<QGramRaw>} - Object containing the similarity result and raw values
|
|
54
|
+
*/
|
|
55
|
+
protected compute(a: string, b: string): MetricCompute<QGramRaw>;
|
|
56
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cologne Phonetic Algorithm
|
|
3
|
+
* src/phonetic/Cologne.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Cologne_phonetics
|
|
6
|
+
*
|
|
7
|
+
* Cologne phonetics, also known as `Kölner Phonetik` or the `Cologne process`,
|
|
8
|
+
* is a phonetic algorithm that assigns a sequence of digits, referred to as the
|
|
9
|
+
* phonetic code, to words. The purpose of this method is to ensure that words
|
|
10
|
+
* with identical sounds receive the same code. This algorithm can facilitate a
|
|
11
|
+
* similarity search among words.
|
|
12
|
+
*
|
|
13
|
+
* Cologne phonetics is related to the well-known Soundex phonetic algorithm,
|
|
14
|
+
* yet it is specifically optimized for the German language. This algorithm was
|
|
15
|
+
* introduced by Hans Joachim Postel in 1969.
|
|
16
|
+
*
|
|
17
|
+
* The Cologne phonetic algorithm works by mapping letters to digits, ignoring
|
|
18
|
+
* certain letters, and applying specific rules to handle character combinations.
|
|
19
|
+
*
|
|
20
|
+
* @module Phonetic/Cologne
|
|
21
|
+
* @author Paul Köhler (komed3)
|
|
22
|
+
* @license MIT
|
|
23
|
+
*/
|
|
24
|
+
import type { PhoneticOptions } from '../utils/Types';
|
|
25
|
+
import { Phonetic } from './Phonetic';
|
|
26
|
+
/**
|
|
27
|
+
* Cologne class extends the Phonetic class to implement the Cologne phonetic algorithm.
|
|
28
|
+
*/
|
|
29
|
+
export declare class Cologne extends Phonetic {
|
|
30
|
+
protected static default: PhoneticOptions;
|
|
31
|
+
/**
|
|
32
|
+
* Constructor for the Cologne class.
|
|
33
|
+
*
|
|
34
|
+
* Initializes the Cologne phonetic algorithm with the mapping and options.
|
|
35
|
+
*
|
|
36
|
+
* @param {PhoneticOptions} [opt] - Options for the Cologne phonetic algorithm
|
|
37
|
+
*/
|
|
38
|
+
constructor(opt?: PhoneticOptions);
|
|
39
|
+
/**
|
|
40
|
+
* Adjusts the phonetic code by removing every '0' that is not in the first position.
|
|
41
|
+
*
|
|
42
|
+
* @param {string} code - The phonetic code to adjust
|
|
43
|
+
* @returns {string} - The adjusted phonetic code
|
|
44
|
+
*/
|
|
45
|
+
protected adjustCode(code: string): string;
|
|
46
|
+
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metaphone Phonetic Algorithm
|
|
3
|
+
* src/phonetic/Metaphone.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Metaphone
|
|
6
|
+
*
|
|
7
|
+
* Metaphone is a phonetic algorithm for indexing words by their English pronunciation.
|
|
8
|
+
* It encodes words into a string of consonant symbols, allowing for the comparison of
|
|
9
|
+
* words based on their pronunciation rather than their spelling. Metaphone is more
|
|
10
|
+
* accurate than Soundex for English and is widely used in search, spell-checking,
|
|
11
|
+
* and fuzzy matching.
|
|
12
|
+
*
|
|
13
|
+
* This implementation uses a mapping and a comprehensive ruleset to efficiently
|
|
14
|
+
* transform input words into their Metaphone code. The algorithm drops or transforms
|
|
15
|
+
* letters according to context-sensitive rules, and only retains vowels at the start.
|
|
16
|
+
*
|
|
17
|
+
* @module Phonetic/Metaphone
|
|
18
|
+
* @author Paul Köhler (komed3)
|
|
19
|
+
* @license MIT
|
|
20
|
+
*/
|
|
21
|
+
import type { PhoneticOptions } from '../utils/Types';
|
|
22
|
+
import { Phonetic } from './Phonetic';
|
|
23
|
+
/**
|
|
24
|
+
* Metaphone class extends the Phonetic class to implement the Metaphone phonetic algorithm.
|
|
25
|
+
*/
|
|
26
|
+
export declare class Metaphone extends Phonetic {
|
|
27
|
+
protected static default: PhoneticOptions;
|
|
28
|
+
/**
|
|
29
|
+
* Constructor for the Metaphone class.
|
|
30
|
+
*
|
|
31
|
+
* Initializes the Metaphone phonetic algorithm with the mapping and options.
|
|
32
|
+
*
|
|
33
|
+
* @param {PhoneticOptions} [opt] - Options for the Metaphone phonetic algorithm
|
|
34
|
+
*/
|
|
35
|
+
constructor(opt?: PhoneticOptions);
|
|
36
|
+
/**
|
|
37
|
+
* Generates the Metaphone code for a given word.
|
|
38
|
+
*
|
|
39
|
+
* @param {string} word - The input word to be converted into a Metaphone code
|
|
40
|
+
* @returns {string} - The generated Metaphone code
|
|
41
|
+
*/
|
|
42
|
+
protected encode(word: string): string;
|
|
43
|
+
/**
|
|
44
|
+
* Adjusts the Metaphone code by removing every vowel that is not in the first position.
|
|
45
|
+
*
|
|
46
|
+
* @param {string} code - The Metaphone code to be adjusted
|
|
47
|
+
* @returns {string} - The adjusted Metaphone code
|
|
48
|
+
*/
|
|
49
|
+
protected adjustCode(code: string): string;
|
|
50
|
+
}
|