cmpstr 2.0.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +75 -499
- package/dist/CmpStr.esm.js +4863 -0
- package/dist/CmpStr.esm.js.map +1 -0
- package/dist/CmpStr.esm.min.js +8 -0
- package/dist/CmpStr.esm.min.js.map +1 -0
- package/dist/CmpStr.umd.js +4875 -0
- package/dist/CmpStr.umd.js.map +1 -0
- package/dist/CmpStr.umd.min.js +8 -0
- package/dist/CmpStr.umd.min.js.map +1 -0
- package/dist/cjs/CmpStr.js +663 -0
- package/dist/cjs/CmpStr.js.map +1 -0
- package/dist/cjs/CmpStrAsync.js +336 -0
- package/dist/cjs/CmpStrAsync.js.map +1 -0
- package/dist/cjs/index.js +15 -0
- package/dist/cjs/index.js.map +1 -0
- package/dist/cjs/metric/Cosine.js +101 -0
- package/dist/cjs/metric/Cosine.js.map +1 -0
- package/dist/cjs/metric/DamerauLevenshtein.js +110 -0
- package/dist/cjs/metric/DamerauLevenshtein.js.map +1 -0
- package/dist/cjs/metric/DiceSorensen.js +91 -0
- package/dist/cjs/metric/DiceSorensen.js.map +1 -0
- package/dist/cjs/metric/Hamming.js +82 -0
- package/dist/cjs/metric/Hamming.js.map +1 -0
- package/dist/cjs/metric/Jaccard.js +76 -0
- package/dist/cjs/metric/Jaccard.js.map +1 -0
- package/dist/cjs/metric/JaroWinkler.js +114 -0
- package/dist/cjs/metric/JaroWinkler.js.map +1 -0
- package/dist/cjs/metric/LCS.js +89 -0
- package/dist/cjs/metric/LCS.js.map +1 -0
- package/dist/cjs/metric/Levenshtein.js +94 -0
- package/dist/cjs/metric/Levenshtein.js.map +1 -0
- package/dist/cjs/metric/Metric.js +445 -0
- package/dist/cjs/metric/Metric.js.map +1 -0
- package/dist/cjs/metric/NeedlemanWunsch.js +95 -0
- package/dist/cjs/metric/NeedlemanWunsch.js.map +1 -0
- package/dist/cjs/metric/SmithWaterman.js +98 -0
- package/dist/cjs/metric/SmithWaterman.js.map +1 -0
- package/dist/cjs/metric/qGram.js +91 -0
- package/dist/cjs/metric/qGram.js.map +1 -0
- package/dist/cjs/phonetic/Cologne.js +112 -0
- package/dist/cjs/phonetic/Cologne.js.map +1 -0
- package/dist/cjs/phonetic/Metaphone.js +172 -0
- package/dist/cjs/phonetic/Metaphone.js.map +1 -0
- package/dist/cjs/phonetic/Phonetic.js +413 -0
- package/dist/cjs/phonetic/Phonetic.js.map +1 -0
- package/dist/cjs/phonetic/Soundex.js +135 -0
- package/dist/cjs/phonetic/Soundex.js.map +1 -0
- package/dist/cjs/utils/DeepMerge.js +144 -0
- package/dist/cjs/utils/DeepMerge.js.map +1 -0
- package/dist/cjs/utils/DiffChecker.js +500 -0
- package/dist/cjs/utils/DiffChecker.js.map +1 -0
- package/dist/cjs/utils/Filter.js +189 -0
- package/dist/cjs/utils/Filter.js.map +1 -0
- package/dist/cjs/utils/HashTable.js +175 -0
- package/dist/cjs/utils/HashTable.js.map +1 -0
- package/dist/cjs/utils/Normalizer.js +144 -0
- package/dist/cjs/utils/Normalizer.js.map +1 -0
- package/dist/cjs/utils/Pool.js +196 -0
- package/dist/cjs/utils/Pool.js.map +1 -0
- package/dist/cjs/utils/Profiler.js +229 -0
- package/dist/cjs/utils/Profiler.js.map +1 -0
- package/dist/cjs/utils/Registry.js +148 -0
- package/dist/cjs/utils/Registry.js.map +1 -0
- package/dist/cjs/utils/TextAnalyzer.js +358 -0
- package/dist/cjs/utils/TextAnalyzer.js.map +1 -0
- package/dist/esm/CmpStr.js +662 -0
- package/dist/esm/CmpStr.js.map +1 -0
- package/dist/esm/CmpStrAsync.js +331 -0
- package/dist/esm/CmpStrAsync.js.map +1 -0
- package/dist/esm/index.js +7 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/metric/Cosine.js +99 -0
- package/dist/esm/metric/Cosine.js.map +1 -0
- package/dist/esm/metric/DamerauLevenshtein.js +108 -0
- package/dist/esm/metric/DamerauLevenshtein.js.map +1 -0
- package/dist/esm/metric/DiceSorensen.js +89 -0
- package/dist/esm/metric/DiceSorensen.js.map +1 -0
- package/dist/esm/metric/Hamming.js +77 -0
- package/dist/esm/metric/Hamming.js.map +1 -0
- package/dist/esm/metric/Jaccard.js +74 -0
- package/dist/esm/metric/Jaccard.js.map +1 -0
- package/dist/esm/metric/JaroWinkler.js +112 -0
- package/dist/esm/metric/JaroWinkler.js.map +1 -0
- package/dist/esm/metric/LCS.js +87 -0
- package/dist/esm/metric/LCS.js.map +1 -0
- package/dist/esm/metric/Levenshtein.js +92 -0
- package/dist/esm/metric/Levenshtein.js.map +1 -0
- package/dist/esm/metric/Metric.js +442 -0
- package/dist/esm/metric/Metric.js.map +1 -0
- package/dist/esm/metric/NeedlemanWunsch.js +93 -0
- package/dist/esm/metric/NeedlemanWunsch.js.map +1 -0
- package/dist/esm/metric/SmithWaterman.js +96 -0
- package/dist/esm/metric/SmithWaterman.js.map +1 -0
- package/dist/esm/metric/qGram.js +89 -0
- package/dist/esm/metric/qGram.js.map +1 -0
- package/dist/esm/phonetic/Cologne.js +114 -0
- package/dist/esm/phonetic/Cologne.js.map +1 -0
- package/dist/esm/phonetic/Metaphone.js +174 -0
- package/dist/esm/phonetic/Metaphone.js.map +1 -0
- package/dist/esm/phonetic/Phonetic.js +409 -0
- package/dist/esm/phonetic/Phonetic.js.map +1 -0
- package/dist/esm/phonetic/Soundex.js +137 -0
- package/dist/esm/phonetic/Soundex.js.map +1 -0
- package/dist/esm/utils/DeepMerge.js +139 -0
- package/dist/esm/utils/DeepMerge.js.map +1 -0
- package/dist/esm/utils/DiffChecker.js +498 -0
- package/dist/esm/utils/DiffChecker.js.map +1 -0
- package/dist/esm/utils/Filter.js +187 -0
- package/dist/esm/utils/Filter.js.map +1 -0
- package/dist/esm/utils/HashTable.js +173 -0
- package/dist/esm/utils/HashTable.js.map +1 -0
- package/dist/esm/utils/Normalizer.js +142 -0
- package/dist/esm/utils/Normalizer.js.map +1 -0
- package/dist/esm/utils/Pool.js +194 -0
- package/dist/esm/utils/Pool.js.map +1 -0
- package/dist/esm/utils/Profiler.js +227 -0
- package/dist/esm/utils/Profiler.js.map +1 -0
- package/dist/esm/utils/Registry.js +142 -0
- package/dist/esm/utils/Registry.js.map +1 -0
- package/dist/esm/utils/TextAnalyzer.js +356 -0
- package/dist/esm/utils/TextAnalyzer.js.map +1 -0
- package/dist/types/CmpStr.d.ts +472 -0
- package/dist/types/CmpStrAsync.d.ts +233 -0
- package/dist/types/index.d.ts +51 -0
- package/dist/types/metric/Cosine.d.ts +57 -0
- package/dist/types/metric/DamerauLevenshtein.d.ts +50 -0
- package/dist/types/metric/DiceSorensen.d.ts +57 -0
- package/dist/types/metric/Hamming.d.ts +49 -0
- package/dist/types/metric/Jaccard.d.ts +48 -0
- package/dist/types/metric/JaroWinkler.d.ts +50 -0
- package/dist/types/metric/LCS.d.ts +50 -0
- package/dist/types/metric/Levenshtein.d.ts +50 -0
- package/dist/types/metric/Metric.d.ts +261 -0
- package/dist/types/metric/NeedlemanWunsch.d.ts +47 -0
- package/dist/types/metric/SmithWaterman.d.ts +48 -0
- package/dist/types/metric/index.d.ts +41 -0
- package/dist/types/metric/qGram.d.ts +56 -0
- package/dist/types/phonetic/Cologne.d.ts +46 -0
- package/dist/types/phonetic/Metaphone.d.ts +50 -0
- package/dist/types/phonetic/Phonetic.d.ts +189 -0
- package/dist/types/phonetic/Soundex.d.ts +49 -0
- package/dist/types/phonetic/index.d.ts +30 -0
- package/dist/types/utils/DeepMerge.d.ts +70 -0
- package/dist/types/utils/DiffChecker.d.ts +137 -0
- package/dist/types/utils/Filter.d.ts +97 -0
- package/dist/types/utils/HashTable.d.ts +86 -0
- package/dist/types/utils/Normalizer.d.ts +76 -0
- package/dist/types/utils/Pool.d.ts +63 -0
- package/dist/types/utils/Profiler.d.ts +129 -0
- package/dist/types/utils/Registry.d.ts +57 -0
- package/dist/types/utils/TextAnalyzer.d.ts +199 -0
- package/dist/types/utils/Types.d.ts +313 -0
- package/package.json +62 -49
- package/src/CmpStr.d.ts +0 -70
- package/src/CmpStr.js +0 -912
- package/src/CmpStrAsync.d.ts +0 -19
- package/src/CmpStrAsync.js +0 -204
- package/src/algorithms/cosine.js +0 -86
- package/src/algorithms/damerau.js +0 -78
- package/src/algorithms/dice.js +0 -65
- package/src/algorithms/hamming.js +0 -44
- package/src/algorithms/jaccard.js +0 -34
- package/src/algorithms/jaroWinkler.js +0 -106
- package/src/algorithms/lcs.js +0 -58
- package/src/algorithms/levenshtein.js +0 -70
- package/src/algorithms/needlemanWunsch.js +0 -72
- package/src/algorithms/qGram.js +0 -63
- package/src/algorithms/smithWaterman.js +0 -78
- package/src/algorithms/soundex.js +0 -152
- package/src/index.d.ts +0 -3
- package/src/index.js +0 -47
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CmpStrAsync Asynchronous API
|
|
3
|
+
* src/CmpStrAsync.ts
|
|
4
|
+
*
|
|
5
|
+
* The CmpStrAsync class provides a fully asynchronous, Promise-based interface for
|
|
6
|
+
* advanced string comparison, similarity measurement, phonetic indexing, filtering
|
|
7
|
+
* and normalization. It extends the CmpStr class and overrides all relevant methods
|
|
8
|
+
* to support non-blocking, scalable, and I/O-friendly workloads.
|
|
9
|
+
*
|
|
10
|
+
* Features:
|
|
11
|
+
* - Asynchronous normalization, filtering, and metric computation
|
|
12
|
+
* - Async batch, pairwise, and single string comparison with detailed results
|
|
13
|
+
* - Async phonetic indexing and phonetic-aware search and comparison
|
|
14
|
+
* - Full compatibility with the synchronous CmpStr API
|
|
15
|
+
* - Designed for large-scale, high-performance, and server-side applications
|
|
16
|
+
*
|
|
17
|
+
* @module CmpStrAsync
|
|
18
|
+
* @author Paul Köhler (komed3)
|
|
19
|
+
* @license MIT
|
|
20
|
+
*/
|
|
21
|
+
import type { CmpStrOptions, CmpStrProcessors, CmpStrResult, NormalizeFlags, PhoneticOptions, MetricRaw, MetricInput, MetricMode, MetricResult, MetricResultSingle, MetricResultBatch } from './utils/Types';
|
|
22
|
+
import { CmpStr } from './CmpStr';
|
|
23
|
+
/**
|
|
24
|
+
* The CmpStrAsync class provides a fully asynchronous API for string comparison,
|
|
25
|
+
* phonetic indexing, filtering and normalization.
|
|
26
|
+
*
|
|
27
|
+
* @template R - The type of the metric result, defaults to MetricRaw
|
|
28
|
+
*/
|
|
29
|
+
export declare class CmpStrAsync<R = MetricRaw> extends CmpStr<R> {
|
|
30
|
+
/**
|
|
31
|
+
* --------------------------------------------------------------------------------
|
|
32
|
+
* Instantiate the CmpStrAsync class
|
|
33
|
+
* --------------------------------------------------------------------------------
|
|
34
|
+
*
|
|
35
|
+
* Methods to create a new CmpStrAsync instance with the given options.
|
|
36
|
+
* Using the static `create` method is recommended to ensure proper instantiation.
|
|
37
|
+
*/
|
|
38
|
+
/**
|
|
39
|
+
* Creates a new CmpStrAsync instance with the given options.
|
|
40
|
+
*
|
|
41
|
+
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
|
|
42
|
+
* @returns {CmpStrAsync<R>} - A new CmpStrAsync instance
|
|
43
|
+
*/
|
|
44
|
+
static create<R = MetricRaw>(opt?: string | CmpStrOptions): CmpStrAsync<R>;
|
|
45
|
+
/**
|
|
46
|
+
* Creates a new CmpStrAsync instance calling the super constructor.
|
|
47
|
+
*
|
|
48
|
+
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
|
|
49
|
+
*/
|
|
50
|
+
protected constructor(opt?: string | CmpStrOptions);
|
|
51
|
+
/**
|
|
52
|
+
* ---------------------------------------------------------------------------------
|
|
53
|
+
* Protected asynchronous utility methods for internal use
|
|
54
|
+
* ---------------------------------------------------------------------------------
|
|
55
|
+
*
|
|
56
|
+
* These methods provide asynchronous normalization, filtering, and metric
|
|
57
|
+
* computation capabilities, allowing for non-blocking operations.
|
|
58
|
+
*/
|
|
59
|
+
/**
|
|
60
|
+
* Asynchronously normalizes the input string or array using the configured or provided flags.
|
|
61
|
+
*
|
|
62
|
+
* @param {MetricInput} input - The input string or array
|
|
63
|
+
* @param {NormalizeFlags} [flags] - Normalization flags
|
|
64
|
+
* @returns {Promise<MetricInput>} - The normalized input
|
|
65
|
+
*/
|
|
66
|
+
protected normalizeAsync(input: MetricInput, flags?: NormalizeFlags): Promise<MetricInput>;
|
|
67
|
+
/**
|
|
68
|
+
* Asynchronously applies all active filters to the input string or array.
|
|
69
|
+
*
|
|
70
|
+
* @param {MetricInput} input - The input string or array
|
|
71
|
+
* @param {string} hook - The filter hook
|
|
72
|
+
* @returns {Promise<MetricInput>} - The filtered string(s)
|
|
73
|
+
*/
|
|
74
|
+
protected filterAsync(input: MetricInput, hook: string): Promise<MetricInput>;
|
|
75
|
+
/**
|
|
76
|
+
* Asynchronously prepares the input by normalizing and filtering.
|
|
77
|
+
*
|
|
78
|
+
* @param {MetricInput} input - The input string or array
|
|
79
|
+
* @param {CmpStrOptions} [opt] - Optional options to use
|
|
80
|
+
* @returns {Promise<MetricInput>} - The prepared input
|
|
81
|
+
*/
|
|
82
|
+
protected prepareAsync(input: MetricInput, opt?: CmpStrOptions): Promise<MetricInput>;
|
|
83
|
+
/**
|
|
84
|
+
* Asynchronously computes the phonetic index for the given input using
|
|
85
|
+
* the specified phonetic algorithm.
|
|
86
|
+
*
|
|
87
|
+
* @param {MetricInput} input - The input string or array
|
|
88
|
+
* @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
|
|
89
|
+
* @returns {Promise<MetricInput>} - The phonetic index for the given input
|
|
90
|
+
*/
|
|
91
|
+
protected indexAsync(input: MetricInput, { algo, opt }: {
|
|
92
|
+
algo: string;
|
|
93
|
+
opt?: PhoneticOptions;
|
|
94
|
+
}): Promise<MetricInput>;
|
|
95
|
+
/**
|
|
96
|
+
* Asynchronously computes the metric result for the given inputs, applying
|
|
97
|
+
* normalization and filtering as configured.
|
|
98
|
+
*
|
|
99
|
+
* @template T - The type of the metric result
|
|
100
|
+
* @param {MetricInput} a - The first input string or array
|
|
101
|
+
* @param {MetricInput} b - The second input string or array
|
|
102
|
+
* @param {CmpStrOptions} [opt] - Optional options to use
|
|
103
|
+
* @param {MetricMode} [mode='single'] - The metric mode to use
|
|
104
|
+
* @param {boolean} [raw=false] - Whether to return raw results
|
|
105
|
+
* @param {boolean} [skip=false] - Whether to skip normalization and filtering
|
|
106
|
+
* @returns {Promise<T>} - The computed metric result
|
|
107
|
+
*/
|
|
108
|
+
protected computeAsync<T extends MetricResult<R> | CmpStrResult | CmpStrResult[]>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions, mode?: MetricMode, raw?: boolean, skip?: boolean): Promise<T>;
|
|
109
|
+
/**
|
|
110
|
+
* ---------------------------------------------------------------------------------
|
|
111
|
+
* Public asynchronous core methods for string comparison
|
|
112
|
+
* ---------------------------------------------------------------------------------
|
|
113
|
+
*
|
|
114
|
+
* These methods provide the asynchronous core functionality for string comparison,
|
|
115
|
+
* phonetic indexing and text search, allowing for non-blocking operations.
|
|
116
|
+
*/
|
|
117
|
+
/**
|
|
118
|
+
* Asynchronously performs a single metric comparison.
|
|
119
|
+
*
|
|
120
|
+
* @template T - The type of the metric result
|
|
121
|
+
* @param {string} a - The source string
|
|
122
|
+
* @param {string} b - The target string
|
|
123
|
+
* @param {CmpStrOptions} [opt] - Optional options
|
|
124
|
+
* @returns {Promise<T>} - The metric result
|
|
125
|
+
*/
|
|
126
|
+
testAsync<T extends CmpStrResult | MetricResultSingle<R>>(a: string, b: string, opt?: CmpStrOptions): Promise<T>;
|
|
127
|
+
/**
|
|
128
|
+
* Asynchronously performs a single metric comparison returning the numeric score.
|
|
129
|
+
*
|
|
130
|
+
* @param {string} a - The source string
|
|
131
|
+
* @param {string} b - The target string
|
|
132
|
+
* @param {CmpStrOptions} [opt] - Optional options
|
|
133
|
+
* @returns {Promise<number>} - The similarity score (0..1)
|
|
134
|
+
*/
|
|
135
|
+
compareAsync(a: string, b: string, opt?: CmpStrOptions): Promise<number>;
|
|
136
|
+
/**
|
|
137
|
+
* Asynchronously performs a batch metric comparison between source and target
|
|
138
|
+
* strings or array of strings.
|
|
139
|
+
*
|
|
140
|
+
* @template T - The type of the metric result
|
|
141
|
+
* @param {MetricInput} a - The source string or array of strings
|
|
142
|
+
* @param {MetricInput} b - The target string or array of strings
|
|
143
|
+
* @param {CmpStrOptions} [opt] - Optional options
|
|
144
|
+
* @returns {Promise<T>} - The batch metric results
|
|
145
|
+
*/
|
|
146
|
+
batchTestAsync<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions): Promise<T>;
|
|
147
|
+
/**
|
|
148
|
+
* Asynchronously performs a batch metric comparison and returns results sorted by score.
|
|
149
|
+
*
|
|
150
|
+
* @template T - The type of the metric result
|
|
151
|
+
* @param {MetricInput} a - The source string or array of strings
|
|
152
|
+
* @param {MetricInput} b - The target string or array of strings
|
|
153
|
+
* @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
|
|
154
|
+
* @param {CmpStrOptions} [opt] - Optional options
|
|
155
|
+
* @returns {Promise<T>} - The sorted batch results
|
|
156
|
+
*/
|
|
157
|
+
batchSortedAsync<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, dir?: 'desc' | 'asc', opt?: CmpStrOptions): Promise<T>;
|
|
158
|
+
/**
|
|
159
|
+
* Asynchronously performs a pairwise metric comparison between source and target
|
|
160
|
+
* strings or array of strings.
|
|
161
|
+
*
|
|
162
|
+
* @template T - The type of the metric result
|
|
163
|
+
* Input arrays need to be of the same length to perform pairwise comparison,
|
|
164
|
+
* otherwise the method will throw an error.
|
|
165
|
+
*
|
|
166
|
+
* @param {MetricInput} a - The source string or array of strings
|
|
167
|
+
* @param {MetricInput} b - The target string or array of strings
|
|
168
|
+
* @param {CmpStrOptions} [opt] - Optional options
|
|
169
|
+
* @returns {Promise<T>} - The pairwise metric results
|
|
170
|
+
*/
|
|
171
|
+
pairsAsync<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions): Promise<T>;
|
|
172
|
+
/**
|
|
173
|
+
* Asynchronously performs a batch comparison and returns only results above the threshold.
|
|
174
|
+
*
|
|
175
|
+
* @template T - The type of the metric result
|
|
176
|
+
* @param {MetricInput} a - The source string or array of strings
|
|
177
|
+
* @param {MetricInput} b - The target string or array of strings
|
|
178
|
+
* @param {number} threshold - The similarity threshold (0..1)
|
|
179
|
+
* @param {CmpStrOptions} [opt] - Optional options
|
|
180
|
+
* @returns {Promise<T>} - The filtered batch results
|
|
181
|
+
*/
|
|
182
|
+
matchAsync<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, threshold: number, opt?: CmpStrOptions): Promise<T>;
|
|
183
|
+
/**
|
|
184
|
+
* Asynchronously returns the n closest matches from a batch comparison.
|
|
185
|
+
*
|
|
186
|
+
* @template T - The type of the metric result
|
|
187
|
+
* @param {MetricInput} a - The source string or array of strings
|
|
188
|
+
* @param {MetricInput} b - The target string or array of strings
|
|
189
|
+
* @param {number} [n=1] - Number of closest matches
|
|
190
|
+
* @param {CmpStrOptions} [opt] - Optional options
|
|
191
|
+
* @returns {Promise<T>} - The closest matches
|
|
192
|
+
*/
|
|
193
|
+
closestAsync<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, n?: number, opt?: CmpStrOptions): Promise<T>;
|
|
194
|
+
/**
|
|
195
|
+
* Asynchronously returns the n furthest matches from a batch comparison.
|
|
196
|
+
*
|
|
197
|
+
* @template T - The type of the metric result
|
|
198
|
+
* @param {MetricInput} a - The source string or array of strings
|
|
199
|
+
* @param {MetricInput} b - The target string or array of strings
|
|
200
|
+
* @param {number} [n=1] - Number of furthest matches
|
|
201
|
+
* @param {CmpStrOptions} [opt] - Optional options
|
|
202
|
+
* @returns {Promise<T>} - The furthest matches
|
|
203
|
+
*/
|
|
204
|
+
furthestAsync<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, n?: number, opt?: CmpStrOptions): Promise<T>;
|
|
205
|
+
/**
|
|
206
|
+
* Asynchronously performs a normalized and filtered substring search.
|
|
207
|
+
*
|
|
208
|
+
* @param {string} needle - The search string
|
|
209
|
+
* @param {string[]} haystack - The array to search in
|
|
210
|
+
* @param {NormalizeFlags} [flags] - Normalization flags
|
|
211
|
+
* @param {CmpStrProcessors} [processors] - Pre-processors to apply
|
|
212
|
+
* @returns {Promise<string[]>} - Array of matching entries
|
|
213
|
+
*/
|
|
214
|
+
searchAsync(needle: string, haystack: string[], flags?: NormalizeFlags, processors?: CmpStrProcessors): Promise<string[]>;
|
|
215
|
+
/**
|
|
216
|
+
* Asynchronously computes a similarity matrix for the given input array.
|
|
217
|
+
*
|
|
218
|
+
* @param {string[]} input - The input array
|
|
219
|
+
* @param {CmpStrOptions} [opt] - Optional options
|
|
220
|
+
* @returns {Promise<number[][]>} - The similarity matrix
|
|
221
|
+
*/
|
|
222
|
+
matrixAsync(input: string[], opt?: CmpStrOptions): Promise<number[][]>;
|
|
223
|
+
/**
|
|
224
|
+
* Asynchronously computes the phonetic index for a string using the
|
|
225
|
+
* configured or given algorithm.
|
|
226
|
+
*
|
|
227
|
+
* @param {string} input - The input string
|
|
228
|
+
* @param {string} [algo] - The phonetic algorithm to use
|
|
229
|
+
* @param {PhoneticOptions} [opt] - Optional phonetic options
|
|
230
|
+
* @returns {Promise<string>} - The phonetic index as a string
|
|
231
|
+
*/
|
|
232
|
+
phoneticIndexAsync(input: string, algo?: string, opt?: PhoneticOptions): Promise<string>;
|
|
233
|
+
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CmpStr Main Entry Point
|
|
3
|
+
* src/index.ts
|
|
4
|
+
*
|
|
5
|
+
* CmpStr is a comprehensive, extensible, and highly abstracted TypeScript library for
|
|
6
|
+
* advanced string comparison, similarity measurement, phonetic indexing, normalization,
|
|
7
|
+
* filtering, and text analysis. It is designed for both high-level application development
|
|
8
|
+
* and research, offering a unified API for single, batch, and pairwise operations.
|
|
9
|
+
*
|
|
10
|
+
* Version: 3.0.0
|
|
11
|
+
* Author: Paul Köhler (komed3)
|
|
12
|
+
* License: MIT
|
|
13
|
+
*
|
|
14
|
+
* Core Features:
|
|
15
|
+
* --------------
|
|
16
|
+
*
|
|
17
|
+
* - Unified interface for string similarity, distance, and matching
|
|
18
|
+
* - Pluggable metric system (Levenshtein, Jaro-Winkler, Cosine, Dice, Hamming, LCS, etc.)
|
|
19
|
+
* - Phonetic algorithms (Cologne, Soundex, Metaphone) with mapping registry
|
|
20
|
+
* - Flexible normalization and filtering pipeline for all inputs
|
|
21
|
+
* - Batch, pairwise, and single comparison with detailed, type-safe results
|
|
22
|
+
* - Phonetic-aware search, indexing, and comparison
|
|
23
|
+
* - Readability and text analysis utilities (syllables, word stats, etc.)
|
|
24
|
+
* - Unified diff and difference reporting (line/word, ASCII/CLI)
|
|
25
|
+
* - Full TypeScript type safety, extensibility, and profiling support
|
|
26
|
+
* - Modular architecture for easy integration and extension
|
|
27
|
+
*
|
|
28
|
+
* Overview:
|
|
29
|
+
* ---------
|
|
30
|
+
*
|
|
31
|
+
* CmpStr provides a single entry point for all string comparison and analysis tasks.
|
|
32
|
+
* The main class, `CmpStr`, exposes a rich API for comparing strings, arrays, or
|
|
33
|
+
* batches, with full support for normalization, filtering, and phonetic processing.
|
|
34
|
+
* All metric and phonetic algorithms are managed via registries, allowing for
|
|
35
|
+
* dynamic extension and customization. The package also includes utilities for
|
|
36
|
+
* diffing, text analysis, and profiling, making it suitable for applications such as
|
|
37
|
+
* search engines, data deduplication, fuzzy matching, linguistics, and more.
|
|
38
|
+
*
|
|
39
|
+
* For asynchronous workloads, use `CmpStrAsync`, which provides the same API with
|
|
40
|
+
* Promise-based, non-blocking methods for large-scale or I/O-bound operations.
|
|
41
|
+
*
|
|
42
|
+
* @version 3.0.0
|
|
43
|
+
* @author Paul Köhler (komed3)
|
|
44
|
+
* @license MIT
|
|
45
|
+
*/
|
|
46
|
+
export * from './utils/Types';
|
|
47
|
+
export { CmpStr } from './CmpStr';
|
|
48
|
+
export { CmpStrAsync } from './CmpStrAsync';
|
|
49
|
+
export { DiffChecker } from './utils/DiffChecker';
|
|
50
|
+
export { Normalizer } from './utils/Normalizer';
|
|
51
|
+
export { TextAnalyzer } from './utils/TextAnalyzer';
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cosine Similarity
|
|
3
|
+
* src/metric/Cosine.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Cosine_similarity
|
|
6
|
+
*
|
|
7
|
+
* Cosine similarity is a metric used to measure how similar two vectors are, regardless
|
|
8
|
+
* of their magnitude. In text analysis, it is commonly used to compare documents or
|
|
9
|
+
* strings by representing them as term frequency vectors and computing the cosine of
|
|
10
|
+
* the angle between these vectors.
|
|
11
|
+
*
|
|
12
|
+
* The result is a value between 0 and 1, where 1 means the vectors are identical and
|
|
13
|
+
* 0 means they are orthogonal (no similarity).
|
|
14
|
+
*
|
|
15
|
+
* @module Metric/CosineSimilarity
|
|
16
|
+
* @author Paul Köhler (komed3)
|
|
17
|
+
* @license MIT
|
|
18
|
+
*/
|
|
19
|
+
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
|
|
20
|
+
import { Metric } from './Metric';
|
|
21
|
+
export interface CosineRaw {
|
|
22
|
+
dotProduct: number;
|
|
23
|
+
magnitudeA: number;
|
|
24
|
+
magnitudeB: number;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* CosineSimilarity class extends the Metric class to implement the Cosine similarity algorithm.
|
|
28
|
+
*/
|
|
29
|
+
export declare class CosineSimilarity extends Metric<CosineRaw> {
|
|
30
|
+
/**
|
|
31
|
+
* Constructor for the CosineSimilarity class.
|
|
32
|
+
*
|
|
33
|
+
* Initializes the Cosine similarity metric with two input strings or
|
|
34
|
+
* arrays of strings and optional options.
|
|
35
|
+
*
|
|
36
|
+
* @param {MetricInput} a - First input string or array of strings
|
|
37
|
+
* @param {MetricInput} b - Second input string or array of strings
|
|
38
|
+
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
39
|
+
*/
|
|
40
|
+
constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
|
|
41
|
+
/**
|
|
42
|
+
* Calculates the term frequency vector for a given string.
|
|
43
|
+
*
|
|
44
|
+
* @param {string} str - The input string
|
|
45
|
+
* @param {string} delimiter - The delimiter to split terms
|
|
46
|
+
* @return {Map<string, number>} - Term frequency map
|
|
47
|
+
*/
|
|
48
|
+
private _termFreq;
|
|
49
|
+
/**
|
|
50
|
+
* Calculates the Cosine similarity between two strings.
|
|
51
|
+
*
|
|
52
|
+
* @param {string} a - First string
|
|
53
|
+
* @param {string} b - Second string
|
|
54
|
+
* @return {MetricCompute<CosineRaw>} - Object containing the similarity result and raw values
|
|
55
|
+
*/
|
|
56
|
+
protected compute(a: string, b: string): MetricCompute<CosineRaw>;
|
|
57
|
+
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Damerau-Levenshtein Distance
|
|
3
|
+
* src/metric/DamerauLevenshtein.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
|
|
6
|
+
*
|
|
7
|
+
* The Damerau-Levenshtein distance extends the classical Levenshtein algorithm by
|
|
8
|
+
* including transpositions (swapping of two adjacent characters) as a single edit
|
|
9
|
+
* operation, in addition to insertions, deletions, and substitutions.
|
|
10
|
+
*
|
|
11
|
+
* This metric is particularly useful for detecting and correcting common
|
|
12
|
+
* typographical errors.
|
|
13
|
+
*
|
|
14
|
+
* @module Metric/DamerauLevenshtein
|
|
15
|
+
* @author Paul Köhler (komed3)
|
|
16
|
+
* @license MIT
|
|
17
|
+
*/
|
|
18
|
+
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
|
|
19
|
+
import { Metric } from './Metric';
|
|
20
|
+
export interface DamerauRaw {
|
|
21
|
+
dist: number;
|
|
22
|
+
maxLen: number;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* DamerauLevenshteinDistance class extends the Metric class to implement the Damerau-Levenshtein algorithm.
|
|
26
|
+
*/
|
|
27
|
+
export declare class DamerauLevenshteinDistance extends Metric<DamerauRaw> {
|
|
28
|
+
/**
|
|
29
|
+
* Constructor for the DamerauLevenshteinDistance class.
|
|
30
|
+
*
|
|
31
|
+
* Initializes the Damerau-Levenshtein metric with two input strings or
|
|
32
|
+
* arrays of strings and optional options.
|
|
33
|
+
*
|
|
34
|
+
* @param {MetricInput} a - First input string or array of strings
|
|
35
|
+
* @param {MetricInput} b - Second input string or array of strings
|
|
36
|
+
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
37
|
+
*/
|
|
38
|
+
constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
|
|
39
|
+
/**
|
|
40
|
+
* Calculates the normalized Damerau-Levenshtein distance between two strings.
|
|
41
|
+
*
|
|
42
|
+
* @param {string} a - First string (always the shorter string for memory efficiency)
|
|
43
|
+
* @param {string} b - Second string
|
|
44
|
+
* @param {number} m - Length of the first string (a)
|
|
45
|
+
* @param {number} n - Length of the second string (b)
|
|
46
|
+
* @param {number} maxLen - Maximum length of the strings
|
|
47
|
+
* @return {MetricCompute<DamerauRaw>} - Object containing the similarity result and raw distance
|
|
48
|
+
*/
|
|
49
|
+
protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<DamerauRaw>;
|
|
50
|
+
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dice-Sørensen Coefficient
|
|
3
|
+
* src/metric/DiceSorensen.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Dice-S%C3%B8rensen_coefficient
|
|
6
|
+
*
|
|
7
|
+
* This module implements the Dice-Sørensen coefficient, a statistic used to gauge
|
|
8
|
+
* the similarity of two samples. It is commonly used in natural language processing
|
|
9
|
+
* and information retrieval to compare the similarity between two sets of data,
|
|
10
|
+
* such as text documents. The coefficient is defined as twice the size of the
|
|
11
|
+
* intersection divided by the sum of the sizes of the two sets.
|
|
12
|
+
*
|
|
13
|
+
* The implementation includes methods to compute bigrams from strings and calculate
|
|
14
|
+
* the coefficient based on these bigrams. It handles edge cases, such as empty
|
|
15
|
+
* strings and identical strings, to ensure accurate results.
|
|
16
|
+
*
|
|
17
|
+
* @module Metric/DiceSorensenCoefficient
|
|
18
|
+
* @author Paul Köhler (komed3)
|
|
19
|
+
* @license MIT
|
|
20
|
+
*/
|
|
21
|
+
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
|
|
22
|
+
import { Metric } from './Metric';
|
|
23
|
+
export interface DiceRaw {
|
|
24
|
+
intersection: number;
|
|
25
|
+
size: number;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* DiceSorensenCoefficient class extends the Metric class to implement the Dice-Sørensen coefficient.
|
|
29
|
+
*/
|
|
30
|
+
export declare class DiceSorensenCoefficient extends Metric<DiceRaw> {
|
|
31
|
+
/**
|
|
32
|
+
* Constructor for the DiceSorensenCoefficient class.
|
|
33
|
+
*
|
|
34
|
+
* Initializes the DiceSorensen metric with two input strings or
|
|
35
|
+
* arrays of strings and optional options.
|
|
36
|
+
*
|
|
37
|
+
* @param {MetricInput} a - First input string or array of strings
|
|
38
|
+
* @param {MetricInput} b - Second input string or array of strings
|
|
39
|
+
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
40
|
+
*/
|
|
41
|
+
constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
|
|
42
|
+
/**
|
|
43
|
+
* Computes the bigrams of a given string.
|
|
44
|
+
*
|
|
45
|
+
* @param {string} str - The input string
|
|
46
|
+
* @return {Set<string>} - A set of bigrams (two-character sequences) from the string
|
|
47
|
+
*/
|
|
48
|
+
private _bigrams;
|
|
49
|
+
/**
|
|
50
|
+
* Calculates the Dice-Sørensen coefficient between two strings.
|
|
51
|
+
*
|
|
52
|
+
* @param {string} a - First string
|
|
53
|
+
* @param {string} b - Second string
|
|
54
|
+
* @return {MetricCompute<DiceRaw>} - Object containing the similarity result and raw distance
|
|
55
|
+
*/
|
|
56
|
+
protected compute(a: string, b: string): MetricCompute<DiceRaw>;
|
|
57
|
+
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hamming Distance
|
|
3
|
+
* src/metric/Hamming.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Hamming_distance
|
|
6
|
+
*
|
|
7
|
+
* The Hamming distance is a metric for comparing two strings of equal length. It
|
|
8
|
+
* measures the number of positions at which the corresponding symbols are different.
|
|
9
|
+
*
|
|
10
|
+
* This implementation allows for optional padding of the shorter string to equalize
|
|
11
|
+
* lengths, otherwise it throws an error if the strings are of unequal length.
|
|
12
|
+
*
|
|
13
|
+
* @module Metric/HammingDistance
|
|
14
|
+
* @author Paul Köhler (komed3)
|
|
15
|
+
* @license MIT
|
|
16
|
+
*/
|
|
17
|
+
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
|
|
18
|
+
import { Metric } from './Metric';
|
|
19
|
+
export interface HammingRaw { // raw values carried by MetricCompute<HammingRaw> (see HammingDistance.compute)
|
|
20
|
+
dist: number; // presumably the count of positions at which the inputs differ — confirm against implementation
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* HammingDistance class extends the Metric class to implement the Hamming distance.
|
|
24
|
+
*/
|
|
25
|
+
export declare class HammingDistance extends Metric<HammingRaw> {
|
|
26
|
+
/**
|
|
27
|
+
* Constructor for the HammingDistance class.
|
|
28
|
+
*
|
|
29
|
+
* Initializes the Hamming distance metric with two input strings or
|
|
30
|
+
* arrays of strings and optional options.
|
|
31
|
+
*
|
|
32
|
+
* @param {MetricInput} a - First input string or array of strings
|
|
33
|
+
* @param {MetricInput} b - Second input string or array of strings
|
|
34
|
+
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
35
|
+
*/
|
|
36
|
+
constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
|
|
37
|
+
/**
|
|
38
|
+
* Calculates the Hamming distance between two strings.
|
|
39
|
+
*
|
|
40
|
+
* @param {string} a - First string
|
|
41
|
+
* @param {string} b - Second string
|
|
42
|
+
* @param {number} m - Length of the first string
|
|
43
|
+
* @param {number} n - Length of the second string
|
|
44
|
+
* @param {number} maxLen - Maximum length of the strings
|
|
45
|
+
* @return {MetricCompute<HammingRaw>} - Object containing the similarity result and raw distance
|
|
46
|
+
* @throws {Error} - If strings are of unequal length and padding is not specified
|
|
47
|
+
*/
|
|
48
|
+
protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<HammingRaw>;
|
|
49
|
+
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Jaccard Index
|
|
3
|
+
* src/metric/Jaccard.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Jaccard_index
|
|
6
|
+
*
|
|
7
|
+
* The Jaccard Index (or Jaccard similarity coefficient) measures the similarity
|
|
8
|
+
* between two sets by dividing the size of their intersection by the size of
|
|
9
|
+
* their union. In string similarity, it is often used to compare sets of characters,
|
|
10
|
+
* tokens, or n-grams. The result is a value between 0 and 1, where 1 means the
|
|
11
|
+
* sets are identical and 0 means they have no elements in common.
|
|
12
|
+
*
|
|
13
|
+
* @module Metric/JaccardIndex
|
|
14
|
+
* @author Paul Köhler (komed3)
|
|
15
|
+
* @license MIT
|
|
16
|
+
*/
|
|
17
|
+
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
|
|
18
|
+
import { Metric } from './Metric';
|
|
19
|
+
export interface JaccardRaw { // raw values carried by MetricCompute<JaccardRaw> (see JaccardIndex.compute)
|
|
20
|
+
intersection: number; // presumably the size of the intersection of the two sets — confirm against implementation
|
|
21
|
+
union: number; // presumably the size of the union of the two sets — confirm against implementation
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* JaccardIndex class extends the Metric class to implement the Jaccard Index algorithm.
|
|
25
|
+
*/
|
|
26
|
+
export declare class JaccardIndex extends Metric<JaccardRaw> {
|
|
27
|
+
/**
|
|
28
|
+
* Constructor for the JaccardIndex class.
|
|
29
|
+
*
|
|
30
|
+
* Initializes the Jaccard Index metric with two input strings or
|
|
31
|
+
* arrays of strings and optional options.
|
|
32
|
+
*
|
|
33
|
+
* @param {MetricInput} a - First input string or array of strings
|
|
34
|
+
* @param {MetricInput} b - Second input string or array of strings
|
|
35
|
+
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
36
|
+
*/
|
|
37
|
+
constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
|
|
38
|
+
/**
|
|
39
|
+
* Calculates the Jaccard Index between two strings.
|
|
40
|
+
*
|
|
41
|
+
* @param {string} a - First string
|
|
42
|
+
* @param {string} b - Second string
|
|
43
|
+
* @param {number} m - Length of the first string
|
|
44
|
+
* @param {number} n - Length of the second string
|
|
45
|
+
* @return {MetricCompute<JaccardRaw>} - Object containing the similarity result and raw values (intersection, union)
|
|
46
|
+
*/
|
|
47
|
+
protected compute(a: string, b: string, m: number, n: number): MetricCompute<JaccardRaw>;
|
|
48
|
+
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Jaro-Winkler Distance
|
|
3
|
+
* src/metric/JaroWinkler.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
|
|
6
|
+
*
|
|
7
|
+
* The Jaro-Winkler distance is a string similarity metric that gives more weight
|
|
8
|
+
* to matching characters at the start of the strings. It is especially effective
|
|
9
|
+
* for short strings and typographical errors, and is widely used in record linkage
|
|
10
|
+
* and duplicate detection.
|
|
11
|
+
*
|
|
12
|
+
* @module Metric/JaroWinkler
|
|
13
|
+
* @author Paul Köhler (komed3)
|
|
14
|
+
* @license MIT
|
|
15
|
+
*/
|
|
16
|
+
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
|
|
17
|
+
import { Metric } from './Metric';
|
|
18
|
+
export interface JaroWinklerRaw { // raw values carried by MetricCompute<JaroWinklerRaw> (see JaroWinklerDistance.compute)
|
|
19
|
+
matchWindow: number; // presumably the window within which characters may be paired as matches — confirm
|
|
20
|
+
matches: number; // presumably the number of matching characters found — confirm
|
|
21
|
+
transpos: number; // presumably the number of transpositions among the matches — confirm
|
|
22
|
+
jaro: number; // presumably the plain Jaro similarity before the prefix adjustment — confirm
|
|
23
|
+
prefix: number; // presumably the length of the common prefix used for the Winkler adjustment — confirm
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* JaroWinklerDistance class extends the Metric class to implement the Jaro-Winkler algorithm.
|
|
27
|
+
*/
|
|
28
|
+
export declare class JaroWinklerDistance extends Metric<JaroWinklerRaw> {
|
|
29
|
+
/**
|
|
30
|
+
* Constructor for the JaroWinklerDistance class.
|
|
31
|
+
*
|
|
32
|
+
* Initializes the Jaro-Winkler metric with two input strings or
|
|
33
|
+
* arrays of strings and optional options.
|
|
34
|
+
*
|
|
35
|
+
* @param {MetricInput} a - First input string or array of strings
|
|
36
|
+
* @param {MetricInput} b - Second input string or array of strings
|
|
37
|
+
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
38
|
+
*/
|
|
39
|
+
constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
|
|
40
|
+
/**
|
|
41
|
+
* Calculates the Jaro-Winkler similarity between two strings.
|
|
42
|
+
*
|
|
43
|
+
* @param {string} a - First string
|
|
44
|
+
* @param {string} b - Second string
|
|
45
|
+
* @param {number} m - Length of the first string
|
|
46
|
+
* @param {number} n - Length of the second string
|
|
47
|
+
* @return {MetricCompute<JaroWinklerRaw>} - Object containing the similarity result and raw values (matchWindow, matches, transpos, jaro, prefix)
|
|
48
|
+
*/
|
|
49
|
+
protected compute(a: string, b: string, m: number, n: number): MetricCompute<JaroWinklerRaw>;
|
|
50
|
+
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Longest Common Subsequence (LCS)
|
|
3
|
+
* src/metric/LCS.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Longest_common_subsequence
|
|
6
|
+
*
|
|
7
|
+
* The Longest Common Subsequence (LCS) metric measures the length of the longest
|
|
8
|
+
* subsequence common to both strings. Unlike substrings, the characters of a
|
|
9
|
+
* subsequence do not need to be contiguous, but must appear in the same order.
|
|
10
|
+
*
|
|
11
|
+
* The LCS is widely used in diff tools, bioinformatics, and approximate string
|
|
12
|
+
* matching.
|
|
13
|
+
*
|
|
14
|
+
* @module Metric/LCS
|
|
15
|
+
* @author Paul Köhler (komed3)
|
|
16
|
+
* @license MIT
|
|
17
|
+
*/
|
|
18
|
+
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
|
|
19
|
+
import { Metric } from './Metric';
|
|
20
|
+
export interface LCSRaw { // raw values carried by MetricCompute<LCSRaw> (see LCSMetric.compute)
|
|
21
|
+
lcs: number; // presumably the length of the longest common subsequence — confirm against implementation
|
|
22
|
+
maxLen: number; // presumably the length of the longer input, used for normalization — confirm against implementation
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* LCSMetric class extends the Metric class to implement the Longest Common Subsequence algorithm.
|
|
26
|
+
*/
|
|
27
|
+
export declare class LCSMetric extends Metric<LCSRaw> {
|
|
28
|
+
/**
|
|
29
|
+
* Constructor for the LCSMetric class.
|
|
30
|
+
*
|
|
31
|
+
* Initializes the LCS metric with two input strings or
|
|
32
|
+
* arrays of strings and optional options.
|
|
33
|
+
*
|
|
34
|
+
* @param {MetricInput} a - First input string or array of strings
|
|
35
|
+
* @param {MetricInput} b - Second input string or array of strings
|
|
36
|
+
* @param {MetricOptions} [opt] - Options for the metric computation
|
|
37
|
+
*/
|
|
38
|
+
constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
|
|
39
|
+
/**
|
|
40
|
+
* Calculates the normalized LCS similarity between two strings.
|
|
41
|
+
*
|
|
42
|
+
* @param {string} a - First string
|
|
43
|
+
* @param {string} b - Second string
|
|
44
|
+
* @param {number} m - Length of the first string
|
|
45
|
+
* @param {number} n - Length of the second string
|
|
46
|
+
* @param {number} maxLen - Maximum length of the strings
|
|
47
|
+
* @return {MetricCompute<LCSRaw>} - Object containing the similarity result and raw values (lcs, maxLen)
|
|
48
|
+
*/
|
|
49
|
+
protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<LCSRaw>;
|
|
50
|
+
}
|