cmpstr 2.0.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +75 -499
  3. package/dist/CmpStr.esm.js +4863 -0
  4. package/dist/CmpStr.esm.js.map +1 -0
  5. package/dist/CmpStr.esm.min.js +8 -0
  6. package/dist/CmpStr.esm.min.js.map +1 -0
  7. package/dist/CmpStr.umd.js +4875 -0
  8. package/dist/CmpStr.umd.js.map +1 -0
  9. package/dist/CmpStr.umd.min.js +8 -0
  10. package/dist/CmpStr.umd.min.js.map +1 -0
  11. package/dist/cjs/CmpStr.js +663 -0
  12. package/dist/cjs/CmpStr.js.map +1 -0
  13. package/dist/cjs/CmpStrAsync.js +336 -0
  14. package/dist/cjs/CmpStrAsync.js.map +1 -0
  15. package/dist/cjs/index.js +15 -0
  16. package/dist/cjs/index.js.map +1 -0
  17. package/dist/cjs/metric/Cosine.js +101 -0
  18. package/dist/cjs/metric/Cosine.js.map +1 -0
  19. package/dist/cjs/metric/DamerauLevenshtein.js +110 -0
  20. package/dist/cjs/metric/DamerauLevenshtein.js.map +1 -0
  21. package/dist/cjs/metric/DiceSorensen.js +91 -0
  22. package/dist/cjs/metric/DiceSorensen.js.map +1 -0
  23. package/dist/cjs/metric/Hamming.js +82 -0
  24. package/dist/cjs/metric/Hamming.js.map +1 -0
  25. package/dist/cjs/metric/Jaccard.js +76 -0
  26. package/dist/cjs/metric/Jaccard.js.map +1 -0
  27. package/dist/cjs/metric/JaroWinkler.js +114 -0
  28. package/dist/cjs/metric/JaroWinkler.js.map +1 -0
  29. package/dist/cjs/metric/LCS.js +89 -0
  30. package/dist/cjs/metric/LCS.js.map +1 -0
  31. package/dist/cjs/metric/Levenshtein.js +94 -0
  32. package/dist/cjs/metric/Levenshtein.js.map +1 -0
  33. package/dist/cjs/metric/Metric.js +445 -0
  34. package/dist/cjs/metric/Metric.js.map +1 -0
  35. package/dist/cjs/metric/NeedlemanWunsch.js +95 -0
  36. package/dist/cjs/metric/NeedlemanWunsch.js.map +1 -0
  37. package/dist/cjs/metric/SmithWaterman.js +98 -0
  38. package/dist/cjs/metric/SmithWaterman.js.map +1 -0
  39. package/dist/cjs/metric/qGram.js +91 -0
  40. package/dist/cjs/metric/qGram.js.map +1 -0
  41. package/dist/cjs/phonetic/Cologne.js +112 -0
  42. package/dist/cjs/phonetic/Cologne.js.map +1 -0
  43. package/dist/cjs/phonetic/Metaphone.js +172 -0
  44. package/dist/cjs/phonetic/Metaphone.js.map +1 -0
  45. package/dist/cjs/phonetic/Phonetic.js +413 -0
  46. package/dist/cjs/phonetic/Phonetic.js.map +1 -0
  47. package/dist/cjs/phonetic/Soundex.js +135 -0
  48. package/dist/cjs/phonetic/Soundex.js.map +1 -0
  49. package/dist/cjs/utils/DeepMerge.js +144 -0
  50. package/dist/cjs/utils/DeepMerge.js.map +1 -0
  51. package/dist/cjs/utils/DiffChecker.js +500 -0
  52. package/dist/cjs/utils/DiffChecker.js.map +1 -0
  53. package/dist/cjs/utils/Filter.js +189 -0
  54. package/dist/cjs/utils/Filter.js.map +1 -0
  55. package/dist/cjs/utils/HashTable.js +175 -0
  56. package/dist/cjs/utils/HashTable.js.map +1 -0
  57. package/dist/cjs/utils/Normalizer.js +144 -0
  58. package/dist/cjs/utils/Normalizer.js.map +1 -0
  59. package/dist/cjs/utils/Pool.js +196 -0
  60. package/dist/cjs/utils/Pool.js.map +1 -0
  61. package/dist/cjs/utils/Profiler.js +229 -0
  62. package/dist/cjs/utils/Profiler.js.map +1 -0
  63. package/dist/cjs/utils/Registry.js +148 -0
  64. package/dist/cjs/utils/Registry.js.map +1 -0
  65. package/dist/cjs/utils/TextAnalyzer.js +358 -0
  66. package/dist/cjs/utils/TextAnalyzer.js.map +1 -0
  67. package/dist/esm/CmpStr.js +662 -0
  68. package/dist/esm/CmpStr.js.map +1 -0
  69. package/dist/esm/CmpStrAsync.js +331 -0
  70. package/dist/esm/CmpStrAsync.js.map +1 -0
  71. package/dist/esm/index.js +7 -0
  72. package/dist/esm/index.js.map +1 -0
  73. package/dist/esm/metric/Cosine.js +99 -0
  74. package/dist/esm/metric/Cosine.js.map +1 -0
  75. package/dist/esm/metric/DamerauLevenshtein.js +108 -0
  76. package/dist/esm/metric/DamerauLevenshtein.js.map +1 -0
  77. package/dist/esm/metric/DiceSorensen.js +89 -0
  78. package/dist/esm/metric/DiceSorensen.js.map +1 -0
  79. package/dist/esm/metric/Hamming.js +77 -0
  80. package/dist/esm/metric/Hamming.js.map +1 -0
  81. package/dist/esm/metric/Jaccard.js +74 -0
  82. package/dist/esm/metric/Jaccard.js.map +1 -0
  83. package/dist/esm/metric/JaroWinkler.js +112 -0
  84. package/dist/esm/metric/JaroWinkler.js.map +1 -0
  85. package/dist/esm/metric/LCS.js +87 -0
  86. package/dist/esm/metric/LCS.js.map +1 -0
  87. package/dist/esm/metric/Levenshtein.js +92 -0
  88. package/dist/esm/metric/Levenshtein.js.map +1 -0
  89. package/dist/esm/metric/Metric.js +442 -0
  90. package/dist/esm/metric/Metric.js.map +1 -0
  91. package/dist/esm/metric/NeedlemanWunsch.js +93 -0
  92. package/dist/esm/metric/NeedlemanWunsch.js.map +1 -0
  93. package/dist/esm/metric/SmithWaterman.js +96 -0
  94. package/dist/esm/metric/SmithWaterman.js.map +1 -0
  95. package/dist/esm/metric/qGram.js +89 -0
  96. package/dist/esm/metric/qGram.js.map +1 -0
  97. package/dist/esm/phonetic/Cologne.js +114 -0
  98. package/dist/esm/phonetic/Cologne.js.map +1 -0
  99. package/dist/esm/phonetic/Metaphone.js +174 -0
  100. package/dist/esm/phonetic/Metaphone.js.map +1 -0
  101. package/dist/esm/phonetic/Phonetic.js +409 -0
  102. package/dist/esm/phonetic/Phonetic.js.map +1 -0
  103. package/dist/esm/phonetic/Soundex.js +137 -0
  104. package/dist/esm/phonetic/Soundex.js.map +1 -0
  105. package/dist/esm/utils/DeepMerge.js +139 -0
  106. package/dist/esm/utils/DeepMerge.js.map +1 -0
  107. package/dist/esm/utils/DiffChecker.js +498 -0
  108. package/dist/esm/utils/DiffChecker.js.map +1 -0
  109. package/dist/esm/utils/Filter.js +187 -0
  110. package/dist/esm/utils/Filter.js.map +1 -0
  111. package/dist/esm/utils/HashTable.js +173 -0
  112. package/dist/esm/utils/HashTable.js.map +1 -0
  113. package/dist/esm/utils/Normalizer.js +142 -0
  114. package/dist/esm/utils/Normalizer.js.map +1 -0
  115. package/dist/esm/utils/Pool.js +194 -0
  116. package/dist/esm/utils/Pool.js.map +1 -0
  117. package/dist/esm/utils/Profiler.js +227 -0
  118. package/dist/esm/utils/Profiler.js.map +1 -0
  119. package/dist/esm/utils/Registry.js +142 -0
  120. package/dist/esm/utils/Registry.js.map +1 -0
  121. package/dist/esm/utils/TextAnalyzer.js +356 -0
  122. package/dist/esm/utils/TextAnalyzer.js.map +1 -0
  123. package/dist/types/CmpStr.d.ts +472 -0
  124. package/dist/types/CmpStrAsync.d.ts +233 -0
  125. package/dist/types/index.d.ts +51 -0
  126. package/dist/types/metric/Cosine.d.ts +57 -0
  127. package/dist/types/metric/DamerauLevenshtein.d.ts +50 -0
  128. package/dist/types/metric/DiceSorensen.d.ts +57 -0
  129. package/dist/types/metric/Hamming.d.ts +49 -0
  130. package/dist/types/metric/Jaccard.d.ts +48 -0
  131. package/dist/types/metric/JaroWinkler.d.ts +50 -0
  132. package/dist/types/metric/LCS.d.ts +50 -0
  133. package/dist/types/metric/Levenshtein.d.ts +50 -0
  134. package/dist/types/metric/Metric.d.ts +261 -0
  135. package/dist/types/metric/NeedlemanWunsch.d.ts +47 -0
  136. package/dist/types/metric/SmithWaterman.d.ts +48 -0
  137. package/dist/types/metric/index.d.ts +41 -0
  138. package/dist/types/metric/qGram.d.ts +56 -0
  139. package/dist/types/phonetic/Cologne.d.ts +46 -0
  140. package/dist/types/phonetic/Metaphone.d.ts +50 -0
  141. package/dist/types/phonetic/Phonetic.d.ts +189 -0
  142. package/dist/types/phonetic/Soundex.d.ts +49 -0
  143. package/dist/types/phonetic/index.d.ts +30 -0
  144. package/dist/types/utils/DeepMerge.d.ts +70 -0
  145. package/dist/types/utils/DiffChecker.d.ts +137 -0
  146. package/dist/types/utils/Filter.d.ts +97 -0
  147. package/dist/types/utils/HashTable.d.ts +86 -0
  148. package/dist/types/utils/Normalizer.d.ts +76 -0
  149. package/dist/types/utils/Pool.d.ts +63 -0
  150. package/dist/types/utils/Profiler.d.ts +129 -0
  151. package/dist/types/utils/Registry.d.ts +57 -0
  152. package/dist/types/utils/TextAnalyzer.d.ts +199 -0
  153. package/dist/types/utils/Types.d.ts +313 -0
  154. package/package.json +62 -49
  155. package/src/CmpStr.d.ts +0 -70
  156. package/src/CmpStr.js +0 -912
  157. package/src/CmpStrAsync.d.ts +0 -19
  158. package/src/CmpStrAsync.js +0 -204
  159. package/src/algorithms/cosine.js +0 -86
  160. package/src/algorithms/damerau.js +0 -78
  161. package/src/algorithms/dice.js +0 -65
  162. package/src/algorithms/hamming.js +0 -44
  163. package/src/algorithms/jaccard.js +0 -34
  164. package/src/algorithms/jaroWinkler.js +0 -106
  165. package/src/algorithms/lcs.js +0 -58
  166. package/src/algorithms/levenshtein.js +0 -70
  167. package/src/algorithms/needlemanWunsch.js +0 -72
  168. package/src/algorithms/qGram.js +0 -63
  169. package/src/algorithms/smithWaterman.js +0 -78
  170. package/src/algorithms/soundex.js +0 -152
  171. package/src/index.d.ts +0 -3
  172. package/src/index.js +0 -47
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Levenshtein Distance
3
+ * src/metric/Levenshtein.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Levenshtein_distance
6
+ *
7
+ * The Levenshtein distance is a classic metric for measuring the minimum number
8
+ * of single-character edits (insertions, deletions, or substitutions) required
9
+ * to change one string into another.
10
+ *
11
+ * It is widely used in approximate string matching, spell checking, and natural
12
+ * language processing.
13
+ *
14
+ * @module Metric/LevenshteinDistance
15
+ * @author Paul Köhler (komed3)
16
+ * @license MIT
17
+ */
18
+ import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
19
+ import { Metric } from './Metric';
20
+ export interface LevenshteinRaw {
21
+ dist: number;
22
+ maxLen: number;
23
+ }
24
+ /**
25
+ * LevenshteinDistance class extends the Metric class to implement the Levenshtein distance algorithm.
26
+ */
27
+ export declare class LevenshteinDistance extends Metric<LevenshteinRaw> {
28
+ /**
29
+ * Constructor for the LevenshteinDistance class.
30
+ *
31
+ * Initializes the Levenshtein metric with two input strings
32
+ * or arrays of strings and optional options.
33
+ *
34
+ * @param {MetricInput} a - First input string or array of strings
35
+ * @param {MetricInput} b - Second input string or array of strings
36
+ * @param {MetricOptions} [opt] - Options for the metric computation
37
+ */
38
+ constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
39
+ /**
40
+ * Calculates the Levenshtein distance between two strings.
41
+ *
42
+ * @param {string} a - First string
43
+ * @param {string} b - Second string
44
+ * @param {number} m - Length of the first string
45
+ * @param {number} n - Length of the second string
46
+ * @param {number} maxLen - Maximum length of the strings
47
+ * @returns {MetricCompute<LevenshteinRaw>} - Object containing the similarity result and raw distance
48
+ */
49
+ protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<LevenshteinRaw>;
50
+ }
@@ -0,0 +1,261 @@
1
+ /**
2
+ * Abstract Metric
3
+ * src/metric/Metric.ts
4
+ *
5
+ * This module defines an abstract class for string metrics, providing a framework for
6
+ * computing various string similarity metrics. It includes methods for running metrics
7
+ * in different modes (single, batch, pairwise), synchronously or asynchronously, and caching
8
+ * results to optimize performance. The class is designed to be extended by specific
9
+ * metric implementations like the Levenshtein distance or Jaro-Winkler similarity.
10
+ *
11
+ * It provides:
12
+ * - A base class for string metrics with common functionality
13
+ * - Methods for running metrics in different modes
14
+ * - Pre-computation for trivial cases to optimize performance
15
+ * - Caching of metric computations to avoid redundant calculations
16
+ * - Support for symmetrical metrics (same result for inputs in any order)
17
+ * - Performance tracking capabilities (Profiler)
18
+ * - Asynchronous execution support for metrics
19
+ *
20
+ * This class is intended to be extended by specific metric implementations that will
21
+ * implement the `compute` method to define the specific metric computation logic.
22
+ *
23
+ * @module Metric
24
+ * @author Paul Köhler (komed3)
25
+ * @license MIT
26
+ */
27
+ import type { MetricMode, MetricInput, MetricOptions, MetricCompute, MetricRaw, MetricResult, RegistryService } from '../utils/Types';
28
+ /**
29
+ * Abstract class representing a generic string metric.
30
+ *
31
+ * @abstract
32
+ * @template R - The type of the raw result, defaulting to `MetricRaw`.
33
+ */
34
+ export declare abstract class Metric<R = MetricRaw> {
35
+ private static cache;
36
+ private readonly metric;
37
+ private readonly a;
38
+ private readonly b;
39
+ private origA;
40
+ private origB;
41
+ protected readonly options: MetricOptions;
42
+ protected readonly symmetric: boolean;
43
+ /**
44
+ * Result of the metric computation, which can be a single result or an array of results.
45
+ * This will be populated after running the metric.
46
+ */
47
+ private results;
48
+ /**
49
+ * Static method to clear the cache of metric computations.
50
+ */
51
+ static clear(): void;
52
+ /**
53
+ * Swaps two strings and their lengths if the first is longer than the second.
54
+ *
55
+ * @param {string} a - First string
56
+ * @param {string} b - Second string
57
+ * @param {number} m - Length of the first string
58
+ * @param {number} n - Length of the second string
59
+ * @returns {[string, string, number, number]} - Swapped strings and lengths
60
+ */
61
+ protected static swap(a: string, b: string, m: number, n: number): [
62
+ string,
63
+ string,
64
+ number,
65
+ number
66
+ ];
67
+ /**
68
+ * Clamps the similarity result between 0 and 1.
69
+ *
70
+ * @param {number} res - The input similarity to clamp
71
+ * @returns {number} - The clamped similarity (0 to 1)
72
+ */
73
+ protected static clamp(res: number): number;
74
+ /**
75
+ * Constructor for the Metric class.
76
+ * Initializes the metric with two inputs (strings or arrays of strings) and options.
77
+ *
78
+ * @param {string} metric - The name of the metric (e.g. 'levenshtein')
79
+ * @param {MetricInput} a - First input string or array of strings
80
+ * @param {MetricInput} b - Second input string or array of strings
81
+ * @param {MetricOptions} [opt] - Options for the metric computation
82
+ * @param {boolean} [symmetric=false] - Whether the metric is symmetric (same result for inputs in any order)
83
+ * @throws {Error} - If inputs `a` or `b` are empty
84
+ */
85
+ constructor(metric: string, a: MetricInput, b: MetricInput, opt?: MetricOptions, symmetric?: boolean);
86
+ /**
87
+ * Pre-compute the metric for two strings.
88
+ * This method is called before the actual computation to handle trivial cases.
89
+ *
90
+ * @param {string} a - First string
91
+ * @param {string} b - Second string
92
+ * @param {number} m - Length of the first string
93
+ * @param {number} n - Length of the second string
94
+ * @returns {MetricCompute<R>|undefined} - Pre-computed result or undefined if not applicable
95
+ */
96
+ protected preCompute(a: string, b: string, m: number, n: number): MetricCompute<R> | undefined;
97
+ /**
98
+ * Abstract method to be implemented by subclasses to perform the metric computation.
99
+ * This method should contain the logic for computing the metric between two strings.
100
+ *
101
+ * @param {string} a - First string
102
+ * @param {string} b - Second string
103
+ * @param {number} m - Length of the first string
104
+ * @param {number} n - Length of the second string
105
+ * @param {number} maxLen - Maximum length of the strings
106
+ * @returns {MetricCompute<R>} - The result of the metric computation
107
+ * @throws {Error} - If not overridden in a subclass
108
+ */
109
+ protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<R>;
110
+ /**
111
+ * Run the metric computation for single inputs (two strings).
112
+ * Applies preCompute for trivial cases before cache lookup and computation.
113
+ *
114
+ * If the profiler is active, it will measure time and memory usage.
115
+ *
116
+ * @param {number} i - Pointer to the first string
117
+ * @param {number} j - Pointer to the second string
118
+ * @returns {MetricResultSingle<R>} - The result of the metric computation
119
+ */
120
+ private runSingle;
121
+ /**
122
+ * Run the metric computation for single inputs (two strings) asynchronously.
123
+ *
124
+ * @param {number} i - Pointer to the first string
125
+ * @param {number} j - Pointer to the second string
126
+ * @returns {Promise<MetricResultSingle<R>>} - Promise resolving the result of the metric computation
127
+ */
128
+ private runSingleAsync;
129
+ /**
130
+ * Run the metric computation for batch inputs (arrays of strings).
131
+ *
132
+ * It iterates through each string in the first array and computes the metric
133
+ * against each string in the second array.
134
+ */
135
+ private runBatch;
136
+ /**
137
+ * Run the metric computation for batch inputs (arrays of strings) asynchronously.
138
+ */
139
+ private runBatchAsync;
140
+ /**
141
+ * Run the metric computation for pairwise inputs (A[i] vs B[i]).
142
+ *
143
+ * This method assumes that both `a` and `b` are arrays of equal length
144
+ * and computes the metric only for corresponding index pairs.
145
+ */
146
+ private runPairwise;
147
+ /**
148
+ * Run the metric computation for pairwise inputs (A[i] vs B[i]) asynchronously.
149
+ */
150
+ private runPairwiseAsync;
151
+ /**
152
+ * Set the original inputs to which the results of the metric calculation will refer.
153
+ *
154
+ * @param {MetricInput} [a] - original input(s) for a
155
+ * @param {MetricInput} [b] - original input(s) for b
156
+ */
157
+ setOriginal(a?: MetricInput, b?: MetricInput): this;
158
+ /**
159
+ * Check if the inputs are in batch mode.
160
+ *
161
+ * This method checks if either `a` or `b` contains more than one string,
162
+ * indicating that the metric is being run in batch mode.
163
+ *
164
+ * @returns {boolean} - True if either input is an array with more than one element
165
+ */
166
+ isBatch(): boolean;
167
+ /**
168
+ * Check if the inputs are in single mode.
169
+ *
170
+ * This method checks if both `a` and `b` are single strings (not arrays),
171
+ * indicating that the metric is being run on a single pair of strings.
172
+ *
173
+ * @returns {boolean} - True if both inputs are single strings
174
+ */
175
+ isSingle(): boolean;
176
+ /**
177
+ * Check if the inputs are in pairwise mode.
178
+ *
179
+ * This method checks if both `a` and `b` are arrays of the same length,
180
+ * indicating that the metric is being run on corresponding pairs of strings.
181
+ *
182
+ * @returns {boolean} - True if both inputs are arrays of equal length
183
+ * @param {boolean} [safe=false] - If true, does not throw an error if lengths are not equal
184
+ * @throws {Error} - If `safe` is false and the lengths of `a` and `b` are not equal
185
+ */
186
+ isPairwise(safe?: boolean): boolean;
187
+ /**
188
+ * Check if the metric is symmetrical.
189
+ *
190
+ * This method returns whether the metric is symmetric, meaning it produces the same
191
+ * result regardless of the order of inputs (e.g., Levenshtein distance).
192
+ *
193
+ * @returns {boolean} - True if the metric is symmetric
194
+ */
195
+ isSymmetrical(): boolean;
196
+ /**
197
+ * Determine which mode to run the metric in.
198
+ *
199
+ * This method checks the provided mode or defaults to the mode specified in options.
200
+ * If no mode is specified, it defaults to 'default'.
201
+ *
202
+ * @param {MetricMode} [mode] - The mode to run the metric in (optional)
203
+ * @returns {MetricMode} - The determined mode
204
+ */
205
+ whichMode(mode?: MetricMode): MetricMode;
206
+ /**
207
+ * Clear the cached results of the metric.
208
+ *
209
+ * This method resets the `results` property to `undefined`, effectively clearing
210
+ * any previously computed results. It can be useful for re-running the metric
211
+ * with new inputs or options.
212
+ */
213
+ clear(): void;
214
+ /**
215
+ * Run the metric computation based on the specified mode.
216
+ *
217
+ * @param {MetricMode} [mode] - The mode to run the metric in (optional)
218
+ * @param {boolean} [clear=true] - Whether to clear previous results before running
219
+ * @throws {Error} - If an unsupported mode is specified
220
+ */
221
+ run(mode?: MetricMode, clear?: boolean): void;
222
+ /**
223
+ * Run the metric computation based on the specified mode asynchronously.
224
+ *
225
+ * @param {MetricMode} [mode] - The mode to run the metric in (optional)
226
+ * @param {boolean} [clear=true] - Whether to clear previous results before running
227
+ * @returns {Promise<void>} - A promise that resolves when the metric computation is complete
228
+ * @throws {Error} - If an unsupported mode is specified
229
+ */
230
+ runAsync(mode?: MetricMode, clear?: boolean): Promise<void>;
231
+ /**
232
+ * Get the name of the metric.
233
+ *
234
+ * @returns {string} - The name of the metric
235
+ */
236
+ getMetricName(): string;
237
+ /**
238
+ * Get the result of the metric computation.
239
+ *
240
+ * @returns {MetricResult<R>} - The result of the metric computation
241
+ * @throws {Error} - If `run()` has not been called before this method
242
+ */
243
+ getResults(): MetricResult<R>;
244
+ }
245
+ /**
246
+ * Metric registry service for managing metric implementations.
247
+ *
248
+ * This registry allows for dynamic registration and retrieval of metric classes,
249
+ * enabling the use of various string similarity metrics in a consistent manner.
250
+ */
251
+ export declare const MetricRegistry: RegistryService<Metric<MetricRaw>>;
252
+ /**
253
+ * Type definition for a class constructor that extends the Metric class.
254
+ *
255
+ * This type represents a constructor function for a class that extends the Metric
256
+ * class. It can be used to create instances of specific metric implementations,
257
+ * such as Levenshtein or Jaro-Winkler.
258
+ *
259
+ * @template R - The type of the raw result, defaulting to `MetricRaw`.
260
+ */
261
+ export type MetricCls<R = MetricRaw> = new (...args: any[]) => Metric<R>;
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Needleman-Wunsch Algorithm
3
+ * src/metric/NeedlemanWunsch.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Needleman%E2%80%93Wunsch_algorithm
6
+ *
7
+ * The Needleman-Wunsch algorithm performs global alignment, aligning two strings
8
+ * entirely, including gaps. It is commonly used in bioinformatics for sequence
9
+ * alignment.
10
+ *
11
+ * @module Metric/NeedlemanWunsch
12
+ * @author Paul Köhler (komed3)
13
+ * @license MIT
14
+ */
15
+ import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
16
+ import { Metric } from './Metric';
17
+ export interface NeedlemanRaw {
18
+ score: number;
19
+ denum: number;
20
+ }
21
+ /**
22
+ * NeedlemanWunschDistance class extends the Metric class to implement the Needleman-Wunsch algorithm.
23
+ */
24
+ export declare class NeedlemanWunschDistance extends Metric<NeedlemanRaw> {
25
+ /**
26
+ * Constructor for the NeedlemanWunschDistance class.
27
+ *
28
+ * Initializes the Needleman-Wunsch metric with two input strings or
29
+ * arrays of strings and optional options.
30
+ *
31
+ * @param {MetricInput} a - First input string or array of strings
32
+ * @param {MetricInput} b - Second input string or array of strings
33
+ * @param {MetricOptions} [opt] - Options for the metric computation
34
+ */
35
+ constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
36
+ /**
37
+ * Calculates the Needleman-Wunsch global alignment score between two strings.
38
+ *
39
+ * @param {string} a - First string
40
+ * @param {string} b - Second string
41
+ * @param {number} m - Length of the first string
42
+ * @param {number} n - Length of the second string
43
+ * @param {number} maxLen - Maximum length of the strings
44
+ * @returns {MetricCompute<NeedlemanRaw>} - Object containing the similarity result and raw score
45
+ */
46
+ protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<NeedlemanRaw>;
47
+ }
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Smith-Waterman Algorithm
3
+ * src/metric/SmithWaterman.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm
6
+ *
7
+ * The Smith-Waterman algorithm performs local alignment, finding the best matching
8
+ * subsequence between two strings. It is commonly used in bioinformatics for local
9
+ * sequence alignment. Instead of looking at the entire sequence, the Smith–Waterman
10
+ * algorithm compares segments of all possible lengths and optimizes the similarity
11
+ * measure.
12
+ *
13
+ * @module Metric/SmithWatermanDistance
14
+ * @author Paul Köhler (komed3)
15
+ * @license MIT
16
+ */
17
+ import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
18
+ import { Metric } from './Metric';
19
+ export interface SmithWatermanRaw {
20
+ score: number;
21
+ denum: number;
22
+ }
23
+ /**
24
+ * SmithWatermanDistance class extends the Metric class to implement the Smith-Waterman algorithm.
25
+ */
26
+ export declare class SmithWatermanDistance extends Metric<SmithWatermanRaw> {
27
+ /**
28
+ * Constructor for the SmithWatermanDistance class.
29
+ *
30
+ * Initializes the Smith-Waterman metric with two input strings or
31
+ * arrays of strings and optional options.
32
+ *
33
+ * @param {MetricInput} a - First input string or array of strings
34
+ * @param {MetricInput} b - Second input string or array of strings
35
+ * @param {MetricOptions} [opt] - Options for the metric computation
36
+ */
37
+ constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
38
+ /**
39
+ * Calculates the Smith-Waterman local alignment score between two strings.
40
+ *
41
+ * @param {string} a - First string
42
+ * @param {string} b - Second string
43
+ * @param {number} m - Length of the first string
44
+ * @param {number} n - Length of the second string
45
+ * @returns {MetricCompute<SmithWatermanRaw>} - Object containing the similarity result and raw score
46
+ */
47
+ protected compute(a: string, b: string, m: number, n: number): MetricCompute<SmithWatermanRaw>;
48
+ }
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Metric Registry Loader
3
+ * src/metric/index.ts
4
+ *
5
+ * This module serves as the central loader and registry for all string similarity metrics
6
+ * available in the CmpStr library. It ensures that all metric implementations are
7
+ * registered with the MetricRegistry and available for use throughout the application.
8
+ *
9
+ * Each metric algorithm (such as Levenshtein, Jaccard, Dice-Sørensen, etc.) is defined
10
+ * in its own module and is automatically registered with the MetricRegistry upon import.
11
+ * This design allows for easy extensibility: new metrics can be added simply by creating
12
+ * a new module and importing it here. The registry pattern enables dynamic lookup,
13
+ * instantiation, and management of all available metrics at runtime.
14
+ *
15
+ * Features:
16
+ * - Centralized registration of all built-in string similarity metrics
17
+ * - Automatic registration via side-effect imports
18
+ * - Extensible: custom metrics can be registered at runtime via the MetricRegistry API
19
+ * - Consistent interface for accessing, listing, and managing metrics
20
+ * - Ensures that all metrics are available for use in the CmpStr API and utilities
21
+ *
22
+ * Natively implemented metrics are highly optimized for performance and efficiency,
23
+ * providing fast and reliable string similarity calculations. They will use CmpStr's
24
+ * pooling system to manage resources effectively, ensuring minimal overhead
25
+ * and maximum performance.
26
+ *
27
+ * @author Paul Köhler (komed3)
28
+ * @license MIT
29
+ */
30
+ import './Cosine';
31
+ import './DamerauLevenshtein';
32
+ import './DiceSorensen';
33
+ import './Hamming';
34
+ import './Jaccard';
35
+ import './JaroWinkler';
36
+ import './LCS';
37
+ import './Levenshtein';
38
+ import './NeedlemanWunsch';
39
+ import './qGram';
40
+ import './SmithWaterman';
41
+ export { MetricRegistry, Metric, MetricCls } from './Metric';
@@ -0,0 +1,56 @@
1
+ /**
2
+ * q-Gram Similarity
3
+ * src/metric/QGram.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Q-gram
6
+ *
7
+ * Q-gram similarity is a string-matching algorithm that compares two strings by
8
+ * breaking them into substrings (q-grams) of length Q. The similarity is computed
9
+ * as the size of the intersection of q-gram sets divided by the size of the larger
10
+ * set.
11
+ *
12
+ * This metric is widely used in approximate string matching, information retrieval,
13
+ * and computational linguistics.
14
+ *
15
+ * @module Metric/QGramSimilarity
16
+ * @author Paul Köhler (komed3)
17
+ * @license MIT
18
+ */
19
+ import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
20
+ import { Metric } from './Metric';
21
+ export interface QGramRaw {
22
+ intersection: number;
23
+ size: number;
24
+ }
25
+ /**
26
+ * QGramSimilarity class extends the Metric class to implement the q-Gram similarity algorithm.
27
+ */
28
+ export declare class QGramSimilarity extends Metric<QGramRaw> {
29
+ /**
30
+ * Constructor for the QGramSimilarity class.
31
+ *
32
+ * Initializes the q-Gram similarity metric with two input strings or
33
+ * arrays of strings and optional options.
34
+ *
35
+ * @param {MetricInput} a - First input string or array of strings
36
+ * @param {MetricInput} b - Second input string or array of strings
37
+ * @param {MetricOptions} [opt] - Options for the metric computation
38
+ */
39
+ constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
40
+ /**
41
+ * Converts a string into a set of q-grams (substrings of length q).
42
+ *
43
+ * @param {string} str - The input string
44
+ * @param {number} q - The length of each q-gram
45
+ * @returns {Set<string>} - Set of q-grams
46
+ */
47
+ private _qGrams;
48
+ /**
49
+ * Calculates the q-Gram similarity between two strings.
50
+ *
51
+ * @param {string} a - First string
52
+ * @param {string} b - Second string
53
+ * @returns {MetricCompute<QGramRaw>} - Object containing the similarity result and raw values
54
+ */
55
+ protected compute(a: string, b: string): MetricCompute<QGramRaw>;
56
+ }
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Cologne Phonetic Algorithm
3
+ * src/phonetic/Cologne.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Cologne_phonetics
6
+ *
7
+ * Cologne phonetics, also known as `Kölner Phonetik` or the `Cologne process`,
8
+ * is a phonetic algorithm that assigns a sequence of digits, referred to as the
9
+ * phonetic code, to words. The purpose of this method is to ensure that words
10
+ * with identical sounds receive the same code. This algorithm can facilitate a
11
+ * similarity search among words.
12
+ *
13
+ * Cologne phonetics is associated with the well-known Soundex phonetic algorithm,
14
+ * yet it is specifically optimized for the German language. This algorithm was
15
+ * introduced by Hans Joachim Postel in 1969.
16
+ *
17
+ * The Cologne phonetic algorithm works by mapping letters to digits, ignoring
18
+ * certain letters, and applying specific rules to handle character combinations.
19
+ *
20
+ * @module Phonetic/Cologne
21
+ * @author Paul Köhler (komed3)
22
+ * @license MIT
23
+ */
24
+ import type { PhoneticOptions } from '../utils/Types';
25
+ import { Phonetic } from './Phonetic';
26
+ /**
27
+ * Cologne class extends the Phonetic class to implement the Cologne phonetic algorithm.
28
+ */
29
+ export declare class Cologne extends Phonetic {
30
+ protected static default: PhoneticOptions;
31
+ /**
32
+ * Constructor for the Cologne class.
33
+ *
34
+ * Initializes the Cologne phonetic algorithm with the mapping and options.
35
+ *
36
+ * @param {PhoneticOptions} [opt] - Options for the Cologne phonetic algorithm
37
+ */
38
+ constructor(opt?: PhoneticOptions);
39
+ /**
40
+ * Adjusts the phonetic code by removing every '0' except one in the first position.
41
+ *
42
+ * @param {string} code - The phonetic code to adjust
43
+ * @returns {string} - The adjusted phonetic code
44
+ */
45
+ protected adjustCode(code: string): string;
46
+ }
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Metaphone Phonetic Algorithm
3
+ * src/phonetic/Metaphone.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Metaphone
6
+ *
7
+ * Metaphone is a phonetic algorithm for indexing words by their English pronunciation.
8
+ * It encodes words into a string of consonant symbols, allowing for the comparison of
9
+ * words based on their pronunciation rather than their spelling. Metaphone is more
10
+ * accurate than Soundex for English and is widely used in search, spell-checking,
11
+ * and fuzzy matching.
12
+ *
13
+ * This implementation uses a mapping and a comprehensive ruleset to efficiently
14
+ * transform input words into their Metaphone code. The algorithm drops or transforms
15
+ * letters according to context-sensitive rules, and only retains vowels at the start.
16
+ *
17
+ * @module Phonetic/Metaphone
18
+ * @author Paul Köhler (komed3)
19
+ * @license MIT
20
+ */
21
+ import type { PhoneticOptions } from '../utils/Types';
22
+ import { Phonetic } from './Phonetic';
23
+ /**
24
+ * Metaphone class extends the Phonetic class to implement the Metaphone phonetic algorithm.
25
+ */
26
+ export declare class Metaphone extends Phonetic {
27
+ protected static default: PhoneticOptions;
28
+ /**
29
+ * Constructor for the Metaphone class.
30
+ *
31
+ * Initializes the Metaphone phonetic algorithm with the mapping and options.
32
+ *
33
+ * @param {PhoneticOptions} [opt] - Options for the Metaphone phonetic algorithm
34
+ */
35
+ constructor(opt?: PhoneticOptions);
36
+ /**
37
+ * Generates the Metaphone code for a given word.
38
+ *
39
+ * @param {string} word - The input word to be converted into a Metaphone code
40
+ * @returns {string} - The generated Metaphone code
41
+ */
42
+ protected encode(word: string): string;
43
+ /**
44
+ * Adjusts the Metaphone code by removing every vowel except one in the first position.
45
+ *
46
+ * @param {string} code - The Metaphone code to be adjusted
47
+ * @returns {string} - The adjusted Metaphone code
48
+ */
49
+ protected adjustCode(code: string): string;
50
+ }