cmpstr 2.0.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +75 -499
  3. package/dist/CmpStr.esm.js +4863 -0
  4. package/dist/CmpStr.esm.js.map +1 -0
  5. package/dist/CmpStr.esm.min.js +8 -0
  6. package/dist/CmpStr.esm.min.js.map +1 -0
  7. package/dist/CmpStr.umd.js +4875 -0
  8. package/dist/CmpStr.umd.js.map +1 -0
  9. package/dist/CmpStr.umd.min.js +8 -0
  10. package/dist/CmpStr.umd.min.js.map +1 -0
  11. package/dist/cjs/CmpStr.js +663 -0
  12. package/dist/cjs/CmpStr.js.map +1 -0
  13. package/dist/cjs/CmpStrAsync.js +336 -0
  14. package/dist/cjs/CmpStrAsync.js.map +1 -0
  15. package/dist/cjs/index.js +15 -0
  16. package/dist/cjs/index.js.map +1 -0
  17. package/dist/cjs/metric/Cosine.js +101 -0
  18. package/dist/cjs/metric/Cosine.js.map +1 -0
  19. package/dist/cjs/metric/DamerauLevenshtein.js +110 -0
  20. package/dist/cjs/metric/DamerauLevenshtein.js.map +1 -0
  21. package/dist/cjs/metric/DiceSorensen.js +91 -0
  22. package/dist/cjs/metric/DiceSorensen.js.map +1 -0
  23. package/dist/cjs/metric/Hamming.js +82 -0
  24. package/dist/cjs/metric/Hamming.js.map +1 -0
  25. package/dist/cjs/metric/Jaccard.js +76 -0
  26. package/dist/cjs/metric/Jaccard.js.map +1 -0
  27. package/dist/cjs/metric/JaroWinkler.js +114 -0
  28. package/dist/cjs/metric/JaroWinkler.js.map +1 -0
  29. package/dist/cjs/metric/LCS.js +89 -0
  30. package/dist/cjs/metric/LCS.js.map +1 -0
  31. package/dist/cjs/metric/Levenshtein.js +94 -0
  32. package/dist/cjs/metric/Levenshtein.js.map +1 -0
  33. package/dist/cjs/metric/Metric.js +445 -0
  34. package/dist/cjs/metric/Metric.js.map +1 -0
  35. package/dist/cjs/metric/NeedlemanWunsch.js +95 -0
  36. package/dist/cjs/metric/NeedlemanWunsch.js.map +1 -0
  37. package/dist/cjs/metric/SmithWaterman.js +98 -0
  38. package/dist/cjs/metric/SmithWaterman.js.map +1 -0
  39. package/dist/cjs/metric/qGram.js +91 -0
  40. package/dist/cjs/metric/qGram.js.map +1 -0
  41. package/dist/cjs/phonetic/Cologne.js +112 -0
  42. package/dist/cjs/phonetic/Cologne.js.map +1 -0
  43. package/dist/cjs/phonetic/Metaphone.js +172 -0
  44. package/dist/cjs/phonetic/Metaphone.js.map +1 -0
  45. package/dist/cjs/phonetic/Phonetic.js +413 -0
  46. package/dist/cjs/phonetic/Phonetic.js.map +1 -0
  47. package/dist/cjs/phonetic/Soundex.js +135 -0
  48. package/dist/cjs/phonetic/Soundex.js.map +1 -0
  49. package/dist/cjs/utils/DeepMerge.js +144 -0
  50. package/dist/cjs/utils/DeepMerge.js.map +1 -0
  51. package/dist/cjs/utils/DiffChecker.js +500 -0
  52. package/dist/cjs/utils/DiffChecker.js.map +1 -0
  53. package/dist/cjs/utils/Filter.js +189 -0
  54. package/dist/cjs/utils/Filter.js.map +1 -0
  55. package/dist/cjs/utils/HashTable.js +175 -0
  56. package/dist/cjs/utils/HashTable.js.map +1 -0
  57. package/dist/cjs/utils/Normalizer.js +144 -0
  58. package/dist/cjs/utils/Normalizer.js.map +1 -0
  59. package/dist/cjs/utils/Pool.js +196 -0
  60. package/dist/cjs/utils/Pool.js.map +1 -0
  61. package/dist/cjs/utils/Profiler.js +229 -0
  62. package/dist/cjs/utils/Profiler.js.map +1 -0
  63. package/dist/cjs/utils/Registry.js +148 -0
  64. package/dist/cjs/utils/Registry.js.map +1 -0
  65. package/dist/cjs/utils/TextAnalyzer.js +358 -0
  66. package/dist/cjs/utils/TextAnalyzer.js.map +1 -0
  67. package/dist/esm/CmpStr.js +662 -0
  68. package/dist/esm/CmpStr.js.map +1 -0
  69. package/dist/esm/CmpStrAsync.js +331 -0
  70. package/dist/esm/CmpStrAsync.js.map +1 -0
  71. package/dist/esm/index.js +7 -0
  72. package/dist/esm/index.js.map +1 -0
  73. package/dist/esm/metric/Cosine.js +99 -0
  74. package/dist/esm/metric/Cosine.js.map +1 -0
  75. package/dist/esm/metric/DamerauLevenshtein.js +108 -0
  76. package/dist/esm/metric/DamerauLevenshtein.js.map +1 -0
  77. package/dist/esm/metric/DiceSorensen.js +89 -0
  78. package/dist/esm/metric/DiceSorensen.js.map +1 -0
  79. package/dist/esm/metric/Hamming.js +77 -0
  80. package/dist/esm/metric/Hamming.js.map +1 -0
  81. package/dist/esm/metric/Jaccard.js +74 -0
  82. package/dist/esm/metric/Jaccard.js.map +1 -0
  83. package/dist/esm/metric/JaroWinkler.js +112 -0
  84. package/dist/esm/metric/JaroWinkler.js.map +1 -0
  85. package/dist/esm/metric/LCS.js +87 -0
  86. package/dist/esm/metric/LCS.js.map +1 -0
  87. package/dist/esm/metric/Levenshtein.js +92 -0
  88. package/dist/esm/metric/Levenshtein.js.map +1 -0
  89. package/dist/esm/metric/Metric.js +442 -0
  90. package/dist/esm/metric/Metric.js.map +1 -0
  91. package/dist/esm/metric/NeedlemanWunsch.js +93 -0
  92. package/dist/esm/metric/NeedlemanWunsch.js.map +1 -0
  93. package/dist/esm/metric/SmithWaterman.js +96 -0
  94. package/dist/esm/metric/SmithWaterman.js.map +1 -0
  95. package/dist/esm/metric/qGram.js +89 -0
  96. package/dist/esm/metric/qGram.js.map +1 -0
  97. package/dist/esm/phonetic/Cologne.js +114 -0
  98. package/dist/esm/phonetic/Cologne.js.map +1 -0
  99. package/dist/esm/phonetic/Metaphone.js +174 -0
  100. package/dist/esm/phonetic/Metaphone.js.map +1 -0
  101. package/dist/esm/phonetic/Phonetic.js +409 -0
  102. package/dist/esm/phonetic/Phonetic.js.map +1 -0
  103. package/dist/esm/phonetic/Soundex.js +137 -0
  104. package/dist/esm/phonetic/Soundex.js.map +1 -0
  105. package/dist/esm/utils/DeepMerge.js +139 -0
  106. package/dist/esm/utils/DeepMerge.js.map +1 -0
  107. package/dist/esm/utils/DiffChecker.js +498 -0
  108. package/dist/esm/utils/DiffChecker.js.map +1 -0
  109. package/dist/esm/utils/Filter.js +187 -0
  110. package/dist/esm/utils/Filter.js.map +1 -0
  111. package/dist/esm/utils/HashTable.js +173 -0
  112. package/dist/esm/utils/HashTable.js.map +1 -0
  113. package/dist/esm/utils/Normalizer.js +142 -0
  114. package/dist/esm/utils/Normalizer.js.map +1 -0
  115. package/dist/esm/utils/Pool.js +194 -0
  116. package/dist/esm/utils/Pool.js.map +1 -0
  117. package/dist/esm/utils/Profiler.js +227 -0
  118. package/dist/esm/utils/Profiler.js.map +1 -0
  119. package/dist/esm/utils/Registry.js +142 -0
  120. package/dist/esm/utils/Registry.js.map +1 -0
  121. package/dist/esm/utils/TextAnalyzer.js +356 -0
  122. package/dist/esm/utils/TextAnalyzer.js.map +1 -0
  123. package/dist/types/CmpStr.d.ts +472 -0
  124. package/dist/types/CmpStrAsync.d.ts +233 -0
  125. package/dist/types/index.d.ts +51 -0
  126. package/dist/types/metric/Cosine.d.ts +57 -0
  127. package/dist/types/metric/DamerauLevenshtein.d.ts +50 -0
  128. package/dist/types/metric/DiceSorensen.d.ts +57 -0
  129. package/dist/types/metric/Hamming.d.ts +49 -0
  130. package/dist/types/metric/Jaccard.d.ts +48 -0
  131. package/dist/types/metric/JaroWinkler.d.ts +50 -0
  132. package/dist/types/metric/LCS.d.ts +50 -0
  133. package/dist/types/metric/Levenshtein.d.ts +50 -0
  134. package/dist/types/metric/Metric.d.ts +261 -0
  135. package/dist/types/metric/NeedlemanWunsch.d.ts +47 -0
  136. package/dist/types/metric/SmithWaterman.d.ts +48 -0
  137. package/dist/types/metric/index.d.ts +41 -0
  138. package/dist/types/metric/qGram.d.ts +56 -0
  139. package/dist/types/phonetic/Cologne.d.ts +46 -0
  140. package/dist/types/phonetic/Metaphone.d.ts +50 -0
  141. package/dist/types/phonetic/Phonetic.d.ts +189 -0
  142. package/dist/types/phonetic/Soundex.d.ts +49 -0
  143. package/dist/types/phonetic/index.d.ts +30 -0
  144. package/dist/types/utils/DeepMerge.d.ts +70 -0
  145. package/dist/types/utils/DiffChecker.d.ts +137 -0
  146. package/dist/types/utils/Filter.d.ts +97 -0
  147. package/dist/types/utils/HashTable.d.ts +86 -0
  148. package/dist/types/utils/Normalizer.d.ts +76 -0
  149. package/dist/types/utils/Pool.d.ts +63 -0
  150. package/dist/types/utils/Profiler.d.ts +129 -0
  151. package/dist/types/utils/Registry.d.ts +57 -0
  152. package/dist/types/utils/TextAnalyzer.d.ts +199 -0
  153. package/dist/types/utils/Types.d.ts +313 -0
  154. package/package.json +62 -49
  155. package/src/CmpStr.d.ts +0 -70
  156. package/src/CmpStr.js +0 -912
  157. package/src/CmpStrAsync.d.ts +0 -19
  158. package/src/CmpStrAsync.js +0 -204
  159. package/src/algorithms/cosine.js +0 -86
  160. package/src/algorithms/damerau.js +0 -78
  161. package/src/algorithms/dice.js +0 -65
  162. package/src/algorithms/hamming.js +0 -44
  163. package/src/algorithms/jaccard.js +0 -34
  164. package/src/algorithms/jaroWinkler.js +0 -106
  165. package/src/algorithms/lcs.js +0 -58
  166. package/src/algorithms/levenshtein.js +0 -70
  167. package/src/algorithms/needlemanWunsch.js +0 -72
  168. package/src/algorithms/qGram.js +0 -63
  169. package/src/algorithms/smithWaterman.js +0 -78
  170. package/src/algorithms/soundex.js +0 -152
  171. package/src/index.d.ts +0 -3
  172. package/src/index.js +0 -47
@@ -0,0 +1,233 @@
1
+ /**
2
+ * CmpStrAsync Asynchronous API
3
+ * src/CmpStrAsync.ts
4
+ *
5
+ * The CmpStrAsync class provides a fully asynchronous, Promise-based interface for
6
+ * advanced string comparison, similarity measurement, phonetic indexing, filtering
7
+ * and normalization. It extends the CmpStr class and overrides all relevant methods
8
+ * to support non-blocking, scalable, and I/O-friendly workloads.
9
+ *
10
+ * Features:
11
+ * - Asynchronous normalization, filtering, and metric computation
12
+ * - Async batch, pairwise, and single string comparison with detailed results
13
+ * - Async phonetic indexing and phonetic-aware search and comparison
14
+ * - Full compatibility with the synchronous CmpStr API
15
+ * - Designed for large-scale, high-performance, and server-side applications
16
+ *
17
+ * @module CmpStrAsync
18
+ * @author Paul Köhler (komed3)
19
+ * @license MIT
20
+ */
21
+ import type { CmpStrOptions, CmpStrProcessors, CmpStrResult, NormalizeFlags, PhoneticOptions, MetricRaw, MetricInput, MetricMode, MetricResult, MetricResultSingle, MetricResultBatch } from './utils/Types';
22
+ import { CmpStr } from './CmpStr';
23
+ /**
24
+ * The CmpStrAsync class provides a fully asynchronous API for string comparison,
25
+ * phonetic indexing, filtering and normalization.
26
+ *
27
+ * @template R - The type of the metric result, defaults to MetricRaw
28
+ */
29
+ export declare class CmpStrAsync<R = MetricRaw> extends CmpStr<R> {
30
+ /**
31
+ * --------------------------------------------------------------------------------
32
+ * Instantiate the CmpStrAsync class
33
+ * --------------------------------------------------------------------------------
34
+ *
35
+ * Methods to create a new CmpStrAsync instance with the given options.
36
+ * Using the static `create` method is recommended to ensure proper instantiation.
37
+ */
38
+ /**
39
+ * Creates a new CmpStrAsync instance with the given options.
40
+ *
41
+ * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
42
+ * @returns {CmpStrAsync<R>} - A new CmpStrAsync instance
43
+ */
44
+ static create<R = MetricRaw>(opt?: string | CmpStrOptions): CmpStrAsync<R>;
45
+ /**
46
+ * Creates a new CmpStrAsync instance by calling the super constructor.
47
+ *
48
+ * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
49
+ */
50
+ protected constructor(opt?: string | CmpStrOptions);
51
+ /**
52
+ * ---------------------------------------------------------------------------------
53
+ * Protected asynchronous utility methods for internal use
54
+ * ---------------------------------------------------------------------------------
55
+ *
56
+ * These methods provide asynchronous normalization, filtering, and metric
57
+ * computation capabilities, allowing for non-blocking operations.
58
+ */
59
+ /**
60
+ * Asynchronously normalizes the input string or array using the configured or provided flags.
61
+ *
62
+ * @param {MetricInput} input - The input string or array
63
+ * @param {NormalizeFlags} [flags] - Normalization flags
64
+ * @returns {Promise<MetricInput>} - The normalized input
65
+ */
66
+ protected normalizeAsync(input: MetricInput, flags?: NormalizeFlags): Promise<MetricInput>;
67
+ /**
68
+ * Asynchronously applies all active filters to the input string or array.
69
+ *
70
+ * @param {MetricInput} input - The input string or array
71
+ * @param {string} hook - The filter hook
72
+ * @returns {Promise<MetricInput>} - The filtered string(s)
73
+ */
74
+ protected filterAsync(input: MetricInput, hook: string): Promise<MetricInput>;
75
+ /**
76
+ * Asynchronously prepares the input by normalizing and filtering.
77
+ *
78
+ * @param {MetricInput} input - The input string or array
79
+ * @param {CmpStrOptions} [opt] - Optional options to use
80
+ * @returns {Promise<MetricInput>} - The prepared input
81
+ */
82
+ protected prepareAsync(input: MetricInput, opt?: CmpStrOptions): Promise<MetricInput>;
83
+ /**
84
+ * Asynchronously computes the phonetic index for the given input using
85
+ * the specified phonetic algorithm.
86
+ *
87
+ * @param {MetricInput} input - The input string or array
88
+ * @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
89
+ * @returns {Promise<MetricInput>} - The phonetic index for the given input
90
+ */
91
+ protected indexAsync(input: MetricInput, { algo, opt }: {
92
+ algo: string;
93
+ opt?: PhoneticOptions;
94
+ }): Promise<MetricInput>;
95
+ /**
96
+ * Asynchronously computes the metric result for the given inputs, applying
97
+ * normalization and filtering as configured.
98
+ *
99
+ * @template T - The type of the metric result
100
+ * @param {MetricInput} a - The first input string or array
101
+ * @param {MetricInput} b - The second input string or array
102
+ * @param {CmpStrOptions} [opt] - Optional options to use
103
+ * @param {MetricMode} [mode='single'] - The metric mode to use
104
+ * @param {boolean} [raw=false] - Whether to return raw results
105
+ * @param {boolean} [skip=false] - Whether to skip normalization and filtering
106
+ * @returns {Promise<T>} - The computed metric result
107
+ */
108
+ protected computeAsync<T extends MetricResult<R> | CmpStrResult | CmpStrResult[]>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions, mode?: MetricMode, raw?: boolean, skip?: boolean): Promise<T>;
109
+ /**
110
+ * ---------------------------------------------------------------------------------
111
+ * Public asynchronous core methods for string comparison
112
+ * ---------------------------------------------------------------------------------
113
+ *
114
+ * These methods provide the asynchronous core functionality for string comparison,
115
+ * phonetic indexing and text search, allowing for non-blocking operations.
116
+ */
117
+ /**
118
+ * Asynchronously performs a single metric comparison.
119
+ *
120
+ * @template T - The type of the metric result
121
+ * @param {string} a - The source string
122
+ * @param {string} b - The target string
123
+ * @param {CmpStrOptions} [opt] - Optional options
124
+ * @returns {Promise<T>} - The metric result
125
+ */
126
+ testAsync<T extends CmpStrResult | MetricResultSingle<R>>(a: string, b: string, opt?: CmpStrOptions): Promise<T>;
127
+ /**
128
+ * Asynchronously performs a single metric comparison returning the numeric score.
129
+ *
130
+ * @param {string} a - The source string
131
+ * @param {string} b - The target string
132
+ * @param {CmpStrOptions} [opt] - Optional options
133
+ * @returns {Promise<number>} - The similarity score (0..1)
134
+ */
135
+ compareAsync(a: string, b: string, opt?: CmpStrOptions): Promise<number>;
136
+ /**
137
+ * Asynchronously performs a batch metric comparison between source and target
138
+ * strings or array of strings.
139
+ *
140
+ * @template T - The type of the metric result
141
+ * @param {MetricInput} a - The source string or array of strings
142
+ * @param {MetricInput} b - The target string or array of strings
143
+ * @param {CmpStrOptions} [opt] - Optional options
144
+ * @returns {Promise<T>} - The batch metric results
145
+ */
146
+ batchTestAsync<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions): Promise<T>;
147
+ /**
148
+ * Asynchronously performs a batch metric comparison and returns results sorted by score.
149
+ *
150
+ * @template T - The type of the metric result
151
+ * @param {MetricInput} a - The source string or array of strings
152
+ * @param {MetricInput} b - The target string or array of strings
153
+ * @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
154
+ * @param {CmpStrOptions} [opt] - Optional options
155
+ * @returns {Promise<T>} - The sorted batch results
156
+ */
157
+ batchSortedAsync<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, dir?: 'desc' | 'asc', opt?: CmpStrOptions): Promise<T>;
158
+ /**
159
+ * Asynchronously performs a pairwise metric comparison between source and target
160
+ * strings or array of strings.
161
+ *
162
+ * @template T - The type of the metric result
163
+ * Input arrays need to be of the same length to perform pairwise comparison,
164
+ * otherwise the method will throw an error.
165
+ *
166
+ * @param {MetricInput} a - The source string or array of strings
167
+ * @param {MetricInput} b - The target string or array of strings
168
+ * @param {CmpStrOptions} [opt] - Optional options
169
+ * @returns {Promise<T>} - The pairwise metric results
170
+ */
171
+ pairsAsync<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions): Promise<T>;
172
+ /**
173
+ * Asynchronously performs a batch comparison and returns only results above the threshold.
174
+ *
175
+ * @template T - The type of the metric result
176
+ * @param {MetricInput} a - The source string or array of strings
177
+ * @param {MetricInput} b - The target string or array of strings
178
+ * @param {number} threshold - The similarity threshold (0..1)
179
+ * @param {CmpStrOptions} [opt] - Optional options
180
+ * @returns {Promise<T>} - The filtered batch results
181
+ */
182
+ matchAsync<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, threshold: number, opt?: CmpStrOptions): Promise<T>;
183
+ /**
184
+ * Asynchronously returns the n closest matches from a batch comparison.
185
+ *
186
+ * @template T - The type of the metric result
187
+ * @param {MetricInput} a - The source string or array of strings
188
+ * @param {MetricInput} b - The target string or array of strings
189
+ * @param {number} [n=1] - Number of closest matches
190
+ * @param {CmpStrOptions} [opt] - Optional options
191
+ * @returns {Promise<T>} - The closest matches
192
+ */
193
+ closestAsync<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, n?: number, opt?: CmpStrOptions): Promise<T>;
194
+ /**
195
+ * Asynchronously returns the n furthest matches from a batch comparison.
196
+ *
197
+ * @template T - The type of the metric result
198
+ * @param {MetricInput} a - The source string or array of strings
199
+ * @param {MetricInput} b - The target string or array of strings
200
+ * @param {number} [n=1] - Number of furthest matches
201
+ * @param {CmpStrOptions} [opt] - Optional options
202
+ * @returns {Promise<T>} - The furthest matches
203
+ */
204
+ furthestAsync<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, n?: number, opt?: CmpStrOptions): Promise<T>;
205
+ /**
206
+ * Asynchronously performs a normalized and filtered substring search.
207
+ *
208
+ * @param {string} needle - The search string
209
+ * @param {string[]} haystack - The array to search in
210
+ * @param {NormalizeFlags} [flags] - Normalization flags
211
+ * @param {CmpStrProcessors} [processors] - Pre-processors to apply
212
+ * @returns {Promise<string[]>} - Array of matching entries
213
+ */
214
+ searchAsync(needle: string, haystack: string[], flags?: NormalizeFlags, processors?: CmpStrProcessors): Promise<string[]>;
215
+ /**
216
+ * Asynchronously computes a similarity matrix for the given input array.
217
+ *
218
+ * @param {string[]} input - The input array
219
+ * @param {CmpStrOptions} [opt] - Optional options
220
+ * @returns {Promise<number[][]>} - The similarity matrix
221
+ */
222
+ matrixAsync(input: string[], opt?: CmpStrOptions): Promise<number[][]>;
223
+ /**
224
+ * Asynchronously computes the phonetic index for a string using the
225
+ * configured or given algorithm.
226
+ *
227
+ * @param {string} input - The input string
228
+ * @param {string} [algo] - The phonetic algorithm to use
229
+ * @param {PhoneticOptions} [opt] - Optional phonetic options
230
+ * @returns {Promise<string>} - The phonetic index as a string
231
+ */
232
+ phoneticIndexAsync(input: string, algo?: string, opt?: PhoneticOptions): Promise<string>;
233
+ }
@@ -0,0 +1,51 @@
1
+ /**
2
+ * CmpStr Main Entry Point
3
+ * src/index.ts
4
+ *
5
+ * CmpStr is a comprehensive, extensible, and highly abstracted TypeScript library for
6
+ * advanced string comparison, similarity measurement, phonetic indexing, normalization,
7
+ * filtering, and text analysis. It is designed for both high-level application development
8
+ * and research, offering a unified API for single, batch, and pairwise operations.
9
+ *
10
+ * Version: 3.0.0
11
+ * Author: Paul Köhler (komed3)
12
+ * License: MIT
13
+ *
14
+ * Core Features:
15
+ * --------------
16
+ *
17
+ * - Unified interface for string similarity, distance, and matching
18
+ * - Pluggable metric system (Levenshtein, Jaro-Winkler, Cosine, Dice, Hamming, LCS, etc.)
19
+ * - Phonetic algorithms (Cologne, Soundex, Metaphone) with mapping registry
20
+ * - Flexible normalization and filtering pipeline for all inputs
21
+ * - Batch, pairwise, and single comparison with detailed, type-safe results
22
+ * - Phonetic-aware search, indexing, and comparison
23
+ * - Readability and text analysis utilities (syllables, word stats, etc.)
24
+ * - Unified diff and difference reporting (line/word, ASCII/CLI)
25
+ * - Full TypeScript type safety, extensibility, and profiling support
26
+ * - Modular architecture for easy integration and extension
27
+ *
28
+ * Overview:
29
+ * ---------
30
+ *
31
+ * CmpStr provides a single entry point for all string comparison and analysis tasks.
32
+ * The main class, `CmpStr`, exposes a rich API for comparing strings, arrays, or
33
+ * batches, with full support for normalization, filtering, and phonetic processing.
34
+ * All metric and phonetic algorithms are managed via registries, allowing for
35
+ * dynamic extension and customization. The package also includes utilities for
36
+ * diffing, text analysis, and profiling, making it suitable for applications such as
37
+ * search engines, data deduplication, fuzzy matching, linguistics, and more.
38
+ *
39
+ * For asynchronous workloads, use `CmpStrAsync`, which provides the same API with
40
+ * Promise-based, non-blocking methods for large-scale or I/O-bound operations.
41
+ *
42
+ * @version 3.0.0
43
+ * @author Paul Köhler (komed3)
44
+ * @license MIT
45
+ */
46
+ export * from './utils/Types';
47
+ export { CmpStr } from './CmpStr';
48
+ export { CmpStrAsync } from './CmpStrAsync';
49
+ export { DiffChecker } from './utils/DiffChecker';
50
+ export { Normalizer } from './utils/Normalizer';
51
+ export { TextAnalyzer } from './utils/TextAnalyzer';
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Cosine Similarity
3
+ * src/metric/Cosine.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Cosine_similarity
6
+ *
7
+ * Cosine similarity is a metric used to measure how similar two vectors are, regardless
8
+ * of their magnitude. In text analysis, it is commonly used to compare documents or
9
+ * strings by representing them as term frequency vectors and computing the cosine of
10
+ * the angle between these vectors.
11
+ *
12
+ * The result is a value between 0 and 1, where 1 means the vectors are identical and
13
+ * 0 means they are orthogonal (no similarity).
14
+ *
15
+ * @module Metric/CosineSimilarity
16
+ * @author Paul Köhler (komed3)
17
+ * @license MIT
18
+ */
19
+ import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
20
+ import { Metric } from './Metric';
21
+ export interface CosineRaw {
22
+ dotProduct: number;
23
+ magnitudeA: number;
24
+ magnitudeB: number;
25
+ }
26
+ /**
27
+ * CosineSimilarity class extends the Metric class to implement the Cosine similarity algorithm.
28
+ */
29
+ export declare class CosineSimilarity extends Metric<CosineRaw> {
30
+ /**
31
+ * Constructor for the CosineSimilarity class.
32
+ *
33
+ * Initializes the Cosine similarity metric with two input strings or
34
+ * arrays of strings and optional options.
35
+ *
36
+ * @param {MetricInput} a - First input string or array of strings
37
+ * @param {MetricInput} b - Second input string or array of strings
38
+ * @param {MetricOptions} [opt] - Options for the metric computation
39
+ */
40
+ constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
41
+ /**
42
+ * Calculates the term frequency vector for a given string.
43
+ *
44
+ * @param {string} str - The input string
45
+ * @param {string} delimiter - The delimiter to split terms
46
+ * @return {Map<string, number>} - Term frequency map
47
+ */
48
+ private _termFreq;
49
+ /**
50
+ * Calculates the Cosine similarity between two strings.
51
+ *
52
+ * @param {string} a - First string
53
+ * @param {string} b - Second string
54
+ * @return {MetricCompute<CosineRaw>} - Object containing the similarity result and raw values
55
+ */
56
+ protected compute(a: string, b: string): MetricCompute<CosineRaw>;
57
+ }
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Damerau-Levenshtein Distance
3
+ * src/metric/DamerauLevenshtein.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
6
+ *
7
+ * The Damerau-Levenshtein distance extends the classical Levenshtein algorithm by
8
+ * including transpositions (swapping of two adjacent characters) as a single edit
9
+ * operation, in addition to insertions, deletions, and substitutions.
10
+ *
11
+ * This metric is particularly useful for detecting and correcting common
12
+ * typographical errors.
13
+ *
14
+ * @module Metric/DamerauLevenshtein
15
+ * @author Paul Köhler (komed3)
16
+ * @license MIT
17
+ */
18
+ import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
19
+ import { Metric } from './Metric';
20
+ export interface DamerauRaw {
21
+ dist: number;
22
+ maxLen: number;
23
+ }
24
+ /**
25
+ * DamerauLevenshteinDistance class extends the Metric class to implement the Damerau-Levenshtein algorithm.
26
+ */
27
+ export declare class DamerauLevenshteinDistance extends Metric<DamerauRaw> {
28
+ /**
29
+ * Constructor for the DamerauLevenshteinDistance class.
30
+ *
31
+ * Initializes the Damerau-Levenshtein metric with two input strings or
32
+ * arrays of strings and optional options.
33
+ *
34
+ * @param {MetricInput} a - First input string or array of strings
35
+ * @param {MetricInput} b - Second input string or array of strings
36
+ * @param {MetricOptions} [opt] - Options for the metric computation
37
+ */
38
+ constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
39
+ /**
40
+ * Calculates the normalized Damerau-Levenshtein distance between two strings.
41
+ *
42
+ * @param {string} a - First string (always the shorter string for memory efficiency)
43
+ * @param {string} b - Second string
44
+ * @param {number} m - Length of the first string (a)
45
+ * @param {number} n - Length of the second string (b)
46
+ * @param {number} maxLen - Maximum length of the strings
47
+ * @return {MetricCompute<DamerauRaw>} - Object containing the similarity result and raw distance
48
+ */
49
+ protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<DamerauRaw>;
50
+ }
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Dice-Sørensen Coefficient
3
+ * src/metric/DiceSorensen.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Dice-S%C3%B8rensen_coefficient
6
+ *
7
+ * This module implements the Dice-Sørensen coefficient, a statistic used to gauge
8
+ * the similarity of two samples. It is commonly used in natural language processing
9
+ * and information retrieval to compare the similarity between two sets of data,
10
+ * such as text documents. The coefficient is defined as twice the size of the
11
+ * intersection divided by the sum of the sizes of the two sets.
12
+ *
13
+ * The implementation includes methods to compute bigrams from strings and calculate
14
+ * the coefficient based on these bigrams. It handles edge cases, such as empty
15
+ * strings and identical strings, to ensure accurate results.
16
+ *
17
+ * @module Metric/DiceSorensenCoefficient
18
+ * @author Paul Köhler (komed3)
19
+ * @license MIT
20
+ */
21
+ import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
22
+ import { Metric } from './Metric';
23
+ export interface DiceRaw {
24
+ intersection: number;
25
+ size: number;
26
+ }
27
+ /**
28
+ * DiceSorensenCoefficient class extends the Metric class to implement the Dice-Sørensen coefficient.
29
+ */
30
+ export declare class DiceSorensenCoefficient extends Metric<DiceRaw> {
31
+ /**
32
+ * Constructor for the DiceSorensenCoefficient class.
33
+ *
34
+ * Initializes the DiceSorensen metric with two input strings or
35
+ * arrays of strings and optional options.
36
+ *
37
+ * @param {MetricInput} a - First input string or array of strings
38
+ * @param {MetricInput} b - Second input string or array of strings
39
+ * @param {MetricOptions} [opt] - Options for the metric computation
40
+ */
41
+ constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
42
+ /**
43
+ * Computes the bigrams of a given string.
44
+ *
45
+ * @param {string} str - The input string
46
+ * @return {Set<string>} - A set of bigrams (two-character sequences) from the string
47
+ */
48
+ private _bigrams;
49
+ /**
50
+ * Calculates the Dice-Sørensen coefficient between two strings.
51
+ *
52
+ * @param {string} a - First string
53
+ * @param {string} b - Second string
54
+ * @return {MetricCompute<DiceRaw>} - Object containing the similarity result and raw values
55
+ */
56
+ protected compute(a: string, b: string): MetricCompute<DiceRaw>;
57
+ }
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Hamming Distance
3
+ * src/metric/Hamming.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Hamming_distance
6
+ *
7
+ * The Hamming distance is a metric for comparing two strings of equal length. It
8
+ * measures the number of positions at which the corresponding symbols are different.
9
+ *
10
+ * This implementation allows for optional padding of the shorter string to equalize
11
+ * lengths, otherwise it throws an error if the strings are of unequal length.
12
+ *
13
+ * @module Metric/HammingDistance
14
+ * @author Paul Köhler (komed3)
15
+ * @license MIT
16
+ */
17
+ import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
18
+ import { Metric } from './Metric';
19
+ export interface HammingRaw {
20
+ dist: number;
21
+ }
22
+ /**
23
+ * HammingDistance class extends the Metric class to implement the Hamming distance.
24
+ */
25
+ export declare class HammingDistance extends Metric<HammingRaw> {
26
+ /**
27
+ * Constructor for the HammingDistance class.
28
+ *
29
+ * Initializes the Hamming distance metric with two input strings or
30
+ * arrays of strings and optional options.
31
+ *
32
+ * @param {MetricInput} a - First input string or array of strings
33
+ * @param {MetricInput} b - Second input string or array of strings
34
+ * @param {MetricOptions} [opt] - Options for the metric computation
35
+ */
36
+ constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
37
+ /**
38
+ * Calculates the Hamming distance between two strings.
39
+ *
40
+ * @param {string} a - First string
41
+ * @param {string} b - Second string
42
+ * @param {number} m - Length of the first string
43
+ * @param {number} n - Length of the second string
44
+ * @param {number} maxLen - Maximum length of the strings
45
+ * @return {MetricCompute<HammingRaw>} - Object containing the similarity result and raw distance
46
+ * @throws {Error} - If strings are of unequal length and padding is not specified
47
+ */
48
+ protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<HammingRaw>;
49
+ }
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Jaccard Index
3
+ * src/metric/Jaccard.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Jaccard_index
6
+ *
7
+ * The Jaccard Index (or Jaccard similarity coefficient) measures the similarity
8
+ * between two sets by dividing the size of their intersection by the size of
9
+ * their union. In string similarity, it is often used to compare sets of characters,
10
+ * tokens, or n-grams. The result is a value between 0 and 1, where 1 means the
11
+ * sets are identical and 0 means they have no elements in common.
12
+ *
13
+ * @module Metric/JaccardIndex
14
+ * @author Paul Köhler (komed3)
15
+ * @license MIT
16
+ */
17
+ import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
18
+ import { Metric } from './Metric';
19
+ export interface JaccardRaw {
20
+ intersection: number; // NOTE(review): presumably the size of the intersection of the two compared sets — confirm
21
+ union: number; // NOTE(review): presumably the size of the union of the two compared sets — confirm
22
+ }
23
+ /**
24
+ * JaccardIndex class extends the Metric class to implement the Jaccard Index algorithm.
25
+ */
26
+ export declare class JaccardIndex extends Metric<JaccardRaw> {
27
+ /**
28
+ * Constructor for the JaccardIndex class.
29
+ *
30
+ * Initializes the Jaccard Index metric with two input strings or
31
+ * arrays of strings and optional options.
32
+ *
33
+ * @param {MetricInput} a - First input string or array of strings
34
+ * @param {MetricInput} b - Second input string or array of strings
35
+ * @param {MetricOptions} [opt] - Options for the metric computation
36
+ */
37
+ constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
38
+ /**
39
+ * Calculates the Jaccard Index between two strings.
40
+ *
41
+ * @param {string} a - First string
42
+ * @param {string} b - Second string
43
+ * @param {number} m - Length of the first string
44
+ * @param {number} n - Length of the second string
45
+ * @return {MetricCompute<JaccardRaw>} - Object containing the similarity result and the raw `intersection` and `union` values
46
+ */
47
+ protected compute(a: string, b: string, m: number, n: number): MetricCompute<JaccardRaw>;
48
+ }
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Jaro-Winkler Distance
3
+ * src/metric/JaroWinkler.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
6
+ *
7
+ * The Jaro-Winkler distance is a string similarity metric that gives more weight
8
+ * to matching characters at the start of the strings. It is especially effective
9
+ * for short strings and typographical errors, and is widely used in record linkage
10
+ * and duplicate detection.
11
+ *
12
+ * @module Metric/JaroWinkler
13
+ * @author Paul Köhler (komed3)
14
+ * @license MIT
15
+ */
16
+ import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
17
+ import { Metric } from './Metric';
18
+ export interface JaroWinklerRaw {
19
+ matchWindow: number; // NOTE(review): presumably the match-search window size — confirm in src/metric/JaroWinkler.ts
20
+ matches: number; // NOTE(review): presumably the count of matching characters — confirm
21
+ transpos: number; // NOTE(review): presumably the transposition count — confirm
22
+ jaro: number; // NOTE(review): presumably the plain Jaro score before the prefix adjustment — confirm
23
+ prefix: number; // NOTE(review): presumably the common-prefix length — confirm
24
+ }
25
+ /**
26
+ * JaroWinklerDistance class extends the Metric class to implement the Jaro-Winkler algorithm.
27
+ */
28
+ export declare class JaroWinklerDistance extends Metric<JaroWinklerRaw> {
29
+ /**
30
+ * Constructor for the JaroWinklerDistance class.
31
+ *
32
+ * Initializes the Jaro-Winkler metric with two input strings or
33
+ * arrays of strings and optional options.
34
+ *
35
+ * @param {MetricInput} a - First input string or array of strings
36
+ * @param {MetricInput} b - Second input string or array of strings
37
+ * @param {MetricOptions} [opt] - Options for the metric computation
38
+ */
39
+ constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
40
+ /**
41
+ * Calculates the Jaro-Winkler similarity between two strings.
42
+ *
43
+ * @param {string} a - First string
44
+ * @param {string} b - Second string
45
+ * @param {number} m - Length of the first string
46
+ * @param {number} n - Length of the second string
47
+ * @return {MetricCompute<JaroWinklerRaw>} - Object containing the similarity result and the raw `matchWindow`, `matches`, `transpos`, `jaro` and `prefix` values
48
+ */
49
+ protected compute(a: string, b: string, m: number, n: number): MetricCompute<JaroWinklerRaw>;
50
+ }
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Longest Common Subsequence (LCS)
3
+ * src/metric/LCS.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Longest_common_subsequence
6
+ *
7
+ * The Longest Common Subsequence (LCS) metric measures the length of the longest
8
+ * subsequence common to both strings. Unlike a substring, the characters of a
9
+ * subsequence need not be contiguous, but they must appear in the same relative order.
10
+ *
11
+ * The LCS is widely used in diff tools, bioinformatics, and approximate string
12
+ * matching.
13
+ *
14
+ * @module Metric/LCS
15
+ * @author Paul Köhler (komed3)
16
+ * @license MIT
17
+ */
18
+ import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
19
+ import { Metric } from './Metric';
20
+ export interface LCSRaw {
21
+ lcs: number; // raw LCS length reported alongside the similarity result
22
+ maxLen: number; // NOTE(review): presumably the length of the longer input, mirroring compute()'s maxLen — confirm
23
+ }
24
+ /**
25
+ * LCSMetric class extends the Metric class to implement the Longest Common Subsequence algorithm.
26
+ */
27
+ export declare class LCSMetric extends Metric<LCSRaw> {
28
+ /**
29
+ * Constructor for the LCSMetric class.
30
+ *
31
+ * Initializes the LCS metric with two input strings or
32
+ * arrays of strings and optional options.
33
+ *
34
+ * @param {MetricInput} a - First input string or array of strings
35
+ * @param {MetricInput} b - Second input string or array of strings
36
+ * @param {MetricOptions} [opt] - Options for the metric computation
37
+ */
38
+ constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
39
+ /**
40
+ * Calculates the normalized LCS similarity between two strings.
41
+ *
42
+ * @param {string} a - First string
43
+ * @param {string} b - Second string
44
+ * @param {number} m - Length of the first string
45
+ * @param {number} n - Length of the second string
46
+ * @param {number} maxLen - Maximum length of the strings
47
+ * @return {MetricCompute<LCSRaw>} - Object containing the similarity result and the raw `lcs` and `maxLen` values
48
+ */
49
+ protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<LCSRaw>;
50
+ }