cmpstr 2.0.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +75 -499
  3. package/dist/CmpStr.esm.js +4863 -0
  4. package/dist/CmpStr.esm.js.map +1 -0
  5. package/dist/CmpStr.esm.min.js +8 -0
  6. package/dist/CmpStr.esm.min.js.map +1 -0
  7. package/dist/CmpStr.umd.js +4875 -0
  8. package/dist/CmpStr.umd.js.map +1 -0
  9. package/dist/CmpStr.umd.min.js +8 -0
  10. package/dist/CmpStr.umd.min.js.map +1 -0
  11. package/dist/cjs/CmpStr.js +663 -0
  12. package/dist/cjs/CmpStr.js.map +1 -0
  13. package/dist/cjs/CmpStrAsync.js +336 -0
  14. package/dist/cjs/CmpStrAsync.js.map +1 -0
  15. package/dist/cjs/index.js +15 -0
  16. package/dist/cjs/index.js.map +1 -0
  17. package/dist/cjs/metric/Cosine.js +101 -0
  18. package/dist/cjs/metric/Cosine.js.map +1 -0
  19. package/dist/cjs/metric/DamerauLevenshtein.js +110 -0
  20. package/dist/cjs/metric/DamerauLevenshtein.js.map +1 -0
  21. package/dist/cjs/metric/DiceSorensen.js +91 -0
  22. package/dist/cjs/metric/DiceSorensen.js.map +1 -0
  23. package/dist/cjs/metric/Hamming.js +82 -0
  24. package/dist/cjs/metric/Hamming.js.map +1 -0
  25. package/dist/cjs/metric/Jaccard.js +76 -0
  26. package/dist/cjs/metric/Jaccard.js.map +1 -0
  27. package/dist/cjs/metric/JaroWinkler.js +114 -0
  28. package/dist/cjs/metric/JaroWinkler.js.map +1 -0
  29. package/dist/cjs/metric/LCS.js +89 -0
  30. package/dist/cjs/metric/LCS.js.map +1 -0
  31. package/dist/cjs/metric/Levenshtein.js +94 -0
  32. package/dist/cjs/metric/Levenshtein.js.map +1 -0
  33. package/dist/cjs/metric/Metric.js +445 -0
  34. package/dist/cjs/metric/Metric.js.map +1 -0
  35. package/dist/cjs/metric/NeedlemanWunsch.js +95 -0
  36. package/dist/cjs/metric/NeedlemanWunsch.js.map +1 -0
  37. package/dist/cjs/metric/SmithWaterman.js +98 -0
  38. package/dist/cjs/metric/SmithWaterman.js.map +1 -0
  39. package/dist/cjs/metric/qGram.js +91 -0
  40. package/dist/cjs/metric/qGram.js.map +1 -0
  41. package/dist/cjs/phonetic/Cologne.js +112 -0
  42. package/dist/cjs/phonetic/Cologne.js.map +1 -0
  43. package/dist/cjs/phonetic/Metaphone.js +172 -0
  44. package/dist/cjs/phonetic/Metaphone.js.map +1 -0
  45. package/dist/cjs/phonetic/Phonetic.js +413 -0
  46. package/dist/cjs/phonetic/Phonetic.js.map +1 -0
  47. package/dist/cjs/phonetic/Soundex.js +135 -0
  48. package/dist/cjs/phonetic/Soundex.js.map +1 -0
  49. package/dist/cjs/utils/DeepMerge.js +144 -0
  50. package/dist/cjs/utils/DeepMerge.js.map +1 -0
  51. package/dist/cjs/utils/DiffChecker.js +500 -0
  52. package/dist/cjs/utils/DiffChecker.js.map +1 -0
  53. package/dist/cjs/utils/Filter.js +189 -0
  54. package/dist/cjs/utils/Filter.js.map +1 -0
  55. package/dist/cjs/utils/HashTable.js +175 -0
  56. package/dist/cjs/utils/HashTable.js.map +1 -0
  57. package/dist/cjs/utils/Normalizer.js +144 -0
  58. package/dist/cjs/utils/Normalizer.js.map +1 -0
  59. package/dist/cjs/utils/Pool.js +196 -0
  60. package/dist/cjs/utils/Pool.js.map +1 -0
  61. package/dist/cjs/utils/Profiler.js +229 -0
  62. package/dist/cjs/utils/Profiler.js.map +1 -0
  63. package/dist/cjs/utils/Registry.js +148 -0
  64. package/dist/cjs/utils/Registry.js.map +1 -0
  65. package/dist/cjs/utils/TextAnalyzer.js +358 -0
  66. package/dist/cjs/utils/TextAnalyzer.js.map +1 -0
  67. package/dist/esm/CmpStr.js +662 -0
  68. package/dist/esm/CmpStr.js.map +1 -0
  69. package/dist/esm/CmpStrAsync.js +331 -0
  70. package/dist/esm/CmpStrAsync.js.map +1 -0
  71. package/dist/esm/index.js +7 -0
  72. package/dist/esm/index.js.map +1 -0
  73. package/dist/esm/metric/Cosine.js +99 -0
  74. package/dist/esm/metric/Cosine.js.map +1 -0
  75. package/dist/esm/metric/DamerauLevenshtein.js +108 -0
  76. package/dist/esm/metric/DamerauLevenshtein.js.map +1 -0
  77. package/dist/esm/metric/DiceSorensen.js +89 -0
  78. package/dist/esm/metric/DiceSorensen.js.map +1 -0
  79. package/dist/esm/metric/Hamming.js +77 -0
  80. package/dist/esm/metric/Hamming.js.map +1 -0
  81. package/dist/esm/metric/Jaccard.js +74 -0
  82. package/dist/esm/metric/Jaccard.js.map +1 -0
  83. package/dist/esm/metric/JaroWinkler.js +112 -0
  84. package/dist/esm/metric/JaroWinkler.js.map +1 -0
  85. package/dist/esm/metric/LCS.js +87 -0
  86. package/dist/esm/metric/LCS.js.map +1 -0
  87. package/dist/esm/metric/Levenshtein.js +92 -0
  88. package/dist/esm/metric/Levenshtein.js.map +1 -0
  89. package/dist/esm/metric/Metric.js +442 -0
  90. package/dist/esm/metric/Metric.js.map +1 -0
  91. package/dist/esm/metric/NeedlemanWunsch.js +93 -0
  92. package/dist/esm/metric/NeedlemanWunsch.js.map +1 -0
  93. package/dist/esm/metric/SmithWaterman.js +96 -0
  94. package/dist/esm/metric/SmithWaterman.js.map +1 -0
  95. package/dist/esm/metric/qGram.js +89 -0
  96. package/dist/esm/metric/qGram.js.map +1 -0
  97. package/dist/esm/phonetic/Cologne.js +114 -0
  98. package/dist/esm/phonetic/Cologne.js.map +1 -0
  99. package/dist/esm/phonetic/Metaphone.js +174 -0
  100. package/dist/esm/phonetic/Metaphone.js.map +1 -0
  101. package/dist/esm/phonetic/Phonetic.js +409 -0
  102. package/dist/esm/phonetic/Phonetic.js.map +1 -0
  103. package/dist/esm/phonetic/Soundex.js +137 -0
  104. package/dist/esm/phonetic/Soundex.js.map +1 -0
  105. package/dist/esm/utils/DeepMerge.js +139 -0
  106. package/dist/esm/utils/DeepMerge.js.map +1 -0
  107. package/dist/esm/utils/DiffChecker.js +498 -0
  108. package/dist/esm/utils/DiffChecker.js.map +1 -0
  109. package/dist/esm/utils/Filter.js +187 -0
  110. package/dist/esm/utils/Filter.js.map +1 -0
  111. package/dist/esm/utils/HashTable.js +173 -0
  112. package/dist/esm/utils/HashTable.js.map +1 -0
  113. package/dist/esm/utils/Normalizer.js +142 -0
  114. package/dist/esm/utils/Normalizer.js.map +1 -0
  115. package/dist/esm/utils/Pool.js +194 -0
  116. package/dist/esm/utils/Pool.js.map +1 -0
  117. package/dist/esm/utils/Profiler.js +227 -0
  118. package/dist/esm/utils/Profiler.js.map +1 -0
  119. package/dist/esm/utils/Registry.js +142 -0
  120. package/dist/esm/utils/Registry.js.map +1 -0
  121. package/dist/esm/utils/TextAnalyzer.js +356 -0
  122. package/dist/esm/utils/TextAnalyzer.js.map +1 -0
  123. package/dist/types/CmpStr.d.ts +472 -0
  124. package/dist/types/CmpStrAsync.d.ts +233 -0
  125. package/dist/types/index.d.ts +51 -0
  126. package/dist/types/metric/Cosine.d.ts +57 -0
  127. package/dist/types/metric/DamerauLevenshtein.d.ts +50 -0
  128. package/dist/types/metric/DiceSorensen.d.ts +57 -0
  129. package/dist/types/metric/Hamming.d.ts +49 -0
  130. package/dist/types/metric/Jaccard.d.ts +48 -0
  131. package/dist/types/metric/JaroWinkler.d.ts +50 -0
  132. package/dist/types/metric/LCS.d.ts +50 -0
  133. package/dist/types/metric/Levenshtein.d.ts +50 -0
  134. package/dist/types/metric/Metric.d.ts +261 -0
  135. package/dist/types/metric/NeedlemanWunsch.d.ts +47 -0
  136. package/dist/types/metric/SmithWaterman.d.ts +48 -0
  137. package/dist/types/metric/index.d.ts +41 -0
  138. package/dist/types/metric/qGram.d.ts +56 -0
  139. package/dist/types/phonetic/Cologne.d.ts +46 -0
  140. package/dist/types/phonetic/Metaphone.d.ts +50 -0
  141. package/dist/types/phonetic/Phonetic.d.ts +189 -0
  142. package/dist/types/phonetic/Soundex.d.ts +49 -0
  143. package/dist/types/phonetic/index.d.ts +30 -0
  144. package/dist/types/utils/DeepMerge.d.ts +70 -0
  145. package/dist/types/utils/DiffChecker.d.ts +137 -0
  146. package/dist/types/utils/Filter.d.ts +97 -0
  147. package/dist/types/utils/HashTable.d.ts +86 -0
  148. package/dist/types/utils/Normalizer.d.ts +76 -0
  149. package/dist/types/utils/Pool.d.ts +63 -0
  150. package/dist/types/utils/Profiler.d.ts +129 -0
  151. package/dist/types/utils/Registry.d.ts +57 -0
  152. package/dist/types/utils/TextAnalyzer.d.ts +199 -0
  153. package/dist/types/utils/Types.d.ts +313 -0
  154. package/package.json +62 -49
  155. package/src/CmpStr.d.ts +0 -70
  156. package/src/CmpStr.js +0 -912
  157. package/src/CmpStrAsync.d.ts +0 -19
  158. package/src/CmpStrAsync.js +0 -204
  159. package/src/algorithms/cosine.js +0 -86
  160. package/src/algorithms/damerau.js +0 -78
  161. package/src/algorithms/dice.js +0 -65
  162. package/src/algorithms/hamming.js +0 -44
  163. package/src/algorithms/jaccard.js +0 -34
  164. package/src/algorithms/jaroWinkler.js +0 -106
  165. package/src/algorithms/lcs.js +0 -58
  166. package/src/algorithms/levenshtein.js +0 -70
  167. package/src/algorithms/needlemanWunsch.js +0 -72
  168. package/src/algorithms/qGram.js +0 -63
  169. package/src/algorithms/smithWaterman.js +0 -78
  170. package/src/algorithms/soundex.js +0 -152
  171. package/src/index.d.ts +0 -3
  172. package/src/index.js +0 -47
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Normalizer Utility
3
+ * src/utils/Normalizer.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Text_normalization
6
+ * @see https://en.wikipedia.org/wiki/Unicode_equivalence
7
+ *
8
+ * This module provides a Normalizer class that allows for string normalization based
9
+ * on various flags. It uses a pipeline of normalization functions that can be reused
10
+ * and cached for efficiency. The Normalizer can handle both single strings and arrays
11
+ * of strings, and supports synchronous and asynchronous normalization.
12
+ *
13
+ * Supported flags:
14
+ * 'd' :: Normalize to NFD (Normalization Form Decomposed)
15
+ * 'u' :: Normalize to NFC (Normalization Form Composed)
16
+ * 'x' :: Normalize to NFKC (Normalization Form Compatibility Composed)
17
+ * 'w' :: Collapse whitespace
18
+ * 't' :: Remove leading and trailing whitespace
19
+ * 'r' :: Remove double characters
20
+ * 's' :: Remove punctuation / special characters
21
+ * 'k' :: Remove non-letter characters
22
+ * 'n' :: Remove non-number characters
23
+ * 'i' :: Case insensitive (convert to lowercase)
24
+ *
25
+ * @module Utils/Normalizer
26
+ * @author Paul Köhler (komed3)
27
+ * @license MIT
28
+ */
29
+ import type { NormalizeFlags } from './Types';
30
+ /**
31
+ * The Normalizer class providing methods to normalize strings based on various flags.
32
+ */
33
+ export declare class Normalizer {
34
+ /**
35
+ * A map that holds normalization functions based on the flags.
36
+ * This allows for reusing normalization logic without recomputing it.
37
+ */
38
+ private static pipeline;
39
+ /**
40
+ * A cache to store normalized strings based on the flags and input.
41
+ * This helps avoid recomputing normalization for the same input and flags.
42
+ */
43
+ private static cache;
44
+ /**
45
+ * Returns a normalization function based on the provided flags.
46
+ * The flags are a string of characters that define the normalization steps.
47
+ *
48
+ * @param {NormalizeFlags} flags - A string of characters representing the normalization steps
49
+ * @returns {NormalizerFn} - A function that normalizes a string based on the provided flags
50
+ */
51
+ private static getPipeline;
52
+ /**
53
+ * Normalizes the input string or array of strings based on the provided flags.
54
+ * The flags are a string of characters that define the normalization steps.
55
+ *
56
+ * @param {string|string[]} input - The string or array of strings to normalize
57
+ * @param {NormalizeFlags} flags - A string of characters representing the normalization steps
58
+ * @returns {string|string[]} - The normalized string(s)
59
+ */
60
+ static normalize(input: string | string[], flags: NormalizeFlags): string | string[];
61
+ /**
62
+ * Asynchronously normalizes the input string or array of strings based on the
63
+ * provided flags. This method is useful for handling large inputs or when
64
+ * normalization needs to be done in a non-blocking way.
65
+ *
66
+ * @param {string|string[]} input - The string or array of strings to normalize
67
+ * @param {NormalizeFlags} flags - A string of characters representing the normalization steps
68
+ * @returns {Promise<string|string[]>} - A promise that resolves to the normalized string(s)
69
+ */
70
+ static normalizeAsync(input: string | string[], flags: NormalizeFlags): Promise<string | string[]>;
71
+ /**
72
+ * Clears the normalization pipeline and cache.
73
+ * This is useful for resetting the state of the Normalizer.
74
+ */
75
+ static clear(): void;
76
+ }
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Pool Utility
3
+ * src/utils/Pool.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Circular_buffer
6
+ *
7
+ * The Pool class provides a simple and efficient buffer pool for dynamic programming
8
+ * algorithms that require temporary arrays (such as Levenshtein, LCS, etc.).
9
+ * By reusing pre-allocated typed arrays, it reduces memory allocations and garbage
10
+ * collection overhead, especially for repeated or batch computations.
11
+ *
12
+ * It supports different types of buffers (Uint16Array, number[], Set, Map) and allows
13
+ * for acquiring buffers of specific sizes while managing a maximum pool size.
14
+ *
15
+ * @module Utils/Pool
16
+ * @author Paul Köhler (komed3)
17
+ * @license MIT
18
+ */
19
+ import type { PoolType } from './Types';
20
+ /**
21
+ * The Pool class provides a buffer pool for dynamic programming algorithms.
22
+ *
23
+ * It allows for efficient reuse of buffers (Uint16Array, number[], Set, Map)
24
+ * to reduce memory allocations and garbage collection overhead.
25
+ */
26
+ export declare class Pool {
27
+ private static readonly CONFIG;
28
+ private static readonly POOLS;
29
+ /**
30
+ * Allocates a new buffer of the specified type and size.
31
+ *
32
+ * @param {PoolType} type - The type of buffer to allocate
33
+ * @param {number} size - The size of the buffer to allocate
34
+ * @return {any} - The newly allocated buffer
35
+ */
36
+ private static allocate;
37
+ /**
38
+ * Acquires a buffer of the specified type and size from the pool.
39
+ * If no suitable buffer is available, it allocates a new one.
40
+ *
41
+ * @param {PoolType} type - The type of buffer to acquire (e.g., 'uint16', 'number[]', 'set', 'map')
42
+ * @param {number} size - The size of the buffer to acquire
43
+ * @return {T} - The acquired buffer of the specified type
44
+ */
45
+ static acquire<T = any>(type: PoolType, size: number): T;
46
+ /**
47
+ * Acquires multiple buffers of the specified type and sizes from the pool.
48
+ *
49
+ * @param {PoolType} type - The type of buffers to acquire
50
+ * @param {number[]} sizes - An array of sizes for each buffer to acquire
51
+ * @return {T[]} - An array of acquired buffers of the specified type
52
+ */
53
+ static acquireMany<T = any>(type: PoolType, sizes: number[]): T[];
54
+ /**
55
+ * Releases a buffer back to the pool.
56
+ * If the size of the buffer is larger than the maximum item size, it will not be released.
57
+ *
58
+ * @param {PoolType} type - The type of buffer to release
59
+ * @param {T} buffer - The buffer to release
60
+ * @param {number} size - The size of the buffer
61
+ */
62
+ static release<T = any>(type: PoolType, buffer: T, size: number): void;
63
+ }
@@ -0,0 +1,129 @@
1
+ /**
2
+ * Profiler Utility
3
+ * src/utils/Profiler.ts
4
+ *
5
+ * @see https://en.wikipedia.org/wiki/Profiling_(computer_programming)
6
+ *
7
+ * This class provides methods to run synchronous and asynchronous functions, capturing
8
+ * their execution time and memory usage, and storing the results in a set of profiler
9
+ * entries. It supports both Node.js and browser environments, detecting the environment
10
+ * automatically.
11
+ *
12
+ * The class is optimized for minimal overhead and can be used for fine-grained
13
+ * performance profiling.
14
+ *
15
+ * @module Utils/Profiler
16
+ * @author Paul Köhler (komed3)
17
+ * @license MIT
18
+ */
19
+ import type { ProfilerEntry, ProfilerService } from './Types';
20
+ /**
21
+ * Profiler class for measuring execution time and memory usage of functions.
22
+ */
23
+ export declare class Profiler {
24
+ private static ENV;
25
+ private static instance;
26
+ private store;
27
+ private totalTime;
28
+ private totalMem;
29
+ private active;
30
+ /**
31
+ * Sets the environment based on the available global objects.
32
+ * Detects if running in Node.js or browser and sets the ENV property accordingly.
33
+ */
34
+ protected static detectEnv(): void;
35
+ /**
36
+ * Returns the singleton instance of the Profiler class.
37
+ * If the instance does not exist, it creates a new one.
38
+ *
39
+ * @param {boolean} [enable=false] - Optional parameter to enable the profiler upon instantiation
40
+ * @returns {Profiler} - Singleton Profiler instance
41
+ */
42
+ static getInstance(enable?: boolean): Profiler;
43
+ /**
44
+ * Private constructor to enforce singleton pattern.
45
+ * Initializes the store for profiler entries.
46
+ *
47
+ * @param {boolean} [enable=false] - Optional parameter to enable the profiler
48
+ */
49
+ private constructor();
50
+ /**
51
+ * Gets the current time based on the environment.
52
+ *
53
+ * Uses process.hrtime.bigint() for Node.js, performance.now() for browsers,
54
+ * and Date.now() as a fallback.
55
+ *
56
+ * @returns {number} - Current time in milliseconds
57
+ */
58
+ private now;
59
+ /**
60
+ * Gets the current memory usage based on the environment.
61
+ *
62
+ * Uses process.memoryUsage().heapUsed for Node.js, performance.memory.usedJSHeapSize
63
+ * for browsers, and returns 0 as a fallback.
64
+ *
65
+ * @returns {number} - Current memory usage in bytes
66
+ */
67
+ private mem;
68
+ /**
69
+ * Enables the profiler.
70
+ * Sets the active state to true, allowing profiling to occur.
71
+ */
72
+ enable(): void;
73
+ /**
74
+ * Disables the profiler.
75
+ * Sets the active state to false, preventing further profiling.
76
+ */
77
+ disable(): void;
78
+ /**
79
+ * Resets the profiler by clearing the store, total time and memory consumption.
80
+ * This method is useful for starting a new profiling session.
81
+ */
82
+ clear(): void;
83
+ /**
84
+ * Runs a synchronous function and profiles its execution time and memory usage.
85
+ * If the profiler is not active, it simply executes the function without profiling.
86
+ *
87
+ * @param {() => T} fn - Function to be executed and profiled
88
+ * @param {Record<string, any>} meta - Metadata to be associated with the profiling entry
89
+ * @returns {T} - The result of the executed function
90
+ */
91
+ run<T>(fn: () => T, meta?: Record<string, any>): T;
92
+ /**
93
+ * Runs an asynchronous function and profiles its execution time and memory usage.
94
+ * If the profiler is not active, it simply executes the function without profiling.
95
+ *
96
+ * @param {() => Promise<T>} fn - Asynchronous function to be executed and profiled
97
+ * @param {Record<string, any>} meta - Metadata to be associated with the profiling entry
98
+ * @returns {Promise<T>} - A promise that resolves to the result of the executed function
99
+ */
100
+ runAsync<T>(fn: () => Promise<T>, meta?: Record<string, any>): Promise<T>;
101
+ /**
102
+ * Retrieves all profiler entries stored in the profiler.
103
+ *
104
+ * @returns {ProfilerEntry<any>[]} - An array of profiler entries
105
+ */
106
+ getAll(): ProfilerEntry<any>[];
107
+ /**
108
+ * Retrieves the last profiler entry stored in the profiler.
109
+ *
110
+ * @returns {ProfilerEntry<any> | undefined} - The last profiler entry or undefined if no entries exist
111
+ */
112
+ getLast(): ProfilerEntry<any> | undefined;
113
+ /**
114
+ * Retrieves the total time and memory consumption recorded by the profiler.
115
+ *
116
+ * @returns {{ time: number, mem: number }} - An object containing total time and memory usage
117
+ */
118
+ getTotal(): {
119
+ time: number;
120
+ mem: number;
121
+ };
122
+ /**
123
+ * Returns the services provided by the Profiler class.
124
+ * This allows for easy access to the profiler's methods.
125
+ *
126
+ * @returns {ProfilerService<any>} - An object containing methods to control the profiler
127
+ */
128
+ services: ProfilerService<any>;
129
+ }
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Registry Utility
3
+ * src/utils/Registry.ts
4
+ *
5
+ * This module provides a Registry function that allows for registering,
6
+ * removing, checking, getting, and listing class constructors.
7
+ *
8
+ * It is designed to manage class extensions, ensuring that all registered
9
+ * classes extend a specified base constructor.
10
+ *
11
+ * @module Utils/Registry
12
+ * @author Paul Köhler (komed3)
13
+ * @license MIT
14
+ */
15
+ import type { RegistryService, RegistryConstructor } from './Types';
16
+ /**
17
+ * Global registry object to hold multiple registries.
18
+ * Each registry is keyed by a string identifier.
19
+ *
20
+ * @type {Record<string, RegistryService<any>>}
21
+ */
22
+ export declare const registry: Record<string, RegistryService<any>>;
23
+ /**
24
+ * Factory object to hold factory functions for creating instances.
25
+ * This is used to create instances of registered classes.
26
+ *
27
+ * @type {Record<string, ( cls: string, ...args: any[] ) => InstanceType<any>>}
28
+ */
29
+ export declare const factory: Record<string, (cls: string, ...args: any[]) => InstanceType<any>>;
30
+ /**
31
+ * Registry function to create a service for managing class constructors.
32
+ *
33
+ * @param {string} reg - The name of the registry
34
+ * @param {RegistryConstructor<T>} ctor - The base constructor that all registered classes must extend
35
+ * @returns {RegistryService<T>} - An object with methods to register, remove, check, get, and list classes
36
+ * @throws {Error} If the registry already exists (overwriting is forbidden)
37
+ */
38
+ export declare function Registry<T>(reg: string, ctor: RegistryConstructor<T>): RegistryService<T>;
39
+ /**
40
+ * Resolve a class constructor from a specific registry.
41
+ *
42
+ * @param {string} reg - The name of the registry
43
+ * @param {T|string} cls - The class itself or name of the class to resolve
44
+ * @returns {T|undefined} - The class constructor if found, otherwise undefined
45
+ * @throws {ReferenceError} If the registry does not exist
46
+ */
47
+ export declare function resolveCls<T extends RegistryConstructor<any>>(reg: string, cls: T | string): T;
48
+ /**
49
+ * Create an instance of a class from a specific registry.
50
+ *
51
+ * @param {string} reg - The name of the registry
52
+ * @param {T|string} cls - The class itself or name of the class to instantiate
53
+ * @param {...any} args - Arguments to pass to the class constructor
54
+ * @returns {InstanceType<T>} - An instance of the class
55
+ * @throws {Error} If the class cannot be instantiated
56
+ */
57
+ export declare function createFromRegistry<T extends RegistryConstructor<any>>(reg: string, cls: T | string, ...args: any[]): InstanceType<T>;
@@ -0,0 +1,199 @@
1
+ /**
2
+ * TextAnalyzer Utility
3
+ * src/utils/TextAnalyzer.ts
4
+ *
5
+ * The TextAnalyzer class provides a comprehensive set of methods for analyzing and
6
+ * extracting statistics from a given text. It supports word and sentence tokenization,
7
+ * character and word frequency analysis, syllable estimation, readability metrics
8
+ * (Flesch, Kincaid, LIX, WSTF), and various ratios and histograms. Designed for
9
+ * efficiency and flexibility, it is suitable for linguistic research, readability
10
+ * scoring, and text preprocessing tasks.
11
+ *
12
+ * @module Utils/TextAnalyzer
13
+ * @author Paul Köhler (komed3)
14
+ * @license MIT
15
+ */
16
+ export declare class TextAnalyzer {
17
+ private readonly text;
18
+ private words;
19
+ private sentences;
20
+ private charFrequency;
21
+ private wordHistogram;
22
+ private syllableCache;
23
+ /**
24
+ * Constructs a new TextAnalyzer instance with the provided input text.
25
+ *
26
+ * @param {string} input - The text to analyze
27
+ */
28
+ constructor(input: string);
29
+ /**
30
+ * Tokenizes the input text into words and sentences.
31
+ */
32
+ private tokenize;
33
+ /**
34
+ * Computes character and word frequencies from the tokenized text.
35
+ */
36
+ private computeFrequencies;
37
+ /**
38
+ * Estimates the number of syllables in a word using a simple heuristic.
39
+ *
40
+ * @param {string} word - The word to estimate syllables for
41
+ * @returns {number} - Estimated syllable count
42
+ */
43
+ private estimateSyllables;
44
+ /**
45
+ * Gets the original text length in characters.
46
+ *
47
+ * @return {number} - Length of the text
48
+ */
49
+ getLength(): number;
50
+ /**
51
+ * Gets the number of words in the text.
52
+ *
53
+ * @return {number} - Count of words
54
+ */
55
+ getWordCount(): number;
56
+ /**
57
+ * Gets the number of sentences in the text.
58
+ *
59
+ * @return {number} - Count of sentences
60
+ */
61
+ getSentenceCount(): number;
62
+ /**
63
+ * Gets the average word length in the text.
64
+ *
65
+ * @return {number} - Average length of words
66
+ */
67
+ getAvgWordLength(): number;
68
+ /**
69
+ * Gets the average sentence length in words.
70
+ *
71
+ * @return {number} - Average length of sentences
72
+ */
73
+ getAvgSentenceLength(): number;
74
+ /**
75
+ * Gets a histogram of word frequencies in the text.
76
+ *
77
+ * @returns {Record<string, number>} - A histogram of word frequencies
78
+ */
79
+ getWordHistogram(): Record<string, number>;
80
+ /**
81
+ * Gets the most common words in the text, limited to a specified number.
82
+ *
83
+ * @param {number} [limit=5] - Maximum number of common words to return
84
+ * @returns {string[]} - Array of the most common words
85
+ */
86
+ getMostCommonWords(limit?: number): string[];
87
+ /**
88
+ * Gets the least common words (hapax legomena) in the text.
89
+ *
90
+ * Hapax legomena are words that occur only once in the text.
91
+ *
92
+ * @returns {string[]} - Array of hapax legomena
93
+ */
94
+ getHapaxLegomena(): string[];
95
+ /**
96
+ * Checks if the text contains any numbers.
97
+ *
98
+ * @returns {boolean} - True if numbers are present, false otherwise
99
+ */
100
+ hasNumbers(): boolean;
101
+ /**
102
+ * Calculates the ratio of uppercase letters to total letters in the text.
103
+ *
104
+ * @return {number} - Ratio of uppercase letters to total letters
105
+ */
106
+ getUpperCaseRatio(): number;
107
+ /**
108
+ * Gets the frequency of each character in the text.
109
+ *
110
+ * @returns {Record<string, number>} - A record of character frequencies
111
+ */
112
+ getCharFrequency(): Record<string, number>;
113
+ /**
114
+ * Gets the frequency of each Unicode block in the text.
115
+ *
116
+ * @returns {Record<string, number>} - A record of Unicode block frequencies
117
+ */
118
+ getUnicodeStats(): Record<string, number>;
119
+ /**
120
+ * Gets the ratio of long words (words with length >= len) to total words.
121
+ *
122
+ * @param {number} [len=7] - Minimum length for a word to be considered long
123
+ * @returns {number} - Ratio of long words to total words
124
+ */
125
+ getLongWordRatio(len?: number): number;
126
+ /**
127
+ * Gets the ratio of short words (words with length <= len) to total words.
128
+ *
129
+ * @param {number} [len=3] - Maximum length for a word to be considered short
130
+ * @returns {number} - Ratio of short words to total words
131
+ */
132
+ getShortWordRatio(len?: number): number;
133
+ /**
134
+ * Estimates the number of syllables in the text.
135
+ *
136
+ * @returns {number} - Total estimated syllable count
137
+ */
138
+ getSyllablesCount(): number;
139
+ /**
140
+ * Gets the number of monosyllabic words (words with exactly one syllable).
141
+ *
142
+ * @returns {number} - Count of monosyllabic words
143
+ */
144
+ getMonosyllabicWordCount(): number;
145
+ /**
146
+ * Gets the number of words with at least a specified minimum syllable count.
147
+ *
148
+ * @param {number} min - Minimum syllable count for a word to be included
149
+ * @returns {number} - Count of words meeting the syllable criteria
150
+ */
151
+ getMinSyllablesWordCount(min: number): number;
152
+ /**
153
+ * Gets the number of words with at most a specified maximum syllable count.
154
+ *
155
+ * @param {number} max - Maximum syllable count for a word to be included
156
+ * @returns {number} - Count of words meeting the syllable criteria
157
+ */
158
+ getMaxSyllablesWordCount(max: number): number;
159
+ /**
160
+ * Calculates Honoré's R statistic for the text as a measure of lexical richness.
161
+ *
162
+ * @returns {number} - The Honore's R statistic
163
+ */
164
+ getHonoresR(): number;
165
+ /**
166
+ * Estimates the reading time for the text based on words per minute (WPM).
167
+ *
168
+ * @param {number} [wpm=200] - Words per minute for the calculation
169
+ * @returns {number} - Estimated reading time in minutes
170
+ */
171
+ getReadingTime(wpm?: number): number;
172
+ /**
173
+ * Calculates various readability scores based on the text.
174
+ *
175
+ * This method supports multiple readability metrics:
176
+ * - Flesch Reading Ease
177
+ * - Flesch-Kincaid Grade Level
178
+ *
179
+ * @param {'flesch'|'fleschde'|'kincaid'} [metric='flesch'] - The readability metric to calculate
180
+ * @returns {number} - The calculated readability score
181
+ */
182
+ getReadabilityScore(metric?: 'flesch' | 'fleschde' | 'kincaid'): number;
183
+ /**
184
+ * Calculates the LIX (Läsbarhetsindex) score for the text.
185
+ *
186
+ * The LIX score is a readability index that combines average sentence length and the percentage of long words.
187
+ *
188
+ * @returns {number} - The LIX score
189
+ */
190
+ getLIXScore(): number;
191
+ /**
192
+ * Calculates the Wiener Sachtextformel (WSTF) scores for the text.
193
+ *
194
+ * The WSTF scores are a set of readability metrics based on word and sentence characteristics.
195
+ *
196
+ * @returns {[number, number, number, number]} - An array of WSTF scores
197
+ */
198
+ getWSTFScore(): [number, number, number, number];
199
+ }