cmpstr 2.0.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +75 -499
- package/dist/CmpStr.esm.js +4863 -0
- package/dist/CmpStr.esm.js.map +1 -0
- package/dist/CmpStr.esm.min.js +8 -0
- package/dist/CmpStr.esm.min.js.map +1 -0
- package/dist/CmpStr.umd.js +4875 -0
- package/dist/CmpStr.umd.js.map +1 -0
- package/dist/CmpStr.umd.min.js +8 -0
- package/dist/CmpStr.umd.min.js.map +1 -0
- package/dist/cjs/CmpStr.js +663 -0
- package/dist/cjs/CmpStr.js.map +1 -0
- package/dist/cjs/CmpStrAsync.js +336 -0
- package/dist/cjs/CmpStrAsync.js.map +1 -0
- package/dist/cjs/index.js +15 -0
- package/dist/cjs/index.js.map +1 -0
- package/dist/cjs/metric/Cosine.js +101 -0
- package/dist/cjs/metric/Cosine.js.map +1 -0
- package/dist/cjs/metric/DamerauLevenshtein.js +110 -0
- package/dist/cjs/metric/DamerauLevenshtein.js.map +1 -0
- package/dist/cjs/metric/DiceSorensen.js +91 -0
- package/dist/cjs/metric/DiceSorensen.js.map +1 -0
- package/dist/cjs/metric/Hamming.js +82 -0
- package/dist/cjs/metric/Hamming.js.map +1 -0
- package/dist/cjs/metric/Jaccard.js +76 -0
- package/dist/cjs/metric/Jaccard.js.map +1 -0
- package/dist/cjs/metric/JaroWinkler.js +114 -0
- package/dist/cjs/metric/JaroWinkler.js.map +1 -0
- package/dist/cjs/metric/LCS.js +89 -0
- package/dist/cjs/metric/LCS.js.map +1 -0
- package/dist/cjs/metric/Levenshtein.js +94 -0
- package/dist/cjs/metric/Levenshtein.js.map +1 -0
- package/dist/cjs/metric/Metric.js +445 -0
- package/dist/cjs/metric/Metric.js.map +1 -0
- package/dist/cjs/metric/NeedlemanWunsch.js +95 -0
- package/dist/cjs/metric/NeedlemanWunsch.js.map +1 -0
- package/dist/cjs/metric/SmithWaterman.js +98 -0
- package/dist/cjs/metric/SmithWaterman.js.map +1 -0
- package/dist/cjs/metric/qGram.js +91 -0
- package/dist/cjs/metric/qGram.js.map +1 -0
- package/dist/cjs/phonetic/Cologne.js +112 -0
- package/dist/cjs/phonetic/Cologne.js.map +1 -0
- package/dist/cjs/phonetic/Metaphone.js +172 -0
- package/dist/cjs/phonetic/Metaphone.js.map +1 -0
- package/dist/cjs/phonetic/Phonetic.js +413 -0
- package/dist/cjs/phonetic/Phonetic.js.map +1 -0
- package/dist/cjs/phonetic/Soundex.js +135 -0
- package/dist/cjs/phonetic/Soundex.js.map +1 -0
- package/dist/cjs/utils/DeepMerge.js +144 -0
- package/dist/cjs/utils/DeepMerge.js.map +1 -0
- package/dist/cjs/utils/DiffChecker.js +500 -0
- package/dist/cjs/utils/DiffChecker.js.map +1 -0
- package/dist/cjs/utils/Filter.js +189 -0
- package/dist/cjs/utils/Filter.js.map +1 -0
- package/dist/cjs/utils/HashTable.js +175 -0
- package/dist/cjs/utils/HashTable.js.map +1 -0
- package/dist/cjs/utils/Normalizer.js +144 -0
- package/dist/cjs/utils/Normalizer.js.map +1 -0
- package/dist/cjs/utils/Pool.js +196 -0
- package/dist/cjs/utils/Pool.js.map +1 -0
- package/dist/cjs/utils/Profiler.js +229 -0
- package/dist/cjs/utils/Profiler.js.map +1 -0
- package/dist/cjs/utils/Registry.js +148 -0
- package/dist/cjs/utils/Registry.js.map +1 -0
- package/dist/cjs/utils/TextAnalyzer.js +358 -0
- package/dist/cjs/utils/TextAnalyzer.js.map +1 -0
- package/dist/esm/CmpStr.js +662 -0
- package/dist/esm/CmpStr.js.map +1 -0
- package/dist/esm/CmpStrAsync.js +331 -0
- package/dist/esm/CmpStrAsync.js.map +1 -0
- package/dist/esm/index.js +7 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/metric/Cosine.js +99 -0
- package/dist/esm/metric/Cosine.js.map +1 -0
- package/dist/esm/metric/DamerauLevenshtein.js +108 -0
- package/dist/esm/metric/DamerauLevenshtein.js.map +1 -0
- package/dist/esm/metric/DiceSorensen.js +89 -0
- package/dist/esm/metric/DiceSorensen.js.map +1 -0
- package/dist/esm/metric/Hamming.js +77 -0
- package/dist/esm/metric/Hamming.js.map +1 -0
- package/dist/esm/metric/Jaccard.js +74 -0
- package/dist/esm/metric/Jaccard.js.map +1 -0
- package/dist/esm/metric/JaroWinkler.js +112 -0
- package/dist/esm/metric/JaroWinkler.js.map +1 -0
- package/dist/esm/metric/LCS.js +87 -0
- package/dist/esm/metric/LCS.js.map +1 -0
- package/dist/esm/metric/Levenshtein.js +92 -0
- package/dist/esm/metric/Levenshtein.js.map +1 -0
- package/dist/esm/metric/Metric.js +442 -0
- package/dist/esm/metric/Metric.js.map +1 -0
- package/dist/esm/metric/NeedlemanWunsch.js +93 -0
- package/dist/esm/metric/NeedlemanWunsch.js.map +1 -0
- package/dist/esm/metric/SmithWaterman.js +96 -0
- package/dist/esm/metric/SmithWaterman.js.map +1 -0
- package/dist/esm/metric/qGram.js +89 -0
- package/dist/esm/metric/qGram.js.map +1 -0
- package/dist/esm/phonetic/Cologne.js +114 -0
- package/dist/esm/phonetic/Cologne.js.map +1 -0
- package/dist/esm/phonetic/Metaphone.js +174 -0
- package/dist/esm/phonetic/Metaphone.js.map +1 -0
- package/dist/esm/phonetic/Phonetic.js +409 -0
- package/dist/esm/phonetic/Phonetic.js.map +1 -0
- package/dist/esm/phonetic/Soundex.js +137 -0
- package/dist/esm/phonetic/Soundex.js.map +1 -0
- package/dist/esm/utils/DeepMerge.js +139 -0
- package/dist/esm/utils/DeepMerge.js.map +1 -0
- package/dist/esm/utils/DiffChecker.js +498 -0
- package/dist/esm/utils/DiffChecker.js.map +1 -0
- package/dist/esm/utils/Filter.js +187 -0
- package/dist/esm/utils/Filter.js.map +1 -0
- package/dist/esm/utils/HashTable.js +173 -0
- package/dist/esm/utils/HashTable.js.map +1 -0
- package/dist/esm/utils/Normalizer.js +142 -0
- package/dist/esm/utils/Normalizer.js.map +1 -0
- package/dist/esm/utils/Pool.js +194 -0
- package/dist/esm/utils/Pool.js.map +1 -0
- package/dist/esm/utils/Profiler.js +227 -0
- package/dist/esm/utils/Profiler.js.map +1 -0
- package/dist/esm/utils/Registry.js +142 -0
- package/dist/esm/utils/Registry.js.map +1 -0
- package/dist/esm/utils/TextAnalyzer.js +356 -0
- package/dist/esm/utils/TextAnalyzer.js.map +1 -0
- package/dist/types/CmpStr.d.ts +472 -0
- package/dist/types/CmpStrAsync.d.ts +233 -0
- package/dist/types/index.d.ts +51 -0
- package/dist/types/metric/Cosine.d.ts +57 -0
- package/dist/types/metric/DamerauLevenshtein.d.ts +50 -0
- package/dist/types/metric/DiceSorensen.d.ts +57 -0
- package/dist/types/metric/Hamming.d.ts +49 -0
- package/dist/types/metric/Jaccard.d.ts +48 -0
- package/dist/types/metric/JaroWinkler.d.ts +50 -0
- package/dist/types/metric/LCS.d.ts +50 -0
- package/dist/types/metric/Levenshtein.d.ts +50 -0
- package/dist/types/metric/Metric.d.ts +261 -0
- package/dist/types/metric/NeedlemanWunsch.d.ts +47 -0
- package/dist/types/metric/SmithWaterman.d.ts +48 -0
- package/dist/types/metric/index.d.ts +41 -0
- package/dist/types/metric/qGram.d.ts +56 -0
- package/dist/types/phonetic/Cologne.d.ts +46 -0
- package/dist/types/phonetic/Metaphone.d.ts +50 -0
- package/dist/types/phonetic/Phonetic.d.ts +189 -0
- package/dist/types/phonetic/Soundex.d.ts +49 -0
- package/dist/types/phonetic/index.d.ts +30 -0
- package/dist/types/utils/DeepMerge.d.ts +70 -0
- package/dist/types/utils/DiffChecker.d.ts +137 -0
- package/dist/types/utils/Filter.d.ts +97 -0
- package/dist/types/utils/HashTable.d.ts +86 -0
- package/dist/types/utils/Normalizer.d.ts +76 -0
- package/dist/types/utils/Pool.d.ts +63 -0
- package/dist/types/utils/Profiler.d.ts +129 -0
- package/dist/types/utils/Registry.d.ts +57 -0
- package/dist/types/utils/TextAnalyzer.d.ts +199 -0
- package/dist/types/utils/Types.d.ts +313 -0
- package/package.json +62 -49
- package/src/CmpStr.d.ts +0 -70
- package/src/CmpStr.js +0 -912
- package/src/CmpStrAsync.d.ts +0 -19
- package/src/CmpStrAsync.js +0 -204
- package/src/algorithms/cosine.js +0 -86
- package/src/algorithms/damerau.js +0 -78
- package/src/algorithms/dice.js +0 -65
- package/src/algorithms/hamming.js +0 -44
- package/src/algorithms/jaccard.js +0 -34
- package/src/algorithms/jaroWinkler.js +0 -106
- package/src/algorithms/lcs.js +0 -58
- package/src/algorithms/levenshtein.js +0 -70
- package/src/algorithms/needlemanWunsch.js +0 -72
- package/src/algorithms/qGram.js +0 -63
- package/src/algorithms/smithWaterman.js +0 -78
- package/src/algorithms/soundex.js +0 -152
- package/src/index.d.ts +0 -3
- package/src/index.js +0 -47
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Normalizer Utility
|
|
3
|
+
* src/utils/Normalizer.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Text_normalization
|
|
6
|
+
* @see https://en.wikipedia.org/wiki/Unicode_equivalence
|
|
7
|
+
*
|
|
8
|
+
* This module provides a Normalizer class that allows for string normalization based
|
|
9
|
+
* on various flags. It uses a pipeline of normalization functions that can be reused
|
|
10
|
+
* and cached for efficiency. The Normalizer can handle both single strings and arrays
|
|
11
|
+
* of strings, and supports synchronous and asynchronous normalization.
|
|
12
|
+
*
|
|
13
|
+
* Supported flags:
|
|
14
|
+
* 'd' :: Normalize to NFD (Normalization Form Decomposed)
|
|
15
|
+
* 'u' :: Normalize to NFC (Normalization Form Composed)
|
|
16
|
+
* 'x' :: Normalize to NFKC (Normalization Form Compatibility Composed)
|
|
17
|
+
* 'w' :: Collapse whitespace
|
|
18
|
+
* 't' :: Remove leading and trailing whitespace
|
|
19
|
+
* 'r' :: Remove double characters
|
|
20
|
+
* 's' :: Remove punctuation / special characters
|
|
21
|
+
* 'k' :: Remove non-letter characters
|
|
22
|
+
* 'n' :: Remove non-number characters
|
|
23
|
+
* 'i' :: Case insensitive (convert to lowercase)
|
|
24
|
+
*
|
|
25
|
+
* @module Utils/Normalizer
|
|
26
|
+
* @author Paul Köhler (komed3)
|
|
27
|
+
* @license MIT
|
|
28
|
+
*/
|
|
29
|
+
import type { NormalizeFlags } from './Types';
|
|
30
|
+
/**
|
|
31
|
+
* The Normalizer class providing methods to normalize strings based on various flags.
|
|
32
|
+
*/
|
|
33
|
+
export declare class Normalizer {
|
|
34
|
+
/**
|
|
35
|
+
* A map that holds normalization functions based on the flags.
|
|
36
|
+
* This allows for reusing normalization logic without recomputing it.
|
|
37
|
+
*/
|
|
38
|
+
private static pipeline;
|
|
39
|
+
/**
|
|
40
|
+
* A cache to store normalized strings based on the flags and input.
|
|
41
|
+
* This helps avoid recomputing normalization for the same input and flags.
|
|
42
|
+
*/
|
|
43
|
+
private static cache;
|
|
44
|
+
/**
|
|
45
|
+
* Returns a normalization function based on the provided flags.
|
|
46
|
+
* The flags are a string of characters that define the normalization steps.
|
|
47
|
+
*
|
|
48
|
+
* @param {NormalizeFlags} flags - A string of characters representing the normalization steps
|
|
49
|
+
* @returns {NormalizerFn} - A function that normalizes a string based on the provided flags
|
|
50
|
+
*/
|
|
51
|
+
private static getPipeline;
|
|
52
|
+
/**
|
|
53
|
+
* Normalizes the input string or array of strings based on the provided flags.
|
|
54
|
+
* The flags are a string of characters that define the normalization steps.
|
|
55
|
+
*
|
|
56
|
+
* @param {string|string[]} input - The string or array of strings to normalize
|
|
57
|
+
* @param {NormalizeFlags} flags - A string of characters representing the normalization steps
|
|
58
|
+
* @returns {string|string[]} - The normalized string(s)
|
|
59
|
+
*/
|
|
60
|
+
static normalize(input: string | string[], flags: NormalizeFlags): string | string[];
|
|
61
|
+
/**
|
|
62
|
+
* Asynchronously normalizes the input string or array of strings based on the
|
|
63
|
+
* provided flags. This method is useful for handling large inputs or when
|
|
64
|
+
* normalization needs to be done in a non-blocking way.
|
|
65
|
+
*
|
|
66
|
+
* @param {string|string[]} input - The string or array of strings to normalize
|
|
67
|
+
* @param {NormalizeFlags} flags - A string of characters representing the normalization steps
|
|
68
|
+
* @returns {Promise<string|string[]>} - A promise that resolves to the normalized string(s)
|
|
69
|
+
*/
|
|
70
|
+
static normalizeAsync(input: string | string[], flags: NormalizeFlags): Promise<string | string[]>;
|
|
71
|
+
/**
|
|
72
|
+
* Clears the normalization pipeline and cache.
|
|
73
|
+
* This is useful for resetting the state of the Normalizer.
|
|
74
|
+
*/
|
|
75
|
+
static clear(): void;
|
|
76
|
+
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pool Utility
|
|
3
|
+
* src/utils/Pool.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Circular_buffer
|
|
6
|
+
*
|
|
7
|
+
* The Pool class provides a simple and efficient buffer pool for dynamic programming
|
|
8
|
+
* algorithms that require temporary arrays (such as Levenshtein, LCS, etc.).
|
|
9
|
+
* By reusing pre-allocated typed arrays, it reduces memory allocations and garbage
|
|
10
|
+
* collection overhead, especially for repeated or batch computations.
|
|
11
|
+
*
|
|
12
|
+
* It supports different types of buffers (Uint16Array, number[], Set, Map) and allows
|
|
13
|
+
* for acquiring buffers of specific sizes while managing a maximum pool size.
|
|
14
|
+
*
|
|
15
|
+
* @module Utils/Pool
|
|
16
|
+
* @author Paul Köhler (komed3)
|
|
17
|
+
* @license MIT
|
|
18
|
+
*/
|
|
19
|
+
import type { PoolType } from './Types';
|
|
20
|
+
/**
|
|
21
|
+
* The Pool class provides a buffer pool for dynamic programming algorithms.
|
|
22
|
+
*
|
|
23
|
+
* It allows for efficient reuse of buffers (Uint16Array, number[], Set, Map)
|
|
24
|
+
* to reduce memory allocations and garbage collection overhead.
|
|
25
|
+
*/
|
|
26
|
+
export declare class Pool {
|
|
27
|
+
private static readonly CONFIG;
|
|
28
|
+
private static readonly POOLS;
|
|
29
|
+
/**
|
|
30
|
+
* Allocates a new buffer of the specified type and size.
|
|
31
|
+
*
|
|
32
|
+
* @param {PoolType} type - The type of buffer to allocate
|
|
33
|
+
* @param {number} size - The size of the buffer to allocate
|
|
34
|
+
* @return {any} - The newly allocated buffer
|
|
35
|
+
*/
|
|
36
|
+
private static allocate;
|
|
37
|
+
/**
|
|
38
|
+
* Acquires a buffer of the specified type and size from the pool.
|
|
39
|
+
* If no suitable buffer is available, it allocates a new one.
|
|
40
|
+
*
|
|
41
|
+
* @param {PoolType} type - The type of buffer to acquire (e.g., 'uint16', 'number[]', 'set', 'map')
|
|
42
|
+
* @param {number} size - The size of the buffer to acquire
|
|
43
|
+
* @return {T} - The acquired buffer of the specified type
|
|
44
|
+
*/
|
|
45
|
+
static acquire<T = any>(type: PoolType, size: number): T;
|
|
46
|
+
/**
|
|
47
|
+
* Acquires multiple buffers of the specified type and sizes from the pool.
|
|
48
|
+
*
|
|
49
|
+
* @param {PoolType} type - The type of buffers to acquire
|
|
50
|
+
* @param {number[]} sizes - An array of sizes for each buffer to acquire
|
|
51
|
+
* @return {T[]} - An array of acquired buffers of the specified type
|
|
52
|
+
*/
|
|
53
|
+
static acquireMany<T = any>(type: PoolType, sizes: number[]): T[];
|
|
54
|
+
/**
|
|
55
|
+
* Releases a buffer back to the pool.
|
|
56
|
+
* If the size of the buffer is larger than the maximum item size, it will not be released.
|
|
57
|
+
*
|
|
58
|
+
* @param {PoolType} type - The type of buffer to release
|
|
59
|
+
* @param {T} buffer - The buffer to release
|
|
60
|
+
* @param {number} size - The size of the buffer
|
|
61
|
+
*/
|
|
62
|
+
static release<T = any>(type: PoolType, buffer: T, size: number): void;
|
|
63
|
+
}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Profiler Utility
|
|
3
|
+
* src/utils/Profiler.ts
|
|
4
|
+
*
|
|
5
|
+
* @see https://en.wikipedia.org/wiki/Profiling_(computer_programming)
|
|
6
|
+
*
|
|
7
|
+
* This class provides methods to run synchronous and asynchronous functions, capturing
|
|
8
|
+
* their execution time and memory usage, and storing the results in a set of profiler
|
|
9
|
+
* entries. It supports both Node.js and browser environments, detecting the environment
|
|
10
|
+
* automatically.
|
|
11
|
+
*
|
|
12
|
+
* The class is optimized for minimal overhead and can be used for fine-grained
|
|
13
|
+
* performance profiling.
|
|
14
|
+
*
|
|
15
|
+
* @module Utils/Profiler
|
|
16
|
+
* @author Paul Köhler (komed3)
|
|
17
|
+
* @license MIT
|
|
18
|
+
*/
|
|
19
|
+
import type { ProfilerEntry, ProfilerService } from './Types';
|
|
20
|
+
/**
|
|
21
|
+
* Profiler class for measuring execution time and memory usage of functions.
|
|
22
|
+
*/
|
|
23
|
+
export declare class Profiler {
|
|
24
|
+
private static ENV;
|
|
25
|
+
private static instance;
|
|
26
|
+
private store;
|
|
27
|
+
private totalTime;
|
|
28
|
+
private totalMem;
|
|
29
|
+
private active;
|
|
30
|
+
/**
|
|
31
|
+
* Sets the environment based on the available global objects.
|
|
32
|
+
* Detects if running in Node.js or browser and sets the ENV property accordingly.
|
|
33
|
+
*/
|
|
34
|
+
protected static detectEnv(): void;
|
|
35
|
+
/**
|
|
36
|
+
* Returns the singleton instance of the Profiler class.
|
|
37
|
+
* If the instance does not exist, it creates a new one.
|
|
38
|
+
*
|
|
39
|
+
* @param {boolean} [enable=false] - Optional parameter to enable the profiler upon instantiation
|
|
40
|
+
* @returns {Profiler} - Singleton Profiler instance
|
|
41
|
+
*/
|
|
42
|
+
static getInstance(enable?: boolean): Profiler;
|
|
43
|
+
/**
|
|
44
|
+
* Private constructor to enforce singleton pattern.
|
|
45
|
+
* Initializes the store for profiler entries.
|
|
46
|
+
*
|
|
47
|
+
* @param {boolean} [enable=false] - Optional parameter to enable the profiler
|
|
48
|
+
*/
|
|
49
|
+
private constructor();
|
|
50
|
+
/**
|
|
51
|
+
* Gets the current time based on the environment.
|
|
52
|
+
*
|
|
53
|
+
* Uses process.hrtime.bigint() for Node.js, performance.now() for browsers,
|
|
54
|
+
* and Date.now() as a fallback.
|
|
55
|
+
*
|
|
56
|
+
* @returns {number} - Current time in milliseconds
|
|
57
|
+
*/
|
|
58
|
+
private now;
|
|
59
|
+
/**
|
|
60
|
+
* Gets the current memory usage based on the environment.
|
|
61
|
+
*
|
|
62
|
+
* Uses process.memoryUsage().heapUsed for Node.js, performance.memory.usedJSHeapSize
|
|
63
|
+
* for browsers, and returns 0 as a fallback.
|
|
64
|
+
*
|
|
65
|
+
* @returns {number} - Current memory usage in bytes
|
|
66
|
+
*/
|
|
67
|
+
private mem;
|
|
68
|
+
/**
|
|
69
|
+
* Enables the profiler.
|
|
70
|
+
* Sets the active state to true, allowing profiling to occur.
|
|
71
|
+
*/
|
|
72
|
+
enable(): void;
|
|
73
|
+
/**
|
|
74
|
+
* Disables the profiler.
|
|
75
|
+
* Sets the active state to false, preventing further profiling.
|
|
76
|
+
*/
|
|
77
|
+
disable(): void;
|
|
78
|
+
/**
|
|
79
|
+
* Resets the profiler by clearing the store, total time and memory consumption.
|
|
80
|
+
* This method is useful for starting a new profiling session.
|
|
81
|
+
*/
|
|
82
|
+
clear(): void;
|
|
83
|
+
/**
|
|
84
|
+
* Runs a synchronous function and profiles its execution time and memory usage.
|
|
85
|
+
* If the profiler is not active, it simply executes the function without profiling.
|
|
86
|
+
*
|
|
87
|
+
* @param {() => T} fn - Function to be executed and profiled
|
|
88
|
+
* @param {Record<string, any>} meta - Metadata to be associated with the profiling entry
|
|
89
|
+
* @returns {T} - The result of the executed function
|
|
90
|
+
*/
|
|
91
|
+
run<T>(fn: () => T, meta?: Record<string, any>): T;
|
|
92
|
+
/**
|
|
93
|
+
* Runs an asynchronous function and profiles its execution time and memory usage.
|
|
94
|
+
* If the profiler is not active, it simply executes the function without profiling.
|
|
95
|
+
*
|
|
96
|
+
* @param {() => Promise<T>} fn - Asynchronous function to be executed and profiled
|
|
97
|
+
* @param {Record<string, any>} meta - Metadata to be associated with the profiling entry
|
|
98
|
+
* @returns {Promise<T>} - A promise that resolves to the result of the executed function
|
|
99
|
+
*/
|
|
100
|
+
runAsync<T>(fn: () => Promise<T>, meta?: Record<string, any>): Promise<T>;
|
|
101
|
+
/**
|
|
102
|
+
* Retrieves all profiler entries stored in the profiler.
|
|
103
|
+
*
|
|
104
|
+
* @returns {ProfilerEntry<any>[]} - An array of profiler entries
|
|
105
|
+
*/
|
|
106
|
+
getAll(): ProfilerEntry<any>[];
|
|
107
|
+
/**
|
|
108
|
+
* Retrieves the last profiler entry stored in the profiler.
|
|
109
|
+
*
|
|
110
|
+
* @returns {ProfilerEntry<any> | undefined} - The last profiler entry or undefined if no entries exist
|
|
111
|
+
*/
|
|
112
|
+
getLast(): ProfilerEntry<any> | undefined;
|
|
113
|
+
/**
|
|
114
|
+
* Retrieves the total time and memory consumption recorded by the profiler.
|
|
115
|
+
*
|
|
116
|
+
* @returns {{ time: number, mem: number }} - An object containing total time and memory usage
|
|
117
|
+
*/
|
|
118
|
+
getTotal(): {
|
|
119
|
+
time: number;
|
|
120
|
+
mem: number;
|
|
121
|
+
};
|
|
122
|
+
/**
|
|
123
|
+
* Returns the services provided by the Profiler class.
|
|
124
|
+
* This allows for easy access to the profiler's methods.
|
|
125
|
+
*
|
|
126
|
+
* @type {ProfilerService<any>} - An object containing methods to control the profiler
|
|
127
|
+
*/
|
|
128
|
+
services: ProfilerService<any>;
|
|
129
|
+
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Registry Utility
|
|
3
|
+
* src/utils/Registry.ts
|
|
4
|
+
*
|
|
5
|
+
* This module provides a Registry function that allows for registering,
|
|
6
|
+
* removing, checking, getting, and listing class constructors.
|
|
7
|
+
*
|
|
8
|
+
* It is designed to manage class extensions, ensuring that all registered
|
|
9
|
+
* classes extend a specified base constructor.
|
|
10
|
+
*
|
|
11
|
+
* @module Utils/Registry
|
|
12
|
+
* @author Paul Köhler (komed3)
|
|
13
|
+
* @license MIT
|
|
14
|
+
*/
|
|
15
|
+
import type { RegistryService, RegistryConstructor } from './Types';
|
|
16
|
+
/**
|
|
17
|
+
* Global registry object to hold multiple registries.
|
|
18
|
+
* Each registry is keyed by a string identifier.
|
|
19
|
+
*
|
|
20
|
+
* @type {Record<string, RegistryService<any>>}
|
|
21
|
+
*/
|
|
22
|
+
export declare const registry: Record<string, RegistryService<any>>;
|
|
23
|
+
/**
|
|
24
|
+
* Factory object to hold factory functions for creating instances.
|
|
25
|
+
* This is used to create instances of registered classes.
|
|
26
|
+
*
|
|
27
|
+
* @type {Record<string, ( cls: string, ...args: any[] ) => InstanceType<any>>}
|
|
28
|
+
*/
|
|
29
|
+
export declare const factory: Record<string, (cls: string, ...args: any[]) => InstanceType<any>>;
|
|
30
|
+
/**
|
|
31
|
+
* Registry function to create a service for managing class constructors.
|
|
32
|
+
*
|
|
33
|
+
* @param {string} reg - The name of the registry
|
|
34
|
+
* @param {RegistryConstructor<T>} ctor - The base constructor that all registered classes must extend
|
|
35
|
+
* @returns {RegistryService<T>} - An object with methods to register, remove, check, get, and list classes
|
|
36
|
+
* @throws {Error} If the registry already exists (overwriting is forbidden)
|
|
37
|
+
*/
|
|
38
|
+
export declare function Registry<T>(reg: string, ctor: RegistryConstructor<T>): RegistryService<T>;
|
|
39
|
+
/**
|
|
40
|
+
* Resolve a class constructor from a specific registry.
|
|
41
|
+
*
|
|
42
|
+
* @param {string} reg - The name of the registry
|
|
43
|
+
* @param {T|string} cls - The class itself or name of the class to resolve
|
|
44
|
+
* @returns {T|undefined} - The class constructor if found, otherwise undefined
|
|
45
|
+
* @throws {ReferenceError} If the registry does not exist
|
|
46
|
+
*/
|
|
47
|
+
export declare function resolveCls<T extends RegistryConstructor<any>>(reg: string, cls: T | string): T;
|
|
48
|
+
/**
|
|
49
|
+
* Create an instance of a class from a specific registry.
|
|
50
|
+
*
|
|
51
|
+
* @param {string} reg - The name of the registry
|
|
52
|
+
* @param {T|string} cls - The class itself or name of the class to instantiate
|
|
53
|
+
* @param {...any} args - Arguments to pass to the class constructor
|
|
54
|
+
* @returns {InstanceType<T>} - An instance of the class
|
|
55
|
+
* @throws {Error} If the class cannot be instantiated
|
|
56
|
+
*/
|
|
57
|
+
export declare function createFromRegistry<T extends RegistryConstructor<any>>(reg: string, cls: T | string, ...args: any[]): InstanceType<T>;
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TextAnalyzer Utility
|
|
3
|
+
* src/utils/TextAnalyzer.ts
|
|
4
|
+
*
|
|
5
|
+
* The TextAnalyzer class provides a comprehensive set of methods for analyzing and
|
|
6
|
+
* extracting statistics from a given text. It supports word and sentence tokenization,
|
|
7
|
+
* character and word frequency analysis, syllable estimation, readability metrics
|
|
8
|
+
* (Flesch, Kincaid, LIX, WSTF), and various ratios and histograms. Designed for
|
|
9
|
+
* efficiency and flexibility, it is suitable for linguistic research, readability
|
|
10
|
+
* scoring, and text preprocessing tasks.
|
|
11
|
+
*
|
|
12
|
+
* @module Utils/TextAnalyzer
|
|
13
|
+
* @author Paul Köhler (komed3)
|
|
14
|
+
* @license MIT
|
|
15
|
+
*/
|
|
16
|
+
export declare class TextAnalyzer {
|
|
17
|
+
private readonly text;
|
|
18
|
+
private words;
|
|
19
|
+
private sentences;
|
|
20
|
+
private charFrequency;
|
|
21
|
+
private wordHistogram;
|
|
22
|
+
private syllableCache;
|
|
23
|
+
/**
|
|
24
|
+
* Constructs a new TextAnalyzer instance with the provided input text.
|
|
25
|
+
*
|
|
26
|
+
* @param {string} input - The text to analyze
|
|
27
|
+
*/
|
|
28
|
+
constructor(input: string);
|
|
29
|
+
/**
|
|
30
|
+
* Tokenizes the input text into words and sentences.
|
|
31
|
+
*/
|
|
32
|
+
private tokenize;
|
|
33
|
+
/**
|
|
34
|
+
* Computes character and word frequencies from the tokenized text.
|
|
35
|
+
*/
|
|
36
|
+
private computeFrequencies;
|
|
37
|
+
/**
|
|
38
|
+
* Estimates the number of syllables in a word using a simple heuristic.
|
|
39
|
+
*
|
|
40
|
+
* @param {string} word - The word to estimate syllables for
|
|
41
|
+
* @returns {number} - Estimated syllable count
|
|
42
|
+
*/
|
|
43
|
+
private estimateSyllables;
|
|
44
|
+
/**
|
|
45
|
+
* Gets the original text length in characters.
|
|
46
|
+
*
|
|
47
|
+
* @return {number} - Length of the text
|
|
48
|
+
*/
|
|
49
|
+
getLength(): number;
|
|
50
|
+
/**
|
|
51
|
+
* Gets the number of words in the text.
|
|
52
|
+
*
|
|
53
|
+
* @return {number} - Count of words
|
|
54
|
+
*/
|
|
55
|
+
getWordCount(): number;
|
|
56
|
+
/**
|
|
57
|
+
* Gets the number of sentences in the text.
|
|
58
|
+
*
|
|
59
|
+
* @return {number} - Count of sentences
|
|
60
|
+
*/
|
|
61
|
+
getSentenceCount(): number;
|
|
62
|
+
/**
|
|
63
|
+
* Gets the average word length in the text.
|
|
64
|
+
*
|
|
65
|
+
* @return {number} - Average length of words
|
|
66
|
+
*/
|
|
67
|
+
getAvgWordLength(): number;
|
|
68
|
+
/**
|
|
69
|
+
* Gets the average sentence length in words.
|
|
70
|
+
*
|
|
71
|
+
* @return {number} - Average length of sentences
|
|
72
|
+
*/
|
|
73
|
+
getAvgSentenceLength(): number;
|
|
74
|
+
/**
|
|
75
|
+
* Gets a histogram of word frequencies in the text.
|
|
76
|
+
*
|
|
77
|
+
* @returns {Record<string, number>} - A histogram of word frequencies
|
|
78
|
+
*/
|
|
79
|
+
getWordHistogram(): Record<string, number>;
|
|
80
|
+
/**
|
|
81
|
+
* Gets the most common words in the text, limited to a specified number.
|
|
82
|
+
*
|
|
83
|
+
* @param {number} [limit=5] - Maximum number of common words to return
|
|
84
|
+
* @returns {string[]} - Array of the most common words
|
|
85
|
+
*/
|
|
86
|
+
getMostCommonWords(limit?: number): string[];
|
|
87
|
+
/**
|
|
88
|
+
* Gets the least common words (hapax legomena) in the text.
|
|
89
|
+
*
|
|
90
|
+
* Hapax legomena are words that occur only once in the text.
|
|
91
|
+
*
|
|
92
|
+
* @returns {string[]} - Array of hapax legomena
|
|
93
|
+
*/
|
|
94
|
+
getHapaxLegomena(): string[];
|
|
95
|
+
/**
|
|
96
|
+
* Checks if the text contains any numbers.
|
|
97
|
+
*
|
|
98
|
+
* @returns {boolean} - True if numbers are present, false otherwise
|
|
99
|
+
*/
|
|
100
|
+
hasNumbers(): boolean;
|
|
101
|
+
/**
|
|
102
|
+
* Calculates the ratio of uppercase letters to total letters in the text.
|
|
103
|
+
*
|
|
104
|
+
* @return {number} - Ratio of uppercase letters to total letters
|
|
105
|
+
*/
|
|
106
|
+
getUpperCaseRatio(): number;
|
|
107
|
+
/**
|
|
108
|
+
* Gets the frequency of each character in the text.
|
|
109
|
+
*
|
|
110
|
+
* @returns {Record<string, number>} - A record of character frequencies
|
|
111
|
+
*/
|
|
112
|
+
getCharFrequency(): Record<string, number>;
|
|
113
|
+
/**
|
|
114
|
+
* Gets the frequency of each Unicode block in the text.
|
|
115
|
+
*
|
|
116
|
+
* @returns {Record<string, number>} - A record of Unicode block frequencies
|
|
117
|
+
*/
|
|
118
|
+
getUnicodeStats(): Record<string, number>;
|
|
119
|
+
/**
|
|
120
|
+
* Gets the ratio of long words (words with length >= len) to total words.
|
|
121
|
+
*
|
|
122
|
+
* @param {number} [len=7] - Minimum length for a word to be considered long
|
|
123
|
+
* @returns {number} - Ratio of long words to total words
|
|
124
|
+
*/
|
|
125
|
+
getLongWordRatio(len?: number): number;
|
|
126
|
+
/**
|
|
127
|
+
* Gets the ratio of short words (words with length <= len) to total words.
|
|
128
|
+
*
|
|
129
|
+
* @param {number} [len=3] - Maximum length for a word to be considered short
|
|
130
|
+
* @returns {number} - Ratio of short words to total words
|
|
131
|
+
*/
|
|
132
|
+
getShortWordRatio(len?: number): number;
|
|
133
|
+
/**
|
|
134
|
+
* Estimates the number of syllables in the text.
|
|
135
|
+
*
|
|
136
|
+
* @returns {number} - Total estimated syllable count
|
|
137
|
+
*/
|
|
138
|
+
getSyllablesCount(): number;
|
|
139
|
+
/**
|
|
140
|
+
* Gets the number of monosyllabic words (words with exactly one syllable).
|
|
141
|
+
*
|
|
142
|
+
* @returns {number} - Count of monosyllabic words
|
|
143
|
+
*/
|
|
144
|
+
getMonosyllabicWordCount(): number;
|
|
145
|
+
/**
|
|
146
|
+
* Gets the number of words with at least a specified minimum syllable count.
|
|
147
|
+
*
|
|
148
|
+
* @param {number} min - Minimum syllable count for a word to be included
|
|
149
|
+
* @returns {number} - Count of words meeting the syllable criteria
|
|
150
|
+
*/
|
|
151
|
+
getMinSyllablesWordCount(min: number): number;
|
|
152
|
+
/**
|
|
153
|
+
* Gets the number of words with at most a specified maximum syllable count.
|
|
154
|
+
*
|
|
155
|
+
* @param {number} max - Maximum syllable count for a word to be included
|
|
156
|
+
* @returns {number} - Count of words meeting the syllable criteria
|
|
157
|
+
*/
|
|
158
|
+
getMaxSyllablesWordCount(max: number): number;
|
|
159
|
+
/**
|
|
160
|
+
* Calculates Honoré's R statistic for the text as a measure of lexical richness.
|
|
161
|
+
*
|
|
162
|
+
* @returns {number} - The Honore's R statistic
|
|
163
|
+
*/
|
|
164
|
+
getHonoresR(): number;
|
|
165
|
+
/**
|
|
166
|
+
* Estimates the reading time for the text based on words per minute (WPM).
|
|
167
|
+
*
|
|
168
|
+
* @param {number} [wpm=200] - Words per minute for the calculation
|
|
169
|
+
* @returns {number} - Estimated reading time in minutes
|
|
170
|
+
*/
|
|
171
|
+
getReadingTime(wpm?: number): number;
|
|
172
|
+
/**
|
|
173
|
+
* Calculates various readability scores based on the text.
|
|
174
|
+
*
|
|
175
|
+
* This method supports multiple readability metrics:
|
|
176
|
+
* - Flesch Reading Ease
|
|
177
|
+
* - Flesch-Kincaid Grade Level
|
|
178
|
+
*
|
|
179
|
+
* @param {'flesch'|'fleschde'|'kincaid'} [metric='flesch'] - The readability metric to calculate
|
|
180
|
+
* @returns {number} - The calculated readability score
|
|
181
|
+
*/
|
|
182
|
+
getReadabilityScore(metric?: 'flesch' | 'fleschde' | 'kincaid'): number;
|
|
183
|
+
/**
|
|
184
|
+
* Calculates the LIX (Läsbarhetsindex) score for the text.
|
|
185
|
+
*
|
|
186
|
+
* The LIX score is a readability index that combines average sentence length and the percentage of long words.
|
|
187
|
+
*
|
|
188
|
+
* @returns {number} - The LIX score
|
|
189
|
+
*/
|
|
190
|
+
getLIXScore(): number;
|
|
191
|
+
/**
|
|
192
|
+
* Calculates the Wiener Sachtextformel (WSTF) scores for the text.
|
|
193
|
+
*
|
|
194
|
+
* The WSTF scores are a set of readability metrics based on word and sentence characteristics.
|
|
195
|
+
*
|
|
196
|
+
* @returns {[number, number, number, number]} - An array of WSTF scores
|
|
197
|
+
*/
|
|
198
|
+
getWSTFScore(): [number, number, number, number];
|
|
199
|
+
}
|