@nlptools/distance 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Demo Macro
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,128 @@
1
+ # @nlptools/distance
2
+
3
+ ![npm version](https://img.shields.io/npm/v/@nlptools/distance)
4
+ ![npm downloads](https://img.shields.io/npm/dw/@nlptools/distance)
5
+ ![npm license](https://img.shields.io/npm/l/@nlptools/distance)
6
+ [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-2.1-4baaaa.svg)](https://www.contributor-covenant.org/version/2/1/code_of_conduct/)
7
+
8
+ > Complete string distance and similarity algorithms package with WebAssembly and JavaScript implementations
9
+
10
+ This package provides comprehensive text similarity and distance algorithms, combining the high-performance WebAssembly implementation from `@nlptools/distance-wasm` with additional JavaScript-based algorithms for maximum compatibility and performance.
11
+
12
+ ## Features
13
+
14
+ - ⚡ **Dual Implementation**: WebAssembly for performance + JavaScript for compatibility
15
+ - 🧮 **Comprehensive Algorithms**: 30+ string similarity and distance algorithms
16
+ - 🎯 **Multiple Categories**: Edit-based, sequence-based, token-based, bigram, and naive algorithms
17
+ - 📝 **TypeScript First**: Full type safety with comprehensive API
18
+ - 🔧 **Universal Interface**: Single compare function for all algorithms
19
+ - 📊 **Normalized Results**: Consistent 0-1 similarity scores across algorithms
20
+ - 🚀 **Auto-optimization**: Automatically chooses the fastest implementation available
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ # Install with npm
26
+ npm install @nlptools/distance
27
+
28
+ # Install with yarn
29
+ yarn add @nlptools/distance
30
+
31
+ # Install with pnpm
32
+ pnpm add @nlptools/distance
33
+ ```
34
+
35
+ ## Usage
36
+
37
+ ### Basic Setup
38
+
39
+ ```typescript
40
+ import * as distance from "@nlptools/distance";
41
+
42
+ // All algorithms are available as named functions
43
+ console.log(distance.levenshtein("kitten", "sitting")); // 3
44
+ console.log(distance.jaro("hello", "hallo")); // 0.8666666666666667
45
+ console.log(distance.cosine("abc", "bcd")); // 0.6666666666666666
46
+ ```
47
+
48
+ ### Distance vs Similarity
49
+
50
+ Most algorithms come in two forms: a raw distance and a normalized similarity score (`*_normalized`):
51
+
52
+ ```typescript
53
+ // Distance algorithms (lower is more similar)
54
+ const dist = distance.levenshtein("cat", "bat"); // 1
55
+
56
+ // Similarity algorithms (higher is more similar, 0-1 range)
57
+ const sim = distance.levenshtein_normalized("cat", "bat"); // 0.6666666666666666
58
+ ```
59
+
60
+ ### Available Algorithms
61
+
62
+ This package includes all algorithms from `@nlptools/distance-wasm` plus additional JavaScript implementations:
63
+
64
+ #### Edit Distance Algorithms
65
+
66
+ - `levenshtein` - Classic edit distance
67
+ - `fastest_levenshtein` - High-performance Levenshtein distance (fastest-levenshtein)
68
+ - `damerau_levenshtein` - Edit distance with transpositions
69
+ - `myers_levenshtein` - Myers bit-parallel algorithm for edit distance
70
+ - `jaro` - Jaro similarity
71
+ - `jarowinkler` - Jaro-Winkler similarity
72
+ - `hamming` - Hamming distance for equal-length strings
73
+ - `sift4_simple` - SIFT4 algorithm
74
+
75
+ #### Sequence-based Algorithms
76
+
77
+ - `lcs_seq` - Longest common subsequence
78
+ - `lcs_str` - Longest common substring
79
+ - `ratcliff_obershelp` - Gestalt pattern matching
80
+ - `smith_waterman` - Local sequence alignment
81
+
82
+ #### Token-based Algorithms
83
+
84
+ - `jaccard` - Jaccard similarity
85
+ - `cosine` - Cosine similarity
86
+ - `sorensen` - Sørensen-Dice coefficient
87
+ - `tversky` - Tversky index
88
+ - `overlap` - Overlap coefficient
89
+
90
+ #### Bigram Algorithms
91
+
92
+ - `jaccard_bigram` - Jaccard similarity on character bigrams
93
+ - `cosine_bigram` - Cosine similarity on character bigrams
94
+
95
+ #### Naive Algorithms
96
+
97
+ - `prefix` - Prefix similarity
98
+ - `suffix` - Suffix similarity
99
+ - `length` - Length-based similarity
100
+
101
+ ### Universal Compare Function
102
+
103
+ ```typescript
104
+ const result = distance.compare("hello", "hallo", "jaro");
105
+ console.log(result); // 0.8666666666666667
106
+
107
+ // Use fastest-levenshtein for optimal performance
108
+ console.log(distance.fastest_levenshtein("fast", "faster")); // 2
109
+ ```
110
+
111
+ ## Performance
112
+
113
+ The package automatically selects the fastest implementation available:
114
+
115
+ - **WebAssembly algorithms**: 10-100x faster than pure JavaScript
116
+ - **JavaScript fallbacks**: Ensure compatibility across all environments
117
+ - **Auto-detection**: Seamlessly switches between WASM and JS implementations
118
+
119
+ ## References
120
+
121
+ This package incorporates and builds upon the following excellent open source projects:
122
+
123
+ - [textdistance.rs](https://github.com/life4/textdistance.rs) - Core Rust implementation via @nlptools/distance-wasm
124
+ - [fastest-levenshtein](https://github.com/ka-weihe/fastest-levenshtein) - High-performance Levenshtein implementation
125
+
126
+ ## License
127
+
128
+ - [MIT](LICENSE) © [Demo Macro](https://imst.xyz/)
@@ -0,0 +1,5 @@
1
+ export * from '@nlptools/distance-wasm';
2
+ /** Levenshtein edit distance between `a` and `b`; at runtime this is the `distance` export of the `fastest-levenshtein` package. */
3
+ declare const fastest_levenshtein: (a: string, b: string) => number;
4
+
5
+ export { fastest_levenshtein };
@@ -0,0 +1,5 @@
1
+ export * from '@nlptools/distance-wasm';
2
+ /** Levenshtein edit distance between `a` and `b`; at runtime this is the `distance` export of the `fastest-levenshtein` package. */
3
+ declare const fastest_levenshtein: (a: string, b: string) => number;
4
+
5
+ export { fastest_levenshtein };
package/dist/index.mjs ADDED
@@ -0,0 +1,6 @@
1
+ export * from '@nlptools/distance-wasm';
2
+ import { distance } from 'fastest-levenshtein';
3
+ // Alias fastest-levenshtein's `distance` so it is exported as `fastest_levenshtein`, next to the names re-exported from @nlptools/distance-wasm.
4
+ const fastest_levenshtein = distance;
5
+
6
+ export { fastest_levenshtein };
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "@nlptools/distance",
3
+ "version": "0.0.0",
4
+ "description": "Complete string distance and similarity algorithms package with WebAssembly and JavaScript implementations",
5
+ "main": "dist/index.mjs",
6
+ "types": "dist/index.d.ts",
7
+ "files": [
8
+ "dist"
9
+ ],
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "git+https://github.com/DemoMacro/nlptools.git"
13
+ },
14
+ "keywords": [
15
+ "text-distance",
16
+ "string-similarity",
17
+ "fuzzy-matching",
18
+ "algorithms",
19
+ "levenshtein",
20
+ "jaro-winkler",
21
+ "jaccard",
22
+ "cosine",
23
+ "edit-distance",
24
+ "webassembly",
25
+ "wasm",
26
+ "nlptools",
27
+ "performance"
28
+ ],
29
+ "author": {
30
+ "name": "Demo Macro",
31
+ "email": "abc@imst.xyz",
32
+ "url": "https://imst.xyz/"
33
+ },
34
+ "license": "MIT",
35
+ "bugs": {
36
+ "url": "https://github.com/DemoMacro/nlptools/issues"
37
+ },
38
+ "homepage": "https://github.com/DemoMacro/nlptools#readme",
39
+ "dependencies": {
40
+ "fastest-levenshtein": "1.0.16",
41
+ "@nlptools/distance-wasm": "0.0.0"
42
+ },
43
+ "scripts": {
44
+ "dev": "pnpm unbuild --stub"
45
+ }
46
+ }