@small-ltsc/sdk 0.1.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -34
- package/package.json +7 -2
- package/src/wasm/small_ltsc_core_bg.wasm +0 -0
package/README.md
CHANGED
|
@@ -1,18 +1,21 @@
|
|
|
1
1
|
# @small-ltsc/sdk
|
|
2
2
|
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@small-ltsc/sdk)
|
|
4
|
+
[License: MIT](../../LICENSE)
|
|
5
|
+
|
|
3
6
|
TypeScript SDK for **Small LTSC** - Lossless Token Sequence Compression for LLMs.
|
|
4
7
|
|
|
5
|
-
Reduce LLM inference costs by compressing repetitive token patterns in prompts while maintaining perfect reconstruction.
|
|
8
|
+
Reduce LLM inference costs by compressing repetitive token patterns in prompts while maintaining perfect reconstruction. Achieve 30-60% compression on structured inputs with a format that fine-tuned models can understand.
|
|
6
9
|
|
|
7
10
|
## Features
|
|
8
11
|
|
|
9
|
-
- **Lossless compression** - Perfect round-trip reconstruction
|
|
10
|
-
- **High performance** - Rust/WASM core with O(n log n) algorithms
|
|
12
|
+
- **Lossless compression** - Perfect round-trip reconstruction guaranteed
|
|
13
|
+
- **High performance** - Rust/WASM core with O(n log n) suffix array algorithms
|
|
11
14
|
- **Cross-platform** - Works in browsers, Node.js, Deno, and edge runtimes
|
|
12
|
-
- **Streaming support** - Handle inputs of any size
|
|
15
|
+
- **Streaming support** - Handle inputs of any size with constant memory
|
|
13
16
|
- **Worker threads** - Non-blocking compression for large inputs
|
|
14
|
-
- **Static dictionaries** - Pre-built patterns for
|
|
15
|
-
- **TypeScript-first** - Full type safety and IntelliSense
|
|
17
|
+
- **Static dictionaries** - Pre-built patterns for Python, TypeScript, SQL, and more
|
|
18
|
+
- **TypeScript-first** - Full type safety and IntelliSense support
|
|
16
19
|
|
|
17
20
|
## Installation
|
|
18
21
|
|
|
@@ -25,7 +28,7 @@ npm install @small-ltsc/sdk
|
|
|
25
28
|
```typescript
|
|
26
29
|
import { compress, decompress, initWasm } from '@small-ltsc/sdk';
|
|
27
30
|
|
|
28
|
-
// Initialize WASM (required once)
|
|
31
|
+
// Initialize WASM module (required once)
|
|
29
32
|
await initWasm();
|
|
30
33
|
|
|
31
34
|
// Compress tokens
|
|
@@ -33,9 +36,9 @@ const tokens = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3];
|
|
|
33
36
|
const result = await compress(tokens);
|
|
34
37
|
|
|
35
38
|
console.log(`Compressed: ${result.originalLength} → ${result.compressedLength} tokens`);
|
|
36
|
-
console.log(`
|
|
39
|
+
console.log(`Savings: ${((1 - result.compressionRatio) * 100).toFixed(1)}%`);
|
|
37
40
|
|
|
38
|
-
// Decompress
|
|
41
|
+
// Decompress (lossless)
|
|
39
42
|
const restored = await decompress(result.serializedTokens);
|
|
40
43
|
console.assert(JSON.stringify(tokens) === JSON.stringify(restored));
|
|
41
44
|
```
|
|
@@ -45,24 +48,24 @@ console.assert(JSON.stringify(tokens) === JSON.stringify(restored));
|
|
|
45
48
|
```typescript
|
|
46
49
|
const result = await compress(tokens, {
|
|
47
50
|
// Pattern discovery
|
|
48
|
-
minSubsequenceLength: 2, //
|
|
49
|
-
maxSubsequenceLength: 8, //
|
|
51
|
+
minSubsequenceLength: 2, // Minimum pattern length (default: 2)
|
|
52
|
+
maxSubsequenceLength: 8, // Maximum pattern length (default: 8)
|
|
50
53
|
|
|
51
54
|
// Selection algorithm
|
|
52
55
|
selectionMode: 'greedy', // 'greedy' | 'optimal' | 'beam'
|
|
53
56
|
|
|
54
57
|
// Hierarchical compression
|
|
55
58
|
hierarchicalEnabled: true, // Allow patterns of patterns
|
|
56
|
-
hierarchicalMaxDepth: 3, //
|
|
59
|
+
hierarchicalMaxDepth: 3, // Maximum nesting depth
|
|
57
60
|
|
|
58
61
|
// Verification
|
|
59
|
-
verify: true, //
|
|
62
|
+
verify: true, // Enable round-trip verification
|
|
60
63
|
});
|
|
61
64
|
```
|
|
62
65
|
|
|
63
66
|
## Static Dictionaries
|
|
64
67
|
|
|
65
|
-
Use pre-built dictionaries for
|
|
68
|
+
Use pre-built dictionaries for domain-specific compression:
|
|
66
69
|
|
|
67
70
|
```typescript
|
|
68
71
|
const result = await compress(pythonCodeTokens, {
|
|
@@ -70,11 +73,11 @@ const result = await compress(pythonCodeTokens, {
|
|
|
70
73
|
});
|
|
71
74
|
```
|
|
72
75
|
|
|
73
|
-
Available: `python-v1`, `typescript-v1`, `markdown-v1`, `json-v1`, `sql-v1`
|
|
76
|
+
Available dictionaries: `python-v1`, `typescript-v1`, `markdown-v1`, `json-v1`, `sql-v1`
|
|
74
77
|
|
|
75
78
|
## Streaming
|
|
76
79
|
|
|
77
|
-
For large inputs:
|
|
80
|
+
For large inputs that exceed memory constraints:
|
|
78
81
|
|
|
79
82
|
```typescript
|
|
80
83
|
import { createStreamingCompressor } from '@small-ltsc/sdk';
|
|
@@ -90,7 +93,7 @@ const result = await compressor.finish();
|
|
|
90
93
|
|
|
91
94
|
## Worker Threads
|
|
92
95
|
|
|
93
|
-
Non-blocking compression:
|
|
96
|
+
Non-blocking compression for UI responsiveness:
|
|
94
97
|
|
|
95
98
|
```typescript
|
|
96
99
|
import { createWorkerPool } from '@small-ltsc/sdk';
|
|
@@ -105,45 +108,58 @@ pool.terminate();
|
|
|
105
108
|
```html
|
|
106
109
|
<script type="module">
|
|
107
110
|
import { compress, initWasm } from 'https://esm.sh/@small-ltsc/sdk';
|
|
111
|
+
|
|
108
112
|
await initWasm();
|
|
109
113
|
const result = await compress([1, 2, 3, 1, 2, 3]);
|
|
114
|
+
console.log('Compression ratio:', result.compressionRatio);
|
|
110
115
|
</script>
|
|
111
116
|
```
|
|
112
117
|
|
|
113
|
-
## API
|
|
118
|
+
## API Reference
|
|
114
119
|
|
|
115
120
|
### Core Functions
|
|
116
121
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
122
|
+
| Function | Description |
|
|
123
|
+
|----------|-------------|
|
|
124
|
+
| `compress(tokens, config?)` | Compress a token sequence |
|
|
125
|
+
| `decompress(tokens, config?)` | Decompress to original tokens |
|
|
126
|
+
| `discoverPatterns(tokens, minLen?, maxLen?)` | Find patterns without compressing |
|
|
120
127
|
|
|
121
128
|
### Streaming
|
|
122
129
|
|
|
123
|
-
|
|
124
|
-
|
|
130
|
+
| Function | Description |
|
|
131
|
+
|----------|-------------|
|
|
132
|
+
| `createStreamingCompressor(config?)` | Create a streaming compressor instance |
|
|
133
|
+
| `compressStream(asyncIterable, config?)` | Compress an async iterable stream |
|
|
125
134
|
|
|
126
135
|
### Workers
|
|
127
136
|
|
|
128
|
-
|
|
129
|
-
|
|
137
|
+
| Function | Description |
|
|
138
|
+
|----------|-------------|
|
|
139
|
+
| `createWorkerPool(count?)` | Create a pool of worker threads |
|
|
140
|
+
| `compressInWorker(tokens, config?)` | Single-use worker compression |
|
|
130
141
|
|
|
131
142
|
### Dictionaries
|
|
132
143
|
|
|
133
|
-
|
|
134
|
-
|
|
144
|
+
| Function | Description |
|
|
145
|
+
|----------|-------------|
|
|
146
|
+
| `loadStaticDictionary(id)` | Load a built-in dictionary |
|
|
147
|
+
| `createStaticDictionary(id, patterns)` | Create a custom dictionary |
|
|
135
148
|
|
|
136
149
|
### Utilities
|
|
137
150
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
151
|
+
| Function | Description |
|
|
152
|
+
|----------|-------------|
|
|
153
|
+
| `initWasm()` | Initialize the WASM module |
|
|
154
|
+
| `isWasmInitialized()` | Check initialization status |
|
|
155
|
+
| `extractDictionary(tokens)` | Extract dictionary from compressed tokens |
|
|
156
|
+
| `isCompressed(tokens)` | Check if tokens are in compressed format |
|
|
142
157
|
|
|
143
158
|
## Documentation
|
|
144
159
|
|
|
145
160
|
- [Quick Start Guide](./docs/QUICKSTART.md)
|
|
146
161
|
- [API Reference](./docs/API.md)
|
|
162
|
+
- [Main Repository](https://github.com/triage-sec/small)
|
|
147
163
|
|
|
148
164
|
## Optional ML Features
|
|
149
165
|
|
|
@@ -154,16 +170,24 @@ npm install @small-ltsc/ml
|
|
|
154
170
|
```
|
|
155
171
|
|
|
156
172
|
```typescript
|
|
157
|
-
import { HeuristicQualityPredictor
|
|
173
|
+
import { HeuristicQualityPredictor } from '@small-ltsc/ml';
|
|
158
174
|
|
|
159
175
|
const predictor = new HeuristicQualityPredictor();
|
|
160
176
|
const prediction = await predictor.predict(compressionResult);
|
|
161
177
|
|
|
162
178
|
if (!prediction.acceptable) {
|
|
163
|
-
|
|
179
|
+
console.log(`Recommendation: ${prediction.recommendation}`);
|
|
164
180
|
}
|
|
165
181
|
```
|
|
166
182
|
|
|
167
183
|
## License
|
|
168
184
|
|
|
169
|
-
MIT
|
|
185
|
+
MIT License - see [LICENSE](../../LICENSE) for details.
|
|
186
|
+
|
|
187
|
+
## Contributors
|
|
188
|
+
|
|
189
|
+
Built by [Triage Sec](https://triage-sec.com) - an applied team of researchers and engineers working towards building resiliency for AI systems.
|
|
190
|
+
|
|
191
|
+
- Nikhil Srivastava (University of California, Berkeley)
|
|
192
|
+
- Omansh Bainsla (Georgia Tech)
|
|
193
|
+
- Sahil Chatiwala (Georgia Tech)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@small-ltsc/sdk",
|
|
3
|
-
|
|
3
|
+
"version": "0.2.4",
|
|
4
4
|
"description": "TypeScript SDK for Small LTSC - Lossless Token Sequence Compression",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/esm/index.js",
|
|
@@ -40,7 +40,12 @@
|
|
|
40
40
|
"wasm",
|
|
41
41
|
"lossless"
|
|
42
42
|
],
|
|
43
|
-
"author": "",
|
|
43
|
+
"author": "Triage Sec <nicks@triage-sec.com>",
|
|
44
|
+
"contributors": [
|
|
45
|
+
"Nikhil Srivastava",
|
|
46
|
+
"Omansh Bainsla",
|
|
47
|
+
"Sahil Chatiwala"
|
|
48
|
+
],
|
|
44
49
|
"license": "MIT",
|
|
45
50
|
"repository": {
|
|
46
51
|
"type": "git",
|
|
Binary file
|