@small-ltsc/sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +169 -0
- package/dist/esm/compress.js +160 -0
- package/dist/esm/compress.js.map +1 -0
- package/dist/esm/config.js +47 -0
- package/dist/esm/config.js.map +1 -0
- package/dist/esm/decompress.js +105 -0
- package/dist/esm/decompress.js.map +1 -0
- package/dist/esm/dictionaries/index.js +104 -0
- package/dist/esm/dictionaries/index.js.map +1 -0
- package/dist/esm/dictionaries/json.json +28 -0
- package/dist/esm/dictionaries/markdown.json +28 -0
- package/dist/esm/dictionaries/python.json +28 -0
- package/dist/esm/dictionaries/sql.json +28 -0
- package/dist/esm/dictionaries/typescript.json +28 -0
- package/dist/esm/index.js +26 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/streaming.js +139 -0
- package/dist/esm/streaming.js.map +1 -0
- package/dist/esm/types.js +25 -0
- package/dist/esm/types.js.map +1 -0
- package/dist/esm/wasm/index.js +5 -0
- package/dist/esm/wasm/index.js.map +1 -0
- package/dist/esm/wasm/loader.js +179 -0
- package/dist/esm/wasm/loader.js.map +1 -0
- package/dist/esm/worker.js +304 -0
- package/dist/esm/worker.js.map +1 -0
- package/dist/types/compress.d.ts +39 -0
- package/dist/types/compress.d.ts.map +1 -0
- package/dist/types/config.d.ts +113 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/decompress.d.ts +53 -0
- package/dist/types/decompress.d.ts.map +1 -0
- package/dist/types/dictionaries/index.d.ts +127 -0
- package/dist/types/dictionaries/index.d.ts.map +1 -0
- package/dist/types/index.d.ts +18 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/streaming.d.ts +81 -0
- package/dist/types/streaming.d.ts.map +1 -0
- package/dist/types/types.d.ts +116 -0
- package/dist/types/types.d.ts.map +1 -0
- package/dist/types/wasm/index.d.ts +6 -0
- package/dist/types/wasm/index.d.ts.map +1 -0
- package/dist/types/wasm/loader.d.ts +71 -0
- package/dist/types/wasm/loader.d.ts.map +1 -0
- package/dist/types/worker.d.ts +106 -0
- package/dist/types/worker.d.ts.map +1 -0
- package/package.json +63 -0
- package/src/wasm/small_ltsc_core_bg.wasm +0 -0
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Worker thread support for non-blocking compression.
|
|
3
|
+
*
|
|
4
|
+
* Provides utilities for running compression in Web Workers (browser)
|
|
5
|
+
* or Worker threads (Node.js) to avoid blocking the main thread.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Create a worker pool for parallel compression.
 *
 * Uses the Node.js (worker_threads) implementation when `process.versions.node`
 * is present, and the browser (Web Worker) implementation otherwise.
 *
 * @param workerCount - Number of workers to create (default:
 *   navigator.hardwareConcurrency or 4). Fractional values are rounded up;
 *   zero, negative, or non-numeric values fall back to a single worker so the
 *   pool is never created empty.
 * @returns Worker pool instance
 *
 * @example
 * ```typescript
 * import { createWorkerPool } from '@small-ltsc/sdk';
 *
 * const pool = await createWorkerPool(4);
 *
 * // Compress without blocking main thread
 * const result = await pool.compress(tokens);
 *
 * // Clean up when done
 * pool.terminate();
 * ```
 */
export async function createWorkerPool(workerCount) {
    const requested = workerCount ?? (typeof navigator !== 'undefined' ? navigator.hardwareConcurrency : 4) ?? 4;
    // A pool with no workers cannot service any request, so clamp to >= 1.
    // Math.ceil mirrors the creation loops (`i < count`), which already start
    // one extra worker for a fractional count.
    const numeric = Number(requested);
    const count = Number.isFinite(numeric) && numeric > 0 ? Math.ceil(numeric) : 1;
    // Detect environment
    const isNode = typeof process !== 'undefined' && Boolean(process.versions?.node);
    return isNode ? createNodeWorkerPool(count) : createBrowserWorkerPool(count);
}
|
|
39
|
+
/**
 * Create a browser-based worker pool.
 *
 * Spawns `count` module Web Workers from an inline script (served through a
 * blob URL) and distributes compress/decompress requests across them in
 * round-robin order. Each request is tagged with a unique ID so the matching
 * response can settle the right promise.
 *
 * @param count - Number of workers to spawn
 * @returns Pool object exposing `compress`, `decompress`, `terminate` and `size`.
 *   `compress`/`decompress` reject when the worker reports an error, when the
 *   owning worker raises an `error` event, or when the pool is terminated
 *   before (or while) the request is in flight.
 */
async function createBrowserWorkerPool(count) {
    const workers = [];
    // requestId -> { resolve, reject, worker }
    const pendingRequests = new Map();
    let nextWorkerIndex = 0;
    let requestSequence = 0;
    // Worker script as a blob URL
    // NOTE(review): the script imports the bare specifier '@small-ltsc/sdk' from
    // inside the blob worker — confirm it resolves in the target browsers/bundlers.
    const workerScript = `
        let wasm = null;

        self.onmessage = async function(event) {
          const request = event.data;

          if (request.type === 'init') {
            try {
              const { initWasm } = await import('@small-ltsc/sdk/wasm');
              await initWasm();
              self.postMessage({ type: 'init', success: true, error: null });
            } catch (error) {
              self.postMessage({ type: 'init', success: false, error: String(error) });
            }
            return;
          }

          if (request.type === 'compress') {
            try {
              const { compress } = await import('@small-ltsc/sdk');
              const result = await compress(request.tokens, request.config);
              self.postMessage({ type: 'compress', requestId: request.requestId, result, error: null });
            } catch (error) {
              self.postMessage({ type: 'compress', requestId: request.requestId, result: null, error: String(error) });
            }
            return;
          }

          if (request.type === 'decompress') {
            try {
              const { decompress } = await import('@small-ltsc/sdk');
              const result = await decompress(request.tokens, request.config);
              self.postMessage({ type: 'decompress', requestId: request.requestId, result, error: null });
            } catch (error) {
              self.postMessage({ type: 'decompress', requestId: request.requestId, result: null, error: String(error) });
            }
            return;
          }
        };
    `;
    const blob = new Blob([workerScript], { type: 'application/javascript' });
    const workerUrl = URL.createObjectURL(blob);
    // Reject (and forget) every pending request, or only those owned by `owner`.
    const rejectPending = (error, owner) => {
        for (const [requestId, pending] of pendingRequests) {
            if (owner === undefined || pending.worker === owner) {
                pendingRequests.delete(requestId);
                pending.reject(error);
            }
        }
    };
    // Create workers
    for (let i = 0; i < count; i++) {
        const worker = new Worker(workerUrl, { type: 'module' });
        worker.onmessage = (event) => {
            const response = event.data;
            if (response.type === 'init') {
                // The init outcome is not acted on here: every request imports the
                // SDK itself, so a failure shows up as a per-request error.
                return;
            }
            const pending = pendingRequests.get(response.requestId);
            if (!pending) {
                return;
            }
            pendingRequests.delete(response.requestId);
            if (response.error) {
                pending.reject(new Error(response.error));
            }
            else {
                pending.resolve(response.result);
            }
        };
        // Without this handler a crashed worker would leave its requests pending forever.
        worker.onerror = (event) => {
            rejectPending(new Error(`Worker error: ${event?.message ?? 'unknown error'}`), worker);
        };
        // Initialize worker
        worker.postMessage({ type: 'init' });
        workers.push(worker);
    }
    // Send one request to the next worker (round-robin) and track its promise.
    const dispatch = (type, tokens, config) => new Promise((resolve, reject) => {
        if (workers.length === 0) {
            reject(new Error('Worker pool has been terminated or has no workers'));
            return;
        }
        const worker = workers[nextWorkerIndex];
        nextWorkerIndex = (nextWorkerIndex + 1) % workers.length;
        // Timestamp plus a per-pool counter: unique without relying on Math.random.
        const requestId = `${Date.now()}-${requestSequence}`;
        requestSequence += 1;
        pendingRequests.set(requestId, { resolve, reject, worker });
        try {
            worker.postMessage({
                type,
                requestId,
                tokens: Array.from(tokens),
                config,
            });
        }
        catch (error) {
            // e.g. a non-cloneable config: do not leave a dead entry in the map.
            pendingRequests.delete(requestId);
            reject(error);
        }
    });
    return {
        compress(tokens, config) {
            return dispatch('compress', tokens, config);
        },
        decompress(tokens, config) {
            return dispatch('decompress', tokens, config);
        },
        terminate() {
            const stopped = workers.splice(0, workers.length);
            stopped.forEach((w) => w.terminate());
            nextWorkerIndex = 0;
            URL.revokeObjectURL(workerUrl);
            // Terminated workers will never answer; settle what is still in flight.
            rejectPending(new Error('Worker pool terminated before the request completed'));
        },
        size() {
            return workers.length;
        },
    };
}
|
|
159
|
+
/**
 * Create a Node.js-based worker pool.
 *
 * Spawns `count` worker threads from an inline script (`eval: true`; the script
 * uses `require`, i.e. it is written as CommonJS) and distributes
 * compress/decompress requests across them in round-robin order. Each request
 * is tagged with a unique ID so the matching response can settle the right
 * promise.
 *
 * @param count - Number of workers to spawn
 * @returns Pool object exposing `compress`, `decompress`, `terminate` and `size`.
 *   `compress`/`decompress` reject when the worker reports an error, when the
 *   owning worker emits `error` or exits, or when the pool is terminated before
 *   (or while) the request is in flight.
 */
async function createNodeWorkerPool(count) {
    // Import worker_threads dynamically
    const { Worker } = await import('node:worker_threads');
    const workers = [];
    // requestId -> { resolve, reject, worker }
    const pendingRequests = new Map();
    let nextWorkerIndex = 0;
    let requestSequence = 0;
    // Worker script for Node.js
    const workerScript = `
        const { parentPort } = require('worker_threads');

        parentPort.on('message', async (request) => {
          if (request.type === 'init') {
            try {
              const { initWasm } = await import('@small-ltsc/sdk/wasm');
              await initWasm();
              parentPort.postMessage({ type: 'init', success: true, error: null });
            } catch (error) {
              parentPort.postMessage({ type: 'init', success: false, error: String(error) });
            }
            return;
          }

          if (request.type === 'compress') {
            try {
              const { compress } = await import('@small-ltsc/sdk');
              const result = await compress(request.tokens, request.config);
              parentPort.postMessage({ type: 'compress', requestId: request.requestId, result, error: null });
            } catch (error) {
              parentPort.postMessage({ type: 'compress', requestId: request.requestId, result: null, error: String(error) });
            }
            return;
          }

          if (request.type === 'decompress') {
            try {
              const { decompress } = await import('@small-ltsc/sdk');
              const result = await decompress(request.tokens, request.config);
              parentPort.postMessage({ type: 'decompress', requestId: request.requestId, result, error: null });
            } catch (error) {
              parentPort.postMessage({ type: 'decompress', requestId: request.requestId, result: null, error: String(error) });
            }
            return;
          }
        });
    `;
    // Reject (and forget) every pending request, or only those owned by `owner`.
    const rejectPending = (error, owner) => {
        for (const [requestId, pending] of pendingRequests) {
            if (owner === undefined || pending.worker === owner) {
                pendingRequests.delete(requestId);
                pending.reject(error);
            }
        }
    };
    // Create workers
    for (let i = 0; i < count; i++) {
        const worker = new Worker(workerScript, { eval: true });
        worker.on('message', (response) => {
            if (response.type === 'init') {
                // The init outcome is not acted on here: every request imports the
                // SDK itself, so a failure shows up as a per-request error.
                return;
            }
            const pending = pendingRequests.get(response.requestId);
            if (!pending) {
                return;
            }
            pendingRequests.delete(response.requestId);
            if (response.error) {
                pending.reject(new Error(response.error));
            }
            else {
                pending.resolve(response.result);
            }
        });
        // An 'error' event with no listener would be thrown on the main thread.
        worker.on('error', (error) => {
            rejectPending(error instanceof Error ? error : new Error(String(error)), worker);
        });
        // A worker that has exited can never answer: drop it from the rotation
        // and settle whatever it still owned.
        worker.on('exit', () => {
            const index = workers.indexOf(worker);
            if (index !== -1) {
                workers.splice(index, 1);
                nextWorkerIndex = workers.length === 0 ? 0 : nextWorkerIndex % workers.length;
            }
            rejectPending(new Error('Worker exited before the request completed'), worker);
        });
        // Initialize worker
        worker.postMessage({ type: 'init' });
        workers.push(worker);
    }
    // Send one request to the next worker (round-robin) and track its promise.
    const dispatch = (type, tokens, config) => new Promise((resolve, reject) => {
        if (workers.length === 0) {
            reject(new Error('Worker pool has been terminated or has no workers'));
            return;
        }
        const worker = workers[nextWorkerIndex];
        nextWorkerIndex = (nextWorkerIndex + 1) % workers.length;
        // Timestamp plus a per-pool counter: unique without relying on Math.random.
        const requestId = `${Date.now()}-${requestSequence}`;
        requestSequence += 1;
        pendingRequests.set(requestId, { resolve, reject, worker });
        try {
            worker.postMessage({
                type,
                requestId,
                tokens: Array.from(tokens),
                config,
            });
        }
        catch (error) {
            // e.g. a non-cloneable config: do not leave a dead entry in the map.
            pendingRequests.delete(requestId);
            reject(error);
        }
    });
    return {
        compress(tokens, config) {
            return dispatch('compress', tokens, config);
        },
        decompress(tokens, config) {
            return dispatch('decompress', tokens, config);
        },
        terminate() {
            const stopped = workers.splice(0, workers.length);
            // worker.terminate() returns a promise; the pool API is synchronous,
            // so it is intentionally not awaited.
            stopped.forEach((w) => void w.terminate());
            nextWorkerIndex = 0;
            // Terminated workers will never answer; settle what is still in flight.
            rejectPending(new Error('Worker pool terminated before the request completed'));
        },
        size() {
            return workers.length;
        },
    };
}
|
|
273
|
+
/**
 * Compress tokens in a worker (single-use helper).
 *
 * Spins up a one-worker pool, runs a single compression on it, and always
 * terminates the pool afterwards, whether the compression succeeded or failed.
 * For multiple compressions, use createWorkerPool instead.
 *
 * @param tokens - Token sequence to compress
 * @param config - Optional compression configuration
 * @returns Promise resolving to compression result
 */
export async function compressInWorker(tokens, config) {
    const singleWorkerPool = await createWorkerPool(1);
    let outcome;
    try {
        outcome = await singleWorkerPool.compress(tokens, config);
    }
    finally {
        // Runs on success and on failure so the temporary worker never lingers.
        singleWorkerPool.terminate();
    }
    return outcome;
}
|
|
292
|
+
/**
 * Decompress tokens in a worker (single-use helper).
 *
 * Spins up a one-worker pool, runs a single decompression on it, and always
 * terminates the pool afterwards, whether the decompression succeeded or failed.
 *
 * @param tokens - Compressed token sequence to decompress
 * @param config - Optional decompression configuration
 * @returns Promise resolving to the decompression result
 */
export async function decompressInWorker(tokens, config) {
    const singleWorkerPool = await createWorkerPool(1);
    let outcome;
    try {
        outcome = await singleWorkerPool.decompress(tokens, config);
    }
    finally {
        // Runs on success and on failure so the temporary worker never lingers.
        singleWorkerPool.terminate();
    }
    return outcome;
}
|
|
304
|
+
//# sourceMappingURL=worker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"worker.js","sourceRoot":"","sources":["../../src/worker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAqDH;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,WAAoB;IACzD,MAAM,KAAK,GAAG,WAAW,IAAI,CAAC,OAAO,SAAS,KAAK,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAEzG,qBAAqB;IACrB,MAAM,MAAM,GACV,OAAO,OAAO,KAAK,WAAW;QAC9B,OAAO,CAAC,QAAQ;QAChB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC;IAExB,IAAI,MAAM,EAAE,CAAC;QACX,OAAO,oBAAoB,CAAC,KAAK,CAAC,CAAC;IACrC,CAAC;SAAM,CAAC;QACN,OAAO,uBAAuB,CAAC,KAAK,CAAC,CAAC;IACxC,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,uBAAuB,CAAC,KAAa;IAClD,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,eAAe,GAAG,IAAI,GAAG,EAAmC,CAAC;IACnE,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,8BAA8B;IAC9B,MAAM,YAAY,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuCpB,CAAC;IAEF,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,CAAC,YAAY,CAAC,EAAE,EAAE,IAAI,EAAE,wBAAwB,EAAE,CAAC,CAAC;IAC1E,MAAM,SAAS,GAAG,GAAG,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;IAE5C,iBAAiB;IACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/B,MAAM,MAAM,GAAG,IAAI,MAAM,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;QAEzD,MAAM,CAAC,SAAS,GAAG,CAAC,KAAmC,EAAE,EAAE;YACzD,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC;YAE5B,IAAI,QAAQ,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC7B,OAAO;YACT,CAAC;YAED,MAAM,OAAO,GAAG,eAAe,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;YACxD,IAAI,CAAC,OAAO;gBAAE,OAAO;YAErB,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;YAE3C,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACnB,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;YAC5C,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACnC,CAAC;QACH,CAAC,CAAC;QAEF,oBAAoB;QACpB,MAAM,CAAC,WAAW,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QACrC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACvB,CAAC;IAED,gCAAgC;IAChC,MAAM,SAAS,GAAG,GAAW,EAAE;QAC7B,MAAM,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;QACxC,eAAe,GAAG,CAAC,eAAe,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;QACzD,OAAO,MAAM,CAAC;IAChB,CAAC,CAAC;IAEF,sBAAsB;IACtB,MAAM,UAAU,GAAG,GAAW,EAAE;QAC9B,OA
AO,GAAG,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;IACpE,CAAC,CAAC;IAEF,OAAO;QACL,QAAQ,CAAC,MAAkB,EAAE,MAA0B;YACrD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBACrC,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;gBAC/B,eAAe,CAAC,GAAG,CAAC,SAAS,EAAE,EAAE,OAAO,EAAE,MAAM,EAA6B,CAAC,CAAC;gBAE/E,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;gBAC3B,MAAM,CAAC,WAAW,CAAC;oBACjB,IAAI,EAAE,UAAU;oBAChB,SAAS;oBACT,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAA2B,CAAC;oBAC/C,MAAM;iBACP,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC;QAED,UAAU,CAAC,MAAkB,EAAE,MAA4B;YACzD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBACrC,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;gBAC/B,eAAe,CAAC,GAAG,CAAC,SAAS,EAAE,EAAE,OAAO,EAAE,MAAM,EAA6B,CAAC,CAAC;gBAE/E,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;gBAC3B,MAAM,CAAC,WAAW,CAAC;oBACjB,IAAI,EAAE,YAAY;oBAClB,SAAS;oBACT,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAA2B,CAAC;oBAC/C,MAAM;iBACP,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC;QAED,SAAS;YACP,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC;YACtC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;YACnB,GAAG,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QACjC,CAAC;QAED,IAAI;YACF,OAAO,OAAO,CAAC,MAAM,CAAC;QACxB,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,oBAAoB,CAAC,KAAa;IAC/C,oCAAoC;IACpC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;IAEvD,MAAM,OAAO,GAAkC,EAAE,CAAC;IAClD,MAAM,eAAe,GAAG,IAAI,GAAG,EAAmC,CAAC;IACnE,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,4BAA4B;IAC5B,MAAM,YAAY,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCpB,CAAC;IAEF,iBAAiB;IACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/B,MAAM,MAAM,GAAG,IAAI,MAAM,CAAC,YAAY,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;QAExD,MAAM,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,QAAwB,EAAE,EAAE;YAChD,IAAI,QAAQ,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC7B,OAAO;YACT,CAAC;YAED,MAAM,OAAO,GAAG,eAAe,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;YACxD,IAAI,CAAC,OAAO;gBAAE,OAAO;YAErB,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;YAE3C,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACnB,OAAO,CAAC
,MAAM,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;YAC5C,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACnC,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,oBAAoB;QACpB,MAAM,CAAC,WAAW,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QACrC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACvB,CAAC;IAED,MAAM,SAAS,GAAG,GAAG,EAAE;QACrB,MAAM,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;QACxC,eAAe,GAAG,CAAC,eAAe,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;QACzD,OAAO,MAAM,CAAC;IAChB,CAAC,CAAC;IAEF,MAAM,UAAU,GAAG,GAAW,EAAE;QAC9B,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;IACpE,CAAC,CAAC;IAEF,OAAO;QACL,QAAQ,CAAC,MAAkB,EAAE,MAA0B;YACrD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBACrC,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;gBAC/B,eAAe,CAAC,GAAG,CAAC,SAAS,EAAE,EAAE,OAAO,EAAE,MAAM,EAA6B,CAAC,CAAC;gBAE/E,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;gBAC3B,MAAM,CAAC,WAAW,CAAC;oBACjB,IAAI,EAAE,UAAU;oBAChB,SAAS;oBACT,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAA2B,CAAC;oBAC/C,MAAM;iBACP,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC;QAED,UAAU,CAAC,MAAkB,EAAE,MAA4B;YACzD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBACrC,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;gBAC/B,eAAe,CAAC,GAAG,CAAC,SAAS,EAAE,EAAE,OAAO,EAAE,MAAM,EAA6B,CAAC,CAAC;gBAE/E,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;gBAC3B,MAAM,CAAC,WAAW,CAAC;oBACjB,IAAI,EAAE,YAAY;oBAClB,SAAS;oBACT,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAA2B,CAAC;oBAC/C,MAAM;iBACP,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC;QAED,SAAS;YACP,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC;YACtC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;QACrB,CAAC;QAED,IAAI;YACF,OAAO,OAAO,CAAC,MAAM,CAAC;QACxB,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,MAAkB,EAClB,MAA0B;IAE1B,MAAM,IAAI,GAAG,MAAM,gBAAgB,CAAC,CAAC,CAAC,CAAC;IAEvC,IAAI,CAAC;QACH,OAAO,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7C,CAAC;YAAS,CAAC;QACT,IAAI,CAAC,SAAS,EAAE,CAAC;IACnB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,MAAkB,EAClB,MAA
4B;IAE5B,MAAM,IAAI,GAAG,MAAM,gBAAgB,CAAC,CAAC,CAAC,CAAC;IAEvC,IAAI,CAAC;QACH,OAAO,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC/C,CAAC;YAAS,CAAC;QACT,IAAI,CAAC,SAAS,EAAE,CAAC;IACnB,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* High-level compression API.
|
|
3
|
+
*/
|
|
4
|
+
import { type CompressionConfig } from './config.js';
|
|
5
|
+
import { type CompressionResult, type TokenInput, type DiscoveredPattern } from './types.js';
|
|
6
|
+
/**
|
|
7
|
+
* Compress a token sequence.
|
|
8
|
+
*
|
|
9
|
+
* @param tokens - The token sequence to compress (Array, Uint32Array, or similar)
|
|
10
|
+
* @param config - Optional compression configuration
|
|
11
|
+
* @returns Promise resolving to compression result
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```typescript
|
|
15
|
+
* import { compress, decompress, initWasm } from '@small-ltsc/sdk';
|
|
16
|
+
*
|
|
17
|
+
* await initWasm();
|
|
18
|
+
*
|
|
19
|
+
* const tokens = [1, 2, 3, 1, 2, 3, 1, 2, 3];
|
|
20
|
+
* const result = await compress(tokens);
|
|
21
|
+
*
|
|
22
|
+
* console.log(`Compressed ${result.originalLength} -> ${result.compressedLength}`);
|
|
23
|
+
* console.log(`Ratio: ${(result.compressionRatio * 100).toFixed(1)}%`);
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
export declare function compress(tokens: TokenInput, config?: CompressionConfig): Promise<CompressionResult>;
|
|
27
|
+
/**
|
|
28
|
+
* Discover patterns in a token sequence without compressing.
|
|
29
|
+
*
|
|
30
|
+
* Useful for analysis, building static dictionaries, or understanding
|
|
31
|
+
* what patterns would be compressed.
|
|
32
|
+
*
|
|
33
|
+
* @param tokens - The token sequence to analyze
|
|
34
|
+
* @param minLength - Minimum pattern length (default: 2)
|
|
35
|
+
* @param maxLength - Maximum pattern length (default: 8)
|
|
36
|
+
* @returns Array of discovered patterns sorted by potential savings
|
|
37
|
+
*/
|
|
38
|
+
export declare function discoverPatterns(tokens: TokenInput, minLength?: number, maxLength?: number): Promise<DiscoveredPattern[]>;
|
|
39
|
+
//# sourceMappingURL=compress.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"compress.d.ts","sourceRoot":"","sources":["../../src/compress.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,KAAK,iBAAiB,EAA6C,MAAM,aAAa,CAAC;AAChG,OAAO,EACL,KAAK,iBAAiB,EACtB,KAAK,UAAU,EACf,KAAK,iBAAiB,EAEvB,MAAM,YAAY,CAAC;AAGpB;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAsB,QAAQ,CAC5B,MAAM,EAAE,UAAU,EAClB,MAAM,CAAC,EAAE,iBAAiB,GACzB,OAAO,CAAC,iBAAiB,CAAC,CAuC5B;AA+GD;;;;;;;;;;GAUG;AACH,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,UAAU,EAClB,SAAS,SAAI,EACb,SAAS,SAAI,GACZ,OAAO,CAAC,iBAAiB,EAAE,CAAC,CAY9B"}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TypeScript configuration types for Small LTSC.
|
|
3
|
+
*/
|
|
4
|
+
import type { StaticDictionary } from './dictionaries/index.js';
|
|
5
|
+
/**
|
|
6
|
+
* Selection mode for pattern selection.
|
|
7
|
+
*/
|
|
8
|
+
export type SelectionMode = 'greedy' | 'optimal' | 'beam';
|
|
9
|
+
/**
|
|
10
|
+
* Configuration for compression operations.
|
|
11
|
+
*/
|
|
12
|
+
export interface CompressionConfig {
|
|
13
|
+
/**
|
|
14
|
+
* Minimum pattern length to consider.
|
|
15
|
+
* @default 2
|
|
16
|
+
*/
|
|
17
|
+
minSubsequenceLength?: number;
|
|
18
|
+
/**
|
|
19
|
+
* Maximum pattern length to consider.
|
|
20
|
+
* @default 8
|
|
21
|
+
*/
|
|
22
|
+
maxSubsequenceLength?: number;
|
|
23
|
+
/**
|
|
24
|
+
* Selection algorithm to use.
|
|
25
|
+
* - "greedy": Fast, good results for most inputs
|
|
26
|
+
* - "optimal": Uses DP for theoretically optimal selection (slower)
|
|
27
|
+
* - "beam": Beam search compromise between speed and quality
|
|
28
|
+
* @default "greedy"
|
|
29
|
+
*/
|
|
30
|
+
selectionMode?: SelectionMode;
|
|
31
|
+
/**
|
|
32
|
+
* Beam width for beam search selection.
|
|
33
|
+
* Only used when selectionMode is "beam".
|
|
34
|
+
* @default 8
|
|
35
|
+
*/
|
|
36
|
+
beamWidth?: number;
|
|
37
|
+
/**
|
|
38
|
+
* Enable hierarchical compression (meta-tokens can reference other meta-tokens).
|
|
39
|
+
* @default true
|
|
40
|
+
*/
|
|
41
|
+
hierarchicalEnabled?: boolean;
|
|
42
|
+
/**
|
|
43
|
+
* Maximum depth for hierarchical compression.
|
|
44
|
+
* @default 3
|
|
45
|
+
*/
|
|
46
|
+
hierarchicalMaxDepth?: number;
|
|
47
|
+
/**
|
|
48
|
+
* Static dictionary to use for pre-defined patterns.
|
|
49
|
+
* Can be a built-in dictionary ID or a custom dictionary.
|
|
50
|
+
*/
|
|
51
|
+
staticDictionary?: string | StaticDictionary;
|
|
52
|
+
/**
|
|
53
|
+
* Input size threshold above which streaming mode is automatically enabled.
|
|
54
|
+
* @default 50000
|
|
55
|
+
*/
|
|
56
|
+
streamingThreshold?: number;
|
|
57
|
+
/**
|
|
58
|
+
* Maximum memory usage for WASM in MB.
|
|
59
|
+
* @default 256
|
|
60
|
+
*/
|
|
61
|
+
maxMemoryMb?: number;
|
|
62
|
+
/**
|
|
63
|
+
* Enable round-trip verification after compression.
|
|
64
|
+
* When enabled, decompresses the result and verifies it matches the original.
|
|
65
|
+
* @default false
|
|
66
|
+
*/
|
|
67
|
+
verify?: boolean;
|
|
68
|
+
/**
|
|
69
|
+
* Dictionary start delimiter token.
|
|
70
|
+
* @default 0xFFFFFFF0
|
|
71
|
+
*/
|
|
72
|
+
dictStartToken?: number;
|
|
73
|
+
/**
|
|
74
|
+
* Dictionary end delimiter token.
|
|
75
|
+
* @default 0xFFFFFFF1
|
|
76
|
+
*/
|
|
77
|
+
dictEndToken?: number;
|
|
78
|
+
/**
|
|
79
|
+
* Starting meta-token ID for new dictionary entries.
|
|
80
|
+
* @default 0xFFFF0000
|
|
81
|
+
*/
|
|
82
|
+
nextMetaToken?: number;
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Configuration for decompression operations.
|
|
86
|
+
*/
|
|
87
|
+
export interface DecompressionConfig {
|
|
88
|
+
/**
|
|
89
|
+
* Dictionary start delimiter token.
|
|
90
|
+
* Must match the token used during compression.
|
|
91
|
+
*/
|
|
92
|
+
dictStartToken?: number;
|
|
93
|
+
/**
|
|
94
|
+
* Dictionary end delimiter token.
|
|
95
|
+
* Must match the token used during compression.
|
|
96
|
+
*/
|
|
97
|
+
dictEndToken?: number;
|
|
98
|
+
}
|
|
99
|
+
/**
|
|
100
|
+
* Default configuration values.
|
|
101
|
+
*/
|
|
102
|
+
export declare const DEFAULT_CONFIG: Required<Omit<CompressionConfig, 'staticDictionary'>>;
|
|
103
|
+
/**
|
|
104
|
+
* Merge user config with defaults.
|
|
105
|
+
*/
|
|
106
|
+
export declare function mergeConfig(userConfig?: CompressionConfig): Required<Omit<CompressionConfig, 'staticDictionary'>> & {
|
|
107
|
+
staticDictionary?: string | StaticDictionary;
|
|
108
|
+
};
|
|
109
|
+
/**
|
|
110
|
+
* Convert SDK config to WASM config format.
|
|
111
|
+
*/
|
|
112
|
+
export declare function toWasmConfig(config: CompressionConfig): Record<string, unknown>;
|
|
113
|
+
//# sourceMappingURL=config.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../src/config.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAEhE;;GAEG;AACH,MAAM,MAAM,aAAa,GAAG,QAAQ,GAAG,SAAS,GAAG,MAAM,CAAC;AAE1D;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAE9B;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAE9B;;;;;;OAMG;IACH,aAAa,CAAC,EAAE,aAAa,CAAC;IAE9B;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;;OAGG;IACH,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAE9B;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAE9B;;;OAGG;IACH,gBAAgB,CAAC,EAAE,MAAM,GAAG,gBAAgB,CAAC;IAE7C;;;OAGG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAE5B;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB;;;;OAIG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;IAEjB;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB;;;OAGG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,eAAO,MAAM,cAAc,EAAE,QAAQ,CACnC,IAAI,CAAC,iBAAiB,EAAE,kBAAkB,CAAC,CAcnC,CAAC;AAEX;;GAEG;AACH,wBAAgB,WAAW,CACzB,UAAU,CAAC,EAAE,iBAAiB,GAC7B,QAAQ,CAAC,IAAI,CAAC,iBAAiB,EAAE,kBAAkB,CAAC,CAAC,GAAG;IACzD,gBAAgB,CAAC,EAAE,MAAM,GAAG,gBAAgB,CAAC;CAC9C,CAKA;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,MAAM,EAAE,iBAAiB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAa/E"}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* High-level decompression API.
|
|
3
|
+
*/
|
|
4
|
+
import { type DecompressionConfig } from './config.js';
|
|
5
|
+
import { type TokenInput } from './types.js';
|
|
6
|
+
/**
|
|
7
|
+
* Decompress a compressed token sequence.
|
|
8
|
+
*
|
|
9
|
+
* @param tokens - The compressed token sequence
|
|
10
|
+
* @param config - Optional decompression configuration
|
|
11
|
+
* @returns Promise resolving to the original token sequence
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```typescript
|
|
15
|
+
* import { compress, decompress, initWasm } from '@small-ltsc/sdk';
|
|
16
|
+
*
|
|
17
|
+
* await initWasm();
|
|
18
|
+
*
|
|
19
|
+
* const tokens = [1, 2, 3, 1, 2, 3, 1, 2, 3];
|
|
20
|
+
* const result = await compress(tokens);
|
|
21
|
+
* const restored = await decompress(result.serializedTokens);
|
|
22
|
+
*
|
|
23
|
+
* console.assert(JSON.stringify(tokens) === JSON.stringify(restored));
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
export declare function decompress(tokens: TokenInput, config?: DecompressionConfig): Promise<readonly number[]>;
|
|
27
|
+
/**
|
|
28
|
+
* Extract the dictionary from a compressed token sequence.
|
|
29
|
+
*
|
|
30
|
+
* @param tokens - The compressed token sequence
|
|
31
|
+
* @param config - Optional decompression configuration
|
|
32
|
+
* @returns Map of meta-tokens to their definitions
|
|
33
|
+
*/
|
|
34
|
+
export declare function extractDictionary(tokens: TokenInput, config?: DecompressionConfig): Promise<ReadonlyMap<number, readonly number[]>>;
|
|
35
|
+
/**
|
|
36
|
+
* Extract the body tokens from a compressed sequence (without decompression).
|
|
37
|
+
*
|
|
38
|
+
* @param tokens - The compressed token sequence
|
|
39
|
+
* @param config - Optional decompression configuration
|
|
40
|
+
* @returns Body tokens with meta-token references
|
|
41
|
+
*/
|
|
42
|
+
export declare function extractBody(tokens: TokenInput, config?: DecompressionConfig): readonly number[];
|
|
43
|
+
/**
|
|
44
|
+
* Check if a token sequence appears to be compressed.
|
|
45
|
+
*
|
|
46
|
+
* Looks for the dictionary start token marker.
|
|
47
|
+
*
|
|
48
|
+
* @param tokens - The token sequence to check
|
|
49
|
+
* @param config - Optional decompression configuration
|
|
50
|
+
* @returns True if the sequence contains a dictionary section
|
|
51
|
+
*/
|
|
52
|
+
export declare function isCompressed(tokens: TokenInput, config?: DecompressionConfig): boolean;
|
|
53
|
+
//# sourceMappingURL=decompress.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"decompress.d.ts","sourceRoot":"","sources":["../../src/decompress.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,EAAE,KAAK,mBAAmB,EAAkB,MAAM,aAAa,CAAC;AACvE,OAAO,EAAE,KAAK,UAAU,EAAmB,MAAM,YAAY,CAAC;AAE9D;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAsB,UAAU,CAC9B,MAAM,EAAE,UAAU,EAClB,MAAM,CAAC,EAAE,mBAAmB,GAC3B,OAAO,CAAC,SAAS,MAAM,EAAE,CAAC,CAgB5B;AAED;;;;;;GAMG;AACH,wBAAsB,iBAAiB,CACrC,MAAM,EAAE,UAAU,EAClB,MAAM,CAAC,EAAE,mBAAmB,GAC3B,OAAO,CAAC,WAAW,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,CAAC,CAAC,CA8BjD;AAED;;;;;;GAMG;AACH,wBAAgB,WAAW,CACzB,MAAM,EAAE,UAAU,EAClB,MAAM,CAAC,EAAE,mBAAmB,GAC3B,SAAS,MAAM,EAAE,CAanB;AAED;;;;;;;;GAQG;AACH,wBAAgB,YAAY,CAC1B,MAAM,EAAE,UAAU,EAClB,MAAM,CAAC,EAAE,mBAAmB,GAC3B,OAAO,CAIT"}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Static dictionary support for Small LTSC.
|
|
3
|
+
*
|
|
4
|
+
* Pre-built dictionaries for common domains that can be used
|
|
5
|
+
* to improve compression of domain-specific content.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Static dictionary definition.
|
|
9
|
+
*/
|
|
10
|
+
export interface StaticDictionary {
|
|
11
|
+
/**
|
|
12
|
+
* Unique identifier for the dictionary.
|
|
13
|
+
*/
|
|
14
|
+
id: string;
|
|
15
|
+
/**
|
|
16
|
+
* Dictionary version.
|
|
17
|
+
*/
|
|
18
|
+
version: string;
|
|
19
|
+
/**
|
|
20
|
+
* Human-readable name.
|
|
21
|
+
*/
|
|
22
|
+
name: string;
|
|
23
|
+
/**
|
|
24
|
+
* Description of what this dictionary is optimized for.
|
|
25
|
+
*/
|
|
26
|
+
description: string;
|
|
27
|
+
/**
|
|
28
|
+
* Dictionary entries: meta-token ID -> definition tokens.
|
|
29
|
+
*/
|
|
30
|
+
entries: Map<number, readonly number[]>;
|
|
31
|
+
/**
|
|
32
|
+
* Patterns (token subsequences) to definitions.
|
|
33
|
+
*/
|
|
34
|
+
patterns: Map<string, number>;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Available built-in static dictionaries, keyed by dictionary ID. Each value carries id/version/name/description plus a plain array of { metaToken, pattern } entries (not the Map-based StaticDictionary shape).
|
|
38
|
+
*/
|
|
39
|
+
export declare const STATIC_DICTIONARIES: {
|
|
40
|
+
readonly 'python-v1': {
|
|
41
|
+
id: string;
|
|
42
|
+
version: string;
|
|
43
|
+
name: string;
|
|
44
|
+
description: string;
|
|
45
|
+
entries: {
|
|
46
|
+
metaToken: number;
|
|
47
|
+
pattern: number[];
|
|
48
|
+
}[];
|
|
49
|
+
};
|
|
50
|
+
readonly 'typescript-v1': {
|
|
51
|
+
id: string;
|
|
52
|
+
version: string;
|
|
53
|
+
name: string;
|
|
54
|
+
description: string;
|
|
55
|
+
entries: {
|
|
56
|
+
metaToken: number;
|
|
57
|
+
pattern: number[];
|
|
58
|
+
}[];
|
|
59
|
+
};
|
|
60
|
+
readonly 'markdown-v1': {
|
|
61
|
+
id: string;
|
|
62
|
+
version: string;
|
|
63
|
+
name: string;
|
|
64
|
+
description: string;
|
|
65
|
+
entries: {
|
|
66
|
+
metaToken: number;
|
|
67
|
+
pattern: number[];
|
|
68
|
+
}[];
|
|
69
|
+
};
|
|
70
|
+
readonly 'json-v1': {
|
|
71
|
+
id: string;
|
|
72
|
+
version: string;
|
|
73
|
+
name: string;
|
|
74
|
+
description: string;
|
|
75
|
+
entries: {
|
|
76
|
+
metaToken: number;
|
|
77
|
+
pattern: number[];
|
|
78
|
+
}[];
|
|
79
|
+
};
|
|
80
|
+
readonly 'sql-v1': {
|
|
81
|
+
id: string;
|
|
82
|
+
version: string;
|
|
83
|
+
name: string;
|
|
84
|
+
description: string;
|
|
85
|
+
entries: {
|
|
86
|
+
metaToken: number;
|
|
87
|
+
pattern: number[];
|
|
88
|
+
}[];
|
|
89
|
+
};
|
|
90
|
+
};
|
|
91
|
+
/** Union of the built-in dictionary IDs, i.e. the keys of STATIC_DICTIONARIES. */
export type StaticDictionaryId = keyof typeof STATIC_DICTIONARIES;
|
|
92
|
+
/**
|
|
93
|
+
* Load a built-in static dictionary.
|
|
94
|
+
*
|
|
95
|
+
* @param id - Built-in dictionary ID; must be one of the keys of STATIC_DICTIONARIES (e.g., 'python-v1', 'typescript-v1')
|
|
96
|
+
* @returns Promise resolving to the dictionary as a StaticDictionary
|
|
97
|
+
*
|
|
98
|
+
* @example
|
|
99
|
+
* ```typescript
|
|
100
|
+
* import { loadStaticDictionary, compress } from '@small-ltsc/sdk';
|
|
101
|
+
*
|
|
102
|
+
* const pythonDict = await loadStaticDictionary('python-v1');
|
|
103
|
+
*
|
|
104
|
+
* const result = await compress(tokens, {
|
|
105
|
+
* staticDictionary: pythonDict,
|
|
106
|
+
* });
|
|
107
|
+
* ```
|
|
108
|
+
*/
|
|
109
|
+
export declare function loadStaticDictionary(id: StaticDictionaryId): Promise<StaticDictionary>;
|
|
110
|
+
/**
|
|
111
|
+
* Create a custom static dictionary from patterns.
|
|
112
|
+
*
|
|
113
|
+
* @param id - Unique identifier for the dictionary
|
|
114
|
+
* @param patterns - Token patterns to include; each pattern is itself an array of numeric tokens
|
|
115
|
+
* @param startMetaToken - Starting meta-token ID (default: 0xFFFF8000). NOTE(review): presumably each pattern is assigned an ID counting up from this value - confirm against the implementation
|
|
116
|
+
* @returns StaticDictionary ready for use
|
|
117
|
+
*/
|
|
118
|
+
export declare function createStaticDictionary(id: string, patterns: number[][], startMetaToken?: number): StaticDictionary;
|
|
119
|
+
/**
|
|
120
|
+
* List the IDs of the available built-in dictionaries (returns StaticDictionaryId values, not dictionary objects).
|
|
121
|
+
*/
|
|
122
|
+
export declare function listStaticDictionaries(): StaticDictionaryId[];
|
|
123
|
+
/**
|
|
124
|
+
* Check whether `id` names a built-in dictionary. Declared as a type guard: a `true` result narrows `id` to StaticDictionaryId.
|
|
125
|
+
*/
|
|
126
|
+
export declare function isBuiltinDictionary(id: string): id is StaticDictionaryId;
|
|
127
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/dictionaries/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;OAEG;IACH,EAAE,EAAE,MAAM,CAAC;IAEX;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAEhB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IAEb;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IAEpB;;OAEG;IACH,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,CAAC,CAAC;IAExC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC/B;AASD;;GAEG;AACH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAMtB,CAAC;AAEX,MAAM,MAAM,kBAAkB,GAAG,MAAM,OAAO,mBAAmB,CAAC;AAgBlE;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAsB,oBAAoB,CACxC,EAAE,EAAE,kBAAkB,GACrB,OAAO,CAAC,gBAAgB,CAAC,CAO3B;AAwBD;;;;;;;GAOG;AACH,wBAAgB,sBAAsB,CACpC,EAAE,EAAE,MAAM,EACV,QAAQ,EAAE,MAAM,EAAE,EAAE,EACpB,cAAc,SAAa,GAC1B,gBAAgB,CAmBlB;AAED;;GAEG;AACH,wBAAgB,sBAAsB,IAAI,kBAAkB,EAAE,CAE7D;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,EAAE,EAAE,MAAM,GAAG,EAAE,IAAI,kBAAkB,CAExE"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Small LTSC SDK - Lossless Token Sequence Compression
|
|
3
|
+
*
|
|
4
|
+
* A TypeScript SDK for compressing LLM token sequences to reduce
|
|
5
|
+
* inference costs and context length requirements.
|
|
6
|
+
*
|
|
7
|
+
* @packageDocumentation
|
|
8
|
+
*/
|
|
9
|
+
export { compress, discoverPatterns } from './compress.js';
|
|
10
|
+
export { decompress, extractDictionary, extractBody, isCompressed } from './decompress.js';
|
|
11
|
+
export { createStreamingCompressor, compressStream, processInChunks, type StreamingCompressor, } from './streaming.js';
|
|
12
|
+
export { createWorkerPool, compressInWorker, decompressInWorker, type WorkerPool, } from './worker.js';
|
|
13
|
+
export { type CompressionConfig, type DecompressionConfig, type SelectionMode, DEFAULT_CONFIG, mergeConfig, } from './config.js';
|
|
14
|
+
export { type Token, type TokenSeq, type TokenInput, type CompressionResult, type CompressionMetrics, type DiscoveredPattern, normalizeTokens, isTokenSeq, } from './types.js';
|
|
15
|
+
export { loadStaticDictionary, createStaticDictionary, listStaticDictionaries, isBuiltinDictionary, type StaticDictionary, type StaticDictionaryId, STATIC_DICTIONARIES, } from './dictionaries/index.js';
|
|
16
|
+
export { initWasm, initWasmFromModule, initWasmFromBytes, isWasmInitialized, getWasmVersion, } from './wasm/loader.js';
|
|
17
|
+
/** Version string exported by the SDK entry point. */
export declare const VERSION = "0.1.0";
|
|
18
|
+
//# sourceMappingURL=index.d.ts.map
|