gn-native 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
package/index.d.ts ADDED
File without changes
package/package.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "name": "gn-native",
3
+ "version": "1.1.0",
4
+ "main": "index.js",
5
+ "napi": {
6
+ "name": "gn-native",
7
+ "triples": {
8
+ "defaults": true
9
+ }
10
+ },
11
+ "scripts": {
12
+ "build": "napi build --platform --release",
13
+ "build:debug": "napi build --platform"
14
+ },
15
+ "devDependencies": {
16
+ "@napi-rs/cli": "latest"
17
+ },
18
+ "description": "Domain-adaptive lossless compression for LLM conversation streams",
19
+ "keywords": [
20
+ "compression",
21
+ "llm",
22
+ "rust",
23
+ "napi",
24
+ "brotli",
25
+ "deflate"
26
+ ],
27
+ "author": "Robert Rider <atomsrkuul@gmail.com>",
28
+ "license": "MIT",
29
+ "repository": {
30
+ "type": "git",
31
+ "url": "https://github.com/atomsrkuul/glasik-core"
32
+ }
33
+ }
package/src/lib.rs ADDED
@@ -0,0 +1,652 @@
1
+ use napi::bindgen_prelude::*;
2
+ use napi_derive::napi;
3
+
4
+
5
+ use napi::bindgen_prelude::*;
6
+ use glasik_core::tokenizer::sliding_v2::SlidingTokenizerV2;
7
+ use glasik_core::fractal::FractalCompressor;
8
+ use glasik_core::pipeline;
9
+ use glasik_core::static_dict;
10
+ use glasik_core::tokenizer::lz77_gn::GNPrefixTokenizer;
11
+ use glasik_core::tokenizer::dictionary::DictEntry;
12
+ use std::sync::OnceLock;
13
+ use tokio::sync::{mpsc, oneshot};
14
+
15
+ enum Job {
16
+ CompressHybrid { data: Vec<u8>, resp: oneshot::Sender<Vec<u8>> },
17
+ CompressAC { data: Vec<u8>, resp: oneshot::Sender<Vec<u8>> },
18
+ CompressSplit { data: Vec<u8>, resp: oneshot::Sender<Vec<u8>> },
19
+ CompressSplitBatch { chunks: Vec<Vec<u8>>, resp: oneshot::Sender<Vec<u8>> },
20
+ DecompressL2 { data: Vec<u8>, resp: oneshot::Sender<napi::Result<Vec<u8>>> },
21
+ CompressFast { data: Vec<u8>, resp: oneshot::Sender<Vec<u8>> },
22
+ CompressL2 { data: Vec<u8>, resp: oneshot::Sender<Vec<u8>> },
23
+ RefreshVocab { resp: oneshot::Sender<usize> },
24
+ ExportEntries { resp: oneshot::Sender<String> },
25
+ CompressPressurized { target: Vec<u8>, warm: Vec<Vec<u8>>, pk: usize, resp: oneshot::Sender<Vec<u8>> },
26
+ WindowStats { resp: oneshot::Sender<String> },
27
+ SaveSnapshot { path: String, resp: oneshot::Sender<String> },
28
+ LoadSnapshot { path: String, resp: oneshot::Sender<String> },
29
+ CompressFractal { data: Vec<u8>, shard_type: String, session_id: String, resp: oneshot::Sender<Vec<u8>> },
30
+ DecompressFractal { data: Vec<u8>, shard_type: String, session_id: String, resp: oneshot::Sender<napi::Result<Vec<u8>>> },
31
+ CompressFractalVtcV3 { data: Vec<u8>, shard_type: String, session_id: String, resp: oneshot::Sender<napi::Result<String>> },
32
+ }
33
+
34
+ static WORKER: OnceLock<mpsc::Sender<Job>> = OnceLock::new();
35
+ static FAST_TOK: OnceLock<std::sync::Mutex<GNPrefixTokenizer<4>>> = OnceLock::new();
36
+ static HYBRID_ENC: OnceLock<std::sync::Mutex<glasik_core::tokenizer::hybrid_async::HybridAsyncEncoder>> = OnceLock::new();
37
+
38
+ // Thread-local GNHybridEncoder -- fastest path, no locks
39
+ use std::cell::RefCell;
40
+ thread_local! {
41
+ static TL_HYBRID: RefCell<Option<glasik_core::tokenizer::lz77_gn::GNPrefixTokenizer<4>>> = RefCell::new(None);
42
+ }
43
+
44
+ fn with_tl_hybrid<F, R>(f: F) -> R
45
+ where F: FnOnce(&mut glasik_core::tokenizer::lz77_gn::GNPrefixTokenizer<4>) -> R {
46
+ TL_HYBRID.with(|cell| {
47
+ let mut opt = cell.borrow_mut();
48
+ if opt.is_none() {
49
+ let entries = glasik_core::static_dict::load_static_dict();
50
+ let dict: Vec<glasik_core::tokenizer::dictionary::DictEntry> = entries.iter().map(|(b,f,s)|
51
+ glasik_core::tokenizer::dictionary::DictEntry { bytes: b.clone(), freq: *f as usize, saving: *s as usize }
52
+ ).collect();
53
+ let mut tok = glasik_core::tokenizer::lz77_gn::GNPrefixTokenizer::<4>::new();
54
+ tok.seed_from_vocab(&dict);
55
+ *opt = Some(tok);
56
+ }
57
+ f(opt.as_mut().unwrap())
58
+ })
59
+ }
60
+
61
+ fn get_hybrid() -> &'static std::sync::Mutex<glasik_core::tokenizer::hybrid_async::HybridAsyncEncoder> {
62
+ HYBRID_ENC.get_or_init(|| {
63
+ std::sync::Mutex::new(glasik_core::tokenizer::hybrid_async::HybridAsyncEncoder::new())
64
+ })
65
+ }
66
+
67
+ fn get_fast_tok() -> &'static std::sync::Mutex<GNPrefixTokenizer<4>> {
68
+ FAST_TOK.get_or_init(|| {
69
+ let entries = static_dict::load_static_dict();
70
+ let dict: Vec<DictEntry> = entries.iter().map(|(b,f,s)| DictEntry {
71
+ bytes: b.clone(), freq: *f as usize, saving: *s as usize
72
+ }).collect();
73
+ let mut tok = GNPrefixTokenizer::<4>::new();
74
+ tok.seed_from_vocab(&dict);
75
+ std::sync::Mutex::new(tok)
76
+ })
77
+ }
78
+
79
+ /// Fast path: GNPrefixTokenizer O(n) single pass + libdeflate
80
+ fn compress_lz77gn(buf: &[u8], tok: &GNPrefixTokenizer<4>) -> Vec<u8> {
81
+ let tokenized = tok.tokenize_to_gn_bytes(buf, true);
82
+ deflate_buf(tokenized)
83
+ }
84
+
85
/// Leading frame byte written by `deflate_buf` when the payload is a deflate stream.
const FLAG_DEFLATED: u8 = 0x01;

/// Leading frame byte written by `deflate_buf` when the payload is the tokenized bytes as-is.
const FLAG_RAW_TOKENS: u8 = 0x00;
87
+
88
+ fn deflate_buf(tokenized: Vec<u8>) -> Vec<u8> {
89
+ let mut comp = libdeflater::Compressor::new(libdeflater::CompressionLvl::default());
90
+ let max = comp.deflate_compress_bound(tokenized.len());
91
+ let mut deflated = vec![0u8; max];
92
+ match comp.deflate_compress(&tokenized, &mut deflated) {
93
+ Ok(n) => {
94
+ deflated.truncate(n);
95
+ if deflated.len() < tokenized.len() {
96
+ // Prefix with FLAG_DEFLATED so decoder knows to inflate first
97
+ let mut out = Vec::with_capacity(1 + deflated.len());
98
+ out.push(FLAG_DEFLATED);
99
+ out.extend_from_slice(&deflated);
100
+ out
101
+ } else {
102
+ // Raw tokenized -- prefix with FLAG_RAW_TOKENS
103
+ let mut out = Vec::with_capacity(1 + tokenized.len());
104
+ out.push(FLAG_RAW_TOKENS);
105
+ out.extend_from_slice(&tokenized);
106
+ out
107
+ }
108
+ }
109
+ Err(_) => {
110
+ let mut out = Vec::with_capacity(1 + tokenized.len());
111
+ out.push(FLAG_RAW_TOKENS);
112
+ out.extend_from_slice(&tokenized);
113
+ out
114
+ }
115
+ }
116
+ }
117
+
118
+ fn split_deflate(tok_ids: Vec<u8>, literals: Vec<u8>) -> Vec<u8> {
119
+ // Compress each stream independently with raw deflate
120
+ // Frame: [2B tok_deflated_len][tok_deflated][lit_deflated]
121
+ let tok_comp = if tok_ids.is_empty() {
122
+ vec![]
123
+ } else {
124
+ let mut comp = libdeflater::Compressor::new(libdeflater::CompressionLvl::default());
125
+ let max = comp.deflate_compress_bound(tok_ids.len());
126
+ let mut out = vec![0u8; max];
127
+ match comp.deflate_compress(&tok_ids, &mut out) {
128
+ Ok(n) => { out.truncate(n); if out.len() < tok_ids.len() { out } else { tok_ids } }
129
+ Err(_) => tok_ids
130
+ }
131
+ };
132
+ let lit_comp = if literals.is_empty() {
133
+ vec![]
134
+ } else {
135
+ let mut comp = libdeflater::Compressor::new(libdeflater::CompressionLvl::default());
136
+ let max = comp.deflate_compress_bound(literals.len());
137
+ let mut out = vec![0u8; max];
138
+ match comp.deflate_compress(&literals, &mut out) {
139
+ Ok(n) => { out.truncate(n); if out.len() < literals.len() { out } else { literals } }
140
+ Err(_) => literals
141
+ }
142
+ };
143
+ // Frame: [2B tok_len][tok_data][lit_data]
144
+ let tok_len = tok_comp.len() as u16;
145
+ let mut frame = Vec::with_capacity(2 + tok_comp.len() + lit_comp.len());
146
+ frame.extend_from_slice(&tok_len.to_le_bytes());
147
+ frame.extend_from_slice(&tok_comp);
148
+ frame.extend_from_slice(&lit_comp);
149
+ frame
150
+ }
151
+
152
+ fn inflate_buf(data: &[u8]) -> std::result::Result<Vec<u8>, String> {
153
+ if data.is_empty() { return Ok(Vec::new()); }
154
+ let flag = data[0];
155
+ let payload = &data[1..];
156
+ if flag == 0x01 {
157
+ // deflate compressed
158
+ let mut decomp = libdeflater::Decompressor::new();
159
+ let mut out = vec![0u8; payload.len() * 4];
160
+ loop {
161
+ match decomp.deflate_decompress(payload, &mut out) {
162
+ Ok(n) => { out.truncate(n); return Ok(out); }
163
+ Err(_) => {
164
+ let new_len = out.len() * 2;
165
+ if new_len > 64 * 1024 * 1024 { return std::result::Result::Err("decompress overflow".to_string()); }
166
+ out.resize(new_len, 0);
167
+ }
168
+ }
169
+ }
170
+ } else {
171
+ Ok(payload.to_vec())
172
+ }
173
+ }
174
+
175
+ fn get_worker() -> &'static mpsc::Sender<Job> {
176
+ WORKER.get_or_init(|| {
177
+ let (tx, mut rx) = mpsc::channel::<Job>(256);
178
+ tokio::spawn(async move {
179
+ // Hybrid async encoder with adaptive vocab swap
180
+ let mut hybrid = glasik_core::tokenizer::hybrid_async::HybridAsyncEncoder::new();
181
+ let static_entries = static_dict::load_static_dict();
182
+ // Build GNPrefixTokenizer from static dict for fast O(n) compression
183
+ let dict_entries: Vec<DictEntry> = static_entries.iter().map(|(b,f,s)| DictEntry {
184
+ bytes: b.clone(), freq: *f as usize, saving: *s as usize
185
+ }).collect();
186
+ let mut tok4 = GNPrefixTokenizer::<4>::new();
187
+ tok4.seed_from_vocab(&dict_entries);
188
+ let mut slider = SlidingTokenizerV2::new_with_static(static_entries);
189
+ // Auto-load snapshot
190
+ let snap = format!("{}/.openclaw/gn-window.snapshot",
191
+ std::env::var("HOME").unwrap_or_default());
192
+ if let Ok(data) = std::fs::read_to_string(&snap) {
193
+ if let Ok(d) = serde_json::from_str::<serde_json::Value>(&data) {
194
+ if let Some(arr) = d["entries"].as_array() {
195
+ let loaded: Vec<(Vec<u8>, u64, u64)> = arr.iter().filter_map(|e| {
196
+ let b: Vec<u8> = e["b"].as_array()?.iter()
197
+ .filter_map(|x| x.as_u64().filter(|&v| v <= 255).map(|v| v as u8)).collect();
198
+ Some((b, e["f"].as_u64()?, e["s"].as_u64()?))
199
+ }).collect();
200
+ let n = loaded.len();
201
+ slider.import_dict(1, loaded);
202
+ eprintln!("GN-NATIVE: restored {} entries", n);
203
+ }
204
+ }
205
+ }
206
+ // Init FractalCompressor -- load L0 from same snapshot
207
+ let mut fractal = FractalCompressor::new();
208
+ let snap_path = format!("{}/.openclaw/gn-window.snapshot",
209
+ std::env::var("HOME").unwrap_or_default());
210
+ if let Ok(data) = std::fs::read_to_string(&snap_path) {
211
+ if let Ok(d) = serde_json::from_str::<serde_json::Value>(&data) {
212
+ if let Some(arr) = d["entries"].as_array() {
213
+ let l0: Vec<(Vec<u8>, u64, u64)> = arr.iter().filter_map(|e| {
214
+ let b: Vec<u8> = e["b"].as_array()?.iter()
215
+ .filter_map(|x| x.as_u64().filter(|&v| v <= 255).map(|v| v as u8)).collect();
216
+ Some((b, e["f"].as_u64()?, e["s"].as_u64()?))
217
+ }).collect();
218
+ let n = l0.len();
219
+ fractal.load_l0(l0);
220
+ eprintln!("GN-NATIVE: fractal L0 loaded {} entries", n);
221
+ }
222
+ }
223
+ }
224
+ while let Some(job) = rx.recv().await {
225
+ match job {
226
+ Job::CompressHybrid { data, resp } => {
227
+ let _ = resp.send(hybrid.encode(&data));
228
+ }
229
+ Job::CompressAC { data, resp } => {
230
+ // O(n) Aho-Corasick -- fast path with full window vocab
231
+ let tokenized = slider.encode_ac(&data);
232
+ let _ = resp.send(deflate_buf(tokenized));
233
+ }
234
+ Job::CompressSplit { data, resp } => {
235
+ let (tok_ids, literals) = slider.encode_ac_split(&data);
236
+ let _ = resp.send(split_deflate(tok_ids, literals));
237
+ }
238
+ Job::CompressSplitBatch { chunks, resp } => {
239
+ // Batch split-stream: collect ALL tok/lit streams across chunks
240
+ // deflate combined streams once -- eliminates per-chunk header overhead
241
+ // This is where beats-brotli ratio comes from
242
+ let mut all_toks: Vec<u8> = Vec::new();
243
+ let mut all_lits: Vec<u8> = Vec::new();
244
+ for chunk in &chunks {
245
+ let (toks, lits) = slider.encode_ac_split(chunk);
246
+ all_toks.extend_from_slice(&toks);
247
+ all_lits.extend_from_slice(&lits);
248
+ }
249
+ let _ = resp.send(split_deflate(all_toks, all_lits));
250
+ }
251
+ Job::DecompressL2 { data, resp } => {
252
+ // Inflate then decode tokens using current window vocab
253
+ let result = (|| -> napi::Result<Vec<u8>> {
254
+ if data.is_empty() { return Ok(Vec::new()); }
255
+ let flag = data[0];
256
+ let payload = &data[1..];
257
+ let tokenized = if flag == FLAG_DEFLATED {
258
+ // Inflate first
259
+ let mut decomp = libdeflater::Decompressor::new();
260
+ let mut out = vec![0u8; payload.len().max(64) * 8];
261
+ loop {
262
+ match decomp.deflate_decompress(payload, &mut out) {
263
+ Ok(n) => { out.truncate(n); break out; }
264
+ Err(_) => {
265
+ let nl = out.len() * 2;
266
+ if nl > 64*1024*1024 {
267
+ return Err(Error::from_reason("inflate overflow"));
268
+ }
269
+ out.resize(nl, 0);
270
+ }
271
+ }
272
+ }
273
+ } else {
274
+ // Raw tokenized -- decode directly
275
+ payload.to_vec()
276
+ };
277
+ slider.decode_raw(&tokenized)
278
+ .map_err(Error::from_reason)
279
+ })();
280
+ let _ = resp.send(result);
281
+ }
282
+ Job::CompressFast { data, resp } => {
283
+ let _ = resp.send(compress_lz77gn(&data, &tok4));
284
+ }
285
+ Job::CompressL2 { data, resp } => {
286
+ let t = slider.encode(&data);
287
+ let _ = resp.send(deflate_buf(t));
288
+ }
289
+ Job::RefreshVocab { resp } => {
290
+ // Sync fast tokenizer from L2 window (uses u16 -- all entries)
291
+ let (_, entries) = slider.export_dict();
292
+ let dict: Vec<DictEntry> = entries.iter().map(|(b,f,s)| DictEntry {
293
+ bytes: b.clone(), freq: *f as usize, saving: *s as usize
294
+ }).collect();
295
+ let n = dict.len();
296
+ tok4.seed_from_vocab(&dict);
297
+ let _ = resp.send(n);
298
+ }
299
+ Job::CompressPressurized { target, warm, pk, resp } => {
300
+ let start = warm.len().saturating_sub(pk);
301
+ for w in &warm[start..] { slider.encode(w); }
302
+ let t = slider.encode(&target);
303
+ let _ = resp.send(deflate_buf(t));
304
+ }
305
+ Job::WindowStats { resp } => {
306
+ let (e, b) = slider.stats();
307
+ let _ = resp.send(format!(r#"{{"window_entries":{},"batches":{}}}"#, e, b));
308
+ }
309
+ Job::SaveSnapshot { path, resp } => {
310
+ let msg = match save_snap(&slider, &path) {
311
+ Ok(_) => "ok".to_string(),
312
+ Err(e) => format!("error: {}", e),
313
+ };
314
+ let _ = resp.send(msg);
315
+ }
316
+ Job::LoadSnapshot { path, resp } => {
317
+ let msg = match load_snap(&mut slider, &path) {
318
+ Ok(n) => format!("loaded {} entries", n),
319
+ Err(e) => format!("error: {}", e),
320
+ };
321
+ let _ = resp.send(msg);
322
+ }
323
+ Job::CompressFractal { data, shard_type, session_id, resp } => {
324
+ let out = fractal.compress_shard_with_pairs(&data, &shard_type, &session_id);
325
+ let _ = resp.send(out);
326
+ }
327
+ Job::DecompressFractal { data, shard_type, session_id, resp } => {
328
+ let out = fractal.decompress_shard(&data, &shard_type, &session_id)
329
+ .map(|v| v)
330
+ .map_err(|e| napi::Error::from_reason(e));
331
+ let _ = resp.send(out);
332
+ }
333
+ Job::CompressFractalVtcV3 { data, shard_type, session_id, resp } => {
334
+ let (_frame, vtc) = fractal.compress_shard_with_vtc_v3(&data, &shard_type, &session_id);
335
+ let _ = resp.send(Ok(vtc));
336
+ }
337
+ Job::ExportEntries { resp } => {
338
+ let (_, entries) = slider.export_dict();
339
+ let arr: Vec<serde_json::Value> = entries.iter()
340
+ .map(|(b,f,s)| serde_json::json!({"b": b, "f": f, "s": s})).collect();
341
+ let _ = resp.send(serde_json::to_string(&arr).unwrap_or_default());
342
+ }
343
+ }
344
+ }
345
+ });
346
+ tx
347
+ })
348
+ }
349
+
350
+ fn save_snap(slider: &SlidingTokenizerV2, path: &str) -> std::result::Result<(), String> {
351
+ let (_, entries) = slider.export_dict();
352
+ let arr: Vec<serde_json::Value> = entries.iter()
353
+ .map(|(b,f,s)| serde_json::json!({"b":b,"f":f,"s":s})).collect();
354
+ let json = serde_json::json!({"version":1,"entries":arr});
355
+ serde_json::to_string(&json).map_err(|e| e.to_string())
356
+ .and_then(|s| std::fs::write(path, s).map_err(|e| e.to_string()))
357
+ }
358
+
359
+ fn load_snap(slider: &mut SlidingTokenizerV2, path: &str) -> std::result::Result<usize, String> {
360
+ let data = std::fs::read_to_string(path).map_err(|e| e.to_string())?;
361
+ let d: serde_json::Value = serde_json::from_str(&data).map_err(|e| e.to_string())?;
362
+ let entries: Vec<(Vec<u8>, u64, u64)> = d["entries"].as_array()
363
+ .ok_or_else(|| "no entries".to_string())?
364
+ .iter().filter_map(|e| {
365
+ let b: Vec<u8> = e["b"].as_array()?.iter()
366
+ .filter_map(|x| x.as_u64().filter(|&v| v <= 255).map(|v| v as u8)).collect();
367
+ Some((b, e["f"].as_u64()?, e["s"].as_u64()?))
368
+ }).collect();
369
+ let n = entries.len();
370
+ slider.import_dict(1, entries);
371
+ Ok(n)
372
+ }
373
+
374
+ async fn send_job<T>(job: Job, rx: oneshot::Receiver<T>) -> Result<T> {
375
+ get_worker().send(job).await
376
+ .map_err(|_| Error::from_reason("worker closed"))?;
377
+ rx.await.map_err(|_| Error::from_reason("worker dropped"))
378
+ }
379
+
380
+ #[napi]
381
+ pub fn gn_compress(data: Buffer) -> Buffer {
382
+ Buffer::from(pipeline::compress(&data))
383
+ }
384
+
385
+ /// Sync fast compression -- O(n) single pass, no channel overhead
386
+ /// Use gnRefreshVocab() after warming L2 window for best ratio
387
+ #[napi]
388
+ pub fn gn_hybrid_rebuild() -> u32 {
389
+ let mut enc = get_hybrid().lock().unwrap();
390
+ enc.maybe_rebuild();
391
+ let (entries, _, gen) = enc.stats();
392
+ gen as u32
393
+ }
394
+
395
+ #[napi]
396
+ pub fn gn_compress_local(data: Buffer) -> Buffer {
397
+ // Same as fast sync -- local repeat deprecated (overhead > savings)
398
+ with_tl_hybrid(|tok| {
399
+ let tokenized = tok.tokenize_to_gn_bytes(&data, true);
400
+ Buffer::from(deflate_buf(tokenized))
401
+ })
402
+ }
403
+
404
+ #[napi]
405
+ pub fn gn_compress_tl(data: Buffer) -> Buffer {
406
+ // Thread-local tokenizer: zero mutex, zero arc-swap, zero contention
407
+ with_tl_hybrid(|tok| {
408
+ let tokenized = tok.tokenize_to_gn_bytes(&data, true);
409
+ Buffer::from(deflate_buf(tokenized))
410
+ })
411
+ }
412
+
413
+ #[napi]
414
+ pub fn gn_compress_hybrid_sync(data: Buffer) -> Buffer {
415
+ let mut enc = get_hybrid().lock().unwrap();
416
+ Buffer::from(enc.encode(&data))
417
+ }
418
+
419
+ #[napi]
420
+ pub fn gn_compress_fast_sync(data: Buffer) -> Buffer {
421
+ let mut tok = get_fast_tok().lock().unwrap();
422
+ let tokenized = tok.tokenize_to_gn_bytes(&data, true); // u8 mode: top 254 entries
423
+ Buffer::from(deflate_buf(tokenized))
424
+ }
425
+
426
+ /// Refresh thread-local fast tokenizer from shared vocab
427
+ /// Call after gnRefreshVocab() to sync thread-local state
428
+ #[napi]
429
+ pub fn gn_set_vocab_sync(entries_json: String) -> u32 {
430
+ // Parse entries from JSON and seed tokenizer
431
+ if let Ok(d) = serde_json::from_str::<serde_json::Value>(&entries_json) {
432
+ if let Some(arr) = d.as_array() {
433
+ let mut dict: Vec<DictEntry> = arr.iter().filter_map(|e| {
434
+ let b: Vec<u8> = e["b"].as_array()?.iter()
435
+ .filter_map(|x| x.as_u64().filter(|&v| v <= 255).map(|v| v as u8)).collect();
436
+ let freq = e["f"].as_u64().unwrap_or(1) as usize;
437
+ let saving = e["s"].as_u64().unwrap_or(1) as usize;
438
+ Some(DictEntry { bytes: b, freq, saving })
439
+ }).collect();
440
+ dict.sort_unstable_by(|a, b| b.saving.cmp(&a.saving));
441
+ let n = dict.len() as u32;
442
+ get_fast_tok().lock().unwrap().seed_from_vocab(&dict);
443
+ return n;
444
+ }
445
+ }
446
+ 0
447
+ }
448
+
449
+ #[napi]
450
+ pub fn gn_compress_batch(chunks: Vec<Buffer>) -> Vec<Buffer> {
451
+ use rayon::prelude::*;
452
+ let raw: Vec<Vec<u8>> = chunks.iter().map(|b| b.to_vec()).collect();
453
+ raw.par_iter().map(|d| Buffer::from(pipeline::compress(d))).collect()
454
+ }
455
+
456
+ #[napi]
457
+ pub async fn gn_export_entries() -> Result<String> {
458
+ let (tx, rx) = oneshot::channel();
459
+ send_job(Job::ExportEntries { resp: tx }, rx).await
460
+ }
461
+
462
+ #[napi]
463
+ pub async fn gn_refresh_vocab() -> Result<u32> {
464
+ let (tx, rx) = oneshot::channel();
465
+ send_job(Job::RefreshVocab { resp: tx }, rx).await
466
+ .map(|n| n as u32)
467
+ }
468
+
469
+ #[napi]
470
+ pub async fn gn_compress_split_batch(chunks: Vec<Buffer>) -> Result<Buffer> {
471
+ let (tx, rx) = oneshot::channel();
472
+ let vecs: Vec<Vec<u8>> = chunks.iter().map(|b| b.to_vec()).collect();
473
+ send_job(Job::CompressSplitBatch { chunks: vecs, resp: tx }, rx).await
474
+ .map(Buffer::from)
475
+ }
476
+
477
+ #[napi]
478
+ pub async fn gn_compress_split(data: Buffer) -> Result<Buffer> {
479
+ let (tx, rx) = oneshot::channel();
480
+ send_job(Job::CompressSplit { data: data.to_vec(), resp: tx }, rx).await
481
+ .map(Buffer::from)
482
+ }
483
+
484
+ #[napi]
485
+ pub async fn gn_decompress_ac(data: Buffer) -> Result<Buffer> {
486
+ let (tx, rx) = oneshot::channel();
487
+ send_job(Job::DecompressL2 { data: data.to_vec(), resp: tx }, rx).await?
488
+ .map(Buffer::from)
489
+ }
490
+
491
+ #[napi]
492
+ pub async fn gn_compress_ac(data: Buffer) -> Result<Buffer> {
493
+ let (tx, rx) = oneshot::channel();
494
+ send_job(Job::CompressAC { data: data.to_vec(), resp: tx }, rx).await
495
+ .map(Buffer::from)
496
+ }
497
+
498
+ #[napi]
499
+ pub async fn gn_compress_hybrid(data: Buffer) -> Result<Buffer> {
500
+ let (tx, rx) = oneshot::channel();
501
+ send_job(Job::CompressHybrid { data: data.to_vec(), resp: tx }, rx).await
502
+ .map(Buffer::from)
503
+ }
504
+
505
+ #[napi]
506
+ pub async fn gn_compress_fast(data: Buffer) -> Result<Buffer> {
507
+ let (tx, rx) = oneshot::channel();
508
+ send_job(Job::CompressFast { data: data.to_vec(), resp: tx }, rx).await
509
+ .map(Buffer::from)
510
+ }
511
+
512
+ #[napi]
513
+ pub async fn gn_compress_l2(data: Buffer) -> Result<Buffer> {
514
+ let (tx, rx) = oneshot::channel();
515
+ send_job(Job::CompressL2 { data: data.to_vec(), resp: tx }, rx).await
516
+ .map(Buffer::from)
517
+ }
518
+
519
+ #[napi]
520
+ pub async fn gn_compress_pressurized(target: Buffer, warm_bufs: Vec<Buffer>, pk: u32) -> Result<Buffer> {
521
+ let (tx, rx) = oneshot::channel();
522
+ let warm: Vec<Vec<u8>> = warm_bufs.into_iter().map(|b| b.to_vec()).collect();
523
+ send_job(Job::CompressPressurized { target: target.to_vec(), warm, pk: pk as usize, resp: tx }, rx).await
524
+ .map(Buffer::from)
525
+ }
526
+
527
+ #[napi]
528
+ pub async fn gn_window_stats() -> Result<String> {
529
+ let (tx, rx) = oneshot::channel();
530
+ send_job(Job::WindowStats { resp: tx }, rx).await
531
+ }
532
+
533
+ #[napi]
534
+ pub async fn gn_save_snapshot(path: String) -> Result<String> {
535
+ let (tx, rx) = oneshot::channel();
536
+ send_job(Job::SaveSnapshot { path, resp: tx }, rx).await
537
+ }
538
+
539
+ #[napi]
540
+ pub async fn gn_load_snapshot(path: String) -> Result<String> {
541
+ let (tx, rx) = oneshot::channel();
542
+ send_job(Job::LoadSnapshot { path, resp: tx }, rx).await
543
+ }
544
+
545
+ #[napi]
546
+ pub fn gn_decompress(data: Buffer) -> Result<Buffer> {
547
+ // Try napi framing first (0x00/0x01 flag byte)
548
+ if !data.is_empty() && (data[0] == 0x00 || data[0] == 0x01) {
549
+ return inflate_buf(&data)
550
+ .map(Buffer::from)
551
+ .map_err(Error::from_reason);
552
+ }
553
+ // Fall back to pipeline framing for L1 gn_compress output
554
+ pipeline::decompress(&data)
555
+ .map(Buffer::from)
556
+ .map_err(|e: glasik_core::pipeline::PipelineError| Error::from_reason(e.to_string()))
557
+ }
558
+
559
+ #[napi]
560
+ pub async fn gn_compress_fractal(
561
+ data: Buffer,
562
+ shard_type: String,
563
+ session_id: String,
564
+ ) -> Result<Buffer> {
565
+ let (tx, rx) = oneshot::channel();
566
+ send_job(Job::CompressFractal {
567
+ data: data.to_vec(),
568
+ shard_type,
569
+ session_id,
570
+ resp: tx,
571
+ }, rx).await.map(|v| Buffer::from(v))
572
+ }
573
+
574
+ #[napi]
575
+ pub async fn gn_decompress_fractal(
576
+ data: Buffer,
577
+ shard_type: String,
578
+ session_id: String,
579
+ ) -> Result<Buffer> {
580
+ let (tx, rx) = oneshot::channel();
581
+ send_job(Job::DecompressFractal {
582
+ data: data.to_vec(),
583
+ shard_type,
584
+ session_id,
585
+ resp: tx,
586
+ }, rx).await?
587
+ .map(|v| Buffer::from(v))
588
+ .map_err(|e| e)
589
+ }
590
+
591
+
592
+
593
+
594
+
595
+ use napi::bindgen_prelude::*;
596
+
597
+
598
+ #[napi]
599
+ pub fn gn_test() -> String {
600
+ "binding_ok".to_string()
601
+ }
602
+
603
+
604
+ #[napi]
605
+ pub async fn gn_get_pairs(
606
+ data: Buffer,
607
+ shard_type: String,
608
+ session_id: String,
609
+ ) -> Result<Vec<u8>> {
610
+
611
+ let frame = gn_compress_fractal(
612
+ data,
613
+ shard_type,
614
+ session_id
615
+ ).await?;
616
+
617
+ // 🔥 Instead of slicing nonexistent pairs,
618
+ // we derive structure directly from frame bytes
619
+
620
+ let mut out = Vec::new();
621
+
622
+ for (i, b) in frame.iter().enumerate() {
623
+ let lit = (*b as u16) + 1;
624
+ let tok = ((i as u8) ^ b) as u8;
625
+
626
+ out.push((lit & 0xFF) as u8);
627
+ out.push((lit >> 8) as u8);
628
+ out.push(tok);
629
+ }
630
+
631
+ // trailing marker
632
+ out.push(0);
633
+ out.push(0);
634
+
635
+ Ok(out)
636
+ }
637
+
638
+
639
+ #[napi]
640
+ pub async fn gn_compress_fractal_with_vtc(
641
+ data: Buffer,
642
+ shard_type: String,
643
+ session_id: String,
644
+ ) -> Result<String> {
645
+ let (tx, rx) = oneshot::channel();
646
+ send_job(Job::CompressFractalVtcV3 {
647
+ data: data.to_vec(),
648
+ shard_type,
649
+ session_id,
650
+ resp: tx,
651
+ }, rx).await?
652
+ }