roxify 1.9.8 → 1.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +14 -2
- package/LICENSE +63 -21
- package/README.md +178 -163
- package/dist/cli.js +8 -1
- package/dist/utils/constants.d.ts +5 -0
- package/dist/utils/constants.js +1 -0
- package/dist/utils/decoder.js +12 -3
- package/dist/utils/encoder.js +24 -11
- package/dist/utils/rust-cli-wrapper.js +6 -3
- package/dist/utils/types.d.ts +1 -1
- package/libroxify_native-x86_64-unknown-linux-gnu.node +0 -0
- package/native/bench_hybrid.rs +145 -0
- package/native/bwt.rs +25 -69
- package/native/hybrid.rs +92 -70
- package/native/lib.rs +6 -3
- package/native/mtf.rs +106 -0
- package/native/rans_byte.rs +190 -0
- package/native/test_small_bwt.rs +31 -0
- package/native/test_stages.rs +70 -0
- package/package.json +5 -2
- package/dist/roxify_native.exe +0 -0
- package/roxify_native-x86_64-pc-windows-msvc.node +0 -0
package/dist/utils/encoder.js
CHANGED
|
@@ -93,7 +93,8 @@ export async function encodeBinaryToPng(input, opts = {}) {
|
|
|
93
93
|
// This must be checked BEFORE TS compression to avoid double-compression.
|
|
94
94
|
if (typeof native.nativeEncodePngWithNameAndFilelist === 'function' &&
|
|
95
95
|
opts.includeFileList &&
|
|
96
|
-
opts.fileList
|
|
96
|
+
opts.fileList &&
|
|
97
|
+
opts.compression !== 'bwt-ans') {
|
|
97
98
|
const fileName = opts.name || undefined;
|
|
98
99
|
const inputBuf = Array.isArray(input) ? Buffer.concat(input) : input;
|
|
99
100
|
let sizeMap = null;
|
|
@@ -170,15 +171,27 @@ export async function encodeBinaryToPng(input, opts = {}) {
|
|
|
170
171
|
}
|
|
171
172
|
if (opts.onProgress)
|
|
172
173
|
opts.onProgress({ phase: 'compress_start', total: totalLen });
|
|
173
|
-
let payload
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
174
|
+
let payload;
|
|
175
|
+
if (opts.compression === 'bwt-ans' && native?.hybridCompress) {
|
|
176
|
+
const flat = Array.isArray(payloadInput) ? Buffer.concat(payloadInput) : payloadInput;
|
|
177
|
+
if (opts.onProgress)
|
|
178
|
+
opts.onProgress({ phase: 'compress_progress', loaded: 0, total: 1 });
|
|
179
|
+
const compressed = Buffer.from(native.hybridCompress(flat));
|
|
180
|
+
payload = [compressed];
|
|
181
|
+
if (opts.onProgress)
|
|
182
|
+
opts.onProgress({ phase: 'compress_progress', loaded: 1, total: 1 });
|
|
183
|
+
}
|
|
184
|
+
else {
|
|
185
|
+
payload = await parallelZstdCompress(payloadInput, compressionLevel, (loaded, total) => {
|
|
186
|
+
if (opts.onProgress) {
|
|
187
|
+
opts.onProgress({
|
|
188
|
+
phase: 'compress_progress',
|
|
189
|
+
loaded,
|
|
190
|
+
total,
|
|
191
|
+
});
|
|
192
|
+
}
|
|
193
|
+
}, opts.dict);
|
|
194
|
+
}
|
|
182
195
|
if (opts.onProgress)
|
|
183
196
|
opts.onProgress({ phase: 'compress_done', loaded: payload.length });
|
|
184
197
|
if (Array.isArray(input)) {
|
|
@@ -392,7 +405,7 @@ export async function encodeBinaryToPng(input, opts = {}) {
|
|
|
392
405
|
[...dataWithoutMarkers, Buffer.alloc(padding)]
|
|
393
406
|
: dataWithoutMarkers;
|
|
394
407
|
const markerStartBytes = colorsToBytes(MARKER_START);
|
|
395
|
-
const compressionMarkerBytes = colorsToBytes(COMPRESSION_MARKERS.zstd);
|
|
408
|
+
const compressionMarkerBytes = colorsToBytes(opts.compression === 'bwt-ans' ? COMPRESSION_MARKERS['bwt-ans'] : COMPRESSION_MARKERS.zstd);
|
|
396
409
|
const dataWithMarkers = [
|
|
397
410
|
markerStartBytes,
|
|
398
411
|
compressionMarkerBytes,
|
|
@@ -15,9 +15,12 @@ else {
|
|
|
15
15
|
}
|
|
16
16
|
}
|
|
17
17
|
function findRustBinary() {
|
|
18
|
-
const
|
|
19
|
-
|
|
20
|
-
: ['roxify_native', 'roxify-cli', 'roxify_cli']
|
|
18
|
+
const platformBins = {
|
|
19
|
+
win32: ['roxify_native.exe', 'roxify-cli.exe', 'roxify_cli.exe'],
|
|
20
|
+
darwin: ['rox-macos-universal', 'roxify_native-macos-arm64', 'roxify_native-macos-x64', 'roxify_native', 'roxify-cli', 'roxify_cli'],
|
|
21
|
+
linux: ['roxify_native', 'roxify-cli', 'roxify_cli'],
|
|
22
|
+
};
|
|
23
|
+
const binNames = platformBins[process.platform] || platformBins.linux;
|
|
21
24
|
const baseDir = moduleDir;
|
|
22
25
|
for (const name of binNames) {
|
|
23
26
|
const sameDirPath = join(baseDir, name);
|
package/dist/utils/types.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { PackedFile } from '../pack.js';
|
|
2
2
|
import type { EccLevel } from './ecc.js';
|
|
3
3
|
export interface EncodeOptions {
|
|
4
|
-
compression?: 'zstd';
|
|
4
|
+
compression?: 'zstd' | 'bwt-ans';
|
|
5
5
|
compressionLevel?: number;
|
|
6
6
|
passphrase?: string;
|
|
7
7
|
/** optional dictionary to use for zstd compression */
|
|
Binary file
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
use std::time::Instant;
|
|
2
|
+
|
|
3
|
+
mod rans_byte;
|
|
4
|
+
mod bwt;
|
|
5
|
+
mod mtf;
|
|
6
|
+
mod context_mixing;
|
|
7
|
+
mod pool;
|
|
8
|
+
mod hybrid;
|
|
9
|
+
|
|
10
|
+
fn bench_roundtrip(name: &str, data: &[u8]) {
|
|
11
|
+
let compressor = hybrid::HybridCompressor::new(false, 4);
|
|
12
|
+
|
|
13
|
+
let start = Instant::now();
|
|
14
|
+
let (compressed, stats) = compressor.compress(data).unwrap();
|
|
15
|
+
let compress_time = start.elapsed();
|
|
16
|
+
|
|
17
|
+
let start = Instant::now();
|
|
18
|
+
let decompressed = compressor.decompress(&compressed).unwrap();
|
|
19
|
+
let decompress_time = start.elapsed();
|
|
20
|
+
|
|
21
|
+
let ratio = (compressed.len() as f64) / (data.len() as f64) * 100.0;
|
|
22
|
+
let compress_mbps = (data.len() as f64 / 1_048_576.0) / compress_time.as_secs_f64();
|
|
23
|
+
let decompress_mbps = (data.len() as f64 / 1_048_576.0) / decompress_time.as_secs_f64();
|
|
24
|
+
|
|
25
|
+
assert_eq!(decompressed, data, "ROUND-TRIP FAILED for {}", name);
|
|
26
|
+
|
|
27
|
+
println!("=== {} ===", name);
|
|
28
|
+
println!(" Input: {} bytes", data.len());
|
|
29
|
+
println!(" Compressed: {} bytes ({:.1}%)", compressed.len(), ratio);
|
|
30
|
+
println!(" Reduction: {:.1}%", 100.0 - ratio);
|
|
31
|
+
println!(" Compress: {:.1} ms ({:.1} MB/s)", compress_time.as_secs_f64() * 1000.0, compress_mbps);
|
|
32
|
+
println!(" Decompress: {:.1} ms ({:.1} MB/s)", decompress_time.as_secs_f64() * 1000.0, decompress_mbps);
|
|
33
|
+
println!(" Entropy: {:.2} bits/byte", stats.entropy_bits);
|
|
34
|
+
println!();
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
fn bench_zstd(name: &str, data: &[u8], level: i32) {
|
|
38
|
+
let start = Instant::now();
|
|
39
|
+
let compressed = zstd::encode_all(std::io::Cursor::new(data), level).unwrap();
|
|
40
|
+
let compress_time = start.elapsed();
|
|
41
|
+
|
|
42
|
+
let start = Instant::now();
|
|
43
|
+
let decompressed = zstd::decode_all(std::io::Cursor::new(&compressed)).unwrap();
|
|
44
|
+
let decompress_time = start.elapsed();
|
|
45
|
+
|
|
46
|
+
let ratio = (compressed.len() as f64) / (data.len() as f64) * 100.0;
|
|
47
|
+
let compress_mbps = (data.len() as f64 / 1_048_576.0) / compress_time.as_secs_f64();
|
|
48
|
+
let decompress_mbps = (data.len() as f64 / 1_048_576.0) / decompress_time.as_secs_f64();
|
|
49
|
+
|
|
50
|
+
assert_eq!(decompressed, data);
|
|
51
|
+
|
|
52
|
+
println!("=== Zstd L{} ({}) ===", level, name);
|
|
53
|
+
println!(" Compressed: {} bytes ({:.1}%)", compressed.len(), ratio);
|
|
54
|
+
println!(" Reduction: {:.1}%", 100.0 - ratio);
|
|
55
|
+
println!(" Compress: {:.1} ms ({:.1} MB/s)", compress_time.as_secs_f64() * 1000.0, compress_mbps);
|
|
56
|
+
println!(" Decompress: {:.1} ms ({:.1} MB/s)", decompress_time.as_secs_f64() * 1000.0, decompress_mbps);
|
|
57
|
+
println!();
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
fn main() {
|
|
61
|
+
println!("╔══════════════════════════════════════════════════════════╗");
|
|
62
|
+
println!("║ ROXIFY BWT-ANS COMPRESSION BENCHMARK ║");
|
|
63
|
+
println!("╚══════════════════════════════════════════════════════════╝\n");
|
|
64
|
+
|
|
65
|
+
let text_1k: Vec<u8> = "Hello World! This is a test of the roxify compression engine. ".repeat(16).into_bytes();
|
|
66
|
+
bench_roundtrip("Text 1KB", &text_1k);
|
|
67
|
+
bench_zstd("Text 1KB", &text_1k, 3);
|
|
68
|
+
bench_zstd("Text 1KB", &text_1k, 19);
|
|
69
|
+
|
|
70
|
+
let text_100k: Vec<u8> = "The quick brown fox jumps over the lazy dog. Pack my box with five dozen liquor jugs. ".repeat(1200).into_bytes();
|
|
71
|
+
bench_roundtrip("Text 100KB", &text_100k);
|
|
72
|
+
bench_zstd("Text 100KB", &text_100k, 3);
|
|
73
|
+
bench_zstd("Text 100KB", &text_100k, 19);
|
|
74
|
+
|
|
75
|
+
let text_1m: Vec<u8> = {
|
|
76
|
+
let mut data = Vec::with_capacity(1_048_576);
|
|
77
|
+
let phrases = [
|
|
78
|
+
b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".as_slice(),
|
|
79
|
+
b"Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. ".as_slice(),
|
|
80
|
+
b"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. ".as_slice(),
|
|
81
|
+
b"Duis aute irure dolor in reprehenderit in voluptate velit esse. ".as_slice(),
|
|
82
|
+
b"Excepteur sint occaecat cupidatat non proident, sunt in culpa. ".as_slice(),
|
|
83
|
+
];
|
|
84
|
+
let mut i = 0;
|
|
85
|
+
while data.len() < 1_048_576 {
|
|
86
|
+
data.extend_from_slice(phrases[i % phrases.len()]);
|
|
87
|
+
i += 1;
|
|
88
|
+
}
|
|
89
|
+
data.truncate(1_048_576);
|
|
90
|
+
data
|
|
91
|
+
};
|
|
92
|
+
bench_roundtrip("Text 1MB", &text_1m);
|
|
93
|
+
bench_zstd("Text 1MB", &text_1m, 3);
|
|
94
|
+
bench_zstd("Text 1MB", &text_1m, 19);
|
|
95
|
+
|
|
96
|
+
let json_data: Vec<u8> = {
|
|
97
|
+
let mut data = String::with_capacity(512_000);
|
|
98
|
+
data.push('[');
|
|
99
|
+
for i in 0..5000 {
|
|
100
|
+
if i > 0 { data.push(','); }
|
|
101
|
+
data.push_str(&format!(
|
|
102
|
+
r#"{{"id":{},"name":"user_{}","email":"user{}@example.com","active":{},"score":{:.2},"tags":["tag1","tag2","tag3"]}}"#,
|
|
103
|
+
i, i, i, i % 2 == 0, (i as f64) * 1.337
|
|
104
|
+
));
|
|
105
|
+
}
|
|
106
|
+
data.push(']');
|
|
107
|
+
data.into_bytes()
|
|
108
|
+
};
|
|
109
|
+
bench_roundtrip("JSON 500KB", &json_data);
|
|
110
|
+
bench_zstd("JSON 500KB", &json_data, 3);
|
|
111
|
+
bench_zstd("JSON 500KB", &json_data, 19);
|
|
112
|
+
|
|
113
|
+
let random_data: Vec<u8> = {
|
|
114
|
+
let mut data = vec![0u8; 100_000];
|
|
115
|
+
let mut state = 12345u64;
|
|
116
|
+
for b in data.iter_mut() {
|
|
117
|
+
state = state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
|
|
118
|
+
*b = (state >> 33) as u8;
|
|
119
|
+
}
|
|
120
|
+
data
|
|
121
|
+
};
|
|
122
|
+
bench_roundtrip("Random 100KB", &random_data);
|
|
123
|
+
bench_zstd("Random 100KB", &random_data, 3);
|
|
124
|
+
|
|
125
|
+
let binary_data: Vec<u8> = {
|
|
126
|
+
let mut data = Vec::with_capacity(256_000);
|
|
127
|
+
for i in 0..256_000u32 {
|
|
128
|
+
match i % 7 {
|
|
129
|
+
0 => data.push(0),
|
|
130
|
+
1 => data.push(0xFF),
|
|
131
|
+
2 => data.push((i & 0xFF) as u8),
|
|
132
|
+
3 => data.push(((i >> 8) & 0xFF) as u8),
|
|
133
|
+
4 => data.push(b'A' + (i % 26) as u8),
|
|
134
|
+
5 => data.push(0x20),
|
|
135
|
+
_ => data.push((i.wrapping_mul(37) & 0xFF) as u8),
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
data
|
|
139
|
+
};
|
|
140
|
+
bench_roundtrip("Binary 256KB", &binary_data);
|
|
141
|
+
bench_zstd("Binary 256KB", &binary_data, 3);
|
|
142
|
+
bench_zstd("Binary 256KB", &binary_data, 19);
|
|
143
|
+
|
|
144
|
+
println!("All round-trip tests PASSED!");
|
|
145
|
+
}
|
package/native/bwt.rs
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
use anyhow::Result;
|
|
2
|
+
use libsais::bwt::Bwt;
|
|
3
|
+
use libsais::typestate::OwnedBuffer;
|
|
4
|
+
use libsais::BwtConstruction;
|
|
2
5
|
use rayon::prelude::*;
|
|
3
6
|
|
|
4
7
|
pub struct BwtResult {
|
|
@@ -7,86 +10,39 @@ pub struct BwtResult {
|
|
|
7
10
|
}
|
|
8
11
|
|
|
9
12
|
pub fn bwt_encode(data: &[u8]) -> Result<BwtResult> {
|
|
10
|
-
if data.is_empty() {
|
|
11
|
-
return Ok(BwtResult {
|
|
12
|
-
transformed: Vec::new(),
|
|
13
|
-
primary_index: 0,
|
|
14
|
-
});
|
|
15
|
-
}
|
|
16
|
-
|
|
17
13
|
let n = data.len();
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
for i in 0..n {
|
|
22
|
-
let ca = data[(a + i) % n];
|
|
23
|
-
let cb = data[(b + i) % n];
|
|
24
|
-
if ca != cb {
|
|
25
|
-
return ca.cmp(&cb);
|
|
26
|
-
}
|
|
27
|
-
}
|
|
28
|
-
std::cmp::Ordering::Equal
|
|
29
|
-
});
|
|
14
|
+
if n == 0 {
|
|
15
|
+
return Ok(BwtResult { transformed: Vec::new(), primary_index: 0 });
|
|
16
|
+
}
|
|
30
17
|
|
|
31
|
-
let
|
|
32
|
-
|
|
18
|
+
let bwt_result = BwtConstruction::for_text(data)
|
|
19
|
+
.with_owned_temporary_array_buffer32()
|
|
20
|
+
.single_threaded()
|
|
21
|
+
.run()
|
|
22
|
+
.map_err(|e| anyhow::anyhow!("libsais BWT: {:?}", e))?;
|
|
33
23
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
primary_index = idx as u32;
|
|
37
|
-
}
|
|
38
|
-
transformed.push(data[(rot + n - 1) % n]);
|
|
39
|
-
}
|
|
24
|
+
let primary_index = bwt_result.primary_index() as u32;
|
|
25
|
+
let transformed = bwt_result.bwt().to_vec();
|
|
40
26
|
|
|
41
|
-
Ok(BwtResult {
|
|
42
|
-
transformed,
|
|
43
|
-
primary_index,
|
|
44
|
-
})
|
|
27
|
+
Ok(BwtResult { transformed, primary_index })
|
|
45
28
|
}
|
|
46
29
|
|
|
47
|
-
pub fn bwt_decode(
|
|
48
|
-
if
|
|
30
|
+
pub fn bwt_decode(bwt_data: &[u8], primary_index: u32) -> Result<Vec<u8>> {
|
|
31
|
+
if bwt_data.is_empty() {
|
|
49
32
|
return Ok(Vec::new());
|
|
50
33
|
}
|
|
51
34
|
|
|
52
|
-
let
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
if primary_idx >= n {
|
|
56
|
-
return Err(anyhow::anyhow!("Invalid primary index"));
|
|
57
|
-
}
|
|
35
|
+
let bwt_obj: Bwt<'static, u8, OwnedBuffer> =
|
|
36
|
+
unsafe { Bwt::from_parts(bwt_data.to_vec(), primary_index as usize) };
|
|
58
37
|
|
|
59
|
-
let
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
let mut sum = 0;
|
|
66
|
-
for i in 0..256 {
|
|
67
|
-
cumsum[i] = sum;
|
|
68
|
-
sum += counts[i];
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
let mut next = vec![0usize; n];
|
|
72
|
-
let mut counts = vec![0usize; 256];
|
|
73
|
-
|
|
74
|
-
for i in 0..n {
|
|
75
|
-
let byte = data[i] as usize;
|
|
76
|
-
let pos = cumsum[byte] + counts[byte];
|
|
77
|
-
next[pos] = i;
|
|
78
|
-
counts[byte] += 1;
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
let mut result = Vec::with_capacity(n);
|
|
82
|
-
let mut idx = primary_idx;
|
|
83
|
-
|
|
84
|
-
for _ in 0..n {
|
|
85
|
-
result.push(data[idx]);
|
|
86
|
-
idx = next[idx];
|
|
87
|
-
}
|
|
38
|
+
let text = bwt_obj
|
|
39
|
+
.unbwt()
|
|
40
|
+
.with_owned_temporary_array_buffer32()
|
|
41
|
+
.single_threaded()
|
|
42
|
+
.run()
|
|
43
|
+
.map_err(|e| anyhow::anyhow!("libsais UnBWT: {:?}", e))?;
|
|
88
44
|
|
|
89
|
-
Ok(
|
|
45
|
+
Ok(text.as_slice().to_vec())
|
|
90
46
|
}
|
|
91
47
|
|
|
92
48
|
pub fn bwt_encode_streaming(block_size: usize, data: &[u8]) -> Result<Vec<(BwtResult, usize)>> {
|
package/native/hybrid.rs
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
use anyhow::Result;
|
|
2
2
|
use rayon::prelude::*;
|
|
3
|
-
use crate::bwt::bwt_encode;
|
|
3
|
+
use crate::bwt::{bwt_encode, bwt_decode};
|
|
4
|
+
use crate::mtf::{mtf_encode, mtf_decode, rle0_encode, rle0_decode};
|
|
5
|
+
use crate::rans_byte::{SymbolStats, rans_encode_block, rans_decode_block};
|
|
4
6
|
use crate::context_mixing::analyze_entropy;
|
|
5
|
-
use crate::rans::{build_symbols_from_frequencies, RansEncoder};
|
|
6
|
-
use crate::pool::BufferPool;
|
|
7
|
-
use std::sync::Arc;
|
|
8
7
|
|
|
9
|
-
const BLOCK_SIZE: usize =
|
|
8
|
+
const BLOCK_SIZE: usize = 1024 * 1024;
|
|
10
9
|
|
|
11
10
|
#[derive(Clone, Debug)]
|
|
12
11
|
pub struct CompressionStats {
|
|
@@ -18,16 +17,12 @@ pub struct CompressionStats {
|
|
|
18
17
|
}
|
|
19
18
|
|
|
20
19
|
pub struct HybridCompressor {
|
|
21
|
-
pool: Arc<BufferPool>,
|
|
22
|
-
enable_gpu: bool,
|
|
23
20
|
block_size: usize,
|
|
24
21
|
}
|
|
25
22
|
|
|
26
23
|
impl HybridCompressor {
|
|
27
|
-
pub fn new(
|
|
24
|
+
pub fn new(_enable_gpu: bool, _pool_size: usize) -> Self {
|
|
28
25
|
HybridCompressor {
|
|
29
|
-
pool: Arc::new(BufferPool::new(pool_size, BLOCK_SIZE)),
|
|
30
|
-
enable_gpu,
|
|
31
26
|
block_size: BLOCK_SIZE,
|
|
32
27
|
}
|
|
33
28
|
}
|
|
@@ -40,12 +35,15 @@ impl HybridCompressor {
|
|
|
40
35
|
let blocks_count = blocks.len();
|
|
41
36
|
|
|
42
37
|
let compressed_blocks: Vec<Vec<u8>> = blocks
|
|
43
|
-
.
|
|
44
|
-
.map(|block|
|
|
38
|
+
.into_iter()
|
|
39
|
+
.map(|block| compress_block(block))
|
|
45
40
|
.collect::<Result<Vec<_>, _>>()?;
|
|
46
41
|
|
|
47
|
-
let
|
|
42
|
+
let total_compressed: usize = compressed_blocks.iter().map(|b| b.len() + 4).sum();
|
|
43
|
+
let mut result = Vec::with_capacity(16 + total_compressed);
|
|
44
|
+
result.extend_from_slice(b"RBW1");
|
|
48
45
|
result.extend_from_slice(&(blocks_count as u32).to_le_bytes());
|
|
46
|
+
result.extend_from_slice(&original_size.to_le_bytes());
|
|
49
47
|
|
|
50
48
|
for block in &compressed_blocks {
|
|
51
49
|
result.extend_from_slice(&(block.len() as u32).to_le_bytes());
|
|
@@ -64,83 +62,53 @@ impl HybridCompressor {
|
|
|
64
62
|
}))
|
|
65
63
|
}
|
|
66
64
|
|
|
67
|
-
fn
|
|
68
|
-
if
|
|
69
|
-
return
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
let bwt = bwt_encode(block)?;
|
|
73
|
-
let bwt_data = &bwt.transformed;
|
|
74
|
-
|
|
75
|
-
let mut freqs = vec![0u32; 256];
|
|
76
|
-
for &byte in bwt_data {
|
|
77
|
-
freqs[byte as usize] += 1;
|
|
65
|
+
pub fn decompress(&self, data: &[u8]) -> Result<Vec<u8>> {
|
|
66
|
+
if data.len() < 16 {
|
|
67
|
+
return Err(anyhow::anyhow!("Invalid compressed data"));
|
|
78
68
|
}
|
|
79
69
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
for &byte in bwt_data {
|
|
84
|
-
for bit_idx in 0..8 {
|
|
85
|
-
let bit = (byte >> bit_idx) & 1 == 1;
|
|
86
|
-
let symbol_idx = if bit { 1 } else { 0 };
|
|
87
|
-
let _ = encoder.encode(symbol_idx);
|
|
88
|
-
}
|
|
70
|
+
if &data[0..4] != b"RBW1" {
|
|
71
|
+
return Err(anyhow::anyhow!("Invalid magic"));
|
|
89
72
|
}
|
|
90
73
|
|
|
91
|
-
let
|
|
92
|
-
let
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
Ok(result)
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
pub fn decompress(&self, data: &[u8]) -> Result<Vec<u8>> {
|
|
100
|
-
if data.len() < 4 {
|
|
101
|
-
return Err(anyhow::anyhow!("Invalid compressed data"));
|
|
102
|
-
}
|
|
74
|
+
let blocks_count = u32::from_le_bytes([data[4], data[5], data[6], data[7]]) as usize;
|
|
75
|
+
let original_size = u64::from_le_bytes([
|
|
76
|
+
data[8], data[9], data[10], data[11],
|
|
77
|
+
data[12], data[13], data[14], data[15],
|
|
78
|
+
]) as usize;
|
|
103
79
|
|
|
104
|
-
let
|
|
105
|
-
let mut
|
|
106
|
-
let mut result = Vec::new();
|
|
80
|
+
let mut pos = 16;
|
|
81
|
+
let mut block_ranges: Vec<(usize, usize)> = Vec::with_capacity(blocks_count);
|
|
107
82
|
|
|
108
83
|
for _ in 0..blocks_count {
|
|
109
84
|
if pos + 4 > data.len() {
|
|
110
85
|
return Err(anyhow::anyhow!("Truncated block header"));
|
|
111
86
|
}
|
|
112
|
-
|
|
113
87
|
let block_size = u32::from_le_bytes([
|
|
114
|
-
data[pos],
|
|
115
|
-
data[pos + 1],
|
|
116
|
-
data[pos + 2],
|
|
117
|
-
data[pos + 3],
|
|
88
|
+
data[pos], data[pos + 1], data[pos + 2], data[pos + 3],
|
|
118
89
|
]) as usize;
|
|
119
90
|
pos += 4;
|
|
120
|
-
|
|
121
91
|
if pos + block_size > data.len() {
|
|
122
92
|
return Err(anyhow::anyhow!("Truncated block data"));
|
|
123
93
|
}
|
|
124
|
-
|
|
125
|
-
let block_data = &data[pos..pos + block_size];
|
|
94
|
+
block_ranges.push((pos, block_size));
|
|
126
95
|
pos += block_size;
|
|
127
|
-
|
|
128
|
-
let decompressed = self.decompress_block(block_data)?;
|
|
129
|
-
result.extend_from_slice(&decompressed);
|
|
130
96
|
}
|
|
131
97
|
|
|
132
|
-
|
|
133
|
-
|
|
98
|
+
let decompressed_blocks: Vec<Vec<u8>> = block_ranges
|
|
99
|
+
.into_iter()
|
|
100
|
+
.map(|(start, size)| {
|
|
101
|
+
let block_data = &data[start..start + size];
|
|
102
|
+
decompress_block(block_data)
|
|
103
|
+
})
|
|
104
|
+
.collect::<Result<Vec<_>, _>>()?;
|
|
134
105
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
106
|
+
let mut result = Vec::with_capacity(original_size);
|
|
107
|
+
for block in decompressed_blocks {
|
|
108
|
+
result.extend_from_slice(&block);
|
|
138
109
|
}
|
|
139
110
|
|
|
140
|
-
|
|
141
|
-
let _encoded_data = &block[4..];
|
|
142
|
-
|
|
143
|
-
Ok(Vec::new())
|
|
111
|
+
Ok(result)
|
|
144
112
|
}
|
|
145
113
|
|
|
146
114
|
pub fn estimate_gain(&self, data: &[u8]) -> f64 {
|
|
@@ -151,12 +119,66 @@ impl HybridCompressor {
|
|
|
151
119
|
}
|
|
152
120
|
}
|
|
153
121
|
|
|
122
|
+
fn compress_block(block: &[u8]) -> Result<Vec<u8>> {
|
|
123
|
+
if block.is_empty() {
|
|
124
|
+
return Ok(Vec::new());
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
let bwt = bwt_encode(block)?;
|
|
128
|
+
let mtf_data = mtf_encode(&bwt.transformed);
|
|
129
|
+
let rle_data = rle0_encode(&mtf_data);
|
|
130
|
+
|
|
131
|
+
let stats = SymbolStats::from_data(&rle_data);
|
|
132
|
+
let encoded = rans_encode_block(&rle_data, &stats);
|
|
133
|
+
|
|
134
|
+
let stats_bytes = stats.serialize();
|
|
135
|
+
let rle_len = rle_data.len() as u32;
|
|
136
|
+
let orig_len = block.len() as u32;
|
|
137
|
+
|
|
138
|
+
let mut result = Vec::with_capacity(4 + 4 + 4 + stats_bytes.len() + encoded.len());
|
|
139
|
+
result.extend_from_slice(&bwt.primary_index.to_le_bytes());
|
|
140
|
+
result.extend_from_slice(&orig_len.to_le_bytes());
|
|
141
|
+
result.extend_from_slice(&rle_len.to_le_bytes());
|
|
142
|
+
result.extend_from_slice(&stats_bytes);
|
|
143
|
+
result.extend_from_slice(&encoded);
|
|
144
|
+
|
|
145
|
+
Ok(result)
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
fn decompress_block(block: &[u8]) -> Result<Vec<u8>> {
|
|
149
|
+
if block.len() < 12 {
|
|
150
|
+
return Err(anyhow::anyhow!("Block too small"));
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
let primary_index = u32::from_le_bytes([block[0], block[1], block[2], block[3]]);
|
|
154
|
+
let orig_len = u32::from_le_bytes([block[4], block[5], block[6], block[7]]) as usize;
|
|
155
|
+
let rle_len = u32::from_le_bytes([block[8], block[9], block[10], block[11]]) as usize;
|
|
156
|
+
|
|
157
|
+
let (stats, stats_size) = SymbolStats::deserialize(&block[12..])?;
|
|
158
|
+
let encoded = &block[12 + stats_size..];
|
|
159
|
+
|
|
160
|
+
let rle_data = rans_decode_block(encoded, &stats, rle_len)?;
|
|
161
|
+
let mtf_data = rle0_decode(&rle_data);
|
|
162
|
+
let bwt_data = mtf_decode(&mtf_data);
|
|
163
|
+
let original = bwt_decode(&bwt_data, primary_index)?;
|
|
164
|
+
|
|
165
|
+
if original.len() != orig_len {
|
|
166
|
+
return Err(anyhow::anyhow!(
|
|
167
|
+
"Size mismatch: expected {}, got {}",
|
|
168
|
+
orig_len,
|
|
169
|
+
original.len()
|
|
170
|
+
));
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
Ok(original)
|
|
174
|
+
}
|
|
175
|
+
|
|
154
176
|
pub fn compress_high_performance(data: &[u8]) -> Result<(Vec<u8>, CompressionStats)> {
|
|
155
|
-
let compressor = HybridCompressor::new(
|
|
177
|
+
let compressor = HybridCompressor::new(false, 4);
|
|
156
178
|
compressor.compress(data)
|
|
157
179
|
}
|
|
158
180
|
|
|
159
181
|
pub fn decompress_high_performance(data: &[u8]) -> Result<Vec<u8>> {
|
|
160
|
-
let compressor = HybridCompressor::new(
|
|
182
|
+
let compressor = HybridCompressor::new(false, 4);
|
|
161
183
|
compressor.decompress(data)
|
|
162
184
|
}
|
package/native/lib.rs
CHANGED
|
@@ -9,7 +9,9 @@ mod core;
|
|
|
9
9
|
#[cfg(feature = "gpu")]
|
|
10
10
|
mod gpu;
|
|
11
11
|
mod rans;
|
|
12
|
+
mod rans_byte;
|
|
12
13
|
mod bwt;
|
|
14
|
+
mod mtf;
|
|
13
15
|
mod context_mixing;
|
|
14
16
|
mod pool;
|
|
15
17
|
mod hybrid;
|
|
@@ -150,17 +152,18 @@ pub fn entropy_estimate(buffer: Buffer) -> f32 {
|
|
|
150
152
|
|
|
151
153
|
#[cfg(not(test))]
|
|
152
154
|
#[napi]
|
|
153
|
-
pub fn hybrid_compress(buffer: Buffer) -> Result<
|
|
155
|
+
pub fn hybrid_compress(buffer: Buffer) -> Result<Buffer> {
|
|
154
156
|
match hybrid::compress_high_performance(&buffer) {
|
|
155
|
-
Ok((compressed, _stats)) => Ok(compressed),
|
|
157
|
+
Ok((compressed, _stats)) => Ok(Buffer::from(compressed)),
|
|
156
158
|
Err(e) => Err(Error::from_reason(e.to_string())),
|
|
157
159
|
}
|
|
158
160
|
}
|
|
159
161
|
|
|
160
162
|
#[cfg(not(test))]
|
|
161
163
|
#[napi]
|
|
162
|
-
pub fn hybrid_decompress(buffer: Buffer) -> Result<
|
|
164
|
+
pub fn hybrid_decompress(buffer: Buffer) -> Result<Buffer> {
|
|
163
165
|
hybrid::decompress_high_performance(&buffer)
|
|
166
|
+
.map(Buffer::from)
|
|
164
167
|
.map_err(|e| Error::from_reason(e.to_string()))
|
|
165
168
|
}
|
|
166
169
|
|