roxify 1.14.1 → 1.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/Cargo.toml +76 -0
  2. package/dist/cli.js +45 -22
  3. package/native/archive.rs +220 -0
  4. package/native/audio.rs +151 -0
  5. package/native/bench_hybrid.rs +145 -0
  6. package/native/bwt.rs +56 -0
  7. package/native/context_mixing.rs +117 -0
  8. package/native/core.rs +378 -0
  9. package/native/crypto.rs +209 -0
  10. package/native/encoder.rs +405 -0
  11. package/native/hybrid.rs +297 -0
  12. package/native/image_utils.rs +82 -0
  13. package/native/io_advice.rs +43 -0
  14. package/native/io_ntfs_optimized.rs +99 -0
  15. package/native/lib.rs +480 -0
  16. package/native/main.rs +939 -0
  17. package/native/mtf.rs +106 -0
  18. package/native/packer.rs +863 -0
  19. package/native/png_chunk_writer.rs +146 -0
  20. package/native/png_utils.rs +554 -0
  21. package/native/pool.rs +101 -0
  22. package/native/progress.rs +142 -0
  23. package/native/rans.rs +149 -0
  24. package/native/rans_byte.rs +286 -0
  25. package/native/reconstitution.rs +623 -0
  26. package/native/streaming.rs +189 -0
  27. package/native/streaming_decode.rs +720 -0
  28. package/native/streaming_encode.rs +684 -0
  29. package/native/test_small_bwt.rs +31 -0
  30. package/native/test_stages.rs +70 -0
  31. package/package.json +6 -6
  32. package/scripts/download-binary.cjs +259 -0
  33. package/scripts/postinstall.cjs +136 -110
  34. package/dist/rox-macos-universal +0 -0
  35. package/dist/roxify_native +0 -0
  36. package/dist/roxify_native-macos-arm64 +0 -0
  37. package/dist/roxify_native-macos-x64 +0 -0
  38. package/dist/roxify_native.exe +0 -0
  39. package/roxify_native-aarch64-apple-darwin.node +0 -0
  40. package/roxify_native-aarch64-pc-windows-msvc.node +0 -0
  41. package/roxify_native-aarch64-unknown-linux-gnu.node +0 -0
  42. package/roxify_native-i686-pc-windows-msvc.node +0 -0
  43. package/roxify_native-i686-unknown-linux-gnu.node +0 -0
  44. package/roxify_native-universal-apple-darwin.node +0 -0
  45. package/roxify_native-x86_64-apple-darwin.node +0 -0
  46. package/roxify_native-x86_64-pc-windows-msvc.node +0 -0
  47. package/roxify_native-x86_64-unknown-linux-gnu.node +0 -0
package/Cargo.toml ADDED
@@ -0,0 +1,76 @@
1
+ [package]
2
+ name = "roxify_native"
3
+ version = "1.14.0"
4
+ edition = "2021"
5
+ publish = false
6
+
7
+ [lib]
8
+ name = "roxify_native"
9
+ crate-type = ["cdylib"]
10
+ path = "native/lib.rs"
11
+
12
+ [[bin]]
13
+ name = "roxify_native"
14
+ path = "native/main.rs"
15
+
16
+ [dev-dependencies]
17
+
18
+ [dependencies]
19
+ napi = "2"
20
+ napi-derive = "2"
21
+ rayon = "1.7"
22
+ zstd = { version = "0.11", features = ["zstdmt"] }
23
+ crc32fast = "1.3"
24
+ num_cpus = "1.16"
25
+ clap = { version = "4", features = ["derive"] }
26
+ serde = { version = "1.0", features = ["derive"] }
27
+ serde_json = "1.0"
28
+ anyhow = "1.0"
29
+ png = "0.18.0"
30
+ image = { version = "0.25", default-features = false, features = ["png"] }
31
+ # indicatif removed from default deps to reduce heavy build graph
32
+ walkdir = "2.5.0"
33
+ tar = "0.4"
34
+ aes-gcm = "0.10"
35
+ aes = "0.8"
36
+ ctr = "0.9"
37
+ cipher = { version = "0.4", features = ["std"] }
38
+ hmac = "0.12"
39
+ pbkdf2 = "0.12"
40
+ rand = "0.8"
41
+ sha2 = "0.10"
42
+ mimalloc = "0.1"
43
+ simd-adler32 = "0.3"
44
+
45
+ bytemuck = { version = "1.14", features = ["derive"] }
46
+ parking_lot = "0.12"
47
+ libsais = { version = "0.2.0", default-features = false }
48
+ libc = "0.2"
49
+
50
+ [features]
51
+ default = []
52
+
53
+ [profile.release]
54
+ opt-level = 3
55
+ lto = "fat"
56
+ codegen-units = 1
57
+ strip = true
58
+ panic = "abort"
59
+
60
+ [profile.release-size]
61
+ inherits = "release"
62
+ opt-level = "z"
63
+ lto = true
64
+ strip = true
65
+
66
+ [profile.fastdev]
67
+ # Fast development profile for minimal user CPU and fast compilation.
68
+ # Lower optimization and high codegen units to parallelize compilation work,
69
+ # and enable incremental compilation to speed up subsequent rebuilds.
70
+ inherits = "release"
71
+ opt-level = 1
72
+ lto = false
73
+ codegen-units = 16
74
+ debug = false
75
+ incremental = true
76
+ panic = "abort"
package/dist/cli.js CHANGED
@@ -3,7 +3,7 @@ import { readdirSync, readFileSync, statSync, writeFileSync } from 'fs';
3
3
  import { open } from 'fs/promises';
4
4
  import { basename, dirname, join, resolve } from 'path';
5
5
  import * as cliProgress from './stub-progress.js';
6
- import { encodeWithRustCLI, havepassphraseWithRustCLI, isRustBinaryAvailable, listWithRustCLI, } from './utils/rust-cli-wrapper.js';
6
+ import { decodeWithRustCLI, encodeWithRustCLI, havepassphraseWithRustCLI, isRustBinaryAvailable, listWithRustCLI, } from './utils/rust-cli-wrapper.js';
7
7
  async function loadJsEngine() {
8
8
  const indexMod = await import('./index.js');
9
9
  const packMod = await import('./pack.js');
@@ -462,6 +462,38 @@ async function encodeCommand(args) {
462
462
  else {
463
463
  const resolvedInput = resolvedInputs[0];
464
464
  const st = statSync(resolvedInput);
465
+ // Calculate total size for deciding whether to use Rust CLI streaming
466
+ const totalInputSize = st.isDirectory()
467
+ ? getDirectorySize(resolvedInput)
468
+ : st.size;
469
+ // Use Rust CLI for large files (> 1GB) to avoid Buffer.concat 4GB limit
470
+ const STREAMING_THRESHOLD = 1024 * 1024 * 1024; // 1GB
471
+ if (totalInputSize > STREAMING_THRESHOLD && isRustBinaryAvailable()) {
472
+ console.log('Using native Rust encoder (streaming for large payload)\n');
473
+ const encodeStart = Date.now();
474
+ let lastPct = 0;
475
+ await encodeWithRustCLI(resolvedInput, resolvedOutput, parsed.level ? Number(parsed.level) : 6, parsed.passphrase, parsed.encrypt || 'aes', parsed.outputName || (st.isDirectory() ? basename(resolvedInput) : undefined), undefined, // ramBudgetMb
476
+ (current, total, step) => {
477
+ const pct = Math.floor((current / total) * 100);
478
+ if (pct !== lastPct) {
479
+ lastPct = pct;
480
+ if (barStarted) {
481
+ encodeBar.update(pct, { step });
482
+ }
483
+ }
484
+ });
485
+ const encodeTime = Date.now() - encodeStart;
486
+ if (barStarted) {
487
+ encodeBar.update(100, { step: 'done', elapsed: String(Math.floor(encodeTime / 1000)) });
488
+ encodeBar.stop();
489
+ }
490
+ console.log(`\nSuccess!`);
491
+ console.log(` Input: ${(totalInputSize / 1024 / 1024).toFixed(2)} MB`);
492
+ console.log(` Time: ${encodeTime}ms`);
493
+ console.log(` Saved: ${resolvedOutput}`);
494
+ console.log(' ');
495
+ return;
496
+ }
465
497
  if (st.isDirectory()) {
466
498
  currentEncodeStep = 'Reading files';
467
499
  const { index, stream, totalSize } = await js.packPathsGenerator([resolvedInput], dirname(resolvedInput), onProgress);
@@ -591,33 +623,24 @@ async function decodeCommand(args) {
591
623
  }
592
624
  const resolvedInput = resolve(inputPath);
593
625
  const resolvedOutput = parsed.output || outputPath || '.';
626
+ if (!isRustBinaryAvailable()) {
627
+ console.error('Error: Rust decoder binary not found');
628
+ process.exit(1);
629
+ }
594
630
  try {
595
631
  console.log(' ');
596
632
  console.log('Decoding... (Using native Rust decoder)\n');
597
633
  const startTime = Date.now();
598
634
  const decodeBar = new cliProgress.SingleBar({ format: ' {bar} {percentage}% | {step} | {elapsed}s' }, cliProgress.Presets.shades_classic);
599
635
  decodeBar.start(100, 0, { step: 'Decoding', elapsed: '0' });
600
- const js = await loadJsEngine();
601
- const result = await js.decodePngToBinary(readFileSync(resolvedInput));
602
- if (result.files && result.files.length > 0) {
603
- const outputDir = resolve(resolvedOutput);
604
- for (const file of result.files) {
605
- const dest = join(outputDir, file.path);
606
- const parent = dirname(dest);
607
- try {
608
- await import('fs/promises').then(({ mkdir }) => mkdir(parent, { recursive: true }));
609
- }
610
- catch { }
611
- writeFileSync(dest, file.buf);
612
- }
613
- }
614
- else if (result.buf) {
615
- const outputFile = resolvedOutput === '.' ? join(process.cwd(), result.meta?.name || basename(resolvedInput).replace(/\.[^.]+$/, '')) : resolvedOutput;
616
- writeFileSync(outputFile, result.buf);
617
- }
618
- else {
619
- throw new Error('Decoded result is empty');
620
- }
636
+ await decodeWithRustCLI(resolvedInput, resolvedOutput, parsed.passphrase, parsed.files, parsed.dict, parsed.ramBudgetMb, (current, total, step) => {
637
+ const pct = total > 0 ? Math.floor((current / total) * 100) : 0;
638
+ const elapsed = Math.floor((Date.now() - startTime) / 1000);
639
+ decodeBar.update(Math.min(pct, 99), {
640
+ step: step || 'Decoding',
641
+ elapsed: String(elapsed),
642
+ });
643
+ });
621
644
  const decodeTime = Date.now() - startTime;
622
645
  decodeBar.update(100, { step: 'done', elapsed: String(Math.floor(decodeTime / 1000)) });
623
646
  decodeBar.stop();
@@ -0,0 +1,220 @@
1
+ use std::io::Cursor;
2
+ use std::path::Path;
3
+ use rayon::prelude::*;
4
+ use tar::{Archive, Builder, Header};
5
+ use walkdir::WalkDir;
6
+
7
/// Outcome of packing a directory into an in-memory TAR archive.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TarPackResult {
    /// Raw bytes of the finished TAR archive.
    pub data: Vec<u8>,
    /// One `(entry name, size in bytes)` pair per file stored in `data`,
    /// in the same order the entries were appended.
    pub file_list: Vec<(String, u64)>,
}
11
+
12
+ pub fn tar_pack_directory_with_list(dir_path: &Path) -> Result<TarPackResult, String> {
13
+ let base = dir_path;
14
+
15
+ let entries: Vec<_> = WalkDir::new(dir_path)
16
+ .follow_links(false)
17
+ .into_iter()
18
+ .filter_map(|e| e.ok())
19
+ .filter(|e| e.file_type().is_file())
20
+ .collect();
21
+
22
+ let file_data: Vec<(String, Vec<u8>)> = entries
23
+ .par_iter()
24
+ .filter_map(|entry| {
25
+ let full = entry.path();
26
+ let rel = full.strip_prefix(base).unwrap_or(full);
27
+ let rel_str = rel.to_string_lossy().replace('\\', "/");
28
+ match std::fs::read(full) {
29
+ Ok(data) => Some((rel_str, data)),
30
+ Err(_) => None,
31
+ }
32
+ })
33
+ .collect();
34
+
35
+ let file_list: Vec<(String, u64)> = file_data.iter()
36
+ .map(|(name, data)| (name.clone(), data.len() as u64))
37
+ .collect();
38
+
39
+ let total_estimate: usize = file_data.iter().map(|(n, d)| 512 + d.len() + 512 + n.len()).sum();
40
+ let mut buf = Vec::with_capacity(total_estimate + 1024);
41
+ {
42
+ let mut builder = Builder::new(&mut buf);
43
+ for (rel_str, data) in &file_data {
44
+ let mut header = Header::new_gnu();
45
+ header.set_size(data.len() as u64);
46
+ header.set_mode(0o644);
47
+ header.set_cksum();
48
+ builder
49
+ .append_data(&mut header, rel_str, &data[..])
50
+ .map_err(|e| format!("tar append {}: {}", rel_str, e))?;
51
+ }
52
+ builder.finish().map_err(|e| format!("tar finish: {}", e))?;
53
+ }
54
+ Ok(TarPackResult { data: buf, file_list })
55
+ }
56
+
57
+ pub fn tar_pack_directory(dir_path: &Path) -> Result<Vec<u8>, String> {
58
+ tar_pack_directory_with_list(dir_path).map(|r| r.data)
59
+ }
60
+
61
/// Lists `(name, size)` for the entries of a TAR archive by scanning the raw
/// 512-byte headers directly, without decoding any payload.
///
/// Scanning stops at the first all-zero header block, at the end of
/// `tar_data`, or when a declared size cannot be represented on this
/// platform. Entries with an empty name are skipped. A size field that cannot
/// be parsed is treated as 0.
///
/// Besides plain octal headers this understands two GNU extensions that the
/// packer in this module can produce:
/// * base-256 size fields (high bit of the first byte set), used for entries
///   too large for 11 octal digits;
/// * `L` (long name) records, whose payload holds the real name of the
///   following entry when it exceeds the 100-byte name field.
///
/// `K` (long link target) records are skipped rather than listed.
pub fn tar_file_list_fast(tar_data: &[u8]) -> Vec<(String, u64)> {
    const BLOCK: usize = 512;

    /// Decodes the 12-byte size field (octal text or GNU base-256).
    fn parse_size(field: &[u8]) -> u64 {
        if field.first().map_or(false, |b| b & 0x80 != 0) {
            // Base-256: big-endian binary; the low 8 bytes carry the value.
            let mut raw = [0u8; 8];
            raw.copy_from_slice(&field[field.len() - 8..]);
            return u64::from_be_bytes(raw);
        }
        let text = String::from_utf8_lossy(field);
        // Fields may be terminated by NUL, space, or both.
        let digits = text.trim_matches(|c: char| c == '\0' || c.is_whitespace());
        u64::from_str_radix(digits, 8).unwrap_or(0)
    }

    /// Reads a NUL-terminated (or field-filling) string.
    fn c_string(bytes: &[u8]) -> String {
        let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
        String::from_utf8_lossy(&bytes[..end]).into_owned()
    }

    let mut list = Vec::new();
    let mut long_name: Option<String> = None;
    let mut pos = 0usize;

    while let Some(header) = tar_data.get(pos..).and_then(|rest| rest.get(..BLOCK)) {
        if header.iter().all(|&b| b == 0) {
            break;
        }

        let size = parse_size(&header[124..136]);
        // A full header was just read, so `pos + BLOCK <= tar_data.len()`.
        let data_start = pos + BLOCK;
        let payload_len = usize::try_from(size).ok();

        match header[156] {
            b'L' => {
                // The payload is the real name of the next entry.
                let available = &tar_data[data_start..];
                let take = payload_len.map_or(available.len(), |n| n.min(available.len()));
                long_name = Some(c_string(&available[..take]));
            }
            b'K' => {
                // Long link target for the next entry: metadata only.
            }
            _ => {
                let name = long_name
                    .take()
                    .unwrap_or_else(|| c_string(&header[..100]));
                if !name.is_empty() {
                    list.push((name, size));
                }
            }
        }

        // Payloads are padded to whole blocks; stop instead of overflowing.
        let next = payload_len
            .and_then(|n| n.checked_add(BLOCK - 1))
            .map(|n| n / BLOCK * BLOCK)
            .and_then(|padded| data_start.checked_add(padded));
        match next {
            Some(n) => pos = n,
            None => break,
        }
    }

    list
}
81
+
82
+ pub fn tar_unpack(tar_data: &[u8], output_dir: &Path) -> Result<Vec<String>, String> {
83
+ let mut archive = Archive::new(Cursor::new(tar_data));
84
+ let mut entries_data: Vec<(std::path::PathBuf, Vec<u8>)> = Vec::new();
85
+
86
+ let entries = archive.entries().map_err(|e| format!("tar entries: {}", e))?;
87
+ for entry in entries {
88
+ let mut entry = entry.map_err(|e| format!("tar entry: {}", e))?;
89
+ let path = entry.path().map_err(|e| format!("tar entry path: {}", e))?.to_path_buf();
90
+
91
+ let mut safe = std::path::PathBuf::new();
92
+ for comp in path.components() {
93
+ if let std::path::Component::Normal(osstr) = comp {
94
+ safe.push(osstr);
95
+ }
96
+ }
97
+ if safe.as_os_str().is_empty() {
98
+ continue;
99
+ }
100
+
101
+ let mut data = Vec::with_capacity(entry.size() as usize);
102
+ std::io::Read::read_to_end(&mut entry, &mut data)
103
+ .map_err(|e| format!("tar read {:?}: {}", safe, e))?;
104
+ entries_data.push((safe, data));
105
+ }
106
+
107
+ let dirs: std::collections::HashSet<_> = entries_data.iter()
108
+ .filter_map(|(p, _)| {
109
+ let dest = output_dir.join(p);
110
+ dest.parent().map(|d| d.to_path_buf())
111
+ })
112
+ .collect();
113
+ for dir in &dirs {
114
+ std::fs::create_dir_all(dir).map_err(|e| format!("mkdir {:?}: {}", dir, e))?;
115
+ }
116
+
117
+ let written: Vec<String> = entries_data.par_iter()
118
+ .filter_map(|(safe, data)| {
119
+ let dest = output_dir.join(safe);
120
+ match std::fs::write(&dest, data) {
121
+ Ok(_) => Some(safe.to_string_lossy().to_string()),
122
+ Err(_) => None,
123
+ }
124
+ })
125
+ .collect();
126
+
127
+ Ok(written)
128
+ }
129
+
130
/// Reports whether `data` looks like a TAR archive: it must be at least 263
/// bytes long and carry the `ustar` magic at byte offset 257.
pub fn is_tar(data: &[u8]) -> bool {
    const MAGIC: &[u8] = b"ustar";
    data.len() >= 263 && data[257..262] == *MAGIC
}
136
+
137
+ pub fn tar_file_list(tar_data: &[u8]) -> Result<Vec<(String, u64)>, String> {
138
+ let mut archive = Archive::new(Cursor::new(tar_data));
139
+ let mut list = Vec::new();
140
+ let entries = archive.entries().map_err(|e| format!("tar entries: {}", e))?;
141
+ for entry in entries {
142
+ let entry = entry.map_err(|e| format!("tar entry: {}", e))?;
143
+ let path = entry
144
+ .path()
145
+ .map_err(|e| format!("tar path: {}", e))?
146
+ .to_string_lossy()
147
+ .to_string();
148
+ let size = entry.size();
149
+ list.push((path, size));
150
+ }
151
+ Ok(list)
152
+ }
153
+
154
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Returns a freshly emptied directory with the given name under the
    /// system temp dir.
    fn scratch_dir(name: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(name);
        let _ = fs::remove_dir_all(&dir);
        fs::create_dir_all(&dir).unwrap();
        dir
    }

    /// Pack a small tree, list it, unpack it, and compare the contents.
    #[test]
    fn test_tar_roundtrip() {
        let src = scratch_dir("rox_tar_test");
        fs::create_dir_all(src.join("sub")).unwrap();
        fs::write(src.join("hello.txt"), b"Hello TAR").unwrap();
        fs::write(src.join("sub/nested.txt"), b"Nested!").unwrap();

        let archive = tar_pack_directory(&src).unwrap();
        assert!(is_tar(&archive));

        let listing = tar_file_list(&archive).unwrap();
        assert_eq!(listing.len(), 2);

        let dst = scratch_dir("rox_tar_test_out");
        let written = tar_unpack(&archive, &dst).unwrap();
        assert_eq!(written.len(), 2);

        let expected = [("hello.txt", "Hello TAR"), ("sub/nested.txt", "Nested!")];
        for (rel, content) in expected.iter() {
            assert_eq!(fs::read_to_string(dst.join(rel)).unwrap(), *content);
        }

        let _ = fs::remove_dir_all(&src);
        let _ = fs::remove_dir_all(&dst);
    }

    /// Same round trip, with the archive passed through zstd in between.
    #[test]
    fn test_tar_zstd_roundtrip() {
        use std::io::Write;

        let src = scratch_dir("rox_tar_zstd_test");
        fs::create_dir_all(src.join("a/b")).unwrap();
        fs::write(src.join("root.txt"), b"root file content").unwrap();
        fs::write(src.join("a/mid.txt"), b"mid level").unwrap();
        fs::write(src.join("a/b/deep.txt"), b"deep nested file").unwrap();

        let archive = tar_pack_directory(&src).unwrap();
        assert!(is_tar(&archive));

        let mut zstd_writer = zstd::stream::Encoder::new(Vec::new(), 3).unwrap();
        zstd_writer.write_all(&archive).unwrap();
        let squeezed = zstd_writer.finish().unwrap();

        let restored = crate::core::zstd_decompress_bytes(&squeezed, None).unwrap();
        assert!(is_tar(&restored));

        let dst = scratch_dir("rox_tar_zstd_test_out");
        let written = tar_unpack(&restored, &dst).unwrap();
        assert_eq!(written.len(), 3);

        let expected = [
            ("root.txt", "root file content"),
            ("a/mid.txt", "mid level"),
            ("a/b/deep.txt", "deep nested file"),
        ];
        for (rel, content) in expected.iter() {
            assert_eq!(fs::read_to_string(dst.join(rel)).unwrap(), *content);
        }

        let _ = fs::remove_dir_all(&src);
        let _ = fs::remove_dir_all(&dst);
    }
}
@@ -0,0 +1,151 @@
1
//! WAV container for binary data.
//!
//! Encodes raw bytes as 8-bit unsigned PCM mono samples (44100 Hz).
//! Header is exactly 44 bytes. Total overhead: 44 bytes.
//!
//! Compared to PNG (stored deflate): PNG overhead grows with data size
//! (zlib framing, filter bytes, chunk CRCs). WAV overhead is constant.

/// Size in bytes of the RIFF/WAVE header written in front of the payload.
const WAV_HEADER_SIZE: usize = 44;
/// Sample rate recorded in the `fmt ` chunk, in Hz.
const SAMPLE_RATE: u32 = 44100;
/// Bits per PCM sample; 8 means one payload byte per sample.
const BITS_PER_SAMPLE: u16 = 8;
/// Channel count recorded in the `fmt ` chunk (1 = mono).
const NUM_CHANNELS: u16 = 1;
13
+
14
+ /// Pack raw bytes into a WAV file (8-bit PCM, mono, 44100 Hz).
15
+ /// The bytes are stored directly as unsigned PCM samples.
16
+ /// Returns the complete WAV file as a Vec<u8>.
17
+ pub fn bytes_to_wav(data: &[u8]) -> Vec<u8> {
18
+ let data_size = data.len() as u32;
19
+ let file_size = WAV_HEADER_SIZE as u32 - 8 + data_size; // RIFF chunk size
20
+
21
+ let byte_rate = SAMPLE_RATE * NUM_CHANNELS as u32 * (BITS_PER_SAMPLE as u32 / 8);
22
+ let block_align = NUM_CHANNELS * (BITS_PER_SAMPLE / 8);
23
+
24
+ let mut wav = Vec::with_capacity(WAV_HEADER_SIZE + data.len());
25
+
26
+ // RIFF header
27
+ wav.extend_from_slice(b"RIFF");
28
+ wav.extend_from_slice(&file_size.to_le_bytes());
29
+ wav.extend_from_slice(b"WAVE");
30
+
31
+ // fmt sub-chunk
32
+ wav.extend_from_slice(b"fmt ");
33
+ wav.extend_from_slice(&16u32.to_le_bytes()); // sub-chunk size (PCM = 16)
34
+ wav.extend_from_slice(&1u16.to_le_bytes()); // audio format (1 = PCM)
35
+ wav.extend_from_slice(&NUM_CHANNELS.to_le_bytes());
36
+ wav.extend_from_slice(&SAMPLE_RATE.to_le_bytes());
37
+ wav.extend_from_slice(&byte_rate.to_le_bytes());
38
+ wav.extend_from_slice(&block_align.to_le_bytes());
39
+ wav.extend_from_slice(&BITS_PER_SAMPLE.to_le_bytes());
40
+
41
+ // data sub-chunk
42
+ wav.extend_from_slice(b"data");
43
+ wav.extend_from_slice(&data_size.to_le_bytes());
44
+ wav.extend_from_slice(data);
45
+
46
+ wav
47
+ }
48
+
49
+ /// Extract raw bytes from a WAV file.
50
+ /// Returns the PCM data (the original bytes) or an error.
51
+ pub fn wav_to_bytes(wav: &[u8]) -> Result<Vec<u8>, String> {
52
+ if wav.len() < WAV_HEADER_SIZE {
53
+ return Err("WAV data too short".to_string());
54
+ }
55
+
56
+ // Validate RIFF header
57
+ if &wav[0..4] != b"RIFF" {
58
+ return Err("Not a RIFF file".to_string());
59
+ }
60
+ if &wav[8..12] != b"WAVE" {
61
+ return Err("Not a WAVE file".to_string());
62
+ }
63
+
64
+ // Find the "data" sub-chunk (skip fmt and any other chunks)
65
+ let mut offset = 12; // past "RIFF" + size + "WAVE"
66
+ loop {
67
+ if offset + 8 > wav.len() {
68
+ return Err("data chunk not found".to_string());
69
+ }
70
+ let chunk_id = &wav[offset..offset + 4];
71
+ let chunk_size = u32::from_le_bytes([
72
+ wav[offset + 4],
73
+ wav[offset + 5],
74
+ wav[offset + 6],
75
+ wav[offset + 7],
76
+ ]) as usize;
77
+
78
+ if chunk_id == b"data" {
79
+ let data_start = offset + 8;
80
+ let data_end = data_start + chunk_size;
81
+ if data_end > wav.len() {
82
+ // Allow truncation: return what we have
83
+ return Ok(wav[data_start..].to_vec());
84
+ }
85
+ return Ok(wav[data_start..data_end].to_vec());
86
+ }
87
+
88
+ // Skip this chunk (+ padding byte if odd size)
89
+ offset += 8 + chunk_size;
90
+ if chunk_size % 2 != 0 {
91
+ offset += 1; // RIFF chunks are word-aligned
92
+ }
93
+ }
94
+ }
95
+
96
/// Reports whether `buf` begins with a RIFF container header whose form type
/// is `WAVE` (bytes 0..4 are `RIFF`, bytes 8..12 are `WAVE`).
pub fn is_wav(buf: &[u8]) -> bool {
    matches!(
        buf,
        [b'R', b'I', b'F', b'F', _, _, _, _, b'W', b'A', b'V', b'E', ..]
    )
}
100
+
101
#[cfg(test)]
mod tests {
    use super::*;

    /// Encoding then decoding returns the payload, behind a 44-byte header.
    #[test]
    fn test_wav_roundtrip() {
        let payload = b"Hello, World! This is roxify audio container test data.";
        let encoded = bytes_to_wav(payload);

        // Check header
        assert_eq!(&encoded[0..4], b"RIFF");
        assert_eq!(&encoded[8..12], b"WAVE");
        assert_eq!(encoded.len(), 44 + payload.len());

        // Roundtrip
        let decoded = wav_to_bytes(&encoded).expect("decode should succeed");
        assert_eq!(decoded, payload);
    }

    /// An empty payload yields a header-only file that decodes to nothing.
    #[test]
    fn test_wav_empty() {
        let payload: &[u8] = b"";
        let encoded = bytes_to_wav(payload);
        assert_eq!(encoded.len(), 44);
        let decoded = wav_to_bytes(&encoded).expect("decode empty");
        assert!(decoded.is_empty());
    }

    /// A 1 MB payload survives the round trip unchanged.
    #[test]
    fn test_wav_large() {
        let payload = vec![0xAB_u8; 1024 * 1024]; // 1 MB
        let encoded = bytes_to_wav(&payload);
        assert_eq!(encoded.len(), 44 + 1024 * 1024);
        let decoded = wav_to_bytes(&encoded).expect("decode large");
        assert_eq!(decoded, payload);
    }

    /// Detection needs both the RIFF tag and the WAVE form type.
    #[test]
    fn test_is_wav() {
        let encoded = bytes_to_wav(b"test");
        assert!(is_wav(&encoded));
        assert!(!is_wav(b"not a wav"));
        assert!(!is_wav(b"RIFF1234XXXX")); // RIFF but not WAVE
    }

    /// Inputs shorter than a header are rejected.
    #[test]
    fn test_invalid_wav() {
        assert!(wav_to_bytes(b"short").is_err());
        assert!(wav_to_bytes(b"NOT a RIFF file!").is_err());
    }
}
@@ -0,0 +1,145 @@
1
+ use std::time::Instant;
2
+
3
+ mod rans_byte;
4
+ mod bwt;
5
+ mod mtf;
6
+ mod context_mixing;
7
+ mod pool;
8
+ mod hybrid;
9
+
10
+ fn bench_roundtrip(name: &str, data: &[u8]) {
11
+ let compressor = hybrid::HybridCompressor::new();
12
+
13
+ let start = Instant::now();
14
+ let (compressed, stats) = compressor.compress(data).unwrap();
15
+ let compress_time = start.elapsed();
16
+
17
+ let start = Instant::now();
18
+ let decompressed = compressor.decompress(&compressed).unwrap();
19
+ let decompress_time = start.elapsed();
20
+
21
+ let ratio = (compressed.len() as f64) / (data.len() as f64) * 100.0;
22
+ let compress_mbps = (data.len() as f64 / 1_048_576.0) / compress_time.as_secs_f64();
23
+ let decompress_mbps = (data.len() as f64 / 1_048_576.0) / decompress_time.as_secs_f64();
24
+
25
+ assert_eq!(decompressed, data, "ROUND-TRIP FAILED for {}", name);
26
+
27
+ println!("=== {} ===", name);
28
+ println!(" Input: {} bytes", data.len());
29
+ println!(" Compressed: {} bytes ({:.1}%)", compressed.len(), ratio);
30
+ println!(" Reduction: {:.1}%", 100.0 - ratio);
31
+ println!(" Compress: {:.1} ms ({:.1} MB/s)", compress_time.as_secs_f64() * 1000.0, compress_mbps);
32
+ println!(" Decompress: {:.1} ms ({:.1} MB/s)", decompress_time.as_secs_f64() * 1000.0, decompress_mbps);
33
+ println!(" Entropy: {:.2} bits/byte", stats.entropy_bits);
34
+ println!();
35
+ }
36
+
37
+ fn bench_zstd(name: &str, data: &[u8], level: i32) {
38
+ let start = Instant::now();
39
+ let compressed = zstd::encode_all(std::io::Cursor::new(data), level).unwrap();
40
+ let compress_time = start.elapsed();
41
+
42
+ let start = Instant::now();
43
+ let decompressed = zstd::decode_all(std::io::Cursor::new(&compressed)).unwrap();
44
+ let decompress_time = start.elapsed();
45
+
46
+ let ratio = (compressed.len() as f64) / (data.len() as f64) * 100.0;
47
+ let compress_mbps = (data.len() as f64 / 1_048_576.0) / compress_time.as_secs_f64();
48
+ let decompress_mbps = (data.len() as f64 / 1_048_576.0) / decompress_time.as_secs_f64();
49
+
50
+ assert_eq!(decompressed, data);
51
+
52
+ println!("=== Zstd L{} ({}) ===", level, name);
53
+ println!(" Compressed: {} bytes ({:.1}%)", compressed.len(), ratio);
54
+ println!(" Reduction: {:.1}%", 100.0 - ratio);
55
+ println!(" Compress: {:.1} ms ({:.1} MB/s)", compress_time.as_secs_f64() * 1000.0, compress_mbps);
56
+ println!(" Decompress: {:.1} ms ({:.1} MB/s)", decompress_time.as_secs_f64() * 1000.0, decompress_mbps);
57
+ println!();
58
+ }
59
+
60
+ fn main() {
61
+ println!("╔══════════════════════════════════════════════════════════╗");
62
+ println!("║ ROXIFY BWT-ANS COMPRESSION BENCHMARK ║");
63
+ println!("╚══════════════════════════════════════════════════════════╝\n");
64
+
65
+ let text_1k: Vec<u8> = "Hello World! This is a test of the roxify compression engine. ".repeat(16).into_bytes();
66
+ bench_roundtrip("Text 1KB", &text_1k);
67
+ bench_zstd("Text 1KB", &text_1k, 3);
68
+ bench_zstd("Text 1KB", &text_1k, 19);
69
+
70
+ let text_100k: Vec<u8> = "The quick brown fox jumps over the lazy dog. Pack my box with five dozen liquor jugs. ".repeat(1200).into_bytes();
71
+ bench_roundtrip("Text 100KB", &text_100k);
72
+ bench_zstd("Text 100KB", &text_100k, 3);
73
+ bench_zstd("Text 100KB", &text_100k, 19);
74
+
75
+ let text_1m: Vec<u8> = {
76
+ let mut data = Vec::with_capacity(1_048_576);
77
+ let phrases = [
78
+ b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. ".as_slice(),
79
+ b"Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. ".as_slice(),
80
+ b"Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris. ".as_slice(),
81
+ b"Duis aute irure dolor in reprehenderit in voluptate velit esse. ".as_slice(),
82
+ b"Excepteur sint occaecat cupidatat non proident, sunt in culpa. ".as_slice(),
83
+ ];
84
+ let mut i = 0;
85
+ while data.len() < 1_048_576 {
86
+ data.extend_from_slice(phrases[i % phrases.len()]);
87
+ i += 1;
88
+ }
89
+ data.truncate(1_048_576);
90
+ data
91
+ };
92
+ bench_roundtrip("Text 1MB", &text_1m);
93
+ bench_zstd("Text 1MB", &text_1m, 3);
94
+ bench_zstd("Text 1MB", &text_1m, 19);
95
+
96
+ let json_data: Vec<u8> = {
97
+ let mut data = String::with_capacity(512_000);
98
+ data.push('[');
99
+ for i in 0..5000 {
100
+ if i > 0 { data.push(','); }
101
+ data.push_str(&format!(
102
+ r#"{{"id":{},"name":"user_{}","email":"user{}@example.com","active":{},"score":{:.2},"tags":["tag1","tag2","tag3"]}}"#,
103
+ i, i, i, i % 2 == 0, (i as f64) * 1.337
104
+ ));
105
+ }
106
+ data.push(']');
107
+ data.into_bytes()
108
+ };
109
+ bench_roundtrip("JSON 500KB", &json_data);
110
+ bench_zstd("JSON 500KB", &json_data, 3);
111
+ bench_zstd("JSON 500KB", &json_data, 19);
112
+
113
+ let random_data: Vec<u8> = {
114
+ let mut data = vec![0u8; 100_000];
115
+ let mut state = 12345u64;
116
+ for b in data.iter_mut() {
117
+ state = state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
118
+ *b = (state >> 33) as u8;
119
+ }
120
+ data
121
+ };
122
+ bench_roundtrip("Random 100KB", &random_data);
123
+ bench_zstd("Random 100KB", &random_data, 3);
124
+
125
+ let binary_data: Vec<u8> = {
126
+ let mut data = Vec::with_capacity(256_000);
127
+ for i in 0..256_000u32 {
128
+ match i % 7 {
129
+ 0 => data.push(0),
130
+ 1 => data.push(0xFF),
131
+ 2 => data.push((i & 0xFF) as u8),
132
+ 3 => data.push(((i >> 8) & 0xFF) as u8),
133
+ 4 => data.push(b'A' + (i % 26) as u8),
134
+ 5 => data.push(0x20),
135
+ _ => data.push((i.wrapping_mul(37) & 0xFF) as u8),
136
+ }
137
+ }
138
+ data
139
+ };
140
+ bench_roundtrip("Binary 256KB", &binary_data);
141
+ bench_zstd("Binary 256KB", &binary_data, 3);
142
+ bench_zstd("Binary 256KB", &binary_data, 19);
143
+
144
+ println!("All round-trip tests PASSED!");
145
+ }