roxify 1.13.3 → 1.13.4

package/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "roxify_native"
3
- version = "1.13.3"
3
+ version = "1.13.4"
4
4
  edition = "2021"
5
5
  publish = false
6
6
 
@@ -35,6 +35,7 @@ zstd = { version = "0.11", features = ["zstdmt"] }
35
35
  crc32fast = "1.3"
36
36
  num_cpus = "1.16"
37
37
  clap = { version = "4", features = ["derive"] }
38
+ serde = { version = "1.0", features = ["derive"] }
38
39
  serde_json = "1.0"
39
40
  anyhow = "1.0"
40
41
  png = "0.18.0"
package/README.md CHANGED
@@ -57,27 +57,29 @@ The core compression and image-processing logic is written in Rust and exposed t
57
57
 
58
58
  ## Benchmarks
59
59
 
60
- All measurements taken on Linux x64 (Intel i7-6700K @ 4.0 GHz, 32 GB RAM). Roxify uses its native Rust CLI (`roxify_native`) with streaming Zstd L3 + multi-threaded + LDM + window_log(30). ZIP uses `zip -r -q -9` (maximum compression).
60
+ All measurements below use Roxify's native Rust CLI (`roxify_native`) with streaming directory packing, Zstd level 3, multi-threading, long-distance matching, and `window_log(30)`.
61
61
 
62
- ### Real-world directory encoding: Roxify vs ZIP
62
+ ### Cold-cache throughput on ext4
63
63
 
64
- | Dataset | Original | ZIP -9 | Roxify PNG | ZIP time | Roxify time | Speedup |
65
- | --- | --- | --- | --- | --- | --- | --- |
66
- | Test A (19 638 files, 177 MB) | 177 MB | 87.7 MB (49.6%) | 54.9 MB (31.0%) | 17.6 s | 1.2 s | 14.7x |
67
- | Test B (3 936 files, 1.4 GB) | 1.4 GB | 513 MB (36.7%) | 409 MB (29.2%) | 1 min 46 s | 6.7 s | 15.9x |
64
+ Throughput was measured with targeted page-cache eviction (`POSIX_FADV_DONTNEED`) before both encode and decode. The raw benchmark manifest lives in `docs/COLD_BENCHMARK_2026-04-15.json`.
68
65
 
69
- ### Decompression
66
+ | Dataset | Files | Source | Output PNG | Encode | Encode throughput | Decode | Decode throughput |
67
+ | --- | --- | --- | --- | --- | --- | --- | --- |
68
+ | Glados-Disc | 19,645 | 208.18 MiB | 54.83 MiB | 2.883 s | 72.22 MiB/s | 0.954 s | 218.16 MiB/s |
69
+ | Gmod | 3,936 | 1.36 GiB | 411.09 MiB | 6.127 s | 227.69 MiB/s | 5.850 s | 238.48 MiB/s |
70
70
 
71
- | Dataset | unzip | Roxify decode | Speedup |
72
- | --- | --- | --- | --- |
73
- | Test A (177 MB) | 2.4 s | 1.3 s | 1.8x |
74
- | Test B (1.4 GB) | 8.4 s | 2.9 s | 2.9x |
71
+ ### High-latency source filesystem encode
75
72
 
76
- Roxify produces a valid PNG image instead of a ZIP archive. On these real-world datasets it compresses 20-37% smaller than ZIP -9 while encoding 15x faster, thanks to multi-threaded Zstd with long-distance matching.
73
+ Roxify 1.13.4 adds adaptive parallel preloading of small files before they are fed to the Zstd encoder. This specifically targets metadata-heavy trees on slower filesystems such as NTFS, APFS, exFAT, and network-backed mounts.
74
+
75
+ | Dataset | Source FS | Before 1.13.4 | Roxify 1.13.4 | Speedup |
76
+ | --- | --- | --- | --- | --- |
77
+ | Glados-Disc (19,645 files) | NTFS under Linux | 81.608 s | 2.189 s | 37.3x |
78
+ | Gmod (3,936 files) | NTFS under Linux | 22.578 s | 4.517 s | 5.0x |
77
79
 
78
80
  ### Data integrity
79
81
 
80
- 100% lossless roundtrip verified by byte-exact diff on all datasets. Start and end markers verified in every output PNG.
82
+ All benchmark runs completed with byte-exact roundtrip validation. Decoded output matched the original logical source bytes on every dataset.
81
83
 
82
84
  ---
83
85
 
Binary file
Binary file
Binary file
Binary file
Binary file
package/native/main.rs CHANGED
@@ -144,6 +144,19 @@ fn parse_markers(v: &[String]) -> Option<Vec<u8>> {
144
144
 
145
145
  fn main() -> anyhow::Result<()> {
146
146
  let cli = Cli::parse();
147
+
148
+ fn parse_requested_files(files: &str) -> anyhow::Result<Vec<String>> {
149
+ if files.trim_start().starts_with('[') {
150
+ serde_json::from_str::<Vec<String>>(files)
151
+ .map_err(|e| anyhow::anyhow!("Invalid JSON for --files: {}", e))
152
+ } else {
153
+ Ok(files
154
+ .split(',')
155
+ .map(|file| file.trim().to_string())
156
+ .filter(|file| !file.is_empty())
157
+ .collect())
158
+ }
159
+ }
147
160
  match cli.command {
148
161
  Commands::TrainDict { samples, size, output } => {
149
162
  let dict = core::train_zstd_dictionary(&samples, size)?;
@@ -169,7 +182,7 @@ fn main() -> anyhow::Result<()> {
169
182
  eprintln!("PROGRESS:{}:{}:{}", current, total, step);
170
183
  })),
171
184
  )?;
172
- println!("(TAR archive, rXFL chunk embedded)");
185
+ println!("(directory payload, rXFL chunk embedded)");
173
186
  return Ok(());
174
187
  }
175
188
 
@@ -234,7 +247,7 @@ fn main() -> anyhow::Result<()> {
234
247
  if file_list_json.is_some() {
235
248
  eprintln!("PROGRESS:100:100:done");
236
249
  if is_dir {
237
- println!("(TAR archive, rXFL chunk embedded)");
250
+ println!("(directory payload, rXFL chunk embedded)");
238
251
  } else {
239
252
  println!("(rXFL chunk embedded)");
240
253
  }
@@ -343,6 +356,11 @@ fn main() -> anyhow::Result<()> {
343
356
  && sig == [137, 80, 78, 71, 13, 10, 26, 10]
344
357
  });
345
358
 
359
+ let requested_files = match files.as_deref() {
360
+ Some(files_str) => Some(parse_requested_files(files_str)?),
361
+ None => None,
362
+ };
363
+
346
364
  if is_png_file && files.is_none() && dict.is_none() {
347
365
  let out_dir = output.clone().unwrap_or_else(|| PathBuf::from("out.raw"));
348
366
  match streaming_decode::streaming_decode_to_dir_encrypted_with_progress(
@@ -355,7 +373,7 @@ fn main() -> anyhow::Result<()> {
355
373
  ) {
356
374
  Ok(written) => {
357
375
  eprintln!("PROGRESS:100:100:done");
358
- println!("Unpacked {} files (TAR)", written.len());
376
+ println!("Unpacked {} files", written.len());
359
377
  return Ok(());
360
378
  }
361
379
  Err(e) => {
@@ -364,25 +382,31 @@ fn main() -> anyhow::Result<()> {
364
382
  }
365
383
  }
366
384
 
385
+ if is_png_file && requested_files.is_some() && dict.is_none() {
386
+ let out_dir = output.clone().unwrap_or_else(|| PathBuf::from("."));
387
+ std::fs::create_dir_all(&out_dir).map_err(|e| anyhow::anyhow!("Cannot create output directory {:?}: {}", out_dir, e))?;
388
+ let written = streaming_decode::streaming_decode_selected_to_dir_encrypted_with_progress(
389
+ &input,
390
+ &out_dir,
391
+ requested_files.as_deref(),
392
+ passphrase.as_deref(),
393
+ Some(Box::new(|current, total, step| {
394
+ eprintln!("PROGRESS:{}:{}:{}", current, total, step);
395
+ })),
396
+ ).map_err(|e| anyhow::anyhow!(e))?;
397
+ eprintln!("PROGRESS:100:100:done");
398
+ println!("Unpacked {} files", written.len());
399
+ return Ok(());
400
+ }
401
+
367
402
  let buf = read_all(&input)?;
368
403
  eprintln!("PROGRESS:20:100:decompressing");
369
404
  let dict_bytes: Option<Vec<u8>> = match dict {
370
405
  Some(path) => Some(read_all(&path)?),
371
406
  None => None,
372
407
  };
373
- if let Some(files_str) = files {
374
- let file_list: Option<Vec<String>> = if files_str.trim_start().starts_with('[') {
375
- match serde_json::from_str::<Vec<String>>(&files_str) {
376
- Ok(v) => Some(v),
377
- Err(e) => {
378
- eprintln!("Invalid JSON for --files: {}", e);
379
- std::process::exit(1);
380
- }
381
- }
382
- } else {
383
- let list = files_str.split(',').map(|s| s.trim().to_string()).filter(|s| !s.is_empty()).collect::<Vec<_>>();
384
- Some(list)
385
- };
408
+ if requested_files.is_some() {
409
+ let file_list = requested_files;
386
410
 
387
411
  let is_png = buf.len() >= 8 && &buf[0..8] == &[137, 80, 78, 71, 13, 10, 26, 10];
388
412
 
@@ -427,7 +451,9 @@ fn main() -> anyhow::Result<()> {
427
451
  std::fs::create_dir_all(&out_dir).map_err(|e| anyhow::anyhow!("Cannot create output directory {:?}: {}", out_dir, e))?;
428
452
  let files_slice = file_list.as_ref().map(|v| v.as_slice());
429
453
 
430
- let written = packer::unpack_stream_to_dir(&mut reader, &out_dir, files_slice).map_err(|e| anyhow::anyhow!(e))?;
454
+ let written = packer::unpack_stream_to_dir(&mut reader, &out_dir, files_slice, Some(&|current, total, step| {
455
+ eprintln!("PROGRESS:{}:{}:{}", current, total, step);
456
+ }), 0).map_err(|e| anyhow::anyhow!(e))?;
431
457
  eprintln!("PROGRESS:100:100:done");
432
458
  println!("Unpacked {} files", written.len());
433
459
  } else {
@@ -498,7 +524,7 @@ fn main() -> anyhow::Result<()> {
498
524
  .map_err(|e| anyhow::anyhow!("mkdir {:?}: {}", out_dir, e))?;
499
525
  let written = archive::tar_unpack(&out_bytes, &out_dir)
500
526
  .map_err(|e| anyhow::anyhow!(e))?;
501
- println!("Unpacked {} files (TAR) to {:?}", written.len(), out_dir);
527
+ println!("Unpacked {} files to {:?}", written.len(), out_dir);
502
528
  } else if out_bytes.len() >= 4
503
529
  && (u32::from_be_bytes(out_bytes[0..4].try_into().unwrap()) == 0x524f5850u32
504
530
  || u32::from_be_bytes(out_bytes[0..4].try_into().unwrap()) == 0x524f5849u32)
package/native/packer.rs CHANGED
@@ -255,13 +255,50 @@ fn unpack_entries_sequential(buf: &[u8], start: usize, out_dir: &Path, files_opt
255
255
  Ok(written)
256
256
  }
257
257
 
258
- pub fn unpack_stream_to_dir<R: std::io::Read>(reader: &mut R, out_dir: &Path, files_opt: Option<&[String]>) -> Result<Vec<String>> {
258
+ fn unpack_progress_percent(total_expected: u64, bytes_processed: u64, file_count: usize, processed_files: usize) -> u64 {
259
+ if total_expected > 0 {
260
+ return 10 + (bytes_processed.saturating_mul(89) / total_expected).min(89);
261
+ }
262
+ if file_count > 0 {
263
+ return 10 + ((processed_files as u64).saturating_mul(89) / file_count as u64).min(89);
264
+ }
265
+ 10
266
+ }
267
+
268
+ fn report_unpack_progress(
269
+ progress: Option<&(dyn Fn(u64, u64, &str) + Send)>,
270
+ total_expected: u64,
271
+ bytes_processed: u64,
272
+ file_count: usize,
273
+ processed_files: usize,
274
+ last_pct: &mut u64,
275
+ ) {
276
+ if let Some(cb) = progress {
277
+ let pct = unpack_progress_percent(total_expected, bytes_processed, file_count, processed_files);
278
+ if pct > *last_pct {
279
+ *last_pct = pct;
280
+ cb(pct, 100, "extracting");
281
+ }
282
+ }
283
+ }
284
+
285
+ pub fn unpack_stream_to_dir<R: std::io::Read>(
286
+ reader: &mut R,
287
+ out_dir: &Path,
288
+ files_opt: Option<&[String]>,
289
+ progress: Option<&(dyn Fn(u64, u64, &str) + Send)>,
290
+ total_expected: u64,
291
+ ) -> Result<Vec<String>> {
259
292
  let mut written = Vec::new();
260
293
  let mut buf: Vec<u8> = Vec::new();
261
294
  let mut pos: usize = 0;
262
295
  let mut temp = [0u8; 64 * 1024];
263
296
  let files_filter: Option<std::collections::HashSet<String>> = files_opt.map(|l| l.iter().map(|s| s.clone()).collect());
264
297
  let mut requested = files_filter.as_ref().map(|s| s.len()).unwrap_or(usize::MAX);
298
+ let mut file_count = 0usize;
299
+ let mut processed_files = 0usize;
300
+ let mut bytes_processed = 0u64;
301
+ let mut last_pct = 10u64;
265
302
 
266
303
  let mut header_parsed = false;
267
304
  let debug = std::env::var("ROX_DEBUG").is_ok();
@@ -280,10 +317,10 @@ pub fn unpack_stream_to_dir<R: std::io::Read>(reader: &mut R, out_dir: &Path, fi
280
317
  if debug { eprintln!("[rox debug] magic_header=0x{:08x}", magic_header); }
281
318
  if magic_header == 0x524f5850u32 {
282
319
  pos += 4;
283
- let _file_count = u32::from_be_bytes(buf[pos..pos+4].try_into().unwrap()) as usize;
320
+ file_count = u32::from_be_bytes(buf[pos..pos+4].try_into().unwrap()) as usize;
284
321
  pos += 4;
285
322
  header_parsed = true;
286
- if debug { eprintln!("[rox debug] header parsed, file_count={}", _file_count); }
323
+ if debug { eprintln!("[rox debug] header parsed, file_count={}", file_count); }
287
324
  } else if magic_header == 0x524f5831u32 {
288
325
  if debug { eprintln!("[rox debug] found ROX1 outer magic, skipping 4 bytes"); }
289
326
  pos += 4;
@@ -310,6 +347,8 @@ pub fn unpack_stream_to_dir<R: std::io::Read>(reader: &mut R, out_dir: &Path, fi
310
347
  let content_start = pos + 2 + name_len + 8;
311
348
  let content_end = content_start + size;
312
349
  let content = &buf[content_start..content_end];
350
+ processed_files = processed_files.saturating_add(1);
351
+ bytes_processed = bytes_processed.saturating_add(size as u64);
313
352
 
314
353
  let p = Path::new(&name);
315
354
  let mut safe = std::path::PathBuf::new();
@@ -328,10 +367,18 @@ pub fn unpack_stream_to_dir<R: std::io::Read>(reader: &mut R, out_dir: &Path, fi
328
367
  written.push(safe.to_string_lossy().to_string());
329
368
  if let Some(_set) = files_filter.as_ref() {
330
369
  requested = requested.saturating_sub(1);
331
- if requested == 0 { return Ok(written); }
370
+ report_unpack_progress(progress, total_expected, bytes_processed, file_count, processed_files, &mut last_pct);
371
+ if requested == 0 {
372
+ if let Some(cb) = progress {
373
+ cb(99, 100, "finishing");
374
+ }
375
+ return Ok(written);
376
+ }
332
377
  }
333
378
  }
334
379
 
380
+ report_unpack_progress(progress, total_expected, bytes_processed, file_count, processed_files, &mut last_pct);
381
+
335
382
  pos = content_end; if pos > 0 {
336
383
  buf.drain(0..pos);
337
384
  pos = 0;
@@ -344,6 +391,10 @@ pub fn unpack_stream_to_dir<R: std::io::Read>(reader: &mut R, out_dir: &Path, fi
344
391
  }
345
392
  }
346
393
 
394
+ if let Some(cb) = progress {
395
+ cb(99, 100, "finishing");
396
+ }
397
+
347
398
  Ok(written)
348
399
  }
349
400
 
@@ -390,7 +441,7 @@ mod stream_tests {
390
441
  let tmpdir = std::env::temp_dir().join(format!("rox_unpack_test_{}", ms));
391
442
  let _ = std::fs::create_dir_all(&tmpdir);
392
443
 
393
- let out = unpack_stream_to_dir(&mut dec2, &tmpdir, None)?;
444
+ let out = unpack_stream_to_dir(&mut dec2, &tmpdir, None, None, 0)?;
394
445
 
395
446
  assert_eq!(out.len(), 2);
396
447
  assert!(tmpdir.join("file1.txt").exists());
@@ -432,7 +483,7 @@ mod stream_tests {
432
483
  let tmpdir = std::env::temp_dir().join(format!("rox_unpack_png_test_{}", ms));
433
484
  let _ = std::fs::create_dir_all(&tmpdir);
434
485
 
435
- let out = unpack_stream_to_dir(&mut dec, &tmpdir, None)?;
486
+ let out = unpack_stream_to_dir(&mut dec, &tmpdir, None, None, 0)?;
436
487
 
437
488
  assert_eq!(out.len(), 2);
438
489
  assert!(tmpdir.join("file1.txt").exists());
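
The `unpack_stream_to_dir` changes above (and the matching `write_pack_entry_header` in the encoder further down) operate on the length-prefixed entry layout that replaces the previous TAR payload: a `ROXP` magic (`0x524f5850`), a big-endian `u32` file count, then for each file a big-endian `u16` name length, the UTF-8 relative path, a big-endian `u64` content size, and the raw bytes. The writer below is reconstructed from the diff as an illustration (the function name `write_pack` is hypothetical, not the shipped API).

```rust
use std::io::Write;

/// Illustrative sketch of the pack layout used in place of TAR:
/// "ROXP" magic, u32 BE file count, then per file
/// [u16 BE name_len][relative path][u64 BE size][raw bytes].
fn write_pack<W: Write>(out: &mut W, files: &[(&str, &[u8])]) -> std::io::Result<()> {
    out.write_all(b"ROXP")?;
    out.write_all(&(files.len() as u32).to_be_bytes())?;
    for (rel_path, bytes) in files {
        let name = rel_path.as_bytes();
        out.write_all(&(name.len() as u16).to_be_bytes())?;
        out.write_all(name)?;
        out.write_all(&(bytes.len() as u64).to_be_bytes())?;
        out.write_all(bytes)?;
    }
    Ok(())
}
```

In the shipped encoder this stream is additionally prefixed with the `ROX1` marker and written through the streaming Zstd encoder, which is why the decoder in this diff also tolerates a leading `ROX1` before the `ROXP` header.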
@@ -4,13 +4,14 @@ use cipher::{KeyIvInit, StreamCipher};
4
4
 
5
5
  const PIXEL_MAGIC: &[u8] = b"PXL1";
6
6
  const MARKER_BYTES: usize = 12;
7
+ const PACK_MAGIC: [u8; 4] = 0x524f5850u32.to_be_bytes();
7
8
 
8
9
  type Aes256Ctr = ctr::Ctr64BE<aes::Aes256>;
9
10
 
10
11
  pub type DecodeProgressCallback = Box<dyn Fn(u64, u64, &str) + Send>;
11
12
 
12
13
  pub fn streaming_decode_to_dir(png_path: &Path, out_dir: &Path) -> Result<Vec<String>, String> {
13
- streaming_decode_to_dir_encrypted_with_progress(png_path, out_dir, None, None)
14
+ streaming_decode_selected_to_dir_encrypted_with_progress(png_path, out_dir, None, None, None)
14
15
  }
15
16
 
16
17
  pub fn streaming_decode_to_dir_encrypted(
@@ -18,7 +19,7 @@ pub fn streaming_decode_to_dir_encrypted(
18
19
  out_dir: &Path,
19
20
  passphrase: Option<&str>,
20
21
  ) -> Result<Vec<String>, String> {
21
- streaming_decode_to_dir_encrypted_with_progress(png_path, out_dir, passphrase, None)
22
+ streaming_decode_selected_to_dir_encrypted_with_progress(png_path, out_dir, None, passphrase, None)
22
23
  }
23
24
 
24
25
  pub fn streaming_decode_to_dir_encrypted_with_progress(
@@ -26,6 +27,16 @@ pub fn streaming_decode_to_dir_encrypted_with_progress(
26
27
  out_dir: &Path,
27
28
  passphrase: Option<&str>,
28
29
  progress: Option<DecodeProgressCallback>,
30
+ ) -> Result<Vec<String>, String> {
31
+ streaming_decode_selected_to_dir_encrypted_with_progress(png_path, out_dir, None, passphrase, progress)
32
+ }
33
+
34
+ pub fn streaming_decode_selected_to_dir_encrypted_with_progress(
35
+ png_path: &Path,
36
+ out_dir: &Path,
37
+ files_opt: Option<&[String]>,
38
+ passphrase: Option<&str>,
39
+ progress: Option<DecodeProgressCallback>,
29
40
  ) -> Result<Vec<String>, String> {
30
41
  let file = std::fs::File::open(png_path).map_err(|e| format!("open: {}", e))?;
31
42
  let mmap = unsafe { memmap2::Mmap::map(&file).map_err(|e| format!("mmap: {}", e))? };
@@ -88,7 +99,7 @@ pub fn streaming_decode_to_dir_encrypted_with_progress(
88
99
  let mut decoder = zstd::stream::Decoder::new(remaining_reader)
89
100
  .map_err(|e| format!("zstd decoder: {}", e))?;
90
101
  decoder.window_log_max(31).map_err(|e| format!("zstd window_log_max: {}", e))?;
91
- read_rox1_and_untar_with_progress(decoder, out_dir, progress, total_expected)
102
+ read_rox1_and_unpack_with_progress(decoder, out_dir, files_opt, progress, total_expected)
92
103
  }
93
104
  0x03 => {
94
105
  let pass = passphrase.ok_or("Passphrase required for AES-CTR decryption")?;
@@ -112,15 +123,16 @@ pub fn streaming_decode_to_dir_encrypted_with_progress(
112
123
  let mut decoder = zstd::stream::Decoder::new(ctr_reader)
113
124
  .map_err(|e| format!("zstd decoder: {}", e))?;
114
125
  decoder.window_log_max(31).map_err(|e| format!("zstd window_log_max: {}", e))?;
115
- read_rox1_and_untar_with_progress(decoder, out_dir, progress, total_expected)
126
+ read_rox1_and_unpack_with_progress(decoder, out_dir, files_opt, progress, total_expected)
116
127
  }
117
128
  _ => Err(format!("Unsupported encryption (enc=0x{:02x}) in streaming decode", enc_byte)),
118
129
  }
119
130
  }
120
131
 
121
- fn read_rox1_and_untar_with_progress<R: Read>(
132
+ fn read_rox1_and_unpack_with_progress<R: Read>(
122
133
  mut decoder: R,
123
134
  out_dir: &Path,
135
+ files_opt: Option<&[String]>,
124
136
  progress: Option<DecodeProgressCallback>,
125
137
  total_expected: u64,
126
138
  ) -> Result<Vec<String>, String> {
@@ -130,7 +142,17 @@ fn read_rox1_and_untar_with_progress<R: Read>(
130
142
  return Err(format!("Expected ROX1, got {:?}", magic));
131
143
  }
132
144
  std::fs::create_dir_all(out_dir).map_err(|e| format!("mkdir: {}", e))?;
133
- tar_unpack_from_reader_with_progress(decoder, out_dir, progress, total_expected)
145
+
146
+ let mut prefix = [0u8; 4];
147
+ decoder.read_exact(&mut prefix).map_err(|e| format!("read payload magic: {}", e))?;
148
+ let mut chained = std::io::Cursor::new(prefix).chain(decoder);
149
+
150
+ if prefix == PACK_MAGIC {
151
+ crate::packer::unpack_stream_to_dir(&mut chained, out_dir, files_opt, progress.as_deref(), total_expected)
152
+ .map_err(|e| format!("pack unpack: {}", e))
153
+ } else {
154
+ tar_unpack_from_reader_with_progress(chained, out_dir, files_opt, progress, total_expected)
155
+ }
134
156
  }
135
157
 
136
158
  fn parse_png_header(data: &[u8]) -> Result<(usize, usize, usize, usize), String> {
@@ -360,6 +382,7 @@ impl<R: Read> Read for CtrDecryptReader<R> {
360
382
  fn tar_unpack_from_reader_with_progress<R: Read>(
361
383
  reader: R,
362
384
  output_dir: &Path,
385
+ files_opt: Option<&[String]>,
363
386
  progress: Option<DecodeProgressCallback>,
364
387
  total_expected: u64,
365
388
  ) -> Result<Vec<String>, String> {
@@ -369,12 +392,16 @@ fn tar_unpack_from_reader_with_progress<R: Read>(
369
392
  let mut created_dirs = std::collections::HashSet::new();
370
393
  let mut bytes_extracted: u64 = 0;
371
394
  let mut last_pct: u64 = 10;
395
+ let files_filter: Option<std::collections::HashSet<&str>> = files_opt.map(|files| files.iter().map(|file| file.as_str()).collect());
396
+ let mut remaining = files_filter.as_ref().map(|files| files.len()).unwrap_or(usize::MAX);
372
397
 
373
398
  let entries = archive.entries().map_err(|e| format!("tar entries: {}", e))?;
374
399
  for entry in entries {
375
400
  let mut entry = entry.map_err(|e| format!("tar entry: {}", e))?;
376
401
  let entry_size = entry.size();
377
402
  let path = entry.path().map_err(|e| format!("tar path: {}", e))?.to_path_buf();
403
+ let logical_path = path.to_string_lossy().replace('\\', "/");
404
+ let should_write = files_filter.as_ref().map(|files| files.contains(logical_path.as_str())).unwrap_or(true);
378
405
 
379
406
  let mut safe = std::path::PathBuf::new();
380
407
  for comp in path.components() {
@@ -386,6 +413,23 @@ fn tar_unpack_from_reader_with_progress<R: Read>(
386
413
  continue;
387
414
  }
388
415
 
416
+ if !should_write {
417
+ std::io::copy(&mut entry, &mut std::io::sink()).map_err(|e| format!("skip {:?}: {}", safe, e))?;
418
+ bytes_extracted += entry_size;
419
+ if let Some(ref cb) = progress {
420
+ let pct = if total_expected > 0 {
421
+ 10 + (bytes_extracted * 89 / total_expected).min(89)
422
+ } else {
423
+ (10 + (bytes_extracted / (1024 * 1024))).min(99)
424
+ };
425
+ if pct > last_pct {
426
+ last_pct = pct;
427
+ cb(pct, 100, "extracting");
428
+ }
429
+ }
430
+ continue;
431
+ }
432
+
389
433
  let dest = output_dir.join(&safe);
390
434
  if let Some(parent) = dest.parent() {
391
435
  if created_dirs.insert(parent.to_path_buf()) {
@@ -399,6 +443,9 @@ fn tar_unpack_from_reader_with_progress<R: Read>(
399
443
  );
400
444
  std::io::copy(&mut entry, &mut f).map_err(|e| format!("write {:?}: {}", dest, e))?;
401
445
  written.push(safe.to_string_lossy().to_string());
446
+ if files_filter.is_some() {
447
+ remaining = remaining.saturating_sub(1);
448
+ }
402
449
 
403
450
  bytes_extracted += entry_size;
404
451
  if let Some(ref cb) = progress {
@@ -412,6 +459,9 @@ fn tar_unpack_from_reader_with_progress<R: Read>(
412
459
  cb(pct, 100, "extracting");
413
460
  }
414
461
  }
462
+ if remaining == 0 {
463
+ break;
464
+ }
415
465
  }
416
466
 
417
467
  if let Some(ref cb) = progress {
@@ -1,8 +1,9 @@
1
1
  use std::io::{Write, BufWriter, Read};
2
2
  use std::fs::File;
3
- use std::path::Path;
3
+ use std::path::{Path, PathBuf};
4
+ use rayon::prelude::*;
5
+ use serde::Serialize;
4
6
  use walkdir::WalkDir;
5
- use tar::{Builder, Header};
6
7
 
7
8
  const PNG_HEADER: &[u8] = &[137, 80, 78, 71, 13, 10, 26, 10];
8
9
  const PIXEL_MAGIC: &[u8] = b"PXL1";
@@ -10,9 +11,35 @@ const MARKER_START: [(u8, u8, u8); 3] = [(255, 0, 0), (0, 255, 0), (0, 0, 255)];
10
11
  const MARKER_END: [(u8, u8, u8); 3] = [(0, 0, 255), (0, 255, 0), (255, 0, 0)];
11
12
  const MARKER_ZSTD: (u8, u8, u8) = (0, 255, 0);
12
13
  const MAGIC: &[u8] = b"ROX1";
14
+ const PACK_MAGIC: u32 = 0x524f5850;
15
+
16
+ const MIN_ZST_CAPACITY: usize = 16 * 1024 * 1024;
17
+ const MB: u64 = 1024 * 1024;
18
+ const MAX_FILE_BUFFER_CAPACITY: usize = 4 * 1024 * 1024;
19
+ const PARALLEL_IO_FILE_THRESHOLD: u64 = MB;
20
+ const PARALLEL_IO_BATCH_BYTES: u64 = 128 * MB;
21
+ const PARALLEL_IO_BATCH_FILES: usize = 512;
22
+ const PARALLEL_IO_MIN_FILES: usize = 8;
13
23
 
14
24
  pub type ProgressCallback = Box<dyn Fn(u64, u64, &str) + Send>;
15
25
 
26
+ struct DirectoryFile {
27
+ path: PathBuf,
28
+ rel_path: String,
29
+ size: u64,
30
+ }
31
+
32
+ #[derive(Serialize)]
33
+ struct FileListEntry {
34
+ name: String,
35
+ size: u64,
36
+ }
37
+
38
+ struct CollectedDirectory {
39
+ entries: Vec<DirectoryFile>,
40
+ total_bytes: u64,
41
+ }
42
+
16
43
  pub fn encode_dir_to_png(
17
44
  dir_path: &Path,
18
45
  output_path: &Path,
@@ -42,9 +69,7 @@ pub fn encode_dir_to_png_encrypted_with_progress(
42
69
  encrypt_type: Option<&str>,
43
70
  progress: Option<ProgressCallback>,
44
71
  ) -> anyhow::Result<()> {
45
- let (zst_buf, file_list) = compress_dir_to_zst_mem(dir_path, compression_level, &progress)?;
46
-
47
- let file_list_json = serde_json::to_string(&file_list)?;
72
+ let (zst_buf, file_list_json) = compress_dir_to_zst_mem(dir_path, compression_level, &progress)?;
48
73
 
49
74
  let result = write_png_from_zst_mem(
50
75
  zst_buf, output_path, name, Some(&file_list_json),
@@ -62,26 +87,19 @@ fn compress_dir_to_zst_mem(
62
87
  dir_path: &Path,
63
88
  compression_level: i32,
64
89
  progress: &Option<ProgressCallback>,
65
- ) -> anyhow::Result<(Vec<u8>, Vec<serde_json::Value>)> {
66
- let base = dir_path;
67
-
68
- let entries: Vec<_> = WalkDir::new(dir_path)
69
- .follow_links(false)
70
- .into_iter()
71
- .filter_map(|e| e.ok())
72
- .filter(|e| e.file_type().is_file())
73
- .collect();
74
-
75
- let total_bytes: u64 = entries.iter()
76
- .filter_map(|e| std::fs::metadata(e.path()).ok())
77
- .map(|m| m.len())
78
- .sum();
90
+ ) -> anyhow::Result<(Vec<u8>, String)> {
91
+ let collected = collect_directory_files(dir_path);
92
+ let total_bytes = collected.total_bytes;
93
+ let entries = collected.entries;
79
94
 
80
95
  let actual_level = compression_level.min(3);
81
- let mut encoder = zstd::stream::Encoder::new(Vec::with_capacity(16 * 1024 * 1024), actual_level)
96
+ let mut encoder = zstd::stream::Encoder::new(
97
+ Vec::with_capacity(estimate_zst_capacity(total_bytes)),
98
+ actual_level,
99
+ )
82
100
  .map_err(|e| anyhow::anyhow!("zstd init: {}", e))?;
83
101
 
84
- let threads = num_cpus::get() as u32;
102
+ let threads = select_zstd_threads(total_bytes);
85
103
  if threads > 1 {
86
104
  let _ = encoder.multithread(threads);
87
105
  }
@@ -89,61 +107,221 @@ fn compress_dir_to_zst_mem(
89
107
  let _ = encoder.window_log(30);
90
108
 
91
109
  encoder.write_all(MAGIC)?;
110
+ encoder.write_all(&PACK_MAGIC.to_be_bytes())?;
111
+ encoder.write_all(&(entries.len() as u32).to_be_bytes())?;
92
112
 
93
- let mut file_list = Vec::new();
113
+ let mut file_list = Vec::with_capacity(entries.len());
94
114
  let mut bytes_processed: u64 = 0;
95
115
  let mut last_pct: u64 = 0;
96
- {
97
- let mut tar_builder = Builder::new(&mut encoder);
98
- for entry in entries.iter() {
99
- let full = entry.path();
100
- let rel = full.strip_prefix(base).unwrap_or(full);
101
- let rel_str = rel.to_string_lossy().replace('\\', "/");
102
-
103
- let metadata = match std::fs::metadata(full) {
104
- Ok(m) => m,
105
- Err(_) => continue,
106
- };
107
- let size = metadata.len();
108
-
109
- let mut header = Header::new_gnu();
110
- header.set_size(size);
111
- header.set_mode(0o644);
112
- header.set_cksum();
113
-
114
- let file = match File::open(full) {
115
- Ok(f) => f,
116
- Err(_) => continue,
117
- };
118
- let buf_reader = std::io::BufReader::with_capacity(
119
- (size as usize).min(4 * 1024 * 1024).max(8192),
120
- file,
121
- );
122
-
123
- tar_builder.append_data(&mut header, &rel_str, buf_reader)
124
- .map_err(|e| anyhow::anyhow!("tar append {}: {}", rel_str, e))?;
125
-
126
- file_list.push(serde_json::json!({"name": rel_str, "size": size}));
127
-
128
- bytes_processed += size;
129
- if let Some(ref cb) = progress {
130
- let pct = if total_bytes > 0 {
131
- (bytes_processed * 89 / total_bytes).min(89)
132
- } else {
133
- 89
116
+ let mut entry_index = 0usize;
117
+ while entry_index < entries.len() {
118
+ let batch_end = select_parallel_batch_end(&entries, entry_index);
119
+ if batch_end > entry_index + 1 {
120
+ let loaded = load_small_file_batch(&entries[entry_index..batch_end])?;
121
+ for (entry, maybe_bytes) in entries[entry_index..batch_end].iter().zip(loaded.into_iter()) {
122
+ let Some(bytes) = maybe_bytes else {
123
+ continue;
134
124
  };
135
- if pct > last_pct {
136
- last_pct = pct;
137
- cb(pct, 100, "compressing");
138
- }
125
+
126
+ write_pack_entry_header(&mut encoder, &entry.rel_path, entry.size)?;
127
+ encoder.write_all(&bytes)
128
+ .map_err(|e| anyhow::anyhow!("pack write {}: {}", entry.rel_path, e))?;
129
+
130
+ file_list.push(FileListEntry {
131
+ name: entry.rel_path.clone(),
132
+ size: entry.size,
133
+ });
134
+
135
+ bytes_processed += entry.size;
136
+ report_compress_progress(progress, total_bytes, bytes_processed, &mut last_pct);
139
137
  }
138
+ entry_index = batch_end;
139
+ continue;
140
+ }
141
+
142
+ let entry = &entries[entry_index];
143
+ if write_directory_entry(&mut encoder, entry)? {
144
+ file_list.push(FileListEntry {
145
+ name: entry.rel_path.clone(),
146
+ size: entry.size,
147
+ });
148
+
149
+ bytes_processed += entry.size;
150
+ report_compress_progress(progress, total_bytes, bytes_processed, &mut last_pct);
140
151
  }
141
- tar_builder.finish().map_err(|e| anyhow::anyhow!("tar finish: {}", e))?;
152
+ entry_index += 1;
142
153
  }
143
154
 
144
155
  let zst_buf = encoder.finish().map_err(|e| anyhow::anyhow!("zstd finish: {}", e))?;
156
+ let file_list_json = serde_json::to_string(&file_list)?;
145
157
 
146
- Ok((zst_buf, file_list))
158
+ Ok((zst_buf, file_list_json))
159
+ }
160
+
161
+ fn write_pack_entry_header<W: Write>(writer: &mut W, rel_path: &str, size: u64) -> anyhow::Result<()> {
162
+ let name_bytes = rel_path.as_bytes();
163
+ let name_len = u16::try_from(name_bytes.len())
164
+ .map_err(|_| anyhow::anyhow!("path too long for pack entry: {}", rel_path))?;
165
+ writer.write_all(&name_len.to_be_bytes())?;
166
+ writer.write_all(name_bytes)?;
167
+ writer.write_all(&size.to_be_bytes())?;
168
+ Ok(())
169
+ }
170
+
171
+ fn write_directory_entry<W: Write>(writer: &mut W, entry: &DirectoryFile) -> anyhow::Result<bool> {
172
+ let file = match File::open(&entry.path) {
173
+ Ok(file) => file,
174
+ Err(_) => return Ok(false),
175
+ };
176
+
177
+ write_pack_entry_header(writer, &entry.rel_path, entry.size)?;
178
+
179
+ let mut buf_reader = std::io::BufReader::with_capacity(file_buffer_capacity(entry.size), file);
180
+ std::io::copy(&mut buf_reader, writer)
181
+ .map_err(|e| anyhow::anyhow!("pack write {}: {}", entry.rel_path, e))?;
182
+
183
+ Ok(true)
184
+ }
185
+
186
+ fn load_small_file_batch(entries: &[DirectoryFile]) -> anyhow::Result<Vec<Option<Vec<u8>>>> {
187
+ entries.par_iter().map(load_directory_entry_bytes).collect()
188
+ }
189
+
190
+ fn load_directory_entry_bytes(entry: &DirectoryFile) -> anyhow::Result<Option<Vec<u8>>> {
191
+ let mut file = match File::open(&entry.path) {
192
+ Ok(file) => file,
193
+ Err(_) => return Ok(None),
194
+ };
195
+
196
+ let reserve = usize::try_from(entry.size.min(PARALLEL_IO_BATCH_BYTES)).unwrap_or(MAX_FILE_BUFFER_CAPACITY);
197
+ let mut bytes = Vec::with_capacity(reserve.max(8192));
198
+ file.read_to_end(&mut bytes)
199
+ .map_err(|e| anyhow::anyhow!("pack read {}: {}", entry.rel_path, e))?;
200
+
201
+ Ok(Some(bytes))
202
+ }
203
+
204
+ fn select_parallel_batch_end(entries: &[DirectoryFile], start: usize) -> usize {
205
+ let Some(first) = entries.get(start) else {
206
+ return start;
207
+ };
208
+ if !should_parallelize_entry(first) {
209
+ return start + 1;
210
+ }
211
+
212
+ let mut end = start;
213
+ let mut batch_bytes = 0u64;
214
+ while end < entries.len() {
215
+ let entry = &entries[end];
216
+ if !should_parallelize_entry(entry) {
217
+ break;
218
+ }
219
+ if end > start {
220
+ if end - start >= PARALLEL_IO_BATCH_FILES {
221
+ break;
222
+ }
223
+ if batch_bytes.saturating_add(entry.size) > PARALLEL_IO_BATCH_BYTES {
224
+ break;
225
+ }
226
+ }
227
+ batch_bytes = batch_bytes.saturating_add(entry.size);
228
+ end += 1;
229
+ }
230
+
231
+ if end - start >= PARALLEL_IO_MIN_FILES {
232
+ end
233
+ } else {
234
+ start + 1
235
+ }
236
+ }
237
+
238
+ fn should_parallelize_entry(entry: &DirectoryFile) -> bool {
239
+ entry.size <= PARALLEL_IO_FILE_THRESHOLD
240
+ }
241
+
242
+ fn file_buffer_capacity(size: u64) -> usize {
243
+ usize::try_from(size)
244
+ .unwrap_or(MAX_FILE_BUFFER_CAPACITY)
245
+ .min(MAX_FILE_BUFFER_CAPACITY)
246
+ .max(8192)
247
+ }
248
+
249
+ fn report_compress_progress(
250
+ progress: &Option<ProgressCallback>,
251
+ total_bytes: u64,
252
+ bytes_processed: u64,
253
+ last_pct: &mut u64,
254
+ ) {
255
+ if let Some(ref cb) = progress {
256
+ let pct = if total_bytes > 0 {
257
+ (bytes_processed * 89 / total_bytes).min(89)
258
+ } else {
259
+ 89
260
+ };
261
+ if pct > *last_pct {
262
+ *last_pct = pct;
263
+ cb(pct, 100, "compressing");
264
+ }
265
+ }
266
+ }
267
+
268
+ fn collect_directory_files(dir_path: &Path) -> CollectedDirectory {
269
+ let mut entries = Vec::new();
270
+ let mut total_bytes = 0u64;
271
+
272
+ for entry in WalkDir::new(dir_path)
273
+ .follow_links(false)
274
+ .into_iter()
275
+ .filter_map(|entry| entry.ok())
276
+ .filter(|entry| entry.file_type().is_file())
277
+ {
278
+ let size = match entry.metadata() {
279
+ Ok(metadata) => metadata.len(),
280
+ Err(_) => continue,
281
+ };
282
+ let path = entry.into_path();
283
+ let rel = path.strip_prefix(dir_path).unwrap_or(path.as_path());
284
+ let rel_path = normalize_rel_path(rel);
285
+
286
+ total_bytes += size;
287
+ entries.push(DirectoryFile {
288
+ path,
289
+ rel_path,
290
+ size,
291
+ });
292
+ }
293
+
294
+ CollectedDirectory {
295
+ entries,
296
+ total_bytes,
297
+ }
298
+ }
299
+
300
+ fn normalize_rel_path(path: &Path) -> String {
301
+ let rel_path = path.to_string_lossy();
302
+ if rel_path.contains('\\') {
303
+ rel_path.replace('\\', "/")
304
+ } else {
305
+ rel_path.into_owned()
306
+ }
307
+ }
308
+
309
+ fn estimate_zst_capacity(total_bytes: u64) -> usize {
310
+ let capped = total_bytes.min(usize::MAX as u64) as usize;
311
+ (capped / 3).max(MIN_ZST_CAPACITY)
312
+ }
313
+
314
+ fn select_zstd_threads(total_bytes: u64) -> u32 {
315
+ let max_threads = num_cpus::get().max(1) as u32;
316
+ if total_bytes <= 32 * MB {
317
+ 1
318
+ } else if total_bytes <= 128 * MB {
319
+ max_threads.min(2)
320
+ } else if total_bytes <= 512 * MB {
321
+ max_threads.min(4)
322
+ } else {
323
+ max_threads.min(8)
324
+ }
147
325
  }
148
326
 
149
327
  fn write_png_from_zst_mem(
@@ -312,13 +490,18 @@ fn write_idat_streaming<W: Write, R: Read>(
312
490
  let fl_chunk_data = file_list_chunk.unwrap_or(&[]);
313
491
  let payload_total = header_bytes.len() + zst_size + hmac_trailer_len + fl_chunk_data.len();
314
492
  let padding_after = total_data_bytes - payload_total.min(total_data_bytes);
315
-
316
493
  let marker_end_bytes = build_marker_end_bytes();
317
494
 
318
495
  let mut flat_pos: usize = 0;
319
496
  let mut scanline_pos: usize = 0;
320
497
  let mut deflate_block_remaining: usize = 0;
321
498
 
499
+ let mut adler = simd_adler32::Adler32::new();
500
+
501
+ let buf_size = 1024 * 1024;
502
+ let mut transfer_buf = vec![0u8; buf_size];
503
+ let zero_buf = vec![0u8; buf_size];
504
+
322
505
  let mut header_pos: usize = 0;
323
506
  let mut zst_remaining = zst_size;
324
507
  let mut hmac_pos: usize = 0;
@@ -327,12 +510,6 @@ fn write_idat_streaming<W: Write, R: Read>(
327
510
  let mut fl_pos: usize = 0;
328
511
  let mut zero_remaining = padding_after;
329
512
 
330
- let mut adler = simd_adler32::Adler32::new();
331
-
332
- let buf_size = 1024 * 1024;
333
- let mut transfer_buf = vec![0u8; buf_size];
334
- let zero_buf = vec![0u8; buf_size];
335
-
336
513
  let mut last_png_pct: u64 = 89;
337
514
 
338
515
  for row_idx in 0..height {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "roxify",
3
- "version": "1.13.3",
3
+ "version": "1.13.4",
4
4
  "type": "module",
5
5
  "description": "Ultra-lightweight PNG steganography with native Rust acceleration. Encode binary data into PNG images with zstd compression.",
6
6
  "main": "dist/index.js",