roxify 1.7.6 → 1.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +82 -0
- package/dist/cli.js +1 -1
- package/dist/roxify_native.exe +0 -0
- package/native/archive.rs +176 -0
- package/native/audio.rs +151 -0
- package/native/bwt.rs +100 -0
- package/native/context_mixing.rs +120 -0
- package/native/core.rs +297 -0
- package/native/crypto.rs +119 -0
- package/native/encoder.rs +640 -0
- package/native/gpu.rs +116 -0
- package/native/hybrid.rs +162 -0
- package/native/image_utils.rs +77 -0
- package/native/lib.rs +464 -0
- package/native/main.rs +462 -0
- package/native/packer.rs +447 -0
- package/native/png_utils.rs +192 -0
- package/native/pool.rs +101 -0
- package/native/progress.rs +43 -0
- package/native/rans.rs +149 -0
- package/native/reconstitution.rs +511 -0
- package/package.json +6 -1
- package/scripts/postinstall.cjs +101 -0
package/Cargo.toml
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "roxify_native"
|
|
3
|
+
version = "1.8.1"
|
|
4
|
+
edition = "2021"
|
|
5
|
+
publish = false
|
|
6
|
+
|
|
7
|
+
[lib]
|
|
8
|
+
name = "roxify_native"
|
|
9
|
+
crate-type = ["cdylib"]
|
|
10
|
+
path = "native/lib.rs"
|
|
11
|
+
|
|
12
|
+
[[bin]]
|
|
13
|
+
name = "roxify_native"
|
|
14
|
+
path = "native/main.rs"
|
|
15
|
+
|
|
16
|
+
[dev-dependencies]
|
|
17
|
+
|
|
18
|
+
[dependencies]
|
|
19
|
+
napi = "2"
|
|
20
|
+
napi-derive = "2"
|
|
21
|
+
rayon = "1.7"
|
|
22
|
+
zstd = { version = "0.11", features = ["zstdmt"] }
|
|
23
|
+
crc32fast = "1.3"
|
|
24
|
+
num_cpus = "1.16"
|
|
25
|
+
clap = { version = "4", features = ["derive"] }
|
|
26
|
+
serde_json = "1.0"
|
|
27
|
+
anyhow = "1.0"
|
|
28
|
+
png = "0.18.0"
|
|
29
|
+
image = { version = "0.25", default-features = false, features = ["png"] }
|
|
30
|
+
# indicatif removed from default deps to reduce heavy build graph
|
|
31
|
+
walkdir = "2.5.0"
|
|
32
|
+
tar = "0.4"
|
|
33
|
+
aes-gcm = "0.10"
|
|
34
|
+
pbkdf2 = "0.12"
|
|
35
|
+
rand = "0.8"
|
|
36
|
+
sha2 = "0.10"
|
|
37
|
+
mimalloc = "0.1"
|
|
38
|
+
|
|
39
|
+
wgpu = { version = "0.19", optional = true }
|
|
40
|
+
memmap2 = "0.9"
|
|
41
|
+
bytemuck = { version = "1.14", features = ["derive"] }
|
|
42
|
+
# tokio is optional now; enable via the 'async' feature
|
|
43
|
+
tokio = { version = "1", features = ["sync", "rt"], optional = true }
|
|
44
|
+
parking_lot = "0.12"
|
|
45
|
+
pollster = { version = "0.3", optional = true }
|
|
46
|
+
|
|
47
|
+
[features]
|
|
48
|
+
# default is intentionally empty so the crate compiles fast for local checks.
|
|
49
|
+
# Enable 'gpu' to pull in the WGPU and pollster dependencies (heavy).
|
|
50
|
+
# Enable 'async' to include tokio runtime (optional).
|
|
51
|
+
# Example: `cargo build -p roxify_native --features gpu`
|
|
52
|
+
default = []
|
|
53
|
+
|
|
54
|
+
gpu = ["wgpu", "pollster"]
|
|
55
|
+
async = ["tokio"]
|
|
56
|
+
full = ["gpu", "async"]
|
|
57
|
+
|
|
58
|
+
[profile.release]
|
|
59
|
+
opt-level = 3
|
|
60
|
+
lto = "fat"
|
|
61
|
+
codegen-units = 1
|
|
62
|
+
strip = true
|
|
63
|
+
panic = "abort"
|
|
64
|
+
|
|
65
|
+
[profile.release-size]
|
|
66
|
+
inherits = "release"
|
|
67
|
+
opt-level = "z"
|
|
68
|
+
lto = true
|
|
69
|
+
strip = true
|
|
70
|
+
|
|
71
|
+
[profile.fastdev]
|
|
72
|
+
# Fast development profile for minimal user CPU and fast compilation.
|
|
73
|
+
# Lower optimization and high codegen units to parallelize compilation work,
|
|
74
|
+
# enable incremental to speed up subsequent incremental builds.
|
|
75
|
+
inherits = "release"
|
|
76
|
+
opt-level = 1
|
|
77
|
+
lto = false
|
|
78
|
+
codegen-units = 16
|
|
79
|
+
debug = false
|
|
80
|
+
incremental = true
|
|
81
|
+
panic = "abort"
|
|
82
|
+
|
package/dist/cli.js
CHANGED
|
@@ -6,7 +6,7 @@ import { DataFormatError, decodePngToBinary, encodeBinaryToPng, hasPassphraseInP
|
|
|
6
6
|
import { packPathsGenerator, unpackBuffer } from './pack.js';
|
|
7
7
|
import * as cliProgress from './stub-progress.js';
|
|
8
8
|
import { encodeWithRustCLI, isRustBinaryAvailable, } from './utils/rust-cli-wrapper.js';
|
|
9
|
-
const VERSION = '1.
|
|
9
|
+
const VERSION = '1.8.0';
|
|
10
10
|
function getDirectorySize(dirPath) {
|
|
11
11
|
let totalSize = 0;
|
|
12
12
|
try {
|
package/dist/roxify_native.exe
CHANGED
|
Binary file
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
use std::io::Cursor;
|
|
2
|
+
use std::path::Path;
|
|
3
|
+
use rayon::prelude::*;
|
|
4
|
+
use tar::{Archive, Builder, Header};
|
|
5
|
+
use walkdir::WalkDir;
|
|
6
|
+
|
|
7
|
+
pub fn tar_pack_directory(dir_path: &Path) -> Result<Vec<u8>, String> {
|
|
8
|
+
let base = dir_path;
|
|
9
|
+
|
|
10
|
+
let entries: Vec<_> = WalkDir::new(dir_path)
|
|
11
|
+
.follow_links(false)
|
|
12
|
+
.into_iter()
|
|
13
|
+
.filter_map(|e| e.ok())
|
|
14
|
+
.filter(|e| e.file_type().is_file())
|
|
15
|
+
.collect();
|
|
16
|
+
|
|
17
|
+
let file_data: Vec<(String, Vec<u8>)> = entries
|
|
18
|
+
.par_iter()
|
|
19
|
+
.filter_map(|entry| {
|
|
20
|
+
let full = entry.path();
|
|
21
|
+
let rel = full.strip_prefix(base).unwrap_or(full);
|
|
22
|
+
let rel_str = rel.to_string_lossy().replace('\\', "/");
|
|
23
|
+
match std::fs::read(full) {
|
|
24
|
+
Ok(data) => Some((rel_str, data)),
|
|
25
|
+
Err(_) => None,
|
|
26
|
+
}
|
|
27
|
+
})
|
|
28
|
+
.collect();
|
|
29
|
+
|
|
30
|
+
let total_estimate: usize = file_data.iter().map(|(n, d)| 512 + d.len() + 512 + n.len()).sum();
|
|
31
|
+
let mut buf = Vec::with_capacity(total_estimate + 1024);
|
|
32
|
+
{
|
|
33
|
+
let mut builder = Builder::new(&mut buf);
|
|
34
|
+
for (rel_str, data) in &file_data {
|
|
35
|
+
let mut header = Header::new_gnu();
|
|
36
|
+
header.set_size(data.len() as u64);
|
|
37
|
+
header.set_mode(0o644);
|
|
38
|
+
header.set_cksum();
|
|
39
|
+
builder
|
|
40
|
+
.append_data(&mut header, rel_str, &data[..])
|
|
41
|
+
.map_err(|e| format!("tar append {}: {}", rel_str, e))?;
|
|
42
|
+
}
|
|
43
|
+
builder.finish().map_err(|e| format!("tar finish: {}", e))?;
|
|
44
|
+
}
|
|
45
|
+
Ok(buf)
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
pub fn tar_unpack(tar_data: &[u8], output_dir: &Path) -> Result<Vec<String>, String> {
|
|
49
|
+
let mut archive = Archive::new(Cursor::new(tar_data));
|
|
50
|
+
let mut written = Vec::new();
|
|
51
|
+
|
|
52
|
+
let entries = archive.entries().map_err(|e| format!("tar entries: {}", e))?;
|
|
53
|
+
for entry in entries {
|
|
54
|
+
let mut entry = entry.map_err(|e| format!("tar entry: {}", e))?;
|
|
55
|
+
let path = entry
|
|
56
|
+
.path()
|
|
57
|
+
.map_err(|e| format!("tar entry path: {}", e))?
|
|
58
|
+
.to_path_buf();
|
|
59
|
+
|
|
60
|
+
let mut safe = std::path::PathBuf::new();
|
|
61
|
+
for comp in path.components() {
|
|
62
|
+
if let std::path::Component::Normal(osstr) = comp {
|
|
63
|
+
safe.push(osstr);
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
if safe.as_os_str().is_empty() {
|
|
68
|
+
continue;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
let dest = output_dir.join(&safe);
|
|
72
|
+
if let Some(parent) = dest.parent() {
|
|
73
|
+
std::fs::create_dir_all(parent)
|
|
74
|
+
.map_err(|e| format!("mkdir {:?}: {}", parent, e))?;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
entry
|
|
78
|
+
.unpack(&dest)
|
|
79
|
+
.map_err(|e| format!("tar unpack {:?}: {}", dest, e))?;
|
|
80
|
+
written.push(safe.to_string_lossy().to_string());
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
Ok(written)
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
/// Heuristic check for a tar archive: the POSIX/GNU magic "ustar" lives at
/// byte offset 257 of the first 512-byte header block.
///
/// Only the first five magic bytes are compared, so both POSIX ("ustar\0")
/// and GNU ("ustar ") archives are recognized. Pre-POSIX (v7) archives carry
/// no magic and are not detected.
pub fn is_tar(data: &[u8]) -> bool {
    // The magic occupies bytes 257..262, so 262 bytes suffice — the previous
    // guard of `data.len() < 263` was off by one and rejected a 262-byte
    // buffer that could contain the magic.
    data.len() >= 262 && &data[257..262] == b"ustar"
}
|
|
92
|
+
|
|
93
|
+
pub fn tar_file_list(tar_data: &[u8]) -> Result<Vec<(String, u64)>, String> {
|
|
94
|
+
let mut archive = Archive::new(Cursor::new(tar_data));
|
|
95
|
+
let mut list = Vec::new();
|
|
96
|
+
let entries = archive.entries().map_err(|e| format!("tar entries: {}", e))?;
|
|
97
|
+
for entry in entries {
|
|
98
|
+
let entry = entry.map_err(|e| format!("tar entry: {}", e))?;
|
|
99
|
+
let path = entry
|
|
100
|
+
.path()
|
|
101
|
+
.map_err(|e| format!("tar path: {}", e))?
|
|
102
|
+
.to_string_lossy()
|
|
103
|
+
.to_string();
|
|
104
|
+
let size = entry.size();
|
|
105
|
+
list.push((path, size));
|
|
106
|
+
}
|
|
107
|
+
Ok(list)
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    // NOTE(review): both tests use fixed names under the system temp dir, so
    // parallel test runs of the same suite could race — consider unique dirs.

    /// Pack a two-file tree, verify magic + listing, unpack into a fresh
    /// directory and compare contents byte-for-byte.
    #[test]
    fn test_tar_roundtrip() {
        let tmp = std::env::temp_dir().join("rox_tar_test");
        let _ = fs::remove_dir_all(&tmp); // ignore "not found" on first run
        fs::create_dir_all(tmp.join("sub")).unwrap();
        fs::write(tmp.join("hello.txt"), b"Hello TAR").unwrap();
        fs::write(tmp.join("sub/nested.txt"), b"Nested!").unwrap();

        let tar_data = tar_pack_directory(&tmp).unwrap();
        assert!(is_tar(&tar_data));

        let list = tar_file_list(&tar_data).unwrap();
        assert_eq!(list.len(), 2);

        let out = std::env::temp_dir().join("rox_tar_test_out");
        let _ = fs::remove_dir_all(&out);
        fs::create_dir_all(&out).unwrap();

        let written = tar_unpack(&tar_data, &out).unwrap();
        assert_eq!(written.len(), 2);
        assert_eq!(fs::read_to_string(out.join("hello.txt")).unwrap(), "Hello TAR");
        assert_eq!(fs::read_to_string(out.join("sub/nested.txt")).unwrap(), "Nested!");

        // Best-effort cleanup; failures are not test failures.
        let _ = fs::remove_dir_all(&tmp);
        let _ = fs::remove_dir_all(&out);
    }

    /// Pack -> zstd-compress -> decompress (via crate::core helper) -> unpack,
    /// exercising the tar + zstd pipeline end to end on a 3-level tree.
    #[test]
    fn test_tar_zstd_roundtrip() {
        use std::io::Write;

        let tmp = std::env::temp_dir().join("rox_tar_zstd_test");
        let _ = fs::remove_dir_all(&tmp);
        fs::create_dir_all(tmp.join("a/b")).unwrap();
        fs::write(tmp.join("root.txt"), b"root file content").unwrap();
        fs::write(tmp.join("a/mid.txt"), b"mid level").unwrap();
        fs::write(tmp.join("a/b/deep.txt"), b"deep nested file").unwrap();

        let tar_data = tar_pack_directory(&tmp).unwrap();
        assert!(is_tar(&tar_data));

        // Compress with zstd level 3 via the streaming encoder.
        let mut encoder = zstd::stream::Encoder::new(Vec::new(), 3).unwrap();
        encoder.write_all(&tar_data).unwrap();
        let compressed = encoder.finish().unwrap();

        // Decompress through the crate's own helper to cover that path too.
        let decompressed = crate::core::zstd_decompress_bytes(&compressed, None).unwrap();
        assert!(is_tar(&decompressed));

        let out = std::env::temp_dir().join("rox_tar_zstd_test_out");
        let _ = fs::remove_dir_all(&out);
        fs::create_dir_all(&out).unwrap();

        let written = tar_unpack(&decompressed, &out).unwrap();
        assert_eq!(written.len(), 3);
        assert_eq!(fs::read_to_string(out.join("root.txt")).unwrap(), "root file content");
        assert_eq!(fs::read_to_string(out.join("a/mid.txt")).unwrap(), "mid level");
        assert_eq!(fs::read_to_string(out.join("a/b/deep.txt")).unwrap(), "deep nested file");

        let _ = fs::remove_dir_all(&tmp);
        let _ = fs::remove_dir_all(&out);
    }
}
|
package/native/audio.rs
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
/// WAV container for binary data.
///
/// Encodes raw bytes as 8-bit unsigned PCM mono samples (44100 Hz).
/// Header is exactly 44 bytes. Total overhead: 44 bytes.
///
/// Compared to PNG (stored deflate): PNG overhead grows with data size
/// (zlib framing, filter bytes, chunk CRCs). WAV overhead is constant.

const WAV_HEADER_SIZE: usize = 44;
const SAMPLE_RATE: u32 = 44100;
const BITS_PER_SAMPLE: u16 = 8;
const NUM_CHANNELS: u16 = 1;

/// Pack raw bytes into a WAV file (8-bit PCM, mono, 44100 Hz).
/// The bytes are stored directly as unsigned PCM samples.
/// Returns the complete WAV file as a Vec<u8>.
pub fn bytes_to_wav(data: &[u8]) -> Vec<u8> {
    let payload_len = data.len() as u32;
    // RIFF chunk size = everything after the 8-byte "RIFF"+size prefix.
    let riff_size = WAV_HEADER_SIZE as u32 - 8 + payload_len;

    let byte_rate = SAMPLE_RATE * NUM_CHANNELS as u32 * (BITS_PER_SAMPLE as u32 / 8);
    let block_align = NUM_CHANNELS * (BITS_PER_SAMPLE / 8);

    let mut out = Vec::with_capacity(WAV_HEADER_SIZE + data.len());

    // RIFF container header.
    out.extend_from_slice(b"RIFF");
    out.extend_from_slice(&riff_size.to_le_bytes());
    out.extend_from_slice(b"WAVE");

    // "fmt " sub-chunk: 16-byte PCM format description.
    out.extend_from_slice(b"fmt ");
    out.extend_from_slice(&16u32.to_le_bytes()); // sub-chunk size (PCM = 16)
    out.extend_from_slice(&1u16.to_le_bytes()); // format tag 1 = integer PCM
    out.extend_from_slice(&NUM_CHANNELS.to_le_bytes());
    out.extend_from_slice(&SAMPLE_RATE.to_le_bytes());
    out.extend_from_slice(&byte_rate.to_le_bytes());
    out.extend_from_slice(&block_align.to_le_bytes());
    out.extend_from_slice(&BITS_PER_SAMPLE.to_le_bytes());

    // "data" sub-chunk carries the payload verbatim.
    out.extend_from_slice(b"data");
    out.extend_from_slice(&payload_len.to_le_bytes());
    out.extend_from_slice(data);

    out
}

/// Extract the payload bytes from a WAV file.
///
/// Walks the RIFF chunk list until the "data" sub-chunk is found, skipping
/// "fmt " and any other chunks. A data chunk whose declared size overruns
/// the buffer yields the remaining bytes instead of an error.
pub fn wav_to_bytes(wav: &[u8]) -> Result<Vec<u8>, String> {
    if wav.len() < WAV_HEADER_SIZE {
        return Err("WAV data too short".to_string());
    }

    // Validate the RIFF/WAVE preamble.
    if &wav[0..4] != b"RIFF" {
        return Err("Not a RIFF file".to_string());
    }
    if &wav[8..12] != b"WAVE" {
        return Err("Not a WAVE file".to_string());
    }

    // Scan sub-chunks starting right after the 12-byte preamble.
    let mut pos = 12;
    while pos + 8 <= wav.len() {
        let id = &wav[pos..pos + 4];
        let size =
            u32::from_le_bytes([wav[pos + 4], wav[pos + 5], wav[pos + 6], wav[pos + 7]]) as usize;

        if id == b"data" {
            let start = pos + 8;
            // Tolerate truncation: clamp the end to the buffer.
            let end = (start + size).min(wav.len());
            return Ok(wav[start..end].to_vec());
        }

        // Advance past this chunk; RIFF pads odd-sized chunks to word size.
        pos += 8 + size + (size % 2);
    }

    Err("data chunk not found".to_string())
}

/// Check if a buffer starts with a RIFF/WAVE header.
pub fn is_wav(buf: &[u8]) -> bool {
    buf.len() >= 12 && buf.starts_with(b"RIFF") && &buf[8..12] == b"WAVE"
}
|
|
100
|
+
|
|
101
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Encode then decode a short message; header fields and payload must
    /// survive the roundtrip exactly.
    #[test]
    fn test_wav_roundtrip() {
        let data = b"Hello, World! This is roxify audio container test data.";
        let wav = bytes_to_wav(data);

        // Check header
        assert_eq!(&wav[0..4], b"RIFF");
        assert_eq!(&wav[8..12], b"WAVE");
        assert_eq!(wav.len(), 44 + data.len()); // constant 44-byte overhead

        // Roundtrip
        let recovered = wav_to_bytes(&wav).expect("decode should succeed");
        assert_eq!(recovered, data);
    }

    /// Empty payload: a bare 44-byte header must decode to zero bytes.
    #[test]
    fn test_wav_empty() {
        let data: &[u8] = b"";
        let wav = bytes_to_wav(data);
        assert_eq!(wav.len(), 44);
        let recovered = wav_to_bytes(&wav).expect("decode empty");
        assert!(recovered.is_empty());
    }

    /// 1 MB payload: size bookkeeping and roundtrip at a non-trivial scale.
    #[test]
    fn test_wav_large() {
        let data = vec![0xAB_u8; 1024 * 1024]; // 1 MB
        let wav = bytes_to_wav(&data);
        assert_eq!(wav.len(), 44 + 1024 * 1024);
        let recovered = wav_to_bytes(&wav).expect("decode large");
        assert_eq!(recovered, data);
    }

    /// Signature detection: positive on generated output, negative on
    /// arbitrary bytes and on RIFF-without-WAVE.
    #[test]
    fn test_is_wav() {
        let wav = bytes_to_wav(b"test");
        assert!(is_wav(&wav));
        assert!(!is_wav(b"not a wav"));
        assert!(!is_wav(b"RIFF1234XXXX")); // RIFF but not WAVE
    }

    /// Malformed inputs must produce errors, not panics.
    #[test]
    fn test_invalid_wav() {
        assert!(wav_to_bytes(b"short").is_err());
        assert!(wav_to_bytes(b"NOT a RIFF file!").is_err());
    }
}
|
package/native/bwt.rs
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
use rayon::prelude::*;
|
|
3
|
+
|
|
4
|
+
/// Output of a forward Burrows–Wheeler transform (`bwt_encode`).
pub struct BwtResult {
    /// Last column (L) of the lexicographically sorted rotation matrix.
    pub transformed: Vec<u8>,
    /// Row at which the original (rotation-0) string appears in the sorted
    /// rotation matrix; required by `bwt_decode` to invert the transform.
    pub primary_index: u32,
}
|
|
8
|
+
|
|
9
|
+
pub fn bwt_encode(data: &[u8]) -> Result<BwtResult> {
|
|
10
|
+
if data.is_empty() {
|
|
11
|
+
return Ok(BwtResult {
|
|
12
|
+
transformed: Vec::new(),
|
|
13
|
+
primary_index: 0,
|
|
14
|
+
});
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
let n = data.len();
|
|
18
|
+
let mut rotations: Vec<usize> = (0..n).collect();
|
|
19
|
+
|
|
20
|
+
rotations.par_sort_by(|&a, &b| {
|
|
21
|
+
for i in 0..n {
|
|
22
|
+
let ca = data[(a + i) % n];
|
|
23
|
+
let cb = data[(b + i) % n];
|
|
24
|
+
if ca != cb {
|
|
25
|
+
return ca.cmp(&cb);
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
std::cmp::Ordering::Equal
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
let mut transformed = Vec::with_capacity(n);
|
|
32
|
+
let mut primary_index = 0u32;
|
|
33
|
+
|
|
34
|
+
for (idx, &rot) in rotations.iter().enumerate() {
|
|
35
|
+
if rot == 0 {
|
|
36
|
+
primary_index = idx as u32;
|
|
37
|
+
}
|
|
38
|
+
transformed.push(data[(rot + n - 1) % n]);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
Ok(BwtResult {
|
|
42
|
+
transformed,
|
|
43
|
+
primary_index,
|
|
44
|
+
})
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
pub fn bwt_decode(data: &[u8], primary_index: u32) -> Result<Vec<u8>> {
|
|
48
|
+
if data.is_empty() {
|
|
49
|
+
return Ok(Vec::new());
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
let n = data.len();
|
|
53
|
+
let primary_idx = primary_index as usize;
|
|
54
|
+
|
|
55
|
+
if primary_idx >= n {
|
|
56
|
+
return Err(anyhow::anyhow!("Invalid primary index"));
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
let mut counts = vec![0usize; 256];
|
|
60
|
+
for &byte in data {
|
|
61
|
+
counts[byte as usize] += 1;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
let mut cumsum = vec![0usize; 256];
|
|
65
|
+
let mut sum = 0;
|
|
66
|
+
for i in 0..256 {
|
|
67
|
+
cumsum[i] = sum;
|
|
68
|
+
sum += counts[i];
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
let mut next = vec![0usize; n];
|
|
72
|
+
let mut counts = vec![0usize; 256];
|
|
73
|
+
|
|
74
|
+
for i in 0..n {
|
|
75
|
+
let byte = data[i] as usize;
|
|
76
|
+
let pos = cumsum[byte] + counts[byte];
|
|
77
|
+
next[pos] = i;
|
|
78
|
+
counts[byte] += 1;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
let mut result = Vec::with_capacity(n);
|
|
82
|
+
let mut idx = primary_idx;
|
|
83
|
+
|
|
84
|
+
for _ in 0..n {
|
|
85
|
+
result.push(data[idx]);
|
|
86
|
+
idx = next[idx];
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
Ok(result)
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
pub fn bwt_encode_streaming(block_size: usize, data: &[u8]) -> Result<Vec<(BwtResult, usize)>> {
|
|
93
|
+
data.par_chunks(block_size)
|
|
94
|
+
.enumerate()
|
|
95
|
+
.map(|(i, chunk)| {
|
|
96
|
+
let result = bwt_encode(chunk)?;
|
|
97
|
+
Ok((result, i * block_size))
|
|
98
|
+
})
|
|
99
|
+
.collect()
|
|
100
|
+
}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
use rayon::prelude::*;
|
|
2
|
+
|
|
3
|
+
/// Adaptive binary counter used as a simple per-context probability model.
#[derive(Clone, Copy, Debug)]
pub struct ProbabilityEstimate {
    pub p0: u32,
    pub p1: u32,
    pub total: u32,
}

impl ProbabilityEstimate {
    /// Shannon entropy of the modeled bit, in bits (0.0 for an empty model).
    pub fn entropy_bits(&self) -> f32 {
        if self.total == 0 {
            return 0.0;
        }
        let p0 = (self.p0 as f32) / (self.total as f32);
        let p1 = (self.p1 as f32) / (self.total as f32);

        let mut bits = 0.0;
        if p0 > 0.0 {
            bits -= p0 * p0.log2();
        }
        if p1 > 0.0 {
            bits -= p1 * p1.log2();
        }
        bits
    }
}

/// Order-0/1/2 bit-prediction contexts.
///
/// FIX: the previous layout stored `[PE; 256]` per order-1 context and
/// `[[PE; 256]; 256]` per order-2 context but only ever read/updated the
/// innermost element `[0]`, allocating ~200 MB of which 1/256 (order 1) and
/// 1/65536 (order 2) was used. The tables are now flat — one estimate per
/// context value — with identical observable behavior and ~800 KB footprint.
pub struct ContextMixer {
    /// Single global (order-0) context.
    order0: ProbabilityEstimate,
    /// One estimate per previous byte (256 entries).
    order1: Vec<ProbabilityEstimate>,
    /// One estimate per previous byte pair (256 * 256 entries, row-major by
    /// `context1`).
    order2: Vec<ProbabilityEstimate>,
}

impl ContextMixer {
    /// Create a mixer with every context initialized to the uniform
    /// Laplace-smoothed state `{p0: 1, p1: 1, total: 2}`.
    pub fn new() -> Self {
        const FRESH: ProbabilityEstimate = ProbabilityEstimate { p0: 1, p1: 1, total: 2 };
        ContextMixer {
            order0: FRESH,
            order1: vec![FRESH; 256],
            order2: vec![FRESH; 256 * 256],
        }
    }

    pub fn predict_order0(&self) -> ProbabilityEstimate {
        self.order0
    }

    pub fn predict_order1(&self, context1: u8) -> ProbabilityEstimate {
        self.order1[context1 as usize]
    }

    pub fn predict_order2(&self, context1: u8, context2: u8) -> ProbabilityEstimate {
        self.order2[context1 as usize * 256 + context2 as usize]
    }

    pub fn update_order0(&mut self, bit: bool) {
        Self::bump(&mut self.order0, bit);
    }

    pub fn update_order1(&mut self, context1: u8, bit: bool) {
        Self::bump(&mut self.order1[context1 as usize], bit);
    }

    pub fn update_order2(&mut self, context1: u8, context2: u8, bit: bool) {
        Self::bump(&mut self.order2[context1 as usize * 256 + context2 as usize], bit);
    }

    /// Record one observed bit in a single context cell.
    fn bump(ctx: &mut ProbabilityEstimate, bit: bool) {
        if bit {
            ctx.p1 += 1;
        } else {
            ctx.p0 += 1;
        }
        ctx.total += 1;
    }
}
|
|
92
|
+
|
|
93
|
+
pub fn analyze_entropy(data: &[u8]) -> f32 {
|
|
94
|
+
let freq: Vec<u32> = {
|
|
95
|
+
let mut f = vec![0u32; 256];
|
|
96
|
+
for &byte in data {
|
|
97
|
+
f[byte as usize] += 1;
|
|
98
|
+
}
|
|
99
|
+
f
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
let total: u32 = freq.iter().sum();
|
|
103
|
+
if total == 0 {
|
|
104
|
+
return 0.0;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
freq.par_iter()
|
|
108
|
+
.filter(|&&f| f > 0)
|
|
109
|
+
.map(|&f| {
|
|
110
|
+
let p = (f as f32) / (total as f32);
|
|
111
|
+
-p * p.log2()
|
|
112
|
+
})
|
|
113
|
+
.sum()
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
/// Estimated size reduction (percent) if `original` were compressed down to
/// its theoretical limit of `entropy_bits` bits per byte.
///
/// Returns 0.0 for empty input — the previous code divided by
/// `original.len()` and returned NaN in that case.
pub fn estimate_compression_gain(original: &[u8], entropy_bits: f32) -> f64 {
    if original.is_empty() {
        return 0.0;
    }
    let len = original.len() as f64;
    let theoretical_min = len * (entropy_bits as f64) / 8.0;
    let ratio = theoretical_min / len;
    (1.0 - ratio) * 100.0
}
|