roxify 1.7.6 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/native/core.rs ADDED
@@ -0,0 +1,293 @@
1
+ use rayon::prelude::*;
2
+ use std::sync::Arc;
3
+ use std::path::PathBuf;
4
+ use anyhow::Result;
5
+
6
/// Positions reported by [`scan_pixels_bytes`] for a raw pixel buffer.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PlainScanResult {
    /// Zero-based pixel indices whose first three bytes equal one of the
    /// requested 3-byte markers.
    pub marker_positions: Vec<u32>,
    /// Byte offsets at which the 4-byte magic `ROX1` begins.
    pub magic_positions: Vec<u32>,
}
10
+
11
+ pub fn scan_pixels_bytes(buf: &[u8], channels: usize, marker_bytes: Option<&[u8]>) -> PlainScanResult {
12
+ let magic = b"ROX1";
13
+
14
+ let magic_positions: Vec<u32> = if buf.len() >= 4 {
15
+ (0..(buf.len() - 3))
16
+ .into_par_iter()
17
+ .filter_map(|i| if &buf[i..i + 4] == magic { Some(i as u32) } else { None })
18
+ .collect()
19
+ } else {
20
+ Vec::new()
21
+ };
22
+
23
+ let markers: Vec<[u8; 3]> = match marker_bytes {
24
+ Some(bytes) if !bytes.is_empty() => {
25
+ if bytes.len() % 3 != 0 {
26
+ return PlainScanResult { marker_positions: Vec::new(), magic_positions };
27
+ }
28
+ bytes.chunks(3).map(|c| [c[0], c[1], c[2]]).collect()
29
+ }
30
+ _ => Vec::new(),
31
+ };
32
+
33
+ let marker_positions = if markers.is_empty() {
34
+ Vec::new()
35
+ } else {
36
+ let markers = Arc::new(markers);
37
+ let ch = channels as usize;
38
+ if ch < 3 || buf.len() < 3 {
39
+ Vec::new()
40
+ } else {
41
+ let pixel_count = buf.len() / ch;
42
+ (0..pixel_count)
43
+ .into_par_iter()
44
+ .filter_map(|i| {
45
+ let base = i * ch;
46
+ if base + 3 > buf.len() {
47
+ return None;
48
+ }
49
+ for m in markers.iter() {
50
+ if buf[base] == m[0] && buf[base + 1] == m[1] && buf[base + 2] == m[2] {
51
+ return Some(i as u32);
52
+ }
53
+ }
54
+ None
55
+ })
56
+ .collect()
57
+ }
58
+ };
59
+
60
+ PlainScanResult { marker_positions, magic_positions }
61
+ }
62
+
63
+ pub fn crc32_bytes(buf: &[u8]) -> u32 {
64
+ // parallelize checksum on large buffers, since crc32fast::hash is single-threaded
65
+ const PAR_THRESHOLD: usize = 4 * 1024 * 1024; // 4 MiB
66
+ if buf.len() < PAR_THRESHOLD {
67
+ crc32fast::hash(buf)
68
+ } else {
69
+ // compute per-chunk hasher in parallel then combine
70
+ let chunk = PAR_THRESHOLD;
71
+ let combined = buf
72
+ .par_chunks(chunk)
73
+ .map(|chunk| {
74
+ let mut h = crc32fast::Hasher::new();
75
+ h.update(chunk);
76
+ h
77
+ })
78
+ .reduce(|| crc32fast::Hasher::new(), |mut a, b| {
79
+ a.combine(&b);
80
+ a
81
+ });
82
+ combined.finalize()
83
+ }
84
+ }
85
+
86
/// Computes the Adler-32 checksum of `buf`.
///
/// The low 16 bits hold the running byte sum (starting at 1) and the high
/// 16 bits hold the sum of those running sums, both modulo 65521.
pub fn adler32_bytes(buf: &[u8]) -> u32 {
    const MOD: u32 = 65521;

    let (low, high) = buf.iter().fold((1u32, 0u32), |(low, high), &byte| {
        let low = (low + u32::from(byte)) % MOD;
        (low, (high + low) % MOD)
    });

    (high << 16) | low
}
96
+
97
/// Delta-encodes `buf`.
///
/// The first output byte is the first input byte; every later output byte is
/// the wrapping difference between an input byte and its predecessor. An empty
/// input yields an empty vector.
pub fn delta_encode_bytes(buf: &[u8]) -> Vec<u8> {
    // Starting from 0 makes the first difference equal to the first byte.
    let mut previous = 0u8;
    buf.iter()
        .map(|&current| {
            let delta = current.wrapping_sub(previous);
            previous = current;
            delta
        })
        .collect()
}
109
+
110
/// Reverses [`delta_encode_bytes`].
///
/// Each output byte is the wrapping running sum of the input bytes up to and
/// including that position. An empty input yields an empty vector.
pub fn delta_decode_bytes(buf: &[u8]) -> Vec<u8> {
    // Starting from 0 makes the first running sum equal to the first byte.
    let mut running = 0u8;
    buf.iter()
        .map(|&delta| {
            running = running.wrapping_add(delta);
            running
        })
        .collect()
}
122
+
123
+ fn compress_with_chunk_size(buf: &[u8], level: i32, chunk_size: usize) -> std::result::Result<Vec<u8>, String> {
124
+ use std::io::Write;
125
+
126
+ // Allow ultra levels (20-22) for maximum compression
127
+ let actual_level = level.min(22).max(1);
128
+ let mut encoder = zstd::stream::Encoder::new(Vec::new(), actual_level)
129
+ .map_err(|e| format!("zstd encoder init error: {}", e))?;
130
+
131
+ let threads = num_cpus::get() as u32;
132
+ if threads > 1 {
133
+ // Ultra levels (>=20) use much more memory per thread; limit to 4
134
+ let max_threads = if actual_level >= 20 { threads.min(4) } else { threads };
135
+ let _ = encoder.multithread(max_threads);
136
+ }
137
+
138
+ if buf.len() > 1024 * 1024 {
139
+ let _ = encoder.long_distance_matching(true);
140
+ let wlog = if buf.len() > 512 * 1024 * 1024 { 28 }
141
+ else if buf.len() > 64 * 1024 * 1024 { 27 }
142
+ else { 26 };
143
+ let _ = encoder.window_log(wlog);
144
+ }
145
+
146
+ let _ = encoder.set_pledged_src_size(Some(buf.len() as u64));
147
+
148
+ for chunk in buf.chunks(chunk_size) {
149
+ encoder.write_all(chunk).map_err(|e| format!("zstd write error: {}", e))?;
150
+ }
151
+
152
+ encoder.finish().map_err(|e| format!("zstd finish error: {}", e))
153
+ }
154
+
155
+ pub fn train_zstd_dictionary(sample_paths: &[PathBuf], dict_size: usize) -> Result<Vec<u8>> {
156
+ // load all sample files contiguously
157
+ let mut samples = Vec::new();
158
+ let mut lengths = Vec::new();
159
+ for path in sample_paths {
160
+ let data = std::fs::read(path)?;
161
+ lengths.push(data.len());
162
+ samples.extend_from_slice(&data);
163
+ }
164
+ let dict = zstd::dict::from_continuous(&samples, &lengths, dict_size)?;
165
+ Ok(dict)
166
+ }
167
+
168
+ /// Compress a slice with optional zstd dictionary.
169
+ ///
170
+ /// When `dict` is `Some`, the dictionary is passed to the encoder (same
171
+ /// dict required for decompression). Pass `None` for normal compression.
172
+ ///
173
+ /// For large buffers (>50 MiB) without a dictionary, multiple chunk sizes
174
+ /// are benchmarked on a sample and the best is selected automatically.
175
+ pub fn zstd_compress_bytes(buf: &[u8], level: i32, dict: Option<&[u8]>) -> std::result::Result<Vec<u8>, String> {
176
+ use std::io::Write;
177
+
178
+ let actual_level = level.min(22).max(1);
179
+ let mut encoder = if let Some(d) = dict {
180
+ zstd::stream::Encoder::with_dictionary(Vec::new(), actual_level, d)
181
+ .map_err(|e| format!("zstd encoder init error: {}", e))?
182
+ } else {
183
+ zstd::stream::Encoder::new(Vec::new(), actual_level)
184
+ .map_err(|e| format!("zstd encoder init error: {}", e))?
185
+ };
186
+
187
+ let threads = num_cpus::get() as u32;
188
+ if threads > 1 {
189
+ let max_threads = if actual_level >= 20 { threads.min(4) } else { threads };
190
+ let _ = encoder.multithread(max_threads);
191
+ }
192
+
193
+ if buf.len() > 1024 * 1024 {
194
+ let _ = encoder.long_distance_matching(true);
195
+ let wlog = if buf.len() > 512 * 1024 * 1024 { 28 }
196
+ else if buf.len() > 64 * 1024 * 1024 { 27 }
197
+ else { 26 };
198
+ let _ = encoder.window_log(wlog);
199
+ }
200
+
201
+ let _ = encoder.set_pledged_src_size(Some(buf.len() as u64));
202
+
203
+ encoder.write_all(buf).map_err(|e| format!("zstd write error: {}", e))?;
204
+ encoder.finish().map_err(|e| format!("zstd finish error: {}", e))
205
+ }
206
+
207
+ pub fn zstd_decompress_bytes(buf: &[u8], dict: Option<&[u8]>) -> std::result::Result<Vec<u8>, String> {
208
+ use std::io::Read;
209
+ if let Some(d) = dict {
210
+ let mut decoder = zstd::stream::Decoder::with_dictionary(std::io::Cursor::new(buf), d)
211
+ .map_err(|e| format!("zstd decoder init error: {}", e))?;
212
+ let mut out = Vec::new();
213
+ decoder.read_to_end(&mut out).map_err(|e| format!("zstd decompress error: {}", e))?;
214
+ Ok(out)
215
+ } else {
216
+ zstd::stream::decode_all(buf).map_err(|e| format!("zstd decompress error: {}", e))
217
+ }
218
+ }
219
+
220
+
221
#[cfg(test)]
mod tests {
    use super::*;

    /// Both occurrences of the magic are reported at their byte offsets.
    #[test]
    fn test_scan_magic() {
        let data = b"xxxxROX1yyyyROX1".to_vec();
        let res = scan_pixels_bytes(&data, 3, None);
        assert_eq!(res.magic_positions, vec![4, 12]);
        assert!(res.marker_positions.is_empty());
    }

    /// Marker positions are pixel indices, not byte offsets.
    #[test]
    fn test_markers() {
        let pixels = vec![1u8, 2, 3, 4, 5, 6, 1, 2, 3];
        let markers_vec = vec![1u8, 2, 3];
        let res = scan_pixels_bytes(&pixels, 3, Some(&markers_vec));
        assert_eq!(res.marker_positions, vec![0, 2]);
    }

    #[test]
    fn test_train_dictionary() {
        use std::fs::{create_dir_all, remove_dir_all, write};

        // Per-process directory so concurrent test runs do not clobber each other.
        let td = std::env::temp_dir().join(format!("rox_dict_test_{}", std::process::id()));
        create_dir_all(&td).unwrap();
        let f1 = td.join("a.bin");
        let f2 = td.join("b.bin");
        // produce 1 MiB of repeated data per file
        let big = vec![0xABu8; 1024 * 1024];
        write(&f1, &big).unwrap();
        write(&f2, &big).unwrap();

        // choose dictionary size 16 KiB (far below total sample size of about 2 MiB)
        let outcome = train_zstd_dictionary(&[f1, f2], 16 * 1024);
        // Clean up before asserting so a failure does not leak the fixtures.
        let _ = remove_dir_all(&td);

        match outcome {
            Ok(dict) => {
                assert!(dict.len() <= 16 * 1024);
                assert!(!dict.is_empty());
            }
            Err(e) => {
                // dictionary training may fail due to insufficient or unsuitable samples;
                // it must still surface as a described error rather than a panic
                assert!(!e.to_string().is_empty());
            }
        }
    }

    #[test]
    fn test_delta_roundtrip() {
        let data = vec![10u8, 20, 30, 40, 250];
        let enc = delta_encode_bytes(&data);
        let dec = delta_decode_bytes(&enc);
        assert_eq!(dec, data);
    }

    #[test]
    fn test_crc_adler() {
        let data = b"hello".to_vec();
        assert_eq!(crc32_bytes(&data), crc32fast::hash(&data));
        // Known Adler-32 values instead of comparing the function with itself.
        assert_eq!(adler32_bytes(&data), 0x062C_0215);
        assert_eq!(adler32_bytes(&[]), 1);

        // also test large buffer triggers parallel branch
        let big = vec![0xAAu8; 5 * 1024 * 1024];
        assert_eq!(crc32_bytes(&big), crc32fast::hash(&big));
    }

    #[test]
    fn test_zstd_dict_roundtrip() {
        let data = b"this is some test data that repeats. ".repeat(1000);
        // simple dictionary containing a substring
        let dict = b"test data";
        let compressed = zstd_compress_bytes(&data, 3, Some(dict)).expect("compress");
        let decompressed = zstd_decompress_bytes(&compressed, Some(dict)).expect("decompress");
        assert_eq!(decompressed, data);
    }
}
@@ -0,0 +1,119 @@
1
+ use anyhow::{anyhow, Result};
2
+ use aes_gcm::{
3
+ aead::{Aead, KeyInit},
4
+ Aes256Gcm, Nonce,
5
+ };
6
+ use pbkdf2::pbkdf2_hmac;
7
+ use rand::RngCore;
8
+ use sha2::Sha256;
9
+
10
/// Leading flag byte of a payload that is stored unencrypted.
const ENC_NONE: u8 = 0x00;
/// Leading flag byte of an AES-256-GCM payload.
const ENC_AES: u8 = 0x01;
/// Leading flag byte of a repeating-key XOR payload.
const ENC_XOR: u8 = 0x02;
/// PBKDF2-HMAC-SHA256 iteration count used to derive the AES key.
const PBKDF2_ITERS: u32 = 1_000_000;
14
+
15
+ pub fn encrypt_xor(data: &[u8], passphrase: &str) -> Vec<u8> {
16
+ let key = passphrase.as_bytes();
17
+ let mut result = Vec::with_capacity(1 + data.len());
18
+ result.push(ENC_XOR);
19
+
20
+ for (i, &byte) in data.iter().enumerate() {
21
+ result.push(byte ^ key[i % key.len()]);
22
+ }
23
+
24
+ result
25
+ }
26
+
27
+ pub fn encrypt_aes(data: &[u8], passphrase: &str) -> Result<Vec<u8>> {
28
+ let mut salt = [0u8; 16];
29
+ rand::thread_rng().fill_bytes(&mut salt);
30
+
31
+ let mut key = [0u8; 32];
32
+ pbkdf2_hmac::<Sha256>(passphrase.as_bytes(), &salt, PBKDF2_ITERS, &mut key);
33
+
34
+ let cipher = Aes256Gcm::new_from_slice(&key)
35
+ .map_err(|e| anyhow::anyhow!("Failed to create cipher: {}", e))?;
36
+
37
+ let mut iv = [0u8; 12];
38
+ rand::thread_rng().fill_bytes(&mut iv);
39
+ let nonce = Nonce::from_slice(&iv);
40
+
41
+ let ciphertext = cipher
42
+ .encrypt(nonce, data)
43
+ .map_err(|e| anyhow::anyhow!("Encryption failed: {}", e))?;
44
+
45
+ let cipher_len = ciphertext.len();
46
+ if cipher_len < 16 {
47
+ return Err(anyhow::anyhow!("Ciphertext too short"));
48
+ }
49
+
50
+ let tag = &ciphertext[cipher_len - 16..];
51
+ let encrypted_data = &ciphertext[..cipher_len - 16];
52
+
53
+ let mut result = Vec::with_capacity(1 + 16 + 12 + 16 + encrypted_data.len());
54
+ result.push(ENC_AES);
55
+ result.extend_from_slice(&salt);
56
+ result.extend_from_slice(&iv);
57
+ result.extend_from_slice(tag);
58
+ result.extend_from_slice(encrypted_data);
59
+
60
+ Ok(result)
61
+ }
62
+
63
+ pub fn no_encryption(data: &[u8]) -> Vec<u8> {
64
+ let mut result = Vec::with_capacity(1 + data.len());
65
+ result.push(ENC_NONE);
66
+ result.extend_from_slice(data);
67
+ result
68
+ }
69
+
70
+ pub fn decrypt_xor(data: &[u8], passphrase: &str) -> Result<Vec<u8>> {
71
+ if data.is_empty() { return Err(anyhow!("Empty xor payload")); }
72
+ if passphrase.is_empty() { return Err(anyhow!("Passphrase required")); }
73
+ let key = passphrase.as_bytes();
74
+ let mut out = Vec::with_capacity(data.len());
75
+ for (i, &b) in data.iter().enumerate() {
76
+ out.push(b ^ key[i % key.len()]);
77
+ }
78
+ Ok(out)
79
+ }
80
+
81
+ pub fn decrypt_aes(data: &[u8], passphrase: &str) -> Result<Vec<u8>> {
82
+ if data.len() < 1 + 16 + 12 + 16 { return Err(anyhow!("Invalid AES payload length")); }
83
+ let salt = &data[1..17];
84
+ let iv = &data[17..29];
85
+ let tag = &data[29..45];
86
+ let enc = &data[45..];
87
+
88
+ let mut key = [0u8; 32];
89
+ pbkdf2_hmac::<Sha256>(passphrase.as_bytes(), salt, PBKDF2_ITERS, &mut key);
90
+
91
+ let cipher = Aes256Gcm::new_from_slice(&key)
92
+ .map_err(|e| anyhow!("Failed to create cipher: {}", e))?;
93
+
94
+ let mut combined = Vec::with_capacity(enc.len() + tag.len());
95
+ combined.extend_from_slice(enc);
96
+ combined.extend_from_slice(tag);
97
+
98
+ let nonce = Nonce::from_slice(iv);
99
+ let decrypted = cipher.decrypt(nonce, combined.as_ref())
100
+ .map_err(|e| anyhow!("AES decryption failed: {}", e))?;
101
+ Ok(decrypted)
102
+ }
103
+
104
+ pub fn try_decrypt(buf: &[u8], passphrase: Option<&str>) -> Result<Vec<u8>> {
105
+ if buf.is_empty() { return Err(anyhow!("Empty buffer")); }
106
+ let flag = buf[0];
107
+ match flag {
108
+ ENC_NONE => Ok(buf[1..].to_vec()),
109
+ ENC_XOR => {
110
+ let pass = passphrase.ok_or_else(|| anyhow!("Passphrase required for XOR decryption"))?;
111
+ decrypt_xor(&buf[1..], pass)
112
+ }
113
+ ENC_AES => {
114
+ let pass = passphrase.ok_or_else(|| anyhow!("Passphrase required for AES decryption"))?;
115
+ decrypt_aes(buf, pass)
116
+ }
117
+ _ => Err(anyhow!("Unknown encryption flag: {}", flag)),
118
+ }
119
+ }