roxify 1.10.1 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "roxify_native"
3
- version = "1.10.1"
3
+ version = "1.11.0"
4
4
  edition = "2021"
5
5
  publish = false
6
6
 
package/native/context_mixing.rs CHANGED
@@ -1,5 +1,3 @@
1
- use rayon::prelude::*;
2
-
3
1
  #[derive(Clone, Copy, Debug)]
4
2
  pub struct ProbabilityEstimate {
5
3
  pub p0: u32,
@@ -91,26 +89,25 @@ impl ContextMixer {
91
89
  }
92
90
 
93
91
  pub fn analyze_entropy(data: &[u8]) -> f32 {
94
- let freq: Vec<u32> = {
95
- let mut f = vec![0u32; 256];
96
- for &byte in data {
97
- f[byte as usize] += 1;
98
- }
99
- f
100
- };
92
+ let mut freq = [0u32; 256];
93
+ for &byte in data {
94
+ freq[byte as usize] += 1;
95
+ }
101
96
 
102
- let total: u32 = freq.iter().sum();
103
- if total == 0 {
97
+ let total = data.len() as f32;
98
+ if total == 0.0 {
104
99
  return 0.0;
105
100
  }
106
101
 
107
- freq.par_iter()
108
- .filter(|&&f| f > 0)
109
- .map(|&f| {
110
- let p = (f as f32) / (total as f32);
111
- -p * p.log2()
112
- })
113
- .sum()
102
+ let inv_total = 1.0 / total;
103
+ let mut entropy = 0.0f32;
104
+ for &f in &freq {
105
+ if f > 0 {
106
+ let p = f as f32 * inv_total;
107
+ entropy -= p * p.log2();
108
+ }
109
+ }
110
+ entropy
114
111
  }
115
112
 
116
113
  pub fn estimate_compression_gain(original: &[u8], entropy_bits: f32) -> f64 {
package/native/hybrid.rs CHANGED
@@ -5,7 +5,14 @@ use crate::mtf::{mtf_encode, mtf_decode, rle0_encode, rle0_decode};
5
5
  use crate::rans_byte::{SymbolStats, rans_encode_block, rans_decode_block};
6
6
  use crate::context_mixing::analyze_entropy;
7
7
 
8
- const BLOCK_SIZE: usize = 1024 * 1024;
8
+ const BLOCK_SIZE: usize = 256 * 1024;
9
+
10
+ const BLOCK_FLAG_BWT: u8 = 0;
11
+ const BLOCK_FLAG_ZSTD: u8 = 1;
12
+ const BLOCK_FLAG_STORE: u8 = 2;
13
+
14
+ const ENTROPY_THRESHOLD_STORE: f32 = 7.95;
15
+ const ENTROPY_THRESHOLD_ZSTD: f32 = 7.5;
9
16
 
10
17
  #[derive(Clone, Debug)]
11
18
  pub struct CompressionStats {
@@ -29,19 +36,24 @@ impl HybridCompressor {
29
36
 
30
37
  pub fn compress(&self, data: &[u8]) -> Result<(Vec<u8>, CompressionStats)> {
31
38
  let original_size = data.len() as u64;
32
- let entropy = analyze_entropy(data);
33
39
 
34
40
  let blocks: Vec<&[u8]> = data.chunks(self.block_size).collect();
35
41
  let blocks_count = blocks.len();
36
42
 
37
43
  let compressed_blocks: Vec<Vec<u8>> = blocks
38
- .into_iter()
44
+ .par_iter()
39
45
  .map(|block| compress_block(block))
40
46
  .collect::<Result<Vec<_>, _>>()?;
41
47
 
48
+ let entropy = if data.len() > 4096 {
49
+ analyze_entropy(&data[..4096.min(data.len())])
50
+ } else {
51
+ analyze_entropy(data)
52
+ };
53
+
42
54
  let total_compressed: usize = compressed_blocks.iter().map(|b| b.len() + 4).sum();
43
55
  let mut result = Vec::with_capacity(16 + total_compressed);
44
- result.extend_from_slice(b"RBW1");
56
+ result.extend_from_slice(b"RBW2");
45
57
  result.extend_from_slice(&(blocks_count as u32).to_le_bytes());
46
58
  result.extend_from_slice(&original_size.to_le_bytes());
47
59
 
@@ -67,7 +79,9 @@ impl HybridCompressor {
67
79
  return Err(anyhow::anyhow!("Invalid compressed data"));
68
80
  }
69
81
 
70
- if &data[0..4] != b"RBW1" {
82
+ let magic = &data[0..4];
83
+ let v2 = magic == b"RBW2";
84
+ if magic != b"RBW1" && !v2 {
71
85
  return Err(anyhow::anyhow!("Invalid magic"));
72
86
  }
73
87
 
@@ -78,7 +92,7 @@ impl HybridCompressor {
78
92
  ]) as usize;
79
93
 
80
94
  let mut pos = 16;
81
- let mut block_ranges: Vec<(usize, usize)> = Vec::with_capacity(blocks_count);
95
+ let mut block_slices: Vec<&[u8]> = Vec::with_capacity(blocks_count);
82
96
 
83
97
  for _ in 0..blocks_count {
84
98
  if pos + 4 > data.len() {
@@ -91,15 +105,18 @@ impl HybridCompressor {
91
105
  if pos + block_size > data.len() {
92
106
  return Err(anyhow::anyhow!("Truncated block data"));
93
107
  }
94
- block_ranges.push((pos, block_size));
108
+ block_slices.push(&data[pos..pos + block_size]);
95
109
  pos += block_size;
96
110
  }
97
111
 
98
- let decompressed_blocks: Vec<Vec<u8>> = block_ranges
99
- .into_iter()
100
- .map(|(start, size)| {
101
- let block_data = &data[start..start + size];
102
- decompress_block(block_data)
112
+ let decompressed_blocks: Vec<Vec<u8>> = block_slices
113
+ .par_iter()
114
+ .map(|block_data| {
115
+ if v2 {
116
+ decompress_block_v2(block_data)
117
+ } else {
118
+ decompress_block_v1(block_data)
119
+ }
103
120
  })
104
121
  .collect::<Result<Vec<_>, _>>()?;
105
122
 
@@ -121,31 +138,121 @@ impl HybridCompressor {
121
138
 
122
139
  fn compress_block(block: &[u8]) -> Result<Vec<u8>> {
123
140
  if block.is_empty() {
124
- return Ok(Vec::new());
141
+ return Ok(vec![BLOCK_FLAG_STORE]);
142
+ }
143
+
144
+ let entropy = analyze_entropy(block);
145
+
146
+ if entropy >= ENTROPY_THRESHOLD_STORE {
147
+ let mut result = Vec::with_capacity(1 + block.len());
148
+ result.push(BLOCK_FLAG_STORE);
149
+ result.extend_from_slice(block);
150
+ return Ok(result);
151
+ }
152
+
153
+ if entropy >= ENTROPY_THRESHOLD_ZSTD {
154
+ let compressed = zstd::encode_all(block, 1)?;
155
+ if compressed.len() < block.len() {
156
+ let mut result = Vec::with_capacity(1 + 4 + compressed.len());
157
+ result.push(BLOCK_FLAG_ZSTD);
158
+ result.extend_from_slice(&(block.len() as u32).to_le_bytes());
159
+ result.extend_from_slice(&compressed);
160
+ return Ok(result);
161
+ }
162
+ let mut result = Vec::with_capacity(1 + block.len());
163
+ result.push(BLOCK_FLAG_STORE);
164
+ result.extend_from_slice(block);
165
+ return Ok(result);
125
166
  }
126
167
 
127
168
  let bwt = bwt_encode(block)?;
128
169
  let mtf_data = mtf_encode(&bwt.transformed);
129
170
  let rle_data = rle0_encode(&mtf_data);
130
-
131
171
  let stats = SymbolStats::from_data(&rle_data);
132
172
  let encoded = rans_encode_block(&rle_data, &stats);
133
-
134
173
  let stats_bytes = stats.serialize();
135
- let rle_len = rle_data.len() as u32;
136
- let orig_len = block.len() as u32;
137
174
 
138
- let mut result = Vec::with_capacity(4 + 4 + 4 + stats_bytes.len() + encoded.len());
139
- result.extend_from_slice(&bwt.primary_index.to_le_bytes());
140
- result.extend_from_slice(&orig_len.to_le_bytes());
141
- result.extend_from_slice(&rle_len.to_le_bytes());
142
- result.extend_from_slice(&stats_bytes);
143
- result.extend_from_slice(&encoded);
175
+ let bwt_total = 1 + 4 + 4 + 4 + stats_bytes.len() + encoded.len();
176
+
177
+ if bwt_total < block.len() {
178
+ let zstd_compressed = zstd::encode_all(block, 3)?;
179
+ let zstd_total = 1 + 4 + zstd_compressed.len();
144
180
 
181
+ if zstd_total < bwt_total {
182
+ let mut result = Vec::with_capacity(zstd_total);
183
+ result.push(BLOCK_FLAG_ZSTD);
184
+ result.extend_from_slice(&(block.len() as u32).to_le_bytes());
185
+ result.extend_from_slice(&zstd_compressed);
186
+ return Ok(result);
187
+ }
188
+
189
+ let mut result = Vec::with_capacity(bwt_total);
190
+ result.push(BLOCK_FLAG_BWT);
191
+ result.extend_from_slice(&bwt.primary_index.to_le_bytes());
192
+ result.extend_from_slice(&(block.len() as u32).to_le_bytes());
193
+ result.extend_from_slice(&(rle_data.len() as u32).to_le_bytes());
194
+ result.extend_from_slice(&stats_bytes);
195
+ result.extend_from_slice(&encoded);
196
+ return Ok(result);
197
+ }
198
+
199
+ let zstd_compressed = zstd::encode_all(block, 3)?;
200
+ if 1 + 4 + zstd_compressed.len() < block.len() {
201
+ let mut result = Vec::with_capacity(1 + 4 + zstd_compressed.len());
202
+ result.push(BLOCK_FLAG_ZSTD);
203
+ result.extend_from_slice(&(block.len() as u32).to_le_bytes());
204
+ result.extend_from_slice(&zstd_compressed);
205
+ return Ok(result);
206
+ }
207
+
208
+ let mut result = Vec::with_capacity(1 + block.len());
209
+ result.push(BLOCK_FLAG_STORE);
210
+ result.extend_from_slice(block);
145
211
  Ok(result)
146
212
  }
147
213
 
148
- fn decompress_block(block: &[u8]) -> Result<Vec<u8>> {
214
+ fn decompress_block_v2(block: &[u8]) -> Result<Vec<u8>> {
215
+ if block.is_empty() {
216
+ return Err(anyhow::anyhow!("Empty block"));
217
+ }
218
+
219
+ match block[0] {
220
+ BLOCK_FLAG_STORE => Ok(block[1..].to_vec()),
221
+ BLOCK_FLAG_ZSTD => {
222
+ if block.len() < 5 {
223
+ return Err(anyhow::anyhow!("Truncated zstd block"));
224
+ }
225
+ let orig_len = u32::from_le_bytes([block[1], block[2], block[3], block[4]]) as usize;
226
+ let mut decoded = zstd::decode_all(&block[5..])?;
227
+ decoded.truncate(orig_len);
228
+ Ok(decoded)
229
+ }
230
+ BLOCK_FLAG_BWT => {
231
+ if block.len() < 13 {
232
+ return Err(anyhow::anyhow!("Truncated BWT block"));
233
+ }
234
+ let primary_index = u32::from_le_bytes([block[1], block[2], block[3], block[4]]);
235
+ let orig_len = u32::from_le_bytes([block[5], block[6], block[7], block[8]]) as usize;
236
+ let rle_len = u32::from_le_bytes([block[9], block[10], block[11], block[12]]) as usize;
237
+
238
+ let (stats, stats_size) = SymbolStats::deserialize(&block[13..])?;
239
+ let encoded = &block[13 + stats_size..];
240
+
241
+ let rle_data = rans_decode_block(encoded, &stats, rle_len)?;
242
+ let mtf_data = rle0_decode(&rle_data);
243
+ let bwt_data = mtf_decode(&mtf_data);
244
+ let original = bwt_decode(&bwt_data, primary_index)?;
245
+
246
+ if original.len() != orig_len {
247
+ return Err(anyhow::anyhow!("Size mismatch"));
248
+ }
249
+ Ok(original)
250
+ }
251
+ _ => Err(anyhow::anyhow!("Unknown block type: {}", block[0])),
252
+ }
253
+ }
254
+
255
+ fn decompress_block_v1(block: &[u8]) -> Result<Vec<u8>> {
149
256
  if block.len() < 12 {
150
257
  return Err(anyhow::anyhow!("Block too small"));
151
258
  }
@@ -163,22 +270,18 @@ fn decompress_block(block: &[u8]) -> Result<Vec<u8>> {
163
270
  let original = bwt_decode(&bwt_data, primary_index)?;
164
271
 
165
272
  if original.len() != orig_len {
166
- return Err(anyhow::anyhow!(
167
- "Size mismatch: expected {}, got {}",
168
- orig_len,
169
- original.len()
170
- ));
273
+ return Err(anyhow::anyhow!("Size mismatch"));
171
274
  }
172
275
 
173
276
  Ok(original)
174
277
  }
175
278
 
176
279
  pub fn compress_high_performance(data: &[u8]) -> Result<(Vec<u8>, CompressionStats)> {
177
- let compressor = HybridCompressor::new(false, 4);
280
+ let compressor = HybridCompressor::new(false, 0);
178
281
  compressor.compress(data)
179
282
  }
180
283
 
181
284
  pub fn decompress_high_performance(data: &[u8]) -> Result<Vec<u8>> {
182
- let compressor = HybridCompressor::new(false, 4);
285
+ let compressor = HybridCompressor::new(false, 0);
183
286
  compressor.decompress(data)
184
287
  }
package/native/rans_byte.rs CHANGED
@@ -106,10 +106,10 @@ impl SymbolStats {
106
106
  }
107
107
  }
108
108
 
109
+ #[inline(always)]
109
110
  fn rans_enc_put(state: &mut u32, buf: &mut Vec<u8>, start: u32, freq: u32) {
110
- let x = *state;
111
111
  let x_max = ((RANS_BYTE_L >> PROB_BITS) << 8) * freq;
112
- let mut x = x;
112
+ let mut x = *state;
113
113
  while x >= x_max {
114
114
  buf.push((x & 0xFF) as u8);
115
115
  x >>= 8;
@@ -117,7 +117,22 @@ fn rans_enc_put(state: &mut u32, buf: &mut Vec<u8>, start: u32, freq: u32) {
117
117
  *state = ((x / freq) << PROB_BITS) + (x % freq) + start;
118
118
  }
119
119
 
120
- fn rans_dec_init(data: &[u8], pos: &mut usize) -> u32 {
120
+ #[inline(always)]
121
+ fn rans_dec_renorm(state: &mut u32, data: &[u8], pos: &mut usize) {
122
+ while *state < RANS_BYTE_L && *pos < data.len() {
123
+ *state = (*state << 8) | (data[*pos] as u32);
124
+ *pos += 1;
125
+ }
126
+ }
127
+
128
+ fn write_state(out: &mut Vec<u8>, state: u32) {
129
+ out.push((state >> 24) as u8);
130
+ out.push(((state >> 16) & 0xFF) as u8);
131
+ out.push(((state >> 8) & 0xFF) as u8);
132
+ out.push((state & 0xFF) as u8);
133
+ }
134
+
135
+ fn read_state(data: &[u8], pos: &mut usize) -> u32 {
121
136
  let s = (data[*pos] as u32) << 24
122
137
  | (data[*pos + 1] as u32) << 16
123
138
  | (data[*pos + 2] as u32) << 8
@@ -126,18 +141,48 @@ fn rans_dec_init(data: &[u8], pos: &mut usize) -> u32 {
126
141
  s
127
142
  }
128
143
 
129
- fn rans_dec_renorm(state: &mut u32, data: &[u8], pos: &mut usize) {
130
- while *state < RANS_BYTE_L && *pos < data.len() {
131
- *state = (*state << 8) | (data[*pos] as u32);
132
- *pos += 1;
133
- }
134
- }
135
-
136
144
  pub fn rans_encode_block(data: &[u8], stats: &SymbolStats) -> Vec<u8> {
137
145
  if data.is_empty() {
138
146
  return Vec::new();
139
147
  }
140
148
 
149
+ if data.len() < 8 {
150
+ return rans_encode_single(data, stats);
151
+ }
152
+
153
+ let mut s0: u32 = RANS_BYTE_L;
154
+ let mut s1: u32 = RANS_BYTE_L;
155
+ let mut rev_bytes: Vec<u8> = Vec::with_capacity(data.len() + 32);
156
+
157
+ let len = data.len();
158
+ let even_start = if len % 2 == 0 { len } else { len - 1 };
159
+
160
+ if len % 2 != 0 {
161
+ let sym = data[len - 1] as usize;
162
+ rans_enc_put(&mut s1, &mut rev_bytes, stats.cum_freqs[sym], stats.freqs[sym]);
163
+ }
164
+
165
+ let mut i = even_start;
166
+ while i >= 2 {
167
+ i -= 2;
168
+ let sym1 = data[i + 1] as usize;
169
+ rans_enc_put(&mut s1, &mut rev_bytes, stats.cum_freqs[sym1], stats.freqs[sym1]);
170
+ let sym0 = data[i] as usize;
171
+ rans_enc_put(&mut s0, &mut rev_bytes, stats.cum_freqs[sym0], stats.freqs[sym0]);
172
+ }
173
+
174
+ let mut output = Vec::with_capacity(9 + rev_bytes.len());
175
+ output.push(1);
176
+ write_state(&mut output, s0);
177
+ write_state(&mut output, s1);
178
+
179
+ for &b in rev_bytes.iter().rev() {
180
+ output.push(b);
181
+ }
182
+ output
183
+ }
184
+
185
+ fn rans_encode_single(data: &[u8], stats: &SymbolStats) -> Vec<u8> {
141
186
  let mut state: u32 = RANS_BYTE_L;
142
187
  let mut rev_bytes: Vec<u8> = Vec::with_capacity(data.len() + 16);
143
188
 
@@ -146,11 +191,9 @@ pub fn rans_encode_block(data: &[u8], stats: &SymbolStats) -> Vec<u8> {
146
191
  rans_enc_put(&mut state, &mut rev_bytes, stats.cum_freqs[s], stats.freqs[s]);
147
192
  }
148
193
 
149
- let mut output = Vec::with_capacity(4 + rev_bytes.len());
150
- output.push((state >> 24) as u8);
151
- output.push(((state >> 16) & 0xFF) as u8);
152
- output.push(((state >> 8) & 0xFF) as u8);
153
- output.push((state & 0xFF) as u8);
194
+ let mut output = Vec::with_capacity(5 + rev_bytes.len());
195
+ output.push(0);
196
+ write_state(&mut output, state);
154
197
 
155
198
  for &b in rev_bytes.iter().rev() {
156
199
  output.push(b);
@@ -159,19 +202,30 @@ pub fn rans_encode_block(data: &[u8], stats: &SymbolStats) -> Vec<u8> {
159
202
  }
160
203
 
161
204
  pub fn rans_decode_block(encoded: &[u8], stats: &SymbolStats, output_len: usize) -> Result<Vec<u8>> {
162
- if encoded.len() < 4 {
205
+ if encoded.is_empty() {
163
206
  return Err(anyhow::anyhow!("Data too short"));
164
207
  }
165
208
 
166
- let mut cum2sym = vec![0u8; PROB_SCALE as usize];
209
+ let mut cum2sym = [0u8; PROB_SCALE as usize];
167
210
  for s in 0..256usize {
168
- for slot in (stats.cum_freqs[s] as usize)..(stats.cum_freqs[s + 1] as usize) {
169
- cum2sym[slot] = s as u8;
211
+ let start = stats.cum_freqs[s] as usize;
212
+ let end = stats.cum_freqs[s + 1] as usize;
213
+ if end > start {
214
+ cum2sym[start..end].fill(s as u8);
170
215
  }
171
216
  }
172
217
 
173
- let mut pos = 0usize;
174
- let mut state = rans_dec_init(encoded, &mut pos);
218
+ let mode = encoded[0];
219
+ let mut pos = 1usize;
220
+
221
+ if mode == 1 && output_len >= 8 {
222
+ return rans_decode_interleaved(encoded, &cum2sym, stats, output_len, &mut pos);
223
+ }
224
+
225
+ if pos + 4 > encoded.len() {
226
+ return Err(anyhow::anyhow!("Data too short"));
227
+ }
228
+ let mut state = read_state(encoded, &mut pos);
175
229
  let mut output = Vec::with_capacity(output_len);
176
230
 
177
231
  for _ in 0..output_len {
@@ -188,3 +242,45 @@ pub fn rans_decode_block(encoded: &[u8], stats: &SymbolStats, output_len: usize)
188
242
 
189
243
  Ok(output)
190
244
  }
245
+
246
+ fn rans_decode_interleaved(
247
+ encoded: &[u8],
248
+ cum2sym: &[u8; PROB_SCALE as usize],
249
+ stats: &SymbolStats,
250
+ output_len: usize,
251
+ pos: &mut usize,
252
+ ) -> Result<Vec<u8>> {
253
+ if *pos + 8 > encoded.len() {
254
+ return Err(anyhow::anyhow!("Data too short for interleaved"));
255
+ }
256
+ let mut s0 = read_state(encoded, pos);
257
+ let mut s1 = read_state(encoded, pos);
258
+ let mut output = Vec::with_capacity(output_len);
259
+
260
+ let pairs = output_len / 2;
261
+ for _ in 0..pairs {
262
+ let slot0 = s0 & (PROB_SCALE - 1);
263
+ let sym0 = cum2sym[slot0 as usize];
264
+ output.push(sym0);
265
+ let freq0 = stats.freqs[sym0 as usize];
266
+ let start0 = stats.cum_freqs[sym0 as usize];
267
+ s0 = freq0 * (s0 >> PROB_BITS) + slot0 - start0;
268
+ rans_dec_renorm(&mut s0, encoded, pos);
269
+
270
+ let slot1 = s1 & (PROB_SCALE - 1);
271
+ let sym1 = cum2sym[slot1 as usize];
272
+ output.push(sym1);
273
+ let freq1 = stats.freqs[sym1 as usize];
274
+ let start1 = stats.cum_freqs[sym1 as usize];
275
+ s1 = freq1 * (s1 >> PROB_BITS) + slot1 - start1;
276
+ rans_dec_renorm(&mut s1, encoded, pos);
277
+ }
278
+
279
+ if output_len % 2 != 0 {
280
+ let slot = s1 & (PROB_SCALE - 1);
281
+ let sym = cum2sym[slot as usize];
282
+ output.push(sym);
283
+ }
284
+
285
+ Ok(output)
286
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "roxify",
3
- "version": "1.10.1",
3
+ "version": "1.11.0",
4
4
  "type": "module",
5
5
  "description": "Ultra-lightweight PNG steganography with native Rust acceleration. Encode binary data into PNG images with zstd compression.",
6
6
  "main": "dist/index.js",