rlz4 0.2.1 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +24 -12
- data/README.md +82 -57
- data/ext/rlz4/Cargo.toml +2 -2
- data/ext/rlz4/src/lib.rs +761 -159
- data/lib/rlz4/block_codec.rb +35 -0
- data/lib/rlz4/dictionary.rb +27 -0
- data/lib/rlz4/frame_codec.rb +34 -0
- data/lib/rlz4/version.rb +1 -1
- data/lib/rlz4.rb +4 -17
- metadata +9 -9
- data/tmp/x86_64-linux/stage/Cargo.toml +0 -9
- data/tmp/x86_64-linux/stage/ext/rlz4/Cargo.toml +0 -16
data/ext/rlz4/src/lib.rs
CHANGED
|
@@ -6,14 +6,74 @@ use magnus::{
|
|
|
6
6
|
value::Opaque,
|
|
7
7
|
Error, Ruby,
|
|
8
8
|
};
|
|
9
|
-
use std::
|
|
9
|
+
use std::cell::RefCell;
|
|
10
|
+
use std::ptr;
|
|
10
11
|
use std::sync::OnceLock;
|
|
11
12
|
|
|
12
|
-
use
|
|
13
|
+
use lz4_sys::{
|
|
14
|
+
LZ4F_VERSION,
|
|
15
|
+
// block compress/decompress
|
|
16
|
+
LZ4_compressBound, LZ4_compress_fast, LZ4_decompress_safe,
|
|
17
|
+
LZ4_createStream, LZ4_freeStream, LZ4StreamEncode,
|
|
18
|
+
// frame compress/decompress
|
|
19
|
+
LZ4F_compressBound, LZ4F_compressBegin, LZ4F_compressUpdate, LZ4F_compressEnd,
|
|
20
|
+
LZ4F_createCompressionContext, LZ4F_freeCompressionContext,
|
|
21
|
+
LZ4F_createDecompressionContext, LZ4F_freeDecompressionContext,
|
|
22
|
+
LZ4F_decompress, LZ4F_isError, LZ4F_getErrorName,
|
|
23
|
+
LZ4FCompressionContext, LZ4FDecompressionContext,
|
|
24
|
+
LZ4FPreferences, LZ4FFrameInfo, LZ4FDecompressOptions,
|
|
25
|
+
BlockSize, BlockMode, ContentChecksum, FrameType, BlockChecksum,
|
|
26
|
+
c_int,
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
// sizeof(LZ4_stream_t): union { char minStateSize[(1<<LZ4_MEMORY_USAGE)+32]; ... }
|
|
30
|
+
// LZ4_MEMORY_USAGE defaults to 14, so this is (16384 + 32) = 16416 bytes.
|
|
31
|
+
const LZ4_STREAM_SIZE: usize = (1 << 14) + 32;
|
|
32
|
+
|
|
33
|
+
// Functions present in liblz4 1.10.0 but not yet exposed by the lz4-sys crate.
|
|
34
|
+
extern "C" {
|
|
35
|
+
fn LZ4_resetStream_fast(stream: *mut LZ4StreamEncode);
|
|
36
|
+
fn LZ4_loadDict(stream: *mut LZ4StreamEncode, dict: *const u8, dict_size: c_int) -> c_int;
|
|
37
|
+
fn LZ4_compress_fast_continue(
|
|
38
|
+
stream: *mut LZ4StreamEncode,
|
|
39
|
+
src: *const u8,
|
|
40
|
+
dst: *mut u8,
|
|
41
|
+
src_size: c_int,
|
|
42
|
+
dst_capacity: c_int,
|
|
43
|
+
acceleration: c_int,
|
|
44
|
+
) -> c_int;
|
|
45
|
+
fn LZ4_decompress_safe_usingDict(
|
|
46
|
+
src: *const u8,
|
|
47
|
+
dst: *mut u8,
|
|
48
|
+
src_size: c_int,
|
|
49
|
+
dst_capacity: c_int,
|
|
50
|
+
dict: *const u8,
|
|
51
|
+
dict_size: c_int,
|
|
52
|
+
) -> c_int;
|
|
53
|
+
// lz4 >= 1.9.4
|
|
54
|
+
fn LZ4F_compressBegin_usingDict(
|
|
55
|
+
ctx: LZ4FCompressionContext,
|
|
56
|
+
dst: *mut u8,
|
|
57
|
+
dst_capacity: usize,
|
|
58
|
+
dict: *const u8,
|
|
59
|
+
dict_size: usize,
|
|
60
|
+
prefs: *const LZ4FPreferences,
|
|
61
|
+
) -> usize;
|
|
62
|
+
// lz4 >= 1.9.4
|
|
63
|
+
fn LZ4F_decompress_usingDict(
|
|
64
|
+
ctx: LZ4FDecompressionContext,
|
|
65
|
+
dst: *mut u8,
|
|
66
|
+
dst_size_ptr: *mut usize,
|
|
67
|
+
src: *const u8,
|
|
68
|
+
src_size_ptr: *mut usize,
|
|
69
|
+
dict: *const u8,
|
|
70
|
+
dict_size: usize,
|
|
71
|
+
opts: *const LZ4FDecompressOptions,
|
|
72
|
+
) -> usize;
|
|
73
|
+
}
|
|
13
74
|
|
|
14
75
|
const LZ4_FRAME_MAGIC: [u8; 4] = [0x04, 0x22, 0x4d, 0x18];
|
|
15
76
|
|
|
16
|
-
// Opaque<T> is Send+Sync and is designed for storing Ruby values in statics.
|
|
17
77
|
static DECOMPRESS_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
|
|
18
78
|
|
|
19
79
|
fn decompress_error(ruby: &Ruby) -> ExceptionClass {
|
|
@@ -24,157 +84,518 @@ fn decompress_error(ruby: &Ruby) -> ExceptionClass {
|
|
|
24
84
|
)
|
|
25
85
|
}
|
|
26
86
|
|
|
27
|
-
// ---------- module
|
|
87
|
+
// ---------- module function: compress_bound ----------
|
|
28
88
|
|
|
29
|
-
fn
|
|
30
|
-
|
|
31
|
-
|
|
89
|
+
fn rlz4_compress_bound(_ruby: &Ruby, size: usize) -> Result<usize, Error> {
|
|
90
|
+
Ok(unsafe { LZ4_compressBound(size as c_int) } as usize)
|
|
91
|
+
}
|
|
32
92
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
encoder.write_all(&input).map_err(|e| {
|
|
38
|
-
Error::new(
|
|
39
|
-
ruby.exception_runtime_error(),
|
|
40
|
-
format!("lz4 frame encoder write failed: {e}"),
|
|
41
|
-
)
|
|
42
|
-
})?;
|
|
43
|
-
let compressed = encoder.finish().map_err(|e| {
|
|
44
|
-
Error::new(
|
|
45
|
-
ruby.exception_runtime_error(),
|
|
46
|
-
format!("lz4 frame encoder finish failed: {e}"),
|
|
47
|
-
)
|
|
48
|
-
})?;
|
|
93
|
+
// ---------- module function: block_stream_size ----------
|
|
94
|
+
//
|
|
95
|
+
// Returns sizeof(LZ4_stream_t). Exposed so the Ruby test suite can compute
|
|
96
|
+
// the expected #size of a dict-mode BlockCodec without hardcoding the constant.
|
|
49
97
|
|
|
50
|
-
|
|
98
|
+
fn rlz4_block_stream_size(_ruby: &Ruby) -> usize {
|
|
99
|
+
LZ4_STREAM_SIZE
|
|
51
100
|
}
|
|
52
101
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
102
|
+
// ---------- BlockCodec ----------
|
|
103
|
+
//
|
|
104
|
+
// No-dict codec: uses LZ4_compress_fast (stateless; stack-allocated hash
|
|
105
|
+
// table inside the C function). Ruby object owns no extra heap. #size = 0.
|
|
106
|
+
//
|
|
107
|
+
// Dict codec: allocates one LZ4StreamEncode via LZ4_createStream. Before
|
|
108
|
+
// each compress call, LZ4_resetStream_fast + LZ4_loadDict restore the
|
|
109
|
+
// dict-loaded state. #size = LZ4_STREAM_SIZE + dict.len().
|
|
110
|
+
//
|
|
111
|
+
// Decompression is always stateless per-block. Both compress and decompress
|
|
112
|
+
// live on the same class so callers hold one object per worker.
|
|
113
|
+
//
|
|
114
|
+
// Thread-local by construction (RefCell, not Send+Sync). A BlockCodec must
|
|
115
|
+
// not cross Ractor boundaries — send a new one instead.
|
|
116
|
+
|
|
117
|
+
struct EncodeStream(*mut LZ4StreamEncode);
|
|
118
|
+
|
|
119
|
+
// SAFETY: *mut LZ4StreamEncode is !Send by default. We guarantee exclusive
|
|
120
|
+
// access via RefCell (one borrow at a time, single-threaded Ruby GIL).
|
|
121
|
+
unsafe impl Send for EncodeStream {}
|
|
122
|
+
|
|
123
|
+
impl Drop for EncodeStream {
|
|
124
|
+
fn drop(&mut self) {
|
|
125
|
+
if !self.0.is_null() {
|
|
126
|
+
unsafe { LZ4_freeStream(self.0) };
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
}
|
|
56
130
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
131
|
+
#[magnus::wrap(class = "RLZ4::BlockCodec", free_immediately, size)]
|
|
132
|
+
struct BlockCodec {
|
|
133
|
+
stream: Option<RefCell<EncodeStream>>, // Some only when dict is set
|
|
134
|
+
dict: Option<Vec<u8>>,
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
fn block_codec_new(_ruby: &Ruby, rb_dict: Option<RString>) -> Result<BlockCodec, Error> {
|
|
138
|
+
match rb_dict {
|
|
139
|
+
None => Ok(BlockCodec { stream: None, dict: None }),
|
|
140
|
+
Some(rb_dict) => {
|
|
141
|
+
// SAFETY: copy dict bytes before any Ruby allocation.
|
|
142
|
+
let bytes: Vec<u8> = unsafe { rb_dict.as_slice().to_vec() };
|
|
143
|
+
|
|
144
|
+
let raw = unsafe { LZ4_createStream() };
|
|
145
|
+
if raw.is_null() {
|
|
146
|
+
return Err(Error::new(
|
|
147
|
+
_ruby.exception_runtime_error(),
|
|
148
|
+
"LZ4_createStream allocation failed",
|
|
149
|
+
));
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
// Pre-load the dict so the pristine-state cost is paid once here,
|
|
153
|
+
// not on the first compress call.
|
|
154
|
+
unsafe { LZ4_loadDict(raw, bytes.as_ptr(), bytes.len() as c_int) };
|
|
155
|
+
|
|
156
|
+
Ok(BlockCodec {
|
|
157
|
+
stream: Some(RefCell::new(EncodeStream(raw))),
|
|
158
|
+
dict: Some(bytes),
|
|
159
|
+
})
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
fn block_codec_size(rb_self: &BlockCodec) -> usize {
|
|
165
|
+
let stream_size = match &rb_self.stream {
|
|
166
|
+
Some(_) => LZ4_STREAM_SIZE,
|
|
167
|
+
None => 0,
|
|
168
|
+
};
|
|
169
|
+
stream_size + rb_self.dict.as_ref().map_or(0, |d| d.len())
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
fn block_codec_has_dict(rb_self: &BlockCodec) -> bool {
|
|
173
|
+
rb_self.dict.is_some()
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
fn block_codec_compress(
|
|
177
|
+
ruby: &Ruby,
|
|
178
|
+
rb_self: &BlockCodec,
|
|
179
|
+
rb_input: RString,
|
|
180
|
+
) -> Result<RString, Error> {
|
|
181
|
+
// SAFETY: rb_input is stack-pinned; the C compression functions perform no
|
|
182
|
+
// Ruby callbacks or GC-triggering allocations while the input slice is
|
|
183
|
+
// live. str_from_slice happens after.
|
|
184
|
+
let input: &[u8] = unsafe { rb_input.as_slice() };
|
|
185
|
+
|
|
186
|
+
let upper = unsafe { LZ4_compressBound(input.len() as c_int) as usize };
|
|
187
|
+
let mut out = vec![0u8; upper];
|
|
188
|
+
|
|
189
|
+
let compressed_len: c_int = match (&rb_self.stream, &rb_self.dict) {
|
|
190
|
+
(None, None) => unsafe {
|
|
191
|
+
LZ4_compress_fast(
|
|
192
|
+
input.as_ptr() as *const _,
|
|
193
|
+
out.as_mut_ptr() as *mut _,
|
|
194
|
+
input.len() as c_int,
|
|
195
|
+
upper as c_int,
|
|
196
|
+
1,
|
|
197
|
+
)
|
|
198
|
+
},
|
|
199
|
+
(Some(stream_cell), Some(dict)) => {
|
|
200
|
+
let stream = stream_cell.borrow_mut();
|
|
201
|
+
unsafe {
|
|
202
|
+
// Restore stream to the dict-loaded state before each call.
|
|
203
|
+
LZ4_resetStream_fast(stream.0);
|
|
204
|
+
LZ4_loadDict(stream.0, dict.as_ptr(), dict.len() as c_int);
|
|
205
|
+
LZ4_compress_fast_continue(
|
|
206
|
+
stream.0,
|
|
207
|
+
input.as_ptr(),
|
|
208
|
+
out.as_mut_ptr(),
|
|
209
|
+
input.len() as c_int,
|
|
210
|
+
upper as c_int,
|
|
211
|
+
1,
|
|
212
|
+
)
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
_ => unreachable!("stream and dict are always both Some or both None"),
|
|
216
|
+
};
|
|
217
|
+
|
|
218
|
+
if compressed_len <= 0 {
|
|
61
219
|
return Err(Error::new(
|
|
62
|
-
|
|
63
|
-
"lz4
|
|
220
|
+
ruby.exception_runtime_error(),
|
|
221
|
+
"lz4 block compress failed",
|
|
64
222
|
));
|
|
65
223
|
}
|
|
66
224
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
225
|
+
out.truncate(compressed_len as usize);
|
|
226
|
+
Ok(ruby.str_from_slice(&out))
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
fn block_codec_decompress(
|
|
230
|
+
ruby: &Ruby,
|
|
231
|
+
rb_self: &BlockCodec,
|
|
232
|
+
rb_input: RString,
|
|
233
|
+
decompressed_size: usize,
|
|
234
|
+
) -> Result<RString, Error> {
|
|
235
|
+
// SAFETY: same as compress. Decoder is pure C, no Ruby callbacks.
|
|
236
|
+
let compressed: &[u8] = unsafe { rb_input.as_slice() };
|
|
237
|
+
|
|
238
|
+
let mut out = vec![0u8; decompressed_size];
|
|
239
|
+
|
|
240
|
+
let actual_len: c_int = match &rb_self.dict {
|
|
241
|
+
None => unsafe {
|
|
242
|
+
LZ4_decompress_safe(
|
|
243
|
+
compressed.as_ptr() as *const _,
|
|
244
|
+
out.as_mut_ptr() as *mut _,
|
|
245
|
+
compressed.len() as c_int,
|
|
246
|
+
decompressed_size as c_int,
|
|
247
|
+
)
|
|
248
|
+
},
|
|
249
|
+
Some(dict) => unsafe {
|
|
250
|
+
LZ4_decompress_safe_usingDict(
|
|
251
|
+
compressed.as_ptr(),
|
|
252
|
+
out.as_mut_ptr(),
|
|
253
|
+
compressed.len() as c_int,
|
|
254
|
+
decompressed_size as c_int,
|
|
255
|
+
dict.as_ptr(),
|
|
256
|
+
dict.len() as c_int,
|
|
257
|
+
)
|
|
258
|
+
},
|
|
259
|
+
};
|
|
260
|
+
|
|
261
|
+
if actual_len < 0 {
|
|
262
|
+
return Err(Error::new(
|
|
73
263
|
decompress_error(ruby),
|
|
74
|
-
|
|
75
|
-
)
|
|
76
|
-
}
|
|
264
|
+
"lz4 block decode failed",
|
|
265
|
+
));
|
|
266
|
+
}
|
|
77
267
|
|
|
268
|
+
out.truncate(actual_len as usize);
|
|
78
269
|
Ok(ruby.str_from_slice(&out))
|
|
79
270
|
}
|
|
80
271
|
|
|
81
|
-
// ----------
|
|
272
|
+
// ---------- FrameCodec ----------
|
|
82
273
|
//
|
|
83
|
-
//
|
|
84
|
-
//
|
|
85
|
-
//
|
|
86
|
-
// FrameDescriptor — interoperable with the reference `lz4` CLI given the
|
|
87
|
-
// same dictionary file.
|
|
274
|
+
// One-shot compress/decompress using the LZ4F frame API. Contexts are
|
|
275
|
+
// created and freed per operation so FrameCodec holds no mutable state
|
|
276
|
+
// and is shareable across Ractors.
|
|
88
277
|
//
|
|
89
|
-
//
|
|
90
|
-
//
|
|
91
|
-
//
|
|
92
|
-
//
|
|
93
|
-
|
|
94
|
-
|
|
278
|
+
// Block mode: Linked. In Linked mode LZ4F_compressBegin_usingDict loads
|
|
279
|
+
// the dict as initial stream history before the first block, so the block
|
|
280
|
+
// compressor can back-reference into dict bytes. Independent mode would
|
|
281
|
+
// discard the raw dict bytes before each block (a known liblz4 limitation
|
|
282
|
+
// with the _usingDict raw-bytes API; _usingCDict avoids it but changes the
|
|
283
|
+
// dict-id derivation).
|
|
284
|
+
|
|
285
|
+
#[magnus::wrap(class = "RLZ4::FrameCodec", free_immediately, size)]
|
|
286
|
+
struct FrameCodec {
|
|
287
|
+
dict: Option<DictBound>,
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
struct DictBound {
|
|
95
291
|
bytes: Vec<u8>,
|
|
96
292
|
id: u32,
|
|
97
293
|
}
|
|
98
294
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
295
|
+
unsafe impl Send for FrameCodec {}
|
|
296
|
+
unsafe impl Sync for FrameCodec {}
|
|
297
|
+
|
|
298
|
+
fn frame_codec_initialize(
|
|
299
|
+
_ruby: &Ruby,
|
|
300
|
+
rb_dict: Option<RString>,
|
|
301
|
+
id: u32,
|
|
302
|
+
) -> Result<FrameCodec, Error> {
|
|
303
|
+
let dict = rb_dict.map(|s| {
|
|
304
|
+
// SAFETY: copy dict bytes before any Ruby allocation.
|
|
305
|
+
let bytes: Vec<u8> = unsafe { s.as_slice().to_vec() };
|
|
306
|
+
s.freeze();
|
|
307
|
+
DictBound { bytes, id }
|
|
308
|
+
});
|
|
309
|
+
Ok(FrameCodec { dict })
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
fn lz4f_error(code: usize) -> String {
|
|
313
|
+
let name = unsafe { LZ4F_getErrorName(code) };
|
|
314
|
+
if name.is_null() {
|
|
315
|
+
return format!("lz4f error {code}");
|
|
316
|
+
}
|
|
317
|
+
unsafe { std::ffi::CStr::from_ptr(name) }
|
|
318
|
+
.to_string_lossy()
|
|
319
|
+
.into_owned()
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
fn default_prefs(dict_id: u32) -> LZ4FPreferences {
|
|
323
|
+
LZ4FPreferences {
|
|
324
|
+
frame_info: LZ4FFrameInfo {
|
|
325
|
+
block_size_id: BlockSize::Default,
|
|
326
|
+
// Linked mode: the dict is treated as the initial stream history,
|
|
327
|
+
// so the first (and often only) block can back-reference into it.
|
|
328
|
+
// Independent mode + raw-bytes dict would reset the hash table
|
|
329
|
+
// before each block, discarding the dict (liblz4 limitation).
|
|
330
|
+
block_mode: BlockMode::Linked,
|
|
331
|
+
content_checksum_flag: ContentChecksum::NoChecksum,
|
|
332
|
+
frame_type: FrameType::Frame,
|
|
333
|
+
content_size: 0,
|
|
334
|
+
dict_id,
|
|
335
|
+
block_checksum_flag: BlockChecksum::NoBlockChecksum,
|
|
336
|
+
},
|
|
337
|
+
compression_level: 0,
|
|
338
|
+
auto_flush: 0,
|
|
339
|
+
favor_dec_speed: 0,
|
|
340
|
+
reserved: [0; 3],
|
|
341
|
+
}
|
|
342
|
+
}
|
|
103
343
|
|
|
104
|
-
fn
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
344
|
+
fn zero_frame_info() -> LZ4FFrameInfo {
|
|
345
|
+
LZ4FFrameInfo {
|
|
346
|
+
block_size_id: BlockSize::Default,
|
|
347
|
+
block_mode: BlockMode::Independent,
|
|
348
|
+
content_checksum_flag: ContentChecksum::NoChecksum,
|
|
349
|
+
frame_type: FrameType::Frame,
|
|
350
|
+
content_size: 0,
|
|
351
|
+
dict_id: 0,
|
|
352
|
+
block_checksum_flag: BlockChecksum::NoBlockChecksum,
|
|
353
|
+
}
|
|
109
354
|
}
|
|
110
355
|
|
|
111
|
-
fn
|
|
112
|
-
let
|
|
113
|
-
let
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
&rb_self.bytes,
|
|
117
|
-
rb_self.id,
|
|
118
|
-
);
|
|
119
|
-
encoder.write_all(&input).map_err(|e| {
|
|
120
|
-
Error::new(
|
|
356
|
+
fn create_dctx(ruby: &Ruby) -> Result<LZ4FDecompressionContext, Error> {
|
|
357
|
+
let mut ctx = LZ4FDecompressionContext(ptr::null_mut());
|
|
358
|
+
let err = unsafe { LZ4F_createDecompressionContext(&mut ctx, LZ4F_VERSION) };
|
|
359
|
+
if unsafe { LZ4F_isError(err) } != 0 {
|
|
360
|
+
return Err(Error::new(
|
|
121
361
|
ruby.exception_runtime_error(),
|
|
122
|
-
format!("
|
|
123
|
-
)
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
|
|
362
|
+
format!("LZ4F_createDecompressionContext: {}", lz4f_error(err)),
|
|
363
|
+
));
|
|
364
|
+
}
|
|
365
|
+
Ok(ctx)
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
fn frame_codec_compress(
|
|
369
|
+
ruby: &Ruby,
|
|
370
|
+
rb_self: &FrameCodec,
|
|
371
|
+
rb_input: RString,
|
|
372
|
+
) -> Result<RString, Error> {
|
|
373
|
+
// SAFETY: rb_input is stack-pinned; all LZ4F calls are pure C with no
|
|
374
|
+
// Ruby callbacks. str_from_slice happens after input is no longer live.
|
|
375
|
+
let input: &[u8] = unsafe { rb_input.as_slice() };
|
|
376
|
+
|
|
377
|
+
let prefs = default_prefs(rb_self.dict.as_ref().map_or(0, |d| d.id));
|
|
378
|
+
let data_bound = unsafe { LZ4F_compressBound(input.len(), &prefs) };
|
|
379
|
+
let capacity = data_bound + 64;
|
|
380
|
+
let mut out = vec![0u8; capacity];
|
|
381
|
+
let mut pos: usize = 0;
|
|
382
|
+
|
|
383
|
+
let mut ctx = LZ4FCompressionContext(ptr::null_mut());
|
|
384
|
+
let err = unsafe { LZ4F_createCompressionContext(&mut ctx, LZ4F_VERSION) };
|
|
385
|
+
if unsafe { LZ4F_isError(err) } != 0 {
|
|
386
|
+
return Err(Error::new(
|
|
127
387
|
ruby.exception_runtime_error(),
|
|
128
|
-
format!("
|
|
129
|
-
)
|
|
130
|
-
}
|
|
131
|
-
|
|
388
|
+
format!("LZ4F_createCompressionContext: {}", lz4f_error(err)),
|
|
389
|
+
));
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
let result = (|| -> Result<usize, String> {
|
|
393
|
+
let n = match &rb_self.dict {
|
|
394
|
+
None => unsafe {
|
|
395
|
+
LZ4F_compressBegin(ctx, out.as_mut_ptr().add(pos), capacity - pos, &prefs)
|
|
396
|
+
},
|
|
397
|
+
Some(d) => unsafe {
|
|
398
|
+
LZ4F_compressBegin_usingDict(
|
|
399
|
+
ctx,
|
|
400
|
+
out.as_mut_ptr().add(pos),
|
|
401
|
+
capacity - pos,
|
|
402
|
+
d.bytes.as_ptr(),
|
|
403
|
+
d.bytes.len(),
|
|
404
|
+
&prefs,
|
|
405
|
+
)
|
|
406
|
+
},
|
|
407
|
+
};
|
|
408
|
+
if unsafe { LZ4F_isError(n) } != 0 {
|
|
409
|
+
return Err(format!("LZ4F_compressBegin: {}", lz4f_error(n)));
|
|
410
|
+
}
|
|
411
|
+
pos += n;
|
|
412
|
+
|
|
413
|
+
let n = unsafe {
|
|
414
|
+
LZ4F_compressUpdate(
|
|
415
|
+
ctx,
|
|
416
|
+
out.as_mut_ptr().add(pos),
|
|
417
|
+
capacity - pos,
|
|
418
|
+
input.as_ptr(),
|
|
419
|
+
input.len(),
|
|
420
|
+
ptr::null(),
|
|
421
|
+
)
|
|
422
|
+
};
|
|
423
|
+
if unsafe { LZ4F_isError(n) } != 0 {
|
|
424
|
+
return Err(format!("LZ4F_compressUpdate: {}", lz4f_error(n)));
|
|
425
|
+
}
|
|
426
|
+
pos += n;
|
|
427
|
+
|
|
428
|
+
let n = unsafe {
|
|
429
|
+
LZ4F_compressEnd(
|
|
430
|
+
ctx,
|
|
431
|
+
out.as_mut_ptr().add(pos),
|
|
432
|
+
capacity - pos,
|
|
433
|
+
ptr::null(),
|
|
434
|
+
)
|
|
435
|
+
};
|
|
436
|
+
if unsafe { LZ4F_isError(n) } != 0 {
|
|
437
|
+
return Err(format!("LZ4F_compressEnd: {}", lz4f_error(n)));
|
|
438
|
+
}
|
|
439
|
+
pos += n;
|
|
440
|
+
Ok(pos)
|
|
441
|
+
})();
|
|
442
|
+
|
|
443
|
+
unsafe { LZ4F_freeCompressionContext(ctx) };
|
|
444
|
+
|
|
445
|
+
match result {
|
|
446
|
+
Err(msg) => Err(Error::new(ruby.exception_runtime_error(), msg)),
|
|
447
|
+
Ok(written) => {
|
|
448
|
+
out.truncate(written);
|
|
449
|
+
Ok(ruby.str_from_slice(&out))
|
|
450
|
+
}
|
|
451
|
+
}
|
|
132
452
|
}
|
|
133
453
|
|
|
134
|
-
fn
|
|
454
|
+
fn frame_codec_decompress(
|
|
135
455
|
ruby: &Ruby,
|
|
136
|
-
rb_self: &
|
|
456
|
+
rb_self: &FrameCodec,
|
|
137
457
|
rb_input: RString,
|
|
138
458
|
) -> Result<RString, Error> {
|
|
139
|
-
|
|
140
|
-
|
|
459
|
+
// SAFETY: rb_input is stack-pinned; LZ4F calls are pure C.
|
|
460
|
+
let compressed: &[u8] = unsafe { rb_input.as_slice() };
|
|
461
|
+
|
|
462
|
+
if compressed.len() < 4 || compressed[..4] != LZ4_FRAME_MAGIC {
|
|
141
463
|
return Err(Error::new(
|
|
142
464
|
decompress_error(ruby),
|
|
143
|
-
"lz4
|
|
465
|
+
"lz4 frame decode failed: bad magic (input is not an LZ4 frame)",
|
|
144
466
|
));
|
|
145
467
|
}
|
|
146
468
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
)
|
|
469
|
+
// When we have a dict, use a temporary context to parse the frame header
|
|
470
|
+
// (LZ4F_getFrameInfo advances the context's stage past dstage_init), then
|
|
471
|
+
// use a fresh context for the actual decompress so LZ4F_decompress_usingDict
|
|
472
|
+
// sees dstage_init and correctly installs the dict.
|
|
473
|
+
if let Some(d) = &rb_self.dict {
|
|
474
|
+
let temp_ctx = create_dctx(ruby)?;
|
|
475
|
+
let mut frame_info = zero_frame_info();
|
|
476
|
+
let mut dummy = compressed.len();
|
|
477
|
+
let ret = unsafe {
|
|
478
|
+
lz4_sys::LZ4F_getFrameInfo(temp_ctx, &mut frame_info, compressed.as_ptr(), &mut dummy)
|
|
479
|
+
};
|
|
480
|
+
unsafe { LZ4F_freeDecompressionContext(temp_ctx) };
|
|
481
|
+
|
|
482
|
+
if unsafe { LZ4F_isError(ret) } != 0 {
|
|
483
|
+
return Err(Error::new(
|
|
484
|
+
decompress_error(ruby),
|
|
485
|
+
format!("lz4 frame header error: {}", lz4f_error(ret)),
|
|
486
|
+
));
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
if frame_info.dict_id != 0 && frame_info.dict_id != d.id {
|
|
490
|
+
return Err(Error::new(
|
|
491
|
+
decompress_error(ruby),
|
|
492
|
+
format!(
|
|
493
|
+
"lz4 frame dict_id mismatch: frame={:#010x} codec={:#010x}",
|
|
494
|
+
frame_info.dict_id, d.id
|
|
495
|
+
),
|
|
496
|
+
));
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
// Fresh context for actual decompression.
|
|
501
|
+
let ctx = create_dctx(ruby)?;
|
|
502
|
+
|
|
503
|
+
// Pass the full compressed buffer (including header) to the loop.
|
|
504
|
+
// LZ4F_decompress_usingDict sets the dict before parsing the header
|
|
505
|
+
// (dstage_init check), so the dict is available for the first block.
|
|
506
|
+
let result = frame_decompress_loop(ctx, compressed, rb_self.dict.as_ref());
|
|
507
|
+
|
|
508
|
+
unsafe { LZ4F_freeDecompressionContext(ctx) };
|
|
509
|
+
|
|
510
|
+
match result {
|
|
511
|
+
Err(msg) => Err(Error::new(decompress_error(ruby), msg)),
|
|
512
|
+
Ok(out) => Ok(ruby.str_from_slice(&out)),
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
fn frame_decompress_loop(
|
|
517
|
+
ctx: LZ4FDecompressionContext,
|
|
518
|
+
compressed: &[u8],
|
|
519
|
+
dict: Option<&DictBound>,
|
|
520
|
+
) -> Result<Vec<u8>, String> {
|
|
152
521
|
let mut out = Vec::new();
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
522
|
+
let mut src_pos = 0usize;
|
|
523
|
+
let mut chunk = vec![0u8; 65536];
|
|
524
|
+
let mut complete = false;
|
|
525
|
+
|
|
526
|
+
loop {
|
|
527
|
+
let remaining = compressed.len() - src_pos;
|
|
528
|
+
if remaining == 0 {
|
|
529
|
+
break;
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
let mut dst_written = chunk.len();
|
|
533
|
+
let mut src_consumed = remaining;
|
|
534
|
+
|
|
535
|
+
let ret = match dict {
|
|
536
|
+
None => unsafe {
|
|
537
|
+
LZ4F_decompress(
|
|
538
|
+
ctx,
|
|
539
|
+
chunk.as_mut_ptr(),
|
|
540
|
+
&mut dst_written,
|
|
541
|
+
compressed.as_ptr().add(src_pos),
|
|
542
|
+
&mut src_consumed,
|
|
543
|
+
ptr::null(),
|
|
544
|
+
)
|
|
545
|
+
},
|
|
546
|
+
Some(d) => unsafe {
|
|
547
|
+
LZ4F_decompress_usingDict(
|
|
548
|
+
ctx,
|
|
549
|
+
chunk.as_mut_ptr(),
|
|
550
|
+
&mut dst_written as *mut usize,
|
|
551
|
+
compressed.as_ptr().add(src_pos),
|
|
552
|
+
&mut src_consumed as *mut usize,
|
|
553
|
+
d.bytes.as_ptr(),
|
|
554
|
+
d.bytes.len(),
|
|
555
|
+
ptr::null(),
|
|
556
|
+
)
|
|
557
|
+
},
|
|
558
|
+
};
|
|
559
|
+
|
|
560
|
+
src_pos += src_consumed;
|
|
561
|
+
out.extend_from_slice(&chunk[..dst_written]);
|
|
562
|
+
|
|
563
|
+
if unsafe { LZ4F_isError(ret) } != 0 {
|
|
564
|
+
return Err(format!("lz4 frame decode failed: {}", lz4f_error(ret)));
|
|
565
|
+
}
|
|
566
|
+
if ret == 0 {
|
|
567
|
+
complete = true;
|
|
568
|
+
break;
|
|
569
|
+
}
|
|
570
|
+
// Guard against a degenerate case where the C library makes no progress.
|
|
571
|
+
if src_consumed == 0 && dst_written == 0 {
|
|
572
|
+
break;
|
|
573
|
+
}
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
if !complete {
|
|
577
|
+
return Err("lz4 frame decode failed: truncated or incomplete frame".to_string());
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
Ok(out)
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
fn frame_codec_size(rb_self: &FrameCodec) -> usize {
|
|
584
|
+
rb_self.dict.as_ref().map_or(0, |d| d.bytes.len())
|
|
160
585
|
}
|
|
161
586
|
|
|
162
|
-
fn
|
|
163
|
-
rb_self.
|
|
587
|
+
fn frame_codec_has_dict(rb_self: &FrameCodec) -> bool {
|
|
588
|
+
rb_self.dict.is_some()
|
|
164
589
|
}
|
|
165
590
|
|
|
166
|
-
fn
|
|
167
|
-
rb_self.id
|
|
591
|
+
fn frame_codec_id(rb_self: &FrameCodec) -> Option<u32> {
|
|
592
|
+
rb_self.dict.as_ref().map(|d| d.id)
|
|
168
593
|
}
|
|
169
594
|
|
|
170
595
|
// ---------- module init ----------
|
|
171
596
|
|
|
172
597
|
#[magnus::init]
|
|
173
598
|
fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
174
|
-
// Mark this extension as Ractor-safe. All our Rust code uses only
|
|
175
|
-
// stack/owned data, holds no globals aside from the Opaque exception
|
|
176
|
-
// class (which is Send+Sync by construction), and the Dictionary type
|
|
177
|
-
// is read-only after init, so it is safe to call from any Ractor.
|
|
178
599
|
unsafe { rb_sys::rb_ext_ractor_safe(true) };
|
|
179
600
|
|
|
180
601
|
let module = ruby.define_module("RLZ4")?;
|
|
@@ -185,17 +606,23 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
185
606
|
.set(Opaque::from(decompress_error_class))
|
|
186
607
|
.unwrap_or_else(|_| panic!("init called more than once"));
|
|
187
608
|
|
|
188
|
-
module.define_module_function("
|
|
189
|
-
module.define_module_function("
|
|
609
|
+
module.define_module_function("compress_bound", function!(rlz4_compress_bound, 1))?;
|
|
610
|
+
module.define_module_function("block_stream_size", function!(rlz4_block_stream_size, 0))?;
|
|
611
|
+
|
|
612
|
+
let codec_class = module.define_class("BlockCodec", ruby.class_object())?;
|
|
613
|
+
codec_class.define_singleton_method("_native_new", function!(block_codec_new, 1))?;
|
|
614
|
+
codec_class.define_method("size", method!(block_codec_size, 0))?;
|
|
615
|
+
codec_class.define_method("has_dict?", method!(block_codec_has_dict, 0))?;
|
|
616
|
+
codec_class.define_method("compress", method!(block_codec_compress, 1))?;
|
|
617
|
+
codec_class.define_method("_decompress", method!(block_codec_decompress, 2))?;
|
|
190
618
|
|
|
191
|
-
let
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
dict_class.define_method("id", method!(dict_id, 0))?;
|
|
619
|
+
let frame_codec_class = module.define_class("FrameCodec", ruby.class_object())?;
|
|
620
|
+
frame_codec_class.define_singleton_method("_native_new", function!(frame_codec_initialize, 2))?;
|
|
621
|
+
frame_codec_class.define_method("compress", method!(frame_codec_compress, 1))?;
|
|
622
|
+
frame_codec_class.define_method("decompress", method!(frame_codec_decompress, 1))?;
|
|
623
|
+
frame_codec_class.define_method("size", method!(frame_codec_size, 0))?;
|
|
624
|
+
frame_codec_class.define_method("has_dict?", method!(frame_codec_has_dict, 0))?;
|
|
625
|
+
frame_codec_class.define_method("id", method!(frame_codec_id, 0))?;
|
|
199
626
|
|
|
200
627
|
Ok(())
|
|
201
628
|
}
|
|
@@ -204,41 +631,201 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
204
631
|
mod tests {
|
|
205
632
|
use super::*;
|
|
206
633
|
|
|
634
|
+
fn lz4_block_compress(input: &[u8]) -> Vec<u8> {
|
|
635
|
+
let upper = unsafe { LZ4_compressBound(input.len() as c_int) as usize };
|
|
636
|
+
let mut out = vec![0u8; upper];
|
|
637
|
+
let n = unsafe {
|
|
638
|
+
LZ4_compress_fast(
|
|
639
|
+
input.as_ptr() as *const _,
|
|
640
|
+
out.as_mut_ptr() as *mut _,
|
|
641
|
+
input.len() as c_int,
|
|
642
|
+
upper as c_int,
|
|
643
|
+
1,
|
|
644
|
+
)
|
|
645
|
+
};
|
|
646
|
+
assert!(n > 0);
|
|
647
|
+
out.truncate(n as usize);
|
|
648
|
+
out
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
fn lz4_block_decompress(compressed: &[u8], original_len: usize) -> Vec<u8> {
|
|
652
|
+
let mut out = vec![0u8; original_len];
|
|
653
|
+
let n = unsafe {
|
|
654
|
+
LZ4_decompress_safe(
|
|
655
|
+
compressed.as_ptr() as *const _,
|
|
656
|
+
out.as_mut_ptr() as *mut _,
|
|
657
|
+
compressed.len() as c_int,
|
|
658
|
+
original_len as c_int,
|
|
659
|
+
)
|
|
660
|
+
};
|
|
661
|
+
assert!(n >= 0);
|
|
662
|
+
out.truncate(n as usize);
|
|
663
|
+
out
|
|
664
|
+
}
|
|
665
|
+
|
|
666
|
+
fn lz4_block_compress_dict(input: &[u8], dict: &[u8]) -> Vec<u8> {
|
|
667
|
+
let upper = unsafe { LZ4_compressBound(input.len() as c_int) as usize };
|
|
668
|
+
let mut out = vec![0u8; upper];
|
|
669
|
+
let stream = unsafe { LZ4_createStream() };
|
|
670
|
+
assert!(!stream.is_null());
|
|
671
|
+
unsafe { LZ4_loadDict(stream, dict.as_ptr(), dict.len() as c_int) };
|
|
672
|
+
let n = unsafe {
|
|
673
|
+
LZ4_compress_fast_continue(
|
|
674
|
+
stream,
|
|
675
|
+
input.as_ptr(),
|
|
676
|
+
out.as_mut_ptr(),
|
|
677
|
+
input.len() as c_int,
|
|
678
|
+
upper as c_int,
|
|
679
|
+
1,
|
|
680
|
+
)
|
|
681
|
+
};
|
|
682
|
+
unsafe { LZ4_freeStream(stream) };
|
|
683
|
+
assert!(n > 0);
|
|
684
|
+
out.truncate(n as usize);
|
|
685
|
+
out
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
fn lz4_block_decompress_dict(compressed: &[u8], original_len: usize, dict: &[u8]) -> Vec<u8> {
|
|
689
|
+
let mut out = vec![0u8; original_len];
|
|
690
|
+
let n = unsafe {
|
|
691
|
+
LZ4_decompress_safe_usingDict(
|
|
692
|
+
compressed.as_ptr(),
|
|
693
|
+
out.as_mut_ptr(),
|
|
694
|
+
compressed.len() as c_int,
|
|
695
|
+
original_len as c_int,
|
|
696
|
+
dict.as_ptr(),
|
|
697
|
+
dict.len() as c_int,
|
|
698
|
+
)
|
|
699
|
+
};
|
|
700
|
+
assert!(n >= 0);
|
|
701
|
+
out.truncate(n as usize);
|
|
702
|
+
out
|
|
703
|
+
}
|
|
704
|
+
|
|
705
|
+
#[test]
|
|
706
|
+
fn block_round_trip() {
|
|
707
|
+
let data = b"hello hello hello hello".to_vec();
|
|
708
|
+
let ct = lz4_block_compress(&data);
|
|
709
|
+
let pt = lz4_block_decompress(&ct, data.len());
|
|
710
|
+
assert_eq!(pt, data);
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
#[test]
|
|
714
|
+
fn block_reuse_across_many_calls() {
|
|
715
|
+
for i in 0..100 {
|
|
716
|
+
let msg = format!("payload number {i} ").repeat(10).into_bytes();
|
|
717
|
+
let ct = lz4_block_compress(&msg);
|
|
718
|
+
let pt = lz4_block_decompress(&ct, msg.len());
|
|
719
|
+
assert_eq!(pt, msg);
|
|
720
|
+
}
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
#[test]
|
|
724
|
+
fn block_dict_round_trip() {
|
|
725
|
+
let dict = b"common log prefix: ".to_vec();
|
|
726
|
+
let msg = b"common log prefix: event=login user=alice".to_vec();
|
|
727
|
+
|
|
728
|
+
let ct_dict = lz4_block_compress_dict(&msg, &dict);
|
|
729
|
+
let pt = lz4_block_decompress_dict(&ct_dict, msg.len(), &dict);
|
|
730
|
+
assert_eq!(pt, msg);
|
|
731
|
+
|
|
732
|
+
let ct_plain = lz4_block_compress(&msg);
|
|
733
|
+
assert!(
|
|
734
|
+
ct_dict.len() < ct_plain.len(),
|
|
735
|
+
"dict compression should beat no-dict on shared-prefix input"
|
|
736
|
+
);
|
|
737
|
+
}
|
|
738
|
+
|
|
739
|
+
fn frame_compress(input: &[u8], dict: Option<(&[u8], u32)>) -> Vec<u8> {
|
|
740
|
+
let prefs = default_prefs(dict.map_or(0, |(_, id)| id));
|
|
741
|
+
let data_bound = unsafe { LZ4F_compressBound(input.len(), &prefs) };
|
|
742
|
+
let capacity = data_bound + 64;
|
|
743
|
+
let mut out = vec![0u8; capacity];
|
|
744
|
+
let mut pos = 0usize;
|
|
745
|
+
|
|
746
|
+
let mut ctx = LZ4FCompressionContext(ptr::null_mut());
|
|
747
|
+
let err = unsafe { LZ4F_createCompressionContext(&mut ctx, LZ4F_VERSION) };
|
|
748
|
+
assert_eq!(unsafe { LZ4F_isError(err) }, 0);
|
|
749
|
+
|
|
750
|
+
let n = match dict {
|
|
751
|
+
None => unsafe {
|
|
752
|
+
LZ4F_compressBegin(ctx, out.as_mut_ptr().add(pos), capacity - pos, &prefs)
|
|
753
|
+
},
|
|
754
|
+
Some((d, _)) => unsafe {
|
|
755
|
+
LZ4F_compressBegin_usingDict(
|
|
756
|
+
ctx,
|
|
757
|
+
out.as_mut_ptr().add(pos),
|
|
758
|
+
capacity - pos,
|
|
759
|
+
d.as_ptr(),
|
|
760
|
+
d.len(),
|
|
761
|
+
&prefs,
|
|
762
|
+
)
|
|
763
|
+
},
|
|
764
|
+
};
|
|
765
|
+
assert_eq!(unsafe { LZ4F_isError(n) }, 0);
|
|
766
|
+
pos += n;
|
|
767
|
+
|
|
768
|
+
let n = unsafe {
|
|
769
|
+
LZ4F_compressUpdate(
|
|
770
|
+
ctx,
|
|
771
|
+
out.as_mut_ptr().add(pos),
|
|
772
|
+
capacity - pos,
|
|
773
|
+
input.as_ptr(),
|
|
774
|
+
input.len(),
|
|
775
|
+
ptr::null(),
|
|
776
|
+
)
|
|
777
|
+
};
|
|
778
|
+
assert_eq!(unsafe { LZ4F_isError(n) }, 0);
|
|
779
|
+
pos += n;
|
|
780
|
+
|
|
781
|
+
let n = unsafe {
|
|
782
|
+
LZ4F_compressEnd(ctx, out.as_mut_ptr().add(pos), capacity - pos, ptr::null())
|
|
783
|
+
};
|
|
784
|
+
assert_eq!(unsafe { LZ4F_isError(n) }, 0);
|
|
785
|
+
pos += n;
|
|
786
|
+
|
|
787
|
+
unsafe { LZ4F_freeCompressionContext(ctx) };
|
|
788
|
+
out.truncate(pos);
|
|
789
|
+
out
|
|
790
|
+
}
|
|
791
|
+
|
|
792
|
+
fn frame_decompress(compressed: &[u8], dict: Option<&[u8]>) -> Vec<u8> {
|
|
793
|
+
let mut ctx = LZ4FDecompressionContext(ptr::null_mut());
|
|
794
|
+
let err = unsafe { LZ4F_createDecompressionContext(&mut ctx, LZ4F_VERSION) };
|
|
795
|
+
assert_eq!(unsafe { LZ4F_isError(err) }, 0);
|
|
796
|
+
|
|
797
|
+
let d = dict.map(|b| DictBound { bytes: b.to_vec(), id: 0 });
|
|
798
|
+
let out = frame_decompress_loop(ctx, compressed, d.as_ref()).unwrap();
|
|
799
|
+
|
|
800
|
+
unsafe { LZ4F_freeDecompressionContext(ctx) };
|
|
801
|
+
out
|
|
802
|
+
}
|
|
803
|
+
|
|
207
804
|
#[test]
|
|
208
805
|
fn frame_round_trip() {
|
|
209
806
|
let data = b"the quick brown fox jumps over the lazy dog ".repeat(100);
|
|
210
|
-
let
|
|
211
|
-
enc.write_all(&data).unwrap();
|
|
212
|
-
let ct = enc.finish().unwrap();
|
|
807
|
+
let ct = frame_compress(&data, None);
|
|
213
808
|
assert!(ct.len() < data.len(), "should compress repetitive input");
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
let mut dec = FrameDecoder::new(&ct[..]);
|
|
218
|
-
let mut out = Vec::new();
|
|
219
|
-
dec.read_to_end(&mut out).unwrap();
|
|
220
|
-
assert_eq!(out, data);
|
|
809
|
+
assert_eq!(&ct[..4], &LZ4_FRAME_MAGIC);
|
|
810
|
+
let pt = frame_decompress(&ct, None);
|
|
811
|
+
assert_eq!(pt, data);
|
|
221
812
|
}
|
|
222
813
|
|
|
223
814
|
#[test]
|
|
224
815
|
fn frame_empty_round_trip() {
|
|
225
|
-
let
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
let mut dec = FrameDecoder::new(&ct[..]);
|
|
229
|
-
let mut out = Vec::new();
|
|
230
|
-
dec.read_to_end(&mut out).unwrap();
|
|
231
|
-
assert!(out.is_empty());
|
|
816
|
+
let ct = frame_compress(b"", None);
|
|
817
|
+
let pt = frame_decompress(&ct, None);
|
|
818
|
+
assert!(pt.is_empty());
|
|
232
819
|
}
|
|
233
820
|
|
|
234
821
|
#[test]
|
|
235
822
|
fn frame_garbage_fails() {
|
|
236
|
-
// A buffer that is long enough to look like a frame but has the
|
|
237
|
-
// wrong magic number must fail to decode.
|
|
238
823
|
let garbage = vec![0xFFu8; 32];
|
|
239
|
-
let mut
|
|
240
|
-
|
|
241
|
-
|
|
824
|
+
let mut ctx = LZ4FDecompressionContext(ptr::null_mut());
|
|
825
|
+
unsafe { LZ4F_createDecompressionContext(&mut ctx, LZ4F_VERSION) };
|
|
826
|
+
let result = frame_decompress_loop(ctx, &garbage, None);
|
|
827
|
+
unsafe { LZ4F_freeDecompressionContext(ctx) };
|
|
828
|
+
assert!(result.is_err());
|
|
242
829
|
}
|
|
243
830
|
|
|
244
831
|
#[test]
|
|
@@ -247,31 +834,46 @@ mod tests {
|
|
|
247
834
|
let id: u32 = 0xDEAD_BEEF;
|
|
248
835
|
let msg = b"JSON schema version 1 field name=hello value=world".to_vec();
|
|
249
836
|
|
|
250
|
-
let
|
|
251
|
-
|
|
252
|
-
let ct = enc.finish().unwrap();
|
|
253
|
-
assert_eq!(&ct[..4], &[0x04, 0x22, 0x4d, 0x18]);
|
|
837
|
+
let ct = frame_compress(&msg, Some((&dict, id)));
|
|
838
|
+
assert_eq!(&ct[..4], &LZ4_FRAME_MAGIC);
|
|
254
839
|
|
|
255
|
-
let
|
|
256
|
-
let mut pt = Vec::new();
|
|
257
|
-
dec.read_to_end(&mut pt).unwrap();
|
|
840
|
+
let pt = frame_decompress(&ct, Some(&dict));
|
|
258
841
|
assert_eq!(pt, msg);
|
|
259
842
|
}
|
|
260
843
|
|
|
261
844
|
#[test]
|
|
262
|
-
fn
|
|
263
|
-
let
|
|
264
|
-
let
|
|
265
|
-
|
|
845
|
+
fn frame_dict_id_in_header() {
|
|
846
|
+
let dict = b"common prefix AAA ".repeat(4);
|
|
847
|
+
let id: u32 = 0xAAAA_AAAA;
|
|
266
848
|
let msg = b"common prefix AAA : the payload";
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
let
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
let mut
|
|
275
|
-
|
|
849
|
+
|
|
850
|
+
let ct = frame_compress(msg, Some((&dict, id)));
|
|
851
|
+
|
|
852
|
+
let mut ctx = LZ4FDecompressionContext(ptr::null_mut());
|
|
853
|
+
unsafe { LZ4F_createDecompressionContext(&mut ctx, LZ4F_VERSION) };
|
|
854
|
+
|
|
855
|
+
let mut frame_info = zero_frame_info();
|
|
856
|
+
let mut src_size = ct.len();
|
|
857
|
+
let ret = unsafe {
|
|
858
|
+
lz4_sys::LZ4F_getFrameInfo(ctx, &mut frame_info, ct.as_ptr(), &mut src_size)
|
|
859
|
+
};
|
|
860
|
+
unsafe { LZ4F_freeDecompressionContext(ctx) };
|
|
861
|
+
|
|
862
|
+
assert_eq!(unsafe { LZ4F_isError(ret) }, 0, "LZ4F_getFrameInfo failed");
|
|
863
|
+
assert_eq!(frame_info.dict_id, id, "dict_id not written into frame header");
|
|
864
|
+
}
|
|
865
|
+
|
|
866
|
+
#[test]
|
|
867
|
+
fn frame_truncated_fails() {
|
|
868
|
+
let data = b"some data that should compress nicely ".repeat(10);
|
|
869
|
+
let ct = frame_compress(&data, None);
|
|
870
|
+
let truncated = &ct[..ct.len() / 2];
|
|
871
|
+
|
|
872
|
+
let mut ctx = LZ4FDecompressionContext(ptr::null_mut());
|
|
873
|
+
unsafe { LZ4F_createDecompressionContext(&mut ctx, LZ4F_VERSION) };
|
|
874
|
+
let result = frame_decompress_loop(ctx, truncated, None);
|
|
875
|
+
unsafe { LZ4F_freeDecompressionContext(ctx) };
|
|
876
|
+
|
|
877
|
+
assert!(result.is_err(), "truncated frame should return an error");
|
|
276
878
|
}
|
|
277
879
|
}
|