rlz4 0.1.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +23 -12
- data/README.md +87 -33
- data/ext/rlz4/Cargo.toml +2 -2
- data/ext/rlz4/src/lib.rs +783 -124
- data/lib/rlz4/block_codec.rb +35 -0
- data/lib/rlz4/dictionary.rb +27 -0
- data/lib/rlz4/frame_codec.rb +34 -0
- data/lib/rlz4/version.rb +1 -1
- data/lib/rlz4.rb +4 -1
- metadata +9 -9
- data/tmp/x86_64-linux/stage/Cargo.toml +0 -9
- data/tmp/x86_64-linux/stage/ext/rlz4/Cargo.toml +0 -16
data/ext/rlz4/src/lib.rs
CHANGED
|
@@ -6,14 +6,74 @@ use magnus::{
|
|
|
6
6
|
value::Opaque,
|
|
7
7
|
Error, Ruby,
|
|
8
8
|
};
|
|
9
|
-
use std::
|
|
9
|
+
use std::cell::RefCell;
|
|
10
|
+
use std::ptr;
|
|
10
11
|
use std::sync::OnceLock;
|
|
11
12
|
|
|
12
|
-
use
|
|
13
|
+
use lz4_sys::{
|
|
14
|
+
LZ4F_VERSION,
|
|
15
|
+
// block compress/decompress
|
|
16
|
+
LZ4_compressBound, LZ4_compress_fast, LZ4_decompress_safe,
|
|
17
|
+
LZ4_createStream, LZ4_freeStream, LZ4StreamEncode,
|
|
18
|
+
// frame compress/decompress
|
|
19
|
+
LZ4F_compressBound, LZ4F_compressBegin, LZ4F_compressUpdate, LZ4F_compressEnd,
|
|
20
|
+
LZ4F_createCompressionContext, LZ4F_freeCompressionContext,
|
|
21
|
+
LZ4F_createDecompressionContext, LZ4F_freeDecompressionContext,
|
|
22
|
+
LZ4F_decompress, LZ4F_isError, LZ4F_getErrorName,
|
|
23
|
+
LZ4FCompressionContext, LZ4FDecompressionContext,
|
|
24
|
+
LZ4FPreferences, LZ4FFrameInfo, LZ4FDecompressOptions,
|
|
25
|
+
BlockSize, BlockMode, ContentChecksum, FrameType, BlockChecksum,
|
|
26
|
+
c_int,
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
// sizeof(LZ4_stream_t): union { char minStateSize[(1<<LZ4_MEMORY_USAGE)+32]; ... }
|
|
30
|
+
// LZ4_MEMORY_USAGE defaults to 14, so this is (16384 + 32) = 16416 bytes.
|
|
31
|
+
const LZ4_STREAM_SIZE: usize = (1 << 14) + 32;
|
|
32
|
+
|
|
33
|
+
// Functions present in liblz4 1.10.0 but not yet exposed by the lz4-sys crate.
//
// These prototypes are written by hand, so they must be kept in sync with the
// C declarations in lz4.h / lz4frame.h. Byte buffers are declared as `u8`
// pointers on the Rust side.
extern "C" {
    // Resets `stream` so it can start a new, unrelated compression run.
    fn LZ4_resetStream_fast(stream: *mut LZ4StreamEncode);
    // Loads `dict_size` bytes starting at `dict` into `stream` as its dictionary.
    // NOTE(review): per lz4.h the return value is the number of dictionary
    // bytes actually retained — confirm against the linked liblz4 version.
    fn LZ4_loadDict(stream: *mut LZ4StreamEncode, dict: *const u8, dict_size: c_int) -> c_int;
    // Compresses `src_size` bytes from `src` into `dst` (capacity
    // `dst_capacity`) using the history/dictionary held in `stream`.
    // Callers in this file treat a result <= 0 as failure.
    fn LZ4_compress_fast_continue(
        stream: *mut LZ4StreamEncode,
        src: *const u8,
        dst: *mut u8,
        src_size: c_int,
        dst_capacity: c_int,
        acceleration: c_int,
    ) -> c_int;
    // Decompresses one block from `src` into `dst`, resolving back-references
    // into the `dict_size` bytes at `dict`. Callers in this file treat a
    // negative result as a decode error.
    fn LZ4_decompress_safe_usingDict(
        src: *const u8,
        dst: *mut u8,
        src_size: c_int,
        dst_capacity: c_int,
        dict: *const u8,
        dict_size: c_int,
    ) -> c_int;
    // lz4 >= 1.9.4
    // Frame-API counterpart of LZ4F_compressBegin that also installs a raw
    // dictionary. The returned value is an LZ4F code: test it with
    // LZ4F_isError before using it as a byte count.
    fn LZ4F_compressBegin_usingDict(
        ctx: LZ4FCompressionContext,
        dst: *mut u8,
        dst_capacity: usize,
        dict: *const u8,
        dict_size: usize,
        prefs: *const LZ4FPreferences,
    ) -> usize;
    // lz4 >= 1.9.4
    // Frame-API counterpart of LZ4F_decompress that also supplies a raw
    // dictionary. `dst_size_ptr` / `src_size_ptr` are in/out: capacity and
    // available input on entry, bytes written and bytes consumed on return.
    fn LZ4F_decompress_usingDict(
        ctx: LZ4FDecompressionContext,
        dst: *mut u8,
        dst_size_ptr: *mut usize,
        src: *const u8,
        src_size_ptr: *mut usize,
        dict: *const u8,
        dict_size: usize,
        opts: *const LZ4FDecompressOptions,
    ) -> usize;
}
|
|
13
74
|
|
|
14
75
|
const LZ4_FRAME_MAGIC: [u8; 4] = [0x04, 0x22, 0x4d, 0x18];
|
|
15
76
|
|
|
16
|
-
// Opaque<T> is Send+Sync and is designed for storing Ruby values in statics.
|
|
17
77
|
static DECOMPRESS_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
|
|
18
78
|
|
|
19
79
|
fn decompress_error(ruby: &Ruby) -> ExceptionClass {
|
|
@@ -24,120 +84,524 @@ fn decompress_error(ruby: &Ruby) -> ExceptionClass {
|
|
|
24
84
|
)
|
|
25
85
|
}
|
|
26
86
|
|
|
27
|
-
// ---------- module
|
|
87
|
+
// ---------- module function: compress_bound ----------
|
|
28
88
|
|
|
29
|
-
fn
|
|
30
|
-
|
|
31
|
-
|
|
89
|
+
fn rlz4_compress_bound(_ruby: &Ruby, size: usize) -> Result<usize, Error> {
|
|
90
|
+
Ok(unsafe { LZ4_compressBound(size as c_int) } as usize)
|
|
91
|
+
}
|
|
32
92
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
93
|
+
// ---------- module function: block_stream_size ----------
|
|
94
|
+
//
|
|
95
|
+
// Returns sizeof(LZ4_stream_t). Exposed so the Ruby test suite can compute
|
|
96
|
+
// the expected #size of a dict-mode BlockCodec without hardcoding the constant.
|
|
97
|
+
|
|
98
|
+
// Module function backing `RLZ4.block_stream_size`. Ignores the Ruby handle
// and always returns the compile-time constant LZ4_STREAM_SIZE.
fn rlz4_block_stream_size(_ruby: &Ruby) -> usize {
    LZ4_STREAM_SIZE
}
|
|
101
|
+
|
|
102
|
+
// ---------- BlockCodec ----------
|
|
103
|
+
//
|
|
104
|
+
// No-dict codec: uses LZ4_compress_fast (stateless; stack-allocated hash
|
|
105
|
+
// table inside the C function). Ruby object owns no extra heap. #size = 0.
|
|
106
|
+
//
|
|
107
|
+
// Dict codec: allocates one LZ4StreamEncode via LZ4_createStream. Before
|
|
108
|
+
// each compress call, LZ4_resetStream_fast + LZ4_loadDict restore the
|
|
109
|
+
// dict-loaded state. #size = LZ4_STREAM_SIZE + dict.len().
|
|
110
|
+
//
|
|
111
|
+
// Decompression is always stateless per-block. Both compress and decompress
|
|
112
|
+
// live on the same class so callers hold one object per worker.
|
|
113
|
+
//
|
|
114
|
+
// Thread-local by construction (RefCell, not Send+Sync). A BlockCodec must
|
|
115
|
+
// not cross Ractor boundaries — send a new one instead.
|
|
116
|
+
|
|
117
|
+
struct EncodeStream(*mut LZ4StreamEncode);
|
|
118
|
+
|
|
119
|
+
// SAFETY: *mut LZ4StreamEncode is !Send by default. We guarantee exclusive
|
|
120
|
+
// access via RefCell (one borrow at a time, single-threaded Ruby GIL).
|
|
121
|
+
unsafe impl Send for EncodeStream {}
|
|
122
|
+
|
|
123
|
+
impl Drop for EncodeStream {
|
|
124
|
+
fn drop(&mut self) {
|
|
125
|
+
if !self.0.is_null() {
|
|
126
|
+
unsafe { LZ4_freeStream(self.0) };
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
#[magnus::wrap(class = "RLZ4::BlockCodec", free_immediately, size)]
|
|
132
|
+
struct BlockCodec {
|
|
133
|
+
stream: Option<RefCell<EncodeStream>>, // Some only when dict is set
|
|
134
|
+
dict: Option<Vec<u8>>,
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
fn block_codec_new(_ruby: &Ruby, rb_dict: Option<RString>) -> Result<BlockCodec, Error> {
|
|
138
|
+
match rb_dict {
|
|
139
|
+
None => Ok(BlockCodec { stream: None, dict: None }),
|
|
140
|
+
Some(rb_dict) => {
|
|
141
|
+
// SAFETY: copy dict bytes before any Ruby allocation.
|
|
142
|
+
let bytes: Vec<u8> = unsafe { rb_dict.as_slice().to_vec() };
|
|
143
|
+
|
|
144
|
+
let raw = unsafe { LZ4_createStream() };
|
|
145
|
+
if raw.is_null() {
|
|
146
|
+
return Err(Error::new(
|
|
147
|
+
_ruby.exception_runtime_error(),
|
|
148
|
+
"LZ4_createStream allocation failed",
|
|
149
|
+
));
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
// Pre-load the dict so the pristine-state cost is paid once here,
|
|
153
|
+
// not on the first compress call.
|
|
154
|
+
unsafe { LZ4_loadDict(raw, bytes.as_ptr(), bytes.len() as c_int) };
|
|
155
|
+
|
|
156
|
+
Ok(BlockCodec {
|
|
157
|
+
stream: Some(RefCell::new(EncodeStream(raw))),
|
|
158
|
+
dict: Some(bytes),
|
|
159
|
+
})
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
}
|
|
49
163
|
|
|
50
|
-
|
|
164
|
+
fn block_codec_size(rb_self: &BlockCodec) -> usize {
|
|
165
|
+
let stream_size = match &rb_self.stream {
|
|
166
|
+
Some(_) => LZ4_STREAM_SIZE,
|
|
167
|
+
None => 0,
|
|
168
|
+
};
|
|
169
|
+
stream_size + rb_self.dict.as_ref().map_or(0, |d| d.len())
|
|
51
170
|
}
|
|
52
171
|
|
|
53
|
-
fn
|
|
54
|
-
|
|
55
|
-
|
|
172
|
+
// Backs `BlockCodec#has_dict?`: true when the codec holds a dictionary copy.
fn block_codec_has_dict(rb_self: &BlockCodec) -> bool {
    rb_self.dict.is_some()
}
|
|
56
175
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
176
|
+
fn block_codec_compress(
|
|
177
|
+
ruby: &Ruby,
|
|
178
|
+
rb_self: &BlockCodec,
|
|
179
|
+
rb_input: RString,
|
|
180
|
+
) -> Result<RString, Error> {
|
|
181
|
+
// SAFETY: rb_input is stack-pinned; the C compression functions perform no
|
|
182
|
+
// Ruby callbacks or GC-triggering allocations while the input slice is
|
|
183
|
+
// live. str_from_slice happens after.
|
|
184
|
+
let input: &[u8] = unsafe { rb_input.as_slice() };
|
|
185
|
+
|
|
186
|
+
let upper = unsafe { LZ4_compressBound(input.len() as c_int) as usize };
|
|
187
|
+
let mut out = vec![0u8; upper];
|
|
188
|
+
|
|
189
|
+
let compressed_len: c_int = match (&rb_self.stream, &rb_self.dict) {
|
|
190
|
+
(None, None) => unsafe {
|
|
191
|
+
LZ4_compress_fast(
|
|
192
|
+
input.as_ptr() as *const _,
|
|
193
|
+
out.as_mut_ptr() as *mut _,
|
|
194
|
+
input.len() as c_int,
|
|
195
|
+
upper as c_int,
|
|
196
|
+
1,
|
|
197
|
+
)
|
|
198
|
+
},
|
|
199
|
+
(Some(stream_cell), Some(dict)) => {
|
|
200
|
+
let stream = stream_cell.borrow_mut();
|
|
201
|
+
unsafe {
|
|
202
|
+
// Restore stream to the dict-loaded state before each call.
|
|
203
|
+
LZ4_resetStream_fast(stream.0);
|
|
204
|
+
LZ4_loadDict(stream.0, dict.as_ptr(), dict.len() as c_int);
|
|
205
|
+
LZ4_compress_fast_continue(
|
|
206
|
+
stream.0,
|
|
207
|
+
input.as_ptr(),
|
|
208
|
+
out.as_mut_ptr(),
|
|
209
|
+
input.len() as c_int,
|
|
210
|
+
upper as c_int,
|
|
211
|
+
1,
|
|
212
|
+
)
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
_ => unreachable!("stream and dict are always both Some or both None"),
|
|
216
|
+
};
|
|
217
|
+
|
|
218
|
+
if compressed_len <= 0 {
|
|
61
219
|
return Err(Error::new(
|
|
62
|
-
|
|
63
|
-
"lz4
|
|
220
|
+
ruby.exception_runtime_error(),
|
|
221
|
+
"lz4 block compress failed",
|
|
64
222
|
));
|
|
65
223
|
}
|
|
66
224
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
225
|
+
out.truncate(compressed_len as usize);
|
|
226
|
+
Ok(ruby.str_from_slice(&out))
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
fn block_codec_decompress(
|
|
230
|
+
ruby: &Ruby,
|
|
231
|
+
rb_self: &BlockCodec,
|
|
232
|
+
rb_input: RString,
|
|
233
|
+
decompressed_size: usize,
|
|
234
|
+
) -> Result<RString, Error> {
|
|
235
|
+
// SAFETY: same as compress. Decoder is pure C, no Ruby callbacks.
|
|
236
|
+
let compressed: &[u8] = unsafe { rb_input.as_slice() };
|
|
237
|
+
|
|
238
|
+
let mut out = vec![0u8; decompressed_size];
|
|
239
|
+
|
|
240
|
+
let actual_len: c_int = match &rb_self.dict {
|
|
241
|
+
None => unsafe {
|
|
242
|
+
LZ4_decompress_safe(
|
|
243
|
+
compressed.as_ptr() as *const _,
|
|
244
|
+
out.as_mut_ptr() as *mut _,
|
|
245
|
+
compressed.len() as c_int,
|
|
246
|
+
decompressed_size as c_int,
|
|
247
|
+
)
|
|
248
|
+
},
|
|
249
|
+
Some(dict) => unsafe {
|
|
250
|
+
LZ4_decompress_safe_usingDict(
|
|
251
|
+
compressed.as_ptr(),
|
|
252
|
+
out.as_mut_ptr(),
|
|
253
|
+
compressed.len() as c_int,
|
|
254
|
+
decompressed_size as c_int,
|
|
255
|
+
dict.as_ptr(),
|
|
256
|
+
dict.len() as c_int,
|
|
257
|
+
)
|
|
258
|
+
},
|
|
259
|
+
};
|
|
260
|
+
|
|
261
|
+
if actual_len < 0 {
|
|
262
|
+
return Err(Error::new(
|
|
73
263
|
decompress_error(ruby),
|
|
74
|
-
|
|
75
|
-
)
|
|
76
|
-
}
|
|
264
|
+
"lz4 block decode failed",
|
|
265
|
+
));
|
|
266
|
+
}
|
|
77
267
|
|
|
268
|
+
out.truncate(actual_len as usize);
|
|
78
269
|
Ok(ruby.str_from_slice(&out))
|
|
79
270
|
}
|
|
80
271
|
|
|
81
|
-
// ----------
|
|
272
|
+
// ---------- FrameCodec ----------
|
|
82
273
|
//
|
|
83
|
-
//
|
|
84
|
-
//
|
|
85
|
-
//
|
|
86
|
-
// fit anyway: lower per-message overhead and direct dictionary support.
|
|
274
|
+
// One-shot compress/decompress using the LZ4F frame API. Contexts are
|
|
275
|
+
// created and freed per operation so FrameCodec holds no mutable state
|
|
276
|
+
// and is shareable across Ractors.
|
|
87
277
|
//
|
|
88
|
-
//
|
|
89
|
-
// as
|
|
90
|
-
|
|
91
|
-
|
|
278
|
+
// Block mode: Linked. In Linked mode LZ4F_compressBegin_usingDict loads
|
|
279
|
+
// the dict as initial stream history before the first block, so the block
|
|
280
|
+
// compressor can back-reference into dict bytes. Independent mode would
|
|
281
|
+
// discard the raw dict bytes before each block (a known liblz4 limitation
|
|
282
|
+
// with the _usingDict raw-bytes API; _usingCDict avoids it but changes the
|
|
283
|
+
// dict-id derivation).
|
|
284
|
+
|
|
285
|
+
#[magnus::wrap(class = "RLZ4::FrameCodec", free_immediately, size)]
|
|
286
|
+
struct FrameCodec {
|
|
287
|
+
dict: Option<DictBound>,
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
struct DictBound {
|
|
92
291
|
bytes: Vec<u8>,
|
|
292
|
+
id: u32,
|
|
93
293
|
}
|
|
94
294
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
295
|
+
unsafe impl Send for FrameCodec {}
|
|
296
|
+
unsafe impl Sync for FrameCodec {}
|
|
297
|
+
|
|
298
|
+
fn frame_codec_initialize(
|
|
299
|
+
_ruby: &Ruby,
|
|
300
|
+
rb_dict: Option<RString>,
|
|
301
|
+
id: u32,
|
|
302
|
+
) -> Result<FrameCodec, Error> {
|
|
303
|
+
let dict = rb_dict.map(|s| {
|
|
304
|
+
// SAFETY: copy dict bytes before any Ruby allocation.
|
|
305
|
+
let bytes: Vec<u8> = unsafe { s.as_slice().to_vec() };
|
|
306
|
+
s.freeze();
|
|
307
|
+
DictBound { bytes, id }
|
|
308
|
+
});
|
|
309
|
+
Ok(FrameCodec { dict })
|
|
310
|
+
}
|
|
99
311
|
|
|
100
|
-
fn
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
312
|
+
fn lz4f_error(code: usize) -> String {
|
|
313
|
+
let name = unsafe { LZ4F_getErrorName(code) };
|
|
314
|
+
if name.is_null() {
|
|
315
|
+
return format!("lz4f error {code}");
|
|
316
|
+
}
|
|
317
|
+
unsafe { std::ffi::CStr::from_ptr(name) }
|
|
318
|
+
.to_string_lossy()
|
|
319
|
+
.into_owned()
|
|
105
320
|
}
|
|
106
321
|
|
|
107
|
-
fn
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
322
|
+
fn default_prefs(dict_id: u32) -> LZ4FPreferences {
|
|
323
|
+
LZ4FPreferences {
|
|
324
|
+
frame_info: LZ4FFrameInfo {
|
|
325
|
+
block_size_id: BlockSize::Default,
|
|
326
|
+
// Linked mode: the dict is treated as the initial stream history,
|
|
327
|
+
// so the first (and often only) block can back-reference into it.
|
|
328
|
+
// Independent mode + raw-bytes dict would reset the hash table
|
|
329
|
+
// before each block, discarding the dict (liblz4 limitation).
|
|
330
|
+
block_mode: BlockMode::Linked,
|
|
331
|
+
content_checksum_flag: ContentChecksum::NoChecksum,
|
|
332
|
+
frame_type: FrameType::Frame,
|
|
333
|
+
content_size: 0,
|
|
334
|
+
dict_id,
|
|
335
|
+
block_checksum_flag: BlockChecksum::NoBlockChecksum,
|
|
336
|
+
},
|
|
337
|
+
compression_level: 0,
|
|
338
|
+
auto_flush: 0,
|
|
339
|
+
favor_dec_speed: 0,
|
|
340
|
+
reserved: [0; 3],
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
// Builds a placeholder LZ4FFrameInfo with default / zero field values.
// In this file it is used as the output slot handed to LZ4F_getFrameInfo,
// which fills it from the frame header.
fn zero_frame_info() -> LZ4FFrameInfo {
    LZ4FFrameInfo {
        block_size_id: BlockSize::Default,
        block_mode: BlockMode::Independent,
        content_checksum_flag: ContentChecksum::NoChecksum,
        frame_type: FrameType::Frame,
        content_size: 0,
        dict_id: 0,
        block_checksum_flag: BlockChecksum::NoBlockChecksum,
    }
}
|
|
355
|
+
|
|
356
|
+
fn create_dctx(ruby: &Ruby) -> Result<LZ4FDecompressionContext, Error> {
|
|
357
|
+
let mut ctx = LZ4FDecompressionContext(ptr::null_mut());
|
|
358
|
+
let err = unsafe { LZ4F_createDecompressionContext(&mut ctx, LZ4F_VERSION) };
|
|
359
|
+
if unsafe { LZ4F_isError(err) } != 0 {
|
|
360
|
+
return Err(Error::new(
|
|
361
|
+
ruby.exception_runtime_error(),
|
|
362
|
+
format!("LZ4F_createDecompressionContext: {}", lz4f_error(err)),
|
|
363
|
+
));
|
|
364
|
+
}
|
|
365
|
+
Ok(ctx)
|
|
111
366
|
}
|
|
112
367
|
|
|
113
|
-
fn
|
|
368
|
+
fn frame_codec_compress(
|
|
114
369
|
ruby: &Ruby,
|
|
115
|
-
rb_self: &
|
|
370
|
+
rb_self: &FrameCodec,
|
|
116
371
|
rb_input: RString,
|
|
117
372
|
) -> Result<RString, Error> {
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
373
|
+
// SAFETY: rb_input is stack-pinned; all LZ4F calls are pure C with no
|
|
374
|
+
// Ruby callbacks. str_from_slice happens after input is no longer live.
|
|
375
|
+
let input: &[u8] = unsafe { rb_input.as_slice() };
|
|
376
|
+
|
|
377
|
+
let prefs = default_prefs(rb_self.dict.as_ref().map_or(0, |d| d.id));
|
|
378
|
+
let data_bound = unsafe { LZ4F_compressBound(input.len(), &prefs) };
|
|
379
|
+
let capacity = data_bound + 64;
|
|
380
|
+
let mut out = vec![0u8; capacity];
|
|
381
|
+
let mut pos: usize = 0;
|
|
382
|
+
|
|
383
|
+
let mut ctx = LZ4FCompressionContext(ptr::null_mut());
|
|
384
|
+
let err = unsafe { LZ4F_createCompressionContext(&mut ctx, LZ4F_VERSION) };
|
|
385
|
+
if unsafe { LZ4F_isError(err) } != 0 {
|
|
386
|
+
return Err(Error::new(
|
|
387
|
+
ruby.exception_runtime_error(),
|
|
388
|
+
format!("LZ4F_createCompressionContext: {}", lz4f_error(err)),
|
|
389
|
+
));
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
let result = (|| -> Result<usize, String> {
|
|
393
|
+
let n = match &rb_self.dict {
|
|
394
|
+
None => unsafe {
|
|
395
|
+
LZ4F_compressBegin(ctx, out.as_mut_ptr().add(pos), capacity - pos, &prefs)
|
|
396
|
+
},
|
|
397
|
+
Some(d) => unsafe {
|
|
398
|
+
LZ4F_compressBegin_usingDict(
|
|
399
|
+
ctx,
|
|
400
|
+
out.as_mut_ptr().add(pos),
|
|
401
|
+
capacity - pos,
|
|
402
|
+
d.bytes.as_ptr(),
|
|
403
|
+
d.bytes.len(),
|
|
404
|
+
&prefs,
|
|
405
|
+
)
|
|
406
|
+
},
|
|
407
|
+
};
|
|
408
|
+
if unsafe { LZ4F_isError(n) } != 0 {
|
|
409
|
+
return Err(format!("LZ4F_compressBegin: {}", lz4f_error(n)));
|
|
410
|
+
}
|
|
411
|
+
pos += n;
|
|
412
|
+
|
|
413
|
+
let n = unsafe {
|
|
414
|
+
LZ4F_compressUpdate(
|
|
415
|
+
ctx,
|
|
416
|
+
out.as_mut_ptr().add(pos),
|
|
417
|
+
capacity - pos,
|
|
418
|
+
input.as_ptr(),
|
|
419
|
+
input.len(),
|
|
420
|
+
ptr::null(),
|
|
124
421
|
)
|
|
125
|
-
}
|
|
126
|
-
|
|
422
|
+
};
|
|
423
|
+
if unsafe { LZ4F_isError(n) } != 0 {
|
|
424
|
+
return Err(format!("LZ4F_compressUpdate: {}", lz4f_error(n)));
|
|
425
|
+
}
|
|
426
|
+
pos += n;
|
|
427
|
+
|
|
428
|
+
let n = unsafe {
|
|
429
|
+
LZ4F_compressEnd(
|
|
430
|
+
ctx,
|
|
431
|
+
out.as_mut_ptr().add(pos),
|
|
432
|
+
capacity - pos,
|
|
433
|
+
ptr::null(),
|
|
434
|
+
)
|
|
435
|
+
};
|
|
436
|
+
if unsafe { LZ4F_isError(n) } != 0 {
|
|
437
|
+
return Err(format!("LZ4F_compressEnd: {}", lz4f_error(n)));
|
|
438
|
+
}
|
|
439
|
+
pos += n;
|
|
440
|
+
Ok(pos)
|
|
441
|
+
})();
|
|
442
|
+
|
|
443
|
+
unsafe { LZ4F_freeCompressionContext(ctx) };
|
|
444
|
+
|
|
445
|
+
match result {
|
|
446
|
+
Err(msg) => Err(Error::new(ruby.exception_runtime_error(), msg)),
|
|
447
|
+
Ok(written) => {
|
|
448
|
+
out.truncate(written);
|
|
449
|
+
Ok(ruby.str_from_slice(&out))
|
|
450
|
+
}
|
|
451
|
+
}
|
|
127
452
|
}
|
|
128
453
|
|
|
129
|
-
fn
|
|
130
|
-
|
|
454
|
+
fn frame_codec_decompress(
|
|
455
|
+
ruby: &Ruby,
|
|
456
|
+
rb_self: &FrameCodec,
|
|
457
|
+
rb_input: RString,
|
|
458
|
+
) -> Result<RString, Error> {
|
|
459
|
+
// SAFETY: rb_input is stack-pinned; LZ4F calls are pure C.
|
|
460
|
+
let compressed: &[u8] = unsafe { rb_input.as_slice() };
|
|
461
|
+
|
|
462
|
+
if compressed.len() < 4 || compressed[..4] != LZ4_FRAME_MAGIC {
|
|
463
|
+
return Err(Error::new(
|
|
464
|
+
decompress_error(ruby),
|
|
465
|
+
"lz4 frame decode failed: bad magic (input is not an LZ4 frame)",
|
|
466
|
+
));
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
// When we have a dict, use a temporary context to parse the frame header
|
|
470
|
+
// (LZ4F_getFrameInfo advances the context's stage past dstage_init), then
|
|
471
|
+
// use a fresh context for the actual decompress so LZ4F_decompress_usingDict
|
|
472
|
+
// sees dstage_init and correctly installs the dict.
|
|
473
|
+
if let Some(d) = &rb_self.dict {
|
|
474
|
+
let temp_ctx = match create_dctx(ruby) {
|
|
475
|
+
Ok(c) => c,
|
|
476
|
+
Err(e) => return Err(e),
|
|
477
|
+
};
|
|
478
|
+
let mut frame_info = zero_frame_info();
|
|
479
|
+
let mut dummy = compressed.len();
|
|
480
|
+
let ret = unsafe {
|
|
481
|
+
lz4_sys::LZ4F_getFrameInfo(temp_ctx, &mut frame_info, compressed.as_ptr(), &mut dummy)
|
|
482
|
+
};
|
|
483
|
+
unsafe { LZ4F_freeDecompressionContext(temp_ctx) };
|
|
484
|
+
|
|
485
|
+
if unsafe { LZ4F_isError(ret) } != 0 {
|
|
486
|
+
return Err(Error::new(
|
|
487
|
+
decompress_error(ruby),
|
|
488
|
+
format!("lz4 frame header error: {}", lz4f_error(ret)),
|
|
489
|
+
));
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
if frame_info.dict_id != 0 && frame_info.dict_id != d.id {
|
|
493
|
+
return Err(Error::new(
|
|
494
|
+
decompress_error(ruby),
|
|
495
|
+
format!(
|
|
496
|
+
"lz4 frame dict_id mismatch: frame={:#010x} codec={:#010x}",
|
|
497
|
+
frame_info.dict_id, d.id
|
|
498
|
+
),
|
|
499
|
+
));
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
// Fresh context for actual decompression.
|
|
504
|
+
let ctx = match create_dctx(ruby) {
|
|
505
|
+
Ok(c) => c,
|
|
506
|
+
Err(e) => return Err(e),
|
|
507
|
+
};
|
|
508
|
+
|
|
509
|
+
// Pass the full compressed buffer (including header) to the loop.
|
|
510
|
+
// LZ4F_decompress_usingDict sets the dict before parsing the header
|
|
511
|
+
// (dstage_init check), so the dict is available for the first block.
|
|
512
|
+
let result = frame_decompress_loop(ctx, compressed, rb_self.dict.as_ref());
|
|
513
|
+
|
|
514
|
+
unsafe { LZ4F_freeDecompressionContext(ctx) };
|
|
515
|
+
|
|
516
|
+
match result {
|
|
517
|
+
Err(msg) => Err(Error::new(decompress_error(ruby), msg)),
|
|
518
|
+
Ok(out) => Ok(ruby.str_from_slice(&out)),
|
|
519
|
+
}
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
fn frame_decompress_loop(
|
|
523
|
+
ctx: LZ4FDecompressionContext,
|
|
524
|
+
compressed: &[u8],
|
|
525
|
+
dict: Option<&DictBound>,
|
|
526
|
+
) -> Result<Vec<u8>, String> {
|
|
527
|
+
let mut out = Vec::new();
|
|
528
|
+
let mut src_pos = 0usize;
|
|
529
|
+
let mut chunk = vec![0u8; 65536];
|
|
530
|
+
let mut complete = false;
|
|
531
|
+
|
|
532
|
+
loop {
|
|
533
|
+
let remaining = compressed.len() - src_pos;
|
|
534
|
+
if remaining == 0 {
|
|
535
|
+
break;
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
let mut dst_written = chunk.len();
|
|
539
|
+
let mut src_consumed = remaining;
|
|
540
|
+
|
|
541
|
+
let ret = match dict {
|
|
542
|
+
None => unsafe {
|
|
543
|
+
LZ4F_decompress(
|
|
544
|
+
ctx,
|
|
545
|
+
chunk.as_mut_ptr(),
|
|
546
|
+
&mut dst_written,
|
|
547
|
+
compressed.as_ptr().add(src_pos),
|
|
548
|
+
&mut src_consumed,
|
|
549
|
+
ptr::null(),
|
|
550
|
+
)
|
|
551
|
+
},
|
|
552
|
+
Some(d) => unsafe {
|
|
553
|
+
LZ4F_decompress_usingDict(
|
|
554
|
+
ctx,
|
|
555
|
+
chunk.as_mut_ptr(),
|
|
556
|
+
&mut dst_written as *mut usize,
|
|
557
|
+
compressed.as_ptr().add(src_pos),
|
|
558
|
+
&mut src_consumed as *mut usize,
|
|
559
|
+
d.bytes.as_ptr(),
|
|
560
|
+
d.bytes.len(),
|
|
561
|
+
ptr::null(),
|
|
562
|
+
)
|
|
563
|
+
},
|
|
564
|
+
};
|
|
565
|
+
|
|
566
|
+
src_pos += src_consumed;
|
|
567
|
+
out.extend_from_slice(&chunk[..dst_written]);
|
|
568
|
+
|
|
569
|
+
if unsafe { LZ4F_isError(ret) } != 0 {
|
|
570
|
+
return Err(format!("lz4 frame decode failed: {}", lz4f_error(ret)));
|
|
571
|
+
}
|
|
572
|
+
if ret == 0 {
|
|
573
|
+
complete = true;
|
|
574
|
+
break;
|
|
575
|
+
}
|
|
576
|
+
// Guard against a degenerate case where the C library makes no progress.
|
|
577
|
+
if src_consumed == 0 && dst_written == 0 {
|
|
578
|
+
break;
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
if !complete {
|
|
583
|
+
return Err("lz4 frame decode failed: truncated or incomplete frame".to_string());
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
Ok(out)
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
fn frame_codec_size(rb_self: &FrameCodec) -> usize {
|
|
590
|
+
rb_self.dict.as_ref().map_or(0, |d| d.bytes.len())
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
// Backs `FrameCodec#has_dict?`: true when the codec was built with a dictionary.
fn frame_codec_has_dict(rb_self: &FrameCodec) -> bool {
    rb_self.dict.is_some()
}
|
|
596
|
+
|
|
597
|
+
// Backs `FrameCodec#id`: the dictionary id supplied at construction, or
// None when the codec has no dictionary.
fn frame_codec_id(rb_self: &FrameCodec) -> Option<u32> {
    rb_self.dict.as_ref().map(|d| d.id)
}
|
|
132
600
|
|
|
133
601
|
// ---------- module init ----------
|
|
134
602
|
|
|
135
603
|
#[magnus::init]
|
|
136
604
|
fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
137
|
-
// Mark this extension as Ractor-safe. All our Rust code uses only
|
|
138
|
-
// stack/owned data, holds no globals aside from the Opaque exception
|
|
139
|
-
// class (which is Send+Sync by construction), and the Dictionary type
|
|
140
|
-
// is read-only after init, so it is safe to call from any Ractor.
|
|
141
605
|
unsafe { rb_sys::rb_ext_ractor_safe(true) };
|
|
142
606
|
|
|
143
607
|
let module = ruby.define_module("RLZ4")?;
|
|
@@ -148,14 +612,23 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
148
612
|
.set(Opaque::from(decompress_error_class))
|
|
149
613
|
.unwrap_or_else(|_| panic!("init called more than once"));
|
|
150
614
|
|
|
151
|
-
module.define_module_function("
|
|
152
|
-
module.define_module_function("
|
|
615
|
+
module.define_module_function("compress_bound", function!(rlz4_compress_bound, 1))?;
|
|
616
|
+
module.define_module_function("block_stream_size", function!(rlz4_block_stream_size, 0))?;
|
|
153
617
|
|
|
154
|
-
let
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
618
|
+
let codec_class = module.define_class("BlockCodec", ruby.class_object())?;
|
|
619
|
+
codec_class.define_singleton_method("_native_new", function!(block_codec_new, 1))?;
|
|
620
|
+
codec_class.define_method("size", method!(block_codec_size, 0))?;
|
|
621
|
+
codec_class.define_method("has_dict?", method!(block_codec_has_dict, 0))?;
|
|
622
|
+
codec_class.define_method("compress", method!(block_codec_compress, 1))?;
|
|
623
|
+
codec_class.define_method("_decompress", method!(block_codec_decompress, 2))?;
|
|
624
|
+
|
|
625
|
+
let frame_codec_class = module.define_class("FrameCodec", ruby.class_object())?;
|
|
626
|
+
frame_codec_class.define_singleton_method("_native_new", function!(frame_codec_initialize, 2))?;
|
|
627
|
+
frame_codec_class.define_method("compress", method!(frame_codec_compress, 1))?;
|
|
628
|
+
frame_codec_class.define_method("decompress", method!(frame_codec_decompress, 1))?;
|
|
629
|
+
frame_codec_class.define_method("size", method!(frame_codec_size, 0))?;
|
|
630
|
+
frame_codec_class.define_method("has_dict?", method!(frame_codec_has_dict, 0))?;
|
|
631
|
+
frame_codec_class.define_method("id", method!(frame_codec_id, 0))?;
|
|
159
632
|
|
|
160
633
|
Ok(())
|
|
161
634
|
}
|
|
@@ -164,63 +637,249 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
164
637
|
mod tests {
|
|
165
638
|
use super::*;
|
|
166
639
|
|
|
640
|
+
fn lz4_block_compress(input: &[u8]) -> Vec<u8> {
|
|
641
|
+
let upper = unsafe { LZ4_compressBound(input.len() as c_int) as usize };
|
|
642
|
+
let mut out = vec![0u8; upper];
|
|
643
|
+
let n = unsafe {
|
|
644
|
+
LZ4_compress_fast(
|
|
645
|
+
input.as_ptr() as *const _,
|
|
646
|
+
out.as_mut_ptr() as *mut _,
|
|
647
|
+
input.len() as c_int,
|
|
648
|
+
upper as c_int,
|
|
649
|
+
1,
|
|
650
|
+
)
|
|
651
|
+
};
|
|
652
|
+
assert!(n > 0);
|
|
653
|
+
out.truncate(n as usize);
|
|
654
|
+
out
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
fn lz4_block_decompress(compressed: &[u8], original_len: usize) -> Vec<u8> {
|
|
658
|
+
let mut out = vec![0u8; original_len];
|
|
659
|
+
let n = unsafe {
|
|
660
|
+
LZ4_decompress_safe(
|
|
661
|
+
compressed.as_ptr() as *const _,
|
|
662
|
+
out.as_mut_ptr() as *mut _,
|
|
663
|
+
compressed.len() as c_int,
|
|
664
|
+
original_len as c_int,
|
|
665
|
+
)
|
|
666
|
+
};
|
|
667
|
+
assert!(n >= 0);
|
|
668
|
+
out.truncate(n as usize);
|
|
669
|
+
out
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
fn lz4_block_compress_dict(input: &[u8], dict: &[u8]) -> Vec<u8> {
|
|
673
|
+
let upper = unsafe { LZ4_compressBound(input.len() as c_int) as usize };
|
|
674
|
+
let mut out = vec![0u8; upper];
|
|
675
|
+
let stream = unsafe { LZ4_createStream() };
|
|
676
|
+
assert!(!stream.is_null());
|
|
677
|
+
unsafe { LZ4_loadDict(stream, dict.as_ptr(), dict.len() as c_int) };
|
|
678
|
+
let n = unsafe {
|
|
679
|
+
LZ4_compress_fast_continue(
|
|
680
|
+
stream,
|
|
681
|
+
input.as_ptr(),
|
|
682
|
+
out.as_mut_ptr(),
|
|
683
|
+
input.len() as c_int,
|
|
684
|
+
upper as c_int,
|
|
685
|
+
1,
|
|
686
|
+
)
|
|
687
|
+
};
|
|
688
|
+
unsafe { LZ4_freeStream(stream) };
|
|
689
|
+
assert!(n > 0);
|
|
690
|
+
out.truncate(n as usize);
|
|
691
|
+
out
|
|
692
|
+
}
|
|
693
|
+
|
|
694
|
+
fn lz4_block_decompress_dict(compressed: &[u8], original_len: usize, dict: &[u8]) -> Vec<u8> {
|
|
695
|
+
let mut out = vec![0u8; original_len];
|
|
696
|
+
let n = unsafe {
|
|
697
|
+
LZ4_decompress_safe_usingDict(
|
|
698
|
+
compressed.as_ptr(),
|
|
699
|
+
out.as_mut_ptr(),
|
|
700
|
+
compressed.len() as c_int,
|
|
701
|
+
original_len as c_int,
|
|
702
|
+
dict.as_ptr(),
|
|
703
|
+
dict.len() as c_int,
|
|
704
|
+
)
|
|
705
|
+
};
|
|
706
|
+
assert!(n >= 0);
|
|
707
|
+
out.truncate(n as usize);
|
|
708
|
+
out
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
#[test]
|
|
712
|
+
fn block_round_trip() {
|
|
713
|
+
let data = b"hello hello hello hello".to_vec();
|
|
714
|
+
let ct = lz4_block_compress(&data);
|
|
715
|
+
let pt = lz4_block_decompress(&ct, data.len());
|
|
716
|
+
assert_eq!(pt, data);
|
|
717
|
+
}
|
|
718
|
+
|
|
719
|
+
#[test]
|
|
720
|
+
fn block_reuse_across_many_calls() {
|
|
721
|
+
for i in 0..100 {
|
|
722
|
+
let msg = format!("payload number {i} ").repeat(10).into_bytes();
|
|
723
|
+
let ct = lz4_block_compress(&msg);
|
|
724
|
+
let pt = lz4_block_decompress(&ct, msg.len());
|
|
725
|
+
assert_eq!(pt, msg);
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
#[test]
|
|
730
|
+
fn block_dict_round_trip() {
|
|
731
|
+
let dict = b"common log prefix: ".to_vec();
|
|
732
|
+
let msg = b"common log prefix: event=login user=alice".to_vec();
|
|
733
|
+
|
|
734
|
+
let ct_dict = lz4_block_compress_dict(&msg, &dict);
|
|
735
|
+
let pt = lz4_block_decompress_dict(&ct_dict, msg.len(), &dict);
|
|
736
|
+
assert_eq!(pt, msg);
|
|
737
|
+
|
|
738
|
+
let ct_plain = lz4_block_compress(&msg);
|
|
739
|
+
assert!(
|
|
740
|
+
ct_dict.len() < ct_plain.len(),
|
|
741
|
+
"dict compression should beat no-dict on shared-prefix input"
|
|
742
|
+
);
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
fn frame_compress(input: &[u8], dict: Option<(&[u8], u32)>) -> Vec<u8> {
|
|
746
|
+
let prefs = default_prefs(dict.map_or(0, |(_, id)| id));
|
|
747
|
+
let data_bound = unsafe { LZ4F_compressBound(input.len(), &prefs) };
|
|
748
|
+
let capacity = data_bound + 64;
|
|
749
|
+
let mut out = vec![0u8; capacity];
|
|
750
|
+
let mut pos = 0usize;
|
|
751
|
+
|
|
752
|
+
let mut ctx = LZ4FCompressionContext(ptr::null_mut());
|
|
753
|
+
let err = unsafe { LZ4F_createCompressionContext(&mut ctx, LZ4F_VERSION) };
|
|
754
|
+
assert_eq!(unsafe { LZ4F_isError(err) }, 0);
|
|
755
|
+
|
|
756
|
+
let n = match dict {
|
|
757
|
+
None => unsafe {
|
|
758
|
+
LZ4F_compressBegin(ctx, out.as_mut_ptr().add(pos), capacity - pos, &prefs)
|
|
759
|
+
},
|
|
760
|
+
Some((d, _)) => unsafe {
|
|
761
|
+
LZ4F_compressBegin_usingDict(
|
|
762
|
+
ctx,
|
|
763
|
+
out.as_mut_ptr().add(pos),
|
|
764
|
+
capacity - pos,
|
|
765
|
+
d.as_ptr(),
|
|
766
|
+
d.len(),
|
|
767
|
+
&prefs,
|
|
768
|
+
)
|
|
769
|
+
},
|
|
770
|
+
};
|
|
771
|
+
assert_eq!(unsafe { LZ4F_isError(n) }, 0);
|
|
772
|
+
pos += n;
|
|
773
|
+
|
|
774
|
+
let n = unsafe {
|
|
775
|
+
LZ4F_compressUpdate(
|
|
776
|
+
ctx,
|
|
777
|
+
out.as_mut_ptr().add(pos),
|
|
778
|
+
capacity - pos,
|
|
779
|
+
input.as_ptr(),
|
|
780
|
+
input.len(),
|
|
781
|
+
ptr::null(),
|
|
782
|
+
)
|
|
783
|
+
};
|
|
784
|
+
assert_eq!(unsafe { LZ4F_isError(n) }, 0);
|
|
785
|
+
pos += n;
|
|
786
|
+
|
|
787
|
+
let n = unsafe {
|
|
788
|
+
LZ4F_compressEnd(ctx, out.as_mut_ptr().add(pos), capacity - pos, ptr::null())
|
|
789
|
+
};
|
|
790
|
+
assert_eq!(unsafe { LZ4F_isError(n) }, 0);
|
|
791
|
+
pos += n;
|
|
792
|
+
|
|
793
|
+
unsafe { LZ4F_freeCompressionContext(ctx) };
|
|
794
|
+
out.truncate(pos);
|
|
795
|
+
out
|
|
796
|
+
}
|
|
797
|
+
|
|
798
|
+
fn frame_decompress(compressed: &[u8], dict: Option<&[u8]>) -> Vec<u8> {
|
|
799
|
+
let mut ctx = LZ4FDecompressionContext(ptr::null_mut());
|
|
800
|
+
let err = unsafe { LZ4F_createDecompressionContext(&mut ctx, LZ4F_VERSION) };
|
|
801
|
+
assert_eq!(unsafe { LZ4F_isError(err) }, 0);
|
|
802
|
+
|
|
803
|
+
let d = dict.map(|b| DictBound { bytes: b.to_vec(), id: 0 });
|
|
804
|
+
let out = frame_decompress_loop(ctx, compressed, d.as_ref()).unwrap();
|
|
805
|
+
|
|
806
|
+
unsafe { LZ4F_freeDecompressionContext(ctx) };
|
|
807
|
+
out
|
|
808
|
+
}
|
|
809
|
+
|
|
167
810
|
#[test]
|
|
168
811
|
fn frame_round_trip() {
|
|
169
812
|
let data = b"the quick brown fox jumps over the lazy dog ".repeat(100);
|
|
170
|
-
let
|
|
171
|
-
enc.write_all(&data).unwrap();
|
|
172
|
-
let ct = enc.finish().unwrap();
|
|
813
|
+
let ct = frame_compress(&data, None);
|
|
173
814
|
assert!(ct.len() < data.len(), "should compress repetitive input");
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
let mut dec = FrameDecoder::new(&ct[..]);
|
|
178
|
-
let mut out = Vec::new();
|
|
179
|
-
dec.read_to_end(&mut out).unwrap();
|
|
180
|
-
assert_eq!(out, data);
|
|
815
|
+
assert_eq!(&ct[..4], &LZ4_FRAME_MAGIC);
|
|
816
|
+
let pt = frame_decompress(&ct, None);
|
|
817
|
+
assert_eq!(pt, data);
|
|
181
818
|
}
|
|
182
819
|
|
|
183
820
|
#[test]
|
|
184
821
|
fn frame_empty_round_trip() {
|
|
185
|
-
let
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
let mut dec = FrameDecoder::new(&ct[..]);
|
|
189
|
-
let mut out = Vec::new();
|
|
190
|
-
dec.read_to_end(&mut out).unwrap();
|
|
191
|
-
assert!(out.is_empty());
|
|
822
|
+
let ct = frame_compress(b"", None);
|
|
823
|
+
let pt = frame_decompress(&ct, None);
|
|
824
|
+
assert!(pt.is_empty());
|
|
192
825
|
}
|
|
193
826
|
|
|
194
827
|
#[test]
|
|
195
828
|
fn frame_garbage_fails() {
|
|
196
|
-
// A buffer that is long enough to look like a frame but has the
|
|
197
|
-
// wrong magic number must fail to decode.
|
|
198
829
|
let garbage = vec![0xFFu8; 32];
|
|
199
|
-
let mut
|
|
200
|
-
|
|
201
|
-
|
|
830
|
+
let mut ctx = LZ4FDecompressionContext(ptr::null_mut());
|
|
831
|
+
unsafe { LZ4F_createDecompressionContext(&mut ctx, LZ4F_VERSION) };
|
|
832
|
+
let result = frame_decompress_loop(ctx, &garbage, None);
|
|
833
|
+
unsafe { LZ4F_freeDecompressionContext(ctx) };
|
|
834
|
+
assert!(result.is_err());
|
|
202
835
|
}
|
|
203
836
|
|
|
204
837
|
#[test]
|
|
205
|
-
fn
|
|
206
|
-
let dict = b"JSON schema version 1 field ";
|
|
207
|
-
let
|
|
208
|
-
let
|
|
209
|
-
|
|
838
|
+
fn frame_dict_round_trip() {
|
|
839
|
+
let dict = b"JSON schema version 1 field ".repeat(4);
|
|
840
|
+
let id: u32 = 0xDEAD_BEEF;
|
|
841
|
+
let msg = b"JSON schema version 1 field name=hello value=world".to_vec();
|
|
842
|
+
|
|
843
|
+
let ct = frame_compress(&msg, Some((&dict, id)));
|
|
844
|
+
assert_eq!(&ct[..4], &LZ4_FRAME_MAGIC);
|
|
845
|
+
|
|
846
|
+
let pt = frame_decompress(&ct, Some(&dict));
|
|
210
847
|
assert_eq!(pt, msg);
|
|
211
848
|
}
|
|
212
849
|
|
|
213
850
|
#[test]
|
|
214
|
-
fn
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
let dict_a = b"common prefix AAA ";
|
|
218
|
-
let dict_b = b"common prefix BBB ";
|
|
851
|
+
fn frame_dict_id_in_header() {
|
|
852
|
+
let dict = b"common prefix AAA ".repeat(4);
|
|
853
|
+
let id: u32 = 0xAAAA_AAAA;
|
|
219
854
|
let msg = b"common prefix AAA : the payload";
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
}
|
|
855
|
+
|
|
856
|
+
let ct = frame_compress(msg, Some((&dict, id)));
|
|
857
|
+
|
|
858
|
+
let mut ctx = LZ4FDecompressionContext(ptr::null_mut());
|
|
859
|
+
unsafe { LZ4F_createDecompressionContext(&mut ctx, LZ4F_VERSION) };
|
|
860
|
+
|
|
861
|
+
let mut frame_info = zero_frame_info();
|
|
862
|
+
let mut src_size = ct.len();
|
|
863
|
+
let ret = unsafe {
|
|
864
|
+
lz4_sys::LZ4F_getFrameInfo(ctx, &mut frame_info, ct.as_ptr(), &mut src_size)
|
|
865
|
+
};
|
|
866
|
+
unsafe { LZ4F_freeDecompressionContext(ctx) };
|
|
867
|
+
|
|
868
|
+
assert_eq!(unsafe { LZ4F_isError(ret) }, 0, "LZ4F_getFrameInfo failed");
|
|
869
|
+
assert_eq!(frame_info.dict_id, id, "dict_id not written into frame header");
|
|
870
|
+
}
|
|
871
|
+
|
|
872
|
+
#[test]
|
|
873
|
+
fn frame_truncated_fails() {
|
|
874
|
+
let data = b"some data that should compress nicely ".repeat(10);
|
|
875
|
+
let ct = frame_compress(&data, None);
|
|
876
|
+
let truncated = &ct[..ct.len() / 2];
|
|
877
|
+
|
|
878
|
+
let mut ctx = LZ4FDecompressionContext(ptr::null_mut());
|
|
879
|
+
unsafe { LZ4F_createDecompressionContext(&mut ctx, LZ4F_VERSION) };
|
|
880
|
+
let result = frame_decompress_loop(ctx, truncated, None);
|
|
881
|
+
unsafe { LZ4F_freeDecompressionContext(ctx) };
|
|
882
|
+
|
|
883
|
+
assert!(result.is_err(), "truncated frame should return an error");
|
|
225
884
|
}
|
|
226
885
|
}
|