rzstd 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/rzstd/src/lib.rs +5 -3
- data/lib/rzstd/version.rb +1 -1
- data/lib/rzstd.rb +38 -12
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4d839dd8c43cda81d339bbcb624508952c77468320439d429699d75b3f441537
|
|
4
|
+
data.tar.gz: e57e1a1f7d8f782e1243fccca978833e51fe829b8dbdf4d7d652f6b64106ae14
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 21c2663f86669e9d6bbfccb6620786aebdc8e7e793cf4e96e3c6ce863324c1065f85a1d2f4ab68be77231d09536e490f2467bba0b70635cf41f6fcbc114fdc25
|
|
7
|
+
data.tar.gz: 54b0e0f46779782a089706618fbad3a0af26ee72205c9e7c835a90a25c184034a62db669e6a2ca0caa0b308e6a77fda1d4a5f81b399e8678a8b37a89c0c401b8
|
data/ext/rzstd/src/lib.rs
CHANGED
|
@@ -257,9 +257,11 @@ fn dict_initialize(ruby: &Ruby, rb_dict: RString, id: u32, level: i32) -> Result
|
|
|
257
257
|
})?;
|
|
258
258
|
// Enable the content checksum so decoding with the wrong dictionary
|
|
259
259
|
// (or wrong bytes generally) fails fast at the trailing XXH64 check
|
|
260
|
-
// instead of silently returning garbage.
|
|
261
|
-
//
|
|
262
|
-
//
|
|
260
|
+
// instead of silently returning garbage. For ZDICT-format dicts
|
|
261
|
+
// the frame header additionally carries the dict's own dict_id
|
|
262
|
+
// (via `ZSTD_c_dictIDFlag`, enabled by default), which lets peers
|
|
263
|
+
// dispatch by id; raw-content dicts carry a frame `dictID` of 0
|
|
264
|
+
// and rely solely on the checksum to detect mismatch.
|
|
263
265
|
cctx.set_parameter(CParameter::ContentSizeFlag(true))
|
|
264
266
|
.map_err(|code| {
|
|
265
267
|
Error::new(
|
data/lib/rzstd/version.rb
CHANGED
data/lib/rzstd.rb
CHANGED
|
@@ -8,6 +8,7 @@ require_relative "rzstd/version"
|
|
|
8
8
|
module RZstd
|
|
9
9
|
DEFAULT_LEVEL = 3
|
|
10
10
|
|
|
11
|
+
|
|
11
12
|
# Wrap the native `_native_compress(bytes, level)` so callers get a
|
|
12
13
|
# `level:` kwarg with a sensible default. Defined as a real method
|
|
13
14
|
# (not a Proc-bound singleton method) so it remains Ractor-shareable.
|
|
@@ -15,6 +16,7 @@ module RZstd
|
|
|
15
16
|
_native_compress(bytes, Integer(level))
|
|
16
17
|
end
|
|
17
18
|
|
|
19
|
+
|
|
18
20
|
# Bounded single-shot decompression. When `max_output_size:` is given,
|
|
19
21
|
# the Rust extension reads the frame's Frame_Content_Size header, raises
|
|
20
22
|
# MissingContentSizeError if absent, and raises OutputSizeLimitError if
|
|
@@ -25,20 +27,38 @@ module RZstd
|
|
|
25
27
|
_native_decompress(bytes, Integer(max_output_size || 0))
|
|
26
28
|
end
|
|
27
29
|
|
|
30
|
+
|
|
28
31
|
class Dictionary
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
#
|
|
33
|
-
#
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
#
|
|
39
|
-
#
|
|
32
|
+
ZDICT_MAGIC = "\x37\xA4\x30\xEC".b.freeze
|
|
33
|
+
|
|
34
|
+
# Public Dict_ID range per the Zstandard spec. IDs `0..32_767` are
|
|
35
|
+
# reserved for a future registrar, and `>= 2**31` is reserved. Only
|
|
36
|
+
# `32_768..(2**31 - 1)` is available for private/auto-generated dicts.
|
|
37
|
+
USER_DICT_ID_MIN = 32_768
|
|
38
|
+
USER_DICT_ID_MAX = (2**31) - 1
|
|
39
|
+
USER_DICT_ID_SIZE = USER_DICT_ID_MAX - USER_DICT_ID_MIN + 1
|
|
40
|
+
|
|
41
|
+
# Public constructor. Resolves the Zstd `Dict_ID`:
|
|
42
|
+
#
|
|
43
|
+
# - If `bytes` begins with the ZDICT magic (`0xEC30A437` LE), the
|
|
44
|
+
# id is read from bytes `[4..7]` of the dictionary header. This is
|
|
45
|
+
# the same id zstd writes into every compressed frame header via
|
|
46
|
+
# `ZSTD_c_dictIDFlag` (enabled by default), so on-wire frames and
|
|
47
|
+
# `Dictionary#id` agree.
|
|
48
|
+
# - Otherwise the dict is raw content: zstd writes a frame `dictID`
|
|
49
|
+
# of 0, and this wrapper falls back to `sha256(bytes)[0..3]` LE
|
|
50
|
+
# mapped into the public range `32_768..(2**31 - 1)`, purely as
|
|
51
|
+
# an out-of-band identifier for the Ruby side. Wrong-dict decoding
|
|
52
|
+
# of raw dicts is caught by the content checksum the encoder
|
|
53
|
+
# enables.
|
|
40
54
|
def self.new(bytes, level: DEFAULT_LEVEL)
|
|
41
|
-
id =
|
|
55
|
+
id = if bytes.byteslice(0, 4) == ZDICT_MAGIC
|
|
56
|
+
bytes.byteslice(4, 4).unpack1("V")
|
|
57
|
+
else
|
|
58
|
+
raw = Digest::SHA256.digest(bytes).byteslice(0, 4).unpack1("V")
|
|
59
|
+
USER_DICT_ID_MIN + (raw % USER_DICT_ID_SIZE)
|
|
60
|
+
end
|
|
61
|
+
|
|
42
62
|
_native_new(bytes, id, Integer(level))
|
|
43
63
|
end
|
|
44
64
|
|
|
@@ -59,5 +79,11 @@ module RZstd
|
|
|
59
79
|
samples.each { |s| buffer << s.b }
|
|
60
80
|
_native_train(buffer, sizes, Integer(capacity))
|
|
61
81
|
end
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def decompress(bytes, max_output_size: nil)
|
|
85
|
+
_native_decompress(bytes, Integer(max_output_size || 0))
|
|
86
|
+
end
|
|
87
|
+
|
|
62
88
|
end
|
|
63
89
|
end
|