rlz4 0.2.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +24 -12
- data/README.md +82 -57
- data/ext/rlz4/Cargo.toml +2 -2
- data/ext/rlz4/src/lib.rs +767 -159
- data/lib/rlz4/block_codec.rb +35 -0
- data/lib/rlz4/dictionary.rb +27 -0
- data/lib/rlz4/frame_codec.rb +34 -0
- data/lib/rlz4/version.rb +1 -1
- data/lib/rlz4.rb +4 -17
- metadata +9 -9
- data/tmp/x86_64-linux/stage/Cargo.toml +0 -9
- data/tmp/x86_64-linux/stage/ext/rlz4/Cargo.toml +0 -16
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2c9d2822432c3875768ab9ca26a5c95b148f5be3d08e1687e49c131dd3d304a3
|
|
4
|
+
data.tar.gz: 15726fc411ec89b05dda6c4b41af0ee9a99553d30c45ed0b9a7dab9a76b758da
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 91ab2e6a86d5e7b68be6055848752e871c0baf3d0a1eabf10cbb2624960f6ad6a73d669b3264a3dc1631082efdf02e1f441c303331447b23f0f412a091943142
|
|
7
|
+
data.tar.gz: 727ee0cd74c5974094f7ccb25257254cd1ee38605aa89aab513fc133bd429e09e9b7c6c09e59f91b6a4161bded800d4d18324973f397a3e50567805af999cac2
|
data/Cargo.lock
CHANGED
|
@@ -35,6 +35,16 @@ version = "2.11.0"
|
|
|
35
35
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
36
36
|
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
|
|
37
37
|
|
|
38
|
+
[[package]]
|
|
39
|
+
name = "cc"
|
|
40
|
+
version = "1.2.61"
|
|
41
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
42
|
+
checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d"
|
|
43
|
+
dependencies = [
|
|
44
|
+
"find-msvc-tools",
|
|
45
|
+
"shlex",
|
|
46
|
+
]
|
|
47
|
+
|
|
38
48
|
[[package]]
|
|
39
49
|
name = "cexpr"
|
|
40
50
|
version = "0.6.0"
|
|
@@ -67,6 +77,12 @@ version = "1.15.0"
|
|
|
67
77
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
68
78
|
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
|
69
79
|
|
|
80
|
+
[[package]]
|
|
81
|
+
name = "find-msvc-tools"
|
|
82
|
+
version = "0.1.9"
|
|
83
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
84
|
+
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
|
|
85
|
+
|
|
70
86
|
[[package]]
|
|
71
87
|
name = "glob"
|
|
72
88
|
version = "0.3.3"
|
|
@@ -105,11 +121,13 @@ dependencies = [
|
|
|
105
121
|
]
|
|
106
122
|
|
|
107
123
|
[[package]]
|
|
108
|
-
name = "
|
|
109
|
-
version = "
|
|
110
|
-
source = "
|
|
124
|
+
name = "lz4-sys"
|
|
125
|
+
version = "1.11.1+lz4-1.10.0"
|
|
126
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
127
|
+
checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6"
|
|
111
128
|
dependencies = [
|
|
112
|
-
"
|
|
129
|
+
"cc",
|
|
130
|
+
"libc",
|
|
113
131
|
]
|
|
114
132
|
|
|
115
133
|
[[package]]
|
|
@@ -236,9 +254,9 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
|
|
|
236
254
|
|
|
237
255
|
[[package]]
|
|
238
256
|
name = "rlz4"
|
|
239
|
-
version = "0.
|
|
257
|
+
version = "0.4.0"
|
|
240
258
|
dependencies = [
|
|
241
|
-
"
|
|
259
|
+
"lz4-sys",
|
|
242
260
|
"magnus",
|
|
243
261
|
"rb-sys",
|
|
244
262
|
]
|
|
@@ -278,12 +296,6 @@ dependencies = [
|
|
|
278
296
|
"unicode-ident",
|
|
279
297
|
]
|
|
280
298
|
|
|
281
|
-
[[package]]
|
|
282
|
-
name = "twox-hash"
|
|
283
|
-
version = "2.1.2"
|
|
284
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
285
|
-
checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c"
|
|
286
|
-
|
|
287
299
|
[[package]]
|
|
288
300
|
name = "unicode-ident"
|
|
289
301
|
version = "1.0.24"
|
data/README.md
CHANGED
|
@@ -27,91 +27,110 @@ gem "rlz4"
|
|
|
27
27
|
|
|
28
28
|
Building requires a Rust toolchain (stable).
|
|
29
29
|
|
|
30
|
-
##
|
|
30
|
+
## API
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
Three classes plus one utility module function:
|
|
33
33
|
|
|
34
|
-
|
|
35
|
-
|
|
34
|
+
| | Purpose | Wire format |
|
|
35
|
+
|---|---|---|
|
|
36
|
+
| `RLZ4::Dictionary` | Value type: dict bytes + 4-byte id | — |
|
|
37
|
+
| `RLZ4::FrameCodec` | Optionally dict-bound frame codec | LZ4 frame (`04 22 4D 18`), interoperable with `lz4` CLI |
|
|
38
|
+
| `RLZ4::BlockCodec` | Optionally dict-bound block codec, reusable scratch | Raw LZ4 block, no framing |
|
|
39
|
+
| `RLZ4.compress_bound(n)` | Worst-case output size for input size `n` | — |
|
|
36
40
|
|
|
37
|
-
|
|
38
|
-
|
|
41
|
+
Invalid input on decompress raises `RLZ4::DecompressError`
|
|
42
|
+
(a `StandardError` subclass).
|
|
39
43
|
|
|
40
|
-
|
|
41
|
-
# interoperable with any other LZ4 frame implementation.
|
|
42
|
-
```
|
|
44
|
+
## RLZ4::Dictionary
|
|
43
45
|
|
|
44
|
-
|
|
46
|
+
Pure value type — just the dict bytes plus a 4-byte id. Built on
|
|
47
|
+
`Data.define`, so it's immutable, has value equality, and is
|
|
48
|
+
shareable across `Ractor`s. The id defaults to `sha256(bytes)[0, 4]`
|
|
49
|
+
interpreted little-endian (the derivation LZ4 frame `FLG.DictID`
|
|
50
|
+
uses); override with `id:` if you need a coordinated value.
|
|
45
51
|
|
|
46
52
|
```ruby
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
53
|
+
dict = RLZ4::Dictionary.new(bytes: "schema=v1 type=message field1=")
|
|
54
|
+
dict.bytes # => "schema=v1..." frozen binary
|
|
55
|
+
dict.id # => u32
|
|
56
|
+
dict.size # => 30
|
|
57
|
+
|
|
58
|
+
# With a caller-supplied id (e.g. from an out-of-band protocol):
|
|
59
|
+
custom = RLZ4::Dictionary.new(bytes: raw, id: 0xDEAD_BEEF)
|
|
52
60
|
```
|
|
53
61
|
|
|
54
|
-
|
|
62
|
+
## RLZ4::FrameCodec — frame-format LZ4
|
|
63
|
+
|
|
64
|
+
Emits a real LZ4 frame (magic `04 22 4D 18`), interoperable with the
|
|
65
|
+
`lz4` CLI. With a dictionary, sets `FLG.DictID` and writes `Dict_ID`
|
|
66
|
+
into the FrameDescriptor — a receiver routing by id can pick the
|
|
67
|
+
right dict from a set purely by parsing the frame header.
|
|
55
68
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
compression ratio. `RLZ4::Dictionary#compress` emits a **real LZ4 frame**
|
|
59
|
-
(magic `04 22 4D 18`) with the `FLG.DictID` bit set and the dictionary's
|
|
60
|
-
`Dict_ID` written into the FrameDescriptor — interoperable with the
|
|
61
|
-
reference `lz4` CLI given the same dictionary file (`lz4 -d -D dict.bin`).
|
|
69
|
+
Stateless (no scratch), so `FrameCodec` instances are shareable
|
|
70
|
+
across `Ractor`s.
|
|
62
71
|
|
|
63
72
|
```ruby
|
|
64
|
-
|
|
73
|
+
codec = RLZ4::FrameCodec.new # no dict
|
|
74
|
+
codec = RLZ4::FrameCodec.new(dict: dict) # Dictionary value
|
|
75
|
+
codec = RLZ4::FrameCodec.new(dict: "raw bytes here") # String shortcut
|
|
65
76
|
|
|
66
|
-
|
|
67
|
-
|
|
77
|
+
ct = codec.compress("hello world" * 100)
|
|
78
|
+
pt = codec.decompress(ct)
|
|
68
79
|
|
|
69
|
-
|
|
70
|
-
|
|
80
|
+
codec.has_dict? # => true / false
|
|
81
|
+
codec.id # => u32 id when dict-bound, nil otherwise
|
|
82
|
+
codec.size # => dict size when dict-bound, 0 otherwise
|
|
71
83
|
```
|
|
72
84
|
|
|
73
|
-
|
|
74
|
-
|
|
85
|
+
A dict id mismatch on decompress raises `RLZ4::DecompressError`
|
|
86
|
+
before touching the payload — no silently corrupt output.
|
|
75
87
|
|
|
76
|
-
##
|
|
88
|
+
## RLZ4::BlockCodec — block-format LZ4
|
|
77
89
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
90
|
+
For hot paths that compress many small messages and want to amortise
|
|
91
|
+
allocation. Emits a raw LZ4 block — no frame header, no end-mark,
|
|
92
|
+
no checksum. Not interoperable with the reference `lz4` CLI; meant
|
|
93
|
+
for callers who carry their own framing (e.g. ZMTP transports).
|
|
82
94
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
multiple dictionaries can therefore route incoming frames to the
|
|
88
|
-
right one purely by parsing the frame header — no out-of-band id
|
|
89
|
-
channel needed.
|
|
95
|
+
Wraps a reusable 16 KiB scratch hash table. With a dictionary, also
|
|
96
|
+
carries a pristine dict-loaded table and restores it into the scratch
|
|
97
|
+
via a single 16 KiB `memcpy` before each compress call — so dict
|
|
98
|
+
initialisation is paid once at construction, not per call.
|
|
90
99
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
dict bytes — which is what `Dictionary.new` does — is the simplest
|
|
96
|
-
option.
|
|
100
|
+
```ruby
|
|
101
|
+
codec = RLZ4::BlockCodec.new # no dict
|
|
102
|
+
codec = RLZ4::BlockCodec.new(dict: dict) # Dictionary value
|
|
103
|
+
codec = RLZ4::BlockCodec.new(dict: "raw bytes here") # String shortcut
|
|
97
104
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
105
|
+
ct = codec.compress("hello world" * 100)
|
|
106
|
+
pt = codec.decompress(ct, decompressed_size: 1100)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
`#decompress` requires `decompressed_size:` because raw LZ4 blocks
|
|
110
|
+
carry no length prefix. The decoder refuses to write past that
|
|
111
|
+
value even on crafted malformed input — raises
|
|
112
|
+
`RLZ4::DecompressError` on any overrun.
|
|
113
|
+
|
|
114
|
+
Use `RLZ4.compress_bound(n)` to pre-size output buffers.
|
|
102
115
|
|
|
103
|
-
|
|
116
|
+
`BlockCodec` holds a `RefCell` internally and is **thread-local** —
|
|
117
|
+
do not pass it across `Ractor` boundaries. Allocate one per `Ractor`. The
|
|
118
|
+
block format has no on-wire `Dict_ID` field; a dict mismatch
|
|
119
|
+
produces garbage plaintext (not an error). Detect at a higher
|
|
120
|
+
layer (checksum, schema validation, etc.).
|
|
104
121
|
|
|
105
|
-
|
|
106
|
-
|
|
122
|
+
## Ractor safety
|
|
123
|
+
|
|
124
|
+
`Dictionary` and `FrameCodec` can be used from any `Ractor`. Example:
|
|
107
125
|
|
|
108
126
|
```ruby
|
|
109
127
|
ractors = 4.times.map do |i|
|
|
110
128
|
Ractor.new(i) do |idx|
|
|
111
|
-
|
|
129
|
+
codec = RLZ4::FrameCodec.new
|
|
130
|
+
pt = "ractor #{idx} payload " * 1000
|
|
112
131
|
1000.times do
|
|
113
|
-
ct =
|
|
114
|
-
raise "mismatch" unless
|
|
132
|
+
ct = codec.compress(pt)
|
|
133
|
+
raise "mismatch" unless codec.decompress(ct) == pt
|
|
115
134
|
end
|
|
116
135
|
:ok
|
|
117
136
|
end
|
|
@@ -119,11 +138,17 @@ end
|
|
|
119
138
|
ractors.map(&:value) # => [:ok, :ok, :ok, :ok]
|
|
120
139
|
```
|
|
121
140
|
|
|
141
|
+
`BlockCodec` must not cross `Ractor` boundaries — allocate one per
|
|
142
|
+
`Ractor`.
|
|
143
|
+
|
|
122
144
|
## Non-goals
|
|
123
145
|
|
|
124
146
|
- High-compression mode (LZ4_HC).
|
|
125
147
|
- Streaming / chunked compression.
|
|
126
148
|
- Preservation of string encoding on decompress (output is always binary).
|
|
149
|
+
- Dictionary training from a sample corpus. LZ4 has no equivalent of
|
|
150
|
+
Zstd's `ZDICT_trainFromBuffer`. Dictionaries are caller-supplied
|
|
151
|
+
raw bytes.
|
|
127
152
|
|
|
128
153
|
## License
|
|
129
154
|
|
data/ext/rlz4/Cargo.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "rlz4"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "0.4.0"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
|
|
6
6
|
[lib]
|
|
@@ -8,7 +8,7 @@ name = "rlz4"
|
|
|
8
8
|
crate-type = ["cdylib", "rlib"]
|
|
9
9
|
|
|
10
10
|
[dependencies]
|
|
11
|
-
|
|
11
|
+
lz4-sys = "1.11"
|
|
12
12
|
magnus = "0.8"
|
|
13
13
|
rb-sys = "0.9"
|
|
14
14
|
|