rlz4 0.1.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +23 -12
- data/README.md +87 -33
- data/ext/rlz4/Cargo.toml +2 -2
- data/ext/rlz4/src/lib.rs +783 -124
- data/lib/rlz4/block_codec.rb +35 -0
- data/lib/rlz4/dictionary.rb +27 -0
- data/lib/rlz4/frame_codec.rb +34 -0
- data/lib/rlz4/version.rb +1 -1
- data/lib/rlz4.rb +4 -1
- metadata +9 -9
- data/tmp/x86_64-linux/stage/Cargo.toml +0 -9
- data/tmp/x86_64-linux/stage/ext/rlz4/Cargo.toml +0 -16
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2c9d2822432c3875768ab9ca26a5c95b148f5be3d08e1687e49c131dd3d304a3
|
|
4
|
+
data.tar.gz: 15726fc411ec89b05dda6c4b41af0ee9a99553d30c45ed0b9a7dab9a76b758da
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 91ab2e6a86d5e7b68be6055848752e871c0baf3d0a1eabf10cbb2624960f6ad6a73d669b3264a3dc1631082efdf02e1f441c303331447b23f0f412a091943142
|
|
7
|
+
data.tar.gz: 727ee0cd74c5974094f7ccb25257254cd1ee38605aa89aab513fc133bd429e09e9b7c6c09e59f91b6a4161bded800d4d18324973f397a3e50567805af999cac2
|
data/Cargo.lock
CHANGED
|
@@ -35,6 +35,16 @@ version = "2.11.0"
|
|
|
35
35
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
36
36
|
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
|
|
37
37
|
|
|
38
|
+
[[package]]
|
|
39
|
+
name = "cc"
|
|
40
|
+
version = "1.2.61"
|
|
41
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
42
|
+
checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d"
|
|
43
|
+
dependencies = [
|
|
44
|
+
"find-msvc-tools",
|
|
45
|
+
"shlex",
|
|
46
|
+
]
|
|
47
|
+
|
|
38
48
|
[[package]]
|
|
39
49
|
name = "cexpr"
|
|
40
50
|
version = "0.6.0"
|
|
@@ -67,6 +77,12 @@ version = "1.15.0"
|
|
|
67
77
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
68
78
|
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
|
69
79
|
|
|
80
|
+
[[package]]
|
|
81
|
+
name = "find-msvc-tools"
|
|
82
|
+
version = "0.1.9"
|
|
83
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
84
|
+
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
|
|
85
|
+
|
|
70
86
|
[[package]]
|
|
71
87
|
name = "glob"
|
|
72
88
|
version = "0.3.3"
|
|
@@ -105,12 +121,13 @@ dependencies = [
|
|
|
105
121
|
]
|
|
106
122
|
|
|
107
123
|
[[package]]
|
|
108
|
-
name = "
|
|
109
|
-
version = "
|
|
124
|
+
name = "lz4-sys"
|
|
125
|
+
version = "1.11.1+lz4-1.10.0"
|
|
110
126
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
111
|
-
checksum = "
|
|
127
|
+
checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6"
|
|
112
128
|
dependencies = [
|
|
113
|
-
"
|
|
129
|
+
"cc",
|
|
130
|
+
"libc",
|
|
114
131
|
]
|
|
115
132
|
|
|
116
133
|
[[package]]
|
|
@@ -237,9 +254,9 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
|
|
|
237
254
|
|
|
238
255
|
[[package]]
|
|
239
256
|
name = "rlz4"
|
|
240
|
-
version = "0.
|
|
257
|
+
version = "0.4.0"
|
|
241
258
|
dependencies = [
|
|
242
|
-
"
|
|
259
|
+
"lz4-sys",
|
|
243
260
|
"magnus",
|
|
244
261
|
"rb-sys",
|
|
245
262
|
]
|
|
@@ -279,12 +296,6 @@ dependencies = [
|
|
|
279
296
|
"unicode-ident",
|
|
280
297
|
]
|
|
281
298
|
|
|
282
|
-
[[package]]
|
|
283
|
-
name = "twox-hash"
|
|
284
|
-
version = "2.1.2"
|
|
285
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
286
|
-
checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c"
|
|
287
|
-
|
|
288
299
|
[[package]]
|
|
289
300
|
name = "unicode-ident"
|
|
290
301
|
version = "1.0.24"
|
data/README.md
CHANGED
|
@@ -27,62 +27,110 @@ gem "rlz4"
|
|
|
27
27
|
|
|
28
28
|
Building requires a Rust toolchain (stable).
|
|
29
29
|
|
|
30
|
-
##
|
|
30
|
+
## API
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
Three classes plus one utility module function:
|
|
33
33
|
|
|
34
|
-
|
|
35
|
-
|
|
34
|
+
| | Purpose | Wire format |
|
|
35
|
+
|---|---|---|
|
|
36
|
+
| `RLZ4::Dictionary` | Value type: dict bytes + 4-byte id | — |
|
|
37
|
+
| `RLZ4::FrameCodec` | Optionally dict-bound frame codec | LZ4 frame (`04 22 4D 18`), interoperable with `lz4` CLI |
|
|
38
|
+
| `RLZ4::BlockCodec` | Optionally dict-bound block codec, reusable scratch | Raw LZ4 block, no framing |
|
|
39
|
+
| `RLZ4.compress_bound(n)` | Worst-case output size for input size `n` | — |
|
|
36
40
|
|
|
37
|
-
|
|
38
|
-
|
|
41
|
+
Invalid input on decompress raises `RLZ4::DecompressError`
|
|
42
|
+
(a `StandardError` subclass).
|
|
39
43
|
|
|
40
|
-
|
|
41
|
-
# interoperable with any other LZ4 frame implementation.
|
|
42
|
-
```
|
|
44
|
+
## RLZ4::Dictionary
|
|
43
45
|
|
|
44
|
-
|
|
46
|
+
Pure value type — just the dict bytes plus a 4-byte id. Built on
|
|
47
|
+
`Data.define`, so it's immutable, has value equality, and is
|
|
48
|
+
shareable across `Ractor`s. The id defaults to `sha256(bytes)[0, 4]`
|
|
49
|
+
interpreted little-endian (the derivation LZ4 frame `FLG.DictID`
|
|
50
|
+
uses); override with `id:` if you need a coordinated value.
|
|
45
51
|
|
|
46
52
|
```ruby
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
53
|
+
dict = RLZ4::Dictionary.new(bytes: "schema=v1 type=message field1=")
|
|
54
|
+
dict.bytes # => "schema=v1..." frozen binary
|
|
55
|
+
dict.id # => u32
|
|
56
|
+
dict.size # => 30
|
|
57
|
+
|
|
58
|
+
# With a caller-supplied id (e.g. from an out-of-band protocol):
|
|
59
|
+
custom = RLZ4::Dictionary.new(bytes: raw, id: 0xDEAD_BEEF)
|
|
52
60
|
```
|
|
53
61
|
|
|
54
|
-
|
|
62
|
+
## RLZ4::FrameCodec — frame-format LZ4
|
|
55
63
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
64
|
+
Emits a real LZ4 frame (magic `04 22 4D 18`), interoperable with the
|
|
65
|
+
`lz4` CLI. With a dictionary, sets `FLG.DictID` and writes `Dict_ID`
|
|
66
|
+
into the FrameDescriptor — a receiver routing by id can pick the
|
|
67
|
+
right dict from a set purely by parsing the frame header.
|
|
68
|
+
|
|
69
|
+
Stateless (no scratch), so `FrameCodec` instances are shareable
|
|
70
|
+
across `Ractor`s.
|
|
61
71
|
|
|
62
72
|
```ruby
|
|
63
|
-
|
|
73
|
+
codec = RLZ4::FrameCodec.new # no dict
|
|
74
|
+
codec = RLZ4::FrameCodec.new(dict: dict) # Dictionary value
|
|
75
|
+
codec = RLZ4::FrameCodec.new(dict: "raw bytes here") # String shortcut
|
|
76
|
+
|
|
77
|
+
ct = codec.compress("hello world" * 100)
|
|
78
|
+
pt = codec.decompress(ct)
|
|
79
|
+
|
|
80
|
+
codec.has_dict? # => true / false
|
|
81
|
+
codec.id # => u32 id when dict-bound, nil otherwise
|
|
82
|
+
codec.size # => dict size when dict-bound, 0 otherwise
|
|
83
|
+
```
|
|
64
84
|
|
|
65
|
-
|
|
66
|
-
|
|
85
|
+
Dict id mismatch on decompress raises `RLZ4::DecompressError`
|
|
86
|
+
before touching the payload — no silently corrupt output.
|
|
67
87
|
|
|
68
|
-
|
|
88
|
+
## RLZ4::BlockCodec — block-format LZ4
|
|
89
|
+
|
|
90
|
+
For hot paths that compress many small messages and want to amortise
|
|
91
|
+
allocation. Emits a raw LZ4 block — no frame header, no end-mark,
|
|
92
|
+
no checksum. Not interoperable with the reference `lz4` CLI; meant
|
|
93
|
+
for callers who carry their own framing (e.g. ZMTP transports).
|
|
94
|
+
|
|
95
|
+
Wraps a reusable 16 KiB scratch hash table. With a dictionary, also
|
|
96
|
+
carries a pristine dict-loaded table and restores it into the scratch
|
|
97
|
+
via a single 16 KiB `memcpy` before each compress call — so dict
|
|
98
|
+
initialisation is paid once at construction, not per call.
|
|
99
|
+
|
|
100
|
+
```ruby
|
|
101
|
+
codec = RLZ4::BlockCodec.new # no dict
|
|
102
|
+
codec = RLZ4::BlockCodec.new(dict: dict) # Dictionary value
|
|
103
|
+
codec = RLZ4::BlockCodec.new(dict: "raw bytes here") # String shortcut
|
|
104
|
+
|
|
105
|
+
ct = codec.compress("hello world" * 100)
|
|
106
|
+
pt = codec.decompress(ct, decompressed_size: 1100)
|
|
69
107
|
```
|
|
70
108
|
|
|
71
|
-
`
|
|
72
|
-
|
|
109
|
+
`#decompress` requires `decompressed_size:` because raw LZ4 blocks
|
|
110
|
+
carry no length prefix. The decoder refuses to write past that
|
|
111
|
+
value even on crafted malformed input — raises
|
|
112
|
+
`RLZ4::DecompressError` on any overrun.
|
|
113
|
+
|
|
114
|
+
Use `RLZ4.compress_bound(n)` to pre-size output buffers.
|
|
73
115
|
|
|
74
|
-
|
|
116
|
+
`BlockCodec` holds a `RefCell` internally and is **thread-local** —
|
|
117
|
+
do not cross `Ractor` boundaries. Allocate one per `Ractor`. The
|
|
118
|
+
block format has no on-wire `Dict_ID` field; a dict mismatch
|
|
119
|
+
produces garbage plaintext (not an error). Detect at a higher
|
|
120
|
+
layer (checksum, schema validation, etc.).
|
|
75
121
|
|
|
76
|
-
|
|
77
|
-
|
|
122
|
+
## Ractor safety
|
|
123
|
+
|
|
124
|
+
`Dictionary` and `FrameCodec` can be used from any `Ractor`. Example:
|
|
78
125
|
|
|
79
126
|
```ruby
|
|
80
127
|
ractors = 4.times.map do |i|
|
|
81
128
|
Ractor.new(i) do |idx|
|
|
82
|
-
|
|
129
|
+
codec = RLZ4::FrameCodec.new
|
|
130
|
+
pt = "ractor #{idx} payload " * 1000
|
|
83
131
|
1000.times do
|
|
84
|
-
ct =
|
|
85
|
-
raise "mismatch" unless
|
|
132
|
+
ct = codec.compress(pt)
|
|
133
|
+
raise "mismatch" unless codec.decompress(ct) == pt
|
|
86
134
|
end
|
|
87
135
|
:ok
|
|
88
136
|
end
|
|
@@ -90,11 +138,17 @@ end
|
|
|
90
138
|
ractors.map(&:value) # => [:ok, :ok, :ok, :ok]
|
|
91
139
|
```
|
|
92
140
|
|
|
141
|
+
`BlockCodec` must not cross `Ractor` boundaries — allocate one per
|
|
142
|
+
`Ractor`.
|
|
143
|
+
|
|
93
144
|
## Non-goals
|
|
94
145
|
|
|
95
146
|
- High-compression mode (LZ4_HC).
|
|
96
147
|
- Streaming / chunked compression.
|
|
97
148
|
- Preservation of string encoding on decompress (output is always binary).
|
|
149
|
+
- Dictionary training from a sample corpus. LZ4 has no equivalent of
|
|
150
|
+
Zstd's `ZDICT_trainFromBuffer`. Dictionaries are caller-supplied
|
|
151
|
+
raw bytes.
|
|
98
152
|
|
|
99
153
|
## License
|
|
100
154
|
|
data/ext/rlz4/Cargo.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "rlz4"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "0.4.0"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
|
|
6
6
|
[lib]
|
|
@@ -8,7 +8,7 @@ name = "rlz4"
|
|
|
8
8
|
crate-type = ["cdylib", "rlib"]
|
|
9
9
|
|
|
10
10
|
[dependencies]
|
|
11
|
-
|
|
11
|
+
lz4-sys = "1.11"
|
|
12
12
|
magnus = "0.8"
|
|
13
13
|
rb-sys = "0.9"
|
|
14
14
|
|