omq-lz4 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +0 -1
- data/lib/omq/lz4/codec.rb +79 -5
- data/lib/omq/lz4/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4217a939f757a07b0e3e2ad1a905d11ea3cd017e54baa2f059a75f304386bdab
|
|
4
|
+
data.tar.gz: 1d0f4e4076e0cf02291b3f714fe20c721b8e05e0d4d17af4fbc8662e83897319
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a963f09065ef7a019a8b766a00093e5ef84ba9489fed66d3524da5e8a0d8d956d27cd01534926ac1cb3f5c5088439da9e0db5c5f71c0b7b58b7b90db8ccfc8bf
|
|
7
|
+
data.tar.gz: 86e7b5fe26c080a13a6b5c5c44b0e0a250bc45d650b2a9f76f4a80330209d6e24374b8a6412dd28118cb971619f56d3bede7a5ed937c91c5a80fd520ab17b5ac
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.3.0 (2026-05-11)
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- **LZ4M multi-block encoding/decoding** (RFC §5.3a, §5.4a, §5.5 rule 4).
|
|
8
|
+
Parts larger than `LZ4M_BLOCK_SIZE` (1 GiB) are split into independently
|
|
9
|
+
decodable blocks, each compressed against the installed dict (if any).
|
|
10
|
+
`encode_part` / `decode_part` accept a `block_size:` keyword for testing
|
|
11
|
+
with smaller-than-protocol block sizes.
|
|
12
|
+
- `LZ4M_SENTINEL` (`"LZ4M"`) and `LZ4M_BLOCK_SIZE` (1,073,741,824) constants
|
|
13
|
+
in `OMQ::LZ4::Codec`.
|
|
14
|
+
- LZ4B `decompressed_size` cap: the decoder now rejects single-block parts
|
|
15
|
+
whose declared `decompressed_size` exceeds `LZ4M_BLOCK_SIZE` (RFC §5.5
|
|
16
|
+
rule 3).
|
|
17
|
+
- Codec tests for LZ4M round-trips (with and without dict, partial last
|
|
18
|
+
block, random bytes), malformed LZ4M inputs (truncated, leftover bytes,
|
|
19
|
+
corrupt block data, budget overrun), and the LZ4B block size limit.
|
|
20
|
+
|
|
3
21
|
## 0.2.0 (2026-05-04)
|
|
4
22
|
|
|
5
23
|
### Changed
|
data/README.md
CHANGED
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
[License](LICENSE)
|
|
5
5
|
[Ruby](https://www.ruby-lang.org)
|
|
6
6
|
|
|
7
|
-
> **Status:** 0.1.0 — first landable release. See
|
|
8
7
|
> [RFC.md](RFC.md) for the wire-format spec and
|
|
9
8
|
> [CHANGELOG.md](CHANGELOG.md) for what's in.
|
|
10
9
|
|
data/lib/omq/lz4/codec.rb
CHANGED
|
@@ -14,18 +14,22 @@ module OMQ
|
|
|
14
14
|
# Each wire part begins with a 4-byte sentinel:
|
|
15
15
|
#
|
|
16
16
|
# 00 00 00 00 uncompressed plaintext
|
|
17
|
-
# 4C 5A 34 42 LZ4-compressed block ("LZ4B" in ASCII)
|
|
17
|
+
# 4C 5A 34 42 LZ4-compressed single block ("LZ4B" in ASCII)
|
|
18
|
+
# 4C 5A 34 4D LZ4-compressed multi-block ("LZ4M" in ASCII)
|
|
18
19
|
# 4C 5A 34 44 dictionary shipment ("LZ4D" in ASCII)
|
|
19
20
|
#
|
|
20
|
-
# `decode_part` handles UNCOMPRESSED and
|
|
21
|
+
# `decode_part` handles UNCOMPRESSED, LZ4B, and LZ4M. Dictionary
|
|
21
22
|
# shipments are a transport-layer concern: the transport peeks the
|
|
22
23
|
# first 4 bytes of each incoming wire part, routes LZ4D to
|
|
23
24
|
# `decode_dict_shipment`, and never hands a shipment to `decode_part`.
|
|
24
25
|
module Codec
|
|
25
26
|
UNCOMPRESSED_SENTINEL = "\x00\x00\x00\x00".b.freeze
|
|
26
27
|
LZ4B_SENTINEL = "LZ4B".b.freeze
|
|
28
|
+
LZ4M_SENTINEL = "LZ4M".b.freeze
|
|
27
29
|
LZ4D_SENTINEL = "LZ4D".b.freeze
|
|
28
30
|
|
|
31
|
+
LZ4M_BLOCK_SIZE = 1_073_741_824
|
|
32
|
+
|
|
29
33
|
# Size thresholds below which compression isn't worth attempting.
|
|
30
34
|
# Empirically tuned on Lorem-ipsum-like input via
|
|
31
35
|
# bench/min_compress_size_sweep.rb: for block-format LZ4 the
|
|
@@ -65,10 +69,11 @@ module OMQ
|
|
|
65
69
|
# `min_size` overrides the default threshold. Nil (the default)
|
|
66
70
|
# picks `MIN_COMPRESS_NO_DICT` for a no-dict codec and
|
|
67
71
|
# `MIN_COMPRESS_WITH_DICT` for a dict codec.
|
|
68
|
-
def encode_part(plaintext, block_codec:, min_size: nil)
|
|
72
|
+
def encode_part(plaintext, block_codec:, min_size: nil, block_size: LZ4M_BLOCK_SIZE)
|
|
69
73
|
min_size ||= block_codec.has_dict? ? MIN_COMPRESS_WITH_DICT : MIN_COMPRESS_NO_DICT
|
|
70
74
|
|
|
71
75
|
return encode_passthrough(plaintext) if plaintext.bytesize < min_size
|
|
76
|
+
return encode_multi_block(plaintext, block_codec, block_size) if plaintext.bytesize > block_size
|
|
72
77
|
|
|
73
78
|
compressed = block_codec.compress(plaintext)
|
|
74
79
|
|
|
@@ -91,7 +96,7 @@ module OMQ
|
|
|
91
96
|
#
|
|
92
97
|
# Does not handle LZ4D shipments; transport must route those to
|
|
93
98
|
# `decode_dict_shipment` before calling here.
|
|
94
|
-
def decode_part(wire_bytes, block_codec:, max_size: nil)
|
|
99
|
+
def decode_part(wire_bytes, block_codec:, max_size: nil, block_size: LZ4M_BLOCK_SIZE)
|
|
95
100
|
if wire_bytes.bytesize < 4
|
|
96
101
|
raise ProtocolError, "wire part too short (< 4 bytes)"
|
|
97
102
|
end
|
|
@@ -107,6 +112,10 @@ module OMQ
|
|
|
107
112
|
raise ProtocolError, "LZ4B part too short (< 12 bytes, no room for size field)"
|
|
108
113
|
end
|
|
109
114
|
decompressed_size = wire_bytes.byteslice(4, 8).unpack1("Q<")
|
|
115
|
+
if decompressed_size > block_size
|
|
116
|
+
raise ProtocolError,
|
|
117
|
+
"LZ4B decompressed_size #{decompressed_size} exceeds block size limit #{block_size}"
|
|
118
|
+
end
|
|
110
119
|
check_size!(decompressed_size, max_size)
|
|
111
120
|
block = wire_bytes.byteslice(12, wire_bytes.bytesize - 12)
|
|
112
121
|
begin
|
|
@@ -114,8 +123,9 @@ module OMQ
|
|
|
114
123
|
rescue RLZ4::DecompressError => e
|
|
115
124
|
raise ProtocolError, "LZ4B decode failed: #{e.message}"
|
|
116
125
|
end
|
|
126
|
+
when LZ4M_SENTINEL
|
|
127
|
+
decode_multi_block(wire_bytes, block_codec, max_size, block_size)
|
|
117
128
|
when LZ4D_SENTINEL
|
|
118
|
-
# Should not reach decode_part; transport should have routed this.
|
|
119
129
|
raise ProtocolError,
|
|
120
130
|
"LZ4D dictionary shipment seen at decode_part (transport should route to decode_dict_shipment)"
|
|
121
131
|
else
|
|
@@ -154,6 +164,70 @@ module OMQ
|
|
|
154
164
|
class << self
|
|
155
165
|
private
|
|
156
166
|
|
|
167
|
+
# Encodes +plaintext+ as an LZ4M multi-block wire part.
#
# Layout produced:
#
#   LZ4M_SENTINEL
#   total plaintext size, 64-bit little-endian ("Q<")
#   then, for every block:
#     compressed length, 32-bit little-endian ("V")
#     compressed bytes
#
# The plaintext is cut into consecutive chunks of at most +block_size+
# bytes (only the last chunk may be shorter) and each chunk is handed to
# +block_codec.compress+ on its own.
#
# @param plaintext [String] bytes to encode
# @param block_codec [#compress] codec applied to every chunk
# @param block_size [Integer] maximum plaintext bytes per block; must be positive
# @return [String] the encoded wire part (binary encoding)
# @raise [ArgumentError] if +block_size+ is zero or negative
def encode_multi_block(plaintext, block_codec, block_size)
  # A zero block size yields zero-length chunks and would never advance
  # through the plaintext; a negative one makes byteslice return nil.
  # Reject both before touching the input.
  raise ArgumentError, "block_size must be positive (got #{block_size})" unless block_size.positive?

  total = plaintext.bytesize

  buf = String.new(encoding: Encoding::BINARY)
  buf << LZ4M_SENTINEL
  buf << [total].pack("Q<")

  # byteslice clamps at the end of the string, so the final chunk is
  # automatically the (possibly shorter) remainder. An empty plaintext
  # produces no blocks at all.
  0.step(total - 1, block_size) do |offset|
    compressed = block_codec.compress(plaintext.byteslice(offset, block_size))
    buf << [compressed.bytesize].pack("V")
    buf << compressed
  end

  buf
end
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
# Decodes an LZ4M multi-block wire part back into plaintext.
#
# Expected layout of +wire_bytes+:
#
#   bytes 0..3   sentinel (not inspected here)
#   bytes 4..11  total decompressed size, 64-bit little-endian ("Q<")
#   then, for every block:
#     compressed length, 32-bit little-endian ("V")
#     compressed bytes
#
# Every block is expected to decompress to exactly +block_size+ bytes,
# except the last one, which carries whatever is left of the declared
# total. The wire part must end exactly after the last block.
#
# @param wire_bytes [String] the complete wire part, sentinel included
# @param block_codec [#decompress] codec applied to every block; called as
#   +decompress(data, decompressed_size: n)+
# @param max_size [Integer, nil] passed to +check_size!+ together with the
#   declared total size
# @param block_size [Integer] plaintext bytes per full block
# @return [String] the reassembled plaintext (binary encoding)
# @raise [ProtocolError] if the part is shorter than 12 bytes, a block
#   header or body runs past the end of the wire data, a block fails to
#   decompress or decompresses to an unexpected size, or bytes are left
#   over after the last block
def decode_multi_block(wire_bytes, block_codec, max_size, block_size)
  wire_len = wire_bytes.bytesize
  if wire_len < 12
    raise ProtocolError, "LZ4M part too short (< 12 bytes, no room for size field)"
  end

  decompressed_size = wire_bytes.byteslice(4, 8).unpack1("Q<")
  check_size!(decompressed_size, max_size)

  # The declared total comes straight off the wire and has not been backed
  # by any real data yet. Reserve at most one block up front so a tiny part
  # declaring an enormous size cannot force a huge (or failing) allocation;
  # the buffer grows as blocks are actually decoded.
  output = String.new(capacity: [decompressed_size, block_size].min, encoding: Encoding::BINARY)
  offset = 12
  remaining = decompressed_size

  while remaining > 0
    if offset + 4 > wire_len
      raise ProtocolError, "LZ4M truncated: no room for block length at offset #{offset}"
    end

    compressed_len = wire_bytes.byteslice(offset, 4).unpack1("V")
    offset += 4

    if offset + compressed_len > wire_len
      raise ProtocolError, "LZ4M truncated: block at offset #{offset} extends past wire end"
    end

    block_data = wire_bytes.byteslice(offset, compressed_len)
    offset += compressed_len

    expected_size = [block_size, remaining].min
    begin
      block_plain = block_codec.decompress(block_data, decompressed_size: expected_size)
    rescue RLZ4::DecompressError => e
      raise ProtocolError, "LZ4M block decode failed: #{e.message}"
    end

    # `remaining` is reduced by the expected size below, so a block that
    # yields a different number of bytes would make the output silently
    # disagree with the declared total. Treat it as a malformed part.
    if block_plain.bytesize != expected_size
      raise ProtocolError,
            "LZ4M block decompressed to #{block_plain.bytesize} bytes, expected #{expected_size}"
    end

    output << block_plain
    remaining -= expected_size
  end

  if offset != wire_len
    raise ProtocolError, "LZ4M: #{wire_len - offset} leftover bytes after last block"
  end

  output
end
|
|
229
|
+
|
|
230
|
+
|
|
157
231
|
def encode_passthrough(plaintext)
|
|
158
232
|
UNCOMPRESSED_SENTINEL + plaintext
|
|
159
233
|
end
|
data/lib/omq/lz4/version.rb
CHANGED