vibe_zstd 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.standard.yml +3 -0
- data/CHANGELOG.md +22 -0
- data/LICENSE.txt +21 -0
- data/README.md +978 -0
- data/Rakefile +20 -0
- data/benchmark/README.md +198 -0
- data/benchmark/compression_levels.rb +99 -0
- data/benchmark/context_reuse.rb +174 -0
- data/benchmark/decompression_speed_by_level.rb +65 -0
- data/benchmark/dictionary_training.rb +182 -0
- data/benchmark/dictionary_usage.rb +121 -0
- data/benchmark/for_readme.rb +157 -0
- data/benchmark/generate_fixture.rb +82 -0
- data/benchmark/helpers.rb +237 -0
- data/benchmark/multithreading.rb +105 -0
- data/benchmark/run_all.rb +150 -0
- data/benchmark/streaming.rb +154 -0
- data/ext/vibe_zstd/Makefile +270 -0
- data/ext/vibe_zstd/cctx.c +565 -0
- data/ext/vibe_zstd/dctx.c +493 -0
- data/ext/vibe_zstd/dict.c +587 -0
- data/ext/vibe_zstd/extconf.rb +52 -0
- data/ext/vibe_zstd/frames.c +132 -0
- data/ext/vibe_zstd/libzstd/LICENSE +30 -0
- data/ext/vibe_zstd/libzstd/common/allocations.h +55 -0
- data/ext/vibe_zstd/libzstd/common/bits.h +205 -0
- data/ext/vibe_zstd/libzstd/common/bitstream.h +454 -0
- data/ext/vibe_zstd/libzstd/common/compiler.h +464 -0
- data/ext/vibe_zstd/libzstd/common/cpu.h +249 -0
- data/ext/vibe_zstd/libzstd/common/debug.c +30 -0
- data/ext/vibe_zstd/libzstd/common/debug.h +107 -0
- data/ext/vibe_zstd/libzstd/common/entropy_common.c +340 -0
- data/ext/vibe_zstd/libzstd/common/error_private.c +64 -0
- data/ext/vibe_zstd/libzstd/common/error_private.h +158 -0
- data/ext/vibe_zstd/libzstd/common/fse.h +625 -0
- data/ext/vibe_zstd/libzstd/common/fse_decompress.c +315 -0
- data/ext/vibe_zstd/libzstd/common/huf.h +277 -0
- data/ext/vibe_zstd/libzstd/common/mem.h +422 -0
- data/ext/vibe_zstd/libzstd/common/pool.c +371 -0
- data/ext/vibe_zstd/libzstd/common/pool.h +81 -0
- data/ext/vibe_zstd/libzstd/common/portability_macros.h +171 -0
- data/ext/vibe_zstd/libzstd/common/threading.c +182 -0
- data/ext/vibe_zstd/libzstd/common/threading.h +142 -0
- data/ext/vibe_zstd/libzstd/common/xxhash.c +18 -0
- data/ext/vibe_zstd/libzstd/common/xxhash.h +7094 -0
- data/ext/vibe_zstd/libzstd/common/zstd_common.c +48 -0
- data/ext/vibe_zstd/libzstd/common/zstd_deps.h +123 -0
- data/ext/vibe_zstd/libzstd/common/zstd_internal.h +324 -0
- data/ext/vibe_zstd/libzstd/common/zstd_trace.h +156 -0
- data/ext/vibe_zstd/libzstd/compress/clevels.h +134 -0
- data/ext/vibe_zstd/libzstd/compress/fse_compress.c +625 -0
- data/ext/vibe_zstd/libzstd/compress/hist.c +191 -0
- data/ext/vibe_zstd/libzstd/compress/hist.h +82 -0
- data/ext/vibe_zstd/libzstd/compress/huf_compress.c +1464 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress.c +7843 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress_internal.h +1636 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress_literals.c +235 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress_literals.h +39 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress_sequences.c +442 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress_sequences.h +55 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress_superblock.c +688 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_cwksp.h +765 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_double_fast.c +778 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_double_fast.h +42 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_fast.c +985 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_fast.h +30 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_lazy.c +2199 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_lazy.h +193 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_ldm.c +745 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_ldm.h +109 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_opt.c +1580 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_opt.h +72 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_preSplit.c +238 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_preSplit.h +33 -0
- data/ext/vibe_zstd/libzstd/compress/zstdmt_compress.c +1923 -0
- data/ext/vibe_zstd/libzstd/compress/zstdmt_compress.h +102 -0
- data/ext/vibe_zstd/libzstd/decompress/huf_decompress.c +1944 -0
- data/ext/vibe_zstd/libzstd/decompress/huf_decompress_amd64.S +602 -0
- data/ext/vibe_zstd/libzstd/decompress/zstd_ddict.c +244 -0
- data/ext/vibe_zstd/libzstd/decompress/zstd_ddict.h +44 -0
- data/ext/vibe_zstd/libzstd/decompress/zstd_decompress.c +2410 -0
- data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_block.c +2209 -0
- data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_block.h +73 -0
- data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_internal.h +240 -0
- data/ext/vibe_zstd/libzstd/deprecated/zbuff.h +214 -0
- data/ext/vibe_zstd/libzstd/deprecated/zbuff_common.c +26 -0
- data/ext/vibe_zstd/libzstd/deprecated/zbuff_compress.c +167 -0
- data/ext/vibe_zstd/libzstd/deprecated/zbuff_decompress.c +77 -0
- data/ext/vibe_zstd/libzstd/dictBuilder/cover.c +1302 -0
- data/ext/vibe_zstd/libzstd/dictBuilder/cover.h +152 -0
- data/ext/vibe_zstd/libzstd/dictBuilder/divsufsort.c +1913 -0
- data/ext/vibe_zstd/libzstd/dictBuilder/divsufsort.h +57 -0
- data/ext/vibe_zstd/libzstd/dictBuilder/fastcover.c +766 -0
- data/ext/vibe_zstd/libzstd/dictBuilder/zdict.c +1133 -0
- data/ext/vibe_zstd/libzstd/zdict.h +481 -0
- data/ext/vibe_zstd/libzstd/zstd.h +3198 -0
- data/ext/vibe_zstd/libzstd/zstd_errors.h +107 -0
- data/ext/vibe_zstd/streaming.c +410 -0
- data/ext/vibe_zstd/vibe_zstd.c +293 -0
- data/ext/vibe_zstd/vibe_zstd.h +56 -0
- data/ext/vibe_zstd/vibe_zstd_internal.h +27 -0
- data/lib/vibe_zstd/constants.rb +67 -0
- data/lib/vibe_zstd/version.rb +5 -0
- data/lib/vibe_zstd.rb +255 -0
- data/sig/vibe_zstd.rbs +76 -0
- metadata +179 -0
data/README.md
ADDED
|
@@ -0,0 +1,978 @@
|
|
|
1
|
+
# VibeZstd
|
|
2
|
+
|
|
3
|
+
Fast, high-ratio compression for Ruby using the Zstandard (Zstd) library. VibeZstd provides a native Ruby C extension with an idiomatic API for compressing and decompressing data.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
require 'vibe_zstd'
|
|
9
|
+
|
|
10
|
+
# One-line compression/decompression
|
|
11
|
+
compressed = VibeZstd.compress("Hello, world!")
|
|
12
|
+
original = VibeZstd.decompress(compressed)
|
|
13
|
+
|
|
14
|
+
# With custom compression level (1-22, or negative for ultra-fast)
|
|
15
|
+
compressed = VibeZstd.compress(data, level: 9)
|
|
16
|
+
|
|
17
|
+
# Reusable contexts (recommended for multiple operations - 2.2x faster!)
|
|
18
|
+
cctx = VibeZstd::CCtx.new
|
|
19
|
+
dctx = VibeZstd::DCtx.new
|
|
20
|
+
|
|
21
|
+
# Reuse the same contexts for multiple operations
|
|
22
|
+
files.each do |file_data|
|
|
23
|
+
compressed = cctx.compress(file_data)
|
|
24
|
+
decompressed = dctx.decompress(compressed)
|
|
25
|
+
# ... process data
|
|
26
|
+
end
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
Add to your Gemfile:
|
|
32
|
+
|
|
33
|
+
```ruby
|
|
34
|
+
gem 'vibe_zstd'
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Then run:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
bundle install
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Or install directly:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
gem install vibe_zstd
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Performance & Best Practices
|
|
50
|
+
|
|
51
|
+
VibeZstd leverages Zstandard's excellent compression performance:
|
|
52
|
+
|
|
53
|
+
- **Compression ratios** comparable to or better than gzip/bzip2 at similar speeds
|
|
54
|
+
- **Extremely fast decompression**
|
|
55
|
+
- **Compression levels** from -7 (ultra-fast) to 22 (maximum compression)
|
|
56
|
+
|
|
57
|
+
### Context Reuse (Important!)
|
|
58
|
+
|
|
59
|
+
**Always reuse contexts for multiple operations** - it's up to 2.2x faster, with the largest gains on small payloads:
|
|
60
|
+
|
|
61
|
+
| Data Size | New Context | Reused Context | Speedup |
|
|
62
|
+
|-----------|-------------|----------------|---------|
|
|
63
|
+
| 1KB | 72,610 ops/s | 159,454 ops/s | **2.2x** |
|
|
64
|
+
| 10KB | 34,941 ops/s | 61,171 ops/s | **1.75x** |
|
|
65
|
+
| 100KB | 7,675 ops/s | 9,491 ops/s | **1.24x** |
|
|
66
|
+
|
|
67
|
+
```ruby
|
|
68
|
+
# ❌ Don't do this (creates new context each time)
|
|
69
|
+
1000.times do
|
|
70
|
+
compressed = VibeZstd.compress(data)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
# ✅ Do this instead (reuse context)
|
|
74
|
+
cctx = VibeZstd::CCtx.new
|
|
75
|
+
1000.times do
|
|
76
|
+
compressed = cctx.compress(data)
|
|
77
|
+
end
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**Memory savings:** Reusing contexts saves ~6.7GB for 5000 operations:
|
|
81
|
+
- Creating new contexts: 5000 × 1.34MB = 6.70GB
|
|
82
|
+
- Reusing contexts: 1 × 1.34MB = 1.34MB
|
|
83
|
+
- **Savings: ~6.70GB (99.98% reduction)**
|
|
84
|
+
|
|
85
|
+
*Note: 1.34MB = CCtx memory (~1.24MB at level 3) + DCtx memory (~128KB)*
|
|
86
|
+
|
|
87
|
+
### Compression Level Trade-offs
|
|
88
|
+
|
|
89
|
+
Choose the right level for your use case:
|
|
90
|
+
|
|
91
|
+
| Level | Ratio | Speed (ops/sec) | Memory | Use Case |
|
|
92
|
+
|-------|-------|-----------------|--------|----------|
|
|
93
|
+
| -1 | 6.03x | 11,507 | 537KB | Ultra-fast, real-time |
|
|
94
|
+
| 1 | 8.2x | 10,752 | 569KB | Fast, high-throughput |
|
|
95
|
+
| 3 | 7.93x | 9,191 | 1.24MB | **Balanced (default)** |
|
|
96
|
+
| 9 | 9.17x | 987 | 12.49MB | Better compression |
|
|
97
|
+
| 19 | 10.3x | 35 | 81.25MB | Maximum compression |
|
|
98
|
+
|
|
99
|
+
```ruby
|
|
100
|
+
cctx = VibeZstd::CCtx.new
|
|
101
|
+
|
|
102
|
+
# Ultra-fast for real-time processing
|
|
103
|
+
compressed = cctx.compress(data, level: -1)
|
|
104
|
+
|
|
105
|
+
# Maximum compression for archival
|
|
106
|
+
compressed = cctx.compress(data, level: 19)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Dictionary Compression
|
|
110
|
+
|
|
111
|
+
For small, similar data (JSON, logs, API responses), dictionaries provide dramatic improvements:
|
|
112
|
+
|
|
113
|
+
| Method | Compressed Size | Ratio | Improvement |
|
|
114
|
+
|--------|----------------|-------|-------------|
|
|
115
|
+
| Without dictionary | 110B | 1.15x | - |
|
|
116
|
+
| With dictionary (16KB) | 54B | 2.33x | **50.9% smaller** |
|
|
117
|
+
|
|
118
|
+
See the [Dictionaries](#dictionaries) section below for usage examples.
|
|
119
|
+
|
|
120
|
+
## Basic Usage
|
|
121
|
+
|
|
122
|
+
### Simple Compression
|
|
123
|
+
|
|
124
|
+
```ruby
|
|
125
|
+
require 'vibe_zstd'
|
|
126
|
+
|
|
127
|
+
data = "Hello, world! This is a test string."
|
|
128
|
+
|
|
129
|
+
# One-off compression (creates context internally)
|
|
130
|
+
compressed = VibeZstd.compress(data)
|
|
131
|
+
decompressed = VibeZstd.decompress(compressed)
|
|
132
|
+
|
|
133
|
+
# With custom level
|
|
134
|
+
compressed = VibeZstd.compress(data, level: 5)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Using Contexts (Recommended)
|
|
138
|
+
|
|
139
|
+
For multiple operations, create reusable contexts:
|
|
140
|
+
|
|
141
|
+
```ruby
|
|
142
|
+
# Create contexts once
|
|
143
|
+
cctx = VibeZstd::CCtx.new
|
|
144
|
+
dctx = VibeZstd::DCtx.new
|
|
145
|
+
|
|
146
|
+
# Reuse for multiple operations
|
|
147
|
+
files.each do |file|
|
|
148
|
+
data = File.read(file)
|
|
149
|
+
compressed = cctx.compress(data)
|
|
150
|
+
File.write("#{file}.zst", compressed)
|
|
151
|
+
end
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Compression Levels
|
|
155
|
+
|
|
156
|
+
```ruby
|
|
157
|
+
cctx = VibeZstd::CCtx.new
|
|
158
|
+
|
|
159
|
+
# Fast compression (level 1)
|
|
160
|
+
compressed = cctx.compress(data, level: 1)
|
|
161
|
+
|
|
162
|
+
# Default level (3)
|
|
163
|
+
compressed = cctx.compress(data)
|
|
164
|
+
|
|
165
|
+
# High compression (level 9)
|
|
166
|
+
compressed = cctx.compress(data, level: 9)
|
|
167
|
+
|
|
168
|
+
# Negative levels for ultra-fast compression
|
|
169
|
+
compressed = cctx.compress(data, level: -1)
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Frame Information
|
|
173
|
+
|
|
174
|
+
```ruby
|
|
175
|
+
# Check decompressed size before decompressing
|
|
176
|
+
size = VibeZstd.frame_content_size(compressed_data)
|
|
177
|
+
puts "Will decompress to #{size} bytes" if size
|
|
178
|
+
|
|
179
|
+
# Get compression bound (maximum compressed size)
|
|
180
|
+
max_size = VibeZstd.compress_bound(data.bytesize)
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
## Advanced Features
|
|
184
|
+
|
|
185
|
+
### Dictionaries
|
|
186
|
+
|
|
187
|
+
Dictionaries dramatically improve compression for small, similar data by pre-training on representative samples.
|
|
188
|
+
|
|
189
|
+
#### Training a Dictionary
|
|
190
|
+
|
|
191
|
+
```ruby
|
|
192
|
+
# Collect representative samples
|
|
193
|
+
samples = [
|
|
194
|
+
{id: 1, name: "Alice", email: "alice@example.com"}.to_json,
|
|
195
|
+
{id: 2, name: "Bob", email: "bob@example.com"}.to_json,
|
|
196
|
+
{id: 3, name: "Charlie", email: "charlie@example.com"}.to_json
|
|
197
|
+
# ... more samples
|
|
198
|
+
]
|
|
199
|
+
|
|
200
|
+
# Train dictionary with default algorithm (fast, good for most use cases)
|
|
201
|
+
dict_data = VibeZstd.train_dict(samples)
|
|
202
|
+
|
|
203
|
+
# Or specify custom size
|
|
204
|
+
dict_data = VibeZstd.train_dict(samples, max_dict_size: 16_384)
|
|
205
|
+
|
|
206
|
+
# Advanced: Use COVER algorithm for better dictionaries
|
|
207
|
+
# k: segment size (typical: 16-2048+)
|
|
208
|
+
# d: dmer size (typical: 6-16, must be ≤ k)
|
|
209
|
+
dict_data = VibeZstd.train_dict_cover(
|
|
210
|
+
samples,
|
|
211
|
+
max_dict_size: 16_384,
|
|
212
|
+
k: 200, # Segment size
|
|
213
|
+
d: 6 # Dmer size
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
# Advanced: Fast COVER for quick training
|
|
217
|
+
dict_data = VibeZstd.train_dict_fast_cover(
|
|
218
|
+
samples,
|
|
219
|
+
max_dict_size: 16_384,
|
|
220
|
+
k: 200,
|
|
221
|
+
d: 6,
|
|
222
|
+
accel: 1 # Higher = faster but less accurate (1-10)
|
|
223
|
+
)
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
#### Using Dictionaries
|
|
227
|
+
|
|
228
|
+
```ruby
|
|
229
|
+
# Create dictionary objects
|
|
230
|
+
dict_data = File.binread('my.dict')
|
|
231
|
+
cdict = VibeZstd::CDict.new(dict_data)
|
|
232
|
+
ddict = VibeZstd::DDict.new(dict_data)
|
|
233
|
+
|
|
234
|
+
# Use with contexts
|
|
235
|
+
cctx = VibeZstd::CCtx.new
|
|
236
|
+
dctx = VibeZstd::DCtx.new
|
|
237
|
+
|
|
238
|
+
compressed = cctx.compress(data, dict: cdict)
|
|
239
|
+
decompressed = dctx.decompress(compressed, dict: ddict)
|
|
240
|
+
|
|
241
|
+
# Or use convenience methods
|
|
242
|
+
compressed = VibeZstd.compress(data, dict: cdict)
|
|
243
|
+
decompressed = VibeZstd.decompress(compressed, dict: ddict)
|
|
244
|
+
|
|
245
|
+
# Check dictionary properties
|
|
246
|
+
puts "Dictionary size: #{cdict.size} bytes"
|
|
247
|
+
puts "Dictionary ID: #{cdict.dict_id}"
|
|
248
|
+
|
|
249
|
+
# Get dictionary ID from compressed data
|
|
250
|
+
dict_id = VibeZstd.get_dict_id_from_frame(compressed)
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
#### Prefix Dictionaries (Lightweight Alternative)
|
|
254
|
+
|
|
255
|
+
For cases where training isn't practical:
|
|
256
|
+
|
|
257
|
+
```ruby
|
|
258
|
+
cctx = VibeZstd::CCtx.new
|
|
259
|
+
dctx = VibeZstd::DCtx.new
|
|
260
|
+
|
|
261
|
+
# Use a common prefix (same for compression and decompression)
|
|
262
|
+
prefix = "Common data prefix that appears frequently"
|
|
263
|
+
|
|
264
|
+
compressed = cctx.use_prefix(prefix).compress(data)
|
|
265
|
+
decompressed = dctx.use_prefix(prefix).decompress(compressed)
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
### Streaming API
|
|
269
|
+
|
|
270
|
+
Process large files with constant memory usage.
|
|
271
|
+
|
|
272
|
+
#### Streaming Compression
|
|
273
|
+
|
|
274
|
+
```ruby
|
|
275
|
+
# Compress to file
|
|
276
|
+
File.open('output.zst', 'wb') do |file|
|
|
277
|
+
writer = VibeZstd::CompressWriter.new(file, level: 5)
|
|
278
|
+
|
|
279
|
+
writer.write("chunk 1")
|
|
280
|
+
writer.write("chunk 2")
|
|
281
|
+
writer.flush # Optional: force output
|
|
282
|
+
writer.finish # Required: finalize frame
|
|
283
|
+
end
|
|
284
|
+
|
|
285
|
+
# Or use block form (auto-finishes)
|
|
286
|
+
VibeZstd::CompressWriter.open(file, level: 5) do |writer|
|
|
287
|
+
large_data.each_slice(65536) do |chunk|
|
|
288
|
+
writer.write(chunk)
|
|
289
|
+
end
|
|
290
|
+
end
|
|
291
|
+
|
|
292
|
+
# With dictionary
|
|
293
|
+
cdict = VibeZstd::CDict.new(dict_data)
|
|
294
|
+
writer = VibeZstd::CompressWriter.new(file, level: 5, dict: cdict)
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
#### Streaming Decompression
|
|
298
|
+
|
|
299
|
+
```ruby
|
|
300
|
+
# Decompress file in chunks (memory-safe for large files)
|
|
301
|
+
File.open('input.zst', 'rb') do |file|
|
|
302
|
+
reader = VibeZstd::DecompressReader.new(file)
|
|
303
|
+
|
|
304
|
+
# Read in ~128KB chunks by default
|
|
305
|
+
while chunk = reader.read
|
|
306
|
+
process(chunk)
|
|
307
|
+
end
|
|
308
|
+
end
|
|
309
|
+
|
|
310
|
+
# Custom chunk sizes
|
|
311
|
+
reader = VibeZstd::DecompressReader.new(file, initial_chunk_size: 1_048_576)
|
|
312
|
+
while chunk = reader.read # Returns up to 1MB per call
|
|
313
|
+
process(chunk)
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
# Or specify per-read
|
|
317
|
+
while chunk = reader.read(65536) # Read 64KB chunks
|
|
318
|
+
process(chunk)
|
|
319
|
+
end
|
|
320
|
+
|
|
321
|
+
# Block form
|
|
322
|
+
VibeZstd::DecompressReader.open(file) do |reader|
|
|
323
|
+
reader.each do |chunk|
|
|
324
|
+
process(chunk)
|
|
325
|
+
end
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
# HTTP streaming example
|
|
329
|
+
require 'net/http'
|
|
330
|
+
uri = URI('https://example.com/large_file.zst')
|
|
331
|
+
File.open('output.txt', 'wb') do |output|
|
|
332
|
+
Net::HTTP.start(uri.host, uri.port, use_ssl: true) do |http|
|
|
333
|
+
http.request_get(uri.path) do |response|
|
|
334
|
+
reader = VibeZstd::DecompressReader.new(response.body)
|
|
335
|
+
reader.each { |chunk| output.write(chunk) }
|
|
336
|
+
end
|
|
337
|
+
end
|
|
338
|
+
end
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
**Chunk size recommendations:**
|
|
342
|
+
- **Small data (< 10KB)**: `initial_chunk_size: 4096` to avoid over-allocation
|
|
343
|
+
- **Medium data (10KB-1MB)**: Default (~128KB) for balanced performance
|
|
344
|
+
- **Large data (> 1MB)**: `initial_chunk_size: 1_048_576` to reduce overhead
|
|
345
|
+
- **Memory-constrained**: Use smaller chunks (4-8KB)
|
|
346
|
+
- **High throughput**: Use larger chunks (1-10MB)
|
|
347
|
+
|
|
348
|
+
#### Line-by-Line Processing
|
|
349
|
+
|
|
350
|
+
`DecompressReader` provides IO-like methods for processing compressed text files line by line:
|
|
351
|
+
|
|
352
|
+
```ruby
|
|
353
|
+
# Process compressed log file line by line
|
|
354
|
+
File.open('app.log.zst', 'rb') do |file|
|
|
355
|
+
reader = VibeZstd::DecompressReader.new(file)
|
|
356
|
+
|
|
357
|
+
# Read lines one at a time
|
|
358
|
+
while line = reader.gets
|
|
359
|
+
# Process each log entry
|
|
360
|
+
if line.include?('ERROR')
|
|
361
|
+
alert_on_call(line)
|
|
362
|
+
end
|
|
363
|
+
end
|
|
364
|
+
end
|
|
365
|
+
|
|
366
|
+
# Or use each_line for cleaner iteration
|
|
367
|
+
VibeZstd::DecompressReader.open(file) do |reader|
|
|
368
|
+
reader.each_line do |line|
|
|
369
|
+
process_log_entry(line)
|
|
370
|
+
end
|
|
371
|
+
end
|
|
372
|
+
|
|
373
|
+
# Read specific number of bytes
|
|
374
|
+
reader.readpartial(4096) # Raises EOFError at end of stream
|
|
375
|
+
|
|
376
|
+
# Check for end of stream
|
|
377
|
+
reader.eof? # => true/false
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
**Use cases:**
|
|
381
|
+
- **Log processing** - Parse compressed log files without decompressing the entire file
|
|
382
|
+
- **CSV/TSV parsing** - Read compressed data files line by line for memory-efficient ETL
|
|
383
|
+
- **Configuration files** - Load compressed config files with minimal memory footprint
|
|
384
|
+
|
|
385
|
+
### Multi-threaded Compression
|
|
386
|
+
|
|
387
|
+
Enable parallel compression for large data:
|
|
388
|
+
|
|
389
|
+
```ruby
|
|
390
|
+
cctx = VibeZstd::CCtx.new
|
|
391
|
+
|
|
392
|
+
# Enable 4 worker threads
|
|
393
|
+
cctx.workers = 4
|
|
394
|
+
|
|
395
|
+
# Or set during initialization
|
|
396
|
+
cctx = VibeZstd::CCtx.new(workers: 4)
|
|
397
|
+
|
|
398
|
+
large_data = File.read('big_file.txt')
|
|
399
|
+
compressed = cctx.compress(large_data)
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
**Multi-threading performance** (500KB data):
|
|
403
|
+
|
|
404
|
+
| Workers | Throughput | Speedup | Efficiency |
|
|
405
|
+
|---------|------------|---------|------------|
|
|
406
|
+
| 0 (single) | 795MB/s | 1.0x | 100% |
|
|
407
|
+
| 2 | 784MB/s | 0.99x | 49% |
|
|
408
|
+
| 4 | 748MB/s | 0.94x | 24% |
|
|
409
|
+
|
|
410
|
+
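**Note:** Multi-threading works best for data > 1MB. For smaller payloads, such as the 500KB benchmark above, thread coordination overhead can outweigh the benefits, which is why adding workers does not improve throughput in that table.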
|
|
411
|
+
|
|
412
|
+
#### Multi-threading Tuning
|
|
413
|
+
|
|
414
|
+
```ruby
|
|
415
|
+
cctx = VibeZstd::CCtx.new(workers: 4)
|
|
416
|
+
|
|
417
|
+
# Tune job size (default: auto)
|
|
418
|
+
# Larger = better ratio but higher latency
|
|
419
|
+
cctx.job_size = 1_048_576 # 1MB per job
|
|
420
|
+
|
|
421
|
+
# Tune overlap (0-9)
|
|
422
|
+
# Higher = better ratio but slower
|
|
423
|
+
cctx.overlap_log = 6 # Default: auto (usually 6-9)
|
|
424
|
+
```
|
|
425
|
+
|
|
426
|
+
### Compression Parameters
|
|
427
|
+
|
|
428
|
+
Fine-tune compression behavior using property setters:
|
|
429
|
+
|
|
430
|
+
```ruby
|
|
431
|
+
# Set during initialization (recommended)
|
|
432
|
+
cctx = VibeZstd::CCtx.new(
|
|
433
|
+
checksum_flag: 1, # Add checksum for integrity
|
|
434
|
+
content_size_flag: 1, # Include size in frame header
|
|
435
|
+
window_log: 20, # 1MB window (2^20)
|
|
436
|
+
workers: 4 # 4 threads
|
|
437
|
+
)
|
|
438
|
+
|
|
439
|
+
# Or set after creation
|
|
440
|
+
cctx = VibeZstd::CCtx.new
|
|
441
|
+
cctx.checksum_flag = 1
|
|
442
|
+
cctx.content_size_flag = 1
|
|
443
|
+
cctx.workers = 4
|
|
444
|
+
|
|
445
|
+
data = "Your data here"
|
|
446
|
+
compressed = cctx.compress(data)
|
|
447
|
+
```
|
|
448
|
+
|
|
449
|
+
#### Common Parameters
|
|
450
|
+
|
|
451
|
+
**Frame parameters:**
|
|
452
|
+
```ruby
|
|
453
|
+
cctx.checksum_flag = 1 # Enable 32-bit checksum
|
|
454
|
+
cctx.content_size_flag = 1 # Include decompressed size
|
|
455
|
+
cctx.dict_id_flag = 1 # Include dictionary ID
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
**Compression tuning:**
|
|
459
|
+
```ruby
|
|
460
|
+
cctx.compression_level = 9 # Same as level: argument
|
|
461
|
+
cctx.window_log = 20 # Window size (2^20 = 1MB)
|
|
462
|
+
```
|
|
463
|
+
|
|
464
|
+
**Long Distance Matching (for large files with repeated patterns):**
|
|
465
|
+
```ruby
|
|
466
|
+
cctx.enable_long_distance_matching = 1
|
|
467
|
+
cctx.ldm_hash_log = 20
|
|
468
|
+
cctx.ldm_min_match = 64
|
|
469
|
+
```
|
|
470
|
+
|
|
471
|
+
**Multi-threading:**
|
|
472
|
+
```ruby
|
|
473
|
+
cctx.workers = 4 # Number of threads
|
|
474
|
+
cctx.job_size = 1_048_576 # Size per job
|
|
475
|
+
cctx.overlap_log = 6 # Overlap between jobs
|
|
476
|
+
```
|
|
477
|
+
|
|
478
|
+
#### Query Parameter Bounds
|
|
479
|
+
|
|
480
|
+
```ruby
|
|
481
|
+
# Get valid range for a parameter
|
|
482
|
+
bounds = VibeZstd::CCtx.parameter_bounds(:compression_level)
|
|
483
|
+
puts "Level range: #{bounds[:min]} to #{bounds[:max]}"
|
|
484
|
+
# => Level range: -131072 to 22
|
|
485
|
+
# Note: Practical range is -7 to 22; -131072 is a technical limit, not a usable level
|
|
486
|
+
|
|
487
|
+
# Validate before setting
|
|
488
|
+
level = user_input.to_i
|
|
489
|
+
bounds = VibeZstd::CCtx.parameter_bounds(:compression_level)
|
|
490
|
+
if level >= bounds[:min] && level <= bounds[:max]
|
|
491
|
+
cctx.compression_level = level
|
|
492
|
+
else
|
|
493
|
+
raise "Invalid level"
|
|
494
|
+
end
|
|
495
|
+
```
|
|
496
|
+
|
|
497
|
+
#### Get Current Parameter Values
|
|
498
|
+
|
|
499
|
+
```ruby
|
|
500
|
+
cctx = VibeZstd::CCtx.new(compression_level: 9)
|
|
501
|
+
|
|
502
|
+
# Read current values
|
|
503
|
+
puts cctx.compression_level # => 9
|
|
504
|
+
puts cctx.checksum_flag # => 0
|
|
505
|
+
```
|
|
506
|
+
|
|
507
|
+
### Decompression Parameters
|
|
508
|
+
|
|
509
|
+
Control decompression behavior to prevent memory exhaustion:
|
|
510
|
+
|
|
511
|
+
```ruby
|
|
512
|
+
dctx = VibeZstd::DCtx.new
|
|
513
|
+
|
|
514
|
+
# Limit maximum window size (prevents memory attacks)
|
|
515
|
+
dctx.window_log_max = 20 # Max 1MB window (2^20)
|
|
516
|
+
|
|
517
|
+
# Or set during initialization
|
|
518
|
+
dctx = VibeZstd::DCtx.new(window_log_max: 20)
|
|
519
|
+
|
|
520
|
+
compressed = File.read('data.zst')
|
|
521
|
+
decompressed = dctx.decompress(compressed)
|
|
522
|
+
```
|
|
523
|
+
|
|
524
|
+
#### Optimize for Unknown-Size Frames
|
|
525
|
+
|
|
526
|
+
When decompressing frames without known content size:
|
|
527
|
+
|
|
528
|
+
```ruby
|
|
529
|
+
# Set globally for all new DCtx instances
|
|
530
|
+
VibeZstd::DCtx.default_initial_capacity = 1_048_576 # 1MB for large data
|
|
531
|
+
|
|
532
|
+
# Set per instance
|
|
533
|
+
dctx = VibeZstd::DCtx.new(initial_capacity: 512_000)
|
|
534
|
+
|
|
535
|
+
# Or per call (overrides instance setting)
|
|
536
|
+
dctx.decompress(compressed, initial_capacity: 16_384)
|
|
537
|
+
|
|
538
|
+
# Reset to default (~128KB)
|
|
539
|
+
VibeZstd::DCtx.default_initial_capacity = nil
|
|
540
|
+
```
|
|
541
|
+
|
|
542
|
+
**When to configure:**
|
|
543
|
+
- **Small data (< 10KB)**: Set to a value between `4096` and `8192`
|
|
544
|
+
- **Large data (> 1MB)**: Set to `1_048_576` or higher
|
|
545
|
+
- **Known-size frames**: Not applicable (size read from frame header)
|
|
546
|
+
|
|
547
|
+
### Memory Estimation
|
|
548
|
+
|
|
549
|
+
Estimate memory usage before creating contexts:
|
|
550
|
+
|
|
551
|
+
```ruby
|
|
552
|
+
# Compression context memory at level 5
|
|
553
|
+
cctx_bytes = VibeZstd::CCtx.estimate_memory(5)
|
|
554
|
+
puts "CCtx will use ~#{cctx_bytes} bytes"
|
|
555
|
+
|
|
556
|
+
# Decompression context
|
|
557
|
+
dctx_bytes = VibeZstd::DCtx.estimate_memory
|
|
558
|
+
puts "DCtx will use ~#{dctx_bytes} bytes"
|
|
559
|
+
|
|
560
|
+
# Dictionary memory
|
|
561
|
+
dict_size = 16_384
|
|
562
|
+
cdict_bytes = VibeZstd::CDict.estimate_memory(dict_size, 5)
|
|
563
|
+
ddict_bytes = VibeZstd::DDict.estimate_memory(dict_size)
|
|
564
|
+
puts "CDict: #{cdict_bytes} bytes, DDict: #{ddict_bytes} bytes"
|
|
565
|
+
```
|
|
566
|
+
|
|
567
|
+
## Integration Examples
|
|
568
|
+
|
|
569
|
+
Real-world examples demonstrating VibeZstd in production scenarios.
|
|
570
|
+
|
|
571
|
+
### Rails Encrypted Columns with Thread-Local Contexts
|
|
572
|
+
|
|
573
|
+
Use VibeZstd with ActiveRecord::Encryption for high-performance compression of encrypted attributes.
|
|
574
|
+
|
|
575
|
+
#### Rails 7.1+ (Global Compressor Configuration)
|
|
576
|
+
|
|
577
|
+
```ruby
|
|
578
|
+
# config/initializers/vibe_zstd_encryption.rb
|
|
579
|
+
module VibeZstdCompressor
|
|
580
|
+
# Compress using thread-local contexts (2-3x faster in multi-threaded environments)
|
|
581
|
+
def self.deflate(data)
|
|
582
|
+
VibeZstd::ThreadLocal.compress(data, level: 3)
|
|
583
|
+
end
|
|
584
|
+
|
|
585
|
+
def self.inflate(data)
|
|
586
|
+
VibeZstd::ThreadLocal.decompress(data)
|
|
587
|
+
end
|
|
588
|
+
end
|
|
589
|
+
|
|
590
|
+
ActiveSupport.on_load(:active_record) do
|
|
591
|
+
ActiveRecord::Encryption.config.support_unencrypted_data = true
|
|
592
|
+
ActiveRecord::Encryption.config.compressor = VibeZstdCompressor
|
|
593
|
+
end
|
|
594
|
+
|
|
595
|
+
# In your model - all encrypted attributes use VibeZstd
|
|
596
|
+
class User < ApplicationRecord
|
|
597
|
+
encrypts :preferences
|
|
598
|
+
encrypts :metadata
|
|
599
|
+
end
|
|
600
|
+
```
|
|
601
|
+
|
|
602
|
+
#### Rails 8.0+ (Per-Attribute Compressor)
|
|
603
|
+
|
|
604
|
+
Rails 8 introduces a per-attribute `compressor:` option for fine-grained control:
|
|
605
|
+
|
|
606
|
+
```ruby
|
|
607
|
+
# config/initializers/vibe_zstd_encryption.rb
|
|
608
|
+
module VibeZstdCompressor
|
|
609
|
+
def self.deflate(data)
|
|
610
|
+
VibeZstd::ThreadLocal.compress(data, level: 3)
|
|
611
|
+
end
|
|
612
|
+
|
|
613
|
+
def self.inflate(data)
|
|
614
|
+
VibeZstd::ThreadLocal.decompress(data)
|
|
615
|
+
end
|
|
616
|
+
end
|
|
617
|
+
|
|
618
|
+
# In your model - specify compressor per attribute
|
|
619
|
+
class User < ApplicationRecord
|
|
620
|
+
# Use VibeZstd for large JSON columns
|
|
621
|
+
encrypts :preferences, compressor: VibeZstdCompressor
|
|
622
|
+
encrypts :settings, compressor: VibeZstdCompressor
|
|
623
|
+
|
|
624
|
+
# Use default Zlib for small text fields
|
|
625
|
+
encrypts :api_key
|
|
626
|
+
end
|
|
627
|
+
```
|
|
628
|
+
|
|
629
|
+
#### Rails 8.0+ with Per-Attribute Dictionaries
|
|
630
|
+
|
|
631
|
+
Rails 8's per-attribute compressor enables custom dictionaries for individual fields—maximum compression for structured data:
|
|
632
|
+
|
|
633
|
+
```ruby
|
|
634
|
+
# config/initializers/vibe_zstd_encryption.rb
|
|
635
|
+
|
|
636
|
+
# Compressor for user preferences with custom dictionary
|
|
637
|
+
module UserPrefsCompressor
|
|
638
|
+
DICT = VibeZstd::CDict.new(
|
|
639
|
+
File.binread('config/dictionaries/user_preferences.dict')
|
|
640
|
+
)
|
|
641
|
+
|
|
642
|
+
def self.deflate(data)
|
|
643
|
+
VibeZstd::ThreadLocal.compress(data, dict: DICT, level: 5)
|
|
644
|
+
end
|
|
645
|
+
|
|
646
|
+
def self.inflate(data)
|
|
647
|
+
VibeZstd::ThreadLocal.decompress(data, dict: DICT.to_ddict)
|
|
648
|
+
end
|
|
649
|
+
end
|
|
650
|
+
|
|
651
|
+
# Compressor for audit logs with different dictionary
|
|
652
|
+
module AuditLogCompressor
|
|
653
|
+
DICT = VibeZstd::CDict.new(
|
|
654
|
+
File.binread('config/dictionaries/audit_logs.dict')
|
|
655
|
+
)
|
|
656
|
+
|
|
657
|
+
def self.deflate(data)
|
|
658
|
+
VibeZstd::ThreadLocal.compress(data, dict: DICT, level: 3)
|
|
659
|
+
end
|
|
660
|
+
|
|
661
|
+
def self.inflate(data)
|
|
662
|
+
VibeZstd::ThreadLocal.decompress(data, dict: DICT.to_ddict)
|
|
663
|
+
end
|
|
664
|
+
end
|
|
665
|
+
|
|
666
|
+
# In your models - each attribute gets optimized dictionary
|
|
667
|
+
class User < ApplicationRecord
|
|
668
|
+
encrypts :preferences, compressor: UserPrefsCompressor # 50%+ smaller with custom dict
|
|
669
|
+
encrypts :settings, compressor: VibeZstdCompressor # Standard VibeZstd (no dict)
|
|
670
|
+
encrypts :api_key # Default Zlib for small data
|
|
671
|
+
end
|
|
672
|
+
|
|
673
|
+
class AuditEvent < ApplicationRecord
|
|
674
|
+
encrypts :event_data, compressor: AuditLogCompressor # Custom dict for audit logs
|
|
675
|
+
end
|
|
676
|
+
```
|
|
677
|
+
|
|
678
|
+
**Why per-attribute dictionaries?**
|
|
679
|
+
- **50-70% size reduction** for small, similar data (JSON user preferences, API responses, logs)
|
|
680
|
+
- **Different dictionaries** trained on different data patterns (user prefs vs audit logs)
|
|
681
|
+
- **ThreadLocal pooling** keeps one context per dictionary per thread—minimal memory overhead
|
|
682
|
+
|
|
683
|
+
**Why ThreadLocal?** In Puma/multi-threaded Rails apps, `ThreadLocal` reuses contexts per thread instead of allocating new ones for every operation (avoiding ~1.3MB of context allocation per operation). Each Puma worker thread maintains one CCtx and one DCtx (one per dictionary when dictionaries are used), reducing memory and improving throughput.
|
|
684
|
+
|
|
685
|
+
**Rails 8 Advantage:** Per-attribute compressors let you optimize each field—use VibeZstd for large structured data (JSON, serialized objects) and default Zlib for small strings.
|
|
686
|
+
|
|
687
|
+
### Dictionary Training for Encrypted Columns
|
|
688
|
+
|
|
689
|
+
For small, structured data (JSON, serialized objects), dictionaries can reduce size by 50%+:
|
|
690
|
+
|
|
691
|
+
```ruby
|
|
692
|
+
# Step 1: Train dictionary from representative samples (one-time setup)
|
|
693
|
+
samples = User.limit(1000).pluck(:preferences).compact
|
|
694
|
+
dict_data = VibeZstd.train_dict(samples, max_dict_size: 16_384)
|
|
695
|
+
File.write('config/user_prefs.dict', dict_data)
|
|
696
|
+
|
|
697
|
+
# Step 2: Load dictionary at boot (config/initializers/vibe_zstd_encryption.rb)
|
|
698
|
+
module UserPrefsCompressor
|
|
699
|
+
DICT = VibeZstd::CDict.new(File.binread('config/user_prefs.dict'))
|
|
700
|
+
|
|
701
|
+
def self.deflate(data)
|
|
702
|
+
VibeZstd::ThreadLocal.compress(data, dict: DICT)
|
|
703
|
+
end
|
|
704
|
+
|
|
705
|
+
def self.inflate(data)
|
|
706
|
+
VibeZstd::ThreadLocal.decompress(data, dict: DICT.to_ddict)
|
|
707
|
+
end
|
|
708
|
+
end
|
|
709
|
+
|
|
710
|
+
# Step 3: Configure the compressor (globally or per attribute)
|
|
711
|
+
# Rails 7.1+: Set as global compressor
|
|
712
|
+
ActiveSupport.on_load(:active_record) do
|
|
713
|
+
ActiveRecord::Encryption.config.compressor = UserPrefsCompressor
|
|
714
|
+
end
|
|
715
|
+
|
|
716
|
+
# Rails 8.0+: Set per-attribute
|
|
717
|
+
class User < ApplicationRecord
|
|
718
|
+
encrypts :preferences, compressor: UserPrefsCompressor
|
|
719
|
+
end
|
|
720
|
+
```
|
|
721
|
+
|
|
722
|
+
**Dictionary guidelines:**
|
|
723
|
+
- **Samples:** 100+ representative samples, similar to production data
|
|
724
|
+
- **Algorithm:** `train_dict` (fast, good) or `train_dict_cover` (slower, better compression)
|
|
725
|
+
- **Size:** 16-64KB typical; larger doesn't always improve compression
|
|
726
|
+
- **Best for:** Small (< 10KB), similar data (JSON, logs, structured text)
|
|
727
|
+
- **Avoid for:** Large files, binary data, highly variable content
|
|
728
|
+
|
|
729
|
+
### Stream Decompressing a Remote File
|
|
730
|
+
|
|
731
|
+
Memory-efficient decompression of large remote `.zst` files:
|
|
732
|
+
|
|
733
|
+
```ruby
|
|
734
|
+
require 'net/http'
|
|
735
|
+
require 'vibe_zstd'
|
|
736
|
+
|
|
737
|
+
uri = URI('https://example.com/large_dataset.zst')
|
|
738
|
+
File.open('dataset.csv', 'wb') do |output|
|
|
739
|
+
Net::HTTP.start(uri.host, uri.port, use_ssl: true) do |http|
|
|
740
|
+
http.request_get(uri.path) do |response|
|
|
741
|
+
reader = VibeZstd::DecompressReader.new(response.body)
|
|
742
|
+
reader.each { |chunk| output.write(chunk) }
|
|
743
|
+
end
|
|
744
|
+
end
|
|
745
|
+
end
|
|
746
|
+
```
|
|
747
|
+
|
|
748
|
+
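**Constant memory:** Decompression processes input of any size with ~128KB RAM (configurable via `initial_chunk_size`). Note that `response.body` buffers the entire compressed response in memory; to keep the download itself streaming, feed the reader from an IO that is filled incrementally (for example, a pipe written to by `response.read_body`).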
|
|
749
|
+
|
|
750
|
+
### Stream Compressing Large Files
|
|
751
|
+
|
|
752
|
+
Compress large files without loading them into memory:
|
|
753
|
+
|
|
754
|
+
```ruby
|
|
755
|
+
# Compress 10GB file in chunks
|
|
756
|
+
File.open('large_data.txt', 'rb') do |input|
|
|
757
|
+
VibeZstd::CompressWriter.open('large_data.txt.zst', level: 5) do |writer|
|
|
758
|
+
while chunk = input.read(1_048_576) # 1MB chunks
|
|
759
|
+
writer.write(chunk)
|
|
760
|
+
end
|
|
761
|
+
end
|
|
762
|
+
end
|
|
763
|
+
```
|
|
764
|
+
|
|
765
|
+
**When to stream:**
|
|
766
|
+
- Files > 100MB (avoids loading entire file into memory)
|
|
767
|
+
- Network streams, pipes, or IO objects
|
|
768
|
+
- Progressive compression (write data as it's generated)
|
|
769
|
+
|
|
770
|
+
### Skippable Frame Metadata
|
|
771
|
+
|
|
772
|
+
Add metadata (version, timestamp, checksums) without affecting decompression:
|
|
773
|
+
|
|
774
|
+
```ruby
|
|
775
|
+
# Write file with metadata
|
|
776
|
+
metadata = {version: "2.0", created_at: Time.now.to_i, schema: "users_v2"}.to_json
|
|
777
|
+
File.open('data.zst', 'wb') do |f|
|
|
778
|
+
f.write VibeZstd.write_skippable_frame(metadata, magic_number: 0)
|
|
779
|
+
f.write VibeZstd.compress(actual_data)
|
|
780
|
+
end
|
|
781
|
+
|
|
782
|
+
# Reading decompresses normally (skippable frames are skipped automatically)
|
|
783
|
+
data = VibeZstd.decompress(File.binread('data.zst'))
|
|
784
|
+
|
|
785
|
+
# Extract metadata without decompressing payload
|
|
786
|
+
File.open('data.zst', 'rb') do |f|
|
|
787
|
+
VibeZstd.each_skippable_frame(f.read) do |content, magic, offset|
|
|
788
|
+
metadata = JSON.parse(content)
|
|
789
|
+
puts "File version: #{metadata['version']}"
|
|
790
|
+
end
|
|
791
|
+
end
|
|
792
|
+
```
|
|
793
|
+
|
|
794
|
+
**Use cases:**
|
|
795
|
+
- **Versioning:** Track data schema versions for migrations
|
|
796
|
+
- **Provenance:** Store creation timestamp, user, source system
|
|
797
|
+
- **Integrity:** Add checksums or signatures before compression
|
|
798
|
+
- **Archives:** Multi-file archives with per-file metadata (see test_skippable_frame_archive_pattern in tests)
|
|
799
|
+
|
|
800
|
+
**Note:** Skippable frames add 8 bytes + metadata size. For small files, consider alternatives (separate metadata file, database columns).
|
|
801
|
+
|
|
802
|
+
## API Reference
|
|
803
|
+
|
|
804
|
+
### Module Methods
|
|
805
|
+
|
|
806
|
+
```ruby
|
|
807
|
+
VibeZstd.compress(data, level: nil, dict: nil)
|
|
808
|
+
VibeZstd.decompress(data, dict: nil)
|
|
809
|
+
VibeZstd.frame_content_size(data)
|
|
810
|
+
VibeZstd.compress_bound(size)
|
|
811
|
+
VibeZstd.train_dict(samples, max_dict_size: 112640)
|
|
812
|
+
VibeZstd.train_dict_cover(samples, max_dict_size:, k:, d:, **opts)
|
|
813
|
+
VibeZstd.train_dict_fast_cover(samples, max_dict_size:, k:, d:, **opts)
|
|
814
|
+
VibeZstd.get_dict_id(dict_data)
|
|
815
|
+
VibeZstd.get_dict_id_from_frame(data)
|
|
816
|
+
VibeZstd.version_number # e.g., 10507
|
|
817
|
+
VibeZstd.version_string # e.g., "1.5.7"
|
|
818
|
+
VibeZstd.min_level # Minimum compression level
|
|
819
|
+
VibeZstd.max_level # Maximum compression level
|
|
820
|
+
VibeZstd.default_level # Default compression level
|
|
821
|
+
```
|
|
822
|
+
|
|
823
|
+
### CCtx (Compression Context)
|
|
824
|
+
|
|
825
|
+
```ruby
|
|
826
|
+
cctx = VibeZstd::CCtx.new(**params)
|
|
827
|
+
cctx.compress(data, level: nil, dict: nil, pledged_size: nil)
|
|
828
|
+
cctx.use_prefix(prefix_data)
|
|
829
|
+
|
|
830
|
+
# Property setters (see parameters section)
|
|
831
|
+
cctx.checksum_flag = 1
|
|
832
|
+
cctx.content_size_flag = 1
|
|
833
|
+
cctx.compression_level = 9
|
|
834
|
+
cctx.window_log = 20
|
|
835
|
+
cctx.workers = 4
|
|
836
|
+
# ... and many more
|
|
837
|
+
|
|
838
|
+
# Class methods
|
|
839
|
+
VibeZstd::CCtx.parameter_bounds(param)
|
|
840
|
+
VibeZstd::CCtx.estimate_memory(level)
|
|
841
|
+
```
|
|
842
|
+
|
|
843
|
+
### DCtx (Decompression Context)
|
|
844
|
+
|
|
845
|
+
```ruby
|
|
846
|
+
dctx = VibeZstd::DCtx.new(**params)
|
|
847
|
+
dctx.decompress(data, dict: nil, initial_capacity: nil)
|
|
848
|
+
dctx.use_prefix(prefix_data)
|
|
849
|
+
dctx.initial_capacity = 1_048_576
|
|
850
|
+
dctx.window_log_max = 20
|
|
851
|
+
|
|
852
|
+
# Class methods
|
|
853
|
+
VibeZstd::DCtx.default_initial_capacity = value
|
|
854
|
+
VibeZstd::DCtx.parameter_bounds(param)
|
|
855
|
+
VibeZstd::DCtx.frame_content_size(data)
|
|
856
|
+
VibeZstd::DCtx.estimate_memory
|
|
857
|
+
```
|
|
858
|
+
|
|
859
|
+
### CDict / DDict (Dictionaries)
|
|
860
|
+
|
|
861
|
+
```ruby
|
|
862
|
+
cdict = VibeZstd::CDict.new(dict_data, level = nil)
|
|
863
|
+
cdict.size # Dictionary size in bytes
|
|
864
|
+
cdict.dict_id # Dictionary ID
|
|
865
|
+
|
|
866
|
+
ddict = VibeZstd::DDict.new(dict_data)
|
|
867
|
+
ddict.size
|
|
868
|
+
ddict.dict_id
|
|
869
|
+
|
|
870
|
+
# Class methods
|
|
871
|
+
VibeZstd::CDict.estimate_memory(dict_size, level)
|
|
872
|
+
VibeZstd::DDict.estimate_memory(dict_size)
|
|
873
|
+
```
|
|
874
|
+
|
|
875
|
+
### Streaming
|
|
876
|
+
|
|
877
|
+
```ruby
|
|
878
|
+
# Compression
|
|
879
|
+
writer = VibeZstd::CompressWriter.new(io, level: 3, dict: nil, pledged_size: nil)
|
|
880
|
+
VibeZstd::CompressWriter.open(io, **opts) { |w| ... }
|
|
881
|
+
writer.write(data)
|
|
882
|
+
writer.flush
|
|
883
|
+
writer.finish # or writer.close
|
|
884
|
+
|
|
885
|
+
# Decompression
|
|
886
|
+
reader = VibeZstd::DecompressReader.new(io, dict: nil, initial_chunk_size: nil)
|
|
887
|
+
VibeZstd::DecompressReader.open(io, **opts) { |r| ... }
|
|
888
|
+
reader.read(size = nil)
|
|
889
|
+
reader.eof?
|
|
890
|
+
reader.each { |chunk| ... }
|
|
891
|
+
reader.each_line(separator = $/) { |line| ... }
|
|
892
|
+
reader.gets(separator = $/)
|
|
893
|
+
reader.readline(separator = $/)
|
|
894
|
+
reader.readpartial(maxlen)
|
|
895
|
+
reader.read_all
|
|
896
|
+
```
|
|
897
|
+
|
|
898
|
+
### ThreadLocal (Context Pooling)
|
|
899
|
+
|
|
900
|
+
```ruby
|
|
901
|
+
# Thread-local context reuse (ideal for Rails/Puma applications)
|
|
902
|
+
VibeZstd::ThreadLocal.compress(data, level: nil, dict: nil, pledged_size: nil)
|
|
903
|
+
VibeZstd::ThreadLocal.decompress(data, dict: nil, initial_capacity: nil)
|
|
904
|
+
VibeZstd::ThreadLocal.clear_thread_cache!
|
|
905
|
+
VibeZstd::ThreadLocal.thread_cache_stats
|
|
906
|
+
```
|
|
907
|
+
|
|
908
|
+
## Thread Safety and Ractors
|
|
909
|
+
|
|
910
|
+
VibeZstd is designed to be thread-safe and Ractor-compatible:
|
|
911
|
+
|
|
912
|
+
- Each context/dictionary object manages its own Zstd state
|
|
913
|
+
- CPU-intensive operations release the GVL for concurrent execution
|
|
914
|
+
- Create separate instances for each thread/Ractor as needed
|
|
915
|
+
|
|
916
|
+
```ruby
|
|
917
|
+
# Safe: Each thread has its own context
|
|
918
|
+
threads = 10.times.map do
|
|
919
|
+
Thread.new do
|
|
920
|
+
cctx = VibeZstd::CCtx.new
|
|
921
|
+
# ... use cctx
|
|
922
|
+
end
|
|
923
|
+
end
|
|
924
|
+
```
|
|
925
|
+
|
|
926
|
+
## Benchmarking
|
|
927
|
+
|
|
928
|
+
Run comprehensive benchmarks:
|
|
929
|
+
|
|
930
|
+
```bash
|
|
931
|
+
# All benchmarks
|
|
932
|
+
ruby benchmark/run_all.rb
|
|
933
|
+
|
|
934
|
+
# Specific benchmarks
|
|
935
|
+
ruby benchmark/context_reuse.rb
|
|
936
|
+
ruby benchmark/dictionary_usage.rb
|
|
937
|
+
ruby benchmark/compression_levels.rb
|
|
938
|
+
ruby benchmark/streaming.rb
|
|
939
|
+
ruby benchmark/multithreading.rb
|
|
940
|
+
|
|
941
|
+
# Generate README benchmark output
|
|
942
|
+
ruby benchmark/for_readme.rb
|
|
943
|
+
```
|
|
944
|
+
|
|
945
|
+
See `benchmark/README.md` for detailed documentation.
|
|
946
|
+
|
|
947
|
+
## Development
|
|
948
|
+
|
|
949
|
+
To set up the development environment:
|
|
950
|
+
|
|
951
|
+
```bash
|
|
952
|
+
bin/setup # Install dependencies
|
|
953
|
+
rake compile # Build C extension
|
|
954
|
+
rake test # Run tests
|
|
955
|
+
bin/console # Interactive console
|
|
956
|
+
bundle exec rake install # Install locally
|
|
957
|
+
```
|
|
958
|
+
|
|
959
|
+
## Contributing
|
|
960
|
+
|
|
961
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/kreynolds/vibe_zstd.
|
|
962
|
+
|
|
963
|
+
## Vendored Libraries
|
|
964
|
+
|
|
965
|
+
This gem vendors the Zstandard (zstd) compression library to provide consistent behavior across all platforms. The vendored zstd library is located in `ext/vibe_zstd/libzstd/` and is licensed under the BSD License.
|
|
966
|
+
|
|
967
|
+
**Zstandard License:**
|
|
968
|
+
- Copyright (c) Meta Platforms, Inc. and affiliates
|
|
969
|
+
- Licensed under the BSD License (see `ext/vibe_zstd/libzstd/LICENSE`)
|
|
970
|
+
- Project: https://github.com/facebook/zstd
|
|
971
|
+
|
|
972
|
+
For the complete zstd license text, see the LICENSE file in the vendored library directory.
|
|
973
|
+
|
|
974
|
+
## License
|
|
975
|
+
|
|
976
|
+
The VibeZstd gem itself is available as open source under the [MIT License](https://opensource.org/licenses/MIT).
|
|
977
|
+
|
|
978
|
+
This gem vendors the Zstandard library, which is separately licensed under the BSD License. See the [Vendored Libraries](#vendored-libraries) section above for details.
|