vibe_zstd 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.standard.yml +3 -0
- data/CHANGELOG.md +22 -0
- data/LICENSE.txt +21 -0
- data/README.md +978 -0
- data/Rakefile +20 -0
- data/benchmark/README.md +198 -0
- data/benchmark/compression_levels.rb +99 -0
- data/benchmark/context_reuse.rb +174 -0
- data/benchmark/decompression_speed_by_level.rb +65 -0
- data/benchmark/dictionary_training.rb +182 -0
- data/benchmark/dictionary_usage.rb +121 -0
- data/benchmark/for_readme.rb +157 -0
- data/benchmark/generate_fixture.rb +82 -0
- data/benchmark/helpers.rb +237 -0
- data/benchmark/multithreading.rb +105 -0
- data/benchmark/run_all.rb +150 -0
- data/benchmark/streaming.rb +154 -0
- data/ext/vibe_zstd/Makefile +270 -0
- data/ext/vibe_zstd/cctx.c +565 -0
- data/ext/vibe_zstd/dctx.c +493 -0
- data/ext/vibe_zstd/dict.c +587 -0
- data/ext/vibe_zstd/extconf.rb +52 -0
- data/ext/vibe_zstd/frames.c +132 -0
- data/ext/vibe_zstd/libzstd/LICENSE +30 -0
- data/ext/vibe_zstd/libzstd/common/allocations.h +55 -0
- data/ext/vibe_zstd/libzstd/common/bits.h +205 -0
- data/ext/vibe_zstd/libzstd/common/bitstream.h +454 -0
- data/ext/vibe_zstd/libzstd/common/compiler.h +464 -0
- data/ext/vibe_zstd/libzstd/common/cpu.h +249 -0
- data/ext/vibe_zstd/libzstd/common/debug.c +30 -0
- data/ext/vibe_zstd/libzstd/common/debug.h +107 -0
- data/ext/vibe_zstd/libzstd/common/entropy_common.c +340 -0
- data/ext/vibe_zstd/libzstd/common/error_private.c +64 -0
- data/ext/vibe_zstd/libzstd/common/error_private.h +158 -0
- data/ext/vibe_zstd/libzstd/common/fse.h +625 -0
- data/ext/vibe_zstd/libzstd/common/fse_decompress.c +315 -0
- data/ext/vibe_zstd/libzstd/common/huf.h +277 -0
- data/ext/vibe_zstd/libzstd/common/mem.h +422 -0
- data/ext/vibe_zstd/libzstd/common/pool.c +371 -0
- data/ext/vibe_zstd/libzstd/common/pool.h +81 -0
- data/ext/vibe_zstd/libzstd/common/portability_macros.h +171 -0
- data/ext/vibe_zstd/libzstd/common/threading.c +182 -0
- data/ext/vibe_zstd/libzstd/common/threading.h +142 -0
- data/ext/vibe_zstd/libzstd/common/xxhash.c +18 -0
- data/ext/vibe_zstd/libzstd/common/xxhash.h +7094 -0
- data/ext/vibe_zstd/libzstd/common/zstd_common.c +48 -0
- data/ext/vibe_zstd/libzstd/common/zstd_deps.h +123 -0
- data/ext/vibe_zstd/libzstd/common/zstd_internal.h +324 -0
- data/ext/vibe_zstd/libzstd/common/zstd_trace.h +156 -0
- data/ext/vibe_zstd/libzstd/compress/clevels.h +134 -0
- data/ext/vibe_zstd/libzstd/compress/fse_compress.c +625 -0
- data/ext/vibe_zstd/libzstd/compress/hist.c +191 -0
- data/ext/vibe_zstd/libzstd/compress/hist.h +82 -0
- data/ext/vibe_zstd/libzstd/compress/huf_compress.c +1464 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress.c +7843 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress_internal.h +1636 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress_literals.c +235 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress_literals.h +39 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress_sequences.c +442 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress_sequences.h +55 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress_superblock.c +688 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_cwksp.h +765 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_double_fast.c +778 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_double_fast.h +42 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_fast.c +985 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_fast.h +30 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_lazy.c +2199 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_lazy.h +193 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_ldm.c +745 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_ldm.h +109 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_opt.c +1580 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_opt.h +72 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_preSplit.c +238 -0
- data/ext/vibe_zstd/libzstd/compress/zstd_preSplit.h +33 -0
- data/ext/vibe_zstd/libzstd/compress/zstdmt_compress.c +1923 -0
- data/ext/vibe_zstd/libzstd/compress/zstdmt_compress.h +102 -0
- data/ext/vibe_zstd/libzstd/decompress/huf_decompress.c +1944 -0
- data/ext/vibe_zstd/libzstd/decompress/huf_decompress_amd64.S +602 -0
- data/ext/vibe_zstd/libzstd/decompress/zstd_ddict.c +244 -0
- data/ext/vibe_zstd/libzstd/decompress/zstd_ddict.h +44 -0
- data/ext/vibe_zstd/libzstd/decompress/zstd_decompress.c +2410 -0
- data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_block.c +2209 -0
- data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_block.h +73 -0
- data/ext/vibe_zstd/libzstd/decompress/zstd_decompress_internal.h +240 -0
- data/ext/vibe_zstd/libzstd/deprecated/zbuff.h +214 -0
- data/ext/vibe_zstd/libzstd/deprecated/zbuff_common.c +26 -0
- data/ext/vibe_zstd/libzstd/deprecated/zbuff_compress.c +167 -0
- data/ext/vibe_zstd/libzstd/deprecated/zbuff_decompress.c +77 -0
- data/ext/vibe_zstd/libzstd/dictBuilder/cover.c +1302 -0
- data/ext/vibe_zstd/libzstd/dictBuilder/cover.h +152 -0
- data/ext/vibe_zstd/libzstd/dictBuilder/divsufsort.c +1913 -0
- data/ext/vibe_zstd/libzstd/dictBuilder/divsufsort.h +57 -0
- data/ext/vibe_zstd/libzstd/dictBuilder/fastcover.c +766 -0
- data/ext/vibe_zstd/libzstd/dictBuilder/zdict.c +1133 -0
- data/ext/vibe_zstd/libzstd/zdict.h +481 -0
- data/ext/vibe_zstd/libzstd/zstd.h +3198 -0
- data/ext/vibe_zstd/libzstd/zstd_errors.h +107 -0
- data/ext/vibe_zstd/streaming.c +410 -0
- data/ext/vibe_zstd/vibe_zstd.c +293 -0
- data/ext/vibe_zstd/vibe_zstd.h +56 -0
- data/ext/vibe_zstd/vibe_zstd_internal.h +27 -0
- data/lib/vibe_zstd/constants.rb +67 -0
- data/lib/vibe_zstd/version.rb +5 -0
- data/lib/vibe_zstd.rb +255 -0
- data/sig/vibe_zstd.rbs +76 -0
- metadata +179 -0
|
@@ -0,0 +1,493 @@
|
|
|
1
|
+
// DCtx implementation for VibeZstd
|
|
2
|
+
#include "vibe_zstd_internal.h"
|
|
3
|
+
|
|
4
|
+
// TypedData type - defined in vibe_zstd.c
|
|
5
|
+
extern rb_data_type_t vibe_zstd_dctx_type;
|
|
6
|
+
|
|
7
|
+
// Class-level default for initial capacity (0 = use ZSTD_DStreamOutSize)
|
|
8
|
+
static size_t default_initial_capacity = 0;
|
|
9
|
+
|
|
10
|
+
// Helper to set DCtx parameter from Ruby keyword argument
|
|
11
|
+
static int
|
|
12
|
+
vibe_zstd_dctx_init_param_iter(VALUE key, VALUE value, VALUE self) {
|
|
13
|
+
// Build the setter method name: key + "="
|
|
14
|
+
const char* key_str = rb_id2name(SYM2ID(key));
|
|
15
|
+
size_t setter_len = strlen(key_str) + 2;
|
|
16
|
+
char* setter = ALLOC_N(char, setter_len);
|
|
17
|
+
snprintf(setter, setter_len, "%s=", key_str);
|
|
18
|
+
|
|
19
|
+
// Call the setter method
|
|
20
|
+
rb_funcall(self, rb_intern(setter), 1, value);
|
|
21
|
+
|
|
22
|
+
xfree(setter);
|
|
23
|
+
return ST_CONTINUE;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
static VALUE
|
|
27
|
+
vibe_zstd_dctx_initialize(int argc, VALUE* argv, VALUE self) {
|
|
28
|
+
VALUE options;
|
|
29
|
+
rb_scan_args(argc, argv, "0:", &options);
|
|
30
|
+
|
|
31
|
+
// If keyword arguments provided, set parameters
|
|
32
|
+
if (!NIL_P(options)) {
|
|
33
|
+
rb_hash_foreach(options, vibe_zstd_dctx_init_param_iter, self);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
return self;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// Memory estimation class method
|
|
40
|
+
// DCtx.estimate_memory()
|
|
41
|
+
static VALUE
|
|
42
|
+
vibe_zstd_dctx_estimate_memory(VALUE self) {
|
|
43
|
+
size_t estimate = ZSTD_estimateDCtxSize();
|
|
44
|
+
return SIZET2NUM(estimate);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// Parameter lookup table for DCtx
|
|
48
|
+
typedef struct {
|
|
49
|
+
ID symbol_id;
|
|
50
|
+
ZSTD_dParameter param;
|
|
51
|
+
const char* name;
|
|
52
|
+
} dctx_param_entry;
|
|
53
|
+
|
|
54
|
+
static dctx_param_entry dctx_param_table[] = {
|
|
55
|
+
{0, ZSTD_d_windowLogMax, "window_log_max"}
|
|
56
|
+
};
|
|
57
|
+
|
|
58
|
+
#define DCTX_PARAM_TABLE_SIZE (sizeof(dctx_param_table) / sizeof(dctx_param_entry))
|
|
59
|
+
|
|
60
|
+
// Initialize DCtx parameter lookup table symbol IDs
|
|
61
|
+
static void
|
|
62
|
+
init_dctx_param_table(void) {
|
|
63
|
+
for (size_t i = 0; i < DCTX_PARAM_TABLE_SIZE; i++) {
|
|
64
|
+
dctx_param_table[i].symbol_id = rb_intern(dctx_param_table[i].name);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// Helper: look up DCtx parameter enum from symbol ID
|
|
69
|
+
// Maps Ruby symbol (e.g., :window_log_max) to ZSTD parameter constant
|
|
70
|
+
// Returns 1 if found, 0 if unknown parameter
|
|
71
|
+
static int
|
|
72
|
+
lookup_dctx_param(ID symbol_id, ZSTD_dParameter* param_out, const char** name_out) {
|
|
73
|
+
for (size_t i = 0; i < DCTX_PARAM_TABLE_SIZE; i++) {
|
|
74
|
+
if (dctx_param_table[i].symbol_id == symbol_id) {
|
|
75
|
+
*param_out = dctx_param_table[i].param;
|
|
76
|
+
if (name_out) *name_out = dctx_param_table[i].name;
|
|
77
|
+
return 1;
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
return 0;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// Generic setter with bounds checking for DCtx
|
|
84
|
+
static VALUE
|
|
85
|
+
vibe_zstd_dctx_set_param_generic(VALUE self, VALUE value, ZSTD_dParameter param, const char* param_name) {
|
|
86
|
+
vibe_zstd_dctx* dctx;
|
|
87
|
+
TypedData_Get_Struct(self, vibe_zstd_dctx, &vibe_zstd_dctx_type, dctx);
|
|
88
|
+
|
|
89
|
+
int val = NUM2INT(value);
|
|
90
|
+
|
|
91
|
+
// Get bounds for validation
|
|
92
|
+
ZSTD_bounds bounds = ZSTD_dParam_getBounds(param);
|
|
93
|
+
if (ZSTD_isError(bounds.error)) {
|
|
94
|
+
rb_raise(rb_eRuntimeError, "Failed to get bounds for %s: %s",
|
|
95
|
+
param_name, ZSTD_getErrorName(bounds.error));
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// Validate value is within bounds
|
|
99
|
+
if (val < bounds.lowerBound || val > bounds.upperBound) {
|
|
100
|
+
rb_raise(rb_eArgError, "%s must be between %d and %d (got %d)",
|
|
101
|
+
param_name, bounds.lowerBound, bounds.upperBound, val);
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
size_t result = ZSTD_DCtx_setParameter(dctx->dctx, param, val);
|
|
105
|
+
if (ZSTD_isError(result)) {
|
|
106
|
+
rb_raise(rb_eRuntimeError, "Failed to set %s: %s",
|
|
107
|
+
param_name, ZSTD_getErrorName(result));
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
return self;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// Generic getter for DCtx
|
|
114
|
+
static VALUE
|
|
115
|
+
vibe_zstd_dctx_get_param_generic(VALUE self, ZSTD_dParameter param, const char* param_name) {
|
|
116
|
+
vibe_zstd_dctx* dctx;
|
|
117
|
+
TypedData_Get_Struct(self, vibe_zstd_dctx, &vibe_zstd_dctx_type, dctx);
|
|
118
|
+
|
|
119
|
+
int value;
|
|
120
|
+
size_t result = ZSTD_DCtx_getParameter(dctx->dctx, param, &value);
|
|
121
|
+
if (ZSTD_isError(result)) {
|
|
122
|
+
rb_raise(rb_eRuntimeError, "Failed to get %s: %s",
|
|
123
|
+
param_name, ZSTD_getErrorName(result));
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
return INT2NUM(value);
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
// Macro to define setter/getter methods for a DCtx parameter
|
|
130
|
+
#define DEFINE_DCTX_PARAM_ACCESSORS(rb_name, param_enum, param_str) \
|
|
131
|
+
static VALUE vibe_zstd_dctx_set_##rb_name(VALUE self, VALUE value) { \
|
|
132
|
+
return vibe_zstd_dctx_set_param_generic(self, value, param_enum, param_str); \
|
|
133
|
+
} \
|
|
134
|
+
static VALUE vibe_zstd_dctx_get_##rb_name(VALUE self) { \
|
|
135
|
+
return vibe_zstd_dctx_get_param_generic(self, param_enum, param_str); \
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
// Define all DCtx parameter accessors
|
|
139
|
+
DEFINE_DCTX_PARAM_ACCESSORS(window_log_max, ZSTD_d_windowLogMax, "window_log_max")
|
|
140
|
+
|
|
141
|
+
// DCtx parameter_bounds - query parameter bounds (class method, kept for introspection)
|
|
142
|
+
static VALUE
|
|
143
|
+
vibe_zstd_dctx_parameter_bounds(VALUE self, VALUE param_sym) {
|
|
144
|
+
ID symbol_id = SYM2ID(param_sym);
|
|
145
|
+
ZSTD_dParameter param;
|
|
146
|
+
const char* param_name;
|
|
147
|
+
|
|
148
|
+
if (!lookup_dctx_param(symbol_id, ¶m, ¶m_name)) {
|
|
149
|
+
rb_raise(rb_eArgError, "Unknown parameter: %s", rb_id2name(symbol_id));
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
ZSTD_bounds bounds = ZSTD_dParam_getBounds(param);
|
|
153
|
+
if (ZSTD_isError(bounds.error)) {
|
|
154
|
+
rb_raise(rb_eRuntimeError, "Failed to get parameter bounds for %s: %s",
|
|
155
|
+
param_name, ZSTD_getErrorName(bounds.error));
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
VALUE result = rb_hash_new();
|
|
159
|
+
rb_hash_aset(result, ID2SYM(rb_intern("min")), INT2NUM(bounds.lowerBound));
|
|
160
|
+
rb_hash_aset(result, ID2SYM(rb_intern("max")), INT2NUM(bounds.upperBound));
|
|
161
|
+
return result;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// DCtx default_initial_capacity getter (class method)
|
|
165
|
+
static VALUE
|
|
166
|
+
vibe_zstd_dctx_get_default_initial_capacity(VALUE self) {
|
|
167
|
+
if (default_initial_capacity == 0) {
|
|
168
|
+
return SIZET2NUM(ZSTD_DStreamOutSize());
|
|
169
|
+
}
|
|
170
|
+
return SIZET2NUM(default_initial_capacity);
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// DCtx default_initial_capacity setter (class method)
|
|
174
|
+
static VALUE
|
|
175
|
+
vibe_zstd_dctx_set_default_initial_capacity(VALUE self, VALUE value) {
|
|
176
|
+
if (NIL_P(value)) {
|
|
177
|
+
default_initial_capacity = 0; // Reset to default
|
|
178
|
+
} else {
|
|
179
|
+
size_t capacity = NUM2SIZET(value);
|
|
180
|
+
if (capacity == 0) {
|
|
181
|
+
rb_raise(rb_eArgError, "initial_capacity must be positive (or nil to reset to default)");
|
|
182
|
+
}
|
|
183
|
+
default_initial_capacity = capacity;
|
|
184
|
+
}
|
|
185
|
+
return value;
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
// DCtx initial_capacity getter (instance method)
|
|
189
|
+
static VALUE
|
|
190
|
+
vibe_zstd_dctx_get_initial_capacity(VALUE self) {
|
|
191
|
+
vibe_zstd_dctx* dctx;
|
|
192
|
+
TypedData_Get_Struct(self, vibe_zstd_dctx, &vibe_zstd_dctx_type, dctx);
|
|
193
|
+
|
|
194
|
+
if (dctx->initial_capacity == 0) {
|
|
195
|
+
// Return the class default
|
|
196
|
+
return vibe_zstd_dctx_get_default_initial_capacity(Qnil);
|
|
197
|
+
}
|
|
198
|
+
return SIZET2NUM(dctx->initial_capacity);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
// DCtx initial_capacity setter (instance method)
|
|
202
|
+
static VALUE
|
|
203
|
+
vibe_zstd_dctx_set_initial_capacity(VALUE self, VALUE value) {
|
|
204
|
+
vibe_zstd_dctx* dctx;
|
|
205
|
+
TypedData_Get_Struct(self, vibe_zstd_dctx, &vibe_zstd_dctx_type, dctx);
|
|
206
|
+
|
|
207
|
+
if (NIL_P(value)) {
|
|
208
|
+
dctx->initial_capacity = 0; // Use class default
|
|
209
|
+
} else {
|
|
210
|
+
size_t capacity = NUM2SIZET(value);
|
|
211
|
+
if (capacity == 0) {
|
|
212
|
+
rb_raise(rb_eArgError, "initial_capacity must be positive (or nil to use class default)");
|
|
213
|
+
}
|
|
214
|
+
dctx->initial_capacity = capacity;
|
|
215
|
+
}
|
|
216
|
+
return value;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
// Decompress args for GVL release
|
|
220
|
+
// This structure packages all arguments needed for decompression so we can
|
|
221
|
+
// call ZSTD functions without holding Ruby's Global VM Lock (GVL).
|
|
222
|
+
// Releasing the GVL allows other Ruby threads to run during CPU-intensive decompression.
|
|
223
|
+
typedef struct {
|
|
224
|
+
ZSTD_DCtx* dctx;
|
|
225
|
+
ZSTD_DDict* ddict;
|
|
226
|
+
const void* src;
|
|
227
|
+
size_t srcSize;
|
|
228
|
+
void* dst;
|
|
229
|
+
size_t dstCapacity;
|
|
230
|
+
size_t result;
|
|
231
|
+
} decompress_args;
|
|
232
|
+
|
|
233
|
+
// Decompress without holding Ruby's GVL
|
|
234
|
+
// Called via rb_thread_call_without_gvl to allow parallel Ruby thread execution
|
|
235
|
+
// during CPU-intensive decompression operations
|
|
236
|
+
static void*
|
|
237
|
+
decompress_without_gvl(void* arg) {
|
|
238
|
+
decompress_args* args = arg;
|
|
239
|
+
if (args->ddict) {
|
|
240
|
+
args->result = ZSTD_decompress_usingDDict(args->dctx, args->dst, args->dstCapacity, args->src, args->srcSize, args->ddict);
|
|
241
|
+
} else {
|
|
242
|
+
args->result = ZSTD_decompressDCtx(args->dctx, args->dst, args->dstCapacity, args->src, args->srcSize);
|
|
243
|
+
}
|
|
244
|
+
return NULL;
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
// DCtx frame_content_size - class method to get frame content size
|
|
248
|
+
static VALUE
|
|
249
|
+
vibe_zstd_dctx_frame_content_size(VALUE self, VALUE data) {
|
|
250
|
+
StringValue(data);
|
|
251
|
+
unsigned long long contentSize = ZSTD_getFrameContentSize(RSTRING_PTR(data), RSTRING_LEN(data));
|
|
252
|
+
|
|
253
|
+
if (contentSize == ZSTD_CONTENTSIZE_ERROR) {
|
|
254
|
+
return Qnil; // Invalid frame
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
if (contentSize == ZSTD_CONTENTSIZE_UNKNOWN) {
|
|
258
|
+
return Qnil; // Unknown size
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
return ULL2NUM(contentSize);
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
// DCtx decompress - Decompress ZSTD-compressed data
|
|
265
|
+
//
|
|
266
|
+
// This function handles two decompression paths:
|
|
267
|
+
// 1. Known content size: Allocates exact buffer size and decompresses in one shot
|
|
268
|
+
// 2. Unknown content size: Uses streaming decompression with exponential buffer growth
|
|
269
|
+
//
|
|
270
|
+
// The unknown-size path uses a standard exponential growth strategy (doubling)
|
|
271
|
+
// which provides optimal O(n) amortized performance. Initial capacity can be
|
|
272
|
+
// configured via initial_capacity parameter to reduce reallocations for known size ranges.
|
|
273
|
+
//
|
|
274
|
+
// Dictionary validation is performed to ensure frame requirements match provided dict.
|
|
275
|
+
// Skippable frames at the beginning of data are automatically skipped.
|
|
276
|
+
static VALUE
|
|
277
|
+
vibe_zstd_dctx_decompress(int argc, VALUE* argv, VALUE self) {
|
|
278
|
+
VALUE data, options = Qnil;
|
|
279
|
+
rb_scan_args(argc, argv, "1:", &data, &options);
|
|
280
|
+
vibe_zstd_dctx* dctx;
|
|
281
|
+
TypedData_Get_Struct(self, vibe_zstd_dctx, &vibe_zstd_dctx_type, dctx);
|
|
282
|
+
StringValue(data);
|
|
283
|
+
const char* src = RSTRING_PTR(data);
|
|
284
|
+
size_t srcSize = RSTRING_LEN(data);
|
|
285
|
+
size_t offset = 0;
|
|
286
|
+
|
|
287
|
+
// Skip any leading skippable frames
|
|
288
|
+
while (offset < srcSize && ZSTD_isSkippableFrame(src + offset, srcSize - offset)) {
|
|
289
|
+
size_t frameSize = ZSTD_findFrameCompressedSize(src + offset, srcSize - offset);
|
|
290
|
+
if (ZSTD_isError(frameSize)) {
|
|
291
|
+
rb_raise(rb_eRuntimeError, "Invalid skippable frame at offset %zu: %s", offset, ZSTD_getErrorName(frameSize));
|
|
292
|
+
}
|
|
293
|
+
offset += frameSize;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
// Now check the actual compressed frame
|
|
297
|
+
if (offset >= srcSize) {
|
|
298
|
+
rb_raise(rb_eRuntimeError, "No compressed frame found in %zu bytes (only skippable frames)", srcSize);
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
src += offset;
|
|
302
|
+
srcSize -= offset;
|
|
303
|
+
|
|
304
|
+
unsigned long long contentSize = ZSTD_getFrameContentSize(src, srcSize);
|
|
305
|
+
if (contentSize == ZSTD_CONTENTSIZE_ERROR) {
|
|
306
|
+
rb_raise(rb_eRuntimeError, "Invalid compressed data: not a valid zstd frame (size: %zu bytes)", srcSize);
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
// Check dictionary requirements from the frame
|
|
310
|
+
unsigned int frame_dict_id = ZSTD_getDictID_fromFrame(src, srcSize);
|
|
311
|
+
|
|
312
|
+
// Extract keyword arguments
|
|
313
|
+
ZSTD_DDict* ddict = NULL;
|
|
314
|
+
unsigned int provided_dict_id = 0;
|
|
315
|
+
size_t initial_capacity = 0; // 0 = not specified in per-call options
|
|
316
|
+
|
|
317
|
+
if (!NIL_P(options)) {
|
|
318
|
+
VALUE dict_val = rb_hash_aref(options, ID2SYM(rb_intern("dict")));
|
|
319
|
+
if (!NIL_P(dict_val)) {
|
|
320
|
+
vibe_zstd_ddict* ddict_struct;
|
|
321
|
+
TypedData_Get_Struct(dict_val, vibe_zstd_ddict, &vibe_zstd_ddict_type, ddict_struct);
|
|
322
|
+
ddict = ddict_struct->ddict;
|
|
323
|
+
provided_dict_id = ZSTD_getDictID_fromDDict(ddict);
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
VALUE initial_capacity_val = rb_hash_aref(options, ID2SYM(rb_intern("initial_capacity")));
|
|
327
|
+
if (!NIL_P(initial_capacity_val)) {
|
|
328
|
+
initial_capacity = NUM2SIZET(initial_capacity_val);
|
|
329
|
+
if (initial_capacity == 0) {
|
|
330
|
+
rb_raise(rb_eArgError, "initial_capacity must be positive");
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
// Validate dictionary matches frame requirements
|
|
336
|
+
if (frame_dict_id != 0 && ddict == NULL) {
|
|
337
|
+
rb_raise(rb_eArgError, "Data requires dictionary (dict_id: %u) but none provided", frame_dict_id);
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
if (ddict != NULL && frame_dict_id != 0 && provided_dict_id != frame_dict_id) {
|
|
341
|
+
rb_raise(rb_eArgError, "Dictionary mismatch: frame requires dict_id %u, provided dict_id %u",
|
|
342
|
+
frame_dict_id, provided_dict_id);
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
// Resolve initial_capacity fallback chain: per-call > instance > class default > ZSTD default
|
|
346
|
+
if (initial_capacity == 0) {
|
|
347
|
+
initial_capacity = dctx->initial_capacity; // Instance default
|
|
348
|
+
if (initial_capacity == 0) {
|
|
349
|
+
initial_capacity = default_initial_capacity; // Class default
|
|
350
|
+
if (initial_capacity == 0) {
|
|
351
|
+
initial_capacity = ZSTD_DStreamOutSize(); // ZSTD default (~128KB)
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
// If content size is unknown, use streaming decompression with exponential growth
|
|
357
|
+
if (contentSize == ZSTD_CONTENTSIZE_UNKNOWN) {
|
|
358
|
+
size_t chunk_size = ZSTD_DStreamOutSize(); // Fixed chunk buffer size
|
|
359
|
+
VALUE tmpBuffer = rb_str_buf_new(chunk_size);
|
|
360
|
+
|
|
361
|
+
// Start with configured initial capacity
|
|
362
|
+
size_t result_capacity = initial_capacity;
|
|
363
|
+
size_t result_size = 0;
|
|
364
|
+
VALUE result = rb_str_buf_new(result_capacity);
|
|
365
|
+
|
|
366
|
+
ZSTD_inBuffer input = { src, srcSize, 0 };
|
|
367
|
+
|
|
368
|
+
while (input.pos < input.size) {
|
|
369
|
+
ZSTD_outBuffer output = { RSTRING_PTR(tmpBuffer), chunk_size, 0 };
|
|
370
|
+
|
|
371
|
+
size_t ret = ZSTD_decompressStream(dctx->dctx, &output, &input);
|
|
372
|
+
if (ZSTD_isError(ret)) {
|
|
373
|
+
rb_raise(rb_eRuntimeError, "Decompression failed: %s", ZSTD_getErrorName(ret));
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
if (output.pos > 0) {
|
|
377
|
+
// Grow result buffer exponentially if needed
|
|
378
|
+
if (result_size + output.pos > result_capacity) {
|
|
379
|
+
// Double capacity until it fits
|
|
380
|
+
while (result_capacity < result_size + output.pos) {
|
|
381
|
+
result_capacity *= 2;
|
|
382
|
+
}
|
|
383
|
+
rb_str_resize(result, result_capacity);
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
// Copy directly into result buffer
|
|
387
|
+
memcpy(RSTRING_PTR(result) + result_size, RSTRING_PTR(tmpBuffer), output.pos);
|
|
388
|
+
result_size += output.pos;
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
// Trim to actual size
|
|
393
|
+
rb_str_resize(result, result_size);
|
|
394
|
+
return result;
|
|
395
|
+
}
|
|
396
|
+
VALUE result = rb_str_new(NULL, contentSize);
|
|
397
|
+
decompress_args args = {
|
|
398
|
+
.dctx = dctx->dctx,
|
|
399
|
+
.ddict = ddict,
|
|
400
|
+
.src = src,
|
|
401
|
+
.srcSize = srcSize,
|
|
402
|
+
.dst = RSTRING_PTR(result),
|
|
403
|
+
.dstCapacity = contentSize,
|
|
404
|
+
.result = 0
|
|
405
|
+
};
|
|
406
|
+
rb_thread_call_without_gvl(decompress_without_gvl, &args, NULL, NULL);
|
|
407
|
+
if (ZSTD_isError(args.result)) {
|
|
408
|
+
rb_raise(rb_eRuntimeError, "Decompression failed: %s", ZSTD_getErrorName(args.result));
|
|
409
|
+
}
|
|
410
|
+
rb_str_set_len(result, args.result);
|
|
411
|
+
return result;
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
// DCtx use_prefix - use raw data as prefix (lightweight dictionary)
|
|
415
|
+
static VALUE
|
|
416
|
+
vibe_zstd_dctx_use_prefix(VALUE self, VALUE prefix_data) {
|
|
417
|
+
vibe_zstd_dctx* dctx;
|
|
418
|
+
TypedData_Get_Struct(self, vibe_zstd_dctx, &vibe_zstd_dctx_type, dctx);
|
|
419
|
+
|
|
420
|
+
StringValue(prefix_data);
|
|
421
|
+
|
|
422
|
+
size_t result = ZSTD_DCtx_refPrefix(dctx->dctx, RSTRING_PTR(prefix_data), RSTRING_LEN(prefix_data));
|
|
423
|
+
|
|
424
|
+
if (ZSTD_isError(result)) {
|
|
425
|
+
rb_raise(rb_eRuntimeError, "Failed to set prefix: %s", ZSTD_getErrorName(result));
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
return self;
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
// DCtx reset - reset context to clean state
|
|
432
|
+
static VALUE
|
|
433
|
+
vibe_zstd_dctx_reset(int argc, VALUE* argv, VALUE self) {
|
|
434
|
+
VALUE reset_mode;
|
|
435
|
+
rb_scan_args(argc, argv, "01", &reset_mode);
|
|
436
|
+
|
|
437
|
+
vibe_zstd_dctx* dctx;
|
|
438
|
+
TypedData_Get_Struct(self, vibe_zstd_dctx, &vibe_zstd_dctx_type, dctx);
|
|
439
|
+
|
|
440
|
+
// Default to SESSION_AND_PARAMETERS if no argument provided
|
|
441
|
+
ZSTD_ResetDirective directive = ZSTD_reset_session_and_parameters;
|
|
442
|
+
|
|
443
|
+
if (!NIL_P(reset_mode)) {
|
|
444
|
+
int mode = NUM2INT(reset_mode);
|
|
445
|
+
if (mode == ZSTD_reset_session_only) {
|
|
446
|
+
directive = ZSTD_reset_session_only;
|
|
447
|
+
} else if (mode == ZSTD_reset_parameters) {
|
|
448
|
+
directive = ZSTD_reset_parameters;
|
|
449
|
+
} else if (mode == ZSTD_reset_session_and_parameters) {
|
|
450
|
+
directive = ZSTD_reset_session_and_parameters;
|
|
451
|
+
} else {
|
|
452
|
+
rb_raise(rb_eArgError, "Invalid reset_mode %d: must be ResetDirective::SESSION (1), PARAMETERS (2), or BOTH (3)", mode);
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
size_t result = ZSTD_DCtx_reset(dctx->dctx, directive);
|
|
457
|
+
|
|
458
|
+
if (ZSTD_isError(result)) {
|
|
459
|
+
rb_raise(rb_eRuntimeError, "Failed to reset decompression context: %s", ZSTD_getErrorName(result));
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
return self;
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
// Class initialization function called from main Init_vibe_zstd
|
|
466
|
+
void
|
|
467
|
+
vibe_zstd_dctx_init_class(VALUE rb_cVibeZstdDCtx) {
|
|
468
|
+
// Initialize parameter lookup table
|
|
469
|
+
init_dctx_param_table();
|
|
470
|
+
|
|
471
|
+
rb_define_alloc_func(rb_cVibeZstdDCtx, vibe_zstd_dctx_alloc);
|
|
472
|
+
rb_define_method(rb_cVibeZstdDCtx, "initialize", vibe_zstd_dctx_initialize, -1);
|
|
473
|
+
rb_define_method(rb_cVibeZstdDCtx, "decompress", vibe_zstd_dctx_decompress, -1);
|
|
474
|
+
rb_define_method(rb_cVibeZstdDCtx, "use_prefix", vibe_zstd_dctx_use_prefix, 1);
|
|
475
|
+
rb_define_method(rb_cVibeZstdDCtx, "reset", vibe_zstd_dctx_reset, -1);
|
|
476
|
+
rb_define_singleton_method(rb_cVibeZstdDCtx, "parameter_bounds", vibe_zstd_dctx_parameter_bounds, 1);
|
|
477
|
+
rb_define_singleton_method(rb_cVibeZstdDCtx, "frame_content_size", vibe_zstd_dctx_frame_content_size, 1);
|
|
478
|
+
rb_define_singleton_method(rb_cVibeZstdDCtx, "estimate_memory", vibe_zstd_dctx_estimate_memory, 0);
|
|
479
|
+
|
|
480
|
+
// Class-level default_initial_capacity accessors
|
|
481
|
+
rb_define_singleton_method(rb_cVibeZstdDCtx, "default_initial_capacity", vibe_zstd_dctx_get_default_initial_capacity, 0);
|
|
482
|
+
rb_define_singleton_method(rb_cVibeZstdDCtx, "default_initial_capacity=", vibe_zstd_dctx_set_default_initial_capacity, 1);
|
|
483
|
+
|
|
484
|
+
// DCtx parameter accessors
|
|
485
|
+
rb_define_method(rb_cVibeZstdDCtx, "window_log_max=", vibe_zstd_dctx_set_window_log_max, 1);
|
|
486
|
+
rb_define_method(rb_cVibeZstdDCtx, "window_log_max", vibe_zstd_dctx_get_window_log_max, 0);
|
|
487
|
+
rb_define_alias(rb_cVibeZstdDCtx, "max_window_log=", "window_log_max=");
|
|
488
|
+
rb_define_alias(rb_cVibeZstdDCtx, "max_window_log", "window_log_max");
|
|
489
|
+
|
|
490
|
+
// Instance-level initial_capacity accessors
|
|
491
|
+
rb_define_method(rb_cVibeZstdDCtx, "initial_capacity", vibe_zstd_dctx_get_initial_capacity, 0);
|
|
492
|
+
rb_define_method(rb_cVibeZstdDCtx, "initial_capacity=", vibe_zstd_dctx_set_initial_capacity, 1);
|
|
493
|
+
}
|