vibe_zstd 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -1
- data/ext/vibe_zstd/cctx.c +2 -4
- data/ext/vibe_zstd/dctx.c +96 -39
- data/ext/vibe_zstd/dict.c +157 -125
- data/ext/vibe_zstd/extconf.rb +3 -5
- data/ext/vibe_zstd/streaming.c +30 -19
- data/ext/vibe_zstd/vibe_zstd.c +48 -16
- data/ext/vibe_zstd/vibe_zstd.h +1 -0
- data/lib/vibe_zstd/version.rb +1 -1
- data/lib/vibe_zstd.rb +24 -11
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2e4a4b0e94bb623793a619286308a6e459ecf3e5b23b6ddf668b8fe1063b4bef
|
|
4
|
+
data.tar.gz: 98878819233bc19e8d89f63c0f6b610b7c3177ac2d1d062897cf01e25162d2e8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 35135f9a6c458690982d7b17494ca6eec5439a901d210226a839be685d533c7c387b2046985fa603c05771bdf7baa2db3cba08faa97da2ac256a38b659f6afa3
|
|
7
|
+
data.tar.gz: b739b0318dcc3f88852b974705c2c2e989e96e063def67525d990b96553c2da0dbe6959c741024aaa4c1e88d357969dc6f33580550216e936b098bf910286ea2
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [1.1.0] - 2026-03-02
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- Release GVL during unknown-size streaming decompression, preventing thread blocking in multi-threaded servers (Puma, etc.)
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
- `DecompressReader#gets` now uses 8KB buffered reads instead of 1-byte-at-a-time, dramatically reducing read call overhead on line-oriented data
|
|
17
|
+
- `CompressWriter` reuses a single output buffer across calls instead of allocating ~128KB per `write`/`flush`/`finish`
|
|
18
|
+
|
|
19
|
+
### Fixed
|
|
20
|
+
- Exception safety in dict training: all four training functions now use `rb_ensure` so C buffers are always freed even if a Ruby exception is raised
|
|
21
|
+
- Add `dsize` callbacks to all `TypedData` types so the GC sees accurate memory pressure from ZSTD context objects
|
|
22
|
+
- Add `RUBY_TYPED_WB_PROTECTED` and proper write barriers to all typed structs for GC correctness
|
|
23
|
+
|
|
24
|
+
### Performance
|
|
25
|
+
- Stack-allocated string buffer in CCtx setter, eliminating a malloc/free per keyword-argument call
|
|
26
|
+
- Cache `id_write`/`id_read` as static IDs instead of calling `rb_intern` on every I/O call
|
|
27
|
+
- Remove redundant `init_cctx_param_table`/`init_dctx_param_table` calls at startup
|
|
28
|
+
|
|
10
29
|
## [1.0.2] - 2025-01-20
|
|
11
30
|
|
|
12
31
|
### Fixed
|
|
@@ -29,4 +48,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
29
48
|
- Thread pool support for parallel compression
|
|
30
49
|
- Memory-efficient API for large files
|
|
31
50
|
|
|
32
|
-
[1.
|
|
51
|
+
[1.1.0]: https://github.com/kreynolds/vibe_zstd/compare/v1.0.2...v1.1.0
|
|
52
|
+
[1.0.2]: https://github.com/kreynolds/vibe_zstd/compare/v1.0.1...v1.0.2
|
|
53
|
+
[1.0.1]: https://github.com/kreynolds/vibe_zstd/compare/v1.0.0...v1.0.1
|
|
54
|
+
[1.0.0]: https://github.com/kreynolds/vibe_zstd/releases/tag/v1.0.0
|
data/ext/vibe_zstd/cctx.c
CHANGED
|
@@ -9,14 +9,12 @@ static int
|
|
|
9
9
|
vibe_zstd_cctx_init_param_iter(VALUE key, VALUE value, VALUE self) {
|
|
10
10
|
// Build the setter method name: key + "="
|
|
11
11
|
const char* key_str = rb_id2name(SYM2ID(key));
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
snprintf(setter, setter_len, "%s=", key_str);
|
|
12
|
+
char setter[256];
|
|
13
|
+
snprintf(setter, sizeof(setter), "%s=", key_str);
|
|
15
14
|
|
|
16
15
|
// Call the setter method
|
|
17
16
|
rb_funcall(self, rb_intern(setter), 1, value);
|
|
18
17
|
|
|
19
|
-
xfree(setter);
|
|
20
18
|
return ST_CONTINUE;
|
|
21
19
|
}
|
|
22
20
|
|
data/ext/vibe_zstd/dctx.c
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
// DCtx implementation for VibeZstd
|
|
2
2
|
#include "vibe_zstd_internal.h"
|
|
3
|
+
#include <stdlib.h> // malloc, realloc, free for no-GVL decompression path
|
|
3
4
|
|
|
4
5
|
// TypedData type - defined in vibe_zstd.c
|
|
5
6
|
extern rb_data_type_t vibe_zstd_dctx_type;
|
|
@@ -12,14 +13,12 @@ static int
|
|
|
12
13
|
vibe_zstd_dctx_init_param_iter(VALUE key, VALUE value, VALUE self) {
|
|
13
14
|
// Build the setter method name: key + "="
|
|
14
15
|
const char* key_str = rb_id2name(SYM2ID(key));
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
snprintf(setter, setter_len, "%s=", key_str);
|
|
16
|
+
char setter[256];
|
|
17
|
+
snprintf(setter, sizeof(setter), "%s=", key_str);
|
|
18
18
|
|
|
19
19
|
// Call the setter method
|
|
20
20
|
rb_funcall(self, rb_intern(setter), 1, value);
|
|
21
21
|
|
|
22
|
-
xfree(setter);
|
|
23
22
|
return ST_CONTINUE;
|
|
24
23
|
}
|
|
25
24
|
|
|
@@ -244,6 +243,76 @@ decompress_without_gvl(void* arg) {
|
|
|
244
243
|
return NULL;
|
|
245
244
|
}
|
|
246
245
|
|
|
246
|
+
// Decompress stream args for GVL release (unknown content size path)
|
|
247
|
+
// Uses plain C malloc/realloc since Ruby API calls are not allowed without GVL
|
|
248
|
+
typedef struct {
|
|
249
|
+
ZSTD_DCtx *dctx;
|
|
250
|
+
const char *src;
|
|
251
|
+
size_t src_size;
|
|
252
|
+
char *dst;
|
|
253
|
+
size_t dst_capacity;
|
|
254
|
+
size_t dst_size;
|
|
255
|
+
size_t initial_capacity;
|
|
256
|
+
int error;
|
|
257
|
+
const char *error_name;
|
|
258
|
+
} decompress_stream_nogvl_args;
|
|
259
|
+
|
|
260
|
+
// Decompress stream without holding Ruby's GVL (unknown content size path)
|
|
261
|
+
// Performs the entire ZSTD_decompressStream loop using C malloc/realloc.
|
|
262
|
+
// No Ruby API calls allowed here.
|
|
263
|
+
static void*
|
|
264
|
+
decompress_stream_without_gvl(void* arg) {
|
|
265
|
+
decompress_stream_nogvl_args* args = arg;
|
|
266
|
+
args->error = 0;
|
|
267
|
+
args->error_name = NULL;
|
|
268
|
+
|
|
269
|
+
args->dst_capacity = args->initial_capacity;
|
|
270
|
+
args->dst = malloc(args->dst_capacity);
|
|
271
|
+
if (!args->dst) {
|
|
272
|
+
args->error = 1;
|
|
273
|
+
args->error_name = "malloc failed for decompression buffer";
|
|
274
|
+
return NULL;
|
|
275
|
+
}
|
|
276
|
+
args->dst_size = 0;
|
|
277
|
+
|
|
278
|
+
ZSTD_inBuffer input = { args->src, args->src_size, 0 };
|
|
279
|
+
|
|
280
|
+
while (input.pos < input.size) {
|
|
281
|
+
// Ensure we have room for output
|
|
282
|
+
if (args->dst_size >= args->dst_capacity) {
|
|
283
|
+
size_t new_capacity = args->dst_capacity * 2;
|
|
284
|
+
char* new_buf = realloc(args->dst, new_capacity);
|
|
285
|
+
if (!new_buf) {
|
|
286
|
+
args->error = 1;
|
|
287
|
+
args->error_name = "realloc failed during decompression";
|
|
288
|
+
return NULL;
|
|
289
|
+
}
|
|
290
|
+
args->dst = new_buf;
|
|
291
|
+
args->dst_capacity = new_capacity;
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
ZSTD_outBuffer output = {
|
|
295
|
+
args->dst + args->dst_size,
|
|
296
|
+
args->dst_capacity - args->dst_size,
|
|
297
|
+
0
|
|
298
|
+
};
|
|
299
|
+
|
|
300
|
+
size_t ret = ZSTD_decompressStream(args->dctx, &output, &input);
|
|
301
|
+
if (ZSTD_isError(ret)) {
|
|
302
|
+
args->error = 1;
|
|
303
|
+
args->error_name = ZSTD_getErrorName(ret);
|
|
304
|
+
return NULL;
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
args->dst_size += output.pos;
|
|
308
|
+
|
|
309
|
+
// ret == 0 means frame is complete
|
|
310
|
+
if (ret == 0) break;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
return NULL;
|
|
314
|
+
}
|
|
315
|
+
|
|
247
316
|
// DCtx frame_content_size - class method to get frame content size
|
|
248
317
|
static VALUE
|
|
249
318
|
vibe_zstd_dctx_frame_content_size(VALUE self, VALUE data) {
|
|
@@ -353,44 +422,32 @@ vibe_zstd_dctx_decompress(int argc, VALUE* argv, VALUE self) {
|
|
|
353
422
|
}
|
|
354
423
|
}
|
|
355
424
|
|
|
356
|
-
// If content size is unknown, use streaming decompression with exponential growth
|
|
425
|
+
// If content size is unknown, use streaming decompression with exponential growth.
|
|
426
|
+
// Releases GVL to allow other Ruby threads to run during decompression.
|
|
427
|
+
// Uses C malloc/realloc (not Ruby allocators) since Ruby API calls are forbidden without GVL.
|
|
357
428
|
if (contentSize == ZSTD_CONTENTSIZE_UNKNOWN) {
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
if (output.pos > 0) {
|
|
377
|
-
// Grow result buffer exponentially if needed
|
|
378
|
-
if (result_size + output.pos > result_capacity) {
|
|
379
|
-
// Double capacity until it fits
|
|
380
|
-
while (result_capacity < result_size + output.pos) {
|
|
381
|
-
result_capacity *= 2;
|
|
382
|
-
}
|
|
383
|
-
rb_str_resize(result, result_capacity);
|
|
384
|
-
}
|
|
385
|
-
|
|
386
|
-
// Copy directly into result buffer
|
|
387
|
-
memcpy(RSTRING_PTR(result) + result_size, RSTRING_PTR(tmpBuffer), output.pos);
|
|
388
|
-
result_size += output.pos;
|
|
389
|
-
}
|
|
429
|
+
decompress_stream_nogvl_args stream_args = {
|
|
430
|
+
.dctx = dctx->dctx,
|
|
431
|
+
.src = src,
|
|
432
|
+
.src_size = srcSize,
|
|
433
|
+
.dst = NULL,
|
|
434
|
+
.dst_capacity = 0,
|
|
435
|
+
.dst_size = 0,
|
|
436
|
+
.initial_capacity = initial_capacity,
|
|
437
|
+
.error = 0,
|
|
438
|
+
.error_name = NULL
|
|
439
|
+
};
|
|
440
|
+
|
|
441
|
+
rb_thread_call_without_gvl(decompress_stream_without_gvl, &stream_args, NULL, NULL);
|
|
442
|
+
|
|
443
|
+
if (stream_args.error) {
|
|
444
|
+
if (stream_args.dst) free(stream_args.dst);
|
|
445
|
+
rb_raise(rb_eRuntimeError, "Decompression failed: %s", stream_args.error_name);
|
|
390
446
|
}
|
|
391
447
|
|
|
392
|
-
//
|
|
393
|
-
|
|
448
|
+
// Create Ruby string from the C buffer, then free the C buffer
|
|
449
|
+
VALUE result = rb_str_new(stream_args.dst, stream_args.dst_size);
|
|
450
|
+
free(stream_args.dst);
|
|
394
451
|
return result;
|
|
395
452
|
}
|
|
396
453
|
VALUE result = rb_str_new(NULL, contentSize);
|
data/ext/vibe_zstd/dict.c
CHANGED
|
@@ -115,7 +115,7 @@ typedef struct {
|
|
|
115
115
|
|
|
116
116
|
// Cleanup function for dictionary training resources
|
|
117
117
|
// Safely frees all allocated memory, checking for NULL to handle partial allocations.
|
|
118
|
-
//
|
|
118
|
+
// Used as the ensure callback in rb_ensure to guarantee cleanup regardless of exceptions.
|
|
119
119
|
static VALUE
|
|
120
120
|
dict_training_cleanup(VALUE arg) {
|
|
121
121
|
dict_training_resources* resources = (dict_training_resources*)arg;
|
|
@@ -125,6 +125,108 @@ dict_training_cleanup(VALUE arg) {
|
|
|
125
125
|
return Qnil;
|
|
126
126
|
}
|
|
127
127
|
|
|
128
|
+
// Copy Ruby sample strings into contiguous C buffer for ZDICT functions
|
|
129
|
+
static void
|
|
130
|
+
copy_samples_to_buffer(dict_training_resources* resources, VALUE samples, long num_samples) {
|
|
131
|
+
size_t offset = 0;
|
|
132
|
+
for (long i = 0; i < num_samples; i++) {
|
|
133
|
+
VALUE sample = rb_ary_entry(samples, i);
|
|
134
|
+
size_t sample_len = RSTRING_LEN(sample);
|
|
135
|
+
resources->sample_sizes[i] = sample_len;
|
|
136
|
+
memcpy(resources->samples_buffer + offset, RSTRING_PTR(sample), sample_len);
|
|
137
|
+
offset += sample_len;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
// Context structs and body functions for rb_ensure-based dict training.
|
|
142
|
+
// Each training function uses rb_ensure to guarantee resource cleanup even if
|
|
143
|
+
// rb_str_new or other Ruby API calls raise exceptions (e.g., OOM).
|
|
144
|
+
// Common fields are in dict_training_ctx; variant structs embed it as first member.
|
|
145
|
+
|
|
146
|
+
typedef struct {
|
|
147
|
+
dict_training_resources* resources;
|
|
148
|
+
VALUE result;
|
|
149
|
+
size_t max_dict_size;
|
|
150
|
+
long num_samples;
|
|
151
|
+
VALUE samples;
|
|
152
|
+
} dict_training_ctx;
|
|
153
|
+
|
|
154
|
+
// Body half of the basic dictionary training.
// arg is a dict_training_ctx*. Copies the samples into the C buffers, runs
// ZDICT_trainFromBuffer, raises RuntimeError if ZDICT reports an error, and
// otherwise stores the dictionary String in ctx->result and returns it.
static VALUE train_dict_basic_body(VALUE arg) {
    dict_training_ctx* training = (dict_training_ctx*)arg;
    dict_training_resources* res = training->resources;

    copy_samples_to_buffer(res, training->samples, training->num_samples);

    size_t trained_size = ZDICT_trainFromBuffer(
        res->dict_buffer, training->max_dict_size,
        res->samples_buffer, res->sample_sizes, (unsigned)training->num_samples
    );
    if (ZDICT_isError(trained_size)) {
        rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(trained_size));
    }

    VALUE dict = rb_str_new(res->dict_buffer, trained_size);
    training->result = dict;
    return dict;
}
|
|
167
|
+
|
|
168
|
+
typedef struct {
|
|
169
|
+
dict_training_ctx base;
|
|
170
|
+
ZDICT_cover_params_t params;
|
|
171
|
+
} train_dict_cover_ctx;
|
|
172
|
+
|
|
173
|
+
// Body half of COVER dictionary training.
// arg is a train_dict_cover_ctx*. Copies the samples into the C buffers, runs
// ZDICT_trainFromBuffer_cover with ctx->params, raises RuntimeError if ZDICT
// reports an error, and otherwise stores the dictionary String in
// ctx->base.result and returns it.
static VALUE train_dict_cover_body(VALUE arg) {
    train_dict_cover_ctx* cover = (train_dict_cover_ctx*)arg;
    dict_training_ctx* training = &cover->base;
    dict_training_resources* res = training->resources;

    copy_samples_to_buffer(res, training->samples, training->num_samples);

    size_t trained_size = ZDICT_trainFromBuffer_cover(
        res->dict_buffer, training->max_dict_size,
        res->samples_buffer, res->sample_sizes, (unsigned)training->num_samples,
        cover->params
    );
    if (ZDICT_isError(trained_size)) {
        rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(trained_size));
    }

    VALUE dict = rb_str_new(res->dict_buffer, trained_size);
    training->result = dict;
    return dict;
}
|
|
187
|
+
|
|
188
|
+
typedef struct {
|
|
189
|
+
dict_training_ctx base;
|
|
190
|
+
ZDICT_fastCover_params_t params;
|
|
191
|
+
} train_dict_fast_cover_ctx;
|
|
192
|
+
|
|
193
|
+
// Body half of fast-COVER dictionary training.
// arg is a train_dict_fast_cover_ctx*. Copies the samples into the C buffers,
// runs ZDICT_trainFromBuffer_fastCover with ctx->params, raises RuntimeError
// if ZDICT reports an error, and otherwise stores the dictionary String in
// ctx->base.result and returns it.
static VALUE train_dict_fast_cover_body(VALUE arg) {
    train_dict_fast_cover_ctx* fast_cover = (train_dict_fast_cover_ctx*)arg;
    dict_training_ctx* training = &fast_cover->base;
    dict_training_resources* res = training->resources;

    copy_samples_to_buffer(res, training->samples, training->num_samples);

    size_t trained_size = ZDICT_trainFromBuffer_fastCover(
        res->dict_buffer, training->max_dict_size,
        res->samples_buffer, res->sample_sizes, (unsigned)training->num_samples,
        fast_cover->params
    );
    if (ZDICT_isError(trained_size)) {
        rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(trained_size));
    }

    VALUE dict = rb_str_new(res->dict_buffer, trained_size);
    training->result = dict;
    return dict;
}
|
|
207
|
+
|
|
208
|
+
typedef struct {
|
|
209
|
+
dict_training_ctx base;
|
|
210
|
+
VALUE content_val;
|
|
211
|
+
ZDICT_params_t params;
|
|
212
|
+
} finalize_dict_ctx;
|
|
213
|
+
|
|
214
|
+
// Body half of dictionary finalization.
// arg is a finalize_dict_ctx*. Copies the samples into the C buffers, runs
// ZDICT_finalizeDictionary on the bytes of ctx->content_val with ctx->params,
// raises RuntimeError if ZDICT reports an error, and otherwise stores the
// finalized dictionary String in ctx->base.result and returns it.
static VALUE finalize_dict_body(VALUE arg) {
    finalize_dict_ctx* finalize = (finalize_dict_ctx*)arg;
    dict_training_ctx* training = &finalize->base;
    dict_training_resources* res = training->resources;

    copy_samples_to_buffer(res, training->samples, training->num_samples);

    size_t final_size = ZDICT_finalizeDictionary(
        res->dict_buffer, training->max_dict_size,
        RSTRING_PTR(finalize->content_val), RSTRING_LEN(finalize->content_val),
        res->samples_buffer, res->sample_sizes, (unsigned)training->num_samples,
        finalize->params
    );
    if (ZDICT_isError(final_size)) {
        rb_raise(rb_eRuntimeError, "Dictionary finalization failed: %s", ZDICT_getErrorName(final_size));
    }

    VALUE dict = rb_str_new(res->dict_buffer, final_size);
    training->result = dict;
    return dict;
}
|
|
229
|
+
|
|
128
230
|
// Train dictionary from samples - module-level method
|
|
129
231
|
// VibeZstd.train_dict(samples, max_dict_size: 112640)
|
|
130
232
|
//
|
|
@@ -166,36 +268,17 @@ vibe_zstd_train_dict(int argc, VALUE* argv, VALUE self) {
|
|
|
166
268
|
resources.samples_buffer = ALLOC_N(char, total_samples_size);
|
|
167
269
|
resources.dict_buffer = ALLOC_N(char, max_dict_size);
|
|
168
270
|
|
|
169
|
-
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
// Train the dictionary
|
|
181
|
-
size_t dict_size = ZDICT_trainFromBuffer(
|
|
182
|
-
resources.dict_buffer, max_dict_size,
|
|
183
|
-
resources.samples_buffer, resources.sample_sizes, (unsigned)num_samples
|
|
184
|
-
);
|
|
185
|
-
|
|
186
|
-
// Check for errors
|
|
187
|
-
if (ZDICT_isError(dict_size)) {
|
|
188
|
-
dict_training_cleanup((VALUE)&resources);
|
|
189
|
-
rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
// Create Ruby string with the trained dictionary
|
|
193
|
-
VALUE dict_string = rb_str_new(resources.dict_buffer, dict_size);
|
|
194
|
-
|
|
195
|
-
// Clean up all resources
|
|
196
|
-
dict_training_cleanup((VALUE)&resources);
|
|
197
|
-
|
|
198
|
-
return dict_string;
|
|
271
|
+
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
272
|
+
dict_training_ctx ctx = {
|
|
273
|
+
.resources = &resources,
|
|
274
|
+
.result = Qnil,
|
|
275
|
+
.max_dict_size = max_dict_size,
|
|
276
|
+
.num_samples = num_samples,
|
|
277
|
+
.samples = samples
|
|
278
|
+
};
|
|
279
|
+
|
|
280
|
+
rb_ensure(train_dict_basic_body, (VALUE)&ctx, dict_training_cleanup, (VALUE)&resources);
|
|
281
|
+
return ctx.result;
|
|
199
282
|
}
|
|
200
283
|
|
|
201
284
|
// VibeZstd.train_dict_cover(samples, max_dict_size: 112640, k: 0, d: 0, steps: 0, split_point: 1.0, shrink_dict: false, shrink_dict_max_regression: 0, nb_threads: 0)
|
|
@@ -268,37 +351,20 @@ vibe_zstd_train_dict_cover(int argc, VALUE* argv, VALUE self) {
|
|
|
268
351
|
resources.samples_buffer = ALLOC_N(char, total_samples_size);
|
|
269
352
|
resources.dict_buffer = ALLOC_N(char, max_dict_size);
|
|
270
353
|
|
|
271
|
-
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
resources.samples_buffer, resources.sample_sizes, (unsigned)num_samples,
|
|
286
|
-
params
|
|
287
|
-
);
|
|
288
|
-
|
|
289
|
-
// Check for errors
|
|
290
|
-
if (ZDICT_isError(dict_size)) {
|
|
291
|
-
dict_training_cleanup((VALUE)&resources);
|
|
292
|
-
rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
// Create Ruby string with the trained dictionary
|
|
296
|
-
VALUE dict_string = rb_str_new(resources.dict_buffer, dict_size);
|
|
297
|
-
|
|
298
|
-
// Clean up all resources
|
|
299
|
-
dict_training_cleanup((VALUE)&resources);
|
|
300
|
-
|
|
301
|
-
return dict_string;
|
|
354
|
+
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
355
|
+
train_dict_cover_ctx ctx = {
|
|
356
|
+
.base = {
|
|
357
|
+
.resources = &resources,
|
|
358
|
+
.result = Qnil,
|
|
359
|
+
.max_dict_size = max_dict_size,
|
|
360
|
+
.num_samples = num_samples,
|
|
361
|
+
.samples = samples
|
|
362
|
+
},
|
|
363
|
+
.params = params
|
|
364
|
+
};
|
|
365
|
+
|
|
366
|
+
rb_ensure(train_dict_cover_body, (VALUE)&ctx, dict_training_cleanup, (VALUE)&resources);
|
|
367
|
+
return ctx.base.result;
|
|
302
368
|
}
|
|
303
369
|
|
|
304
370
|
// VibeZstd.train_dict_fast_cover(samples, max_dict_size: 112640, k: 0, d: 0, f: 0, split_point: 1.0, accel: 0, shrink_dict: false, shrink_dict_max_regression: 0, nb_threads: 0)
|
|
@@ -374,37 +440,20 @@ vibe_zstd_train_dict_fast_cover(int argc, VALUE* argv, VALUE self) {
|
|
|
374
440
|
resources.samples_buffer = ALLOC_N(char, total_samples_size);
|
|
375
441
|
resources.dict_buffer = ALLOC_N(char, max_dict_size);
|
|
376
442
|
|
|
377
|
-
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
resources.samples_buffer, resources.sample_sizes, (unsigned)num_samples,
|
|
392
|
-
params
|
|
393
|
-
);
|
|
394
|
-
|
|
395
|
-
// Check for errors
|
|
396
|
-
if (ZDICT_isError(dict_size)) {
|
|
397
|
-
dict_training_cleanup((VALUE)&resources);
|
|
398
|
-
rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));
|
|
399
|
-
}
|
|
400
|
-
|
|
401
|
-
// Create Ruby string with the trained dictionary
|
|
402
|
-
VALUE dict_string = rb_str_new(resources.dict_buffer, dict_size);
|
|
403
|
-
|
|
404
|
-
// Clean up all resources
|
|
405
|
-
dict_training_cleanup((VALUE)&resources);
|
|
406
|
-
|
|
407
|
-
return dict_string;
|
|
443
|
+
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
444
|
+
train_dict_fast_cover_ctx ctx = {
|
|
445
|
+
.base = {
|
|
446
|
+
.resources = &resources,
|
|
447
|
+
.result = Qnil,
|
|
448
|
+
.max_dict_size = max_dict_size,
|
|
449
|
+
.num_samples = num_samples,
|
|
450
|
+
.samples = samples
|
|
451
|
+
},
|
|
452
|
+
.params = params
|
|
453
|
+
};
|
|
454
|
+
|
|
455
|
+
rb_ensure(train_dict_fast_cover_body, (VALUE)&ctx, dict_training_cleanup, (VALUE)&resources);
|
|
456
|
+
return ctx.base.result;
|
|
408
457
|
}
|
|
409
458
|
|
|
410
459
|
// Get dictionary ID from raw dictionary data - module-level utility
|
|
@@ -490,38 +539,21 @@ vibe_zstd_finalize_dictionary(int argc, VALUE* argv, VALUE self) {
|
|
|
490
539
|
resources.samples_buffer = ALLOC_N(char, total_samples_size);
|
|
491
540
|
resources.dict_buffer = ALLOC_N(char, max_size);
|
|
492
541
|
|
|
493
|
-
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
resources.samples_buffer, resources.sample_sizes, (unsigned)num_samples,
|
|
509
|
-
params
|
|
510
|
-
);
|
|
511
|
-
|
|
512
|
-
// Check for errors
|
|
513
|
-
if (ZDICT_isError(dict_size)) {
|
|
514
|
-
dict_training_cleanup((VALUE)&resources);
|
|
515
|
-
rb_raise(rb_eRuntimeError, "Dictionary finalization failed: %s", ZDICT_getErrorName(dict_size));
|
|
516
|
-
}
|
|
517
|
-
|
|
518
|
-
// Create Ruby string with the finalized dictionary
|
|
519
|
-
VALUE dict_string = rb_str_new(resources.dict_buffer, dict_size);
|
|
520
|
-
|
|
521
|
-
// Clean up all resources
|
|
522
|
-
dict_training_cleanup((VALUE)&resources);
|
|
523
|
-
|
|
524
|
-
return dict_string;
|
|
542
|
+
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
543
|
+
finalize_dict_ctx ctx = {
|
|
544
|
+
.base = {
|
|
545
|
+
.resources = &resources,
|
|
546
|
+
.result = Qnil,
|
|
547
|
+
.max_dict_size = max_size,
|
|
548
|
+
.num_samples = num_samples,
|
|
549
|
+
.samples = samples_val
|
|
550
|
+
},
|
|
551
|
+
.content_val = content_val,
|
|
552
|
+
.params = params
|
|
553
|
+
};
|
|
554
|
+
|
|
555
|
+
rb_ensure(finalize_dict_body, (VALUE)&ctx, dict_training_cleanup, (VALUE)&resources);
|
|
556
|
+
return ctx.base.result;
|
|
525
557
|
}
|
|
526
558
|
|
|
527
559
|
// Get dictionary header size - module-level utility
|
data/ext/vibe_zstd/extconf.rb
CHANGED
|
@@ -16,8 +16,8 @@ $INCFLAGS << " -I#{LIBZSTD_DIR}/dictBuilder"
|
|
|
16
16
|
|
|
17
17
|
# Add preprocessor definitions
|
|
18
18
|
append_cflags("-DXXH_NAMESPACE=ZSTD_")
|
|
19
|
-
append_cflags("-DZSTD_LEGACY_SUPPORT=0")
|
|
20
|
-
append_cflags("-DZSTD_MULTITHREAD")
|
|
19
|
+
append_cflags("-DZSTD_LEGACY_SUPPORT=0") # Disable legacy support to reduce size
|
|
20
|
+
append_cflags("-DZSTD_MULTITHREAD") # Enable multithreading support
|
|
21
21
|
|
|
22
22
|
# Link with pthread for multithreading
|
|
23
23
|
have_library("pthread") || abort("pthread library is required for multithreading support")
|
|
@@ -32,8 +32,7 @@ zstd_sources = Dir[
|
|
|
32
32
|
"#{LIBZSTD_DIR}/common/*.c",
|
|
33
33
|
"#{LIBZSTD_DIR}/compress/*.c",
|
|
34
34
|
"#{LIBZSTD_DIR}/decompress/*.{c,S}",
|
|
35
|
-
"#{LIBZSTD_DIR}/dictBuilder/*.c"
|
|
36
|
-
"#{LIBZSTD_DIR}/deprecated/*.c"
|
|
35
|
+
"#{LIBZSTD_DIR}/dictBuilder/*.c"
|
|
37
36
|
].map { |path| File.basename(path) }
|
|
38
37
|
|
|
39
38
|
# Add the main vibe_zstd.c file (which includes the split files via #include)
|
|
@@ -46,7 +45,6 @@ $VPATH << "$(srcdir)/libzstd/common"
|
|
|
46
45
|
$VPATH << "$(srcdir)/libzstd/compress"
|
|
47
46
|
$VPATH << "$(srcdir)/libzstd/decompress"
|
|
48
47
|
$VPATH << "$(srcdir)/libzstd/dictBuilder"
|
|
49
|
-
$VPATH << "$(srcdir)/libzstd/deprecated"
|
|
50
48
|
# standard:enable Style/GlobalVars
|
|
51
49
|
|
|
52
50
|
create_makefile("vibe_zstd/vibe_zstd")
|
data/ext/vibe_zstd/streaming.c
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
// Streaming implementation for VibeZstd
|
|
2
2
|
#include "vibe_zstd_internal.h"
|
|
3
3
|
|
|
4
|
+
// Cached method IDs for frequently called methods
|
|
5
|
+
static ID id_write;
|
|
6
|
+
static ID id_read;
|
|
7
|
+
|
|
4
8
|
// Forward declarations
|
|
5
9
|
static VALUE vibe_zstd_writer_initialize(int argc, VALUE *argv, VALUE self);
|
|
6
10
|
static VALUE vibe_zstd_writer_write(VALUE self, VALUE data);
|
|
@@ -25,12 +29,12 @@ vibe_zstd_writer_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
25
29
|
TypedData_Get_Struct(self, vibe_zstd_cstream, &vibe_zstd_cstream_type, cstream);
|
|
26
30
|
|
|
27
31
|
// Validate IO object responds to write (duck typing)
|
|
28
|
-
if (!rb_respond_to(io,
|
|
32
|
+
if (!rb_respond_to(io, id_write)) {
|
|
29
33
|
rb_raise(rb_eTypeError, "IO object must respond to write");
|
|
30
34
|
}
|
|
31
35
|
|
|
32
|
-
// Store IO object
|
|
33
|
-
cstream->io
|
|
36
|
+
// Store IO object (write barrier for WB_PROTECTED)
|
|
37
|
+
RB_OBJ_WRITE(self, &cstream->io, io);
|
|
34
38
|
rb_ivar_set(self, rb_intern("@io"), io);
|
|
35
39
|
|
|
36
40
|
// Parse options
|
|
@@ -87,6 +91,9 @@ vibe_zstd_writer_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
87
91
|
}
|
|
88
92
|
}
|
|
89
93
|
|
|
94
|
+
// Allocate reusable output buffer (write barrier for WB_PROTECTED)
|
|
95
|
+
RB_OBJ_WRITE(self, &cstream->output_buffer, rb_str_buf_new(ZSTD_CStreamOutSize()));
|
|
96
|
+
|
|
90
97
|
return self;
|
|
91
98
|
}
|
|
92
99
|
|
|
@@ -105,10 +112,11 @@ vibe_zstd_writer_write(VALUE self, VALUE data) {
|
|
|
105
112
|
};
|
|
106
113
|
|
|
107
114
|
size_t outBufferSize = ZSTD_CStreamOutSize();
|
|
108
|
-
VALUE outBuffer =
|
|
115
|
+
VALUE outBuffer = cstream->output_buffer;
|
|
109
116
|
|
|
110
117
|
// Process all input data in chunks
|
|
111
118
|
while (input.pos < input.size) {
|
|
119
|
+
rb_str_set_len(outBuffer, 0); // Reset buffer for reuse
|
|
112
120
|
ZSTD_outBuffer output = {
|
|
113
121
|
.dst = RSTRING_PTR(outBuffer),
|
|
114
122
|
.size = outBufferSize,
|
|
@@ -125,8 +133,7 @@ vibe_zstd_writer_write(VALUE self, VALUE data) {
|
|
|
125
133
|
// Write any compressed output that was produced
|
|
126
134
|
if (output.pos > 0) {
|
|
127
135
|
rb_str_set_len(outBuffer, output.pos);
|
|
128
|
-
rb_funcall(cstream->io,
|
|
129
|
-
// No need to resize - buffer capacity remains at outBufferSize
|
|
136
|
+
rb_funcall(cstream->io, id_write, 1, outBuffer);
|
|
130
137
|
}
|
|
131
138
|
}
|
|
132
139
|
|
|
@@ -139,7 +146,7 @@ vibe_zstd_writer_flush(VALUE self) {
|
|
|
139
146
|
TypedData_Get_Struct(self, vibe_zstd_cstream, &vibe_zstd_cstream_type, cstream);
|
|
140
147
|
|
|
141
148
|
size_t outBufferSize = ZSTD_CStreamOutSize();
|
|
142
|
-
VALUE outBuffer =
|
|
149
|
+
VALUE outBuffer = cstream->output_buffer;
|
|
143
150
|
|
|
144
151
|
ZSTD_inBuffer input = { NULL, 0, 0 };
|
|
145
152
|
size_t remaining;
|
|
@@ -147,6 +154,7 @@ vibe_zstd_writer_flush(VALUE self) {
|
|
|
147
154
|
// ZSTD_e_flush: flush internal buffers, making all data readable
|
|
148
155
|
// Loop until remaining == 0 (flush complete)
|
|
149
156
|
do {
|
|
157
|
+
rb_str_set_len(outBuffer, 0); // Reset buffer for reuse
|
|
150
158
|
ZSTD_outBuffer output = {
|
|
151
159
|
.dst = RSTRING_PTR(outBuffer),
|
|
152
160
|
.size = outBufferSize,
|
|
@@ -161,8 +169,7 @@ vibe_zstd_writer_flush(VALUE self) {
|
|
|
161
169
|
|
|
162
170
|
if (output.pos > 0) {
|
|
163
171
|
rb_str_set_len(outBuffer, output.pos);
|
|
164
|
-
rb_funcall(cstream->io,
|
|
165
|
-
// No need to resize - buffer capacity remains at outBufferSize
|
|
172
|
+
rb_funcall(cstream->io, id_write, 1, outBuffer);
|
|
166
173
|
}
|
|
167
174
|
} while (remaining > 0);
|
|
168
175
|
|
|
@@ -175,7 +182,7 @@ vibe_zstd_writer_finish(VALUE self) {
|
|
|
175
182
|
TypedData_Get_Struct(self, vibe_zstd_cstream, &vibe_zstd_cstream_type, cstream);
|
|
176
183
|
|
|
177
184
|
size_t outBufferSize = ZSTD_CStreamOutSize();
|
|
178
|
-
VALUE outBuffer =
|
|
185
|
+
VALUE outBuffer = cstream->output_buffer;
|
|
179
186
|
|
|
180
187
|
ZSTD_inBuffer input = { NULL, 0, 0 };
|
|
181
188
|
size_t remaining;
|
|
@@ -183,6 +190,7 @@ vibe_zstd_writer_finish(VALUE self) {
|
|
|
183
190
|
// ZSTD_e_end: finalize frame with checksum and epilogue
|
|
184
191
|
// Loop until remaining == 0 (frame complete)
|
|
185
192
|
do {
|
|
193
|
+
rb_str_set_len(outBuffer, 0); // Reset buffer for reuse
|
|
186
194
|
ZSTD_outBuffer output = {
|
|
187
195
|
.dst = RSTRING_PTR(outBuffer),
|
|
188
196
|
.size = outBufferSize,
|
|
@@ -197,8 +205,7 @@ vibe_zstd_writer_finish(VALUE self) {
|
|
|
197
205
|
|
|
198
206
|
if (output.pos > 0) {
|
|
199
207
|
rb_str_set_len(outBuffer, output.pos);
|
|
200
|
-
rb_funcall(cstream->io,
|
|
201
|
-
// No need to resize - buffer capacity remains at outBufferSize
|
|
208
|
+
rb_funcall(cstream->io, id_write, 1, outBuffer);
|
|
202
209
|
}
|
|
203
210
|
} while (remaining > 0);
|
|
204
211
|
|
|
@@ -216,12 +223,12 @@ vibe_zstd_reader_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
216
223
|
TypedData_Get_Struct(self, vibe_zstd_dstream, &vibe_zstd_dstream_type, dstream);
|
|
217
224
|
|
|
218
225
|
// Validate IO object responds to read (duck typing)
|
|
219
|
-
if (!rb_respond_to(io,
|
|
226
|
+
if (!rb_respond_to(io, id_read)) {
|
|
220
227
|
rb_raise(rb_eTypeError, "IO object must respond to read");
|
|
221
228
|
}
|
|
222
229
|
|
|
223
|
-
// Store IO object
|
|
224
|
-
dstream->io
|
|
230
|
+
// Store IO object (write barrier for WB_PROTECTED)
|
|
231
|
+
RB_OBJ_WRITE(self, &dstream->io, io);
|
|
225
232
|
rb_ivar_set(self, rb_intern("@io"), io);
|
|
226
233
|
|
|
227
234
|
// Parse options
|
|
@@ -263,7 +270,7 @@ vibe_zstd_reader_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
263
270
|
}
|
|
264
271
|
|
|
265
272
|
// Initialize input buffer management
|
|
266
|
-
dstream->input_data
|
|
273
|
+
RB_OBJ_WRITE(self, &dstream->input_data, rb_str_new(NULL, 0));
|
|
267
274
|
dstream->input.src = NULL;
|
|
268
275
|
dstream->input.size = 0;
|
|
269
276
|
dstream->input.pos = 0;
|
|
@@ -317,7 +324,7 @@ vibe_zstd_reader_read(int argc, VALUE *argv, VALUE self) {
|
|
|
317
324
|
while (total_read < requested_size) {
|
|
318
325
|
// Refill input buffer when all compressed data consumed
|
|
319
326
|
if (dstream->input.pos >= dstream->input.size) {
|
|
320
|
-
VALUE chunk = rb_funcall(dstream->io,
|
|
327
|
+
VALUE chunk = rb_funcall(dstream->io, id_read, 1, SIZET2NUM(inBufferSize));
|
|
321
328
|
if (NIL_P(chunk)) {
|
|
322
329
|
dstream->eof = 1;
|
|
323
330
|
if (total_read == 0 && !made_progress) {
|
|
@@ -326,8 +333,8 @@ vibe_zstd_reader_read(int argc, VALUE *argv, VALUE self) {
|
|
|
326
333
|
break;
|
|
327
334
|
}
|
|
328
335
|
|
|
329
|
-
// Reset input buffer with new data
|
|
330
|
-
dstream->input_data
|
|
336
|
+
// Reset input buffer with new data (write barrier for WB_PROTECTED)
|
|
337
|
+
RB_OBJ_WRITE(self, &dstream->input_data, chunk);
|
|
331
338
|
dstream->input.src = RSTRING_PTR(chunk);
|
|
332
339
|
dstream->input.size = RSTRING_LEN(chunk);
|
|
333
340
|
dstream->input.pos = 0;
|
|
@@ -394,6 +401,10 @@ vibe_zstd_reader_eof(VALUE self) {
|
|
|
394
401
|
// Class initialization function called from main Init_vibe_zstd
|
|
395
402
|
void
|
|
396
403
|
vibe_zstd_streaming_init_classes(VALUE rb_cVibeZstdCompressWriter, VALUE rb_cVibeZstdDecompressReader) {
|
|
404
|
+
// Cache method IDs for frequently called methods
|
|
405
|
+
id_write = rb_intern("write");
|
|
406
|
+
id_read = rb_intern("read");
|
|
407
|
+
|
|
397
408
|
// CompressWriter setup
|
|
398
409
|
rb_define_alloc_func(rb_cVibeZstdCompressWriter, vibe_zstd_cstream_alloc);
|
|
399
410
|
rb_define_method(rb_cVibeZstdCompressWriter, "initialize", vibe_zstd_writer_initialize, -1);
|
data/ext/vibe_zstd/vibe_zstd.c
CHANGED
|
@@ -12,7 +12,7 @@ VALUE rb_cVibeZstdDDict;
|
|
|
12
12
|
VALUE rb_cVibeZstdCompressWriter;
|
|
13
13
|
VALUE rb_cVibeZstdDecompressReader;
|
|
14
14
|
|
|
15
|
-
// Forward declarations for free and mark functions
|
|
15
|
+
// Forward declarations for free, mark, and dsize functions
|
|
16
16
|
static void vibe_zstd_cctx_free(void* ptr);
|
|
17
17
|
static void vibe_zstd_dctx_free(void* ptr);
|
|
18
18
|
static void vibe_zstd_cdict_free(void* ptr);
|
|
@@ -22,16 +22,47 @@ static void vibe_zstd_cstream_mark(void* ptr);
|
|
|
22
22
|
static void vibe_zstd_dstream_free(void* ptr);
|
|
23
23
|
static void vibe_zstd_dstream_mark(void* ptr);
|
|
24
24
|
|
|
25
|
+
// dsize callbacks - report memory usage to Ruby GC for accurate memory pressure tracking
|
|
26
|
+
static size_t vibe_zstd_cctx_dsize(const void* ptr) {
|
|
27
|
+
const vibe_zstd_cctx* cctx = ptr;
|
|
28
|
+
return sizeof(vibe_zstd_cctx) + (cctx->cctx ? ZSTD_sizeof_CCtx(cctx->cctx) : 0);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
static size_t vibe_zstd_dctx_dsize(const void* ptr) {
|
|
32
|
+
const vibe_zstd_dctx* dctx = ptr;
|
|
33
|
+
return sizeof(vibe_zstd_dctx) + (dctx->dctx ? ZSTD_sizeof_DCtx(dctx->dctx) : 0);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
static size_t vibe_zstd_cdict_dsize(const void* ptr) {
|
|
37
|
+
const vibe_zstd_cdict* cdict = ptr;
|
|
38
|
+
return sizeof(vibe_zstd_cdict) + (cdict->cdict ? ZSTD_sizeof_CDict(cdict->cdict) : 0);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
static size_t vibe_zstd_ddict_dsize(const void* ptr) {
|
|
42
|
+
const vibe_zstd_ddict* ddict = ptr;
|
|
43
|
+
return sizeof(vibe_zstd_ddict) + (ddict->ddict ? ZSTD_sizeof_DDict(ddict->ddict) : 0);
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
static size_t vibe_zstd_cstream_dsize(const void* ptr) {
|
|
47
|
+
const vibe_zstd_cstream* cstream = ptr;
|
|
48
|
+
return sizeof(vibe_zstd_cstream) + (cstream->cstream ? ZSTD_sizeof_CStream(cstream->cstream) : 0);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
static size_t vibe_zstd_dstream_dsize(const void* ptr) {
|
|
52
|
+
const vibe_zstd_dstream* dstream = ptr;
|
|
53
|
+
return sizeof(vibe_zstd_dstream) + (dstream->dstream ? ZSTD_sizeof_DStream(dstream->dstream) : 0);
|
|
54
|
+
}
|
|
55
|
+
|
|
25
56
|
// TypedData type definitions (these are referenced by extern in the split files)
|
|
26
57
|
rb_data_type_t vibe_zstd_cctx_type = {
|
|
27
58
|
.wrap_struct_name = "vibe_zstd_cctx",
|
|
28
59
|
.function = {
|
|
29
60
|
.dmark = NULL,
|
|
30
61
|
.dfree = (RUBY_DATA_FUNC)vibe_zstd_cctx_free,
|
|
31
|
-
.dsize =
|
|
62
|
+
.dsize = vibe_zstd_cctx_dsize,
|
|
32
63
|
},
|
|
33
64
|
.data = NULL,
|
|
34
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
|
65
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
35
66
|
};
|
|
36
67
|
|
|
37
68
|
rb_data_type_t vibe_zstd_dctx_type = {
|
|
@@ -39,10 +70,10 @@ rb_data_type_t vibe_zstd_dctx_type = {
|
|
|
39
70
|
.function = {
|
|
40
71
|
.dmark = NULL,
|
|
41
72
|
.dfree = (RUBY_DATA_FUNC)vibe_zstd_dctx_free,
|
|
42
|
-
.dsize =
|
|
73
|
+
.dsize = vibe_zstd_dctx_dsize,
|
|
43
74
|
},
|
|
44
75
|
.data = NULL,
|
|
45
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
|
76
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
46
77
|
};
|
|
47
78
|
|
|
48
79
|
rb_data_type_t vibe_zstd_cdict_type = {
|
|
@@ -50,10 +81,10 @@ rb_data_type_t vibe_zstd_cdict_type = {
|
|
|
50
81
|
.function = {
|
|
51
82
|
.dmark = NULL,
|
|
52
83
|
.dfree = (RUBY_DATA_FUNC)vibe_zstd_cdict_free,
|
|
53
|
-
.dsize =
|
|
84
|
+
.dsize = vibe_zstd_cdict_dsize,
|
|
54
85
|
},
|
|
55
86
|
.data = NULL,
|
|
56
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
|
87
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
57
88
|
};
|
|
58
89
|
|
|
59
90
|
rb_data_type_t vibe_zstd_ddict_type = {
|
|
@@ -61,10 +92,10 @@ rb_data_type_t vibe_zstd_ddict_type = {
|
|
|
61
92
|
.function = {
|
|
62
93
|
.dmark = NULL,
|
|
63
94
|
.dfree = (RUBY_DATA_FUNC)vibe_zstd_ddict_free,
|
|
64
|
-
.dsize =
|
|
95
|
+
.dsize = vibe_zstd_ddict_dsize,
|
|
65
96
|
},
|
|
66
97
|
.data = NULL,
|
|
67
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
|
98
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
68
99
|
};
|
|
69
100
|
|
|
70
101
|
rb_data_type_t vibe_zstd_cstream_type = {
|
|
@@ -72,10 +103,10 @@ rb_data_type_t vibe_zstd_cstream_type = {
|
|
|
72
103
|
.function = {
|
|
73
104
|
.dmark = (RUBY_DATA_FUNC)vibe_zstd_cstream_mark,
|
|
74
105
|
.dfree = (RUBY_DATA_FUNC)vibe_zstd_cstream_free,
|
|
75
|
-
.dsize =
|
|
106
|
+
.dsize = vibe_zstd_cstream_dsize,
|
|
76
107
|
},
|
|
77
108
|
.data = NULL,
|
|
78
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
|
109
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
79
110
|
};
|
|
80
111
|
|
|
81
112
|
rb_data_type_t vibe_zstd_dstream_type = {
|
|
@@ -83,10 +114,10 @@ rb_data_type_t vibe_zstd_dstream_type = {
|
|
|
83
114
|
.function = {
|
|
84
115
|
.dmark = (RUBY_DATA_FUNC)vibe_zstd_dstream_mark,
|
|
85
116
|
.dfree = (RUBY_DATA_FUNC)vibe_zstd_dstream_free,
|
|
86
|
-
.dsize =
|
|
117
|
+
.dsize = vibe_zstd_dstream_dsize,
|
|
87
118
|
},
|
|
88
119
|
.data = NULL,
|
|
89
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
|
120
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
90
121
|
};
|
|
91
122
|
|
|
92
123
|
// Free functions
|
|
@@ -130,6 +161,7 @@ static void
|
|
|
130
161
|
vibe_zstd_cstream_mark(void* ptr) {
|
|
131
162
|
vibe_zstd_cstream* cstream = ptr;
|
|
132
163
|
rb_gc_mark(cstream->io);
|
|
164
|
+
rb_gc_mark(cstream->output_buffer);
|
|
133
165
|
}
|
|
134
166
|
|
|
135
167
|
static void
|
|
@@ -200,6 +232,7 @@ vibe_zstd_cstream_alloc(VALUE klass) {
|
|
|
200
232
|
vibe_zstd_cstream* cstream = ALLOC(vibe_zstd_cstream);
|
|
201
233
|
cstream->cstream = NULL;
|
|
202
234
|
cstream->io = Qnil;
|
|
235
|
+
cstream->output_buffer = Qnil;
|
|
203
236
|
return TypedData_Wrap_Struct(klass, &vibe_zstd_cstream_type, cstream);
|
|
204
237
|
}
|
|
205
238
|
|
|
@@ -257,9 +290,8 @@ vibe_zstd_default_c_level(VALUE self) {
|
|
|
257
290
|
RUBY_FUNC_EXPORTED void
|
|
258
291
|
Init_vibe_zstd(void)
|
|
259
292
|
{
|
|
260
|
-
//
|
|
261
|
-
|
|
262
|
-
init_dctx_param_table();
|
|
293
|
+
// Parameter lookup tables are initialized in vibe_zstd_cctx_init_class()
|
|
294
|
+
// and vibe_zstd_dctx_init_class() respectively - no need to call here.
|
|
263
295
|
|
|
264
296
|
rb_mVibeZstd = rb_define_module("VibeZstd");
|
|
265
297
|
|
data/ext/vibe_zstd/vibe_zstd.h
CHANGED
data/lib/vibe_zstd/version.rb
CHANGED
data/lib/vibe_zstd.rb
CHANGED
|
@@ -39,9 +39,7 @@ module VibeZstd
|
|
|
39
39
|
|
|
40
40
|
# Defense: Prevent infinite loop on malformed data
|
|
41
41
|
# A valid frame must have non-zero size (at minimum: frame header)
|
|
42
|
-
if frame_size <= 0
|
|
43
|
-
raise Error, "Invalid frame: zero or negative size at offset #{offset}"
|
|
44
|
-
end
|
|
42
|
+
raise Error, "Invalid frame: zero or negative size at offset #{offset}" if frame_size <= 0
|
|
45
43
|
|
|
46
44
|
if skippable_frame?(frame_data)
|
|
47
45
|
content, magic_variant = read_skippable_frame(frame_data)
|
|
@@ -191,8 +189,14 @@ module VibeZstd
|
|
|
191
189
|
end
|
|
192
190
|
|
|
193
191
|
# Read all remaining data
|
|
192
|
+
# Drains any buffered data from line_buffer first
|
|
194
193
|
def read_all
|
|
195
194
|
chunks = []
|
|
195
|
+
# Drain line buffer first if present
|
|
196
|
+
if @line_buffer && !@line_buffer.empty?
|
|
197
|
+
chunks << @line_buffer
|
|
198
|
+
@line_buffer = +""
|
|
199
|
+
end
|
|
196
200
|
while (chunk = read)
|
|
197
201
|
chunks << chunk
|
|
198
202
|
end
|
|
@@ -214,20 +218,29 @@ module VibeZstd
|
|
|
214
218
|
end
|
|
215
219
|
end
|
|
216
220
|
|
|
217
|
-
# Read a single line (up to
|
|
221
|
+
# Read a single line (up to separator or EOF)
|
|
222
|
+
# Uses buffered reads (8192 bytes) instead of byte-at-a-time for performance.
|
|
223
|
+
# Orders of magnitude faster for line-oriented reading.
|
|
218
224
|
def gets(sep = $/)
|
|
219
|
-
return nil if eof?
|
|
225
|
+
return nil if eof? && (@line_buffer.nil? || @line_buffer.empty?)
|
|
220
226
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
227
|
+
@line_buffer ||= +""
|
|
228
|
+
|
|
229
|
+
loop do
|
|
230
|
+
# Check buffer for separator
|
|
231
|
+
if (idx = @line_buffer.index(sep))
|
|
232
|
+
return @line_buffer.slice!(0, idx + sep.bytesize)
|
|
233
|
+
end
|
|
234
|
+
|
|
235
|
+
# Read more data in larger chunks
|
|
236
|
+
chunk = read(8192)
|
|
224
237
|
break unless chunk
|
|
225
238
|
|
|
226
|
-
|
|
227
|
-
break if chunk.end_with?(sep)
|
|
239
|
+
@line_buffer << chunk
|
|
228
240
|
end
|
|
229
241
|
|
|
230
|
-
|
|
242
|
+
# Return remaining buffer or nil
|
|
243
|
+
@line_buffer.empty? ? nil : @line_buffer.slice!(0, @line_buffer.bytesize)
|
|
231
244
|
end
|
|
232
245
|
|
|
233
246
|
# Iterate over lines
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: vibe_zstd
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.2
|
|
4
|
+
version: 1.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kelley Reynolds
|
|
8
8
|
bindir: exe
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2026-
|
|
10
|
+
date: 2026-03-03 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: benchmark-ips
|