vibe_zstd 1.0.2 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -1
- data/ext/vibe_zstd/cctx.c +2 -4
- data/ext/vibe_zstd/dctx.c +96 -39
- data/ext/vibe_zstd/dict.c +157 -125
- data/ext/vibe_zstd/extconf.rb +6 -7
- data/ext/vibe_zstd/streaming.c +38 -19
- data/ext/vibe_zstd/vibe_zstd.c +48 -16
- data/ext/vibe_zstd/vibe_zstd.h +1 -0
- data/lib/vibe_zstd/version.rb +1 -1
- data/lib/vibe_zstd.rb +24 -11
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9b3326bfa52942e1f7ee95578bbbff2cd87647748a086742551d562eda6d94f0
|
|
4
|
+
data.tar.gz: b594dade59dab715722477dc6d39eaa7768a39505fef2354c494090462f03afd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: bb5a4e27578f337ef0a72c133344c8d3f4e229b250f07c771d534bf65ffa40c0588d167e3cf01140d5baa1627e6866080b9710c059f15a59245b48eb7de026e8
|
|
7
|
+
data.tar.gz: 26f0ac03864044c25068cf00c8039d78d7ac677ec1b61298f0bf09fc8918a48a0105785607a53c12bf2c6f5c7a1c3f47d6dc64aefa4ef27e03803509f3717d80
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [1.1.1] - 2026-03-25
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- Fix `RuntimeError: can't set length of shared string` in `CompressWriter` when writing to File IO on Ruby 3.3+ caused by COW buffer sharing during `IO#write`
|
|
14
|
+
- Fix vendored zstd build flags (`-DZSTD_MULTITHREAD`, `-DXXH_NAMESPACE`, `-DZSTD_LEGACY_SUPPORT`) not propagating to compiled sources, restoring multithreaded compression support (`workers`, `rsyncable` parameters)
|
|
15
|
+
|
|
16
|
+
## [1.1.0] - 2026-03-02
|
|
17
|
+
|
|
18
|
+
### Added
|
|
19
|
+
- Release GVL during unknown-size streaming decompression, preventing thread blocking in multi-threaded servers (Puma, etc.)
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
- `DecompressReader#gets` now uses 8KB buffered reads instead of 1-byte-at-a-time, dramatically reducing read call overhead on line-oriented data
|
|
23
|
+
- `CompressWriter` reuses a single output buffer across calls instead of allocating ~128KB per `write`/`flush`/`finish`
|
|
24
|
+
|
|
25
|
+
### Fixed
|
|
26
|
+
- Exception safety in dict training: all four training functions now use `rb_ensure` so C buffers are always freed even if a Ruby exception is raised
|
|
27
|
+
- Add `dsize` callbacks to all `TypedData` types so the GC sees accurate memory pressure from ZSTD context objects
|
|
28
|
+
- Add `RUBY_TYPED_WB_PROTECTED` and proper write barriers to all typed structs for GC correctness
|
|
29
|
+
|
|
30
|
+
### Performance
|
|
31
|
+
- Stack-allocated string buffer in CCtx setter, eliminating a malloc/free per keyword-argument call
|
|
32
|
+
- Cache `id_write`/`id_read` as static IDs instead of calling `rb_intern` on every I/O call
|
|
33
|
+
- Remove redundant `init_cctx_param_table`/`init_dctx_param_table` calls at startup
|
|
34
|
+
|
|
10
35
|
## [1.0.2] - 2025-01-20
|
|
11
36
|
|
|
12
37
|
### Fixed
|
|
@@ -29,4 +54,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
29
54
|
- Thread pool support for parallel compression
|
|
30
55
|
- Memory-efficient API for large files
|
|
31
56
|
|
|
32
|
-
[1.
|
|
57
|
+
[1.1.1]: https://github.com/kreynolds/vibe_zstd/compare/v1.1.0...v1.1.1
|
|
58
|
+
[1.1.0]: https://github.com/kreynolds/vibe_zstd/compare/v1.0.2...v1.1.0
|
|
59
|
+
[1.0.2]: https://github.com/kreynolds/vibe_zstd/compare/v1.0.1...v1.0.2
|
|
60
|
+
[1.0.1]: https://github.com/kreynolds/vibe_zstd/compare/v1.0.0...v1.0.1
|
|
61
|
+
[1.0.0]: https://github.com/kreynolds/vibe_zstd/releases/tag/v1.0.0
|
data/ext/vibe_zstd/cctx.c
CHANGED
|
@@ -9,14 +9,12 @@ static int
|
|
|
9
9
|
vibe_zstd_cctx_init_param_iter(VALUE key, VALUE value, VALUE self) {
|
|
10
10
|
// Build the setter method name: key + "="
|
|
11
11
|
const char* key_str = rb_id2name(SYM2ID(key));
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
snprintf(setter, setter_len, "%s=", key_str);
|
|
12
|
+
char setter[256];
|
|
13
|
+
snprintf(setter, sizeof(setter), "%s=", key_str);
|
|
15
14
|
|
|
16
15
|
// Call the setter method
|
|
17
16
|
rb_funcall(self, rb_intern(setter), 1, value);
|
|
18
17
|
|
|
19
|
-
xfree(setter);
|
|
20
18
|
return ST_CONTINUE;
|
|
21
19
|
}
|
|
22
20
|
|
data/ext/vibe_zstd/dctx.c
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
// DCtx implementation for VibeZstd
|
|
2
2
|
#include "vibe_zstd_internal.h"
|
|
3
|
+
#include <stdlib.h> // malloc, realloc, free for no-GVL decompression path
|
|
3
4
|
|
|
4
5
|
// TypedData type - defined in vibe_zstd.c
|
|
5
6
|
extern rb_data_type_t vibe_zstd_dctx_type;
|
|
@@ -12,14 +13,12 @@ static int
|
|
|
12
13
|
vibe_zstd_dctx_init_param_iter(VALUE key, VALUE value, VALUE self) {
|
|
13
14
|
// Build the setter method name: key + "="
|
|
14
15
|
const char* key_str = rb_id2name(SYM2ID(key));
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
snprintf(setter, setter_len, "%s=", key_str);
|
|
16
|
+
char setter[256];
|
|
17
|
+
snprintf(setter, sizeof(setter), "%s=", key_str);
|
|
18
18
|
|
|
19
19
|
// Call the setter method
|
|
20
20
|
rb_funcall(self, rb_intern(setter), 1, value);
|
|
21
21
|
|
|
22
|
-
xfree(setter);
|
|
23
22
|
return ST_CONTINUE;
|
|
24
23
|
}
|
|
25
24
|
|
|
@@ -244,6 +243,76 @@ decompress_without_gvl(void* arg) {
|
|
|
244
243
|
return NULL;
|
|
245
244
|
}
|
|
246
245
|
|
|
246
|
+
// Decompress stream args for GVL release (unknown content size path)
|
|
247
|
+
// Uses plain C malloc/realloc since Ruby API calls are not allowed without GVL
|
|
248
|
+
typedef struct {
|
|
249
|
+
ZSTD_DCtx *dctx;
|
|
250
|
+
const char *src;
|
|
251
|
+
size_t src_size;
|
|
252
|
+
char *dst;
|
|
253
|
+
size_t dst_capacity;
|
|
254
|
+
size_t dst_size;
|
|
255
|
+
size_t initial_capacity;
|
|
256
|
+
int error;
|
|
257
|
+
const char *error_name;
|
|
258
|
+
} decompress_stream_nogvl_args;
|
|
259
|
+
|
|
260
|
+
// Decompress stream without holding Ruby's GVL (unknown content size path)
|
|
261
|
+
// Performs the entire ZSTD_decompressStream loop using C malloc/realloc.
|
|
262
|
+
// No Ruby API calls allowed here.
|
|
263
|
+
static void*
|
|
264
|
+
decompress_stream_without_gvl(void* arg) {
|
|
265
|
+
decompress_stream_nogvl_args* args = arg;
|
|
266
|
+
args->error = 0;
|
|
267
|
+
args->error_name = NULL;
|
|
268
|
+
|
|
269
|
+
args->dst_capacity = args->initial_capacity;
|
|
270
|
+
args->dst = malloc(args->dst_capacity);
|
|
271
|
+
if (!args->dst) {
|
|
272
|
+
args->error = 1;
|
|
273
|
+
args->error_name = "malloc failed for decompression buffer";
|
|
274
|
+
return NULL;
|
|
275
|
+
}
|
|
276
|
+
args->dst_size = 0;
|
|
277
|
+
|
|
278
|
+
ZSTD_inBuffer input = { args->src, args->src_size, 0 };
|
|
279
|
+
|
|
280
|
+
while (input.pos < input.size) {
|
|
281
|
+
// Ensure we have room for output
|
|
282
|
+
if (args->dst_size >= args->dst_capacity) {
|
|
283
|
+
size_t new_capacity = args->dst_capacity * 2;
|
|
284
|
+
char* new_buf = realloc(args->dst, new_capacity);
|
|
285
|
+
if (!new_buf) {
|
|
286
|
+
args->error = 1;
|
|
287
|
+
args->error_name = "realloc failed during decompression";
|
|
288
|
+
return NULL;
|
|
289
|
+
}
|
|
290
|
+
args->dst = new_buf;
|
|
291
|
+
args->dst_capacity = new_capacity;
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
ZSTD_outBuffer output = {
|
|
295
|
+
args->dst + args->dst_size,
|
|
296
|
+
args->dst_capacity - args->dst_size,
|
|
297
|
+
0
|
|
298
|
+
};
|
|
299
|
+
|
|
300
|
+
size_t ret = ZSTD_decompressStream(args->dctx, &output, &input);
|
|
301
|
+
if (ZSTD_isError(ret)) {
|
|
302
|
+
args->error = 1;
|
|
303
|
+
args->error_name = ZSTD_getErrorName(ret);
|
|
304
|
+
return NULL;
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
args->dst_size += output.pos;
|
|
308
|
+
|
|
309
|
+
// ret == 0 means frame is complete
|
|
310
|
+
if (ret == 0) break;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
return NULL;
|
|
314
|
+
}
|
|
315
|
+
|
|
247
316
|
// DCtx frame_content_size - class method to get frame content size
|
|
248
317
|
static VALUE
|
|
249
318
|
vibe_zstd_dctx_frame_content_size(VALUE self, VALUE data) {
|
|
@@ -353,44 +422,32 @@ vibe_zstd_dctx_decompress(int argc, VALUE* argv, VALUE self) {
|
|
|
353
422
|
}
|
|
354
423
|
}
|
|
355
424
|
|
|
356
|
-
// If content size is unknown, use streaming decompression with exponential growth
|
|
425
|
+
// If content size is unknown, use streaming decompression with exponential growth.
|
|
426
|
+
// Releases GVL to allow other Ruby threads to run during decompression.
|
|
427
|
+
// Uses C malloc/realloc (not Ruby allocators) since Ruby API calls are forbidden without GVL.
|
|
357
428
|
if (contentSize == ZSTD_CONTENTSIZE_UNKNOWN) {
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
if (output.pos > 0) {
|
|
377
|
-
// Grow result buffer exponentially if needed
|
|
378
|
-
if (result_size + output.pos > result_capacity) {
|
|
379
|
-
// Double capacity until it fits
|
|
380
|
-
while (result_capacity < result_size + output.pos) {
|
|
381
|
-
result_capacity *= 2;
|
|
382
|
-
}
|
|
383
|
-
rb_str_resize(result, result_capacity);
|
|
384
|
-
}
|
|
385
|
-
|
|
386
|
-
// Copy directly into result buffer
|
|
387
|
-
memcpy(RSTRING_PTR(result) + result_size, RSTRING_PTR(tmpBuffer), output.pos);
|
|
388
|
-
result_size += output.pos;
|
|
389
|
-
}
|
|
429
|
+
decompress_stream_nogvl_args stream_args = {
|
|
430
|
+
.dctx = dctx->dctx,
|
|
431
|
+
.src = src,
|
|
432
|
+
.src_size = srcSize,
|
|
433
|
+
.dst = NULL,
|
|
434
|
+
.dst_capacity = 0,
|
|
435
|
+
.dst_size = 0,
|
|
436
|
+
.initial_capacity = initial_capacity,
|
|
437
|
+
.error = 0,
|
|
438
|
+
.error_name = NULL
|
|
439
|
+
};
|
|
440
|
+
|
|
441
|
+
rb_thread_call_without_gvl(decompress_stream_without_gvl, &stream_args, NULL, NULL);
|
|
442
|
+
|
|
443
|
+
if (stream_args.error) {
|
|
444
|
+
if (stream_args.dst) free(stream_args.dst);
|
|
445
|
+
rb_raise(rb_eRuntimeError, "Decompression failed: %s", stream_args.error_name);
|
|
390
446
|
}
|
|
391
447
|
|
|
392
|
-
//
|
|
393
|
-
|
|
448
|
+
// Create Ruby string from the C buffer, then free the C buffer
|
|
449
|
+
VALUE result = rb_str_new(stream_args.dst, stream_args.dst_size);
|
|
450
|
+
free(stream_args.dst);
|
|
394
451
|
return result;
|
|
395
452
|
}
|
|
396
453
|
VALUE result = rb_str_new(NULL, contentSize);
|
data/ext/vibe_zstd/dict.c
CHANGED
|
@@ -115,7 +115,7 @@ typedef struct {
|
|
|
115
115
|
|
|
116
116
|
// Cleanup function for dictionary training resources
|
|
117
117
|
// Safely frees all allocated memory, checking for NULL to handle partial allocations.
|
|
118
|
-
//
|
|
118
|
+
// Used as the ensure callback in rb_ensure to guarantee cleanup regardless of exceptions.
|
|
119
119
|
static VALUE
|
|
120
120
|
dict_training_cleanup(VALUE arg) {
|
|
121
121
|
dict_training_resources* resources = (dict_training_resources*)arg;
|
|
@@ -125,6 +125,108 @@ dict_training_cleanup(VALUE arg) {
|
|
|
125
125
|
return Qnil;
|
|
126
126
|
}
|
|
127
127
|
|
|
128
|
+
// Copy Ruby sample strings into contiguous C buffer for ZDICT functions
|
|
129
|
+
static void
|
|
130
|
+
copy_samples_to_buffer(dict_training_resources* resources, VALUE samples, long num_samples) {
|
|
131
|
+
size_t offset = 0;
|
|
132
|
+
for (long i = 0; i < num_samples; i++) {
|
|
133
|
+
VALUE sample = rb_ary_entry(samples, i);
|
|
134
|
+
size_t sample_len = RSTRING_LEN(sample);
|
|
135
|
+
resources->sample_sizes[i] = sample_len;
|
|
136
|
+
memcpy(resources->samples_buffer + offset, RSTRING_PTR(sample), sample_len);
|
|
137
|
+
offset += sample_len;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
// Context structs and body functions for rb_ensure-based dict training.
|
|
142
|
+
// Each training function uses rb_ensure to guarantee resource cleanup even if
|
|
143
|
+
// rb_str_new or other Ruby API calls raise exceptions (e.g., OOM).
|
|
144
|
+
// Common fields are in dict_training_ctx; variant structs embed it as first member.
|
|
145
|
+
|
|
146
|
+
typedef struct {
|
|
147
|
+
dict_training_resources* resources;
|
|
148
|
+
VALUE result;
|
|
149
|
+
size_t max_dict_size;
|
|
150
|
+
long num_samples;
|
|
151
|
+
VALUE samples;
|
|
152
|
+
} dict_training_ctx;
|
|
153
|
+
|
|
154
|
+
static VALUE train_dict_basic_body(VALUE arg) {
|
|
155
|
+
dict_training_ctx* ctx = (dict_training_ctx*)arg;
|
|
156
|
+
copy_samples_to_buffer(ctx->resources, ctx->samples, ctx->num_samples);
|
|
157
|
+
size_t dict_size = ZDICT_trainFromBuffer(
|
|
158
|
+
ctx->resources->dict_buffer, ctx->max_dict_size,
|
|
159
|
+
ctx->resources->samples_buffer, ctx->resources->sample_sizes, (unsigned)ctx->num_samples
|
|
160
|
+
);
|
|
161
|
+
if (ZDICT_isError(dict_size)) {
|
|
162
|
+
rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));
|
|
163
|
+
}
|
|
164
|
+
ctx->result = rb_str_new(ctx->resources->dict_buffer, dict_size);
|
|
165
|
+
return ctx->result;
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
typedef struct {
|
|
169
|
+
dict_training_ctx base;
|
|
170
|
+
ZDICT_cover_params_t params;
|
|
171
|
+
} train_dict_cover_ctx;
|
|
172
|
+
|
|
173
|
+
static VALUE train_dict_cover_body(VALUE arg) {
|
|
174
|
+
train_dict_cover_ctx* ctx = (train_dict_cover_ctx*)arg;
|
|
175
|
+
copy_samples_to_buffer(ctx->base.resources, ctx->base.samples, ctx->base.num_samples);
|
|
176
|
+
size_t dict_size = ZDICT_trainFromBuffer_cover(
|
|
177
|
+
ctx->base.resources->dict_buffer, ctx->base.max_dict_size,
|
|
178
|
+
ctx->base.resources->samples_buffer, ctx->base.resources->sample_sizes, (unsigned)ctx->base.num_samples,
|
|
179
|
+
ctx->params
|
|
180
|
+
);
|
|
181
|
+
if (ZDICT_isError(dict_size)) {
|
|
182
|
+
rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));
|
|
183
|
+
}
|
|
184
|
+
ctx->base.result = rb_str_new(ctx->base.resources->dict_buffer, dict_size);
|
|
185
|
+
return ctx->base.result;
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
typedef struct {
|
|
189
|
+
dict_training_ctx base;
|
|
190
|
+
ZDICT_fastCover_params_t params;
|
|
191
|
+
} train_dict_fast_cover_ctx;
|
|
192
|
+
|
|
193
|
+
static VALUE train_dict_fast_cover_body(VALUE arg) {
|
|
194
|
+
train_dict_fast_cover_ctx* ctx = (train_dict_fast_cover_ctx*)arg;
|
|
195
|
+
copy_samples_to_buffer(ctx->base.resources, ctx->base.samples, ctx->base.num_samples);
|
|
196
|
+
size_t dict_size = ZDICT_trainFromBuffer_fastCover(
|
|
197
|
+
ctx->base.resources->dict_buffer, ctx->base.max_dict_size,
|
|
198
|
+
ctx->base.resources->samples_buffer, ctx->base.resources->sample_sizes, (unsigned)ctx->base.num_samples,
|
|
199
|
+
ctx->params
|
|
200
|
+
);
|
|
201
|
+
if (ZDICT_isError(dict_size)) {
|
|
202
|
+
rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));
|
|
203
|
+
}
|
|
204
|
+
ctx->base.result = rb_str_new(ctx->base.resources->dict_buffer, dict_size);
|
|
205
|
+
return ctx->base.result;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
typedef struct {
|
|
209
|
+
dict_training_ctx base;
|
|
210
|
+
VALUE content_val;
|
|
211
|
+
ZDICT_params_t params;
|
|
212
|
+
} finalize_dict_ctx;
|
|
213
|
+
|
|
214
|
+
static VALUE finalize_dict_body(VALUE arg) {
|
|
215
|
+
finalize_dict_ctx* ctx = (finalize_dict_ctx*)arg;
|
|
216
|
+
copy_samples_to_buffer(ctx->base.resources, ctx->base.samples, ctx->base.num_samples);
|
|
217
|
+
size_t dict_size = ZDICT_finalizeDictionary(
|
|
218
|
+
ctx->base.resources->dict_buffer, ctx->base.max_dict_size,
|
|
219
|
+
RSTRING_PTR(ctx->content_val), RSTRING_LEN(ctx->content_val),
|
|
220
|
+
ctx->base.resources->samples_buffer, ctx->base.resources->sample_sizes, (unsigned)ctx->base.num_samples,
|
|
221
|
+
ctx->params
|
|
222
|
+
);
|
|
223
|
+
if (ZDICT_isError(dict_size)) {
|
|
224
|
+
rb_raise(rb_eRuntimeError, "Dictionary finalization failed: %s", ZDICT_getErrorName(dict_size));
|
|
225
|
+
}
|
|
226
|
+
ctx->base.result = rb_str_new(ctx->base.resources->dict_buffer, dict_size);
|
|
227
|
+
return ctx->base.result;
|
|
228
|
+
}
|
|
229
|
+
|
|
128
230
|
// Train dictionary from samples - module-level method
|
|
129
231
|
// VibeZstd.train_dict(samples, max_dict_size: 112640)
|
|
130
232
|
//
|
|
@@ -166,36 +268,17 @@ vibe_zstd_train_dict(int argc, VALUE* argv, VALUE self) {
|
|
|
166
268
|
resources.samples_buffer = ALLOC_N(char, total_samples_size);
|
|
167
269
|
resources.dict_buffer = ALLOC_N(char, max_dict_size);
|
|
168
270
|
|
|
169
|
-
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
// Train the dictionary
|
|
181
|
-
size_t dict_size = ZDICT_trainFromBuffer(
|
|
182
|
-
resources.dict_buffer, max_dict_size,
|
|
183
|
-
resources.samples_buffer, resources.sample_sizes, (unsigned)num_samples
|
|
184
|
-
);
|
|
185
|
-
|
|
186
|
-
// Check for errors
|
|
187
|
-
if (ZDICT_isError(dict_size)) {
|
|
188
|
-
dict_training_cleanup((VALUE)&resources);
|
|
189
|
-
rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
// Create Ruby string with the trained dictionary
|
|
193
|
-
VALUE dict_string = rb_str_new(resources.dict_buffer, dict_size);
|
|
194
|
-
|
|
195
|
-
// Clean up all resources
|
|
196
|
-
dict_training_cleanup((VALUE)&resources);
|
|
197
|
-
|
|
198
|
-
return dict_string;
|
|
271
|
+
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
272
|
+
dict_training_ctx ctx = {
|
|
273
|
+
.resources = &resources,
|
|
274
|
+
.result = Qnil,
|
|
275
|
+
.max_dict_size = max_dict_size,
|
|
276
|
+
.num_samples = num_samples,
|
|
277
|
+
.samples = samples
|
|
278
|
+
};
|
|
279
|
+
|
|
280
|
+
rb_ensure(train_dict_basic_body, (VALUE)&ctx, dict_training_cleanup, (VALUE)&resources);
|
|
281
|
+
return ctx.result;
|
|
199
282
|
}
|
|
200
283
|
|
|
201
284
|
// VibeZstd.train_dict_cover(samples, max_dict_size: 112640, k: 0, d: 0, steps: 0, split_point: 1.0, shrink_dict: false, shrink_dict_max_regression: 0, nb_threads: 0)
|
|
@@ -268,37 +351,20 @@ vibe_zstd_train_dict_cover(int argc, VALUE* argv, VALUE self) {
|
|
|
268
351
|
resources.samples_buffer = ALLOC_N(char, total_samples_size);
|
|
269
352
|
resources.dict_buffer = ALLOC_N(char, max_dict_size);
|
|
270
353
|
|
|
271
|
-
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
resources.samples_buffer, resources.sample_sizes, (unsigned)num_samples,
|
|
286
|
-
params
|
|
287
|
-
);
|
|
288
|
-
|
|
289
|
-
// Check for errors
|
|
290
|
-
if (ZDICT_isError(dict_size)) {
|
|
291
|
-
dict_training_cleanup((VALUE)&resources);
|
|
292
|
-
rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
// Create Ruby string with the trained dictionary
|
|
296
|
-
VALUE dict_string = rb_str_new(resources.dict_buffer, dict_size);
|
|
297
|
-
|
|
298
|
-
// Clean up all resources
|
|
299
|
-
dict_training_cleanup((VALUE)&resources);
|
|
300
|
-
|
|
301
|
-
return dict_string;
|
|
354
|
+
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
355
|
+
train_dict_cover_ctx ctx = {
|
|
356
|
+
.base = {
|
|
357
|
+
.resources = &resources,
|
|
358
|
+
.result = Qnil,
|
|
359
|
+
.max_dict_size = max_dict_size,
|
|
360
|
+
.num_samples = num_samples,
|
|
361
|
+
.samples = samples
|
|
362
|
+
},
|
|
363
|
+
.params = params
|
|
364
|
+
};
|
|
365
|
+
|
|
366
|
+
rb_ensure(train_dict_cover_body, (VALUE)&ctx, dict_training_cleanup, (VALUE)&resources);
|
|
367
|
+
return ctx.base.result;
|
|
302
368
|
}
|
|
303
369
|
|
|
304
370
|
// VibeZstd.train_dict_fast_cover(samples, max_dict_size: 112640, k: 0, d: 0, f: 0, split_point: 1.0, accel: 0, shrink_dict: false, shrink_dict_max_regression: 0, nb_threads: 0)
|
|
@@ -374,37 +440,20 @@ vibe_zstd_train_dict_fast_cover(int argc, VALUE* argv, VALUE self) {
|
|
|
374
440
|
resources.samples_buffer = ALLOC_N(char, total_samples_size);
|
|
375
441
|
resources.dict_buffer = ALLOC_N(char, max_dict_size);
|
|
376
442
|
|
|
377
|
-
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
resources.samples_buffer, resources.sample_sizes, (unsigned)num_samples,
|
|
392
|
-
params
|
|
393
|
-
);
|
|
394
|
-
|
|
395
|
-
// Check for errors
|
|
396
|
-
if (ZDICT_isError(dict_size)) {
|
|
397
|
-
dict_training_cleanup((VALUE)&resources);
|
|
398
|
-
rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));
|
|
399
|
-
}
|
|
400
|
-
|
|
401
|
-
// Create Ruby string with the trained dictionary
|
|
402
|
-
VALUE dict_string = rb_str_new(resources.dict_buffer, dict_size);
|
|
403
|
-
|
|
404
|
-
// Clean up all resources
|
|
405
|
-
dict_training_cleanup((VALUE)&resources);
|
|
406
|
-
|
|
407
|
-
return dict_string;
|
|
443
|
+
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
444
|
+
train_dict_fast_cover_ctx ctx = {
|
|
445
|
+
.base = {
|
|
446
|
+
.resources = &resources,
|
|
447
|
+
.result = Qnil,
|
|
448
|
+
.max_dict_size = max_dict_size,
|
|
449
|
+
.num_samples = num_samples,
|
|
450
|
+
.samples = samples
|
|
451
|
+
},
|
|
452
|
+
.params = params
|
|
453
|
+
};
|
|
454
|
+
|
|
455
|
+
rb_ensure(train_dict_fast_cover_body, (VALUE)&ctx, dict_training_cleanup, (VALUE)&resources);
|
|
456
|
+
return ctx.base.result;
|
|
408
457
|
}
|
|
409
458
|
|
|
410
459
|
// Get dictionary ID from raw dictionary data - module-level utility
|
|
@@ -490,38 +539,21 @@ vibe_zstd_finalize_dictionary(int argc, VALUE* argv, VALUE self) {
|
|
|
490
539
|
resources.samples_buffer = ALLOC_N(char, total_samples_size);
|
|
491
540
|
resources.dict_buffer = ALLOC_N(char, max_size);
|
|
492
541
|
|
|
493
|
-
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
resources.samples_buffer, resources.sample_sizes, (unsigned)num_samples,
|
|
509
|
-
params
|
|
510
|
-
);
|
|
511
|
-
|
|
512
|
-
// Check for errors
|
|
513
|
-
if (ZDICT_isError(dict_size)) {
|
|
514
|
-
dict_training_cleanup((VALUE)&resources);
|
|
515
|
-
rb_raise(rb_eRuntimeError, "Dictionary finalization failed: %s", ZDICT_getErrorName(dict_size));
|
|
516
|
-
}
|
|
517
|
-
|
|
518
|
-
// Create Ruby string with the finalized dictionary
|
|
519
|
-
VALUE dict_string = rb_str_new(resources.dict_buffer, dict_size);
|
|
520
|
-
|
|
521
|
-
// Clean up all resources
|
|
522
|
-
dict_training_cleanup((VALUE)&resources);
|
|
523
|
-
|
|
524
|
-
return dict_string;
|
|
542
|
+
// Layer 3: Use rb_ensure for guaranteed cleanup
|
|
543
|
+
finalize_dict_ctx ctx = {
|
|
544
|
+
.base = {
|
|
545
|
+
.resources = &resources,
|
|
546
|
+
.result = Qnil,
|
|
547
|
+
.max_dict_size = max_size,
|
|
548
|
+
.num_samples = num_samples,
|
|
549
|
+
.samples = samples_val
|
|
550
|
+
},
|
|
551
|
+
.content_val = content_val,
|
|
552
|
+
.params = params
|
|
553
|
+
};
|
|
554
|
+
|
|
555
|
+
rb_ensure(finalize_dict_body, (VALUE)&ctx, dict_training_cleanup, (VALUE)&resources);
|
|
556
|
+
return ctx.base.result;
|
|
525
557
|
}
|
|
526
558
|
|
|
527
559
|
// Get dictionary header size - module-level utility
|
data/ext/vibe_zstd/extconf.rb
CHANGED
|
@@ -14,10 +14,11 @@ $INCFLAGS << " -I#{LIBZSTD_DIR}/decompress"
|
|
|
14
14
|
$INCFLAGS << " -I#{LIBZSTD_DIR}/dictBuilder"
|
|
15
15
|
# standard:enable Style/GlobalVars
|
|
16
16
|
|
|
17
|
-
# Add preprocessor definitions
|
|
18
|
-
append_cflags
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
# Add preprocessor definitions (use $defs so they appear in DEFS in the Makefile,
|
|
18
|
+
# append_cflags only validates the flag but doesn't reliably propagate -D flags)
|
|
19
|
+
$defs << "-DXXH_NAMESPACE=ZSTD_"
|
|
20
|
+
$defs << "-DZSTD_LEGACY_SUPPORT=0" # Disable legacy support to reduce size
|
|
21
|
+
$defs << "-DZSTD_MULTITHREAD" # Enable multithreading support
|
|
21
22
|
|
|
22
23
|
# Link with pthread for multithreading
|
|
23
24
|
have_library("pthread") || abort("pthread library is required for multithreading support")
|
|
@@ -32,8 +33,7 @@ zstd_sources = Dir[
|
|
|
32
33
|
"#{LIBZSTD_DIR}/common/*.c",
|
|
33
34
|
"#{LIBZSTD_DIR}/compress/*.c",
|
|
34
35
|
"#{LIBZSTD_DIR}/decompress/*.{c,S}",
|
|
35
|
-
"#{LIBZSTD_DIR}/dictBuilder/*.c"
|
|
36
|
-
"#{LIBZSTD_DIR}/deprecated/*.c"
|
|
36
|
+
"#{LIBZSTD_DIR}/dictBuilder/*.c"
|
|
37
37
|
].map { |path| File.basename(path) }
|
|
38
38
|
|
|
39
39
|
# Add the main vibe_zstd.c file (which includes the split files via #include)
|
|
@@ -46,7 +46,6 @@ $VPATH << "$(srcdir)/libzstd/common"
|
|
|
46
46
|
$VPATH << "$(srcdir)/libzstd/compress"
|
|
47
47
|
$VPATH << "$(srcdir)/libzstd/decompress"
|
|
48
48
|
$VPATH << "$(srcdir)/libzstd/dictBuilder"
|
|
49
|
-
$VPATH << "$(srcdir)/libzstd/deprecated"
|
|
50
49
|
# standard:enable Style/GlobalVars
|
|
51
50
|
|
|
52
51
|
create_makefile("vibe_zstd/vibe_zstd")
|
data/ext/vibe_zstd/streaming.c
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
// Streaming implementation for VibeZstd
|
|
2
2
|
#include "vibe_zstd_internal.h"
|
|
3
3
|
|
|
4
|
+
// Cached method IDs for frequently called methods
|
|
5
|
+
static ID id_write;
|
|
6
|
+
static ID id_read;
|
|
7
|
+
|
|
4
8
|
// Forward declarations
|
|
5
9
|
static VALUE vibe_zstd_writer_initialize(int argc, VALUE *argv, VALUE self);
|
|
6
10
|
static VALUE vibe_zstd_writer_write(VALUE self, VALUE data);
|
|
@@ -25,12 +29,12 @@ vibe_zstd_writer_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
25
29
|
TypedData_Get_Struct(self, vibe_zstd_cstream, &vibe_zstd_cstream_type, cstream);
|
|
26
30
|
|
|
27
31
|
// Validate IO object responds to write (duck typing)
|
|
28
|
-
if (!rb_respond_to(io,
|
|
32
|
+
if (!rb_respond_to(io, id_write)) {
|
|
29
33
|
rb_raise(rb_eTypeError, "IO object must respond to write");
|
|
30
34
|
}
|
|
31
35
|
|
|
32
|
-
// Store IO object
|
|
33
|
-
cstream->io
|
|
36
|
+
// Store IO object (write barrier for WB_PROTECTED)
|
|
37
|
+
RB_OBJ_WRITE(self, &cstream->io, io);
|
|
34
38
|
rb_ivar_set(self, rb_intern("@io"), io);
|
|
35
39
|
|
|
36
40
|
// Parse options
|
|
@@ -87,6 +91,9 @@ vibe_zstd_writer_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
87
91
|
}
|
|
88
92
|
}
|
|
89
93
|
|
|
94
|
+
// Allocate reusable output buffer (write barrier for WB_PROTECTED)
|
|
95
|
+
RB_OBJ_WRITE(self, &cstream->output_buffer, rb_str_buf_new(ZSTD_CStreamOutSize()));
|
|
96
|
+
|
|
90
97
|
return self;
|
|
91
98
|
}
|
|
92
99
|
|
|
@@ -105,10 +112,15 @@ vibe_zstd_writer_write(VALUE self, VALUE data) {
|
|
|
105
112
|
};
|
|
106
113
|
|
|
107
114
|
size_t outBufferSize = ZSTD_CStreamOutSize();
|
|
108
|
-
VALUE outBuffer =
|
|
115
|
+
VALUE outBuffer = cstream->output_buffer;
|
|
109
116
|
|
|
110
117
|
// Process all input data in chunks
|
|
111
118
|
while (input.pos < input.size) {
|
|
119
|
+
// Unshare buffer if COW-shared by a prior IO#write receiver (Ruby 3.3+),
|
|
120
|
+
// then restore capacity which may have shrunk during unsharing
|
|
121
|
+
rb_str_modify(outBuffer);
|
|
122
|
+
rb_str_resize(outBuffer, (long)outBufferSize);
|
|
123
|
+
rb_str_set_len(outBuffer, 0);
|
|
112
124
|
ZSTD_outBuffer output = {
|
|
113
125
|
.dst = RSTRING_PTR(outBuffer),
|
|
114
126
|
.size = outBufferSize,
|
|
@@ -125,8 +137,7 @@ vibe_zstd_writer_write(VALUE self, VALUE data) {
|
|
|
125
137
|
// Write any compressed output that was produced
|
|
126
138
|
if (output.pos > 0) {
|
|
127
139
|
rb_str_set_len(outBuffer, output.pos);
|
|
128
|
-
rb_funcall(cstream->io,
|
|
129
|
-
// No need to resize - buffer capacity remains at outBufferSize
|
|
140
|
+
rb_funcall(cstream->io, id_write, 1, outBuffer);
|
|
130
141
|
}
|
|
131
142
|
}
|
|
132
143
|
|
|
@@ -139,7 +150,7 @@ vibe_zstd_writer_flush(VALUE self) {
|
|
|
139
150
|
TypedData_Get_Struct(self, vibe_zstd_cstream, &vibe_zstd_cstream_type, cstream);
|
|
140
151
|
|
|
141
152
|
size_t outBufferSize = ZSTD_CStreamOutSize();
|
|
142
|
-
VALUE outBuffer =
|
|
153
|
+
VALUE outBuffer = cstream->output_buffer;
|
|
143
154
|
|
|
144
155
|
ZSTD_inBuffer input = { NULL, 0, 0 };
|
|
145
156
|
size_t remaining;
|
|
@@ -147,6 +158,9 @@ vibe_zstd_writer_flush(VALUE self) {
|
|
|
147
158
|
// ZSTD_e_flush: flush internal buffers, making all data readable
|
|
148
159
|
// Loop until remaining == 0 (flush complete)
|
|
149
160
|
do {
|
|
161
|
+
rb_str_modify(outBuffer);
|
|
162
|
+
rb_str_resize(outBuffer, (long)outBufferSize);
|
|
163
|
+
rb_str_set_len(outBuffer, 0);
|
|
150
164
|
ZSTD_outBuffer output = {
|
|
151
165
|
.dst = RSTRING_PTR(outBuffer),
|
|
152
166
|
.size = outBufferSize,
|
|
@@ -161,8 +175,7 @@ vibe_zstd_writer_flush(VALUE self) {
|
|
|
161
175
|
|
|
162
176
|
if (output.pos > 0) {
|
|
163
177
|
rb_str_set_len(outBuffer, output.pos);
|
|
164
|
-
rb_funcall(cstream->io,
|
|
165
|
-
// No need to resize - buffer capacity remains at outBufferSize
|
|
178
|
+
rb_funcall(cstream->io, id_write, 1, outBuffer);
|
|
166
179
|
}
|
|
167
180
|
} while (remaining > 0);
|
|
168
181
|
|
|
@@ -175,7 +188,7 @@ vibe_zstd_writer_finish(VALUE self) {
|
|
|
175
188
|
TypedData_Get_Struct(self, vibe_zstd_cstream, &vibe_zstd_cstream_type, cstream);
|
|
176
189
|
|
|
177
190
|
size_t outBufferSize = ZSTD_CStreamOutSize();
|
|
178
|
-
VALUE outBuffer =
|
|
191
|
+
VALUE outBuffer = cstream->output_buffer;
|
|
179
192
|
|
|
180
193
|
ZSTD_inBuffer input = { NULL, 0, 0 };
|
|
181
194
|
size_t remaining;
|
|
@@ -183,6 +196,9 @@ vibe_zstd_writer_finish(VALUE self) {
|
|
|
183
196
|
// ZSTD_e_end: finalize frame with checksum and epilogue
|
|
184
197
|
// Loop until remaining == 0 (frame complete)
|
|
185
198
|
do {
|
|
199
|
+
rb_str_modify(outBuffer);
|
|
200
|
+
rb_str_resize(outBuffer, (long)outBufferSize);
|
|
201
|
+
rb_str_set_len(outBuffer, 0);
|
|
186
202
|
ZSTD_outBuffer output = {
|
|
187
203
|
.dst = RSTRING_PTR(outBuffer),
|
|
188
204
|
.size = outBufferSize,
|
|
@@ -197,8 +213,7 @@ vibe_zstd_writer_finish(VALUE self) {
|
|
|
197
213
|
|
|
198
214
|
if (output.pos > 0) {
|
|
199
215
|
rb_str_set_len(outBuffer, output.pos);
|
|
200
|
-
rb_funcall(cstream->io,
|
|
201
|
-
// No need to resize - buffer capacity remains at outBufferSize
|
|
216
|
+
rb_funcall(cstream->io, id_write, 1, outBuffer);
|
|
202
217
|
}
|
|
203
218
|
} while (remaining > 0);
|
|
204
219
|
|
|
@@ -216,12 +231,12 @@ vibe_zstd_reader_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
216
231
|
TypedData_Get_Struct(self, vibe_zstd_dstream, &vibe_zstd_dstream_type, dstream);
|
|
217
232
|
|
|
218
233
|
// Validate IO object responds to read (duck typing)
|
|
219
|
-
if (!rb_respond_to(io,
|
|
234
|
+
if (!rb_respond_to(io, id_read)) {
|
|
220
235
|
rb_raise(rb_eTypeError, "IO object must respond to read");
|
|
221
236
|
}
|
|
222
237
|
|
|
223
|
-
// Store IO object
|
|
224
|
-
dstream->io
|
|
238
|
+
// Store IO object (write barrier for WB_PROTECTED)
|
|
239
|
+
RB_OBJ_WRITE(self, &dstream->io, io);
|
|
225
240
|
rb_ivar_set(self, rb_intern("@io"), io);
|
|
226
241
|
|
|
227
242
|
// Parse options
|
|
@@ -263,7 +278,7 @@ vibe_zstd_reader_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
263
278
|
}
|
|
264
279
|
|
|
265
280
|
// Initialize input buffer management
|
|
266
|
-
dstream->input_data
|
|
281
|
+
RB_OBJ_WRITE(self, &dstream->input_data, rb_str_new(NULL, 0));
|
|
267
282
|
dstream->input.src = NULL;
|
|
268
283
|
dstream->input.size = 0;
|
|
269
284
|
dstream->input.pos = 0;
|
|
@@ -317,7 +332,7 @@ vibe_zstd_reader_read(int argc, VALUE *argv, VALUE self) {
|
|
|
317
332
|
while (total_read < requested_size) {
|
|
318
333
|
// Refill input buffer when all compressed data consumed
|
|
319
334
|
if (dstream->input.pos >= dstream->input.size) {
|
|
320
|
-
VALUE chunk = rb_funcall(dstream->io,
|
|
335
|
+
VALUE chunk = rb_funcall(dstream->io, id_read, 1, SIZET2NUM(inBufferSize));
|
|
321
336
|
if (NIL_P(chunk)) {
|
|
322
337
|
dstream->eof = 1;
|
|
323
338
|
if (total_read == 0 && !made_progress) {
|
|
@@ -326,8 +341,8 @@ vibe_zstd_reader_read(int argc, VALUE *argv, VALUE self) {
|
|
|
326
341
|
break;
|
|
327
342
|
}
|
|
328
343
|
|
|
329
|
-
// Reset input buffer with new data
|
|
330
|
-
dstream->input_data
|
|
344
|
+
// Reset input buffer with new data (write barrier for WB_PROTECTED)
|
|
345
|
+
RB_OBJ_WRITE(self, &dstream->input_data, chunk);
|
|
331
346
|
dstream->input.src = RSTRING_PTR(chunk);
|
|
332
347
|
dstream->input.size = RSTRING_LEN(chunk);
|
|
333
348
|
dstream->input.pos = 0;
|
|
@@ -394,6 +409,10 @@ vibe_zstd_reader_eof(VALUE self) {
|
|
|
394
409
|
// Class initialization function called from main Init_vibe_zstd
|
|
395
410
|
void
|
|
396
411
|
vibe_zstd_streaming_init_classes(VALUE rb_cVibeZstdCompressWriter, VALUE rb_cVibeZstdDecompressReader) {
|
|
412
|
+
// Cache method IDs for frequently called methods
|
|
413
|
+
id_write = rb_intern("write");
|
|
414
|
+
id_read = rb_intern("read");
|
|
415
|
+
|
|
397
416
|
// CompressWriter setup
|
|
398
417
|
rb_define_alloc_func(rb_cVibeZstdCompressWriter, vibe_zstd_cstream_alloc);
|
|
399
418
|
rb_define_method(rb_cVibeZstdCompressWriter, "initialize", vibe_zstd_writer_initialize, -1);
|
data/ext/vibe_zstd/vibe_zstd.c
CHANGED
|
@@ -12,7 +12,7 @@ VALUE rb_cVibeZstdDDict;
|
|
|
12
12
|
VALUE rb_cVibeZstdCompressWriter;
|
|
13
13
|
VALUE rb_cVibeZstdDecompressReader;
|
|
14
14
|
|
|
15
|
-
// Forward declarations for free and
|
|
15
|
+
// Forward declarations for free, mark, and dsize functions
|
|
16
16
|
static void vibe_zstd_cctx_free(void* ptr);
|
|
17
17
|
static void vibe_zstd_dctx_free(void* ptr);
|
|
18
18
|
static void vibe_zstd_cdict_free(void* ptr);
|
|
@@ -22,16 +22,47 @@ static void vibe_zstd_cstream_mark(void* ptr);
|
|
|
22
22
|
static void vibe_zstd_dstream_free(void* ptr);
|
|
23
23
|
static void vibe_zstd_dstream_mark(void* ptr);
|
|
24
24
|
|
|
25
|
+
// dsize callbacks - report memory usage to Ruby GC for accurate memory pressure tracking
|
|
26
|
+
static size_t vibe_zstd_cctx_dsize(const void* ptr) {
|
|
27
|
+
const vibe_zstd_cctx* cctx = ptr;
|
|
28
|
+
return sizeof(vibe_zstd_cctx) + (cctx->cctx ? ZSTD_sizeof_CCtx(cctx->cctx) : 0);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
static size_t vibe_zstd_dctx_dsize(const void* ptr) {
|
|
32
|
+
const vibe_zstd_dctx* dctx = ptr;
|
|
33
|
+
return sizeof(vibe_zstd_dctx) + (dctx->dctx ? ZSTD_sizeof_DCtx(dctx->dctx) : 0);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
static size_t vibe_zstd_cdict_dsize(const void* ptr) {
|
|
37
|
+
const vibe_zstd_cdict* cdict = ptr;
|
|
38
|
+
return sizeof(vibe_zstd_cdict) + (cdict->cdict ? ZSTD_sizeof_CDict(cdict->cdict) : 0);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
static size_t vibe_zstd_ddict_dsize(const void* ptr) {
|
|
42
|
+
const vibe_zstd_ddict* ddict = ptr;
|
|
43
|
+
return sizeof(vibe_zstd_ddict) + (ddict->ddict ? ZSTD_sizeof_DDict(ddict->ddict) : 0);
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
static size_t vibe_zstd_cstream_dsize(const void* ptr) {
|
|
47
|
+
const vibe_zstd_cstream* cstream = ptr;
|
|
48
|
+
return sizeof(vibe_zstd_cstream) + (cstream->cstream ? ZSTD_sizeof_CStream(cstream->cstream) : 0);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
static size_t vibe_zstd_dstream_dsize(const void* ptr) {
|
|
52
|
+
const vibe_zstd_dstream* dstream = ptr;
|
|
53
|
+
return sizeof(vibe_zstd_dstream) + (dstream->dstream ? ZSTD_sizeof_DStream(dstream->dstream) : 0);
|
|
54
|
+
}
|
|
55
|
+
|
|
25
56
|
// TypedData type definitions (these are referenced by extern in the split files)
|
|
26
57
|
rb_data_type_t vibe_zstd_cctx_type = {
|
|
27
58
|
.wrap_struct_name = "vibe_zstd_cctx",
|
|
28
59
|
.function = {
|
|
29
60
|
.dmark = NULL,
|
|
30
61
|
.dfree = (RUBY_DATA_FUNC)vibe_zstd_cctx_free,
|
|
31
|
-
.dsize =
|
|
62
|
+
.dsize = vibe_zstd_cctx_dsize,
|
|
32
63
|
},
|
|
33
64
|
.data = NULL,
|
|
34
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
|
65
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
35
66
|
};
|
|
36
67
|
|
|
37
68
|
rb_data_type_t vibe_zstd_dctx_type = {
|
|
@@ -39,10 +70,10 @@ rb_data_type_t vibe_zstd_dctx_type = {
|
|
|
39
70
|
.function = {
|
|
40
71
|
.dmark = NULL,
|
|
41
72
|
.dfree = (RUBY_DATA_FUNC)vibe_zstd_dctx_free,
|
|
42
|
-
.dsize =
|
|
73
|
+
.dsize = vibe_zstd_dctx_dsize,
|
|
43
74
|
},
|
|
44
75
|
.data = NULL,
|
|
45
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
|
76
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
46
77
|
};
|
|
47
78
|
|
|
48
79
|
rb_data_type_t vibe_zstd_cdict_type = {
|
|
@@ -50,10 +81,10 @@ rb_data_type_t vibe_zstd_cdict_type = {
|
|
|
50
81
|
.function = {
|
|
51
82
|
.dmark = NULL,
|
|
52
83
|
.dfree = (RUBY_DATA_FUNC)vibe_zstd_cdict_free,
|
|
53
|
-
.dsize =
|
|
84
|
+
.dsize = vibe_zstd_cdict_dsize,
|
|
54
85
|
},
|
|
55
86
|
.data = NULL,
|
|
56
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
|
87
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
57
88
|
};
|
|
58
89
|
|
|
59
90
|
rb_data_type_t vibe_zstd_ddict_type = {
|
|
@@ -61,10 +92,10 @@ rb_data_type_t vibe_zstd_ddict_type = {
|
|
|
61
92
|
.function = {
|
|
62
93
|
.dmark = NULL,
|
|
63
94
|
.dfree = (RUBY_DATA_FUNC)vibe_zstd_ddict_free,
|
|
64
|
-
.dsize =
|
|
95
|
+
.dsize = vibe_zstd_ddict_dsize,
|
|
65
96
|
},
|
|
66
97
|
.data = NULL,
|
|
67
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
|
98
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
68
99
|
};
|
|
69
100
|
|
|
70
101
|
rb_data_type_t vibe_zstd_cstream_type = {
|
|
@@ -72,10 +103,10 @@ rb_data_type_t vibe_zstd_cstream_type = {
|
|
|
72
103
|
.function = {
|
|
73
104
|
.dmark = (RUBY_DATA_FUNC)vibe_zstd_cstream_mark,
|
|
74
105
|
.dfree = (RUBY_DATA_FUNC)vibe_zstd_cstream_free,
|
|
75
|
-
.dsize =
|
|
106
|
+
.dsize = vibe_zstd_cstream_dsize,
|
|
76
107
|
},
|
|
77
108
|
.data = NULL,
|
|
78
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
|
109
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
79
110
|
};
|
|
80
111
|
|
|
81
112
|
rb_data_type_t vibe_zstd_dstream_type = {
|
|
@@ -83,10 +114,10 @@ rb_data_type_t vibe_zstd_dstream_type = {
|
|
|
83
114
|
.function = {
|
|
84
115
|
.dmark = (RUBY_DATA_FUNC)vibe_zstd_dstream_mark,
|
|
85
116
|
.dfree = (RUBY_DATA_FUNC)vibe_zstd_dstream_free,
|
|
86
|
-
.dsize =
|
|
117
|
+
.dsize = vibe_zstd_dstream_dsize,
|
|
87
118
|
},
|
|
88
119
|
.data = NULL,
|
|
89
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
|
120
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
90
121
|
};
|
|
91
122
|
|
|
92
123
|
// Free functions
|
|
@@ -130,6 +161,7 @@ static void
|
|
|
130
161
|
vibe_zstd_cstream_mark(void* ptr) {
|
|
131
162
|
vibe_zstd_cstream* cstream = ptr;
|
|
132
163
|
rb_gc_mark(cstream->io);
|
|
164
|
+
rb_gc_mark(cstream->output_buffer);
|
|
133
165
|
}
|
|
134
166
|
|
|
135
167
|
static void
|
|
@@ -200,6 +232,7 @@ vibe_zstd_cstream_alloc(VALUE klass) {
|
|
|
200
232
|
vibe_zstd_cstream* cstream = ALLOC(vibe_zstd_cstream);
|
|
201
233
|
cstream->cstream = NULL;
|
|
202
234
|
cstream->io = Qnil;
|
|
235
|
+
cstream->output_buffer = Qnil;
|
|
203
236
|
return TypedData_Wrap_Struct(klass, &vibe_zstd_cstream_type, cstream);
|
|
204
237
|
}
|
|
205
238
|
|
|
@@ -257,9 +290,8 @@ vibe_zstd_default_c_level(VALUE self) {
|
|
|
257
290
|
RUBY_FUNC_EXPORTED void
|
|
258
291
|
Init_vibe_zstd(void)
|
|
259
292
|
{
|
|
260
|
-
//
|
|
261
|
-
|
|
262
|
-
init_dctx_param_table();
|
|
293
|
+
// Parameter lookup tables are initialized in vibe_zstd_cctx_init_class()
|
|
294
|
+
// and vibe_zstd_dctx_init_class() respectively - no need to call here.
|
|
263
295
|
|
|
264
296
|
rb_mVibeZstd = rb_define_module("VibeZstd");
|
|
265
297
|
|
data/ext/vibe_zstd/vibe_zstd.h
CHANGED
data/lib/vibe_zstd/version.rb
CHANGED
data/lib/vibe_zstd.rb
CHANGED
|
@@ -39,9 +39,7 @@ module VibeZstd
|
|
|
39
39
|
|
|
40
40
|
# Defense: Prevent infinite loop on malformed data
|
|
41
41
|
# A valid frame must have non-zero size (at minimum: frame header)
|
|
42
|
-
if frame_size <= 0
|
|
43
|
-
raise Error, "Invalid frame: zero or negative size at offset #{offset}"
|
|
44
|
-
end
|
|
42
|
+
raise Error, "Invalid frame: zero or negative size at offset #{offset}" if frame_size <= 0
|
|
45
43
|
|
|
46
44
|
if skippable_frame?(frame_data)
|
|
47
45
|
content, magic_variant = read_skippable_frame(frame_data)
|
|
@@ -191,8 +189,14 @@ module VibeZstd
|
|
|
191
189
|
end
|
|
192
190
|
|
|
193
191
|
# Read all remaining data
|
|
192
|
+
# Drains any buffered data from line_buffer first
|
|
194
193
|
def read_all
|
|
195
194
|
chunks = []
|
|
195
|
+
# Drain line buffer first if present
|
|
196
|
+
if @line_buffer && !@line_buffer.empty?
|
|
197
|
+
chunks << @line_buffer
|
|
198
|
+
@line_buffer = +""
|
|
199
|
+
end
|
|
196
200
|
while (chunk = read)
|
|
197
201
|
chunks << chunk
|
|
198
202
|
end
|
|
@@ -214,20 +218,29 @@ module VibeZstd
|
|
|
214
218
|
end
|
|
215
219
|
end
|
|
216
220
|
|
|
217
|
-
# Read a single line (up to
|
|
221
|
+
# Read a single line (up to separator or EOF)
|
|
222
|
+
# Uses buffered reads (8192 bytes) instead of byte-at-a-time for performance.
|
|
223
|
+
# Orders of magnitude faster for line-oriented reading.
|
|
218
224
|
def gets(sep = $/)
|
|
219
|
-
return nil if eof?
|
|
225
|
+
return nil if eof? && (@line_buffer.nil? || @line_buffer.empty?)
|
|
220
226
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
227
|
+
@line_buffer ||= +""
|
|
228
|
+
|
|
229
|
+
loop do
|
|
230
|
+
# Check buffer for separator
|
|
231
|
+
if (idx = @line_buffer.index(sep))
|
|
232
|
+
return @line_buffer.slice!(0, idx + sep.bytesize)
|
|
233
|
+
end
|
|
234
|
+
|
|
235
|
+
# Read more data in larger chunks
|
|
236
|
+
chunk = read(8192)
|
|
224
237
|
break unless chunk
|
|
225
238
|
|
|
226
|
-
|
|
227
|
-
break if chunk.end_with?(sep)
|
|
239
|
+
@line_buffer << chunk
|
|
228
240
|
end
|
|
229
241
|
|
|
230
|
-
|
|
242
|
+
# Return remaining buffer or nil
|
|
243
|
+
@line_buffer.empty? ? nil : @line_buffer.slice!(0, @line_buffer.bytesize)
|
|
231
244
|
end
|
|
232
245
|
|
|
233
246
|
# Iterate over lines
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: vibe_zstd
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kelley Reynolds
|
|
8
8
|
bindir: exe
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2026-
|
|
10
|
+
date: 2026-03-25 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: benchmark-ips
|