RubyGems - vibe_zstd - Versions diffs - 1.0.2 → 1.1.1 - Mend

vibe_zstd 1.0.2 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 883dad10ddd5d490a77a9b991afea9c1a7d9c820b3890da2ee9eb3a58f47b24b
-  data.tar.gz: 5738f506a74bbdb918008b16eecacbc25fe27661502a428c430240b28306dd6d
+  metadata.gz: 9b3326bfa52942e1f7ee95578bbbff2cd87647748a086742551d562eda6d94f0
+  data.tar.gz: b594dade59dab715722477dc6d39eaa7768a39505fef2354c494090462f03afd
 SHA512:
-  metadata.gz: c21925310cef3ba4b147c727391cff8e66f7ea2f79fb6a511d19cda8fa5273f742beb284f3b2984b9a9bb17050d1ca547a206b4548689f68ef9254be1608b6a0
-  data.tar.gz: ed48566c16a7174c4182dee0cc271e0bbb1fbb311049d714f6a146a06059d5d81acbb9e5ee1fbfd09b87d83c862745b64bcee177f00be78f6e1a26031e4ea8cc
+  metadata.gz: bb5a4e27578f337ef0a72c133344c8d3f4e229b250f07c771d534bf65ffa40c0588d167e3cf01140d5baa1627e6866080b9710c059f15a59245b48eb7de026e8
+  data.tar.gz: 26f0ac03864044c25068cf00c8039d78d7ac677ec1b61298f0bf09fc8918a48a0105785607a53c12bf2c6f5c7a1c3f47d6dc64aefa4ef27e03803509f3717d80

data/CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [1.1.1] - 2026-03-25
+### Fixed
+- Fix `RuntimeError: can't set length of shared string` in `CompressWriter` when writing to File IO on Ruby 3.3+ caused by COW buffer sharing during `IO#write`
+- Fix vendored zstd build flags (`-DZSTD_MULTITHREAD`, `-DXXH_NAMESPACE`, `-DZSTD_LEGACY_SUPPORT`) not propagating to compiled sources, restoring multithreaded compression support (`workers`, `rsyncable` parameters)
+## [1.1.0] - 2026-03-02
+### Added
+- Release GVL during unknown-size streaming decompression, preventing thread blocking in multi-threaded servers (Puma, etc.)
+### Changed
+- `DecompressReader#gets` now uses 8KB buffered reads instead of 1-byte-at-a-time, dramatically reducing read call overhead on line-oriented data
+- `CompressWriter` reuses a single output buffer across calls instead of allocating ~128KB per `write`/`flush`/`finish`
+### Fixed
+- Exception safety in dict training: all four training functions now use `rb_ensure` so C buffers are always freed even if a Ruby exception is raised
+- Add `dsize` callbacks to all `TypedData` types so the GC sees accurate memory pressure from ZSTD context objects
+- Add `RUBY_TYPED_WB_PROTECTED` and proper write barriers to all typed structs for GC correctness
+### Performance
+- Stack-allocated string buffer in CCtx setter, eliminating a malloc/free per keyword-argument call
+- Cache `id_write`/`id_read` as static IDs instead of calling `rb_intern` on every I/O call
+- Remove redundant `init_cctx_param_table`/`init_dctx_param_table` calls at startup
 ## [1.0.2] - 2025-01-20
 ### Fixed
@@ -29,4 +54,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Thread pool support for parallel compression
 - Memory-efficient API for large files
-[1.0.0]: https://github.com/kreynolds/vipe_zstd/releases/tag/v1.0.0
+[1.1.1]: https://github.com/kreynolds/vibe_zstd/compare/v1.1.0...v1.1.1
+[1.1.0]: https://github.com/kreynolds/vibe_zstd/compare/v1.0.2...v1.1.0
+[1.0.2]: https://github.com/kreynolds/vibe_zstd/compare/v1.0.1...v1.0.2
+[1.0.1]: https://github.com/kreynolds/vibe_zstd/compare/v1.0.0...v1.0.1
+[1.0.0]: https://github.com/kreynolds/vibe_zstd/releases/tag/v1.0.0

data/ext/vibe_zstd/cctx.c CHANGED Viewed

@@ -9,14 +9,12 @@ static int
 vibe_zstd_cctx_init_param_iter(VALUE key, VALUE value, VALUE self) {
     // Build the setter method name: key + "="
     const char* key_str = rb_id2name(SYM2ID(key));
-    size_t setter_len = strlen(key_str) + 2;  // +1 for '=' + 1 for '\0'
-    char* setter = ALLOC_N(char, setter_len);
-    snprintf(setter, setter_len, "%s=", key_str);
+    char setter[256];
+    snprintf(setter, sizeof(setter), "%s=", key_str);
     // Call the setter method
     rb_funcall(self, rb_intern(setter), 1, value);
-    xfree(setter);
     return ST_CONTINUE;
 }

data/ext/vibe_zstd/dctx.c CHANGED Viewed

@@ -1,5 +1,6 @@
 // DCtx implementation for VibeZstd
 #include "vibe_zstd_internal.h"
+#include <stdlib.h>  // malloc, realloc, free for no-GVL decompression path
 // TypedData type - defined in vibe_zstd.c
 extern rb_data_type_t vibe_zstd_dctx_type;
@@ -12,14 +13,12 @@ static int
 vibe_zstd_dctx_init_param_iter(VALUE key, VALUE value, VALUE self) {
     // Build the setter method name: key + "="
     const char* key_str = rb_id2name(SYM2ID(key));
-    size_t setter_len = strlen(key_str) + 2;
-    char* setter = ALLOC_N(char, setter_len);
-    snprintf(setter, setter_len, "%s=", key_str);
+    char setter[256];
+    snprintf(setter, sizeof(setter), "%s=", key_str);
     // Call the setter method
     rb_funcall(self, rb_intern(setter), 1, value);
-    xfree(setter);
     return ST_CONTINUE;
 }
@@ -244,6 +243,76 @@ decompress_without_gvl(void* arg) {
     return NULL;
 }
+// Decompress stream args for GVL release (unknown content size path)
+// Uses plain C malloc/realloc since Ruby API calls are not allowed without GVL
+typedef struct {
+    ZSTD_DCtx *dctx;  // decompression context handed to ZSTD_decompressStream
+    const char *src;  // compressed input bytes
+    size_t src_size;  // length of src in bytes
+    char *dst;  // malloc'd output buffer; the caller frees it after the call
+    size_t dst_capacity;  // bytes currently allocated for dst
+    size_t dst_size;  // decompressed bytes written into dst so far
+    size_t initial_capacity;  // size of the first dst allocation
+    int error;  // 0 on success, 1 when error_name has been set
+    const char *error_name;  // failure description: a string literal or the ZSTD_getErrorName result
+} decompress_stream_nogvl_args;
+// Decompress stream without holding Ruby's GVL (unknown content size path)
+// Performs the entire ZSTD_decompressStream loop using C malloc/realloc.
+// No Ruby API calls allowed here.
+//
+// arg points to a decompress_stream_nogvl_args. On return:
+//   - error == 0: dst holds dst_size decompressed bytes (caller frees dst)
+//   - error == 1: error_name describes the failure; dst is either NULL or a
+//     still-valid allocation that the caller must free
+// Always returns NULL; results travel back through the args struct.
+static void*
+decompress_stream_without_gvl(void* arg) {
+    decompress_stream_nogvl_args* args = arg;
+    args->error = 0;
+    args->error_name = NULL;
+    args->dst_size = 0;
+    // A zero capacity can never grow by doubling, and malloc(0)/realloc(p, 0)
+    // are implementation-defined, so fall back to zstd's recommended size.
+    args->dst_capacity = args->initial_capacity ? args->initial_capacity : ZSTD_DStreamOutSize();
+    args->dst = malloc(args->dst_capacity);
+    if (!args->dst) {
+        args->error = 1;
+        args->error_name = "malloc failed for decompression buffer";
+        return NULL;
+    }
+    ZSTD_inBuffer input = { args->src, args->src_size, 0 };
+    // Set when the previous call filled its output window: zstd may still hold
+    // decoded bytes internally even though all input has been consumed.
+    int flush_pending = 0;
+    while (input.pos < input.size || flush_pending) {
+        // Ensure we have room for output
+        if (args->dst_size >= args->dst_capacity) {
+            // Refuse to double past what size_t can represent
+            if (args->dst_capacity > ((size_t)-1) / 2) {
+                args->error = 1;
+                args->error_name = "decompression buffer size overflow";
+                return NULL;
+            }
+            size_t new_capacity = args->dst_capacity * 2;
+            char* new_buf = realloc(args->dst, new_capacity);
+            if (!new_buf) {
+                // args->dst is still valid here; the caller frees it
+                args->error = 1;
+                args->error_name = "realloc failed during decompression";
+                return NULL;
+            }
+            args->dst = new_buf;
+            args->dst_capacity = new_capacity;
+        }
+        ZSTD_outBuffer output = {
+            args->dst + args->dst_size,
+            args->dst_capacity - args->dst_size,
+            0
+        };
+        size_t ret = ZSTD_decompressStream(args->dctx, &output, &input);
+        if (ZSTD_isError(ret)) {
+            args->error = 1;
+            args->error_name = ZSTD_getErrorName(ret);
+            return NULL;
+        }
+        args->dst_size += output.pos;
+        // ret == 0 means frame is complete
+        if (ret == 0) break;
+        // Keep looping after the input runs out while the output window was
+        // filled, otherwise the tail of the final block would be dropped.
+        flush_pending = (output.pos == output.size);
+    }
+    return NULL;
+}
 // DCtx frame_content_size - class method to get frame content size
 static VALUE
 vibe_zstd_dctx_frame_content_size(VALUE self, VALUE data) {
@@ -353,44 +422,32 @@ vibe_zstd_dctx_decompress(int argc, VALUE* argv, VALUE self) {
         }
     }
-    // If content size is unknown, use streaming decompression with exponential growth
+    // If content size is unknown, use streaming decompression with exponential growth.
+    // Releases GVL to allow other Ruby threads to run during decompression.
+    // Uses C malloc/realloc (not Ruby allocators) since Ruby API calls are forbidden without GVL.
     if (contentSize == ZSTD_CONTENTSIZE_UNKNOWN) {
-        size_t chunk_size = ZSTD_DStreamOutSize();  // Fixed chunk buffer size
-        VALUE tmpBuffer = rb_str_buf_new(chunk_size);
-        // Start with configured initial capacity
-        size_t result_capacity = initial_capacity;
-        size_t result_size = 0;
-        VALUE result = rb_str_buf_new(result_capacity);
-        ZSTD_inBuffer input = { src, srcSize, 0 };
-        while (input.pos < input.size) {
-            ZSTD_outBuffer output = { RSTRING_PTR(tmpBuffer), chunk_size, 0 };
-            size_t ret = ZSTD_decompressStream(dctx->dctx, &output, &input);
-            if (ZSTD_isError(ret)) {
-                rb_raise(rb_eRuntimeError, "Decompression failed: %s", ZSTD_getErrorName(ret));
-            }
-            if (output.pos > 0) {
-                // Grow result buffer exponentially if needed
-                if (result_size + output.pos > result_capacity) {
-                    // Double capacity until it fits
-                    while (result_capacity < result_size + output.pos) {
-                        result_capacity *= 2;
-                    }
-                    rb_str_resize(result, result_capacity);
-                }
-                // Copy directly into result buffer
-                memcpy(RSTRING_PTR(result) + result_size, RSTRING_PTR(tmpBuffer), output.pos);
-                result_size += output.pos;
-            }
+        decompress_stream_nogvl_args stream_args = {
+            .dctx = dctx->dctx,
+            .src = src,
+            .src_size = srcSize,
+            .dst = NULL,
+            .dst_capacity = 0,
+            .dst_size = 0,
+            .initial_capacity = initial_capacity,
+            .error = 0,
+            .error_name = NULL
+        };
+        rb_thread_call_without_gvl(decompress_stream_without_gvl, &stream_args, NULL, NULL);
+        if (stream_args.error) {
+            if (stream_args.dst) free(stream_args.dst);
+            rb_raise(rb_eRuntimeError, "Decompression failed: %s", stream_args.error_name);
         }
-        // Trim to actual size
-        rb_str_resize(result, result_size);
+        // Create Ruby string from the C buffer, then free the C buffer
+        VALUE result = rb_str_new(stream_args.dst, stream_args.dst_size);
+        free(stream_args.dst);
         return result;
     }
     VALUE result = rb_str_new(NULL, contentSize);

data/ext/vibe_zstd/dict.c CHANGED Viewed

@@ -115,7 +115,7 @@ typedef struct {
 // Cleanup function for dictionary training resources
 // Safely frees all allocated memory, checking for NULL to handle partial allocations.
-// Called explicitly in error paths and after successful training to prevent leaks.
+// Used as the ensure callback in rb_ensure to guarantee cleanup regardless of exceptions.
 static VALUE
 dict_training_cleanup(VALUE arg) {
     dict_training_resources* resources = (dict_training_resources*)arg;
@@ -125,6 +125,108 @@ dict_training_cleanup(VALUE arg) {
     return Qnil;
 }
+// Copy Ruby sample strings into contiguous C buffer for ZDICT functions
+static void
+copy_samples_to_buffer(dict_training_resources* resources, VALUE samples, long num_samples) {
+    size_t offset = 0;  // running write position inside samples_buffer
+    for (long i = 0; i < num_samples; i++) {
+        VALUE sample = rb_ary_entry(samples, i);
+        size_t sample_len = RSTRING_LEN(sample);
+        resources->sample_sizes[i] = sample_len;  // record each sample's length next to its bytes
+        memcpy(resources->samples_buffer + offset, RSTRING_PTR(sample), sample_len);  // no bounds check here; NOTE(review): relies on the caller having sized both buffers - confirm
+        offset += sample_len;
+    }
+}
+// Context structs and body functions for rb_ensure-based dict training.
+// Each training function uses rb_ensure to guarantee resource cleanup even if
+// rb_str_new or other Ruby API calls raise exceptions (e.g., OOM).
+// Common fields are in dict_training_ctx; variant structs embed it as first member.
+typedef struct {
+    dict_training_resources* resources;  // C buffers used by the body function
+    VALUE result;  // set by the body function to the resulting Ruby string
+    size_t max_dict_size;  // capacity passed to ZDICT for the dictionary buffer
+    long num_samples;  // number of entries read from samples
+    VALUE samples;  // Ruby array of sample strings
+} dict_training_ctx;
+static VALUE train_dict_basic_body(VALUE arg) {  // body callback: arg is a dict_training_ctx* cast to VALUE
+    dict_training_ctx* ctx = (dict_training_ctx*)arg;
+    copy_samples_to_buffer(ctx->resources, ctx->samples, ctx->num_samples);
+    size_t dict_size = ZDICT_trainFromBuffer(
+        ctx->resources->dict_buffer, ctx->max_dict_size,
+        ctx->resources->samples_buffer, ctx->resources->sample_sizes, (unsigned)ctx->num_samples
+    );
+    if (ZDICT_isError(dict_size)) {
+        rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));  // buffers are not freed here
+    }
+    ctx->result = rb_str_new(ctx->resources->dict_buffer, dict_size);  // copies dict_size bytes into a new Ruby string
+    return ctx->result;
+}
+typedef struct {
+    dict_training_ctx base;  // shared fields
+    ZDICT_cover_params_t params;  // passed by value to ZDICT_trainFromBuffer_cover
+} train_dict_cover_ctx;
+static VALUE train_dict_cover_body(VALUE arg) {  // body callback: arg is a train_dict_cover_ctx* cast to VALUE
+    train_dict_cover_ctx* ctx = (train_dict_cover_ctx*)arg;
+    copy_samples_to_buffer(ctx->base.resources, ctx->base.samples, ctx->base.num_samples);
+    size_t dict_size = ZDICT_trainFromBuffer_cover(
+        ctx->base.resources->dict_buffer, ctx->base.max_dict_size,
+        ctx->base.resources->samples_buffer, ctx->base.resources->sample_sizes, (unsigned)ctx->base.num_samples,
+        ctx->params
+    );
+    if (ZDICT_isError(dict_size)) {
+        rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));  // buffers are not freed here
+    }
+    ctx->base.result = rb_str_new(ctx->base.resources->dict_buffer, dict_size);  // copies dict_size bytes into a new Ruby string
+    return ctx->base.result;
+}
+typedef struct {
+    dict_training_ctx base;  // shared fields
+    ZDICT_fastCover_params_t params;  // passed by value to ZDICT_trainFromBuffer_fastCover
+} train_dict_fast_cover_ctx;
+static VALUE train_dict_fast_cover_body(VALUE arg) {  // body callback: arg is a train_dict_fast_cover_ctx* cast to VALUE
+    train_dict_fast_cover_ctx* ctx = (train_dict_fast_cover_ctx*)arg;
+    copy_samples_to_buffer(ctx->base.resources, ctx->base.samples, ctx->base.num_samples);
+    size_t dict_size = ZDICT_trainFromBuffer_fastCover(
+        ctx->base.resources->dict_buffer, ctx->base.max_dict_size,
+        ctx->base.resources->samples_buffer, ctx->base.resources->sample_sizes, (unsigned)ctx->base.num_samples,
+        ctx->params
+    );
+    if (ZDICT_isError(dict_size)) {
+        rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));  // buffers are not freed here
+    }
+    ctx->base.result = rb_str_new(ctx->base.resources->dict_buffer, dict_size);  // copies dict_size bytes into a new Ruby string
+    return ctx->base.result;
+}
+typedef struct {
+    dict_training_ctx base;  // shared fields
+    VALUE content_val;  // Ruby string whose bytes are passed as the dictionary content
+    ZDICT_params_t params;  // passed by value to ZDICT_finalizeDictionary
+} finalize_dict_ctx;
+static VALUE finalize_dict_body(VALUE arg) {  // body callback: arg is a finalize_dict_ctx* cast to VALUE
+    finalize_dict_ctx* ctx = (finalize_dict_ctx*)arg;
+    copy_samples_to_buffer(ctx->base.resources, ctx->base.samples, ctx->base.num_samples);
+    size_t dict_size = ZDICT_finalizeDictionary(
+        ctx->base.resources->dict_buffer, ctx->base.max_dict_size,
+        RSTRING_PTR(ctx->content_val), RSTRING_LEN(ctx->content_val),
+        ctx->base.resources->samples_buffer, ctx->base.resources->sample_sizes, (unsigned)ctx->base.num_samples,
+        ctx->params
+    );
+    if (ZDICT_isError(dict_size)) {
+        rb_raise(rb_eRuntimeError, "Dictionary finalization failed: %s", ZDICT_getErrorName(dict_size));  // buffers are not freed here
+    }
+    ctx->base.result = rb_str_new(ctx->base.resources->dict_buffer, dict_size);  // copies dict_size bytes into a new Ruby string
+    return ctx->base.result;
+}
 // Train dictionary from samples - module-level method
 // VibeZstd.train_dict(samples, max_dict_size: 112640)
 //
@@ -166,36 +268,17 @@ vibe_zstd_train_dict(int argc, VALUE* argv, VALUE self) {
     resources.samples_buffer = ALLOC_N(char, total_samples_size);
     resources.dict_buffer = ALLOC_N(char, max_dict_size);
-    // Layer 3: Use rb_ensure for guaranteed cleanup (safety net)
-    // Build samples buffer - we already validated, so just copy
-    size_t offset = 0;
-    for (long i = 0; i < num_samples; i++) {
-        VALUE sample = rb_ary_entry(samples, i);
-        size_t sample_len = RSTRING_LEN(sample);
-        resources.sample_sizes[i] = sample_len;
-        memcpy(resources.samples_buffer + offset, RSTRING_PTR(sample), sample_len);
-        offset += sample_len;
-    }
-    // Train the dictionary
-    size_t dict_size = ZDICT_trainFromBuffer(
-        resources.dict_buffer, max_dict_size,
-        resources.samples_buffer, resources.sample_sizes, (unsigned)num_samples
-    );
-    // Check for errors
-    if (ZDICT_isError(dict_size)) {
-        dict_training_cleanup((VALUE)&resources);
-        rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));
-    }
-    // Create Ruby string with the trained dictionary
-    VALUE dict_string = rb_str_new(resources.dict_buffer, dict_size);
-    // Clean up all resources
-    dict_training_cleanup((VALUE)&resources);
-    return dict_string;
+    // Layer 3: Use rb_ensure for guaranteed cleanup
+    dict_training_ctx ctx = {
+        .resources = &resources,
+        .result = Qnil,
+        .max_dict_size = max_dict_size,
+        .num_samples = num_samples,
+        .samples = samples
+    };
+    rb_ensure(train_dict_basic_body, (VALUE)&ctx, dict_training_cleanup, (VALUE)&resources);
+    return ctx.result;
 }
 // VibeZstd.train_dict_cover(samples, max_dict_size: 112640, k: 0, d: 0, steps: 0, split_point: 1.0, shrink_dict: false, shrink_dict_max_regression: 0, nb_threads: 0)
@@ -268,37 +351,20 @@ vibe_zstd_train_dict_cover(int argc, VALUE* argv, VALUE self) {
     resources.samples_buffer = ALLOC_N(char, total_samples_size);
     resources.dict_buffer = ALLOC_N(char, max_dict_size);
-    // Layer 3: Use rb_ensure for guaranteed cleanup (safety net)
-    // Build samples buffer - we already validated, so just copy
-    size_t offset = 0;
-    for (long i = 0; i < num_samples; i++) {
-        VALUE sample = rb_ary_entry(samples, i);
-        size_t sample_len = RSTRING_LEN(sample);
-        resources.sample_sizes[i] = sample_len;
-        memcpy(resources.samples_buffer + offset, RSTRING_PTR(sample), sample_len);
-        offset += sample_len;
-    }
-    // Train the dictionary using COVER algorithm
-    size_t dict_size = ZDICT_trainFromBuffer_cover(
-        resources.dict_buffer, max_dict_size,
-        resources.samples_buffer, resources.sample_sizes, (unsigned)num_samples,
-        params
-    );
-    // Check for errors
-    if (ZDICT_isError(dict_size)) {
-        dict_training_cleanup((VALUE)&resources);
-        rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));
-    }
-    // Create Ruby string with the trained dictionary
-    VALUE dict_string = rb_str_new(resources.dict_buffer, dict_size);
-    // Clean up all resources
-    dict_training_cleanup((VALUE)&resources);
-    return dict_string;
+    // Layer 3: Use rb_ensure for guaranteed cleanup
+    train_dict_cover_ctx ctx = {
+        .base = {
+            .resources = &resources,
+            .result = Qnil,
+            .max_dict_size = max_dict_size,
+            .num_samples = num_samples,
+            .samples = samples
+        },
+        .params = params
+    };
+    rb_ensure(train_dict_cover_body, (VALUE)&ctx, dict_training_cleanup, (VALUE)&resources);
+    return ctx.base.result;
 }
 // VibeZstd.train_dict_fast_cover(samples, max_dict_size: 112640, k: 0, d: 0, f: 0, split_point: 1.0, accel: 0, shrink_dict: false, shrink_dict_max_regression: 0, nb_threads: 0)
@@ -374,37 +440,20 @@ vibe_zstd_train_dict_fast_cover(int argc, VALUE* argv, VALUE self) {
     resources.samples_buffer = ALLOC_N(char, total_samples_size);
     resources.dict_buffer = ALLOC_N(char, max_dict_size);
-    // Layer 3: Use rb_ensure for guaranteed cleanup (safety net)
-    // Build samples buffer - we already validated, so just copy
-    size_t offset = 0;
-    for (long i = 0; i < num_samples; i++) {
-        VALUE sample = rb_ary_entry(samples, i);
-        size_t sample_len = RSTRING_LEN(sample);
-        resources.sample_sizes[i] = sample_len;
-        memcpy(resources.samples_buffer + offset, RSTRING_PTR(sample), sample_len);
-        offset += sample_len;
-    }
-    // Train the dictionary using fast COVER algorithm
-    size_t dict_size = ZDICT_trainFromBuffer_fastCover(
-        resources.dict_buffer, max_dict_size,
-        resources.samples_buffer, resources.sample_sizes, (unsigned)num_samples,
-        params
-    );
-    // Check for errors
-    if (ZDICT_isError(dict_size)) {
-        dict_training_cleanup((VALUE)&resources);
-        rb_raise(rb_eRuntimeError, "Dictionary training failed: %s", ZDICT_getErrorName(dict_size));
-    }
-    // Create Ruby string with the trained dictionary
-    VALUE dict_string = rb_str_new(resources.dict_buffer, dict_size);
-    // Clean up all resources
-    dict_training_cleanup((VALUE)&resources);
-    return dict_string;
+    // Layer 3: Use rb_ensure for guaranteed cleanup
+    train_dict_fast_cover_ctx ctx = {
+        .base = {
+            .resources = &resources,
+            .result = Qnil,
+            .max_dict_size = max_dict_size,
+            .num_samples = num_samples,
+            .samples = samples
+        },
+        .params = params
+    };
+    rb_ensure(train_dict_fast_cover_body, (VALUE)&ctx, dict_training_cleanup, (VALUE)&resources);
+    return ctx.base.result;
 }
 // Get dictionary ID from raw dictionary data - module-level utility
@@ -490,38 +539,21 @@ vibe_zstd_finalize_dictionary(int argc, VALUE* argv, VALUE self) {
     resources.samples_buffer = ALLOC_N(char, total_samples_size);
     resources.dict_buffer = ALLOC_N(char, max_size);
-    // Layer 3: Use rb_ensure for guaranteed cleanup (safety net)
-    // Build samples buffer - we already validated, so just copy
-    size_t offset = 0;
-    for (long i = 0; i < num_samples; i++) {
-        VALUE sample = rb_ary_entry(samples_val, i);
-        size_t sample_len = RSTRING_LEN(sample);
-        resources.sample_sizes[i] = sample_len;
-        memcpy(resources.samples_buffer + offset, RSTRING_PTR(sample), sample_len);
-        offset += sample_len;
-    }
-    // Finalize the dictionary
-    size_t dict_size = ZDICT_finalizeDictionary(
-        resources.dict_buffer, max_size,
-        RSTRING_PTR(content_val), RSTRING_LEN(content_val),
-        resources.samples_buffer, resources.sample_sizes, (unsigned)num_samples,
-        params
-    );
-    // Check for errors
-    if (ZDICT_isError(dict_size)) {
-        dict_training_cleanup((VALUE)&resources);
-        rb_raise(rb_eRuntimeError, "Dictionary finalization failed: %s", ZDICT_getErrorName(dict_size));
-    }
-    // Create Ruby string with the finalized dictionary
-    VALUE dict_string = rb_str_new(resources.dict_buffer, dict_size);
-    // Clean up all resources
-    dict_training_cleanup((VALUE)&resources);
-    return dict_string;
+    // Layer 3: Use rb_ensure for guaranteed cleanup
+    finalize_dict_ctx ctx = {
+        .base = {
+            .resources = &resources,
+            .result = Qnil,
+            .max_dict_size = max_size,
+            .num_samples = num_samples,
+            .samples = samples_val
+        },
+        .content_val = content_val,
+        .params = params
+    };
+    rb_ensure(finalize_dict_body, (VALUE)&ctx, dict_training_cleanup, (VALUE)&resources);
+    return ctx.base.result;
 }
 // Get dictionary header size - module-level utility

data/ext/vibe_zstd/extconf.rb CHANGED Viewed

@@ -14,10 +14,11 @@ $INCFLAGS << " -I#{LIBZSTD_DIR}/decompress"
 $INCFLAGS << " -I#{LIBZSTD_DIR}/dictBuilder"
 # standard:enable Style/GlobalVars
-# Add preprocessor definitions
-append_cflags("-DXXH_NAMESPACE=ZSTD_")
-append_cflags("-DZSTD_LEGACY_SUPPORT=0")  # Disable legacy support to reduce size
-append_cflags("-DZSTD_MULTITHREAD")  # Enable multithreading support
+# Add preprocessor definitions (use $defs so they appear in DEFS in the Makefile,
+# append_cflags only validates the flag but doesn't reliably propagate -D flags)
+$defs << "-DXXH_NAMESPACE=ZSTD_"
+$defs << "-DZSTD_LEGACY_SUPPORT=0" # Disable legacy support to reduce size
+$defs << "-DZSTD_MULTITHREAD" # Enable multithreading support
 # Link with pthread for multithreading
 have_library("pthread") || abort("pthread library is required for multithreading support")
@@ -32,8 +33,7 @@ zstd_sources = Dir[
   "#{LIBZSTD_DIR}/common/*.c",
   "#{LIBZSTD_DIR}/compress/*.c",
   "#{LIBZSTD_DIR}/decompress/*.{c,S}",
-  "#{LIBZSTD_DIR}/dictBuilder/*.c",
-  "#{LIBZSTD_DIR}/deprecated/*.c"
+  "#{LIBZSTD_DIR}/dictBuilder/*.c"
 ].map { |path| File.basename(path) }
 # Add the main vibe_zstd.c file (which includes the split files via #include)
@@ -46,7 +46,6 @@ $VPATH << "$(srcdir)/libzstd/common"
 $VPATH << "$(srcdir)/libzstd/compress"
 $VPATH << "$(srcdir)/libzstd/decompress"
 $VPATH << "$(srcdir)/libzstd/dictBuilder"
-$VPATH << "$(srcdir)/libzstd/deprecated"
 # standard:enable Style/GlobalVars
 create_makefile("vibe_zstd/vibe_zstd")

data/ext/vibe_zstd/streaming.c CHANGED Viewed

@@ -1,6 +1,10 @@
 // Streaming implementation for VibeZstd
 #include "vibe_zstd_internal.h"
+// Cached method IDs for frequently called methods
+static ID id_write;
+static ID id_read;
 // Forward declarations
 static VALUE vibe_zstd_writer_initialize(int argc, VALUE *argv, VALUE self);
 static VALUE vibe_zstd_writer_write(VALUE self, VALUE data);
@@ -25,12 +29,12 @@ vibe_zstd_writer_initialize(int argc, VALUE *argv, VALUE self) {
     TypedData_Get_Struct(self, vibe_zstd_cstream, &vibe_zstd_cstream_type, cstream);
     // Validate IO object responds to write (duck typing)
-    if (!rb_respond_to(io, rb_intern("write"))) {
+    if (!rb_respond_to(io, id_write)) {
         rb_raise(rb_eTypeError, "IO object must respond to write");
     }
-    // Store IO object
-    cstream->io = io;
+    // Store IO object (write barrier for WB_PROTECTED)
+    RB_OBJ_WRITE(self, &cstream->io, io);
     rb_ivar_set(self, rb_intern("@io"), io);
     // Parse options
@@ -87,6 +91,9 @@ vibe_zstd_writer_initialize(int argc, VALUE *argv, VALUE self) {
         }
     }
+    // Allocate reusable output buffer (write barrier for WB_PROTECTED)
+    RB_OBJ_WRITE(self, &cstream->output_buffer, rb_str_buf_new(ZSTD_CStreamOutSize()));
     return self;
 }
@@ -105,10 +112,15 @@ vibe_zstd_writer_write(VALUE self, VALUE data) {
     };
     size_t outBufferSize = ZSTD_CStreamOutSize();
-    VALUE outBuffer = rb_str_buf_new(outBufferSize);
+    VALUE outBuffer = cstream->output_buffer;
     // Process all input data in chunks
     while (input.pos < input.size) {
+        // Unshare buffer if COW-shared by a prior IO#write receiver (Ruby 3.3+),
+        // then restore capacity which may have shrunk during unsharing
+        rb_str_modify(outBuffer);
+        rb_str_resize(outBuffer, (long)outBufferSize);
+        rb_str_set_len(outBuffer, 0);
         ZSTD_outBuffer output = {
             .dst = RSTRING_PTR(outBuffer),
             .size = outBufferSize,
@@ -125,8 +137,7 @@ vibe_zstd_writer_write(VALUE self, VALUE data) {
         // Write any compressed output that was produced
         if (output.pos > 0) {
             rb_str_set_len(outBuffer, output.pos);
-            rb_funcall(cstream->io, rb_intern("write"), 1, outBuffer);
-            // No need to resize - buffer capacity remains at outBufferSize
+            rb_funcall(cstream->io, id_write, 1, outBuffer);
         }
     }
@@ -139,7 +150,7 @@ vibe_zstd_writer_flush(VALUE self) {
     TypedData_Get_Struct(self, vibe_zstd_cstream, &vibe_zstd_cstream_type, cstream);
     size_t outBufferSize = ZSTD_CStreamOutSize();
-    VALUE outBuffer = rb_str_buf_new(outBufferSize);
+    VALUE outBuffer = cstream->output_buffer;
     ZSTD_inBuffer input = { NULL, 0, 0 };
     size_t remaining;
@@ -147,6 +158,9 @@ vibe_zstd_writer_flush(VALUE self) {
     // ZSTD_e_flush: flush internal buffers, making all data readable
     // Loop until remaining == 0 (flush complete)
     do {
+        rb_str_modify(outBuffer);
+        rb_str_resize(outBuffer, (long)outBufferSize);
+        rb_str_set_len(outBuffer, 0);
         ZSTD_outBuffer output = {
             .dst = RSTRING_PTR(outBuffer),
             .size = outBufferSize,
@@ -161,8 +175,7 @@ vibe_zstd_writer_flush(VALUE self) {
         if (output.pos > 0) {
             rb_str_set_len(outBuffer, output.pos);
-            rb_funcall(cstream->io, rb_intern("write"), 1, outBuffer);
-            // No need to resize - buffer capacity remains at outBufferSize
+            rb_funcall(cstream->io, id_write, 1, outBuffer);
         }
     } while (remaining > 0);
@@ -175,7 +188,7 @@ vibe_zstd_writer_finish(VALUE self) {
     TypedData_Get_Struct(self, vibe_zstd_cstream, &vibe_zstd_cstream_type, cstream);
     size_t outBufferSize = ZSTD_CStreamOutSize();
-    VALUE outBuffer = rb_str_buf_new(outBufferSize);
+    VALUE outBuffer = cstream->output_buffer;
     ZSTD_inBuffer input = { NULL, 0, 0 };
     size_t remaining;
@@ -183,6 +196,9 @@ vibe_zstd_writer_finish(VALUE self) {
     // ZSTD_e_end: finalize frame with checksum and epilogue
     // Loop until remaining == 0 (frame complete)
     do {
+        rb_str_modify(outBuffer);
+        rb_str_resize(outBuffer, (long)outBufferSize);
+        rb_str_set_len(outBuffer, 0);
         ZSTD_outBuffer output = {
             .dst = RSTRING_PTR(outBuffer),
             .size = outBufferSize,
@@ -197,8 +213,7 @@ vibe_zstd_writer_finish(VALUE self) {
         if (output.pos > 0) {
             rb_str_set_len(outBuffer, output.pos);
-            rb_funcall(cstream->io, rb_intern("write"), 1, outBuffer);
-            // No need to resize - buffer capacity remains at outBufferSize
+            rb_funcall(cstream->io, id_write, 1, outBuffer);
         }
     } while (remaining > 0);
@@ -216,12 +231,12 @@ vibe_zstd_reader_initialize(int argc, VALUE *argv, VALUE self) {
     TypedData_Get_Struct(self, vibe_zstd_dstream, &vibe_zstd_dstream_type, dstream);
     // Validate IO object responds to read (duck typing)
-    if (!rb_respond_to(io, rb_intern("read"))) {
+    if (!rb_respond_to(io, id_read)) {
         rb_raise(rb_eTypeError, "IO object must respond to read");
     }
-    // Store IO object
-    dstream->io = io;
+    // Store IO object (write barrier for WB_PROTECTED)
+    RB_OBJ_WRITE(self, &dstream->io, io);
     rb_ivar_set(self, rb_intern("@io"), io);
     // Parse options
@@ -263,7 +278,7 @@ vibe_zstd_reader_initialize(int argc, VALUE *argv, VALUE self) {
     }
     // Initialize input buffer management
-    dstream->input_data = rb_str_new(NULL, 0);
+    RB_OBJ_WRITE(self, &dstream->input_data, rb_str_new(NULL, 0));
     dstream->input.src = NULL;
     dstream->input.size = 0;
     dstream->input.pos = 0;
@@ -317,7 +332,7 @@ vibe_zstd_reader_read(int argc, VALUE *argv, VALUE self) {
     while (total_read < requested_size) {
         // Refill input buffer when all compressed data consumed
         if (dstream->input.pos >= dstream->input.size) {
-            VALUE chunk = rb_funcall(dstream->io, rb_intern("read"), 1, SIZET2NUM(inBufferSize));
+            VALUE chunk = rb_funcall(dstream->io, id_read, 1, SIZET2NUM(inBufferSize));
             if (NIL_P(chunk)) {
                 dstream->eof = 1;
                 if (total_read == 0 && !made_progress) {
@@ -326,8 +341,8 @@ vibe_zstd_reader_read(int argc, VALUE *argv, VALUE self) {
                 break;
             }
-            // Reset input buffer with new data
-            dstream->input_data = chunk;
+            // Reset input buffer with new data (write barrier for WB_PROTECTED)
+            RB_OBJ_WRITE(self, &dstream->input_data, chunk);
             dstream->input.src = RSTRING_PTR(chunk);
             dstream->input.size = RSTRING_LEN(chunk);
             dstream->input.pos = 0;
@@ -394,6 +409,10 @@ vibe_zstd_reader_eof(VALUE self) {
 // Class initialization function called from main Init_vibe_zstd
 void
 vibe_zstd_streaming_init_classes(VALUE rb_cVibeZstdCompressWriter, VALUE rb_cVibeZstdDecompressReader) {
+    // Cache method IDs for frequently called methods
+    id_write = rb_intern("write");
+    id_read = rb_intern("read");
     // CompressWriter setup
     rb_define_alloc_func(rb_cVibeZstdCompressWriter, vibe_zstd_cstream_alloc);
     rb_define_method(rb_cVibeZstdCompressWriter, "initialize", vibe_zstd_writer_initialize, -1);

data/ext/vibe_zstd/vibe_zstd.c CHANGED Viewed

@@ -12,7 +12,7 @@ VALUE rb_cVibeZstdDDict;
 VALUE rb_cVibeZstdCompressWriter;
 VALUE rb_cVibeZstdDecompressReader;
-// Forward declarations for free and mark functions
+// Forward declarations for free, mark, and dsize functions
 static void vibe_zstd_cctx_free(void* ptr);
 static void vibe_zstd_dctx_free(void* ptr);
 static void vibe_zstd_cdict_free(void* ptr);
@@ -22,16 +22,47 @@ static void vibe_zstd_cstream_mark(void* ptr);
 static void vibe_zstd_dstream_free(void* ptr);
 static void vibe_zstd_dstream_mark(void* ptr);
+// dsize callbacks - report memory usage to Ruby GC for accurate memory pressure tracking
+static size_t vibe_zstd_cctx_dsize(const void* ptr) {
+    const vibe_zstd_cctx* cctx = ptr;
+    return sizeof(vibe_zstd_cctx) + (cctx->cctx ? ZSTD_sizeof_CCtx(cctx->cctx) : 0);
+}
+static size_t vibe_zstd_dctx_dsize(const void* ptr) {
+    const vibe_zstd_dctx* dctx = ptr;
+    return sizeof(vibe_zstd_dctx) + (dctx->dctx ? ZSTD_sizeof_DCtx(dctx->dctx) : 0);
+}
+static size_t vibe_zstd_cdict_dsize(const void* ptr) {
+    const vibe_zstd_cdict* cdict = ptr;
+    return sizeof(vibe_zstd_cdict) + (cdict->cdict ? ZSTD_sizeof_CDict(cdict->cdict) : 0);
+}
+static size_t vibe_zstd_ddict_dsize(const void* ptr) {
+    const vibe_zstd_ddict* ddict = ptr;
+    return sizeof(vibe_zstd_ddict) + (ddict->ddict ? ZSTD_sizeof_DDict(ddict->ddict) : 0);
+}
+static size_t vibe_zstd_cstream_dsize(const void* ptr) {
+    const vibe_zstd_cstream* cstream = ptr;
+    return sizeof(vibe_zstd_cstream) + (cstream->cstream ? ZSTD_sizeof_CStream(cstream->cstream) : 0);
+}
+static size_t vibe_zstd_dstream_dsize(const void* ptr) {
+    const vibe_zstd_dstream* dstream = ptr;
+    return sizeof(vibe_zstd_dstream) + (dstream->dstream ? ZSTD_sizeof_DStream(dstream->dstream) : 0);
+}
 // TypedData type definitions (these are referenced by extern in the split files)
 rb_data_type_t vibe_zstd_cctx_type = {
     .wrap_struct_name = "vibe_zstd_cctx",
     .function = {
         .dmark = NULL,
         .dfree = (RUBY_DATA_FUNC)vibe_zstd_cctx_free,
-        .dsize = NULL,
+        .dsize = vibe_zstd_cctx_dsize,
     },
     .data = NULL,
-    .flags = RUBY_TYPED_FREE_IMMEDIATELY,
+    .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
 };
 rb_data_type_t vibe_zstd_dctx_type = {
@@ -39,10 +70,10 @@ rb_data_type_t vibe_zstd_dctx_type = {
     .function = {
         .dmark = NULL,
         .dfree = (RUBY_DATA_FUNC)vibe_zstd_dctx_free,
-        .dsize = NULL,
+        .dsize = vibe_zstd_dctx_dsize,
     },
     .data = NULL,
-    .flags = RUBY_TYPED_FREE_IMMEDIATELY,
+    .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
 };
 rb_data_type_t vibe_zstd_cdict_type = {
@@ -50,10 +81,10 @@ rb_data_type_t vibe_zstd_cdict_type = {
     .function = {
         .dmark = NULL,
         .dfree = (RUBY_DATA_FUNC)vibe_zstd_cdict_free,
-        .dsize = NULL,
+        .dsize = vibe_zstd_cdict_dsize,
     },
     .data = NULL,
-    .flags = RUBY_TYPED_FREE_IMMEDIATELY,
+    .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
 };
 rb_data_type_t vibe_zstd_ddict_type = {
@@ -61,10 +92,10 @@ rb_data_type_t vibe_zstd_ddict_type = {
     .function = {
         .dmark = NULL,
         .dfree = (RUBY_DATA_FUNC)vibe_zstd_ddict_free,
-        .dsize = NULL,
+        .dsize = vibe_zstd_ddict_dsize,
     },
     .data = NULL,
-    .flags = RUBY_TYPED_FREE_IMMEDIATELY,
+    .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
 };
 rb_data_type_t vibe_zstd_cstream_type = {
@@ -72,10 +103,10 @@ rb_data_type_t vibe_zstd_cstream_type = {
     .function = {
         .dmark = (RUBY_DATA_FUNC)vibe_zstd_cstream_mark,
         .dfree = (RUBY_DATA_FUNC)vibe_zstd_cstream_free,
-        .dsize = NULL,
+        .dsize = vibe_zstd_cstream_dsize,
     },
     .data = NULL,
-    .flags = RUBY_TYPED_FREE_IMMEDIATELY,
+    .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
 };
 rb_data_type_t vibe_zstd_dstream_type = {
@@ -83,10 +114,10 @@ rb_data_type_t vibe_zstd_dstream_type = {
     .function = {
         .dmark = (RUBY_DATA_FUNC)vibe_zstd_dstream_mark,
         .dfree = (RUBY_DATA_FUNC)vibe_zstd_dstream_free,
-        .dsize = NULL,
+        .dsize = vibe_zstd_dstream_dsize,
     },
     .data = NULL,
-    .flags = RUBY_TYPED_FREE_IMMEDIATELY,
+    .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
 };
 // Free functions
@@ -130,6 +161,7 @@ static void
 vibe_zstd_cstream_mark(void* ptr) {
     vibe_zstd_cstream* cstream = ptr;
     rb_gc_mark(cstream->io);
+    rb_gc_mark(cstream->output_buffer);
 }
 static void
@@ -200,6 +232,7 @@ vibe_zstd_cstream_alloc(VALUE klass) {
     vibe_zstd_cstream* cstream = ALLOC(vibe_zstd_cstream);
     cstream->cstream = NULL;
     cstream->io = Qnil;
+    cstream->output_buffer = Qnil;
     return TypedData_Wrap_Struct(klass, &vibe_zstd_cstream_type, cstream);
 }
@@ -257,9 +290,8 @@ vibe_zstd_default_c_level(VALUE self) {
 RUBY_FUNC_EXPORTED void
 Init_vibe_zstd(void)
 {
-  // Initialize parameter lookup tables
-  init_cctx_param_table();
-  init_dctx_param_table();
+  // Parameter lookup tables are initialized in vibe_zstd_cctx_init_class()
+  // and vibe_zstd_dctx_init_class() respectively - no need to call here.
   rb_mVibeZstd = rb_define_module("VibeZstd");

data/ext/vibe_zstd/vibe_zstd.h CHANGED Viewed

@@ -26,6 +26,7 @@ typedef struct {
 typedef struct {
     ZSTD_CStream* cstream;
     VALUE io;
+    VALUE output_buffer;  // Reusable output buffer to avoid ~128KB allocation per write/flush/finish
 } vibe_zstd_cstream;
 typedef struct {

data/lib/vibe_zstd/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module VibeZstd
-  VERSION = "1.0.2"
+  VERSION = "1.1.1"
 end

data/lib/vibe_zstd.rb CHANGED Viewed

@@ -39,9 +39,7 @@ module VibeZstd
       # Defense: Prevent infinite loop on malformed data
       # A valid frame must have non-zero size (at minimum: frame header)
-      if frame_size <= 0
-        raise Error, "Invalid frame: zero or negative size at offset #{offset}"
-      end
+      raise Error, "Invalid frame: zero or negative size at offset #{offset}" if frame_size <= 0
       if skippable_frame?(frame_data)
         content, magic_variant = read_skippable_frame(frame_data)
@@ -191,8 +189,14 @@ module VibeZstd
     end
     # Read all remaining data
+    # Drains any buffered data from line_buffer first
     def read_all
       chunks = []
+      # Drain line buffer first if present
+      if @line_buffer && !@line_buffer.empty?
+        chunks << @line_buffer
+        @line_buffer = +""
+      end
       while (chunk = read)
         chunks << chunk
       end
@@ -214,20 +218,29 @@ module VibeZstd
       end
     end
-    # Read a single line (up to newline or EOF)
+    # Read a single line (up to separator or EOF)
+    # Uses buffered reads (8192 bytes) instead of byte-at-a-time for performance.
+    # Orders of magnitude faster for line-oriented reading.
     def gets(sep = $/)
-      return nil if eof?
+      return nil if eof? && (@line_buffer.nil? || @line_buffer.empty?)
-      line = +""
-      until eof?
-        chunk = read(1)
+      @line_buffer ||= +""
+      loop do
+        # Check buffer for separator
+        if (idx = @line_buffer.index(sep))
+          return @line_buffer.slice!(0, idx + sep.bytesize)
+        end
+        # Read more data in larger chunks
+        chunk = read(8192)
         break unless chunk
-        line << chunk
-        break if chunk.end_with?(sep)
+        @line_buffer << chunk
       end
-      line.empty? ? nil : line
+      # Return remaining buffer or nil
+      @line_buffer.empty? ? nil : @line_buffer.slice!(0, @line_buffer.bytesize)
     end
     # Iterate over lines

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: vibe_zstd
 version: !ruby/object:Gem::Version
-  version: 1.0.2
+  version: 1.1.1
 platform: ruby
 authors:
 - Kelley Reynolds
 bindir: exe
 cert_chain: []
-date: 2026-01-20 00:00:00.000000000 Z
+date: 2026-03-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: benchmark-ips