multi_compress 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e6af5cef6e9b15c5b0e11a148b0543377c0ad332e710083950bca7cb3d49e70e
4
- data.tar.gz: 6722c74ed13eabcdb3a4a7ca81cd12b9d3f286f11d00e73de759861db198f2a2
3
+ metadata.gz: 51bd49ce7dbb59d7428cec080e9f2e1dc3b83412848b8e30e37eec3dc7f4fbf4
4
+ data.tar.gz: 34bd66cd9e255a5e13bb48f91afb584e2c3f3caefed71bd5c8367f61d6199e7d
5
5
  SHA512:
6
- metadata.gz: bca66c39d081538682abd1a45862f4d141605f028b3f233ec886d21f3b38c84000956275b73303c00fda5755f37796c3bcd8bb533db32a0f12b272107ed298c1
7
- data.tar.gz: 209cebbbbc4be03bd48eea3d59ec1254502db4a8a5e9109b5f03425cc2f05d474ab7f250d2a2ecfaba36f1c6ac9686b159cf0433bb9c50bd84aecffe62ab365e
6
+ metadata.gz: 3c2139458b9ec45769988f3efb3a3165c96801436ca31c7086f6f56a1a0549095d4b493fe239ea1607932b4209315eab076a2ed5fb68ac2249af5175382ec898
7
+ data.tar.gz: 78290aed6265c4915f7eb850646fbdc0e6000a615312a4714c8dbf1aac7d8817e88bcef06b92507d718a2f774601b183c521f05f7c12758bc85d04407afb51f9
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.3.4]
4
+
5
+ ### Changed
6
+ - Improved one-shot Zstd performance by reusing per-thread CCtx/DCtx.
7
+ - On deterministic arm64-darwin benchmarks, total zstd roundtrip improved by ~11–19% on ~10KB payloads,
8
+ ~1–4% on medium payloads, and ~2–6% on log-like large payloads, with identical compressed sizes.
9
+
3
10
  ## [0.3.3]
4
11
 
5
12
  ### Changed
@@ -6,6 +6,8 @@ USE_SYSTEM = arg_config("--use-system-libraries") ||
6
6
  ENV["COMPRESS_USE_SYSTEM_LIBRARIES"]
7
7
  FORCE_VENDORED = arg_config("--force-vendored") ||
8
8
  ENV["COMPRESS_FORCE_VENDORED"]
9
+ DISABLE_ZSTD_ASM = arg_config("--disable-zstd-asm") ||
10
+ ENV["MULTI_COMPRESS_DISABLE_ZSTD_ASM"] == "1"
9
11
 
10
12
  ZSTD_SUBDIRS = %w[lib/common lib/compress lib/decompress lib/dictBuilder].freeze
11
13
  BROTLI_SUBDIRS = %w[c/common c/enc c/dec].freeze
@@ -53,6 +55,15 @@ def find_compress_c_dir
53
55
  &.then { |path| File.expand_path(path) } || __dir__
54
56
  end
55
57
 
58
# Reports whether the platform Ruby was built for qualifies for the zstd
# assembly sources.
#
# @return [Boolean] true only when RUBY_PLATFORM names an x86_64/amd64 target
#   and mentions neither "mswin" nor "mingw"; false for every other platform
def zstd_asm_supported?
  platform = RUBY_PLATFORM
  return false unless platform.match?(/x86_64|amd64/)

  %w[mswin mingw].none? { |windows_tag| platform.include?(windows_tag) }
end
66
+
56
67
  def configure_system_libraries
57
68
  puts "Building with SYSTEM libraries"
58
69
 
@@ -98,7 +109,10 @@ def configure_vendored_libraries(vendor_dir)
98
109
  puts " #{all_vendor_srcs.length} vendored C files"
99
110
 
100
111
  add_include_dirs(zstd_dir, lz4_dir, brotli_dir)
101
- $CPPFLAGS += " -DZSTD_DISABLE_ASM"
112
+ if DISABLE_ZSTD_ASM
113
+ $CPPFLAGS += " -DZSTD_DISABLE_ASM"
114
+ puts " ZSTD ASM Huffman decoder disabled (--disable-zstd-asm or MULTI_COMPRESS_DISABLE_ZSTD_ASM=1)"
115
+ end
102
116
 
103
117
  vpath_dirs = build_vpath_dirs(zstd_dir, lz4_dir, brotli_dir)
104
118
 
@@ -106,9 +120,14 @@ def configure_vendored_libraries(vendor_dir)
106
120
 
107
121
  compress_c_dir = find_compress_c_dir
108
122
 
109
- $srcs = ["multi_compress.c"] + all_vendor_srcs.map { |s| File.basename(s) }
123
+ c_srcs = all_vendor_srcs.reject { |s| s.end_with?(".S") }
124
+ asm_srcs = all_vendor_srcs.select { |s| s.end_with?(".S") }
125
+
126
+ $srcs = ["multi_compress.c"] + c_srcs.map { |s| File.basename(s) }
110
127
  $VPATH = [compress_c_dir] + vpath_dirs
111
128
 
129
+ $multi_compress_asm_srcs = asm_srcs
130
+
112
131
  $warnflags = ""
113
132
 
114
133
  vpath_dirs
@@ -117,6 +136,11 @@ end
117
136
  def collect_vendor_sources(zstd_dir, lz4_dir, brotli_dir)
118
137
  zstd_srcs = ZSTD_SUBDIRS.flat_map { |d| Dir[File.join(zstd_dir, d, "*.c")] }
119
138
 
139
+ unless DISABLE_ZSTD_ASM
140
+ asm = File.join(zstd_dir, "lib", "decompress", "huf_decompress_amd64.S")
141
+ zstd_srcs << asm if File.exist?(asm) && zstd_asm_supported?
142
+ end
143
+
120
144
  lz4_srcs = LZ4_SOURCES.filter_map do |f|
121
145
  path = File.join(lz4_dir, "lib", f)
122
146
  path if File.exist?(path)
@@ -178,6 +202,36 @@ def patch_makefile_vpath!(vpath_dirs)
178
202
  puts " Patched Makefile with #{vpath_dirs.length} VPATH entries"
179
203
  end
180
204
 
205
# Patches the generated Makefile in the current directory so the given
# assembly sources are built into the extension.
#
# Three things are added: one object file per source on the OBJS line, a
# `vpath %.S` entry for every distinct source directory, and a `%.o: %.S`
# pattern rule. A Makefile that already contains the "# vendored asm" marker
# is left untouched, so a second call does nothing.
#
# @param asm_srcs [Array<String>, nil] paths of the .S files to build
# @return [nil]
def patch_makefile_asm!(asm_srcs)
  return if asm_srcs.nil? || asm_srcs.empty?

  contents = File.read("Makefile")
  return if contents.include?("# vendored asm")

  objects = asm_srcs.map { |src| "#{File.basename(src, ".S")}.o" }
  object_list = objects.join(" ")

  # Extend the existing OBJS assignment in place; when the Makefile has no
  # such line, declare OBJS at the end instead.
  patched = contents.sub!(/^(OBJS\s*=\s*[^\n]+?)(\s*)$/) do
    "#{Regexp.last_match(1)} #{object_list}#{Regexp.last_match(2)}"
  end
  contents << "\nOBJS = #{object_list}\n" unless patched

  vpath_block = asm_srcs.map { |src| "vpath %.S #{File.dirname(src)}" }.uniq.join("\n")
  rule_lines = [
    "# vendored asm",
    vpath_block,
    "%.o: %.S",
    "\t$(ECHO) compiling $(<)",
    "\t$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<"
  ]
  contents << "\n#{rule_lines.join("\n")}\n\n"

  File.write("Makefile", contents)
  puts " Patched Makefile with #{asm_srcs.length} ASM source(s): #{objects.join(", ")}"
end
234
+
181
235
  # --- Main ---
182
236
 
183
237
  VENDOR_DIR = find_vendor_dir
@@ -211,3 +265,4 @@ have_library("pthread") unless RUBY_PLATFORM.include?("darwin")
211
265
  create_makefile("multi_compress/multi_compress")
212
266
 
213
267
  patch_makefile_vpath!(vpath_dirs) if VENDORED && !USE_SYSTEM && vpath_dirs
268
+ patch_makefile_asm!($multi_compress_asm_srcs) if VENDORED && !USE_SYSTEM && $multi_compress_asm_srcs
@@ -95,6 +95,97 @@ typedef enum { LZ4_FORMAT_BLOCK = 0, LZ4_FORMAT_FRAME = 1 } lz4_format_t;
95
95
 
96
96
  #define MC_NUM_ALGOS 3
97
97
 
98
+ static pthread_once_t zstd_tls_once = PTHREAD_ONCE_INIT;
99
+ static pthread_key_t zstd_cctx_key;
100
+ static pthread_key_t zstd_dctx_key;
101
+
102
+ static void zstd_tls_free_cctx(void *ptr) {
103
+ if (ptr)
104
+ ZSTD_freeCCtx((ZSTD_CCtx *)ptr);
105
+ }
106
+
107
+ static void zstd_tls_free_dctx(void *ptr) {
108
+ if (ptr)
109
+ ZSTD_freeDCtx((ZSTD_DCtx *)ptr);
110
+ }
111
+
112
+ static void zstd_tls_init(void) {
113
+ if (pthread_key_create(&zstd_cctx_key, zstd_tls_free_cctx) != 0)
114
+ abort();
115
+ if (pthread_key_create(&zstd_dctx_key, zstd_tls_free_dctx) != 0)
116
+ abort();
117
+ }
118
+
119
+ static ZSTD_CCtx *zstd_tls_get_cctx(void) {
120
+ pthread_once(&zstd_tls_once, zstd_tls_init);
121
+
122
+ ZSTD_CCtx *cctx = (ZSTD_CCtx *)pthread_getspecific(zstd_cctx_key);
123
+ if (cctx)
124
+ return cctx;
125
+
126
+ cctx = ZSTD_createCCtx();
127
+ if (!cctx)
128
+ return NULL;
129
+
130
+ if (pthread_setspecific(zstd_cctx_key, cctx) != 0) {
131
+ ZSTD_freeCCtx(cctx);
132
+ return NULL;
133
+ }
134
+
135
+ return cctx;
136
+ }
137
+
138
+ static ZSTD_DCtx *zstd_tls_get_dctx(void) {
139
+ pthread_once(&zstd_tls_once, zstd_tls_init);
140
+
141
+ ZSTD_DCtx *dctx = (ZSTD_DCtx *)pthread_getspecific(zstd_dctx_key);
142
+ if (dctx)
143
+ return dctx;
144
+
145
+ dctx = ZSTD_createDCtx();
146
+ if (!dctx)
147
+ return NULL;
148
+
149
+ if (pthread_setspecific(zstd_dctx_key, dctx) != 0) {
150
+ ZSTD_freeDCtx(dctx);
151
+ return NULL;
152
+ }
153
+
154
+ return dctx;
155
+ }
156
+
157
+ static size_t zstd_compress_cached(char *dst, size_t dst_cap, const char *src, size_t src_len,
158
+ int level, ZSTD_CDict *cdict, int *ctx_error) {
159
+ *ctx_error = 0;
160
+
161
+ ZSTD_CCtx *cctx = zstd_tls_get_cctx();
162
+ if (!cctx) {
163
+ *ctx_error = 1;
164
+ return 0;
165
+ }
166
+
167
+ if (cdict)
168
+ return ZSTD_compress_usingCDict(cctx, dst, dst_cap, src, src_len, cdict);
169
+
170
+ return ZSTD_compressCCtx(cctx, dst, dst_cap, src, src_len, level);
171
+ }
172
+
173
+ static size_t zstd_decompress_cached(void *dst, size_t dst_cap, const void *src, size_t src_len,
174
+ ZSTD_DDict *ddict, int *ctx_error) {
175
+ *ctx_error = 0;
176
+
177
+ ZSTD_DCtx *dctx = zstd_tls_get_dctx();
178
+ if (!dctx) {
179
+ *ctx_error = 1;
180
+ return 0;
181
+ }
182
+
183
+ if (ddict)
184
+ return ZSTD_decompress_usingDDict(dctx, dst, dst_cap, src, src_len, ddict);
185
+
186
+ return ZSTD_decompressDCtx(dctx, dst, dst_cap, src, src_len);
187
+ }
188
+
98
189
  _Static_assert(ALGO_BROTLI == MC_NUM_ALGOS - 1,
99
190
  "compress_algo_t must be contiguous [0..MC_NUM_ALGOS-1]");
100
191
 
@@ -439,8 +530,6 @@ static inline VALUE rb_binary_str_buf_new(long capa) {
439
530
  static inline VALUE rb_binary_str_buf_reserve(long capa) {
440
531
  VALUE str = rb_str_buf_new(capa);
441
532
  rb_enc_associate(str, binary_encoding);
442
- if (capa > 0)
443
- rb_str_modify_expand(str, capa + 1);
444
533
  return str;
445
534
  }
446
535
 
@@ -689,13 +778,19 @@ static inline size_t fiber_maybe_yield(size_t bytes_since_yield, size_t just_pro
689
778
  return bytes_since_yield;
690
779
  }
691
780
 
692
- #define DICT_CDICT_CACHE_SIZE 22
693
- _Static_assert(DICT_CDICT_CACHE_SIZE > 0, "CDict cache needs at least one slot");
781
+ #define DICT_ZSTD_MIN_LEVEL 1
782
+ #define DICT_ZSTD_MAX_LEVEL 22
783
+ #define DICT_CDICT_CACHE_SIZE (DICT_ZSTD_MAX_LEVEL + 1)
784
+ _Static_assert(DICT_CDICT_CACHE_SIZE > DICT_ZSTD_MAX_LEVEL,
785
+ "CDict cache needs one slot for every accepted zstd level");
694
786
 
695
- typedef struct {
696
- int level;
697
- ZSTD_CDict *cdict;
698
- } cdict_cache_entry_t;
787
+ #if defined(__GNUC__) || defined(__clang__)
788
+ #define MC_HAS_ATOMIC_PTR 1
789
+ #define MC_ATOMIC_LOAD_PTR(ptr) __atomic_load_n((ptr), __ATOMIC_ACQUIRE)
790
+ #define MC_ATOMIC_STORE_PTR(ptr, val) __atomic_store_n((ptr), (val), __ATOMIC_RELEASE)
791
+ #else
792
+ #define MC_HAS_ATOMIC_PTR 0
793
+ #endif
699
794
 
700
795
  struct dictionary_s {
701
796
  compress_algo_t algo;
@@ -703,8 +798,7 @@ struct dictionary_s {
703
798
  size_t size;
704
799
  pthread_mutex_t cache_mutex;
705
800
 
706
- cdict_cache_entry_t cdict_cache[DICT_CDICT_CACHE_SIZE];
707
- int cdict_cache_count;
801
+ ZSTD_CDict *cdict_cache[DICT_CDICT_CACHE_SIZE];
708
802
 
709
803
  ZSTD_DDict *ddict;
710
804
  };
@@ -713,9 +807,9 @@ static void dict_free(void *ptr) {
713
807
  dictionary_t *dict = (dictionary_t *)ptr;
714
808
  if (!dict)
715
809
  return;
716
- for (int i = 0; i < dict->cdict_cache_count; i++) {
717
- if (dict->cdict_cache[i].cdict)
718
- ZSTD_freeCDict(dict->cdict_cache[i].cdict);
810
+ for (int i = DICT_ZSTD_MIN_LEVEL; i <= DICT_ZSTD_MAX_LEVEL; i++) {
811
+ if (dict->cdict_cache[i])
812
+ ZSTD_freeCDict(dict->cdict_cache[i]);
719
813
  }
720
814
  if (dict->ddict)
721
815
  ZSTD_freeDDict(dict->ddict);
@@ -732,9 +826,9 @@ static size_t dict_memsize(const void *ptr) {
732
826
 
733
827
  size_t total = sizeof(dictionary_t) + d->size;
734
828
  if (d->algo == ALGO_ZSTD) {
735
- for (int i = 0; i < d->cdict_cache_count; i++) {
736
- if (d->cdict_cache[i].cdict)
737
- total += ZSTD_sizeof_CDict(d->cdict_cache[i].cdict);
829
+ for (int i = DICT_ZSTD_MIN_LEVEL; i <= DICT_ZSTD_MAX_LEVEL; i++) {
830
+ if (d->cdict_cache[i])
831
+ total += ZSTD_sizeof_CDict(d->cdict_cache[i]);
738
832
  }
739
833
  if (d->ddict)
740
834
  total += ZSTD_sizeof_DDict(d->ddict);
@@ -756,17 +850,18 @@ static VALUE dict_alloc(VALUE klass) {
756
850
  }
757
851
 
758
852
  static ZSTD_CDict *dict_get_cdict(dictionary_t *dict, int level) {
759
- ZSTD_CDict *existing = NULL;
853
+ if (MC_UNLIKELY(level < DICT_ZSTD_MIN_LEVEL || level > DICT_ZSTD_MAX_LEVEL))
854
+ rb_raise(eLevelError, "zstd level must be %d..%d, got %d", DICT_ZSTD_MIN_LEVEL,
855
+ DICT_ZSTD_MAX_LEVEL, level);
760
856
 
857
+ ZSTD_CDict *existing;
858
+ #if MC_HAS_ATOMIC_PTR
859
+ existing = MC_ATOMIC_LOAD_PTR(&dict->cdict_cache[level]);
860
+ #else
761
861
  pthread_mutex_lock(&dict->cache_mutex);
762
- for (int i = 0; i < dict->cdict_cache_count; i++) {
763
- if (dict->cdict_cache[i].level == level) {
764
- existing = dict->cdict_cache[i].cdict;
765
- break;
766
- }
767
- }
862
+ existing = dict->cdict_cache[level];
768
863
  pthread_mutex_unlock(&dict->cache_mutex);
769
-
864
+ #endif
770
865
  if (existing)
771
866
  return existing;
772
867
 
@@ -775,34 +870,31 @@ static ZSTD_CDict *dict_get_cdict(dictionary_t *dict, int level) {
775
870
  return NULL;
776
871
 
777
872
  pthread_mutex_lock(&dict->cache_mutex);
778
- for (int i = 0; i < dict->cdict_cache_count; i++) {
779
- if (dict->cdict_cache[i].level == level) {
780
- existing = dict->cdict_cache[i].cdict;
781
- pthread_mutex_unlock(&dict->cache_mutex);
782
- ZSTD_freeCDict(cdict);
783
- return existing;
784
- }
785
- }
786
-
787
- if (dict->cdict_cache_count >= DICT_CDICT_CACHE_SIZE) {
873
+ existing = dict->cdict_cache[level];
874
+ if (!existing) {
875
+ #if MC_HAS_ATOMIC_PTR
876
+ MC_ATOMIC_STORE_PTR(&dict->cdict_cache[level], cdict);
877
+ #else
878
+ dict->cdict_cache[level] = cdict;
879
+ #endif
788
880
  pthread_mutex_unlock(&dict->cache_mutex);
789
- ZSTD_freeCDict(cdict);
790
- rb_raise(eError, "zstd dictionary cdict cache exhausted");
881
+ return cdict;
791
882
  }
792
883
 
793
- dict->cdict_cache[dict->cdict_cache_count].level = level;
794
- dict->cdict_cache[dict->cdict_cache_count].cdict = cdict;
795
- dict->cdict_cache_count++;
796
884
  pthread_mutex_unlock(&dict->cache_mutex);
797
- return cdict;
885
+ ZSTD_freeCDict(cdict);
886
+ return existing;
798
887
  }
799
888
 
800
889
  static ZSTD_DDict *dict_get_ddict(dictionary_t *dict) {
801
890
  ZSTD_DDict *existing;
802
-
891
+ #if MC_HAS_ATOMIC_PTR
892
+ existing = MC_ATOMIC_LOAD_PTR(&dict->ddict);
893
+ #else
803
894
  pthread_mutex_lock(&dict->cache_mutex);
804
895
  existing = dict->ddict;
805
896
  pthread_mutex_unlock(&dict->cache_mutex);
897
+ #endif
806
898
  if (existing)
807
899
  return existing;
808
900
 
@@ -811,12 +903,17 @@ static ZSTD_DDict *dict_get_ddict(dictionary_t *dict) {
811
903
  return NULL;
812
904
 
813
905
  pthread_mutex_lock(&dict->cache_mutex);
814
- if (!dict->ddict) {
906
+ existing = dict->ddict;
907
+ if (!existing) {
908
+ #if MC_HAS_ATOMIC_PTR
909
+ MC_ATOMIC_STORE_PTR(&dict->ddict, created);
910
+ #else
815
911
  dict->ddict = created;
912
+ #endif
816
913
  pthread_mutex_unlock(&dict->cache_mutex);
817
914
  return created;
818
915
  }
819
- existing = dict->ddict;
916
+
820
917
  pthread_mutex_unlock(&dict->cache_mutex);
821
918
  ZSTD_freeDDict(created);
822
919
  return existing;
@@ -835,19 +932,8 @@ typedef struct {
835
932
 
836
933
  static void *zstd_compress_nogvl(void *arg) {
837
934
  zstd_compress_args_t *a = (zstd_compress_args_t *)arg;
838
- if (a->cdict) {
839
- ZSTD_CCtx *cctx = ZSTD_createCCtx();
840
- if (!cctx) {
841
- a->error = 1;
842
- return NULL;
843
- }
844
- a->result =
845
- ZSTD_compress_usingCDict(cctx, a->dst, a->dst_cap, a->src, a->src_len, a->cdict);
846
- ZSTD_freeCCtx(cctx);
847
- } else {
848
- a->result = ZSTD_compress(a->dst, a->dst_cap, a->src, a->src_len, a->level);
849
- }
850
- a->error = 0;
935
+ a->result =
936
+ zstd_compress_cached(a->dst, a->dst_cap, a->src, a->src_len, a->level, a->cdict, &a->error);
851
937
  return NULL;
852
938
  }
853
939
 
@@ -863,19 +949,7 @@ typedef struct {
863
949
 
864
950
  static void *zstd_decompress_nogvl(void *arg) {
865
951
  zstd_decompress_args_t *a = (zstd_decompress_args_t *)arg;
866
- if (a->ddict) {
867
- ZSTD_DCtx *dctx = ZSTD_createDCtx();
868
- if (!dctx) {
869
- a->error = 1;
870
- return NULL;
871
- }
872
- a->result =
873
- ZSTD_decompress_usingDDict(dctx, a->dst, a->dst_cap, a->src, a->src_len, a->ddict);
874
- ZSTD_freeDCtx(dctx);
875
- } else {
876
- a->result = ZSTD_decompress(a->dst, a->dst_cap, a->src, a->src_len);
877
- }
878
- a->error = 0;
952
+ a->result = zstd_decompress_cached(a->dst, a->dst_cap, a->src, a->src_len, a->ddict, &a->error);
879
953
  return NULL;
880
954
  }
881
955
 
@@ -1168,18 +1242,8 @@ static void *brotli_decompress_stream_fiber_nogvl(void *arg) {
1168
1242
 
1169
1243
  static void *zstd_fiber_compress_nogvl(void *arg) {
1170
1244
  zstd_fiber_compress_t *a = (zstd_fiber_compress_t *)arg;
1171
- if (a->cdict) {
1172
- ZSTD_CCtx *cctx = ZSTD_createCCtx();
1173
- if (!cctx) {
1174
- a->error = 1;
1175
- return NULL;
1176
- }
1177
- a->result =
1178
- ZSTD_compress_usingCDict(cctx, a->dst, a->dst_cap, a->src, a->src_len, a->cdict);
1179
- ZSTD_freeCCtx(cctx);
1180
- } else {
1181
- a->result = ZSTD_compress(a->dst, a->dst_cap, a->src, a->src_len, a->level);
1182
- }
1245
+ a->result =
1246
+ zstd_compress_cached(a->dst, a->dst_cap, a->src, a->src_len, a->level, a->cdict, &a->error);
1183
1247
  return NULL;
1184
1248
  }
1185
1249
 
@@ -1226,16 +1290,11 @@ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
1226
1290
 
1227
1291
  if (slen < policy->gvl_unlock_threshold) {
1228
1292
  VALUE dst = rb_binary_str_buf_reserve(bound);
1229
- size_t csize;
1230
- if (cdict) {
1231
- ZSTD_CCtx *cctx = ZSTD_createCCtx();
1232
- if (!cctx)
1233
- rb_raise(eMemError, "zstd: failed to create context");
1234
- csize = ZSTD_compress_usingCDict(cctx, RSTRING_PTR(dst), bound, src, slen, cdict);
1235
- ZSTD_freeCCtx(cctx);
1236
- } else {
1237
- csize = ZSTD_compress(RSTRING_PTR(dst), bound, src, slen, level);
1238
- }
1293
+ int ctx_error = 0;
1294
+ size_t csize =
1295
+ zstd_compress_cached(RSTRING_PTR(dst), bound, src, slen, level, cdict, &ctx_error);
1296
+ if (ctx_error)
1297
+ rb_raise(eMemError, "zstd: failed to create context");
1239
1298
  if (ZSTD_isError(csize))
1240
1299
  rb_raise(eError, "zstd compress: %s", ZSTD_getErrorName(csize));
1241
1300
  rb_str_set_len(dst, (long)csize);
@@ -1416,8 +1475,18 @@ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
1416
1475
  rb_raise(eMemError, "brotli: failed to prepare dictionary");
1417
1476
  }
1418
1477
 
1419
- if (!BrotliEncoderSetParameter(enc, BROTLI_PARAM_QUALITY, level) ||
1420
- !BrotliEncoderAttachPreparedDictionary(enc, pd)) {
1478
+ if (!BrotliEncoderSetParameter(enc, BROTLI_PARAM_QUALITY, level)) {
1479
+ BrotliEncoderDestroyPreparedDictionary(pd);
1480
+ BrotliEncoderDestroyInstance(enc);
1481
+ rb_raise(eError, "brotli: failed to set quality parameter");
1482
+ }
1483
+ if (!BrotliEncoderSetParameter(enc, BROTLI_PARAM_SIZE_HINT,
1484
+ slen > UINT32_MAX ? UINT32_MAX : (uint32_t)slen)) {
1485
+ BrotliEncoderDestroyPreparedDictionary(pd);
1486
+ BrotliEncoderDestroyInstance(enc);
1487
+ rb_raise(eError, "brotli: failed to set size hint parameter");
1488
+ }
1489
+ if (!BrotliEncoderAttachPreparedDictionary(enc, pd)) {
1421
1490
  BrotliEncoderDestroyPreparedDictionary(pd);
1422
1491
  BrotliEncoderDestroyInstance(enc);
1423
1492
  rb_raise(eError, "brotli: failed to attach dictionary");
@@ -1576,20 +1645,19 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1576
1645
  } else {
1577
1646
  VALUE dst = rb_binary_str_buf_reserve((size_t)frame_size);
1578
1647
 
1648
+ ZSTD_DDict *ddict = NULL;
1579
1649
  if (dict) {
1580
- ZSTD_DDict *ddict = dict_get_ddict(dict);
1650
+ ddict = dict_get_ddict(dict);
1581
1651
  if (!ddict)
1582
1652
  rb_raise(eMemError, "zstd: failed to create ddict");
1583
- ZSTD_DCtx *dctx = ZSTD_createDCtx();
1584
- if (!dctx)
1585
- rb_raise(eMemError, "zstd: failed to create dctx");
1586
- dsize = ZSTD_decompress_usingDDict(dctx, RSTRING_PTR(dst), (size_t)frame_size,
1587
- src, slen, ddict);
1588
- ZSTD_freeDCtx(dctx);
1589
- } else {
1590
- dsize = ZSTD_decompress(RSTRING_PTR(dst), (size_t)frame_size, src, slen);
1591
1653
  }
1592
1654
 
1655
+ int ctx_error = 0;
1656
+ dsize = zstd_decompress_cached(RSTRING_PTR(dst), (size_t)frame_size, src, slen,
1657
+ ddict, &ctx_error);
1658
+ if (ctx_error)
1659
+ rb_raise(eMemError, "zstd: failed to create dctx");
1660
+
1593
1661
  if (ZSTD_isError(dsize))
1594
1662
  rb_raise(eDataError, "zstd decompress: %s", ZSTD_getErrorName(dsize));
1595
1663
  enforce_output_and_ratio_limits(dsize, slen, limits.max_output_size,
@@ -1601,16 +1669,21 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1601
1669
  }
1602
1670
  }
1603
1671
 
1604
- ZSTD_DCtx *dctx = ZSTD_createDCtx();
1672
+ ZSTD_DCtx *dctx = zstd_tls_get_dctx();
1605
1673
  if (!dctx)
1606
1674
  rb_raise(eMemError, "zstd: failed to create dctx");
1607
1675
 
1676
+ {
1677
+ size_t r = ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
1678
+ if (ZSTD_isError(r))
1679
+ rb_raise(eError, "zstd dctx reset: %s", ZSTD_getErrorName(r));
1680
+ }
1681
+
1608
1682
  if (dict) {
1609
1683
  ZSTD_DDict *ddict = dict_get_ddict(dict);
1610
1684
  if (ddict) {
1611
1685
  size_t r = ZSTD_DCtx_refDDict(dctx, ddict);
1612
1686
  if (ZSTD_isError(r)) {
1613
- ZSTD_freeDCtx(dctx);
1614
1687
  rb_raise(eError, "zstd dict ref: %s", ZSTD_getErrorName(r));
1615
1688
  }
1616
1689
  }
@@ -1629,7 +1702,6 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1629
1702
  while (input.pos < input.size) {
1630
1703
  if (total_out >= alloc_size) {
1631
1704
  if (alloc_size >= limits.max_output_size) {
1632
- ZSTD_freeDCtx(dctx);
1633
1705
  rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
1634
1706
  limits.max_output_size);
1635
1707
  }
@@ -1642,7 +1714,6 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1642
1714
 
1643
1715
  size_t remaining_budget = limits.max_output_size - total_out;
1644
1716
  if (remaining_budget == 0) {
1645
- ZSTD_freeDCtx(dctx);
1646
1717
  rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
1647
1718
  limits.max_output_size);
1648
1719
  }
@@ -1654,7 +1725,6 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1654
1725
  ZSTD_outBuffer output = {RSTRING_PTR(dst) + total_out, out_cap, 0};
1655
1726
  size_t ret = ZSTD_decompressStream(dctx, &output, &input);
1656
1727
  if (ZSTD_isError(ret)) {
1657
- ZSTD_freeDCtx(dctx);
1658
1728
  rb_raise(eDataError, "zstd decompress: %s", ZSTD_getErrorName(ret));
1659
1729
  }
1660
1730
  total_out = checked_add_size(total_out, output.pos,
@@ -1665,7 +1735,6 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1665
1735
  break;
1666
1736
  }
1667
1737
 
1668
- ZSTD_freeDCtx(dctx);
1669
1738
  rb_str_set_len(dst, total_out);
1670
1739
  RB_GC_GUARD(data);
1671
1740
  RB_GC_GUARD(dict_val);
@@ -1875,7 +1944,7 @@ static void crc32_init_tables(void) {
1875
1944
  for (uint32_t i = 0; i < 256; i++) {
1876
1945
  uint32_t crc = i;
1877
1946
  for (int j = 0; j < 8; j++) {
1878
- crc = (crc >> 1) ^ (0xEDB88320 & (-(int32_t)(crc & 1)));
1947
+ crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
1879
1948
  }
1880
1949
  crc32_tables[0][i] = crc;
1881
1950
  }
@@ -2189,6 +2258,10 @@ static VALUE lz4_compress_ring_block(deflater_t *d) {
2189
2258
 
2190
2259
  write_le_u32((uint8_t *)out, (uint32_t)src_size);
2191
2260
 
2261
+ /* Keep blocks independently decodable. Switching to LZ4_*_continue would
2262
+ * require a coordinated format/decoder change that preserves dictionaries
2263
+ * across blocks.
2264
+ */
2192
2265
  int csize;
2193
2266
  if (d->level > 1) {
2194
2267
  csize = LZ4_compress_HC(block_start, out + 8, src_size, bound, d->level);
@@ -2979,13 +3052,24 @@ static VALUE inflater_write(VALUE self, VALUE chunk) {
2979
3052
  }
2980
3053
  case ALGO_LZ4: {
2981
3054
  size_t data_len = inf->lz4_buf.len - inf->lz4_buf.offset;
2982
- size_t needed = data_len + slen;
3055
+ size_t needed =
3056
+ checked_add_size(data_len, slen, "lz4 stream input buffer exceeds representable size");
2983
3057
 
2984
- if (inf->lz4_buf.offset > 0 && needed > inf->lz4_buf.cap) {
2985
- if (data_len > 0)
2986
- memmove(inf->lz4_buf.buf, inf->lz4_buf.buf + inf->lz4_buf.offset, data_len);
2987
- inf->lz4_buf.offset = 0;
2988
- inf->lz4_buf.len = data_len;
3058
+ if (needed > inf->lz4_buf.cap) {
3059
+ size_t new_cap = needed > SIZE_MAX / 2 ? needed : needed * 2;
3060
+ if (inf->lz4_buf.offset > 0) {
3061
+ char *new_buf = ALLOC_N(char, new_cap);
3062
+ if (data_len > 0)
3063
+ memcpy(new_buf, inf->lz4_buf.buf + inf->lz4_buf.offset, data_len);
3064
+ xfree(inf->lz4_buf.buf);
3065
+ inf->lz4_buf.buf = new_buf;
3066
+ inf->lz4_buf.offset = 0;
3067
+ inf->lz4_buf.len = data_len;
3068
+ inf->lz4_buf.cap = new_cap;
3069
+ } else {
3070
+ REALLOC_N(inf->lz4_buf.buf, char, new_cap);
3071
+ inf->lz4_buf.cap = new_cap;
3072
+ }
2989
3073
  } else if (inf->lz4_buf.offset > inf->lz4_buf.cap / 2) {
2990
3074
  if (data_len > 0)
2991
3075
  memmove(inf->lz4_buf.buf, inf->lz4_buf.buf + inf->lz4_buf.offset, data_len);
@@ -2993,11 +3077,6 @@ static VALUE inflater_write(VALUE self, VALUE chunk) {
2993
3077
  inf->lz4_buf.len = data_len;
2994
3078
  }
2995
3079
 
2996
- needed = inf->lz4_buf.len + slen;
2997
- if (needed > inf->lz4_buf.cap) {
2998
- inf->lz4_buf.cap = needed * 2;
2999
- REALLOC_N(inf->lz4_buf.buf, char, inf->lz4_buf.cap);
3000
- }
3001
3080
  memcpy(inf->lz4_buf.buf + inf->lz4_buf.len, src, slen);
3002
3081
  inf->lz4_buf.len += slen;
3003
3082
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module MultiCompress
4
- VERSION = "0.3.3"
4
+ VERSION = "0.3.4"
5
5
  end
@@ -97,17 +97,19 @@ module MultiCompress
97
97
  end
98
98
 
99
99
  def self.zstd(data, level: nil)
100
- compress(data, algo: :zstd, **level_opts(level))
100
+ compress(data, algo: :zstd, level: level)
101
101
  end
102
102
 
103
103
  def self.lz4(data, level: nil, format: nil)
104
- opts = level_opts(level)
105
- opts[:format] = format if format
106
- compress(data, algo: :lz4, **opts)
104
+ if format
105
+ compress(data, algo: :lz4, level: level, format: format)
106
+ else
107
+ compress(data, algo: :lz4, level: level)
108
+ end
107
109
  end
108
110
 
109
111
  def self.brotli(data, level: nil)
110
- compress(data, algo: :brotli, **level_opts(level))
112
+ compress(data, algo: :brotli, level: level)
111
113
  end
112
114
 
113
115
  def self.decompress(data, **opts)
@@ -119,9 +121,11 @@ module MultiCompress
119
121
  end
120
122
 
121
123
  def self.lz4_decompress(data, format: nil)
122
- opts = { algo: :lz4 }
123
- opts[:format] = format if format
124
- decompress(data, **opts)
124
+ if format
125
+ decompress(data, algo: :lz4, format: format)
126
+ else
127
+ decompress(data, algo: :lz4)
128
+ end
125
129
  end
126
130
 
127
131
  def self.brotli_decompress(data)
@@ -132,17 +136,13 @@ module MultiCompress
132
136
  EXTENSION_MAP[File.extname(path).downcase]
133
137
  end
134
138
 
135
- def self.level_opts(level)
136
- level ? { level: level } : {}
137
- end
138
-
139
139
  def self.resolved_one_shot_options(opts)
140
- resolved = opts.dup
141
- resolved[:max_output_size] = config.max_output_size unless resolved.key?(:max_output_size)
142
- resolved
140
+ return opts.merge(max_output_size: config.max_output_size) unless opts.key?(:max_output_size)
141
+
142
+ opts
143
143
  end
144
144
 
145
- private_class_method :level_opts, :resolved_one_shot_options
145
+ private_class_method :resolved_one_shot_options
146
146
 
147
147
  module InflaterDefaults
148
148
  def initialize(*args, **opts)
@@ -248,6 +248,7 @@ module MultiCompress
248
248
 
249
249
  class Reader
250
250
  CHUNK_SIZE = 8192
251
+ BUFFER_COMPACT_THRESHOLD = 64 * 1024
251
252
 
252
253
  def self.open(path_or_io, algo: nil, dictionary: nil, **opts, &block)
253
254
  io, algo, owned = resolve_io(path_or_io, algo, mode: "rb")
@@ -264,12 +265,13 @@ module MultiCompress
264
265
  end
265
266
 
266
267
  def initialize(io, algo: nil, dictionary: nil, **opts)
267
- @io = io
268
- @inflater = Inflater.new(algo: algo, dictionary: dictionary, **opts)
269
- @closed = false
270
- @owned_io = false
271
- @buffer = +""
272
- @eof = false
268
+ @io = io
269
+ @inflater = Inflater.new(algo: algo, dictionary: dictionary, **opts)
270
+ @closed = false
271
+ @owned_io = false
272
+ @buffer = +"".b
273
+ @buffer_pos = 0
274
+ @eof = false
273
275
  end
274
276
 
275
277
  def read(length = nil)
@@ -281,12 +283,12 @@ module MultiCompress
281
283
 
282
284
  def gets(separator = "\n")
283
285
  ensure_open!
284
- return nil if @eof && @buffer.empty?
286
+ return nil if @eof && buffer_empty?
285
287
 
286
- fill_buffer_until { @buffer.include?(separator) }
288
+ fill_buffer_until { buffer_includes?(separator) }
287
289
 
288
- return extract_line(separator) if @buffer.include?(separator)
289
- return consume_buffer unless @buffer.empty?
290
+ return extract_line(separator) if buffer_includes?(separator)
291
+ return consume_buffer unless buffer_empty?
290
292
 
291
293
  nil
292
294
  end
@@ -305,7 +307,7 @@ module MultiCompress
305
307
  end
306
308
 
307
309
  def eof?
308
- @eof && @buffer.empty?
310
+ @eof && buffer_empty?
309
311
  end
310
312
 
311
313
  def each_line
@@ -334,11 +336,44 @@ module MultiCompress
334
336
  raise StreamError, "reader is closed" if @closed
335
337
  end
336
338
 
339
# Number of bytes in @buffer that have not been consumed yet, i.e. the bytes
# from @buffer_pos up to the end of the buffer.
#
# @return [Integer]
def buffer_size
  consumed = @buffer_pos
  @buffer.bytesize - consumed
end
342
+
343
# Whether every byte of @buffer has already been consumed, i.e. the read
# cursor @buffer_pos sits at or past the end of the buffer.
#
# @return [Boolean]
def buffer_empty?
  @buffer.bytesize <= @buffer_pos
end
346
+
347
# Appends decompressed data to the end of @buffer, giving the buffer a chance
# to drop its already-consumed prefix first.
#
# @param data [String] bytes to append
# @return [String] the buffer itself
def buffer_append(data)
  compact_buffer_if_needed
  @buffer.concat(data)
end
351
+
352
# Drops the already-consumed prefix of @buffer once it is worth the copy.
#
# Compaction happens only when the consumed prefix is at least
# BUFFER_COMPACT_THRESHOLD bytes long and makes up at least half of the
# buffer. After compaction @buffer holds only the unread bytes and
# @buffer_pos is 0.
#
# @return [Integer, nil] 0 when the buffer was compacted, nil otherwise
def compact_buffer_if_needed
  consumed = @buffer_pos
  return if consumed.zero?

  size = @buffer.bytesize
  return if consumed < BUFFER_COMPACT_THRESHOLD || consumed * 2 < size

  @buffer = @buffer.byteslice(consumed, size - consumed)
  @buffer_pos = 0
end
361
+
362
# Whether the separator occurs in the part of @buffer at or after the read
# cursor @buffer_pos.
#
# NOTE(review): String#index takes a character offset, while @buffer_pos is
# maintained as a byte offset; the two agree as long as @buffer stays a
# binary string - confirm nothing appends non-binary data.
#
# @param separator [String] the line separator to look for
# @return [Boolean]
def buffer_includes?(separator)
  @buffer.index(separator, @buffer_pos) ? true : false
end
365
+ end
366
+
337
367
  def read_all
338
- return nil if @eof && @buffer.empty?
368
+ return nil if @eof && buffer_empty?
339
369
 
340
- result = @buffer.dup
370
+ result = if buffer_empty?
371
+ +"".b
372
+ else
373
+ @buffer.byteslice(@buffer_pos, @buffer.bytesize - @buffer_pos) || +"".b
374
+ end
341
375
  @buffer.clear
376
+ @buffer_pos = 0
342
377
 
343
378
  until @eof
344
379
  chunk = read_compressed_chunk
@@ -355,15 +390,16 @@ module MultiCompress
355
390
  end
356
391
 
357
392
  def read_exactly(length)
358
- return nil if @eof && @buffer.empty?
393
+ return nil if @eof && buffer_empty?
359
394
 
360
- fill_buffer_until { @buffer.bytesize >= length }
395
+ fill_buffer_until { buffer_size >= length }
361
396
 
362
- if @buffer.bytesize >= length
363
- result = @buffer[0, length]
364
- @buffer = @buffer[length..]
397
+ if buffer_size >= length
398
+ result = @buffer.byteslice(@buffer_pos, length)
399
+ @buffer_pos += length
400
+ compact_buffer_if_needed
365
401
  result
366
- elsif !@buffer.empty?
402
+ elsif !buffer_empty?
367
403
  consume_buffer
368
404
  end
369
405
  end
@@ -376,20 +412,23 @@ module MultiCompress
376
412
  break
377
413
  end
378
414
  decompressed = @inflater.write(chunk)
379
- @buffer << decompressed if decompressed
415
+ buffer_append(decompressed) if decompressed
380
416
  end
381
417
  end
382
418
 
383
419
  def extract_line(separator)
384
- idx = @buffer.index(separator)
385
- result = @buffer[0, idx + separator.bytesize]
386
- @buffer = @buffer[(idx + separator.bytesize)..] || +""
420
+ idx = @buffer.index(separator, @buffer_pos)
421
+ end_pos = idx + separator.bytesize
422
+ result = @buffer.byteslice(@buffer_pos, end_pos - @buffer_pos)
423
+ @buffer_pos = end_pos
424
+ compact_buffer_if_needed
387
425
  result
388
426
  end
389
427
 
390
428
  def consume_buffer
391
- result = @buffer
392
- @buffer = +""
429
+ result = @buffer.byteslice(@buffer_pos, @buffer.bytesize - @buffer_pos) || +"".b
430
+ @buffer.clear
431
+ @buffer_pos = 0
393
432
  result
394
433
  end
395
434
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multi_compress
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Roman Haydarov
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-04-30 00:00:00.000000000 Z
11
+ date: 2026-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler