multi_compress 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f801c58708b34a23e3348b12d482ce3afbbf12208bdfa7d8454b20d4afbf55f4
4
- data.tar.gz: 666eee26931519bf0069d451767053bc87cf91cede44cfe69a87408dc4b08736
3
+ metadata.gz: 23ab6bacd75b21b5cfbf5b7b3121428c18060bd9e097f815438af6c8bafd8883
4
+ data.tar.gz: 90321202358a43bb732077aa1f28b8a9e5d756e27067df851671883a21eb1470
5
5
  SHA512:
6
- metadata.gz: 39c03d5ce59b250947c9c91a61926d5ac2bdad8193cdd0b5d38bdb639e7ad38921a26e439729c0ea55cebe133a3724bd3010e3d83147aa4ce00a66909f34d7e6
7
- data.tar.gz: d633ab7a2257c71d741b66c598aef4924ca99ef62dbaa01185aa1efdaaae93e7ffcaaec0205a300bc6291e279d8fafc34560a203e71da3c9ae6825c69819faa6
6
+ metadata.gz: 431921d63b8757216df9179dfd5d792103777dbf597d52cfafeb2e593ee8ab52ce12f374499826b0eba45a164bfde0f4adaa3aa3ba9ee811c6d9f44ed2ccdb7d
7
+ data.tar.gz: 2e1d3bf4455fd627696c7ff7a500e6424497df012939487d39896b8e50dad421c8acd8b427f10fc3de6a51b799ca5b1532cda5508485440e4f633a5ac5677a1d
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.3.2]
4
+
5
+ ### Changed
6
+ - Micro-optimization
7
+
8
+ ## [0.3.1]
9
+
10
+ ### Fixed
11
+ - Data race fix
12
+
3
13
  ## [0.3.0]
4
14
 
5
15
  ### Added
data/README.md CHANGED
@@ -106,9 +106,9 @@ Performance comparison against Ruby's built-in zlib compression (200 iterations
106
106
  │ Medium JSON (~370KB, no GC) │ 8.5% │ 15.7% │ 6.7% │ 5.5% │
107
107
  │ Medium logs (~168KB, no GC) │ 8.6% │ 17.2% │ 5.4% │ 3.2% │
108
108
  │ Large JSON (~1.6MB, GC) │ 8.1% │ 15.1% │ 6.1% │ 5.6% │
109
- │ Large logs (~600KB, GC) │ 7.6% │ 16.0% │ 2.8% │ 2.1% │
109
+ │ Large logs (~600KB, GC) │ 7.6% │ 16.0% │ 2.9% │ 2.0% │
110
110
  │ Large JSON (~1.6MB, no GC) │ 8.1% │ 15.1% │ 6.1% │ 5.6% │
111
- │ Large logs (~600KB, no GC) │ 7.6% │ 16.0% │ 2.8% │ 2.1% │
111
+ │ Large logs (~600KB, no GC) │ 7.6% │ 16.0% │ 2.9% │ 2.0% │
112
112
  └─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
113
113
  ```
114
114
 
@@ -117,18 +117,18 @@ Performance comparison against Ruby's built-in zlib compression (200 iterations
117
117
  ┌─────────────────────────────┬─────────┬─────────┬─────────┬─────────┐
118
118
  │ Configuration │ zlib │ lz4 │ zstd │ brotli │
119
119
  ├─────────────────────────────┼─────────┼─────────┼─────────┼─────────┤
120
- │ Small JSON (~10KB, GC) │ 0.05 │ 0.01 │ 0.02 │ 0.14
121
- │ Small text (~10KB, GC) │ 0.04 │ 0.00 │ 0.01 │ 0.11
120
+ │ Small JSON (~10KB, GC) │ 0.05 │ 0.01 │ 0.02 │ 0.12
121
+ │ Small text (~10KB, GC) │ 0.03 │ 0.00 │ 0.01 │ 0.09
122
122
  │ Small JSON (~10KB, no GC) │ 0.06 │ 0.01 │ 0.02 │ 0.13 │
123
- │ Small text (~10KB, no GC) │ 0.04 │ 0.00 │ 0.01 │ 0.11 │
124
- │ Medium JSON (~370KB, GC) │ 2.73 │ 0.29 │ 0.42 │ 2.36
125
- │ Medium logs (~168KB, GC) │ 1.23 │ 0.14 │ 0.18 │ 0.92
126
- │ Medium JSON (~370KB, no GC) │ 2.72 │ 0.28 │ 0.41 │ 2.41
127
- │ Medium logs (~168KB, no GC) │ 1.26 │ 0.13 │ 0.18 │ 0.96
128
- │ Large JSON (~1.6MB, GC) │ 12.44 │ 1.38 │ 1.96 │ 12.44 │
129
- │ Large logs (~600KB, GC) │ 4.29 │ 0.46 │ 0.49 │ 2.85
130
- │ Large JSON (~1.6MB, no GC) │ 12.22 │ 1.28 │ 1.86 │ 11.83
131
- │ Large logs (~600KB, no GC) │ 4.39 │ 0.42 │ 0.44 │ 2.86
123
+ │ Small text (~10KB, no GC) │ 0.03 │ 0.00 │ 0.01 │ 0.11 │
124
+ │ Medium JSON (~370KB, GC) │ 2.62 │ 0.28 │ 0.39 │ 2.31
125
+ │ Medium logs (~168KB, GC) │ 1.23 │ 0.13 │ 0.18 │ 0.88
126
+ │ Medium JSON (~370KB, no GC) │ 2.65 │ 0.27 │ 0.40 │ 2.31
127
+ │ Medium logs (~168KB, no GC) │ 1.27 │ 0.13 │ 0.18 │ 0.95
128
+ │ Large JSON (~1.6MB, GC) │ 11.70 │ 1.36 │ 1.93 │ 11.95 │
129
+ │ Large logs (~600KB, GC) │ 4.10 │ 0.45 │ 0.45 │ 2.62
130
+ │ Large JSON (~1.6MB, no GC) │ 11.47 │ 1.27 │ 1.88 │ 11.47
131
+ │ Large logs (~600KB, no GC) │ 4.06 │ 0.41 │ 0.45 │ 2.79
132
132
  └─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
133
133
  ```
134
134
 
@@ -137,18 +137,18 @@ Performance comparison against Ruby's built-in zlib compression (200 iterations
137
137
  ┌─────────────────────────────┬─────────┬─────────┬─────────┬─────────┐
138
138
  │ Configuration │ zlib │ lz4 │ zstd │ brotli │
139
139
  ├─────────────────────────────┼─────────┼─────────┼─────────┼─────────┤
140
- │ Small JSON (~10KB, GC) │ 1.00x │ 5.00x │ 2.50x │ 0.36x
141
- │ Small text (~10KB, GC) │ 1.00x │ N/A │ 4.00x │ 0.36x
140
+ │ Small JSON (~10KB, GC) │ 1.00x │ 5.00x │ 2.50x │ 0.42x
141
+ │ Small text (~10KB, GC) │ 1.00x │ N/A │ 3.00x │ 0.33x
142
142
  │ Small JSON (~10KB, no GC) │ 1.00x │ 6.00x │ 3.00x │ 0.46x │
143
- │ Small text (~10KB, no GC) │ 1.00x │ N/A │ 4.00x │ 0.36x
144
- │ Medium JSON (~370KB, GC) │ 1.00x │ 9.41x │ 6.50x │ 1.16x
145
- │ Medium logs (~168KB, GC) │ 1.00x │ 8.79x │ 6.83x │ 1.34x
146
- │ Medium JSON (~370KB, no GC) │ 1.00x │ 9.71x │ 6.63x │ 1.13x
147
- │ Medium logs (~168KB, no GC) │ 1.00x │ 9.69x │ 7.00x │ 1.31x
148
- │ Large JSON (~1.6MB, GC) │ 1.00x │ 9.01x │ 6.35x │ 1.00x │
149
- │ Large logs (~600KB, GC) │ 1.00x │ 9.33x │ 8.76x │ 1.51x │
150
- │ Large JSON (~1.6MB, no GC) │ 1.00x │ 9.55x │ 6.57x │ 1.03x
151
- │ Large logs (~600KB, no GC) │ 1.00x │ 10.45x │ 9.98x │ 1.53x
143
+ │ Small text (~10KB, no GC) │ 1.00x │ N/A │ 3.00x │ 0.27x
144
+ │ Medium JSON (~370KB, GC) │ 1.00x │ 9.36x │ 6.72x │ 1.13x
145
+ │ Medium logs (~168KB, GC) │ 1.00x │ 9.46x │ 6.83x │ 1.40x
146
+ │ Medium JSON (~370KB, no GC) │ 1.00x │ 9.81x │ 6.62x │ 1.15x
147
+ │ Medium logs (~168KB, no GC) │ 1.00x │ 9.77x │ 7.06x │ 1.34x
148
+ │ Large JSON (~1.6MB, GC) │ 1.00x │ 8.60x │ 6.06x │ 0.98x │
149
+ │ Large logs (~600KB, GC) │ 1.00x │ 9.11x │ 9.11x │ 1.56x │
150
+ │ Large JSON (~1.6MB, no GC) │ 1.00x │ 9.03x │ 6.10x │ 1.00x
151
+ │ Large logs (~600KB, no GC) │ 1.00x │ 9.90x │ 9.02x │ 1.46x
152
152
  └─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
153
153
  ```
154
154
 
@@ -137,12 +137,94 @@ static void init_id_cache(void) {
137
137
  sym_cache.max_ratio = ID2SYM(id_cache.max_ratio);
138
138
  }
139
139
 
140
- static inline VALUE opt_get(VALUE opts, VALUE sym) {
141
- return NIL_P(opts) ? Qnil : rb_hash_aref(opts, sym);
140
+ typedef struct {
141
+ VALUE algo;
142
+ VALUE level;
143
+ VALUE dictionary;
144
+ VALUE size;
145
+ VALUE format;
146
+ VALUE max_output_size;
147
+ VALUE max_ratio;
148
+ int saw_algorithm_keyword;
149
+ } mc_opts_t;
150
+
151
+ static inline void mc_opts_init(mc_opts_t *opts) {
152
+ opts->algo = Qnil;
153
+ opts->level = Qnil;
154
+ opts->dictionary = Qnil;
155
+ opts->size = Qnil;
156
+ opts->format = Qnil;
157
+ opts->max_output_size = Qundef;
158
+ opts->max_ratio = Qundef;
159
+ opts->saw_algorithm_keyword = 0;
160
+ }
161
+
162
+ static int mc_opts_parse_i(VALUE key, VALUE value, VALUE arg) {
163
+ mc_opts_t *opts = (mc_opts_t *)arg;
164
+
165
+ if (!SYMBOL_P(key))
166
+ return ST_CONTINUE;
167
+
168
+ ID id = SYM2ID(key);
169
+ if (id == id_cache.algo) {
170
+ opts->algo = value;
171
+ } else if (id == id_cache.level) {
172
+ opts->level = value;
173
+ } else if (id == id_cache.dictionary) {
174
+ opts->dictionary = value;
175
+ } else if (id == id_cache.size) {
176
+ opts->size = value;
177
+ } else if (id == id_cache.format) {
178
+ opts->format = value;
179
+ } else if (id == id_cache.max_output_size) {
180
+ opts->max_output_size = value;
181
+ } else if (id == id_cache.max_ratio) {
182
+ opts->max_ratio = value;
183
+ } else if (id == id_cache.algorithm) {
184
+ opts->saw_algorithm_keyword = 1;
185
+ }
186
+
187
+ return ST_CONTINUE;
188
+ }
189
+
190
+ static inline void mc_parse_opts(VALUE opts_hash, mc_opts_t *opts) {
191
+ mc_opts_init(opts);
192
+ if (NIL_P(opts_hash))
193
+ return;
194
+ Check_Type(opts_hash, T_HASH);
195
+ rb_hash_foreach(opts_hash, mc_opts_parse_i, (VALUE)opts);
142
196
  }
143
197
 
144
- static inline VALUE opt_lookup2(VALUE opts, VALUE sym, VALUE default_value) {
145
- return NIL_P(opts) ? default_value : rb_hash_lookup2(opts, sym, default_value);
198
+ static inline void scan_one_required_keywords(int argc, VALUE *argv, VALUE *arg, VALUE *opts) {
199
+ if (argc == 1) {
200
+ *arg = argv[0];
201
+ *opts = Qnil;
202
+ return;
203
+ }
204
+
205
+ if (argc == 2 && rb_keyword_given_p()) {
206
+ *arg = argv[0];
207
+ *opts = argv[1];
208
+ Check_Type(*opts, T_HASH);
209
+ return;
210
+ }
211
+
212
+ rb_error_arity(argc, 1, 1);
213
+ }
214
+
215
+ static inline void scan_zero_required_keywords(int argc, VALUE *argv, VALUE *opts) {
216
+ if (argc == 0) {
217
+ *opts = Qnil;
218
+ return;
219
+ }
220
+
221
+ if (argc == 1 && rb_keyword_given_p()) {
222
+ *opts = argv[0];
223
+ Check_Type(*opts, T_HASH);
224
+ return;
225
+ }
226
+
227
+ rb_error_arity(argc, 0, 0);
146
228
  }
147
229
 
148
230
  enum { LZ4_FRAME_MAGIC_LEN = 4 };
@@ -152,8 +234,9 @@ static inline int is_lz4_frame_magic(const uint8_t *data, size_t len) {
152
234
  return len >= LZ4_FRAME_MAGIC_LEN && memcmp(data, LZ4_FRAME_MAGIC, LZ4_FRAME_MAGIC_LEN) == 0;
153
235
  }
154
236
 
155
- static lz4_format_t parse_lz4_format(VALUE opts, compress_algo_t algo, int explicit_algo) {
156
- VALUE format_val = opt_lookup2(opts, sym_cache.format, Qundef);
237
+ static lz4_format_t parse_lz4_format(const mc_opts_t *opts, compress_algo_t algo,
238
+ int explicit_algo) {
239
+ VALUE format_val = opts->format;
157
240
  if (format_val == Qundef || NIL_P(format_val))
158
241
  return LZ4_FORMAT_BLOCK;
159
242
  if (explicit_algo && algo != ALGO_LZ4)
@@ -169,10 +252,8 @@ static lz4_format_t parse_lz4_format(VALUE opts, compress_algo_t algo, int expli
169
252
  return LZ4_FORMAT_BLOCK;
170
253
  }
171
254
 
172
- static inline void reject_algorithm_keyword(VALUE opts) {
173
- if (NIL_P(opts))
174
- return;
175
- if (rb_hash_lookup2(opts, sym_cache.algorithm, Qundef) != Qundef) {
255
+ static inline void reject_algorithm_keyword(const mc_opts_t *opts) {
256
+ if (opts->saw_algorithm_keyword) {
176
257
  rb_raise(rb_eArgError, "unknown keyword: :algorithm (use :algo)");
177
258
  }
178
259
  }
@@ -383,11 +464,8 @@ static void limits_config_init(limits_config_t *limits) {
383
464
  limits->max_ratio = DEFAULT_MAX_RATIO;
384
465
  }
385
466
 
386
- static void limits_config_apply_opts(VALUE opts, limits_config_t *limits) {
387
- if (NIL_P(opts))
388
- return;
389
-
390
- VALUE val = opt_lookup2(opts, sym_cache.max_output_size, Qundef);
467
+ static void limits_config_apply_parsed(const mc_opts_t *opts, limits_config_t *limits) {
468
+ VALUE val = opts->max_output_size;
391
469
  if (val != Qundef && !NIL_P(val)) {
392
470
  size_t max_output_size = NUM2SIZET(val);
393
471
  if (max_output_size == 0)
@@ -395,7 +473,7 @@ static void limits_config_apply_opts(VALUE opts, limits_config_t *limits) {
395
473
  limits->max_output_size = max_output_size;
396
474
  }
397
475
 
398
- val = opt_lookup2(opts, sym_cache.max_ratio, Qundef);
476
+ val = opts->max_ratio;
399
477
  if (val == Qundef)
400
478
  return;
401
479
  if (NIL_P(val)) {
@@ -411,9 +489,9 @@ static void limits_config_apply_opts(VALUE opts, limits_config_t *limits) {
411
489
  limits->max_ratio = max_ratio;
412
490
  }
413
491
 
414
- static void parse_limits_from_opts(VALUE opts, limits_config_t *limits) {
492
+ static void parse_limits_from_parsed_opts(const mc_opts_t *opts, limits_config_t *limits) {
415
493
  limits_config_init(limits);
416
- limits_config_apply_opts(opts, limits);
494
+ limits_config_apply_parsed(opts, limits);
417
495
  }
418
496
 
419
497
  static inline size_t checked_add_size(size_t left, size_t right, const char *message) {
@@ -611,7 +689,7 @@ static inline size_t fiber_maybe_yield(size_t bytes_since_yield, size_t just_pro
611
689
  return bytes_since_yield;
612
690
  }
613
691
 
614
- #define DICT_CDICT_CACHE_SIZE 4
692
+ #define DICT_CDICT_CACHE_SIZE 22
615
693
  _Static_assert(DICT_CDICT_CACHE_SIZE > 0, "CDict cache needs at least one slot");
616
694
 
617
695
  typedef struct {
@@ -623,6 +701,7 @@ struct dictionary_s {
623
701
  compress_algo_t algo;
624
702
  uint8_t *data;
625
703
  size_t size;
704
+ pthread_mutex_t cache_mutex;
626
705
 
627
706
  cdict_cache_entry_t cdict_cache[DICT_CDICT_CACHE_SIZE];
628
707
  int cdict_cache_count;
@@ -632,12 +711,15 @@ struct dictionary_s {
632
711
 
633
712
  static void dict_free(void *ptr) {
634
713
  dictionary_t *dict = (dictionary_t *)ptr;
714
+ if (!dict)
715
+ return;
635
716
  for (int i = 0; i < dict->cdict_cache_count; i++) {
636
717
  if (dict->cdict_cache[i].cdict)
637
718
  ZSTD_freeCDict(dict->cdict_cache[i].cdict);
638
719
  }
639
720
  if (dict->ddict)
640
721
  ZSTD_freeDDict(dict->ddict);
722
+ pthread_mutex_destroy(&dict->cache_mutex);
641
723
  if (dict->data)
642
724
  xfree(dict->data);
643
725
  xfree(dict);
@@ -666,47 +748,78 @@ static const rb_data_type_t dictionary_type = {
666
748
  static VALUE dict_alloc(VALUE klass) {
667
749
  dictionary_t *d = ALLOC(dictionary_t);
668
750
  memset(d, 0, sizeof(dictionary_t));
751
+ if (pthread_mutex_init(&d->cache_mutex, NULL) != 0) {
752
+ xfree(d);
753
+ rb_raise(eMemError, "failed to initialize dictionary cache mutex");
754
+ }
669
755
  return TypedData_Wrap_Struct(klass, &dictionary_type, d);
670
756
  }
671
757
 
672
758
  static ZSTD_CDict *dict_get_cdict(dictionary_t *dict, int level) {
759
+ ZSTD_CDict *existing = NULL;
760
+
761
+ pthread_mutex_lock(&dict->cache_mutex);
673
762
  for (int i = 0; i < dict->cdict_cache_count; i++) {
674
- if (dict->cdict_cache[i].level == level)
675
- return dict->cdict_cache[i].cdict;
763
+ if (dict->cdict_cache[i].level == level) {
764
+ existing = dict->cdict_cache[i].cdict;
765
+ break;
766
+ }
676
767
  }
768
+ pthread_mutex_unlock(&dict->cache_mutex);
769
+
770
+ if (existing)
771
+ return existing;
677
772
 
678
773
  ZSTD_CDict *cdict = ZSTD_createCDict(dict->data, dict->size, level);
679
774
  if (!cdict)
680
775
  return NULL;
681
776
 
777
+ pthread_mutex_lock(&dict->cache_mutex);
682
778
  for (int i = 0; i < dict->cdict_cache_count; i++) {
683
779
  if (dict->cdict_cache[i].level == level) {
780
+ existing = dict->cdict_cache[i].cdict;
781
+ pthread_mutex_unlock(&dict->cache_mutex);
684
782
  ZSTD_freeCDict(cdict);
685
- return dict->cdict_cache[i].cdict;
783
+ return existing;
686
784
  }
687
785
  }
688
786
 
689
- if (dict->cdict_cache_count < DICT_CDICT_CACHE_SIZE) {
690
- dict->cdict_cache[dict->cdict_cache_count].level = level;
691
- dict->cdict_cache[dict->cdict_cache_count].cdict = cdict;
692
- dict->cdict_cache_count++;
693
- } else {
694
- ZSTD_CDict *old_cdict = dict->cdict_cache[0].cdict;
695
- memmove(&dict->cdict_cache[0], &dict->cdict_cache[1],
696
- sizeof(cdict_cache_entry_t) * (DICT_CDICT_CACHE_SIZE - 1));
697
- dict->cdict_cache[DICT_CDICT_CACHE_SIZE - 1].level = level;
698
- dict->cdict_cache[DICT_CDICT_CACHE_SIZE - 1].cdict = cdict;
699
- if (old_cdict)
700
- ZSTD_freeCDict(old_cdict);
787
+ if (dict->cdict_cache_count >= DICT_CDICT_CACHE_SIZE) {
788
+ pthread_mutex_unlock(&dict->cache_mutex);
789
+ ZSTD_freeCDict(cdict);
790
+ rb_raise(eError, "zstd dictionary cdict cache exhausted");
701
791
  }
792
+
793
+ dict->cdict_cache[dict->cdict_cache_count].level = level;
794
+ dict->cdict_cache[dict->cdict_cache_count].cdict = cdict;
795
+ dict->cdict_cache_count++;
796
+ pthread_mutex_unlock(&dict->cache_mutex);
702
797
  return cdict;
703
798
  }
704
799
 
705
800
  static ZSTD_DDict *dict_get_ddict(dictionary_t *dict) {
801
+ ZSTD_DDict *existing;
802
+
803
+ pthread_mutex_lock(&dict->cache_mutex);
804
+ existing = dict->ddict;
805
+ pthread_mutex_unlock(&dict->cache_mutex);
806
+ if (existing)
807
+ return existing;
808
+
809
+ ZSTD_DDict *created = ZSTD_createDDict(dict->data, dict->size);
810
+ if (!created)
811
+ return NULL;
812
+
813
+ pthread_mutex_lock(&dict->cache_mutex);
706
814
  if (!dict->ddict) {
707
- dict->ddict = ZSTD_createDDict(dict->data, dict->size);
815
+ dict->ddict = created;
816
+ pthread_mutex_unlock(&dict->cache_mutex);
817
+ return created;
708
818
  }
709
- return dict->ddict;
819
+ existing = dict->ddict;
820
+ pthread_mutex_unlock(&dict->cache_mutex);
821
+ ZSTD_freeDDict(created);
822
+ return existing;
710
823
  }
711
824
 
712
825
  typedef struct {
@@ -1072,20 +1185,20 @@ static void *zstd_fiber_compress_nogvl(void *arg) {
1072
1185
 
1073
1186
  static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
1074
1187
  VALUE data, opts;
1075
- rb_scan_args(argc, argv, "1:", &data, &opts);
1188
+ scan_one_required_keywords(argc, argv, &data, &opts);
1076
1189
  StringValue(data);
1077
- reject_algorithm_keyword(opts);
1078
1190
 
1079
- VALUE algo_sym = Qnil, level_val = Qnil, dict_val = Qnil;
1080
- if (!NIL_P(opts)) {
1081
- algo_sym = opt_get(opts, sym_cache.algo);
1082
- level_val = opt_get(opts, sym_cache.level);
1083
- dict_val = opt_get(opts, sym_cache.dictionary);
1084
- }
1191
+ mc_opts_t parsed_opts;
1192
+ mc_parse_opts(opts, &parsed_opts);
1193
+ reject_algorithm_keyword(&parsed_opts);
1194
+
1195
+ VALUE algo_sym = parsed_opts.algo;
1196
+ VALUE level_val = parsed_opts.level;
1197
+ VALUE dict_val = parsed_opts.dictionary;
1085
1198
 
1086
1199
  int explicit_algo = !NIL_P(algo_sym);
1087
1200
  compress_algo_t algo = explicit_algo ? sym_to_algo(algo_sym) : ALGO_ZSTD;
1088
- lz4_format_t lz4_format = parse_lz4_format(opts, algo, explicit_algo);
1201
+ lz4_format_t lz4_format = parse_lz4_format(&parsed_opts, algo, explicit_algo);
1089
1202
  int level = resolve_level(algo, level_val);
1090
1203
 
1091
1204
  dictionary_t *dict = NULL;
@@ -1371,17 +1484,17 @@ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
1371
1484
 
1372
1485
  static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1373
1486
  VALUE data, opts;
1374
- rb_scan_args(argc, argv, "1:", &data, &opts);
1487
+ scan_one_required_keywords(argc, argv, &data, &opts);
1375
1488
  StringValue(data);
1376
- reject_algorithm_keyword(opts);
1377
1489
 
1378
- VALUE algo_sym = Qnil, dict_val = Qnil;
1490
+ mc_opts_t parsed_opts;
1491
+ mc_parse_opts(opts, &parsed_opts);
1492
+ reject_algorithm_keyword(&parsed_opts);
1493
+
1494
+ VALUE algo_sym = parsed_opts.algo;
1495
+ VALUE dict_val = parsed_opts.dictionary;
1379
1496
  limits_config_t limits;
1380
- parse_limits_from_opts(opts, &limits);
1381
- if (!NIL_P(opts)) {
1382
- algo_sym = opt_get(opts, sym_cache.algo);
1383
- dict_val = opt_get(opts, sym_cache.dictionary);
1384
- }
1497
+ parse_limits_from_parsed_opts(&parsed_opts, &limits);
1385
1498
 
1386
1499
  const uint8_t *src = (const uint8_t *)RSTRING_PTR(data);
1387
1500
  size_t slen = RSTRING_LEN(data);
@@ -1393,7 +1506,7 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1393
1506
  } else {
1394
1507
  algo = sym_to_algo(algo_sym);
1395
1508
  }
1396
- lz4_format_t lz4_format = parse_lz4_format(opts, algo, explicit_algo);
1509
+ lz4_format_t lz4_format = parse_lz4_format(&parsed_opts, algo, explicit_algo);
1397
1510
 
1398
1511
  const algo_policy_t *policy = algo_policy(algo);
1399
1512
 
@@ -1974,18 +2087,18 @@ static VALUE deflater_alloc(VALUE klass) {
1974
2087
 
1975
2088
  static VALUE deflater_initialize(int argc, VALUE *argv, VALUE self) {
1976
2089
  VALUE opts;
1977
- rb_scan_args(argc, argv, "0:", &opts);
1978
- reject_algorithm_keyword(opts);
2090
+ scan_zero_required_keywords(argc, argv, &opts);
2091
+
2092
+ mc_opts_t parsed_opts;
2093
+ mc_parse_opts(opts, &parsed_opts);
2094
+ reject_algorithm_keyword(&parsed_opts);
1979
2095
 
1980
2096
  deflater_t *d;
1981
2097
  TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
1982
2098
 
1983
- VALUE algo_sym = Qnil, level_val = Qnil, dict_val = Qnil;
1984
- if (!NIL_P(opts)) {
1985
- algo_sym = opt_get(opts, sym_cache.algo);
1986
- level_val = opt_get(opts, sym_cache.level);
1987
- dict_val = opt_get(opts, sym_cache.dictionary);
1988
- }
2099
+ VALUE algo_sym = parsed_opts.algo;
2100
+ VALUE level_val = parsed_opts.level;
2101
+ VALUE dict_val = parsed_opts.dictionary;
1989
2102
 
1990
2103
  d->algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
1991
2104
  d->level = resolve_level(d->algo, level_val);
@@ -2628,19 +2741,19 @@ static VALUE inflater_alloc(VALUE klass) {
2628
2741
 
2629
2742
  static VALUE inflater_initialize(int argc, VALUE *argv, VALUE self) {
2630
2743
  VALUE opts;
2631
- rb_scan_args(argc, argv, "0:", &opts);
2632
- reject_algorithm_keyword(opts);
2744
+ scan_zero_required_keywords(argc, argv, &opts);
2745
+
2746
+ mc_opts_t parsed_opts;
2747
+ mc_parse_opts(opts, &parsed_opts);
2748
+ reject_algorithm_keyword(&parsed_opts);
2633
2749
 
2634
2750
  inflater_t *inf;
2635
2751
  TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
2636
2752
 
2637
- VALUE algo_sym = Qnil, dict_val = Qnil;
2753
+ VALUE algo_sym = parsed_opts.algo;
2754
+ VALUE dict_val = parsed_opts.dictionary;
2638
2755
  limits_config_t limits;
2639
- parse_limits_from_opts(opts, &limits);
2640
- if (!NIL_P(opts)) {
2641
- algo_sym = opt_get(opts, sym_cache.algo);
2642
- dict_val = opt_get(opts, sym_cache.dictionary);
2643
- }
2756
+ parse_limits_from_parsed_opts(&parsed_opts, &limits);
2644
2757
 
2645
2758
  inf->algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
2646
2759
  inf->closed = 0;
@@ -2867,7 +2980,6 @@ static VALUE inflater_write(VALUE self, VALUE chunk) {
2867
2980
  case ALGO_LZ4: {
2868
2981
  size_t data_len = inf->lz4_buf.len - inf->lz4_buf.offset;
2869
2982
  size_t needed = data_len + slen;
2870
- // TODO(v0.4): optional standard LZ4 frame format support via lz4frame.h
2871
2983
 
2872
2984
  if (inf->lz4_buf.offset > 0 && needed > inf->lz4_buf.cap) {
2873
2985
  if (data_len > 0)
@@ -3054,17 +3166,17 @@ static VALUE inflater_closed_p(VALUE self) {
3054
3166
 
3055
3167
  static VALUE dict_initialize(int argc, VALUE *argv, VALUE self) {
3056
3168
  VALUE raw, opts;
3057
- rb_scan_args(argc, argv, "1:", &raw, &opts);
3169
+ scan_one_required_keywords(argc, argv, &raw, &opts);
3058
3170
  StringValue(raw);
3059
- reject_algorithm_keyword(opts);
3171
+
3172
+ mc_opts_t parsed_opts;
3173
+ mc_parse_opts(opts, &parsed_opts);
3174
+ reject_algorithm_keyword(&parsed_opts);
3060
3175
 
3061
3176
  dictionary_t *d;
3062
3177
  TypedData_Get_Struct(self, dictionary_t, &dictionary_type, d);
3063
3178
 
3064
- VALUE algo_sym = Qnil;
3065
- if (!NIL_P(opts)) {
3066
- algo_sym = opt_get(opts, sym_cache.algo);
3067
- }
3179
+ VALUE algo_sym = parsed_opts.algo;
3068
3180
  d->algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
3069
3181
 
3070
3182
  if (d->algo == ALGO_LZ4)
@@ -3148,39 +3260,35 @@ static VALUE train_dictionary_internal(VALUE samples, VALUE size_val, compress_a
3148
3260
  }
3149
3261
 
3150
3262
  static VALUE zstd_train_dictionary(int argc, VALUE *argv, VALUE self) {
3151
- // #if defined(__APPLE__) && (defined(__arm64__) || defined(__aarch64__))
3152
- // rb_raise(eUnsupportedError,
3153
- // "Zstd dictionary training is temporarily disabled on arm64-darwin "
3154
- // "because the current vendored trainer path crashes on this platform");
3155
- // #endif
3156
-
3157
3263
  VALUE samples, opts;
3158
- rb_scan_args(argc, argv, "1:", &samples, &opts);
3159
- reject_algorithm_keyword(opts);
3160
- VALUE size_val = opt_get(opts, sym_cache.size);
3161
- return train_dictionary_internal(samples, size_val, ALGO_ZSTD);
3264
+ scan_one_required_keywords(argc, argv, &samples, &opts);
3265
+ mc_opts_t parsed_opts;
3266
+ mc_parse_opts(opts, &parsed_opts);
3267
+ reject_algorithm_keyword(&parsed_opts);
3268
+ return train_dictionary_internal(samples, parsed_opts.size, ALGO_ZSTD);
3162
3269
  }
3163
3270
 
3164
3271
  static VALUE brotli_train_dictionary(int argc, VALUE *argv, VALUE self) {
3165
3272
  VALUE samples, opts;
3166
- rb_scan_args(argc, argv, "1:", &samples, &opts);
3167
- reject_algorithm_keyword(opts);
3168
- VALUE size_val = opt_get(opts, sym_cache.size);
3273
+ scan_one_required_keywords(argc, argv, &samples, &opts);
3274
+ mc_opts_t parsed_opts;
3275
+ mc_parse_opts(opts, &parsed_opts);
3276
+ reject_algorithm_keyword(&parsed_opts);
3169
3277
 
3170
- return train_dictionary_internal(samples, size_val, ALGO_BROTLI);
3278
+ return train_dictionary_internal(samples, parsed_opts.size, ALGO_BROTLI);
3171
3279
  }
3172
3280
 
3173
3281
  static VALUE dict_load(int argc, VALUE *argv, VALUE self) {
3174
3282
  VALUE path, opts;
3175
- rb_scan_args(argc, argv, "1:", &path, &opts);
3283
+ scan_one_required_keywords(argc, argv, &path, &opts);
3176
3284
  StringValue(path);
3177
- reject_algorithm_keyword(opts);
3178
3285
  raise_if_path_has_null_byte(path);
3179
3286
 
3180
- VALUE algo_sym = Qnil;
3181
- if (!NIL_P(opts)) {
3182
- algo_sym = opt_get(opts, sym_cache.algo);
3183
- }
3287
+ mc_opts_t parsed_opts;
3288
+ mc_parse_opts(opts, &parsed_opts);
3289
+ reject_algorithm_keyword(&parsed_opts);
3290
+
3291
+ VALUE algo_sym = parsed_opts.algo;
3184
3292
  compress_algo_t algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
3185
3293
 
3186
3294
  if (algo == ALGO_LZ4)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module MultiCompress
4
- VERSION = "0.3.0"
4
+ VERSION = "0.3.2"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multi_compress
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Roman Haydarov
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-04-18 00:00:00.000000000 Z
11
+ date: 2026-04-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler