multi_compress 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +24 -24
- data/ext/multi_compress/multi_compress.c +205 -97
- data/lib/multi_compress/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 23ab6bacd75b21b5cfbf5b7b3121428c18060bd9e097f815438af6c8bafd8883
|
|
4
|
+
data.tar.gz: 90321202358a43bb732077aa1f28b8a9e5d756e27067df851671883a21eb1470
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 431921d63b8757216df9179dfd5d792103777dbf597d52cfafeb2e593ee8ab52ce12f374499826b0eba45a164bfde0f4adaa3aa3ba9ee811c6d9f44ed2ccdb7d
|
|
7
|
+
data.tar.gz: 2e1d3bf4455fd627696c7ff7a500e6424497df012939487d39896b8e50dad421c8acd8b427f10fc3de6a51b799ca5b1532cda5508485440e4f633a5ac5677a1d
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -106,9 +106,9 @@ Performance comparison against Ruby's built-in zlib compression (200 iterations
|
|
|
106
106
|
│ Medium JSON (~370KB, no GC) │ 8.5% │ 15.7% │ 6.7% │ 5.5% │
|
|
107
107
|
│ Medium logs (~168KB, no GC) │ 8.6% │ 17.2% │ 5.4% │ 3.2% │
|
|
108
108
|
│ Large JSON (~1.6MB, GC) │ 8.1% │ 15.1% │ 6.1% │ 5.6% │
|
|
109
|
-
│ Large logs (~600KB, GC) │ 7.6% │ 16.0% │ 2.
|
|
109
|
+
│ Large logs (~600KB, GC) │ 7.6% │ 16.0% │ 2.9% │ 2.0% │
|
|
110
110
|
│ Large JSON (~1.6MB, no GC) │ 8.1% │ 15.1% │ 6.1% │ 5.6% │
|
|
111
|
-
│ Large logs (~600KB, no GC) │ 7.6% │ 16.0% │ 2.
|
|
111
|
+
│ Large logs (~600KB, no GC) │ 7.6% │ 16.0% │ 2.9% │ 2.0% │
|
|
112
112
|
└─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
|
|
113
113
|
```
|
|
114
114
|
|
|
@@ -117,18 +117,18 @@ Performance comparison against Ruby's built-in zlib compression (200 iterations
|
|
|
117
117
|
┌─────────────────────────────┬─────────┬─────────┬─────────┬─────────┐
|
|
118
118
|
│ Configuration │ zlib │ lz4 │ zstd │ brotli │
|
|
119
119
|
├─────────────────────────────┼─────────┼─────────┼─────────┼─────────┤
|
|
120
|
-
│ Small JSON (~10KB, GC) │ 0.05 │ 0.01 │ 0.02 │ 0.
|
|
121
|
-
│ Small text (~10KB, GC) │ 0.
|
|
120
|
+
│ Small JSON (~10KB, GC) │ 0.05 │ 0.01 │ 0.02 │ 0.12 │
|
|
121
|
+
│ Small text (~10KB, GC) │ 0.03 │ 0.00 │ 0.01 │ 0.09 │
|
|
122
122
|
│ Small JSON (~10KB, no GC) │ 0.06 │ 0.01 │ 0.02 │ 0.13 │
|
|
123
|
-
│ Small text (~10KB, no GC) │ 0.
|
|
124
|
-
│ Medium JSON (~370KB, GC) │ 2.
|
|
125
|
-
│ Medium logs (~168KB, GC) │ 1.23 │ 0.
|
|
126
|
-
│ Medium JSON (~370KB, no GC) │ 2.
|
|
127
|
-
│ Medium logs (~168KB, no GC) │ 1.
|
|
128
|
-
│ Large JSON (~1.6MB, GC) │
|
|
129
|
-
│ Large logs (~600KB, GC) │ 4.
|
|
130
|
-
│ Large JSON (~1.6MB, no GC) │
|
|
131
|
-
│ Large logs (~600KB, no GC) │ 4.
|
|
123
|
+
│ Small text (~10KB, no GC) │ 0.03 │ 0.00 │ 0.01 │ 0.11 │
|
|
124
|
+
│ Medium JSON (~370KB, GC) │ 2.62 │ 0.28 │ 0.39 │ 2.31 │
|
|
125
|
+
│ Medium logs (~168KB, GC) │ 1.23 │ 0.13 │ 0.18 │ 0.88 │
|
|
126
|
+
│ Medium JSON (~370KB, no GC) │ 2.65 │ 0.27 │ 0.40 │ 2.31 │
|
|
127
|
+
│ Medium logs (~168KB, no GC) │ 1.27 │ 0.13 │ 0.18 │ 0.95 │
|
|
128
|
+
│ Large JSON (~1.6MB, GC) │ 11.70 │ 1.36 │ 1.93 │ 11.95 │
|
|
129
|
+
│ Large logs (~600KB, GC) │ 4.10 │ 0.45 │ 0.45 │ 2.62 │
|
|
130
|
+
│ Large JSON (~1.6MB, no GC) │ 11.47 │ 1.27 │ 1.88 │ 11.47 │
|
|
131
|
+
│ Large logs (~600KB, no GC) │ 4.06 │ 0.41 │ 0.45 │ 2.79 │
|
|
132
132
|
└─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
|
|
133
133
|
```
|
|
134
134
|
|
|
@@ -137,18 +137,18 @@ Performance comparison against Ruby's built-in zlib compression (200 iterations
|
|
|
137
137
|
┌─────────────────────────────┬─────────┬─────────┬─────────┬─────────┐
|
|
138
138
|
│ Configuration │ zlib │ lz4 │ zstd │ brotli │
|
|
139
139
|
├─────────────────────────────┼─────────┼─────────┼─────────┼─────────┤
|
|
140
|
-
│ Small JSON (~10KB, GC) │ 1.00x │ 5.00x │ 2.50x │ 0.
|
|
141
|
-
│ Small text (~10KB, GC) │ 1.00x │ N/A │
|
|
140
|
+
│ Small JSON (~10KB, GC) │ 1.00x │ 5.00x │ 2.50x │ 0.42x │
|
|
141
|
+
│ Small text (~10KB, GC) │ 1.00x │ N/A │ 3.00x │ 0.33x │
|
|
142
142
|
│ Small JSON (~10KB, no GC) │ 1.00x │ 6.00x │ 3.00x │ 0.46x │
|
|
143
|
-
│ Small text (~10KB, no GC) │ 1.00x │ N/A │
|
|
144
|
-
│ Medium JSON (~370KB, GC) │ 1.00x │ 9.
|
|
145
|
-
│ Medium logs (~168KB, GC) │ 1.00x │
|
|
146
|
-
│ Medium JSON (~370KB, no GC) │ 1.00x │ 9.
|
|
147
|
-
│ Medium logs (~168KB, no GC) │ 1.00x │ 9.
|
|
148
|
-
│ Large JSON (~1.6MB, GC) │ 1.00x │
|
|
149
|
-
│ Large logs (~600KB, GC) │ 1.00x │ 9.
|
|
150
|
-
│ Large JSON (~1.6MB, no GC) │ 1.00x │ 9.
|
|
151
|
-
│ Large logs (~600KB, no GC) │ 1.00x │
|
|
143
|
+
│ Small text (~10KB, no GC) │ 1.00x │ N/A │ 3.00x │ 0.27x │
|
|
144
|
+
│ Medium JSON (~370KB, GC) │ 1.00x │ 9.36x │ 6.72x │ 1.13x │
|
|
145
|
+
│ Medium logs (~168KB, GC) │ 1.00x │ 9.46x │ 6.83x │ 1.40x │
|
|
146
|
+
│ Medium JSON (~370KB, no GC) │ 1.00x │ 9.81x │ 6.62x │ 1.15x │
|
|
147
|
+
│ Medium logs (~168KB, no GC) │ 1.00x │ 9.77x │ 7.06x │ 1.34x │
|
|
148
|
+
│ Large JSON (~1.6MB, GC) │ 1.00x │ 8.60x │ 6.06x │ 0.98x │
|
|
149
|
+
│ Large logs (~600KB, GC) │ 1.00x │ 9.11x │ 9.11x │ 1.56x │
|
|
150
|
+
│ Large JSON (~1.6MB, no GC) │ 1.00x │ 9.03x │ 6.10x │ 1.00x │
|
|
151
|
+
│ Large logs (~600KB, no GC) │ 1.00x │ 9.90x │ 9.02x │ 1.46x │
|
|
152
152
|
└─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
|
|
153
153
|
```
|
|
154
154
|
|
|
@@ -137,12 +137,94 @@ static void init_id_cache(void) {
|
|
|
137
137
|
sym_cache.max_ratio = ID2SYM(id_cache.max_ratio);
|
|
138
138
|
}
|
|
139
139
|
|
|
140
|
-
|
|
141
|
-
|
|
140
|
+
typedef struct {
|
|
141
|
+
VALUE algo;
|
|
142
|
+
VALUE level;
|
|
143
|
+
VALUE dictionary;
|
|
144
|
+
VALUE size;
|
|
145
|
+
VALUE format;
|
|
146
|
+
VALUE max_output_size;
|
|
147
|
+
VALUE max_ratio;
|
|
148
|
+
int saw_algorithm_keyword;
|
|
149
|
+
} mc_opts_t;
|
|
150
|
+
|
|
151
|
+
static inline void mc_opts_init(mc_opts_t *opts) {
|
|
152
|
+
opts->algo = Qnil;
|
|
153
|
+
opts->level = Qnil;
|
|
154
|
+
opts->dictionary = Qnil;
|
|
155
|
+
opts->size = Qnil;
|
|
156
|
+
opts->format = Qnil;
|
|
157
|
+
opts->max_output_size = Qundef;
|
|
158
|
+
opts->max_ratio = Qundef;
|
|
159
|
+
opts->saw_algorithm_keyword = 0;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
static int mc_opts_parse_i(VALUE key, VALUE value, VALUE arg) {
|
|
163
|
+
mc_opts_t *opts = (mc_opts_t *)arg;
|
|
164
|
+
|
|
165
|
+
if (!SYMBOL_P(key))
|
|
166
|
+
return ST_CONTINUE;
|
|
167
|
+
|
|
168
|
+
ID id = SYM2ID(key);
|
|
169
|
+
if (id == id_cache.algo) {
|
|
170
|
+
opts->algo = value;
|
|
171
|
+
} else if (id == id_cache.level) {
|
|
172
|
+
opts->level = value;
|
|
173
|
+
} else if (id == id_cache.dictionary) {
|
|
174
|
+
opts->dictionary = value;
|
|
175
|
+
} else if (id == id_cache.size) {
|
|
176
|
+
opts->size = value;
|
|
177
|
+
} else if (id == id_cache.format) {
|
|
178
|
+
opts->format = value;
|
|
179
|
+
} else if (id == id_cache.max_output_size) {
|
|
180
|
+
opts->max_output_size = value;
|
|
181
|
+
} else if (id == id_cache.max_ratio) {
|
|
182
|
+
opts->max_ratio = value;
|
|
183
|
+
} else if (id == id_cache.algorithm) {
|
|
184
|
+
opts->saw_algorithm_keyword = 1;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
return ST_CONTINUE;
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
static inline void mc_parse_opts(VALUE opts_hash, mc_opts_t *opts) {
|
|
191
|
+
mc_opts_init(opts);
|
|
192
|
+
if (NIL_P(opts_hash))
|
|
193
|
+
return;
|
|
194
|
+
Check_Type(opts_hash, T_HASH);
|
|
195
|
+
rb_hash_foreach(opts_hash, mc_opts_parse_i, (VALUE)opts);
|
|
142
196
|
}
|
|
143
197
|
|
|
144
|
-
static inline
|
|
145
|
-
|
|
198
|
+
static inline void scan_one_required_keywords(int argc, VALUE *argv, VALUE *arg, VALUE *opts) {
|
|
199
|
+
if (argc == 1) {
|
|
200
|
+
*arg = argv[0];
|
|
201
|
+
*opts = Qnil;
|
|
202
|
+
return;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
if (argc == 2 && rb_keyword_given_p()) {
|
|
206
|
+
*arg = argv[0];
|
|
207
|
+
*opts = argv[1];
|
|
208
|
+
Check_Type(*opts, T_HASH);
|
|
209
|
+
return;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
rb_error_arity(argc, 1, 1);
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
static inline void scan_zero_required_keywords(int argc, VALUE *argv, VALUE *opts) {
|
|
216
|
+
if (argc == 0) {
|
|
217
|
+
*opts = Qnil;
|
|
218
|
+
return;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
if (argc == 1 && rb_keyword_given_p()) {
|
|
222
|
+
*opts = argv[0];
|
|
223
|
+
Check_Type(*opts, T_HASH);
|
|
224
|
+
return;
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
rb_error_arity(argc, 0, 0);
|
|
146
228
|
}
|
|
147
229
|
|
|
148
230
|
enum { LZ4_FRAME_MAGIC_LEN = 4 };
|
|
@@ -152,8 +234,9 @@ static inline int is_lz4_frame_magic(const uint8_t *data, size_t len) {
|
|
|
152
234
|
return len >= LZ4_FRAME_MAGIC_LEN && memcmp(data, LZ4_FRAME_MAGIC, LZ4_FRAME_MAGIC_LEN) == 0;
|
|
153
235
|
}
|
|
154
236
|
|
|
155
|
-
static lz4_format_t parse_lz4_format(VALUE opts, compress_algo_t algo, int explicit_algo) {
|
|
156
|
-
|
|
237
|
+
static lz4_format_t parse_lz4_format(const mc_opts_t *opts, compress_algo_t algo,
|
|
238
|
+
int explicit_algo) {
|
|
239
|
+
VALUE format_val = opts->format;
|
|
157
240
|
if (format_val == Qundef || NIL_P(format_val))
|
|
158
241
|
return LZ4_FORMAT_BLOCK;
|
|
159
242
|
if (explicit_algo && algo != ALGO_LZ4)
|
|
@@ -169,10 +252,8 @@ static lz4_format_t parse_lz4_format(VALUE opts, compress_algo_t algo, int expli
|
|
|
169
252
|
return LZ4_FORMAT_BLOCK;
|
|
170
253
|
}
|
|
171
254
|
|
|
172
|
-
static inline void reject_algorithm_keyword(VALUE opts) {
|
|
173
|
-
if (
|
|
174
|
-
return;
|
|
175
|
-
if (rb_hash_lookup2(opts, sym_cache.algorithm, Qundef) != Qundef) {
|
|
255
|
+
static inline void reject_algorithm_keyword(const mc_opts_t *opts) {
|
|
256
|
+
if (opts->saw_algorithm_keyword) {
|
|
176
257
|
rb_raise(rb_eArgError, "unknown keyword: :algorithm (use :algo)");
|
|
177
258
|
}
|
|
178
259
|
}
|
|
@@ -383,11 +464,8 @@ static void limits_config_init(limits_config_t *limits) {
|
|
|
383
464
|
limits->max_ratio = DEFAULT_MAX_RATIO;
|
|
384
465
|
}
|
|
385
466
|
|
|
386
|
-
static void limits_config_apply_opts(VALUE opts, limits_config_t *limits) {
|
|
387
|
-
|
|
388
|
-
return;
|
|
389
|
-
|
|
390
|
-
VALUE val = opt_lookup2(opts, sym_cache.max_output_size, Qundef);
|
|
467
|
+
static void limits_config_apply_parsed(const mc_opts_t *opts, limits_config_t *limits) {
|
|
468
|
+
VALUE val = opts->max_output_size;
|
|
391
469
|
if (val != Qundef && !NIL_P(val)) {
|
|
392
470
|
size_t max_output_size = NUM2SIZET(val);
|
|
393
471
|
if (max_output_size == 0)
|
|
@@ -395,7 +473,7 @@ static void limits_config_apply_opts(VALUE opts, limits_config_t *limits) {
|
|
|
395
473
|
limits->max_output_size = max_output_size;
|
|
396
474
|
}
|
|
397
475
|
|
|
398
|
-
val =
|
|
476
|
+
val = opts->max_ratio;
|
|
399
477
|
if (val == Qundef)
|
|
400
478
|
return;
|
|
401
479
|
if (NIL_P(val)) {
|
|
@@ -411,9 +489,9 @@ static void limits_config_apply_opts(VALUE opts, limits_config_t *limits) {
|
|
|
411
489
|
limits->max_ratio = max_ratio;
|
|
412
490
|
}
|
|
413
491
|
|
|
414
|
-
static void
|
|
492
|
+
static void parse_limits_from_parsed_opts(const mc_opts_t *opts, limits_config_t *limits) {
|
|
415
493
|
limits_config_init(limits);
|
|
416
|
-
|
|
494
|
+
limits_config_apply_parsed(opts, limits);
|
|
417
495
|
}
|
|
418
496
|
|
|
419
497
|
static inline size_t checked_add_size(size_t left, size_t right, const char *message) {
|
|
@@ -611,7 +689,7 @@ static inline size_t fiber_maybe_yield(size_t bytes_since_yield, size_t just_pro
|
|
|
611
689
|
return bytes_since_yield;
|
|
612
690
|
}
|
|
613
691
|
|
|
614
|
-
#define DICT_CDICT_CACHE_SIZE
|
|
692
|
+
#define DICT_CDICT_CACHE_SIZE 22
|
|
615
693
|
_Static_assert(DICT_CDICT_CACHE_SIZE > 0, "CDict cache needs at least one slot");
|
|
616
694
|
|
|
617
695
|
typedef struct {
|
|
@@ -623,6 +701,7 @@ struct dictionary_s {
|
|
|
623
701
|
compress_algo_t algo;
|
|
624
702
|
uint8_t *data;
|
|
625
703
|
size_t size;
|
|
704
|
+
pthread_mutex_t cache_mutex;
|
|
626
705
|
|
|
627
706
|
cdict_cache_entry_t cdict_cache[DICT_CDICT_CACHE_SIZE];
|
|
628
707
|
int cdict_cache_count;
|
|
@@ -632,12 +711,15 @@ struct dictionary_s {
|
|
|
632
711
|
|
|
633
712
|
static void dict_free(void *ptr) {
|
|
634
713
|
dictionary_t *dict = (dictionary_t *)ptr;
|
|
714
|
+
if (!dict)
|
|
715
|
+
return;
|
|
635
716
|
for (int i = 0; i < dict->cdict_cache_count; i++) {
|
|
636
717
|
if (dict->cdict_cache[i].cdict)
|
|
637
718
|
ZSTD_freeCDict(dict->cdict_cache[i].cdict);
|
|
638
719
|
}
|
|
639
720
|
if (dict->ddict)
|
|
640
721
|
ZSTD_freeDDict(dict->ddict);
|
|
722
|
+
pthread_mutex_destroy(&dict->cache_mutex);
|
|
641
723
|
if (dict->data)
|
|
642
724
|
xfree(dict->data);
|
|
643
725
|
xfree(dict);
|
|
@@ -666,47 +748,78 @@ static const rb_data_type_t dictionary_type = {
|
|
|
666
748
|
static VALUE dict_alloc(VALUE klass) {
|
|
667
749
|
dictionary_t *d = ALLOC(dictionary_t);
|
|
668
750
|
memset(d, 0, sizeof(dictionary_t));
|
|
751
|
+
if (pthread_mutex_init(&d->cache_mutex, NULL) != 0) {
|
|
752
|
+
xfree(d);
|
|
753
|
+
rb_raise(eMemError, "failed to initialize dictionary cache mutex");
|
|
754
|
+
}
|
|
669
755
|
return TypedData_Wrap_Struct(klass, &dictionary_type, d);
|
|
670
756
|
}
|
|
671
757
|
|
|
672
758
|
static ZSTD_CDict *dict_get_cdict(dictionary_t *dict, int level) {
|
|
759
|
+
ZSTD_CDict *existing = NULL;
|
|
760
|
+
|
|
761
|
+
pthread_mutex_lock(&dict->cache_mutex);
|
|
673
762
|
for (int i = 0; i < dict->cdict_cache_count; i++) {
|
|
674
|
-
if (dict->cdict_cache[i].level == level)
|
|
675
|
-
|
|
763
|
+
if (dict->cdict_cache[i].level == level) {
|
|
764
|
+
existing = dict->cdict_cache[i].cdict;
|
|
765
|
+
break;
|
|
766
|
+
}
|
|
676
767
|
}
|
|
768
|
+
pthread_mutex_unlock(&dict->cache_mutex);
|
|
769
|
+
|
|
770
|
+
if (existing)
|
|
771
|
+
return existing;
|
|
677
772
|
|
|
678
773
|
ZSTD_CDict *cdict = ZSTD_createCDict(dict->data, dict->size, level);
|
|
679
774
|
if (!cdict)
|
|
680
775
|
return NULL;
|
|
681
776
|
|
|
777
|
+
pthread_mutex_lock(&dict->cache_mutex);
|
|
682
778
|
for (int i = 0; i < dict->cdict_cache_count; i++) {
|
|
683
779
|
if (dict->cdict_cache[i].level == level) {
|
|
780
|
+
existing = dict->cdict_cache[i].cdict;
|
|
781
|
+
pthread_mutex_unlock(&dict->cache_mutex);
|
|
684
782
|
ZSTD_freeCDict(cdict);
|
|
685
|
-
return
|
|
783
|
+
return existing;
|
|
686
784
|
}
|
|
687
785
|
}
|
|
688
786
|
|
|
689
|
-
if (dict->cdict_cache_count
|
|
690
|
-
dict->
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
} else {
|
|
694
|
-
ZSTD_CDict *old_cdict = dict->cdict_cache[0].cdict;
|
|
695
|
-
memmove(&dict->cdict_cache[0], &dict->cdict_cache[1],
|
|
696
|
-
sizeof(cdict_cache_entry_t) * (DICT_CDICT_CACHE_SIZE - 1));
|
|
697
|
-
dict->cdict_cache[DICT_CDICT_CACHE_SIZE - 1].level = level;
|
|
698
|
-
dict->cdict_cache[DICT_CDICT_CACHE_SIZE - 1].cdict = cdict;
|
|
699
|
-
if (old_cdict)
|
|
700
|
-
ZSTD_freeCDict(old_cdict);
|
|
787
|
+
if (dict->cdict_cache_count >= DICT_CDICT_CACHE_SIZE) {
|
|
788
|
+
pthread_mutex_unlock(&dict->cache_mutex);
|
|
789
|
+
ZSTD_freeCDict(cdict);
|
|
790
|
+
rb_raise(eError, "zstd dictionary cdict cache exhausted");
|
|
701
791
|
}
|
|
792
|
+
|
|
793
|
+
dict->cdict_cache[dict->cdict_cache_count].level = level;
|
|
794
|
+
dict->cdict_cache[dict->cdict_cache_count].cdict = cdict;
|
|
795
|
+
dict->cdict_cache_count++;
|
|
796
|
+
pthread_mutex_unlock(&dict->cache_mutex);
|
|
702
797
|
return cdict;
|
|
703
798
|
}
|
|
704
799
|
|
|
705
800
|
static ZSTD_DDict *dict_get_ddict(dictionary_t *dict) {
|
|
801
|
+
ZSTD_DDict *existing;
|
|
802
|
+
|
|
803
|
+
pthread_mutex_lock(&dict->cache_mutex);
|
|
804
|
+
existing = dict->ddict;
|
|
805
|
+
pthread_mutex_unlock(&dict->cache_mutex);
|
|
806
|
+
if (existing)
|
|
807
|
+
return existing;
|
|
808
|
+
|
|
809
|
+
ZSTD_DDict *created = ZSTD_createDDict(dict->data, dict->size);
|
|
810
|
+
if (!created)
|
|
811
|
+
return NULL;
|
|
812
|
+
|
|
813
|
+
pthread_mutex_lock(&dict->cache_mutex);
|
|
706
814
|
if (!dict->ddict) {
|
|
707
|
-
dict->ddict =
|
|
815
|
+
dict->ddict = created;
|
|
816
|
+
pthread_mutex_unlock(&dict->cache_mutex);
|
|
817
|
+
return created;
|
|
708
818
|
}
|
|
709
|
-
|
|
819
|
+
existing = dict->ddict;
|
|
820
|
+
pthread_mutex_unlock(&dict->cache_mutex);
|
|
821
|
+
ZSTD_freeDDict(created);
|
|
822
|
+
return existing;
|
|
710
823
|
}
|
|
711
824
|
|
|
712
825
|
typedef struct {
|
|
@@ -1072,20 +1185,20 @@ static void *zstd_fiber_compress_nogvl(void *arg) {
|
|
|
1072
1185
|
|
|
1073
1186
|
static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
|
|
1074
1187
|
VALUE data, opts;
|
|
1075
|
-
|
|
1188
|
+
scan_one_required_keywords(argc, argv, &data, &opts);
|
|
1076
1189
|
StringValue(data);
|
|
1077
|
-
reject_algorithm_keyword(opts);
|
|
1078
1190
|
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1191
|
+
mc_opts_t parsed_opts;
|
|
1192
|
+
mc_parse_opts(opts, &parsed_opts);
|
|
1193
|
+
reject_algorithm_keyword(&parsed_opts);
|
|
1194
|
+
|
|
1195
|
+
VALUE algo_sym = parsed_opts.algo;
|
|
1196
|
+
VALUE level_val = parsed_opts.level;
|
|
1197
|
+
VALUE dict_val = parsed_opts.dictionary;
|
|
1085
1198
|
|
|
1086
1199
|
int explicit_algo = !NIL_P(algo_sym);
|
|
1087
1200
|
compress_algo_t algo = explicit_algo ? sym_to_algo(algo_sym) : ALGO_ZSTD;
|
|
1088
|
-
lz4_format_t lz4_format = parse_lz4_format(
|
|
1201
|
+
lz4_format_t lz4_format = parse_lz4_format(&parsed_opts, algo, explicit_algo);
|
|
1089
1202
|
int level = resolve_level(algo, level_val);
|
|
1090
1203
|
|
|
1091
1204
|
dictionary_t *dict = NULL;
|
|
@@ -1371,17 +1484,17 @@ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
|
|
|
1371
1484
|
|
|
1372
1485
|
static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
|
|
1373
1486
|
VALUE data, opts;
|
|
1374
|
-
|
|
1487
|
+
scan_one_required_keywords(argc, argv, &data, &opts);
|
|
1375
1488
|
StringValue(data);
|
|
1376
|
-
reject_algorithm_keyword(opts);
|
|
1377
1489
|
|
|
1378
|
-
|
|
1490
|
+
mc_opts_t parsed_opts;
|
|
1491
|
+
mc_parse_opts(opts, &parsed_opts);
|
|
1492
|
+
reject_algorithm_keyword(&parsed_opts);
|
|
1493
|
+
|
|
1494
|
+
VALUE algo_sym = parsed_opts.algo;
|
|
1495
|
+
VALUE dict_val = parsed_opts.dictionary;
|
|
1379
1496
|
limits_config_t limits;
|
|
1380
|
-
|
|
1381
|
-
if (!NIL_P(opts)) {
|
|
1382
|
-
algo_sym = opt_get(opts, sym_cache.algo);
|
|
1383
|
-
dict_val = opt_get(opts, sym_cache.dictionary);
|
|
1384
|
-
}
|
|
1497
|
+
parse_limits_from_parsed_opts(&parsed_opts, &limits);
|
|
1385
1498
|
|
|
1386
1499
|
const uint8_t *src = (const uint8_t *)RSTRING_PTR(data);
|
|
1387
1500
|
size_t slen = RSTRING_LEN(data);
|
|
@@ -1393,7 +1506,7 @@ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
|
|
|
1393
1506
|
} else {
|
|
1394
1507
|
algo = sym_to_algo(algo_sym);
|
|
1395
1508
|
}
|
|
1396
|
-
lz4_format_t lz4_format = parse_lz4_format(
|
|
1509
|
+
lz4_format_t lz4_format = parse_lz4_format(&parsed_opts, algo, explicit_algo);
|
|
1397
1510
|
|
|
1398
1511
|
const algo_policy_t *policy = algo_policy(algo);
|
|
1399
1512
|
|
|
@@ -1974,18 +2087,18 @@ static VALUE deflater_alloc(VALUE klass) {
|
|
|
1974
2087
|
|
|
1975
2088
|
static VALUE deflater_initialize(int argc, VALUE *argv, VALUE self) {
|
|
1976
2089
|
VALUE opts;
|
|
1977
|
-
|
|
1978
|
-
|
|
2090
|
+
scan_zero_required_keywords(argc, argv, &opts);
|
|
2091
|
+
|
|
2092
|
+
mc_opts_t parsed_opts;
|
|
2093
|
+
mc_parse_opts(opts, &parsed_opts);
|
|
2094
|
+
reject_algorithm_keyword(&parsed_opts);
|
|
1979
2095
|
|
|
1980
2096
|
deflater_t *d;
|
|
1981
2097
|
TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
|
|
1982
2098
|
|
|
1983
|
-
VALUE algo_sym =
|
|
1984
|
-
|
|
1985
|
-
|
|
1986
|
-
level_val = opt_get(opts, sym_cache.level);
|
|
1987
|
-
dict_val = opt_get(opts, sym_cache.dictionary);
|
|
1988
|
-
}
|
|
2099
|
+
VALUE algo_sym = parsed_opts.algo;
|
|
2100
|
+
VALUE level_val = parsed_opts.level;
|
|
2101
|
+
VALUE dict_val = parsed_opts.dictionary;
|
|
1989
2102
|
|
|
1990
2103
|
d->algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
|
|
1991
2104
|
d->level = resolve_level(d->algo, level_val);
|
|
@@ -2628,19 +2741,19 @@ static VALUE inflater_alloc(VALUE klass) {
|
|
|
2628
2741
|
|
|
2629
2742
|
static VALUE inflater_initialize(int argc, VALUE *argv, VALUE self) {
|
|
2630
2743
|
VALUE opts;
|
|
2631
|
-
|
|
2632
|
-
|
|
2744
|
+
scan_zero_required_keywords(argc, argv, &opts);
|
|
2745
|
+
|
|
2746
|
+
mc_opts_t parsed_opts;
|
|
2747
|
+
mc_parse_opts(opts, &parsed_opts);
|
|
2748
|
+
reject_algorithm_keyword(&parsed_opts);
|
|
2633
2749
|
|
|
2634
2750
|
inflater_t *inf;
|
|
2635
2751
|
TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
|
|
2636
2752
|
|
|
2637
|
-
VALUE algo_sym =
|
|
2753
|
+
VALUE algo_sym = parsed_opts.algo;
|
|
2754
|
+
VALUE dict_val = parsed_opts.dictionary;
|
|
2638
2755
|
limits_config_t limits;
|
|
2639
|
-
|
|
2640
|
-
if (!NIL_P(opts)) {
|
|
2641
|
-
algo_sym = opt_get(opts, sym_cache.algo);
|
|
2642
|
-
dict_val = opt_get(opts, sym_cache.dictionary);
|
|
2643
|
-
}
|
|
2756
|
+
parse_limits_from_parsed_opts(&parsed_opts, &limits);
|
|
2644
2757
|
|
|
2645
2758
|
inf->algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
|
|
2646
2759
|
inf->closed = 0;
|
|
@@ -2867,7 +2980,6 @@ static VALUE inflater_write(VALUE self, VALUE chunk) {
|
|
|
2867
2980
|
case ALGO_LZ4: {
|
|
2868
2981
|
size_t data_len = inf->lz4_buf.len - inf->lz4_buf.offset;
|
|
2869
2982
|
size_t needed = data_len + slen;
|
|
2870
|
-
// TODO(v0.4): optional standard LZ4 frame format support via lz4frame.h
|
|
2871
2983
|
|
|
2872
2984
|
if (inf->lz4_buf.offset > 0 && needed > inf->lz4_buf.cap) {
|
|
2873
2985
|
if (data_len > 0)
|
|
@@ -3054,17 +3166,17 @@ static VALUE inflater_closed_p(VALUE self) {
|
|
|
3054
3166
|
|
|
3055
3167
|
static VALUE dict_initialize(int argc, VALUE *argv, VALUE self) {
|
|
3056
3168
|
VALUE raw, opts;
|
|
3057
|
-
|
|
3169
|
+
scan_one_required_keywords(argc, argv, &raw, &opts);
|
|
3058
3170
|
StringValue(raw);
|
|
3059
|
-
|
|
3171
|
+
|
|
3172
|
+
mc_opts_t parsed_opts;
|
|
3173
|
+
mc_parse_opts(opts, &parsed_opts);
|
|
3174
|
+
reject_algorithm_keyword(&parsed_opts);
|
|
3060
3175
|
|
|
3061
3176
|
dictionary_t *d;
|
|
3062
3177
|
TypedData_Get_Struct(self, dictionary_t, &dictionary_type, d);
|
|
3063
3178
|
|
|
3064
|
-
VALUE algo_sym =
|
|
3065
|
-
if (!NIL_P(opts)) {
|
|
3066
|
-
algo_sym = opt_get(opts, sym_cache.algo);
|
|
3067
|
-
}
|
|
3179
|
+
VALUE algo_sym = parsed_opts.algo;
|
|
3068
3180
|
d->algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
|
|
3069
3181
|
|
|
3070
3182
|
if (d->algo == ALGO_LZ4)
|
|
@@ -3148,39 +3260,35 @@ static VALUE train_dictionary_internal(VALUE samples, VALUE size_val, compress_a
|
|
|
3148
3260
|
}
|
|
3149
3261
|
|
|
3150
3262
|
static VALUE zstd_train_dictionary(int argc, VALUE *argv, VALUE self) {
|
|
3151
|
-
// #if defined(__APPLE__) && (defined(__arm64__) || defined(__aarch64__))
|
|
3152
|
-
// rb_raise(eUnsupportedError,
|
|
3153
|
-
// "Zstd dictionary training is temporarily disabled on arm64-darwin "
|
|
3154
|
-
// "because the current vendored trainer path crashes on this platform");
|
|
3155
|
-
// #endif
|
|
3156
|
-
|
|
3157
3263
|
VALUE samples, opts;
|
|
3158
|
-
|
|
3159
|
-
|
|
3160
|
-
|
|
3161
|
-
|
|
3264
|
+
scan_one_required_keywords(argc, argv, &samples, &opts);
|
|
3265
|
+
mc_opts_t parsed_opts;
|
|
3266
|
+
mc_parse_opts(opts, &parsed_opts);
|
|
3267
|
+
reject_algorithm_keyword(&parsed_opts);
|
|
3268
|
+
return train_dictionary_internal(samples, parsed_opts.size, ALGO_ZSTD);
|
|
3162
3269
|
}
|
|
3163
3270
|
|
|
3164
3271
|
static VALUE brotli_train_dictionary(int argc, VALUE *argv, VALUE self) {
|
|
3165
3272
|
VALUE samples, opts;
|
|
3166
|
-
|
|
3167
|
-
|
|
3168
|
-
|
|
3273
|
+
scan_one_required_keywords(argc, argv, &samples, &opts);
|
|
3274
|
+
mc_opts_t parsed_opts;
|
|
3275
|
+
mc_parse_opts(opts, &parsed_opts);
|
|
3276
|
+
reject_algorithm_keyword(&parsed_opts);
|
|
3169
3277
|
|
|
3170
|
-
return train_dictionary_internal(samples,
|
|
3278
|
+
return train_dictionary_internal(samples, parsed_opts.size, ALGO_BROTLI);
|
|
3171
3279
|
}
|
|
3172
3280
|
|
|
3173
3281
|
static VALUE dict_load(int argc, VALUE *argv, VALUE self) {
|
|
3174
3282
|
VALUE path, opts;
|
|
3175
|
-
|
|
3283
|
+
scan_one_required_keywords(argc, argv, &path, &opts);
|
|
3176
3284
|
StringValue(path);
|
|
3177
|
-
reject_algorithm_keyword(opts);
|
|
3178
3285
|
raise_if_path_has_null_byte(path);
|
|
3179
3286
|
|
|
3180
|
-
|
|
3181
|
-
|
|
3182
|
-
|
|
3183
|
-
|
|
3287
|
+
mc_opts_t parsed_opts;
|
|
3288
|
+
mc_parse_opts(opts, &parsed_opts);
|
|
3289
|
+
reject_algorithm_keyword(&parsed_opts);
|
|
3290
|
+
|
|
3291
|
+
VALUE algo_sym = parsed_opts.algo;
|
|
3184
3292
|
compress_algo_t algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
|
|
3185
3293
|
|
|
3186
3294
|
if (algo == ALGO_LZ4)
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: multi_compress
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.0
|
|
4
|
+
version: 0.3.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Roman Haydarov
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-19 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|