multi_compress 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/multi_compress/multi_compress.c +53 -25
- data/lib/multi_compress/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 404264c1ba462057374ad7a6a2a72565a16e1da607e47b1d3d91b727b4e74351
|
|
4
|
+
data.tar.gz: d23f9c3af016742a7aef3880209a2e68ba5ed2107c6c3eb3943a30802606c6ac
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 91091ad2e184758916b7c79aa0ec82a3216f5d87798a7c597b43afe25b8eaa091fd7094c5de71e4eac5e01804f7b9928734514224cd0312edd1f60187d04c91b
|
|
7
|
+
data.tar.gz: 275044013ecacca6ee300ce8b980c252048f021acc2cf51e169722830add7301dbc4e9f6942c07845ea672c5e1dd9fe8074c7840b155096c3b40b2b1e3d5ebdc
|
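data/CHANGELOG.md
CHANGED
(changelog hunk, +5 -0, not captured in this extract)
data/ext/multi_compress/multi_compress.c
CHANGED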
|
@@ -611,7 +611,7 @@ static inline size_t fiber_maybe_yield(size_t bytes_since_yield, size_t just_pro
|
|
|
611
611
|
return bytes_since_yield;
|
|
612
612
|
}
|
|
613
613
|
|
|
614
|
-
#define DICT_CDICT_CACHE_SIZE
|
|
614
|
+
#define DICT_CDICT_CACHE_SIZE 22
|
|
615
615
|
_Static_assert(DICT_CDICT_CACHE_SIZE > 0, "CDict cache needs at least one slot");
|
|
616
616
|
|
|
617
617
|
typedef struct {
|
|
@@ -623,6 +623,7 @@ struct dictionary_s {
|
|
|
623
623
|
compress_algo_t algo;
|
|
624
624
|
uint8_t *data;
|
|
625
625
|
size_t size;
|
|
626
|
+
pthread_mutex_t cache_mutex;
|
|
626
627
|
|
|
627
628
|
cdict_cache_entry_t cdict_cache[DICT_CDICT_CACHE_SIZE];
|
|
628
629
|
int cdict_cache_count;
|
|
@@ -632,12 +633,15 @@ struct dictionary_s {
|
|
|
632
633
|
|
|
633
634
|
static void dict_free(void *ptr) {
|
|
634
635
|
dictionary_t *dict = (dictionary_t *)ptr;
|
|
636
|
+
if (!dict)
|
|
637
|
+
return;
|
|
635
638
|
for (int i = 0; i < dict->cdict_cache_count; i++) {
|
|
636
639
|
if (dict->cdict_cache[i].cdict)
|
|
637
640
|
ZSTD_freeCDict(dict->cdict_cache[i].cdict);
|
|
638
641
|
}
|
|
639
642
|
if (dict->ddict)
|
|
640
643
|
ZSTD_freeDDict(dict->ddict);
|
|
644
|
+
pthread_mutex_destroy(&dict->cache_mutex);
|
|
641
645
|
if (dict->data)
|
|
642
646
|
xfree(dict->data);
|
|
643
647
|
xfree(dict);
|
|
@@ -666,47 +670,78 @@ static const rb_data_type_t dictionary_type = {
|
|
|
666
670
|
static VALUE dict_alloc(VALUE klass) {
|
|
667
671
|
dictionary_t *d = ALLOC(dictionary_t);
|
|
668
672
|
memset(d, 0, sizeof(dictionary_t));
|
|
673
|
+
if (pthread_mutex_init(&d->cache_mutex, NULL) != 0) {
|
|
674
|
+
xfree(d);
|
|
675
|
+
rb_raise(eMemError, "failed to initialize dictionary cache mutex");
|
|
676
|
+
}
|
|
669
677
|
return TypedData_Wrap_Struct(klass, &dictionary_type, d);
|
|
670
678
|
}
|
|
671
679
|
|
|
672
680
|
static ZSTD_CDict *dict_get_cdict(dictionary_t *dict, int level) {
|
|
681
|
+
ZSTD_CDict *existing = NULL;
|
|
682
|
+
|
|
683
|
+
pthread_mutex_lock(&dict->cache_mutex);
|
|
673
684
|
for (int i = 0; i < dict->cdict_cache_count; i++) {
|
|
674
|
-
if (dict->cdict_cache[i].level == level)
|
|
675
|
-
|
|
685
|
+
if (dict->cdict_cache[i].level == level) {
|
|
686
|
+
existing = dict->cdict_cache[i].cdict;
|
|
687
|
+
break;
|
|
688
|
+
}
|
|
676
689
|
}
|
|
690
|
+
pthread_mutex_unlock(&dict->cache_mutex);
|
|
691
|
+
|
|
692
|
+
if (existing)
|
|
693
|
+
return existing;
|
|
677
694
|
|
|
678
695
|
ZSTD_CDict *cdict = ZSTD_createCDict(dict->data, dict->size, level);
|
|
679
696
|
if (!cdict)
|
|
680
697
|
return NULL;
|
|
681
698
|
|
|
699
|
+
pthread_mutex_lock(&dict->cache_mutex);
|
|
682
700
|
for (int i = 0; i < dict->cdict_cache_count; i++) {
|
|
683
701
|
if (dict->cdict_cache[i].level == level) {
|
|
702
|
+
existing = dict->cdict_cache[i].cdict;
|
|
703
|
+
pthread_mutex_unlock(&dict->cache_mutex);
|
|
684
704
|
ZSTD_freeCDict(cdict);
|
|
685
|
-
return
|
|
705
|
+
return existing;
|
|
686
706
|
}
|
|
687
707
|
}
|
|
688
708
|
|
|
689
|
-
if (dict->cdict_cache_count
|
|
690
|
-
dict->
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
} else {
|
|
694
|
-
ZSTD_CDict *old_cdict = dict->cdict_cache[0].cdict;
|
|
695
|
-
memmove(&dict->cdict_cache[0], &dict->cdict_cache[1],
|
|
696
|
-
sizeof(cdict_cache_entry_t) * (DICT_CDICT_CACHE_SIZE - 1));
|
|
697
|
-
dict->cdict_cache[DICT_CDICT_CACHE_SIZE - 1].level = level;
|
|
698
|
-
dict->cdict_cache[DICT_CDICT_CACHE_SIZE - 1].cdict = cdict;
|
|
699
|
-
if (old_cdict)
|
|
700
|
-
ZSTD_freeCDict(old_cdict);
|
|
709
|
+
if (dict->cdict_cache_count >= DICT_CDICT_CACHE_SIZE) {
|
|
710
|
+
pthread_mutex_unlock(&dict->cache_mutex);
|
|
711
|
+
ZSTD_freeCDict(cdict);
|
|
712
|
+
rb_raise(eError, "zstd dictionary cdict cache exhausted");
|
|
701
713
|
}
|
|
714
|
+
|
|
715
|
+
dict->cdict_cache[dict->cdict_cache_count].level = level;
|
|
716
|
+
dict->cdict_cache[dict->cdict_cache_count].cdict = cdict;
|
|
717
|
+
dict->cdict_cache_count++;
|
|
718
|
+
pthread_mutex_unlock(&dict->cache_mutex);
|
|
702
719
|
return cdict;
|
|
703
720
|
}
|
|
704
721
|
|
|
705
722
|
static ZSTD_DDict *dict_get_ddict(dictionary_t *dict) {
|
|
723
|
+
ZSTD_DDict *existing;
|
|
724
|
+
|
|
725
|
+
pthread_mutex_lock(&dict->cache_mutex);
|
|
726
|
+
existing = dict->ddict;
|
|
727
|
+
pthread_mutex_unlock(&dict->cache_mutex);
|
|
728
|
+
if (existing)
|
|
729
|
+
return existing;
|
|
730
|
+
|
|
731
|
+
ZSTD_DDict *created = ZSTD_createDDict(dict->data, dict->size);
|
|
732
|
+
if (!created)
|
|
733
|
+
return NULL;
|
|
734
|
+
|
|
735
|
+
pthread_mutex_lock(&dict->cache_mutex);
|
|
706
736
|
if (!dict->ddict) {
|
|
707
|
-
dict->ddict =
|
|
737
|
+
dict->ddict = created;
|
|
738
|
+
pthread_mutex_unlock(&dict->cache_mutex);
|
|
739
|
+
return created;
|
|
708
740
|
}
|
|
709
|
-
|
|
741
|
+
existing = dict->ddict;
|
|
742
|
+
pthread_mutex_unlock(&dict->cache_mutex);
|
|
743
|
+
ZSTD_freeDDict(created);
|
|
744
|
+
return existing;
|
|
710
745
|
}
|
|
711
746
|
|
|
712
747
|
typedef struct {
|
|
@@ -2867,7 +2902,6 @@ static VALUE inflater_write(VALUE self, VALUE chunk) {
|
|
|
2867
2902
|
case ALGO_LZ4: {
|
|
2868
2903
|
size_t data_len = inf->lz4_buf.len - inf->lz4_buf.offset;
|
|
2869
2904
|
size_t needed = data_len + slen;
|
|
2870
|
-
// TODO(v0.4): optional standard LZ4 frame format support via lz4frame.h
|
|
2871
2905
|
|
|
2872
2906
|
if (inf->lz4_buf.offset > 0 && needed > inf->lz4_buf.cap) {
|
|
2873
2907
|
if (data_len > 0)
|
|
@@ -3148,12 +3182,6 @@ static VALUE train_dictionary_internal(VALUE samples, VALUE size_val, compress_a
|
|
|
3148
3182
|
}
|
|
3149
3183
|
|
|
3150
3184
|
static VALUE zstd_train_dictionary(int argc, VALUE *argv, VALUE self) {
|
|
3151
|
-
// #if defined(__APPLE__) && (defined(__arm64__) || defined(__aarch64__))
|
|
3152
|
-
// rb_raise(eUnsupportedError,
|
|
3153
|
-
// "Zstd dictionary training is temporarily disabled on arm64-darwin "
|
|
3154
|
-
// "because the current vendored trainer path crashes on this platform");
|
|
3155
|
-
// #endif
|
|
3156
|
-
|
|
3157
3185
|
VALUE samples, opts;
|
|
3158
3186
|
rb_scan_args(argc, argv, "1:", &samples, &opts);
|
|
3159
3187
|
reject_algorithm_keyword(opts);
|