zstd-ruby 1.3.5.0 → 1.3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +4 -2
- data/README.md +2 -1
- data/ext/zstdruby/libzstd/BUCK +1 -0
- data/ext/zstdruby/libzstd/Makefile +25 -13
- data/ext/zstdruby/libzstd/README.md +11 -10
- data/ext/zstdruby/libzstd/common/bitstream.h +8 -11
- data/ext/zstdruby/libzstd/common/compiler.h +30 -8
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/mem.h +20 -2
- data/ext/zstdruby/libzstd/common/xxhash.c +1 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -2
- data/ext/zstdruby/libzstd/compress/fse_compress.c +55 -48
- data/ext/zstdruby/libzstd/compress/hist.h +1 -1
- data/ext/zstdruby/libzstd/compress/huf_compress.c +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +290 -147
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +5 -2
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +63 -51
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -4
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +44 -33
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -4
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +125 -116
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +13 -15
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +9 -11
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +0 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +42 -36
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +8 -9
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +96 -51
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +16 -6
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +169 -101
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +111 -87
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +83 -0
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +3 -3
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +728 -0
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +34 -31
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -5
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +9 -3
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +6 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +6 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +1 -5
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +12 -9
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +10 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +20 -18
- data/ext/zstdruby/libzstd/zstd.h +109 -50
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +4 -2
@@ -29,6 +29,7 @@
|
|
29
29
|
#include "mem.h" /* read */
|
30
30
|
#include "pool.h"
|
31
31
|
#include "threading.h"
|
32
|
+
#include "cover.h"
|
32
33
|
#include "zstd_internal.h" /* includes zstd.h */
|
33
34
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
34
35
|
#define ZDICT_STATIC_LINKING_ONLY
|
@@ -39,6 +40,7 @@
|
|
39
40
|
* Constants
|
40
41
|
***************************************/
|
41
42
|
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
|
43
|
+
#define DEFAULT_SPLITPOINT 1.0
|
42
44
|
|
43
45
|
/*-*************************************
|
44
46
|
* Console display
|
@@ -184,7 +186,7 @@ static void COVER_map_remove(COVER_map_t *map, U32 key) {
|
|
184
186
|
}
|
185
187
|
|
186
188
|
/**
|
187
|
-
*
|
189
|
+
* Destroys a map that is inited with COVER_map_init().
|
188
190
|
*/
|
189
191
|
static void COVER_map_destroy(COVER_map_t *map) {
|
190
192
|
if (map->data) {
|
@@ -203,6 +205,8 @@ typedef struct {
|
|
203
205
|
size_t *offsets;
|
204
206
|
const size_t *samplesSizes;
|
205
207
|
size_t nbSamples;
|
208
|
+
size_t nbTrainSamples;
|
209
|
+
size_t nbTestSamples;
|
206
210
|
U32 *suffix;
|
207
211
|
size_t suffixSize;
|
208
212
|
U32 *freqs;
|
@@ -220,9 +224,9 @@ static COVER_ctx_t *g_ctx = NULL;
|
|
220
224
|
/**
|
221
225
|
* Returns the sum of the sample sizes.
|
222
226
|
*/
|
223
|
-
|
227
|
+
size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
|
224
228
|
size_t sum = 0;
|
225
|
-
|
229
|
+
unsigned i;
|
226
230
|
for (i = 0; i < nbSamples; ++i) {
|
227
231
|
sum += samplesSizes[i];
|
228
232
|
}
|
@@ -377,14 +381,6 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group,
|
|
377
381
|
ctx->suffix[dmerId] = freq;
|
378
382
|
}
|
379
383
|
|
380
|
-
/**
|
381
|
-
* A segment is a range in the source as well as the score of the segment.
|
382
|
-
*/
|
383
|
-
typedef struct {
|
384
|
-
U32 begin;
|
385
|
-
U32 end;
|
386
|
-
U32 score;
|
387
|
-
} COVER_segment_t;
|
388
384
|
|
389
385
|
/**
|
390
386
|
* Selects the best segment in an epoch.
|
@@ -494,6 +490,10 @@ static int COVER_checkParameters(ZDICT_cover_params_t parameters,
|
|
494
490
|
if (parameters.d > parameters.k) {
|
495
491
|
return 0;
|
496
492
|
}
|
493
|
+
/* 0 < splitPoint <= 1 */
|
494
|
+
if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){
|
495
|
+
return 0;
|
496
|
+
}
|
497
497
|
return 1;
|
498
498
|
}
|
499
499
|
|
@@ -531,9 +531,14 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
|
|
531
531
|
*/
|
532
532
|
static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
533
533
|
const size_t *samplesSizes, unsigned nbSamples,
|
534
|
-
unsigned d) {
|
534
|
+
unsigned d, double splitPoint) {
|
535
535
|
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
536
536
|
const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
|
537
|
+
/* Split samples into testing and training sets */
|
538
|
+
const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
|
539
|
+
const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
|
540
|
+
const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
|
541
|
+
const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
|
537
542
|
/* Checks */
|
538
543
|
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
539
544
|
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
|
@@ -541,15 +546,29 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
541
546
|
(U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
|
542
547
|
return 0;
|
543
548
|
}
|
549
|
+
/* Check if there are at least 5 training samples */
|
550
|
+
if (nbTrainSamples < 5) {
|
551
|
+
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
|
552
|
+
return 0;
|
553
|
+
}
|
554
|
+
/* Check if there's testing sample */
|
555
|
+
if (nbTestSamples < 1) {
|
556
|
+
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
|
557
|
+
return 0;
|
558
|
+
}
|
544
559
|
/* Zero the context */
|
545
560
|
memset(ctx, 0, sizeof(*ctx));
|
546
|
-
DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples,
|
547
|
-
(U32)totalSamplesSize);
|
561
|
+
DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
|
562
|
+
(U32)trainingSamplesSize);
|
563
|
+
DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
|
564
|
+
(U32)testSamplesSize);
|
548
565
|
ctx->samples = samples;
|
549
566
|
ctx->samplesSizes = samplesSizes;
|
550
567
|
ctx->nbSamples = nbSamples;
|
568
|
+
ctx->nbTrainSamples = nbTrainSamples;
|
569
|
+
ctx->nbTestSamples = nbTestSamples;
|
551
570
|
/* Partial suffix array */
|
552
|
-
ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
|
571
|
+
ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
|
553
572
|
ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
|
554
573
|
/* Maps index to the dmerID */
|
555
574
|
ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
|
@@ -563,7 +582,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
563
582
|
ctx->freqs = NULL;
|
564
583
|
ctx->d = d;
|
565
584
|
|
566
|
-
/* Fill offsets from the
|
585
|
+
/* Fill offsets from the samplesSizes */
|
567
586
|
{
|
568
587
|
U32 i;
|
569
588
|
ctx->offsets[0] = 0;
|
@@ -665,7 +684,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
665
684
|
BYTE* const dict = (BYTE*)dictBuffer;
|
666
685
|
COVER_ctx_t ctx;
|
667
686
|
COVER_map_t activeDmers;
|
668
|
-
|
687
|
+
parameters.splitPoint = 1.0;
|
669
688
|
/* Initialize global data */
|
670
689
|
g_displayLevel = parameters.zParams.notificationLevel;
|
671
690
|
/* Checks */
|
@@ -684,7 +703,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
684
703
|
}
|
685
704
|
/* Initialize context and activeDmers */
|
686
705
|
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
687
|
-
parameters.d)) {
|
706
|
+
parameters.d, parameters.splitPoint)) {
|
688
707
|
return ERROR(GENERIC);
|
689
708
|
}
|
690
709
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
@@ -711,28 +730,65 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
711
730
|
}
|
712
731
|
}
|
713
732
|
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
*
|
718
|
-
*
|
719
|
-
|
720
|
-
*
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
size_t
|
728
|
-
|
729
|
-
|
730
|
-
|
733
|
+
|
734
|
+
|
735
|
+
size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
|
736
|
+
const size_t *samplesSizes, const BYTE *samples,
|
737
|
+
size_t *offsets,
|
738
|
+
size_t nbTrainSamples, size_t nbSamples,
|
739
|
+
BYTE *const dict, size_t dictBufferCapacity) {
|
740
|
+
size_t totalCompressedSize = ERROR(GENERIC);
|
741
|
+
/* Pointers */
|
742
|
+
ZSTD_CCtx *cctx;
|
743
|
+
ZSTD_CDict *cdict;
|
744
|
+
void *dst;
|
745
|
+
/* Local variables */
|
746
|
+
size_t dstCapacity;
|
747
|
+
size_t i;
|
748
|
+
/* Allocate dst with enough space to compress the maximum sized sample */
|
749
|
+
{
|
750
|
+
size_t maxSampleSize = 0;
|
751
|
+
i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
|
752
|
+
for (; i < nbSamples; ++i) {
|
753
|
+
maxSampleSize = MAX(samplesSizes[i], maxSampleSize);
|
754
|
+
}
|
755
|
+
dstCapacity = ZSTD_compressBound(maxSampleSize);
|
756
|
+
dst = malloc(dstCapacity);
|
757
|
+
}
|
758
|
+
/* Create the cctx and cdict */
|
759
|
+
cctx = ZSTD_createCCtx();
|
760
|
+
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
|
761
|
+
parameters.zParams.compressionLevel);
|
762
|
+
if (!dst || !cctx || !cdict) {
|
763
|
+
goto _compressCleanup;
|
764
|
+
}
|
765
|
+
/* Compress each sample and sum their sizes (or error) */
|
766
|
+
totalCompressedSize = dictBufferCapacity;
|
767
|
+
i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
|
768
|
+
for (; i < nbSamples; ++i) {
|
769
|
+
const size_t size = ZSTD_compress_usingCDict(
|
770
|
+
cctx, dst, dstCapacity, samples + offsets[i],
|
771
|
+
samplesSizes[i], cdict);
|
772
|
+
if (ZSTD_isError(size)) {
|
773
|
+
totalCompressedSize = ERROR(GENERIC);
|
774
|
+
goto _compressCleanup;
|
775
|
+
}
|
776
|
+
totalCompressedSize += size;
|
777
|
+
}
|
778
|
+
_compressCleanup:
|
779
|
+
ZSTD_freeCCtx(cctx);
|
780
|
+
ZSTD_freeCDict(cdict);
|
781
|
+
if (dst) {
|
782
|
+
free(dst);
|
783
|
+
}
|
784
|
+
return totalCompressedSize;
|
785
|
+
}
|
786
|
+
|
731
787
|
|
732
788
|
/**
|
733
789
|
* Initialize the `COVER_best_t`.
|
734
790
|
*/
|
735
|
-
static void COVER_best_init(COVER_best_t *best) {
|
791
|
+
void COVER_best_init(COVER_best_t *best) {
|
736
792
|
if (best==NULL) return; /* compatible with init on NULL */
|
737
793
|
(void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
|
738
794
|
(void)ZSTD_pthread_cond_init(&best->cond, NULL);
|
@@ -746,7 +802,7 @@ static void COVER_best_init(COVER_best_t *best) {
|
|
746
802
|
/**
|
747
803
|
* Wait until liveJobs == 0.
|
748
804
|
*/
|
749
|
-
static void COVER_best_wait(COVER_best_t *best) {
|
805
|
+
void COVER_best_wait(COVER_best_t *best) {
|
750
806
|
if (!best) {
|
751
807
|
return;
|
752
808
|
}
|
@@ -760,7 +816,7 @@ static void COVER_best_wait(COVER_best_t *best) {
|
|
760
816
|
/**
|
761
817
|
* Call COVER_best_wait() and then destroy the COVER_best_t.
|
762
818
|
*/
|
763
|
-
static void COVER_best_destroy(COVER_best_t *best) {
|
819
|
+
void COVER_best_destroy(COVER_best_t *best) {
|
764
820
|
if (!best) {
|
765
821
|
return;
|
766
822
|
}
|
@@ -776,7 +832,7 @@ static void COVER_best_destroy(COVER_best_t *best) {
|
|
776
832
|
* Called when a thread is about to be launched.
|
777
833
|
* Increments liveJobs.
|
778
834
|
*/
|
779
|
-
static void COVER_best_start(COVER_best_t *best) {
|
835
|
+
void COVER_best_start(COVER_best_t *best) {
|
780
836
|
if (!best) {
|
781
837
|
return;
|
782
838
|
}
|
@@ -790,7 +846,7 @@ static void COVER_best_start(COVER_best_t *best) {
|
|
790
846
|
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
791
847
|
* If this dictionary is the best so far save it and its parameters.
|
792
848
|
*/
|
793
|
-
static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
849
|
+
void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
794
850
|
ZDICT_cover_params_t parameters, void *dict,
|
795
851
|
size_t dictSize) {
|
796
852
|
if (!best) {
|
@@ -821,10 +877,10 @@ static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
821
877
|
best->parameters = parameters;
|
822
878
|
best->compressedSize = compressedSize;
|
823
879
|
}
|
824
|
-
ZSTD_pthread_mutex_unlock(&best->mutex);
|
825
880
|
if (liveJobs == 0) {
|
826
881
|
ZSTD_pthread_cond_broadcast(&best->cond);
|
827
882
|
}
|
883
|
+
ZSTD_pthread_mutex_unlock(&best->mutex);
|
828
884
|
}
|
829
885
|
}
|
830
886
|
|
@@ -839,7 +895,7 @@ typedef struct COVER_tryParameters_data_s {
|
|
839
895
|
} COVER_tryParameters_data_t;
|
840
896
|
|
841
897
|
/**
|
842
|
-
* Tries a set of parameters and
|
898
|
+
* Tries a set of parameters and updates the COVER_best_t with the results.
|
843
899
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
844
900
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
845
901
|
*/
|
@@ -870,7 +926,7 @@ static void COVER_tryParameters(void *opaque) {
|
|
870
926
|
dictBufferCapacity, parameters);
|
871
927
|
dictBufferCapacity = ZDICT_finalizeDictionary(
|
872
928
|
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
873
|
-
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
|
929
|
+
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
|
874
930
|
parameters.zParams);
|
875
931
|
if (ZDICT_isError(dictBufferCapacity)) {
|
876
932
|
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
|
@@ -878,49 +934,10 @@ static void COVER_tryParameters(void *opaque) {
|
|
878
934
|
}
|
879
935
|
}
|
880
936
|
/* Check total compressed size */
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
void *dst;
|
886
|
-
/* Local variables */
|
887
|
-
size_t dstCapacity;
|
888
|
-
size_t i;
|
889
|
-
/* Allocate dst with enough space to compress the maximum sized sample */
|
890
|
-
{
|
891
|
-
size_t maxSampleSize = 0;
|
892
|
-
for (i = 0; i < ctx->nbSamples; ++i) {
|
893
|
-
maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
|
894
|
-
}
|
895
|
-
dstCapacity = ZSTD_compressBound(maxSampleSize);
|
896
|
-
dst = malloc(dstCapacity);
|
897
|
-
}
|
898
|
-
/* Create the cctx and cdict */
|
899
|
-
cctx = ZSTD_createCCtx();
|
900
|
-
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
|
901
|
-
parameters.zParams.compressionLevel);
|
902
|
-
if (!dst || !cctx || !cdict) {
|
903
|
-
goto _compressCleanup;
|
904
|
-
}
|
905
|
-
/* Compress each sample and sum their sizes (or error) */
|
906
|
-
totalCompressedSize = dictBufferCapacity;
|
907
|
-
for (i = 0; i < ctx->nbSamples; ++i) {
|
908
|
-
const size_t size = ZSTD_compress_usingCDict(
|
909
|
-
cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
|
910
|
-
ctx->samplesSizes[i], cdict);
|
911
|
-
if (ZSTD_isError(size)) {
|
912
|
-
totalCompressedSize = ERROR(GENERIC);
|
913
|
-
goto _compressCleanup;
|
914
|
-
}
|
915
|
-
totalCompressedSize += size;
|
916
|
-
}
|
917
|
-
_compressCleanup:
|
918
|
-
ZSTD_freeCCtx(cctx);
|
919
|
-
ZSTD_freeCDict(cdict);
|
920
|
-
if (dst) {
|
921
|
-
free(dst);
|
922
|
-
}
|
923
|
-
}
|
937
|
+
totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
|
938
|
+
ctx->samples, ctx->offsets,
|
939
|
+
ctx->nbTrainSamples, ctx->nbSamples,
|
940
|
+
dict, dictBufferCapacity);
|
924
941
|
|
925
942
|
_cleanup:
|
926
943
|
COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
|
@@ -941,6 +958,8 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
941
958
|
ZDICT_cover_params_t *parameters) {
|
942
959
|
/* constants */
|
943
960
|
const unsigned nbThreads = parameters->nbThreads;
|
961
|
+
const double splitPoint =
|
962
|
+
parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
|
944
963
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
945
964
|
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
946
965
|
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
@@ -958,6 +977,10 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
958
977
|
POOL_ctx *pool = NULL;
|
959
978
|
|
960
979
|
/* Checks */
|
980
|
+
if (splitPoint <= 0 || splitPoint > 1) {
|
981
|
+
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
982
|
+
return ERROR(GENERIC);
|
983
|
+
}
|
961
984
|
if (kMinK < kMaxD || kMaxK < kMinK) {
|
962
985
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
963
986
|
return ERROR(GENERIC);
|
@@ -988,7 +1011,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
988
1011
|
/* Initialize the context for this value of d */
|
989
1012
|
COVER_ctx_t ctx;
|
990
1013
|
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
|
991
|
-
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) {
|
1014
|
+
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
|
992
1015
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
|
993
1016
|
COVER_best_destroy(&best);
|
994
1017
|
POOL_free(pool);
|
@@ -1013,6 +1036,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1013
1036
|
data->parameters = *parameters;
|
1014
1037
|
data->parameters.k = k;
|
1015
1038
|
data->parameters.d = d;
|
1039
|
+
data->parameters.splitPoint = splitPoint;
|
1016
1040
|
data->parameters.steps = kSteps;
|
1017
1041
|
data->parameters.zParams.notificationLevel = g_displayLevel;
|
1018
1042
|
/* Check the parameters */
|
@@ -0,0 +1,83 @@
|
|
1
|
+
#include <stdio.h> /* fprintf */
|
2
|
+
#include <stdlib.h> /* malloc, free, qsort */
|
3
|
+
#include <string.h> /* memset */
|
4
|
+
#include <time.h> /* clock */
|
5
|
+
#include "mem.h" /* read */
|
6
|
+
#include "pool.h"
|
7
|
+
#include "threading.h"
|
8
|
+
#include "zstd_internal.h" /* includes zstd.h */
|
9
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
10
|
+
#define ZDICT_STATIC_LINKING_ONLY
|
11
|
+
#endif
|
12
|
+
#include "zdict.h"
|
13
|
+
|
14
|
+
/**
|
15
|
+
* COVER_best_t is used for two purposes:
|
16
|
+
* 1. Synchronizing threads.
|
17
|
+
* 2. Saving the best parameters and dictionary.
|
18
|
+
*
|
19
|
+
* All of the methods except COVER_best_init() are thread safe if zstd is
|
20
|
+
* compiled with multithreaded support.
|
21
|
+
*/
|
22
|
+
typedef struct COVER_best_s {
|
23
|
+
ZSTD_pthread_mutex_t mutex;
|
24
|
+
ZSTD_pthread_cond_t cond;
|
25
|
+
size_t liveJobs;
|
26
|
+
void *dict;
|
27
|
+
size_t dictSize;
|
28
|
+
ZDICT_cover_params_t parameters;
|
29
|
+
size_t compressedSize;
|
30
|
+
} COVER_best_t;
|
31
|
+
|
32
|
+
/**
|
33
|
+
* A segment is a range in the source as well as the score of the segment.
|
34
|
+
*/
|
35
|
+
typedef struct {
|
36
|
+
U32 begin;
|
37
|
+
U32 end;
|
38
|
+
U32 score;
|
39
|
+
} COVER_segment_t;
|
40
|
+
|
41
|
+
/**
|
42
|
+
* Checks total compressed size of a dictionary
|
43
|
+
*/
|
44
|
+
size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
|
45
|
+
const size_t *samplesSizes, const BYTE *samples,
|
46
|
+
size_t *offsets,
|
47
|
+
size_t nbTrainSamples, size_t nbSamples,
|
48
|
+
BYTE *const dict, size_t dictBufferCapacity);
|
49
|
+
|
50
|
+
/**
|
51
|
+
* Returns the sum of the sample sizes.
|
52
|
+
*/
|
53
|
+
size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ;
|
54
|
+
|
55
|
+
/**
|
56
|
+
* Initialize the `COVER_best_t`.
|
57
|
+
*/
|
58
|
+
void COVER_best_init(COVER_best_t *best);
|
59
|
+
|
60
|
+
/**
|
61
|
+
* Wait until liveJobs == 0.
|
62
|
+
*/
|
63
|
+
void COVER_best_wait(COVER_best_t *best);
|
64
|
+
|
65
|
+
/**
|
66
|
+
* Call COVER_best_wait() and then destroy the COVER_best_t.
|
67
|
+
*/
|
68
|
+
void COVER_best_destroy(COVER_best_t *best);
|
69
|
+
|
70
|
+
/**
|
71
|
+
* Called when a thread is about to be launched.
|
72
|
+
* Increments liveJobs.
|
73
|
+
*/
|
74
|
+
void COVER_best_start(COVER_best_t *best);
|
75
|
+
|
76
|
+
/**
|
77
|
+
* Called when a thread finishes executing, both on error or success.
|
78
|
+
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
79
|
+
* If this dictionary is the best so far save it and its parameters.
|
80
|
+
*/
|
81
|
+
void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
82
|
+
ZDICT_cover_params_t parameters, void *dict,
|
83
|
+
size_t dictSize);
|