zstd-ruby 1.3.5.0 → 1.3.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +4 -2
- data/README.md +2 -1
- data/ext/zstdruby/libzstd/BUCK +1 -0
- data/ext/zstdruby/libzstd/Makefile +25 -13
- data/ext/zstdruby/libzstd/README.md +11 -10
- data/ext/zstdruby/libzstd/common/bitstream.h +8 -11
- data/ext/zstdruby/libzstd/common/compiler.h +30 -8
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/mem.h +20 -2
- data/ext/zstdruby/libzstd/common/xxhash.c +1 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -2
- data/ext/zstdruby/libzstd/compress/fse_compress.c +55 -48
- data/ext/zstdruby/libzstd/compress/hist.h +1 -1
- data/ext/zstdruby/libzstd/compress/huf_compress.c +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +290 -147
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +5 -2
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +63 -51
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -4
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +44 -33
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -4
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +125 -116
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +13 -15
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +9 -11
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +0 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +42 -36
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +8 -9
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +96 -51
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +16 -6
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +169 -101
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +111 -87
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +83 -0
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +3 -3
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +728 -0
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +34 -31
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -5
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +9 -3
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +6 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +6 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +1 -5
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +12 -9
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +10 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +20 -18
- data/ext/zstdruby/libzstd/zstd.h +109 -50
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +4 -2
@@ -29,6 +29,7 @@
|
|
29
29
|
#include "mem.h" /* read */
|
30
30
|
#include "pool.h"
|
31
31
|
#include "threading.h"
|
32
|
+
#include "cover.h"
|
32
33
|
#include "zstd_internal.h" /* includes zstd.h */
|
33
34
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
34
35
|
#define ZDICT_STATIC_LINKING_ONLY
|
@@ -39,6 +40,7 @@
|
|
39
40
|
* Constants
|
40
41
|
***************************************/
|
41
42
|
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
|
43
|
+
#define DEFAULT_SPLITPOINT 1.0
|
42
44
|
|
43
45
|
/*-*************************************
|
44
46
|
* Console display
|
@@ -184,7 +186,7 @@ static void COVER_map_remove(COVER_map_t *map, U32 key) {
|
|
184
186
|
}
|
185
187
|
|
186
188
|
/**
|
187
|
-
*
|
189
|
+
* Destroys a map that is inited with COVER_map_init().
|
188
190
|
*/
|
189
191
|
static void COVER_map_destroy(COVER_map_t *map) {
|
190
192
|
if (map->data) {
|
@@ -203,6 +205,8 @@ typedef struct {
|
|
203
205
|
size_t *offsets;
|
204
206
|
const size_t *samplesSizes;
|
205
207
|
size_t nbSamples;
|
208
|
+
size_t nbTrainSamples;
|
209
|
+
size_t nbTestSamples;
|
206
210
|
U32 *suffix;
|
207
211
|
size_t suffixSize;
|
208
212
|
U32 *freqs;
|
@@ -220,9 +224,9 @@ static COVER_ctx_t *g_ctx = NULL;
|
|
220
224
|
/**
|
221
225
|
* Returns the sum of the sample sizes.
|
222
226
|
*/
|
223
|
-
static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
|
227
|
+
size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
|
224
228
|
size_t sum = 0;
|
225
|
-
|
229
|
+
unsigned i;
|
226
230
|
for (i = 0; i < nbSamples; ++i) {
|
227
231
|
sum += samplesSizes[i];
|
228
232
|
}
|
@@ -377,14 +381,6 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group,
|
|
377
381
|
ctx->suffix[dmerId] = freq;
|
378
382
|
}
|
379
383
|
|
380
|
-
/**
|
381
|
-
* A segment is a range in the source as well as the score of the segment.
|
382
|
-
*/
|
383
|
-
typedef struct {
|
384
|
-
U32 begin;
|
385
|
-
U32 end;
|
386
|
-
U32 score;
|
387
|
-
} COVER_segment_t;
|
388
384
|
|
389
385
|
/**
|
390
386
|
* Selects the best segment in an epoch.
|
@@ -494,6 +490,10 @@ static int COVER_checkParameters(ZDICT_cover_params_t parameters,
|
|
494
490
|
if (parameters.d > parameters.k) {
|
495
491
|
return 0;
|
496
492
|
}
|
493
|
+
/* 0 < splitPoint <= 1 */
|
494
|
+
if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){
|
495
|
+
return 0;
|
496
|
+
}
|
497
497
|
return 1;
|
498
498
|
}
|
499
499
|
|
@@ -531,9 +531,14 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
|
|
531
531
|
*/
|
532
532
|
static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
533
533
|
const size_t *samplesSizes, unsigned nbSamples,
|
534
|
-
unsigned d) {
|
534
|
+
unsigned d, double splitPoint) {
|
535
535
|
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
536
536
|
const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
|
537
|
+
/* Split samples into testing and training sets */
|
538
|
+
const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
|
539
|
+
const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
|
540
|
+
const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
|
541
|
+
const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
|
537
542
|
/* Checks */
|
538
543
|
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
539
544
|
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
|
@@ -541,15 +546,29 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
541
546
|
(U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
|
542
547
|
return 0;
|
543
548
|
}
|
549
|
+
/* Check if there are at least 5 training samples */
|
550
|
+
if (nbTrainSamples < 5) {
|
551
|
+
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
|
552
|
+
return 0;
|
553
|
+
}
|
554
|
+
/* Check if there's testing sample */
|
555
|
+
if (nbTestSamples < 1) {
|
556
|
+
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
|
557
|
+
return 0;
|
558
|
+
}
|
544
559
|
/* Zero the context */
|
545
560
|
memset(ctx, 0, sizeof(*ctx));
|
546
|
-
DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples,
|
547
|
-
(U32)totalSamplesSize);
|
561
|
+
DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
|
562
|
+
(U32)trainingSamplesSize);
|
563
|
+
DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
|
564
|
+
(U32)testSamplesSize);
|
548
565
|
ctx->samples = samples;
|
549
566
|
ctx->samplesSizes = samplesSizes;
|
550
567
|
ctx->nbSamples = nbSamples;
|
568
|
+
ctx->nbTrainSamples = nbTrainSamples;
|
569
|
+
ctx->nbTestSamples = nbTestSamples;
|
551
570
|
/* Partial suffix array */
|
552
|
-
ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
|
571
|
+
ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
|
553
572
|
ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
|
554
573
|
/* Maps index to the dmerID */
|
555
574
|
ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
|
@@ -563,7 +582,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
563
582
|
ctx->freqs = NULL;
|
564
583
|
ctx->d = d;
|
565
584
|
|
566
|
-
/* Fill offsets from the
|
585
|
+
/* Fill offsets from the samplesSizes */
|
567
586
|
{
|
568
587
|
U32 i;
|
569
588
|
ctx->offsets[0] = 0;
|
@@ -665,7 +684,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
665
684
|
BYTE* const dict = (BYTE*)dictBuffer;
|
666
685
|
COVER_ctx_t ctx;
|
667
686
|
COVER_map_t activeDmers;
|
668
|
-
|
687
|
+
parameters.splitPoint = 1.0;
|
669
688
|
/* Initialize global data */
|
670
689
|
g_displayLevel = parameters.zParams.notificationLevel;
|
671
690
|
/* Checks */
|
@@ -684,7 +703,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
684
703
|
}
|
685
704
|
/* Initialize context and activeDmers */
|
686
705
|
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
687
|
-
parameters.d)) {
|
706
|
+
parameters.d, parameters.splitPoint)) {
|
688
707
|
return ERROR(GENERIC);
|
689
708
|
}
|
690
709
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
@@ -711,28 +730,65 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
711
730
|
}
|
712
731
|
}
|
713
732
|
|
714
|
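-
/**
|
715
|
-
* COVER_best_t is used for two purposes:
|
716
|
-
* 1. Synchronizing threads.
|
717
|
-
* 2. Saving the best parameters and dictionary.
|
718
|
-
*
|
719
|
-
* All of the methods except COVER_best_init() are thread safe if zstd is
|
720
|
-
* compiled with multithreaded support.
|
721
|
-
*/
|
722
|
-
typedef struct COVER_best_s {
|
723
|
-
ZSTD_pthread_mutex_t mutex;
|
724
|
-
ZSTD_pthread_cond_t cond;
|
725
|
-
size_t liveJobs;
|
726
|
-
void *dict;
|
727
|
-
size_t dictSize;
|
728
|
-
ZDICT_cover_params_t parameters;
|
729
|
-
size_t compressedSize;
|
730
|
-
} COVER_best_t;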
|
733
|
+
|
734
|
+
|
735
|
+
size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
|
736
|
+
const size_t *samplesSizes, const BYTE *samples,
|
737
|
+
size_t *offsets,
|
738
|
+
size_t nbTrainSamples, size_t nbSamples,
|
739
|
+
BYTE *const dict, size_t dictBufferCapacity) {
|
740
|
+
size_t totalCompressedSize = ERROR(GENERIC);
|
741
|
+
/* Pointers */
|
742
|
+
ZSTD_CCtx *cctx;
|
743
|
+
ZSTD_CDict *cdict;
|
744
|
+
void *dst;
|
745
|
+
/* Local variables */
|
746
|
+
size_t dstCapacity;
|
747
|
+
size_t i;
|
748
|
+
/* Allocate dst with enough space to compress the maximum sized sample */
|
749
|
+
{
|
750
|
+
size_t maxSampleSize = 0;
|
751
|
+
i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
|
752
|
+
for (; i < nbSamples; ++i) {
|
753
|
+
maxSampleSize = MAX(samplesSizes[i], maxSampleSize);
|
754
|
+
}
|
755
|
+
dstCapacity = ZSTD_compressBound(maxSampleSize);
|
756
|
+
dst = malloc(dstCapacity);
|
757
|
+
}
|
758
|
+
/* Create the cctx and cdict */
|
759
|
+
cctx = ZSTD_createCCtx();
|
760
|
+
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
|
761
|
+
parameters.zParams.compressionLevel);
|
762
|
+
if (!dst || !cctx || !cdict) {
|
763
|
+
goto _compressCleanup;
|
764
|
+
}
|
765
|
+
/* Compress each sample and sum their sizes (or error) */
|
766
|
+
totalCompressedSize = dictBufferCapacity;
|
767
|
+
i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
|
768
|
+
for (; i < nbSamples; ++i) {
|
769
|
+
const size_t size = ZSTD_compress_usingCDict(
|
770
|
+
cctx, dst, dstCapacity, samples + offsets[i],
|
771
|
+
samplesSizes[i], cdict);
|
772
|
+
if (ZSTD_isError(size)) {
|
773
|
+
totalCompressedSize = ERROR(GENERIC);
|
774
|
+
goto _compressCleanup;
|
775
|
+
}
|
776
|
+
totalCompressedSize += size;
|
777
|
+
}
|
778
|
+
_compressCleanup:
|
779
|
+
ZSTD_freeCCtx(cctx);
|
780
|
+
ZSTD_freeCDict(cdict);
|
781
|
+
if (dst) {
|
782
|
+
free(dst);
|
783
|
+
}
|
784
|
+
return totalCompressedSize;
|
785
|
+
}
|
786
|
+
|
731
787
|
|
732
788
|
/**
|
733
789
|
* Initialize the `COVER_best_t`.
|
734
790
|
*/
|
735
|
-
static void COVER_best_init(COVER_best_t *best) {
|
791
|
+
void COVER_best_init(COVER_best_t *best) {
|
736
792
|
if (best==NULL) return; /* compatible with init on NULL */
|
737
793
|
(void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
|
738
794
|
(void)ZSTD_pthread_cond_init(&best->cond, NULL);
|
@@ -746,7 +802,7 @@ static void COVER_best_init(COVER_best_t *best) {
|
|
746
802
|
/**
|
747
803
|
* Wait until liveJobs == 0.
|
748
804
|
*/
|
749
|
-
static void COVER_best_wait(COVER_best_t *best) {
|
805
|
+
void COVER_best_wait(COVER_best_t *best) {
|
750
806
|
if (!best) {
|
751
807
|
return;
|
752
808
|
}
|
@@ -760,7 +816,7 @@ static void COVER_best_wait(COVER_best_t *best) {
|
|
760
816
|
/**
|
761
817
|
* Call COVER_best_wait() and then destroy the COVER_best_t.
|
762
818
|
*/
|
763
|
-
static void COVER_best_destroy(COVER_best_t *best) {
|
819
|
+
void COVER_best_destroy(COVER_best_t *best) {
|
764
820
|
if (!best) {
|
765
821
|
return;
|
766
822
|
}
|
@@ -776,7 +832,7 @@ static void COVER_best_destroy(COVER_best_t *best) {
|
|
776
832
|
* Called when a thread is about to be launched.
|
777
833
|
* Increments liveJobs.
|
778
834
|
*/
|
779
|
-
static void COVER_best_start(COVER_best_t *best) {
|
835
|
+
void COVER_best_start(COVER_best_t *best) {
|
780
836
|
if (!best) {
|
781
837
|
return;
|
782
838
|
}
|
@@ -790,7 +846,7 @@ static void COVER_best_start(COVER_best_t *best) {
|
|
790
846
|
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
791
847
|
* If this dictionary is the best so far save it and its parameters.
|
792
848
|
*/
|
793
|
-
static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
849
|
+
void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
794
850
|
ZDICT_cover_params_t parameters, void *dict,
|
795
851
|
size_t dictSize) {
|
796
852
|
if (!best) {
|
@@ -821,10 +877,10 @@ static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
821
877
|
best->parameters = parameters;
|
822
878
|
best->compressedSize = compressedSize;
|
823
879
|
}
|
824
|
-
ZSTD_pthread_mutex_unlock(&best->mutex);
|
825
880
|
if (liveJobs == 0) {
|
826
881
|
ZSTD_pthread_cond_broadcast(&best->cond);
|
827
882
|
}
|
883
|
+
ZSTD_pthread_mutex_unlock(&best->mutex);
|
828
884
|
}
|
829
885
|
}
|
830
886
|
|
@@ -839,7 +895,7 @@ typedef struct COVER_tryParameters_data_s {
|
|
839
895
|
} COVER_tryParameters_data_t;
|
840
896
|
|
841
897
|
/**
|
842
|
-
* Tries a set of parameters and
|
898
|
+
* Tries a set of parameters and updates the COVER_best_t with the results.
|
843
899
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
844
900
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
845
901
|
*/
|
@@ -870,7 +926,7 @@ static void COVER_tryParameters(void *opaque) {
|
|
870
926
|
dictBufferCapacity, parameters);
|
871
927
|
dictBufferCapacity = ZDICT_finalizeDictionary(
|
872
928
|
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
873
|
-
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
|
929
|
+
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
|
874
930
|
parameters.zParams);
|
875
931
|
if (ZDICT_isError(dictBufferCapacity)) {
|
876
932
|
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
|
@@ -878,49 +934,10 @@ static void COVER_tryParameters(void *opaque) {
|
|
878
934
|
}
|
879
935
|
}
|
880
936
|
/* Check total compressed size */
|
881
|
-
{
|
882
|
-
/* Pointers */
|
883
|
-
ZSTD_CCtx *cctx;
|
884
|
-
ZSTD_CDict *cdict;
|
885
|
-
void *dst;
|
886
|
-
/* Local variables */
|
887
|
-
size_t dstCapacity;
|
888
|
-
size_t i;
|
889
|
-
/* Allocate dst with enough space to compress the maximum sized sample */
|
890
|
-
{
|
891
|
-
size_t maxSampleSize = 0;
|
892
|
-
for (i = 0; i < ctx->nbSamples; ++i) {
|
893
|
-
maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
|
894
|
-
}
|
895
|
-
dstCapacity = ZSTD_compressBound(maxSampleSize);
|
896
|
-
dst = malloc(dstCapacity);
|
897
|
-
}
|
898
|
-
/* Create the cctx and cdict */
|
899
|
-
cctx = ZSTD_createCCtx();
|
900
|
-
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
|
901
|
-
parameters.zParams.compressionLevel);
|
902
|
-
if (!dst || !cctx || !cdict) {
|
903
|
-
goto _compressCleanup;
|
904
|
-
}
|
905
|
-
/* Compress each sample and sum their sizes (or error) */
|
906
|
-
totalCompressedSize = dictBufferCapacity;
|
907
|
-
for (i = 0; i < ctx->nbSamples; ++i) {
|
908
|
-
const size_t size = ZSTD_compress_usingCDict(
|
909
|
-
cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
|
910
|
-
ctx->samplesSizes[i], cdict);
|
911
|
-
if (ZSTD_isError(size)) {
|
912
|
-
totalCompressedSize = ERROR(GENERIC);
|
913
|
-
goto _compressCleanup;
|
914
|
-
}
|
915
|
-
totalCompressedSize += size;
|
916
|
-
}
|
917
|
-
_compressCleanup:
|
918
|
-
ZSTD_freeCCtx(cctx);
|
919
|
-
ZSTD_freeCDict(cdict);
|
920
|
-
if (dst) {
|
921
|
-
free(dst);
|
922
|
-
}
|
923
|
-
}
|
937
|
+
totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
|
938
|
+
ctx->samples, ctx->offsets,
|
939
|
+
ctx->nbTrainSamples, ctx->nbSamples,
|
940
|
+
dict, dictBufferCapacity);
|
924
941
|
|
925
942
|
_cleanup:
|
926
943
|
COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
|
@@ -941,6 +958,8 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
941
958
|
ZDICT_cover_params_t *parameters) {
|
942
959
|
/* constants */
|
943
960
|
const unsigned nbThreads = parameters->nbThreads;
|
961
|
+
const double splitPoint =
|
962
|
+
parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
|
944
963
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
945
964
|
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
946
965
|
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
@@ -958,6 +977,10 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
958
977
|
POOL_ctx *pool = NULL;
|
959
978
|
|
960
979
|
/* Checks */
|
980
|
+
if (splitPoint <= 0 || splitPoint > 1) {
|
981
|
+
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
982
|
+
return ERROR(GENERIC);
|
983
|
+
}
|
961
984
|
if (kMinK < kMaxD || kMaxK < kMinK) {
|
962
985
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
963
986
|
return ERROR(GENERIC);
|
@@ -988,7 +1011,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
988
1011
|
/* Initialize the context for this value of d */
|
989
1012
|
COVER_ctx_t ctx;
|
990
1013
|
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
|
991
|
-
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) {
|
1014
|
+
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
|
992
1015
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
|
993
1016
|
COVER_best_destroy(&best);
|
994
1017
|
POOL_free(pool);
|
@@ -1013,6 +1036,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1013
1036
|
data->parameters = *parameters;
|
1014
1037
|
data->parameters.k = k;
|
1015
1038
|
data->parameters.d = d;
|
1039
|
+
data->parameters.splitPoint = splitPoint;
|
1016
1040
|
data->parameters.steps = kSteps;
|
1017
1041
|
data->parameters.zParams.notificationLevel = g_displayLevel;
|
1018
1042
|
/* Check the parameters */
|
@@ -0,0 +1,83 @@
|
|
1
|
+
#include <stdio.h> /* fprintf */
|
2
|
+
#include <stdlib.h> /* malloc, free, qsort */
|
3
|
+
#include <string.h> /* memset */
|
4
|
+
#include <time.h> /* clock */
|
5
|
+
#include "mem.h" /* read */
|
6
|
+
#include "pool.h"
|
7
|
+
#include "threading.h"
|
8
|
+
#include "zstd_internal.h" /* includes zstd.h */
|
9
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
10
|
+
#define ZDICT_STATIC_LINKING_ONLY
|
11
|
+
#endif
|
12
|
+
#include "zdict.h"
|
13
|
+
|
14
|
+
/**
|
15
|
+
* COVER_best_t is used for two purposes:
|
16
|
+
* 1. Synchronizing threads.
|
17
|
+
* 2. Saving the best parameters and dictionary.
|
18
|
+
*
|
19
|
+
* All of the methods except COVER_best_init() are thread safe if zstd is
|
20
|
+
* compiled with multithreaded support.
|
21
|
+
*/
|
22
|
+
typedef struct COVER_best_s {
|
23
|
+
ZSTD_pthread_mutex_t mutex;
|
24
|
+
ZSTD_pthread_cond_t cond;
|
25
|
+
size_t liveJobs;
|
26
|
+
void *dict;
|
27
|
+
size_t dictSize;
|
28
|
+
ZDICT_cover_params_t parameters;
|
29
|
+
size_t compressedSize;
|
30
|
+
} COVER_best_t;
|
31
|
+
|
32
|
+
/**
|
33
|
+
* A segment is a range in the source as well as the score of the segment.
|
34
|
+
*/
|
35
|
+
typedef struct {
|
36
|
+
U32 begin;
|
37
|
+
U32 end;
|
38
|
+
U32 score;
|
39
|
+
} COVER_segment_t;
|
40
|
+
|
41
|
+
/**
|
42
|
+
* Checks total compressed size of a dictionary
|
43
|
+
*/
|
44
|
+
size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
|
45
|
+
const size_t *samplesSizes, const BYTE *samples,
|
46
|
+
size_t *offsets,
|
47
|
+
size_t nbTrainSamples, size_t nbSamples,
|
48
|
+
BYTE *const dict, size_t dictBufferCapacity);
|
49
|
+
|
50
|
+
/**
|
51
|
+
* Returns the sum of the sample sizes.
|
52
|
+
*/
|
53
|
+
size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ;
|
54
|
+
|
55
|
+
/**
|
56
|
+
* Initialize the `COVER_best_t`.
|
57
|
+
*/
|
58
|
+
void COVER_best_init(COVER_best_t *best);
|
59
|
+
|
60
|
+
/**
|
61
|
+
* Wait until liveJobs == 0.
|
62
|
+
*/
|
63
|
+
void COVER_best_wait(COVER_best_t *best);
|
64
|
+
|
65
|
+
/**
|
66
|
+
* Call COVER_best_wait() and then destroy the COVER_best_t.
|
67
|
+
*/
|
68
|
+
void COVER_best_destroy(COVER_best_t *best);
|
69
|
+
|
70
|
+
/**
|
71
|
+
* Called when a thread is about to be launched.
|
72
|
+
* Increments liveJobs.
|
73
|
+
*/
|
74
|
+
void COVER_best_start(COVER_best_t *best);
|
75
|
+
|
76
|
+
/**
|
77
|
+
* Called when a thread finishes executing, both on error or success.
|
78
|
+
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
79
|
+
* If this dictionary is the best so far save it and its parameters.
|
80
|
+
*/
|
81
|
+
void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
82
|
+
ZDICT_cover_params_t parameters, void *dict,
|
83
|
+
size_t dictSize);
|