zstd-ruby 1.2.0.0 → 1.3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +7 -5
- data/ext/zstdruby/libzstd/common/bitstream.h +23 -9
- data/ext/zstdruby/libzstd/common/error_private.c +4 -1
- data/ext/zstdruby/libzstd/common/huf.h +20 -0
- data/ext/zstdruby/libzstd/common/mem.h +0 -14
- data/ext/zstdruby/libzstd/common/pool.c +12 -0
- data/ext/zstdruby/libzstd/common/pool.h +5 -0
- data/ext/zstdruby/libzstd/common/threading.c +0 -1
- data/ext/zstdruby/libzstd/common/zstd_common.c +25 -18
- data/ext/zstdruby/libzstd/common/zstd_errors.h +15 -7
- data/ext/zstdruby/libzstd/common/zstd_internal.h +59 -9
- data/ext/zstdruby/libzstd/compress/huf_compress.c +7 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +1082 -487
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +30 -15
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +362 -158
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +49 -13
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +150 -26
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +380 -258
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +23 -37
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +30 -40
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +104 -95
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +11 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +14 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -12
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -14
- data/ext/zstdruby/libzstd/zstd.h +507 -166
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +2 -2
@@ -398,7 +398,8 @@ typedef struct {
|
|
398
398
|
*/
|
399
399
|
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
400
400
|
COVER_map_t *activeDmers, U32 begin,
|
401
|
-
U32 end,
|
401
|
+
U32 end,
|
402
|
+
ZDICT_cover_params_t parameters) {
|
402
403
|
/* Constants */
|
403
404
|
const U32 k = parameters.k;
|
404
405
|
const U32 d = parameters.d;
|
@@ -478,7 +479,7 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
|
478
479
|
* Check the validity of the parameters.
|
479
480
|
* Returns non-zero if the parameters are valid and 0 otherwise.
|
480
481
|
*/
|
481
|
-
static int COVER_checkParameters(
|
482
|
+
static int COVER_checkParameters(ZDICT_cover_params_t parameters) {
|
482
483
|
/* k and d are required parameters */
|
483
484
|
if (parameters.d == 0 || parameters.k == 0) {
|
484
485
|
return 0;
|
@@ -600,7 +601,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
600
601
|
static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
601
602
|
COVER_map_t *activeDmers, void *dictBuffer,
|
602
603
|
size_t dictBufferCapacity,
|
603
|
-
|
604
|
+
ZDICT_cover_params_t parameters) {
|
604
605
|
BYTE *const dict = (BYTE *)dictBuffer;
|
605
606
|
size_t tail = dictBufferCapacity;
|
606
607
|
/* Divide the data up into epochs of equal size.
|
@@ -639,22 +640,10 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
|
639
640
|
return tail;
|
640
641
|
}
|
641
642
|
|
642
|
-
|
643
|
-
* Translate from COVER_params_t to ZDICT_params_t required for finalizing the
|
644
|
-
* dictionary.
|
645
|
-
*/
|
646
|
-
static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
|
647
|
-
ZDICT_params_t zdictParams;
|
648
|
-
memset(&zdictParams, 0, sizeof(zdictParams));
|
649
|
-
zdictParams.notificationLevel = 1;
|
650
|
-
zdictParams.dictID = parameters.dictID;
|
651
|
-
zdictParams.compressionLevel = parameters.compressionLevel;
|
652
|
-
return zdictParams;
|
653
|
-
}
|
654
|
-
|
655
|
-
ZDICTLIB_API size_t COVER_trainFromBuffer(
|
643
|
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
656
644
|
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
657
|
-
const size_t *samplesSizes, unsigned nbSamples,
|
645
|
+
const size_t *samplesSizes, unsigned nbSamples,
|
646
|
+
ZDICT_cover_params_t parameters) {
|
658
647
|
BYTE *const dict = (BYTE *)dictBuffer;
|
659
648
|
COVER_ctx_t ctx;
|
660
649
|
COVER_map_t activeDmers;
|
@@ -673,7 +662,7 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
|
|
673
662
|
return ERROR(dstSize_tooSmall);
|
674
663
|
}
|
675
664
|
/* Initialize global data */
|
676
|
-
g_displayLevel = parameters.notificationLevel;
|
665
|
+
g_displayLevel = parameters.zParams.notificationLevel;
|
677
666
|
/* Initialize context and activeDmers */
|
678
667
|
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
679
668
|
parameters.d)) {
|
@@ -690,10 +679,9 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
|
|
690
679
|
const size_t tail =
|
691
680
|
COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
|
692
681
|
dictBufferCapacity, parameters);
|
693
|
-
ZDICT_params_t zdictParams = COVER_translateParams(parameters);
|
694
682
|
const size_t dictionarySize = ZDICT_finalizeDictionary(
|
695
683
|
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
696
|
-
samplesBuffer, samplesSizes, nbSamples,
|
684
|
+
samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
|
697
685
|
if (!ZSTD_isError(dictionarySize)) {
|
698
686
|
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
|
699
687
|
(U32)dictionarySize);
|
@@ -718,7 +706,7 @@ typedef struct COVER_best_s {
|
|
718
706
|
size_t liveJobs;
|
719
707
|
void *dict;
|
720
708
|
size_t dictSize;
|
721
|
-
|
709
|
+
ZDICT_cover_params_t parameters;
|
722
710
|
size_t compressedSize;
|
723
711
|
} COVER_best_t;
|
724
712
|
|
@@ -786,7 +774,7 @@ static void COVER_best_start(COVER_best_t *best) {
|
|
786
774
|
* If this dictionary is the best so far save it and its parameters.
|
787
775
|
*/
|
788
776
|
static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
789
|
-
|
777
|
+
ZDICT_cover_params_t parameters, void *dict,
|
790
778
|
size_t dictSize) {
|
791
779
|
if (!best) {
|
792
780
|
return;
|
@@ -830,7 +818,7 @@ typedef struct COVER_tryParameters_data_s {
|
|
830
818
|
const COVER_ctx_t *ctx;
|
831
819
|
COVER_best_t *best;
|
832
820
|
size_t dictBufferCapacity;
|
833
|
-
|
821
|
+
ZDICT_cover_params_t parameters;
|
834
822
|
} COVER_tryParameters_data_t;
|
835
823
|
|
836
824
|
/**
|
@@ -842,7 +830,7 @@ static void COVER_tryParameters(void *opaque) {
|
|
842
830
|
/* Save parameters as local variables */
|
843
831
|
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
|
844
832
|
const COVER_ctx_t *const ctx = data->ctx;
|
845
|
-
const
|
833
|
+
const ZDICT_cover_params_t parameters = data->parameters;
|
846
834
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
847
835
|
size_t totalCompressedSize = ERROR(GENERIC);
|
848
836
|
/* Allocate space for hash table, dict, and freqs */
|
@@ -863,10 +851,10 @@ static void COVER_tryParameters(void *opaque) {
|
|
863
851
|
{
|
864
852
|
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
|
865
853
|
dictBufferCapacity, parameters);
|
866
|
-
const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
|
867
854
|
dictBufferCapacity = ZDICT_finalizeDictionary(
|
868
855
|
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
869
|
-
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
|
856
|
+
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
|
857
|
+
parameters.zParams);
|
870
858
|
if (ZDICT_isError(dictBufferCapacity)) {
|
871
859
|
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
|
872
860
|
goto _cleanup;
|
@@ -892,8 +880,8 @@ static void COVER_tryParameters(void *opaque) {
|
|
892
880
|
}
|
893
881
|
/* Create the cctx and cdict */
|
894
882
|
cctx = ZSTD_createCCtx();
|
895
|
-
cdict =
|
896
|
-
|
883
|
+
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
|
884
|
+
parameters.zParams.compressionLevel);
|
897
885
|
if (!dst || !cctx || !cdict) {
|
898
886
|
goto _compressCleanup;
|
899
887
|
}
|
@@ -930,12 +918,10 @@ _cleanup:
|
|
930
918
|
}
|
931
919
|
}
|
932
920
|
|
933
|
-
ZDICTLIB_API size_t
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
unsigned nbSamples,
|
938
|
-
COVER_params_t *parameters) {
|
921
|
+
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
922
|
+
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
923
|
+
const size_t *samplesSizes, unsigned nbSamples,
|
924
|
+
ZDICT_cover_params_t *parameters) {
|
939
925
|
/* constants */
|
940
926
|
const unsigned nbThreads = parameters->nbThreads;
|
941
927
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
@@ -947,7 +933,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
|
|
947
933
|
const unsigned kIterations =
|
948
934
|
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
949
935
|
/* Local variables */
|
950
|
-
const int displayLevel = parameters->notificationLevel;
|
936
|
+
const int displayLevel = parameters->zParams.notificationLevel;
|
951
937
|
unsigned iteration = 1;
|
952
938
|
unsigned d;
|
953
939
|
unsigned k;
|
@@ -976,7 +962,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
|
|
976
962
|
/* Initialization */
|
977
963
|
COVER_best_init(&best);
|
978
964
|
/* Turn down global display level to clean up display at level 2 and below */
|
979
|
-
g_displayLevel = parameters->notificationLevel - 1;
|
965
|
+
g_displayLevel = parameters->zParams.notificationLevel - 1;
|
980
966
|
/* Loop through d first because each new value needs a new context */
|
981
967
|
LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
|
982
968
|
kIterations);
|
@@ -94,7 +94,7 @@ const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(error
|
|
94
94
|
unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
|
95
95
|
{
|
96
96
|
if (dictSize < 8) return 0;
|
97
|
-
if (MEM_readLE32(dictBuffer) !=
|
97
|
+
if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
|
98
98
|
return MEM_readLE32((const char*)dictBuffer + 4);
|
99
99
|
}
|
100
100
|
|
@@ -487,7 +487,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
|
|
487
487
|
}
|
488
488
|
|
489
489
|
|
490
|
-
static size_t
|
490
|
+
static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
491
491
|
const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
|
492
492
|
const size_t* fileSizes, unsigned nbFiles,
|
493
493
|
U32 minRatio, U32 notificationLevel)
|
@@ -576,7 +576,7 @@ typedef struct
|
|
576
576
|
{
|
577
577
|
ZSTD_CCtx* ref;
|
578
578
|
ZSTD_CCtx* zc;
|
579
|
-
void* workPlace; /* must be
|
579
|
+
void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
|
580
580
|
} EStats_ress_t;
|
581
581
|
|
582
582
|
#define MAXREPOFFSET 1024
|
@@ -585,14 +585,14 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|
585
585
|
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
|
586
586
|
const void* src, size_t srcSize, U32 notificationLevel)
|
587
587
|
{
|
588
|
-
size_t const blockSizeMax = MIN (
|
588
|
+
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
|
589
589
|
size_t cSize;
|
590
590
|
|
591
591
|
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
592
592
|
{ size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
|
593
593
|
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
|
594
594
|
}
|
595
|
-
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace,
|
595
|
+
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
596
596
|
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
|
597
597
|
|
598
598
|
if (cSize) { /* if == 0; block is not compressible */
|
@@ -634,17 +634,6 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|
634
634
|
} } }
|
635
635
|
}
|
636
636
|
|
637
|
-
/*
|
638
|
-
static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
639
|
-
{
|
640
|
-
unsigned u;
|
641
|
-
size_t max=0;
|
642
|
-
for (u=0; u<nbFiles; u++)
|
643
|
-
if (max < fileSizes[u]) max = fileSizes[u];
|
644
|
-
return max;
|
645
|
-
}
|
646
|
-
*/
|
647
|
-
|
648
637
|
static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
649
638
|
{
|
650
639
|
size_t total=0;
|
@@ -700,7 +689,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
700
689
|
/* init */
|
701
690
|
esr.ref = ZSTD_createCCtx();
|
702
691
|
esr.zc = ZSTD_createCCtx();
|
703
|
-
esr.workPlace = malloc(
|
692
|
+
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
|
704
693
|
if (!esr.ref || !esr.zc || !esr.workPlace) {
|
705
694
|
eSize = ERROR(memory_allocation);
|
706
695
|
DISPLAYLEVEL(1, "Not enough memory \n");
|
@@ -865,7 +854,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
865
854
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
866
855
|
|
867
856
|
/* dictionary header */
|
868
|
-
MEM_writeLE32(header,
|
857
|
+
MEM_writeLE32(header, ZSTD_MAGIC_DICTIONARY);
|
869
858
|
{ U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
|
870
859
|
U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
|
871
860
|
U32 const dictID = params.dictID ? params.dictID : compliantID;
|
@@ -917,7 +906,7 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
|
|
917
906
|
}
|
918
907
|
|
919
908
|
/* add dictionary header (after entropy tables) */
|
920
|
-
MEM_writeLE32(dictBuffer,
|
909
|
+
MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY);
|
921
910
|
{ U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
|
922
911
|
U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
|
923
912
|
U32 const dictID = params.dictID ? params.dictID : compliantID;
|
@@ -930,14 +919,14 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
|
|
930
919
|
}
|
931
920
|
|
932
921
|
|
933
|
-
/*!
|
922
|
+
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
|
934
923
|
* Warning : `samplesBuffer` must be followed by noisy guard band.
|
935
924
|
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
|
936
925
|
*/
|
937
|
-
size_t
|
926
|
+
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
938
927
|
void* dictBuffer, size_t maxDictSize,
|
939
928
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
940
|
-
|
929
|
+
ZDICT_legacy_params_t params)
|
941
930
|
{
|
942
931
|
U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
|
943
932
|
dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
|
@@ -946,7 +935,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
946
935
|
size_t const targetDictSize = maxDictSize;
|
947
936
|
size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
|
948
937
|
size_t dictSize = 0;
|
949
|
-
U32 const notificationLevel = params.notificationLevel;
|
938
|
+
U32 const notificationLevel = params.zParams.notificationLevel;
|
950
939
|
|
951
940
|
/* checks */
|
952
941
|
if (!dictList) return ERROR(memory_allocation);
|
@@ -957,13 +946,13 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
957
946
|
ZDICT_initDictItem(dictList);
|
958
947
|
|
959
948
|
/* build dictionary */
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
949
|
+
ZDICT_trainBuffer_legacy(dictList, dictListSize,
|
950
|
+
samplesBuffer, samplesBuffSize,
|
951
|
+
samplesSizes, nbSamples,
|
952
|
+
minRep, notificationLevel);
|
964
953
|
|
965
954
|
/* display best matches */
|
966
|
-
if (params.notificationLevel>= 3) {
|
955
|
+
if (params.zParams.notificationLevel>= 3) {
|
967
956
|
U32 const nb = MIN(25, dictList[0].pos);
|
968
957
|
U32 const dictContentSize = ZDICT_dictSize(dictList);
|
969
958
|
U32 u;
|
@@ -1026,7 +1015,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
1026
1015
|
|
1027
1016
|
dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
|
1028
1017
|
samplesBuffer, samplesSizes, nbSamples,
|
1029
|
-
params);
|
1018
|
+
params.zParams);
|
1030
1019
|
}
|
1031
1020
|
|
1032
1021
|
/* clean up */
|
@@ -1037,9 +1026,9 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
1037
1026
|
|
1038
1027
|
/* issue : samplesBuffer need to be followed by a noisy guard band.
|
1039
1028
|
* work around : duplicate the buffer, and add the noise */
|
1040
|
-
size_t
|
1041
|
-
|
1042
|
-
|
1029
|
+
size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
|
1030
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
1031
|
+
ZDICT_legacy_params_t params)
|
1043
1032
|
{
|
1044
1033
|
size_t result;
|
1045
1034
|
void* newBuff;
|
@@ -1052,10 +1041,9 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
|
|
1052
1041
|
memcpy(newBuff, samplesBuffer, sBuffSize);
|
1053
1042
|
ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
|
1054
1043
|
|
1055
|
-
result =
|
1056
|
-
|
1057
|
-
|
1058
|
-
params);
|
1044
|
+
result =
|
1045
|
+
ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff,
|
1046
|
+
samplesSizes, nbSamples, params);
|
1059
1047
|
free(newBuff);
|
1060
1048
|
return result;
|
1061
1049
|
}
|
@@ -1064,11 +1052,13 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
|
|
1064
1052
|
size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
1065
1053
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
|
1066
1054
|
{
|
1067
|
-
|
1055
|
+
ZDICT_cover_params_t params;
|
1068
1056
|
memset(&params, 0, sizeof(params));
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1057
|
+
params.d = 8;
|
1058
|
+
params.steps = 4;
|
1059
|
+
return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
|
1060
|
+
samplesBuffer, samplesSizes,
|
1061
|
+
nbSamples, &params);
|
1072
1062
|
}
|
1073
1063
|
|
1074
1064
|
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
@@ -20,10 +20,12 @@ extern "C" {
|
|
20
20
|
|
21
21
|
|
22
22
|
/* ===== ZDICTLIB_API : control library symbols visibility ===== */
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
23
|
+
#ifndef ZDICTLIB_VISIBILITY
|
24
|
+
# if defined(__GNUC__) && (__GNUC__ >= 4)
|
25
|
+
# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
|
26
|
+
# else
|
27
|
+
# define ZDICTLIB_VISIBILITY
|
28
|
+
# endif
|
27
29
|
#endif
|
28
30
|
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
29
31
|
# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
|
@@ -34,18 +36,20 @@ extern "C" {
|
|
34
36
|
#endif
|
35
37
|
|
36
38
|
|
37
|
-
/*! ZDICT_trainFromBuffer()
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
39
|
+
/*! ZDICT_trainFromBuffer():
|
40
|
+
* Train a dictionary from an array of samples.
|
41
|
+
* Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
|
42
|
+
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
43
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
44
|
+
* The resulting dictionary will be saved into `dictBuffer`.
|
45
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
46
|
+
* or an error code, which can be tested with ZDICT_isError().
|
47
|
+
* Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
|
48
|
+
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
49
|
+
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
50
|
+
* In general, it's recommended to provide a few thousand samples, but this can vary a lot.
|
51
|
+
* It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
|
52
|
+
*/
|
49
53
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
50
54
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
|
51
55
|
|
@@ -67,94 +71,78 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
|
67
71
|
* ==================================================================================== */
|
68
72
|
|
69
73
|
typedef struct {
|
70
|
-
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
|
71
74
|
int compressionLevel; /* 0 means default; target a specific zstd compression level */
|
72
75
|
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
73
76
|
unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
|
74
|
-
unsigned reserved[2]; /* reserved space for future parameters */
|
75
77
|
} ZDICT_params_t;
|
76
78
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`),
|
82
|
-
or an error code, which can be tested by ZDICT_isError().
|
83
|
-
note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
|
84
|
-
*/
|
85
|
-
ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
|
86
|
-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
87
|
-
ZDICT_params_t parameters);
|
88
|
-
|
89
|
-
/*! COVER_params_t :
|
90
|
-
For all values 0 means default.
|
91
|
-
k and d are the only required parameters.
|
92
|
-
*/
|
79
|
+
/*! ZDICT_cover_params_t:
|
80
|
+
* For all values 0 means default.
|
81
|
+
* k and d are the only required parameters.
|
82
|
+
*/
|
93
83
|
typedef struct {
|
94
84
|
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
95
85
|
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
96
86
|
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
|
97
|
-
|
98
87
|
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
/*! ZDICT_finalizeDictionary()
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
*/
|
88
|
+
ZDICT_params_t zParams;
|
89
|
+
} ZDICT_cover_params_t;
|
90
|
+
|
91
|
+
|
92
|
+
/*! ZDICT_trainFromBuffer_cover():
|
93
|
+
* Train a dictionary from an array of samples using the COVER algorithm.
|
94
|
+
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
95
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
96
|
+
* The resulting dictionary will be saved into `dictBuffer`.
|
97
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
98
|
+
* or an error code, which can be tested with ZDICT_isError().
|
99
|
+
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
|
100
|
+
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
101
|
+
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
102
|
+
* In general, it's recommended to provide a few thousand samples, but this can vary a lot.
|
103
|
+
* It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
|
104
|
+
*/
|
105
|
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
106
|
+
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
107
|
+
const size_t *samplesSizes, unsigned nbSamples,
|
108
|
+
ZDICT_cover_params_t parameters);
|
109
|
+
|
110
|
+
/*! ZDICT_optimizeTrainFromBuffer_cover():
|
111
|
+
* The same requirements as above hold for all the parameters except `parameters`.
|
112
|
+
* This function tries many parameter combinations and picks the best parameters.
|
113
|
+
* `*parameters` is filled with the best parameters found, and the dictionary
|
114
|
+
* constructed with those parameters is stored in `dictBuffer`.
|
115
|
+
*
|
116
|
+
* All of the parameters d, k, steps are optional.
|
117
|
+
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
|
118
|
+
* if steps is zero it defaults to its default value.
|
119
|
+
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
|
120
|
+
*
|
121
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
122
|
+
* or an error code, which can be tested with ZDICT_isError().
|
123
|
+
* On success `*parameters` contains the parameters selected.
|
124
|
+
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
125
|
+
*/
|
126
|
+
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
127
|
+
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
128
|
+
const size_t *samplesSizes, unsigned nbSamples,
|
129
|
+
ZDICT_cover_params_t *parameters);
|
130
|
+
|
131
|
+
/*! ZDICT_finalizeDictionary():
|
132
|
+
* Given a custom content as a basis for dictionary, and a set of samples,
|
133
|
+
* finalize dictionary by adding headers and statistics.
|
134
|
+
*
|
135
|
+
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
136
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
|
137
|
+
*
|
138
|
+
* dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
139
|
+
* maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
140
|
+
*
|
141
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
142
|
+
* or an error code, which can be tested by ZDICT_isError().
|
143
|
+
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
144
|
+
* Note 2: dictBuffer and dictContent can overlap
|
145
|
+
*/
|
158
146
|
#define ZDICT_CONTENTSIZE_MIN 128
|
159
147
|
#define ZDICT_DICTSIZE_MIN 256
|
160
148
|
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
@@ -162,7 +150,28 @@ ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBuffer
|
|
162
150
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
163
151
|
ZDICT_params_t parameters);
|
164
152
|
|
165
|
-
|
153
|
+
typedef struct {
|
154
|
+
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
|
155
|
+
ZDICT_params_t zParams;
|
156
|
+
} ZDICT_legacy_params_t;
|
157
|
+
|
158
|
+
/*! ZDICT_trainFromBuffer_legacy():
|
159
|
+
* Train a dictionary from an array of samples.
|
160
|
+
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
161
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
162
|
+
* The resulting dictionary will be saved into `dictBuffer`.
|
163
|
+
* `parameters` is optional and can be provided with values set to 0 to mean "default".
|
164
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
165
|
+
* or an error code, which can be tested with ZDICT_isError().
|
166
|
+
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
167
|
+
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
168
|
+
* In general, it's recommended to provide a few thousand samples, but this can vary a lot.
|
169
|
+
* It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
|
170
|
+
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
171
|
+
*/
|
172
|
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
173
|
+
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
174
|
+
const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters);
|
166
175
|
|
167
176
|
/* Deprecation warnings */
|
168
177
|
/* It is generally possible to disable deprecation warnings from compiler,
|