zstd-ruby 1.2.0.0 → 1.3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +7 -5
- data/ext/zstdruby/libzstd/common/bitstream.h +23 -9
- data/ext/zstdruby/libzstd/common/error_private.c +4 -1
- data/ext/zstdruby/libzstd/common/huf.h +20 -0
- data/ext/zstdruby/libzstd/common/mem.h +0 -14
- data/ext/zstdruby/libzstd/common/pool.c +12 -0
- data/ext/zstdruby/libzstd/common/pool.h +5 -0
- data/ext/zstdruby/libzstd/common/threading.c +0 -1
- data/ext/zstdruby/libzstd/common/zstd_common.c +25 -18
- data/ext/zstdruby/libzstd/common/zstd_errors.h +15 -7
- data/ext/zstdruby/libzstd/common/zstd_internal.h +59 -9
- data/ext/zstdruby/libzstd/compress/huf_compress.c +7 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +1082 -487
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +30 -15
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +362 -158
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +49 -13
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +150 -26
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +380 -258
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +23 -37
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +30 -40
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +104 -95
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +11 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +14 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -12
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -14
- data/ext/zstdruby/libzstd/zstd.h +507 -166
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +2 -2
@@ -398,7 +398,8 @@ typedef struct {
|
|
398
398
|
*/
|
399
399
|
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
400
400
|
COVER_map_t *activeDmers, U32 begin,
|
401
|
-
U32 end,
|
401
|
+
U32 end,
|
402
|
+
ZDICT_cover_params_t parameters) {
|
402
403
|
/* Constants */
|
403
404
|
const U32 k = parameters.k;
|
404
405
|
const U32 d = parameters.d;
|
@@ -478,7 +479,7 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
|
478
479
|
* Check the validity of the parameters.
|
479
480
|
* Returns non-zero if the parameters are valid and 0 otherwise.
|
480
481
|
*/
|
481
|
-
static int COVER_checkParameters(
|
482
|
+
static int COVER_checkParameters(ZDICT_cover_params_t parameters) {
|
482
483
|
/* k and d are required parameters */
|
483
484
|
if (parameters.d == 0 || parameters.k == 0) {
|
484
485
|
return 0;
|
@@ -600,7 +601,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
600
601
|
static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
601
602
|
COVER_map_t *activeDmers, void *dictBuffer,
|
602
603
|
size_t dictBufferCapacity,
|
603
|
-
|
604
|
+
ZDICT_cover_params_t parameters) {
|
604
605
|
BYTE *const dict = (BYTE *)dictBuffer;
|
605
606
|
size_t tail = dictBufferCapacity;
|
606
607
|
/* Divide the data up into epochs of equal size.
|
@@ -639,22 +640,10 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
|
639
640
|
return tail;
|
640
641
|
}
|
641
642
|
|
642
|
-
|
643
|
-
* Translate from COVER_params_t to ZDICT_params_t required for finalizing the
|
644
|
-
* dictionary.
|
645
|
-
*/
|
646
|
-
static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
|
647
|
-
ZDICT_params_t zdictParams;
|
648
|
-
memset(&zdictParams, 0, sizeof(zdictParams));
|
649
|
-
zdictParams.notificationLevel = 1;
|
650
|
-
zdictParams.dictID = parameters.dictID;
|
651
|
-
zdictParams.compressionLevel = parameters.compressionLevel;
|
652
|
-
return zdictParams;
|
653
|
-
}
|
654
|
-
|
655
|
-
ZDICTLIB_API size_t COVER_trainFromBuffer(
|
643
|
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
656
644
|
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
657
|
-
const size_t *samplesSizes, unsigned nbSamples,
|
645
|
+
const size_t *samplesSizes, unsigned nbSamples,
|
646
|
+
ZDICT_cover_params_t parameters) {
|
658
647
|
BYTE *const dict = (BYTE *)dictBuffer;
|
659
648
|
COVER_ctx_t ctx;
|
660
649
|
COVER_map_t activeDmers;
|
@@ -673,7 +662,7 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
|
|
673
662
|
return ERROR(dstSize_tooSmall);
|
674
663
|
}
|
675
664
|
/* Initialize global data */
|
676
|
-
g_displayLevel = parameters.notificationLevel;
|
665
|
+
g_displayLevel = parameters.zParams.notificationLevel;
|
677
666
|
/* Initialize context and activeDmers */
|
678
667
|
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
679
668
|
parameters.d)) {
|
@@ -690,10 +679,9 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
|
|
690
679
|
const size_t tail =
|
691
680
|
COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
|
692
681
|
dictBufferCapacity, parameters);
|
693
|
-
ZDICT_params_t zdictParams = COVER_translateParams(parameters);
|
694
682
|
const size_t dictionarySize = ZDICT_finalizeDictionary(
|
695
683
|
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
696
|
-
samplesBuffer, samplesSizes, nbSamples,
|
684
|
+
samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
|
697
685
|
if (!ZSTD_isError(dictionarySize)) {
|
698
686
|
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
|
699
687
|
(U32)dictionarySize);
|
@@ -718,7 +706,7 @@ typedef struct COVER_best_s {
|
|
718
706
|
size_t liveJobs;
|
719
707
|
void *dict;
|
720
708
|
size_t dictSize;
|
721
|
-
|
709
|
+
ZDICT_cover_params_t parameters;
|
722
710
|
size_t compressedSize;
|
723
711
|
} COVER_best_t;
|
724
712
|
|
@@ -786,7 +774,7 @@ static void COVER_best_start(COVER_best_t *best) {
|
|
786
774
|
* If this dictionary is the best so far save it and its parameters.
|
787
775
|
*/
|
788
776
|
static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
789
|
-
|
777
|
+
ZDICT_cover_params_t parameters, void *dict,
|
790
778
|
size_t dictSize) {
|
791
779
|
if (!best) {
|
792
780
|
return;
|
@@ -830,7 +818,7 @@ typedef struct COVER_tryParameters_data_s {
|
|
830
818
|
const COVER_ctx_t *ctx;
|
831
819
|
COVER_best_t *best;
|
832
820
|
size_t dictBufferCapacity;
|
833
|
-
|
821
|
+
ZDICT_cover_params_t parameters;
|
834
822
|
} COVER_tryParameters_data_t;
|
835
823
|
|
836
824
|
/**
|
@@ -842,7 +830,7 @@ static void COVER_tryParameters(void *opaque) {
|
|
842
830
|
/* Save parameters as local variables */
|
843
831
|
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
|
844
832
|
const COVER_ctx_t *const ctx = data->ctx;
|
845
|
-
const
|
833
|
+
const ZDICT_cover_params_t parameters = data->parameters;
|
846
834
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
847
835
|
size_t totalCompressedSize = ERROR(GENERIC);
|
848
836
|
/* Allocate space for hash table, dict, and freqs */
|
@@ -863,10 +851,10 @@ static void COVER_tryParameters(void *opaque) {
|
|
863
851
|
{
|
864
852
|
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
|
865
853
|
dictBufferCapacity, parameters);
|
866
|
-
const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
|
867
854
|
dictBufferCapacity = ZDICT_finalizeDictionary(
|
868
855
|
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
869
|
-
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
|
856
|
+
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
|
857
|
+
parameters.zParams);
|
870
858
|
if (ZDICT_isError(dictBufferCapacity)) {
|
871
859
|
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
|
872
860
|
goto _cleanup;
|
@@ -892,8 +880,8 @@ static void COVER_tryParameters(void *opaque) {
|
|
892
880
|
}
|
893
881
|
/* Create the cctx and cdict */
|
894
882
|
cctx = ZSTD_createCCtx();
|
895
|
-
cdict =
|
896
|
-
|
883
|
+
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
|
884
|
+
parameters.zParams.compressionLevel);
|
897
885
|
if (!dst || !cctx || !cdict) {
|
898
886
|
goto _compressCleanup;
|
899
887
|
}
|
@@ -930,12 +918,10 @@ _cleanup:
|
|
930
918
|
}
|
931
919
|
}
|
932
920
|
|
933
|
-
ZDICTLIB_API size_t
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
unsigned nbSamples,
|
938
|
-
COVER_params_t *parameters) {
|
921
|
+
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
922
|
+
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
923
|
+
const size_t *samplesSizes, unsigned nbSamples,
|
924
|
+
ZDICT_cover_params_t *parameters) {
|
939
925
|
/* constants */
|
940
926
|
const unsigned nbThreads = parameters->nbThreads;
|
941
927
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
@@ -947,7 +933,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
|
|
947
933
|
const unsigned kIterations =
|
948
934
|
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
949
935
|
/* Local variables */
|
950
|
-
const int displayLevel = parameters->notificationLevel;
|
936
|
+
const int displayLevel = parameters->zParams.notificationLevel;
|
951
937
|
unsigned iteration = 1;
|
952
938
|
unsigned d;
|
953
939
|
unsigned k;
|
@@ -976,7 +962,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
|
|
976
962
|
/* Initialization */
|
977
963
|
COVER_best_init(&best);
|
978
964
|
/* Turn down global display level to clean up display at level 2 and below */
|
979
|
-
g_displayLevel = parameters->notificationLevel - 1;
|
965
|
+
g_displayLevel = parameters->zParams.notificationLevel - 1;
|
980
966
|
/* Loop through d first because each new value needs a new context */
|
981
967
|
LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
|
982
968
|
kIterations);
|
@@ -94,7 +94,7 @@ const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(error
|
|
94
94
|
unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
|
95
95
|
{
|
96
96
|
if (dictSize < 8) return 0;
|
97
|
-
if (MEM_readLE32(dictBuffer) !=
|
97
|
+
if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
|
98
98
|
return MEM_readLE32((const char*)dictBuffer + 4);
|
99
99
|
}
|
100
100
|
|
@@ -487,7 +487,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
|
|
487
487
|
}
|
488
488
|
|
489
489
|
|
490
|
-
static size_t
|
490
|
+
static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
491
491
|
const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
|
492
492
|
const size_t* fileSizes, unsigned nbFiles,
|
493
493
|
U32 minRatio, U32 notificationLevel)
|
@@ -576,7 +576,7 @@ typedef struct
|
|
576
576
|
{
|
577
577
|
ZSTD_CCtx* ref;
|
578
578
|
ZSTD_CCtx* zc;
|
579
|
-
void* workPlace; /* must be
|
579
|
+
void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
|
580
580
|
} EStats_ress_t;
|
581
581
|
|
582
582
|
#define MAXREPOFFSET 1024
|
@@ -585,14 +585,14 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|
585
585
|
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
|
586
586
|
const void* src, size_t srcSize, U32 notificationLevel)
|
587
587
|
{
|
588
|
-
size_t const blockSizeMax = MIN (
|
588
|
+
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
|
589
589
|
size_t cSize;
|
590
590
|
|
591
591
|
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
592
592
|
{ size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
|
593
593
|
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
|
594
594
|
}
|
595
|
-
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace,
|
595
|
+
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
596
596
|
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
|
597
597
|
|
598
598
|
if (cSize) { /* if == 0; block is not compressible */
|
@@ -634,17 +634,6 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|
634
634
|
} } }
|
635
635
|
}
|
636
636
|
|
637
|
-
/*
|
638
|
-
static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
639
|
-
{
|
640
|
-
unsigned u;
|
641
|
-
size_t max=0;
|
642
|
-
for (u=0; u<nbFiles; u++)
|
643
|
-
if (max < fileSizes[u]) max = fileSizes[u];
|
644
|
-
return max;
|
645
|
-
}
|
646
|
-
*/
|
647
|
-
|
648
637
|
static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
649
638
|
{
|
650
639
|
size_t total=0;
|
@@ -700,7 +689,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
700
689
|
/* init */
|
701
690
|
esr.ref = ZSTD_createCCtx();
|
702
691
|
esr.zc = ZSTD_createCCtx();
|
703
|
-
esr.workPlace = malloc(
|
692
|
+
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
|
704
693
|
if (!esr.ref || !esr.zc || !esr.workPlace) {
|
705
694
|
eSize = ERROR(memory_allocation);
|
706
695
|
DISPLAYLEVEL(1, "Not enough memory \n");
|
@@ -865,7 +854,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
865
854
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
866
855
|
|
867
856
|
/* dictionary header */
|
868
|
-
MEM_writeLE32(header,
|
857
|
+
MEM_writeLE32(header, ZSTD_MAGIC_DICTIONARY);
|
869
858
|
{ U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
|
870
859
|
U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
|
871
860
|
U32 const dictID = params.dictID ? params.dictID : compliantID;
|
@@ -917,7 +906,7 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
|
|
917
906
|
}
|
918
907
|
|
919
908
|
/* add dictionary header (after entropy tables) */
|
920
|
-
MEM_writeLE32(dictBuffer,
|
909
|
+
MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY);
|
921
910
|
{ U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
|
922
911
|
U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
|
923
912
|
U32 const dictID = params.dictID ? params.dictID : compliantID;
|
@@ -930,14 +919,14 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
|
|
930
919
|
}
|
931
920
|
|
932
921
|
|
933
|
-
/*!
|
922
|
+
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
|
934
923
|
* Warning : `samplesBuffer` must be followed by noisy guard band.
|
935
924
|
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
|
936
925
|
*/
|
937
|
-
size_t
|
926
|
+
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
938
927
|
void* dictBuffer, size_t maxDictSize,
|
939
928
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
940
|
-
|
929
|
+
ZDICT_legacy_params_t params)
|
941
930
|
{
|
942
931
|
U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
|
943
932
|
dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
|
@@ -946,7 +935,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
946
935
|
size_t const targetDictSize = maxDictSize;
|
947
936
|
size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
|
948
937
|
size_t dictSize = 0;
|
949
|
-
U32 const notificationLevel = params.notificationLevel;
|
938
|
+
U32 const notificationLevel = params.zParams.notificationLevel;
|
950
939
|
|
951
940
|
/* checks */
|
952
941
|
if (!dictList) return ERROR(memory_allocation);
|
@@ -957,13 +946,13 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
957
946
|
ZDICT_initDictItem(dictList);
|
958
947
|
|
959
948
|
/* build dictionary */
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
949
|
+
ZDICT_trainBuffer_legacy(dictList, dictListSize,
|
950
|
+
samplesBuffer, samplesBuffSize,
|
951
|
+
samplesSizes, nbSamples,
|
952
|
+
minRep, notificationLevel);
|
964
953
|
|
965
954
|
/* display best matches */
|
966
|
-
if (params.notificationLevel>= 3) {
|
955
|
+
if (params.zParams.notificationLevel>= 3) {
|
967
956
|
U32 const nb = MIN(25, dictList[0].pos);
|
968
957
|
U32 const dictContentSize = ZDICT_dictSize(dictList);
|
969
958
|
U32 u;
|
@@ -1026,7 +1015,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
1026
1015
|
|
1027
1016
|
dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
|
1028
1017
|
samplesBuffer, samplesSizes, nbSamples,
|
1029
|
-
params);
|
1018
|
+
params.zParams);
|
1030
1019
|
}
|
1031
1020
|
|
1032
1021
|
/* clean up */
|
@@ -1037,9 +1026,9 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
1037
1026
|
|
1038
1027
|
/* issue : samplesBuffer need to be followed by a noisy guard band.
|
1039
1028
|
* work around : duplicate the buffer, and add the noise */
|
1040
|
-
size_t
|
1041
|
-
|
1042
|
-
|
1029
|
+
size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
|
1030
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
1031
|
+
ZDICT_legacy_params_t params)
|
1043
1032
|
{
|
1044
1033
|
size_t result;
|
1045
1034
|
void* newBuff;
|
@@ -1052,10 +1041,9 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
|
|
1052
1041
|
memcpy(newBuff, samplesBuffer, sBuffSize);
|
1053
1042
|
ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
|
1054
1043
|
|
1055
|
-
result =
|
1056
|
-
|
1057
|
-
|
1058
|
-
params);
|
1044
|
+
result =
|
1045
|
+
ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff,
|
1046
|
+
samplesSizes, nbSamples, params);
|
1059
1047
|
free(newBuff);
|
1060
1048
|
return result;
|
1061
1049
|
}
|
@@ -1064,11 +1052,13 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
|
|
1064
1052
|
size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
1065
1053
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
|
1066
1054
|
{
|
1067
|
-
|
1055
|
+
ZDICT_cover_params_t params;
|
1068
1056
|
memset(&params, 0, sizeof(params));
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1057
|
+
params.d = 8;
|
1058
|
+
params.steps = 4;
|
1059
|
+
return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
|
1060
|
+
samplesBuffer, samplesSizes,
|
1061
|
+
nbSamples, &params);
|
1072
1062
|
}
|
1073
1063
|
|
1074
1064
|
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
@@ -20,10 +20,12 @@ extern "C" {
|
|
20
20
|
|
21
21
|
|
22
22
|
/* ===== ZDICTLIB_API : control library symbols visibility ===== */
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
23
|
+
#ifndef ZDICTLIB_VISIBILITY
|
24
|
+
# if defined(__GNUC__) && (__GNUC__ >= 4)
|
25
|
+
# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
|
26
|
+
# else
|
27
|
+
# define ZDICTLIB_VISIBILITY
|
28
|
+
# endif
|
27
29
|
#endif
|
28
30
|
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
29
31
|
# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
|
@@ -34,18 +36,20 @@ extern "C" {
|
|
34
36
|
#endif
|
35
37
|
|
36
38
|
|
37
|
-
/*! ZDICT_trainFromBuffer()
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
39
|
+
/*! ZDICT_trainFromBuffer():
|
40
|
+
* Train a dictionary from an array of samples.
|
41
|
+
* Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
|
42
|
+
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
43
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
44
|
+
* The resulting dictionary will be saved into `dictBuffer`.
|
45
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
46
|
+
* or an error code, which can be tested with ZDICT_isError().
|
47
|
+
* Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
|
48
|
+
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
49
|
+
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
50
|
+
* In general, it's recommended to provide a few thousands samples, but this can vary a lot.
|
51
|
+
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
52
|
+
*/
|
49
53
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
50
54
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
|
51
55
|
|
@@ -67,94 +71,78 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
|
67
71
|
* ==================================================================================== */
|
68
72
|
|
69
73
|
typedef struct {
|
70
|
-
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
|
71
74
|
int compressionLevel; /* 0 means default; target a specific zstd compression level */
|
72
75
|
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
73
76
|
unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
|
74
|
-
unsigned reserved[2]; /* reserved space for future parameters */
|
75
77
|
} ZDICT_params_t;
|
76
78
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`),
|
82
|
-
or an error code, which can be tested by ZDICT_isError().
|
83
|
-
note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
|
84
|
-
*/
|
85
|
-
ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
|
86
|
-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
87
|
-
ZDICT_params_t parameters);
|
88
|
-
|
89
|
-
/*! COVER_params_t :
|
90
|
-
For all values 0 means default.
|
91
|
-
k and d are the only required parameters.
|
92
|
-
*/
|
79
|
+
/*! ZDICT_cover_params_t:
|
80
|
+
* For all values 0 means default.
|
81
|
+
* k and d are the only required parameters.
|
82
|
+
*/
|
93
83
|
typedef struct {
|
94
84
|
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
95
85
|
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
96
86
|
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
|
97
|
-
|
98
87
|
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
/*! ZDICT_finalizeDictionary()
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
*/
|
88
|
+
ZDICT_params_t zParams;
|
89
|
+
} ZDICT_cover_params_t;
|
90
|
+
|
91
|
+
|
92
|
+
/*! ZDICT_trainFromBuffer_cover():
|
93
|
+
* Train a dictionary from an array of samples using the COVER algorithm.
|
94
|
+
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
95
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
96
|
+
* The resulting dictionary will be saved into `dictBuffer`.
|
97
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
98
|
+
* or an error code, which can be tested with ZDICT_isError().
|
99
|
+
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
|
100
|
+
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
101
|
+
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
102
|
+
* In general, it's recommended to provide a few thousands samples, but this can vary a lot.
|
103
|
+
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
104
|
+
*/
|
105
|
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
106
|
+
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
107
|
+
const size_t *samplesSizes, unsigned nbSamples,
|
108
|
+
ZDICT_cover_params_t parameters);
|
109
|
+
|
110
|
+
/*! ZDICT_optimizeTrainFromBuffer_cover():
|
111
|
+
* The same requirements as above hold for all the parameters except `parameters`.
|
112
|
+
* This function tries many parameter combinations and picks the best parameters.
|
113
|
+
* `*parameters` is filled with the best parameters found, and the dictionary
|
114
|
+
* constructed with those parameters is stored in `dictBuffer`.
|
115
|
+
*
|
116
|
+
* All of the parameters d, k, steps are optional.
|
117
|
+
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
|
118
|
+
* if steps is zero it defaults to its default value.
|
119
|
+
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
|
120
|
+
*
|
121
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
122
|
+
* or an error code, which can be tested with ZDICT_isError().
|
123
|
+
* On success `*parameters` contains the parameters selected.
|
124
|
+
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
125
|
+
*/
|
126
|
+
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
127
|
+
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
128
|
+
const size_t *samplesSizes, unsigned nbSamples,
|
129
|
+
ZDICT_cover_params_t *parameters);
|
130
|
+
|
131
|
+
/*! ZDICT_finalizeDictionary():
|
132
|
+
* Given a custom content as a basis for dictionary, and a set of samples,
|
133
|
+
* finalize dictionary by adding headers and statistics.
|
134
|
+
*
|
135
|
+
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
136
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
|
137
|
+
*
|
138
|
+
* dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
139
|
+
* maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
140
|
+
*
|
141
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
142
|
+
* or an error code, which can be tested by ZDICT_isError().
|
143
|
+
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
144
|
+
* Note 2: dictBuffer and dictContent can overlap
|
145
|
+
*/
|
158
146
|
#define ZDICT_CONTENTSIZE_MIN 128
|
159
147
|
#define ZDICT_DICTSIZE_MIN 256
|
160
148
|
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
@@ -162,7 +150,28 @@ ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBuffer
|
|
162
150
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
163
151
|
ZDICT_params_t parameters);
|
164
152
|
|
165
|
-
|
153
|
+
typedef struct {
|
154
|
+
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
|
155
|
+
ZDICT_params_t zParams;
|
156
|
+
} ZDICT_legacy_params_t;
|
157
|
+
|
158
|
+
/*! ZDICT_trainFromBuffer_legacy():
|
159
|
+
* Train a dictionary from an array of samples.
|
160
|
+
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
161
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
162
|
+
* The resulting dictionary will be saved into `dictBuffer`.
|
163
|
+
* `parameters` is optional and can be provided with values set to 0 to mean "default".
|
164
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
165
|
+
* or an error code, which can be tested with ZDICT_isError().
|
166
|
+
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
167
|
+
* It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
168
|
+
* In general, it's recommended to provide a few thousands samples, but this can vary a lot.
|
169
|
+
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
170
|
+
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
171
|
+
*/
|
172
|
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
173
|
+
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
174
|
+
const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters);
|
166
175
|
|
167
176
|
/* Deprecation warnings */
|
168
177
|
/* It is generally possible to disable deprecation warnings from compiler,
|