zstd-ruby 1.2.0.0 → 1.3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (30)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +7 -5
  4. data/ext/zstdruby/libzstd/common/bitstream.h +23 -9
  5. data/ext/zstdruby/libzstd/common/error_private.c +4 -1
  6. data/ext/zstdruby/libzstd/common/huf.h +20 -0
  7. data/ext/zstdruby/libzstd/common/mem.h +0 -14
  8. data/ext/zstdruby/libzstd/common/pool.c +12 -0
  9. data/ext/zstdruby/libzstd/common/pool.h +5 -0
  10. data/ext/zstdruby/libzstd/common/threading.c +0 -1
  11. data/ext/zstdruby/libzstd/common/zstd_common.c +25 -18
  12. data/ext/zstdruby/libzstd/common/zstd_errors.h +15 -7
  13. data/ext/zstdruby/libzstd/common/zstd_internal.h +59 -9
  14. data/ext/zstdruby/libzstd/compress/huf_compress.c +7 -3
  15. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1082 -487
  16. data/ext/zstdruby/libzstd/compress/zstd_opt.h +30 -15
  17. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +362 -158
  18. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +49 -13
  19. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +150 -26
  20. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +380 -258
  21. data/ext/zstdruby/libzstd/dictBuilder/cover.c +23 -37
  22. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +30 -40
  23. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +104 -95
  24. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +11 -10
  25. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +14 -19
  26. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -12
  27. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -14
  28. data/ext/zstdruby/libzstd/zstd.h +507 -166
  29. data/lib/zstd-ruby/version.rb +1 -1
  30. metadata +2 -2
@@ -398,7 +398,8 @@ typedef struct {
398
398
  */
399
399
  static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
400
400
  COVER_map_t *activeDmers, U32 begin,
401
- U32 end, COVER_params_t parameters) {
401
+ U32 end,
402
+ ZDICT_cover_params_t parameters) {
402
403
  /* Constants */
403
404
  const U32 k = parameters.k;
404
405
  const U32 d = parameters.d;
@@ -478,7 +479,7 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
478
479
  * Check the validity of the parameters.
479
480
  * Returns non-zero if the parameters are valid and 0 otherwise.
480
481
  */
481
- static int COVER_checkParameters(COVER_params_t parameters) {
482
+ static int COVER_checkParameters(ZDICT_cover_params_t parameters) {
482
483
  /* k and d are required parameters */
483
484
  if (parameters.d == 0 || parameters.k == 0) {
484
485
  return 0;
@@ -600,7 +601,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
600
601
  static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
601
602
  COVER_map_t *activeDmers, void *dictBuffer,
602
603
  size_t dictBufferCapacity,
603
- COVER_params_t parameters) {
604
+ ZDICT_cover_params_t parameters) {
604
605
  BYTE *const dict = (BYTE *)dictBuffer;
605
606
  size_t tail = dictBufferCapacity;
606
607
  /* Divide the data up into epochs of equal size.
@@ -639,22 +640,10 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
639
640
  return tail;
640
641
  }
641
642
 
642
- /**
643
- * Translate from COVER_params_t to ZDICT_params_t required for finalizing the
644
- * dictionary.
645
- */
646
- static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
647
- ZDICT_params_t zdictParams;
648
- memset(&zdictParams, 0, sizeof(zdictParams));
649
- zdictParams.notificationLevel = 1;
650
- zdictParams.dictID = parameters.dictID;
651
- zdictParams.compressionLevel = parameters.compressionLevel;
652
- return zdictParams;
653
- }
654
-
655
- ZDICTLIB_API size_t COVER_trainFromBuffer(
643
+ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
656
644
  void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
657
- const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) {
645
+ const size_t *samplesSizes, unsigned nbSamples,
646
+ ZDICT_cover_params_t parameters) {
658
647
  BYTE *const dict = (BYTE *)dictBuffer;
659
648
  COVER_ctx_t ctx;
660
649
  COVER_map_t activeDmers;
@@ -673,7 +662,7 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
673
662
  return ERROR(dstSize_tooSmall);
674
663
  }
675
664
  /* Initialize global data */
676
- g_displayLevel = parameters.notificationLevel;
665
+ g_displayLevel = parameters.zParams.notificationLevel;
677
666
  /* Initialize context and activeDmers */
678
667
  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
679
668
  parameters.d)) {
@@ -690,10 +679,9 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
690
679
  const size_t tail =
691
680
  COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
692
681
  dictBufferCapacity, parameters);
693
- ZDICT_params_t zdictParams = COVER_translateParams(parameters);
694
682
  const size_t dictionarySize = ZDICT_finalizeDictionary(
695
683
  dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
696
- samplesBuffer, samplesSizes, nbSamples, zdictParams);
684
+ samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
697
685
  if (!ZSTD_isError(dictionarySize)) {
698
686
  DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
699
687
  (U32)dictionarySize);
@@ -718,7 +706,7 @@ typedef struct COVER_best_s {
718
706
  size_t liveJobs;
719
707
  void *dict;
720
708
  size_t dictSize;
721
- COVER_params_t parameters;
709
+ ZDICT_cover_params_t parameters;
722
710
  size_t compressedSize;
723
711
  } COVER_best_t;
724
712
 
@@ -786,7 +774,7 @@ static void COVER_best_start(COVER_best_t *best) {
786
774
  * If this dictionary is the best so far save it and its parameters.
787
775
  */
788
776
  static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
789
- COVER_params_t parameters, void *dict,
777
+ ZDICT_cover_params_t parameters, void *dict,
790
778
  size_t dictSize) {
791
779
  if (!best) {
792
780
  return;
@@ -830,7 +818,7 @@ typedef struct COVER_tryParameters_data_s {
830
818
  const COVER_ctx_t *ctx;
831
819
  COVER_best_t *best;
832
820
  size_t dictBufferCapacity;
833
- COVER_params_t parameters;
821
+ ZDICT_cover_params_t parameters;
834
822
  } COVER_tryParameters_data_t;
835
823
 
836
824
  /**
@@ -842,7 +830,7 @@ static void COVER_tryParameters(void *opaque) {
842
830
  /* Save parameters as local variables */
843
831
  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
844
832
  const COVER_ctx_t *const ctx = data->ctx;
845
- const COVER_params_t parameters = data->parameters;
833
+ const ZDICT_cover_params_t parameters = data->parameters;
846
834
  size_t dictBufferCapacity = data->dictBufferCapacity;
847
835
  size_t totalCompressedSize = ERROR(GENERIC);
848
836
  /* Allocate space for hash table, dict, and freqs */
@@ -863,10 +851,10 @@ static void COVER_tryParameters(void *opaque) {
863
851
  {
864
852
  const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
865
853
  dictBufferCapacity, parameters);
866
- const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
867
854
  dictBufferCapacity = ZDICT_finalizeDictionary(
868
855
  dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
869
- ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, zdictParams);
856
+ ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
857
+ parameters.zParams);
870
858
  if (ZDICT_isError(dictBufferCapacity)) {
871
859
  DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
872
860
  goto _cleanup;
@@ -892,8 +880,8 @@ static void COVER_tryParameters(void *opaque) {
892
880
  }
893
881
  /* Create the cctx and cdict */
894
882
  cctx = ZSTD_createCCtx();
895
- cdict =
896
- ZSTD_createCDict(dict, dictBufferCapacity, parameters.compressionLevel);
883
+ cdict = ZSTD_createCDict(dict, dictBufferCapacity,
884
+ parameters.zParams.compressionLevel);
897
885
  if (!dst || !cctx || !cdict) {
898
886
  goto _compressCleanup;
899
887
  }
@@ -930,12 +918,10 @@ _cleanup:
930
918
  }
931
919
  }
932
920
 
933
- ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
934
- size_t dictBufferCapacity,
935
- const void *samplesBuffer,
936
- const size_t *samplesSizes,
937
- unsigned nbSamples,
938
- COVER_params_t *parameters) {
921
+ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
922
+ void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
923
+ const size_t *samplesSizes, unsigned nbSamples,
924
+ ZDICT_cover_params_t *parameters) {
939
925
  /* constants */
940
926
  const unsigned nbThreads = parameters->nbThreads;
941
927
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
@@ -947,7 +933,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
947
933
  const unsigned kIterations =
948
934
  (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
949
935
  /* Local variables */
950
- const int displayLevel = parameters->notificationLevel;
936
+ const int displayLevel = parameters->zParams.notificationLevel;
951
937
  unsigned iteration = 1;
952
938
  unsigned d;
953
939
  unsigned k;
@@ -976,7 +962,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
976
962
  /* Initialization */
977
963
  COVER_best_init(&best);
978
964
  /* Turn down global display level to clean up display at level 2 and below */
979
- g_displayLevel = parameters->notificationLevel - 1;
965
+ g_displayLevel = parameters->zParams.notificationLevel - 1;
980
966
  /* Loop through d first because each new value needs a new context */
981
967
  LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
982
968
  kIterations);
@@ -94,7 +94,7 @@ const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(error
94
94
  unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
95
95
  {
96
96
  if (dictSize < 8) return 0;
97
- if (MEM_readLE32(dictBuffer) != ZSTD_DICT_MAGIC) return 0;
97
+ if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
98
98
  return MEM_readLE32((const char*)dictBuffer + 4);
99
99
  }
100
100
 
@@ -487,7 +487,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
487
487
  }
488
488
 
489
489
 
490
- static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
490
+ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
491
491
  const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
492
492
  const size_t* fileSizes, unsigned nbFiles,
493
493
  U32 minRatio, U32 notificationLevel)
@@ -576,7 +576,7 @@ typedef struct
576
576
  {
577
577
  ZSTD_CCtx* ref;
578
578
  ZSTD_CCtx* zc;
579
- void* workPlace; /* must be ZSTD_BLOCKSIZE_ABSOLUTEMAX allocated */
579
+ void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
580
580
  } EStats_ress_t;
581
581
 
582
582
  #define MAXREPOFFSET 1024
@@ -585,14 +585,14 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
585
585
  U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
586
586
  const void* src, size_t srcSize, U32 notificationLevel)
587
587
  {
588
- size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << params.cParams.windowLog);
588
+ size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
589
589
  size_t cSize;
590
590
 
591
591
  if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
592
592
  { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
593
593
  if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
594
594
  }
595
- cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
595
+ cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
596
596
  if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
597
597
 
598
598
  if (cSize) { /* if == 0; block is not compressible */
@@ -634,17 +634,6 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
634
634
  } } }
635
635
  }
636
636
 
637
- /*
638
- static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
639
- {
640
- unsigned u;
641
- size_t max=0;
642
- for (u=0; u<nbFiles; u++)
643
- if (max < fileSizes[u]) max = fileSizes[u];
644
- return max;
645
- }
646
- */
647
-
648
637
  static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
649
638
  {
650
639
  size_t total=0;
@@ -700,7 +689,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
700
689
  /* init */
701
690
  esr.ref = ZSTD_createCCtx();
702
691
  esr.zc = ZSTD_createCCtx();
703
- esr.workPlace = malloc(ZSTD_BLOCKSIZE_ABSOLUTEMAX);
692
+ esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
704
693
  if (!esr.ref || !esr.zc || !esr.workPlace) {
705
694
  eSize = ERROR(memory_allocation);
706
695
  DISPLAYLEVEL(1, "Not enough memory \n");
@@ -865,7 +854,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
865
854
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
866
855
 
867
856
  /* dictionary header */
868
- MEM_writeLE32(header, ZSTD_DICT_MAGIC);
857
+ MEM_writeLE32(header, ZSTD_MAGIC_DICTIONARY);
869
858
  { U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
870
859
  U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
871
860
  U32 const dictID = params.dictID ? params.dictID : compliantID;
@@ -917,7 +906,7 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
917
906
  }
918
907
 
919
908
  /* add dictionary header (after entropy tables) */
920
- MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
909
+ MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY);
921
910
  { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
922
911
  U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
923
912
  U32 const dictID = params.dictID ? params.dictID : compliantID;
@@ -930,14 +919,14 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
930
919
  }
931
920
 
932
921
 
933
- /*! ZDICT_trainFromBuffer_unsafe() :
922
+ /*! ZDICT_trainFromBuffer_unsafe_legacy() :
934
923
  * Warning : `samplesBuffer` must be followed by noisy guard band.
935
924
  * @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
936
925
  */
937
- size_t ZDICT_trainFromBuffer_unsafe(
926
+ size_t ZDICT_trainFromBuffer_unsafe_legacy(
938
927
  void* dictBuffer, size_t maxDictSize,
939
928
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
940
- ZDICT_params_t params)
929
+ ZDICT_legacy_params_t params)
941
930
  {
942
931
  U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
943
932
  dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
@@ -946,7 +935,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
946
935
  size_t const targetDictSize = maxDictSize;
947
936
  size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
948
937
  size_t dictSize = 0;
949
- U32 const notificationLevel = params.notificationLevel;
938
+ U32 const notificationLevel = params.zParams.notificationLevel;
950
939
 
951
940
  /* checks */
952
941
  if (!dictList) return ERROR(memory_allocation);
@@ -957,13 +946,13 @@ size_t ZDICT_trainFromBuffer_unsafe(
957
946
  ZDICT_initDictItem(dictList);
958
947
 
959
948
  /* build dictionary */
960
- ZDICT_trainBuffer(dictList, dictListSize,
961
- samplesBuffer, samplesBuffSize,
962
- samplesSizes, nbSamples,
963
- minRep, notificationLevel);
949
+ ZDICT_trainBuffer_legacy(dictList, dictListSize,
950
+ samplesBuffer, samplesBuffSize,
951
+ samplesSizes, nbSamples,
952
+ minRep, notificationLevel);
964
953
 
965
954
  /* display best matches */
966
- if (params.notificationLevel>= 3) {
955
+ if (params.zParams.notificationLevel>= 3) {
967
956
  U32 const nb = MIN(25, dictList[0].pos);
968
957
  U32 const dictContentSize = ZDICT_dictSize(dictList);
969
958
  U32 u;
@@ -1026,7 +1015,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
1026
1015
 
1027
1016
  dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
1028
1017
  samplesBuffer, samplesSizes, nbSamples,
1029
- params);
1018
+ params.zParams);
1030
1019
  }
1031
1020
 
1032
1021
  /* clean up */
@@ -1037,9 +1026,9 @@ size_t ZDICT_trainFromBuffer_unsafe(
1037
1026
 
1038
1027
  /* issue : samplesBuffer need to be followed by a noisy guard band.
1039
1028
  * work around : duplicate the buffer, and add the noise */
1040
- size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
1041
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
1042
- ZDICT_params_t params)
1029
+ size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
1030
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
1031
+ ZDICT_legacy_params_t params)
1043
1032
  {
1044
1033
  size_t result;
1045
1034
  void* newBuff;
@@ -1052,10 +1041,9 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
1052
1041
  memcpy(newBuff, samplesBuffer, sBuffSize);
1053
1042
  ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
1054
1043
 
1055
- result = ZDICT_trainFromBuffer_unsafe(
1056
- dictBuffer, dictBufferCapacity,
1057
- newBuff, samplesSizes, nbSamples,
1058
- params);
1044
+ result =
1045
+ ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff,
1046
+ samplesSizes, nbSamples, params);
1059
1047
  free(newBuff);
1060
1048
  return result;
1061
1049
  }
@@ -1064,11 +1052,13 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
1064
1052
  size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
1065
1053
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
1066
1054
  {
1067
- ZDICT_params_t params;
1055
+ ZDICT_cover_params_t params;
1068
1056
  memset(&params, 0, sizeof(params));
1069
- return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity,
1070
- samplesBuffer, samplesSizes, nbSamples,
1071
- params);
1057
+ params.d = 8;
1058
+ params.steps = 4;
1059
+ return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
1060
+ samplesBuffer, samplesSizes,
1061
+ nbSamples, &params);
1072
1062
  }
1073
1063
 
1074
1064
  size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
@@ -20,10 +20,12 @@ extern "C" {
20
20
 
21
21
 
22
22
  /* ===== ZDICTLIB_API : control library symbols visibility ===== */
23
- #if defined(__GNUC__) && (__GNUC__ >= 4)
24
- # define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
25
- #else
26
- # define ZDICTLIB_VISIBILITY
23
+ #ifndef ZDICTLIB_VISIBILITY
24
+ # if defined(__GNUC__) && (__GNUC__ >= 4)
25
+ # define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
26
+ # else
27
+ # define ZDICTLIB_VISIBILITY
28
+ # endif
27
29
  #endif
28
30
  #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
29
31
  # define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
@@ -34,18 +36,20 @@ extern "C" {
34
36
  #endif
35
37
 
36
38
 
37
- /*! ZDICT_trainFromBuffer() :
38
- Train a dictionary from an array of samples.
39
- Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
40
- supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
41
- The resulting dictionary will be saved into `dictBuffer`.
42
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
43
- or an error code, which can be tested with ZDICT_isError().
44
- Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
45
- It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
46
- In general, it's recommended to provide a few thousands samples, but this can vary a lot.
47
- It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
48
- */
39
+ /*! ZDICT_trainFromBuffer():
40
+ * Train a dictionary from an array of samples.
41
+ * Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
42
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
43
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
44
+ * The resulting dictionary will be saved into `dictBuffer`.
45
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
46
+ * or an error code, which can be tested with ZDICT_isError().
47
+ * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
48
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
49
+ * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
50
+ * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
51
+ * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
52
+ */
49
53
  ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
50
54
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
51
55
 
@@ -67,94 +71,78 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
67
71
  * ==================================================================================== */
68
72
 
69
73
  typedef struct {
70
- unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
71
74
  int compressionLevel; /* 0 means default; target a specific zstd compression level */
72
75
  unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
73
76
  unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
74
- unsigned reserved[2]; /* reserved space for future parameters */
75
77
  } ZDICT_params_t;
76
78
 
77
-
78
- /*! ZDICT_trainFromBuffer_advanced() :
79
- Same as ZDICT_trainFromBuffer() with control over more parameters.
80
- `parameters` is optional and can be provided with values set to 0 to mean "default".
81
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`),
82
- or an error code, which can be tested by ZDICT_isError().
83
- note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
84
- */
85
- ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
86
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
87
- ZDICT_params_t parameters);
88
-
89
- /*! COVER_params_t :
90
- For all values 0 means default.
91
- k and d are the only required parameters.
92
- */
79
+ /*! ZDICT_cover_params_t:
80
+ * For all values 0 means default.
81
+ * k and d are the only required parameters.
82
+ */
93
83
  typedef struct {
94
84
  unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
95
85
  unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
96
86
  unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
97
-
98
87
  unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
99
- unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
100
- unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
101
- int compressionLevel; /* 0 means default; target a specific zstd compression level */
102
- } COVER_params_t;
103
-
104
-
105
- /*! COVER_trainFromBuffer() :
106
- Train a dictionary from an array of samples using the COVER algorithm.
107
- Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
108
- supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
109
- The resulting dictionary will be saved into `dictBuffer`.
110
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
111
- or an error code, which can be tested with ZDICT_isError().
112
- Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
113
- Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
114
- It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
115
- In general, it's recommended to provide a few thousands samples, but this can vary a lot.
116
- It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
117
- */
118
- ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
119
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
120
- COVER_params_t parameters);
121
-
122
- /*! COVER_optimizeTrainFromBuffer() :
123
- The same requirements as above hold for all the parameters except `parameters`.
124
- This function tries many parameter combinations and picks the best parameters.
125
- `*parameters` is filled with the best parameters found, and the dictionary
126
- constructed with those parameters is stored in `dictBuffer`.
127
-
128
- All of the parameters d, k, steps are optional.
129
- If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
130
- if steps is zero it defaults to its default value.
131
- If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
132
-
133
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
134
- or an error code, which can be tested with ZDICT_isError().
135
- On success `*parameters` contains the parameters selected.
136
- Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
137
- */
138
- ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
139
- const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
140
- COVER_params_t *parameters);
141
-
142
- /*! ZDICT_finalizeDictionary() :
143
-
144
- Given a custom content as a basis for dictionary, and a set of samples,
145
- finalize dictionary by adding headers and statistics.
146
-
147
- Samples must be stored concatenated in a flat buffer `samplesBuffer`,
148
- supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
149
-
150
- dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
151
- maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
152
-
153
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
154
- or an error code, which can be tested by ZDICT_isError().
155
- note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
156
- note 2 : dictBuffer and dictContent can overlap
157
- */
88
+ ZDICT_params_t zParams;
89
+ } ZDICT_cover_params_t;
90
+
91
+
92
+ /*! ZDICT_trainFromBuffer_cover():
93
+ * Train a dictionary from an array of samples using the COVER algorithm.
94
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
95
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
96
+ * The resulting dictionary will be saved into `dictBuffer`.
97
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
98
+ * or an error code, which can be tested with ZDICT_isError().
99
+ * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
100
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
101
+ * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
102
+ * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
103
+ * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
104
+ */
105
+ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
106
+ void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
107
+ const size_t *samplesSizes, unsigned nbSamples,
108
+ ZDICT_cover_params_t parameters);
109
+
110
+ /*! ZDICT_optimizeTrainFromBuffer_cover():
111
+ * The same requirements as above hold for all the parameters except `parameters`.
112
+ * This function tries many parameter combinations and picks the best parameters.
113
+ * `*parameters` is filled with the best parameters found, and the dictionary
114
+ * constructed with those parameters is stored in `dictBuffer`.
115
+ *
116
+ * All of the parameters d, k, steps are optional.
117
+ * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
118
+ * if steps is zero it defaults to its default value.
119
+ * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
120
+ *
121
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
122
+ * or an error code, which can be tested with ZDICT_isError().
123
+ * On success `*parameters` contains the parameters selected.
124
+ * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
125
+ */
126
+ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
127
+ void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
128
+ const size_t *samplesSizes, unsigned nbSamples,
129
+ ZDICT_cover_params_t *parameters);
130
+
131
+ /*! ZDICT_finalizeDictionary():
132
+ * Given a custom content as a basis for dictionary, and a set of samples,
133
+ * finalize dictionary by adding headers and statistics.
134
+ *
135
+ * Samples must be stored concatenated in a flat buffer `samplesBuffer`,
136
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
137
+ *
138
+ * dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
139
+ * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
140
+ *
141
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
142
+ * or an error code, which can be tested by ZDICT_isError().
143
+ * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
144
+ * Note 2: dictBuffer and dictContent can overlap
145
+ */
158
146
  #define ZDICT_CONTENTSIZE_MIN 128
159
147
  #define ZDICT_DICTSIZE_MIN 256
160
148
  ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
@@ -162,7 +150,28 @@ ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBuffer
162
150
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
163
151
  ZDICT_params_t parameters);
164
152
 
165
-
153
+ typedef struct {
154
+ unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
155
+ ZDICT_params_t zParams;
156
+ } ZDICT_legacy_params_t;
157
+
158
+ /*! ZDICT_trainFromBuffer_legacy():
159
+ * Train a dictionary from an array of samples.
160
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
161
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
162
+ * The resulting dictionary will be saved into `dictBuffer`.
163
+ * `parameters` is optional and can be provided with values set to 0 to mean "default".
164
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
165
+ * or an error code, which can be tested with ZDICT_isError().
166
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
167
+ * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
168
+ * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
169
+ * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
170
+ * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
171
+ */
172
+ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
173
+ void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
174
+ const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters);
166
175
 
167
176
  /* Deprecation warnings */
168
177
  /* It is generally possible to disable deprecation warnings from compiler,