zstd-ruby 1.2.0.0 → 1.3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +7 -5
  4. data/ext/zstdruby/libzstd/common/bitstream.h +23 -9
  5. data/ext/zstdruby/libzstd/common/error_private.c +4 -1
  6. data/ext/zstdruby/libzstd/common/huf.h +20 -0
  7. data/ext/zstdruby/libzstd/common/mem.h +0 -14
  8. data/ext/zstdruby/libzstd/common/pool.c +12 -0
  9. data/ext/zstdruby/libzstd/common/pool.h +5 -0
  10. data/ext/zstdruby/libzstd/common/threading.c +0 -1
  11. data/ext/zstdruby/libzstd/common/zstd_common.c +25 -18
  12. data/ext/zstdruby/libzstd/common/zstd_errors.h +15 -7
  13. data/ext/zstdruby/libzstd/common/zstd_internal.h +59 -9
  14. data/ext/zstdruby/libzstd/compress/huf_compress.c +7 -3
  15. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1082 -487
  16. data/ext/zstdruby/libzstd/compress/zstd_opt.h +30 -15
  17. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +362 -158
  18. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +49 -13
  19. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +150 -26
  20. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +380 -258
  21. data/ext/zstdruby/libzstd/dictBuilder/cover.c +23 -37
  22. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +30 -40
  23. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +104 -95
  24. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +11 -10
  25. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +14 -19
  26. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -12
  27. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -14
  28. data/ext/zstdruby/libzstd/zstd.h +507 -166
  29. data/lib/zstd-ruby/version.rb +1 -1
  30. metadata +2 -2
@@ -398,7 +398,8 @@ typedef struct {
398
398
  */
399
399
  static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
400
400
  COVER_map_t *activeDmers, U32 begin,
401
- U32 end, COVER_params_t parameters) {
401
+ U32 end,
402
+ ZDICT_cover_params_t parameters) {
402
403
  /* Constants */
403
404
  const U32 k = parameters.k;
404
405
  const U32 d = parameters.d;
@@ -478,7 +479,7 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
478
479
  * Check the validity of the parameters.
479
480
  * Returns non-zero if the parameters are valid and 0 otherwise.
480
481
  */
481
- static int COVER_checkParameters(COVER_params_t parameters) {
482
+ static int COVER_checkParameters(ZDICT_cover_params_t parameters) {
482
483
  /* k and d are required parameters */
483
484
  if (parameters.d == 0 || parameters.k == 0) {
484
485
  return 0;
@@ -600,7 +601,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
600
601
  static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
601
602
  COVER_map_t *activeDmers, void *dictBuffer,
602
603
  size_t dictBufferCapacity,
603
- COVER_params_t parameters) {
604
+ ZDICT_cover_params_t parameters) {
604
605
  BYTE *const dict = (BYTE *)dictBuffer;
605
606
  size_t tail = dictBufferCapacity;
606
607
  /* Divide the data up into epochs of equal size.
@@ -639,22 +640,10 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
639
640
  return tail;
640
641
  }
641
642
 
642
- /**
643
- * Translate from COVER_params_t to ZDICT_params_t required for finalizing the
644
- * dictionary.
645
- */
646
- static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
647
- ZDICT_params_t zdictParams;
648
- memset(&zdictParams, 0, sizeof(zdictParams));
649
- zdictParams.notificationLevel = 1;
650
- zdictParams.dictID = parameters.dictID;
651
- zdictParams.compressionLevel = parameters.compressionLevel;
652
- return zdictParams;
653
- }
654
-
655
- ZDICTLIB_API size_t COVER_trainFromBuffer(
643
+ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
656
644
  void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
657
- const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) {
645
+ const size_t *samplesSizes, unsigned nbSamples,
646
+ ZDICT_cover_params_t parameters) {
658
647
  BYTE *const dict = (BYTE *)dictBuffer;
659
648
  COVER_ctx_t ctx;
660
649
  COVER_map_t activeDmers;
@@ -673,7 +662,7 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
673
662
  return ERROR(dstSize_tooSmall);
674
663
  }
675
664
  /* Initialize global data */
676
- g_displayLevel = parameters.notificationLevel;
665
+ g_displayLevel = parameters.zParams.notificationLevel;
677
666
  /* Initialize context and activeDmers */
678
667
  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
679
668
  parameters.d)) {
@@ -690,10 +679,9 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
690
679
  const size_t tail =
691
680
  COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
692
681
  dictBufferCapacity, parameters);
693
- ZDICT_params_t zdictParams = COVER_translateParams(parameters);
694
682
  const size_t dictionarySize = ZDICT_finalizeDictionary(
695
683
  dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
696
- samplesBuffer, samplesSizes, nbSamples, zdictParams);
684
+ samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
697
685
  if (!ZSTD_isError(dictionarySize)) {
698
686
  DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
699
687
  (U32)dictionarySize);
@@ -718,7 +706,7 @@ typedef struct COVER_best_s {
718
706
  size_t liveJobs;
719
707
  void *dict;
720
708
  size_t dictSize;
721
- COVER_params_t parameters;
709
+ ZDICT_cover_params_t parameters;
722
710
  size_t compressedSize;
723
711
  } COVER_best_t;
724
712
 
@@ -786,7 +774,7 @@ static void COVER_best_start(COVER_best_t *best) {
786
774
  * If this dictionary is the best so far save it and its parameters.
787
775
  */
788
776
  static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
789
- COVER_params_t parameters, void *dict,
777
+ ZDICT_cover_params_t parameters, void *dict,
790
778
  size_t dictSize) {
791
779
  if (!best) {
792
780
  return;
@@ -830,7 +818,7 @@ typedef struct COVER_tryParameters_data_s {
830
818
  const COVER_ctx_t *ctx;
831
819
  COVER_best_t *best;
832
820
  size_t dictBufferCapacity;
833
- COVER_params_t parameters;
821
+ ZDICT_cover_params_t parameters;
834
822
  } COVER_tryParameters_data_t;
835
823
 
836
824
  /**
@@ -842,7 +830,7 @@ static void COVER_tryParameters(void *opaque) {
842
830
  /* Save parameters as local variables */
843
831
  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
844
832
  const COVER_ctx_t *const ctx = data->ctx;
845
- const COVER_params_t parameters = data->parameters;
833
+ const ZDICT_cover_params_t parameters = data->parameters;
846
834
  size_t dictBufferCapacity = data->dictBufferCapacity;
847
835
  size_t totalCompressedSize = ERROR(GENERIC);
848
836
  /* Allocate space for hash table, dict, and freqs */
@@ -863,10 +851,10 @@ static void COVER_tryParameters(void *opaque) {
863
851
  {
864
852
  const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
865
853
  dictBufferCapacity, parameters);
866
- const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
867
854
  dictBufferCapacity = ZDICT_finalizeDictionary(
868
855
  dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
869
- ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, zdictParams);
856
+ ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
857
+ parameters.zParams);
870
858
  if (ZDICT_isError(dictBufferCapacity)) {
871
859
  DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
872
860
  goto _cleanup;
@@ -892,8 +880,8 @@ static void COVER_tryParameters(void *opaque) {
892
880
  }
893
881
  /* Create the cctx and cdict */
894
882
  cctx = ZSTD_createCCtx();
895
- cdict =
896
- ZSTD_createCDict(dict, dictBufferCapacity, parameters.compressionLevel);
883
+ cdict = ZSTD_createCDict(dict, dictBufferCapacity,
884
+ parameters.zParams.compressionLevel);
897
885
  if (!dst || !cctx || !cdict) {
898
886
  goto _compressCleanup;
899
887
  }
@@ -930,12 +918,10 @@ _cleanup:
930
918
  }
931
919
  }
932
920
 
933
- ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
934
- size_t dictBufferCapacity,
935
- const void *samplesBuffer,
936
- const size_t *samplesSizes,
937
- unsigned nbSamples,
938
- COVER_params_t *parameters) {
921
+ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
922
+ void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
923
+ const size_t *samplesSizes, unsigned nbSamples,
924
+ ZDICT_cover_params_t *parameters) {
939
925
  /* constants */
940
926
  const unsigned nbThreads = parameters->nbThreads;
941
927
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
@@ -947,7 +933,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
947
933
  const unsigned kIterations =
948
934
  (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
949
935
  /* Local variables */
950
- const int displayLevel = parameters->notificationLevel;
936
+ const int displayLevel = parameters->zParams.notificationLevel;
951
937
  unsigned iteration = 1;
952
938
  unsigned d;
953
939
  unsigned k;
@@ -976,7 +962,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
976
962
  /* Initialization */
977
963
  COVER_best_init(&best);
978
964
  /* Turn down global display level to clean up display at level 2 and below */
979
- g_displayLevel = parameters->notificationLevel - 1;
965
+ g_displayLevel = parameters->zParams.notificationLevel - 1;
980
966
  /* Loop through d first because each new value needs a new context */
981
967
  LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
982
968
  kIterations);
@@ -94,7 +94,7 @@ const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(error
94
94
  unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
95
95
  {
96
96
  if (dictSize < 8) return 0;
97
- if (MEM_readLE32(dictBuffer) != ZSTD_DICT_MAGIC) return 0;
97
+ if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
98
98
  return MEM_readLE32((const char*)dictBuffer + 4);
99
99
  }
100
100
 
@@ -487,7 +487,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
487
487
  }
488
488
 
489
489
 
490
- static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
490
+ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
491
491
  const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
492
492
  const size_t* fileSizes, unsigned nbFiles,
493
493
  U32 minRatio, U32 notificationLevel)
@@ -576,7 +576,7 @@ typedef struct
576
576
  {
577
577
  ZSTD_CCtx* ref;
578
578
  ZSTD_CCtx* zc;
579
- void* workPlace; /* must be ZSTD_BLOCKSIZE_ABSOLUTEMAX allocated */
579
+ void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
580
580
  } EStats_ress_t;
581
581
 
582
582
  #define MAXREPOFFSET 1024
@@ -585,14 +585,14 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
585
585
  U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
586
586
  const void* src, size_t srcSize, U32 notificationLevel)
587
587
  {
588
- size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << params.cParams.windowLog);
588
+ size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
589
589
  size_t cSize;
590
590
 
591
591
  if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
592
592
  { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
593
593
  if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
594
594
  }
595
- cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
595
+ cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
596
596
  if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
597
597
 
598
598
  if (cSize) { /* if == 0; block is not compressible */
@@ -634,17 +634,6 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
634
634
  } } }
635
635
  }
636
636
 
637
- /*
638
- static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
639
- {
640
- unsigned u;
641
- size_t max=0;
642
- for (u=0; u<nbFiles; u++)
643
- if (max < fileSizes[u]) max = fileSizes[u];
644
- return max;
645
- }
646
- */
647
-
648
637
  static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
649
638
  {
650
639
  size_t total=0;
@@ -700,7 +689,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
700
689
  /* init */
701
690
  esr.ref = ZSTD_createCCtx();
702
691
  esr.zc = ZSTD_createCCtx();
703
- esr.workPlace = malloc(ZSTD_BLOCKSIZE_ABSOLUTEMAX);
692
+ esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
704
693
  if (!esr.ref || !esr.zc || !esr.workPlace) {
705
694
  eSize = ERROR(memory_allocation);
706
695
  DISPLAYLEVEL(1, "Not enough memory \n");
@@ -865,7 +854,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
865
854
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
866
855
 
867
856
  /* dictionary header */
868
- MEM_writeLE32(header, ZSTD_DICT_MAGIC);
857
+ MEM_writeLE32(header, ZSTD_MAGIC_DICTIONARY);
869
858
  { U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
870
859
  U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
871
860
  U32 const dictID = params.dictID ? params.dictID : compliantID;
@@ -917,7 +906,7 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
917
906
  }
918
907
 
919
908
  /* add dictionary header (after entropy tables) */
920
- MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
909
+ MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY);
921
910
  { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
922
911
  U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
923
912
  U32 const dictID = params.dictID ? params.dictID : compliantID;
@@ -930,14 +919,14 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
930
919
  }
931
920
 
932
921
 
933
- /*! ZDICT_trainFromBuffer_unsafe() :
922
+ /*! ZDICT_trainFromBuffer_unsafe_legacy() :
934
923
  * Warning : `samplesBuffer` must be followed by noisy guard band.
935
924
  * @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
936
925
  */
937
- size_t ZDICT_trainFromBuffer_unsafe(
926
+ size_t ZDICT_trainFromBuffer_unsafe_legacy(
938
927
  void* dictBuffer, size_t maxDictSize,
939
928
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
940
- ZDICT_params_t params)
929
+ ZDICT_legacy_params_t params)
941
930
  {
942
931
  U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
943
932
  dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
@@ -946,7 +935,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
946
935
  size_t const targetDictSize = maxDictSize;
947
936
  size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
948
937
  size_t dictSize = 0;
949
- U32 const notificationLevel = params.notificationLevel;
938
+ U32 const notificationLevel = params.zParams.notificationLevel;
950
939
 
951
940
  /* checks */
952
941
  if (!dictList) return ERROR(memory_allocation);
@@ -957,13 +946,13 @@ size_t ZDICT_trainFromBuffer_unsafe(
957
946
  ZDICT_initDictItem(dictList);
958
947
 
959
948
  /* build dictionary */
960
- ZDICT_trainBuffer(dictList, dictListSize,
961
- samplesBuffer, samplesBuffSize,
962
- samplesSizes, nbSamples,
963
- minRep, notificationLevel);
949
+ ZDICT_trainBuffer_legacy(dictList, dictListSize,
950
+ samplesBuffer, samplesBuffSize,
951
+ samplesSizes, nbSamples,
952
+ minRep, notificationLevel);
964
953
 
965
954
  /* display best matches */
966
- if (params.notificationLevel>= 3) {
955
+ if (params.zParams.notificationLevel>= 3) {
967
956
  U32 const nb = MIN(25, dictList[0].pos);
968
957
  U32 const dictContentSize = ZDICT_dictSize(dictList);
969
958
  U32 u;
@@ -1026,7 +1015,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
1026
1015
 
1027
1016
  dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
1028
1017
  samplesBuffer, samplesSizes, nbSamples,
1029
- params);
1018
+ params.zParams);
1030
1019
  }
1031
1020
 
1032
1021
  /* clean up */
@@ -1037,9 +1026,9 @@ size_t ZDICT_trainFromBuffer_unsafe(
1037
1026
 
1038
1027
  /* issue : samplesBuffer need to be followed by a noisy guard band.
1039
1028
  * work around : duplicate the buffer, and add the noise */
1040
- size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
1041
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
1042
- ZDICT_params_t params)
1029
+ size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
1030
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
1031
+ ZDICT_legacy_params_t params)
1043
1032
  {
1044
1033
  size_t result;
1045
1034
  void* newBuff;
@@ -1052,10 +1041,9 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
1052
1041
  memcpy(newBuff, samplesBuffer, sBuffSize);
1053
1042
  ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
1054
1043
 
1055
- result = ZDICT_trainFromBuffer_unsafe(
1056
- dictBuffer, dictBufferCapacity,
1057
- newBuff, samplesSizes, nbSamples,
1058
- params);
1044
+ result =
1045
+ ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff,
1046
+ samplesSizes, nbSamples, params);
1059
1047
  free(newBuff);
1060
1048
  return result;
1061
1049
  }
@@ -1064,11 +1052,13 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
1064
1052
  size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
1065
1053
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
1066
1054
  {
1067
- ZDICT_params_t params;
1055
+ ZDICT_cover_params_t params;
1068
1056
  memset(&params, 0, sizeof(params));
1069
- return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity,
1070
- samplesBuffer, samplesSizes, nbSamples,
1071
- params);
1057
+ params.d = 8;
1058
+ params.steps = 4;
1059
+ return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
1060
+ samplesBuffer, samplesSizes,
1061
+ nbSamples, &params);
1072
1062
  }
1073
1063
 
1074
1064
  size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
@@ -20,10 +20,12 @@ extern "C" {
20
20
 
21
21
 
22
22
  /* ===== ZDICTLIB_API : control library symbols visibility ===== */
23
- #if defined(__GNUC__) && (__GNUC__ >= 4)
24
- # define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
25
- #else
26
- # define ZDICTLIB_VISIBILITY
23
+ #ifndef ZDICTLIB_VISIBILITY
24
+ # if defined(__GNUC__) && (__GNUC__ >= 4)
25
+ # define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
26
+ # else
27
+ # define ZDICTLIB_VISIBILITY
28
+ # endif
27
29
  #endif
28
30
  #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
29
31
  # define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
@@ -34,18 +36,20 @@ extern "C" {
34
36
  #endif
35
37
 
36
38
 
37
- /*! ZDICT_trainFromBuffer() :
38
- Train a dictionary from an array of samples.
39
- Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
40
- supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
41
- The resulting dictionary will be saved into `dictBuffer`.
42
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
43
- or an error code, which can be tested with ZDICT_isError().
44
- Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
45
- It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
46
- In general, it's recommended to provide a few thousands samples, but this can vary a lot.
47
- It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
48
- */
39
+ /*! ZDICT_trainFromBuffer():
40
+ * Train a dictionary from an array of samples.
41
+ * Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
42
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
43
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
44
+ * The resulting dictionary will be saved into `dictBuffer`.
45
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
46
+ * or an error code, which can be tested with ZDICT_isError().
47
+ * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
48
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
49
+ * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
50
+ * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
51
+ * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
52
+ */
49
53
  ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
50
54
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
51
55
 
@@ -67,94 +71,78 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
67
71
  * ==================================================================================== */
68
72
 
69
73
  typedef struct {
70
- unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
71
74
  int compressionLevel; /* 0 means default; target a specific zstd compression level */
72
75
  unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
73
76
  unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
74
- unsigned reserved[2]; /* reserved space for future parameters */
75
77
  } ZDICT_params_t;
76
78
 
77
-
78
- /*! ZDICT_trainFromBuffer_advanced() :
79
- Same as ZDICT_trainFromBuffer() with control over more parameters.
80
- `parameters` is optional and can be provided with values set to 0 to mean "default".
81
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`),
82
- or an error code, which can be tested by ZDICT_isError().
83
- note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
84
- */
85
- ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
86
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
87
- ZDICT_params_t parameters);
88
-
89
- /*! COVER_params_t :
90
- For all values 0 means default.
91
- k and d are the only required parameters.
92
- */
79
+ /*! ZDICT_cover_params_t:
80
+ * For all values 0 means default.
81
+ * k and d are the only required parameters.
82
+ */
93
83
  typedef struct {
94
84
  unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
95
85
  unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
96
86
  unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
97
-
98
87
  unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
99
- unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
100
- unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
101
- int compressionLevel; /* 0 means default; target a specific zstd compression level */
102
- } COVER_params_t;
103
-
104
-
105
- /*! COVER_trainFromBuffer() :
106
- Train a dictionary from an array of samples using the COVER algorithm.
107
- Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
108
- supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
109
- The resulting dictionary will be saved into `dictBuffer`.
110
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
111
- or an error code, which can be tested with ZDICT_isError().
112
- Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
113
- Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
114
- It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
115
- In general, it's recommended to provide a few thousands samples, but this can vary a lot.
116
- It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
117
- */
118
- ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
119
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
120
- COVER_params_t parameters);
121
-
122
- /*! COVER_optimizeTrainFromBuffer() :
123
- The same requirements as above hold for all the parameters except `parameters`.
124
- This function tries many parameter combinations and picks the best parameters.
125
- `*parameters` is filled with the best parameters found, and the dictionary
126
- constructed with those parameters is stored in `dictBuffer`.
127
-
128
- All of the parameters d, k, steps are optional.
129
- If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
130
- if steps is zero it defaults to its default value.
131
- If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
132
-
133
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
134
- or an error code, which can be tested with ZDICT_isError().
135
- On success `*parameters` contains the parameters selected.
136
- Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
137
- */
138
- ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
139
- const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
140
- COVER_params_t *parameters);
141
-
142
- /*! ZDICT_finalizeDictionary() :
143
-
144
- Given a custom content as a basis for dictionary, and a set of samples,
145
- finalize dictionary by adding headers and statistics.
146
-
147
- Samples must be stored concatenated in a flat buffer `samplesBuffer`,
148
- supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
149
-
150
- dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
151
- maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
152
-
153
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
154
- or an error code, which can be tested by ZDICT_isError().
155
- note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
156
- note 2 : dictBuffer and dictContent can overlap
157
- */
88
+ ZDICT_params_t zParams;
89
+ } ZDICT_cover_params_t;
90
+
91
+
92
+ /*! ZDICT_trainFromBuffer_cover():
93
+ * Train a dictionary from an array of samples using the COVER algorithm.
94
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
95
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
96
+ * The resulting dictionary will be saved into `dictBuffer`.
97
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
98
+ * or an error code, which can be tested with ZDICT_isError().
99
+ * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
100
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
101
+ * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
102
+ * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
103
+ * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
104
+ */
105
+ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
106
+ void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
107
+ const size_t *samplesSizes, unsigned nbSamples,
108
+ ZDICT_cover_params_t parameters);
109
+
110
+ /*! ZDICT_optimizeTrainFromBuffer_cover():
111
+ * The same requirements as above hold for all the parameters except `parameters`.
112
+ * This function tries many parameter combinations and picks the best parameters.
113
+ * `*parameters` is filled with the best parameters found, and the dictionary
114
+ * constructed with those parameters is stored in `dictBuffer`.
115
+ *
116
+ * All of the parameters d, k, steps are optional.
117
+ * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
118
+ * if steps is zero it defaults to its default value.
119
+ * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
120
+ *
121
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
122
+ * or an error code, which can be tested with ZDICT_isError().
123
+ * On success `*parameters` contains the parameters selected.
124
+ * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
125
+ */
126
+ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
127
+ void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
128
+ const size_t *samplesSizes, unsigned nbSamples,
129
+ ZDICT_cover_params_t *parameters);
130
+
131
+ /*! ZDICT_finalizeDictionary():
132
+ * Given a custom content as a basis for dictionary, and a set of samples,
133
+ * finalize dictionary by adding headers and statistics.
134
+ *
135
+ * Samples must be stored concatenated in a flat buffer `samplesBuffer`,
136
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
137
+ *
138
+ * dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
139
+ * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
140
+ *
141
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
142
+ * or an error code, which can be tested by ZDICT_isError().
143
+ * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
144
+ * Note 2: dictBuffer and dictContent can overlap
145
+ */
158
146
  #define ZDICT_CONTENTSIZE_MIN 128
159
147
  #define ZDICT_DICTSIZE_MIN 256
160
148
  ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
@@ -162,7 +150,28 @@ ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBuffer
162
150
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
163
151
  ZDICT_params_t parameters);
164
152
 
165
-
153
+ typedef struct {
154
+ unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
155
+ ZDICT_params_t zParams;
156
+ } ZDICT_legacy_params_t;
157
+
158
+ /*! ZDICT_trainFromBuffer_legacy():
159
+ * Train a dictionary from an array of samples.
160
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
161
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
162
+ * The resulting dictionary will be saved into `dictBuffer`.
163
+ * `parameters` is optional and can be provided with values set to 0 to mean "default".
164
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
165
+ * or an error code, which can be tested with ZDICT_isError().
166
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
167
+ * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
168
+ * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
169
+ * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
170
+ * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
171
+ */
172
+ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
173
+ void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
174
+ const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters);
166
175
 
167
176
  /* Deprecation warnings */
168
177
  /* It is generally possible to disable deprecation warnings from compiler,