zstd-ruby 1.3.3.0 → 1.3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +5 -5
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/BUCK +13 -0
  4. data/ext/zstdruby/libzstd/README.md +32 -25
  5. data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
  6. data/ext/zstdruby/libzstd/common/compiler.h +25 -0
  7. data/ext/zstdruby/libzstd/common/cpu.h +216 -0
  8. data/ext/zstdruby/libzstd/common/error_private.c +1 -0
  9. data/ext/zstdruby/libzstd/common/fse.h +1 -1
  10. data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -2
  11. data/ext/zstdruby/libzstd/common/huf.h +114 -89
  12. data/ext/zstdruby/libzstd/common/pool.c +46 -17
  13. data/ext/zstdruby/libzstd/common/pool.h +18 -9
  14. data/ext/zstdruby/libzstd/common/threading.h +12 -12
  15. data/ext/zstdruby/libzstd/common/zstd_errors.h +16 -7
  16. data/ext/zstdruby/libzstd/common/zstd_internal.h +4 -5
  17. data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -11
  18. data/ext/zstdruby/libzstd/compress/huf_compress.c +160 -62
  19. data/ext/zstdruby/libzstd/compress/zstd_compress.c +973 -644
  20. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +281 -34
  21. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +80 -62
  22. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +11 -4
  23. data/ext/zstdruby/libzstd/compress/zstd_fast.c +87 -71
  24. data/ext/zstdruby/libzstd/compress/zstd_fast.h +10 -6
  25. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +333 -274
  26. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +33 -16
  27. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +305 -359
  28. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +64 -21
  29. data/ext/zstdruby/libzstd/compress/zstd_opt.c +194 -56
  30. data/ext/zstdruby/libzstd/compress/zstd_opt.h +17 -5
  31. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +1131 -449
  32. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -16
  33. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +390 -290
  34. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +777 -439
  35. data/ext/zstdruby/libzstd/dictBuilder/cover.c +11 -8
  36. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +83 -50
  37. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +44 -43
  38. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +2 -0
  39. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +42 -118
  40. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -2
  41. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +2 -2
  42. data/ext/zstdruby/libzstd/zstd.h +254 -254
  43. data/lib/zstd-ruby/version.rb +1 -1
  44. metadata +4 -3
@@ -537,8 +537,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
537
537
  /* Checks */
538
538
  if (totalSamplesSize < MAX(d, sizeof(U64)) ||
539
539
  totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
540
- DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
541
- (COVER_MAX_SAMPLES_SIZE >> 20));
540
+ DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
541
+ (U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
542
542
  return 0;
543
543
  }
544
544
  /* Zero the context */
@@ -651,12 +651,16 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
651
651
  }
652
652
 
653
653
  ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
654
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
655
- const size_t *samplesSizes, unsigned nbSamples,
656
- ZDICT_cover_params_t parameters) {
657
- BYTE *const dict = (BYTE *)dictBuffer;
654
+ void *dictBuffer, size_t dictBufferCapacity,
655
+ const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
656
+ ZDICT_cover_params_t parameters)
657
+ {
658
+ BYTE* const dict = (BYTE*)dictBuffer;
658
659
  COVER_ctx_t ctx;
659
660
  COVER_map_t activeDmers;
661
+
662
+ /* Initialize global data */
663
+ g_displayLevel = parameters.zParams.notificationLevel;
660
664
  /* Checks */
661
665
  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
662
666
  DISPLAYLEVEL(1, "Cover parameters incorrect\n");
@@ -671,8 +675,6 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
671
675
  ZDICT_DICTSIZE_MIN);
672
676
  return ERROR(dstSize_tooSmall);
673
677
  }
674
- /* Initialize global data */
675
- g_displayLevel = parameters.zParams.notificationLevel;
676
678
  /* Initialize context and activeDmers */
677
679
  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
678
680
  parameters.d)) {
@@ -947,6 +949,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
947
949
  unsigned k;
948
950
  COVER_best_t best;
949
951
  POOL_ctx *pool = NULL;
952
+
950
953
  /* Checks */
951
954
  if (kMinK < kMaxD || kMaxK < kMinK) {
952
955
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
@@ -207,7 +207,6 @@ static dictItem ZDICT_analyzePos(
207
207
  U32 cumulLength[LLIMIT] = {0};
208
208
  U32 savings[LLIMIT] = {0};
209
209
  const BYTE* b = (const BYTE*)buffer;
210
- size_t length;
211
210
  size_t maxLength = LLIMIT;
212
211
  size_t pos = suffix[start];
213
212
  U32 end = start;
@@ -222,26 +221,30 @@ static dictItem ZDICT_analyzePos(
222
221
  ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3))
223
222
  ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) {
224
223
  /* skip and mark segment */
225
- U16 u16 = MEM_read16(b+pos+4);
226
- U32 u, e = 6;
227
- while (MEM_read16(b+pos+e) == u16) e+=2 ;
228
- if (b[pos+e] == b[pos+e-1]) e++;
229
- for (u=1; u<e; u++)
224
+ U16 const pattern16 = MEM_read16(b+pos+4);
225
+ U32 u, patternEnd = 6;
226
+ while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ;
227
+ if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++;
228
+ for (u=1; u<patternEnd; u++)
230
229
  doneMarks[pos+u] = 1;
231
230
  return solution;
232
231
  }
233
232
 
234
233
  /* look forward */
235
- do {
236
- end++;
237
- length = ZDICT_count(b + pos, b + suffix[end]);
238
- } while (length >=MINMATCHLENGTH);
234
+ { size_t length;
235
+ do {
236
+ end++;
237
+ length = ZDICT_count(b + pos, b + suffix[end]);
238
+ } while (length >= MINMATCHLENGTH);
239
+ }
239
240
 
240
241
  /* look backward */
241
- do {
242
- length = ZDICT_count(b + pos, b + *(suffix+start-1));
243
- if (length >=MINMATCHLENGTH) start--;
244
- } while(length >= MINMATCHLENGTH);
242
+ { size_t length;
243
+ do {
244
+ length = ZDICT_count(b + pos, b + *(suffix+start-1));
245
+ if (length >=MINMATCHLENGTH) start--;
246
+ } while(length >= MINMATCHLENGTH);
247
+ }
245
248
 
246
249
  /* exit if not found a minimum nb of repetitions */
247
250
  if (end-start < minRatio) {
@@ -268,7 +271,7 @@ static dictItem ZDICT_analyzePos(
268
271
  U32 selectedCount = 0;
269
272
  U32 selectedID = currentID;
270
273
  for (id =refinedStart; id < refinedEnd; id++) {
271
- if (b[ suffix[id] + searchLength] != currentChar) {
274
+ if (b[suffix[id] + searchLength] != currentChar) {
272
275
  if (currentCount > selectedCount) {
273
276
  selectedCount = currentCount;
274
277
  selectedID = currentID;
@@ -297,20 +300,23 @@ static dictItem ZDICT_analyzePos(
297
300
  memset(lengthList, 0, sizeof(lengthList));
298
301
 
299
302
  /* look forward */
300
- do {
301
- end++;
302
- length = ZDICT_count(b + pos, b + suffix[end]);
303
- if (length >= LLIMIT) length = LLIMIT-1;
304
- lengthList[length]++;
305
- } while (length >=MINMATCHLENGTH);
303
+ { size_t length;
304
+ do {
305
+ end++;
306
+ length = ZDICT_count(b + pos, b + suffix[end]);
307
+ if (length >= LLIMIT) length = LLIMIT-1;
308
+ lengthList[length]++;
309
+ } while (length >=MINMATCHLENGTH);
310
+ }
306
311
 
307
312
  /* look backward */
308
- length = MINMATCHLENGTH;
309
- while ((length >= MINMATCHLENGTH) & (start > 0)) {
310
- length = ZDICT_count(b + pos, b + suffix[start - 1]);
311
- if (length >= LLIMIT) length = LLIMIT - 1;
312
- lengthList[length]++;
313
- if (length >= MINMATCHLENGTH) start--;
313
+ { size_t length = MINMATCHLENGTH;
314
+ while ((length >= MINMATCHLENGTH) & (start > 0)) {
315
+ length = ZDICT_count(b + pos, b + suffix[start - 1]);
316
+ if (length >= LLIMIT) length = LLIMIT - 1;
317
+ lengthList[length]++;
318
+ if (length >= MINMATCHLENGTH) start--;
319
+ }
314
320
  }
315
321
 
316
322
  /* largest useful length */
@@ -345,12 +351,12 @@ static dictItem ZDICT_analyzePos(
345
351
  /* mark positions done */
346
352
  { U32 id;
347
353
  for (id=start; id<end; id++) {
348
- U32 p, pEnd;
354
+ U32 p, pEnd, length;
349
355
  U32 const testedPos = suffix[id];
350
356
  if (testedPos == pos)
351
357
  length = solution.length;
352
358
  else {
353
- length = ZDICT_count(b+pos, b+testedPos);
359
+ length = (U32)ZDICT_count(b+pos, b+testedPos);
354
360
  if (length > solution.length) length = solution.length;
355
361
  }
356
362
  pEnd = (U32)(testedPos + length);
@@ -575,29 +581,30 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
575
581
 
576
582
  typedef struct
577
583
  {
578
- ZSTD_CCtx* ref;
579
- ZSTD_CCtx* zc;
584
+ ZSTD_CCtx* ref; /* contains reference to dictionary */
585
+ ZSTD_CCtx* zc; /* working context */
580
586
  void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
581
587
  } EStats_ress_t;
582
588
 
583
589
  #define MAXREPOFFSET 1024
584
590
 
585
591
  static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
586
- U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
587
- const void* src, size_t srcSize, U32 notificationLevel)
592
+ U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
593
+ const void* src, size_t srcSize,
594
+ U32 notificationLevel)
588
595
  {
589
596
  size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
590
597
  size_t cSize;
591
598
 
592
599
  if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
593
- { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
594
- if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
600
+ { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
601
+ if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
595
602
  }
596
603
  cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
597
604
  if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
598
605
 
599
606
  if (cSize) { /* if == 0; block is not compressible */
600
- const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
607
+ const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);
601
608
 
602
609
  /* literals stats */
603
610
  { const BYTE* bytePtr;
@@ -659,6 +666,18 @@ static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val,
659
666
  }
660
667
  }
661
668
 
669
+ /* ZDICT_flatLit() :
670
+ * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals.
671
+ * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode.
672
+ */
673
+ static void ZDICT_flatLit(U32* countLit)
674
+ {
675
+ int u;
676
+ for (u=1; u<256; u++) countLit[u] = 2;
677
+ countLit[0] = 4;
678
+ countLit[253] = 1;
679
+ countLit[254] = 1;
680
+ }
662
681
 
663
682
  #define OFFCODE_MAX 30 /* only applicable to first block */
664
683
  static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
@@ -688,6 +707,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
688
707
  BYTE* dstPtr = (BYTE*)dstBuffer;
689
708
 
690
709
  /* init */
710
+ DEBUGLOG(4, "ZDICT_analyzeEntropy");
691
711
  esr.ref = ZSTD_createCCtx();
692
712
  esr.zc = ZSTD_createCCtx();
693
713
  esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
@@ -713,7 +733,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
713
733
  goto _cleanup;
714
734
  } }
715
735
 
716
- /* collect stats on all files */
736
+ /* collect stats on all samples */
717
737
  for (u=0; u<nbFiles; u++) {
718
738
  ZDICT_countEStats(esr, params,
719
739
  countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
@@ -722,14 +742,21 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
722
742
  pos += fileSizes[u];
723
743
  }
724
744
 
725
- /* analyze */
726
- errorCode = HUF_buildCTable (hufTable, countLit, 255, huffLog);
727
- if (HUF_isError(errorCode)) {
728
- eSize = ERROR(GENERIC);
729
- DISPLAYLEVEL(1, "HUF_buildCTable error \n");
730
- goto _cleanup;
745
+ /* analyze, build stats, starting with literals */
746
+ { size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
747
+ if (HUF_isError(maxNbBits)) {
748
+ eSize = ERROR(GENERIC);
749
+ DISPLAYLEVEL(1, " HUF_buildCTable error \n");
750
+ goto _cleanup;
751
+ }
752
+ if (maxNbBits==8) { /* not compressible : will fail on HUF_writeCTable() */
753
+ DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
754
+ ZDICT_flatLit(countLit); /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
755
+ maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
756
+ assert(maxNbBits==9);
757
+ }
758
+ huffLog = (U32)maxNbBits;
731
759
  }
732
- huffLog = (U32)errorCode;
733
760
 
734
761
  /* looking for most common first offsets */
735
762
  { U32 offset;
@@ -850,6 +877,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
850
877
  U32 const notificationLevel = params.notificationLevel;
851
878
 
852
879
  /* check conditions */
880
+ DEBUGLOG(4, "ZDICT_finalizeDictionary");
853
881
  if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
854
882
  if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
855
883
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
@@ -1025,8 +1053,9 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(
1025
1053
  }
1026
1054
 
1027
1055
 
1028
- /* issue : samplesBuffer need to be followed by a noisy guard band.
1029
- * work around : duplicate the buffer, and add the noise */
1056
+ /* ZDICT_trainFromBuffer_legacy() :
1057
+ * issue : samplesBuffer need to be followed by a noisy guard band.
1058
+ * work around : duplicate the buffer, and add the noise */
1030
1059
  size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
1031
1060
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
1032
1061
  ZDICT_legacy_params_t params)
@@ -1054,18 +1083,22 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
1054
1083
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
1055
1084
  {
1056
1085
  ZDICT_cover_params_t params;
1086
+ DEBUGLOG(3, "ZDICT_trainFromBuffer");
1057
1087
  memset(&params, 0, sizeof(params));
1058
1088
  params.d = 8;
1059
1089
  params.steps = 4;
1060
- /* Default to level 6 since no compression level information is avaialble */
1090
+ /* Default to level 6 since no compression level information is available */
1061
1091
  params.zParams.compressionLevel = 6;
1092
+ #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
1093
+ params.zParams.notificationLevel = ZSTD_DEBUG;
1094
+ #endif
1062
1095
  return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
1063
- samplesBuffer, samplesSizes,
1064
- nbSamples, &params);
1096
+ samplesBuffer, samplesSizes, nbSamples,
1097
+ &params);
1065
1098
  }
1066
1099
 
1067
1100
  size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
1068
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
1101
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
1069
1102
  {
1070
1103
  ZDICT_params_t params;
1071
1104
  memset(&params, 0, sizeof(params));
@@ -38,21 +38,21 @@ extern "C" {
38
38
 
39
39
 
40
40
  /*! ZDICT_trainFromBuffer():
41
- * Train a dictionary from an array of samples.
42
- * Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
43
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
44
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
45
- * The resulting dictionary will be saved into `dictBuffer`.
41
+ * Train a dictionary from an array of samples.
42
+ * Redirect towards ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
43
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
44
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
45
+ * The resulting dictionary will be saved into `dictBuffer`.
46
46
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
47
- * or an error code, which can be tested with ZDICT_isError().
48
- * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
49
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
50
- * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
51
- * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
47
+ * or an error code, which can be tested with ZDICT_isError().
48
+ * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
49
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
50
+ * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
51
+ * In general, it's recommended to provide a few thousands samples, though this can vary a lot.
52
52
  * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
53
53
  */
54
54
  ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
55
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
55
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
56
56
 
57
57
 
58
58
  /*====== Helper functions ======*/
@@ -72,14 +72,14 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
72
72
  * ==================================================================================== */
73
73
 
74
74
  typedef struct {
75
- int compressionLevel; /* 0 means default; target a specific zstd compression level */
76
- unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
77
- unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
75
+ int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */
76
+ unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
77
+ unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */
78
78
  } ZDICT_params_t;
79
79
 
80
80
  /*! ZDICT_cover_params_t:
81
- * For all values 0 means default.
82
81
  * k and d are the only required parameters.
82
+ * For others, value 0 means default.
83
83
  */
84
84
  typedef struct {
85
85
  unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
@@ -91,28 +91,28 @@ typedef struct {
91
91
 
92
92
 
93
93
  /*! ZDICT_trainFromBuffer_cover():
94
- * Train a dictionary from an array of samples using the COVER algorithm.
95
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
96
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
97
- * The resulting dictionary will be saved into `dictBuffer`.
94
+ * Train a dictionary from an array of samples using the COVER algorithm.
95
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
96
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
97
+ * The resulting dictionary will be saved into `dictBuffer`.
98
98
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
99
- * or an error code, which can be tested with ZDICT_isError().
100
- * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
101
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
102
- * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
103
- * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
99
+ * or an error code, which can be tested with ZDICT_isError().
100
+ * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
101
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
102
+ * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
103
+ * In general, it's recommended to provide a few thousands samples, though this can vary a lot.
104
104
  * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
105
105
  */
106
106
  ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
107
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
108
- const size_t *samplesSizes, unsigned nbSamples,
109
- ZDICT_cover_params_t parameters);
107
+ void *dictBuffer, size_t dictBufferCapacity,
108
+ const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
109
+ ZDICT_cover_params_t parameters);
110
110
 
111
111
  /*! ZDICT_optimizeTrainFromBuffer_cover():
112
112
  * The same requirements as above hold for all the parameters except `parameters`.
113
113
  * This function tries many parameter combinations and picks the best parameters.
114
- * `*parameters` is filled with the best parameters found, and the dictionary
115
- * constructed with those parameters is stored in `dictBuffer`.
114
+ * `*parameters` is filled with the best parameters found,
115
+ * dictionary constructed with those parameters is stored in `dictBuffer`.
116
116
  *
117
117
  * All of the parameters d, k, steps are optional.
118
118
  * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
@@ -125,9 +125,9 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
125
125
  * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
126
126
  */
127
127
  ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
128
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
129
- const size_t *samplesSizes, unsigned nbSamples,
130
- ZDICT_cover_params_t *parameters);
128
+ void* dictBuffer, size_t dictBufferCapacity,
129
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
130
+ ZDICT_cover_params_t* parameters);
131
131
 
132
132
  /*! ZDICT_finalizeDictionary():
133
133
  * Given a custom content as a basis for dictionary, and a set of samples,
@@ -157,22 +157,23 @@ typedef struct {
157
157
  } ZDICT_legacy_params_t;
158
158
 
159
159
  /*! ZDICT_trainFromBuffer_legacy():
160
- * Train a dictionary from an array of samples.
161
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
162
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
163
- * The resulting dictionary will be saved into `dictBuffer`.
160
+ * Train a dictionary from an array of samples.
161
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
162
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
163
+ * The resulting dictionary will be saved into `dictBuffer`.
164
164
  * `parameters` is optional and can be provided with values set to 0 to mean "default".
165
165
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
166
- * or an error code, which can be tested with ZDICT_isError().
167
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
168
- * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
169
- * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
166
+ * or an error code, which can be tested with ZDICT_isError().
167
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
168
+ * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
169
+ * In general, it's recommended to provide a few thousands samples, though this can vary a lot.
170
170
  * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
171
- * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
171
+ * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
172
172
  */
173
173
  ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
174
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
175
- const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters);
174
+ void *dictBuffer, size_t dictBufferCapacity,
175
+ const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
176
+ ZDICT_legacy_params_t parameters);
176
177
 
177
178
  /* Deprecation warnings */
178
179
  /* It is generally possible to disable deprecation warnings from compiler,
@@ -246,6 +246,7 @@ MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
246
246
  MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion,
247
247
  const void* dict, size_t dictSize)
248
248
  {
249
+ DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion);
249
250
  if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion);
250
251
  switch(newVersion)
251
252
  {
@@ -304,6 +305,7 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
304
305
  MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
305
306
  ZSTD_outBuffer* output, ZSTD_inBuffer* input)
306
307
  {
308
+ DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version);
307
309
  switch(version)
308
310
  {
309
311
  default :