zstd-ruby 1.3.8.0 → 1.4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +6 -5
  3. data/README.md +1 -1
  4. data/ext/zstdruby/libzstd/Makefile +7 -3
  5. data/ext/zstdruby/libzstd/README.md +4 -2
  6. data/ext/zstdruby/libzstd/common/compiler.h +1 -1
  7. data/ext/zstdruby/libzstd/common/fse.h +1 -1
  8. data/ext/zstdruby/libzstd/common/threading.c +2 -2
  9. data/ext/zstdruby/libzstd/common/xxhash.c +2 -2
  10. data/ext/zstdruby/libzstd/common/zstd_internal.h +55 -2
  11. data/ext/zstdruby/libzstd/compress/fse_compress.c +2 -2
  12. data/ext/zstdruby/libzstd/compress/zstd_compress.c +423 -296
  13. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +14 -11
  14. data/ext/zstdruby/libzstd/compress/zstd_fast.c +203 -124
  15. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +1 -1
  16. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +1 -1
  17. data/ext/zstdruby/libzstd/compress/zstd_opt.c +27 -11
  18. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +41 -49
  19. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +43 -26
  20. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +4 -4
  21. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +257 -164
  22. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +51 -47
  23. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +7 -0
  24. data/ext/zstdruby/libzstd/dictBuilder/cover.c +58 -13
  25. data/ext/zstdruby/libzstd/dictBuilder/cover.h +29 -0
  26. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +25 -13
  27. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +18 -8
  28. data/ext/zstdruby/libzstd/dll/example/build_package.bat +3 -2
  29. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +42 -12
  30. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +32 -7
  31. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +12 -7
  32. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +31 -12
  33. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +12 -7
  34. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +32 -12
  35. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +12 -7
  36. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +32 -12
  37. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +12 -7
  38. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +32 -7
  39. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +12 -7
  40. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +36 -8
  41. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +10 -5
  42. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +40 -9
  43. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +10 -5
  44. data/ext/zstdruby/libzstd/zstd.h +689 -542
  45. data/lib/zstd-ruby/version.rb +1 -1
  46. data/zstd-ruby.gemspec +1 -1
  47. metadata +6 -7
  48. data/ext/zstdruby/libzstd/dll/libzstd.def +0 -87
The hunks below reproduce the complete diffs of three of the 48 files listed above: decompress/zstd_decompress_block.c (item 22), decompress/zstd_decompress_internal.h (item 23), and dictBuilder/cover.c (item 24).

data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c

@@ -56,14 +56,15 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
                           blockProperties_t* bpPtr)
 {
-    if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong);
+
     {   U32 const cBlockHeader = MEM_readLE24(src);
         U32 const cSize = cBlockHeader >> 3;
         bpPtr->lastBlock = cBlockHeader & 1;
         bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
         bpPtr->origSize = cSize;   /* only useful for RLE */
         if (bpPtr->blockType == bt_rle) return 1;
-        if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected);
         return cSize;
     }
 }
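
Nearly every change in this file follows the same pattern: `if (cond) return ERROR(code);` becomes RETURN_ERROR_IF(cond, code, ...) with an optional explanation string, and bare `return ERROR(code);` becomes RETURN_ERROR(code, ...). The macros themselves are introduced in common/zstd_internal.h (file 10 above, +55 -2). The following is a simplified sketch of their shape, not the verbatim upstream definition, which additionally logs the failing check with file and line in debug builds:

    /* Hedged sketch of the new error macros. The trailing "..." swallows the
     * optional message string; like the upstream call sites in this diff that
     * pass no message, it relies on the widely supported compiler extension
     * that allows an empty __VA_ARGS__. */
    #define RETURN_ERROR_IF(cond, err, ...) \
        if (cond) {                         \
            return ERROR(err);              \
        }

    #define RETURN_ERROR(err, ...) \
        return ERROR(err)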
@@ -78,7 +79,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                           const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
 {
-    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
 
     {   const BYTE* const istart = (const BYTE*) src;
         symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
@@ -86,11 +87,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
     switch(litEncType)
     {
     case set_repeat:
-        if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
+        RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
         /* fall-through */
 
     case set_compressed:
-        if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
+        RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
         {   size_t lhSize, litSize, litCSize;
             U32 singleStream=0;
             U32 const lhlCode = (istart[0] >> 2) & 3;
@@ -118,8 +119,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 litCSize = (lhc >> 22) + (istart[4] << 10);
                 break;
             }
-            if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
-            if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
+            RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
+            RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected);
 
             /* prefetch huffman table if cold */
             if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
@@ -157,7 +158,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 }
             }
 
-            if (HUF_isError(hufSuccess)) return ERROR(corruption_detected);
+            RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected);
 
             dctx->litPtr = dctx->litBuffer;
             dctx->litSize = litSize;
@@ -187,7 +188,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             }
 
             if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
-                if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
+                RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected);
                 memcpy(dctx->litBuffer, istart+lhSize, litSize);
                 dctx->litPtr = dctx->litBuffer;
                 dctx->litSize = litSize;
@@ -216,17 +217,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             case 3:
                 lhSize = 3;
                 litSize = MEM_readLE24(istart) >> 4;
-                if (srcSize<4) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
+                RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
                 break;
             }
-            if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
+            RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
             memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
             dctx->litPtr = dctx->litBuffer;
             dctx->litSize = litSize;
             return lhSize+1;
         }
     default:
-        return ERROR(corruption_detected);   /* impossible */
+        RETURN_ERROR(corruption_detected, "impossible");
         }
     }
 }
@@ -436,8 +437,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
     switch(type)
     {
     case set_rle :
-        if (!srcSize) return ERROR(srcSize_wrong);
-        if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(!srcSize, srcSize_wrong);
+        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected);
         {   U32 const symbol = *(const BYTE*)src;
             U32 const baseline = baseValue[symbol];
             U32 const nbBits = nbAdditionalBits[symbol];
@@ -449,7 +450,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
         *DTablePtr = defaultTable;
         return 0;
     case set_repeat:
-        if (!flagRepeatTable) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected);
         /* prefetch FSE table if used */
         if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
             const void* const pStart = *DTablePtr;
@@ -461,15 +462,15 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
         {   unsigned tableLog;
             S16 norm[MaxSeq+1];
             size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
-            if (FSE_isError(headerSize)) return ERROR(corruption_detected);
-            if (tableLog > maxLog) return ERROR(corruption_detected);
+            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected);
+            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected);
             ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
             *DTablePtr = DTableSpace;
             return headerSize;
         }
-    default :   /* impossible */
+    default :
         assert(0);
-        return ERROR(GENERIC);
+        RETURN_ERROR(GENERIC, "impossible");
     }
 }
 
@@ -483,28 +484,28 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
     DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
 
     /* check */
-    if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
+    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong);
 
     /* SeqHead */
     nbSeq = *ip++;
     if (!nbSeq) {
         *nbSeqPtr=0;
-        if (srcSize != 1) return ERROR(srcSize_wrong);
+        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong);
         return 1;
     }
     if (nbSeq > 0x7F) {
         if (nbSeq == 0xFF) {
-            if (ip+2 > iend) return ERROR(srcSize_wrong);
+            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong);
             nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
         } else {
-            if (ip >= iend) return ERROR(srcSize_wrong);
+            RETURN_ERROR_IF(ip >= iend, srcSize_wrong);
             nbSeq = ((nbSeq-0x80)<<8) + *ip++;
         }
     }
     *nbSeqPtr = nbSeq;
 
     /* FSE table descriptors */
-    if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
+    RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
     {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
         symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
         symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@@ -517,7 +518,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       LL_base, LL_bits,
                                                       LL_defaultDTable, dctx->fseEntropy,
                                                       dctx->ddictIsCold, nbSeq);
-        if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected);
         ip += llhSize;
     }
 
@@ -527,7 +528,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       OF_base, OF_bits,
                                                       OF_defaultDTable, dctx->fseEntropy,
                                                       dctx->ddictIsCold, nbSeq);
-        if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected);
         ip += ofhSize;
     }
 
@@ -537,7 +538,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ML_base, ML_bits,
                                                       ML_defaultDTable, dctx->fseEntropy,
                                                       dctx->ddictIsCold, nbSeq);
-        if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected);
         ip += mlhSize;
     }
 }
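
For reference, the descriptor byte parsed a few hunks up packs all three symbol-encoding types: literal lengths in bits 7-6, offsets in bits 5-4, match lengths in bits 3-2. A worked example with an illustrative byte value (not taken from this diff):

    /* *ip == 0xB4 (binary 1011 0100):
     *   LLtype = 0xB4 >> 6       = 2  -> set_compressed
     *   OFtype = (0xB4 >> 4) & 3 = 3  -> set_repeat
     *   MLtype = (0xB4 >> 2) & 3 = 1  -> set_rle
     * ZSTD_buildSeqTable is then invoked once per field, as in the three
     * hunks above, and each failure now surfaces through RETURN_ERROR_IF. */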
@@ -590,8 +591,8 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
     const BYTE* match = oLitEnd - sequence.offset;
 
     /* check */
-    if (oMatchEnd>oend) return ERROR(dstSize_tooSmall);   /* last match must fit within dstBuffer */
-    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* try to read beyond literal buffer */
+    RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
+    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
 
     /* copy literals */
     while (op < oLitEnd) *op++ = *(*litPtr)++;
@@ -599,7 +600,7 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
     /* copy Match */
     if (sequence.offset > (size_t)(oLitEnd - base)) {
         /* offset beyond prefix */
-        if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected);
         match = dictEnd - (base-match);
         if (match + sequence.matchLength <= dictEnd) {
             memmove(oLitEnd, match, sequence.matchLength);
@@ -631,8 +632,8 @@ size_t ZSTD_execSequence(BYTE* op,
     const BYTE* match = oLitEnd - sequence.offset;
 
     /* check */
-    if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
+    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
     if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
 
     /* copy Literals */
@@ -645,8 +646,7 @@ size_t ZSTD_execSequence(BYTE* op,
     /* copy Match */
     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix -> go into extDict */
-        if (sequence.offset > (size_t)(oLitEnd - virtualStart))
-            return ERROR(corruption_detected);
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
         match = dictEnd + (match - prefixStart);
         if (match + sequence.matchLength <= dictEnd) {
             memmove(oLitEnd, match, sequence.matchLength);
@@ -712,8 +712,8 @@ size_t ZSTD_execSequenceLong(BYTE* op,
     const BYTE* match = sequence.match;
 
     /* check */
-    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
+    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
     if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
 
     /* copy Literals */
@@ -726,7 +726,7 @@ size_t ZSTD_execSequenceLong(BYTE* op,
     /* copy Match */
     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix */
-        if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
         if (match + sequence.matchLength <= dictEnd) {
             memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
@@ -801,7 +801,7 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
  * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
  * bits before reloading. This value is the maximum number of bytes we read
- * after reloading when we are decoding long offets.
+ * after reloading when we are decoding long offsets.
  */
 #define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
     (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
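
For concreteness, assuming the values these constants carry in upstream zstd 1.4.0 (ZSTD_WINDOWLOG_MAX_32 == 30, STREAM_ACCUMULATOR_MIN_32 == 25), the macro works out as follows:

    /* LONG_OFFSETS_MAX_EXTRA_BITS_32
     *   = (30 > 25) ? 30 - 25 : 0
     *   = 5
     * i.e. on 32-bit builds, at most 5 offset bits can remain to be read
     * after a bitstream reload when decoding long offsets. */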
@@ -911,7 +911,9 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
         seqState_t seqState;
         dctx->fseEntropy = 1;
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
-        CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected);
         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
@@ -927,14 +929,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
 
         /* check if reached exact end */
         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
-        if (nbSeq) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(nbSeq, corruption_detected);
         /* save reps for next block */
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
     }
 
     /* last literal segment */
     {   size_t const lastLLSize = litEnd - litPtr;
-        if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
         memcpy(op, litPtr, lastLLSize);
         op += lastLLSize;
     }
@@ -1066,7 +1068,9 @@ ZSTD_decompressSequencesLong_body(
         seqState.pos = (size_t)(op-prefixStart);
         seqState.dictEnd = dictEnd;
         assert(iend >= ip);
-        CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected);
         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
@@ -1076,7 +1080,7 @@ ZSTD_decompressSequencesLong_body(
             sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
             PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
         }
-        if (seqNb<seqAdvance) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);
 
         /* decode and decompress */
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
@@ -1087,7 +1091,7 @@ ZSTD_decompressSequencesLong_body(
             sequences[seqNb & STORED_SEQS_MASK] = sequence;
             op += oneSeqSize;
         }
-        if (seqNb<nbSeq) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected);
 
         /* finish queue */
         seqNb -= seqAdvance;
@@ -1103,7 +1107,7 @@ ZSTD_decompressSequencesLong_body(
 
     /* last literal segment */
     {   size_t const lastLLSize = litEnd - litPtr;
-        if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
         memcpy(op, litPtr, lastLLSize);
         op += lastLLSize;
     }
@@ -1176,7 +1180,7 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
 /* ZSTD_decompressSequencesLong() :
  * decompression function triggered when a minimum share of offsets is considered "long",
  * aka out of cache.
- * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes mearning "farther than memory cache distance".
+ * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
 * This function will try to mitigate main memory latency through the use of prefetching */
 static size_t
 ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
@@ -1240,7 +1244,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
     ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
     DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
 
-    if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
+    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong);
 
     /* Decode literals section */
     {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h

@@ -89,6 +89,12 @@ typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
 typedef enum { zdss_init=0, zdss_loadHeader,
                zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
 
+typedef enum {
+    ZSTD_use_indefinitely = -1,  /* Use the dictionary indefinitely */
+    ZSTD_dont_use = 0,           /* Do not use the dictionary (if one exists free it) */
+    ZSTD_use_once = 1            /* Use the dictionary once and set to ZSTD_dont_use */
+} ZSTD_dictUses_e;
+
 struct ZSTD_DCtx_s
 {
     const ZSTD_seqSymbol* LLTptr;
@@ -123,6 +129,7 @@ struct ZSTD_DCtx_s
     const ZSTD_DDict* ddict;     /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
     U32 dictID;
     int ddictIsCold;             /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
+    ZSTD_dictUses_e dictUses;
 
     /* streaming */
     ZSTD_dStreamStage streamStage;
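
The new dictUses field lets the decompression entry points decide, per frame, whether the attached dictionary should be applied. The zstd_decompress.c diff that consumes it (+257 -164, file 21 above) is not reproduced on this page; the sketch below illustrates one plausible consumer matching the enum's documented semantics, and its name is hypothetical:

    /* Sketch: pick the DDict for the next frame, honoring dictUses.
     * ZSTD_use_once downgrades itself to ZSTD_dont_use, so the dictionary
     * is applied to exactly one frame. */
    static const ZSTD_DDict* ZSTD_DCtx_getDDict_sketch(ZSTD_DCtx* dctx)
    {
        switch (dctx->dictUses) {
        case ZSTD_use_indefinitely:
            return dctx->ddict;
        case ZSTD_use_once:
            dctx->dictUses = ZSTD_dont_use;   /* one-shot */
            return dctx->ddict;
        case ZSTD_dont_use:
        default:
            return NULL;
        }
    }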
data/ext/zstdruby/libzstd/dictBuilder/cover.c

@@ -391,7 +391,7 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group,
  *
  * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
  *
- * Once the dmer d is in the dictionay we set F(d) = 0.
+ * Once the dmer d is in the dictionary we set F(d) = 0.
  */
 static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
                                            COVER_map_t *activeDmers, U32 begin,
@@ -435,7 +435,7 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
       U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
       activeSegment.begin += 1;
       *delDmerOcc -= 1;
-      /* If this is the last occurence of the dmer, subtract its score */
+      /* If this is the last occurrence of the dmer, subtract its score */
       if (*delDmerOcc == 0) {
         COVER_map_remove(activeDmers, delDmer);
         activeSegment.score -= freqs[delDmer];
@@ -627,6 +627,39 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
   return 1;
 }
 
+void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
+{
+  const double ratio = (double)nbDmers / maxDictSize;
+  if (ratio >= 10) {
+    return;
+  }
+  LOCALDISPLAYLEVEL(displayLevel, 1,
+                    "WARNING: The maximum dictionary size %u is too large "
+                    "compared to the source size %u! "
+                    "size(source)/size(dictionary) = %f, but it should be >= "
+                    "10! This may lead to a subpar dictionary! We recommend "
+                    "training on sources at least 10x, and up to 100x the "
+                    "size of the dictionary!\n", (U32)maxDictSize,
+                    (U32)nbDmers, ratio);
+}
+
+COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize,
+                                       U32 nbDmers, U32 k, U32 passes)
+{
+  const U32 minEpochSize = k * 10;
+  COVER_epoch_info_t epochs;
+  epochs.num = MAX(1, maxDictSize / k / passes);
+  epochs.size = nbDmers / epochs.num;
+  if (epochs.size >= minEpochSize) {
+    assert(epochs.size * epochs.num <= nbDmers);
+    return epochs;
+  }
+  epochs.size = MIN(minEpochSize, nbDmers);
+  epochs.num = nbDmers / epochs.size;
+  assert(epochs.size * epochs.num <= nbDmers);
+  return epochs;
+}
+
 /**
  * Given the prepared context build the dictionary.
  */
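
To make the new epoch computation concrete, here is a worked example with assumed inputs (a 110 KiB dictionary budget and one million dmers; neither number comes from this diff):

    /* COVER_computeEpochs(maxDictSize = 112640, nbDmers = 1000000,
     *                     k = 1024, passes = 4):
     *   minEpochSize = 1024 * 10             = 10240
     *   epochs.num   = MAX(1, 112640/1024/4) = 27
     *   epochs.size  = 1000000 / 27          = 37037
     * 37037 >= 10240, so the fast path returns {num = 27, size = 37037}.
     * For a tiny corpus, epochs.size would instead be clamped up to
     * minEpochSize and epochs.num recomputed from it. */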
@@ -636,28 +669,34 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
                                     ZDICT_cover_params_t parameters) {
   BYTE *const dict = (BYTE *)dictBuffer;
   size_t tail = dictBufferCapacity;
-  /* Divide the data up into epochs of equal size.
-   * We will select at least one segment from each epoch.
-   */
-  const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4));
-  const unsigned epochSize = (U32)(ctx->suffixSize / epochs);
+  /* Divide the data into epochs. We will select one segment from each epoch. */
+  const COVER_epoch_info_t epochs = COVER_computeEpochs(
+      (U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4);
+  const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3));
+  size_t zeroScoreRun = 0;
   size_t epoch;
   DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
-               epochs, epochSize);
+               (U32)epochs.num, (U32)epochs.size);
   /* Loop through the epochs until there are no more segments or the dictionary
    * is full.
    */
-  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
-    const U32 epochBegin = (U32)(epoch * epochSize);
-    const U32 epochEnd = epochBegin + epochSize;
+  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
+    const U32 epochBegin = (U32)(epoch * epochs.size);
+    const U32 epochEnd = epochBegin + epochs.size;
     size_t segmentSize;
     /* Select a segment */
     COVER_segment_t segment = COVER_selectSegment(
         ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
-    /* If the segment covers no dmers, then we are out of content */
+    /* If the segment covers no dmers, then we are out of content.
+     * There may be new content in other epochs, for continue for some time.
+     */
     if (segment.score == 0) {
-      break;
+      if (++zeroScoreRun >= maxZeroScoreRun) {
+        break;
+      }
+      continue;
     }
+    zeroScoreRun = 0;
     /* Trim the segment if necessary and if it is too small then we are done */
     segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
     if (segmentSize < parameters.d) {
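
The other behavioral change in this hunk is the zero-score escape hatch: rather than stopping at the first empty segment, the builder now skips ahead and only gives up after maxZeroScoreRun consecutive empty epochs. Continuing the assumed numbers from the example above:

    /* With epochs.num = 27:
     *   maxZeroScoreRun = MAX(10, MIN(100, 27 >> 3)) = MAX(10, 3) = 10
     * so up to 10 consecutive zero-score epochs are tolerated before the
     * loop breaks; any scoring segment resets zeroScoreRun to 0. */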
@@ -706,6 +745,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
                       parameters.d, parameters.splitPoint)) {
     return ERROR(GENERIC);
   }
+  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
   if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
     DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
     COVER_ctx_destroy(&ctx);
@@ -977,6 +1017,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
   unsigned k;
   COVER_best_t best;
   POOL_ctx *pool = NULL;
+  int warned = 0;
 
   /* Checks */
   if (splitPoint <= 0 || splitPoint > 1) {
@@ -1019,6 +1060,10 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
       POOL_free(pool);
       return ERROR(GENERIC);
     }
+    if (!warned) {
+      COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
+      warned = 1;
+    }
     /* Loop through k reusing the same context */
     for (k = kMinK; k <= kMaxK; k += kStepSize) {
       /* Prepare the arguments */