zstd-ruby 1.3.8.0 → 1.4.0.0

Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +6 -5
  3. data/README.md +1 -1
  4. data/ext/zstdruby/libzstd/Makefile +7 -3
  5. data/ext/zstdruby/libzstd/README.md +4 -2
  6. data/ext/zstdruby/libzstd/common/compiler.h +1 -1
  7. data/ext/zstdruby/libzstd/common/fse.h +1 -1
  8. data/ext/zstdruby/libzstd/common/threading.c +2 -2
  9. data/ext/zstdruby/libzstd/common/xxhash.c +2 -2
  10. data/ext/zstdruby/libzstd/common/zstd_internal.h +55 -2
  11. data/ext/zstdruby/libzstd/compress/fse_compress.c +2 -2
  12. data/ext/zstdruby/libzstd/compress/zstd_compress.c +423 -296
  13. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +14 -11
  14. data/ext/zstdruby/libzstd/compress/zstd_fast.c +203 -124
  15. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +1 -1
  16. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +1 -1
  17. data/ext/zstdruby/libzstd/compress/zstd_opt.c +27 -11
  18. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +41 -49
  19. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +43 -26
  20. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +4 -4
  21. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +257 -164
  22. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +51 -47
  23. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +7 -0
  24. data/ext/zstdruby/libzstd/dictBuilder/cover.c +58 -13
  25. data/ext/zstdruby/libzstd/dictBuilder/cover.h +29 -0
  26. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +25 -13
  27. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +18 -8
  28. data/ext/zstdruby/libzstd/dll/example/build_package.bat +3 -2
  29. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +42 -12
  30. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +32 -7
  31. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +12 -7
  32. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +31 -12
  33. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +12 -7
  34. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +32 -12
  35. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +12 -7
  36. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +32 -12
  37. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +12 -7
  38. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +32 -7
  39. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +12 -7
  40. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +36 -8
  41. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +10 -5
  42. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +40 -9
  43. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +10 -5
  44. data/ext/zstdruby/libzstd/zstd.h +689 -542
  45. data/lib/zstd-ruby/version.rb +1 -1
  46. data/zstd-ruby.gemspec +1 -1
  47. metadata +6 -7
  48. data/ext/zstdruby/libzstd/dll/libzstd.def +0 -87
data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c

@@ -56,14 +56,15 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
                           blockProperties_t* bpPtr)
 {
-    if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong);
+
     {   U32 const cBlockHeader = MEM_readLE24(src);
         U32 const cSize = cBlockHeader >> 3;
         bpPtr->lastBlock = cBlockHeader & 1;
         bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
         bpPtr->origSize = cSize;   /* only useful for RLE */
         if (bpPtr->blockType == bt_rle) return 1;
-        if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected);
         return cSize;
     }
 }
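Note: nearly every decompressor change in this release is the same mechanical substitution. Bare `if (cond) return ERROR(code);` checks become RETURN_ERROR_IF(cond, code, ...) with an optional message, and unreachable failure paths become RETURN_ERROR(code, ...); the macros arrive via the zstd_internal.h change listed above (+55 -2). A minimal sketch of the pattern, assuming rather than quoting the upstream definition (the real macros additionally log the failing condition and message under debug tracing):

    /* Sketch only: in the real macros the variadic message compiles away
     * in release builds and is logged with file/line in debug builds. */
    #define RETURN_ERROR_IF(cond, err, ...) \
        do { if (cond) { return ERROR(err); } } while (0)

    #define RETURN_ERROR(err, ...) \
        do { return ERROR(err); } while (0)

Compared to the old pattern, call sites read as assertions and the failure reason travels with the check rather than living in a trailing comment.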
@@ -78,7 +79,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                                 const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
 {
-    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);

     {   const BYTE* const istart = (const BYTE*) src;
         symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);

@@ -86,11 +87,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
         switch(litEncType)
         {
         case set_repeat:
-            if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
+            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
             /* fall-through */

         case set_compressed:
-            if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
+            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
             {   size_t lhSize, litSize, litCSize;
                 U32 singleStream=0;
                 U32 const lhlCode = (istart[0] >> 2) & 3;

@@ -118,8 +119,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     litCSize = (lhc >> 22) + (istart[4] << 10);
                     break;
                 }
-                if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
-                if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
+                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
+                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected);

                 /* prefetch huffman table if cold */
                 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {

@@ -157,7 +158,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     }
                 }

-                if (HUF_isError(hufSuccess)) return ERROR(corruption_detected);
+                RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected);

                 dctx->litPtr = dctx->litBuffer;
                 dctx->litSize = litSize;

@@ -187,7 +188,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             }

             if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
-                if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
+                RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected);
                 memcpy(dctx->litBuffer, istart+lhSize, litSize);
                 dctx->litPtr = dctx->litBuffer;
                 dctx->litSize = litSize;

@@ -216,17 +217,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             case 3:
                 lhSize = 3;
                 litSize = MEM_readLE24(istart) >> 4;
-                if (srcSize<4) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
+                RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
                 break;
             }
-            if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
+            RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
             memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
             dctx->litPtr = dctx->litBuffer;
             dctx->litSize = litSize;
             return lhSize+1;
         }
         default:
-            return ERROR(corruption_detected);   /* impossible */
+            RETURN_ERROR(corruption_detected, "impossible");
         }
     }
 }
@@ -436,8 +437,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
     switch(type)
     {
     case set_rle :
-        if (!srcSize) return ERROR(srcSize_wrong);
-        if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(!srcSize, srcSize_wrong);
+        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected);
         {   U32 const symbol = *(const BYTE*)src;
             U32 const baseline = baseValue[symbol];
             U32 const nbBits = nbAdditionalBits[symbol];

@@ -449,7 +450,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
         *DTablePtr = defaultTable;
         return 0;
     case set_repeat:
-        if (!flagRepeatTable) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected);
         /* prefetch FSE table if used */
         if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
             const void* const pStart = *DTablePtr;

@@ -461,15 +462,15 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
         {   unsigned tableLog;
             S16 norm[MaxSeq+1];
             size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
-            if (FSE_isError(headerSize)) return ERROR(corruption_detected);
-            if (tableLog > maxLog) return ERROR(corruption_detected);
+            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected);
+            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected);
             ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
             *DTablePtr = DTableSpace;
             return headerSize;
         }
-    default :   /* impossible */
+    default :
         assert(0);
-        return ERROR(GENERIC);
+        RETURN_ERROR(GENERIC, "impossible");
     }
 }

@@ -483,28 +484,28 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
     DEBUGLOG(5, "ZSTD_decodeSeqHeaders");

     /* check */
-    if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
+    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong);

     /* SeqHead */
     nbSeq = *ip++;
     if (!nbSeq) {
         *nbSeqPtr=0;
-        if (srcSize != 1) return ERROR(srcSize_wrong);
+        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong);
         return 1;
     }
     if (nbSeq > 0x7F) {
         if (nbSeq == 0xFF) {
-            if (ip+2 > iend) return ERROR(srcSize_wrong);
+            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong);
             nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
         } else {
-            if (ip >= iend) return ERROR(srcSize_wrong);
+            RETURN_ERROR_IF(ip >= iend, srcSize_wrong);
             nbSeq = ((nbSeq-0x80)<<8) + *ip++;
         }
     }
     *nbSeqPtr = nbSeq;

     /* FSE table descriptors */
-    if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
+    RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
     {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
         symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
         symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@@ -517,7 +518,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       LL_base, LL_bits,
                                                       LL_defaultDTable, dctx->fseEntropy,
                                                       dctx->ddictIsCold, nbSeq);
-            if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
+            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected);
             ip += llhSize;
         }

@@ -527,7 +528,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       OF_base, OF_bits,
                                                       OF_defaultDTable, dctx->fseEntropy,
                                                       dctx->ddictIsCold, nbSeq);
-            if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
+            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected);
             ip += ofhSize;
         }

@@ -537,7 +538,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ML_base, ML_bits,
                                                       ML_defaultDTable, dctx->fseEntropy,
                                                       dctx->ddictIsCold, nbSeq);
-            if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
+            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected);
             ip += mlhSize;
         }
     }
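Note: the SeqHead parsing in the @@ -483 hunk above is a 1-to-3-byte variable-length count. An encoder-side sketch, derived purely from that decode logic (LONGNBSEQ == 0x7F00 is an assumption matching the upstream constant, not shown in this diff):

    #include <stddef.h>
    #include <stdint.h>

    #define LONGNBSEQ 0x7F00   /* assumed to match the decoder above */

    /* Hypothetical helper: writes nbSeq in 1..3 bytes, mirroring the reader
     * in ZSTD_decodeSeqHeaders. Returns bytes written, 0 on error. */
    static size_t writeSeqCount(uint8_t* dst, size_t dstCap, unsigned nbSeq)
    {
        if (nbSeq < 0x80) {                    /* 1 byte : 0..127 */
            if (dstCap < 1) return 0;
            dst[0] = (uint8_t)nbSeq;
            return 1;
        }
        if (nbSeq < LONGNBSEQ) {               /* 2 bytes : 128..0x7EFF */
            if (dstCap < 2) return 0;
            dst[0] = (uint8_t)((nbSeq >> 8) + 0x80);   /* 0x80..0xFE, never 0xFF */
            dst[1] = (uint8_t)(nbSeq & 0xFF);
            return 2;
        }
        if (nbSeq <= 0xFFFF + LONGNBSEQ) {     /* 3 bytes : 0xFF marker + LE16 */
            if (dstCap < 3) return 0;
            dst[0] = 0xFF;
            dst[1] = (uint8_t)((nbSeq - LONGNBSEQ) & 0xFF);
            dst[2] = (uint8_t)((nbSeq - LONGNBSEQ) >> 8);
            return 3;
        }
        return 0;                              /* out of range */
    }

The one-byte form also carries the nbSeq == 0 "no sequences" case, which the decoder handles separately before the length checks.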
@@ -590,8 +591,8 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
     const BYTE* match = oLitEnd - sequence.offset;

     /* check */
-    if (oMatchEnd>oend) return ERROR(dstSize_tooSmall);   /* last match must fit within dstBuffer */
-    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* try to read beyond literal buffer */
+    RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
+    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");

     /* copy literals */
     while (op < oLitEnd) *op++ = *(*litPtr)++;

@@ -599,7 +600,7 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
     /* copy Match */
     if (sequence.offset > (size_t)(oLitEnd - base)) {
         /* offset beyond prefix */
-        if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase), corruption_detected);
         match = dictEnd - (base-match);
         if (match + sequence.matchLength <= dictEnd) {
             memmove(oLitEnd, match, sequence.matchLength);

@@ -631,8 +632,8 @@ size_t ZSTD_execSequence(BYTE* op,
     const BYTE* match = oLitEnd - sequence.offset;

     /* check */
-    if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
+    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
     if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);

     /* copy Literals */

@@ -645,8 +646,7 @@ size_t ZSTD_execSequence(BYTE* op,
     /* copy Match */
     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix -> go into extDict */
-        if (sequence.offset > (size_t)(oLitEnd - virtualStart))
-            return ERROR(corruption_detected);
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
         match = dictEnd + (match - prefixStart);
         if (match + sequence.matchLength <= dictEnd) {
             memmove(oLitEnd, match, sequence.matchLength);

@@ -712,8 +712,8 @@ size_t ZSTD_execSequenceLong(BYTE* op,
     const BYTE* match = sequence.match;

     /* check */
-    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
+    RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
+    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
     if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);

     /* copy Literals */

@@ -726,7 +726,7 @@ size_t ZSTD_execSequenceLong(BYTE* op,
     /* copy Match */
     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix */
-        if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
         if (match + sequence.matchLength <= dictEnd) {
             memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
@@ -801,7 +801,7 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
  * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
  * bits before reloading. This value is the maximum number of bytes we read
- * after reloading when we are decoding long offets.
+ * after reloading when we are decoding long offsets.
  */
 #define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
     (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
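For scale: assuming the upstream 32-bit limits ZSTD_WINDOWLOG_MAX_32 == 30 and STREAM_ACCUMULATOR_MIN_32 == 25 (values recalled from the library headers, not shown in this diff), the macro whose first lines appear above evaluates to 5 extra bits. A self-contained check of that arithmetic:

    #include <assert.h>

    /* Assumed constants; verify against lib/zstd.h and lib/common/bitstream.h. */
    #define ZSTD_WINDOWLOG_MAX_32     30
    #define STREAM_ACCUMULATOR_MIN_32 25

    /* Sketch of the macro whose opening lines are visible in the hunk above. */
    #define LONG_OFFSETS_MAX_EXTRA_BITS_32                      \
        (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32      \
            ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
            : 0)

    int main(void) { assert(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); return 0; }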
@@ -911,7 +911,9 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
         seqState_t seqState;
         dctx->fseEntropy = 1;
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
-        CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected);
         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);

@@ -927,14 +929,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,

         /* check if reached exact end */
         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
-        if (nbSeq) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(nbSeq, corruption_detected);
         /* save reps for next block */
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
     }

     /* last literal segment */
     {   size_t const lastLLSize = litEnd - litPtr;
-        if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
         memcpy(op, litPtr, lastLLSize);
         op += lastLLSize;
     }
@@ -1066,7 +1068,9 @@ ZSTD_decompressSequencesLong_body(
         seqState.pos = (size_t)(op-prefixStart);
         seqState.dictEnd = dictEnd;
         assert(iend >= ip);
-        CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected);
         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);

@@ -1076,7 +1080,7 @@ ZSTD_decompressSequencesLong_body(
             sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
             PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
         }
-        if (seqNb<seqAdvance) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);

         /* decode and decompress */
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {

@@ -1087,7 +1091,7 @@ ZSTD_decompressSequencesLong_body(
             sequences[seqNb & STORED_SEQS_MASK] = sequence;
             op += oneSeqSize;
         }
-        if (seqNb<nbSeq) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected);

         /* finish queue */
         seqNb -= seqAdvance;

@@ -1103,7 +1107,7 @@ ZSTD_decompressSequencesLong_body(

     /* last literal segment */
     {   size_t const lastLLSize = litEnd - litPtr;
-        if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
         memcpy(op, litPtr, lastLLSize);
         op += lastLLSize;
     }

@@ -1176,7 +1180,7 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
 /* ZSTD_decompressSequencesLong() :
  * decompression function triggered when a minimum share of offsets is considered "long",
  * aka out of cache.
- * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes mearning "farther than memory cache distance".
+ * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
  * This function will try to mitigate main memory latency through the use of prefetching */
 static size_t
 ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,

@@ -1240,7 +1244,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
     ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
     DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);

-    if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
+    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong);

     /* Decode literals section */
     {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h

@@ -89,6 +89,12 @@ typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
 typedef enum { zdss_init=0, zdss_loadHeader,
                zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;

+typedef enum {
+    ZSTD_use_indefinitely = -1,  /* Use the dictionary indefinitely */
+    ZSTD_dont_use = 0,           /* Do not use the dictionary (if one exists free it) */
+    ZSTD_use_once = 1            /* Use the dictionary once and set to ZSTD_dont_use */
+} ZSTD_dictUses_e;
+
 struct ZSTD_DCtx_s
 {
     const ZSTD_seqSymbol* LLTptr;

@@ -123,6 +129,7 @@ struct ZSTD_DCtx_s
     const ZSTD_DDict* ddict;     /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
     U32 dictID;
     int ddictIsCold;             /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
+    ZSTD_dictUses_e dictUses;

     /* streaming */
     ZSTD_dStreamStage streamStage;
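The new dictUses field gives the DCtx a small state machine for dictionary lifetime. The diff shows only the enum and the field; a sketch of the selection logic it enables (hypothetical helper name, modeled on the enum comments above, not a quote from this diff):

    /* Hypothetical in-tree helper: picks the dictionary for the next frame
     * and consumes one-shot references. */
    static const ZSTD_DDict* ZSTD_DCtx_selectDDict(ZSTD_DCtx* dctx)
    {
        switch (dctx->dictUses) {
        case ZSTD_use_indefinitely:
            return dctx->ddict;               /* sticky: survives across frames */
        case ZSTD_use_once:
            dctx->dictUses = ZSTD_dont_use;   /* valid for exactly one frame */
            return dctx->ddict;
        case ZSTD_dont_use:
        default:
            return NULL;
        }
    }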
data/ext/zstdruby/libzstd/dictBuilder/cover.c

@@ -391,7 +391,7 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group,
  *
  * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
  *
- * Once the dmer d is in the dictionay we set F(d) = 0.
+ * Once the dmer d is in the dictionary we set F(d) = 0.
  */
 static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
                                            COVER_map_t *activeDmers, U32 begin,

@@ -435,7 +435,7 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
       U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
       activeSegment.begin += 1;
       *delDmerOcc -= 1;
-      /* If this is the last occurence of the dmer, subtract its score */
+      /* If this is the last occurrence of the dmer, subtract its score */
       if (*delDmerOcc == 0) {
         COVER_map_remove(activeDmers, delDmer);
         activeSegment.score -= freqs[delDmer];
@@ -627,6 +627,39 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
   return 1;
 }

+void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
+{
+  const double ratio = (double)nbDmers / maxDictSize;
+  if (ratio >= 10) {
+    return;
+  }
+  LOCALDISPLAYLEVEL(displayLevel, 1,
+                    "WARNING: The maximum dictionary size %u is too large "
+                    "compared to the source size %u! "
+                    "size(source)/size(dictionary) = %f, but it should be >= "
+                    "10! This may lead to a subpar dictionary! We recommend "
+                    "training on sources at least 10x, and up to 100x the "
+                    "size of the dictionary!\n", (U32)maxDictSize,
+                    (U32)nbDmers, ratio);
+}
+
+COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize,
+                                       U32 nbDmers, U32 k, U32 passes)
+{
+  const U32 minEpochSize = k * 10;
+  COVER_epoch_info_t epochs;
+  epochs.num = MAX(1, maxDictSize / k / passes);
+  epochs.size = nbDmers / epochs.num;
+  if (epochs.size >= minEpochSize) {
+    assert(epochs.size * epochs.num <= nbDmers);
+    return epochs;
+  }
+  epochs.size = MIN(minEpochSize, nbDmers);
+  epochs.num = nbDmers / epochs.size;
+  assert(epochs.size * epochs.num <= nbDmers);
+  return epochs;
+}
+
 /**
  * Given the prepared context build the dictionary.
  */
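To make the new epoch computation concrete, here is a standalone re-implementation of COVER_computeEpochs with a worked example (illustrative numbers, not taken from the diff):

    #include <assert.h>
    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))
    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    typedef struct { unsigned num; unsigned size; } epoch_info;

    /* Mirrors COVER_computeEpochs above: aim for maxDictSize/k/passes epochs,
     * but never let an epoch fall below k * 10 dmers. */
    static epoch_info computeEpochs(unsigned maxDictSize, unsigned nbDmers,
                                    unsigned k, unsigned passes)
    {
        const unsigned minEpochSize = k * 10;
        epoch_info e;
        e.num = MAX(1, maxDictSize / k / passes);
        e.size = nbDmers / e.num;
        if (e.size >= minEpochSize) return e;
        e.size = MIN(minEpochSize, nbDmers);
        e.num = nbDmers / e.size;
        return e;
    }

    int main(void)
    {
        /* 100 KiB dictionary, k = 200, 4 passes, 1M dmers:
         * num = max(1, 102400/200/4) = 128; size = 1000000/128 = 7812,
         * comfortably above minEpochSize = 2000, so no clamping. */
        epoch_info e = computeEpochs(100 * 1024, 1000000, 200, 4);
        assert(e.num == 128 && e.size == 7812);

        /* Tiny corpus (5000 dmers): size would be 39 < 2000, so the epoch
         * size is clamped to 2000 and num drops to 5000/2000 = 2. */
        e = computeEpochs(100 * 1024, 5000, 200, 4);
        assert(e.num == 2 && e.size == 2000);

        printf("ok\n");
        return 0;
    }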
@@ -636,28 +669,34 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
                                     ZDICT_cover_params_t parameters) {
   BYTE *const dict = (BYTE *)dictBuffer;
   size_t tail = dictBufferCapacity;
-  /* Divide the data up into epochs of equal size.
-   * We will select at least one segment from each epoch.
-   */
-  const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4));
-  const unsigned epochSize = (U32)(ctx->suffixSize / epochs);
+  /* Divide the data into epochs. We will select one segment from each epoch. */
+  const COVER_epoch_info_t epochs = COVER_computeEpochs(
+      (U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4);
+  const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3));
+  size_t zeroScoreRun = 0;
   size_t epoch;
   DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
-               epochs, epochSize);
+               (U32)epochs.num, (U32)epochs.size);
   /* Loop through the epochs until there are no more segments or the dictionary
    * is full.
    */
-  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
-    const U32 epochBegin = (U32)(epoch * epochSize);
-    const U32 epochEnd = epochBegin + epochSize;
+  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
+    const U32 epochBegin = (U32)(epoch * epochs.size);
+    const U32 epochEnd = epochBegin + epochs.size;
     size_t segmentSize;
     /* Select a segment */
     COVER_segment_t segment = COVER_selectSegment(
         ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
-    /* If the segment covers no dmers, then we are out of content */
+    /* If the segment covers no dmers, then we are out of content.
+     * There may be new content in other epochs, for continue for some time.
+     */
     if (segment.score == 0) {
-      break;
+      if (++zeroScoreRun >= maxZeroScoreRun) {
+        break;
+      }
+      continue;
     }
+    zeroScoreRun = 0;
     /* Trim the segment if necessary and if it is too small then we are done */
    segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
    if (segmentSize < parameters.d) {
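Note on the retry bound introduced above: maxZeroScoreRun scales with the epoch count. With epochs.num == 256, for example, it is MAX(10, MIN(100, 256 >> 3)) == 32, so the builder now tolerates up to 32 consecutive zero-score epochs before concluding the corpus is exhausted, where the old code stopped at the first empty epoch.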
@@ -706,6 +745,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
                     parameters.d, parameters.splitPoint)) {
     return ERROR(GENERIC);
   }
+  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
   if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
     DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
     COVER_ctx_destroy(&ctx);

@@ -977,6 +1017,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
   unsigned k;
   COVER_best_t best;
   POOL_ctx *pool = NULL;
+  int warned = 0;

   /* Checks */
   if (splitPoint <= 0 || splitPoint > 1) {

@@ -1019,6 +1060,10 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
       POOL_free(pool);
       return ERROR(GENERIC);
     }
+    if (!warned) {
+      COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
+      warned = 1;
+    }
     /* Loop through k reusing the same context */
     for (k = kMinK; k <= kMaxK; k += kStepSize) {
       /* Prepare the arguments */