zstd-ruby 1.4.0.0 → 1.4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +5 -0
  4. data/ext/zstdruby/libzstd/common/compiler.h +7 -0
  5. data/ext/zstdruby/libzstd/common/zstd_internal.h +58 -6
  6. data/ext/zstdruby/libzstd/compress/zstd_compress.c +175 -117
  7. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +74 -30
  8. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +56 -36
  9. data/ext/zstdruby/libzstd/compress/zstd_fast.c +35 -14
  10. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +10 -5
  11. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +1 -1
  12. data/ext/zstdruby/libzstd/compress/zstd_opt.c +45 -32
  13. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +18 -7
  14. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +1 -0
  15. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +12 -9
  16. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +20 -9
  17. data/ext/zstdruby/libzstd/dictBuilder/cover.c +154 -43
  18. data/ext/zstdruby/libzstd/dictBuilder/cover.h +38 -3
  19. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +46 -39
  20. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +9 -9
  21. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +5 -0
  22. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -0
  23. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +95 -101
  24. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +11 -6
  25. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +11 -6
  26. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +11 -8
  27. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +88 -84
  28. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -4
  29. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +2 -4
  30. data/ext/zstdruby/libzstd/zstd.h +53 -21
  31. data/lib/zstd-ruby/version.rb +1 -1
  32. metadata +3 -4
@@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
83
83
  U32* largerPtr = smallerPtr + 1;
84
84
  U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
85
85
  U32 dummy32; /* to be nullified at the end */
86
- U32 const windowLow = ms->window.lowLimit;
86
+ U32 const windowValid = ms->window.lowLimit;
87
+ U32 const maxDistance = 1U << cParams->windowLog;
88
+ U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
89
+
87
90
 
88
91
  DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
89
92
  current, dictLimit, windowLow);
@@ -239,7 +242,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
239
242
 
240
243
  const BYTE* const base = ms->window.base;
241
244
  U32 const current = (U32)(ip-base);
242
- U32 const windowLow = ms->window.lowLimit;
245
+ U32 const maxDistance = 1U << cParams->windowLog;
246
+ U32 const windowValid = ms->window.lowLimit;
247
+ U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
243
248
 
244
249
  U32* const bt = ms->chainTable;
245
250
  U32 const btLog = cParams->chainLog - 1;
@@ -490,8 +495,10 @@ size_t ZSTD_HcFindBestMatch_generic (
490
495
  const U32 dictLimit = ms->window.dictLimit;
491
496
  const BYTE* const prefixStart = base + dictLimit;
492
497
  const BYTE* const dictEnd = dictBase + dictLimit;
493
- const U32 lowLimit = ms->window.lowLimit;
494
498
  const U32 current = (U32)(ip-base);
499
+ const U32 maxDistance = 1U << cParams->windowLog;
500
+ const U32 lowValid = ms->window.lowLimit;
501
+ const U32 lowLimit = (current - lowValid > maxDistance) ? current - maxDistance : lowValid;
495
502
  const U32 minChain = current > chainSize ? current - chainSize : 0;
496
503
  U32 nbAttempts = 1U << cParams->searchLog;
497
504
  size_t ml=4-1;
@@ -653,7 +660,6 @@ size_t ZSTD_compressBlock_lazy_generic(
653
660
 
654
661
  /* init */
655
662
  ip += (dictAndPrefixLength == 0);
656
- ms->nextToUpdate3 = ms->nextToUpdate;
657
663
  if (dictMode == ZSTD_noDict) {
658
664
  U32 const maxRep = (U32)(ip - prefixLowest);
659
665
  if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
@@ -933,7 +939,6 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
933
939
  U32 offset_1 = rep[0], offset_2 = rep[1];
934
940
 
935
941
  /* init */
936
- ms->nextToUpdate3 = ms->nextToUpdate;
937
942
  ip += (ip == prefixStart);
938
943
 
939
944
  /* Match Loop */
@@ -447,7 +447,7 @@ size_t ZSTD_ldm_generateSequences(
447
447
  if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
448
448
  U32 const ldmHSize = 1U << params->hashLog;
449
449
  U32 const correction = ZSTD_window_correctOverflow(
450
- &ldmState->window, /* cycleLog */ 0, maxDist, src);
450
+ &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
451
451
  ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
452
452
  }
453
453
  /* 2. We enforce the maximum offset allowed.
@@ -255,13 +255,13 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
255
255
  * to provide a cost which is directly comparable to a match ending at same position */
256
256
  static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
257
257
  {
258
- if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);
258
+ if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);
259
259
 
260
260
  /* dynamic statistics */
261
261
  { U32 const llCode = ZSTD_LLcode(litLength);
262
- int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
263
- + WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
264
- - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
262
+ int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
263
+ + (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
264
+ - (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
265
265
  #if 1
266
266
  return contribution;
267
267
  #else
@@ -278,7 +278,7 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe
278
278
  const optState_t* const optPtr,
279
279
  int optLevel)
280
280
  {
281
- int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
281
+ int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
282
282
  + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
283
283
  return contribution;
284
284
  }
@@ -372,13 +372,15 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
372
372
 
373
373
  /* Update hashTable3 up to ip (excluded)
374
374
  Assumption : always within prefix (i.e. not within extDict) */
375
- static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip)
375
+ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
376
+ U32* nextToUpdate3,
377
+ const BYTE* const ip)
376
378
  {
377
379
  U32* const hashTable3 = ms->hashTable3;
378
380
  U32 const hashLog3 = ms->hashLog3;
379
381
  const BYTE* const base = ms->window.base;
380
- U32 idx = ms->nextToUpdate3;
381
- U32 const target = ms->nextToUpdate3 = (U32)(ip - base);
382
+ U32 idx = *nextToUpdate3;
383
+ U32 const target = (U32)(ip - base);
382
384
  size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
383
385
  assert(hashLog3 > 0);
384
386
 
@@ -387,6 +389,7 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE*
387
389
  idx++;
388
390
  }
389
391
 
392
+ *nextToUpdate3 = target;
390
393
  return hashTable3[hash3];
391
394
  }
392
395
 
@@ -503,9 +506,11 @@ static U32 ZSTD_insertBt1(
503
506
  } }
504
507
 
505
508
  *smallerPtr = *largerPtr = 0;
506
- if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
507
- assert(matchEndIdx > current + 8);
508
- return matchEndIdx - (current + 8);
509
+ { U32 positions = 0;
510
+ if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
511
+ assert(matchEndIdx > current + 8);
512
+ return MAX(positions, matchEndIdx - (current + 8));
513
+ }
509
514
  }
510
515
 
511
516
  FORCE_INLINE_TEMPLATE
@@ -520,8 +525,13 @@ void ZSTD_updateTree_internal(
520
525
  DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
521
526
  idx, target, dictMode);
522
527
 
523
- while(idx < target)
524
- idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
528
+ while(idx < target) {
529
+ U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
530
+ assert(idx < (U32)(idx + forward));
531
+ idx += forward;
532
+ }
533
+ assert((size_t)(ip - base) <= (size_t)(U32)(-1));
534
+ assert((size_t)(iend - base) <= (size_t)(U32)(-1));
525
535
  ms->nextToUpdate = target;
526
536
  }
527
537
 
@@ -531,16 +541,18 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
531
541
 
532
542
  FORCE_INLINE_TEMPLATE
533
543
  U32 ZSTD_insertBtAndGetAllMatches (
544
+ ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
534
545
  ZSTD_matchState_t* ms,
546
+ U32* nextToUpdate3,
535
547
  const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
536
- U32 rep[ZSTD_REP_NUM],
548
+ const U32 rep[ZSTD_REP_NUM],
537
549
  U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
538
- ZSTD_match_t* matches,
539
550
  const U32 lengthToBeat,
540
551
  U32 const mls /* template */)
541
552
  {
542
553
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
543
554
  U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
555
+ U32 const maxDistance = 1U << cParams->windowLog;
544
556
  const BYTE* const base = ms->window.base;
545
557
  U32 const current = (U32)(ip-base);
546
558
  U32 const hashLog = cParams->hashLog;
@@ -556,8 +568,9 @@ U32 ZSTD_insertBtAndGetAllMatches (
556
568
  U32 const dictLimit = ms->window.dictLimit;
557
569
  const BYTE* const dictEnd = dictBase + dictLimit;
558
570
  const BYTE* const prefixStart = base + dictLimit;
559
- U32 const btLow = btMask >= current ? 0 : current - btMask;
560
- U32 const windowLow = ms->window.lowLimit;
571
+ U32 const btLow = (btMask >= current) ? 0 : current - btMask;
572
+ U32 const windowValid = ms->window.lowLimit;
573
+ U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid;
561
574
  U32 const matchLow = windowLow ? windowLow : 1;
562
575
  U32* smallerPtr = bt + 2*(current&btMask);
563
576
  U32* largerPtr = bt + 2*(current&btMask) + 1;
@@ -627,7 +640,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
627
640
 
628
641
  /* HC3 match finder */
629
642
  if ((mls == 3) /*static*/ && (bestLength < mls)) {
630
- U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
643
+ U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
631
644
  if ((matchIndex3 >= matchLow)
632
645
  & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
633
646
  size_t mlen;
@@ -653,9 +666,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
653
666
  (ip+mlen == iLimit) ) { /* best possible length */
654
667
  ms->nextToUpdate = current+1; /* skip insertion */
655
668
  return 1;
656
- }
657
- }
658
- }
669
+ } } }
659
670
  /* no dictMatchState lookup: dicts don't have a populated HC3 table */
660
671
  }
661
672
 
@@ -760,10 +771,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
760
771
 
761
772
 
762
773
  FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
774
+ ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
763
775
  ZSTD_matchState_t* ms,
776
+ U32* nextToUpdate3,
764
777
  const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
765
- U32 rep[ZSTD_REP_NUM], U32 const ll0,
766
- ZSTD_match_t* matches, U32 const lengthToBeat)
778
+ const U32 rep[ZSTD_REP_NUM],
779
+ U32 const ll0,
780
+ U32 const lengthToBeat)
767
781
  {
768
782
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
769
783
  U32 const matchLengthSearch = cParams->minMatch;
@@ -772,12 +786,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
772
786
  ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
773
787
  switch(matchLengthSearch)
774
788
  {
775
- case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3);
789
+ case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
776
790
  default :
777
- case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4);
778
- case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5);
791
+ case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
792
+ case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
779
793
  case 7 :
780
- case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6);
794
+ case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
781
795
  }
782
796
  }
783
797
 
@@ -853,6 +867,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
853
867
 
854
868
  U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
855
869
  U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
870
+ U32 nextToUpdate3 = ms->nextToUpdate;
856
871
 
857
872
  ZSTD_optimal_t* const opt = optStatePtr->priceTable;
858
873
  ZSTD_match_t* const matches = optStatePtr->matchTable;
@@ -862,7 +877,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
862
877
  DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
863
878
  (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
864
879
  assert(optLevel <= 2);
865
- ms->nextToUpdate3 = ms->nextToUpdate;
866
880
  ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
867
881
  ip += (ip==prefixStart);
868
882
 
@@ -873,7 +887,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
873
887
  /* find first match */
874
888
  { U32 const litlen = (U32)(ip - anchor);
875
889
  U32 const ll0 = !litlen;
876
- U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch);
890
+ U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
877
891
  if (!nbMatches) { ip++; continue; }
878
892
 
879
893
  /* initialize opt[0] */
@@ -970,7 +984,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
970
984
  U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
971
985
  U32 const previousPrice = opt[cur].price;
972
986
  U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
973
- U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch);
987
+ U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
974
988
  U32 matchNb;
975
989
  if (!nbMatches) {
976
990
  DEBUGLOG(7, "rPos:%u : no match found", cur);
@@ -1094,7 +1108,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1094
1108
  } /* while (ip < ilimit) */
1095
1109
 
1096
1110
  /* Return the last literals size */
1097
- return iend - anchor;
1111
+ return (size_t)(iend - anchor);
1098
1112
  }
1099
1113
 
1100
1114
 
@@ -1158,7 +1172,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1158
1172
  ms->window.dictLimit += (U32)srcSize;
1159
1173
  ms->window.lowLimit = ms->window.dictLimit;
1160
1174
  ms->nextToUpdate = ms->window.dictLimit;
1161
- ms->nextToUpdate3 = ms->window.dictLimit;
1162
1175
 
1163
1176
  /* re-inforce weight of collected statistics */
1164
1177
  ZSTD_upscaleStats(&ms->opt);
@@ -1129,9 +1129,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
1129
1129
  size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
1130
1130
  size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
1131
1131
  assert(flushed <= produced);
1132
+ assert(jobPtr->consumed <= jobPtr->src.size);
1132
1133
  toFlush = produced - flushed;
1133
- if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) {
1134
- /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */
1134
+ /* if toFlush==0, nothing is available to flush.
1135
+ * However, jobID is expected to still be active:
1136
+ * if jobID was already completed and fully flushed,
1137
+ * ZSTDMT_flushProduced() should have already moved onto next job.
1138
+ * Therefore, some input has not yet been consumed. */
1139
+ if (toFlush==0) {
1135
1140
  assert(jobPtr->consumed < jobPtr->src.size);
1136
1141
  }
1137
1142
  }
@@ -1148,12 +1153,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
1148
1153
 
1149
1154
  static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
1150
1155
  {
1151
- if (params.ldmParams.enableLdm)
1156
+ unsigned jobLog;
1157
+ if (params.ldmParams.enableLdm) {
1152
1158
  /* In Long Range Mode, the windowLog is typically oversized.
1153
1159
  * In which case, it's preferable to determine the jobSize
1154
1160
  * based on chainLog instead. */
1155
- return MAX(21, params.cParams.chainLog + 4);
1156
- return MAX(20, params.cParams.windowLog + 2);
1161
+ jobLog = MAX(21, params.cParams.chainLog + 4);
1162
+ } else {
1163
+ jobLog = MAX(20, params.cParams.windowLog + 2);
1164
+ }
1165
+ return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
1157
1166
  }
1158
1167
 
1159
1168
  static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
@@ -1197,7 +1206,7 @@ static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
1197
1206
  ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
1198
1207
  - overlapRLog;
1199
1208
  }
1200
- assert(0 <= ovLog && ovLog <= 30);
1209
+ assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
1201
1210
  DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
1202
1211
  DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
1203
1212
  return (ovLog==0) ? 0 : (size_t)1 << ovLog;
@@ -1391,7 +1400,7 @@ size_t ZSTDMT_initCStream_internal(
1391
1400
  FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
1392
1401
 
1393
1402
  if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
1394
- if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
1403
+ if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
1395
1404
 
1396
1405
  mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
1397
1406
  if (mtctx->singleBlockingThread) {
@@ -1432,6 +1441,8 @@ size_t ZSTDMT_initCStream_internal(
1432
1441
  if (mtctx->targetSectionSize == 0) {
1433
1442
  mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
1434
1443
  }
1444
+ assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
1445
+
1435
1446
  if (params.rsyncable) {
1436
1447
  /* Aim for the targetsectionSize as the average job size. */
1437
1448
  U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
@@ -50,6 +50,7 @@
50
50
  #ifndef ZSTDMT_JOBSIZE_MIN
51
51
  # define ZSTDMT_JOBSIZE_MIN (1 MB)
52
52
  #endif
53
+ #define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
53
54
  #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
54
55
 
55
56
 
@@ -360,8 +360,11 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
360
360
  sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
361
361
  RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
362
362
  frameParameter_unsupported);
363
-
364
- return skippableHeaderSize + sizeU32;
363
+ {
364
+ size_t const skippableSize = skippableHeaderSize + sizeU32;
365
+ RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong);
366
+ return skippableSize;
367
+ }
365
368
  }
366
369
 
367
370
  /** ZSTD_findDecompressedSize() :
@@ -378,11 +381,10 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
378
381
 
379
382
  if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
380
383
  size_t const skippableSize = readSkippableFrameSize(src, srcSize);
381
- if (ZSTD_isError(skippableSize))
382
- return skippableSize;
383
- if (srcSize < skippableSize) {
384
+ if (ZSTD_isError(skippableSize)) {
384
385
  return ZSTD_CONTENTSIZE_ERROR;
385
386
  }
387
+ assert(skippableSize <= srcSize);
386
388
 
387
389
  src = (const BYTE *)src + skippableSize;
388
390
  srcSize -= skippableSize;
@@ -467,6 +469,8 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
467
469
  if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
468
470
  && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
469
471
  frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
472
+ assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
473
+ frameSizeInfo.compressedSize <= srcSize);
470
474
  return frameSizeInfo;
471
475
  } else {
472
476
  const BYTE* ip = (const BYTE*)src;
@@ -529,7 +533,6 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
529
533
  return frameSizeInfo.compressedSize;
530
534
  }
531
535
 
532
-
533
536
  /** ZSTD_decompressBound() :
534
537
  * compatible with legacy mode
535
538
  * `src` must point to the start of a ZSTD frame or a skippable frame
@@ -546,6 +549,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
546
549
  unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
547
550
  if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
548
551
  return ZSTD_CONTENTSIZE_ERROR;
552
+ assert(srcSize >= compressedSize);
549
553
  src = (const BYTE*)src + compressedSize;
550
554
  srcSize -= compressedSize;
551
555
  bound += decompressedBound;
@@ -738,9 +742,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
738
742
  (unsigned)magicNumber, ZSTD_MAGICNUMBER);
739
743
  if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
740
744
  size_t const skippableSize = readSkippableFrameSize(src, srcSize);
741
- if (ZSTD_isError(skippableSize))
742
- return skippableSize;
743
- RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong);
745
+ FORWARD_IF_ERROR(skippableSize);
746
+ assert(skippableSize <= srcSize);
744
747
 
745
748
  src = (const BYTE *)src + skippableSize;
746
749
  srcSize -= skippableSize;
@@ -505,7 +505,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
505
505
  *nbSeqPtr = nbSeq;
506
506
 
507
507
  /* FSE table descriptors */
508
- RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
508
+ RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
509
509
  { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
510
510
  symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
511
511
  symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@@ -637,9 +637,10 @@ size_t ZSTD_execSequence(BYTE* op,
637
637
  if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
638
638
 
639
639
  /* copy Literals */
640
- ZSTD_copy8(op, *litPtr);
641
640
  if (sequence.litLength > 8)
642
- ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
641
+ ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
642
+ else
643
+ ZSTD_copy8(op, *litPtr);
643
644
  op = oLitEnd;
644
645
  *litPtr = iLitEnd; /* update for next sequence */
645
646
 
@@ -686,13 +687,13 @@ size_t ZSTD_execSequence(BYTE* op,
686
687
 
687
688
  if (oMatchEnd > oend-(16-MINMATCH)) {
688
689
  if (op < oend_w) {
689
- ZSTD_wildcopy(op, match, oend_w - op);
690
+ ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
690
691
  match += oend_w - op;
691
692
  op = oend_w;
692
693
  }
693
694
  while (op < oMatchEnd) *op++ = *match++;
694
695
  } else {
695
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
696
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
696
697
  }
697
698
  return sequenceLength;
698
699
  }
@@ -717,9 +718,11 @@ size_t ZSTD_execSequenceLong(BYTE* op,
717
718
  if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
718
719
 
719
720
  /* copy Literals */
720
- ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
721
721
  if (sequence.litLength > 8)
722
- ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
722
+ ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
723
+ else
724
+ ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
725
+
723
726
  op = oLitEnd;
724
727
  *litPtr = iLitEnd; /* update for next sequence */
725
728
 
@@ -766,13 +769,13 @@ size_t ZSTD_execSequenceLong(BYTE* op,
766
769
 
767
770
  if (oMatchEnd > oend-(16-MINMATCH)) {
768
771
  if (op < oend_w) {
769
- ZSTD_wildcopy(op, match, oend_w - op);
772
+ ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
770
773
  match += oend_w - op;
771
774
  op = oend_w;
772
775
  }
773
776
  while (op < oMatchEnd) *op++ = *match++;
774
777
  } else {
775
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
778
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
776
779
  }
777
780
  return sequenceLength;
778
781
  }
@@ -889,6 +892,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
889
892
  }
890
893
 
891
894
  FORCE_INLINE_TEMPLATE size_t
895
+ DONT_VECTORIZE
892
896
  ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
893
897
  void* dst, size_t maxDstSize,
894
898
  const void* seqStart, size_t seqSize, int nbSeq,
@@ -918,6 +922,11 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
918
922
  ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
919
923
  ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
920
924
 
925
+ ZSTD_STATIC_ASSERT(
926
+ BIT_DStream_unfinished < BIT_DStream_completed &&
927
+ BIT_DStream_endOfBuffer < BIT_DStream_completed &&
928
+ BIT_DStream_completed < BIT_DStream_overflow);
929
+
921
930
  for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
922
931
  nbSeq--;
923
932
  { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
@@ -930,6 +939,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
930
939
  /* check if reached exact end */
931
940
  DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
932
941
  RETURN_ERROR_IF(nbSeq, corruption_detected);
942
+ RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
933
943
  /* save reps for next block */
934
944
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
935
945
  }
@@ -1131,6 +1141,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1131
1141
 
1132
1142
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1133
1143
  static TARGET_ATTRIBUTE("bmi2") size_t
1144
+ DONT_VECTORIZE
1134
1145
  ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1135
1146
  void* dst, size_t maxDstSize,
1136
1147
  const void* seqStart, size_t seqSize, int nbSeq,