zstd-ruby 1.4.0.0 → 1.4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +5 -0
- data/ext/zstdruby/libzstd/common/compiler.h +7 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +58 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +175 -117
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +74 -30
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +56 -36
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +35 -14
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +10 -5
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +45 -32
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +18 -7
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +1 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +12 -9
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +154 -43
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +38 -3
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +46 -39
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +9 -9
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +5 -0
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +95 -101
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +11 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +11 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +11 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +88 -84
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +2 -4
- data/ext/zstdruby/libzstd/zstd.h +53 -21
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +3 -4
@@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
|
|
83
83
|
U32* largerPtr = smallerPtr + 1;
|
84
84
|
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
|
85
85
|
U32 dummy32; /* to be nullified at the end */
|
86
|
-
U32 const
|
86
|
+
U32 const windowValid = ms->window.lowLimit;
|
87
|
+
U32 const maxDistance = 1U << cParams->windowLog;
|
88
|
+
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
|
89
|
+
|
87
90
|
|
88
91
|
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
|
89
92
|
current, dictLimit, windowLow);
|
@@ -239,7 +242,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
|
239
242
|
|
240
243
|
const BYTE* const base = ms->window.base;
|
241
244
|
U32 const current = (U32)(ip-base);
|
242
|
-
U32 const
|
245
|
+
U32 const maxDistance = 1U << cParams->windowLog;
|
246
|
+
U32 const windowValid = ms->window.lowLimit;
|
247
|
+
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
|
243
248
|
|
244
249
|
U32* const bt = ms->chainTable;
|
245
250
|
U32 const btLog = cParams->chainLog - 1;
|
@@ -490,8 +495,10 @@ size_t ZSTD_HcFindBestMatch_generic (
|
|
490
495
|
const U32 dictLimit = ms->window.dictLimit;
|
491
496
|
const BYTE* const prefixStart = base + dictLimit;
|
492
497
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
493
|
-
const U32 lowLimit = ms->window.lowLimit;
|
494
498
|
const U32 current = (U32)(ip-base);
|
499
|
+
const U32 maxDistance = 1U << cParams->windowLog;
|
500
|
+
const U32 lowValid = ms->window.lowLimit;
|
501
|
+
const U32 lowLimit = (current - lowValid > maxDistance) ? current - maxDistance : lowValid;
|
495
502
|
const U32 minChain = current > chainSize ? current - chainSize : 0;
|
496
503
|
U32 nbAttempts = 1U << cParams->searchLog;
|
497
504
|
size_t ml=4-1;
|
@@ -653,7 +660,6 @@ size_t ZSTD_compressBlock_lazy_generic(
|
|
653
660
|
|
654
661
|
/* init */
|
655
662
|
ip += (dictAndPrefixLength == 0);
|
656
|
-
ms->nextToUpdate3 = ms->nextToUpdate;
|
657
663
|
if (dictMode == ZSTD_noDict) {
|
658
664
|
U32 const maxRep = (U32)(ip - prefixLowest);
|
659
665
|
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
|
@@ -933,7 +939,6 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
|
|
933
939
|
U32 offset_1 = rep[0], offset_2 = rep[1];
|
934
940
|
|
935
941
|
/* init */
|
936
|
-
ms->nextToUpdate3 = ms->nextToUpdate;
|
937
942
|
ip += (ip == prefixStart);
|
938
943
|
|
939
944
|
/* Match Loop */
|
@@ -447,7 +447,7 @@ size_t ZSTD_ldm_generateSequences(
|
|
447
447
|
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
|
448
448
|
U32 const ldmHSize = 1U << params->hashLog;
|
449
449
|
U32 const correction = ZSTD_window_correctOverflow(
|
450
|
-
&ldmState->window, /* cycleLog */ 0, maxDist,
|
450
|
+
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
|
451
451
|
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
|
452
452
|
}
|
453
453
|
/* 2. We enforce the maximum offset allowed.
|
@@ -255,13 +255,13 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
|
|
255
255
|
* to provide a cost which is directly comparable to a match ending at same position */
|
256
256
|
static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
|
257
257
|
{
|
258
|
-
if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);
|
258
|
+
if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);
|
259
259
|
|
260
260
|
/* dynamic statistics */
|
261
261
|
{ U32 const llCode = ZSTD_LLcode(litLength);
|
262
|
-
int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
|
263
|
-
+ WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
|
264
|
-
- WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
|
262
|
+
int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
|
263
|
+
+ (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
|
264
|
+
- (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
|
265
265
|
#if 1
|
266
266
|
return contribution;
|
267
267
|
#else
|
@@ -278,7 +278,7 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe
|
|
278
278
|
const optState_t* const optPtr,
|
279
279
|
int optLevel)
|
280
280
|
{
|
281
|
-
int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
|
281
|
+
int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
|
282
282
|
+ ZSTD_litLengthContribution(litLength, optPtr, optLevel);
|
283
283
|
return contribution;
|
284
284
|
}
|
@@ -372,13 +372,15 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
|
|
372
372
|
|
373
373
|
/* Update hashTable3 up to ip (excluded)
|
374
374
|
Assumption : always within prefix (i.e. not within extDict) */
|
375
|
-
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
|
375
|
+
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
|
376
|
+
U32* nextToUpdate3,
|
377
|
+
const BYTE* const ip)
|
376
378
|
{
|
377
379
|
U32* const hashTable3 = ms->hashTable3;
|
378
380
|
U32 const hashLog3 = ms->hashLog3;
|
379
381
|
const BYTE* const base = ms->window.base;
|
380
|
-
U32 idx =
|
381
|
-
U32 const target =
|
382
|
+
U32 idx = *nextToUpdate3;
|
383
|
+
U32 const target = (U32)(ip - base);
|
382
384
|
size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
|
383
385
|
assert(hashLog3 > 0);
|
384
386
|
|
@@ -387,6 +389,7 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE*
|
|
387
389
|
idx++;
|
388
390
|
}
|
389
391
|
|
392
|
+
*nextToUpdate3 = target;
|
390
393
|
return hashTable3[hash3];
|
391
394
|
}
|
392
395
|
|
@@ -503,9 +506,11 @@ static U32 ZSTD_insertBt1(
|
|
503
506
|
} }
|
504
507
|
|
505
508
|
*smallerPtr = *largerPtr = 0;
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
+
{ U32 positions = 0;
|
510
|
+
if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
|
511
|
+
assert(matchEndIdx > current + 8);
|
512
|
+
return MAX(positions, matchEndIdx - (current + 8));
|
513
|
+
}
|
509
514
|
}
|
510
515
|
|
511
516
|
FORCE_INLINE_TEMPLATE
|
@@ -520,8 +525,13 @@ void ZSTD_updateTree_internal(
|
|
520
525
|
DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
|
521
526
|
idx, target, dictMode);
|
522
527
|
|
523
|
-
while(idx < target)
|
524
|
-
|
528
|
+
while(idx < target) {
|
529
|
+
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
|
530
|
+
assert(idx < (U32)(idx + forward));
|
531
|
+
idx += forward;
|
532
|
+
}
|
533
|
+
assert((size_t)(ip - base) <= (size_t)(U32)(-1));
|
534
|
+
assert((size_t)(iend - base) <= (size_t)(U32)(-1));
|
525
535
|
ms->nextToUpdate = target;
|
526
536
|
}
|
527
537
|
|
@@ -531,16 +541,18 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
|
|
531
541
|
|
532
542
|
FORCE_INLINE_TEMPLATE
|
533
543
|
U32 ZSTD_insertBtAndGetAllMatches (
|
544
|
+
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
|
534
545
|
ZSTD_matchState_t* ms,
|
546
|
+
U32* nextToUpdate3,
|
535
547
|
const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
|
536
|
-
U32 rep[ZSTD_REP_NUM],
|
548
|
+
const U32 rep[ZSTD_REP_NUM],
|
537
549
|
U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
|
538
|
-
ZSTD_match_t* matches,
|
539
550
|
const U32 lengthToBeat,
|
540
551
|
U32 const mls /* template */)
|
541
552
|
{
|
542
553
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
543
554
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
555
|
+
U32 const maxDistance = 1U << cParams->windowLog;
|
544
556
|
const BYTE* const base = ms->window.base;
|
545
557
|
U32 const current = (U32)(ip-base);
|
546
558
|
U32 const hashLog = cParams->hashLog;
|
@@ -556,8 +568,9 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
556
568
|
U32 const dictLimit = ms->window.dictLimit;
|
557
569
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
558
570
|
const BYTE* const prefixStart = base + dictLimit;
|
559
|
-
U32 const btLow = btMask >= current ? 0 : current - btMask;
|
560
|
-
U32 const
|
571
|
+
U32 const btLow = (btMask >= current) ? 0 : current - btMask;
|
572
|
+
U32 const windowValid = ms->window.lowLimit;
|
573
|
+
U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid;
|
561
574
|
U32 const matchLow = windowLow ? windowLow : 1;
|
562
575
|
U32* smallerPtr = bt + 2*(current&btMask);
|
563
576
|
U32* largerPtr = bt + 2*(current&btMask) + 1;
|
@@ -627,7 +640,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
627
640
|
|
628
641
|
/* HC3 match finder */
|
629
642
|
if ((mls == 3) /*static*/ && (bestLength < mls)) {
|
630
|
-
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
|
643
|
+
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
|
631
644
|
if ((matchIndex3 >= matchLow)
|
632
645
|
& (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
|
633
646
|
size_t mlen;
|
@@ -653,9 +666,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
653
666
|
(ip+mlen == iLimit) ) { /* best possible length */
|
654
667
|
ms->nextToUpdate = current+1; /* skip insertion */
|
655
668
|
return 1;
|
656
|
-
|
657
|
-
}
|
658
|
-
}
|
669
|
+
} } }
|
659
670
|
/* no dictMatchState lookup: dicts don't have a populated HC3 table */
|
660
671
|
}
|
661
672
|
|
@@ -760,10 +771,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
760
771
|
|
761
772
|
|
762
773
|
FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
|
774
|
+
ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
|
763
775
|
ZSTD_matchState_t* ms,
|
776
|
+
U32* nextToUpdate3,
|
764
777
|
const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
|
765
|
-
U32 rep[ZSTD_REP_NUM],
|
766
|
-
|
778
|
+
const U32 rep[ZSTD_REP_NUM],
|
779
|
+
U32 const ll0,
|
780
|
+
U32 const lengthToBeat)
|
767
781
|
{
|
768
782
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
769
783
|
U32 const matchLengthSearch = cParams->minMatch;
|
@@ -772,12 +786,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
|
|
772
786
|
ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
|
773
787
|
switch(matchLengthSearch)
|
774
788
|
{
|
775
|
-
case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0,
|
789
|
+
case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
|
776
790
|
default :
|
777
|
-
case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0,
|
778
|
-
case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0,
|
791
|
+
case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
|
792
|
+
case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
|
779
793
|
case 7 :
|
780
|
-
case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0,
|
794
|
+
case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
|
781
795
|
}
|
782
796
|
}
|
783
797
|
|
@@ -853,6 +867,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
853
867
|
|
854
868
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
855
869
|
U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
|
870
|
+
U32 nextToUpdate3 = ms->nextToUpdate;
|
856
871
|
|
857
872
|
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
|
858
873
|
ZSTD_match_t* const matches = optStatePtr->matchTable;
|
@@ -862,7 +877,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
862
877
|
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
|
863
878
|
(U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
|
864
879
|
assert(optLevel <= 2);
|
865
|
-
ms->nextToUpdate3 = ms->nextToUpdate;
|
866
880
|
ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
|
867
881
|
ip += (ip==prefixStart);
|
868
882
|
|
@@ -873,7 +887,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
873
887
|
/* find first match */
|
874
888
|
{ U32 const litlen = (U32)(ip - anchor);
|
875
889
|
U32 const ll0 = !litlen;
|
876
|
-
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0,
|
890
|
+
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
|
877
891
|
if (!nbMatches) { ip++; continue; }
|
878
892
|
|
879
893
|
/* initialize opt[0] */
|
@@ -970,7 +984,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
970
984
|
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
|
971
985
|
U32 const previousPrice = opt[cur].price;
|
972
986
|
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
|
973
|
-
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0,
|
987
|
+
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
|
974
988
|
U32 matchNb;
|
975
989
|
if (!nbMatches) {
|
976
990
|
DEBUGLOG(7, "rPos:%u : no match found", cur);
|
@@ -1094,7 +1108,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
1094
1108
|
} /* while (ip < ilimit) */
|
1095
1109
|
|
1096
1110
|
/* Return the last literals size */
|
1097
|
-
return iend - anchor;
|
1111
|
+
return (size_t)(iend - anchor);
|
1098
1112
|
}
|
1099
1113
|
|
1100
1114
|
|
@@ -1158,7 +1172,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
|
1158
1172
|
ms->window.dictLimit += (U32)srcSize;
|
1159
1173
|
ms->window.lowLimit = ms->window.dictLimit;
|
1160
1174
|
ms->nextToUpdate = ms->window.dictLimit;
|
1161
|
-
ms->nextToUpdate3 = ms->window.dictLimit;
|
1162
1175
|
|
1163
1176
|
/* reinforce the weight of collected statistics */
|
1164
1177
|
ZSTD_upscaleStats(&ms->opt);
|
@@ -1129,9 +1129,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
|
|
1129
1129
|
size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
|
1130
1130
|
size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
|
1131
1131
|
assert(flushed <= produced);
|
1132
|
+
assert(jobPtr->consumed <= jobPtr->src.size);
|
1132
1133
|
toFlush = produced - flushed;
|
1133
|
-
if
|
1134
|
-
|
1134
|
+
/* if toFlush==0, nothing is available to flush.
|
1135
|
+
* However, jobID is expected to still be active:
|
1136
|
+
* if jobID was already completed and fully flushed,
|
1137
|
+
* ZSTDMT_flushProduced() should have already moved onto next job.
|
1138
|
+
* Therefore, some input has not yet been consumed. */
|
1139
|
+
if (toFlush==0) {
|
1135
1140
|
assert(jobPtr->consumed < jobPtr->src.size);
|
1136
1141
|
}
|
1137
1142
|
}
|
@@ -1148,12 +1153,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
|
|
1148
1153
|
|
1149
1154
|
static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
|
1150
1155
|
{
|
1151
|
-
|
1156
|
+
unsigned jobLog;
|
1157
|
+
if (params.ldmParams.enableLdm) {
|
1152
1158
|
/* In Long Range Mode, the windowLog is typically oversized.
|
1153
1159
|
* In which case, it's preferable to determine the jobSize
|
1154
1160
|
* based on chainLog instead. */
|
1155
|
-
|
1156
|
-
|
1161
|
+
jobLog = MAX(21, params.cParams.chainLog + 4);
|
1162
|
+
} else {
|
1163
|
+
jobLog = MAX(20, params.cParams.windowLog + 2);
|
1164
|
+
}
|
1165
|
+
return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
|
1157
1166
|
}
|
1158
1167
|
|
1159
1168
|
static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
|
@@ -1197,7 +1206,7 @@ static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
|
|
1197
1206
|
ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
|
1198
1207
|
- overlapRLog;
|
1199
1208
|
}
|
1200
|
-
assert(0 <= ovLog && ovLog <=
|
1209
|
+
assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
|
1201
1210
|
DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
|
1202
1211
|
DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
|
1203
1212
|
return (ovLog==0) ? 0 : (size_t)1 << ovLog;
|
@@ -1391,7 +1400,7 @@ size_t ZSTDMT_initCStream_internal(
|
|
1391
1400
|
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
|
1392
1401
|
|
1393
1402
|
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
|
1394
|
-
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
|
1403
|
+
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
|
1395
1404
|
|
1396
1405
|
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
|
1397
1406
|
if (mtctx->singleBlockingThread) {
|
@@ -1432,6 +1441,8 @@ size_t ZSTDMT_initCStream_internal(
|
|
1432
1441
|
if (mtctx->targetSectionSize == 0) {
|
1433
1442
|
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
|
1434
1443
|
}
|
1444
|
+
assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
|
1445
|
+
|
1435
1446
|
if (params.rsyncable) {
|
1436
1447
|
/* Aim for the targetSectionSize as the average job size. */
|
1437
1448
|
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
|
@@ -360,8 +360,11 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
|
|
360
360
|
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
|
361
361
|
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
|
362
362
|
frameParameter_unsupported);
|
363
|
-
|
364
|
-
|
363
|
+
{
|
364
|
+
size_t const skippableSize = skippableHeaderSize + sizeU32;
|
365
|
+
RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong);
|
366
|
+
return skippableSize;
|
367
|
+
}
|
365
368
|
}
|
366
369
|
|
367
370
|
/** ZSTD_findDecompressedSize() :
|
@@ -378,11 +381,10 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
|
|
378
381
|
|
379
382
|
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
|
380
383
|
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
|
381
|
-
if (ZSTD_isError(skippableSize))
|
382
|
-
return skippableSize;
|
383
|
-
if (srcSize < skippableSize) {
|
384
|
+
if (ZSTD_isError(skippableSize)) {
|
384
385
|
return ZSTD_CONTENTSIZE_ERROR;
|
385
386
|
}
|
387
|
+
assert(skippableSize <= srcSize);
|
386
388
|
|
387
389
|
src = (const BYTE *)src + skippableSize;
|
388
390
|
srcSize -= skippableSize;
|
@@ -467,6 +469,8 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
|
|
467
469
|
if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
|
468
470
|
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
|
469
471
|
frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
|
472
|
+
assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
|
473
|
+
frameSizeInfo.compressedSize <= srcSize);
|
470
474
|
return frameSizeInfo;
|
471
475
|
} else {
|
472
476
|
const BYTE* ip = (const BYTE*)src;
|
@@ -529,7 +533,6 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
|
|
529
533
|
return frameSizeInfo.compressedSize;
|
530
534
|
}
|
531
535
|
|
532
|
-
|
533
536
|
/** ZSTD_decompressBound() :
|
534
537
|
* compatible with legacy mode
|
535
538
|
* `src` must point to the start of a ZSTD frame or a skippable frame
|
@@ -546,6 +549,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
|
|
546
549
|
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
|
547
550
|
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
|
548
551
|
return ZSTD_CONTENTSIZE_ERROR;
|
552
|
+
assert(srcSize >= compressedSize);
|
549
553
|
src = (const BYTE*)src + compressedSize;
|
550
554
|
srcSize -= compressedSize;
|
551
555
|
bound += decompressedBound;
|
@@ -738,9 +742,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
|
738
742
|
(unsigned)magicNumber, ZSTD_MAGICNUMBER);
|
739
743
|
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
|
740
744
|
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
|
741
|
-
|
742
|
-
|
743
|
-
RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong);
|
745
|
+
FORWARD_IF_ERROR(skippableSize);
|
746
|
+
assert(skippableSize <= srcSize);
|
744
747
|
|
745
748
|
src = (const BYTE *)src + skippableSize;
|
746
749
|
srcSize -= skippableSize;
|
@@ -505,7 +505,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|
505
505
|
*nbSeqPtr = nbSeq;
|
506
506
|
|
507
507
|
/* FSE table descriptors */
|
508
|
-
RETURN_ERROR_IF(ip+
|
508
|
+
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
|
509
509
|
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
|
510
510
|
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
|
511
511
|
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
|
@@ -637,9 +637,10 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
637
637
|
if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
638
638
|
|
639
639
|
/* copy Literals */
|
640
|
-
ZSTD_copy8(op, *litPtr);
|
641
640
|
if (sequence.litLength > 8)
|
642
|
-
|
641
|
+
ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
642
|
+
else
|
643
|
+
ZSTD_copy8(op, *litPtr);
|
643
644
|
op = oLitEnd;
|
644
645
|
*litPtr = iLitEnd; /* update for next sequence */
|
645
646
|
|
@@ -686,13 +687,13 @@ size_t ZSTD_execSequence(BYTE* op,
|
|
686
687
|
|
687
688
|
if (oMatchEnd > oend-(16-MINMATCH)) {
|
688
689
|
if (op < oend_w) {
|
689
|
-
ZSTD_wildcopy(op, match, oend_w - op);
|
690
|
+
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
|
690
691
|
match += oend_w - op;
|
691
692
|
op = oend_w;
|
692
693
|
}
|
693
694
|
while (op < oMatchEnd) *op++ = *match++;
|
694
695
|
} else {
|
695
|
-
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
|
696
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
|
696
697
|
}
|
697
698
|
return sequenceLength;
|
698
699
|
}
|
@@ -717,9 +718,11 @@ size_t ZSTD_execSequenceLong(BYTE* op,
|
|
717
718
|
if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
|
718
719
|
|
719
720
|
/* copy Literals */
|
720
|
-
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
|
721
721
|
if (sequence.litLength > 8)
|
722
|
-
|
722
|
+
ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
723
|
+
else
|
724
|
+
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
|
725
|
+
|
723
726
|
op = oLitEnd;
|
724
727
|
*litPtr = iLitEnd; /* update for next sequence */
|
725
728
|
|
@@ -766,13 +769,13 @@ size_t ZSTD_execSequenceLong(BYTE* op,
|
|
766
769
|
|
767
770
|
if (oMatchEnd > oend-(16-MINMATCH)) {
|
768
771
|
if (op < oend_w) {
|
769
|
-
ZSTD_wildcopy(op, match, oend_w - op);
|
772
|
+
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
|
770
773
|
match += oend_w - op;
|
771
774
|
op = oend_w;
|
772
775
|
}
|
773
776
|
while (op < oMatchEnd) *op++ = *match++;
|
774
777
|
} else {
|
775
|
-
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
|
778
|
+
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
|
776
779
|
}
|
777
780
|
return sequenceLength;
|
778
781
|
}
|
@@ -889,6 +892,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
|
|
889
892
|
}
|
890
893
|
|
891
894
|
FORCE_INLINE_TEMPLATE size_t
|
895
|
+
DONT_VECTORIZE
|
892
896
|
ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
893
897
|
void* dst, size_t maxDstSize,
|
894
898
|
const void* seqStart, size_t seqSize, int nbSeq,
|
@@ -918,6 +922,11 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
918
922
|
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
|
919
923
|
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
|
920
924
|
|
925
|
+
ZSTD_STATIC_ASSERT(
|
926
|
+
BIT_DStream_unfinished < BIT_DStream_completed &&
|
927
|
+
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
|
928
|
+
BIT_DStream_completed < BIT_DStream_overflow);
|
929
|
+
|
921
930
|
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
|
922
931
|
nbSeq--;
|
923
932
|
{ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
|
@@ -930,6 +939,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
|
|
930
939
|
/* check if reached exact end */
|
931
940
|
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
|
932
941
|
RETURN_ERROR_IF(nbSeq, corruption_detected);
|
942
|
+
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
|
933
943
|
/* save reps for next block */
|
934
944
|
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
|
935
945
|
}
|
@@ -1131,6 +1141,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
|
1131
1141
|
|
1132
1142
|
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
1133
1143
|
static TARGET_ATTRIBUTE("bmi2") size_t
|
1144
|
+
DONT_VECTORIZE
|
1134
1145
|
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
|
1135
1146
|
void* dst, size_t maxDstSize,
|
1136
1147
|
const void* seqStart, size_t seqSize, int nbSeq,
|