zstdlib 0.6.0-x64-mingw32 → 0.7.0-x64-mingw32
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGES.md +5 -0
- data/ext/zstdlib/extconf.rb +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/bitstream.h +31 -37
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/compiler.h +19 -3
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/cpu.h +1 -1
- data/ext/zstdlib/zstd-1.4.5/lib/common/debug.c +24 -0
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/debug.h +11 -31
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/entropy_common.c +13 -33
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/error_private.c +2 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/error_private.h +6 -2
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/fse.h +11 -31
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/fse_decompress.c +12 -37
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/huf.h +15 -33
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/mem.h +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/pool.c +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/pool.h +2 -2
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/threading.c +4 -3
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/threading.h +4 -3
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/xxhash.c +15 -33
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/xxhash.h +11 -31
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/zstd_common.c +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/zstd_errors.h +2 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/zstd_internal.h +112 -15
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/fse_compress.c +17 -40
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/hist.c +15 -35
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/hist.h +12 -32
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/huf_compress.c +92 -92
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress.c +450 -275
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_internal.h +136 -14
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_literals.c +10 -6
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_literals.h +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_sequences.c +24 -20
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.c +845 -0
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_cwksp.h +3 -13
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_double_fast.c +11 -8
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_double_fast.h +2 -2
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_fast.c +36 -24
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_fast.h +2 -2
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_lazy.c +34 -11
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_lazy.h +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_ldm.c +27 -5
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_ldm.h +7 -2
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_opt.c +38 -84
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_opt.h +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstdmt_compress.c +48 -21
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstdmt_compress.h +2 -2
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/huf_decompress.c +76 -62
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_ddict.c +12 -8
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_ddict.h +2 -2
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress.c +264 -148
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.c +312 -203
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.h +3 -3
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress_internal.h +18 -4
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/zstd.h +62 -21
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzclose.c +0 -0
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzcompatibility.h +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzguts.h +0 -0
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzlib.c +0 -0
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzread.c +0 -0
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzwrite.c +0 -0
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.c +1 -1
- data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.h +1 -1
- data/lib/2.2/zstdlib.so +0 -0
- data/lib/2.3/zstdlib.so +0 -0
- data/lib/2.4/zstdlib.so +0 -0
- data/lib/2.5/zstdlib.so +0 -0
- data/lib/2.6/zstdlib.so +0 -0
- data/lib/2.7/zstdlib.so +0 -0
- metadata +64 -62
- data/ext/zstdlib/zstd-1.4.4/lib/common/debug.c +0 -44
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -249,40 +249,6 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
|
|
249
249
|
}
|
250
250
|
}
|
251
251
|
|
252
|
-
/* ZSTD_litLengthContribution() :
|
253
|
-
* @return ( cost(litlength) - cost(0) )
|
254
|
-
* this value can then be added to rawLiteralsCost()
|
255
|
-
* to provide a cost which is directly comparable to a match ending at same position */
|
256
|
-
static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
|
257
|
-
{
|
258
|
-
if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);
|
259
|
-
|
260
|
-
/* dynamic statistics */
|
261
|
-
{ U32 const llCode = ZSTD_LLcode(litLength);
|
262
|
-
int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
|
263
|
-
+ (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
|
264
|
-
- (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
|
265
|
-
#if 1
|
266
|
-
return contribution;
|
267
|
-
#else
|
268
|
-
return MAX(0, contribution); /* sometimes better, sometimes not ... */
|
269
|
-
#endif
|
270
|
-
}
|
271
|
-
}
|
272
|
-
|
273
|
-
/* ZSTD_literalsContribution() :
|
274
|
-
* creates a fake cost for the literals part of a sequence
|
275
|
-
* which can be compared to the ending cost of a match
|
276
|
-
* should a new match start at this position */
|
277
|
-
static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
|
278
|
-
const optState_t* const optPtr,
|
279
|
-
int optLevel)
|
280
|
-
{
|
281
|
-
int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
|
282
|
-
+ ZSTD_litLengthContribution(litLength, optPtr, optLevel);
|
283
|
-
return contribution;
|
284
|
-
}
|
285
|
-
|
286
252
|
/* ZSTD_getMatchPrice() :
|
287
253
|
* Provides the cost of the match part (offset + matchLength) of a sequence
|
288
254
|
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
|
@@ -603,7 +569,10 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
603
569
|
U32 repLen = 0;
|
604
570
|
assert(current >= dictLimit);
|
605
571
|
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
|
606
|
-
|
572
|
+
/* We must validate the repcode offset because when we're using a dictionary the
|
573
|
+
* valid offset range shrinks when the dictionary goes out of bounds.
|
574
|
+
*/
|
575
|
+
if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
|
607
576
|
repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
|
608
577
|
}
|
609
578
|
} else { /* repIndex < dictLimit || repIndex >= current */
|
@@ -799,30 +768,6 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
|
|
799
768
|
/*-*******************************
|
800
769
|
* Optimal parser
|
801
770
|
*********************************/
|
802
|
-
typedef struct repcodes_s {
|
803
|
-
U32 rep[3];
|
804
|
-
} repcodes_t;
|
805
|
-
|
806
|
-
static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
|
807
|
-
{
|
808
|
-
repcodes_t newReps;
|
809
|
-
if (offset >= ZSTD_REP_NUM) { /* full offset */
|
810
|
-
newReps.rep[2] = rep[1];
|
811
|
-
newReps.rep[1] = rep[0];
|
812
|
-
newReps.rep[0] = offset - ZSTD_REP_MOVE;
|
813
|
-
} else { /* repcode */
|
814
|
-
U32 const repCode = offset + ll0;
|
815
|
-
if (repCode > 0) { /* note : if repCode==0, no change */
|
816
|
-
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
817
|
-
newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
|
818
|
-
newReps.rep[1] = rep[0];
|
819
|
-
newReps.rep[0] = currentOffset;
|
820
|
-
} else { /* repCode == 0 */
|
821
|
-
memcpy(&newReps, rep, sizeof(newReps));
|
822
|
-
}
|
823
|
-
}
|
824
|
-
return newReps;
|
825
|
-
}
|
826
771
|
|
827
772
|
|
828
773
|
static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
|
@@ -839,7 +784,7 @@ listStats(const U32* table, int lastEltID)
|
|
839
784
|
int enb;
|
840
785
|
for (enb=0; enb < nbElts; enb++) {
|
841
786
|
(void)table;
|
842
|
-
|
787
|
+
/* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
|
843
788
|
RAWLOG(2, "%4i,", table[enb]);
|
844
789
|
}
|
845
790
|
RAWLOG(2, " \n");
|
@@ -894,7 +839,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
894
839
|
{ U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
|
895
840
|
opt[0].mlen = 0; /* means is_a_literal */
|
896
841
|
opt[0].litlen = litlen;
|
897
|
-
|
842
|
+
/* We don't need to include the actual price of the literals because
|
843
|
+
* it is static for the duration of the forward pass, and is included
|
844
|
+
* in every price. We include the literal length to avoid negative
|
845
|
+
* prices when we subtract the previous literal length.
|
846
|
+
*/
|
847
|
+
opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
|
898
848
|
|
899
849
|
/* large match -> immediate encoding */
|
900
850
|
{ U32 const maxML = matches[nbMatches-1].len;
|
@@ -923,7 +873,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
923
873
|
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
|
924
874
|
U32 const offset = matches[matchNb].off;
|
925
875
|
U32 const end = matches[matchNb].len;
|
926
|
-
repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
|
927
876
|
for ( ; pos <= end ; pos++ ) {
|
928
877
|
U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
|
929
878
|
U32 const sequencePrice = literalsPrice + matchPrice;
|
@@ -933,8 +882,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
933
882
|
opt[pos].off = offset;
|
934
883
|
opt[pos].litlen = litlen;
|
935
884
|
opt[pos].price = sequencePrice;
|
936
|
-
ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
|
937
|
-
memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
|
938
885
|
} }
|
939
886
|
last_pos = pos-1;
|
940
887
|
}
|
@@ -961,7 +908,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
961
908
|
opt[cur].off = 0;
|
962
909
|
opt[cur].litlen = litlen;
|
963
910
|
opt[cur].price = price;
|
964
|
-
memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
|
965
911
|
} else {
|
966
912
|
DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
|
967
913
|
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
|
@@ -969,6 +915,21 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
969
915
|
}
|
970
916
|
}
|
971
917
|
|
918
|
+
/* Set the repcodes of the current position. We must do it here
|
919
|
+
* because we rely on the repcodes of the 2nd to last sequence being
|
920
|
+
* correct to set the next chunks repcodes during the backward
|
921
|
+
* traversal.
|
922
|
+
*/
|
923
|
+
ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
|
924
|
+
assert(cur >= opt[cur].mlen);
|
925
|
+
if (opt[cur].mlen != 0) {
|
926
|
+
U32 const prev = cur - opt[cur].mlen;
|
927
|
+
repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
|
928
|
+
memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
|
929
|
+
} else {
|
930
|
+
memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
|
931
|
+
}
|
932
|
+
|
972
933
|
/* last match must start at a minimum distance of 8 from oend */
|
973
934
|
if (inr > ilimit) continue;
|
974
935
|
|
@@ -1009,7 +970,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
1009
970
|
/* set prices using matches found at position == cur */
|
1010
971
|
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
|
1011
972
|
U32 const offset = matches[matchNb].off;
|
1012
|
-
repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
|
1013
973
|
U32 const lastML = matches[matchNb].len;
|
1014
974
|
U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
|
1015
975
|
U32 mlen;
|
@@ -1029,8 +989,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
1029
989
|
opt[pos].off = offset;
|
1030
990
|
opt[pos].litlen = litlen;
|
1031
991
|
opt[pos].price = price;
|
1032
|
-
ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
|
1033
|
-
memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
|
1034
992
|
} else {
|
1035
993
|
DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
|
1036
994
|
pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
|
@@ -1046,6 +1004,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
1046
1004
|
_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
1047
1005
|
assert(opt[0].mlen == 0);
|
1048
1006
|
|
1007
|
+
/* Set the next chunk's repcodes based on the repcodes of the beginning
|
1008
|
+
* of the last match, and the last sequence. This avoids us having to
|
1009
|
+
* update them while traversing the sequences.
|
1010
|
+
*/
|
1011
|
+
if (lastSequence.mlen != 0) {
|
1012
|
+
repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
|
1013
|
+
memcpy(rep, &reps, sizeof(reps));
|
1014
|
+
} else {
|
1015
|
+
memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
|
1016
|
+
}
|
1017
|
+
|
1049
1018
|
{ U32 const storeEnd = cur + 1;
|
1050
1019
|
U32 storeStart = storeEnd;
|
1051
1020
|
U32 seqPos = cur;
|
@@ -1082,20 +1051,6 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
1082
1051
|
continue; /* will finish */
|
1083
1052
|
}
|
1084
1053
|
|
1085
|
-
/* repcodes update : like ZSTD_updateRep(), but update in place */
|
1086
|
-
if (offCode >= ZSTD_REP_NUM) { /* full offset */
|
1087
|
-
rep[2] = rep[1];
|
1088
|
-
rep[1] = rep[0];
|
1089
|
-
rep[0] = offCode - ZSTD_REP_MOVE;
|
1090
|
-
} else { /* repcode */
|
1091
|
-
U32 const repCode = offCode + (llen==0);
|
1092
|
-
if (repCode) { /* note : if repCode==0, no change */
|
1093
|
-
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
1094
|
-
if (repCode >= 2) rep[2] = rep[1];
|
1095
|
-
rep[1] = rep[0];
|
1096
|
-
rep[0] = currentOffset;
|
1097
|
-
} }
|
1098
|
-
|
1099
1054
|
assert(anchor + llen <= iend);
|
1100
1055
|
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
|
1101
1056
|
ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
|
@@ -1104,7 +1059,6 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
1104
1059
|
} }
|
1105
1060
|
ZSTD_setBasePrices(optStatePtr, optLevel);
|
1106
1061
|
}
|
1107
|
-
|
1108
1062
|
} /* while (ip < ilimit) */
|
1109
1063
|
|
1110
1064
|
/* Return the last literals size */
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -22,9 +22,9 @@
|
|
22
22
|
/* ====== Dependencies ====== */
|
23
23
|
#include <string.h> /* memcpy, memset */
|
24
24
|
#include <limits.h> /* INT_MAX, UINT_MAX */
|
25
|
-
#include "mem.h" /* MEM_STATIC */
|
26
|
-
#include "pool.h" /* threadpool */
|
27
|
-
#include "threading.h" /* mutex */
|
25
|
+
#include "../common/mem.h" /* MEM_STATIC */
|
26
|
+
#include "../common/pool.h" /* threadpool */
|
27
|
+
#include "../common/threading.h" /* mutex */
|
28
28
|
#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
|
29
29
|
#include "zstd_ldm.h"
|
30
30
|
#include "zstdmt_compress.h"
|
@@ -461,7 +461,13 @@ typedef struct {
|
|
461
461
|
ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
|
462
462
|
} serialState_t;
|
463
463
|
|
464
|
-
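static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize)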
|
464
|
+
static int
|
465
|
+
ZSTDMT_serialState_reset(serialState_t* serialState,
|
466
|
+
ZSTDMT_seqPool* seqPool,
|
467
|
+
ZSTD_CCtx_params params,
|
468
|
+
size_t jobSize,
|
469
|
+
const void* dict, size_t const dictSize,
|
470
|
+
ZSTD_dictContentType_e dictContentType)
|
465
471
|
{
|
466
472
|
/* Adjust parameters */
|
467
473
|
if (params.ldmParams.enableLdm) {
|
@@ -490,8 +496,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
|
|
490
496
|
/* Size the seq pool tables */
|
491
497
|
ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
|
492
498
|
/* Reset the window */
|
493
|
-
|
494
|
-
serialState->ldmWindow = serialState->ldmState.window;
|
499
|
+
ZSTD_window_init(&serialState->ldmState.window);
|
495
500
|
/* Resize tables and output space if necessary. */
|
496
501
|
if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
|
497
502
|
ZSTD_free(serialState->ldmState.hashTable, cMem);
|
@@ -506,7 +511,24 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
|
|
506
511
|
/* Zero the tables */
|
507
512
|
memset(serialState->ldmState.hashTable, 0, hashSize);
|
508
513
|
memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
|
514
|
+
|
515
|
+
/* Update window state and fill hash table with dict */
|
516
|
+
serialState->ldmState.loadedDictEnd = 0;
|
517
|
+
if (dictSize > 0) {
|
518
|
+
if (dictContentType == ZSTD_dct_rawContent) {
|
519
|
+
BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
|
520
|
+
ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
|
521
|
+
ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
|
522
|
+
serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
|
523
|
+
} else {
|
524
|
+
/* don't even load anything */
|
525
|
+
}
|
526
|
+
}
|
527
|
+
|
528
|
+
/* Initialize serialState's copy of ldmWindow. */
|
529
|
+
serialState->ldmWindow = serialState->ldmState.window;
|
509
530
|
}
|
531
|
+
|
510
532
|
serialState->params = params;
|
511
533
|
serialState->params.jobSize = (U32)jobSize;
|
512
534
|
return 0;
|
@@ -1054,7 +1076,7 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
|
|
1054
1076
|
static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
|
1055
1077
|
{
|
1056
1078
|
if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
|
1057
|
-
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
|
1079
|
+
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
|
1058
1080
|
mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
|
1059
1081
|
if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
|
1060
1082
|
mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
|
@@ -1076,7 +1098,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
|
|
1076
1098
|
DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
|
1077
1099
|
compressionLevel);
|
1078
1100
|
mtctx->params.compressionLevel = compressionLevel;
|
1079
|
-
{ ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams,
|
1101
|
+
{ ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0);
|
1080
1102
|
cParams.windowLog = saved_wlog;
|
1081
1103
|
mtctx->params.cParams = cParams;
|
1082
1104
|
}
|
@@ -1235,7 +1257,8 @@ ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nb
|
|
1235
1257
|
/* ZSTDMT_compress_advanced_internal() :
|
1236
1258
|
* This is a blocking function : it will only give back control to caller after finishing its compression job.
|
1237
1259
|
*/
|
1238
|
-
static size_t ZSTDMT_compress_advanced_internal(
|
1260
|
+
static size_t
|
1261
|
+
ZSTDMT_compress_advanced_internal(
|
1239
1262
|
ZSTDMT_CCtx* mtctx,
|
1240
1263
|
void* dst, size_t dstCapacity,
|
1241
1264
|
const void* src, size_t srcSize,
|
@@ -1267,10 +1290,11 @@ static size_t ZSTDMT_compress_advanced_internal(
|
|
1267
1290
|
|
1268
1291
|
assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
|
1269
1292
|
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
|
1270
|
-
|
1293
|
+
/* LDM doesn't even try to load the dictionary in single-ingestion mode */
|
1294
|
+
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize, NULL, 0, ZSTD_dct_auto))
|
1271
1295
|
return ERROR(memory_allocation);
|
1272
1296
|
|
1273
|
-
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */
|
1297
|
+
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) , ""); /* only expands if necessary */
|
1274
1298
|
|
1275
1299
|
{ unsigned u;
|
1276
1300
|
for (u=0; u<nbJobs; u++) {
|
@@ -1403,7 +1427,7 @@ size_t ZSTDMT_initCStream_internal(
|
|
1403
1427
|
|
1404
1428
|
/* init */
|
1405
1429
|
if (params.nbWorkers != mtctx->params.nbWorkers)
|
1406
|
-
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
|
1430
|
+
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , "");
|
1407
1431
|
|
1408
1432
|
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
|
1409
1433
|
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
|
@@ -1500,7 +1524,8 @@ size_t ZSTDMT_initCStream_internal(
|
|
1500
1524
|
mtctx->allJobsCompleted = 0;
|
1501
1525
|
mtctx->consumed = 0;
|
1502
1526
|
mtctx->produced = 0;
|
1503
|
-
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize))
|
1527
|
+
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize,
|
1528
|
+
dict, dictSize, dictContentType))
|
1504
1529
|
return ERROR(memory_allocation);
|
1505
1530
|
return 0;
|
1506
1531
|
}
|
@@ -1714,9 +1739,11 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
|
|
1714
1739
|
assert(mtctx->doneJobID < mtctx->nextJobID);
|
1715
1740
|
assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
|
1716
1741
|
assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
|
1717
|
-
|
1718
|
-
|
1719
|
-
|
1742
|
+
if (toFlush > 0) {
|
1743
|
+
memcpy((char*)output->dst + output->pos,
|
1744
|
+
(const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
|
1745
|
+
toFlush);
|
1746
|
+
}
|
1720
1747
|
output->pos += toFlush;
|
1721
1748
|
mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */
|
1722
1749
|
|
@@ -1786,7 +1813,7 @@ static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
|
|
1786
1813
|
BYTE const* const bufferStart = (BYTE const*)buffer.start;
|
1787
1814
|
BYTE const* const bufferEnd = bufferStart + buffer.capacity;
|
1788
1815
|
BYTE const* const rangeStart = (BYTE const*)range.start;
|
1789
|
-
BYTE const* const rangeEnd = rangeStart + range.size;
|
1816
|
+
BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;
|
1790
1817
|
|
1791
1818
|
if (rangeStart == NULL || bufferStart == NULL)
|
1792
1819
|
return 0;
|
@@ -2060,7 +2087,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
|
2060
2087
|
|| ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */
|
2061
2088
|
size_t const jobSize = mtctx->inBuff.filled;
|
2062
2089
|
assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
|
2063
|
-
FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
|
2090
|
+
FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , "");
|
2064
2091
|
}
|
2065
2092
|
|
2066
2093
|
/* check for potential compressed data ready to be flushed */
|
@@ -2074,7 +2101,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
|
2074
2101
|
|
2075
2102
|
size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
|
2076
2103
|
{
|
2077
|
-
FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
|
2104
|
+
FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) , "");
|
2078
2105
|
|
2079
2106
|
/* recommended next input size : fill current input buffer */
|
2080
2107
|
return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
|
@@ -2091,7 +2118,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
|
|
2091
2118
|
|| ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */
|
2092
2119
|
DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
|
2093
2120
|
(U32)srcSize, (U32)endFrame);
|
2094
|
-
FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
|
2121
|
+
FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) , "");
|
2095
2122
|
}
|
2096
2123
|
|
2097
2124
|
/* check if there is any data available to flush */
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -40,7 +40,7 @@
|
|
40
40
|
/* === Dependencies === */
|
41
41
|
#include <stddef.h> /* size_t */
|
42
42
|
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
|
43
|
-
#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
|
43
|
+
#include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
|
44
44
|
|
45
45
|
|
46
46
|
/* === Constants === */
|
@@ -1,47 +1,27 @@
|
|
1
1
|
/* ******************************************************************
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
notice, this list of conditions and the following disclaimer.
|
14
|
-
* Redistributions in binary form must reproduce the above
|
15
|
-
copyright notice, this list of conditions and the following disclaimer
|
16
|
-
in the documentation and/or other materials provided with the
|
17
|
-
distribution.
|
18
|
-
|
19
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
20
|
-
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
21
|
-
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
22
|
-
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
23
|
-
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
24
|
-
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
25
|
-
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
26
|
-
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
27
|
-
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
28
|
-
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
29
|
-
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
30
|
-
|
31
|
-
You can contact the author at :
|
32
|
-
- FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
2
|
+
* huff0 huffman decoder,
|
3
|
+
* part of Finite State Entropy library
|
4
|
+
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
|
5
|
+
*
|
6
|
+
* You can contact the author at :
|
7
|
+
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
8
|
+
*
|
9
|
+
* This source code is licensed under both the BSD-style license (found in the
|
10
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
11
|
+
* in the COPYING file in the root directory of this source tree).
|
12
|
+
* You may select, at your option, one of the above-listed licenses.
|
33
13
|
****************************************************************** */
|
34
14
|
|
35
15
|
/* **************************************************************
|
36
16
|
* Dependencies
|
37
17
|
****************************************************************/
|
38
18
|
#include <string.h> /* memcpy, memset */
|
39
|
-
#include "compiler.h"
|
40
|
-
#include "bitstream.h" /* BIT_* */
|
41
|
-
#include "fse.h" /* to compress headers */
|
19
|
+
#include "../common/compiler.h"
|
20
|
+
#include "../common/bitstream.h" /* BIT_* */
|
21
|
+
#include "../common/fse.h" /* to compress headers */
|
42
22
|
#define HUF_STATIC_LINKING_ONLY
|
43
|
-
#include "huf.h"
|
44
|
-
#include "error_private.h"
|
23
|
+
#include "../common/huf.h"
|
24
|
+
#include "../common/error_private.h"
|
45
25
|
|
46
26
|
/* **************************************************************
|
47
27
|
* Macros
|
@@ -61,9 +41,6 @@
|
|
61
41
|
* Error Management
|
62
42
|
****************************************************************/
|
63
43
|
#define HUF_isError ERR_isError
|
64
|
-
#ifndef CHECK_F
|
65
|
-
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
|
66
|
-
#endif
|
67
44
|
|
68
45
|
|
69
46
|
/* **************************************************************
|
@@ -181,17 +158,29 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
|
|
181
158
|
|
182
159
|
/* fill DTable */
|
183
160
|
{ U32 n;
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
161
|
+
size_t const nEnd = nbSymbols;
|
162
|
+
for (n=0; n<nEnd; n++) {
|
163
|
+
size_t const w = huffWeight[n];
|
164
|
+
size_t const length = (1 << w) >> 1;
|
165
|
+
size_t const uStart = rankVal[w];
|
166
|
+
size_t const uEnd = uStart + length;
|
167
|
+
size_t u;
|
188
168
|
HUF_DEltX1 D;
|
189
|
-
D.byte = (BYTE)n;
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
169
|
+
D.byte = (BYTE)n;
|
170
|
+
D.nbBits = (BYTE)(tableLog + 1 - w);
|
171
|
+
rankVal[w] = (U32)uEnd;
|
172
|
+
if (length < 4) {
|
173
|
+
/* Use length in the loop bound so the compiler knows it is short. */
|
174
|
+
for (u = 0; u < length; ++u)
|
175
|
+
dt[uStart + u] = D;
|
176
|
+
} else {
|
177
|
+
/* Unroll the loop 4 times, we know it is a power of 2. */
|
178
|
+
for (u = uStart; u < uEnd; u += 4) {
|
179
|
+
dt[u + 0] = D;
|
180
|
+
dt[u + 1] = D;
|
181
|
+
dt[u + 2] = D;
|
182
|
+
dt[u + 3] = D;
|
183
|
+
} } } }
|
195
184
|
return iSize;
|
196
185
|
}
|
197
186
|
|
@@ -282,6 +271,7 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
282
271
|
{ const BYTE* const istart = (const BYTE*) cSrc;
|
283
272
|
BYTE* const ostart = (BYTE*) dst;
|
284
273
|
BYTE* const oend = ostart + dstSize;
|
274
|
+
BYTE* const olimit = oend - 3;
|
285
275
|
const void* const dtPtr = DTable + 1;
|
286
276
|
const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
|
287
277
|
|
@@ -306,9 +296,9 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
306
296
|
BYTE* op2 = opStart2;
|
307
297
|
BYTE* op3 = opStart3;
|
308
298
|
BYTE* op4 = opStart4;
|
309
|
-
U32 endSignal = BIT_DStream_unfinished;
|
310
299
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
311
300
|
U32 const dtLog = dtd.tableLog;
|
301
|
+
U32 endSignal = 1;
|
312
302
|
|
313
303
|
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
314
304
|
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
@@ -317,8 +307,7 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
317
307
|
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
318
308
|
|
319
309
|
/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
|
320
|
-
|
321
|
-
while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
|
310
|
+
for ( ; (endSignal) & (op4 < olimit) ; ) {
|
322
311
|
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
|
323
312
|
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
|
324
313
|
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
|
@@ -335,10 +324,10 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
335
324
|
HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
|
336
325
|
HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
|
337
326
|
HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
327
|
+
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
328
|
+
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
329
|
+
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
330
|
+
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
342
331
|
}
|
343
332
|
|
344
333
|
/* check corruption */
|
@@ -757,7 +746,6 @@ HUF_decompress1X2_usingDTable_internal_body(
|
|
757
746
|
return dstSize;
|
758
747
|
}
|
759
748
|
|
760
|
-
|
761
749
|
FORCE_INLINE_TEMPLATE size_t
|
762
750
|
HUF_decompress4X2_usingDTable_internal_body(
|
763
751
|
void* dst, size_t dstSize,
|
@@ -769,6 +757,7 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
769
757
|
{ const BYTE* const istart = (const BYTE*) cSrc;
|
770
758
|
BYTE* const ostart = (BYTE*) dst;
|
771
759
|
BYTE* const oend = ostart + dstSize;
|
760
|
+
BYTE* const olimit = oend - (sizeof(size_t)-1);
|
772
761
|
const void* const dtPtr = DTable+1;
|
773
762
|
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
|
774
763
|
|
@@ -793,7 +782,7 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
793
782
|
BYTE* op2 = opStart2;
|
794
783
|
BYTE* op3 = opStart3;
|
795
784
|
BYTE* op4 = opStart4;
|
796
|
-
U32 endSignal;
|
785
|
+
U32 endSignal = 1;
|
797
786
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
798
787
|
U32 const dtLog = dtd.tableLog;
|
799
788
|
|
@@ -804,8 +793,29 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
804
793
|
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
805
794
|
|
806
795
|
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
807
|
-
|
808
|
-
|
796
|
+
for ( ; (endSignal) & (op4 < olimit); ) {
|
797
|
+
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
|
798
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
799
|
+
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
|
800
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
801
|
+
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
|
802
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
803
|
+
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
|
804
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
805
|
+
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
806
|
+
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
807
|
+
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
808
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
809
|
+
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
|
810
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
811
|
+
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
812
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
813
|
+
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
|
814
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
815
|
+
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
816
|
+
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
817
|
+
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
818
|
+
#else
|
809
819
|
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
810
820
|
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
811
821
|
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
@@ -822,8 +832,12 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
822
832
|
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
823
833
|
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
824
834
|
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
825
|
-
|
826
|
-
|
835
|
+
endSignal = (U32)LIKELY(
|
836
|
+
(BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
|
837
|
+
& (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
|
838
|
+
& (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
|
839
|
+
& (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
|
840
|
+
#endif
|
827
841
|
}
|
828
842
|
|
829
843
|
/* check corruption */
|