zstdlib 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +5 -0
  3. data/ext/zstdlib/extconf.rb +1 -1
  4. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/bitstream.h +31 -37
  5. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/compiler.h +19 -3
  6. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/cpu.h +1 -1
  7. data/ext/zstdlib/zstd-1.4.5/lib/common/debug.c +24 -0
  8. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/debug.h +11 -31
  9. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/entropy_common.c +13 -33
  10. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/error_private.c +2 -1
  11. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/error_private.h +6 -2
  12. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/fse.h +11 -31
  13. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/fse_decompress.c +12 -37
  14. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/huf.h +15 -33
  15. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/mem.h +1 -1
  16. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/pool.c +1 -1
  17. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/pool.h +2 -2
  18. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/threading.c +4 -3
  19. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/threading.h +4 -3
  20. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/xxhash.c +15 -33
  21. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/xxhash.h +11 -31
  22. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/zstd_common.c +1 -1
  23. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/zstd_errors.h +2 -1
  24. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/zstd_internal.h +112 -15
  25. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/fse_compress.c +17 -40
  26. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/hist.c +15 -35
  27. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/hist.h +12 -32
  28. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/huf_compress.c +92 -92
  29. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress.c +450 -275
  30. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_internal.h +136 -14
  31. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_literals.c +10 -6
  32. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_literals.h +1 -1
  33. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_sequences.c +24 -20
  34. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_sequences.h +10 -3
  35. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.c +845 -0
  36. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.h +32 -0
  37. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_cwksp.h +3 -13
  38. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_double_fast.c +11 -8
  39. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_double_fast.h +2 -2
  40. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_fast.c +36 -24
  41. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_fast.h +2 -2
  42. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_lazy.c +34 -11
  43. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_lazy.h +1 -1
  44. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_ldm.c +27 -5
  45. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_ldm.h +7 -2
  46. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_opt.c +38 -84
  47. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_opt.h +1 -1
  48. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstdmt_compress.c +48 -21
  49. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstdmt_compress.h +2 -2
  50. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/huf_decompress.c +76 -62
  51. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_ddict.c +12 -8
  52. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_ddict.h +2 -2
  53. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress.c +264 -148
  54. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.c +312 -203
  55. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.h +3 -3
  56. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress_internal.h +18 -4
  57. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/zstd.h +62 -21
  58. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzclose.c +0 -0
  59. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzcompatibility.h +1 -1
  60. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzguts.h +0 -0
  61. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzlib.c +0 -0
  62. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzread.c +0 -0
  63. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzwrite.c +0 -0
  64. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.c +1 -1
  65. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.h +1 -1
  66. metadata +65 -63
  67. data/ext/zstdlib/zstd-1.4.4/lib/common/debug.c +0 -44
@@ -1,10 +1,11 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
6
6
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
7
  * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
8
9
  */
9
10
 
10
11
  #ifndef ZSTD_LDM_H
@@ -15,7 +16,7 @@ extern "C" {
15
16
  #endif
16
17
 
17
18
  #include "zstd_compress_internal.h" /* ldmParams_t, U32 */
18
- #include "zstd.h" /* ZSTD_CCtx, size_t */
19
+ #include "../zstd.h" /* ZSTD_CCtx, size_t */
19
20
 
20
21
  /*-*************************************
21
22
  * Long distance matching
@@ -23,6 +24,10 @@ extern "C" {
23
24
 
24
25
  #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
25
26
 
27
+ void ZSTD_ldm_fillHashTable(
28
+ ldmState_t* state, const BYTE* ip,
29
+ const BYTE* iend, ldmParams_t const* params);
30
+
26
31
  /**
27
32
  * ZSTD_ldm_generateSequences():
28
33
  *
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -249,40 +249,6 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
249
249
  }
250
250
  }
251
251
 
252
- /* ZSTD_litLengthContribution() :
253
- * @return ( cost(litlength) - cost(0) )
254
- * this value can then be added to rawLiteralsCost()
255
- * to provide a cost which is directly comparable to a match ending at same position */
256
- static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
257
- {
258
- if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);
259
-
260
- /* dynamic statistics */
261
- { U32 const llCode = ZSTD_LLcode(litLength);
262
- int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
263
- + (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
264
- - (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
265
- #if 1
266
- return contribution;
267
- #else
268
- return MAX(0, contribution); /* sometimes better, sometimes not ... */
269
- #endif
270
- }
271
- }
272
-
273
- /* ZSTD_literalsContribution() :
274
- * creates a fake cost for the literals part of a sequence
275
- * which can be compared to the ending cost of a match
276
- * should a new match start at this position */
277
- static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
278
- const optState_t* const optPtr,
279
- int optLevel)
280
- {
281
- int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
282
- + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
283
- return contribution;
284
- }
285
-
286
252
  /* ZSTD_getMatchPrice() :
287
253
  * Provides the cost of the match part (offset + matchLength) of a sequence
288
254
  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
@@ -603,7 +569,10 @@ U32 ZSTD_insertBtAndGetAllMatches (
603
569
  U32 repLen = 0;
604
570
  assert(current >= dictLimit);
605
571
  if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
606
- if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
572
+ /* We must validate the repcode offset because when we're using a dictionary the
573
+ * valid offset range shrinks when the dictionary goes out of bounds.
574
+ */
575
+ if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
607
576
  repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
608
577
  }
609
578
  } else { /* repIndex < dictLimit || repIndex >= current */
@@ -799,30 +768,6 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
799
768
  /*-*******************************
800
769
  * Optimal parser
801
770
  *********************************/
802
- typedef struct repcodes_s {
803
- U32 rep[3];
804
- } repcodes_t;
805
-
806
- static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
807
- {
808
- repcodes_t newReps;
809
- if (offset >= ZSTD_REP_NUM) { /* full offset */
810
- newReps.rep[2] = rep[1];
811
- newReps.rep[1] = rep[0];
812
- newReps.rep[0] = offset - ZSTD_REP_MOVE;
813
- } else { /* repcode */
814
- U32 const repCode = offset + ll0;
815
- if (repCode > 0) { /* note : if repCode==0, no change */
816
- U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
817
- newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
818
- newReps.rep[1] = rep[0];
819
- newReps.rep[0] = currentOffset;
820
- } else { /* repCode == 0 */
821
- memcpy(&newReps, rep, sizeof(newReps));
822
- }
823
- }
824
- return newReps;
825
- }
826
771
 
827
772
 
828
773
  static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
@@ -839,7 +784,7 @@ listStats(const U32* table, int lastEltID)
839
784
  int enb;
840
785
  for (enb=0; enb < nbElts; enb++) {
841
786
  (void)table;
842
- //RAWLOG(2, "%3i:%3i, ", enb, table[enb]);
787
+ /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
843
788
  RAWLOG(2, "%4i,", table[enb]);
844
789
  }
845
790
  RAWLOG(2, " \n");
@@ -894,7 +839,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
894
839
  { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
895
840
  opt[0].mlen = 0; /* means is_a_literal */
896
841
  opt[0].litlen = litlen;
897
- opt[0].price = ZSTD_literalsContribution(anchor, litlen, optStatePtr, optLevel);
842
+ /* We don't need to include the actual price of the literals because
843
+ * it is static for the duration of the forward pass, and is included
844
+ * in every price. We include the literal length to avoid negative
845
+ * prices when we subtract the previous literal length.
846
+ */
847
+ opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
898
848
 
899
849
  /* large match -> immediate encoding */
900
850
  { U32 const maxML = matches[nbMatches-1].len;
@@ -923,7 +873,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
923
873
  for (matchNb = 0; matchNb < nbMatches; matchNb++) {
924
874
  U32 const offset = matches[matchNb].off;
925
875
  U32 const end = matches[matchNb].len;
926
- repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
927
876
  for ( ; pos <= end ; pos++ ) {
928
877
  U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
929
878
  U32 const sequencePrice = literalsPrice + matchPrice;
@@ -933,8 +882,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
933
882
  opt[pos].off = offset;
934
883
  opt[pos].litlen = litlen;
935
884
  opt[pos].price = sequencePrice;
936
- ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
937
- memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
938
885
  } }
939
886
  last_pos = pos-1;
940
887
  }
@@ -961,7 +908,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
961
908
  opt[cur].off = 0;
962
909
  opt[cur].litlen = litlen;
963
910
  opt[cur].price = price;
964
- memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
965
911
  } else {
966
912
  DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
967
913
  inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
@@ -969,6 +915,21 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
969
915
  }
970
916
  }
971
917
 
918
+ /* Set the repcodes of the current position. We must do it here
919
+ * because we rely on the repcodes of the 2nd to last sequence being
920
+ * correct to set the next chunks repcodes during the backward
921
+ * traversal.
922
+ */
923
+ ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
924
+ assert(cur >= opt[cur].mlen);
925
+ if (opt[cur].mlen != 0) {
926
+ U32 const prev = cur - opt[cur].mlen;
927
+ repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
928
+ memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
929
+ } else {
930
+ memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
931
+ }
932
+
972
933
  /* last match must start at a minimum distance of 8 from oend */
973
934
  if (inr > ilimit) continue;
974
935
 
@@ -1009,7 +970,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1009
970
  /* set prices using matches found at position == cur */
1010
971
  for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1011
972
  U32 const offset = matches[matchNb].off;
1012
- repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
1013
973
  U32 const lastML = matches[matchNb].len;
1014
974
  U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1015
975
  U32 mlen;
@@ -1029,8 +989,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1029
989
  opt[pos].off = offset;
1030
990
  opt[pos].litlen = litlen;
1031
991
  opt[pos].price = price;
1032
- ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
1033
- memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
1034
992
  } else {
1035
993
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1036
994
  pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
@@ -1046,6 +1004,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1046
1004
  _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1047
1005
  assert(opt[0].mlen == 0);
1048
1006
 
1007
+ /* Set the next chunk's repcodes based on the repcodes of the beginning
1008
+ * of the last match, and the last sequence. This avoids us having to
1009
+ * update them while traversing the sequences.
1010
+ */
1011
+ if (lastSequence.mlen != 0) {
1012
+ repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
1013
+ memcpy(rep, &reps, sizeof(reps));
1014
+ } else {
1015
+ memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1016
+ }
1017
+
1049
1018
  { U32 const storeEnd = cur + 1;
1050
1019
  U32 storeStart = storeEnd;
1051
1020
  U32 seqPos = cur;
@@ -1082,20 +1051,6 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1082
1051
  continue; /* will finish */
1083
1052
  }
1084
1053
 
1085
- /* repcodes update : like ZSTD_updateRep(), but update in place */
1086
- if (offCode >= ZSTD_REP_NUM) { /* full offset */
1087
- rep[2] = rep[1];
1088
- rep[1] = rep[0];
1089
- rep[0] = offCode - ZSTD_REP_MOVE;
1090
- } else { /* repcode */
1091
- U32 const repCode = offCode + (llen==0);
1092
- if (repCode) { /* note : if repCode==0, no change */
1093
- U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
1094
- if (repCode >= 2) rep[2] = rep[1];
1095
- rep[1] = rep[0];
1096
- rep[0] = currentOffset;
1097
- } }
1098
-
1099
1054
  assert(anchor + llen <= iend);
1100
1055
  ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1101
1056
  ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
@@ -1104,7 +1059,6 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1104
1059
  } }
1105
1060
  ZSTD_setBasePrices(optStatePtr, optLevel);
1106
1061
  }
1107
-
1108
1062
  } /* while (ip < ilimit) */
1109
1063
 
1110
1064
  /* Return the last literals size */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -22,9 +22,9 @@
22
22
  /* ====== Dependencies ====== */
23
23
  #include <string.h> /* memcpy, memset */
24
24
  #include <limits.h> /* INT_MAX, UINT_MAX */
25
- #include "mem.h" /* MEM_STATIC */
26
- #include "pool.h" /* threadpool */
27
- #include "threading.h" /* mutex */
25
+ #include "../common/mem.h" /* MEM_STATIC */
26
+ #include "../common/pool.h" /* threadpool */
27
+ #include "../common/threading.h" /* mutex */
28
28
  #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
29
29
  #include "zstd_ldm.h"
30
30
  #include "zstdmt_compress.h"
@@ -461,7 +461,13 @@ typedef struct {
461
461
  ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
462
462
  } serialState_t;
463
463
 
464
- static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize)
464
+ static int
465
+ ZSTDMT_serialState_reset(serialState_t* serialState,
466
+ ZSTDMT_seqPool* seqPool,
467
+ ZSTD_CCtx_params params,
468
+ size_t jobSize,
469
+ const void* dict, size_t const dictSize,
470
+ ZSTD_dictContentType_e dictContentType)
465
471
  {
466
472
  /* Adjust parameters */
467
473
  if (params.ldmParams.enableLdm) {
@@ -490,8 +496,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
490
496
  /* Size the seq pool tables */
491
497
  ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
492
498
  /* Reset the window */
493
- ZSTD_window_clear(&serialState->ldmState.window);
494
- serialState->ldmWindow = serialState->ldmState.window;
499
+ ZSTD_window_init(&serialState->ldmState.window);
495
500
  /* Resize tables and output space if necessary. */
496
501
  if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
497
502
  ZSTD_free(serialState->ldmState.hashTable, cMem);
@@ -506,7 +511,24 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
506
511
  /* Zero the tables */
507
512
  memset(serialState->ldmState.hashTable, 0, hashSize);
508
513
  memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
514
+
515
+ /* Update window state and fill hash table with dict */
516
+ serialState->ldmState.loadedDictEnd = 0;
517
+ if (dictSize > 0) {
518
+ if (dictContentType == ZSTD_dct_rawContent) {
519
+ BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
520
+ ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
521
+ ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
522
+ serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
523
+ } else {
524
+ /* don't even load anything */
525
+ }
526
+ }
527
+
528
+ /* Initialize serialState's copy of ldmWindow. */
529
+ serialState->ldmWindow = serialState->ldmState.window;
509
530
  }
531
+
510
532
  serialState->params = params;
511
533
  serialState->params.jobSize = (U32)jobSize;
512
534
  return 0;
@@ -1054,7 +1076,7 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
1054
1076
  static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
1055
1077
  {
1056
1078
  if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
1057
- FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
1079
+ FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
1058
1080
  mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
1059
1081
  if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
1060
1082
  mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
@@ -1076,7 +1098,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
1076
1098
  DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
1077
1099
  compressionLevel);
1078
1100
  mtctx->params.compressionLevel = compressionLevel;
1079
- { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0);
1101
+ { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0);
1080
1102
  cParams.windowLog = saved_wlog;
1081
1103
  mtctx->params.cParams = cParams;
1082
1104
  }
@@ -1235,7 +1257,8 @@ ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nb
1235
1257
  /* ZSTDMT_compress_advanced_internal() :
1236
1258
  * This is a blocking function : it will only give back control to caller after finishing its compression job.
1237
1259
  */
1238
- static size_t ZSTDMT_compress_advanced_internal(
1260
+ static size_t
1261
+ ZSTDMT_compress_advanced_internal(
1239
1262
  ZSTDMT_CCtx* mtctx,
1240
1263
  void* dst, size_t dstCapacity,
1241
1264
  const void* src, size_t srcSize,
@@ -1267,10 +1290,11 @@ static size_t ZSTDMT_compress_advanced_internal(
1267
1290
 
1268
1291
  assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
1269
1292
  ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
1270
- if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
1293
+ /* LDM doesn't even try to load the dictionary in single-ingestion mode */
1294
+ if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize, NULL, 0, ZSTD_dct_auto))
1271
1295
  return ERROR(memory_allocation);
1272
1296
 
1273
- FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */
1297
+ FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) , ""); /* only expands if necessary */
1274
1298
 
1275
1299
  { unsigned u;
1276
1300
  for (u=0; u<nbJobs; u++) {
@@ -1403,7 +1427,7 @@ size_t ZSTDMT_initCStream_internal(
1403
1427
 
1404
1428
  /* init */
1405
1429
  if (params.nbWorkers != mtctx->params.nbWorkers)
1406
- FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
1430
+ FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , "");
1407
1431
 
1408
1432
  if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
1409
1433
  if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
@@ -1500,7 +1524,8 @@ size_t ZSTDMT_initCStream_internal(
1500
1524
  mtctx->allJobsCompleted = 0;
1501
1525
  mtctx->consumed = 0;
1502
1526
  mtctx->produced = 0;
1503
- if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize))
1527
+ if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize,
1528
+ dict, dictSize, dictContentType))
1504
1529
  return ERROR(memory_allocation);
1505
1530
  return 0;
1506
1531
  }
@@ -1714,9 +1739,11 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
1714
1739
  assert(mtctx->doneJobID < mtctx->nextJobID);
1715
1740
  assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
1716
1741
  assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
1717
- memcpy((char*)output->dst + output->pos,
1718
- (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
1719
- toFlush);
1742
+ if (toFlush > 0) {
1743
+ memcpy((char*)output->dst + output->pos,
1744
+ (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
1745
+ toFlush);
1746
+ }
1720
1747
  output->pos += toFlush;
1721
1748
  mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */
1722
1749
 
@@ -1786,7 +1813,7 @@ static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
1786
1813
  BYTE const* const bufferStart = (BYTE const*)buffer.start;
1787
1814
  BYTE const* const bufferEnd = bufferStart + buffer.capacity;
1788
1815
  BYTE const* const rangeStart = (BYTE const*)range.start;
1789
- BYTE const* const rangeEnd = rangeStart + range.size;
1816
+ BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;
1790
1817
 
1791
1818
  if (rangeStart == NULL || bufferStart == NULL)
1792
1819
  return 0;
@@ -2060,7 +2087,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
2060
2087
  || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */
2061
2088
  size_t const jobSize = mtctx->inBuff.filled;
2062
2089
  assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
2063
- FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
2090
+ FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , "");
2064
2091
  }
2065
2092
 
2066
2093
  /* check for potential compressed data ready to be flushed */
@@ -2074,7 +2101,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
2074
2101
 
2075
2102
  size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
2076
2103
  {
2077
- FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
2104
+ FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) , "");
2078
2105
 
2079
2106
  /* recommended next input size : fill current input buffer */
2080
2107
  return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
@@ -2091,7 +2118,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
2091
2118
  || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */
2092
2119
  DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
2093
2120
  (U32)srcSize, (U32)endFrame);
2094
- FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
2121
+ FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) , "");
2095
2122
  }
2096
2123
 
2097
2124
  /* check if there is any data available to flush */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -40,7 +40,7 @@
40
40
  /* === Dependencies === */
41
41
  #include <stddef.h> /* size_t */
42
42
  #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
43
- #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
43
+ #include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
44
44
 
45
45
 
46
46
  /* === Constants === */
@@ -1,47 +1,27 @@
1
1
  /* ******************************************************************
2
- huff0 huffman decoder,
3
- part of Finite State Entropy library
4
- Copyright (C) 2013-present, Yann Collet.
5
-
6
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
-
8
- Redistribution and use in source and binary forms, with or without
9
- modification, are permitted provided that the following conditions are
10
- met:
11
-
12
- * Redistributions of source code must retain the above copyright
13
- notice, this list of conditions and the following disclaimer.
14
- * Redistributions in binary form must reproduce the above
15
- copyright notice, this list of conditions and the following disclaimer
16
- in the documentation and/or other materials provided with the
17
- distribution.
18
-
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
-
31
- You can contact the author at :
32
- - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
2
+ * huff0 huffman decoder,
3
+ * part of Finite State Entropy library
4
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
5
+ *
6
+ * You can contact the author at :
7
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
 
35
15
  /* **************************************************************
36
16
  * Dependencies
37
17
  ****************************************************************/
38
18
  #include <string.h> /* memcpy, memset */
39
- #include "compiler.h"
40
- #include "bitstream.h" /* BIT_* */
41
- #include "fse.h" /* to compress headers */
19
+ #include "../common/compiler.h"
20
+ #include "../common/bitstream.h" /* BIT_* */
21
+ #include "../common/fse.h" /* to compress headers */
42
22
  #define HUF_STATIC_LINKING_ONLY
43
- #include "huf.h"
44
- #include "error_private.h"
23
+ #include "../common/huf.h"
24
+ #include "../common/error_private.h"
45
25
 
46
26
  /* **************************************************************
47
27
  * Macros
@@ -61,9 +41,6 @@
61
41
  * Error Management
62
42
  ****************************************************************/
63
43
  #define HUF_isError ERR_isError
64
- #ifndef CHECK_F
65
- #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
66
- #endif
67
44
 
68
45
 
69
46
  /* **************************************************************
@@ -181,17 +158,29 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
181
158
 
182
159
  /* fill DTable */
183
160
  { U32 n;
184
- for (n=0; n<nbSymbols; n++) {
185
- U32 const w = huffWeight[n];
186
- U32 const length = (1 << w) >> 1;
187
- U32 u;
161
+ size_t const nEnd = nbSymbols;
162
+ for (n=0; n<nEnd; n++) {
163
+ size_t const w = huffWeight[n];
164
+ size_t const length = (1 << w) >> 1;
165
+ size_t const uStart = rankVal[w];
166
+ size_t const uEnd = uStart + length;
167
+ size_t u;
188
168
  HUF_DEltX1 D;
189
- D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
190
- for (u = rankVal[w]; u < rankVal[w] + length; u++)
191
- dt[u] = D;
192
- rankVal[w] += length;
193
- } }
194
-
169
+ D.byte = (BYTE)n;
170
+ D.nbBits = (BYTE)(tableLog + 1 - w);
171
+ rankVal[w] = (U32)uEnd;
172
+ if (length < 4) {
173
+ /* Use length in the loop bound so the compiler knows it is short. */
174
+ for (u = 0; u < length; ++u)
175
+ dt[uStart + u] = D;
176
+ } else {
177
+ /* Unroll the loop 4 times, we know it is a power of 2. */
178
+ for (u = uStart; u < uEnd; u += 4) {
179
+ dt[u + 0] = D;
180
+ dt[u + 1] = D;
181
+ dt[u + 2] = D;
182
+ dt[u + 3] = D;
183
+ } } } }
195
184
  return iSize;
196
185
  }
197
186
 
@@ -282,6 +271,7 @@ HUF_decompress4X1_usingDTable_internal_body(
282
271
  { const BYTE* const istart = (const BYTE*) cSrc;
283
272
  BYTE* const ostart = (BYTE*) dst;
284
273
  BYTE* const oend = ostart + dstSize;
274
+ BYTE* const olimit = oend - 3;
285
275
  const void* const dtPtr = DTable + 1;
286
276
  const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
287
277
 
@@ -306,9 +296,9 @@ HUF_decompress4X1_usingDTable_internal_body(
306
296
  BYTE* op2 = opStart2;
307
297
  BYTE* op3 = opStart3;
308
298
  BYTE* op4 = opStart4;
309
- U32 endSignal = BIT_DStream_unfinished;
310
299
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
311
300
  U32 const dtLog = dtd.tableLog;
301
+ U32 endSignal = 1;
312
302
 
313
303
  if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
314
304
  CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
@@ -317,8 +307,7 @@ HUF_decompress4X1_usingDTable_internal_body(
317
307
  CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
318
308
 
319
309
  /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
320
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
321
- while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
310
+ for ( ; (endSignal) & (op4 < olimit) ; ) {
322
311
  HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
323
312
  HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
324
313
  HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
@@ -335,10 +324,10 @@ HUF_decompress4X1_usingDTable_internal_body(
335
324
  HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
336
325
  HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
337
326
  HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
338
- BIT_reloadDStream(&bitD1);
339
- BIT_reloadDStream(&bitD2);
340
- BIT_reloadDStream(&bitD3);
341
- BIT_reloadDStream(&bitD4);
327
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
328
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
329
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
330
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
342
331
  }
343
332
 
344
333
  /* check corruption */
@@ -757,7 +746,6 @@ HUF_decompress1X2_usingDTable_internal_body(
757
746
  return dstSize;
758
747
  }
759
748
 
760
-
761
749
  FORCE_INLINE_TEMPLATE size_t
762
750
  HUF_decompress4X2_usingDTable_internal_body(
763
751
  void* dst, size_t dstSize,
@@ -769,6 +757,7 @@ HUF_decompress4X2_usingDTable_internal_body(
769
757
  { const BYTE* const istart = (const BYTE*) cSrc;
770
758
  BYTE* const ostart = (BYTE*) dst;
771
759
  BYTE* const oend = ostart + dstSize;
760
+ BYTE* const olimit = oend - (sizeof(size_t)-1);
772
761
  const void* const dtPtr = DTable+1;
773
762
  const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
774
763
 
@@ -793,7 +782,7 @@ HUF_decompress4X2_usingDTable_internal_body(
793
782
  BYTE* op2 = opStart2;
794
783
  BYTE* op3 = opStart3;
795
784
  BYTE* op4 = opStart4;
796
- U32 endSignal;
785
+ U32 endSignal = 1;
797
786
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
798
787
  U32 const dtLog = dtd.tableLog;
799
788
 
@@ -804,8 +793,29 @@ HUF_decompress4X2_usingDTable_internal_body(
804
793
  CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
805
794
 
806
795
  /* 16-32 symbols per loop (4-8 symbols per stream) */
807
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
808
- for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
796
+ for ( ; (endSignal) & (op4 < olimit); ) {
797
+ #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
798
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
799
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
800
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
801
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
802
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
803
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
804
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
805
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
806
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
807
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
808
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
809
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
810
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
811
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
812
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
813
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
814
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
815
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
816
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
817
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
818
+ #else
809
819
  HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
810
820
  HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
811
821
  HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
@@ -822,8 +832,12 @@ HUF_decompress4X2_usingDTable_internal_body(
822
832
  HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
823
833
  HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
824
834
  HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
825
-
826
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
835
+ endSignal = (U32)LIKELY(
836
+ (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
837
+ & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
838
+ & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
839
+ & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
840
+ #endif
827
841
  }
828
842
 
829
843
  /* check corruption */