zstdlib 0.2.0 → 0.7.0

Files changed (70)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +30 -1
  3. data/README.md +2 -2
  4. data/Rakefile +1 -1
  5. data/ext/zstdlib/extconf.rb +3 -3
  6. data/ext/zstdlib/ruby/zlib-2.7/zstdlib.c +4895 -0
  7. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/bitstream.h +38 -39
  8. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/compiler.h +40 -5
  9. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/cpu.h +1 -1
  10. data/ext/zstdlib/zstd-1.4.5/lib/common/debug.c +24 -0
  11. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/debug.h +11 -31
  12. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/entropy_common.c +13 -33
  13. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/error_private.c +2 -1
  14. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/error_private.h +6 -2
  15. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/fse.h +12 -32
  16. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/fse_decompress.c +12 -35
  17. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/huf.h +15 -33
  18. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/mem.h +75 -2
  19. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/pool.c +8 -4
  20. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/pool.h +2 -2
  21. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/threading.c +50 -4
  22. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/threading.h +36 -4
  23. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/xxhash.c +23 -35
  24. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/xxhash.h +11 -31
  25. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_common.c +1 -1
  26. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_errors.h +2 -1
  27. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_internal.h +154 -26
  28. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/fse_compress.c +17 -40
  29. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/hist.c +15 -35
  30. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/hist.h +12 -32
  31. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/huf_compress.c +92 -92
  32. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_compress.c +1191 -1330
  33. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_compress_internal.h +317 -55
  34. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_literals.c +158 -0
  35. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_literals.h +29 -0
  36. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_sequences.c +419 -0
  37. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_sequences.h +54 -0
  38. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.c +845 -0
  39. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.h +32 -0
  40. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_cwksp.h +525 -0
  41. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_double_fast.c +65 -43
  42. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_double_fast.h +2 -2
  43. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_fast.c +92 -66
  44. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_fast.h +2 -2
  45. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_lazy.c +74 -42
  46. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_lazy.h +1 -1
  47. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_ldm.c +32 -10
  48. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_ldm.h +7 -2
  49. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_opt.c +81 -114
  50. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_opt.h +1 -1
  51. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstdmt_compress.c +95 -51
  52. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstdmt_compress.h +3 -2
  53. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/huf_decompress.c +76 -60
  54. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_ddict.c +12 -8
  55. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_ddict.h +2 -2
  56. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress.c +292 -172
  57. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.c +459 -338
  58. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.h +3 -3
  59. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_internal.h +18 -4
  60. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/zstd.h +265 -88
  61. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzclose.c +1 -1
  62. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzcompatibility.h +1 -1
  63. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzguts.h +0 -0
  64. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzlib.c +9 -9
  65. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzread.c +16 -8
  66. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzwrite.c +8 -8
  67. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.c +16 -12
  68. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.h +1 -1
  69. metadata +69 -62
  70. data/ext/zstdlib/zstd-1.4.0/lib/common/debug.c +0 -44
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -22,9 +22,9 @@
  /* ======   Dependencies   ====== */
  #include <string.h>      /* memcpy, memset */
  #include <limits.h>      /* INT_MAX, UINT_MAX */
- #include "mem.h"         /* MEM_STATIC */
- #include "pool.h"        /* threadpool */
- #include "threading.h"   /* mutex */
+ #include "../common/mem.h"         /* MEM_STATIC */
+ #include "../common/pool.h"        /* threadpool */
+ #include "../common/threading.h"   /* mutex */
  #include "zstd_compress_internal.h"  /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
  #include "zstd_ldm.h"
  #include "zstdmt_compress.h"
@@ -461,7 +461,13 @@ typedef struct {
      ZSTD_window_t ldmWindow;  /* A thread-safe copy of ldmState.window */
  } serialState_t;

- static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize)
+ static int
+ ZSTDMT_serialState_reset(serialState_t* serialState,
+                          ZSTDMT_seqPool* seqPool,
+                          ZSTD_CCtx_params params,
+                          size_t jobSize,
+                          const void* dict, size_t const dictSize,
+                          ZSTD_dictContentType_e dictContentType)
  {
      /* Adjust parameters */
      if (params.ldmParams.enableLdm) {
@@ -490,8 +496,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
          /* Size the seq pool tables */
          ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
          /* Reset the window */
-         ZSTD_window_clear(&serialState->ldmState.window);
-         serialState->ldmWindow = serialState->ldmState.window;
+         ZSTD_window_init(&serialState->ldmState.window);
          /* Resize tables and output space if necessary. */
          if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
              ZSTD_free(serialState->ldmState.hashTable, cMem);
@@ -506,7 +511,24 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
          /* Zero the tables */
          memset(serialState->ldmState.hashTable, 0, hashSize);
          memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
+
+         /* Update window state and fill hash table with dict */
+         serialState->ldmState.loadedDictEnd = 0;
+         if (dictSize > 0) {
+             if (dictContentType == ZSTD_dct_rawContent) {
+                 BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
+                 ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
+                 ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
+                 serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
+             } else {
+                 /* don't even load anything */
+             }
+         }
+
+         /* Initialize serialState's copy of ldmWindow. */
+         serialState->ldmWindow = serialState->ldmState.window;
      }
+
      serialState->params = params;
      serialState->params.jobSize = (U32)jobSize;
      return 0;
@@ -668,7 +690,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)

      /* init */
      if (job->cdict) {
-         size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
+         size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
          assert(job->firstJob);  /* only allowed for first job */
          if (ZSTD_isError(initError)) JOB_ERROR(initError);
      } else {  /* srcStart points at reloaded section */
@@ -680,7 +702,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
                          job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
                          ZSTD_dtlm_fast,
                          NULL, /*cdict*/
-                         jobParams, pledgedSrcSize);
+                         &jobParams, pledgedSrcSize);
          if (ZSTD_isError(initError)) JOB_ERROR(initError);
      }   }

@@ -927,12 +949,18 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
      unsigned jobID;
      DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
      for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
+         /* Copy the mutex/cond out */
+         ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
+         ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
+
          DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
          ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
-         mtctx->jobs[jobID].dstBuff = g_nullBuffer;
-         mtctx->jobs[jobID].cSize = 0;
+
+         /* Clear the job description, but keep the mutex/cond */
+         memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
+         mtctx->jobs[jobID].job_mutex = mutex;
+         mtctx->jobs[jobID].job_cond = cond;
      }
-     memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
      mtctx->inBuff.buffer = g_nullBuffer;
      mtctx->inBuff.filled = 0;
      mtctx->allJobsCompleted = 1;
@@ -1028,9 +1056,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,

  /* Sets parameters relevant to the compression job,
   * initializing others to default values. */
- static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
+ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
  {
-     ZSTD_CCtx_params jobParams = params;
+     ZSTD_CCtx_params jobParams = *params;
      /* Clear parameters related to multithreading */
      jobParams.forceWindow = 0;
      jobParams.nbWorkers = 0;
@@ -1048,7 +1076,7 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
  static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
  {
      if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
-     FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
+     FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
      mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
      if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
      mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
@@ -1070,7 +1098,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
      DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
               compressionLevel);
      mtctx->params.compressionLevel = compressionLevel;
-     {   ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0);
+     {   ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0);
          cParams.windowLog = saved_wlog;
          mtctx->params.cParams = cParams;
      }
@@ -1129,9 +1157,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
          size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
          size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
          assert(flushed <= produced);
+         assert(jobPtr->consumed <= jobPtr->src.size);
          toFlush = produced - flushed;
-         if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) {
-             /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */
+         /* if toFlush==0, nothing is available to flush.
+          * However, jobID is expected to still be active:
+          * if jobID was already completed and fully flushed,
+          * ZSTDMT_flushProduced() should have already moved onto next job.
+          * Therefore, some input has not yet been consumed. */
+         if (toFlush==0) {
              assert(jobPtr->consumed < jobPtr->src.size);
          }
      }
@@ -1146,14 +1179,18 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
  /* =====   Multi-threaded compression   ===== */
  /* ------------------------------------------ */

- static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
+ static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
  {
-     if (params.ldmParams.enableLdm)
+     unsigned jobLog;
+     if (params->ldmParams.enableLdm) {
          /* In Long Range Mode, the windowLog is typically oversized.
           * In which case, it's preferable to determine the jobSize
           * based on chainLog instead. */
-         return MAX(21, params.cParams.chainLog + 4);
-     return MAX(20, params.cParams.windowLog + 2);
+         jobLog = MAX(21, params->cParams.chainLog + 4);
+     } else {
+         jobLog = MAX(20, params->cParams.windowLog + 2);
+     }
+     return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
  }

  static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
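For readers comparing the two versions, here is a minimal standalone sketch of the job-log selection after the change above; the helper name targetJobLog, the 64-bit value 30 used for ZSTDMT_JOBLOG_MAX, and the example chainLog are illustrative assumptions, not lines taken from this diff:

    /* sketch only: mirrors the shape of the new ZSTDMT_computeTargetJobLog() */
    #define MAX(a,b)   ((a)>(b) ? (a) : (b))
    #define MIN(a,b)   ((a)<(b) ? (a) : (b))
    #define JOBLOG_MAX 30u   /* stand-in for ZSTDMT_JOBLOG_MAX on 64-bit builds */

    static unsigned targetJobLog(int enableLdm, unsigned chainLog, unsigned windowLog)
    {
        unsigned const jobLog = enableLdm ? MAX(21u, chainLog + 4)    /* LDM: size jobs from chainLog */
                                          : MAX(20u, windowLog + 2);  /* otherwise from windowLog */
        return MIN(jobLog, JOBLOG_MAX);                               /* new in 1.4.5: cap the job log */
    }

    /* Example: enableLdm=1, chainLog=28 yielded 32 under zstd 1.4.0; 1.4.5 caps it at 30. */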
@@ -1184,27 +1221,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
      return ovlog;
  }

- static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
+ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
  {
-     int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
-     int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
+     int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
+     int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
      assert(0 <= overlapRLog && overlapRLog <= 8);
-     if (params.ldmParams.enableLdm) {
+     if (params->ldmParams.enableLdm) {
          /* In Long Range Mode, the windowLog is typically oversized.
           * In which case, it's preferable to determine the jobSize
           * based on chainLog instead.
           * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
-         ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
+         ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
                  - overlapRLog;
      }
-     assert(0 <= ovLog && ovLog <= 30);
-     DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
+     assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
+     DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
      DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
      return (ovLog==0) ? 0 : (size_t)1 << ovLog;
  }

  static unsigned
- ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
+ ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
  {
      assert(nbWorkers>0);
      {   size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
@@ -1220,16 +1257,17 @@ ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers
  /* ZSTDMT_compress_advanced_internal() :
   * This is a blocking function : it will only give back control to caller after finishing its compression job.
   */
- static size_t ZSTDMT_compress_advanced_internal(
+ static size_t
+ ZSTDMT_compress_advanced_internal(
                  ZSTDMT_CCtx* mtctx,
                  void* dst, size_t dstCapacity,
            const void* src, size_t srcSize,
            const ZSTD_CDict* cdict,
                  ZSTD_CCtx_params params)
  {
-     ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
-     size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
-     unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
+     ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
+     size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
+     unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
      size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
      size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize;   /* avoid too small last block */
      const char* const srcStart = (const char*)src;
@@ -1247,15 +1285,16 @@ static size_t ZSTDMT_compress_advanced_internal(
          ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
          DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
          if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
-         return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
+         return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
      }

      assert(avgJobSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
      ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
-     if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
+     /* LDM doesn't even try to load the dictionary in single-ingestion mode */
+     if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize, NULL, 0, ZSTD_dct_auto))
          return ERROR(memory_allocation);

-     FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) );  /* only expands if necessary */
+     FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) , "");  /* only expands if necessary */

      {   unsigned u;
          for (u=0; u<nbJobs; u++) {
@@ -1388,19 +1427,19 @@ size_t ZSTDMT_initCStream_internal(

      /* init */
      if (params.nbWorkers != mtctx->params.nbWorkers)
-         FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
+         FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , "");

      if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
-     if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
+     if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;

      mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
      if (mtctx->singleBlockingThread) {
-         ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
+         ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
          DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
          assert(singleThreadParams.nbWorkers == 0);
          return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
                                           dict, dictSize, cdict,
-                                          singleThreadParams, pledgedSrcSize);
+                                          &singleThreadParams, pledgedSrcSize);
      }

      DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
@@ -1426,12 +1465,14 @@ size_t ZSTDMT_initCStream_internal(
          mtctx->cdict = cdict;
      }

-     mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
+     mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
      DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
      mtctx->targetSectionSize = params.jobSize;
      if (mtctx->targetSectionSize == 0) {
-         mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
+         mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
      }
+     assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
+
      if (params.rsyncable) {
          /* Aim for the targetsectionSize as the average job size. */
          U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
@@ -1483,7 +1524,8 @@ size_t ZSTDMT_initCStream_internal(
      mtctx->allJobsCompleted = 0;
      mtctx->consumed = 0;
      mtctx->produced = 0;
-     if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize))
+     if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize,
+                                  dict, dictSize, dictContentType))
          return ERROR(memory_allocation);
      return 0;
  }
@@ -1697,9 +1739,11 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
          assert(mtctx->doneJobID < mtctx->nextJobID);
          assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
          assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
-         memcpy((char*)output->dst + output->pos,
-                (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
-                toFlush);
+         if (toFlush > 0) {
+             memcpy((char*)output->dst + output->pos,
+                    (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
+                    toFlush);
+         }
          output->pos += toFlush;
          mtctx->jobs[wJobID].dstFlushed += toFlush;   /* can write : this value is only used by mtctx */

@@ -1769,7 +1813,7 @@ static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
      BYTE const* const bufferStart = (BYTE const*)buffer.start;
      BYTE const* const bufferEnd = bufferStart + buffer.capacity;
      BYTE const* const rangeStart = (BYTE const*)range.start;
-     BYTE const* const rangeEnd = rangeStart + range.size;
+     BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;

      if (rangeStart == NULL || bufferStart == NULL)
          return 0;
@@ -2043,7 +2087,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
          || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) {   /* must finish the frame with a zero-size block */
          size_t const jobSize = mtctx->inBuff.filled;
          assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
-         FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
+         FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , "");
      }

      /* check for potential compressed data ready to be flushed */
@@ -2057,7 +2101,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,

  size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
  {
-     FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
+     FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) , "");

      /* recommended next input size : fill current input buffer */
      return mtctx->targetSectionSize - mtctx->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
@@ -2074,7 +2118,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
          || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) {  /* need a last 0-size block to end frame */
          DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
                   (U32)srcSize, (U32)endFrame);
-         FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
+         FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) , "");
      }

      /* check if there is any data available to flush */
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -40,7 +40,7 @@
  /* ===   Dependencies   === */
  #include <stddef.h>                  /* size_t */
  #define ZSTD_STATIC_LINKING_ONLY     /* ZSTD_parameters */
- #include "zstd.h"            /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
+ #include "../zstd.h"         /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */


  /* ===   Constants   === */
@@ -50,6 +50,7 @@
  #ifndef ZSTDMT_JOBSIZE_MIN
  #  define ZSTDMT_JOBSIZE_MIN (1 MB)
  #endif
+ #define ZSTDMT_JOBLOG_MAX   (MEM_32bits() ? 29 : 30)
  #define ZSTDMT_JOBSIZE_MAX  (MEM_32bits() ? (512 MB) : (1024 MB))


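Quick consistency check (editor's arithmetic, not part of the diff): the new ZSTDMT_JOBLOG_MAX lines up exactly with the existing ZSTDMT_JOBSIZE_MAX, which is what the new assert on mtctx->targetSectionSize in ZSTDMT_initCStream_internal (earlier in this changeset) relies on:

    64-bit: 1 << 30 = 1073741824 bytes = 1024 MB = ZSTDMT_JOBSIZE_MAX
    32-bit: 1 << 29 =  536870912 bytes =  512 MB = ZSTDMT_JOBSIZE_MAX

So targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params) can never exceed (size_t)ZSTDMT_JOBSIZE_MAX.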
@@ -1,47 +1,27 @@
  /* ******************************************************************
-    huff0 huffman decoder,
-    part of Finite State Entropy library
-    Copyright (C) 2013-present, Yann Collet.
-
-    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-    Redistribution and use in source and binary forms, with or without
-    modification, are permitted provided that the following conditions are
-    met:
-
-    * Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above
-    copyright notice, this list of conditions and the following disclaimer
-    in the documentation and/or other materials provided with the
-    distribution.
-
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-    You can contact the author at :
-    - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * huff0 huffman decoder,
+ * part of Finite State Entropy library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  ****************************************************************** */

  /* **************************************************************
  *  Dependencies
  ****************************************************************/
  #include <string.h>     /* memcpy, memset */
- #include "compiler.h"
- #include "bitstream.h"  /* BIT_* */
- #include "fse.h"        /* to compress headers */
+ #include "../common/compiler.h"
+ #include "../common/bitstream.h"  /* BIT_* */
+ #include "../common/fse.h"        /* to compress headers */
  #define HUF_STATIC_LINKING_ONLY
- #include "huf.h"
- #include "error_private.h"
+ #include "../common/huf.h"
+ #include "../common/error_private.h"

  /* **************************************************************
  *  Macros
@@ -61,7 +41,6 @@
  *  Error Management
  ****************************************************************/
  #define HUF_isError ERR_isError
- #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }


  /* **************************************************************
@@ -179,17 +158,29 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize

      /* fill DTable */
      {   U32 n;
-         for (n=0; n<nbSymbols; n++) {
-             U32 const w = huffWeight[n];
-             U32 const length = (1 << w) >> 1;
-             U32 u;
+         size_t const nEnd = nbSymbols;
+         for (n=0; n<nEnd; n++) {
+             size_t const w = huffWeight[n];
+             size_t const length = (1 << w) >> 1;
+             size_t const uStart = rankVal[w];
+             size_t const uEnd = uStart + length;
+             size_t u;
              HUF_DEltX1 D;
-             D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
-             for (u = rankVal[w]; u < rankVal[w] + length; u++)
-                 dt[u] = D;
-             rankVal[w] += length;
-     }   }
-
+             D.byte = (BYTE)n;
+             D.nbBits = (BYTE)(tableLog + 1 - w);
+             rankVal[w] = (U32)uEnd;
+             if (length < 4) {
+                 /* Use length in the loop bound so the compiler knows it is short. */
+                 for (u = 0; u < length; ++u)
+                     dt[uStart + u] = D;
+             } else {
+                 /* Unroll the loop 4 times, we know it is a power of 2. */
+                 for (u = uStart; u < uEnd; u += 4) {
+                     dt[u + 0] = D;
+                     dt[u + 1] = D;
+                     dt[u + 2] = D;
+                     dt[u + 3] = D;
+     }   }   }   }
      return iSize;
  }
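Side note on the unrolled fill loop above (editor's reasoning, not a comment from the diff): length = (1 << w) >> 1 is 0 for w == 0 and a power of two otherwise (w=1 gives 1, w=2 gives 2, w=3 gives 4, w=4 gives 8, ...), so whenever length >= 4 it is a multiple of 4 and the 4-way unrolled loop writes exactly uEnd - uStart table entries, matching the simple loop it replaces.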
 
@@ -280,6 +271,7 @@ HUF_decompress4X1_usingDTable_internal_body(
  {   const BYTE* const istart = (const BYTE*) cSrc;
      BYTE* const ostart = (BYTE*) dst;
      BYTE* const oend = ostart + dstSize;
+     BYTE* const olimit = oend - 3;
      const void* const dtPtr = DTable + 1;
      const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;

@@ -304,9 +296,9 @@ HUF_decompress4X1_usingDTable_internal_body(
      BYTE* op2 = opStart2;
      BYTE* op3 = opStart3;
      BYTE* op4 = opStart4;
-     U32 endSignal = BIT_DStream_unfinished;
      DTableDesc const dtd = HUF_getDTableDesc(DTable);
      U32 const dtLog = dtd.tableLog;
+     U32 endSignal = 1;

      if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
      CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
@@ -315,8 +307,7 @@ HUF_decompress4X1_usingDTable_internal_body(
      CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

      /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
-     endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-     while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
+     for ( ; (endSignal) & (op4 < olimit) ; ) {
          HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
          HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
          HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
@@ -333,10 +324,10 @@ HUF_decompress4X1_usingDTable_internal_body(
          HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
          HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
          HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
-         BIT_reloadDStream(&bitD1);
-         BIT_reloadDStream(&bitD2);
-         BIT_reloadDStream(&bitD3);
-         BIT_reloadDStream(&bitD4);
+         endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+         endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+         endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+         endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
      }

      /* check corruption */
@@ -755,7 +746,6 @@ HUF_decompress1X2_usingDTable_internal_body(
      return dstSize;
  }

-
  FORCE_INLINE_TEMPLATE size_t
  HUF_decompress4X2_usingDTable_internal_body(
            void* dst, size_t dstSize,
@@ -767,6 +757,7 @@ HUF_decompress4X2_usingDTable_internal_body(
  {   const BYTE* const istart = (const BYTE*) cSrc;
      BYTE* const ostart = (BYTE*) dst;
      BYTE* const oend = ostart + dstSize;
+     BYTE* const olimit = oend - (sizeof(size_t)-1);
      const void* const dtPtr = DTable+1;
      const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;

@@ -791,7 +782,7 @@ HUF_decompress4X2_usingDTable_internal_body(
      BYTE* op2 = opStart2;
      BYTE* op3 = opStart3;
      BYTE* op4 = opStart4;
-     U32 endSignal;
+     U32 endSignal = 1;
      DTableDesc const dtd = HUF_getDTableDesc(DTable);
      U32 const dtLog = dtd.tableLog;

@@ -802,8 +793,29 @@ HUF_decompress4X2_usingDTable_internal_body(
      CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

      /* 16-32 symbols per loop (4-8 symbols per stream) */
-     endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-     for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
+     for ( ; (endSignal) & (op4 < olimit); ) {
+ #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+         HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+         HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+         HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+         HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+         HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+         HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+         HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+         HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+         endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+         endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+         HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+         HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+         HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+         HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+         HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+         HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+         HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+         HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+         endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+         endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+ #else
          HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
          HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
          HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
@@ -820,8 +832,12 @@ HUF_decompress4X2_usingDTable_internal_body(
          HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
          HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
          HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
-
-         endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+         endSignal = (U32)LIKELY(
+                     (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
+                   & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
+                   & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
+                   & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
+ #endif
      }

      /* check corruption */