zstdlib 0.2.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +30 -1
  3. data/README.md +2 -2
  4. data/Rakefile +1 -1
  5. data/ext/zstdlib/extconf.rb +3 -3
  6. data/ext/zstdlib/ruby/zlib-2.7/zstdlib.c +4895 -0
  7. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/bitstream.h +38 -39
  8. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/compiler.h +40 -5
  9. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/cpu.h +1 -1
  10. data/ext/zstdlib/zstd-1.4.5/lib/common/debug.c +24 -0
  11. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/debug.h +11 -31
  12. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/entropy_common.c +13 -33
  13. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/error_private.c +2 -1
  14. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/error_private.h +6 -2
  15. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/fse.h +12 -32
  16. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/fse_decompress.c +12 -35
  17. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/huf.h +15 -33
  18. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/mem.h +75 -2
  19. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/pool.c +8 -4
  20. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/pool.h +2 -2
  21. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/threading.c +50 -4
  22. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/threading.h +36 -4
  23. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/xxhash.c +23 -35
  24. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/xxhash.h +11 -31
  25. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_common.c +1 -1
  26. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_errors.h +2 -1
  27. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_internal.h +154 -26
  28. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/fse_compress.c +17 -40
  29. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/hist.c +15 -35
  30. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/hist.h +12 -32
  31. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/huf_compress.c +92 -92
  32. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_compress.c +1191 -1330
  33. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_compress_internal.h +317 -55
  34. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_literals.c +158 -0
  35. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_literals.h +29 -0
  36. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_sequences.c +419 -0
  37. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_sequences.h +54 -0
  38. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.c +845 -0
  39. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.h +32 -0
  40. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_cwksp.h +525 -0
  41. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_double_fast.c +65 -43
  42. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_double_fast.h +2 -2
  43. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_fast.c +92 -66
  44. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_fast.h +2 -2
  45. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_lazy.c +74 -42
  46. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_lazy.h +1 -1
  47. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_ldm.c +32 -10
  48. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_ldm.h +7 -2
  49. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_opt.c +81 -114
  50. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_opt.h +1 -1
  51. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstdmt_compress.c +95 -51
  52. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstdmt_compress.h +3 -2
  53. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/huf_decompress.c +76 -60
  54. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_ddict.c +12 -8
  55. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_ddict.h +2 -2
  56. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress.c +292 -172
  57. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.c +459 -338
  58. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.h +3 -3
  59. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_internal.h +18 -4
  60. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/zstd.h +265 -88
  61. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzclose.c +1 -1
  62. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzcompatibility.h +1 -1
  63. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzguts.h +0 -0
  64. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzlib.c +9 -9
  65. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzread.c +16 -8
  66. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzwrite.c +8 -8
  67. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.c +16 -12
  68. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.h +1 -1
  69. metadata +69 -62
  70. data/ext/zstdlib/zstd-1.4.0/lib/common/debug.c +0 -44
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -22,9 +22,9 @@
22
22
  /* ====== Dependencies ====== */
23
23
  #include <string.h> /* memcpy, memset */
24
24
  #include <limits.h> /* INT_MAX, UINT_MAX */
25
- #include "mem.h" /* MEM_STATIC */
26
- #include "pool.h" /* threadpool */
27
- #include "threading.h" /* mutex */
25
+ #include "../common/mem.h" /* MEM_STATIC */
26
+ #include "../common/pool.h" /* threadpool */
27
+ #include "../common/threading.h" /* mutex */
28
28
  #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
29
29
  #include "zstd_ldm.h"
30
30
  #include "zstdmt_compress.h"
@@ -461,7 +461,13 @@ typedef struct {
461
461
  ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
462
462
  } serialState_t;
463
463
 
464
- static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize)
464
+ static int
465
+ ZSTDMT_serialState_reset(serialState_t* serialState,
466
+ ZSTDMT_seqPool* seqPool,
467
+ ZSTD_CCtx_params params,
468
+ size_t jobSize,
469
+ const void* dict, size_t const dictSize,
470
+ ZSTD_dictContentType_e dictContentType)
465
471
  {
466
472
  /* Adjust parameters */
467
473
  if (params.ldmParams.enableLdm) {
@@ -490,8 +496,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
490
496
  /* Size the seq pool tables */
491
497
  ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
492
498
  /* Reset the window */
493
- ZSTD_window_clear(&serialState->ldmState.window);
494
- serialState->ldmWindow = serialState->ldmState.window;
499
+ ZSTD_window_init(&serialState->ldmState.window);
495
500
  /* Resize tables and output space if necessary. */
496
501
  if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
497
502
  ZSTD_free(serialState->ldmState.hashTable, cMem);
@@ -506,7 +511,24 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
506
511
  /* Zero the tables */
507
512
  memset(serialState->ldmState.hashTable, 0, hashSize);
508
513
  memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
514
+
515
+ /* Update window state and fill hash table with dict */
516
+ serialState->ldmState.loadedDictEnd = 0;
517
+ if (dictSize > 0) {
518
+ if (dictContentType == ZSTD_dct_rawContent) {
519
+ BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
520
+ ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
521
+ ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
522
+ serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
523
+ } else {
524
+ /* don't even load anything */
525
+ }
526
+ }
527
+
528
+ /* Initialize serialState's copy of ldmWindow. */
529
+ serialState->ldmWindow = serialState->ldmState.window;
509
530
  }
531
+
510
532
  serialState->params = params;
511
533
  serialState->params.jobSize = (U32)jobSize;
512
534
  return 0;
@@ -668,7 +690,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
668
690
 
669
691
  /* init */
670
692
  if (job->cdict) {
671
- size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
693
+ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
672
694
  assert(job->firstJob); /* only allowed for first job */
673
695
  if (ZSTD_isError(initError)) JOB_ERROR(initError);
674
696
  } else { /* srcStart points at reloaded section */
@@ -680,7 +702,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
680
702
  job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
681
703
  ZSTD_dtlm_fast,
682
704
  NULL, /*cdict*/
683
- jobParams, pledgedSrcSize);
705
+ &jobParams, pledgedSrcSize);
684
706
  if (ZSTD_isError(initError)) JOB_ERROR(initError);
685
707
  } }
686
708
 
@@ -927,12 +949,18 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
927
949
  unsigned jobID;
928
950
  DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
929
951
  for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
952
+ /* Copy the mutex/cond out */
953
+ ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
954
+ ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
955
+
930
956
  DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
931
957
  ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
932
- mtctx->jobs[jobID].dstBuff = g_nullBuffer;
933
- mtctx->jobs[jobID].cSize = 0;
958
+
959
+ /* Clear the job description, but keep the mutex/cond */
960
+ memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
961
+ mtctx->jobs[jobID].job_mutex = mutex;
962
+ mtctx->jobs[jobID].job_cond = cond;
934
963
  }
935
- memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
936
964
  mtctx->inBuff.buffer = g_nullBuffer;
937
965
  mtctx->inBuff.filled = 0;
938
966
  mtctx->allJobsCompleted = 1;
@@ -1028,9 +1056,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
1028
1056
 
1029
1057
  /* Sets parameters relevant to the compression job,
1030
1058
  * initializing others to default values. */
1031
- static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
1059
+ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
1032
1060
  {
1033
- ZSTD_CCtx_params jobParams = params;
1061
+ ZSTD_CCtx_params jobParams = *params;
1034
1062
  /* Clear parameters related to multithreading */
1035
1063
  jobParams.forceWindow = 0;
1036
1064
  jobParams.nbWorkers = 0;
@@ -1048,7 +1076,7 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
1048
1076
  static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
1049
1077
  {
1050
1078
  if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
1051
- FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
1079
+ FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
1052
1080
  mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
1053
1081
  if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
1054
1082
  mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
@@ -1070,7 +1098,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
1070
1098
  DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
1071
1099
  compressionLevel);
1072
1100
  mtctx->params.compressionLevel = compressionLevel;
1073
- { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0);
1101
+ { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0);
1074
1102
  cParams.windowLog = saved_wlog;
1075
1103
  mtctx->params.cParams = cParams;
1076
1104
  }
@@ -1129,9 +1157,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
1129
1157
  size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
1130
1158
  size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
1131
1159
  assert(flushed <= produced);
1160
+ assert(jobPtr->consumed <= jobPtr->src.size);
1132
1161
  toFlush = produced - flushed;
1133
- if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) {
1134
- /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */
1162
+ /* if toFlush==0, nothing is available to flush.
1163
+ * However, jobID is expected to still be active:
1164
+ * if jobID was already completed and fully flushed,
1165
+ * ZSTDMT_flushProduced() should have already moved onto next job.
1166
+ * Therefore, some input has not yet been consumed. */
1167
+ if (toFlush==0) {
1135
1168
  assert(jobPtr->consumed < jobPtr->src.size);
1136
1169
  }
1137
1170
  }
@@ -1146,14 +1179,18 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
1146
1179
  /* ===== Multi-threaded compression ===== */
1147
1180
  /* ------------------------------------------ */
1148
1181
 
1149
- static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
1182
+ static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
1150
1183
  {
1151
- if (params.ldmParams.enableLdm)
1184
+ unsigned jobLog;
1185
+ if (params->ldmParams.enableLdm) {
1152
1186
  /* In Long Range Mode, the windowLog is typically oversized.
1153
1187
  * In which case, it's preferable to determine the jobSize
1154
1188
  * based on chainLog instead. */
1155
- return MAX(21, params.cParams.chainLog + 4);
1156
- return MAX(20, params.cParams.windowLog + 2);
1189
+ jobLog = MAX(21, params->cParams.chainLog + 4);
1190
+ } else {
1191
+ jobLog = MAX(20, params->cParams.windowLog + 2);
1192
+ }
1193
+ return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
1157
1194
  }
1158
1195
 
1159
1196
  static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
@@ -1184,27 +1221,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
1184
1221
  return ovlog;
1185
1222
  }
1186
1223
 
1187
- static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
1224
+ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
1188
1225
  {
1189
- int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
1190
- int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
1226
+ int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
1227
+ int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
1191
1228
  assert(0 <= overlapRLog && overlapRLog <= 8);
1192
- if (params.ldmParams.enableLdm) {
1229
+ if (params->ldmParams.enableLdm) {
1193
1230
  /* In Long Range Mode, the windowLog is typically oversized.
1194
1231
  * In which case, it's preferable to determine the jobSize
1195
1232
  * based on chainLog instead.
1196
1233
  * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
1197
- ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
1234
+ ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
1198
1235
  - overlapRLog;
1199
1236
  }
1200
- assert(0 <= ovLog && ovLog <= 30);
1201
- DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
1237
+ assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
1238
+ DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
1202
1239
  DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
1203
1240
  return (ovLog==0) ? 0 : (size_t)1 << ovLog;
1204
1241
  }
1205
1242
 
1206
1243
  static unsigned
1207
- ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
1244
+ ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
1208
1245
  {
1209
1246
  assert(nbWorkers>0);
1210
1247
  { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
@@ -1220,16 +1257,17 @@ ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers
1220
1257
  /* ZSTDMT_compress_advanced_internal() :
1221
1258
  * This is a blocking function : it will only give back control to caller after finishing its compression job.
1222
1259
  */
1223
- static size_t ZSTDMT_compress_advanced_internal(
1260
+ static size_t
1261
+ ZSTDMT_compress_advanced_internal(
1224
1262
  ZSTDMT_CCtx* mtctx,
1225
1263
  void* dst, size_t dstCapacity,
1226
1264
  const void* src, size_t srcSize,
1227
1265
  const ZSTD_CDict* cdict,
1228
1266
  ZSTD_CCtx_params params)
1229
1267
  {
1230
- ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
1231
- size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
1232
- unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
1268
+ ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
1269
+ size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
1270
+ unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
1233
1271
  size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
1234
1272
  size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
1235
1273
  const char* const srcStart = (const char*)src;
@@ -1247,15 +1285,16 @@ static size_t ZSTDMT_compress_advanced_internal(
1247
1285
  ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
1248
1286
  DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
1249
1287
  if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
1250
- return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
1288
+ return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
1251
1289
  }
1252
1290
 
1253
1291
  assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
1254
1292
  ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
1255
- if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
1293
+ /* LDM doesn't even try to load the dictionary in single-ingestion mode */
1294
+ if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize, NULL, 0, ZSTD_dct_auto))
1256
1295
  return ERROR(memory_allocation);
1257
1296
 
1258
- FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */
1297
+ FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) , ""); /* only expands if necessary */
1259
1298
 
1260
1299
  { unsigned u;
1261
1300
  for (u=0; u<nbJobs; u++) {
@@ -1388,19 +1427,19 @@ size_t ZSTDMT_initCStream_internal(
1388
1427
 
1389
1428
  /* init */
1390
1429
  if (params.nbWorkers != mtctx->params.nbWorkers)
1391
- FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
1430
+ FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , "");
1392
1431
 
1393
1432
  if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
1394
- if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
1433
+ if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
1395
1434
 
1396
1435
  mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
1397
1436
  if (mtctx->singleBlockingThread) {
1398
- ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
1437
+ ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
1399
1438
  DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
1400
1439
  assert(singleThreadParams.nbWorkers == 0);
1401
1440
  return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
1402
1441
  dict, dictSize, cdict,
1403
- singleThreadParams, pledgedSrcSize);
1442
+ &singleThreadParams, pledgedSrcSize);
1404
1443
  }
1405
1444
 
1406
1445
  DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
@@ -1426,12 +1465,14 @@ size_t ZSTDMT_initCStream_internal(
1426
1465
  mtctx->cdict = cdict;
1427
1466
  }
1428
1467
 
1429
- mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
1468
+ mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
1430
1469
  DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
1431
1470
  mtctx->targetSectionSize = params.jobSize;
1432
1471
  if (mtctx->targetSectionSize == 0) {
1433
- mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
1472
+ mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
1434
1473
  }
1474
+ assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
1475
+
1435
1476
  if (params.rsyncable) {
1436
1477
  /* Aim for the targetsectionSize as the average job size. */
1437
1478
  U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
@@ -1483,7 +1524,8 @@ size_t ZSTDMT_initCStream_internal(
1483
1524
  mtctx->allJobsCompleted = 0;
1484
1525
  mtctx->consumed = 0;
1485
1526
  mtctx->produced = 0;
1486
- if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize))
1527
+ if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize,
1528
+ dict, dictSize, dictContentType))
1487
1529
  return ERROR(memory_allocation);
1488
1530
  return 0;
1489
1531
  }
@@ -1697,9 +1739,11 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
1697
1739
  assert(mtctx->doneJobID < mtctx->nextJobID);
1698
1740
  assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
1699
1741
  assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
1700
- memcpy((char*)output->dst + output->pos,
1701
- (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
1702
- toFlush);
1742
+ if (toFlush > 0) {
1743
+ memcpy((char*)output->dst + output->pos,
1744
+ (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
1745
+ toFlush);
1746
+ }
1703
1747
  output->pos += toFlush;
1704
1748
  mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */
1705
1749
 
@@ -1769,7 +1813,7 @@ static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
1769
1813
  BYTE const* const bufferStart = (BYTE const*)buffer.start;
1770
1814
  BYTE const* const bufferEnd = bufferStart + buffer.capacity;
1771
1815
  BYTE const* const rangeStart = (BYTE const*)range.start;
1772
- BYTE const* const rangeEnd = rangeStart + range.size;
1816
+ BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;
1773
1817
 
1774
1818
  if (rangeStart == NULL || bufferStart == NULL)
1775
1819
  return 0;
@@ -2043,7 +2087,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
2043
2087
  || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */
2044
2088
  size_t const jobSize = mtctx->inBuff.filled;
2045
2089
  assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
2046
- FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
2090
+ FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , "");
2047
2091
  }
2048
2092
 
2049
2093
  /* check for potential compressed data ready to be flushed */
@@ -2057,7 +2101,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
2057
2101
 
2058
2102
  size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
2059
2103
  {
2060
- FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
2104
+ FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) , "");
2061
2105
 
2062
2106
  /* recommended next input size : fill current input buffer */
2063
2107
  return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
@@ -2074,7 +2118,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
2074
2118
  || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */
2075
2119
  DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
2076
2120
  (U32)srcSize, (U32)endFrame);
2077
- FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
2121
+ FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) , "");
2078
2122
  }
2079
2123
 
2080
2124
  /* check if there is any data available to flush */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -40,7 +40,7 @@
40
40
  /* === Dependencies === */
41
41
  #include <stddef.h> /* size_t */
42
42
  #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
43
- #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
43
+ #include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
44
44
 
45
45
 
46
46
  /* === Constants === */
@@ -50,6 +50,7 @@
50
50
  #ifndef ZSTDMT_JOBSIZE_MIN
51
51
  # define ZSTDMT_JOBSIZE_MIN (1 MB)
52
52
  #endif
53
+ #define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
53
54
  #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
54
55
 
55
56
 
@@ -1,47 +1,27 @@
1
1
  /* ******************************************************************
2
- huff0 huffman decoder,
3
- part of Finite State Entropy library
4
- Copyright (C) 2013-present, Yann Collet.
5
-
6
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
-
8
- Redistribution and use in source and binary forms, with or without
9
- modification, are permitted provided that the following conditions are
10
- met:
11
-
12
- * Redistributions of source code must retain the above copyright
13
- notice, this list of conditions and the following disclaimer.
14
- * Redistributions in binary form must reproduce the above
15
- copyright notice, this list of conditions and the following disclaimer
16
- in the documentation and/or other materials provided with the
17
- distribution.
18
-
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
-
31
- You can contact the author at :
32
- - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
2
+ * huff0 huffman decoder,
3
+ * part of Finite State Entropy library
4
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
5
+ *
6
+ * You can contact the author at :
7
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
 
35
15
  /* **************************************************************
36
16
  * Dependencies
37
17
  ****************************************************************/
38
18
  #include <string.h> /* memcpy, memset */
39
- #include "compiler.h"
40
- #include "bitstream.h" /* BIT_* */
41
- #include "fse.h" /* to compress headers */
19
+ #include "../common/compiler.h"
20
+ #include "../common/bitstream.h" /* BIT_* */
21
+ #include "../common/fse.h" /* to compress headers */
42
22
  #define HUF_STATIC_LINKING_ONLY
43
- #include "huf.h"
44
- #include "error_private.h"
23
+ #include "../common/huf.h"
24
+ #include "../common/error_private.h"
45
25
 
46
26
  /* **************************************************************
47
27
  * Macros
@@ -61,7 +41,6 @@
61
41
  * Error Management
62
42
  ****************************************************************/
63
43
  #define HUF_isError ERR_isError
64
- #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
65
44
 
66
45
 
67
46
  /* **************************************************************
@@ -179,17 +158,29 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
179
158
 
180
159
  /* fill DTable */
181
160
  { U32 n;
182
- for (n=0; n<nbSymbols; n++) {
183
- U32 const w = huffWeight[n];
184
- U32 const length = (1 << w) >> 1;
185
- U32 u;
161
+ size_t const nEnd = nbSymbols;
162
+ for (n=0; n<nEnd; n++) {
163
+ size_t const w = huffWeight[n];
164
+ size_t const length = (1 << w) >> 1;
165
+ size_t const uStart = rankVal[w];
166
+ size_t const uEnd = uStart + length;
167
+ size_t u;
186
168
  HUF_DEltX1 D;
187
- D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
188
- for (u = rankVal[w]; u < rankVal[w] + length; u++)
189
- dt[u] = D;
190
- rankVal[w] += length;
191
- } }
192
-
169
+ D.byte = (BYTE)n;
170
+ D.nbBits = (BYTE)(tableLog + 1 - w);
171
+ rankVal[w] = (U32)uEnd;
172
+ if (length < 4) {
173
+ /* Use length in the loop bound so the compiler knows it is short. */
174
+ for (u = 0; u < length; ++u)
175
+ dt[uStart + u] = D;
176
+ } else {
177
+ /* Unroll the loop 4 times, we know it is a power of 2. */
178
+ for (u = uStart; u < uEnd; u += 4) {
179
+ dt[u + 0] = D;
180
+ dt[u + 1] = D;
181
+ dt[u + 2] = D;
182
+ dt[u + 3] = D;
183
+ } } } }
193
184
  return iSize;
194
185
  }
195
186
 
@@ -280,6 +271,7 @@ HUF_decompress4X1_usingDTable_internal_body(
280
271
  { const BYTE* const istart = (const BYTE*) cSrc;
281
272
  BYTE* const ostart = (BYTE*) dst;
282
273
  BYTE* const oend = ostart + dstSize;
274
+ BYTE* const olimit = oend - 3;
283
275
  const void* const dtPtr = DTable + 1;
284
276
  const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
285
277
 
@@ -304,9 +296,9 @@ HUF_decompress4X1_usingDTable_internal_body(
304
296
  BYTE* op2 = opStart2;
305
297
  BYTE* op3 = opStart3;
306
298
  BYTE* op4 = opStart4;
307
- U32 endSignal = BIT_DStream_unfinished;
308
299
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
309
300
  U32 const dtLog = dtd.tableLog;
301
+ U32 endSignal = 1;
310
302
 
311
303
  if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
312
304
  CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
@@ -315,8 +307,7 @@ HUF_decompress4X1_usingDTable_internal_body(
315
307
  CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
316
308
 
317
309
  /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
318
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
319
- while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
310
+ for ( ; (endSignal) & (op4 < olimit) ; ) {
320
311
  HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
321
312
  HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
322
313
  HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
@@ -333,10 +324,10 @@ HUF_decompress4X1_usingDTable_internal_body(
333
324
  HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
334
325
  HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
335
326
  HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
336
- BIT_reloadDStream(&bitD1);
337
- BIT_reloadDStream(&bitD2);
338
- BIT_reloadDStream(&bitD3);
339
- BIT_reloadDStream(&bitD4);
327
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
328
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
329
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
330
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
340
331
  }
341
332
 
342
333
  /* check corruption */
@@ -755,7 +746,6 @@ HUF_decompress1X2_usingDTable_internal_body(
755
746
  return dstSize;
756
747
  }
757
748
 
758
-
759
749
  FORCE_INLINE_TEMPLATE size_t
760
750
  HUF_decompress4X2_usingDTable_internal_body(
761
751
  void* dst, size_t dstSize,
@@ -767,6 +757,7 @@ HUF_decompress4X2_usingDTable_internal_body(
767
757
  { const BYTE* const istart = (const BYTE*) cSrc;
768
758
  BYTE* const ostart = (BYTE*) dst;
769
759
  BYTE* const oend = ostart + dstSize;
760
+ BYTE* const olimit = oend - (sizeof(size_t)-1);
770
761
  const void* const dtPtr = DTable+1;
771
762
  const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
772
763
 
@@ -791,7 +782,7 @@ HUF_decompress4X2_usingDTable_internal_body(
791
782
  BYTE* op2 = opStart2;
792
783
  BYTE* op3 = opStart3;
793
784
  BYTE* op4 = opStart4;
794
- U32 endSignal;
785
+ U32 endSignal = 1;
795
786
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
796
787
  U32 const dtLog = dtd.tableLog;
797
788
 
@@ -802,8 +793,29 @@ HUF_decompress4X2_usingDTable_internal_body(
802
793
  CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
803
794
 
804
795
  /* 16-32 symbols per loop (4-8 symbols per stream) */
805
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
806
- for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
796
+ for ( ; (endSignal) & (op4 < olimit); ) {
797
+ #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
798
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
799
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
800
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
801
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
802
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
803
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
804
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
805
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
806
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
807
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
808
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
809
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
810
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
811
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
812
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
813
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
814
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
815
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
816
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
817
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
818
+ #else
807
819
  HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
808
820
  HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
809
821
  HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
@@ -820,8 +832,12 @@ HUF_decompress4X2_usingDTable_internal_body(
820
832
  HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
821
833
  HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
822
834
  HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
823
-
824
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
835
+ endSignal = (U32)LIKELY(
836
+ (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
837
+ & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
838
+ & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
839
+ & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
840
+ #endif
825
841
  }
826
842
 
827
843
  /* check corruption */