zstd-ruby 1.3.4.0 → 1.3.5.0

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +56 -10
  4. data/ext/zstdruby/libzstd/README.md +4 -0
  5. data/ext/zstdruby/libzstd/common/bitstream.h +6 -19
  6. data/ext/zstdruby/libzstd/common/compiler.h +3 -3
  7. data/ext/zstdruby/libzstd/common/cpu.h +1 -2
  8. data/ext/zstdruby/libzstd/common/debug.c +44 -0
  9. data/ext/zstdruby/libzstd/common/debug.h +123 -0
  10. data/ext/zstdruby/libzstd/common/entropy_common.c +16 -1
  11. data/ext/zstdruby/libzstd/common/fse.h +45 -41
  12. data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
  13. data/ext/zstdruby/libzstd/common/huf.h +34 -27
  14. data/ext/zstdruby/libzstd/common/pool.c +89 -32
  15. data/ext/zstdruby/libzstd/common/pool.h +29 -19
  16. data/ext/zstdruby/libzstd/common/zstd_common.c +0 -5
  17. data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -37
  18. data/ext/zstdruby/libzstd/compress/fse_compress.c +28 -163
  19. data/ext/zstdruby/libzstd/compress/hist.c +195 -0
  20. data/ext/zstdruby/libzstd/compress/hist.h +92 -0
  21. data/ext/zstdruby/libzstd/compress/huf_compress.c +14 -6
  22. data/ext/zstdruby/libzstd/compress/zstd_compress.c +798 -350
  23. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +120 -34
  24. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +247 -87
  25. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -1
  26. data/ext/zstdruby/libzstd/compress/zstd_fast.c +177 -56
  27. data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -1
  28. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +331 -65
  29. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +13 -0
  30. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +15 -20
  31. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -2
  32. data/ext/zstdruby/libzstd/compress/zstd_opt.c +503 -300
  33. data/ext/zstdruby/libzstd/compress/zstd_opt.h +7 -0
  34. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +122 -47
  35. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +5 -5
  36. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +325 -325
  37. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +80 -43
  38. data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
  39. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +5 -5
  40. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +12 -61
  41. data/ext/zstdruby/libzstd/zstd.h +137 -69
  42. data/lib/zstd-ruby/version.rb +1 -1
  43. metadata +7 -3
data/ext/zstdruby/libzstd/compress/zstd_opt.h

@@ -28,6 +28,13 @@ size_t ZSTD_compressBlock_btultra(
                      ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
                      ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
 
+ size_t ZSTD_compressBlock_btopt_dictMatchState(
+                     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+                     ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+ size_t ZSTD_compressBlock_btultra_dictMatchState(
+                     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+                     ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+
  size_t ZSTD_compressBlock_btopt_extDict(
                      ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
                      ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
data/ext/zstdruby/libzstd/compress/zstdmt_compress.c

@@ -37,18 +37,17 @@
  #define ZSTD_RESIZE_SEQPOOL 0
 
  /* ======   Debug   ====== */
- #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
+ #if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) && !defined(_MSC_VER)
 
  #  include <stdio.h>
  #  include <unistd.h>
  #  include <sys/times.h>
- #  define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
 
  #  define DEBUG_PRINTHEX(l,p,n) {            \
      unsigned debug_u;                        \
      for (debug_u=0; debug_u<(n); debug_u++)  \
-         DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
-     DEBUGLOGRAW(l, " \n");                   \
+         RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+     RAWLOG(l, " \n");                        \
  }
 
  static unsigned long long GetCurrentClockTimeMicroseconds(void)
@@ -62,7 +61,7 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
 
  #define MUTEX_WAIT_TIME_DLEVEL 6
  #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) {          \
-     if (ZSTD_DEBUG >= MUTEX_WAIT_TIME_DLEVEL) {   \
+     if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) {   \
          unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
          ZSTD_pthread_mutex_lock(mutex);           \
          {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
@@ -160,6 +159,25 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
      ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
  }
 
+
+ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)
+ {
+     unsigned const maxNbBuffers = 2*nbWorkers + 3;
+     if (srcBufPool==NULL) return NULL;
+     if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
+         return srcBufPool;
+     /* need a larger buffer pool */
+     {   ZSTD_customMem const cMem = srcBufPool->cMem;
+         size_t const bSize = srcBufPool->bufferSize;   /* forward parameters */
+         ZSTDMT_bufferPool* newBufPool;
+         ZSTDMT_freeBufferPool(srcBufPool);
+         newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+         if (newBufPool==NULL) return newBufPool;
+         ZSTDMT_setBufferSize(newBufPool, bSize);
+         return newBufPool;
+     }
+ }
+
  /** ZSTDMT_getBuffer() :
   *  assumption : bufPool must be valid
   * @return : a buffer, with start pointer and size
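Note on the pattern above: ZSTDMT_expandBufferPool does not grow the pool in place. When capacity is insufficient it frees the old pool and creates a fresh one, so on allocation failure the caller is left holding NULL with the old pool already gone. The caller-side contract, mirroring how ZSTDMT_resize (further down in this diff) consumes it:

    /* Caller-side contract (taken from ZSTDMT_resize below) : on failure the
     * old pool has already been freed, so the only safe reaction is to
     * report an allocation error and stop using the pool pointer. */
    mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
    if (mtctx->bufPool == NULL) return ERROR(memory_allocation);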
@@ -310,6 +328,10 @@ static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
      ZSTDMT_freeBufferPool(seqPool);
  }
 
+ static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
+ {
+     return ZSTDMT_expandBufferPool(pool, nbWorkers);
+ }
 
 
  /* =====   CCtx Pool   ===== */
@@ -355,6 +377,18 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
      return cctxPool;
  }
 
+ static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
+                                               unsigned nbWorkers)
+ {
+     if (srcPool==NULL) return NULL;
+     if (nbWorkers <= srcPool->totalCCtx) return srcPool;   /* good enough */
+     /* need a larger cctx pool */
+     {   ZSTD_customMem const cMem = srcPool->cMem;
+         ZSTDMT_freeCCtxPool(srcPool);
+         return ZSTDMT_createCCtxPool(nbWorkers, cMem);
+     }
+ }
+
  /* only works during initialization phase, not during compression */
  static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
  {
@@ -425,12 +459,11 @@ typedef struct {
      ZSTD_window_t ldmWindow;  /* A thread-safe copy of ldmState.window */
  } serialState_t;
 
- static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params)
+ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize)
  {
      /* Adjust parameters */
      if (params.ldmParams.enableLdm) {
          DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
-         params.ldmParams.windowLog = params.cParams.windowLog;
          ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
          assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
          assert(params.ldmParams.hashEveryLog < 32);
@@ -453,7 +486,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
              serialState->params.ldmParams.hashLog -
              serialState->params.ldmParams.bucketSizeLog;
          /* Size the seq pool tables */
-         ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, params.jobSize));
+         ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
          /* Reset the window */
          ZSTD_window_clear(&serialState->ldmState.window);
          serialState->ldmWindow = serialState->ldmState.window;
@@ -473,6 +506,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
          memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
      }
      serialState->params = params;
+     serialState->params.jobSize = (U32)jobSize;
      return 0;
  }
 
@@ -514,6 +548,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
      size_t error;
      assert(seqStore.seq != NULL && seqStore.pos == 0 &&
             seqStore.size == 0 && seqStore.capacity > 0);
+     assert(src.size <= serialState->params.jobSize);
      ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
      error = ZSTD_ldm_generateSequences(
          &serialState->ldmState, &seqStore,
@@ -602,13 +637,6 @@ void ZSTDMT_compressionJob(void* jobDescription)
      rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
      buffer_t dstBuff = job->dstBuff;
 
-     /* Don't compute the checksum for chunks, since we compute it externally,
-      * but write it in the header.
-      */
-     if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
-     /* Don't run LDM for the chunks, since we handle it externally */
-     jobParams.ldmParams.enableLdm = 0;
-
      /* ressources */
      if (cctx==NULL) {
          job->cSize = ERROR(memory_allocation);
@@ -622,10 +650,22 @@ void ZSTDMT_compressionJob(void* jobDescription)
          }
          job->dstBuff = dstBuff;   /* this value can be read in ZSTDMT_flush, when it copies the whole job */
      }
+     if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL) {
+         job->cSize = ERROR(memory_allocation);
+         goto _endJob;
+     }
+
+     /* Don't compute the checksum for chunks, since we compute it externally,
+      * but write it in the header.
+      */
+     if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
+     /* Don't run LDM for the chunks, since we handle it externally */
+     jobParams.ldmParams.enableLdm = 0;
+
 
      /* init */
      if (job->cdict) {
-         size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, job->cdict, jobParams, job->fullFrameSize);
+         size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
          assert(job->firstJob);   /* only allowed for first job */
          if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
      } else {   /* srcStart points at reloaded section */
@@ -637,6 +677,7 @@ void ZSTDMT_compressionJob(void* jobDescription)
      }   }
      {   size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
                          job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
+                         ZSTD_dtlm_fast,
                          NULL, /*cdict*/
                          jobParams, pledgedSrcSize);
          if (ZSTD_isError(initError)) {
@@ -745,9 +786,9 @@ struct ZSTDMT_CCtx_s {
      ZSTD_CCtx_params params;
      size_t targetSectionSize;
      size_t targetPrefixSize;
-     roundBuff_t roundBuff;
+     int jobReady;        /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */
      inBuff_t inBuff;
-     int jobReady;        /* 1 => one job is already prepared, but pool has shortage of workers. Don't create another one. */
+     roundBuff_t roundBuff;
      serialState_t serial;
      unsigned singleBlockingThread;
      unsigned jobIDMask;
@@ -798,6 +839,20 @@ static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_custom
      return jobTable;
  }
 
+ static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
+     U32 nbJobs = nbWorkers + 2;
+     if (nbJobs > mtctx->jobIDMask+1) {  /* need more job capacity */
+         ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
+         mtctx->jobIDMask = 0;
+         mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem);
+         if (mtctx->jobs==NULL) return ERROR(memory_allocation);
+         assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0));  /* ensure nbJobs is a power of 2 */
+         mtctx->jobIDMask = nbJobs - 1;
+     }
+     return 0;
+ }
+
+
  /* ZSTDMT_CCtxParam_setNbWorkers():
   * Internal use only */
  size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
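The assert above is what makes jobIDMask valid: ZSTDMT_createJobsTable rounds nbJobs up to a power of 2, so nbJobs - 1 is an all-ones bit mask and job IDs can wrap around the circular jobs table with a single AND instead of a modulo. An illustrative sketch (not taken from this diff; nextJobID is a hypothetical monotonically increasing counter):

    /* Illustrative only : with nbJobs a power of 2, `id & (nbJobs-1)`
     * equals `id % nbJobs` for any unsigned id. */
    unsigned const jobSlot = nextJobID & mtctx->jobIDMask;
    ZSTDMT_jobDescription* const job = &mtctx->jobs[jobSlot];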
@@ -924,6 +979,8 @@ size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
          if ( (value > 0)   /* value==0 => automatic job size */
             & (value < ZSTDMT_JOBSIZE_MIN) )
              value = ZSTDMT_JOBSIZE_MIN;
+         if (value > ZSTDMT_JOBSIZE_MAX)
+             value = ZSTDMT_JOBSIZE_MAX;
          params->jobSize = value;
          return value;
      case ZSTDMT_p_overlapSectionLog :
@@ -950,6 +1007,21 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
      }
  }
 
+ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value)
+ {
+     switch (parameter) {
+     case ZSTDMT_p_jobSize:
+         *value = mtctx->params.jobSize;
+         break;
+     case ZSTDMT_p_overlapSectionLog:
+         *value = mtctx->params.overlapSizeLog;
+         break;
+     default:
+         return ERROR(parameter_unsupported);
+     }
+     return 0;
+ }
+
  /* Sets parameters relevant to the compression job,
   * initializing others to default values. */
  static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
@@ -960,11 +1032,28 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
      jobParams.cParams = params.cParams;
      jobParams.fParams = params.fParams;
      jobParams.compressionLevel = params.compressionLevel;
-     jobParams.disableLiteralCompression = params.disableLiteralCompression;
 
      return jobParams;
  }
 
+
+ /* ZSTDMT_resize() :
+  * @return : error code if fails, 0 on success */
+ static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
+ {
+     if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
+     CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
+     mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
+     if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
+     mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
+     if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
+     mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers);
+     if (mtctx->seqPool == NULL) return ERROR(memory_allocation);
+     ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
+     return 0;
+ }
+
+
  /*! ZSTDMT_updateCParams_whileCompressing() :
   *  Updates only a selected set of compression parameters, to remain compatible with current frame.
   *  New parameters will be applied to next compression job. */
@@ -981,15 +1070,6 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
      }
  }
 
- /* ZSTDMT_getNbWorkers():
-  * @return nb threads currently active in mtctx.
-  *  mtctx must be valid */
- unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx)
- {
-     assert(mtctx != NULL);
-     return mtctx->params.nbWorkers;
- }
-
  /* ZSTDMT_getFrameProgression():
   * tells how much data has been consumed (input) and produced (output) for current frame.
   * able to count progression inside worker threads.
@@ -1087,18 +1167,10 @@ static size_t ZSTDMT_compress_advanced_internal(
 
      assert(avgJobSize >= 256 KB);   /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
      ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
-     if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
+     if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
          return ERROR(memory_allocation);
 
-     if (nbJobs > mtctx->jobIDMask+1) {   /* enlarge job table */
-         U32 jobsTableSize = nbJobs;
-         ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
-         mtctx->jobIDMask = 0;
-         mtctx->jobs = ZSTDMT_createJobsTable(&jobsTableSize, mtctx->cMem);
-         if (mtctx->jobs==NULL) return ERROR(memory_allocation);
-         assert((jobsTableSize != 0) && ((jobsTableSize & (jobsTableSize - 1)) == 0));   /* ensure jobsTableSize is a power of 2 */
-         mtctx->jobIDMask = jobsTableSize - 1;
-     }
+     CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbJobs) );   /* only expands if necessary */
 
      {   unsigned u;
          for (u=0; u<nbJobs; u++) {
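The CHECK_F wrapper that replaces the inlined enlarge-table block above is the library's early-return idiom: evaluate the expression once and, if it yields an error code, propagate it immediately. The HUF-flavoured definition appears verbatim in huf_decompress.c later in this diff; the variant used by zstdmt_compress.c comes from the shared internal headers and behaves the same way, testing with ZSTD_isError (an assumption, since that definition is not part of this diff):

    /* HUF variant, verbatim from huf_decompress.c further down : */
    #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }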
@@ -1221,17 +1293,18 @@ size_t ZSTDMT_initCStream_internal(
      const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
      unsigned long long pledgedSrcSize)
  {
-     DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u, disableLiteralCompression=%i)",
-         (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx, params.disableLiteralCompression);
-     /* params are supposed to be fully validated at this point */
+     DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)",
+         (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx);
+
+     /* params supposed partially fully validated at this point */
      assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
      assert(!((dict) && (cdict)));   /* either dict or cdict, not both */
-     assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
 
      /* init */
-     if (params.jobSize == 0) {
-         params.jobSize = 1U << ZSTDMT_computeTargetJobLog(params);
-     }
+     if (params.nbWorkers != mtctx->params.nbWorkers)
+         CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
+
+     if (params.jobSize > 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
      if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
 
      mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);   /* do not trigger multi-threading when srcSize is too small */
@@ -1270,7 +1343,9 @@ size_t ZSTDMT_initCStream_internal(
      mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
      DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
      mtctx->targetSectionSize = params.jobSize;
-     if (mtctx->targetSectionSize < ZSTDMT_JOBSIZE_MIN) mtctx->targetSectionSize = ZSTDMT_JOBSIZE_MIN;
+     if (mtctx->targetSectionSize == 0) {
+         mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
+     }
      if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize;   /* job size must be >= overlap size */
      DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
      DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
@@ -1312,7 +1387,7 @@ size_t ZSTDMT_initCStream_internal(
      mtctx->allJobsCompleted = 0;
      mtctx->consumed = 0;
      mtctx->produced = 0;
-     if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
+     if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize))
          return ERROR(memory_allocation);
      return 0;
  }
data/ext/zstdruby/libzstd/compress/zstdmt_compress.h

@@ -95,6 +95,11 @@ typedef enum {
   * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
  ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value);
 
+ /* ZSTDMT_getMTCtxParameter() :
+  * Query the ZSTDMT_CCtx for a parameter value.
+  * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
+ ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value);
+
 
  /*! ZSTDMT_compressStream_generic() :
   *  Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
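A hedged usage sketch for the new getter, pairing it with the existing setter. ZSTDMT_createCCtx and ZSTDMT_freeCCtx are assumed from the rest of this header; they do not appear in this diff:

    ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(4);   /* 4 workers (assumed API) */
    unsigned jobSize = 0;
    size_t err = ZSTDMT_setMTCtxParameter(mtctx, ZSTDMT_p_jobSize, 2 << 20);
    if (!ZSTD_isError(err))
        err = ZSTDMT_getMTCtxParameter(mtctx, ZSTDMT_p_jobSize, &jobSize);
    /* jobSize now reports the value actually in effect, after clamping */
    ZSTDMT_freeCCtx(mtctx);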
@@ -126,11 +131,6 @@ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorker
   *  New parameters will be applied to next compression job. */
  void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
 
- /* ZSTDMT_getNbWorkers():
-  * @return nb threads currently active in mtctx.
-  *  mtctx must be valid */
- unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx);
-
  /* ZSTDMT_getFrameProgression():
   * tells how much data has been consumed (input) and produced (output) for current frame.
   * able to count progression inside worker threads.
data/ext/zstdruby/libzstd/decompress/huf_decompress.c

@@ -1,6 +1,7 @@
  /* ******************************************************************
-    Huffman decoder, part of New Generation Entropy library
-    Copyright (C) 2013-2016, Yann Collet.
+    huff0 huffman decoder,
+    part of Finite State Entropy library
+    Copyright (C) 2013-present, Yann Collet.
 
     BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -29,16 +30,15 @@
 
     You can contact the author at :
     - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
-    - Public forum : https://groups.google.com/forum/#!forum/lz4c
  ****************************************************************** */
 
  /* **************************************************************
  *  Dependencies
  ****************************************************************/
  #include <string.h>     /* memcpy, memset */
- #include "bitstream.h"  /* BIT_* */
  #include "compiler.h"
- #include "fse.h"        /* header compression */
+ #include "bitstream.h"  /* BIT_* */
+ #include "fse.h"        /* to compress headers */
  #define HUF_STATIC_LINKING_ONLY
  #include "huf.h"
  #include "error_private.h"
@@ -48,7 +48,6 @@
  *  Error Management
  ****************************************************************/
  #define HUF_isError ERR_isError
- #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
  #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
 
 
@@ -75,15 +74,15 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
  /*-***************************/
  /*  single-symbol decoding   */
  /*-***************************/
- typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
+ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
 
- size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
+ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
  {
      U32 tableLog = 0;
      U32 nbSymbols = 0;
      size_t iSize;
      void* const dtPtr = DTable + 1;
-     HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+     HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
 
      U32* rankVal;
      BYTE* huffWeight;
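For orientation through the many renames that follow: this release renames the Huffman decoders so the X-suffix counts symbols per table cell rather than cell size. The two element types, both visible in this diff, make the new convention concrete:

    /* Naming convention after this change :
     *   HUF_DEltX1 - 1 symbol  per cell (was HUF_DEltX2 in 1.3.4)
     *   HUF_DEltX2 - 2 symbols per cell (was HUF_DEltX4 in 1.3.4) */
    typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;                  /* single-symbol decoding  */
    typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */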
@@ -96,7 +95,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
 
      if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
 
-     HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+     DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
      /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
 
      iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
@@ -124,7 +123,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
          U32 const w = huffWeight[n];
          U32 const length = (1 << w) >> 1;
          U32 u;
-         HUF_DEltX2 D;
+         HUF_DEltX1 D;
          D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
          for (u = rankVal[w]; u < rankVal[w] + length; u++)
              dt[u] = D;
@@ -134,17 +133,15 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
      return iSize;
  }
 
- size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
+ size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
  {
      U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-     return HUF_readDTableX2_wksp(DTable, src, srcSize,
+     return HUF_readDTableX1_wksp(DTable, src, srcSize,
                                   workSpace, sizeof(workSpace));
  }
 
- typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;   /* double-symbols decoding */
-
  FORCE_INLINE_TEMPLATE BYTE
- HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
  {
      size_t const val = BIT_lookBitsFast(Dstream, dtLog);   /* note : dtLog >= 1 */
      BYTE const c = dt[val].byte;
@@ -152,44 +149,44 @@ HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog
      return c;
  }
 
- #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
-     *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+ #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+     *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
 
- #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)  \
+ #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
      if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
-         HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+         HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
 
- #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+ #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
      if (MEM_64bits()) \
-         HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+         HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
 
  HINT_INLINE size_t
- HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
  {
      BYTE* const pStart = p;
 
      /* up to 4 symbols at a time */
      while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
-         HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-         HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
-         HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+         HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+         HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
+         HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+         HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
      }
 
      /* [0-3] symbols remaining */
      if (MEM_32bits())
          while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
-             HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+             HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
 
      /* no more data to retrieve from bitstream, no need to reload */
      while (p < pEnd)
-         HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+         HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
 
      return pEnd-pStart;
  }
 
  FORCE_INLINE_TEMPLATE size_t
- HUF_decompress1X2_usingDTable_internal_body(
+ HUF_decompress1X1_usingDTable_internal_body(
      void* dst,  size_t dstSize,
      const void* cSrc, size_t cSrcSize,
      const HUF_DTable* DTable)
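Aside on the _2/_1/_0 macro tiers above (reasoning from the visible guards, not text from the diff): they bound how many bits can be consumed between reloads. Each table lookup consumes at most HUF_TABLELOG_MAX = 12 bits; on 64-bit targets all four steps fire, while on 32-bit targets only the step guarded by HUF_TABLELOG_MAX<=12 and the unconditional step do, so consumption stays within the (nearly full) bit container refilled by BIT_reloadDStream:

    /* Bit budget per loop iteration, from the guards above (illustrative) :
     *   64-bit : X1_2 + X1_1 + X1_2 + X1_0  -> 4 symbols * 12 bits = 48 bits
     *   32-bit : X1_1 + X1_0 only           -> 2 symbols * 12 bits = 24 bits */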
@@ -197,14 +194,14 @@ HUF_decompress1X2_usingDTable_internal_body(
      BYTE* op = (BYTE*)dst;
      BYTE* const oend = op + dstSize;
      const void* dtPtr = DTable + 1;
-     const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+     const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
      BIT_DStream_t bitD;
      DTableDesc const dtd = HUF_getDTableDesc(DTable);
      U32 const dtLog = dtd.tableLog;
 
      CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
 
-     HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
+     HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
 
      if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
 
@@ -212,7 +209,7 @@ HUF_decompress1X2_usingDTable_internal_body(
  }
 
  FORCE_INLINE_TEMPLATE size_t
- HUF_decompress4X2_usingDTable_internal_body(
+ HUF_decompress4X1_usingDTable_internal_body(
      void* dst,  size_t dstSize,
      const void* cSrc, size_t cSrcSize,
      const HUF_DTable* DTable)
@@ -224,7 +221,7 @@ HUF_decompress4X2_usingDTable_internal_body(
      BYTE* const ostart = (BYTE*) dst;
      BYTE* const oend = ostart + dstSize;
      const void* const dtPtr = DTable + 1;
-     const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+     const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
 
      /* Init */
      BIT_DStream_t bitD1;
@@ -260,22 +257,22 @@ HUF_decompress4X2_usingDTable_internal_body(
      /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
      endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
      while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
-         HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-         HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-         HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-         HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-         HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
-         HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
-         HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
-         HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
-         HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-         HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-         HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-         HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-         HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
-         HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
-         HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
-         HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+         HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+         HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+         HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+         HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+         HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
+         HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
+         HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
+         HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
+         HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+         HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+         HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+         HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+         HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
+         HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
+         HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
+         HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
          BIT_reloadDStream(&bitD1);
          BIT_reloadDStream(&bitD2);
          BIT_reloadDStream(&bitD3);
@@ -291,191 +288,10 @@ HUF_decompress4X2_usingDTable_internal_body(
      /* note : op4 supposed already verified within main loop */
 
      /* finish bitStreams one by one */
-     HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
-     HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
-     HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
-     HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
-
-     /* check */
-     { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
-       if (!endCheck) return ERROR(corruption_detected); }
-
-     /* decoded size */
-     return dstSize;
-     }
- }
-
-
- FORCE_INLINE_TEMPLATE U32
- HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
- {
-     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
-     memcpy(op, dt+val, 2);
-     BIT_skipBits(DStream, dt[val].nbBits);
-     return dt[val].length;
- }
-
- FORCE_INLINE_TEMPLATE U32
- HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
- {
-     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
-     memcpy(op, dt+val, 1);
-     if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
-     else {
-         if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
-             BIT_skipBits(DStream, dt[val].nbBits);
-             if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
-                 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
-                 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
-     }   }
-     return 1;
- }
-
- #define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
-     ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
- #define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
-     if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
-         ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
- #define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
-     if (MEM_64bits()) \
-         ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
- HINT_INLINE size_t
- HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
-                 const HUF_DEltX4* const dt, const U32 dtLog)
- {
-     BYTE* const pStart = p;
-
-     /* up to 8 symbols at a time */
-     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
-         HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
-         HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
-         HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
-         HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-     }
-
-     /* closer to end : up to 2 symbols at a time */
-     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
-         HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-
-     while (p <= pEnd-2)
-         HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
-
-     if (p < pEnd)
-         p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
-
-     return p-pStart;
- }
-
- FORCE_INLINE_TEMPLATE size_t
- HUF_decompress1X4_usingDTable_internal_body(
-     void* dst,  size_t dstSize,
-     const void* cSrc, size_t cSrcSize,
-     const HUF_DTable* DTable)
- {
-     BIT_DStream_t bitD;
-
-     /* Init */
-     CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
-
-     /* decode */
-     {   BYTE* const ostart = (BYTE*) dst;
-         BYTE* const oend = ostart + dstSize;
-         const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
-         const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
-         DTableDesc const dtd = HUF_getDTableDesc(DTable);
-         HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
-     }
-
-     /* check */
-     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
-
-     /* decoded size */
-     return dstSize;
- }
-
-
- FORCE_INLINE_TEMPLATE size_t
- HUF_decompress4X4_usingDTable_internal_body(
-     void* dst,  size_t dstSize,
-     const void* cSrc, size_t cSrcSize,
-     const HUF_DTable* DTable)
- {
-     if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
-
-     {   const BYTE* const istart = (const BYTE*) cSrc;
-         BYTE* const ostart = (BYTE*) dst;
-         BYTE* const oend = ostart + dstSize;
-         const void* const dtPtr = DTable+1;
-         const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
-
-         /* Init */
-         BIT_DStream_t bitD1;
-         BIT_DStream_t bitD2;
-         BIT_DStream_t bitD3;
-         BIT_DStream_t bitD4;
-         size_t const length1 = MEM_readLE16(istart);
-         size_t const length2 = MEM_readLE16(istart+2);
-         size_t const length3 = MEM_readLE16(istart+4);
-         size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
-         const BYTE* const istart1 = istart + 6;   /* jumpTable */
-         const BYTE* const istart2 = istart1 + length1;
-         const BYTE* const istart3 = istart2 + length2;
-         const BYTE* const istart4 = istart3 + length3;
-         size_t const segmentSize = (dstSize+3) / 4;
-         BYTE* const opStart2 = ostart + segmentSize;
-         BYTE* const opStart3 = opStart2 + segmentSize;
-         BYTE* const opStart4 = opStart3 + segmentSize;
-         BYTE* op1 = ostart;
-         BYTE* op2 = opStart2;
-         BYTE* op3 = opStart3;
-         BYTE* op4 = opStart4;
-         U32 endSignal;
-         DTableDesc const dtd = HUF_getDTableDesc(DTable);
-         U32 const dtLog = dtd.tableLog;
-
-         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
-         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
-         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
-         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
-         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
-
-         /* 16-32 symbols per loop (4-8 symbols per stream) */
-         endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-         for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
-             HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
-             HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
-             HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
-             HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
-             HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
-             HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
-             HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
-             HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
-             HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
-             HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
-             HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
-             HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
-             HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
-             HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
-             HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
-             HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
-
-             endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-         }
-
-         /* check corruption */
-         if (op1 > opStart2) return ERROR(corruption_detected);
-         if (op2 > opStart3) return ERROR(corruption_detected);
-         if (op3 > opStart4) return ERROR(corruption_detected);
-         /* note : op4 already verified within main loop */
-
-         /* finish bitStreams one by one */
-         HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
-         HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
-         HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
-         HUF_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+     HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
+     HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
+     HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
+     HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);
 
      /* check */
      { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
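The large removal above is mostly a move: the double-symbol decoder (formerly X4, now X2) reappears, renamed, after the single-symbol code later in this diff. One detail worth keeping in view is the 4-stream framing shared by all HUF_decompress4X* variants; the expressions below are taken from the removed block and recur verbatim in the re-added one:

    /* 4-stream framing : a 6-byte jump table of three LE16 lengths, the
     * 4th stream's length being whatever remains; the output is split
     * into four nearly equal segments decoded in interleaved fashion. */
    size_t const length1 = MEM_readLE16(istart);
    size_t const length2 = MEM_readLE16(istart+2);
    size_t const length3 = MEM_readLE16(istart+4);
    size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
    size_t const segmentSize = (dstSize+3) / 4;   /* per-stream output quota */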
@@ -493,7 +309,7 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                                 const HUF_DTable *DTable);
  #if DYNAMIC_BMI2
 
- #define X(fn)                                                       \
+ #define HUF_DGEN(fn)                                                \
  \
      static size_t fn##_default(                                     \
                    void* dst,  size_t dstSize,                       \
@@ -522,7 +338,7 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
 
  #else
 
- #define X(fn)                                                       \
+ #define HUF_DGEN(fn)                                                \
      static size_t fn(void* dst, size_t dstSize, void const* cSrc,   \
                       size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
      {                                                               \
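The macro formerly named X (a collision-prone name, hence the #undef X that disappears just below) is now HUF_DGEN. Under DYNAMIC_BMI2 it stamps out two copies of the decoder body plus a small runtime dispatcher. A condensed sketch of the generated shape, under stated assumptions: only fn##_default is visible in this diff, and the TARGET_ATTRIBUTE spelling for the BMI2 copy is an assumption, not verbatim source:

    /* Condensed sketch of HUF_DGEN(fn) under DYNAMIC_BMI2 (not verbatim) : */
    static size_t fn_default(void* dst, size_t dstSize,
                    const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable)
    {   return fn_body(dst, dstSize, cSrc, cSrcSize, DTable);   }

    static TARGET_ATTRIBUTE("bmi2") size_t fn_bmi2(void* dst, size_t dstSize,
                    const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable)
    {   return fn_body(dst, dstSize, cSrc, cSrcSize, DTable);   }   /* assumed attribute */

    static size_t fn(void* dst, size_t dstSize, void const* cSrc, size_t cSrcSize,
                     HUF_DTable const* DTable, int bmi2)
    {   return bmi2 ? fn_bmi2(dst, dstSize, cSrc, cSrcSize, DTable)
                    : fn_default(dst, dstSize, cSrc, cSrcSize, DTable);   }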
@@ -532,112 +348,114 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
 
  #endif
 
- X(HUF_decompress1X2_usingDTable_internal)
- X(HUF_decompress4X2_usingDTable_internal)
- X(HUF_decompress1X4_usingDTable_internal)
- X(HUF_decompress4X4_usingDTable_internal)
+ HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
+ HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
 
- #undef X
 
 
- size_t HUF_decompress1X2_usingDTable(
+ size_t HUF_decompress1X1_usingDTable(
      void* dst,  size_t dstSize,
      const void* cSrc, size_t cSrcSize,
      const HUF_DTable* DTable)
  {
      DTableDesc dtd = HUF_getDTableDesc(DTable);
      if (dtd.tableType != 0) return ERROR(GENERIC);
-     return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  }
 
- size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                     const void* cSrc, size_t cSrcSize,
                                     void* workSpace, size_t wkspSize)
  {
      const BYTE* ip = (const BYTE*) cSrc;
 
-     size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
+     size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
      if (HUF_isError(hSize)) return hSize;
      if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
      ip += hSize; cSrcSize -= hSize;
 
-     return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+     return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
  }
 
 
- size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                const void* cSrc, size_t cSrcSize)
  {
      U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-     return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+     return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                         workSpace, sizeof(workSpace));
  }
 
- size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  {
-     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
-     return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
+     HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
+     return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
  }
 
- size_t HUF_decompress4X2_usingDTable(
+ size_t HUF_decompress4X1_usingDTable(
      void* dst,  size_t dstSize,
      const void* cSrc, size_t cSrcSize,
      const HUF_DTable* DTable)
  {
      DTableDesc dtd = HUF_getDTableDesc(DTable);
      if (dtd.tableType != 0) return ERROR(GENERIC);
-     return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+     return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  }
 
- static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                     const void* cSrc, size_t cSrcSize,
                                     void* workSpace, size_t wkspSize, int bmi2)
  {
      const BYTE* ip = (const BYTE*) cSrc;
 
-     size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
+     size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
                                                  workSpace, wkspSize);
      if (HUF_isError(hSize)) return hSize;
      if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
      ip += hSize; cSrcSize -= hSize;
 
-     return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+     return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
  }
 
- size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                     const void* cSrc, size_t cSrcSize,
                                     void* workSpace, size_t wkspSize)
  {
-     return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
+     return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
  }
 
 
- size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  {
      U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-     return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+     return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                         workSpace, sizeof(workSpace));
  }
- size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  {
-     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
-     return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+     HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
+     return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
  }
 
 
  /* *************************/
  /* double-symbols decoding */
  /* *************************/
+
+ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
  typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+ typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
+ typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
 
- /* HUF_fillDTableX4Level2() :
+
+ /* HUF_fillDTableX2Level2() :
   * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
- static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
                             const U32* rankValOrigin, const int minWeight,
                             const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
                             U32 nbBitsBaseline, U16 baseSeq)
  {
-     HUF_DEltX4 DElt;
+     HUF_DEltX2 DElt;
      U32 rankVal[HUF_TABLELOG_MAX + 1];
 
      /* get pre-calculated rankVal */
@@ -672,10 +490,8 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co
      }   }
  }
 
- typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
- typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
 
- static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                             const sortedSymbol_t* sortedList, const U32 sortedListSize,
                             const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                             const U32 nbBitsBaseline)
@@ -700,12 +516,12 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
              int minWeight = nbBits + scaleLog;
              if (minWeight < 1) minWeight = 1;
              sortedRank = rankStart[minWeight];
-             HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+             HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
                             rankValOrigin[nbBits], minWeight,
                             sortedList+sortedRank, sortedListSize-sortedRank,
                             nbBitsBaseline, symbol);
          } else {
-             HUF_DEltX4 DElt;
+             HUF_DEltX2 DElt;
              MEM_writeLE16(&(DElt.sequence), symbol);
              DElt.nbBits = (BYTE)(nbBits);
              DElt.length = 1;
@@ -717,7 +533,7 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
      }
  }
 
- size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
+ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src,
                               size_t srcSize, void* workSpace,
                               size_t wkspSize)
  {
@@ -726,7 +542,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src,
      U32 const maxTableLog = dtd.maxTableLog;
      size_t iSize;
      void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
-     HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
+     HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
      U32 *rankStart;
 
      rankValCol_t* rankVal;
@@ -752,7 +568,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src,
      rankStart = rankStart0 + 1;
      memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
 
-     HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
+     DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
      if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
      /* memset(weightList, 0, sizeof(weightList)); */   /* is not necessary, even though some analyzer complain ... */
 
@@ -806,7 +622,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src,
                  rankValPtr[w] = rankVal0[w] >> consumed;
      }   }   }   }
 
-     HUF_fillDTableX4(dt, maxTableLog,
+     HUF_fillDTableX2(dt, maxTableLog,
                     sortedSymbol, sizeOfSort,
                     rankStart0, rankVal, maxW,
                     tableLog+1);
@@ -817,112 +633,296 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
817
633
  return iSize;
818
634
  }
819
635
 
820
- size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
636
+ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
821
637
  {
822
638
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
823
- return HUF_readDTableX4_wksp(DTable, src, srcSize,
639
+ return HUF_readDTableX2_wksp(DTable, src, srcSize,
824
640
  workSpace, sizeof(workSpace));
825
641
  }
826
642
 
827
- size_t HUF_decompress1X4_usingDTable(
643
+
644
+ FORCE_INLINE_TEMPLATE U32
645
+ HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
646
+ {
647
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
648
+ memcpy(op, dt+val, 2);
649
+ BIT_skipBits(DStream, dt[val].nbBits);
650
+ return dt[val].length;
651
+ }
652
+
653
+ FORCE_INLINE_TEMPLATE U32
654
+ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
655
+ {
656
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
657
+ memcpy(op, dt+val, 1);
658
+ if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
659
+ else {
660
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
661
+ BIT_skipBits(DStream, dt[val].nbBits);
662
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
663
+ /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
664
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
665
+ } }
666
+ return 1;
667
+ }
668
+
669
+ #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
670
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
671
+
672
+ #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
673
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
674
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
675
+
676
+ #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
677
+ if (MEM_64bits()) \
678
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
679
+
680
+ HINT_INLINE size_t
681
+ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
682
+ const HUF_DEltX2* const dt, const U32 dtLog)
683
+ {
684
+ BYTE* const pStart = p;
685
+
686
+ /* up to 8 symbols at a time */
687
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
688
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+ }
+
+ /* closer to end : up to 2 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+ while (p <= pEnd-2)
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
+
+ if (p < pEnd)
+ p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
+
+ return p-pStart;
+ }
+
+ FORCE_INLINE_TEMPLATE size_t
+ HUF_decompress1X2_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+ {
+ BIT_DStream_t bitD;
+
+ /* Init */
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+ /* decode */
+ { BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
+ }
+
+ /* check */
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+ }
+
+
+ FORCE_INLINE_TEMPLATE size_t
+ HUF_decompress4X2_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+ {
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ { const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable+1;
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ size_t const length1 = MEM_readLE16(istart);
+ size_t const length2 = MEM_readLE16(istart+2);
+ size_t const length3 = MEM_readLE16(istart+4);
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ size_t const segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ U32 const dtLog = dtd.tableLog;
+
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 already verified within main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endCheck) return ERROR(corruption_detected); }
+
+ /* decoded size */
+ return dstSize;
+ }
+ }
+
+ HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
+ HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
+
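For orientation, the 4-stream body above reads a 6-byte jump table first: three little-endian 16-bit lengths for bitstreams 1-3, with stream 4 taking the remainder of the source, and each stream regenerating a segment of (dstSize+3)/4 bytes. Below is a minimal standalone sketch of that header parse, assuming plain C99 types and a hypothetical parse_huf_jump_table helper in place of zstd's MEM_readLE16/BYTE machinery:

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical sketch of the jump-table parse performed by
     * HUF_decompress4X2_usingDTable_internal_body above.
     * Returns 0 on success, nonzero if the header overflows the source. */
    static int parse_huf_jump_table(const uint8_t* src, size_t srcSize,
                                    size_t lengths[4], const uint8_t* starts[4])
    {
        if (srcSize < 10) return 1;   /* jump table + 1 byte per stream */
        lengths[0] = (size_t)src[0] | ((size_t)src[1] << 8);   /* LE16 */
        lengths[1] = (size_t)src[2] | ((size_t)src[3] << 8);
        lengths[2] = (size_t)src[4] | ((size_t)src[5] << 8);
        if (lengths[0] + lengths[1] + lengths[2] + 6 > srcSize) return 1;
        lengths[3] = srcSize - (lengths[0] + lengths[1] + lengths[2] + 6);
        starts[0] = src + 6;                  /* stream 1 follows the table */
        starts[1] = starts[0] + lengths[0];
        starts[2] = starts[1] + lengths[1];
        starts[3] = starts[2] + lengths[2];
        return 0;
    }

The interleaved HUF_DECODE_SYMBOLX2_* calls in the main loop then keep four independent bitstreams in flight, which hides the serial data dependency inherent to single-stream Huffman decoding.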
+ size_t HUF_decompress1X2_usingDTable(
  void* dst, size_t dstSize,
  const void* cSrc, size_t cSrcSize,
  const HUF_DTable* DTable)
  {
  DTableDesc dtd = HUF_getDTableDesc(DTable);
  if (dtd.tableType != 1) return ERROR(GENERIC);
- return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  }

- size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
  const void* cSrc, size_t cSrcSize,
  void* workSpace, size_t wkspSize)
  {
  const BYTE* ip = (const BYTE*) cSrc;

- size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize,
+ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
  workSpace, wkspSize);
  if (HUF_isError(hSize)) return hSize;
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  ip += hSize; cSrcSize -= hSize;

- return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
  }


- size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
  const void* cSrc, size_t cSrcSize)
  {
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
- return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+ return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
  workSpace, sizeof(workSpace));
  }

- size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  {
- HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
- return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+ return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
  }

- size_t HUF_decompress4X4_usingDTable(
+ size_t HUF_decompress4X2_usingDTable(
  void* dst, size_t dstSize,
  const void* cSrc, size_t cSrcSize,
  const HUF_DTable* DTable)
  {
  DTableDesc dtd = HUF_getDTableDesc(DTable);
  if (dtd.tableType != 1) return ERROR(GENERIC);
- return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  }

- static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+ static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
  const void* cSrc, size_t cSrcSize,
  void* workSpace, size_t wkspSize, int bmi2)
  {
  const BYTE* ip = (const BYTE*) cSrc;

- size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize,
+ size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
  workSpace, wkspSize);
  if (HUF_isError(hSize)) return hSize;
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  ip += hSize; cSrcSize -= hSize;

- return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
  }

- size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
  const void* cSrc, size_t cSrcSize,
  void* workSpace, size_t wkspSize)
  {
- return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
  }


- size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+ size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
  const void* cSrc, size_t cSrcSize)
  {
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
- return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
  workSpace, sizeof(workSpace));
  }

- size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  {
- HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
- return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+ return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
  }


- /* ********************************/
- /* Generic decompression selector */
- /* ********************************/
+ /* ***********************************/
+ /* Universal decompression selectors */
+ /* ***********************************/

  size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
  const void* cSrc, size_t cSrcSize,
  const HUF_DTable* DTable)
  {
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
- return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
- HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  }

  size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
@@ -930,8 +930,8 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
  const HUF_DTable* DTable)
  {
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
- return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
- HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  }


@@ -960,12 +960,12 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
  /** HUF_selectDecoder() :
  * Tells which decoder is likely to decode faster,
  * based on a set of pre-computed metrics.
- * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
  * Assumption : 0 < dstSize <= 128 KB */
  U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
  {
  assert(dstSize > 0);
- assert(dstSize <= 128 KB);
+ assert(dstSize <= 128*1024);
  /* decoder timing evaluation */
  { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
  U32 const D256 = (U32)(dstSize >> 8);
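To make the selection metric concrete: Q quantizes the inverse compression ratio into sixteen buckets (15 meaning the block barely compressed at all), and D256 counts 256-byte units of output; the remainder of the function, truncated in this hunk, weighs the pre-computed algoTime metrics for each decoder against these two values. A small standalone sketch of the quantization, with a hypothetical helper name:

    #include <stddef.h>
    #include <stdio.h>

    /* Mirrors the Q computation in HUF_selectDecoder above:
     * 15 when the payload did not shrink, else floor(16*cSrcSize/dstSize). */
    static unsigned huf_ratio_bucket(size_t cSrcSize, size_t dstSize)
    {
        return (cSrcSize >= dstSize) ? 15u : (unsigned)(cSrcSize * 16 / dstSize);
    }

    int main(void)
    {
        /* 60 KB compressed from 100 KB of data: 60*16/100 = 9.6 -> bucket 9 */
        printf("%u\n", huf_ratio_bucket(60 * 1024, 100 * 1024));
        return 0;
    }

The 128*1024 rewrite of the assert drops the KB macro but keeps the same bound, matching the documented assumption that dstSize never exceeds 128 KB.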
@@ -980,7 +980,7 @@ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc,

  size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  {
- static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 };
+ static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };

  /* validation checks */
  if (dstSize == 0) return ERROR(dstSize_tooSmall);
@@ -1002,8 +1002,8 @@ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const
  if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */

  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
- return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
- HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+ return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
+ HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
  }
  }

@@ -1025,8 +1025,8 @@ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
  if (cSrcSize == 0) return ERROR(corruption_detected);

  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
- return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
- HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+ return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
+ HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
  }
  }

@@ -1041,9 +1041,9 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
  if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */

  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
- return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
  cSrcSize, workSpace, wkspSize):
- HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
  cSrcSize, workSpace, wkspSize);
  }
  }
@@ -1060,27 +1060,27 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
  size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
  {
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
- return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
- HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  }

- size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
  {
  const BYTE* ip = (const BYTE*) cSrc;

- size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
+ size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
  if (HUF_isError(hSize)) return hSize;
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  ip += hSize; cSrcSize -= hSize;

- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
  }

  size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
  {
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
- return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
- HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  }

  size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
@@ -1090,7 +1090,7 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
  if (cSrcSize == 0) return ERROR(corruption_detected);

  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
- return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
- HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+ return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
+ HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
  }
  }
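Taken together, the renames leave the public entry points intact: callers still pass the exact regenerated size and let HUF_selectDecoder pick the table type. A hedged usage sketch (the regenerate wrapper and include path are illustrative; HUF_decompress, HUF_isError, and HUF_getErrorName are declared in huf.h):

    #include <stdio.h>
    #include "huf.h"   /* zstd's internal Huffman API, as patched above */

    /* Illustrative wrapper: decompress one HUF block whose exact
     * regenerated size is known out of band (HUF_decompress requires it). */
    static size_t regenerate(void* dst, size_t originalSize,
                             const void* cSrc, size_t cSrcSize)
    {
        size_t const r = HUF_decompress(dst, originalSize, cSrc, cSrcSize);
        if (HUF_isError(r)) {
            fprintf(stderr, "HUF_decompress: %s\n", HUF_getErrorName(r));
            return 0;
        }
        return r;   /* equals originalSize on success */
    }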