zstd-ruby 1.3.4.0 → 1.3.5.0

Files changed (43)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +56 -10
  4. data/ext/zstdruby/libzstd/README.md +4 -0
  5. data/ext/zstdruby/libzstd/common/bitstream.h +6 -19
  6. data/ext/zstdruby/libzstd/common/compiler.h +3 -3
  7. data/ext/zstdruby/libzstd/common/cpu.h +1 -2
  8. data/ext/zstdruby/libzstd/common/debug.c +44 -0
  9. data/ext/zstdruby/libzstd/common/debug.h +123 -0
  10. data/ext/zstdruby/libzstd/common/entropy_common.c +16 -1
  11. data/ext/zstdruby/libzstd/common/fse.h +45 -41
  12. data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
  13. data/ext/zstdruby/libzstd/common/huf.h +34 -27
  14. data/ext/zstdruby/libzstd/common/pool.c +89 -32
  15. data/ext/zstdruby/libzstd/common/pool.h +29 -19
  16. data/ext/zstdruby/libzstd/common/zstd_common.c +0 -5
  17. data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -37
  18. data/ext/zstdruby/libzstd/compress/fse_compress.c +28 -163
  19. data/ext/zstdruby/libzstd/compress/hist.c +195 -0
  20. data/ext/zstdruby/libzstd/compress/hist.h +92 -0
  21. data/ext/zstdruby/libzstd/compress/huf_compress.c +14 -6
  22. data/ext/zstdruby/libzstd/compress/zstd_compress.c +798 -350
  23. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +120 -34
  24. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +247 -87
  25. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -1
  26. data/ext/zstdruby/libzstd/compress/zstd_fast.c +177 -56
  27. data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -1
  28. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +331 -65
  29. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +13 -0
  30. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +15 -20
  31. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -2
  32. data/ext/zstdruby/libzstd/compress/zstd_opt.c +503 -300
  33. data/ext/zstdruby/libzstd/compress/zstd_opt.h +7 -0
  34. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +122 -47
  35. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +5 -5
  36. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +325 -325
  37. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +80 -43
  38. data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
  39. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +5 -5
  40. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +12 -61
  41. data/ext/zstdruby/libzstd/zstd.h +137 -69
  42. data/lib/zstd-ruby/version.rb +1 -1
  43. metadata +7 -3
data/ext/zstdruby/libzstd/compress/zstd_opt.h

@@ -28,6 +28,13 @@ size_t ZSTD_compressBlock_btultra(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
 
+size_t ZSTD_compressBlock_btopt_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+
 size_t ZSTD_compressBlock_btopt_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
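Note: the new *_dictMatchState declarations mirror the existing regular/extDict pairs, so each btopt/btultra strategy now has one entry point per dictionary mode. A minimal sketch of the dispatch idea follows; dictMode_e and selectBtOptVariant() are hypothetical illustrations, not part of the library:

/* hypothetical sketch only -- not the library's actual selector */
typedef enum { dm_none, dm_dictMatchState, dm_extDict } dictMode_e;

typedef size_t (*blockCompressor_f)(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);

static blockCompressor_f selectBtOptVariant(dictMode_e mode)
{
    switch (mode) {
    case dm_dictMatchState: return ZSTD_compressBlock_btopt_dictMatchState;  /* new in 1.3.5 */
    case dm_extDict:        return ZSTD_compressBlock_btopt_extDict;
    default:                return ZSTD_compressBlock_btopt;
    }
}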
data/ext/zstdruby/libzstd/compress/zstdmt_compress.c

@@ -37,18 +37,17 @@
 #define ZSTD_RESIZE_SEQPOOL 0
 
 /* ======   Debug   ====== */
-#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) && !defined(_MSC_VER)
 
 #  include <stdio.h>
 #  include <unistd.h>
 #  include <sys/times.h>
-#  define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
 
 #  define DEBUG_PRINTHEX(l,p,n) {             \
       unsigned debug_u;                       \
       for (debug_u=0; debug_u<(n); debug_u++) \
-          DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
-      DEBUGLOGRAW(l, " \n");                  \
+          RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+      RAWLOG(l, " \n");                       \
   }
 
 static unsigned long long GetCurrentClockTimeMicroseconds(void)
@@ -62,7 +61,7 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
 
 #define MUTEX_WAIT_TIME_DLEVEL 6
 #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) {          \
-    if (ZSTD_DEBUG >= MUTEX_WAIT_TIME_DLEVEL) {   \
+    if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) {   \
         unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
         ZSTD_pthread_mutex_lock(mutex);           \
         {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
@@ -160,6 +159,25 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
 }
 
+
+static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)
+{
+    unsigned const maxNbBuffers = 2*nbWorkers + 3;
+    if (srcBufPool==NULL) return NULL;
+    if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
+        return srcBufPool;
+    /* need a larger buffer pool */
+    {   ZSTD_customMem const cMem = srcBufPool->cMem;
+        size_t const bSize = srcBufPool->bufferSize;   /* forward parameters */
+        ZSTDMT_bufferPool* newBufPool;
+        ZSTDMT_freeBufferPool(srcBufPool);
+        newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+        if (newBufPool==NULL) return newBufPool;
+        ZSTDMT_setBufferSize(newBufPool, bSize);
+        return newBufPool;
+    }
+}
+
 /** ZSTDMT_getBuffer() :
  *  assumption : bufPool must be valid
  * @return : a buffer, with start pointer and size
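Note: every new ZSTDMT_expand*() helper follows the same grow-or-keep pattern: keep the pool when it is already large enough, otherwise free it and recreate it with the old parameters forwarded. A self-contained sketch of the pattern, with pool_t, pool_create() and pool_free() as hypothetical stand-ins:

/* grow-or-keep, as used by ZSTDMT_expandBufferPool() above (sketch) */
typedef struct { unsigned capacity; } pool_t;
pool_t* pool_create(unsigned capacity);   /* assumed : returns NULL on allocation failure */
void    pool_free(pool_t* p);

static pool_t* pool_expand(pool_t* p, unsigned neededCapacity)
{
    if (p == NULL) return NULL;                    /* propagate an earlier failure */
    if (p->capacity >= neededCapacity) return p;   /* good enough : keep as-is */
    pool_free(p);                                  /* too small : free, then recreate larger */
    return pool_create(neededCapacity);            /* NULL here signals allocation failure */
}

Callers must treat the returned pointer as the new pool and check it for NULL, which is exactly what ZSTDMT_resize() does after each expansion.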
@@ -310,6 +328,10 @@ static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
     ZSTDMT_freeBufferPool(seqPool);
 }
 
+static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
+{
+    return ZSTDMT_expandBufferPool(pool, nbWorkers);
+}
 
 
 /* =====   CCtx Pool   ===== */
@@ -355,6 +377,18 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
     return cctxPool;
 }
 
+static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
+                                              unsigned nbWorkers)
+{
+    if (srcPool==NULL) return NULL;
+    if (nbWorkers <= srcPool->totalCCtx) return srcPool;   /* good enough */
+    /* need a larger cctx pool */
+    {   ZSTD_customMem const cMem = srcPool->cMem;
+        ZSTDMT_freeCCtxPool(srcPool);
+        return ZSTDMT_createCCtxPool(nbWorkers, cMem);
+    }
+}
+
 /* only works during initialization phase, not during compression */
 static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
 {
@@ -425,12 +459,11 @@ typedef struct {
     ZSTD_window_t ldmWindow;  /* A thread-safe copy of ldmState.window */
 } serialState_t;
 
-static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params)
+static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize)
 {
     /* Adjust parameters */
     if (params.ldmParams.enableLdm) {
         DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
-        params.ldmParams.windowLog = params.cParams.windowLog;
         ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
         assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
         assert(params.ldmParams.hashEveryLog < 32);
@@ -453,7 +486,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
                        serialState->params.ldmParams.hashLog -
                        serialState->params.ldmParams.bucketSizeLog;
         /* Size the seq pool tables */
-        ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, params.jobSize));
+        ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
         /* Reset the window */
         ZSTD_window_clear(&serialState->ldmState.window);
         serialState->ldmWindow = serialState->ldmState.window;
@@ -473,6 +506,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
         memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
     }
     serialState->params = params;
+    serialState->params.jobSize = (U32)jobSize;
     return 0;
 }
 
@@ -514,6 +548,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
         size_t error;
         assert(seqStore.seq != NULL && seqStore.pos == 0 &&
                seqStore.size == 0 && seqStore.capacity > 0);
+        assert(src.size <= serialState->params.jobSize);
         ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
         error = ZSTD_ldm_generateSequences(
             &serialState->ldmState, &seqStore,
@@ -602,13 +637,6 @@ void ZSTDMT_compressionJob(void* jobDescription)
     rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
     buffer_t dstBuff = job->dstBuff;
 
-    /* Don't compute the checksum for chunks, since we compute it externally,
-     * but write it in the header.
-     */
-    if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
-    /* Don't run LDM for the chunks, since we handle it externally */
-    jobParams.ldmParams.enableLdm = 0;
-
     /* ressources */
     if (cctx==NULL) {
         job->cSize = ERROR(memory_allocation);
@@ -622,10 +650,22 @@ void ZSTDMT_compressionJob(void* jobDescription)
         }
         job->dstBuff = dstBuff;   /* this value can be read in ZSTDMT_flush, when it copies the whole job */
     }
+    if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL) {
+        job->cSize = ERROR(memory_allocation);
+        goto _endJob;
+    }
+
+    /* Don't compute the checksum for chunks, since we compute it externally,
+     * but write it in the header.
+     */
+    if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
+    /* Don't run LDM for the chunks, since we handle it externally */
+    jobParams.ldmParams.enableLdm = 0;
+
 
     /* init */
     if (job->cdict) {
-        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, job->cdict, jobParams, job->fullFrameSize);
+        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
         assert(job->firstJob);   /* only allowed for first job */
         if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
     } else {   /* srcStart points at reloaded section */
@@ -637,6 +677,7 @@ void ZSTDMT_compressionJob(void* jobDescription)
     }   }
     {   size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
                         job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
+                        ZSTD_dtlm_fast,
                         NULL, /*cdict*/
                         jobParams, pledgedSrcSize);
         if (ZSTD_isError(initError)) {
@@ -745,9 +786,9 @@ struct ZSTDMT_CCtx_s {
     ZSTD_CCtx_params params;
     size_t targetSectionSize;
     size_t targetPrefixSize;
-    roundBuff_t roundBuff;
+    int jobReady;        /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */
     inBuff_t inBuff;
-    int jobReady;        /* 1 => one job is already prepared, but pool has shortage of workers. Don't create another one. */
+    roundBuff_t roundBuff;
     serialState_t serial;
     unsigned singleBlockingThread;
     unsigned jobIDMask;
@@ -798,6 +839,20 @@ static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_custom
     return jobTable;
 }
 
+static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
+    U32 nbJobs = nbWorkers + 2;
+    if (nbJobs > mtctx->jobIDMask+1) {   /* need more job capacity */
+        ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
+        mtctx->jobIDMask = 0;
+        mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem);
+        if (mtctx->jobs==NULL) return ERROR(memory_allocation);
+        assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0));   /* ensure nbJobs is a power of 2 */
+        mtctx->jobIDMask = nbJobs - 1;
+    }
+    return 0;
+}
+
+
 /* ZSTDMT_CCtxParam_setNbWorkers():
  * Internal use only */
 size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
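Note: the power-of-2 assert in ZSTDMT_expandJobsTable() is what keeps jobIDMask valid: the jobs table is used as a ring buffer, and masking a free-running counter replaces a modulo. A one-function sketch (ringIndex is illustrative, not a library symbol):

/* nextJobID & jobIDMask == nextJobID % nbJobs, provided nbJobs is a power of 2 */
static unsigned ringIndex(unsigned nextJobID, unsigned jobIDMask)
{
    return nextJobID & jobIDMask;   /* jobIDMask == nbJobs - 1 */
}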
@@ -924,6 +979,8 @@ size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
         if ( (value > 0)   /* value==0 => automatic job size */
            & (value < ZSTDMT_JOBSIZE_MIN) )
             value = ZSTDMT_JOBSIZE_MIN;
+        if (value > ZSTDMT_JOBSIZE_MAX)
+            value = ZSTDMT_JOBSIZE_MAX;
         params->jobSize = value;
         return value;
     case ZSTDMT_p_overlapSectionLog :
@@ -950,6 +1007,21 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
     }
 }
 
+size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value)
+{
+    switch (parameter) {
+    case ZSTDMT_p_jobSize:
+        *value = mtctx->params.jobSize;
+        break;
+    case ZSTDMT_p_overlapSectionLog:
+        *value = mtctx->params.overlapSizeLog;
+        break;
+    default:
+        return ERROR(parameter_unsupported);
+    }
+    return 0;
+}
+
 /* Sets parameters relevant to the compression job,
  * initializing others to default values. */
 static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
@@ -960,11 +1032,28 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
     jobParams.cParams = params.cParams;
     jobParams.fParams = params.fParams;
     jobParams.compressionLevel = params.compressionLevel;
-    jobParams.disableLiteralCompression = params.disableLiteralCompression;
 
     return jobParams;
 }
 
+
+/* ZSTDMT_resize() :
+ * @return : error code if fails, 0 on success */
+static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
+{
+    if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
+    CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
+    mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
+    if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
+    mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
+    if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
+    mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers);
+    if (mtctx->seqPool == NULL) return ERROR(memory_allocation);
+    ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
+    return 0;
+}
+
+
 /*! ZSTDMT_updateCParams_whileCompressing() :
  *  Updates only a selected set of compression parameters, to remain compatible with current frame.
  *  New parameters will be applied to next compression job. */
@@ -981,15 +1070,6 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
     }
 }
 
-/* ZSTDMT_getNbWorkers():
- * @return nb threads currently active in mtctx.
- * mtctx must be valid */
-unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx)
-{
-    assert(mtctx != NULL);
-    return mtctx->params.nbWorkers;
-}
-
 /* ZSTDMT_getFrameProgression():
  * tells how much data has been consumed (input) and produced (output) for current frame.
  * able to count progression inside worker threads.
@@ -1087,18 +1167,10 @@ static size_t ZSTDMT_compress_advanced_internal(
 
     assert(avgJobSize >= 256 KB);   /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
     ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
-    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
+    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
         return ERROR(memory_allocation);
 
-    if (nbJobs > mtctx->jobIDMask+1) {   /* enlarge job table */
-        U32 jobsTableSize = nbJobs;
-        ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
-        mtctx->jobIDMask = 0;
-        mtctx->jobs = ZSTDMT_createJobsTable(&jobsTableSize, mtctx->cMem);
-        if (mtctx->jobs==NULL) return ERROR(memory_allocation);
-        assert((jobsTableSize != 0) && ((jobsTableSize & (jobsTableSize - 1)) == 0));   /* ensure jobsTableSize is a power of 2 */
-        mtctx->jobIDMask = jobsTableSize - 1;
-    }
+    CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbJobs) );   /* only expands if necessary */
 
     {   unsigned u;
         for (u=0; u<nbJobs; u++) {
@@ -1221,17 +1293,18 @@ size_t ZSTDMT_initCStream_internal(
         const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
         unsigned long long pledgedSrcSize)
 {
-    DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u, disableLiteralCompression=%i)",
-                (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx, params.disableLiteralCompression);
-    /* params are supposed to be fully validated at this point */
+    DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)",
+                (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx);
+
+    /* params supposed partially validated at this point */
     assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
     assert(!((dict) && (cdict)));   /* either dict or cdict, not both */
-    assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
 
     /* init */
-    if (params.jobSize == 0) {
-        params.jobSize = 1U << ZSTDMT_computeTargetJobLog(params);
-    }
+    if (params.nbWorkers != mtctx->params.nbWorkers)
+        CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
+
+    if (params.jobSize > 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
     if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
 
     mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);   /* do not trigger multi-threading when srcSize is too small */
@@ -1270,7 +1343,9 @@ size_t ZSTDMT_initCStream_internal(
     mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
     DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
     mtctx->targetSectionSize = params.jobSize;
-    if (mtctx->targetSectionSize < ZSTDMT_JOBSIZE_MIN) mtctx->targetSectionSize = ZSTDMT_JOBSIZE_MIN;
+    if (mtctx->targetSectionSize == 0) {
+        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
+    }
     if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize;   /* job size must be >= overlap size */
     DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
     DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
@@ -1312,7 +1387,7 @@ size_t ZSTDMT_initCStream_internal(
     mtctx->allJobsCompleted = 0;
     mtctx->consumed = 0;
     mtctx->produced = 0;
-    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
+    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize))
         return ERROR(memory_allocation);
     return 0;
 }
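Note: with ZSTDMT_resize() in place, ZSTDMT_initCStream_internal() grows the worker pools in place when nbWorkers changes between frames, so one context can be reused across different worker counts. A hedged sketch using the v1.3.x experimental parameter API (error handling elided; assumes ZSTD_p_nbWorkers as spelled in this era's zstd.h):

ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_CCtx_setParameter(cctx, ZSTD_p_nbWorkers, 2);
/* ... compress a first frame ... */
ZSTD_CCtx_setParameter(cctx, ZSTD_p_nbWorkers, 8);   /* pools expand lazily at next frame init */
/* ... compress a second frame with 8 workers ... */
ZSTD_freeCCtx(cctx);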
data/ext/zstdruby/libzstd/compress/zstdmt_compress.h

@@ -95,6 +95,11 @@ typedef enum {
  * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
 ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value);
 
+/* ZSTDMT_getMTCtxParameter() :
+ * Query the ZSTDMT_CCtx for a parameter value.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value);
+
 
 /*! ZSTDMT_compressStream_generic() :
  *  Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
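A short usage sketch for the new getter (mtctx assumed already created with ZSTDMT_createCCtx(); error handling abbreviated):

{   unsigned jobSize;
    size_t const err = ZSTDMT_getMTCtxParameter(mtctx, ZSTDMT_p_jobSize, &jobSize);
    if (!ZSTD_isError(err)) {
        /* jobSize now holds the current per-job input size */
    }
}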
@@ -126,11 +131,6 @@ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorker
  *  New parameters will be applied to next compression job. */
 void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
 
-/* ZSTDMT_getNbWorkers():
- * @return nb threads currently active in mtctx.
- * mtctx must be valid */
-unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx);
-
 /* ZSTDMT_getFrameProgression():
  * tells how much data has been consumed (input) and produced (output) for current frame.
  * able to count progression inside worker threads.
data/ext/zstdruby/libzstd/decompress/huf_decompress.c

@@ -1,6 +1,7 @@
 /* ******************************************************************
-   Huffman decoder, part of New Generation Entropy library
-   Copyright (C) 2013-2016, Yann Collet.
+   huff0 huffman decoder,
+   part of Finite State Entropy library
+   Copyright (C) 2013-present, Yann Collet.
 
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -29,16 +30,15 @@
 
     You can contact the author at :
     - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
-    - Public forum : https://groups.google.com/forum/#!forum/lz4c
 ****************************************************************** */
 
 /* **************************************************************
 *  Dependencies
 ****************************************************************/
 #include <string.h>     /* memcpy, memset */
-#include "bitstream.h"  /* BIT_* */
 #include "compiler.h"
-#include "fse.h"        /* header compression */
+#include "bitstream.h"  /* BIT_* */
+#include "fse.h"        /* to compress headers */
 #define HUF_STATIC_LINKING_ONLY
 #include "huf.h"
 #include "error_private.h"
@@ -48,7 +48,6 @@
 *  Error Management
 ****************************************************************/
 #define HUF_isError ERR_isError
-#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
 
 
@@ -75,15 +74,15 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
 /*-***************************/
 /*  single-symbol decoding   */
 /*-***************************/
-typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
 
-size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
+size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
 {
     U32 tableLog = 0;
     U32 nbSymbols = 0;
     size_t iSize;
     void* const dtPtr = DTable + 1;
-    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
 
     U32* rankVal;
     BYTE* huffWeight;
@@ -96,7 +95,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
 
     if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
 
-    HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
     /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
 
     iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
@@ -124,7 +123,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
         U32 const w = huffWeight[n];
         U32 const length = (1 << w) >> 1;
         U32 u;
-        HUF_DEltX2 D;
+        HUF_DEltX1 D;
         D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
         for (u = rankVal[w]; u < rankVal[w] + length; u++)
             dt[u] = D;
@@ -134,17 +133,15 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
     return iSize;
 }
 
-size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
+size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
 {
     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_readDTableX2_wksp(DTable, src, srcSize,
+    return HUF_readDTableX1_wksp(DTable, src, srcSize,
                                  workSpace, sizeof(workSpace));
 }
 
-typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;   /* double-symbols decoding */
-
 FORCE_INLINE_TEMPLATE BYTE
-HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
 {
     size_t const val = BIT_lookBitsFast(Dstream, dtLog);   /* note : dtLog >= 1 */
     BYTE const c = dt[val].byte;
@@ -152,44 +149,44 @@ HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog
     return c;
 }
 
-#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
-    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
 
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)  \
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
     if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
-        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
 
-#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
     if (MEM_64bits()) \
-        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
 
 HINT_INLINE size_t
-HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
 {
     BYTE* const pStart = p;
 
     /* up to 4 symbols at a time */
     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
-        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
-        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
     }
 
     /* [0-3] symbols remaining */
     if (MEM_32bits())
        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
-            HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
 
     /* no more data to retrieve from bitstream, no need to reload */
     while (p < pEnd)
-        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
 
     return pEnd-pStart;
 }
 
 FORCE_INLINE_TEMPLATE size_t
-HUF_decompress1X2_usingDTable_internal_body(
+HUF_decompress1X1_usingDTable_internal_body(
     void* dst, size_t dstSize,
     const void* cSrc, size_t cSrcSize,
     const HUF_DTable* DTable)
@@ -197,14 +194,14 @@ HUF_decompress1X2_usingDTable_internal_body(
     BYTE* op = (BYTE*)dst;
     BYTE* const oend = op + dstSize;
     const void* dtPtr = DTable + 1;
-    const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
     BIT_DStream_t bitD;
     DTableDesc const dtd = HUF_getDTableDesc(DTable);
     U32 const dtLog = dtd.tableLog;
 
     CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
 
-    HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
+    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
 
     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
 
@@ -212,7 +209,7 @@ HUF_decompress1X2_usingDTable_internal_body(
 }
 
 FORCE_INLINE_TEMPLATE size_t
-HUF_decompress4X2_usingDTable_internal_body(
+HUF_decompress4X1_usingDTable_internal_body(
     void* dst, size_t dstSize,
     const void* cSrc, size_t cSrcSize,
     const HUF_DTable* DTable)
@@ -224,7 +221,7 @@ HUF_decompress4X2_usingDTable_internal_body(
     BYTE* const ostart = (BYTE*) dst;
     BYTE* const oend = ostart + dstSize;
     const void* const dtPtr = DTable + 1;
-    const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
 
     /* Init */
     BIT_DStream_t bitD1;
@@ -260,22 +257,22 @@ HUF_decompress4X2_usingDTable_internal_body(
     /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
     endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
     while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
-        HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-        HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-        HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-        HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-        HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
-        HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
-        HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
-        HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
-        HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-        HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-        HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-        HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-        HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
-        HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
-        HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
-        HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+        HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+        HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+        HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+        HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+        HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
+        HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
+        HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
+        HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
+        HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+        HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+        HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+        HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+        HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
+        HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
+        HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
+        HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
         BIT_reloadDStream(&bitD1);
         BIT_reloadDStream(&bitD2);
         BIT_reloadDStream(&bitD3);
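Note: the 4X decoders drain four independent bitstreams so the CPU can keep several decode chains in flight at once. The payload layout that makes this possible (recapped from the function body, sketch only):

/* 4-streams huff0 block : [len1:2][len2:2][len3:2][stream1][stream2][stream3][stream4]
 * the fourth length is implied rather than stored : */
size_t const length1 = MEM_readLE16(istart);
size_t const length2 = MEM_readLE16(istart+2);
size_t const length3 = MEM_readLE16(istart+4);
size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);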
@@ -291,191 +288,10 @@ HUF_decompress4X2_usingDTable_internal_body(
         /* note : op4 supposed already verified within main loop */
 
         /* finish bitStreams one by one */
-        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
-        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
-        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
-        HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
-
-        /* check */
-        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
-          if (!endCheck) return ERROR(corruption_detected); }
-
-        /* decoded size */
-        return dstSize;
-    }
-}
-
-
-FORCE_INLINE_TEMPLATE U32
-HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
-    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
-    memcpy(op, dt+val, 2);
-    BIT_skipBits(DStream, dt[val].nbBits);
-    return dt[val].length;
-}
-
-FORCE_INLINE_TEMPLATE U32
-HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
-    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
-    memcpy(op, dt+val, 1);
-    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
-    else {
-        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
-            BIT_skipBits(DStream, dt[val].nbBits);
-            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
-                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
-                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
-    }   }
-    return 1;
-}
-
-#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
-    ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
-    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
-        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
-    if (MEM_64bits()) \
-        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-HINT_INLINE size_t
-HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
-                const HUF_DEltX4* const dt, const U32 dtLog)
-{
-    BYTE* const pStart = p;
-
-    /* up to 8 symbols at a time */
-    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
-        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
-        HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
-        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
-        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-    }
-
-    /* closer to end : up to 2 symbols at a time */
-    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
-        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-
-    while (p <= pEnd-2)
-        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
-
-    if (p < pEnd)
-        p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
-
-    return p-pStart;
-}
-
-FORCE_INLINE_TEMPLATE size_t
-HUF_decompress1X4_usingDTable_internal_body(
-    void* dst, size_t dstSize,
-    const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable)
-{
-    BIT_DStream_t bitD;
-
-    /* Init */
-    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
-
-    /* decode */
-    {   BYTE* const ostart = (BYTE*) dst;
-        BYTE* const oend = ostart + dstSize;
-        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
-        const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
-        DTableDesc const dtd = HUF_getDTableDesc(DTable);
-        HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
-    }
-
-    /* check */
-    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
-
-    /* decoded size */
-    return dstSize;
-}
-
-
-FORCE_INLINE_TEMPLATE size_t
-HUF_decompress4X4_usingDTable_internal_body(
-    void* dst, size_t dstSize,
-    const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable)
-{
-    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
-
-    {   const BYTE* const istart = (const BYTE*) cSrc;
-        BYTE* const ostart = (BYTE*) dst;
-        BYTE* const oend = ostart + dstSize;
-        const void* const dtPtr = DTable+1;
-        const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
-
-        /* Init */
-        BIT_DStream_t bitD1;
-        BIT_DStream_t bitD2;
-        BIT_DStream_t bitD3;
-        BIT_DStream_t bitD4;
-        size_t const length1 = MEM_readLE16(istart);
-        size_t const length2 = MEM_readLE16(istart+2);
-        size_t const length3 = MEM_readLE16(istart+4);
-        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
-        const BYTE* const istart1 = istart + 6;   /* jumpTable */
-        const BYTE* const istart2 = istart1 + length1;
-        const BYTE* const istart3 = istart2 + length2;
-        const BYTE* const istart4 = istart3 + length3;
-        size_t const segmentSize = (dstSize+3) / 4;
-        BYTE* const opStart2 = ostart + segmentSize;
-        BYTE* const opStart3 = opStart2 + segmentSize;
-        BYTE* const opStart4 = opStart3 + segmentSize;
-        BYTE* op1 = ostart;
-        BYTE* op2 = opStart2;
-        BYTE* op3 = opStart3;
-        BYTE* op4 = opStart4;
-        U32 endSignal;
-        DTableDesc const dtd = HUF_getDTableDesc(DTable);
-        U32 const dtLog = dtd.tableLog;
-
-        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
-        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
-        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
-        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
-        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
-
-        /* 16-32 symbols per loop (4-8 symbols per stream) */
-        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-        for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
-            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
-            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
-            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
-            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
-            HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
-            HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
-            HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
-            HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
-            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
-            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
-            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
-            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
-            HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
-            HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
-            HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
-            HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
-
-            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-        }
-
-        /* check corruption */
-        if (op1 > opStart2) return ERROR(corruption_detected);
-        if (op2 > opStart3) return ERROR(corruption_detected);
-        if (op3 > opStart4) return ERROR(corruption_detected);
-        /* note : op4 already verified within main loop */
-
-        /* finish bitStreams one by one */
-        HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
-        HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
-        HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
-        HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
+        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
 
         /* check */
         { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
@@ -493,7 +309,7 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                                const HUF_DTable *DTable);
 #if DYNAMIC_BMI2
 
-#define X(fn) \
+#define HUF_DGEN(fn) \
 \
 static size_t fn##_default( \
@@ -522,7 +338,7 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
 
 #else
 
-#define X(fn) \
+#define HUF_DGEN(fn) \
 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
                  size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
 { \
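Note: HUF_DGEN is the former X() generator macro under a collision-safe name. With DYNAMIC_BMI2 set it emits roughly the following shape (a sketch; attribute spellings abbreviated):

/*  HUF_DGEN(fn) ~ expands to :
 *    static size_t fn##_default(...)          { return fn##_body(...); }   -- portable build
 *    static TARGET_BMI2 size_t fn##_bmi2(...) { return fn##_body(...); }   -- BMI2-enabled build
 *    static size_t fn(..., int bmi2) { return bmi2 ? fn##_bmi2(...) : fn##_default(...); }
 *  without DYNAMIC_BMI2, fn simply forwards to fn##_body and ignores the bmi2 flag. */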
@@ -532,112 +348,114 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
532
348
 
533
349
  #endif
534
350
 
535
- X(HUF_decompress1X2_usingDTable_internal)
536
- X(HUF_decompress4X2_usingDTable_internal)
537
- X(HUF_decompress1X4_usingDTable_internal)
538
- X(HUF_decompress4X4_usingDTable_internal)
351
+ HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
352
+ HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
539
353
 
540
- #undef X
541
354
 
542
355
 
543
- size_t HUF_decompress1X2_usingDTable(
356
+ size_t HUF_decompress1X1_usingDTable(
544
357
  void* dst, size_t dstSize,
545
358
  const void* cSrc, size_t cSrcSize,
546
359
  const HUF_DTable* DTable)
547
360
  {
548
361
  DTableDesc dtd = HUF_getDTableDesc(DTable);
549
362
  if (dtd.tableType != 0) return ERROR(GENERIC);
550
- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
363
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
551
364
  }
552
365
 
553
- size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
366
+ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
554
367
  const void* cSrc, size_t cSrcSize,
555
368
  void* workSpace, size_t wkspSize)
556
369
  {
557
370
  const BYTE* ip = (const BYTE*) cSrc;
558
371
 
559
- size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
372
+ size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
560
373
  if (HUF_isError(hSize)) return hSize;
561
374
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
562
375
  ip += hSize; cSrcSize -= hSize;
563
376
 
564
- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
377
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
565
378
  }
566
379
 
567
380
 
568
- size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
381
+ size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
569
382
  const void* cSrc, size_t cSrcSize)
570
383
  {
571
384
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
572
- return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
385
+ return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
573
386
  workSpace, sizeof(workSpace));
574
387
  }
575
388
 
576
- size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
389
+ size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
577
390
  {
578
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
579
- return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
391
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
392
+ return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
580
393
  }
581
394
 
582
- size_t HUF_decompress4X2_usingDTable(
395
+ size_t HUF_decompress4X1_usingDTable(
583
396
  void* dst, size_t dstSize,
584
397
  const void* cSrc, size_t cSrcSize,
585
398
  const HUF_DTable* DTable)
586
399
  {
587
400
  DTableDesc dtd = HUF_getDTableDesc(DTable);
588
401
  if (dtd.tableType != 0) return ERROR(GENERIC);
589
- return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
402
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
590
403
  }
591
404
 
592
- static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
405
+ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
593
406
  const void* cSrc, size_t cSrcSize,
594
407
  void* workSpace, size_t wkspSize, int bmi2)
595
408
  {
596
409
  const BYTE* ip = (const BYTE*) cSrc;
597
410
 
598
- size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
411
+ size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
599
412
  workSpace, wkspSize);
600
413
  if (HUF_isError(hSize)) return hSize;
601
414
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
602
415
  ip += hSize; cSrcSize -= hSize;
603
416
 
604
- return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
417
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
605
418
  }
606
419
 
607
- size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
420
+ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
608
421
  const void* cSrc, size_t cSrcSize,
609
422
  void* workSpace, size_t wkspSize)
610
423
  {
611
- return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
424
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
612
425
  }
613
426
 
614
427
 
615
- size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
428
+ size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
616
429
  {
617
430
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
618
- return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
431
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
619
432
  workSpace, sizeof(workSpace));
620
433
  }
621
- size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
434
+ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
622
435
  {
623
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
624
- return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
436
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
437
+ return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
625
438
  }
626
439
 
627
440
 
628
441
  /* *************************/
629
442
  /* double-symbols decoding */
630
443
  /* *************************/
444
+
445
+ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
631
446
  typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
447
+ typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
448
+ typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
632
449
 
633
- /* HUF_fillDTableX4Level2() :
450
+
451
+ /* HUF_fillDTableX2Level2() :
634
452
  * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
635
- static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
453
+ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
636
454
  const U32* rankValOrigin, const int minWeight,
637
455
  const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
638
456
  U32 nbBitsBaseline, U16 baseSeq)
639
457
  {
640
- HUF_DEltX4 DElt;
458
+ HUF_DEltX2 DElt;
641
459
  U32 rankVal[HUF_TABLELOG_MAX + 1];
642
460
 
643
461
  /* get pre-calculated rankVal */
@@ -672,10 +490,8 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co
672
490
  } }
673
491
  }
674
492
 
675
- typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
676
- typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
677
493
 
678
- static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
494
+ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
679
495
  const sortedSymbol_t* sortedList, const U32 sortedListSize,
680
496
  const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
681
497
  const U32 nbBitsBaseline)
@@ -700,12 +516,12 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
700
516
  int minWeight = nbBits + scaleLog;
701
517
  if (minWeight < 1) minWeight = 1;
702
518
  sortedRank = rankStart[minWeight];
703
- HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
519
+ HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
704
520
  rankValOrigin[nbBits], minWeight,
705
521
  sortedList+sortedRank, sortedListSize-sortedRank,
706
522
  nbBitsBaseline, symbol);
707
523
  } else {
708
- HUF_DEltX4 DElt;
524
+ HUF_DEltX2 DElt;
709
525
  MEM_writeLE16(&(DElt.sequence), symbol);
710
526
  DElt.nbBits = (BYTE)(nbBits);
711
527
  DElt.length = 1;
@@ -717,7 +533,7 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
717
533
  }
718
534
  }
719
535
 
720
- size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
536
+ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src,
721
537
  size_t srcSize, void* workSpace,
722
538
  size_t wkspSize)
723
539
  {
@@ -726,7 +542,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
726
542
  U32 const maxTableLog = dtd.maxTableLog;
727
543
  size_t iSize;
728
544
  void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
729
- HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
545
+ HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
730
546
  U32 *rankStart;
731
547
 
732
548
  rankValCol_t* rankVal;
@@ -752,7 +568,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
752
568
  rankStart = rankStart0 + 1;
753
569
  memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
754
570
 
755
- HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
571
+ DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
756
572
  if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
757
573
  /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
758
574
 
@@ -806,7 +622,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
806
622
  rankValPtr[w] = rankVal0[w] >> consumed;
807
623
  } } } }
808
624
 
809
- HUF_fillDTableX4(dt, maxTableLog,
625
+ HUF_fillDTableX2(dt, maxTableLog,
810
626
  sortedSymbol, sizeOfSort,
811
627
  rankStart0, rankVal, maxW,
812
628
  tableLog+1);
@@ -817,112 +633,296 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
817
633
  return iSize;
818
634
  }
819
635
 
820
- size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
636
+ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
821
637
  {
822
638
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
823
- return HUF_readDTableX4_wksp(DTable, src, srcSize,
639
+ return HUF_readDTableX2_wksp(DTable, src, srcSize,
824
640
  workSpace, sizeof(workSpace));
825
641
  }
826
642
 
827
- size_t HUF_decompress1X4_usingDTable(
643
+
644
+ FORCE_INLINE_TEMPLATE U32
645
+ HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
646
+ {
647
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
648
+ memcpy(op, dt+val, 2);
649
+ BIT_skipBits(DStream, dt[val].nbBits);
650
+ return dt[val].length;
651
+ }
652
+
653
+ FORCE_INLINE_TEMPLATE U32
654
+ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
655
+ {
656
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
657
+ memcpy(op, dt+val, 1);
658
+ if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
659
+ else {
660
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
661
+ BIT_skipBits(DStream, dt[val].nbBits);
662
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
663
+ /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
664
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
665
+ } }
666
+ return 1;
667
+ }
668
+
669
+ #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
670
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
671
+
672
+ #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
673
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
674
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
675
+
676
+ #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
677
+ if (MEM_64bits()) \
678
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
679
+
680
+ HINT_INLINE size_t
681
+ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
682
+ const HUF_DEltX2* const dt, const U32 dtLog)
683
+ {
684
+ BYTE* const pStart = p;
685
+
686
+ /* up to 8 symbols at a time */
687
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
688
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
689
+ HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
690
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
691
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
692
+ }
693
+
694
+ /* closer to end : up to 2 symbols at a time */
695
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
696
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
697
+
698
+ while (p <= pEnd-2)
699
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
700
+
701
+ if (p < pEnd)
702
+ p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
703
+
704
+ return p-pStart;
705
+ }
706
+
707
+ FORCE_INLINE_TEMPLATE size_t
708
+ HUF_decompress1X2_usingDTable_internal_body(
709
+ void* dst, size_t dstSize,
710
+ const void* cSrc, size_t cSrcSize,
711
+ const HUF_DTable* DTable)
712
+ {
713
+ BIT_DStream_t bitD;
714
+
715
+ /* Init */
716
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
717
+
718
+ /* decode */
719
+ { BYTE* const ostart = (BYTE*) dst;
720
+ BYTE* const oend = ostart + dstSize;
721
+ const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
722
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
723
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
724
+ HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
725
+ }
726
+
727
+ /* check */
728
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
729
+
730
+ /* decoded size */
731
+ return dstSize;
732
+ }
733
+
734
+
735
+ FORCE_INLINE_TEMPLATE size_t
+ HUF_decompress4X2_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+ {
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ { const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable+1;
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ size_t const length1 = MEM_readLE16(istart);
+ size_t const length2 = MEM_readLE16(istart+2);
+ size_t const length3 = MEM_readLE16(istart+4);
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ size_t const segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ U32 const dtLog = dtd.tableLog;
+
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 already verified within main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endCheck) return ERROR(corruption_detected); }
+
+ /* decoded size */
+ return dstSize;
+ }
+ }
+
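Context note: in the 4X body above, the compressed input opens with a 6-byte jump table holding three little-endian 16-bit stream lengths; the fourth length is whatever remains of cSrcSize, and the output is cut into four segments of ceil(dstSize/4) so the streams can be decoded in lockstep. A self-contained sketch of that layout (read_le16 and split_4_streams are illustrative stand-ins, not libzstd API):

    #include <stddef.h>
    #include <stdint.h>

    static uint16_t read_le16(const uint8_t* p) { return (uint16_t)(p[0] | (p[1] << 8)); }

    /* Locate the four substreams; returns nonzero if the header is corrupt. */
    static int split_4_streams(const uint8_t* src, size_t srcSize,
                               const uint8_t* start[4], size_t len[4])
    {
        if (srcSize < 10) return 1;  /* jump table + at least 1 byte per stream */
        len[0] = read_le16(src);
        len[1] = read_le16(src + 2);
        len[2] = read_le16(src + 4);
        if (6 + len[0] + len[1] + len[2] > srcSize) return 1;  /* overflow check */
        len[3] = srcSize - 6 - len[0] - len[1] - len[2];
        start[0] = src + 6;              /* first stream follows the jump table */
        start[1] = start[0] + len[0];
        start[2] = start[1] + len[1];
        start[3] = start[2] + len[2];
        return 0;
    }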
+ HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
+ HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
+
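Context note: the HUF_DGEN lines above stamp out the dispatchable wrappers for the FORCE_INLINE_TEMPLATE bodies; when dynamic BMI2 support is compiled in, each body is instantiated twice — once plain, once with the BMI2 target attribute — behind a run-time flag. A schematic of that pattern with hypothetical names and a trivial stand-in body:

    #include <stddef.h>
    #include <string.h>

    /* Stand-in for the shared template body (the real one is the
     * *_usingDTable_internal_body function above). */
    static size_t body(void* dst, size_t dstSize, const void* src, size_t srcSize)
    {
        size_t const n = srcSize < dstSize ? srcSize : dstSize;
        memcpy(dst, src, n);
        return n;
    }

    static size_t decode_default(void* d, size_t ds, const void* s, size_t ss)
    { return body(d, ds, s, ss); }

    #if defined(__GNUC__) && defined(__x86_64__)
    __attribute__((target("bmi2")))
    #endif
    static size_t decode_bmi2(void* d, size_t ds, const void* s, size_t ss)
    { return body(d, ds, s, ss); }

    static size_t decode(void* d, size_t ds, const void* s, size_t ss, int bmi2)
    { return bmi2 ? decode_bmi2(d, ds, s, ss) : decode_default(d, ds, s, ss); }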
+ size_t HUF_decompress1X2_usingDTable(
  void* dst, size_t dstSize,
  const void* cSrc, size_t cSrcSize,
  const HUF_DTable* DTable)
  {
  DTableDesc dtd = HUF_getDTableDesc(DTable);
  if (dtd.tableType != 1) return ERROR(GENERIC);
- return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  }

- size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
  const void* cSrc, size_t cSrcSize,
  void* workSpace, size_t wkspSize)
  {
  const BYTE* ip = (const BYTE*) cSrc;

- size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize,
+ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
  workSpace, wkspSize);
  if (HUF_isError(hSize)) return hSize;
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  ip += hSize; cSrcSize -= hSize;

- return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
  }


- size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
  const void* cSrc, size_t cSrcSize)
  {
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
- return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+ return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
  workSpace, sizeof(workSpace));
  }

- size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  {
- HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
- return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+ return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
  }

- size_t HUF_decompress4X4_usingDTable(
+ size_t HUF_decompress4X2_usingDTable(
  void* dst, size_t dstSize,
  const void* cSrc, size_t cSrcSize,
  const HUF_DTable* DTable)
  {
  DTableDesc dtd = HUF_getDTableDesc(DTable);
  if (dtd.tableType != 1) return ERROR(GENERIC);
- return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  }

- static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+ static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
  const void* cSrc, size_t cSrcSize,
  void* workSpace, size_t wkspSize, int bmi2)
  {
  const BYTE* ip = (const BYTE*) cSrc;

- size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize,
+ size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
  workSpace, wkspSize);
  if (HUF_isError(hSize)) return hSize;
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  ip += hSize; cSrcSize -= hSize;

- return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
  }

- size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
  const void* cSrc, size_t cSrcSize,
  void* workSpace, size_t wkspSize)
  {
- return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
  }


- size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+ size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
  const void* cSrc, size_t cSrcSize)
  {
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
- return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
  workSpace, sizeof(workSpace));
  }

- size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  {
- HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
- return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+ return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
  }


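Context note: the parameterless wrappers above stack-allocate a worst-case table via HUF_CREATE_STATIC_DTABLEX2 (renamed from ...X4 in this release) and forward through the _DCtx variants. A caller-side sketch of the renamed one-shot entry point; it assumes the HUF_STATIC_LINKING_ONLY declarations, and dstSize must be the exact regenerated size (at most 128 KB), since Huffman streams do not record it:

    #define HUF_STATIC_LINKING_ONLY
    #include "huf.h"

    /* Hypothetical wrapper: one-shot decode of a 4-stream, double-symbol block. */
    static size_t decode_block(void* dst, size_t originalSize,
                               const void* cSrc, size_t cSrcSize)
    {
        size_t const r = HUF_decompress4X2(dst, originalSize, cSrc, cSrcSize);
        return HUF_isError(r) ? 0 : r;
    }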
- /* ********************************/
- /* Generic decompression selector */
- /* ********************************/
+ /* ***********************************/
+ /* Universal decompression selectors */
+ /* ***********************************/

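Context note: with this release the decoder suffix counts symbols per table cell — X1 is the single-symbol decoder (formerly named X2) and X2 the double-symbol decoder (formerly X4) — and the selectors below pick one by reading the tableType byte cached in the first DTable cell. An illustrative mirror of that header (layout follows this file's DTableDesc; the struct here is a sketch, not the library type):

    #include <stdint.h>
    #include <string.h>

    typedef struct { uint8_t maxTableLog, tableType, tableLog, reserved; } table_desc;

    /* The header lives in dtable[0]; memcpy sidesteps strict aliasing. */
    static table_desc read_desc(const uint32_t* dtable)
    {
        table_desc d;
        memcpy(&d, dtable, sizeof d);
        return d;
    }
    /* d.tableType == 0 -> single-symbol (X1); == 1 -> double-symbol (X2). */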
  size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
  const void* cSrc, size_t cSrcSize,
  const HUF_DTable* DTable)
  {
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
- return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
- HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  }

  size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
@@ -930,8 +930,8 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
  const HUF_DTable* DTable)
  {
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
- return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
- HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
  }


@@ -960,12 +960,12 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
  /** HUF_selectDecoder() :
  * Tells which decoder is likely to decode faster,
  * based on a set of pre-computed metrics.
- * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
  * Assumption : 0 < dstSize <= 128 KB */
  U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
  {
  assert(dstSize > 0);
- assert(dstSize <= 128 KB);
+ assert(dstSize <= 128*1024);
  /* decoder timing evaluation */
  { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
  U32 const D256 = (U32)(dstSize >> 8);
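Context note: HUF_selectDecoder quantizes the compression ratio into 16 buckets (Q) and weighs per-decoder time estimates from the algoTime table, scaled by the output size in 256-byte units (D256). The bucketing step in isolation:

    #include <stddef.h>
    #include <stdint.h>

    /* Q in [0,15] approximates 16 * cSrcSize / dstSize, clamped at 15;
     * same formula as the line above. */
    static uint32_t ratio_bucket(size_t dstSize, size_t cSrcSize)
    {
        return (cSrcSize >= dstSize) ? 15 : (uint32_t)(cSrcSize * 16 / dstSize);
    }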
@@ -980,7 +980,7 @@ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc,

  size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  {
- static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 };
+ static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };

  /* validation checks */
  if (dstSize == 0) return ERROR(dstSize_tooSmall);
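Context note: HUF_decompress is the top-level one-shot entry point; it screens the trivial cases, asks HUF_selectDecoder which table flavor should decode faster, then dispatches through the function-pointer table renamed above. A usage sketch against the public API (dstSize must be the exact regenerated size, at most 128 KB):

    #include "huf.h"   /* HUF_decompress, HUF_isError */

    static int regenerate(void* dst, size_t originalSize,
                          const void* cSrc, size_t cSrcSize)
    {
        size_t const r = HUF_decompress(dst, originalSize, cSrc, cSrcSize);
        return HUF_isError(r) ? -1 : 0;   /* r == originalSize on success */
    }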
@@ -1002,8 +1002,8 @@ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const
  if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */

  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
- return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
- HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+ return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
+ HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
  }
  }
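Context note: the cSrcSize == 1 branch above is the RLE fast path: a one-byte payload encodes dstSize repetitions of that byte, so no Huffman table is built at all. The equivalent standalone expansion:

    #include <string.h>

    static size_t rle_expand(void* dst, size_t dstSize, const unsigned char* cSrc)
    {
        memset(dst, cSrc[0], dstSize);   /* fill with the single stored byte */
        return dstSize;
    }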
 
@@ -1025,8 +1025,8 @@ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
  if (cSrcSize == 0) return ERROR(corruption_detected);

  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
- return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
- HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+ return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
+ HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
  }
  }

@@ -1041,9 +1041,9 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
  if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */

  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
- return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
  cSrcSize, workSpace, wkspSize):
- HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
  cSrcSize, workSpace, wkspSize);
  }
  }
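Context note: the _bmi2 entry points that follow take the flag from the caller instead of probing the CPU themselves; the probe lives in zstd's common/cpu.h. A sketch of how a caller typically derives the flag; ZSTD_cpuid and ZSTD_cpuid_bmi2 are the cpu.h helpers, and the wrapper name is hypothetical:

    #include "cpu.h"   /* ZSTD_cpuid, ZSTD_cpuid_bmi2 */

    /* Probe once and cache: BMI2 availability cannot change at run time. */
    static int detect_bmi2(void)
    {
        return ZSTD_cpuid_bmi2(ZSTD_cpuid());
    }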
@@ -1060,27 +1060,27 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
  size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
  {
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
- return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
- HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  }

- size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
  {
  const BYTE* ip = (const BYTE*) cSrc;

- size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
+ size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
  if (HUF_isError(hSize)) return hSize;
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  ip += hSize; cSrcSize -= hSize;

- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
  }

  size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
  {
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
- return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
- HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
  }

  size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
@@ -1090,7 +1090,7 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
  if (cSrcSize == 0) return ERROR(corruption_detected);

  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
- return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
- HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+ return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
+ HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
  }
  }