zstd-ruby 1.3.4.0 → 1.3.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +56 -10
- data/ext/zstdruby/libzstd/README.md +4 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +6 -19
- data/ext/zstdruby/libzstd/common/compiler.h +3 -3
- data/ext/zstdruby/libzstd/common/cpu.h +1 -2
- data/ext/zstdruby/libzstd/common/debug.c +44 -0
- data/ext/zstdruby/libzstd/common/debug.h +123 -0
- data/ext/zstdruby/libzstd/common/entropy_common.c +16 -1
- data/ext/zstdruby/libzstd/common/fse.h +45 -41
- data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
- data/ext/zstdruby/libzstd/common/huf.h +34 -27
- data/ext/zstdruby/libzstd/common/pool.c +89 -32
- data/ext/zstdruby/libzstd/common/pool.h +29 -19
- data/ext/zstdruby/libzstd/common/zstd_common.c +0 -5
- data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -37
- data/ext/zstdruby/libzstd/compress/fse_compress.c +28 -163
- data/ext/zstdruby/libzstd/compress/hist.c +195 -0
- data/ext/zstdruby/libzstd/compress/hist.h +92 -0
- data/ext/zstdruby/libzstd/compress/huf_compress.c +14 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +798 -350
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +120 -34
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +247 -87
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -1
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +177 -56
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -1
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +331 -65
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +13 -0
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +15 -20
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -2
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +503 -300
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +7 -0
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +122 -47
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +5 -5
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +325 -325
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +80 -43
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +5 -5
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +12 -61
- data/ext/zstdruby/libzstd/zstd.h +137 -69
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +7 -3
@@ -28,6 +28,13 @@ size_t ZSTD_compressBlock_btultra(
|
|
28
28
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
29
29
|
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
|
30
30
|
|
31
|
+
size_t ZSTD_compressBlock_btopt_dictMatchState(
|
32
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
33
|
+
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
|
34
|
+
size_t ZSTD_compressBlock_btultra_dictMatchState(
|
35
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
36
|
+
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
|
37
|
+
|
31
38
|
size_t ZSTD_compressBlock_btopt_extDict(
|
32
39
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
33
40
|
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
|
@@ -37,18 +37,17 @@
|
|
37
37
|
#define ZSTD_RESIZE_SEQPOOL 0
|
38
38
|
|
39
39
|
/* ====== Debug ====== */
|
40
|
-
#if defined(
|
40
|
+
#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) && !defined(_MSC_VER)
|
41
41
|
|
42
42
|
# include <stdio.h>
|
43
43
|
# include <unistd.h>
|
44
44
|
# include <sys/times.h>
|
45
|
-
# define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
|
46
45
|
|
47
46
|
# define DEBUG_PRINTHEX(l,p,n) { \
|
48
47
|
unsigned debug_u; \
|
49
48
|
for (debug_u=0; debug_u<(n); debug_u++) \
|
50
|
-
|
51
|
-
|
49
|
+
RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
|
50
|
+
RAWLOG(l, " \n"); \
|
52
51
|
}
|
53
52
|
|
54
53
|
static unsigned long long GetCurrentClockTimeMicroseconds(void)
|
@@ -62,7 +61,7 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
|
|
62
61
|
|
63
62
|
#define MUTEX_WAIT_TIME_DLEVEL 6
|
64
63
|
#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
|
65
|
-
if (
|
64
|
+
if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
|
66
65
|
unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
|
67
66
|
ZSTD_pthread_mutex_lock(mutex); \
|
68
67
|
{ unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
|
@@ -160,6 +159,25 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
|
|
160
159
|
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
|
161
160
|
}
|
162
161
|
|
162
|
+
|
163
|
+
static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)
|
164
|
+
{
|
165
|
+
unsigned const maxNbBuffers = 2*nbWorkers + 3;
|
166
|
+
if (srcBufPool==NULL) return NULL;
|
167
|
+
if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
|
168
|
+
return srcBufPool;
|
169
|
+
/* need a larger buffer pool */
|
170
|
+
{ ZSTD_customMem const cMem = srcBufPool->cMem;
|
171
|
+
size_t const bSize = srcBufPool->bufferSize; /* forward parameters */
|
172
|
+
ZSTDMT_bufferPool* newBufPool;
|
173
|
+
ZSTDMT_freeBufferPool(srcBufPool);
|
174
|
+
newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
|
175
|
+
if (newBufPool==NULL) return newBufPool;
|
176
|
+
ZSTDMT_setBufferSize(newBufPool, bSize);
|
177
|
+
return newBufPool;
|
178
|
+
}
|
179
|
+
}
|
180
|
+
|
163
181
|
/** ZSTDMT_getBuffer() :
|
164
182
|
* assumption : bufPool must be valid
|
165
183
|
* @return : a buffer, with start pointer and size
|
@@ -310,6 +328,10 @@ static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
|
|
310
328
|
ZSTDMT_freeBufferPool(seqPool);
|
311
329
|
}
|
312
330
|
|
331
|
+
static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
|
332
|
+
{
|
333
|
+
return ZSTDMT_expandBufferPool(pool, nbWorkers);
|
334
|
+
}
|
313
335
|
|
314
336
|
|
315
337
|
/* ===== CCtx Pool ===== */
|
@@ -355,6 +377,18 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
|
|
355
377
|
return cctxPool;
|
356
378
|
}
|
357
379
|
|
380
|
+
static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
|
381
|
+
unsigned nbWorkers)
|
382
|
+
{
|
383
|
+
if (srcPool==NULL) return NULL;
|
384
|
+
if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */
|
385
|
+
/* need a larger cctx pool */
|
386
|
+
{ ZSTD_customMem const cMem = srcPool->cMem;
|
387
|
+
ZSTDMT_freeCCtxPool(srcPool);
|
388
|
+
return ZSTDMT_createCCtxPool(nbWorkers, cMem);
|
389
|
+
}
|
390
|
+
}
|
391
|
+
|
358
392
|
/* only works during initialization phase, not during compression */
|
359
393
|
static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
|
360
394
|
{
|
@@ -425,12 +459,11 @@ typedef struct {
|
|
425
459
|
ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
|
426
460
|
} serialState_t;
|
427
461
|
|
428
|
-
static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params)
|
462
|
+
static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize)
|
429
463
|
{
|
430
464
|
/* Adjust parameters */
|
431
465
|
if (params.ldmParams.enableLdm) {
|
432
466
|
DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
|
433
|
-
params.ldmParams.windowLog = params.cParams.windowLog;
|
434
467
|
ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
|
435
468
|
assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
|
436
469
|
assert(params.ldmParams.hashEveryLog < 32);
|
@@ -453,7 +486,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
|
|
453
486
|
serialState->params.ldmParams.hashLog -
|
454
487
|
serialState->params.ldmParams.bucketSizeLog;
|
455
488
|
/* Size the seq pool tables */
|
456
|
-
ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams,
|
489
|
+
ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
|
457
490
|
/* Reset the window */
|
458
491
|
ZSTD_window_clear(&serialState->ldmState.window);
|
459
492
|
serialState->ldmWindow = serialState->ldmState.window;
|
@@ -473,6 +506,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
|
|
473
506
|
memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
|
474
507
|
}
|
475
508
|
serialState->params = params;
|
509
|
+
serialState->params.jobSize = (U32)jobSize;
|
476
510
|
return 0;
|
477
511
|
}
|
478
512
|
|
@@ -514,6 +548,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
|
|
514
548
|
size_t error;
|
515
549
|
assert(seqStore.seq != NULL && seqStore.pos == 0 &&
|
516
550
|
seqStore.size == 0 && seqStore.capacity > 0);
|
551
|
+
assert(src.size <= serialState->params.jobSize);
|
517
552
|
ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
|
518
553
|
error = ZSTD_ldm_generateSequences(
|
519
554
|
&serialState->ldmState, &seqStore,
|
@@ -602,13 +637,6 @@ void ZSTDMT_compressionJob(void* jobDescription)
|
|
602
637
|
rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
|
603
638
|
buffer_t dstBuff = job->dstBuff;
|
604
639
|
|
605
|
-
/* Don't compute the checksum for chunks, since we compute it externally,
|
606
|
-
* but write it in the header.
|
607
|
-
*/
|
608
|
-
if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
|
609
|
-
/* Don't run LDM for the chunks, since we handle it externally */
|
610
|
-
jobParams.ldmParams.enableLdm = 0;
|
611
|
-
|
612
640
|
/* resources */
|
613
641
|
if (cctx==NULL) {
|
614
642
|
job->cSize = ERROR(memory_allocation);
|
@@ -622,10 +650,22 @@ void ZSTDMT_compressionJob(void* jobDescription)
|
|
622
650
|
}
|
623
651
|
job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */
|
624
652
|
}
|
653
|
+
if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL) {
|
654
|
+
job->cSize = ERROR(memory_allocation);
|
655
|
+
goto _endJob;
|
656
|
+
}
|
657
|
+
|
658
|
+
/* Don't compute the checksum for chunks, since we compute it externally,
|
659
|
+
* but write it in the header.
|
660
|
+
*/
|
661
|
+
if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
|
662
|
+
/* Don't run LDM for the chunks, since we handle it externally */
|
663
|
+
jobParams.ldmParams.enableLdm = 0;
|
664
|
+
|
625
665
|
|
626
666
|
/* init */
|
627
667
|
if (job->cdict) {
|
628
|
-
size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, job->cdict, jobParams, job->fullFrameSize);
|
668
|
+
size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
|
629
669
|
assert(job->firstJob); /* only allowed for first job */
|
630
670
|
if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
|
631
671
|
} else { /* srcStart points at reloaded section */
|
@@ -637,6 +677,7 @@ void ZSTDMT_compressionJob(void* jobDescription)
|
|
637
677
|
} }
|
638
678
|
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
|
639
679
|
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
|
680
|
+
ZSTD_dtlm_fast,
|
640
681
|
NULL, /*cdict*/
|
641
682
|
jobParams, pledgedSrcSize);
|
642
683
|
if (ZSTD_isError(initError)) {
|
@@ -745,9 +786,9 @@ struct ZSTDMT_CCtx_s {
|
|
745
786
|
ZSTD_CCtx_params params;
|
746
787
|
size_t targetSectionSize;
|
747
788
|
size_t targetPrefixSize;
|
748
|
-
|
789
|
+
int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */
|
749
790
|
inBuff_t inBuff;
|
750
|
-
|
791
|
+
roundBuff_t roundBuff;
|
751
792
|
serialState_t serial;
|
752
793
|
unsigned singleBlockingThread;
|
753
794
|
unsigned jobIDMask;
|
@@ -798,6 +839,20 @@ static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_custom
|
|
798
839
|
return jobTable;
|
799
840
|
}
|
800
841
|
|
842
|
+
static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
|
843
|
+
U32 nbJobs = nbWorkers + 2;
|
844
|
+
if (nbJobs > mtctx->jobIDMask+1) { /* need more job capacity */
|
845
|
+
ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
|
846
|
+
mtctx->jobIDMask = 0;
|
847
|
+
mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem);
|
848
|
+
if (mtctx->jobs==NULL) return ERROR(memory_allocation);
|
849
|
+
assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0)); /* ensure nbJobs is a power of 2 */
|
850
|
+
mtctx->jobIDMask = nbJobs - 1;
|
851
|
+
}
|
852
|
+
return 0;
|
853
|
+
}
|
854
|
+
|
855
|
+
|
801
856
|
/* ZSTDMT_CCtxParam_setNbWorkers():
|
802
857
|
* Internal use only */
|
803
858
|
size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
|
@@ -924,6 +979,8 @@ size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
|
|
924
979
|
if ( (value > 0) /* value==0 => automatic job size */
|
925
980
|
& (value < ZSTDMT_JOBSIZE_MIN) )
|
926
981
|
value = ZSTDMT_JOBSIZE_MIN;
|
982
|
+
if (value > ZSTDMT_JOBSIZE_MAX)
|
983
|
+
value = ZSTDMT_JOBSIZE_MAX;
|
927
984
|
params->jobSize = value;
|
928
985
|
return value;
|
929
986
|
case ZSTDMT_p_overlapSectionLog :
|
@@ -950,6 +1007,21 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
|
|
950
1007
|
}
|
951
1008
|
}
|
952
1009
|
|
1010
|
+
size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value)
|
1011
|
+
{
|
1012
|
+
switch (parameter) {
|
1013
|
+
case ZSTDMT_p_jobSize:
|
1014
|
+
*value = mtctx->params.jobSize;
|
1015
|
+
break;
|
1016
|
+
case ZSTDMT_p_overlapSectionLog:
|
1017
|
+
*value = mtctx->params.overlapSizeLog;
|
1018
|
+
break;
|
1019
|
+
default:
|
1020
|
+
return ERROR(parameter_unsupported);
|
1021
|
+
}
|
1022
|
+
return 0;
|
1023
|
+
}
|
1024
|
+
|
953
1025
|
/* Sets parameters relevant to the compression job,
|
954
1026
|
* initializing others to default values. */
|
955
1027
|
static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
|
@@ -960,11 +1032,28 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
|
|
960
1032
|
jobParams.cParams = params.cParams;
|
961
1033
|
jobParams.fParams = params.fParams;
|
962
1034
|
jobParams.compressionLevel = params.compressionLevel;
|
963
|
-
jobParams.disableLiteralCompression = params.disableLiteralCompression;
|
964
1035
|
|
965
1036
|
return jobParams;
|
966
1037
|
}
|
967
1038
|
|
1039
|
+
|
1040
|
+
/* ZSTDMT_resize() :
|
1041
|
+
* @return : error code if fails, 0 on success */
|
1042
|
+
static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
|
1043
|
+
{
|
1044
|
+
if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
|
1045
|
+
CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
|
1046
|
+
mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
|
1047
|
+
if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
|
1048
|
+
mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
|
1049
|
+
if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
|
1050
|
+
mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers);
|
1051
|
+
if (mtctx->seqPool == NULL) return ERROR(memory_allocation);
|
1052
|
+
ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
|
1053
|
+
return 0;
|
1054
|
+
}
|
1055
|
+
|
1056
|
+
|
968
1057
|
/*! ZSTDMT_updateCParams_whileCompressing() :
|
969
1058
|
* Updates only a selected set of compression parameters, to remain compatible with current frame.
|
970
1059
|
* New parameters will be applied to next compression job. */
|
@@ -981,15 +1070,6 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
|
|
981
1070
|
}
|
982
1071
|
}
|
983
1072
|
|
984
|
-
/* ZSTDMT_getNbWorkers():
|
985
|
-
* @return nb threads currently active in mtctx.
|
986
|
-
* mtctx must be valid */
|
987
|
-
unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx)
|
988
|
-
{
|
989
|
-
assert(mtctx != NULL);
|
990
|
-
return mtctx->params.nbWorkers;
|
991
|
-
}
|
992
|
-
|
993
1073
|
/* ZSTDMT_getFrameProgression():
|
994
1074
|
* tells how much data has been consumed (input) and produced (output) for current frame.
|
995
1075
|
* able to count progression inside worker threads.
|
@@ -1087,18 +1167,10 @@ static size_t ZSTDMT_compress_advanced_internal(
|
|
1087
1167
|
|
1088
1168
|
assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
|
1089
1169
|
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
|
1090
|
-
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
|
1170
|
+
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
|
1091
1171
|
return ERROR(memory_allocation);
|
1092
1172
|
|
1093
|
-
|
1094
|
-
U32 jobsTableSize = nbJobs;
|
1095
|
-
ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
|
1096
|
-
mtctx->jobIDMask = 0;
|
1097
|
-
mtctx->jobs = ZSTDMT_createJobsTable(&jobsTableSize, mtctx->cMem);
|
1098
|
-
if (mtctx->jobs==NULL) return ERROR(memory_allocation);
|
1099
|
-
assert((jobsTableSize != 0) && ((jobsTableSize & (jobsTableSize - 1)) == 0)); /* ensure jobsTableSize is a power of 2 */
|
1100
|
-
mtctx->jobIDMask = jobsTableSize - 1;
|
1101
|
-
}
|
1173
|
+
CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */
|
1102
1174
|
|
1103
1175
|
{ unsigned u;
|
1104
1176
|
for (u=0; u<nbJobs; u++) {
|
@@ -1221,17 +1293,18 @@ size_t ZSTDMT_initCStream_internal(
|
|
1221
1293
|
const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
|
1222
1294
|
unsigned long long pledgedSrcSize)
|
1223
1295
|
{
|
1224
|
-
DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u
|
1225
|
-
(U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx
|
1226
|
-
|
1296
|
+
DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)",
|
1297
|
+
(U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx);
|
1298
|
+
|
1299
|
+
/* params are supposed to be (at least partially) validated at this point */
|
1227
1300
|
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
|
1228
1301
|
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
|
1229
|
-
assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
|
1230
1302
|
|
1231
1303
|
/* init */
|
1232
|
-
if (params.
|
1233
|
-
|
1234
|
-
|
1304
|
+
if (params.nbWorkers != mtctx->params.nbWorkers)
|
1305
|
+
CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
|
1306
|
+
|
1307
|
+
if (params.jobSize > 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
|
1235
1308
|
if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
|
1236
1309
|
|
1237
1310
|
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
|
@@ -1270,7 +1343,9 @@ size_t ZSTDMT_initCStream_internal(
|
|
1270
1343
|
mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
|
1271
1344
|
DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
|
1272
1345
|
mtctx->targetSectionSize = params.jobSize;
|
1273
|
-
if (mtctx->targetSectionSize
|
1346
|
+
if (mtctx->targetSectionSize == 0) {
|
1347
|
+
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
|
1348
|
+
}
|
1274
1349
|
if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
|
1275
1350
|
DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
|
1276
1351
|
DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
|
@@ -1312,7 +1387,7 @@ size_t ZSTDMT_initCStream_internal(
|
|
1312
1387
|
mtctx->allJobsCompleted = 0;
|
1313
1388
|
mtctx->consumed = 0;
|
1314
1389
|
mtctx->produced = 0;
|
1315
|
-
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
|
1390
|
+
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize))
|
1316
1391
|
return ERROR(memory_allocation);
|
1317
1392
|
return 0;
|
1318
1393
|
}
|
@@ -95,6 +95,11 @@ typedef enum {
|
|
95
95
|
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
|
96
96
|
ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value);
|
97
97
|
|
98
|
+
/* ZSTDMT_getMTCtxParameter() :
|
99
|
+
* Query the ZSTDMT_CCtx for a parameter value.
|
100
|
+
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
|
101
|
+
ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value);
|
102
|
+
|
98
103
|
|
99
104
|
/*! ZSTDMT_compressStream_generic() :
|
100
105
|
* Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
|
@@ -126,11 +131,6 @@ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorker
|
|
126
131
|
* New parameters will be applied to next compression job. */
|
127
132
|
void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
|
128
133
|
|
129
|
-
/* ZSTDMT_getNbWorkers():
|
130
|
-
* @return nb threads currently active in mtctx.
|
131
|
-
* mtctx must be valid */
|
132
|
-
unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx);
|
133
|
-
|
134
134
|
/* ZSTDMT_getFrameProgression():
|
135
135
|
* tells how much data has been consumed (input) and produced (output) for current frame.
|
136
136
|
* able to count progression inside worker threads.
|
@@ -1,6 +1,7 @@
|
|
1
1
|
/* ******************************************************************
|
2
|
-
|
3
|
-
|
2
|
+
huff0 huffman decoder,
|
3
|
+
part of Finite State Entropy library
|
4
|
+
Copyright (C) 2013-present, Yann Collet.
|
4
5
|
|
5
6
|
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
6
7
|
|
@@ -29,16 +30,15 @@
|
|
29
30
|
|
30
31
|
You can contact the author at :
|
31
32
|
- FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
32
|
-
- Public forum : https://groups.google.com/forum/#!forum/lz4c
|
33
33
|
****************************************************************** */
|
34
34
|
|
35
35
|
/* **************************************************************
|
36
36
|
* Dependencies
|
37
37
|
****************************************************************/
|
38
38
|
#include <string.h> /* memcpy, memset */
|
39
|
-
#include "bitstream.h" /* BIT_* */
|
40
39
|
#include "compiler.h"
|
41
|
-
#include "
|
40
|
+
#include "bitstream.h" /* BIT_* */
|
41
|
+
#include "fse.h" /* to compress headers */
|
42
42
|
#define HUF_STATIC_LINKING_ONLY
|
43
43
|
#include "huf.h"
|
44
44
|
#include "error_private.h"
|
@@ -48,7 +48,6 @@
|
|
48
48
|
* Error Management
|
49
49
|
****************************************************************/
|
50
50
|
#define HUF_isError ERR_isError
|
51
|
-
#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
|
52
51
|
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
|
53
52
|
|
54
53
|
|
@@ -75,15 +74,15 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
|
|
75
74
|
/*-***************************/
|
76
75
|
/* single-symbol decoding */
|
77
76
|
/*-***************************/
|
78
|
-
typedef struct { BYTE byte; BYTE nbBits; }
|
77
|
+
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
|
79
78
|
|
80
|
-
size_t
|
79
|
+
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
|
81
80
|
{
|
82
81
|
U32 tableLog = 0;
|
83
82
|
U32 nbSymbols = 0;
|
84
83
|
size_t iSize;
|
85
84
|
void* const dtPtr = DTable + 1;
|
86
|
-
|
85
|
+
HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
|
87
86
|
|
88
87
|
U32* rankVal;
|
89
88
|
BYTE* huffWeight;
|
@@ -96,7 +95,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
|
|
96
95
|
|
97
96
|
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
|
98
97
|
|
99
|
-
|
98
|
+
DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
|
100
99
|
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
|
101
100
|
|
102
101
|
iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
|
@@ -124,7 +123,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
|
|
124
123
|
U32 const w = huffWeight[n];
|
125
124
|
U32 const length = (1 << w) >> 1;
|
126
125
|
U32 u;
|
127
|
-
|
126
|
+
HUF_DEltX1 D;
|
128
127
|
D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
|
129
128
|
for (u = rankVal[w]; u < rankVal[w] + length; u++)
|
130
129
|
dt[u] = D;
|
@@ -134,17 +133,15 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
|
|
134
133
|
return iSize;
|
135
134
|
}
|
136
135
|
|
137
|
-
size_t
|
136
|
+
size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
|
138
137
|
{
|
139
138
|
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
140
|
-
return
|
139
|
+
return HUF_readDTableX1_wksp(DTable, src, srcSize,
|
141
140
|
workSpace, sizeof(workSpace));
|
142
141
|
}
|
143
142
|
|
144
|
-
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
|
145
|
-
|
146
143
|
FORCE_INLINE_TEMPLATE BYTE
|
147
|
-
|
144
|
+
HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
|
148
145
|
{
|
149
146
|
size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
|
150
147
|
BYTE const c = dt[val].byte;
|
@@ -152,44 +149,44 @@ HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog
|
|
152
149
|
return c;
|
153
150
|
}
|
154
151
|
|
155
|
-
#define
|
156
|
-
*ptr++ =
|
152
|
+
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
|
153
|
+
*ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
|
157
154
|
|
158
|
-
#define
|
155
|
+
#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
|
159
156
|
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
|
160
|
-
|
157
|
+
HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
|
161
158
|
|
162
|
-
#define
|
159
|
+
#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
|
163
160
|
if (MEM_64bits()) \
|
164
|
-
|
161
|
+
HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
|
165
162
|
|
166
163
|
HINT_INLINE size_t
|
167
|
-
|
164
|
+
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
|
168
165
|
{
|
169
166
|
BYTE* const pStart = p;
|
170
167
|
|
171
168
|
/* up to 4 symbols at a time */
|
172
169
|
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
170
|
+
HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
|
171
|
+
HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
|
172
|
+
HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
|
173
|
+
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
|
177
174
|
}
|
178
175
|
|
179
176
|
/* [0-3] symbols remaining */
|
180
177
|
if (MEM_32bits())
|
181
178
|
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
|
182
|
-
|
179
|
+
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
|
183
180
|
|
184
181
|
/* no more data to retrieve from bitstream, no need to reload */
|
185
182
|
while (p < pEnd)
|
186
|
-
|
183
|
+
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
|
187
184
|
|
188
185
|
return pEnd-pStart;
|
189
186
|
}
|
190
187
|
|
191
188
|
FORCE_INLINE_TEMPLATE size_t
|
192
|
-
|
189
|
+
HUF_decompress1X1_usingDTable_internal_body(
|
193
190
|
void* dst, size_t dstSize,
|
194
191
|
const void* cSrc, size_t cSrcSize,
|
195
192
|
const HUF_DTable* DTable)
|
@@ -197,14 +194,14 @@ HUF_decompress1X2_usingDTable_internal_body(
|
|
197
194
|
BYTE* op = (BYTE*)dst;
|
198
195
|
BYTE* const oend = op + dstSize;
|
199
196
|
const void* dtPtr = DTable + 1;
|
200
|
-
const
|
197
|
+
const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
|
201
198
|
BIT_DStream_t bitD;
|
202
199
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
203
200
|
U32 const dtLog = dtd.tableLog;
|
204
201
|
|
205
202
|
CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
|
206
203
|
|
207
|
-
|
204
|
+
HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
|
208
205
|
|
209
206
|
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
|
210
207
|
|
@@ -212,7 +209,7 @@ HUF_decompress1X2_usingDTable_internal_body(
|
|
212
209
|
}
|
213
210
|
|
214
211
|
FORCE_INLINE_TEMPLATE size_t
|
215
|
-
|
212
|
+
HUF_decompress4X1_usingDTable_internal_body(
|
216
213
|
void* dst, size_t dstSize,
|
217
214
|
const void* cSrc, size_t cSrcSize,
|
218
215
|
const HUF_DTable* DTable)
|
@@ -224,7 +221,7 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
224
221
|
BYTE* const ostart = (BYTE*) dst;
|
225
222
|
BYTE* const oend = ostart + dstSize;
|
226
223
|
const void* const dtPtr = DTable + 1;
|
227
|
-
const
|
224
|
+
const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
|
228
225
|
|
229
226
|
/* Init */
|
230
227
|
BIT_DStream_t bitD1;
|
@@ -260,22 +257,22 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
260
257
|
/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
|
261
258
|
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
262
259
|
while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
260
|
+
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
|
261
|
+
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
|
262
|
+
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
|
263
|
+
HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
|
264
|
+
HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
|
265
|
+
HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
|
266
|
+
HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
|
267
|
+
HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
|
268
|
+
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
|
269
|
+
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
|
270
|
+
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
|
271
|
+
HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
|
272
|
+
HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
|
273
|
+
HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
|
274
|
+
HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
|
275
|
+
HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
|
279
276
|
BIT_reloadDStream(&bitD1);
|
280
277
|
BIT_reloadDStream(&bitD2);
|
281
278
|
BIT_reloadDStream(&bitD3);
|
@@ -291,191 +288,10 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
291
288
|
/* note : op4 supposed already verified within main loop */
|
292
289
|
|
293
290
|
/* finish bitStreams one by one */
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
/* check */
|
300
|
-
{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
|
301
|
-
if (!endCheck) return ERROR(corruption_detected); }
|
302
|
-
|
303
|
-
/* decoded size */
|
304
|
-
return dstSize;
|
305
|
-
}
|
306
|
-
}
|
307
|
-
|
308
|
-
|
309
|
-
FORCE_INLINE_TEMPLATE U32
|
310
|
-
HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
|
311
|
-
{
|
312
|
-
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
313
|
-
memcpy(op, dt+val, 2);
|
314
|
-
BIT_skipBits(DStream, dt[val].nbBits);
|
315
|
-
return dt[val].length;
|
316
|
-
}
|
317
|
-
|
318
|
-
FORCE_INLINE_TEMPLATE U32
|
319
|
-
HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
|
320
|
-
{
|
321
|
-
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
322
|
-
memcpy(op, dt+val, 1);
|
323
|
-
if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
|
324
|
-
else {
|
325
|
-
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
|
326
|
-
BIT_skipBits(DStream, dt[val].nbBits);
|
327
|
-
if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
|
328
|
-
/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
|
329
|
-
DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
|
330
|
-
} }
|
331
|
-
return 1;
|
332
|
-
}
|
333
|
-
|
334
|
-
#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
|
335
|
-
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
336
|
-
|
337
|
-
#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
|
338
|
-
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
|
339
|
-
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
340
|
-
|
341
|
-
#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
|
342
|
-
if (MEM_64bits()) \
|
343
|
-
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
344
|
-
|
345
|
-
HINT_INLINE size_t
|
346
|
-
HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
|
347
|
-
const HUF_DEltX4* const dt, const U32 dtLog)
|
348
|
-
{
|
349
|
-
BYTE* const pStart = p;
|
350
|
-
|
351
|
-
/* up to 8 symbols at a time */
|
352
|
-
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
|
353
|
-
HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
|
354
|
-
HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
|
355
|
-
HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
|
356
|
-
HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
|
357
|
-
}
|
358
|
-
|
359
|
-
/* closer to end : up to 2 symbols at a time */
|
360
|
-
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
|
361
|
-
HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
|
362
|
-
|
363
|
-
while (p <= pEnd-2)
|
364
|
-
HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
|
365
|
-
|
366
|
-
if (p < pEnd)
|
367
|
-
p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
|
368
|
-
|
369
|
-
return p-pStart;
|
370
|
-
}
|
371
|
-
|
372
|
-
FORCE_INLINE_TEMPLATE size_t
|
373
|
-
HUF_decompress1X4_usingDTable_internal_body(
|
374
|
-
void* dst, size_t dstSize,
|
375
|
-
const void* cSrc, size_t cSrcSize,
|
376
|
-
const HUF_DTable* DTable)
|
377
|
-
{
|
378
|
-
BIT_DStream_t bitD;
|
379
|
-
|
380
|
-
/* Init */
|
381
|
-
CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
|
382
|
-
|
383
|
-
/* decode */
|
384
|
-
{ BYTE* const ostart = (BYTE*) dst;
|
385
|
-
BYTE* const oend = ostart + dstSize;
|
386
|
-
const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
|
387
|
-
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
|
388
|
-
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
389
|
-
HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
|
390
|
-
}
|
391
|
-
|
392
|
-
/* check */
|
393
|
-
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
|
394
|
-
|
395
|
-
/* decoded size */
|
396
|
-
return dstSize;
|
397
|
-
}
|
398
|
-
|
399
|
-
|
400
|
-
FORCE_INLINE_TEMPLATE size_t
|
401
|
-
HUF_decompress4X4_usingDTable_internal_body(
|
402
|
-
void* dst, size_t dstSize,
|
403
|
-
const void* cSrc, size_t cSrcSize,
|
404
|
-
const HUF_DTable* DTable)
|
405
|
-
{
|
406
|
-
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
|
407
|
-
|
408
|
-
{ const BYTE* const istart = (const BYTE*) cSrc;
|
409
|
-
BYTE* const ostart = (BYTE*) dst;
|
410
|
-
BYTE* const oend = ostart + dstSize;
|
411
|
-
const void* const dtPtr = DTable+1;
|
412
|
-
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
|
413
|
-
|
414
|
-
/* Init */
|
415
|
-
BIT_DStream_t bitD1;
|
416
|
-
BIT_DStream_t bitD2;
|
417
|
-
BIT_DStream_t bitD3;
|
418
|
-
BIT_DStream_t bitD4;
|
419
|
-
size_t const length1 = MEM_readLE16(istart);
|
420
|
-
size_t const length2 = MEM_readLE16(istart+2);
|
421
|
-
size_t const length3 = MEM_readLE16(istart+4);
|
422
|
-
size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
|
423
|
-
const BYTE* const istart1 = istart + 6; /* jumpTable */
|
424
|
-
const BYTE* const istart2 = istart1 + length1;
|
425
|
-
const BYTE* const istart3 = istart2 + length2;
|
426
|
-
const BYTE* const istart4 = istart3 + length3;
|
427
|
-
size_t const segmentSize = (dstSize+3) / 4;
|
428
|
-
BYTE* const opStart2 = ostart + segmentSize;
|
429
|
-
BYTE* const opStart3 = opStart2 + segmentSize;
|
430
|
-
BYTE* const opStart4 = opStart3 + segmentSize;
|
431
|
-
BYTE* op1 = ostart;
|
432
|
-
BYTE* op2 = opStart2;
|
433
|
-
BYTE* op3 = opStart3;
|
434
|
-
BYTE* op4 = opStart4;
|
435
|
-
U32 endSignal;
|
436
|
-
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
437
|
-
U32 const dtLog = dtd.tableLog;
|
438
|
-
|
439
|
-
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
440
|
-
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
441
|
-
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
442
|
-
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
443
|
-
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
444
|
-
|
445
|
-
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
446
|
-
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
447
|
-
for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
|
448
|
-
HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
|
449
|
-
HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
|
450
|
-
HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
|
451
|
-
HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
|
452
|
-
HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
|
453
|
-
HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
|
454
|
-
HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
|
455
|
-
HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
|
456
|
-
HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
|
457
|
-
HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
|
458
|
-
HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
|
459
|
-
HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
|
460
|
-
HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
|
461
|
-
HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
|
462
|
-
HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
|
463
|
-
HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
|
464
|
-
|
465
|
-
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
466
|
-
}
|
467
|
-
|
468
|
-
/* check corruption */
|
469
|
-
if (op1 > opStart2) return ERROR(corruption_detected);
|
470
|
-
if (op2 > opStart3) return ERROR(corruption_detected);
|
471
|
-
if (op3 > opStart4) return ERROR(corruption_detected);
|
472
|
-
/* note : op4 already verified within main loop */
|
473
|
-
|
474
|
-
/* finish bitStreams one by one */
|
475
|
-
HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
|
476
|
-
HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
|
477
|
-
HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
|
478
|
-
HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
|
291
|
+
HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
|
292
|
+
HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
|
293
|
+
HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
|
294
|
+
HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
|
479
295
|
|
480
296
|
/* check */
|
481
297
|
{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
|
@@ -493,7 +309,7 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
|
|
493
309
|
const HUF_DTable *DTable);
|
494
310
|
#if DYNAMIC_BMI2
|
495
311
|
|
496
|
-
#define
|
312
|
+
#define HUF_DGEN(fn) \
|
497
313
|
\
|
498
314
|
static size_t fn##_default( \
|
499
315
|
void* dst, size_t dstSize, \
|
@@ -522,7 +338,7 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
|
|
522
338
|
|
523
339
|
#else
|
524
340
|
|
525
|
-
#define
|
341
|
+
#define HUF_DGEN(fn) \
|
526
342
|
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
527
343
|
size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
|
528
344
|
{ \
|
@@ -532,112 +348,114 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
|
|
532
348
|
|
533
349
|
#endif
|
534
350
|
|
535
|
-
|
536
|
-
|
537
|
-
X(HUF_decompress1X4_usingDTable_internal)
|
538
|
-
X(HUF_decompress4X4_usingDTable_internal)
|
351
|
+
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
|
352
|
+
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
|
539
353
|
|
540
|
-
#undef X
|
541
354
|
|
542
355
|
|
543
|
-
size_t
|
356
|
+
size_t HUF_decompress1X1_usingDTable(
|
544
357
|
void* dst, size_t dstSize,
|
545
358
|
const void* cSrc, size_t cSrcSize,
|
546
359
|
const HUF_DTable* DTable)
|
547
360
|
{
|
548
361
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
549
362
|
if (dtd.tableType != 0) return ERROR(GENERIC);
|
550
|
-
return
|
363
|
+
return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
551
364
|
}
|
552
365
|
|
553
|
-
size_t
|
366
|
+
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
554
367
|
const void* cSrc, size_t cSrcSize,
|
555
368
|
void* workSpace, size_t wkspSize)
|
556
369
|
{
|
557
370
|
const BYTE* ip = (const BYTE*) cSrc;
|
558
371
|
|
559
|
-
size_t const hSize =
|
372
|
+
size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
|
560
373
|
if (HUF_isError(hSize)) return hSize;
|
561
374
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
562
375
|
ip += hSize; cSrcSize -= hSize;
|
563
376
|
|
564
|
-
return
|
377
|
+
return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
|
565
378
|
}
|
566
379
|
|
567
380
|
|
568
|
-
size_t
|
381
|
+
size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
569
382
|
const void* cSrc, size_t cSrcSize)
|
570
383
|
{
|
571
384
|
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
572
|
-
return
|
385
|
+
return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
573
386
|
workSpace, sizeof(workSpace));
|
574
387
|
}
|
575
388
|
|
576
|
-
size_t
|
389
|
+
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
577
390
|
{
|
578
|
-
|
579
|
-
return
|
391
|
+
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
392
|
+
return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
|
580
393
|
}
|
581
394
|
|
582
|
-
size_t
|
395
|
+
size_t HUF_decompress4X1_usingDTable(
|
583
396
|
void* dst, size_t dstSize,
|
584
397
|
const void* cSrc, size_t cSrcSize,
|
585
398
|
const HUF_DTable* DTable)
|
586
399
|
{
|
587
400
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
588
401
|
if (dtd.tableType != 0) return ERROR(GENERIC);
|
589
|
-
return
|
402
|
+
return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
590
403
|
}
|
591
404
|
|
592
|
-
static size_t
|
405
|
+
static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
|
593
406
|
const void* cSrc, size_t cSrcSize,
|
594
407
|
void* workSpace, size_t wkspSize, int bmi2)
|
595
408
|
{
|
596
409
|
const BYTE* ip = (const BYTE*) cSrc;
|
597
410
|
|
598
|
-
size_t const hSize =
|
411
|
+
size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
|
599
412
|
workSpace, wkspSize);
|
600
413
|
if (HUF_isError(hSize)) return hSize;
|
601
414
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
602
415
|
ip += hSize; cSrcSize -= hSize;
|
603
416
|
|
604
|
-
return
|
417
|
+
return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
|
605
418
|
}
|
606
419
|
|
607
|
-
size_t
|
420
|
+
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
608
421
|
const void* cSrc, size_t cSrcSize,
|
609
422
|
void* workSpace, size_t wkspSize)
|
610
423
|
{
|
611
|
-
return
|
424
|
+
return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
|
612
425
|
}
|
613
426
|
|
614
427
|
|
615
|
-
size_t
|
428
|
+
size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
616
429
|
{
|
617
430
|
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
618
|
-
return
|
431
|
+
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
619
432
|
workSpace, sizeof(workSpace));
|
620
433
|
}
|
621
|
-
size_t
|
434
|
+
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
622
435
|
{
|
623
|
-
|
624
|
-
return
|
436
|
+
HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
|
437
|
+
return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
625
438
|
}
|
626
439
|
|
627
440
|
|
628
441
|
/* *************************/
|
629
442
|
/* double-symbols decoding */
|
630
443
|
/* *************************/
|
444
|
+
|
445
|
+
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
|
631
446
|
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
|
447
|
+
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
|
448
|
+
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
|
632
449
|
|
633
|
-
|
450
|
+
|
451
|
+
/* HUF_fillDTableX2Level2() :
|
634
452
|
* `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
|
635
|
-
static void
|
453
|
+
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
|
636
454
|
const U32* rankValOrigin, const int minWeight,
|
637
455
|
const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
|
638
456
|
U32 nbBitsBaseline, U16 baseSeq)
|
639
457
|
{
|
640
|
-
|
458
|
+
HUF_DEltX2 DElt;
|
641
459
|
U32 rankVal[HUF_TABLELOG_MAX + 1];
|
642
460
|
|
643
461
|
/* get pre-calculated rankVal */
|
@@ -672,10 +490,8 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co
|
|
672
490
|
} }
|
673
491
|
}
|
674
492
|
|
675
|
-
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
|
676
|
-
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
|
677
493
|
|
678
|
-
static void
|
494
|
+
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
|
679
495
|
const sortedSymbol_t* sortedList, const U32 sortedListSize,
|
680
496
|
const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
|
681
497
|
const U32 nbBitsBaseline)
|
@@ -700,12 +516,12 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
|
|
700
516
|
int minWeight = nbBits + scaleLog;
|
701
517
|
if (minWeight < 1) minWeight = 1;
|
702
518
|
sortedRank = rankStart[minWeight];
|
703
|
-
|
519
|
+
HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
|
704
520
|
rankValOrigin[nbBits], minWeight,
|
705
521
|
sortedList+sortedRank, sortedListSize-sortedRank,
|
706
522
|
nbBitsBaseline, symbol);
|
707
523
|
} else {
|
708
|
-
|
524
|
+
HUF_DEltX2 DElt;
|
709
525
|
MEM_writeLE16(&(DElt.sequence), symbol);
|
710
526
|
DElt.nbBits = (BYTE)(nbBits);
|
711
527
|
DElt.length = 1;
|
@@ -717,7 +533,7 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
|
|
717
533
|
}
|
718
534
|
}
|
719
535
|
|
720
|
-
size_t
|
536
|
+
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src,
|
721
537
|
size_t srcSize, void* workSpace,
|
722
538
|
size_t wkspSize)
|
723
539
|
{
|
@@ -726,7 +542,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
|
|
726
542
|
U32 const maxTableLog = dtd.maxTableLog;
|
727
543
|
size_t iSize;
|
728
544
|
void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
|
729
|
-
|
545
|
+
HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
|
730
546
|
U32 *rankStart;
|
731
547
|
|
732
548
|
rankValCol_t* rankVal;
|
@@ -752,7 +568,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
|
|
752
568
|
rankStart = rankStart0 + 1;
|
753
569
|
memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
|
754
570
|
|
755
|
-
|
571
|
+
DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
|
756
572
|
if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
|
757
573
|
/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
|
758
574
|
|
@@ -806,7 +622,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
|
|
806
622
|
rankValPtr[w] = rankVal0[w] >> consumed;
|
807
623
|
} } } }
|
808
624
|
|
809
|
-
|
625
|
+
HUF_fillDTableX2(dt, maxTableLog,
|
810
626
|
sortedSymbol, sizeOfSort,
|
811
627
|
rankStart0, rankVal, maxW,
|
812
628
|
tableLog+1);
|
@@ -817,112 +633,296 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
|
|
817
633
|
return iSize;
|
818
634
|
}
|
819
635
|
|
820
|
-
size_t
|
636
|
+
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
|
821
637
|
{
|
822
638
|
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
823
|
-
return
|
639
|
+
return HUF_readDTableX2_wksp(DTable, src, srcSize,
|
824
640
|
workSpace, sizeof(workSpace));
|
825
641
|
}
|
826
642
|
|
827
|
-
|
643
|
+
|
644
|
+
FORCE_INLINE_TEMPLATE U32
|
645
|
+
HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
|
646
|
+
{
|
647
|
+
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
648
|
+
memcpy(op, dt+val, 2);
|
649
|
+
BIT_skipBits(DStream, dt[val].nbBits);
|
650
|
+
return dt[val].length;
|
651
|
+
}
|
652
|
+
|
653
|
+
FORCE_INLINE_TEMPLATE U32
|
654
|
+
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
|
655
|
+
{
|
656
|
+
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
657
|
+
memcpy(op, dt+val, 1);
|
658
|
+
if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
|
659
|
+
else {
|
660
|
+
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
|
661
|
+
BIT_skipBits(DStream, dt[val].nbBits);
|
662
|
+
if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
|
663
|
+
/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
|
664
|
+
DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
|
665
|
+
} }
|
666
|
+
return 1;
|
667
|
+
}
|
668
|
+
|
669
|
+
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
|
670
|
+
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
|
671
|
+
|
672
|
+
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
|
673
|
+
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
|
674
|
+
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
|
675
|
+
|
676
|
+
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
|
677
|
+
if (MEM_64bits()) \
|
678
|
+
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
|
679
|
+
|
680
|
+
HINT_INLINE size_t
|
681
|
+
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
|
682
|
+
const HUF_DEltX2* const dt, const U32 dtLog)
|
683
|
+
{
|
684
|
+
BYTE* const pStart = p;
|
685
|
+
|
686
|
+
/* up to 8 symbols at a time */
|
687
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
|
688
|
+
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
689
|
+
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
|
690
|
+
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
691
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
692
|
+
}
|
693
|
+
|
694
|
+
/* closer to end : up to 2 symbols at a time */
|
695
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
|
696
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
697
|
+
|
698
|
+
while (p <= pEnd-2)
|
699
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
|
700
|
+
|
701
|
+
if (p < pEnd)
|
702
|
+
p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
|
703
|
+
|
704
|
+
return p-pStart;
|
705
|
+
}
|
706
|
+
|
707
|
+
FORCE_INLINE_TEMPLATE size_t
|
708
|
+
HUF_decompress1X2_usingDTable_internal_body(
|
709
|
+
void* dst, size_t dstSize,
|
710
|
+
const void* cSrc, size_t cSrcSize,
|
711
|
+
const HUF_DTable* DTable)
|
712
|
+
{
|
713
|
+
BIT_DStream_t bitD;
|
714
|
+
|
715
|
+
/* Init */
|
716
|
+
CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
|
717
|
+
|
718
|
+
/* decode */
|
719
|
+
{ BYTE* const ostart = (BYTE*) dst;
|
720
|
+
BYTE* const oend = ostart + dstSize;
|
721
|
+
const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
|
722
|
+
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
|
723
|
+
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
724
|
+
HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
|
725
|
+
}
|
726
|
+
|
727
|
+
/* check */
|
728
|
+
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
|
729
|
+
|
730
|
+
/* decoded size */
|
731
|
+
return dstSize;
|
732
|
+
}
|
733
|
+
|
734
|
+
|
735
|
+
FORCE_INLINE_TEMPLATE size_t
|
736
|
+
HUF_decompress4X2_usingDTable_internal_body(
|
737
|
+
void* dst, size_t dstSize,
|
738
|
+
const void* cSrc, size_t cSrcSize,
|
739
|
+
const HUF_DTable* DTable)
|
740
|
+
{
|
741
|
+
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
|
742
|
+
|
743
|
+
{ const BYTE* const istart = (const BYTE*) cSrc;
|
744
|
+
BYTE* const ostart = (BYTE*) dst;
|
745
|
+
BYTE* const oend = ostart + dstSize;
|
746
|
+
const void* const dtPtr = DTable+1;
|
747
|
+
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
|
748
|
+
|
749
|
+
/* Init */
|
750
|
+
BIT_DStream_t bitD1;
|
751
|
+
BIT_DStream_t bitD2;
|
752
|
+
BIT_DStream_t bitD3;
|
753
|
+
BIT_DStream_t bitD4;
|
754
|
+
size_t const length1 = MEM_readLE16(istart);
|
755
|
+
size_t const length2 = MEM_readLE16(istart+2);
|
756
|
+
size_t const length3 = MEM_readLE16(istart+4);
|
757
|
+
size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
|
758
|
+
const BYTE* const istart1 = istart + 6; /* jumpTable */
|
759
|
+
const BYTE* const istart2 = istart1 + length1;
|
760
|
+
const BYTE* const istart3 = istart2 + length2;
|
761
|
+
const BYTE* const istart4 = istart3 + length3;
|
762
|
+
size_t const segmentSize = (dstSize+3) / 4;
|
763
|
+
BYTE* const opStart2 = ostart + segmentSize;
|
764
|
+
BYTE* const opStart3 = opStart2 + segmentSize;
|
765
|
+
BYTE* const opStart4 = opStart3 + segmentSize;
|
766
|
+
BYTE* op1 = ostart;
|
767
|
+
BYTE* op2 = opStart2;
|
768
|
+
BYTE* op3 = opStart3;
|
769
|
+
BYTE* op4 = opStart4;
|
770
|
+
U32 endSignal;
|
771
|
+
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
772
|
+
U32 const dtLog = dtd.tableLog;
|
773
|
+
|
774
|
+
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
775
|
+
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
776
|
+
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
777
|
+
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
778
|
+
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
779
|
+
|
780
|
+
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
781
|
+
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
782
|
+
for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
|
783
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
784
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
785
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
786
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
787
|
+
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
|
788
|
+
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
|
789
|
+
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
|
790
|
+
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
|
791
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
792
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
793
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
794
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
795
|
+
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
|
796
|
+
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
797
|
+
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
798
|
+
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
799
|
+
|
800
|
+
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
801
|
+
}
|
802
|
+
|
803
|
+
/* check corruption */
|
804
|
+
if (op1 > opStart2) return ERROR(corruption_detected);
|
805
|
+
if (op2 > opStart3) return ERROR(corruption_detected);
|
806
|
+
if (op3 > opStart4) return ERROR(corruption_detected);
|
807
|
+
/* note : op4 already verified within main loop */
|
808
|
+
|
809
|
+
/* finish bitStreams one by one */
|
810
|
+
HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
|
811
|
+
HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
|
812
|
+
HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
|
813
|
+
HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
|
814
|
+
|
815
|
+
/* check */
|
816
|
+
{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
|
817
|
+
if (!endCheck) return ERROR(corruption_detected); }
|
818
|
+
|
819
|
+
/* decoded size */
|
820
|
+
return dstSize;
|
821
|
+
}
|
822
|
+
}
|
823
|
+
|
824
|
+
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
|
825
|
+
HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
|
826
|
+
|
827
|
+
size_t HUF_decompress1X2_usingDTable(
|
828
828
|
void* dst, size_t dstSize,
|
829
829
|
const void* cSrc, size_t cSrcSize,
|
830
830
|
const HUF_DTable* DTable)
|
831
831
|
{
|
832
832
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
833
833
|
if (dtd.tableType != 1) return ERROR(GENERIC);
|
834
|
-
return
|
834
|
+
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
835
835
|
}
|
836
836
|
|
837
|
-
size_t
|
837
|
+
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
838
838
|
const void* cSrc, size_t cSrcSize,
|
839
839
|
void* workSpace, size_t wkspSize)
|
840
840
|
{
|
841
841
|
const BYTE* ip = (const BYTE*) cSrc;
|
842
842
|
|
843
|
-
size_t const hSize =
|
843
|
+
size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
|
844
844
|
workSpace, wkspSize);
|
845
845
|
if (HUF_isError(hSize)) return hSize;
|
846
846
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
847
847
|
ip += hSize; cSrcSize -= hSize;
|
848
848
|
|
849
|
-
return
|
849
|
+
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
|
850
850
|
}
|
851
851
|
|
852
852
|
|
853
|
-
size_t
|
853
|
+
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
854
854
|
const void* cSrc, size_t cSrcSize)
|
855
855
|
{
|
856
856
|
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
857
|
-
return
|
857
|
+
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
858
858
|
workSpace, sizeof(workSpace));
|
859
859
|
}
|
860
860
|
|
861
|
-
size_t
|
861
|
+
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
862
862
|
{
|
863
|
-
|
864
|
-
return
|
863
|
+
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
864
|
+
return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
865
865
|
}
|
866
866
|
|
867
|
-
size_t
|
867
|
+
size_t HUF_decompress4X2_usingDTable(
|
868
868
|
void* dst, size_t dstSize,
|
869
869
|
const void* cSrc, size_t cSrcSize,
|
870
870
|
const HUF_DTable* DTable)
|
871
871
|
{
|
872
872
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
873
873
|
if (dtd.tableType != 1) return ERROR(GENERIC);
|
874
|
-
return
|
874
|
+
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
875
875
|
}
|
876
876
|
|
877
|
-
static size_t
|
877
|
+
static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
|
878
878
|
const void* cSrc, size_t cSrcSize,
|
879
879
|
void* workSpace, size_t wkspSize, int bmi2)
|
880
880
|
{
|
881
881
|
const BYTE* ip = (const BYTE*) cSrc;
|
882
882
|
|
883
|
-
size_t hSize =
|
883
|
+
size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
|
884
884
|
workSpace, wkspSize);
|
885
885
|
if (HUF_isError(hSize)) return hSize;
|
886
886
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
887
887
|
ip += hSize; cSrcSize -= hSize;
|
888
888
|
|
889
|
-
return
|
889
|
+
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
|
890
890
|
}
|
891
891
|
|
892
|
-
size_t
|
892
|
+
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
893
893
|
const void* cSrc, size_t cSrcSize,
|
894
894
|
void* workSpace, size_t wkspSize)
|
895
895
|
{
|
896
|
-
return
|
896
|
+
return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
|
897
897
|
}
|
898
898
|
|
899
899
|
|
900
|
-
size_t
|
900
|
+
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
901
901
|
const void* cSrc, size_t cSrcSize)
|
902
902
|
{
|
903
903
|
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
904
|
-
return
|
904
|
+
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
905
905
|
workSpace, sizeof(workSpace));
|
906
906
|
}
|
907
907
|
|
908
|
-
size_t
|
908
|
+
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
909
909
|
{
|
910
|
-
|
911
|
-
return
|
910
|
+
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
911
|
+
return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
912
912
|
}
|
913
913
|
|
914
914
|
|
915
|
-
/*
|
916
|
-
/*
|
917
|
-
/*
|
915
|
+
/* ***********************************/
|
916
|
+
/* Universal decompression selectors */
|
917
|
+
/* ***********************************/
|
918
918
|
|
919
919
|
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
|
920
920
|
const void* cSrc, size_t cSrcSize,
|
921
921
|
const HUF_DTable* DTable)
|
922
922
|
{
|
923
923
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
924
|
-
return dtd.tableType ?
|
925
|
-
|
924
|
+
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
925
|
+
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
926
926
|
}
|
927
927
|
|
928
928
|
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
|
@@ -930,8 +930,8 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
|
|
930
930
|
const HUF_DTable* DTable)
|
931
931
|
{
|
932
932
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
933
|
-
return dtd.tableType ?
|
934
|
-
|
933
|
+
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
934
|
+
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
935
935
|
}
|
936
936
|
|
937
937
|
|
@@ -960,12 +960,12 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
|
|
960
960
|
/** HUF_selectDecoder() :
|
961
961
|
* Tells which decoder is likely to decode faster,
|
962
962
|
* based on a set of pre-computed metrics.
|
963
|
-
* @return : 0==
|
963
|
+
* @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
|
964
964
|
* Assumption : 0 < dstSize <= 128 KB */
|
965
965
|
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
|
966
966
|
{
|
967
967
|
assert(dstSize > 0);
|
968
|
-
assert(dstSize <= 128
|
968
|
+
assert(dstSize <= 128*1024);
|
969
969
|
/* decoder timing evaluation */
|
970
970
|
{ U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
|
971
971
|
U32 const D256 = (U32)(dstSize >> 8);
|
@@ -980,7 +980,7 @@ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc,
|
|
980
980
|
|
981
981
|
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
982
982
|
{
|
983
|
-
static const decompressionAlgo decompress[2] = {
|
983
|
+
static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
|
984
984
|
|
985
985
|
/* validation checks */
|
986
986
|
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
@@ -1002,8 +1002,8 @@ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const
|
|
1002
1002
|
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
1003
1003
|
|
1004
1004
|
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
1005
|
-
return algoNb ?
|
1006
|
-
|
1005
|
+
return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
|
1006
|
+
HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
|
1007
1007
|
}
|
1008
1008
|
}
|
1009
1009
|
|
@@ -1025,8 +1025,8 @@ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
|
|
1025
1025
|
if (cSrcSize == 0) return ERROR(corruption_detected);
|
1026
1026
|
|
1027
1027
|
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
1028
|
-
return algoNb ?
|
1029
|
-
|
1028
|
+
return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
|
1029
|
+
HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
1030
1030
|
}
|
1031
1031
|
}
|
1032
1032
|
|
@@ -1041,9 +1041,9 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
1041
1041
|
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
1042
1042
|
|
1043
1043
|
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
1044
|
-
return algoNb ?
|
1044
|
+
return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
1045
1045
|
cSrcSize, workSpace, wkspSize):
|
1046
|
-
|
1046
|
+
HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
1047
1047
|
cSrcSize, workSpace, wkspSize);
|
1048
1048
|
}
|
1049
1049
|
}
|
@@ -1060,27 +1060,27 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
1060
1060
|
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
|
1061
1061
|
{
|
1062
1062
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
1063
|
-
return dtd.tableType ?
|
1064
|
-
|
1063
|
+
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
|
1064
|
+
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
|
1065
1065
|
}
|
1066
1066
|
|
1067
|
-
size_t
|
1067
|
+
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
|
1068
1068
|
{
|
1069
1069
|
const BYTE* ip = (const BYTE*) cSrc;
|
1070
1070
|
|
1071
|
-
size_t const hSize =
|
1071
|
+
size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
|
1072
1072
|
if (HUF_isError(hSize)) return hSize;
|
1073
1073
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
1074
1074
|
ip += hSize; cSrcSize -= hSize;
|
1075
1075
|
|
1076
|
-
return
|
1076
|
+
return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
|
1077
1077
|
}
|
1078
1078
|
|
1079
1079
|
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
|
1080
1080
|
{
|
1081
1081
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
1082
|
-
return dtd.tableType ?
|
1083
|
-
|
1082
|
+
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
|
1083
|
+
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
|
1084
1084
|
}
|
1085
1085
|
|
1086
1086
|
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
|
@@ -1090,7 +1090,7 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
|
|
1090
1090
|
if (cSrcSize == 0) return ERROR(corruption_detected);
|
1091
1091
|
|
1092
1092
|
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
1093
|
-
return algoNb ?
|
1094
|
-
|
1093
|
+
return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
|
1094
|
+
HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
|
1095
1095
|
}
|
1096
1096
|
}
|