zstd-ruby 1.3.4.0 → 1.3.5.0
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +56 -10
- data/ext/zstdruby/libzstd/README.md +4 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +6 -19
- data/ext/zstdruby/libzstd/common/compiler.h +3 -3
- data/ext/zstdruby/libzstd/common/cpu.h +1 -2
- data/ext/zstdruby/libzstd/common/debug.c +44 -0
- data/ext/zstdruby/libzstd/common/debug.h +123 -0
- data/ext/zstdruby/libzstd/common/entropy_common.c +16 -1
- data/ext/zstdruby/libzstd/common/fse.h +45 -41
- data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
- data/ext/zstdruby/libzstd/common/huf.h +34 -27
- data/ext/zstdruby/libzstd/common/pool.c +89 -32
- data/ext/zstdruby/libzstd/common/pool.h +29 -19
- data/ext/zstdruby/libzstd/common/zstd_common.c +0 -5
- data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -37
- data/ext/zstdruby/libzstd/compress/fse_compress.c +28 -163
- data/ext/zstdruby/libzstd/compress/hist.c +195 -0
- data/ext/zstdruby/libzstd/compress/hist.h +92 -0
- data/ext/zstdruby/libzstd/compress/huf_compress.c +14 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +798 -350
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +120 -34
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +247 -87
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -1
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +177 -56
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -1
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +331 -65
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +13 -0
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +15 -20
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -2
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +503 -300
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +7 -0
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +122 -47
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +5 -5
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +325 -325
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +80 -43
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +5 -5
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +12 -61
- data/ext/zstdruby/libzstd/zstd.h +137 -69
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +7 -3
data/ext/zstdruby/libzstd/compress/zstd_opt.h

@@ -28,6 +28,13 @@ size_t ZSTD_compressBlock_btultra(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
 
+size_t ZSTD_compressBlock_btopt_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+
 size_t ZSTD_compressBlock_btopt_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
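
Context for the hunk above: 1.3.5 adds a dictMatchState variant of each btopt/btultra entry point, alongside the existing plain and extDict variants that share the same signature. As a rough, hypothetical sketch of how a caller could select among them (zstd's real dispatcher is internal and not part of this diff):

    typedef size_t (*blockCompressor)(ZSTD_matchState_t* ms, seqStore_t* seqStore,
                                      U32 rep[ZSTD_REP_NUM],
                                      ZSTD_compressionParameters const* cParams,
                                      void const* src, size_t srcSize);

    /* hypothetical selector: 0 = no dict, 1 = dictMatchState, 2 = extDict */
    static blockCompressor selectBtopt(int dictMode)
    {
        switch (dictMode) {
        case 1:  return ZSTD_compressBlock_btopt_dictMatchState;
        case 2:  return ZSTD_compressBlock_btopt_extDict;
        default: return ZSTD_compressBlock_btopt;
        }
    }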
data/ext/zstdruby/libzstd/compress/zstdmt_compress.c

@@ -37,18 +37,17 @@
 #define ZSTD_RESIZE_SEQPOOL 0
 
 /* ====== Debug ====== */
-#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) && !defined(_MSC_VER)
 
 #  include <stdio.h>
 #  include <unistd.h>
 #  include <sys/times.h>
-#  define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
 
 #  define DEBUG_PRINTHEX(l,p,n) {            \
     unsigned debug_u;                        \
     for (debug_u=0; debug_u<(n); debug_u++)  \
-        DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
-    DEBUGLOGRAW(l, " \n");                   \
+        RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+    RAWLOG(l, " \n");                        \
 }
 
 static unsigned long long GetCurrentClockTimeMicroseconds(void)
@@ -62,7 +61,7 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
 
 #define MUTEX_WAIT_TIME_DLEVEL 6
 #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) {          \
-    if (ZSTD_DEBUG >= MUTEX_WAIT_TIME_DLEVEL) {   \
+    if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) {   \
         unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
         ZSTD_pthread_mutex_lock(mutex);           \
         {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
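
The two hunks above replace the file-local ZSTD_DEBUG / DEBUGLOGRAW machinery with the DEBUGLEVEL / RAWLOG macros from the new common/debug.h (added in this release; +123 lines per the file list). A minimal sketch of such a level-gated macro, assumed rather than quoted from debug.h:

    #include <stdio.h>

    #ifndef DEBUGLEVEL
    #  define DEBUGLEVEL 0   /* build-time verbosity knob */
    #endif

    /* print unadorned text when the requested level is enabled */
    #define RAWLOG(l, ...)                       \
        do { if ((l) <= DEBUGLEVEL)              \
                 fprintf(stderr, __VA_ARGS__);   \
        } while (0)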
@@ -160,6 +159,25 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
 }
 
+
+static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)
+{
+    unsigned const maxNbBuffers = 2*nbWorkers + 3;
+    if (srcBufPool==NULL) return NULL;
+    if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
+        return srcBufPool;
+    /* need a larger buffer pool */
+    {   ZSTD_customMem const cMem = srcBufPool->cMem;
+        size_t const bSize = srcBufPool->bufferSize;   /* forward parameters */
+        ZSTDMT_bufferPool* newBufPool;
+        ZSTDMT_freeBufferPool(srcBufPool);
+        newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+        if (newBufPool==NULL) return newBufPool;
+        ZSTDMT_setBufferSize(newBufPool, bSize);
+        return newBufPool;
+    }
+}
+
 /** ZSTDMT_getBuffer() :
  *  assumption : bufPool must be valid
  * @return : a buffer, with start pointer and size
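
ZSTDMT_expandBufferPool reuses a pool that is already large enough and otherwise frees and recreates it, carrying over only the configured buffer size; pooled buffers themselves are discarded. The same grow-by-recreate idiom in a self-contained toy (all names here are illustrative, not zstd API):

    #include <stdlib.h>

    typedef struct { size_t capacity; void** slots; } toyPool;

    static toyPool* toyPool_create(size_t capacity)
    {
        toyPool* p = (toyPool*)malloc(sizeof(*p));
        if (p == NULL) return NULL;
        p->slots = (void**)calloc(capacity, sizeof(void*));
        if (p->slots == NULL) { free(p); return NULL; }
        p->capacity = capacity;
        return p;
    }

    static toyPool* toyPool_expand(toyPool* p, size_t needed)
    {
        if (p == NULL) return NULL;
        if (p->capacity >= needed) return p;   /* good enough: reuse as-is */
        free(p->slots);                        /* contents are sacrificed */
        free(p);
        return toyPool_create(needed);         /* only the size is forwarded */
    }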
@@ -310,6 +328,10 @@ static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
     ZSTDMT_freeBufferPool(seqPool);
 }
 
+static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
+{
+    return ZSTDMT_expandBufferPool(pool, nbWorkers);
+}
 
 
 /* =====   CCtx Pool   ===== */
@@ -355,6 +377,18 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
     return cctxPool;
 }
 
+static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
+                                              unsigned nbWorkers)
+{
+    if (srcPool==NULL) return NULL;
+    if (nbWorkers <= srcPool->totalCCtx) return srcPool;   /* good enough */
+    /* need a larger cctx pool */
+    {   ZSTD_customMem const cMem = srcPool->cMem;
+        ZSTDMT_freeCCtxPool(srcPool);
+        return ZSTDMT_createCCtxPool(nbWorkers, cMem);
+    }
+}
+
 /* only works during initialization phase, not during compression */
 static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
 {
@@ -425,12 +459,11 @@ typedef struct {
     ZSTD_window_t ldmWindow;  /* A thread-safe copy of ldmState.window */
 } serialState_t;
 
-static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params)
+static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize)
 {
     /* Adjust parameters */
     if (params.ldmParams.enableLdm) {
         DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
-        params.ldmParams.windowLog = params.cParams.windowLog;
         ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
         assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
         assert(params.ldmParams.hashEveryLog < 32);
@@ -453,7 +486,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
                        serialState->params.ldmParams.hashLog -
                        serialState->params.ldmParams.bucketSizeLog;
         /* Size the seq pool tables */
-        ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, params.jobSize));
+        ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
         /* Reset the window */
         ZSTD_window_clear(&serialState->ldmState.window);
         serialState->ldmWindow = serialState->ldmState.window;
@@ -473,6 +506,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
         memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
     }
     serialState->params = params;
+    serialState->params.jobSize = (U32)jobSize;
     return 0;
 }
 
@@ -514,6 +548,7 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
         size_t error;
         assert(seqStore.seq != NULL && seqStore.pos == 0 &&
                seqStore.size == 0 && seqStore.capacity > 0);
+        assert(src.size <= serialState->params.jobSize);
         ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
         error = ZSTD_ldm_generateSequences(
             &serialState->ldmState, &seqStore,
@@ -602,13 +637,6 @@ void ZSTDMT_compressionJob(void* jobDescription)
     rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
     buffer_t dstBuff = job->dstBuff;
 
-    /* Don't compute the checksum for chunks, since we compute it externally,
-     * but write it in the header.
-     */
-    if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
-    /* Don't run LDM for the chunks, since we handle it externally */
-    jobParams.ldmParams.enableLdm = 0;
-
     /* ressources */
     if (cctx==NULL) {
         job->cSize = ERROR(memory_allocation);
@@ -622,10 +650,22 @@ void ZSTDMT_compressionJob(void* jobDescription)
         }
         job->dstBuff = dstBuff;   /* this value can be read in ZSTDMT_flush, when it copies the whole job */
     }
+    if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL) {
+        job->cSize = ERROR(memory_allocation);
+        goto _endJob;
+    }
+
+    /* Don't compute the checksum for chunks, since we compute it externally,
+     * but write it in the header.
+     */
+    if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
+    /* Don't run LDM for the chunks, since we handle it externally */
+    jobParams.ldmParams.enableLdm = 0;
+
 
     /* init */
     if (job->cdict) {
-        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, job->cdict, jobParams, job->fullFrameSize);
+        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
         assert(job->firstJob);  /* only allowed for first job */
         if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
     } else {  /* srcStart points at reloaded section */
@@ -637,6 +677,7 @@ void ZSTDMT_compressionJob(void* jobDescription)
     }   }
     {   size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
                         job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
+                        ZSTD_dtlm_fast,
                         NULL, /*cdict*/
                         jobParams, pledgedSrcSize);
         if (ZSTD_isError(initError)) {
@@ -745,9 +786,9 @@ struct ZSTDMT_CCtx_s {
     ZSTD_CCtx_params params;
     size_t targetSectionSize;
     size_t targetPrefixSize;
-
+    int jobReady;        /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */
     inBuff_t inBuff;
-
+    roundBuff_t roundBuff;
     serialState_t serial;
     unsigned singleBlockingThread;
     unsigned jobIDMask;
@@ -798,6 +839,20 @@ static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_custom
     return jobTable;
 }
 
+static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
+    U32 nbJobs = nbWorkers + 2;
+    if (nbJobs > mtctx->jobIDMask+1) {  /* need more job capacity */
+        ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
+        mtctx->jobIDMask = 0;
+        mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem);
+        if (mtctx->jobs==NULL) return ERROR(memory_allocation);
+        assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0));  /* ensure nbJobs is a power of 2 */
+        mtctx->jobIDMask = nbJobs - 1;
+    }
+    return 0;
+}
+
+
 /* ZSTDMT_CCtxParam_setNbWorkers():
  * Internal use only */
 size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
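
ZSTDMT_createJobsTable rounds the job count up to a power of two (hence the assert above), so jobIDMask = nbJobs - 1 turns modular job-ID arithmetic into a single AND:

    /* Ring indexing with a power-of-two mask, as the jobs table relies on:
     * with nbJobs == 8, jobIDMask == 7, and IDs wrap 0..7, 0..7, ... */
    unsigned nextJobSlot(unsigned jobID, unsigned jobIDMask)
    {
        return (jobID + 1) & jobIDMask;   /* same as % nbJobs, but cheaper */
    }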
@@ -924,6 +979,8 @@ size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
         if ( (value > 0)  /* value==0 => automatic job size */
            & (value < ZSTDMT_JOBSIZE_MIN) )
             value = ZSTDMT_JOBSIZE_MIN;
+        if (value > ZSTDMT_JOBSIZE_MAX)
+            value = ZSTDMT_JOBSIZE_MAX;
         params->jobSize = value;
         return value;
     case ZSTDMT_p_overlapSectionLog :
@@ -950,6 +1007,21 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
     }
 }
 
+size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value)
+{
+    switch (parameter) {
+    case ZSTDMT_p_jobSize:
+        *value = mtctx->params.jobSize;
+        break;
+    case ZSTDMT_p_overlapSectionLog:
+        *value = mtctx->params.overlapSizeLog;
+        break;
+    default:
+        return ERROR(parameter_unsupported);
+    }
+    return 0;
+}
+
 /* Sets parameters relevant to the compression job,
  * initializing others to default values. */
 static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
@@ -960,11 +1032,28 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
     jobParams.cParams = params.cParams;
     jobParams.fParams = params.fParams;
     jobParams.compressionLevel = params.compressionLevel;
-    jobParams.disableLiteralCompression = params.disableLiteralCompression;
 
     return jobParams;
 }
 
+
+/* ZSTDMT_resize() :
+ * @return : error code if fails, 0 on success */
+static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
+{
+    if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
+    CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
+    mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
+    if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
+    mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
+    if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
+    mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers);
+    if (mtctx->seqPool == NULL) return ERROR(memory_allocation);
+    ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
+    return 0;
+}
+
+
 /*! ZSTDMT_updateCParams_whileCompressing() :
  *  Updates only a selected set of compression parameters, to remain compatible with current frame.
  *  New parameters will be applied to next compression job. */
@@ -981,15 +1070,6 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
     }
 }
 
-/* ZSTDMT_getNbWorkers():
- * @return nb threads currently active in mtctx.
- *  mtctx must be valid */
-unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx)
-{
-    assert(mtctx != NULL);
-    return mtctx->params.nbWorkers;
-}
-
 /* ZSTDMT_getFrameProgression():
  * tells how much data has been consumed (input) and produced (output) for current frame.
  *  able to count progression inside worker threads.
@@ -1087,18 +1167,10 @@ static size_t ZSTDMT_compress_advanced_internal(
 
     assert(avgJobSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
     ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
-    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
+    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
         return ERROR(memory_allocation);
 
-
-        U32 jobsTableSize = nbJobs;
-        ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
-        mtctx->jobIDMask = 0;
-        mtctx->jobs = ZSTDMT_createJobsTable(&jobsTableSize, mtctx->cMem);
-        if (mtctx->jobs==NULL) return ERROR(memory_allocation);
-        assert((jobsTableSize != 0) && ((jobsTableSize & (jobsTableSize - 1)) == 0));  /* ensure jobsTableSize is a power of 2 */
-        mtctx->jobIDMask = jobsTableSize - 1;
-    }
+    CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbJobs) );  /* only expands if necessary */
 
     {   unsigned u;
         for (u=0; u<nbJobs; u++) {
@@ -1221,17 +1293,18 @@ size_t ZSTDMT_initCStream_internal(
     const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
     unsigned long long pledgedSrcSize)
 {
-    DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)",
-                (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx);
-
+    DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)",
+                (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx);
+
+    /* params supposed partially fully validated at this point */
     assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
     assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
-    assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
 
     /* init */
-    if (params.
-
-
+    if (params.nbWorkers != mtctx->params.nbWorkers)
+        CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
+
+    if (params.jobSize > 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
     if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
 
     mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
@@ -1270,7 +1343,9 @@ size_t ZSTDMT_initCStream_internal(
     mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
     DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
     mtctx->targetSectionSize = params.jobSize;
-    if (mtctx->targetSectionSize
+    if (mtctx->targetSectionSize == 0) {
+        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
+    }
     if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize;  /* job size must be >= overlap size */
     DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
     DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
@@ -1312,7 +1387,7 @@ size_t ZSTDMT_initCStream_internal(
     mtctx->allJobsCompleted = 0;
     mtctx->consumed = 0;
     mtctx->produced = 0;
-    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
+    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize))
        return ERROR(memory_allocation);
     return 0;
 }
data/ext/zstdruby/libzstd/compress/zstdmt_compress.h

@@ -95,6 +95,11 @@ typedef enum {
  * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
 ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value);
 
+/* ZSTDMT_getMTCtxParameter() :
+ * Query the ZSTDMT_CCtx for a parameter value.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value);
+
 
 /*! ZSTDMT_compressStream_generic() :
  *  Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
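
A minimal usage sketch pairing the new getter with the existing setter (assumes a build where the ZSTDMT_* API is exposed; error handling abbreviated):

    ZSTDMT_CCtx* mtctx = ZSTDMT_createCCtx(4);   /* 4 worker threads */
    if (mtctx != NULL) {
        unsigned jobSize = 0;
        ZSTDMT_setMTCtxParameter(mtctx, ZSTDMT_p_jobSize, 1 << 20);
        if (!ZSTD_isError(ZSTDMT_getMTCtxParameter(mtctx, ZSTDMT_p_jobSize, &jobSize))) {
            /* jobSize now holds the effective (possibly clamped) value */
        }
        ZSTDMT_freeCCtx(mtctx);
    }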
@@ -126,11 +131,6 @@ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorker
  *  New parameters will be applied to next compression job. */
 void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
 
-/* ZSTDMT_getNbWorkers():
- * @return nb threads currently active in mtctx.
- *  mtctx must be valid */
-unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx);
-
 /* ZSTDMT_getFrameProgression():
  * tells how much data has been consumed (input) and produced (output) for current frame.
  *  able to count progression inside worker threads.
data/ext/zstdruby/libzstd/decompress/huf_decompress.c

@@ -1,6 +1,7 @@
 /* ******************************************************************
-
-
+   huff0 huffman decoder,
+   part of Finite State Entropy library
+   Copyright (C) 2013-present, Yann Collet.
 
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -29,16 +30,15 @@
 
     You can contact the author at :
     - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
-    - Public forum : https://groups.google.com/forum/#!forum/lz4c
 ****************************************************************** */
 
 /* **************************************************************
 *  Dependencies
 ****************************************************************/
 #include <string.h>     /* memcpy, memset */
-#include "bitstream.h"  /* BIT_* */
 #include "compiler.h"
-#include "fse.h"        /* header compression */
+#include "bitstream.h"  /* BIT_* */
+#include "fse.h"        /* to compress headers */
 #define HUF_STATIC_LINKING_ONLY
 #include "huf.h"
 #include "error_private.h"
@@ -48,7 +48,6 @@
 *  Error Management
 ****************************************************************/
 #define HUF_isError ERR_isError
-#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
 
 
@@ -75,15 +74,15 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
 /*-***************************/
 /*  single-symbol decoding   */
 /*-***************************/
-typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
 
-size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
+size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
 {
     U32 tableLog = 0;
     U32 nbSymbols = 0;
     size_t iSize;
     void* const dtPtr = DTable + 1;
-    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
 
     U32* rankVal;
     BYTE* huffWeight;
@@ -96,7 +95,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
 
     if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
 
-    HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
     /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
 
     iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
@@ -124,7 +123,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
         U32 const w = huffWeight[n];
         U32 const length = (1 << w) >> 1;
         U32 u;
-        HUF_DEltX2 D;
+        HUF_DEltX1 D;
         D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
         for (u = rankVal[w]; u < rankVal[w] + length; u++)
             dt[u] = D;
@@ -134,17 +133,15 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
     return iSize;
 }
 
-size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
+size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
 {
     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_readDTableX2_wksp(DTable, src, srcSize,
+    return HUF_readDTableX1_wksp(DTable, src, srcSize,
                                  workSpace, sizeof(workSpace));
 }
 
-typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;  /* double-symbols decoding */
-
 FORCE_INLINE_TEMPLATE BYTE
-HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
 {
     size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
     BYTE const c = dt[val].byte;
@@ -152,44 +149,44 @@ HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog
     return c;
 }
 
-#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
-    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
 
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)  \
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
     if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
-        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
 
-#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
     if (MEM_64bits()) \
-        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
 
 HINT_INLINE size_t
-HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
 {
     BYTE* const pStart = p;
 
     /* up to 4 symbols at a time */
     while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
-        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
-        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
     }
 
     /* [0-3] symbols remaining */
     if (MEM_32bits())
         while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
-            HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
 
     /* no more data to retrieve from bitstream, no need to reload */
     while (p < pEnd)
-        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
 
     return pEnd-pStart;
 }
 
 FORCE_INLINE_TEMPLATE size_t
-HUF_decompress1X2_usingDTable_internal_body(
+HUF_decompress1X1_usingDTable_internal_body(
     void* dst,  size_t dstSize,
     const void* cSrc, size_t cSrcSize,
     const HUF_DTable* DTable)
@@ -197,14 +194,14 @@ HUF_decompress1X2_usingDTable_internal_body(
     BYTE* op = (BYTE*)dst;
     BYTE* const oend = op + dstSize;
     const void* dtPtr = DTable + 1;
-    const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
     BIT_DStream_t bitD;
     DTableDesc const dtd = HUF_getDTableDesc(DTable);
     U32 const dtLog = dtd.tableLog;
 
     CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
 
-    HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
+    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
 
     if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
 
@@ -212,7 +209,7 @@ HUF_decompress1X2_usingDTable_internal_body(
 }
 
 FORCE_INLINE_TEMPLATE size_t
-HUF_decompress4X2_usingDTable_internal_body(
+HUF_decompress4X1_usingDTable_internal_body(
     void* dst,  size_t dstSize,
     const void* cSrc, size_t cSrcSize,
     const HUF_DTable* DTable)
@@ -224,7 +221,7 @@ HUF_decompress4X2_usingDTable_internal_body(
     BYTE* const ostart = (BYTE*) dst;
     BYTE* const oend = ostart + dstSize;
     const void* const dtPtr = DTable + 1;
-    const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
 
     /* Init */
     BIT_DStream_t bitD1;
@@ -260,22 +257,22 @@ HUF_decompress4X2_usingDTable_internal_body(
     /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
     endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
     while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
-        HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-        HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-        HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-        HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-        HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
-        HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
-        HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
-        HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
-        HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-        HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-        HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-        HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-        HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
-        HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
-        HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
-        HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+        HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+        HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+        HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+        HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+        HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
+        HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
+        HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
+        HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
+        HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+        HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+        HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+        HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+        HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
+        HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
+        HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
+        HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
         BIT_reloadDStream(&bitD1);
         BIT_reloadDStream(&bitD2);
         BIT_reloadDStream(&bitD3);
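
The four interleaved streams give the CPU independent work each iteration (no serial dependency between op1..op4). Their boundaries come from a 6-byte jump table: three little-endian 16-bit lengths, with the fourth stream taking the remainder. The encoder-side counterpart, which is not part of this diff, would write that table roughly like this (writeJumpTable is a hypothetical name):

    static void writeJumpTable(unsigned char* dst,
                               size_t len1, size_t len2, size_t len3)
    {
        /* three LE16 stream sizes; stream 4's size is implied */
        dst[0] = (unsigned char)len1;  dst[1] = (unsigned char)(len1 >> 8);
        dst[2] = (unsigned char)len2;  dst[3] = (unsigned char)(len2 >> 8);
        dst[4] = (unsigned char)len3;  dst[5] = (unsigned char)(len3 >> 8);
    }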
@@ -291,191 +288,10 @@ HUF_decompress4X2_usingDTable_internal_body(
     /* note : op4 supposed already verified within main loop */
 
     /* finish bitStreams one by one */
-    HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
-    HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
-    HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
-    HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
-
-    /* check */
-    { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
-      if (!endCheck) return ERROR(corruption_detected); }
-
-    /* decoded size */
-    return dstSize;
-    }
-}
-
-
-FORCE_INLINE_TEMPLATE U32
-HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
-    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
-    memcpy(op, dt+val, 2);
-    BIT_skipBits(DStream, dt[val].nbBits);
-    return dt[val].length;
-}
-
-FORCE_INLINE_TEMPLATE U32
-HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
-    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
-    memcpy(op, dt+val, 1);
-    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
-    else {
-        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
-            BIT_skipBits(DStream, dt[val].nbBits);
-            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
-                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
-                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
-    }   }
-    return 1;
-}
-
-#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
-    ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
-    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
-        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
-    if (MEM_64bits()) \
-        ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-HINT_INLINE size_t
-HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
-                const HUF_DEltX4* const dt, const U32 dtLog)
-{
-    BYTE* const pStart = p;
-
-    /* up to 8 symbols at a time */
-    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
-        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
-        HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
-        HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
-        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-    }
-
-    /* closer to end : up to 2 symbols at a time */
-    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
-        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-
-    while (p <= pEnd-2)
-        HUF_DECODE_SYMBOLX4_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
-
-    if (p < pEnd)
-        p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
-
-    return p-pStart;
-}
-
-FORCE_INLINE_TEMPLATE size_t
-HUF_decompress1X4_usingDTable_internal_body(
-    void* dst,  size_t dstSize,
-    const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable)
-{
-    BIT_DStream_t bitD;
-
-    /* Init */
-    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
-
-    /* decode */
-    {   BYTE* const ostart = (BYTE*) dst;
-        BYTE* const oend = ostart + dstSize;
-        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
-        const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
-        DTableDesc const dtd = HUF_getDTableDesc(DTable);
-        HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
-    }
-
-    /* check */
-    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
-
-    /* decoded size */
-    return dstSize;
-}
-
-
-FORCE_INLINE_TEMPLATE size_t
-HUF_decompress4X4_usingDTable_internal_body(
-    void* dst,  size_t dstSize,
-    const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable)
-{
-    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
-
-    {   const BYTE* const istart = (const BYTE*) cSrc;
-        BYTE* const ostart = (BYTE*) dst;
-        BYTE* const oend = ostart + dstSize;
-        const void* const dtPtr = DTable+1;
-        const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
-
-        /* Init */
-        BIT_DStream_t bitD1;
-        BIT_DStream_t bitD2;
-        BIT_DStream_t bitD3;
-        BIT_DStream_t bitD4;
-        size_t const length1 = MEM_readLE16(istart);
-        size_t const length2 = MEM_readLE16(istart+2);
-        size_t const length3 = MEM_readLE16(istart+4);
-        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
-        const BYTE* const istart1 = istart + 6;  /* jumpTable */
-        const BYTE* const istart2 = istart1 + length1;
-        const BYTE* const istart3 = istart2 + length2;
-        const BYTE* const istart4 = istart3 + length3;
-        size_t const segmentSize = (dstSize+3) / 4;
-        BYTE* const opStart2 = ostart + segmentSize;
-        BYTE* const opStart3 = opStart2 + segmentSize;
-        BYTE* const opStart4 = opStart3 + segmentSize;
-        BYTE* op1 = ostart;
-        BYTE* op2 = opStart2;
-        BYTE* op3 = opStart3;
-        BYTE* op4 = opStart4;
-        U32 endSignal;
-        DTableDesc const dtd = HUF_getDTableDesc(DTable);
-        U32 const dtLog = dtd.tableLog;
-
-        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
-        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
-        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
-        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
-        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
-
-        /* 16-32 symbols per loop (4-8 symbols per stream) */
-        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-        for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
-            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
-            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
-            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
-            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
-            HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
-            HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
-            HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
-            HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
-            HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
-            HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
-            HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
-            HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
-            HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
-            HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
-            HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
-            HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
-
-            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-        }
-
-        /* check corruption */
-        if (op1 > opStart2) return ERROR(corruption_detected);
-        if (op2 > opStart3) return ERROR(corruption_detected);
-        if (op3 > opStart4) return ERROR(corruption_detected);
-        /* note : op4 already verified within main loop */
-
-        /* finish bitStreams one by one */
-        HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
-        HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
-        HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
-        HUF_decodeStreamX4(op4, &bitD4, oend,     dt, dtLog);
+    HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
+    HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
+    HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
+    HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);
 
     /* check */
     { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
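
The X4 code deleted above is not dropped: it reappears later in the file under the X2 name (the 1.3.5 rename: single-symbol X2 becomes X1, double-symbol X4 becomes X2). Its core trick, visible in HUF_decodeSymbolX4, is to store up to two symbols per table cell and always copy two bytes, advancing the output only by the cell's length. In a standalone miniature (illustrative struct and names; little-endian layout assumed, as the real tables are built with MEM_writeLE16):

    #include <string.h>

    typedef struct { unsigned short sequence;   /* 1 or 2 symbols, LE */
                     unsigned char  nbBits;
                     unsigned char  length; } DEltX4;

    /* Always memcpy 2 bytes; the 1-byte over-write is harmless because the
     * caller keeps slack at the end of the output segment. */
    static unsigned char* decodeStep(unsigned char* op, const DEltX4* cell)
    {
        memcpy(op, &cell->sequence, 2);
        return op + cell->length;   /* advance by symbols actually decoded */
    }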
@@ -493,7 +309,7 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                                const HUF_DTable *DTable);
 #if DYNAMIC_BMI2
 
-#define X(fn)                                       \
+#define HUF_DGEN(fn)                                \
                                                     \
     static size_t fn##_default(                     \
                   void* dst,  size_t dstSize,       \
@@ -522,7 +338,7 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
 
 #else
 
-#define X(fn)                                       \
+#define HUF_DGEN(fn)                                \
     static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
                      size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
     {   \
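
HUF_DGEN (previously the file-local macro X, #undef'd after use) stamps out a _default body, a _bmi2 body, and a dispatcher that picks between them at run time from the bmi2 flag. Schematically, and only as an assumed simplification of the real macro (which forwards to fn##_usingDTable_internal_body):

    /* stand-in for the templated decoder body */
    static size_t body(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
    {
        (void)cSrc; (void)cSrcSize;
        return dstSize;
    }

    static size_t fn_default(void* dst, size_t dstSize,
                             const void* cSrc, size_t cSrcSize)
    {   return body(dst, dstSize, cSrc, cSrcSize); }

    __attribute__((target("bmi2")))   /* GCC/Clang: compile this copy with BMI2 */
    static size_t fn_bmi2(void* dst, size_t dstSize,
                          const void* cSrc, size_t cSrcSize)
    {   return body(dst, dstSize, cSrc, cSrcSize); }

    static size_t fn(void* dst, size_t dstSize, const void* cSrc,
                     size_t cSrcSize, int bmi2)
    {
        if (bmi2) return fn_bmi2(dst, dstSize, cSrc, cSrcSize);
        return fn_default(dst, dstSize, cSrc, cSrcSize);
    }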
@@ -532,112 +348,114 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
 
 #endif
 
-X(HUF_decompress1X2_usingDTable_internal)
-X(HUF_decompress1X4_usingDTable_internal)
-X(HUF_decompress4X4_usingDTable_internal)
-X(HUF_decompress4X2_usingDTable_internal)
+HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
+HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
 
-#undef X
 
 
-size_t HUF_decompress1X2_usingDTable(
+size_t HUF_decompress1X1_usingDTable(
     void* dst,  size_t dstSize,
     const void* cSrc, size_t cSrcSize,
     const HUF_DTable* DTable)
 {
     DTableDesc dtd = HUF_getDTableDesc(DTable);
     if (dtd.tableType != 0) return ERROR(GENERIC);
-    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
 }
 
-size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    void* workSpace, size_t wkspSize)
 {
     const BYTE* ip = (const BYTE*) cSrc;
 
-    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
+    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;
 
-    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
 }
 
 
-size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                               const void* cSrc, size_t cSrcSize)
 {
     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+    return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                        workSpace, sizeof(workSpace));
 }
 
-size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
-    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
-    return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
+    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
+    return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
 }
 
-size_t HUF_decompress4X2_usingDTable(
+size_t HUF_decompress4X1_usingDTable(
     void* dst,  size_t dstSize,
     const void* cSrc, size_t cSrcSize,
     const HUF_DTable* DTable)
 {
     DTableDesc dtd = HUF_getDTableDesc(DTable);
     if (dtd.tableType != 0) return ERROR(GENERIC);
-    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
 }
 
-static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                                const void* cSrc, size_t cSrcSize,
                                                void* workSpace, size_t wkspSize, int bmi2)
 {
     const BYTE* ip = (const BYTE*) cSrc;
 
-    size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
+    size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
                                                 workSpace, wkspSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;
 
-    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
 }
 
-size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    void* workSpace, size_t wkspSize)
 {
-    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
+    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
 }
 
 
-size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+    return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                        workSpace, sizeof(workSpace));
 }
-size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
-    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
-    return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
+    return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
 }
 
 
 /* *************************/
 /* double-symbols decoding */
 /* *************************/
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
+typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
 
-/* HUF_fillDTableX4Level2() :
+
+/* HUF_fillDTableX2Level2() :
  * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
-static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
                            const U32* rankValOrigin, const int minWeight,
                            const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
                            U32 nbBitsBaseline, U16 baseSeq)
 {
-    HUF_DEltX4 DElt;
+    HUF_DEltX2 DElt;
     U32 rankVal[HUF_TABLELOG_MAX + 1];
 
     /* get pre-calculated rankVal */
@@ -672,10 +490,8 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co
     }   }
 }
 
-typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
-typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
 
-static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                            const sortedSymbol_t* sortedList, const U32 sortedListSize,
                            const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                            const U32 nbBitsBaseline)
@@ -700,12 +516,12 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
             int minWeight = nbBits + scaleLog;
             if (minWeight < 1) minWeight = 1;
             sortedRank = rankStart[minWeight];
-            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
                            rankValOrigin[nbBits], minWeight,
                            sortedList+sortedRank, sortedListSize-sortedRank,
                            nbBitsBaseline, symbol);
         } else {
-            HUF_DEltX4 DElt;
+            HUF_DEltX2 DElt;
             MEM_writeLE16(&(DElt.sequence), symbol);
             DElt.nbBits = (BYTE)(nbBits);
             DElt.length = 1;
@@ -717,7 +533,7 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
     }
 }
 
-size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src,
                              size_t srcSize, void* workSpace,
                              size_t wkspSize)
 {
@@ -726,7 +542,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
     U32 const maxTableLog = dtd.maxTableLog;
     size_t iSize;
     void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
-    HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
     U32 *rankStart;
 
     rankValCol_t* rankVal;
@@ -752,7 +568,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
     rankStart = rankStart0 + 1;
     memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
 
-    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
+    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
     if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
     /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
 
@@ -806,7 +622,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
             rankValPtr[w] = rankVal0[w] >> consumed;
     }   }   }   }
 
-    HUF_fillDTableX4(dt, maxTableLog,
+    HUF_fillDTableX2(dt, maxTableLog,
                    sortedSymbol, sizeOfSort,
                    rankStart0, rankVal, maxW,
                    tableLog+1);
@@ -817,112 +633,296 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
|
|
817
633
|
return iSize;
|
818
634
|
}
|
819
635
|
|
820
|
-
size_t
|
636
|
+
size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
|
821
637
|
{
|
822
638
|
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
823
|
-
return
|
639
|
+
return HUF_readDTableX2_wksp(DTable, src, srcSize,
|
824
640
|
workSpace, sizeof(workSpace));
|
825
641
|
}
|
826
642
|
|
827
|
-
|
643
|
+
|
644
|
+
FORCE_INLINE_TEMPLATE U32
|
645
|
+
HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
|
646
|
+
{
|
647
|
+
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
648
|
+
memcpy(op, dt+val, 2);
|
649
|
+
BIT_skipBits(DStream, dt[val].nbBits);
|
650
|
+
return dt[val].length;
|
651
|
+
}
|
652
|
+
|
653
|
+
FORCE_INLINE_TEMPLATE U32
|
654
|
+
HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
|
655
|
+
{
|
656
|
+
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
657
|
+
memcpy(op, dt+val, 1);
|
658
|
+
if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
|
659
|
+
else {
|
660
|
+
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
|
661
|
+
BIT_skipBits(DStream, dt[val].nbBits);
|
662
|
+
if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
|
663
|
+
/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
|
664
|
+
DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
|
665
|
+
} }
|
666
|
+
return 1;
|
667
|
+
}
|
668
|
+
|
669
|
+
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
|
670
|
+
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
|
671
|
+
|
672
|
+
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
|
673
|
+
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
|
674
|
+
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
|
675
|
+
|
676
|
+
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
|
677
|
+
if (MEM_64bits()) \
|
678
|
+
ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
|
679
|
+
|
680
|
+
HINT_INLINE size_t
|
681
|
+
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
|
682
|
+
const HUF_DEltX2* const dt, const U32 dtLog)
|
683
|
+
{
|
684
|
+
BYTE* const pStart = p;
|
685
|
+
|
686
|
+
/* up to 8 symbols at a time */
|
687
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
|
688
|
+
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
689
|
+
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
|
690
|
+
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
691
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
692
|
+
}
|
693
|
+
|
694
|
+
/* closer to end : up to 2 symbols at a time */
|
695
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
|
696
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
697
|
+
|
698
|
+
while (p <= pEnd-2)
|
699
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
|
700
|
+
|
701
|
+
if (p < pEnd)
|
702
|
+
p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
|
703
|
+
|
704
|
+
return p-pStart;
|
705
|
+
}
|
706
|
+
|
707
|
+
FORCE_INLINE_TEMPLATE size_t
|
708
|
+
HUF_decompress1X2_usingDTable_internal_body(
|
709
|
+
void* dst, size_t dstSize,
|
710
|
+
const void* cSrc, size_t cSrcSize,
|
711
|
+
const HUF_DTable* DTable)
|
712
|
+
{
|
713
|
+
BIT_DStream_t bitD;
|
714
|
+
|
715
|
+
/* Init */
|
716
|
+
CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
|
717
|
+
|
718
|
+
/* decode */
|
719
|
+
{ BYTE* const ostart = (BYTE*) dst;
|
720
|
+
BYTE* const oend = ostart + dstSize;
|
721
|
+
const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
|
722
|
+
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
|
723
|
+
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
724
|
+
HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
|
725
|
+
}
|
726
|
+
|
727
|
+
/* check */
|
728
|
+
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
|
729
|
+
|
730
|
+
/* decoded size */
|
731
|
+
return dstSize;
|
732
|
+
}
|
733
|
+
|
734
|
+
|
735
|
+
FORCE_INLINE_TEMPLATE size_t
|
736
|
+
+HUF_decompress4X2_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable+1;
+        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        size_t const segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;
+
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        {   U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+            if (!endCheck) return ERROR(corruption_detected); }
+
+        /* decoded size */
+        return dstSize;
+    }
+}
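The function above is the 4-stream double-symbol decoder: the compressed payload opens with a 6-byte jump table carrying three little-endian 16-bit stream sizes, and the fourth stream's size is whatever input remains. The following standalone sketch of just that framing step may help when reading the body; names with an _example suffix are hypothetical and not part of libzstd.

#include <stddef.h>

typedef struct { const unsigned char* start; size_t size; } StreamSpan_example;

static int HUF_splitStreams_example(const unsigned char* src, size_t srcSize,
                                    StreamSpan_example spans[4])
{
    size_t length1, length2, length3;
    if (srcSize < 10) return -1;                        /* jump table + 1 byte per stream */
    length1 = (size_t)src[0] | ((size_t)src[1] << 8);   /* little-endian 16-bit reads,    */
    length2 = (size_t)src[2] | ((size_t)src[3] << 8);   /* same as MEM_readLE16 above     */
    length3 = (size_t)src[4] | ((size_t)src[5] << 8);
    if (length1 + length2 + length3 + 6 > srcSize) return -1;   /* streams must fit in src */
    spans[0].start = src + 6;                  spans[0].size = length1;
    spans[1].start = spans[0].start + length1; spans[1].size = length2;
    spans[2].start = spans[1].start + length2; spans[2].size = length3;
    spans[3].start = spans[2].start + length3;
    spans[3].size  = srcSize - (length1 + length2 + length3 + 6);
    return 0;
}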
+
+HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
+HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
+
+size_t HUF_decompress1X2_usingDTable(
           void* dst,  size_t dstSize,
     const void* cSrc, size_t cSrcSize,
     const HUF_DTable* DTable)
 {
     DTableDesc dtd = HUF_getDTableDesc(DTable);
     if (dtd.tableType != 1) return ERROR(GENERIC);
-    return
+    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
 }

-size_t
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    void* workSpace, size_t wkspSize)
 {
     const BYTE* ip = (const BYTE*) cSrc;

-    size_t const hSize =
+    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
                                                workSpace, wkspSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;

-    return
+    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
 }


-size_t
+size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
                               const void* cSrc, size_t cSrcSize)
 {
     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return
+    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
                                        workSpace, sizeof(workSpace));
 }

-size_t
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
-
-    return
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+    return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
 }
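The wrappers above form a strict layering: HUF_decompress1X2 declares a static-sized DTable and delegates to _DCtx, which supplies a stack workspace to _DCtx_wksp, which reads the table header with HUF_readDTableX2_wksp and then runs the internal decoder. A hedged usage sketch of the explicit-workspace entry point, assuming this package's huf.h (and its U32/BYTE types) is on the include path; the _example name is hypothetical:

static size_t decompress1X2_example(void* dst, size_t dstSize,
                                    const void* cSrc, size_t cSrcSize)
{
    /* Mirrors what HUF_decompress1X2_DCtx does internally; dst/cSrc
     * sizing and validity are the caller's responsibility. */
    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
    return HUF_decompress1X2_DCtx_wksp(DTable, dst, dstSize,
                                       cSrc, cSrcSize,
                                       workSpace, sizeof(workSpace));
}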

-size_t
+size_t HUF_decompress4X2_usingDTable(
           void* dst,  size_t dstSize,
     const void* cSrc, size_t cSrcSize,
     const HUF_DTable* DTable)
 {
     DTableDesc dtd = HUF_getDTableDesc(DTable);
     if (dtd.tableType != 1) return ERROR(GENERIC);
-    return
+    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
 }

-static size_t
+static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    void* workSpace, size_t wkspSize, int bmi2)
 {
     const BYTE* ip = (const BYTE*) cSrc;

-    size_t hSize =
+    size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
                                          workSpace, wkspSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;

-    return
+    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
 }

-size_t
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
                                    const void* cSrc, size_t cSrcSize,
                                    void* workSpace, size_t wkspSize)
 {
-    return
+    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
 }


-size_t
+size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
                               const void* cSrc, size_t cSrcSize)
 {
     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
-    return
+    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
                                        workSpace, sizeof(workSpace));
 }

-size_t
+size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
-
-    return
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+    return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
 }


-/*
-/*
-/*
+/* ***********************************/
+/* Universal decompression selectors */
+/* ***********************************/

 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
                                     const void* cSrc, size_t cSrcSize,
                                     const HUF_DTable* DTable)
 {
     DTableDesc const dtd = HUF_getDTableDesc(DTable);
-    return dtd.tableType ?
-
+    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
 }

 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
@@ -930,8 +930,8 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
                                     const HUF_DTable* DTable)
 {
     DTableDesc const dtd = HUF_getDTableDesc(DTable);
-    return dtd.tableType ?
-
+    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
 }


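Both universal usingDTable selectors branch on dtd.tableType, which records whether the table was built for the single-symbol ("X1", type 0) or double-symbol ("X2", type 1) decoder. As an illustrative assumption (the real accessor, HUF_getDTableDesc, is defined earlier in this file, and the struct name below is hypothetical), the descriptor occupies the first DTable cell:

#include <string.h>

typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc_example;

static DTableDesc_example readDesc_example(const HUF_DTable* table)
{
    DTableDesc_example d;
    memcpy(&d, table, sizeof(d));   /* header stored in DTable[0] */
    return d;
}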
@@ -960,12 +960,12 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
 /** HUF_selectDecoder() :
  *  Tells which decoder is likely to decode faster,
  *  based on a set of pre-computed metrics.
- * @return : 0==
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
  *  Assumption : 0 < dstSize <= 128 KB */
 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
 {
     assert(dstSize > 0);
-    assert(dstSize <= 128
+    assert(dstSize <= 128*1024);
     /* decoder timing evaluation */
     {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
         U32 const D256 = (U32)(dstSize >> 8);
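Q buckets the compression ratio into sixteen levels and D256 counts 256-byte output blocks; together they index the pre-computed algoTime metrics named in the hunk header. A small, self-contained recomputation with hypothetical sizes:

#include <stdio.h>

/* Illustrative recomputation of HUF_selectDecoder's quantization inputs. */
int main(void)
{
    size_t const dstSize  = 64 * 1024;   /* hypothetical decoded size */
    size_t const cSrcSize = 40 * 1024;   /* hypothetical compressed size */
    unsigned const Q    = (cSrcSize >= dstSize) ? 15 : (unsigned)(cSrcSize * 16 / dstSize);
    unsigned const D256 = (unsigned)(dstSize >> 8);
    printf("Q=%u D256=%u\n", Q, D256);   /* prints Q=10 D256=256 */
    return 0;
}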
@@ -980,7 +980,7 @@ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc,

 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
 {
-    static const decompressionAlgo decompress[2] = {
+    static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };

     /* validation checks */
     if (dstSize == 0) return ERROR(dstSize_tooSmall);
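Once the validation checks pass, HUF_selectDecoder's 0/1 result indexes directly into the decompress[] table initialized above. A standalone sketch of that function-pointer dispatch pattern, with stand-in decoders in place of HUF_decompress4X1/HUF_decompress4X2:

#include <stddef.h>

typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);

/* Stand-ins, illustration only: each just reports the requested size. */
static size_t algoA_example(void* d, size_t ds, const void* s, size_t ss) { (void)d; (void)s; (void)ss; return ds; }
static size_t algoB_example(void* d, size_t ds, const void* s, size_t ss) { (void)d; (void)s; (void)ss; return ds; }

static size_t dispatch_example(unsigned algoNb, void* dst, size_t dstSize,
                               const void* cSrc, size_t cSrcSize)
{
    static const decompressionAlgo table[2] = { algoA_example, algoB_example };  /* 0: single-symbol, 1: double-symbol */
    return table[algoNb](dst, dstSize, cSrc, cSrcSize);
}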
@@ -1002,8 +1002,8 @@ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const
     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-        return algoNb ?
-
+        return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
+                        HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
     }
 }

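The cSrcSize == 1 branch above is the RLE fast path: a one-byte payload means every output byte equals that byte, so no entropy decoding is needed. A minimal illustration of the same shortcut (helper name hypothetical):

#include <string.h>

static size_t rle_expand_example(void* dst, size_t dstSize, const void* cSrc)
{
    /* Expand the single literal byte across the whole output. */
    memset(dst, *(const unsigned char*)cSrc, dstSize);
    return dstSize;
}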
@@ -1025,8 +1025,8 @@ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
     if (cSrcSize == 0) return ERROR(corruption_detected);

     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-        return algoNb ?
-
+        return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
+                        HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
     }
 }

@@ -1041,9 +1041,9 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
     if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */

     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-        return algoNb ?
+        return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                 cSrcSize, workSpace, wkspSize):
-
+                        HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
                                 cSrcSize, workSpace, wkspSize);
     }
 }
@@ -1060,27 +1060,27 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
 {
     DTableDesc const dtd = HUF_getDTableDesc(DTable);
-    return dtd.tableType ?
-
+    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
 }

-size_t
+size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
 {
     const BYTE* ip = (const BYTE*) cSrc;

-    size_t const hSize =
+    size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
     if (HUF_isError(hSize)) return hSize;
     if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
     ip += hSize; cSrcSize -= hSize;

-    return
+    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
 }

 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
 {
     DTableDesc const dtd = HUF_getDTableDesc(DTable);
-    return dtd.tableType ?
-
+    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
 }

 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
@@ -1090,7 +1090,7 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
     if (cSrcSize == 0) return ERROR(corruption_detected);

     {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-        return algoNb ?
-
+        return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
+                        HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
     }
 }
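The bmi2 flag threaded through the _bmi2 wrappers above feeds the runtime dispatch generated by HUF_DGEN (expanded earlier for the 1X2/4X2 internal decoders). As a hedged sketch of the idea, not the macro's exact expansion: when dynamic BMI2 support is compiled in, the _internal_body is emitted twice, once with a BMI2 target attribute, behind a small switch. All _example names below are simplified stand-ins.

#include <stddef.h>

typedef unsigned HUF_DTable_example;   /* stand-in for huf.h's HUF_DTable */

static size_t fn_body_example(void* dst, size_t dstSize, const void* cSrc,
                              size_t cSrcSize, const HUF_DTable_example* DTable)
{   /* placeholder for e.g. HUF_decompress4X2_usingDTable_internal_body */
    (void)dst; (void)cSrc; (void)cSrcSize; (void)DTable; return dstSize;
}

static size_t fn_default_example(void* dst, size_t dstSize, const void* cSrc,
                                 size_t cSrcSize, const HUF_DTable_example* DTable)
{
    return fn_body_example(dst, dstSize, cSrc, cSrcSize, DTable);
}

__attribute__((target("bmi2")))   /* BMI2 codegen for this copy (GCC/Clang, x86) */
static size_t fn_bmi2_example(void* dst, size_t dstSize, const void* cSrc,
                              size_t cSrcSize, const HUF_DTable_example* DTable)
{
    return fn_body_example(dst, dstSize, cSrc, cSrcSize, DTable);
}

static size_t fn_example(void* dst, size_t dstSize, const void* cSrc,
                         size_t cSrcSize, const HUF_DTable_example* DTable, int bmi2)
{
    /* Runtime switch: callers pass the CPU-detection result once, and
     * every wrapper threads it down to this point. */
    return bmi2 ? fn_bmi2_example(dst, dstSize, cSrc, cSrcSize, DTable)
                : fn_default_example(dst, dstSize, cSrc, cSrcSize, DTable);
}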