zstdlib 0.2.0 → 0.7.0
- checksums.yaml +4 -4
- data/CHANGES.md +30 -1
- data/README.md +2 -2
- data/Rakefile +1 -1
- data/ext/zstdlib/extconf.rb +3 -3
- data/ext/zstdlib/ruby/zlib-2.7/zstdlib.c +4895 -0
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/bitstream.h +38 -39
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/compiler.h +40 -5
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/cpu.h +1 -1
- data/ext/zstdlib/zstd-1.4.5/lib/common/debug.c +24 -0
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/debug.h +11 -31
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/entropy_common.c +13 -33
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/error_private.c +2 -1
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/error_private.h +6 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/fse.h +12 -32
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/fse_decompress.c +12 -35
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/huf.h +15 -33
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/mem.h +75 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/pool.c +8 -4
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/pool.h +2 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/threading.c +50 -4
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/threading.h +36 -4
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/xxhash.c +23 -35
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/xxhash.h +11 -31
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_common.c +1 -1
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_errors.h +2 -1
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_internal.h +154 -26
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/fse_compress.c +17 -40
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/hist.c +15 -35
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/hist.h +12 -32
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/huf_compress.c +92 -92
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_compress.c +1191 -1330
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_compress_internal.h +317 -55
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_literals.c +158 -0
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_sequences.c +419 -0
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.c +845 -0
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_cwksp.h +525 -0
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_double_fast.c +65 -43
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_double_fast.h +2 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_fast.c +92 -66
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_fast.h +2 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_lazy.c +74 -42
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_lazy.h +1 -1
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_ldm.c +32 -10
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_ldm.h +7 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_opt.c +81 -114
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_opt.h +1 -1
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstdmt_compress.c +95 -51
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstdmt_compress.h +3 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/huf_decompress.c +76 -60
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_ddict.c +12 -8
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_ddict.h +2 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress.c +292 -172
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.c +459 -338
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.h +3 -3
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_internal.h +18 -4
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/zstd.h +265 -88
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzclose.c +1 -1
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzcompatibility.h +1 -1
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzguts.h +0 -0
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzlib.c +9 -9
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzread.c +16 -8
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzwrite.c +8 -8
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.c +16 -12
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.h +1 -1
- metadata +69 -62
- data/ext/zstdlib/zstd-1.4.0/lib/common/debug.c +0 -44
data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstdmt_compress.c:

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -22,9 +22,9 @@
 /* ======   Dependencies   ====== */
 #include <string.h>      /* memcpy, memset */
 #include <limits.h>      /* INT_MAX, UINT_MAX */
-#include "mem.h"         /* MEM_STATIC */
-#include "pool.h"        /* threadpool */
-#include "threading.h"   /* mutex */
+#include "../common/mem.h"         /* MEM_STATIC */
+#include "../common/pool.h"        /* threadpool */
+#include "../common/threading.h"   /* mutex */
 #include "zstd_compress_internal.h"  /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
 #include "zstd_ldm.h"
 #include "zstdmt_compress.h"
@@ -461,7 +461,13 @@ typedef struct {
     ZSTD_window_t ldmWindow;  /* A thread-safe copy of ldmState.window */
 } serialState_t;
 
-static int
+static int
+ZSTDMT_serialState_reset(serialState_t* serialState,
+                         ZSTDMT_seqPool* seqPool,
+                         ZSTD_CCtx_params params,
+                         size_t jobSize,
+                         const void* dict, size_t const dictSize,
+                         ZSTD_dictContentType_e dictContentType)
 {
     /* Adjust parameters */
     if (params.ldmParams.enableLdm) {
@@ -490,8 +496,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
     /* Size the seq pool tables */
     ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
     /* Reset the window */
-
-    serialState->ldmWindow = serialState->ldmState.window;
+    ZSTD_window_init(&serialState->ldmState.window);
     /* Resize tables and output space if necessary. */
     if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
         ZSTD_free(serialState->ldmState.hashTable, cMem);
@@ -506,7 +511,24 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
     /* Zero the tables */
     memset(serialState->ldmState.hashTable, 0, hashSize);
     memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
+
+        /* Update window state and fill hash table with dict */
+        serialState->ldmState.loadedDictEnd = 0;
+        if (dictSize > 0) {
+            if (dictContentType == ZSTD_dct_rawContent) {
+                BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
+                ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
+                ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
+                serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
+            } else {
+                /* don't even load anything */
+            }
+        }
+
+        /* Initialize serialState's copy of ldmWindow. */
+        serialState->ldmWindow = serialState->ldmState.window;
     }
+
     serialState->params = params;
     serialState->params.jobSize = (U32)jobSize;
     return 0;
@@ -668,7 +690,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
 
     /* init */
     if (job->cdict) {
-        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
+        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
         assert(job->firstJob);  /* only allowed for first job */
         if (ZSTD_isError(initError)) JOB_ERROR(initError);
     } else {  /* srcStart points at reloaded section */
@@ -680,7 +702,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
                                         job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
                                         ZSTD_dtlm_fast,
                                         NULL, /*cdict*/
-                                        jobParams, pledgedSrcSize);
+                                        &jobParams, pledgedSrcSize);
         if (ZSTD_isError(initError)) JOB_ERROR(initError);
     }   }
 
@@ -927,12 +949,18 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
     unsigned jobID;
     DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
     for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
+        /* Copy the mutex/cond out */
+        ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
+        ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
+
         DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
         ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
-
-
+
+        /* Clear the job description, but keep the mutex/cond */
+        memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
+        mtctx->jobs[jobID].job_mutex = mutex;
+        mtctx->jobs[jobID].job_cond = cond;
     }
-    memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
     mtctx->inBuff.buffer = g_nullBuffer;
     mtctx->inBuff.filled = 0;
     mtctx->allJobsCompleted = 1;
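The hunk above trades the old table-wide memset for a per-job reset that keeps each job's mutex and condition variable alive across the wipe. A minimal standalone sketch of that pattern follows; job_t and reset_job_keep_sync() are hypothetical illustration names, not zstd API:

```c
/* Sketch of the "wipe the struct, keep its sync primitives" pattern from the
 * hunk above. job_t and reset_job_keep_sync() are hypothetical names. */
#include <pthread.h>
#include <string.h>

typedef struct {
    pthread_mutex_t mutex;   /* initialized once, must outlive every reset */
    pthread_cond_t  cond;
    void*           dstBuff;
    size_t          cSize;
} job_t;

static void reset_job_keep_sync(job_t* job)
{
    /* copy the initialized mutex/cond out ... */
    pthread_mutex_t const mutex = job->mutex;
    pthread_cond_t  const cond  = job->cond;
    /* ... wipe the whole descriptor in one go ... */
    memset(job, 0, sizeof(*job));
    /* ... and put them back at the same location, so later use or destruction
     * still operates on initialized objects rather than zeroed memory. */
    job->mutex = mutex;
    job->cond  = cond;
}
```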
@@ -1028,9 +1056,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
 
 /* Sets parameters relevant to the compression job,
  * initializing others to default values. */
-static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params
+static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
 {
-    ZSTD_CCtx_params jobParams = params;
+    ZSTD_CCtx_params jobParams = *params;
     /* Clear parameters related to multithreading */
     jobParams.forceWindow = 0;
     jobParams.nbWorkers = 0;
@@ -1048,7 +1076,7 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
 static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
 {
     if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
-    FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
+    FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
     mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
     if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
     mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
@@ -1070,7 +1098,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
     DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
                 compressionLevel);
     mtctx->params.compressionLevel = compressionLevel;
-    {   ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams,
+    {   ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0);
         cParams.windowLog = saved_wlog;
         mtctx->params.cParams = cParams;
     }
@@ -1129,9 +1157,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
         size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
         size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
         assert(flushed <= produced);
+        assert(jobPtr->consumed <= jobPtr->src.size);
         toFlush = produced - flushed;
-        if
-
+        /* if toFlush==0, nothing is available to flush.
+         * However, jobID is expected to still be active:
+         * if jobID was already completed and fully flushed,
+         * ZSTDMT_flushProduced() should have already moved onto next job.
+         * Therefore, some input has not yet been consumed. */
+        if (toFlush==0) {
             assert(jobPtr->consumed < jobPtr->src.size);
         }
     }
@@ -1146,14 +1179,18 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
 /* =====   Multi-threaded compression   ===== */
 /* ------------------------------------------ */
 
-static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params
+static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
 {
-
+    unsigned jobLog;
+    if (params->ldmParams.enableLdm) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on chainLog instead. */
-
-
+        jobLog = MAX(21, params->cParams.chainLog + 4);
+    } else {
+        jobLog = MAX(20, params->cParams.windowLog + 2);
+    }
+    return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
 }
 
 static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
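The rewritten ZSTDMT_computeTargetJobLog above now clamps its result with the new ZSTDMT_JOBLOG_MAX constant (29 on 32-bit targets, 30 otherwise, per the zstdmt_compress.h hunk further down). A small standalone sketch of that arithmetic; compute_job_log() and the windowLog/chainLog values are illustrative assumptions, not zstd API:

```c
/* Standalone sketch of the job-size heuristic from the hunk above.
 * The windowLog/chainLog values below are made up for illustration. */
#include <stdio.h>

static unsigned max_u(unsigned a, unsigned b) { return a > b ? a : b; }
static unsigned min_u(unsigned a, unsigned b) { return a < b ? a : b; }

static unsigned compute_job_log(int ldmEnabled, unsigned chainLog, unsigned windowLog, int is32bit)
{
    unsigned const jobLogMax = is32bit ? 29 : 30;   /* mirrors ZSTDMT_JOBLOG_MAX */
    unsigned const jobLog = ldmEnabled
        ? max_u(21, chainLog + 4)    /* LDM: windowLog is oversized, so size jobs off chainLog */
        : max_u(20, windowLog + 2);
    return min_u(jobLog, jobLogMax);
}

int main(void)
{
    unsigned const wlog = 23, clog = 23;   /* illustrative cParams only */
    printf("jobLog without LDM: %u (job size %u MiB)\n",
           compute_job_log(0, clog, wlog, 0), 1u << (compute_job_log(0, clog, wlog, 0) - 20));
    printf("jobLog with LDM   : %u (job size %u MiB)\n",
           compute_job_log(1, clog, wlog, 0), 1u << (compute_job_log(1, clog, wlog, 0) - 20));
    return 0;
}
```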
@@ -1184,27 +1221,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
     return ovlog;
 }
 
-static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params
+static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
 {
-    int const overlapRLog = 9 - ZSTDMT_overlapLog(params
-    int ovLog = (overlapRLog >= 8) ? 0 : (params
+    int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
+    int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
     assert(0 <= overlapRLog && overlapRLog <= 8);
-    if (params
+    if (params->ldmParams.enableLdm) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on chainLog instead.
          * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
-        ovLog = MIN(params
+        ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
                 - overlapRLog;
     }
-    assert(0 <= ovLog && ovLog <=
-    DEBUGLOG(4, "overlapLog : %i", params
+    assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
+    DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
     DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
     return (ovLog==0) ? 0 : (size_t)1 << ovLog;
 }
 
 static unsigned
-ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
+ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
 {
     assert(nbWorkers>0);
     {   size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
@@ -1220,16 +1257,17 @@ ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers
 /* ZSTDMT_compress_advanced_internal() :
  * This is a blocking function : it will only give back control to caller after finishing its compression job.
  */
-static size_t
+static size_t
+ZSTDMT_compress_advanced_internal(
                 ZSTDMT_CCtx* mtctx,
                 void* dst, size_t dstCapacity,
           const void* src, size_t srcSize,
           const ZSTD_CDict* cdict,
                 ZSTD_CCtx_params params)
 {
-    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
-    size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
-    unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
+    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
+    size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
+    unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
     size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
     size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize;   /* avoid too small last block */
     const char* const srcStart = (const char*)src;
@@ -1247,15 +1285,16 @@ static size_t ZSTDMT_compress_advanced_internal(
         ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
         DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
         if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
-        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
+        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
     }
 
     assert(avgJobSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
     ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
-
+    /* LDM doesn't even try to load the dictionary in single-ingestion mode */
+    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize, NULL, 0, ZSTD_dct_auto))
         return ERROR(memory_allocation);
 
-    FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) );  /* only expands if necessary */
+    FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) , "");  /* only expands if necessary */
 
     {   unsigned u;
         for (u=0; u<nbJobs; u++) {
@@ -1388,19 +1427,19 @@ size_t ZSTDMT_initCStream_internal(
 
     /* init */
     if (params.nbWorkers != mtctx->params.nbWorkers)
-        FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
+        FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , "");
 
     if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
-    if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
+    if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
 
     mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
     if (mtctx->singleBlockingThread) {
-        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
+        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
         DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
         assert(singleThreadParams.nbWorkers == 0);
         return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
                                          dict, dictSize, cdict,
-                                         singleThreadParams, pledgedSrcSize);
+                                         &singleThreadParams, pledgedSrcSize);
     }
 
     DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
@@ -1426,12 +1465,14 @@ size_t ZSTDMT_initCStream_internal(
         mtctx->cdict = cdict;
     }
 
-    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
+    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
     DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
     mtctx->targetSectionSize = params.jobSize;
     if (mtctx->targetSectionSize == 0) {
-        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
+        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
     }
+    assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
+
     if (params.rsyncable) {
         /* Aim for the targetsectionSize as the average job size. */
         U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
@@ -1483,7 +1524,8 @@ size_t ZSTDMT_initCStream_internal(
     mtctx->allJobsCompleted = 0;
     mtctx->consumed = 0;
     mtctx->produced = 0;
-    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize
+    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize,
+                                 dict, dictSize, dictContentType))
         return ERROR(memory_allocation);
     return 0;
 }
@@ -1697,9 +1739,11 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
             assert(mtctx->doneJobID < mtctx->nextJobID);
             assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
             assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
-
-
-
+            if (toFlush > 0) {
+                memcpy((char*)output->dst + output->pos,
+                    (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
+                    toFlush);
+            }
             output->pos += toFlush;
             mtctx->jobs[wJobID].dstFlushed += toFlush;  /* can write : this value is only used by mtctx */
 
@@ -1769,7 +1813,7 @@ static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
     BYTE const* const bufferStart = (BYTE const*)buffer.start;
     BYTE const* const bufferEnd = bufferStart + buffer.capacity;
     BYTE const* const rangeStart = (BYTE const*)range.start;
-    BYTE const* const rangeEnd = rangeStart + range.size;
+    BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;
 
     if (rangeStart == NULL || bufferStart == NULL)
         return 0;
@@ -2043,7 +2087,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
         || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) {   /* must finish the frame with a zero-size block */
         size_t const jobSize = mtctx->inBuff.filled;
         assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
-        FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
+        FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , "");
     }
 
     /* check for potential compressed data ready to be flushed */
@@ -2057,7 +2101,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
 
 size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
 {
-    FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
+    FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) , "");
 
     /* recommended next input size : fill current input buffer */
     return mtctx->targetSectionSize - mtctx->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
@@ -2074,7 +2118,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
         || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) {  /* need a last 0-size block to end frame */
         DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
                     (U32)srcSize, (U32)endFrame);
-        FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
+        FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) , "");
     }
 
     /* check if there is any data available to flush */
data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstdmt_compress.h:

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -40,7 +40,7 @@
 /* ===   Dependencies   === */
 #include <stddef.h>                /* size_t */
 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
-#include "zstd.h"            /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
+#include "../zstd.h"            /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
 
 
 /* ===   Constants   === */
@@ -50,6 +50,7 @@
 #ifndef ZSTDMT_JOBSIZE_MIN
 #  define ZSTDMT_JOBSIZE_MIN (1 MB)
 #endif
+#define ZSTDMT_JOBLOG_MAX   (MEM_32bits() ? 29 : 30)
 #define ZSTDMT_JOBSIZE_MAX  (MEM_32bits() ? (512 MB) : (1024 MB))
 
 
data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/huf_decompress.c:

@@ -1,47 +1,27 @@
 /* ******************************************************************
-
-
-
-
-
-
-
-
-
-
-   notice, this list of conditions and the following disclaimer.
-   * Redistributions in binary form must reproduce the above
-   copyright notice, this list of conditions and the following disclaimer
-   in the documentation and/or other materials provided with the
-   distribution.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-   You can contact the author at :
-   - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * huff0 huffman decoder,
+ * part of Finite State Entropy library
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
 ****************************************************************** */
 
 /* **************************************************************
 *  Dependencies
 ****************************************************************/
 #include <string.h>     /* memcpy, memset */
-#include "compiler.h"
-#include "bitstream.h"  /* BIT_* */
-#include "fse.h"        /* to compress headers */
+#include "../common/compiler.h"
+#include "../common/bitstream.h"  /* BIT_* */
+#include "../common/fse.h"        /* to compress headers */
 #define HUF_STATIC_LINKING_ONLY
-#include "huf.h"
-#include "error_private.h"
+#include "../common/huf.h"
+#include "../common/error_private.h"
 
 /* **************************************************************
 *  Macros
@@ -61,7 +41,6 @@
 *  Error Management
 ****************************************************************/
 #define HUF_isError ERR_isError
-#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
 
 
 /* **************************************************************
@@ -179,17 +158,29 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
 
     /* fill DTable */
     {   U32 n;
-
-
-
-
+        size_t const nEnd = nbSymbols;
+        for (n=0; n<nEnd; n++) {
+            size_t const w = huffWeight[n];
+            size_t const length = (1 << w) >> 1;
+            size_t const uStart = rankVal[w];
+            size_t const uEnd = uStart + length;
+            size_t u;
             HUF_DEltX1 D;
-            D.byte = (BYTE)n;
-
-
-
-
-
+            D.byte = (BYTE)n;
+            D.nbBits = (BYTE)(tableLog + 1 - w);
+            rankVal[w] = (U32)uEnd;
+            if (length < 4) {
+                /* Use length in the loop bound so the compiler knows it is short. */
+                for (u = 0; u < length; ++u)
+                    dt[uStart + u] = D;
+            } else {
+                /* Unroll the loop 4 times, we know it is a power of 2. */
+                for (u = uStart; u < uEnd; u += 4) {
+                    dt[u + 0] = D;
+                    dt[u + 1] = D;
+                    dt[u + 2] = D;
+                    dt[u + 3] = D;
+    }   }   }   }
     return iSize;
 }
 
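The rewritten fill loop above relies on the weight arithmetic visible in the diff: a symbol of weight w occupies (1 << w) >> 1 consecutive DTable cells and decodes in tableLog + 1 - w bits, which is why the cell count is always a power of two and can be unrolled by four. A tiny self-contained check of that accounting, using a made-up weight set:

```c
/* Quick check of the weight -> (cells, nbBits) accounting used when filling
 * the X1 DTable above. The weights[] values are made up for illustration. */
#include <stdio.h>

int main(void)
{
    unsigned const tableLog = 3;                /* 1 << 3 = 8 table cells */
    unsigned const weights[] = { 3, 2, 1, 1 };  /* hypothetical symbol weights */
    unsigned used = 0;
    for (unsigned n = 0; n < sizeof(weights)/sizeof(weights[0]); ++n) {
        unsigned const w      = weights[n];
        unsigned const length = (1u << w) >> 1;     /* cells filled with this symbol */
        unsigned const nbBits = tableLog + 1 - w;   /* bits consumed per decode */
        printf("symbol %u: weight %u -> %u cells, %u bits\n", n, w, length, nbBits);
        used += length;
    }
    /* a complete weight set fills the table exactly */
    printf("cells used: %u / %u\n", used, 1u << tableLog);
    return 0;
}
```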
@@ -280,6 +271,7 @@ HUF_decompress4X1_usingDTable_internal_body(
     {   const BYTE* const istart = (const BYTE*) cSrc;
         BYTE* const ostart = (BYTE*) dst;
         BYTE* const oend = ostart + dstSize;
+        BYTE* const olimit = oend - 3;
         const void* const dtPtr = DTable + 1;
         const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
 
@@ -304,9 +296,9 @@ HUF_decompress4X1_usingDTable_internal_body(
         BYTE* op2 = opStart2;
         BYTE* op3 = opStart3;
         BYTE* op4 = opStart4;
-        U32 endSignal = BIT_DStream_unfinished;
         DTableDesc const dtd = HUF_getDTableDesc(DTable);
         U32 const dtLog = dtd.tableLog;
+        U32 endSignal = 1;
 
         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
@@ -315,8 +307,7 @@ HUF_decompress4X1_usingDTable_internal_body(
         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
 
         /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
-
-        while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
+        for ( ; (endSignal) & (op4 < olimit) ; ) {
             HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
             HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
             HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
@@ -333,10 +324,10 @@ HUF_decompress4X1_usingDTable_internal_body(
             HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
             HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
             HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
-
-
-
-
+            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
         }
 
         /* check corruption */
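The loop-control changes above replace the early-exiting `while ((endSignal==BIT_DStream_unfinished) && ...)` with a `for` whose condition uses bitwise `&`, and they accumulate the four stream states with `endSignal &= ...` so every stream is reloaded each iteration without short-circuit branches. A stripped-down sketch of that accumulation pattern, with a fake refill() standing in for BIT_reloadDStreamFast():

```c
/* Stripped-down sketch of the branchless status accumulation used above.
 * stream_t and refill() are fake stand-ins, not zstd's bitstream API. */
#include <stdio.h>

typedef struct { unsigned wordsLeft; } stream_t;

static unsigned refill(stream_t* s)   /* returns 1 while the stream still has input */
{
    if (s->wordsLeft == 0) return 0;
    s->wordsLeft--;
    return s->wordsLeft != 0;
}

int main(void)
{
    stream_t s1 = {9}, s2 = {7}, s3 = {8}, s4 = {5};
    unsigned endSignal = 1;
    unsigned rounds = 0;
    for ( ; endSignal; ++rounds) {
        /* '&=' (not '&&') keeps refilling all four streams each round,
         * so one accumulated flag replaces four separately tested exits. */
        endSignal &= refill(&s1);
        endSignal &= refill(&s2);
        endSignal &= refill(&s3);
        endSignal &= refill(&s4);
    }
    printf("stopped after %u rounds; the shortest stream ran dry\n", rounds);
    return 0;
}
```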
@@ -755,7 +746,6 @@ HUF_decompress1X2_usingDTable_internal_body(
     return dstSize;
 }
 
-
 FORCE_INLINE_TEMPLATE size_t
 HUF_decompress4X2_usingDTable_internal_body(
     void* dst,  size_t dstSize,
@@ -767,6 +757,7 @@ HUF_decompress4X2_usingDTable_internal_body(
     {   const BYTE* const istart = (const BYTE*) cSrc;
         BYTE* const ostart = (BYTE*) dst;
         BYTE* const oend = ostart + dstSize;
+        BYTE* const olimit = oend - (sizeof(size_t)-1);
         const void* const dtPtr = DTable+1;
         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
 
@@ -791,7 +782,7 @@ HUF_decompress4X2_usingDTable_internal_body(
         BYTE* op2 = opStart2;
         BYTE* op3 = opStart3;
         BYTE* op4 = opStart4;
-        U32 endSignal;
+        U32 endSignal = 1;
         DTableDesc const dtd = HUF_getDTableDesc(DTable);
         U32 const dtLog = dtd.tableLog;
 
@@ -802,8 +793,29 @@ HUF_decompress4X2_usingDTable_internal_body(
         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
 
         /* 16-32 symbols per loop (4-8 symbols per stream) */
-
-
+        for ( ; (endSignal) & (op4 < olimit); ) {
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+#else
             HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
             HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
             HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
@@ -820,8 +832,12 @@ HUF_decompress4X2_usingDTable_internal_body(
             HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
             HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
             HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
-
-
+            endSignal = (U32)LIKELY(
+                        (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
+#endif
         }
 
         /* check corruption */