zstdlib 0.2.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +30 -1
- data/README.md +2 -2
- data/Rakefile +1 -1
- data/ext/zstdlib/extconf.rb +3 -3
- data/ext/zstdlib/ruby/zlib-2.7/zstdlib.c +4895 -0
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/bitstream.h +38 -39
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/compiler.h +40 -5
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/cpu.h +1 -1
- data/ext/zstdlib/zstd-1.4.5/lib/common/debug.c +24 -0
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/debug.h +11 -31
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/entropy_common.c +13 -33
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/error_private.c +2 -1
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/error_private.h +6 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/fse.h +12 -32
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/fse_decompress.c +12 -35
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/huf.h +15 -33
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/mem.h +75 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/pool.c +8 -4
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/pool.h +2 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/threading.c +50 -4
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/threading.h +36 -4
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/xxhash.c +23 -35
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/xxhash.h +11 -31
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_common.c +1 -1
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_errors.h +2 -1
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_internal.h +154 -26
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/fse_compress.c +17 -40
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/hist.c +15 -35
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/hist.h +12 -32
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/huf_compress.c +92 -92
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_compress.c +1191 -1330
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_compress_internal.h +317 -55
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_literals.c +158 -0
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_sequences.c +419 -0
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.c +845 -0
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_cwksp.h +525 -0
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_double_fast.c +65 -43
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_double_fast.h +2 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_fast.c +92 -66
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_fast.h +2 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_lazy.c +74 -42
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_lazy.h +1 -1
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_ldm.c +32 -10
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_ldm.h +7 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_opt.c +81 -114
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_opt.h +1 -1
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstdmt_compress.c +95 -51
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstdmt_compress.h +3 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/huf_decompress.c +76 -60
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_ddict.c +12 -8
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_ddict.h +2 -2
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress.c +292 -172
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.c +459 -338
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.h +3 -3
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_internal.h +18 -4
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/zstd.h +265 -88
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzclose.c +1 -1
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzcompatibility.h +1 -1
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzguts.h +0 -0
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzlib.c +9 -9
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzread.c +16 -8
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzwrite.c +8 -8
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.c +16 -12
- data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.h +1 -1
- metadata +69 -62
- data/ext/zstdlib/zstd-1.4.0/lib/common/debug.c +0 -44
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -22,9 +22,9 @@
|
|
|
22
22
|
/* ====== Dependencies ====== */
|
|
23
23
|
#include <string.h> /* memcpy, memset */
|
|
24
24
|
#include <limits.h> /* INT_MAX, UINT_MAX */
|
|
25
|
-
#include "mem.h" /* MEM_STATIC */
|
|
26
|
-
#include "pool.h" /* threadpool */
|
|
27
|
-
#include "threading.h" /* mutex */
|
|
25
|
+
#include "../common/mem.h" /* MEM_STATIC */
|
|
26
|
+
#include "../common/pool.h" /* threadpool */
|
|
27
|
+
#include "../common/threading.h" /* mutex */
|
|
28
28
|
#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
|
|
29
29
|
#include "zstd_ldm.h"
|
|
30
30
|
#include "zstdmt_compress.h"
|
|
@@ -461,7 +461,13 @@ typedef struct {
|
|
|
461
461
|
ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
|
|
462
462
|
} serialState_t;
|
|
463
463
|
|
|
464
|
-
static int
|
|
464
|
+
static int
|
|
465
|
+
ZSTDMT_serialState_reset(serialState_t* serialState,
|
|
466
|
+
ZSTDMT_seqPool* seqPool,
|
|
467
|
+
ZSTD_CCtx_params params,
|
|
468
|
+
size_t jobSize,
|
|
469
|
+
const void* dict, size_t const dictSize,
|
|
470
|
+
ZSTD_dictContentType_e dictContentType)
|
|
465
471
|
{
|
|
466
472
|
/* Adjust parameters */
|
|
467
473
|
if (params.ldmParams.enableLdm) {
|
|
@@ -490,8 +496,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
|
|
|
490
496
|
/* Size the seq pool tables */
|
|
491
497
|
ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
|
|
492
498
|
/* Reset the window */
|
|
493
|
-
|
|
494
|
-
serialState->ldmWindow = serialState->ldmState.window;
|
|
499
|
+
ZSTD_window_init(&serialState->ldmState.window);
|
|
495
500
|
/* Resize tables and output space if necessary. */
|
|
496
501
|
if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
|
|
497
502
|
ZSTD_free(serialState->ldmState.hashTable, cMem);
|
|
@@ -506,7 +511,24 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
|
|
|
506
511
|
/* Zero the tables */
|
|
507
512
|
memset(serialState->ldmState.hashTable, 0, hashSize);
|
|
508
513
|
memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
|
|
514
|
+
|
|
515
|
+
/* Update window state and fill hash table with dict */
|
|
516
|
+
serialState->ldmState.loadedDictEnd = 0;
|
|
517
|
+
if (dictSize > 0) {
|
|
518
|
+
if (dictContentType == ZSTD_dct_rawContent) {
|
|
519
|
+
BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
|
|
520
|
+
ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
|
|
521
|
+
ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
|
|
522
|
+
serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
|
|
523
|
+
} else {
|
|
524
|
+
/* don't even load anything */
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
/* Initialize serialState's copy of ldmWindow. */
|
|
529
|
+
serialState->ldmWindow = serialState->ldmState.window;
|
|
509
530
|
}
|
|
531
|
+
|
|
510
532
|
serialState->params = params;
|
|
511
533
|
serialState->params.jobSize = (U32)jobSize;
|
|
512
534
|
return 0;
|
|
@@ -668,7 +690,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
|
|
668
690
|
|
|
669
691
|
/* init */
|
|
670
692
|
if (job->cdict) {
|
|
671
|
-
size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
|
|
693
|
+
size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
|
|
672
694
|
assert(job->firstJob); /* only allowed for first job */
|
|
673
695
|
if (ZSTD_isError(initError)) JOB_ERROR(initError);
|
|
674
696
|
} else { /* srcStart points at reloaded section */
|
|
@@ -680,7 +702,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
|
|
680
702
|
job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
|
|
681
703
|
ZSTD_dtlm_fast,
|
|
682
704
|
NULL, /*cdict*/
|
|
683
|
-
jobParams, pledgedSrcSize);
|
|
705
|
+
&jobParams, pledgedSrcSize);
|
|
684
706
|
if (ZSTD_isError(initError)) JOB_ERROR(initError);
|
|
685
707
|
} }
|
|
686
708
|
|
|
@@ -927,12 +949,18 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
|
|
|
927
949
|
unsigned jobID;
|
|
928
950
|
DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
|
|
929
951
|
for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
|
|
952
|
+
/* Copy the mutex/cond out */
|
|
953
|
+
ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
|
|
954
|
+
ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
|
|
955
|
+
|
|
930
956
|
DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
|
|
931
957
|
ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
|
|
932
|
-
|
|
933
|
-
|
|
958
|
+
|
|
959
|
+
/* Clear the job description, but keep the mutex/cond */
|
|
960
|
+
memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
|
|
961
|
+
mtctx->jobs[jobID].job_mutex = mutex;
|
|
962
|
+
mtctx->jobs[jobID].job_cond = cond;
|
|
934
963
|
}
|
|
935
|
-
memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
|
|
936
964
|
mtctx->inBuff.buffer = g_nullBuffer;
|
|
937
965
|
mtctx->inBuff.filled = 0;
|
|
938
966
|
mtctx->allJobsCompleted = 1;
|
|
@@ -1028,9 +1056,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
|
|
|
1028
1056
|
|
|
1029
1057
|
/* Sets parameters relevant to the compression job,
|
|
1030
1058
|
* initializing others to default values. */
|
|
1031
|
-
static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
|
|
1059
|
+
static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
|
|
1032
1060
|
{
|
|
1033
|
-
ZSTD_CCtx_params jobParams = params;
|
|
1061
|
+
ZSTD_CCtx_params jobParams = *params;
|
|
1034
1062
|
/* Clear parameters related to multithreading */
|
|
1035
1063
|
jobParams.forceWindow = 0;
|
|
1036
1064
|
jobParams.nbWorkers = 0;
|
|
@@ -1048,7 +1076,7 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
|
|
|
1048
1076
|
static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
|
|
1049
1077
|
{
|
|
1050
1078
|
if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
|
|
1051
|
-
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
|
|
1079
|
+
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
|
|
1052
1080
|
mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
|
|
1053
1081
|
if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
|
|
1054
1082
|
mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
|
|
@@ -1070,7 +1098,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
|
|
|
1070
1098
|
DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
|
|
1071
1099
|
compressionLevel);
|
|
1072
1100
|
mtctx->params.compressionLevel = compressionLevel;
|
|
1073
|
-
{ ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams,
|
|
1101
|
+
{ ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0);
|
|
1074
1102
|
cParams.windowLog = saved_wlog;
|
|
1075
1103
|
mtctx->params.cParams = cParams;
|
|
1076
1104
|
}
|
|
@@ -1129,9 +1157,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
|
|
|
1129
1157
|
size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
|
|
1130
1158
|
size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
|
|
1131
1159
|
assert(flushed <= produced);
|
|
1160
|
+
assert(jobPtr->consumed <= jobPtr->src.size);
|
|
1132
1161
|
toFlush = produced - flushed;
|
|
1133
|
-
if
|
|
1134
|
-
|
|
1162
|
+
/* if toFlush==0, nothing is available to flush.
|
|
1163
|
+
* However, jobID is expected to still be active:
|
|
1164
|
+
* if jobID was already completed and fully flushed,
|
|
1165
|
+
* ZSTDMT_flushProduced() should have already moved onto next job.
|
|
1166
|
+
* Therefore, some input has not yet been consumed. */
|
|
1167
|
+
if (toFlush==0) {
|
|
1135
1168
|
assert(jobPtr->consumed < jobPtr->src.size);
|
|
1136
1169
|
}
|
|
1137
1170
|
}
|
|
@@ -1146,14 +1179,18 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
|
|
|
1146
1179
|
/* ===== Multi-threaded compression ===== */
|
|
1147
1180
|
/* ------------------------------------------ */
|
|
1148
1181
|
|
|
1149
|
-
static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params
|
|
1182
|
+
static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
|
|
1150
1183
|
{
|
|
1151
|
-
|
|
1184
|
+
unsigned jobLog;
|
|
1185
|
+
if (params->ldmParams.enableLdm) {
|
|
1152
1186
|
/* In Long Range Mode, the windowLog is typically oversized.
|
|
1153
1187
|
* In which case, it's preferable to determine the jobSize
|
|
1154
1188
|
* based on chainLog instead. */
|
|
1155
|
-
|
|
1156
|
-
|
|
1189
|
+
jobLog = MAX(21, params->cParams.chainLog + 4);
|
|
1190
|
+
} else {
|
|
1191
|
+
jobLog = MAX(20, params->cParams.windowLog + 2);
|
|
1192
|
+
}
|
|
1193
|
+
return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
|
|
1157
1194
|
}
|
|
1158
1195
|
|
|
1159
1196
|
static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
|
|
@@ -1184,27 +1221,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
|
|
|
1184
1221
|
return ovlog;
|
|
1185
1222
|
}
|
|
1186
1223
|
|
|
1187
|
-
static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params
|
|
1224
|
+
static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
|
|
1188
1225
|
{
|
|
1189
|
-
int const overlapRLog = 9 - ZSTDMT_overlapLog(params
|
|
1190
|
-
int ovLog = (overlapRLog >= 8) ? 0 : (params
|
|
1226
|
+
int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
|
|
1227
|
+
int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
|
|
1191
1228
|
assert(0 <= overlapRLog && overlapRLog <= 8);
|
|
1192
|
-
if (params
|
|
1229
|
+
if (params->ldmParams.enableLdm) {
|
|
1193
1230
|
/* In Long Range Mode, the windowLog is typically oversized.
|
|
1194
1231
|
* In which case, it's preferable to determine the jobSize
|
|
1195
1232
|
* based on chainLog instead.
|
|
1196
1233
|
* Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
|
|
1197
|
-
ovLog = MIN(params
|
|
1234
|
+
ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
|
|
1198
1235
|
- overlapRLog;
|
|
1199
1236
|
}
|
|
1200
|
-
assert(0 <= ovLog && ovLog <=
|
|
1201
|
-
DEBUGLOG(4, "overlapLog : %i", params
|
|
1237
|
+
assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
|
|
1238
|
+
DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
|
|
1202
1239
|
DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
|
|
1203
1240
|
return (ovLog==0) ? 0 : (size_t)1 << ovLog;
|
|
1204
1241
|
}
|
|
1205
1242
|
|
|
1206
1243
|
static unsigned
|
|
1207
|
-
ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
|
|
1244
|
+
ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
|
|
1208
1245
|
{
|
|
1209
1246
|
assert(nbWorkers>0);
|
|
1210
1247
|
{ size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
|
|
@@ -1220,16 +1257,17 @@ ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers
|
|
|
1220
1257
|
/* ZSTDMT_compress_advanced_internal() :
|
|
1221
1258
|
* This is a blocking function : it will only give back control to caller after finishing its compression job.
|
|
1222
1259
|
*/
|
|
1223
|
-
static size_t
|
|
1260
|
+
static size_t
|
|
1261
|
+
ZSTDMT_compress_advanced_internal(
|
|
1224
1262
|
ZSTDMT_CCtx* mtctx,
|
|
1225
1263
|
void* dst, size_t dstCapacity,
|
|
1226
1264
|
const void* src, size_t srcSize,
|
|
1227
1265
|
const ZSTD_CDict* cdict,
|
|
1228
1266
|
ZSTD_CCtx_params params)
|
|
1229
1267
|
{
|
|
1230
|
-
ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
|
|
1231
|
-
size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
|
|
1232
|
-
unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
|
|
1268
|
+
ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
|
|
1269
|
+
size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
|
|
1270
|
+
unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
|
|
1233
1271
|
size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
|
|
1234
1272
|
size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
|
|
1235
1273
|
const char* const srcStart = (const char*)src;
|
|
@@ -1247,15 +1285,16 @@ static size_t ZSTDMT_compress_advanced_internal(
|
|
|
1247
1285
|
ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
|
|
1248
1286
|
DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
|
|
1249
1287
|
if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
|
|
1250
|
-
return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
|
|
1288
|
+
return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
|
|
1251
1289
|
}
|
|
1252
1290
|
|
|
1253
1291
|
assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
|
|
1254
1292
|
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
|
|
1255
|
-
|
|
1293
|
+
/* LDM doesn't even try to load the dictionary in single-ingestion mode */
|
|
1294
|
+
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize, NULL, 0, ZSTD_dct_auto))
|
|
1256
1295
|
return ERROR(memory_allocation);
|
|
1257
1296
|
|
|
1258
|
-
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */
|
|
1297
|
+
FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) , ""); /* only expands if necessary */
|
|
1259
1298
|
|
|
1260
1299
|
{ unsigned u;
|
|
1261
1300
|
for (u=0; u<nbJobs; u++) {
|
|
@@ -1388,19 +1427,19 @@ size_t ZSTDMT_initCStream_internal(
|
|
|
1388
1427
|
|
|
1389
1428
|
/* init */
|
|
1390
1429
|
if (params.nbWorkers != mtctx->params.nbWorkers)
|
|
1391
|
-
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
|
|
1430
|
+
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , "");
|
|
1392
1431
|
|
|
1393
1432
|
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
|
|
1394
|
-
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
|
|
1433
|
+
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
|
|
1395
1434
|
|
|
1396
1435
|
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
|
|
1397
1436
|
if (mtctx->singleBlockingThread) {
|
|
1398
|
-
ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
|
|
1437
|
+
ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
|
|
1399
1438
|
DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
|
|
1400
1439
|
assert(singleThreadParams.nbWorkers == 0);
|
|
1401
1440
|
return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
|
|
1402
1441
|
dict, dictSize, cdict,
|
|
1403
|
-
singleThreadParams, pledgedSrcSize);
|
|
1442
|
+
&singleThreadParams, pledgedSrcSize);
|
|
1404
1443
|
}
|
|
1405
1444
|
|
|
1406
1445
|
DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
|
|
@@ -1426,12 +1465,14 @@ size_t ZSTDMT_initCStream_internal(
|
|
|
1426
1465
|
mtctx->cdict = cdict;
|
|
1427
1466
|
}
|
|
1428
1467
|
|
|
1429
|
-
mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
|
|
1468
|
+
mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
|
|
1430
1469
|
DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
|
|
1431
1470
|
mtctx->targetSectionSize = params.jobSize;
|
|
1432
1471
|
if (mtctx->targetSectionSize == 0) {
|
|
1433
|
-
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
|
|
1472
|
+
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
|
|
1434
1473
|
}
|
|
1474
|
+
assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
|
|
1475
|
+
|
|
1435
1476
|
if (params.rsyncable) {
|
|
1436
1477
|
/* Aim for the targetsectionSize as the average job size. */
|
|
1437
1478
|
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
|
|
@@ -1483,7 +1524,8 @@ size_t ZSTDMT_initCStream_internal(
|
|
|
1483
1524
|
mtctx->allJobsCompleted = 0;
|
|
1484
1525
|
mtctx->consumed = 0;
|
|
1485
1526
|
mtctx->produced = 0;
|
|
1486
|
-
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize
|
|
1527
|
+
if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize,
|
|
1528
|
+
dict, dictSize, dictContentType))
|
|
1487
1529
|
return ERROR(memory_allocation);
|
|
1488
1530
|
return 0;
|
|
1489
1531
|
}
|
|
@@ -1697,9 +1739,11 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u
|
|
|
1697
1739
|
assert(mtctx->doneJobID < mtctx->nextJobID);
|
|
1698
1740
|
assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
|
|
1699
1741
|
assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
|
|
1742
|
+
if (toFlush > 0) {
|
|
1743
|
+
memcpy((char*)output->dst + output->pos,
|
|
1744
|
+
(const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
|
|
1745
|
+
toFlush);
|
|
1746
|
+
}
|
|
1703
1747
|
output->pos += toFlush;
|
|
1704
1748
|
mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */
|
|
1705
1749
|
|
|
@@ -1769,7 +1813,7 @@ static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
|
|
|
1769
1813
|
BYTE const* const bufferStart = (BYTE const*)buffer.start;
|
|
1770
1814
|
BYTE const* const bufferEnd = bufferStart + buffer.capacity;
|
|
1771
1815
|
BYTE const* const rangeStart = (BYTE const*)range.start;
|
|
1772
|
-
BYTE const* const rangeEnd = rangeStart + range.size;
|
|
1816
|
+
BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;
|
|
1773
1817
|
|
|
1774
1818
|
if (rangeStart == NULL || bufferStart == NULL)
|
|
1775
1819
|
return 0;
|
|
@@ -2043,7 +2087,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
|
|
2043
2087
|
|| ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */
|
|
2044
2088
|
size_t const jobSize = mtctx->inBuff.filled;
|
|
2045
2089
|
assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
|
|
2046
|
-
FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
|
|
2090
|
+
FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , "");
|
|
2047
2091
|
}
|
|
2048
2092
|
|
|
2049
2093
|
/* check for potential compressed data ready to be flushed */
|
|
@@ -2057,7 +2101,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
|
|
2057
2101
|
|
|
2058
2102
|
size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
|
|
2059
2103
|
{
|
|
2060
|
-
FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
|
|
2104
|
+
FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) , "");
|
|
2061
2105
|
|
|
2062
2106
|
/* recommended next input size : fill current input buffer */
|
|
2063
2107
|
return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
|
|
@@ -2074,7 +2118,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
|
|
|
2074
2118
|
|| ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */
|
|
2075
2119
|
DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
|
|
2076
2120
|
(U32)srcSize, (U32)endFrame);
|
|
2077
|
-
FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
|
|
2121
|
+
FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) , "");
|
|
2078
2122
|
}
|
|
2079
2123
|
|
|
2080
2124
|
/* check if there is any data available to flush */
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
/* === Dependencies === */
|
|
41
41
|
#include <stddef.h> /* size_t */
|
|
42
42
|
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
|
|
43
|
-
#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
|
|
43
|
+
#include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
/* === Constants === */
|
|
@@ -50,6 +50,7 @@
|
|
|
50
50
|
#ifndef ZSTDMT_JOBSIZE_MIN
|
|
51
51
|
# define ZSTDMT_JOBSIZE_MIN (1 MB)
|
|
52
52
|
#endif
|
|
53
|
+
#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
|
|
53
54
|
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
|
|
54
55
|
|
|
55
56
|
|
|
@@ -1,47 +1,27 @@
|
|
|
1
1
|
/* ******************************************************************
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
notice, this list of conditions and the following disclaimer.
|
|
14
|
-
* Redistributions in binary form must reproduce the above
|
|
15
|
-
copyright notice, this list of conditions and the following disclaimer
|
|
16
|
-
in the documentation and/or other materials provided with the
|
|
17
|
-
distribution.
|
|
18
|
-
|
|
19
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
20
|
-
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
21
|
-
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
22
|
-
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
23
|
-
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
24
|
-
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
25
|
-
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
26
|
-
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
27
|
-
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
28
|
-
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
29
|
-
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
30
|
-
|
|
31
|
-
You can contact the author at :
|
|
32
|
-
- FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
2
|
+
* huff0 huffman decoder,
|
|
3
|
+
* part of Finite State Entropy library
|
|
4
|
+
* Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
|
|
5
|
+
*
|
|
6
|
+
* You can contact the author at :
|
|
7
|
+
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
8
|
+
*
|
|
9
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
10
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
11
|
+
* in the COPYING file in the root directory of this source tree).
|
|
12
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
33
13
|
****************************************************************** */
|
|
34
14
|
|
|
35
15
|
/* **************************************************************
|
|
36
16
|
* Dependencies
|
|
37
17
|
****************************************************************/
|
|
38
18
|
#include <string.h> /* memcpy, memset */
|
|
39
|
-
#include "compiler.h"
|
|
40
|
-
#include "bitstream.h" /* BIT_* */
|
|
41
|
-
#include "fse.h" /* to compress headers */
|
|
19
|
+
#include "../common/compiler.h"
|
|
20
|
+
#include "../common/bitstream.h" /* BIT_* */
|
|
21
|
+
#include "../common/fse.h" /* to compress headers */
|
|
42
22
|
#define HUF_STATIC_LINKING_ONLY
|
|
43
|
-
#include "huf.h"
|
|
44
|
-
#include "error_private.h"
|
|
23
|
+
#include "../common/huf.h"
|
|
24
|
+
#include "../common/error_private.h"
|
|
45
25
|
|
|
46
26
|
/* **************************************************************
|
|
47
27
|
* Macros
|
|
@@ -61,7 +41,6 @@
|
|
|
61
41
|
* Error Management
|
|
62
42
|
****************************************************************/
|
|
63
43
|
#define HUF_isError ERR_isError
|
|
64
|
-
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
|
|
65
44
|
|
|
66
45
|
|
|
67
46
|
/* **************************************************************
|
|
@@ -179,17 +158,29 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
|
|
|
179
158
|
|
|
180
159
|
/* fill DTable */
|
|
181
160
|
{ U32 n;
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
161
|
+
size_t const nEnd = nbSymbols;
|
|
162
|
+
for (n=0; n<nEnd; n++) {
|
|
163
|
+
size_t const w = huffWeight[n];
|
|
164
|
+
size_t const length = (1 << w) >> 1;
|
|
165
|
+
size_t const uStart = rankVal[w];
|
|
166
|
+
size_t const uEnd = uStart + length;
|
|
167
|
+
size_t u;
|
|
186
168
|
HUF_DEltX1 D;
|
|
187
|
-
D.byte = (BYTE)n;
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
169
|
+
D.byte = (BYTE)n;
|
|
170
|
+
D.nbBits = (BYTE)(tableLog + 1 - w);
|
|
171
|
+
rankVal[w] = (U32)uEnd;
|
|
172
|
+
if (length < 4) {
|
|
173
|
+
/* Use length in the loop bound so the compiler knows it is short. */
|
|
174
|
+
for (u = 0; u < length; ++u)
|
|
175
|
+
dt[uStart + u] = D;
|
|
176
|
+
} else {
|
|
177
|
+
/* Unroll the loop 4 times, we know it is a power of 2. */
|
|
178
|
+
for (u = uStart; u < uEnd; u += 4) {
|
|
179
|
+
dt[u + 0] = D;
|
|
180
|
+
dt[u + 1] = D;
|
|
181
|
+
dt[u + 2] = D;
|
|
182
|
+
dt[u + 3] = D;
|
|
183
|
+
} } } }
|
|
193
184
|
return iSize;
|
|
194
185
|
}
|
|
195
186
|
|
|
@@ -280,6 +271,7 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
|
280
271
|
{ const BYTE* const istart = (const BYTE*) cSrc;
|
|
281
272
|
BYTE* const ostart = (BYTE*) dst;
|
|
282
273
|
BYTE* const oend = ostart + dstSize;
|
|
274
|
+
BYTE* const olimit = oend - 3;
|
|
283
275
|
const void* const dtPtr = DTable + 1;
|
|
284
276
|
const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
|
|
285
277
|
|
|
@@ -304,9 +296,9 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
|
304
296
|
BYTE* op2 = opStart2;
|
|
305
297
|
BYTE* op3 = opStart3;
|
|
306
298
|
BYTE* op4 = opStart4;
|
|
307
|
-
U32 endSignal = BIT_DStream_unfinished;
|
|
308
299
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
309
300
|
U32 const dtLog = dtd.tableLog;
|
|
301
|
+
U32 endSignal = 1;
|
|
310
302
|
|
|
311
303
|
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
|
312
304
|
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
|
@@ -315,8 +307,7 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
|
315
307
|
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
|
316
308
|
|
|
317
309
|
/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
|
|
318
|
-
|
|
319
|
-
while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
|
|
310
|
+
for ( ; (endSignal) & (op4 < olimit) ; ) {
|
|
320
311
|
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
|
|
321
312
|
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
|
|
322
313
|
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
|
|
@@ -333,10 +324,10 @@ HUF_decompress4X1_usingDTable_internal_body(
|
|
|
333
324
|
HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
|
|
334
325
|
HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
|
|
335
326
|
HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
327
|
+
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
|
328
|
+
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
|
329
|
+
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
|
330
|
+
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
|
340
331
|
}
|
|
341
332
|
|
|
342
333
|
/* check corruption */
|
|
@@ -755,7 +746,6 @@ HUF_decompress1X2_usingDTable_internal_body(
|
|
|
755
746
|
return dstSize;
|
|
756
747
|
}
|
|
757
748
|
|
|
758
|
-
|
|
759
749
|
FORCE_INLINE_TEMPLATE size_t
|
|
760
750
|
HUF_decompress4X2_usingDTable_internal_body(
|
|
761
751
|
void* dst, size_t dstSize,
|
|
@@ -767,6 +757,7 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
|
767
757
|
{ const BYTE* const istart = (const BYTE*) cSrc;
|
|
768
758
|
BYTE* const ostart = (BYTE*) dst;
|
|
769
759
|
BYTE* const oend = ostart + dstSize;
|
|
760
|
+
BYTE* const olimit = oend - (sizeof(size_t)-1);
|
|
770
761
|
const void* const dtPtr = DTable+1;
|
|
771
762
|
const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
|
|
772
763
|
|
|
@@ -791,7 +782,7 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
|
791
782
|
BYTE* op2 = opStart2;
|
|
792
783
|
BYTE* op3 = opStart3;
|
|
793
784
|
BYTE* op4 = opStart4;
|
|
794
|
-
U32 endSignal;
|
|
785
|
+
U32 endSignal = 1;
|
|
795
786
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
|
796
787
|
U32 const dtLog = dtd.tableLog;
|
|
797
788
|
|
|
@@ -802,8 +793,29 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
|
802
793
|
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
|
803
794
|
|
|
804
795
|
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
|
805
|
-
|
|
806
|
-
|
|
796
|
+
for ( ; (endSignal) & (op4 < olimit); ) {
|
|
797
|
+
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
|
|
798
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
799
|
+
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
|
|
800
|
+
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
801
|
+
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
|
|
802
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
803
|
+
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
|
|
804
|
+
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
805
|
+
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
|
806
|
+
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
|
|
807
|
+
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
|
|
808
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
809
|
+
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
|
|
810
|
+
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
811
|
+
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
|
812
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
|
813
|
+
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
|
|
814
|
+
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
|
|
815
|
+
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
|
816
|
+
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
|
|
817
|
+
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
|
|
818
|
+
#else
|
|
807
819
|
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
|
808
820
|
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
|
809
821
|
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
|
@@ -820,8 +832,12 @@ HUF_decompress4X2_usingDTable_internal_body(
|
|
|
820
832
|
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
|
821
833
|
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
|
822
834
|
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
|
823
|
-
|
|
824
|
-
|
|
835
|
+
endSignal = (U32)LIKELY(
|
|
836
|
+
(BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
|
|
837
|
+
& (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
|
|
838
|
+
& (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
|
|
839
|
+
& (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
|
|
840
|
+
#endif
|
|
825
841
|
}
|
|
826
842
|
|
|
827
843
|
/* check corruption */
|