zstd-ruby 1.4.2.0 → 1.4.4.0
This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +0 -2
- data/ext/zstdruby/libzstd/README.md +13 -2
- data/ext/zstdruby/libzstd/common/bitstream.h +7 -2
- data/ext/zstdruby/libzstd/common/compiler.h +17 -5
- data/ext/zstdruby/libzstd/common/fse.h +1 -1
- data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -0
- data/ext/zstdruby/libzstd/common/mem.h +74 -1
- data/ext/zstdruby/libzstd/common/pool.c +7 -3
- data/ext/zstdruby/libzstd/common/threading.c +46 -1
- data/ext/zstdruby/libzstd/common/threading.h +32 -1
- data/ext/zstdruby/libzstd/common/xxhash.c +8 -2
- data/ext/zstdruby/libzstd/common/zstd_internal.h +37 -58
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +644 -445
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +98 -26
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +10 -5
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +3 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +535 -0
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +11 -12
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +38 -45
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +35 -31
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +4 -4
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +6 -6
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +32 -26
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +2 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +16 -17
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +149 -148
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +6 -5
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +7 -8
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +2 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +2 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +6 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +3 -2
- data/ext/zstdruby/libzstd/zstd.h +170 -66
- data/lib/zstd-ruby/version.rb +1 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +5 -4
data/ext/zstdruby/libzstd/compress/zstd_ldm.c:

@@ -49,9 +49,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
 {
     size_t const ldmHSize = ((size_t)1) << params.hashLog;
     size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
-    size_t const ldmBucketSize =
-        ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
-    size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
     return params.enableLdm ? totalSize : 0;
 }

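`ZSTD_cwksp_alloc_size()` comes from the new workspace manager in `zstd_cwksp.h` (the +535-line file in the list above): the table-size estimate now counts what the workspace allocator will actually reserve, not the raw byte counts. A minimal sketch of that style of accounting, assuming a hypothetical `WKSP_ALIGN` granularity (the real constant and rounding rule live in `zstd_cwksp.h`):

```c
#include <stddef.h>

/* Hypothetical stand-in for the workspace allocation granularity. */
#define WKSP_ALIGN ((size_t)8)

/* Round a request up to the arena granularity: size estimates must count
 * the padded size, or the preallocated workspace can come up short. */
static size_t wksp_alloc_size_sketch(size_t size)
{
    return (size + (WKSP_ALIGN - 1)) & ~(WKSP_ALIGN - 1);
}

/* e.g. a 12-byte table and a 20-byte table cost 16 + 24 = 40 bytes,
 * not 32, once each allocation is padded to the 8-byte granularity. */
```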
@@ -583,7 +583,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
             rep[i] = rep[i-1];
         rep[0] = sequence.offset;
         /* Store the sequence */
-        ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+        ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                       sequence.offset + ZSTD_REP_MOVE,
                       sequence.matchLength - MINMATCH);
         ip += sequence.matchLength;
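Both here and in the `zstd_opt.c` hunks below, `ZSTD_storeSeq()` gains an `iend` argument marking the end of the input. Giving the sequence store the input bound lets its literal copy take an over-reading fast path except near the end of the block. A minimal sketch of that kind of guard; the names and the 32-byte margin are illustrative, not zstd's actual constants:

```c
#include <string.h>

#define OVERREAD_MARGIN 32  /* assumed slack required by the fast copy */

/* The fast path may read up to OVERREAD_MARGIN bytes past src+len, so it
 * is only taken while that much input remains before iend (src+len <= iend
 * is assumed). Near the end, fall back to an exact copy. */
static void copy_literals_sketch(unsigned char* dst, const unsigned char* src,
                                 size_t len, const unsigned char* iend)
{
    if ((size_t)(iend - (src + len)) >= OVERREAD_MARGIN) {
        size_t i;
        for (i = 0; i < len; i += 16)
            memcpy(dst + i, src + i, 16);   /* rounded-up block copy */
    } else {
        memcpy(dst, src, len);              /* exact copy near the end */
    }
}
```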
data/ext/zstdruby/libzstd/compress/zstd_opt.c:

@@ -552,7 +552,6 @@ U32 ZSTD_insertBtAndGetAllMatches (
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
-    U32 const maxDistance = 1U << cParams->windowLog;
     const BYTE* const base = ms->window.base;
     U32 const current = (U32)(ip-base);
     U32 const hashLog = cParams->hashLog;
@@ -569,8 +568,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
     U32 const btLow = (btMask >= current) ? 0 : current - btMask;
-    U32 const windowValid = ms->window.lowLimit;
-    U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid;
+    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
     U32 const matchLow = windowLow ? windowLow : 1;
     U32* smallerPtr = bt + 2*(current&btMask);
     U32* largerPtr  = bt + 2*(current&btMask) + 1;
@@ -674,19 +672,21 @@ U32 ZSTD_insertBtAndGetAllMatches (

     while (nbCompares-- && (matchIndex >= matchLow)) {
         U32* const nextPtr = bt + 2*(matchIndex & btMask);
-        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
         const BYTE* match;
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
         assert(current > matchIndex);

         if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
             assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
             match = base + matchIndex;
+            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
         } else {
             match = dictBase + matchIndex;
+            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
             if (matchIndex+matchLength >= dictLimit)
-                match = base + matchIndex;   /* prepare for match[matchLength] */
+                match = base + matchIndex;   /* prepare for match[matchLength] read */
         }

         if (matchLength > bestLength) {
@@ -1098,7 +1098,7 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */

                 assert(anchor + llen <= iend);
                 ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
-                ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
+                ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
                 anchor += advance;
                 ip = anchor;
     }   }
data/ext/zstdruby/libzstd/compress/zstdmt_compress.c:

@@ -668,7 +668,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)

     /* init */
     if (job->cdict) {
-        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
+        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
         assert(job->firstJob);  /* only allowed for first job */
         if (ZSTD_isError(initError)) JOB_ERROR(initError);
     } else {  /* srcStart points at reloaded section */
@@ -680,7 +680,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
                 job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
                 ZSTD_dtlm_fast,
                 NULL, /*cdict*/
-                jobParams, pledgedSrcSize);
+                &jobParams, pledgedSrcSize);
         if (ZSTD_isError(initError)) JOB_ERROR(initError);
     }   }

@@ -927,12 +927,18 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
     unsigned jobID;
     DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
     for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
+        /* Copy the mutex/cond out */
+        ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
+        ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
+
         DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
         ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
-        mtctx->jobs[jobID].dstBuff = g_nullBuffer;
-        mtctx->jobs[jobID].cSize = 0;
+
+        /* Clear the job description, but keep the mutex/cond */
+        memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
+        mtctx->jobs[jobID].job_mutex = mutex;
+        mtctx->jobs[jobID].job_cond = cond;
     }
-    memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
     mtctx->inBuff.buffer = g_nullBuffer;
     mtctx->inBuff.filled = 0;
     mtctx->allJobsCompleted = 1;
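The rewritten loop fixes a subtle bug: the old code memset the whole job array, wiping live mutexes and condition variables along with the bookkeeping. The new version copies the primitives out, clears the struct, and puts them back. A self-contained illustration of the same save/clear/restore idiom:

```c
#include <pthread.h>
#include <string.h>

typedef struct {
    pthread_mutex_t mutex;
    pthread_cond_t  cond;
    void*  payload;
    size_t size;
} job_t;

/* Reset a job's bookkeeping fields without destroying the initialized
 * mutex/cond, mirroring ZSTDMT_releaseAllJobResources() above. */
static void job_reset(job_t* job)
{
    pthread_mutex_t const mutex = job->mutex;  /* copy the primitives out */
    pthread_cond_t  const cond  = job->cond;
    memset(job, 0, sizeof(*job));              /* wipe all the bookkeeping */
    job->mutex = mutex;                        /* put them back intact */
    job->cond  = cond;
}
```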
@@ -1028,9 +1034,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,

 /* Sets parameters relevant to the compression job,
  * initializing others to default values. */
-static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params params)
+static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
 {
-    ZSTD_CCtx_params jobParams = params;
+    ZSTD_CCtx_params jobParams = *params;
     /* Clear parameters related to multithreading */
     jobParams.forceWindow = 0;
     jobParams.nbWorkers = 0;
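`ZSTD_CCtx_params` is a large struct, and 1.4.4 threads it through these zstdmt helpers by `const` pointer instead of by value; the remaining hunks in this file are the matching signature and call-site updates. A minimal sketch of the two calling conventions, with an illustrative struct standing in for the real one:

```c
/* Illustrative stand-in for a large parameter struct. */
typedef struct {
    unsigned windowLog;
    unsigned chainLog;
    int nbWorkers;
    char rest[200];   /* many more fields in the real ZSTD_CCtx_params */
} big_params;

/* By value: the entire struct is copied at every call. */
static unsigned job_log_byval(big_params p)        { return p.windowLog + 2; }

/* By const pointer: one pointer crosses the call, nothing is copied. */
static unsigned job_log_byptr(const big_params* p) { return p->windowLog + 2; }
```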
@@ -1151,16 +1157,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
 /* =====   Multi-threaded compression   ===== */
 /* ------------------------------------------ */

-static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params params)
+static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
 {
     unsigned jobLog;
-    if (params.ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on chainLog instead. */
-        jobLog = MAX(21, params.cParams.chainLog + 4);
+        jobLog = MAX(21, params->cParams.chainLog + 4);
     } else {
-        jobLog = MAX(20, params.cParams.windowLog + 2);
+        jobLog = MAX(20, params->cParams.windowLog + 2);
     }
     return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
 }
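A worked example of the formula (independent of the pointer change): with LDM off and windowLog = 24, jobLog = MAX(20, 24 + 2) = 26, i.e. a 64 MiB target job size, subject to the `ZSTDMT_JOBLOG_MAX` cap. A runnable restatement, with an illustrative cap value:

```c
#include <stdio.h>

#define MAX(a,b) ((a)>(b)?(a):(b))
#define MIN(a,b) ((a)<(b)?(a):(b))
#define JOBLOG_MAX 29u   /* illustrative cap; the real ZSTDMT_JOBLOG_MAX differs by platform */

static unsigned target_job_log(int ldm_enabled, unsigned chainLog, unsigned windowLog)
{
    unsigned const jobLog = ldm_enabled ? MAX(21u, chainLog + 4)
                                        : MAX(20u, windowLog + 2);
    return MIN(jobLog, JOBLOG_MAX);
}

int main(void)
{
    printf("windowLog=24, no LDM -> jobLog=%u (64 MiB jobs)\n",
           target_job_log(0, 0, 24));
    return 0;
}
```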
@@ -1193,27 +1199,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
     return ovlog;
 }

-static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params params)
+static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
 {
-    int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
-    int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
+    int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
+    int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
     assert(0 <= overlapRLog && overlapRLog <= 8);
-    if (params.ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on chainLog instead.
          * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
-        ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
+        ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
                 - overlapRLog;
     }
     assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
-    DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
+    DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
     DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
     return (ovLog==0) ? 0 : (size_t)1 << ovLog;
 }

 static unsigned
-ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
+ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
 {
     assert(nbWorkers>0);
     {   size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
@@ -1236,9 +1242,9 @@ static size_t ZSTDMT_compress_advanced_internal(
                 const ZSTD_CDict* cdict,
                 ZSTD_CCtx_params params)
 {
-    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
-    size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
-    unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
+    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
+    size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
+    unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
     size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
     size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize;   /* avoid too small last block */
     const char* const srcStart = (const char*)src;
@@ -1256,7 +1262,7 @@ static size_t ZSTDMT_compress_advanced_internal(
         ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
         DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
         if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
-        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
+        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
     }

     assert(avgJobSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
@@ -1404,12 +1410,12 @@ size_t ZSTDMT_initCStream_internal(

     mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
     if (mtctx->singleBlockingThread) {
-        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
+        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
         DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
         assert(singleThreadParams.nbWorkers == 0);
         return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
                                          dict, dictSize, cdict,
-                                         singleThreadParams, pledgedSrcSize);
+                                         &singleThreadParams, pledgedSrcSize);
     }

     DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
@@ -1435,11 +1441,11 @@ size_t ZSTDMT_initCStream_internal(
         mtctx->cdict = cdict;
     }

-    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
+    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
     DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
     mtctx->targetSectionSize = params.jobSize;
     if (mtctx->targetSectionSize == 0) {
-        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
+        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
     }
     assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);

data/ext/zstdruby/libzstd/decompress/huf_decompress.c:

@@ -61,7 +61,9 @@
 *  Error Management
 ****************************************************************/
 #define HUF_isError ERR_isError
+#ifndef CHECK_F
 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
+#endif


 /* **************************************************************
data/ext/zstdruby/libzstd/decompress/zstd_decompress.c:

@@ -88,10 +88,7 @@ size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }

 static size_t ZSTD_startingInputLength(ZSTD_format_e format)
 {
-    size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
-                    ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE :
-                    ZSTD_FRAMEHEADERSIZE_PREFIX;
-    ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
+    size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
     /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
     assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
     return startingInputLength;
@@ -376,7 +373,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
 {
     unsigned long long totalDstSize = 0;

-    while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
+    while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
         U32 const magicNumber = MEM_readLE32(src);

         if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@@ -574,9 +571,10 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
 }

 /** ZSTD_insertBlock() :
-    insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
+ *  insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
 size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
 {
+    DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
     ZSTD_checkContinuity(dctx, blockStart);
     dctx->previousDstEnd = (const char*)blockStart + blockSize;
     return blockSize;
@@ -628,11 +626,12 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,

     /* check */
     RETURN_ERROR_IF(
-        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize,
+        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
         srcSize_wrong);

     /* Frame Header */
-    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
+    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
+                ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
         if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
         RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
                         srcSize_wrong);
@@ -713,7 +712,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
         dictSize = ZSTD_DDict_dictSize(ddict);
     }

-    while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
+    while (srcSize >= ZSTD_startingInputLength(dctx->format)) {

 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
         if (ZSTD_isLegacy(src, srcSize)) {
@@ -1097,7 +1096,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
         size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
         for (i=0; i<3; i++) {
             U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
-            RETURN_ERROR_IF(rep==0 || rep >= dictContentSize,
+            RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
                             dictionary_corrupted);
             entropy->rep[i] = rep;
     }   }
@@ -1266,7 +1265,7 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
 {
     RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
     ZSTD_clearDict(dctx);
-    if (dict && dictSize >= 8) {
+    if (dict && dictSize != 0) {
         dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
         RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
         dctx->ddict = dctx->ddictLocal;
@@ -1299,14 +1298,14 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz


 /* ZSTD_initDStream_usingDict() :
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * return : expected size, aka ZSTD_startingInputLength().
  * this function cannot fail */
 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
 {
     DEBUGLOG(4, "ZSTD_initDStream_usingDict");
     FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) );
     FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
-    return ZSTD_FRAMEHEADERSIZE_PREFIX;
+    return ZSTD_startingInputLength(zds->format);
 }

 /* note : this variant can't fail */
@@ -1323,16 +1322,16 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
 {
     FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) );
     FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) );
-    return ZSTD_FRAMEHEADERSIZE_PREFIX;
+    return ZSTD_startingInputLength(dctx->format);
 }

 /* ZSTD_resetDStream() :
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * return : expected size, aka ZSTD_startingInputLength().
  * this function cannot fail */
 size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
 {
     FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only));
-    return ZSTD_FRAMEHEADERSIZE_PREFIX;
+    return ZSTD_startingInputLength(dctx->format);
 }

@@ -1563,7 +1562,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                     zds->lhSize += remainingInput;
                 }
                 input->pos = input->size;
-                return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
+                return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
             }
             assert(ip != NULL);
             memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c:

@@ -79,6 +79,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                           const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
 {
+    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
     RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);

     {   const BYTE* const istart = (const BYTE*) src;
@@ -87,6 +88,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
         switch(litEncType)
         {
         case set_repeat:
+            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
             RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
             /* fall-through */

@@ -116,7 +118,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             /* 2 - 2 - 18 - 18 */
             lhSize = 5;
             litSize  = (lhc >> 4) & 0x3FFFF;
-            litCSize = (lhc >> 22) + (istart[4] << 10);
+            litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
             break;
         }
         RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
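The added `(size_t)` cast changes where the arithmetic happens: `istart[4]` is a BYTE, so without the cast the shift is performed in `int` after integer promotion. At `<< 10` the value still fits (at most 255 << 10 = 261120), so this reads as type hygiene, keeping size arithmetic in `size_t` from the first operand, rather than a live overflow fix. A small demonstration of the pattern:

```c
#include <stdio.h>
#include <stddef.h>

int main(void)
{
    unsigned char b = 0xFF;
    /* Promoted to int before the shift: fine at <<10, but the same
     * pattern can overflow int at larger shifts (e.g. <<24 into the
     * sign bit), so widening first is the defensive habit. */
    size_t narrow = b << 10;            /* arithmetic done in int */
    size_t wide   = (size_t)b << 10;    /* arithmetic done in size_t */
    printf("%zu %zu\n", narrow, wide);  /* identical here: 261120 261120 */
    return 0;
}
```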
@@ -391,7 +393,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
                 symbolNext[s] = 1;
             } else {
                 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
-                symbolNext[s] = normalizedCounter[s];
+                assert(normalizedCounter[s]>=0);
+                symbolNext[s] = (U16)normalizedCounter[s];
         }   }   }
         memcpy(dt, &DTableH, sizeof(DTableH));
     }
@@ -570,38 +573,118 @@ typedef struct {
     size_t pos;
 } seqState_t;

+/*! ZSTD_overlapCopy8() :
+ *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+ *  If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ *  Precondition: *ip <= *op
+ *  Postcondition: *op - *op >= 8
+ */
+static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+    assert(*ip <= *op);
+    if (offset < 8) {
+        /* close range match, overlap */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+        int const sub2 = dec64table[offset];
+        (*op)[0] = (*ip)[0];
+        (*op)[1] = (*ip)[1];
+        (*op)[2] = (*ip)[2];
+        (*op)[3] = (*ip)[3];
+        *ip += dec32table[offset];
+        ZSTD_copy4(*op+4, *ip);
+        *ip -= sub2;
+    } else {
+        ZSTD_copy8(*op, *ip);
+    }
+    *ip += 8;
+    *op += 8;
+    assert(*op - *ip >= 8);
+}
+
+/*! ZSTD_safecopy() :
+ *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ *  This function is only called in the uncommon case where the sequence is near the end of the block. It
+ *  should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ *           The src buffer must be before the dst buffer.
+ */
+static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+    ptrdiff_t const diff = op - ip;
+    BYTE* const oend = op + length;

-/* ZSTD_execSequenceLast7():
- * exceptional case : decompress a match starting within last 7 bytes of output buffer.
- * requires more careful checks, to ensure there is no overflow.
- * performance does not matter though.
- * note : this case is supposed to be never generated "naturally" by reference encoder,
- *        since in most cases it needs at least 8 bytes to look for a match.
- *        but it's allowed by the specification. */
+    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
+           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
+
+    if (length < 8) {
+        /* Handle short lengths. */
+        while (op < oend) *op++ = *ip++;
+        return;
+    }
+    if (ovtype == ZSTD_overlap_src_before_dst) {
+        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
+        assert(length >= 8);
+        ZSTD_overlapCopy8(&op, &ip, diff);
+        assert(op - ip >= 8);
+        assert(op <= oend);
+    }
+
+    if (oend <= oend_w) {
+        /* No risk of overwrite. */
+        ZSTD_wildcopy(op, ip, length, ovtype);
+        return;
+    }
+    if (op <= oend_w) {
+        /* Wildcopy until we get close to the end. */
+        assert(oend > oend_w);
+        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
+        ip += oend_w - op;
+        op = oend_w;
+    }
+    /* Handle the leftovers. */
+    while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_execSequenceEnd():
+ * This version handles cases that are near the end of the output buffer. It requires
+ * more careful checks to make sure there is no overflow. By separating out these hard
+ * and unlikely cases, we can speed up the common cases.
+ *
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+ */
 FORCE_NOINLINE
-size_t ZSTD_execSequenceLast7(BYTE* op,
-                              BYTE* const oend, seq_t sequence,
-                              const BYTE** litPtr, const BYTE* const litLimit,
-                              const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+size_t ZSTD_execSequenceEnd(BYTE* op,
+                            BYTE* const oend, seq_t sequence,
+                            const BYTE** litPtr, const BYTE* const litLimit,
+                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
 {
     BYTE* const oLitEnd = op + sequence.litLength;
     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
     BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
     const BYTE* match = oLitEnd - sequence.offset;
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;

-    /* check */
-    RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
+    /* bounds checks */
+    assert(oLitEnd < oMatchEnd);
+    RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer");
     RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");

     /* copy literals */
-    while (op < oLitEnd) *op++ = *(*litPtr)++;
+    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
+    op = oLitEnd;
+    *litPtr = iLitEnd;

     /* copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - base)) {
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase), corruption_detected);
-        match = dictEnd - (base-match);
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
+        match = dictEnd - (prefixStart-match);
         if (match + sequence.matchLength <= dictEnd) {
             memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
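The `dec32table`/`dec64table` gymnastics in `ZSTD_overlapCopy8()` exist because an LZ77 match with offset < 8 overlaps its own output: each byte can depend on a byte written earlier in the same copy. Spreading the offset lets the remainder proceed in 8-byte steps without reading not-yet-written bytes. The byte-by-byte loop below is the reference semantics any such fast path must reproduce:

```c
#include <stdio.h>

/* Reference semantics of an LZ77 overlap match: when offset < len, each
 * output byte may depend on a byte produced earlier in the same copy. */
static void overlap_copy_ref(unsigned char* op, size_t offset, size_t len)
{
    const unsigned char* match = op - offset;
    while (len--) *op++ = *match++;
}

int main(void)
{
    unsigned char buf[32] = "abc";
    /* offset 3, length 9: replicates the 3-byte seed three more times */
    overlap_copy_ref(buf + 3, 3, 9);
    printf("%.12s\n", buf);   /* prints "abcabcabcabc" */
    return 0;
}
```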
@@ -611,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
             memmove(oLitEnd, match, length1);
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
-            match = base;
+            match = prefixStart;
     }   }
-    while (op < oMatchEnd) *op++ = *match++;
+    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
     return sequenceLength;
 }

-
 HINT_INLINE
 size_t ZSTD_execSequence(BYTE* op,
                          BYTE* const oend, seq_t sequence,
@@ -631,20 +713,29 @@ size_t ZSTD_execSequence(BYTE* op,
     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
     const BYTE* match = oLitEnd - sequence.offset;

-    /* check */
-    RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must follow within dstBuffer");
-    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
-    if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
-
-    /* copy Literals */
-    if (sequence.litLength > 8)
-        ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-    else
-        ZSTD_copy8(op, *litPtr);
+    /* Errors and uncommon cases handled here. */
+    assert(oLitEnd < oMatchEnd);
+    if (iLitEnd > litLimit || oMatchEnd > oend_w)
+        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+    /* Copy Literals:
+     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+     * We likely don't need the full 32-byte wildcopy.
+     */
+    assert(WILDCOPY_OVERLENGTH >= 16);
+    ZSTD_copy16(op, (*litPtr));
+    if (sequence.litLength > 16) {
+        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
+    }
     op = oLitEnd;
     *litPtr = iLitEnd;   /* update for next sequence */

-    /* copy Match */
+    /* Copy Match */
     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix -> go into extDict */
         RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
@@ -659,123 +750,33 @@ size_t ZSTD_execSequence(BYTE* op,
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = prefixStart;
-            if (op > oend_w || sequence.matchLength < MINMATCH) {
-              U32 i;
-              for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
-              return sequenceLength;
-            }
     }   }
-    /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
-
-    /* match within prefix */
-    if (sequence.offset < 8) {
-        /* close range match, overlap */
-        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
-        int const sub2 = dec64table[sequence.offset];
-        op[0] = match[0];
-        op[1] = match[1];
-        op[2] = match[2];
-        op[3] = match[3];
-        match += dec32table[sequence.offset];
-        ZSTD_copy4(op+4, match);
-        match -= sub2;
-    } else {
-        ZSTD_copy8(op, match);
-    }
-    op += 8; match += 8;
-
-    if (oMatchEnd > oend-(16-MINMATCH)) {
-        if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
-            match += oend_w - op;
-            op = oend_w;
-        }
-        while (op < oMatchEnd) *op++ = *match++;
-    } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);   /* works even if matchLength < 8 */
+    /* Match within prefix of 1 or more bytes */
+    assert(op <= oMatchEnd);
+    assert(oMatchEnd <= oend_w);
+    assert(match >= prefixStart);
+    assert(sequence.matchLength >= 1);
+
+    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+     * without overlap checking.
+     */
+    if (sequence.offset >= WILDCOPY_VECLEN) {
+        /* We bet on a full wildcopy for matches, since we expect matches to be
+         * longer than literals (in general). In silesia, ~10% of matches are longer
+         * than 16 bytes.
+         */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+        return sequenceLength;
     }
-    return sequenceLength;
-}
-
-
-HINT_INLINE
-size_t ZSTD_execSequenceLong(BYTE* op,
-                             BYTE* const oend, seq_t sequence,
-                             const BYTE** litPtr, const BYTE* const litLimit,
-                             const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
-{
-    BYTE* const oLitEnd = op + sequence.litLength;
-    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
-    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
-    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
-    const BYTE* match = sequence.match;
+    assert(sequence.offset < WILDCOPY_VECLEN);

-    /* check */
-    RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must follow within dstBuffer");
-    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
-    if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
-
-    /* copy Literals */
-    if (sequence.litLength > 8)
-        ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-    else
-        ZSTD_copy8(op, *litPtr);  /* note : op <= oLitEnd <= oend_w == oend - 8 */
+    /* Copy 8 bytes and spread the offset to be >= 8. */
+    ZSTD_overlapCopy8(&op, &match, sequence.offset);

-    op = oLitEnd;
-    *litPtr = iLitEnd;   /* update for next sequence */
-
-    /* copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
-        /* offset beyond prefix */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
-        if (match + sequence.matchLength <= dictEnd) {
-            memmove(oLitEnd, match, sequence.matchLength);
-            return sequenceLength;
-        }
-        /* span extDict & currentPrefixSegment */
-        {   size_t const length1 = dictEnd - match;
-            memmove(oLitEnd, match, length1);
-            op = oLitEnd + length1;
-            sequence.matchLength -= length1;
-            match = prefixStart;
-            if (op > oend_w || sequence.matchLength < MINMATCH) {
-              U32 i;
-              for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
-              return sequenceLength;
-            }
-    }   }
-    assert(op <= oend_w);
-    assert(sequence.matchLength >= MINMATCH);
-
-    /* match within prefix */
-    if (sequence.offset < 8) {
-        /* close range match, overlap */
-        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
-        int const sub2 = dec64table[sequence.offset];
-        op[0] = match[0];
-        op[1] = match[1];
-        op[2] = match[2];
-        op[3] = match[3];
-        match += dec32table[sequence.offset];
-        ZSTD_copy4(op+4, match);
-        match -= sub2;
-    } else {
-        ZSTD_copy8(op, match);
-    }
-    op += 8; match += 8;
-
-    if (oMatchEnd > oend-(16-MINMATCH)) {
-        if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
-            match += oend_w - op;
-            op = oend_w;
-        }
-        while (op < oMatchEnd) *op++ = *match++;
-    } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);   /* works even if matchLength < 8 */
+    /* If the match length is > 8 bytes, then continue with the wildcopy. */
+    if (sequence.matchLength > 8) {
+        assert(op < oMatchEnd);
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
     }
     return sequenceLength;
 }
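The structural change across these hunks is a hot/cold split: `ZSTD_execSequence()` keeps only the checks-free fast path (`HINT_INLINE`), while everything near a buffer boundary is pushed into the out-of-line `ZSTD_execSequenceEnd()` (`FORCE_NOINLINE`), so the common case stays small enough to inline. A minimal sketch of the pattern, with illustrative names and margin:

```c
#include <stddef.h>
#include <string.h>

/* Cold path, kept out of line so it doesn't bloat the hot caller:
 * careful byte-at-a-time handling near the end of the output buffer. */
static void copy_tail(unsigned char* dst, const unsigned char* src, size_t n)
{
    while (n--) *dst++ = *src++;
}

/* Hot path: branch once, then copy in over-writing 16-byte strides. The
 * strides may write (and read) up to 15 bytes past dst+n / src+n, which
 * the margin check makes safe on the destination side. */
static void copy_fast(unsigned char* dst, unsigned char* const dst_end,
                      const unsigned char* src, size_t n)
{
    size_t i;
    if (n + 16 > (size_t)(dst_end - dst)) { copy_tail(dst, src, n); return; }
    for (i = 0; i < n; i += 16)
        memcpy(dst + i, src + i, 16);   /* rounded-up 16-byte strides */
}
```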
@@ -1095,7 +1096,7 @@ ZSTD_decompressSequencesLong_body(
         /* decode and decompress */
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
             seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
-            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
             PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
             sequences[seqNb & STORED_SEQS_MASK] = sequence;
@@ -1106,7 +1107,7 @@ ZSTD_decompressSequencesLong_body(
         /* finish queue */
         seqNb -= seqAdvance;
         for ( ; seqNb<nbSeq ; seqNb++) {
-            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
             op += oneSeqSize;
         }