zstd-ruby 1.4.2.0 → 1.4.4.0

Files changed (45)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +0 -2
  4. data/ext/zstdruby/libzstd/README.md +13 -2
  5. data/ext/zstdruby/libzstd/common/bitstream.h +7 -2
  6. data/ext/zstdruby/libzstd/common/compiler.h +17 -5
  7. data/ext/zstdruby/libzstd/common/fse.h +1 -1
  8. data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -0
  9. data/ext/zstdruby/libzstd/common/mem.h +74 -1
  10. data/ext/zstdruby/libzstd/common/pool.c +7 -3
  11. data/ext/zstdruby/libzstd/common/threading.c +46 -1
  12. data/ext/zstdruby/libzstd/common/threading.h +32 -1
  13. data/ext/zstdruby/libzstd/common/xxhash.c +8 -2
  14. data/ext/zstdruby/libzstd/common/zstd_internal.h +37 -58
  15. data/ext/zstdruby/libzstd/compress/zstd_compress.c +644 -445
  16. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +98 -26
  17. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +10 -5
  18. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
  19. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +3 -3
  20. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  21. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +535 -0
  22. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +11 -12
  23. data/ext/zstdruby/libzstd/compress/zstd_fast.c +38 -45
  24. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +35 -31
  25. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +4 -4
  26. data/ext/zstdruby/libzstd/compress/zstd_opt.c +6 -6
  27. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +32 -26
  28. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +2 -0
  29. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +16 -17
  30. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +149 -148
  31. data/ext/zstdruby/libzstd/deprecated/zbuff.h +6 -5
  32. data/ext/zstdruby/libzstd/dictBuilder/cover.c +7 -8
  33. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +1 -1
  34. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +1 -1
  35. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +2 -1
  36. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +2 -1
  37. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +6 -2
  38. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +1 -1
  39. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +1 -1
  40. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +1 -1
  41. data/ext/zstdruby/libzstd/libzstd.pc.in +3 -2
  42. data/ext/zstdruby/libzstd/zstd.h +170 -66
  43. data/lib/zstd-ruby/version.rb +1 -1
  44. data/zstd-ruby.gemspec +1 -1
  45. metadata +5 -4
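
The bulk of this release is a refresh of the bundled libzstd sources (the gem's first three version digits track the bundled zstd, so this is upstream zstd 1.4.2 → 1.4.4). The diffs below touch library internals only; the stable simple API in zstd.h is unchanged. For orientation, a minimal round-trip against that API (standard zstd.h calls; buffer sizes chosen for this input):

```c
#include <stdio.h>
#include <string.h>
#include <zstd.h>

int main(void) {
    const char src[] = "zstd-ruby bundles libzstd; this is the C API underneath.";
    size_t const srcSize = sizeof(src);
    char dst[512];   /* >= ZSTD_compressBound(srcSize) for this input */
    char out[512];

    size_t const cSize = ZSTD_compress(dst, sizeof(dst), src, srcSize, 3 /* level */);
    if (ZSTD_isError(cSize)) { fprintf(stderr, "%s\n", ZSTD_getErrorName(cSize)); return 1; }

    size_t const dSize = ZSTD_decompress(out, sizeof(out), dst, cSize);
    if (ZSTD_isError(dSize)) { fprintf(stderr, "%s\n", ZSTD_getErrorName(dSize)); return 1; }

    printf("round trip: %zu -> %zu -> %zu bytes\n", srcSize, cSize, dSize);
    return memcmp(src, out, srcSize) != 0;
}
```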
data/ext/zstdruby/libzstd/compress/zstd_ldm.c
@@ -49,9 +49,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
 {
     size_t const ldmHSize = ((size_t)1) << params.hashLog;
     size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
-    size_t const ldmBucketSize =
-        ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
-    size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
     return params.enableLdm ? totalSize : 0;
 }
 
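The totalSize change above comes from the new workspace manager (zstd_cwksp.h, added in this release): size estimates now charge each table through ZSTD_cwksp_alloc_size() rather than summing raw byte counts, so the estimate matches what the workspace will actually carve out. A hedged sketch of the idea, with a hypothetical rounding helper standing in for the real function:

```c
#include <stddef.h>

/* Hypothetical stand-in: an allocator that hands out aligned (or redzone-padded)
 * slices must charge each request its padded size, otherwise summing raw sizes
 * under-estimates the workspace. The real accounting lives in zstd_cwksp.h. */
size_t sketch_alloc_size(size_t bytes) {
    size_t const align = sizeof(void*);          /* assumed alignment */
    return (bytes + align - 1) & ~(align - 1);   /* round up per allocation */
}

/* Two tables estimated the way ZSTD_ldm_getTableSize() now does it:
 * charge each allocation separately rather than summing first. */
size_t sketch_ldm_estimate(size_t bucketBytes, size_t hashBytes) {
    return sketch_alloc_size(bucketBytes) + sketch_alloc_size(hashBytes);
}
```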
@@ -583,7 +583,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
                 rep[i] = rep[i-1];
             rep[0] = sequence.offset;
             /* Store the sequence */
-            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                           sequence.offset + ZSTD_REP_MOVE,
                           sequence.matchLength - MINMATCH);
             ip += sequence.matchLength;
data/ext/zstdruby/libzstd/compress/zstd_opt.c
@@ -552,7 +552,6 @@ U32 ZSTD_insertBtAndGetAllMatches (
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
-    U32 const maxDistance = 1U << cParams->windowLog;
     const BYTE* const base = ms->window.base;
     U32 const current = (U32)(ip-base);
     U32 const hashLog = cParams->hashLog;
@@ -569,8 +568,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
     U32 const btLow = (btMask >= current) ? 0 : current - btMask;
-    U32 const windowValid = ms->window.lowLimit;
-    U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid;
+    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
     U32 const matchLow = windowLow ? windowLow : 1;
     U32* smallerPtr = bt + 2*(current&btMask);
     U32* largerPtr  = bt + 2*(current&btMask) + 1;
@@ -674,19 +672,21 @@ U32 ZSTD_insertBtAndGetAllMatches (
 
     while (nbCompares-- && (matchIndex >= matchLow)) {
         U32* const nextPtr = bt + 2*(matchIndex & btMask);
-        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
         const BYTE* match;
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
         assert(current > matchIndex);
 
         if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
             assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
             match = base + matchIndex;
+            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
         } else {
             match = dictBase + matchIndex;
+            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
             if (matchIndex+matchLength >= dictLimit)
-                match = base + matchIndex;   /* prepare for match[matchLength] */
+                match = base + matchIndex;   /* prepare for match[matchLength] read */
         }
 
         if (matchLength > bestLength) {
@@ -1098,7 +1098,7 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
 
             assert(anchor + llen <= iend);
             ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
-            ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
+            ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
             anchor += advance;
             ip = anchor;
     }   }
data/ext/zstdruby/libzstd/compress/zstdmt_compress.c
@@ -668,7 +668,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
 
     /* init */
     if (job->cdict) {
-        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
+        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
         assert(job->firstJob);  /* only allowed for first job */
         if (ZSTD_isError(initError)) JOB_ERROR(initError);
     } else {  /* srcStart points at reloaded section */
@@ -680,7 +680,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
                                         job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
                                         ZSTD_dtlm_fast,
                                         NULL, /*cdict*/
-                                        jobParams, pledgedSrcSize);
+                                        &jobParams, pledgedSrcSize);
         if (ZSTD_isError(initError)) JOB_ERROR(initError);
     }   }
 
@@ -927,12 +927,18 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
     unsigned jobID;
     DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
     for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
+        /* Copy the mutex/cond out */
+        ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
+        ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
+
         DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
         ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
-        mtctx->jobs[jobID].dstBuff = g_nullBuffer;
-        mtctx->jobs[jobID].cSize = 0;
+
+        /* Clear the job description, but keep the mutex/cond */
+        memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
+        mtctx->jobs[jobID].job_mutex = mutex;
+        mtctx->jobs[jobID].job_cond = cond;
     }
-    memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
     mtctx->inBuff.buffer = g_nullBuffer;
     mtctx->inBuff.filled = 0;
     mtctx->allJobsCompleted = 1;
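This fix matters because the old code memset() the whole job array, zeroing mutexes and condition variables that were still initialized and would later be destroyed. The new code saves them, wipes the job, and restores them. The pattern in isolation (hypothetical Job type; the by-value struct copy mirrors what the diff does with ZSTD_pthread_mutex_t, and is only safe because the object is moved back in place untouched):

```c
#include <pthread.h>
#include <string.h>

typedef struct {
    pthread_mutex_t mutex;   /* long-lived: initialized once, destroyed at teardown */
    pthread_cond_t  cond;
    void*  dstBuff;          /* per-job state that must go back to zero */
    size_t cSize;
} Job;

void resetJobKeepSync(Job* job) {
    /* Save the long-lived synchronization objects... */
    pthread_mutex_t const mutex = job->mutex;
    pthread_cond_t  const cond  = job->cond;
    /* ...clear every per-job field in one go... */
    memset(job, 0, sizeof(*job));
    /* ...and restore them, so an initialized mutex/cond is never zeroed. */
    job->mutex = mutex;
    job->cond  = cond;
}
```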
@@ -1028,9 +1034,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
 
 /* Sets parameters relevant to the compression job,
  * initializing others to default values. */
-static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
+static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
 {
-    ZSTD_CCtx_params jobParams = params;
+    ZSTD_CCtx_params jobParams = *params;
     /* Clear parameters related to multithreading */
     jobParams.forceWindow = 0;
     jobParams.nbWorkers = 0;
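A pattern repeated across the zstdmt_compress.c hunks in this release: ZSTD_CCtx_params is a large struct, and passing it by value copied the whole thing on every call. The new signatures take const ZSTD_CCtx_params* instead. A minimal sketch with a hypothetical Params type:

```c
/* Hypothetical stand-in for ZSTD_CCtx_params: large enough that a
 * by-value copy on every call is measurable. */
typedef struct { int windowLog; int chainLog; int nbWorkers; char pad[200]; } Params;

/* Before: the whole struct is copied onto the callee's stack frame. */
unsigned jobLogByValue(Params const params) {
    return (unsigned)(params.windowLog + 2);
}

/* After: only a pointer crosses the call; const keeps it read-only. */
unsigned jobLogByPtr(const Params* params) {
    return (unsigned)(params->windowLog + 2);
}
```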
@@ -1151,16 +1157,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
 /* =====   Multi-threaded compression   ===== */
 /* ------------------------------------------ */
 
-static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
+static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
 {
     unsigned jobLog;
-    if (params.ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on chainLog instead. */
-        jobLog = MAX(21, params.cParams.chainLog + 4);
+        jobLog = MAX(21, params->cParams.chainLog + 4);
     } else {
-        jobLog = MAX(20, params.cParams.windowLog + 2);
+        jobLog = MAX(20, params->cParams.windowLog + 2);
     }
     return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
 }
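To make the jobLog formula concrete, a worked example with illustrative parameter values (not taken from the diff):

```c
#include <stdio.h>

#define MAX(a,b) ((a) > (b) ? (a) : (b))

int main(void) {
    /* Without LDM, windowLog = 24 : jobLog = MAX(20, 24 + 2) = 26 -> 64 MiB jobs */
    unsigned const jobLogNoLdm = MAX(20u, 24u + 2u);
    /* With LDM, chainLog = 24 : jobLog = MAX(21, 24 + 4) = 28 -> 256 MiB jobs
     * (the oversized windowLog is ignored in favor of chainLog) */
    unsigned const jobLogLdm = MAX(21u, 24u + 4u);
    printf("job size without LDM: %u bytes\n", 1u << jobLogNoLdm);
    printf("job size with LDM   : %u bytes\n", 1u << jobLogLdm);
    return 0;   /* both values are further clamped by ZSTDMT_JOBLOG_MAX */
}
```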
@@ -1193,27 +1199,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
     return ovlog;
 }
 
-static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
+static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
 {
-    int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
-    int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
+    int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
+    int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
     assert(0 <= overlapRLog && overlapRLog <= 8);
-    if (params.ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on chainLog instead.
         * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
-        ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
+        ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
                 - overlapRLog;
     }
     assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
-    DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
+    DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
     DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
     return (ovLog==0) ? 0 : (size_t)1 << ovLog;
 }
 
 static unsigned
-ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
+ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
 {
     assert(nbWorkers>0);
     {   size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
@@ -1236,9 +1242,9 @@ static size_t ZSTDMT_compress_advanced_internal(
                 const ZSTD_CDict* cdict,
                 ZSTD_CCtx_params params)
 {
-    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
-    size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
-    unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
+    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
+    size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
+    unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
     size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
     size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize;   /* avoid too small last block */
     const char* const srcStart = (const char*)src;
@@ -1256,7 +1262,7 @@ static size_t ZSTDMT_compress_advanced_internal(
         ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
         DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
         if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
-        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
+        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
     }
 
     assert(avgJobSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
@@ -1404,12 +1410,12 @@ size_t ZSTDMT_initCStream_internal(
 
     mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
     if (mtctx->singleBlockingThread) {
-        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
+        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
         DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
         assert(singleThreadParams.nbWorkers == 0);
         return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
                                          dict, dictSize, cdict,
-                                         singleThreadParams, pledgedSrcSize);
+                                         &singleThreadParams, pledgedSrcSize);
     }
 
     DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
@@ -1435,11 +1441,11 @@ size_t ZSTDMT_initCStream_internal(
         mtctx->cdict = cdict;
     }
 
-    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
+    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
     DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
     mtctx->targetSectionSize = params.jobSize;
     if (mtctx->targetSectionSize == 0) {
-        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
+        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
     }
     assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
 
data/ext/zstdruby/libzstd/decompress/huf_decompress.c
@@ -61,7 +61,9 @@
 *  Error Management
 ****************************************************************/
 #define HUF_isError ERR_isError
+#ifndef CHECK_F
 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
+#endif
 
 
 /* **************************************************************
data/ext/zstdruby/libzstd/decompress/zstd_decompress.c
@@ -88,10 +88,7 @@ size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
 
 static size_t ZSTD_startingInputLength(ZSTD_format_e format)
 {
-    size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
-                    ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE :
-                    ZSTD_FRAMEHEADERSIZE_PREFIX;
-    ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
+    size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
     /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
     assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
     return startingInputLength;
@@ -376,7 +373,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
 {
     unsigned long long totalDstSize = 0;
 
-    while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
+    while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
         U32 const magicNumber = MEM_readLE32(src);
 
         if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@@ -574,9 +571,10 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
 }
 
 /** ZSTD_insertBlock() :
-    insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
+ *  insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
 size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
 {
+    DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
     ZSTD_checkContinuity(dctx, blockStart);
     dctx->previousDstEnd = (const char*)blockStart + blockSize;
     return blockSize;
@@ -628,11 +626,12 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
 
     /* check */
     RETURN_ERROR_IF(
-        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize,
+        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
         srcSize_wrong);
 
     /* Frame Header */
-    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
+    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
+                ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
         if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
         RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
                         srcSize_wrong);
@@ -713,7 +712,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
         dictSize = ZSTD_DDict_dictSize(ddict);
     }
 
-    while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
+    while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
 
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
         if (ZSTD_isLegacy(src, srcSize)) {
@@ -1097,7 +1096,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
         size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
         for (i=0; i<3; i++) {
             U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
-            RETURN_ERROR_IF(rep==0 || rep >= dictContentSize,
+            RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
                             dictionary_corrupted);
             entropy->rep[i] = rep;
     }   }
@@ -1266,7 +1265,7 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
 {
     RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
     ZSTD_clearDict(dctx);
-    if (dict && dictSize >= 8) {
+    if (dict && dictSize != 0) {
         dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
         RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
         dctx->ddict = dctx->ddictLocal;
@@ -1299,14 +1298,14 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz
 
 
 /* ZSTD_initDStream_usingDict() :
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * return : expected size, aka ZSTD_startingInputLength().
  * this function cannot fail */
 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
 {
     DEBUGLOG(4, "ZSTD_initDStream_usingDict");
     FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) );
     FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
-    return ZSTD_FRAMEHEADERSIZE_PREFIX;
+    return ZSTD_startingInputLength(zds->format);
 }
 
 /* note : this variant can't fail */
@@ -1323,16 +1322,16 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
 {
     FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) );
     FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) );
-    return ZSTD_FRAMEHEADERSIZE_PREFIX;
+    return ZSTD_startingInputLength(dctx->format);
 }
 
 /* ZSTD_resetDStream() :
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * return : expected size, aka ZSTD_startingInputLength().
  * this function cannot fail */
 size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
 {
     FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only));
-    return ZSTD_FRAMEHEADERSIZE_PREFIX;
+    return ZSTD_startingInputLength(dctx->format);
 }
 
 
@@ -1563,7 +1562,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                     zds->lhSize += remainingInput;
                 }
                 input->pos = input->size;
-                return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
+                return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
             }
             assert(ip != NULL);
             memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c
@@ -79,6 +79,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                           const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
 {
+    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
     RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
 
     {   const BYTE* const istart = (const BYTE*) src;
@@ -87,6 +88,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
         switch(litEncType)
         {
         case set_repeat:
+            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
            /* fall-through */
 
@@ -116,7 +118,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     /* 2 - 2 - 18 - 18 */
                     lhSize = 5;
                     litSize  = (lhc >> 4) & 0x3FFFF;
-                    litCSize = (lhc >> 22) + (istart[4] << 10);
+                    litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
                     break;
                 }
                 RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
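The added (size_t) cast on istart[4] is subtle: a BYTE operand is promoted to (signed) int before <<, so the old expression mixed int arithmetic into a size_t value. Since 255 << 10 still fits in int, this is a portability/warnings hardening rather than an overflow fix. In isolation:

```c
#include <stddef.h>

typedef unsigned char BYTE;

size_t litCSize_demo(size_t lhc, const BYTE* istart) {
    /* istart[4] promotes to int; casting first keeps the whole
     * expression in size_t and avoids implicit-conversion warnings. */
    return (lhc >> 22) + ((size_t)istart[4] << 10);
}
```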
@@ -391,7 +393,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
                 symbolNext[s] = 1;
             } else {
                 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
-                symbolNext[s] = normalizedCounter[s];
+                assert(normalizedCounter[s]>=0);
+                symbolNext[s] = (U16)normalizedCounter[s];
     }   }   }
     memcpy(dt, &DTableH, sizeof(DTableH));
 }
@@ -570,38 +573,118 @@ typedef struct {
     size_t pos;
 } seqState_t;
 
+/*! ZSTD_overlapCopy8() :
+ *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+ *  If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ *  Precondition: *ip <= *op
+ *  Postcondition: *op - *ip >= 8
+ */
+static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+    assert(*ip <= *op);
+    if (offset < 8) {
+        /* close range match, overlap */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+        int const sub2 = dec64table[offset];
+        (*op)[0] = (*ip)[0];
+        (*op)[1] = (*ip)[1];
+        (*op)[2] = (*ip)[2];
+        (*op)[3] = (*ip)[3];
+        *ip += dec32table[offset];
+        ZSTD_copy4(*op+4, *ip);
+        *ip -= sub2;
+    } else {
+        ZSTD_copy8(*op, *ip);
+    }
+    *ip += 8;
+    *op += 8;
+    assert(*op - *ip >= 8);
+}
+
+/*! ZSTD_safecopy() :
+ *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ *  This function is only called in the uncommon case where the sequence is near the end of the block. It
+ *  should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ *           The src buffer must be before the dst buffer.
+ */
+static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+    ptrdiff_t const diff = op - ip;
+    BYTE* const oend = op + length;
 
-/* ZSTD_execSequenceLast7():
- * exceptional case : decompress a match starting within last 7 bytes of output buffer.
- * requires more careful checks, to ensure there is no overflow.
- * performance does not matter though.
- * note : this case is supposed to be never generated "naturally" by reference encoder,
- *        since in most cases it needs at least 8 bytes to look for a match.
- *        but it's allowed by the specification. */
+    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
+           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
+
+    if (length < 8) {
+        /* Handle short lengths. */
+        while (op < oend) *op++ = *ip++;
+        return;
+    }
+    if (ovtype == ZSTD_overlap_src_before_dst) {
+        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
+        assert(length >= 8);
+        ZSTD_overlapCopy8(&op, &ip, diff);
+        assert(op - ip >= 8);
+        assert(op <= oend);
+    }
+
+    if (oend <= oend_w) {
+        /* No risk of overwrite. */
+        ZSTD_wildcopy(op, ip, length, ovtype);
+        return;
+    }
+    if (op <= oend_w) {
+        /* Wildcopy until we get close to the end. */
+        assert(oend > oend_w);
+        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
+        ip += oend_w - op;
+        op = oend_w;
+    }
+    /* Handle the leftovers. */
+    while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_execSequenceEnd():
+ * This version handles cases that are near the end of the output buffer. It requires
+ * more careful checks to make sure there is no overflow. By separating out these hard
+ * and unlikely cases, we can speed up the common cases.
+ *
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+ */
 FORCE_NOINLINE
-size_t ZSTD_execSequenceLast7(BYTE* op,
-                              BYTE* const oend, seq_t sequence,
-                              const BYTE** litPtr, const BYTE* const litLimit,
-                              const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+size_t ZSTD_execSequenceEnd(BYTE* op,
+                            BYTE* const oend, seq_t sequence,
+                            const BYTE** litPtr, const BYTE* const litLimit,
+                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
 {
     BYTE* const oLitEnd = op + sequence.litLength;
     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
     BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
     const BYTE* match = oLitEnd - sequence.offset;
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
 
-    /* check */
-    RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
+    /* bounds checks */
+    assert(oLitEnd < oMatchEnd);
+    RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer");
     RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
 
     /* copy literals */
-    while (op < oLitEnd) *op++ = *(*litPtr)++;
+    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
+    op = oLitEnd;
+    *litPtr = iLitEnd;
 
     /* copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - base)) {
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected);
-        match = dictEnd - (base-match);
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
+        match = dictEnd - (prefixStart-match);
         if (match + sequence.matchLength <= dictEnd) {
             memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
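The dec32table/dec64table dance inside the new ZSTD_overlapCopy8() exists because a match with offset < 8 overlaps its own output: a wide 8-byte copy would read bytes it has not written yet, so the offset is first spread to at least 8. A standalone demonstration of the hazard and of the byte-wise semantics LZ matches require (illustrative, not library code):

```c
#include <assert.h>
#include <stdio.h>
#include <string.h>

/* Overlapping forward copy, byte by byte: safe for any offset, and exactly
 * what an LZ match with offset < 8 needs (e.g. offset 1 == run-length). */
static void overlap_copy(unsigned char* op, size_t offset, size_t length) {
    const unsigned char* match = op - offset;
    while (length--) *op++ = *match++;
}

int main(void) {
    unsigned char buf[16] = "ab";
    overlap_copy(buf + 2, 2, 6);            /* offset 2, len 6 -> "abababab" */
    assert(memcmp(buf, "abababab", 8) == 0);
    /* A single memcpy(buf+2, buf, 6) would be undefined here (overlap),
     * and one 8-byte wide copy would read bytes not yet written. */
    printf("%.8s\n", (const char*)buf);
    return 0;
}
```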
@@ -611,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
             memmove(oLitEnd, match, length1);
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
-            match = base;
+            match = prefixStart;
     }   }
-    while (op < oMatchEnd) *op++ = *match++;
+    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
     return sequenceLength;
 }
 
-
 HINT_INLINE
 size_t ZSTD_execSequence(BYTE* op,
                          BYTE* const oend, seq_t sequence,
@@ -631,20 +713,29 @@ size_t ZSTD_execSequence(BYTE* op,
     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
     const BYTE* match = oLitEnd - sequence.offset;
 
-    /* check */
-    RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
-    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
-    if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
-
-    /* copy Literals */
-    if (sequence.litLength > 8)
-        ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-    else
-        ZSTD_copy8(op, *litPtr);
+    /* Errors and uncommon cases handled here. */
+    assert(oLitEnd < oMatchEnd);
+    if (iLitEnd > litLimit || oMatchEnd > oend_w)
+        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+    /* Copy Literals:
+     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+     * We likely don't need the full 32-byte wildcopy.
+     */
+    assert(WILDCOPY_OVERLENGTH >= 16);
+    ZSTD_copy16(op, (*litPtr));
+    if (sequence.litLength > 16) {
+        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
+    }
     op = oLitEnd;
     *litPtr = iLitEnd;   /* update for next sequence */
 
-    /* copy Match */
+    /* Copy Match */
     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix -> go into extDict */
         RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
@@ -659,123 +750,33 @@ size_t ZSTD_execSequence(BYTE* op,
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = prefixStart;
-            if (op > oend_w || sequence.matchLength < MINMATCH) {
-                U32 i;
-                for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
-                return sequenceLength;
-            }
     }   }
-    /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
-
-    /* match within prefix */
-    if (sequence.offset < 8) {
-        /* close range match, overlap */
-        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
-        int const sub2 = dec64table[sequence.offset];
-        op[0] = match[0];
-        op[1] = match[1];
-        op[2] = match[2];
-        op[3] = match[3];
-        match += dec32table[sequence.offset];
-        ZSTD_copy4(op+4, match);
-        match -= sub2;
-    } else {
-        ZSTD_copy8(op, match);
-    }
-    op += 8; match += 8;
-
-    if (oMatchEnd > oend-(16-MINMATCH)) {
-        if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
-            match += oend_w - op;
-            op = oend_w;
-        }
-        while (op < oMatchEnd) *op++ = *match++;
-    } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);   /* works even if matchLength < 8 */
+    /* Match within prefix of 1 or more bytes */
+    assert(op <= oMatchEnd);
+    assert(oMatchEnd <= oend_w);
+    assert(match >= prefixStart);
+    assert(sequence.matchLength >= 1);
+
+    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+     * without overlap checking.
+     */
+    if (sequence.offset >= WILDCOPY_VECLEN) {
+        /* We bet on a full wildcopy for matches, since we expect matches to be
+         * longer than literals (in general). In silesia, ~10% of matches are longer
+         * than 16 bytes.
+         */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+        return sequenceLength;
     }
-    return sequenceLength;
-}
-
-
-HINT_INLINE
-size_t ZSTD_execSequenceLong(BYTE* op,
-                             BYTE* const oend, seq_t sequence,
-                             const BYTE** litPtr, const BYTE* const litLimit,
-                             const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
-{
-    BYTE* const oLitEnd = op + sequence.litLength;
-    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
-    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
-    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
-    const BYTE* match = sequence.match;
+    assert(sequence.offset < WILDCOPY_VECLEN);
 
-    /* check */
-    RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
-    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
-    if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
-
-    /* copy Literals */
-    if (sequence.litLength > 8)
-        ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-    else
-        ZSTD_copy8(op, *litPtr);  /* note : op <= oLitEnd <= oend_w == oend - 8 */
+    /* Copy 8 bytes and spread the offset to be >= 8. */
+    ZSTD_overlapCopy8(&op, &match, sequence.offset);
 
-    op = oLitEnd;
-    *litPtr = iLitEnd;   /* update for next sequence */
-
-    /* copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
-        /* offset beyond prefix */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
-        if (match + sequence.matchLength <= dictEnd) {
-            memmove(oLitEnd, match, sequence.matchLength);
-            return sequenceLength;
-        }
-        /* span extDict & currentPrefixSegment */
-        {   size_t const length1 = dictEnd - match;
-            memmove(oLitEnd, match, length1);
-            op = oLitEnd + length1;
-            sequence.matchLength -= length1;
-            match = prefixStart;
-            if (op > oend_w || sequence.matchLength < MINMATCH) {
-                U32 i;
-                for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
-                return sequenceLength;
-            }
-    }   }
-    assert(op <= oend_w);
-    assert(sequence.matchLength >= MINMATCH);
-
-    /* match within prefix */
-    if (sequence.offset < 8) {
-        /* close range match, overlap */
-        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
-        int const sub2 = dec64table[sequence.offset];
-        op[0] = match[0];
-        op[1] = match[1];
-        op[2] = match[2];
-        op[3] = match[3];
-        match += dec32table[sequence.offset];
-        ZSTD_copy4(op+4, match);
-        match -= sub2;
-    } else {
-        ZSTD_copy8(op, match);
-    }
-    op += 8; match += 8;
-
-    if (oMatchEnd > oend-(16-MINMATCH)) {
-        if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
-            match += oend_w - op;
-            op = oend_w;
-        }
-        while (op < oMatchEnd) *op++ = *match++;
-    } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);   /* works even if matchLength < 8 */
+    /* If the match length is > 8 bytes, then continue with the wildcopy. */
+    if (sequence.matchLength > 8) {
+        assert(op < oMatchEnd);
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
     }
     return sequenceLength;
 }
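The rewritten ZSTD_execSequence() above also changes how literals are copied: an unconditional 16-byte copy covers the common short case, and only litLength > 16 pays for a wide copy. A sketch of that pattern using plain memcpy (the real code uses ZSTD_copy16/ZSTD_wildcopy over buffers padded by WILDCOPY_OVERLENGTH, which is what makes the unconditional over-copy safe):

```c
#include <string.h>

/* Assumes dst and src each have at least 16 bytes of valid/padded space,
 * mirroring the WILDCOPY_OVERLENGTH guarantee inside the decoder. */
void copy_literals_sketch(unsigned char* dst, const unsigned char* src,
                          size_t litLength) {
    memcpy(dst, src, 16);                        /* covers litLength <= 16, branch-free */
    if (litLength > 16) {                        /* uncommon: long literal run */
        memcpy(dst + 16, src + 16, litLength - 16);
    }
}
```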
@@ -1095,7 +1096,7 @@ ZSTD_decompressSequencesLong_body(
     /* decode and decompress */
     for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
         seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
-        size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+        size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
         if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
         PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
         sequences[seqNb & STORED_SEQS_MASK] = sequence;
@@ -1106,7 +1107,7 @@ ZSTD_decompressSequencesLong_body(
     /* finish queue */
     seqNb -= seqAdvance;
     for ( ; seqNb<nbSeq ; seqNb++) {
-        size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+        size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
         if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
         op += oneSeqSize;
     }