zstd-ruby 1.4.2.0 → 1.4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +0 -2
  4. data/ext/zstdruby/libzstd/README.md +13 -2
  5. data/ext/zstdruby/libzstd/common/bitstream.h +7 -2
  6. data/ext/zstdruby/libzstd/common/compiler.h +17 -5
  7. data/ext/zstdruby/libzstd/common/fse.h +1 -1
  8. data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -0
  9. data/ext/zstdruby/libzstd/common/mem.h +74 -1
  10. data/ext/zstdruby/libzstd/common/pool.c +7 -3
  11. data/ext/zstdruby/libzstd/common/threading.c +46 -1
  12. data/ext/zstdruby/libzstd/common/threading.h +32 -1
  13. data/ext/zstdruby/libzstd/common/xxhash.c +8 -2
  14. data/ext/zstdruby/libzstd/common/zstd_internal.h +37 -58
  15. data/ext/zstdruby/libzstd/compress/zstd_compress.c +644 -445
  16. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +98 -26
  17. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +10 -5
  18. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
  19. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +3 -3
  20. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  21. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +535 -0
  22. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +11 -12
  23. data/ext/zstdruby/libzstd/compress/zstd_fast.c +38 -45
  24. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +35 -31
  25. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +4 -4
  26. data/ext/zstdruby/libzstd/compress/zstd_opt.c +6 -6
  27. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +32 -26
  28. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +2 -0
  29. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +16 -17
  30. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +149 -148
  31. data/ext/zstdruby/libzstd/deprecated/zbuff.h +6 -5
  32. data/ext/zstdruby/libzstd/dictBuilder/cover.c +7 -8
  33. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +1 -1
  34. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +1 -1
  35. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +2 -1
  36. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +2 -1
  37. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +6 -2
  38. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +1 -1
  39. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +1 -1
  40. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +1 -1
  41. data/ext/zstdruby/libzstd/libzstd.pc.in +3 -2
  42. data/ext/zstdruby/libzstd/zstd.h +170 -66
  43. data/lib/zstd-ruby/version.rb +1 -1
  44. data/zstd-ruby.gemspec +1 -1
  45. metadata +5 -4
@@ -49,9 +49,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
 {
     size_t const ldmHSize = ((size_t)1) << params.hashLog;
     size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
-    size_t const ldmBucketSize =
-        ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
-    size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
     return params.enableLdm ? totalSize : 0;
 }
 
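The new estimate routes each table through ZSTD_cwksp_alloc_size() so it agrees with what the workspace allocator introduced in this release (zstd_cwksp.h, new file above) will actually reserve per allocation. A minimal sketch of that accounting idea, with hypothetical names, assuming the allocator may pad each allocation (for example with ASAN redzones):

    /* Sketch only; alloc_size() stands in for ZSTD_cwksp_alloc_size(). */
    #include <stddef.h>

    #define REDZONE 16                      /* assumed per-allocation padding */

    static size_t alloc_size(size_t size)
    {
    #ifdef WITH_REDZONES                    /* e.g. an ASAN build */
        return size + 2 * REDZONE;          /* each allocation padded separately */
    #else
        return size;
    #endif
    }

    static size_t ldm_table_space(size_t bucketSize, size_t hSizeBytes)
    {
        /* Pad per allocation: alloc_size(a) + alloc_size(b), not alloc_size(a+b). */
        return alloc_size(bucketSize) + alloc_size(hSizeBytes);
    }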
@@ -583,7 +583,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
             rep[i] = rep[i-1];
         rep[0] = sequence.offset;
         /* Store the sequence */
-        ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+        ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                       sequence.offset + ZSTD_REP_MOVE,
                       sequence.matchLength - MINMATCH);
         ip += sequence.matchLength;
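ZSTD_storeSeq() grows a literal-limit argument here (and in the zstd_opt.c hunk further down): the extra pointer tells the sequence store where the literal buffer ends, so literals can be copied with a bounded wildcopy instead of an unchecked one. A sketch of the signature change, with parameter names assumed from the call sites and the _v142/_v144 suffixes purely illustrative:

    #include <stddef.h>

    typedef unsigned char BYTE;
    typedef struct seqStore_s seqStore_t;   /* opaque in this sketch */

    /* 1.4.2: the callee cannot know how far past `literals` it may read */
    void ZSTD_storeSeq_v142(seqStore_t* ss, size_t litLength, const BYTE* literals,
                            unsigned offCode, size_t mlBase);

    /* 1.4.4: litLimit (`iend` at the call sites) bounds the literal copy */
    void ZSTD_storeSeq_v144(seqStore_t* ss, size_t litLength, const BYTE* literals,
                            const BYTE* litLimit, unsigned offCode, size_t mlBase);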
@@ -552,7 +552,6 @@ U32 ZSTD_insertBtAndGetAllMatches (
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
-    U32 const maxDistance = 1U << cParams->windowLog;
     const BYTE* const base = ms->window.base;
     U32 const current = (U32)(ip-base);
     U32 const hashLog = cParams->hashLog;
@@ -569,8 +568,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
     U32 const btLow = (btMask >= current) ? 0 : current - btMask;
-    U32 const windowValid = ms->window.lowLimit;
-    U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid;
+    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
     U32 const matchLow = windowLow ? windowLow : 1;
     U32* smallerPtr = bt + 2*(current&btMask);
     U32* largerPtr  = bt + 2*(current&btMask) + 1;
@@ -674,19 +672,21 @@ U32 ZSTD_insertBtAndGetAllMatches (
 
         while (nbCompares-- && (matchIndex >= matchLow)) {
             U32* const nextPtr = bt + 2*(matchIndex & btMask);
-            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
             const BYTE* match;
+            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
             assert(current > matchIndex);
 
             if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
                 assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
                 match = base + matchIndex;
+                if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
                 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
             } else {
                 match = dictBase + matchIndex;
+                assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
                 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
                 if (matchIndex+matchLength >= dictLimit)
-                    match = base + matchIndex;   /* prepare for match[matchLength] */
+                    match = base + matchIndex;   /* prepare for match[matchLength] read */
             }
 
             if (matchLength > bestLength) {
@@ -1098,7 +1098,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
 
             assert(anchor + llen <= iend);
             ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
-            ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
+            ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
             anchor += advance;
             ip = anchor;
     }   }
@@ -668,7 +668,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
 
     /* init */
     if (job->cdict) {
-        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
+        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
         assert(job->firstJob);  /* only allowed for first job */
         if (ZSTD_isError(initError)) JOB_ERROR(initError);
     } else {  /* srcStart points at reloaded section */
@@ -680,7 +680,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
                                         job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
                                         ZSTD_dtlm_fast,
                                         NULL, /*cdict*/
-                                        jobParams, pledgedSrcSize);
+                                        &jobParams, pledgedSrcSize);
         if (ZSTD_isError(initError)) JOB_ERROR(initError);
     }   }
 
@@ -927,12 +927,18 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
     unsigned jobID;
     DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
     for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
+        /* Copy the mutex/cond out */
+        ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
+        ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
+
         DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
         ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
-        mtctx->jobs[jobID].dstBuff = g_nullBuffer;
-        mtctx->jobs[jobID].cSize = 0;
+
+        /* Clear the job description, but keep the mutex/cond */
+        memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
+        mtctx->jobs[jobID].job_mutex = mutex;
+        mtctx->jobs[jobID].job_cond = cond;
     }
-    memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
     mtctx->inBuff.buffer = g_nullBuffer;
     mtctx->inBuff.filled = 0;
     mtctx->allJobsCompleted = 1;
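The rewritten loop replaces the old blanket memset() of the whole jobs array, which also wiped the initialized mutexes and condition variables stored inside each job. The save/clear/restore pattern in isolation, as a standalone sketch using plain pthreads (field names illustrative):

    #include <pthread.h>
    #include <stddef.h>
    #include <string.h>

    typedef struct {
        pthread_mutex_t mutex;   /* initialized once, must survive resets */
        pthread_cond_t  cond;
        void*  dstBuff;
        size_t cSize;
        /* ... other per-job fields ... */
    } job_t;

    static void job_reset(job_t* job)
    {
        pthread_mutex_t const mutex = job->mutex;  /* copy the handles out */
        pthread_cond_t  const cond  = job->cond;
        memset(job, 0, sizeof(*job));              /* clear everything */
        job->mutex = mutex;                        /* put the handles back */
        job->cond  = cond;
    }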
@@ -1028,9 +1034,9 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
 
 /* Sets parameters relevant to the compression job,
  * initializing others to default values. */
-static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
+static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
 {
-    ZSTD_CCtx_params jobParams = params;
+    ZSTD_CCtx_params jobParams = *params;
     /* Clear parameters related to multithreading */
     jobParams.forceWindow = 0;
     jobParams.nbWorkers = 0;
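This is the first of several hunks in zstdmt_compress.c converting ZSTD_CCtx_params arguments from by-value to by-const-pointer, so a large struct is no longer copied onto the stack at every call. The pattern in isolation (names illustrative):

    typedef struct { int windowLog; int chainLog; int nbWorkers; /* ... */ } params_t;

    /* before: the whole struct is copied per call */
    static int jobLog_byValue(params_t const params)     { return params.chainLog + 4; }

    /* after: only a pointer is passed; fields are reached with -> */
    static int jobLog_byPointer(const params_t* params)  { return params->chainLog + 4; }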
@@ -1151,16 +1157,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
 /* =====   Multi-threaded compression   ===== */
 /* ------------------------------------------ */
 
-static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
+static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
 {
     unsigned jobLog;
-    if (params.ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on chainLog instead. */
-        jobLog = MAX(21, params.cParams.chainLog + 4);
+        jobLog = MAX(21, params->cParams.chainLog + 4);
     } else {
-        jobLog = MAX(20, params.cParams.windowLog + 2);
+        jobLog = MAX(20, params->cParams.windowLog + 2);
     }
     return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
 }
@@ -1193,27 +1199,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
     return ovlog;
 }
 
-static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
+static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
 {
-    int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
-    int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
+    int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
+    int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
     assert(0 <= overlapRLog && overlapRLog <= 8);
-    if (params.ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm) {
         /* In Long Range Mode, the windowLog is typically oversized.
          * In which case, it's preferable to determine the jobSize
          * based on chainLog instead.
          * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
-        ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
+        ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
                 - overlapRLog;
     }
     assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
-    DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
+    DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
     DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
     return (ovLog==0) ? 0 : (size_t)1 << ovLog;
 }
 
 static unsigned
-ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
+ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
 {
     assert(nbWorkers>0);
     {   size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
@@ -1236,9 +1242,9 @@ static size_t ZSTDMT_compress_advanced_internal(
                                 const ZSTD_CDict* cdict,
                                 ZSTD_CCtx_params params)
 {
-    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
-    size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
-    unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
+    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
+    size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
+    unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
     size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
     size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize;   /* avoid too small last block */
     const char* const srcStart = (const char*)src;
@@ -1256,7 +1262,7 @@ static size_t ZSTDMT_compress_advanced_internal(
         ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
         DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
         if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
-        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
+        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
     }
 
     assert(avgJobSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
@@ -1404,12 +1410,12 @@ size_t ZSTDMT_initCStream_internal(
 
     mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
     if (mtctx->singleBlockingThread) {
-        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
+        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
         DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
         assert(singleThreadParams.nbWorkers == 0);
         return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
                                          dict, dictSize, cdict,
-                                         singleThreadParams, pledgedSrcSize);
+                                         &singleThreadParams, pledgedSrcSize);
     }
 
     DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
@@ -1435,11 +1441,11 @@ size_t ZSTDMT_initCStream_internal(
         mtctx->cdict = cdict;
     }
 
-    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
+    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
     DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
     mtctx->targetSectionSize = params.jobSize;
     if (mtctx->targetSectionSize == 0) {
-        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
+        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
     }
     assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
 
@@ -61,7 +61,9 @@
 *  Error Management
 ****************************************************************/
 #define HUF_isError ERR_isError
+#ifndef CHECK_F
 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
+#endif
 
 
 /* **************************************************************
@@ -88,10 +88,7 @@ size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
 
 static size_t ZSTD_startingInputLength(ZSTD_format_e format)
 {
-    size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
-                    ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE :
-                    ZSTD_FRAMEHEADERSIZE_PREFIX;
-    ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
+    size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
     /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
     assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
     return startingInputLength;
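ZSTD_FRAMEHEADERSIZE_PREFIX (and ZSTD_FRAMEHEADERSIZE_MIN, used in later hunks) became function-like macros taking the frame format, which subsumes the subtract-the-magic logic deleted above. A sketch of the likely shape, assuming the standard frame layout (a 4-byte magic number precedes the 1-byte frame header descriptor, and the magicless format drops the magic):

    typedef enum { ZSTD_f_zstd1 = 0, ZSTD_f_zstd1_magicless = 1 } ZSTD_format_e;

    #define ZSTD_FRAMEIDSIZE 4                  /* magic number length */
    /* input needed before the frame header can be sized */
    #define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1)
    /* smallest complete frame header */
    #define ZSTD_FRAMEHEADERSIZE_MIN(format)    ((format) == ZSTD_f_zstd1 ? 6 : 2)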
@@ -376,7 +373,7 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
 {
     unsigned long long totalDstSize = 0;
 
-    while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
+    while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
         U32 const magicNumber = MEM_readLE32(src);
 
         if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@@ -574,9 +571,10 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
 }
 
 /** ZSTD_insertBlock() :
-    insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
+ *  insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
 size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
 {
+    DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
     ZSTD_checkContinuity(dctx, blockStart);
     dctx->previousDstEnd = (const char*)blockStart + blockSize;
     return blockSize;
@@ -628,11 +626,12 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
 
     /* check */
     RETURN_ERROR_IF(
-        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize,
+        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
         srcSize_wrong);
 
     /* Frame Header */
-    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
+    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
+                ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
         if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
         RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
                         srcSize_wrong);
@@ -713,7 +712,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
         dictSize = ZSTD_DDict_dictSize(ddict);
     }
 
-    while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
+    while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
 
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
         if (ZSTD_isLegacy(src, srcSize)) {
@@ -1097,7 +1096,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
         size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
         for (i=0; i<3; i++) {
             U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
-            RETURN_ERROR_IF(rep==0 || rep >= dictContentSize,
+            RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
                             dictionary_corrupted);
             entropy->rep[i] = rep;
     }   }
@@ -1266,7 +1265,7 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
 {
     RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
     ZSTD_clearDict(dctx);
-    if (dict && dictSize >= 8) {
+    if (dict && dictSize != 0) {
         dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
         RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
         dctx->ddict = dctx->ddictLocal;
@@ -1299,14 +1298,14 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz
 
 
 /* ZSTD_initDStream_usingDict() :
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * return : expected size, aka ZSTD_startingInputLength().
  * this function cannot fail */
 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
 {
     DEBUGLOG(4, "ZSTD_initDStream_usingDict");
     FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) );
     FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
-    return ZSTD_FRAMEHEADERSIZE_PREFIX;
+    return ZSTD_startingInputLength(zds->format);
 }
 
 /* note : this variant can't fail */
@@ -1323,16 +1322,16 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
 {
     FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) );
     FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) );
-    return ZSTD_FRAMEHEADERSIZE_PREFIX;
+    return ZSTD_startingInputLength(dctx->format);
 }
 
 /* ZSTD_resetDStream() :
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * return : expected size, aka ZSTD_startingInputLength().
  * this function cannot fail */
 size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
 {
     FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only));
-    return ZSTD_FRAMEHEADERSIZE_PREFIX;
+    return ZSTD_startingInputLength(dctx->format);
 }
 
 
@@ -1563,7 +1562,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                     zds->lhSize += remainingInput;
                 }
                 input->pos = input->size;
-                return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
+                return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
             }
             assert(ip != NULL);
             memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
@@ -79,6 +79,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                           const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
 {
+    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
     RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);
 
     {   const BYTE* const istart = (const BYTE*) src;
@@ -87,6 +88,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
         switch(litEncType)
         {
         case set_repeat:
+            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
             RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
             /* fall-through */
 
@@ -116,7 +118,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
             /* 2 - 2 - 18 - 18 */
             lhSize = 5;
             litSize  = (lhc >> 4) & 0x3FFFF;
-            litCSize = (lhc >> 22) + (istart[4] << 10);
+            litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
             break;
         }
         RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
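The new (size_t) cast makes the whole expression compute in size_t: istart[4] is a BYTE, and integer promotion would otherwise evaluate the shift in plain (signed) int. A standalone illustration of the promotion at work; on common 32/64-bit targets both results match, but the cast guarantees unsigned, sufficiently wide arithmetic regardless of int width:

    #include <stddef.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned char b = 0xFF;
        size_t promoted = b << 10;           /* shift happens in int, then converts */
        size_t widened  = (size_t)b << 10;   /* shift happens in size_t throughout */
        printf("%zu %zu\n", promoted, widened);
        return 0;
    }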
@@ -391,7 +393,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
                 symbolNext[s] = 1;
             } else {
                 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
-                symbolNext[s] = normalizedCounter[s];
+                assert(normalizedCounter[s]>=0);
+                symbolNext[s] = (U16)normalizedCounter[s];
     }   }   }
     memcpy(dt, &DTableH, sizeof(DTableH));
 }
@@ -570,38 +573,118 @@ typedef struct {
     size_t pos;
 } seqState_t;
 
+/*! ZSTD_overlapCopy8() :
+ *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+ *  If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ *  Precondition: *ip <= *op
+ *  Postcondition: *op - *ip >= 8
+ */
+static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+    assert(*ip <= *op);
+    if (offset < 8) {
+        /* close range match, overlap */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+        int const sub2 = dec64table[offset];
+        (*op)[0] = (*ip)[0];
+        (*op)[1] = (*ip)[1];
+        (*op)[2] = (*ip)[2];
+        (*op)[3] = (*ip)[3];
+        *ip += dec32table[offset];
+        ZSTD_copy4(*op+4, *ip);
+        *ip -= sub2;
+    } else {
+        ZSTD_copy8(*op, *ip);
+    }
+    *ip += 8;
+    *op += 8;
+    assert(*op - *ip >= 8);
+}
+
+/*! ZSTD_safecopy() :
+ *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ *  This function is only called in the uncommon case where the sequence is near the end of the block. It
+ *  should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ *           The src buffer must be before the dst buffer.
+ */
+static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+    ptrdiff_t const diff = op - ip;
+    BYTE* const oend = op + length;
 
-/* ZSTD_execSequenceLast7():
- * exceptional case : decompress a match starting within last 7 bytes of output buffer.
- * requires more careful checks, to ensure there is no overflow.
- * performance does not matter though.
- * note : this case is supposed to be never generated "naturally" by reference encoder,
- * since in most cases it needs at least 8 bytes to look for a match.
- * but it's allowed by the specification. */
+    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
+           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
+
+    if (length < 8) {
+        /* Handle short lengths. */
+        while (op < oend) *op++ = *ip++;
+        return;
+    }
+    if (ovtype == ZSTD_overlap_src_before_dst) {
+        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
+        assert(length >= 8);
+        ZSTD_overlapCopy8(&op, &ip, diff);
+        assert(op - ip >= 8);
+        assert(op <= oend);
+    }
+
+    if (oend <= oend_w) {
+        /* No risk of overwrite. */
+        ZSTD_wildcopy(op, ip, length, ovtype);
+        return;
+    }
+    if (op <= oend_w) {
+        /* Wildcopy until we get close to the end. */
+        assert(oend > oend_w);
+        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
+        ip += oend_w - op;
+        op = oend_w;
+    }
+    /* Handle the leftovers. */
+    while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_execSequenceEnd():
+ * This version handles cases that are near the end of the output buffer. It requires
+ * more careful checks to make sure there is no overflow. By separating out these hard
+ * and unlikely cases, we can speed up the common cases.
+ *
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+ */
 FORCE_NOINLINE
-size_t ZSTD_execSequenceLast7(BYTE* op,
-                              BYTE* const oend, seq_t sequence,
-                              const BYTE** litPtr, const BYTE* const litLimit,
-                              const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+size_t ZSTD_execSequenceEnd(BYTE* op,
+                            BYTE* const oend, seq_t sequence,
+                            const BYTE** litPtr, const BYTE* const litLimit,
+                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
 {
     BYTE* const oLitEnd = op + sequence.litLength;
     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
     BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
     const BYTE* match = oLitEnd - sequence.offset;
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
 
-    /* check */
-    RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer");
+    /* bounds checks */
+    assert(oLitEnd < oMatchEnd);
+    RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer");
     RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");
 
     /* copy literals */
-    while (op < oLitEnd) *op++ = *(*litPtr)++;
+    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
+    op = oLitEnd;
+    *litPtr = iLitEnd;
 
     /* copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - base)) {
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected);
-        match = dictEnd - (base-match);
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
+        match = dictEnd - (prefixStart-match);
         if (match + sequence.matchLength <= dictEnd) {
             memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
@@ -611,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
             memmove(oLitEnd, match, length1);
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
-            match = base;
+            match = prefixStart;
     }   }
-    while (op < oMatchEnd) *op++ = *match++;
+    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
     return sequenceLength;
 }
 
-
 HINT_INLINE
 size_t ZSTD_execSequence(BYTE* op,
                          BYTE* const oend, seq_t sequence,
@@ -631,20 +713,29 @@ size_t ZSTD_execSequence(BYTE* op,
     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
     const BYTE* match = oLitEnd - sequence.offset;
 
-    /* check */
-    RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
-    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
-    if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
-
-    /* copy Literals */
-    if (sequence.litLength > 8)
-        ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-    else
-        ZSTD_copy8(op, *litPtr);
+    /* Errors and uncommon cases handled here. */
+    assert(oLitEnd < oMatchEnd);
+    if (iLitEnd > litLimit || oMatchEnd > oend_w)
+        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+    /* Copy Literals:
+     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+     * We likely don't need the full 32-byte wildcopy.
+     */
+    assert(WILDCOPY_OVERLENGTH >= 16);
+    ZSTD_copy16(op, (*litPtr));
+    if (sequence.litLength > 16) {
+        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
+    }
     op = oLitEnd;
     *litPtr = iLitEnd;   /* update for next sequence */
 
-    /* copy Match */
+    /* Copy Match */
     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix -> go into extDict */
         RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
@@ -659,123 +750,33 @@ size_t ZSTD_execSequence(BYTE* op,
             op = oLitEnd + length1;
             sequence.matchLength -= length1;
             match = prefixStart;
-            if (op > oend_w || sequence.matchLength < MINMATCH) {
-                U32 i;
-                for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
-                return sequenceLength;
-            }
     }   }
-    /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
-
-    /* match within prefix */
-    if (sequence.offset < 8) {
-        /* close range match, overlap */
-        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
-        int const sub2 = dec64table[sequence.offset];
-        op[0] = match[0];
-        op[1] = match[1];
-        op[2] = match[2];
-        op[3] = match[3];
-        match += dec32table[sequence.offset];
-        ZSTD_copy4(op+4, match);
-        match -= sub2;
-    } else {
-        ZSTD_copy8(op, match);
-    }
-    op += 8; match += 8;
-
-    if (oMatchEnd > oend-(16-MINMATCH)) {
-        if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
-            match += oend_w - op;
-            op = oend_w;
-        }
-        while (op < oMatchEnd) *op++ = *match++;
-    } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);   /* works even if matchLength < 8 */
+    /* Match within prefix of 1 or more bytes */
+    assert(op <= oMatchEnd);
+    assert(oMatchEnd <= oend_w);
+    assert(match >= prefixStart);
+    assert(sequence.matchLength >= 1);
+
+    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+     * without overlap checking.
+     */
+    if (sequence.offset >= WILDCOPY_VECLEN) {
+        /* We bet on a full wildcopy for matches, since we expect matches to be
+         * longer than literals (in general). In silesia, ~10% of matches are longer
+         * than 16 bytes.
+         */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+        return sequenceLength;
     }
-    return sequenceLength;
-}
-
-
-HINT_INLINE
-size_t ZSTD_execSequenceLong(BYTE* op,
-                             BYTE* const oend, seq_t sequence,
-                             const BYTE** litPtr, const BYTE* const litLimit,
-                             const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
-{
-    BYTE* const oLitEnd = op + sequence.litLength;
-    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
-    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
-    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
-    const BYTE* match = sequence.match;
+    assert(sequence.offset < WILDCOPY_VECLEN);
 
-    /* check */
-    RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
-    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer");
-    if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
-
-    /* copy Literals */
-    if (sequence.litLength > 8)
-        ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-    else
-        ZSTD_copy8(op, *litPtr);  /* note : op <= oLitEnd <= oend_w == oend - 8 */
+    /* Copy 8 bytes and spread the offset to be >= 8. */
+    ZSTD_overlapCopy8(&op, &match, sequence.offset);
 
-    op = oLitEnd;
-    *litPtr = iLitEnd;   /* update for next sequence */
-
-    /* copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
-        /* offset beyond prefix */
-        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected);
-        if (match + sequence.matchLength <= dictEnd) {
-            memmove(oLitEnd, match, sequence.matchLength);
-            return sequenceLength;
-        }
-        /* span extDict & currentPrefixSegment */
-        {   size_t const length1 = dictEnd - match;
-            memmove(oLitEnd, match, length1);
-            op = oLitEnd + length1;
-            sequence.matchLength -= length1;
-            match = prefixStart;
-            if (op > oend_w || sequence.matchLength < MINMATCH) {
-                U32 i;
-                for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
-                return sequenceLength;
-            }
-    }   }
-    assert(op <= oend_w);
-    assert(sequence.matchLength >= MINMATCH);
-
-    /* match within prefix */
-    if (sequence.offset < 8) {
-        /* close range match, overlap */
-        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
-        int const sub2 = dec64table[sequence.offset];
-        op[0] = match[0];
-        op[1] = match[1];
-        op[2] = match[2];
-        op[3] = match[3];
-        match += dec32table[sequence.offset];
-        ZSTD_copy4(op+4, match);
-        match -= sub2;
-    } else {
-        ZSTD_copy8(op, match);
-    }
-    op += 8; match += 8;
-
-    if (oMatchEnd > oend-(16-MINMATCH)) {
-        if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
-            match += oend_w - op;
-            op = oend_w;
-        }
-        while (op < oMatchEnd) *op++ = *match++;
-    } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);   /* works even if matchLength < 8 */
+    /* If the match length is > 8 bytes, then continue with the wildcopy. */
+    if (sequence.matchLength > 8) {
+        assert(op < oMatchEnd);
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
     }
     return sequenceLength;
 }
@@ -1095,7 +1096,7 @@ ZSTD_decompressSequencesLong_body(
         /* decode and decompress */
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
             seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
-            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
             PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
             sequences[seqNb & STORED_SEQS_MASK] = sequence;
@@ -1106,7 +1107,7 @@ ZSTD_decompressSequencesLong_body(
         /* finish queue */
         seqNb -= seqAdvance;
         for ( ; seqNb<nbSeq ; seqNb++) {
-            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
             op += oneSeqSize;
         }