zstd-ruby 1.5.0.0 → 1.5.2.0

Files changed (65)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +2 -2
  3. data/README.md +1 -1
  4. data/ext/zstdruby/extconf.rb +2 -1
  5. data/ext/zstdruby/libzstd/Makefile +50 -175
  6. data/ext/zstdruby/libzstd/README.md +7 -1
  7. data/ext/zstdruby/libzstd/common/bitstream.h +24 -9
  8. data/ext/zstdruby/libzstd/common/compiler.h +89 -43
  9. data/ext/zstdruby/libzstd/common/entropy_common.c +11 -5
  10. data/ext/zstdruby/libzstd/common/error_private.h +79 -0
  11. data/ext/zstdruby/libzstd/common/fse.h +2 -1
  12. data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
  13. data/ext/zstdruby/libzstd/common/huf.h +24 -22
  14. data/ext/zstdruby/libzstd/common/mem.h +18 -0
  15. data/ext/zstdruby/libzstd/common/pool.c +11 -6
  16. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  17. data/ext/zstdruby/libzstd/common/portability_macros.h +137 -0
  18. data/ext/zstdruby/libzstd/common/xxhash.c +5 -805
  19. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  20. data/ext/zstdruby/libzstd/common/zstd_internal.h +95 -92
  21. data/ext/zstdruby/libzstd/common/zstd_trace.h +12 -3
  22. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  23. data/ext/zstdruby/libzstd/compress/fse_compress.c +63 -27
  24. data/ext/zstdruby/libzstd/compress/huf_compress.c +537 -104
  25. data/ext/zstdruby/libzstd/compress/zstd_compress.c +307 -373
  26. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +174 -83
  27. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +4 -3
  28. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +3 -1
  29. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +15 -14
  30. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +4 -3
  31. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +41 -27
  32. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +295 -120
  33. data/ext/zstdruby/libzstd/compress/zstd_fast.c +309 -130
  34. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +482 -562
  35. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +9 -7
  36. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
  37. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +4 -1
  38. data/ext/zstdruby/libzstd/compress/zstd_opt.c +249 -148
  39. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +76 -38
  40. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +4 -1
  41. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +727 -189
  42. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +585 -0
  43. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +85 -22
  44. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +744 -220
  45. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -2
  46. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +34 -3
  47. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +23 -3
  48. data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
  49. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +11 -4
  50. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +101 -30
  51. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +2 -6
  52. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +3 -7
  53. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +3 -7
  54. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +3 -7
  55. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +3 -7
  56. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +3 -7
  57. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -7
  58. data/ext/zstdruby/libzstd/libzstd.mk +203 -0
  59. data/ext/zstdruby/libzstd/libzstd.pc.in +1 -0
  60. data/ext/zstdruby/libzstd/module.modulemap +25 -0
  61. data/ext/zstdruby/libzstd/zdict.h +4 -4
  62. data/ext/zstdruby/libzstd/zstd.h +179 -136
  63. data/ext/zstdruby/zstdruby.c +2 -2
  64. data/lib/zstd-ruby/version.rb +1 -1
  65. metadata +11 -6
@@ -63,7 +63,7 @@ typedef struct {
 } ZSTD_localDict;
 
 typedef struct {
-    HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
+    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
     HUF_repeat repeatMode;
 } ZSTD_hufCTables_t;
 
@@ -129,7 +129,7 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
 *********************************/
 
 typedef struct {
-    U32 off;    /* Offset code (offset + ZSTD_REP_MOVE) for the match */
+    U32 off;    /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
     U32 len;    /* Raw length of match */
 } ZSTD_match_t;
 
@@ -179,7 +179,7 @@ typedef struct {
     U32 offCodeSumBasePrice;    /* to compare to log2(offreq) */
     ZSTD_OptPrice_e priceType;  /* prices can be determined dynamically, or follow a pre-defined cost structure */
     const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
-    ZSTD_literalCompressionMode_e literalCompressionMode;
+    ZSTD_paramSwitch_e literalCompressionMode;
 } optState_t;
 
 typedef struct {
@@ -199,6 +199,8 @@ typedef struct {
   */
 } ZSTD_window_t;
 
+#define ZSTD_WINDOW_START_INDEX 2
+
 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
 
 #define ZSTD_ROW_HASH_CACHE_SIZE 8   /* Size of prefetching hash cache for row-based matchfinder */
@@ -264,7 +266,7 @@ typedef struct {
 } ldmState_t;
 
 typedef struct {
-    U32 enableLdm;                  /* 1 if enable long distance matching */
+    ZSTD_paramSwitch_e enableLdm;   /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
     U32 hashLog;            /* Log size of hashTable */
     U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
     U32 minMatchLength;     /* Minimum match length */
@@ -295,7 +297,7 @@ struct ZSTD_CCtx_params_s {
                              * There is no guarantee that hint is close to actual source size */
 
     ZSTD_dictAttachPref_e attachDictPref;
-    ZSTD_literalCompressionMode_e literalCompressionMode;
+    ZSTD_paramSwitch_e literalCompressionMode;
 
     /* Multithreading: used to pass parameters to mtctx */
     int nbWorkers;
@@ -318,10 +320,10 @@ struct ZSTD_CCtx_params_s {
     int validateSequences;
 
     /* Block splitting */
-    int splitBlocks;
+    ZSTD_paramSwitch_e useBlockSplitter;
 
     /* Param for deciding whether to use row-based matchfinder */
-    ZSTD_useRowMatchFinderMode_e useRowMatchFinder;
+    ZSTD_paramSwitch_e useRowMatchFinder;
 
     /* Always load a dictionary in ext-dict mode (not prefix mode)? */
     int deterministicRefPrefix;
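
The recurring change in this and the surrounding hunks is that ad-hoc on/off fields (splitBlocks, useRowMatchFinder, literalCompressionMode) all become the single three-state ZSTD_paramSwitch_e. A minimal standalone sketch of how such a switch is typically resolved; the ZSTD_ps_* names appear later in this diff, but their declaration order and the resolver helper below are assumptions, not part of the patch:

    /* Sketch only: resolve_param_switch_example is hypothetical, not zstd API. */
    typedef enum { ZSTD_ps_auto, ZSTD_ps_enable, ZSTD_ps_disable } ZSTD_paramSwitch_e;

    int resolve_param_switch_example(ZSTD_paramSwitch_e mode, int heuristicWantsIt)
    {
        if (mode == ZSTD_ps_enable)  return 1;   /* forced on */
        if (mode == ZSTD_ps_disable) return 0;   /* forced off */
        return heuristicWantsIt;                 /* ZSTD_ps_auto: defer to a heuristic */
    }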
@@ -343,6 +345,22 @@ typedef enum {
     ZSTDb_buffered
 } ZSTD_buffered_policy_e;
 
+/**
+ * Struct that contains all elements of block splitter that should be allocated
+ * in a wksp.
+ */
+#define ZSTD_MAX_NB_BLOCK_SPLITS 196
+typedef struct {
+    seqStore_t fullSeqStoreChunk;
+    seqStore_t firstHalfSeqStore;
+    seqStore_t secondHalfSeqStore;
+    seqStore_t currSeqStore;
+    seqStore_t nextSeqStore;
+
+    U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
+    ZSTD_entropyCTablesMetadata_t entropyMetadata;
+} ZSTD_blockSplitCtx;
+
 struct ZSTD_CCtx_s {
     ZSTD_compressionStage_e stage;
     int cParamsChanged;    /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
@@ -374,7 +392,7 @@ struct ZSTD_CCtx_s {
     ZSTD_blockState_t blockState;
     U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
 
-    /* Wether we are streaming or not */
+    /* Whether we are streaming or not */
     ZSTD_buffered_policy_e bufferedPolicy;
 
     /* streaming */
@@ -408,6 +426,9 @@ struct ZSTD_CCtx_s {
 #if ZSTD_TRACE
     ZSTD_TraceCtx traceCtx;
 #endif
+
+    /* Workspace for block splitter */
+    ZSTD_blockSplitCtx blockSplitCtx;
 };
 
 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
@@ -442,7 +463,7 @@ typedef enum {
 typedef size_t (*ZSTD_blockCompressor) (
         ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
 
 
 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@@ -476,31 +497,6 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
     return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
 }
 
-typedef struct repcodes_s {
-    U32 rep[3];
-} repcodes_t;
-
-MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
-{
-    repcodes_t newReps;
-    if (offset >= ZSTD_REP_NUM) {  /* full offset */
-        newReps.rep[2] = rep[1];
-        newReps.rep[1] = rep[0];
-        newReps.rep[0] = offset - ZSTD_REP_MOVE;
-    } else {  /* repcode */
-        U32 const repCode = offset + ll0;
-        if (repCode > 0) {  /* note : if repCode==0, no change */
-            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
-            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
-            newReps.rep[1] = rep[0];
-            newReps.rep[0] = currentOffset;
-        } else {  /* repCode == 0 */
-            ZSTD_memcpy(&newReps, rep, sizeof(newReps));
-        }
-    }
-    return newReps;
-}
-
 /* ZSTD_cParam_withinBounds:
  * @return 1 if value is within cParam bounds,
  * 0 otherwise */
@@ -549,17 +545,17 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
     return (srcSize >> minlog) + 2;
 }
 
-MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
+MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
 {
     switch (cctxParams->literalCompressionMode) {
-    case ZSTD_lcm_huffman:
+    case ZSTD_ps_enable:
         return 0;
-    case ZSTD_lcm_uncompressed:
+    case ZSTD_ps_disable:
         return 1;
     default:
         assert(0 /* impossible: pre-validated */);
-        /* fall-through */
-    case ZSTD_lcm_auto:
+        ZSTD_FALLTHROUGH;
+    case ZSTD_ps_auto:
         return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
     }
 }
@@ -569,7 +565,9 @@ MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParam
  * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
  * large copies.
  */
-static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+static void
+ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
+{
     assert(iend > ilimit_w);
     if (ip <= ilimit_w) {
         ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
@@ -579,14 +577,30 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie
     while (ip < iend) *op++ = *ip++;
 }
 
+#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
+#define STORE_REPCODE_1   STORE_REPCODE(1)
+#define STORE_REPCODE_2   STORE_REPCODE(2)
+#define STORE_REPCODE_3   STORE_REPCODE(3)
+#define STORE_REPCODE(r)  (assert((r)>=1), assert((r)<=3), (r)-1)
+#define STORE_OFFSET(o)   (assert((o)>0), o + ZSTD_REP_MOVE)
+#define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE)
+#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
+#define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
+#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */
+#define STORED_TO_OFFBASE(o) ((o)+1)
+#define OFFBASE_TO_STORED(o) ((o)-1)
+
 /*! ZSTD_storeSeq() :
- *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
- *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
- *  `mlBase` : matchLength - MINMATCH
+ *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
+ *  @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
+ *  @matchLength : must be >= MINMATCH
  *  Allowed to overread literals up to litLimit.
  */
-HINT_INLINE UNUSED_ATTR
-void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
+HINT_INLINE UNUSED_ATTR void
+ZSTD_storeSeq(seqStore_t* seqStorePtr,
+              size_t litLength, const BYTE* literals, const BYTE* litLimit,
+              U32 offBase_minus1,
+              size_t matchLength)
 {
     BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
     BYTE const* const litEnd = literals + litLength;
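
The STORE_*/STORED_* macros above name the offset sum-type that used to be expressed as raw `offCode + ZSTD_REP_MOVE` arithmetic. A small self-contained check of the numeric mapping, assuming ZSTD_REP_NUM == 3 (its upstream value); the macros are copied from the hunk, the main() is only a demonstration:

    #include <assert.h>

    #define ZSTD_REP_NUM 3
    #define ZSTD_REP_MOVE        (ZSTD_REP_NUM-1)
    #define STORE_REPCODE(r)     (assert((r)>=1), assert((r)<=3), (r)-1)
    #define STORE_OFFSET(o)      (assert((o)>0), o + ZSTD_REP_MOVE)
    #define STORED_TO_OFFBASE(o) ((o)+1)

    int main(void)
    {
        assert(STORE_REPCODE(1) == 0);                    /* repcodes 1..3 occupy stored values 0..2 */
        assert(STORE_REPCODE(3) == 2);
        assert(STORE_OFFSET(1) == 3);                     /* a real distance d is stored as d + ZSTD_REP_MOVE, always > 2 */
        assert(STORED_TO_OFFBASE(STORE_OFFSET(1)) == 4);  /* seqDef.offBase finally holds stored + 1 */
        return 0;
    }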
@@ -595,7 +609,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
     {   U32 const pos = (U32)((const BYTE*)literals - g_start);
         DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
-               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
+               pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
     }
 #endif
     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
@@ -626,19 +640,59 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
     seqStorePtr->sequences[0].litLength = (U16)litLength;
 
     /* match offset */
-    seqStorePtr->sequences[0].offset = offCode + 1;
+    seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
 
     /* match Length */
-    if (mlBase>0xFFFF) {
-        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
-        seqStorePtr->longLengthType = ZSTD_llt_matchLength;
-        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    assert(matchLength >= MINMATCH);
+    {   size_t const mlBase = matchLength - MINMATCH;
+        if (mlBase>0xFFFF) {
+            assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
+            seqStorePtr->longLengthType = ZSTD_llt_matchLength;
+            seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+        }
+        seqStorePtr->sequences[0].mlBase = (U16)mlBase;
     }
-    seqStorePtr->sequences[0].matchLength = (U16)mlBase;
 
     seqStorePtr->sequences++;
 }
 
+/* ZSTD_updateRep() :
+ * updates in-place @rep (array of repeat offsets)
+ * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
+ */
+MEM_STATIC void
+ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
+{
+    if (STORED_IS_OFFSET(offBase_minus1)) {  /* full offset */
+        rep[2] = rep[1];
+        rep[1] = rep[0];
+        rep[0] = STORED_OFFSET(offBase_minus1);
+    } else {  /* repcode */
+        U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
+        if (repCode > 0) {  /* note : if repCode==0, no change */
+            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+            rep[2] = (repCode >= 2) ? rep[1] : rep[2];
+            rep[1] = rep[0];
+            rep[0] = currentOffset;
+        } else {  /* repCode == 0 */
+            /* nothing to do */
+        }
+    }
+}
+
+typedef struct repcodes_s {
+    U32 rep[3];
+} repcodes_t;
+
+MEM_STATIC repcodes_t
+ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
+{
+    repcodes_t newReps;
+    ZSTD_memcpy(&newReps, rep, sizeof(newReps));
+    ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
+    return newReps;
+}
+
 
 /*-*************************************
 *  Match length counter
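
ZSTD_updateRep now rotates the rep array in place, and ZSTD_newRep wraps it for callers that still want a fresh copy; the old value-returning ZSTD_updateRep was deleted further up. A self-contained sketch of the same rotation rule on plain uint32_t, with a check of the two interesting cases (names ending in _sketch are mine, not the patch's):

    #include <assert.h>
    #include <stdint.h>

    #define ZSTD_REP_NUM 3
    #define ZSTD_REP_MOVE        (ZSTD_REP_NUM-1)
    #define STORE_REPCODE(r)     (assert((r)>=1), assert((r)<=3), (r)-1)
    #define STORE_OFFSET(o)      (assert((o)>0), (o) + ZSTD_REP_MOVE)
    #define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE)
    #define STORED_OFFSET(o)     ((o) - ZSTD_REP_MOVE)
    #define STORED_REPCODE(o)    ((o) + 1)

    /* Same update rule as ZSTD_updateRep in the hunk above. */
    static void updateRep_sketch(uint32_t rep[ZSTD_REP_NUM], uint32_t stored, uint32_t ll0)
    {
        if (STORED_IS_OFFSET(stored)) {              /* real offset: push the others down */
            rep[2] = rep[1]; rep[1] = rep[0]; rep[0] = STORED_OFFSET(stored);
        } else {                                     /* repcode: promote the one that was used */
            uint32_t const repCode = STORED_REPCODE(stored) - 1 + ll0;
            if (repCode > 0) {
                uint32_t const cur = (repCode == ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
                rep[2] = (repCode >= 2) ? rep[1] : rep[2];
                rep[1] = rep[0];
                rep[0] = cur;
            }   /* repCode == 0 : most recent repcode reused, nothing changes */
        }
    }

    int main(void)
    {
        uint32_t rep[ZSTD_REP_NUM] = { 1, 4, 8 };
        updateRep_sketch(rep, STORE_OFFSET(100), 0);   /* a new real offset */
        assert(rep[0] == 100 && rep[1] == 1 && rep[2] == 4);
        updateRep_sketch(rep, STORE_REPCODE(2), 0);    /* repcode 2 (== 1) moves to the front */
        assert(rep[0] == 1 && rep[1] == 100 && rep[2] == 4);
        return 0;
    }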
@@ -651,8 +705,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
 #       if STATIC_BMI2
             return _tzcnt_u64(val) >> 3;
 #       else
-            unsigned long r = 0;
-            return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
+            if (val != 0) {
+                unsigned long r;
+                _BitScanForward64(&r, (U64)val);
+                return (unsigned)(r >> 3);
+            } else {
+                /* Should not reach this code path */
+                __assume(0);
+            }
 #       endif
 #   elif defined(__GNUC__) && (__GNUC__ >= 4)
         return (__builtin_ctzll((U64)val) >> 3);
@@ -669,8 +729,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
 #   endif
     } else { /* 32 bits */
 #   if defined(_MSC_VER)
-        unsigned long r=0;
-        return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
+        if (val != 0) {
+            unsigned long r;
+            _BitScanForward(&r, (U32)val);
+            return (unsigned)(r >> 3);
+        } else {
+            /* Should not reach this code path */
+            __assume(0);
+        }
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)
         return (__builtin_ctz((U32)val) >> 3);
 #   else
@@ -687,8 +753,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
 #       if STATIC_BMI2
             return _lzcnt_u64(val) >> 3;
 #       else
-            unsigned long r = 0;
-            return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;
+            if (val != 0) {
+                unsigned long r;
+                _BitScanReverse64(&r, (U64)val);
+                return (unsigned)(r >> 3);
+            } else {
+                /* Should not reach this code path */
+                __assume(0);
+            }
 #       endif
 #   elif defined(__GNUC__) && (__GNUC__ >= 4)
         return (__builtin_clzll(val) >> 3);
@@ -702,8 +774,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
 #   endif
     } else { /* 32 bits */
 #   if defined(_MSC_VER)
-        unsigned long r = 0;
-        return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
+        if (val != 0) {
+            unsigned long r;
+            _BitScanReverse(&r, (unsigned long)val);
+            return (unsigned)(r >> 3);
+        } else {
+            /* Should not reach this code path */
+            __assume(0);
+        }
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)
         return (__builtin_clz((U32)val) >> 3);
 #   else
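
All four _MSC_VER branches get the same treatment: the scan intrinsic is now reached only when val != 0, and the impossible zero case becomes __assume(0) instead of a silent `return 0` — the new comment documents that callers only pass a non-zero difference word. A sketch of what the function computes, using the GCC/Clang builtin path visible in the context lines and assuming a little-endian machine:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Number of leading bytes two 8-byte words have in common, from the
     * trailing zero count of their XOR (little-endian layout assumed). */
    static unsigned nbCommonBytes_sketch(uint64_t diff)
    {
        assert(diff != 0);   /* the precondition the __assume(0) branch encodes */
        return (unsigned)(__builtin_ctzll(diff) >> 3);
    }

    int main(void)
    {
        const char a[] = "abcdefgh", b[] = "abcdefgX";
        uint64_t wa, wb;
        memcpy(&wa, a, 8); memcpy(&wb, b, 8);
        assert(nbCommonBytes_sketch(wa ^ wb) == 7);   /* first 7 bytes match */
        return 0;
    }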
@@ -884,9 +962,9 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
 
 MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
 {
-    return window.dictLimit == 1 &&
-           window.lowLimit == 1 &&
-           (window.nextSrc - window.base) == 1;
+    return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
+           window.lowLimit == ZSTD_WINDOW_START_INDEX &&
+           (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
 }
 
 /**
@@ -937,7 +1015,9 @@ MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
 {
     U32 const cycleSize = 1u << cycleLog;
     U32 const curr = (U32)((BYTE const*)src - window.base);
-    U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize);
+    U32 const minIndexToOverflowCorrect = cycleSize
+                                        + MAX(maxDist, cycleSize)
+                                        + ZSTD_WINDOW_START_INDEX;
 
     /* Adjust the min index to backoff the overflow correction frequency,
      * so we don't waste too much CPU in overflow correction. If this
@@ -1012,10 +1092,14 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
     U32 const cycleSize = 1u << cycleLog;
     U32 const cycleMask = cycleSize - 1;
     U32 const curr = (U32)((BYTE const*)src - window->base);
-    U32 const currentCycle0 = curr & cycleMask;
-    /* Exclude zero so that newCurrent - maxDist >= 1. */
-    U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0;
-    U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize);
+    U32 const currentCycle = curr & cycleMask;
+    /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
+    U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
+                                     ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
+                                     : 0;
+    U32 const newCurrent = currentCycle
+                         + currentCycleCorrection
+                         + MAX(maxDist, cycleSize);
     U32 const correction = curr - newCurrent;
     /* maxDist must be a power of two so that:
      *   (newCurrent & cycleMask) == (curr & cycleMask)
@@ -1031,14 +1115,20 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
 
     window->base += correction;
     window->dictBase += correction;
-    if (window->lowLimit <= correction) window->lowLimit = 1;
-    else window->lowLimit -= correction;
-    if (window->dictLimit <= correction) window->dictLimit = 1;
-    else window->dictLimit -= correction;
+    if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
+        window->lowLimit = ZSTD_WINDOW_START_INDEX;
+    } else {
+        window->lowLimit -= correction;
+    }
+    if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
+        window->dictLimit = ZSTD_WINDOW_START_INDEX;
+    } else {
+        window->dictLimit -= correction;
+    }
 
     /* Ensure we can still reference the full window. */
     assert(newCurrent >= maxDist);
-    assert(newCurrent - maxDist >= 1);
+    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
     /* Ensure that lowLimit and dictLimit didn't underflow. */
     assert(window->lowLimit <= newCurrent);
     assert(window->dictLimit <= newCurrent);
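
The asserts closing this hunk state the invariant the rewritten arithmetic has to keep: after correction, indices never drop below ZSTD_WINDOW_START_INDEX. A standalone run of the computation from the previous hunk on one arbitrary example (MAX is redefined locally; the numbers are not from the diff):

    #include <assert.h>
    #include <stdint.h>

    #define MAX(a,b) ((a) > (b) ? (a) : (b))
    #define ZSTD_WINDOW_START_INDEX 2

    int main(void)
    {
        uint32_t const cycleLog   = 20;
        uint32_t const maxDist    = 1u << 20;
        uint32_t const cycleSize  = 1u << cycleLog;
        uint32_t const curr       = 3u << cycleLog;             /* example current index */
        uint32_t const currentCycle = curr & (cycleSize - 1);   /* == 0 for this input */
        uint32_t const currentCycleCorrection =
            currentCycle < ZSTD_WINDOW_START_INDEX ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX) : 0;
        uint32_t const newCurrent = currentCycle + currentCycleCorrection + MAX(maxDist, cycleSize);
        assert(newCurrent >= maxDist);
        assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
        return 0;
    }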
@@ -1149,11 +1239,12 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
 
 MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
     ZSTD_memset(window, 0, sizeof(*window));
-    window->base = (BYTE const*)"";
-    window->dictBase = (BYTE const*)"";
-    window->dictLimit = 1;    /* start from 1, so that 1st position is valid */
-    window->lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
-    window->nextSrc = window->base + 1;   /* see issue #1241 */
+    window->base = (BYTE const*)" ";
+    window->dictBase = (BYTE const*)" ";
+    ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
+    window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */
+    window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */
+    window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */
     window->nbOverflowCorrections = 0;
 }
 
@@ -1206,15 +1297,15 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
  */
 MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
 {
-    U32 const maxDistance = 1U << windowLog;
-    U32 const lowestValid = ms->window.lowLimit;
-    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
-    U32 const isDictionary = (ms->loadedDictEnd != 0);
+    U32 const maxDistance = 1U << windowLog;
+    U32 const lowestValid = ms->window.lowLimit;
+    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    U32 const isDictionary = (ms->loadedDictEnd != 0);
     /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
      * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
      * valid for the entire block. So this check is sufficient to find the lowest valid match index.
      */
-    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
+    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
     return matchLowest;
 }
 
@@ -73,7 +73,8 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                               void* entropyWorkspace, size_t entropyWorkspaceSize,
-                        const int bmi2)
+                        const int bmi2,
+                        unsigned suspectUncompressible)
 {
     size_t const minGain = ZSTD_minGain(srcSize, strategy);
     size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
@@ -105,11 +106,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
         HUF_compress1X_repeat(
             ostart+lhSize, dstCapacity-lhSize, src, srcSize,
             HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
-            (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
+            (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) :
         HUF_compress4X_repeat(
             ostart+lhSize, dstCapacity-lhSize, src, srcSize,
             HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
-            (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
+            (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
     if (repeat != HUF_repeat_none) {
         /* reused the existing table */
         DEBUGLOG(5, "Reusing previous huffman table");
@@ -18,12 +18,14 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
 
 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
+/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
 size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                         ZSTD_hufCTables_t* nextHuf,
                         ZSTD_strategy strategy, int disableLiteralCompression,
                         void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         void* entropyWorkspace, size_t entropyWorkspaceSize,
-                        const int bmi2);
+                        const int bmi2,
+                        unsigned suspectUncompressible);
 
 #endif /* ZSTD_COMPRESS_LITERALS_H */
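
The new suspectUncompressible argument threads a hint down to the Huffman stage so it can sample the literals before paying for a full table build; the actual sampling check lives inside HUF_compress1X_repeat/HUF_compress4X_repeat and is not shown in this diff. The snippet below is only a conceptual stand-in for that kind of test, not the code zstd ships:

    #include <stddef.h>

    /* Hypothetical heuristic: sample roughly 1K bytes and count distinct values;
     * if nearly every byte value shows up, the block is likely incompressible. */
    unsigned sampleLooksIncompressible_sketch(const unsigned char* src, size_t srcSize)
    {
        unsigned char seen[256] = {0};
        unsigned distinct = 0;
        size_t const step = srcSize < 1024 ? 1 : srcSize / 1024;
        size_t i;
        for (i = 0; i < srcSize; i += step) {
            if (!seen[src[i]]) { seen[src[i]] = 1; distinct++; }
        }
        return distinct > 240;
    }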
@@ -275,10 +275,11 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
     assert(nbSeq_1 > 1);
     assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
     (void)entropyWorkspaceSize;
-    FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
-    { size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog);   /* overflow protected */
+    FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
+    assert(oend >= op);
+    { size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog);   /* overflow protected */
       FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
-      FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
+      FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
       return NCountSize;
     }
 }
@@ -312,19 +313,19 @@ ZSTD_encodeSequences_body(
     FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
     BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
     if (MEM_32bits()) BIT_flushBits(&blockStream);
-    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
     if (MEM_32bits()) BIT_flushBits(&blockStream);
     if (longOffsets) {
         U32 const ofBits = ofCodeTable[nbSeq-1];
         unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
         if (extraBits) {
-            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
             BIT_flushBits(&blockStream);
         }
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
                     ofBits - extraBits);
     } else {
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
     }
     BIT_flushBits(&blockStream);
 
@@ -338,8 +339,8 @@ ZSTD_encodeSequences_body(
             U32 const mlBits = ML_bits[mlCode];
             DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
                         (unsigned)sequences[n].litLength,
-                        (unsigned)sequences[n].matchLength + MINMATCH,
-                        (unsigned)sequences[n].offset);
+                        (unsigned)sequences[n].mlBase + MINMATCH,
+                        (unsigned)sequences[n].offBase);
                                                                             /* 32b*/  /* 64b*/
                                                                             /* (7)*/  /* (7)*/
             FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
@@ -350,18 +351,18 @@ ZSTD_encodeSequences_body(
                 BIT_flushBits(&blockStream);                                /* (7)*/
             BIT_addBits(&blockStream, sequences[n].litLength, llBits);
             if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
-            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+            BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
             if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
             if (longOffsets) {
                 unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
                 if (extraBits) {
-                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
                     BIT_flushBits(&blockStream);                            /* (7)*/
                 }
-                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
                             ofBits - extraBits);                            /* 31 */
             } else {
-                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+                BIT_addBits(&blockStream, sequences[n].offBase, ofBits);    /* 31 */
             }
             BIT_flushBits(&blockStream);                                    /* (7)*/
             DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
@@ -398,7 +399,7 @@ ZSTD_encodeSequences_default(
 
 #if DYNAMIC_BMI2
 
-static TARGET_ATTRIBUTE("bmi2") size_t
+static BMI2_TARGET_ATTRIBUTE size_t
 ZSTD_encodeSequences_bmi2(
             void* dst, size_t dstCapacity,
             FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
@@ -132,6 +132,7 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
     const seqDef* sp = sstart;
     size_t matchLengthSum = 0;
     size_t litLengthSum = 0;
+    (void)(litLengthSum); /* suppress unused variable warning on some environments */
     while (send-sp > 0) {
         ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
         litLengthSum += seqLen.litLength;
@@ -324,7 +325,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
 static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
                         const BYTE* codeTable, unsigned maxCode,
                         size_t nbSeq, const FSE_CTable* fseCTable,
-                        const U32* additionalBits,
+                        const U8* additionalBits,
                         short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                         void* workspace, size_t wkspSize)
 {
@@ -474,7 +475,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
         /* I think there is an optimization opportunity here.
          * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
          * since it recalculates estimate from scratch.
-         * For example, it would recount literal distribution and symbol codes everytime.
+         * For example, it would recount literal distribution and symbol codes every time.
          */
         cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
                                                        &nextCBlock->entropy, entropyMetadata,
@@ -538,7 +539,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
             repcodes_t rep;
             ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
             for (seq = sstart; seq < sp; ++seq) {
-                rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
+                ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
             }
             ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
         }