zstd-ruby 1.5.2.2 → 1.5.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/README.md +15 -3
  3. data/ext/zstdruby/common.h +7 -0
  4. data/ext/zstdruby/libzstd/common/bits.h +175 -0
  5. data/ext/zstdruby/libzstd/common/bitstream.h +18 -59
  6. data/ext/zstdruby/libzstd/common/compiler.h +22 -3
  7. data/ext/zstdruby/libzstd/common/cpu.h +1 -1
  8. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  9. data/ext/zstdruby/libzstd/common/debug.h +1 -1
  10. data/ext/zstdruby/libzstd/common/entropy_common.c +12 -40
  11. data/ext/zstdruby/libzstd/common/error_private.c +9 -2
  12. data/ext/zstdruby/libzstd/common/error_private.h +1 -1
  13. data/ext/zstdruby/libzstd/common/fse.h +5 -83
  14. data/ext/zstdruby/libzstd/common/fse_decompress.c +7 -99
  15. data/ext/zstdruby/libzstd/common/huf.h +65 -156
  16. data/ext/zstdruby/libzstd/common/mem.h +39 -46
  17. data/ext/zstdruby/libzstd/common/pool.c +26 -10
  18. data/ext/zstdruby/libzstd/common/pool.h +7 -1
  19. data/ext/zstdruby/libzstd/common/portability_macros.h +22 -3
  20. data/ext/zstdruby/libzstd/common/threading.c +68 -14
  21. data/ext/zstdruby/libzstd/common/threading.h +5 -10
  22. data/ext/zstdruby/libzstd/common/xxhash.c +2 -2
  23. data/ext/zstdruby/libzstd/common/xxhash.h +8 -8
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
  25. data/ext/zstdruby/libzstd/common/zstd_deps.h +1 -1
  26. data/ext/zstdruby/libzstd/common/zstd_internal.h +17 -113
  27. data/ext/zstdruby/libzstd/common/zstd_trace.h +3 -3
  28. data/ext/zstdruby/libzstd/compress/clevels.h +1 -1
  29. data/ext/zstdruby/libzstd/compress/fse_compress.c +7 -124
  30. data/ext/zstdruby/libzstd/compress/hist.c +1 -1
  31. data/ext/zstdruby/libzstd/compress/hist.h +1 -1
  32. data/ext/zstdruby/libzstd/compress/huf_compress.c +234 -169
  33. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1055 -455
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +165 -145
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +115 -39
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -8
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +3 -3
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  41. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +5 -3
  42. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +95 -33
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
  44. data/ext/zstdruby/libzstd/compress/zstd_fast.c +433 -148
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
  46. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +306 -283
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +4 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +5 -5
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +1 -1
  51. data/ext/zstdruby/libzstd/compress/zstd_opt.c +104 -80
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  53. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +12 -5
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +1 -1
  55. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +434 -441
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +30 -39
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +3 -4
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +1 -1
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +164 -42
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +186 -65
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +1 -1
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +4 -2
  63. data/ext/zstdruby/libzstd/dictBuilder/cover.c +19 -15
  64. data/ext/zstdruby/libzstd/dictBuilder/cover.h +1 -1
  65. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +2 -2
  66. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +9 -87
  67. data/ext/zstdruby/libzstd/zdict.h +53 -31
  68. data/ext/zstdruby/libzstd/zstd.h +489 -90
  69. data/ext/zstdruby/libzstd/zstd_errors.h +27 -8
  70. data/ext/zstdruby/main.c +4 -0
  71. data/ext/zstdruby/streaming_compress.c +1 -7
  72. data/ext/zstdruby/zstdruby.c +110 -26
  73. data/lib/zstd-ruby/version.rb +1 -1
  74. data/lib/zstd-ruby.rb +0 -1
  75. metadata +7 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -23,6 +23,7 @@
23
23
  #ifdef ZSTD_MULTITHREAD
24
24
  # include "zstdmt_compress.h"
25
25
  #endif
26
+ #include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
26
27
 
27
28
  #if defined (__cplusplus)
28
29
  extern "C" {
@@ -117,12 +118,13 @@ typedef struct {
117
118
  /** ZSTD_buildBlockEntropyStats() :
118
119
  * Builds entropy for the block.
119
120
  * @return : 0 on success or error code */
120
- size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
121
- const ZSTD_entropyCTables_t* prevEntropy,
122
- ZSTD_entropyCTables_t* nextEntropy,
123
- const ZSTD_CCtx_params* cctxParams,
124
- ZSTD_entropyCTablesMetadata_t* entropyMetadata,
125
- void* workspace, size_t wkspSize);
121
+ size_t ZSTD_buildBlockEntropyStats(
122
+ const seqStore_t* seqStorePtr,
123
+ const ZSTD_entropyCTables_t* prevEntropy,
124
+ ZSTD_entropyCTables_t* nextEntropy,
125
+ const ZSTD_CCtx_params* cctxParams,
126
+ ZSTD_entropyCTablesMetadata_t* entropyMetadata,
127
+ void* workspace, size_t wkspSize);
126
128
 
127
129
  /*********************************
128
130
  * Compression internals structs *
@@ -148,6 +150,12 @@ typedef struct {
148
150
  size_t capacity; /* The capacity starting from `seq` pointer */
149
151
  } rawSeqStore_t;
150
152
 
153
+ typedef struct {
154
+ U32 idx; /* Index in array of ZSTD_Sequence */
155
+ U32 posInSequence; /* Position within sequence at idx */
156
+ size_t posInSrc; /* Number of bytes given by sequences provided so far */
157
+ } ZSTD_sequencePosition;
158
+
151
159
  UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
152
160
 
153
161
  typedef struct {
@@ -234,6 +242,11 @@ struct ZSTD_matchState_t {
234
242
  const ZSTD_matchState_t* dictMatchState;
235
243
  ZSTD_compressionParameters cParams;
236
244
  const rawSeqStore_t* ldmSeqStore;
245
+
246
+ /* Controls prefetching in some dictMatchState matchfinders.
247
+ * This behavior is controlled from the cctx ms.
248
+ * This parameter has no effect in the cdict ms. */
249
+ int prefetchCDictTables;
237
250
  };
238
251
 
239
252
  typedef struct {
@@ -330,6 +343,24 @@ struct ZSTD_CCtx_params_s {
330
343
 
331
344
  /* Internal use, for createCCtxParams() and freeCCtxParams() only */
332
345
  ZSTD_customMem customMem;
346
+
347
+ /* Controls prefetching in some dictMatchState matchfinders */
348
+ ZSTD_paramSwitch_e prefetchCDictTables;
349
+
350
+ /* Controls whether zstd will fall back to an internal matchfinder
351
+ * if the external matchfinder returns an error code. */
352
+ int enableMatchFinderFallback;
353
+
354
+ /* Indicates whether an external matchfinder has been referenced.
355
+ * Users can't set this externally.
356
+ * It is set internally in ZSTD_registerSequenceProducer(). */
357
+ int useSequenceProducer;
358
+
359
+ /* Adjust the max block size*/
360
+ size_t maxBlockSize;
361
+
362
+ /* Controls repcode search in external sequence parsing */
363
+ ZSTD_paramSwitch_e searchForExternalRepcodes;
333
364
  }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
334
365
 
335
366
  #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
@@ -361,6 +392,14 @@ typedef struct {
361
392
  ZSTD_entropyCTablesMetadata_t entropyMetadata;
362
393
  } ZSTD_blockSplitCtx;
363
394
 
395
+ /* Context for block-level external matchfinder API */
396
+ typedef struct {
397
+ void* mState;
398
+ ZSTD_sequenceProducer_F* mFinder;
399
+ ZSTD_Sequence* seqBuffer;
400
+ size_t seqBufferCapacity;
401
+ } ZSTD_externalMatchCtx;
402
+
364
403
  struct ZSTD_CCtx_s {
365
404
  ZSTD_compressionStage_e stage;
366
405
  int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
@@ -410,6 +449,7 @@ struct ZSTD_CCtx_s {
410
449
 
411
450
  /* Stable in/out buffer verification */
412
451
  ZSTD_inBuffer expectedInBuffer;
452
+ size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */
413
453
  size_t expectedOutBufferSize;
414
454
 
415
455
  /* Dictionary */
@@ -429,9 +469,13 @@ struct ZSTD_CCtx_s {
429
469
 
430
470
  /* Workspace for block splitter */
431
471
  ZSTD_blockSplitCtx blockSplitCtx;
472
+
473
+ /* Workspace for external matchfinder */
474
+ ZSTD_externalMatchCtx externalMatchCtx;
432
475
  };
433
476
 
434
477
  typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
478
+ typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;
435
479
 
436
480
  typedef enum {
437
481
  ZSTD_noDict = 0,
@@ -453,7 +497,7 @@ typedef enum {
453
497
  * In this mode we take both the source size and the dictionary size
454
498
  * into account when selecting and adjusting the parameters.
455
499
  */
456
- ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
500
+ ZSTD_cpm_unknown = 3 /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
457
501
  * We don't know what these parameters are for. We default to the legacy
458
502
  * behavior of taking both the source size and the dict size into account
459
503
  * when selecting and adjusting parameters.
@@ -512,9 +556,11 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
512
556
  /* ZSTD_noCompressBlock() :
513
557
  * Writes uncompressed block to dst buffer from given src.
514
558
  * Returns the size of the block */
515
- MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
559
+ MEM_STATIC size_t
560
+ ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
516
561
  {
517
562
  U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
563
+ DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
518
564
  RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
519
565
  dstSize_tooSmall, "dst buf too small for uncompressed block");
520
566
  MEM_writeLE24(dst, cBlockHeader24);
@@ -522,7 +568,8 @@ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const voi
522
568
  return ZSTD_blockHeaderSize + srcSize;
523
569
  }
524
570
 
525
- MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
571
+ MEM_STATIC size_t
572
+ ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
526
573
  {
527
574
  BYTE* const op = (BYTE*)dst;
528
575
  U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
@@ -541,7 +588,7 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
541
588
  {
542
589
  U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
543
590
  ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
544
- assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
591
+ assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat));
545
592
  return (srcSize >> minlog) + 2;
546
593
  }
547
594
 
@@ -577,29 +624,27 @@ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE con
577
624
  while (ip < iend) *op++ = *ip++;
578
625
  }
579
626
 
580
- #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
581
- #define STORE_REPCODE_1 STORE_REPCODE(1)
582
- #define STORE_REPCODE_2 STORE_REPCODE(2)
583
- #define STORE_REPCODE_3 STORE_REPCODE(3)
584
- #define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
585
- #define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE)
586
- #define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE)
587
- #define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
588
- #define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
589
- #define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */
590
- #define STORED_TO_OFFBASE(o) ((o)+1)
591
- #define OFFBASE_TO_STORED(o) ((o)-1)
627
+
628
+ #define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
629
+ #define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
630
+ #define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
631
+ #define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
632
+ #define OFFSET_TO_OFFBASE(o) (assert((o)>0), o + ZSTD_REP_NUM)
633
+ #define OFFBASE_IS_OFFSET(o) ((o) > ZSTD_REP_NUM)
634
+ #define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
635
+ #define OFFBASE_TO_OFFSET(o) (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
636
+ #define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o)) /* returns ID 1,2,3 */
592
637
 
593
638
  /*! ZSTD_storeSeq() :
594
- * Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
595
- * @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
639
+ * Store a sequence (litlen, litPtr, offBase and matchLength) into seqStore_t.
640
+ * @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
596
641
  * @matchLength : must be >= MINMATCH
597
- * Allowed to overread literals up to litLimit.
642
+ * Allowed to over-read literals up to litLimit.
598
643
  */
599
644
  HINT_INLINE UNUSED_ATTR void
600
645
  ZSTD_storeSeq(seqStore_t* seqStorePtr,
601
646
  size_t litLength, const BYTE* literals, const BYTE* litLimit,
602
- U32 offBase_minus1,
647
+ U32 offBase,
603
648
  size_t matchLength)
604
649
  {
605
650
  BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
@@ -608,8 +653,8 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
608
653
  static const BYTE* g_start = NULL;
609
654
  if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
610
655
  { U32 const pos = (U32)((const BYTE*)literals - g_start);
611
- DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
612
- pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
656
+ DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
657
+ pos, (U32)litLength, (U32)matchLength, (U32)offBase);
613
658
  }
614
659
  #endif
615
660
  assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
@@ -619,9 +664,9 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
619
664
  assert(literals + litLength <= litLimit);
620
665
  if (litEnd <= litLimit_w) {
621
666
  /* Common case we can use wildcopy.
622
- * First copy 16 bytes, because literals are likely short.
623
- */
624
- assert(WILDCOPY_OVERLENGTH >= 16);
667
+ * First copy 16 bytes, because literals are likely short.
668
+ */
669
+ ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16);
625
670
  ZSTD_copy16(seqStorePtr->lit, literals);
626
671
  if (litLength > 16) {
627
672
  ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
@@ -640,7 +685,7 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
640
685
  seqStorePtr->sequences[0].litLength = (U16)litLength;
641
686
 
642
687
  /* match offset */
643
- seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
688
+ seqStorePtr->sequences[0].offBase = offBase;
644
689
 
645
690
  /* match Length */
646
691
  assert(matchLength >= MINMATCH);
@@ -658,17 +703,17 @@ ZSTD_storeSeq(seqStore_t* seqStorePtr,
658
703
 
659
704
  /* ZSTD_updateRep() :
660
705
  * updates in-place @rep (array of repeat offsets)
661
- * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
706
+ * @offBase : sum-type, using numeric representation of ZSTD_storeSeq()
662
707
  */
663
708
  MEM_STATIC void
664
- ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
709
+ ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
665
710
  {
666
- if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */
711
+ if (OFFBASE_IS_OFFSET(offBase)) { /* full offset */
667
712
  rep[2] = rep[1];
668
713
  rep[1] = rep[0];
669
- rep[0] = STORED_OFFSET(offBase_minus1);
714
+ rep[0] = OFFBASE_TO_OFFSET(offBase);
670
715
  } else { /* repcode */
671
- U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
716
+ U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0;
672
717
  if (repCode > 0) { /* note : if repCode==0, no change */
673
718
  U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
674
719
  rep[2] = (repCode >= 2) ? rep[1] : rep[2];
@@ -685,11 +730,11 @@ typedef struct repcodes_s {
685
730
  } repcodes_t;
686
731
 
687
732
  MEM_STATIC repcodes_t
688
- ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
733
+ ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
689
734
  {
690
735
  repcodes_t newReps;
691
736
  ZSTD_memcpy(&newReps, rep, sizeof(newReps));
692
- ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
737
+ ZSTD_updateRep(newReps.rep, offBase, ll0);
693
738
  return newReps;
694
739
  }
695
740
 
@@ -697,103 +742,6 @@ ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0
697
742
  /*-*************************************
698
743
  * Match length counter
699
744
  ***************************************/
700
- static unsigned ZSTD_NbCommonBytes (size_t val)
701
- {
702
- if (MEM_isLittleEndian()) {
703
- if (MEM_64bits()) {
704
- # if defined(_MSC_VER) && defined(_WIN64)
705
- # if STATIC_BMI2
706
- return _tzcnt_u64(val) >> 3;
707
- # else
708
- if (val != 0) {
709
- unsigned long r;
710
- _BitScanForward64(&r, (U64)val);
711
- return (unsigned)(r >> 3);
712
- } else {
713
- /* Should not reach this code path */
714
- __assume(0);
715
- }
716
- # endif
717
- # elif defined(__GNUC__) && (__GNUC__ >= 4)
718
- return (__builtin_ctzll((U64)val) >> 3);
719
- # else
720
- static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
721
- 0, 3, 1, 3, 1, 4, 2, 7,
722
- 0, 2, 3, 6, 1, 5, 3, 5,
723
- 1, 3, 4, 4, 2, 5, 6, 7,
724
- 7, 0, 1, 2, 3, 3, 4, 6,
725
- 2, 6, 5, 5, 3, 4, 5, 6,
726
- 7, 1, 2, 4, 6, 4, 4, 5,
727
- 7, 2, 6, 5, 7, 6, 7, 7 };
728
- return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
729
- # endif
730
- } else { /* 32 bits */
731
- # if defined(_MSC_VER)
732
- if (val != 0) {
733
- unsigned long r;
734
- _BitScanForward(&r, (U32)val);
735
- return (unsigned)(r >> 3);
736
- } else {
737
- /* Should not reach this code path */
738
- __assume(0);
739
- }
740
- # elif defined(__GNUC__) && (__GNUC__ >= 3)
741
- return (__builtin_ctz((U32)val) >> 3);
742
- # else
743
- static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
744
- 3, 2, 2, 1, 3, 2, 0, 1,
745
- 3, 3, 1, 2, 2, 2, 2, 0,
746
- 3, 1, 2, 0, 1, 0, 1, 1 };
747
- return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
748
- # endif
749
- }
750
- } else { /* Big Endian CPU */
751
- if (MEM_64bits()) {
752
- # if defined(_MSC_VER) && defined(_WIN64)
753
- # if STATIC_BMI2
754
- return _lzcnt_u64(val) >> 3;
755
- # else
756
- if (val != 0) {
757
- unsigned long r;
758
- _BitScanReverse64(&r, (U64)val);
759
- return (unsigned)(r >> 3);
760
- } else {
761
- /* Should not reach this code path */
762
- __assume(0);
763
- }
764
- # endif
765
- # elif defined(__GNUC__) && (__GNUC__ >= 4)
766
- return (__builtin_clzll(val) >> 3);
767
- # else
768
- unsigned r;
769
- const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
770
- if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
771
- if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
772
- r += (!val);
773
- return r;
774
- # endif
775
- } else { /* 32 bits */
776
- # if defined(_MSC_VER)
777
- if (val != 0) {
778
- unsigned long r;
779
- _BitScanReverse(&r, (unsigned long)val);
780
- return (unsigned)(r >> 3);
781
- } else {
782
- /* Should not reach this code path */
783
- __assume(0);
784
- }
785
- # elif defined(__GNUC__) && (__GNUC__ >= 3)
786
- return (__builtin_clz((U32)val) >> 3);
787
- # else
788
- unsigned r;
789
- if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
790
- r += (!val);
791
- return r;
792
- # endif
793
- } }
794
- }
795
-
796
-
797
745
  MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
798
746
  {
799
747
  const BYTE* const pStart = pIn;
@@ -839,32 +787,36 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
839
787
  * Hashes
840
788
  ***************************************/
841
789
  static const U32 prime3bytes = 506832829U;
842
- static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
790
+ static U32 ZSTD_hash3(U32 u, U32 h) { assert(h <= 32); return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
843
791
  MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
844
792
 
845
793
  static const U32 prime4bytes = 2654435761U;
846
- static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
847
- static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
794
+ static U32 ZSTD_hash4(U32 u, U32 h) { assert(h <= 32); return (u * prime4bytes) >> (32-h) ; }
795
+ static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h); }
848
796
 
849
797
  static const U64 prime5bytes = 889523592379ULL;
850
- static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
798
+ static size_t ZSTD_hash5(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
851
799
  static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
852
800
 
853
801
  static const U64 prime6bytes = 227718039650203ULL;
854
- static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
802
+ static size_t ZSTD_hash6(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
855
803
  static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
856
804
 
857
805
  static const U64 prime7bytes = 58295818150454627ULL;
858
- static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
806
+ static size_t ZSTD_hash7(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
859
807
  static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
860
808
 
861
809
  static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
862
- static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
810
+ static size_t ZSTD_hash8(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
863
811
  static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
864
812
 
865
813
  MEM_STATIC FORCE_INLINE_ATTR
866
814
  size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
867
815
  {
816
+ /* Although some of these hashes do support hBits up to 64, some do not.
817
+ * To be on the safe side, always avoid hBits > 32. */
818
+ assert(hBits <= 32);
819
+
868
820
  switch(mls)
869
821
  {
870
822
  default:
@@ -1223,10 +1175,15 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
1223
1175
  (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
1224
1176
  assert(blockEndIdx >= loadedDictEnd);
1225
1177
 
1226
- if (blockEndIdx > loadedDictEnd + maxDist) {
1178
+ if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) {
1227
1179
  /* On reaching window size, dictionaries are invalidated.
1228
1180
  * For simplification, if window size is reached anywhere within next block,
1229
1181
  * the dictionary is invalidated for the full block.
1182
+ *
1183
+ * We also have to invalidate the dictionary if ZSTD_window_update() has detected
1184
+ * non-contiguous segments, which means that loadedDictEnd != window->dictLimit.
1185
+ * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use
1186
+ * dictMatchState, so setting it to NULL is not a problem.
1230
1187
  */
1231
1188
  DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
1232
1189
  *loadedDictEndPtr = 0;
@@ -1358,6 +1315,42 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
1358
1315
 
1359
1316
  #endif
1360
1317
 
1318
+ /* Short Cache */
1319
+
1320
+ /* Normally, zstd matchfinders follow this flow:
1321
+ * 1. Compute hash at ip
1322
+ * 2. Load index from hashTable[hash]
1323
+ * 3. Check if *ip == *(base + index)
1324
+ * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
1325
+ *
1326
+ * Short cache is an optimization which allows us to avoid step 3 most of the time
1327
+ * when the data doesn't actually match. With short cache, the flow becomes:
1328
+ * 1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
1329
+ * 2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
1330
+ * 3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
1331
+ *
1332
+ * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
1333
+ * dictMatchState matchfinders.
1334
+ */
1335
+ #define ZSTD_SHORT_CACHE_TAG_BITS 8
1336
+ #define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)
1337
+
1338
+ /* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable.
1339
+ * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */
1340
+ MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) {
1341
+ size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
1342
+ U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK);
1343
+ assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0);
1344
+ hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag;
1345
+ }
1346
+
1347
+ /* Helper function for short cache matchfinders.
1348
+ * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */
1349
+ MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
1350
+ U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;
1351
+ U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
1352
+ return tag1 == tag2;
1353
+ }
1361
1354
 
1362
1355
  #if defined (__cplusplus)
1363
1356
  }
@@ -1455,4 +1448,31 @@ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
1455
1448
  */
1456
1449
  void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
1457
1450
 
1451
+ /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
1452
+ * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
1453
+ * Note that the block delimiter must include the last literals of the block.
1454
+ */
1455
+ size_t
1456
+ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
1457
+ ZSTD_sequencePosition* seqPos,
1458
+ const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
1459
+ const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
1460
+
1461
+ /* Returns the number of bytes to move the current read position back by.
1462
+ * Only non-zero if we ended up splitting a sequence.
1463
+ * Otherwise, it may return a ZSTD error if something went wrong.
1464
+ *
1465
+ * This function will attempt to scan through blockSize bytes
1466
+ * represented by the sequences in @inSeqs,
1467
+ * storing any (partial) sequences.
1468
+ *
1469
+ * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
1470
+ * avoid splitting a match, or to avoid splitting a match such that it would produce a match
1471
+ * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
1472
+ */
1473
+ size_t
1474
+ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
1475
+ const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
1476
+ const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
1477
+
1458
1478
  #endif /* ZSTD_COMPRESS_H */