zstdlib 0.7.0-x86-mingw32 → 0.10.0-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +20 -0
  3. data/README.md +7 -1
  4. data/Rakefile +38 -8
  5. data/ext/{zstdlib → zstdlib_c}/extconf.rb +11 -6
  6. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.2/zstdlib.c +2 -2
  7. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.3/zstdlib.c +2 -2
  8. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.4/zstdlib.c +2 -2
  9. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.5/zstdlib.c +2 -2
  10. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.6/zstdlib.c +2 -2
  11. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.7/zstdlib.c +2 -2
  12. data/ext/zstdlib_c/ruby/zlib-3.0/zstdlib.c +4994 -0
  13. data/ext/zstdlib_c/ruby/zlib-3.1/zstdlib.c +5076 -0
  14. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/adler32.c +0 -0
  15. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/compress.c +0 -0
  16. data/ext/zstdlib_c/zlib-1.2.12/crc32.c +1116 -0
  17. data/ext/zstdlib_c/zlib-1.2.12/crc32.h +9446 -0
  18. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/deflate.c +78 -30
  19. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/deflate.h +12 -15
  20. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzclose.c +0 -0
  21. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzguts.h +3 -2
  22. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzlib.c +5 -3
  23. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzread.c +5 -7
  24. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzwrite.c +25 -13
  25. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/infback.c +2 -1
  26. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inffast.c +14 -14
  27. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inffast.h +0 -0
  28. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inffixed.h +0 -0
  29. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inflate.c +39 -8
  30. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inflate.h +3 -2
  31. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inftrees.c +3 -3
  32. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inftrees.h +0 -0
  33. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/trees.c +27 -48
  34. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/trees.h +0 -0
  35. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/uncompr.c +0 -0
  36. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zconf.h +0 -0
  37. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zlib.h +123 -100
  38. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zutil.c +2 -2
  39. data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zutil.h +12 -9
  40. data/ext/{zstdlib → zstdlib_c}/zlib.mk +0 -0
  41. data/ext/{zstdlib → zstdlib_c}/zlibwrapper/zlibwrapper.c +1 -5
  42. data/ext/{zstdlib → zstdlib_c}/zlibwrapper.mk +0 -0
  43. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/bitstream.h +46 -22
  44. data/ext/zstdlib_c/zstd-1.5.2/lib/common/compiler.h +335 -0
  45. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/cpu.h +1 -3
  46. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/debug.c +1 -1
  47. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/debug.h +12 -19
  48. data/ext/zstdlib_c/zstd-1.5.2/lib/common/entropy_common.c +368 -0
  49. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.c +2 -1
  50. data/ext/zstdlib_c/zstd-1.5.2/lib/common/error_private.h +159 -0
  51. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/fse.h +41 -12
  52. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/fse_decompress.c +139 -22
  53. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/huf.h +47 -23
  54. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/mem.h +87 -98
  55. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/pool.c +34 -23
  56. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/pool.h +4 -4
  57. data/ext/zstdlib_c/zstd-1.5.2/lib/common/portability_macros.h +137 -0
  58. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/threading.c +6 -5
  59. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/threading.h +0 -0
  60. data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.c +24 -0
  61. data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.h +5686 -0
  62. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_common.c +10 -10
  63. data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_deps.h +111 -0
  64. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_internal.h +191 -145
  65. data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_trace.h +163 -0
  66. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/clevels.h +134 -0
  67. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/fse_compress.c +89 -46
  68. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.c +27 -29
  69. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.h +2 -2
  70. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/huf_compress.c +1370 -0
  71. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress.c +2917 -868
  72. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_internal.h +458 -125
  73. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.c +12 -11
  74. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.h +4 -2
  75. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.c +41 -18
  76. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.h +1 -1
  77. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_superblock.c +26 -298
  78. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_superblock.h +1 -1
  79. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_cwksp.h +234 -83
  80. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.c +313 -138
  81. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.h +1 -1
  82. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.c +329 -150
  83. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.h +1 -1
  84. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.c +2104 -0
  85. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.h +125 -0
  86. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.c +321 -216
  87. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.h +9 -2
  88. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_ldm_geartab.h +106 -0
  89. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.c +412 -166
  90. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.h +1 -1
  91. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.c +169 -453
  92. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstdmt_compress.h +113 -0
  93. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/huf_decompress.c +1044 -403
  94. data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress_amd64.S +585 -0
  95. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.c +9 -9
  96. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.h +2 -2
  97. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress.c +450 -105
  98. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.c +913 -273
  99. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.h +14 -5
  100. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_internal.h +59 -12
  101. data/ext/zstdlib_c/zstd-1.5.2/lib/zdict.h +452 -0
  102. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/zstd.h +699 -214
  103. data/ext/{zstdlib/zstd-1.4.5/lib/common → zstdlib_c/zstd-1.5.2/lib}/zstd_errors.h +2 -1
  104. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzclose.c +0 -0
  105. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzcompatibility.h +1 -1
  106. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzguts.h +0 -0
  107. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzlib.c +0 -0
  108. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzread.c +0 -0
  109. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzwrite.c +0 -0
  110. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.c +133 -44
  111. data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.h +1 -1
  112. data/ext/zstdlib_c/zstd.mk +15 -0
  113. data/lib/2.4/zstdlib_c.so +0 -0
  114. data/lib/2.5/zstdlib_c.so +0 -0
  115. data/lib/2.6/zstdlib_c.so +0 -0
  116. data/lib/2.7/zstdlib_c.so +0 -0
  117. data/lib/3.0/zstdlib_c.so +0 -0
  118. data/lib/3.1/zstdlib_c.so +0 -0
  119. data/lib/zstdlib.rb +2 -2
  120. metadata +125 -116
  121. data/ext/zstdlib/zlib-1.2.11/crc32.c +0 -442
  122. data/ext/zstdlib/zlib-1.2.11/crc32.h +0 -441
  123. data/ext/zstdlib/zstd-1.4.5/lib/common/compiler.h +0 -175
  124. data/ext/zstdlib/zstd-1.4.5/lib/common/entropy_common.c +0 -216
  125. data/ext/zstdlib/zstd-1.4.5/lib/common/error_private.h +0 -80
  126. data/ext/zstdlib/zstd-1.4.5/lib/common/xxhash.c +0 -864
  127. data/ext/zstdlib/zstd-1.4.5/lib/common/xxhash.h +0 -285
  128. data/ext/zstdlib/zstd-1.4.5/lib/compress/huf_compress.c +0 -798
  129. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_lazy.c +0 -1138
  130. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_lazy.h +0 -67
  131. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstdmt_compress.h +0 -192
  132. data/ext/zstdlib/zstd.mk +0 -14
  133. data/lib/2.2/zstdlib.so +0 -0
  134. data/lib/2.3/zstdlib.so +0 -0
  135. data/lib/2.4/zstdlib.so +0 -0
  136. data/lib/2.5/zstdlib.so +0 -0
  137. data/lib/2.6/zstdlib.so +0 -0
  138. data/lib/2.7/zstdlib.so +0 -0
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -28,7 +28,6 @@
28
28
  extern "C" {
29
29
  #endif
30
30
 
31
-
32
31
  /*-*************************************
33
32
  * Constants
34
33
  ***************************************/
@@ -64,7 +63,7 @@ typedef struct {
64
63
  } ZSTD_localDict;
65
64
 
66
65
  typedef struct {
67
- U32 CTable[HUF_CTABLE_SIZE_U32(255)];
66
+ HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
68
67
  HUF_repeat repeatMode;
69
68
  } ZSTD_hufCTables_t;
70
69
 
@@ -82,11 +81,75 @@ typedef struct {
82
81
  ZSTD_fseCTables_t fse;
83
82
  } ZSTD_entropyCTables_t;
84
83
 
84
+ /***********************************************
85
+ * Entropy buffer statistics structs and funcs *
86
+ ***********************************************/
87
+ /** ZSTD_hufCTablesMetadata_t :
88
+ * Stores Literals Block Type for a super-block in hType, and
89
+ * huffman tree description in hufDesBuffer.
90
+ * hufDesSize refers to the size of huffman tree description in bytes.
91
+ * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
85
92
  typedef struct {
86
- U32 off;
87
- U32 len;
93
+ symbolEncodingType_e hType;
94
+ BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
95
+ size_t hufDesSize;
96
+ } ZSTD_hufCTablesMetadata_t;
97
+
98
+ /** ZSTD_fseCTablesMetadata_t :
99
+ * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
100
+ * fse tables in fseTablesBuffer.
101
+ * fseTablesSize refers to the size of fse tables in bytes.
102
+ * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
103
+ typedef struct {
104
+ symbolEncodingType_e llType;
105
+ symbolEncodingType_e ofType;
106
+ symbolEncodingType_e mlType;
107
+ BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
108
+ size_t fseTablesSize;
109
+ size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
110
+ } ZSTD_fseCTablesMetadata_t;
111
+
112
+ typedef struct {
113
+ ZSTD_hufCTablesMetadata_t hufMetadata;
114
+ ZSTD_fseCTablesMetadata_t fseMetadata;
115
+ } ZSTD_entropyCTablesMetadata_t;
116
+
117
+ /** ZSTD_buildBlockEntropyStats() :
118
+ * Builds entropy for the block.
119
+ * @return : 0 on success or error code */
120
+ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
121
+ const ZSTD_entropyCTables_t* prevEntropy,
122
+ ZSTD_entropyCTables_t* nextEntropy,
123
+ const ZSTD_CCtx_params* cctxParams,
124
+ ZSTD_entropyCTablesMetadata_t* entropyMetadata,
125
+ void* workspace, size_t wkspSize);
126
+
127
+ /*********************************
128
+ * Compression internals structs *
129
+ *********************************/
130
+
131
+ typedef struct {
132
+ U32 off; /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
133
+ U32 len; /* Raw length of match */
88
134
  } ZSTD_match_t;
89
135
 
136
+ typedef struct {
137
+ U32 offset; /* Offset of sequence */
138
+ U32 litLength; /* Length of literals prior to match */
139
+ U32 matchLength; /* Raw length of match */
140
+ } rawSeq;
141
+
142
+ typedef struct {
143
+ rawSeq* seq; /* The start of the sequences */
144
+ size_t pos; /* The index in seq where reading stopped. pos <= size. */
145
+ size_t posInSequence; /* The position within the sequence at seq[pos] where reading
146
+ stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
147
+ size_t size; /* The number of sequences. <= capacity. */
148
+ size_t capacity; /* The capacity starting from `seq` pointer */
149
+ } rawSeqStore_t;
150
+
151
+ UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
152
+
90
153
  typedef struct {
91
154
  int price;
92
155
  U32 off;
@@ -116,7 +179,7 @@ typedef struct {
116
179
  U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
117
180
  ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
118
181
  const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
119
- ZSTD_literalCompressionMode_e literalCompressionMode;
182
+ ZSTD_paramSwitch_e literalCompressionMode;
120
183
  } optState_t;
121
184
 
122
185
  typedef struct {
@@ -125,14 +188,23 @@ typedef struct {
125
188
  } ZSTD_compressedBlockState_t;
126
189
 
127
190
  typedef struct {
128
- BYTE const* nextSrc; /* next block here to continue on current prefix */
129
- BYTE const* base; /* All regular indexes relative to this position */
130
- BYTE const* dictBase; /* extDict indexes relative to this position */
131
- U32 dictLimit; /* below that point, need extDict */
132
- U32 lowLimit; /* below that point, no more valid data */
191
+ BYTE const* nextSrc; /* next block here to continue on current prefix */
192
+ BYTE const* base; /* All regular indexes relative to this position */
193
+ BYTE const* dictBase; /* extDict indexes relative to this position */
194
+ U32 dictLimit; /* below that point, need extDict */
195
+ U32 lowLimit; /* below that point, no more valid data */
196
+ U32 nbOverflowCorrections; /* Number of times overflow correction has run since
197
+ * ZSTD_window_init(). Useful for debugging coredumps
198
+ * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
199
+ */
133
200
  } ZSTD_window_t;
134
201
 
202
+ #define ZSTD_WINDOW_START_INDEX 2
203
+
135
204
  typedef struct ZSTD_matchState_t ZSTD_matchState_t;
205
+
206
+ #define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */
207
+
136
208
  struct ZSTD_matchState_t {
137
209
  ZSTD_window_t window; /* State for window round buffer management */
138
210
  U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
@@ -144,12 +216,24 @@ struct ZSTD_matchState_t {
144
216
  */
145
217
  U32 nextToUpdate; /* index from which to continue table update */
146
218
  U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
219
+
220
+ U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
221
+ U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
222
+ U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
223
+
147
224
  U32* hashTable;
148
225
  U32* hashTable3;
149
226
  U32* chainTable;
227
+
228
+ U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
229
+
230
+ int dedicatedDictSearch; /* Indicates whether this matchState is using the
231
+ * dedicated dictionary search structure.
232
+ */
150
233
  optState_t opt; /* optimal parser state */
151
234
  const ZSTD_matchState_t* dictMatchState;
152
235
  ZSTD_compressionParameters cParams;
236
+ const rawSeqStore_t* ldmSeqStore;
153
237
  };
154
238
 
155
239
  typedef struct {
@@ -163,17 +247,26 @@ typedef struct {
163
247
  U32 checksum;
164
248
  } ldmEntry_t;
165
249
 
250
+ typedef struct {
251
+ BYTE const* split;
252
+ U32 hash;
253
+ U32 checksum;
254
+ ldmEntry_t* bucket;
255
+ } ldmMatchCandidate_t;
256
+
257
+ #define LDM_BATCH_SIZE 64
258
+
166
259
  typedef struct {
167
260
  ZSTD_window_t window; /* State for the window round buffer management */
168
261
  ldmEntry_t* hashTable;
169
262
  U32 loadedDictEnd;
170
263
  BYTE* bucketOffsets; /* Next position in bucket to insert entry */
171
- U64 hashPower; /* Used to compute the rolling hash.
172
- * Depends on ldmParams.minMatchLength */
264
+ size_t splitIndices[LDM_BATCH_SIZE];
265
+ ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
173
266
  } ldmState_t;
174
267
 
175
268
  typedef struct {
176
- U32 enableLdm; /* 1 if enable long distance matching */
269
+ ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
177
270
  U32 hashLog; /* Log size of hashTable */
178
271
  U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
179
272
  U32 minMatchLength; /* Minimum match length */
@@ -181,19 +274,6 @@ typedef struct {
181
274
  U32 windowLog; /* Window log for the LDM */
182
275
  } ldmParams_t;
183
276
 
184
- typedef struct {
185
- U32 offset;
186
- U32 litLength;
187
- U32 matchLength;
188
- } rawSeq;
189
-
190
- typedef struct {
191
- rawSeq* seq; /* The start of the sequences */
192
- size_t pos; /* The position where reading stopped. <= size. */
193
- size_t size; /* The number of sequences. <= capacity. */
194
- size_t capacity; /* The capacity starting from `seq` pointer */
195
- } rawSeqStore_t;
196
-
197
277
  typedef struct {
198
278
  int collectSequences;
199
279
  ZSTD_Sequence* seqStart;
@@ -217,7 +297,7 @@ struct ZSTD_CCtx_params_s {
217
297
  * There is no guarantee that hint is close to actual source size */
218
298
 
219
299
  ZSTD_dictAttachPref_e attachDictPref;
220
- ZSTD_literalCompressionMode_e literalCompressionMode;
300
+ ZSTD_paramSwitch_e literalCompressionMode;
221
301
 
222
302
  /* Multithreading: used to pass parameters to mtctx */
223
303
  int nbWorkers;
@@ -228,17 +308,68 @@ struct ZSTD_CCtx_params_s {
228
308
  /* Long distance matching parameters */
229
309
  ldmParams_t ldmParams;
230
310
 
311
+ /* Dedicated dict search algorithm trigger */
312
+ int enableDedicatedDictSearch;
313
+
314
+ /* Input/output buffer modes */
315
+ ZSTD_bufferMode_e inBufferMode;
316
+ ZSTD_bufferMode_e outBufferMode;
317
+
318
+ /* Sequence compression API */
319
+ ZSTD_sequenceFormat_e blockDelimiters;
320
+ int validateSequences;
321
+
322
+ /* Block splitting */
323
+ ZSTD_paramSwitch_e useBlockSplitter;
324
+
325
+ /* Param for deciding whether to use row-based matchfinder */
326
+ ZSTD_paramSwitch_e useRowMatchFinder;
327
+
328
+ /* Always load a dictionary in ext-dict mode (not prefix mode)? */
329
+ int deterministicRefPrefix;
330
+
231
331
  /* Internal use, for createCCtxParams() and freeCCtxParams() only */
232
332
  ZSTD_customMem customMem;
233
333
  }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
234
334
 
335
+ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
336
+ #define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
337
+
338
+ /**
339
+ * Indicates whether this compression proceeds directly from user-provided
340
+ * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
341
+ * whether the context needs to buffer the input/output (ZSTDb_buffered).
342
+ */
343
+ typedef enum {
344
+ ZSTDb_not_buffered,
345
+ ZSTDb_buffered
346
+ } ZSTD_buffered_policy_e;
347
+
348
+ /**
349
+ * Struct that contains all elements of block splitter that should be allocated
350
+ * in a wksp.
351
+ */
352
+ #define ZSTD_MAX_NB_BLOCK_SPLITS 196
353
+ typedef struct {
354
+ seqStore_t fullSeqStoreChunk;
355
+ seqStore_t firstHalfSeqStore;
356
+ seqStore_t secondHalfSeqStore;
357
+ seqStore_t currSeqStore;
358
+ seqStore_t nextSeqStore;
359
+
360
+ U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
361
+ ZSTD_entropyCTablesMetadata_t entropyMetadata;
362
+ } ZSTD_blockSplitCtx;
363
+
235
364
  struct ZSTD_CCtx_s {
236
365
  ZSTD_compressionStage_e stage;
237
366
  int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
238
367
  int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
239
368
  ZSTD_CCtx_params requestedParams;
240
369
  ZSTD_CCtx_params appliedParams;
370
+ ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
241
371
  U32 dictID;
372
+ size_t dictContentSize;
242
373
 
243
374
  ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
244
375
  size_t blockSize;
@@ -247,6 +378,7 @@ struct ZSTD_CCtx_s {
247
378
  unsigned long long producedCSize;
248
379
  XXH64_state_t xxhState;
249
380
  ZSTD_customMem customMem;
381
+ ZSTD_threadPool* pool;
250
382
  size_t staticSize;
251
383
  SeqCollector seqCollector;
252
384
  int isFirstBlock;
@@ -258,7 +390,10 @@ struct ZSTD_CCtx_s {
258
390
  size_t maxNbLdmSequences;
259
391
  rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
260
392
  ZSTD_blockState_t blockState;
261
- U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
393
+ U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
394
+
395
+ /* Whether we are streaming or not */
396
+ ZSTD_buffered_policy_e bufferedPolicy;
262
397
 
263
398
  /* streaming */
264
399
  char* inBuff;
@@ -273,6 +408,10 @@ struct ZSTD_CCtx_s {
273
408
  ZSTD_cStreamStage streamStage;
274
409
  U32 frameEnded;
275
410
 
411
+ /* Stable in/out buffer verification */
412
+ ZSTD_inBuffer expectedInBuffer;
413
+ size_t expectedOutBufferSize;
414
+
276
415
  /* Dictionary */
277
416
  ZSTD_localDict localDict;
278
417
  const ZSTD_CDict* cdict;
@@ -282,17 +421,49 @@ struct ZSTD_CCtx_s {
282
421
  #ifdef ZSTD_MULTITHREAD
283
422
  ZSTDMT_CCtx* mtctx;
284
423
  #endif
424
+
425
+ /* Tracing */
426
+ #if ZSTD_TRACE
427
+ ZSTD_TraceCtx traceCtx;
428
+ #endif
429
+
430
+ /* Workspace for block splitter */
431
+ ZSTD_blockSplitCtx blockSplitCtx;
285
432
  };
286
433
 
287
434
  typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
288
435
 
289
- typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
290
-
436
+ typedef enum {
437
+ ZSTD_noDict = 0,
438
+ ZSTD_extDict = 1,
439
+ ZSTD_dictMatchState = 2,
440
+ ZSTD_dedicatedDictSearch = 3
441
+ } ZSTD_dictMode_e;
442
+
443
+ typedef enum {
444
+ ZSTD_cpm_noAttachDict = 0, /* Compression with ZSTD_noDict or ZSTD_extDict.
445
+ * In this mode we use both the srcSize and the dictSize
446
+ * when selecting and adjusting parameters.
447
+ */
448
+ ZSTD_cpm_attachDict = 1, /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
449
+ * In this mode we only take the srcSize into account when selecting
450
+ * and adjusting parameters.
451
+ */
452
+ ZSTD_cpm_createCDict = 2, /* Creating a CDict.
453
+ * In this mode we take both the source size and the dictionary size
454
+ * into account when selecting and adjusting the parameters.
455
+ */
456
+ ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
457
+ * We don't know what these parameters are for. We default to the legacy
458
+ * behavior of taking both the source size and the dict size into account
459
+ * when selecting and adjusting parameters.
460
+ */
461
+ } ZSTD_cParamMode_e;
291
462
 
292
463
  typedef size_t (*ZSTD_blockCompressor) (
293
464
  ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
294
465
  void const* src, size_t srcSize);
295
- ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
466
+ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
296
467
 
297
468
 
298
469
  MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@@ -326,31 +497,6 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
326
497
  return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
327
498
  }
328
499
 
329
- typedef struct repcodes_s {
330
- U32 rep[3];
331
- } repcodes_t;
332
-
333
- MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
334
- {
335
- repcodes_t newReps;
336
- if (offset >= ZSTD_REP_NUM) { /* full offset */
337
- newReps.rep[2] = rep[1];
338
- newReps.rep[1] = rep[0];
339
- newReps.rep[0] = offset - ZSTD_REP_MOVE;
340
- } else { /* repcode */
341
- U32 const repCode = offset + ll0;
342
- if (repCode > 0) { /* note : if repCode==0, no change */
343
- U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
344
- newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
345
- newReps.rep[1] = rep[0];
346
- newReps.rep[0] = currentOffset;
347
- } else { /* repCode == 0 */
348
- memcpy(&newReps, rep, sizeof(newReps));
349
- }
350
- }
351
- return newReps;
352
- }
353
-
354
500
  /* ZSTD_cParam_withinBounds:
355
501
  * @return 1 if value is within cParam bounds,
356
502
  * 0 otherwise */
@@ -372,7 +518,7 @@ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const voi
372
518
  RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
373
519
  dstSize_tooSmall, "dst buf too small for uncompressed block");
374
520
  MEM_writeLE24(dst, cBlockHeader24);
375
- memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
521
+ ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
376
522
  return ZSTD_blockHeaderSize + srcSize;
377
523
  }
378
524
 
@@ -399,17 +545,17 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
399
545
  return (srcSize >> minlog) + 2;
400
546
  }
401
547
 
402
- MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
548
+ MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
403
549
  {
404
550
  switch (cctxParams->literalCompressionMode) {
405
- case ZSTD_lcm_huffman:
551
+ case ZSTD_ps_enable:
406
552
  return 0;
407
- case ZSTD_lcm_uncompressed:
553
+ case ZSTD_ps_disable:
408
554
  return 1;
409
555
  default:
410
556
  assert(0 /* impossible: pre-validated */);
411
- /* fall-through */
412
- case ZSTD_lcm_auto:
557
+ ZSTD_FALLTHROUGH;
558
+ case ZSTD_ps_auto:
413
559
  return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
414
560
  }
415
561
  }
@@ -419,7 +565,9 @@ MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParam
419
565
  * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
420
566
  * large copies.
421
567
  */
422
- static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
568
+ static void
569
+ ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
570
+ {
423
571
  assert(iend > ilimit_w);
424
572
  if (ip <= ilimit_w) {
425
573
  ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
@@ -429,14 +577,30 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie
429
577
  while (ip < iend) *op++ = *ip++;
430
578
  }
431
579
 
580
+ #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
581
+ #define STORE_REPCODE_1 STORE_REPCODE(1)
582
+ #define STORE_REPCODE_2 STORE_REPCODE(2)
583
+ #define STORE_REPCODE_3 STORE_REPCODE(3)
584
+ #define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
585
+ #define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE)
586
+ #define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE)
587
+ #define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
588
+ #define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
589
+ #define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */
590
+ #define STORED_TO_OFFBASE(o) ((o)+1)
591
+ #define OFFBASE_TO_STORED(o) ((o)-1)
592
+
432
593
  /*! ZSTD_storeSeq() :
433
- * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
434
- * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
435
- * `mlBase` : matchLength - MINMATCH
594
+ * Store a sequence (litlen, litPtr, offBase_minus1 and matchLength) into seqStore_t.
595
+ * @offBase_minus1 : Users should employ macros STORE_REPCODE_X and STORE_OFFSET().
596
+ * @matchLength : must be >= MINMATCH
436
597
  * Allowed to overread literals up to litLimit.
437
598
  */
438
- HINT_INLINE UNUSED_ATTR
439
- void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
599
+ HINT_INLINE UNUSED_ATTR void
600
+ ZSTD_storeSeq(seqStore_t* seqStorePtr,
601
+ size_t litLength, const BYTE* literals, const BYTE* litLimit,
602
+ U32 offBase_minus1,
603
+ size_t matchLength)
440
604
  {
441
605
  BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
442
606
  BYTE const* const litEnd = literals + litLength;
@@ -445,7 +609,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
445
609
  if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
446
610
  { U32 const pos = (U32)((const BYTE*)literals - g_start);
447
611
  DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
448
- pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
612
+ pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
449
613
  }
450
614
  #endif
451
615
  assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
@@ -469,26 +633,66 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
469
633
 
470
634
  /* literal Length */
471
635
  if (litLength>0xFFFF) {
472
- assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
473
- seqStorePtr->longLengthID = 1;
636
+ assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
637
+ seqStorePtr->longLengthType = ZSTD_llt_literalLength;
474
638
  seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
475
639
  }
476
640
  seqStorePtr->sequences[0].litLength = (U16)litLength;
477
641
 
478
642
  /* match offset */
479
- seqStorePtr->sequences[0].offset = offCode + 1;
643
+ seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
480
644
 
481
645
  /* match Length */
482
- if (mlBase>0xFFFF) {
483
- assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
484
- seqStorePtr->longLengthID = 2;
485
- seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
646
+ assert(matchLength >= MINMATCH);
647
+ { size_t const mlBase = matchLength - MINMATCH;
648
+ if (mlBase>0xFFFF) {
649
+ assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
650
+ seqStorePtr->longLengthType = ZSTD_llt_matchLength;
651
+ seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
652
+ }
653
+ seqStorePtr->sequences[0].mlBase = (U16)mlBase;
486
654
  }
487
- seqStorePtr->sequences[0].matchLength = (U16)mlBase;
488
655
 
489
656
  seqStorePtr->sequences++;
490
657
  }
491
658
 
659
+ /* ZSTD_updateRep() :
660
+ * updates in-place @rep (array of repeat offsets)
661
+ * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
662
+ */
663
+ MEM_STATIC void
664
+ ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
665
+ {
666
+ if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */
667
+ rep[2] = rep[1];
668
+ rep[1] = rep[0];
669
+ rep[0] = STORED_OFFSET(offBase_minus1);
670
+ } else { /* repcode */
671
+ U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
672
+ if (repCode > 0) { /* note : if repCode==0, no change */
673
+ U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
674
+ rep[2] = (repCode >= 2) ? rep[1] : rep[2];
675
+ rep[1] = rep[0];
676
+ rep[0] = currentOffset;
677
+ } else { /* repCode == 0 */
678
+ /* nothing to do */
679
+ }
680
+ }
681
+ }
682
+
683
+ typedef struct repcodes_s {
684
+ U32 rep[3];
685
+ } repcodes_t;
686
+
687
+ MEM_STATIC repcodes_t
688
+ ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
689
+ {
690
+ repcodes_t newReps;
691
+ ZSTD_memcpy(&newReps, rep, sizeof(newReps));
692
+ ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
693
+ return newReps;
694
+ }
695
+
492
696
 
493
697
  /*-*************************************
494
698
  * Match length counter
@@ -498,8 +702,18 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
498
702
  if (MEM_isLittleEndian()) {
499
703
  if (MEM_64bits()) {
500
704
  # if defined(_MSC_VER) && defined(_WIN64)
501
- unsigned long r = 0;
502
- return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
705
+ # if STATIC_BMI2
706
+ return _tzcnt_u64(val) >> 3;
707
+ # else
708
+ if (val != 0) {
709
+ unsigned long r;
710
+ _BitScanForward64(&r, (U64)val);
711
+ return (unsigned)(r >> 3);
712
+ } else {
713
+ /* Should not reach this code path */
714
+ __assume(0);
715
+ }
716
+ # endif
503
717
  # elif defined(__GNUC__) && (__GNUC__ >= 4)
504
718
  return (__builtin_ctzll((U64)val) >> 3);
505
719
  # else
@@ -515,8 +729,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
515
729
  # endif
516
730
  } else { /* 32 bits */
517
731
  # if defined(_MSC_VER)
518
- unsigned long r=0;
519
- return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
732
+ if (val != 0) {
733
+ unsigned long r;
734
+ _BitScanForward(&r, (U32)val);
735
+ return (unsigned)(r >> 3);
736
+ } else {
737
+ /* Should not reach this code path */
738
+ __assume(0);
739
+ }
520
740
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
521
741
  return (__builtin_ctz((U32)val) >> 3);
522
742
  # else
@@ -530,8 +750,18 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
530
750
  } else { /* Big Endian CPU */
531
751
  if (MEM_64bits()) {
532
752
  # if defined(_MSC_VER) && defined(_WIN64)
533
- unsigned long r = 0;
534
- return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0;
753
+ # if STATIC_BMI2
754
+ return _lzcnt_u64(val) >> 3;
755
+ # else
756
+ if (val != 0) {
757
+ unsigned long r;
758
+ _BitScanReverse64(&r, (U64)val);
759
+ return (unsigned)(r >> 3);
760
+ } else {
761
+ /* Should not reach this code path */
762
+ __assume(0);
763
+ }
764
+ # endif
535
765
  # elif defined(__GNUC__) && (__GNUC__ >= 4)
536
766
  return (__builtin_clzll(val) >> 3);
537
767
  # else
@@ -544,8 +774,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
544
774
  # endif
545
775
  } else { /* 32 bits */
546
776
  # if defined(_MSC_VER)
547
- unsigned long r = 0;
548
- return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
777
+ if (val != 0) {
778
+ unsigned long r;
779
+ _BitScanReverse(&r, (unsigned long)val);
780
+ return (unsigned)(r >> 3);
781
+ } else {
782
+ /* Should not reach this code path */
783
+ __assume(0);
784
+ }
549
785
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
550
786
  return (__builtin_clz((U32)val) >> 3);
551
787
  # else
@@ -626,7 +862,8 @@ static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
626
862
  static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
627
863
  static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
628
864
 
629
- MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
865
+ MEM_STATIC FORCE_INLINE_ATTR
866
+ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
630
867
  {
631
868
  switch(mls)
632
869
  {
@@ -723,6 +960,13 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
723
960
  window->dictLimit = end;
724
961
  }
725
962
 
963
+ MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
964
+ {
965
+ return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
966
+ window.lowLimit == ZSTD_WINDOW_START_INDEX &&
967
+ (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
968
+ }
969
+
726
970
  /**
727
971
  * ZSTD_window_hasExtDict():
728
972
  * Returns non-zero if the window has a non-empty extDict.
@@ -742,20 +986,76 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
742
986
  return ZSTD_window_hasExtDict(ms->window) ?
743
987
  ZSTD_extDict :
744
988
  ms->dictMatchState != NULL ?
745
- ZSTD_dictMatchState :
989
+ (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
746
990
  ZSTD_noDict;
747
991
  }
748
992
 
993
+ /* Defining this macro to non-zero tells zstd to run the overflow correction
994
+ * code much more frequently. This is very inefficient, and should only be
995
+ * used for tests and fuzzers.
996
+ */
997
+ #ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
998
+ # ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
999
+ # define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
1000
+ # else
1001
+ # define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
1002
+ # endif
1003
+ #endif
1004
+
1005
+ /**
1006
+ * ZSTD_window_canOverflowCorrect():
1007
+ * Returns non-zero if the indices are large enough for overflow correction
1008
+ * to work correctly without impacting compression ratio.
1009
+ */
1010
+ MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
1011
+ U32 cycleLog,
1012
+ U32 maxDist,
1013
+ U32 loadedDictEnd,
1014
+ void const* src)
1015
+ {
1016
+ U32 const cycleSize = 1u << cycleLog;
1017
+ U32 const curr = (U32)((BYTE const*)src - window.base);
1018
+ U32 const minIndexToOverflowCorrect = cycleSize
1019
+ + MAX(maxDist, cycleSize)
1020
+ + ZSTD_WINDOW_START_INDEX;
1021
+
1022
+ /* Adjust the min index to back off the overflow correction frequency,
1023
+ * so we don't waste too much CPU in overflow correction. If this
1024
+ * computation overflows we don't really care, we just need to make
1025
+ * sure it is at least minIndexToOverflowCorrect.
1026
+ */
1027
+ U32 const adjustment = window.nbOverflowCorrections + 1;
1028
+ U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
1029
+ minIndexToOverflowCorrect);
1030
+ U32 const indexLargeEnough = curr > adjustedIndex;
1031
+
1032
+ /* Only overflow correct early if the dictionary is invalidated already,
1033
+ * so we don't hurt compression ratio.
1034
+ */
1035
+ U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
1036
+
1037
+ return indexLargeEnough && dictionaryInvalidated;
1038
+ }
1039
+
749
1040
  /**
750
1041
  * ZSTD_window_needOverflowCorrection():
751
1042
  * Returns non-zero if the indices are getting too large and need overflow
752
1043
  * protection.
753
1044
  */
754
1045
  MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
1046
+ U32 cycleLog,
1047
+ U32 maxDist,
1048
+ U32 loadedDictEnd,
1049
+ void const* src,
755
1050
  void const* srcEnd)
756
1051
  {
757
- U32 const current = (U32)((BYTE const*)srcEnd - window.base);
758
- return current > ZSTD_CURRENT_MAX;
1052
+ U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
1053
+ if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1054
+ if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
1055
+ return 1;
1056
+ }
1057
+ }
1058
+ return curr > ZSTD_CURRENT_MAX;
759
1059
  }
760
1060
 
761
1061
  /**
@@ -766,7 +1066,6 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
766
1066
  *
767
1067
  * The least significant cycleLog bits of the indices must remain the same,
768
1068
  * which may be 0. Every index up to maxDist in the past must be valid.
769
- * NOTE: (maxDist & cycleMask) must be zero.
770
1069
  */
771
1070
  MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
772
1071
  U32 maxDist, void const* src)
@@ -790,32 +1089,52 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
790
1089
  * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
791
1090
  * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
792
1091
  */
793
- U32 const cycleMask = (1U << cycleLog) - 1;
794
- U32 const current = (U32)((BYTE const*)src - window->base);
795
- U32 const currentCycle0 = current & cycleMask;
796
- /* Exclude zero so that newCurrent - maxDist >= 1. */
797
- U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
798
- U32 const newCurrent = currentCycle1 + maxDist;
799
- U32 const correction = current - newCurrent;
800
- assert((maxDist & cycleMask) == 0);
801
- assert(current > newCurrent);
802
- /* Loose bound, should be around 1<<29 (see above) */
803
- assert(correction > 1<<28);
1092
+ U32 const cycleSize = 1u << cycleLog;
1093
+ U32 const cycleMask = cycleSize - 1;
1094
+ U32 const curr = (U32)((BYTE const*)src - window->base);
1095
+ U32 const currentCycle = curr & cycleMask;
1096
+ /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
1097
+ U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
1098
+ ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
1099
+ : 0;
1100
+ U32 const newCurrent = currentCycle
1101
+ + currentCycleCorrection
1102
+ + MAX(maxDist, cycleSize);
1103
+ U32 const correction = curr - newCurrent;
1104
+ /* maxDist must be a power of two so that:
1105
+ * (newCurrent & cycleMask) == (curr & cycleMask)
1106
+ * This is required to not corrupt the chains / binary tree.
1107
+ */
1108
+ assert((maxDist & (maxDist - 1)) == 0);
1109
+ assert((curr & cycleMask) == (newCurrent & cycleMask));
1110
+ assert(curr > newCurrent);
1111
+ if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1112
+ /* Loose bound, should be around 1<<29 (see above) */
1113
+ assert(correction > 1<<28);
1114
+ }
804
1115
 
805
1116
  window->base += correction;
806
1117
  window->dictBase += correction;
807
- if (window->lowLimit <= correction) window->lowLimit = 1;
808
- else window->lowLimit -= correction;
809
- if (window->dictLimit <= correction) window->dictLimit = 1;
810
- else window->dictLimit -= correction;
1118
+ if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
1119
+ window->lowLimit = ZSTD_WINDOW_START_INDEX;
1120
+ } else {
1121
+ window->lowLimit -= correction;
1122
+ }
1123
+ if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
1124
+ window->dictLimit = ZSTD_WINDOW_START_INDEX;
1125
+ } else {
1126
+ window->dictLimit -= correction;
1127
+ }
811
1128
 
812
1129
  /* Ensure we can still reference the full window. */
813
1130
  assert(newCurrent >= maxDist);
814
- assert(newCurrent - maxDist >= 1);
1131
+ assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
815
1132
  /* Ensure that lowLimit and dictLimit didn't underflow. */
816
1133
  assert(window->lowLimit <= newCurrent);
817
1134
  assert(window->dictLimit <= newCurrent);
818
1135
 
1136
+ ++window->nbOverflowCorrections;
1137
+
819
1138
  DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
820
1139
  window->lowLimit);
821
1140
  return correction;
@@ -919,12 +1238,14 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
919
1238
  }
920
1239
 
921
1240
  MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
922
- memset(window, 0, sizeof(*window));
923
- window->base = (BYTE const*)"";
924
- window->dictBase = (BYTE const*)"";
925
- window->dictLimit = 1; /* start from 1, so that 1st position is valid */
926
- window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
927
- window->nextSrc = window->base + 1; /* see issue #1241 */
1241
+ ZSTD_memset(window, 0, sizeof(*window));
1242
+ window->base = (BYTE const*)" ";
1243
+ window->dictBase = (BYTE const*)" ";
1244
+ ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
1245
+ window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */
1246
+ window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */
1247
+ window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */
1248
+ window->nbOverflowCorrections = 0;
928
1249
  }
929
1250
 
930
1251
  /**
@@ -935,7 +1256,8 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
935
1256
  * Returns non-zero if the segment is contiguous.
936
1257
  */
937
1258
  MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
938
- void const* src, size_t srcSize)
1259
+ void const* src, size_t srcSize,
1260
+ int forceNonContiguous)
939
1261
  {
940
1262
  BYTE const* const ip = (BYTE const*)src;
941
1263
  U32 contiguous = 1;
@@ -945,7 +1267,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
945
1267
  assert(window->base != NULL);
946
1268
  assert(window->dictBase != NULL);
947
1269
  /* Check if blocks follow each other */
948
- if (src != window->nextSrc) {
1270
+ if (src != window->nextSrc || forceNonContiguous) {
949
1271
  /* not contiguous */
950
1272
  size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
951
1273
  DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
@@ -973,25 +1295,32 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
973
1295
  /**
974
1296
  * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
975
1297
  */
976
- MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
1298
+ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
977
1299
  {
978
- U32 const maxDistance = 1U << windowLog;
979
- U32 const lowestValid = ms->window.lowLimit;
980
- U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
981
- U32 const isDictionary = (ms->loadedDictEnd != 0);
982
- U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
1300
+ U32 const maxDistance = 1U << windowLog;
1301
+ U32 const lowestValid = ms->window.lowLimit;
1302
+ U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1303
+ U32 const isDictionary = (ms->loadedDictEnd != 0);
1304
+ /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
1305
+ * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
1306
+ * valid for the entire block. So this check is sufficient to find the lowest valid match index.
1307
+ */
1308
+ U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
983
1309
  return matchLowest;
984
1310
  }
985
1311
 
986
1312
  /**
987
1313
  * Returns the lowest allowed match index in the prefix.
988
1314
  */
989
- MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
1315
+ MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
990
1316
  {
991
1317
  U32 const maxDistance = 1U << windowLog;
992
1318
  U32 const lowestValid = ms->window.dictLimit;
993
- U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
1319
+ U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
994
1320
  U32 const isDictionary = (ms->loadedDictEnd != 0);
1321
+ /* When computing the lowest prefix index we need to take the dictionary into account to handle
1322
+ * the edge case where the dictionary and the source are contiguous in memory.
1323
+ */
995
1324
  U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
996
1325
  return matchLowest;
997
1326
  }
@@ -1045,7 +1374,6 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
1045
1374
  * assumptions : magic number supposed already checked
1046
1375
  * and dictSize >= 8 */
1047
1376
  size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
1048
- short* offcodeNCount, unsigned* offcodeMaxValue,
1049
1377
  const void* const dict, size_t dictSize);
1050
1378
 
1051
1379
  void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
@@ -1061,7 +1389,7 @@ void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
1061
1389
  * Note: srcSizeHint == 0 means 0!
1062
1390
  */
1063
1391
  ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
1064
- const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
1392
+ const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
1065
1393
 
1066
1394
  /*! ZSTD_initCStream_internal() :
1067
1395
  * Private use only. Init streaming operation.
@@ -1122,4 +1450,9 @@ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSe
1122
1450
  * condition for correct operation : hashLog > 1 */
1123
1451
  U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
1124
1452
 
1453
+ /** ZSTD_CCtx_trace() :
1454
+ * Trace the end of a compression call.
1455
+ */
1456
+ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
1457
+
1125
1458
  #endif /* ZSTD_COMPRESS_H */