zstdlib 0.9.0-x64-mingw-ucrt → 0.11.0-x64-mingw-ucrt

Files changed (108)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +13 -0
  3. data/ext/zstdlib_c/extconf.rb +3 -3
  4. data/ext/zstdlib_c/ruby/zlib-3.2/zstdlib.c +5090 -0
  5. data/ext/zstdlib_c/zlib-1.2.12/crc32.c +1116 -0
  6. data/ext/zstdlib_c/zlib-1.2.12/crc32.h +9446 -0
  7. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/deflate.c +78 -30
  8. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/deflate.h +12 -15
  9. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/gzguts.h +3 -2
  10. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/gzlib.c +5 -3
  11. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/gzread.c +5 -7
  12. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/gzwrite.c +25 -13
  13. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/infback.c +2 -1
  14. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/inffast.c +14 -14
  15. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/inflate.c +39 -8
  16. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/inflate.h +3 -2
  17. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/inftrees.c +3 -3
  18. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/trees.c +27 -48
  19. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/zlib.h +123 -100
  20. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/zutil.c +2 -2
  21. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/zutil.h +12 -9
  22. data/ext/zstdlib_c/zstd-1.5.5/lib/common/allocations.h +55 -0
  23. data/ext/zstdlib_c/zstd-1.5.5/lib/common/bits.h +200 -0
  24. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/bitstream.h +19 -60
  25. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/compiler.h +26 -3
  26. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/cpu.h +1 -1
  27. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.c +1 -1
  28. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/debug.h +1 -1
  29. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/entropy_common.c +12 -40
  30. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.c +9 -2
  31. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/error_private.h +1 -1
  32. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse.h +5 -83
  33. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/fse_decompress.c +7 -99
  34. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/huf.h +65 -156
  35. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/mem.h +39 -46
  36. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.c +26 -10
  37. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/pool.h +7 -1
  38. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/portability_macros.h +22 -3
  39. data/ext/zstdlib_c/zstd-1.5.5/lib/common/threading.c +176 -0
  40. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/threading.h +5 -10
  41. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.c +2 -2
  42. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/xxhash.h +8 -8
  43. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_common.c +1 -36
  44. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_deps.h +1 -1
  45. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_internal.h +17 -118
  46. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/common/zstd_trace.h +3 -3
  47. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/clevels.h +1 -1
  48. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/fse_compress.c +7 -124
  49. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.c +1 -1
  50. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/hist.h +1 -1
  51. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/huf_compress.c +234 -169
  52. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress.c +1243 -538
  53. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_internal.h +225 -151
  54. data/ext/zstdlib_c/zstd-1.5.5/lib/compress/zstd_compress_literals.c +235 -0
  55. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_literals.h +16 -8
  56. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.c +3 -3
  57. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_sequences.h +1 -1
  58. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.c +25 -21
  59. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_compress_superblock.h +1 -1
  60. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_cwksp.h +128 -62
  61. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.c +95 -33
  62. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.h +3 -2
  63. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.c +433 -148
  64. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_fast.h +3 -2
  65. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.c +398 -345
  66. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_lazy.h +4 -2
  67. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.c +5 -5
  68. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm.h +1 -1
  69. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_ldm_geartab.h +1 -1
  70. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.c +106 -80
  71. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_opt.h +1 -1
  72. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.c +17 -9
  73. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstdmt_compress.h +1 -1
  74. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress.c +434 -441
  75. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/huf_decompress_amd64.S +30 -39
  76. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.c +4 -4
  77. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_ddict.h +1 -1
  78. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress.c +205 -80
  79. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.c +201 -81
  80. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_block.h +6 -1
  81. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/decompress/zstd_decompress_internal.h +4 -2
  82. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zdict.h +53 -31
  83. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd.h +580 -135
  84. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/zstd_errors.h +27 -8
  85. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzclose.c +1 -1
  86. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzcompatibility.h +8 -8
  87. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzguts.h +10 -10
  88. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzlib.c +3 -3
  89. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzread.c +10 -10
  90. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/gzwrite.c +5 -5
  91. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.c +46 -44
  92. data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/zlibWrapper/zstd_zlibwrapper.h +4 -1
  93. data/lib/3.1/zstdlib_c.so +0 -0
  94. data/lib/3.2/zstdlib_c.so +0 -0
  95. metadata +108 -104
  96. data/ext/zstdlib_c/zlib-1.2.11/crc32.c +0 -442
  97. data/ext/zstdlib_c/zlib-1.2.11/crc32.h +0 -441
  98. data/ext/zstdlib_c/zstd-1.5.2/lib/common/threading.c +0 -122
  99. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_literals.c +0 -159
  100. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/adler32.c +0 -0
  101. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/compress.c +0 -0
  102. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/gzclose.c +0 -0
  103. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/inffast.h +0 -0
  104. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/inffixed.h +0 -0
  105. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/inftrees.h +0 -0
  106. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/trees.h +0 -0
  107. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/uncompr.c +0 -0
  108. data/ext/zstdlib_c/{zlib-1.2.11 → zlib-1.2.12}/zconf.h +0 -0
data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_cwksp.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,9 @@
 /*-*************************************
 *  Dependencies
 ***************************************/
+#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customFree */
 #include "../common/zstd_internal.h"
+#include "../common/portability_macros.h"

 #if defined (__cplusplus)
 extern "C" {
@@ -44,8 +46,9 @@ extern "C" {
 ***************************************/
 typedef enum {
     ZSTD_cwksp_alloc_objects,
-    ZSTD_cwksp_alloc_buffers,
-    ZSTD_cwksp_alloc_aligned
+    ZSTD_cwksp_alloc_aligned_init_once,
+    ZSTD_cwksp_alloc_aligned,
+    ZSTD_cwksp_alloc_buffers
 } ZSTD_cwksp_alloc_phase_e;

 /**
@@ -98,8 +101,8 @@ typedef enum {
 *
 * Workspace Layout:
 *
- * [                        ... workspace ...                         ]
- * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
+ * [                        ... workspace ...                           ]
+ * [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
 *
 * The various objects that live in the workspace are divided into the
 * following categories, and are allocated separately:
@@ -123,9 +126,18 @@ typedef enum {
 *   uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
 *   Their sizes depend on the cparams. These tables are 64-byte aligned.
 *
- * - Aligned: these buffers are used for various purposes that require 4 byte
- *   alignment, but don't require any initialization before they're used. These
- *   buffers are each aligned to 64 bytes.
+ * - Init once: these buffers require to be initialized at least once before
+ *   use. They should be used when we want to skip memory initialization
+ *   while not triggering memory checkers (like Valgrind) when reading from
+ *   from this memory without writing to it first.
+ *   These buffers should be used carefully as they might contain data
+ *   from previous compressions.
+ *   Buffers are aligned to 64 bytes.
+ *
+ * - Aligned: these buffers don't require any initialization before they're
+ *   used. The user of the buffer should make sure they write into a buffer
+ *   location before reading from it.
+ *   Buffers are aligned to 64 bytes.
 *
 * - Buffers: these buffers are used for various purposes that don't require
 *   any alignment or initialization before they're used. This means they can
@@ -137,8 +149,9 @@ typedef enum {
 * correctly packed into the workspace buffer. That order is:
 *
 * 1. Objects
- * 2. Buffers
- * 3. Aligned/Tables
+ * 2. Init once / Tables
+ * 3. Aligned / Tables
+ * 4. Buffers / Tables
 *
 * Attempts to reserve objects of different types out of order will fail.
 */
@@ -150,6 +163,7 @@ typedef struct {
     void* tableEnd;
     void* tableValidEnd;
     void* allocStart;
+    void* initOnceStart;

     BYTE allocFailed;
     int workspaceOversizedDuration;
@@ -162,6 +176,7 @@ typedef struct {
 ***************************************/

 MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
+MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws);

 MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
     (void)ws;
@@ -171,6 +186,20 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
     assert(ws->tableEnd <= ws->allocStart);
     assert(ws->tableValidEnd <= ws->allocStart);
     assert(ws->allocStart <= ws->workspaceEnd);
+    assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
+    assert(ws->workspace <= ws->initOnceStart);
+#if ZSTD_MEMORY_SANITIZER
+    {
+        intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
+            (U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart);
+#if defined(ZSTD_MSAN_PRINT)
+        if(offset!=-1) {
+            __msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
+        }
+#endif
+        assert(offset==-1);
+    };
+#endif
 }

 /**
@@ -217,14 +246,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
 * for internal purposes (currently only alignment).
 */
 MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
-    /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
-     * to align the beginning of tables section, as well as another n_2=[0, 63] bytes
-     * to align the beginning of the aligned section.
-     *
-     * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
-     * aligneds being sized in multiples of 64 bytes.
+    /* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
+     * bytes to align the beginning of tables section and end of buffers;
      */
-    size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
+    size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
     return slackSpace;
 }

@@ -237,10 +262,18 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt
     size_t const alignBytesMask = alignBytes - 1;
     size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
     assert((alignBytes & alignBytesMask) == 0);
-    assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
+    assert(bytes < alignBytes);
     return bytes;
 }

+/**
+ * Returns the initial value for allocStart which is used to determine the position from
+ * which we can allocate from the end of the workspace.
+ */
+MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
+    return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
+}
+
 /**
  * Internal function. Do not use directly.
  * Reserves the given number of bytes within the aligned/buffer segment of the wksp,
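The new ZSTD_cwksp_initialAllocStart above simply rounds the end of the workspace down to the previous 64-byte boundary. A rough, self-contained illustration of that masking (not part of the gem or of zstd; it assumes ZSTD_CWKSP_ALIGNMENT_BYTES is 64 and that pointers round-trip through size_t, as the zstd code itself assumes):

    #include <assert.h>
    #include <stdint.h>

    #define ALIGNMENT_BYTES 64  /* stand-in for ZSTD_CWKSP_ALIGNMENT_BYTES */

    /* Round an end pointer down to the previous 64-byte boundary. */
    static void* align_down(void* workspaceEnd) {
        return (void*)((size_t)workspaceEnd & ~(size_t)(ALIGNMENT_BYTES - 1));
    }

    int main(void) {
        unsigned char buf[256];
        void* aligned = align_down(buf + sizeof(buf));
        /* The result is 64-byte aligned and never past the original end. */
        assert(((uintptr_t)aligned % ALIGNMENT_BYTES) == 0);
        assert((unsigned char*)aligned <= buf + sizeof(buf));
        return 0;
    }

Because the buffers/aligned/init-once regions all grow downward from this boundary, every reservation from the end of the workspace stays 64-byte aligned without per-allocation padding.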
@@ -281,27 +314,16 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
 {
     assert(phase >= ws->phase);
     if (phase > ws->phase) {
-        /* Going from allocating objects to allocating buffers */
-        if (ws->phase < ZSTD_cwksp_alloc_buffers &&
-                phase >= ZSTD_cwksp_alloc_buffers) {
+        /* Going from allocating objects to allocating initOnce / tables */
+        if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
+            phase >= ZSTD_cwksp_alloc_aligned_init_once) {
             ws->tableValidEnd = ws->objectEnd;
-        }
+            ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);

-        /* Going from allocating buffers to allocating aligneds/tables */
-        if (ws->phase < ZSTD_cwksp_alloc_aligned &&
-                phase >= ZSTD_cwksp_alloc_aligned) {
-            {   /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
-                size_t const bytesToAlign =
-                    ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
-                DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
-                ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
-                RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
-                                memory_allocation, "aligned phase - alignment initial allocation failed!");
-            }
             {   /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
-                void* const alloc = ws->objectEnd;
+                void *const alloc = ws->objectEnd;
                 size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
-                void* const objectEnd = (BYTE*)alloc + bytesToAlign;
+                void *const objectEnd = (BYTE *) alloc + bytesToAlign;
                 DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
                 RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
                                 "table phase - alignment initial allocation failed!");
@@ -309,7 +331,9 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
                 ws->tableEnd = objectEnd;  /* table area starts being empty */
                 if (ws->tableValidEnd < ws->tableEnd) {
                     ws->tableValidEnd = ws->tableEnd;
-    }   }   }
+                }
+            }
+        }
         ws->phase = phase;
         ZSTD_cwksp_assert_internal_consistency(ws);
     }
@@ -321,7 +345,7 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
 */
 MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
 {
-    return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
+    return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
 }

 /**
@@ -348,7 +372,9 @@ ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase
     if (alloc) {
         alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
         if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) {
-            __asan_unpoison_memory_region(alloc, bytes);
+            /* We need to keep the redzone poisoned while unpoisoning the bytes that
+             * are actually allocated. */
+            __asan_unpoison_memory_region(alloc, bytes - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE);
         }
     }
 #endif
@@ -364,6 +390,36 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
     return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
 }

+/**
+ * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
+ * This memory has been initialized at least once in the past.
+ * This doesn't mean it has been initialized this time, and it might contain data from previous
+ * operations.
+ * The main usage is for algorithms that might need read access into uninitialized memory.
+ * The algorithm must maintain safety under these conditions and must make sure it doesn't
+ * leak any of the past data (directly or in side channels).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes)
+{
+    size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
+    void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once);
+    assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
+    if(ptr && ptr < ws->initOnceStart) {
+        /* We assume the memory following the current allocation is either:
+         * 1. Not usable as initOnce memory (end of workspace)
+         * 2. Another initOnce buffer that has been allocated before (and so was previously memset)
+         * 3. An ASAN redzone, in which case we don't want to write on it
+         * For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart.
+         * Note that we assume here that MSAN and ASAN cannot run in the same time. */
+        ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes));
+        ws->initOnceStart = ptr;
+    }
+#if ZSTD_MEMORY_SANITIZER
+    assert(__msan_test_shadow(ptr, bytes) == -1);
+#endif
+    return ptr;
+}
+
 /**
  * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
  */
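The key trick in the new init-once reservation is that only the prefix not covered by an earlier init-once reservation is zeroed: MIN(initOnceStart - ptr, alignedBytes) bytes. A minimal toy model of that bookkeeping, outside of any zstd code (all names and sizes below are illustrative, and the downward "allocator" is just array-offset arithmetic):

    #include <stdio.h>
    #include <string.h>

    #define REGION 256          /* toy workspace size, a multiple of the 64-byte alignment */
    #define ALIGN  64

    static unsigned char region[REGION];
    static size_t allocStart = REGION;      /* grows downward from the end */
    static size_t initOnceStart = REGION;   /* lowest offset ever zeroed so far */

    /* Reserve `bytes` from the end and zero only the part not covered by a
     * previous init-once reservation, mirroring MIN(initOnceStart - ptr, alignedBytes). */
    static void* reserve_init_once(size_t bytes) {
        size_t const alignedBytes = (bytes + ALIGN - 1) & ~(size_t)(ALIGN - 1);
        size_t const ptr = allocStart - alignedBytes;
        allocStart = ptr;
        if (ptr < initOnceStart) {
            size_t const toZero = initOnceStart - ptr;
            memset(region + ptr, 0, toZero < alignedBytes ? toZero : alignedBytes);
            initOnceStart = ptr;
        }
        return region + ptr;
    }

    int main(void) {
        reserve_init_once(100);  /* zeroes 128 bytes (rounded up) at the end of the region */
        reserve_init_once(60);   /* zeroes only the new 64-byte prefix below them */
        printf("initOnceStart is now %zu\n", initOnceStart);  /* prints 64 */
        return 0;
    }

On reuse of a workspace, later init-once reservations that land inside the already-zeroed region therefore cost nothing, which is exactly the promise this buffer class makes.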
@@ -382,13 +438,17 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
 */
 MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
 {
-    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
+    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once;
     void* alloc;
     void* end;
     void* top;

-    if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
-        return NULL;
+    /* We can only start allocating tables after we are done reserving space for objects at the
+     * start of the workspace */
+    if(ws->phase < phase) {
+        if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
+            return NULL;
+        }
     }
     alloc = ws->tableEnd;
     end = (BYTE *)alloc + bytes;
@@ -467,11 +527,19 @@ MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
     /* To validate that the table re-use logic is sound, and that we don't
      * access table space that we haven't cleaned, we re-"poison" the table
-     * space every time we mark it dirty. */
+     * space every time we mark it dirty.
+     * Since tableValidEnd space and initOnce space may overlap we don't poison
+     * the initOnce portion as it break its promise. This means that this poisoning
+     * check isn't always applied fully. */
     {
         size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
         assert(__msan_test_shadow(ws->objectEnd, size) == -1);
-        __msan_poison(ws->objectEnd, size);
+        if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
+            __msan_poison(ws->objectEnd, size);
+        } else {
+            assert(ws->initOnceStart >= ws->objectEnd);
+            __msan_poison(ws->objectEnd, (BYTE*)ws->initOnceStart - (BYTE*)ws->objectEnd);
+        }
     }
 #endif

@@ -499,7 +567,7 @@ MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
     assert(ws->tableValidEnd >= ws->objectEnd);
     assert(ws->tableValidEnd <= ws->allocStart);
     if (ws->tableValidEnd < ws->tableEnd) {
-        ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
+        ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd));
     }
     ZSTD_cwksp_mark_tables_clean(ws);
 }
@@ -536,11 +604,14 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
     /* To validate that the context re-use logic is sound, and that we don't
      * access stuff that this compression hasn't initialized, we re-"poison"
-     * the workspace (or at least the non-static, non-table parts of it)
-     * every time we start a new compression. */
+     * the workspace except for the areas in which we expect memory re-use
+     * without initialization (objects, valid tables area and init once
+     * memory). */
     {
-        size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
-        __msan_poison(ws->tableValidEnd, size);
+        if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
+            size_t size = (BYTE*)ws->initOnceStart - (BYTE*)ws->tableValidEnd;
+            __msan_poison(ws->tableValidEnd, size);
+        }
     }
 #endif

@@ -556,10 +627,10 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
 #endif

     ws->tableEnd = ws->objectEnd;
-    ws->allocStart = ws->workspaceEnd;
+    ws->allocStart = ZSTD_cwksp_initialAllocStart(ws);
     ws->allocFailed = 0;
-    if (ws->phase > ZSTD_cwksp_alloc_buffers) {
-        ws->phase = ZSTD_cwksp_alloc_buffers;
+    if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) {
+        ws->phase = ZSTD_cwksp_alloc_aligned_init_once;
     }
     ZSTD_cwksp_assert_internal_consistency(ws);
 }
@@ -576,6 +647,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_c
     ws->workspaceEnd = (BYTE*)start + size;
     ws->objectEnd = ws->workspace;
     ws->tableValidEnd = ws->objectEnd;
+    ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
     ws->phase = ZSTD_cwksp_alloc_objects;
     ws->isStatic = isStatic;
     ZSTD_cwksp_clear(ws);
@@ -628,17 +700,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
 * Returns if the estimated space needed for a wksp is within an acceptable limit of the
 * actual amount of space used.
 */
-MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
-                                                        size_t const estimatedSpace, int resizedWorkspace) {
-    if (resizedWorkspace) {
-        /* Resized/newly allocated wksp should have exact bounds */
-        return ZSTD_cwksp_used(ws) == estimatedSpace;
-    } else {
-        /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
-         * than estimatedSpace. See the comments in zstd_cwksp.h for details.
-         */
-        return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
-    }
+MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) {
+    /* We have an alignment space between objects and tables between tables and buffers, so we can have up to twice
+     * the alignment bytes difference between estimation and actual usage */
+    return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&
+           ZSTD_cwksp_used(ws) <= estimatedSpace;
 }

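The reworked bound check accepts any actual usage within 2 × 64 bytes below the estimate (the value returned by ZSTD_cwksp_slack_space_required). A hedged, standalone sketch of that comparison with a couple of worked values (names here are illustrative, not the zstd API):

    #include <stddef.h>
    #include <stdio.h>

    #define SLACK (64 * 2)  /* mirrors ZSTD_CWKSP_ALIGNMENT_BYTES * 2 */

    /* Usage must land in the window [estimated - SLACK, estimated]. */
    static int within_bounds(size_t used, size_t estimated) {
        return (estimated - SLACK) <= used && used <= estimated;
    }

    int main(void) {
        printf("%d\n", within_bounds(1000, 1100));  /* 1: usage is within 128 bytes of the estimate */
        printf("%d\n", within_bounds(1000, 1200));  /* 0: estimate overshoots by more than 128 bytes */
        return 0;
    }
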
data/ext/zstdlib_c/{zstd-1.5.2 → zstd-1.5.5}/lib/compress/zstd_double_fast.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -11,8 +11,43 @@
 #include "zstd_compress_internal.h"
 #include "zstd_double_fast.h"

+static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashLarge = ms->hashTable;
+    U32  const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
+    U32  const mls = cParams->minMatch;
+    U32* const hashSmall = ms->chainTable;
+    U32  const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
+    const BYTE* const base = ms->window.base;
+    const BYTE* ip = base + ms->nextToUpdate;
+    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
+    const U32 fastHashFillStep = 3;

-void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+    /* Always insert every fastHashFillStep position into the hash tables.
+     * Insert the other positions into the large hash table if their entry
+     * is empty.
+     */
+    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
+        U32 const curr = (U32)(ip - base);
+        U32 i;
+        for (i = 0; i < fastHashFillStep; ++i) {
+            size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
+            size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
+            if (i == 0) {
+                ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
+            }
+            if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
+                ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
+            }
+            /* Only load extra positions for ZSTD_dtlm_full */
+            if (dtlm == ZSTD_dtlm_fast)
+                break;
+    }   }
+}
+
+static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
                               void const* end, ZSTD_dictTableLoadMethod_e dtlm)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -43,7 +78,19 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
             /* Only load extra positions for ZSTD_dtlm_full */
             if (dtlm == ZSTD_dtlm_fast)
                 break;
-    }   }
+    }   }
+}
+
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+                        const void* const end,
+                        ZSTD_dictTableLoadMethod_e dtlm,
+                        ZSTD_tableFillPurpose_e tfp)
+{
+    if (tfp == ZSTD_tfp_forCDict) {
+        ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
+    } else {
+        ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
+    }
 }

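The new CDict fill path stores each hash-table entry together with a short "tag" taken from the low bits of the hash (the ZSTD_SHORT_CACHE_TAG_BITS / ZSTD_writeTaggedIndex / ZSTD_comparePackedTags helpers referenced above), so later dictionary lookups can be skipped on a tag mismatch without touching dictionary memory. A rough standalone sketch of that packing idea, assuming an 8-bit tag in the low bits (the helper names and widths below are illustrative, not the zstd definitions):

    #include <assert.h>
    #include <stdint.h>

    #define TAG_BITS 8u                          /* assumed width, mirroring ZSTD_SHORT_CACHE_TAG_BITS */
    #define TAG_MASK ((1u << TAG_BITS) - 1u)

    /* Pack a match index together with the low bits of its hash ("tag"). */
    static uint32_t pack_tagged_index(uint32_t matchIndex, uint32_t hashAndTag) {
        return (matchIndex << TAG_BITS) | (hashAndTag & TAG_MASK);
    }

    /* Cheap pre-filter: only dereference dictionary memory when the stored
     * tag matches the tag of the current position's hash. */
    static int tags_match(uint32_t packedEntry, uint32_t hashAndTag) {
        return (packedEntry & TAG_MASK) == (hashAndTag & TAG_MASK);
    }

    int main(void) {
        uint32_t const hashAndTag = 0x12345678u;   /* table slot = hash >> TAG_BITS, tag = 0x78 */
        uint32_t const entry = pack_tagged_index(42, hashAndTag);
        assert((entry >> TAG_BITS) == 42);         /* index recovered by shifting the tag out */
        assert(tags_match(entry, hashAndTag));     /* same tag: worth loading the candidate */
        assert(!tags_match(entry, 0x12345600u));   /* different tag: skip the dictionary load */
        return 0;
    }
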
@@ -67,7 +114,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
     U32 offset_1=rep[0], offset_2=rep[1];
-    U32 offsetSaved = 0;
+    U32 offsetSaved1 = 0, offsetSaved2 = 0;

     size_t mLength;
     U32 offset;
@@ -100,8 +147,8 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
         U32 const current = (U32)(ip - base);
         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
         U32 const maxRep = current - windowLow;
-        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
-        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
+        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
     }

     /* Outer Loop: one iteration per match found and stored */
@@ -131,7 +178,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
         if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
             mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
             goto _match_stored;
         }

@@ -175,9 +222,13 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
     } while (ip1 <= ilimit);

 _cleanup:
+    /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
+     * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
+    offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
+
     /* save reps for next block */
-    rep[0] = offset_1 ? offset_1 : offsetSaved;
-    rep[1] = offset_2 ? offset_2 : offsetSaved;
+    rep[0] = offset_1 ? offset_1 : offsetSaved1;
+    rep[1] = offset_2 ? offset_2 : offsetSaved2;

     /* Return the last literals size */
     return (size_t)(iend - anchor);
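The noDict path now tracks two saved repeat offsets instead of one and rotates them at block end when offset_1 was re-validated during the block. A minimal illustration of that bookkeeping, outside of any zstd code (the variable names are just placeholders mirroring the diff above):

    #include <stdio.h>

    typedef unsigned int U32;

    /* Decide which repeat offsets survive into the next block, following the
     * "rotate the saved offsets" rule shown in the diff above. */
    static void save_reps(U32 rep[2], U32 offset_1, U32 offset_2,
                          U32 offsetSaved1, U32 offsetSaved2) {
        /* offset_1 was initially out of range (saved) but a real value replaced it:
         * the saved value moves into offset_2's slot instead of being dropped. */
        offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
        rep[0] = offset_1 ? offset_1 : offsetSaved1;
        rep[1] = offset_2 ? offset_2 : offsetSaved2;
    }

    int main(void) {
        U32 rep[2];
        /* offset_1 started invalid (saved as 100), a new match set it to 40,
         * and offset_2 never became valid in this block. */
        save_reps(rep, 40, 0, 100, 0);
        printf("rep = {%u, %u}\n", rep[0], rep[1]);  /* prints {40, 100} */
        return 0;
    }
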
@@ -217,7 +268,7 @@ _match_found: /* requires ip, offset, mLength */
         hashLong[hl1] = (U32)(ip1 - base);
     }

-    ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
+    ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

 _match_stored:
     /* match found */
@@ -243,7 +294,7 @@ _match_stored:
                 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
                 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
                 ip += rLength;
                 anchor = ip;
                 continue;   /* faster when present ... (?) */
@@ -275,7 +326,6 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
     U32 offset_1=rep[0], offset_2=rep[1];
-    U32 offsetSaved = 0;

     const ZSTD_matchState_t* const dms = ms->dictMatchState;
     const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
@@ -286,8 +336,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
     const BYTE* const dictStart = dictBase + dictStartIndex;
     const BYTE* const dictEnd = dms->window.nextSrc;
     const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
-    const U32 dictHBitsL = dictCParams->hashLog;
-    const U32 dictHBitsS = dictCParams->chainLog;
+    const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
+    const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
     const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

     DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
@@ -295,6 +345,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
     /* if a dictionary is attached, it must be within window range */
     assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);

+    if (ms->prefetchCDictTables) {
+        size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
+        size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
+        PREFETCH_AREA(dictHashLong, hashTableBytes)
+        PREFETCH_AREA(dictHashSmall, chainTableBytes)
+    }
+
     /* init */
     ip += (dictAndPrefixLength == 0);

@@ -309,8 +366,12 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
         U32 offset;
         size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
         size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
-        size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
-        size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
+        size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
+        size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
+        U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
+        U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
+        int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
+        int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
         U32 const curr = (U32)(ip-base);
         U32 const matchIndexL = hashLong[h2];
         U32 matchIndexS = hashSmall[h];
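When a CDict is attached, the new code optionally warms the dictionary's hash and chain tables before the search loop (the PREFETCH_AREA calls above). A rough sketch of what "prefetch an area" amounts to, assuming a GCC/Clang-style __builtin_prefetch and a 64-byte cache line; this is an illustration, not the zstd macro:

    #include <stddef.h>

    #define CACHE_LINE 64  /* assumed cache-line size */

    /* Touch every cache line of a table once so later random lookups hit cache. */
    static void prefetch_area(const void* p, size_t bytes) {
        const char* ptr = (const char*)p;
        size_t i;
        for (i = 0; i < bytes; i += CACHE_LINE) {
            __builtin_prefetch(ptr + i, /* rw = */ 0, /* locality = */ 2);
        }
    }

    /* Example: prefetch a dictionary hash table of 2^hashLog 32-bit entries,
     * mirroring the hashTableBytes computation in the diff above. */
    static void prefetch_dict_hash_table(const unsigned int* dictHashLong, unsigned hashLog) {
        size_t const hashTableBytes = ((size_t)1 << hashLog) * sizeof(unsigned int);
        prefetch_area(dictHashLong, hashTableBytes);
    }
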
@@ -328,7 +389,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
             const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
             goto _match_stored;
         }

@@ -340,9 +401,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
                 while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
                 goto _match_found;
             }
-        } else {
+        } else if (dictTagsMatchL) {
             /* check dictMatchState long match */
-            U32 const dictMatchIndexL = dictHashLong[dictHL];
+            U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
             const BYTE* dictMatchL = dictBase + dictMatchIndexL;
             assert(dictMatchL < dictEnd);

@@ -358,9 +419,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
             if (MEM_read32(match) == MEM_read32(ip)) {
                 goto _search_next_long;
             }
-        } else {
+        } else if (dictTagsMatchS) {
             /* check dictMatchState short match */
-            U32 const dictMatchIndexS = dictHashSmall[dictHS];
+            U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
             match = dictBase + dictMatchIndexS;
             matchIndexS = dictMatchIndexS + dictIndexDelta;

@@ -375,10 +436,11 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
             continue;

 _search_next_long:
-
         {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
-            size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
+            size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
             U32 const matchIndexL3 = hashLong[hl3];
+            U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
+            int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
             const BYTE* matchL3 = base + matchIndexL3;
             hashLong[hl3] = curr + 1;

@@ -391,9 +453,9 @@ _search_next_long:
                 while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                 goto _match_found;
             }
-        } else {
+        } else if (dictTagsMatchL3) {
             /* check dict long +1 match */
-            U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
+            U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
             const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
             assert(dictMatchL3 < dictEnd);
             if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
@@ -419,7 +481,7 @@ _match_found:
     offset_2 = offset_1;
     offset_1 = offset;

-    ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
+    ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

 _match_stored:
     /* match found */
@@ -448,7 +510,7 @@ _match_stored:
                 const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                 ip += repLength2;
@@ -461,8 +523,8 @@ _match_stored:
     }   /* while (ip < ilimit) */

     /* save reps for next block */
-    rep[0] = offset_1 ? offset_1 : offsetSaved;
-    rep[1] = offset_2 ? offset_2 : offsetSaved;
+    rep[0] = offset_1;
+    rep[1] = offset_2;

     /* Return the last literals size */
     return (size_t)(iend - anchor);
@@ -585,7 +647,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
         } else {
             if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -596,7 +658,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

             } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -621,7 +683,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 }
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);

             } else {
                 ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -653,7 +715,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                     ip += repLength2;