zstd-ruby 1.5.4.1 → 1.5.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -226,8 +226,10 @@ struct ZSTD_matchState_t {
  U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */

  U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
- U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
+ BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
  U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
+ U64 hashSalt; /* For row-based matchFinder: salts the hash for re-use of tag table */
+ U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */

  U32* hashTable;
  U32* hashTable3;
@@ -247,6 +249,13 @@ struct ZSTD_matchState_t {
  * This behavior is controlled from the cctx ms.
  * This parameter has no effect in the cdict ms. */
  int prefetchCDictTables;
+
+ /* When == 0, lazy match finders insert every position.
+ * When != 0, lazy match finders only insert positions they search.
+ * This allows them to skip much faster over incompressible data,
+ * at a small cost to compression ratio.
+ */
+ int lazySkipping;
  };

  typedef struct {
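The new lazySkipping flag only changes how many positions get hashed into the match-finder tables while the scanner steps over data it cannot match. A minimal, self-contained sketch of that trade-off (a toy table and toy hash standing in for zstd's real row-based finder; illustrative only, not zstd code):

    #include <stdio.h>
    #include <string.h>

    static size_t toyTable[1024];
    static size_t insertions;

    static void toy_insert(const unsigned char* base, size_t pos)
    {
        unsigned h;                            /* toy 4-byte hash, stands in for ZSTD_hashPtr() */
        memcpy(&h, base + pos, sizeof h);
        toyTable[(h * 2654435761u) >> 22] = pos;
        insertions++;
    }

    /* Advance from `ip` by `step` positions.
     * lazySkipping == 0: hash and insert every skipped position (slightly better ratio).
     * lazySkipping != 0: hash and insert only the position actually searched,
     *                    so large steps over incompressible data stay cheap. */
    static size_t toy_advance(int lazySkipping, const unsigned char* base, size_t ip, size_t step)
    {
        if (lazySkipping) {
            toy_insert(base, ip);
        } else {
            size_t p;
            for (p = ip; p < ip + step; p++) toy_insert(base, p);
        }
        return ip + step;
    }

    int main(void)
    {
        unsigned char buf[4096];
        size_t ip, i;
        for (i = 0; i < sizeof buf; i++) buf[i] = (unsigned char)(i * 251);  /* arbitrary filler data */

        insertions = 0;
        for (ip = 0; ip + 12 < sizeof buf;) ip = toy_advance(0, buf, ip, 8);
        printf("insert every position: %zu insertions\n", insertions);

        insertions = 0;
        for (ip = 0; ip + 12 < sizeof buf;) ip = toy_advance(1, buf, ip, 8);
        printf("insert searched only : %zu insertions\n", insertions);
        return 0;
    }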
@@ -787,28 +796,35 @@ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
  * Hashes
  ***************************************/
  static const U32 prime3bytes = 506832829U;
- static U32 ZSTD_hash3(U32 u, U32 h) { assert(h <= 32); return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
- MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
+ static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; }
+ MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
+ MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }

  static const U32 prime4bytes = 2654435761U;
- static U32 ZSTD_hash4(U32 u, U32 h) { assert(h <= 32); return (u * prime4bytes) >> (32-h) ; }
- static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h); }
+ static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
+ static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
+ static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }

  static const U64 prime5bytes = 889523592379ULL;
- static size_t ZSTD_hash5(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
- static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
+ static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
+ static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
+ static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }

  static const U64 prime6bytes = 227718039650203ULL;
- static size_t ZSTD_hash6(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
- static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
+ static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
+ static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
+ static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }

  static const U64 prime7bytes = 58295818150454627ULL;
- static size_t ZSTD_hash7(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
- static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
+ static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
+ static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
+ static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }

  static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
- static size_t ZSTD_hash8(U64 u, U32 h) { assert(h <= 64); return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
- static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
+ static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; }
+ static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
+ static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
+

  MEM_STATIC FORCE_INLINE_ATTR
  size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
@@ -828,6 +844,24 @@ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
  }
  }

+ MEM_STATIC FORCE_INLINE_ATTR
+ size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) {
+ /* Although some of these hashes do support hBits up to 64, some do not.
+ * To be on the safe side, always avoid hBits > 32. */
+ assert(hBits <= 32);
+
+ switch(mls)
+ {
+ default:
+ case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
+ case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt);
+ case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt);
+ case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt);
+ case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt);
+ }
+ }
+
+
  /** ZSTD_ipow() :
  * Return base^exponent.
  */
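Each hash width now takes an extra salt that is XORed into the multiplicative hash before the final shift, and ZSTD_hashPtrSalted dispatches on the match length the same way ZSTD_hashPtr does. A standalone copy of that pattern, showing how a different salt remaps the same bytes to a different slot (the input value and the helper name are illustrative, not taken from zstd):

    #include <stdint.h>
    #include <stdio.h>

    /* Standalone copy of the salted multiplicative-hash pattern above:
     *   hash = (((u << (64-40)) * prime5bytes) ^ salt) >> (64 - hBits)
     * Changing the salt remaps identical input bytes to different slots, which is
     * what lets a previously used row-based tag table be reused without clearing it. */
    static size_t salted_hash5(uint64_t u, unsigned hBits, uint64_t salt)
    {
        const uint64_t prime5bytes = 889523592379ULL;
        return (size_t)((((u << (64 - 40)) * prime5bytes) ^ salt) >> (64 - hBits));
    }

    int main(void)
    {
        uint64_t const u = 0x0000006262637473ULL;   /* stands in for MEM_readLE64(p) on some input */

        /* salt == 0 reproduces the unsalted ZSTD_hash5() result */
        printf("salt = 0          -> slot %zu\n", salted_hash5(u, 16, 0));
        printf("salt = 0xA5A5...  -> slot %zu\n", salted_hash5(u, 16, 0xA5A5A5A5A5A5A5A5ULL));
        return 0;
    }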
@@ -1475,4 +1509,24 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
  const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
  const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);

+
+ /* ===============================================================
+ * Deprecated definitions that are still used internally to avoid
+ * deprecation warnings. These functions are exactly equivalent to
+ * their public variants, but avoid the deprecation warnings.
+ * =============================================================== */
+
+ size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
+
+ size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+
+ size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize);
+
+ size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
  #endif /* ZSTD_COMPRESS_H */
@@ -14,7 +14,9 @@
  /*-*************************************
  * Dependencies
  ***************************************/
+ #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
  #include "../common/zstd_internal.h"
+ #include "../common/portability_macros.h"

  #if defined (__cplusplus)
  extern "C" {
@@ -44,8 +46,9 @@ extern "C" {
  ***************************************/
  typedef enum {
  ZSTD_cwksp_alloc_objects,
- ZSTD_cwksp_alloc_buffers,
- ZSTD_cwksp_alloc_aligned
+ ZSTD_cwksp_alloc_aligned_init_once,
+ ZSTD_cwksp_alloc_aligned,
+ ZSTD_cwksp_alloc_buffers
  } ZSTD_cwksp_alloc_phase_e;

  /**
@@ -98,8 +101,8 @@ typedef enum {
  *
  * Workspace Layout:
  *
- * [ ... workspace ... ]
- * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
+ * [ ... workspace ... ]
+ * [objects][tables ->] free space [<- buffers][<- aligned][<- init once]
  *
  * The various objects that live in the workspace are divided into the
  * following categories, and are allocated separately:
@@ -123,9 +126,18 @@ typedef enum {
  * uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
  * Their sizes depend on the cparams. These tables are 64-byte aligned.
  *
- * - Aligned: these buffers are used for various purposes that require 4 byte
- * alignment, but don't require any initialization before they're used. These
- * buffers are each aligned to 64 bytes.
+ * - Init once: these buffers require to be initialized at least once before
+ * use. They should be used when we want to skip memory initialization
+ * while not triggering memory checkers (like Valgrind) when reading from
+ * from this memory without writing to it first.
+ * These buffers should be used carefully as they might contain data
+ * from previous compressions.
+ * Buffers are aligned to 64 bytes.
+ *
+ * - Aligned: these buffers don't require any initialization before they're
+ * used. The user of the buffer should make sure they write into a buffer
+ * location before reading from it.
+ * Buffers are aligned to 64 bytes.
  *
  * - Buffers: these buffers are used for various purposes that don't require
  * any alignment or initialization before they're used. This means they can
@@ -137,8 +149,9 @@ typedef enum {
  * correctly packed into the workspace buffer. That order is:
  *
  * 1. Objects
- * 2. Buffers
- * 3. Aligned/Tables
+ * 2. Init once / Tables
+ * 3. Aligned / Tables
+ * 4. Buffers / Tables
  *
  * Attempts to reserve objects of different types out of order will fail.
  */
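The updated layout comment and phase order describe a double-ended arena: objects and tables are carved off the front of the workspace, while buffers, aligned and init-once allocations are carved downward from the 64-byte-rounded end, with the phase enum enforcing the ordering. A toy sketch of that front/back split follows (toy_arena, toy_alloc_front, toy_alloc_back and TOY_ALIGN are made-up names, not the cwksp API):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    #define TOY_ALIGN 64u   /* plays the role of ZSTD_CWKSP_ALIGNMENT_BYTES */

    /* Toy double-ended arena: a front cursor for objects/tables,
     * a back cursor for buffer/aligned/init-once style allocations. */
    typedef struct {
        uint8_t* base;
        uint8_t* front;   /* grows upward   (objects, then tables)        */
        uint8_t* back;    /* grows downward (buffers, aligned, init once) */
    } toy_arena;

    static void toy_init(toy_arena* a, void* mem, size_t size)
    {
        a->base  = (uint8_t*)mem;
        a->front = a->base;
        /* round the end down to the alignment, like ZSTD_cwksp_initialAllocStart() */
        a->back  = (uint8_t*)((uintptr_t)(a->base + size) & ~(uintptr_t)(TOY_ALIGN - 1));
    }

    static void* toy_alloc_front(toy_arena* a, size_t bytes)   /* tables side */
    {
        uint8_t* const p = a->front;
        if (p + bytes > a->back) return NULL;
        a->front = p + bytes;
        return p;
    }

    static void* toy_alloc_back(toy_arena* a, size_t bytes)    /* buffers side */
    {
        uint8_t* const p = a->back - bytes;
        if (p < a->front) return NULL;
        a->back = p;
        return p;
    }

    int main(void)
    {
        static uint8_t mem[1 << 12];
        toy_arena a;
        toy_init(&a, mem, sizeof mem);
        assert(toy_alloc_front(&a, 256) != NULL);   /* "table"  from the front */
        assert(toy_alloc_back(&a, 512)  != NULL);   /* "buffer" from the back  */
        assert(a.front <= a.back);                  /* the two ends never cross */
        return 0;
    }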
@@ -150,6 +163,7 @@ typedef struct {
  void* tableEnd;
  void* tableValidEnd;
  void* allocStart;
+ void* initOnceStart;

  BYTE allocFailed;
  int workspaceOversizedDuration;
@@ -162,6 +176,7 @@ typedef struct {
  ***************************************/

  MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
+ MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws);

  MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
  (void)ws;
@@ -171,6 +186,20 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
  assert(ws->tableEnd <= ws->allocStart);
  assert(ws->tableValidEnd <= ws->allocStart);
  assert(ws->allocStart <= ws->workspaceEnd);
+ assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws));
+ assert(ws->workspace <= ws->initOnceStart);
+ #if ZSTD_MEMORY_SANITIZER
+ {
+ intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
+ (U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart);
+ #if defined(ZSTD_MSAN_PRINT)
+ if(offset!=-1) {
+ __msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
+ }
+ #endif
+ assert(offset==-1);
+ };
+ #endif
  }

  /**
@@ -217,14 +246,10 @@ MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
  * for internal purposes (currently only alignment).
  */
  MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
- /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
- * to align the beginning of tables section, as well as another n_2=[0, 63] bytes
- * to align the beginning of the aligned section.
- *
- * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
- * aligneds being sized in multiples of 64 bytes.
+ /* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES
+ * bytes to align the beginning of tables section and end of buffers;
  */
- size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
+ size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2;
  return slackSpace;
  }

@@ -237,10 +262,18 @@ MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignByt
  size_t const alignBytesMask = alignBytes - 1;
  size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
  assert((alignBytes & alignBytesMask) == 0);
- assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
+ assert(bytes < alignBytes);
  return bytes;
  }

+ /**
+ * Returns the initial value for allocStart which is used to determine the position from
+ * which we can allocate from the end of the workspace.
+ */
+ MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) {
+ return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1));
+ }
+
  /**
  * Internal function. Do not use directly.
  * Reserves the given number of bytes within the aligned/buffer segment of the wksp,
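A worked example of the two alignment helpers in this hunk: ZSTD_cwksp_bytes_to_align_ptr now yields a value in [0, alignBytes-1] (hence the relaxed assert), and ZSTD_cwksp_initialAllocStart rounds workspaceEnd down to a 64-byte boundary. The sketch below re-implements both expressions on a sample address, assuming ZSTD_CWKSP_ALIGNMENT_BYTES == 64 (names are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define ALIGN 64u  /* assumed value of ZSTD_CWKSP_ALIGNMENT_BYTES */

    /* Standalone copies of the two expressions above, applied to raw addresses. */
    static size_t bytes_to_align(uintptr_t addr, size_t alignBytes)
    {
        size_t const mask = alignBytes - 1;
        return (alignBytes - (addr & mask)) & mask;      /* always in [0, alignBytes-1] */
    }

    static uintptr_t initial_alloc_start(uintptr_t workspaceEnd)
    {
        return workspaceEnd & ~(uintptr_t)(ALIGN - 1);   /* round down to 64 bytes */
    }

    int main(void)
    {
        uintptr_t const end = 0x10032Du;                  /* example workspaceEnd address */
        printf("padding to align 0x%lx up: %zu bytes\n",
               (unsigned long)end, bytes_to_align(end, ALIGN));       /* prints 19 */
        printf("allocStart rounded down  : 0x%lx\n",
               (unsigned long)initial_alloc_start(end));              /* prints 0x100300 */
        return 0;
    }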
@@ -281,27 +314,16 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
  {
  assert(phase >= ws->phase);
  if (phase > ws->phase) {
- /* Going from allocating objects to allocating buffers */
- if (ws->phase < ZSTD_cwksp_alloc_buffers &&
- phase >= ZSTD_cwksp_alloc_buffers) {
+ /* Going from allocating objects to allocating initOnce / tables */
+ if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once &&
+ phase >= ZSTD_cwksp_alloc_aligned_init_once) {
  ws->tableValidEnd = ws->objectEnd;
- }
+ ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);

- /* Going from allocating buffers to allocating aligneds/tables */
- if (ws->phase < ZSTD_cwksp_alloc_aligned &&
- phase >= ZSTD_cwksp_alloc_aligned) {
- { /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
- size_t const bytesToAlign =
- ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
- DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
- ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
- RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
- memory_allocation, "aligned phase - alignment initial allocation failed!");
- }
  { /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
- void* const alloc = ws->objectEnd;
+ void *const alloc = ws->objectEnd;
  size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
- void* const objectEnd = (BYTE*)alloc + bytesToAlign;
+ void *const objectEnd = (BYTE *) alloc + bytesToAlign;
  DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
  RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
  "table phase - alignment initial allocation failed!");
@@ -309,7 +331,9 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
  ws->tableEnd = objectEnd; /* table area starts being empty */
  if (ws->tableValidEnd < ws->tableEnd) {
  ws->tableValidEnd = ws->tableEnd;
- } } }
+ }
+ }
+ }
  ws->phase = phase;
  ZSTD_cwksp_assert_internal_consistency(ws);
  }
@@ -321,7 +345,7 @@ ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase
  */
  MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
  {
- return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
+ return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd);
  }

  /**
@@ -366,6 +390,36 @@ MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
  return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
  }

+ /**
+ * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
+ * This memory has been initialized at least once in the past.
+ * This doesn't mean it has been initialized this time, and it might contain data from previous
+ * operations.
+ * The main usage is for algorithms that might need read access into uninitialized memory.
+ * The algorithm must maintain safety under these conditions and must make sure it doesn't
+ * leak any of the past data (directly or in side channels).
+ */
+ MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes)
+ {
+ size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES);
+ void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once);
+ assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
+ if(ptr && ptr < ws->initOnceStart) {
+ /* We assume the memory following the current allocation is either:
+ * 1. Not usable as initOnce memory (end of workspace)
+ * 2. Another initOnce buffer that has been allocated before (and so was previously memset)
+ * 3. An ASAN redzone, in which case we don't want to write on it
+ * For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart.
+ * Note that we assume here that MSAN and ASAN cannot run in the same time. */
+ ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes));
+ ws->initOnceStart = ptr;
+ }
+ #if ZSTD_MEMORY_SANITIZER
+ assert(__msan_test_shadow(ptr, bytes) == -1);
+ #endif
+ return ptr;
+ }
+
  /**
  * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
  */
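ZSTD_cwksp_reserve_aligned_init_once only zeroes the gap between the new allocation and the previous initOnceStart, relying on the invariant that everything at or above initOnceStart has already been zeroed at least once. A toy model of that bookkeeping (toy_ws and toy_reserve_init_once are made-up names; the real logic is the function above):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Toy model: allocations come downward from the end of a region, and only
     * the bytes not yet covered by `initOnceStart` are zeroed on reservation. */
    typedef struct {
        uint8_t* allocStart;      /* current bottom of the downward-growing area   */
        uint8_t* initOnceStart;   /* everything from here upward was zeroed before */
    } toy_ws;

    static void* toy_reserve_init_once(toy_ws* ws, size_t bytes)
    {
        uint8_t* const ptr = ws->allocStart - bytes;
        ws->allocStart = ptr;
        if (ptr < ws->initOnceStart) {
            /* zero only the newly covered bytes below the previous initOnceStart */
            size_t const gap = (size_t)(ws->initOnceStart - ptr);
            memset(ptr, 0, gap < bytes ? gap : bytes);
            ws->initOnceStart = ptr;
        }
        return ptr;
    }

    int main(void)
    {
        static uint8_t mem[1024];
        toy_ws ws = { mem + sizeof mem, mem + sizeof mem };

        uint8_t* const a = toy_reserve_init_once(&ws, 256);  /* zeroes bytes [768, 1024) */
        uint8_t* const b = toy_reserve_init_once(&ws, 256);  /* zeroes bytes [512, 768)  */
        memset(a, 0xFF, 256);                                /* later use dirties `a`    */

        assert(b[0] == 0);                  /* `b` was zeroed exactly once            */
        assert(ws.initOnceStart == b);      /* the covered region now starts at `b`   */
        return 0;
    }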
@@ -384,13 +438,17 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
  */
  MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
  {
- const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
+ const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once;
  void* alloc;
  void* end;
  void* top;

- if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
- return NULL;
+ /* We can only start allocating tables after we are done reserving space for objects at the
+ * start of the workspace */
+ if(ws->phase < phase) {
+ if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
+ return NULL;
+ }
  }
  alloc = ws->tableEnd;
  end = (BYTE *)alloc + bytes;
@@ -469,11 +527,19 @@ MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
  #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
  /* To validate that the table re-use logic is sound, and that we don't
  * access table space that we haven't cleaned, we re-"poison" the table
- * space every time we mark it dirty. */
+ * space every time we mark it dirty.
+ * Since tableValidEnd space and initOnce space may overlap we don't poison
+ * the initOnce portion as it break its promise. This means that this poisoning
+ * check isn't always applied fully. */
  {
  size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
  assert(__msan_test_shadow(ws->objectEnd, size) == -1);
- __msan_poison(ws->objectEnd, size);
+ if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
+ __msan_poison(ws->objectEnd, size);
+ } else {
+ assert(ws->initOnceStart >= ws->objectEnd);
+ __msan_poison(ws->objectEnd, (BYTE*)ws->initOnceStart - (BYTE*)ws->objectEnd);
+ }
  }
  #endif

@@ -538,11 +604,14 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
  #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
  /* To validate that the context re-use logic is sound, and that we don't
  * access stuff that this compression hasn't initialized, we re-"poison"
- * the workspace (or at least the non-static, non-table parts of it)
- * every time we start a new compression. */
+ * the workspace except for the areas in which we expect memory re-use
+ * without initialization (objects, valid tables area and init once
+ * memory). */
  {
- size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
- __msan_poison(ws->tableValidEnd, size);
+ if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) {
+ size_t size = (BYTE*)ws->initOnceStart - (BYTE*)ws->tableValidEnd;
+ __msan_poison(ws->tableValidEnd, size);
+ }
  }
  #endif

@@ -558,10 +627,10 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
  #endif

  ws->tableEnd = ws->objectEnd;
- ws->allocStart = ws->workspaceEnd;
+ ws->allocStart = ZSTD_cwksp_initialAllocStart(ws);
  ws->allocFailed = 0;
- if (ws->phase > ZSTD_cwksp_alloc_buffers) {
- ws->phase = ZSTD_cwksp_alloc_buffers;
+ if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) {
+ ws->phase = ZSTD_cwksp_alloc_aligned_init_once;
  }
  ZSTD_cwksp_assert_internal_consistency(ws);
  }
@@ -578,6 +647,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_c
  ws->workspaceEnd = (BYTE*)start + size;
  ws->objectEnd = ws->workspace;
  ws->tableValidEnd = ws->objectEnd;
+ ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws);
  ws->phase = ZSTD_cwksp_alloc_objects;
  ws->isStatic = isStatic;
  ZSTD_cwksp_clear(ws);
@@ -630,17 +700,11 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
  * Returns if the estimated space needed for a wksp is within an acceptable limit of the
  * actual amount of space used.
  */
- MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
- size_t const estimatedSpace, int resizedWorkspace) {
- if (resizedWorkspace) {
- /* Resized/newly allocated wksp should have exact bounds */
- return ZSTD_cwksp_used(ws) == estimatedSpace;
- } else {
- /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
- * than estimatedSpace. See the comments in zstd_cwksp.h for details.
- */
- return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
- }
+ MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) {
+ /* We have an alignment space between objects and tables between tables and buffers, so we can have up to twice
+ * the alignment bytes difference between estimation and actual usage */
+ return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) &&
+ ZSTD_cwksp_used(ws) <= estimatedSpace;
  }

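The rewritten check drops the resizedWorkspace special case and accepts any actual usage that is at most ZSTD_cwksp_slack_space_required() bytes below the estimate and never above it. A worked example of those bounds, assuming ZSTD_CWKSP_ALIGNMENT_BYTES == 64 so the slack is 128 bytes (standalone sketch, not the real function):

    #include <assert.h>
    #include <stddef.h>

    /* Mirrors the new bounds check with the slack hard-coded to 2 * 64 bytes. */
    static int within_bounds(size_t used, size_t estimated)
    {
        size_t const slack = 2 * 64;
        return (estimated - slack) <= used && used <= estimated;
    }

    int main(void)
    {
        assert( within_bounds(10000, 10000));  /* exact estimate is accepted        */
        assert( within_bounds( 9872, 10000));  /* up to 128 bytes under is accepted */
        assert(!within_bounds( 9871, 10000));  /* 129 bytes under is rejected       */
        assert(!within_bounds(10001, 10000));  /* anything over the estimate fails  */
        return 0;
    }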