extzstd 0.3.2 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -3
  3. data/contrib/zstd/CHANGELOG +225 -1
  4. data/contrib/zstd/CONTRIBUTING.md +158 -75
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +106 -69
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +64 -36
  9. data/contrib/zstd/SECURITY.md +15 -0
  10. data/contrib/zstd/TESTING.md +2 -3
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +117 -199
  13. data/contrib/zstd/lib/README.md +37 -7
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +80 -86
  17. data/contrib/zstd/lib/common/compiler.h +225 -63
  18. data/contrib/zstd/lib/common/cpu.h +37 -1
  19. data/contrib/zstd/lib/common/debug.c +7 -1
  20. data/contrib/zstd/lib/common/debug.h +21 -12
  21. data/contrib/zstd/lib/common/entropy_common.c +15 -37
  22. data/contrib/zstd/lib/common/error_private.c +9 -2
  23. data/contrib/zstd/lib/common/error_private.h +93 -5
  24. data/contrib/zstd/lib/common/fse.h +12 -87
  25. data/contrib/zstd/lib/common/fse_decompress.c +37 -117
  26. data/contrib/zstd/lib/common/huf.h +97 -172
  27. data/contrib/zstd/lib/common/mem.h +58 -58
  28. data/contrib/zstd/lib/common/pool.c +38 -17
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +158 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -14
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +6 -814
  34. data/contrib/zstd/lib/common/xxhash.h +6930 -195
  35. data/contrib/zstd/lib/common/zstd_common.c +1 -36
  36. data/contrib/zstd/lib/common/zstd_deps.h +1 -1
  37. data/contrib/zstd/lib/common/zstd_internal.h +68 -154
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +75 -155
  41. data/contrib/zstd/lib/compress/hist.c +1 -1
  42. data/contrib/zstd/lib/compress/hist.h +1 -1
  43. data/contrib/zstd/lib/compress/huf_compress.c +810 -259
  44. data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
  63. data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
  79. data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +237 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +4 -3
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
  102. data/contrib/zstd/lib/zstd.h +1030 -332
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
  104. data/ext/extconf.rb +26 -7
  105. data/ext/extzstd.c +51 -24
  106. data/ext/extzstd.h +33 -6
  107. data/ext/extzstd_stream.c +74 -31
  108. data/ext/libzstd_conf.h +0 -1
  109. data/ext/zstd_decompress_asm.S +1 -0
  110. metadata +17 -7
  111. data/contrib/zstd/appveyor.yml +0 -292
  112. data/ext/depend +0 -2
data/contrib/zstd/lib/compress/zstdmt_compress.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,16 +15,13 @@
  #endif


- /* ====== Constants ====== */
- #define ZSTDMT_OVERLAPLOG_DEFAULT 0
-
-
  /* ====== Dependencies ====== */
+ #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
  #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
  #include "../common/mem.h" /* MEM_STATIC */
  #include "../common/pool.h" /* threadpool */
  #include "../common/threading.h" /* mutex */
- #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+ #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
  #include "zstd_ldm.h"
  #include "zstdmt_compress.h"

@@ -43,12 +40,13 @@
  # include <unistd.h>
  # include <sys/times.h>

- # define DEBUG_PRINTHEX(l,p,n) { \
- unsigned debug_u; \
- for (debug_u=0; debug_u<(n); debug_u++) \
- RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
- RAWLOG(l, " \n"); \
- }
+ # define DEBUG_PRINTHEX(l,p,n) \
+ do { \
+ unsigned debug_u; \
+ for (debug_u=0; debug_u<(n); debug_u++) \
+ RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+ RAWLOG(l, " \n"); \
+ } while (0)

  static unsigned long long GetCurrentClockTimeMicroseconds(void)
  {
@@ -60,25 +58,28 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
  } }

  #define MUTEX_WAIT_TIME_DLEVEL 6
- #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
- if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
- unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
- ZSTD_pthread_mutex_lock(mutex); \
- { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
- unsigned long long const elapsedTime = (afterTime-beforeTime); \
- if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
- DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
- elapsedTime, #mutex); \
- } } \
- } else { \
- ZSTD_pthread_mutex_lock(mutex); \
- } \
- }
+ #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) \
+ do { \
+ if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
+ unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
+ ZSTD_pthread_mutex_lock(mutex); \
+ { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+ unsigned long long const elapsedTime = (afterTime-beforeTime); \
+ if (elapsedTime > 1000) { \
+ /* or whatever threshold you like; I'm using 1 millisecond here */ \
+ DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, \
+ "Thread took %llu microseconds to acquire mutex %s \n", \
+ elapsedTime, #mutex); \
+ } } \
+ } else { \
+ ZSTD_pthread_mutex_lock(mutex); \
+ } \
+ } while (0)

  #else

  # define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
- # define DEBUG_PRINTHEX(l,p,n) {}
+ # define DEBUG_PRINTHEX(l,p,n) do { } while (0)

  #endif

@@ -99,19 +100,39 @@ typedef struct ZSTDMT_bufferPool_s {
  unsigned totalBuffers;
  unsigned nbBuffers;
  ZSTD_customMem cMem;
- buffer_t bTable[1]; /* variable size */
+ buffer_t* buffers;
  } ZSTDMT_bufferPool;

- static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
+ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+ {
+ DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
+ if (!bufPool) return; /* compatibility with free on NULL */
+ if (bufPool->buffers) {
+ unsigned u;
+ for (u=0; u<bufPool->totalBuffers; u++) {
+ DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->buffers[u].start);
+ ZSTD_customFree(bufPool->buffers[u].start, bufPool->cMem);
+ }
+ ZSTD_customFree(bufPool->buffers, bufPool->cMem);
+ }
+ ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
+ ZSTD_customFree(bufPool, bufPool->cMem);
+ }
+
+ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
  {
- unsigned const maxNbBuffers = 2*nbWorkers + 3;
- ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
- sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
+ ZSTDMT_bufferPool* const bufPool =
+ (ZSTDMT_bufferPool*)ZSTD_customCalloc(sizeof(ZSTDMT_bufferPool), cMem);
  if (bufPool==NULL) return NULL;
  if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
  ZSTD_customFree(bufPool, cMem);
  return NULL;
  }
+ bufPool->buffers = (buffer_t*)ZSTD_customCalloc(maxNbBuffers * sizeof(buffer_t), cMem);
+ if (bufPool->buffers==NULL) {
+ ZSTDMT_freeBufferPool(bufPool);
+ return NULL;
+ }
  bufPool->bufferSize = 64 KB;
  bufPool->totalBuffers = maxNbBuffers;
  bufPool->nbBuffers = 0;
@@ -119,32 +140,19 @@ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_custo
  return bufPool;
  }

- static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
- {
- unsigned u;
- DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
- if (!bufPool) return; /* compatibility with free on NULL */
- for (u=0; u<bufPool->totalBuffers; u++) {
- DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
- ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
- }
- ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
- ZSTD_customFree(bufPool, bufPool->cMem);
- }
-
  /* only works at initialization, not during compression */
  static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
  {
- size_t const poolSize = sizeof(*bufPool)
- + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+ size_t const poolSize = sizeof(*bufPool);
+ size_t const arraySize = bufPool->totalBuffers * sizeof(buffer_t);
  unsigned u;
  size_t totalBufferSize = 0;
  ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
  for (u=0; u<bufPool->totalBuffers; u++)
- totalBufferSize += bufPool->bTable[u].capacity;
+ totalBufferSize += bufPool->buffers[u].capacity;
  ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);

- return poolSize + totalBufferSize;
+ return poolSize + arraySize + totalBufferSize;
  }

  /* ZSTDMT_setBufferSize() :
@@ -160,9 +168,8 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
  }


- static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)
+ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, unsigned maxNbBuffers)
  {
- unsigned const maxNbBuffers = 2*nbWorkers + 3;
  if (srcBufPool==NULL) return NULL;
  if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
  return srcBufPool;
@@ -171,7 +178,7 @@ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool,
  size_t const bSize = srcBufPool->bufferSize; /* forward parameters */
  ZSTDMT_bufferPool* newBufPool;
  ZSTDMT_freeBufferPool(srcBufPool);
- newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+ newBufPool = ZSTDMT_createBufferPool(maxNbBuffers, cMem);
  if (newBufPool==NULL) return newBufPool;
  ZSTDMT_setBufferSize(newBufPool, bSize);
  return newBufPool;
@@ -188,9 +195,9 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
  DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
  ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
  if (bufPool->nbBuffers) { /* try to use an existing buffer */
- buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
+ buffer_t const buf = bufPool->buffers[--(bufPool->nbBuffers)];
  size_t const availBufferSize = buf.capacity;
- bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
+ bufPool->buffers[bufPool->nbBuffers] = g_nullBuffer;
  if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
  /* large enough, but not too much */
  DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
@@ -251,18 +258,28 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
  if (buf.start == NULL) return; /* compatible with release on NULL */
  ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
  if (bufPool->nbBuffers < bufPool->totalBuffers) {
- bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */
+ bufPool->buffers[bufPool->nbBuffers++] = buf; /* stored for later use */
  DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
  (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
  ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
  return;
  }
  ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
- /* Reached bufferPool capacity (should not happen) */
+ /* Reached bufferPool capacity (note: should not happen) */
  DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
  ZSTD_customFree(buf.start, bufPool->cMem);
  }

+ /* We need 2 output buffers per worker since each dstBuff must be flushed after it is released.
+ * The 3 additional buffers are as follows:
+ * 1 buffer for input loading
+ * 1 buffer for "next input" when submitting current one
+ * 1 buffer stuck in queue */
+ #define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
+
+ /* After a worker releases its rawSeqStore, it is immediately ready for reuse.
+ * So we only need one seq buffer per worker. */
+ #define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)

  /* ===== Seq Pool Wrapper ====== */

@@ -316,7 +333,7 @@ static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)

  static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
  {
- ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+ ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(SEQ_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
  if (seqPool == NULL) return NULL;
  ZSTDMT_setNbSeq(seqPool, 0);
  return seqPool;
@@ -329,7 +346,7 @@ static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)


  static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
  {
- return ZSTDMT_expandBufferPool(pool, nbWorkers);
+ return ZSTDMT_expandBufferPool(pool, SEQ_POOL_MAX_NB_BUFFERS(nbWorkers));
  }

@@ -341,16 +358,20 @@ typedef struct {
  int totalCCtx;
  int availCCtx;
  ZSTD_customMem cMem;
- ZSTD_CCtx* cctx[1]; /* variable size */
+ ZSTD_CCtx** cctxs;
  } ZSTDMT_CCtxPool;

- /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
+ /* note : all CCtx borrowed from the pool must be reverted back to the pool _before_ freeing the pool */
  static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
  {
- int cid;
- for (cid=0; cid<pool->totalCCtx; cid++)
- ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
+ if (!pool) return;
  ZSTD_pthread_mutex_destroy(&pool->poolMutex);
+ if (pool->cctxs) {
+ int cid;
+ for (cid=0; cid<pool->totalCCtx; cid++)
+ ZSTD_freeCCtx(pool->cctxs[cid]); /* free compatible with NULL */
+ ZSTD_customFree(pool->cctxs, pool->cMem);
+ }
  ZSTD_customFree(pool, pool->cMem);
  }

@@ -359,19 +380,24 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
  static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
  ZSTD_customMem cMem)
  {
- ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(
- sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
+ ZSTDMT_CCtxPool* const cctxPool =
+ (ZSTDMT_CCtxPool*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtxPool), cMem);
  assert(nbWorkers > 0);
  if (!cctxPool) return NULL;
  if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
  ZSTD_customFree(cctxPool, cMem);
  return NULL;
  }
- cctxPool->cMem = cMem;
  cctxPool->totalCCtx = nbWorkers;
+ cctxPool->cctxs = (ZSTD_CCtx**)ZSTD_customCalloc(nbWorkers * sizeof(ZSTD_CCtx*), cMem);
+ if (!cctxPool->cctxs) {
+ ZSTDMT_freeCCtxPool(cctxPool);
+ return NULL;
+ }
+ cctxPool->cMem = cMem;
+ cctxPool->cctxs[0] = ZSTD_createCCtx_advanced(cMem);
+ if (!cctxPool->cctxs[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
  cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
- cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
- if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
  DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
  return cctxPool;
  }
@@ -393,16 +419,16 @@ static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
  {
  ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
  { unsigned const nbWorkers = cctxPool->totalCCtx;
- size_t const poolSize = sizeof(*cctxPool)
- + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
- unsigned u;
+ size_t const poolSize = sizeof(*cctxPool);
+ size_t const arraySize = cctxPool->totalCCtx * sizeof(ZSTD_CCtx*);
  size_t totalCCtxSize = 0;
+ unsigned u;
  for (u=0; u<nbWorkers; u++) {
- totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+ totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctxs[u]);
  }
  ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
  assert(nbWorkers > 0);
- return poolSize + totalCCtxSize;
+ return poolSize + arraySize + totalCCtxSize;
  }
  }

@@ -412,7 +438,7 @@ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
  ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
  if (cctxPool->availCCtx) {
  cctxPool->availCCtx--;
- { ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
+ { ZSTD_CCtx* const cctx = cctxPool->cctxs[cctxPool->availCCtx];
  ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
  return cctx;
  } }
@@ -426,7 +452,7 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
  if (cctx==NULL) return; /* compatibility with release on NULL */
  ZSTD_pthread_mutex_lock(&pool->poolMutex);
  if (pool->availCCtx < pool->totalCCtx)
- pool->cctx[pool->availCCtx++] = cctx;
+ pool->cctxs[pool->availCCtx++] = cctx;
  else {
  /* pool overflow : should not happen, since totalCCtx==nbWorkers */
  DEBUGLOG(4, "CCtx pool overflow : free cctx");
@@ -467,29 +493,27 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
  ZSTD_dictContentType_e dictContentType)
  {
  /* Adjust parameters */
- if (params.ldmParams.enableLdm) {
+ if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
  DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
  ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
  assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
  assert(params.ldmParams.hashRateLog < 32);
- serialState->ldmState.hashPower =
- ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
  } else {
  ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
  }
  serialState->nextJobID = 0;
  if (params.fParams.checksumFlag)
  XXH64_reset(&serialState->xxhState, 0);
- if (params.ldmParams.enableLdm) {
+ if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
  ZSTD_customMem cMem = params.customMem;
  unsigned const hashLog = params.ldmParams.hashLog;
  size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
  unsigned const bucketLog =
  params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
- size_t const bucketSize = (size_t)1 << bucketLog;
  unsigned const prevBucketLog =
  serialState->params.ldmParams.hashLog -
  serialState->params.ldmParams.bucketSizeLog;
+ size_t const numBuckets = (size_t)1 << bucketLog;
  /* Size the seq pool tables */
  ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
  /* Reset the window */
@@ -501,20 +525,20 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
  }
  if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
  ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
- serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(bucketSize, cMem);
+ serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
  }
  if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
  return 1;
  /* Zero the tables */
  ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
- ZSTD_memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
+ ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);

  /* Update window state and fill hash table with dict */
  serialState->ldmState.loadedDictEnd = 0;
  if (dictSize > 0) {
  if (dictContentType == ZSTD_dct_rawContent) {
  BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
- ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
+ ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
  ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
  serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
  } else {
@@ -566,12 +590,12 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
  /* A future job may error and skip our job */
  if (serialState->nextJobID == jobID) {
  /* It is now our turn, do any processing necessary */
- if (serialState->params.ldmParams.enableLdm) {
+ if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
  size_t error;
  assert(seqStore.seq != NULL && seqStore.pos == 0 &&
  seqStore.size == 0 && seqStore.capacity > 0);
  assert(src.size <= serialState->params.jobSize);
- ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
+ ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
  error = ZSTD_ldm_generateSequences(
  &serialState->ldmState, &seqStore,
  &serialState->params.ldmParams, src.start, src.size);
@@ -594,11 +618,8 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
  ZSTD_pthread_mutex_unlock(&serialState->mutex);

  if (seqStore.size > 0) {
- size_t const err = ZSTD_referenceExternalSequences(
- jobCCtx, seqStore.seq, seqStore.size);
- assert(serialState->params.ldmParams.enableLdm);
- assert(!ZSTD_isError(err));
- (void)err;
+ ZSTD_referenceExternalSequences(jobCCtx, seqStore.seq, seqStore.size);
+ assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
  }
  }

@@ -650,12 +671,13 @@ typedef struct {
  unsigned frameChecksumNeeded; /* used only by mtctx */
  } ZSTDMT_jobDescription;

- #define JOB_ERROR(e) { \
- ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
- job->cSize = e; \
- ZSTD_pthread_mutex_unlock(&job->job_mutex); \
- goto _endJob; \
- }
+ #define JOB_ERROR(e) \
+ do { \
+ ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
+ job->cSize = e; \
+ ZSTD_pthread_mutex_unlock(&job->job_mutex); \
+ goto _endJob; \
+ } while (0)

  /* ZSTDMT_compressionJob() is a POOL_function type */
  static void ZSTDMT_compressionJob(void* jobDescription)
@@ -674,7 +696,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
  if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
  job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */
  }
- if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
+ if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)
  JOB_ERROR(ERROR(memory_allocation));

  /* Don't compute the checksum for chunks, since we compute it externally,
@@ -682,7 +704,9 @@ static void ZSTDMT_compressionJob(void* jobDescription)
  */
  if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
  /* Don't run LDM for the chunks, since we handle it externally */
- jobParams.ldmParams.enableLdm = 0;
+ jobParams.ldmParams.enableLdm = ZSTD_ps_disable;
+ /* Correct nbWorkers to 0. */
+ jobParams.nbWorkers = 0;


  /* init */
@@ -695,6 +719,10 @@ static void ZSTDMT_compressionJob(void* jobDescription)
  { size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
  if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
  }
+ if (!job->firstJob) {
+ size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
+ if (ZSTD_isError(err)) JOB_ERROR(err);
+ }
  { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
  job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
  ZSTD_dtlm_fast,
@@ -707,7 +735,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
  ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);

  if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
- size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
+ size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
  if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
  DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
  ZSTD_invalidateRepCodes(cctx);
@@ -725,7 +753,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
  DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
  assert(job->cSize == 0);
  for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
- size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
+ size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
  if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
  ip += chunkSize;
  op += cSize; assert(op < oend);
@@ -745,11 +773,18 @@ static void ZSTDMT_compressionJob(void* jobDescription)
  size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
  size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
  size_t const cSize = (job->lastJob) ?
- ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
- ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
+ ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
+ ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
  if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
  lastCBlockSize = cSize;
  } }
+ if (!job->firstJob) {
+ /* Double check that we don't have an ext-dict, because then our
+ * repcode invalidation doesn't work.
+ */
+ assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+ }
+ ZSTD_CCtx_trace(cctx, 0);

  _endJob:
  ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
@@ -796,6 +831,15 @@ typedef struct {
  static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};

  #define RSYNC_LENGTH 32
+ /* Don't create chunks smaller than the zstd block size.
+ * This stops us from regressing compression ratio too much,
+ * and ensures our output fits in ZSTD_compressBound().
+ *
+ * If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then
+ * ZSTD_COMPRESSBOUND() will need to be updated.
+ */
+ #define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
+ #define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)

  typedef struct {
  U64 hash;
@@ -916,7 +960,7 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
  mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
  assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */
  mtctx->jobIDMask = nbJobs - 1;
- mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+ mtctx->bufPool = ZSTDMT_createBufferPool(BUF_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
  mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
  mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
  initError = ZSTDMT_serialState_init(&mtctx->serial);
@@ -1019,7 +1063,7 @@ static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
  {
  if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
  FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
- mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
+ mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, BUF_POOL_MAX_NB_BUFFERS(nbWorkers));
  if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
  mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
  if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
@@ -1062,7 +1106,7 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
  { unsigned jobNb;
  unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
  DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
- mtctx->doneJobID, lastJobNb, mtctx->jobReady)
+ mtctx->doneJobID, lastJobNb, mtctx->jobReady);
  for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
  unsigned const wJobID = jobNb & mtctx->jobIDMask;
  ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID];
@@ -1124,7 +1168,7 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
  static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
  {
  unsigned jobLog;
- if (params->ldmParams.enableLdm) {
+ if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
  /* In Long Range Mode, the windowLog is typically oversized.
  * In which case, it's preferable to determine the jobSize
  * based on cycleLog instead. */
@@ -1168,7 +1212,7 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
  int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
  int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
  assert(0 <= overlapRLog && overlapRLog <= 8);
- if (params->ldmParams.enableLdm) {
+ if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
  /* In Long Range Mode, the windowLog is typically oversized.
  * In which case, it's preferable to determine the jobSize
  * based on chainLog instead.
@@ -1239,9 +1283,11 @@ size_t ZSTDMT_initCStream_internal(

  if (params.rsyncable) {
  /* Aim for the targetsectionSize as the average job size. */
- U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
- U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
- assert(jobSizeMB >= 1);
+ U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
+ U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
+ /* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our
+ * expected job size is at least 4x larger. */
+ assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);
  DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
  mtctx->rsync.hash = 0;
  mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
@@ -1253,7 +1299,7 @@ size_t ZSTDMT_initCStream_internal(
  ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
  {
  /* If ldm is enabled we need windowSize space. */
- size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
+ size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;
  /* Two buffers of slack, plus extra space for the overlap
  * This is the minimum slack that LDM works with. One extra because
  * flush might waste up to targetSectionSize-1 bytes. Another extra
@@ -1528,17 +1574,21 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
  static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
  {
  BYTE const* const bufferStart = (BYTE const*)buffer.start;
- BYTE const* const bufferEnd = bufferStart + buffer.capacity;
  BYTE const* const rangeStart = (BYTE const*)range.start;
- BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;

  if (rangeStart == NULL || bufferStart == NULL)
  return 0;
- /* Empty ranges cannot overlap */
- if (bufferStart == bufferEnd || rangeStart == rangeEnd)
- return 0;

- return bufferStart < rangeEnd && rangeStart < bufferEnd;
+ {
+ BYTE const* const bufferEnd = bufferStart + buffer.capacity;
+ BYTE const* const rangeEnd = rangeStart + range.size;
+
+ /* Empty ranges cannot overlap */
+ if (bufferStart == bufferEnd || rangeStart == rangeEnd)
+ return 0;
+
+ return bufferStart < rangeEnd && rangeStart < bufferEnd;
+ }
  }

  static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
@@ -1565,7 +1615,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)

  static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
  {
- if (mtctx->params.ldmParams.enableLdm) {
+ if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
  ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
  DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
  DEBUGLOG(5, "source [0x%zx, 0x%zx)",
@@ -1668,6 +1718,11 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
  if (!mtctx->params.rsyncable)
  /* Rsync is disabled. */
  return syncPoint;
+ if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE)
+ /* We don't emit synchronization points if it would produce too small blocks.
+ * We don't have enough input to find a synchronization point, so don't look.
+ */
+ return syncPoint;
  if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
  /* Not enough to compute the hash.
  * We will miss any synchronization points in this RSYNC_LENGTH byte
@@ -1678,10 +1733,28 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
  */
  return syncPoint;
  /* Initialize the loop variables. */
- if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
- /* We have enough bytes buffered to initialize the hash.
+ if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) {
+ /* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions
+ * because they can't possibly be a sync point. So we can start
+ * part way through the input buffer.
+ */
+ pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled;
+ if (pos >= RSYNC_LENGTH) {
+ prev = istart + pos - RSYNC_LENGTH;
+ hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
+ } else {
+ assert(mtctx->inBuff.filled >= RSYNC_LENGTH);
+ prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
+ hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos));
+ hash = ZSTD_rollingHash_append(hash, istart, pos);
+ }
+ } else {
+ /* We have enough bytes buffered to initialize the hash,
+ * and have processed enough bytes to find a sync point.
  * Start scanning at the beginning of the input.
  */
+ assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
+ assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH);
  pos = 0;
  prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
  hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
@@ -1695,16 +1768,6 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
  syncPoint.flush = 1;
  return syncPoint;
  }
- } else {
- /* We don't have enough bytes buffered to initialize the hash, but
- * we know we have at least RSYNC_LENGTH bytes total.
- * Start scanning after the first RSYNC_LENGTH bytes less the bytes
- * already buffered.
- */
- pos = RSYNC_LENGTH - mtctx->inBuff.filled;
- prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
- hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
- hash = ZSTD_rollingHash_append(hash, istart, pos);
  }
  /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
  * through the input. If we hit a synchronization point, then cut the
@@ -1714,16 +1777,24 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
  * then a block will be emitted anyways, but this is okay, since if we
  * are already synchronized we will remain synchronized.
  */
+ assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
  for (; pos < syncPoint.toLoad; ++pos) {
  BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
- /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
+ /* This assert is very expensive, and Debian compiles with asserts enabled.
+ * So disable it for now. We can get similar coverage by checking it at the
+ * beginning & end of the loop.
+ * assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
+ */
  hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
+ assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
  if ((hash & hitMask) == hitMask) {
  syncPoint.toLoad = pos + 1;
  syncPoint.flush = 1;
+ ++pos; /* for assert */
  break;
  }
  }
+ assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
  return syncPoint;
  }