extzstd 0.3.2 → 0.4

Files changed (112)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -3
  3. data/contrib/zstd/CHANGELOG +225 -1
  4. data/contrib/zstd/CONTRIBUTING.md +158 -75
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +106 -69
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +64 -36
  9. data/contrib/zstd/SECURITY.md +15 -0
  10. data/contrib/zstd/TESTING.md +2 -3
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +117 -199
  13. data/contrib/zstd/lib/README.md +37 -7
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +80 -86
  17. data/contrib/zstd/lib/common/compiler.h +225 -63
  18. data/contrib/zstd/lib/common/cpu.h +37 -1
  19. data/contrib/zstd/lib/common/debug.c +7 -1
  20. data/contrib/zstd/lib/common/debug.h +21 -12
  21. data/contrib/zstd/lib/common/entropy_common.c +15 -37
  22. data/contrib/zstd/lib/common/error_private.c +9 -2
  23. data/contrib/zstd/lib/common/error_private.h +93 -5
  24. data/contrib/zstd/lib/common/fse.h +12 -87
  25. data/contrib/zstd/lib/common/fse_decompress.c +37 -117
  26. data/contrib/zstd/lib/common/huf.h +97 -172
  27. data/contrib/zstd/lib/common/mem.h +58 -58
  28. data/contrib/zstd/lib/common/pool.c +38 -17
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +158 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -14
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +6 -814
  34. data/contrib/zstd/lib/common/xxhash.h +6930 -195
  35. data/contrib/zstd/lib/common/zstd_common.c +1 -36
  36. data/contrib/zstd/lib/common/zstd_deps.h +1 -1
  37. data/contrib/zstd/lib/common/zstd_internal.h +68 -154
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +75 -155
  41. data/contrib/zstd/lib/compress/hist.c +1 -1
  42. data/contrib/zstd/lib/compress/hist.h +1 -1
  43. data/contrib/zstd/lib/compress/huf_compress.c +810 -259
  44. data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
  63. data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
  79. data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +237 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +4 -3
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
  102. data/contrib/zstd/lib/zstd.h +1030 -332
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
  104. data/ext/extconf.rb +26 -7
  105. data/ext/extzstd.c +51 -24
  106. data/ext/extzstd.h +33 -6
  107. data/ext/extzstd_stream.c +74 -31
  108. data/ext/libzstd_conf.h +0 -1
  109. data/ext/zstd_decompress_asm.S +1 -0
  110. metadata +17 -7
  111. data/contrib/zstd/appveyor.yml +0 -292
  112. data/ext/depend +0 -2
data/contrib/zstd/lib/compress/zstdmt_compress.c:

@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,16 +15,13 @@
  #endif


- /* ====== Constants ====== */
- #define ZSTDMT_OVERLAPLOG_DEFAULT 0
-
-
  /* ====== Dependencies ====== */
+ #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
  #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
  #include "../common/mem.h" /* MEM_STATIC */
  #include "../common/pool.h" /* threadpool */
  #include "../common/threading.h" /* mutex */
- #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+ #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
  #include "zstd_ldm.h"
  #include "zstdmt_compress.h"

@@ -43,12 +40,13 @@
  # include <unistd.h>
  # include <sys/times.h>

- # define DEBUG_PRINTHEX(l,p,n) { \
-     unsigned debug_u; \
-     for (debug_u=0; debug_u<(n); debug_u++) \
-         RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
-     RAWLOG(l, " \n"); \
- }
+ # define DEBUG_PRINTHEX(l,p,n) \
+     do { \
+         unsigned debug_u; \
+         for (debug_u=0; debug_u<(n); debug_u++) \
+             RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+         RAWLOG(l, " \n"); \
+     } while (0)

  static unsigned long long GetCurrentClockTimeMicroseconds(void)
  {
@@ -60,25 +58,28 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
  } }

  #define MUTEX_WAIT_TIME_DLEVEL 6
- #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
-     if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
-         unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
-         ZSTD_pthread_mutex_lock(mutex); \
-         { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
-           unsigned long long const elapsedTime = (afterTime-beforeTime); \
-           if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
-               DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
-                        elapsedTime, #mutex); \
-         } } \
-     } else { \
-         ZSTD_pthread_mutex_lock(mutex); \
-     } \
- }
+ #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) \
+     do { \
+         if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
+             unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
+             ZSTD_pthread_mutex_lock(mutex); \
+             { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+               unsigned long long const elapsedTime = (afterTime-beforeTime); \
+               if (elapsedTime > 1000) { \
+                   /* or whatever threshold you like; I'm using 1 millisecond here */ \
+                   DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, \
+                            "Thread took %llu microseconds to acquire mutex %s \n", \
+                            elapsedTime, #mutex); \
+             } } \
+         } else { \
+             ZSTD_pthread_mutex_lock(mutex); \
+         } \
+     } while (0)

  #else

  # define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
- # define DEBUG_PRINTHEX(l,p,n) {}
+ # define DEBUG_PRINTHEX(l,p,n) do { } while (0)

  #endif
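A recurring change in this diff (DEBUG_PRINTHEX and ZSTD_PTHREAD_MUTEX_LOCK above, JOB_ERROR further down) rewraps statement-like macros from bare `{ ... }` blocks into the `do { ... } while (0)` idiom. A minimal sketch of why the idiom matters; the macro names below are illustrative, not from zstd:

```c
#include <stdio.h>

/* Bare-block macro: `BAD(x);` expands to `{ ... };`, and the stray
 * semicolon terminates an unbraced `if` body, orphaning any `else`. */
#define BAD(x)  { printf("%d\n", (x)); }

/* do/while(0) macro: expands to a single statement that requires the
 * trailing semicolon, so it composes safely with if/else. */
#define GOOD(x) do { printf("%d\n", (x)); } while (0)

int main(void)
{
    int cond = 0;
    /* `if (cond) BAD(1); else GOOD(2);` would fail to compile:
     * the `;` after BAD(1) leaves the `else` without an `if`. */
    if (cond) GOOD(1); else GOOD(2);
    return 0;
}
```

The `while (0)` form swallows exactly one trailing semicolon, so the macro behaves like a single statement in every context.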
@@ -99,19 +100,39 @@ typedef struct ZSTDMT_bufferPool_s {
      unsigned totalBuffers;
      unsigned nbBuffers;
      ZSTD_customMem cMem;
-     buffer_t bTable[1];   /* variable size */
+     buffer_t* buffers;
  } ZSTDMT_bufferPool;

- static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
+ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+ {
+     DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
+     if (!bufPool) return;   /* compatibility with free on NULL */
+     if (bufPool->buffers) {
+         unsigned u;
+         for (u=0; u<bufPool->totalBuffers; u++) {
+             DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->buffers[u].start);
+             ZSTD_customFree(bufPool->buffers[u].start, bufPool->cMem);
+         }
+         ZSTD_customFree(bufPool->buffers, bufPool->cMem);
+     }
+     ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
+     ZSTD_customFree(bufPool, bufPool->cMem);
+ }
+
+ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
  {
-     unsigned const maxNbBuffers = 2*nbWorkers + 3;
-     ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
-         sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
+     ZSTDMT_bufferPool* const bufPool =
+         (ZSTDMT_bufferPool*)ZSTD_customCalloc(sizeof(ZSTDMT_bufferPool), cMem);
      if (bufPool==NULL) return NULL;
      if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
          ZSTD_customFree(bufPool, cMem);
          return NULL;
      }
+     bufPool->buffers = (buffer_t*)ZSTD_customCalloc(maxNbBuffers * sizeof(buffer_t), cMem);
+     if (bufPool->buffers==NULL) {
+         ZSTDMT_freeBufferPool(bufPool);
+         return NULL;
+     }
      bufPool->bufferSize = 64 KB;
      bufPool->totalBuffers = maxNbBuffers;
      bufPool->nbBuffers = 0;
@@ -119,32 +140,19 @@ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_custo
      return bufPool;
  }

- static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
- {
-     unsigned u;
-     DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
-     if (!bufPool) return;   /* compatibility with free on NULL */
-     for (u=0; u<bufPool->totalBuffers; u++) {
-         DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
-         ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
-     }
-     ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
-     ZSTD_customFree(bufPool, bufPool->cMem);
- }
-
  /* only works at initialization, not during compression */
  static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
  {
-     size_t const poolSize = sizeof(*bufPool)
-                           + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+     size_t const poolSize = sizeof(*bufPool);
+     size_t const arraySize = bufPool->totalBuffers * sizeof(buffer_t);
      unsigned u;
      size_t totalBufferSize = 0;
      ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
      for (u=0; u<bufPool->totalBuffers; u++)
-         totalBufferSize += bufPool->bTable[u].capacity;
+         totalBufferSize += bufPool->buffers[u].capacity;
      ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);

-     return poolSize + totalBufferSize;
+     return poolSize + arraySize + totalBufferSize;
  }
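The two buffer-pool hunks above replace the old single-allocation layout, a struct ending in the `buffer_t bTable[1]` variable-size trick, with a fixed-size struct plus a separately allocated `buffers` array. A minimal sketch of the two layouts under that assumption; plain `calloc`/`free` stand in for ZSTD_customCalloc/ZSTD_customFree, and the names are illustrative:

```c
#include <stdlib.h>

typedef struct { void* start; size_t capacity; } buffer_t;

/* Old layout: one allocation, table over-allocated past the struct end. */
typedef struct {
    unsigned totalBuffers;
    buffer_t bTable[1];             /* "variable size" C89 trick */
} pool_v1;

pool_v1* create_v1(unsigned n)
{   /* (n-1) because the struct already holds one buffer_t */
    return calloc(1, sizeof(pool_v1) + (n - 1) * sizeof(buffer_t));
}

/* New layout: fixed-size struct, table allocated on its own. */
typedef struct {
    unsigned totalBuffers;
    buffer_t* buffers;
} pool_v2;

pool_v2* create_v2(unsigned n)
{
    pool_v2* p = calloc(1, sizeof(pool_v2));
    if (!p) return NULL;
    p->buffers = calloc(n, sizeof(buffer_t));
    if (!p->buffers) { free(p); return NULL; }
    p->totalBuffers = n;
    return p;
}
```

The second layout is why ZSTDMT_sizeof_bufferPool() now reports `poolSize + arraySize + totalBufferSize`, and why ZSTDMT_freeBufferPool() moved above the constructor and tolerates a partially constructed pool.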
@@ -160,9 +168,8 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
  }


- static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)
+ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, unsigned maxNbBuffers)
  {
-     unsigned const maxNbBuffers = 2*nbWorkers + 3;
      if (srcBufPool==NULL) return NULL;
      if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
          return srcBufPool;
@@ -171,7 +178,7 @@ static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool,
      size_t const bSize = srcBufPool->bufferSize;   /* forward parameters */
      ZSTDMT_bufferPool* newBufPool;
      ZSTDMT_freeBufferPool(srcBufPool);
-     newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+     newBufPool = ZSTDMT_createBufferPool(maxNbBuffers, cMem);
      if (newBufPool==NULL) return newBufPool;
      ZSTDMT_setBufferSize(newBufPool, bSize);
      return newBufPool;
@@ -188,9 +195,9 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
      DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
      ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
      if (bufPool->nbBuffers) {   /* try to use an existing buffer */
-         buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
+         buffer_t const buf = bufPool->buffers[--(bufPool->nbBuffers)];
          size_t const availBufferSize = buf.capacity;
-         bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
+         bufPool->buffers[bufPool->nbBuffers] = g_nullBuffer;
          if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
              /* large enough, but not too much */
              DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
@@ -251,18 +258,28 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
      if (buf.start == NULL) return;   /* compatible with release on NULL */
      ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
      if (bufPool->nbBuffers < bufPool->totalBuffers) {
-         bufPool->bTable[bufPool->nbBuffers++] = buf;   /* stored for later use */
+         bufPool->buffers[bufPool->nbBuffers++] = buf;   /* stored for later use */
          DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
                   (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
          ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
          return;
      }
      ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
-     /* Reached bufferPool capacity (should not happen) */
+     /* Reached bufferPool capacity (note: should not happen) */
      DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
      ZSTD_customFree(buf.start, bufPool->cMem);
  }

+ /* We need 2 output buffers per worker since each dstBuff must be flushed after it is released.
+  * The 3 additional buffers are as follows:
+  *   1 buffer for input loading
+  *   1 buffer for "next input" when submitting current one
+  *   1 buffer stuck in queue */
+ #define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
+
+ /* After a worker releases its rawSeqStore, it is immediately ready for reuse.
+  * So we only need one seq buffer per worker. */
+ #define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)

  /* ===== Seq Pool Wrapper ====== */
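A quick check of the buffer accounting that the BUF_POOL_MAX_NB_BUFFERS / SEQ_POOL_MAX_NB_BUFFERS macros in the hunk above encode; the worker count is illustrative:

```c
#include <stdio.h>

#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)

int main(void)
{
    /* 4 workers: 2*4 dst buffers, +1 input loading, +1 "next input",
     * +1 stuck in queue => 11 buffers total. */
    printf("buf pool for 4 workers: %d\n", BUF_POOL_MAX_NB_BUFFERS(4)); /* 11 */
    printf("seq pool for 4 workers: %d\n", SEQ_POOL_MAX_NB_BUFFERS(4)); /*  4 */
    return 0;
}
```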
@@ -316,7 +333,7 @@ static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)

  static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
  {
-     ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+     ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(SEQ_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
      if (seqPool == NULL) return NULL;
      ZSTDMT_setNbSeq(seqPool, 0);
      return seqPool;
@@ -329,7 +346,7 @@ static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)

  static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
  {
-     return ZSTDMT_expandBufferPool(pool, nbWorkers);
+     return ZSTDMT_expandBufferPool(pool, SEQ_POOL_MAX_NB_BUFFERS(nbWorkers));
  }


@@ -341,16 +358,20 @@ typedef struct {
      int totalCCtx;
      int availCCtx;
      ZSTD_customMem cMem;
-     ZSTD_CCtx* cctx[1];   /* variable size */
+     ZSTD_CCtx** cctxs;
  } ZSTDMT_CCtxPool;

- /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
+ /* note : all CCtx borrowed from the pool must be reverted back to the pool _before_ freeing the pool */
  static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
  {
-     int cid;
-     for (cid=0; cid<pool->totalCCtx; cid++)
-         ZSTD_freeCCtx(pool->cctx[cid]);   /* note : compatible with free on NULL */
+     if (!pool) return;
      ZSTD_pthread_mutex_destroy(&pool->poolMutex);
+     if (pool->cctxs) {
+         int cid;
+         for (cid=0; cid<pool->totalCCtx; cid++)
+             ZSTD_freeCCtx(pool->cctxs[cid]);   /* free compatible with NULL */
+         ZSTD_customFree(pool->cctxs, pool->cMem);
+     }
      ZSTD_customFree(pool, pool->cMem);
  }
@@ -359,19 +380,24 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
  static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
                                                ZSTD_customMem cMem)
  {
-     ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(
-         sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
+     ZSTDMT_CCtxPool* const cctxPool =
+         (ZSTDMT_CCtxPool*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtxPool), cMem);
      assert(nbWorkers > 0);
      if (!cctxPool) return NULL;
      if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
          ZSTD_customFree(cctxPool, cMem);
          return NULL;
      }
-     cctxPool->cMem = cMem;
      cctxPool->totalCCtx = nbWorkers;
+     cctxPool->cctxs = (ZSTD_CCtx**)ZSTD_customCalloc(nbWorkers * sizeof(ZSTD_CCtx*), cMem);
+     if (!cctxPool->cctxs) {
+         ZSTDMT_freeCCtxPool(cctxPool);
+         return NULL;
+     }
+     cctxPool->cMem = cMem;
+     cctxPool->cctxs[0] = ZSTD_createCCtx_advanced(cMem);
+     if (!cctxPool->cctxs[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
      cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */
-     cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
-     if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
      DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
      return cctxPool;
  }
@@ -393,16 +419,16 @@ static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
  {
      ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
      {   unsigned const nbWorkers = cctxPool->totalCCtx;
-         size_t const poolSize = sizeof(*cctxPool)
-                               + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
-         unsigned u;
+         size_t const poolSize = sizeof(*cctxPool);
+         size_t const arraySize = cctxPool->totalCCtx * sizeof(ZSTD_CCtx*);
          size_t totalCCtxSize = 0;
+         unsigned u;
          for (u=0; u<nbWorkers; u++) {
-             totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+             totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctxs[u]);
          }
          ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
          assert(nbWorkers > 0);
-         return poolSize + totalCCtxSize;
+         return poolSize + arraySize + totalCCtxSize;
      }
  }
@@ -412,7 +438,7 @@ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
      ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
      if (cctxPool->availCCtx) {
          cctxPool->availCCtx--;
-         { ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
+         { ZSTD_CCtx* const cctx = cctxPool->cctxs[cctxPool->availCCtx];
            ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
            return cctx;
      }   }
@@ -426,7 +452,7 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
      if (cctx==NULL) return;   /* compatibility with release on NULL */
      ZSTD_pthread_mutex_lock(&pool->poolMutex);
      if (pool->availCCtx < pool->totalCCtx)
-         pool->cctx[pool->availCCtx++] = cctx;
+         pool->cctxs[pool->availCCtx++] = cctx;
      else {
          /* pool overflow : should not happen, since totalCCtx==nbWorkers */
          DEBUGLOG(4, "CCtx pool overflow : free cctx");
@@ -467,29 +493,27 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
                           ZSTD_dictContentType_e dictContentType)
  {
      /* Adjust parameters */
-     if (params.ldmParams.enableLdm) {
+     if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
          DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
          ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
          assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
          assert(params.ldmParams.hashRateLog < 32);
-         serialState->ldmState.hashPower =
-             ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
      } else {
          ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
      }
      serialState->nextJobID = 0;
      if (params.fParams.checksumFlag)
          XXH64_reset(&serialState->xxhState, 0);
-     if (params.ldmParams.enableLdm) {
+     if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
          ZSTD_customMem cMem = params.customMem;
          unsigned const hashLog = params.ldmParams.hashLog;
          size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
          unsigned const bucketLog =
              params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
-         size_t const bucketSize = (size_t)1 << bucketLog;
          unsigned const prevBucketLog =
              serialState->params.ldmParams.hashLog -
              serialState->params.ldmParams.bucketSizeLog;
+         size_t const numBuckets = (size_t)1 << bucketLog;
          /* Size the seq pool tables */
          ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
          /* Reset the window */
@@ -501,20 +525,20 @@ ZSTDMT_serialState_reset(serialState_t* serialState,
      }
      if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
          ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
-         serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(bucketSize, cMem);
+         serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
      }
      if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
          return 1;
      /* Zero the tables */
      ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
-     ZSTD_memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
+     ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);

      /* Update window state and fill hash table with dict */
      serialState->ldmState.loadedDictEnd = 0;
      if (dictSize > 0) {
          if (dictContentType == ZSTD_dct_rawContent) {
              BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
-             ZSTD_window_update(&serialState->ldmState.window, dict, dictSize);
+             ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
              ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
              serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
          } else {
@@ -566,12 +590,12 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
      /* A future job may error and skip our job */
      if (serialState->nextJobID == jobID) {
          /* It is now our turn, do any processing necessary */
-         if (serialState->params.ldmParams.enableLdm) {
+         if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) {
              size_t error;
              assert(seqStore.seq != NULL && seqStore.pos == 0 &&
                     seqStore.size == 0 && seqStore.capacity > 0);
              assert(src.size <= serialState->params.jobSize);
-             ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
+             ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0);
              error = ZSTD_ldm_generateSequences(
                  &serialState->ldmState, &seqStore,
                  &serialState->params.ldmParams, src.start, src.size);
@@ -594,11 +618,8 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
      ZSTD_pthread_mutex_unlock(&serialState->mutex);

      if (seqStore.size > 0) {
-         size_t const err = ZSTD_referenceExternalSequences(
-             jobCCtx, seqStore.seq, seqStore.size);
-         assert(serialState->params.ldmParams.enableLdm);
-         assert(!ZSTD_isError(err));
-         (void)err;
+         ZSTD_referenceExternalSequences(jobCCtx, seqStore.seq, seqStore.size);
+         assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
      }
  }
@@ -650,12 +671,13 @@ typedef struct {
      unsigned frameChecksumNeeded;   /* used only by mtctx */
  } ZSTDMT_jobDescription;

- #define JOB_ERROR(e) { \
-     ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
-     job->cSize = e; \
-     ZSTD_pthread_mutex_unlock(&job->job_mutex); \
-     goto _endJob; \
- }
+ #define JOB_ERROR(e) \
+     do { \
+         ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
+         job->cSize = e; \
+         ZSTD_pthread_mutex_unlock(&job->job_mutex); \
+         goto _endJob; \
+     } while (0)

  /* ZSTDMT_compressionJob() is a POOL_function type */
  static void ZSTDMT_compressionJob(void* jobDescription)
@@ -674,7 +696,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
          if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
          job->dstBuff = dstBuff;   /* this value can be read in ZSTDMT_flush, when it copies the whole job */
      }
-     if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
+     if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL)
          JOB_ERROR(ERROR(memory_allocation));

      /* Don't compute the checksum for chunks, since we compute it externally,
@@ -682,7 +704,9 @@ static void ZSTDMT_compressionJob(void* jobDescription)
       */
      if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
      /* Don't run LDM for the chunks, since we handle it externally */
-     jobParams.ldmParams.enableLdm = 0;
+     jobParams.ldmParams.enableLdm = ZSTD_ps_disable;
+     /* Correct nbWorkers to 0. */
+     jobParams.nbWorkers = 0;


      /* init */
@@ -695,6 +719,10 @@ static void ZSTDMT_compressionJob(void* jobDescription)
      {   size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
          if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
      }
+     if (!job->firstJob) {
+         size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0);
+         if (ZSTD_isError(err)) JOB_ERROR(err);
+     }
      {   size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
              job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
              ZSTD_dtlm_fast,
@@ -707,7 +735,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
      ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);

      if (!job->firstJob) {   /* flush and overwrite frame header when it's not first job */
-         size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
+         size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
          if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
          DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
          ZSTD_invalidateRepCodes(cctx);
@@ -725,7 +753,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
          DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
          assert(job->cSize == 0);
          for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
-             size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
+             size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize);
              if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
              ip += chunkSize;
              op += cSize;   assert(op < oend);
@@ -745,11 +773,18 @@ static void ZSTDMT_compressionJob(void* jobDescription)
          size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
          size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
          size_t const cSize = (job->lastJob) ?
-             ZSTD_compressEnd     (cctx, op, oend-op, ip, lastBlockSize) :
-             ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
+             ZSTD_compressEnd_public     (cctx, op, oend-op, ip, lastBlockSize) :
+             ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
          if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
          lastCBlockSize = cSize;
      }   }
+     if (!job->firstJob) {
+         /* Double check that we don't have an ext-dict, because then our
+          * repcode invalidation doesn't work.
+          */
+         assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+     }
+     ZSTD_CCtx_trace(cctx, 0);

  _endJob:
      ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
@@ -796,6 +831,15 @@ typedef struct {
  static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};

  #define RSYNC_LENGTH 32
+ /* Don't create chunks smaller than the zstd block size.
+  * This stops us from regressing compression ratio too much,
+  * and ensures our output fits in ZSTD_compressBound().
+  *
+  * If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then
+  * ZSTD_COMPRESSBOUND() will need to be updated.
+  */
+ #define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
+ #define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)

  typedef struct {
      U64 hash;
@@ -916,7 +960,7 @@ MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers,
      mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
      assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0);   /* ensure nbJobs is a power of 2 */
      mtctx->jobIDMask = nbJobs - 1;
-     mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+     mtctx->bufPool = ZSTDMT_createBufferPool(BUF_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
      mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
      mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
      initError = ZSTDMT_serialState_init(&mtctx->serial);
@@ -1019,7 +1063,7 @@ static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
  {
      if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
      FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , "");
-     mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
+     mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, BUF_POOL_MAX_NB_BUFFERS(nbWorkers));
      if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
      mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
      if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
@@ -1062,7 +1106,7 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
      {   unsigned jobNb;
          unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
          DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
-                     mtctx->doneJobID, lastJobNb, mtctx->jobReady)
+                     mtctx->doneJobID, lastJobNb, mtctx->jobReady);
          for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
              unsigned const wJobID = jobNb & mtctx->jobIDMask;
              ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID];
@@ -1124,7 +1168,7 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
  static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
  {
      unsigned jobLog;
-     if (params->ldmParams.enableLdm) {
+     if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
          /* In Long Range Mode, the windowLog is typically oversized.
           * In which case, it's preferable to determine the jobSize
           * based on cycleLog instead. */
@@ -1168,7 +1212,7 @@ static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
      int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
      int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
      assert(0 <= overlapRLog && overlapRLog <= 8);
-     if (params->ldmParams.enableLdm) {
+     if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
          /* In Long Range Mode, the windowLog is typically oversized.
           * In which case, it's preferable to determine the jobSize
           * based on chainLog instead.
@@ -1239,9 +1283,11 @@ size_t ZSTDMT_initCStream_internal(

      if (params.rsyncable) {
          /* Aim for the targetsectionSize as the average job size. */
-         U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
-         U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
-         assert(jobSizeMB >= 1);
+         U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
+         U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
+         /* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our
+          * expected job size is at least 4x larger. */
+         assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);
          DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
          mtctx->rsync.hash = 0;
          mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
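The rsyncable hunk above picks `rsyncBits` so that a uniformly distributed rolling hash satisfies `(hash & hitMask) == hitMask` about once every 2^rsyncBits bytes, which makes the expected chunk size track targetSectionSize; switching from MB to KB granularity keeps the computation valid for small jobs. A worked check with illustrative numbers (the helper below imitates ZSTD_highbit32, it is not the zstd function):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Index of the highest set bit, like ZSTD_highbit32 (valid for v > 0). */
static unsigned highbit32(uint32_t v) { unsigned n = 0; while (v >>= 1) n++; return n; }

int main(void)
{
    size_t const targetSectionSize = (size_t)4 << 20;               /* say, 4 MiB jobs */
    uint32_t const jobSizeKB = (uint32_t)(targetSectionSize >> 10); /* 4096 */
    uint32_t const rsyncBits = highbit32(jobSizeKB) + 10;           /* 12 + 10 = 22 */
    uint64_t const hitMask = (1ULL << rsyncBits) - 1;
    /* A random hash matches the mask with probability 2^-22,
     * i.e. on average once every 4 MiB, matching the job size. */
    printf("rsyncBits=%u, mean sync distance=%llu bytes\n",
           rsyncBits, (unsigned long long)(hitMask + 1));
    assert(((size_t)1 << rsyncBits) == targetSectionSize);
    return 0;
}
```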
@@ -1253,7 +1299,7 @@ size_t ZSTDMT_initCStream_internal(
      ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
      {
          /* If ldm is enabled we need windowSize space. */
-         size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
+         size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ? (1U << mtctx->params.cParams.windowLog) : 0;
          /* Two buffers of slack, plus extra space for the overlap
           * This is the minimum slack that LDM works with. One extra because
           * flush might waste up to targetSectionSize-1 bytes. Another extra
@@ -1528,17 +1574,21 @@ static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
  static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
  {
      BYTE const* const bufferStart = (BYTE const*)buffer.start;
-     BYTE const* const bufferEnd = bufferStart + buffer.capacity;
      BYTE const* const rangeStart = (BYTE const*)range.start;
-     BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart;

      if (rangeStart == NULL || bufferStart == NULL)
          return 0;
-     /* Empty ranges cannot overlap */
-     if (bufferStart == bufferEnd || rangeStart == rangeEnd)
-         return 0;

-     return bufferStart < rangeEnd && rangeStart < bufferEnd;
+     {
+         BYTE const* const bufferEnd = bufferStart + buffer.capacity;
+         BYTE const* const rangeEnd = rangeStart + range.size;
+
+         /* Empty ranges cannot overlap */
+         if (bufferStart == bufferEnd || rangeStart == rangeEnd)
+             return 0;
+
+         return bufferStart < rangeEnd && rangeStart < bufferEnd;
+     }
  }

  static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
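The ZSTDMT_isOverlapped rewrite above defers the end-pointer arithmetic until after the NULL checks, which among other things avoids forming a pointer from a NULL base (undefined behavior in C even if never dereferenced). The test itself is the standard half-open interval overlap check, sketched standalone here with indices instead of pointers:

```c
#include <stddef.h>

/* [aStart, aEnd) and [bStart, bEnd) overlap iff each one starts before
 * the other ends; empty intervals never overlap. */
int intervalsOverlap(size_t aStart, size_t aEnd,
                     size_t bStart, size_t bEnd)
{
    if (aStart == aEnd || bStart == bEnd) return 0;   /* empty */
    return aStart < bEnd && bStart < aEnd;
}
```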
@@ -1565,7 +1615,7 @@ static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)

  static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
  {
-     if (mtctx->params.ldmParams.enableLdm) {
+     if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) {
          ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
          DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
          DEBUGLOG(5, "source [0x%zx, 0x%zx)",
@@ -1668,6 +1718,11 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
      if (!mtctx->params.rsyncable)
          /* Rsync is disabled. */
          return syncPoint;
+     if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE)
+         /* We don't emit synchronization points if it would produce too small blocks.
+          * We don't have enough input to find a synchronization point, so don't look.
+          */
+         return syncPoint;
      if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
          /* Not enough to compute the hash.
           * We will miss any synchronization points in this RSYNC_LENGTH byte
@@ -1678,10 +1733,28 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
       */
      return syncPoint;
      /* Initialize the loop variables. */
-     if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
-         /* We have enough bytes buffered to initialize the hash.
+     if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) {
+         /* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions
+          * because they can't possibly be a sync point. So we can start
+          * part way through the input buffer.
+          */
+         pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled;
+         if (pos >= RSYNC_LENGTH) {
+             prev = istart + pos - RSYNC_LENGTH;
+             hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
+         } else {
+             assert(mtctx->inBuff.filled >= RSYNC_LENGTH);
+             prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
+             hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos));
+             hash = ZSTD_rollingHash_append(hash, istart, pos);
+         }
+     } else {
+         /* We have enough bytes buffered to initialize the hash,
+          * and have processed enough bytes to find a sync point.
           * Start scanning at the beginning of the input.
           */
+         assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
+         assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH);
          pos = 0;
          prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
          hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
@@ -1695,16 +1768,6 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
              syncPoint.flush = 1;
              return syncPoint;
          }
-     } else {
-         /* We don't have enough bytes buffered to initialize the hash, but
-          * we know we have at least RSYNC_LENGTH bytes total.
-          * Start scanning after the first RSYNC_LENGTH bytes less the bytes
-          * already buffered.
-          */
-         pos = RSYNC_LENGTH - mtctx->inBuff.filled;
-         prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
-         hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
-         hash = ZSTD_rollingHash_append(hash, istart, pos);
      }
      /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
       * through the input. If we hit a synchronization point, then cut the
@@ -1714,16 +1777,24 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
       * then a block will be emitted anyways, but this is okay, since if we
       * are already synchronized we will remain synchronized.
       */
+     assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
      for (; pos < syncPoint.toLoad; ++pos) {
          BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
-         /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
+         /* This assert is very expensive, and Debian compiles with asserts enabled.
+          * So disable it for now. We can get similar coverage by checking it at the
+          * beginning & end of the loop.
+          * assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
+          */
          hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
+         assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
          if ((hash & hitMask) == hitMask) {
              syncPoint.toLoad = pos + 1;
              syncPoint.flush = 1;
+             ++pos;   /* for assert */
              break;
          }
      }
+     assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
      return syncPoint;
  }
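The scan above is a Rabin-Karp-style rolling hash: each step removes the oldest byte's contribution and appends the new byte in O(1) via ZSTD_rollingHash_rotate. A self-contained sketch of the same rotate/recompute equivalence, using an illustrative prime rather than zstd's actual constants:

```c
#include <assert.h>
#include <stdint.h>

#define WINDOW 32   /* like RSYNC_LENGTH */
static const uint64_t PRIME = 0x100000001b3ULL;   /* illustrative constant */

/* hash of b[0..WINDOW) = sum of b[i] * PRIME^(WINDOW-1-i), mod 2^64 */
static uint64_t hash_compute(const uint8_t* b)
{
    uint64_t h = 0;
    for (int i = 0; i < WINDOW; i++) h = h * PRIME + b[i];
    return h;
}

/* primePower = PRIME^(WINDOW-1), precomputed once */
static uint64_t prime_power(void)
{
    uint64_t p = 1;
    for (int i = 0; i < WINDOW - 1; i++) p *= PRIME;
    return p;
}

/* slide the window one byte: drop `out`, append `in` */
static uint64_t hash_rotate(uint64_t h, uint8_t out, uint8_t in, uint64_t pw)
{
    return (h - out * pw) * PRIME + in;
}

int main(void)
{
    uint8_t buf[WINDOW + 1];
    for (int i = 0; i < WINDOW + 1; i++) buf[i] = (uint8_t)(i * 7 + 3);
    uint64_t const pw = prime_power();
    uint64_t h = hash_compute(buf);                 /* hash of buf[0..32)  */
    h = hash_rotate(h, buf[0], buf[WINDOW], pw);    /* now hash of buf[1..33) */
    assert(h == hash_compute(buf + 1));             /* rotate == recompute */
    return 0;
}
```

The re-enabled asserts at the loop boundaries in the hunk above check exactly this equivalence: the rolled hash must match a fresh hash of the last RSYNC_LENGTH bytes.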