zstd-ruby 1.4.4.0 → 1.5.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (102)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/extconf.rb +1 -0
  6. data/ext/zstdruby/libzstd/BUCK +5 -7
  7. data/ext/zstdruby/libzstd/Makefile +241 -173
  8. data/ext/zstdruby/libzstd/README.md +76 -18
  9. data/ext/zstdruby/libzstd/common/bitstream.h +75 -57
  10. data/ext/zstdruby/libzstd/common/compiler.h +196 -20
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  13. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +208 -76
  15. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  16. data/ext/zstdruby/libzstd/common/error_private.h +87 -4
  17. data/ext/zstdruby/libzstd/common/fse.h +51 -42
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -57
  19. data/ext/zstdruby/libzstd/common/huf.h +60 -54
  20. data/ext/zstdruby/libzstd/common/mem.h +87 -98
  21. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  22. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +10 -8
  25. data/ext/zstdruby/libzstd/common/threading.h +4 -3
  26. data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +252 -108
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +105 -85
  34. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  35. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +831 -259
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3213 -1007
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +493 -71
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +21 -16
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +51 -24
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +573 -0
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +208 -81
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +315 -137
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +319 -128
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1156 -171
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +331 -206
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +403 -226
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +188 -453
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1065 -410
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +691 -230
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1072 -323
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +16 -7
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +71 -10
  67. data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
  68. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  69. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
  70. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  71. data/ext/zstdruby/libzstd/dictBuilder/cover.c +57 -40
  72. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  73. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  74. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +54 -35
  75. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +151 -57
  76. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  77. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  78. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
  79. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +25 -19
  80. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +18 -14
  82. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +18 -14
  84. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  85. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +22 -16
  86. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -25
  88. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  89. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -25
  90. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  91. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -26
  92. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  93. data/ext/zstdruby/libzstd/libzstd.mk +185 -0
  94. data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
  95. data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
  96. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
  97. data/ext/zstdruby/libzstd/zstd.h +760 -234
  98. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
  99. data/ext/zstdruby/zstdruby.c +2 -2
  100. data/lib/zstd-ruby/version.rb +1 -1
  101. metadata +20 -9
  102. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -18,7 +18,7 @@
18
18
  /*-*************************************
19
19
  * Dependencies
20
20
  ***************************************/
21
- #include "zstd_internal.h"
21
+ #include "../common/zstd_internal.h"
22
22
  #include "zstd_cwksp.h"
23
23
  #ifdef ZSTD_MULTITHREAD
24
24
  # include "zstdmt_compress.h"
@@ -28,7 +28,6 @@
28
28
  extern "C" {
29
29
  #endif
30
30
 
31
-
32
31
  /*-*************************************
33
32
  * Constants
34
33
  ***************************************/
@@ -64,7 +63,7 @@ typedef struct {
64
63
  } ZSTD_localDict;
65
64
 
66
65
  typedef struct {
67
- U32 CTable[HUF_CTABLE_SIZE_U32(255)];
66
+ HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
68
67
  HUF_repeat repeatMode;
69
68
  } ZSTD_hufCTables_t;
70
69
 
@@ -82,11 +81,75 @@ typedef struct {
82
81
  ZSTD_fseCTables_t fse;
83
82
  } ZSTD_entropyCTables_t;
84
83
 
84
+ /***********************************************
85
+ * Entropy buffer statistics structs and funcs *
86
+ ***********************************************/
87
+ /** ZSTD_hufCTablesMetadata_t :
88
+ * Stores Literals Block Type for a super-block in hType, and
89
+ * huffman tree description in hufDesBuffer.
90
+ * hufDesSize refers to the size of huffman tree description in bytes.
91
+ * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
85
92
  typedef struct {
86
- U32 off;
87
- U32 len;
93
+ symbolEncodingType_e hType;
94
+ BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
95
+ size_t hufDesSize;
96
+ } ZSTD_hufCTablesMetadata_t;
97
+
98
+ /** ZSTD_fseCTablesMetadata_t :
99
+ * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
100
+ * fse tables in fseTablesBuffer.
101
+ * fseTablesSize refers to the size of fse tables in bytes.
102
+ * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
103
+ typedef struct {
104
+ symbolEncodingType_e llType;
105
+ symbolEncodingType_e ofType;
106
+ symbolEncodingType_e mlType;
107
+ BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
108
+ size_t fseTablesSize;
109
+ size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
110
+ } ZSTD_fseCTablesMetadata_t;
111
+
112
+ typedef struct {
113
+ ZSTD_hufCTablesMetadata_t hufMetadata;
114
+ ZSTD_fseCTablesMetadata_t fseMetadata;
115
+ } ZSTD_entropyCTablesMetadata_t;
116
+
117
+ /** ZSTD_buildBlockEntropyStats() :
118
+ * Builds entropy for the block.
119
+ * @return : 0 on success or error code */
120
+ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
121
+ const ZSTD_entropyCTables_t* prevEntropy,
122
+ ZSTD_entropyCTables_t* nextEntropy,
123
+ const ZSTD_CCtx_params* cctxParams,
124
+ ZSTD_entropyCTablesMetadata_t* entropyMetadata,
125
+ void* workspace, size_t wkspSize);
126
+
127
+ /*********************************
128
+ * Compression internals structs *
129
+ *********************************/
130
+
131
+ typedef struct {
132
+ U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */
133
+ U32 len; /* Raw length of match */
88
134
  } ZSTD_match_t;
89
135
 
136
+ typedef struct {
137
+ U32 offset; /* Offset of sequence */
138
+ U32 litLength; /* Length of literals prior to match */
139
+ U32 matchLength; /* Raw length of match */
140
+ } rawSeq;
141
+
142
+ typedef struct {
143
+ rawSeq* seq; /* The start of the sequences */
144
+ size_t pos; /* The index in seq where reading stopped. pos <= size. */
145
+ size_t posInSequence; /* The position within the sequence at seq[pos] where reading
146
+ stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
147
+ size_t size; /* The number of sequences. <= capacity. */
148
+ size_t capacity; /* The capacity starting from `seq` pointer */
149
+ } rawSeqStore_t;
150
+
151
+ UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
152
+
90
153
  typedef struct {
91
154
  int price;
92
155
  U32 off;
@@ -116,7 +179,7 @@ typedef struct {
116
179
  U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
117
180
  ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
118
181
  const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
119
- ZSTD_literalCompressionMode_e literalCompressionMode;
182
+ ZSTD_paramSwitch_e literalCompressionMode;
120
183
  } optState_t;
121
184
 
122
185
  typedef struct {
@@ -125,14 +188,23 @@ typedef struct {
125
188
  } ZSTD_compressedBlockState_t;
126
189
 
127
190
  typedef struct {
128
- BYTE const* nextSrc; /* next block here to continue on current prefix */
129
- BYTE const* base; /* All regular indexes relative to this position */
130
- BYTE const* dictBase; /* extDict indexes relative to this position */
131
- U32 dictLimit; /* below that point, need extDict */
132
- U32 lowLimit; /* below that point, no more valid data */
191
+ BYTE const* nextSrc; /* next block here to continue on current prefix */
192
+ BYTE const* base; /* All regular indexes relative to this position */
193
+ BYTE const* dictBase; /* extDict indexes relative to this position */
194
+ U32 dictLimit; /* below that point, need extDict */
195
+ U32 lowLimit; /* below that point, no more valid data */
196
+ U32 nbOverflowCorrections; /* Number of times overflow correction has run since
197
+ * ZSTD_window_init(). Useful for debugging coredumps
198
+ * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
199
+ */
133
200
  } ZSTD_window_t;
134
201
 
202
+ #define ZSTD_WINDOW_START_INDEX 2
203
+
135
204
  typedef struct ZSTD_matchState_t ZSTD_matchState_t;
205
+
206
+ #define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */
207
+
136
208
  struct ZSTD_matchState_t {
137
209
  ZSTD_window_t window; /* State for window round buffer management */
138
210
  U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
@@ -144,12 +216,24 @@ struct ZSTD_matchState_t {
144
216
  */
145
217
  U32 nextToUpdate; /* index from which to continue table update */
146
218
  U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
219
+
220
+ U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
221
+ U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */
222
+ U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
223
+
147
224
  U32* hashTable;
148
225
  U32* hashTable3;
149
226
  U32* chainTable;
227
+
228
+ U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
229
+
230
+ int dedicatedDictSearch; /* Indicates whether this matchState is using the
231
+ * dedicated dictionary search structure.
232
+ */
150
233
  optState_t opt; /* optimal parser state */
151
234
  const ZSTD_matchState_t* dictMatchState;
152
235
  ZSTD_compressionParameters cParams;
236
+ const rawSeqStore_t* ldmSeqStore;
153
237
  };
154
238
 
155
239
  typedef struct {
@@ -163,16 +247,26 @@ typedef struct {
163
247
  U32 checksum;
164
248
  } ldmEntry_t;
165
249
 
250
+ typedef struct {
251
+ BYTE const* split;
252
+ U32 hash;
253
+ U32 checksum;
254
+ ldmEntry_t* bucket;
255
+ } ldmMatchCandidate_t;
256
+
257
+ #define LDM_BATCH_SIZE 64
258
+
166
259
  typedef struct {
167
260
  ZSTD_window_t window; /* State for the window round buffer management */
168
261
  ldmEntry_t* hashTable;
262
+ U32 loadedDictEnd;
169
263
  BYTE* bucketOffsets; /* Next position in bucket to insert entry */
170
- U64 hashPower; /* Used to compute the rolling hash.
171
- * Depends on ldmParams.minMatchLength */
264
+ size_t splitIndices[LDM_BATCH_SIZE];
265
+ ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
172
266
  } ldmState_t;
173
267
 
174
268
  typedef struct {
175
- U32 enableLdm; /* 1 if enable long distance matching */
269
+ ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
176
270
  U32 hashLog; /* Log size of hashTable */
177
271
  U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
178
272
  U32 minMatchLength; /* Minimum match length */
@@ -180,19 +274,6 @@ typedef struct {
180
274
  U32 windowLog; /* Window log for the LDM */
181
275
  } ldmParams_t;
182
276
 
183
- typedef struct {
184
- U32 offset;
185
- U32 litLength;
186
- U32 matchLength;
187
- } rawSeq;
188
-
189
- typedef struct {
190
- rawSeq* seq; /* The start of the sequences */
191
- size_t pos; /* The position where reading stopped. <= size. */
192
- size_t size; /* The number of sequences. <= capacity. */
193
- size_t capacity; /* The capacity starting from `seq` pointer */
194
- } rawSeqStore_t;
195
-
196
277
  typedef struct {
197
278
  int collectSequences;
198
279
  ZSTD_Sequence* seqStart;
@@ -216,7 +297,7 @@ struct ZSTD_CCtx_params_s {
216
297
  * There is no guarantee that hint is close to actual source size */
217
298
 
218
299
  ZSTD_dictAttachPref_e attachDictPref;
219
- ZSTD_literalCompressionMode_e literalCompressionMode;
300
+ ZSTD_paramSwitch_e literalCompressionMode;
220
301
 
221
302
  /* Multithreading: used to pass parameters to mtctx */
222
303
  int nbWorkers;
@@ -227,17 +308,68 @@ struct ZSTD_CCtx_params_s {
227
308
  /* Long distance matching parameters */
228
309
  ldmParams_t ldmParams;
229
310
 
311
+ /* Dedicated dict search algorithm trigger */
312
+ int enableDedicatedDictSearch;
313
+
314
+ /* Input/output buffer modes */
315
+ ZSTD_bufferMode_e inBufferMode;
316
+ ZSTD_bufferMode_e outBufferMode;
317
+
318
+ /* Sequence compression API */
319
+ ZSTD_sequenceFormat_e blockDelimiters;
320
+ int validateSequences;
321
+
322
+ /* Block splitting */
323
+ ZSTD_paramSwitch_e useBlockSplitter;
324
+
325
+ /* Param for deciding whether to use row-based matchfinder */
326
+ ZSTD_paramSwitch_e useRowMatchFinder;
327
+
328
+ /* Always load a dictionary in ext-dict mode (not prefix mode)? */
329
+ int deterministicRefPrefix;
330
+
230
331
  /* Internal use, for createCCtxParams() and freeCCtxParams() only */
231
332
  ZSTD_customMem customMem;
232
333
  }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
233
334
 
335
+ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
336
+ #define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
337
+
338
+ /**
339
+ * Indicates whether this compression proceeds directly from user-provided
340
+ * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
341
+ * whether the context needs to buffer the input/output (ZSTDb_buffered).
342
+ */
343
+ typedef enum {
344
+ ZSTDb_not_buffered,
345
+ ZSTDb_buffered
346
+ } ZSTD_buffered_policy_e;
347
+
348
+ /**
349
+ * Struct that contains all elements of block splitter that should be allocated
350
+ * in a wksp.
351
+ */
352
+ #define ZSTD_MAX_NB_BLOCK_SPLITS 196
353
+ typedef struct {
354
+ seqStore_t fullSeqStoreChunk;
355
+ seqStore_t firstHalfSeqStore;
356
+ seqStore_t secondHalfSeqStore;
357
+ seqStore_t currSeqStore;
358
+ seqStore_t nextSeqStore;
359
+
360
+ U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
361
+ ZSTD_entropyCTablesMetadata_t entropyMetadata;
362
+ } ZSTD_blockSplitCtx;
363
+
234
364
  struct ZSTD_CCtx_s {
235
365
  ZSTD_compressionStage_e stage;
236
366
  int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
237
367
  int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
238
368
  ZSTD_CCtx_params requestedParams;
239
369
  ZSTD_CCtx_params appliedParams;
370
+ ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
240
371
  U32 dictID;
372
+ size_t dictContentSize;
241
373
 
242
374
  ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
243
375
  size_t blockSize;
@@ -246,9 +378,11 @@ struct ZSTD_CCtx_s {
246
378
  unsigned long long producedCSize;
247
379
  XXH64_state_t xxhState;
248
380
  ZSTD_customMem customMem;
381
+ ZSTD_threadPool* pool;
249
382
  size_t staticSize;
250
383
  SeqCollector seqCollector;
251
384
  int isFirstBlock;
385
+ int initialized;
252
386
 
253
387
  seqStore_t seqStore; /* sequences storage ptrs */
254
388
  ldmState_t ldmState; /* long distance matching state */
@@ -256,7 +390,10 @@ struct ZSTD_CCtx_s {
256
390
  size_t maxNbLdmSequences;
257
391
  rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
258
392
  ZSTD_blockState_t blockState;
259
- U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
393
+ U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
394
+
395
+ /* Whether we are streaming or not */
396
+ ZSTD_buffered_policy_e bufferedPolicy;
260
397
 
261
398
  /* streaming */
262
399
  char* inBuff;
@@ -271,6 +408,10 @@ struct ZSTD_CCtx_s {
271
408
  ZSTD_cStreamStage streamStage;
272
409
  U32 frameEnded;
273
410
 
411
+ /* Stable in/out buffer verification */
412
+ ZSTD_inBuffer expectedInBuffer;
413
+ size_t expectedOutBufferSize;
414
+
274
415
  /* Dictionary */
275
416
  ZSTD_localDict localDict;
276
417
  const ZSTD_CDict* cdict;
@@ -280,17 +421,49 @@ struct ZSTD_CCtx_s {
280
421
  #ifdef ZSTD_MULTITHREAD
281
422
  ZSTDMT_CCtx* mtctx;
282
423
  #endif
424
+
425
+ /* Tracing */
426
+ #if ZSTD_TRACE
427
+ ZSTD_TraceCtx traceCtx;
428
+ #endif
429
+
430
+ /* Workspace for block splitter */
431
+ ZSTD_blockSplitCtx blockSplitCtx;
283
432
  };
284
433
 
285
434
  typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
286
435
 
287
- typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
288
-
436
+ typedef enum {
437
+ ZSTD_noDict = 0,
438
+ ZSTD_extDict = 1,
439
+ ZSTD_dictMatchState = 2,
440
+ ZSTD_dedicatedDictSearch = 3
441
+ } ZSTD_dictMode_e;
442
+
443
+ typedef enum {
444
+ ZSTD_cpm_noAttachDict = 0, /* Compression with ZSTD_noDict or ZSTD_extDict.
445
+ * In this mode we use both the srcSize and the dictSize
446
+ * when selecting and adjusting parameters.
447
+ */
448
+ ZSTD_cpm_attachDict = 1, /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
449
+ * In this mode we only take the srcSize into account when selecting
450
+ * and adjusting parameters.
451
+ */
452
+ ZSTD_cpm_createCDict = 2, /* Creating a CDict.
453
+ * In this mode we take both the source size and the dictionary size
454
+ * into account when selecting and adjusting the parameters.
455
+ */
456
+ ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
457
+ * We don't know what these parameters are for. We default to the legacy
458
+ * behavior of taking both the source size and the dict size into account
459
+ * when selecting and adjusting parameters.
460
+ */
461
+ } ZSTD_cParamMode_e;
289
462
 
290
463
  typedef size_t (*ZSTD_blockCompressor) (
291
464
  ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
292
465
  void const* src, size_t srcSize);
293
- ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
466
+ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
294
467
 
295
468
 
296
469
  MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@@ -324,6 +497,31 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
324
497
  return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
325
498
  }
326
499
 
500
+ typedef struct repcodes_s {
501
+ U32 rep[3];
502
+ } repcodes_t;
503
+
504
+ MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
505
+ {
506
+ repcodes_t newReps;
507
+ if (offset >= ZSTD_REP_NUM) { /* full offset */
508
+ newReps.rep[2] = rep[1];
509
+ newReps.rep[1] = rep[0];
510
+ newReps.rep[0] = offset - ZSTD_REP_MOVE;
511
+ } else { /* repcode */
512
+ U32 const repCode = offset + ll0;
513
+ if (repCode > 0) { /* note : if repCode==0, no change */
514
+ U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
515
+ newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
516
+ newReps.rep[1] = rep[0];
517
+ newReps.rep[0] = currentOffset;
518
+ } else { /* repCode == 0 */
519
+ ZSTD_memcpy(&newReps, rep, sizeof(newReps));
520
+ }
521
+ }
522
+ return newReps;
523
+ }
524
+
327
525
  /* ZSTD_cParam_withinBounds:
328
526
  * @return 1 if value is within cParam bounds,
329
527
  * 0 otherwise */
@@ -336,6 +534,30 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
336
534
  return 1;
337
535
  }
338
536
 
537
+ /* ZSTD_noCompressBlock() :
538
+ * Writes uncompressed block to dst buffer from given src.
539
+ * Returns the size of the block */
540
+ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
541
+ {
542
+ U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
543
+ RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
544
+ dstSize_tooSmall, "dst buf too small for uncompressed block");
545
+ MEM_writeLE24(dst, cBlockHeader24);
546
+ ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
547
+ return ZSTD_blockHeaderSize + srcSize;
548
+ }
549
+
550
+ MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
551
+ {
552
+ BYTE* const op = (BYTE*)dst;
553
+ U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
554
+ RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
555
+ MEM_writeLE24(op, cBlockHeader);
556
+ op[3] = src;
557
+ return 4;
558
+ }
559
+
560
+
339
561
  /* ZSTD_minGain() :
340
562
  * minimum compression required
341
563
  * to generate a compress block or a compressed literals section.
@@ -348,6 +570,21 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
348
570
  return (srcSize >> minlog) + 2;
349
571
  }
350
572
 
573
+ MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
574
+ {
575
+ switch (cctxParams->literalCompressionMode) {
576
+ case ZSTD_ps_enable:
577
+ return 0;
578
+ case ZSTD_ps_disable:
579
+ return 1;
580
+ default:
581
+ assert(0 /* impossible: pre-validated */);
582
+ ZSTD_FALLTHROUGH;
583
+ case ZSTD_ps_auto:
584
+ return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
585
+ }
586
+ }
587
+
351
588
  /*! ZSTD_safecopyLiterals() :
352
589
  * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
353
590
  * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
@@ -403,8 +640,8 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
403
640
 
404
641
  /* literal Length */
405
642
  if (litLength>0xFFFF) {
406
- assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
407
- seqStorePtr->longLengthID = 1;
643
+ assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
644
+ seqStorePtr->longLengthType = ZSTD_llt_literalLength;
408
645
  seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
409
646
  }
410
647
  seqStorePtr->sequences[0].litLength = (U16)litLength;
@@ -414,8 +651,8 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
414
651
 
415
652
  /* match Length */
416
653
  if (mlBase>0xFFFF) {
417
- assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
418
- seqStorePtr->longLengthID = 2;
654
+ assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
655
+ seqStorePtr->longLengthType = ZSTD_llt_matchLength;
419
656
  seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
420
657
  }
421
658
  seqStorePtr->sequences[0].matchLength = (U16)mlBase;
@@ -432,9 +669,18 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
432
669
  if (MEM_isLittleEndian()) {
433
670
  if (MEM_64bits()) {
434
671
  # if defined(_MSC_VER) && defined(_WIN64)
435
- unsigned long r = 0;
436
- _BitScanForward64( &r, (U64)val );
437
- return (unsigned)(r>>3);
672
+ # if STATIC_BMI2
673
+ return _tzcnt_u64(val) >> 3;
674
+ # else
675
+ if (val != 0) {
676
+ unsigned long r;
677
+ _BitScanForward64(&r, (U64)val);
678
+ return (unsigned)(r >> 3);
679
+ } else {
680
+ /* Should not reach this code path */
681
+ __assume(0);
682
+ }
683
+ # endif
438
684
  # elif defined(__GNUC__) && (__GNUC__ >= 4)
439
685
  return (__builtin_ctzll((U64)val) >> 3);
440
686
  # else
@@ -450,9 +696,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
450
696
  # endif
451
697
  } else { /* 32 bits */
452
698
  # if defined(_MSC_VER)
453
- unsigned long r=0;
454
- _BitScanForward( &r, (U32)val );
455
- return (unsigned)(r>>3);
699
+ if (val != 0) {
700
+ unsigned long r;
701
+ _BitScanForward(&r, (U32)val);
702
+ return (unsigned)(r >> 3);
703
+ } else {
704
+ /* Should not reach this code path */
705
+ __assume(0);
706
+ }
456
707
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
457
708
  return (__builtin_ctz((U32)val) >> 3);
458
709
  # else
@@ -466,9 +717,18 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
466
717
  } else { /* Big Endian CPU */
467
718
  if (MEM_64bits()) {
468
719
  # if defined(_MSC_VER) && defined(_WIN64)
469
- unsigned long r = 0;
470
- _BitScanReverse64( &r, val );
471
- return (unsigned)(r>>3);
720
+ # if STATIC_BMI2
721
+ return _lzcnt_u64(val) >> 3;
722
+ # else
723
+ if (val != 0) {
724
+ unsigned long r;
725
+ _BitScanReverse64(&r, (U64)val);
726
+ return (unsigned)(r >> 3);
727
+ } else {
728
+ /* Should not reach this code path */
729
+ __assume(0);
730
+ }
731
+ # endif
472
732
  # elif defined(__GNUC__) && (__GNUC__ >= 4)
473
733
  return (__builtin_clzll(val) >> 3);
474
734
  # else
@@ -481,9 +741,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
481
741
  # endif
482
742
  } else { /* 32 bits */
483
743
  # if defined(_MSC_VER)
484
- unsigned long r = 0;
485
- _BitScanReverse( &r, (unsigned long)val );
486
- return (unsigned)(r>>3);
744
+ if (val != 0) {
745
+ unsigned long r;
746
+ _BitScanReverse(&r, (unsigned long)val);
747
+ return (unsigned)(r >> 3);
748
+ } else {
749
+ /* Should not reach this code path */
750
+ __assume(0);
751
+ }
487
752
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
488
753
  return (__builtin_clz((U32)val) >> 3);
489
754
  # else
@@ -564,7 +829,8 @@ static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
564
829
  static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
565
830
  static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
566
831
 
567
- MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
832
+ MEM_STATIC FORCE_INLINE_ATTR
833
+ size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
568
834
  {
569
835
  switch(mls)
570
836
  {
@@ -661,6 +927,13 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
661
927
  window->dictLimit = end;
662
928
  }
663
929
 
930
+ MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
931
+ {
932
+ return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
933
+ window.lowLimit == ZSTD_WINDOW_START_INDEX &&
934
+ (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
935
+ }
936
+
664
937
  /**
665
938
  * ZSTD_window_hasExtDict():
666
939
  * Returns non-zero if the window has a non-empty extDict.
@@ -680,20 +953,76 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
680
953
  return ZSTD_window_hasExtDict(ms->window) ?
681
954
  ZSTD_extDict :
682
955
  ms->dictMatchState != NULL ?
683
- ZSTD_dictMatchState :
956
+ (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
684
957
  ZSTD_noDict;
685
958
  }
686
959
 
960
+ /* Defining this macro to non-zero tells zstd to run the overflow correction
961
+ * code much more frequently. This is very inefficient, and should only be
962
+ * used for tests and fuzzers.
963
+ */
964
+ #ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
965
+ # ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
966
+ # define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
967
+ # else
968
+ # define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
969
+ # endif
970
+ #endif
971
+
972
+ /**
973
+ * ZSTD_window_canOverflowCorrect():
974
+ * Returns non-zero if the indices are large enough for overflow correction
975
+ * to work correctly without impacting compression ratio.
976
+ */
977
+ MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
978
+ U32 cycleLog,
979
+ U32 maxDist,
980
+ U32 loadedDictEnd,
981
+ void const* src)
982
+ {
983
+ U32 const cycleSize = 1u << cycleLog;
984
+ U32 const curr = (U32)((BYTE const*)src - window.base);
985
+ U32 const minIndexToOverflowCorrect = cycleSize
986
+ + MAX(maxDist, cycleSize)
987
+ + ZSTD_WINDOW_START_INDEX;
988
+
989
+ /* Adjust the min index to back off the overflow correction frequency,
990
+ * so we don't waste too much CPU in overflow correction. If this
991
+ * computation overflows we don't really care, we just need to make
992
+ * sure it is at least minIndexToOverflowCorrect.
993
+ */
994
+ U32 const adjustment = window.nbOverflowCorrections + 1;
995
+ U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
996
+ minIndexToOverflowCorrect);
997
+ U32 const indexLargeEnough = curr > adjustedIndex;
998
+
999
+ /* Only overflow correct early if the dictionary is invalidated already,
1000
+ * so we don't hurt compression ratio.
1001
+ */
1002
+ U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
1003
+
1004
+ return indexLargeEnough && dictionaryInvalidated;
1005
+ }
1006
+
687
1007
  /**
688
1008
  * ZSTD_window_needOverflowCorrection():
689
1009
  * Returns non-zero if the indices are getting too large and need overflow
690
1010
  * protection.
691
1011
  */
692
1012
  MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
1013
+ U32 cycleLog,
1014
+ U32 maxDist,
1015
+ U32 loadedDictEnd,
1016
+ void const* src,
693
1017
  void const* srcEnd)
694
1018
  {
695
- U32 const current = (U32)((BYTE const*)srcEnd - window.base);
696
- return current > ZSTD_CURRENT_MAX;
1019
+ U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
1020
+ if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1021
+ if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
1022
+ return 1;
1023
+ }
1024
+ }
1025
+ return curr > ZSTD_CURRENT_MAX;
697
1026
  }
698
1027
 
699
1028
  /**
@@ -704,7 +1033,6 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
704
1033
  *
705
1034
  * The least significant cycleLog bits of the indices must remain the same,
706
1035
  * which may be 0. Every index up to maxDist in the past must be valid.
707
- * NOTE: (maxDist & cycleMask) must be zero.
708
1036
  */
709
1037
  MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
710
1038
  U32 maxDist, void const* src)
@@ -728,19 +1056,51 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
728
1056
  * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
729
1057
  * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
730
1058
  */
731
- U32 const cycleMask = (1U << cycleLog) - 1;
732
- U32 const current = (U32)((BYTE const*)src - window->base);
733
- U32 const newCurrent = (current & cycleMask) + maxDist;
734
- U32 const correction = current - newCurrent;
735
- assert((maxDist & cycleMask) == 0);
736
- assert(current > newCurrent);
737
- /* Loose bound, should be around 1<<29 (see above) */
738
- assert(correction > 1<<28);
1059
+ U32 const cycleSize = 1u << cycleLog;
1060
+ U32 const cycleMask = cycleSize - 1;
1061
+ U32 const curr = (U32)((BYTE const*)src - window->base);
1062
+ U32 const currentCycle = curr & cycleMask;
1063
+ /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
1064
+ U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
1065
+ ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
1066
+ : 0;
1067
+ U32 const newCurrent = currentCycle
1068
+ + currentCycleCorrection
1069
+ + MAX(maxDist, cycleSize);
1070
+ U32 const correction = curr - newCurrent;
1071
+ /* maxDist must be a power of two so that:
1072
+ * (newCurrent & cycleMask) == (curr & cycleMask)
1073
+ * This is required to not corrupt the chains / binary tree.
1074
+ */
1075
+ assert((maxDist & (maxDist - 1)) == 0);
1076
+ assert((curr & cycleMask) == (newCurrent & cycleMask));
1077
+ assert(curr > newCurrent);
1078
+ if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1079
+ /* Loose bound, should be around 1<<29 (see above) */
1080
+ assert(correction > 1<<28);
1081
+ }
739
1082
 
740
1083
  window->base += correction;
741
1084
  window->dictBase += correction;
742
- window->lowLimit -= correction;
743
- window->dictLimit -= correction;
1085
+ if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
1086
+ window->lowLimit = ZSTD_WINDOW_START_INDEX;
1087
+ } else {
1088
+ window->lowLimit -= correction;
1089
+ }
1090
+ if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
1091
+ window->dictLimit = ZSTD_WINDOW_START_INDEX;
1092
+ } else {
1093
+ window->dictLimit -= correction;
1094
+ }
1095
+
1096
+ /* Ensure we can still reference the full window. */
1097
+ assert(newCurrent >= maxDist);
1098
+ assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
1099
+ /* Ensure that lowLimit and dictLimit didn't underflow. */
1100
+ assert(window->lowLimit <= newCurrent);
1101
+ assert(window->dictLimit <= newCurrent);
1102
+
1103
+ ++window->nbOverflowCorrections;
744
1104
 
745
1105
  DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
746
1106
  window->lowLimit);
@@ -844,6 +1204,17 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
844
1204
  } } }
845
1205
  }
846
1206
 
1207
+ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
1208
+ ZSTD_memset(window, 0, sizeof(*window));
1209
+ window->base = (BYTE const*)" ";
1210
+ window->dictBase = (BYTE const*)" ";
1211
+ ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
1212
+ window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */
1213
+ window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */
1214
+ window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */
1215
+ window->nbOverflowCorrections = 0;
1216
+ }
1217
+
847
1218
  /**
848
1219
  * ZSTD_window_update():
849
1220
  * Updates the window by appending [src, src + srcSize) to the window.
@@ -852,13 +1223,18 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
852
1223
  * Returns non-zero if the segment is contiguous.
853
1224
  */
854
1225
  MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
855
- void const* src, size_t srcSize)
1226
+ void const* src, size_t srcSize,
1227
+ int forceNonContiguous)
856
1228
  {
857
1229
  BYTE const* const ip = (BYTE const*)src;
858
1230
  U32 contiguous = 1;
859
1231
  DEBUGLOG(5, "ZSTD_window_update");
1232
+ if (srcSize == 0)
1233
+ return contiguous;
1234
+ assert(window->base != NULL);
1235
+ assert(window->dictBase != NULL);
860
1236
  /* Check if blocks follow each other */
861
- if (src != window->nextSrc) {
1237
+ if (src != window->nextSrc || forceNonContiguous) {
862
1238
  /* not contiguous */
863
1239
  size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
864
1240
  DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
@@ -867,7 +1243,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
867
1243
  window->dictLimit = (U32)distanceFromBase;
868
1244
  window->dictBase = window->base;
869
1245
  window->base = ip - distanceFromBase;
870
- // ms->nextToUpdate = window->dictLimit;
1246
+ /* ms->nextToUpdate = window->dictLimit; */
871
1247
  if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
872
1248
  contiguous = 0;
873
1249
  }
@@ -883,12 +1259,35 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
883
1259
  return contiguous;
884
1260
  }
885
1261
 
886
- MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
1262
+ /**
1263
+ * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
1264
+ */
1265
+ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
1266
+ {
1267
+ U32 const maxDistance = 1U << windowLog;
1268
+ U32 const lowestValid = ms->window.lowLimit;
1269
+ U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1270
+ U32 const isDictionary = (ms->loadedDictEnd != 0);
1271
+ /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
1272
+ * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
1273
+ * valid for the entire block. So this check is sufficient to find the lowest valid match index.
1274
+ */
1275
+ U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
1276
+ return matchLowest;
1277
+ }
1278
+
1279
+ /**
1280
+ * Returns the lowest allowed match index in the prefix.
1281
+ */
1282
+ MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
887
1283
  {
888
1284
  U32 const maxDistance = 1U << windowLog;
889
- U32 const lowestValid = ms->window.lowLimit;
890
- U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
1285
+ U32 const lowestValid = ms->window.dictLimit;
1286
+ U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
891
1287
  U32 const isDictionary = (ms->loadedDictEnd != 0);
1288
+ /* When computing the lowest prefix index we need to take the dictionary into account to handle
1289
+ * the edge case where the dictionary and the source are contiguous in memory.
1290
+ */
892
1291
  U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
893
1292
  return matchLowest;
894
1293
  }
@@ -931,6 +1330,20 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
931
1330
  }
932
1331
  #endif
933
1332
 
1333
+ /* ===============================================================
1334
+ * Shared internal declarations
1335
+ * These prototypes may be called from sources not in lib/compress
1336
+ * =============================================================== */
1337
+
1338
+ /* ZSTD_loadCEntropy() :
1339
+ * dict : must point at beginning of a valid zstd dictionary.
1340
+ * return : size of dictionary header (size of magic number + dict ID + entropy tables)
1341
+ * assumptions : magic number supposed already checked
1342
+ * and dictSize >= 8 */
1343
+ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
1344
+ const void* const dict, size_t dictSize);
1345
+
1346
+ void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
934
1347
 
935
1348
  /* ==============================================================
936
1349
  * Private declarations
@@ -940,9 +1353,10 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
940
1353
  /* ZSTD_getCParamsFromCCtxParams() :
941
1354
  * cParams are built depending on compressionLevel, src size hints,
942
1355
  * LDM and manually set compression parameters.
1356
+ * Note: srcSizeHint == 0 is taken literally as a source size of 0.
943
1357
  */
944
1358
  ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
945
- const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
1359
+ const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
946
1360
 
947
1361
  /*! ZSTD_initCStream_internal() :
948
1362
  * Private use only. Init streaming operation.
@@ -999,5 +1413,13 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
999
1413
  */
1000
1414
  size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
1001
1415
 
1416
+ /** ZSTD_cycleLog() :
1417
+ * condition for correct operation : hashLog > 1 */
1418
+ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
1419
+
1420
+ /** ZSTD_CCtx_trace() :
1421
+ * Trace the end of a compression call.
1422
+ */
1423
+ void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
1002
1424
 
1003
1425
  #endif /* ZSTD_COMPRESS_H */