zstd-ruby 1.3.8.0 → 1.4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +6 -5
  3. data/README.md +1 -1
  4. data/ext/zstdruby/libzstd/Makefile +133 -61
  5. data/ext/zstdruby/libzstd/README.md +51 -18
  6. data/ext/zstdruby/libzstd/common/bitstream.h +38 -39
  7. data/ext/zstdruby/libzstd/common/compiler.h +41 -6
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -1
  9. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  10. data/ext/zstdruby/libzstd/common/debug.h +11 -31
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +13 -33
  12. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +6 -2
  14. data/ext/zstdruby/libzstd/common/fse.h +13 -33
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +12 -35
  16. data/ext/zstdruby/libzstd/common/huf.h +15 -33
  17. data/ext/zstdruby/libzstd/common/mem.h +75 -2
  18. data/ext/zstdruby/libzstd/common/pool.c +8 -4
  19. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  20. data/ext/zstdruby/libzstd/common/threading.c +52 -6
  21. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.c +25 -37
  23. data/ext/zstdruby/libzstd/common/xxhash.h +11 -31
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
  25. data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
  26. data/ext/zstdruby/libzstd/common/zstd_internal.h +203 -22
  27. data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -42
  28. data/ext/zstdruby/libzstd/compress/hist.c +15 -35
  29. data/ext/zstdruby/libzstd/compress/hist.h +12 -32
  30. data/ext/zstdruby/libzstd/compress/huf_compress.c +92 -92
  31. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1460 -1472
  32. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +330 -65
  33. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +419 -0
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +845 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +525 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +65 -43
  41. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  42. data/ext/zstdruby/libzstd/compress/zstd_fast.c +264 -159
  43. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  44. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +74 -42
  45. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +2 -2
  46. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +33 -11
  47. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +7 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_opt.c +108 -125
  49. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  50. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +129 -93
  51. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +46 -28
  52. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +76 -60
  53. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +14 -10
  54. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  55. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +471 -258
  56. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +471 -346
  57. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +3 -3
  58. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +25 -4
  59. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  60. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  61. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  62. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  63. data/ext/zstdruby/libzstd/dictBuilder/cover.c +220 -65
  64. data/ext/zstdruby/libzstd/dictBuilder/cover.h +81 -7
  65. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +85 -56
  66. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +43 -19
  67. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +73 -35
  68. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  69. data/ext/zstdruby/libzstd/dll/example/build_package.bat +3 -2
  70. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +49 -15
  71. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +142 -117
  72. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +13 -8
  73. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +54 -25
  74. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +13 -8
  75. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +55 -25
  76. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +13 -8
  77. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +62 -29
  78. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +13 -8
  79. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +145 -109
  80. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +14 -9
  81. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +56 -26
  82. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +11 -6
  83. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +65 -28
  84. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +11 -6
  85. data/ext/zstdruby/libzstd/libzstd.pc.in +3 -2
  86. data/ext/zstdruby/libzstd/zstd.h +921 -597
  87. data/lib/zstd-ruby/version.rb +1 -1
  88. data/zstd-ruby.gemspec +2 -2
  89. metadata +19 -14
  90. data/ext/zstdruby/libzstd/dll/libzstd.def +0 -87
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -18,7 +18,8 @@
18
18
  /*-*************************************
19
19
  * Dependencies
20
20
  ***************************************/
21
- #include "zstd_internal.h"
21
+ #include "../common/zstd_internal.h"
22
+ #include "zstd_cwksp.h"
22
23
  #ifdef ZSTD_MULTITHREAD
23
24
  # include "zstdmt_compress.h"
24
25
  #endif
@@ -33,13 +34,13 @@ extern "C" {
33
34
  ***************************************/
34
35
  #define kSearchStrength 8
35
36
  #define HASH_READ_SIZE 8
36
- #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
37
+ #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
37
38
  It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
38
39
  It's not a big deal though : candidate will just be sorted again.
39
- Additionnally, candidate position 1 will be lost.
40
+ Additionally, candidate position 1 will be lost.
40
41
  But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
41
- The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be misdhandled after table re-use with a different strategy
42
- Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
42
+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
43
+ This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
43
44
 
44
45
 
45
46
  /*-*************************************
@@ -54,6 +55,14 @@ typedef struct ZSTD_prefixDict_s {
54
55
  ZSTD_dictContentType_e dictContentType;
55
56
  } ZSTD_prefixDict;
56
57
 
58
+ typedef struct {
59
+ void* dictBuffer;
60
+ void const* dict;
61
+ size_t dictSize;
62
+ ZSTD_dictContentType_e dictContentType;
63
+ ZSTD_CDict* cdict;
64
+ } ZSTD_localDict;
65
+
57
66
  typedef struct {
58
67
  U32 CTable[HUF_CTABLE_SIZE_U32(255)];
59
68
  HUF_repeat repeatMode;
@@ -107,6 +116,7 @@ typedef struct {
107
116
  U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
108
117
  ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
109
118
  const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
119
+ ZSTD_literalCompressionMode_e literalCompressionMode;
110
120
  } optState_t;
111
121
 
112
122
  typedef struct {
@@ -119,21 +129,26 @@ typedef struct {
119
129
  BYTE const* base; /* All regular indexes relative to this position */
120
130
  BYTE const* dictBase; /* extDict indexes relative to this position */
121
131
  U32 dictLimit; /* below that point, need extDict */
122
- U32 lowLimit; /* below that point, no more data */
132
+ U32 lowLimit; /* below that point, no more valid data */
123
133
  } ZSTD_window_t;
124
134
 
125
135
  typedef struct ZSTD_matchState_t ZSTD_matchState_t;
126
136
  struct ZSTD_matchState_t {
127
137
  ZSTD_window_t window; /* State for window round buffer management */
128
- U32 loadedDictEnd; /* index of end of dictionary */
138
+ U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
139
+ * When loadedDictEnd != 0, a dictionary is in use, and still valid.
140
+ * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
141
+ * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
142
+ * When dict referential is copied into active context (i.e. not attached),
143
+ * loadedDictEnd == dictSize, since referential starts from zero.
144
+ */
129
145
  U32 nextToUpdate; /* index from which to continue table update */
130
- U32 nextToUpdate3; /* index from which to continue table update */
131
- U32 hashLog3; /* dispatch table : larger == faster, more memory */
146
+ U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
132
147
  U32* hashTable;
133
148
  U32* hashTable3;
134
149
  U32* chainTable;
135
150
  optState_t opt; /* optimal parser state */
136
- const ZSTD_matchState_t * dictMatchState;
151
+ const ZSTD_matchState_t* dictMatchState;
137
152
  ZSTD_compressionParameters cParams;
138
153
  };
139
154
 
@@ -151,6 +166,7 @@ typedef struct {
151
166
  typedef struct {
152
167
  ZSTD_window_t window; /* State for the window round buffer management */
153
168
  ldmEntry_t* hashTable;
169
+ U32 loadedDictEnd;
154
170
  BYTE* bucketOffsets; /* Next position in bucket to insert entry */
155
171
  U64 hashPower; /* Used to compute the rolling hash.
156
172
  * Depends on ldmParams.minMatchLength */
@@ -178,6 +194,13 @@ typedef struct {
178
194
  size_t capacity; /* The capacity starting from `seq` pointer */
179
195
  } rawSeqStore_t;
180
196
 
197
+ typedef struct {
198
+ int collectSequences;
199
+ ZSTD_Sequence* seqStart;
200
+ size_t seqIndex;
201
+ size_t maxSequences;
202
+ } SeqCollector;
203
+
181
204
  struct ZSTD_CCtx_params_s {
182
205
  ZSTD_format_e format;
183
206
  ZSTD_compressionParameters cParams;
@@ -186,8 +209,15 @@ struct ZSTD_CCtx_params_s {
186
209
  int compressionLevel;
187
210
  int forceWindow; /* force back-references to respect limit of
188
211
  * 1<<wLog, even for dictionary */
212
+ size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
213
+ * No target when targetCBlockSize == 0.
214
+ * There is no guarantee on compressed block size */
215
+ int srcSizeHint; /* User's best guess of source size.
216
+ * Hint is not valid when srcSizeHint == 0.
217
+ * There is no guarantee that hint is close to actual source size */
189
218
 
190
219
  ZSTD_dictAttachPref_e attachDictPref;
220
+ ZSTD_literalCompressionMode_e literalCompressionMode;
191
221
 
192
222
  /* Multithreading: used to pass parameters to mtctx */
193
223
  int nbWorkers;
@@ -210,9 +240,7 @@ struct ZSTD_CCtx_s {
210
240
  ZSTD_CCtx_params appliedParams;
211
241
  U32 dictID;
212
242
 
213
- int workSpaceOversizedDuration;
214
- void* workSpace;
215
- size_t workSpaceSize;
243
+ ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
216
244
  size_t blockSize;
217
245
  unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
218
246
  unsigned long long consumedSrcSize;
@@ -220,6 +248,9 @@ struct ZSTD_CCtx_s {
220
248
  XXH64_state_t xxhState;
221
249
  ZSTD_customMem customMem;
222
250
  size_t staticSize;
251
+ SeqCollector seqCollector;
252
+ int isFirstBlock;
253
+ int initialized;
223
254
 
224
255
  seqStore_t seqStore; /* sequences storage ptrs */
225
256
  ldmState_t ldmState; /* long distance matching state */
@@ -243,7 +274,7 @@ struct ZSTD_CCtx_s {
243
274
  U32 frameEnded;
244
275
 
245
276
  /* Dictionary */
246
- ZSTD_CDict* cdictLocal;
277
+ ZSTD_localDict localDict;
247
278
  const ZSTD_CDict* cdict;
248
279
  ZSTD_prefixDict prefixDict; /* single-usage dictionary */
249
280
 
@@ -295,26 +326,145 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
295
326
  return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
296
327
  }
297
328
 
329
+ typedef struct repcodes_s {
330
+ U32 rep[3];
331
+ } repcodes_t;
332
+
333
+ MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
334
+ {
335
+ repcodes_t newReps;
336
+ if (offset >= ZSTD_REP_NUM) { /* full offset */
337
+ newReps.rep[2] = rep[1];
338
+ newReps.rep[1] = rep[0];
339
+ newReps.rep[0] = offset - ZSTD_REP_MOVE;
340
+ } else { /* repcode */
341
+ U32 const repCode = offset + ll0;
342
+ if (repCode > 0) { /* note : if repCode==0, no change */
343
+ U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
344
+ newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
345
+ newReps.rep[1] = rep[0];
346
+ newReps.rep[0] = currentOffset;
347
+ } else { /* repCode == 0 */
348
+ memcpy(&newReps, rep, sizeof(newReps));
349
+ }
350
+ }
351
+ return newReps;
352
+ }
353
+
354
+ /* ZSTD_cParam_withinBounds:
355
+ * @return 1 if value is within cParam bounds,
356
+ * 0 otherwise */
357
+ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
358
+ {
359
+ ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
360
+ if (ZSTD_isError(bounds.error)) return 0;
361
+ if (value < bounds.lowerBound) return 0;
362
+ if (value > bounds.upperBound) return 0;
363
+ return 1;
364
+ }
365
+
366
+ /* ZSTD_noCompressBlock() :
367
+ * Writes uncompressed block to dst buffer from given src.
368
+ * Returns the size of the block */
369
+ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
370
+ {
371
+ U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
372
+ RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
373
+ dstSize_tooSmall, "dst buf too small for uncompressed block");
374
+ MEM_writeLE24(dst, cBlockHeader24);
375
+ memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
376
+ return ZSTD_blockHeaderSize + srcSize;
377
+ }
378
+
379
+ MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
380
+ {
381
+ BYTE* const op = (BYTE*)dst;
382
+ U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
383
+ RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
384
+ MEM_writeLE24(op, cBlockHeader);
385
+ op[3] = src;
386
+ return 4;
387
+ }
388
+
389
+
390
+ /* ZSTD_minGain() :
391
+ * minimum compression required
392
+ * to generate a compress block or a compressed literals section.
393
+ * note : use same formula for both situations */
394
+ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
395
+ {
396
+ U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
397
+ ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
398
+ assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
399
+ return (srcSize >> minlog) + 2;
400
+ }
401
+
402
+ MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
403
+ {
404
+ switch (cctxParams->literalCompressionMode) {
405
+ case ZSTD_lcm_huffman:
406
+ return 0;
407
+ case ZSTD_lcm_uncompressed:
408
+ return 1;
409
+ default:
410
+ assert(0 /* impossible: pre-validated */);
411
+ /* fall-through */
412
+ case ZSTD_lcm_auto:
413
+ return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
414
+ }
415
+ }
416
+
417
+ /*! ZSTD_safecopyLiterals() :
418
+ * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
419
+ * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
420
+ * large copies.
421
+ */
422
+ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
423
+ assert(iend > ilimit_w);
424
+ if (ip <= ilimit_w) {
425
+ ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
426
+ op += ilimit_w - ip;
427
+ ip = ilimit_w;
428
+ }
429
+ while (ip < iend) *op++ = *ip++;
430
+ }
431
+
298
432
  /*! ZSTD_storeSeq() :
299
- * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
300
- * `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
433
+ * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
434
+ * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
301
435
  * `mlBase` : matchLength - MINMATCH
436
+ * Allowed to overread literals up to litLimit.
302
437
  */
303
- MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
438
+ HINT_INLINE UNUSED_ATTR
439
+ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
304
440
  {
441
+ BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
442
+ BYTE const* const litEnd = literals + litLength;
305
443
  #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
306
444
  static const BYTE* g_start = NULL;
307
445
  if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
308
446
  { U32 const pos = (U32)((const BYTE*)literals - g_start);
309
447
  DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
310
- pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
448
+ pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
311
449
  }
312
450
  #endif
313
451
  assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
314
452
  /* copy Literals */
315
453
  assert(seqStorePtr->maxNbLit <= 128 KB);
316
454
  assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
317
- ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
455
+ assert(literals + litLength <= litLimit);
456
+ if (litEnd <= litLimit_w) {
457
+ /* Common case we can use wildcopy.
458
+ * First copy 16 bytes, because literals are likely short.
459
+ */
460
+ assert(WILDCOPY_OVERLENGTH >= 16);
461
+ ZSTD_copy16(seqStorePtr->lit, literals);
462
+ if (litLength > 16) {
463
+ ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
464
+ }
465
+ } else {
466
+ ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
467
+ }
318
468
  seqStorePtr->lit += litLength;
319
469
 
320
470
  /* literal Length */
@@ -326,7 +476,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
326
476
  seqStorePtr->sequences[0].litLength = (U16)litLength;
327
477
 
328
478
  /* match offset */
329
- seqStorePtr->sequences[0].offset = offsetCode + 1;
479
+ seqStorePtr->sequences[0].offset = offCode + 1;
330
480
 
331
481
  /* match Length */
332
482
  if (mlBase>0xFFFF) {
@@ -349,8 +499,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
349
499
  if (MEM_64bits()) {
350
500
  # if defined(_MSC_VER) && defined(_WIN64)
351
501
  unsigned long r = 0;
352
- _BitScanForward64( &r, (U64)val );
353
- return (unsigned)(r>>3);
502
+ return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
354
503
  # elif defined(__GNUC__) && (__GNUC__ >= 4)
355
504
  return (__builtin_ctzll((U64)val) >> 3);
356
505
  # else
@@ -367,8 +516,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
367
516
  } else { /* 32 bits */
368
517
  # if defined(_MSC_VER)
369
518
  unsigned long r=0;
370
- _BitScanForward( &r, (U32)val );
371
- return (unsigned)(r>>3);
519
+ return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
372
520
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
373
521
  return (__builtin_ctz((U32)val) >> 3);
374
522
  # else
@@ -383,8 +531,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
383
531
  if (MEM_64bits()) {
384
532
  # if defined(_MSC_VER) && defined(_WIN64)
385
533
  unsigned long r = 0;
386
- _BitScanReverse64( &r, val );
387
- return (unsigned)(r>>3);
534
+ return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0;
388
535
  # elif defined(__GNUC__) && (__GNUC__ >= 4)
389
536
  return (__builtin_clzll(val) >> 3);
390
537
  # else
@@ -398,8 +545,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
398
545
  } else { /* 32 bits */
399
546
  # if defined(_MSC_VER)
400
547
  unsigned long r = 0;
401
- _BitScanReverse( &r, (unsigned long)val );
402
- return (unsigned)(r>>3);
548
+ return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
403
549
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
404
550
  return (__builtin_clz((U32)val) >> 3);
405
551
  # else
@@ -554,6 +700,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64
554
700
  /*-*************************************
555
701
  * Round buffer management
556
702
  ***************************************/
703
+ #if (ZSTD_WINDOWLOG_MAX_64 > 31)
704
+ # error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
705
+ #endif
557
706
  /* Max current allowed */
558
707
  #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
559
708
  /* Maximum chunk size before overflow correction needs to be called again */
@@ -643,7 +792,10 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
643
792
  */
644
793
  U32 const cycleMask = (1U << cycleLog) - 1;
645
794
  U32 const current = (U32)((BYTE const*)src - window->base);
646
- U32 const newCurrent = (current & cycleMask) + maxDist;
795
+ U32 const currentCycle0 = current & cycleMask;
796
+ /* Exclude zero so that newCurrent - maxDist >= 1. */
797
+ U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
798
+ U32 const newCurrent = currentCycle1 + maxDist;
647
799
  U32 const correction = current - newCurrent;
648
800
  assert((maxDist & cycleMask) == 0);
649
801
  assert(current > newCurrent);
@@ -652,8 +804,17 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
652
804
 
653
805
  window->base += correction;
654
806
  window->dictBase += correction;
655
- window->lowLimit -= correction;
656
- window->dictLimit -= correction;
807
+ if (window->lowLimit <= correction) window->lowLimit = 1;
808
+ else window->lowLimit -= correction;
809
+ if (window->dictLimit <= correction) window->dictLimit = 1;
810
+ else window->dictLimit -= correction;
811
+
812
+ /* Ensure we can still reference the full window. */
813
+ assert(newCurrent >= maxDist);
814
+ assert(newCurrent - maxDist >= 1);
815
+ /* Ensure that lowLimit and dictLimit didn't underflow. */
816
+ assert(window->lowLimit <= newCurrent);
817
+ assert(window->dictLimit <= newCurrent);
657
818
 
658
819
  DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
659
820
  window->lowLimit);
@@ -665,31 +826,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
665
826
  * Updates lowLimit so that:
666
827
  * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
667
828
  *
668
- * This allows a simple check that index >= lowLimit to see if index is valid.
669
- * This must be called before a block compression call, with srcEnd as the block
670
- * source end.
829
+ * It ensures index is valid as long as index >= lowLimit.
830
+ * This must be called before a block compression call.
831
+ *
832
+ * loadedDictEnd is only defined if a dictionary is in use for current compression.
833
+ * As the name implies, loadedDictEnd represents the index at end of dictionary.
834
+ * The value lies within context's referential, it can be directly compared to blockEndIdx.
671
835
  *
672
- * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
673
- * This is because dictionaries are allowed to be referenced as long as the last
674
- * byte of the dictionary is in the window, but once they are out of range,
675
- * they cannot be referenced. If loadedDictEndPtr is NULL, we use
676
- * loadedDictEnd == 0.
836
+ * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
837
+ * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
838
+ * This is because dictionaries are allowed to be referenced fully
839
+ * as long as the last byte of the dictionary is in the window.
840
+ * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
677
841
  *
678
- * In normal dict mode, the dict is between lowLimit and dictLimit. In
679
- * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
680
- * is below them. forceWindow and dictMatchState are therefore incompatible.
842
+ * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
843
+ * In dictMatchState mode, lowLimit and dictLimit are the same,
844
+ * and the dictionary is below them.
845
+ * forceWindow and dictMatchState are therefore incompatible.
681
846
  */
682
847
  MEM_STATIC void
683
848
  ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
684
- void const* srcEnd,
685
- U32 maxDist,
686
- U32* loadedDictEndPtr,
849
+ const void* blockEnd,
850
+ U32 maxDist,
851
+ U32* loadedDictEndPtr,
687
852
  const ZSTD_matchState_t** dictMatchStatePtr)
688
853
  {
689
- U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
690
- U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
691
- DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
692
- (unsigned)blockEndIdx, (unsigned)maxDist);
854
+ U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
855
+ U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
856
+ DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
857
+ (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
858
+
859
+ /* - When there is no dictionary : loadedDictEnd == 0.
860
+ In which case, the test (blockEndIdx > maxDist) is merely to avoid
861
+ overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
862
+ - When there is a standard dictionary :
863
+ Index referential is copied from the dictionary,
864
+ which means it starts from 0.
865
+ In which case, loadedDictEnd == dictSize,
866
+ and it makes sense to compare `blockEndIdx > maxDist + dictSize`
867
+ since `blockEndIdx` also starts from zero.
868
+ - When there is an attached dictionary :
869
+ loadedDictEnd is expressed within the referential of the context,
870
+ so it can be directly compared against blockEndIdx.
871
+ */
693
872
  if (blockEndIdx > maxDist + loadedDictEnd) {
694
873
  U32 const newLowLimit = blockEndIdx - maxDist;
695
874
  if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
@@ -698,11 +877,54 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
698
877
  (unsigned)window->dictLimit, (unsigned)window->lowLimit);
699
878
  window->dictLimit = window->lowLimit;
700
879
  }
701
- if (loadedDictEndPtr)
880
+ /* On reaching window size, dictionaries are invalidated */
881
+ if (loadedDictEndPtr) *loadedDictEndPtr = 0;
882
+ if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
883
+ }
884
+ }
885
+
886
+ /* Similar to ZSTD_window_enforceMaxDist(),
887
+ * but only invalidates dictionary
888
+ * when input progresses beyond window size.
889
+ * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
890
+ * loadedDictEnd uses same referential as window->base
891
+ * maxDist is the window size */
892
+ MEM_STATIC void
893
+ ZSTD_checkDictValidity(const ZSTD_window_t* window,
894
+ const void* blockEnd,
895
+ U32 maxDist,
896
+ U32* loadedDictEndPtr,
897
+ const ZSTD_matchState_t** dictMatchStatePtr)
898
+ {
899
+ assert(loadedDictEndPtr != NULL);
900
+ assert(dictMatchStatePtr != NULL);
901
+ { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
902
+ U32 const loadedDictEnd = *loadedDictEndPtr;
903
+ DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
904
+ (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
905
+ assert(blockEndIdx >= loadedDictEnd);
906
+
907
+ if (blockEndIdx > loadedDictEnd + maxDist) {
908
+ /* On reaching window size, dictionaries are invalidated.
909
+ * For simplification, if window size is reached anywhere within next block,
910
+ * the dictionary is invalidated for the full block.
911
+ */
912
+ DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
702
913
  *loadedDictEndPtr = 0;
703
- if (dictMatchStatePtr)
704
914
  *dictMatchStatePtr = NULL;
705
- }
915
+ } else {
916
+ if (*loadedDictEndPtr != 0) {
917
+ DEBUGLOG(6, "dictionary considered valid for current block");
918
+ } } }
919
+ }
920
+
921
+ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
922
+ memset(window, 0, sizeof(*window));
923
+ window->base = (BYTE const*)"";
924
+ window->dictBase = (BYTE const*)"";
925
+ window->dictLimit = 1; /* start from 1, so that 1st position is valid */
926
+ window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
927
+ window->nextSrc = window->base + 1; /* see issue #1241 */
706
928
  }
707
929
 
708
930
  /**
@@ -718,6 +940,10 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
718
940
  BYTE const* const ip = (BYTE const*)src;
719
941
  U32 contiguous = 1;
720
942
  DEBUGLOG(5, "ZSTD_window_update");
943
+ if (srcSize == 0)
944
+ return contiguous;
945
+ assert(window->base != NULL);
946
+ assert(window->dictBase != NULL);
721
947
  /* Check if blocks follow each other */
722
948
  if (src != window->nextSrc) {
723
949
  /* not contiguous */
@@ -728,7 +954,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
728
954
  window->dictLimit = (U32)distanceFromBase;
729
955
  window->dictBase = window->base;
730
956
  window->base = ip - distanceFromBase;
731
- // ms->nextToUpdate = window->dictLimit;
957
+ /* ms->nextToUpdate = window->dictLimit; */
732
958
  if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
733
959
  contiguous = 0;
734
960
  }
@@ -744,6 +970,33 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
744
970
  return contiguous;
745
971
  }
746
972
 
973
+ /**
974
+ * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
975
+ */
976
+ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
977
+ {
978
+ U32 const maxDistance = 1U << windowLog;
979
+ U32 const lowestValid = ms->window.lowLimit;
980
+ U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
981
+ U32 const isDictionary = (ms->loadedDictEnd != 0);
982
+ U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
983
+ return matchLowest;
984
+ }
985
+
986
+ /**
987
+ * Returns the lowest allowed match index in the prefix.
988
+ */
989
+ MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
990
+ {
991
+ U32 const maxDistance = 1U << windowLog;
992
+ U32 const lowestValid = ms->window.dictLimit;
993
+ U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
994
+ U32 const isDictionary = (ms->loadedDictEnd != 0);
995
+ U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
996
+ return matchLowest;
997
+ }
998
+
999
+
747
1000
 
748
1001
  /* debug functions */
749
1002
  #if (DEBUGLEVEL>=2)
@@ -781,6 +1034,21 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
781
1034
  }
782
1035
  #endif
783
1036
 
1037
+ /* ===============================================================
1038
+ * Shared internal declarations
1039
+ * These prototypes may be called from sources not in lib/compress
1040
+ * =============================================================== */
1041
+
1042
+ /* ZSTD_loadCEntropy() :
1043
+ * dict : must point at beginning of a valid zstd dictionary.
1044
+ * return : size of dictionary header (size of magic number + dict ID + entropy tables)
1045
+ * assumptions : magic number supposed already checked
1046
+ * and dictSize >= 8 */
1047
+ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
1048
+ short* offcodeNCount, unsigned* offcodeMaxValue,
1049
+ const void* const dict, size_t dictSize);
1050
+
1051
+ void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
784
1052
 
785
1053
  /* ==============================================================
786
1054
  * Private declarations
@@ -790,6 +1058,7 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
790
1058
  /* ZSTD_getCParamsFromCCtxParams() :
791
1059
  * cParams are built depending on compressionLevel, src size hints,
792
1060
  * LDM and manually set compression parameters.
1061
+ * Note: srcSizeHint == 0 means 0!
793
1062
  */
794
1063
  ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
795
1064
  const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
@@ -802,17 +1071,10 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
802
1071
  size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
803
1072
  const void* dict, size_t dictSize,
804
1073
  const ZSTD_CDict* cdict,
805
- ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
1074
+ const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
806
1075
 
807
1076
  void ZSTD_resetSeqStore(seqStore_t* ssPtr);
808
1077
 
809
- /*! ZSTD_compressStream_generic() :
810
- * Private use only. To be called from zstdmt_compress.c in single-thread mode. */
811
- size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
812
- ZSTD_outBuffer* output,
813
- ZSTD_inBuffer* input,
814
- ZSTD_EndDirective const flushMode);
815
-
816
1078
  /*! ZSTD_getCParamsFromCDict() :
817
1079
  * as the name implies */
818
1080
  ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
@@ -824,7 +1086,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
824
1086
  ZSTD_dictContentType_e dictContentType,
825
1087
  ZSTD_dictTableLoadMethod_e dtlm,
826
1088
  const ZSTD_CDict* cdict,
827
- ZSTD_CCtx_params params,
1089
+ const ZSTD_CCtx_params* params,
828
1090
  unsigned long long pledgedSrcSize);
829
1091
 
830
1092
  /* ZSTD_compress_advanced_internal() :
@@ -833,13 +1095,13 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
833
1095
  void* dst, size_t dstCapacity,
834
1096
  const void* src, size_t srcSize,
835
1097
  const void* dict,size_t dictSize,
836
- ZSTD_CCtx_params params);
1098
+ const ZSTD_CCtx_params* params);
837
1099
 
838
1100
 
839
1101
  /* ZSTD_writeLastEmptyBlock() :
840
1102
  * output an empty Block with end-of-frame mark to complete a frame
841
1103
  * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
842
- * or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize)
1104
+ * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
843
1105
  */
844
1106
  size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
845
1107
 
@@ -856,5 +1118,8 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
856
1118
  */
857
1119
  size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
858
1120
 
1121
+ /** ZSTD_cycleLog() :
1122
+ * condition for correct operation : hashLog > 1 */
1123
+ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
859
1124
 
860
1125
  #endif /* ZSTD_COMPRESS_H */