zstdlib 0.2.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +30 -1
  3. data/README.md +2 -2
  4. data/Rakefile +1 -1
  5. data/ext/zstdlib/extconf.rb +3 -3
  6. data/ext/zstdlib/ruby/zlib-2.7/zstdlib.c +4895 -0
  7. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/bitstream.h +38 -39
  8. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/compiler.h +40 -5
  9. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/cpu.h +1 -1
  10. data/ext/zstdlib/zstd-1.4.5/lib/common/debug.c +24 -0
  11. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/debug.h +11 -31
  12. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/entropy_common.c +13 -33
  13. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/error_private.c +2 -1
  14. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/error_private.h +6 -2
  15. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/fse.h +12 -32
  16. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/fse_decompress.c +12 -35
  17. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/huf.h +15 -33
  18. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/mem.h +75 -2
  19. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/pool.c +8 -4
  20. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/pool.h +2 -2
  21. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/threading.c +50 -4
  22. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/threading.h +36 -4
  23. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/xxhash.c +23 -35
  24. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/xxhash.h +11 -31
  25. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_common.c +1 -1
  26. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_errors.h +2 -1
  27. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/common/zstd_internal.h +154 -26
  28. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/fse_compress.c +17 -40
  29. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/hist.c +15 -35
  30. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/hist.h +12 -32
  31. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/huf_compress.c +92 -92
  32. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_compress.c +1191 -1330
  33. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_compress_internal.h +317 -55
  34. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_literals.c +158 -0
  35. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_literals.h +29 -0
  36. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_sequences.c +419 -0
  37. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_sequences.h +54 -0
  38. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.c +845 -0
  39. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.h +32 -0
  40. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_cwksp.h +525 -0
  41. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_double_fast.c +65 -43
  42. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_double_fast.h +2 -2
  43. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_fast.c +92 -66
  44. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_fast.h +2 -2
  45. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_lazy.c +74 -42
  46. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_lazy.h +1 -1
  47. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_ldm.c +32 -10
  48. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_ldm.h +7 -2
  49. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_opt.c +81 -114
  50. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstd_opt.h +1 -1
  51. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstdmt_compress.c +95 -51
  52. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/compress/zstdmt_compress.h +3 -2
  53. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/huf_decompress.c +76 -60
  54. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_ddict.c +12 -8
  55. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_ddict.h +2 -2
  56. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress.c +292 -172
  57. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.c +459 -338
  58. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.h +3 -3
  59. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/decompress/zstd_decompress_internal.h +18 -4
  60. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/lib/zstd.h +265 -88
  61. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzclose.c +1 -1
  62. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzcompatibility.h +1 -1
  63. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzguts.h +0 -0
  64. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzlib.c +9 -9
  65. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzread.c +16 -8
  66. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/gzwrite.c +8 -8
  67. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.c +16 -12
  68. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.h +1 -1
  69. metadata +69 -62
  70. data/ext/zstdlib/zstd-1.4.0/lib/common/debug.c +0 -44
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -18,7 +18,8 @@
18
18
  /*-*************************************
19
19
  * Dependencies
20
20
  ***************************************/
21
- #include "zstd_internal.h"
21
+ #include "../common/zstd_internal.h"
22
+ #include "zstd_cwksp.h"
22
23
  #ifdef ZSTD_MULTITHREAD
23
24
  # include "zstdmt_compress.h"
24
25
  #endif
@@ -33,13 +34,13 @@ extern "C" {
33
34
  ***************************************/
34
35
  #define kSearchStrength 8
35
36
  #define HASH_READ_SIZE 8
36
- #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
37
+ #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
37
38
  It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
38
39
  It's not a big deal though : candidate will just be sorted again.
39
40
  Additionally, candidate position 1 will be lost.
40
41
  But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
41
- The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
42
- Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
42
+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
43
+ This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
43
44
 
44
45
 
45
46
  /*-*************************************
@@ -128,21 +129,26 @@ typedef struct {
128
129
  BYTE const* base; /* All regular indexes relative to this position */
129
130
  BYTE const* dictBase; /* extDict indexes relative to this position */
130
131
  U32 dictLimit; /* below that point, need extDict */
131
- U32 lowLimit; /* below that point, no more data */
132
+ U32 lowLimit; /* below that point, no more valid data */
132
133
  } ZSTD_window_t;
133
134
 
134
135
  typedef struct ZSTD_matchState_t ZSTD_matchState_t;
135
136
  struct ZSTD_matchState_t {
136
137
  ZSTD_window_t window; /* State for window round buffer management */
137
- U32 loadedDictEnd; /* index of end of dictionary */
138
+ U32 loadedDictEnd; /* index of end of dictionary, within context's referential.
139
+ * When loadedDictEnd != 0, a dictionary is in use, and still valid.
140
+ * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
141
+ * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
142
+ * When dict referential is copied into active context (i.e. not attached),
143
+ * loadedDictEnd == dictSize, since referential starts from zero.
144
+ */
138
145
  U32 nextToUpdate; /* index from which to continue table update */
139
- U32 nextToUpdate3; /* index from which to continue table update */
140
- U32 hashLog3; /* dispatch table : larger == faster, more memory */
146
+ U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */
141
147
  U32* hashTable;
142
148
  U32* hashTable3;
143
149
  U32* chainTable;
144
150
  optState_t opt; /* optimal parser state */
145
- const ZSTD_matchState_t * dictMatchState;
151
+ const ZSTD_matchState_t* dictMatchState;
146
152
  ZSTD_compressionParameters cParams;
147
153
  };
148
154
 
@@ -160,6 +166,7 @@ typedef struct {
160
166
  typedef struct {
161
167
  ZSTD_window_t window; /* State for the window round buffer management */
162
168
  ldmEntry_t* hashTable;
169
+ U32 loadedDictEnd;
163
170
  BYTE* bucketOffsets; /* Next position in bucket to insert entry */
164
171
  U64 hashPower; /* Used to compute the rolling hash.
165
172
  * Depends on ldmParams.minMatchLength */
@@ -187,6 +194,13 @@ typedef struct {
187
194
  size_t capacity; /* The capacity starting from `seq` pointer */
188
195
  } rawSeqStore_t;
189
196
 
197
+ typedef struct {
198
+ int collectSequences;
199
+ ZSTD_Sequence* seqStart;
200
+ size_t seqIndex;
201
+ size_t maxSequences;
202
+ } SeqCollector;
203
+
190
204
  struct ZSTD_CCtx_params_s {
191
205
  ZSTD_format_e format;
192
206
  ZSTD_compressionParameters cParams;
@@ -195,6 +209,12 @@ struct ZSTD_CCtx_params_s {
195
209
  int compressionLevel;
196
210
  int forceWindow; /* force back-references to respect limit of
197
211
  * 1<<wLog, even for dictionary */
212
+ size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
213
+ * No target when targetCBlockSize == 0.
214
+ * There is no guarantee on compressed block size */
215
+ int srcSizeHint; /* User's best guess of source size.
216
+ * Hint is not valid when srcSizeHint == 0.
217
+ * There is no guarantee that hint is close to actual source size */
198
218
 
199
219
  ZSTD_dictAttachPref_e attachDictPref;
200
220
  ZSTD_literalCompressionMode_e literalCompressionMode;
@@ -220,9 +240,7 @@ struct ZSTD_CCtx_s {
220
240
  ZSTD_CCtx_params appliedParams;
221
241
  U32 dictID;
222
242
 
223
- int workSpaceOversizedDuration;
224
- void* workSpace;
225
- size_t workSpaceSize;
243
+ ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
226
244
  size_t blockSize;
227
245
  unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
228
246
  unsigned long long consumedSrcSize;
@@ -230,6 +248,9 @@ struct ZSTD_CCtx_s {
230
248
  XXH64_state_t xxhState;
231
249
  ZSTD_customMem customMem;
232
250
  size_t staticSize;
251
+ SeqCollector seqCollector;
252
+ int isFirstBlock;
253
+ int initialized;
233
254
 
234
255
  seqStore_t seqStore; /* sequences storage ptrs */
235
256
  ldmState_t ldmState; /* long distance matching state */
@@ -305,26 +326,145 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
305
326
  return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
306
327
  }
307
328
 
329
+ typedef struct repcodes_s {
330
+ U32 rep[3];
331
+ } repcodes_t;
332
+
333
+ MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
334
+ {
335
+ repcodes_t newReps;
336
+ if (offset >= ZSTD_REP_NUM) { /* full offset */
337
+ newReps.rep[2] = rep[1];
338
+ newReps.rep[1] = rep[0];
339
+ newReps.rep[0] = offset - ZSTD_REP_MOVE;
340
+ } else { /* repcode */
341
+ U32 const repCode = offset + ll0;
342
+ if (repCode > 0) { /* note : if repCode==0, no change */
343
+ U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
344
+ newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
345
+ newReps.rep[1] = rep[0];
346
+ newReps.rep[0] = currentOffset;
347
+ } else { /* repCode == 0 */
348
+ memcpy(&newReps, rep, sizeof(newReps));
349
+ }
350
+ }
351
+ return newReps;
352
+ }
353
+
354
+ /* ZSTD_cParam_withinBounds:
355
+ * @return 1 if value is within cParam bounds,
356
+ * 0 otherwise */
357
+ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
358
+ {
359
+ ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
360
+ if (ZSTD_isError(bounds.error)) return 0;
361
+ if (value < bounds.lowerBound) return 0;
362
+ if (value > bounds.upperBound) return 0;
363
+ return 1;
364
+ }
365
+
366
+ /* ZSTD_noCompressBlock() :
367
+ * Writes uncompressed block to dst buffer from given src.
368
+ * Returns the size of the block */
369
+ MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
370
+ {
371
+ U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
372
+ RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
373
+ dstSize_tooSmall, "dst buf too small for uncompressed block");
374
+ MEM_writeLE24(dst, cBlockHeader24);
375
+ memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
376
+ return ZSTD_blockHeaderSize + srcSize;
377
+ }
378
+
379
+ MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
380
+ {
381
+ BYTE* const op = (BYTE*)dst;
382
+ U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
383
+ RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
384
+ MEM_writeLE24(op, cBlockHeader);
385
+ op[3] = src;
386
+ return 4;
387
+ }
388
+
389
+
390
+ /* ZSTD_minGain() :
391
+ * minimum compression required
392
+ * to generate a compress block or a compressed literals section.
393
+ * note : use same formula for both situations */
394
+ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
395
+ {
396
+ U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
397
+ ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
398
+ assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
399
+ return (srcSize >> minlog) + 2;
400
+ }
401
+
402
+ MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
403
+ {
404
+ switch (cctxParams->literalCompressionMode) {
405
+ case ZSTD_lcm_huffman:
406
+ return 0;
407
+ case ZSTD_lcm_uncompressed:
408
+ return 1;
409
+ default:
410
+ assert(0 /* impossible: pre-validated */);
411
+ /* fall-through */
412
+ case ZSTD_lcm_auto:
413
+ return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
414
+ }
415
+ }
416
+
417
+ /*! ZSTD_safecopyLiterals() :
418
+ * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
419
+ * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
420
+ * large copies.
421
+ */
422
+ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
423
+ assert(iend > ilimit_w);
424
+ if (ip <= ilimit_w) {
425
+ ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
426
+ op += ilimit_w - ip;
427
+ ip = ilimit_w;
428
+ }
429
+ while (ip < iend) *op++ = *ip++;
430
+ }
431
+
308
432
  /*! ZSTD_storeSeq() :
309
- * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
310
- * `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
433
+ * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
434
+ * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
311
435
  * `mlBase` : matchLength - MINMATCH
436
+ * Allowed to overread literals up to litLimit.
312
437
  */
313
- MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
438
+ HINT_INLINE UNUSED_ATTR
439
+ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
314
440
  {
441
+ BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
442
+ BYTE const* const litEnd = literals + litLength;
315
443
  #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
316
444
  static const BYTE* g_start = NULL;
317
445
  if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
318
446
  { U32 const pos = (U32)((const BYTE*)literals - g_start);
319
447
  DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
320
- pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
448
+ pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
321
449
  }
322
450
  #endif
323
451
  assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
324
452
  /* copy Literals */
325
453
  assert(seqStorePtr->maxNbLit <= 128 KB);
326
454
  assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
327
- ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
455
+ assert(literals + litLength <= litLimit);
456
+ if (litEnd <= litLimit_w) {
457
+ /* Common case we can use wildcopy.
458
+ * First copy 16 bytes, because literals are likely short.
459
+ */
460
+ assert(WILDCOPY_OVERLENGTH >= 16);
461
+ ZSTD_copy16(seqStorePtr->lit, literals);
462
+ if (litLength > 16) {
463
+ ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
464
+ }
465
+ } else {
466
+ ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
467
+ }
328
468
  seqStorePtr->lit += litLength;
329
469
 
330
470
  /* literal Length */
@@ -336,7 +476,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
336
476
  seqStorePtr->sequences[0].litLength = (U16)litLength;
337
477
 
338
478
  /* match offset */
339
- seqStorePtr->sequences[0].offset = offsetCode + 1;
479
+ seqStorePtr->sequences[0].offset = offCode + 1;
340
480
 
341
481
  /* match Length */
342
482
  if (mlBase>0xFFFF) {
@@ -359,8 +499,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
359
499
  if (MEM_64bits()) {
360
500
  # if defined(_MSC_VER) && defined(_WIN64)
361
501
  unsigned long r = 0;
362
- _BitScanForward64( &r, (U64)val );
363
- return (unsigned)(r>>3);
502
+ return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
364
503
  # elif defined(__GNUC__) && (__GNUC__ >= 4)
365
504
  return (__builtin_ctzll((U64)val) >> 3);
366
505
  # else
@@ -377,8 +516,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
377
516
  } else { /* 32 bits */
378
517
  # if defined(_MSC_VER)
379
518
  unsigned long r=0;
380
- _BitScanForward( &r, (U32)val );
381
- return (unsigned)(r>>3);
519
+ return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0;
382
520
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
383
521
  return (__builtin_ctz((U32)val) >> 3);
384
522
  # else
@@ -393,8 +531,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
393
531
  if (MEM_64bits()) {
394
532
  # if defined(_MSC_VER) && defined(_WIN64)
395
533
  unsigned long r = 0;
396
- _BitScanReverse64( &r, val );
397
- return (unsigned)(r>>3);
534
+ return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0;
398
535
  # elif defined(__GNUC__) && (__GNUC__ >= 4)
399
536
  return (__builtin_clzll(val) >> 3);
400
537
  # else
@@ -408,8 +545,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
408
545
  } else { /* 32 bits */
409
546
  # if defined(_MSC_VER)
410
547
  unsigned long r = 0;
411
- _BitScanReverse( &r, (unsigned long)val );
412
- return (unsigned)(r>>3);
548
+ return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0;
413
549
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
414
550
  return (__builtin_clz((U32)val) >> 3);
415
551
  # else
@@ -564,6 +700,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64
564
700
  /*-*************************************
565
701
  * Round buffer management
566
702
  ***************************************/
703
+ #if (ZSTD_WINDOWLOG_MAX_64 > 31)
704
+ # error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
705
+ #endif
567
706
  /* Max current allowed */
568
707
  #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
569
708
  /* Maximum chunk size before overflow correction needs to be called again */
@@ -653,7 +792,10 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
653
792
  */
654
793
  U32 const cycleMask = (1U << cycleLog) - 1;
655
794
  U32 const current = (U32)((BYTE const*)src - window->base);
656
- U32 const newCurrent = (current & cycleMask) + maxDist;
795
+ U32 const currentCycle0 = current & cycleMask;
796
+ /* Exclude zero so that newCurrent - maxDist >= 1. */
797
+ U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
798
+ U32 const newCurrent = currentCycle1 + maxDist;
657
799
  U32 const correction = current - newCurrent;
658
800
  assert((maxDist & cycleMask) == 0);
659
801
  assert(current > newCurrent);
@@ -662,8 +804,17 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
662
804
 
663
805
  window->base += correction;
664
806
  window->dictBase += correction;
665
- window->lowLimit -= correction;
666
- window->dictLimit -= correction;
807
+ if (window->lowLimit <= correction) window->lowLimit = 1;
808
+ else window->lowLimit -= correction;
809
+ if (window->dictLimit <= correction) window->dictLimit = 1;
810
+ else window->dictLimit -= correction;
811
+
812
+ /* Ensure we can still reference the full window. */
813
+ assert(newCurrent >= maxDist);
814
+ assert(newCurrent - maxDist >= 1);
815
+ /* Ensure that lowLimit and dictLimit didn't underflow. */
816
+ assert(window->lowLimit <= newCurrent);
817
+ assert(window->dictLimit <= newCurrent);
667
818
 
668
819
  DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
669
820
  window->lowLimit);
@@ -675,31 +826,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
675
826
  * Updates lowLimit so that:
676
827
  * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
677
828
  *
678
- * This allows a simple check that index >= lowLimit to see if index is valid.
679
- * This must be called before a block compression call, with srcEnd as the block
680
- * source end.
829
+ * It ensures index is valid as long as index >= lowLimit.
830
+ * This must be called before a block compression call.
831
+ *
832
+ * loadedDictEnd is only defined if a dictionary is in use for current compression.
833
+ * As the name implies, loadedDictEnd represents the index at end of dictionary.
834
+ * The value lies within context's referential, it can be directly compared to blockEndIdx.
681
835
  *
682
- * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
683
- * This is because dictionaries are allowed to be referenced as long as the last
684
- * byte of the dictionary is in the window, but once they are out of range,
685
- * they cannot be referenced. If loadedDictEndPtr is NULL, we use
686
- * loadedDictEnd == 0.
836
+ * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
837
+ * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
838
+ * This is because dictionaries are allowed to be referenced fully
839
+ * as long as the last byte of the dictionary is in the window.
840
+ * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
687
841
  *
688
- * In normal dict mode, the dict is between lowLimit and dictLimit. In
689
- * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
690
- * is below them. forceWindow and dictMatchState are therefore incompatible.
842
+ * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
843
+ * In dictMatchState mode, lowLimit and dictLimit are the same,
844
+ * and the dictionary is below them.
845
+ * forceWindow and dictMatchState are therefore incompatible.
691
846
  */
692
847
  MEM_STATIC void
693
848
  ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
694
- void const* srcEnd,
695
- U32 maxDist,
696
- U32* loadedDictEndPtr,
849
+ const void* blockEnd,
850
+ U32 maxDist,
851
+ U32* loadedDictEndPtr,
697
852
  const ZSTD_matchState_t** dictMatchStatePtr)
698
853
  {
699
- U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
700
- U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
701
- DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
702
- (unsigned)blockEndIdx, (unsigned)maxDist);
854
+ U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
855
+ U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
856
+ DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
857
+ (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
858
+
859
+ /* - When there is no dictionary : loadedDictEnd == 0.
860
+ In which case, the test (blockEndIdx > maxDist) is merely to avoid
861
+ overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
862
+ - When there is a standard dictionary :
863
+ Index referential is copied from the dictionary,
864
+ which means it starts from 0.
865
+ In which case, loadedDictEnd == dictSize,
866
+ and it makes sense to compare `blockEndIdx > maxDist + dictSize`
867
+ since `blockEndIdx` also starts from zero.
868
+ - When there is an attached dictionary :
869
+ loadedDictEnd is expressed within the referential of the context,
870
+ so it can be directly compared against blockEndIdx.
871
+ */
703
872
  if (blockEndIdx > maxDist + loadedDictEnd) {
704
873
  U32 const newLowLimit = blockEndIdx - maxDist;
705
874
  if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
@@ -708,11 +877,54 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
708
877
  (unsigned)window->dictLimit, (unsigned)window->lowLimit);
709
878
  window->dictLimit = window->lowLimit;
710
879
  }
711
- if (loadedDictEndPtr)
880
+ /* On reaching window size, dictionaries are invalidated */
881
+ if (loadedDictEndPtr) *loadedDictEndPtr = 0;
882
+ if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
883
+ }
884
+ }
885
+
886
+ /* Similar to ZSTD_window_enforceMaxDist(),
887
+ * but only invalidates dictionary
888
+ * when input progresses beyond window size.
889
+ * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
890
+ * loadedDictEnd uses same referential as window->base
891
+ * maxDist is the window size */
892
+ MEM_STATIC void
893
+ ZSTD_checkDictValidity(const ZSTD_window_t* window,
894
+ const void* blockEnd,
895
+ U32 maxDist,
896
+ U32* loadedDictEndPtr,
897
+ const ZSTD_matchState_t** dictMatchStatePtr)
898
+ {
899
+ assert(loadedDictEndPtr != NULL);
900
+ assert(dictMatchStatePtr != NULL);
901
+ { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
902
+ U32 const loadedDictEnd = *loadedDictEndPtr;
903
+ DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
904
+ (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
905
+ assert(blockEndIdx >= loadedDictEnd);
906
+
907
+ if (blockEndIdx > loadedDictEnd + maxDist) {
908
+ /* On reaching window size, dictionaries are invalidated.
909
+ * For simplification, if window size is reached anywhere within next block,
910
+ * the dictionary is invalidated for the full block.
911
+ */
912
+ DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
712
913
  *loadedDictEndPtr = 0;
713
- if (dictMatchStatePtr)
714
914
  *dictMatchStatePtr = NULL;
715
- }
915
+ } else {
916
+ if (*loadedDictEndPtr != 0) {
917
+ DEBUGLOG(6, "dictionary considered valid for current block");
918
+ } } }
919
+ }
920
+
921
+ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
922
+ memset(window, 0, sizeof(*window));
923
+ window->base = (BYTE const*)"";
924
+ window->dictBase = (BYTE const*)"";
925
+ window->dictLimit = 1; /* start from 1, so that 1st position is valid */
926
+ window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
927
+ window->nextSrc = window->base + 1; /* see issue #1241 */
716
928
  }
717
929
 
718
930
  /**
@@ -728,6 +940,10 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
728
940
  BYTE const* const ip = (BYTE const*)src;
729
941
  U32 contiguous = 1;
730
942
  DEBUGLOG(5, "ZSTD_window_update");
943
+ if (srcSize == 0)
944
+ return contiguous;
945
+ assert(window->base != NULL);
946
+ assert(window->dictBase != NULL);
731
947
  /* Check if blocks follow each other */
732
948
  if (src != window->nextSrc) {
733
949
  /* not contiguous */
@@ -738,7 +954,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
738
954
  window->dictLimit = (U32)distanceFromBase;
739
955
  window->dictBase = window->base;
740
956
  window->base = ip - distanceFromBase;
741
- // ms->nextToUpdate = window->dictLimit;
957
+ /* ms->nextToUpdate = window->dictLimit; */
742
958
  if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
743
959
  contiguous = 0;
744
960
  }
@@ -754,6 +970,33 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
754
970
  return contiguous;
755
971
  }
756
972
 
973
+ /**
974
+ * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
975
+ */
976
+ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
977
+ {
978
+ U32 const maxDistance = 1U << windowLog;
979
+ U32 const lowestValid = ms->window.lowLimit;
980
+ U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
981
+ U32 const isDictionary = (ms->loadedDictEnd != 0);
982
+ U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
983
+ return matchLowest;
984
+ }
985
+
986
+ /**
987
+ * Returns the lowest allowed match index in the prefix.
988
+ */
989
+ MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
990
+ {
991
+ U32 const maxDistance = 1U << windowLog;
992
+ U32 const lowestValid = ms->window.dictLimit;
993
+ U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
994
+ U32 const isDictionary = (ms->loadedDictEnd != 0);
995
+ U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
996
+ return matchLowest;
997
+ }
998
+
999
+
757
1000
 
758
1001
  /* debug functions */
759
1002
  #if (DEBUGLEVEL>=2)
@@ -791,6 +1034,21 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
791
1034
  }
792
1035
  #endif
793
1036
 
1037
+ /* ===============================================================
1038
+ * Shared internal declarations
1039
+ * These prototypes may be called from sources not in lib/compress
1040
+ * =============================================================== */
1041
+
1042
+ /* ZSTD_loadCEntropy() :
1043
+ * dict : must point at beginning of a valid zstd dictionary.
1044
+ * return : size of dictionary header (size of magic number + dict ID + entropy tables)
1045
+ * assumptions : magic number supposed already checked
1046
+ * and dictSize >= 8 */
1047
+ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
1048
+ short* offcodeNCount, unsigned* offcodeMaxValue,
1049
+ const void* const dict, size_t dictSize);
1050
+
1051
+ void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
794
1052
 
795
1053
  /* ==============================================================
796
1054
  * Private declarations
@@ -800,6 +1058,7 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
800
1058
  /* ZSTD_getCParamsFromCCtxParams() :
801
1059
  * cParams are built depending on compressionLevel, src size hints,
802
1060
  * LDM and manually set compression parameters.
1061
+ * Note: srcSizeHint == 0 means 0!
803
1062
  */
804
1063
  ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
805
1064
  const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
@@ -812,7 +1071,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
812
1071
  size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
813
1072
  const void* dict, size_t dictSize,
814
1073
  const ZSTD_CDict* cdict,
815
- ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
1074
+ const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
816
1075
 
817
1076
  void ZSTD_resetSeqStore(seqStore_t* ssPtr);
818
1077
 
@@ -827,7 +1086,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
827
1086
  ZSTD_dictContentType_e dictContentType,
828
1087
  ZSTD_dictTableLoadMethod_e dtlm,
829
1088
  const ZSTD_CDict* cdict,
830
- ZSTD_CCtx_params params,
1089
+ const ZSTD_CCtx_params* params,
831
1090
  unsigned long long pledgedSrcSize);
832
1091
 
833
1092
  /* ZSTD_compress_advanced_internal() :
@@ -836,7 +1095,7 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
836
1095
  void* dst, size_t dstCapacity,
837
1096
  const void* src, size_t srcSize,
838
1097
  const void* dict,size_t dictSize,
839
- ZSTD_CCtx_params params);
1098
+ const ZSTD_CCtx_params* params);
840
1099
 
841
1100
 
842
1101
  /* ZSTD_writeLastEmptyBlock() :
@@ -859,5 +1118,8 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
859
1118
  */
860
1119
  size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
861
1120
 
1121
+ /** ZSTD_cycleLog() :
1122
+ * condition for correct operation : hashLog > 1 */
1123
+ U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
862
1124
 
863
1125
  #endif /* ZSTD_COMPRESS_H */