zstd-ruby 1.5.2.2 → 1.5.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/README.md +15 -3
  3. data/ext/zstdruby/common.h +7 -0
  4. data/ext/zstdruby/libzstd/common/bits.h +175 -0
  5. data/ext/zstdruby/libzstd/common/bitstream.h +18 -59
  6. data/ext/zstdruby/libzstd/common/compiler.h +22 -3
  7. data/ext/zstdruby/libzstd/common/cpu.h +1 -1
  8. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  9. data/ext/zstdruby/libzstd/common/debug.h +1 -1
  10. data/ext/zstdruby/libzstd/common/entropy_common.c +12 -40
  11. data/ext/zstdruby/libzstd/common/error_private.c +9 -2
  12. data/ext/zstdruby/libzstd/common/error_private.h +1 -1
  13. data/ext/zstdruby/libzstd/common/fse.h +5 -83
  14. data/ext/zstdruby/libzstd/common/fse_decompress.c +7 -99
  15. data/ext/zstdruby/libzstd/common/huf.h +65 -156
  16. data/ext/zstdruby/libzstd/common/mem.h +39 -46
  17. data/ext/zstdruby/libzstd/common/pool.c +26 -10
  18. data/ext/zstdruby/libzstd/common/pool.h +7 -1
  19. data/ext/zstdruby/libzstd/common/portability_macros.h +22 -3
  20. data/ext/zstdruby/libzstd/common/threading.c +68 -14
  21. data/ext/zstdruby/libzstd/common/threading.h +5 -10
  22. data/ext/zstdruby/libzstd/common/xxhash.c +2 -2
  23. data/ext/zstdruby/libzstd/common/xxhash.h +8 -8
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
  25. data/ext/zstdruby/libzstd/common/zstd_deps.h +1 -1
  26. data/ext/zstdruby/libzstd/common/zstd_internal.h +17 -113
  27. data/ext/zstdruby/libzstd/common/zstd_trace.h +3 -3
  28. data/ext/zstdruby/libzstd/compress/clevels.h +1 -1
  29. data/ext/zstdruby/libzstd/compress/fse_compress.c +7 -124
  30. data/ext/zstdruby/libzstd/compress/hist.c +1 -1
  31. data/ext/zstdruby/libzstd/compress/hist.h +1 -1
  32. data/ext/zstdruby/libzstd/compress/huf_compress.c +234 -169
  33. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1055 -455
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +165 -145
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +115 -39
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -8
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +3 -3
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  41. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +5 -3
  42. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +95 -33
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
  44. data/ext/zstdruby/libzstd/compress/zstd_fast.c +433 -148
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
  46. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +306 -283
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +4 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +5 -5
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +1 -1
  51. data/ext/zstdruby/libzstd/compress/zstd_opt.c +104 -80
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  53. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +12 -5
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +1 -1
  55. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +434 -441
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +30 -39
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +3 -4
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +1 -1
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +164 -42
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +186 -65
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +1 -1
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +4 -2
  63. data/ext/zstdruby/libzstd/dictBuilder/cover.c +19 -15
  64. data/ext/zstdruby/libzstd/dictBuilder/cover.h +1 -1
  65. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +2 -2
  66. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +9 -87
  67. data/ext/zstdruby/libzstd/zdict.h +53 -31
  68. data/ext/zstdruby/libzstd/zstd.h +489 -90
  69. data/ext/zstdruby/libzstd/zstd_errors.h +27 -8
  70. data/ext/zstdruby/main.c +4 -0
  71. data/ext/zstdruby/streaming_compress.c +1 -7
  72. data/ext/zstdruby/zstdruby.c +110 -26
  73. data/lib/zstd-ruby/version.rb +1 -1
  74. data/lib/zstd-ruby.rb +0 -1
  75. metadata +7 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -25,6 +25,8 @@ extern "C" {
25
25
  */
26
26
  #define ZSTD_LAZY_DDSS_BUCKET_LOG 2
27
27
 
28
+ #define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
29
+
28
30
  U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
29
31
  void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
30
32
 
@@ -116,7 +118,7 @@ size_t ZSTD_compressBlock_lazy2_extDict_row(
116
118
  size_t ZSTD_compressBlock_btlazy2_extDict(
117
119
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
118
120
  void const* src, size_t srcSize);
119
-
121
+
120
122
 
121
123
  #if defined (__cplusplus)
122
124
  }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -242,11 +242,11 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
242
242
  switch(ms->cParams.strategy)
243
243
  {
244
244
  case ZSTD_fast:
245
- ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
245
+ ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
246
246
  break;
247
247
 
248
248
  case ZSTD_dfast:
249
- ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
249
+ ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
250
250
  break;
251
251
 
252
252
  case ZSTD_greedy:
@@ -549,7 +549,7 @@ size_t ZSTD_ldm_generateSequences(
549
549
  * the window through early invalidation.
550
550
  * TODO: * Test the chunk size.
551
551
  * * Try invalidation after the sequence generation and test the
552
- * the offset against maxDist directly.
552
+ * offset against maxDist directly.
553
553
  *
554
554
  * NOTE: Because of dictionaries + sequence splitting we MUST make sure
555
555
  * that any offset used is valid at the END of the sequence, since it may
@@ -711,7 +711,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
711
711
  rep[0] = sequence.offset;
712
712
  /* Store the sequence */
713
713
  ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
714
- STORE_OFFSET(sequence.offset),
714
+ OFFSET_TO_OFFBASE(sequence.offset),
715
715
  sequence.matchLength);
716
716
  ip += sequence.matchLength;
717
717
  }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -16,7 +16,7 @@
16
16
  #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17
17
  #define ZSTD_MAX_PRICE (1<<30)
18
18
 
19
- #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
19
+ #define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
20
20
 
21
21
 
22
22
  /*-*************************************
@@ -26,27 +26,35 @@
26
26
  #if 0 /* approximation at bit level (for tests) */
27
27
  # define BITCOST_ACCURACY 0
28
28
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
29
- # define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
29
+ # define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
30
30
  #elif 0 /* fractional bit accuracy (for tests) */
31
31
  # define BITCOST_ACCURACY 8
32
32
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
33
- # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
33
+ # define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
34
34
  #else /* opt==approx, ultra==accurate */
35
35
  # define BITCOST_ACCURACY 8
36
36
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
37
- # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
37
+ # define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
38
38
  #endif
39
39
 
40
+ /* ZSTD_bitWeight() :
41
+ * provide estimated "cost" of a stat in full bits only */
40
42
  MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
41
43
  {
42
44
  return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
43
45
  }
44
46
 
47
+ /* ZSTD_fracWeight() :
48
+ * provide fractional-bit "cost" of a stat,
49
+ * using linear interpolation approximation */
45
50
  MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
46
51
  {
47
52
  U32 const stat = rawStat + 1;
48
53
  U32 const hb = ZSTD_highbit32(stat);
49
54
  U32 const BWeight = hb * BITCOST_MULTIPLIER;
55
+ /* Fweight was meant for "Fractional weight"
56
+ * but it's effectively a value between 1 and 2
57
+ * using fixed point arithmetic */
50
58
  U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
51
59
  U32 const weight = BWeight + FWeight;
52
60
  assert(hb + BITCOST_ACCURACY < 31);
@@ -57,7 +65,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
57
65
  /* debugging function,
58
66
  * @return price in bytes as fractional value
59
67
  * for debug messages only */
60
- MEM_STATIC double ZSTD_fCost(U32 price)
68
+ MEM_STATIC double ZSTD_fCost(int price)
61
69
  {
62
70
  return (double)price / (BITCOST_MULTIPLIER*8);
63
71
  }
@@ -88,20 +96,26 @@ static U32 sum_u32(const unsigned table[], size_t nbElts)
88
96
  return total;
89
97
  }
90
98
 
91
- static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
99
+ typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
100
+
101
+ static U32
102
+ ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
92
103
  {
93
104
  U32 s, sum=0;
94
- DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
105
+ DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
106
+ (unsigned)lastEltIndex+1, (unsigned)shift );
95
107
  assert(shift < 30);
96
108
  for (s=0; s<lastEltIndex+1; s++) {
97
- table[s] = 1 + (table[s] >> shift);
98
- sum += table[s];
109
+ unsigned const base = base1 ? 1 : (table[s]>0);
110
+ unsigned const newStat = base + (table[s] >> shift);
111
+ sum += newStat;
112
+ table[s] = newStat;
99
113
  }
100
114
  return sum;
101
115
  }
102
116
 
103
117
  /* ZSTD_scaleStats() :
104
- * reduce all elements in table is sum too large
118
+ * reduce all elt frequencies in table if sum too large
105
119
  * return the resulting sum of elements */
106
120
  static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
107
121
  {
@@ -110,7 +124,7 @@ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
110
124
  DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
111
125
  assert(logTarget < 30);
112
126
  if (factor <= 1) return prevsum;
113
- return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
127
+ return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
114
128
  }
115
129
 
116
130
  /* ZSTD_rescaleFreqs() :
@@ -129,18 +143,22 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
129
143
  DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
130
144
  optPtr->priceType = zop_dynamic;
131
145
 
132
- if (optPtr->litLengthSum == 0) { /* first block : init */
133
- if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
134
- DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
146
+ if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
147
+
148
+ /* heuristic: use pre-defined stats for too small inputs */
149
+ if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
150
+ DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
135
151
  optPtr->priceType = zop_predef;
136
152
  }
137
153
 
138
154
  assert(optPtr->symbolCosts != NULL);
139
155
  if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
140
- /* huffman table presumed generated by dictionary */
156
+
157
+ /* huffman stats covering the full value set : table presumed generated by dictionary */
141
158
  optPtr->priceType = zop_dynamic;
142
159
 
143
160
  if (compressedLiterals) {
161
+ /* generate literals statistics from huffman table */
144
162
  unsigned lit;
145
163
  assert(optPtr->litFreq != NULL);
146
164
  optPtr->litSum = 0;
@@ -188,13 +206,14 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
188
206
  optPtr->offCodeSum += optPtr->offCodeFreq[of];
189
207
  } }
190
208
 
191
- } else { /* not a dictionary */
209
+ } else { /* first block, no dictionary */
192
210
 
193
211
  assert(optPtr->litFreq != NULL);
194
212
  if (compressedLiterals) {
213
+ /* base initial cost of literals on direct frequency within src */
195
214
  unsigned lit = MaxLit;
196
215
  HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
197
- optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
216
+ optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
198
217
  }
199
218
 
200
219
  { unsigned const baseLLfreqs[MaxLL+1] = {
@@ -224,10 +243,9 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
224
243
  optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
225
244
  }
226
245
 
227
-
228
246
  }
229
247
 
230
- } else { /* new block : re-use previous statistics, scaled down */
248
+ } else { /* new block : scale down accumulated statistics */
231
249
 
232
250
  if (compressedLiterals)
233
251
  optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
@@ -255,11 +273,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
255
273
  return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
256
274
 
257
275
  /* dynamic statistics */
258
- { U32 price = litLength * optPtr->litSumBasePrice;
276
+ { U32 price = optPtr->litSumBasePrice * litLength;
277
+ U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
259
278
  U32 u;
279
+ assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
260
280
  for (u=0; u < litLength; u++) {
261
- assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
262
- price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
281
+ U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
282
+ if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
283
+ price -= litPrice;
263
284
  }
264
285
  return price;
265
286
  }
@@ -272,10 +293,11 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
272
293
  assert(litLength <= ZSTD_BLOCKSIZE_MAX);
273
294
  if (optPtr->priceType == zop_predef)
274
295
  return WEIGHT(litLength, optLevel);
275
- /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
276
- * because it isn't representable in the zstd format. So instead just
277
- * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
278
- * would be all literals.
296
+
297
+ /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
298
+ * because it isn't representable in the zstd format.
299
+ * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
300
+ * In such a case, the block would be all literals.
279
301
  */
280
302
  if (litLength == ZSTD_BLOCKSIZE_MAX)
281
303
  return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
@@ -289,24 +311,25 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
289
311
  }
290
312
 
291
313
  /* ZSTD_getMatchPrice() :
292
- * Provides the cost of the match part (offset + matchLength) of a sequence
314
+ * Provides the cost of the match part (offset + matchLength) of a sequence.
293
315
  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
294
- * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
316
+ * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
295
317
  * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
296
318
  */
297
319
  FORCE_INLINE_TEMPLATE U32
298
- ZSTD_getMatchPrice(U32 const offcode,
320
+ ZSTD_getMatchPrice(U32 const offBase,
299
321
  U32 const matchLength,
300
322
  const optState_t* const optPtr,
301
323
  int const optLevel)
302
324
  {
303
325
  U32 price;
304
- U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
326
+ U32 const offCode = ZSTD_highbit32(offBase);
305
327
  U32 const mlBase = matchLength - MINMATCH;
306
328
  assert(matchLength >= MINMATCH);
307
329
 
308
- if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
309
- return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
330
+ if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
331
+ return WEIGHT(mlBase, optLevel)
332
+ + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
310
333
 
311
334
  /* dynamic statistics */
312
335
  price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
@@ -325,10 +348,10 @@ ZSTD_getMatchPrice(U32 const offcode,
325
348
  }
326
349
 
327
350
  /* ZSTD_updateStats() :
328
- * assumption : literals + litLengtn <= iend */
351
+ * assumption : literals + litLength <= iend */
329
352
  static void ZSTD_updateStats(optState_t* const optPtr,
330
353
  U32 litLength, const BYTE* literals,
331
- U32 offsetCode, U32 matchLength)
354
+ U32 offBase, U32 matchLength)
332
355
  {
333
356
  /* literals */
334
357
  if (ZSTD_compressedLiterals(optPtr)) {
@@ -344,8 +367,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
344
367
  optPtr->litLengthSum++;
345
368
  }
346
369
 
347
- /* offset code : expected to follow storeSeq() numeric representation */
348
- { U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
370
+ /* offset code : follows storeSeq() numeric representation */
371
+ { U32 const offCode = ZSTD_highbit32(offBase);
349
372
  assert(offCode <= MaxOff);
350
373
  optPtr->offCodeFreq[offCode]++;
351
374
  optPtr->offCodeSum++;
@@ -552,16 +575,17 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
552
575
  ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
553
576
  }
554
577
 
555
- FORCE_INLINE_TEMPLATE
556
- U32 ZSTD_insertBtAndGetAllMatches (
557
- ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
558
- ZSTD_matchState_t* ms,
559
- U32* nextToUpdate3,
560
- const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
561
- const U32 rep[ZSTD_REP_NUM],
562
- U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
563
- const U32 lengthToBeat,
564
- U32 const mls /* template */)
578
+ FORCE_INLINE_TEMPLATE U32
579
+ ZSTD_insertBtAndGetAllMatches (
580
+ ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
581
+ ZSTD_matchState_t* ms,
582
+ U32* nextToUpdate3,
583
+ const BYTE* const ip, const BYTE* const iLimit,
584
+ const ZSTD_dictMode_e dictMode,
585
+ const U32 rep[ZSTD_REP_NUM],
586
+ const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
587
+ const U32 lengthToBeat,
588
+ const U32 mls /* template */)
565
589
  {
566
590
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
567
591
  U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
@@ -644,7 +668,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
644
668
  DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
645
669
  repCode, ll0, repOffset, repLen);
646
670
  bestLength = repLen;
647
- matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */
671
+ matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */
648
672
  matches[mnum].len = (U32)repLen;
649
673
  mnum++;
650
674
  if ( (repLen > sufficient_len)
@@ -673,7 +697,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
673
697
  bestLength = mlen;
674
698
  assert(curr > matchIndex3);
675
699
  assert(mnum==0); /* no prior solution */
676
- matches[0].off = STORE_OFFSET(curr - matchIndex3);
700
+ matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
677
701
  matches[0].len = (U32)mlen;
678
702
  mnum = 1;
679
703
  if ( (mlen > sufficient_len) |
@@ -706,13 +730,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
706
730
  }
707
731
 
708
732
  if (matchLength > bestLength) {
709
- DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
710
- (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
733
+ DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
734
+ (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
711
735
  assert(matchEndIdx > matchIndex);
712
736
  if (matchLength > matchEndIdx - matchIndex)
713
737
  matchEndIdx = matchIndex + (U32)matchLength;
714
738
  bestLength = matchLength;
715
- matches[mnum].off = STORE_OFFSET(curr - matchIndex);
739
+ matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
716
740
  matches[mnum].len = (U32)matchLength;
717
741
  mnum++;
718
742
  if ( (matchLength > ZSTD_OPT_NUM)
@@ -754,12 +778,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
754
778
 
755
779
  if (matchLength > bestLength) {
756
780
  matchIndex = dictMatchIndex + dmsIndexDelta;
757
- DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
758
- (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
781
+ DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
782
+ (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
759
783
  if (matchLength > matchEndIdx - matchIndex)
760
784
  matchEndIdx = matchIndex + (U32)matchLength;
761
785
  bestLength = matchLength;
762
- matches[mnum].off = STORE_OFFSET(curr - matchIndex);
786
+ matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
763
787
  matches[mnum].len = (U32)matchLength;
764
788
  mnum++;
765
789
  if ( (matchLength > ZSTD_OPT_NUM)
@@ -960,7 +984,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
960
984
  const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
961
985
  {
962
986
  U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
963
- /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
987
+ /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
964
988
  U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
965
989
 
966
990
  /* Ensure that current block position is not outside of the match */
@@ -971,11 +995,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
971
995
  }
972
996
 
973
997
  if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
974
- U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
975
- DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
976
- candidateOffCode, candidateMatchLength, currPosInBlock);
998
+ U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
999
+ DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
1000
+ candidateOffBase, candidateMatchLength, currPosInBlock);
977
1001
  matches[*nbMatches].len = candidateMatchLength;
978
- matches[*nbMatches].off = candidateOffCode;
1002
+ matches[*nbMatches].off = candidateOffBase;
979
1003
  (*nbMatches)++;
980
1004
  }
981
1005
  }
@@ -1098,14 +1122,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1098
1122
 
1099
1123
  /* large match -> immediate encoding */
1100
1124
  { U32 const maxML = matches[nbMatches-1].len;
1101
- U32 const maxOffcode = matches[nbMatches-1].off;
1102
- DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
1103
- nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
1125
+ U32 const maxOffBase = matches[nbMatches-1].off;
1126
+ DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
1127
+ nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
1104
1128
 
1105
1129
  if (maxML > sufficient_len) {
1106
1130
  lastSequence.litlen = litlen;
1107
1131
  lastSequence.mlen = maxML;
1108
- lastSequence.off = maxOffcode;
1132
+ lastSequence.off = maxOffBase;
1109
1133
  DEBUGLOG(6, "large match (%u>%u), immediate encoding",
1110
1134
  maxML, sufficient_len);
1111
1135
  cur = 0;
@@ -1122,15 +1146,15 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1122
1146
  opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
1123
1147
  }
1124
1148
  for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1125
- U32 const offcode = matches[matchNb].off;
1149
+ U32 const offBase = matches[matchNb].off;
1126
1150
  U32 const end = matches[matchNb].len;
1127
1151
  for ( ; pos <= end ; pos++ ) {
1128
- U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
1152
+ U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
1129
1153
  U32 const sequencePrice = literalsPrice + matchPrice;
1130
1154
  DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1131
- pos, ZSTD_fCost(sequencePrice));
1155
+ pos, ZSTD_fCost((int)sequencePrice));
1132
1156
  opt[pos].mlen = pos;
1133
- opt[pos].off = offcode;
1157
+ opt[pos].off = offBase;
1134
1158
  opt[pos].litlen = litlen;
1135
1159
  opt[pos].price = (int)sequencePrice;
1136
1160
  } }
@@ -1230,7 +1254,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1230
1254
  U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1231
1255
  U32 mlen;
1232
1256
 
1233
- DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
1257
+ DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
1234
1258
  matchNb, matches[matchNb].off, lastML, litlen);
1235
1259
 
1236
1260
  for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
@@ -1296,7 +1320,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1296
1320
  for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1297
1321
  U32 const llen = opt[storePos].litlen;
1298
1322
  U32 const mlen = opt[storePos].mlen;
1299
- U32 const offCode = opt[storePos].off;
1323
+ U32 const offBase = opt[storePos].off;
1300
1324
  U32 const advance = llen + mlen;
1301
1325
  DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1302
1326
  anchor - istart, (unsigned)llen, (unsigned)mlen);
@@ -1308,8 +1332,8 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1308
1332
  }
1309
1333
 
1310
1334
  assert(anchor + llen <= iend);
1311
- ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1312
- ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
1335
+ ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
1336
+ ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
1313
1337
  anchor += advance;
1314
1338
  ip = anchor;
1315
1339
  } }
@@ -1349,7 +1373,7 @@ size_t ZSTD_compressBlock_btopt(
1349
1373
  /* ZSTD_initStats_ultra():
1350
1374
  * make a first compression pass, just to seed stats with more accurate starting values.
1351
1375
  * only works on first block, with no dictionary and no ldm.
1352
- * this function cannot error, hence its contract must be respected.
1376
+ * this function cannot error out, its narrow contract must be respected.
1353
1377
  */
1354
1378
  static void
1355
1379
  ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
@@ -1368,7 +1392,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1368
1392
 
1369
1393
  ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
1370
1394
 
1371
- /* invalidate first scan from history */
1395
+ /* invalidate first scan from history, only keep entropy stats */
1372
1396
  ZSTD_resetSeqStore(seqStore);
1373
1397
  ms->window.base -= srcSize;
1374
1398
  ms->window.dictLimit += (U32)srcSize;
@@ -1392,20 +1416,20 @@ size_t ZSTD_compressBlock_btultra2(
1392
1416
  U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1393
1417
  DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1394
1418
 
1395
- /* 2-pass strategy:
1419
+ /* 2-passes strategy:
1396
1420
  * this strategy makes a first pass over first block to collect statistics
1397
- * and seed next round's statistics with it.
1398
- * After 1st pass, function forgets everything, and starts a new block.
1421
+ * in order to seed next round's statistics with it.
1422
+ * After 1st pass, function forgets history, and starts a new block.
1399
1423
  * Consequently, this can only work if no data has been previously loaded in tables,
1400
1424
  * aka, no dictionary, no prefix, no ldm preprocessing.
1401
1425
  * The compression ratio gain is generally small (~0.5% on first block),
1402
- * the cost is 2x cpu time on first block. */
1426
+ ** the cost is 2x cpu time on first block. */
1403
1427
  assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1404
1428
  if ( (ms->opt.litLengthSum==0) /* first block */
1405
1429
  && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1406
1430
  && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
1407
- && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1408
- && (srcSize > ZSTD_PREDEF_THRESHOLD)
1431
+ && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1432
+ && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
1409
1433
  ) {
1410
1434
  ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1411
1435
  }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -266,11 +266,11 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
266
266
  * 1 buffer for input loading
267
267
  * 1 buffer for "next input" when submitting current one
268
268
  * 1 buffer stuck in queue */
269
- #define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) 2*nbWorkers + 3
269
+ #define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
270
270
 
271
271
  /* After a worker releases its rawSeqStore, it is immediately ready for reuse.
272
272
  * So we only need one seq buffer per worker. */
273
- #define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) nbWorkers
273
+ #define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)
274
274
 
275
275
  /* ===== Seq Pool Wrapper ====== */
276
276
 
@@ -1734,7 +1734,7 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
1734
1734
  }
1735
1735
  } else {
1736
1736
  /* We have enough bytes buffered to initialize the hash,
1737
- * and are have processed enough bytes to find a sync point.
1737
+ * and have processed enough bytes to find a sync point.
1738
1738
  * Start scanning at the beginning of the input.
1739
1739
  */
1740
1740
  assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
@@ -1761,17 +1761,24 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
1761
1761
  * then a block will be emitted anyways, but this is okay, since if we
1762
1762
  * are already synchronized we will remain synchronized.
1763
1763
  */
1764
+ assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
1764
1765
  for (; pos < syncPoint.toLoad; ++pos) {
1765
1766
  BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
1766
- assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
1767
+ /* This assert is very expensive, and Debian compiles with asserts enabled.
1768
+ * So disable it for now. We can get similar coverage by checking it at the
1769
+ * beginning & end of the loop.
1770
+ * assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
1771
+ */
1767
1772
  hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
1768
1773
  assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
1769
1774
  if ((hash & hitMask) == hitMask) {
1770
1775
  syncPoint.toLoad = pos + 1;
1771
1776
  syncPoint.flush = 1;
1777
+ ++pos; /* for assert */
1772
1778
  break;
1773
1779
  }
1774
1780
  }
1781
+ assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
1775
1782
  return syncPoint;
1776
1783
  }
1777
1784
 
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the