zstd-ruby 1.4.4.0 → 1.5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/extconf.rb +1 -0
  6. data/ext/zstdruby/libzstd/BUCK +5 -7
  7. data/ext/zstdruby/libzstd/Makefile +241 -173
  8. data/ext/zstdruby/libzstd/README.md +76 -18
  9. data/ext/zstdruby/libzstd/common/bitstream.h +75 -57
  10. data/ext/zstdruby/libzstd/common/compiler.h +196 -20
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  13. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +208 -76
  15. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  16. data/ext/zstdruby/libzstd/common/error_private.h +87 -4
  17. data/ext/zstdruby/libzstd/common/fse.h +51 -42
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -57
  19. data/ext/zstdruby/libzstd/common/huf.h +60 -54
  20. data/ext/zstdruby/libzstd/common/mem.h +87 -98
  21. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  22. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +10 -8
  25. data/ext/zstdruby/libzstd/common/threading.h +4 -3
  26. data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +252 -108
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +105 -85
  34. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  35. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +831 -259
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3213 -1007
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +493 -71
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +21 -16
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +51 -24
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +573 -0
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +208 -81
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +315 -137
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +319 -128
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1156 -171
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +331 -206
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +403 -226
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +188 -453
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1065 -410
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +691 -230
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1072 -323
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +16 -7
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +71 -10
  67. data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
  68. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  69. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
  70. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  71. data/ext/zstdruby/libzstd/dictBuilder/cover.c +57 -40
  72. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  73. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  74. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +54 -35
  75. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +151 -57
  76. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  77. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  78. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
  79. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +25 -19
  80. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +18 -14
  82. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +18 -14
  84. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  85. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +22 -16
  86. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -25
  88. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  89. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -25
  90. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  91. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -26
  92. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  93. data/ext/zstdruby/libzstd/libzstd.mk +185 -0
  94. data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
  95. data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
  96. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
  97. data/ext/zstdruby/libzstd/zstd.h +760 -234
  98. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
  99. data/ext/zstdruby/zstdruby.c +2 -2
  100. data/lib/zstd-ruby/version.rb +1 -1
  101. metadata +20 -9
  102. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,6 @@
14
14
 
15
15
 
16
16
  #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17
- #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
18
17
  #define ZSTD_MAX_PRICE (1<<30)
19
18
 
20
19
  #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
@@ -24,11 +23,11 @@
24
23
  * Price functions for optimal parser
25
24
  ***************************************/
26
25
 
27
- #if 0 /* approximation at bit level */
26
+ #if 0 /* approximation at bit level (for tests) */
28
27
  # define BITCOST_ACCURACY 0
29
28
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
30
- # define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
31
- #elif 0 /* fractional bit accuracy */
29
+ # define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
30
+ #elif 0 /* fractional bit accuracy (for tests) */
32
31
  # define BITCOST_ACCURACY 8
33
32
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
34
33
  # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
@@ -66,7 +65,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)
66
65
 
67
66
  static int ZSTD_compressedLiterals(optState_t const* const optPtr)
68
67
  {
69
- return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
68
+ return optPtr->literalCompressionMode != ZSTD_ps_disable;
70
69
  }
71
70
 
72
71
  static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
@@ -79,25 +78,46 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
79
78
  }
80
79
 
81
80
 
82
- /* ZSTD_downscaleStat() :
83
- * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
84
- * return the resulting sum of elements */
85
- static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
81
+ static U32 sum_u32(const unsigned table[], size_t nbElts)
82
+ {
83
+ size_t n;
84
+ U32 total = 0;
85
+ for (n=0; n<nbElts; n++) {
86
+ total += table[n];
87
+ }
88
+ return total;
89
+ }
90
+
91
+ static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
86
92
  {
87
93
  U32 s, sum=0;
88
- DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
89
- assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
94
+ DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
95
+ assert(shift < 30);
90
96
  for (s=0; s<lastEltIndex+1; s++) {
91
- table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
97
+ table[s] = 1 + (table[s] >> shift);
92
98
  sum += table[s];
93
99
  }
94
100
  return sum;
95
101
  }
96
102
 
103
+ /* ZSTD_scaleStats() :
104
+ * reduce all elements in table if sum is too large
105
+ * return the resulting sum of elements */
106
+ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
107
+ {
108
+ U32 const prevsum = sum_u32(table, lastEltIndex+1);
109
+ U32 const factor = prevsum >> logTarget;
110
+ DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
111
+ assert(logTarget < 30);
112
+ if (factor <= 1) return prevsum;
113
+ return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
114
+ }
115
+
97
116
  /* ZSTD_rescaleFreqs() :
98
117
  * if first block (detected by optPtr->litLengthSum == 0) : init statistics
99
118
  * take hints from dictionary if there is one
100
- * or init from zero, using src for literals stats, or flat 1 for match symbols
119
+ * and init from zero if there is none,
120
+ * using src for literals stats, and baseline stats for sequence symbols
101
121
  * otherwise downscale existing stats, to be used as seed for next block.
102
122
  */
103
123
  static void
@@ -126,7 +146,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
126
146
  optPtr->litSum = 0;
127
147
  for (lit=0; lit<=MaxLit; lit++) {
128
148
  U32 const scaleLog = 11; /* scale to 2K */
129
- U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
149
+ U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
130
150
  assert(bitCost <= scaleLog);
131
151
  optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
132
152
  optPtr->litSum += optPtr->litFreq[lit];
@@ -174,14 +194,18 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
174
194
  if (compressedLiterals) {
175
195
  unsigned lit = MaxLit;
176
196
  HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
177
- optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
197
+ optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
178
198
  }
179
199
 
180
- { unsigned ll;
181
- for (ll=0; ll<=MaxLL; ll++)
182
- optPtr->litLengthFreq[ll] = 1;
200
+ { unsigned const baseLLfreqs[MaxLL+1] = {
201
+ 4, 2, 1, 1, 1, 1, 1, 1,
202
+ 1, 1, 1, 1, 1, 1, 1, 1,
203
+ 1, 1, 1, 1, 1, 1, 1, 1,
204
+ 1, 1, 1, 1, 1, 1, 1, 1,
205
+ 1, 1, 1, 1
206
+ };
207
+ ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
183
208
  }
184
- optPtr->litLengthSum = MaxLL+1;
185
209
 
186
210
  { unsigned ml;
187
211
  for (ml=0; ml<=MaxML; ml++)
@@ -189,21 +213,25 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
189
213
  }
190
214
  optPtr->matchLengthSum = MaxML+1;
191
215
 
192
- { unsigned of;
193
- for (of=0; of<=MaxOff; of++)
194
- optPtr->offCodeFreq[of] = 1;
216
+ { unsigned const baseOFCfreqs[MaxOff+1] = {
217
+ 6, 2, 1, 1, 2, 3, 4, 4,
218
+ 4, 3, 2, 1, 1, 1, 1, 1,
219
+ 1, 1, 1, 1, 1, 1, 1, 1,
220
+ 1, 1, 1, 1, 1, 1, 1, 1
221
+ };
222
+ ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
195
223
  }
196
- optPtr->offCodeSum = MaxOff+1;
224
+
197
225
 
198
226
  }
199
227
 
200
228
  } else { /* new block : re-use previous statistics, scaled down */
201
229
 
202
230
  if (compressedLiterals)
203
- optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
204
- optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
205
- optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
206
- optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
231
+ optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
232
+ optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
233
+ optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
234
+ optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
207
235
  }
208
236
 
209
237
  ZSTD_setBasePrices(optPtr, optLevel);
@@ -249,40 +277,6 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
249
277
  }
250
278
  }
251
279
 
252
- /* ZSTD_litLengthContribution() :
253
- * @return ( cost(litlength) - cost(0) )
254
- * this value can then be added to rawLiteralsCost()
255
- * to provide a cost which is directly comparable to a match ending at same position */
256
- static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
257
- {
258
- if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);
259
-
260
- /* dynamic statistics */
261
- { U32 const llCode = ZSTD_LLcode(litLength);
262
- int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
263
- + (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
264
- - (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
265
- #if 1
266
- return contribution;
267
- #else
268
- return MAX(0, contribution); /* sometimes better, sometimes not ... */
269
- #endif
270
- }
271
- }
272
-
273
- /* ZSTD_literalsContribution() :
274
- * creates a fake cost for the literals part of a sequence
275
- * which can be compared to the ending cost of a match
276
- * should a new match start at this position */
277
- static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
278
- const optState_t* const optPtr,
279
- int optLevel)
280
- {
281
- int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
282
- + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
283
- return contribution;
284
- }
285
-
286
280
  /* ZSTD_getMatchPrice() :
287
281
  * Provides the cost of the match part (offset + matchLength) of a sequence
288
282
  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
@@ -372,7 +366,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
372
366
 
373
367
  /* Update hashTable3 up to ip (excluded)
374
368
  Assumption : always within prefix (i.e. not within extDict) */
375
- static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
369
+ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
376
370
  U32* nextToUpdate3,
377
371
  const BYTE* const ip)
378
372
  {
@@ -398,11 +392,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
398
392
  * Binary Tree search
399
393
  ***************************************/
400
394
  /** ZSTD_insertBt1() : add one or multiple positions to tree.
401
- * ip : assumed <= iend-8 .
395
+ * @param ip assumed <= iend-8 .
396
+ * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
402
397
  * @return : nb of positions added */
403
398
  static U32 ZSTD_insertBt1(
404
- ZSTD_matchState_t* ms,
399
+ const ZSTD_matchState_t* ms,
405
400
  const BYTE* const ip, const BYTE* const iend,
401
+ U32 const target,
406
402
  U32 const mls, const int extDict)
407
403
  {
408
404
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -420,32 +416,36 @@ static U32 ZSTD_insertBt1(
420
416
  const BYTE* const dictEnd = dictBase + dictLimit;
421
417
  const BYTE* const prefixStart = base + dictLimit;
422
418
  const BYTE* match;
423
- const U32 current = (U32)(ip-base);
424
- const U32 btLow = btMask >= current ? 0 : current - btMask;
425
- U32* smallerPtr = bt + 2*(current&btMask);
419
+ const U32 curr = (U32)(ip-base);
420
+ const U32 btLow = btMask >= curr ? 0 : curr - btMask;
421
+ U32* smallerPtr = bt + 2*(curr&btMask);
426
422
  U32* largerPtr = smallerPtr + 1;
427
423
  U32 dummy32; /* to be nullified at the end */
428
- U32 const windowLow = ms->window.lowLimit;
429
- U32 matchEndIdx = current+8+1;
424
+ /* windowLow is based on target because
425
+ * we only need positions that will be in the window at the end of the tree update.
426
+ */
427
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
428
+ U32 matchEndIdx = curr+8+1;
430
429
  size_t bestLength = 8;
431
430
  U32 nbCompares = 1U << cParams->searchLog;
432
431
  #ifdef ZSTD_C_PREDICT
433
- U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
434
- U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
432
+ U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
433
+ U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
435
434
  predictedSmall += (predictedSmall>0);
436
435
  predictedLarge += (predictedLarge>0);
437
436
  #endif /* ZSTD_C_PREDICT */
438
437
 
439
- DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
438
+ DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
440
439
 
440
+ assert(curr <= target);
441
441
  assert(ip <= iend-8); /* required for h calculation */
442
- hashTable[h] = current; /* Update Hash Table */
442
+ hashTable[h] = curr; /* Update Hash Table */
443
443
 
444
444
  assert(windowLow > 0);
445
- while (nbCompares-- && (matchIndex >= windowLow)) {
445
+ for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
446
446
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
447
447
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
448
- assert(matchIndex < current);
448
+ assert(matchIndex < curr);
449
449
 
450
450
  #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
451
451
  const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
@@ -508,8 +508,8 @@ static U32 ZSTD_insertBt1(
508
508
  *smallerPtr = *largerPtr = 0;
509
509
  { U32 positions = 0;
510
510
  if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
511
- assert(matchEndIdx > current + 8);
512
- return MAX(positions, matchEndIdx - (current + 8));
511
+ assert(matchEndIdx > curr + 8);
512
+ return MAX(positions, matchEndIdx - (curr + 8));
513
513
  }
514
514
  }
515
515
 
@@ -526,7 +526,7 @@ void ZSTD_updateTree_internal(
526
526
  idx, target, dictMode);
527
527
 
528
528
  while(idx < target) {
529
- U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
529
+ U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
530
530
  assert(idx < (U32)(idx + forward));
531
531
  idx += forward;
532
532
  }
@@ -553,7 +553,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
553
553
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
554
554
  U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
555
555
  const BYTE* const base = ms->window.base;
556
- U32 const current = (U32)(ip-base);
556
+ U32 const curr = (U32)(ip-base);
557
557
  U32 const hashLog = cParams->hashLog;
558
558
  U32 const minMatch = (mls==3) ? 3 : 4;
559
559
  U32* const hashTable = ms->hashTable;
@@ -567,12 +567,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
567
567
  U32 const dictLimit = ms->window.dictLimit;
568
568
  const BYTE* const dictEnd = dictBase + dictLimit;
569
569
  const BYTE* const prefixStart = base + dictLimit;
570
- U32 const btLow = (btMask >= current) ? 0 : current - btMask;
571
- U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
570
+ U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
571
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
572
572
  U32 const matchLow = windowLow ? windowLow : 1;
573
- U32* smallerPtr = bt + 2*(current&btMask);
574
- U32* largerPtr = bt + 2*(current&btMask) + 1;
575
- U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
573
+ U32* smallerPtr = bt + 2*(curr&btMask);
574
+ U32* largerPtr = bt + 2*(curr&btMask) + 1;
575
+ U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
576
576
  U32 dummy32; /* to be nullified at the end */
577
577
  U32 mnum = 0;
578
578
  U32 nbCompares = 1U << cParams->searchLog;
@@ -591,7 +591,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
591
591
  U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
592
592
 
593
593
  size_t bestLength = lengthToBeat-1;
594
- DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
594
+ DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
595
595
 
596
596
  /* check repCode */
597
597
  assert(ll0 <= 1); /* necessarily 1 or 0 */
@@ -599,26 +599,29 @@ U32 ZSTD_insertBtAndGetAllMatches (
599
599
  U32 repCode;
600
600
  for (repCode = ll0; repCode < lastR; repCode++) {
601
601
  U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
602
- U32 const repIndex = current - repOffset;
602
+ U32 const repIndex = curr - repOffset;
603
603
  U32 repLen = 0;
604
- assert(current >= dictLimit);
605
- if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
606
- if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
604
+ assert(curr >= dictLimit);
605
+ if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
606
+ /* We must validate the repcode offset because when we're using a dictionary the
607
+ * valid offset range shrinks when the dictionary goes out of bounds.
608
+ */
609
+ if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
607
610
  repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
608
611
  }
609
- } else { /* repIndex < dictLimit || repIndex >= current */
612
+ } else { /* repIndex < dictLimit || repIndex >= curr */
610
613
  const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
611
614
  dmsBase + repIndex - dmsIndexDelta :
612
615
  dictBase + repIndex;
613
- assert(current >= windowLow);
616
+ assert(curr >= windowLow);
614
617
  if ( dictMode == ZSTD_extDict
615
- && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
618
+ && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
616
619
  & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
617
620
  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
618
621
  repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
619
622
  }
620
623
  if (dictMode == ZSTD_dictMatchState
621
- && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */
624
+ && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
622
625
  & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
623
626
  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
624
627
  repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
@@ -640,7 +643,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
640
643
  if ((mls == 3) /*static*/ && (bestLength < mls)) {
641
644
  U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
642
645
  if ((matchIndex3 >= matchLow)
643
- & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
646
+ & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
644
647
  size_t mlen;
645
648
  if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
646
649
  const BYTE* const match = base + matchIndex3;
@@ -655,26 +658,26 @@ U32 ZSTD_insertBtAndGetAllMatches (
655
658
  DEBUGLOG(8, "found small match with hlog3, of length %u",
656
659
  (U32)mlen);
657
660
  bestLength = mlen;
658
- assert(current > matchIndex3);
661
+ assert(curr > matchIndex3);
659
662
  assert(mnum==0); /* no prior solution */
660
- matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
663
+ matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
661
664
  matches[0].len = (U32)mlen;
662
665
  mnum = 1;
663
666
  if ( (mlen > sufficient_len) |
664
667
  (ip+mlen == iLimit) ) { /* best possible length */
665
- ms->nextToUpdate = current+1; /* skip insertion */
668
+ ms->nextToUpdate = curr+1; /* skip insertion */
666
669
  return 1;
667
670
  } } }
668
671
  /* no dictMatchState lookup: dicts don't have a populated HC3 table */
669
- }
672
+ } /* if (mls == 3) */
670
673
 
671
- hashTable[h] = current; /* Update Hash Table */
674
+ hashTable[h] = curr; /* Update Hash Table */
672
675
 
673
- while (nbCompares-- && (matchIndex >= matchLow)) {
676
+ for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
674
677
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
675
678
  const BYTE* match;
676
679
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
677
- assert(current > matchIndex);
680
+ assert(curr > matchIndex);
678
681
 
679
682
  if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
680
683
  assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
@@ -691,20 +694,19 @@ U32 ZSTD_insertBtAndGetAllMatches (
691
694
 
692
695
  if (matchLength > bestLength) {
693
696
  DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
694
- (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
697
+ (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
695
698
  assert(matchEndIdx > matchIndex);
696
699
  if (matchLength > matchEndIdx - matchIndex)
697
700
  matchEndIdx = matchIndex + (U32)matchLength;
698
701
  bestLength = matchLength;
699
- matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
702
+ matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
700
703
  matches[mnum].len = (U32)matchLength;
701
704
  mnum++;
702
705
  if ( (matchLength > ZSTD_OPT_NUM)
703
706
  | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
704
707
  if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
705
708
  break; /* drop, to preserve bt consistency (miss a little bit of compression) */
706
- }
707
- }
709
+ } }
708
710
 
709
711
  if (match[matchLength] < ip[matchLength]) {
710
712
  /* match smaller than current */
@@ -723,12 +725,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
723
725
 
724
726
  *smallerPtr = *largerPtr = 0;
725
727
 
728
+ assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
726
729
  if (dictMode == ZSTD_dictMatchState && nbCompares) {
727
730
  size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
728
731
  U32 dictMatchIndex = dms->hashTable[dmsH];
729
732
  const U32* const dmsBt = dms->chainTable;
730
733
  commonLengthSmaller = commonLengthLarger = 0;
731
- while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
734
+ for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
732
735
  const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
733
736
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
734
737
  const BYTE* match = dmsBase + dictMatchIndex;
@@ -739,18 +742,17 @@ U32 ZSTD_insertBtAndGetAllMatches (
739
742
  if (matchLength > bestLength) {
740
743
  matchIndex = dictMatchIndex + dmsIndexDelta;
741
744
  DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
742
- (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
745
+ (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
743
746
  if (matchLength > matchEndIdx - matchIndex)
744
747
  matchEndIdx = matchIndex + (U32)matchLength;
745
748
  bestLength = matchLength;
746
- matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
749
+ matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
747
750
  matches[mnum].len = (U32)matchLength;
748
751
  mnum++;
749
752
  if ( (matchLength > ZSTD_OPT_NUM)
750
753
  | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
751
754
  break; /* drop, to guarantee consistency (miss a little bit of compression) */
752
- }
753
- }
755
+ } }
754
756
 
755
757
  if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
756
758
  if (match[matchLength] < ip[matchLength]) {
@@ -760,71 +762,232 @@ U32 ZSTD_insertBtAndGetAllMatches (
760
762
  /* match is larger than current */
761
763
  commonLengthLarger = matchLength;
762
764
  dictMatchIndex = nextPtr[0];
763
- }
764
- }
765
- }
765
+ } } } /* if (dictMode == ZSTD_dictMatchState) */
766
766
 
767
- assert(matchEndIdx > current+8);
767
+ assert(matchEndIdx > curr+8);
768
768
  ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
769
769
  return mnum;
770
770
  }
771
771
 
772
+ typedef U32 (*ZSTD_getAllMatchesFn)(
773
+ ZSTD_match_t*,
774
+ ZSTD_matchState_t*,
775
+ U32*,
776
+ const BYTE*,
777
+ const BYTE*,
778
+ const U32 rep[ZSTD_REP_NUM],
779
+ U32 const ll0,
780
+ U32 const lengthToBeat);
781
+
782
+ FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
783
+ ZSTD_match_t* matches,
784
+ ZSTD_matchState_t* ms,
785
+ U32* nextToUpdate3,
786
+ const BYTE* ip,
787
+ const BYTE* const iHighLimit,
788
+ const U32 rep[ZSTD_REP_NUM],
789
+ U32 const ll0,
790
+ U32 const lengthToBeat,
791
+ const ZSTD_dictMode_e dictMode,
792
+ const U32 mls)
793
+ {
794
+ assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
795
+ DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
796
+ if (ip < ms->window.base + ms->nextToUpdate)
797
+ return 0; /* skipped area */
798
+ ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
799
+ return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
800
+ }
801
+
802
+ #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
803
+
804
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
805
+ static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
806
+ ZSTD_match_t* matches, \
807
+ ZSTD_matchState_t* ms, \
808
+ U32* nextToUpdate3, \
809
+ const BYTE* ip, \
810
+ const BYTE* const iHighLimit, \
811
+ const U32 rep[ZSTD_REP_NUM], \
812
+ U32 const ll0, \
813
+ U32 const lengthToBeat) \
814
+ { \
815
+ return ZSTD_btGetAllMatches_internal( \
816
+ matches, ms, nextToUpdate3, ip, iHighLimit, \
817
+ rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
818
+ }
819
+
820
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
821
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
822
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
823
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
824
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
825
+
826
+ GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
827
+ GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
828
+ GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
829
+
830
+ #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
831
+ { \
832
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
833
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
834
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
835
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
836
+ }
772
837
 
773
- FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
774
- ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
775
- ZSTD_matchState_t* ms,
776
- U32* nextToUpdate3,
777
- const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
778
- const U32 rep[ZSTD_REP_NUM],
779
- U32 const ll0,
780
- U32 const lengthToBeat)
838
+ static ZSTD_getAllMatchesFn ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
781
839
  {
782
- const ZSTD_compressionParameters* const cParams = &ms->cParams;
783
- U32 const matchLengthSearch = cParams->minMatch;
784
- DEBUGLOG(8, "ZSTD_BtGetAllMatches");
785
- if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
786
- ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
787
- switch(matchLengthSearch)
788
- {
789
- case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
790
- default :
791
- case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
792
- case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
793
- case 7 :
794
- case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
840
+ ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
841
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
842
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
843
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
844
+ };
845
+ U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
846
+ assert((U32)dictMode < 3);
847
+ assert(mls - 3 < 4);
848
+ return getAllMatchesFns[(int)dictMode][mls - 3];
849
+ }
850
+
851
+ /*************************
852
+ * LDM helper functions *
853
+ *************************/
854
+
855
+ /* Struct containing info needed to make decision about ldm inclusion */
856
+ typedef struct {
857
+ rawSeqStore_t seqStore; /* External match candidates store for this block */
858
+ U32 startPosInBlock; /* Start position of the current match candidate */
859
+ U32 endPosInBlock; /* End position of the current match candidate */
860
+ U32 offset; /* Offset of the match candidate */
861
+ } ZSTD_optLdm_t;
862
+
863
+ /* ZSTD_optLdm_skipRawSeqStoreBytes():
864
+ * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
865
+ */
866
+ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
867
+ U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
868
+ while (currPos && rawSeqStore->pos < rawSeqStore->size) {
869
+ rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
870
+ if (currPos >= currSeq.litLength + currSeq.matchLength) {
871
+ currPos -= currSeq.litLength + currSeq.matchLength;
872
+ rawSeqStore->pos++;
873
+ } else {
874
+ rawSeqStore->posInSequence = currPos;
875
+ break;
876
+ }
877
+ }
878
+ if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
879
+ rawSeqStore->posInSequence = 0;
795
880
  }
796
881
  }
797
882
 
883
+ /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
884
+ * Calculates the beginning and end of the next match in the current block.
885
+ * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
886
+ */
887
+ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
888
+ U32 blockBytesRemaining) {
889
+ rawSeq currSeq;
890
+ U32 currBlockEndPos;
891
+ U32 literalsBytesRemaining;
892
+ U32 matchBytesRemaining;
893
+
894
+ /* Setting match end position to MAX to ensure we never use an LDM during this block */
895
+ if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
896
+ optLdm->startPosInBlock = UINT_MAX;
897
+ optLdm->endPosInBlock = UINT_MAX;
898
+ return;
899
+ }
900
+ /* Calculate appropriate bytes left in matchLength and litLength after adjusting
901
+ based on ldmSeqStore->posInSequence */
902
+ currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
903
+ assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
904
+ currBlockEndPos = currPosInBlock + blockBytesRemaining;
905
+ literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
906
+ currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
907
+ 0;
908
+ matchBytesRemaining = (literalsBytesRemaining == 0) ?
909
+ currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
910
+ currSeq.matchLength;
911
+
912
+ /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
913
+ if (literalsBytesRemaining >= blockBytesRemaining) {
914
+ optLdm->startPosInBlock = UINT_MAX;
915
+ optLdm->endPosInBlock = UINT_MAX;
916
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
917
+ return;
918
+ }
798
919
 
799
- /*-*******************************
800
- * Optimal parser
801
- *********************************/
802
- typedef struct repcodes_s {
803
- U32 rep[3];
804
- } repcodes_t;
920
+ /* Matches may be < MINMATCH by this process. In that case, we will reject them
921
+ when we are deciding whether or not to add the ldm */
922
+ optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
923
+ optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
924
+ optLdm->offset = currSeq.offset;
925
+
926
+ if (optLdm->endPosInBlock > currBlockEndPos) {
927
+ /* Match ends after the block ends, we can't use the whole match */
928
+ optLdm->endPosInBlock = currBlockEndPos;
929
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
930
+ } else {
931
+ /* Consume nb of bytes equal to size of sequence left */
932
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
933
+ }
934
+ }
805
935
 
806
- static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
807
- {
808
- repcodes_t newReps;
809
- if (offset >= ZSTD_REP_NUM) { /* full offset */
810
- newReps.rep[2] = rep[1];
811
- newReps.rep[1] = rep[0];
812
- newReps.rep[0] = offset - ZSTD_REP_MOVE;
813
- } else { /* repcode */
814
- U32 const repCode = offset + ll0;
815
- if (repCode > 0) { /* note : if repCode==0, no change */
816
- U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
817
- newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
818
- newReps.rep[1] = rep[0];
819
- newReps.rep[0] = currentOffset;
820
- } else { /* repCode == 0 */
821
- memcpy(&newReps, rep, sizeof(newReps));
936
+ /* ZSTD_optLdm_maybeAddMatch():
937
+ * Adds a match if it's long enough, based on it's 'matchStartPosInBlock'
938
+ * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
939
+ */
940
+ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
941
+ ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
942
+ U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
943
+ /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
944
+ U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
945
+ U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
946
+
947
+ /* Ensure that current block position is not outside of the match */
948
+ if (currPosInBlock < optLdm->startPosInBlock
949
+ || currPosInBlock >= optLdm->endPosInBlock
950
+ || candidateMatchLength < MINMATCH) {
951
+ return;
952
+ }
953
+
954
+ if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
955
+ DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
956
+ candidateOffCode, candidateMatchLength, currPosInBlock);
957
+ matches[*nbMatches].len = candidateMatchLength;
958
+ matches[*nbMatches].off = candidateOffCode;
959
+ (*nbMatches)++;
960
+ }
961
+ }
962
+
963
+ /* ZSTD_optLdm_processMatchCandidate():
964
+ * Wrapper function to update ldm seq store and call ldm functions as necessary.
965
+ */
966
+ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
967
+ U32 currPosInBlock, U32 remainingBytes) {
968
+ if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
969
+ return;
970
+ }
971
+
972
+ if (currPosInBlock >= optLdm->endPosInBlock) {
973
+ if (currPosInBlock > optLdm->endPosInBlock) {
974
+ /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
975
+ * at the end of a match from the ldm seq store, and will often be some bytes
976
+ * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
977
+ */
978
+ U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
979
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
822
980
  }
981
+ ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
823
982
  }
824
- return newReps;
983
+ ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
825
984
  }
826
985
 
827
986
 
987
+ /*-*******************************
988
+ * Optimal parser
989
+ *********************************/
990
+
828
991
  static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
829
992
  {
830
993
  return sol.litlen + sol.mlen;
@@ -839,7 +1002,7 @@ listStats(const U32* table, int lastEltID)
839
1002
  int enb;
840
1003
  for (enb=0; enb < nbElts; enb++) {
841
1004
  (void)table;
842
- //RAWLOG(2, "%3i:%3i, ", enb, table[enb]);
1005
+ /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
843
1006
  RAWLOG(2, "%4i,", table[enb]);
844
1007
  }
845
1008
  RAWLOG(2, " \n");
@@ -865,6 +1028,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
865
1028
  const BYTE* const prefixStart = base + ms->window.dictLimit;
866
1029
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
867
1030
 
1031
+ ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1032
+
868
1033
  U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
869
1034
  U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
870
1035
  U32 nextToUpdate3 = ms->nextToUpdate;
@@ -872,6 +1037,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
872
1037
  ZSTD_optimal_t* const opt = optStatePtr->priceTable;
873
1038
  ZSTD_match_t* const matches = optStatePtr->matchTable;
874
1039
  ZSTD_optimal_t lastSequence;
1040
+ ZSTD_optLdm_t optLdm;
1041
+
1042
+ optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1043
+ optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1044
+ ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
875
1045
 
876
1046
  /* init */
877
1047
  DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
@@ -887,14 +1057,21 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
887
1057
  /* find first match */
888
1058
  { U32 const litlen = (U32)(ip - anchor);
889
1059
  U32 const ll0 = !litlen;
890
- U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
1060
+ U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1061
+ ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1062
+ (U32)(ip-istart), (U32)(iend - ip));
891
1063
  if (!nbMatches) { ip++; continue; }
892
1064
 
893
1065
  /* initialize opt[0] */
894
1066
  { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
895
1067
  opt[0].mlen = 0; /* means is_a_literal */
896
1068
  opt[0].litlen = litlen;
897
- opt[0].price = ZSTD_literalsContribution(anchor, litlen, optStatePtr, optLevel);
1069
+ /* We don't need to include the actual price of the literals because
1070
+ * it is static for the duration of the forward pass, and is included
1071
+ * in every price. We include the literal length to avoid negative
1072
+ * prices when we subtract the previous literal length.
1073
+ */
1074
+ opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
898
1075
 
899
1076
  /* large match -> immediate encoding */
900
1077
  { U32 const maxML = matches[nbMatches-1].len;
@@ -914,7 +1091,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
914
1091
  } }
915
1092
 
916
1093
  /* set prices for first matches starting position == 0 */
917
- { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1094
+ assert(opt[0].price >= 0);
1095
+ { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
918
1096
  U32 pos;
919
1097
  U32 matchNb;
920
1098
  for (pos = 1; pos < minMatch; pos++) {
@@ -923,7 +1101,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
923
1101
  for (matchNb = 0; matchNb < nbMatches; matchNb++) {
924
1102
  U32 const offset = matches[matchNb].off;
925
1103
  U32 const end = matches[matchNb].len;
926
- repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
927
1104
  for ( ; pos <= end ; pos++ ) {
928
1105
  U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
929
1106
  U32 const sequencePrice = literalsPrice + matchPrice;
@@ -932,9 +1109,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
932
1109
  opt[pos].mlen = pos;
933
1110
  opt[pos].off = offset;
934
1111
  opt[pos].litlen = litlen;
935
- opt[pos].price = sequencePrice;
936
- ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
937
- memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
1112
+ opt[pos].price = (int)sequencePrice;
938
1113
  } }
939
1114
  last_pos = pos-1;
940
1115
  }
@@ -949,9 +1124,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
949
1124
  /* Fix current position with one literal if cheaper */
950
1125
  { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
951
1126
  int const price = opt[cur-1].price
952
- + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
953
- + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
954
- - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1127
+ + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1128
+ + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1129
+ - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
955
1130
  assert(price < 1000000000); /* overflow check */
956
1131
  if (price <= opt[cur].price) {
957
1132
  DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
@@ -961,7 +1136,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
961
1136
  opt[cur].off = 0;
962
1137
  opt[cur].litlen = litlen;
963
1138
  opt[cur].price = price;
964
- memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
965
1139
  } else {
966
1140
  DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
967
1141
  inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
@@ -969,6 +1143,21 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
969
1143
  }
970
1144
  }
971
1145
 
1146
+ /* Set the repcodes of the current position. We must do it here
1147
+ * because we rely on the repcodes of the 2nd to last sequence being
1148
+ * correct to set the next chunks repcodes during the backward
1149
+ * traversal.
1150
+ */
1151
+ ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
1152
+ assert(cur >= opt[cur].mlen);
1153
+ if (opt[cur].mlen != 0) {
1154
+ U32 const prev = cur - opt[cur].mlen;
1155
+ repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
1156
+ ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1157
+ } else {
1158
+ ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
1159
+ }
1160
+
972
1161
  /* last match must start at a minimum distance of 8 from oend */
973
1162
  if (inr > ilimit) continue;
974
1163
 
@@ -980,12 +1169,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
980
1169
  continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
981
1170
  }
982
1171
 
1172
+ assert(opt[cur].price >= 0);
983
1173
  { U32 const ll0 = (opt[cur].mlen != 0);
984
1174
  U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
985
- U32 const previousPrice = opt[cur].price;
1175
+ U32 const previousPrice = (U32)opt[cur].price;
986
1176
  U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
987
- U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
1177
+ U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
988
1178
  U32 matchNb;
1179
+
1180
+ ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1181
+ (U32)(inr-istart), (U32)(iend-inr));
1182
+
989
1183
  if (!nbMatches) {
990
1184
  DEBUGLOG(7, "rPos:%u : no match found", cur);
991
1185
  continue;
@@ -1009,7 +1203,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1009
1203
  /* set prices using matches found at position == cur */
1010
1204
  for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1011
1205
  U32 const offset = matches[matchNb].off;
1012
- repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
1013
1206
  U32 const lastML = matches[matchNb].len;
1014
1207
  U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1015
1208
  U32 mlen;
@@ -1019,7 +1212,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1019
1212
 
1020
1213
  for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
1021
1214
  U32 const pos = cur + mlen;
1022
- int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1215
+ int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1023
1216
 
1024
1217
  if ((pos > last_pos) || (price < opt[pos].price)) {
1025
1218
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
@@ -1029,8 +1222,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1029
1222
  opt[pos].off = offset;
1030
1223
  opt[pos].litlen = litlen;
1031
1224
  opt[pos].price = price;
1032
- ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
1033
- memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
1034
1225
  } else {
1035
1226
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1036
1227
  pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
@@ -1046,6 +1237,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1046
1237
  _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1047
1238
  assert(opt[0].mlen == 0);
1048
1239
 
1240
+ /* Set the next chunk's repcodes based on the repcodes of the beginning
1241
+ * of the last match, and the last sequence. This avoids us having to
1242
+ * update them while traversing the sequences.
1243
+ */
1244
+ if (lastSequence.mlen != 0) {
1245
+ repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
1246
+ ZSTD_memcpy(rep, &reps, sizeof(reps));
1247
+ } else {
1248
+ ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1249
+ }
1250
+
1049
1251
  { U32 const storeEnd = cur + 1;
1050
1252
  U32 storeStart = storeEnd;
1051
1253
  U32 seqPos = cur;
@@ -1082,20 +1284,6 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1082
1284
  continue; /* will finish */
1083
1285
  }
1084
1286
 
1085
- /* repcodes update : like ZSTD_updateRep(), but update in place */
1086
- if (offCode >= ZSTD_REP_NUM) { /* full offset */
1087
- rep[2] = rep[1];
1088
- rep[1] = rep[0];
1089
- rep[0] = offCode - ZSTD_REP_MOVE;
1090
- } else { /* repcode */
1091
- U32 const repCode = offCode + (llen==0);
1092
- if (repCode) { /* note : if repCode==0, no change */
1093
- U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
1094
- if (repCode >= 2) rep[2] = rep[1];
1095
- rep[1] = rep[0];
1096
- rep[0] = currentOffset;
1097
- } }
1098
-
1099
1287
  assert(anchor + llen <= iend);
1100
1288
  ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1101
1289
  ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
@@ -1104,45 +1292,36 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1104
1292
  } }
1105
1293
  ZSTD_setBasePrices(optStatePtr, optLevel);
1106
1294
  }
1107
-
1108
1295
  } /* while (ip < ilimit) */
1109
1296
 
1110
1297
  /* Return the last literals size */
1111
1298
  return (size_t)(iend - anchor);
1112
1299
  }
1113
1300
 
1301
+ static size_t ZSTD_compressBlock_opt0(
1302
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1303
+ const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1304
+ {
1305
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1306
+ }
1307
+
1308
+ static size_t ZSTD_compressBlock_opt2(
1309
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1310
+ const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1311
+ {
1312
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1313
+ }
1114
1314
 
1115
1315
  size_t ZSTD_compressBlock_btopt(
1116
1316
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1117
1317
  const void* src, size_t srcSize)
1118
1318
  {
1119
1319
  DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1120
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
1320
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1121
1321
  }
1122
1322
 
1123
1323
 
1124
- /* used in 2-pass strategy */
1125
- static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
1126
- {
1127
- U32 s, sum=0;
1128
- assert(ZSTD_FREQ_DIV+bonus >= 0);
1129
- for (s=0; s<lastEltIndex+1; s++) {
1130
- table[s] <<= ZSTD_FREQ_DIV+bonus;
1131
- table[s]--;
1132
- sum += table[s];
1133
- }
1134
- return sum;
1135
- }
1136
1324
 
1137
- /* used in 2-pass strategy */
1138
- MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
1139
- {
1140
- if (ZSTD_compressedLiterals(optPtr))
1141
- optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
1142
- optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
1143
- optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
1144
- optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
1145
- }
1146
1325
 
1147
1326
  /* ZSTD_initStats_ultra():
1148
1327
  * make a first compression pass, just to seed stats with more accurate starting values.
@@ -1156,7 +1335,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1156
1335
  const void* src, size_t srcSize)
1157
1336
  {
1158
1337
  U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
1159
- memcpy(tmpRep, rep, sizeof(tmpRep));
1338
+ ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1160
1339
 
1161
1340
  DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1162
1341
  assert(ms->opt.litLengthSum == 0); /* first block */
@@ -1164,7 +1343,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1164
1343
  assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
1165
1344
  assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
1166
1345
 
1167
- ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
1346
+ ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
1168
1347
 
1169
1348
  /* invalidate first scan from history */
1170
1349
  ZSTD_resetSeqStore(seqStore);
@@ -1173,8 +1352,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1173
1352
  ms->window.lowLimit = ms->window.dictLimit;
1174
1353
  ms->nextToUpdate = ms->window.dictLimit;
1175
1354
 
1176
- /* re-inforce weight of collected statistics */
1177
- ZSTD_upscaleStats(&ms->opt);
1178
1355
  }
1179
1356
 
1180
1357
  size_t ZSTD_compressBlock_btultra(
@@ -1182,14 +1359,14 @@ size_t ZSTD_compressBlock_btultra(
1182
1359
  const void* src, size_t srcSize)
1183
1360
  {
1184
1361
  DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1185
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1362
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1186
1363
  }
1187
1364
 
1188
1365
  size_t ZSTD_compressBlock_btultra2(
1189
1366
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1190
1367
  const void* src, size_t srcSize)
1191
1368
  {
1192
- U32 const current = (U32)((const BYTE*)src - ms->window.base);
1369
+ U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1193
1370
  DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1194
1371
 
1195
1372
  /* 2-pass strategy:
@@ -1204,41 +1381,41 @@ size_t ZSTD_compressBlock_btultra2(
1204
1381
  if ( (ms->opt.litLengthSum==0) /* first block */
1205
1382
  && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1206
1383
  && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
1207
- && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1384
+ && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1208
1385
  && (srcSize > ZSTD_PREDEF_THRESHOLD)
1209
1386
  ) {
1210
1387
  ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1211
1388
  }
1212
1389
 
1213
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1390
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1214
1391
  }
1215
1392
 
1216
1393
  size_t ZSTD_compressBlock_btopt_dictMatchState(
1217
1394
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1218
1395
  const void* src, size_t srcSize)
1219
1396
  {
1220
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
1397
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1221
1398
  }
1222
1399
 
1223
1400
  size_t ZSTD_compressBlock_btultra_dictMatchState(
1224
1401
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1225
1402
  const void* src, size_t srcSize)
1226
1403
  {
1227
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
1404
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1228
1405
  }
1229
1406
 
1230
1407
  size_t ZSTD_compressBlock_btopt_extDict(
1231
1408
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1232
1409
  const void* src, size_t srcSize)
1233
1410
  {
1234
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
1411
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1235
1412
  }
1236
1413
 
1237
1414
  size_t ZSTD_compressBlock_btultra_extDict(
1238
1415
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1239
1416
  const void* src, size_t srcSize)
1240
1417
  {
1241
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
1418
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1242
1419
  }
1243
1420
 
1244
1421
  /* note : no btultra2 variant for extDict nor dictMatchState,