extzstd 0.3.2 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -3
  3. data/contrib/zstd/CHANGELOG +225 -1
  4. data/contrib/zstd/CONTRIBUTING.md +158 -75
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +106 -69
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +64 -36
  9. data/contrib/zstd/SECURITY.md +15 -0
  10. data/contrib/zstd/TESTING.md +2 -3
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +117 -199
  13. data/contrib/zstd/lib/README.md +37 -7
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +80 -86
  17. data/contrib/zstd/lib/common/compiler.h +225 -63
  18. data/contrib/zstd/lib/common/cpu.h +37 -1
  19. data/contrib/zstd/lib/common/debug.c +7 -1
  20. data/contrib/zstd/lib/common/debug.h +21 -12
  21. data/contrib/zstd/lib/common/entropy_common.c +15 -37
  22. data/contrib/zstd/lib/common/error_private.c +9 -2
  23. data/contrib/zstd/lib/common/error_private.h +93 -5
  24. data/contrib/zstd/lib/common/fse.h +12 -87
  25. data/contrib/zstd/lib/common/fse_decompress.c +37 -117
  26. data/contrib/zstd/lib/common/huf.h +97 -172
  27. data/contrib/zstd/lib/common/mem.h +58 -58
  28. data/contrib/zstd/lib/common/pool.c +38 -17
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +158 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -14
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +6 -814
  34. data/contrib/zstd/lib/common/xxhash.h +6930 -195
  35. data/contrib/zstd/lib/common/zstd_common.c +1 -36
  36. data/contrib/zstd/lib/common/zstd_deps.h +1 -1
  37. data/contrib/zstd/lib/common/zstd_internal.h +68 -154
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +75 -155
  41. data/contrib/zstd/lib/compress/hist.c +1 -1
  42. data/contrib/zstd/lib/compress/hist.h +1 -1
  43. data/contrib/zstd/lib/compress/huf_compress.c +810 -259
  44. data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
  63. data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
  79. data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +237 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +4 -3
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
  102. data/contrib/zstd/lib/zstd.h +1030 -332
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
  104. data/ext/extconf.rb +26 -7
  105. data/ext/extzstd.c +51 -24
  106. data/ext/extzstd.h +33 -6
  107. data/ext/extzstd_stream.c +74 -31
  108. data/ext/libzstd_conf.h +0 -1
  109. data/ext/zstd_decompress_asm.S +1 -0
  110. metadata +17 -7
  111. data/contrib/zstd/appveyor.yml +0 -292
  112. data/ext/depend +0 -2
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -12,42 +12,52 @@
  #include "hist.h"
  #include "zstd_opt.h"

+ #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
+  || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
+  || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)

  #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
- #define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
  #define ZSTD_MAX_PRICE     (1<<30)

- #define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
+ #define ZSTD_PREDEF_THRESHOLD 8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */


  /*-*************************************
  *  Price functions for optimal parser
  ***************************************/

- #if 0    /* approximation at bit level */
+ #if 0    /* approximation at bit level (for tests) */
  #  define BITCOST_ACCURACY 0
  #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
- #  define WEIGHT(stat)  ((void)opt, ZSTD_bitWeight(stat))
- #elif 0  /* fractional bit accuracy */
+ #  define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
+ #elif 0  /* fractional bit accuracy (for tests) */
  #  define BITCOST_ACCURACY 8
  #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
- #  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
+ #  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
  #else    /* opt==approx, ultra==accurate */
  #  define BITCOST_ACCURACY 8
  #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
- #  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
+ #  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
  #endif

+ /* ZSTD_bitWeight() :
+  * provide estimated "cost" of a stat in full bits only */
  MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
  {
      return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
  }

+ /* ZSTD_fracWeight() :
+  * provide fractional-bit "cost" of a stat,
+  * using linear interpolation approximation */
  MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
  {
      U32 const stat = rawStat + 1;
      U32 const hb = ZSTD_highbit32(stat);
      U32 const BWeight = hb * BITCOST_MULTIPLIER;
+     /* Fweight was meant for "Fractional weight"
+      * but it's effectively a value between 1 and 2
+      * using fixed point arithmetic */
      U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
      U32 const weight = BWeight + FWeight;
      assert(hb + BITCOST_ACCURACY < 31);
@@ -58,7 +68,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
  /* debugging function,
   * @return price in bytes as fractional value
   * for debug messages only */
- MEM_STATIC double ZSTD_fCost(U32 price)
+ MEM_STATIC double ZSTD_fCost(int price)
  {
      return (double)price / (BITCOST_MULTIPLIER*8);
  }
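To see what the fractional interpolation buys over whole-bit weights, here is a small standalone sketch of the two weight functions shown above (simplified names, GCC/Clang `__builtin_clz` assumed; this is an illustration, not code from the diff):

    #include <stdio.h>
    #include <stdint.h>

    #define BITCOST_ACCURACY   8
    #define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)

    static uint32_t highbit32(uint32_t v) { return 31 - (uint32_t)__builtin_clz(v); }

    /* whole bits only : floor(log2(stat+1)), in 1/256th-of-a-bit units */
    static uint32_t bitWeight(uint32_t stat) { return highbit32(stat+1) * BITCOST_MULTIPLIER; }

    /* adds a linear-interpolation term between consecutive powers of 2;
     * FWeight lies in [256,512), i.e. a fixed-point value in [1.0,2.0) */
    static uint32_t fracWeight(uint32_t rawStat)
    {
        uint32_t const stat    = rawStat + 1;
        uint32_t const hb      = highbit32(stat);
        uint32_t const BWeight = hb * BITCOST_MULTIPLIER;
        uint32_t const FWeight = (stat << BITCOST_ACCURACY) >> hb;
        return BWeight + FWeight;
    }

    int main(void)
    {
        uint32_t s;
        for (s = 1; s <= 8; s++)
            printf("stat=%u  bitWeight=%.3f  fracWeight=%.3f\n", (unsigned)s,
                   (double)bitWeight(s) / BITCOST_MULTIPLIER,
                   (double)fracWeight(s) / BITCOST_MULTIPLIER);
        return 0;
    }

The constant offset of roughly one bit carried by FWeight is harmless in practice: prices are computed as differences against a base weight built the same way, so it effectively cancels.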
@@ -66,7 +76,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)

  static int ZSTD_compressedLiterals(optState_t const* const optPtr)
  {
-     return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
+     return optPtr->literalCompressionMode != ZSTD_ps_disable;
  }

  static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
@@ -79,25 +89,52 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
  }


- /* ZSTD_downscaleStat() :
-  * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
-  * return the resulting sum of elements */
- static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
+ static U32 sum_u32(const unsigned table[], size_t nbElts)
+ {
+     size_t n;
+     U32 total = 0;
+     for (n=0; n<nbElts; n++) {
+         total += table[n];
+     }
+     return total;
+ }
+
+ typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
+
+ static U32
+ ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
  {
      U32 s, sum=0;
-     DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
-     assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
+     DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
+             (unsigned)lastEltIndex+1, (unsigned)shift );
+     assert(shift < 30);
      for (s=0; s<lastEltIndex+1; s++) {
-         table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
-         sum += table[s];
+         unsigned const base = base1 ? 1 : (table[s]>0);
+         unsigned const newStat = base + (table[s] >> shift);
+         sum += newStat;
+         table[s] = newStat;
      }
      return sum;
  }

+ /* ZSTD_scaleStats() :
+  * reduce all elt frequencies in table if sum too large
+  * return the resulting sum of elements */
+ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
+ {
+     U32 const prevsum = sum_u32(table, lastEltIndex+1);
+     U32 const factor = prevsum >> logTarget;
+     DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
+     assert(logTarget < 30);
+     if (factor <= 1) return prevsum;
+     return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
+ }
+
  /* ZSTD_rescaleFreqs() :
   * if first block (detected by optPtr->litLengthSum == 0) : init statistics
   *    take hints from dictionary if there is one
-  *    or init from zero, using src for literals stats, or flat 1 for match symbols
+  *    and init from zero if there is none,
+  *    using src for literals stats, and baseline stats for sequence symbols
   * otherwise downscale existing stats, to be used as seed for next block.
   */
  static void
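The new ZSTD_scaleStats() is a no-op until the running total exceeds roughly twice 2^logTarget; after that it divides every cell by the largest power of two that brings the sum back near the target, keeping a floor of 1 per cell. A toy reproduction to make the behavior concrete (GCC/Clang `__builtin_clz` assumed; made-up frequencies, not code from the diff):

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t highbit32(uint32_t v) { return 31 - (uint32_t)__builtin_clz(v); }

    static uint32_t scaleStats(unsigned* table, uint32_t lastEltIndex, uint32_t logTarget)
    {
        uint32_t s, prevsum = 0, sum = 0, shift, factor;
        for (s = 0; s <= lastEltIndex; s++) prevsum += table[s];
        factor = prevsum >> logTarget;
        if (factor <= 1) return prevsum;      /* already small enough : keep as-is */
        shift = highbit32(factor);            /* divide by ~factor ... */
        for (s = 0; s <= lastEltIndex; s++) {
            table[s] = 1 + (table[s] >> shift);   /* ... with a guaranteed minimum of 1 */
            sum += table[s];
        }
        return sum;
    }

    int main(void)
    {
        unsigned freq[4] = { 40000, 8000, 800, 8 };
        uint32_t const sum = scaleStats(freq, 3, 11);   /* target ~2^11 = 2048 */
        /* prints: scaled sum=3054 : 2501 501 51 1 */
        printf("scaled sum=%u : %u %u %u %u\n", (unsigned)sum,
               freq[0], freq[1], freq[2], freq[3]);
        return 0;
    }

The guaranteed `+1` per cell (base_1guaranteed) keeps every symbol priceable even if its frequency underflows to zero.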
@@ -109,24 +146,28 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
      DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
      optPtr->priceType = zop_dynamic;

-     if (optPtr->litLengthSum == 0) {  /* first block : init */
-         if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
-             DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
+     if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */
+
+         /* heuristic: use pre-defined stats for too small inputs */
+         if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
+             DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
              optPtr->priceType = zop_predef;
          }

          assert(optPtr->symbolCosts != NULL);
          if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
-             /* huffman table presumed generated by dictionary */
+
+             /* huffman stats covering the full value set : table presumed generated by dictionary */
              optPtr->priceType = zop_dynamic;

              if (compressedLiterals) {
+                 /* generate literals statistics from huffman table */
                  unsigned lit;
                  assert(optPtr->litFreq != NULL);
                  optPtr->litSum = 0;
                  for (lit=0; lit<=MaxLit; lit++) {
                      U32 const scaleLog = 11;   /* scale to 2K */
-                     U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
+                     U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
                      assert(bitCost <= scaleLog);
                      optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                      optPtr->litSum += optPtr->litFreq[lit];
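The dictionary path above converts Huffman code lengths into pseudo-frequencies: a symbol coded in b bits has implied probability 2^-b, so scaling to a total of ~2^scaleLog gives 2^(scaleLog-b). A toy version of that conversion, with hypothetical code lengths (illustration only):

    #include <stdio.h>

    int main(void)
    {
        unsigned const scaleLog = 11;              /* scale to 2K, as in the diff */
        unsigned bitCost[5] = { 2, 3, 3, 11, 0 };  /* invented code lengths; 0 = symbol absent */
        unsigned freq[5];
        unsigned sum = 0, i;
        for (i = 0; i < 5; i++) {
            freq[i] = bitCost[i] ? 1u << (scaleLog - bitCost[i])
                                 : 1 /* minimum, so a cost can still be computed */;
            sum += freq[i];
        }
        for (i = 0; i < 5; i++)
            printf("symbol %u : %u bits -> freq %u\n", i, bitCost[i], freq[i]);
        printf("litSum ~= %u\n", sum);   /* 512 + 256 + 256 + 1 + 1 */
        return 0;
    }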
@@ -168,20 +209,26 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
                      optPtr->offCodeSum += optPtr->offCodeFreq[of];
              }   }

-         } else {  /* not a dictionary */
+         } else {  /* first block, no dictionary */

              assert(optPtr->litFreq != NULL);
              if (compressedLiterals) {
+                 /* base initial cost of literals on direct frequency within src */
                  unsigned lit = MaxLit;
                  HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
-                 optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+                 optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
              }

-             {   unsigned ll;
-                 for (ll=0; ll<=MaxLL; ll++)
-                     optPtr->litLengthFreq[ll] = 1;
+             {   unsigned const baseLLfreqs[MaxLL+1] = {
+                     4, 2, 1, 1, 1, 1, 1, 1,
+                     1, 1, 1, 1, 1, 1, 1, 1,
+                     1, 1, 1, 1, 1, 1, 1, 1,
+                     1, 1, 1, 1, 1, 1, 1, 1,
+                     1, 1, 1, 1
+                 };
+                 ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
+                 optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
              }
-             optPtr->litLengthSum = MaxLL+1;

              {   unsigned ml;
                  for (ml=0; ml<=MaxML; ml++)
@@ -189,21 +236,25 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
              }
              optPtr->matchLengthSum = MaxML+1;

-             {   unsigned of;
-                 for (of=0; of<=MaxOff; of++)
-                     optPtr->offCodeFreq[of] = 1;
+             {   unsigned const baseOFCfreqs[MaxOff+1] = {
+                     6, 2, 1, 1, 2, 3, 4, 4,
+                     4, 3, 2, 1, 1, 1, 1, 1,
+                     1, 1, 1, 1, 1, 1, 1, 1,
+                     1, 1, 1, 1, 1, 1, 1, 1
+                 };
+                 ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
+                 optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
              }
-             optPtr->offCodeSum = MaxOff+1;

          }

-     } else {   /* new block : re-use previous statistics, scaled down */
+     } else {   /* new block : scale down accumulated statistics */

          if (compressedLiterals)
-             optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
-         optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
-         optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
-         optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
+             optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
+         optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
+         optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
+         optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
      }

      ZSTD_setBasePrices(optPtr, optLevel);
@@ -216,6 +267,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                  const optState_t* const optPtr,
                                  int optLevel)
  {
+     DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
      if (litLength == 0) return 0;

      if (!ZSTD_compressedLiterals(optPtr))
@@ -225,11 +277,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
          return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */

      /* dynamic statistics */
-     {   U32 price = litLength * optPtr->litSumBasePrice;
+     {   U32 price = optPtr->litSumBasePrice * litLength;
+         U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
          U32 u;
+         assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
          for (u=0; u < litLength; u++) {
-             assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
-             price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
+             U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
+             if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
+             price -= litPrice;
          }
          return price;
      }
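The rewritten loop clamps each literal's weight to litSumBasePrice - BITCOST_MULTIPLIER, so the effective per-literal cost can no longer drop below one bit (the old code merely asserted this could not happen). A minimal sketch of the clamped arithmetic, with made-up numbers (not code from the diff):

    #include <stdio.h>

    #define BITCOST_MULTIPLIER 256

    /* effective cost of one literal: basePrice minus its (clamped) weight */
    static unsigned oneLiteralCost(unsigned litSumBasePrice, unsigned litWeight)
    {
        unsigned const litPriceMax = litSumBasePrice - BITCOST_MULTIPLIER;
        if (litWeight > litPriceMax) litWeight = litPriceMax;  /* clamp: cost >= 1 bit */
        return litSumBasePrice - litWeight;
    }

    int main(void)
    {
        /* hypothetical: base price 12 bits; a very frequent byte weighing 11.5 bits */
        printf("%.2f bits\n", oneLiteralCost(12*256, 2944) / 256.0);  /* clamped -> 1.00 */
        printf("%.2f bits\n", oneLiteralCost(12*256, 1536) / 256.0);  /* -> 6.00 */
        return 0;
    }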
@@ -239,7 +294,17 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
   * cost of literalLength symbol */
  static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
  {
-     if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
+     assert(litLength <= ZSTD_BLOCKSIZE_MAX);
+     if (optPtr->priceType == zop_predef)
+         return WEIGHT(litLength, optLevel);
+
+     /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
+      * because it isn't representable in the zstd format.
+      * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
+      * In such a case, the block would be all literals.
+      */
+     if (litLength == ZSTD_BLOCKSIZE_MAX)
+         return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);

      /* dynamic statistics */
      {   U32 const llCode = ZSTD_LLcode(litLength);
@@ -250,22 +315,25 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
      }
  }

  /* ZSTD_getMatchPrice() :
-  * Provides the cost of the match part (offset + matchLength) of a sequence
+  * Provides the cost of the match part (offset + matchLength) of a sequence.
   * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
-  * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
+  * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
+  * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
+  */
  FORCE_INLINE_TEMPLATE U32
- ZSTD_getMatchPrice(U32 const offset,
+ ZSTD_getMatchPrice(U32 const offBase,
                     U32 const matchLength,
                     const optState_t* const optPtr,
                     int const optLevel)
  {
      U32 price;
-     U32 const offCode = ZSTD_highbit32(offset+1);
+     U32 const offCode = ZSTD_highbit32(offBase);
      U32 const mlBase = matchLength - MINMATCH;
      assert(matchLength >= MINMATCH);

-     if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
-         return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
+     if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
+         return WEIGHT(mlBase, optLevel)
+              + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */

      /* dynamic statistics */
      price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
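From here on the diff systematically replaces `offset + ZSTD_REP_MOVE` arithmetic with the offBase "sumtype". Modeled on the companion changes in zstd_compress_internal.h (a simplified reconstruction, not part of this hunk): repcodes occupy offBase values 1..3 and real offsets are shifted up by ZSTD_REP_NUM, so a single U32 carries either case and `ZSTD_highbit32(offBase)` replaces the old `ZSTD_highbit32(offset+1)`:

    #include <stdio.h>

    #define ZSTD_REP_NUM 3
    #define REPCODE_TO_OFFBASE(r) (r)                   /* repcodes 1..3 -> offBase 1..3 */
    #define OFFSET_TO_OFFBASE(o)  ((o) + ZSTD_REP_NUM)  /* real offset o -> offBase o+3  */

    static unsigned highbit32(unsigned v) { return 31 - (unsigned)__builtin_clz(v); }

    int main(void)
    {
        unsigned const rep1   = REPCODE_TO_OFFBASE(1);   /* == 1   */
        unsigned const off100 = OFFSET_TO_OFFBASE(100);  /* == 103 */
        printf("repcode 1 : offBase=%u offCode=%u\n", rep1,   highbit32(rep1));    /* offCode 0 */
        printf("offset 100: offBase=%u offCode=%u\n", off100, highbit32(off100));  /* offCode 6 */
        return 0;
    }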
@@ -284,10 +352,10 @@ ZSTD_getMatchPrice(U32 const offset,
  }

  /* ZSTD_updateStats() :
-  * assumption : literals + litLengtn <= iend */
+  * assumption : literals + litLength <= iend */
  static void ZSTD_updateStats(optState_t* const optPtr,
                               U32 litLength, const BYTE* literals,
-                              U32 offsetCode, U32 matchLength)
+                              U32 offBase, U32 matchLength)
  {
      /* literals */
      if (ZSTD_compressedLiterals(optPtr)) {
@@ -303,8 +371,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
          optPtr->litLengthSum++;
      }

-     /* match offset code (0-2=>repCode; 3+=>offset+2) */
-     {   U32 const offCode = ZSTD_highbit32(offsetCode+1);
+     /* offset code : follows storeSeq() numeric representation */
+     {   U32 const offCode = ZSTD_highbit32(offBase);
          assert(offCode <= MaxOff);
          optPtr->offCodeFreq[offCode]++;
          optPtr->offCodeSum++;
@@ -338,9 +406,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)

  /* Update hashTable3 up to ip (excluded)
     Assumption : always within prefix (i.e. not within extDict) */
- static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
-                                               U32* nextToUpdate3,
-                                               const BYTE* const ip)
+ static
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
+                                        U32* nextToUpdate3,
+                                        const BYTE* const ip)
  {
      U32* const hashTable3 = ms->hashTable3;
      U32 const hashLog3 = ms->hashLog3;
@@ -364,11 +434,15 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
  *  Binary Tree search
  ***************************************/
  /** ZSTD_insertBt1() : add one or multiple positions to tree.
-  *  ip : assumed <= iend-8 .
+  * @param ip assumed <= iend-8 .
+  * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
   * @return : nb of positions added */
- static U32 ZSTD_insertBt1(
-                 ZSTD_matchState_t* ms,
+ static
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ U32 ZSTD_insertBt1(
+                 const ZSTD_matchState_t* ms,
                  const BYTE* const ip, const BYTE* const iend,
+                 U32 const target,
                  U32 const mls, const int extDict)
  {
      const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -391,7 +465,10 @@ static U32 ZSTD_insertBt1(
      U32* smallerPtr = bt + 2*(curr&btMask);
      U32* largerPtr  = smallerPtr + 1;
      U32 dummy32;   /* to be nullified at the end */
-     U32 const windowLow = ms->window.lowLimit;
+     /* windowLow is based on target because
+      * we only need positions that will be in the window at the end of the tree update.
+      */
+     U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
      U32 matchEndIdx = curr+8+1;
      size_t bestLength = 8;
      U32 nbCompares = 1U << cParams->searchLog;
@@ -404,11 +481,12 @@ static U32 ZSTD_insertBt1(

      DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

+     assert(curr <= target);
      assert(ip <= iend-8);   /* required for h calculation */
      hashTable[h] = curr;   /* Update Hash Table */

      assert(windowLow > 0);
-     while (nbCompares-- && (matchIndex >= windowLow)) {
+     for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
          U32* const nextPtr = bt + 2*(matchIndex & btMask);
          size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
          assert(matchIndex < curr);
@@ -480,6 +558,7 @@ static U32 ZSTD_insertBt1(
  }

  FORCE_INLINE_TEMPLATE
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
  void ZSTD_updateTree_internal(
                  ZSTD_matchState_t* ms,
                  const BYTE* const ip, const BYTE* const iend,
@@ -488,11 +567,11 @@ void ZSTD_updateTree_internal(
      const BYTE* const base = ms->window.base;
      U32 const target = (U32)(ip - base);
      U32 idx = ms->nextToUpdate;
-     DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
+     DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                  idx, target, dictMode);

      while(idx < target) {
-         U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+         U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
          assert(idx < (U32)(idx + forward));
          idx += forward;
      }
@@ -506,15 +585,18 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
  }

  FORCE_INLINE_TEMPLATE
- U32 ZSTD_insertBtAndGetAllMatches (
-                     ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
-                     ZSTD_matchState_t* ms,
-                     U32* nextToUpdate3,
-                     const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
-                     const U32 rep[ZSTD_REP_NUM],
-                     U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
-                     const U32 lengthToBeat,
-                     U32 const mls /* template */)
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ U32
+ ZSTD_insertBtAndGetAllMatches (
+                 ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
+                 ZSTD_matchState_t* ms,
+                 U32* nextToUpdate3,
+                 const BYTE* const ip, const BYTE* const iLimit,
+                 const ZSTD_dictMode_e dictMode,
+                 const U32 rep[ZSTD_REP_NUM],
+                 const U32 ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
+                 const U32 lengthToBeat,
+                 const U32 mls /* template */)
  {
      const ZSTD_compressionParameters* const cParams = &ms->cParams;
      U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
@@ -597,7 +679,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                  DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
                              repCode, ll0, repOffset, repLen);
                  bestLength = repLen;
-                 matches[mnum].off = repCode - ll0;
+                 matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);   /* expect value between 1 and 3 */
                  matches[mnum].len = (U32)repLen;
                  mnum++;
                  if ( (repLen > sufficient_len)
@@ -626,7 +708,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                  bestLength = mlen;
                  assert(curr > matchIndex3);
                  assert(mnum==0);   /* no prior solution */
-                 matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
+                 matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
                  matches[0].len = (U32)mlen;
                  mnum = 1;
                  if ( (mlen > sufficient_len) |
@@ -635,11 +717,11 @@ U32 ZSTD_insertBtAndGetAllMatches (
                      return 1;
          }   }   }
          /* no dictMatchState lookup: dicts don't have a populated HC3 table */
-     }
+     }   /* if (mls == 3) */

      hashTable[h] = curr;   /* Update Hash Table */

-     while (nbCompares-- && (matchIndex >= matchLow)) {
+     for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
          U32* const nextPtr = bt + 2*(matchIndex & btMask);
          const BYTE* match;
          size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
@@ -659,21 +741,20 @@ U32 ZSTD_insertBtAndGetAllMatches (
          }

          if (matchLength > bestLength) {
-             DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
-                     (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
+             DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
+                     (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
              assert(matchEndIdx > matchIndex);
              if (matchLength > matchEndIdx - matchIndex)
                  matchEndIdx = matchIndex + (U32)matchLength;
              bestLength = matchLength;
-             matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
+             matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
              matches[mnum].len = (U32)matchLength;
              mnum++;
              if ( (matchLength > ZSTD_OPT_NUM)
                 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                  if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                  break; /* drop, to preserve bt consistency (miss a little bit of compression) */
-             }
-         }
+         }   }

          if (match[matchLength] < ip[matchLength]) {
              /* match smaller than current */
@@ -692,12 +773,13 @@ U32 ZSTD_insertBtAndGetAllMatches (

      *smallerPtr = *largerPtr = 0;

+     assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX));   /* Check we haven't underflowed. */
      if (dictMode == ZSTD_dictMatchState && nbCompares) {
          size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
          U32 dictMatchIndex = dms->hashTable[dmsH];
          const U32* const dmsBt = dms->chainTable;
          commonLengthSmaller = commonLengthLarger = 0;
-         while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
+         for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
              const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
              size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
              const BYTE* match = dmsBase + dictMatchIndex;
@@ -707,19 +789,18 @@ U32 ZSTD_insertBtAndGetAllMatches (

              if (matchLength > bestLength) {
                  matchIndex = dictMatchIndex + dmsIndexDelta;
-                 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
-                         (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
+                 DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
+                         (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
                  if (matchLength > matchEndIdx - matchIndex)
                      matchEndIdx = matchIndex + (U32)matchLength;
                  bestLength = matchLength;
-                 matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
+                 matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
                  matches[mnum].len = (U32)matchLength;
                  mnum++;
                  if ( (matchLength > ZSTD_OPT_NUM)
                     | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                      break;   /* drop, to guarantee consistency (miss a little bit of compression) */
-                 }
-             }
+             }   }

              if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
              if (match[matchLength] < ip[matchLength]) {
@@ -729,39 +810,93 @@ U32 ZSTD_insertBtAndGetAllMatches (
                  /* match is larger than current */
                  commonLengthLarger = matchLength;
                  dictMatchIndex = nextPtr[0];
-             }
-         }
-     }
+     }   }   }   /* if (dictMode == ZSTD_dictMatchState) */

      assert(matchEndIdx > curr+8);
      ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
      return mnum;
  }

+ typedef U32 (*ZSTD_getAllMatchesFn)(
+     ZSTD_match_t*,
+     ZSTD_matchState_t*,
+     U32*,
+     const BYTE*,
+     const BYTE*,
+     const U32 rep[ZSTD_REP_NUM],
+     U32 const ll0,
+     U32 const lengthToBeat);

- FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
-                         ZSTD_match_t* matches,   /* store result (match found, increasing size) in this table */
-                         ZSTD_matchState_t* ms,
-                         U32* nextToUpdate3,
-                         const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
-                         const U32 rep[ZSTD_REP_NUM],
-                         U32 const ll0,
-                         U32 const lengthToBeat)
+ FORCE_INLINE_TEMPLATE
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ U32 ZSTD_btGetAllMatches_internal(
+         ZSTD_match_t* matches,
+         ZSTD_matchState_t* ms,
+         U32* nextToUpdate3,
+         const BYTE* ip,
+         const BYTE* const iHighLimit,
+         const U32 rep[ZSTD_REP_NUM],
+         U32 const ll0,
+         U32 const lengthToBeat,
+         const ZSTD_dictMode_e dictMode,
+         const U32 mls)
  {
-     const ZSTD_compressionParameters* const cParams = &ms->cParams;
-     U32 const matchLengthSearch = cParams->minMatch;
-     DEBUGLOG(8, "ZSTD_BtGetAllMatches");
-     if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
-     ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
-     switch(matchLengthSearch)
-     {
-     case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
-     default :
-     case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
-     case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
-     case 7 :
-     case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
+     assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
+     DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
+     if (ip < ms->window.base + ms->nextToUpdate)
+         return 0;   /* skipped area */
+     ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
+     return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
+ }
+
+ #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
+
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
+     static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
+             ZSTD_match_t* matches,                             \
+             ZSTD_matchState_t* ms,                             \
+             U32* nextToUpdate3,                                \
+             const BYTE* ip,                                    \
+             const BYTE* const iHighLimit,                      \
+             const U32 rep[ZSTD_REP_NUM],                       \
+             U32 const ll0,                                     \
+             U32 const lengthToBeat)                            \
+     {                                                          \
+         return ZSTD_btGetAllMatches_internal(                  \
+                 matches, ms, nextToUpdate3, ip, iHighLimit,    \
+                 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
      }
+
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
+     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
+     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
+     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
+     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
+
+ GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
+ GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
+ GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
+
+ #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)   \
+     {                                             \
+         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3),  \
+         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4),  \
+         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5),  \
+         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)   \
+     }
+
+ static ZSTD_getAllMatchesFn
+ ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
+ {
+     ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
+         ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
+         ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
+         ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
+     };
+     U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
+     assert((U32)dictMode < 3);
+     assert(mls - 3 < 4);
+     return getAllMatchesFns[(int)dictMode][mls - 3];
  }

  /*************************
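The switch over minMatch is gone above: the template function now takes mls (and dictMode) as ordinary parameters, and macros stamp out one specialized static function per combination, selected once per block through a function-pointer table. The general shape of that pattern in miniature (illustrative only, hypothetical names):

    #include <stdio.h>

    /* stand-in for the inlinable template with a parameter to specialize on */
    static unsigned worker(unsigned x, unsigned mls) { return x * mls; }

    #define GEN_FN(mls) \
        static unsigned worker_##mls(unsigned x) { return worker(x, mls); }

    GEN_FN(3) GEN_FN(4) GEN_FN(5) GEN_FN(6)

    typedef unsigned (*workerFn)(unsigned);

    static workerFn selectWorker(unsigned mls)
    {
        static const workerFn table[4] = { worker_3, worker_4, worker_5, worker_6 };
        return table[mls - 3];   /* mls already bounded to [3,6] */
    }

    int main(void)
    {
        workerFn const fn = selectWorker(5);   /* one selection per block... */
        printf("%u\n", fn(7));                 /* ...then direct calls in the hot loop */
        return 0;
    }

The point of the design: each stamped-out function sees mls and dictMode as compile-time constants, so the compiler fully specializes the search loop, while the caller pays one indirect call per block instead of a switch per position.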
@@ -770,16 +905,18 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (

  /* Struct containing info needed to make decision about ldm inclusion */
  typedef struct {
-     rawSeqStore_t seqStore;         /* External match candidates store for this block */
-     U32 startPosInBlock;            /* Start position of the current match candidate */
-     U32 endPosInBlock;              /* End position of the current match candidate */
-     U32 offset;                     /* Offset of the match candidate */
+     rawSeqStore_t seqStore;   /* External match candidates store for this block */
+     U32 startPosInBlock;      /* Start position of the current match candidate */
+     U32 endPosInBlock;        /* End position of the current match candidate */
+     U32 offset;               /* Offset of the match candidate */
  } ZSTD_optLdm_t;

  /* ZSTD_optLdm_skipRawSeqStoreBytes():
-  * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
+  * Moves forward in @rawSeqStore by @nbBytes,
+  * which will update the fields 'pos' and 'posInSequence'.
   */
- static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
+ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
+ {
      U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
      while (currPos && rawSeqStore->pos < rawSeqStore->size) {
          rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
@@ -800,8 +937,10 @@ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t
   * Calculates the beginning and end of the next match in the current block.
   * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
   */
- static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
-                                                    U32 blockBytesRemaining) {
+ static void
+ ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
+                                        U32 blockBytesRemaining)
+ {
      rawSeq currSeq;
      U32 currBlockEndPos;
      U32 literalsBytesRemaining;
@@ -813,8 +952,8 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
          optLdm->endPosInBlock = UINT_MAX;
          return;
      }
-     /* Calculate appropriate bytes left in matchLength and litLength after adjusting
-        based on ldmSeqStore->posInSequence */
+     /* Calculate appropriate bytes left in matchLength and litLength
+      * after adjusting based on ldmSeqStore->posInSequence */
      currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
      assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
      currBlockEndPos = currPosInBlock + blockBytesRemaining;
@@ -850,15 +989,16 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
  }

  /* ZSTD_optLdm_maybeAddMatch():
-  * Adds a match if it's long enough, based on it's 'matchStartPosInBlock'
-  * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
+  * Adds a match if it's long enough,
+  * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
+  * into 'matches'. Maintains the correct ordering of 'matches'.
   */
  static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
-                                       ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
-     U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
-     /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
-     U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
-     U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
+                                       const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
+ {
+     U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
+     /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
+     U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;

      /* Ensure that current block position is not outside of the match */
      if (currPosInBlock < optLdm->startPosInBlock
@@ -868,10 +1008,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
      }

      if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
-         DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
-                  candidateOffCode, candidateMatchLength, currPosInBlock);
+         U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
+         DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
+                  candidateOffBase, candidateMatchLength, currPosInBlock);
          matches[*nbMatches].len = candidateMatchLength;
-         matches[*nbMatches].off = candidateOffCode;
+         matches[*nbMatches].off = candidateOffBase;
          (*nbMatches)++;
      }
  }
@@ -879,8 +1020,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
  /* ZSTD_optLdm_processMatchCandidate():
   * Wrapper function to update ldm seq store and call ldm functions as necessary.
   */
- static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
-                                               U32 currPosInBlock, U32 remainingBytes) {
+ static void
+ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
+                                   ZSTD_match_t* matches, U32* nbMatches,
+                                   U32 currPosInBlock, U32 remainingBytes)
+ {
      if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
          return;
      }
@@ -891,24 +1035,19 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
               * at the end of a match from the ldm seq store, and will often be some bytes
               * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
               */
-             U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
+             U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
              ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
-         }
+         }
          ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
      }
      ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
  }

+
  /*-*******************************
  *  Optimal parser
  *********************************/

-
- static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
- {
-     return sol.litlen + sol.mlen;
- }
-
  #if 0 /* debug */

  static void
@@ -926,7 +1065,13 @@ listStats(const U32* table, int lastEltID)

  #endif

- FORCE_INLINE_TEMPLATE size_t
+ #define LIT_PRICE(_p)    (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
+ #define LL_PRICE(_l)     (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
+ #define LL_INCPRICE(_l)  (LL_PRICE(_l) - LL_PRICE(_l-1))
+
+ FORCE_INLINE_TEMPLATE
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ size_t
  ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                                 seqStore_t* seqStore,
                                 U32 rep[ZSTD_REP_NUM],
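The new helper macros let the forward pass reason about marginal costs: LL_INCPRICE(l) is the cost delta of growing a literals run from l-1 to l literals, and it can be negative when the litLength code for l happens to be cheaper than for l-1 (the "ll1 is cheaper than ll0" test in a later hunk relies on exactly that). A toy model with an invented cost table (illustration, not code from the diff):

    #include <stdio.h>

    static int llPrice(unsigned litlen)
    {   /* hypothetical litLength costs, in 1/256th-of-a-bit units */
        static const int cost[5] = { 300, 260, 420, 500, 560 };
        return cost[litlen > 4 ? 4 : litlen];
    }
    #define LL_INCPRICE(l) (llPrice(l) - llPrice((l)-1))

    int main(void)
    {
        int const litCost = 6 * 256;   /* assume ~6 bits for the raw literal itself */
        unsigned l;
        for (l = 1; l <= 4; l++)
            printf("extend run to %u literals : +%d (lit) %+d (ll delta)\n",
                   l, litCost, LL_INCPRICE(l));
        /* note LL_INCPRICE(1) == -40 here: the first literal after a match
         * can be cheaper than ending the sequence with litlen==0 */
        return 0;
    }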
@@ -944,15 +1089,19 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
      const BYTE* const prefixStart = base + ms->window.dictLimit;
      const ZSTD_compressionParameters* const cParams = &ms->cParams;

+     ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
+
      U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
      U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
      U32 nextToUpdate3 = ms->nextToUpdate;

      ZSTD_optimal_t* const opt = optStatePtr->priceTable;
      ZSTD_match_t* const matches = optStatePtr->matchTable;
-     ZSTD_optimal_t lastSequence;
+     ZSTD_optimal_t lastStretch;
      ZSTD_optLdm_t optLdm;

+     ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
+
      optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
      optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
      ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
@@ -971,104 +1120,141 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
          /* find first match */
          {   U32 const litlen = (U32)(ip - anchor);
              U32 const ll0 = !litlen;
-             U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+             U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
              ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
-                                               (U32)(ip-istart), (U32)(iend - ip));
-             if (!nbMatches) { ip++; continue; }
+                                               (U32)(ip-istart), (U32)(iend-ip));
+             if (!nbMatches) {
+                 DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
+                 ip++;
+                 continue;
+             }
+
+             /* Match found: let's store this solution, and eventually find more candidates.
+              * During this forward pass, @opt is used to store stretches,
+              * defined as "a match followed by N literals".
+              * Note how this is different from a Sequence, which is "N literals followed by a match".
+              * Storing stretches allows us to store different match predecessors
+              * for each literal position part of a literals run. */

              /* initialize opt[0] */
-             { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
-             opt[0].mlen = 0;  /* means is_a_literal */
+             opt[0].mlen = 0;  /* there are only literals so far */
              opt[0].litlen = litlen;
-             /* We don't need to include the actual price of the literals because
-              * it is static for the duration of the forward pass, and is included
-              * in every price. We include the literal length to avoid negative
-              * prices when we subtract the previous literal length.
+             /* No need to include the actual price of the literals before the first match
+              * because it is static for the duration of the forward pass, and is included
+              * in every subsequent price. But, we include the literal length because
+              * the cost variation of litlen depends on the value of litlen.
               */
-             opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+             opt[0].price = LL_PRICE(litlen);
+             ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
+             ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));

              /* large match -> immediate encoding */
              {   U32 const maxML = matches[nbMatches-1].len;
-                 U32 const maxOffset = matches[nbMatches-1].off;
-                 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
-                             nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
+                 U32 const maxOffBase = matches[nbMatches-1].off;
+                 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
+                             nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));

                  if (maxML > sufficient_len) {
-                     lastSequence.litlen = litlen;
-                     lastSequence.mlen = maxML;
-                     lastSequence.off = maxOffset;
-                     DEBUGLOG(6, "large match (%u>%u), immediate encoding",
+                     lastStretch.litlen = 0;
+                     lastStretch.mlen = maxML;
+                     lastStretch.off = maxOffBase;
+                     DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
                                  maxML, sufficient_len);
                      cur = 0;
-                     last_pos = ZSTD_totalLen(lastSequence);
+                     last_pos = maxML;
                      goto _shortestPath;
              }   }

              /* set prices for first matches starting position == 0 */
-             {   U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
-                 U32 pos;
+             assert(opt[0].price >= 0);
+             {   U32 pos;
                  U32 matchNb;
                  for (pos = 1; pos < minMatch; pos++) {
-                     opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
+                     opt[pos].price = ZSTD_MAX_PRICE;
+                     opt[pos].mlen = 0;
+                     opt[pos].litlen = litlen + pos;
                  }
                  for (matchNb = 0; matchNb < nbMatches; matchNb++) {
-                     U32 const offset = matches[matchNb].off;
+                     U32 const offBase = matches[matchNb].off;
                      U32 const end = matches[matchNb].len;
                      for ( ; pos <= end ; pos++ ) {
-                         U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
-                         U32 const sequencePrice = literalsPrice + matchPrice;
+                         int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
+                         int const sequencePrice = opt[0].price + matchPrice;
                          DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
                                      pos, ZSTD_fCost(sequencePrice));
                          opt[pos].mlen = pos;
-                         opt[pos].off = offset;
-                         opt[pos].litlen = litlen;
-                         opt[pos].price = sequencePrice;
-                 }   }
+                         opt[pos].off = offBase;
+                         opt[pos].litlen = 0;   /* end of match */
+                         opt[pos].price = sequencePrice + LL_PRICE(0);
+                     }
+                 }
                  last_pos = pos-1;
+                 opt[pos].price = ZSTD_MAX_PRICE;
              }
          }

          /* check further positions */
          for (cur = 1; cur <= last_pos; cur++) {
              const BYTE* const inr = ip + cur;
-             assert(cur < ZSTD_OPT_NUM);
-             DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
+             assert(cur <= ZSTD_OPT_NUM);
+             DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);

              /* Fix current position with one literal if cheaper */
-             {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
+             {   U32 const litlen = opt[cur-1].litlen + 1;
                  int const price = opt[cur-1].price
-                                 + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
-                                 + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
-                                 - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
+                                 + LIT_PRICE(ip+cur-1)
+                                 + LL_INCPRICE(litlen);
                  assert(price < 1000000000); /* overflow check */
                  if (price <= opt[cur].price) {
+                     ZSTD_optimal_t const prevMatch = opt[cur];
                      DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                  inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                  opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
-                     opt[cur].mlen = 0;
-                     opt[cur].off = 0;
+                     opt[cur] = opt[cur-1];
                      opt[cur].litlen = litlen;
                      opt[cur].price = price;
+                     if ( (optLevel >= 1) /* additional check only for higher modes */
+                       && (prevMatch.litlen == 0) /* replace a match */
+                       && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
+                       && LIKELY(ip + cur < iend)
+                     ) {
+                         /* check next position, in case it would be cheaper */
+                         int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
+                         int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
+                         DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
+                                 cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
+                         if ( (with1literal < withMoreLiterals)
+                           && (with1literal < opt[cur+1].price) ) {
+                             /* update offset history - before it disappears */
+                             U32 const prev = cur - prevMatch.mlen;
+                             repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
+                             assert(cur >= prevMatch.mlen);
+                             DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
+                                     ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
+                                     newReps.rep[0], newReps.rep[1], newReps.rep[2] );
+                             opt[cur+1] = prevMatch;   /* mlen & offbase */
+                             ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
+                             opt[cur+1].litlen = 1;
+                             opt[cur+1].price = with1literal;
+                             if (last_pos < cur+1) last_pos = cur+1;
+                         }
+                     }
                  } else {
-                     DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
-                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
-                                 opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
+                     DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
+                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
                  }
              }

-             /* Set the repcodes of the current position. We must do it here
-              * because we rely on the repcodes of the 2nd to last sequence being
-              * correct to set the next chunks repcodes during the backward
-              * traversal.
+             /* Offset history is not updated during match comparison.
+              * Do it here, now that the match is selected and confirmed.
               */
              ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
              assert(cur >= opt[cur].mlen);
-             if (opt[cur].mlen != 0) {
+             if (opt[cur].litlen == 0) {
+                 /* just finished a match => alter offset history */
                  U32 const prev = cur - opt[cur].mlen;
-                 repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
+                 repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
                  ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
-             } else {
-                 ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
              }

              /* last match must start at a minimum distance of 8 from oend */
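As the hunk's own comment explains, the forward pass no longer stores sequences but "stretches", and the sentinel role moves from mlen==0 to litlen==0. A sketch of the new bookkeeping (field names mirror ZSTD_optimal_t, simplified; illustration only):

    /* a Sequence is "N literals then a match"; a stretch is "a match then N literals",
     * so one match can serve as predecessor for several literal positions */
    typedef struct {
        unsigned mlen;    /* length of the match ending this stretch (0 only at opt[0]) */
        unsigned off;     /* offBase of that match */
        unsigned litlen;  /* literals appended after the match; 0 => end-of-match position */
        int      price;   /* cumulative price up to this position */
    } optimal_example_t;

    /* input:  ...[ match of 4 ][ 'a' ][ 'b' ]...
     * opt[k]   = { mlen=4, off, litlen=0, ... }   <- end of the match
     * opt[k+1] = { mlen=4, off, litlen=1, ... }   <- same match + 1 literal
     * opt[k+2] = { mlen=4, off, litlen=2, ... }   <- same match + 2 literals
     * the backward pass then reads litlen==0 to know where matches end */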
@@ -1078,15 +1264,15 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1078
1264
 
1079
1265
  if ( (optLevel==0) /*static_test*/
1080
1266
  && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1081
- DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
1267
+ DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
1082
1268
  continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1083
1269
  }
1084
1270
 
1085
- { U32 const ll0 = (opt[cur].mlen != 0);
1086
- U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
1087
- U32 const previousPrice = opt[cur].price;
1088
- U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1089
- U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
1271
+ assert(opt[cur].price >= 0);
1272
+ { U32 const ll0 = (opt[cur].litlen == 0);
1273
+ int const previousPrice = opt[cur].price;
1274
+ int const basePrice = previousPrice + LL_PRICE(0);
1275
+ U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1090
1276
  U32 matchNb;
1091
1277
 
1092
1278
  ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
@@ -1097,18 +1283,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1097
1283
  continue;
1098
1284
  }
1099
1285
 
1100
- { U32 const maxML = matches[nbMatches-1].len;
1101
- DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
1102
- inr-istart, cur, nbMatches, maxML);
1103
-
1104
- if ( (maxML > sufficient_len)
1105
- || (cur + maxML >= ZSTD_OPT_NUM) ) {
1106
- lastSequence.mlen = maxML;
1107
- lastSequence.off = matches[nbMatches-1].off;
1108
- lastSequence.litlen = litlen;
1109
- cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
1110
- last_pos = cur + ZSTD_totalLen(lastSequence);
1111
- if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
1286
+ { U32 const longestML = matches[nbMatches-1].len;
1287
+ DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
1288
+ inr-istart, cur, nbMatches, longestML);
1289
+
1290
+ if ( (longestML > sufficient_len)
1291
+ || (cur + longestML >= ZSTD_OPT_NUM)
1292
+ || (ip + cur + longestML >= iend) ) {
1293
+ lastStretch.mlen = longestML;
1294
+ lastStretch.off = matches[nbMatches-1].off;
1295
+ lastStretch.litlen = 0;
1296
+ last_pos = cur + longestML;
1112
1297
  goto _shortestPath;
1113
1298
  } }
1114
1299
 
@@ -1119,20 +1304,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
               U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
               U32 mlen;
 
-              DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
-                          matchNb, matches[matchNb].off, lastML, litlen);
+              DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
+                          matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
 
               for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
                   U32 const pos = cur + mlen;
-                  int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
+                  int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
 
                   if ((pos > last_pos) || (price < opt[pos].price)) {
                       DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
                                   pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
-                      while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }  /* fill empty positions */
+                      while (last_pos < pos) {
+                          /* fill empty positions, for future comparisons */
+                          last_pos++;
+                          opt[last_pos].price = ZSTD_MAX_PRICE;
+                          opt[last_pos].litlen = !0;  /* just needs to be != 0, to mean "not an end of match" */
+                      }
                       opt[pos].mlen = mlen;
                       opt[pos].off = offset;
-                      opt[pos].litlen = litlen;
+                      opt[pos].litlen = 0;
                       opt[pos].price = price;
                   } else {
                       DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
@@ -1140,52 +1330,86 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                       if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
                   }
           }   }   }
+          opt[last_pos+1].price = ZSTD_MAX_PRICE;
     }  /* for (cur = 1; cur <= last_pos; cur++) */
 
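The loop above is the heart of the optimal parser: a forward dynamic program in which opt[pos].price is the cheapest estimated bit cost found so far to reach position pos, and every candidate match relaxes the positions it can reach. A condensed sketch of that relaxation step, with hypothetical simplified types (field names borrowed from the code above; filling unreached slots with a sentinel price first is what lets a single comparison stand in for the `pos > last_pos` test):

#define MAX_PRICE (1u << 30)

typedef struct { unsigned price; unsigned mlen; unsigned off; unsigned litlen; } node_t;

/* Relax one candidate: "a match of length mlen at offset off reaches
 * position cur+mlen at total cost price". */
static void relax_match(node_t* opt, unsigned* last_pos,
                        unsigned cur, unsigned mlen, unsigned off, unsigned price)
{
    unsigned const pos = cur + mlen;
    while (*last_pos < pos) {        /* unreached positions start at "infinity" */
        ++*last_pos;
        opt[*last_pos].price = MAX_PRICE;
        opt[*last_pos].litlen = 1;   /* != 0 : "does not end on a match" */
    }
    if (price < opt[pos].price) {    /* cheaper way to reach pos : record it */
        opt[pos].mlen = mlen;
        opt[pos].off = off;
        opt[pos].litlen = 0;         /* reached by a match */
        opt[pos].price = price;
    }
}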
-    lastSequence = opt[last_pos];
-    cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
-    assert(cur < ZSTD_OPT_NUM);  /* control overflow*/
+    lastStretch = opt[last_pos];
+    assert(cur >= lastStretch.mlen);
+    cur = last_pos - lastStretch.mlen;
 
 _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
     assert(opt[0].mlen == 0);
+    assert(last_pos >= lastStretch.mlen);
+    assert(cur == last_pos - lastStretch.mlen);
 
-    /* Set the next chunk's repcodes based on the repcodes of the beginning
-     * of the last match, and the last sequence. This avoids us having to
-     * update them while traversing the sequences.
-     */
-    if (lastSequence.mlen != 0) {
-        repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
-        ZSTD_memcpy(rep, &reps, sizeof(reps));
+    if (lastStretch.mlen==0) {
+        /* no solution : all matches have been converted into literals */
+        assert(lastStretch.litlen == (ip - anchor) + last_pos);
+        ip += last_pos;
+        continue;
+    }
+    assert(lastStretch.off > 0);
+
+    /* Update offset history */
+    if (lastStretch.litlen == 0) {
+        /* finishing on a match : update offset history */
+        repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
+        ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
     } else {
-        ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
+        ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
+        assert(cur >= lastStretch.litlen);
+        cur -= lastStretch.litlen;
     }
 
-    { U32 const storeEnd = cur + 1;
+    /* Let's write the shortest path solution.
+     * It is stored in @opt in reverse order,
+     * starting from @storeEnd (==cur+2),
+     * effectively partially overwriting @opt.
+     * Content is changed too:
+     * - So far, @opt stored stretches, aka a match followed by literals
+     * - Now, it will store sequences, aka literals followed by a match
+     */
+    { U32 const storeEnd = cur + 2;
       U32 storeStart = storeEnd;
-      U32 seqPos = cur;
+      U32 stretchPos = cur;
 
       DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
                   last_pos, cur); (void)last_pos;
-      assert(storeEnd < ZSTD_OPT_NUM);
-      DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
-                  storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
-      opt[storeEnd] = lastSequence;
-      while (seqPos > 0) {
-          U32 const backDist = ZSTD_totalLen(opt[seqPos]);
+      assert(storeEnd < ZSTD_OPT_SIZE);
+      DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+                  storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
+      if (lastStretch.litlen > 0) {
+          /* last "sequence" is unfinished: just a bunch of literals */
+          opt[storeEnd].litlen = lastStretch.litlen;
+          opt[storeEnd].mlen = 0;
+          storeStart = storeEnd-1;
+          opt[storeStart] = lastStretch;
+      } else {
+          opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
+          storeStart = storeEnd;
+      }
+      while (1) {
+          ZSTD_optimal_t nextStretch = opt[stretchPos];
+          opt[storeStart].litlen = nextStretch.litlen;
+          DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
+                      opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
+          if (nextStretch.mlen == 0) {
+              /* reaching beginning of segment */
+              break;
+          }
           storeStart--;
-          DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
-                      seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
-          opt[storeStart] = opt[seqPos];
-          seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
+          opt[storeStart] = nextStretch;  /* note: litlen will be fixed */
+          assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
+          stretchPos -= nextStretch.litlen + nextStretch.mlen;
       }
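The traversal above walks the winning path backwards. Until this point each opt[] node described a "stretch" (a match followed by its trailing literals); emitting requires "sequences" (literals followed by a match), so each node's litlen is shifted one slot forward as the chain is reversed. A simplified stand-alone rendition of the same walk (hypothetical types; the real code additionally special-cases a trailing run of literals, as shown above):

typedef struct { unsigned litlen, mlen, off; } stretch_t;

/* Walk the winning chain backwards. Caller pre-seeds opt[storeEnd] with
 * the final stretch; cur indexes the stretch that precedes it. Returns
 * storeStart such that opt[storeStart..storeEnd] holds forward sequences. */
static unsigned reverse_into_sequences(stretch_t* opt, unsigned cur, unsigned storeEnd)
{
    unsigned storeStart = storeEnd;
    unsigned stretchPos = cur;
    for (;;) {
        stretch_t const next = opt[stretchPos];
        /* a stretch's literals precede this match in forward order,
         * so its litlen belongs to the sequence stored one slot later */
        opt[storeStart].litlen = next.litlen;
        if (next.mlen == 0) break;      /* reached the segment start */
        storeStart--;
        opt[storeStart] = next;         /* its litlen gets fixed on the next iteration */
        stretchPos -= next.litlen + next.mlen;
    }
    return storeStart;
}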
 
       /* save sequences */
-      DEBUGLOG(6, "sending selected sequences into seqStore")
+      DEBUGLOG(6, "sending selected sequences into seqStore");
      { U32 storePos;
        for (storePos=storeStart; storePos <= storeEnd; storePos++) {
            U32 const llen = opt[storePos].litlen;
            U32 const mlen = opt[storePos].mlen;
-           U32 const offCode = opt[storePos].off;
+           U32 const offBase = opt[storePos].off;
            U32 const advance = llen + mlen;
            DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
                        anchor - istart, (unsigned)llen, (unsigned)mlen);
@@ -1197,11 +1421,14 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
            }
 
            assert(anchor + llen <= iend);
-           ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
-           ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
+           ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
+           ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
            anchor += advance;
            ip = anchor;
    }   }
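The offCode → offBase rename in this loop reflects the encoding convention now used throughout the compressor: a single U32 carries either a repcode or a real offset, disambiguated by value range. The helpers below are a hypothetical reading of that convention, not the library's own macros:

#define REP_COUNT 3   /* three repcode slots */

/* offBase encoding: 1..3 name a repcode slot; anything above 3 is a
 * real offset, stored with a +REP_COUNT bias. */
static unsigned offbase_from_offset(unsigned offset)  { return offset + REP_COUNT; }
static unsigned offbase_from_rep(unsigned rep1to3)    { return rep1to3; }
static int      offbase_is_rep(unsigned offBase)      { return offBase <= REP_COUNT; }
static unsigned offset_from_offbase(unsigned offBase) { return offBase - REP_COUNT; }

The companion change of passing mlen rather than mlen-MINMATCH to ZSTD_storeSeq() follows the same spirit: the MINMATCH bias moves inside the store function, so call sites deal only in real match lengths.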
+        DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
+
+        /* update all costs */
         ZSTD_setBasePrices(optStatePtr, optLevel);
     }
   }   /* while (ip < ilimit) */
@@ -1209,50 +1436,51 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
     /* Return the last literals size */
     return (size_t)(iend - anchor);
 }
+#endif /* build exclusions */
 
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
+static size_t ZSTD_compressBlock_opt0(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
+}
+#endif
+
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+static size_t ZSTD_compressBlock_opt2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
+}
+#endif
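ZSTD_compressBlock_opt0() and ZSTD_compressBlock_opt2() exist so that optLevel reaches ZSTD_compressBlock_opt_generic() as a compile-time constant, letting the compiler build two specialized copies and fold away the level checks, while the ZSTD_EXCLUDE_*_BLOCK_COMPRESSOR guards let a size-conscious build drop either copy (for example, building with -DZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR). The same pattern in miniature, with illustrative names that are not part of the library:

/* generic worker : behavior depends on a level parameter */
static int process_generic(int x, int level)
{
    if (level == 0) return x + 1;   /* cheap path */
    return x * 2 + 1;               /* thorough path */
}

/* thin wrappers : level is now a literal, so each copy specializes,
 * and each can be compiled out independently */
#ifndef EXCLUDE_FAST_PATH
static int process_fast(int x) { return process_generic(x, 0); }
#endif
#ifndef EXCLUDE_BEST_PATH
static int process_best(int x) { return process_generic(x, 2); }
#endif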
 
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
 size_t ZSTD_compressBlock_btopt(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
+    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
 }
+#endif
 
 
-/* used in 2-pass strategy */
-static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
-{
-    U32 s, sum=0;
-    assert(ZSTD_FREQ_DIV+bonus >= 0);
-    for (s=0; s<lastEltIndex+1; s++) {
-        table[s] <<= ZSTD_FREQ_DIV+bonus;
-        table[s]--;
-        sum += table[s];
-    }
-    return sum;
-}
 
-/* used in 2-pass strategy */
-MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
-{
-    if (ZSTD_compressedLiterals(optPtr))
-        optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
-    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
-    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
-    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
-}
 
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
 /* ZSTD_initStats_ultra():
  * make a first compression pass, just to seed stats with more accurate starting values.
  * only works on first block, with no dictionary and no ldm.
- * this function cannot error, hence its contract must be respected.
+ * this function cannot error out, its narrow contract must be respected.
  */
-static void
-ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
-                     seqStore_t* seqStore,
-                     U32 rep[ZSTD_REP_NUM],
-                     const void* src, size_t srcSize)
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+                          seqStore_t* seqStore,
+                          U32 rep[ZSTD_REP_NUM],
+                          const void* src, size_t srcSize)
 {
     U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
     ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
@@ -1263,17 +1491,15 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
     assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
     assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
 
-    ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);  /* generate stats into ms->opt*/
+    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);  /* generate stats into ms->opt*/
 
-    /* invalidate first scan from history */
+    /* invalidate first scan from history, only keep entropy stats */
     ZSTD_resetSeqStore(seqStore);
     ms->window.base -= srcSize;
     ms->window.dictLimit += (U32)srcSize;
     ms->window.lowLimit = ms->window.dictLimit;
     ms->nextToUpdate = ms->window.dictLimit;
 
-    /* re-inforce weight of collected statistics */
-    ZSTD_upscaleStats(&ms->opt);
 }
 
 size_t ZSTD_compressBlock_btultra(
@@ -1281,7 +1507,7 @@ size_t ZSTD_compressBlock_btultra(
         const void* src, size_t srcSize)
 {
     DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
 }
 
 size_t ZSTD_compressBlock_btultra2(
@@ -1291,10 +1517,10 @@ size_t ZSTD_compressBlock_btultra2(
     U32 const curr = (U32)((const BYTE*)src - ms->window.base);
     DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
 
-    /* 2-pass strategy:
+    /* 2-passes strategy:
      * this strategy makes a first pass over first block to collect statistics
-     * and seed next round's statistics with it.
-     * After 1st pass, function forgets everything, and starts a new block.
+     * in order to seed next round's statistics with it.
+     * After 1st pass, function forgets history, and starts a new block.
      * Consequently, this can only work if no data has been previously loaded in tables,
      * aka, no dictionary, no prefix, no ldm preprocessing.
     * The compression ratio gain is generally small (~0.5% on first block),
@@ -1303,42 +1529,47 @@ size_t ZSTD_compressBlock_btultra2(
    if ( (ms->opt.litLengthSum==0)                          /* first block */
      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
      && (ms->window.dictLimit == ms->window.lowLimit)      /* no dictionary */
-     && (curr == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
-     && (srcSize > ZSTD_PREDEF_THRESHOLD)
+     && (curr == ms->window.dictLimit)    /* start of frame, nothing already loaded nor skipped */
+     && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
      ) {
         ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
     }
 
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
 }
+#endif
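Reduced to its essentials, the btultra2 two-pass trick is: run the expensive parser once purely to populate the entropy statistics, throw away everything else it produced, then run it again with realistically priced symbols. A stand-alone sketch under stub helpers (hypothetical names; the real reset also rewinds the match-finder window, as ZSTD_initStats_ultra does above):

#include <stddef.h>
#include <string.h>

typedef struct { unsigned litFreq[256]; } stats_t;   /* stand-in for ms->opt */

/* stub standing in for the real block compressor: updates stats as it goes */
static size_t compress_block(stats_t* stats, void* dst, size_t dstCap,
                             const void* src, size_t srcSize)
{
    const unsigned char* p = (const unsigned char*)src;
    size_t i;
    for (i = 0; i < srcSize; i++) stats->litFreq[p[i]]++;  /* "collect statistics" */
    if (srcSize > dstCap) srcSize = dstCap;
    memcpy(dst, src, srcSize);                             /* placeholder output */
    return srcSize;
}

/* two-pass idea: pass 1 only warms the statistics; its output is discarded */
static size_t compress_first_block_2pass(stats_t* stats, void* dst, size_t dstCap,
                                         const void* src, size_t srcSize)
{
    (void)compress_block(stats, dst, dstCap, src, srcSize);  /* throwaway pass */
    /* the real code also invalidates the first scan's match history here */
    return compress_block(stats, dst, dstCap, src, srcSize); /* priced pass */
}

The guard conditions in ZSTD_compressBlock_btultra2() exist because the first pass pollutes the match-finder tables; the trick is only safe when nothing was loaded before the block, hence no dictionary, no prefix, and no ldm preprocessing.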
 
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
 size_t ZSTD_compressBlock_btopt_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
 }
 
-size_t ZSTD_compressBlock_btultra_dictMatchState(
+size_t ZSTD_compressBlock_btopt_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
 }
+#endif
 
-size_t ZSTD_compressBlock_btopt_extDict(
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btultra_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
+    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
 }
 
 size_t ZSTD_compressBlock_btultra_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
+    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
 }
+#endif
 
 /* note : no btultra2 variant for extDict nor dictMatchState,
  * because btultra2 is not meant to work with dictionaries