extzstd 0.3.2 → 0.4

Files changed (112)
  1. checksums.yaml +4 -4
  2. data/README.md +4 -3
  3. data/contrib/zstd/CHANGELOG +225 -1
  4. data/contrib/zstd/CONTRIBUTING.md +158 -75
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +106 -69
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +64 -36
  9. data/contrib/zstd/SECURITY.md +15 -0
  10. data/contrib/zstd/TESTING.md +2 -3
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +117 -199
  13. data/contrib/zstd/lib/README.md +37 -7
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +80 -86
  17. data/contrib/zstd/lib/common/compiler.h +225 -63
  18. data/contrib/zstd/lib/common/cpu.h +37 -1
  19. data/contrib/zstd/lib/common/debug.c +7 -1
  20. data/contrib/zstd/lib/common/debug.h +21 -12
  21. data/contrib/zstd/lib/common/entropy_common.c +15 -37
  22. data/contrib/zstd/lib/common/error_private.c +9 -2
  23. data/contrib/zstd/lib/common/error_private.h +93 -5
  24. data/contrib/zstd/lib/common/fse.h +12 -87
  25. data/contrib/zstd/lib/common/fse_decompress.c +37 -117
  26. data/contrib/zstd/lib/common/huf.h +97 -172
  27. data/contrib/zstd/lib/common/mem.h +58 -58
  28. data/contrib/zstd/lib/common/pool.c +38 -17
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +158 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -14
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +6 -814
  34. data/contrib/zstd/lib/common/xxhash.h +6930 -195
  35. data/contrib/zstd/lib/common/zstd_common.c +1 -36
  36. data/contrib/zstd/lib/common/zstd_deps.h +1 -1
  37. data/contrib/zstd/lib/common/zstd_internal.h +68 -154
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +75 -155
  41. data/contrib/zstd/lib/compress/hist.c +1 -1
  42. data/contrib/zstd/lib/compress/hist.h +1 -1
  43. data/contrib/zstd/lib/compress/huf_compress.c +810 -259
  44. data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
  63. data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
  79. data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +237 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +4 -3
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
  102. data/contrib/zstd/lib/zstd.h +1030 -332
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
  104. data/ext/extconf.rb +26 -7
  105. data/ext/extzstd.c +51 -24
  106. data/ext/extzstd.h +33 -6
  107. data/ext/extzstd_stream.c +74 -31
  108. data/ext/libzstd_conf.h +0 -1
  109. data/ext/zstd_decompress_asm.S +1 -0
  110. metadata +17 -7
  111. data/contrib/zstd/appveyor.yml +0 -292
  112. data/ext/depend +0 -2
Diff of data/contrib/zstd/lib/compress/zstd_opt.c:

@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -12,42 +12,52 @@
  #include "hist.h"
  #include "zstd_opt.h"

+ #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
+  || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
+  || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)

  #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
- #define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
  #define ZSTD_MAX_PRICE     (1<<30)

- #define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
+ #define ZSTD_PREDEF_THRESHOLD    8   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */


  /*-*************************************
  *  Price functions for optimal parser
  ***************************************/

- #if 0    /* approximation at bit level */
+ #if 0    /* approximation at bit level (for tests) */
  #  define BITCOST_ACCURACY 0
  #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
- #  define WEIGHT(stat)  ((void)opt, ZSTD_bitWeight(stat))
- #elif 0  /* fractional bit accuracy */
+ #  define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
+ #elif 0  /* fractional bit accuracy (for tests) */
  #  define BITCOST_ACCURACY 8
  #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
- #  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
+ #  define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
  #else    /* opt==approx, ultra==accurate */
  #  define BITCOST_ACCURACY 8
  #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
- #  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
+ #  define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
  #endif

+ /* ZSTD_bitWeight() :
+  * provide estimated "cost" of a stat in full bits only */
  MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
  {
      return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
  }

+ /* ZSTD_fracWeight() :
+  * provide fractional-bit "cost" of a stat,
+  * using linear interpolation approximation */
  MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
  {
      U32 const stat = rawStat + 1;
      U32 const hb = ZSTD_highbit32(stat);
      U32 const BWeight = hb * BITCOST_MULTIPLIER;
+     /* Fweight was meant for "Fractional weight"
+      * but it's effectively a value between 1 and 2
+      * using fixed point arithmetic */
      U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
      U32 const weight = BWeight + FWeight;
      assert(hb + BITCOST_ACCURACY < 31);
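Reviewer's note: the fixed-point cost model above is easier to see with numbers. Below is a minimal standalone sketch (not part of the diff; every *_demo name is an invented stand-in for ZSTD_highbit32() and the BITCOST_* macros):

    /* Standalone sketch of ZSTD_fracWeight()'s linear interpolation (names invented). */
    #include <stdio.h>

    static unsigned highbit32_demo(unsigned v)   /* stand-in for ZSTD_highbit32() */
    {
        unsigned hb = 0;
        while (v >>= 1) hb++;
        return hb;
    }

    #define ACCURACY_DEMO   8                       /* mirrors BITCOST_ACCURACY */
    #define MULTIPLIER_DEMO (1u << ACCURACY_DEMO)   /* mirrors BITCOST_MULTIPLIER */

    static unsigned fracWeight_demo(unsigned rawStat)
    {
        unsigned const stat = rawStat + 1;
        unsigned const hb = highbit32_demo(stat);
        unsigned const BWeight = hb * MULTIPLIER_DEMO;            /* whole-bit part */
        unsigned const FWeight = (stat << ACCURACY_DEMO) >> hb;   /* 1.0..2.0 in fixed point */
        return BWeight + FWeight;
    }

    int main(void)
    {
        unsigned s;
        for (s = 0; s < 8; s++)   /* weight rises smoothly instead of in whole-bit steps */
            printf("stat=%u -> %.3f\n", s, (double)fracWeight_demo(s) / MULTIPLIER_DEMO);
        return 0;
    }

In the price functions below, these weights are subtracted from a base price derived from the table's total, so more frequent symbols come out cheaper.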
@@ -58,7 +68,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
  /* debugging function,
   * @return price in bytes as fractional value
   * for debug messages only */
- MEM_STATIC double ZSTD_fCost(U32 price)
+ MEM_STATIC double ZSTD_fCost(int price)
  {
      return (double)price / (BITCOST_MULTIPLIER*8);
  }
@@ -66,7 +76,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)

  static int ZSTD_compressedLiterals(optState_t const* const optPtr)
  {
-     return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
+     return optPtr->literalCompressionMode != ZSTD_ps_disable;
  }

  static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
@@ -79,25 +89,52 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
  }


- /* ZSTD_downscaleStat() :
-  * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
-  * return the resulting sum of elements */
- static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
+ static U32 sum_u32(const unsigned table[], size_t nbElts)
+ {
+     size_t n;
+     U32 total = 0;
+     for (n=0; n<nbElts; n++) {
+         total += table[n];
+     }
+     return total;
+ }
+
+ typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
+
+ static U32
+ ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
  {
      U32 s, sum=0;
-     DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
-     assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
+     DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
+             (unsigned)lastEltIndex+1, (unsigned)shift );
+     assert(shift < 30);
      for (s=0; s<lastEltIndex+1; s++) {
-         table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
-         sum += table[s];
+         unsigned const base = base1 ? 1 : (table[s]>0);
+         unsigned const newStat = base + (table[s] >> shift);
+         sum += newStat;
+         table[s] = newStat;
      }
      return sum;
  }

+ /* ZSTD_scaleStats() :
+  * reduce all elt frequencies in table if sum too large
+  * return the resulting sum of elements */
+ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
+ {
+     U32 const prevsum = sum_u32(table, lastEltIndex+1);
+     U32 const factor = prevsum >> logTarget;
+     DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
+     assert(logTarget < 30);
+     if (factor <= 1) return prevsum;
+     return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
+ }
+
  /* ZSTD_rescaleFreqs() :
   * if first block (detected by optPtr->litLengthSum == 0) : init statistics
   *  take hints from dictionary if there is one
-  *  or init from zero, using src for literals stats, or flat 1 for match symbols
+  *  and init from zero if there is none,
+  *  using src for literals stats, and baseline stats for sequence symbols
   * otherwise downscale existing stats, to be used as seed for next block.
   */
  static void
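Reviewer's note: a worked example of the new rescaling (table values invented). With logTarget=11 and a table summing to 70000, factor = 70000 >> 11 = 34, so the shift becomes ZSTD_highbit32(34) = 5 and each entry is replaced by 1 + (entry >> 5):

    /* Sketch of ZSTD_scaleStats() on an invented 4-entry table (sum = 70000). */
    unsigned table_demo[4] = { 40000, 20000, 9000, 1000 };
    /* after ZSTD_downscaleStats(table_demo, 3, 5, base_1guaranteed):
     *   1 + (40000 >> 5) = 1251
     *   1 + (20000 >> 5) =  626
     *   1 + ( 9000 >> 5) =  282
     *   1 + ( 1000 >> 5) =   32
     * new sum = 2191, back near the 2^11 target; every entry stays >= 1 */

With base_0possible (used below for the first-block literal histogram), an entry that was 0 stays 0 instead of being forced up to 1.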
@@ -109,24 +146,28 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
      DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
      optPtr->priceType = zop_dynamic;

-     if (optPtr->litLengthSum == 0) {  /* first block : init */
-         if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
-             DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
+     if (optPtr->litLengthSum == 0) {  /* no literals stats collected -> first block assumed -> init */
+
+         /* heuristic: use pre-defined stats for too small inputs */
+         if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
+             DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
              optPtr->priceType = zop_predef;
          }

          assert(optPtr->symbolCosts != NULL);
          if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
-             /* huffman table presumed generated by dictionary */
+
+             /* huffman stats covering the full value set : table presumed generated by dictionary */
              optPtr->priceType = zop_dynamic;

              if (compressedLiterals) {
+                 /* generate literals statistics from huffman table */
                  unsigned lit;
                  assert(optPtr->litFreq != NULL);
                  optPtr->litSum = 0;
                  for (lit=0; lit<=MaxLit; lit++) {
                      U32 const scaleLog = 11;   /* scale to 2K */
-                     U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
+                     U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
                      assert(bitCost <= scaleLog);
                      optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                      optPtr->litSum += optPtr->litFreq[lit];
@@ -168,20 +209,26 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
                  optPtr->offCodeSum += optPtr->offCodeFreq[of];
              }   }

-         } else {  /* not a dictionary */
+         } else {  /* first block, no dictionary */

              assert(optPtr->litFreq != NULL);
              if (compressedLiterals) {
+                 /* base initial cost of literals on direct frequency within src */
                  unsigned lit = MaxLit;
                  HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
-                 optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+                 optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
              }

-             { unsigned ll;
-               for (ll=0; ll<=MaxLL; ll++)
-                   optPtr->litLengthFreq[ll] = 1;
+             { unsigned const baseLLfreqs[MaxLL+1] = {
+                   4, 2, 1, 1, 1, 1, 1, 1,
+                   1, 1, 1, 1, 1, 1, 1, 1,
+                   1, 1, 1, 1, 1, 1, 1, 1,
+                   1, 1, 1, 1, 1, 1, 1, 1,
+                   1, 1, 1, 1
+               };
+               ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
+               optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
              }
-             optPtr->litLengthSum = MaxLL+1;

              { unsigned ml;
@@ -189,21 +236,25 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
              }
              optPtr->matchLengthSum = MaxML+1;

-             { unsigned of;
-               for (of=0; of<=MaxOff; of++)
-                   optPtr->offCodeFreq[of] = 1;
+             { unsigned const baseOFCfreqs[MaxOff+1] = {
+                   6, 2, 1, 1, 2, 3, 4, 4,
+                   4, 3, 2, 1, 1, 1, 1, 1,
+                   1, 1, 1, 1, 1, 1, 1, 1,
+                   1, 1, 1, 1, 1, 1, 1, 1
+               };
+               ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
+               optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
              }
-             optPtr->offCodeSum = MaxOff+1;

          }

-     } else {  /* new block : re-use previous statistics, scaled down */
+     } else {  /* new block : scale down accumulated statistics */

          if (compressedLiterals)
-             optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
-         optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
-         optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
-         optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
+             optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
+         optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
+         optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
+         optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
      }

      ZSTD_setBasePrices(optPtr, optLevel);
@@ -216,6 +267,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                  const optState_t* const optPtr,
                                  int optLevel)
  {
+     DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
      if (litLength == 0) return 0;

      if (!ZSTD_compressedLiterals(optPtr))
@@ -225,11 +277,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
          return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */

      /* dynamic statistics */
-     { U32 price = litLength * optPtr->litSumBasePrice;
+     { U32 price = optPtr->litSumBasePrice * litLength;
+       U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
        U32 u;
+       assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
        for (u=0; u < litLength; u++) {
-           assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
-           price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
+           U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
+           if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
+           price -= litPrice;
        }
        return price;
      }
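Reviewer's note: the rewritten loop prices litLength literals as a base price minus one weight per literal, now clamped so a single very rare literal cannot push the total negative. A sketch of the same arithmetic, reusing fracWeight_demo() and MULTIPLIER_DEMO from the earlier aside (all _demo names invented):

    /* Sketch of the clamped dynamic literals pricing above. */
    static unsigned literalsPrice_demo(const unsigned char* lits, unsigned litLength,
                                       const unsigned* litFreq, unsigned litSumBasePrice)
    {
        unsigned const litPriceMax = litSumBasePrice - MULTIPLIER_DEMO;
        unsigned price = litSumBasePrice * litLength;
        unsigned u;
        for (u = 0; u < litLength; u++) {
            unsigned litPrice = fracWeight_demo(litFreq[lits[u]]);
            if (litPrice > litPriceMax) litPrice = litPriceMax;   /* clamp rare literals */
            price -= litPrice;
        }
        return price;   /* >= litLength * MULTIPLIER_DEMO : at least one bit per literal */
    }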
@@ -239,7 +294,17 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
   * cost of literalLength symbol */
  static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
  {
-     if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
+     assert(litLength <= ZSTD_BLOCKSIZE_MAX);
+     if (optPtr->priceType == zop_predef)
+         return WEIGHT(litLength, optLevel);
+
+     /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
+      * because it isn't representable in the zstd format.
+      * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
+      * In such a case, the block would be all literals.
+      */
+     if (litLength == ZSTD_BLOCKSIZE_MAX)
+         return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);

      /* dynamic statistics */
      { U32 const llCode = ZSTD_LLcode(litLength);
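Reviewer's note: the ZSTD_BLOCKSIZE_MAX special case above recurses exactly once:

    /* price(ZSTD_BLOCKSIZE_MAX) = BITCOST_MULTIPLIER + price(ZSTD_BLOCKSIZE_MAX - 1),
     * i.e. "one bit more" on the fixed-point scale; the recursive call takes the
     * ordinary path because ZSTD_BLOCKSIZE_MAX - 1 is representable by ZSTD_LLcode(). */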
@@ -250,22 +315,25 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
  }

  /* ZSTD_getMatchPrice() :
-  * Provides the cost of the match part (offset + matchLength) of a sequence
+  * Provides the cost of the match part (offset + matchLength) of a sequence.
   * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
-  * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
+  * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
+  * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
+  */
  FORCE_INLINE_TEMPLATE U32
- ZSTD_getMatchPrice(U32 const offset,
+ ZSTD_getMatchPrice(U32 const offBase,
                     U32 const matchLength,
                     const optState_t* const optPtr,
                     int const optLevel)
  {
      U32 price;
-     U32 const offCode = ZSTD_highbit32(offset+1);
+     U32 const offCode = ZSTD_highbit32(offBase);
      U32 const mlBase = matchLength - MINMATCH;
      assert(matchLength >= MINMATCH);

-     if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
-         return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
+     if (optPtr->priceType == zop_predef)  /* fixed scheme, does not use statistics */
+         return WEIGHT(mlBase, optLevel)
+              + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */

      /* dynamic statistics */
      price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
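Reviewer's note: @offBase is the sumtype introduced across this release. A hedged reconstruction of its numeric convention (the authoritative macros live in zstd_compress_internal.h; the _DEMO names here are invented):

    /* Reconstructed sketch of the ZSTD_storeSeq() offset representation. */
    #define REP_NUM_DEMO 3
    #define REPCODE_TO_OFFBASE_DEMO(r)  (r)                   /* repcode 1..3 -> offBase 1..3 */
    #define OFFSET_TO_OFFBASE_DEMO(o)   ((o) + REP_NUM_DEMO)  /* offset  > 0  -> offBase 4... */
    #define OFFBASE_IS_REPCODE_DEMO(ob) ((ob) <= REP_NUM_DEMO)

This is why the old ZSTD_highbit32(offset+1) becomes ZSTD_highbit32(offBase): the +1 that used to separate repcodes from real offsets is folded into the representation itself.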
@@ -284,10 +352,10 @@ ZSTD_getMatchPrice(U32 const offset,
  }

  /* ZSTD_updateStats() :
-  * assumption : literals + litLengtn <= iend */
+  * assumption : literals + litLength <= iend */
  static void ZSTD_updateStats(optState_t* const optPtr,
                               U32 litLength, const BYTE* literals,
-                              U32 offsetCode, U32 matchLength)
+                              U32 offBase, U32 matchLength)
  {
      /* literals */
      if (ZSTD_compressedLiterals(optPtr)) {
@@ -303,8 +371,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
          optPtr->litLengthSum++;
      }

-     /* match offset code (0-2=>repCode; 3+=>offset+2) */
-     { U32 const offCode = ZSTD_highbit32(offsetCode+1);
+     /* offset code : follows storeSeq() numeric representation */
+     { U32 const offCode = ZSTD_highbit32(offBase);
        assert(offCode <= MaxOff);
        optPtr->offCodeFreq[offCode]++;
        optPtr->offCodeSum++;
@@ -338,9 +406,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)

  /* Update hashTable3 up to ip (excluded)
     Assumption : always within prefix (i.e. not within extDict) */
- static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
-                                               U32* nextToUpdate3,
-                                               const BYTE* const ip)
+ static
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
+                                        U32* nextToUpdate3,
+                                        const BYTE* const ip)
  {
      U32* const hashTable3 = ms->hashTable3;
      U32 const hashLog3 = ms->hashLog3;
@@ -364,11 +434,15 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
  *  Binary Tree search
  ***************************************/
  /** ZSTD_insertBt1() : add one or multiple positions to tree.
-  * ip : assumed <= iend-8 .
+  * @param ip assumed <= iend-8 .
+  * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
   * @return : nb of positions added */
- static U32 ZSTD_insertBt1(
-                 ZSTD_matchState_t* ms,
+ static
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ U32 ZSTD_insertBt1(
+                 const ZSTD_matchState_t* ms,
                  const BYTE* const ip, const BYTE* const iend,
+                 U32 const target,
                  U32 const mls, const int extDict)
  {
      const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -391,7 +465,10 @@ static U32 ZSTD_insertBt1(
      U32* smallerPtr = bt + 2*(curr&btMask);
      U32* largerPtr  = smallerPtr + 1;
      U32 dummy32;   /* to be nullified at the end */
-     U32 const windowLow = ms->window.lowLimit;
+     /* windowLow is based on target because
+      * we only need positions that will be in the window at the end of the tree update.
+      */
+     U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
      U32 matchEndIdx = curr+8+1;
      size_t bestLength = 8;
      U32 nbCompares = 1U << cParams->searchLog;
@@ -404,11 +481,12 @@ static U32 ZSTD_insertBt1(

      DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

+     assert(curr <= target);
      assert(ip <= iend-8);   /* required for h calculation */
      hashTable[h] = curr;   /* Update Hash Table */

      assert(windowLow > 0);
-     while (nbCompares-- && (matchIndex >= windowLow)) {
+     for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
          U32* const nextPtr = bt + 2*(matchIndex & btMask);
          size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
          assert(matchIndex < curr);
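Reviewer's note: the loop rewrite (for (; nbCompares && ...; --nbCompares) replacing while (nbCompares-- && ...)) avoids an unsigned underflow. The post-decrement fires even on the failing test, so the old form left nbCompares wrapped to UINT_MAX on exit, which matters now that the dictMatchState path keeps using the counter (see the assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)) added further down). A toy illustration, not zstd code:

    #include <assert.h>
    #include <limits.h>

    static void underflow_demo(void)
    {
        unsigned n = 0, m = 0;
        while (n-- && 0) { }      /* n-- decrements even though the test fails... */
        assert(n == UINT_MAX);    /* ...so the counter wrapped around */
        for (; m && 0; --m) { }   /* m is tested first; decrement only after a full pass */
        assert(m == 0);           /* counter intact */
    }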
@@ -480,6 +558,7 @@ static U32 ZSTD_insertBt1(
  }

  FORCE_INLINE_TEMPLATE
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
  void ZSTD_updateTree_internal(
                  ZSTD_matchState_t* ms,
                  const BYTE* const ip, const BYTE* const iend,
@@ -488,11 +567,11 @@ void ZSTD_updateTree_internal(
      const BYTE* const base = ms->window.base;
      U32 const target = (U32)(ip - base);
      U32 idx = ms->nextToUpdate;
-     DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
+     DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                  idx, target, dictMode);

      while(idx < target) {
-         U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+         U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
          assert(idx < (U32)(idx + forward));
          idx += forward;
      }
@@ -506,15 +585,18 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
  }

  FORCE_INLINE_TEMPLATE
- U32 ZSTD_insertBtAndGetAllMatches (
-                 ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
-                 ZSTD_matchState_t* ms,
-                 U32* nextToUpdate3,
-                 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
-                 const U32 rep[ZSTD_REP_NUM],
-                 U32 const ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
-                 const U32 lengthToBeat,
-                 U32 const mls /* template */)
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ U32
+ ZSTD_insertBtAndGetAllMatches (
+                 ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
+                 ZSTD_matchState_t* ms,
+                 U32* nextToUpdate3,
+                 const BYTE* const ip, const BYTE* const iLimit,
+                 const ZSTD_dictMode_e dictMode,
+                 const U32 rep[ZSTD_REP_NUM],
+                 const U32 ll0,  /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
+                 const U32 lengthToBeat,
+                 const U32 mls /* template */)
  {
      const ZSTD_compressionParameters* const cParams = &ms->cParams;
      U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
@@ -597,7 +679,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                  DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
                              repCode, ll0, repOffset, repLen);
                  bestLength = repLen;
-                 matches[mnum].off = repCode - ll0;
+                 matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
                  matches[mnum].len = (U32)repLen;
                  mnum++;
                  if ( (repLen > sufficient_len)
@@ -626,7 +708,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                  bestLength = mlen;
                  assert(curr > matchIndex3);
                  assert(mnum==0);  /* no prior solution */
-                 matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
+                 matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
                  matches[0].len = (U32)mlen;
                  mnum = 1;
                  if ( (mlen > sufficient_len) |
@@ -635,11 +717,11 @@ U32 ZSTD_insertBtAndGetAllMatches (
                      return 1;
          }   }   }
          /* no dictMatchState lookup: dicts don't have a populated HC3 table */
-     }
+     }  /* if (mls == 3) */

      hashTable[h] = curr;   /* Update Hash Table */

-     while (nbCompares-- && (matchIndex >= matchLow)) {
+     for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
          U32* const nextPtr = bt + 2*(matchIndex & btMask);
          const BYTE* match;
          size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
@@ -659,21 +741,20 @@ U32 ZSTD_insertBtAndGetAllMatches (
          }

          if (matchLength > bestLength) {
-             DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
-                     (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
+             DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
+                     (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
              assert(matchEndIdx > matchIndex);
              if (matchLength > matchEndIdx - matchIndex)
                  matchEndIdx = matchIndex + (U32)matchLength;
              bestLength = matchLength;
-             matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
+             matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
              matches[mnum].len = (U32)matchLength;
              mnum++;
              if ( (matchLength > ZSTD_OPT_NUM)
                 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                  if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                  break; /* drop, to preserve bt consistency (miss a little bit of compression) */
-             }
-         }
+         }   }

          if (match[matchLength] < ip[matchLength]) {
              /* match smaller than current */
@@ -692,12 +773,13 @@ U32 ZSTD_insertBtAndGetAllMatches (

      *smallerPtr = *largerPtr = 0;

+     assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
      if (dictMode == ZSTD_dictMatchState && nbCompares) {
          size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
          U32 dictMatchIndex = dms->hashTable[dmsH];
          const U32* const dmsBt = dms->chainTable;
          commonLengthSmaller = commonLengthLarger = 0;
-         while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
+         for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
              const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
              size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
              const BYTE* match = dmsBase + dictMatchIndex;
@@ -707,19 +789,18 @@ U32 ZSTD_insertBtAndGetAllMatches (

              if (matchLength > bestLength) {
                  matchIndex = dictMatchIndex + dmsIndexDelta;
-                 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
-                         (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
+                 DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
+                         (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
                  if (matchLength > matchEndIdx - matchIndex)
                      matchEndIdx = matchIndex + (U32)matchLength;
                  bestLength = matchLength;
-                 matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
+                 matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
                  matches[mnum].len = (U32)matchLength;
                  mnum++;
                  if ( (matchLength > ZSTD_OPT_NUM)
                     | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                      break;   /* drop, to guarantee consistency (miss a little bit of compression) */
-                 }
-             }
+             }   }

              if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
              if (match[matchLength] < ip[matchLength]) {
@@ -729,39 +810,93 @@ U32 ZSTD_insertBtAndGetAllMatches (
              /* match is larger than current */
              commonLengthLarger = matchLength;
              dictMatchIndex = nextPtr[0];
-             }
-         }
-     }
+     }   }   }  /* if (dictMode == ZSTD_dictMatchState) */

      assert(matchEndIdx > curr+8);
      ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
      return mnum;
  }

+ typedef U32 (*ZSTD_getAllMatchesFn)(
+     ZSTD_match_t*,
+     ZSTD_matchState_t*,
+     U32*,
+     const BYTE*,
+     const BYTE*,
+     const U32 rep[ZSTD_REP_NUM],
+     U32 const ll0,
+     U32 const lengthToBeat);

- FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
-                 ZSTD_match_t* matches,  /* store result (match found, increasing size) in this table */
-                 ZSTD_matchState_t* ms,
-                 U32* nextToUpdate3,
-                 const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
-                 const U32 rep[ZSTD_REP_NUM],
-                 U32 const ll0,
-                 U32 const lengthToBeat)
+ FORCE_INLINE_TEMPLATE
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ U32 ZSTD_btGetAllMatches_internal(
+         ZSTD_match_t* matches,
+         ZSTD_matchState_t* ms,
+         U32* nextToUpdate3,
+         const BYTE* ip,
+         const BYTE* const iHighLimit,
+         const U32 rep[ZSTD_REP_NUM],
+         U32 const ll0,
+         U32 const lengthToBeat,
+         const ZSTD_dictMode_e dictMode,
+         const U32 mls)
  {
-     const ZSTD_compressionParameters* const cParams = &ms->cParams;
-     U32 const matchLengthSearch = cParams->minMatch;
-     DEBUGLOG(8, "ZSTD_BtGetAllMatches");
-     if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
-     ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
-     switch(matchLengthSearch)
-     {
-     case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
-     default :
-     case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
-     case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
-     case 7 :
-     case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
+     assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
+     DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
+     if (ip < ms->window.base + ms->nextToUpdate)
+         return 0;   /* skipped area */
+     ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
+     return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
+ }
+
+ #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
+
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
+     static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
+             ZSTD_match_t* matches,                             \
+             ZSTD_matchState_t* ms,                             \
+             U32* nextToUpdate3,                                \
+             const BYTE* ip,                                    \
+             const BYTE* const iHighLimit,                      \
+             const U32 rep[ZSTD_REP_NUM],                       \
+             U32 const ll0,                                     \
+             U32 const lengthToBeat)                            \
+     {                                                          \
+         return ZSTD_btGetAllMatches_internal(                  \
+                 matches, ms, nextToUpdate3, ip, iHighLimit,    \
+                 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
      }
+
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
+     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
+     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
+     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
+     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
+
+ GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
+ GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
+ GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
+
+ #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
+ {                                               \
+     ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3),    \
+     ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4),    \
+     ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5),    \
+     ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)     \
+ }
+
+ static ZSTD_getAllMatchesFn
+ ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
+ {
+     ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
+         ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
+         ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
+         ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
+     };
+     U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
+     assert((U32)dictMode < 3);
+     assert(mls - 3 < 4);
+     return getAllMatchesFns[(int)dictMode][mls - 3];
  }
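Reviewer's note: this block swaps the old runtime switch on minMatch for a table of compile-time specializations, so dictMode and mls become constants inside each generated function, which lets FORCE_INLINE_TEMPLATE actually specialize the search loop. The pattern in miniature, with all names invented:

    /* Miniature of the generate-and-dispatch pattern above (names invented). */
    typedef int (*op_fn)(int);

    #define GEN_OP(mode, mls) \
        static int op_##mode##_##mls(int x) { return x * (mls) + (mode); }

    GEN_OP(0, 3) GEN_OP(0, 4)
    GEN_OP(1, 3) GEN_OP(1, 4)

    static op_fn select_op(int mode, int mls)
    {
        static const op_fn table[2][2] = {
            { op_0_3, op_0_4 },
            { op_1_3, op_1_4 },
        };
        return table[mode][mls - 3];
    }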

  /*************************
@@ -770,16 +905,18 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (

  /* Struct containing info needed to make decision about ldm inclusion */
  typedef struct {
-     rawSeqStore_t seqStore;   /* External match candidates store for this block */
-     U32 startPosInBlock;      /* Start position of the current match candidate */
-     U32 endPosInBlock;        /* End position of the current match candidate */
-     U32 offset;               /* Offset of the match candidate */
+     rawSeqStore_t seqStore;   /* External match candidates store for this block */
+     U32 startPosInBlock;      /* Start position of the current match candidate */
+     U32 endPosInBlock;        /* End position of the current match candidate */
+     U32 offset;               /* Offset of the match candidate */
  } ZSTD_optLdm_t;

  /* ZSTD_optLdm_skipRawSeqStoreBytes():
-  * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
+  * Moves forward in @rawSeqStore by @nbBytes,
+  * which will update the fields 'pos' and 'posInSequence'.
   */
- static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
+ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
+ {
      U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
      while (currPos && rawSeqStore->pos < rawSeqStore->size) {
          rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
@@ -800,8 +937,10 @@ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t
   * Calculates the beginning and end of the next match in the current block.
   * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
   */
- static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
-                                                    U32 blockBytesRemaining) {
+ static void
+ ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
+                                        U32 blockBytesRemaining)
+ {
      rawSeq currSeq;
      U32 currBlockEndPos;
      U32 literalsBytesRemaining;
@@ -813,8 +952,8 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
          optLdm->endPosInBlock = UINT_MAX;
          return;
      }
-     /* Calculate appropriate bytes left in matchLength and litLength after adjusting
-        based on ldmSeqStore->posInSequence */
+     /* Calculate appropriate bytes left in matchLength and litLength
+      * after adjusting based on ldmSeqStore->posInSequence */
      currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
      assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
      currBlockEndPos = currPosInBlock + blockBytesRemaining;
@@ -850,15 +989,16 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
  }

  /* ZSTD_optLdm_maybeAddMatch():
-  * Adds a match if it's long enough, based on it's 'matchStartPosInBlock'
-  * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
+  * Adds a match if it's long enough,
+  * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
+  * into 'matches'. Maintains the correct ordering of 'matches'.
   */
  static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
-                                       ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
-     U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
-     /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
-     U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
-     U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
+                                       const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
+ {
+     U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
+     /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
+     U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;

      /* Ensure that current block position is not outside of the match */
      if (currPosInBlock < optLdm->startPosInBlock
@@ -868,10 +1008,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
      }

      if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
-         DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
-                  candidateOffCode, candidateMatchLength, currPosInBlock);
+         U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
+         DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
+                  candidateOffBase, candidateMatchLength, currPosInBlock);
          matches[*nbMatches].len = candidateMatchLength;
-         matches[*nbMatches].off = candidateOffCode;
+         matches[*nbMatches].off = candidateOffBase;
          (*nbMatches)++;
      }
  }
@@ -879,8 +1020,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
  /* ZSTD_optLdm_processMatchCandidate():
   * Wrapper function to update ldm seq store and call ldm functions as necessary.
   */
- static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
-                                               U32 currPosInBlock, U32 remainingBytes) {
+ static void
+ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
+                                   ZSTD_match_t* matches, U32* nbMatches,
+                                   U32 currPosInBlock, U32 remainingBytes)
+ {
      if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
          return;
      }
@@ -891,24 +1035,19 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
           * at the end of a match from the ldm seq store, and will often be some bytes
           * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
           */
-         U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
+         U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
          ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
-     }
+     }
      ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
  }
  ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
  }

+
  /*-*******************************
  *  Optimal parser
  *********************************/

-
- static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
- {
-     return sol.litlen + sol.mlen;
- }
-
  #if 0  /* debug */

  static void
  listStats(const U32* table, int lastEltID)
@@ -926,7 +1065,13 @@ listStats(const U32* table, int lastEltID)

  #endif

- FORCE_INLINE_TEMPLATE size_t
+ #define LIT_PRICE(_p)   (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
+ #define LL_PRICE(_l)    (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
+ #define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
+
+ FORCE_INLINE_TEMPLATE
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ size_t
  ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                                 seqStore_t* seqStore,
                                 U32 rep[ZSTD_REP_NUM],
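Reviewer's note: the three helper macros read naturally as marginal costs:

    /* Marginal-cost reading of the macros above:
     *   LL_PRICE(l)    : price of a literal-length code for l literals
     *   LIT_PRICE(p)   : price of the single literal byte at p
     *   LL_INCPRICE(l) : LL_PRICE(l) - LL_PRICE(l-1), the cost of growing a
     *                    literals run from l-1 to l literals
     * LL_INCPRICE can be negative when the longer run lands on a cheaper
     * length code; the parser exploits exactly that below (LL_INCPRICE(1) < 0). */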
@@ -944,15 +1089,19 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
      const BYTE* const prefixStart = base + ms->window.dictLimit;
      const ZSTD_compressionParameters* const cParams = &ms->cParams;

+     ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
+
      U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
      U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
      U32 nextToUpdate3 = ms->nextToUpdate;

      ZSTD_optimal_t* const opt = optStatePtr->priceTable;
      ZSTD_match_t* const matches = optStatePtr->matchTable;
-     ZSTD_optimal_t lastSequence;
+     ZSTD_optimal_t lastStretch;
      ZSTD_optLdm_t optLdm;

+     ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
+
      optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
      optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
      ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
@@ -971,104 +1120,141 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
          /* find first match */
          {   U32 const litlen = (U32)(ip - anchor);
              U32 const ll0 = !litlen;
-             U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+             U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
              ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
-                                               (U32)(ip-istart), (U32)(iend - ip));
-             if (!nbMatches) { ip++; continue; }
+                                               (U32)(ip-istart), (U32)(iend-ip));
+             if (!nbMatches) {
+                 DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
+                 ip++;
+                 continue;
+             }
+
+             /* Match found: let's store this solution, and eventually find more candidates.
+              * During this forward pass, @opt is used to store stretches,
+              * defined as "a match followed by N literals".
+              * Note how this is different from a Sequence, which is "N literals followed by a match".
+              * Storing stretches allows us to store different match predecessors
+              * for each literal position part of a literals run. */

              /* initialize opt[0] */
-             { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
-             opt[0].mlen = 0;  /* means is_a_literal */
+             opt[0].mlen = 0;  /* there are only literals so far */
              opt[0].litlen = litlen;
-             /* We don't need to include the actual price of the literals because
-              * it is static for the duration of the forward pass, and is included
-              * in every price. We include the literal length to avoid negative
-              * prices when we subtract the previous literal length.
+             /* No need to include the actual price of the literals before the first match
+              * because it is static for the duration of the forward pass, and is included
+              * in every subsequent price. But, we include the literal length because
+              * the cost variation of litlen depends on the value of litlen.
               */
-             opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+             opt[0].price = LL_PRICE(litlen);
+             ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
+             ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));

              /* large match -> immediate encoding */
              {   U32 const maxML = matches[nbMatches-1].len;
-                 U32 const maxOffset = matches[nbMatches-1].off;
-                 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
-                             nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
+                 U32 const maxOffBase = matches[nbMatches-1].off;
+                 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
+                             nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));

                  if (maxML > sufficient_len) {
-                     lastSequence.litlen = litlen;
-                     lastSequence.mlen = maxML;
-                     lastSequence.off = maxOffset;
-                     DEBUGLOG(6, "large match (%u>%u), immediate encoding",
+                     lastStretch.litlen = 0;
+                     lastStretch.mlen = maxML;
+                     lastStretch.off = maxOffBase;
+                     DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
                                  maxML, sufficient_len);
                      cur = 0;
-                     last_pos = ZSTD_totalLen(lastSequence);
+                     last_pos = maxML;
                      goto _shortestPath;
              }   }

              /* set prices for first matches starting position == 0 */
-             {   U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
-                 U32 pos;
+             assert(opt[0].price >= 0);
+             {   U32 pos;
                  U32 matchNb;
                  for (pos = 1; pos < minMatch; pos++) {
-                     opt[pos].price = ZSTD_MAX_PRICE;  /* mlen, litlen and price will be fixed during forward scanning */
+                     opt[pos].price = ZSTD_MAX_PRICE;
+                     opt[pos].mlen = 0;
+                     opt[pos].litlen = litlen + pos;
                  }
                  for (matchNb = 0; matchNb < nbMatches; matchNb++) {
-                     U32 const offset = matches[matchNb].off;
+                     U32 const offBase = matches[matchNb].off;
                      U32 const end = matches[matchNb].len;
                      for ( ; pos <= end ; pos++ ) {
-                         U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
-                         U32 const sequencePrice = literalsPrice + matchPrice;
+                         int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
+                         int const sequencePrice = opt[0].price + matchPrice;
                          DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
                                      pos, ZSTD_fCost(sequencePrice));
                          opt[pos].mlen = pos;
-                         opt[pos].off = offset;
-                         opt[pos].litlen = litlen;
-                         opt[pos].price = sequencePrice;
-                 }   }
+                         opt[pos].off = offBase;
+                         opt[pos].litlen = 0;  /* end of match */
+                         opt[pos].price = sequencePrice + LL_PRICE(0);
+                     }
+                 }
                  last_pos = pos-1;
+                 opt[pos].price = ZSTD_MAX_PRICE;
              }
          }

          /* check further positions */
          for (cur = 1; cur <= last_pos; cur++) {
              const BYTE* const inr = ip + cur;
-             assert(cur < ZSTD_OPT_NUM);
-             DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
+             assert(cur <= ZSTD_OPT_NUM);
+             DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);

              /* Fix current position with one literal if cheaper */
-             {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
+             {   U32 const litlen = opt[cur-1].litlen + 1;
                  int const price = opt[cur-1].price
-                                 + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
-                                 + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
-                                 - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
+                                 + LIT_PRICE(ip+cur-1)
+                                 + LL_INCPRICE(litlen);
                  assert(price < 1000000000); /* overflow check */
                  if (price <= opt[cur].price) {
+                     ZSTD_optimal_t const prevMatch = opt[cur];
                      DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                  inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                  opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
-                     opt[cur].mlen = 0;
-                     opt[cur].off = 0;
+                     opt[cur] = opt[cur-1];
                      opt[cur].litlen = litlen;
                      opt[cur].price = price;
+                     if ( (optLevel >= 1) /* additional check only for higher modes */
+                       && (prevMatch.litlen == 0) /* replace a match */
+                       && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
+                       && LIKELY(ip + cur < iend)
+                     ) {
+                         /* check next position, in case it would be cheaper */
+                         int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
+                         int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
+                         DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
+                                     cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
+                         if ( (with1literal < withMoreLiterals)
+                           && (with1literal < opt[cur+1].price) ) {
+                             /* update offset history - before it disappears */
+                             U32 const prev = cur - prevMatch.mlen;
+                             repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
+                             assert(cur >= prevMatch.mlen);
+                             DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
+                                         ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
+                                         newReps.rep[0], newReps.rep[1], newReps.rep[2] );
+                             opt[cur+1] = prevMatch;  /* mlen & offbase */
+                             ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
+                             opt[cur+1].litlen = 1;
+                             opt[cur+1].price = with1literal;
+                             if (last_pos < cur+1) last_pos = cur+1;
+                         }
+                     }
                  } else {
-                     DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
-                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
-                                 opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
+                     DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
+                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
                  }
              }

-             /* Set the repcodes of the current position. We must do it here
-              * because we rely on the repcodes of the 2nd to last sequence being
-              * correct to set the next chunks repcodes during the backward
-              * traversal.
+             /* Offset history is not updated during match comparison.
+              * Do it here, now that the match is selected and confirmed.
               */
              ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
              assert(cur >= opt[cur].mlen);
-             if (opt[cur].mlen != 0) {
+             if (opt[cur].litlen == 0) {
+                 /* just finished a match => alter offset history */
                  U32 const prev = cur - opt[cur].mlen;
-                 repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
+                 repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
                  ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
-             } else {
-                 ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
              }

              /* last match must start at a minimum distance of 8 from oend */
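Reviewer's note: the forward pass now stores "stretches" rather than sequences, per the comment added near the first-match block. In sketch form:

    /* stretch vs sequence, as used by opt[]:
     *
     *   sequence : [ N literals ][ match ]   <- what zstd ultimately encodes
     *   stretch  : [ match ][ N literals ]   <- what opt[pos] records
     *
     * opt[pos].litlen == 0 : pos is the end of a match (offset history may change here)
     * opt[pos].litlen == k : pos ends a run of k literals following some match
     * Storing stretches lets each literal position keep its own match predecessor,
     * which is what the new "match + 1 literal" lookahead above relies on. */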
@@ -1078,15 +1264,15 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,

  if ( (optLevel==0) /*static_test*/
  && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
- DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
+ DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
  continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
  }

- { U32 const ll0 = (opt[cur].mlen != 0);
- U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
- U32 const previousPrice = opt[cur].price;
- U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
- U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
+ assert(opt[cur].price >= 0);
+ { U32 const ll0 = (opt[cur].litlen == 0);
+ int const previousPrice = opt[cur].price;
+ int const basePrice = previousPrice + LL_PRICE(0);
+ U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
  U32 matchNb;

  ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
@@ -1097,18 +1283,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
  continue;
  }

- { U32 const maxML = matches[nbMatches-1].len;
- DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
- inr-istart, cur, nbMatches, maxML);
-
- if ( (maxML > sufficient_len)
- || (cur + maxML >= ZSTD_OPT_NUM) ) {
- lastSequence.mlen = maxML;
- lastSequence.off = matches[nbMatches-1].off;
- lastSequence.litlen = litlen;
- cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
- last_pos = cur + ZSTD_totalLen(lastSequence);
- if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
+ { U32 const longestML = matches[nbMatches-1].len;
+ DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
+ inr-istart, cur, nbMatches, longestML);
+
+ if ( (longestML > sufficient_len)
+ || (cur + longestML >= ZSTD_OPT_NUM)
+ || (ip + cur + longestML >= iend) ) {
+ lastStretch.mlen = longestML;
+ lastStretch.off = matches[nbMatches-1].off;
+ lastStretch.litlen = 0;
+ last_pos = cur + longestML;
  goto _shortestPath;
  } }

@@ -1119,20 +1304,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
  U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
  U32 mlen;

- DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
- matchNb, matches[matchNb].off, lastML, litlen);
+ DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
+ matchNb, matches[matchNb].off, lastML, opt[cur].litlen);

  for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
  U32 const pos = cur + mlen;
- int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
+ int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);

  if ((pos > last_pos) || (price < opt[pos].price)) {
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
  pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
- while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
+ while (last_pos < pos) {
+ /* fill empty positions, for future comparisons */
+ last_pos++;
+ opt[last_pos].price = ZSTD_MAX_PRICE;
+ opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */
+ }
  opt[pos].mlen = mlen;
  opt[pos].off = offset;
- opt[pos].litlen = litlen;
+ opt[pos].litlen = 0;
  opt[pos].price = price;
  } else {
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
@@ -1140,52 +1330,86 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
  if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
  }
  } } }
+ opt[last_pos+1].price = ZSTD_MAX_PRICE;
  } /* for (cur = 1; cur <= last_pos; cur++) */
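As a toy model of the forward pass being modified here (hypothetical names, not library code): each candidate match relaxes the price at cur+mlen, and gap positions between the old frontier and the new one are pre-filled with an effectively infinite price and a non-zero litlen, so later comparisons treat them as "not yet reached by a match":

    #define MAX_PRICE (1 << 30)

    typedef struct { int price; unsigned mlen, litlen; } Node;

    static void tryMatch(Node* opt, unsigned* last_pos,
                         unsigned cur, unsigned mlen, int price)
    {
        unsigned const pos = cur + mlen;
        while (*last_pos < pos) {        /* fill empty positions */
            ++*last_pos;
            opt[*last_pos].price  = MAX_PRICE;
            opt[*last_pos].litlen = 1;   /* != 0 : "not an end of match" */
        }
        if (price < opt[pos].price) {    /* keep the cheaper path */
            opt[pos].price  = price;
            opt[pos].mlen   = mlen;
            opt[pos].litlen = 0;         /* position reached by a match */
        }
    }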

- lastSequence = opt[last_pos];
- cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
- assert(cur < ZSTD_OPT_NUM); /* control overflow*/
+ lastStretch = opt[last_pos];
+ assert(cur >= lastStretch.mlen);
+ cur = last_pos - lastStretch.mlen;

 _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
  assert(opt[0].mlen == 0);
+ assert(last_pos >= lastStretch.mlen);
+ assert(cur == last_pos - lastStretch.mlen);

- /* Set the next chunk's repcodes based on the repcodes of the beginning
- * of the last match, and the last sequence. This avoids us having to
- * update them while traversing the sequences.
- */
- if (lastSequence.mlen != 0) {
- repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
- ZSTD_memcpy(rep, &reps, sizeof(reps));
+ if (lastStretch.mlen==0) {
+ /* no solution : all matches have been converted into literals */
+ assert(lastStretch.litlen == (ip - anchor) + last_pos);
+ ip += last_pos;
+ continue;
+ }
+ assert(lastStretch.off > 0);
+
+ /* Update offset history */
+ if (lastStretch.litlen == 0) {
+ /* finishing on a match : update offset history */
+ repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
+ ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
  } else {
- ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
+ ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
+ assert(cur >= lastStretch.litlen);
+ cur -= lastStretch.litlen;
  }

- { U32 const storeEnd = cur + 1;
+ /* Let's write the shortest path solution.
+ * It is stored in @opt in reverse order,
+ * starting from @storeEnd (==cur+2),
+ * effectively partially overwriting @opt.
+ * Content is changed too:
+ * - So far, @opt stored stretches, aka a match followed by literals
+ * - Now, it will store sequences, aka literals followed by a match
+ */
+ { U32 const storeEnd = cur + 2;
  U32 storeStart = storeEnd;
- U32 seqPos = cur;
+ U32 stretchPos = cur;

  DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
  last_pos, cur); (void)last_pos;
- assert(storeEnd < ZSTD_OPT_NUM);
- DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
- storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
- opt[storeEnd] = lastSequence;
- while (seqPos > 0) {
- U32 const backDist = ZSTD_totalLen(opt[seqPos]);
+ assert(storeEnd < ZSTD_OPT_SIZE);
+ DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+ storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
+ if (lastStretch.litlen > 0) {
+ /* last "sequence" is unfinished: just a bunch of literals */
+ opt[storeEnd].litlen = lastStretch.litlen;
+ opt[storeEnd].mlen = 0;
+ storeStart = storeEnd-1;
+ opt[storeStart] = lastStretch;
+ } else {
+ opt[storeEnd] = lastStretch; /* note: litlen will be fixed */
+ storeStart = storeEnd;
+ }
+ while (1) {
+ ZSTD_optimal_t nextStretch = opt[stretchPos];
+ opt[storeStart].litlen = nextStretch.litlen;
+ DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
+ opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
+ if (nextStretch.mlen == 0) {
+ /* reaching beginning of segment */
+ break;
+ }
  storeStart--;
- DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
- seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
- opt[storeStart] = opt[seqPos];
- seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
+ opt[storeStart] = nextStretch; /* note: litlen will be fixed */
+ assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
+ stretchPos -= nextStretch.litlen + nextStretch.mlen;
  }
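To make the stretch-to-sequence conversion concrete, here is a runnable toy backward walk (simplified types, hypothetical data; not the library's code). Each reached cell records how it was reached, so stepping back by litlen+mlen from the end recovers the chosen parse; since that yields sequences last-first, the real code buffers them into the tail of opt and replays them forward afterwards:

    #include <stdio.h>

    typedef struct { unsigned litlen, mlen; } Stretch;

    int main(void)
    {
        Stretch opt[10] = {{0, 0}};      /* positions 0..9 of a tiny segment */
        opt[9] = (Stretch){ 2, 4 };      /* reached via 2 literals + 4-byte match */
        opt[3] = (Stretch){ 3, 0 };      /* head of segment: 3 leading literals */

        unsigned pos = 9;
        while (opt[pos].mlen != 0) {
            printf("step ending at %u : %u literals + %u-byte match\n",
                   pos, opt[pos].litlen, opt[pos].mlen);
            pos -= opt[pos].litlen + opt[pos].mlen;
        }
        printf("head literals: %u\n", opt[pos].litlen);
        return 0;
    }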

  /* save sequences */
- DEBUGLOG(6, "sending selected sequences into seqStore")
+ DEBUGLOG(6, "sending selected sequences into seqStore");
  { U32 storePos;
  for (storePos=storeStart; storePos <= storeEnd; storePos++) {
  U32 const llen = opt[storePos].litlen;
  U32 const mlen = opt[storePos].mlen;
- U32 const offCode = opt[storePos].off;
+ U32 const offBase = opt[storePos].off;
  U32 const advance = llen + mlen;
  DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
  anchor - istart, (unsigned)llen, (unsigned)mlen);
@@ -1197,11 +1421,14 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
  }

  assert(anchor + llen <= iend);
- ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
- ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
+ ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
+ ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
  anchor += advance;
  ip = anchor;
  } }
+ DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
+
+ /* update all costs */
  ZSTD_setBasePrices(optStatePtr, optLevel);
  }
  } /* while (ip < ilimit) */
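One interface shift worth flagging in this hunk: ZSTD_storeSeq now receives the raw match length and applies the MINMATCH bias itself, and the offset argument uses the offBase encoding rather than the old offCode. A toy append with a hypothetical record layout, just to illustrate the convention:

    #define MINMATCH 3   /* minimum match length of the format */

    /* hypothetical record layout, for illustration only */
    typedef struct { unsigned short litLength, mlBase; unsigned offBase; } SeqDef;

    static void storeSeq_sketch(SeqDef* seq, unsigned litLength,
                                unsigned offBase, unsigned matchLength)
    {
        seq->litLength = (unsigned short)litLength;
        seq->offBase   = offBase;   /* real offset + 3, or repcode 1..3 */
        /* caller passes the full match length; the bias is applied here */
        seq->mlBase    = (unsigned short)(matchLength - MINMATCH);
    }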
@@ -1209,50 +1436,51 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
  /* Return the last literals size */
  return (size_t)(iend - anchor);
  }
+ #endif /* build exclusions */

+ #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
+ static size_t ZSTD_compressBlock_opt0(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
+ {
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
+ }
+ #endif
+
+ #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+ static size_t ZSTD_compressBlock_opt2(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
+ {
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
+ }
+ #endif
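The opt0/opt2 wrappers pin optLevel to a compile-time constant, so each instantiation of the generic parser can fold the level checks away. A minimal sketch of the same pattern (hypothetical names):

    /* generic routine with a runtime knob */
    static int parse_generic(int optLevel, int x)
    {
        return (optLevel >= 2) ? x * 2 : x;   /* stand-in for level-dependent logic */
    }

    /* thin wrappers with the knob fixed : the compiler specializes each copy */
    static int parse_level0(int x) { return parse_generic(0, x); }
    static int parse_level2(int x) { return parse_generic(2, x); }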

+ #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
  size_t ZSTD_compressBlock_btopt(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  const void* src, size_t srcSize)
  {
  DEBUGLOG(5, "ZSTD_compressBlock_btopt");
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
  }
+ #endif


- /* used in 2-pass strategy */
- static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
- {
- U32 s, sum=0;
- assert(ZSTD_FREQ_DIV+bonus >= 0);
- for (s=0; s<lastEltIndex+1; s++) {
- table[s] <<= ZSTD_FREQ_DIV+bonus;
- table[s]--;
- sum += table[s];
- }
- return sum;
- }

- /* used in 2-pass strategy */
- MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
- {
- if (ZSTD_compressedLiterals(optPtr))
- optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
- optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
- optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
- optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
- }

+ #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
  /* ZSTD_initStats_ultra():
  * make a first compression pass, just to seed stats with more accurate starting values.
  * only works on first block, with no dictionary and no ldm.
- * this function cannot error, hence its contract must be respected.
+ * this function cannot error out; its narrow contract must be respected.
  */
- static void
- ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
- seqStore_t* seqStore,
- U32 rep[ZSTD_REP_NUM],
- const void* src, size_t srcSize)
+ static
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+ seqStore_t* seqStore,
+ U32 rep[ZSTD_REP_NUM],
+ const void* src, size_t srcSize)
  {
  U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
  ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
@@ -1263,17 +1491,15 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
  assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
  assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */

- ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
+ ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/

- /* invalidate first scan from history */
+ /* invalidate first scan from history, only keep entropy stats */
  ZSTD_resetSeqStore(seqStore);
  ms->window.base -= srcSize;
  ms->window.dictLimit += (U32)srcSize;
  ms->window.lowLimit = ms->window.dictLimit;
  ms->nextToUpdate = ms->window.dictLimit;

- /* re-inforce weight of collected statistics */
- ZSTD_upscaleStats(&ms->opt);
  }

  size_t ZSTD_compressBlock_btultra(
@@ -1281,7 +1507,7 @@ size_t ZSTD_compressBlock_btultra(
  const void* src, size_t srcSize)
  {
  DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
  }

  size_t ZSTD_compressBlock_btultra2(
@@ -1291,10 +1517,10 @@ size_t ZSTD_compressBlock_btultra2(
  U32 const curr = (U32)((const BYTE*)src - ms->window.base);
  DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);

- /* 2-pass strategy:
+ /* 2-passes strategy:
  * this strategy makes a first pass over first block to collect statistics
- * and seed next round's statistics with it.
- * After 1st pass, function forgets everything, and starts a new block.
+ * in order to seed next round's statistics with it.
+ * After 1st pass, function forgets history, and starts a new block.
  * Consequently, this can only work if no data has been previously loaded in tables,
  * aka, no dictionary, no prefix, no ldm preprocessing.
  * The compression ratio gain is generally small (~0.5% on first block),
@@ -1303,42 +1529,47 @@ size_t ZSTD_compressBlock_btultra2(
  if ( (ms->opt.litLengthSum==0) /* first block */
  && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
  && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
- && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
- && (srcSize > ZSTD_PREDEF_THRESHOLD)
+ && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
+ && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
  ) {
  ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
  }

- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
  }
+ #endif
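The 2-pass idea deserves a concrete illustration. A runnable toy (hypothetical, not the library's code): pass 1 gathers symbol frequencies, pass 2 prices each literal at roughly -log2(p) bits, which is what seeded statistics buy the optimal parser on a first block (link with -lm):

    #include <math.h>
    #include <stdio.h>
    #include <stddef.h>

    static void countFreqs(const unsigned char* src, size_t n, unsigned freq[256])
    {
        size_t i;
        for (i = 0; i < n; i++) freq[src[i]]++;
    }

    static double pricedBits(const unsigned char* src, size_t n, const unsigned freq[256])
    {
        double bits = 0.0;
        size_t i;
        for (i = 0; i < n; i++)
            bits += -log2((double)freq[src[i]] / (double)n);  /* cost under seeded stats */
        return bits;
    }

    int main(void)
    {
        const unsigned char data[] = "abracadabra abracadabra";
        size_t const n = sizeof(data) - 1;
        unsigned freq[256] = {0};
        countFreqs(data, n, freq);                      /* pass 1 : gather stats */
        printf("estimated literal cost: %.1f bits\n",   /* pass 2 : price with them */
               pricedBits(data, n, freq));
        return 0;
    }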

+ #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
  size_t ZSTD_compressBlock_btopt_dictMatchState(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  const void* src, size_t srcSize)
  {
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
  }

- size_t ZSTD_compressBlock_btultra_dictMatchState(
+ size_t ZSTD_compressBlock_btopt_extDict(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  const void* src, size_t srcSize)
  {
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
  }
+ #endif

- size_t ZSTD_compressBlock_btopt_extDict(
+ #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+ size_t ZSTD_compressBlock_btultra_dictMatchState(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  const void* src, size_t srcSize)
  {
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
  }

  size_t ZSTD_compressBlock_btultra_extDict(
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  const void* src, size_t srcSize)
  {
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
  }
+ #endif

  /* note : no btultra2 variant for extDict nor dictMatchState,
  * because btultra2 is not meant to work with dictionaries