zstd-ruby 1.4.5.0 → 1.5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +78 -5
  4. data/Rakefile +8 -2
  5. data/ext/zstdruby/common.h +15 -0
  6. data/ext/zstdruby/extconf.rb +3 -2
  7. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  8. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  9. data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
  10. data/ext/zstdruby/libzstd/common/compiler.h +205 -22
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  13. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
  15. data/ext/zstdruby/libzstd/common/error_private.c +10 -2
  16. data/ext/zstdruby/libzstd/common/error_private.h +82 -3
  17. data/ext/zstdruby/libzstd/common/fse.h +37 -86
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
  19. data/ext/zstdruby/libzstd/common/huf.h +99 -166
  20. data/ext/zstdruby/libzstd/common/mem.h +124 -142
  21. data/ext/zstdruby/libzstd/common/pool.c +54 -27
  22. data/ext/zstdruby/libzstd/common/pool.h +10 -4
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +74 -19
  25. data/ext/zstdruby/libzstd/common/threading.h +5 -10
  26. data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
  34. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  35. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
  72. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
  73. data/ext/zstdruby/libzstd/zstd.h +1217 -287
  74. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
  75. data/ext/zstdruby/main.c +20 -0
  76. data/ext/zstdruby/skippable_frame.c +63 -0
  77. data/ext/zstdruby/streaming_compress.c +177 -0
  78. data/ext/zstdruby/streaming_compress.h +5 -0
  79. data/ext/zstdruby/streaming_decompress.c +123 -0
  80. data/ext/zstdruby/zstdruby.c +114 -32
  81. data/lib/zstd-ruby/version.rb +1 -1
  82. data/lib/zstd-ruby.rb +0 -1
  83. data/zstd-ruby.gemspec +1 -1
  84. metadata +19 -36
  85. data/.travis.yml +0 -14
  86. data/ext/zstdruby/libzstd/.gitignore +0 -3
  87. data/ext/zstdruby/libzstd/BUCK +0 -234
  88. data/ext/zstdruby/libzstd/Makefile +0 -354
  89. data/ext/zstdruby/libzstd/README.md +0 -179
  90. data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
  91. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
  92. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
  93. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
  94. data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
  95. data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
  96. data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
  97. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
  98. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
  99. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
  100. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
  101. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
  102. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
  103. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
  104. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
  105. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
  106. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
  107. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
  108. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
  109. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
  110. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
  111. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
  112. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
  113. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
  114. data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
  115. data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,40 +14,47 @@
14
14
 
15
15
 
16
16
  #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17
- #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
18
17
  #define ZSTD_MAX_PRICE (1<<30)
19
18
 
20
- #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
19
+ #define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
21
20
 
22
21
 
23
22
  /*-*************************************
24
23
  * Price functions for optimal parser
25
24
  ***************************************/
26
25
 
27
- #if 0 /* approximation at bit level */
26
+ #if 0 /* approximation at bit level (for tests) */
28
27
  # define BITCOST_ACCURACY 0
29
28
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
30
- # define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
31
- #elif 0 /* fractional bit accuracy */
29
+ # define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
30
+ #elif 0 /* fractional bit accuracy (for tests) */
32
31
  # define BITCOST_ACCURACY 8
33
32
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
34
- # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
33
+ # define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
35
34
  #else /* opt==approx, ultra==accurate */
36
35
  # define BITCOST_ACCURACY 8
37
36
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
38
- # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
37
+ # define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
39
38
  #endif
40
39
 
40
+ /* ZSTD_bitWeight() :
41
+ * provide estimated "cost" of a stat in full bits only */
41
42
  MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
42
43
  {
43
44
  return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
44
45
  }
45
46
 
47
+ /* ZSTD_fracWeight() :
48
+ * provide fractional-bit "cost" of a stat,
49
+ * using linear interpolation approximation */
46
50
  MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
47
51
  {
48
52
  U32 const stat = rawStat + 1;
49
53
  U32 const hb = ZSTD_highbit32(stat);
50
54
  U32 const BWeight = hb * BITCOST_MULTIPLIER;
55
+ /* Fweight was meant for "Fractional weight"
56
+ * but it's effectively a value between 1 and 2
57
+ * using fixed point arithmetic */
51
58
  U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
52
59
  U32 const weight = BWeight + FWeight;
53
60
  assert(hb + BITCOST_ACCURACY < 31);
@@ -58,7 +65,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
58
65
  /* debugging function,
59
66
  * @return price in bytes as fractional value
60
67
  * for debug messages only */
61
- MEM_STATIC double ZSTD_fCost(U32 price)
68
+ MEM_STATIC double ZSTD_fCost(int price)
62
69
  {
63
70
  return (double)price / (BITCOST_MULTIPLIER*8);
64
71
  }
@@ -66,7 +73,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)
66
73
 
67
74
  static int ZSTD_compressedLiterals(optState_t const* const optPtr)
68
75
  {
69
- return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
76
+ return optPtr->literalCompressionMode != ZSTD_ps_disable;
70
77
  }
71
78
 
72
79
  static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
@@ -79,25 +86,52 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
79
86
  }
80
87
 
81
88
 
82
- /* ZSTD_downscaleStat() :
83
- * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
84
- * return the resulting sum of elements */
85
- static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
89
+ static U32 sum_u32(const unsigned table[], size_t nbElts)
90
+ {
91
+ size_t n;
92
+ U32 total = 0;
93
+ for (n=0; n<nbElts; n++) {
94
+ total += table[n];
95
+ }
96
+ return total;
97
+ }
98
+
99
+ typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
100
+
101
+ static U32
102
+ ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
86
103
  {
87
104
  U32 s, sum=0;
88
- DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
89
- assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
105
+ DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
106
+ (unsigned)lastEltIndex+1, (unsigned)shift );
107
+ assert(shift < 30);
90
108
  for (s=0; s<lastEltIndex+1; s++) {
91
- table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
92
- sum += table[s];
109
+ unsigned const base = base1 ? 1 : (table[s]>0);
110
+ unsigned const newStat = base + (table[s] >> shift);
111
+ sum += newStat;
112
+ table[s] = newStat;
93
113
  }
94
114
  return sum;
95
115
  }
96
116
 
117
+ /* ZSTD_scaleStats() :
118
+ * reduce all elt frequencies in table if sum too large
119
+ * return the resulting sum of elements */
120
+ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
121
+ {
122
+ U32 const prevsum = sum_u32(table, lastEltIndex+1);
123
+ U32 const factor = prevsum >> logTarget;
124
+ DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
125
+ assert(logTarget < 30);
126
+ if (factor <= 1) return prevsum;
127
+ return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
128
+ }
129
+
97
130
  /* ZSTD_rescaleFreqs() :
98
131
  * if first block (detected by optPtr->litLengthSum == 0) : init statistics
99
132
  * take hints from dictionary if there is one
100
- * or init from zero, using src for literals stats, or flat 1 for match symbols
133
+ * and init from zero if there is none,
134
+ * using src for literals stats, and baseline stats for sequence symbols
101
135
  * otherwise downscale existing stats, to be used as seed for next block.
102
136
  */
103
137
  static void
@@ -109,24 +143,28 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
109
143
  DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
110
144
  optPtr->priceType = zop_dynamic;
111
145
 
112
- if (optPtr->litLengthSum == 0) { /* first block : init */
113
- if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
114
- DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
146
+ if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
147
+
148
+ /* heuristic: use pre-defined stats for too small inputs */
149
+ if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
150
+ DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
115
151
  optPtr->priceType = zop_predef;
116
152
  }
117
153
 
118
154
  assert(optPtr->symbolCosts != NULL);
119
155
  if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
120
- /* huffman table presumed generated by dictionary */
156
+
157
+ /* huffman stats covering the full value set : table presumed generated by dictionary */
121
158
  optPtr->priceType = zop_dynamic;
122
159
 
123
160
  if (compressedLiterals) {
161
+ /* generate literals statistics from huffman table */
124
162
  unsigned lit;
125
163
  assert(optPtr->litFreq != NULL);
126
164
  optPtr->litSum = 0;
127
165
  for (lit=0; lit<=MaxLit; lit++) {
128
166
  U32 const scaleLog = 11; /* scale to 2K */
129
- U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
167
+ U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
130
168
  assert(bitCost <= scaleLog);
131
169
  optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
132
170
  optPtr->litSum += optPtr->litFreq[lit];
@@ -168,20 +206,26 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
168
206
  optPtr->offCodeSum += optPtr->offCodeFreq[of];
169
207
  } }
170
208
 
171
- } else { /* not a dictionary */
209
+ } else { /* first block, no dictionary */
172
210
 
173
211
  assert(optPtr->litFreq != NULL);
174
212
  if (compressedLiterals) {
213
+ /* base initial cost of literals on direct frequency within src */
175
214
  unsigned lit = MaxLit;
176
215
  HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
177
- optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
216
+ optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
178
217
  }
179
218
 
180
- { unsigned ll;
181
- for (ll=0; ll<=MaxLL; ll++)
182
- optPtr->litLengthFreq[ll] = 1;
219
+ { unsigned const baseLLfreqs[MaxLL+1] = {
220
+ 4, 2, 1, 1, 1, 1, 1, 1,
221
+ 1, 1, 1, 1, 1, 1, 1, 1,
222
+ 1, 1, 1, 1, 1, 1, 1, 1,
223
+ 1, 1, 1, 1, 1, 1, 1, 1,
224
+ 1, 1, 1, 1
225
+ };
226
+ ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
227
+ optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
183
228
  }
184
- optPtr->litLengthSum = MaxLL+1;
185
229
 
186
230
  { unsigned ml;
187
231
  for (ml=0; ml<=MaxML; ml++)
@@ -189,21 +233,25 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
189
233
  }
190
234
  optPtr->matchLengthSum = MaxML+1;
191
235
 
192
- { unsigned of;
193
- for (of=0; of<=MaxOff; of++)
194
- optPtr->offCodeFreq[of] = 1;
236
+ { unsigned const baseOFCfreqs[MaxOff+1] = {
237
+ 6, 2, 1, 1, 2, 3, 4, 4,
238
+ 4, 3, 2, 1, 1, 1, 1, 1,
239
+ 1, 1, 1, 1, 1, 1, 1, 1,
240
+ 1, 1, 1, 1, 1, 1, 1, 1
241
+ };
242
+ ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
243
+ optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
195
244
  }
196
- optPtr->offCodeSum = MaxOff+1;
197
245
 
198
246
  }
199
247
 
200
- } else { /* new block : re-use previous statistics, scaled down */
248
+ } else { /* new block : scale down accumulated statistics */
201
249
 
202
250
  if (compressedLiterals)
203
- optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
204
- optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
205
- optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
206
- optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
251
+ optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
252
+ optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
253
+ optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
254
+ optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
207
255
  }
208
256
 
209
257
  ZSTD_setBasePrices(optPtr, optLevel);
@@ -225,11 +273,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
225
273
  return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
226
274
 
227
275
  /* dynamic statistics */
228
- { U32 price = litLength * optPtr->litSumBasePrice;
276
+ { U32 price = optPtr->litSumBasePrice * litLength;
277
+ U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
229
278
  U32 u;
279
+ assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
230
280
  for (u=0; u < litLength; u++) {
231
- assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
232
- price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
281
+ U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
282
+ if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
283
+ price -= litPrice;
233
284
  }
234
285
  return price;
235
286
  }
@@ -239,7 +290,17 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
239
290
  * cost of literalLength symbol */
240
291
  static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
241
292
  {
242
- if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
293
+ assert(litLength <= ZSTD_BLOCKSIZE_MAX);
294
+ if (optPtr->priceType == zop_predef)
295
+ return WEIGHT(litLength, optLevel);
296
+
297
+ /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
298
+ * because it isn't representable in the zstd format.
299
+ * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
300
+ * In such a case, the block would be all literals.
301
+ */
302
+ if (litLength == ZSTD_BLOCKSIZE_MAX)
303
+ return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
243
304
 
244
305
  /* dynamic statistics */
245
306
  { U32 const llCode = ZSTD_LLcode(litLength);
@@ -250,22 +311,25 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
250
311
  }
251
312
 
252
313
  /* ZSTD_getMatchPrice() :
253
- * Provides the cost of the match part (offset + matchLength) of a sequence
314
+ * Provides the cost of the match part (offset + matchLength) of a sequence.
254
315
  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
255
- * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
316
+ * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
317
+ * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
318
+ */
256
319
  FORCE_INLINE_TEMPLATE U32
257
- ZSTD_getMatchPrice(U32 const offset,
320
+ ZSTD_getMatchPrice(U32 const offBase,
258
321
  U32 const matchLength,
259
322
  const optState_t* const optPtr,
260
323
  int const optLevel)
261
324
  {
262
325
  U32 price;
263
- U32 const offCode = ZSTD_highbit32(offset+1);
326
+ U32 const offCode = ZSTD_highbit32(offBase);
264
327
  U32 const mlBase = matchLength - MINMATCH;
265
328
  assert(matchLength >= MINMATCH);
266
329
 
267
- if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
268
- return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
330
+ if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
331
+ return WEIGHT(mlBase, optLevel)
332
+ + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
269
333
 
270
334
  /* dynamic statistics */
271
335
  price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
@@ -284,10 +348,10 @@ ZSTD_getMatchPrice(U32 const offset,
284
348
  }
285
349
 
286
350
  /* ZSTD_updateStats() :
287
- * assumption : literals + litLengtn <= iend */
351
+ * assumption : literals + litLength <= iend */
288
352
  static void ZSTD_updateStats(optState_t* const optPtr,
289
353
  U32 litLength, const BYTE* literals,
290
- U32 offsetCode, U32 matchLength)
354
+ U32 offBase, U32 matchLength)
291
355
  {
292
356
  /* literals */
293
357
  if (ZSTD_compressedLiterals(optPtr)) {
@@ -303,8 +367,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
303
367
  optPtr->litLengthSum++;
304
368
  }
305
369
 
306
- /* match offset code (0-2=>repCode; 3+=>offset+2) */
307
- { U32 const offCode = ZSTD_highbit32(offsetCode+1);
370
+ /* offset code : follows storeSeq() numeric representation */
371
+ { U32 const offCode = ZSTD_highbit32(offBase);
308
372
  assert(offCode <= MaxOff);
309
373
  optPtr->offCodeFreq[offCode]++;
310
374
  optPtr->offCodeSum++;
@@ -338,7 +402,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
338
402
 
339
403
  /* Update hashTable3 up to ip (excluded)
340
404
  Assumption : always within prefix (i.e. not within extDict) */
341
- static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
405
+ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
342
406
  U32* nextToUpdate3,
343
407
  const BYTE* const ip)
344
408
  {
@@ -364,11 +428,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
364
428
  * Binary Tree search
365
429
  ***************************************/
366
430
  /** ZSTD_insertBt1() : add one or multiple positions to tree.
367
- * ip : assumed <= iend-8 .
431
+ * @param ip assumed <= iend-8 .
432
+ * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
368
433
  * @return : nb of positions added */
369
434
  static U32 ZSTD_insertBt1(
370
- ZSTD_matchState_t* ms,
435
+ const ZSTD_matchState_t* ms,
371
436
  const BYTE* const ip, const BYTE* const iend,
437
+ U32 const target,
372
438
  U32 const mls, const int extDict)
373
439
  {
374
440
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -386,32 +452,36 @@ static U32 ZSTD_insertBt1(
386
452
  const BYTE* const dictEnd = dictBase + dictLimit;
387
453
  const BYTE* const prefixStart = base + dictLimit;
388
454
  const BYTE* match;
389
- const U32 current = (U32)(ip-base);
390
- const U32 btLow = btMask >= current ? 0 : current - btMask;
391
- U32* smallerPtr = bt + 2*(current&btMask);
455
+ const U32 curr = (U32)(ip-base);
456
+ const U32 btLow = btMask >= curr ? 0 : curr - btMask;
457
+ U32* smallerPtr = bt + 2*(curr&btMask);
392
458
  U32* largerPtr = smallerPtr + 1;
393
459
  U32 dummy32; /* to be nullified at the end */
394
- U32 const windowLow = ms->window.lowLimit;
395
- U32 matchEndIdx = current+8+1;
460
+ /* windowLow is based on target because
461
+ * we only need positions that will be in the window at the end of the tree update.
462
+ */
463
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
464
+ U32 matchEndIdx = curr+8+1;
396
465
  size_t bestLength = 8;
397
466
  U32 nbCompares = 1U << cParams->searchLog;
398
467
  #ifdef ZSTD_C_PREDICT
399
- U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
400
- U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
468
+ U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
469
+ U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
401
470
  predictedSmall += (predictedSmall>0);
402
471
  predictedLarge += (predictedLarge>0);
403
472
  #endif /* ZSTD_C_PREDICT */
404
473
 
405
- DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
474
+ DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
406
475
 
476
+ assert(curr <= target);
407
477
  assert(ip <= iend-8); /* required for h calculation */
408
- hashTable[h] = current; /* Update Hash Table */
478
+ hashTable[h] = curr; /* Update Hash Table */
409
479
 
410
480
  assert(windowLow > 0);
411
- while (nbCompares-- && (matchIndex >= windowLow)) {
481
+ for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
412
482
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
413
483
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
414
- assert(matchIndex < current);
484
+ assert(matchIndex < curr);
415
485
 
416
486
  #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
417
487
  const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
@@ -474,8 +544,8 @@ static U32 ZSTD_insertBt1(
474
544
  *smallerPtr = *largerPtr = 0;
475
545
  { U32 positions = 0;
476
546
  if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
477
- assert(matchEndIdx > current + 8);
478
- return MAX(positions, matchEndIdx - (current + 8));
547
+ assert(matchEndIdx > curr + 8);
548
+ return MAX(positions, matchEndIdx - (curr + 8));
479
549
  }
480
550
  }
481
551
 
@@ -492,7 +562,7 @@ void ZSTD_updateTree_internal(
492
562
  idx, target, dictMode);
493
563
 
494
564
  while(idx < target) {
495
- U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
565
+ U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
496
566
  assert(idx < (U32)(idx + forward));
497
567
  idx += forward;
498
568
  }
@@ -505,21 +575,22 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
505
575
  ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
506
576
  }
507
577
 
508
- FORCE_INLINE_TEMPLATE
509
- U32 ZSTD_insertBtAndGetAllMatches (
510
- ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
511
- ZSTD_matchState_t* ms,
512
- U32* nextToUpdate3,
513
- const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
514
- const U32 rep[ZSTD_REP_NUM],
515
- U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
516
- const U32 lengthToBeat,
517
- U32 const mls /* template */)
578
+ FORCE_INLINE_TEMPLATE U32
579
+ ZSTD_insertBtAndGetAllMatches (
580
+ ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
581
+ ZSTD_matchState_t* ms,
582
+ U32* nextToUpdate3,
583
+ const BYTE* const ip, const BYTE* const iLimit,
584
+ const ZSTD_dictMode_e dictMode,
585
+ const U32 rep[ZSTD_REP_NUM],
586
+ const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
587
+ const U32 lengthToBeat,
588
+ const U32 mls /* template */)
518
589
  {
519
590
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
520
591
  U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
521
592
  const BYTE* const base = ms->window.base;
522
- U32 const current = (U32)(ip-base);
593
+ U32 const curr = (U32)(ip-base);
523
594
  U32 const hashLog = cParams->hashLog;
524
595
  U32 const minMatch = (mls==3) ? 3 : 4;
525
596
  U32* const hashTable = ms->hashTable;
@@ -533,12 +604,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
533
604
  U32 const dictLimit = ms->window.dictLimit;
534
605
  const BYTE* const dictEnd = dictBase + dictLimit;
535
606
  const BYTE* const prefixStart = base + dictLimit;
536
- U32 const btLow = (btMask >= current) ? 0 : current - btMask;
537
- U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
607
+ U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
608
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
538
609
  U32 const matchLow = windowLow ? windowLow : 1;
539
- U32* smallerPtr = bt + 2*(current&btMask);
540
- U32* largerPtr = bt + 2*(current&btMask) + 1;
541
- U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
610
+ U32* smallerPtr = bt + 2*(curr&btMask);
611
+ U32* largerPtr = bt + 2*(curr&btMask) + 1;
612
+ U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
542
613
  U32 dummy32; /* to be nullified at the end */
543
614
  U32 mnum = 0;
544
615
  U32 nbCompares = 1U << cParams->searchLog;
@@ -557,7 +628,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
557
628
  U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
558
629
 
559
630
  size_t bestLength = lengthToBeat-1;
560
- DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
631
+ DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
561
632
 
562
633
  /* check repCode */
563
634
  assert(ll0 <= 1); /* necessarily 1 or 0 */
@@ -565,29 +636,29 @@ U32 ZSTD_insertBtAndGetAllMatches (
565
636
  U32 repCode;
566
637
  for (repCode = ll0; repCode < lastR; repCode++) {
567
638
  U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
568
- U32 const repIndex = current - repOffset;
639
+ U32 const repIndex = curr - repOffset;
569
640
  U32 repLen = 0;
570
- assert(current >= dictLimit);
571
- if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
641
+ assert(curr >= dictLimit);
642
+ if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
572
643
  /* We must validate the repcode offset because when we're using a dictionary the
573
644
  * valid offset range shrinks when the dictionary goes out of bounds.
574
645
  */
575
646
  if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
576
647
  repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
577
648
  }
578
- } else { /* repIndex < dictLimit || repIndex >= current */
649
+ } else { /* repIndex < dictLimit || repIndex >= curr */
579
650
  const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
580
651
  dmsBase + repIndex - dmsIndexDelta :
581
652
  dictBase + repIndex;
582
- assert(current >= windowLow);
653
+ assert(curr >= windowLow);
583
654
  if ( dictMode == ZSTD_extDict
584
- && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
655
+ && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
585
656
  & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
586
657
  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
587
658
  repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
588
659
  }
589
660
  if (dictMode == ZSTD_dictMatchState
590
- && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */
661
+ && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
591
662
  & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
592
663
  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
593
664
  repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
@@ -597,7 +668,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
597
668
  DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
598
669
  repCode, ll0, repOffset, repLen);
599
670
  bestLength = repLen;
600
- matches[mnum].off = repCode - ll0;
671
+ matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */
601
672
  matches[mnum].len = (U32)repLen;
602
673
  mnum++;
603
674
  if ( (repLen > sufficient_len)
@@ -609,7 +680,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
609
680
  if ((mls == 3) /*static*/ && (bestLength < mls)) {
610
681
  U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
611
682
  if ((matchIndex3 >= matchLow)
612
- & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
683
+ & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
613
684
  size_t mlen;
614
685
  if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
615
686
  const BYTE* const match = base + matchIndex3;
@@ -624,26 +695,26 @@ U32 ZSTD_insertBtAndGetAllMatches (
624
695
  DEBUGLOG(8, "found small match with hlog3, of length %u",
625
696
  (U32)mlen);
626
697
  bestLength = mlen;
627
- assert(current > matchIndex3);
698
+ assert(curr > matchIndex3);
628
699
  assert(mnum==0); /* no prior solution */
629
- matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
700
+ matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
630
701
  matches[0].len = (U32)mlen;
631
702
  mnum = 1;
632
703
  if ( (mlen > sufficient_len) |
633
704
  (ip+mlen == iLimit) ) { /* best possible length */
634
- ms->nextToUpdate = current+1; /* skip insertion */
705
+ ms->nextToUpdate = curr+1; /* skip insertion */
635
706
  return 1;
636
707
  } } }
637
708
  /* no dictMatchState lookup: dicts don't have a populated HC3 table */
638
- }
709
+ } /* if (mls == 3) */
639
710
 
640
- hashTable[h] = current; /* Update Hash Table */
711
+ hashTable[h] = curr; /* Update Hash Table */
641
712
 
642
- while (nbCompares-- && (matchIndex >= matchLow)) {
713
+ for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
643
714
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
644
715
  const BYTE* match;
645
716
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
646
- assert(current > matchIndex);
717
+ assert(curr > matchIndex);
647
718
 
648
719
  if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
649
720
  assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
@@ -659,21 +730,20 @@ U32 ZSTD_insertBtAndGetAllMatches (
659
730
  }
660
731
 
661
732
  if (matchLength > bestLength) {
662
- DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
663
- (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
733
+ DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
734
+ (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
664
735
  assert(matchEndIdx > matchIndex);
665
736
  if (matchLength > matchEndIdx - matchIndex)
666
737
  matchEndIdx = matchIndex + (U32)matchLength;
667
738
  bestLength = matchLength;
668
- matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
739
+ matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
669
740
  matches[mnum].len = (U32)matchLength;
670
741
  mnum++;
671
742
  if ( (matchLength > ZSTD_OPT_NUM)
672
743
  | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
673
744
  if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
674
745
  break; /* drop, to preserve bt consistency (miss a little bit of compression) */
675
- }
676
- }
746
+ } }
677
747
 
678
748
  if (match[matchLength] < ip[matchLength]) {
679
749
  /* match smaller than current */
@@ -692,12 +762,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
692
762
 
693
763
  *smallerPtr = *largerPtr = 0;
694
764
 
765
+ assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
695
766
  if (dictMode == ZSTD_dictMatchState && nbCompares) {
696
767
  size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
697
768
  U32 dictMatchIndex = dms->hashTable[dmsH];
698
769
  const U32* const dmsBt = dms->chainTable;
699
770
  commonLengthSmaller = commonLengthLarger = 0;
700
- while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
771
+ for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
701
772
  const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
702
773
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
703
774
  const BYTE* match = dmsBase + dictMatchIndex;
@@ -707,19 +778,18 @@ U32 ZSTD_insertBtAndGetAllMatches (
707
778
 
708
779
  if (matchLength > bestLength) {
709
780
  matchIndex = dictMatchIndex + dmsIndexDelta;
710
- DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
711
- (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
781
+ DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
782
+ (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
712
783
  if (matchLength > matchEndIdx - matchIndex)
713
784
  matchEndIdx = matchIndex + (U32)matchLength;
714
785
  bestLength = matchLength;
715
- matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
786
+ matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
716
787
  matches[mnum].len = (U32)matchLength;
717
788
  mnum++;
718
789
  if ( (matchLength > ZSTD_OPT_NUM)
719
790
  | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
720
791
  break; /* drop, to guarantee consistency (miss a little bit of compression) */
721
- }
722
- }
792
+ } }
723
793
 
724
794
  if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
725
795
  if (match[matchLength] < ip[matchLength]) {
@@ -729,47 +799,242 @@ U32 ZSTD_insertBtAndGetAllMatches (
729
799
  /* match is larger than current */
730
800
  commonLengthLarger = matchLength;
731
801
  dictMatchIndex = nextPtr[0];
732
- }
733
- }
734
- }
802
+ } } } /* if (dictMode == ZSTD_dictMatchState) */
735
803
 
736
- assert(matchEndIdx > current+8);
804
+ assert(matchEndIdx > curr+8);
737
805
  ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
738
806
  return mnum;
739
807
  }
740
808
 
809
+ typedef U32 (*ZSTD_getAllMatchesFn)(
810
+ ZSTD_match_t*,
811
+ ZSTD_matchState_t*,
812
+ U32*,
813
+ const BYTE*,
814
+ const BYTE*,
815
+ const U32 rep[ZSTD_REP_NUM],
816
+ U32 const ll0,
817
+ U32 const lengthToBeat);
818
+
819
+ FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
820
+ ZSTD_match_t* matches,
821
+ ZSTD_matchState_t* ms,
822
+ U32* nextToUpdate3,
823
+ const BYTE* ip,
824
+ const BYTE* const iHighLimit,
825
+ const U32 rep[ZSTD_REP_NUM],
826
+ U32 const ll0,
827
+ U32 const lengthToBeat,
828
+ const ZSTD_dictMode_e dictMode,
829
+ const U32 mls)
830
+ {
831
+ assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
832
+ DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
833
+ if (ip < ms->window.base + ms->nextToUpdate)
834
+ return 0; /* skipped area */
835
+ ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
836
+ return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
837
+ }
838
+
839
+ #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
840
+
841
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
842
+ static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
843
+ ZSTD_match_t* matches, \
844
+ ZSTD_matchState_t* ms, \
845
+ U32* nextToUpdate3, \
846
+ const BYTE* ip, \
847
+ const BYTE* const iHighLimit, \
848
+ const U32 rep[ZSTD_REP_NUM], \
849
+ U32 const ll0, \
850
+ U32 const lengthToBeat) \
851
+ { \
852
+ return ZSTD_btGetAllMatches_internal( \
853
+ matches, ms, nextToUpdate3, ip, iHighLimit, \
854
+ rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
855
+ }
856
+
857
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
858
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
859
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
860
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
861
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
862
+
863
+ GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
864
+ GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
865
+ GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
866
+
867
+ #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
868
+ { \
869
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
870
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
871
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
872
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
873
+ }
874
+
875
+ static ZSTD_getAllMatchesFn
876
+ ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
877
+ {
878
+ ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
879
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
880
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
881
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
882
+ };
883
+ U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
884
+ assert((U32)dictMode < 3);
885
+ assert(mls - 3 < 4);
886
+ return getAllMatchesFns[(int)dictMode][mls - 3];
887
+ }
741
888
 
742
- FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
743
- ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
744
- ZSTD_matchState_t* ms,
745
- U32* nextToUpdate3,
746
- const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
747
- const U32 rep[ZSTD_REP_NUM],
748
- U32 const ll0,
749
- U32 const lengthToBeat)
889
+ /*************************
890
+ * LDM helper functions *
891
+ *************************/
892
+
893
+ /* Struct containing info needed to make decision about ldm inclusion */
894
+ typedef struct {
895
+ rawSeqStore_t seqStore; /* External match candidates store for this block */
896
+ U32 startPosInBlock; /* Start position of the current match candidate */
897
+ U32 endPosInBlock; /* End position of the current match candidate */
898
+ U32 offset; /* Offset of the match candidate */
899
+ } ZSTD_optLdm_t;
900
+
901
+ /* ZSTD_optLdm_skipRawSeqStoreBytes():
902
+ * Moves forward in @rawSeqStore by @nbBytes,
903
+ * which will update the fields 'pos' and 'posInSequence'.
904
+ */
905
+ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
750
906
  {
751
- const ZSTD_compressionParameters* const cParams = &ms->cParams;
752
- U32 const matchLengthSearch = cParams->minMatch;
753
- DEBUGLOG(8, "ZSTD_BtGetAllMatches");
754
- if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
755
- ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
756
- switch(matchLengthSearch)
757
- {
758
- case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
759
- default :
760
- case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
761
- case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
762
- case 7 :
763
- case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
907
+ U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
908
+ while (currPos && rawSeqStore->pos < rawSeqStore->size) {
909
+ rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
910
+ if (currPos >= currSeq.litLength + currSeq.matchLength) {
911
+ currPos -= currSeq.litLength + currSeq.matchLength;
912
+ rawSeqStore->pos++;
913
+ } else {
914
+ rawSeqStore->posInSequence = currPos;
915
+ break;
916
+ }
917
+ }
918
+ if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
919
+ rawSeqStore->posInSequence = 0;
764
920
  }
765
921
  }
766
922
 
923
+ /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
924
+ * Calculates the beginning and end of the next match in the current block.
925
+ * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
926
+ */
927
+ static void
928
+ ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
929
+ U32 blockBytesRemaining)
930
+ {
931
+ rawSeq currSeq;
932
+ U32 currBlockEndPos;
933
+ U32 literalsBytesRemaining;
934
+ U32 matchBytesRemaining;
935
+
936
+ /* Setting match end position to MAX to ensure we never use an LDM during this block */
937
+ if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
938
+ optLdm->startPosInBlock = UINT_MAX;
939
+ optLdm->endPosInBlock = UINT_MAX;
940
+ return;
941
+ }
942
+ /* Calculate appropriate bytes left in matchLength and litLength
943
+ * after adjusting based on ldmSeqStore->posInSequence */
944
+ currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
945
+ assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
946
+ currBlockEndPos = currPosInBlock + blockBytesRemaining;
947
+ literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
948
+ currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
949
+ 0;
950
+ matchBytesRemaining = (literalsBytesRemaining == 0) ?
951
+ currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
952
+ currSeq.matchLength;
953
+
954
+ /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
955
+ if (literalsBytesRemaining >= blockBytesRemaining) {
956
+ optLdm->startPosInBlock = UINT_MAX;
957
+ optLdm->endPosInBlock = UINT_MAX;
958
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
959
+ return;
960
+ }
961
+
962
+ /* Matches may be < MINMATCH by this process. In that case, we will reject them
963
+ when we are deciding whether or not to add the ldm */
964
+ optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
965
+ optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
966
+ optLdm->offset = currSeq.offset;
967
+
968
+ if (optLdm->endPosInBlock > currBlockEndPos) {
969
+ /* Match ends after the block ends, we can't use the whole match */
970
+ optLdm->endPosInBlock = currBlockEndPos;
971
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
972
+ } else {
973
+ /* Consume nb of bytes equal to size of sequence left */
974
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
975
+ }
976
+ }
977
+
978
+ /* ZSTD_optLdm_maybeAddMatch():
979
+ * Adds a match if it's long enough,
980
+ * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
981
+ * into 'matches'. Maintains the correct ordering of 'matches'.
982
+ */
983
+ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
984
+ const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
985
+ {
986
+ U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
987
+ /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
988
+ U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
989
+
990
+ /* Ensure that current block position is not outside of the match */
991
+ if (currPosInBlock < optLdm->startPosInBlock
992
+ || currPosInBlock >= optLdm->endPosInBlock
993
+ || candidateMatchLength < MINMATCH) {
994
+ return;
995
+ }
996
+
997
+ if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
998
+ U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
999
+ DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
1000
+ candidateOffBase, candidateMatchLength, currPosInBlock);
1001
+ matches[*nbMatches].len = candidateMatchLength;
1002
+ matches[*nbMatches].off = candidateOffBase;
1003
+ (*nbMatches)++;
1004
+ }
1005
+ }
1006
+
1007
+ /* ZSTD_optLdm_processMatchCandidate():
1008
+ * Wrapper function to update ldm seq store and call ldm functions as necessary.
1009
+ */
1010
+ static void
1011
+ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
1012
+ ZSTD_match_t* matches, U32* nbMatches,
1013
+ U32 currPosInBlock, U32 remainingBytes)
1014
+ {
1015
+ if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
1016
+ return;
1017
+ }
1018
+
1019
+ if (currPosInBlock >= optLdm->endPosInBlock) {
1020
+ if (currPosInBlock > optLdm->endPosInBlock) {
1021
+ /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
1022
+ * at the end of a match from the ldm seq store, and will often be some bytes
1023
+ * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
1024
+ */
1025
+ U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
1026
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
1027
+ }
1028
+ ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
1029
+ }
1030
+ ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
1031
+ }
1032
+
767
1033
 
768
1034
  /*-*******************************
769
1035
  * Optimal parser
770
1036
  *********************************/
771
1037
 
772
-
773
1038
  static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
774
1039
  {
775
1040
  return sol.litlen + sol.mlen;
@@ -810,6 +1075,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
810
1075
  const BYTE* const prefixStart = base + ms->window.dictLimit;
811
1076
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
812
1077
 
1078
+ ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1079
+
813
1080
  U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
814
1081
  U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
815
1082
  U32 nextToUpdate3 = ms->nextToUpdate;
@@ -817,6 +1084,13 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
817
1084
  ZSTD_optimal_t* const opt = optStatePtr->priceTable;
818
1085
  ZSTD_match_t* const matches = optStatePtr->matchTable;
819
1086
  ZSTD_optimal_t lastSequence;
1087
+ ZSTD_optLdm_t optLdm;
1088
+
1089
+ ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
1090
+
1091
+ optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1092
+ optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1093
+ ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
820
1094
 
821
1095
  /* init */
822
1096
  DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
@@ -832,7 +1106,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
832
1106
  /* find first match */
833
1107
  { U32 const litlen = (U32)(ip - anchor);
834
1108
  U32 const ll0 = !litlen;
835
- U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
1109
+ U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1110
+ ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1111
+ (U32)(ip-istart), (U32)(iend - ip));
836
1112
  if (!nbMatches) { ip++; continue; }
837
1113
 
838
1114
  /* initialize opt[0] */
@@ -844,18 +1120,18 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
844
1120
  * in every price. We include the literal length to avoid negative
845
1121
  * prices when we subtract the previous literal length.
846
1122
  */
847
- opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
1123
+ opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
848
1124
 
849
1125
  /* large match -> immediate encoding */
850
1126
  { U32 const maxML = matches[nbMatches-1].len;
851
- U32 const maxOffset = matches[nbMatches-1].off;
852
- DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
853
- nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
1127
+ U32 const maxOffBase = matches[nbMatches-1].off;
1128
+ DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
1129
+ nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
854
1130
 
855
1131
  if (maxML > sufficient_len) {
856
1132
  lastSequence.litlen = litlen;
857
1133
  lastSequence.mlen = maxML;
858
- lastSequence.off = maxOffset;
1134
+ lastSequence.off = maxOffBase;
859
1135
  DEBUGLOG(6, "large match (%u>%u), immediate encoding",
860
1136
  maxML, sufficient_len);
861
1137
  cur = 0;
@@ -864,24 +1140,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
864
1140
  } }
865
1141
 
866
1142
  /* set prices for first matches starting position == 0 */
867
- { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1143
+ assert(opt[0].price >= 0);
1144
+ { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
868
1145
  U32 pos;
869
1146
  U32 matchNb;
870
1147
  for (pos = 1; pos < minMatch; pos++) {
871
1148
  opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
872
1149
  }
873
1150
  for (matchNb = 0; matchNb < nbMatches; matchNb++) {
874
- U32 const offset = matches[matchNb].off;
1151
+ U32 const offBase = matches[matchNb].off;
875
1152
  U32 const end = matches[matchNb].len;
876
1153
  for ( ; pos <= end ; pos++ ) {
877
- U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
1154
+ U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
878
1155
  U32 const sequencePrice = literalsPrice + matchPrice;
879
1156
  DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
880
- pos, ZSTD_fCost(sequencePrice));
1157
+ pos, ZSTD_fCost((int)sequencePrice));
881
1158
  opt[pos].mlen = pos;
882
- opt[pos].off = offset;
1159
+ opt[pos].off = offBase;
883
1160
  opt[pos].litlen = litlen;
884
- opt[pos].price = sequencePrice;
1161
+ opt[pos].price = (int)sequencePrice;
885
1162
  } }
886
1163
  last_pos = pos-1;
887
1164
  }
@@ -896,9 +1173,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
896
1173
  /* Fix current position with one literal if cheaper */
897
1174
  { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
898
1175
  int const price = opt[cur-1].price
899
- + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
900
- + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
901
- - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1176
+ + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1177
+ + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1178
+ - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
902
1179
  assert(price < 1000000000); /* overflow check */
903
1180
  if (price <= opt[cur].price) {
904
1181
  DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
@@ -924,10 +1201,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
924
1201
  assert(cur >= opt[cur].mlen);
925
1202
  if (opt[cur].mlen != 0) {
926
1203
  U32 const prev = cur - opt[cur].mlen;
927
- repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
928
- memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1204
+ repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
1205
+ ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
929
1206
  } else {
930
- memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
1207
+ ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
931
1208
  }
932
1209
 
933
1210
  /* last match must start at a minimum distance of 8 from oend */
@@ -941,12 +1218,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
941
1218
  continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
942
1219
  }
943
1220
 
1221
+ assert(opt[cur].price >= 0);
944
1222
  { U32 const ll0 = (opt[cur].mlen != 0);
945
1223
  U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
946
- U32 const previousPrice = opt[cur].price;
1224
+ U32 const previousPrice = (U32)opt[cur].price;
947
1225
  U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
948
- U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
1226
+ U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
949
1227
  U32 matchNb;
1228
+
1229
+ ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1230
+ (U32)(inr-istart), (U32)(iend-inr));
1231
+
950
1232
  if (!nbMatches) {
951
1233
  DEBUGLOG(7, "rPos:%u : no match found", cur);
952
1234
  continue;
@@ -974,12 +1256,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
974
1256
  U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
975
1257
  U32 mlen;
976
1258
 
977
- DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
1259
+ DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
978
1260
  matchNb, matches[matchNb].off, lastML, litlen);
979
1261
 
980
1262
  for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
981
1263
  U32 const pos = cur + mlen;
982
- int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1264
+ int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
983
1265
 
984
1266
  if ((pos > last_pos) || (price < opt[pos].price)) {
985
1267
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
@@ -1009,10 +1291,10 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1009
1291
  * update them while traversing the sequences.
1010
1292
  */
1011
1293
  if (lastSequence.mlen != 0) {
1012
- repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
1013
- memcpy(rep, &reps, sizeof(reps));
1294
+ repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
1295
+ ZSTD_memcpy(rep, &reps, sizeof(reps));
1014
1296
  } else {
1015
- memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1297
+ ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1016
1298
  }
1017
1299
 
1018
1300
  { U32 const storeEnd = cur + 1;
@@ -1040,7 +1322,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1040
1322
  for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1041
1323
  U32 const llen = opt[storePos].litlen;
1042
1324
  U32 const mlen = opt[storePos].mlen;
1043
- U32 const offCode = opt[storePos].off;
1325
+ U32 const offBase = opt[storePos].off;
1044
1326
  U32 const advance = llen + mlen;
1045
1327
  DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1046
1328
  anchor - istart, (unsigned)llen, (unsigned)mlen);
@@ -1052,8 +1334,8 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1052
1334
  }
1053
1335
 
1054
1336
  assert(anchor + llen <= iend);
1055
- ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1056
- ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
1337
+ ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
1338
+ ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
1057
1339
  anchor += advance;
1058
1340
  ip = anchor;
1059
1341
  } }
@@ -1065,43 +1347,35 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1065
1347
  return (size_t)(iend - anchor);
1066
1348
  }
1067
1349
 
1350
+ static size_t ZSTD_compressBlock_opt0(
1351
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1352
+ const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1353
+ {
1354
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1355
+ }
1356
+
1357
+ static size_t ZSTD_compressBlock_opt2(
1358
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1359
+ const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1360
+ {
1361
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1362
+ }
1068
1363
 
1069
1364
  size_t ZSTD_compressBlock_btopt(
1070
1365
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1071
1366
  const void* src, size_t srcSize)
1072
1367
  {
1073
1368
  DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1074
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
1369
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1075
1370
  }
1076
1371
 
1077
1372
 
1078
- /* used in 2-pass strategy */
1079
- static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
1080
- {
1081
- U32 s, sum=0;
1082
- assert(ZSTD_FREQ_DIV+bonus >= 0);
1083
- for (s=0; s<lastEltIndex+1; s++) {
1084
- table[s] <<= ZSTD_FREQ_DIV+bonus;
1085
- table[s]--;
1086
- sum += table[s];
1087
- }
1088
- return sum;
1089
- }
1090
1373
 
1091
- /* used in 2-pass strategy */
1092
- MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
1093
- {
1094
- if (ZSTD_compressedLiterals(optPtr))
1095
- optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
1096
- optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
1097
- optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
1098
- optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
1099
- }
1100
1374
 
1101
1375
  /* ZSTD_initStats_ultra():
1102
1376
  * make a first compression pass, just to seed stats with more accurate starting values.
1103
1377
  * only works on first block, with no dictionary and no ldm.
1104
- * this function cannot error, hence its contract must be respected.
1378
+ * this function cannot error out, its narrow contract must be respected.
1105
1379
  */
1106
1380
  static void
1107
1381
  ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
@@ -1110,7 +1384,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1110
1384
  const void* src, size_t srcSize)
1111
1385
  {
1112
1386
  U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
1113
- memcpy(tmpRep, rep, sizeof(tmpRep));
1387
+ ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1114
1388
 
1115
1389
  DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1116
1390
  assert(ms->opt.litLengthSum == 0); /* first block */
@@ -1118,17 +1392,15 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1118
1392
  assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
1119
1393
  assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
1120
1394
 
1121
- ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
1395
+ ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
1122
1396
 
1123
- /* invalidate first scan from history */
1397
+ /* invalidate first scan from history, only keep entropy stats */
1124
1398
  ZSTD_resetSeqStore(seqStore);
1125
1399
  ms->window.base -= srcSize;
1126
1400
  ms->window.dictLimit += (U32)srcSize;
1127
1401
  ms->window.lowLimit = ms->window.dictLimit;
1128
1402
  ms->nextToUpdate = ms->window.dictLimit;
1129
1403
 
1130
- /* re-inforce weight of collected statistics */
1131
- ZSTD_upscaleStats(&ms->opt);
1132
1404
  }
1133
1405
 
1134
1406
  size_t ZSTD_compressBlock_btultra(
@@ -1136,63 +1408,63 @@ size_t ZSTD_compressBlock_btultra(
1136
1408
  const void* src, size_t srcSize)
1137
1409
  {
1138
1410
  DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1139
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1411
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1140
1412
  }
1141
1413
 
1142
1414
  size_t ZSTD_compressBlock_btultra2(
1143
1415
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1144
1416
  const void* src, size_t srcSize)
1145
1417
  {
1146
- U32 const current = (U32)((const BYTE*)src - ms->window.base);
1418
+ U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1147
1419
  DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1148
1420
 
1149
- /* 2-pass strategy:
1421
+ /* 2-passes strategy:
1150
1422
  * this strategy makes a first pass over first block to collect statistics
1151
- * and seed next round's statistics with it.
1152
- * After 1st pass, function forgets everything, and starts a new block.
1423
+ * in order to seed next round's statistics with it.
1424
+ * After 1st pass, function forgets history, and starts a new block.
1153
1425
  * Consequently, this can only work if no data has been previously loaded in tables,
1154
1426
  * aka, no dictionary, no prefix, no ldm preprocessing.
1155
1427
  * The compression ratio gain is generally small (~0.5% on first block),
1156
- * the cost is 2x cpu time on first block. */
1428
+ ** the cost is 2x cpu time on first block. */
1157
1429
  assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1158
1430
  if ( (ms->opt.litLengthSum==0) /* first block */
1159
1431
  && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1160
1432
  && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
1161
- && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1162
- && (srcSize > ZSTD_PREDEF_THRESHOLD)
1433
+ && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1434
+ && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
1163
1435
  ) {
1164
1436
  ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1165
1437
  }
1166
1438
 
1167
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1439
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1168
1440
  }
1169
1441
 
1170
1442
  size_t ZSTD_compressBlock_btopt_dictMatchState(
1171
1443
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1172
1444
  const void* src, size_t srcSize)
1173
1445
  {
1174
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
1446
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1175
1447
  }
1176
1448
 
1177
1449
  size_t ZSTD_compressBlock_btultra_dictMatchState(
1178
1450
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1179
1451
  const void* src, size_t srcSize)
1180
1452
  {
1181
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
1453
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1182
1454
  }
1183
1455
 
1184
1456
  size_t ZSTD_compressBlock_btopt_extDict(
1185
1457
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1186
1458
  const void* src, size_t srcSize)
1187
1459
  {
1188
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
1460
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1189
1461
  }
1190
1462
 
1191
1463
  size_t ZSTD_compressBlock_btultra_extDict(
1192
1464
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1193
1465
  const void* src, size_t srcSize)
1194
1466
  {
1195
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
1467
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1196
1468
  }
1197
1469
 
1198
1470
  /* note : no btultra2 variant for extDict nor dictMatchState,