zstd-ruby 1.4.4.0 → 1.5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +78 -5
  4. data/Rakefile +8 -2
  5. data/ext/zstdruby/common.h +15 -0
  6. data/ext/zstdruby/extconf.rb +3 -2
  7. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  8. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  9. data/ext/zstdruby/libzstd/common/bitstream.h +74 -97
  10. data/ext/zstdruby/libzstd/common/compiler.h +219 -20
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  13. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +184 -80
  15. data/ext/zstdruby/libzstd/common/error_private.c +11 -2
  16. data/ext/zstdruby/libzstd/common/error_private.h +87 -4
  17. data/ext/zstdruby/libzstd/common/fse.h +47 -116
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +127 -127
  19. data/ext/zstdruby/libzstd/common/huf.h +112 -197
  20. data/ext/zstdruby/libzstd/common/mem.h +124 -142
  21. data/ext/zstdruby/libzstd/common/pool.c +54 -27
  22. data/ext/zstdruby/libzstd/common/pool.h +11 -5
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +78 -22
  25. data/ext/zstdruby/libzstd/common/threading.h +9 -13
  26. data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +186 -144
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +99 -196
  34. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  35. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +968 -331
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +4120 -1191
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +688 -159
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +121 -40
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +62 -35
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +577 -0
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +322 -115
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +394 -154
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -3
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -253
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -3
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1289 -247
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +339 -212
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +508 -282
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +217 -466
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +35 -114
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1220 -572
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +23 -19
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +859 -273
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1244 -375
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +21 -7
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +74 -11
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +75 -54
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +55 -36
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +126 -110
  72. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +248 -56
  73. data/ext/zstdruby/libzstd/zstd.h +1277 -306
  74. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +29 -8
  75. data/ext/zstdruby/main.c +20 -0
  76. data/ext/zstdruby/skippable_frame.c +63 -0
  77. data/ext/zstdruby/streaming_compress.c +177 -0
  78. data/ext/zstdruby/streaming_compress.h +5 -0
  79. data/ext/zstdruby/streaming_decompress.c +123 -0
  80. data/ext/zstdruby/zstdruby.c +114 -32
  81. data/lib/zstd-ruby/version.rb +1 -1
  82. data/lib/zstd-ruby.rb +0 -1
  83. data/zstd-ruby.gemspec +1 -1
  84. metadata +24 -39
  85. data/.travis.yml +0 -14
  86. data/ext/zstdruby/libzstd/.gitignore +0 -3
  87. data/ext/zstdruby/libzstd/BUCK +0 -234
  88. data/ext/zstdruby/libzstd/Makefile +0 -289
  89. data/ext/zstdruby/libzstd/README.md +0 -159
  90. data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
  91. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
  92. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
  93. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
  94. data/ext/zstdruby/libzstd/dll/example/Makefile +0 -47
  95. data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
  96. data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
  97. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
  98. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
  99. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
  100. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2152
  101. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
  102. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3514
  103. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
  104. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3156
  105. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
  106. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3641
  107. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
  108. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4046
  109. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
  110. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4150
  111. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
  112. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4533
  113. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
  114. data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
  115. data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,40 +14,47 @@
14
14
 
15
15
 
16
16
  #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17
- #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
18
17
  #define ZSTD_MAX_PRICE (1<<30)
19
18
 
20
- #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
19
+ #define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
21
20
 
22
21
 
23
22
  /*-*************************************
24
23
  * Price functions for optimal parser
25
24
  ***************************************/
26
25
 
27
- #if 0 /* approximation at bit level */
26
+ #if 0 /* approximation at bit level (for tests) */
28
27
  # define BITCOST_ACCURACY 0
29
28
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
30
- # define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
31
- #elif 0 /* fractional bit accuracy */
29
+ # define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
30
+ #elif 0 /* fractional bit accuracy (for tests) */
32
31
  # define BITCOST_ACCURACY 8
33
32
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
34
- # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
33
+ # define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
35
34
  #else /* opt==approx, ultra==accurate */
36
35
  # define BITCOST_ACCURACY 8
37
36
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
38
- # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
37
+ # define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
39
38
  #endif
40
39
 
40
+ /* ZSTD_bitWeight() :
41
+ * provide estimated "cost" of a stat in full bits only */
41
42
  MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
42
43
  {
43
44
  return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
44
45
  }
45
46
 
47
+ /* ZSTD_fracWeight() :
48
+ * provide fractional-bit "cost" of a stat,
49
+ * using linear interpolation approximation */
46
50
  MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
47
51
  {
48
52
  U32 const stat = rawStat + 1;
49
53
  U32 const hb = ZSTD_highbit32(stat);
50
54
  U32 const BWeight = hb * BITCOST_MULTIPLIER;
55
+ /* Fweight was meant for "Fractional weight"
56
+ * but it's effectively a value between 1 and 2
57
+ * using fixed point arithmetic */
51
58
  U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
52
59
  U32 const weight = BWeight + FWeight;
53
60
  assert(hb + BITCOST_ACCURACY < 31);
@@ -58,7 +65,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
58
65
  /* debugging function,
59
66
  * @return price in bytes as fractional value
60
67
  * for debug messages only */
61
- MEM_STATIC double ZSTD_fCost(U32 price)
68
+ MEM_STATIC double ZSTD_fCost(int price)
62
69
  {
63
70
  return (double)price / (BITCOST_MULTIPLIER*8);
64
71
  }
@@ -66,7 +73,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)
66
73
 
67
74
  static int ZSTD_compressedLiterals(optState_t const* const optPtr)
68
75
  {
69
- return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
76
+ return optPtr->literalCompressionMode != ZSTD_ps_disable;
70
77
  }
71
78
 
72
79
  static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
@@ -79,25 +86,52 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
79
86
  }
80
87
 
81
88
 
82
- /* ZSTD_downscaleStat() :
83
- * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
84
- * return the resulting sum of elements */
85
- static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
89
+ static U32 sum_u32(const unsigned table[], size_t nbElts)
90
+ {
91
+ size_t n;
92
+ U32 total = 0;
93
+ for (n=0; n<nbElts; n++) {
94
+ total += table[n];
95
+ }
96
+ return total;
97
+ }
98
+
99
+ typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
100
+
101
+ static U32
102
+ ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
86
103
  {
87
104
  U32 s, sum=0;
88
- DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
89
- assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
105
+ DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
106
+ (unsigned)lastEltIndex+1, (unsigned)shift );
107
+ assert(shift < 30);
90
108
  for (s=0; s<lastEltIndex+1; s++) {
91
- table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
92
- sum += table[s];
109
+ unsigned const base = base1 ? 1 : (table[s]>0);
110
+ unsigned const newStat = base + (table[s] >> shift);
111
+ sum += newStat;
112
+ table[s] = newStat;
93
113
  }
94
114
  return sum;
95
115
  }
96
116
 
117
+ /* ZSTD_scaleStats() :
118
+ * reduce all elt frequencies in table if sum too large
119
+ * return the resulting sum of elements */
120
+ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
121
+ {
122
+ U32 const prevsum = sum_u32(table, lastEltIndex+1);
123
+ U32 const factor = prevsum >> logTarget;
124
+ DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
125
+ assert(logTarget < 30);
126
+ if (factor <= 1) return prevsum;
127
+ return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
128
+ }
129
+
97
130
  /* ZSTD_rescaleFreqs() :
98
131
  * if first block (detected by optPtr->litLengthSum == 0) : init statistics
99
132
  * take hints from dictionary if there is one
100
- * or init from zero, using src for literals stats, or flat 1 for match symbols
133
+ * and init from zero if there is none,
134
+ * using src for literals stats, and baseline stats for sequence symbols
101
135
  * otherwise downscale existing stats, to be used as seed for next block.
102
136
  */
103
137
  static void
@@ -109,24 +143,28 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
109
143
  DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
110
144
  optPtr->priceType = zop_dynamic;
111
145
 
112
- if (optPtr->litLengthSum == 0) { /* first block : init */
113
- if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
114
- DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
146
+ if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
147
+
148
+ /* heuristic: use pre-defined stats for too small inputs */
149
+ if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
150
+ DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
115
151
  optPtr->priceType = zop_predef;
116
152
  }
117
153
 
118
154
  assert(optPtr->symbolCosts != NULL);
119
155
  if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
120
- /* huffman table presumed generated by dictionary */
156
+
157
+ /* huffman stats covering the full value set : table presumed generated by dictionary */
121
158
  optPtr->priceType = zop_dynamic;
122
159
 
123
160
  if (compressedLiterals) {
161
+ /* generate literals statistics from huffman table */
124
162
  unsigned lit;
125
163
  assert(optPtr->litFreq != NULL);
126
164
  optPtr->litSum = 0;
127
165
  for (lit=0; lit<=MaxLit; lit++) {
128
166
  U32 const scaleLog = 11; /* scale to 2K */
129
- U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
167
+ U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
130
168
  assert(bitCost <= scaleLog);
131
169
  optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
132
170
  optPtr->litSum += optPtr->litFreq[lit];
@@ -168,20 +206,26 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
168
206
  optPtr->offCodeSum += optPtr->offCodeFreq[of];
169
207
  } }
170
208
 
171
- } else { /* not a dictionary */
209
+ } else { /* first block, no dictionary */
172
210
 
173
211
  assert(optPtr->litFreq != NULL);
174
212
  if (compressedLiterals) {
213
+ /* base initial cost of literals on direct frequency within src */
175
214
  unsigned lit = MaxLit;
176
215
  HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
177
- optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
216
+ optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
178
217
  }
179
218
 
180
- { unsigned ll;
181
- for (ll=0; ll<=MaxLL; ll++)
182
- optPtr->litLengthFreq[ll] = 1;
219
+ { unsigned const baseLLfreqs[MaxLL+1] = {
220
+ 4, 2, 1, 1, 1, 1, 1, 1,
221
+ 1, 1, 1, 1, 1, 1, 1, 1,
222
+ 1, 1, 1, 1, 1, 1, 1, 1,
223
+ 1, 1, 1, 1, 1, 1, 1, 1,
224
+ 1, 1, 1, 1
225
+ };
226
+ ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
227
+ optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
183
228
  }
184
- optPtr->litLengthSum = MaxLL+1;
185
229
 
186
230
  { unsigned ml;
187
231
  for (ml=0; ml<=MaxML; ml++)
@@ -189,21 +233,25 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
189
233
  }
190
234
  optPtr->matchLengthSum = MaxML+1;
191
235
 
192
- { unsigned of;
193
- for (of=0; of<=MaxOff; of++)
194
- optPtr->offCodeFreq[of] = 1;
236
+ { unsigned const baseOFCfreqs[MaxOff+1] = {
237
+ 6, 2, 1, 1, 2, 3, 4, 4,
238
+ 4, 3, 2, 1, 1, 1, 1, 1,
239
+ 1, 1, 1, 1, 1, 1, 1, 1,
240
+ 1, 1, 1, 1, 1, 1, 1, 1
241
+ };
242
+ ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
243
+ optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
195
244
  }
196
- optPtr->offCodeSum = MaxOff+1;
197
245
 
198
246
  }
199
247
 
200
- } else { /* new block : re-use previous statistics, scaled down */
248
+ } else { /* new block : scale down accumulated statistics */
201
249
 
202
250
  if (compressedLiterals)
203
- optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
204
- optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
205
- optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
206
- optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
251
+ optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
252
+ optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
253
+ optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
254
+ optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
207
255
  }
208
256
 
209
257
  ZSTD_setBasePrices(optPtr, optLevel);
@@ -225,11 +273,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
225
273
  return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
226
274
 
227
275
  /* dynamic statistics */
228
- { U32 price = litLength * optPtr->litSumBasePrice;
276
+ { U32 price = optPtr->litSumBasePrice * litLength;
277
+ U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
229
278
  U32 u;
279
+ assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
230
280
  for (u=0; u < litLength; u++) {
231
- assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
232
- price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
281
+ U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
282
+ if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
283
+ price -= litPrice;
233
284
  }
234
285
  return price;
235
286
  }
@@ -239,7 +290,17 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
239
290
  * cost of literalLength symbol */
240
291
  static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
241
292
  {
242
- if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
293
+ assert(litLength <= ZSTD_BLOCKSIZE_MAX);
294
+ if (optPtr->priceType == zop_predef)
295
+ return WEIGHT(litLength, optLevel);
296
+
297
+ /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
298
+ * because it isn't representable in the zstd format.
299
+ * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
300
+ * In such a case, the block would be all literals.
301
+ */
302
+ if (litLength == ZSTD_BLOCKSIZE_MAX)
303
+ return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
243
304
 
244
305
  /* dynamic statistics */
245
306
  { U32 const llCode = ZSTD_LLcode(litLength);
@@ -249,57 +310,26 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
249
310
  }
250
311
  }
251
312
 
252
- /* ZSTD_litLengthContribution() :
253
- * @return ( cost(litlength) - cost(0) )
254
- * this value can then be added to rawLiteralsCost()
255
- * to provide a cost which is directly comparable to a match ending at same position */
256
- static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
257
- {
258
- if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);
259
-
260
- /* dynamic statistics */
261
- { U32 const llCode = ZSTD_LLcode(litLength);
262
- int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
263
- + (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
264
- - (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
265
- #if 1
266
- return contribution;
267
- #else
268
- return MAX(0, contribution); /* sometimes better, sometimes not ... */
269
- #endif
270
- }
271
- }
272
-
273
- /* ZSTD_literalsContribution() :
274
- * creates a fake cost for the literals part of a sequence
275
- * which can be compared to the ending cost of a match
276
- * should a new match start at this position */
277
- static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
278
- const optState_t* const optPtr,
279
- int optLevel)
280
- {
281
- int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
282
- + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
283
- return contribution;
284
- }
285
-
286
313
  /* ZSTD_getMatchPrice() :
287
- * Provides the cost of the match part (offset + matchLength) of a sequence
314
+ * Provides the cost of the match part (offset + matchLength) of a sequence.
288
315
  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
289
- * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
316
+ * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
317
+ * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
318
+ */
290
319
  FORCE_INLINE_TEMPLATE U32
291
- ZSTD_getMatchPrice(U32 const offset,
320
+ ZSTD_getMatchPrice(U32 const offBase,
292
321
  U32 const matchLength,
293
322
  const optState_t* const optPtr,
294
323
  int const optLevel)
295
324
  {
296
325
  U32 price;
297
- U32 const offCode = ZSTD_highbit32(offset+1);
326
+ U32 const offCode = ZSTD_highbit32(offBase);
298
327
  U32 const mlBase = matchLength - MINMATCH;
299
328
  assert(matchLength >= MINMATCH);
300
329
 
301
- if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
302
- return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
330
+ if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
331
+ return WEIGHT(mlBase, optLevel)
332
+ + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
303
333
 
304
334
  /* dynamic statistics */
305
335
  price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
@@ -318,10 +348,10 @@ ZSTD_getMatchPrice(U32 const offset,
318
348
  }
319
349
 
320
350
  /* ZSTD_updateStats() :
321
- * assumption : literals + litLengtn <= iend */
351
+ * assumption : literals + litLength <= iend */
322
352
  static void ZSTD_updateStats(optState_t* const optPtr,
323
353
  U32 litLength, const BYTE* literals,
324
- U32 offsetCode, U32 matchLength)
354
+ U32 offBase, U32 matchLength)
325
355
  {
326
356
  /* literals */
327
357
  if (ZSTD_compressedLiterals(optPtr)) {
@@ -337,8 +367,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
337
367
  optPtr->litLengthSum++;
338
368
  }
339
369
 
340
- /* match offset code (0-2=>repCode; 3+=>offset+2) */
341
- { U32 const offCode = ZSTD_highbit32(offsetCode+1);
370
+ /* offset code : follows storeSeq() numeric representation */
371
+ { U32 const offCode = ZSTD_highbit32(offBase);
342
372
  assert(offCode <= MaxOff);
343
373
  optPtr->offCodeFreq[offCode]++;
344
374
  optPtr->offCodeSum++;
@@ -372,7 +402,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
372
402
 
373
403
  /* Update hashTable3 up to ip (excluded)
374
404
  Assumption : always within prefix (i.e. not within extDict) */
375
- static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
405
+ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
376
406
  U32* nextToUpdate3,
377
407
  const BYTE* const ip)
378
408
  {
@@ -398,11 +428,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
398
428
  * Binary Tree search
399
429
  ***************************************/
400
430
  /** ZSTD_insertBt1() : add one or multiple positions to tree.
401
- * ip : assumed <= iend-8 .
431
+ * @param ip assumed <= iend-8 .
432
+ * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
402
433
  * @return : nb of positions added */
403
434
  static U32 ZSTD_insertBt1(
404
- ZSTD_matchState_t* ms,
435
+ const ZSTD_matchState_t* ms,
405
436
  const BYTE* const ip, const BYTE* const iend,
437
+ U32 const target,
406
438
  U32 const mls, const int extDict)
407
439
  {
408
440
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -420,32 +452,36 @@ static U32 ZSTD_insertBt1(
420
452
  const BYTE* const dictEnd = dictBase + dictLimit;
421
453
  const BYTE* const prefixStart = base + dictLimit;
422
454
  const BYTE* match;
423
- const U32 current = (U32)(ip-base);
424
- const U32 btLow = btMask >= current ? 0 : current - btMask;
425
- U32* smallerPtr = bt + 2*(current&btMask);
455
+ const U32 curr = (U32)(ip-base);
456
+ const U32 btLow = btMask >= curr ? 0 : curr - btMask;
457
+ U32* smallerPtr = bt + 2*(curr&btMask);
426
458
  U32* largerPtr = smallerPtr + 1;
427
459
  U32 dummy32; /* to be nullified at the end */
428
- U32 const windowLow = ms->window.lowLimit;
429
- U32 matchEndIdx = current+8+1;
460
+ /* windowLow is based on target because
461
+ * we only need positions that will be in the window at the end of the tree update.
462
+ */
463
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
464
+ U32 matchEndIdx = curr+8+1;
430
465
  size_t bestLength = 8;
431
466
  U32 nbCompares = 1U << cParams->searchLog;
432
467
  #ifdef ZSTD_C_PREDICT
433
- U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
434
- U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
468
+ U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
469
+ U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
435
470
  predictedSmall += (predictedSmall>0);
436
471
  predictedLarge += (predictedLarge>0);
437
472
  #endif /* ZSTD_C_PREDICT */
438
473
 
439
- DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
474
+ DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
440
475
 
476
+ assert(curr <= target);
441
477
  assert(ip <= iend-8); /* required for h calculation */
442
- hashTable[h] = current; /* Update Hash Table */
478
+ hashTable[h] = curr; /* Update Hash Table */
443
479
 
444
480
  assert(windowLow > 0);
445
- while (nbCompares-- && (matchIndex >= windowLow)) {
481
+ for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
446
482
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
447
483
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
448
- assert(matchIndex < current);
484
+ assert(matchIndex < curr);
449
485
 
450
486
  #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
451
487
  const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
@@ -508,8 +544,8 @@ static U32 ZSTD_insertBt1(
508
544
  *smallerPtr = *largerPtr = 0;
509
545
  { U32 positions = 0;
510
546
  if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
511
- assert(matchEndIdx > current + 8);
512
- return MAX(positions, matchEndIdx - (current + 8));
547
+ assert(matchEndIdx > curr + 8);
548
+ return MAX(positions, matchEndIdx - (curr + 8));
513
549
  }
514
550
  }
515
551
 
@@ -526,7 +562,7 @@ void ZSTD_updateTree_internal(
526
562
  idx, target, dictMode);
527
563
 
528
564
  while(idx < target) {
529
- U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
565
+ U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
530
566
  assert(idx < (U32)(idx + forward));
531
567
  idx += forward;
532
568
  }
@@ -539,21 +575,22 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
539
575
  ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
540
576
  }
541
577
 
542
- FORCE_INLINE_TEMPLATE
543
- U32 ZSTD_insertBtAndGetAllMatches (
544
- ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
545
- ZSTD_matchState_t* ms,
546
- U32* nextToUpdate3,
547
- const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
548
- const U32 rep[ZSTD_REP_NUM],
549
- U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
550
- const U32 lengthToBeat,
551
- U32 const mls /* template */)
578
+ FORCE_INLINE_TEMPLATE U32
579
+ ZSTD_insertBtAndGetAllMatches (
580
+ ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
581
+ ZSTD_matchState_t* ms,
582
+ U32* nextToUpdate3,
583
+ const BYTE* const ip, const BYTE* const iLimit,
584
+ const ZSTD_dictMode_e dictMode,
585
+ const U32 rep[ZSTD_REP_NUM],
586
+ const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
587
+ const U32 lengthToBeat,
588
+ const U32 mls /* template */)
552
589
  {
553
590
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
554
591
  U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
555
592
  const BYTE* const base = ms->window.base;
556
- U32 const current = (U32)(ip-base);
593
+ U32 const curr = (U32)(ip-base);
557
594
  U32 const hashLog = cParams->hashLog;
558
595
  U32 const minMatch = (mls==3) ? 3 : 4;
559
596
  U32* const hashTable = ms->hashTable;
@@ -567,12 +604,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
567
604
  U32 const dictLimit = ms->window.dictLimit;
568
605
  const BYTE* const dictEnd = dictBase + dictLimit;
569
606
  const BYTE* const prefixStart = base + dictLimit;
570
- U32 const btLow = (btMask >= current) ? 0 : current - btMask;
571
- U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
607
+ U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
608
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
572
609
  U32 const matchLow = windowLow ? windowLow : 1;
573
- U32* smallerPtr = bt + 2*(current&btMask);
574
- U32* largerPtr = bt + 2*(current&btMask) + 1;
575
- U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
610
+ U32* smallerPtr = bt + 2*(curr&btMask);
611
+ U32* largerPtr = bt + 2*(curr&btMask) + 1;
612
+ U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
576
613
  U32 dummy32; /* to be nullified at the end */
577
614
  U32 mnum = 0;
578
615
  U32 nbCompares = 1U << cParams->searchLog;
@@ -591,7 +628,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
591
628
  U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
592
629
 
593
630
  size_t bestLength = lengthToBeat-1;
594
- DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
631
+ DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
595
632
 
596
633
  /* check repCode */
597
634
  assert(ll0 <= 1); /* necessarily 1 or 0 */
@@ -599,26 +636,29 @@ U32 ZSTD_insertBtAndGetAllMatches (
599
636
  U32 repCode;
600
637
  for (repCode = ll0; repCode < lastR; repCode++) {
601
638
  U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
602
- U32 const repIndex = current - repOffset;
639
+ U32 const repIndex = curr - repOffset;
603
640
  U32 repLen = 0;
604
- assert(current >= dictLimit);
605
- if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
606
- if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
641
+ assert(curr >= dictLimit);
642
+ if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
643
+ /* We must validate the repcode offset because when we're using a dictionary the
644
+ * valid offset range shrinks when the dictionary goes out of bounds.
645
+ */
646
+ if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
607
647
  repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
608
648
  }
609
- } else { /* repIndex < dictLimit || repIndex >= current */
649
+ } else { /* repIndex < dictLimit || repIndex >= curr */
610
650
  const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
611
651
  dmsBase + repIndex - dmsIndexDelta :
612
652
  dictBase + repIndex;
613
- assert(current >= windowLow);
653
+ assert(curr >= windowLow);
614
654
  if ( dictMode == ZSTD_extDict
615
- && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
655
+ && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
616
656
  & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
617
657
  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
618
658
  repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
619
659
  }
620
660
  if (dictMode == ZSTD_dictMatchState
621
- && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */
661
+ && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
622
662
  & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
623
663
  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
624
664
  repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
@@ -628,7 +668,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
628
668
  DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
629
669
  repCode, ll0, repOffset, repLen);
630
670
  bestLength = repLen;
631
- matches[mnum].off = repCode - ll0;
671
+ matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */
632
672
  matches[mnum].len = (U32)repLen;
633
673
  mnum++;
634
674
  if ( (repLen > sufficient_len)
@@ -640,7 +680,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
640
680
  if ((mls == 3) /*static*/ && (bestLength < mls)) {
641
681
  U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
642
682
  if ((matchIndex3 >= matchLow)
643
- & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
683
+ & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
644
684
  size_t mlen;
645
685
  if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
646
686
  const BYTE* const match = base + matchIndex3;
@@ -655,26 +695,26 @@ U32 ZSTD_insertBtAndGetAllMatches (
655
695
  DEBUGLOG(8, "found small match with hlog3, of length %u",
656
696
  (U32)mlen);
657
697
  bestLength = mlen;
658
- assert(current > matchIndex3);
698
+ assert(curr > matchIndex3);
659
699
  assert(mnum==0); /* no prior solution */
660
- matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
700
+ matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
661
701
  matches[0].len = (U32)mlen;
662
702
  mnum = 1;
663
703
  if ( (mlen > sufficient_len) |
664
704
  (ip+mlen == iLimit) ) { /* best possible length */
665
- ms->nextToUpdate = current+1; /* skip insertion */
705
+ ms->nextToUpdate = curr+1; /* skip insertion */
666
706
  return 1;
667
707
  } } }
668
708
  /* no dictMatchState lookup: dicts don't have a populated HC3 table */
669
- }
709
+ } /* if (mls == 3) */
670
710
 
671
- hashTable[h] = current; /* Update Hash Table */
711
+ hashTable[h] = curr; /* Update Hash Table */
672
712
 
673
- while (nbCompares-- && (matchIndex >= matchLow)) {
713
+ for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
674
714
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
675
715
  const BYTE* match;
676
716
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
677
- assert(current > matchIndex);
717
+ assert(curr > matchIndex);
678
718
 
679
719
  if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
680
720
  assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
@@ -690,21 +730,20 @@ U32 ZSTD_insertBtAndGetAllMatches (
690
730
  }
691
731
 
692
732
  if (matchLength > bestLength) {
693
- DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
694
- (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
733
+ DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
734
+ (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
695
735
  assert(matchEndIdx > matchIndex);
696
736
  if (matchLength > matchEndIdx - matchIndex)
697
737
  matchEndIdx = matchIndex + (U32)matchLength;
698
738
  bestLength = matchLength;
699
- matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
739
+ matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
700
740
  matches[mnum].len = (U32)matchLength;
701
741
  mnum++;
702
742
  if ( (matchLength > ZSTD_OPT_NUM)
703
743
  | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
704
744
  if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
705
745
  break; /* drop, to preserve bt consistency (miss a little bit of compression) */
706
- }
707
- }
746
+ } }
708
747
 
709
748
  if (match[matchLength] < ip[matchLength]) {
710
749
  /* match smaller than current */
@@ -723,12 +762,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
723
762
 
724
763
  *smallerPtr = *largerPtr = 0;
725
764
 
765
+ assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
726
766
  if (dictMode == ZSTD_dictMatchState && nbCompares) {
727
767
  size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
728
768
  U32 dictMatchIndex = dms->hashTable[dmsH];
729
769
  const U32* const dmsBt = dms->chainTable;
730
770
  commonLengthSmaller = commonLengthLarger = 0;
731
- while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
771
+ for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
732
772
  const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
733
773
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
734
774
  const BYTE* match = dmsBase + dictMatchIndex;
@@ -738,19 +778,18 @@ U32 ZSTD_insertBtAndGetAllMatches (
738
778
 
739
779
  if (matchLength > bestLength) {
740
780
  matchIndex = dictMatchIndex + dmsIndexDelta;
741
- DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
742
- (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
781
+ DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
782
+ (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
743
783
  if (matchLength > matchEndIdx - matchIndex)
744
784
  matchEndIdx = matchIndex + (U32)matchLength;
745
785
  bestLength = matchLength;
746
- matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
786
+ matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
747
787
  matches[mnum].len = (U32)matchLength;
748
788
  mnum++;
749
789
  if ( (matchLength > ZSTD_OPT_NUM)
750
790
  | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
751
791
  break; /* drop, to guarantee consistency (miss a little bit of compression) */
752
- }
753
- }
792
+ } }
754
793
 
755
794
  if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
756
795
  if (match[matchLength] < ip[matchLength]) {
@@ -760,71 +799,242 @@ U32 ZSTD_insertBtAndGetAllMatches (
760
799
  /* match is larger than current */
761
800
  commonLengthLarger = matchLength;
762
801
  dictMatchIndex = nextPtr[0];
763
- }
764
- }
765
- }
802
+ } } } /* if (dictMode == ZSTD_dictMatchState) */
766
803
 
767
- assert(matchEndIdx > current+8);
804
+ assert(matchEndIdx > curr+8);
768
805
  ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
769
806
  return mnum;
770
807
  }
771
808
 
809
+ typedef U32 (*ZSTD_getAllMatchesFn)(
810
+ ZSTD_match_t*,
811
+ ZSTD_matchState_t*,
812
+ U32*,
813
+ const BYTE*,
814
+ const BYTE*,
815
+ const U32 rep[ZSTD_REP_NUM],
816
+ U32 const ll0,
817
+ U32 const lengthToBeat);
818
+
819
+ FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
820
+ ZSTD_match_t* matches,
821
+ ZSTD_matchState_t* ms,
822
+ U32* nextToUpdate3,
823
+ const BYTE* ip,
824
+ const BYTE* const iHighLimit,
825
+ const U32 rep[ZSTD_REP_NUM],
826
+ U32 const ll0,
827
+ U32 const lengthToBeat,
828
+ const ZSTD_dictMode_e dictMode,
829
+ const U32 mls)
830
+ {
831
+ assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
832
+ DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
833
+ if (ip < ms->window.base + ms->nextToUpdate)
834
+ return 0; /* skipped area */
835
+ ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
836
+ return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
837
+ }
838
+
839
+ #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
840
+
841
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
842
+ static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
843
+ ZSTD_match_t* matches, \
844
+ ZSTD_matchState_t* ms, \
845
+ U32* nextToUpdate3, \
846
+ const BYTE* ip, \
847
+ const BYTE* const iHighLimit, \
848
+ const U32 rep[ZSTD_REP_NUM], \
849
+ U32 const ll0, \
850
+ U32 const lengthToBeat) \
851
+ { \
852
+ return ZSTD_btGetAllMatches_internal( \
853
+ matches, ms, nextToUpdate3, ip, iHighLimit, \
854
+ rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
855
+ }
772
856
 
773
- FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
774
- ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
775
- ZSTD_matchState_t* ms,
776
- U32* nextToUpdate3,
777
- const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
778
- const U32 rep[ZSTD_REP_NUM],
779
- U32 const ll0,
780
- U32 const lengthToBeat)
857
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
858
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
859
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
860
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
861
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
862
+
863
+ GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
864
+ GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
865
+ GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
866
+
867
+ #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
868
+ { \
869
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
870
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
871
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
872
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
873
+ }
874
+
875
+ static ZSTD_getAllMatchesFn
876
+ ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
781
877
  {
782
- const ZSTD_compressionParameters* const cParams = &ms->cParams;
783
- U32 const matchLengthSearch = cParams->minMatch;
784
- DEBUGLOG(8, "ZSTD_BtGetAllMatches");
785
- if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
786
- ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
787
- switch(matchLengthSearch)
788
- {
789
- case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
790
- default :
791
- case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
792
- case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
793
- case 7 :
794
- case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
878
+ ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
879
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
880
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
881
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
882
+ };
883
+ U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
884
+ assert((U32)dictMode < 3);
885
+ assert(mls - 3 < 4);
886
+ return getAllMatchesFns[(int)dictMode][mls - 3];
887
+ }
888
+
889
+ /*************************
890
+ * LDM helper functions *
891
+ *************************/
892
+
893
+ /* Struct containing info needed to make decision about ldm inclusion */
894
+ typedef struct {
895
+ rawSeqStore_t seqStore; /* External match candidates store for this block */
896
+ U32 startPosInBlock; /* Start position of the current match candidate */
897
+ U32 endPosInBlock; /* End position of the current match candidate */
898
+ U32 offset; /* Offset of the match candidate */
899
+ } ZSTD_optLdm_t;
900
+
901
+ /* ZSTD_optLdm_skipRawSeqStoreBytes():
902
+ * Moves forward in @rawSeqStore by @nbBytes,
903
+ * which will update the fields 'pos' and 'posInSequence'.
904
+ */
905
+ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
906
+ {
907
+ U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
908
+ while (currPos && rawSeqStore->pos < rawSeqStore->size) {
909
+ rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
910
+ if (currPos >= currSeq.litLength + currSeq.matchLength) {
911
+ currPos -= currSeq.litLength + currSeq.matchLength;
912
+ rawSeqStore->pos++;
913
+ } else {
914
+ rawSeqStore->posInSequence = currPos;
915
+ break;
916
+ }
917
+ }
918
+ if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
919
+ rawSeqStore->posInSequence = 0;
795
920
  }
796
921
  }
797
922
 
923
+ /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
924
+ * Calculates the beginning and end of the next match in the current block.
925
+ * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
926
+ */
927
+ static void
928
+ ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
929
+ U32 blockBytesRemaining)
930
+ {
931
+ rawSeq currSeq;
932
+ U32 currBlockEndPos;
933
+ U32 literalsBytesRemaining;
934
+ U32 matchBytesRemaining;
935
+
936
+ /* Setting match end position to MAX to ensure we never use an LDM during this block */
937
+ if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
938
+ optLdm->startPosInBlock = UINT_MAX;
939
+ optLdm->endPosInBlock = UINT_MAX;
940
+ return;
941
+ }
942
+ /* Calculate appropriate bytes left in matchLength and litLength
943
+ * after adjusting based on ldmSeqStore->posInSequence */
944
+ currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
945
+ assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
946
+ currBlockEndPos = currPosInBlock + blockBytesRemaining;
947
+ literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
948
+ currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
949
+ 0;
950
+ matchBytesRemaining = (literalsBytesRemaining == 0) ?
951
+ currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
952
+ currSeq.matchLength;
953
+
954
+ /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
955
+ if (literalsBytesRemaining >= blockBytesRemaining) {
956
+ optLdm->startPosInBlock = UINT_MAX;
957
+ optLdm->endPosInBlock = UINT_MAX;
958
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
959
+ return;
960
+ }
798
961
 
799
- /*-*******************************
800
- * Optimal parser
801
- *********************************/
802
- typedef struct repcodes_s {
803
- U32 rep[3];
804
- } repcodes_t;
962
+ /* Matches may be < MINMATCH by this process. In that case, we will reject them
963
+ when we are deciding whether or not to add the ldm */
964
+ optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
965
+ optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
966
+ optLdm->offset = currSeq.offset;
967
+
968
+ if (optLdm->endPosInBlock > currBlockEndPos) {
969
+ /* Match ends after the block ends, we can't use the whole match */
970
+ optLdm->endPosInBlock = currBlockEndPos;
971
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
972
+ } else {
973
+ /* Consume nb of bytes equal to size of sequence left */
974
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
975
+ }
976
+ }
977
+
978
+ /* ZSTD_optLdm_maybeAddMatch():
979
+ * Adds a match if it's long enough,
980
+ * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
981
+ * into 'matches'. Maintains the correct ordering of 'matches'.
982
+ */
983
+ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
984
+ const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
985
+ {
986
+ U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
987
+ /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
988
+ U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
989
+
990
+ /* Ensure that current block position is not outside of the match */
991
+ if (currPosInBlock < optLdm->startPosInBlock
992
+ || currPosInBlock >= optLdm->endPosInBlock
993
+ || candidateMatchLength < MINMATCH) {
994
+ return;
995
+ }
996
+
997
+ if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
998
+ U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
999
+ DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
1000
+ candidateOffBase, candidateMatchLength, currPosInBlock);
1001
+ matches[*nbMatches].len = candidateMatchLength;
1002
+ matches[*nbMatches].off = candidateOffBase;
1003
+ (*nbMatches)++;
1004
+ }
1005
+ }
805
1006
 
806
- static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
1007
+ /* ZSTD_optLdm_processMatchCandidate():
1008
+ * Wrapper function to update ldm seq store and call ldm functions as necessary.
1009
+ */
1010
+ static void
1011
+ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
1012
+ ZSTD_match_t* matches, U32* nbMatches,
1013
+ U32 currPosInBlock, U32 remainingBytes)
807
1014
  {
808
- repcodes_t newReps;
809
- if (offset >= ZSTD_REP_NUM) { /* full offset */
810
- newReps.rep[2] = rep[1];
811
- newReps.rep[1] = rep[0];
812
- newReps.rep[0] = offset - ZSTD_REP_MOVE;
813
- } else { /* repcode */
814
- U32 const repCode = offset + ll0;
815
- if (repCode > 0) { /* note : if repCode==0, no change */
816
- U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
817
- newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
818
- newReps.rep[1] = rep[0];
819
- newReps.rep[0] = currentOffset;
820
- } else { /* repCode == 0 */
821
- memcpy(&newReps, rep, sizeof(newReps));
1015
+ if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
1016
+ return;
1017
+ }
1018
+
1019
+ if (currPosInBlock >= optLdm->endPosInBlock) {
1020
+ if (currPosInBlock > optLdm->endPosInBlock) {
1021
+ /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
1022
+ * at the end of a match from the ldm seq store, and will often be some bytes
1023
+ * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
1024
+ */
1025
+ U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
1026
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
822
1027
  }
1028
+ ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
823
1029
  }
824
- return newReps;
1030
+ ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
825
1031
  }
826
1032
 
827
1033
 
1034
+ /*-*******************************
1035
+ * Optimal parser
1036
+ *********************************/
1037
+
828
1038
  static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
829
1039
  {
830
1040
  return sol.litlen + sol.mlen;
@@ -839,7 +1049,7 @@ listStats(const U32* table, int lastEltID)
839
1049
  int enb;
840
1050
  for (enb=0; enb < nbElts; enb++) {
841
1051
  (void)table;
842
- //RAWLOG(2, "%3i:%3i, ", enb, table[enb]);
1052
+ /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
843
1053
  RAWLOG(2, "%4i,", table[enb]);
844
1054
  }
845
1055
  RAWLOG(2, " \n");
@@ -865,6 +1075,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
865
1075
  const BYTE* const prefixStart = base + ms->window.dictLimit;
866
1076
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
867
1077
 
1078
+ ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1079
+
868
1080
  U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
869
1081
  U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
870
1082
  U32 nextToUpdate3 = ms->nextToUpdate;
@@ -872,6 +1084,13 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
872
1084
  ZSTD_optimal_t* const opt = optStatePtr->priceTable;
873
1085
  ZSTD_match_t* const matches = optStatePtr->matchTable;
874
1086
  ZSTD_optimal_t lastSequence;
1087
+ ZSTD_optLdm_t optLdm;
1088
+
1089
+ ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
1090
+
1091
+ optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1092
+ optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1093
+ ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
875
1094
 
876
1095
  /* init */
877
1096
  DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
@@ -887,25 +1106,32 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
887
1106
  /* find first match */
888
1107
  { U32 const litlen = (U32)(ip - anchor);
889
1108
  U32 const ll0 = !litlen;
890
- U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
1109
+ U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1110
+ ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1111
+ (U32)(ip-istart), (U32)(iend - ip));
891
1112
  if (!nbMatches) { ip++; continue; }
892
1113
 
893
1114
  /* initialize opt[0] */
894
1115
  { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
895
1116
  opt[0].mlen = 0; /* means is_a_literal */
896
1117
  opt[0].litlen = litlen;
897
- opt[0].price = ZSTD_literalsContribution(anchor, litlen, optStatePtr, optLevel);
1118
+ /* We don't need to include the actual price of the literals because
1119
+ * it is static for the duration of the forward pass, and is included
1120
+ * in every price. We include the literal length to avoid negative
1121
+ * prices when we subtract the previous literal length.
1122
+ */
1123
+ opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
898
1124
 
899
1125
  /* large match -> immediate encoding */
900
1126
  { U32 const maxML = matches[nbMatches-1].len;
901
- U32 const maxOffset = matches[nbMatches-1].off;
902
- DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
903
- nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
1127
+ U32 const maxOffBase = matches[nbMatches-1].off;
1128
+ DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
1129
+ nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
904
1130
 
905
1131
  if (maxML > sufficient_len) {
906
1132
  lastSequence.litlen = litlen;
907
1133
  lastSequence.mlen = maxML;
908
- lastSequence.off = maxOffset;
1134
+ lastSequence.off = maxOffBase;
909
1135
  DEBUGLOG(6, "large match (%u>%u), immediate encoding",
910
1136
  maxML, sufficient_len);
911
1137
  cur = 0;
@@ -914,27 +1140,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
914
1140
  } }
915
1141
 
916
1142
  /* set prices for first matches starting position == 0 */
917
- { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1143
+ assert(opt[0].price >= 0);
1144
+ { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
918
1145
  U32 pos;
919
1146
  U32 matchNb;
920
1147
  for (pos = 1; pos < minMatch; pos++) {
921
1148
  opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
922
1149
  }
923
1150
  for (matchNb = 0; matchNb < nbMatches; matchNb++) {
924
- U32 const offset = matches[matchNb].off;
1151
+ U32 const offBase = matches[matchNb].off;
925
1152
  U32 const end = matches[matchNb].len;
926
- repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
927
1153
  for ( ; pos <= end ; pos++ ) {
928
- U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
1154
+ U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
929
1155
  U32 const sequencePrice = literalsPrice + matchPrice;
930
1156
  DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
931
- pos, ZSTD_fCost(sequencePrice));
1157
+ pos, ZSTD_fCost((int)sequencePrice));
932
1158
  opt[pos].mlen = pos;
933
- opt[pos].off = offset;
1159
+ opt[pos].off = offBase;
934
1160
  opt[pos].litlen = litlen;
935
- opt[pos].price = sequencePrice;
936
- ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
937
- memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
1161
+ opt[pos].price = (int)sequencePrice;
938
1162
  } }
939
1163
  last_pos = pos-1;
940
1164
  }
@@ -949,9 +1173,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
949
1173
  /* Fix current position with one literal if cheaper */
950
1174
  { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
951
1175
  int const price = opt[cur-1].price
952
- + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
953
- + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
954
- - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1176
+ + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1177
+ + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1178
+ - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
955
1179
  assert(price < 1000000000); /* overflow check */
956
1180
  if (price <= opt[cur].price) {
957
1181
  DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
@@ -961,7 +1185,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
961
1185
  opt[cur].off = 0;
962
1186
  opt[cur].litlen = litlen;
963
1187
  opt[cur].price = price;
964
- memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
965
1188
  } else {
966
1189
  DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
967
1190
  inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
@@ -969,6 +1192,21 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
969
1192
  }
970
1193
  }
971
1194
 
1195
+ /* Set the repcodes of the current position. We must do it here
1196
+ * because we rely on the repcodes of the 2nd to last sequence being
1197
+ * correct to set the next chunks repcodes during the backward
1198
+ * traversal.
1199
+ */
1200
+ ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
1201
+ assert(cur >= opt[cur].mlen);
1202
+ if (opt[cur].mlen != 0) {
1203
+ U32 const prev = cur - opt[cur].mlen;
1204
+ repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
1205
+ ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1206
+ } else {
1207
+ ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
1208
+ }
1209
+
972
1210
  /* last match must start at a minimum distance of 8 from oend */
973
1211
  if (inr > ilimit) continue;
974
1212
 
@@ -980,12 +1218,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
980
1218
  continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
981
1219
  }
982
1220
 
1221
+ assert(opt[cur].price >= 0);
983
1222
  { U32 const ll0 = (opt[cur].mlen != 0);
984
1223
  U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
985
- U32 const previousPrice = opt[cur].price;
1224
+ U32 const previousPrice = (U32)opt[cur].price;
986
1225
  U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
987
- U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
1226
+ U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
988
1227
  U32 matchNb;
1228
+
1229
+ ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1230
+ (U32)(inr-istart), (U32)(iend-inr));
1231
+
989
1232
  if (!nbMatches) {
990
1233
  DEBUGLOG(7, "rPos:%u : no match found", cur);
991
1234
  continue;
@@ -1009,17 +1252,16 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1009
1252
  /* set prices using matches found at position == cur */
1010
1253
  for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1011
1254
  U32 const offset = matches[matchNb].off;
1012
- repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
1013
1255
  U32 const lastML = matches[matchNb].len;
1014
1256
  U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1015
1257
  U32 mlen;
1016
1258
 
1017
- DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
1259
+ DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
1018
1260
  matchNb, matches[matchNb].off, lastML, litlen);
1019
1261
 
1020
1262
  for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
1021
1263
  U32 const pos = cur + mlen;
1022
- int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1264
+ int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1023
1265
 
1024
1266
  if ((pos > last_pos) || (price < opt[pos].price)) {
1025
1267
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
@@ -1029,8 +1271,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1029
1271
  opt[pos].off = offset;
1030
1272
  opt[pos].litlen = litlen;
1031
1273
  opt[pos].price = price;
1032
- ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
1033
- memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
1034
1274
  } else {
1035
1275
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1036
1276
  pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
@@ -1046,6 +1286,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1046
1286
  _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1047
1287
  assert(opt[0].mlen == 0);
1048
1288
 
1289
+ /* Set the next chunk's repcodes based on the repcodes of the beginning
1290
+ * of the last match, and the last sequence. This avoids us having to
1291
+ * update them while traversing the sequences.
1292
+ */
1293
+ if (lastSequence.mlen != 0) {
1294
+ repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
1295
+ ZSTD_memcpy(rep, &reps, sizeof(reps));
1296
+ } else {
1297
+ ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1298
+ }
1299
+
1049
1300
  { U32 const storeEnd = cur + 1;
1050
1301
  U32 storeStart = storeEnd;
1051
1302
  U32 seqPos = cur;
@@ -1071,7 +1322,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1071
1322
  for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1072
1323
  U32 const llen = opt[storePos].litlen;
1073
1324
  U32 const mlen = opt[storePos].mlen;
1074
- U32 const offCode = opt[storePos].off;
1325
+ U32 const offBase = opt[storePos].off;
1075
1326
  U32 const advance = llen + mlen;
1076
1327
  DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1077
1328
  anchor - istart, (unsigned)llen, (unsigned)mlen);
@@ -1082,72 +1333,49 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1082
1333
  continue; /* will finish */
1083
1334
  }
1084
1335
 
1085
- /* repcodes update : like ZSTD_updateRep(), but update in place */
1086
- if (offCode >= ZSTD_REP_NUM) { /* full offset */
1087
- rep[2] = rep[1];
1088
- rep[1] = rep[0];
1089
- rep[0] = offCode - ZSTD_REP_MOVE;
1090
- } else { /* repcode */
1091
- U32 const repCode = offCode + (llen==0);
1092
- if (repCode) { /* note : if repCode==0, no change */
1093
- U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
1094
- if (repCode >= 2) rep[2] = rep[1];
1095
- rep[1] = rep[0];
1096
- rep[0] = currentOffset;
1097
- } }
1098
-
1099
1336
  assert(anchor + llen <= iend);
1100
- ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1101
- ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
1337
+ ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
1338
+ ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
1102
1339
  anchor += advance;
1103
1340
  ip = anchor;
1104
1341
  } }
1105
1342
  ZSTD_setBasePrices(optStatePtr, optLevel);
1106
1343
  }
1107
-
1108
1344
  } /* while (ip < ilimit) */
1109
1345
 
1110
1346
  /* Return the last literals size */
1111
1347
  return (size_t)(iend - anchor);
1112
1348
  }
1113
1349
 
1350
+ static size_t ZSTD_compressBlock_opt0(
1351
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1352
+ const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1353
+ {
1354
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1355
+ }
1356
+
1357
+ static size_t ZSTD_compressBlock_opt2(
1358
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1359
+ const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1360
+ {
1361
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1362
+ }
1114
1363
 
1115
1364
  size_t ZSTD_compressBlock_btopt(
1116
1365
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1117
1366
  const void* src, size_t srcSize)
1118
1367
  {
1119
1368
  DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1120
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
1369
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1121
1370
  }
1122
1371
 
1123
1372
 
1124
- /* used in 2-pass strategy */
1125
- static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
1126
- {
1127
- U32 s, sum=0;
1128
- assert(ZSTD_FREQ_DIV+bonus >= 0);
1129
- for (s=0; s<lastEltIndex+1; s++) {
1130
- table[s] <<= ZSTD_FREQ_DIV+bonus;
1131
- table[s]--;
1132
- sum += table[s];
1133
- }
1134
- return sum;
1135
- }
1136
1373
 
1137
- /* used in 2-pass strategy */
1138
- MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
1139
- {
1140
- if (ZSTD_compressedLiterals(optPtr))
1141
- optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
1142
- optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
1143
- optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
1144
- optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
1145
- }
1146
1374
 
1147
1375
  /* ZSTD_initStats_ultra():
1148
1376
  * make a first compression pass, just to seed stats with more accurate starting values.
1149
1377
  * only works on first block, with no dictionary and no ldm.
1150
- * this function cannot error, hence its contract must be respected.
1378
+ * this function cannot error out, its narrow contract must be respected.
1151
1379
  */
1152
1380
  static void
1153
1381
  ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
@@ -1156,7 +1384,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1156
1384
  const void* src, size_t srcSize)
1157
1385
  {
1158
1386
  U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
1159
- memcpy(tmpRep, rep, sizeof(tmpRep));
1387
+ ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1160
1388
 
1161
1389
  DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1162
1390
  assert(ms->opt.litLengthSum == 0); /* first block */
@@ -1164,17 +1392,15 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1164
1392
  assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
1165
1393
  assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
1166
1394
 
1167
- ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
1395
+ ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
1168
1396
 
1169
- /* invalidate first scan from history */
1397
+ /* invalidate first scan from history, only keep entropy stats */
1170
1398
  ZSTD_resetSeqStore(seqStore);
1171
1399
  ms->window.base -= srcSize;
1172
1400
  ms->window.dictLimit += (U32)srcSize;
1173
1401
  ms->window.lowLimit = ms->window.dictLimit;
1174
1402
  ms->nextToUpdate = ms->window.dictLimit;
1175
1403
 
1176
- /* re-inforce weight of collected statistics */
1177
- ZSTD_upscaleStats(&ms->opt);
1178
1404
  }
1179
1405
 
1180
1406
  size_t ZSTD_compressBlock_btultra(
@@ -1182,63 +1408,63 @@ size_t ZSTD_compressBlock_btultra(
1182
1408
  const void* src, size_t srcSize)
1183
1409
  {
1184
1410
  DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1185
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1411
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1186
1412
  }
1187
1413
 
1188
1414
  size_t ZSTD_compressBlock_btultra2(
1189
1415
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1190
1416
  const void* src, size_t srcSize)
1191
1417
  {
1192
- U32 const current = (U32)((const BYTE*)src - ms->window.base);
1418
+ U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1193
1419
  DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1194
1420
 
1195
- /* 2-pass strategy:
1421
+ /* 2-passes strategy:
1196
1422
  * this strategy makes a first pass over first block to collect statistics
1197
- * and seed next round's statistics with it.
1198
- * After 1st pass, function forgets everything, and starts a new block.
1423
+ * in order to seed next round's statistics with it.
1424
+ * After 1st pass, function forgets history, and starts a new block.
1199
1425
  * Consequently, this can only work if no data has been previously loaded in tables,
1200
1426
  * aka, no dictionary, no prefix, no ldm preprocessing.
1201
1427
  * The compression ratio gain is generally small (~0.5% on first block),
1202
- * the cost is 2x cpu time on first block. */
1428
+ ** the cost is 2x cpu time on first block. */
1203
1429
  assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1204
1430
  if ( (ms->opt.litLengthSum==0) /* first block */
1205
1431
  && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1206
1432
  && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
1207
- && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1208
- && (srcSize > ZSTD_PREDEF_THRESHOLD)
1433
+ && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1434
+ && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
1209
1435
  ) {
1210
1436
  ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1211
1437
  }
1212
1438
 
1213
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1439
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1214
1440
  }
1215
1441
 
1216
1442
  size_t ZSTD_compressBlock_btopt_dictMatchState(
1217
1443
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1218
1444
  const void* src, size_t srcSize)
1219
1445
  {
1220
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
1446
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1221
1447
  }
1222
1448
 
1223
1449
  size_t ZSTD_compressBlock_btultra_dictMatchState(
1224
1450
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1225
1451
  const void* src, size_t srcSize)
1226
1452
  {
1227
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
1453
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1228
1454
  }
1229
1455
 
1230
1456
  size_t ZSTD_compressBlock_btopt_extDict(
1231
1457
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1232
1458
  const void* src, size_t srcSize)
1233
1459
  {
1234
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
1460
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1235
1461
  }
1236
1462
 
1237
1463
  size_t ZSTD_compressBlock_btultra_extDict(
1238
1464
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1239
1465
  const void* src, size_t srcSize)
1240
1466
  {
1241
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
1467
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1242
1468
  }
1243
1469
 
1244
1470
  /* note : no btultra2 variant for extDict nor dictMatchState,