zstd-ruby 1.3.2.0 → 1.3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/BUCK +31 -10
  4. data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
  5. data/ext/zstdruby/libzstd/common/mem.h +15 -13
  6. data/ext/zstdruby/libzstd/common/pool.c +1 -2
  7. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -4
  8. data/ext/zstdruby/libzstd/common/zstd_internal.h +52 -170
  9. data/ext/zstdruby/libzstd/compress/zstd_compress.c +434 -337
  10. data/ext/zstdruby/libzstd/compress/{zstd_compress.h → zstd_compress_internal.h} +191 -36
  11. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +1 -0
  12. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
  13. data/ext/zstdruby/libzstd/compress/zstd_fast.c +1 -0
  14. data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
  15. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +66 -50
  16. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +3 -2
  17. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +3 -2
  18. data/ext/zstdruby/libzstd/compress/zstd_opt.c +504 -676
  19. data/ext/zstdruby/libzstd/compress/zstd_opt.h +2 -2
  20. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +130 -80
  21. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +15 -7
  22. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +41 -31
  23. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -0
  24. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +1 -1
  25. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +1 -1
  26. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +1 -74
  27. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +1 -74
  28. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +1 -72
  29. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +1 -73
  30. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +1 -77
  31. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +1 -77
  32. data/ext/zstdruby/libzstd/zstd.h +43 -30
  33. data/lib/zstd-ruby/version.rb +1 -1
  34. metadata +4 -4
@@ -8,36 +8,35 @@
8
8
  * You may select, at your option, one of the above-listed licenses.
9
9
  */
10
10
 
11
+ #include "zstd_compress_internal.h"
11
12
  #include "zstd_opt.h"
12
- #include "zstd_lazy.h"
13
+ #include "zstd_lazy.h" /* ZSTD_updateTree, ZSTD_updateTree_extDict */
13
14
 
14
15
 
15
- #define ZSTD_LITFREQ_ADD 2
16
- #define ZSTD_FREQ_DIV 4
17
- #define ZSTD_MAX_PRICE (1<<30)
16
+ #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
17
+ #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
18
+ #define ZSTD_MAX_PRICE (1<<30)
19
+
18
20
 
19
21
  /*-*************************************
20
22
  * Price functions for optimal parser
21
23
  ***************************************/
22
24
  static void ZSTD_setLog2Prices(optState_t* optPtr)
23
25
  {
24
- optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
25
- optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
26
26
  optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
27
+ optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
28
+ optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
27
29
  optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
28
- optPtr->factor = 1 + ((optPtr->litSum>>5) / optPtr->litLengthSum) + ((optPtr->litSum<<1) / (optPtr->litSum + optPtr->matchSum));
29
30
  }
30
31
 
31
32
 
32
- static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSize)
33
+ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
34
+ const BYTE* const src, size_t const srcSize)
33
35
  {
34
- unsigned u;
35
-
36
- optPtr->cachedLiterals = NULL;
37
- optPtr->cachedPrice = optPtr->cachedLitLength = 0;
38
36
  optPtr->staticPrices = 0;
39
37
 
40
- if (optPtr->litLengthSum == 0) {
38
+ if (optPtr->litLengthSum == 0) { /* first init */
39
+ unsigned u;
41
40
  if (srcSize <= 1024) optPtr->staticPrices = 1;
42
41
 
43
42
  assert(optPtr->litFreq!=NULL);
@@ -45,44 +44,41 @@ static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSiz
45
44
  optPtr->litFreq[u] = 0;
46
45
  for (u=0; u<srcSize; u++)
47
46
  optPtr->litFreq[src[u]]++;
48
-
49
47
  optPtr->litSum = 0;
50
- optPtr->litLengthSum = MaxLL+1;
51
- optPtr->matchLengthSum = MaxML+1;
52
- optPtr->offCodeSum = (MaxOff+1);
53
- optPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);
54
-
55
48
  for (u=0; u<=MaxLit; u++) {
56
- optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>ZSTD_FREQ_DIV);
49
+ optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV);
57
50
  optPtr->litSum += optPtr->litFreq[u];
58
51
  }
52
+
59
53
  for (u=0; u<=MaxLL; u++)
60
54
  optPtr->litLengthFreq[u] = 1;
55
+ optPtr->litLengthSum = MaxLL+1;
61
56
  for (u=0; u<=MaxML; u++)
62
57
  optPtr->matchLengthFreq[u] = 1;
58
+ optPtr->matchLengthSum = MaxML+1;
63
59
  for (u=0; u<=MaxOff; u++)
64
60
  optPtr->offCodeFreq[u] = 1;
61
+ optPtr->offCodeSum = (MaxOff+1);
62
+
65
63
  } else {
66
- optPtr->matchLengthSum = 0;
67
- optPtr->litLengthSum = 0;
68
- optPtr->offCodeSum = 0;
69
- optPtr->matchSum = 0;
70
- optPtr->litSum = 0;
64
+ unsigned u;
71
65
 
66
+ optPtr->litSum = 0;
72
67
  for (u=0; u<=MaxLit; u++) {
73
- optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
68
+ optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1));
74
69
  optPtr->litSum += optPtr->litFreq[u];
75
70
  }
71
+ optPtr->litLengthSum = 0;
76
72
  for (u=0; u<=MaxLL; u++) {
77
73
  optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
78
74
  optPtr->litLengthSum += optPtr->litLengthFreq[u];
79
75
  }
76
+ optPtr->matchLengthSum = 0;
80
77
  for (u=0; u<=MaxML; u++) {
81
78
  optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
82
79
  optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
83
- optPtr->matchSum += optPtr->matchLengthFreq[u] * (u + 3);
84
80
  }
85
- optPtr->matchSum *= ZSTD_LITFREQ_ADD;
81
+ optPtr->offCodeSum = 0;
86
82
  for (u=0; u<=MaxOff; u++) {
87
83
  optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
88
84
  optPtr->offCodeSum += optPtr->offCodeFreq[u];
@@ -93,114 +89,146 @@ static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSiz
93
89
  }
94
90
 
95
91
 
96
- static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 litLength, const BYTE* literals)
92
+ /* ZSTD_rawLiteralsCost() :
93
+ * cost of literals (only) in given segment (which length can be null)
94
+ * does not include cost of literalLength symbol */
95
+ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
96
+ const optState_t* const optPtr)
97
97
  {
98
- U32 price, u;
99
-
100
- if (optPtr->staticPrices)
101
- return ZSTD_highbit32((U32)litLength+1) + (litLength*6);
102
-
103
- if (litLength == 0)
104
- return optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[0]+1);
98
+ if (optPtr->staticPrices) return (litLength*6); /* 6 bit per literal - no statistic used */
99
+ if (litLength == 0) return 0;
105
100
 
106
101
  /* literals */
107
- if (optPtr->cachedLiterals == literals) {
108
- U32 const additional = litLength - optPtr->cachedLitLength;
109
- const BYTE* literals2 = optPtr->cachedLiterals + optPtr->cachedLitLength;
110
- price = optPtr->cachedPrice + additional * optPtr->log2litSum;
111
- for (u=0; u < additional; u++)
112
- price -= ZSTD_highbit32(optPtr->litFreq[literals2[u]]+1);
113
- optPtr->cachedPrice = price;
114
- optPtr->cachedLitLength = litLength;
115
- } else {
116
- price = litLength * optPtr->log2litSum;
102
+ { U32 u;
103
+ U32 cost = litLength * optPtr->log2litSum;
117
104
  for (u=0; u < litLength; u++)
118
- price -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
119
-
120
- if (litLength >= 12) {
121
- optPtr->cachedLiterals = literals;
122
- optPtr->cachedPrice = price;
123
- optPtr->cachedLitLength = litLength;
124
- }
105
+ cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
106
+ return cost;
125
107
  }
108
+ }
109
+
110
+ /* ZSTD_litLengthPrice() :
111
+ * cost of literalLength symbol */
112
+ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr)
113
+ {
114
+ if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1);
126
115
 
127
116
  /* literal Length */
128
- { const BYTE LL_deltaCode = 19;
129
- const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
130
- price += LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
117
+ { U32 const llCode = ZSTD_LLcode(litLength);
118
+ U32 const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
119
+ return price;
131
120
  }
121
+ }
132
122
 
133
- return price;
123
+ /* ZSTD_litLengthPrice() :
124
+ * cost of the literal part of a sequence,
125
+ * including literals themselves, and literalLength symbol */
126
+ static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength,
127
+ const optState_t* const optPtr)
128
+ {
129
+ return ZSTD_rawLiteralsCost(literals, litLength, optPtr)
130
+ + ZSTD_litLengthPrice(litLength, optPtr);
131
+ }
132
+
133
+ /* ZSTD_litLengthContribution() :
134
+ * @return ( cost(litlength) - cost(0) )
135
+ * this value can then be added to rawLiteralsCost()
136
+ * to provide a cost which is directly comparable to a match ending at same position */
137
+ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr)
138
+ {
139
+ if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1);
140
+
141
+ /* literal Length */
142
+ { U32 const llCode = ZSTD_LLcode(litLength);
143
+ int const contribution = LL_bits[llCode]
144
+ + ZSTD_highbit32(optPtr->litLengthFreq[0]+1)
145
+ - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
146
+ #if 1
147
+ return contribution;
148
+ #else
149
+ return MAX(0, contribution); /* sometimes better, sometimes not ... */
150
+ #endif
151
+ }
134
152
  }
135
153
 
154
+ /* ZSTD_literalsContribution() :
155
+ * creates a fake cost for the literals part of a sequence
156
+ * which can be compared to the ending cost of a match
157
+ * should a new match start at this position */
158
+ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
159
+ const optState_t* const optPtr)
160
+ {
161
+ int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr)
162
+ + ZSTD_litLengthContribution(litLength, optPtr);
163
+ return contribution;
164
+ }
136
165
 
137
- FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
166
+ /* ZSTD_getMatchPrice() :
167
+ * Provides the cost of the match part (offset + matchLength) of a sequence
168
+ * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
169
+ * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
170
+ FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(
171
+ U32 const offset, U32 const matchLength,
172
+ const optState_t* const optPtr,
173
+ int const optLevel)
138
174
  {
139
- /* offset */
140
175
  U32 price;
141
- BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
176
+ U32 const offCode = ZSTD_highbit32(offset+1);
177
+ U32 const mlBase = matchLength - MINMATCH;
178
+ assert(matchLength >= MINMATCH);
142
179
 
143
- if (optPtr->staticPrices)
144
- return ZSTD_getLiteralPrice(optPtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;
180
+ if (optPtr->staticPrices) /* fixed scheme, do not use statistics */
181
+ return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode;
145
182
 
146
183
  price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
147
- if (!ultra && offCode >= 20) price += (offCode-19)*2;
184
+ if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */
148
185
 
149
186
  /* match Length */
150
- { const BYTE ML_deltaCode = 36;
151
- const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
187
+ { U32 const mlCode = ZSTD_MLcode(mlBase);
152
188
  price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
153
189
  }
154
190
 
155
- return price + ZSTD_getLiteralPrice(optPtr, litLength, literals) + optPtr->factor;
191
+ DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
192
+ return price;
156
193
  }
157
194
 
158
-
159
- static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
195
+ static void ZSTD_updateStats(optState_t* const optPtr,
196
+ U32 litLength, const BYTE* literals,
197
+ U32 offsetCode, U32 matchLength)
160
198
  {
161
- U32 u;
162
-
163
199
  /* literals */
164
- optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
165
- for (u=0; u < litLength; u++)
166
- optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
200
+ { U32 u;
201
+ for (u=0; u < litLength; u++)
202
+ optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
203
+ optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
204
+ }
167
205
 
168
206
  /* literal Length */
169
- { const BYTE LL_deltaCode = 19;
170
- const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
207
+ { U32 const llCode = ZSTD_LLcode(litLength);
171
208
  optPtr->litLengthFreq[llCode]++;
172
209
  optPtr->litLengthSum++;
173
210
  }
174
211
 
175
- /* match offset */
176
- { BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
177
- optPtr->offCodeSum++;
212
+ /* match offset code (0-2=>repCode; 3+=>offset+2) */
213
+ { U32 const offCode = ZSTD_highbit32(offsetCode+1);
214
+ assert(offCode <= MaxOff);
178
215
  optPtr->offCodeFreq[offCode]++;
216
+ optPtr->offCodeSum++;
179
217
  }
180
218
 
181
219
  /* match Length */
182
- { const BYTE ML_deltaCode = 36;
183
- const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
220
+ { U32 const mlBase = matchLength - MINMATCH;
221
+ U32 const mlCode = ZSTD_MLcode(mlBase);
184
222
  optPtr->matchLengthFreq[mlCode]++;
185
223
  optPtr->matchLengthSum++;
186
224
  }
187
-
188
- ZSTD_setLog2Prices(optPtr);
189
225
  }
190
226
 
191
227
 
192
- #define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \
193
- { \
194
- while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \
195
- opt[pos].mlen = mlen_; \
196
- opt[pos].off = offset_; \
197
- opt[pos].litlen = litlen_; \
198
- opt[pos].price = price_; \
199
- }
200
-
201
-
202
- /* function safe only for comparisons */
203
- static U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
228
+ /* ZSTD_readMINMATCH() :
229
+ * function safe only for comparisons
230
+ * assumption : memPtr must be at least 4 bytes before end of buffer */
231
+ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
204
232
  {
205
233
  switch (length)
206
234
  {
@@ -216,15 +244,14 @@ static U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
216
244
 
217
245
  /* Update hashTable3 up to ip (excluded)
218
246
  Assumption : always within prefix (i.e. not within extDict) */
219
- static
220
- U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
247
+ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* const cctx, const BYTE* const ip)
221
248
  {
222
- U32* const hashTable3 = zc->hashTable3;
223
- U32 const hashLog3 = zc->hashLog3;
224
- const BYTE* const base = zc->base;
225
- U32 idx = zc->nextToUpdate3;
226
- const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
227
- const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);
249
+ U32* const hashTable3 = cctx->hashTable3;
250
+ U32 const hashLog3 = cctx->hashLog3;
251
+ const BYTE* const base = cctx->base;
252
+ U32 idx = cctx->nextToUpdate3;
253
+ U32 const target = cctx->nextToUpdate3 = (U32)(ip - base);
254
+ size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
228
255
 
229
256
  while(idx < target) {
230
257
  hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
@@ -238,102 +265,147 @@ U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
238
265
  /*-*************************************
239
266
  * Binary Tree search
240
267
  ***************************************/
241
- static U32 ZSTD_insertBtAndGetAllMatches (
242
- ZSTD_CCtx* zc,
243
- const BYTE* const ip, const BYTE* const iLimit,
244
- U32 nbCompares, const U32 mls,
245
- U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen)
268
+ FORCE_INLINE_TEMPLATE
269
+ U32 ZSTD_insertBtAndGetAllMatches (
270
+ ZSTD_CCtx* zc,
271
+ const BYTE* const ip, const BYTE* const iLimit, int const extDict,
272
+ U32 nbCompares, U32 const mls, U32 const sufficient_len,
273
+ U32 rep[ZSTD_REP_NUM], U32 const ll0,
274
+ ZSTD_match_t* matches, const U32 lengthToBeat)
246
275
  {
247
276
  const BYTE* const base = zc->base;
248
- const U32 current = (U32)(ip-base);
249
- const U32 hashLog = zc->appliedParams.cParams.hashLog;
250
- const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
277
+ U32 const current = (U32)(ip-base);
278
+ U32 const hashLog = zc->appliedParams.cParams.hashLog;
279
+ U32 const minMatch = (mls==3) ? 3 : 4;
251
280
  U32* const hashTable = zc->hashTable;
281
+ size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
252
282
  U32 matchIndex = hashTable[h];
253
283
  U32* const bt = zc->chainTable;
254
- const U32 btLog = zc->appliedParams.cParams.chainLog - 1;
255
- const U32 btMask= (1U << btLog) - 1;
284
+ U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
285
+ U32 const btMask= (1U << btLog) - 1;
256
286
  size_t commonLengthSmaller=0, commonLengthLarger=0;
257
287
  const BYTE* const dictBase = zc->dictBase;
258
- const U32 dictLimit = zc->dictLimit;
288
+ U32 const dictLimit = zc->dictLimit;
259
289
  const BYTE* const dictEnd = dictBase + dictLimit;
260
290
  const BYTE* const prefixStart = base + dictLimit;
261
- const U32 btLow = btMask >= current ? 0 : current - btMask;
262
- const U32 windowLow = zc->lowLimit;
291
+ U32 const btLow = btMask >= current ? 0 : current - btMask;
292
+ U32 const windowLow = zc->lowLimit;
263
293
  U32* smallerPtr = bt + 2*(current&btMask);
264
294
  U32* largerPtr = bt + 2*(current&btMask) + 1;
265
- U32 matchEndIdx = current+8;
295
+ U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
266
296
  U32 dummy32; /* to be nullified at the end */
267
297
  U32 mnum = 0;
268
298
 
269
- const U32 minMatch = (mls == 3) ? 3 : 4;
270
- size_t bestLength = minMatchLen-1;
299
+ size_t bestLength = lengthToBeat-1;
300
+ DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");
301
+
302
+ /* check repCode */
303
+ { U32 const lastR = ZSTD_REP_NUM + ll0;
304
+ U32 repCode;
305
+ for (repCode = ll0; repCode < lastR; repCode++) {
306
+ U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
307
+ U32 const repIndex = current - repOffset;
308
+ U32 repLen = 0;
309
+ assert(current >= dictLimit);
310
+ if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
311
+ if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
312
+ repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
313
+ }
314
+ } else { /* repIndex < dictLimit || repIndex >= current */
315
+ const BYTE* const repMatch = dictBase + repIndex;
316
+ assert(current >= windowLow);
317
+ if ( extDict /* this case only valid in extDict mode */
318
+ && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
319
+ & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
320
+ && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
321
+ repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
322
+ } }
323
+ /* save longer solution */
324
+ if (repLen > bestLength) {
325
+ DEBUGLOG(8, "found rep-match %u of length %u",
326
+ repCode - ll0, (U32)repLen);
327
+ bestLength = repLen;
328
+ matches[mnum].off = repCode - ll0;
329
+ matches[mnum].len = (U32)repLen;
330
+ mnum++;
331
+ if ( (repLen > sufficient_len)
332
+ | (ip+repLen == iLimit) ) { /* best possible */
333
+ return mnum;
334
+ } } } }
271
335
 
272
- if (minMatch == 3) { /* HC3 match finder */
336
+ /* HC3 match finder */
337
+ if ((mls == 3) /*static*/ && (bestLength < mls)) {
273
338
  U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
274
- if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) {
275
- const BYTE* match;
276
- size_t currentMl=0;
277
- if ((!extDict) || matchIndex3 >= dictLimit) {
278
- match = base + matchIndex3;
279
- if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
339
+ if ((matchIndex3 > windowLow)
340
+ & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
341
+ size_t mlen;
342
+ if ((!extDict) /*static*/ || (matchIndex3 >= dictLimit)) {
343
+ const BYTE* const match = base + matchIndex3;
344
+ mlen = ZSTD_count(ip, match, iLimit);
280
345
  } else {
281
- match = dictBase + matchIndex3;
282
- if (ZSTD_readMINMATCH(match, MINMATCH) == ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
283
- currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
346
+ const BYTE* const match = dictBase + matchIndex3;
347
+ mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
284
348
  }
285
349
 
286
350
  /* save best solution */
287
- if (currentMl > bestLength) {
288
- bestLength = currentMl;
289
- matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3;
290
- matches[mnum].len = (U32)currentMl;
291
- mnum++;
292
- if (currentMl > ZSTD_OPT_NUM) goto update;
293
- if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/
294
- }
295
- }
296
- }
351
+ if (mlen >= mls /* == 3 > bestLength */) {
352
+ DEBUGLOG(8, "found small match with hlog3, of length %u",
353
+ (U32)mlen);
354
+ bestLength = mlen;
355
+ assert(current > matchIndex3);
356
+ assert(mnum==0); /* no prior solution */
357
+ matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
358
+ matches[0].len = (U32)mlen;
359
+ mnum = 1;
360
+ if ( (mlen > sufficient_len) |
361
+ (ip+mlen == iLimit) ) { /* best possible length */
362
+ zc->nextToUpdate = current+1; /* skip insertion */
363
+ return 1;
364
+ } } } }
297
365
 
298
366
  hashTable[h] = current; /* Update Hash Table */
299
367
 
300
368
  while (nbCompares-- && (matchIndex > windowLow)) {
301
- U32* nextPtr = bt + 2*(matchIndex & btMask);
369
+ U32* const nextPtr = bt + 2*(matchIndex & btMask);
302
370
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
303
371
  const BYTE* match;
372
+ assert(current > matchIndex);
304
373
 
305
374
  if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
375
+ assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
306
376
  match = base + matchIndex;
307
- if (match[matchLength] == ip[matchLength]) {
308
- matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1;
309
- }
377
+ matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
310
378
  } else {
311
379
  match = dictBase + matchIndex;
312
380
  matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
313
381
  if (matchIndex+matchLength >= dictLimit)
314
- match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
382
+ match = base + matchIndex; /* prepare for match[matchLength] */
315
383
  }
316
384
 
317
385
  if (matchLength > bestLength) {
318
- if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength;
386
+ DEBUGLOG(8, "found match of length %u at distance %u",
387
+ (U32)matchLength, current - matchIndex);
388
+ assert(matchEndIdx > matchIndex);
389
+ if (matchLength > matchEndIdx - matchIndex)
390
+ matchEndIdx = matchIndex + (U32)matchLength;
319
391
  bestLength = matchLength;
320
- matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex;
392
+ matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
321
393
  matches[mnum].len = (U32)matchLength;
322
394
  mnum++;
323
395
  if (matchLength > ZSTD_OPT_NUM) break;
324
- if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */
325
- break; /* drop, to guarantee consistency (miss a little bit of compression) */
396
+ if (ip+matchLength == iLimit) { /* equal : no way to know if inf or sup */
397
+ break; /* drop, to preserve bt consistency (miss a little bit of compression) */
398
+ }
326
399
  }
327
400
 
328
401
  if (match[matchLength] < ip[matchLength]) {
329
- /* match is smaller than current */
402
+ /* match smaller than current */
330
403
  *smallerPtr = matchIndex; /* update smaller idx */
331
404
  commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
332
405
  if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
333
- smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
334
- matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
406
+ smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
407
+ matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
335
408
  } else {
336
- /* match is larger than current */
337
409
  *largerPtr = matchIndex;
338
410
  commonLengthLarger = matchLength;
339
411
  if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
@@ -343,65 +415,31 @@ static U32 ZSTD_insertBtAndGetAllMatches (
343
415
 
344
416
  *smallerPtr = *largerPtr = 0;
345
417
 
346
- update:
347
- zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
418
+ assert(matchEndIdx > current+8);
419
+ zc->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
348
420
  return mnum;
349
421
  }
350
422
 
351
423
 
352
- /** Tree updater, providing best match */
353
- static U32 ZSTD_BtGetAllMatches (
354
- ZSTD_CCtx* zc,
355
- const BYTE* const ip, const BYTE* const iLimit,
356
- const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
357
- {
358
- if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
359
- ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
360
- return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
361
- }
362
-
363
-
364
- static U32 ZSTD_BtGetAllMatches_selectMLS (
424
+ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
365
425
  ZSTD_CCtx* zc, /* Index table will be updated */
366
- const BYTE* ip, const BYTE* const iHighLimit,
367
- const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
368
- {
369
- switch(matchLengthSearch)
370
- {
371
- case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
372
- default :
373
- case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
374
- case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
375
- case 7 :
376
- case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
377
- }
378
- }
379
-
380
- /** Tree updater, providing best match */
381
- static U32 ZSTD_BtGetAllMatches_extDict (
382
- ZSTD_CCtx* zc,
383
- const BYTE* const ip, const BYTE* const iLimit,
384
- const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
426
+ const BYTE* ip, const BYTE* const iHighLimit, int const extDict,
427
+ U32 const maxNbAttempts, U32 const matchLengthSearch, U32 const sufficient_len,
428
+ U32 rep[ZSTD_REP_NUM], U32 const ll0,
429
+ ZSTD_match_t* matches, U32 const lengthToBeat)
385
430
  {
431
+ DEBUGLOG(7, "ZSTD_BtGetAllMatches");
386
432
  if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
387
- ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
388
- return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
389
- }
390
-
391
-
392
- static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
393
- ZSTD_CCtx* zc, /* Index table will be updated */
394
- const BYTE* ip, const BYTE* const iHighLimit,
395
- const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
396
- {
433
+ if (extDict) ZSTD_updateTree_extDict(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch);
434
+ else ZSTD_updateTree(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch);
397
435
  switch(matchLengthSearch)
398
436
  {
399
- case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
437
+ case 3 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 3, sufficient_len, rep, ll0, matches, lengthToBeat);
400
438
  default :
401
- case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
402
- case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
439
+ case 4 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 4, sufficient_len, rep, ll0, matches, lengthToBeat);
440
+ case 5 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 5, sufficient_len, rep, ll0, matches, lengthToBeat);
403
441
  case 7 :
404
- case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
442
+ case 6 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 6, sufficient_len, rep, ll0, matches, lengthToBeat);
405
443
  }
406
444
  }
407
445
 
@@ -409,534 +447,313 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
409
447
  /*-*******************************
410
448
  * Optimal parser
411
449
  *********************************/
412
- FORCE_INLINE_TEMPLATE
413
- size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
414
- const void* src, size_t srcSize, const int ultra)
415
- {
416
- seqStore_t* seqStorePtr = &(ctx->seqStore);
417
- optState_t* optStatePtr = &(ctx->optState);
418
- const BYTE* const istart = (const BYTE*)src;
419
- const BYTE* ip = istart;
420
- const BYTE* anchor = istart;
421
- const BYTE* const iend = istart + srcSize;
422
- const BYTE* const ilimit = iend - 8;
423
- const BYTE* const base = ctx->base;
424
- const BYTE* const prefixStart = base + ctx->dictLimit;
425
-
426
- const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
427
- const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
428
- const U32 mls = ctx->appliedParams.cParams.searchLength;
429
- const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
430
-
431
- ZSTD_optimal_t* opt = optStatePtr->priceTable;
432
- ZSTD_match_t* matches = optStatePtr->matchTable;
433
- const BYTE* inr;
434
- U32 offset, rep[ZSTD_REP_NUM];
435
-
436
- /* init */
437
- ctx->nextToUpdate3 = ctx->nextToUpdate;
438
- ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
439
- ip += (ip==prefixStart);
440
- { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }
441
-
442
- /* Match Loop */
443
- while (ip < ilimit) {
444
- U32 cur, match_num, last_pos, litlen, price;
445
- U32 u, mlen, best_mlen, best_off, litLength;
446
- memset(opt, 0, sizeof(ZSTD_optimal_t));
447
- last_pos = 0;
448
- litlen = (U32)(ip - anchor);
449
-
450
- /* check repCode */
451
- { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
452
- for (i=(ip == anchor); i<last_i; i++) {
453
- const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
454
- if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
455
- && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) {
456
- mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
457
- if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
458
- best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
459
- goto _storeSequence;
460
- }
461
- best_off = i - (ip == anchor);
462
- do {
463
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
464
- if (mlen > last_pos || price < opt[mlen].price)
465
- SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
466
- mlen--;
467
- } while (mlen >= minMatch);
468
- } } }
469
-
470
- match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);
471
-
472
- if (!last_pos && !match_num) { ip++; continue; }
473
-
474
- if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
475
- best_mlen = matches[match_num-1].len;
476
- best_off = matches[match_num-1].off;
477
- cur = 0;
478
- last_pos = 1;
479
- goto _storeSequence;
480
- }
481
-
482
- /* set prices using matches at position = 0 */
483
- best_mlen = (last_pos) ? last_pos : minMatch;
484
- for (u = 0; u < match_num; u++) {
485
- mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
486
- best_mlen = matches[u].len;
487
- while (mlen <= best_mlen) {
488
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
489
- if (mlen > last_pos || price < opt[mlen].price)
490
- SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */
491
- mlen++;
492
- } }
493
-
494
- if (last_pos < minMatch) { ip++; continue; }
495
-
496
- /* initialize opt[0] */
497
- { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
498
- opt[0].mlen = 1;
499
- opt[0].litlen = litlen;
500
-
501
- /* check further positions */
502
- for (cur = 1; cur <= last_pos; cur++) {
503
- inr = ip + cur;
504
-
505
- if (opt[cur-1].mlen == 1) {
506
- litlen = opt[cur-1].litlen + 1;
507
- if (cur > litlen) {
508
- price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen);
509
- } else
510
- price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor);
511
- } else {
512
- litlen = 1;
513
- price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1);
514
- }
515
-
516
- if (cur > last_pos || price <= opt[cur].price)
517
- SET_PRICE(cur, 1, 0, litlen, price);
518
-
519
- if (cur == last_pos) break;
520
-
521
- if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
522
- continue;
523
-
524
- mlen = opt[cur].mlen;
525
- if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
526
- opt[cur].rep[2] = opt[cur-mlen].rep[1];
527
- opt[cur].rep[1] = opt[cur-mlen].rep[0];
528
- opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
529
- } else {
530
- opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
531
- opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
532
- /* If opt[cur].off == ZSTD_REP_MOVE_OPT, then mlen != 1.
533
- * offset ZSTD_REP_MOVE_OPT is used for the special case
534
- * litLength == 0, where offset 0 means something special.
535
- * mlen == 1 means the previous byte was stored as a literal,
536
- * so they are mutually exclusive.
537
- */
538
- assert(!(opt[cur].off == ZSTD_REP_MOVE_OPT && mlen == 1));
539
- opt[cur].rep[0] = (opt[cur].off == ZSTD_REP_MOVE_OPT) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
540
- }
541
-
542
- best_mlen = minMatch;
543
- { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
544
- for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */
545
- const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
546
- if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
547
- && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) {
548
- mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
549
-
550
- if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
551
- best_mlen = mlen; best_off = i; last_pos = cur + 1;
552
- goto _storeSequence;
553
- }
554
-
555
- best_off = i - (opt[cur].mlen != 1);
556
- if (mlen > best_mlen) best_mlen = mlen;
557
-
558
- do {
559
- if (opt[cur].mlen == 1) {
560
- litlen = opt[cur].litlen;
561
- if (cur > litlen) {
562
- price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
563
- } else
564
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
565
- } else {
566
- litlen = 0;
567
- price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
568
- }
569
-
570
- if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
571
- SET_PRICE(cur + mlen, mlen, i, litlen, price);
572
- mlen--;
573
- } while (mlen >= minMatch);
574
- } } }
575
-
576
- match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);
577
-
578
- if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
579
- best_mlen = matches[match_num-1].len;
580
- best_off = matches[match_num-1].off;
581
- last_pos = cur + 1;
582
- goto _storeSequence;
583
- }
584
-
585
- /* set prices using matches at position = cur */
586
- for (u = 0; u < match_num; u++) {
587
- mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
588
- best_mlen = matches[u].len;
589
-
590
- while (mlen <= best_mlen) {
591
- if (opt[cur].mlen == 1) {
592
- litlen = opt[cur].litlen;
593
- if (cur > litlen)
594
- price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
595
- else
596
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
597
- } else {
598
- litlen = 0;
599
- price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
600
- }
601
-
602
- if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
603
- SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
450
+ typedef struct repcodes_s {
451
+ U32 rep[3];
452
+ } repcodes_t;
604
453
 
605
- mlen++;
606
- } } }
607
-
608
- best_mlen = opt[last_pos].mlen;
609
- best_off = opt[last_pos].off;
610
- cur = last_pos - best_mlen;
611
-
612
- /* store sequence */
613
- _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
614
- opt[0].mlen = 1;
615
-
616
- while (1) {
617
- mlen = opt[cur].mlen;
618
- offset = opt[cur].off;
619
- opt[cur].mlen = best_mlen;
620
- opt[cur].off = best_off;
621
- best_mlen = mlen;
622
- best_off = offset;
623
- if (mlen > cur) break;
624
- cur -= mlen;
625
- }
626
-
627
- for (u = 0; u <= last_pos;) {
628
- u += opt[u].mlen;
454
+ repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
455
+ {
456
+ repcodes_t newReps;
457
+ if (offset >= ZSTD_REP_NUM) { /* full offset */
458
+ newReps.rep[2] = rep[1];
459
+ newReps.rep[1] = rep[0];
460
+ newReps.rep[0] = offset - ZSTD_REP_MOVE;
461
+ } else { /* repcode */
462
+ U32 const repCode = offset + ll0;
463
+ if (repCode > 0) { /* note : if repCode==0, no change */
464
+ U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
465
+ newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
466
+ newReps.rep[1] = rep[0];
467
+ newReps.rep[0] = currentOffset;
468
+ } else { /* repCode == 0 */
469
+ memcpy(&newReps, rep, sizeof(newReps));
629
470
  }
471
+ }
472
+ return newReps;
473
+ }
630
474
 
631
- for (cur=0; cur < last_pos; ) {
632
- mlen = opt[cur].mlen;
633
- if (mlen == 1) { ip++; cur++; continue; }
634
- offset = opt[cur].off;
635
- cur += mlen;
636
- litLength = (U32)(ip - anchor);
637
-
638
- if (offset > ZSTD_REP_MOVE_OPT) {
639
- rep[2] = rep[1];
640
- rep[1] = rep[0];
641
- rep[0] = offset - ZSTD_REP_MOVE_OPT;
642
- offset--;
643
- } else {
644
- if (offset != 0) {
645
- best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
646
- if (offset != 1) rep[2] = rep[1];
647
- rep[1] = rep[0];
648
- rep[0] = best_off;
649
- }
650
- if (litLength==0) offset--;
651
- }
652
475
 
653
- ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH);
654
- ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
655
- anchor = ip = ip + mlen;
656
- } } /* for (cur=0; cur < last_pos; ) */
476
+ typedef struct {
477
+ const BYTE* anchor;
478
+ U32 litlen;
479
+ U32 rawLitCost;
480
+ } cachedLiteralPrice_t;
657
481
 
658
- /* Save reps for next block */
659
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }
482
+ static U32 ZSTD_rawLiteralsCost_cached(
483
+ cachedLiteralPrice_t* const cachedLitPrice,
484
+ const BYTE* const anchor, U32 const litlen,
485
+ const optState_t* const optStatePtr)
486
+ {
487
+ U32 startCost;
488
+ U32 remainingLength;
489
+ const BYTE* startPosition;
490
+
491
+ if (anchor == cachedLitPrice->anchor) {
492
+ startCost = cachedLitPrice->rawLitCost;
493
+ startPosition = anchor + cachedLitPrice->litlen;
494
+ assert(litlen >= cachedLitPrice->litlen);
495
+ remainingLength = litlen - cachedLitPrice->litlen;
496
+ } else {
497
+ startCost = 0;
498
+ startPosition = anchor;
499
+ remainingLength = litlen;
500
+ }
660
501
 
661
- /* Return the last literals size */
662
- return iend - anchor;
502
+ { U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr);
503
+ cachedLitPrice->anchor = anchor;
504
+ cachedLitPrice->litlen = litlen;
505
+ cachedLitPrice->rawLitCost = rawLitCost;
506
+ return rawLitCost;
507
+ }
663
508
  }
664
509
 
665
-
666
- size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
510
+ static U32 ZSTD_fullLiteralsCost_cached(
511
+ cachedLiteralPrice_t* const cachedLitPrice,
512
+ const BYTE* const anchor, U32 const litlen,
513
+ const optState_t* const optStatePtr)
667
514
  {
668
- return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
515
+ return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
516
+ + ZSTD_litLengthPrice(litlen, optStatePtr);
669
517
  }
670
518
 
671
- size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
519
+ static int ZSTD_literalsContribution_cached(
520
+ cachedLiteralPrice_t* const cachedLitPrice,
521
+ const BYTE* const anchor, U32 const litlen,
522
+ const optState_t* const optStatePtr)
672
523
  {
673
- return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
524
+ int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
525
+ + ZSTD_litLengthContribution(litlen, optStatePtr);
526
+ return contribution;
674
527
  }
675
528
 
676
-
677
529
  FORCE_INLINE_TEMPLATE
678
- size_t ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
679
- const void* src, size_t srcSize, const int ultra)
530
+ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
531
+ const void* src, size_t srcSize,
532
+ const int optLevel, const int extDict)
680
533
  {
681
- seqStore_t* seqStorePtr = &(ctx->seqStore);
682
- optState_t* optStatePtr = &(ctx->optState);
534
+ seqStore_t* const seqStorePtr = &(ctx->seqStore);
535
+ optState_t* const optStatePtr = &(ctx->optState);
683
536
  const BYTE* const istart = (const BYTE*)src;
684
537
  const BYTE* ip = istart;
685
538
  const BYTE* anchor = istart;
686
539
  const BYTE* const iend = istart + srcSize;
687
540
  const BYTE* const ilimit = iend - 8;
688
541
  const BYTE* const base = ctx->base;
689
- const U32 lowestIndex = ctx->lowLimit;
690
- const U32 dictLimit = ctx->dictLimit;
691
- const BYTE* const prefixStart = base + dictLimit;
692
- const BYTE* const dictBase = ctx->dictBase;
693
- const BYTE* const dictEnd = dictBase + dictLimit;
542
+ const BYTE* const prefixStart = base + ctx->dictLimit;
694
543
 
695
- const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
696
- const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
697
- const U32 mls = ctx->appliedParams.cParams.searchLength;
698
- const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
544
+ U32 const maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
545
+ U32 const sufficient_len = MIN(ctx->appliedParams.cParams.targetLength, ZSTD_OPT_NUM -1);
546
+ U32 const mls = ctx->appliedParams.cParams.searchLength;
547
+ U32 const minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
699
548
 
700
- ZSTD_optimal_t* opt = optStatePtr->priceTable;
701
- ZSTD_match_t* matches = optStatePtr->matchTable;
702
- const BYTE* inr;
549
+ ZSTD_optimal_t* const opt = optStatePtr->priceTable;
550
+ ZSTD_match_t* const matches = optStatePtr->matchTable;
551
+ cachedLiteralPrice_t cachedLitPrice;
552
+ U32 rep[ZSTD_REP_NUM];
703
553
 
704
554
  /* init */
705
- U32 offset, rep[ZSTD_REP_NUM];
706
- { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }
707
-
555
+ DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
708
556
  ctx->nextToUpdate3 = ctx->nextToUpdate;
709
557
  ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
710
558
  ip += (ip==prefixStart);
559
+ { int i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }
560
+ memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));
711
561
 
712
562
  /* Match Loop */
713
563
  while (ip < ilimit) {
714
- U32 cur, match_num, last_pos, litlen, price;
715
- U32 u, mlen, best_mlen, best_off, litLength;
716
- U32 current = (U32)(ip-base);
717
- memset(opt, 0, sizeof(ZSTD_optimal_t));
718
- last_pos = 0;
719
- opt[0].litlen = (U32)(ip - anchor);
720
-
721
- /* check repCode */
722
- { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
723
- for (i = (ip==anchor); i<last_i; i++) {
724
- const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
725
- const U32 repIndex = (U32)(current - repCur);
726
- const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
727
- const BYTE* const repMatch = repBase + repIndex;
728
- if ( (repCur > 0 && repCur <= (S32)current)
729
- && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
730
- && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
731
- /* repcode detected we should take it */
732
- const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
733
- mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
734
-
735
- if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
736
- best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
737
- goto _storeSequence;
738
- }
739
-
740
- best_off = i - (ip==anchor);
741
- litlen = opt[0].litlen;
742
- do {
743
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
744
- if (mlen > last_pos || price < opt[mlen].price)
745
- SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
746
- mlen--;
747
- } while (mlen >= minMatch);
748
- } } }
749
-
750
- match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */
751
-
752
- if (!last_pos && !match_num) { ip++; continue; }
753
-
754
- { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
755
- opt[0].mlen = 1;
756
-
757
- if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
758
- best_mlen = matches[match_num-1].len;
759
- best_off = matches[match_num-1].off;
760
- cur = 0;
761
- last_pos = 1;
762
- goto _storeSequence;
763
- }
764
-
765
- best_mlen = (last_pos) ? last_pos : minMatch;
766
-
767
- /* set prices using matches at position = 0 */
768
- for (u = 0; u < match_num; u++) {
769
- mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
770
- best_mlen = matches[u].len;
771
- litlen = opt[0].litlen;
772
- while (mlen <= best_mlen) {
773
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
774
- if (mlen > last_pos || price < opt[mlen].price)
775
- SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
776
- mlen++;
777
- } }
778
-
779
- if (last_pos < minMatch) {
780
- ip++; continue;
564
+ U32 cur, last_pos = 0;
565
+ U32 best_mlen, best_off;
566
+
567
+ /* find first match */
568
+ { U32 const litlen = (U32)(ip - anchor);
569
+ U32 const ll0 = !litlen;
570
+ U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, ip, iend, extDict, maxSearches, mls, sufficient_len, rep, ll0, matches, minMatch);
571
+ if (!nbMatches) { ip++; continue; }
572
+
573
+ /* initialize opt[0] */
574
+ { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
575
+ opt[0].mlen = 1;
576
+ opt[0].litlen = litlen;
577
+
578
+ /* large match -> immediate encoding */
579
+ { U32 const maxML = matches[nbMatches-1].len;
580
+ DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new serie",
581
+ nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart));
582
+
583
+ if (maxML > sufficient_len) {
584
+ best_mlen = maxML;
585
+ best_off = matches[nbMatches-1].off;
586
+ DEBUGLOG(7, "large match (%u>%u), immediate encoding",
587
+ best_mlen, sufficient_len);
588
+ cur = 0;
589
+ last_pos = 1;
590
+ goto _shortestPath;
591
+ } }
592
+
593
+ /* set prices for first matches starting position == 0 */
594
+ { U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
595
+ U32 pos;
596
+ U32 matchNb;
597
+ for (pos = 0; pos < minMatch; pos++) {
598
+ opt[pos].mlen = 1;
599
+ opt[pos].price = ZSTD_MAX_PRICE;
600
+ }
601
+ for (matchNb = 0; matchNb < nbMatches; matchNb++) {
602
+ U32 const offset = matches[matchNb].off;
603
+ U32 const end = matches[matchNb].len;
604
+ repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
605
+ for ( ; pos <= end ; pos++ ) {
606
+ U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
607
+ DEBUGLOG(7, "rPos:%u => set initial price : %u",
608
+ pos, matchPrice);
609
+ opt[pos].mlen = pos;
610
+ opt[pos].off = offset;
611
+ opt[pos].litlen = litlen;
612
+ opt[pos].price = matchPrice;
613
+ memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
614
+ } }
615
+ last_pos = pos-1;
616
+ }
781
617
  }
782
618
 
783
619
  /* check further positions */
784
620
  for (cur = 1; cur <= last_pos; cur++) {
785
- inr = ip + cur;
621
+ const BYTE* const inr = ip + cur;
622
+ assert(cur < ZSTD_OPT_NUM);
786
623
 
787
- if (opt[cur-1].mlen == 1) {
788
- litlen = opt[cur-1].litlen + 1;
624
+ /* Fix current position with one literal if cheaper */
625
+ { U32 const litlen = (opt[cur-1].mlen == 1) ? opt[cur-1].litlen + 1 : 1;
626
+ int price; /* note : contribution can be negative */
789
627
  if (cur > litlen) {
790
- price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen);
791
- } else
792
- price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor);
793
- } else {
794
- litlen = 1;
795
- price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1);
796
- }
797
-
798
- if (cur > last_pos || price <= opt[cur].price)
799
- SET_PRICE(cur, 1, 0, litlen, price);
628
+ price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr);
629
+ } else {
630
+ price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
631
+ }
632
+ assert(price < 1000000000); /* overflow check */
633
+ if (price <= opt[cur].price) {
634
+ DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal",
635
+ cur, price, opt[cur].price);
636
+ opt[cur].mlen = 1;
637
+ opt[cur].off = 0;
638
+ opt[cur].litlen = litlen;
639
+ opt[cur].price = price;
640
+ memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
641
+ } }
642
+
643
+ /* last match must start at a minimum distance of 8 from oend */
644
+ if (inr > ilimit) continue;
800
645
 
801
646
  if (cur == last_pos) break;
802
647
 
803
- if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
804
- continue;
805
-
806
- mlen = opt[cur].mlen;
807
- if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
808
- opt[cur].rep[2] = opt[cur-mlen].rep[1];
809
- opt[cur].rep[1] = opt[cur-mlen].rep[0];
810
- opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
811
- } else {
812
- opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
813
- opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
814
- assert(!(opt[cur].off == ZSTD_REP_MOVE_OPT && mlen == 1));
815
- opt[cur].rep[0] = (opt[cur].off == ZSTD_REP_MOVE_OPT) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
816
- }
648
+ if ( (optLevel==0) /*static*/
649
+ && (opt[cur+1].price <= opt[cur].price) )
650
+ continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
651
+
652
+ { U32 const ll0 = (opt[cur].mlen != 1);
653
+ U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
654
+ U32 const previousPrice = (cur > litlen) ? opt[cur-litlen].price : 0;
655
+ U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr);
656
+ U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, inr, iend, extDict, maxSearches, mls, sufficient_len, opt[cur].rep, ll0, matches, minMatch);
657
+ U32 matchNb;
658
+ if (!nbMatches) continue;
659
+
660
+ { U32 const maxML = matches[nbMatches-1].len;
661
+ DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u",
662
+ cur, nbMatches, maxML);
663
+
664
+ if ( (maxML > sufficient_len)
665
+ | (cur + maxML >= ZSTD_OPT_NUM) ) {
666
+ best_mlen = maxML;
667
+ best_off = matches[nbMatches-1].off;
668
+ last_pos = cur + 1;
669
+ goto _shortestPath;
670
+ }
671
+ }
817
672
 
818
- best_mlen = minMatch;
819
- { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
820
- for (i = (mlen != 1); i<last_i; i++) {
821
- const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
822
- const U32 repIndex = (U32)(current+cur - repCur);
823
- const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
824
- const BYTE* const repMatch = repBase + repIndex;
825
- if ( (repCur > 0 && repCur <= (S32)(current+cur))
826
- && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
827
- && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
828
- /* repcode detected */
829
- const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
830
- mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
831
-
832
- if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
833
- best_mlen = mlen; best_off = i; last_pos = cur + 1;
834
- goto _storeSequence;
673
+ /* set prices using matches found at position == cur */
674
+ for (matchNb = 0; matchNb < nbMatches; matchNb++) {
675
+ U32 const offset = matches[matchNb].off;
676
+ repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
677
+ U32 const lastML = matches[matchNb].len;
678
+ U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
679
+ U32 mlen;
680
+
681
+ DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u",
682
+ matchNb, matches[matchNb].off, lastML, litlen);
683
+
684
+ for (mlen = lastML; mlen >= startML; mlen--) {
685
+ U32 const pos = cur + mlen;
686
+ int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
687
+
688
+ if ((pos > last_pos) || (price < opt[pos].price)) {
689
+ DEBUGLOG(7, "rPos:%u => new better price (%u<%u)",
690
+ pos, price, opt[pos].price);
691
+ while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }
692
+ opt[pos].mlen = mlen;
693
+ opt[pos].off = offset;
694
+ opt[pos].litlen = litlen;
695
+ opt[pos].price = price;
696
+ memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
697
+ } else {
698
+ if (optLevel==0) break; /* gets ~+10% speed for about -0.01 ratio loss */
835
699
  }
836
-
837
- best_off = i - (opt[cur].mlen != 1);
838
- if (mlen > best_mlen) best_mlen = mlen;
839
-
840
- do {
841
- if (opt[cur].mlen == 1) {
842
- litlen = opt[cur].litlen;
843
- if (cur > litlen) {
844
- price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
845
- } else
846
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
847
- } else {
848
- litlen = 0;
849
- price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
850
- }
851
-
852
- if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
853
- SET_PRICE(cur + mlen, mlen, i, litlen, price);
854
- mlen--;
855
- } while (mlen >= minMatch);
856
700
  } } }
857
-
858
- match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
859
-
860
- if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
861
- best_mlen = matches[match_num-1].len;
862
- best_off = matches[match_num-1].off;
863
- last_pos = cur + 1;
864
- goto _storeSequence;
865
- }
866
-
867
- /* set prices using matches at position = cur */
868
- for (u = 0; u < match_num; u++) {
869
- mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
870
- best_mlen = matches[u].len;
871
-
872
- while (mlen <= best_mlen) {
873
- if (opt[cur].mlen == 1) {
874
- litlen = opt[cur].litlen;
875
- if (cur > litlen)
876
- price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
877
- else
878
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
879
- } else {
880
- litlen = 0;
881
- price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
882
- }
883
-
884
- if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
885
- SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
886
-
887
- mlen++;
888
- } } } /* for (cur = 1; cur <= last_pos; cur++) */
701
+ } /* for (cur = 1; cur <= last_pos; cur++) */
889
702
 
890
703
  best_mlen = opt[last_pos].mlen;
891
704
  best_off = opt[last_pos].off;
892
705
  cur = last_pos - best_mlen;
893
706
 
894
- /* store sequence */
895
- _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
896
- opt[0].mlen = 1;
897
-
898
- while (1) {
899
- mlen = opt[cur].mlen;
900
- offset = opt[cur].off;
901
- opt[cur].mlen = best_mlen;
902
- opt[cur].off = best_off;
903
- best_mlen = mlen;
904
- best_off = offset;
905
- if (mlen > cur) break;
906
- cur -= mlen;
907
- }
908
-
909
- for (u = 0; u <= last_pos; ) {
910
- u += opt[u].mlen;
911
- }
707
+ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
708
+ assert(opt[0].mlen == 1);
709
+
710
+ /* reverse traversal */
711
+ DEBUGLOG(7, "start reverse traversal (last_pos:%u, cur:%u)",
712
+ last_pos, cur);
713
+ { U32 selectedMatchLength = best_mlen;
714
+ U32 selectedOffset = best_off;
715
+ U32 pos = cur;
716
+ while (1) {
717
+ U32 const mlen = opt[pos].mlen;
718
+ U32 const off = opt[pos].off;
719
+ opt[pos].mlen = selectedMatchLength;
720
+ opt[pos].off = selectedOffset;
721
+ selectedMatchLength = mlen;
722
+ selectedOffset = off;
723
+ if (mlen > pos) break;
724
+ pos -= mlen;
725
+ } }
912
726
 
913
- for (cur=0; cur < last_pos; ) {
914
- mlen = opt[cur].mlen;
915
- if (mlen == 1) { ip++; cur++; continue; }
916
- offset = opt[cur].off;
917
- cur += mlen;
918
- litLength = (U32)(ip - anchor);
919
-
920
- if (offset > ZSTD_REP_MOVE_OPT) {
921
- rep[2] = rep[1];
922
- rep[1] = rep[0];
923
- rep[0] = offset - ZSTD_REP_MOVE_OPT;
924
- offset--;
925
- } else {
926
- if (offset != 0) {
927
- best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
928
- if (offset != 1) rep[2] = rep[1];
727
+ /* save sequences */
728
+ { U32 pos;
729
+ for (pos=0; pos < last_pos; ) {
730
+ U32 const llen = (U32)(ip - anchor);
731
+ U32 const mlen = opt[pos].mlen;
732
+ U32 const offset = opt[pos].off;
733
+ if (mlen == 1) { ip++; pos++; continue; } /* literal position => move on */
734
+ pos += mlen; ip += mlen;
735
+
736
+ /* repcodes update : like ZSTD_updateRep(), but update in place */
737
+ if (offset >= ZSTD_REP_NUM) { /* full offset */
738
+ rep[2] = rep[1];
929
739
  rep[1] = rep[0];
930
- rep[0] = best_off;
740
+ rep[0] = offset - ZSTD_REP_MOVE;
741
+ } else { /* repcode */
742
+ U32 const repCode = offset + (llen==0);
743
+ if (repCode) { /* note : if repCode==0, no change */
744
+ U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
745
+ if (repCode >= 2) rep[2] = rep[1];
746
+ rep[1] = rep[0];
747
+ rep[0] = currentOffset;
748
+ }
931
749
  }
932
750
 
933
- if (litLength==0) offset--;
934
- }
935
-
936
- ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH);
937
- ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
938
- anchor = ip = ip + mlen;
939
- } } /* for (cur=0; cur < last_pos; ) */
751
+ ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
752
+ ZSTD_storeSeq(seqStorePtr, llen, anchor, offset, mlen-MINMATCH);
753
+ anchor = ip;
754
+ } }
755
+ ZSTD_setLog2Prices(optStatePtr);
756
+ } /* while (ip < ilimit) */
940
757
 
941
758
  /* Save reps for next block */
942
759
  { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }
@@ -946,12 +763,23 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
946
763
  }
947
764
 
948
765
 
766
+ size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
767
+ {
768
+ DEBUGLOG(5, "ZSTD_compressBlock_btopt");
769
+ return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
770
+ }
771
+
772
+ size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
773
+ {
774
+ return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
775
+ }
776
+
949
777
  size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
950
778
  {
951
- return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
779
+ return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
952
780
  }
953
781
 
954
782
  size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
955
783
  {
956
- return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
784
+ return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
957
785
  }