zstd-ruby 1.3.2.0 → 1.3.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/BUCK +31 -10
  4. data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
  5. data/ext/zstdruby/libzstd/common/mem.h +15 -13
  6. data/ext/zstdruby/libzstd/common/pool.c +1 -2
  7. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -4
  8. data/ext/zstdruby/libzstd/common/zstd_internal.h +52 -170
  9. data/ext/zstdruby/libzstd/compress/zstd_compress.c +434 -337
  10. data/ext/zstdruby/libzstd/compress/{zstd_compress.h → zstd_compress_internal.h} +191 -36
  11. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +1 -0
  12. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
  13. data/ext/zstdruby/libzstd/compress/zstd_fast.c +1 -0
  14. data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
  15. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +66 -50
  16. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +3 -2
  17. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +3 -2
  18. data/ext/zstdruby/libzstd/compress/zstd_opt.c +504 -676
  19. data/ext/zstdruby/libzstd/compress/zstd_opt.h +2 -2
  20. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +130 -80
  21. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +15 -7
  22. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +41 -31
  23. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -0
  24. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +1 -1
  25. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +1 -1
  26. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +1 -74
  27. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +1 -74
  28. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +1 -72
  29. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +1 -73
  30. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +1 -77
  31. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +1 -77
  32. data/ext/zstdruby/libzstd/zstd.h +43 -30
  33. data/lib/zstd-ruby/version.rb +1 -1
  34. metadata +4 -4
@@ -8,36 +8,35 @@
8
8
  * You may select, at your option, one of the above-listed licenses.
9
9
  */
10
10
 
11
+ #include "zstd_compress_internal.h"
11
12
  #include "zstd_opt.h"
12
- #include "zstd_lazy.h"
13
+ #include "zstd_lazy.h" /* ZSTD_updateTree, ZSTD_updateTree_extDict */
13
14
 
14
15
 
15
- #define ZSTD_LITFREQ_ADD 2
16
- #define ZSTD_FREQ_DIV 4
17
- #define ZSTD_MAX_PRICE (1<<30)
16
+ #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
17
+ #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
18
+ #define ZSTD_MAX_PRICE (1<<30)
19
+
18
20
 
19
21
  /*-*************************************
20
22
  * Price functions for optimal parser
21
23
  ***************************************/
22
24
  static void ZSTD_setLog2Prices(optState_t* optPtr)
23
25
  {
24
- optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
25
- optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
26
26
  optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
27
+ optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
28
+ optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
27
29
  optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
28
- optPtr->factor = 1 + ((optPtr->litSum>>5) / optPtr->litLengthSum) + ((optPtr->litSum<<1) / (optPtr->litSum + optPtr->matchSum));
29
30
  }
30
31
 
31
32
 
32
- static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSize)
33
+ static void ZSTD_rescaleFreqs(optState_t* const optPtr,
34
+ const BYTE* const src, size_t const srcSize)
33
35
  {
34
- unsigned u;
35
-
36
- optPtr->cachedLiterals = NULL;
37
- optPtr->cachedPrice = optPtr->cachedLitLength = 0;
38
36
  optPtr->staticPrices = 0;
39
37
 
40
- if (optPtr->litLengthSum == 0) {
38
+ if (optPtr->litLengthSum == 0) { /* first init */
39
+ unsigned u;
41
40
  if (srcSize <= 1024) optPtr->staticPrices = 1;
42
41
 
43
42
  assert(optPtr->litFreq!=NULL);
@@ -45,44 +44,41 @@ static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSiz
45
44
  optPtr->litFreq[u] = 0;
46
45
  for (u=0; u<srcSize; u++)
47
46
  optPtr->litFreq[src[u]]++;
48
-
49
47
  optPtr->litSum = 0;
50
- optPtr->litLengthSum = MaxLL+1;
51
- optPtr->matchLengthSum = MaxML+1;
52
- optPtr->offCodeSum = (MaxOff+1);
53
- optPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);
54
-
55
48
  for (u=0; u<=MaxLit; u++) {
56
- optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>ZSTD_FREQ_DIV);
49
+ optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV);
57
50
  optPtr->litSum += optPtr->litFreq[u];
58
51
  }
52
+
59
53
  for (u=0; u<=MaxLL; u++)
60
54
  optPtr->litLengthFreq[u] = 1;
55
+ optPtr->litLengthSum = MaxLL+1;
61
56
  for (u=0; u<=MaxML; u++)
62
57
  optPtr->matchLengthFreq[u] = 1;
58
+ optPtr->matchLengthSum = MaxML+1;
63
59
  for (u=0; u<=MaxOff; u++)
64
60
  optPtr->offCodeFreq[u] = 1;
61
+ optPtr->offCodeSum = (MaxOff+1);
62
+
65
63
  } else {
66
- optPtr->matchLengthSum = 0;
67
- optPtr->litLengthSum = 0;
68
- optPtr->offCodeSum = 0;
69
- optPtr->matchSum = 0;
70
- optPtr->litSum = 0;
64
+ unsigned u;
71
65
 
66
+ optPtr->litSum = 0;
72
67
  for (u=0; u<=MaxLit; u++) {
73
- optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
68
+ optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1));
74
69
  optPtr->litSum += optPtr->litFreq[u];
75
70
  }
71
+ optPtr->litLengthSum = 0;
76
72
  for (u=0; u<=MaxLL; u++) {
77
73
  optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
78
74
  optPtr->litLengthSum += optPtr->litLengthFreq[u];
79
75
  }
76
+ optPtr->matchLengthSum = 0;
80
77
  for (u=0; u<=MaxML; u++) {
81
78
  optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
82
79
  optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
83
- optPtr->matchSum += optPtr->matchLengthFreq[u] * (u + 3);
84
80
  }
85
- optPtr->matchSum *= ZSTD_LITFREQ_ADD;
81
+ optPtr->offCodeSum = 0;
86
82
  for (u=0; u<=MaxOff; u++) {
87
83
  optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
88
84
  optPtr->offCodeSum += optPtr->offCodeFreq[u];
@@ -93,114 +89,146 @@ static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSiz
93
89
  }
94
90
 
95
91
 
96
- static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 litLength, const BYTE* literals)
92
+ /* ZSTD_rawLiteralsCost() :
93
+ * cost of literals (only) in given segment (whose length can be zero)
94
+ * does not include cost of literalLength symbol */
95
+ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
96
+ const optState_t* const optPtr)
97
97
  {
98
- U32 price, u;
99
-
100
- if (optPtr->staticPrices)
101
- return ZSTD_highbit32((U32)litLength+1) + (litLength*6);
102
-
103
- if (litLength == 0)
104
- return optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[0]+1);
98
+ if (optPtr->staticPrices) return (litLength*6); /* 6 bits per literal - no statistics used */
99
+ if (litLength == 0) return 0;
105
100
 
106
101
  /* literals */
107
- if (optPtr->cachedLiterals == literals) {
108
- U32 const additional = litLength - optPtr->cachedLitLength;
109
- const BYTE* literals2 = optPtr->cachedLiterals + optPtr->cachedLitLength;
110
- price = optPtr->cachedPrice + additional * optPtr->log2litSum;
111
- for (u=0; u < additional; u++)
112
- price -= ZSTD_highbit32(optPtr->litFreq[literals2[u]]+1);
113
- optPtr->cachedPrice = price;
114
- optPtr->cachedLitLength = litLength;
115
- } else {
116
- price = litLength * optPtr->log2litSum;
102
+ { U32 u;
103
+ U32 cost = litLength * optPtr->log2litSum;
117
104
  for (u=0; u < litLength; u++)
118
- price -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
119
-
120
- if (litLength >= 12) {
121
- optPtr->cachedLiterals = literals;
122
- optPtr->cachedPrice = price;
123
- optPtr->cachedLitLength = litLength;
124
- }
105
+ cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
106
+ return cost;
125
107
  }
108
+ }
109
+
110
+ /* ZSTD_litLengthPrice() :
111
+ * cost of literalLength symbol */
112
+ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr)
113
+ {
114
+ if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1);
126
115
 
127
116
  /* literal Length */
128
- { const BYTE LL_deltaCode = 19;
129
- const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
130
- price += LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
117
+ { U32 const llCode = ZSTD_LLcode(litLength);
118
+ U32 const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
119
+ return price;
131
120
  }
121
+ }
132
122
 
133
- return price;
123
+ /* ZSTD_fullLiteralsCost() :
124
+ * cost of the literal part of a sequence,
125
+ * including literals themselves, and literalLength symbol */
126
+ static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength,
127
+ const optState_t* const optPtr)
128
+ {
129
+ return ZSTD_rawLiteralsCost(literals, litLength, optPtr)
130
+ + ZSTD_litLengthPrice(litLength, optPtr);
131
+ }
132
+
133
+ /* ZSTD_litLengthContribution() :
134
+ * @return ( cost(litlength) - cost(0) )
135
+ * this value can then be added to rawLiteralsCost()
136
+ * to provide a cost which is directly comparable to a match ending at same position */
137
+ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr)
138
+ {
139
+ if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1);
140
+
141
+ /* literal Length */
142
+ { U32 const llCode = ZSTD_LLcode(litLength);
143
+ int const contribution = LL_bits[llCode]
144
+ + ZSTD_highbit32(optPtr->litLengthFreq[0]+1)
145
+ - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
146
+ #if 1
147
+ return contribution;
148
+ #else
149
+ return MAX(0, contribution); /* sometimes better, sometimes not ... */
150
+ #endif
151
+ }
134
152
  }
135
153
 
154
+ /* ZSTD_literalsContribution() :
155
+ * creates a fake cost for the literals part of a sequence
156
+ * which can be compared to the ending cost of a match
157
+ * should a new match start at this position */
158
+ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
159
+ const optState_t* const optPtr)
160
+ {
161
+ int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr)
162
+ + ZSTD_litLengthContribution(litLength, optPtr);
163
+ return contribution;
164
+ }
136
165
 
137
- FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
166
+ /* ZSTD_getMatchPrice() :
167
+ * Provides the cost of the match part (offset + matchLength) of a sequence
168
+ * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
169
+ * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
170
+ FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(
171
+ U32 const offset, U32 const matchLength,
172
+ const optState_t* const optPtr,
173
+ int const optLevel)
138
174
  {
139
- /* offset */
140
175
  U32 price;
141
- BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
176
+ U32 const offCode = ZSTD_highbit32(offset+1);
177
+ U32 const mlBase = matchLength - MINMATCH;
178
+ assert(matchLength >= MINMATCH);
142
179
 
143
- if (optPtr->staticPrices)
144
- return ZSTD_getLiteralPrice(optPtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;
180
+ if (optPtr->staticPrices) /* fixed scheme, do not use statistics */
181
+ return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode;
145
182
 
146
183
  price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
147
- if (!ultra && offCode >= 20) price += (offCode-19)*2;
184
+ if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */
148
185
 
149
186
  /* match Length */
150
- { const BYTE ML_deltaCode = 36;
151
- const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
187
+ { U32 const mlCode = ZSTD_MLcode(mlBase);
152
188
  price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
153
189
  }
154
190
 
155
- return price + ZSTD_getLiteralPrice(optPtr, litLength, literals) + optPtr->factor;
191
+ DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
192
+ return price;
156
193
  }
157
194
 
158
-
159
- static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
195
+ static void ZSTD_updateStats(optState_t* const optPtr,
196
+ U32 litLength, const BYTE* literals,
197
+ U32 offsetCode, U32 matchLength)
160
198
  {
161
- U32 u;
162
-
163
199
  /* literals */
164
- optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
165
- for (u=0; u < litLength; u++)
166
- optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
200
+ { U32 u;
201
+ for (u=0; u < litLength; u++)
202
+ optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
203
+ optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
204
+ }
167
205
 
168
206
  /* literal Length */
169
- { const BYTE LL_deltaCode = 19;
170
- const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
207
+ { U32 const llCode = ZSTD_LLcode(litLength);
171
208
  optPtr->litLengthFreq[llCode]++;
172
209
  optPtr->litLengthSum++;
173
210
  }
174
211
 
175
- /* match offset */
176
- { BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
177
- optPtr->offCodeSum++;
212
+ /* match offset code (0-2=>repCode; 3+=>offset+2) */
213
+ { U32 const offCode = ZSTD_highbit32(offsetCode+1);
214
+ assert(offCode <= MaxOff);
178
215
  optPtr->offCodeFreq[offCode]++;
216
+ optPtr->offCodeSum++;
179
217
  }
180
218
 
181
219
  /* match Length */
182
- { const BYTE ML_deltaCode = 36;
183
- const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
220
+ { U32 const mlBase = matchLength - MINMATCH;
221
+ U32 const mlCode = ZSTD_MLcode(mlBase);
184
222
  optPtr->matchLengthFreq[mlCode]++;
185
223
  optPtr->matchLengthSum++;
186
224
  }
187
-
188
- ZSTD_setLog2Prices(optPtr);
189
225
  }
190
226
 
191
227
 
192
- #define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \
193
- { \
194
- while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \
195
- opt[pos].mlen = mlen_; \
196
- opt[pos].off = offset_; \
197
- opt[pos].litlen = litlen_; \
198
- opt[pos].price = price_; \
199
- }
200
-
201
-
202
- /* function safe only for comparisons */
203
- static U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
228
+ /* ZSTD_readMINMATCH() :
229
+ * function safe only for comparisons
230
+ * assumption : memPtr must be at least 4 bytes before end of buffer */
231
+ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
204
232
  {
205
233
  switch (length)
206
234
  {
@@ -216,15 +244,14 @@ static U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
216
244
 
217
245
  /* Update hashTable3 up to ip (excluded)
218
246
  Assumption : always within prefix (i.e. not within extDict) */
219
- static
220
- U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
247
+ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* const cctx, const BYTE* const ip)
221
248
  {
222
- U32* const hashTable3 = zc->hashTable3;
223
- U32 const hashLog3 = zc->hashLog3;
224
- const BYTE* const base = zc->base;
225
- U32 idx = zc->nextToUpdate3;
226
- const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
227
- const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);
249
+ U32* const hashTable3 = cctx->hashTable3;
250
+ U32 const hashLog3 = cctx->hashLog3;
251
+ const BYTE* const base = cctx->base;
252
+ U32 idx = cctx->nextToUpdate3;
253
+ U32 const target = cctx->nextToUpdate3 = (U32)(ip - base);
254
+ size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
228
255
 
229
256
  while(idx < target) {
230
257
  hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
@@ -238,102 +265,147 @@ U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
238
265
  /*-*************************************
239
266
  * Binary Tree search
240
267
  ***************************************/
241
- static U32 ZSTD_insertBtAndGetAllMatches (
242
- ZSTD_CCtx* zc,
243
- const BYTE* const ip, const BYTE* const iLimit,
244
- U32 nbCompares, const U32 mls,
245
- U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen)
268
+ FORCE_INLINE_TEMPLATE
269
+ U32 ZSTD_insertBtAndGetAllMatches (
270
+ ZSTD_CCtx* zc,
271
+ const BYTE* const ip, const BYTE* const iLimit, int const extDict,
272
+ U32 nbCompares, U32 const mls, U32 const sufficient_len,
273
+ U32 rep[ZSTD_REP_NUM], U32 const ll0,
274
+ ZSTD_match_t* matches, const U32 lengthToBeat)
246
275
  {
247
276
  const BYTE* const base = zc->base;
248
- const U32 current = (U32)(ip-base);
249
- const U32 hashLog = zc->appliedParams.cParams.hashLog;
250
- const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
277
+ U32 const current = (U32)(ip-base);
278
+ U32 const hashLog = zc->appliedParams.cParams.hashLog;
279
+ U32 const minMatch = (mls==3) ? 3 : 4;
251
280
  U32* const hashTable = zc->hashTable;
281
+ size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
252
282
  U32 matchIndex = hashTable[h];
253
283
  U32* const bt = zc->chainTable;
254
- const U32 btLog = zc->appliedParams.cParams.chainLog - 1;
255
- const U32 btMask= (1U << btLog) - 1;
284
+ U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
285
+ U32 const btMask= (1U << btLog) - 1;
256
286
  size_t commonLengthSmaller=0, commonLengthLarger=0;
257
287
  const BYTE* const dictBase = zc->dictBase;
258
- const U32 dictLimit = zc->dictLimit;
288
+ U32 const dictLimit = zc->dictLimit;
259
289
  const BYTE* const dictEnd = dictBase + dictLimit;
260
290
  const BYTE* const prefixStart = base + dictLimit;
261
- const U32 btLow = btMask >= current ? 0 : current - btMask;
262
- const U32 windowLow = zc->lowLimit;
291
+ U32 const btLow = btMask >= current ? 0 : current - btMask;
292
+ U32 const windowLow = zc->lowLimit;
263
293
  U32* smallerPtr = bt + 2*(current&btMask);
264
294
  U32* largerPtr = bt + 2*(current&btMask) + 1;
265
- U32 matchEndIdx = current+8;
295
+ U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
266
296
  U32 dummy32; /* to be nullified at the end */
267
297
  U32 mnum = 0;
268
298
 
269
- const U32 minMatch = (mls == 3) ? 3 : 4;
270
- size_t bestLength = minMatchLen-1;
299
+ size_t bestLength = lengthToBeat-1;
300
+ DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");
301
+
302
+ /* check repCode */
303
+ { U32 const lastR = ZSTD_REP_NUM + ll0;
304
+ U32 repCode;
305
+ for (repCode = ll0; repCode < lastR; repCode++) {
306
+ U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
307
+ U32 const repIndex = current - repOffset;
308
+ U32 repLen = 0;
309
+ assert(current >= dictLimit);
310
+ if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
311
+ if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) {
312
+ repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
313
+ }
314
+ } else { /* repIndex < dictLimit || repIndex >= current */
315
+ const BYTE* const repMatch = dictBase + repIndex;
316
+ assert(current >= windowLow);
317
+ if ( extDict /* this case only valid in extDict mode */
318
+ && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
319
+ & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
320
+ && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
321
+ repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
322
+ } }
323
+ /* save longer solution */
324
+ if (repLen > bestLength) {
325
+ DEBUGLOG(8, "found rep-match %u of length %u",
326
+ repCode - ll0, (U32)repLen);
327
+ bestLength = repLen;
328
+ matches[mnum].off = repCode - ll0;
329
+ matches[mnum].len = (U32)repLen;
330
+ mnum++;
331
+ if ( (repLen > sufficient_len)
332
+ | (ip+repLen == iLimit) ) { /* best possible */
333
+ return mnum;
334
+ } } } }
271
335
 
272
- if (minMatch == 3) { /* HC3 match finder */
336
+ /* HC3 match finder */
337
+ if ((mls == 3) /*static*/ && (bestLength < mls)) {
273
338
  U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
274
- if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) {
275
- const BYTE* match;
276
- size_t currentMl=0;
277
- if ((!extDict) || matchIndex3 >= dictLimit) {
278
- match = base + matchIndex3;
279
- if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
339
+ if ((matchIndex3 > windowLow)
340
+ & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
341
+ size_t mlen;
342
+ if ((!extDict) /*static*/ || (matchIndex3 >= dictLimit)) {
343
+ const BYTE* const match = base + matchIndex3;
344
+ mlen = ZSTD_count(ip, match, iLimit);
280
345
  } else {
281
- match = dictBase + matchIndex3;
282
- if (ZSTD_readMINMATCH(match, MINMATCH) == ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
283
- currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
346
+ const BYTE* const match = dictBase + matchIndex3;
347
+ mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
284
348
  }
285
349
 
286
350
  /* save best solution */
287
- if (currentMl > bestLength) {
288
- bestLength = currentMl;
289
- matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3;
290
- matches[mnum].len = (U32)currentMl;
291
- mnum++;
292
- if (currentMl > ZSTD_OPT_NUM) goto update;
293
- if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/
294
- }
295
- }
296
- }
351
+ if (mlen >= mls /* == 3 > bestLength */) {
352
+ DEBUGLOG(8, "found small match with hlog3, of length %u",
353
+ (U32)mlen);
354
+ bestLength = mlen;
355
+ assert(current > matchIndex3);
356
+ assert(mnum==0); /* no prior solution */
357
+ matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
358
+ matches[0].len = (U32)mlen;
359
+ mnum = 1;
360
+ if ( (mlen > sufficient_len) |
361
+ (ip+mlen == iLimit) ) { /* best possible length */
362
+ zc->nextToUpdate = current+1; /* skip insertion */
363
+ return 1;
364
+ } } } }
297
365
 
298
366
  hashTable[h] = current; /* Update Hash Table */
299
367
 
300
368
  while (nbCompares-- && (matchIndex > windowLow)) {
301
- U32* nextPtr = bt + 2*(matchIndex & btMask);
369
+ U32* const nextPtr = bt + 2*(matchIndex & btMask);
302
370
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
303
371
  const BYTE* match;
372
+ assert(current > matchIndex);
304
373
 
305
374
  if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
375
+ assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
306
376
  match = base + matchIndex;
307
- if (match[matchLength] == ip[matchLength]) {
308
- matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1;
309
- }
377
+ matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
310
378
  } else {
311
379
  match = dictBase + matchIndex;
312
380
  matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
313
381
  if (matchIndex+matchLength >= dictLimit)
314
- match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
382
+ match = base + matchIndex; /* prepare for match[matchLength] */
315
383
  }
316
384
 
317
385
  if (matchLength > bestLength) {
318
- if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength;
386
+ DEBUGLOG(8, "found match of length %u at distance %u",
387
+ (U32)matchLength, current - matchIndex);
388
+ assert(matchEndIdx > matchIndex);
389
+ if (matchLength > matchEndIdx - matchIndex)
390
+ matchEndIdx = matchIndex + (U32)matchLength;
319
391
  bestLength = matchLength;
320
- matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex;
392
+ matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
321
393
  matches[mnum].len = (U32)matchLength;
322
394
  mnum++;
323
395
  if (matchLength > ZSTD_OPT_NUM) break;
324
- if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */
325
- break; /* drop, to guarantee consistency (miss a little bit of compression) */
396
+ if (ip+matchLength == iLimit) { /* equal : no way to know if inf or sup */
397
+ break; /* drop, to preserve bt consistency (miss a little bit of compression) */
398
+ }
326
399
  }
327
400
 
328
401
  if (match[matchLength] < ip[matchLength]) {
329
- /* match is smaller than current */
402
+ /* match smaller than current */
330
403
  *smallerPtr = matchIndex; /* update smaller idx */
331
404
  commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
332
405
  if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
333
- smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
334
- matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
406
+ smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
407
+ matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
335
408
  } else {
336
- /* match is larger than current */
337
409
  *largerPtr = matchIndex;
338
410
  commonLengthLarger = matchLength;
339
411
  if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
@@ -343,65 +415,31 @@ static U32 ZSTD_insertBtAndGetAllMatches (
343
415
 
344
416
  *smallerPtr = *largerPtr = 0;
345
417
 
346
- update:
347
- zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
418
+ assert(matchEndIdx > current+8);
419
+ zc->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
348
420
  return mnum;
349
421
  }
350
422
 
351
423
 
352
- /** Tree updater, providing best match */
353
- static U32 ZSTD_BtGetAllMatches (
354
- ZSTD_CCtx* zc,
355
- const BYTE* const ip, const BYTE* const iLimit,
356
- const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
357
- {
358
- if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
359
- ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
360
- return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
361
- }
362
-
363
-
364
- static U32 ZSTD_BtGetAllMatches_selectMLS (
424
+ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
365
425
  ZSTD_CCtx* zc, /* Index table will be updated */
366
- const BYTE* ip, const BYTE* const iHighLimit,
367
- const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
368
- {
369
- switch(matchLengthSearch)
370
- {
371
- case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
372
- default :
373
- case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
374
- case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
375
- case 7 :
376
- case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
377
- }
378
- }
379
-
380
- /** Tree updater, providing best match */
381
- static U32 ZSTD_BtGetAllMatches_extDict (
382
- ZSTD_CCtx* zc,
383
- const BYTE* const ip, const BYTE* const iLimit,
384
- const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
426
+ const BYTE* ip, const BYTE* const iHighLimit, int const extDict,
427
+ U32 const maxNbAttempts, U32 const matchLengthSearch, U32 const sufficient_len,
428
+ U32 rep[ZSTD_REP_NUM], U32 const ll0,
429
+ ZSTD_match_t* matches, U32 const lengthToBeat)
385
430
  {
431
+ DEBUGLOG(7, "ZSTD_BtGetAllMatches");
386
432
  if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
387
- ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
388
- return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
389
- }
390
-
391
-
392
- static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
393
- ZSTD_CCtx* zc, /* Index table will be updated */
394
- const BYTE* ip, const BYTE* const iHighLimit,
395
- const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
396
- {
433
+ if (extDict) ZSTD_updateTree_extDict(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch);
434
+ else ZSTD_updateTree(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch);
397
435
  switch(matchLengthSearch)
398
436
  {
399
- case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
437
+ case 3 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 3, sufficient_len, rep, ll0, matches, lengthToBeat);
400
438
  default :
401
- case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
402
- case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
439
+ case 4 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 4, sufficient_len, rep, ll0, matches, lengthToBeat);
440
+ case 5 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 5, sufficient_len, rep, ll0, matches, lengthToBeat);
403
441
  case 7 :
404
- case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
442
+ case 6 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 6, sufficient_len, rep, ll0, matches, lengthToBeat);
405
443
  }
406
444
  }
407
445
 
@@ -409,534 +447,313 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
409
447
  /*-*******************************
410
448
  * Optimal parser
411
449
  *********************************/
412
- FORCE_INLINE_TEMPLATE
413
- size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
414
- const void* src, size_t srcSize, const int ultra)
415
- {
416
- seqStore_t* seqStorePtr = &(ctx->seqStore);
417
- optState_t* optStatePtr = &(ctx->optState);
418
- const BYTE* const istart = (const BYTE*)src;
419
- const BYTE* ip = istart;
420
- const BYTE* anchor = istart;
421
- const BYTE* const iend = istart + srcSize;
422
- const BYTE* const ilimit = iend - 8;
423
- const BYTE* const base = ctx->base;
424
- const BYTE* const prefixStart = base + ctx->dictLimit;
425
-
426
- const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
427
- const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
428
- const U32 mls = ctx->appliedParams.cParams.searchLength;
429
- const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
430
-
431
- ZSTD_optimal_t* opt = optStatePtr->priceTable;
432
- ZSTD_match_t* matches = optStatePtr->matchTable;
433
- const BYTE* inr;
434
- U32 offset, rep[ZSTD_REP_NUM];
435
-
436
- /* init */
437
- ctx->nextToUpdate3 = ctx->nextToUpdate;
438
- ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
439
- ip += (ip==prefixStart);
440
- { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }
441
-
442
- /* Match Loop */
443
- while (ip < ilimit) {
444
- U32 cur, match_num, last_pos, litlen, price;
445
- U32 u, mlen, best_mlen, best_off, litLength;
446
- memset(opt, 0, sizeof(ZSTD_optimal_t));
447
- last_pos = 0;
448
- litlen = (U32)(ip - anchor);
449
-
450
- /* check repCode */
451
- { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
452
- for (i=(ip == anchor); i<last_i; i++) {
453
- const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
454
- if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
455
- && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) {
456
- mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
457
- if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
458
- best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
459
- goto _storeSequence;
460
- }
461
- best_off = i - (ip == anchor);
462
- do {
463
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
464
- if (mlen > last_pos || price < opt[mlen].price)
465
- SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
466
- mlen--;
467
- } while (mlen >= minMatch);
468
- } } }
469
-
470
- match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);
471
-
472
- if (!last_pos && !match_num) { ip++; continue; }
473
-
474
- if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
475
- best_mlen = matches[match_num-1].len;
476
- best_off = matches[match_num-1].off;
477
- cur = 0;
478
- last_pos = 1;
479
- goto _storeSequence;
480
- }
481
-
482
- /* set prices using matches at position = 0 */
483
- best_mlen = (last_pos) ? last_pos : minMatch;
484
- for (u = 0; u < match_num; u++) {
485
- mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
486
- best_mlen = matches[u].len;
487
- while (mlen <= best_mlen) {
488
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
489
- if (mlen > last_pos || price < opt[mlen].price)
490
- SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */
491
- mlen++;
492
- } }
493
-
494
- if (last_pos < minMatch) { ip++; continue; }
495
-
496
- /* initialize opt[0] */
497
- { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
498
- opt[0].mlen = 1;
499
- opt[0].litlen = litlen;
500
-
501
- /* check further positions */
502
- for (cur = 1; cur <= last_pos; cur++) {
503
- inr = ip + cur;
504
-
505
- if (opt[cur-1].mlen == 1) {
506
- litlen = opt[cur-1].litlen + 1;
507
- if (cur > litlen) {
508
- price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen);
509
- } else
510
- price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor);
511
- } else {
512
- litlen = 1;
513
- price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1);
514
- }
515
-
516
- if (cur > last_pos || price <= opt[cur].price)
517
- SET_PRICE(cur, 1, 0, litlen, price);
518
-
519
- if (cur == last_pos) break;
520
-
521
- if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
522
- continue;
523
-
524
- mlen = opt[cur].mlen;
525
- if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
526
- opt[cur].rep[2] = opt[cur-mlen].rep[1];
527
- opt[cur].rep[1] = opt[cur-mlen].rep[0];
528
- opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
529
- } else {
530
- opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
531
- opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
532
- /* If opt[cur].off == ZSTD_REP_MOVE_OPT, then mlen != 1.
533
- * offset ZSTD_REP_MOVE_OPT is used for the special case
534
- * litLength == 0, where offset 0 means something special.
535
- * mlen == 1 means the previous byte was stored as a literal,
536
- * so they are mutually exclusive.
537
- */
538
- assert(!(opt[cur].off == ZSTD_REP_MOVE_OPT && mlen == 1));
539
- opt[cur].rep[0] = (opt[cur].off == ZSTD_REP_MOVE_OPT) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
540
- }
541
-
542
- best_mlen = minMatch;
543
- { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
544
- for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */
545
- const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
546
- if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
547
- && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) {
548
- mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
549
-
550
- if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
551
- best_mlen = mlen; best_off = i; last_pos = cur + 1;
552
- goto _storeSequence;
553
- }
554
-
555
- best_off = i - (opt[cur].mlen != 1);
556
- if (mlen > best_mlen) best_mlen = mlen;
557
-
558
- do {
559
- if (opt[cur].mlen == 1) {
560
- litlen = opt[cur].litlen;
561
- if (cur > litlen) {
562
- price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
563
- } else
564
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
565
- } else {
566
- litlen = 0;
567
- price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
568
- }
569
-
570
- if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
571
- SET_PRICE(cur + mlen, mlen, i, litlen, price);
572
- mlen--;
573
- } while (mlen >= minMatch);
574
- } } }
575
-
576
- match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);
577
-
578
- if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
579
- best_mlen = matches[match_num-1].len;
580
- best_off = matches[match_num-1].off;
581
- last_pos = cur + 1;
582
- goto _storeSequence;
583
- }
584
-
585
- /* set prices using matches at position = cur */
586
- for (u = 0; u < match_num; u++) {
587
- mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
588
- best_mlen = matches[u].len;
589
-
590
- while (mlen <= best_mlen) {
591
- if (opt[cur].mlen == 1) {
592
- litlen = opt[cur].litlen;
593
- if (cur > litlen)
594
- price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
595
- else
596
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
597
- } else {
598
- litlen = 0;
599
- price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
600
- }
601
-
602
- if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
603
- SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
450
+ typedef struct repcodes_s {
451
+ U32 rep[3];
452
+ } repcodes_t;
604
453
 
605
- mlen++;
606
- } } }
607
-
608
- best_mlen = opt[last_pos].mlen;
609
- best_off = opt[last_pos].off;
610
- cur = last_pos - best_mlen;
611
-
612
- /* store sequence */
613
- _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
614
- opt[0].mlen = 1;
615
-
616
- while (1) {
617
- mlen = opt[cur].mlen;
618
- offset = opt[cur].off;
619
- opt[cur].mlen = best_mlen;
620
- opt[cur].off = best_off;
621
- best_mlen = mlen;
622
- best_off = offset;
623
- if (mlen > cur) break;
624
- cur -= mlen;
625
- }
626
-
627
- for (u = 0; u <= last_pos;) {
628
- u += opt[u].mlen;
454
+ repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
455
+ {
456
+ repcodes_t newReps;
457
+ if (offset >= ZSTD_REP_NUM) { /* full offset */
458
+ newReps.rep[2] = rep[1];
459
+ newReps.rep[1] = rep[0];
460
+ newReps.rep[0] = offset - ZSTD_REP_MOVE;
461
+ } else { /* repcode */
462
+ U32 const repCode = offset + ll0;
463
+ if (repCode > 0) { /* note : if repCode==0, no change */
464
+ U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
465
+ newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
466
+ newReps.rep[1] = rep[0];
467
+ newReps.rep[0] = currentOffset;
468
+ } else { /* repCode == 0 */
469
+ memcpy(&newReps, rep, sizeof(newReps));
629
470
  }
471
+ }
472
+ return newReps;
473
+ }
630
474
 
631
- for (cur=0; cur < last_pos; ) {
632
- mlen = opt[cur].mlen;
633
- if (mlen == 1) { ip++; cur++; continue; }
634
- offset = opt[cur].off;
635
- cur += mlen;
636
- litLength = (U32)(ip - anchor);
637
-
638
- if (offset > ZSTD_REP_MOVE_OPT) {
639
- rep[2] = rep[1];
640
- rep[1] = rep[0];
641
- rep[0] = offset - ZSTD_REP_MOVE_OPT;
642
- offset--;
643
- } else {
644
- if (offset != 0) {
645
- best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
646
- if (offset != 1) rep[2] = rep[1];
647
- rep[1] = rep[0];
648
- rep[0] = best_off;
649
- }
650
- if (litLength==0) offset--;
651
- }
652
475
 
653
- ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH);
654
- ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
655
- anchor = ip = ip + mlen;
656
- } } /* for (cur=0; cur < last_pos; ) */
476
+ typedef struct {
477
+ const BYTE* anchor;
478
+ U32 litlen;
479
+ U32 rawLitCost;
480
+ } cachedLiteralPrice_t;
657
481
 
658
- /* Save reps for next block */
659
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }
482
+ static U32 ZSTD_rawLiteralsCost_cached(
483
+ cachedLiteralPrice_t* const cachedLitPrice,
484
+ const BYTE* const anchor, U32 const litlen,
485
+ const optState_t* const optStatePtr)
486
+ {
487
+ U32 startCost;
488
+ U32 remainingLength;
489
+ const BYTE* startPosition;
490
+
491
+ if (anchor == cachedLitPrice->anchor) {
492
+ startCost = cachedLitPrice->rawLitCost;
493
+ startPosition = anchor + cachedLitPrice->litlen;
494
+ assert(litlen >= cachedLitPrice->litlen);
495
+ remainingLength = litlen - cachedLitPrice->litlen;
496
+ } else {
497
+ startCost = 0;
498
+ startPosition = anchor;
499
+ remainingLength = litlen;
500
+ }
660
501
 
661
- /* Return the last literals size */
662
- return iend - anchor;
502
+ { U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr);
503
+ cachedLitPrice->anchor = anchor;
504
+ cachedLitPrice->litlen = litlen;
505
+ cachedLitPrice->rawLitCost = rawLitCost;
506
+ return rawLitCost;
507
+ }
663
508
  }
664
509
 
665
-
666
- size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
510
+ static U32 ZSTD_fullLiteralsCost_cached(
511
+ cachedLiteralPrice_t* const cachedLitPrice,
512
+ const BYTE* const anchor, U32 const litlen,
513
+ const optState_t* const optStatePtr)
667
514
  {
668
- return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
515
+ return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
516
+ + ZSTD_litLengthPrice(litlen, optStatePtr);
669
517
  }
670
518
 
671
- size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
519
+ static int ZSTD_literalsContribution_cached(
520
+ cachedLiteralPrice_t* const cachedLitPrice,
521
+ const BYTE* const anchor, U32 const litlen,
522
+ const optState_t* const optStatePtr)
672
523
  {
673
- return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
524
+ int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
525
+ + ZSTD_litLengthContribution(litlen, optStatePtr);
526
+ return contribution;
674
527
  }
675
528
 
676
-
677
529
  FORCE_INLINE_TEMPLATE
678
- size_t ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
679
- const void* src, size_t srcSize, const int ultra)
530
+ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
531
+ const void* src, size_t srcSize,
532
+ const int optLevel, const int extDict)
680
533
  {
681
- seqStore_t* seqStorePtr = &(ctx->seqStore);
682
- optState_t* optStatePtr = &(ctx->optState);
534
+ seqStore_t* const seqStorePtr = &(ctx->seqStore);
535
+ optState_t* const optStatePtr = &(ctx->optState);
683
536
  const BYTE* const istart = (const BYTE*)src;
684
537
  const BYTE* ip = istart;
685
538
  const BYTE* anchor = istart;
686
539
  const BYTE* const iend = istart + srcSize;
687
540
  const BYTE* const ilimit = iend - 8;
688
541
  const BYTE* const base = ctx->base;
689
- const U32 lowestIndex = ctx->lowLimit;
690
- const U32 dictLimit = ctx->dictLimit;
691
- const BYTE* const prefixStart = base + dictLimit;
692
- const BYTE* const dictBase = ctx->dictBase;
693
- const BYTE* const dictEnd = dictBase + dictLimit;
542
+ const BYTE* const prefixStart = base + ctx->dictLimit;
694
543
 
695
- const U32 maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
696
- const U32 sufficient_len = ctx->appliedParams.cParams.targetLength;
697
- const U32 mls = ctx->appliedParams.cParams.searchLength;
698
- const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
544
+ U32 const maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
545
+ U32 const sufficient_len = MIN(ctx->appliedParams.cParams.targetLength, ZSTD_OPT_NUM -1);
546
+ U32 const mls = ctx->appliedParams.cParams.searchLength;
547
+ U32 const minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
699
548
 
700
- ZSTD_optimal_t* opt = optStatePtr->priceTable;
701
- ZSTD_match_t* matches = optStatePtr->matchTable;
702
- const BYTE* inr;
549
+ ZSTD_optimal_t* const opt = optStatePtr->priceTable;
550
+ ZSTD_match_t* const matches = optStatePtr->matchTable;
551
+ cachedLiteralPrice_t cachedLitPrice;
552
+ U32 rep[ZSTD_REP_NUM];
703
553
 
704
554
  /* init */
705
- U32 offset, rep[ZSTD_REP_NUM];
706
- { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }
707
-
555
+ DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
708
556
  ctx->nextToUpdate3 = ctx->nextToUpdate;
709
557
  ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
710
558
  ip += (ip==prefixStart);
559
+ { int i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }
560
+ memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));
711
561
 
712
562
  /* Match Loop */
713
563
  while (ip < ilimit) {
714
- U32 cur, match_num, last_pos, litlen, price;
715
- U32 u, mlen, best_mlen, best_off, litLength;
716
- U32 current = (U32)(ip-base);
717
- memset(opt, 0, sizeof(ZSTD_optimal_t));
718
- last_pos = 0;
719
- opt[0].litlen = (U32)(ip - anchor);
720
-
721
- /* check repCode */
722
- { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
723
- for (i = (ip==anchor); i<last_i; i++) {
724
- const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
725
- const U32 repIndex = (U32)(current - repCur);
726
- const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
727
- const BYTE* const repMatch = repBase + repIndex;
728
- if ( (repCur > 0 && repCur <= (S32)current)
729
- && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
730
- && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
731
- /* repcode detected we should take it */
732
- const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
733
- mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
734
-
735
- if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
736
- best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
737
- goto _storeSequence;
738
- }
739
-
740
- best_off = i - (ip==anchor);
741
- litlen = opt[0].litlen;
742
- do {
743
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
744
- if (mlen > last_pos || price < opt[mlen].price)
745
- SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
746
- mlen--;
747
- } while (mlen >= minMatch);
748
- } } }
749
-
750
- match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */
751
-
752
- if (!last_pos && !match_num) { ip++; continue; }
753
-
754
- { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
755
- opt[0].mlen = 1;
756
-
757
- if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
758
- best_mlen = matches[match_num-1].len;
759
- best_off = matches[match_num-1].off;
760
- cur = 0;
761
- last_pos = 1;
762
- goto _storeSequence;
763
- }
764
-
765
- best_mlen = (last_pos) ? last_pos : minMatch;
766
-
767
- /* set prices using matches at position = 0 */
768
- for (u = 0; u < match_num; u++) {
769
- mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
770
- best_mlen = matches[u].len;
771
- litlen = opt[0].litlen;
772
- while (mlen <= best_mlen) {
773
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
774
- if (mlen > last_pos || price < opt[mlen].price)
775
- SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
776
- mlen++;
777
- } }
778
-
779
- if (last_pos < minMatch) {
780
- ip++; continue;
564
+ U32 cur, last_pos = 0;
565
+ U32 best_mlen, best_off;
566
+
567
+ /* find first match */
568
+ { U32 const litlen = (U32)(ip - anchor);
569
+ U32 const ll0 = !litlen;
570
+ U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, ip, iend, extDict, maxSearches, mls, sufficient_len, rep, ll0, matches, minMatch);
571
+ if (!nbMatches) { ip++; continue; }
572
+
573
+ /* initialize opt[0] */
574
+ { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
575
+ opt[0].mlen = 1;
576
+ opt[0].litlen = litlen;
577
+
578
+ /* large match -> immediate encoding */
579
+ { U32 const maxML = matches[nbMatches-1].len;
580
+ DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new serie",
581
+ nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart));
582
+
583
+ if (maxML > sufficient_len) {
584
+ best_mlen = maxML;
585
+ best_off = matches[nbMatches-1].off;
586
+ DEBUGLOG(7, "large match (%u>%u), immediate encoding",
587
+ best_mlen, sufficient_len);
588
+ cur = 0;
589
+ last_pos = 1;
590
+ goto _shortestPath;
591
+ } }
592
+
593
+ /* set prices for first matches starting position == 0 */
594
+ { U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
595
+ U32 pos;
596
+ U32 matchNb;
597
+ for (pos = 0; pos < minMatch; pos++) {
598
+ opt[pos].mlen = 1;
599
+ opt[pos].price = ZSTD_MAX_PRICE;
600
+ }
601
+ for (matchNb = 0; matchNb < nbMatches; matchNb++) {
602
+ U32 const offset = matches[matchNb].off;
603
+ U32 const end = matches[matchNb].len;
604
+ repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
605
+ for ( ; pos <= end ; pos++ ) {
606
+ U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
607
+ DEBUGLOG(7, "rPos:%u => set initial price : %u",
608
+ pos, matchPrice);
609
+ opt[pos].mlen = pos;
610
+ opt[pos].off = offset;
611
+ opt[pos].litlen = litlen;
612
+ opt[pos].price = matchPrice;
613
+ memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
614
+ } }
615
+ last_pos = pos-1;
616
+ }
781
617
  }
782
618
 
783
619
  /* check further positions */
784
620
  for (cur = 1; cur <= last_pos; cur++) {
785
- inr = ip + cur;
621
+ const BYTE* const inr = ip + cur;
622
+ assert(cur < ZSTD_OPT_NUM);
786
623
 
787
- if (opt[cur-1].mlen == 1) {
788
- litlen = opt[cur-1].litlen + 1;
624
+ /* Fix current position with one literal if cheaper */
625
+ { U32 const litlen = (opt[cur-1].mlen == 1) ? opt[cur-1].litlen + 1 : 1;
626
+ int price; /* note : contribution can be negative */
789
627
  if (cur > litlen) {
790
- price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen);
791
- } else
792
- price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor);
793
- } else {
794
- litlen = 1;
795
- price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1);
796
- }
797
-
798
- if (cur > last_pos || price <= opt[cur].price)
799
- SET_PRICE(cur, 1, 0, litlen, price);
628
+ price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr);
629
+ } else {
630
+ price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
631
+ }
632
+ assert(price < 1000000000); /* overflow check */
633
+ if (price <= opt[cur].price) {
634
+ DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal",
635
+ cur, price, opt[cur].price);
636
+ opt[cur].mlen = 1;
637
+ opt[cur].off = 0;
638
+ opt[cur].litlen = litlen;
639
+ opt[cur].price = price;
640
+ memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
641
+ } }
642
+
643
+ /* last match must start at a minimum distance of 8 from oend */
644
+ if (inr > ilimit) continue;
800
645
 
801
646
  if (cur == last_pos) break;
802
647
 
803
- if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
804
- continue;
805
-
806
- mlen = opt[cur].mlen;
807
- if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
808
- opt[cur].rep[2] = opt[cur-mlen].rep[1];
809
- opt[cur].rep[1] = opt[cur-mlen].rep[0];
810
- opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
811
- } else {
812
- opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
813
- opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
814
- assert(!(opt[cur].off == ZSTD_REP_MOVE_OPT && mlen == 1));
815
- opt[cur].rep[0] = (opt[cur].off == ZSTD_REP_MOVE_OPT) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
816
- }
648
+ if ( (optLevel==0) /*static*/
649
+ && (opt[cur+1].price <= opt[cur].price) )
650
+ continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
651
+
652
+ { U32 const ll0 = (opt[cur].mlen != 1);
653
+ U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
654
+ U32 const previousPrice = (cur > litlen) ? opt[cur-litlen].price : 0;
655
+ U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr);
656
+ U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, inr, iend, extDict, maxSearches, mls, sufficient_len, opt[cur].rep, ll0, matches, minMatch);
657
+ U32 matchNb;
658
+ if (!nbMatches) continue;
659
+
660
+ { U32 const maxML = matches[nbMatches-1].len;
661
+ DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u",
662
+ cur, nbMatches, maxML);
663
+
664
+ if ( (maxML > sufficient_len)
665
+ | (cur + maxML >= ZSTD_OPT_NUM) ) {
666
+ best_mlen = maxML;
667
+ best_off = matches[nbMatches-1].off;
668
+ last_pos = cur + 1;
669
+ goto _shortestPath;
670
+ }
671
+ }
817
672
 
818
- best_mlen = minMatch;
819
- { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
820
- for (i = (mlen != 1); i<last_i; i++) {
821
- const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
822
- const U32 repIndex = (U32)(current+cur - repCur);
823
- const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
824
- const BYTE* const repMatch = repBase + repIndex;
825
- if ( (repCur > 0 && repCur <= (S32)(current+cur))
826
- && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
827
- && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
828
- /* repcode detected */
829
- const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
830
- mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
831
-
832
- if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
833
- best_mlen = mlen; best_off = i; last_pos = cur + 1;
834
- goto _storeSequence;
673
+ /* set prices using matches found at position == cur */
674
+ for (matchNb = 0; matchNb < nbMatches; matchNb++) {
675
+ U32 const offset = matches[matchNb].off;
676
+ repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
677
+ U32 const lastML = matches[matchNb].len;
678
+ U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
679
+ U32 mlen;
680
+
681
+ DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u",
682
+ matchNb, matches[matchNb].off, lastML, litlen);
683
+
684
+ for (mlen = lastML; mlen >= startML; mlen--) {
685
+ U32 const pos = cur + mlen;
686
+ int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
687
+
688
+ if ((pos > last_pos) || (price < opt[pos].price)) {
689
+ DEBUGLOG(7, "rPos:%u => new better price (%u<%u)",
690
+ pos, price, opt[pos].price);
691
+ while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }
692
+ opt[pos].mlen = mlen;
693
+ opt[pos].off = offset;
694
+ opt[pos].litlen = litlen;
695
+ opt[pos].price = price;
696
+ memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
697
+ } else {
698
+ if (optLevel==0) break; /* gets ~+10% speed for about -0.01 ratio loss */
835
699
  }
836
-
837
- best_off = i - (opt[cur].mlen != 1);
838
- if (mlen > best_mlen) best_mlen = mlen;
839
-
840
- do {
841
- if (opt[cur].mlen == 1) {
842
- litlen = opt[cur].litlen;
843
- if (cur > litlen) {
844
- price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
845
- } else
846
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
847
- } else {
848
- litlen = 0;
849
- price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
850
- }
851
-
852
- if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
853
- SET_PRICE(cur + mlen, mlen, i, litlen, price);
854
- mlen--;
855
- } while (mlen >= minMatch);
856
700
  } } }
857
-
858
- match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
859
-
860
- if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
861
- best_mlen = matches[match_num-1].len;
862
- best_off = matches[match_num-1].off;
863
- last_pos = cur + 1;
864
- goto _storeSequence;
865
- }
866
-
867
- /* set prices using matches at position = cur */
868
- for (u = 0; u < match_num; u++) {
869
- mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
870
- best_mlen = matches[u].len;
871
-
872
- while (mlen <= best_mlen) {
873
- if (opt[cur].mlen == 1) {
874
- litlen = opt[cur].litlen;
875
- if (cur > litlen)
876
- price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
877
- else
878
- price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
879
- } else {
880
- litlen = 0;
881
- price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
882
- }
883
-
884
- if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
885
- SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
886
-
887
- mlen++;
888
- } } } /* for (cur = 1; cur <= last_pos; cur++) */
701
+ } /* for (cur = 1; cur <= last_pos; cur++) */
889
702
 
890
703
  best_mlen = opt[last_pos].mlen;
891
704
  best_off = opt[last_pos].off;
892
705
  cur = last_pos - best_mlen;
893
706
 
894
- /* store sequence */
895
- _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
896
- opt[0].mlen = 1;
897
-
898
- while (1) {
899
- mlen = opt[cur].mlen;
900
- offset = opt[cur].off;
901
- opt[cur].mlen = best_mlen;
902
- opt[cur].off = best_off;
903
- best_mlen = mlen;
904
- best_off = offset;
905
- if (mlen > cur) break;
906
- cur -= mlen;
907
- }
908
-
909
- for (u = 0; u <= last_pos; ) {
910
- u += opt[u].mlen;
911
- }
707
+ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
708
+ assert(opt[0].mlen == 1);
709
+
710
+ /* reverse traversal */
711
+ DEBUGLOG(7, "start reverse traversal (last_pos:%u, cur:%u)",
712
+ last_pos, cur);
713
+ { U32 selectedMatchLength = best_mlen;
714
+ U32 selectedOffset = best_off;
715
+ U32 pos = cur;
716
+ while (1) {
717
+ U32 const mlen = opt[pos].mlen;
718
+ U32 const off = opt[pos].off;
719
+ opt[pos].mlen = selectedMatchLength;
720
+ opt[pos].off = selectedOffset;
721
+ selectedMatchLength = mlen;
722
+ selectedOffset = off;
723
+ if (mlen > pos) break;
724
+ pos -= mlen;
725
+ } }
912
726
 
913
- for (cur=0; cur < last_pos; ) {
914
- mlen = opt[cur].mlen;
915
- if (mlen == 1) { ip++; cur++; continue; }
916
- offset = opt[cur].off;
917
- cur += mlen;
918
- litLength = (U32)(ip - anchor);
919
-
920
- if (offset > ZSTD_REP_MOVE_OPT) {
921
- rep[2] = rep[1];
922
- rep[1] = rep[0];
923
- rep[0] = offset - ZSTD_REP_MOVE_OPT;
924
- offset--;
925
- } else {
926
- if (offset != 0) {
927
- best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
928
- if (offset != 1) rep[2] = rep[1];
727
+ /* save sequences */
728
+ { U32 pos;
729
+ for (pos=0; pos < last_pos; ) {
730
+ U32 const llen = (U32)(ip - anchor);
731
+ U32 const mlen = opt[pos].mlen;
732
+ U32 const offset = opt[pos].off;
733
+ if (mlen == 1) { ip++; pos++; continue; } /* literal position => move on */
734
+ pos += mlen; ip += mlen;
735
+
736
+ /* repcodes update : like ZSTD_updateRep(), but update in place */
737
+ if (offset >= ZSTD_REP_NUM) { /* full offset */
738
+ rep[2] = rep[1];
929
739
  rep[1] = rep[0];
930
- rep[0] = best_off;
740
+ rep[0] = offset - ZSTD_REP_MOVE;
741
+ } else { /* repcode */
742
+ U32 const repCode = offset + (llen==0);
743
+ if (repCode) { /* note : if repCode==0, no change */
744
+ U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
745
+ if (repCode >= 2) rep[2] = rep[1];
746
+ rep[1] = rep[0];
747
+ rep[0] = currentOffset;
748
+ }
931
749
  }
932
750
 
933
- if (litLength==0) offset--;
934
- }
935
-
936
- ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH);
937
- ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
938
- anchor = ip = ip + mlen;
939
- } } /* for (cur=0; cur < last_pos; ) */
751
+ ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
752
+ ZSTD_storeSeq(seqStorePtr, llen, anchor, offset, mlen-MINMATCH);
753
+ anchor = ip;
754
+ } }
755
+ ZSTD_setLog2Prices(optStatePtr);
756
+ } /* while (ip < ilimit) */
940
757
 
941
758
  /* Save reps for next block */
942
759
  { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }
@@ -946,12 +763,23 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
946
763
  }
947
764
 
948
765
 
766
+ size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
767
+ {
768
+ DEBUGLOG(5, "ZSTD_compressBlock_btopt");
769
+ return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
770
+ }
771
+
772
+ size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
773
+ {
774
+ return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
775
+ }
776
+
949
777
  size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
950
778
  {
951
- return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
779
+ return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
952
780
  }
953
781
 
954
782
  size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
955
783
  {
956
- return ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
784
+ return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
957
785
  }