zstd-ruby 1.3.4.0 → 1.3.5.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +56 -10
  4. data/ext/zstdruby/libzstd/README.md +4 -0
  5. data/ext/zstdruby/libzstd/common/bitstream.h +6 -19
  6. data/ext/zstdruby/libzstd/common/compiler.h +3 -3
  7. data/ext/zstdruby/libzstd/common/cpu.h +1 -2
  8. data/ext/zstdruby/libzstd/common/debug.c +44 -0
  9. data/ext/zstdruby/libzstd/common/debug.h +123 -0
  10. data/ext/zstdruby/libzstd/common/entropy_common.c +16 -1
  11. data/ext/zstdruby/libzstd/common/fse.h +45 -41
  12. data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
  13. data/ext/zstdruby/libzstd/common/huf.h +34 -27
  14. data/ext/zstdruby/libzstd/common/pool.c +89 -32
  15. data/ext/zstdruby/libzstd/common/pool.h +29 -19
  16. data/ext/zstdruby/libzstd/common/zstd_common.c +0 -5
  17. data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -37
  18. data/ext/zstdruby/libzstd/compress/fse_compress.c +28 -163
  19. data/ext/zstdruby/libzstd/compress/hist.c +195 -0
  20. data/ext/zstdruby/libzstd/compress/hist.h +92 -0
  21. data/ext/zstdruby/libzstd/compress/huf_compress.c +14 -6
  22. data/ext/zstdruby/libzstd/compress/zstd_compress.c +798 -350
  23. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +120 -34
  24. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +247 -87
  25. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -1
  26. data/ext/zstdruby/libzstd/compress/zstd_fast.c +177 -56
  27. data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -1
  28. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +331 -65
  29. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +13 -0
  30. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +15 -20
  31. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -2
  32. data/ext/zstdruby/libzstd/compress/zstd_opt.c +503 -300
  33. data/ext/zstdruby/libzstd/compress/zstd_opt.h +7 -0
  34. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +122 -47
  35. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +5 -5
  36. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +325 -325
  37. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +80 -43
  38. data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
  39. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +5 -5
  40. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +12 -61
  41. data/ext/zstdruby/libzstd/zstd.h +137 -69
  42. data/lib/zstd-ruby/version.rb +1 -1
  43. metadata +7 -3
@@ -20,10 +20,13 @@ extern "C" {
20
20
 
21
21
  void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
22
22
  ZSTD_compressionParameters const* cParams,
23
- void const* end);
23
+ void const* end, ZSTD_dictTableLoadMethod_e dtlm);
24
24
  size_t ZSTD_compressBlock_doubleFast(
25
25
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
26
26
  ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
27
+ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
28
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
29
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
27
30
  size_t ZSTD_compressBlock_doubleFast_extDict(
28
31
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
29
32
  ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
@@ -14,7 +14,7 @@
14
14
 
15
15
  void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
16
16
  ZSTD_compressionParameters const* cParams,
17
- void const* end)
17
+ void const* end, ZSTD_dictTableLoadMethod_e dtlm)
18
18
  {
19
19
  U32* const hashTable = ms->hashTable;
20
20
  U32 const hBits = cParams->hashLog;
@@ -34,6 +34,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
34
34
  size_t const hash = ZSTD_hashPtr(ip + i, hBits, mls);
35
35
  if (i == 0 || hashTable[hash] == 0)
36
36
  hashTable[hash] = current + i;
37
+ /* Only load extra positions for ZSTD_dtlm_full */
38
+ if (dtlm == ZSTD_dtlm_fast)
39
+ break;
37
40
  }
38
41
  }
39
42
  }
@@ -42,26 +45,58 @@ FORCE_INLINE_TEMPLATE
42
45
  size_t ZSTD_compressBlock_fast_generic(
43
46
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
44
47
  void const* src, size_t srcSize,
45
- U32 const hlog, U32 const stepSize, U32 const mls)
48
+ U32 const hlog, U32 stepSize, U32 const mls,
49
+ ZSTD_dictMode_e const dictMode)
46
50
  {
47
51
  U32* const hashTable = ms->hashTable;
48
52
  const BYTE* const base = ms->window.base;
49
53
  const BYTE* const istart = (const BYTE*)src;
50
54
  const BYTE* ip = istart;
51
55
  const BYTE* anchor = istart;
52
- const U32 lowestIndex = ms->window.dictLimit;
53
- const BYTE* const lowest = base + lowestIndex;
56
+ const U32 prefixStartIndex = ms->window.dictLimit;
57
+ const BYTE* const prefixStart = base + prefixStartIndex;
54
58
  const BYTE* const iend = istart + srcSize;
55
59
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
56
60
  U32 offset_1=rep[0], offset_2=rep[1];
57
61
  U32 offsetSaved = 0;
58
62
 
63
+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
64
+ const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ?
65
+ dms->hashTable : NULL;
66
+ const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
67
+ dms->window.dictLimit : 0;
68
+ const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
69
+ dms->window.base : NULL;
70
+ const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
71
+ dictBase + dictStartIndex : NULL;
72
+ const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
73
+ dms->window.nextSrc : NULL;
74
+ const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
75
+ prefixStartIndex - (U32)(dictEnd - dictBase) :
76
+ 0;
77
+ const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
78
+
79
+ assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
80
+
81
+ /* otherwise, we would get index underflow when translating a dict index
82
+ * into a local index */
83
+ assert(dictMode != ZSTD_dictMatchState
84
+ || prefixStartIndex >= (U32)(dictEnd - dictBase));
85
+
59
86
  /* init */
60
- ip += (ip==lowest);
61
- { U32 const maxRep = (U32)(ip-lowest);
87
+ stepSize += !stepSize; /* support stepSize of 0 */
88
+ ip += (dictAndPrefixLength == 0);
89
+ if (dictMode == ZSTD_noDict) {
90
+ U32 const maxRep = (U32)(ip - prefixStart);
62
91
  if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
63
92
  if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
64
93
  }
94
+ if (dictMode == ZSTD_dictMatchState) {
95
+ /* dictMatchState repCode checks don't currently handle repCode == 0
96
+ * disabling. */
97
+ assert(offset_1 <= dictAndPrefixLength);
98
+ assert(offset_2 <= dictAndPrefixLength);
99
+ }
65
100
 
66
101
  /* Main Search Loop */
67
102
  while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
@@ -70,26 +105,62 @@ size_t ZSTD_compressBlock_fast_generic(
70
105
  U32 const current = (U32)(ip-base);
71
106
  U32 const matchIndex = hashTable[h];
72
107
  const BYTE* match = base + matchIndex;
108
+ const U32 repIndex = current + 1 - offset_1;
109
+ const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
110
+ && repIndex < prefixStartIndex) ?
111
+ dictBase + (repIndex - dictIndexDelta) :
112
+ base + repIndex;
73
113
  hashTable[h] = current; /* update hash table */
74
114
 
75
- if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
115
+ if ( (dictMode == ZSTD_dictMatchState)
116
+ && ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
117
+ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
118
+ const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
119
+ mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
120
+ ip++;
121
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
122
+ } else if ( dictMode == ZSTD_noDict
123
+ && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
76
124
  mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
77
125
  ip++;
78
126
  ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
79
- } else {
80
- if ( (matchIndex <= lowestIndex)
81
- || (MEM_read32(match) != MEM_read32(ip)) ) {
127
+ } else if ( (matchIndex <= prefixStartIndex)
128
+ || (MEM_read32(match) != MEM_read32(ip)) ) {
129
+ if (dictMode == ZSTD_dictMatchState) {
130
+ U32 const dictMatchIndex = dictHashTable[h];
131
+ const BYTE* dictMatch = dictBase + dictMatchIndex;
132
+ if (dictMatchIndex <= dictStartIndex ||
133
+ MEM_read32(dictMatch) != MEM_read32(ip)) {
134
+ assert(stepSize >= 1);
135
+ ip += ((ip-anchor) >> kSearchStrength) + stepSize;
136
+ continue;
137
+ } else {
138
+ /* found a dict match */
139
+ U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
140
+ mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
141
+ while (((ip>anchor) & (dictMatch>dictStart))
142
+ && (ip[-1] == dictMatch[-1])) {
143
+ ip--; dictMatch--; mLength++;
144
+ } /* catch up */
145
+ offset_2 = offset_1;
146
+ offset_1 = offset;
147
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
148
+ }
149
+ } else {
82
150
  assert(stepSize >= 1);
83
151
  ip += ((ip-anchor) >> kSearchStrength) + stepSize;
84
152
  continue;
85
153
  }
154
+ } else {
155
+ /* found a regular match */
156
+ U32 const offset = (U32)(ip-match);
86
157
  mLength = ZSTD_count(ip+4, match+4, iend) + 4;
87
- { U32 const offset = (U32)(ip-match);
88
- while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
89
- offset_2 = offset_1;
90
- offset_1 = offset;
91
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
92
- } }
158
+ while (((ip>anchor) & (match>prefixStart))
159
+ && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
160
+ offset_2 = offset_1;
161
+ offset_1 = offset;
162
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
163
+ }
93
164
 
94
165
  /* match found */
95
166
  ip += mLength;
@@ -99,19 +170,43 @@ size_t ZSTD_compressBlock_fast_generic(
99
170
  /* Fill Table */
100
171
  hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
101
172
  hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
173
+
102
174
  /* check immediate repcode */
103
- while ( (ip <= ilimit)
104
- && ( (offset_2>0)
105
- & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
106
- /* store sequence */
107
- size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
108
- { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
109
- hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
110
- ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
111
- ip += rLength;
112
- anchor = ip;
113
- continue; /* faster when present ... (?) */
114
- } } }
175
+ if (dictMode == ZSTD_dictMatchState) {
176
+ while (ip <= ilimit) {
177
+ U32 const current2 = (U32)(ip-base);
178
+ U32 const repIndex2 = current2 - offset_2;
179
+ const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
180
+ dictBase - dictIndexDelta + repIndex2 :
181
+ base + repIndex2;
182
+ if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
183
+ && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
184
+ const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
185
+ size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
186
+ U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
187
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
188
+ hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
189
+ ip += repLength2;
190
+ anchor = ip;
191
+ continue;
192
+ }
193
+ break;
194
+ }
195
+ }
196
+
197
+ if (dictMode == ZSTD_noDict) {
198
+ while ( (ip <= ilimit)
199
+ && ( (offset_2>0)
200
+ & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
201
+ /* store sequence */
202
+ size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
203
+ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
204
+ hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
205
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
206
+ ip += rLength;
207
+ anchor = ip;
208
+ continue; /* faster when present ... (?) */
209
+ } } } }
115
210
 
116
211
  /* save reps for next block */
117
212
  rep[0] = offset_1 ? offset_1 : offsetSaved;
@@ -129,17 +224,40 @@ size_t ZSTD_compressBlock_fast(
129
224
  U32 const hlog = cParams->hashLog;
130
225
  U32 const mls = cParams->searchLength;
131
226
  U32 const stepSize = cParams->targetLength;
227
+ assert(ms->dictMatchState == NULL);
132
228
  switch(mls)
133
229
  {
134
230
  default: /* includes case 3 */
135
231
  case 4 :
136
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
232
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_noDict);
137
233
  case 5 :
138
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
234
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_noDict);
139
235
  case 6 :
140
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
236
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_noDict);
141
237
  case 7 :
142
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
238
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_noDict);
239
+ }
240
+ }
241
+
242
+ size_t ZSTD_compressBlock_fast_dictMatchState(
243
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
244
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
245
+ {
246
+ U32 const hlog = cParams->hashLog;
247
+ U32 const mls = cParams->searchLength;
248
+ U32 const stepSize = cParams->targetLength;
249
+ assert(ms->dictMatchState != NULL);
250
+ switch(mls)
251
+ {
252
+ default: /* includes case 3 */
253
+ case 4 :
254
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4, ZSTD_dictMatchState);
255
+ case 5 :
256
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5, ZSTD_dictMatchState);
257
+ case 6 :
258
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6, ZSTD_dictMatchState);
259
+ case 7 :
260
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7, ZSTD_dictMatchState);
143
261
  }
144
262
  }
145
263
 
@@ -147,7 +265,7 @@ size_t ZSTD_compressBlock_fast(
147
265
  static size_t ZSTD_compressBlock_fast_extDict_generic(
148
266
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
149
267
  void const* src, size_t srcSize,
150
- U32 const hlog, U32 const stepSize, U32 const mls)
268
+ U32 const hlog, U32 stepSize, U32 const mls)
151
269
  {
152
270
  U32* hashTable = ms->hashTable;
153
271
  const BYTE* const base = ms->window.base;
@@ -155,45 +273,48 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
155
273
  const BYTE* const istart = (const BYTE*)src;
156
274
  const BYTE* ip = istart;
157
275
  const BYTE* anchor = istart;
158
- const U32 lowestIndex = ms->window.lowLimit;
159
- const BYTE* const dictStart = dictBase + lowestIndex;
160
- const U32 dictLimit = ms->window.dictLimit;
161
- const BYTE* const lowPrefixPtr = base + dictLimit;
162
- const BYTE* const dictEnd = dictBase + dictLimit;
276
+ const U32 dictStartIndex = ms->window.lowLimit;
277
+ const BYTE* const dictStart = dictBase + dictStartIndex;
278
+ const U32 prefixStartIndex = ms->window.dictLimit;
279
+ const BYTE* const prefixStart = base + prefixStartIndex;
280
+ const BYTE* const dictEnd = dictBase + prefixStartIndex;
163
281
  const BYTE* const iend = istart + srcSize;
164
282
  const BYTE* const ilimit = iend - 8;
165
283
  U32 offset_1=rep[0], offset_2=rep[1];
166
284
 
285
+ stepSize += !stepSize; /* support stepSize == 0 */
286
+
167
287
  /* Search Loop */
168
288
  while (ip < ilimit) { /* < instead of <=, because (ip+1) */
169
289
  const size_t h = ZSTD_hashPtr(ip, hlog, mls);
170
- const U32 matchIndex = hashTable[h];
171
- const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
172
- const BYTE* match = matchBase + matchIndex;
173
- const U32 current = (U32)(ip-base);
174
- const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
175
- const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
176
- const BYTE* repMatch = repBase + repIndex;
290
+ const U32 matchIndex = hashTable[h];
291
+ const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
292
+ const BYTE* match = matchBase + matchIndex;
293
+ const U32 current = (U32)(ip-base);
294
+ const U32 repIndex = current + 1 - offset_1;
295
+ const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
296
+ const BYTE* const repMatch = repBase + repIndex;
177
297
  size_t mLength;
178
298
  hashTable[h] = current; /* update hash table */
299
+ assert(offset_1 <= current +1); /* check repIndex */
179
300
 
180
- if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
301
+ if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
181
302
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
182
- const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
183
- mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
303
+ const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
304
+ mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
184
305
  ip++;
185
306
  ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
186
307
  } else {
187
- if ( (matchIndex < lowestIndex) ||
308
+ if ( (matchIndex < dictStartIndex) ||
188
309
  (MEM_read32(match) != MEM_read32(ip)) ) {
189
310
  assert(stepSize >= 1);
190
311
  ip += ((ip-anchor) >> kSearchStrength) + stepSize;
191
312
  continue;
192
313
  }
193
- { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
194
- const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
314
+ { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
315
+ const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
195
316
  U32 offset;
196
- mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
317
+ mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
197
318
  while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
198
319
  offset = current - matchIndex;
199
320
  offset_2 = offset_1;
@@ -213,11 +334,11 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
213
334
  while (ip <= ilimit) {
214
335
  U32 const current2 = (U32)(ip-base);
215
336
  U32 const repIndex2 = current2 - offset_2;
216
- const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
217
- if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
337
+ const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
338
+ if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
218
339
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
219
- const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
220
- size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
340
+ const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
341
+ size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
221
342
  U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
222
343
  ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
223
344
  hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
@@ -20,10 +20,13 @@ extern "C" {
20
20
 
21
21
  void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
22
22
  ZSTD_compressionParameters const* cParams,
23
- void const* end);
23
+ void const* end, ZSTD_dictTableLoadMethod_e dtlm);
24
24
  size_t ZSTD_compressBlock_fast(
25
25
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
26
26
  ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
27
+ size_t ZSTD_compressBlock_fast_dictMatchState(
28
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
29
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
27
30
  size_t ZSTD_compressBlock_fast_extDict(
28
31
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
29
32
  ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
@@ -62,7 +62,7 @@ void ZSTD_updateDUBT(
62
62
  static void ZSTD_insertDUBT1(
63
63
  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
64
64
  U32 current, const BYTE* inputEnd,
65
- U32 nbCompares, U32 btLow, int extDict)
65
+ U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode)
66
66
  {
67
67
  U32* const bt = ms->chainTable;
68
68
  U32 const btLog = cParams->chainLog - 1;
@@ -92,10 +92,12 @@ static void ZSTD_insertDUBT1(
92
92
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
93
93
  assert(matchIndex < current);
94
94
 
95
- if ( (!extDict)
95
+ if ( (dictMode != ZSTD_extDict)
96
96
  || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
97
97
  || (current < dictLimit) /* both in extDict */) {
98
- const BYTE* const mBase = !extDict || ((matchIndex+matchLength) >= dictLimit) ? base : dictBase;
98
+ const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
99
+ || (matchIndex+matchLength >= dictLimit)) ?
100
+ base : dictBase;
99
101
  assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
100
102
  || (current < dictLimit) );
101
103
  match = mBase + matchIndex;
@@ -138,12 +140,90 @@ static void ZSTD_insertDUBT1(
138
140
  }
139
141
 
140
142
 
143
+ static size_t ZSTD_DUBT_findBetterDictMatch (
144
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
145
+ const BYTE* const ip, const BYTE* const iend,
146
+ size_t* offsetPtr,
147
+ size_t bestLength,
148
+ U32 nbCompares,
149
+ U32 const mls,
150
+ const ZSTD_dictMode_e dictMode) {
151
+ const ZSTD_matchState_t * const dms = ms->dictMatchState;
152
+ const U32 * const dictHashTable = dms->hashTable;
153
+ U32 const hashLog = cParams->hashLog;
154
+ size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
155
+ U32 dictMatchIndex = dictHashTable[h];
156
+
157
+ const BYTE* const base = ms->window.base;
158
+ const BYTE* const prefixStart = base + ms->window.dictLimit;
159
+ U32 const current = (U32)(ip-base);
160
+ const BYTE* const dictBase = dms->window.base;
161
+ const BYTE* const dictEnd = dms->window.nextSrc;
162
+ U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
163
+ U32 const dictLowLimit = dms->window.lowLimit;
164
+ U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
165
+
166
+ U32* const dictBt = dms->chainTable;
167
+ U32 const btLog = cParams->chainLog - 1;
168
+ U32 const btMask = (1 << btLog) - 1;
169
+ U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
170
+
171
+ size_t commonLengthSmaller=0, commonLengthLarger=0;
172
+ U32 matchEndIdx = current+8+1;
173
+
174
+ (void)dictMode;
175
+ assert(dictMode == ZSTD_dictMatchState);
176
+
177
+ while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
178
+ U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
179
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
180
+ const BYTE* match = dictBase + dictMatchIndex;
181
+ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
182
+ if (dictMatchIndex+matchLength >= dictHighLimit)
183
+ match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */
184
+
185
+ if (matchLength > bestLength) {
186
+ U32 matchIndex = dictMatchIndex + dictIndexDelta;
187
+ if (matchLength > matchEndIdx - matchIndex)
188
+ matchEndIdx = matchIndex + (U32)matchLength;
189
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
190
+ DEBUGLOG(9, "ZSTD_DUBT_findBestDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
191
+ current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
192
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
193
+ }
194
+ if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
195
+ break; /* drop, to guarantee consistency (miss a little bit of compression) */
196
+ }
197
+ }
198
+
199
+ if (match[matchLength] < ip[matchLength]) {
200
+ if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
201
+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
202
+ dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
203
+ } else {
204
+ /* match is larger than current */
205
+ if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
206
+ commonLengthLarger = matchLength;
207
+ dictMatchIndex = nextPtr[0];
208
+ }
209
+ }
210
+
211
+ if (bestLength >= MINMATCH) {
212
+ U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
213
+ DEBUGLOG(8, "ZSTD_DUBT_findBestDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
214
+ current, (U32)bestLength, (U32)*offsetPtr, mIndex);
215
+ }
216
+ return bestLength;
217
+
218
+ }
219
+
220
+
141
221
  static size_t ZSTD_DUBT_findBestMatch (
142
222
  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
143
223
  const BYTE* const ip, const BYTE* const iend,
144
224
  size_t* offsetPtr,
145
225
  U32 const mls,
146
- U32 const extDict)
226
+ const ZSTD_dictMode_e dictMode)
147
227
  {
148
228
  U32* const hashTable = ms->hashTable;
149
229
  U32 const hashLog = cParams->hashLog;
@@ -196,7 +276,7 @@ static size_t ZSTD_DUBT_findBestMatch (
196
276
  U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
197
277
  U32 const nextCandidateIdx = *nextCandidateIdxPtr;
198
278
  ZSTD_insertDUBT1(ms, cParams, matchIndex, iend,
199
- nbCandidates, unsortLimit, extDict);
279
+ nbCandidates, unsortLimit, dictMode);
200
280
  matchIndex = nextCandidateIdx;
201
281
  nbCandidates++;
202
282
  }
@@ -221,7 +301,7 @@ static size_t ZSTD_DUBT_findBestMatch (
221
301
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
222
302
  const BYTE* match;
223
303
 
224
- if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
304
+ if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
225
305
  match = base + matchIndex;
226
306
  matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
227
307
  } else {
@@ -259,6 +339,10 @@ static size_t ZSTD_DUBT_findBestMatch (
259
339
 
260
340
  *smallerPtr = *largerPtr = 0;
261
341
 
342
+ if (dictMode == ZSTD_dictMatchState && nbCompares) {
343
+ bestLength = ZSTD_DUBT_findBetterDictMatch(ms, cParams, ip, iend, offsetPtr, bestLength, nbCompares, mls, dictMode);
344
+ }
345
+
262
346
  assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
263
347
  ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
264
348
  if (bestLength >= MINMATCH) {
@@ -272,16 +356,17 @@ static size_t ZSTD_DUBT_findBestMatch (
272
356
 
273
357
 
274
358
  /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
275
- static size_t ZSTD_BtFindBestMatch (
359
+ FORCE_INLINE_TEMPLATE size_t ZSTD_BtFindBestMatch (
276
360
  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
277
361
  const BYTE* const ip, const BYTE* const iLimit,
278
362
  size_t* offsetPtr,
279
- const U32 mls /* template */)
363
+ const U32 mls /* template */,
364
+ const ZSTD_dictMode_e dictMode)
280
365
  {
281
366
  DEBUGLOG(7, "ZSTD_BtFindBestMatch");
282
367
  if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
283
368
  ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
284
- return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 0);
369
+ return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, dictMode);
285
370
  }
286
371
 
287
372
 
@@ -293,29 +378,31 @@ static size_t ZSTD_BtFindBestMatch_selectMLS (
293
378
  switch(cParams->searchLength)
294
379
  {
295
380
  default : /* includes case 3 */
296
- case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4);
297
- case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5);
381
+ case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
382
+ case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
298
383
  case 7 :
299
- case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6);
384
+ case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
300
385
  }
301
386
  }
302
387
 
303
388
 
304
- /** Tree updater, providing best match */
305
- static size_t ZSTD_BtFindBestMatch_extDict (
389
+ static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
306
390
  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
307
- const BYTE* const ip, const BYTE* const iLimit,
308
- size_t* offsetPtr,
309
- const U32 mls)
391
+ const BYTE* ip, const BYTE* const iLimit,
392
+ size_t* offsetPtr)
310
393
  {
311
- DEBUGLOG(7, "ZSTD_BtFindBestMatch_extDict");
312
- if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
313
- ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
314
- return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 1);
394
+ switch(cParams->searchLength)
395
+ {
396
+ default : /* includes case 3 */
397
+ case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
398
+ case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
399
+ case 7 :
400
+ case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
401
+ }
315
402
  }
316
403
 
317
404
 
318
- static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
405
+ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
319
406
  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
320
407
  const BYTE* ip, const BYTE* const iLimit,
321
408
  size_t* offsetPtr)
@@ -323,10 +410,10 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
323
410
  switch(cParams->searchLength)
324
411
  {
325
412
  default : /* includes case 3 */
326
- case 4 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 4);
327
- case 5 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 5);
413
+ case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
414
+ case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
328
415
  case 7 :
329
- case 6 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 6);
416
+ case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
330
417
  }
331
418
  }
332
419
 
@@ -376,7 +463,7 @@ size_t ZSTD_HcFindBestMatch_generic (
376
463
  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
377
464
  const BYTE* const ip, const BYTE* const iLimit,
378
465
  size_t* offsetPtr,
379
- const U32 mls, const U32 extDict)
466
+ const U32 mls, const ZSTD_dictMode_e dictMode)
380
467
  {
381
468
  U32* const chainTable = ms->chainTable;
382
469
  const U32 chainSize = (1 << cParams->chainLog);
@@ -397,7 +484,7 @@ size_t ZSTD_HcFindBestMatch_generic (
397
484
 
398
485
  for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
399
486
  size_t currentMl=0;
400
- if ((!extDict) || matchIndex >= dictLimit) {
487
+ if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
401
488
  const BYTE* const match = base + matchIndex;
402
489
  if (match[ml] == ip[ml]) /* potentially better */
403
490
  currentMl = ZSTD_count(ip, match, iLimit);
@@ -419,6 +506,37 @@ size_t ZSTD_HcFindBestMatch_generic (
419
506
  matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
420
507
  }
421
508
 
509
+ if (dictMode == ZSTD_dictMatchState) {
510
+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
511
+ const U32* const dmsChainTable = dms->chainTable;
512
+ const U32 dmsLowestIndex = dms->window.dictLimit;
513
+ const BYTE* const dmsBase = dms->window.base;
514
+ const BYTE* const dmsEnd = dms->window.nextSrc;
515
+ const U32 dmsSize = (U32)(dmsEnd - dmsBase);
516
+ const U32 dmsIndexDelta = dictLimit - dmsSize;
517
+ const U32 dmsMinChain = dmsSize > chainSize ? dmsSize - chainSize : 0;
518
+
519
+ matchIndex = dms->hashTable[ZSTD_hashPtr(ip, cParams->hashLog, mls)];
520
+
521
+ for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
522
+ size_t currentMl=0;
523
+ const BYTE* const match = dmsBase + matchIndex;
524
+ assert(match+4 <= dmsEnd);
525
+ if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
526
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
527
+
528
+ /* save best solution */
529
+ if (currentMl > ml) {
530
+ ml = currentMl;
531
+ *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
532
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
533
+ }
534
+
535
+ if (matchIndex <= dmsMinChain) break;
536
+ matchIndex = dmsChainTable[matchIndex & chainMask];
537
+ }
538
+ }
539
+
422
540
  return ml;
423
541
  }
424
542
 
@@ -431,10 +549,26 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
431
549
  switch(cParams->searchLength)
432
550
  {
433
551
  default : /* includes case 3 */
434
- case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 0);
435
- case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 0);
552
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
553
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
436
554
  case 7 :
437
- case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 0);
555
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
556
+ }
557
+ }
558
+
559
+
560
+ static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
561
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
562
+ const BYTE* ip, const BYTE* const iLimit,
563
+ size_t* offsetPtr)
564
+ {
565
+ switch(cParams->searchLength)
566
+ {
567
+ default : /* includes case 3 */
568
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
569
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
570
+ case 7 :
571
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
438
572
  }
439
573
  }
440
574
 
@@ -442,15 +576,15 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
442
576
  FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
443
577
  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
444
578
  const BYTE* ip, const BYTE* const iLimit,
445
- size_t* const offsetPtr)
579
+ size_t* offsetPtr)
446
580
  {
447
581
  switch(cParams->searchLength)
448
582
  {
449
583
  default : /* includes case 3 */
450
- case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 1);
451
- case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 1);
584
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
585
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
452
586
  case 7 :
453
- case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 1);
587
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
454
588
  }
455
589
  }
456
590
 
@@ -464,28 +598,56 @@ size_t ZSTD_compressBlock_lazy_generic(
464
598
  U32 rep[ZSTD_REP_NUM],
465
599
  ZSTD_compressionParameters const* cParams,
466
600
  const void* src, size_t srcSize,
467
- const U32 searchMethod, const U32 depth)
601
+ const U32 searchMethod, const U32 depth,
602
+ ZSTD_dictMode_e const dictMode)
468
603
  {
469
604
  const BYTE* const istart = (const BYTE*)src;
470
605
  const BYTE* ip = istart;
471
606
  const BYTE* anchor = istart;
472
607
  const BYTE* const iend = istart + srcSize;
473
608
  const BYTE* const ilimit = iend - 8;
474
- const BYTE* const base = ms->window.base + ms->window.dictLimit;
609
+ const BYTE* const base = ms->window.base;
610
+ const U32 prefixLowestIndex = ms->window.dictLimit;
611
+ const BYTE* const prefixLowest = base + prefixLowestIndex;
475
612
 
476
613
  typedef size_t (*searchMax_f)(
477
614
  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
478
615
  const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
479
- searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
616
+ searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
617
+ (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
618
+ (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS);
480
619
  U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
481
620
 
621
+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
622
+ const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
623
+ dms->window.dictLimit : 0;
624
+ const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
625
+ dms->window.base : NULL;
626
+ const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
627
+ dictBase + dictLowestIndex : NULL;
628
+ const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
629
+ dms->window.nextSrc : NULL;
630
+ const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
631
+ prefixLowestIndex - (U32)(dictEnd - dictBase) :
632
+ 0;
633
+ const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
634
+
635
+ (void)dictMode;
636
+
482
637
  /* init */
483
- ip += (ip==base);
638
+ ip += (dictAndPrefixLength == 0);
484
639
  ms->nextToUpdate3 = ms->nextToUpdate;
485
- { U32 const maxRep = (U32)(ip-base);
640
+ if (dictMode == ZSTD_noDict) {
641
+ U32 const maxRep = (U32)(ip - prefixLowest);
486
642
  if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
487
643
  if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
488
644
  }
645
+ if (dictMode == ZSTD_dictMatchState) {
646
+ /* dictMatchState repCode checks don't currently handle repCode == 0
647
+ * disabling. */
648
+ assert(offset_1 <= dictAndPrefixLength);
649
+ assert(offset_2 <= dictAndPrefixLength);
650
+ }
489
651
 
490
652
  /* Match Loop */
491
653
  while (ip < ilimit) {
@@ -494,8 +656,21 @@ size_t ZSTD_compressBlock_lazy_generic(
494
656
  const BYTE* start=ip+1;
495
657
 
496
658
  /* check repCode */
497
- if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
498
- /* repcode : we take it */
659
+ if (dictMode == ZSTD_dictMatchState) {
660
+ const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
661
+ const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
662
+ && repIndex < prefixLowestIndex) ?
663
+ dictBase + (repIndex - dictIndexDelta) :
664
+ base + repIndex;
665
+ if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
666
+ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
667
+ const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
668
+ matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
669
+ if (depth==0) goto _storeSequence;
670
+ }
671
+ }
672
+ if ( dictMode == ZSTD_noDict
673
+ && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
499
674
  matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
500
675
  if (depth==0) goto _storeSequence;
501
676
  }
@@ -516,13 +691,29 @@ size_t ZSTD_compressBlock_lazy_generic(
516
691
  if (depth>=1)
517
692
  while (ip<ilimit) {
518
693
  ip ++;
519
- if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
694
+ if ( (dictMode == ZSTD_noDict)
695
+ && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
520
696
  size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
521
697
  int const gain2 = (int)(mlRep * 3);
522
698
  int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
523
699
  if ((mlRep >= 4) && (gain2 > gain1))
524
700
  matchLength = mlRep, offset = 0, start = ip;
525
701
  }
702
+ if (dictMode == ZSTD_dictMatchState) {
703
+ const U32 repIndex = (U32)(ip - base) - offset_1;
704
+ const BYTE* repMatch = repIndex < prefixLowestIndex ?
705
+ dictBase + (repIndex - dictIndexDelta) :
706
+ base + repIndex;
707
+ if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
708
+ && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
709
+ const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
710
+ size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
711
+ int const gain2 = (int)(mlRep * 3);
712
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
713
+ if ((mlRep >= 4) && (gain2 > gain1))
714
+ matchLength = mlRep, offset = 0, start = ip;
715
+ }
716
+ }
526
717
  { size_t offset2=99999999;
527
718
  size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
528
719
  int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
@@ -535,12 +726,28 @@ size_t ZSTD_compressBlock_lazy_generic(
535
726
  /* let's find an even better one */
536
727
  if ((depth==2) && (ip<ilimit)) {
537
728
  ip ++;
538
- if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
539
- size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
540
- int const gain2 = (int)(ml2 * 4);
729
+ if ( (dictMode == ZSTD_noDict)
730
+ && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
731
+ size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
732
+ int const gain2 = (int)(mlRep * 4);
541
733
  int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
542
- if ((ml2 >= 4) && (gain2 > gain1))
543
- matchLength = ml2, offset = 0, start = ip;
734
+ if ((mlRep >= 4) && (gain2 > gain1))
735
+ matchLength = mlRep, offset = 0, start = ip;
736
+ }
737
+ if (dictMode == ZSTD_dictMatchState) {
738
+ const U32 repIndex = (U32)(ip - base) - offset_1;
739
+ const BYTE* repMatch = repIndex < prefixLowestIndex ?
740
+ dictBase + (repIndex - dictIndexDelta) :
741
+ base + repIndex;
742
+ if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
743
+ && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
744
+ const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
745
+ size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
746
+ int const gain2 = (int)(mlRep * 4);
747
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
748
+ if ((mlRep >= 4) && (gain2 > gain1))
749
+ matchLength = mlRep, offset = 0, start = ip;
750
+ }
544
751
  }
545
752
  { size_t offset2=99999999;
546
753
  size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
@@ -560,9 +767,17 @@ size_t ZSTD_compressBlock_lazy_generic(
560
767
  */
561
768
  /* catch up */
562
769
  if (offset) {
563
- while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > base))
564
- && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
565
- { start--; matchLength++; }
770
+ if (dictMode == ZSTD_noDict) {
771
+ while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
772
+ && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
773
+ { start--; matchLength++; }
774
+ }
775
+ if (dictMode == ZSTD_dictMatchState) {
776
+ U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
777
+ const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
778
+ const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
779
+ while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
780
+ }
566
781
  offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
567
782
  }
568
783
  /* store sequence */
@@ -573,16 +788,39 @@ _storeSequence:
573
788
  }
574
789
 
575
790
  /* check immediate repcode */
576
- while ( ((ip <= ilimit) & (offset_2>0))
577
- && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
578
- /* store sequence */
579
- matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
580
- offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
581
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
582
- ip += matchLength;
583
- anchor = ip;
584
- continue; /* faster when present ... (?) */
585
- } }
791
+ if (dictMode == ZSTD_dictMatchState) {
792
+ while (ip <= ilimit) {
793
+ U32 const current2 = (U32)(ip-base);
794
+ U32 const repIndex = current2 - offset_2;
795
+ const BYTE* repMatch = dictMode == ZSTD_dictMatchState
796
+ && repIndex < prefixLowestIndex ?
797
+ dictBase - dictIndexDelta + repIndex :
798
+ base + repIndex;
799
+ if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
800
+ && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
801
+ const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
802
+ matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
803
+ offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
804
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
805
+ ip += matchLength;
806
+ anchor = ip;
807
+ continue;
808
+ }
809
+ break;
810
+ }
811
+ }
812
+
813
+ if (dictMode == ZSTD_noDict) {
814
+ while ( ((ip <= ilimit) & (offset_2>0))
815
+ && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
816
+ /* store sequence */
817
+ matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
818
+ offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
819
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
820
+ ip += matchLength;
821
+ anchor = ip;
822
+ continue; /* faster when present ... (?) */
823
+ } } }
586
824
 
587
825
  /* Save reps for next block */
588
826
  rep[0] = offset_1 ? offset_1 : savedOffset;
@@ -597,28 +835,56 @@ size_t ZSTD_compressBlock_btlazy2(
597
835
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
598
836
  ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
599
837
  {
600
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
838
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2, ZSTD_noDict);
601
839
  }
602
840
 
603
841
  size_t ZSTD_compressBlock_lazy2(
604
842
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
605
843
  ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
606
844
  {
607
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
845
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2, ZSTD_noDict);
608
846
  }
609
847
 
610
848
  size_t ZSTD_compressBlock_lazy(
611
849
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
612
850
  ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
613
851
  {
614
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
852
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1, ZSTD_noDict);
615
853
  }
616
854
 
617
855
  size_t ZSTD_compressBlock_greedy(
618
856
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
619
857
  ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
620
858
  {
621
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
859
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0, ZSTD_noDict);
860
+ }
861
+
862
+ size_t ZSTD_compressBlock_btlazy2_dictMatchState(
863
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
864
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
865
+ {
866
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2, ZSTD_dictMatchState);
867
+ }
868
+
869
+ size_t ZSTD_compressBlock_lazy2_dictMatchState(
870
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
871
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
872
+ {
873
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2, ZSTD_dictMatchState);
874
+ }
875
+
876
+ size_t ZSTD_compressBlock_lazy_dictMatchState(
877
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
878
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
879
+ {
880
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1, ZSTD_dictMatchState);
881
+ }
882
+
883
+ size_t ZSTD_compressBlock_greedy_dictMatchState(
884
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
885
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
886
+ {
887
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0, ZSTD_dictMatchState);
622
888
  }
623
889
 
624
890
 
@@ -646,7 +912,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
646
912
  typedef size_t (*searchMax_f)(
647
913
  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
648
914
  const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
649
- searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
915
+ searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
650
916
 
651
917
  U32 offset_1 = rep[0], offset_2 = rep[1];
652
918