zstd-ruby 1.4.0.0 → 1.4.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +274 -107
  5. data/ext/zstdruby/libzstd/README.md +75 -16
  6. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  7. data/ext/zstdruby/libzstd/common/compiler.h +154 -5
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  10. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  12. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +7 -3
  14. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
  16. data/ext/zstdruby/libzstd/common/huf.h +41 -38
  17. data/ext/zstdruby/libzstd/common/mem.h +68 -22
  18. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  19. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  20. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  21. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
  23. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  25. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  26. data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
  27. data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
  28. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
  72. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
  73. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  74. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  75. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
  77. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
  79. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
  81. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
  83. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
  85. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
  87. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
  89. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  90. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  91. data/ext/zstdruby/libzstd/zstd.h +655 -118
  92. data/lib/zstd-ruby/version.rb +1 -1
  93. data/zstd-ruby.gemspec +1 -1
  94. metadata +20 -10
  95. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -31,20 +31,19 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
31
31
  * is empty.
32
32
  */
33
33
  for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
34
- U32 const current = (U32)(ip - base);
34
+ U32 const curr = (U32)(ip - base);
35
35
  U32 i;
36
36
  for (i = 0; i < fastHashFillStep; ++i) {
37
37
  size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
38
38
  size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
39
39
  if (i == 0)
40
- hashSmall[smHash] = current + i;
40
+ hashSmall[smHash] = curr + i;
41
41
  if (i == 0 || hashLarge[lgHash] == 0)
42
- hashLarge[lgHash] = current + i;
42
+ hashLarge[lgHash] = curr + i;
43
43
  /* Only load extra positions for ZSTD_dtlm_full */
44
44
  if (dtlm == ZSTD_dtlm_fast)
45
45
  break;
46
- }
47
- }
46
+ } }
48
47
  }
49
48
 
50
49
 
@@ -63,7 +62,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
63
62
  const BYTE* const istart = (const BYTE*)src;
64
63
  const BYTE* ip = istart;
65
64
  const BYTE* anchor = istart;
66
- const U32 prefixLowestIndex = ms->window.dictLimit;
65
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
66
+ /* presumes that, if there is a dictionary, it must be using Attach mode */
67
+ const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
67
68
  const BYTE* const prefixLowest = base + prefixLowestIndex;
68
69
  const BYTE* const iend = istart + srcSize;
69
70
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -93,14 +94,23 @@ size_t ZSTD_compressBlock_doubleFast_generic(
93
94
  dictCParams->hashLog : hBitsL;
94
95
  const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
95
96
  dictCParams->chainLog : hBitsS;
96
- const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
97
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
98
+
99
+ DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
97
100
 
98
101
  assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
99
102
 
103
+ /* if a dictionary is attached, it must be within window range */
104
+ if (dictMode == ZSTD_dictMatchState) {
105
+ assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
106
+ }
107
+
100
108
  /* init */
101
109
  ip += (dictAndPrefixLength == 0);
102
110
  if (dictMode == ZSTD_noDict) {
103
- U32 const maxRep = (U32)(ip - prefixLowest);
111
+ U32 const curr = (U32)(ip - base);
112
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
113
+ U32 const maxRep = curr - windowLow;
104
114
  if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
105
115
  if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
106
116
  }
@@ -119,17 +129,17 @@ size_t ZSTD_compressBlock_doubleFast_generic(
119
129
  size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
120
130
  size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
121
131
  size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
122
- U32 const current = (U32)(ip-base);
132
+ U32 const curr = (U32)(ip-base);
123
133
  U32 const matchIndexL = hashLong[h2];
124
134
  U32 matchIndexS = hashSmall[h];
125
135
  const BYTE* matchLong = base + matchIndexL;
126
136
  const BYTE* match = base + matchIndexS;
127
- const U32 repIndex = current + 1 - offset_1;
137
+ const U32 repIndex = curr + 1 - offset_1;
128
138
  const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
129
139
  && repIndex < prefixLowestIndex) ?
130
140
  dictBase + (repIndex - dictIndexDelta) :
131
141
  base + repIndex;
132
- hashLong[h2] = hashSmall[h] = current; /* update hash tables */
142
+ hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
133
143
 
134
144
  /* check dictMatchState repcode */
135
145
  if (dictMode == ZSTD_dictMatchState
@@ -138,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
138
148
  const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
139
149
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
140
150
  ip++;
141
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
151
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
142
152
  goto _match_stored;
143
153
  }
144
154
 
@@ -147,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
147
157
  && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
148
158
  mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
149
159
  ip++;
150
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
160
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
151
161
  goto _match_stored;
152
162
  }
153
163
 
@@ -167,11 +177,10 @@ size_t ZSTD_compressBlock_doubleFast_generic(
167
177
 
168
178
  if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
169
179
  mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
170
- offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
180
+ offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
171
181
  while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
172
182
  goto _match_found;
173
- }
174
- }
183
+ } }
175
184
 
176
185
  if (matchIndexS > prefixLowestIndex) {
177
186
  /* check prefix short match */
@@ -186,20 +195,21 @@ size_t ZSTD_compressBlock_doubleFast_generic(
186
195
 
187
196
  if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
188
197
  goto _search_next_long;
189
- }
190
- }
198
+ } }
191
199
 
192
200
  ip += ((ip-anchor) >> kSearchStrength) + 1;
201
+ #if defined(__aarch64__)
202
+ PREFETCH_L1(ip+256);
203
+ #endif
193
204
  continue;
194
205
 
195
206
  _search_next_long:
196
207
 
197
- {
198
- size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
208
+ { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
199
209
  size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
200
210
  U32 const matchIndexL3 = hashLong[hl3];
201
211
  const BYTE* matchL3 = base + matchIndexL3;
202
- hashLong[hl3] = current + 1;
212
+ hashLong[hl3] = curr + 1;
203
213
 
204
214
  /* check prefix long +1 match */
205
215
  if (matchIndexL3 > prefixLowestIndex) {
@@ -218,17 +228,15 @@ _search_next_long:
218
228
  if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
219
229
  mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
220
230
  ip++;
221
- offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
231
+ offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
222
232
  while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
223
233
  goto _match_found;
224
- }
225
- }
226
- }
234
+ } } }
227
235
 
228
236
  /* if no long +1 match, explore the short match we found */
229
237
  if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
230
238
  mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
231
- offset = (U32)(current - matchIndexS);
239
+ offset = (U32)(curr - matchIndexS);
232
240
  while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
233
241
  } else {
234
242
  mLength = ZSTD_count(ip+4, match+4, iend) + 4;
@@ -242,7 +250,7 @@ _match_found:
242
250
  offset_2 = offset_1;
243
251
  offset_1 = offset;
244
252
 
245
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
253
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
246
254
 
247
255
  _match_stored:
248
256
  /* match found */
@@ -250,11 +258,14 @@ _match_stored:
250
258
  anchor = ip;
251
259
 
252
260
  if (ip <= ilimit) {
253
- /* Fill Table */
254
- hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
255
- hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */
256
- hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
257
- hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
261
+ /* Complementary insertion */
262
+ /* done after iLimit test, as candidates could be > iend-8 */
263
+ { U32 const indexToInsert = curr+2;
264
+ hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
265
+ hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
266
+ hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
267
+ hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
268
+ }
258
269
 
259
270
  /* check immediate repcode */
260
271
  if (dictMode == ZSTD_dictMatchState) {
@@ -263,14 +274,14 @@ _match_stored:
263
274
  U32 const repIndex2 = current2 - offset_2;
264
275
  const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
265
276
  && repIndex2 < prefixLowestIndex ?
266
- dictBase - dictIndexDelta + repIndex2 :
277
+ dictBase + repIndex2 - dictIndexDelta :
267
278
  base + repIndex2;
268
279
  if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
269
280
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
270
281
  const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
271
282
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
272
283
  U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
273
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
284
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
274
285
  hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
275
286
  hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
276
287
  ip += repLength2;
@@ -278,8 +289,7 @@ _match_stored:
278
289
  continue;
279
290
  }
280
291
  break;
281
- }
282
- }
292
+ } }
283
293
 
284
294
  if (dictMode == ZSTD_noDict) {
285
295
  while ( (ip <= ilimit)
@@ -290,18 +300,19 @@ _match_stored:
290
300
  U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
291
301
  hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
292
302
  hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
293
- ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
303
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
294
304
  ip += rLength;
295
305
  anchor = ip;
296
306
  continue; /* faster when present ... (?) */
297
- } } } }
307
+ } } }
308
+ } /* while (ip < ilimit) */
298
309
 
299
310
  /* save reps for next block */
300
311
  rep[0] = offset_1 ? offset_1 : offsetSaved;
301
312
  rep[1] = offset_2 ? offset_2 : offsetSaved;
302
313
 
303
314
  /* Return the last literals size */
304
- return iend - anchor;
315
+ return (size_t)(iend - anchor);
305
316
  }
306
317
 
307
318
 
@@ -360,10 +371,13 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
360
371
  const BYTE* anchor = istart;
361
372
  const BYTE* const iend = istart + srcSize;
362
373
  const BYTE* const ilimit = iend - 8;
363
- const U32 prefixStartIndex = ms->window.dictLimit;
364
374
  const BYTE* const base = ms->window.base;
375
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
376
+ const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
377
+ const U32 dictStartIndex = lowLimit;
378
+ const U32 dictLimit = ms->window.dictLimit;
379
+ const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
365
380
  const BYTE* const prefixStart = base + prefixStartIndex;
366
- const U32 dictStartIndex = ms->window.lowLimit;
367
381
  const BYTE* const dictBase = ms->window.dictBase;
368
382
  const BYTE* const dictStart = dictBase + dictStartIndex;
369
383
  const BYTE* const dictEnd = dictBase + prefixStartIndex;
@@ -371,6 +385,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
371
385
 
372
386
  DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
373
387
 
388
+ /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
389
+ if (prefixStartIndex == dictStartIndex)
390
+ return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
391
+
374
392
  /* Search Loop */
375
393
  while (ip < ilimit) { /* < instead of <=, because (ip+1) */
376
394
  const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
@@ -383,12 +401,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
383
401
  const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
384
402
  const BYTE* matchLong = matchLongBase + matchLongIndex;
385
403
 
386
- const U32 current = (U32)(ip-base);
387
- const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
404
+ const U32 curr = (U32)(ip-base);
405
+ const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
388
406
  const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
389
407
  const BYTE* const repMatch = repBase + repIndex;
390
408
  size_t mLength;
391
- hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
409
+ hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
392
410
 
393
411
  if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
394
412
  & (repIndex > dictStartIndex))
@@ -396,18 +414,18 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
396
414
  const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
397
415
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
398
416
  ip++;
399
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
417
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
400
418
  } else {
401
419
  if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
402
420
  const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
403
421
  const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
404
422
  U32 offset;
405
423
  mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
406
- offset = current - matchLongIndex;
424
+ offset = curr - matchLongIndex;
407
425
  while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
408
426
  offset_2 = offset_1;
409
427
  offset_1 = offset;
410
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
428
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
411
429
 
412
430
  } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
413
431
  size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -415,40 +433,44 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
415
433
  const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
416
434
  const BYTE* match3 = match3Base + matchIndex3;
417
435
  U32 offset;
418
- hashLong[h3] = current + 1;
436
+ hashLong[h3] = curr + 1;
419
437
  if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
420
438
  const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
421
439
  const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
422
440
  mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
423
441
  ip++;
424
- offset = current+1 - matchIndex3;
442
+ offset = curr+1 - matchIndex3;
425
443
  while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
426
444
  } else {
427
445
  const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
428
446
  const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
429
447
  mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
430
- offset = current - matchIndex;
448
+ offset = curr - matchIndex;
431
449
  while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
432
450
  }
433
451
  offset_2 = offset_1;
434
452
  offset_1 = offset;
435
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
453
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
436
454
 
437
455
  } else {
438
456
  ip += ((ip-anchor) >> kSearchStrength) + 1;
439
457
  continue;
440
458
  } }
441
459
 
442
- /* found a match : store it */
460
+ /* move to next sequence start */
443
461
  ip += mLength;
444
462
  anchor = ip;
445
463
 
446
464
  if (ip <= ilimit) {
447
- /* Fill Table */
448
- hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
449
- hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
450
- hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
451
- hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
465
+ /* Complementary insertion */
466
+ /* done after iLimit test, as candidates could be > iend-8 */
467
+ { U32 const indexToInsert = curr+2;
468
+ hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
469
+ hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
470
+ hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
471
+ hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
472
+ }
473
+
452
474
  /* check immediate repcode */
453
475
  while (ip <= ilimit) {
454
476
  U32 const current2 = (U32)(ip-base);
@@ -460,7 +482,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
460
482
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
461
483
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
462
484
  U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
463
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
485
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
464
486
  hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
465
487
  hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
466
488
  ip += repLength2;
@@ -475,7 +497,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
475
497
  rep[1] = offset_2;
476
498
 
477
499
  /* Return the last literals size */
478
- return iend - anchor;
500
+ return (size_t)(iend - anchor);
479
501
  }
480
502
 
481
503
 
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
15
15
  extern "C" {
16
16
  #endif
17
17
 
18
- #include "mem.h" /* U32 */
18
+ #include "../common/mem.h" /* U32 */
19
19
  #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
20
20
 
21
21
  void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -8,12 +8,13 @@
8
8
  * You may select, at your option, one of the above-listed licenses.
9
9
  */
10
10
 
11
- #include "zstd_compress_internal.h"
11
+ #include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
12
12
  #include "zstd_fast.h"
13
13
 
14
14
 
15
15
  void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
16
- void const* end, ZSTD_dictTableLoadMethod_e dtlm)
16
+ const void* const end,
17
+ ZSTD_dictTableLoadMethod_e dtlm)
17
18
  {
18
19
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
19
20
  U32* const hashTable = ms->hashTable;
@@ -28,21 +29,22 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
28
29
  * Insert the other positions if their hash entry is empty.
29
30
  */
30
31
  for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
31
- U32 const current = (U32)(ip - base);
32
+ U32 const curr = (U32)(ip - base);
32
33
  size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
33
- hashTable[hash0] = current;
34
+ hashTable[hash0] = curr;
34
35
  if (dtlm == ZSTD_dtlm_fast) continue;
35
36
  /* Only load extra positions for ZSTD_dtlm_full */
36
37
  { U32 p;
37
38
  for (p = 1; p < fastHashFillStep; ++p) {
38
39
  size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
39
40
  if (hashTable[hash] == 0) { /* not yet filled */
40
- hashTable[hash] = current + p;
41
+ hashTable[hash] = curr + p;
41
42
  } } } }
42
43
  }
43
44
 
44
- FORCE_INLINE_TEMPLATE
45
- size_t ZSTD_compressBlock_fast_generic(
45
+
46
+ FORCE_INLINE_TEMPLATE size_t
47
+ ZSTD_compressBlock_fast_generic(
46
48
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
47
49
  void const* src, size_t srcSize,
48
50
  U32 const mls)
@@ -58,7 +60,8 @@ size_t ZSTD_compressBlock_fast_generic(
58
60
  const BYTE* ip0 = istart;
59
61
  const BYTE* ip1;
60
62
  const BYTE* anchor = istart;
61
- const U32 prefixStartIndex = ms->window.dictLimit;
63
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
64
+ const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
62
65
  const BYTE* const prefixStart = base + prefixStartIndex;
63
66
  const BYTE* const iend = istart + srcSize;
64
67
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -66,15 +69,24 @@ size_t ZSTD_compressBlock_fast_generic(
66
69
  U32 offsetSaved = 0;
67
70
 
68
71
  /* init */
72
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
69
73
  ip0 += (ip0 == prefixStart);
70
74
  ip1 = ip0 + 1;
71
- {
72
- U32 const maxRep = (U32)(ip0 - prefixStart);
75
+ { U32 const curr = (U32)(ip0 - base);
76
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
77
+ U32 const maxRep = curr - windowLow;
73
78
  if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
74
79
  if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
75
80
  }
76
81
 
77
82
  /* Main Search Loop */
83
+ #ifdef __INTEL_COMPILER
84
+ /* From intel 'The vector pragma indicates that the loop should be
85
+ * vectorized if it is legal to do so'. Can be used together with
86
+ * #pragma ivdep (but have opted to exclude that because intel
87
+ * warns against using it).*/
88
+ #pragma vector always
89
+ #endif
78
90
  while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
79
91
  size_t mLength;
80
92
  BYTE const* ip2 = ip0 + 2;
@@ -86,19 +98,25 @@ size_t ZSTD_compressBlock_fast_generic(
86
98
  U32 const current1 = (U32)(ip1-base);
87
99
  U32 const matchIndex0 = hashTable[h0];
88
100
  U32 const matchIndex1 = hashTable[h1];
89
- BYTE const* repMatch = ip2-offset_1;
101
+ BYTE const* repMatch = ip2 - offset_1;
90
102
  const BYTE* match0 = base + matchIndex0;
91
103
  const BYTE* match1 = base + matchIndex1;
92
104
  U32 offcode;
105
+
106
+ #if defined(__aarch64__)
107
+ PREFETCH_L1(ip0+256);
108
+ #endif
109
+
93
110
  hashTable[h0] = current0; /* update hash table */
94
111
  hashTable[h1] = current1; /* update hash table */
95
112
 
96
113
  assert(ip0 + 1 == ip1);
97
114
 
98
115
  if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
99
- mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
116
+ mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
100
117
  ip0 = ip2 - mLength;
101
118
  match0 = repMatch - mLength;
119
+ mLength += 4;
102
120
  offcode = 0;
103
121
  goto _match;
104
122
  }
@@ -112,8 +130,7 @@ size_t ZSTD_compressBlock_fast_generic(
112
130
  match0 = match1;
113
131
  goto _offset;
114
132
  }
115
- {
116
- size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
133
+ { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
117
134
  assert(step >= 2);
118
135
  ip0 += step;
119
136
  ip1 += step;
@@ -124,19 +141,18 @@ _offset: /* Requires: ip0, match0 */
124
141
  offset_2 = offset_1;
125
142
  offset_1 = (U32)(ip0-match0);
126
143
  offcode = offset_1 + ZSTD_REP_MOVE;
127
- mLength = 0;
144
+ mLength = 4;
128
145
  /* Count the backwards match length */
129
146
  while (((ip0>anchor) & (match0>prefixStart))
130
147
  && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
131
148
 
132
149
  _match: /* Requires: ip0, match0, offcode */
133
150
  /* Count the forward length */
134
- mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
135
- ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
151
+ mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
152
+ ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
136
153
  /* match found */
137
154
  ip0 += mLength;
138
155
  anchor = ip0;
139
- ip1 = ip0 + 1;
140
156
 
141
157
  if (ip0 <= ilimit) {
142
158
  /* Fill Table */
@@ -144,20 +160,18 @@ _match: /* Requires: ip0, match0, offcode */
144
160
  hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
145
161
  hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
146
162
 
147
- while ( (ip0 <= ilimit)
148
- && ( (offset_2>0)
149
- & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) {
150
- /* store sequence */
151
- size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
152
- U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
153
- hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
154
- ip0 += rLength;
155
- ip1 = ip0 + 1;
156
- ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
157
- anchor = ip0;
158
- continue; /* faster when present (confirmed on gcc-8) ... (?) */
159
- }
160
- }
163
+ if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
164
+ while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
165
+ /* store sequence */
166
+ size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
167
+ { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
168
+ hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
169
+ ip0 += rLength;
170
+ ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
171
+ anchor = ip0;
172
+ continue; /* faster when present (confirmed on gcc-8) ... (?) */
173
+ } } }
174
+ ip1 = ip0 + 1;
161
175
  }
162
176
 
163
177
  /* save reps for next block */
@@ -165,7 +179,7 @@ _match: /* Requires: ip0, match0, offcode */
165
179
  rep[1] = offset_2 ? offset_2 : offsetSaved;
166
180
 
167
181
  /* Return the last literals size */
168
- return iend - anchor;
182
+ return (size_t)(iend - anchor);
169
183
  }
170
184
 
171
185
 
@@ -173,8 +187,7 @@ size_t ZSTD_compressBlock_fast(
173
187
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
174
188
  void const* src, size_t srcSize)
175
189
  {
176
- ZSTD_compressionParameters const* cParams = &ms->cParams;
177
- U32 const mls = cParams->minMatch;
190
+ U32 const mls = ms->cParams.minMatch;
178
191
  assert(ms->dictMatchState == NULL);
179
192
  switch(mls)
180
193
  {
@@ -222,11 +235,19 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
222
235
  const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
223
236
  const U32 dictHLog = dictCParams->hashLog;
224
237
 
225
- /* otherwise, we would get index underflow when translating a dict index
226
- * into a local index */
238
+ /* if a dictionary is still attached, it necessarily means that
239
+ * it is within window size. So we just check it. */
240
+ const U32 maxDistance = 1U << cParams->windowLog;
241
+ const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
242
+ assert(endIndex - prefixStartIndex <= maxDistance);
243
+ (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
244
+
245
+ /* ensure there will be no underflow
246
+ * when translating a dict index into a local index */
227
247
  assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
228
248
 
229
249
  /* init */
250
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
230
251
  ip += (dictAndPrefixLength == 0);
231
252
  /* dictMatchState repCode checks don't currently handle repCode == 0
232
253
  * disabling. */
@@ -237,21 +258,21 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
237
258
  while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
238
259
  size_t mLength;
239
260
  size_t const h = ZSTD_hashPtr(ip, hlog, mls);
240
- U32 const current = (U32)(ip-base);
261
+ U32 const curr = (U32)(ip-base);
241
262
  U32 const matchIndex = hashTable[h];
242
263
  const BYTE* match = base + matchIndex;
243
- const U32 repIndex = current + 1 - offset_1;
264
+ const U32 repIndex = curr + 1 - offset_1;
244
265
  const BYTE* repMatch = (repIndex < prefixStartIndex) ?
245
266
  dictBase + (repIndex - dictIndexDelta) :
246
267
  base + repIndex;
247
- hashTable[h] = current; /* update hash table */
268
+ hashTable[h] = curr; /* update hash table */
248
269
 
249
270
  if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
250
271
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
251
272
  const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
252
273
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
253
274
  ip++;
254
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
275
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
255
276
  } else if ( (matchIndex <= prefixStartIndex) ) {
256
277
  size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
257
278
  U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -263,7 +284,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
263
284
  continue;
264
285
  } else {
265
286
  /* found a dict match */
266
- U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
287
+ U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
267
288
  mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
268
289
  while (((ip>anchor) & (dictMatch>dictStart))
269
290
  && (ip[-1] == dictMatch[-1])) {
@@ -271,7 +292,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
271
292
  } /* catch up */
272
293
  offset_2 = offset_1;
273
294
  offset_1 = offset;
274
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
295
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
275
296
  }
276
297
  } else if (MEM_read32(match) != MEM_read32(ip)) {
277
298
  /* it's not a match, and we're not going to check the dictionary */
@@ -286,7 +307,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
286
307
  && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
287
308
  offset_2 = offset_1;
288
309
  offset_1 = offset;
289
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
310
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
290
311
  }
291
312
 
292
313
  /* match found */
@@ -295,8 +316,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
295
316
 
296
317
  if (ip <= ilimit) {
297
318
  /* Fill Table */
298
- assert(base+current+2 > istart); /* check base overflow */
299
- hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
319
+ assert(base+curr+2 > istart); /* check base overflow */
320
+ hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
300
321
  hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
301
322
 
302
323
  /* check immediate repcode */
@@ -311,7 +332,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
311
332
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
312
333
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
313
334
  U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
314
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
335
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
315
336
  hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
316
337
  ip += repLength2;
317
338
  anchor = ip;
@@ -327,15 +348,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
327
348
  rep[1] = offset_2 ? offset_2 : offsetSaved;
328
349
 
329
350
  /* Return the last literals size */
330
- return iend - anchor;
351
+ return (size_t)(iend - anchor);
331
352
  }
332
353
 
333
354
  size_t ZSTD_compressBlock_fast_dictMatchState(
334
355
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
335
356
  void const* src, size_t srcSize)
336
357
  {
337
- ZSTD_compressionParameters const* cParams = &ms->cParams;
338
- U32 const mls = cParams->minMatch;
358
+ U32 const mls = ms->cParams.minMatch;
339
359
  assert(ms->dictMatchState != NULL);
340
360
  switch(mls)
341
361
  {
@@ -366,35 +386,46 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
366
386
  const BYTE* const istart = (const BYTE*)src;
367
387
  const BYTE* ip = istart;
368
388
  const BYTE* anchor = istart;
369
- const U32 dictStartIndex = ms->window.lowLimit;
389
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
390
+ const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
391
+ const U32 dictStartIndex = lowLimit;
370
392
  const BYTE* const dictStart = dictBase + dictStartIndex;
371
- const U32 prefixStartIndex = ms->window.dictLimit;
393
+ const U32 dictLimit = ms->window.dictLimit;
394
+ const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
372
395
  const BYTE* const prefixStart = base + prefixStartIndex;
373
396
  const BYTE* const dictEnd = dictBase + prefixStartIndex;
374
397
  const BYTE* const iend = istart + srcSize;
375
398
  const BYTE* const ilimit = iend - 8;
376
399
  U32 offset_1=rep[0], offset_2=rep[1];
377
400
 
401
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
402
+
403
+ /* switch to "regular" variant if extDict is invalidated due to maxDistance */
404
+ if (prefixStartIndex == dictStartIndex)
405
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
406
+
378
407
  /* Search Loop */
379
408
  while (ip < ilimit) { /* < instead of <=, because (ip+1) */
380
409
  const size_t h = ZSTD_hashPtr(ip, hlog, mls);
381
410
  const U32 matchIndex = hashTable[h];
382
411
  const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
383
412
  const BYTE* match = matchBase + matchIndex;
384
- const U32 current = (U32)(ip-base);
385
- const U32 repIndex = current + 1 - offset_1;
413
+ const U32 curr = (U32)(ip-base);
414
+ const U32 repIndex = curr + 1 - offset_1;
386
415
  const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
387
416
  const BYTE* const repMatch = repBase + repIndex;
388
- size_t mLength;
389
- hashTable[h] = current; /* update hash table */
390
- assert(offset_1 <= current +1); /* check repIndex */
417
+ hashTable[h] = curr; /* update hash table */
418
+ DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
419
+ assert(offset_1 <= curr +1); /* check repIndex */
391
420
 
392
421
  if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
393
422
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
394
- const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
395
- mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
423
+ const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
424
+ size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
396
425
  ip++;
397
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
426
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
427
+ ip += rLength;
428
+ anchor = ip;
398
429
  } else {
399
430
  if ( (matchIndex < dictStartIndex) ||
400
431
  (MEM_read32(match) != MEM_read32(ip)) ) {
@@ -402,36 +433,32 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
402
433
  ip += ((ip-anchor) >> kSearchStrength) + stepSize;
403
434
  continue;
404
435
  }
405
- { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
406
- const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
407
- U32 offset;
408
- mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
436
+ { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
437
+ const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
438
+ U32 const offset = curr - matchIndex;
439
+ size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
409
440
  while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
410
- offset = current - matchIndex;
411
- offset_2 = offset_1;
412
- offset_1 = offset;
413
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
441
+ offset_2 = offset_1; offset_1 = offset; /* update offset history */
442
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
443
+ ip += mLength;
444
+ anchor = ip;
414
445
  } }
415
446
 
416
- /* found a match : store it */
417
- ip += mLength;
418
- anchor = ip;
419
-
420
447
  if (ip <= ilimit) {
421
448
  /* Fill Table */
422
- hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
449
+ hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
423
450
  hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
424
451
  /* check immediate repcode */
425
452
  while (ip <= ilimit) {
426
453
  U32 const current2 = (U32)(ip-base);
427
454
  U32 const repIndex2 = current2 - offset_2;
428
- const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
455
+ const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
429
456
  if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
430
457
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
431
458
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
432
459
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
433
- U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
434
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
460
+ { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
461
+ ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
435
462
  hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
436
463
  ip += repLength2;
437
464
  anchor = ip;
@@ -445,7 +472,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
445
472
  rep[1] = offset_2;
446
473
 
447
474
  /* Return the last literals size */
448
- return iend - anchor;
475
+ return (size_t)(iend - anchor);
449
476
  }
450
477
 
451
478
 
@@ -453,8 +480,7 @@ size_t ZSTD_compressBlock_fast_extDict(
453
480
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
454
481
  void const* src, size_t srcSize)
455
482
  {
456
- ZSTD_compressionParameters const* cParams = &ms->cParams;
457
- U32 const mls = cParams->minMatch;
483
+ U32 const mls = ms->cParams.minMatch;
458
484
  switch(mls)
459
485
  {
460
486
  default: /* includes case 3 */