zstd-ruby 1.4.1.0 → 1.5.0.0

Files changed (96)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/libzstd/BUCK +5 -7
  6. data/ext/zstdruby/libzstd/Makefile +304 -113
  7. data/ext/zstdruby/libzstd/README.md +83 -20
  8. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  9. data/ext/zstdruby/libzstd/common/compiler.h +150 -8
  10. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  11. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  12. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  13. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  14. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  15. data/ext/zstdruby/libzstd/common/error_private.h +8 -4
  16. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  17. data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -55
  18. data/ext/zstdruby/libzstd/common/huf.h +43 -39
  19. data/ext/zstdruby/libzstd/common/mem.h +69 -25
  20. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  21. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  22. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  23. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  24. data/ext/zstdruby/libzstd/common/xxhash.c +40 -92
  25. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  26. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  27. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  28. data/ext/zstdruby/libzstd/common/zstd_internal.h +230 -111
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +154 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +332 -193
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3614 -1696
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +546 -86
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +441 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +572 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +662 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +43 -41
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +85 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1184 -111
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +333 -208
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +228 -129
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +151 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +395 -276
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +630 -231
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +606 -380
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +39 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -46
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -31
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +53 -30
  72. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  73. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  74. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
  75. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +24 -14
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +17 -8
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  79. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +17 -8
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +25 -11
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +43 -32
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  85. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +27 -19
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +32 -20
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  89. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  90. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
  91. data/ext/zstdruby/libzstd/zstd.h +740 -153
  92. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
  93. data/lib/zstd-ruby/version.rb +1 -1
  94. data/zstd-ruby.gemspec +1 -1
  95. metadata +21 -10
  96. data/.travis.yml +0 -14
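This release replaces the bundled libzstd sources with upstream zstd 1.5.0: public headers move to the library root ({dictBuilder/zdict.h → zdict.h}, {common/zstd_errors.h → zstd_errors.h}), new compression units appear (zstd_compress_literals, zstd_compress_sequences, zstd_compress_superblock, zstd_cwksp, zstd_ldm_geartab, zstd_trace), and Travis CI gives way to GitHub Actions. A minimal sketch for checking the vendored library after such an upgrade, using only stable zstd.h APIs (illustrative only, not part of the gem):

    /* check_zstd.c - sanity-check the bundled libzstd (illustrative sketch) */
    #include <stdio.h>
    #include <string.h>
    #include <zstd.h>

    int main(void) {
        const char src[] = "zstd-ruby now bundles libzstd 1.5.0";
        char dst[256];   /* comfortably above ZSTD_compressBound(strlen(src)) */
        size_t const cSize = ZSTD_compress(dst, sizeof(dst), src, strlen(src), 3);
        if (ZSTD_isError(cSize)) { fprintf(stderr, "%s\n", ZSTD_getErrorName(cSize)); return 1; }
        /* zstd 1.5.0 reports version number 10500 */
        printf("zstd %s (%u): %zu -> %zu bytes\n",
               ZSTD_versionString(), ZSTD_versionNumber(), strlen(src), cSize);
        return 0;
    }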
data/ext/zstdruby/libzstd/compress/zstd_double_fast.c

@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -31,15 +31,15 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
   * is empty.
   */
  for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
-     U32 const current = (U32)(ip - base);
+     U32 const curr = (U32)(ip - base);
      U32 i;
      for (i = 0; i < fastHashFillStep; ++i) {
          size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
          size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
          if (i == 0)
-             hashSmall[smHash] = current + i;
+             hashSmall[smHash] = curr + i;
          if (i == 0 || hashLarge[lgHash] == 0)
-             hashLarge[lgHash] = current + i;
+             hashLarge[lgHash] = curr + i;
          /* Only load extra positions for ZSTD_dtlm_full */
          if (dtlm == ZSTD_dtlm_fast)
              break;
@@ -63,9 +63,8 @@ size_t ZSTD_compressBlock_doubleFast_generic(
      const BYTE* ip = istart;
      const BYTE* anchor = istart;
      const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
-     const U32 lowestValid = ms->window.dictLimit;
-     const U32 maxDistance = 1U << cParams->windowLog;
-     const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
+     /* presumes that, if there is a dictionary, it must be using Attach mode */
+     const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
      const BYTE* const prefixLowest = base + prefixLowestIndex;
      const BYTE* const iend = istart + srcSize;
      const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -95,7 +94,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
          dictCParams->hashLog : hBitsL;
      const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
          dictCParams->chainLog : hBitsS;
-     const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
+     const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));

      DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");

@@ -103,13 +102,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(

      /* if a dictionary is attached, it must be within window range */
      if (dictMode == ZSTD_dictMatchState) {
-         assert(lowestValid + maxDistance >= endIndex);
+         assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
      }

      /* init */
      ip += (dictAndPrefixLength == 0);
      if (dictMode == ZSTD_noDict) {
-         U32 const maxRep = (U32)(ip - prefixLowest);
+         U32 const curr = (U32)(ip - base);
+         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
+         U32 const maxRep = curr - windowLow;
          if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
          if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
      }
@@ -128,17 +129,17 @@ size_t ZSTD_compressBlock_doubleFast_generic(
          size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
          size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
          size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
-         U32 const current = (U32)(ip-base);
+         U32 const curr = (U32)(ip-base);
          U32 const matchIndexL = hashLong[h2];
          U32 matchIndexS = hashSmall[h];
          const BYTE* matchLong = base + matchIndexL;
          const BYTE* match = base + matchIndexS;
-         const U32 repIndex = current + 1 - offset_1;
+         const U32 repIndex = curr + 1 - offset_1;
          const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
                              && repIndex < prefixLowestIndex) ?
                                 dictBase + (repIndex - dictIndexDelta) :
                                 base + repIndex;
-         hashLong[h2] = hashSmall[h] = current;   /* update hash tables */
+         hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */

          /* check dictMatchState repcode */
          if (dictMode == ZSTD_dictMatchState
@@ -147,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
              const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
              mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
              ip++;
-             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
              goto _match_stored;
          }

@@ -156,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
              mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
              ip++;
-             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
              goto _match_stored;
          }

@@ -176,7 +177,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(

          if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
              mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
-             offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
+             offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
              while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; }   /* catch up */
              goto _match_found;
      }   }
@@ -197,6 +198,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
      }   }

          ip += ((ip-anchor) >> kSearchStrength) + 1;
+ #if defined(__aarch64__)
+         PREFETCH_L1(ip+256);
+ #endif
          continue;

  _search_next_long:
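The aarch64-only PREFETCH_L1(ip+256) added above (and again in zstd_fast.c below) is a hint intended to pull upcoming input into L1 cache before the next search iterations touch it. PREFETCH_L1 comes from common/compiler.h; on GCC/Clang targets it reduces to roughly the builtin below (simplified sketch of the upstream macro, recalled rather than quoted):

    /* simplified from libzstd common/compiler.h (GCC/Clang path) */
    #define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* read */, 3 /* high temporal locality */)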
@@ -205,7 +209,7 @@ _search_next_long:
          size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
          U32 const matchIndexL3 = hashLong[hl3];
          const BYTE* matchL3 = base + matchIndexL3;
-         hashLong[hl3] = current + 1;
+         hashLong[hl3] = curr + 1;

          /* check prefix long +1 match */
          if (matchIndexL3 > prefixLowestIndex) {
@@ -224,7 +228,7 @@ _search_next_long:
          if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
              mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
              ip++;
-             offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
+             offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
              while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; }   /* catch up */
              goto _match_found;
      }   }   }
@@ -232,7 +236,7 @@ _search_next_long:
          /* if no long +1 match, explore the short match we found */
          if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
              mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
-             offset = (U32)(current - matchIndexS);
+             offset = (U32)(curr - matchIndexS);
              while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
          } else {
              mLength = ZSTD_count(ip+4, match+4, iend) + 4;
@@ -246,7 +250,7 @@ _match_found:
      offset_2 = offset_1;
      offset_1 = offset;

-     ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+     ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

  _match_stored:
      /* match found */
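A change that recurs through every hunk in these files: each ZSTD_storeSeq call site gains an iend argument after anchor. In upstream 1.5.0 the internal helper takes the end of the literal buffer (litLimit) so that literals can be wild-copied in large chunks, with a safe byte-wise fallback only when the copy approaches the buffer end. The shape of the updated helper, as an abridged sketch of upstream zstd_compress_internal.h:

    /* abridged sketch of the upstream declaration (zstd 1.5.0) */
    HINT_INLINE UNUSED_ATTR
    void ZSTD_storeSeq(seqStore_t* seqStorePtr,
                       size_t litLength, const BYTE* literals, const BYTE* litLimit,
                       U32 offCode, size_t mlBase);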
@@ -256,7 +260,7 @@ _match_stored:
      if (ip <= ilimit) {
          /* Complementary insertion */
          /* done after iLimit test, as candidates could be > iend-8 */
-         { U32 const indexToInsert = current+2;
+         { U32 const indexToInsert = curr+2;
            hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
            hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
            hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -270,14 +274,14 @@ _match_stored:
              U32 const repIndex2 = current2 - offset_2;
              const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
                  && repIndex2 < prefixLowestIndex ?
-                 dictBase - dictIndexDelta + repIndex2 :
+                 dictBase + repIndex2 - dictIndexDelta :
                  base + repIndex2;
              if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
                 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                  const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                  U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                  hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                  hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                  ip += repLength2;
@@ -296,7 +300,7 @@ _match_stored:
              U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
              hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
              hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
-             ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
+             ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
              ip += rLength;
              anchor = ip;
              continue;   /* faster when present ... (?) */
@@ -369,9 +373,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
      const BYTE* const ilimit = iend - 8;
      const BYTE* const base = ms->window.base;
      const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
-     const U32 maxDistance = 1U << cParams->windowLog;
-     const U32 lowestValid = ms->window.lowLimit;
-     const U32 lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
+     const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
      const U32 dictStartIndex = lowLimit;
      const U32 dictLimit = ms->window.dictLimit;
      const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
@@ -399,31 +401,31 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
          const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
          const BYTE* matchLong = matchLongBase + matchLongIndex;

-         const U32 current = (U32)(ip-base);
-         const U32 repIndex = current + 1 - offset_1;   /* offset_1 expected <= current +1 */
+         const U32 curr = (U32)(ip-base);
+         const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
          const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
          const BYTE* const repMatch = repBase + repIndex;
          size_t mLength;
-         hashSmall[hSmall] = hashLong[hLong] = current;   /* update hash table */
+         hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */

          if ((((U32)((prefixStartIndex-1) - repIndex) >= 3)   /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
-             & (repIndex > dictStartIndex))
+             & (offset_1 < curr+1 - dictStartIndex))   /* note: we are searching at curr+1 */
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
              const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
              mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
              ip++;
-             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
          } else {
              if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                  const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
                  const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
                  U32 offset;
                  mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
-                 offset = current - matchLongIndex;
+                 offset = curr - matchLongIndex;
                  while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                  offset_2 = offset_1;
                  offset_1 = offset;
-                 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

              } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                  size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -431,24 +433,24 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                  const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
                  const BYTE* match3 = match3Base + matchIndex3;
                  U32 offset;
-                 hashLong[h3] = current + 1;
+                 hashLong[h3] = curr + 1;
                  if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
                      const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
                      const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
                      mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
                      ip++;
-                     offset = current+1 - matchIndex3;
+                     offset = curr+1 - matchIndex3;
                      while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; }   /* catch up */
                  } else {
                      const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
                      const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
                      mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
-                     offset = current - matchIndex;
+                     offset = curr - matchIndex;
                      while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                  }
                  offset_2 = offset_1;
                  offset_1 = offset;
-                 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                 ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

              } else {
                  ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -462,7 +464,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
          if (ip <= ilimit) {
              /* Complementary insertion */
              /* done after iLimit test, as candidates could be > iend-8 */
-             { U32 const indexToInsert = current+2;
+             { U32 const indexToInsert = curr+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -475,12 +477,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
              U32 const repIndex2 = current2 - offset_2;
              const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
              if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
-                 & (repIndex2 > dictStartIndex))
+                 & (offset_2 < current2 - dictStartIndex))
                && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                  U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                  hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                  hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                  ip += repLength2;
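The extDict repcode guard also changes shape: "repIndex > dictStartIndex" becomes "offset_1 < curr+1 - dictStartIndex" (and "offset_2 < current2 - dictStartIndex" in the tail loop). Since repIndex = curr+1 - offset_1, the two forms agree whenever the subtraction does not wrap, but only the new form stays correct when offset_1 exceeds curr+1 and repIndex underflows U32. A worked example with hypothetical values:

    /* Hypothetical values: curr = 5, offset_1 = 10, dictStartIndex = 2.
     * Old guard: repIndex = curr + 1 - offset_1 = (U32)(6 - 10) wraps to 0xFFFFFFFC,
     *            so "repIndex > dictStartIndex" is (wrongly) true.
     * New guard: offset_1 < curr + 1 - dictStartIndex  ->  10 < 4  ->  false,
     *            correctly rejecting an offset that reaches past the valid history. */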
data/ext/zstdruby/libzstd/compress/zstd_double_fast.h

@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
  extern "C" {
  #endif

- #include "mem.h"   /* U32 */
+ #include "../common/mem.h"   /* U32 */
  #include "zstd_compress_internal.h"   /* ZSTD_CCtx, size_t */

  void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
data/ext/zstdruby/libzstd/compress/zstd_fast.c

@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -8,7 +8,7 @@
  * You may select, at your option, one of the above-listed licenses.
  */

- #include "zstd_compress_internal.h"
+ #include "zstd_compress_internal.h"   /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
  #include "zstd_fast.h"


@@ -29,22 +29,22 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
      * Insert the other positions if their hash entry is empty.
      */
      for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
-         U32 const current = (U32)(ip - base);
+         U32 const curr = (U32)(ip - base);
          size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
-         hashTable[hash0] = current;
+         hashTable[hash0] = curr;
          if (dtlm == ZSTD_dtlm_fast) continue;
          /* Only load extra positions for ZSTD_dtlm_full */
          { U32 p;
            for (p = 1; p < fastHashFillStep; ++p) {
                size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
                if (hashTable[hash] == 0) {   /* not yet filled */
-                   hashTable[hash] = current + p;
+                   hashTable[hash] = curr + p;
      }   }   }   }
  }


- FORCE_INLINE_TEMPLATE
- size_t ZSTD_compressBlock_fast_generic(
+ FORCE_INLINE_TEMPLATE size_t
+ ZSTD_compressBlock_fast_generic(
      ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
      void const* src, size_t srcSize,
      U32 const mls)
@@ -61,9 +61,7 @@ size_t ZSTD_compressBlock_fast_generic(
      const BYTE* ip1;
      const BYTE* anchor = istart;
      const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
-     const U32 maxDistance = 1U << cParams->windowLog;
-     const U32 validStartIndex = ms->window.dictLimit;
-     const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
+     const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
      const BYTE* const prefixStart = base + prefixStartIndex;
      const BYTE* const iend = istart + srcSize;
      const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -71,15 +69,24 @@ size_t ZSTD_compressBlock_fast_generic(
      U32 offsetSaved = 0;

      /* init */
+     DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
      ip0 += (ip0 == prefixStart);
      ip1 = ip0 + 1;
-     {
-         U32 const maxRep = (U32)(ip0 - prefixStart);
+     {   U32 const curr = (U32)(ip0 - base);
+         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
+         U32 const maxRep = curr - windowLow;
          if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
          if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
      }

      /* Main Search Loop */
+ #ifdef __INTEL_COMPILER
+     /* From intel 'The vector pragma indicates that the loop should be
+      * vectorized if it is legal to do so'. Can be used together with
+      * #pragma ivdep (but have opted to exclude that because intel
+      * warns against using it).*/
+     #pragma vector always
+ #endif
      while (ip1 < ilimit) {   /* < instead of <=, because check at ip0+2 */
          size_t mLength;
          BYTE const* ip2 = ip0 + 2;
@@ -91,19 +98,25 @@ size_t ZSTD_compressBlock_fast_generic(
          U32 const current1 = (U32)(ip1-base);
          U32 const matchIndex0 = hashTable[h0];
          U32 const matchIndex1 = hashTable[h1];
-         BYTE const* repMatch = ip2-offset_1;
+         BYTE const* repMatch = ip2 - offset_1;
          const BYTE* match0 = base + matchIndex0;
          const BYTE* match1 = base + matchIndex1;
          U32 offcode;
+
+ #if defined(__aarch64__)
+         PREFETCH_L1(ip0+256);
+ #endif
+
          hashTable[h0] = current0;   /* update hash table */
          hashTable[h1] = current1;   /* update hash table */

          assert(ip0 + 1 == ip1);

          if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
-             mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
+             mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
              ip0 = ip2 - mLength;
              match0 = repMatch - mLength;
+             mLength += 4;
              offcode = 0;
              goto _match;
          }
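In the repcode branch just above, the guaranteed 4 matching bytes are now folded into mLength up front (mLength += 4), and the _offset path below starts from mLength = 4 instead of 0, so the shared _match tail can count forward without re-adding the constant:

    /* Effect at the shared _match tail (both variants count the same bytes):
     *   before: mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
     *   after:  mLength += ZSTD_count(ip0+mLength,   match0+mLength,   iend);
     */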
@@ -117,8 +130,7 @@ size_t ZSTD_compressBlock_fast_generic(
              match0 = match1;
              goto _offset;
          }
-         {
-             size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
+         {   size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
              assert(step >= 2);
              ip0 += step;
              ip1 += step;
@@ -129,19 +141,18 @@ _offset: /* Requires: ip0, match0 */
      offset_2 = offset_1;
      offset_1 = (U32)(ip0-match0);
      offcode = offset_1 + ZSTD_REP_MOVE;
-     mLength = 0;
+     mLength = 4;
      /* Count the backwards match length */
      while (((ip0>anchor) & (match0>prefixStart))
           && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; }   /* catch up */

  _match: /* Requires: ip0, match0, offcode */
      /* Count the forward length */
-     mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
-     ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
+     mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
+     ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
      /* match found */
      ip0 += mLength;
      anchor = ip0;
-     ip1 = ip0 + 1;

      if (ip0 <= ilimit) {
          /* Fill Table */
@@ -149,20 +160,18 @@ _match: /* Requires: ip0, match0, offcode */
          hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;   /* here because current+2 could be > iend-8 */
          hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);

-         while ( (ip0 <= ilimit)
-              && ( (offset_2>0)
-                 & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) {
-             /* store sequence */
-             size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
-             U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;   /* swap offset_2 <=> offset_1 */
-             hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
-             ip0 += rLength;
-             ip1 = ip0 + 1;
-             ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
-             anchor = ip0;
-             continue;   /* faster when present (confirmed on gcc-8) ... (?) */
-         }
-     }
+         if (offset_2 > 0) {   /* offset_2==0 means offset_2 is invalidated */
+             while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
+                 /* store sequence */
+                 size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
+                 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; }   /* swap offset_2 <=> offset_1 */
+                 hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
+                 ip0 += rLength;
+                 ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
+                 anchor = ip0;
+                 continue;   /* faster when present (confirmed on gcc-8) ... (?) */
+     }   }   }
+     ip1 = ip0 + 1;
  }

  /* save reps for next block */
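The immediate-repcode loop above is also restructured: the offset_2 validity test is hoisted out of the per-iteration loop condition, and ip1 = ip0 + 1 is recomputed once after the table-fill block rather than inside the loop. Schematically, with sameBytes() standing in for the MEM_read32 comparison:

    /* before: while ((ip0 <= ilimit) && ((offset_2 > 0) & sameBytes(ip0))) { ... }
     * after:  if (offset_2 > 0)
     *             while ((ip0 <= ilimit) && sameBytes(ip0)) { ... }
     * The validity test now runs once per match instead of once per iteration. */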
@@ -178,8 +187,7 @@ size_t ZSTD_compressBlock_fast(
      ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
      void const* src, size_t srcSize)
  {
-     ZSTD_compressionParameters const* cParams = &ms->cParams;
-     U32 const mls = cParams->minMatch;
+     U32 const mls = ms->cParams.minMatch;
      assert(ms->dictMatchState == NULL);
      switch(mls)
      {
@@ -234,11 +242,12 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
      assert(endIndex - prefixStartIndex <= maxDistance);
      (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */

-     /* ensure there will be no no underflow
+     /* ensure there will be no underflow
       * when translating a dict index into a local index */
      assert(prefixStartIndex >= (U32)(dictEnd - dictBase));

      /* init */
+     DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
      ip += (dictAndPrefixLength == 0);
      /* dictMatchState repCode checks don't currently handle repCode == 0
       * disabling. */
@@ -249,21 +258,21 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
      while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
          size_t mLength;
          size_t const h = ZSTD_hashPtr(ip, hlog, mls);
-         U32 const current = (U32)(ip-base);
+         U32 const curr = (U32)(ip-base);
          U32 const matchIndex = hashTable[h];
          const BYTE* match = base + matchIndex;
-         const U32 repIndex = current + 1 - offset_1;
+         const U32 repIndex = curr + 1 - offset_1;
          const BYTE* repMatch = (repIndex < prefixStartIndex) ?
                                 dictBase + (repIndex - dictIndexDelta) :
                                 base + repIndex;
-         hashTable[h] = current;   /* update hash table */
+         hashTable[h] = curr;   /* update hash table */

          if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3)   /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
              const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
              mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
              ip++;
-             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
          } else if ( (matchIndex <= prefixStartIndex) ) {
              size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
              U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -275,7 +284,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
              continue;
          } else {
              /* found a dict match */
-             U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
+             U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
              mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
              while (((ip>anchor) & (dictMatch>dictStart))
                   && (ip[-1] == dictMatch[-1])) {
@@ -283,7 +292,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
              }   /* catch up */
              offset_2 = offset_1;
              offset_1 = offset;
-             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
          }
      } else if (MEM_read32(match) != MEM_read32(ip)) {
          /* it's not a match, and we're not going to check the dictionary */
@@ -298,7 +307,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
               && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
          offset_2 = offset_1;
          offset_1 = offset;
-         ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+         ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
      }

      /* match found */
@@ -307,8 +316,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(

      if (ip <= ilimit) {
          /* Fill Table */
-         assert(base+current+2 > istart);   /* check base overflow */
-         hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;   /* here because current+2 could be > iend-8 */
+         assert(base+curr+2 > istart);   /* check base overflow */
+         hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;   /* here because curr+2 could be > iend-8 */
          hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);

          /* check immediate repcode */
@@ -323,7 +332,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                  U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                 ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                  hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                  ip += repLength2;
                  anchor = ip;
@@ -346,8 +355,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
      ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
      void const* src, size_t srcSize)
  {
-     ZSTD_compressionParameters const* cParams = &ms->cParams;
-     U32 const mls = cParams->minMatch;
+     U32 const mls = ms->cParams.minMatch;
      assert(ms->dictMatchState != NULL);
      switch(mls)
      {
@@ -379,9 +387,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
      const BYTE* ip = istart;
      const BYTE* anchor = istart;
      const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
-     const U32 maxDistance = 1U << cParams->windowLog;
-     const U32 validLow = ms->window.lowLimit;
-     const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
+     const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
      const U32 dictStartIndex = lowLimit;
      const BYTE* const dictStart = dictBase + dictStartIndex;
      const U32 dictLimit = ms->window.dictLimit;
@@ -392,6 +398,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
      const BYTE* const ilimit = iend - 8;
      U32 offset_1=rep[0], offset_2=rep[1];

+     DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
+
      /* switch to "regular" variant if extDict is invalidated due to maxDistance */
      if (prefixStartIndex == dictStartIndex)
          return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
@@ -402,20 +410,22 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
          const U32 matchIndex = hashTable[h];
          const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
          const BYTE* match = matchBase + matchIndex;
-         const U32 current = (U32)(ip-base);
-         const U32 repIndex = current + 1 - offset_1;
+         const U32 curr = (U32)(ip-base);
+         const U32 repIndex = curr + 1 - offset_1;
          const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
          const BYTE* const repMatch = repBase + repIndex;
-         size_t mLength;
-         hashTable[h] = current;   /* update hash table */
-         assert(offset_1 <= current +1);   /* check repIndex */
+         hashTable[h] = curr;   /* update hash table */
+         DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);

-         if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3)   /* intentional underflow */ & (repIndex > dictStartIndex))
+         if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3)   /* intentional underflow */
+              & (offset_1 < curr+1 - dictStartIndex) )   /* note: we are searching at curr+1 */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
-             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
-             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
+             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+             size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
              ip++;
-             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
+             ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
+             ip += rLength;
+             anchor = ip;
          } else {
              if ( (matchIndex < dictStartIndex) ||
                   (MEM_read32(match) != MEM_read32(ip)) ) {
@@ -423,36 +433,32 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
                  ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                  continue;
              }
-             { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
-               const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
-               U32 offset;
-               mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
+             { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
+               const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
+               U32 const offset = curr - matchIndex;
+               size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
-               offset = current - matchIndex;
-               offset_2 = offset_1;
-               offset_1 = offset;
-               ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+               offset_2 = offset_1; offset_1 = offset;   /* update offset history */
+               ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+               ip += mLength;
+               anchor = ip;
          } }

-         /* found a match : store it */
-         ip += mLength;
-         anchor = ip;
-
          if (ip <= ilimit) {
              /* Fill Table */
-             hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
+             hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
              hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
              /* check immediate repcode */
              while (ip <= ilimit) {
                  U32 const current2 = (U32)(ip-base);
                  U32 const repIndex2 = current2 - offset_2;
-                 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-                 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))   /* intentional overflow */
+                 const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+                 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex))   /* intentional overflow */
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                      const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                      size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
-                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                     ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                     { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }   /* swap offset_2 <=> offset_1 */
+                     ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
                      hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                      ip += repLength2;
                      anchor = ip;
@@ -474,8 +480,7 @@ size_t ZSTD_compressBlock_fast_extDict(
      ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
      void const* src, size_t srcSize)
  {
-     ZSTD_compressionParameters const* cParams = &ms->cParams;
-     U32 const mls = cParams->minMatch;
+     U32 const mls = ms->cParams.minMatch;
      switch(mls)
      {
      default: /* includes case 3 */