zstdlib 0.3.0-x64-mingw32 → 0.8.0-x64-mingw32

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +30 -1
  3. data/README.md +2 -2
  4. data/Rakefile +1 -1
  5. data/ext/zstdlib/extconf.rb +3 -3
  6. data/ext/zstdlib/ruby/zlib-2.7/zstdlib.c +4895 -0
  7. data/ext/zstdlib/ruby/zlib-3.0/zstdlib.c +4994 -0
  8. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/bitstream.h +59 -51
  9. data/ext/zstdlib/zstd-1.5.0/lib/common/compiler.h +289 -0
  10. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/cpu.h +1 -3
  11. data/ext/zstdlib/zstd-1.5.0/lib/common/debug.c +24 -0
  12. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/debug.h +22 -49
  13. data/ext/zstdlib/zstd-1.5.0/lib/common/entropy_common.c +362 -0
  14. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/error_private.c +3 -1
  15. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/error_private.h +8 -4
  16. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/fse.h +50 -42
  17. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/fse_decompress.c +149 -55
  18. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/huf.h +43 -39
  19. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/mem.h +69 -25
  20. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/pool.c +30 -20
  21. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/pool.h +3 -3
  22. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/threading.c +51 -4
  23. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/threading.h +36 -4
  24. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/xxhash.c +40 -92
  25. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/xxhash.h +12 -32
  26. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/zstd_common.c +10 -10
  27. data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_deps.h +111 -0
  28. data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_internal.h +490 -0
  29. data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_trace.h +154 -0
  30. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/fse_compress.c +47 -63
  31. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/hist.c +41 -63
  32. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/hist.h +13 -33
  33. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/huf_compress.c +332 -193
  34. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress.c +6393 -0
  35. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_internal.h +522 -86
  36. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_literals.c +25 -16
  37. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_literals.h +2 -2
  38. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_sequences.c +50 -24
  39. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_sequences.h +11 -4
  40. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress_superblock.c +572 -0
  41. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_cwksp.h +662 -0
  43. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_double_fast.c +43 -41
  44. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_fast.c +85 -80
  46. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_fast.h +2 -2
  47. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_lazy.c +2184 -0
  48. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_lazy.h +125 -0
  49. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_ldm.c +333 -208
  50. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_ldm.h +15 -3
  51. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_opt.c +228 -129
  53. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_opt.h +1 -1
  54. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstdmt_compress.c +151 -440
  55. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstdmt_compress.h +110 -0
  56. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/huf_decompress.c +395 -276
  57. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress.c +628 -231
  60. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.c +606 -380
  61. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_internal.h +39 -9
  63. data/ext/zstdlib/zstd-1.5.0/lib/zdict.h +452 -0
  64. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/zstd.h +740 -153
  65. data/ext/zstdlib/{zstd-1.4.2/lib/common → zstd-1.5.0/lib}/zstd_errors.h +3 -1
  66. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzclose.c +1 -1
  67. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzcompatibility.h +1 -1
  68. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzguts.h +0 -0
  69. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzlib.c +9 -9
  70. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzread.c +16 -8
  71. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzwrite.c +8 -8
  72. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/zstd_zlibwrapper.c +131 -45
  73. data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/zstd_zlibwrapper.h +1 -1
  74. data/lib/2.2/zstdlib.so +0 -0
  75. data/lib/2.3/zstdlib.so +0 -0
  76. data/lib/2.4/zstdlib.so +0 -0
  77. data/lib/2.5/zstdlib.so +0 -0
  78. data/lib/2.6/zstdlib.so +0 -0
  79. data/lib/2.7/zstdlib.so +0 -0
  80. metadata +76 -67
  81. data/ext/zstdlib/zstd-1.4.2/lib/common/compiler.h +0 -147
  82. data/ext/zstdlib/zstd-1.4.2/lib/common/debug.c +0 -44
  83. data/ext/zstdlib/zstd-1.4.2/lib/common/entropy_common.c +0 -236
  84. data/ext/zstdlib/zstd-1.4.2/lib/common/zstd_internal.h +0 -371
  85. data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_compress.c +0 -3904
  86. data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_lazy.c +0 -1111
  87. data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_lazy.h +0 -67
  88. data/ext/zstdlib/zstd-1.4.2/lib/compress/zstdmt_compress.h +0 -192
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -31,15 +31,15 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
31
31
  * is empty.
32
32
  */
33
33
  for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
34
- U32 const current = (U32)(ip - base);
34
+ U32 const curr = (U32)(ip - base);
35
35
  U32 i;
36
36
  for (i = 0; i < fastHashFillStep; ++i) {
37
37
  size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
38
38
  size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
39
39
  if (i == 0)
40
- hashSmall[smHash] = current + i;
40
+ hashSmall[smHash] = curr + i;
41
41
  if (i == 0 || hashLarge[lgHash] == 0)
42
- hashLarge[lgHash] = current + i;
42
+ hashLarge[lgHash] = curr + i;
43
43
  /* Only load extra positions for ZSTD_dtlm_full */
44
44
  if (dtlm == ZSTD_dtlm_fast)
45
45
  break;
@@ -63,9 +63,8 @@ size_t ZSTD_compressBlock_doubleFast_generic(
63
63
  const BYTE* ip = istart;
64
64
  const BYTE* anchor = istart;
65
65
  const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
66
- const U32 lowestValid = ms->window.dictLimit;
67
- const U32 maxDistance = 1U << cParams->windowLog;
68
- const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
66
+ /* presumes that, if there is a dictionary, it must be using Attach mode */
67
+ const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
69
68
  const BYTE* const prefixLowest = base + prefixLowestIndex;
70
69
  const BYTE* const iend = istart + srcSize;
71
70
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -95,7 +94,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
95
94
  dictCParams->hashLog : hBitsL;
96
95
  const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
97
96
  dictCParams->chainLog : hBitsS;
98
- const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
97
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
99
98
 
100
99
  DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
101
100
 
@@ -103,13 +102,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
103
102
 
104
103
  /* if a dictionary is attached, it must be within window range */
105
104
  if (dictMode == ZSTD_dictMatchState) {
106
- assert(lowestValid + maxDistance >= endIndex);
105
+ assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
107
106
  }
108
107
 
109
108
  /* init */
110
109
  ip += (dictAndPrefixLength == 0);
111
110
  if (dictMode == ZSTD_noDict) {
112
- U32 const maxRep = (U32)(ip - prefixLowest);
111
+ U32 const curr = (U32)(ip - base);
112
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
113
+ U32 const maxRep = curr - windowLow;
113
114
  if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
114
115
  if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
115
116
  }
@@ -128,17 +129,17 @@ size_t ZSTD_compressBlock_doubleFast_generic(
128
129
  size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
129
130
  size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
130
131
  size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
131
- U32 const current = (U32)(ip-base);
132
+ U32 const curr = (U32)(ip-base);
132
133
  U32 const matchIndexL = hashLong[h2];
133
134
  U32 matchIndexS = hashSmall[h];
134
135
  const BYTE* matchLong = base + matchIndexL;
135
136
  const BYTE* match = base + matchIndexS;
136
- const U32 repIndex = current + 1 - offset_1;
137
+ const U32 repIndex = curr + 1 - offset_1;
137
138
  const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
138
139
  && repIndex < prefixLowestIndex) ?
139
140
  dictBase + (repIndex - dictIndexDelta) :
140
141
  base + repIndex;
141
- hashLong[h2] = hashSmall[h] = current; /* update hash tables */
142
+ hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
142
143
 
143
144
  /* check dictMatchState repcode */
144
145
  if (dictMode == ZSTD_dictMatchState
@@ -147,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
147
148
  const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
148
149
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
149
150
  ip++;
150
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
151
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
151
152
  goto _match_stored;
152
153
  }
153
154
 
@@ -156,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
156
157
  && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
157
158
  mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
158
159
  ip++;
159
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
160
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
160
161
  goto _match_stored;
161
162
  }
162
163
 
@@ -176,7 +177,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
176
177
 
177
178
  if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
178
179
  mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
179
- offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
180
+ offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
180
181
  while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
181
182
  goto _match_found;
182
183
  } }
@@ -197,6 +198,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
197
198
  } }
198
199
 
199
200
  ip += ((ip-anchor) >> kSearchStrength) + 1;
201
+ #if defined(__aarch64__)
202
+ PREFETCH_L1(ip+256);
203
+ #endif
200
204
  continue;
201
205
 
202
206
  _search_next_long:
@@ -205,7 +209,7 @@ _search_next_long:
205
209
  size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
206
210
  U32 const matchIndexL3 = hashLong[hl3];
207
211
  const BYTE* matchL3 = base + matchIndexL3;
208
- hashLong[hl3] = current + 1;
212
+ hashLong[hl3] = curr + 1;
209
213
 
210
214
  /* check prefix long +1 match */
211
215
  if (matchIndexL3 > prefixLowestIndex) {
@@ -224,7 +228,7 @@ _search_next_long:
224
228
  if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
225
229
  mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
226
230
  ip++;
227
- offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
231
+ offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
228
232
  while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
229
233
  goto _match_found;
230
234
  } } }
@@ -232,7 +236,7 @@ _search_next_long:
232
236
  /* if no long +1 match, explore the short match we found */
233
237
  if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
234
238
  mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
235
- offset = (U32)(current - matchIndexS);
239
+ offset = (U32)(curr - matchIndexS);
236
240
  while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
237
241
  } else {
238
242
  mLength = ZSTD_count(ip+4, match+4, iend) + 4;
@@ -246,7 +250,7 @@ _match_found:
246
250
  offset_2 = offset_1;
247
251
  offset_1 = offset;
248
252
 
249
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
253
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
250
254
 
251
255
  _match_stored:
252
256
  /* match found */
@@ -256,7 +260,7 @@ _match_stored:
256
260
  if (ip <= ilimit) {
257
261
  /* Complementary insertion */
258
262
  /* done after iLimit test, as candidates could be > iend-8 */
259
- { U32 const indexToInsert = current+2;
263
+ { U32 const indexToInsert = curr+2;
260
264
  hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
261
265
  hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
262
266
  hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -270,14 +274,14 @@ _match_stored:
270
274
  U32 const repIndex2 = current2 - offset_2;
271
275
  const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
272
276
  && repIndex2 < prefixLowestIndex ?
273
- dictBase - dictIndexDelta + repIndex2 :
277
+ dictBase + repIndex2 - dictIndexDelta :
274
278
  base + repIndex2;
275
279
  if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
276
280
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
277
281
  const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
278
282
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
279
283
  U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
280
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
284
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
281
285
  hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
282
286
  hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
283
287
  ip += repLength2;
@@ -296,7 +300,7 @@ _match_stored:
296
300
  U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
297
301
  hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
298
302
  hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
299
- ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
303
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
300
304
  ip += rLength;
301
305
  anchor = ip;
302
306
  continue; /* faster when present ... (?) */
@@ -369,9 +373,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
369
373
  const BYTE* const ilimit = iend - 8;
370
374
  const BYTE* const base = ms->window.base;
371
375
  const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
372
- const U32 maxDistance = 1U << cParams->windowLog;
373
- const U32 lowestValid = ms->window.lowLimit;
374
- const U32 lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
376
+ const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
375
377
  const U32 dictStartIndex = lowLimit;
376
378
  const U32 dictLimit = ms->window.dictLimit;
377
379
  const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
@@ -399,31 +401,31 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
399
401
  const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
400
402
  const BYTE* matchLong = matchLongBase + matchLongIndex;
401
403
 
402
- const U32 current = (U32)(ip-base);
403
- const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
404
+ const U32 curr = (U32)(ip-base);
405
+ const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
404
406
  const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
405
407
  const BYTE* const repMatch = repBase + repIndex;
406
408
  size_t mLength;
407
- hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
409
+ hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
408
410
 
409
411
  if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
410
- & (repIndex > dictStartIndex))
412
+ & (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
411
413
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
412
414
  const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
413
415
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
414
416
  ip++;
415
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
417
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
416
418
  } else {
417
419
  if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
418
420
  const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
419
421
  const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
420
422
  U32 offset;
421
423
  mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
422
- offset = current - matchLongIndex;
424
+ offset = curr - matchLongIndex;
423
425
  while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
424
426
  offset_2 = offset_1;
425
427
  offset_1 = offset;
426
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
428
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
427
429
 
428
430
  } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
429
431
  size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -431,24 +433,24 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
431
433
  const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
432
434
  const BYTE* match3 = match3Base + matchIndex3;
433
435
  U32 offset;
434
- hashLong[h3] = current + 1;
436
+ hashLong[h3] = curr + 1;
435
437
  if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
436
438
  const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
437
439
  const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
438
440
  mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
439
441
  ip++;
440
- offset = current+1 - matchIndex3;
442
+ offset = curr+1 - matchIndex3;
441
443
  while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
442
444
  } else {
443
445
  const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
444
446
  const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
445
447
  mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
446
- offset = current - matchIndex;
448
+ offset = curr - matchIndex;
447
449
  while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
448
450
  }
449
451
  offset_2 = offset_1;
450
452
  offset_1 = offset;
451
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
453
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
452
454
 
453
455
  } else {
454
456
  ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -462,7 +464,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
462
464
  if (ip <= ilimit) {
463
465
  /* Complementary insertion */
464
466
  /* done after iLimit test, as candidates could be > iend-8 */
465
- { U32 const indexToInsert = current+2;
467
+ { U32 const indexToInsert = curr+2;
466
468
  hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
467
469
  hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
468
470
  hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -475,12 +477,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
475
477
  U32 const repIndex2 = current2 - offset_2;
476
478
  const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
477
479
  if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
478
- & (repIndex2 > dictStartIndex))
480
+ & (offset_2 < current2 - dictStartIndex))
479
481
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
480
482
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
481
483
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
482
484
  U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
483
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
485
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
484
486
  hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
485
487
  hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
486
488
  ip += repLength2;
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
15
15
  extern "C" {
16
16
  #endif
17
17
 
18
- #include "mem.h" /* U32 */
18
+ #include "../common/mem.h" /* U32 */
19
19
  #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
20
20
 
21
21
  void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -8,7 +8,7 @@
8
8
  * You may select, at your option, one of the above-listed licenses.
9
9
  */
10
10
 
11
- #include "zstd_compress_internal.h"
11
+ #include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
12
12
  #include "zstd_fast.h"
13
13
 
14
14
 
@@ -29,22 +29,22 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
29
29
  * Insert the other positions if their hash entry is empty.
30
30
  */
31
31
  for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
32
- U32 const current = (U32)(ip - base);
32
+ U32 const curr = (U32)(ip - base);
33
33
  size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
34
- hashTable[hash0] = current;
34
+ hashTable[hash0] = curr;
35
35
  if (dtlm == ZSTD_dtlm_fast) continue;
36
36
  /* Only load extra positions for ZSTD_dtlm_full */
37
37
  { U32 p;
38
38
  for (p = 1; p < fastHashFillStep; ++p) {
39
39
  size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
40
40
  if (hashTable[hash] == 0) { /* not yet filled */
41
- hashTable[hash] = current + p;
41
+ hashTable[hash] = curr + p;
42
42
  } } } }
43
43
  }
44
44
 
45
45
 
46
- FORCE_INLINE_TEMPLATE
47
- size_t ZSTD_compressBlock_fast_generic(
46
+ FORCE_INLINE_TEMPLATE size_t
47
+ ZSTD_compressBlock_fast_generic(
48
48
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
49
49
  void const* src, size_t srcSize,
50
50
  U32 const mls)
@@ -61,9 +61,7 @@ size_t ZSTD_compressBlock_fast_generic(
61
61
  const BYTE* ip1;
62
62
  const BYTE* anchor = istart;
63
63
  const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
64
- const U32 maxDistance = 1U << cParams->windowLog;
65
- const U32 validStartIndex = ms->window.dictLimit;
66
- const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
64
+ const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
67
65
  const BYTE* const prefixStart = base + prefixStartIndex;
68
66
  const BYTE* const iend = istart + srcSize;
69
67
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -71,15 +69,24 @@ size_t ZSTD_compressBlock_fast_generic(
71
69
  U32 offsetSaved = 0;
72
70
 
73
71
  /* init */
72
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
74
73
  ip0 += (ip0 == prefixStart);
75
74
  ip1 = ip0 + 1;
76
- {
77
- U32 const maxRep = (U32)(ip0 - prefixStart);
75
+ { U32 const curr = (U32)(ip0 - base);
76
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
77
+ U32 const maxRep = curr - windowLow;
78
78
  if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
79
79
  if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
80
80
  }
81
81
 
82
82
  /* Main Search Loop */
83
+ #ifdef __INTEL_COMPILER
84
+ /* From intel 'The vector pragma indicates that the loop should be
85
+ * vectorized if it is legal to do so'. Can be used together with
86
+ * #pragma ivdep (but have opted to exclude that because intel
87
+ * warns against using it).*/
88
+ #pragma vector always
89
+ #endif
83
90
  while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
84
91
  size_t mLength;
85
92
  BYTE const* ip2 = ip0 + 2;
@@ -91,19 +98,25 @@ size_t ZSTD_compressBlock_fast_generic(
91
98
  U32 const current1 = (U32)(ip1-base);
92
99
  U32 const matchIndex0 = hashTable[h0];
93
100
  U32 const matchIndex1 = hashTable[h1];
94
- BYTE const* repMatch = ip2-offset_1;
101
+ BYTE const* repMatch = ip2 - offset_1;
95
102
  const BYTE* match0 = base + matchIndex0;
96
103
  const BYTE* match1 = base + matchIndex1;
97
104
  U32 offcode;
105
+
106
+ #if defined(__aarch64__)
107
+ PREFETCH_L1(ip0+256);
108
+ #endif
109
+
98
110
  hashTable[h0] = current0; /* update hash table */
99
111
  hashTable[h1] = current1; /* update hash table */
100
112
 
101
113
  assert(ip0 + 1 == ip1);
102
114
 
103
115
  if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
104
- mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
116
+ mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
105
117
  ip0 = ip2 - mLength;
106
118
  match0 = repMatch - mLength;
119
+ mLength += 4;
107
120
  offcode = 0;
108
121
  goto _match;
109
122
  }
@@ -117,8 +130,7 @@ size_t ZSTD_compressBlock_fast_generic(
117
130
  match0 = match1;
118
131
  goto _offset;
119
132
  }
120
- {
121
- size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
133
+ { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
122
134
  assert(step >= 2);
123
135
  ip0 += step;
124
136
  ip1 += step;
@@ -129,19 +141,18 @@ _offset: /* Requires: ip0, match0 */
129
141
  offset_2 = offset_1;
130
142
  offset_1 = (U32)(ip0-match0);
131
143
  offcode = offset_1 + ZSTD_REP_MOVE;
132
- mLength = 0;
144
+ mLength = 4;
133
145
  /* Count the backwards match length */
134
146
  while (((ip0>anchor) & (match0>prefixStart))
135
147
  && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
136
148
 
137
149
  _match: /* Requires: ip0, match0, offcode */
138
150
  /* Count the forward length */
139
- mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
140
- ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH);
151
+ mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
152
+ ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
141
153
  /* match found */
142
154
  ip0 += mLength;
143
155
  anchor = ip0;
144
- ip1 = ip0 + 1;
145
156
 
146
157
  if (ip0 <= ilimit) {
147
158
  /* Fill Table */
@@ -149,20 +160,18 @@ _match: /* Requires: ip0, match0, offcode */
149
160
  hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
150
161
  hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
151
162
 
152
- while ( (ip0 <= ilimit)
153
- && ( (offset_2>0)
154
- & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) {
155
- /* store sequence */
156
- size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
157
- U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
158
- hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
159
- ip0 += rLength;
160
- ip1 = ip0 + 1;
161
- ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
162
- anchor = ip0;
163
- continue; /* faster when present (confirmed on gcc-8) ... (?) */
164
- }
165
- }
163
+ if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
164
+ while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
165
+ /* store sequence */
166
+ size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
167
+ { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
168
+ hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
169
+ ip0 += rLength;
170
+ ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
171
+ anchor = ip0;
172
+ continue; /* faster when present (confirmed on gcc-8) ... (?) */
173
+ } } }
174
+ ip1 = ip0 + 1;
166
175
  }
167
176
 
168
177
  /* save reps for next block */
@@ -178,8 +187,7 @@ size_t ZSTD_compressBlock_fast(
178
187
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
179
188
  void const* src, size_t srcSize)
180
189
  {
181
- ZSTD_compressionParameters const* cParams = &ms->cParams;
182
- U32 const mls = cParams->minMatch;
190
+ U32 const mls = ms->cParams.minMatch;
183
191
  assert(ms->dictMatchState == NULL);
184
192
  switch(mls)
185
193
  {
@@ -234,11 +242,12 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
234
242
  assert(endIndex - prefixStartIndex <= maxDistance);
235
243
  (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
236
244
 
237
- /* ensure there will be no no underflow
245
+ /* ensure there will be no underflow
238
246
  * when translating a dict index into a local index */
239
247
  assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
240
248
 
241
249
  /* init */
250
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
242
251
  ip += (dictAndPrefixLength == 0);
243
252
  /* dictMatchState repCode checks don't currently handle repCode == 0
244
253
  * disabling. */
@@ -249,21 +258,21 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
249
258
  while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
250
259
  size_t mLength;
251
260
  size_t const h = ZSTD_hashPtr(ip, hlog, mls);
252
- U32 const current = (U32)(ip-base);
261
+ U32 const curr = (U32)(ip-base);
253
262
  U32 const matchIndex = hashTable[h];
254
263
  const BYTE* match = base + matchIndex;
255
- const U32 repIndex = current + 1 - offset_1;
264
+ const U32 repIndex = curr + 1 - offset_1;
256
265
  const BYTE* repMatch = (repIndex < prefixStartIndex) ?
257
266
  dictBase + (repIndex - dictIndexDelta) :
258
267
  base + repIndex;
259
- hashTable[h] = current; /* update hash table */
268
+ hashTable[h] = curr; /* update hash table */
260
269
 
261
270
  if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
262
271
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
263
272
  const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
264
273
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
265
274
  ip++;
266
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
275
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
267
276
  } else if ( (matchIndex <= prefixStartIndex) ) {
268
277
  size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
269
278
  U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -275,7 +284,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
275
284
  continue;
276
285
  } else {
277
286
  /* found a dict match */
278
- U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
287
+ U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
279
288
  mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
280
289
  while (((ip>anchor) & (dictMatch>dictStart))
281
290
  && (ip[-1] == dictMatch[-1])) {
@@ -283,7 +292,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
283
292
  } /* catch up */
284
293
  offset_2 = offset_1;
285
294
  offset_1 = offset;
286
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
295
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
287
296
  }
288
297
  } else if (MEM_read32(match) != MEM_read32(ip)) {
289
298
  /* it's not a match, and we're not going to check the dictionary */
@@ -298,7 +307,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
298
307
  && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
299
308
  offset_2 = offset_1;
300
309
  offset_1 = offset;
301
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
310
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
302
311
  }
303
312
 
304
313
  /* match found */
@@ -307,8 +316,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
307
316
 
308
317
  if (ip <= ilimit) {
309
318
  /* Fill Table */
310
- assert(base+current+2 > istart); /* check base overflow */
311
- hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
319
+ assert(base+curr+2 > istart); /* check base overflow */
320
+ hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
312
321
  hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
313
322
 
314
323
  /* check immediate repcode */
@@ -323,7 +332,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
323
332
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
324
333
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
325
334
  U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
326
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
335
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
327
336
  hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
328
337
  ip += repLength2;
329
338
  anchor = ip;
@@ -346,8 +355,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
346
355
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
347
356
  void const* src, size_t srcSize)
348
357
  {
349
- ZSTD_compressionParameters const* cParams = &ms->cParams;
350
- U32 const mls = cParams->minMatch;
358
+ U32 const mls = ms->cParams.minMatch;
351
359
  assert(ms->dictMatchState != NULL);
352
360
  switch(mls)
353
361
  {
@@ -379,9 +387,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
379
387
  const BYTE* ip = istart;
380
388
  const BYTE* anchor = istart;
381
389
  const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
382
- const U32 maxDistance = 1U << cParams->windowLog;
383
- const U32 validLow = ms->window.lowLimit;
384
- const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
390
+ const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
385
391
  const U32 dictStartIndex = lowLimit;
386
392
  const BYTE* const dictStart = dictBase + dictStartIndex;
387
393
  const U32 dictLimit = ms->window.dictLimit;
@@ -392,6 +398,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
392
398
  const BYTE* const ilimit = iend - 8;
393
399
  U32 offset_1=rep[0], offset_2=rep[1];
394
400
 
401
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
402
+
395
403
  /* switch to "regular" variant if extDict is invalidated due to maxDistance */
396
404
  if (prefixStartIndex == dictStartIndex)
397
405
  return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
@@ -402,20 +410,22 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
402
410
  const U32 matchIndex = hashTable[h];
403
411
  const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
404
412
  const BYTE* match = matchBase + matchIndex;
405
- const U32 current = (U32)(ip-base);
406
- const U32 repIndex = current + 1 - offset_1;
413
+ const U32 curr = (U32)(ip-base);
414
+ const U32 repIndex = curr + 1 - offset_1;
407
415
  const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
408
416
  const BYTE* const repMatch = repBase + repIndex;
409
- size_t mLength;
410
- hashTable[h] = current; /* update hash table */
411
- assert(offset_1 <= current +1); /* check repIndex */
417
+ hashTable[h] = curr; /* update hash table */
418
+ DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
412
419
 
413
- if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
420
+ if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
421
+ & (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
414
422
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
415
- const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
416
- mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
423
+ const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
424
+ size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
417
425
  ip++;
418
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
426
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
427
+ ip += rLength;
428
+ anchor = ip;
419
429
  } else {
420
430
  if ( (matchIndex < dictStartIndex) ||
421
431
  (MEM_read32(match) != MEM_read32(ip)) ) {
@@ -423,36 +433,32 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
423
433
  ip += ((ip-anchor) >> kSearchStrength) + stepSize;
424
434
  continue;
425
435
  }
426
- { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
427
- const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
428
- U32 offset;
429
- mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
436
+ { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
437
+ const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
438
+ U32 const offset = curr - matchIndex;
439
+ size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
430
440
  while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
431
- offset = current - matchIndex;
432
- offset_2 = offset_1;
433
- offset_1 = offset;
434
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
441
+ offset_2 = offset_1; offset_1 = offset; /* update offset history */
442
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
443
+ ip += mLength;
444
+ anchor = ip;
435
445
  } }
436
446
 
437
- /* found a match : store it */
438
- ip += mLength;
439
- anchor = ip;
440
-
441
447
  if (ip <= ilimit) {
442
448
  /* Fill Table */
443
- hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
449
+ hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
444
450
  hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
445
451
  /* check immediate repcode */
446
452
  while (ip <= ilimit) {
447
453
  U32 const current2 = (U32)(ip-base);
448
454
  U32 const repIndex2 = current2 - offset_2;
449
- const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
450
- if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
455
+ const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
456
+ if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex)) /* intentional overflow */
451
457
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
452
458
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
453
459
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
454
- U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
455
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
460
+ { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
461
+ ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
456
462
  hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
457
463
  ip += repLength2;
458
464
  anchor = ip;
@@ -474,8 +480,7 @@ size_t ZSTD_compressBlock_fast_extDict(
474
480
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
475
481
  void const* src, size_t srcSize)
476
482
  {
477
- ZSTD_compressionParameters const* cParams = &ms->cParams;
478
- U32 const mls = cParams->minMatch;
483
+ U32 const mls = ms->cParams.minMatch;
479
484
  switch(mls)
480
485
  {
481
486
  default: /* includes case 3 */