zstd-ruby 1.3.8.0 → 1.4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +6 -5
  3. data/README.md +1 -1
  4. data/ext/zstdruby/libzstd/Makefile +133 -61
  5. data/ext/zstdruby/libzstd/README.md +51 -18
  6. data/ext/zstdruby/libzstd/common/bitstream.h +38 -39
  7. data/ext/zstdruby/libzstd/common/compiler.h +41 -6
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -1
  9. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  10. data/ext/zstdruby/libzstd/common/debug.h +11 -31
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +13 -33
  12. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +6 -2
  14. data/ext/zstdruby/libzstd/common/fse.h +13 -33
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +12 -35
  16. data/ext/zstdruby/libzstd/common/huf.h +15 -33
  17. data/ext/zstdruby/libzstd/common/mem.h +75 -2
  18. data/ext/zstdruby/libzstd/common/pool.c +8 -4
  19. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  20. data/ext/zstdruby/libzstd/common/threading.c +52 -6
  21. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.c +25 -37
  23. data/ext/zstdruby/libzstd/common/xxhash.h +11 -31
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
  25. data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
  26. data/ext/zstdruby/libzstd/common/zstd_internal.h +203 -22
  27. data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -42
  28. data/ext/zstdruby/libzstd/compress/hist.c +15 -35
  29. data/ext/zstdruby/libzstd/compress/hist.h +12 -32
  30. data/ext/zstdruby/libzstd/compress/huf_compress.c +92 -92
  31. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1460 -1472
  32. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +330 -65
  33. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +419 -0
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +845 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +525 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +65 -43
  41. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  42. data/ext/zstdruby/libzstd/compress/zstd_fast.c +264 -159
  43. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  44. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +74 -42
  45. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +2 -2
  46. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +33 -11
  47. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +7 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_opt.c +108 -125
  49. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  50. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +129 -93
  51. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +46 -28
  52. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +76 -60
  53. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +14 -10
  54. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  55. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +471 -258
  56. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +471 -346
  57. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +3 -3
  58. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +25 -4
  59. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  60. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  61. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  62. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  63. data/ext/zstdruby/libzstd/dictBuilder/cover.c +220 -65
  64. data/ext/zstdruby/libzstd/dictBuilder/cover.h +81 -7
  65. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +85 -56
  66. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +43 -19
  67. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +73 -35
  68. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  69. data/ext/zstdruby/libzstd/dll/example/build_package.bat +3 -2
  70. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +49 -15
  71. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +142 -117
  72. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +13 -8
  73. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +54 -25
  74. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +13 -8
  75. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +55 -25
  76. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +13 -8
  77. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +62 -29
  78. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +13 -8
  79. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +145 -109
  80. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +14 -9
  81. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +56 -26
  82. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +11 -6
  83. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +65 -28
  84. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +11 -6
  85. data/ext/zstdruby/libzstd/libzstd.pc.in +3 -2
  86. data/ext/zstdruby/libzstd/zstd.h +921 -597
  87. data/lib/zstd-ruby/version.rb +1 -1
  88. data/zstd-ruby.gemspec +2 -2
  89. metadata +19 -14
  90. data/ext/zstdruby/libzstd/dll/libzstd.def +0 -87
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -43,8 +43,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
43
43
  /* Only load extra positions for ZSTD_dtlm_full */
44
44
  if (dtlm == ZSTD_dtlm_fast)
45
45
  break;
46
- }
47
- }
46
+ } }
48
47
  }
49
48
 
50
49
 
@@ -63,7 +62,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
63
62
  const BYTE* const istart = (const BYTE*)src;
64
63
  const BYTE* ip = istart;
65
64
  const BYTE* anchor = istart;
66
- const U32 prefixLowestIndex = ms->window.dictLimit;
65
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
66
+ /* presumes that, if there is a dictionary, it must be using Attach mode */
67
+ const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
67
68
  const BYTE* const prefixLowest = base + prefixLowestIndex;
68
69
  const BYTE* const iend = istart + srcSize;
69
70
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -93,14 +94,23 @@ size_t ZSTD_compressBlock_doubleFast_generic(
93
94
  dictCParams->hashLog : hBitsL;
94
95
  const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
95
96
  dictCParams->chainLog : hBitsS;
96
- const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
97
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
98
+
99
+ DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
97
100
 
98
101
  assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
99
102
 
103
+ /* if a dictionary is attached, it must be within window range */
104
+ if (dictMode == ZSTD_dictMatchState) {
105
+ assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
106
+ }
107
+
100
108
  /* init */
101
109
  ip += (dictAndPrefixLength == 0);
102
110
  if (dictMode == ZSTD_noDict) {
103
- U32 const maxRep = (U32)(ip - prefixLowest);
111
+ U32 const current = (U32)(ip - base);
112
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
113
+ U32 const maxRep = current - windowLow;
104
114
  if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
105
115
  if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
106
116
  }
@@ -138,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
138
148
  const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
139
149
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
140
150
  ip++;
141
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
151
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
142
152
  goto _match_stored;
143
153
  }
144
154
 
@@ -147,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
147
157
  && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
148
158
  mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
149
159
  ip++;
150
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
160
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
151
161
  goto _match_stored;
152
162
  }
153
163
 
@@ -170,8 +180,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
170
180
  offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
171
181
  while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
172
182
  goto _match_found;
173
- }
174
- }
183
+ } }
175
184
 
176
185
  if (matchIndexS > prefixLowestIndex) {
177
186
  /* check prefix short match */
@@ -186,16 +195,17 @@ size_t ZSTD_compressBlock_doubleFast_generic(
186
195
 
187
196
  if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
188
197
  goto _search_next_long;
189
- }
190
- }
198
+ } }
191
199
 
192
200
  ip += ((ip-anchor) >> kSearchStrength) + 1;
201
+ #if defined(__aarch64__)
202
+ PREFETCH_L1(ip+256);
203
+ #endif
193
204
  continue;
194
205
 
195
206
  _search_next_long:
196
207
 
197
- {
198
- size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
208
+ { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
199
209
  size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
200
210
  U32 const matchIndexL3 = hashLong[hl3];
201
211
  const BYTE* matchL3 = base + matchIndexL3;
@@ -221,9 +231,7 @@ _search_next_long:
221
231
  offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
222
232
  while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
223
233
  goto _match_found;
224
- }
225
- }
226
- }
234
+ } } }
227
235
 
228
236
  /* if no long +1 match, explore the short match we found */
229
237
  if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
@@ -242,7 +250,7 @@ _match_found:
242
250
  offset_2 = offset_1;
243
251
  offset_1 = offset;
244
252
 
245
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
253
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
246
254
 
247
255
  _match_stored:
248
256
  /* match found */
@@ -250,11 +258,14 @@ _match_stored:
250
258
  anchor = ip;
251
259
 
252
260
  if (ip <= ilimit) {
253
- /* Fill Table */
254
- hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
255
- hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */
256
- hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
257
- hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
261
+ /* Complementary insertion */
262
+ /* done after iLimit test, as candidates could be > iend-8 */
263
+ { U32 const indexToInsert = current+2;
264
+ hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
265
+ hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
266
+ hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
267
+ hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
268
+ }
258
269
 
259
270
  /* check immediate repcode */
260
271
  if (dictMode == ZSTD_dictMatchState) {
@@ -263,14 +274,14 @@ _match_stored:
263
274
  U32 const repIndex2 = current2 - offset_2;
264
275
  const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
265
276
  && repIndex2 < prefixLowestIndex ?
266
- dictBase - dictIndexDelta + repIndex2 :
277
+ dictBase + repIndex2 - dictIndexDelta :
267
278
  base + repIndex2;
268
279
  if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
269
280
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
270
281
  const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
271
282
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
272
283
  U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
273
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
284
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
274
285
  hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
275
286
  hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
276
287
  ip += repLength2;
@@ -278,8 +289,7 @@ _match_stored:
278
289
  continue;
279
290
  }
280
291
  break;
281
- }
282
- }
292
+ } }
283
293
 
284
294
  if (dictMode == ZSTD_noDict) {
285
295
  while ( (ip <= ilimit)
@@ -290,18 +300,19 @@ _match_stored:
290
300
  U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
291
301
  hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
292
302
  hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
293
- ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
303
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
294
304
  ip += rLength;
295
305
  anchor = ip;
296
306
  continue; /* faster when present ... (?) */
297
- } } } }
307
+ } } }
308
+ } /* while (ip < ilimit) */
298
309
 
299
310
  /* save reps for next block */
300
311
  rep[0] = offset_1 ? offset_1 : offsetSaved;
301
312
  rep[1] = offset_2 ? offset_2 : offsetSaved;
302
313
 
303
314
  /* Return the last literals size */
304
- return iend - anchor;
315
+ return (size_t)(iend - anchor);
305
316
  }
306
317
 
307
318
 
@@ -360,10 +371,13 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
360
371
  const BYTE* anchor = istart;
361
372
  const BYTE* const iend = istart + srcSize;
362
373
  const BYTE* const ilimit = iend - 8;
363
- const U32 prefixStartIndex = ms->window.dictLimit;
364
374
  const BYTE* const base = ms->window.base;
375
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
376
+ const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
377
+ const U32 dictStartIndex = lowLimit;
378
+ const U32 dictLimit = ms->window.dictLimit;
379
+ const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
365
380
  const BYTE* const prefixStart = base + prefixStartIndex;
366
- const U32 dictStartIndex = ms->window.lowLimit;
367
381
  const BYTE* const dictBase = ms->window.dictBase;
368
382
  const BYTE* const dictStart = dictBase + dictStartIndex;
369
383
  const BYTE* const dictEnd = dictBase + prefixStartIndex;
@@ -371,6 +385,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
371
385
 
372
386
  DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
373
387
 
388
+ /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
389
+ if (prefixStartIndex == dictStartIndex)
390
+ return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
391
+
374
392
  /* Search Loop */
375
393
  while (ip < ilimit) { /* < instead of <=, because (ip+1) */
376
394
  const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
@@ -396,7 +414,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
396
414
  const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
397
415
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
398
416
  ip++;
399
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
417
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
400
418
  } else {
401
419
  if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
402
420
  const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -407,7 +425,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
407
425
  while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
408
426
  offset_2 = offset_1;
409
427
  offset_1 = offset;
410
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
428
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
411
429
 
412
430
  } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
413
431
  size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -432,23 +450,27 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
432
450
  }
433
451
  offset_2 = offset_1;
434
452
  offset_1 = offset;
435
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
453
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
436
454
 
437
455
  } else {
438
456
  ip += ((ip-anchor) >> kSearchStrength) + 1;
439
457
  continue;
440
458
  } }
441
459
 
442
- /* found a match : store it */
460
+ /* move to next sequence start */
443
461
  ip += mLength;
444
462
  anchor = ip;
445
463
 
446
464
  if (ip <= ilimit) {
447
- /* Fill Table */
448
- hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
449
- hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
450
- hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
451
- hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
465
+ /* Complementary insertion */
466
+ /* done after iLimit test, as candidates could be > iend-8 */
467
+ { U32 const indexToInsert = current+2;
468
+ hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
469
+ hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
470
+ hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
471
+ hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
472
+ }
473
+
452
474
  /* check immediate repcode */
453
475
  while (ip <= ilimit) {
454
476
  U32 const current2 = (U32)(ip-base);
@@ -460,7 +482,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
460
482
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
461
483
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
462
484
  U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
463
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
485
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
464
486
  hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
465
487
  hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
466
488
  ip += repLength2;
@@ -475,7 +497,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
475
497
  rep[1] = offset_2;
476
498
 
477
499
  /* Return the last literals size */
478
- return iend - anchor;
500
+ return (size_t)(iend - anchor);
479
501
  }
480
502
 
481
503
 
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
15
15
  extern "C" {
16
16
  #endif
17
17
 
18
- #include "mem.h" /* U32 */
18
+ #include "../common/mem.h" /* U32 */
19
19
  #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
20
20
 
21
21
  void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -8,12 +8,13 @@
8
8
  * You may select, at your option, one of the above-listed licenses.
9
9
  */
10
10
 
11
- #include "zstd_compress_internal.h"
11
+ #include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
12
12
  #include "zstd_fast.h"
13
13
 
14
14
 
15
15
  void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
16
- void const* end, ZSTD_dictTableLoadMethod_e dtlm)
16
+ const void* const end,
17
+ ZSTD_dictTableLoadMethod_e dtlm)
17
18
  {
18
19
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
19
20
  U32* const hashTable = ms->hashTable;
@@ -41,11 +42,171 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
41
42
  } } } }
42
43
  }
43
44
 
44
- FORCE_INLINE_TEMPLATE
45
- size_t ZSTD_compressBlock_fast_generic(
45
+
46
+ FORCE_INLINE_TEMPLATE size_t
47
+ ZSTD_compressBlock_fast_generic(
46
48
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
47
49
  void const* src, size_t srcSize,
48
- U32 const mls, ZSTD_dictMode_e const dictMode)
50
+ U32 const mls)
51
+ {
52
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
53
+ U32* const hashTable = ms->hashTable;
54
+ U32 const hlog = cParams->hashLog;
55
+ /* support stepSize of 0 */
56
+ size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
57
+ const BYTE* const base = ms->window.base;
58
+ const BYTE* const istart = (const BYTE*)src;
59
+ /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
60
+ const BYTE* ip0 = istart;
61
+ const BYTE* ip1;
62
+ const BYTE* anchor = istart;
63
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
64
+ const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
65
+ const BYTE* const prefixStart = base + prefixStartIndex;
66
+ const BYTE* const iend = istart + srcSize;
67
+ const BYTE* const ilimit = iend - HASH_READ_SIZE;
68
+ U32 offset_1=rep[0], offset_2=rep[1];
69
+ U32 offsetSaved = 0;
70
+
71
+ /* init */
72
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
73
+ ip0 += (ip0 == prefixStart);
74
+ ip1 = ip0 + 1;
75
+ { U32 const current = (U32)(ip0 - base);
76
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
77
+ U32 const maxRep = current - windowLow;
78
+ if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
79
+ if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
80
+ }
81
+
82
+ /* Main Search Loop */
83
+ #ifdef __INTEL_COMPILER
84
+ /* From intel 'The vector pragma indicates that the loop should be
85
+ * vectorized if it is legal to do so'. Can be used together with
86
+ * #pragma ivdep (but have opted to exclude that because intel
87
+ * warns against using it).*/
88
+ #pragma vector always
89
+ #endif
90
+ while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
91
+ size_t mLength;
92
+ BYTE const* ip2 = ip0 + 2;
93
+ size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
94
+ U32 const val0 = MEM_read32(ip0);
95
+ size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
96
+ U32 const val1 = MEM_read32(ip1);
97
+ U32 const current0 = (U32)(ip0-base);
98
+ U32 const current1 = (U32)(ip1-base);
99
+ U32 const matchIndex0 = hashTable[h0];
100
+ U32 const matchIndex1 = hashTable[h1];
101
+ BYTE const* repMatch = ip2 - offset_1;
102
+ const BYTE* match0 = base + matchIndex0;
103
+ const BYTE* match1 = base + matchIndex1;
104
+ U32 offcode;
105
+
106
+ #if defined(__aarch64__)
107
+ PREFETCH_L1(ip0+256);
108
+ #endif
109
+
110
+ hashTable[h0] = current0; /* update hash table */
111
+ hashTable[h1] = current1; /* update hash table */
112
+
113
+ assert(ip0 + 1 == ip1);
114
+
115
+ if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
116
+ mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
117
+ ip0 = ip2 - mLength;
118
+ match0 = repMatch - mLength;
119
+ mLength += 4;
120
+ offcode = 0;
121
+ goto _match;
122
+ }
123
+ if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
124
+ /* found a regular match */
125
+ goto _offset;
126
+ }
127
+ if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
128
+ /* found a regular match after one literal */
129
+ ip0 = ip1;
130
+ match0 = match1;
131
+ goto _offset;
132
+ }
133
+ { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
134
+ assert(step >= 2);
135
+ ip0 += step;
136
+ ip1 += step;
137
+ continue;
138
+ }
139
+ _offset: /* Requires: ip0, match0 */
140
+ /* Compute the offset code */
141
+ offset_2 = offset_1;
142
+ offset_1 = (U32)(ip0-match0);
143
+ offcode = offset_1 + ZSTD_REP_MOVE;
144
+ mLength = 4;
145
+ /* Count the backwards match length */
146
+ while (((ip0>anchor) & (match0>prefixStart))
147
+ && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
148
+
149
+ _match: /* Requires: ip0, match0, offcode */
150
+ /* Count the forward length */
151
+ mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
152
+ ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
153
+ /* match found */
154
+ ip0 += mLength;
155
+ anchor = ip0;
156
+
157
+ if (ip0 <= ilimit) {
158
+ /* Fill Table */
159
+ assert(base+current0+2 > istart); /* check base overflow */
160
+ hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
161
+ hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
162
+
163
+ if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
164
+ while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
165
+ /* store sequence */
166
+ size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
167
+ { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
168
+ hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
169
+ ip0 += rLength;
170
+ ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
171
+ anchor = ip0;
172
+ continue; /* faster when present (confirmed on gcc-8) ... (?) */
173
+ } } }
174
+ ip1 = ip0 + 1;
175
+ }
176
+
177
+ /* save reps for next block */
178
+ rep[0] = offset_1 ? offset_1 : offsetSaved;
179
+ rep[1] = offset_2 ? offset_2 : offsetSaved;
180
+
181
+ /* Return the last literals size */
182
+ return (size_t)(iend - anchor);
183
+ }
184
+
185
+
186
+ size_t ZSTD_compressBlock_fast(
187
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
188
+ void const* src, size_t srcSize)
189
+ {
190
+ U32 const mls = ms->cParams.minMatch;
191
+ assert(ms->dictMatchState == NULL);
192
+ switch(mls)
193
+ {
194
+ default: /* includes case 3 */
195
+ case 4 :
196
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
197
+ case 5 :
198
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
199
+ case 6 :
200
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
201
+ case 7 :
202
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
203
+ }
204
+ }
205
+
206
+ FORCE_INLINE_TEMPLATE
207
+ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
208
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
209
+ void const* src, size_t srcSize, U32 const mls)
49
210
  {
50
211
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
51
212
  U32* const hashTable = ms->hashTable;
@@ -64,46 +225,34 @@ size_t ZSTD_compressBlock_fast_generic(
64
225
  U32 offsetSaved = 0;
65
226
 
66
227
  const ZSTD_matchState_t* const dms = ms->dictMatchState;
67
- const ZSTD_compressionParameters* const dictCParams =
68
- dictMode == ZSTD_dictMatchState ?
69
- &dms->cParams : NULL;
70
- const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ?
71
- dms->hashTable : NULL;
72
- const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
73
- dms->window.dictLimit : 0;
74
- const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
75
- dms->window.base : NULL;
76
- const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
77
- dictBase + dictStartIndex : NULL;
78
- const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
79
- dms->window.nextSrc : NULL;
80
- const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
81
- prefixStartIndex - (U32)(dictEnd - dictBase) :
82
- 0;
228
+ const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
229
+ const U32* const dictHashTable = dms->hashTable;
230
+ const U32 dictStartIndex = dms->window.dictLimit;
231
+ const BYTE* const dictBase = dms->window.base;
232
+ const BYTE* const dictStart = dictBase + dictStartIndex;
233
+ const BYTE* const dictEnd = dms->window.nextSrc;
234
+ const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
83
235
  const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
84
- const U32 dictHLog = dictMode == ZSTD_dictMatchState ?
85
- dictCParams->hashLog : hlog;
236
+ const U32 dictHLog = dictCParams->hashLog;
86
237
 
87
- assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
238
+ /* if a dictionary is still attached, it necessarily means that
239
+ * it is within window size. So we just check it. */
240
+ const U32 maxDistance = 1U << cParams->windowLog;
241
+ const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
242
+ assert(endIndex - prefixStartIndex <= maxDistance);
243
+ (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
88
244
 
89
- /* otherwise, we would get index underflow when translating a dict index
90
- * into a local index */
91
- assert(dictMode != ZSTD_dictMatchState
92
- || prefixStartIndex >= (U32)(dictEnd - dictBase));
245
+ /* ensure there will be no no underflow
246
+ * when translating a dict index into a local index */
247
+ assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
93
248
 
94
249
  /* init */
250
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
95
251
  ip += (dictAndPrefixLength == 0);
96
- if (dictMode == ZSTD_noDict) {
97
- U32 const maxRep = (U32)(ip - prefixStart);
98
- if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
99
- if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
100
- }
101
- if (dictMode == ZSTD_dictMatchState) {
102
- /* dictMatchState repCode checks don't currently handle repCode == 0
103
- * disabling. */
104
- assert(offset_1 <= dictAndPrefixLength);
105
- assert(offset_2 <= dictAndPrefixLength);
106
- }
252
+ /* dictMatchState repCode checks don't currently handle repCode == 0
253
+ * disabling. */
254
+ assert(offset_1 <= dictAndPrefixLength);
255
+ assert(offset_2 <= dictAndPrefixLength);
107
256
 
108
257
  /* Main Search Loop */
109
258
  while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
@@ -113,50 +262,37 @@ size_t ZSTD_compressBlock_fast_generic(
113
262
  U32 const matchIndex = hashTable[h];
114
263
  const BYTE* match = base + matchIndex;
115
264
  const U32 repIndex = current + 1 - offset_1;
116
- const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
117
- && repIndex < prefixStartIndex) ?
265
+ const BYTE* repMatch = (repIndex < prefixStartIndex) ?
118
266
  dictBase + (repIndex - dictIndexDelta) :
119
267
  base + repIndex;
120
268
  hashTable[h] = current; /* update hash table */
121
269
 
122
- if ( (dictMode == ZSTD_dictMatchState)
123
- && ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
270
+ if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
124
271
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
125
272
  const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
126
273
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
127
274
  ip++;
128
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
129
- } else if ( dictMode == ZSTD_noDict
130
- && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
131
- mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
132
- ip++;
133
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
275
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
134
276
  } else if ( (matchIndex <= prefixStartIndex) ) {
135
- if (dictMode == ZSTD_dictMatchState) {
136
- size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
137
- U32 const dictMatchIndex = dictHashTable[dictHash];
138
- const BYTE* dictMatch = dictBase + dictMatchIndex;
139
- if (dictMatchIndex <= dictStartIndex ||
140
- MEM_read32(dictMatch) != MEM_read32(ip)) {
141
- assert(stepSize >= 1);
142
- ip += ((ip-anchor) >> kSearchStrength) + stepSize;
143
- continue;
144
- } else {
145
- /* found a dict match */
146
- U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
147
- mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
148
- while (((ip>anchor) & (dictMatch>dictStart))
149
- && (ip[-1] == dictMatch[-1])) {
150
- ip--; dictMatch--; mLength++;
151
- } /* catch up */
152
- offset_2 = offset_1;
153
- offset_1 = offset;
154
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
155
- }
156
- } else {
277
+ size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
278
+ U32 const dictMatchIndex = dictHashTable[dictHash];
279
+ const BYTE* dictMatch = dictBase + dictMatchIndex;
280
+ if (dictMatchIndex <= dictStartIndex ||
281
+ MEM_read32(dictMatch) != MEM_read32(ip)) {
157
282
  assert(stepSize >= 1);
158
283
  ip += ((ip-anchor) >> kSearchStrength) + stepSize;
159
284
  continue;
285
+ } else {
286
+ /* found a dict match */
287
+ U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
288
+ mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
289
+ while (((ip>anchor) & (dictMatch>dictStart))
290
+ && (ip[-1] == dictMatch[-1])) {
291
+ ip--; dictMatch--; mLength++;
292
+ } /* catch up */
293
+ offset_2 = offset_1;
294
+ offset_1 = offset;
295
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
160
296
  }
161
297
  } else if (MEM_read32(match) != MEM_read32(ip)) {
162
298
  /* it's not a match, and we're not going to check the dictionary */
@@ -171,7 +307,7 @@ size_t ZSTD_compressBlock_fast_generic(
171
307
  && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
172
308
  offset_2 = offset_1;
173
309
  offset_1 = offset;
174
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
310
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
175
311
  }
176
312
 
177
313
  /* match found */
@@ -185,90 +321,53 @@ size_t ZSTD_compressBlock_fast_generic(
185
321
  hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
186
322
 
187
323
  /* check immediate repcode */
188
- if (dictMode == ZSTD_dictMatchState) {
189
- while (ip <= ilimit) {
190
- U32 const current2 = (U32)(ip-base);
191
- U32 const repIndex2 = current2 - offset_2;
192
- const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
193
- dictBase - dictIndexDelta + repIndex2 :
194
- base + repIndex2;
195
- if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
196
- && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
197
- const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
198
- size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
199
- U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
200
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
201
- hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
202
- ip += repLength2;
203
- anchor = ip;
204
- continue;
205
- }
206
- break;
324
+ while (ip <= ilimit) {
325
+ U32 const current2 = (U32)(ip-base);
326
+ U32 const repIndex2 = current2 - offset_2;
327
+ const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
328
+ dictBase - dictIndexDelta + repIndex2 :
329
+ base + repIndex2;
330
+ if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
331
+ && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
332
+ const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
333
+ size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
334
+ U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
335
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
336
+ hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
337
+ ip += repLength2;
338
+ anchor = ip;
339
+ continue;
207
340
  }
341
+ break;
208
342
  }
209
-
210
- if (dictMode == ZSTD_noDict) {
211
- while ( (ip <= ilimit)
212
- && ( (offset_2>0)
213
- & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
214
- /* store sequence */
215
- size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
216
- U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
217
- hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
218
- ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
219
- ip += rLength;
220
- anchor = ip;
221
- continue; /* faster when present ... (?) */
222
- } } } }
343
+ }
344
+ }
223
345
 
224
346
  /* save reps for next block */
225
347
  rep[0] = offset_1 ? offset_1 : offsetSaved;
226
348
  rep[1] = offset_2 ? offset_2 : offsetSaved;
227
349
 
228
350
  /* Return the last literals size */
229
- return iend - anchor;
230
- }
231
-
232
-
233
- size_t ZSTD_compressBlock_fast(
234
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
235
- void const* src, size_t srcSize)
236
- {
237
- ZSTD_compressionParameters const* cParams = &ms->cParams;
238
- U32 const mls = cParams->minMatch;
239
- assert(ms->dictMatchState == NULL);
240
- switch(mls)
241
- {
242
- default: /* includes case 3 */
243
- case 4 :
244
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
245
- case 5 :
246
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
247
- case 6 :
248
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
249
- case 7 :
250
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
251
- }
351
+ return (size_t)(iend - anchor);
252
352
  }
253
353
 
254
354
  size_t ZSTD_compressBlock_fast_dictMatchState(
255
355
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
256
356
  void const* src, size_t srcSize)
257
357
  {
258
- ZSTD_compressionParameters const* cParams = &ms->cParams;
259
- U32 const mls = cParams->minMatch;
358
+ U32 const mls = ms->cParams.minMatch;
260
359
  assert(ms->dictMatchState != NULL);
261
360
  switch(mls)
262
361
  {
263
362
  default: /* includes case 3 */
264
363
  case 4 :
265
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
364
+ return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
266
365
  case 5 :
267
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
366
+ return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
268
367
  case 6 :
269
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
368
+ return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
270
369
  case 7 :
271
- return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
370
+ return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
272
371
  }
273
372
  }
274
373
 
@@ -287,15 +386,24 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
287
386
  const BYTE* const istart = (const BYTE*)src;
288
387
  const BYTE* ip = istart;
289
388
  const BYTE* anchor = istart;
290
- const U32 dictStartIndex = ms->window.lowLimit;
389
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
390
+ const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
391
+ const U32 dictStartIndex = lowLimit;
291
392
  const BYTE* const dictStart = dictBase + dictStartIndex;
292
- const U32 prefixStartIndex = ms->window.dictLimit;
393
+ const U32 dictLimit = ms->window.dictLimit;
394
+ const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
293
395
  const BYTE* const prefixStart = base + prefixStartIndex;
294
396
  const BYTE* const dictEnd = dictBase + prefixStartIndex;
295
397
  const BYTE* const iend = istart + srcSize;
296
398
  const BYTE* const ilimit = iend - 8;
297
399
  U32 offset_1=rep[0], offset_2=rep[1];
298
400
 
401
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
402
+
403
+ /* switch to "regular" variant if extDict is invalidated due to maxDistance */
404
+ if (prefixStartIndex == dictStartIndex)
405
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
406
+
299
407
  /* Search Loop */
300
408
  while (ip < ilimit) { /* < instead of <=, because (ip+1) */
301
409
  const size_t h = ZSTD_hashPtr(ip, hlog, mls);
@@ -306,16 +414,18 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
306
414
  const U32 repIndex = current + 1 - offset_1;
307
415
  const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
308
416
  const BYTE* const repMatch = repBase + repIndex;
309
- size_t mLength;
310
417
  hashTable[h] = current; /* update hash table */
418
+ DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);
311
419
  assert(offset_1 <= current +1); /* check repIndex */
312
420
 
313
421
  if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
314
422
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
315
- const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
316
- mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
423
+ const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
424
+ size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
317
425
  ip++;
318
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
426
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
427
+ ip += rLength;
428
+ anchor = ip;
319
429
  } else {
320
430
  if ( (matchIndex < dictStartIndex) ||
321
431
  (MEM_read32(match) != MEM_read32(ip)) ) {
@@ -323,21 +433,17 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
323
433
  ip += ((ip-anchor) >> kSearchStrength) + stepSize;
324
434
  continue;
325
435
  }
326
- { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
327
- const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
328
- U32 offset;
329
- mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
436
+ { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
437
+ const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
438
+ U32 const offset = current - matchIndex;
439
+ size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
330
440
  while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
331
- offset = current - matchIndex;
332
- offset_2 = offset_1;
333
- offset_1 = offset;
334
- ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
441
+ offset_2 = offset_1; offset_1 = offset; /* update offset history */
442
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
443
+ ip += mLength;
444
+ anchor = ip;
335
445
  } }
336
446
 
337
- /* found a match : store it */
338
- ip += mLength;
339
- anchor = ip;
340
-
341
447
  if (ip <= ilimit) {
342
448
  /* Fill Table */
343
449
  hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
@@ -346,13 +452,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
346
452
  while (ip <= ilimit) {
347
453
  U32 const current2 = (U32)(ip-base);
348
454
  U32 const repIndex2 = current2 - offset_2;
349
- const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
455
+ const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
350
456
  if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
351
457
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
352
458
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
353
459
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
354
- U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
355
- ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
460
+ { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
461
+ ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
356
462
  hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
357
463
  ip += repLength2;
358
464
  anchor = ip;
@@ -366,7 +472,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
366
472
  rep[1] = offset_2;
367
473
 
368
474
  /* Return the last literals size */
369
- return iend - anchor;
475
+ return (size_t)(iend - anchor);
370
476
  }
371
477
 
372
478
 
@@ -374,8 +480,7 @@ size_t ZSTD_compressBlock_fast_extDict(
374
480
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
375
481
  void const* src, size_t srcSize)
376
482
  {
377
- ZSTD_compressionParameters const* cParams = &ms->cParams;
378
- U32 const mls = cParams->minMatch;
483
+ U32 const mls = ms->cParams.minMatch;
379
484
  switch(mls)
380
485
  {
381
486
  default: /* includes case 3 */