zstd-ruby 1.4.0.0 → 1.4.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +274 -107
  5. data/ext/zstdruby/libzstd/README.md +75 -16
  6. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  7. data/ext/zstdruby/libzstd/common/compiler.h +154 -5
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  10. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  12. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +7 -3
  14. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
  16. data/ext/zstdruby/libzstd/common/huf.h +41 -38
  17. data/ext/zstdruby/libzstd/common/mem.h +68 -22
  18. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  19. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  20. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  21. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
  23. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  25. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  26. data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
  27. data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
  28. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
  72. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
  73. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  74. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  75. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
  77. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
  79. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
  81. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
  83. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
  85. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
  87. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
  89. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  90. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  91. data/ext/zstdruby/libzstd/zstd.h +655 -118
  92. data/lib/zstd-ruby/version.rb +1 -1
  93. data/zstd-ruby.gemspec +1 -1
  94. metadata +20 -10
  95. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
15
15
  extern "C" {
16
16
  #endif
17
17
 
18
- #include "mem.h" /* U32 */
18
+ #include "../common/mem.h" /* U32 */
19
19
  #include "zstd_compress_internal.h"
20
20
 
21
21
  void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -58,11 +58,11 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
58
58
 
59
59
  /** ZSTD_insertDUBT1() :
60
60
  * sort one already inserted but unsorted position
61
- * assumption : current >= btlow == (current - btmask)
61
+ * assumption : curr >= btlow == (curr - btmask)
62
62
  * doesn't fail */
63
63
  static void
64
64
  ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
65
- U32 current, const BYTE* inputEnd,
65
+ U32 curr, const BYTE* inputEnd,
66
66
  U32 nbCompares, U32 btLow,
67
67
  const ZSTD_dictMode_e dictMode)
68
68
  {
@@ -74,38 +74,41 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
74
74
  const BYTE* const base = ms->window.base;
75
75
  const BYTE* const dictBase = ms->window.dictBase;
76
76
  const U32 dictLimit = ms->window.dictLimit;
77
- const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
78
- const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
77
+ const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
78
+ const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
79
79
  const BYTE* const dictEnd = dictBase + dictLimit;
80
80
  const BYTE* const prefixStart = base + dictLimit;
81
81
  const BYTE* match;
82
- U32* smallerPtr = bt + 2*(current&btMask);
82
+ U32* smallerPtr = bt + 2*(curr&btMask);
83
83
  U32* largerPtr = smallerPtr + 1;
84
84
  U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
85
85
  U32 dummy32; /* to be nullified at the end */
86
- U32 const windowLow = ms->window.lowLimit;
86
+ U32 const windowValid = ms->window.lowLimit;
87
+ U32 const maxDistance = 1U << cParams->windowLog;
88
+ U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
89
+
87
90
 
88
91
  DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
89
- current, dictLimit, windowLow);
90
- assert(current >= btLow);
92
+ curr, dictLimit, windowLow);
93
+ assert(curr >= btLow);
91
94
  assert(ip < iend); /* condition for ZSTD_count */
92
95
 
93
96
  while (nbCompares-- && (matchIndex > windowLow)) {
94
97
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
95
98
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
96
- assert(matchIndex < current);
99
+ assert(matchIndex < curr);
97
100
  /* note : all candidates are now supposed sorted,
98
101
  * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
99
102
  * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
100
103
 
101
104
  if ( (dictMode != ZSTD_extDict)
102
105
  || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
103
- || (current < dictLimit) /* both in extDict */) {
106
+ || (curr < dictLimit) /* both in extDict */) {
104
107
  const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
105
108
  || (matchIndex+matchLength >= dictLimit)) ?
106
109
  base : dictBase;
107
110
  assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
108
- || (current < dictLimit) );
111
+ || (curr < dictLimit) );
109
112
  match = mBase + matchIndex;
110
113
  matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
111
114
  } else {
@@ -116,7 +119,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
116
119
  }
117
120
 
118
121
  DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
119
- current, matchIndex, (U32)matchLength);
122
+ curr, matchIndex, (U32)matchLength);
120
123
 
121
124
  if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
122
125
  break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
@@ -165,7 +168,7 @@ ZSTD_DUBT_findBetterDictMatch (
165
168
 
166
169
  const BYTE* const base = ms->window.base;
167
170
  const BYTE* const prefixStart = base + ms->window.dictLimit;
168
- U32 const current = (U32)(ip-base);
171
+ U32 const curr = (U32)(ip-base);
169
172
  const BYTE* const dictBase = dms->window.base;
170
173
  const BYTE* const dictEnd = dms->window.nextSrc;
171
174
  U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
@@ -192,10 +195,10 @@ ZSTD_DUBT_findBetterDictMatch (
192
195
 
193
196
  if (matchLength > bestLength) {
194
197
  U32 matchIndex = dictMatchIndex + dictIndexDelta;
195
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
198
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
196
199
  DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
197
- current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
198
- bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
200
+ curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
201
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
199
202
  }
200
203
  if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
201
204
  break; /* drop, to guarantee consistency (miss a little bit of compression) */
@@ -215,9 +218,9 @@ ZSTD_DUBT_findBetterDictMatch (
215
218
  }
216
219
 
217
220
  if (bestLength >= MINMATCH) {
218
- U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
221
+ U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
219
222
  DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
220
- current, (U32)bestLength, (U32)*offsetPtr, mIndex);
223
+ curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
221
224
  }
222
225
  return bestLength;
223
226
 
@@ -238,13 +241,13 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
238
241
  U32 matchIndex = hashTable[h];
239
242
 
240
243
  const BYTE* const base = ms->window.base;
241
- U32 const current = (U32)(ip-base);
242
- U32 const windowLow = ms->window.lowLimit;
244
+ U32 const curr = (U32)(ip-base);
245
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
243
246
 
244
247
  U32* const bt = ms->chainTable;
245
248
  U32 const btLog = cParams->chainLog - 1;
246
249
  U32 const btMask = (1 << btLog) - 1;
247
- U32 const btLow = (btMask >= current) ? 0 : current - btMask;
250
+ U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
248
251
  U32 const unsortLimit = MAX(btLow, windowLow);
249
252
 
250
253
  U32* nextCandidate = bt + 2*(matchIndex&btMask);
@@ -253,8 +256,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
253
256
  U32 nbCandidates = nbCompares;
254
257
  U32 previousCandidate = 0;
255
258
 
256
- DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
259
+ DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
257
260
  assert(ip <= iend-8); /* required for h calculation */
261
+ assert(dictMode != ZSTD_dedicatedDictSearch);
258
262
 
259
263
  /* reach end of unsorted candidates list */
260
264
  while ( (matchIndex > unsortLimit)
@@ -296,14 +300,14 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
296
300
  const U32 dictLimit = ms->window.dictLimit;
297
301
  const BYTE* const dictEnd = dictBase + dictLimit;
298
302
  const BYTE* const prefixStart = base + dictLimit;
299
- U32* smallerPtr = bt + 2*(current&btMask);
300
- U32* largerPtr = bt + 2*(current&btMask) + 1;
301
- U32 matchEndIdx = current + 8 + 1;
303
+ U32* smallerPtr = bt + 2*(curr&btMask);
304
+ U32* largerPtr = bt + 2*(curr&btMask) + 1;
305
+ U32 matchEndIdx = curr + 8 + 1;
302
306
  U32 dummy32; /* to be nullified at the end */
303
307
  size_t bestLength = 0;
304
308
 
305
309
  matchIndex = hashTable[h];
306
- hashTable[h] = current; /* Update Hash Table */
310
+ hashTable[h] = curr; /* Update Hash Table */
307
311
 
308
312
  while (nbCompares-- && (matchIndex > windowLow)) {
309
313
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
@@ -323,8 +327,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
323
327
  if (matchLength > bestLength) {
324
328
  if (matchLength > matchEndIdx - matchIndex)
325
329
  matchEndIdx = matchIndex + (U32)matchLength;
326
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
327
- bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
330
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
331
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
328
332
  if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
329
333
  if (dictMode == ZSTD_dictMatchState) {
330
334
  nbCompares = 0; /* in addition to avoiding checking any
@@ -360,12 +364,12 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
360
364
  mls, dictMode);
361
365
  }
362
366
 
363
- assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
367
+ assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
364
368
  ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
365
369
  if (bestLength >= MINMATCH) {
366
- U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
370
+ U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
367
371
  DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
368
- current, (U32)bestLength, (U32)*offsetPtr, mIndex);
372
+ curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
369
373
  }
370
374
  return bestLength;
371
375
  }
@@ -443,7 +447,7 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
443
447
 
444
448
  /* Update chains up to ip (excluded)
445
449
  Assumption : always within prefix (i.e. not within extDict) */
446
- static U32 ZSTD_insertAndFindFirstIndex_internal(
450
+ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
447
451
  ZSTD_matchState_t* ms,
448
452
  const ZSTD_compressionParameters* const cParams,
449
453
  const BYTE* ip, U32 const mls)
@@ -472,6 +476,121 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
472
476
  return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
473
477
  }
474
478
 
479
+ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
480
+ {
481
+ const BYTE* const base = ms->window.base;
482
+ U32 const target = (U32)(ip - base);
483
+ U32* const hashTable = ms->hashTable;
484
+ U32* const chainTable = ms->chainTable;
485
+ U32 const chainSize = 1 << ms->cParams.chainLog;
486
+ U32 idx = ms->nextToUpdate;
487
+ U32 const minChain = chainSize < target ? target - chainSize : idx;
488
+ U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
489
+ U32 const cacheSize = bucketSize - 1;
490
+ U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
491
+ U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
492
+
493
+ /* We know the hashtable is oversized by a factor of `bucketSize`.
494
+ * We are going to temporarily pretend `bucketSize == 1`, keeping only a
495
+ * single entry. We will use the rest of the space to construct a temporary
496
+ * chaintable.
497
+ */
498
+ U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
499
+ U32* const tmpHashTable = hashTable;
500
+ U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
501
+ U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
502
+ U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
503
+
504
+ U32 hashIdx;
505
+
506
+ assert(ms->cParams.chainLog <= 24);
507
+ assert(ms->cParams.hashLog >= ms->cParams.chainLog);
508
+ assert(idx != 0);
509
+ assert(tmpMinChain <= minChain);
510
+
511
+ /* fill conventional hash table and conventional chain table */
512
+ for ( ; idx < target; idx++) {
513
+ U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
514
+ if (idx >= tmpMinChain) {
515
+ tmpChainTable[idx - tmpMinChain] = hashTable[h];
516
+ }
517
+ tmpHashTable[h] = idx;
518
+ }
519
+
520
+ /* sort chains into ddss chain table */
521
+ {
522
+ U32 chainPos = 0;
523
+ for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
524
+ U32 count;
525
+ U32 countBeyondMinChain = 0;
526
+ U32 i = tmpHashTable[hashIdx];
527
+ for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
528
+ /* skip through the chain to the first position that won't be
529
+ * in the hash cache bucket */
530
+ if (i < minChain) {
531
+ countBeyondMinChain++;
532
+ }
533
+ i = tmpChainTable[i - tmpMinChain];
534
+ }
535
+ if (count == cacheSize) {
536
+ for (count = 0; count < chainLimit;) {
537
+ if (i < minChain) {
538
+ if (!i || countBeyondMinChain++ > cacheSize) {
539
+ /* only allow pulling `cacheSize` number of entries
540
+ * into the cache or chainTable beyond `minChain`,
541
+ * to replace the entries pulled out of the
542
+ * chainTable into the cache. This lets us reach
543
+ * back further without increasing the total number
544
+ * of entries in the chainTable, guaranteeing the
545
+ * DDSS chain table will fit into the space
546
+ * allocated for the regular one. */
547
+ break;
548
+ }
549
+ }
550
+ chainTable[chainPos++] = i;
551
+ count++;
552
+ if (i < tmpMinChain) {
553
+ break;
554
+ }
555
+ i = tmpChainTable[i - tmpMinChain];
556
+ }
557
+ } else {
558
+ count = 0;
559
+ }
560
+ if (count) {
561
+ tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
562
+ } else {
563
+ tmpHashTable[hashIdx] = 0;
564
+ }
565
+ }
566
+ assert(chainPos <= chainSize); /* I believe this is guaranteed... */
567
+ }
568
+
569
+ /* move chain pointers into the last entry of each hash bucket */
570
+ for (hashIdx = (1 << hashLog); hashIdx; ) {
571
+ U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
572
+ U32 const chainPackedPointer = tmpHashTable[hashIdx];
573
+ U32 i;
574
+ for (i = 0; i < cacheSize; i++) {
575
+ hashTable[bucketIdx + i] = 0;
576
+ }
577
+ hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
578
+ }
579
+
580
+ /* fill the buckets of the hash table */
581
+ for (idx = ms->nextToUpdate; idx < target; idx++) {
582
+ U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
583
+ << ZSTD_LAZY_DDSS_BUCKET_LOG;
584
+ U32 i;
585
+ /* Shift hash cache down 1. */
586
+ for (i = cacheSize - 1; i; i--)
587
+ hashTable[h + i] = hashTable[h + i - 1];
588
+ hashTable[h] = idx;
589
+ }
590
+
591
+ ms->nextToUpdate = target;
592
+ }
593
+
475
594
 
476
595
  /* inlining is important to hardwire a hot branch (template emulation) */
477
596
  FORCE_INLINE_TEMPLATE
@@ -490,16 +609,33 @@ size_t ZSTD_HcFindBestMatch_generic (
490
609
  const U32 dictLimit = ms->window.dictLimit;
491
610
  const BYTE* const prefixStart = base + dictLimit;
492
611
  const BYTE* const dictEnd = dictBase + dictLimit;
493
- const U32 lowLimit = ms->window.lowLimit;
494
- const U32 current = (U32)(ip-base);
495
- const U32 minChain = current > chainSize ? current - chainSize : 0;
612
+ const U32 curr = (U32)(ip-base);
613
+ const U32 maxDistance = 1U << cParams->windowLog;
614
+ const U32 lowestValid = ms->window.lowLimit;
615
+ const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
616
+ const U32 isDictionary = (ms->loadedDictEnd != 0);
617
+ const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
618
+ const U32 minChain = curr > chainSize ? curr - chainSize : 0;
496
619
  U32 nbAttempts = 1U << cParams->searchLog;
497
620
  size_t ml=4-1;
498
621
 
622
+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
623
+ const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
624
+ ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
625
+ const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
626
+ ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
627
+
628
+ U32 matchIndex;
629
+
630
+ if (dictMode == ZSTD_dedicatedDictSearch) {
631
+ const U32* entry = &dms->hashTable[ddsIdx];
632
+ PREFETCH_L1(entry);
633
+ }
634
+
499
635
  /* HC4 match finder */
500
- U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
636
+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
501
637
 
502
- for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
638
+ for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
503
639
  size_t currentMl=0;
504
640
  if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
505
641
  const BYTE* const match = base + matchIndex;
@@ -516,7 +652,7 @@ size_t ZSTD_HcFindBestMatch_generic (
516
652
  /* save best solution */
517
653
  if (currentMl > ml) {
518
654
  ml = currentMl;
519
- *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
655
+ *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
520
656
  if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
521
657
  }
522
658
 
@@ -524,8 +660,92 @@ size_t ZSTD_HcFindBestMatch_generic (
524
660
  matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
525
661
  }
526
662
 
527
- if (dictMode == ZSTD_dictMatchState) {
528
- const ZSTD_matchState_t* const dms = ms->dictMatchState;
663
+ if (dictMode == ZSTD_dedicatedDictSearch) {
664
+ const U32 ddsLowestIndex = dms->window.dictLimit;
665
+ const BYTE* const ddsBase = dms->window.base;
666
+ const BYTE* const ddsEnd = dms->window.nextSrc;
667
+ const U32 ddsSize = (U32)(ddsEnd - ddsBase);
668
+ const U32 ddsIndexDelta = dictLimit - ddsSize;
669
+ const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
670
+ const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
671
+ U32 ddsAttempt;
672
+
673
+ for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
674
+ PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
675
+ }
676
+
677
+ {
678
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
679
+ U32 const chainIndex = chainPackedPointer >> 8;
680
+
681
+ PREFETCH_L1(&dms->chainTable[chainIndex]);
682
+ }
683
+
684
+ for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
685
+ size_t currentMl=0;
686
+ const BYTE* match;
687
+ matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
688
+ match = ddsBase + matchIndex;
689
+
690
+ if (!matchIndex) {
691
+ return ml;
692
+ }
693
+
694
+ /* guaranteed by table construction */
695
+ (void)ddsLowestIndex;
696
+ assert(matchIndex >= ddsLowestIndex);
697
+ assert(match+4 <= ddsEnd);
698
+ if (MEM_read32(match) == MEM_read32(ip)) {
699
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
700
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
701
+ }
702
+
703
+ /* save best solution */
704
+ if (currentMl > ml) {
705
+ ml = currentMl;
706
+ *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
707
+ if (ip+currentMl == iLimit) {
708
+ /* best possible, avoids read overflow on next attempt */
709
+ return ml;
710
+ }
711
+ }
712
+ }
713
+
714
+ {
715
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
716
+ U32 chainIndex = chainPackedPointer >> 8;
717
+ U32 const chainLength = chainPackedPointer & 0xFF;
718
+ U32 const chainAttempts = nbAttempts - ddsAttempt;
719
+ U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
720
+ U32 chainAttempt;
721
+
722
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
723
+ PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
724
+ }
725
+
726
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
727
+ size_t currentMl=0;
728
+ const BYTE* match;
729
+ matchIndex = dms->chainTable[chainIndex];
730
+ match = ddsBase + matchIndex;
731
+
732
+ /* guaranteed by table construction */
733
+ assert(matchIndex >= ddsLowestIndex);
734
+ assert(match+4 <= ddsEnd);
735
+ if (MEM_read32(match) == MEM_read32(ip)) {
736
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
737
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
738
+ }
739
+
740
+ /* save best solution */
741
+ if (currentMl > ml) {
742
+ ml = currentMl;
743
+ *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
744
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
745
+ }
746
+ }
747
+ }
748
+ } else if (dictMode == ZSTD_dictMatchState) {
529
749
  const U32* const dmsChainTable = dms->chainTable;
530
750
  const U32 dmsChainSize = (1 << dms->cParams.chainLog);
531
751
  const U32 dmsChainMask = dmsChainSize - 1;
@@ -538,7 +758,7 @@ size_t ZSTD_HcFindBestMatch_generic (
538
758
 
539
759
  matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
540
760
 
541
- for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
761
+ for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
542
762
  size_t currentMl=0;
543
763
  const BYTE* const match = dmsBase + matchIndex;
544
764
  assert(match+4 <= dmsEnd);
@@ -548,11 +768,12 @@ size_t ZSTD_HcFindBestMatch_generic (
548
768
  /* save best solution */
549
769
  if (currentMl > ml) {
550
770
  ml = currentMl;
551
- *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
771
+ *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
552
772
  if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
553
773
  }
554
774
 
555
775
  if (matchIndex <= dmsMinChain) break;
776
+
556
777
  matchIndex = dmsChainTable[matchIndex & dmsChainMask];
557
778
  }
558
779
  }
@@ -593,6 +814,22 @@ static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
593
814
  }
594
815
 
595
816
 
817
+ static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
818
+ ZSTD_matchState_t* ms,
819
+ const BYTE* ip, const BYTE* const iLimit,
820
+ size_t* offsetPtr)
821
+ {
822
+ switch(ms->cParams.minMatch)
823
+ {
824
+ default : /* includes case 3 */
825
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
826
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
827
+ case 7 :
828
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
829
+ }
830
+ }
831
+
832
+
596
833
  FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
597
834
  ZSTD_matchState_t* ms,
598
835
  const BYTE* ip, const BYTE* const iLimit,
@@ -612,12 +849,14 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
612
849
  /* *******************************
613
850
  * Common parser - lazy strategy
614
851
  *********************************/
615
- FORCE_INLINE_TEMPLATE
616
- size_t ZSTD_compressBlock_lazy_generic(
852
+ typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
853
+
854
+ FORCE_INLINE_TEMPLATE size_t
855
+ ZSTD_compressBlock_lazy_generic(
617
856
  ZSTD_matchState_t* ms, seqStore_t* seqStore,
618
857
  U32 rep[ZSTD_REP_NUM],
619
858
  const void* src, size_t srcSize,
620
- const U32 searchMethod, const U32 depth,
859
+ const searchMethod_e searchMethod, const U32 depth,
621
860
  ZSTD_dictMode_e const dictMode)
622
861
  {
623
862
  const BYTE* const istart = (const BYTE*)src;
@@ -632,34 +871,62 @@ size_t ZSTD_compressBlock_lazy_generic(
632
871
  typedef size_t (*searchMax_f)(
633
872
  ZSTD_matchState_t* ms,
634
873
  const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
635
- searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
636
- (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
637
- (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS);
874
+
875
+ /**
876
+ * This table is indexed first by the four ZSTD_dictMode_e values, and then
877
+ * by the two searchMethod_e values. NULLs are placed for configurations
878
+ * that should never occur (extDict modes go to the other implementation
879
+ * below and there is no DDSS for binary tree search yet).
880
+ */
881
+ const searchMax_f searchFuncs[4][2] = {
882
+ {
883
+ ZSTD_HcFindBestMatch_selectMLS,
884
+ ZSTD_BtFindBestMatch_selectMLS
885
+ },
886
+ {
887
+ NULL,
888
+ NULL
889
+ },
890
+ {
891
+ ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
892
+ ZSTD_BtFindBestMatch_dictMatchState_selectMLS
893
+ },
894
+ {
895
+ ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
896
+ NULL
897
+ }
898
+ };
899
+
900
+ searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
638
901
  U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
639
902
 
903
+ const int isDMS = dictMode == ZSTD_dictMatchState;
904
+ const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
905
+ const int isDxS = isDMS || isDDS;
640
906
  const ZSTD_matchState_t* const dms = ms->dictMatchState;
641
- const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
642
- dms->window.dictLimit : 0;
643
- const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
644
- dms->window.base : NULL;
645
- const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
646
- dictBase + dictLowestIndex : NULL;
647
- const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
648
- dms->window.nextSrc : NULL;
649
- const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
907
+ const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
908
+ const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
909
+ const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
910
+ const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL;
911
+ const U32 dictIndexDelta = isDxS ?
650
912
  prefixLowestIndex - (U32)(dictEnd - dictBase) :
651
913
  0;
652
- const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
914
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
915
+
916
+ assert(searchMax != NULL);
917
+
918
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
653
919
 
654
920
  /* init */
655
921
  ip += (dictAndPrefixLength == 0);
656
- ms->nextToUpdate3 = ms->nextToUpdate;
657
922
  if (dictMode == ZSTD_noDict) {
658
- U32 const maxRep = (U32)(ip - prefixLowest);
923
+ U32 const curr = (U32)(ip - base);
924
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
925
+ U32 const maxRep = curr - windowLow;
659
926
  if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
660
927
  if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
661
928
  }
662
- if (dictMode == ZSTD_dictMatchState) {
929
+ if (isDxS) {
663
930
  /* dictMatchState repCode checks don't currently handle repCode == 0
664
931
  * disabling. */
665
932
  assert(offset_1 <= dictAndPrefixLength);
@@ -667,15 +934,21 @@ size_t ZSTD_compressBlock_lazy_generic(
667
934
  }
668
935
 
669
936
  /* Match Loop */
937
+ #if defined(__GNUC__) && defined(__x86_64__)
938
+ /* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
939
+ * code alignment is perturbed. To fix the instability, align the loop on a 32-byte boundary.
940
+ */
941
+ __asm__(".p2align 5");
942
+ #endif
670
943
  while (ip < ilimit) {
671
944
  size_t matchLength=0;
672
945
  size_t offset=0;
673
946
  const BYTE* start=ip+1;
674
947
 
675
948
  /* check repCode */
676
- if (dictMode == ZSTD_dictMatchState) {
949
+ if (isDxS) {
677
950
  const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
678
- const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
951
+ const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
679
952
  && repIndex < prefixLowestIndex) ?
680
953
  dictBase + (repIndex - dictIndexDelta) :
681
954
  base + repIndex;
@@ -716,7 +989,7 @@ size_t ZSTD_compressBlock_lazy_generic(
716
989
  if ((mlRep >= 4) && (gain2 > gain1))
717
990
  matchLength = mlRep, offset = 0, start = ip;
718
991
  }
719
- if (dictMode == ZSTD_dictMatchState) {
992
+ if (isDxS) {
720
993
  const U32 repIndex = (U32)(ip - base) - offset_1;
721
994
  const BYTE* repMatch = repIndex < prefixLowestIndex ?
722
995
  dictBase + (repIndex - dictIndexDelta) :
@@ -751,7 +1024,7 @@ size_t ZSTD_compressBlock_lazy_generic(
751
1024
  if ((mlRep >= 4) && (gain2 > gain1))
752
1025
  matchLength = mlRep, offset = 0, start = ip;
753
1026
  }
754
- if (dictMode == ZSTD_dictMatchState) {
1027
+ if (isDxS) {
755
1028
  const U32 repIndex = (U32)(ip - base) - offset_1;
756
1029
  const BYTE* repMatch = repIndex < prefixLowestIndex ?
757
1030
  dictBase + (repIndex - dictIndexDelta) :
@@ -789,7 +1062,7 @@ size_t ZSTD_compressBlock_lazy_generic(
789
1062
  && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
790
1063
  { start--; matchLength++; }
791
1064
  }
792
- if (dictMode == ZSTD_dictMatchState) {
1065
+ if (isDxS) {
793
1066
  U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
794
1067
  const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
795
1068
  const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
@@ -800,17 +1073,16 @@ size_t ZSTD_compressBlock_lazy_generic(
800
1073
  /* store sequence */
801
1074
  _storeSequence:
802
1075
  { size_t const litLength = start - anchor;
803
- ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
1076
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
804
1077
  anchor = ip = start + matchLength;
805
1078
  }
806
1079
 
807
1080
  /* check immediate repcode */
808
- if (dictMode == ZSTD_dictMatchState) {
1081
+ if (isDxS) {
809
1082
  while (ip <= ilimit) {
810
1083
  U32 const current2 = (U32)(ip-base);
811
1084
  U32 const repIndex = current2 - offset_2;
812
- const BYTE* repMatch = dictMode == ZSTD_dictMatchState
813
- && repIndex < prefixLowestIndex ?
1085
+ const BYTE* repMatch = repIndex < prefixLowestIndex ?
814
1086
  dictBase - dictIndexDelta + repIndex :
815
1087
  base + repIndex;
816
1088
  if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
@@ -818,7 +1090,7 @@ _storeSequence:
818
1090
  const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
819
1091
  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
820
1092
  offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
821
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
1093
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
822
1094
  ip += matchLength;
823
1095
  anchor = ip;
824
1096
  continue;
@@ -833,7 +1105,7 @@ _storeSequence:
833
1105
  /* store sequence */
834
1106
  matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
835
1107
  offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
836
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
1108
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
837
1109
  ip += matchLength;
838
1110
  anchor = ip;
839
1111
  continue; /* faster when present ... (?) */
@@ -844,7 +1116,7 @@ _storeSequence:
844
1116
  rep[1] = offset_2 ? offset_2 : savedOffset;
845
1117
 
846
1118
  /* Return the last literals size */
847
- return iend - anchor;
1119
+ return (size_t)(iend - anchor);
848
1120
  }
849
1121
 
850
1122
 
@@ -852,56 +1124,78 @@ size_t ZSTD_compressBlock_btlazy2(
852
1124
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
853
1125
  void const* src, size_t srcSize)
854
1126
  {
855
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_noDict);
1127
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
856
1128
  }
857
1129
 
858
1130
  size_t ZSTD_compressBlock_lazy2(
859
1131
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
860
1132
  void const* src, size_t srcSize)
861
1133
  {
862
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_noDict);
1134
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
863
1135
  }
864
1136
 
865
1137
  size_t ZSTD_compressBlock_lazy(
866
1138
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
867
1139
  void const* src, size_t srcSize)
868
1140
  {
869
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_noDict);
1141
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
870
1142
  }
871
1143
 
872
1144
  size_t ZSTD_compressBlock_greedy(
873
1145
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
874
1146
  void const* src, size_t srcSize)
875
1147
  {
876
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_noDict);
1148
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
877
1149
  }
878
1150
 
879
1151
  size_t ZSTD_compressBlock_btlazy2_dictMatchState(
880
1152
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
881
1153
  void const* src, size_t srcSize)
882
1154
  {
883
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_dictMatchState);
1155
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
884
1156
  }
885
1157
 
886
1158
  size_t ZSTD_compressBlock_lazy2_dictMatchState(
887
1159
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
888
1160
  void const* src, size_t srcSize)
889
1161
  {
890
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_dictMatchState);
1162
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
891
1163
  }
892
1164
 
893
1165
  size_t ZSTD_compressBlock_lazy_dictMatchState(
894
1166
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
895
1167
  void const* src, size_t srcSize)
896
1168
  {
897
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_dictMatchState);
1169
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
898
1170
  }
899
1171
 
900
1172
  size_t ZSTD_compressBlock_greedy_dictMatchState(
901
1173
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
902
1174
  void const* src, size_t srcSize)
903
1175
  {
904
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_dictMatchState);
1176
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
1177
+ }
1178
+
1179
+
1180
+ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
1181
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1182
+ void const* src, size_t srcSize)
1183
+ {
1184
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
1185
+ }
1186
+
1187
+ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
1188
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1189
+ void const* src, size_t srcSize)
1190
+ {
1191
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
1192
+ }
1193
+
1194
+ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
1195
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1196
+ void const* src, size_t srcSize)
1197
+ {
1198
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
905
1199
  }
906
1200
 
907
1201
 
@@ -910,7 +1204,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
910
1204
  ZSTD_matchState_t* ms, seqStore_t* seqStore,
911
1205
  U32 rep[ZSTD_REP_NUM],
912
1206
  const void* src, size_t srcSize,
913
- const U32 searchMethod, const U32 depth)
1207
+ const searchMethod_e searchMethod, const U32 depth)
914
1208
  {
915
1209
  const BYTE* const istart = (const BYTE*)src;
916
1210
  const BYTE* ip = istart;
@@ -919,35 +1213,43 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
919
1213
  const BYTE* const ilimit = iend - 8;
920
1214
  const BYTE* const base = ms->window.base;
921
1215
  const U32 dictLimit = ms->window.dictLimit;
922
- const U32 lowestIndex = ms->window.lowLimit;
923
1216
  const BYTE* const prefixStart = base + dictLimit;
924
1217
  const BYTE* const dictBase = ms->window.dictBase;
925
1218
  const BYTE* const dictEnd = dictBase + dictLimit;
926
- const BYTE* const dictStart = dictBase + lowestIndex;
1219
+ const BYTE* const dictStart = dictBase + ms->window.lowLimit;
1220
+ const U32 windowLog = ms->cParams.windowLog;
927
1221
 
928
1222
  typedef size_t (*searchMax_f)(
929
1223
  ZSTD_matchState_t* ms,
930
1224
  const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
931
- searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
1225
+ searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
932
1226
 
933
1227
  U32 offset_1 = rep[0], offset_2 = rep[1];
934
1228
 
1229
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
1230
+
935
1231
  /* init */
936
- ms->nextToUpdate3 = ms->nextToUpdate;
937
1232
  ip += (ip == prefixStart);
938
1233
 
939
1234
  /* Match Loop */
1235
+ #if defined(__GNUC__) && defined(__x86_64__)
1236
+ /* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
1237
+ * code alignment is perturbed. To fix the instability, align the loop on a 32-byte boundary.
1238
+ */
1239
+ __asm__(".p2align 5");
1240
+ #endif
940
1241
  while (ip < ilimit) {
941
1242
  size_t matchLength=0;
942
1243
  size_t offset=0;
943
1244
  const BYTE* start=ip+1;
944
- U32 current = (U32)(ip-base);
1245
+ U32 curr = (U32)(ip-base);
945
1246
 
946
1247
  /* check repCode */
947
- { const U32 repIndex = (U32)(current+1 - offset_1);
1248
+ { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
1249
+ const U32 repIndex = (U32)(curr+1 - offset_1);
948
1250
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
949
1251
  const BYTE* const repMatch = repBase + repIndex;
950
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1252
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
951
1253
  if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
952
1254
  /* repcode detected we should take it */
953
1255
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -971,13 +1273,14 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
971
1273
  if (depth>=1)
972
1274
  while (ip<ilimit) {
973
1275
  ip ++;
974
- current++;
1276
+ curr++;
975
1277
  /* check repCode */
976
1278
  if (offset) {
977
- const U32 repIndex = (U32)(current - offset_1);
1279
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
1280
+ const U32 repIndex = (U32)(curr - offset_1);
978
1281
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
979
1282
  const BYTE* const repMatch = repBase + repIndex;
980
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1283
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
981
1284
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
982
1285
  /* repcode detected */
983
1286
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1001,13 +1304,14 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1001
1304
  /* let's find an even better one */
1002
1305
  if ((depth==2) && (ip<ilimit)) {
1003
1306
  ip ++;
1004
- current++;
1307
+ curr++;
1005
1308
  /* check repCode */
1006
1309
  if (offset) {
1007
- const U32 repIndex = (U32)(current - offset_1);
1310
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
1311
+ const U32 repIndex = (U32)(curr - offset_1);
1008
1312
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1009
1313
  const BYTE* const repMatch = repBase + repIndex;
1010
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1314
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
1011
1315
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1012
1316
  /* repcode detected */
1013
1317
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1042,22 +1346,24 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1042
1346
  /* store sequence */
1043
1347
  _storeSequence:
1044
1348
  { size_t const litLength = start - anchor;
1045
- ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
1349
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
1046
1350
  anchor = ip = start + matchLength;
1047
1351
  }
1048
1352
 
1049
1353
  /* check immediate repcode */
1050
1354
  while (ip <= ilimit) {
1051
- const U32 repIndex = (U32)((ip-base) - offset_2);
1355
+ const U32 repCurrent = (U32)(ip-base);
1356
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
1357
+ const U32 repIndex = repCurrent - offset_2;
1052
1358
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1053
1359
  const BYTE* const repMatch = repBase + repIndex;
1054
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1360
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
1055
1361
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1056
1362
  /* repcode detected we should take it */
1057
1363
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1058
1364
  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1059
1365
  offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
1060
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
1366
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1061
1367
  ip += matchLength;
1062
1368
  anchor = ip;
1063
1369
  continue; /* faster when present ... (?) */
@@ -1070,7 +1376,7 @@ _storeSequence:
1070
1376
  rep[1] = offset_2;
1071
1377
 
1072
1378
  /* Return the last literals size */
1073
- return iend - anchor;
1379
+ return (size_t)(iend - anchor);
1074
1380
  }
1075
1381
 
1076
1382
 
@@ -1078,7 +1384,7 @@ size_t ZSTD_compressBlock_greedy_extDict(
1078
1384
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1079
1385
  void const* src, size_t srcSize)
1080
1386
  {
1081
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 0);
1387
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
1082
1388
  }
1083
1389
 
1084
1390
  size_t ZSTD_compressBlock_lazy_extDict(
@@ -1086,7 +1392,7 @@ size_t ZSTD_compressBlock_lazy_extDict(
1086
1392
  void const* src, size_t srcSize)
1087
1393
 
1088
1394
  {
1089
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 1);
1395
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
1090
1396
  }
1091
1397
 
1092
1398
  size_t ZSTD_compressBlock_lazy2_extDict(
@@ -1094,7 +1400,7 @@ size_t ZSTD_compressBlock_lazy2_extDict(
1094
1400
  void const* src, size_t srcSize)
1095
1401
 
1096
1402
  {
1097
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 2);
1403
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
1098
1404
  }
1099
1405
 
1100
1406
  size_t ZSTD_compressBlock_btlazy2_extDict(
@@ -1102,5 +1408,5 @@ size_t ZSTD_compressBlock_btlazy2_extDict(
1102
1408
  void const* src, size_t srcSize)
1103
1409
 
1104
1410
  {
1105
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 1, 2);
1411
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
1106
1412
  }