zstd-ruby 1.4.0.0 → 1.4.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +274 -107
  5. data/ext/zstdruby/libzstd/README.md +75 -16
  6. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  7. data/ext/zstdruby/libzstd/common/compiler.h +154 -5
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  10. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  12. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +7 -3
  14. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
  16. data/ext/zstdruby/libzstd/common/huf.h +41 -38
  17. data/ext/zstdruby/libzstd/common/mem.h +68 -22
  18. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  19. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  20. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  21. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
  23. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  25. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  26. data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
  27. data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
  28. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
  72. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
  73. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  74. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  75. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
  77. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
  79. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
  81. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
  83. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
  85. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
  87. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
  89. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  90. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  91. data/ext/zstdruby/libzstd/zstd.h +655 -118
  92. data/lib/zstd-ruby/version.rb +1 -1
  93. data/zstd-ruby.gemspec +1 -1
  94. metadata +20 -10
  95. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
15
15
  extern "C" {
16
16
  #endif
17
17
 
18
- #include "mem.h" /* U32 */
18
+ #include "../common/mem.h" /* U32 */
19
19
  #include "zstd_compress_internal.h"
20
20
 
21
21
  void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -58,11 +58,11 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
58
58
 
59
59
  /** ZSTD_insertDUBT1() :
60
60
  * sort one already inserted but unsorted position
61
- * assumption : current >= btlow == (current - btmask)
61
+ * assumption : curr >= btlow == (curr - btmask)
62
62
  * doesn't fail */
63
63
  static void
64
64
  ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
65
- U32 current, const BYTE* inputEnd,
65
+ U32 curr, const BYTE* inputEnd,
66
66
  U32 nbCompares, U32 btLow,
67
67
  const ZSTD_dictMode_e dictMode)
68
68
  {
@@ -74,38 +74,41 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
74
74
  const BYTE* const base = ms->window.base;
75
75
  const BYTE* const dictBase = ms->window.dictBase;
76
76
  const U32 dictLimit = ms->window.dictLimit;
77
- const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
78
- const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
77
+ const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
78
+ const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
79
79
  const BYTE* const dictEnd = dictBase + dictLimit;
80
80
  const BYTE* const prefixStart = base + dictLimit;
81
81
  const BYTE* match;
82
- U32* smallerPtr = bt + 2*(current&btMask);
82
+ U32* smallerPtr = bt + 2*(curr&btMask);
83
83
  U32* largerPtr = smallerPtr + 1;
84
84
  U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
85
85
  U32 dummy32; /* to be nullified at the end */
86
- U32 const windowLow = ms->window.lowLimit;
86
+ U32 const windowValid = ms->window.lowLimit;
87
+ U32 const maxDistance = 1U << cParams->windowLog;
88
+ U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
89
+
87
90
 
88
91
  DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
89
- current, dictLimit, windowLow);
90
- assert(current >= btLow);
92
+ curr, dictLimit, windowLow);
93
+ assert(curr >= btLow);
91
94
  assert(ip < iend); /* condition for ZSTD_count */
92
95
 
93
96
  while (nbCompares-- && (matchIndex > windowLow)) {
94
97
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
95
98
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
96
- assert(matchIndex < current);
99
+ assert(matchIndex < curr);
97
100
  /* note : all candidates are now supposed sorted,
98
101
  * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
99
102
  * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
100
103
 
101
104
  if ( (dictMode != ZSTD_extDict)
102
105
  || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
103
- || (current < dictLimit) /* both in extDict */) {
106
+ || (curr < dictLimit) /* both in extDict */) {
104
107
  const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
105
108
  || (matchIndex+matchLength >= dictLimit)) ?
106
109
  base : dictBase;
107
110
  assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
108
- || (current < dictLimit) );
111
+ || (curr < dictLimit) );
109
112
  match = mBase + matchIndex;
110
113
  matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
111
114
  } else {
@@ -116,7 +119,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
116
119
  }
117
120
 
118
121
  DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
119
- current, matchIndex, (U32)matchLength);
122
+ curr, matchIndex, (U32)matchLength);
120
123
 
121
124
  if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
122
125
  break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
@@ -165,7 +168,7 @@ ZSTD_DUBT_findBetterDictMatch (
165
168
 
166
169
  const BYTE* const base = ms->window.base;
167
170
  const BYTE* const prefixStart = base + ms->window.dictLimit;
168
- U32 const current = (U32)(ip-base);
171
+ U32 const curr = (U32)(ip-base);
169
172
  const BYTE* const dictBase = dms->window.base;
170
173
  const BYTE* const dictEnd = dms->window.nextSrc;
171
174
  U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
@@ -192,10 +195,10 @@ ZSTD_DUBT_findBetterDictMatch (
192
195
 
193
196
  if (matchLength > bestLength) {
194
197
  U32 matchIndex = dictMatchIndex + dictIndexDelta;
195
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
198
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
196
199
  DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
197
- current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
198
- bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
200
+ curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
201
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
199
202
  }
200
203
  if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
201
204
  break; /* drop, to guarantee consistency (miss a little bit of compression) */
@@ -215,9 +218,9 @@ ZSTD_DUBT_findBetterDictMatch (
215
218
  }
216
219
 
217
220
  if (bestLength >= MINMATCH) {
218
- U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
221
+ U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
219
222
  DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
220
- current, (U32)bestLength, (U32)*offsetPtr, mIndex);
223
+ curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
221
224
  }
222
225
  return bestLength;
223
226
 
@@ -238,13 +241,13 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
238
241
  U32 matchIndex = hashTable[h];
239
242
 
240
243
  const BYTE* const base = ms->window.base;
241
- U32 const current = (U32)(ip-base);
242
- U32 const windowLow = ms->window.lowLimit;
244
+ U32 const curr = (U32)(ip-base);
245
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
243
246
 
244
247
  U32* const bt = ms->chainTable;
245
248
  U32 const btLog = cParams->chainLog - 1;
246
249
  U32 const btMask = (1 << btLog) - 1;
247
- U32 const btLow = (btMask >= current) ? 0 : current - btMask;
250
+ U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
248
251
  U32 const unsortLimit = MAX(btLow, windowLow);
249
252
 
250
253
  U32* nextCandidate = bt + 2*(matchIndex&btMask);
@@ -253,8 +256,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
253
256
  U32 nbCandidates = nbCompares;
254
257
  U32 previousCandidate = 0;
255
258
 
256
- DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
259
+ DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
257
260
  assert(ip <= iend-8); /* required for h calculation */
261
+ assert(dictMode != ZSTD_dedicatedDictSearch);
258
262
 
259
263
  /* reach end of unsorted candidates list */
260
264
  while ( (matchIndex > unsortLimit)
@@ -296,14 +300,14 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
296
300
  const U32 dictLimit = ms->window.dictLimit;
297
301
  const BYTE* const dictEnd = dictBase + dictLimit;
298
302
  const BYTE* const prefixStart = base + dictLimit;
299
- U32* smallerPtr = bt + 2*(current&btMask);
300
- U32* largerPtr = bt + 2*(current&btMask) + 1;
301
- U32 matchEndIdx = current + 8 + 1;
303
+ U32* smallerPtr = bt + 2*(curr&btMask);
304
+ U32* largerPtr = bt + 2*(curr&btMask) + 1;
305
+ U32 matchEndIdx = curr + 8 + 1;
302
306
  U32 dummy32; /* to be nullified at the end */
303
307
  size_t bestLength = 0;
304
308
 
305
309
  matchIndex = hashTable[h];
306
- hashTable[h] = current; /* Update Hash Table */
310
+ hashTable[h] = curr; /* Update Hash Table */
307
311
 
308
312
  while (nbCompares-- && (matchIndex > windowLow)) {
309
313
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
@@ -323,8 +327,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
323
327
  if (matchLength > bestLength) {
324
328
  if (matchLength > matchEndIdx - matchIndex)
325
329
  matchEndIdx = matchIndex + (U32)matchLength;
326
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
327
- bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
330
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
331
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
328
332
  if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
329
333
  if (dictMode == ZSTD_dictMatchState) {
330
334
  nbCompares = 0; /* in addition to avoiding checking any
@@ -360,12 +364,12 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
360
364
  mls, dictMode);
361
365
  }
362
366
 
363
- assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
367
+ assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
364
368
  ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
365
369
  if (bestLength >= MINMATCH) {
366
- U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
370
+ U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
367
371
  DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
368
- current, (U32)bestLength, (U32)*offsetPtr, mIndex);
372
+ curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
369
373
  }
370
374
  return bestLength;
371
375
  }
@@ -443,7 +447,7 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
443
447
 
444
448
  /* Update chains up to ip (excluded)
445
449
  Assumption : always within prefix (i.e. not within extDict) */
446
- static U32 ZSTD_insertAndFindFirstIndex_internal(
450
+ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
447
451
  ZSTD_matchState_t* ms,
448
452
  const ZSTD_compressionParameters* const cParams,
449
453
  const BYTE* ip, U32 const mls)
@@ -472,6 +476,121 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
472
476
  return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
473
477
  }
474
478
 
479
+ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
480
+ {
481
+ const BYTE* const base = ms->window.base;
482
+ U32 const target = (U32)(ip - base);
483
+ U32* const hashTable = ms->hashTable;
484
+ U32* const chainTable = ms->chainTable;
485
+ U32 const chainSize = 1 << ms->cParams.chainLog;
486
+ U32 idx = ms->nextToUpdate;
487
+ U32 const minChain = chainSize < target ? target - chainSize : idx;
488
+ U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
489
+ U32 const cacheSize = bucketSize - 1;
490
+ U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
491
+ U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
492
+
493
+ /* We know the hashtable is oversized by a factor of `bucketSize`.
494
+ * We are going to temporarily pretend `bucketSize == 1`, keeping only a
495
+ * single entry. We will use the rest of the space to construct a temporary
496
+ * chaintable.
497
+ */
498
+ U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
499
+ U32* const tmpHashTable = hashTable;
500
+ U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
501
+ U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
502
+ U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
503
+
504
+ U32 hashIdx;
505
+
506
+ assert(ms->cParams.chainLog <= 24);
507
+ assert(ms->cParams.hashLog >= ms->cParams.chainLog);
508
+ assert(idx != 0);
509
+ assert(tmpMinChain <= minChain);
510
+
511
+ /* fill conventional hash table and conventional chain table */
512
+ for ( ; idx < target; idx++) {
513
+ U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
514
+ if (idx >= tmpMinChain) {
515
+ tmpChainTable[idx - tmpMinChain] = hashTable[h];
516
+ }
517
+ tmpHashTable[h] = idx;
518
+ }
519
+
520
+ /* sort chains into ddss chain table */
521
+ {
522
+ U32 chainPos = 0;
523
+ for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
524
+ U32 count;
525
+ U32 countBeyondMinChain = 0;
526
+ U32 i = tmpHashTable[hashIdx];
527
+ for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
528
+ /* skip through the chain to the first position that won't be
529
+ * in the hash cache bucket */
530
+ if (i < minChain) {
531
+ countBeyondMinChain++;
532
+ }
533
+ i = tmpChainTable[i - tmpMinChain];
534
+ }
535
+ if (count == cacheSize) {
536
+ for (count = 0; count < chainLimit;) {
537
+ if (i < minChain) {
538
+ if (!i || countBeyondMinChain++ > cacheSize) {
539
+ /* only allow pulling `cacheSize` number of entries
540
+ * into the cache or chainTable beyond `minChain`,
541
+ * to replace the entries pulled out of the
542
+ * chainTable into the cache. This lets us reach
543
+ * back further without increasing the total number
544
+ * of entries in the chainTable, guaranteeing the
545
+ * DDSS chain table will fit into the space
546
+ * allocated for the regular one. */
547
+ break;
548
+ }
549
+ }
550
+ chainTable[chainPos++] = i;
551
+ count++;
552
+ if (i < tmpMinChain) {
553
+ break;
554
+ }
555
+ i = tmpChainTable[i - tmpMinChain];
556
+ }
557
+ } else {
558
+ count = 0;
559
+ }
560
+ if (count) {
561
+ tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
562
+ } else {
563
+ tmpHashTable[hashIdx] = 0;
564
+ }
565
+ }
566
+ assert(chainPos <= chainSize); /* I believe this is guaranteed... */
567
+ }
568
+
569
+ /* move chain pointers into the last entry of each hash bucket */
570
+ for (hashIdx = (1 << hashLog); hashIdx; ) {
571
+ U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
572
+ U32 const chainPackedPointer = tmpHashTable[hashIdx];
573
+ U32 i;
574
+ for (i = 0; i < cacheSize; i++) {
575
+ hashTable[bucketIdx + i] = 0;
576
+ }
577
+ hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
578
+ }
579
+
580
+ /* fill the buckets of the hash table */
581
+ for (idx = ms->nextToUpdate; idx < target; idx++) {
582
+ U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
583
+ << ZSTD_LAZY_DDSS_BUCKET_LOG;
584
+ U32 i;
585
+ /* Shift hash cache down 1. */
586
+ for (i = cacheSize - 1; i; i--)
587
+ hashTable[h + i] = hashTable[h + i - 1];
588
+ hashTable[h] = idx;
589
+ }
590
+
591
+ ms->nextToUpdate = target;
592
+ }
593
+
475
594
 
476
595
  /* inlining is important to hardwire a hot branch (template emulation) */
477
596
  FORCE_INLINE_TEMPLATE
@@ -490,16 +609,33 @@ size_t ZSTD_HcFindBestMatch_generic (
490
609
  const U32 dictLimit = ms->window.dictLimit;
491
610
  const BYTE* const prefixStart = base + dictLimit;
492
611
  const BYTE* const dictEnd = dictBase + dictLimit;
493
- const U32 lowLimit = ms->window.lowLimit;
494
- const U32 current = (U32)(ip-base);
495
- const U32 minChain = current > chainSize ? current - chainSize : 0;
612
+ const U32 curr = (U32)(ip-base);
613
+ const U32 maxDistance = 1U << cParams->windowLog;
614
+ const U32 lowestValid = ms->window.lowLimit;
615
+ const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
616
+ const U32 isDictionary = (ms->loadedDictEnd != 0);
617
+ const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
618
+ const U32 minChain = curr > chainSize ? curr - chainSize : 0;
496
619
  U32 nbAttempts = 1U << cParams->searchLog;
497
620
  size_t ml=4-1;
498
621
 
622
+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
623
+ const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
624
+ ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
625
+ const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
626
+ ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
627
+
628
+ U32 matchIndex;
629
+
630
+ if (dictMode == ZSTD_dedicatedDictSearch) {
631
+ const U32* entry = &dms->hashTable[ddsIdx];
632
+ PREFETCH_L1(entry);
633
+ }
634
+
499
635
  /* HC4 match finder */
500
- U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
636
+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
501
637
 
502
- for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
638
+ for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
503
639
  size_t currentMl=0;
504
640
  if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
505
641
  const BYTE* const match = base + matchIndex;
@@ -516,7 +652,7 @@ size_t ZSTD_HcFindBestMatch_generic (
516
652
  /* save best solution */
517
653
  if (currentMl > ml) {
518
654
  ml = currentMl;
519
- *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
655
+ *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
520
656
  if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
521
657
  }
522
658
 
@@ -524,8 +660,92 @@ size_t ZSTD_HcFindBestMatch_generic (
524
660
  matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
525
661
  }
526
662
 
527
- if (dictMode == ZSTD_dictMatchState) {
528
- const ZSTD_matchState_t* const dms = ms->dictMatchState;
663
+ if (dictMode == ZSTD_dedicatedDictSearch) {
664
+ const U32 ddsLowestIndex = dms->window.dictLimit;
665
+ const BYTE* const ddsBase = dms->window.base;
666
+ const BYTE* const ddsEnd = dms->window.nextSrc;
667
+ const U32 ddsSize = (U32)(ddsEnd - ddsBase);
668
+ const U32 ddsIndexDelta = dictLimit - ddsSize;
669
+ const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
670
+ const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
671
+ U32 ddsAttempt;
672
+
673
+ for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
674
+ PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
675
+ }
676
+
677
+ {
678
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
679
+ U32 const chainIndex = chainPackedPointer >> 8;
680
+
681
+ PREFETCH_L1(&dms->chainTable[chainIndex]);
682
+ }
683
+
684
+ for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
685
+ size_t currentMl=0;
686
+ const BYTE* match;
687
+ matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
688
+ match = ddsBase + matchIndex;
689
+
690
+ if (!matchIndex) {
691
+ return ml;
692
+ }
693
+
694
+ /* guaranteed by table construction */
695
+ (void)ddsLowestIndex;
696
+ assert(matchIndex >= ddsLowestIndex);
697
+ assert(match+4 <= ddsEnd);
698
+ if (MEM_read32(match) == MEM_read32(ip)) {
699
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
700
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
701
+ }
702
+
703
+ /* save best solution */
704
+ if (currentMl > ml) {
705
+ ml = currentMl;
706
+ *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
707
+ if (ip+currentMl == iLimit) {
708
+ /* best possible, avoids read overflow on next attempt */
709
+ return ml;
710
+ }
711
+ }
712
+ }
713
+
714
+ {
715
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
716
+ U32 chainIndex = chainPackedPointer >> 8;
717
+ U32 const chainLength = chainPackedPointer & 0xFF;
718
+ U32 const chainAttempts = nbAttempts - ddsAttempt;
719
+ U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
720
+ U32 chainAttempt;
721
+
722
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
723
+ PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
724
+ }
725
+
726
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
727
+ size_t currentMl=0;
728
+ const BYTE* match;
729
+ matchIndex = dms->chainTable[chainIndex];
730
+ match = ddsBase + matchIndex;
731
+
732
+ /* guaranteed by table construction */
733
+ assert(matchIndex >= ddsLowestIndex);
734
+ assert(match+4 <= ddsEnd);
735
+ if (MEM_read32(match) == MEM_read32(ip)) {
736
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
737
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
738
+ }
739
+
740
+ /* save best solution */
741
+ if (currentMl > ml) {
742
+ ml = currentMl;
743
+ *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
744
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
745
+ }
746
+ }
747
+ }
748
+ } else if (dictMode == ZSTD_dictMatchState) {
529
749
  const U32* const dmsChainTable = dms->chainTable;
530
750
  const U32 dmsChainSize = (1 << dms->cParams.chainLog);
531
751
  const U32 dmsChainMask = dmsChainSize - 1;
@@ -538,7 +758,7 @@ size_t ZSTD_HcFindBestMatch_generic (
538
758
 
539
759
  matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
540
760
 
541
- for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
761
+ for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
542
762
  size_t currentMl=0;
543
763
  const BYTE* const match = dmsBase + matchIndex;
544
764
  assert(match+4 <= dmsEnd);
@@ -548,11 +768,12 @@ size_t ZSTD_HcFindBestMatch_generic (
548
768
  /* save best solution */
549
769
  if (currentMl > ml) {
550
770
  ml = currentMl;
551
- *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
771
+ *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
552
772
  if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
553
773
  }
554
774
 
555
775
  if (matchIndex <= dmsMinChain) break;
776
+
556
777
  matchIndex = dmsChainTable[matchIndex & dmsChainMask];
557
778
  }
558
779
  }
@@ -593,6 +814,22 @@ static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
593
814
  }
594
815
 
595
816
 
817
+ static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
818
+ ZSTD_matchState_t* ms,
819
+ const BYTE* ip, const BYTE* const iLimit,
820
+ size_t* offsetPtr)
821
+ {
822
+ switch(ms->cParams.minMatch)
823
+ {
824
+ default : /* includes case 3 */
825
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
826
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
827
+ case 7 :
828
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
829
+ }
830
+ }
831
+
832
+
596
833
  FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
597
834
  ZSTD_matchState_t* ms,
598
835
  const BYTE* ip, const BYTE* const iLimit,
@@ -612,12 +849,14 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
612
849
  /* *******************************
613
850
  * Common parser - lazy strategy
614
851
  *********************************/
615
- FORCE_INLINE_TEMPLATE
616
- size_t ZSTD_compressBlock_lazy_generic(
852
+ typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
853
+
854
+ FORCE_INLINE_TEMPLATE size_t
855
+ ZSTD_compressBlock_lazy_generic(
617
856
  ZSTD_matchState_t* ms, seqStore_t* seqStore,
618
857
  U32 rep[ZSTD_REP_NUM],
619
858
  const void* src, size_t srcSize,
620
- const U32 searchMethod, const U32 depth,
859
+ const searchMethod_e searchMethod, const U32 depth,
621
860
  ZSTD_dictMode_e const dictMode)
622
861
  {
623
862
  const BYTE* const istart = (const BYTE*)src;
@@ -632,34 +871,62 @@ size_t ZSTD_compressBlock_lazy_generic(
632
871
  typedef size_t (*searchMax_f)(
633
872
  ZSTD_matchState_t* ms,
634
873
  const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
635
- searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
636
- (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
637
- (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS);
874
+
875
+ /**
876
+ * This table is indexed first by the four ZSTD_dictMode_e values, and then
877
+ * by the two searchMethod_e values. NULLs are placed for configurations
878
+ * that should never occur (extDict modes go to the other implementation
879
+ * below and there is no DDSS for binary tree search yet).
880
+ */
881
+ const searchMax_f searchFuncs[4][2] = {
882
+ {
883
+ ZSTD_HcFindBestMatch_selectMLS,
884
+ ZSTD_BtFindBestMatch_selectMLS
885
+ },
886
+ {
887
+ NULL,
888
+ NULL
889
+ },
890
+ {
891
+ ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
892
+ ZSTD_BtFindBestMatch_dictMatchState_selectMLS
893
+ },
894
+ {
895
+ ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
896
+ NULL
897
+ }
898
+ };
899
+
900
+ searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
638
901
  U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
639
902
 
903
+ const int isDMS = dictMode == ZSTD_dictMatchState;
904
+ const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
905
+ const int isDxS = isDMS || isDDS;
640
906
  const ZSTD_matchState_t* const dms = ms->dictMatchState;
641
- const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
642
- dms->window.dictLimit : 0;
643
- const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
644
- dms->window.base : NULL;
645
- const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
646
- dictBase + dictLowestIndex : NULL;
647
- const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
648
- dms->window.nextSrc : NULL;
649
- const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
907
+ const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
908
+ const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
909
+ const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
910
+ const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL;
911
+ const U32 dictIndexDelta = isDxS ?
650
912
  prefixLowestIndex - (U32)(dictEnd - dictBase) :
651
913
  0;
652
- const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
914
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
915
+
916
+ assert(searchMax != NULL);
917
+
918
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
653
919
 
654
920
  /* init */
655
921
  ip += (dictAndPrefixLength == 0);
656
- ms->nextToUpdate3 = ms->nextToUpdate;
657
922
  if (dictMode == ZSTD_noDict) {
658
- U32 const maxRep = (U32)(ip - prefixLowest);
923
+ U32 const curr = (U32)(ip - base);
924
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
925
+ U32 const maxRep = curr - windowLow;
659
926
  if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
660
927
  if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
661
928
  }
662
- if (dictMode == ZSTD_dictMatchState) {
929
+ if (isDxS) {
663
930
  /* dictMatchState repCode checks don't currently handle repCode == 0
664
931
  * disabling. */
665
932
  assert(offset_1 <= dictAndPrefixLength);
@@ -667,15 +934,21 @@ size_t ZSTD_compressBlock_lazy_generic(
667
934
  }
668
935
 
669
936
  /* Match Loop */
937
+ #if defined(__GNUC__) && defined(__x86_64__)
938
+ /* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
939
+ * code alignment is perturbed. To fix the instability, align the loop on 32 bytes.
940
+ */
941
+ __asm__(".p2align 5");
942
+ #endif
670
943
  while (ip < ilimit) {
671
944
  size_t matchLength=0;
672
945
  size_t offset=0;
673
946
  const BYTE* start=ip+1;
674
947
 
675
948
  /* check repCode */
676
- if (dictMode == ZSTD_dictMatchState) {
949
+ if (isDxS) {
677
950
  const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
678
- const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
951
+ const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
679
952
  && repIndex < prefixLowestIndex) ?
680
953
  dictBase + (repIndex - dictIndexDelta) :
681
954
  base + repIndex;
@@ -716,7 +989,7 @@ size_t ZSTD_compressBlock_lazy_generic(
716
989
  if ((mlRep >= 4) && (gain2 > gain1))
717
990
  matchLength = mlRep, offset = 0, start = ip;
718
991
  }
719
- if (dictMode == ZSTD_dictMatchState) {
992
+ if (isDxS) {
720
993
  const U32 repIndex = (U32)(ip - base) - offset_1;
721
994
  const BYTE* repMatch = repIndex < prefixLowestIndex ?
722
995
  dictBase + (repIndex - dictIndexDelta) :
@@ -751,7 +1024,7 @@ size_t ZSTD_compressBlock_lazy_generic(
751
1024
  if ((mlRep >= 4) && (gain2 > gain1))
752
1025
  matchLength = mlRep, offset = 0, start = ip;
753
1026
  }
754
- if (dictMode == ZSTD_dictMatchState) {
1027
+ if (isDxS) {
755
1028
  const U32 repIndex = (U32)(ip - base) - offset_1;
756
1029
  const BYTE* repMatch = repIndex < prefixLowestIndex ?
757
1030
  dictBase + (repIndex - dictIndexDelta) :
@@ -789,7 +1062,7 @@ size_t ZSTD_compressBlock_lazy_generic(
789
1062
  && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
790
1063
  { start--; matchLength++; }
791
1064
  }
792
- if (dictMode == ZSTD_dictMatchState) {
1065
+ if (isDxS) {
793
1066
  U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
794
1067
  const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
795
1068
  const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
@@ -800,17 +1073,16 @@ size_t ZSTD_compressBlock_lazy_generic(
800
1073
  /* store sequence */
801
1074
  _storeSequence:
802
1075
  { size_t const litLength = start - anchor;
803
- ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
1076
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
804
1077
  anchor = ip = start + matchLength;
805
1078
  }
806
1079
 
807
1080
  /* check immediate repcode */
808
- if (dictMode == ZSTD_dictMatchState) {
1081
+ if (isDxS) {
809
1082
  while (ip <= ilimit) {
810
1083
  U32 const current2 = (U32)(ip-base);
811
1084
  U32 const repIndex = current2 - offset_2;
812
- const BYTE* repMatch = dictMode == ZSTD_dictMatchState
813
- && repIndex < prefixLowestIndex ?
1085
+ const BYTE* repMatch = repIndex < prefixLowestIndex ?
814
1086
  dictBase - dictIndexDelta + repIndex :
815
1087
  base + repIndex;
816
1088
  if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
@@ -818,7 +1090,7 @@ _storeSequence:
818
1090
  const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
819
1091
  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
820
1092
  offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
821
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
1093
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
822
1094
  ip += matchLength;
823
1095
  anchor = ip;
824
1096
  continue;
@@ -833,7 +1105,7 @@ _storeSequence:
833
1105
  /* store sequence */
834
1106
  matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
835
1107
  offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
836
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
1108
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
837
1109
  ip += matchLength;
838
1110
  anchor = ip;
839
1111
  continue; /* faster when present ... (?) */
@@ -844,7 +1116,7 @@ _storeSequence:
844
1116
  rep[1] = offset_2 ? offset_2 : savedOffset;
845
1117
 
846
1118
  /* Return the last literals size */
847
- return iend - anchor;
1119
+ return (size_t)(iend - anchor);
848
1120
  }
849
1121
 
850
1122
 
@@ -852,56 +1124,78 @@ size_t ZSTD_compressBlock_btlazy2(
852
1124
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
853
1125
  void const* src, size_t srcSize)
854
1126
  {
855
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_noDict);
1127
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
856
1128
  }
857
1129
 
858
1130
  size_t ZSTD_compressBlock_lazy2(
859
1131
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
860
1132
  void const* src, size_t srcSize)
861
1133
  {
862
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_noDict);
1134
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
863
1135
  }
864
1136
 
865
1137
  size_t ZSTD_compressBlock_lazy(
866
1138
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
867
1139
  void const* src, size_t srcSize)
868
1140
  {
869
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_noDict);
1141
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
870
1142
  }
871
1143
 
872
1144
  size_t ZSTD_compressBlock_greedy(
873
1145
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
874
1146
  void const* src, size_t srcSize)
875
1147
  {
876
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_noDict);
1148
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
877
1149
  }
878
1150
 
879
1151
  size_t ZSTD_compressBlock_btlazy2_dictMatchState(
880
1152
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
881
1153
  void const* src, size_t srcSize)
882
1154
  {
883
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_dictMatchState);
1155
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
884
1156
  }
885
1157
 
886
1158
  size_t ZSTD_compressBlock_lazy2_dictMatchState(
887
1159
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
888
1160
  void const* src, size_t srcSize)
889
1161
  {
890
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_dictMatchState);
1162
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
891
1163
  }
892
1164
 
893
1165
  size_t ZSTD_compressBlock_lazy_dictMatchState(
894
1166
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
895
1167
  void const* src, size_t srcSize)
896
1168
  {
897
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_dictMatchState);
1169
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
898
1170
  }
899
1171
 
900
1172
  size_t ZSTD_compressBlock_greedy_dictMatchState(
901
1173
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
902
1174
  void const* src, size_t srcSize)
903
1175
  {
904
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_dictMatchState);
1176
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
1177
+ }
1178
+
1179
+
1180
+ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
1181
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1182
+ void const* src, size_t srcSize)
1183
+ {
1184
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
1185
+ }
1186
+
1187
+ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
1188
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1189
+ void const* src, size_t srcSize)
1190
+ {
1191
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
1192
+ }
1193
+
1194
+ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
1195
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1196
+ void const* src, size_t srcSize)
1197
+ {
1198
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
905
1199
  }
906
1200
 
907
1201
 
@@ -910,7 +1204,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
910
1204
  ZSTD_matchState_t* ms, seqStore_t* seqStore,
911
1205
  U32 rep[ZSTD_REP_NUM],
912
1206
  const void* src, size_t srcSize,
913
- const U32 searchMethod, const U32 depth)
1207
+ const searchMethod_e searchMethod, const U32 depth)
914
1208
  {
915
1209
  const BYTE* const istart = (const BYTE*)src;
916
1210
  const BYTE* ip = istart;
@@ -919,35 +1213,43 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
919
1213
  const BYTE* const ilimit = iend - 8;
920
1214
  const BYTE* const base = ms->window.base;
921
1215
  const U32 dictLimit = ms->window.dictLimit;
922
- const U32 lowestIndex = ms->window.lowLimit;
923
1216
  const BYTE* const prefixStart = base + dictLimit;
924
1217
  const BYTE* const dictBase = ms->window.dictBase;
925
1218
  const BYTE* const dictEnd = dictBase + dictLimit;
926
- const BYTE* const dictStart = dictBase + lowestIndex;
1219
+ const BYTE* const dictStart = dictBase + ms->window.lowLimit;
1220
+ const U32 windowLog = ms->cParams.windowLog;
927
1221
 
928
1222
  typedef size_t (*searchMax_f)(
929
1223
  ZSTD_matchState_t* ms,
930
1224
  const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
931
- searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
1225
+ searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
932
1226
 
933
1227
  U32 offset_1 = rep[0], offset_2 = rep[1];
934
1228
 
1229
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
1230
+
935
1231
  /* init */
936
- ms->nextToUpdate3 = ms->nextToUpdate;
937
1232
  ip += (ip == prefixStart);
938
1233
 
939
1234
  /* Match Loop */
1235
+ #if defined(__GNUC__) && defined(__x86_64__)
1236
+ /* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
1237
+ * code alignment is perturbed. To fix the instability, align the loop on 32 bytes.
1238
+ */
1239
+ __asm__(".p2align 5");
1240
+ #endif
940
1241
  while (ip < ilimit) {
941
1242
  size_t matchLength=0;
942
1243
  size_t offset=0;
943
1244
  const BYTE* start=ip+1;
944
- U32 current = (U32)(ip-base);
1245
+ U32 curr = (U32)(ip-base);
945
1246
 
946
1247
  /* check repCode */
947
- { const U32 repIndex = (U32)(current+1 - offset_1);
1248
+ { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
1249
+ const U32 repIndex = (U32)(curr+1 - offset_1);
948
1250
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
949
1251
  const BYTE* const repMatch = repBase + repIndex;
950
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1252
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
951
1253
  if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
952
1254
  /* repcode detected we should take it */
953
1255
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -971,13 +1273,14 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
971
1273
  if (depth>=1)
972
1274
  while (ip<ilimit) {
973
1275
  ip ++;
974
- current++;
1276
+ curr++;
975
1277
  /* check repCode */
976
1278
  if (offset) {
977
- const U32 repIndex = (U32)(current - offset_1);
1279
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
1280
+ const U32 repIndex = (U32)(curr - offset_1);
978
1281
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
979
1282
  const BYTE* const repMatch = repBase + repIndex;
980
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1283
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
981
1284
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
982
1285
  /* repcode detected */
983
1286
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1001,13 +1304,14 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1001
1304
  /* let's find an even better one */
1002
1305
  if ((depth==2) && (ip<ilimit)) {
1003
1306
  ip ++;
1004
- current++;
1307
+ curr++;
1005
1308
  /* check repCode */
1006
1309
  if (offset) {
1007
- const U32 repIndex = (U32)(current - offset_1);
1310
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
1311
+ const U32 repIndex = (U32)(curr - offset_1);
1008
1312
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1009
1313
  const BYTE* const repMatch = repBase + repIndex;
1010
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1314
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
1011
1315
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1012
1316
  /* repcode detected */
1013
1317
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1042,22 +1346,24 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1042
1346
  /* store sequence */
1043
1347
  _storeSequence:
1044
1348
  { size_t const litLength = start - anchor;
1045
- ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
1349
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
1046
1350
  anchor = ip = start + matchLength;
1047
1351
  }
1048
1352
 
1049
1353
  /* check immediate repcode */
1050
1354
  while (ip <= ilimit) {
1051
- const U32 repIndex = (U32)((ip-base) - offset_2);
1355
+ const U32 repCurrent = (U32)(ip-base);
1356
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
1357
+ const U32 repIndex = repCurrent - offset_2;
1052
1358
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1053
1359
  const BYTE* const repMatch = repBase + repIndex;
1054
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1360
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
1055
1361
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1056
1362
  /* repcode detected we should take it */
1057
1363
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1058
1364
  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1059
1365
  offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
1060
- ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
1366
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1061
1367
  ip += matchLength;
1062
1368
  anchor = ip;
1063
1369
  continue; /* faster when present ... (?) */
@@ -1070,7 +1376,7 @@ _storeSequence:
1070
1376
  rep[1] = offset_2;
1071
1377
 
1072
1378
  /* Return the last literals size */
1073
- return iend - anchor;
1379
+ return (size_t)(iend - anchor);
1074
1380
  }
1075
1381
 
1076
1382
 
@@ -1078,7 +1384,7 @@ size_t ZSTD_compressBlock_greedy_extDict(
1078
1384
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1079
1385
  void const* src, size_t srcSize)
1080
1386
  {
1081
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 0);
1387
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
1082
1388
  }
1083
1389
 
1084
1390
  size_t ZSTD_compressBlock_lazy_extDict(
@@ -1086,7 +1392,7 @@ size_t ZSTD_compressBlock_lazy_extDict(
1086
1392
  void const* src, size_t srcSize)
1087
1393
 
1088
1394
  {
1089
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 1);
1395
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
1090
1396
  }
1091
1397
 
1092
1398
  size_t ZSTD_compressBlock_lazy2_extDict(
@@ -1094,7 +1400,7 @@ size_t ZSTD_compressBlock_lazy2_extDict(
1094
1400
  void const* src, size_t srcSize)
1095
1401
 
1096
1402
  {
1097
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 2);
1403
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
1098
1404
  }
1099
1405
 
1100
1406
  size_t ZSTD_compressBlock_btlazy2_extDict(
@@ -1102,5 +1408,5 @@ size_t ZSTD_compressBlock_btlazy2_extDict(
1102
1408
  void const* src, size_t srcSize)
1103
1409
 
1104
1410
  {
1105
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 1, 2);
1411
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
1106
1412
  }