zstd-ruby 1.4.5.0 → 1.4.9.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +237 -138
  5. data/ext/zstdruby/libzstd/README.md +28 -0
  6. data/ext/zstdruby/libzstd/common/bitstream.h +25 -16
  7. data/ext/zstdruby/libzstd/common/compiler.h +118 -4
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  10. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +189 -43
  12. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +2 -2
  14. data/ext/zstdruby/libzstd/common/fse.h +40 -12
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +124 -17
  16. data/ext/zstdruby/libzstd/common/huf.h +27 -6
  17. data/ext/zstdruby/libzstd/common/mem.h +67 -94
  18. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  19. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  20. data/ext/zstdruby/libzstd/common/threading.c +6 -5
  21. data/ext/zstdruby/libzstd/common/xxhash.c +19 -57
  22. data/ext/zstdruby/libzstd/common/xxhash.h +2 -2
  23. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  24. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  25. data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
  26. data/ext/zstdruby/libzstd/common/zstd_internal.h +90 -59
  27. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  28. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  29. data/ext/zstdruby/libzstd/compress/fse_compress.c +31 -24
  30. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  31. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  32. data/ext/zstdruby/libzstd/compress/huf_compress.c +217 -101
  33. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1495 -478
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +143 -44
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +7 -7
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +18 -4
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  41. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +62 -26
  42. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +23 -23
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
  44. data/ext/zstdruby/libzstd/compress/zstd_fast.c +21 -21
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
  46. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +352 -78
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  48. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +276 -209
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +8 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  51. data/ext/zstdruby/libzstd/compress/zstd_opt.c +191 -46
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  53. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +79 -410
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +27 -109
  55. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +303 -201
  56. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  58. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +370 -87
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +153 -45
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +6 -3
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +28 -11
  62. data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
  63. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  66. data/ext/zstdruby/libzstd/dictBuilder/cover.c +40 -31
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.h +2 -2
  68. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  69. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +26 -25
  70. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +22 -24
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +5 -4
  72. data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
  73. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  74. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
  75. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +6 -2
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +6 -2
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  79. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +6 -2
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +7 -3
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +10 -6
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
  85. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +10 -6
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +10 -6
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  89. data/ext/zstdruby/libzstd/libzstd.pc.in +3 -3
  90. data/ext/zstdruby/libzstd/zstd.h +414 -54
  91. data/lib/zstd-ruby/version.rb +1 -1
  92. metadata +7 -3
  93. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -31,15 +31,15 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
31
31
  * is empty.
32
32
  */
33
33
  for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
34
- U32 const current = (U32)(ip - base);
34
+ U32 const curr = (U32)(ip - base);
35
35
  U32 i;
36
36
  for (i = 0; i < fastHashFillStep; ++i) {
37
37
  size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
38
38
  size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
39
39
  if (i == 0)
40
- hashSmall[smHash] = current + i;
40
+ hashSmall[smHash] = curr + i;
41
41
  if (i == 0 || hashLarge[lgHash] == 0)
42
- hashLarge[lgHash] = current + i;
42
+ hashLarge[lgHash] = curr + i;
43
43
  /* Only load extra positions for ZSTD_dtlm_full */
44
44
  if (dtlm == ZSTD_dtlm_fast)
45
45
  break;
@@ -108,9 +108,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
108
108
  /* init */
109
109
  ip += (dictAndPrefixLength == 0);
110
110
  if (dictMode == ZSTD_noDict) {
111
- U32 const current = (U32)(ip - base);
112
- U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
113
- U32 const maxRep = current - windowLow;
111
+ U32 const curr = (U32)(ip - base);
112
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
113
+ U32 const maxRep = curr - windowLow;
114
114
  if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
115
115
  if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
116
116
  }
@@ -129,17 +129,17 @@ size_t ZSTD_compressBlock_doubleFast_generic(
129
129
  size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
130
130
  size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
131
131
  size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
132
- U32 const current = (U32)(ip-base);
132
+ U32 const curr = (U32)(ip-base);
133
133
  U32 const matchIndexL = hashLong[h2];
134
134
  U32 matchIndexS = hashSmall[h];
135
135
  const BYTE* matchLong = base + matchIndexL;
136
136
  const BYTE* match = base + matchIndexS;
137
- const U32 repIndex = current + 1 - offset_1;
137
+ const U32 repIndex = curr + 1 - offset_1;
138
138
  const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
139
139
  && repIndex < prefixLowestIndex) ?
140
140
  dictBase + (repIndex - dictIndexDelta) :
141
141
  base + repIndex;
142
- hashLong[h2] = hashSmall[h] = current; /* update hash tables */
142
+ hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
143
143
 
144
144
  /* check dictMatchState repcode */
145
145
  if (dictMode == ZSTD_dictMatchState
@@ -177,7 +177,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
177
177
 
178
178
  if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
179
179
  mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
180
- offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
180
+ offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
181
181
  while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
182
182
  goto _match_found;
183
183
  } }
@@ -209,7 +209,7 @@ _search_next_long:
209
209
  size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
210
210
  U32 const matchIndexL3 = hashLong[hl3];
211
211
  const BYTE* matchL3 = base + matchIndexL3;
212
- hashLong[hl3] = current + 1;
212
+ hashLong[hl3] = curr + 1;
213
213
 
214
214
  /* check prefix long +1 match */
215
215
  if (matchIndexL3 > prefixLowestIndex) {
@@ -228,7 +228,7 @@ _search_next_long:
228
228
  if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
229
229
  mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
230
230
  ip++;
231
- offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
231
+ offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
232
232
  while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
233
233
  goto _match_found;
234
234
  } } }
@@ -236,7 +236,7 @@ _search_next_long:
236
236
  /* if no long +1 match, explore the short match we found */
237
237
  if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
238
238
  mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
239
- offset = (U32)(current - matchIndexS);
239
+ offset = (U32)(curr - matchIndexS);
240
240
  while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
241
241
  } else {
242
242
  mLength = ZSTD_count(ip+4, match+4, iend) + 4;
@@ -260,7 +260,7 @@ _match_stored:
260
260
  if (ip <= ilimit) {
261
261
  /* Complementary insertion */
262
262
  /* done after iLimit test, as candidates could be > iend-8 */
263
- { U32 const indexToInsert = current+2;
263
+ { U32 const indexToInsert = curr+2;
264
264
  hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
265
265
  hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
266
266
  hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -401,12 +401,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
401
401
  const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
402
402
  const BYTE* matchLong = matchLongBase + matchLongIndex;
403
403
 
404
- const U32 current = (U32)(ip-base);
405
- const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
404
+ const U32 curr = (U32)(ip-base);
405
+ const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
406
406
  const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
407
407
  const BYTE* const repMatch = repBase + repIndex;
408
408
  size_t mLength;
409
- hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
409
+ hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
410
410
 
411
411
  if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
412
412
  & (repIndex > dictStartIndex))
@@ -421,7 +421,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
421
421
  const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
422
422
  U32 offset;
423
423
  mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
424
- offset = current - matchLongIndex;
424
+ offset = curr - matchLongIndex;
425
425
  while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
426
426
  offset_2 = offset_1;
427
427
  offset_1 = offset;
@@ -433,19 +433,19 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
433
433
  const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
434
434
  const BYTE* match3 = match3Base + matchIndex3;
435
435
  U32 offset;
436
- hashLong[h3] = current + 1;
436
+ hashLong[h3] = curr + 1;
437
437
  if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
438
438
  const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
439
439
  const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
440
440
  mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
441
441
  ip++;
442
- offset = current+1 - matchIndex3;
442
+ offset = curr+1 - matchIndex3;
443
443
  while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
444
444
  } else {
445
445
  const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
446
446
  const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
447
447
  mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
448
- offset = current - matchIndex;
448
+ offset = curr - matchIndex;
449
449
  while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
450
450
  }
451
451
  offset_2 = offset_1;
@@ -464,7 +464,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
464
464
  if (ip <= ilimit) {
465
465
  /* Complementary insertion */
466
466
  /* done after iLimit test, as candidates could be > iend-8 */
467
- { U32 const indexToInsert = current+2;
467
+ { U32 const indexToInsert = curr+2;
468
468
  hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
469
469
  hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
470
470
  hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -29,16 +29,16 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
29
29
  * Insert the other positions if their hash entry is empty.
30
30
  */
31
31
  for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
32
- U32 const current = (U32)(ip - base);
32
+ U32 const curr = (U32)(ip - base);
33
33
  size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
34
- hashTable[hash0] = current;
34
+ hashTable[hash0] = curr;
35
35
  if (dtlm == ZSTD_dtlm_fast) continue;
36
36
  /* Only load extra positions for ZSTD_dtlm_full */
37
37
  { U32 p;
38
38
  for (p = 1; p < fastHashFillStep; ++p) {
39
39
  size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
40
40
  if (hashTable[hash] == 0) { /* not yet filled */
41
- hashTable[hash] = current + p;
41
+ hashTable[hash] = curr + p;
42
42
  } } } }
43
43
  }
44
44
 
@@ -72,9 +72,9 @@ ZSTD_compressBlock_fast_generic(
72
72
  DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
73
73
  ip0 += (ip0 == prefixStart);
74
74
  ip1 = ip0 + 1;
75
- { U32 const current = (U32)(ip0 - base);
76
- U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
77
- U32 const maxRep = current - windowLow;
75
+ { U32 const curr = (U32)(ip0 - base);
76
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
77
+ U32 const maxRep = curr - windowLow;
78
78
  if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
79
79
  if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
80
80
  }
@@ -242,7 +242,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
242
242
  assert(endIndex - prefixStartIndex <= maxDistance);
243
243
  (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
244
244
 
245
- /* ensure there will be no no underflow
245
+ /* ensure there will be no underflow
246
246
  * when translating a dict index into a local index */
247
247
  assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
248
248
 
@@ -258,14 +258,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
258
258
  while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
259
259
  size_t mLength;
260
260
  size_t const h = ZSTD_hashPtr(ip, hlog, mls);
261
- U32 const current = (U32)(ip-base);
261
+ U32 const curr = (U32)(ip-base);
262
262
  U32 const matchIndex = hashTable[h];
263
263
  const BYTE* match = base + matchIndex;
264
- const U32 repIndex = current + 1 - offset_1;
264
+ const U32 repIndex = curr + 1 - offset_1;
265
265
  const BYTE* repMatch = (repIndex < prefixStartIndex) ?
266
266
  dictBase + (repIndex - dictIndexDelta) :
267
267
  base + repIndex;
268
- hashTable[h] = current; /* update hash table */
268
+ hashTable[h] = curr; /* update hash table */
269
269
 
270
270
  if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
271
271
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
@@ -284,7 +284,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
284
284
  continue;
285
285
  } else {
286
286
  /* found a dict match */
287
- U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
287
+ U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
288
288
  mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
289
289
  while (((ip>anchor) & (dictMatch>dictStart))
290
290
  && (ip[-1] == dictMatch[-1])) {
@@ -316,8 +316,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
316
316
 
317
317
  if (ip <= ilimit) {
318
318
  /* Fill Table */
319
- assert(base+current+2 > istart); /* check base overflow */
320
- hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
319
+ assert(base+curr+2 > istart); /* check base overflow */
320
+ hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
321
321
  hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
322
322
 
323
323
  /* check immediate repcode */
@@ -410,13 +410,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
410
410
  const U32 matchIndex = hashTable[h];
411
411
  const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
412
412
  const BYTE* match = matchBase + matchIndex;
413
- const U32 current = (U32)(ip-base);
414
- const U32 repIndex = current + 1 - offset_1;
413
+ const U32 curr = (U32)(ip-base);
414
+ const U32 repIndex = curr + 1 - offset_1;
415
415
  const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
416
416
  const BYTE* const repMatch = repBase + repIndex;
417
- hashTable[h] = current; /* update hash table */
418
- DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);
419
- assert(offset_1 <= current +1); /* check repIndex */
417
+ hashTable[h] = curr; /* update hash table */
418
+ DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
419
+ assert(offset_1 <= curr +1); /* check repIndex */
420
420
 
421
421
  if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
422
422
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
@@ -435,7 +435,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
435
435
  }
436
436
  { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
437
437
  const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
438
- U32 const offset = current - matchIndex;
438
+ U32 const offset = curr - matchIndex;
439
439
  size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
440
440
  while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
441
441
  offset_2 = offset_1; offset_1 = offset; /* update offset history */
@@ -446,7 +446,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
446
446
 
447
447
  if (ip <= ilimit) {
448
448
  /* Fill Table */
449
- hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
449
+ hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
450
450
  hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
451
451
  /* check immediate repcode */
452
452
  while (ip <= ilimit) {
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -58,11 +58,11 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
58
58
 
59
59
  /** ZSTD_insertDUBT1() :
60
60
  * sort one already inserted but unsorted position
61
- * assumption : current >= btlow == (current - btmask)
61
+ * assumption : curr >= btlow == (curr - btmask)
62
62
  * doesn't fail */
63
63
  static void
64
64
  ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
65
- U32 current, const BYTE* inputEnd,
65
+ U32 curr, const BYTE* inputEnd,
66
66
  U32 nbCompares, U32 btLow,
67
67
  const ZSTD_dictMode_e dictMode)
68
68
  {
@@ -74,41 +74,41 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
74
74
  const BYTE* const base = ms->window.base;
75
75
  const BYTE* const dictBase = ms->window.dictBase;
76
76
  const U32 dictLimit = ms->window.dictLimit;
77
- const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
78
- const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
77
+ const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
78
+ const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
79
79
  const BYTE* const dictEnd = dictBase + dictLimit;
80
80
  const BYTE* const prefixStart = base + dictLimit;
81
81
  const BYTE* match;
82
- U32* smallerPtr = bt + 2*(current&btMask);
82
+ U32* smallerPtr = bt + 2*(curr&btMask);
83
83
  U32* largerPtr = smallerPtr + 1;
84
84
  U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
85
85
  U32 dummy32; /* to be nullified at the end */
86
86
  U32 const windowValid = ms->window.lowLimit;
87
87
  U32 const maxDistance = 1U << cParams->windowLog;
88
- U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
88
+ U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
89
89
 
90
90
 
91
91
  DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
92
- current, dictLimit, windowLow);
93
- assert(current >= btLow);
92
+ curr, dictLimit, windowLow);
93
+ assert(curr >= btLow);
94
94
  assert(ip < iend); /* condition for ZSTD_count */
95
95
 
96
96
  while (nbCompares-- && (matchIndex > windowLow)) {
97
97
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
98
98
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
99
- assert(matchIndex < current);
99
+ assert(matchIndex < curr);
100
100
  /* note : all candidates are now supposed sorted,
101
101
  * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
102
102
  * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
103
103
 
104
104
  if ( (dictMode != ZSTD_extDict)
105
105
  || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
106
- || (current < dictLimit) /* both in extDict */) {
106
+ || (curr < dictLimit) /* both in extDict */) {
107
107
  const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
108
108
  || (matchIndex+matchLength >= dictLimit)) ?
109
109
  base : dictBase;
110
110
  assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
111
- || (current < dictLimit) );
111
+ || (curr < dictLimit) );
112
112
  match = mBase + matchIndex;
113
113
  matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
114
114
  } else {
@@ -119,7 +119,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
119
119
  }
120
120
 
121
121
  DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
122
- current, matchIndex, (U32)matchLength);
122
+ curr, matchIndex, (U32)matchLength);
123
123
 
124
124
  if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
125
125
  break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
@@ -168,7 +168,7 @@ ZSTD_DUBT_findBetterDictMatch (
168
168
 
169
169
  const BYTE* const base = ms->window.base;
170
170
  const BYTE* const prefixStart = base + ms->window.dictLimit;
171
- U32 const current = (U32)(ip-base);
171
+ U32 const curr = (U32)(ip-base);
172
172
  const BYTE* const dictBase = dms->window.base;
173
173
  const BYTE* const dictEnd = dms->window.nextSrc;
174
174
  U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
@@ -195,10 +195,10 @@ ZSTD_DUBT_findBetterDictMatch (
195
195
 
196
196
  if (matchLength > bestLength) {
197
197
  U32 matchIndex = dictMatchIndex + dictIndexDelta;
198
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
198
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
199
199
  DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
200
- current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
201
- bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
200
+ curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
201
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
202
202
  }
203
203
  if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
204
204
  break; /* drop, to guarantee consistency (miss a little bit of compression) */
@@ -218,9 +218,9 @@ ZSTD_DUBT_findBetterDictMatch (
218
218
  }
219
219
 
220
220
  if (bestLength >= MINMATCH) {
221
- U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
221
+ U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
222
222
  DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
223
- current, (U32)bestLength, (U32)*offsetPtr, mIndex);
223
+ curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
224
224
  }
225
225
  return bestLength;
226
226
 
@@ -241,13 +241,13 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
241
241
  U32 matchIndex = hashTable[h];
242
242
 
243
243
  const BYTE* const base = ms->window.base;
244
- U32 const current = (U32)(ip-base);
245
- U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
244
+ U32 const curr = (U32)(ip-base);
245
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
246
246
 
247
247
  U32* const bt = ms->chainTable;
248
248
  U32 const btLog = cParams->chainLog - 1;
249
249
  U32 const btMask = (1 << btLog) - 1;
250
- U32 const btLow = (btMask >= current) ? 0 : current - btMask;
250
+ U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
251
251
  U32 const unsortLimit = MAX(btLow, windowLow);
252
252
 
253
253
  U32* nextCandidate = bt + 2*(matchIndex&btMask);
@@ -256,8 +256,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
256
256
  U32 nbCandidates = nbCompares;
257
257
  U32 previousCandidate = 0;
258
258
 
259
- DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
259
+ DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
260
260
  assert(ip <= iend-8); /* required for h calculation */
261
+ assert(dictMode != ZSTD_dedicatedDictSearch);
261
262
 
262
263
  /* reach end of unsorted candidates list */
263
264
  while ( (matchIndex > unsortLimit)
@@ -299,14 +300,14 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
299
300
  const U32 dictLimit = ms->window.dictLimit;
300
301
  const BYTE* const dictEnd = dictBase + dictLimit;
301
302
  const BYTE* const prefixStart = base + dictLimit;
302
- U32* smallerPtr = bt + 2*(current&btMask);
303
- U32* largerPtr = bt + 2*(current&btMask) + 1;
304
- U32 matchEndIdx = current + 8 + 1;
303
+ U32* smallerPtr = bt + 2*(curr&btMask);
304
+ U32* largerPtr = bt + 2*(curr&btMask) + 1;
305
+ U32 matchEndIdx = curr + 8 + 1;
305
306
  U32 dummy32; /* to be nullified at the end */
306
307
  size_t bestLength = 0;
307
308
 
308
309
  matchIndex = hashTable[h];
309
- hashTable[h] = current; /* Update Hash Table */
310
+ hashTable[h] = curr; /* Update Hash Table */
310
311
 
311
312
  while (nbCompares-- && (matchIndex > windowLow)) {
312
313
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
@@ -326,8 +327,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
326
327
  if (matchLength > bestLength) {
327
328
  if (matchLength > matchEndIdx - matchIndex)
328
329
  matchEndIdx = matchIndex + (U32)matchLength;
329
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
330
- bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
330
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
331
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
331
332
  if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
332
333
  if (dictMode == ZSTD_dictMatchState) {
333
334
  nbCompares = 0; /* in addition to avoiding checking any
@@ -363,12 +364,12 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
363
364
  mls, dictMode);
364
365
  }
365
366
 
366
- assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
367
+ assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
367
368
  ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
368
369
  if (bestLength >= MINMATCH) {
369
- U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
370
+ U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
370
371
  DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
371
- current, (U32)bestLength, (U32)*offsetPtr, mIndex);
372
+ curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
372
373
  }
373
374
  return bestLength;
374
375
  }
@@ -446,7 +447,7 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
446
447
 
447
448
  /* Update chains up to ip (excluded)
448
449
  Assumption : always within prefix (i.e. not within extDict) */
449
- static U32 ZSTD_insertAndFindFirstIndex_internal(
450
+ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
450
451
  ZSTD_matchState_t* ms,
451
452
  const ZSTD_compressionParameters* const cParams,
452
453
  const BYTE* ip, U32 const mls)
@@ -475,6 +476,121 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
475
476
  return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
476
477
  }
477
478
 
479
+ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
480
+ {
481
+ const BYTE* const base = ms->window.base;
482
+ U32 const target = (U32)(ip - base);
483
+ U32* const hashTable = ms->hashTable;
484
+ U32* const chainTable = ms->chainTable;
485
+ U32 const chainSize = 1 << ms->cParams.chainLog;
486
+ U32 idx = ms->nextToUpdate;
487
+ U32 const minChain = chainSize < target ? target - chainSize : idx;
488
+ U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
489
+ U32 const cacheSize = bucketSize - 1;
490
+ U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
491
+ U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
492
+
493
+ /* We know the hashtable is oversized by a factor of `bucketSize`.
494
+ * We are going to temporarily pretend `bucketSize == 1`, keeping only a
495
+ * single entry. We will use the rest of the space to construct a temporary
496
+ * chaintable.
497
+ */
498
+ U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
499
+ U32* const tmpHashTable = hashTable;
500
+ U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
501
+ U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
502
+ U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
503
+
504
+ U32 hashIdx;
505
+
506
+ assert(ms->cParams.chainLog <= 24);
507
+ assert(ms->cParams.hashLog >= ms->cParams.chainLog);
508
+ assert(idx != 0);
509
+ assert(tmpMinChain <= minChain);
510
+
511
+ /* fill conventional hash table and conventional chain table */
512
+ for ( ; idx < target; idx++) {
513
+ U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
514
+ if (idx >= tmpMinChain) {
515
+ tmpChainTable[idx - tmpMinChain] = hashTable[h];
516
+ }
517
+ tmpHashTable[h] = idx;
518
+ }
519
+
520
+ /* sort chains into ddss chain table */
521
+ {
522
+ U32 chainPos = 0;
523
+ for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
524
+ U32 count;
525
+ U32 countBeyondMinChain = 0;
526
+ U32 i = tmpHashTable[hashIdx];
527
+ for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
528
+ /* skip through the chain to the first position that won't be
529
+ * in the hash cache bucket */
530
+ if (i < minChain) {
531
+ countBeyondMinChain++;
532
+ }
533
+ i = tmpChainTable[i - tmpMinChain];
534
+ }
535
+ if (count == cacheSize) {
536
+ for (count = 0; count < chainLimit;) {
537
+ if (i < minChain) {
538
+ if (!i || countBeyondMinChain++ > cacheSize) {
539
+ /* only allow pulling `cacheSize` number of entries
540
+ * into the cache or chainTable beyond `minChain`,
541
+ * to replace the entries pulled out of the
542
+ * chainTable into the cache. This lets us reach
543
+ * back further without increasing the total number
544
+ * of entries in the chainTable, guaranteeing the
545
+ * DDSS chain table will fit into the space
546
+ * allocated for the regular one. */
547
+ break;
548
+ }
549
+ }
550
+ chainTable[chainPos++] = i;
551
+ count++;
552
+ if (i < tmpMinChain) {
553
+ break;
554
+ }
555
+ i = tmpChainTable[i - tmpMinChain];
556
+ }
557
+ } else {
558
+ count = 0;
559
+ }
560
+ if (count) {
561
+ tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
562
+ } else {
563
+ tmpHashTable[hashIdx] = 0;
564
+ }
565
+ }
566
+ assert(chainPos <= chainSize); /* I believe this is guaranteed... */
567
+ }
568
+
569
+ /* move chain pointers into the last entry of each hash bucket */
570
+ for (hashIdx = (1 << hashLog); hashIdx; ) {
571
+ U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
572
+ U32 const chainPackedPointer = tmpHashTable[hashIdx];
573
+ U32 i;
574
+ for (i = 0; i < cacheSize; i++) {
575
+ hashTable[bucketIdx + i] = 0;
576
+ }
577
+ hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
578
+ }
579
+
580
+ /* fill the buckets of the hash table */
581
+ for (idx = ms->nextToUpdate; idx < target; idx++) {
582
+ U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
583
+ << ZSTD_LAZY_DDSS_BUCKET_LOG;
584
+ U32 i;
585
+ /* Shift hash cache down 1. */
586
+ for (i = cacheSize - 1; i; i--)
587
+ hashTable[h + i] = hashTable[h + i - 1];
588
+ hashTable[h] = idx;
589
+ }
590
+
591
+ ms->nextToUpdate = target;
592
+ }
593
+
478
594
 
479
595
  /* inlining is important to hardwire a hot branch (template emulation) */
480
596
  FORCE_INLINE_TEMPLATE
@@ -493,20 +609,33 @@ size_t ZSTD_HcFindBestMatch_generic (
493
609
  const U32 dictLimit = ms->window.dictLimit;
494
610
  const BYTE* const prefixStart = base + dictLimit;
495
611
  const BYTE* const dictEnd = dictBase + dictLimit;
496
- const U32 current = (U32)(ip-base);
612
+ const U32 curr = (U32)(ip-base);
497
613
  const U32 maxDistance = 1U << cParams->windowLog;
498
614
  const U32 lowestValid = ms->window.lowLimit;
499
- const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
615
+ const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
500
616
  const U32 isDictionary = (ms->loadedDictEnd != 0);
501
617
  const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
502
- const U32 minChain = current > chainSize ? current - chainSize : 0;
618
+ const U32 minChain = curr > chainSize ? curr - chainSize : 0;
503
619
  U32 nbAttempts = 1U << cParams->searchLog;
504
620
  size_t ml=4-1;
505
621
 
622
+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
623
+ const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
624
+ ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
625
+ const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
626
+ ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
627
+
628
+ U32 matchIndex;
629
+
630
+ if (dictMode == ZSTD_dedicatedDictSearch) {
631
+ const U32* entry = &dms->hashTable[ddsIdx];
632
+ PREFETCH_L1(entry);
633
+ }
634
+
506
635
  /* HC4 match finder */
507
- U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
636
+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
508
637
 
509
- for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
638
+ for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
510
639
  size_t currentMl=0;
511
640
  if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
512
641
  const BYTE* const match = base + matchIndex;
@@ -523,7 +652,7 @@ size_t ZSTD_HcFindBestMatch_generic (
523
652
  /* save best solution */
524
653
  if (currentMl > ml) {
525
654
  ml = currentMl;
526
- *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
655
+ *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
527
656
  if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
528
657
  }
529
658
 
@@ -531,8 +660,92 @@ size_t ZSTD_HcFindBestMatch_generic (
531
660
  matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
532
661
  }
533
662
 
534
- if (dictMode == ZSTD_dictMatchState) {
535
- const ZSTD_matchState_t* const dms = ms->dictMatchState;
663
+ if (dictMode == ZSTD_dedicatedDictSearch) {
664
+ const U32 ddsLowestIndex = dms->window.dictLimit;
665
+ const BYTE* const ddsBase = dms->window.base;
666
+ const BYTE* const ddsEnd = dms->window.nextSrc;
667
+ const U32 ddsSize = (U32)(ddsEnd - ddsBase);
668
+ const U32 ddsIndexDelta = dictLimit - ddsSize;
669
+ const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
670
+ const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
671
+ U32 ddsAttempt;
672
+
673
+ for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
674
+ PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
675
+ }
676
+
677
+ {
678
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
679
+ U32 const chainIndex = chainPackedPointer >> 8;
680
+
681
+ PREFETCH_L1(&dms->chainTable[chainIndex]);
682
+ }
683
+
684
+ for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
685
+ size_t currentMl=0;
686
+ const BYTE* match;
687
+ matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
688
+ match = ddsBase + matchIndex;
689
+
690
+ if (!matchIndex) {
691
+ return ml;
692
+ }
693
+
694
+ /* guaranteed by table construction */
695
+ (void)ddsLowestIndex;
696
+ assert(matchIndex >= ddsLowestIndex);
697
+ assert(match+4 <= ddsEnd);
698
+ if (MEM_read32(match) == MEM_read32(ip)) {
699
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
700
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
701
+ }
702
+
703
+ /* save best solution */
704
+ if (currentMl > ml) {
705
+ ml = currentMl;
706
+ *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
707
+ if (ip+currentMl == iLimit) {
708
+ /* best possible, avoids read overflow on next attempt */
709
+ return ml;
710
+ }
711
+ }
712
+ }
713
+
714
+ {
715
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
716
+ U32 chainIndex = chainPackedPointer >> 8;
717
+ U32 const chainLength = chainPackedPointer & 0xFF;
718
+ U32 const chainAttempts = nbAttempts - ddsAttempt;
719
+ U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
720
+ U32 chainAttempt;
721
+
722
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
723
+ PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
724
+ }
725
+
726
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
727
+ size_t currentMl=0;
728
+ const BYTE* match;
729
+ matchIndex = dms->chainTable[chainIndex];
730
+ match = ddsBase + matchIndex;
731
+
732
+ /* guaranteed by table construction */
733
+ assert(matchIndex >= ddsLowestIndex);
734
+ assert(match+4 <= ddsEnd);
735
+ if (MEM_read32(match) == MEM_read32(ip)) {
736
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
737
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
738
+ }
739
+
740
+ /* save best solution */
741
+ if (currentMl > ml) {
742
+ ml = currentMl;
743
+ *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
744
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
745
+ }
746
+ }
747
+ }
748
+ } else if (dictMode == ZSTD_dictMatchState) {
536
749
  const U32* const dmsChainTable = dms->chainTable;
537
750
  const U32 dmsChainSize = (1 << dms->cParams.chainLog);
538
751
  const U32 dmsChainMask = dmsChainSize - 1;
@@ -545,7 +758,7 @@ size_t ZSTD_HcFindBestMatch_generic (
545
758
 
546
759
  matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
547
760
 
548
- for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
761
+ for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
549
762
  size_t currentMl=0;
550
763
  const BYTE* const match = dmsBase + matchIndex;
551
764
  assert(match+4 <= dmsEnd);
@@ -555,11 +768,12 @@ size_t ZSTD_HcFindBestMatch_generic (
555
768
  /* save best solution */
556
769
  if (currentMl > ml) {
557
770
  ml = currentMl;
558
- *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
771
+ *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
559
772
  if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
560
773
  }
561
774
 
562
775
  if (matchIndex <= dmsMinChain) break;
776
+
563
777
  matchIndex = dmsChainTable[matchIndex & dmsChainMask];
564
778
  }
565
779
  }
@@ -600,6 +814,22 @@ static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
600
814
  }
601
815
 
602
816
 
817
+ static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
818
+ ZSTD_matchState_t* ms,
819
+ const BYTE* ip, const BYTE* const iLimit,
820
+ size_t* offsetPtr)
821
+ {
822
+ switch(ms->cParams.minMatch)
823
+ {
824
+ default : /* includes case 3 */
825
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
826
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
827
+ case 7 :
828
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
829
+ }
830
+ }
831
+
832
+
603
833
  FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
604
834
  ZSTD_matchState_t* ms,
605
835
  const BYTE* ip, const BYTE* const iLimit,
@@ -641,39 +871,62 @@ ZSTD_compressBlock_lazy_generic(
641
871
  typedef size_t (*searchMax_f)(
642
872
  ZSTD_matchState_t* ms,
643
873
  const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
644
- searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
645
- (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
646
- : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
647
- (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
648
- : ZSTD_HcFindBestMatch_selectMLS);
874
+
875
+ /**
876
+ * This table is indexed first by the four ZSTD_dictMode_e values, and then
877
+ * by the two searchMethod_e values. NULLs are placed for configurations
878
+ * that should never occur (extDict modes go to the other implementation
879
+ * below and there is no DDSS for binary tree search yet).
880
+ */
881
+ const searchMax_f searchFuncs[4][2] = {
882
+ {
883
+ ZSTD_HcFindBestMatch_selectMLS,
884
+ ZSTD_BtFindBestMatch_selectMLS
885
+ },
886
+ {
887
+ NULL,
888
+ NULL
889
+ },
890
+ {
891
+ ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
892
+ ZSTD_BtFindBestMatch_dictMatchState_selectMLS
893
+ },
894
+ {
895
+ ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
896
+ NULL
897
+ }
898
+ };
899
+
900
+ searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
649
901
  U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
650
902
 
903
+ const int isDMS = dictMode == ZSTD_dictMatchState;
904
+ const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
905
+ const int isDxS = isDMS || isDDS;
651
906
  const ZSTD_matchState_t* const dms = ms->dictMatchState;
652
- const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
653
- dms->window.dictLimit : 0;
654
- const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
655
- dms->window.base : NULL;
656
- const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
657
- dictBase + dictLowestIndex : NULL;
658
- const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
659
- dms->window.nextSrc : NULL;
660
- const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
907
+ const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
908
+ const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
909
+ const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
910
+ const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL;
911
+ const U32 dictIndexDelta = isDxS ?
661
912
  prefixLowestIndex - (U32)(dictEnd - dictBase) :
662
913
  0;
663
914
  const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
664
915
 
916
+ assert(searchMax != NULL);
917
+
665
918
  DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
666
919
 
667
920
  /* init */
668
921
  ip += (dictAndPrefixLength == 0);
669
922
  if (dictMode == ZSTD_noDict) {
670
- U32 const current = (U32)(ip - base);
671
- U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
672
- U32 const maxRep = current - windowLow;
923
+ U32 const curr = (U32)(ip - base);
924
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
925
+ U32 const maxRep = curr - windowLow;
673
926
  if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
674
927
  if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
675
928
  }
676
- if (dictMode == ZSTD_dictMatchState) {
929
+ if (isDxS) {
677
930
  /* dictMatchState repCode checks don't currently handle repCode == 0
678
931
  * disabling. */
679
932
  assert(offset_1 <= dictAndPrefixLength);
@@ -693,9 +946,9 @@ ZSTD_compressBlock_lazy_generic(
693
946
  const BYTE* start=ip+1;
694
947
 
695
948
  /* check repCode */
696
- if (dictMode == ZSTD_dictMatchState) {
949
+ if (isDxS) {
697
950
  const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
698
- const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
951
+ const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
699
952
  && repIndex < prefixLowestIndex) ?
700
953
  dictBase + (repIndex - dictIndexDelta) :
701
954
  base + repIndex;
@@ -736,7 +989,7 @@ ZSTD_compressBlock_lazy_generic(
736
989
  if ((mlRep >= 4) && (gain2 > gain1))
737
990
  matchLength = mlRep, offset = 0, start = ip;
738
991
  }
739
- if (dictMode == ZSTD_dictMatchState) {
992
+ if (isDxS) {
740
993
  const U32 repIndex = (U32)(ip - base) - offset_1;
741
994
  const BYTE* repMatch = repIndex < prefixLowestIndex ?
742
995
  dictBase + (repIndex - dictIndexDelta) :
@@ -771,7 +1024,7 @@ ZSTD_compressBlock_lazy_generic(
771
1024
  if ((mlRep >= 4) && (gain2 > gain1))
772
1025
  matchLength = mlRep, offset = 0, start = ip;
773
1026
  }
774
- if (dictMode == ZSTD_dictMatchState) {
1027
+ if (isDxS) {
775
1028
  const U32 repIndex = (U32)(ip - base) - offset_1;
776
1029
  const BYTE* repMatch = repIndex < prefixLowestIndex ?
777
1030
  dictBase + (repIndex - dictIndexDelta) :
@@ -809,7 +1062,7 @@ ZSTD_compressBlock_lazy_generic(
809
1062
  && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
810
1063
  { start--; matchLength++; }
811
1064
  }
812
- if (dictMode == ZSTD_dictMatchState) {
1065
+ if (isDxS) {
813
1066
  U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
814
1067
  const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
815
1068
  const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
@@ -825,12 +1078,11 @@ _storeSequence:
825
1078
  }
826
1079
 
827
1080
  /* check immediate repcode */
828
- if (dictMode == ZSTD_dictMatchState) {
1081
+ if (isDxS) {
829
1082
  while (ip <= ilimit) {
830
1083
  U32 const current2 = (U32)(ip-base);
831
1084
  U32 const repIndex = current2 - offset_2;
832
- const BYTE* repMatch = dictMode == ZSTD_dictMatchState
833
- && repIndex < prefixLowestIndex ?
1085
+ const BYTE* repMatch = repIndex < prefixLowestIndex ?
834
1086
  dictBase - dictIndexDelta + repIndex :
835
1087
  base + repIndex;
836
1088
  if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
@@ -925,6 +1177,28 @@ size_t ZSTD_compressBlock_greedy_dictMatchState(
925
1177
  }
926
1178
 
927
1179
 
1180
+ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
1181
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1182
+ void const* src, size_t srcSize)
1183
+ {
1184
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
1185
+ }
1186
+
1187
+ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
1188
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1189
+ void const* src, size_t srcSize)
1190
+ {
1191
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
1192
+ }
1193
+
1194
+ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
1195
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1196
+ void const* src, size_t srcSize)
1197
+ {
1198
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
1199
+ }
1200
+
1201
+
928
1202
  FORCE_INLINE_TEMPLATE
929
1203
  size_t ZSTD_compressBlock_lazy_extDict_generic(
930
1204
  ZSTD_matchState_t* ms, seqStore_t* seqStore,
@@ -968,11 +1242,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
968
1242
  size_t matchLength=0;
969
1243
  size_t offset=0;
970
1244
  const BYTE* start=ip+1;
971
- U32 current = (U32)(ip-base);
1245
+ U32 curr = (U32)(ip-base);
972
1246
 
973
1247
  /* check repCode */
974
- { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
975
- const U32 repIndex = (U32)(current+1 - offset_1);
1248
+ { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
1249
+ const U32 repIndex = (U32)(curr+1 - offset_1);
976
1250
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
977
1251
  const BYTE* const repMatch = repBase + repIndex;
978
1252
  if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
@@ -999,11 +1273,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
999
1273
  if (depth>=1)
1000
1274
  while (ip<ilimit) {
1001
1275
  ip ++;
1002
- current++;
1276
+ curr++;
1003
1277
  /* check repCode */
1004
1278
  if (offset) {
1005
- const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1006
- const U32 repIndex = (U32)(current - offset_1);
1279
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
1280
+ const U32 repIndex = (U32)(curr - offset_1);
1007
1281
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1008
1282
  const BYTE* const repMatch = repBase + repIndex;
1009
1283
  if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
@@ -1030,11 +1304,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1030
1304
  /* let's find an even better one */
1031
1305
  if ((depth==2) && (ip<ilimit)) {
1032
1306
  ip ++;
1033
- current++;
1307
+ curr++;
1034
1308
  /* check repCode */
1035
1309
  if (offset) {
1036
- const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1037
- const U32 repIndex = (U32)(current - offset_1);
1310
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
1311
+ const U32 repIndex = (U32)(curr - offset_1);
1038
1312
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1039
1313
  const BYTE* const repMatch = repBase + repIndex;
1040
1314
  if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */