zstd-ruby 1.4.5.0 → 1.4.9.0

Files changed (93)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +237 -138
  5. data/ext/zstdruby/libzstd/README.md +28 -0
  6. data/ext/zstdruby/libzstd/common/bitstream.h +25 -16
  7. data/ext/zstdruby/libzstd/common/compiler.h +118 -4
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  10. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +189 -43
  12. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +2 -2
  14. data/ext/zstdruby/libzstd/common/fse.h +40 -12
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +124 -17
  16. data/ext/zstdruby/libzstd/common/huf.h +27 -6
  17. data/ext/zstdruby/libzstd/common/mem.h +67 -94
  18. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  19. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  20. data/ext/zstdruby/libzstd/common/threading.c +6 -5
  21. data/ext/zstdruby/libzstd/common/xxhash.c +19 -57
  22. data/ext/zstdruby/libzstd/common/xxhash.h +2 -2
  23. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  24. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  25. data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
  26. data/ext/zstdruby/libzstd/common/zstd_internal.h +90 -59
  27. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  28. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  29. data/ext/zstdruby/libzstd/compress/fse_compress.c +31 -24
  30. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  31. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  32. data/ext/zstdruby/libzstd/compress/huf_compress.c +217 -101
  33. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1495 -478
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +143 -44
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +7 -7
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +18 -4
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  41. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +62 -26
  42. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +23 -23
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
  44. data/ext/zstdruby/libzstd/compress/zstd_fast.c +21 -21
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
  46. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +352 -78
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  48. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +276 -209
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +8 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  51. data/ext/zstdruby/libzstd/compress/zstd_opt.c +191 -46
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  53. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +79 -410
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +27 -109
  55. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +303 -201
  56. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  58. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +370 -87
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +153 -45
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +6 -3
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +28 -11
  62. data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
  63. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  66. data/ext/zstdruby/libzstd/dictBuilder/cover.c +40 -31
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.h +2 -2
  68. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  69. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +26 -25
  70. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +22 -24
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +5 -4
  72. data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
  73. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  74. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
  75. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +6 -2
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +6 -2
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  79. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +6 -2
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +7 -3
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +10 -6
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
  85. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +10 -6
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +10 -6
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  89. data/ext/zstdruby/libzstd/libzstd.pc.in +3 -3
  90. data/ext/zstdruby/libzstd/zstd.h +414 -54
  91. data/lib/zstd-ruby/version.rb +1 -1
  92. metadata +7 -3
  93. data/.travis.yml +0 -14
data/ext/zstdruby/libzstd/compress/zstd_double_fast.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
@@ -31,15 +31,15 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
 * is empty.
 */
 for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
- U32 const current = (U32)(ip - base);
+ U32 const curr = (U32)(ip - base);
 U32 i;
 for (i = 0; i < fastHashFillStep; ++i) {
 size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
 size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
 if (i == 0)
- hashSmall[smHash] = current + i;
+ hashSmall[smHash] = curr + i;
 if (i == 0 || hashLarge[lgHash] == 0)
- hashLarge[lgHash] = current + i;
+ hashLarge[lgHash] = curr + i;
 /* Only load extra positions for ZSTD_dtlm_full */
 if (dtlm == ZSTD_dtlm_fast)
 break;
@@ -108,9 +108,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
 /* init */
 ip += (dictAndPrefixLength == 0);
 if (dictMode == ZSTD_noDict) {
- U32 const current = (U32)(ip - base);
- U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
- U32 const maxRep = current - windowLow;
+ U32 const curr = (U32)(ip - base);
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
+ U32 const maxRep = curr - windowLow;
 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
 }
@@ -129,17 +129,17 @@ size_t ZSTD_compressBlock_doubleFast_generic(
 size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
 size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
 size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
- U32 const current = (U32)(ip-base);
+ U32 const curr = (U32)(ip-base);
 U32 const matchIndexL = hashLong[h2];
 U32 matchIndexS = hashSmall[h];
 const BYTE* matchLong = base + matchIndexL;
 const BYTE* match = base + matchIndexS;
- const U32 repIndex = current + 1 - offset_1;
+ const U32 repIndex = curr + 1 - offset_1;
 const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
 && repIndex < prefixLowestIndex) ?
 dictBase + (repIndex - dictIndexDelta) :
 base + repIndex;
- hashLong[h2] = hashSmall[h] = current; /* update hash tables */
+ hashLong[h2] = hashSmall[h] = curr; /* update hash tables */

 /* check dictMatchState repcode */
 if (dictMode == ZSTD_dictMatchState
@@ -177,7 +177,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(

 if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
 mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
- offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
+ offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
 while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
 goto _match_found;
 } }
@@ -209,7 +209,7 @@ _search_next_long:
 size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
 U32 const matchIndexL3 = hashLong[hl3];
 const BYTE* matchL3 = base + matchIndexL3;
- hashLong[hl3] = current + 1;
+ hashLong[hl3] = curr + 1;

 /* check prefix long +1 match */
 if (matchIndexL3 > prefixLowestIndex) {
@@ -228,7 +228,7 @@ _search_next_long:
 if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
 mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
 ip++;
- offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
+ offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
 while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
 goto _match_found;
 } } }
@@ -236,7 +236,7 @@ _search_next_long:
 /* if no long +1 match, explore the short match we found */
 if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
 mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
- offset = (U32)(current - matchIndexS);
+ offset = (U32)(curr - matchIndexS);
 while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
 } else {
 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
@@ -260,7 +260,7 @@ _match_stored:
 if (ip <= ilimit) {
 /* Complementary insertion */
 /* done after iLimit test, as candidates could be > iend-8 */
- { U32 const indexToInsert = current+2;
+ { U32 const indexToInsert = curr+2;
 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -401,12 +401,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
 const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
 const BYTE* matchLong = matchLongBase + matchLongIndex;

- const U32 current = (U32)(ip-base);
- const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
+ const U32 curr = (U32)(ip-base);
+ const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
 const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
 const BYTE* const repMatch = repBase + repIndex;
 size_t mLength;
- hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
+ hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */

 if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
 & (repIndex > dictStartIndex))
@@ -421,7 +421,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
 const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
 U32 offset;
 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
- offset = current - matchLongIndex;
+ offset = curr - matchLongIndex;
 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
 offset_2 = offset_1;
 offset_1 = offset;
@@ -433,19 +433,19 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
 const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
 const BYTE* match3 = match3Base + matchIndex3;
 U32 offset;
- hashLong[h3] = current + 1;
+ hashLong[h3] = curr + 1;
 if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
 const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
 const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
 mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
 ip++;
- offset = current+1 - matchIndex3;
+ offset = curr+1 - matchIndex3;
 while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
 } else {
 const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
 const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
- offset = current - matchIndex;
+ offset = curr - matchIndex;
 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
 }
 offset_2 = offset_1;
@@ -464,7 +464,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
 if (ip <= ilimit) {
 /* Complementary insertion */
 /* done after iLimit test, as candidates could be > iend-8 */
- { U32 const indexToInsert = current+2;
+ { U32 const indexToInsert = curr+2;
 hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
 hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
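Nearly every hunk in this file (and in the files that follow) is the same mechanical rename: the local variable `current` becomes `curr`. The diff itself gives no rationale; a plausible one, offered here purely as an assumption, is that `current` collides with the macro of the same name that the Linux kernel defines via <asm/current.h>, which matters once libzstd is built in a freestanding kernel environment. A minimal self-contained sketch of the collision, using a hypothetical stand-in macro:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-in for the kernel's `current` macro (assumption). */
#define current (get_current())

int main(void)
{
    const uint8_t base[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    const uint8_t* ip = base + 4;

    /* With the macro above in scope, the old zstd-style declaration would
     * not compile, because `current` expands before the compiler sees it:
     *     uint32_t const current = (uint32_t)(ip - base);
     * The renamed form used throughout this diff is unaffected: */
    uint32_t const curr = (uint32_t)(ip - base);

    printf("curr = %u\n", (unsigned)curr);
    return 0;
}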
data/ext/zstdruby/libzstd/compress/zstd_double_fast.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
data/ext/zstdruby/libzstd/compress/zstd_fast.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
@@ -29,16 +29,16 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
 * Insert the other positions if their hash entry is empty.
 */
 for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
- U32 const current = (U32)(ip - base);
+ U32 const curr = (U32)(ip - base);
 size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
- hashTable[hash0] = current;
+ hashTable[hash0] = curr;
 if (dtlm == ZSTD_dtlm_fast) continue;
 /* Only load extra positions for ZSTD_dtlm_full */
 { U32 p;
 for (p = 1; p < fastHashFillStep; ++p) {
 size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
 if (hashTable[hash] == 0) { /* not yet filled */
- hashTable[hash] = current + p;
+ hashTable[hash] = curr + p;
 } } } }
 }

@@ -72,9 +72,9 @@ ZSTD_compressBlock_fast_generic(
 DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
 ip0 += (ip0 == prefixStart);
 ip1 = ip0 + 1;
- { U32 const current = (U32)(ip0 - base);
- U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
- U32 const maxRep = current - windowLow;
+ { U32 const curr = (U32)(ip0 - base);
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
+ U32 const maxRep = curr - windowLow;
 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
 }
@@ -242,7 +242,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
 assert(endIndex - prefixStartIndex <= maxDistance);
 (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */

- /* ensure there will be no no underflow
+ /* ensure there will be no underflow
 * when translating a dict index into a local index */
 assert(prefixStartIndex >= (U32)(dictEnd - dictBase));

@@ -258,14 +258,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
 size_t mLength;
 size_t const h = ZSTD_hashPtr(ip, hlog, mls);
- U32 const current = (U32)(ip-base);
+ U32 const curr = (U32)(ip-base);
 U32 const matchIndex = hashTable[h];
 const BYTE* match = base + matchIndex;
- const U32 repIndex = current + 1 - offset_1;
+ const U32 repIndex = curr + 1 - offset_1;
 const BYTE* repMatch = (repIndex < prefixStartIndex) ?
 dictBase + (repIndex - dictIndexDelta) :
 base + repIndex;
- hashTable[h] = current; /* update hash table */
+ hashTable[h] = curr; /* update hash table */

 if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
@@ -284,7 +284,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
 continue;
 } else {
 /* found a dict match */
- U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
+ U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
 mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
 while (((ip>anchor) & (dictMatch>dictStart))
 && (ip[-1] == dictMatch[-1])) {
@@ -316,8 +316,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(

 if (ip <= ilimit) {
 /* Fill Table */
- assert(base+current+2 > istart); /* check base overflow */
- hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
+ assert(base+curr+2 > istart); /* check base overflow */
+ hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
 hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);

 /* check immediate repcode */
@@ -410,13 +410,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
 const U32 matchIndex = hashTable[h];
 const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
 const BYTE* match = matchBase + matchIndex;
- const U32 current = (U32)(ip-base);
- const U32 repIndex = current + 1 - offset_1;
+ const U32 curr = (U32)(ip-base);
+ const U32 repIndex = curr + 1 - offset_1;
 const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
 const BYTE* const repMatch = repBase + repIndex;
- hashTable[h] = current; /* update hash table */
- DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);
- assert(offset_1 <= current +1); /* check repIndex */
+ hashTable[h] = curr; /* update hash table */
+ DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
+ assert(offset_1 <= curr +1); /* check repIndex */

 if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
@@ -435,7 +435,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
 }
 { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
 const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
- U32 const offset = current - matchIndex;
+ U32 const offset = curr - matchIndex;
 size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
 offset_2 = offset_1; offset_1 = offset; /* update offset history */
@@ -446,7 +446,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(

 if (ip <= ilimit) {
 /* Fill Table */
- hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
+ hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
 hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
 /* check immediate repcode */
 while (ip <= ilimit) {
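The dictMatchState hunks above repeatedly compute `repMatch` by testing an index against `prefixStartIndex` and, when it falls below, rebasing it with `dictIndexDelta`. A self-contained sketch of that translation (buffer contents and sizes invented for illustration; the helper name is hypothetical):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

typedef uint8_t BYTE;
typedef uint32_t U32;

/* Same shape as the diff's:
 *   repMatch = (repIndex < prefixStartIndex) ?
 *              dictBase + (repIndex - dictIndexDelta) : base + repIndex; */
static const BYTE* indexToPtr(U32 idx, U32 prefixStartIndex, U32 dictIndexDelta,
                              const BYTE* base, const BYTE* dictBase)
{
    return (idx < prefixStartIndex) ? dictBase + (idx - dictIndexDelta)
                                    : base + idx;
}

int main(void)
{
    enum { DICT_SIZE = 16, PREFIX_START = 24, INPUT_SIZE = 16 };
    BYTE dict[DICT_SIZE];
    BYTE arena[PREFIX_START + INPUT_SIZE]; /* indices >= PREFIX_START hold real input */
    U32 const dictIndexDelta = PREFIX_START - DICT_SIZE; /* dict spans indices [8,24) */

    memset(arena, '.', sizeof(arena));
    memcpy(dict, "ABCDEFGHIJKLMNOP", DICT_SIZE);
    memcpy(arena + PREFIX_START, "abcdefghijklmnop", INPUT_SIZE);

    /* index 10 < 24: resolves into the dictionary (dict[10-8] == 'C') */
    printf("idx 10 -> %c\n", *indexToPtr(10, PREFIX_START, dictIndexDelta, arena, dict));
    /* index 30 >= 24: resolves into the current window (arena[30] == 'g') */
    printf("idx 30 -> %c\n", *indexToPtr(30, PREFIX_START, dictIndexDelta, arena, dict));
    return 0;
}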
data/ext/zstdruby/libzstd/compress/zstd_fast.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
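The fast and double-fast files above (and the lazy compressor in the next file) share the renamed init clamp: `maxRep = curr - windowLow` bounds how far back a carried-over repcode may legally reach, and any larger offset is parked in `offsetSaved` (or `savedOffset`) and disabled for the block. A tiny worked example with invented values:

#include <stdio.h>
#include <stdint.h>

typedef uint32_t U32;

int main(void)
{
    /* invented positions: block starts at index 1000, valid window begins at 900 */
    U32 const curr = 1000;
    U32 const windowLow = 900;
    U32 const maxRep = curr - windowLow;   /* = 100: longest legal rep offset here */

    U32 offset_1 = 50, offset_2 = 300, offsetSaved = 0;

    /* same one-liners as the diff, expanded for readability */
    if (offset_2 > maxRep) { offsetSaved = offset_2; offset_2 = 0; } /* 300 reaches past windowLow: disabled */
    if (offset_1 > maxRep) { offsetSaved = offset_1; offset_1 = 0; } /* 50 is in range: kept */

    printf("offset_1=%u offset_2=%u offsetSaved=%u\n", offset_1, offset_2, offsetSaved);
    /* prints: offset_1=50 offset_2=0 offsetSaved=300 */
    return 0;
}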
data/ext/zstdruby/libzstd/compress/zstd_lazy.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
@@ -58,11 +58,11 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,

 /** ZSTD_insertDUBT1() :
 * sort one already inserted but unsorted position
- * assumption : current >= btlow == (current - btmask)
+ * assumption : curr >= btlow == (curr - btmask)
 * doesn't fail */
 static void
 ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
- U32 current, const BYTE* inputEnd,
+ U32 curr, const BYTE* inputEnd,
 U32 nbCompares, U32 btLow,
 const ZSTD_dictMode_e dictMode)
 {
@@ -74,41 +74,41 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
 const BYTE* const base = ms->window.base;
 const BYTE* const dictBase = ms->window.dictBase;
 const U32 dictLimit = ms->window.dictLimit;
- const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
- const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
+ const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
+ const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
 const BYTE* const dictEnd = dictBase + dictLimit;
 const BYTE* const prefixStart = base + dictLimit;
 const BYTE* match;
- U32* smallerPtr = bt + 2*(current&btMask);
+ U32* smallerPtr = bt + 2*(curr&btMask);
 U32* largerPtr = smallerPtr + 1;
 U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
 U32 dummy32; /* to be nullified at the end */
 U32 const windowValid = ms->window.lowLimit;
 U32 const maxDistance = 1U << cParams->windowLog;
- U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
+ U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;


 DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
- current, dictLimit, windowLow);
- assert(current >= btLow);
+ curr, dictLimit, windowLow);
+ assert(curr >= btLow);
 assert(ip < iend); /* condition for ZSTD_count */

 while (nbCompares-- && (matchIndex > windowLow)) {
 U32* const nextPtr = bt + 2*(matchIndex & btMask);
 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
- assert(matchIndex < current);
+ assert(matchIndex < curr);
 /* note : all candidates are now supposed sorted,
 * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
 * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */

 if ( (dictMode != ZSTD_extDict)
 || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
- || (current < dictLimit) /* both in extDict */) {
+ || (curr < dictLimit) /* both in extDict */) {
 const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
 || (matchIndex+matchLength >= dictLimit)) ?
 base : dictBase;
 assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
- || (current < dictLimit) );
+ || (curr < dictLimit) );
 match = mBase + matchIndex;
 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
 } else {
@@ -119,7 +119,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
 }

 DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
- current, matchIndex, (U32)matchLength);
+ curr, matchIndex, (U32)matchLength);

 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
@@ -168,7 +168,7 @@ ZSTD_DUBT_findBetterDictMatch (

 const BYTE* const base = ms->window.base;
 const BYTE* const prefixStart = base + ms->window.dictLimit;
- U32 const current = (U32)(ip-base);
+ U32 const curr = (U32)(ip-base);
 const BYTE* const dictBase = dms->window.base;
 const BYTE* const dictEnd = dms->window.nextSrc;
 U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
@@ -195,10 +195,10 @@ ZSTD_DUBT_findBetterDictMatch (

 if (matchLength > bestLength) {
 U32 matchIndex = dictMatchIndex + dictIndexDelta;
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
- current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
- bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
+ curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
 }
 if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
 break; /* drop, to guarantee consistency (miss a little bit of compression) */
@@ -218,9 +218,9 @@ ZSTD_DUBT_findBetterDictMatch (
 }

 if (bestLength >= MINMATCH) {
- U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+ U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
 DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
- current, (U32)bestLength, (U32)*offsetPtr, mIndex);
+ curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
 }
 return bestLength;

@@ -241,13 +241,13 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
 U32 matchIndex = hashTable[h];

 const BYTE* const base = ms->window.base;
- U32 const current = (U32)(ip-base);
- U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
+ U32 const curr = (U32)(ip-base);
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);

 U32* const bt = ms->chainTable;
 U32 const btLog = cParams->chainLog - 1;
 U32 const btMask = (1 << btLog) - 1;
- U32 const btLow = (btMask >= current) ? 0 : current - btMask;
+ U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
 U32 const unsortLimit = MAX(btLow, windowLow);

 U32* nextCandidate = bt + 2*(matchIndex&btMask);
@@ -256,8 +256,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
 U32 nbCandidates = nbCompares;
 U32 previousCandidate = 0;

- DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
+ DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
 assert(ip <= iend-8); /* required for h calculation */
+ assert(dictMode != ZSTD_dedicatedDictSearch);

 /* reach end of unsorted candidates list */
 while ( (matchIndex > unsortLimit)
@@ -299,14 +300,14 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
 const U32 dictLimit = ms->window.dictLimit;
 const BYTE* const dictEnd = dictBase + dictLimit;
 const BYTE* const prefixStart = base + dictLimit;
- U32* smallerPtr = bt + 2*(current&btMask);
- U32* largerPtr = bt + 2*(current&btMask) + 1;
- U32 matchEndIdx = current + 8 + 1;
+ U32* smallerPtr = bt + 2*(curr&btMask);
+ U32* largerPtr = bt + 2*(curr&btMask) + 1;
+ U32 matchEndIdx = curr + 8 + 1;
 U32 dummy32; /* to be nullified at the end */
 size_t bestLength = 0;

 matchIndex = hashTable[h];
- hashTable[h] = current; /* Update Hash Table */
+ hashTable[h] = curr; /* Update Hash Table */

 while (nbCompares-- && (matchIndex > windowLow)) {
 U32* const nextPtr = bt + 2*(matchIndex & btMask);
@@ -326,8 +327,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
 if (matchLength > bestLength) {
 if (matchLength > matchEndIdx - matchIndex)
 matchEndIdx = matchIndex + (U32)matchLength;
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
- bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
 if (dictMode == ZSTD_dictMatchState) {
 nbCompares = 0; /* in addition to avoiding checking any
@@ -363,12 +364,12 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
 mls, dictMode);
 }

- assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
+ assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
 if (bestLength >= MINMATCH) {
- U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+ U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
- current, (U32)bestLength, (U32)*offsetPtr, mIndex);
+ curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
 }
 return bestLength;
 }
@@ -446,7 +447,7 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (

 /* Update chains up to ip (excluded)
 Assumption : always within prefix (i.e. not within extDict) */
- static U32 ZSTD_insertAndFindFirstIndex_internal(
+ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
 ZSTD_matchState_t* ms,
 const ZSTD_compressionParameters* const cParams,
 const BYTE* ip, U32 const mls)
@@ -475,6 +476,121 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
 return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
 }

+ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
+ {
+ const BYTE* const base = ms->window.base;
+ U32 const target = (U32)(ip - base);
+ U32* const hashTable = ms->hashTable;
+ U32* const chainTable = ms->chainTable;
+ U32 const chainSize = 1 << ms->cParams.chainLog;
+ U32 idx = ms->nextToUpdate;
+ U32 const minChain = chainSize < target ? target - chainSize : idx;
+ U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
+ U32 const cacheSize = bucketSize - 1;
+ U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
+ U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
+
+ /* We know the hashtable is oversized by a factor of `bucketSize`.
+ * We are going to temporarily pretend `bucketSize == 1`, keeping only a
+ * single entry. We will use the rest of the space to construct a temporary
+ * chaintable.
+ */
+ U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
+ U32* const tmpHashTable = hashTable;
+ U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
+ U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
+ U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
+
+ U32 hashIdx;
+
+ assert(ms->cParams.chainLog <= 24);
+ assert(ms->cParams.hashLog >= ms->cParams.chainLog);
+ assert(idx != 0);
+ assert(tmpMinChain <= minChain);
+
+ /* fill conventional hash table and conventional chain table */
+ for ( ; idx < target; idx++) {
+ U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
+ if (idx >= tmpMinChain) {
+ tmpChainTable[idx - tmpMinChain] = hashTable[h];
+ }
+ tmpHashTable[h] = idx;
+ }
+
+ /* sort chains into ddss chain table */
+ {
+ U32 chainPos = 0;
+ for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
+ U32 count;
+ U32 countBeyondMinChain = 0;
+ U32 i = tmpHashTable[hashIdx];
+ for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
+ /* skip through the chain to the first position that won't be
+ * in the hash cache bucket */
+ if (i < minChain) {
+ countBeyondMinChain++;
+ }
+ i = tmpChainTable[i - tmpMinChain];
+ }
+ if (count == cacheSize) {
+ for (count = 0; count < chainLimit;) {
+ if (i < minChain) {
+ if (!i || countBeyondMinChain++ > cacheSize) {
+ /* only allow pulling `cacheSize` number of entries
+ * into the cache or chainTable beyond `minChain`,
+ * to replace the entries pulled out of the
+ * chainTable into the cache. This lets us reach
+ * back further without increasing the total number
+ * of entries in the chainTable, guaranteeing the
+ * DDSS chain table will fit into the space
+ * allocated for the regular one. */
+ break;
+ }
+ }
+ chainTable[chainPos++] = i;
+ count++;
+ if (i < tmpMinChain) {
+ break;
+ }
+ i = tmpChainTable[i - tmpMinChain];
+ }
+ } else {
+ count = 0;
+ }
+ if (count) {
+ tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
+ } else {
+ tmpHashTable[hashIdx] = 0;
+ }
+ }
+ assert(chainPos <= chainSize); /* I believe this is guaranteed... */
+ }
+
+ /* move chain pointers into the last entry of each hash bucket */
+ for (hashIdx = (1 << hashLog); hashIdx; ) {
+ U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
+ U32 const chainPackedPointer = tmpHashTable[hashIdx];
+ U32 i;
+ for (i = 0; i < cacheSize; i++) {
+ hashTable[bucketIdx + i] = 0;
+ }
+ hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
+ }
+
+ /* fill the buckets of the hash table */
+ for (idx = ms->nextToUpdate; idx < target; idx++) {
+ U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
+ << ZSTD_LAZY_DDSS_BUCKET_LOG;
+ U32 i;
+ /* Shift hash cache down 1. */
+ for (i = cacheSize - 1; i; i--)
+ hashTable[h + i] = hashTable[h + i - 1];
+ hashTable[h] = idx;
+ }
+
+ ms->nextToUpdate = target;
+ }
+

 /* inlining is important to hardwire a hot branch (template emulation) */
 FORCE_INLINE_TEMPLATE
@@ -493,20 +609,33 @@ size_t ZSTD_HcFindBestMatch_generic (
 const U32 dictLimit = ms->window.dictLimit;
 const BYTE* const prefixStart = base + dictLimit;
 const BYTE* const dictEnd = dictBase + dictLimit;
- const U32 current = (U32)(ip-base);
+ const U32 curr = (U32)(ip-base);
 const U32 maxDistance = 1U << cParams->windowLog;
 const U32 lowestValid = ms->window.lowLimit;
- const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
+ const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
 const U32 isDictionary = (ms->loadedDictEnd != 0);
 const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
- const U32 minChain = current > chainSize ? current - chainSize : 0;
+ const U32 minChain = curr > chainSize ? curr - chainSize : 0;
 U32 nbAttempts = 1U << cParams->searchLog;
 size_t ml=4-1;

+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
+ const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
+ ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
+ const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
+ ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
+
+ U32 matchIndex;
+
+ if (dictMode == ZSTD_dedicatedDictSearch) {
+ const U32* entry = &dms->hashTable[ddsIdx];
+ PREFETCH_L1(entry);
+ }
+
 /* HC4 match finder */
- U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);

- for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
+ for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
 size_t currentMl=0;
 if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
 const BYTE* const match = base + matchIndex;
@@ -523,7 +652,7 @@ size_t ZSTD_HcFindBestMatch_generic (
 /* save best solution */
 if (currentMl > ml) {
 ml = currentMl;
- *offsetPtr = current - matchIndex + ZSTD_REP_MOVE;
+ *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
 }

@@ -531,8 +660,92 @@ size_t ZSTD_HcFindBestMatch_generic (
 matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
 }

- if (dictMode == ZSTD_dictMatchState) {
- const ZSTD_matchState_t* const dms = ms->dictMatchState;
+ if (dictMode == ZSTD_dedicatedDictSearch) {
+ const U32 ddsLowestIndex = dms->window.dictLimit;
+ const BYTE* const ddsBase = dms->window.base;
+ const BYTE* const ddsEnd = dms->window.nextSrc;
+ const U32 ddsSize = (U32)(ddsEnd - ddsBase);
+ const U32 ddsIndexDelta = dictLimit - ddsSize;
+ const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
+ const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
+ U32 ddsAttempt;
+
+ for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
+ PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
+ }
+
+ {
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+ U32 const chainIndex = chainPackedPointer >> 8;
+
+ PREFETCH_L1(&dms->chainTable[chainIndex]);
+ }
+
+ for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
+ size_t currentMl=0;
+ const BYTE* match;
+ matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
+ match = ddsBase + matchIndex;
+
+ if (!matchIndex) {
+ return ml;
+ }
+
+ /* guaranteed by table construction */
+ (void)ddsLowestIndex;
+ assert(matchIndex >= ddsLowestIndex);
+ assert(match+4 <= ddsEnd);
+ if (MEM_read32(match) == MEM_read32(ip)) {
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+ }
+
+ /* save best solution */
+ if (currentMl > ml) {
+ ml = currentMl;
+ *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+ if (ip+currentMl == iLimit) {
+ /* best possible, avoids read overflow on next attempt */
+ return ml;
+ }
+ }
+ }
+
+ {
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+ U32 chainIndex = chainPackedPointer >> 8;
+ U32 const chainLength = chainPackedPointer & 0xFF;
+ U32 const chainAttempts = nbAttempts - ddsAttempt;
+ U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
+ U32 chainAttempt;
+
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
+ PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
+ }
+
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
+ size_t currentMl=0;
+ const BYTE* match;
+ matchIndex = dms->chainTable[chainIndex];
+ match = ddsBase + matchIndex;
+
+ /* guaranteed by table construction */
+ assert(matchIndex >= ddsLowestIndex);
+ assert(match+4 <= ddsEnd);
+ if (MEM_read32(match) == MEM_read32(ip)) {
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+ }
+
+ /* save best solution */
+ if (currentMl > ml) {
+ ml = currentMl;
+ *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+ }
+ }
+ }
+ } else if (dictMode == ZSTD_dictMatchState) {
 const U32* const dmsChainTable = dms->chainTable;
 const U32 dmsChainSize = (1 << dms->cParams.chainLog);
 const U32 dmsChainMask = dmsChainSize - 1;
@@ -545,7 +758,7 @@ size_t ZSTD_HcFindBestMatch_generic (

 matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];

- for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
+ for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
 size_t currentMl=0;
 const BYTE* const match = dmsBase + matchIndex;
 assert(match+4 <= dmsEnd);
@@ -555,11 +768,12 @@ size_t ZSTD_HcFindBestMatch_generic (
 /* save best solution */
 if (currentMl > ml) {
 ml = currentMl;
- *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
+ *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
 }

 if (matchIndex <= dmsMinChain) break;
+
 matchIndex = dmsChainTable[matchIndex & dmsChainMask];
 }
 }
@@ -600,6 +814,22 @@ static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
 }


+ static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
+ ZSTD_matchState_t* ms,
+ const BYTE* ip, const BYTE* const iLimit,
+ size_t* offsetPtr)
+ {
+ switch(ms->cParams.minMatch)
+ {
+ default : /* includes case 3 */
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
+ case 7 :
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
+ }
+ }
+
+
 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
 ZSTD_matchState_t* ms,
 const BYTE* ip, const BYTE* const iLimit,
@@ -641,39 +871,62 @@ ZSTD_compressBlock_lazy_generic(
 typedef size_t (*searchMax_f)(
 ZSTD_matchState_t* ms,
 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
- searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
- (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
- : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
- (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
- : ZSTD_HcFindBestMatch_selectMLS);
+
+ /**
+ * This table is indexed first by the four ZSTD_dictMode_e values, and then
+ * by the two searchMethod_e values. NULLs are placed for configurations
+ * that should never occur (extDict modes go to the other implementation
+ * below and there is no DDSS for binary tree search yet).
+ */
+ const searchMax_f searchFuncs[4][2] = {
+ {
+ ZSTD_HcFindBestMatch_selectMLS,
+ ZSTD_BtFindBestMatch_selectMLS
+ },
+ {
+ NULL,
+ NULL
+ },
+ {
+ ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
+ ZSTD_BtFindBestMatch_dictMatchState_selectMLS
+ },
+ {
+ ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
+ NULL
+ }
+ };
+
+ searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
 U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;

+ const int isDMS = dictMode == ZSTD_dictMatchState;
+ const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
+ const int isDxS = isDMS || isDDS;
 const ZSTD_matchState_t* const dms = ms->dictMatchState;
- const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
- dms->window.dictLimit : 0;
- const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
- dms->window.base : NULL;
- const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
- dictBase + dictLowestIndex : NULL;
- const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
- dms->window.nextSrc : NULL;
- const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
+ const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
+ const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
+ const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
+ const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL;
+ const U32 dictIndexDelta = isDxS ?
 prefixLowestIndex - (U32)(dictEnd - dictBase) :
 0;
 const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));

+ assert(searchMax != NULL);
+
 DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);

 /* init */
 ip += (dictAndPrefixLength == 0);
 if (dictMode == ZSTD_noDict) {
- U32 const current = (U32)(ip - base);
- U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
- U32 const maxRep = current - windowLow;
+ U32 const curr = (U32)(ip - base);
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
+ U32 const maxRep = curr - windowLow;
 if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
 if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
 }
- if (dictMode == ZSTD_dictMatchState) {
+ if (isDxS) {
 /* dictMatchState repCode checks don't currently handle repCode == 0
 * disabling. */
 assert(offset_1 <= dictAndPrefixLength);
@@ -693,9 +946,9 @@ ZSTD_compressBlock_lazy_generic(
 const BYTE* start=ip+1;

 /* check repCode */
- if (dictMode == ZSTD_dictMatchState) {
+ if (isDxS) {
 const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
- const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
+ const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
 && repIndex < prefixLowestIndex) ?
 dictBase + (repIndex - dictIndexDelta) :
 base + repIndex;
@@ -736,7 +989,7 @@ ZSTD_compressBlock_lazy_generic(
 if ((mlRep >= 4) && (gain2 > gain1))
 matchLength = mlRep, offset = 0, start = ip;
 }
- if (dictMode == ZSTD_dictMatchState) {
+ if (isDxS) {
 const U32 repIndex = (U32)(ip - base) - offset_1;
 const BYTE* repMatch = repIndex < prefixLowestIndex ?
 dictBase + (repIndex - dictIndexDelta) :
@@ -771,7 +1024,7 @@ ZSTD_compressBlock_lazy_generic(
 if ((mlRep >= 4) && (gain2 > gain1))
 matchLength = mlRep, offset = 0, start = ip;
 }
- if (dictMode == ZSTD_dictMatchState) {
+ if (isDxS) {
 const U32 repIndex = (U32)(ip - base) - offset_1;
 const BYTE* repMatch = repIndex < prefixLowestIndex ?
 dictBase + (repIndex - dictIndexDelta) :
@@ -809,7 +1062,7 @@ ZSTD_compressBlock_lazy_generic(
 && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
 { start--; matchLength++; }
 }
- if (dictMode == ZSTD_dictMatchState) {
+ if (isDxS) {
 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
@@ -825,12 +1078,11 @@ _storeSequence:
 }

 /* check immediate repcode */
- if (dictMode == ZSTD_dictMatchState) {
+ if (isDxS) {
 while (ip <= ilimit) {
 U32 const current2 = (U32)(ip-base);
 U32 const repIndex = current2 - offset_2;
- const BYTE* repMatch = dictMode == ZSTD_dictMatchState
- && repIndex < prefixLowestIndex ?
+ const BYTE* repMatch = repIndex < prefixLowestIndex ?
 dictBase - dictIndexDelta + repIndex :
 base + repIndex;
 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
@@ -925,6 +1177,28 @@ size_t ZSTD_compressBlock_greedy_dictMatchState(
 }


+ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+ {
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
+ }
+
+ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+ {
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
+ }
+
+ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize)
+ {
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
+ }
+
+
 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_lazy_extDict_generic(
 ZSTD_matchState_t* ms, seqStore_t* seqStore,
@@ -968,11 +1242,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
 size_t matchLength=0;
 size_t offset=0;
 const BYTE* start=ip+1;
- U32 current = (U32)(ip-base);
+ U32 curr = (U32)(ip-base);

 /* check repCode */
- { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
- const U32 repIndex = (U32)(current+1 - offset_1);
+ { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
+ const U32 repIndex = (U32)(curr+1 - offset_1);
 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
 const BYTE* const repMatch = repBase + repIndex;
 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
@@ -999,11 +1273,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
 if (depth>=1)
 while (ip<ilimit) {
 ip ++;
- current++;
+ curr++;
 /* check repCode */
 if (offset) {
- const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
- const U32 repIndex = (U32)(current - offset_1);
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
+ const U32 repIndex = (U32)(curr - offset_1);
 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
 const BYTE* const repMatch = repBase + repIndex;
 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
@@ -1030,11 +1304,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
 /* let's find an even better one */
 if ((depth==2) && (ip<ilimit)) {
 ip ++;
- current++;
+ curr++;
 /* check repCode */
 if (offset) {
- const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
- const U32 repIndex = (U32)(current - offset_1);
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
+ const U32 repIndex = (U32)(curr - offset_1);
 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
 const BYTE* const repMatch = repBase + repIndex;
 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
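ZSTD_dedicatedDictSearch_lazy_loadDictionary above packs each hash bucket's overflow chain into a single U32: the chain-table start position in the high bits and the chain length (capped at 255, matching `chainLimit`) in the low 8 bits; ZSTD_HcFindBestMatch_generic then unpacks it with `>> 8` and `& 0xFF`. A self-contained sketch of that encoding, with invented values:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t U32;

/* Same packing as `tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;` */
static U32 packChain(U32 chainStart, U32 count)
{
    assert(count <= 255);          /* the diff caps chain length at 255 */
    return (chainStart << 8) + count;
}

int main(void)
{
    U32 const packed = packChain(1234, 17);
    /* Same decoding as the search loop:
     *   chainIndex  = chainPackedPointer >> 8;
     *   chainLength = chainPackedPointer & 0xFF; */
    printf("start=%u length=%u\n", packed >> 8, packed & 0xFF); /* start=1234 length=17 */
    return 0;
}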