zstdlib 0.7.0-x64-mingw32 → 0.8.0-x64-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +5 -0
  3. data/ext/zstdlib/extconf.rb +1 -1
  4. data/ext/zstdlib/ruby/zlib-3.0/zstdlib.c +4994 -0
  5. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/bitstream.h +25 -16
  6. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/compiler.h +118 -4
  7. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/cpu.h +1 -3
  8. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/debug.c +1 -1
  9. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/debug.h +12 -19
  10. data/ext/zstdlib/zstd-1.5.0/lib/common/entropy_common.c +362 -0
  11. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/error_private.c +2 -1
  12. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/error_private.h +3 -3
  13. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/fse.h +40 -12
  14. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/fse_decompress.c +139 -22
  15. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/huf.h +29 -7
  16. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/mem.h +69 -98
  17. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/pool.c +23 -17
  18. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/pool.h +2 -2
  19. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/threading.c +6 -5
  20. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/threading.h +0 -0
  21. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/xxhash.c +20 -60
  22. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/xxhash.h +2 -2
  23. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/zstd_common.c +10 -10
  24. data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_deps.h +111 -0
  25. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/common/zstd_internal.h +105 -62
  26. data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_trace.h +154 -0
  27. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/fse_compress.c +31 -24
  28. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/hist.c +27 -29
  29. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/hist.h +2 -2
  30. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/huf_compress.c +265 -126
  31. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress.c +2843 -728
  32. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress_internal.h +305 -63
  33. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress_literals.c +8 -8
  34. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress_literals.h +1 -1
  35. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress_sequences.c +29 -7
  36. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress_sequences.h +1 -1
  37. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress_superblock.c +22 -295
  38. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_compress_superblock.h +1 -1
  39. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_cwksp.h +204 -67
  40. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_double_fast.c +25 -25
  41. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_double_fast.h +1 -1
  42. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_fast.c +23 -23
  43. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_fast.h +1 -1
  44. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_lazy.c +2184 -0
  45. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_lazy.h +125 -0
  46. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_ldm.c +314 -211
  47. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_ldm.h +9 -2
  48. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_ldm_geartab.h +103 -0
  49. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_opt.c +191 -46
  50. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstd_opt.h +1 -1
  51. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/compress/zstdmt_compress.c +93 -415
  52. data/ext/zstdlib/zstd-1.5.0/lib/compress/zstdmt_compress.h +110 -0
  53. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/huf_decompress.c +342 -239
  54. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/zstd_ddict.c +9 -9
  55. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/zstd_ddict.h +2 -2
  56. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/zstd_decompress.c +369 -87
  57. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.c +191 -75
  58. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.h +6 -3
  59. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/decompress/zstd_decompress_internal.h +27 -11
  60. data/ext/zstdlib/zstd-1.5.0/lib/zdict.h +452 -0
  61. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/lib/zstd.h +568 -126
  62. data/ext/zstdlib/{zstd-1.4.5/lib/common → zstd-1.5.0/lib}/zstd_errors.h +2 -1
  63. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/gzclose.c +0 -0
  64. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/gzcompatibility.h +1 -1
  65. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/gzguts.h +0 -0
  66. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/gzlib.c +0 -0
  67. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/gzread.c +0 -0
  68. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/gzwrite.c +0 -0
  69. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/zstd_zlibwrapper.c +126 -44
  70. data/ext/zstdlib/{zstd-1.4.5 → zstd-1.5.0}/zlibWrapper/zstd_zlibwrapper.h +1 -1
  71. data/lib/2.2/zstdlib.so +0 -0
  72. data/lib/2.3/zstdlib.so +0 -0
  73. data/lib/2.4/zstdlib.so +0 -0
  74. data/lib/2.5/zstdlib.so +0 -0
  75. data/lib/2.6/zstdlib.so +0 -0
  76. data/lib/2.7/zstdlib.so +0 -0
  77. metadata +69 -64
  78. data/ext/zstdlib/zstd-1.4.5/lib/common/entropy_common.c +0 -216
  79. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_lazy.c +0 -1138
  80. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_lazy.h +0 -67
  81. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstdmt_compress.h +0 -192
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -31,15 +31,15 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
31
31
  * is empty.
32
32
  */
33
33
  for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
34
- U32 const current = (U32)(ip - base);
34
+ U32 const curr = (U32)(ip - base);
35
35
  U32 i;
36
36
  for (i = 0; i < fastHashFillStep; ++i) {
37
37
  size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
38
38
  size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
39
39
  if (i == 0)
40
- hashSmall[smHash] = current + i;
40
+ hashSmall[smHash] = curr + i;
41
41
  if (i == 0 || hashLarge[lgHash] == 0)
42
- hashLarge[lgHash] = current + i;
42
+ hashLarge[lgHash] = curr + i;
43
43
  /* Only load extra positions for ZSTD_dtlm_full */
44
44
  if (dtlm == ZSTD_dtlm_fast)
45
45
  break;
@@ -108,9 +108,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
108
108
  /* init */
109
109
  ip += (dictAndPrefixLength == 0);
110
110
  if (dictMode == ZSTD_noDict) {
111
- U32 const current = (U32)(ip - base);
112
- U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
113
- U32 const maxRep = current - windowLow;
111
+ U32 const curr = (U32)(ip - base);
112
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
113
+ U32 const maxRep = curr - windowLow;
114
114
  if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
115
115
  if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
116
116
  }
@@ -129,17 +129,17 @@ size_t ZSTD_compressBlock_doubleFast_generic(
129
129
  size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
130
130
  size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
131
131
  size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
132
- U32 const current = (U32)(ip-base);
132
+ U32 const curr = (U32)(ip-base);
133
133
  U32 const matchIndexL = hashLong[h2];
134
134
  U32 matchIndexS = hashSmall[h];
135
135
  const BYTE* matchLong = base + matchIndexL;
136
136
  const BYTE* match = base + matchIndexS;
137
- const U32 repIndex = current + 1 - offset_1;
137
+ const U32 repIndex = curr + 1 - offset_1;
138
138
  const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
139
139
  && repIndex < prefixLowestIndex) ?
140
140
  dictBase + (repIndex - dictIndexDelta) :
141
141
  base + repIndex;
142
- hashLong[h2] = hashSmall[h] = current; /* update hash tables */
142
+ hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
143
143
 
144
144
  /* check dictMatchState repcode */
145
145
  if (dictMode == ZSTD_dictMatchState
@@ -177,7 +177,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
177
177
 
178
178
  if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
179
179
  mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
180
- offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
180
+ offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
181
181
  while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
182
182
  goto _match_found;
183
183
  } }
@@ -209,7 +209,7 @@ _search_next_long:
209
209
  size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
210
210
  U32 const matchIndexL3 = hashLong[hl3];
211
211
  const BYTE* matchL3 = base + matchIndexL3;
212
- hashLong[hl3] = current + 1;
212
+ hashLong[hl3] = curr + 1;
213
213
 
214
214
  /* check prefix long +1 match */
215
215
  if (matchIndexL3 > prefixLowestIndex) {
@@ -228,7 +228,7 @@ _search_next_long:
228
228
  if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
229
229
  mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
230
230
  ip++;
231
- offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
231
+ offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
232
232
  while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
233
233
  goto _match_found;
234
234
  } } }
@@ -236,7 +236,7 @@ _search_next_long:
236
236
  /* if no long +1 match, explore the short match we found */
237
237
  if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
238
238
  mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
239
- offset = (U32)(current - matchIndexS);
239
+ offset = (U32)(curr - matchIndexS);
240
240
  while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
241
241
  } else {
242
242
  mLength = ZSTD_count(ip+4, match+4, iend) + 4;
@@ -260,7 +260,7 @@ _match_stored:
260
260
  if (ip <= ilimit) {
261
261
  /* Complementary insertion */
262
262
  /* done after iLimit test, as candidates could be > iend-8 */
263
- { U32 const indexToInsert = current+2;
263
+ { U32 const indexToInsert = curr+2;
264
264
  hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
265
265
  hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
266
266
  hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -401,15 +401,15 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
401
401
  const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
402
402
  const BYTE* matchLong = matchLongBase + matchLongIndex;
403
403
 
404
- const U32 current = (U32)(ip-base);
405
- const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
404
+ const U32 curr = (U32)(ip-base);
405
+ const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
406
406
  const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
407
407
  const BYTE* const repMatch = repBase + repIndex;
408
408
  size_t mLength;
409
- hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
409
+ hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
410
410
 
411
411
  if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
412
- & (repIndex > dictStartIndex))
412
+ & (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
413
413
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
414
414
  const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
415
415
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
@@ -421,7 +421,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
421
421
  const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
422
422
  U32 offset;
423
423
  mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
424
- offset = current - matchLongIndex;
424
+ offset = curr - matchLongIndex;
425
425
  while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
426
426
  offset_2 = offset_1;
427
427
  offset_1 = offset;
@@ -433,19 +433,19 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
433
433
  const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
434
434
  const BYTE* match3 = match3Base + matchIndex3;
435
435
  U32 offset;
436
- hashLong[h3] = current + 1;
436
+ hashLong[h3] = curr + 1;
437
437
  if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
438
438
  const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
439
439
  const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
440
440
  mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
441
441
  ip++;
442
- offset = current+1 - matchIndex3;
442
+ offset = curr+1 - matchIndex3;
443
443
  while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
444
444
  } else {
445
445
  const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
446
446
  const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
447
447
  mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
448
- offset = current - matchIndex;
448
+ offset = curr - matchIndex;
449
449
  while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
450
450
  }
451
451
  offset_2 = offset_1;
@@ -464,7 +464,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
464
464
  if (ip <= ilimit) {
465
465
  /* Complementary insertion */
466
466
  /* done after iLimit test, as candidates could be > iend-8 */
467
- { U32 const indexToInsert = current+2;
467
+ { U32 const indexToInsert = curr+2;
468
468
  hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
469
469
  hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
470
470
  hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -477,7 +477,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
477
477
  U32 const repIndex2 = current2 - offset_2;
478
478
  const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
479
479
  if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
480
- & (repIndex2 > dictStartIndex))
480
+ & (offset_2 < current2 - dictStartIndex))
481
481
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
482
482
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
483
483
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -29,16 +29,16 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
29
29
  * Insert the other positions if their hash entry is empty.
30
30
  */
31
31
  for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
32
- U32 const current = (U32)(ip - base);
32
+ U32 const curr = (U32)(ip - base);
33
33
  size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
34
- hashTable[hash0] = current;
34
+ hashTable[hash0] = curr;
35
35
  if (dtlm == ZSTD_dtlm_fast) continue;
36
36
  /* Only load extra positions for ZSTD_dtlm_full */
37
37
  { U32 p;
38
38
  for (p = 1; p < fastHashFillStep; ++p) {
39
39
  size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
40
40
  if (hashTable[hash] == 0) { /* not yet filled */
41
- hashTable[hash] = current + p;
41
+ hashTable[hash] = curr + p;
42
42
  } } } }
43
43
  }
44
44
 
@@ -72,9 +72,9 @@ ZSTD_compressBlock_fast_generic(
72
72
  DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
73
73
  ip0 += (ip0 == prefixStart);
74
74
  ip1 = ip0 + 1;
75
- { U32 const current = (U32)(ip0 - base);
76
- U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
77
- U32 const maxRep = current - windowLow;
75
+ { U32 const curr = (U32)(ip0 - base);
76
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
77
+ U32 const maxRep = curr - windowLow;
78
78
  if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
79
79
  if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
80
80
  }
@@ -242,7 +242,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
242
242
  assert(endIndex - prefixStartIndex <= maxDistance);
243
243
  (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
244
244
 
245
- /* ensure there will be no no underflow
245
+ /* ensure there will be no underflow
246
246
  * when translating a dict index into a local index */
247
247
  assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
248
248
 
@@ -258,14 +258,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
258
258
  while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
259
259
  size_t mLength;
260
260
  size_t const h = ZSTD_hashPtr(ip, hlog, mls);
261
- U32 const current = (U32)(ip-base);
261
+ U32 const curr = (U32)(ip-base);
262
262
  U32 const matchIndex = hashTable[h];
263
263
  const BYTE* match = base + matchIndex;
264
- const U32 repIndex = current + 1 - offset_1;
264
+ const U32 repIndex = curr + 1 - offset_1;
265
265
  const BYTE* repMatch = (repIndex < prefixStartIndex) ?
266
266
  dictBase + (repIndex - dictIndexDelta) :
267
267
  base + repIndex;
268
- hashTable[h] = current; /* update hash table */
268
+ hashTable[h] = curr; /* update hash table */
269
269
 
270
270
  if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
271
271
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
@@ -284,7 +284,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
284
284
  continue;
285
285
  } else {
286
286
  /* found a dict match */
287
- U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
287
+ U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
288
288
  mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
289
289
  while (((ip>anchor) & (dictMatch>dictStart))
290
290
  && (ip[-1] == dictMatch[-1])) {
@@ -316,8 +316,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
316
316
 
317
317
  if (ip <= ilimit) {
318
318
  /* Fill Table */
319
- assert(base+current+2 > istart); /* check base overflow */
320
- hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
319
+ assert(base+curr+2 > istart); /* check base overflow */
320
+ hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
321
321
  hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
322
322
 
323
323
  /* check immediate repcode */
@@ -410,15 +410,15 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
410
410
  const U32 matchIndex = hashTable[h];
411
411
  const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
412
412
  const BYTE* match = matchBase + matchIndex;
413
- const U32 current = (U32)(ip-base);
414
- const U32 repIndex = current + 1 - offset_1;
413
+ const U32 curr = (U32)(ip-base);
414
+ const U32 repIndex = curr + 1 - offset_1;
415
415
  const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
416
416
  const BYTE* const repMatch = repBase + repIndex;
417
- hashTable[h] = current; /* update hash table */
418
- DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);
419
- assert(offset_1 <= current +1); /* check repIndex */
417
+ hashTable[h] = curr; /* update hash table */
418
+ DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
420
419
 
421
- if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
420
+ if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
421
+ & (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
422
422
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
423
423
  const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
424
424
  size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
@@ -435,7 +435,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
435
435
  }
436
436
  { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
437
437
  const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
438
- U32 const offset = current - matchIndex;
438
+ U32 const offset = curr - matchIndex;
439
439
  size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
440
440
  while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
441
441
  offset_2 = offset_1; offset_1 = offset; /* update offset history */
@@ -446,14 +446,14 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
446
446
 
447
447
  if (ip <= ilimit) {
448
448
  /* Fill Table */
449
- hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
449
+ hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
450
450
  hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
451
451
  /* check immediate repcode */
452
452
  while (ip <= ilimit) {
453
453
  U32 const current2 = (U32)(ip-base);
454
454
  U32 const repIndex2 = current2 - offset_2;
455
455
  const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
456
- if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
456
+ if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex)) /* intentional overflow */
457
457
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
458
458
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
459
459
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -0,0 +1,2184 @@
1
+ /*
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #include "zstd_compress_internal.h"
12
+ #include "zstd_lazy.h"
13
+
14
+
15
+ /*-*************************************
16
+ * Binary Tree search
17
+ ***************************************/
18
+
19
+ static void
20
+ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
21
+ const BYTE* ip, const BYTE* iend,
22
+ U32 mls)
23
+ {
24
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
25
+ U32* const hashTable = ms->hashTable;
26
+ U32 const hashLog = cParams->hashLog;
27
+
28
+ U32* const bt = ms->chainTable;
29
+ U32 const btLog = cParams->chainLog - 1;
30
+ U32 const btMask = (1 << btLog) - 1;
31
+
32
+ const BYTE* const base = ms->window.base;
33
+ U32 const target = (U32)(ip - base);
34
+ U32 idx = ms->nextToUpdate;
35
+
36
+ if (idx != target)
37
+ DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
38
+ idx, target, ms->window.dictLimit);
39
+ assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */
40
+ (void)iend;
41
+
42
+ assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */
43
+ for ( ; idx < target ; idx++) {
44
+ size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */
45
+ U32 const matchIndex = hashTable[h];
46
+
47
+ U32* const nextCandidatePtr = bt + 2*(idx&btMask);
48
+ U32* const sortMarkPtr = nextCandidatePtr + 1;
49
+
50
+ DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
51
+ hashTable[h] = idx; /* Update Hash Table */
52
+ *nextCandidatePtr = matchIndex; /* update BT like a chain */
53
+ *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
54
+ }
55
+ ms->nextToUpdate = target;
56
+ }
57
+
58
+
59
+ /** ZSTD_insertDUBT1() :
60
+ * sort one already inserted but unsorted position
61
+ * assumption : curr >= btlow == (curr - btmask)
62
+ * doesn't fail */
63
+ static void
64
+ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
65
+ U32 curr, const BYTE* inputEnd,
66
+ U32 nbCompares, U32 btLow,
67
+ const ZSTD_dictMode_e dictMode)
68
+ {
69
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
70
+ U32* const bt = ms->chainTable;
71
+ U32 const btLog = cParams->chainLog - 1;
72
+ U32 const btMask = (1 << btLog) - 1;
73
+ size_t commonLengthSmaller=0, commonLengthLarger=0;
74
+ const BYTE* const base = ms->window.base;
75
+ const BYTE* const dictBase = ms->window.dictBase;
76
+ const U32 dictLimit = ms->window.dictLimit;
77
+ const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
78
+ const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
79
+ const BYTE* const dictEnd = dictBase + dictLimit;
80
+ const BYTE* const prefixStart = base + dictLimit;
81
+ const BYTE* match;
82
+ U32* smallerPtr = bt + 2*(curr&btMask);
83
+ U32* largerPtr = smallerPtr + 1;
84
+ U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
85
+ U32 dummy32; /* to be nullified at the end */
86
+ U32 const windowValid = ms->window.lowLimit;
87
+ U32 const maxDistance = 1U << cParams->windowLog;
88
+ U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
89
+
90
+
91
+ DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
92
+ curr, dictLimit, windowLow);
93
+ assert(curr >= btLow);
94
+ assert(ip < iend); /* condition for ZSTD_count */
95
+
96
+ while (nbCompares-- && (matchIndex > windowLow)) {
97
+ U32* const nextPtr = bt + 2*(matchIndex & btMask);
98
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
99
+ assert(matchIndex < curr);
100
+ /* note : all candidates are now supposed sorted,
101
+ * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
102
+ * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
103
+
104
+ if ( (dictMode != ZSTD_extDict)
105
+ || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
106
+ || (curr < dictLimit) /* both in extDict */) {
107
+ const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
108
+ || (matchIndex+matchLength >= dictLimit)) ?
109
+ base : dictBase;
110
+ assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
111
+ || (curr < dictLimit) );
112
+ match = mBase + matchIndex;
113
+ matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
114
+ } else {
115
+ match = dictBase + matchIndex;
116
+ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
117
+ if (matchIndex+matchLength >= dictLimit)
118
+ match = base + matchIndex; /* preparation for next read of match[matchLength] */
119
+ }
120
+
121
+ DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
122
+ curr, matchIndex, (U32)matchLength);
123
+
124
+ if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
125
+ break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
126
+ }
127
+
128
+ if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
129
+ /* match is smaller than current */
130
+ *smallerPtr = matchIndex; /* update smaller idx */
131
+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
132
+ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
133
+ DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
134
+ matchIndex, btLow, nextPtr[1]);
135
+ smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
136
+ matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
137
+ } else {
138
+ /* match is larger than current */
139
+ *largerPtr = matchIndex;
140
+ commonLengthLarger = matchLength;
141
+ if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
142
+ DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
143
+ matchIndex, btLow, nextPtr[0]);
144
+ largerPtr = nextPtr;
145
+ matchIndex = nextPtr[0];
146
+ } }
147
+
148
+ *smallerPtr = *largerPtr = 0;
149
+ }
150
+
151
+ /* ZSTD_DUBT_findBetterDictMatch() : walks the binary tree of ms->dictMatchState looking for a match longer than bestLength; overwrites *offsetPtr when a candidate is accepted and returns the resulting best length. */
152
+ static size_t
153
+ ZSTD_DUBT_findBetterDictMatch (
154
+ ZSTD_matchState_t* ms,
155
+ const BYTE* const ip, const BYTE* const iend,
156
+ size_t* offsetPtr,
157
+ size_t bestLength,
158
+ U32 nbCompares,
159
+ U32 const mls,
160
+ const ZSTD_dictMode_e dictMode)
161
+ {
162
+ const ZSTD_matchState_t * const dms = ms->dictMatchState;
163
+ const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
164
+ const U32 * const dictHashTable = dms->hashTable;
165
+ U32 const hashLog = dmsCParams->hashLog;
166
+ size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
167
+ U32 dictMatchIndex = dictHashTable[h]; /* first candidate : head of the dictionary's hash bucket for ip */
168
+
169
+ const BYTE* const base = ms->window.base;
170
+ const BYTE* const prefixStart = base + ms->window.dictLimit;
171
+ U32 const curr = (U32)(ip-base);
172
+ const BYTE* const dictBase = dms->window.base;
173
+ const BYTE* const dictEnd = dms->window.nextSrc;
174
+ U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
175
+ U32 const dictLowLimit = dms->window.lowLimit;
176
+ U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; /* added to a dictionary index to express it in the index space of ms (see matchIndex below) */
177
+
178
+ U32* const dictBt = dms->chainTable; /* the dictionary's chainTable is used as a binary tree : two U32 slots per position */
179
+ U32 const btLog = dmsCParams->chainLog - 1;
180
+ U32 const btMask = (1 << btLog) - 1;
181
+ U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
182
+
183
+ size_t commonLengthSmaller=0, commonLengthLarger=0;
184
+
185
+ (void)dictMode; /* only read by the assert below */
186
+ assert(dictMode == ZSTD_dictMatchState);
187
+
188
+ while (nbCompares-- && (dictMatchIndex > dictLowLimit)) { /* at most nbCompares tree nodes are visited */
189
+ U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
190
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
191
+ const BYTE* match = dictBase + dictMatchIndex;
192
+ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
193
+ if (dictMatchIndex+matchLength >= dictHighLimit)
194
+ match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */
195
+
196
+ if (matchLength > bestLength) { /* longer candidate : kept only if it passes the length-vs-offset test below */
197
+ U32 matchIndex = dictMatchIndex + dictIndexDelta;
198
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
199
+ DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
200
+ curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
201
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
202
+ }
203
+ if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
204
+ break; /* drop, to guarantee consistency (miss a little bit of compression) */
205
+ }
206
+ }
207
+
208
+ if (match[matchLength] < ip[matchLength]) {
209
+ if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
210
+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
211
+ dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
212
+ } else {
213
+ /* match is larger than current */
214
+ if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
215
+ commonLengthLarger = matchLength;
216
+ dictMatchIndex = nextPtr[0];
217
+ }
218
+ }
219
+
220
+ if (bestLength >= MINMATCH) {
221
+ U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; /* mIndex is only consumed by the DEBUGLOG below */
222
+ DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
223
+ curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
224
+ }
225
+ return bestLength;
226
+
227
+ }
228
+
229
+ /* ZSTD_DUBT_findBestMatch() : sorts the pending unsorted candidates of ip's hash bucket into the binary tree, then walks the tree (inserting curr) to find the longest match; in ZSTD_dictMatchState mode any remaining compare budget is spent in the dictionary. Writes the offset code to *offsetPtr, advances ms->nextToUpdate and returns the best length (0 when no candidate was accepted). */
230
+ static size_t
231
+ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
232
+ const BYTE* const ip, const BYTE* const iend,
233
+ size_t* offsetPtr,
234
+ U32 const mls,
235
+ const ZSTD_dictMode_e dictMode)
236
+ {
237
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
238
+ U32* const hashTable = ms->hashTable;
239
+ U32 const hashLog = cParams->hashLog;
240
+ size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
241
+ U32 matchIndex = hashTable[h];
242
+
243
+ const BYTE* const base = ms->window.base;
244
+ U32 const curr = (U32)(ip-base);
245
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
246
+
247
+ U32* const bt = ms->chainTable; /* chainTable is used as a binary tree : two U32 slots per position */
248
+ U32 const btLog = cParams->chainLog - 1;
249
+ U32 const btMask = (1 << btLog) - 1;
250
+ U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
251
+ U32 const unsortLimit = MAX(btLow, windowLow);
252
+
253
+ U32* nextCandidate = bt + 2*(matchIndex&btMask);
254
+ U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;
255
+ U32 nbCompares = 1U << cParams->searchLog; /* search budget, shared by the tree walk and the dictionary search */
256
+ U32 nbCandidates = nbCompares;
257
+ U32 previousCandidate = 0;
258
+
259
+ DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
260
+ assert(ip <= iend-8); /* required for h calculation */
261
+ assert(dictMode != ZSTD_dedicatedDictSearch);
262
+
263
+ /* reach end of unsorted candidates list */
264
+ while ( (matchIndex > unsortLimit)
265
+ && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
266
+ && (nbCandidates > 1) ) {
267
+ DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
268
+ matchIndex);
269
+ *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */
270
+ previousCandidate = matchIndex;
271
+ matchIndex = *nextCandidate;
272
+ nextCandidate = bt + 2*(matchIndex&btMask);
273
+ unsortedMark = bt + 2*(matchIndex&btMask) + 1;
274
+ nbCandidates --;
275
+ }
276
+
277
+ /* nullify last candidate if it's still unsorted
278
+ * simplification, detrimental to compression ratio, beneficial for speed */
279
+ if ( (matchIndex > unsortLimit)
280
+ && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
281
+ DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
282
+ matchIndex);
283
+ *nextCandidate = *unsortedMark = 0;
284
+ }
285
+
286
+ /* batch sort stacked candidates */
287
+ matchIndex = previousCandidate;
288
+ while (matchIndex) { /* will end on matchIndex == 0 */
289
+ U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
290
+ U32 const nextCandidateIdx = *nextCandidateIdxPtr; /* read the reversed-chain link before the insertion below rewrites this slot */
291
+ ZSTD_insertDUBT1(ms, matchIndex, iend,
292
+ nbCandidates, unsortLimit, dictMode);
293
+ matchIndex = nextCandidateIdx;
294
+ nbCandidates++;
295
+ }
296
+
297
+ /* find longest match */
298
+ { size_t commonLengthSmaller = 0, commonLengthLarger = 0;
299
+ const BYTE* const dictBase = ms->window.dictBase;
300
+ const U32 dictLimit = ms->window.dictLimit;
301
+ const BYTE* const dictEnd = dictBase + dictLimit;
302
+ const BYTE* const prefixStart = base + dictLimit;
303
+ U32* smallerPtr = bt + 2*(curr&btMask);
304
+ U32* largerPtr = bt + 2*(curr&btMask) + 1;
305
+ U32 matchEndIdx = curr + 8 + 1; /* initial value keeps the new nextToUpdate above curr (see the assert before it is stored) */
306
+ U32 dummy32; /* to be nullified at the end */
307
+ size_t bestLength = 0;
308
+
309
+ matchIndex = hashTable[h];
310
+ hashTable[h] = curr; /* Update Hash Table */
311
+
312
+ while (nbCompares-- && (matchIndex > windowLow)) {
313
+ U32* const nextPtr = bt + 2*(matchIndex & btMask);
314
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
315
+ const BYTE* match;
316
+
317
+ if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
318
+ match = base + matchIndex;
319
+ matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
320
+ } else {
321
+ match = dictBase + matchIndex;
322
+ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
323
+ if (matchIndex+matchLength >= dictLimit)
324
+ match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
325
+ }
326
+
327
+ if (matchLength > bestLength) {
328
+ if (matchLength > matchEndIdx - matchIndex)
329
+ matchEndIdx = matchIndex + (U32)matchLength;
330
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
331
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
332
+ if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
333
+ if (dictMode == ZSTD_dictMatchState) {
334
+ nbCompares = 0; /* in addition to avoiding checking any
335
+ * further in this loop, make sure we
336
+ * skip checking in the dictionary. */
337
+ }
338
+ break; /* drop, to guarantee consistency (miss a little bit of compression) */
339
+ }
340
+ }
341
+
342
+ if (match[matchLength] < ip[matchLength]) {
343
+ /* match is smaller than current */
344
+ *smallerPtr = matchIndex; /* update smaller idx */
345
+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
346
+ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
347
+ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
348
+ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
349
+ } else {
350
+ /* match is larger than current */
351
+ *largerPtr = matchIndex;
352
+ commonLengthLarger = matchLength;
353
+ if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
354
+ largerPtr = nextPtr;
355
+ matchIndex = nextPtr[0];
356
+ } }
357
+
358
+ *smallerPtr = *largerPtr = 0; /* terminate both open branches (either may point at dummy32) */
359
+
360
+ if (dictMode == ZSTD_dictMatchState && nbCompares) {
361
+ bestLength = ZSTD_DUBT_findBetterDictMatch(
362
+ ms, ip, iend,
363
+ offsetPtr, bestLength, nbCompares,
364
+ mls, dictMode);
365
+ }
366
+
367
+ assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
368
+ ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
369
+ if (bestLength >= MINMATCH) {
370
+ U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; /* mIndex is only consumed by the DEBUGLOG below */
371
+ DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
372
+ curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
373
+ }
374
+ return bestLength;
375
+ }
376
+ }
377
+
378
+
379
+ /** ZSTD_BtFindBestMatch() : Tree updater, providing best match. Returns 0 without searching when ip lies before ms->nextToUpdate; otherwise calls ZSTD_updateDUBT() and returns the result of ZSTD_DUBT_findBestMatch(). */
380
+ FORCE_INLINE_TEMPLATE size_t
381
+ ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
382
+ const BYTE* const ip, const BYTE* const iLimit,
383
+ size_t* offsetPtr,
384
+ const U32 mls /* template */,
385
+ const ZSTD_dictMode_e dictMode)
386
+ {
387
+ DEBUGLOG(7, "ZSTD_BtFindBestMatch");
388
+ if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
389
+ ZSTD_updateDUBT(ms, ip, iLimit, mls);
390
+ return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
391
+ }
392
+
393
+ /* Dispatches on ms->cParams.minMatch to the ZSTD_BtFindBestMatch() call with a constant mls and dictMode ZSTD_noDict : 5 -> mls 5, 6 and 7 -> mls 6, anything else -> mls 4. */
394
+ static size_t
395
+ ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
396
+ const BYTE* ip, const BYTE* const iLimit,
397
+ size_t* offsetPtr)
398
+ {
399
+ switch(ms->cParams.minMatch)
400
+ {
401
+ default : /* includes case 3 */
402
+ case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
403
+ case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
404
+ case 7 : /* intentionally shares the mls 6 call below */
405
+ case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
406
+ }
407
+ }
408
+
409
+ /* Same dispatch on ms->cParams.minMatch as the ZSTD_noDict selector, but every call passes dictMode ZSTD_dictMatchState. */
410
+ static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
411
+ ZSTD_matchState_t* ms,
412
+ const BYTE* ip, const BYTE* const iLimit,
413
+ size_t* offsetPtr)
414
+ {
415
+ switch(ms->cParams.minMatch)
416
+ {
417
+ default : /* includes case 3 */
418
+ case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
419
+ case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
420
+ case 7 : /* intentionally shares the mls 6 call below */
421
+ case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
422
+ }
423
+ }
424
+
425
+ /* Same dispatch on ms->cParams.minMatch as the ZSTD_noDict selector, but every call passes dictMode ZSTD_extDict. */
426
+ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
427
+ ZSTD_matchState_t* ms,
428
+ const BYTE* ip, const BYTE* const iLimit,
429
+ size_t* offsetPtr)
430
+ {
431
+ switch(ms->cParams.minMatch)
432
+ {
433
+ default : /* includes case 3 */
434
+ case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
435
+ case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
436
+ case 7 : /* intentionally shares the mls 6 call below */
437
+ case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
438
+ }
439
+ }
440
+
441
+ /***********************************
442
+ * Dedicated dict search
443
+ ***********************************/
444
+ /* ZSTD_dedicatedDictSearch_lazy_loadDictionary() : indexes positions ms->nextToUpdate .. ip-1 into the dedicated dict search layout : each hash bucket holds bucketSize-1 cached positions followed by one packed pointer into ms->chainTable. Ends by setting ms->nextToUpdate to the index of ip. */
445
+ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
446
+ {
447
+ const BYTE* const base = ms->window.base;
448
+ U32 const target = (U32)(ip - base);
449
+ U32* const hashTable = ms->hashTable;
450
+ U32* const chainTable = ms->chainTable;
451
+ U32 const chainSize = 1 << ms->cParams.chainLog;
452
+ U32 idx = ms->nextToUpdate;
453
+ U32 const minChain = chainSize < target ? target - chainSize : idx;
454
+ U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
455
+ U32 const cacheSize = bucketSize - 1; /* the last slot of each bucket is reserved for the packed chain pointer */
456
+ U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
457
+ U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts; /* the chain length is later stored in the low 8 bits of the packed pointer */
458
+
459
+ /* We know the hashtable is oversized by a factor of `bucketSize`.
460
+ * We are going to temporarily pretend `bucketSize == 1`, keeping only a
461
+ * single entry. We will use the rest of the space to construct a temporary
462
+ * chaintable.
463
+ */
464
+ U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
465
+ U32* const tmpHashTable = hashTable;
466
+ U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
467
+ U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
468
+ U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
469
+ U32 hashIdx;
470
+
471
+ assert(ms->cParams.chainLog <= 24);
472
+ assert(ms->cParams.hashLog > ms->cParams.chainLog);
473
+ assert(idx != 0);
474
+ assert(tmpMinChain <= minChain);
475
+
476
+ /* fill conventional hash table and conventional chain table */
477
+ for ( ; idx < target; idx++) {
478
+ U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
479
+ if (idx >= tmpMinChain) {
480
+ tmpChainTable[idx - tmpMinChain] = hashTable[h];
481
+ }
482
+ tmpHashTable[h] = idx;
483
+ }
484
+
485
+ /* sort chains into ddss chain table */
486
+ {
487
+ U32 chainPos = 0;
488
+ for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
489
+ U32 count;
490
+ U32 countBeyondMinChain = 0;
491
+ U32 i = tmpHashTable[hashIdx];
492
+ for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
493
+ /* skip through the chain to the first position that won't be
494
+ * in the hash cache bucket */
495
+ if (i < minChain) {
496
+ countBeyondMinChain++;
497
+ }
498
+ i = tmpChainTable[i - tmpMinChain];
499
+ }
500
+ if (count == cacheSize) {
501
+ for (count = 0; count < chainLimit;) {
502
+ if (i < minChain) {
503
+ if (!i || ++countBeyondMinChain > cacheSize) {
504
+ /* only allow pulling `cacheSize` number of entries
505
+ * into the cache or chainTable beyond `minChain`,
506
+ * to replace the entries pulled out of the
507
+ * chainTable into the cache. This lets us reach
508
+ * back further without increasing the total number
509
+ * of entries in the chainTable, guaranteeing the
510
+ * DDSS chain table will fit into the space
511
+ * allocated for the regular one. */
512
+ break;
513
+ }
514
+ }
515
+ chainTable[chainPos++] = i;
516
+ count++;
517
+ if (i < tmpMinChain) {
518
+ break;
519
+ }
520
+ i = tmpChainTable[i - tmpMinChain];
521
+ }
522
+ } else {
523
+ count = 0;
524
+ }
525
+ if (count) {
526
+ tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count; /* pack : chain start index in the upper bits, chain length in the low 8 bits */
527
+ } else {
528
+ tmpHashTable[hashIdx] = 0;
529
+ }
530
+ }
531
+ assert(chainPos <= chainSize); /* I believe this is guaranteed... */
532
+ }
533
+
534
+ /* move chain pointers into the last entry of each hash bucket */
535
+ for (hashIdx = (1 << hashLog); hashIdx; ) { /* iterates from the highest bucket down to bucket 0 */
536
+ U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
537
+ U32 const chainPackedPointer = tmpHashTable[hashIdx];
538
+ U32 i;
539
+ for (i = 0; i < cacheSize; i++) {
540
+ hashTable[bucketIdx + i] = 0;
541
+ }
542
+ hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
543
+ }
544
+
545
+ /* fill the buckets of the hash table */
546
+ for (idx = ms->nextToUpdate; idx < target; idx++) {
547
+ U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
548
+ << ZSTD_LAZY_DDSS_BUCKET_LOG;
549
+ U32 i;
550
+ /* Shift hash cache down 1. */
551
+ for (i = cacheSize - 1; i; i--)
552
+ hashTable[h + i] = hashTable[h + i - 1];
553
+ hashTable[h] = idx; /* most recently inserted position sits in the first slot of the bucket */
554
+ }
555
+
556
+ ms->nextToUpdate = target;
557
+ }
558
+
559
+ /* Returns the longest match length found in the dedicated dict search structure.
560
+ * If none are longer than the argument ml, then ml will be returned. *offsetPtr is written only when a longer match is found; the bucket cache is probed first, then the bucket's chain, within the nbAttempts budget.
561
+ */
562
+ FORCE_INLINE_TEMPLATE
563
+ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts,
564
+ const ZSTD_matchState_t* const dms,
565
+ const BYTE* const ip, const BYTE* const iLimit,
566
+ const BYTE* const prefixStart, const U32 curr,
567
+ const U32 dictLimit, const size_t ddsIdx) {
568
+ const U32 ddsLowestIndex = dms->window.dictLimit;
569
+ const BYTE* const ddsBase = dms->window.base;
570
+ const BYTE* const ddsEnd = dms->window.nextSrc;
571
+ const U32 ddsSize = (U32)(ddsEnd - ddsBase);
572
+ const U32 ddsIndexDelta = dictLimit - ddsSize; /* added to a dictionary index before it is subtracted from curr to form the offset */
573
+ const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
574
+ const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
575
+ U32 ddsAttempt;
576
+ U32 matchIndex;
577
+
578
+ for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
579
+ PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
580
+ }
581
+
582
+ {
583
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; /* last slot of the bucket */
584
+ U32 const chainIndex = chainPackedPointer >> 8;
585
+
586
+ PREFETCH_L1(&dms->chainTable[chainIndex]);
587
+ }
588
+
589
+ for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
590
+ size_t currentMl=0;
591
+ const BYTE* match;
592
+ matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
593
+ match = ddsBase + matchIndex;
594
+
595
+ if (!matchIndex) { /* a zero cache slot ends the whole search, the chain is not consulted */
596
+ return ml;
597
+ }
598
+
599
+ /* guaranteed by table construction */
600
+ (void)ddsLowestIndex;
601
+ assert(matchIndex >= ddsLowestIndex);
602
+ assert(match+4 <= ddsEnd);
603
+ if (MEM_read32(match) == MEM_read32(ip)) {
604
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
605
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
606
+ }
607
+
608
+ /* save best solution */
609
+ if (currentMl > ml) {
610
+ ml = currentMl;
611
+ *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
612
+ if (ip+currentMl == iLimit) {
613
+ /* best possible, avoids read overflow on next attempt */
614
+ return ml;
615
+ }
616
+ }
617
+ }
618
+
619
+ {
620
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
621
+ U32 chainIndex = chainPackedPointer >> 8; /* upper bits : first chainTable slot of this bucket's chain */
622
+ U32 const chainLength = chainPackedPointer & 0xFF; /* low 8 bits : number of entries in that chain */
623
+ U32 const chainAttempts = nbAttempts - ddsAttempt; /* budget left after the bucket cache probes */
624
+ U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
625
+ U32 chainAttempt;
626
+
627
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
628
+ PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
629
+ }
630
+
631
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
632
+ size_t currentMl=0;
633
+ const BYTE* match;
634
+ matchIndex = dms->chainTable[chainIndex];
635
+ match = ddsBase + matchIndex;
636
+
637
+ /* guaranteed by table construction */
638
+ assert(matchIndex >= ddsLowestIndex);
639
+ assert(match+4 <= ddsEnd);
640
+ if (MEM_read32(match) == MEM_read32(ip)) {
641
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
642
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
643
+ }
644
+
645
+ /* save best solution */
646
+ if (currentMl > ml) {
647
+ ml = currentMl;
648
+ *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
649
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
650
+ }
651
+ }
652
+ }
653
+ return ml;
654
+ }
655
+
656
+
657
+ /* *********************************
658
+ * Hash Chain
659
+ ***********************************/
660
+ #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] /* expands to an element of a `chainTable` array that must be in scope at the point of use; usable as an lvalue */
661
+
662
+ /* Update chains up to ip (excluded)
663
+ Assumption : always within prefix (i.e. not within extDict). Inserts every position from ms->nextToUpdate up to ip-1, sets ms->nextToUpdate to the index of ip, and returns the hash table entry for ip. */
664
+ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
665
+ ZSTD_matchState_t* ms,
666
+ const ZSTD_compressionParameters* const cParams,
667
+ const BYTE* ip, U32 const mls)
668
+ {
669
+ U32* const hashTable = ms->hashTable;
670
+ const U32 hashLog = cParams->hashLog;
671
+ U32* const chainTable = ms->chainTable; /* referenced implicitly by NEXT_IN_CHAIN() */
672
+ const U32 chainMask = (1 << cParams->chainLog) - 1;
673
+ const BYTE* const base = ms->window.base;
674
+ const U32 target = (U32)(ip - base);
675
+ U32 idx = ms->nextToUpdate;
676
+
677
+ while(idx < target) { /* catch up */
678
+ size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
679
+ NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; /* link idx to the previous head of its hash bucket */
680
+ hashTable[h] = idx;
681
+ idx++;
682
+ }
683
+
684
+ ms->nextToUpdate = target;
685
+ return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
686
+ }
687
+ /* Non-template entry point : runs ZSTD_insertAndFindFirstIndex_internal() with the cParams of ms and mls = ms->cParams.minMatch, and returns its result. */
688
+ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
689
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
690
+ return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
691
+ }
692
+
693
+ /* ZSTD_HcFindBestMatch_generic() : hash-chain search for the longest match at ip; returns the best length found (4-1 when nothing of at least 4 bytes matched) and writes its offset code to *offsetPtr. Inlining is important to hardwire a hot branch (template emulation) */
694
+ FORCE_INLINE_TEMPLATE
695
+ size_t ZSTD_HcFindBestMatch_generic (
696
+ ZSTD_matchState_t* ms,
697
+ const BYTE* const ip, const BYTE* const iLimit,
698
+ size_t* offsetPtr,
699
+ const U32 mls, const ZSTD_dictMode_e dictMode)
700
+ {
701
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
702
+ U32* const chainTable = ms->chainTable; /* referenced implicitly by NEXT_IN_CHAIN() */
703
+ const U32 chainSize = (1 << cParams->chainLog);
704
+ const U32 chainMask = chainSize-1;
705
+ const BYTE* const base = ms->window.base;
706
+ const BYTE* const dictBase = ms->window.dictBase;
707
+ const U32 dictLimit = ms->window.dictLimit;
708
+ const BYTE* const prefixStart = base + dictLimit;
709
+ const BYTE* const dictEnd = dictBase + dictLimit;
710
+ const U32 curr = (U32)(ip-base);
711
+ const U32 maxDistance = 1U << cParams->windowLog;
712
+ const U32 lowestValid = ms->window.lowLimit;
713
+ const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
714
+ const U32 isDictionary = (ms->loadedDictEnd != 0);
715
+ const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; /* lowest index the chain walk may visit */
716
+ const U32 minChain = curr > chainSize ? curr - chainSize : 0;
717
+ U32 nbAttempts = 1U << cParams->searchLog; /* budget shared by the chain walk and the dictionary search that follows */
718
+ size_t ml=4-1; /* a candidate must beat this length, so only matches of 4+ bytes are recorded */
719
+
720
+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
721
+ const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
722
+ ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
723
+ const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
724
+ ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
725
+
726
+ U32 matchIndex;
727
+
728
+ if (dictMode == ZSTD_dedicatedDictSearch) {
729
+ const U32* entry = &dms->hashTable[ddsIdx];
730
+ PREFETCH_L1(entry);
731
+ }
732
+
733
+ /* HC4 match finder */
734
+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
735
+
736
+ for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { /* bitwise & : both tests are always evaluated */
737
+ size_t currentMl=0;
738
+ if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
739
+ const BYTE* const match = base + matchIndex;
740
+ assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
741
+ if (match[ml] == ip[ml]) /* potentially better */
742
+ currentMl = ZSTD_count(ip, match, iLimit);
743
+ } else {
744
+ const BYTE* const match = dictBase + matchIndex;
745
+ assert(match+4 <= dictEnd);
746
+ if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
747
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
748
+ }
749
+
750
+ /* save best solution */
751
+ if (currentMl > ml) {
752
+ ml = currentMl;
753
+ *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
754
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
755
+ }
756
+
757
+ if (matchIndex <= minChain) break; /* older positions are outside the span covered by chainSize */
758
+ matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
759
+ }
760
+
761
+ if (dictMode == ZSTD_dedicatedDictSearch) {
762
+ ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms,
763
+ ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
764
+ } else if (dictMode == ZSTD_dictMatchState) {
765
+ const U32* const dmsChainTable = dms->chainTable;
766
+ const U32 dmsChainSize = (1 << dms->cParams.chainLog);
767
+ const U32 dmsChainMask = dmsChainSize - 1;
768
+ const U32 dmsLowestIndex = dms->window.dictLimit;
769
+ const BYTE* const dmsBase = dms->window.base;
770
+ const BYTE* const dmsEnd = dms->window.nextSrc;
771
+ const U32 dmsSize = (U32)(dmsEnd - dmsBase);
772
+ const U32 dmsIndexDelta = dictLimit - dmsSize; /* added to a dictionary index before it is subtracted from curr to form the offset */
773
+ const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
774
+
775
+ matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
776
+
777
+ for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { /* continues with whatever budget the first loop left */
778
+ size_t currentMl=0;
779
+ const BYTE* const match = dmsBase + matchIndex;
780
+ assert(match+4 <= dmsEnd);
781
+ if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
782
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
783
+
784
+ /* save best solution */
785
+ if (currentMl > ml) {
786
+ ml = currentMl;
787
+ *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
788
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
789
+ }
790
+
791
+ if (matchIndex <= dmsMinChain) break;
792
+
793
+ matchIndex = dmsChainTable[matchIndex & dmsChainMask];
794
+ }
795
+ }
796
+
797
+ return ml;
798
+ }
799
+
800
+ /* Dispatches on ms->cParams.minMatch to the ZSTD_HcFindBestMatch_generic() call with a constant mls and dictMode ZSTD_noDict : 5 -> mls 5, 6 and 7 -> mls 6, anything else -> mls 4. */
801
+ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
802
+ ZSTD_matchState_t* ms,
803
+ const BYTE* ip, const BYTE* const iLimit,
804
+ size_t* offsetPtr)
805
+ {
806
+ switch(ms->cParams.minMatch)
807
+ {
808
+ default : /* includes case 3 */
809
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
810
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
811
+ case 7 : /* intentionally shares the mls 6 call below */
812
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
813
+ }
814
+ }
815
+
816
+ /* Same dispatch on ms->cParams.minMatch as the ZSTD_noDict selector, but every call passes dictMode ZSTD_dictMatchState. */
817
+ static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
818
+ ZSTD_matchState_t* ms,
819
+ const BYTE* ip, const BYTE* const iLimit,
820
+ size_t* offsetPtr)
821
+ {
822
+ switch(ms->cParams.minMatch)
823
+ {
824
+ default : /* includes case 3 */
825
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
826
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
827
+ case 7 : /* intentionally shares the mls 6 call below */
828
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
829
+ }
830
+ }
831
+
832
+ /* Same dispatch on ms->cParams.minMatch as the ZSTD_noDict selector, but every call passes dictMode ZSTD_dedicatedDictSearch. */
833
+ static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
834
+ ZSTD_matchState_t* ms,
835
+ const BYTE* ip, const BYTE* const iLimit,
836
+ size_t* offsetPtr)
837
+ {
838
+ switch(ms->cParams.minMatch)
839
+ {
840
+ default : /* includes case 3 */
841
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
842
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
843
+ case 7 : /* intentionally shares the mls 6 call below */
844
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
845
+ }
846
+ }
847
+
848
+ /* Same dispatch on ms->cParams.minMatch as the ZSTD_noDict selector, but every call passes dictMode ZSTD_extDict. */
849
+ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
850
+ ZSTD_matchState_t* ms,
851
+ const BYTE* ip, const BYTE* const iLimit,
852
+ size_t* offsetPtr)
853
+ {
854
+ switch(ms->cParams.minMatch)
855
+ {
856
+ default : /* includes case 3 */
857
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
858
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
859
+ case 7 : /* intentionally shares the mls 6 call below */
860
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
861
+ }
862
+ }
863
+
864
+ /* *********************************
865
+ * (SIMD) Row-based matchfinder
866
+ ***********************************/
867
+ /* Constants for row-based hash */
868
+ #define ZSTD_ROW_HASH_TAG_OFFSET 1 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
869
+ #define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
870
+ #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1) /* value with the low ZSTD_ROW_HASH_TAG_BITS bits set */
871
+
872
+ #define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1) /* NOTE(review): only a valid bit mask if ZSTD_ROW_HASH_CACHE_SIZE (defined elsewhere) is a power of 2 - confirm */
873
+
874
+ typedef U32 ZSTD_VecMask; /* Clarifies when we are interacting with a U32 representing a mask of matches */
875
+
876
+ #if !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) /* SIMD SSE version */
877
+
878
+ #include <emmintrin.h>
879
+ typedef __m128i ZSTD_Vec128; /* SSE2 build : a 128-bit vector is the native __m128i */
880
+
881
+ /* Returns a 128-bit container with 128-bits from src (unaligned load, so src needs no particular alignment) */
882
+ static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
883
+ return _mm_loadu_si128((ZSTD_Vec128 const*)src);
884
+ }
885
+
886
+ /* Returns a ZSTD_Vec128 with the byte "val" packed 16 times */
887
+ static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
888
+ return _mm_set1_epi8((char)val); /* cast matches the char parameter of the intrinsic */
889
+ }
890
+
891
+ /* Compares x and y byte by byte, then collapses the 128-bit comparison result
892
+ * into a mask built from the MSB of each byte : 16 significant bits, returned in a 32-bit ZSTD_VecMask.
893
+ * */
894
+ static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
895
+ return (ZSTD_VecMask)_mm_movemask_epi8(_mm_cmpeq_epi8(x, y));
896
+ }
897
+
898
+ typedef struct { /* 256-bit vector represented as two 128-bit halves */
899
+ __m128i fst; /* first 16 bytes */
900
+ __m128i snd; /* following 16 bytes */
901
+ } ZSTD_Vec256;
902
+ /* Loads 32 bytes from ptr : the first 16 into fst, the next 16 into snd. */
903
+ static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) {
904
+ ZSTD_Vec256 v;
905
+ v.fst = ZSTD_Vec128_read(ptr);
906
+ v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1); /* + 1 advances by one ZSTD_Vec128, i.e. 16 bytes */
907
+ return v;
908
+ }
909
+ /* Returns a ZSTD_Vec256 whose two halves are both filled with the byte val. */
910
+ static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
911
+ ZSTD_Vec256 v;
912
+ v.fst = ZSTD_Vec128_set8(val);
913
+ v.snd = ZSTD_Vec128_set8(val);
914
+ return v;
915
+ }
916
+ /* Byte-wise equality mask over 32 bytes : the mask of the fst halves fills the low 16 bits, the mask of the snd halves the high 16 bits. */
917
+ static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
918
+ ZSTD_VecMask fstMask;
919
+ ZSTD_VecMask sndMask;
920
+ fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst);
921
+ sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd);
922
+ return fstMask | (sndMask << 16);
923
+ }
924
+
925
+ #elif !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) /* SIMD ARM NEON Version */
926
+
927
+ #include <arm_neon.h>
928
+ typedef uint8x16_t ZSTD_Vec128; /* NEON build : a 128-bit vector is 16 unsigned byte lanes */
929
+
930
+ static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) { /* loads 16 bytes from src */
931
+ return vld1q_u8((const BYTE* const)src);
932
+ }
933
+ /* Returns a ZSTD_Vec128 with the byte val duplicated into every lane. */
934
+ static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
935
+ return vdupq_n_u8(val);
936
+ }
937
+
938
+ /* Mimics '_mm_movemask_epi8()' from SSE : returns a mask assembled from the most significant bit of each byte of val */
939
+ static U32 ZSTD_vmovmaskq_u8(ZSTD_Vec128 val) {
940
+ /* Shift out everything but the MSB bits in each byte */
941
+ uint16x8_t highBits = vreinterpretq_u16_u8(vshrq_n_u8(val, 7));
942
+ /* Merge the even lanes together with vsra (right shift and add) */
943
+ uint32x4_t paired16 = vreinterpretq_u32_u16(vsraq_n_u16(highBits, highBits, 7));
944
+ uint64x2_t paired32 = vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14));
945
+ uint8x16_t paired64 = vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28));
946
+ /* Extract the low 8 bits from each lane, merge */
947
+ return vgetq_lane_u8(paired64, 0) | ((U32)vgetq_lane_u8(paired64, 8) << 8); /* byte lane 0 gives mask bits 0-7, byte lane 8 gives mask bits 8-15 */
948
+ }
949
+ /* Compares x and y byte by byte and returns the result of ZSTD_vmovmaskq_u8() on the comparison vector. */
950
+ static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
951
+ return (ZSTD_VecMask)ZSTD_vmovmaskq_u8(vceqq_u8(x, y));
952
+ }
953
+
954
+ typedef struct { /* 256-bit vector represented as two 128-bit halves */
955
+ uint8x16_t fst; /* first 16 bytes */
956
+ uint8x16_t snd; /* following 16 bytes */
957
+ } ZSTD_Vec256;
958
+ /* Loads 32 bytes from ptr : the first 16 into fst, the next 16 into snd. */
959
+ static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) {
960
+ ZSTD_Vec256 v;
961
+ v.fst = ZSTD_Vec128_read(ptr);
962
+ v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1); /* + 1 advances by one ZSTD_Vec128, i.e. 16 bytes */
963
+ return v;
964
+ }
965
+
966
+ static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
967
+ ZSTD_Vec256 v;
968
+ v.fst = ZSTD_Vec128_set8(val);
969
+ v.snd = ZSTD_Vec128_set8(val);
970
+ return v;
971
+ }
972
+
973
+ static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
974
+ ZSTD_VecMask fstMask;
975
+ ZSTD_VecMask sndMask;
976
+ fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst);
977
+ sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd);
978
+ return fstMask | (sndMask << 16);
979
+ }
980
+
981
+ #else /* Scalar fallback version */
982
+
983
/* Number of size_t words that together span 16 bytes */
#define VEC128_NB_SIZE_T (16 / sizeof(size_t))

/* Scalar flavour: a 128-bit vector stored as an array of size_t words */
typedef struct {
    size_t vec[VEC128_NB_SIZE_T];
} ZSTD_Vec128;
987
+
988
+ static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
989
+ ZSTD_Vec128 ret;
990
+ ZSTD_memcpy(ret.vec, src, VEC128_NB_SIZE_T*sizeof(size_t));
991
+ return ret;
992
+ }
993
+
994
+ static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
995
+ ZSTD_Vec128 ret = { {0} };
996
+ int startBit = sizeof(size_t) * 8 - 8;
997
+ for (;startBit >= 0; startBit -= 8) {
998
+ unsigned j = 0;
999
+ for (;j < VEC128_NB_SIZE_T; ++j) {
1000
+ ret.vec[j] |= ((size_t)val << startBit);
1001
+ }
1002
+ }
1003
+ return ret;
1004
+ }
1005
+
1006
+ /* Compare x to y, byte by byte, generating a "matches" bitfield */
1007
+ static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
1008
+ ZSTD_VecMask res = 0;
1009
+ unsigned i = 0;
1010
+ unsigned l = 0;
1011
+ for (; i < VEC128_NB_SIZE_T; ++i) {
1012
+ const size_t cmp1 = x.vec[i];
1013
+ const size_t cmp2 = y.vec[i];
1014
+ unsigned j = 0;
1015
+ for (; j < sizeof(size_t); ++j, ++l) {
1016
+ if (((cmp1 >> j*8) & 0xFF) == ((cmp2 >> j*8) & 0xFF)) {
1017
+ res |= ((U32)1 << (j+i*sizeof(size_t)));
1018
+ }
1019
+ }
1020
+ }
1021
+ return res;
1022
+ }
1023
+
1024
+ #define VEC256_NB_SIZE_T 2*VEC128_NB_SIZE_T
1025
+ typedef struct {
1026
+ size_t vec[VEC256_NB_SIZE_T];
1027
+ } ZSTD_Vec256;
1028
+
1029
+ static ZSTD_Vec256 ZSTD_Vec256_read(const void* const src) {
1030
+ ZSTD_Vec256 ret;
1031
+ ZSTD_memcpy(ret.vec, src, VEC256_NB_SIZE_T*sizeof(size_t));
1032
+ return ret;
1033
+ }
1034
+
1035
+ static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
1036
+ ZSTD_Vec256 ret = { {0} };
1037
+ int startBit = sizeof(size_t) * 8 - 8;
1038
+ for (;startBit >= 0; startBit -= 8) {
1039
+ unsigned j = 0;
1040
+ for (;j < VEC256_NB_SIZE_T; ++j) {
1041
+ ret.vec[j] |= ((size_t)val << startBit);
1042
+ }
1043
+ }
1044
+ return ret;
1045
+ }
1046
+
1047
+ /* Compare x to y, byte by byte, generating a "matches" bitfield */
1048
+ static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
1049
+ ZSTD_VecMask res = 0;
1050
+ unsigned i = 0;
1051
+ unsigned l = 0;
1052
+ for (; i < VEC256_NB_SIZE_T; ++i) {
1053
+ const size_t cmp1 = x.vec[i];
1054
+ const size_t cmp2 = y.vec[i];
1055
+ unsigned j = 0;
1056
+ for (; j < sizeof(size_t); ++j, ++l) {
1057
+ if (((cmp1 >> j*8) & 0xFF) == ((cmp2 >> j*8) & 0xFF)) {
1058
+ res |= ((U32)1 << (j+i*sizeof(size_t)));
1059
+ }
1060
+ }
1061
+ }
1062
+ return res;
1063
+ }
1064
+
1065
+ #endif /* !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) */
1066
+
1067
+ /* ZSTD_VecMask_next():
1068
+ * Starting from the LSB, returns the idx of the next non-zero bit.
1069
+ * Basically counting the nb of trailing zeroes.
1070
+ */
1071
+ static U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
1072
+ # if defined(_MSC_VER) /* Visual */
1073
+ unsigned long r=0;
1074
+ return _BitScanForward(&r, val) ? (U32)r : 0;
1075
+ # elif defined(__GNUC__) && (__GNUC__ >= 3)
1076
+ return (U32)__builtin_ctz(val);
1077
+ # else
1078
+ /* Software ctz version: http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup */
1079
+ static const U32 multiplyDeBruijnBitPosition[32] =
1080
+ {
1081
+ 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
1082
+ 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
1083
+ };
1084
+ return multiplyDeBruijnBitPosition[((U32)((v & -(int)v) * 0x077CB531U)) >> 27];
1085
+ # endif
1086
+ }
1087
+
1088
+ /* ZSTD_VecMask_rotateRight():
1089
+ * Rotates a bitfield to the right by "rotation" bits.
1090
+ * If the rotation is greater than totalBits, the returned mask is 0.
1091
+ */
1092
+ FORCE_INLINE_TEMPLATE ZSTD_VecMask
1093
+ ZSTD_VecMask_rotateRight(ZSTD_VecMask mask, U32 const rotation, U32 const totalBits) {
1094
+ if (rotation == 0)
1095
+ return mask;
1096
+ switch (totalBits) {
1097
+ default:
1098
+ assert(0);
1099
+ case 16:
1100
+ return (mask >> rotation) | (U16)(mask << (16 - rotation));
1101
+ case 32:
1102
+ return (mask >> rotation) | (U32)(mask << (32 - rotation));
1103
+ }
1104
+ }
1105
+
1106
+ /* ZSTD_row_nextIndex():
1107
+ * Returns the next index to insert at within a tagTable row, and updates the "head"
1108
+ * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
1109
+ */
1110
+ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
1111
+ U32 const next = (*tagRow - 1) & rowMask;
1112
+ *tagRow = (BYTE)next;
1113
+ return next;
1114
+ }
1115
+
1116
+ /* ZSTD_isAligned():
1117
+ * Checks that a pointer is aligned to "align" bytes which must be a power of 2.
1118
+ */
1119
+ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
1120
+ assert((align & (align - 1)) == 0);
1121
+ return (((size_t)ptr) & (align - 1)) == 0;
1122
+ }
1123
+
1124
+ /* ZSTD_row_prefetch():
1125
+ * Performs prefetching for the hashTable and tagTable at a given row.
1126
+ */
1127
+ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
1128
+ PREFETCH_L1(hashTable + relRow);
1129
+ if (rowLog == 5) {
1130
+ PREFETCH_L1(hashTable + relRow + 16);
1131
+ }
1132
+ PREFETCH_L1(tagTable + relRow);
1133
+ assert(rowLog == 4 || rowLog == 5);
1134
+ assert(ZSTD_isAligned(hashTable + relRow, 64)); /* prefetched hash row always 64-byte aligned */
1135
+ assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on a multiple of 32 or 64 bytes */
1136
+ }
1137
+
1138
+ /* ZSTD_row_fillHashCache():
1139
+ * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
1140
+ * but not beyond iLimit.
1141
+ */
1142
+ static void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
1143
+ U32 const rowLog, U32 const mls,
1144
+ U32 idx, const BYTE* const iLimit)
1145
+ {
1146
+ U32 const* const hashTable = ms->hashTable;
1147
+ U16 const* const tagTable = ms->tagTable;
1148
+ U32 const hashLog = ms->rowHashLog;
1149
+ U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
1150
+ U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
1151
+
1152
+ for (; idx < lim; ++idx) {
1153
+ U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
1154
+ U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
1155
+ ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
1156
+ ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
1157
+ }
1158
+
1159
+ DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1],
1160
+ ms->hashCache[2], ms->hashCache[3], ms->hashCache[4],
1161
+ ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]);
1162
+ }
1163
+
1164
+ /* ZSTD_row_nextCachedHash():
1165
+ * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
1166
+ * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
1167
+ */
1168
+ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
1169
+ U16 const* tagTable, BYTE const* base,
1170
+ U32 idx, U32 const hashLog,
1171
+ U32 const rowLog, U32 const mls)
1172
+ {
1173
+ U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
1174
+ U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
1175
+ ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
1176
+ { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
1177
+ cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash;
1178
+ return hash;
1179
+ }
1180
+ }
1181
+
1182
+ /* ZSTD_row_update_internal():
1183
+ * Inserts the byte at ip into the appropriate position in the hash table.
1184
+ * Determines the relative row, and the position within the {16, 32} entry row to insert at.
1185
+ */
1186
+ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
1187
+ U32 const mls, U32 const rowLog,
1188
+ U32 const rowMask, U32 const useCache)
1189
+ {
1190
+ U32* const hashTable = ms->hashTable;
1191
+ U16* const tagTable = ms->tagTable;
1192
+ U32 const hashLog = ms->rowHashLog;
1193
+ const BYTE* const base = ms->window.base;
1194
+ const U32 target = (U32)(ip - base);
1195
+ U32 idx = ms->nextToUpdate;
1196
+
1197
+ DEBUGLOG(6, "ZSTD_row_update_internal(): nextToUpdate=%u, current=%u", idx, target);
1198
+ for (; idx < target; ++idx) {
1199
+ U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, idx, hashLog, rowLog, mls)
1200
+ : (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
1201
+ U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
1202
+ U32* const row = hashTable + relRow;
1203
+ BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
1204
+ Explicit cast allows us to get exact desired position within each row */
1205
+ U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
1206
+
1207
+ assert(hash == ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
1208
+ ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
1209
+ row[pos] = idx;
1210
+ }
1211
+ ms->nextToUpdate = target;
1212
+ }
1213
+
1214
+ /* ZSTD_row_update():
1215
+ * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary
1216
+ * processing.
1217
+ */
1218
+ void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
1219
+ const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5;
1220
+ const U32 rowMask = (1u << rowLog) - 1;
1221
+ const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
1222
+
1223
+ DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
1224
+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */);
1225
+ }
1226
+
1227
+ /* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches
1228
+ * the hash at the nth position in a row of the tagTable.
1229
+ */
1230
+ FORCE_INLINE_TEMPLATE
1231
+ ZSTD_VecMask ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries) {
1232
+ ZSTD_VecMask matches = 0;
1233
+ if (rowEntries == 16) {
1234
+ ZSTD_Vec128 hashes = ZSTD_Vec128_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET);
1235
+ ZSTD_Vec128 expandedTags = ZSTD_Vec128_set8(tag);
1236
+ matches = ZSTD_Vec128_cmpMask8(hashes, expandedTags);
1237
+ } else if (rowEntries == 32) {
1238
+ ZSTD_Vec256 hashes = ZSTD_Vec256_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET);
1239
+ ZSTD_Vec256 expandedTags = ZSTD_Vec256_set8(tag);
1240
+ matches = ZSTD_Vec256_cmpMask8(hashes, expandedTags);
1241
+ } else {
1242
+ assert(0);
1243
+ }
1244
+ /* Each row is a circular buffer beginning at the value of "head". So we must rotate the "matches" bitfield
1245
+ to match up with the actual layout of the entries within the hashTable */
1246
+ return ZSTD_VecMask_rotateRight(matches, head, rowEntries);
1247
+ }
1248
+
1249
+ /* The high-level approach of the SIMD row based match finder is as follows:
1250
+ * - Figure out where to insert the new entry:
1251
+ * - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag"
1252
+ * - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines
1253
+ * which row to insert into.
1254
+ * - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can
1255
+ * be considered as a circular buffer with a "head" index that resides in the tagTable.
1256
+ * - Also insert the "tag" into the equivalent row and position in the tagTable.
1257
+ * - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry.
1258
+ * The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
1259
+ * for alignment/performance reasons, leaving some bytes unused.
1260
+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
1261
+ * generate a bitfield that we can cycle through to check the collisions in the hash table.
1262
+ * - Pick the longest match.
1263
+ */
1264
+ FORCE_INLINE_TEMPLATE
1265
+ size_t ZSTD_RowFindBestMatch_generic (
1266
+ ZSTD_matchState_t* ms,
1267
+ const BYTE* const ip, const BYTE* const iLimit,
1268
+ size_t* offsetPtr,
1269
+ const U32 mls, const ZSTD_dictMode_e dictMode,
1270
+ const U32 rowLog)
1271
+ {
1272
+ U32* const hashTable = ms->hashTable;
1273
+ U16* const tagTable = ms->tagTable;
1274
+ U32* const hashCache = ms->hashCache;
1275
+ const U32 hashLog = ms->rowHashLog;
1276
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
1277
+ const BYTE* const base = ms->window.base;
1278
+ const BYTE* const dictBase = ms->window.dictBase;
1279
+ const U32 dictLimit = ms->window.dictLimit;
1280
+ const BYTE* const prefixStart = base + dictLimit;
1281
+ const BYTE* const dictEnd = dictBase + dictLimit;
1282
+ const U32 curr = (U32)(ip-base);
1283
+ const U32 maxDistance = 1U << cParams->windowLog;
1284
+ const U32 lowestValid = ms->window.lowLimit;
1285
+ const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1286
+ const U32 isDictionary = (ms->loadedDictEnd != 0);
1287
+ const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
1288
+ const U32 rowEntries = (1U << rowLog);
1289
+ const U32 rowMask = rowEntries - 1;
1290
+ const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
1291
+ U32 nbAttempts = 1U << cappedSearchLog;
1292
+ size_t ml=4-1;
1293
+
1294
+ /* DMS/DDS variables that may be referenced laster */
1295
+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
1296
+ size_t ddsIdx;
1297
+ U32 ddsExtraAttempts; /* cctx hash tables are limited in searches, but allow extra searches into DDS */
1298
+ U32 dmsTag;
1299
+ U32* dmsRow;
1300
+ BYTE* dmsTagRow;
1301
+
1302
+ if (dictMode == ZSTD_dedicatedDictSearch) {
1303
+ const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
1304
+ { /* Prefetch DDS hashtable entry */
1305
+ ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG;
1306
+ PREFETCH_L1(&dms->hashTable[ddsIdx]);
1307
+ }
1308
+ ddsExtraAttempts = cParams->searchLog > rowLog ? 1U << (cParams->searchLog - rowLog) : 0;
1309
+ }
1310
+
1311
+ if (dictMode == ZSTD_dictMatchState) {
1312
+ /* Prefetch DMS rows */
1313
+ U32* const dmsHashTable = dms->hashTable;
1314
+ U16* const dmsTagTable = dms->tagTable;
1315
+ U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
1316
+ U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
1317
+ dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
1318
+ dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow);
1319
+ dmsRow = dmsHashTable + dmsRelRow;
1320
+ ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog);
1321
+ }
1322
+
1323
+ /* Update the hashTable and tagTable up to (but not including) ip */
1324
+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
1325
+ { /* Get the hash for ip, compute the appropriate row */
1326
+ U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
1327
+ U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
1328
+ U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
1329
+ U32* const row = hashTable + relRow;
1330
+ BYTE* tagRow = (BYTE*)(tagTable + relRow);
1331
+ U32 const head = *tagRow & rowMask;
1332
+ U32 matchBuffer[32 /* maximum nb entries per row */];
1333
+ size_t numMatches = 0;
1334
+ size_t currMatch = 0;
1335
+ ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries);
1336
+
1337
+ /* Cycle through the matches and prefetch */
1338
+ for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
1339
+ U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
1340
+ U32 const matchIndex = row[matchPos];
1341
+ assert(numMatches < rowEntries);
1342
+ if (matchIndex < lowLimit)
1343
+ break;
1344
+ if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
1345
+ PREFETCH_L1(base + matchIndex);
1346
+ } else {
1347
+ PREFETCH_L1(dictBase + matchIndex);
1348
+ }
1349
+ matchBuffer[numMatches++] = matchIndex;
1350
+ }
1351
+
1352
+ /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
1353
+ in ZSTD_row_update_internal() at the next search. */
1354
+ {
1355
+ U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
1356
+ tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
1357
+ row[pos] = ms->nextToUpdate++;
1358
+ }
1359
+
1360
+ /* Return the longest match */
1361
+ for (; currMatch < numMatches; ++currMatch) {
1362
+ U32 const matchIndex = matchBuffer[currMatch];
1363
+ size_t currentMl=0;
1364
+ assert(matchIndex < curr);
1365
+ assert(matchIndex >= lowLimit);
1366
+
1367
+ if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
1368
+ const BYTE* const match = base + matchIndex;
1369
+ assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
1370
+ if (match[ml] == ip[ml]) /* potentially better */
1371
+ currentMl = ZSTD_count(ip, match, iLimit);
1372
+ } else {
1373
+ const BYTE* const match = dictBase + matchIndex;
1374
+ assert(match+4 <= dictEnd);
1375
+ if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
1376
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
1377
+ }
1378
+
1379
+ /* Save best solution */
1380
+ if (currentMl > ml) {
1381
+ ml = currentMl;
1382
+ *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
1383
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
1384
+ }
1385
+ }
1386
+ }
1387
+
1388
+ if (dictMode == ZSTD_dedicatedDictSearch) {
1389
+ ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms,
1390
+ ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
1391
+ } else if (dictMode == ZSTD_dictMatchState) {
1392
+ /* TODO: Measure and potentially add prefetching to DMS */
1393
+ const U32 dmsLowestIndex = dms->window.dictLimit;
1394
+ const BYTE* const dmsBase = dms->window.base;
1395
+ const BYTE* const dmsEnd = dms->window.nextSrc;
1396
+ const U32 dmsSize = (U32)(dmsEnd - dmsBase);
1397
+ const U32 dmsIndexDelta = dictLimit - dmsSize;
1398
+
1399
+ { U32 const head = *dmsTagRow & rowMask;
1400
+ U32 matchBuffer[32 /* maximum nb row entries */];
1401
+ size_t numMatches = 0;
1402
+ size_t currMatch = 0;
1403
+ ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries);
1404
+
1405
+ for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
1406
+ U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
1407
+ U32 const matchIndex = dmsRow[matchPos];
1408
+ if (matchIndex < dmsLowestIndex)
1409
+ break;
1410
+ PREFETCH_L1(dmsBase + matchIndex);
1411
+ matchBuffer[numMatches++] = matchIndex;
1412
+ }
1413
+
1414
+ /* Return the longest match */
1415
+ for (; currMatch < numMatches; ++currMatch) {
1416
+ U32 const matchIndex = matchBuffer[currMatch];
1417
+ size_t currentMl=0;
1418
+ assert(matchIndex >= dmsLowestIndex);
1419
+ assert(matchIndex < curr);
1420
+
1421
+ { const BYTE* const match = dmsBase + matchIndex;
1422
+ assert(match+4 <= dmsEnd);
1423
+ if (MEM_read32(match) == MEM_read32(ip))
1424
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
1425
+ }
1426
+
1427
+ if (currentMl > ml) {
1428
+ ml = currentMl;
1429
+ *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
1430
+ if (ip+currentMl == iLimit) break;
1431
+ }
1432
+ }
1433
+ }
1434
+ }
1435
+ return ml;
1436
+ }
1437
+
1438
+ /* Inlining is important to hardwire a hot branch (template emulation) */
1439
+ FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectMLS (
1440
+ ZSTD_matchState_t* ms,
1441
+ const BYTE* ip, const BYTE* const iLimit,
1442
+ const ZSTD_dictMode_e dictMode, size_t* offsetPtr, const U32 rowLog)
1443
+ {
1444
+ switch(ms->cParams.minMatch)
1445
+ {
1446
+ default : /* includes case 3 */
1447
+ case 4 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, dictMode, rowLog);
1448
+ case 5 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, dictMode, rowLog);
1449
+ case 7 :
1450
+ case 6 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, dictMode, rowLog);
1451
+ }
1452
+ }
1453
+
1454
+ FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectRowLog (
1455
+ ZSTD_matchState_t* ms,
1456
+ const BYTE* ip, const BYTE* const iLimit,
1457
+ size_t* offsetPtr)
1458
+ {
1459
+ const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
1460
+ switch(cappedSearchLog)
1461
+ {
1462
+ default :
1463
+ case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 4);
1464
+ case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 5);
1465
+ }
1466
+ }
1467
+
1468
+ FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dictMatchState_selectRowLog(
1469
+ ZSTD_matchState_t* ms,
1470
+ const BYTE* ip, const BYTE* const iLimit,
1471
+ size_t* offsetPtr)
1472
+ {
1473
+ const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
1474
+ switch(cappedSearchLog)
1475
+ {
1476
+ default :
1477
+ case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 4);
1478
+ case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 5);
1479
+ }
1480
+ }
1481
+
1482
+ FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog(
1483
+ ZSTD_matchState_t* ms,
1484
+ const BYTE* ip, const BYTE* const iLimit,
1485
+ size_t* offsetPtr)
1486
+ {
1487
+ const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
1488
+ switch(cappedSearchLog)
1489
+ {
1490
+ default :
1491
+ case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 4);
1492
+ case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 5);
1493
+ }
1494
+ }
1495
+
1496
+ FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_extDict_selectRowLog (
1497
+ ZSTD_matchState_t* ms,
1498
+ const BYTE* ip, const BYTE* const iLimit,
1499
+ size_t* offsetPtr)
1500
+ {
1501
+ const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5);
1502
+ switch(cappedSearchLog)
1503
+ {
1504
+ default :
1505
+ case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 4);
1506
+ case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 5);
1507
+ }
1508
+ }
1509
+
1510
+
1511
+ /* *******************************
1512
+ * Common parser - lazy strategy
1513
+ *********************************/
1514
/* Match-finder selector for the lazy parser.
 * The numeric values are used as an array index (second dimension of the
 * search-function table), so they must stay 0, 1, 2. */
typedef enum {
    search_hashChain = 0,
    search_binaryTree = 1,
    search_rowHash = 2
} searchMethod_e;
1515
+
1516
+ FORCE_INLINE_TEMPLATE size_t
1517
+ ZSTD_compressBlock_lazy_generic(
1518
+ ZSTD_matchState_t* ms, seqStore_t* seqStore,
1519
+ U32 rep[ZSTD_REP_NUM],
1520
+ const void* src, size_t srcSize,
1521
+ const searchMethod_e searchMethod, const U32 depth,
1522
+ ZSTD_dictMode_e const dictMode)
1523
+ {
1524
+ const BYTE* const istart = (const BYTE*)src;
1525
+ const BYTE* ip = istart;
1526
+ const BYTE* anchor = istart;
1527
+ const BYTE* const iend = istart + srcSize;
1528
+ const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
1529
+ const BYTE* const base = ms->window.base;
1530
+ const U32 prefixLowestIndex = ms->window.dictLimit;
1531
+ const BYTE* const prefixLowest = base + prefixLowestIndex;
1532
+ const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5;
1533
+
1534
+ typedef size_t (*searchMax_f)(
1535
+ ZSTD_matchState_t* ms,
1536
+ const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
1537
+
1538
+ /**
1539
+ * This table is indexed first by the four ZSTD_dictMode_e values, and then
1540
+ * by the three searchMethod_e values. NULLs are placed for configurations
1541
+ * that should never occur (extDict modes go to the other implementation
1542
+ * below and there is no DDSS for binary tree search yet).
1543
+ */
1544
+ const searchMax_f searchFuncs[4][3] = {
1545
+ {
1546
+ ZSTD_HcFindBestMatch_selectMLS,
1547
+ ZSTD_BtFindBestMatch_selectMLS,
1548
+ ZSTD_RowFindBestMatch_selectRowLog
1549
+ },
1550
+ {
1551
+ NULL,
1552
+ NULL,
1553
+ NULL
1554
+ },
1555
+ {
1556
+ ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
1557
+ ZSTD_BtFindBestMatch_dictMatchState_selectMLS,
1558
+ ZSTD_RowFindBestMatch_dictMatchState_selectRowLog
1559
+ },
1560
+ {
1561
+ ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
1562
+ NULL,
1563
+ ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog
1564
+ }
1565
+ };
1566
+
1567
+ searchMax_f const searchMax = searchFuncs[dictMode][(int)searchMethod];
1568
+ U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
1569
+
1570
+ const int isDMS = dictMode == ZSTD_dictMatchState;
1571
+ const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
1572
+ const int isDxS = isDMS || isDDS;
1573
+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
1574
+ const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
1575
+ const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
1576
+ const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
1577
+ const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL;
1578
+ const U32 dictIndexDelta = isDxS ?
1579
+ prefixLowestIndex - (U32)(dictEnd - dictBase) :
1580
+ 0;
1581
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
1582
+
1583
+ assert(searchMax != NULL);
1584
+
1585
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod);
1586
+ ip += (dictAndPrefixLength == 0);
1587
+ if (dictMode == ZSTD_noDict) {
1588
+ U32 const curr = (U32)(ip - base);
1589
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
1590
+ U32 const maxRep = curr - windowLow;
1591
+ if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
1592
+ if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
1593
+ }
1594
+ if (isDxS) {
1595
+ /* dictMatchState repCode checks don't currently handle repCode == 0
1596
+ * disabling. */
1597
+ assert(offset_1 <= dictAndPrefixLength);
1598
+ assert(offset_2 <= dictAndPrefixLength);
1599
+ }
1600
+
1601
+ if (searchMethod == search_rowHash) {
1602
+ ZSTD_row_fillHashCache(ms, base, rowLog,
1603
+ MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
1604
+ ms->nextToUpdate, ilimit);
1605
+ }
1606
+
1607
+ /* Match Loop */
1608
+ #if defined(__GNUC__) && defined(__x86_64__)
1609
+ /* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
1610
+ * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
1611
+ */
1612
+ __asm__(".p2align 5");
1613
+ #endif
1614
+ while (ip < ilimit) {
1615
+ size_t matchLength=0;
1616
+ size_t offset=0;
1617
+ const BYTE* start=ip+1;
1618
+
1619
+ /* check repCode */
1620
+ if (isDxS) {
1621
+ const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
1622
+ const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
1623
+ && repIndex < prefixLowestIndex) ?
1624
+ dictBase + (repIndex - dictIndexDelta) :
1625
+ base + repIndex;
1626
+ if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
1627
+ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
1628
+ const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
1629
+ matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
1630
+ if (depth==0) goto _storeSequence;
1631
+ }
1632
+ }
1633
+ if ( dictMode == ZSTD_noDict
1634
+ && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
1635
+ matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
1636
+ if (depth==0) goto _storeSequence;
1637
+ }
1638
+
1639
+ /* first search (depth 0) */
1640
+ { size_t offsetFound = 999999999;
1641
+ size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
1642
+ if (ml2 > matchLength)
1643
+ matchLength = ml2, start = ip, offset=offsetFound;
1644
+ }
1645
+
1646
+ if (matchLength < 4) {
1647
+ ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
1648
+ continue;
1649
+ }
1650
+
1651
+ /* let's try to find a better solution */
1652
+ if (depth>=1)
1653
+ while (ip<ilimit) {
1654
+ ip ++;
1655
+ if ( (dictMode == ZSTD_noDict)
1656
+ && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1657
+ size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
1658
+ int const gain2 = (int)(mlRep * 3);
1659
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
1660
+ if ((mlRep >= 4) && (gain2 > gain1))
1661
+ matchLength = mlRep, offset = 0, start = ip;
1662
+ }
1663
+ if (isDxS) {
1664
+ const U32 repIndex = (U32)(ip - base) - offset_1;
1665
+ const BYTE* repMatch = repIndex < prefixLowestIndex ?
1666
+ dictBase + (repIndex - dictIndexDelta) :
1667
+ base + repIndex;
1668
+ if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
1669
+ && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
1670
+ const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
1671
+ size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
1672
+ int const gain2 = (int)(mlRep * 3);
1673
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
1674
+ if ((mlRep >= 4) && (gain2 > gain1))
1675
+ matchLength = mlRep, offset = 0, start = ip;
1676
+ }
1677
+ }
1678
+ { size_t offset2=999999999;
1679
+ size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1680
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1681
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
1682
+ if ((ml2 >= 4) && (gain2 > gain1)) {
1683
+ matchLength = ml2, offset = offset2, start = ip;
1684
+ continue; /* search a better one */
1685
+ } }
1686
+
1687
+ /* let's find an even better one */
1688
+ if ((depth==2) && (ip<ilimit)) {
1689
+ ip ++;
1690
+ if ( (dictMode == ZSTD_noDict)
1691
+ && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1692
+ size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
1693
+ int const gain2 = (int)(mlRep * 4);
1694
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1695
+ if ((mlRep >= 4) && (gain2 > gain1))
1696
+ matchLength = mlRep, offset = 0, start = ip;
1697
+ }
1698
+ if (isDxS) {
1699
+ const U32 repIndex = (U32)(ip - base) - offset_1;
1700
+ const BYTE* repMatch = repIndex < prefixLowestIndex ?
1701
+ dictBase + (repIndex - dictIndexDelta) :
1702
+ base + repIndex;
1703
+ if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
1704
+ && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
1705
+ const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
1706
+ size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
1707
+ int const gain2 = (int)(mlRep * 4);
1708
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1709
+ if ((mlRep >= 4) && (gain2 > gain1))
1710
+ matchLength = mlRep, offset = 0, start = ip;
1711
+ }
1712
+ }
1713
+ { size_t offset2=999999999;
1714
+ size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1715
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1716
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
1717
+ if ((ml2 >= 4) && (gain2 > gain1)) {
1718
+ matchLength = ml2, offset = offset2, start = ip;
1719
+ continue;
1720
+ } } }
1721
+ break; /* nothing found : store previous solution */
1722
+ }
1723
+
1724
+ /* NOTE:
1725
+ * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
1726
+ * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
1727
+ * overflows the pointer, which is undefined behavior.
1728
+ */
1729
+ /* catch up */
1730
+ if (offset) {
1731
+ if (dictMode == ZSTD_noDict) {
1732
+ while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
1733
+ && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
1734
+ { start--; matchLength++; }
1735
+ }
1736
+ if (isDxS) {
1737
+ U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
1738
+ const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
1739
+ const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
1740
+ while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
1741
+ }
1742
+ offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
1743
+ }
1744
+ /* store sequence */
1745
+ _storeSequence:
1746
+ { size_t const litLength = start - anchor;
1747
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
1748
+ anchor = ip = start + matchLength;
1749
+ }
1750
+
1751
+ /* check immediate repcode */
1752
+ if (isDxS) {
1753
+ while (ip <= ilimit) {
1754
+ U32 const current2 = (U32)(ip-base);
1755
+ U32 const repIndex = current2 - offset_2;
1756
+ const BYTE* repMatch = repIndex < prefixLowestIndex ?
1757
+ dictBase - dictIndexDelta + repIndex :
1758
+ base + repIndex;
1759
+ if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
1760
+ && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
1761
+ const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
1762
+ matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
1763
+ offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
1764
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1765
+ ip += matchLength;
1766
+ anchor = ip;
1767
+ continue;
1768
+ }
1769
+ break;
1770
+ }
1771
+ }
1772
+
1773
+ if (dictMode == ZSTD_noDict) {
1774
+ while ( ((ip <= ilimit) & (offset_2>0))
1775
+ && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
1776
+ /* store sequence */
1777
+ matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
1778
+ offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
1779
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1780
+ ip += matchLength;
1781
+ anchor = ip;
1782
+ continue; /* faster when present ... (?) */
1783
+ } } }
1784
+
1785
+ /* Save reps for next block */
1786
+ rep[0] = offset_1 ? offset_1 : savedOffset;
1787
+ rep[1] = offset_2 ? offset_2 : savedOffset;
1788
+
1789
+ /* Return the last literals size */
1790
+ return (size_t)(iend - anchor);
1791
+ }
1792
+
1793
+
1794
+ size_t ZSTD_compressBlock_btlazy2(
1795
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1796
+ void const* src, size_t srcSize)
1797
+ {
1798
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
1799
+ }
1800
+
1801
+ size_t ZSTD_compressBlock_lazy2(
1802
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1803
+ void const* src, size_t srcSize)
1804
+ {
1805
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
1806
+ }
1807
+
1808
+ size_t ZSTD_compressBlock_lazy(
1809
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1810
+ void const* src, size_t srcSize)
1811
+ {
1812
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
1813
+ }
1814
+
1815
+ size_t ZSTD_compressBlock_greedy(
1816
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1817
+ void const* src, size_t srcSize)
1818
+ {
1819
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
1820
+ }
1821
+
1822
+ size_t ZSTD_compressBlock_btlazy2_dictMatchState(
1823
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1824
+ void const* src, size_t srcSize)
1825
+ {
1826
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
1827
+ }
1828
+
1829
+ size_t ZSTD_compressBlock_lazy2_dictMatchState(
1830
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1831
+ void const* src, size_t srcSize)
1832
+ {
1833
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
1834
+ }
1835
+
1836
+ size_t ZSTD_compressBlock_lazy_dictMatchState(
1837
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1838
+ void const* src, size_t srcSize)
1839
+ {
1840
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
1841
+ }
1842
+
1843
+ size_t ZSTD_compressBlock_greedy_dictMatchState(
1844
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1845
+ void const* src, size_t srcSize)
1846
+ {
1847
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
1848
+ }
1849
+
1850
+
1851
+ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
1852
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1853
+ void const* src, size_t srcSize)
1854
+ {
1855
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
1856
+ }
1857
+
1858
+ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
1859
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1860
+ void const* src, size_t srcSize)
1861
+ {
1862
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
1863
+ }
1864
+
1865
+ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
1866
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1867
+ void const* src, size_t srcSize)
1868
+ {
1869
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
1870
+ }
1871
+
1872
+ /* Row-based matchfinder */
1873
+ size_t ZSTD_compressBlock_lazy2_row(
1874
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1875
+ void const* src, size_t srcSize)
1876
+ {
1877
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
1878
+ }
1879
+
1880
+ size_t ZSTD_compressBlock_lazy_row(
1881
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1882
+ void const* src, size_t srcSize)
1883
+ {
1884
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
1885
+ }
1886
+
1887
+ size_t ZSTD_compressBlock_greedy_row(
1888
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1889
+ void const* src, size_t srcSize)
1890
+ {
1891
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
1892
+ }
1893
+
1894
+ size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
1895
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1896
+ void const* src, size_t srcSize)
1897
+ {
1898
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
1899
+ }
1900
+
1901
+ size_t ZSTD_compressBlock_lazy_dictMatchState_row(
1902
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1903
+ void const* src, size_t srcSize)
1904
+ {
1905
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
1906
+ }
1907
+
1908
+ size_t ZSTD_compressBlock_greedy_dictMatchState_row(
1909
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1910
+ void const* src, size_t srcSize)
1911
+ {
1912
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
1913
+ }
1914
+
1915
+
1916
+ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
1917
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1918
+ void const* src, size_t srcSize)
1919
+ {
1920
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
1921
+ }
1922
+
1923
+ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
1924
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1925
+ void const* src, size_t srcSize)
1926
+ {
1927
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
1928
+ }
1929
+
1930
+ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
1931
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1932
+ void const* src, size_t srcSize)
1933
+ {
1934
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
1935
+ }
1936
+
1937
+ FORCE_INLINE_TEMPLATE
1938
+ size_t ZSTD_compressBlock_lazy_extDict_generic(
1939
+ ZSTD_matchState_t* ms, seqStore_t* seqStore,
1940
+ U32 rep[ZSTD_REP_NUM],
1941
+ const void* src, size_t srcSize,
1942
+ const searchMethod_e searchMethod, const U32 depth)
1943
+ {
1944
+ const BYTE* const istart = (const BYTE*)src;
1945
+ const BYTE* ip = istart;
1946
+ const BYTE* anchor = istart;
1947
+ const BYTE* const iend = istart + srcSize;
1948
+ const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
1949
+ const BYTE* const base = ms->window.base;
1950
+ const U32 dictLimit = ms->window.dictLimit;
1951
+ const BYTE* const prefixStart = base + dictLimit;
1952
+ const BYTE* const dictBase = ms->window.dictBase;
1953
+ const BYTE* const dictEnd = dictBase + dictLimit;
1954
+ const BYTE* const dictStart = dictBase + ms->window.lowLimit;
1955
+ const U32 windowLog = ms->cParams.windowLog;
1956
+ const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5;
1957
+
1958
+ typedef size_t (*searchMax_f)(
1959
+ ZSTD_matchState_t* ms,
1960
+ const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
1961
+ const searchMax_f searchFuncs[3] = {
1962
+ ZSTD_HcFindBestMatch_extDict_selectMLS,
1963
+ ZSTD_BtFindBestMatch_extDict_selectMLS,
1964
+ ZSTD_RowFindBestMatch_extDict_selectRowLog
1965
+ };
1966
+ searchMax_f searchMax = searchFuncs[(int)searchMethod];
1967
+ U32 offset_1 = rep[0], offset_2 = rep[1];
1968
+
1969
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
1970
+
1971
+ /* init */
1972
+ ip += (ip == prefixStart);
1973
+ if (searchMethod == search_rowHash) {
1974
+ ZSTD_row_fillHashCache(ms, base, rowLog,
1975
+ MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
1976
+ ms->nextToUpdate, ilimit);
1977
+ }
1978
+
1979
+ /* Match Loop */
1980
+ #if defined(__GNUC__) && defined(__x86_64__)
1981
+ /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
1982
+ * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
1983
+ */
1984
+ __asm__(".p2align 5");
1985
+ #endif
1986
+ while (ip < ilimit) {
1987
+ size_t matchLength=0;
1988
+ size_t offset=0;
1989
+ const BYTE* start=ip+1;
1990
+ U32 curr = (U32)(ip-base);
1991
+
1992
+ /* check repCode */
1993
+ { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
1994
+ const U32 repIndex = (U32)(curr+1 - offset_1);
1995
+ const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1996
+ const BYTE* const repMatch = repBase + repIndex;
1997
+ if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
1998
+ & (offset_1 < curr+1 - windowLow) ) /* note: we are searching at curr+1 */
1999
+ if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
2000
+ /* repcode detected we should take it */
2001
+ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2002
+ matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
2003
+ if (depth==0) goto _storeSequence;
2004
+ } }
2005
+
2006
+ /* first search (depth 0) */
2007
+ { size_t offsetFound = 999999999;
2008
+ size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
2009
+ if (ml2 > matchLength)
2010
+ matchLength = ml2, start = ip, offset=offsetFound;
2011
+ }
2012
+
2013
+ if (matchLength < 4) {
2014
+ ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
2015
+ continue;
2016
+ }
2017
+
2018
+ /* let's try to find a better solution */
2019
+ if (depth>=1)
2020
+ while (ip<ilimit) {
2021
+ ip ++;
2022
+ curr++;
2023
+ /* check repCode */
2024
+ if (offset) {
2025
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
2026
+ const U32 repIndex = (U32)(curr - offset_1);
2027
+ const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2028
+ const BYTE* const repMatch = repBase + repIndex;
2029
+ if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
2030
+ & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
2031
+ if (MEM_read32(ip) == MEM_read32(repMatch)) {
2032
+ /* repcode detected */
2033
+ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2034
+ size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
2035
+ int const gain2 = (int)(repLength * 3);
2036
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
2037
+ if ((repLength >= 4) && (gain2 > gain1))
2038
+ matchLength = repLength, offset = 0, start = ip;
2039
+ } }
2040
+
2041
+ /* search match, depth 1 */
2042
+ { size_t offset2=999999999;
2043
+ size_t const ml2 = searchMax(ms, ip, iend, &offset2);
2044
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2045
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
2046
+ if ((ml2 >= 4) && (gain2 > gain1)) {
2047
+ matchLength = ml2, offset = offset2, start = ip;
2048
+ continue; /* search a better one */
2049
+ } }
2050
+
2051
+ /* let's find an even better one */
2052
+ if ((depth==2) && (ip<ilimit)) {
2053
+ ip ++;
2054
+ curr++;
2055
+ /* check repCode */
2056
+ if (offset) {
2057
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
2058
+ const U32 repIndex = (U32)(curr - offset_1);
2059
+ const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2060
+ const BYTE* const repMatch = repBase + repIndex;
2061
+ if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
2062
+ & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
2063
+ if (MEM_read32(ip) == MEM_read32(repMatch)) {
2064
+ /* repcode detected */
2065
+ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2066
+ size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
2067
+ int const gain2 = (int)(repLength * 4);
2068
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
2069
+ if ((repLength >= 4) && (gain2 > gain1))
2070
+ matchLength = repLength, offset = 0, start = ip;
2071
+ } }
2072
+
2073
+ /* search match, depth 2 */
2074
+ { size_t offset2=999999999;
2075
+ size_t const ml2 = searchMax(ms, ip, iend, &offset2);
2076
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2077
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
2078
+ if ((ml2 >= 4) && (gain2 > gain1)) {
2079
+ matchLength = ml2, offset = offset2, start = ip;
2080
+ continue;
2081
+ } } }
2082
+ break; /* nothing found : store previous solution */
2083
+ }
2084
+
2085
+ /* catch up */
2086
+ if (offset) {
2087
+ U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
2088
+ const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
2089
+ const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
2090
+ while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
2091
+ offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
2092
+ }
2093
+
2094
+ /* store sequence */
2095
+ _storeSequence:
2096
+ { size_t const litLength = start - anchor;
2097
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
2098
+ anchor = ip = start + matchLength;
2099
+ }
2100
+
2101
+ /* check immediate repcode */
2102
+ while (ip <= ilimit) {
2103
+ const U32 repCurrent = (U32)(ip-base);
2104
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
2105
+ const U32 repIndex = repCurrent - offset_2;
2106
+ const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2107
+ const BYTE* const repMatch = repBase + repIndex;
2108
+ if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
2109
+ & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
2110
+ if (MEM_read32(ip) == MEM_read32(repMatch)) {
2111
+ /* repcode detected we should take it */
2112
+ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2113
+ matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
2114
+ offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
2115
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
2116
+ ip += matchLength;
2117
+ anchor = ip;
2118
+ continue; /* faster when present ... (?) */
2119
+ }
2120
+ break;
2121
+ } }
2122
+
2123
+ /* Save reps for next block */
2124
+ rep[0] = offset_1;
2125
+ rep[1] = offset_2;
2126
+
2127
+ /* Return the last literals size */
2128
+ return (size_t)(iend - anchor);
2129
+ }
2130
+
2131
+
2132
+ size_t ZSTD_compressBlock_greedy_extDict(
2133
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2134
+ void const* src, size_t srcSize)
2135
+ {
2136
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
2137
+ }
2138
+
2139
+ size_t ZSTD_compressBlock_lazy_extDict(
2140
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2141
+ void const* src, size_t srcSize)
2142
+
2143
+ {
2144
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
2145
+ }
2146
+
2147
+ size_t ZSTD_compressBlock_lazy2_extDict(
2148
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2149
+ void const* src, size_t srcSize)
2150
+
2151
+ {
2152
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
2153
+ }
2154
+
2155
+ size_t ZSTD_compressBlock_btlazy2_extDict(
2156
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2157
+ void const* src, size_t srcSize)
2158
+
2159
+ {
2160
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
2161
+ }
2162
+
2163
+ size_t ZSTD_compressBlock_greedy_extDict_row(
2164
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2165
+ void const* src, size_t srcSize)
2166
+ {
2167
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
2168
+ }
2169
+
2170
+ size_t ZSTD_compressBlock_lazy_extDict_row(
2171
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2172
+ void const* src, size_t srcSize)
2173
+
2174
+ {
2175
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
2176
+ }
2177
+
2178
+ size_t ZSTD_compressBlock_lazy2_extDict_row(
2179
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2180
+ void const* src, size_t srcSize)
2181
+
2182
+ {
2183
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
2184
+ }