zstd-ruby 1.4.5.0 → 1.5.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/extconf.rb +2 -1
  6. data/ext/zstdruby/libzstd/BUCK +5 -7
  7. data/ext/zstdruby/libzstd/Makefile +225 -222
  8. data/ext/zstdruby/libzstd/README.md +43 -5
  9. data/ext/zstdruby/libzstd/common/bitstream.h +46 -22
  10. data/ext/zstdruby/libzstd/common/compiler.h +182 -22
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  13. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +196 -44
  15. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  16. data/ext/zstdruby/libzstd/common/error_private.h +82 -3
  17. data/ext/zstdruby/libzstd/common/fse.h +41 -12
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +139 -22
  19. data/ext/zstdruby/libzstd/common/huf.h +47 -23
  20. data/ext/zstdruby/libzstd/common/mem.h +87 -98
  21. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  22. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +6 -5
  25. data/ext/zstdruby/libzstd/common/xxhash.c +6 -846
  26. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  27. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  28. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  29. data/ext/zstdruby/libzstd/common/zstd_internal.h +189 -142
  30. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  31. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  32. data/ext/zstdruby/libzstd/compress/fse_compress.c +89 -46
  33. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  34. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  35. data/ext/zstdruby/libzstd/compress/huf_compress.c +770 -198
  36. data/ext/zstdruby/libzstd/compress/zstd_compress.c +2894 -863
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +390 -90
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +12 -11
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +31 -8
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -297
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  44. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +206 -69
  45. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +307 -132
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
  47. data/ext/zstdruby/libzstd/compress/zstd_fast.c +322 -143
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1136 -174
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +316 -213
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  54. data/ext/zstdruby/libzstd/compress/zstd_opt.c +373 -150
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  56. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +152 -444
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +31 -113
  58. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1044 -403
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
  60. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +450 -105
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +913 -273
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +14 -5
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +59 -12
  66. data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
  67. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
  68. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
  69. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -38
  71. data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
  72. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  73. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -34
  74. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +128 -58
  75. data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
  76. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  77. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +8 -8
  79. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +9 -9
  81. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +9 -9
  83. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  84. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +10 -10
  85. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  86. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +13 -13
  87. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
  88. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -13
  89. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  90. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -13
  91. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  92. data/ext/zstdruby/libzstd/libzstd.mk +185 -0
  93. data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
  94. data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
  95. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +154 -7
  96. data/ext/zstdruby/libzstd/zstd.h +699 -214
  97. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +2 -1
  98. data/ext/zstdruby/zstdruby.c +2 -2
  99. data/lib/zstd-ruby/version.rb +1 -1
  100. metadata +15 -6
  101. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,6 @@
14
14
 
15
15
 
16
16
  #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17
- #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
18
17
  #define ZSTD_MAX_PRICE (1<<30)
19
18
 
20
19
  #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
@@ -24,11 +23,11 @@
24
23
  * Price functions for optimal parser
25
24
  ***************************************/
26
25
 
27
- #if 0 /* approximation at bit level */
26
+ #if 0 /* approximation at bit level (for tests) */
28
27
  # define BITCOST_ACCURACY 0
29
28
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
30
- # define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
31
- #elif 0 /* fractional bit accuracy */
29
+ # define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
30
+ #elif 0 /* fractional bit accuracy (for tests) */
32
31
  # define BITCOST_ACCURACY 8
33
32
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
34
33
  # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
@@ -66,7 +65,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)
66
65
 
67
66
  static int ZSTD_compressedLiterals(optState_t const* const optPtr)
68
67
  {
69
- return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
68
+ return optPtr->literalCompressionMode != ZSTD_ps_disable;
70
69
  }
71
70
 
72
71
  static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
@@ -79,25 +78,46 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
79
78
  }
80
79
 
81
80
 
82
- /* ZSTD_downscaleStat() :
83
- * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
84
- * return the resulting sum of elements */
85
- static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
81
+ static U32 sum_u32(const unsigned table[], size_t nbElts)
82
+ {
83
+ size_t n;
84
+ U32 total = 0;
85
+ for (n=0; n<nbElts; n++) {
86
+ total += table[n];
87
+ }
88
+ return total;
89
+ }
90
+
91
+ static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
86
92
  {
87
93
  U32 s, sum=0;
88
- DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
89
- assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
94
+ DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
95
+ assert(shift < 30);
90
96
  for (s=0; s<lastEltIndex+1; s++) {
91
- table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
97
+ table[s] = 1 + (table[s] >> shift);
92
98
  sum += table[s];
93
99
  }
94
100
  return sum;
95
101
  }
96
102
 
103
+ /* ZSTD_scaleStats() :
104
+ * reduce all elements in table if sum is too large
105
+ * return the resulting sum of elements */
106
+ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
107
+ {
108
+ U32 const prevsum = sum_u32(table, lastEltIndex+1);
109
+ U32 const factor = prevsum >> logTarget;
110
+ DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
111
+ assert(logTarget < 30);
112
+ if (factor <= 1) return prevsum;
113
+ return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
114
+ }
115
+
97
116
  /* ZSTD_rescaleFreqs() :
98
117
  * if first block (detected by optPtr->litLengthSum == 0) : init statistics
99
118
  * take hints from dictionary if there is one
100
- * or init from zero, using src for literals stats, or flat 1 for match symbols
119
+ * and init from zero if there is none,
120
+ * using src for literals stats, and baseline stats for sequence symbols
101
121
  * otherwise downscale existing stats, to be used as seed for next block.
102
122
  */
103
123
  static void
@@ -126,7 +146,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
126
146
  optPtr->litSum = 0;
127
147
  for (lit=0; lit<=MaxLit; lit++) {
128
148
  U32 const scaleLog = 11; /* scale to 2K */
129
- U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
149
+ U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
130
150
  assert(bitCost <= scaleLog);
131
151
  optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
132
152
  optPtr->litSum += optPtr->litFreq[lit];
@@ -174,14 +194,18 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
174
194
  if (compressedLiterals) {
175
195
  unsigned lit = MaxLit;
176
196
  HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
177
- optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
197
+ optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
178
198
  }
179
199
 
180
- { unsigned ll;
181
- for (ll=0; ll<=MaxLL; ll++)
182
- optPtr->litLengthFreq[ll] = 1;
200
+ { unsigned const baseLLfreqs[MaxLL+1] = {
201
+ 4, 2, 1, 1, 1, 1, 1, 1,
202
+ 1, 1, 1, 1, 1, 1, 1, 1,
203
+ 1, 1, 1, 1, 1, 1, 1, 1,
204
+ 1, 1, 1, 1, 1, 1, 1, 1,
205
+ 1, 1, 1, 1
206
+ };
207
+ ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
183
208
  }
184
- optPtr->litLengthSum = MaxLL+1;
185
209
 
186
210
  { unsigned ml;
187
211
  for (ml=0; ml<=MaxML; ml++)
@@ -189,21 +213,25 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
189
213
  }
190
214
  optPtr->matchLengthSum = MaxML+1;
191
215
 
192
- { unsigned of;
193
- for (of=0; of<=MaxOff; of++)
194
- optPtr->offCodeFreq[of] = 1;
216
+ { unsigned const baseOFCfreqs[MaxOff+1] = {
217
+ 6, 2, 1, 1, 2, 3, 4, 4,
218
+ 4, 3, 2, 1, 1, 1, 1, 1,
219
+ 1, 1, 1, 1, 1, 1, 1, 1,
220
+ 1, 1, 1, 1, 1, 1, 1, 1
221
+ };
222
+ ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
195
223
  }
196
- optPtr->offCodeSum = MaxOff+1;
224
+
197
225
 
198
226
  }
199
227
 
200
228
  } else { /* new block : re-use previous statistics, scaled down */
201
229
 
202
230
  if (compressedLiterals)
203
- optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
204
- optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
205
- optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
206
- optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
231
+ optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
232
+ optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
233
+ optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
234
+ optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
207
235
  }
208
236
 
209
237
  ZSTD_setBasePrices(optPtr, optLevel);
@@ -338,7 +366,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
338
366
 
339
367
  /* Update hashTable3 up to ip (excluded)
340
368
  Assumption : always within prefix (i.e. not within extDict) */
341
- static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
369
+ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
342
370
  U32* nextToUpdate3,
343
371
  const BYTE* const ip)
344
372
  {
@@ -364,11 +392,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
364
392
  * Binary Tree search
365
393
  ***************************************/
366
394
  /** ZSTD_insertBt1() : add one or multiple positions to tree.
367
- * ip : assumed <= iend-8 .
395
+ * @param ip assumed <= iend-8 .
396
+ * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
368
397
  * @return : nb of positions added */
369
398
  static U32 ZSTD_insertBt1(
370
- ZSTD_matchState_t* ms,
399
+ const ZSTD_matchState_t* ms,
371
400
  const BYTE* const ip, const BYTE* const iend,
401
+ U32 const target,
372
402
  U32 const mls, const int extDict)
373
403
  {
374
404
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -386,32 +416,36 @@ static U32 ZSTD_insertBt1(
386
416
  const BYTE* const dictEnd = dictBase + dictLimit;
387
417
  const BYTE* const prefixStart = base + dictLimit;
388
418
  const BYTE* match;
389
- const U32 current = (U32)(ip-base);
390
- const U32 btLow = btMask >= current ? 0 : current - btMask;
391
- U32* smallerPtr = bt + 2*(current&btMask);
419
+ const U32 curr = (U32)(ip-base);
420
+ const U32 btLow = btMask >= curr ? 0 : curr - btMask;
421
+ U32* smallerPtr = bt + 2*(curr&btMask);
392
422
  U32* largerPtr = smallerPtr + 1;
393
423
  U32 dummy32; /* to be nullified at the end */
394
- U32 const windowLow = ms->window.lowLimit;
395
- U32 matchEndIdx = current+8+1;
424
+ /* windowLow is based on target because
425
+ * we only need positions that will be in the window at the end of the tree update.
426
+ */
427
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
428
+ U32 matchEndIdx = curr+8+1;
396
429
  size_t bestLength = 8;
397
430
  U32 nbCompares = 1U << cParams->searchLog;
398
431
  #ifdef ZSTD_C_PREDICT
399
- U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
400
- U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
432
+ U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
433
+ U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
401
434
  predictedSmall += (predictedSmall>0);
402
435
  predictedLarge += (predictedLarge>0);
403
436
  #endif /* ZSTD_C_PREDICT */
404
437
 
405
- DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
438
+ DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
406
439
 
440
+ assert(curr <= target);
407
441
  assert(ip <= iend-8); /* required for h calculation */
408
- hashTable[h] = current; /* Update Hash Table */
442
+ hashTable[h] = curr; /* Update Hash Table */
409
443
 
410
444
  assert(windowLow > 0);
411
- while (nbCompares-- && (matchIndex >= windowLow)) {
445
+ for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
412
446
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
413
447
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
414
- assert(matchIndex < current);
448
+ assert(matchIndex < curr);
415
449
 
416
450
  #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
417
451
  const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
@@ -474,8 +508,8 @@ static U32 ZSTD_insertBt1(
474
508
  *smallerPtr = *largerPtr = 0;
475
509
  { U32 positions = 0;
476
510
  if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
477
- assert(matchEndIdx > current + 8);
478
- return MAX(positions, matchEndIdx - (current + 8));
511
+ assert(matchEndIdx > curr + 8);
512
+ return MAX(positions, matchEndIdx - (curr + 8));
479
513
  }
480
514
  }
481
515
 
@@ -492,7 +526,7 @@ void ZSTD_updateTree_internal(
492
526
  idx, target, dictMode);
493
527
 
494
528
  while(idx < target) {
495
- U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
529
+ U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
496
530
  assert(idx < (U32)(idx + forward));
497
531
  idx += forward;
498
532
  }
@@ -519,7 +553,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
519
553
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
520
554
  U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
521
555
  const BYTE* const base = ms->window.base;
522
- U32 const current = (U32)(ip-base);
556
+ U32 const curr = (U32)(ip-base);
523
557
  U32 const hashLog = cParams->hashLog;
524
558
  U32 const minMatch = (mls==3) ? 3 : 4;
525
559
  U32* const hashTable = ms->hashTable;
@@ -533,12 +567,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
533
567
  U32 const dictLimit = ms->window.dictLimit;
534
568
  const BYTE* const dictEnd = dictBase + dictLimit;
535
569
  const BYTE* const prefixStart = base + dictLimit;
536
- U32 const btLow = (btMask >= current) ? 0 : current - btMask;
537
- U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
570
+ U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
571
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
538
572
  U32 const matchLow = windowLow ? windowLow : 1;
539
- U32* smallerPtr = bt + 2*(current&btMask);
540
- U32* largerPtr = bt + 2*(current&btMask) + 1;
541
- U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
573
+ U32* smallerPtr = bt + 2*(curr&btMask);
574
+ U32* largerPtr = bt + 2*(curr&btMask) + 1;
575
+ U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
542
576
  U32 dummy32; /* to be nullified at the end */
543
577
  U32 mnum = 0;
544
578
  U32 nbCompares = 1U << cParams->searchLog;
@@ -557,7 +591,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
557
591
  U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
558
592
 
559
593
  size_t bestLength = lengthToBeat-1;
560
- DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
594
+ DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
561
595
 
562
596
  /* check repCode */
563
597
  assert(ll0 <= 1); /* necessarily 1 or 0 */
@@ -565,29 +599,29 @@ U32 ZSTD_insertBtAndGetAllMatches (
565
599
  U32 repCode;
566
600
  for (repCode = ll0; repCode < lastR; repCode++) {
567
601
  U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
568
- U32 const repIndex = current - repOffset;
602
+ U32 const repIndex = curr - repOffset;
569
603
  U32 repLen = 0;
570
- assert(current >= dictLimit);
571
- if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */
604
+ assert(curr >= dictLimit);
605
+ if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
572
606
  /* We must validate the repcode offset because when we're using a dictionary the
573
607
  * valid offset range shrinks when the dictionary goes out of bounds.
574
608
  */
575
609
  if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
576
610
  repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
577
611
  }
578
- } else { /* repIndex < dictLimit || repIndex >= current */
612
+ } else { /* repIndex < dictLimit || repIndex >= curr */
579
613
  const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
580
614
  dmsBase + repIndex - dmsIndexDelta :
581
615
  dictBase + repIndex;
582
- assert(current >= windowLow);
616
+ assert(curr >= windowLow);
583
617
  if ( dictMode == ZSTD_extDict
584
- && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
618
+ && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
585
619
  & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
586
620
  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
587
621
  repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
588
622
  }
589
623
  if (dictMode == ZSTD_dictMatchState
590
- && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */
624
+ && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
591
625
  & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
592
626
  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
593
627
  repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
@@ -609,7 +643,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
609
643
  if ((mls == 3) /*static*/ && (bestLength < mls)) {
610
644
  U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
611
645
  if ((matchIndex3 >= matchLow)
612
- & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
646
+ & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
613
647
  size_t mlen;
614
648
  if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
615
649
  const BYTE* const match = base + matchIndex3;
@@ -624,26 +658,26 @@ U32 ZSTD_insertBtAndGetAllMatches (
624
658
  DEBUGLOG(8, "found small match with hlog3, of length %u",
625
659
  (U32)mlen);
626
660
  bestLength = mlen;
627
- assert(current > matchIndex3);
661
+ assert(curr > matchIndex3);
628
662
  assert(mnum==0); /* no prior solution */
629
- matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
663
+ matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
630
664
  matches[0].len = (U32)mlen;
631
665
  mnum = 1;
632
666
  if ( (mlen > sufficient_len) |
633
667
  (ip+mlen == iLimit) ) { /* best possible length */
634
- ms->nextToUpdate = current+1; /* skip insertion */
668
+ ms->nextToUpdate = curr+1; /* skip insertion */
635
669
  return 1;
636
670
  } } }
637
671
  /* no dictMatchState lookup: dicts don't have a populated HC3 table */
638
- }
672
+ } /* if (mls == 3) */
639
673
 
640
- hashTable[h] = current; /* Update Hash Table */
674
+ hashTable[h] = curr; /* Update Hash Table */
641
675
 
642
- while (nbCompares-- && (matchIndex >= matchLow)) {
676
+ for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
643
677
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
644
678
  const BYTE* match;
645
679
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
646
- assert(current > matchIndex);
680
+ assert(curr > matchIndex);
647
681
 
648
682
  if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
649
683
  assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
@@ -660,20 +694,19 @@ U32 ZSTD_insertBtAndGetAllMatches (
660
694
 
661
695
  if (matchLength > bestLength) {
662
696
  DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
663
- (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
697
+ (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
664
698
  assert(matchEndIdx > matchIndex);
665
699
  if (matchLength > matchEndIdx - matchIndex)
666
700
  matchEndIdx = matchIndex + (U32)matchLength;
667
701
  bestLength = matchLength;
668
- matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
702
+ matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
669
703
  matches[mnum].len = (U32)matchLength;
670
704
  mnum++;
671
705
  if ( (matchLength > ZSTD_OPT_NUM)
672
706
  | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
673
707
  if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
674
708
  break; /* drop, to preserve bt consistency (miss a little bit of compression) */
675
- }
676
- }
709
+ } }
677
710
 
678
711
  if (match[matchLength] < ip[matchLength]) {
679
712
  /* match smaller than current */
@@ -692,12 +725,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
692
725
 
693
726
  *smallerPtr = *largerPtr = 0;
694
727
 
728
+ assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
695
729
  if (dictMode == ZSTD_dictMatchState && nbCompares) {
696
730
  size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
697
731
  U32 dictMatchIndex = dms->hashTable[dmsH];
698
732
  const U32* const dmsBt = dms->chainTable;
699
733
  commonLengthSmaller = commonLengthLarger = 0;
700
- while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
734
+ for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
701
735
  const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
702
736
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
703
737
  const BYTE* match = dmsBase + dictMatchIndex;
@@ -708,18 +742,17 @@ U32 ZSTD_insertBtAndGetAllMatches (
708
742
  if (matchLength > bestLength) {
709
743
  matchIndex = dictMatchIndex + dmsIndexDelta;
710
744
  DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
711
- (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
745
+ (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
712
746
  if (matchLength > matchEndIdx - matchIndex)
713
747
  matchEndIdx = matchIndex + (U32)matchLength;
714
748
  bestLength = matchLength;
715
- matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
749
+ matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
716
750
  matches[mnum].len = (U32)matchLength;
717
751
  mnum++;
718
752
  if ( (matchLength > ZSTD_OPT_NUM)
719
753
  | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
720
754
  break; /* drop, to guarantee consistency (miss a little bit of compression) */
721
- }
722
- }
755
+ } }
723
756
 
724
757
  if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
725
758
  if (match[matchLength] < ip[matchLength]) {
@@ -729,47 +762,232 @@ U32 ZSTD_insertBtAndGetAllMatches (
729
762
  /* match is larger than current */
730
763
  commonLengthLarger = matchLength;
731
764
  dictMatchIndex = nextPtr[0];
732
- }
733
- }
734
- }
765
+ } } } /* if (dictMode == ZSTD_dictMatchState) */
735
766
 
736
- assert(matchEndIdx > current+8);
767
+ assert(matchEndIdx > curr+8);
737
768
  ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
738
769
  return mnum;
739
770
  }
740
771
 
772
+ typedef U32 (*ZSTD_getAllMatchesFn)(
773
+ ZSTD_match_t*,
774
+ ZSTD_matchState_t*,
775
+ U32*,
776
+ const BYTE*,
777
+ const BYTE*,
778
+ const U32 rep[ZSTD_REP_NUM],
779
+ U32 const ll0,
780
+ U32 const lengthToBeat);
781
+
782
+ FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
783
+ ZSTD_match_t* matches,
784
+ ZSTD_matchState_t* ms,
785
+ U32* nextToUpdate3,
786
+ const BYTE* ip,
787
+ const BYTE* const iHighLimit,
788
+ const U32 rep[ZSTD_REP_NUM],
789
+ U32 const ll0,
790
+ U32 const lengthToBeat,
791
+ const ZSTD_dictMode_e dictMode,
792
+ const U32 mls)
793
+ {
794
+ assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
795
+ DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
796
+ if (ip < ms->window.base + ms->nextToUpdate)
797
+ return 0; /* skipped area */
798
+ ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
799
+ return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
800
+ }
801
+
802
+ #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
803
+
804
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
805
+ static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
806
+ ZSTD_match_t* matches, \
807
+ ZSTD_matchState_t* ms, \
808
+ U32* nextToUpdate3, \
809
+ const BYTE* ip, \
810
+ const BYTE* const iHighLimit, \
811
+ const U32 rep[ZSTD_REP_NUM], \
812
+ U32 const ll0, \
813
+ U32 const lengthToBeat) \
814
+ { \
815
+ return ZSTD_btGetAllMatches_internal( \
816
+ matches, ms, nextToUpdate3, ip, iHighLimit, \
817
+ rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
818
+ }
819
+
820
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
821
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
822
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
823
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
824
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
825
+
826
+ GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
827
+ GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
828
+ GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
829
+
830
+ #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
831
+ { \
832
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
833
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
834
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
835
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
836
+ }
741
837
 
742
- FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
743
- ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
744
- ZSTD_matchState_t* ms,
745
- U32* nextToUpdate3,
746
- const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
747
- const U32 rep[ZSTD_REP_NUM],
748
- U32 const ll0,
749
- U32 const lengthToBeat)
838
+ static ZSTD_getAllMatchesFn ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
750
839
  {
751
- const ZSTD_compressionParameters* const cParams = &ms->cParams;
752
- U32 const matchLengthSearch = cParams->minMatch;
753
- DEBUGLOG(8, "ZSTD_BtGetAllMatches");
754
- if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
755
- ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
756
- switch(matchLengthSearch)
757
- {
758
- case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
759
- default :
760
- case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
761
- case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
762
- case 7 :
763
- case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
840
+ ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
841
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
842
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
843
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
844
+ };
845
+ U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
846
+ assert((U32)dictMode < 3);
847
+ assert(mls - 3 < 4);
848
+ return getAllMatchesFns[(int)dictMode][mls - 3];
849
+ }
850
+
851
+ /*************************
852
+ * LDM helper functions *
853
+ *************************/
854
+
855
+ /* Struct containing info needed to make decision about ldm inclusion */
856
+ typedef struct {
857
+ rawSeqStore_t seqStore; /* External match candidates store for this block */
858
+ U32 startPosInBlock; /* Start position of the current match candidate */
859
+ U32 endPosInBlock; /* End position of the current match candidate */
860
+ U32 offset; /* Offset of the match candidate */
861
+ } ZSTD_optLdm_t;
862
+
863
+ /* ZSTD_optLdm_skipRawSeqStoreBytes():
864
+ * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
865
+ */
866
+ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
867
+ U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
868
+ while (currPos && rawSeqStore->pos < rawSeqStore->size) {
869
+ rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
870
+ if (currPos >= currSeq.litLength + currSeq.matchLength) {
871
+ currPos -= currSeq.litLength + currSeq.matchLength;
872
+ rawSeqStore->pos++;
873
+ } else {
874
+ rawSeqStore->posInSequence = currPos;
875
+ break;
876
+ }
877
+ }
878
+ if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
879
+ rawSeqStore->posInSequence = 0;
880
+ }
881
+ }
882
+
883
+ /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
884
+ * Calculates the beginning and end of the next match in the current block.
885
+ * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
886
+ */
887
+ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
888
+ U32 blockBytesRemaining) {
889
+ rawSeq currSeq;
890
+ U32 currBlockEndPos;
891
+ U32 literalsBytesRemaining;
892
+ U32 matchBytesRemaining;
893
+
894
+ /* Setting match end position to MAX to ensure we never use an LDM during this block */
895
+ if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
896
+ optLdm->startPosInBlock = UINT_MAX;
897
+ optLdm->endPosInBlock = UINT_MAX;
898
+ return;
899
+ }
900
+ /* Calculate appropriate bytes left in matchLength and litLength after adjusting
901
+ based on ldmSeqStore->posInSequence */
902
+ currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
903
+ assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
904
+ currBlockEndPos = currPosInBlock + blockBytesRemaining;
905
+ literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
906
+ currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
907
+ 0;
908
+ matchBytesRemaining = (literalsBytesRemaining == 0) ?
909
+ currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
910
+ currSeq.matchLength;
911
+
912
+ /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
913
+ if (literalsBytesRemaining >= blockBytesRemaining) {
914
+ optLdm->startPosInBlock = UINT_MAX;
915
+ optLdm->endPosInBlock = UINT_MAX;
916
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
917
+ return;
918
+ }
919
+
920
+ /* Matches may be < MINMATCH by this process. In that case, we will reject them
921
+ when we are deciding whether or not to add the ldm */
922
+ optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
923
+ optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
924
+ optLdm->offset = currSeq.offset;
925
+
926
+ if (optLdm->endPosInBlock > currBlockEndPos) {
927
+ /* Match ends after the block ends, we can't use the whole match */
928
+ optLdm->endPosInBlock = currBlockEndPos;
929
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
930
+ } else {
931
+ /* Consume nb of bytes equal to size of sequence left */
932
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
764
933
  }
765
934
  }
766
935
 
936
+ /* ZSTD_optLdm_maybeAddMatch():
937
+ * Adds a match if it's long enough, based on optLdm's 'startPosInBlock'
938
+ * and 'endPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
939
+ */
940
+ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
941
+ ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
942
+ U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
943
+ /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
944
+ U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
945
+ U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
946
+
947
+ /* Ensure that current block position is not outside of the match */
948
+ if (currPosInBlock < optLdm->startPosInBlock
949
+ || currPosInBlock >= optLdm->endPosInBlock
950
+ || candidateMatchLength < MINMATCH) {
951
+ return;
952
+ }
953
+
954
+ if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
955
+ DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
956
+ candidateOffCode, candidateMatchLength, currPosInBlock);
957
+ matches[*nbMatches].len = candidateMatchLength;
958
+ matches[*nbMatches].off = candidateOffCode;
959
+ (*nbMatches)++;
960
+ }
961
+ }
962
+
963
+ /* ZSTD_optLdm_processMatchCandidate():
964
+ * Wrapper function to update ldm seq store and call ldm functions as necessary.
965
+ */
966
+ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
967
+ U32 currPosInBlock, U32 remainingBytes) {
968
+ if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
969
+ return;
970
+ }
971
+
972
+ if (currPosInBlock >= optLdm->endPosInBlock) {
973
+ if (currPosInBlock > optLdm->endPosInBlock) {
974
+ /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
975
+ * at the end of a match from the ldm seq store, and will often be some bytes
976
+ * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
977
+ */
978
+ U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
979
+ ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
980
+ }
981
+ ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
982
+ }
983
+ ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
984
+ }
985
+
767
986
 
768
987
  /*-*******************************
769
988
  * Optimal parser
770
989
  *********************************/
771
990
 
772
-
773
991
  static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
774
992
  {
775
993
  return sol.litlen + sol.mlen;
@@ -810,6 +1028,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
810
1028
  const BYTE* const prefixStart = base + ms->window.dictLimit;
811
1029
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
812
1030
 
1031
+ ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1032
+
813
1033
  U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
814
1034
  U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
815
1035
  U32 nextToUpdate3 = ms->nextToUpdate;
@@ -817,6 +1037,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
817
1037
  ZSTD_optimal_t* const opt = optStatePtr->priceTable;
818
1038
  ZSTD_match_t* const matches = optStatePtr->matchTable;
819
1039
  ZSTD_optimal_t lastSequence;
1040
+ ZSTD_optLdm_t optLdm;
1041
+
1042
+ optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1043
+ optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1044
+ ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
820
1045
 
821
1046
  /* init */
822
1047
  DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
@@ -832,7 +1057,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
832
1057
  /* find first match */
833
1058
  { U32 const litlen = (U32)(ip - anchor);
834
1059
  U32 const ll0 = !litlen;
835
- U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
1060
+ U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1061
+ ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1062
+ (U32)(ip-istart), (U32)(iend - ip));
836
1063
  if (!nbMatches) { ip++; continue; }
837
1064
 
838
1065
  /* initialize opt[0] */
@@ -844,7 +1071,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
844
1071
  * in every price. We include the literal length to avoid negative
845
1072
  * prices when we subtract the previous literal length.
846
1073
  */
847
- opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
1074
+ opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
848
1075
 
849
1076
  /* large match -> immediate encoding */
850
1077
  { U32 const maxML = matches[nbMatches-1].len;
@@ -864,7 +1091,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
864
1091
  } }
865
1092
 
866
1093
  /* set prices for first matches starting position == 0 */
867
- { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1094
+ assert(opt[0].price >= 0);
1095
+ { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
868
1096
  U32 pos;
869
1097
  U32 matchNb;
870
1098
  for (pos = 1; pos < minMatch; pos++) {
@@ -881,7 +1109,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
881
1109
  opt[pos].mlen = pos;
882
1110
  opt[pos].off = offset;
883
1111
  opt[pos].litlen = litlen;
884
- opt[pos].price = sequencePrice;
1112
+ opt[pos].price = (int)sequencePrice;
885
1113
  } }
886
1114
  last_pos = pos-1;
887
1115
  }
@@ -896,9 +1124,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
896
1124
  /* Fix current position with one literal if cheaper */
897
1125
  { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
898
1126
  int const price = opt[cur-1].price
899
- + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
900
- + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
901
- - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1127
+ + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1128
+ + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1129
+ - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
902
1130
  assert(price < 1000000000); /* overflow check */
903
1131
  if (price <= opt[cur].price) {
904
1132
  DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
@@ -925,9 +1153,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
925
1153
  if (opt[cur].mlen != 0) {
926
1154
  U32 const prev = cur - opt[cur].mlen;
927
1155
  repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
928
- memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1156
+ ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
929
1157
  } else {
930
- memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
1158
+ ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
931
1159
  }
932
1160
 
933
1161
  /* last match must start at a minimum distance of 8 from oend */
@@ -941,12 +1169,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
941
1169
  continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
942
1170
  }
943
1171
 
1172
+ assert(opt[cur].price >= 0);
944
1173
  { U32 const ll0 = (opt[cur].mlen != 0);
945
1174
  U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
946
- U32 const previousPrice = opt[cur].price;
1175
+ U32 const previousPrice = (U32)opt[cur].price;
947
1176
  U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
948
- U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
1177
+ U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
949
1178
  U32 matchNb;
1179
+
1180
+ ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1181
+ (U32)(inr-istart), (U32)(iend-inr));
1182
+
950
1183
  if (!nbMatches) {
951
1184
  DEBUGLOG(7, "rPos:%u : no match found", cur);
952
1185
  continue;
@@ -979,7 +1212,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
979
1212
 
980
1213
  for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
981
1214
  U32 const pos = cur + mlen;
982
- int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1215
+ int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
983
1216
 
984
1217
  if ((pos > last_pos) || (price < opt[pos].price)) {
985
1218
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
@@ -1010,9 +1243,9 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1010
1243
  */
1011
1244
  if (lastSequence.mlen != 0) {
1012
1245
  repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
1013
- memcpy(rep, &reps, sizeof(reps));
1246
+ ZSTD_memcpy(rep, &reps, sizeof(reps));
1014
1247
  } else {
1015
- memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1248
+ ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1016
1249
  }
1017
1250
 
1018
1251
  { U32 const storeEnd = cur + 1;
@@ -1065,38 +1298,30 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1065
1298
  return (size_t)(iend - anchor);
1066
1299
  }
1067
1300
 
1301
+ static size_t ZSTD_compressBlock_opt0(
1302
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1303
+ const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1304
+ {
1305
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1306
+ }
1307
+
1308
+ static size_t ZSTD_compressBlock_opt2(
1309
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1310
+ const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1311
+ {
1312
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1313
+ }
1068
1314
 
1069
1315
  size_t ZSTD_compressBlock_btopt(
1070
1316
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1071
1317
  const void* src, size_t srcSize)
1072
1318
  {
1073
1319
  DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1074
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
1320
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1075
1321
  }
1076
1322
 
1077
1323
 
1078
- /* used in 2-pass strategy */
1079
- static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
1080
- {
1081
- U32 s, sum=0;
1082
- assert(ZSTD_FREQ_DIV+bonus >= 0);
1083
- for (s=0; s<lastEltIndex+1; s++) {
1084
- table[s] <<= ZSTD_FREQ_DIV+bonus;
1085
- table[s]--;
1086
- sum += table[s];
1087
- }
1088
- return sum;
1089
- }
1090
1324
 
1091
- /* used in 2-pass strategy */
1092
- MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
1093
- {
1094
- if (ZSTD_compressedLiterals(optPtr))
1095
- optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
1096
- optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
1097
- optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
1098
- optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
1099
- }
1100
1325
 
1101
1326
  /* ZSTD_initStats_ultra():
1102
1327
  * make a first compression pass, just to seed stats with more accurate starting values.
@@ -1110,7 +1335,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1110
1335
  const void* src, size_t srcSize)
1111
1336
  {
1112
1337
  U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
1113
- memcpy(tmpRep, rep, sizeof(tmpRep));
1338
+ ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1114
1339
 
1115
1340
  DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1116
1341
  assert(ms->opt.litLengthSum == 0); /* first block */
@@ -1118,7 +1343,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1118
1343
  assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
1119
1344
  assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
1120
1345
 
1121
- ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
1346
+ ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
1122
1347
 
1123
1348
  /* invalidate first scan from history */
1124
1349
  ZSTD_resetSeqStore(seqStore);
@@ -1127,8 +1352,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1127
1352
  ms->window.lowLimit = ms->window.dictLimit;
1128
1353
  ms->nextToUpdate = ms->window.dictLimit;
1129
1354
 
1130
- /* re-inforce weight of collected statistics */
1131
- ZSTD_upscaleStats(&ms->opt);
1132
1355
  }
1133
1356
 
1134
1357
  size_t ZSTD_compressBlock_btultra(
@@ -1136,14 +1359,14 @@ size_t ZSTD_compressBlock_btultra(
1136
1359
  const void* src, size_t srcSize)
1137
1360
  {
1138
1361
  DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1139
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1362
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1140
1363
  }
1141
1364
 
1142
1365
  size_t ZSTD_compressBlock_btultra2(
1143
1366
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1144
1367
  const void* src, size_t srcSize)
1145
1368
  {
1146
- U32 const current = (U32)((const BYTE*)src - ms->window.base);
1369
+ U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1147
1370
  DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1148
1371
 
1149
1372
  /* 2-pass strategy:
@@ -1158,41 +1381,41 @@ size_t ZSTD_compressBlock_btultra2(
1158
1381
  if ( (ms->opt.litLengthSum==0) /* first block */
1159
1382
  && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1160
1383
  && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
1161
- && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1384
+ && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1162
1385
  && (srcSize > ZSTD_PREDEF_THRESHOLD)
1163
1386
  ) {
1164
1387
  ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1165
1388
  }
1166
1389
 
1167
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1390
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1168
1391
  }
1169
1392
 
1170
1393
  size_t ZSTD_compressBlock_btopt_dictMatchState(
1171
1394
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1172
1395
  const void* src, size_t srcSize)
1173
1396
  {
1174
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
1397
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1175
1398
  }
1176
1399
 
1177
1400
  size_t ZSTD_compressBlock_btultra_dictMatchState(
1178
1401
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1179
1402
  const void* src, size_t srcSize)
1180
1403
  {
1181
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
1404
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1182
1405
  }
1183
1406
 
1184
1407
  size_t ZSTD_compressBlock_btopt_extDict(
1185
1408
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1186
1409
  const void* src, size_t srcSize)
1187
1410
  {
1188
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
1411
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1189
1412
  }
1190
1413
 
1191
1414
  size_t ZSTD_compressBlock_btultra_extDict(
1192
1415
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1193
1416
  const void* src, size_t srcSize)
1194
1417
  {
1195
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
1418
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1196
1419
  }
1197
1420
 
1198
1421
  /* note : no btultra2 variant for extDict nor dictMatchState,