zstd-ruby 1.5.0.0 → 1.5.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +2 -2
  3. data/README.md +1 -1
  4. data/ext/zstdruby/extconf.rb +1 -0
  5. data/ext/zstdruby/libzstd/Makefile +50 -175
  6. data/ext/zstdruby/libzstd/README.md +7 -1
  7. data/ext/zstdruby/libzstd/common/bitstream.h +24 -9
  8. data/ext/zstdruby/libzstd/common/compiler.h +89 -43
  9. data/ext/zstdruby/libzstd/common/entropy_common.c +11 -5
  10. data/ext/zstdruby/libzstd/common/error_private.h +79 -0
  11. data/ext/zstdruby/libzstd/common/fse.h +2 -1
  12. data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
  13. data/ext/zstdruby/libzstd/common/huf.h +24 -22
  14. data/ext/zstdruby/libzstd/common/mem.h +18 -0
  15. data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
  16. data/ext/zstdruby/libzstd/common/xxhash.c +5 -805
  17. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  18. data/ext/zstdruby/libzstd/common/zstd_internal.h +92 -88
  19. data/ext/zstdruby/libzstd/common/zstd_trace.h +12 -3
  20. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  21. data/ext/zstdruby/libzstd/compress/fse_compress.c +63 -27
  22. data/ext/zstdruby/libzstd/compress/huf_compress.c +537 -104
  23. data/ext/zstdruby/libzstd/compress/zstd_compress.c +194 -278
  24. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +102 -44
  25. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +4 -3
  26. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +3 -1
  27. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +5 -4
  28. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +3 -2
  29. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +3 -3
  30. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +289 -114
  31. data/ext/zstdruby/libzstd/compress/zstd_fast.c +302 -123
  32. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +418 -502
  33. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +4 -4
  34. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
  35. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +4 -1
  36. data/ext/zstdruby/libzstd/compress/zstd_opt.c +186 -108
  37. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +59 -29
  38. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +727 -189
  39. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
  40. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +85 -22
  41. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +744 -220
  42. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -2
  43. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +34 -3
  44. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +23 -3
  45. data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
  46. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +11 -4
  47. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +99 -28
  48. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +2 -6
  49. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +3 -7
  50. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +3 -7
  51. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +3 -7
  52. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +3 -7
  53. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +3 -7
  54. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -7
  55. data/ext/zstdruby/libzstd/libzstd.mk +185 -0
  56. data/ext/zstdruby/libzstd/libzstd.pc.in +1 -0
  57. data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
  58. data/ext/zstdruby/libzstd/zdict.h +4 -4
  59. data/ext/zstdruby/libzstd/zstd.h +179 -136
  60. data/ext/zstdruby/zstdruby.c +2 -2
  61. data/lib/zstd-ruby/version.rb +1 -1
  62. metadata +8 -3
@@ -159,12 +159,12 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
159
159
  size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
160
160
  size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
161
161
  + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
162
- return params.enableLdm ? totalSize : 0;
162
+ return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
163
163
  }
164
164
 
165
165
  size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
166
166
  {
167
- return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
167
+ return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0;
168
168
  }
169
169
 
170
170
  /** ZSTD_ldm_getBucket() :
@@ -478,7 +478,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
478
478
  */
479
479
  if (anchor > ip + hashed) {
480
480
  ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
481
- /* Continue the outter loop at anchor (ip + hashed == anchor). */
481
+ /* Continue the outer loop at anchor (ip + hashed == anchor). */
482
482
  ip = anchor - hashed;
483
483
  break;
484
484
  }
@@ -657,7 +657,7 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
657
657
 
658
658
  size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
659
659
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
660
- ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
660
+ ZSTD_paramSwitch_e useRowMatchFinder,
661
661
  void const* src, size_t srcSize)
662
662
  {
663
663
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -66,7 +66,7 @@ size_t ZSTD_ldm_generateSequences(
66
66
  */
67
67
  size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
68
68
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
69
- ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
69
+ ZSTD_paramSwitch_e useRowMatchFinder,
70
70
  void const* src, size_t srcSize);
71
71
 
72
72
  /**
@@ -11,7 +11,10 @@
11
11
  #ifndef ZSTD_LDM_GEARTAB_H
12
12
  #define ZSTD_LDM_GEARTAB_H
13
13
 
14
- static U64 ZSTD_ldm_gearTab[256] = {
14
+ #include "../common/compiler.h" /* UNUSED_ATTR */
15
+ #include "../common/mem.h" /* U64 */
16
+
17
+ static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = {
15
18
  0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
16
19
  0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
17
20
  0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
@@ -14,7 +14,6 @@
14
14
 
15
15
 
16
16
  #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17
- #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
18
17
  #define ZSTD_MAX_PRICE (1<<30)
19
18
 
20
19
  #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
@@ -24,11 +23,11 @@
24
23
  * Price functions for optimal parser
25
24
  ***************************************/
26
25
 
27
- #if 0 /* approximation at bit level */
26
+ #if 0 /* approximation at bit level (for tests) */
28
27
  # define BITCOST_ACCURACY 0
29
28
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
30
- # define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
31
- #elif 0 /* fractional bit accuracy */
29
+ # define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
30
+ #elif 0 /* fractional bit accuracy (for tests) */
32
31
  # define BITCOST_ACCURACY 8
33
32
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
34
33
  # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
@@ -66,7 +65,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)
66
65
 
67
66
  static int ZSTD_compressedLiterals(optState_t const* const optPtr)
68
67
  {
69
- return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
68
+ return optPtr->literalCompressionMode != ZSTD_ps_disable;
70
69
  }
71
70
 
72
71
  static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
@@ -79,25 +78,46 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
79
78
  }
80
79
 
81
80
 
82
- /* ZSTD_downscaleStat() :
83
- * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
84
- * return the resulting sum of elements */
85
- static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
81
+ static U32 sum_u32(const unsigned table[], size_t nbElts)
82
+ {
83
+ size_t n;
84
+ U32 total = 0;
85
+ for (n=0; n<nbElts; n++) {
86
+ total += table[n];
87
+ }
88
+ return total;
89
+ }
90
+
91
+ static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
86
92
  {
87
93
  U32 s, sum=0;
88
- DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
89
- assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
94
+ DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
95
+ assert(shift < 30);
90
96
  for (s=0; s<lastEltIndex+1; s++) {
91
- table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
97
+ table[s] = 1 + (table[s] >> shift);
92
98
  sum += table[s];
93
99
  }
94
100
  return sum;
95
101
  }
96
102
 
103
+ /* ZSTD_scaleStats() :
104
+ * reduce all elements in table if sum too large
105
+ * return the resulting sum of elements */
106
+ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
107
+ {
108
+ U32 const prevsum = sum_u32(table, lastEltIndex+1);
109
+ U32 const factor = prevsum >> logTarget;
110
+ DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
111
+ assert(logTarget < 30);
112
+ if (factor <= 1) return prevsum;
113
+ return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
114
+ }
115
+
97
116
  /* ZSTD_rescaleFreqs() :
98
117
  * if first block (detected by optPtr->litLengthSum == 0) : init statistics
99
118
  * take hints from dictionary if there is one
100
- * or init from zero, using src for literals stats, or flat 1 for match symbols
119
+ * and init from zero if there is none,
120
+ * using src for literals stats, and baseline stats for sequence symbols
101
121
  * otherwise downscale existing stats, to be used as seed for next block.
102
122
  */
103
123
  static void
@@ -126,7 +146,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
126
146
  optPtr->litSum = 0;
127
147
  for (lit=0; lit<=MaxLit; lit++) {
128
148
  U32 const scaleLog = 11; /* scale to 2K */
129
- U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
149
+ U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
130
150
  assert(bitCost <= scaleLog);
131
151
  optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
132
152
  optPtr->litSum += optPtr->litFreq[lit];
@@ -174,14 +194,18 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
174
194
  if (compressedLiterals) {
175
195
  unsigned lit = MaxLit;
176
196
  HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
177
- optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
197
+ optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
178
198
  }
179
199
 
180
- { unsigned ll;
181
- for (ll=0; ll<=MaxLL; ll++)
182
- optPtr->litLengthFreq[ll] = 1;
200
+ { unsigned const baseLLfreqs[MaxLL+1] = {
201
+ 4, 2, 1, 1, 1, 1, 1, 1,
202
+ 1, 1, 1, 1, 1, 1, 1, 1,
203
+ 1, 1, 1, 1, 1, 1, 1, 1,
204
+ 1, 1, 1, 1, 1, 1, 1, 1,
205
+ 1, 1, 1, 1
206
+ };
207
+ ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
183
208
  }
184
- optPtr->litLengthSum = MaxLL+1;
185
209
 
186
210
  { unsigned ml;
187
211
  for (ml=0; ml<=MaxML; ml++)
@@ -189,21 +213,25 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
189
213
  }
190
214
  optPtr->matchLengthSum = MaxML+1;
191
215
 
192
- { unsigned of;
193
- for (of=0; of<=MaxOff; of++)
194
- optPtr->offCodeFreq[of] = 1;
216
+ { unsigned const baseOFCfreqs[MaxOff+1] = {
217
+ 6, 2, 1, 1, 2, 3, 4, 4,
218
+ 4, 3, 2, 1, 1, 1, 1, 1,
219
+ 1, 1, 1, 1, 1, 1, 1, 1,
220
+ 1, 1, 1, 1, 1, 1, 1, 1
221
+ };
222
+ ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
195
223
  }
196
- optPtr->offCodeSum = MaxOff+1;
224
+
197
225
 
198
226
  }
199
227
 
200
228
  } else { /* new block : re-use previous statistics, scaled down */
201
229
 
202
230
  if (compressedLiterals)
203
- optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
204
- optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
205
- optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
206
- optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
231
+ optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
232
+ optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
233
+ optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
234
+ optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
207
235
  }
208
236
 
209
237
  ZSTD_setBasePrices(optPtr, optLevel);
@@ -338,7 +366,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
338
366
 
339
367
  /* Update hashTable3 up to ip (excluded)
340
368
  Assumption : always within prefix (i.e. not within extDict) */
341
- static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
369
+ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
342
370
  U32* nextToUpdate3,
343
371
  const BYTE* const ip)
344
372
  {
@@ -364,11 +392,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
364
392
  * Binary Tree search
365
393
  ***************************************/
366
394
  /** ZSTD_insertBt1() : add one or multiple positions to tree.
367
- * ip : assumed <= iend-8 .
395
+ * @param ip assumed <= iend-8 .
396
+ * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
368
397
  * @return : nb of positions added */
369
398
  static U32 ZSTD_insertBt1(
370
- ZSTD_matchState_t* ms,
399
+ const ZSTD_matchState_t* ms,
371
400
  const BYTE* const ip, const BYTE* const iend,
401
+ U32 const target,
372
402
  U32 const mls, const int extDict)
373
403
  {
374
404
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -391,7 +421,10 @@ static U32 ZSTD_insertBt1(
391
421
  U32* smallerPtr = bt + 2*(curr&btMask);
392
422
  U32* largerPtr = smallerPtr + 1;
393
423
  U32 dummy32; /* to be nullified at the end */
394
- U32 const windowLow = ms->window.lowLimit;
424
+ /* windowLow is based on target because
425
+ * we only need positions that will be in the window at the end of the tree update.
426
+ */
427
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
395
428
  U32 matchEndIdx = curr+8+1;
396
429
  size_t bestLength = 8;
397
430
  U32 nbCompares = 1U << cParams->searchLog;
@@ -404,11 +437,12 @@ static U32 ZSTD_insertBt1(
404
437
 
405
438
  DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
406
439
 
440
+ assert(curr <= target);
407
441
  assert(ip <= iend-8); /* required for h calculation */
408
442
  hashTable[h] = curr; /* Update Hash Table */
409
443
 
410
444
  assert(windowLow > 0);
411
- while (nbCompares-- && (matchIndex >= windowLow)) {
445
+ for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
412
446
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
413
447
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
414
448
  assert(matchIndex < curr);
@@ -492,7 +526,7 @@ void ZSTD_updateTree_internal(
492
526
  idx, target, dictMode);
493
527
 
494
528
  while(idx < target) {
495
- U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
529
+ U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
496
530
  assert(idx < (U32)(idx + forward));
497
531
  idx += forward;
498
532
  }
@@ -635,11 +669,11 @@ U32 ZSTD_insertBtAndGetAllMatches (
635
669
  return 1;
636
670
  } } }
637
671
  /* no dictMatchState lookup: dicts don't have a populated HC3 table */
638
- }
672
+ } /* if (mls == 3) */
639
673
 
640
674
  hashTable[h] = curr; /* Update Hash Table */
641
675
 
642
- while (nbCompares-- && (matchIndex >= matchLow)) {
676
+ for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
643
677
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
644
678
  const BYTE* match;
645
679
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
@@ -672,8 +706,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
672
706
  | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
673
707
  if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
674
708
  break; /* drop, to preserve bt consistency (miss a little bit of compression) */
675
- }
676
- }
709
+ } }
677
710
 
678
711
  if (match[matchLength] < ip[matchLength]) {
679
712
  /* match smaller than current */
@@ -692,12 +725,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
692
725
 
693
726
  *smallerPtr = *largerPtr = 0;
694
727
 
728
+ assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
695
729
  if (dictMode == ZSTD_dictMatchState && nbCompares) {
696
730
  size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
697
731
  U32 dictMatchIndex = dms->hashTable[dmsH];
698
732
  const U32* const dmsBt = dms->chainTable;
699
733
  commonLengthSmaller = commonLengthLarger = 0;
700
- while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
734
+ for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
701
735
  const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
702
736
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
703
737
  const BYTE* match = dmsBase + dictMatchIndex;
@@ -718,8 +752,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
718
752
  if ( (matchLength > ZSTD_OPT_NUM)
719
753
  | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
720
754
  break; /* drop, to guarantee consistency (miss a little bit of compression) */
721
- }
722
- }
755
+ } }
723
756
 
724
757
  if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
725
758
  if (match[matchLength] < ip[matchLength]) {
@@ -729,39 +762,90 @@ U32 ZSTD_insertBtAndGetAllMatches (
729
762
  /* match is larger than current */
730
763
  commonLengthLarger = matchLength;
731
764
  dictMatchIndex = nextPtr[0];
732
- }
733
- }
734
- }
765
+ } } } /* if (dictMode == ZSTD_dictMatchState) */
735
766
 
736
767
  assert(matchEndIdx > curr+8);
737
768
  ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
738
769
  return mnum;
739
770
  }
740
771
 
741
-
742
- FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
743
- ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
744
- ZSTD_matchState_t* ms,
745
- U32* nextToUpdate3,
746
- const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
747
- const U32 rep[ZSTD_REP_NUM],
748
- U32 const ll0,
749
- U32 const lengthToBeat)
772
+ typedef U32 (*ZSTD_getAllMatchesFn)(
773
+ ZSTD_match_t*,
774
+ ZSTD_matchState_t*,
775
+ U32*,
776
+ const BYTE*,
777
+ const BYTE*,
778
+ const U32 rep[ZSTD_REP_NUM],
779
+ U32 const ll0,
780
+ U32 const lengthToBeat);
781
+
782
+ FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
783
+ ZSTD_match_t* matches,
784
+ ZSTD_matchState_t* ms,
785
+ U32* nextToUpdate3,
786
+ const BYTE* ip,
787
+ const BYTE* const iHighLimit,
788
+ const U32 rep[ZSTD_REP_NUM],
789
+ U32 const ll0,
790
+ U32 const lengthToBeat,
791
+ const ZSTD_dictMode_e dictMode,
792
+ const U32 mls)
750
793
  {
751
- const ZSTD_compressionParameters* const cParams = &ms->cParams;
752
- U32 const matchLengthSearch = cParams->minMatch;
753
- DEBUGLOG(8, "ZSTD_BtGetAllMatches");
754
- if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
755
- ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
756
- switch(matchLengthSearch)
757
- {
758
- case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
759
- default :
760
- case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
761
- case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
762
- case 7 :
763
- case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
794
+ assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
795
+ DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
796
+ if (ip < ms->window.base + ms->nextToUpdate)
797
+ return 0; /* skipped area */
798
+ ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
799
+ return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
800
+ }
801
+
802
+ #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
803
+
804
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
805
+ static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
806
+ ZSTD_match_t* matches, \
807
+ ZSTD_matchState_t* ms, \
808
+ U32* nextToUpdate3, \
809
+ const BYTE* ip, \
810
+ const BYTE* const iHighLimit, \
811
+ const U32 rep[ZSTD_REP_NUM], \
812
+ U32 const ll0, \
813
+ U32 const lengthToBeat) \
814
+ { \
815
+ return ZSTD_btGetAllMatches_internal( \
816
+ matches, ms, nextToUpdate3, ip, iHighLimit, \
817
+ rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
818
+ }
819
+
820
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
821
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
822
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
823
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
824
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
825
+
826
+ GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
827
+ GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
828
+ GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
829
+
830
+ #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
831
+ { \
832
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
833
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
834
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
835
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
764
836
  }
837
+
838
+ static ZSTD_getAllMatchesFn ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
839
+ {
840
+ ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
841
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
842
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
843
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
844
+ };
845
+ U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
846
+ assert((U32)dictMode < 3);
847
+ assert(mls - 3 < 4);
848
+ return getAllMatchesFns[(int)dictMode][mls - 3];
765
849
  }
766
850
 
767
851
  /*************************
@@ -893,17 +977,17 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
893
977
  */
894
978
  U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
895
979
  ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
896
- }
980
+ }
897
981
  ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
898
982
  }
899
983
  ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
900
984
  }
901
985
 
986
+
902
987
  /*-*******************************
903
988
  * Optimal parser
904
989
  *********************************/
905
990
 
906
-
907
991
  static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
908
992
  {
909
993
  return sol.litlen + sol.mlen;
@@ -944,6 +1028,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
944
1028
  const BYTE* const prefixStart = base + ms->window.dictLimit;
945
1029
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
946
1030
 
1031
+ ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1032
+
947
1033
  U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
948
1034
  U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
949
1035
  U32 nextToUpdate3 = ms->nextToUpdate;
@@ -971,7 +1057,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
971
1057
  /* find first match */
972
1058
  { U32 const litlen = (U32)(ip - anchor);
973
1059
  U32 const ll0 = !litlen;
974
- U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
1060
+ U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
975
1061
  ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
976
1062
  (U32)(ip-istart), (U32)(iend - ip));
977
1063
  if (!nbMatches) { ip++; continue; }
@@ -985,7 +1071,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
985
1071
  * in every price. We include the literal length to avoid negative
986
1072
  * prices when we subtract the previous literal length.
987
1073
  */
988
- opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
1074
+ opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
989
1075
 
990
1076
  /* large match -> immediate encoding */
991
1077
  { U32 const maxML = matches[nbMatches-1].len;
@@ -1005,7 +1091,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1005
1091
  } }
1006
1092
 
1007
1093
  /* set prices for first matches starting position == 0 */
1008
- { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1094
+ assert(opt[0].price >= 0);
1095
+ { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1009
1096
  U32 pos;
1010
1097
  U32 matchNb;
1011
1098
  for (pos = 1; pos < minMatch; pos++) {
@@ -1022,7 +1109,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1022
1109
  opt[pos].mlen = pos;
1023
1110
  opt[pos].off = offset;
1024
1111
  opt[pos].litlen = litlen;
1025
- opt[pos].price = sequencePrice;
1112
+ opt[pos].price = (int)sequencePrice;
1026
1113
  } }
1027
1114
  last_pos = pos-1;
1028
1115
  }
@@ -1037,9 +1124,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1037
1124
  /* Fix current position with one literal if cheaper */
1038
1125
  { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
1039
1126
  int const price = opt[cur-1].price
1040
- + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1041
- + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1042
- - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1127
+ + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1128
+ + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1129
+ - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1043
1130
  assert(price < 1000000000); /* overflow check */
1044
1131
  if (price <= opt[cur].price) {
1045
1132
  DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
@@ -1082,11 +1169,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1082
1169
  continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1083
1170
  }
1084
1171
 
1172
+ assert(opt[cur].price >= 0);
1085
1173
  { U32 const ll0 = (opt[cur].mlen != 0);
1086
1174
  U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
1087
- U32 const previousPrice = opt[cur].price;
1175
+ U32 const previousPrice = (U32)opt[cur].price;
1088
1176
  U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1089
- U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
1177
+ U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1090
1178
  U32 matchNb;
1091
1179
 
1092
1180
  ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
@@ -1124,7 +1212,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1124
1212
 
1125
1213
  for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
1126
1214
  U32 const pos = cur + mlen;
1127
- int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1215
+ int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1128
1216
 
1129
1217
  if ((pos > last_pos) || (price < opt[pos].price)) {
1130
1218
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
@@ -1210,38 +1298,30 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1210
1298
  return (size_t)(iend - anchor);
1211
1299
  }
1212
1300
 
1301
+ static size_t ZSTD_compressBlock_opt0(
1302
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1303
+ const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1304
+ {
1305
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1306
+ }
1307
+
1308
+ static size_t ZSTD_compressBlock_opt2(
1309
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1310
+ const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1311
+ {
1312
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1313
+ }
1213
1314
 
1214
1315
  size_t ZSTD_compressBlock_btopt(
1215
1316
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1216
1317
  const void* src, size_t srcSize)
1217
1318
  {
1218
1319
  DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1219
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
1320
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1220
1321
  }
1221
1322
 
1222
1323
 
1223
- /* used in 2-pass strategy */
1224
- static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
1225
- {
1226
- U32 s, sum=0;
1227
- assert(ZSTD_FREQ_DIV+bonus >= 0);
1228
- for (s=0; s<lastEltIndex+1; s++) {
1229
- table[s] <<= ZSTD_FREQ_DIV+bonus;
1230
- table[s]--;
1231
- sum += table[s];
1232
- }
1233
- return sum;
1234
- }
1235
1324
 
1236
- /* used in 2-pass strategy */
1237
- MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
1238
- {
1239
- if (ZSTD_compressedLiterals(optPtr))
1240
- optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
1241
- optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
1242
- optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
1243
- optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
1244
- }
1245
1325
 
1246
1326
  /* ZSTD_initStats_ultra():
1247
1327
  * make a first compression pass, just to seed stats with more accurate starting values.
@@ -1263,7 +1343,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1263
1343
  assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
1264
1344
  assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
1265
1345
 
1266
- ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
1346
+ ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
1267
1347
 
1268
1348
  /* invalidate first scan from history */
1269
1349
  ZSTD_resetSeqStore(seqStore);
@@ -1272,8 +1352,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1272
1352
  ms->window.lowLimit = ms->window.dictLimit;
1273
1353
  ms->nextToUpdate = ms->window.dictLimit;
1274
1354
 
1275
- /* re-inforce weight of collected statistics */
1276
- ZSTD_upscaleStats(&ms->opt);
1277
1355
  }
1278
1356
 
1279
1357
  size_t ZSTD_compressBlock_btultra(
@@ -1281,7 +1359,7 @@ size_t ZSTD_compressBlock_btultra(
1281
1359
  const void* src, size_t srcSize)
1282
1360
  {
1283
1361
  DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1284
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1362
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1285
1363
  }
1286
1364
 
1287
1365
  size_t ZSTD_compressBlock_btultra2(
@@ -1309,35 +1387,35 @@ size_t ZSTD_compressBlock_btultra2(
1309
1387
  ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1310
1388
  }
1311
1389
 
1312
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1390
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1313
1391
  }
1314
1392
 
1315
1393
  size_t ZSTD_compressBlock_btopt_dictMatchState(
1316
1394
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1317
1395
  const void* src, size_t srcSize)
1318
1396
  {
1319
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
1397
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1320
1398
  }
1321
1399
 
1322
1400
  size_t ZSTD_compressBlock_btultra_dictMatchState(
1323
1401
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1324
1402
  const void* src, size_t srcSize)
1325
1403
  {
1326
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
1404
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1327
1405
  }
1328
1406
 
1329
1407
  size_t ZSTD_compressBlock_btopt_extDict(
1330
1408
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1331
1409
  const void* src, size_t srcSize)
1332
1410
  {
1333
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
1411
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1334
1412
  }
1335
1413
 
1336
1414
  size_t ZSTD_compressBlock_btultra_extDict(
1337
1415
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1338
1416
  const void* src, size_t srcSize)
1339
1417
  {
1340
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
1418
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1341
1419
  }
1342
1420
 
1343
1421
  /* note : no btultra2 variant for extDict nor dictMatchState,