zstd-ruby 1.5.0.0 → 1.5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +2 -2
  3. data/README.md +1 -1
  4. data/ext/zstdruby/extconf.rb +1 -0
  5. data/ext/zstdruby/libzstd/Makefile +50 -175
  6. data/ext/zstdruby/libzstd/README.md +7 -1
  7. data/ext/zstdruby/libzstd/common/bitstream.h +24 -9
  8. data/ext/zstdruby/libzstd/common/compiler.h +89 -43
  9. data/ext/zstdruby/libzstd/common/entropy_common.c +11 -5
  10. data/ext/zstdruby/libzstd/common/error_private.h +79 -0
  11. data/ext/zstdruby/libzstd/common/fse.h +2 -1
  12. data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
  13. data/ext/zstdruby/libzstd/common/huf.h +24 -22
  14. data/ext/zstdruby/libzstd/common/mem.h +18 -0
  15. data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
  16. data/ext/zstdruby/libzstd/common/xxhash.c +5 -805
  17. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  18. data/ext/zstdruby/libzstd/common/zstd_internal.h +92 -88
  19. data/ext/zstdruby/libzstd/common/zstd_trace.h +12 -3
  20. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  21. data/ext/zstdruby/libzstd/compress/fse_compress.c +63 -27
  22. data/ext/zstdruby/libzstd/compress/huf_compress.c +537 -104
  23. data/ext/zstdruby/libzstd/compress/zstd_compress.c +194 -278
  24. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +102 -44
  25. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +4 -3
  26. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +3 -1
  27. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +5 -4
  28. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +3 -2
  29. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +3 -3
  30. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +289 -114
  31. data/ext/zstdruby/libzstd/compress/zstd_fast.c +302 -123
  32. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +418 -502
  33. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +4 -4
  34. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
  35. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +4 -1
  36. data/ext/zstdruby/libzstd/compress/zstd_opt.c +186 -108
  37. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +59 -29
  38. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +727 -189
  39. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
  40. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +85 -22
  41. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +744 -220
  42. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -2
  43. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +34 -3
  44. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +23 -3
  45. data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
  46. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +11 -4
  47. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +99 -28
  48. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +2 -6
  49. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +3 -7
  50. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +3 -7
  51. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +3 -7
  52. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +3 -7
  53. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +3 -7
  54. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -7
  55. data/ext/zstdruby/libzstd/libzstd.mk +185 -0
  56. data/ext/zstdruby/libzstd/libzstd.pc.in +1 -0
  57. data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
  58. data/ext/zstdruby/libzstd/zdict.h +4 -4
  59. data/ext/zstdruby/libzstd/zstd.h +179 -136
  60. data/ext/zstdruby/zstdruby.c +2 -2
  61. data/lib/zstd-ruby/version.rb +1 -1
  62. metadata +8 -3
@@ -159,12 +159,12 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
159
159
  size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
160
160
  size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
161
161
  + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
162
- return params.enableLdm ? totalSize : 0;
162
+ return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
163
163
  }
164
164
 
165
165
  size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
166
166
  {
167
- return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
167
+ return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0;
168
168
  }
169
169
 
170
170
  /** ZSTD_ldm_getBucket() :
@@ -478,7 +478,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
478
478
  */
479
479
  if (anchor > ip + hashed) {
480
480
  ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
481
- /* Continue the outter loop at anchor (ip + hashed == anchor). */
481
+ /* Continue the outer loop at anchor (ip + hashed == anchor). */
482
482
  ip = anchor - hashed;
483
483
  break;
484
484
  }
@@ -657,7 +657,7 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
657
657
 
658
658
  size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
659
659
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
660
- ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
660
+ ZSTD_paramSwitch_e useRowMatchFinder,
661
661
  void const* src, size_t srcSize)
662
662
  {
663
663
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -66,7 +66,7 @@ size_t ZSTD_ldm_generateSequences(
66
66
  */
67
67
  size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
68
68
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
69
- ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
69
+ ZSTD_paramSwitch_e useRowMatchFinder,
70
70
  void const* src, size_t srcSize);
71
71
 
72
72
  /**
@@ -11,7 +11,10 @@
11
11
  #ifndef ZSTD_LDM_GEARTAB_H
12
12
  #define ZSTD_LDM_GEARTAB_H
13
13
 
14
- static U64 ZSTD_ldm_gearTab[256] = {
14
+ #include "../common/compiler.h" /* UNUSED_ATTR */
15
+ #include "../common/mem.h" /* U64 */
16
+
17
+ static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = {
15
18
  0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
16
19
  0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
17
20
  0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
@@ -14,7 +14,6 @@
14
14
 
15
15
 
16
16
  #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17
- #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
18
17
  #define ZSTD_MAX_PRICE (1<<30)
19
18
 
20
19
  #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
@@ -24,11 +23,11 @@
24
23
  * Price functions for optimal parser
25
24
  ***************************************/
26
25
 
27
- #if 0 /* approximation at bit level */
26
+ #if 0 /* approximation at bit level (for tests) */
28
27
  # define BITCOST_ACCURACY 0
29
28
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
30
- # define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
31
- #elif 0 /* fractional bit accuracy */
29
+ # define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
30
+ #elif 0 /* fractional bit accuracy (for tests) */
32
31
  # define BITCOST_ACCURACY 8
33
32
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
34
33
  # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
@@ -66,7 +65,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)
66
65
 
67
66
  static int ZSTD_compressedLiterals(optState_t const* const optPtr)
68
67
  {
69
- return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
68
+ return optPtr->literalCompressionMode != ZSTD_ps_disable;
70
69
  }
71
70
 
72
71
  static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
@@ -79,25 +78,46 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
79
78
  }
80
79
 
81
80
 
82
- /* ZSTD_downscaleStat() :
83
- * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
84
- * return the resulting sum of elements */
85
- static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
81
+ static U32 sum_u32(const unsigned table[], size_t nbElts)
82
+ {
83
+ size_t n;
84
+ U32 total = 0;
85
+ for (n=0; n<nbElts; n++) {
86
+ total += table[n];
87
+ }
88
+ return total;
89
+ }
90
+
91
+ static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
86
92
  {
87
93
  U32 s, sum=0;
88
- DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
89
- assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
94
+ DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
95
+ assert(shift < 30);
90
96
  for (s=0; s<lastEltIndex+1; s++) {
91
- table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
97
+ table[s] = 1 + (table[s] >> shift);
92
98
  sum += table[s];
93
99
  }
94
100
  return sum;
95
101
  }
96
102
 
103
+ /* ZSTD_scaleStats() :
104
+ * reduce all elements in table if sum is too large
105
+ * return the resulting sum of elements */
106
+ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
107
+ {
108
+ U32 const prevsum = sum_u32(table, lastEltIndex+1);
109
+ U32 const factor = prevsum >> logTarget;
110
+ DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
111
+ assert(logTarget < 30);
112
+ if (factor <= 1) return prevsum;
113
+ return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
114
+ }
115
+
97
116
  /* ZSTD_rescaleFreqs() :
98
117
  * if first block (detected by optPtr->litLengthSum == 0) : init statistics
99
118
  * take hints from dictionary if there is one
100
- * or init from zero, using src for literals stats, or flat 1 for match symbols
119
+ * and init from zero if there is none,
120
+ * using src for literals stats, and baseline stats for sequence symbols
101
121
  * otherwise downscale existing stats, to be used as seed for next block.
102
122
  */
103
123
  static void
@@ -126,7 +146,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
126
146
  optPtr->litSum = 0;
127
147
  for (lit=0; lit<=MaxLit; lit++) {
128
148
  U32 const scaleLog = 11; /* scale to 2K */
129
- U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
149
+ U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
130
150
  assert(bitCost <= scaleLog);
131
151
  optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
132
152
  optPtr->litSum += optPtr->litFreq[lit];
@@ -174,14 +194,18 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
174
194
  if (compressedLiterals) {
175
195
  unsigned lit = MaxLit;
176
196
  HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
177
- optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
197
+ optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
178
198
  }
179
199
 
180
- { unsigned ll;
181
- for (ll=0; ll<=MaxLL; ll++)
182
- optPtr->litLengthFreq[ll] = 1;
200
+ { unsigned const baseLLfreqs[MaxLL+1] = {
201
+ 4, 2, 1, 1, 1, 1, 1, 1,
202
+ 1, 1, 1, 1, 1, 1, 1, 1,
203
+ 1, 1, 1, 1, 1, 1, 1, 1,
204
+ 1, 1, 1, 1, 1, 1, 1, 1,
205
+ 1, 1, 1, 1
206
+ };
207
+ ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
183
208
  }
184
- optPtr->litLengthSum = MaxLL+1;
185
209
 
186
210
  { unsigned ml;
187
211
  for (ml=0; ml<=MaxML; ml++)
@@ -189,21 +213,25 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
189
213
  }
190
214
  optPtr->matchLengthSum = MaxML+1;
191
215
 
192
- { unsigned of;
193
- for (of=0; of<=MaxOff; of++)
194
- optPtr->offCodeFreq[of] = 1;
216
+ { unsigned const baseOFCfreqs[MaxOff+1] = {
217
+ 6, 2, 1, 1, 2, 3, 4, 4,
218
+ 4, 3, 2, 1, 1, 1, 1, 1,
219
+ 1, 1, 1, 1, 1, 1, 1, 1,
220
+ 1, 1, 1, 1, 1, 1, 1, 1
221
+ };
222
+ ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
195
223
  }
196
- optPtr->offCodeSum = MaxOff+1;
224
+
197
225
 
198
226
  }
199
227
 
200
228
  } else { /* new block : re-use previous statistics, scaled down */
201
229
 
202
230
  if (compressedLiterals)
203
- optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
204
- optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
205
- optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
206
- optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
231
+ optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
232
+ optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
233
+ optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
234
+ optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
207
235
  }
208
236
 
209
237
  ZSTD_setBasePrices(optPtr, optLevel);
@@ -338,7 +366,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
338
366
 
339
367
  /* Update hashTable3 up to ip (excluded)
340
368
  Assumption : always within prefix (i.e. not within extDict) */
341
- static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
369
+ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
342
370
  U32* nextToUpdate3,
343
371
  const BYTE* const ip)
344
372
  {
@@ -364,11 +392,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
364
392
  * Binary Tree search
365
393
  ***************************************/
366
394
  /** ZSTD_insertBt1() : add one or multiple positions to tree.
367
- * ip : assumed <= iend-8 .
395
+ * @param ip assumed <= iend-8 .
396
+ * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
368
397
  * @return : nb of positions added */
369
398
  static U32 ZSTD_insertBt1(
370
- ZSTD_matchState_t* ms,
399
+ const ZSTD_matchState_t* ms,
371
400
  const BYTE* const ip, const BYTE* const iend,
401
+ U32 const target,
372
402
  U32 const mls, const int extDict)
373
403
  {
374
404
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -391,7 +421,10 @@ static U32 ZSTD_insertBt1(
391
421
  U32* smallerPtr = bt + 2*(curr&btMask);
392
422
  U32* largerPtr = smallerPtr + 1;
393
423
  U32 dummy32; /* to be nullified at the end */
394
- U32 const windowLow = ms->window.lowLimit;
424
+ /* windowLow is based on target because
425
+ * we only need positions that will be in the window at the end of the tree update.
426
+ */
427
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
395
428
  U32 matchEndIdx = curr+8+1;
396
429
  size_t bestLength = 8;
397
430
  U32 nbCompares = 1U << cParams->searchLog;
@@ -404,11 +437,12 @@ static U32 ZSTD_insertBt1(
404
437
 
405
438
  DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
406
439
 
440
+ assert(curr <= target);
407
441
  assert(ip <= iend-8); /* required for h calculation */
408
442
  hashTable[h] = curr; /* Update Hash Table */
409
443
 
410
444
  assert(windowLow > 0);
411
- while (nbCompares-- && (matchIndex >= windowLow)) {
445
+ for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
412
446
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
413
447
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
414
448
  assert(matchIndex < curr);
@@ -492,7 +526,7 @@ void ZSTD_updateTree_internal(
492
526
  idx, target, dictMode);
493
527
 
494
528
  while(idx < target) {
495
- U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
529
+ U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
496
530
  assert(idx < (U32)(idx + forward));
497
531
  idx += forward;
498
532
  }
@@ -635,11 +669,11 @@ U32 ZSTD_insertBtAndGetAllMatches (
635
669
  return 1;
636
670
  } } }
637
671
  /* no dictMatchState lookup: dicts don't have a populated HC3 table */
638
- }
672
+ } /* if (mls == 3) */
639
673
 
640
674
  hashTable[h] = curr; /* Update Hash Table */
641
675
 
642
- while (nbCompares-- && (matchIndex >= matchLow)) {
676
+ for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
643
677
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
644
678
  const BYTE* match;
645
679
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
@@ -672,8 +706,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
672
706
  | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
673
707
  if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
674
708
  break; /* drop, to preserve bt consistency (miss a little bit of compression) */
675
- }
676
- }
709
+ } }
677
710
 
678
711
  if (match[matchLength] < ip[matchLength]) {
679
712
  /* match smaller than current */
@@ -692,12 +725,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
692
725
 
693
726
  *smallerPtr = *largerPtr = 0;
694
727
 
728
+ assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
695
729
  if (dictMode == ZSTD_dictMatchState && nbCompares) {
696
730
  size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
697
731
  U32 dictMatchIndex = dms->hashTable[dmsH];
698
732
  const U32* const dmsBt = dms->chainTable;
699
733
  commonLengthSmaller = commonLengthLarger = 0;
700
- while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
734
+ for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
701
735
  const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
702
736
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
703
737
  const BYTE* match = dmsBase + dictMatchIndex;
@@ -718,8 +752,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
718
752
  if ( (matchLength > ZSTD_OPT_NUM)
719
753
  | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
720
754
  break; /* drop, to guarantee consistency (miss a little bit of compression) */
721
- }
722
- }
755
+ } }
723
756
 
724
757
  if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
725
758
  if (match[matchLength] < ip[matchLength]) {
@@ -729,39 +762,90 @@ U32 ZSTD_insertBtAndGetAllMatches (
729
762
  /* match is larger than current */
730
763
  commonLengthLarger = matchLength;
731
764
  dictMatchIndex = nextPtr[0];
732
- }
733
- }
734
- }
765
+ } } } /* if (dictMode == ZSTD_dictMatchState) */
735
766
 
736
767
  assert(matchEndIdx > curr+8);
737
768
  ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
738
769
  return mnum;
739
770
  }
740
771
 
741
-
742
- FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
743
- ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
744
- ZSTD_matchState_t* ms,
745
- U32* nextToUpdate3,
746
- const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
747
- const U32 rep[ZSTD_REP_NUM],
748
- U32 const ll0,
749
- U32 const lengthToBeat)
772
+ typedef U32 (*ZSTD_getAllMatchesFn)(
773
+ ZSTD_match_t*,
774
+ ZSTD_matchState_t*,
775
+ U32*,
776
+ const BYTE*,
777
+ const BYTE*,
778
+ const U32 rep[ZSTD_REP_NUM],
779
+ U32 const ll0,
780
+ U32 const lengthToBeat);
781
+
782
+ FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
783
+ ZSTD_match_t* matches,
784
+ ZSTD_matchState_t* ms,
785
+ U32* nextToUpdate3,
786
+ const BYTE* ip,
787
+ const BYTE* const iHighLimit,
788
+ const U32 rep[ZSTD_REP_NUM],
789
+ U32 const ll0,
790
+ U32 const lengthToBeat,
791
+ const ZSTD_dictMode_e dictMode,
792
+ const U32 mls)
750
793
  {
751
- const ZSTD_compressionParameters* const cParams = &ms->cParams;
752
- U32 const matchLengthSearch = cParams->minMatch;
753
- DEBUGLOG(8, "ZSTD_BtGetAllMatches");
754
- if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
755
- ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
756
- switch(matchLengthSearch)
757
- {
758
- case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
759
- default :
760
- case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
761
- case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
762
- case 7 :
763
- case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
794
+ assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
795
+ DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
796
+ if (ip < ms->window.base + ms->nextToUpdate)
797
+ return 0; /* skipped area */
798
+ ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
799
+ return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
800
+ }
801
+
802
+ #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
803
+
804
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
805
+ static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
806
+ ZSTD_match_t* matches, \
807
+ ZSTD_matchState_t* ms, \
808
+ U32* nextToUpdate3, \
809
+ const BYTE* ip, \
810
+ const BYTE* const iHighLimit, \
811
+ const U32 rep[ZSTD_REP_NUM], \
812
+ U32 const ll0, \
813
+ U32 const lengthToBeat) \
814
+ { \
815
+ return ZSTD_btGetAllMatches_internal( \
816
+ matches, ms, nextToUpdate3, ip, iHighLimit, \
817
+ rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
818
+ }
819
+
820
+ #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
821
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
822
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
823
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
824
+ GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
825
+
826
+ GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
827
+ GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
828
+ GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
829
+
830
+ #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
831
+ { \
832
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
833
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
834
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
835
+ ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
764
836
  }
837
+
838
+ static ZSTD_getAllMatchesFn ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
839
+ {
840
+ ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
841
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
842
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
843
+ ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
844
+ };
845
+ U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
846
+ assert((U32)dictMode < 3);
847
+ assert(mls - 3 < 4);
848
+ return getAllMatchesFns[(int)dictMode][mls - 3];
765
849
  }
766
850
 
767
851
  /*************************
@@ -893,17 +977,17 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
893
977
  */
894
978
  U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
895
979
  ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
896
- }
980
+ }
897
981
  ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
898
982
  }
899
983
  ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
900
984
  }
901
985
 
986
+
902
987
  /*-*******************************
903
988
  * Optimal parser
904
989
  *********************************/
905
990
 
906
-
907
991
  static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
908
992
  {
909
993
  return sol.litlen + sol.mlen;
@@ -944,6 +1028,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
944
1028
  const BYTE* const prefixStart = base + ms->window.dictLimit;
945
1029
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
946
1030
 
1031
+ ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1032
+
947
1033
  U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
948
1034
  U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
949
1035
  U32 nextToUpdate3 = ms->nextToUpdate;
@@ -971,7 +1057,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
971
1057
  /* find first match */
972
1058
  { U32 const litlen = (U32)(ip - anchor);
973
1059
  U32 const ll0 = !litlen;
974
- U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
1060
+ U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
975
1061
  ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
976
1062
  (U32)(ip-istart), (U32)(iend - ip));
977
1063
  if (!nbMatches) { ip++; continue; }
@@ -985,7 +1071,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
985
1071
  * in every price. We include the literal length to avoid negative
986
1072
  * prices when we subtract the previous literal length.
987
1073
  */
988
- opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
1074
+ opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
989
1075
 
990
1076
  /* large match -> immediate encoding */
991
1077
  { U32 const maxML = matches[nbMatches-1].len;
@@ -1005,7 +1091,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1005
1091
  } }
1006
1092
 
1007
1093
  /* set prices for first matches starting position == 0 */
1008
- { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1094
+ assert(opt[0].price >= 0);
1095
+ { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1009
1096
  U32 pos;
1010
1097
  U32 matchNb;
1011
1098
  for (pos = 1; pos < minMatch; pos++) {
@@ -1022,7 +1109,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1022
1109
  opt[pos].mlen = pos;
1023
1110
  opt[pos].off = offset;
1024
1111
  opt[pos].litlen = litlen;
1025
- opt[pos].price = sequencePrice;
1112
+ opt[pos].price = (int)sequencePrice;
1026
1113
  } }
1027
1114
  last_pos = pos-1;
1028
1115
  }
@@ -1037,9 +1124,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1037
1124
  /* Fix current position with one literal if cheaper */
1038
1125
  { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
1039
1126
  int const price = opt[cur-1].price
1040
- + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1041
- + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1042
- - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1127
+ + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1128
+ + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1129
+ - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1043
1130
  assert(price < 1000000000); /* overflow check */
1044
1131
  if (price <= opt[cur].price) {
1045
1132
  DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
@@ -1082,11 +1169,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1082
1169
  continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1083
1170
  }
1084
1171
 
1172
+ assert(opt[cur].price >= 0);
1085
1173
  { U32 const ll0 = (opt[cur].mlen != 0);
1086
1174
  U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
1087
- U32 const previousPrice = opt[cur].price;
1175
+ U32 const previousPrice = (U32)opt[cur].price;
1088
1176
  U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1089
- U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
1177
+ U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1090
1178
  U32 matchNb;
1091
1179
 
1092
1180
  ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
@@ -1124,7 +1212,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1124
1212
 
1125
1213
  for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
1126
1214
  U32 const pos = cur + mlen;
1127
- int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1215
+ int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1128
1216
 
1129
1217
  if ((pos > last_pos) || (price < opt[pos].price)) {
1130
1218
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
@@ -1210,38 +1298,30 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1210
1298
  return (size_t)(iend - anchor);
1211
1299
  }
1212
1300
 
1301
+ static size_t ZSTD_compressBlock_opt0(
1302
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1303
+ const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1304
+ {
1305
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1306
+ }
1307
+
1308
+ static size_t ZSTD_compressBlock_opt2(
1309
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1310
+ const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1311
+ {
1312
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1313
+ }
1213
1314
 
1214
1315
  size_t ZSTD_compressBlock_btopt(
1215
1316
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1216
1317
  const void* src, size_t srcSize)
1217
1318
  {
1218
1319
  DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1219
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
1320
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1220
1321
  }
1221
1322
 
1222
1323
 
1223
- /* used in 2-pass strategy */
1224
- static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
1225
- {
1226
- U32 s, sum=0;
1227
- assert(ZSTD_FREQ_DIV+bonus >= 0);
1228
- for (s=0; s<lastEltIndex+1; s++) {
1229
- table[s] <<= ZSTD_FREQ_DIV+bonus;
1230
- table[s]--;
1231
- sum += table[s];
1232
- }
1233
- return sum;
1234
- }
1235
1324
 
1236
- /* used in 2-pass strategy */
1237
- MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
1238
- {
1239
- if (ZSTD_compressedLiterals(optPtr))
1240
- optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
1241
- optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
1242
- optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
1243
- optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
1244
- }
1245
1325
 
1246
1326
  /* ZSTD_initStats_ultra():
1247
1327
  * make a first compression pass, just to seed stats with more accurate starting values.
@@ -1263,7 +1343,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1263
1343
  assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
1264
1344
  assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
1265
1345
 
1266
- ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
1346
+ ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
1267
1347
 
1268
1348
  /* invalidate first scan from history */
1269
1349
  ZSTD_resetSeqStore(seqStore);
@@ -1272,8 +1352,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1272
1352
  ms->window.lowLimit = ms->window.dictLimit;
1273
1353
  ms->nextToUpdate = ms->window.dictLimit;
1274
1354
 
1275
- /* re-inforce weight of collected statistics */
1276
- ZSTD_upscaleStats(&ms->opt);
1277
1355
  }
1278
1356
 
1279
1357
  size_t ZSTD_compressBlock_btultra(
@@ -1281,7 +1359,7 @@ size_t ZSTD_compressBlock_btultra(
1281
1359
  const void* src, size_t srcSize)
1282
1360
  {
1283
1361
  DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1284
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1362
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1285
1363
  }
1286
1364
 
1287
1365
  size_t ZSTD_compressBlock_btultra2(
@@ -1309,35 +1387,35 @@ size_t ZSTD_compressBlock_btultra2(
1309
1387
  ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1310
1388
  }
1311
1389
 
1312
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1390
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1313
1391
  }
1314
1392
 
1315
1393
  size_t ZSTD_compressBlock_btopt_dictMatchState(
1316
1394
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1317
1395
  const void* src, size_t srcSize)
1318
1396
  {
1319
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
1397
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1320
1398
  }
1321
1399
 
1322
1400
  size_t ZSTD_compressBlock_btultra_dictMatchState(
1323
1401
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1324
1402
  const void* src, size_t srcSize)
1325
1403
  {
1326
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
1404
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1327
1405
  }
1328
1406
 
1329
1407
  size_t ZSTD_compressBlock_btopt_extDict(
1330
1408
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1331
1409
  const void* src, size_t srcSize)
1332
1410
  {
1333
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
1411
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1334
1412
  }
1335
1413
 
1336
1414
  size_t ZSTD_compressBlock_btultra_extDict(
1337
1415
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1338
1416
  const void* src, size_t srcSize)
1339
1417
  {
1340
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
1418
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1341
1419
  }
1342
1420
 
1343
1421
  /* note : no btultra2 variant for extDict nor dictMatchState,