zstd-ruby 1.3.4.0 → 1.3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +56 -10
  4. data/ext/zstdruby/libzstd/README.md +4 -0
  5. data/ext/zstdruby/libzstd/common/bitstream.h +6 -19
  6. data/ext/zstdruby/libzstd/common/compiler.h +3 -3
  7. data/ext/zstdruby/libzstd/common/cpu.h +1 -2
  8. data/ext/zstdruby/libzstd/common/debug.c +44 -0
  9. data/ext/zstdruby/libzstd/common/debug.h +123 -0
  10. data/ext/zstdruby/libzstd/common/entropy_common.c +16 -1
  11. data/ext/zstdruby/libzstd/common/fse.h +45 -41
  12. data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
  13. data/ext/zstdruby/libzstd/common/huf.h +34 -27
  14. data/ext/zstdruby/libzstd/common/pool.c +89 -32
  15. data/ext/zstdruby/libzstd/common/pool.h +29 -19
  16. data/ext/zstdruby/libzstd/common/zstd_common.c +0 -5
  17. data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -37
  18. data/ext/zstdruby/libzstd/compress/fse_compress.c +28 -163
  19. data/ext/zstdruby/libzstd/compress/hist.c +195 -0
  20. data/ext/zstdruby/libzstd/compress/hist.h +92 -0
  21. data/ext/zstdruby/libzstd/compress/huf_compress.c +14 -6
  22. data/ext/zstdruby/libzstd/compress/zstd_compress.c +798 -350
  23. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +120 -34
  24. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +247 -87
  25. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -1
  26. data/ext/zstdruby/libzstd/compress/zstd_fast.c +177 -56
  27. data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -1
  28. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +331 -65
  29. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +13 -0
  30. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +15 -20
  31. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -2
  32. data/ext/zstdruby/libzstd/compress/zstd_opt.c +503 -300
  33. data/ext/zstdruby/libzstd/compress/zstd_opt.h +7 -0
  34. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +122 -47
  35. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +5 -5
  36. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +325 -325
  37. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +80 -43
  38. data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
  39. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +5 -5
  40. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +12 -61
  41. data/ext/zstdruby/libzstd/zstd.h +137 -69
  42. data/lib/zstd-ruby/version.rb +1 -1
  43. metadata +7 -3
@@ -36,6 +36,19 @@ size_t ZSTD_compressBlock_greedy(
36
36
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
37
37
  ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
38
38
 
39
+ size_t ZSTD_compressBlock_btlazy2_dictMatchState(
40
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
41
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
42
+ size_t ZSTD_compressBlock_lazy2_dictMatchState(
43
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
44
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
45
+ size_t ZSTD_compressBlock_lazy_dictMatchState(
46
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
47
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
48
+ size_t ZSTD_compressBlock_greedy_dictMatchState(
49
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
50
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
51
+
39
52
  size_t ZSTD_compressBlock_greedy_extDict(
40
53
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
41
54
  ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
@@ -9,6 +9,7 @@
9
9
 
10
10
  #include "zstd_ldm.h"
11
11
 
12
+ #include "debug.h"
12
13
  #include "zstd_fast.h" /* ZSTD_fillHashTable() */
13
14
  #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
14
15
 
@@ -20,7 +21,7 @@
20
21
  void ZSTD_ldm_adjustParameters(ldmParams_t* params,
21
22
  ZSTD_compressionParameters const* cParams)
22
23
  {
23
- U32 const windowLog = cParams->windowLog;
24
+ params->windowLog = cParams->windowLog;
24
25
  ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
25
26
  DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
26
27
  if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
@@ -33,12 +34,13 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params,
33
34
  params->minMatchLength = minMatch;
34
35
  }
35
36
  if (params->hashLog == 0) {
36
- params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
37
+ params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
37
38
  assert(params->hashLog <= ZSTD_HASHLOG_MAX);
38
39
  }
39
40
  if (params->hashEveryLog == 0) {
40
- params->hashEveryLog =
41
- windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
41
+ params->hashEveryLog = params->windowLog < params->hashLog
42
+ ? 0
43
+ : params->windowLog - params->hashLog;
42
44
  }
43
45
  params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
44
46
  }
@@ -224,13 +226,11 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
224
226
  switch(cParams->strategy)
225
227
  {
226
228
  case ZSTD_fast:
227
- ZSTD_fillHashTable(ms, cParams, iend);
228
- ms->nextToUpdate = (U32)(iend - ms->window.base);
229
+ ZSTD_fillHashTable(ms, cParams, iend, ZSTD_dtlm_fast);
229
230
  break;
230
231
 
231
232
  case ZSTD_dfast:
232
- ZSTD_fillDoubleHashTable(ms, cParams, iend);
233
- ms->nextToUpdate = (U32)(iend - ms->window.base);
233
+ ZSTD_fillDoubleHashTable(ms, cParams, iend, ZSTD_dtlm_fast);
234
234
  break;
235
235
 
236
236
  case ZSTD_greedy:
@@ -508,7 +508,7 @@ size_t ZSTD_ldm_generateSequences(
508
508
  * * Try invalidation after the sequence generation and test the
509
509
  * offset against maxDist directly.
510
510
  */
511
- ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL);
511
+ ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
512
512
  /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
513
513
  newLeftoverSize = ZSTD_ldm_generateSequences_internal(
514
514
  ldmState, sequences, params, chunkStart, chunkSize);
@@ -591,19 +591,18 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
591
591
 
592
592
  size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
593
593
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
594
- ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
595
- int const extDict)
594
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
596
595
  {
597
596
  unsigned const minMatch = cParams->searchLength;
598
597
  ZSTD_blockCompressor const blockCompressor =
599
- ZSTD_selectBlockCompressor(cParams->strategy, extDict);
600
- BYTE const* const base = ms->window.base;
598
+ ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
601
599
  /* Input bounds */
602
600
  BYTE const* const istart = (BYTE const*)src;
603
601
  BYTE const* const iend = istart + srcSize;
604
602
  /* Input positions */
605
603
  BYTE const* ip = istart;
606
604
 
605
+ DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
607
606
  assert(rawSeqStore->pos <= rawSeqStore->size);
608
607
  assert(rawSeqStore->size <= rawSeqStore->capacity);
609
608
  /* Loop through each sequence and apply the block compressor to the lits */
@@ -623,12 +622,12 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
623
622
  ZSTD_ldm_limitTableUpdate(ms, ip);
624
623
  ZSTD_ldm_fillFastTables(ms, cParams, ip);
625
624
  /* Run the block compressor */
625
+ DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
626
626
  {
627
627
  size_t const newLitLength =
628
628
  blockCompressor(ms, seqStore, rep, cParams, ip,
629
629
  sequence.litLength);
630
630
  ip += sequence.litLength;
631
- ms->nextToUpdate = (U32)(ip - base);
632
631
  /* Update the repcodes */
633
632
  for (i = ZSTD_REP_NUM - 1; i > 0; i--)
634
633
  rep[i] = rep[i-1];
@@ -644,10 +643,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
644
643
  ZSTD_ldm_limitTableUpdate(ms, ip);
645
644
  ZSTD_ldm_fillFastTables(ms, cParams, ip);
646
645
  /* Compress the last literals */
647
- {
648
- size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams,
649
- ip, iend - ip);
650
- ms->nextToUpdate = (U32)(iend - base);
651
- return lastLiterals;
652
- }
646
+ return blockCompressor(ms, seqStore, rep, cParams,
647
+ ip, iend - ip);
653
648
  }
@@ -62,8 +62,7 @@ size_t ZSTD_ldm_generateSequences(
62
62
  size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
63
63
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
64
64
  ZSTD_compressionParameters const* cParams,
65
- void const* src, size_t srcSize,
66
- int const extDict);
65
+ void const* src, size_t srcSize);
67
66
 
68
67
  /**
69
68
  * ZSTD_ldm_skipSequences():
@@ -9,10 +9,11 @@
9
9
  */
10
10
 
11
11
  #include "zstd_compress_internal.h"
12
+ #include "hist.h"
12
13
  #include "zstd_opt.h"
13
14
 
14
15
 
15
- #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
16
+ #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
16
17
  #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
17
18
  #define ZSTD_MAX_PRICE (1<<30)
18
19
 
@@ -20,128 +21,210 @@
20
21
  /*-*************************************
21
22
  * Price functions for optimal parser
22
23
  ***************************************/
23
- static void ZSTD_setLog2Prices(optState_t* optPtr)
24
+
25
+ #if 0 /* approximation at bit level */
26
+ # define BITCOST_ACCURACY 0
27
+ # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
28
+ # define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
29
+ #elif 0 /* fractional bit accuracy */
30
+ # define BITCOST_ACCURACY 8
31
+ # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
32
+ # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
33
+ #else /* opt==approx, ultra==accurate */
34
+ # define BITCOST_ACCURACY 8
35
+ # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
36
+ # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
37
+ #endif
38
+
39
+ MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
40
+ {
41
+ return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
42
+ }
43
+
44
+ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
24
45
  {
25
- optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
26
- optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
27
- optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
28
- optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
46
+ U32 const stat = rawStat + 1;
47
+ U32 const hb = ZSTD_highbit32(stat);
48
+ U32 const BWeight = hb * BITCOST_MULTIPLIER;
49
+ U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
50
+ U32 const weight = BWeight + FWeight;
51
+ assert(hb + BITCOST_ACCURACY < 31);
52
+ return weight;
29
53
  }
30
54
 
55
+ /* debugging function, @return price in bytes */
56
+ MEM_STATIC double ZSTD_fCost(U32 price)
57
+ {
58
+ return (double)price / (BITCOST_MULTIPLIER*8);
59
+ }
60
+
61
+ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
62
+ {
63
+ optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
64
+ optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
65
+ optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
66
+ optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
67
+ }
68
+
69
+
70
+ static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus)
71
+ {
72
+ U32 s, sum=0;
73
+ assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
74
+ for (s=0; s<=lastEltIndex; s++) {
75
+ table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
76
+ sum += table[s];
77
+ }
78
+ return sum;
79
+ }
31
80
 
32
81
  static void ZSTD_rescaleFreqs(optState_t* const optPtr,
33
- const BYTE* const src, size_t const srcSize)
82
+ const BYTE* const src, size_t const srcSize,
83
+ int optLevel)
34
84
  {
35
- optPtr->staticPrices = 0;
36
-
37
- if (optPtr->litLengthSum == 0) { /* first init */
38
- unsigned u;
39
- if (srcSize <= 1024) optPtr->staticPrices = 1;
40
-
41
- assert(optPtr->litFreq!=NULL);
42
- for (u=0; u<=MaxLit; u++)
43
- optPtr->litFreq[u] = 0;
44
- for (u=0; u<srcSize; u++)
45
- optPtr->litFreq[src[u]]++;
46
- optPtr->litSum = 0;
47
- for (u=0; u<=MaxLit; u++) {
48
- optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV);
49
- optPtr->litSum += optPtr->litFreq[u];
50
- }
85
+ optPtr->priceType = zop_dynamic;
86
+
87
+ if (optPtr->litLengthSum == 0) { /* first block : init */
88
+ if (srcSize <= 1024) /* heuristic */
89
+ optPtr->priceType = zop_predef;
90
+
91
+ assert(optPtr->symbolCosts != NULL);
92
+ if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */
93
+ optPtr->priceType = zop_dynamic;
94
+
95
+ assert(optPtr->litFreq != NULL);
96
+ optPtr->litSum = 0;
97
+ { unsigned lit;
98
+ for (lit=0; lit<=MaxLit; lit++) {
99
+ U32 const scaleLog = 11; /* scale to 2K */
100
+ U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
101
+ assert(bitCost <= scaleLog);
102
+ optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
103
+ optPtr->litSum += optPtr->litFreq[lit];
104
+ } }
105
+
106
+ { unsigned ll;
107
+ FSE_CState_t llstate;
108
+ FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
109
+ optPtr->litLengthSum = 0;
110
+ for (ll=0; ll<=MaxLL; ll++) {
111
+ U32 const scaleLog = 10; /* scale to 1K */
112
+ U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
113
+ assert(bitCost < scaleLog);
114
+ optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
115
+ optPtr->litLengthSum += optPtr->litLengthFreq[ll];
116
+ } }
117
+
118
+ { unsigned ml;
119
+ FSE_CState_t mlstate;
120
+ FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
121
+ optPtr->matchLengthSum = 0;
122
+ for (ml=0; ml<=MaxML; ml++) {
123
+ U32 const scaleLog = 10;
124
+ U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
125
+ assert(bitCost < scaleLog);
126
+ optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
127
+ optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
128
+ } }
129
+
130
+ { unsigned of;
131
+ FSE_CState_t ofstate;
132
+ FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
133
+ optPtr->offCodeSum = 0;
134
+ for (of=0; of<=MaxOff; of++) {
135
+ U32 const scaleLog = 10;
136
+ U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
137
+ assert(bitCost < scaleLog);
138
+ optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
139
+ optPtr->offCodeSum += optPtr->offCodeFreq[of];
140
+ } }
141
+
142
+ } else { /* not a dictionary */
143
+
144
+ assert(optPtr->litFreq != NULL);
145
+ { unsigned lit = MaxLit;
146
+ HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
147
+ }
148
+ optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
149
+
150
+ { unsigned ll;
151
+ for (ll=0; ll<=MaxLL; ll++)
152
+ optPtr->litLengthFreq[ll] = 1;
153
+ }
154
+ optPtr->litLengthSum = MaxLL+1;
155
+
156
+ { unsigned ml;
157
+ for (ml=0; ml<=MaxML; ml++)
158
+ optPtr->matchLengthFreq[ml] = 1;
159
+ }
160
+ optPtr->matchLengthSum = MaxML+1;
161
+
162
+ { unsigned of;
163
+ for (of=0; of<=MaxOff; of++)
164
+ optPtr->offCodeFreq[of] = 1;
165
+ }
166
+ optPtr->offCodeSum = MaxOff+1;
51
167
 
52
- for (u=0; u<=MaxLL; u++)
53
- optPtr->litLengthFreq[u] = 1;
54
- optPtr->litLengthSum = MaxLL+1;
55
- for (u=0; u<=MaxML; u++)
56
- optPtr->matchLengthFreq[u] = 1;
57
- optPtr->matchLengthSum = MaxML+1;
58
- for (u=0; u<=MaxOff; u++)
59
- optPtr->offCodeFreq[u] = 1;
60
- optPtr->offCodeSum = (MaxOff+1);
61
-
62
- } else {
63
- unsigned u;
64
-
65
- optPtr->litSum = 0;
66
- for (u=0; u<=MaxLit; u++) {
67
- optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1));
68
- optPtr->litSum += optPtr->litFreq[u];
69
- }
70
- optPtr->litLengthSum = 0;
71
- for (u=0; u<=MaxLL; u++) {
72
- optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
73
- optPtr->litLengthSum += optPtr->litLengthFreq[u];
74
- }
75
- optPtr->matchLengthSum = 0;
76
- for (u=0; u<=MaxML; u++) {
77
- optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
78
- optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
79
- }
80
- optPtr->offCodeSum = 0;
81
- for (u=0; u<=MaxOff; u++) {
82
- optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
83
- optPtr->offCodeSum += optPtr->offCodeFreq[u];
84
168
  }
169
+
170
+ } else { /* new block : re-use previous statistics, scaled down */
171
+
172
+ optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
173
+ optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
174
+ optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
175
+ optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
85
176
  }
86
177
 
87
- ZSTD_setLog2Prices(optPtr);
178
+ ZSTD_setBasePrices(optPtr, optLevel);
88
179
  }
89
180
 
90
-
91
181
  /* ZSTD_rawLiteralsCost() :
92
- * cost of literals (only) in given segment (which length can be null)
93
- * does not include cost of literalLength symbol */
182
+ * price of literals (only) in specified segment (which length can be 0).
183
+ * does not include price of literalLength symbol */
94
184
  static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
95
- const optState_t* const optPtr)
185
+ const optState_t* const optPtr,
186
+ int optLevel)
96
187
  {
97
- if (optPtr->staticPrices) return (litLength*6); /* 6 bit per literal - no statistic used */
98
188
  if (litLength == 0) return 0;
99
-
100
- /* literals */
101
- { U32 u;
102
- U32 cost = litLength * optPtr->log2litSum;
103
- for (u=0; u < litLength; u++)
104
- cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
105
- return cost;
189
+ if (optPtr->priceType == zop_predef)
190
+ return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
191
+
192
+ /* dynamic statistics */
193
+ { U32 price = litLength * optPtr->litSumBasePrice;
194
+ U32 u;
195
+ for (u=0; u < litLength; u++) {
196
+ assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
197
+ price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
198
+ }
199
+ return price;
106
200
  }
107
201
  }
108
202
 
109
203
  /* ZSTD_litLengthPrice() :
110
204
  * cost of literalLength symbol */
111
- static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr)
205
+ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
112
206
  {
113
- if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1);
207
+ if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
114
208
 
115
- /* literal Length */
209
+ /* dynamic statistics */
116
210
  { U32 const llCode = ZSTD_LLcode(litLength);
117
- U32 const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
118
- return price;
211
+ return (LL_bits[llCode] * BITCOST_MULTIPLIER) + (optPtr->litLengthSumBasePrice - WEIGHT(optPtr->litLengthFreq[llCode], optLevel));
119
212
  }
120
213
  }
121
214
 
122
- /* ZSTD_litLengthPrice() :
123
- * cost of the literal part of a sequence,
124
- * including literals themselves, and literalLength symbol */
125
- static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength,
126
- const optState_t* const optPtr)
127
- {
128
- return ZSTD_rawLiteralsCost(literals, litLength, optPtr)
129
- + ZSTD_litLengthPrice(litLength, optPtr);
130
- }
131
-
132
215
  /* ZSTD_litLengthContribution() :
133
216
  * @return ( cost(litlength) - cost(0) )
134
217
  * this value can then be added to rawLiteralsCost()
135
218
  * to provide a cost which is directly comparable to a match ending at same position */
136
- static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr)
219
+ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
137
220
  {
138
- if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1);
221
+ if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);
139
222
 
140
- /* literal Length */
223
+ /* dynamic statistics */
141
224
  { U32 const llCode = ZSTD_LLcode(litLength);
142
- int const contribution = LL_bits[llCode]
143
- + ZSTD_highbit32(optPtr->litLengthFreq[0]+1)
144
- - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
225
+ int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
226
+ + WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancels out */
227
+ - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
145
228
  #if 1
146
229
  return contribution;
147
230
  #else
@@ -155,10 +238,11 @@ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* con
155
238
  * which can be compared to the ending cost of a match
156
239
  * should a new match start at this position */
157
240
  static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
158
- const optState_t* const optPtr)
241
+ const optState_t* const optPtr,
242
+ int optLevel)
159
243
  {
160
- int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr)
161
- + ZSTD_litLengthContribution(litLength, optPtr);
244
+ int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
245
+ + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
162
246
  return contribution;
163
247
  }
164
248
 
@@ -166,31 +250,38 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe
166
250
  * Provides the cost of the match part (offset + matchLength) of a sequence
167
251
  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
168
252
  * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
169
- FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(
170
- U32 const offset, U32 const matchLength,
171
- const optState_t* const optPtr,
172
- int const optLevel)
253
+ FORCE_INLINE_TEMPLATE U32
254
+ ZSTD_getMatchPrice(U32 const offset,
255
+ U32 const matchLength,
256
+ const optState_t* const optPtr,
257
+ int const optLevel)
173
258
  {
174
259
  U32 price;
175
260
  U32 const offCode = ZSTD_highbit32(offset+1);
176
261
  U32 const mlBase = matchLength - MINMATCH;
177
262
  assert(matchLength >= MINMATCH);
178
263
 
179
- if (optPtr->staticPrices) /* fixed scheme, do not use statistics */
180
- return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode;
264
+ if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
265
+ return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
181
266
 
182
- price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
183
- if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */
267
+ /* dynamic statistics */
268
+ price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
269
+ if ((optLevel<2) /*static*/ && offCode >= 20)
270
+ price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
184
271
 
185
272
  /* match Length */
186
273
  { U32 const mlCode = ZSTD_MLcode(mlBase);
187
- price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
274
+ price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
188
275
  }
189
276
 
277
+ price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
278
+
190
279
  DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
191
280
  return price;
192
281
  }
193
282
 
283
+ /* ZSTD_updateStats() :
284
+ * assumption : literals + litLength <= iend */
194
285
  static void ZSTD_updateStats(optState_t* const optPtr,
195
286
  U32 litLength, const BYTE* literals,
196
287
  U32 offsetCode, U32 matchLength)
@@ -271,7 +362,7 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE*
271
362
  static U32 ZSTD_insertBt1(
272
363
  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
273
364
  const BYTE* const ip, const BYTE* const iend,
274
- U32 const mls, U32 const extDict)
365
+ U32 const mls, const int extDict)
275
366
  {
276
367
  U32* const hashTable = ms->hashTable;
277
368
  U32 const hashLog = cParams->hashLog;
@@ -293,6 +384,7 @@ static U32 ZSTD_insertBt1(
293
384
  U32* largerPtr = smallerPtr + 1;
294
385
  U32 dummy32; /* to be nullified at the end */
295
386
  U32 const windowLow = ms->window.lowLimit;
387
+ U32 const matchLow = windowLow ? windowLow : 1;
296
388
  U32 matchEndIdx = current+8+1;
297
389
  size_t bestLength = 8;
298
390
  U32 nbCompares = 1U << cParams->searchLog;
@@ -308,7 +400,7 @@ static U32 ZSTD_insertBt1(
308
400
  assert(ip <= iend-8); /* required for h calculation */
309
401
  hashTable[h] = current; /* Update Hash Table */
310
402
 
311
- while (nbCompares-- && (matchIndex > windowLow)) {
403
+ while (nbCompares-- && (matchIndex >= matchLow)) {
312
404
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
313
405
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
314
406
  assert(matchIndex < current);
@@ -334,8 +426,8 @@ static U32 ZSTD_insertBt1(
334
426
  }
335
427
  #endif
336
428
 
337
- if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
338
- assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */
429
+ if (!extDict || (matchIndex+matchLength >= dictLimit)) {
430
+ assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */
339
431
  match = base + matchIndex;
340
432
  matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
341
433
  } else {
@@ -381,16 +473,16 @@ FORCE_INLINE_TEMPLATE
381
473
  void ZSTD_updateTree_internal(
382
474
  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
383
475
  const BYTE* const ip, const BYTE* const iend,
384
- const U32 mls, const U32 extDict)
476
+ const U32 mls, const ZSTD_dictMode_e dictMode)
385
477
  {
386
478
  const BYTE* const base = ms->window.base;
387
479
  U32 const target = (U32)(ip - base);
388
480
  U32 idx = ms->nextToUpdate;
389
- DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)",
390
- idx, target, extDict);
481
+ DEBUGLOG(5, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
482
+ idx, target, dictMode);
391
483
 
392
484
  while(idx < target)
393
- idx += ZSTD_insertBt1(ms, cParams, base+idx, iend, mls, extDict);
485
+ idx += ZSTD_insertBt1(ms, cParams, base+idx, iend, mls, dictMode == ZSTD_extDict);
394
486
  ms->nextToUpdate = target;
395
487
  }
396
488
 
@@ -398,13 +490,13 @@ void ZSTD_updateTree(
398
490
  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
399
491
  const BYTE* ip, const BYTE* iend)
400
492
  {
401
- ZSTD_updateTree_internal(ms, cParams, ip, iend, cParams->searchLength, 0 /*extDict*/);
493
+ ZSTD_updateTree_internal(ms, cParams, ip, iend, cParams->searchLength, ZSTD_noDict);
402
494
  }
403
495
 
404
496
  FORCE_INLINE_TEMPLATE
405
497
  U32 ZSTD_insertBtAndGetAllMatches (
406
498
  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
407
- const BYTE* const ip, const BYTE* const iLimit, int const extDict,
499
+ const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
408
500
  U32 rep[ZSTD_REP_NUM], U32 const ll0,
409
501
  ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
410
502
  {
@@ -426,6 +518,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
426
518
  const BYTE* const prefixStart = base + dictLimit;
427
519
  U32 const btLow = btMask >= current ? 0 : current - btMask;
428
520
  U32 const windowLow = ms->window.lowLimit;
521
+ U32 const matchLow = windowLow ? windowLow : 1;
429
522
  U32* smallerPtr = bt + 2*(current&btMask);
430
523
  U32* largerPtr = bt + 2*(current&btMask) + 1;
431
524
  U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
@@ -433,8 +526,16 @@ U32 ZSTD_insertBtAndGetAllMatches (
433
526
  U32 mnum = 0;
434
527
  U32 nbCompares = 1U << cParams->searchLog;
435
528
 
529
+ const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
530
+ const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
531
+ const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
532
+ U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
533
+ U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
534
+ U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
535
+ U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && btMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - btMask : dmsLowLimit;
536
+
436
537
  size_t bestLength = lengthToBeat-1;
437
- DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");
538
+ DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
438
539
 
439
540
  /* check repCode */
440
541
  { U32 const lastR = ZSTD_REP_NUM + ll0;
@@ -449,18 +550,26 @@ U32 ZSTD_insertBtAndGetAllMatches (
449
550
  repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
450
551
  }
451
552
  } else { /* repIndex < dictLimit || repIndex >= current */
452
- const BYTE* const repMatch = dictBase + repIndex;
553
+ const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
554
+ dmsBase + repIndex - dmsIndexDelta :
555
+ dictBase + repIndex;
453
556
  assert(current >= windowLow);
454
- if ( extDict /* this case only valid in extDict mode */
557
+ if ( dictMode == ZSTD_extDict
455
558
  && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
456
559
  & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
457
560
  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
458
561
  repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
562
+ }
563
+ if (dictMode == ZSTD_dictMatchState
564
+ && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */
565
+ & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
566
+ && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
567
+ repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
459
568
  } }
460
569
  /* save longer solution */
461
570
  if (repLen > bestLength) {
462
- DEBUGLOG(8, "found rep-match %u of length %u",
463
- repCode - ll0, (U32)repLen);
571
+ DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
572
+ repCode, ll0, repOffset, repLen);
464
573
  bestLength = repLen;
465
574
  matches[mnum].off = repCode - ll0;
466
575
  matches[mnum].len = (U32)repLen;
@@ -473,10 +582,10 @@ U32 ZSTD_insertBtAndGetAllMatches (
473
582
  /* HC3 match finder */
474
583
  if ((mls == 3) /*static*/ && (bestLength < mls)) {
475
584
  U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
476
- if ((matchIndex3 > windowLow)
585
+ if ((matchIndex3 >= matchLow)
477
586
  & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
478
587
  size_t mlen;
479
- if ((!extDict) /*static*/ || (matchIndex3 >= dictLimit)) {
588
+ if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
480
589
  const BYTE* const match = base + matchIndex3;
481
590
  mlen = ZSTD_count(ip, match, iLimit);
482
591
  } else {
@@ -498,17 +607,21 @@ U32 ZSTD_insertBtAndGetAllMatches (
498
607
  (ip+mlen == iLimit) ) { /* best possible length */
499
608
  ms->nextToUpdate = current+1; /* skip insertion */
500
609
  return 1;
501
- } } } }
610
+ }
611
+ }
612
+ }
613
+ /* no dictMatchState lookup: dicts don't have a populated HC3 table */
614
+ }
502
615
 
503
616
  hashTable[h] = current; /* Update Hash Table */
504
617
 
505
- while (nbCompares-- && (matchIndex > windowLow)) {
618
+ while (nbCompares-- && (matchIndex >= matchLow)) {
506
619
  U32* const nextPtr = bt + 2*(matchIndex & btMask);
507
620
  size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
508
621
  const BYTE* match;
509
622
  assert(current > matchIndex);
510
623
 
511
- if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
624
+ if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
512
625
  assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
513
626
  match = base + matchIndex;
514
627
  matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
@@ -520,8 +633,8 @@ U32 ZSTD_insertBtAndGetAllMatches (
520
633
  }
521
634
 
522
635
  if (matchLength > bestLength) {
523
- DEBUGLOG(8, "found match of length %u at distance %u",
524
- (U32)matchLength, current - matchIndex);
636
+ DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
637
+ (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
525
638
  assert(matchEndIdx > matchIndex);
526
639
  if (matchLength > matchEndIdx - matchIndex)
527
640
  matchEndIdx = matchIndex + (U32)matchLength;
@@ -529,9 +642,10 @@ U32 ZSTD_insertBtAndGetAllMatches (
529
642
  matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
530
643
  matches[mnum].len = (U32)matchLength;
531
644
  mnum++;
532
- if (matchLength > ZSTD_OPT_NUM) break;
533
- if (ip+matchLength == iLimit) { /* equal : no way to know if inf or sup */
534
- break; /* drop, to preserve bt consistency (miss a little bit of compression) */
645
+ if ( (matchLength > ZSTD_OPT_NUM)
646
+ | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
647
+ if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
648
+ break; /* drop, to preserve bt consistency (miss a little bit of compression) */
535
649
  }
536
650
  }
537
651
 
@@ -552,6 +666,46 @@ U32 ZSTD_insertBtAndGetAllMatches (
552
666
 
553
667
  *smallerPtr = *largerPtr = 0;
554
668
 
669
+ if (dictMode == ZSTD_dictMatchState && nbCompares) {
670
+ U32 dictMatchIndex = dms->hashTable[h];
671
+ const U32* const dmsBt = dms->chainTable;
672
+ commonLengthSmaller = commonLengthLarger = 0;
673
+ while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
674
+ const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & btMask);
675
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
676
+ const BYTE* match = dmsBase + dictMatchIndex;
677
+ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
678
+ if (dictMatchIndex+matchLength >= dmsHighLimit)
679
+ match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
680
+
681
+ if (matchLength > bestLength) {
682
+ matchIndex = dictMatchIndex + dmsIndexDelta;
683
+ DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
684
+ (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
685
+ if (matchLength > matchEndIdx - matchIndex)
686
+ matchEndIdx = matchIndex + (U32)matchLength;
687
+ bestLength = matchLength;
688
+ matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
689
+ matches[mnum].len = (U32)matchLength;
690
+ mnum++;
691
+ if ( (matchLength > ZSTD_OPT_NUM)
692
+ | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
693
+ break; /* drop, to guarantee consistency (miss a little bit of compression) */
694
+ }
695
+ }
696
+
697
+ if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
698
+ if (match[matchLength] < ip[matchLength]) {
699
+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
700
+ dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
701
+ } else {
702
+ /* match is larger than current */
703
+ commonLengthLarger = matchLength;
704
+ dictMatchIndex = nextPtr[0];
705
+ }
706
+ }
707
+ }
708
+
555
709
  assert(matchEndIdx > current+8);
556
710
  ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
557
711
  return mnum;
@@ -560,22 +714,22 @@ U32 ZSTD_insertBtAndGetAllMatches (
560
714
 
561
715
  FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
562
716
  ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
563
- const BYTE* ip, const BYTE* const iHighLimit, int const extDict,
717
+ const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
564
718
  U32 rep[ZSTD_REP_NUM], U32 const ll0,
565
719
  ZSTD_match_t* matches, U32 const lengthToBeat)
566
720
  {
567
721
  U32 const matchLengthSearch = cParams->searchLength;
568
- DEBUGLOG(7, "ZSTD_BtGetAllMatches");
722
+ DEBUGLOG(8, "ZSTD_BtGetAllMatches");
569
723
  if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
570
- ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, extDict);
724
+ ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, dictMode);
571
725
  switch(matchLengthSearch)
572
726
  {
573
- case 3 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 3);
727
+ case 3 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3);
574
728
  default :
575
- case 4 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 4);
576
- case 5 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 5);
729
+ case 4 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4);
730
+ case 5 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5);
577
731
  case 7 :
578
- case 6 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 6);
732
+ case 6 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6);
579
733
  }
580
734
  }
581
735
 
@@ -609,65 +763,18 @@ repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
609
763
  }
610
764
 
611
765
 
612
- typedef struct {
613
- const BYTE* anchor;
614
- U32 litlen;
615
- U32 rawLitCost;
616
- } cachedLiteralPrice_t;
617
-
618
- static U32 ZSTD_rawLiteralsCost_cached(
619
- cachedLiteralPrice_t* const cachedLitPrice,
620
- const BYTE* const anchor, U32 const litlen,
621
- const optState_t* const optStatePtr)
622
- {
623
- U32 startCost;
624
- U32 remainingLength;
625
- const BYTE* startPosition;
626
-
627
- if (anchor == cachedLitPrice->anchor) {
628
- startCost = cachedLitPrice->rawLitCost;
629
- startPosition = anchor + cachedLitPrice->litlen;
630
- assert(litlen >= cachedLitPrice->litlen);
631
- remainingLength = litlen - cachedLitPrice->litlen;
632
- } else {
633
- startCost = 0;
634
- startPosition = anchor;
635
- remainingLength = litlen;
636
- }
637
-
638
- { U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr);
639
- cachedLitPrice->anchor = anchor;
640
- cachedLitPrice->litlen = litlen;
641
- cachedLitPrice->rawLitCost = rawLitCost;
642
- return rawLitCost;
643
- }
644
- }
645
-
646
- static U32 ZSTD_fullLiteralsCost_cached(
647
- cachedLiteralPrice_t* const cachedLitPrice,
648
- const BYTE* const anchor, U32 const litlen,
649
- const optState_t* const optStatePtr)
650
- {
651
- return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
652
- + ZSTD_litLengthPrice(litlen, optStatePtr);
653
- }
654
-
655
- static int ZSTD_literalsContribution_cached(
656
- cachedLiteralPrice_t* const cachedLitPrice,
657
- const BYTE* const anchor, U32 const litlen,
658
- const optState_t* const optStatePtr)
766
+ static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
659
767
  {
660
- int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
661
- + ZSTD_litLengthContribution(litlen, optStatePtr);
662
- return contribution;
768
+ return sol.litlen + sol.mlen;
663
769
  }
664
770
 
665
- FORCE_INLINE_TEMPLATE
666
- size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore,
667
- U32 rep[ZSTD_REP_NUM],
668
- ZSTD_compressionParameters const* cParams,
669
- const void* src, size_t srcSize,
670
- const int optLevel, const int extDict)
771
+ FORCE_INLINE_TEMPLATE size_t
772
+ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
773
+ seqStore_t* seqStore,
774
+ U32 rep[ZSTD_REP_NUM],
775
+ const ZSTD_compressionParameters* cParams,
776
+ const void* src, size_t srcSize,
777
+ const int optLevel, const ZSTD_dictMode_e dictMode)
671
778
  {
672
779
  optState_t* const optStatePtr = &ms->opt;
673
780
  const BYTE* const istart = (const BYTE*)src;
@@ -683,66 +790,69 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore
683
790
 
684
791
  ZSTD_optimal_t* const opt = optStatePtr->priceTable;
685
792
  ZSTD_match_t* const matches = optStatePtr->matchTable;
686
- cachedLiteralPrice_t cachedLitPrice;
793
+ ZSTD_optimal_t lastSequence;
687
794
 
688
795
  /* init */
689
796
  DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
797
+ assert(optLevel <= 2);
690
798
  ms->nextToUpdate3 = ms->nextToUpdate;
691
- ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
799
+ ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
692
800
  ip += (ip==prefixStart);
693
- memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));
694
801
 
695
802
  /* Match Loop */
696
803
  while (ip < ilimit) {
697
804
  U32 cur, last_pos = 0;
698
- U32 best_mlen, best_off;
699
805
 
700
806
  /* find first match */
701
807
  { U32 const litlen = (U32)(ip - anchor);
702
808
  U32 const ll0 = !litlen;
703
- U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, ip, iend, extDict, rep, ll0, matches, minMatch);
809
+ U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, ip, iend, dictMode, rep, ll0, matches, minMatch);
704
810
  if (!nbMatches) { ip++; continue; }
705
811
 
706
812
  /* initialize opt[0] */
707
813
  { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
708
- opt[0].mlen = 1;
814
+ opt[0].mlen = 0; /* means is_a_literal */
709
815
  opt[0].litlen = litlen;
816
+ opt[0].price = ZSTD_literalsContribution(anchor, litlen, optStatePtr, optLevel);
710
817
 
711
818
  /* large match -> immediate encoding */
712
819
  { U32 const maxML = matches[nbMatches-1].len;
713
- DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new serie",
714
- nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart));
820
+ U32 const maxOffset = matches[nbMatches-1].off;
821
+ DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new serie",
822
+ nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
715
823
 
716
824
  if (maxML > sufficient_len) {
717
- best_mlen = maxML;
718
- best_off = matches[nbMatches-1].off;
719
- DEBUGLOG(7, "large match (%u>%u), immediate encoding",
720
- best_mlen, sufficient_len);
825
+ lastSequence.litlen = litlen;
826
+ lastSequence.mlen = maxML;
827
+ lastSequence.off = maxOffset;
828
+ DEBUGLOG(6, "large match (%u>%u), immediate encoding",
829
+ maxML, sufficient_len);
721
830
  cur = 0;
722
- last_pos = 1;
831
+ last_pos = ZSTD_totalLen(lastSequence);
723
832
  goto _shortestPath;
724
833
  } }
725
834
 
726
835
  /* set prices for first matches starting position == 0 */
727
- { U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
836
+ { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
728
837
  U32 pos;
729
838
  U32 matchNb;
730
- for (pos = 0; pos < minMatch; pos++) {
731
- opt[pos].mlen = 1;
732
- opt[pos].price = ZSTD_MAX_PRICE;
839
+ for (pos = 1; pos < minMatch; pos++) {
840
+ opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
733
841
  }
734
842
  for (matchNb = 0; matchNb < nbMatches; matchNb++) {
735
843
  U32 const offset = matches[matchNb].off;
736
844
  U32 const end = matches[matchNb].len;
737
845
  repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
738
846
  for ( ; pos <= end ; pos++ ) {
739
- U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
740
- DEBUGLOG(7, "rPos:%u => set initial price : %u",
741
- pos, matchPrice);
847
+ U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
848
+ U32 const sequencePrice = literalsPrice + matchPrice;
849
+ DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
850
+ pos, ZSTD_fCost(sequencePrice));
742
851
  opt[pos].mlen = pos;
743
852
  opt[pos].off = offset;
744
853
  opt[pos].litlen = litlen;
745
- opt[pos].price = matchPrice;
854
+ opt[pos].price = sequencePrice;
855
+ ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
746
856
  memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
747
857
  } }
748
858
  last_pos = pos-1;
@@ -753,55 +863,67 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore
753
863
  for (cur = 1; cur <= last_pos; cur++) {
754
864
  const BYTE* const inr = ip + cur;
755
865
  assert(cur < ZSTD_OPT_NUM);
866
+ DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
756
867
 
757
868
  /* Fix current position with one literal if cheaper */
758
- { U32 const litlen = (opt[cur-1].mlen == 1) ? opt[cur-1].litlen + 1 : 1;
759
- int price; /* note : contribution can be negative */
760
- if (cur > litlen) {
761
- price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr);
762
- } else {
763
- price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
764
- }
869
+ { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
870
+ int const price = opt[cur-1].price
871
+ + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
872
+ + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
873
+ - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
765
874
  assert(price < 1000000000); /* overflow check */
766
875
  if (price <= opt[cur].price) {
767
- DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal",
768
- cur, price, opt[cur].price);
769
- opt[cur].mlen = 1;
876
+ DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
877
+ inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
878
+ opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
879
+ opt[cur].mlen = 0;
770
880
  opt[cur].off = 0;
771
881
  opt[cur].litlen = litlen;
772
882
  opt[cur].price = price;
773
883
  memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
774
- } }
884
+ } else {
885
+ DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
886
+ inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
887
+ opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
888
+ }
889
+ }
775
890
 
776
891
  /* last match must start at a minimum distance of 8 from oend */
777
892
  if (inr > ilimit) continue;
778
893
 
779
894
  if (cur == last_pos) break;
780
895
 
781
- if ( (optLevel==0) /*static*/
782
- && (opt[cur+1].price <= opt[cur].price) )
896
+ if ( (optLevel==0) /*static_test*/
897
+ && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
898
+ DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
783
899
  continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
900
+ }
784
901
 
785
- { U32 const ll0 = (opt[cur].mlen != 1);
786
- U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
787
- U32 const previousPrice = (cur > litlen) ? opt[cur-litlen].price : 0;
788
- U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr);
789
- U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, extDict, opt[cur].rep, ll0, matches, minMatch);
902
+ { U32 const ll0 = (opt[cur].mlen != 0);
903
+ U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
904
+ U32 const previousPrice = opt[cur].price;
905
+ U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
906
+ U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch);
790
907
  U32 matchNb;
791
- if (!nbMatches) continue;
908
+ if (!nbMatches) {
909
+ DEBUGLOG(7, "rPos:%u : no match found", cur);
910
+ continue;
911
+ }
792
912
 
793
913
  { U32 const maxML = matches[nbMatches-1].len;
794
- DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u",
795
- cur, nbMatches, maxML);
914
+ DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
915
+ inr-istart, cur, nbMatches, maxML);
796
916
 
797
917
  if ( (maxML > sufficient_len)
798
- | (cur + maxML >= ZSTD_OPT_NUM) ) {
799
- best_mlen = maxML;
800
- best_off = matches[nbMatches-1].off;
801
- last_pos = cur + 1;
918
+ || (cur + maxML >= ZSTD_OPT_NUM) ) {
919
+ lastSequence.mlen = maxML;
920
+ lastSequence.off = matches[nbMatches-1].off;
921
+ lastSequence.litlen = litlen;
922
+ cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
923
+ last_pos = cur + ZSTD_totalLen(lastSequence);
924
+ if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
802
925
  goto _shortestPath;
803
- }
804
- }
926
+ } }
805
927
 
806
928
  /* set prices using matches found at position == cur */
807
929
  for (matchNb = 0; matchNb < nbMatches; matchNb++) {
@@ -811,81 +933,97 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore
811
933
  U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
812
934
  U32 mlen;
813
935
 
814
- DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u",
936
+ DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
815
937
  matchNb, matches[matchNb].off, lastML, litlen);
816
938
 
817
- for (mlen = lastML; mlen >= startML; mlen--) {
939
+ for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
818
940
  U32 const pos = cur + mlen;
819
941
  int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
820
942
 
821
943
  if ((pos > last_pos) || (price < opt[pos].price)) {
822
- DEBUGLOG(7, "rPos:%u => new better price (%u<%u)",
823
- pos, price, opt[pos].price);
824
- while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }
944
+ DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
945
+ pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
946
+ while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
825
947
  opt[pos].mlen = mlen;
826
948
  opt[pos].off = offset;
827
949
  opt[pos].litlen = litlen;
828
950
  opt[pos].price = price;
951
+ ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
829
952
  memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
830
953
  } else {
831
- if (optLevel==0) break; /* gets ~+10% speed for about -0.01 ratio loss */
954
+ DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
955
+ pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
956
+ if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
832
957
  }
833
958
  } } }
834
959
  } /* for (cur = 1; cur <= last_pos; cur++) */
835
960
 
836
- best_mlen = opt[last_pos].mlen;
837
- best_off = opt[last_pos].off;
838
- cur = last_pos - best_mlen;
961
+ lastSequence = opt[last_pos];
962
+ cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
963
+ assert(cur < ZSTD_OPT_NUM); /* control overflow*/
839
964
 
840
965
  _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
841
- assert(opt[0].mlen == 1);
842
-
843
- /* reverse traversal */
844
- DEBUGLOG(7, "start reverse traversal (last_pos:%u, cur:%u)",
845
- last_pos, cur);
846
- { U32 selectedMatchLength = best_mlen;
847
- U32 selectedOffset = best_off;
848
- U32 pos = cur;
849
- while (1) {
850
- U32 const mlen = opt[pos].mlen;
851
- U32 const off = opt[pos].off;
852
- opt[pos].mlen = selectedMatchLength;
853
- opt[pos].off = selectedOffset;
854
- selectedMatchLength = mlen;
855
- selectedOffset = off;
856
- if (mlen > pos) break;
857
- pos -= mlen;
858
- } }
859
-
860
- /* save sequences */
861
- { U32 pos;
862
- for (pos=0; pos < last_pos; ) {
863
- U32 const llen = (U32)(ip - anchor);
864
- U32 const mlen = opt[pos].mlen;
865
- U32 const offset = opt[pos].off;
866
- if (mlen == 1) { ip++; pos++; continue; } /* literal position => move on */
867
- pos += mlen; ip += mlen;
868
-
869
- /* repcodes update : like ZSTD_updateRep(), but update in place */
870
- if (offset >= ZSTD_REP_NUM) { /* full offset */
871
- rep[2] = rep[1];
872
- rep[1] = rep[0];
873
- rep[0] = offset - ZSTD_REP_MOVE;
874
- } else { /* repcode */
875
- U32 const repCode = offset + (llen==0);
876
- if (repCode) { /* note : if repCode==0, no change */
877
- U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
878
- if (repCode >= 2) rep[2] = rep[1];
879
- rep[1] = rep[0];
880
- rep[0] = currentOffset;
966
+ assert(opt[0].mlen == 0);
967
+
968
+ { U32 const storeEnd = cur + 1;
969
+ U32 storeStart = storeEnd;
970
+ U32 seqPos = cur;
971
+
972
+ DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
973
+ last_pos, cur);
974
+ assert(storeEnd < ZSTD_OPT_NUM);
975
+ DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
976
+ storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
977
+ opt[storeEnd] = lastSequence;
978
+ while (seqPos > 0) {
979
+ U32 const backDist = ZSTD_totalLen(opt[seqPos]);
980
+ storeStart--;
981
+ DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
982
+ seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
983
+ opt[storeStart] = opt[seqPos];
984
+ seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
985
+ }
986
+
987
+ /* save sequences */
988
+ DEBUGLOG(6, "sending selected sequences into seqStore")
989
+ { U32 storePos;
990
+ for (storePos=storeStart; storePos <= storeEnd; storePos++) {
991
+ U32 const llen = opt[storePos].litlen;
992
+ U32 const mlen = opt[storePos].mlen;
993
+ U32 const offCode = opt[storePos].off;
994
+ U32 const advance = llen + mlen;
995
+ DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
996
+ anchor - istart, llen, mlen);
997
+
998
+ if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
999
+ assert(storePos == storeEnd); /* must be last sequence */
1000
+ ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
1001
+ continue; /* will finish */
881
1002
  }
882
- }
883
1003
 
884
- ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
885
- ZSTD_storeSeq(seqStore, llen, anchor, offset, mlen-MINMATCH);
886
- anchor = ip;
887
- } }
888
- ZSTD_setLog2Prices(optStatePtr);
1004
+ /* repcodes update : like ZSTD_updateRep(), but update in place */
1005
+ if (offCode >= ZSTD_REP_NUM) { /* full offset */
1006
+ rep[2] = rep[1];
1007
+ rep[1] = rep[0];
1008
+ rep[0] = offCode - ZSTD_REP_MOVE;
1009
+ } else { /* repcode */
1010
+ U32 const repCode = offCode + (llen==0);
1011
+ if (repCode) { /* note : if repCode==0, no change */
1012
+ U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
1013
+ if (repCode >= 2) rep[2] = rep[1];
1014
+ rep[1] = rep[0];
1015
+ rep[0] = currentOffset;
1016
+ } }
1017
+
1018
+ assert(anchor + llen <= iend);
1019
+ ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1020
+ ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
1021
+ anchor += advance;
1022
+ ip = anchor;
1023
+ } }
1024
+ ZSTD_setBasePrices(optStatePtr, optLevel);
1025
+ }
1026
+
889
1027
  } /* while (ip < ilimit) */
890
1028
 
891
1029
  /* Return the last literals size */
@@ -895,29 +1033,94 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
895
1033
 
896
1034
  size_t ZSTD_compressBlock_btopt(
897
1035
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
898
- ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1036
+ const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
899
1037
  {
900
1038
  DEBUGLOG(5, "ZSTD_compressBlock_btopt");
901
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
1039
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
1040
+ }
1041
+
1042
+
1043
+ /* used in 2-pass strategy */
1044
+ static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus)
1045
+ {
1046
+ U32 s, sum=0;
1047
+ assert(ZSTD_FREQ_DIV+bonus > 0);
1048
+ for (s=0; s<=lastEltIndex; s++) {
1049
+ table[s] <<= ZSTD_FREQ_DIV+bonus;
1050
+ table[s]--;
1051
+ sum += table[s];
1052
+ }
1053
+ return sum;
1054
+ }
1055
+
1056
+ /* used in 2-pass strategy */
1057
+ MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
1058
+ {
1059
+ optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
1060
+ optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1);
1061
+ optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1);
1062
+ optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1);
902
1063
  }
903
1064
 
904
1065
  size_t ZSTD_compressBlock_btultra(
905
1066
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
906
- ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1067
+ const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
1068
+ {
1069
+ DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1070
+ #if 0
1071
+ /* 2-pass strategy (disabled)
1072
+ * this strategy makes a first pass over first block to collect statistics
1073
+ * and seed next round's statistics with it.
1074
+ * The compression ratio gain is generally small (~0.5% on first block),
1075
+ * the cost is 2x cpu time on first block. */
1076
+ assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1077
+ if ( (ms->opt.litLengthSum==0) /* first block */
1078
+ && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1079
+ && (ms->window.dictLimit == ms->window.lowLimit) ) { /* no dictionary */
1080
+ U32 tmpRep[ZSTD_REP_NUM];
1081
+ DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics");
1082
+ assert(ms->nextToUpdate >= ms->window.dictLimit
1083
+ && ms->nextToUpdate <= ms->window.dictLimit + 1);
1084
+ memcpy(tmpRep, rep, sizeof(tmpRep));
1085
+ ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, cParams, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
1086
+ ZSTD_resetSeqStore(seqStore);
1087
+ /* invalidate first scan from history */
1088
+ ms->window.base -= srcSize;
1089
+ ms->window.dictLimit += (U32)srcSize;
1090
+ ms->window.lowLimit = ms->window.dictLimit;
1091
+ ms->nextToUpdate = ms->window.dictLimit;
1092
+ ms->nextToUpdate3 = ms->window.dictLimit;
1093
+ /* re-inforce weight of collected statistics */
1094
+ ZSTD_upscaleStats(&ms->opt);
1095
+ }
1096
+ #endif
1097
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1098
+ }
1099
+
1100
+ size_t ZSTD_compressBlock_btopt_dictMatchState(
1101
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1102
+ const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
1103
+ {
1104
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
1105
+ }
1106
+
1107
+ size_t ZSTD_compressBlock_btultra_dictMatchState(
1108
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1109
+ const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
907
1110
  {
908
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
1111
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
909
1112
  }
910
1113
 
911
1114
  size_t ZSTD_compressBlock_btopt_extDict(
912
1115
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
913
- ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1116
+ const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
914
1117
  {
915
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
1118
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
916
1119
  }
917
1120
 
918
1121
  size_t ZSTD_compressBlock_btultra_extDict(
919
1122
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
920
- ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1123
+ const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
921
1124
  {
922
- return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
1125
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
923
1126
  }