zstd-ruby 1.3.4.0 → 1.3.5.0
This diff shows the changes between the two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +56 -10
- data/ext/zstdruby/libzstd/README.md +4 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +6 -19
- data/ext/zstdruby/libzstd/common/compiler.h +3 -3
- data/ext/zstdruby/libzstd/common/cpu.h +1 -2
- data/ext/zstdruby/libzstd/common/debug.c +44 -0
- data/ext/zstdruby/libzstd/common/debug.h +123 -0
- data/ext/zstdruby/libzstd/common/entropy_common.c +16 -1
- data/ext/zstdruby/libzstd/common/fse.h +45 -41
- data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
- data/ext/zstdruby/libzstd/common/huf.h +34 -27
- data/ext/zstdruby/libzstd/common/pool.c +89 -32
- data/ext/zstdruby/libzstd/common/pool.h +29 -19
- data/ext/zstdruby/libzstd/common/zstd_common.c +0 -5
- data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -37
- data/ext/zstdruby/libzstd/compress/fse_compress.c +28 -163
- data/ext/zstdruby/libzstd/compress/hist.c +195 -0
- data/ext/zstdruby/libzstd/compress/hist.h +92 -0
- data/ext/zstdruby/libzstd/compress/huf_compress.c +14 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +798 -350
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +120 -34
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +247 -87
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -1
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +177 -56
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -1
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +331 -65
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +13 -0
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +15 -20
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -2
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +503 -300
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +7 -0
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +122 -47
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +5 -5
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +325 -325
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +80 -43
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +5 -5
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +12 -61
- data/ext/zstdruby/libzstd/zstd.h +137 -69
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +7 -3
data/ext/zstdruby/libzstd/compress/zstd_lazy.h

@@ -36,6 +36,19 @@ size_t ZSTD_compressBlock_greedy(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
 
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+
 size_t ZSTD_compressBlock_greedy_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
data/ext/zstdruby/libzstd/compress/zstd_ldm.c

@@ -9,6 +9,7 @@
 
 #include "zstd_ldm.h"
 
+#include "debug.h"
 #include "zstd_fast.h"          /* ZSTD_fillHashTable() */
 #include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
 
@@ -20,7 +21,7 @@
 void ZSTD_ldm_adjustParameters(ldmParams_t* params,
                                ZSTD_compressionParameters const* cParams)
 {
-    ...
+    params->windowLog = cParams->windowLog;
     ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
     DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
     if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
@@ -33,12 +34,13 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params,
         params->minMatchLength = minMatch;
     }
     if (params->hashLog == 0) {
-        params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
+        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
         assert(params->hashLog <= ZSTD_HASHLOG_MAX);
     }
     if (params->hashEveryLog == 0) {
-        params->hashEveryLog =
-    ...
+        params->hashEveryLog = params->windowLog < params->hashLog
+                                   ? 0
+                                   : params->windowLog - params->hashLog;
     }
     params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
 }
@@ -224,13 +226,11 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
     switch(cParams->strategy)
     {
     case ZSTD_fast:
-        ZSTD_fillHashTable(ms, cParams, iend);
-        ms->nextToUpdate = (U32)(iend - ms->window.base);
+        ZSTD_fillHashTable(ms, cParams, iend, ZSTD_dtlm_fast);
         break;
 
     case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable(ms, cParams, iend);
-        ms->nextToUpdate = (U32)(iend - ms->window.base);
+        ZSTD_fillDoubleHashTable(ms, cParams, iend, ZSTD_dtlm_fast);
         break;
 
     case ZSTD_greedy:
@@ -508,7 +508,7 @@ size_t ZSTD_ldm_generateSequences(
          * * Try invalidation after the sequence generation and test the
          *   the offset against maxDist directly.
          */
-        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL);
+        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
         /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
         newLeftoverSize = ZSTD_ldm_generateSequences_internal(
             ldmState, sequences, params, chunkStart, chunkSize);
@@ -591,19 +591,18 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
 
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-    ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize
-    int const extDict)
+    ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
 {
     unsigned const minMatch = cParams->searchLength;
     ZSTD_blockCompressor const blockCompressor =
-        ZSTD_selectBlockCompressor(cParams->strategy,
-    BYTE const* const base = ms->window.base;
+        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
     /* Input bounds */
     BYTE const* const istart = (BYTE const*)src;
     BYTE const* const iend = istart + srcSize;
     /* Input positions */
     BYTE const* ip = istart;
 
+    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
     assert(rawSeqStore->pos <= rawSeqStore->size);
     assert(rawSeqStore->size <= rawSeqStore->capacity);
     /* Loop through each sequence and apply the block compressor to the lits */
@@ -623,12 +622,12 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
         ZSTD_ldm_limitTableUpdate(ms, ip);
         ZSTD_ldm_fillFastTables(ms, cParams, ip);
         /* Run the block compressor */
+        DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
         {
             size_t const newLitLength =
                 blockCompressor(ms, seqStore, rep, cParams, ip,
                                 sequence.litLength);
             ip += sequence.litLength;
-            ms->nextToUpdate = (U32)(ip - base);
             /* Update the repcodes */
             for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                 rep[i] = rep[i-1];
@@ -644,10 +643,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_ldm_limitTableUpdate(ms, ip);
     ZSTD_ldm_fillFastTables(ms, cParams, ip);
     /* Compress the last literals */
-    ...
-                                     ip, iend - ip);
-        ms->nextToUpdate = (U32)(iend - base);
-        return lastLiterals;
-    }
+    return blockCompressor(ms, seqStore, rep, cParams,
+                           ip, iend - ip);
 }

data/ext/zstdruby/libzstd/compress/zstd_ldm.h

@@ -62,8 +62,7 @@ size_t ZSTD_ldm_generateSequences(
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
     ZSTD_compressionParameters const* cParams,
-    void const* src, size_t srcSize
-    int const extDict);
+    void const* src, size_t srcSize);
 
 /**
  * ZSTD_ldm_skipSequences():
data/ext/zstdruby/libzstd/compress/zstd_opt.c

@@ -9,10 +9,11 @@
  */
 
 #include "zstd_compress_internal.h"
+#include "hist.h"
 #include "zstd_opt.h"
 
 
-#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats
+#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
 #define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
 #define ZSTD_MAX_PRICE      (1<<30)
 
@@ -20,128 +21,210 @@
 /*-*************************************
 *  Price functions for optimal parser
 ***************************************/
-
+
+#if 0    /* approximation at bit level */
+#  define BITCOST_ACCURACY 0
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat)  ((void)opt, ZSTD_bitWeight(stat))
+#elif 0  /* fractional bit accuracy */
+#  define BITCOST_ACCURACY 8
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
+#else    /* opt==approx, ultra==accurate */
+#  define BITCOST_ACCURACY 8
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
+#endif
+
+MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
+{
+    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
+}
+
+MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
 {
-    ...
+    U32 const stat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(stat);
+    U32 const BWeight = hb * BITCOST_MULTIPLIER;
+    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
+    U32 const weight = BWeight + FWeight;
+    assert(hb + BITCOST_ACCURACY < 31);
+    return weight;
 }
 
+/* debugging function, @return price in bytes */
+MEM_STATIC double ZSTD_fCost(U32 price)
+{
+    return (double)price / (BITCOST_MULTIPLIER*8);
+}
+
+static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
+{
+    optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
+    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
+    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
+    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
+}
+
+
+static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus)
+{
+    U32 s, sum=0;
+    assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
+    for (s=0; s<=lastEltIndex; s++) {
+        table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
+        sum += table[s];
+    }
+    return sum;
+}
 
 static void ZSTD_rescaleFreqs(optState_t* const optPtr,
-                              const BYTE* const src, size_t const srcSize
+                              const BYTE* const src, size_t const srcSize,
+                              int optLevel)
 {
-    optPtr->
-    ...
-    if (optPtr->litLengthSum == 0) {  /* first init */
-    ...
+    optPtr->priceType = zop_dynamic;
+
+    if (optPtr->litLengthSum == 0) {  /* first block : init */
+        if (srcSize <= 1024)   /* heuristic */
+            optPtr->priceType = zop_predef;
+
+        assert(optPtr->symbolCosts != NULL);
+        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {  /* huffman table presumed generated by dictionary */
+            optPtr->priceType = zop_dynamic;
+
+            assert(optPtr->litFreq != NULL);
+            optPtr->litSum = 0;
+            {   unsigned lit;
+                for (lit=0; lit<=MaxLit; lit++) {
+                    U32 const scaleLog = 11;   /* scale to 2K */
+                    U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
+                    assert(bitCost <= scaleLog);
+                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->litSum += optPtr->litFreq[lit];
+            }   }
+
+            {   unsigned ll;
+                FSE_CState_t llstate;
+                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
+                optPtr->litLengthSum = 0;
+                for (ll=0; ll<=MaxLL; ll++) {
+                    U32 const scaleLog = 10;   /* scale to 1K */
+                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
+                    assert(bitCost < scaleLog);
+                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
+            }   }
+
+            {   unsigned ml;
+                FSE_CState_t mlstate;
+                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
+                optPtr->matchLengthSum = 0;
+                for (ml=0; ml<=MaxML; ml++) {
+                    U32 const scaleLog = 10;
+                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
+                    assert(bitCost < scaleLog);
+                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
+            }   }
+
+            {   unsigned of;
+                FSE_CState_t ofstate;
+                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
+                optPtr->offCodeSum = 0;
+                for (of=0; of<=MaxOff; of++) {
+                    U32 const scaleLog = 10;
+                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
+                    assert(bitCost < scaleLog);
+                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
+            }   }
+
+        } else {  /* not a dictionary */
+
+            assert(optPtr->litFreq != NULL);
+            {   unsigned lit = MaxLit;
+                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
+            }
+            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+
+            {   unsigned ll;
+                for (ll=0; ll<=MaxLL; ll++)
+                    optPtr->litLengthFreq[ll] = 1;
+            }
+            optPtr->litLengthSum = MaxLL+1;
+
+            {   unsigned ml;
+                for (ml=0; ml<=MaxML; ml++)
+                    optPtr->matchLengthFreq[ml] = 1;
+            }
+            optPtr->matchLengthSum = MaxML+1;
+
+            {   unsigned of;
+                for (of=0; of<=MaxOff; of++)
+                    optPtr->offCodeFreq[of] = 1;
+            }
+            optPtr->offCodeSum = MaxOff+1;
 
-        for (u=0; u<=MaxLL; u++)
-            optPtr->litLengthFreq[u] = 1;
-        optPtr->litLengthSum = MaxLL+1;
-        for (u=0; u<=MaxML; u++)
-            optPtr->matchLengthFreq[u] = 1;
-        optPtr->matchLengthSum = MaxML+1;
-        for (u=0; u<=MaxOff; u++)
-            optPtr->offCodeFreq[u] = 1;
-        optPtr->offCodeSum = (MaxOff+1);
-
-    } else {
-        unsigned u;
-
-        optPtr->litSum = 0;
-        for (u=0; u<=MaxLit; u++) {
-            optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1));
-            optPtr->litSum += optPtr->litFreq[u];
-        }
-        optPtr->litLengthSum = 0;
-        for (u=0; u<=MaxLL; u++) {
-            optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
-            optPtr->litLengthSum += optPtr->litLengthFreq[u];
-        }
-        optPtr->matchLengthSum = 0;
-        for (u=0; u<=MaxML; u++) {
-            optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
-            optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
-        }
-        optPtr->offCodeSum = 0;
-        for (u=0; u<=MaxOff; u++) {
-            optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
-            optPtr->offCodeSum += optPtr->offCodeFreq[u];
         }
+
+    } else {   /* new block : re-use previous statistics, scaled down */
+
+        optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+        optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
+        optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
+        optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
     }
 
-
+    ZSTD_setBasePrices(optPtr, optLevel);
 }
 
-
 /* ZSTD_rawLiteralsCost() :
- *
- * does not include
+ * price of literals (only) in specified segment (which length can be 0).
+ * does not include price of literalLength symbol */
 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
-                                const optState_t* const optPtr
+                                const optState_t* const optPtr,
+                                int optLevel)
 {
-    if (optPtr->staticPrices) return (litLength*6);  /* 6 bit per literal - no statistic used */
     if (litLength == 0) return 0;
-    ...
+    if (optPtr->priceType == zop_predef)
+        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
+
+    /* dynamic statistics */
+    {   U32 price = litLength * optPtr->litSumBasePrice;
+        U32 u;
+        for (u=0; u < litLength; u++) {
+            assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
+            price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
+        }
+        return price;
     }
 }
 
 /* ZSTD_litLengthPrice() :
  * cost of literalLength symbol */
-static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr)
+static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
 {
-    if (optPtr->
+    if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
 
-    /*
+    /* dynamic statistics */
     {   U32 const llCode = ZSTD_LLcode(litLength);
-    ...
-        return price;
+        return (LL_bits[llCode] * BITCOST_MULTIPLIER) + (optPtr->litLengthSumBasePrice - WEIGHT(optPtr->litLengthFreq[llCode], optLevel));
     }
 }
 
-/* ZSTD_litLengthPrice() :
- * cost of the literal part of a sequence,
- * including literals themselves, and literalLength symbol */
-static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength,
-                                 const optState_t* const optPtr)
-{
-    return ZSTD_rawLiteralsCost(literals, litLength, optPtr)
-         + ZSTD_litLengthPrice(litLength, optPtr);
-}
-
 /* ZSTD_litLengthContribution() :
  * @return ( cost(litlength) - cost(0) )
  * this value can then be added to rawLiteralsCost()
  * to provide a cost which is directly comparable to a match ending at same position */
-static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr)
+static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
 {
-    if (optPtr->
+    if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);
 
-    /*
+    /* dynamic statistics */
     {   U32 const llCode = ZSTD_LLcode(litLength);
-        int const contribution = LL_bits[llCode]
-    ...
+        int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
+                               + WEIGHT(optPtr->litLengthFreq[0], optLevel)   /* note: log2litLengthSum cancel out */
+                               - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
 #if 1
         return contribution;
 #else
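Note: the rewritten price functions above move from whole-bit costs to a fixed-point representation. BITCOST_ACCURACY is 8, so one bit of cost is worth BITCOST_MULTIPLIER = 256, and ZSTD_fracWeight() keeps an extra fractional term so that WEIGHT(total) - WEIGHT(freq) approximates 256 * log2(total/freq). The standalone sketch below is not part of the gem: highbit32() is a plain portable stand-in for ZSTD_highbit32() and the sample counts are invented; it only reproduces the arithmetic next to the exact logarithm.

    /* Sketch only: fixed-point "fractional bit" weight, same shape as
     * ZSTD_fracWeight() in the hunk above.  Build with -lm. */
    #include <stdio.h>
    #include <math.h>

    #define BITCOST_ACCURACY   8
    #define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)

    static unsigned highbit32(unsigned v)        /* floor(log2(v)), v > 0 */
    {
        unsigned r = 0;
        while (v >>= 1) r++;
        return r;
    }

    static unsigned fracWeight(unsigned rawStat)
    {
        unsigned const stat    = rawStat + 1;
        unsigned const hb      = highbit32(stat);
        unsigned const BWeight = hb * BITCOST_MULTIPLIER;             /* whole bits */
        unsigned const FWeight = (stat << BITCOST_ACCURACY) >> hb;    /* fractional part */
        return BWeight + FWeight;
    }

    int main(void)
    {
        unsigned const total = 2048, freq = 3;                /* made-up counts */
        unsigned const price = fracWeight(total) - fracWeight(freq);
        printf("fixed-point price : %u (= %.2f bits)\n", price, (double)price / BITCOST_MULTIPLIER);
        printf("exact -log2(p)    : %.2f bits\n", log2((double)(total + 1) / (freq + 1)));
        return 0;
    }

At optLevel 0 the cheaper ZSTD_bitWeight() (whole bits only) is used instead; that is the "opt==approx, ultra==accurate" split selected by the #else branch in the hunk above.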
@@ -155,10 +238,11 @@ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* con
  * which can be compared to the ending cost of a match
  * should a new match start at this position */
 static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
-                                     const optState_t* const optPtr
+                                     const optState_t* const optPtr,
+                                     int optLevel)
 {
-    int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr)
-                           + ZSTD_litLengthContribution(litLength, optPtr);
+    int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
+                           + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
     return contribution;
 }
 
@@ -166,31 +250,38 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe
  * Provides the cost of the match part (offset + matchLength) of a sequence
  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
  * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
-FORCE_INLINE_TEMPLATE U32
-    ...
+FORCE_INLINE_TEMPLATE U32
+ZSTD_getMatchPrice(U32 const offset,
+                   U32 const matchLength,
+                   const optState_t* const optPtr,
+                   int const optLevel)
 {
     U32 price;
     U32 const offCode = ZSTD_highbit32(offset+1);
     U32 const mlBase = matchLength - MINMATCH;
     assert(matchLength >= MINMATCH);
 
-    if (optPtr->
-        return
+    if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
+        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
 
-    ...
+    /* dynamic statistics */
+    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
+    if ((optLevel<2) /*static*/ && offCode >= 20)
+        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
 
     /* match Length */
     {   U32 const mlCode = ZSTD_MLcode(mlBase);
-        price += ML_bits[mlCode] + optPtr->
+        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
     }
 
+    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
+
     DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
     return price;
 }
 
+/* ZSTD_updateStats() :
+ * assumption : literals + litLengtn <= iend */
 static void ZSTD_updateStats(optState_t* const optPtr,
                              U32 litLength, const BYTE* literals,
                              U32 offsetCode, U32 matchLength)
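For reference, the zop_predef branch above prices a match without any statistics: WEIGHT(mlBase) plus (16 + offCode) whole bits, where offCode = ZSTD_highbit32(offset+1). A minimal sketch, not from the package, with constants copied from the hunk above and an invented sample match:

    /* Sketch only: the "no statistics yet" (zop_predef) match price,
     * evaluated with the whole-bit weight (as used when optLevel < 2). */
    #include <stdio.h>

    #define MINMATCH            3
    #define BITCOST_ACCURACY    8
    #define BITCOST_MULTIPLIER  (1 << BITCOST_ACCURACY)

    static unsigned highbit32(unsigned v) { unsigned r = 0; while (v >>= 1) r++; return r; }
    static unsigned bitWeight(unsigned stat) { return highbit32(stat + 1) * BITCOST_MULTIPLIER; }

    int main(void)
    {
        unsigned const offset = 1024, matchLength = 18;       /* made-up match */
        unsigned const offCode = highbit32(offset + 1);       /* offset bucket: 10 */
        unsigned const mlBase  = matchLength - MINMATCH;      /* 15 */
        unsigned const price   = bitWeight(mlBase) + (16 + offCode) * BITCOST_MULTIPLIER;
        printf("predef price = %u (~%.1f bits)\n", price, (double)price / BITCOST_MULTIPLIER);
        return 0;
    }

The statistics-based branch (not sketched here) additionally adds a (offCode-19)*2-bit handicap for offCode >= 20 when optLevel < 2, keeping long-distance offsets rare and decompression cache-friendly, as the comment in the diff explains.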
@@ -271,7 +362,7 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE*
 static U32 ZSTD_insertBt1(
                 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
                 const BYTE* const ip, const BYTE* const iend,
-                U32 const mls,
+                U32 const mls, const int extDict)
 {
     U32*   const hashTable = ms->hashTable;
     U32    const hashLog = cParams->hashLog;
@@ -293,6 +384,7 @@ static U32 ZSTD_insertBt1(
     U32* largerPtr  = smallerPtr + 1;
     U32 dummy32;   /* to be nullified at the end */
     U32 const windowLow = ms->window.lowLimit;
+    U32 const matchLow = windowLow ? windowLow : 1;
     U32 matchEndIdx = current+8+1;
     size_t bestLength = 8;
     U32 nbCompares = 1U << cParams->searchLog;
@@ -308,7 +400,7 @@ static U32 ZSTD_insertBt1(
     assert(ip <= iend-8);   /* required for h calculation */
     hashTable[h] = current;   /* Update Hash Table */
 
-    while (nbCompares-- && (matchIndex
+    while (nbCompares-- && (matchIndex >= matchLow)) {
         U32* const nextPtr = bt + 2*(matchIndex & btMask);
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
         assert(matchIndex < current);
@@ -334,8 +426,8 @@ static U32 ZSTD_insertBt1(
         }
 #endif
 
-        if (
-            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if extDict
+        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
+            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
             match = base + matchIndex;
             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
         } else {
@@ -381,16 +473,16 @@ FORCE_INLINE_TEMPLATE
 void ZSTD_updateTree_internal(
                 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
                 const BYTE* const ip, const BYTE* const iend,
-                const U32 mls, const
+                const U32 mls, const ZSTD_dictMode_e dictMode)
 {
     const BYTE* const base = ms->window.base;
     U32 const target = (U32)(ip - base);
     U32 idx = ms->nextToUpdate;
-    DEBUGLOG(
-                idx, target,
+    DEBUGLOG(5, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
+                idx, target, dictMode);
 
     while(idx < target)
-        idx += ZSTD_insertBt1(ms, cParams, base+idx, iend, mls,
+        idx += ZSTD_insertBt1(ms, cParams, base+idx, iend, mls, dictMode == ZSTD_extDict);
     ms->nextToUpdate = target;
 }
 
@@ -398,13 +490,13 @@ void ZSTD_updateTree(
                 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
                 const BYTE* ip, const BYTE* iend)
 {
-    ZSTD_updateTree_internal(ms, cParams, ip, iend, cParams->searchLength,
+    ZSTD_updateTree_internal(ms, cParams, ip, iend, cParams->searchLength, ZSTD_noDict);
 }
 
 FORCE_INLINE_TEMPLATE
 U32 ZSTD_insertBtAndGetAllMatches (
                 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
-                const BYTE* const ip, const BYTE* const iLimit,
+                const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
                 U32 rep[ZSTD_REP_NUM], U32 const ll0,
                 ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
 {
@@ -426,6 +518,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
     const BYTE* const prefixStart = base + dictLimit;
     U32 const btLow = btMask >= current ? 0 : current - btMask;
     U32 const windowLow = ms->window.lowLimit;
+    U32 const matchLow = windowLow ? windowLow : 1;
     U32* smallerPtr = bt + 2*(current&btMask);
     U32* largerPtr  = bt + 2*(current&btMask) + 1;
     U32 matchEndIdx = current+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
@@ -433,8 +526,16 @@ U32 ZSTD_insertBtAndGetAllMatches (
     U32 mnum = 0;
     U32 nbCompares = 1U << cParams->searchLog;
 
+    const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
+    const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
+    const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
+    U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
+    U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
+    U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
+    U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && btMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - btMask : dmsLowLimit;
+
     size_t bestLength = lengthToBeat-1;
-    DEBUGLOG(
+    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
 
     /* check repCode */
     {   U32 const lastR = ZSTD_REP_NUM + ll0;
@@ -449,18 +550,26 @@ U32 ZSTD_insertBtAndGetAllMatches (
                 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
             }
         } else {  /* repIndex < dictLimit || repIndex >= current */
-            const BYTE* const repMatch =
+            const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
+                                             dmsBase + repIndex - dmsIndexDelta :
+                                             dictBase + repIndex;
             assert(current >= windowLow);
-            if (
+            if ( dictMode == ZSTD_extDict
               && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow)  /* equivalent to `current > repIndex >= windowLow` */
                  & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
               && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
+            }
+            if (dictMode == ZSTD_dictMatchState
+              && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `current > repIndex >= dmsLowLimit` */
+                 & ((U32)((dictLimit-1) - repIndex) >= 3) )  /* intentional overflow : do not test positions overlapping 2 memory segments */
+              && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
+                repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
         }   }
         /* save longer solution */
         if (repLen > bestLength) {
-            DEBUGLOG(8, "found
-                        repCode
+            DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
+                        repCode, ll0, repOffset, repLen);
             bestLength = repLen;
             matches[mnum].off = repCode - ll0;
             matches[mnum].len = (U32)repLen;
@@ -473,10 +582,10 @@ U32 ZSTD_insertBtAndGetAllMatches (
     /* HC3 match finder */
     if ((mls == 3) /*static*/ && (bestLength < mls)) {
         U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
-        if ((matchIndex3
+        if ((matchIndex3 >= matchLow)
           & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
             size_t mlen;
-            if ((
+            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
                 const BYTE* const match = base + matchIndex3;
                 mlen = ZSTD_count(ip, match, iLimit);
             } else {
@@ -498,17 +607,21 @@ U32 ZSTD_insertBtAndGetAllMatches (
                  (ip+mlen == iLimit) ) {  /* best possible length */
                 ms->nextToUpdate = current+1;  /* skip insertion */
                 return 1;
-    ...
+            }
+        }
+    }
+        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
+    }
 
     hashTable[h] = current;   /* Update Hash Table */
 
-    while (nbCompares-- && (matchIndex
+    while (nbCompares-- && (matchIndex >= matchLow)) {
         U32* const nextPtr = bt + 2*(matchIndex & btMask);
         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
         const BYTE* match;
         assert(current > matchIndex);
 
-        if ((
+        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
             assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
             match = base + matchIndex;
             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
@@ -520,8 +633,8 @@ U32 ZSTD_insertBtAndGetAllMatches (
         }
 
         if (matchLength > bestLength) {
-            DEBUGLOG(8, "found match of length %u at distance %u",
-                        (U32)matchLength, current - matchIndex);
+            DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
+                    (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
             assert(matchEndIdx > matchIndex);
             if (matchLength > matchEndIdx - matchIndex)
                 matchEndIdx = matchIndex + (U32)matchLength;
@@ -529,9 +642,10 @@ U32 ZSTD_insertBtAndGetAllMatches (
             matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
             matches[mnum].len = (U32)matchLength;
             mnum++;
-            if (matchLength > ZSTD_OPT_NUM)
-    ...
+            if ( (matchLength > ZSTD_OPT_NUM)
+               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
+                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
+                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
             }
         }
 
@@ -552,6 +666,46 @@ U32 ZSTD_insertBtAndGetAllMatches (
 
     *smallerPtr = *largerPtr = 0;
 
+    if (dictMode == ZSTD_dictMatchState && nbCompares) {
+        U32 dictMatchIndex = dms->hashTable[h];
+        const U32* const dmsBt = dms->chainTable;
+        commonLengthSmaller = commonLengthLarger = 0;
+        while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
+            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & btMask);
+            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+            const BYTE* match = dmsBase + dictMatchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
+            if (dictMatchIndex+matchLength >= dmsHighLimit)
+                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
+
+            if (matchLength > bestLength) {
+                matchIndex = dictMatchIndex + dmsIndexDelta;
+                DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
+                        (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
+                if (matchLength > matchEndIdx - matchIndex)
+                    matchEndIdx = matchIndex + (U32)matchLength;
+                bestLength = matchLength;
+                matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
+                matches[mnum].len = (U32)matchLength;
+                mnum++;
+                if ( (matchLength > ZSTD_OPT_NUM)
+                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
+                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+                }
+            }
+
+            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
+            if (match[matchLength] < ip[matchLength]) {
+                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+                dictMatchIndex = nextPtr[1];          /* new matchIndex larger than previous (closer to current) */
+            } else {
+                /* match is larger than current */
+                commonLengthLarger = matchLength;
+                dictMatchIndex = nextPtr[0];
+            }
+        }
+    }
+
     assert(matchEndIdx > current+8);
     ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
     return mnum;
@@ -560,22 +714,22 @@ U32 ZSTD_insertBtAndGetAllMatches (
 
 FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
                         ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
-                        const BYTE* ip, const BYTE* const iHighLimit,
+                        const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
                         U32 rep[ZSTD_REP_NUM], U32 const ll0,
                         ZSTD_match_t* matches, U32 const lengthToBeat)
 {
     U32 const matchLengthSearch = cParams->searchLength;
-    DEBUGLOG(
+    DEBUGLOG(8, "ZSTD_BtGetAllMatches");
     if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
-    ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch,
+    ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, dictMode);
     switch(matchLengthSearch)
     {
-    case 3 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit,
+    case 3 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3);
     default :
-    case 4 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit,
-    case 5 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit,
+    case 4 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4);
+    case 5 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5);
     case 7 :
-    case 6 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit,
+    case 6 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6);
     }
 }
 
@@ -609,65 +763,18 @@ repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
 }
 
 
-    ...
-    const BYTE* anchor;
-    U32 litlen;
-    U32 rawLitCost;
-} cachedLiteralPrice_t;
-
-static U32 ZSTD_rawLiteralsCost_cached(
-                            cachedLiteralPrice_t* const cachedLitPrice,
-                            const BYTE* const anchor, U32 const litlen,
-                            const optState_t* const optStatePtr)
-{
-    U32 startCost;
-    U32 remainingLength;
-    const BYTE* startPosition;
-
-    if (anchor == cachedLitPrice->anchor) {
-        startCost = cachedLitPrice->rawLitCost;
-        startPosition = anchor + cachedLitPrice->litlen;
-        assert(litlen >= cachedLitPrice->litlen);
-        remainingLength = litlen - cachedLitPrice->litlen;
-    } else {
-        startCost = 0;
-        startPosition = anchor;
-        remainingLength = litlen;
-    }
-
-    {   U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr);
-        cachedLitPrice->anchor = anchor;
-        cachedLitPrice->litlen = litlen;
-        cachedLitPrice->rawLitCost = rawLitCost;
-        return rawLitCost;
-    }
-}
-
-static U32 ZSTD_fullLiteralsCost_cached(
-                            cachedLiteralPrice_t* const cachedLitPrice,
-                            const BYTE* const anchor, U32 const litlen,
-                            const optState_t* const optStatePtr)
-{
-    return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
-         + ZSTD_litLengthPrice(litlen, optStatePtr);
-}
-
-static int ZSTD_literalsContribution_cached(
-                            cachedLiteralPrice_t* const cachedLitPrice,
-                            const BYTE* const anchor, U32 const litlen,
-                            const optState_t* const optStatePtr)
+static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
 {
-    ...
-         + ZSTD_litLengthContribution(litlen, optStatePtr);
-    return contribution;
+    return sol.litlen + sol.mlen;
 }
 
-FORCE_INLINE_TEMPLATE
-    ...
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
+                               seqStore_t* seqStore,
+                               U32 rep[ZSTD_REP_NUM],
+                               const ZSTD_compressionParameters* cParams,
+                               const void* src, size_t srcSize,
+                               const int optLevel, const ZSTD_dictMode_e dictMode)
 {
     optState_t* const optStatePtr = &ms->opt;
     const BYTE* const istart = (const BYTE*)src;
@@ -683,66 +790,69 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore
 
     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
     ZSTD_match_t* const matches = optStatePtr->matchTable;
-    ...
+    ZSTD_optimal_t lastSequence;
 
     /* init */
     DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
+    assert(optLevel <= 2);
     ms->nextToUpdate3 = ms->nextToUpdate;
-    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
+    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
     ip += (ip==prefixStart);
-    memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));
 
     /* Match Loop */
     while (ip < ilimit) {
         U32 cur, last_pos = 0;
-        U32 best_mlen, best_off;
 
         /* find first match */
         {   U32 const litlen = (U32)(ip - anchor);
            U32 const ll0 = !litlen;
-            U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, ip, iend,
+            U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, ip, iend, dictMode, rep, ll0, matches, minMatch);
            if (!nbMatches) { ip++; continue; }
 
            /* initialize opt[0] */
            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
-            opt[0].mlen =
+            opt[0].mlen = 0;  /* means is_a_literal */
            opt[0].litlen = litlen;
+            opt[0].price = ZSTD_literalsContribution(anchor, litlen, optStatePtr, optLevel);
 
            /* large match -> immediate encoding */
            {   U32 const maxML = matches[nbMatches-1].len;
-    ...
+                U32 const maxOffset = matches[nbMatches-1].off;
+                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new serie",
+                            nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
 
                if (maxML > sufficient_len) {
-    ...
+                    lastSequence.litlen = litlen;
+                    lastSequence.mlen = maxML;
+                    lastSequence.off = maxOffset;
+                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
+                                maxML, sufficient_len);
                    cur = 0;
-                    last_pos =
+                    last_pos = ZSTD_totalLen(lastSequence);
                    goto _shortestPath;
            }   }
 
            /* set prices for first matches starting position == 0 */
-            {   U32 const literalsPrice =
+            {   U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 pos;
                U32 matchNb;
-                for (pos =
-                    opt[pos].
-                    opt[pos].price = ZSTD_MAX_PRICE;
+                for (pos = 1; pos < minMatch; pos++) {
+                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
                }
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    U32 const end = matches[matchNb].len;
                    repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
                    for ( ; pos <= end ; pos++ ) {
-                        U32 const matchPrice =
-    ...
+                        U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
+                        U32 const sequencePrice = literalsPrice + matchPrice;
+                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
+                                    pos, ZSTD_fCost(sequencePrice));
                        opt[pos].mlen = pos;
                        opt[pos].off = offset;
                        opt[pos].litlen = litlen;
-                        opt[pos].price =
+                        opt[pos].price = sequencePrice;
+                        ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
                        memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
                }   }
                last_pos = pos-1;
@@ -753,55 +863,67 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore
         for (cur = 1; cur <= last_pos; cur++) {
             const BYTE* const inr = ip + cur;
             assert(cur < ZSTD_OPT_NUM);
+            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
 
             /* Fix current position with one literal if cheaper */
-            {   U32 const litlen = (opt[cur-1].mlen ==
-                int price
-    ...
-                price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
-                }
+            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
+                int const price = opt[cur-1].price
+                                + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
+                                + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
+                                - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
                assert(price < 1000000000); /* overflow check */
                if (price <= opt[cur].price) {
-                    DEBUGLOG(7, "rPos:%u : better price (
-                                cur, price, opt[cur].price)
-    ...
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
+                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
+                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
+                    opt[cur].mlen = 0;
                    opt[cur].off = 0;
                    opt[cur].litlen = litlen;
                    opt[cur].price = price;
                    memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
-    ...
+                } else {
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
+                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
+                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
+                }
+            }
 
             /* last match must start at a minimum distance of 8 from oend */
             if (inr > ilimit) continue;
 
             if (cur == last_pos) break;
 
-    ...
+            if ( (optLevel==0) /*static_test*/
+              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
+                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
+            }
 
-            {   U32 const ll0 = (opt[cur].mlen !=
-                U32 const litlen = (opt[cur].mlen ==
-                U32 const previousPrice =
-                U32 const basePrice = previousPrice +
-                U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend,
+            {   U32 const ll0 = (opt[cur].mlen != 0);
+                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
+                U32 const previousPrice = opt[cur].price;
+                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+                U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch);
                U32 matchNb;
-                if (!nbMatches)
+                if (!nbMatches) {
+                    DEBUGLOG(7, "rPos:%u : no match found", cur);
+                    continue;
+                }
 
                {   U32 const maxML = matches[nbMatches-1].len;
-                    DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u",
-                                cur, nbMatches, maxML);
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
+                                inr-istart, cur, nbMatches, maxML);
 
                    if ( (maxML > sufficient_len)
-    ...
+                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
+                        lastSequence.mlen = maxML;
+                        lastSequence.off = matches[nbMatches-1].off;
+                        lastSequence.litlen = litlen;
+                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
+                        last_pos = cur + ZSTD_totalLen(lastSequence);
+                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
                        goto _shortestPath;
-
-                }
+                }   }
 
             /* set prices using matches found at position == cur */
             for (matchNb = 0; matchNb < nbMatches; matchNb++) {
@@ -811,81 +933,97 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore
                U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
                U32 mlen;
 
-                DEBUGLOG(7, "testing match %u => offCode=%
+                DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
                            matchNb, matches[matchNb].off, lastML, litlen);
 
-                for (mlen = lastML; mlen >= startML; mlen--) {
+                for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
                    U32 const pos = cur + mlen;
                    int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
 
                    if ((pos > last_pos) || (price < opt[pos].price)) {
-                        DEBUGLOG(7, "rPos:%u => new better price (
-                                    pos, price, opt[pos].price);
-                        while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }
+                        DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
+                                    pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
+                        while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
                        opt[pos].mlen = mlen;
                        opt[pos].off = offset;
                        opt[pos].litlen = litlen;
                        opt[pos].price = price;
+                        ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
                        memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
                    } else {
-    ...
+                        DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
+                                    pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
+                        if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
                    }
            }   }   }
        }  /* for (cur = 1; cur <= last_pos; cur++) */
 
-    ...
-        cur
+        lastSequence = opt[last_pos];
+        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
+        assert(cur < ZSTD_OPT_NUM);  /* control overflow*/
 
 _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
-        assert(opt[0].mlen ==
-    ...
-                U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
-                if (repCode >= 2) rep[2] = rep[1];
-                rep[1] = rep[0];
-                rep[0] = currentOffset;
+        assert(opt[0].mlen == 0);
+
+        {   U32 const storeEnd = cur + 1;
+            U32 storeStart = storeEnd;
+            U32 seqPos = cur;
+
+            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
+                        last_pos, cur);
+            assert(storeEnd < ZSTD_OPT_NUM);
+            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
+            opt[storeEnd] = lastSequence;
+            while (seqPos > 0) {
+                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
+                storeStart--;
+                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
+                opt[storeStart] = opt[seqPos];
+                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
+            }
+
+            /* save sequences */
+            DEBUGLOG(6, "sending selected sequences into seqStore")
+            {   U32 storePos;
+                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
+                    U32 const llen = opt[storePos].litlen;
+                    U32 const mlen = opt[storePos].mlen;
+                    U32 const offCode = opt[storePos].off;
+                    U32 const advance = llen + mlen;
+                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
+                                anchor - istart, llen, mlen);
+
+                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
+                        assert(storePos == storeEnd);   /* must be last sequence */
+                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
+                        continue;   /* will finish */
                    }
-                }
 
-    ...
+                    /* repcodes update : like ZSTD_updateRep(), but update in place */
+                    if (offCode >= ZSTD_REP_NUM) {  /* full offset */
+                        rep[2] = rep[1];
+                        rep[1] = rep[0];
+                        rep[0] = offCode - ZSTD_REP_MOVE;
+                    } else {   /* repcode */
+                        U32 const repCode = offCode + (llen==0);
+                        if (repCode) {  /* note : if repCode==0, no change */
+                            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+                            if (repCode >= 2) rep[2] = rep[1];
+                            rep[1] = rep[0];
+                            rep[0] = currentOffset;
+                    }   }
+
+                    assert(anchor + llen <= iend);
+                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
+                    ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
+                    anchor += advance;
+                    ip = anchor;
+            }   }
+            ZSTD_setBasePrices(optStatePtr, optLevel);
+        }
+
     }   /* while (ip < ilimit) */
 
     /* Return the last literals size */
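The sequence-emission loop above updates the repcode history in place instead of going through ZSTD_updateRep(). Pulled out into a standalone sketch (not part of the package; the sample offsets are arbitrary), the update reads:

    /* Sketch only: in-place repcode history update, mirroring the hunk above.
     * ZSTD_REP_NUM = 3 and ZSTD_REP_MOVE = ZSTD_REP_NUM - 1 = 2, as in zstd. */
    #include <stdio.h>
    typedef unsigned U32;

    #define ZSTD_REP_NUM  3
    #define ZSTD_REP_MOVE (ZSTD_REP_NUM - 1)

    static void updateRepInPlace(U32 rep[ZSTD_REP_NUM], U32 offCode, U32 llen)
    {
        if (offCode >= ZSTD_REP_NUM) {             /* full offset: push it onto the history */
            rep[2] = rep[1];
            rep[1] = rep[0];
            rep[0] = offCode - ZSTD_REP_MOVE;
        } else {                                   /* repcode */
            U32 const repCode = offCode + (llen == 0);
            if (repCode) {                         /* repCode==0 => rep[0] reused, no change */
                U32 const currentOffset = (repCode == ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
                if (repCode >= 2) rep[2] = rep[1];
                rep[1] = rep[0];
                rep[0] = currentOffset;
            }
        }
    }

    int main(void)
    {
        U32 rep[ZSTD_REP_NUM] = { 4, 8, 16 };            /* made-up history */
        updateRepInPlace(rep, 1, 5);                     /* repcode 1 after some literals */
        printf("%u %u %u\n", rep[0], rep[1], rep[2]);    /* 8 4 16 */
        updateRepInPlace(rep, 100 + ZSTD_REP_MOVE, 0);   /* full offset 100 */
        printf("%u %u %u\n", rep[0], rep[1], rep[2]);    /* 100 8 4 */
        return 0;
    }

A repcode of 0 with a non-zero literal length means "reuse rep[0]" and leaves the history untouched; when llen==0 the repcode is shifted by one, matching the convention used elsewhere in zstd.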
@@ -895,29 +1033,94 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
 
 size_t ZSTD_compressBlock_btopt(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        ZSTD_compressionParameters
+        const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
 {
     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/,
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
+}
+
+
+/* used in 2-pass strategy */
+static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus)
+{
+    U32 s, sum=0;
+    assert(ZSTD_FREQ_DIV+bonus > 0);
+    for (s=0; s<=lastEltIndex; s++) {
+        table[s] <<= ZSTD_FREQ_DIV+bonus;
+        table[s]--;
+        sum += table[s];
+    }
+    return sum;
+}
+
+/* used in 2-pass strategy */
+MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
+{
+    optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
+    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1);
+    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1);
+    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1);
 }
 
 size_t ZSTD_compressBlock_btultra(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        ZSTD_compressionParameters
+        const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
+#if 0
+    /* 2-pass strategy (disabled)
+     * this strategy makes a first pass over first block to collect statistics
+     * and seed next round's statistics with it.
+     * The compression ratio gain is generally small (~0.5% on first block),
+     * the cost is 2x cpu time on first block. */
+    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
+    if ( (ms->opt.litLengthSum==0)   /* first block */
+      && (seqStore->sequences == seqStore->sequencesStart)   /* no ldm */
+      && (ms->window.dictLimit == ms->window.lowLimit) ) {   /* no dictionary */
+        U32 tmpRep[ZSTD_REP_NUM];
+        DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics");
+        assert(ms->nextToUpdate >= ms->window.dictLimit
+            && ms->nextToUpdate <= ms->window.dictLimit + 1);
+        memcpy(tmpRep, rep, sizeof(tmpRep));
+        ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, cParams, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt*/
+        ZSTD_resetSeqStore(seqStore);
+        /* invalidate first scan from history */
+        ms->window.base -= srcSize;
+        ms->window.dictLimit += (U32)srcSize;
+        ms->window.lowLimit = ms->window.dictLimit;
+        ms->nextToUpdate = ms->window.dictLimit;
+        ms->nextToUpdate3 = ms->window.dictLimit;
+        /* re-inforce weight of collected statistics */
+        ZSTD_upscaleStats(&ms->opt);
+    }
+#endif
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_btopt_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_btultra_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/,
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
 }
 
 size_t ZSTD_compressBlock_btopt_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        ZSTD_compressionParameters
+        const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/,
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
 }
 
 size_t ZSTD_compressBlock_btultra_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        ZSTD_compressionParameters
+        const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/,
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
 }