zstdlib 0.7.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +20 -0
- data/README.md +7 -1
- data/Rakefile +38 -8
- data/ext/{zstdlib → zstdlib_c}/extconf.rb +11 -6
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.2/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.3/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.4/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.5/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.6/zstdlib.c +2 -2
- data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.7/zstdlib.c +2 -2
- data/ext/zstdlib_c/ruby/zlib-3.0/zstdlib.c +4994 -0
- data/ext/zstdlib_c/ruby/zlib-3.1/zstdlib.c +5076 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/adler32.c +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/compress.c +0 -0
- data/ext/zstdlib_c/zlib-1.2.12/crc32.c +1116 -0
- data/ext/zstdlib_c/zlib-1.2.12/crc32.h +9446 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/deflate.c +78 -30
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/deflate.h +12 -15
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzclose.c +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzguts.h +3 -2
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzlib.c +5 -3
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzread.c +5 -7
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/gzwrite.c +25 -13
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/infback.c +2 -1
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inffast.c +14 -14
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inffast.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inffixed.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inflate.c +39 -8
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inflate.h +3 -2
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inftrees.c +3 -3
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/inftrees.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/trees.c +27 -48
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/trees.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/uncompr.c +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zconf.h +0 -0
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zlib.h +123 -100
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zutil.c +2 -2
- data/ext/{zstdlib/zlib-1.2.11 → zstdlib_c/zlib-1.2.12}/zutil.h +12 -9
- data/ext/{zstdlib → zstdlib_c}/zlib.mk +0 -0
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper/zlibwrapper.c +1 -5
- data/ext/{zstdlib → zstdlib_c}/zlibwrapper.mk +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/bitstream.h +46 -22
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/compiler.h +335 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/cpu.h +1 -3
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/debug.c +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/debug.h +12 -19
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/entropy_common.c +368 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.c +2 -1
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/error_private.h +159 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/fse.h +41 -12
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/fse_decompress.c +139 -22
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/huf.h +47 -23
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/mem.h +87 -98
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/pool.c +34 -23
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/pool.h +4 -4
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/portability_macros.h +137 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/threading.c +6 -5
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/threading.h +0 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.c +24 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.h +5686 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_common.c +10 -10
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_deps.h +111 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_internal.h +191 -145
- data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_trace.h +163 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/clevels.h +134 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/fse_compress.c +89 -46
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.c +27 -29
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.h +2 -2
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/huf_compress.c +1370 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress.c +2917 -868
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_internal.h +458 -125
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.c +12 -11
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.h +4 -2
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.c +41 -18
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_superblock.c +26 -298
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_superblock.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_cwksp.h +234 -83
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.c +313 -138
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.c +329 -150
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.h +1 -1
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.c +2104 -0
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.h +125 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.c +321 -216
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.h +9 -2
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_ldm_geartab.h +106 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.c +412 -166
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.c +169 -453
- data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstdmt_compress.h +113 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/huf_decompress.c +1044 -403
- data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress_amd64.S +585 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.c +9 -9
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.h +2 -2
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress.c +450 -105
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.c +913 -273
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.h +14 -5
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_internal.h +59 -12
- data/ext/zstdlib_c/zstd-1.5.2/lib/zdict.h +452 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/lib/zstd.h +699 -214
- data/ext/{zstdlib/zstd-1.4.5/lib/common → zstdlib_c/zstd-1.5.2/lib}/zstd_errors.h +2 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzclose.c +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzcompatibility.h +1 -1
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzguts.h +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzlib.c +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzread.c +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzwrite.c +0 -0
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.c +133 -44
- data/ext/{zstdlib/zstd-1.4.5 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.h +1 -1
- data/ext/zstdlib_c/zstd.mk +15 -0
- data/lib/zstdlib.rb +2 -2
- metadata +118 -109
- data/ext/zstdlib/zlib-1.2.11/crc32.c +0 -442
- data/ext/zstdlib/zlib-1.2.11/crc32.h +0 -441
- data/ext/zstdlib/zstd-1.4.5/lib/common/compiler.h +0 -175
- data/ext/zstdlib/zstd-1.4.5/lib/common/entropy_common.c +0 -216
- data/ext/zstdlib/zstd-1.4.5/lib/common/error_private.h +0 -80
- data/ext/zstdlib/zstd-1.4.5/lib/common/xxhash.c +0 -864
- data/ext/zstdlib/zstd-1.4.5/lib/common/xxhash.h +0 -285
- data/ext/zstdlib/zstd-1.4.5/lib/compress/huf_compress.c +0 -798
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_lazy.c +0 -1138
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_lazy.h +0 -67
- data/ext/zstdlib/zstd-1.4.5/lib/compress/zstdmt_compress.h +0 -192
- data/ext/zstdlib/zstd.mk +0 -14
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -14,7 +14,6 @@
|
|
14
14
|
|
15
15
|
|
16
16
|
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
|
17
|
-
#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
|
18
17
|
#define ZSTD_MAX_PRICE (1<<30)
|
19
18
|
|
20
19
|
#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
|
@@ -24,11 +23,11 @@
|
|
24
23
|
* Price functions for optimal parser
|
25
24
|
***************************************/
|
26
25
|
|
27
|
-
#if 0 /* approximation at bit level */
|
26
|
+
#if 0 /* approximation at bit level (for tests) */
|
28
27
|
# define BITCOST_ACCURACY 0
|
29
28
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
30
|
-
# define WEIGHT(stat)
|
31
|
-
#elif 0 /* fractional bit accuracy */
|
29
|
+
# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
|
30
|
+
#elif 0 /* fractional bit accuracy (for tests) */
|
32
31
|
# define BITCOST_ACCURACY 8
|
33
32
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
34
33
|
# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
|
@@ -66,7 +65,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)
|
|
66
65
|
|
67
66
|
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
|
68
67
|
{
|
69
|
-
return optPtr->literalCompressionMode !=
|
68
|
+
return optPtr->literalCompressionMode != ZSTD_ps_disable;
|
70
69
|
}
|
71
70
|
|
72
71
|
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
@@ -79,25 +78,46 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
|
79
78
|
}
|
80
79
|
|
81
80
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
81
|
+
static U32 sum_u32(const unsigned table[], size_t nbElts)
|
82
|
+
{
|
83
|
+
size_t n;
|
84
|
+
U32 total = 0;
|
85
|
+
for (n=0; n<nbElts; n++) {
|
86
|
+
total += table[n];
|
87
|
+
}
|
88
|
+
return total;
|
89
|
+
}
|
90
|
+
|
91
|
+
static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
|
86
92
|
{
|
87
93
|
U32 s, sum=0;
|
88
|
-
DEBUGLOG(5, "
|
89
|
-
assert(
|
94
|
+
DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
|
95
|
+
assert(shift < 30);
|
90
96
|
for (s=0; s<lastEltIndex+1; s++) {
|
91
|
-
table[s] = 1 + (table[s] >>
|
97
|
+
table[s] = 1 + (table[s] >> shift);
|
92
98
|
sum += table[s];
|
93
99
|
}
|
94
100
|
return sum;
|
95
101
|
}
|
96
102
|
|
103
|
+
/* ZSTD_scaleStats() :
|
104
|
+
* reduce all elements in table if sum is too large
|
105
|
+
* return the resulting sum of elements */
|
106
|
+
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
|
107
|
+
{
|
108
|
+
U32 const prevsum = sum_u32(table, lastEltIndex+1);
|
109
|
+
U32 const factor = prevsum >> logTarget;
|
110
|
+
DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
|
111
|
+
assert(logTarget < 30);
|
112
|
+
if (factor <= 1) return prevsum;
|
113
|
+
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
|
114
|
+
}
|
115
|
+
|
97
116
|
/* ZSTD_rescaleFreqs() :
|
98
117
|
* if first block (detected by optPtr->litLengthSum == 0) : init statistics
|
99
118
|
* take hints from dictionary if there is one
|
100
|
-
*
|
119
|
+
* and init from zero if there is none,
|
120
|
+
* using src for literals stats, and baseline stats for sequence symbols
|
101
121
|
* otherwise downscale existing stats, to be used as seed for next block.
|
102
122
|
*/
|
103
123
|
static void
|
@@ -126,7 +146,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
126
146
|
optPtr->litSum = 0;
|
127
147
|
for (lit=0; lit<=MaxLit; lit++) {
|
128
148
|
U32 const scaleLog = 11; /* scale to 2K */
|
129
|
-
U32 const bitCost =
|
149
|
+
U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
|
130
150
|
assert(bitCost <= scaleLog);
|
131
151
|
optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
132
152
|
optPtr->litSum += optPtr->litFreq[lit];
|
@@ -174,14 +194,19 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
174
194
|
if (compressedLiterals) {
|
175
195
|
unsigned lit = MaxLit;
|
176
196
|
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
|
177
|
-
optPtr->litSum =
|
197
|
+
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
|
178
198
|
}
|
179
199
|
|
180
|
-
{ unsigned
|
181
|
-
|
182
|
-
|
200
|
+
{ unsigned const baseLLfreqs[MaxLL+1] = {
|
201
|
+
4, 2, 1, 1, 1, 1, 1, 1,
|
202
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
203
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
204
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
205
|
+
1, 1, 1, 1
|
206
|
+
};
|
207
|
+
ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
|
208
|
+
optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
|
183
209
|
}
|
184
|
-
optPtr->litLengthSum = MaxLL+1;
|
185
210
|
|
186
211
|
{ unsigned ml;
|
187
212
|
for (ml=0; ml<=MaxML; ml++)
|
@@ -189,21 +214,26 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
189
214
|
}
|
190
215
|
optPtr->matchLengthSum = MaxML+1;
|
191
216
|
|
192
|
-
{ unsigned
|
193
|
-
|
194
|
-
|
217
|
+
{ unsigned const baseOFCfreqs[MaxOff+1] = {
|
218
|
+
6, 2, 1, 1, 2, 3, 4, 4,
|
219
|
+
4, 3, 2, 1, 1, 1, 1, 1,
|
220
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
221
|
+
1, 1, 1, 1, 1, 1, 1, 1
|
222
|
+
};
|
223
|
+
ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
|
224
|
+
optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
|
195
225
|
}
|
196
|
-
|
226
|
+
|
197
227
|
|
198
228
|
}
|
199
229
|
|
200
230
|
} else { /* new block : re-use previous statistics, scaled down */
|
201
231
|
|
202
232
|
if (compressedLiterals)
|
203
|
-
optPtr->litSum =
|
204
|
-
optPtr->litLengthSum =
|
205
|
-
optPtr->matchLengthSum =
|
206
|
-
optPtr->offCodeSum =
|
233
|
+
optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
|
234
|
+
optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
|
235
|
+
optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
|
236
|
+
optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
|
207
237
|
}
|
208
238
|
|
209
239
|
ZSTD_setBasePrices(optPtr, optLevel);
|
@@ -239,7 +269,16 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
|
239
269
|
* cost of literalLength symbol */
|
240
270
|
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
|
241
271
|
{
|
242
|
-
|
272
|
+
assert(litLength <= ZSTD_BLOCKSIZE_MAX);
|
273
|
+
if (optPtr->priceType == zop_predef)
|
274
|
+
return WEIGHT(litLength, optLevel);
|
275
|
+
/* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
|
276
|
+
* because it isn't representable in the zstd format. So instead just
|
277
|
+
* call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
|
278
|
+
* would be all literals.
|
279
|
+
*/
|
280
|
+
if (litLength == ZSTD_BLOCKSIZE_MAX)
|
281
|
+
return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
|
243
282
|
|
244
283
|
/* dynamic statistics */
|
245
284
|
{ U32 const llCode = ZSTD_LLcode(litLength);
|
@@ -252,15 +291,17 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
|
|
252
291
|
/* ZSTD_getMatchPrice() :
|
253
292
|
* Provides the cost of the match part (offset + matchLength) of a sequence
|
254
293
|
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
|
255
|
-
*
|
294
|
+
* @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
|
295
|
+
* @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
|
296
|
+
*/
|
256
297
|
FORCE_INLINE_TEMPLATE U32
|
257
|
-
ZSTD_getMatchPrice(U32 const
|
298
|
+
ZSTD_getMatchPrice(U32 const offcode,
|
258
299
|
U32 const matchLength,
|
259
300
|
const optState_t* const optPtr,
|
260
301
|
int const optLevel)
|
261
302
|
{
|
262
303
|
U32 price;
|
263
|
-
U32 const offCode = ZSTD_highbit32(
|
304
|
+
U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
|
264
305
|
U32 const mlBase = matchLength - MINMATCH;
|
265
306
|
assert(matchLength >= MINMATCH);
|
266
307
|
|
@@ -303,8 +344,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
|
|
303
344
|
optPtr->litLengthSum++;
|
304
345
|
}
|
305
346
|
|
306
|
-
/*
|
307
|
-
{ U32 const offCode = ZSTD_highbit32(offsetCode
|
347
|
+
/* offset code : expected to follow storeSeq() numeric representation */
|
348
|
+
{ U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
|
308
349
|
assert(offCode <= MaxOff);
|
309
350
|
optPtr->offCodeFreq[offCode]++;
|
310
351
|
optPtr->offCodeSum++;
|
@@ -338,7 +379,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
|
|
338
379
|
|
339
380
|
/* Update hashTable3 up to ip (excluded)
|
340
381
|
Assumption : always within prefix (i.e. not within extDict) */
|
341
|
-
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
|
382
|
+
static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
|
342
383
|
U32* nextToUpdate3,
|
343
384
|
const BYTE* const ip)
|
344
385
|
{
|
@@ -364,11 +405,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
|
|
364
405
|
* Binary Tree search
|
365
406
|
***************************************/
|
366
407
|
/** ZSTD_insertBt1() : add one or multiple positions to tree.
|
367
|
-
*
|
408
|
+
* @param ip assumed <= iend-8.
|
409
|
+
* @param target The target of ZSTD_updateTree_internal() - we are filling to this position
|
368
410
|
* @return : nb of positions added */
|
369
411
|
static U32 ZSTD_insertBt1(
|
370
|
-
ZSTD_matchState_t* ms,
|
412
|
+
const ZSTD_matchState_t* ms,
|
371
413
|
const BYTE* const ip, const BYTE* const iend,
|
414
|
+
U32 const target,
|
372
415
|
U32 const mls, const int extDict)
|
373
416
|
{
|
374
417
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
@@ -386,32 +429,36 @@ static U32 ZSTD_insertBt1(
|
|
386
429
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
387
430
|
const BYTE* const prefixStart = base + dictLimit;
|
388
431
|
const BYTE* match;
|
389
|
-
const U32
|
390
|
-
const U32 btLow = btMask >=
|
391
|
-
U32* smallerPtr = bt + 2*(
|
432
|
+
const U32 curr = (U32)(ip-base);
|
433
|
+
const U32 btLow = btMask >= curr ? 0 : curr - btMask;
|
434
|
+
U32* smallerPtr = bt + 2*(curr&btMask);
|
392
435
|
U32* largerPtr = smallerPtr + 1;
|
393
436
|
U32 dummy32; /* to be nullified at the end */
|
394
|
-
|
395
|
-
|
437
|
+
/* windowLow is based on target because
|
438
|
+
* we only need positions that will be in the window at the end of the tree update.
|
439
|
+
*/
|
440
|
+
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
|
441
|
+
U32 matchEndIdx = curr+8+1;
|
396
442
|
size_t bestLength = 8;
|
397
443
|
U32 nbCompares = 1U << cParams->searchLog;
|
398
444
|
#ifdef ZSTD_C_PREDICT
|
399
|
-
U32 predictedSmall = *(bt + 2*((
|
400
|
-
U32 predictedLarge = *(bt + 2*((
|
445
|
+
U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
|
446
|
+
U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
|
401
447
|
predictedSmall += (predictedSmall>0);
|
402
448
|
predictedLarge += (predictedLarge>0);
|
403
449
|
#endif /* ZSTD_C_PREDICT */
|
404
450
|
|
405
|
-
DEBUGLOG(8, "ZSTD_insertBt1 (%u)",
|
451
|
+
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
|
406
452
|
|
453
|
+
assert(curr <= target);
|
407
454
|
assert(ip <= iend-8); /* required for h calculation */
|
408
|
-
hashTable[h] =
|
455
|
+
hashTable[h] = curr; /* Update Hash Table */
|
409
456
|
|
410
457
|
assert(windowLow > 0);
|
411
|
-
|
458
|
+
for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
|
412
459
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
413
460
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
414
|
-
assert(matchIndex <
|
461
|
+
assert(matchIndex < curr);
|
415
462
|
|
416
463
|
#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
|
417
464
|
const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
|
@@ -474,8 +521,8 @@ static U32 ZSTD_insertBt1(
|
|
474
521
|
*smallerPtr = *largerPtr = 0;
|
475
522
|
{ U32 positions = 0;
|
476
523
|
if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
|
477
|
-
assert(matchEndIdx >
|
478
|
-
return MAX(positions, matchEndIdx - (
|
524
|
+
assert(matchEndIdx > curr + 8);
|
525
|
+
return MAX(positions, matchEndIdx - (curr + 8));
|
479
526
|
}
|
480
527
|
}
|
481
528
|
|
@@ -492,7 +539,7 @@ void ZSTD_updateTree_internal(
|
|
492
539
|
idx, target, dictMode);
|
493
540
|
|
494
541
|
while(idx < target) {
|
495
|
-
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
|
542
|
+
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
|
496
543
|
assert(idx < (U32)(idx + forward));
|
497
544
|
idx += forward;
|
498
545
|
}
|
@@ -519,7 +566,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
519
566
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
520
567
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
521
568
|
const BYTE* const base = ms->window.base;
|
522
|
-
U32 const
|
569
|
+
U32 const curr = (U32)(ip-base);
|
523
570
|
U32 const hashLog = cParams->hashLog;
|
524
571
|
U32 const minMatch = (mls==3) ? 3 : 4;
|
525
572
|
U32* const hashTable = ms->hashTable;
|
@@ -533,12 +580,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
533
580
|
U32 const dictLimit = ms->window.dictLimit;
|
534
581
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
535
582
|
const BYTE* const prefixStart = base + dictLimit;
|
536
|
-
U32 const btLow = (btMask >=
|
537
|
-
U32 const windowLow = ZSTD_getLowestMatchIndex(ms,
|
583
|
+
U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
|
584
|
+
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
|
538
585
|
U32 const matchLow = windowLow ? windowLow : 1;
|
539
|
-
U32* smallerPtr = bt + 2*(
|
540
|
-
U32* largerPtr = bt + 2*(
|
541
|
-
U32 matchEndIdx =
|
586
|
+
U32* smallerPtr = bt + 2*(curr&btMask);
|
587
|
+
U32* largerPtr = bt + 2*(curr&btMask) + 1;
|
588
|
+
U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
|
542
589
|
U32 dummy32; /* to be nullified at the end */
|
543
590
|
U32 mnum = 0;
|
544
591
|
U32 nbCompares = 1U << cParams->searchLog;
|
@@ -557,7 +604,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
557
604
|
U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
|
558
605
|
|
559
606
|
size_t bestLength = lengthToBeat-1;
|
560
|
-
DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u",
|
607
|
+
DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
|
561
608
|
|
562
609
|
/* check repCode */
|
563
610
|
assert(ll0 <= 1); /* necessarily 1 or 0 */
|
@@ -565,29 +612,29 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
565
612
|
U32 repCode;
|
566
613
|
for (repCode = ll0; repCode < lastR; repCode++) {
|
567
614
|
U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
568
|
-
U32 const repIndex =
|
615
|
+
U32 const repIndex = curr - repOffset;
|
569
616
|
U32 repLen = 0;
|
570
|
-
assert(
|
571
|
-
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ <
|
617
|
+
assert(curr >= dictLimit);
|
618
|
+
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
|
572
619
|
/* We must validate the repcode offset because when we're using a dictionary the
|
573
620
|
* valid offset range shrinks when the dictionary goes out of bounds.
|
574
621
|
*/
|
575
622
|
if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
|
576
623
|
repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
|
577
624
|
}
|
578
|
-
} else { /* repIndex < dictLimit || repIndex >=
|
625
|
+
} else { /* repIndex < dictLimit || repIndex >= curr */
|
579
626
|
const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
|
580
627
|
dmsBase + repIndex - dmsIndexDelta :
|
581
628
|
dictBase + repIndex;
|
582
|
-
assert(
|
629
|
+
assert(curr >= windowLow);
|
583
630
|
if ( dictMode == ZSTD_extDict
|
584
|
-
&& ( ((repOffset-1) /*intentional overflow*/ <
|
631
|
+
&& ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
|
585
632
|
& (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
|
586
633
|
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
|
587
634
|
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
|
588
635
|
}
|
589
636
|
if (dictMode == ZSTD_dictMatchState
|
590
|
-
&& ( ((repOffset-1) /*intentional overflow*/ <
|
637
|
+
&& ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
|
591
638
|
& ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
|
592
639
|
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
|
593
640
|
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
|
@@ -597,7 +644,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
597
644
|
DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
|
598
645
|
repCode, ll0, repOffset, repLen);
|
599
646
|
bestLength = repLen;
|
600
|
-
matches[mnum].off = repCode - ll0;
|
647
|
+
matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */
|
601
648
|
matches[mnum].len = (U32)repLen;
|
602
649
|
mnum++;
|
603
650
|
if ( (repLen > sufficient_len)
|
@@ -609,7 +656,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
609
656
|
if ((mls == 3) /*static*/ && (bestLength < mls)) {
|
610
657
|
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
|
611
658
|
if ((matchIndex3 >= matchLow)
|
612
|
-
& (
|
659
|
+
& (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
|
613
660
|
size_t mlen;
|
614
661
|
if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
|
615
662
|
const BYTE* const match = base + matchIndex3;
|
@@ -624,26 +671,26 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
624
671
|
DEBUGLOG(8, "found small match with hlog3, of length %u",
|
625
672
|
(U32)mlen);
|
626
673
|
bestLength = mlen;
|
627
|
-
assert(
|
674
|
+
assert(curr > matchIndex3);
|
628
675
|
assert(mnum==0); /* no prior solution */
|
629
|
-
matches[0].off = (
|
676
|
+
matches[0].off = STORE_OFFSET(curr - matchIndex3);
|
630
677
|
matches[0].len = (U32)mlen;
|
631
678
|
mnum = 1;
|
632
679
|
if ( (mlen > sufficient_len) |
|
633
680
|
(ip+mlen == iLimit) ) { /* best possible length */
|
634
|
-
ms->nextToUpdate =
|
681
|
+
ms->nextToUpdate = curr+1; /* skip insertion */
|
635
682
|
return 1;
|
636
683
|
} } }
|
637
684
|
/* no dictMatchState lookup: dicts don't have a populated HC3 table */
|
638
|
-
}
|
685
|
+
} /* if (mls == 3) */
|
639
686
|
|
640
|
-
hashTable[h] =
|
687
|
+
hashTable[h] = curr; /* Update Hash Table */
|
641
688
|
|
642
|
-
|
689
|
+
for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
|
643
690
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
644
691
|
const BYTE* match;
|
645
692
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
646
|
-
assert(
|
693
|
+
assert(curr > matchIndex);
|
647
694
|
|
648
695
|
if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
|
649
696
|
assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
|
@@ -660,20 +707,19 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
660
707
|
|
661
708
|
if (matchLength > bestLength) {
|
662
709
|
DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
|
663
|
-
(U32)matchLength,
|
710
|
+
(U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
|
664
711
|
assert(matchEndIdx > matchIndex);
|
665
712
|
if (matchLength > matchEndIdx - matchIndex)
|
666
713
|
matchEndIdx = matchIndex + (U32)matchLength;
|
667
714
|
bestLength = matchLength;
|
668
|
-
matches[mnum].off = (
|
715
|
+
matches[mnum].off = STORE_OFFSET(curr - matchIndex);
|
669
716
|
matches[mnum].len = (U32)matchLength;
|
670
717
|
mnum++;
|
671
718
|
if ( (matchLength > ZSTD_OPT_NUM)
|
672
719
|
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
|
673
720
|
if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
|
674
721
|
break; /* drop, to preserve bt consistency (miss a little bit of compression) */
|
675
|
-
|
676
|
-
}
|
722
|
+
} }
|
677
723
|
|
678
724
|
if (match[matchLength] < ip[matchLength]) {
|
679
725
|
/* match smaller than current */
|
@@ -692,12 +738,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
692
738
|
|
693
739
|
*smallerPtr = *largerPtr = 0;
|
694
740
|
|
741
|
+
assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
|
695
742
|
if (dictMode == ZSTD_dictMatchState && nbCompares) {
|
696
743
|
size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
|
697
744
|
U32 dictMatchIndex = dms->hashTable[dmsH];
|
698
745
|
const U32* const dmsBt = dms->chainTable;
|
699
746
|
commonLengthSmaller = commonLengthLarger = 0;
|
700
|
-
|
747
|
+
for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
|
701
748
|
const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
|
702
749
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
703
750
|
const BYTE* match = dmsBase + dictMatchIndex;
|
@@ -708,18 +755,17 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
708
755
|
if (matchLength > bestLength) {
|
709
756
|
matchIndex = dictMatchIndex + dmsIndexDelta;
|
710
757
|
DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
|
711
|
-
(U32)matchLength,
|
758
|
+
(U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
|
712
759
|
if (matchLength > matchEndIdx - matchIndex)
|
713
760
|
matchEndIdx = matchIndex + (U32)matchLength;
|
714
761
|
bestLength = matchLength;
|
715
|
-
matches[mnum].off = (
|
762
|
+
matches[mnum].off = STORE_OFFSET(curr - matchIndex);
|
716
763
|
matches[mnum].len = (U32)matchLength;
|
717
764
|
mnum++;
|
718
765
|
if ( (matchLength > ZSTD_OPT_NUM)
|
719
766
|
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
|
720
767
|
break; /* drop, to guarantee consistency (miss a little bit of compression) */
|
721
|
-
|
722
|
-
}
|
768
|
+
} }
|
723
769
|
|
724
770
|
if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
|
725
771
|
if (match[matchLength] < ip[matchLength]) {
|
@@ -729,47 +775,242 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
729
775
|
/* match is larger than current */
|
730
776
|
commonLengthLarger = matchLength;
|
731
777
|
dictMatchIndex = nextPtr[0];
|
732
|
-
|
733
|
-
}
|
734
|
-
}
|
778
|
+
} } } /* if (dictMode == ZSTD_dictMatchState) */
|
735
779
|
|
736
|
-
assert(matchEndIdx >
|
780
|
+
assert(matchEndIdx > curr+8);
|
737
781
|
ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
|
738
782
|
return mnum;
|
739
783
|
}
|
740
784
|
|
785
|
+
typedef U32 (*ZSTD_getAllMatchesFn)(
|
786
|
+
ZSTD_match_t*,
|
787
|
+
ZSTD_matchState_t*,
|
788
|
+
U32*,
|
789
|
+
const BYTE*,
|
790
|
+
const BYTE*,
|
791
|
+
const U32 rep[ZSTD_REP_NUM],
|
792
|
+
U32 const ll0,
|
793
|
+
U32 const lengthToBeat);
|
794
|
+
|
795
|
+
FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
|
796
|
+
ZSTD_match_t* matches,
|
797
|
+
ZSTD_matchState_t* ms,
|
798
|
+
U32* nextToUpdate3,
|
799
|
+
const BYTE* ip,
|
800
|
+
const BYTE* const iHighLimit,
|
801
|
+
const U32 rep[ZSTD_REP_NUM],
|
802
|
+
U32 const ll0,
|
803
|
+
U32 const lengthToBeat,
|
804
|
+
const ZSTD_dictMode_e dictMode,
|
805
|
+
const U32 mls)
|
806
|
+
{
|
807
|
+
assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
|
808
|
+
DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
|
809
|
+
if (ip < ms->window.base + ms->nextToUpdate)
|
810
|
+
return 0; /* skipped area */
|
811
|
+
ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
|
812
|
+
return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
|
813
|
+
}
|
814
|
+
|
815
|
+
#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
|
816
|
+
|
817
|
+
#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
|
818
|
+
static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
|
819
|
+
ZSTD_match_t* matches, \
|
820
|
+
ZSTD_matchState_t* ms, \
|
821
|
+
U32* nextToUpdate3, \
|
822
|
+
const BYTE* ip, \
|
823
|
+
const BYTE* const iHighLimit, \
|
824
|
+
const U32 rep[ZSTD_REP_NUM], \
|
825
|
+
U32 const ll0, \
|
826
|
+
U32 const lengthToBeat) \
|
827
|
+
{ \
|
828
|
+
return ZSTD_btGetAllMatches_internal( \
|
829
|
+
matches, ms, nextToUpdate3, ip, iHighLimit, \
|
830
|
+
rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
|
831
|
+
}
|
741
832
|
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
833
|
+
#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
|
834
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
|
835
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
|
836
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
|
837
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
|
838
|
+
|
839
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
|
840
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
|
841
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
|
842
|
+
|
843
|
+
#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
|
844
|
+
{ \
|
845
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
|
846
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
|
847
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
|
848
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
|
849
|
+
}
|
850
|
+
|
851
|
+
static ZSTD_getAllMatchesFn
|
852
|
+
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
|
750
853
|
{
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
854
|
+
ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
|
855
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
|
856
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
|
857
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
|
858
|
+
};
|
859
|
+
U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
|
860
|
+
assert((U32)dictMode < 3);
|
861
|
+
assert(mls - 3 < 4);
|
862
|
+
return getAllMatchesFns[(int)dictMode][mls - 3];
|
863
|
+
}
|
864
|
+
|
865
|
+
/*************************
|
866
|
+
* LDM helper functions *
|
867
|
+
*************************/
|
868
|
+
|
869
|
+
/* Struct containing info needed to make decision about ldm inclusion */
|
870
|
+
typedef struct {
|
871
|
+
rawSeqStore_t seqStore; /* External match candidates store for this block */
|
872
|
+
U32 startPosInBlock; /* Start position of the current match candidate */
|
873
|
+
U32 endPosInBlock; /* End position of the current match candidate */
|
874
|
+
U32 offset; /* Offset of the match candidate */
|
875
|
+
} ZSTD_optLdm_t;
|
876
|
+
|
877
|
+
/* ZSTD_optLdm_skipRawSeqStoreBytes():
|
878
|
+
* Moves forward in @rawSeqStore by @nbBytes,
|
879
|
+
* which will update the fields 'pos' and 'posInSequence'.
|
880
|
+
*/
|
881
|
+
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
|
882
|
+
{
|
883
|
+
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
|
884
|
+
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
|
885
|
+
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
|
886
|
+
if (currPos >= currSeq.litLength + currSeq.matchLength) {
|
887
|
+
currPos -= currSeq.litLength + currSeq.matchLength;
|
888
|
+
rawSeqStore->pos++;
|
889
|
+
} else {
|
890
|
+
rawSeqStore->posInSequence = currPos;
|
891
|
+
break;
|
892
|
+
}
|
893
|
+
}
|
894
|
+
if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
|
895
|
+
rawSeqStore->posInSequence = 0;
|
764
896
|
}
|
765
897
|
}
|
766
898
|
|
899
|
+
/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
|
900
|
+
* Calculates the beginning and end of the next match in the current block.
|
901
|
+
* Updates 'pos' and 'posInSequence' of the ldmSeqStore.
|
902
|
+
*/
|
903
|
+
static void
|
904
|
+
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
|
905
|
+
U32 blockBytesRemaining)
|
906
|
+
{
|
907
|
+
rawSeq currSeq;
|
908
|
+
U32 currBlockEndPos;
|
909
|
+
U32 literalsBytesRemaining;
|
910
|
+
U32 matchBytesRemaining;
|
911
|
+
|
912
|
+
/* Setting match end position to MAX to ensure we never use an LDM during this block */
|
913
|
+
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
|
914
|
+
optLdm->startPosInBlock = UINT_MAX;
|
915
|
+
optLdm->endPosInBlock = UINT_MAX;
|
916
|
+
return;
|
917
|
+
}
|
918
|
+
/* Calculate appropriate bytes left in matchLength and litLength
|
919
|
+
* after adjusting based on ldmSeqStore->posInSequence */
|
920
|
+
currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
|
921
|
+
assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
|
922
|
+
currBlockEndPos = currPosInBlock + blockBytesRemaining;
|
923
|
+
literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
|
924
|
+
currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
|
925
|
+
0;
|
926
|
+
matchBytesRemaining = (literalsBytesRemaining == 0) ?
|
927
|
+
currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
|
928
|
+
currSeq.matchLength;
|
929
|
+
|
930
|
+
/* If there are more literal bytes than bytes remaining in block, no ldm is possible */
|
931
|
+
if (literalsBytesRemaining >= blockBytesRemaining) {
|
932
|
+
optLdm->startPosInBlock = UINT_MAX;
|
933
|
+
optLdm->endPosInBlock = UINT_MAX;
|
934
|
+
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
|
935
|
+
return;
|
936
|
+
}
|
937
|
+
|
938
|
+
/* Matches may be < MINMATCH by this process. In that case, we will reject them
|
939
|
+
when we are deciding whether or not to add the ldm */
|
940
|
+
optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
|
941
|
+
optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
|
942
|
+
optLdm->offset = currSeq.offset;
|
943
|
+
|
944
|
+
if (optLdm->endPosInBlock > currBlockEndPos) {
|
945
|
+
/* Match ends after the block ends, we can't use the whole match */
|
946
|
+
optLdm->endPosInBlock = currBlockEndPos;
|
947
|
+
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
|
948
|
+
} else {
|
949
|
+
/* Consume nb of bytes equal to size of sequence left */
|
950
|
+
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
|
951
|
+
}
|
952
|
+
}
|
953
|
+
|
954
|
+
/* ZSTD_optLdm_maybeAddMatch():
|
955
|
+
* Adds a match if it's long enough,
|
956
|
+
* based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
|
957
|
+
* into 'matches'. Maintains the correct ordering of 'matches'.
|
958
|
+
*/
|
959
|
+
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
|
960
|
+
const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
|
961
|
+
{
|
962
|
+
U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
|
963
|
+
/* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
|
964
|
+
U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
|
965
|
+
|
966
|
+
/* Ensure that current block position is not outside of the match */
|
967
|
+
if (currPosInBlock < optLdm->startPosInBlock
|
968
|
+
|| currPosInBlock >= optLdm->endPosInBlock
|
969
|
+
|| candidateMatchLength < MINMATCH) {
|
970
|
+
return;
|
971
|
+
}
|
972
|
+
|
973
|
+
if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
|
974
|
+
U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
|
975
|
+
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
|
976
|
+
candidateOffCode, candidateMatchLength, currPosInBlock);
|
977
|
+
matches[*nbMatches].len = candidateMatchLength;
|
978
|
+
matches[*nbMatches].off = candidateOffCode;
|
979
|
+
(*nbMatches)++;
|
980
|
+
}
|
981
|
+
}
|
982
|
+
|
983
|
+
/* ZSTD_optLdm_processMatchCandidate():
|
984
|
+
* Wrapper function to update ldm seq store and call ldm functions as necessary.
|
985
|
+
*/
|
986
|
+
static void
|
987
|
+
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
|
988
|
+
ZSTD_match_t* matches, U32* nbMatches,
|
989
|
+
U32 currPosInBlock, U32 remainingBytes)
|
990
|
+
{
|
991
|
+
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
|
992
|
+
return;
|
993
|
+
}
|
994
|
+
|
995
|
+
if (currPosInBlock >= optLdm->endPosInBlock) {
|
996
|
+
if (currPosInBlock > optLdm->endPosInBlock) {
|
997
|
+
/* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
|
998
|
+
* at the end of a match from the ldm seq store, and will often be some bytes
|
999
|
+
* over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
|
1000
|
+
*/
|
1001
|
+
U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
|
1002
|
+
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
|
1003
|
+
}
|
1004
|
+
ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
|
1005
|
+
}
|
1006
|
+
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
|
1007
|
+
}
|
1008
|
+
|
767
1009
|
|
768
1010
|
/*-*******************************
|
769
1011
|
* Optimal parser
|
770
1012
|
*********************************/
|
771
1013
|
|
772
|
-
|
773
1014
|
static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
|
774
1015
|
{
|
775
1016
|
return sol.litlen + sol.mlen;
|
@@ -810,6 +1051,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
810
1051
|
const BYTE* const prefixStart = base + ms->window.dictLimit;
|
811
1052
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
812
1053
|
|
1054
|
+
ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
|
1055
|
+
|
813
1056
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
814
1057
|
U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
|
815
1058
|
U32 nextToUpdate3 = ms->nextToUpdate;
|
@@ -817,6 +1060,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
817
1060
|
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
|
818
1061
|
ZSTD_match_t* const matches = optStatePtr->matchTable;
|
819
1062
|
ZSTD_optimal_t lastSequence;
|
1063
|
+
ZSTD_optLdm_t optLdm;
|
1064
|
+
|
1065
|
+
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
|
1066
|
+
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
|
1067
|
+
ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
|
820
1068
|
|
821
1069
|
/* init */
|
822
1070
|
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
|
@@ -832,7 +1080,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
832
1080
|
/* find first match */
|
833
1081
|
{ U32 const litlen = (U32)(ip - anchor);
|
834
1082
|
U32 const ll0 = !litlen;
|
835
|
-
U32
|
1083
|
+
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
|
1084
|
+
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
|
1085
|
+
(U32)(ip-istart), (U32)(iend - ip));
|
836
1086
|
if (!nbMatches) { ip++; continue; }
|
837
1087
|
|
838
1088
|
/* initialize opt[0] */
|
@@ -844,18 +1094,18 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
844
1094
|
* in every price. We include the literal length to avoid negative
|
845
1095
|
* prices when we subtract the previous literal length.
|
846
1096
|
*/
|
847
|
-
opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
|
1097
|
+
opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
|
848
1098
|
|
849
1099
|
/* large match -> immediate encoding */
|
850
1100
|
{ U32 const maxML = matches[nbMatches-1].len;
|
851
|
-
U32 const
|
1101
|
+
U32 const maxOffcode = matches[nbMatches-1].off;
|
852
1102
|
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
|
853
|
-
nbMatches, maxML,
|
1103
|
+
nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
|
854
1104
|
|
855
1105
|
if (maxML > sufficient_len) {
|
856
1106
|
lastSequence.litlen = litlen;
|
857
1107
|
lastSequence.mlen = maxML;
|
858
|
-
lastSequence.off =
|
1108
|
+
lastSequence.off = maxOffcode;
|
859
1109
|
DEBUGLOG(6, "large match (%u>%u), immediate encoding",
|
860
1110
|
maxML, sufficient_len);
|
861
1111
|
cur = 0;
|
@@ -864,24 +1114,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
864
1114
|
} }
|
865
1115
|
|
866
1116
|
/* set prices for first matches starting position == 0 */
|
867
|
-
|
1117
|
+
assert(opt[0].price >= 0);
|
1118
|
+
{ U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
|
868
1119
|
U32 pos;
|
869
1120
|
U32 matchNb;
|
870
1121
|
for (pos = 1; pos < minMatch; pos++) {
|
871
1122
|
opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
|
872
1123
|
}
|
873
1124
|
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
|
874
|
-
U32 const
|
1125
|
+
U32 const offcode = matches[matchNb].off;
|
875
1126
|
U32 const end = matches[matchNb].len;
|
876
1127
|
for ( ; pos <= end ; pos++ ) {
|
877
|
-
U32 const matchPrice = ZSTD_getMatchPrice(
|
1128
|
+
U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
|
878
1129
|
U32 const sequencePrice = literalsPrice + matchPrice;
|
879
1130
|
DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
|
880
1131
|
pos, ZSTD_fCost(sequencePrice));
|
881
1132
|
opt[pos].mlen = pos;
|
882
|
-
opt[pos].off =
|
1133
|
+
opt[pos].off = offcode;
|
883
1134
|
opt[pos].litlen = litlen;
|
884
|
-
opt[pos].price = sequencePrice;
|
1135
|
+
opt[pos].price = (int)sequencePrice;
|
885
1136
|
} }
|
886
1137
|
last_pos = pos-1;
|
887
1138
|
}
|
@@ -896,9 +1147,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
896
1147
|
/* Fix current position with one literal if cheaper */
|
897
1148
|
{ U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
|
898
1149
|
int const price = opt[cur-1].price
|
899
|
-
+ ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
|
900
|
-
+ ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
|
901
|
-
- ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
|
1150
|
+
+ (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
|
1151
|
+
+ (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
|
1152
|
+
- (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
|
902
1153
|
assert(price < 1000000000); /* overflow check */
|
903
1154
|
if (price <= opt[cur].price) {
|
904
1155
|
DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
|
@@ -924,10 +1175,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
924
1175
|
assert(cur >= opt[cur].mlen);
|
925
1176
|
if (opt[cur].mlen != 0) {
|
926
1177
|
U32 const prev = cur - opt[cur].mlen;
|
927
|
-
repcodes_t newReps =
|
928
|
-
|
1178
|
+
repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
|
1179
|
+
ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
|
929
1180
|
} else {
|
930
|
-
|
1181
|
+
ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
|
931
1182
|
}
|
932
1183
|
|
933
1184
|
/* last match must start at a minimum distance of 8 from oend */
|
@@ -941,12 +1192,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
941
1192
|
continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
|
942
1193
|
}
|
943
1194
|
|
1195
|
+
assert(opt[cur].price >= 0);
|
944
1196
|
{ U32 const ll0 = (opt[cur].mlen != 0);
|
945
1197
|
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
|
946
|
-
U32 const previousPrice = opt[cur].price;
|
1198
|
+
U32 const previousPrice = (U32)opt[cur].price;
|
947
1199
|
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
|
948
|
-
U32
|
1200
|
+
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
|
949
1201
|
U32 matchNb;
|
1202
|
+
|
1203
|
+
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
|
1204
|
+
(U32)(inr-istart), (U32)(iend-inr));
|
1205
|
+
|
950
1206
|
if (!nbMatches) {
|
951
1207
|
DEBUGLOG(7, "rPos:%u : no match found", cur);
|
952
1208
|
continue;
|
@@ -979,7 +1235,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
979
1235
|
|
980
1236
|
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
|
981
1237
|
U32 const pos = cur + mlen;
|
982
|
-
int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
|
1238
|
+
int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
|
983
1239
|
|
984
1240
|
if ((pos > last_pos) || (price < opt[pos].price)) {
|
985
1241
|
DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
|
@@ -1009,10 +1265,10 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
1009
1265
|
* update them while traversing the sequences.
|
1010
1266
|
*/
|
1011
1267
|
if (lastSequence.mlen != 0) {
|
1012
|
-
repcodes_t reps =
|
1013
|
-
|
1268
|
+
repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
|
1269
|
+
ZSTD_memcpy(rep, &reps, sizeof(reps));
|
1014
1270
|
} else {
|
1015
|
-
|
1271
|
+
ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
|
1016
1272
|
}
|
1017
1273
|
|
1018
1274
|
{ U32 const storeEnd = cur + 1;
|
@@ -1053,7 +1309,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
1053
1309
|
|
1054
1310
|
assert(anchor + llen <= iend);
|
1055
1311
|
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
|
1056
|
-
ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen
|
1312
|
+
ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
|
1057
1313
|
anchor += advance;
|
1058
1314
|
ip = anchor;
|
1059
1315
|
} }
|
@@ -1065,38 +1321,30 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
1065
1321
|
return (size_t)(iend - anchor);
|
1066
1322
|
}
|
1067
1323
|
|
1324
|
+
static size_t ZSTD_compressBlock_opt0(
|
1325
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1326
|
+
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
|
1327
|
+
{
|
1328
|
+
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
|
1329
|
+
}
|
1330
|
+
|
1331
|
+
static size_t ZSTD_compressBlock_opt2(
|
1332
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1333
|
+
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
|
1334
|
+
{
|
1335
|
+
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
|
1336
|
+
}
|
1068
1337
|
|
1069
1338
|
size_t ZSTD_compressBlock_btopt(
|
1070
1339
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1071
1340
|
const void* src, size_t srcSize)
|
1072
1341
|
{
|
1073
1342
|
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
|
1074
|
-
return
|
1343
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
1075
1344
|
}
|
1076
1345
|
|
1077
1346
|
|
1078
|
-
/* used in 2-pass strategy */
|
1079
|
-
static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
|
1080
|
-
{
|
1081
|
-
U32 s, sum=0;
|
1082
|
-
assert(ZSTD_FREQ_DIV+bonus >= 0);
|
1083
|
-
for (s=0; s<lastEltIndex+1; s++) {
|
1084
|
-
table[s] <<= ZSTD_FREQ_DIV+bonus;
|
1085
|
-
table[s]--;
|
1086
|
-
sum += table[s];
|
1087
|
-
}
|
1088
|
-
return sum;
|
1089
|
-
}
|
1090
1347
|
|
1091
|
-
/* used in 2-pass strategy */
|
1092
|
-
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
|
1093
|
-
{
|
1094
|
-
if (ZSTD_compressedLiterals(optPtr))
|
1095
|
-
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
|
1096
|
-
optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
|
1097
|
-
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
|
1098
|
-
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
|
1099
|
-
}
|
1100
1348
|
|
1101
1349
|
/* ZSTD_initStats_ultra():
|
1102
1350
|
* make a first compression pass, just to seed stats with more accurate starting values.
|
@@ -1110,7 +1358,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
|
1110
1358
|
const void* src, size_t srcSize)
|
1111
1359
|
{
|
1112
1360
|
U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
|
1113
|
-
|
1361
|
+
ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
|
1114
1362
|
|
1115
1363
|
DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
|
1116
1364
|
assert(ms->opt.litLengthSum == 0); /* first block */
|
@@ -1118,7 +1366,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
|
1118
1366
|
assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
|
1119
1367
|
assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
|
1120
1368
|
|
1121
|
-
|
1369
|
+
ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
|
1122
1370
|
|
1123
1371
|
/* invalidate first scan from history */
|
1124
1372
|
ZSTD_resetSeqStore(seqStore);
|
@@ -1127,8 +1375,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
|
1127
1375
|
ms->window.lowLimit = ms->window.dictLimit;
|
1128
1376
|
ms->nextToUpdate = ms->window.dictLimit;
|
1129
1377
|
|
1130
|
-
/* re-inforce weight of collected statistics */
|
1131
|
-
ZSTD_upscaleStats(&ms->opt);
|
1132
1378
|
}
|
1133
1379
|
|
1134
1380
|
size_t ZSTD_compressBlock_btultra(
|
@@ -1136,14 +1382,14 @@ size_t ZSTD_compressBlock_btultra(
|
|
1136
1382
|
const void* src, size_t srcSize)
|
1137
1383
|
{
|
1138
1384
|
DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
|
1139
|
-
return
|
1385
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
1140
1386
|
}
|
1141
1387
|
|
1142
1388
|
size_t ZSTD_compressBlock_btultra2(
|
1143
1389
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1144
1390
|
const void* src, size_t srcSize)
|
1145
1391
|
{
|
1146
|
-
U32 const
|
1392
|
+
U32 const curr = (U32)((const BYTE*)src - ms->window.base);
|
1147
1393
|
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
|
1148
1394
|
|
1149
1395
|
/* 2-pass strategy:
|
@@ -1158,41 +1404,41 @@ size_t ZSTD_compressBlock_btultra2(
|
|
1158
1404
|
if ( (ms->opt.litLengthSum==0) /* first block */
|
1159
1405
|
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
|
1160
1406
|
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
|
1161
|
-
&& (
|
1407
|
+
&& (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
|
1162
1408
|
&& (srcSize > ZSTD_PREDEF_THRESHOLD)
|
1163
1409
|
) {
|
1164
1410
|
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
|
1165
1411
|
}
|
1166
1412
|
|
1167
|
-
return
|
1413
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
1168
1414
|
}
|
1169
1415
|
|
1170
1416
|
size_t ZSTD_compressBlock_btopt_dictMatchState(
|
1171
1417
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1172
1418
|
const void* src, size_t srcSize)
|
1173
1419
|
{
|
1174
|
-
return
|
1420
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
|
1175
1421
|
}
|
1176
1422
|
|
1177
1423
|
size_t ZSTD_compressBlock_btultra_dictMatchState(
|
1178
1424
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1179
1425
|
const void* src, size_t srcSize)
|
1180
1426
|
{
|
1181
|
-
return
|
1427
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
|
1182
1428
|
}
|
1183
1429
|
|
1184
1430
|
size_t ZSTD_compressBlock_btopt_extDict(
|
1185
1431
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1186
1432
|
const void* src, size_t srcSize)
|
1187
1433
|
{
|
1188
|
-
return
|
1434
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
|
1189
1435
|
}
|
1190
1436
|
|
1191
1437
|
size_t ZSTD_compressBlock_btultra_extDict(
|
1192
1438
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1193
1439
|
const void* src, size_t srcSize)
|
1194
1440
|
{
|
1195
|
-
return
|
1441
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
|
1196
1442
|
}
|
1197
1443
|
|
1198
1444
|
/* note : no btultra2 variant for extDict nor dictMatchState,
|