zstd-ruby 1.4.5.0 → 1.5.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/extconf.rb +2 -1
- data/ext/zstdruby/libzstd/BUCK +5 -7
- data/ext/zstdruby/libzstd/Makefile +225 -222
- data/ext/zstdruby/libzstd/README.md +43 -5
- data/ext/zstdruby/libzstd/common/bitstream.h +46 -22
- data/ext/zstdruby/libzstd/common/compiler.h +182 -22
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +12 -19
- data/ext/zstdruby/libzstd/common/entropy_common.c +196 -44
- data/ext/zstdruby/libzstd/common/error_private.c +2 -1
- data/ext/zstdruby/libzstd/common/error_private.h +82 -3
- data/ext/zstdruby/libzstd/common/fse.h +41 -12
- data/ext/zstdruby/libzstd/common/fse_decompress.c +139 -22
- data/ext/zstdruby/libzstd/common/huf.h +47 -23
- data/ext/zstdruby/libzstd/common/mem.h +87 -98
- data/ext/zstdruby/libzstd/common/pool.c +23 -17
- data/ext/zstdruby/libzstd/common/pool.h +2 -2
- data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
- data/ext/zstdruby/libzstd/common/threading.c +6 -5
- data/ext/zstdruby/libzstd/common/xxhash.c +6 -846
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +189 -142
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +89 -46
- data/ext/zstdruby/libzstd/compress/hist.c +27 -29
- data/ext/zstdruby/libzstd/compress/hist.h +2 -2
- data/ext/zstdruby/libzstd/compress/huf_compress.c +770 -198
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +2894 -863
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +390 -90
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +12 -11
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +31 -8
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -297
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +206 -69
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +307 -132
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +322 -143
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1136 -174
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +316 -213
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +373 -150
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +152 -444
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +31 -113
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1044 -403
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +450 -105
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +913 -273
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +14 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +59 -12
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -38
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -34
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +128 -58
- data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +8 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +9 -9
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +9 -9
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +10 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +13 -13
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -13
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -13
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.mk +185 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +154 -7
- data/ext/zstdruby/libzstd/zstd.h +699 -214
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +2 -1
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +15 -6
- data/.travis.yml +0 -14
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
|
17
17
|
{
|
|
18
|
-
BYTE* const ostart = (BYTE*
|
|
18
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
19
19
|
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
|
|
20
20
|
|
|
21
21
|
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
|
|
@@ -35,14 +35,14 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
|
|
|
35
35
|
assert(0);
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
-
|
|
38
|
+
ZSTD_memcpy(ostart + flSize, src, srcSize);
|
|
39
39
|
DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
|
|
40
40
|
return srcSize + flSize;
|
|
41
41
|
}
|
|
42
42
|
|
|
43
43
|
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
|
44
44
|
{
|
|
45
|
-
BYTE* const ostart = (BYTE*
|
|
45
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
46
46
|
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
|
|
47
47
|
|
|
48
48
|
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
|
|
@@ -73,7 +73,8 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
|
|
73
73
|
void* dst, size_t dstCapacity,
|
|
74
74
|
const void* src, size_t srcSize,
|
|
75
75
|
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
|
76
|
-
const int bmi2
|
|
76
|
+
const int bmi2,
|
|
77
|
+
unsigned suspectUncompressible)
|
|
77
78
|
{
|
|
78
79
|
size_t const minGain = ZSTD_minGain(srcSize, strategy);
|
|
79
80
|
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
|
|
@@ -86,7 +87,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
|
|
86
87
|
disableLiteralCompression, (U32)srcSize);
|
|
87
88
|
|
|
88
89
|
/* Prepare nextEntropy assuming reusing the existing table */
|
|
89
|
-
|
|
90
|
+
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
90
91
|
|
|
91
92
|
if (disableLiteralCompression)
|
|
92
93
|
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
|
@@ -105,11 +106,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
|
|
105
106
|
HUF_compress1X_repeat(
|
|
106
107
|
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
|
|
107
108
|
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
|
|
108
|
-
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
|
|
109
|
+
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) :
|
|
109
110
|
HUF_compress4X_repeat(
|
|
110
111
|
ostart+lhSize, dstCapacity-lhSize, src, srcSize,
|
|
111
112
|
HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
|
|
112
|
-
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
|
|
113
|
+
(HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
|
|
113
114
|
if (repeat != HUF_repeat_none) {
|
|
114
115
|
/* reused the existing table */
|
|
115
116
|
DEBUGLOG(5, "Reusing previous huffman table");
|
|
@@ -117,12 +118,12 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
|
|
117
118
|
}
|
|
118
119
|
}
|
|
119
120
|
|
|
120
|
-
if ((cLitSize==0)
|
|
121
|
-
|
|
121
|
+
if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
|
|
122
|
+
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
122
123
|
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
|
123
124
|
}
|
|
124
125
|
if (cLitSize==1) {
|
|
125
|
-
|
|
126
|
+
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
126
127
|
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
|
|
127
128
|
}
|
|
128
129
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -18,12 +18,14 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
|
|
|
18
18
|
|
|
19
19
|
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
|
20
20
|
|
|
21
|
+
/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
|
|
21
22
|
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
|
22
23
|
ZSTD_hufCTables_t* nextHuf,
|
|
23
24
|
ZSTD_strategy strategy, int disableLiteralCompression,
|
|
24
25
|
void* dst, size_t dstCapacity,
|
|
25
26
|
const void* src, size_t srcSize,
|
|
26
27
|
void* entropyWorkspace, size_t entropyWorkspaceSize,
|
|
27
|
-
const int bmi2
|
|
28
|
+
const int bmi2,
|
|
29
|
+
unsigned suspectUncompressible);
|
|
28
30
|
|
|
29
31
|
#endif /* ZSTD_COMPRESS_LITERALS_H */
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -50,6 +50,19 @@ static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
|
|
|
50
50
|
return maxSymbolValue;
|
|
51
51
|
}
|
|
52
52
|
|
|
53
|
+
/**
|
|
54
|
+
* Returns true if we should use ncount=-1 else we should
|
|
55
|
+
* use ncount=1 for low probability symbols instead.
|
|
56
|
+
*/
|
|
57
|
+
static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
|
|
58
|
+
{
|
|
59
|
+
/* Heuristic: This should cover most blocks <= 16K and
|
|
60
|
+
* start to fade out after 16K to about 32K depending on
|
|
61
|
+
* compressibility.
|
|
62
|
+
*/
|
|
63
|
+
return nbSeq >= 2048;
|
|
64
|
+
}
|
|
65
|
+
|
|
53
66
|
/**
|
|
54
67
|
* Returns the cost in bytes of encoding the normalized count header.
|
|
55
68
|
* Returns an error if any of the helper functions return an error.
|
|
@@ -60,7 +73,7 @@ static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
|
|
|
60
73
|
BYTE wksp[FSE_NCOUNTBOUND];
|
|
61
74
|
S16 norm[MaxSeq + 1];
|
|
62
75
|
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
|
|
63
|
-
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), "");
|
|
76
|
+
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
|
|
64
77
|
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
|
|
65
78
|
}
|
|
66
79
|
|
|
@@ -72,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t
|
|
|
72
85
|
{
|
|
73
86
|
unsigned cost = 0;
|
|
74
87
|
unsigned s;
|
|
88
|
+
|
|
89
|
+
assert(total > 0);
|
|
75
90
|
for (s = 0; s <= max; ++s) {
|
|
76
91
|
unsigned norm = (unsigned)((256 * count[s]) / total);
|
|
77
92
|
if (count[s] != 0 && norm == 0)
|
|
@@ -219,6 +234,11 @@ ZSTD_selectEncodingType(
|
|
|
219
234
|
return set_compressed;
|
|
220
235
|
}
|
|
221
236
|
|
|
237
|
+
typedef struct {
|
|
238
|
+
S16 norm[MaxSeq + 1];
|
|
239
|
+
U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
|
|
240
|
+
} ZSTD_BuildCTableWksp;
|
|
241
|
+
|
|
222
242
|
size_t
|
|
223
243
|
ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
|
224
244
|
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
|
|
@@ -239,13 +259,13 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
|
|
239
259
|
*op = codeTable[0];
|
|
240
260
|
return 1;
|
|
241
261
|
case set_repeat:
|
|
242
|
-
|
|
262
|
+
ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
|
|
243
263
|
return 0;
|
|
244
264
|
case set_basic:
|
|
245
265
|
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */
|
|
246
266
|
return 0;
|
|
247
267
|
case set_compressed: {
|
|
248
|
-
|
|
268
|
+
ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
|
|
249
269
|
size_t nbSeq_1 = nbSeq;
|
|
250
270
|
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
|
|
251
271
|
if (count[codeTable[nbSeq-1]] > 1) {
|
|
@@ -253,10 +273,13 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
|
|
253
273
|
nbSeq_1--;
|
|
254
274
|
}
|
|
255
275
|
assert(nbSeq_1 > 1);
|
|
256
|
-
|
|
257
|
-
|
|
276
|
+
assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
|
|
277
|
+
(void)entropyWorkspaceSize;
|
|
278
|
+
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
|
|
279
|
+
assert(oend >= op);
|
|
280
|
+
{ size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog); /* overflow protected */
|
|
258
281
|
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
|
|
259
|
-
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog,
|
|
282
|
+
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
|
|
260
283
|
return NCountSize;
|
|
261
284
|
}
|
|
262
285
|
}
|
|
@@ -376,7 +399,7 @@ ZSTD_encodeSequences_default(
|
|
|
376
399
|
|
|
377
400
|
#if DYNAMIC_BMI2
|
|
378
401
|
|
|
379
|
-
static
|
|
402
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
|
380
403
|
ZSTD_encodeSequences_bmi2(
|
|
381
404
|
void* dst, size_t dstCapacity,
|
|
382
405
|
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -15,288 +15,10 @@
|
|
|
15
15
|
|
|
16
16
|
#include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */
|
|
17
17
|
#include "hist.h" /* HIST_countFast_wksp */
|
|
18
|
-
#include "zstd_compress_internal.h"
|
|
18
|
+
#include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
|
|
19
19
|
#include "zstd_compress_sequences.h"
|
|
20
20
|
#include "zstd_compress_literals.h"
|
|
21
21
|
|
|
22
|
-
/*-*************************************
|
|
23
|
-
* Superblock entropy buffer structs
|
|
24
|
-
***************************************/
|
|
25
|
-
/** ZSTD_hufCTablesMetadata_t :
|
|
26
|
-
* Stores Literals Block Type for a super-block in hType, and
|
|
27
|
-
* huffman tree description in hufDesBuffer.
|
|
28
|
-
* hufDesSize refers to the size of huffman tree description in bytes.
|
|
29
|
-
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
|
|
30
|
-
typedef struct {
|
|
31
|
-
symbolEncodingType_e hType;
|
|
32
|
-
BYTE hufDesBuffer[500]; /* TODO give name to this value */
|
|
33
|
-
size_t hufDesSize;
|
|
34
|
-
} ZSTD_hufCTablesMetadata_t;
|
|
35
|
-
|
|
36
|
-
/** ZSTD_fseCTablesMetadata_t :
|
|
37
|
-
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
|
|
38
|
-
* fse tables in fseTablesBuffer.
|
|
39
|
-
* fseTablesSize refers to the size of fse tables in bytes.
|
|
40
|
-
* This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
|
|
41
|
-
typedef struct {
|
|
42
|
-
symbolEncodingType_e llType;
|
|
43
|
-
symbolEncodingType_e ofType;
|
|
44
|
-
symbolEncodingType_e mlType;
|
|
45
|
-
BYTE fseTablesBuffer[500]; /* TODO give name to this value */
|
|
46
|
-
size_t fseTablesSize;
|
|
47
|
-
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
|
|
48
|
-
} ZSTD_fseCTablesMetadata_t;
|
|
49
|
-
|
|
50
|
-
typedef struct {
|
|
51
|
-
ZSTD_hufCTablesMetadata_t hufMetadata;
|
|
52
|
-
ZSTD_fseCTablesMetadata_t fseMetadata;
|
|
53
|
-
} ZSTD_entropyCTablesMetadata_t;
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
/** ZSTD_buildSuperBlockEntropy_literal() :
|
|
57
|
-
* Builds entropy for the super-block literals.
|
|
58
|
-
* Stores literals block type (raw, rle, compressed, repeat) and
|
|
59
|
-
* huffman description table to hufMetadata.
|
|
60
|
-
* @return : size of huffman description table or error code */
|
|
61
|
-
static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
|
|
62
|
-
const ZSTD_hufCTables_t* prevHuf,
|
|
63
|
-
ZSTD_hufCTables_t* nextHuf,
|
|
64
|
-
ZSTD_hufCTablesMetadata_t* hufMetadata,
|
|
65
|
-
const int disableLiteralsCompression,
|
|
66
|
-
void* workspace, size_t wkspSize)
|
|
67
|
-
{
|
|
68
|
-
BYTE* const wkspStart = (BYTE*)workspace;
|
|
69
|
-
BYTE* const wkspEnd = wkspStart + wkspSize;
|
|
70
|
-
BYTE* const countWkspStart = wkspStart;
|
|
71
|
-
unsigned* const countWksp = (unsigned*)workspace;
|
|
72
|
-
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
|
|
73
|
-
BYTE* const nodeWksp = countWkspStart + countWkspSize;
|
|
74
|
-
const size_t nodeWkspSize = wkspEnd-nodeWksp;
|
|
75
|
-
unsigned maxSymbolValue = 255;
|
|
76
|
-
unsigned huffLog = HUF_TABLELOG_DEFAULT;
|
|
77
|
-
HUF_repeat repeat = prevHuf->repeatMode;
|
|
78
|
-
|
|
79
|
-
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
|
|
80
|
-
|
|
81
|
-
/* Prepare nextEntropy assuming reusing the existing table */
|
|
82
|
-
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
83
|
-
|
|
84
|
-
if (disableLiteralsCompression) {
|
|
85
|
-
DEBUGLOG(5, "set_basic - disabled");
|
|
86
|
-
hufMetadata->hType = set_basic;
|
|
87
|
-
return 0;
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
/* small ? don't even attempt compression (speed opt) */
|
|
91
|
-
# define COMPRESS_LITERALS_SIZE_MIN 63
|
|
92
|
-
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
|
93
|
-
if (srcSize <= minLitSize) {
|
|
94
|
-
DEBUGLOG(5, "set_basic - too small");
|
|
95
|
-
hufMetadata->hType = set_basic;
|
|
96
|
-
return 0;
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
/* Scan input and build symbol stats */
|
|
101
|
-
{ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
|
|
102
|
-
FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
|
|
103
|
-
if (largest == srcSize) {
|
|
104
|
-
DEBUGLOG(5, "set_rle");
|
|
105
|
-
hufMetadata->hType = set_rle;
|
|
106
|
-
return 0;
|
|
107
|
-
}
|
|
108
|
-
if (largest <= (srcSize >> 7)+4) {
|
|
109
|
-
DEBUGLOG(5, "set_basic - no gain");
|
|
110
|
-
hufMetadata->hType = set_basic;
|
|
111
|
-
return 0;
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
/* Validate the previous Huffman table */
|
|
116
|
-
if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
|
|
117
|
-
repeat = HUF_repeat_none;
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
/* Build Huffman Tree */
|
|
121
|
-
memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
|
|
122
|
-
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
|
123
|
-
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
|
|
124
|
-
maxSymbolValue, huffLog,
|
|
125
|
-
nodeWksp, nodeWkspSize);
|
|
126
|
-
FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
|
|
127
|
-
huffLog = (U32)maxBits;
|
|
128
|
-
{ /* Build and write the CTable */
|
|
129
|
-
size_t const newCSize = HUF_estimateCompressedSize(
|
|
130
|
-
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
|
|
131
|
-
size_t const hSize = HUF_writeCTable(
|
|
132
|
-
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
|
|
133
|
-
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
|
|
134
|
-
/* Check against repeating the previous CTable */
|
|
135
|
-
if (repeat != HUF_repeat_none) {
|
|
136
|
-
size_t const oldCSize = HUF_estimateCompressedSize(
|
|
137
|
-
(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
|
|
138
|
-
if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
|
|
139
|
-
DEBUGLOG(5, "set_repeat - smaller");
|
|
140
|
-
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
141
|
-
hufMetadata->hType = set_repeat;
|
|
142
|
-
return 0;
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
if (newCSize + hSize >= srcSize) {
|
|
146
|
-
DEBUGLOG(5, "set_basic - no gains");
|
|
147
|
-
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
148
|
-
hufMetadata->hType = set_basic;
|
|
149
|
-
return 0;
|
|
150
|
-
}
|
|
151
|
-
DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
|
|
152
|
-
hufMetadata->hType = set_compressed;
|
|
153
|
-
nextHuf->repeatMode = HUF_repeat_check;
|
|
154
|
-
return hSize;
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
/** ZSTD_buildSuperBlockEntropy_sequences() :
|
|
160
|
-
* Builds entropy for the super-block sequences.
|
|
161
|
-
* Stores symbol compression modes and fse table to fseMetadata.
|
|
162
|
-
* @return : size of fse tables or error code */
|
|
163
|
-
static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
|
|
164
|
-
const ZSTD_fseCTables_t* prevEntropy,
|
|
165
|
-
ZSTD_fseCTables_t* nextEntropy,
|
|
166
|
-
const ZSTD_CCtx_params* cctxParams,
|
|
167
|
-
ZSTD_fseCTablesMetadata_t* fseMetadata,
|
|
168
|
-
void* workspace, size_t wkspSize)
|
|
169
|
-
{
|
|
170
|
-
BYTE* const wkspStart = (BYTE*)workspace;
|
|
171
|
-
BYTE* const wkspEnd = wkspStart + wkspSize;
|
|
172
|
-
BYTE* const countWkspStart = wkspStart;
|
|
173
|
-
unsigned* const countWksp = (unsigned*)workspace;
|
|
174
|
-
const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
|
|
175
|
-
BYTE* const cTableWksp = countWkspStart + countWkspSize;
|
|
176
|
-
const size_t cTableWkspSize = wkspEnd-cTableWksp;
|
|
177
|
-
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
|
178
|
-
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
|
|
179
|
-
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
|
|
180
|
-
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
|
|
181
|
-
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
|
182
|
-
const BYTE* const llCodeTable = seqStorePtr->llCode;
|
|
183
|
-
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
|
184
|
-
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
|
185
|
-
BYTE* const ostart = fseMetadata->fseTablesBuffer;
|
|
186
|
-
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
|
|
187
|
-
BYTE* op = ostart;
|
|
188
|
-
|
|
189
|
-
assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
|
|
190
|
-
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
|
|
191
|
-
memset(workspace, 0, wkspSize);
|
|
192
|
-
|
|
193
|
-
fseMetadata->lastCountSize = 0;
|
|
194
|
-
/* convert length/distances into codes */
|
|
195
|
-
ZSTD_seqToCodes(seqStorePtr);
|
|
196
|
-
/* build CTable for Literal Lengths */
|
|
197
|
-
{ U32 LLtype;
|
|
198
|
-
unsigned max = MaxLL;
|
|
199
|
-
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
|
200
|
-
DEBUGLOG(5, "Building LL table");
|
|
201
|
-
nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
|
|
202
|
-
LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
|
|
203
|
-
countWksp, max, mostFrequent, nbSeq,
|
|
204
|
-
LLFSELog, prevEntropy->litlengthCTable,
|
|
205
|
-
LL_defaultNorm, LL_defaultNormLog,
|
|
206
|
-
ZSTD_defaultAllowed, strategy);
|
|
207
|
-
assert(set_basic < set_compressed && set_rle < set_compressed);
|
|
208
|
-
assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
|
209
|
-
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
|
|
210
|
-
countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
|
211
|
-
prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
|
|
212
|
-
cTableWksp, cTableWkspSize);
|
|
213
|
-
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
|
|
214
|
-
if (LLtype == set_compressed)
|
|
215
|
-
fseMetadata->lastCountSize = countSize;
|
|
216
|
-
op += countSize;
|
|
217
|
-
fseMetadata->llType = (symbolEncodingType_e) LLtype;
|
|
218
|
-
} }
|
|
219
|
-
/* build CTable for Offsets */
|
|
220
|
-
{ U32 Offtype;
|
|
221
|
-
unsigned max = MaxOff;
|
|
222
|
-
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
|
223
|
-
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
|
|
224
|
-
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
|
|
225
|
-
DEBUGLOG(5, "Building OF table");
|
|
226
|
-
nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
|
|
227
|
-
Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
|
|
228
|
-
countWksp, max, mostFrequent, nbSeq,
|
|
229
|
-
OffFSELog, prevEntropy->offcodeCTable,
|
|
230
|
-
OF_defaultNorm, OF_defaultNormLog,
|
|
231
|
-
defaultPolicy, strategy);
|
|
232
|
-
assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
|
233
|
-
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
|
|
234
|
-
countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
|
235
|
-
prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
|
|
236
|
-
cTableWksp, cTableWkspSize);
|
|
237
|
-
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
|
|
238
|
-
if (Offtype == set_compressed)
|
|
239
|
-
fseMetadata->lastCountSize = countSize;
|
|
240
|
-
op += countSize;
|
|
241
|
-
fseMetadata->ofType = (symbolEncodingType_e) Offtype;
|
|
242
|
-
} }
|
|
243
|
-
/* build CTable for MatchLengths */
|
|
244
|
-
{ U32 MLtype;
|
|
245
|
-
unsigned max = MaxML;
|
|
246
|
-
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
|
247
|
-
DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
|
|
248
|
-
nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
|
|
249
|
-
MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
|
|
250
|
-
countWksp, max, mostFrequent, nbSeq,
|
|
251
|
-
MLFSELog, prevEntropy->matchlengthCTable,
|
|
252
|
-
ML_defaultNorm, ML_defaultNormLog,
|
|
253
|
-
ZSTD_defaultAllowed, strategy);
|
|
254
|
-
assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
|
255
|
-
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
|
|
256
|
-
countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
|
|
257
|
-
prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
|
|
258
|
-
cTableWksp, cTableWkspSize);
|
|
259
|
-
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
|
|
260
|
-
if (MLtype == set_compressed)
|
|
261
|
-
fseMetadata->lastCountSize = countSize;
|
|
262
|
-
op += countSize;
|
|
263
|
-
fseMetadata->mlType = (symbolEncodingType_e) MLtype;
|
|
264
|
-
} }
|
|
265
|
-
assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
|
|
266
|
-
return op-ostart;
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
/** ZSTD_buildSuperBlockEntropy() :
|
|
271
|
-
* Builds entropy for the super-block.
|
|
272
|
-
* @return : 0 on success or error code */
|
|
273
|
-
static size_t
|
|
274
|
-
ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
|
|
275
|
-
const ZSTD_entropyCTables_t* prevEntropy,
|
|
276
|
-
ZSTD_entropyCTables_t* nextEntropy,
|
|
277
|
-
const ZSTD_CCtx_params* cctxParams,
|
|
278
|
-
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
|
279
|
-
void* workspace, size_t wkspSize)
|
|
280
|
-
{
|
|
281
|
-
size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
|
|
282
|
-
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
|
|
283
|
-
entropyMetadata->hufMetadata.hufDesSize =
|
|
284
|
-
ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
|
|
285
|
-
&prevEntropy->huf, &nextEntropy->huf,
|
|
286
|
-
&entropyMetadata->hufMetadata,
|
|
287
|
-
ZSTD_disableLiteralsCompression(cctxParams),
|
|
288
|
-
workspace, wkspSize);
|
|
289
|
-
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
|
|
290
|
-
entropyMetadata->fseMetadata.fseTablesSize =
|
|
291
|
-
ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
|
|
292
|
-
&prevEntropy->fse, &nextEntropy->fse,
|
|
293
|
-
cctxParams,
|
|
294
|
-
&entropyMetadata->fseMetadata,
|
|
295
|
-
workspace, wkspSize);
|
|
296
|
-
FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
|
|
297
|
-
return 0;
|
|
298
|
-
}
|
|
299
|
-
|
|
300
22
|
/** ZSTD_compressSubBlock_literal() :
|
|
301
23
|
* Compresses literals section for a sub-block.
|
|
302
24
|
* When we have to write the Huffman table we will sometimes choose a header
|
|
@@ -304,7 +26,7 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
|
|
|
304
26
|
* before we know the table size + compressed size, so we have a bound on the
|
|
305
27
|
* table size. If we guessed incorrectly, we fall back to uncompressed literals.
|
|
306
28
|
*
|
|
307
|
-
* We write the header when writeEntropy=1 and set
|
|
29
|
+
* We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
|
|
308
30
|
* in writing the header, otherwise it is set to 0.
|
|
309
31
|
*
|
|
310
32
|
* hufMetadata->hType has literals block type info.
|
|
@@ -348,7 +70,7 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|
|
348
70
|
assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
|
|
349
71
|
|
|
350
72
|
if (writeEntropy && hufMetadata->hType == set_compressed) {
|
|
351
|
-
|
|
73
|
+
ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
|
|
352
74
|
op += hufMetadata->hufDesSize;
|
|
353
75
|
cLitSize += hufMetadata->hufDesSize;
|
|
354
76
|
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
|
|
@@ -410,6 +132,7 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
|
|
|
410
132
|
const seqDef* sp = sstart;
|
|
411
133
|
size_t matchLengthSum = 0;
|
|
412
134
|
size_t litLengthSum = 0;
|
|
135
|
+
(void)(litLengthSum); /* suppress unused variable warning on some environments */
|
|
413
136
|
while (send-sp > 0) {
|
|
414
137
|
ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
|
|
415
138
|
litLengthSum += seqLen.litLength;
|
|
@@ -474,7 +197,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
|
|
|
474
197
|
const U32 MLtype = fseMetadata->mlType;
|
|
475
198
|
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
|
|
476
199
|
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
|
|
477
|
-
|
|
200
|
+
ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
|
|
478
201
|
op += fseMetadata->fseTablesSize;
|
|
479
202
|
} else {
|
|
480
203
|
const U32 repeat = set_repeat;
|
|
@@ -602,8 +325,8 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
|
|
|
602
325
|
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
|
603
326
|
const BYTE* codeTable, unsigned maxCode,
|
|
604
327
|
size_t nbSeq, const FSE_CTable* fseCTable,
|
|
605
|
-
const
|
|
606
|
-
short const* defaultNorm, U32 defaultNormLog,
|
|
328
|
+
const U8* additionalBits,
|
|
329
|
+
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
|
607
330
|
void* workspace, size_t wkspSize)
|
|
608
331
|
{
|
|
609
332
|
unsigned* const countWksp = (unsigned*)workspace;
|
|
@@ -615,7 +338,11 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
|
|
615
338
|
|
|
616
339
|
HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
|
617
340
|
if (type == set_basic) {
|
|
618
|
-
|
|
341
|
+
/* We selected this encoding type, so it must be valid. */
|
|
342
|
+
assert(max <= defaultMax);
|
|
343
|
+
cSymbolTypeSizeEstimateInBits = max <= defaultMax
|
|
344
|
+
? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max)
|
|
345
|
+
: ERROR(GENERIC);
|
|
619
346
|
} else if (type == set_rle) {
|
|
620
347
|
cSymbolTypeSizeEstimateInBits = 0;
|
|
621
348
|
} else if (type == set_compressed || type == set_repeat) {
|
|
@@ -639,19 +366,20 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
|
|
|
639
366
|
void* workspace, size_t wkspSize,
|
|
640
367
|
int writeEntropy)
|
|
641
368
|
{
|
|
642
|
-
size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
|
|
369
|
+
size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
|
|
643
370
|
size_t cSeqSizeEstimate = 0;
|
|
371
|
+
if (nbSeq == 0) return sequencesSectionHeaderSize;
|
|
644
372
|
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
|
|
645
373
|
nbSeq, fseTables->offcodeCTable, NULL,
|
|
646
|
-
OF_defaultNorm, OF_defaultNormLog,
|
|
374
|
+
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
|
647
375
|
workspace, wkspSize);
|
|
648
376
|
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
|
|
649
377
|
nbSeq, fseTables->litlengthCTable, LL_bits,
|
|
650
|
-
LL_defaultNorm, LL_defaultNormLog,
|
|
378
|
+
LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
|
651
379
|
workspace, wkspSize);
|
|
652
380
|
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
|
|
653
381
|
nbSeq, fseTables->matchlengthCTable, ML_bits,
|
|
654
|
-
ML_defaultNorm, ML_defaultNormLog,
|
|
382
|
+
ML_defaultNorm, ML_defaultNormLog, MaxML,
|
|
655
383
|
workspace, wkspSize);
|
|
656
384
|
if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
|
|
657
385
|
return cSeqSizeEstimate + sequencesSectionHeaderSize;
|
|
@@ -747,7 +475,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|
|
747
475
|
/* I think there is an optimization opportunity here.
|
|
748
476
|
* Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
|
|
749
477
|
* since it recalculates estimate from scratch.
|
|
750
|
-
* For example, it would recount literal distribution and symbol codes
|
|
478
|
+
* For example, it would recount literal distribution and symbol codes every time.
|
|
751
479
|
*/
|
|
752
480
|
cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
|
|
753
481
|
&nextCBlock->entropy, entropyMetadata,
|
|
@@ -790,7 +518,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|
|
790
518
|
} while (!lastSequence);
|
|
791
519
|
if (writeLitEntropy) {
|
|
792
520
|
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
|
|
793
|
-
|
|
521
|
+
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
|
|
794
522
|
}
|
|
795
523
|
if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
|
|
796
524
|
/* If we haven't written our entropy tables, then we've violated our contract and
|
|
@@ -809,11 +537,11 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|
|
809
537
|
if (sp < send) {
|
|
810
538
|
seqDef const* seq;
|
|
811
539
|
repcodes_t rep;
|
|
812
|
-
|
|
540
|
+
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
|
|
813
541
|
for (seq = sstart; seq < sp; ++seq) {
|
|
814
542
|
rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
|
|
815
543
|
}
|
|
816
|
-
|
|
544
|
+
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
|
|
817
545
|
}
|
|
818
546
|
}
|
|
819
547
|
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
|
|
@@ -826,12 +554,12 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
|
|
826
554
|
unsigned lastBlock) {
|
|
827
555
|
ZSTD_entropyCTablesMetadata_t entropyMetadata;
|
|
828
556
|
|
|
829
|
-
FORWARD_IF_ERROR(
|
|
557
|
+
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
|
|
830
558
|
&zc->blockState.prevCBlock->entropy,
|
|
831
559
|
&zc->blockState.nextCBlock->entropy,
|
|
832
560
|
&zc->appliedParams,
|
|
833
561
|
&entropyMetadata,
|
|
834
|
-
zc->entropyWorkspace,
|
|
562
|
+
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
|
|
835
563
|
|
|
836
564
|
return ZSTD_compressSubBlock_multi(&zc->seqStore,
|
|
837
565
|
zc->blockState.prevCBlock,
|
|
@@ -841,5 +569,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
|
|
841
569
|
dst, dstCapacity,
|
|
842
570
|
src, srcSize,
|
|
843
571
|
zc->bmi2, lastBlock,
|
|
844
|
-
zc->entropyWorkspace,
|
|
572
|
+
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
|
|
845
573
|
}
|