extzstd 0.3.2 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/contrib/zstd/CHANGELOG +225 -1
- data/contrib/zstd/CONTRIBUTING.md +158 -75
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +106 -69
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +64 -36
- data/contrib/zstd/SECURITY.md +15 -0
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +117 -199
- data/contrib/zstd/lib/README.md +37 -7
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +80 -86
- data/contrib/zstd/lib/common/compiler.h +225 -63
- data/contrib/zstd/lib/common/cpu.h +37 -1
- data/contrib/zstd/lib/common/debug.c +7 -1
- data/contrib/zstd/lib/common/debug.h +21 -12
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +93 -5
- data/contrib/zstd/lib/common/fse.h +12 -87
- data/contrib/zstd/lib/common/fse_decompress.c +37 -117
- data/contrib/zstd/lib/common/huf.h +97 -172
- data/contrib/zstd/lib/common/mem.h +58 -58
- data/contrib/zstd/lib/common/pool.c +38 -17
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +158 -0
- data/contrib/zstd/lib/common/threading.c +74 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +6 -814
- data/contrib/zstd/lib/common/xxhash.h +6930 -195
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +68 -154
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +75 -155
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +810 -259
- data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
- data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
- data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
- data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
- data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
- data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
- data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
- data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
- data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +237 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +1030 -332
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +26 -7
- data/ext/extzstd.c +51 -24
- data/ext/extzstd.h +33 -6
- data/ext/extzstd_stream.c +74 -31
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +17 -7
- data/contrib/zstd/appveyor.yml +0 -292
- data/ext/depend +0 -2
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -15,288 +15,10 @@
|
|
|
15
15
|
|
|
16
16
|
#include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */
|
|
17
17
|
#include "hist.h" /* HIST_countFast_wksp */
|
|
18
|
-
#include "zstd_compress_internal.h"
|
|
18
|
+
#include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
|
|
19
19
|
#include "zstd_compress_sequences.h"
|
|
20
20
|
#include "zstd_compress_literals.h"
|
|
21
21
|
|
|
22
|
-
/*-*************************************
|
|
23
|
-
* Superblock entropy buffer structs
|
|
24
|
-
***************************************/
|
|
25
|
-
/** ZSTD_hufCTablesMetadata_t :
|
|
26
|
-
* Stores Literals Block Type for a super-block in hType, and
|
|
27
|
-
* huffman tree description in hufDesBuffer.
|
|
28
|
-
* hufDesSize refers to the size of huffman tree description in bytes.
|
|
29
|
-
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
|
|
30
|
-
typedef struct {
|
|
31
|
-
symbolEncodingType_e hType;
|
|
32
|
-
BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
|
|
33
|
-
size_t hufDesSize;
|
|
34
|
-
} ZSTD_hufCTablesMetadata_t;
|
|
35
|
-
|
|
36
|
-
/** ZSTD_fseCTablesMetadata_t :
|
|
37
|
-
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
|
|
38
|
-
* fse tables in fseTablesBuffer.
|
|
39
|
-
* fseTablesSize refers to the size of fse tables in bytes.
|
|
40
|
-
* This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
|
|
41
|
-
typedef struct {
|
|
42
|
-
symbolEncodingType_e llType;
|
|
43
|
-
symbolEncodingType_e ofType;
|
|
44
|
-
symbolEncodingType_e mlType;
|
|
45
|
-
BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
|
|
46
|
-
size_t fseTablesSize;
|
|
47
|
-
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
|
|
48
|
-
} ZSTD_fseCTablesMetadata_t;
|
|
49
|
-
|
|
50
|
-
typedef struct {
|
|
51
|
-
ZSTD_hufCTablesMetadata_t hufMetadata;
|
|
52
|
-
ZSTD_fseCTablesMetadata_t fseMetadata;
|
|
53
|
-
} ZSTD_entropyCTablesMetadata_t;
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
/** ZSTD_buildSuperBlockEntropy_literal() :
|
|
57
|
-
* Builds entropy for the super-block literals.
|
|
58
|
-
* Stores literals block type (raw, rle, compressed, repeat) and
|
|
59
|
-
* huffman description table to hufMetadata.
|
|
60
|
-
* @return : size of huffman description table or error code */
|
|
61
|
-
static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
|
|
62
|
-
const ZSTD_hufCTables_t* prevHuf,
|
|
63
|
-
ZSTD_hufCTables_t* nextHuf,
|
|
64
|
-
ZSTD_hufCTablesMetadata_t* hufMetadata,
|
|
65
|
-
const int disableLiteralsCompression,
|
|
66
|
-
void* workspace, size_t wkspSize)
|
|
67
|
-
{
|
|
68
|
-
BYTE* const wkspStart = (BYTE*)workspace;
|
|
69
|
-
BYTE* const wkspEnd = wkspStart + wkspSize;
|
|
70
|
-
BYTE* const countWkspStart = wkspStart;
|
|
71
|
-
unsigned* const countWksp = (unsigned*)workspace;
|
|
72
|
-
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
|
|
73
|
-
BYTE* const nodeWksp = countWkspStart + countWkspSize;
|
|
74
|
-
const size_t nodeWkspSize = wkspEnd-nodeWksp;
|
|
75
|
-
unsigned maxSymbolValue = 255;
|
|
76
|
-
unsigned huffLog = HUF_TABLELOG_DEFAULT;
|
|
77
|
-
HUF_repeat repeat = prevHuf->repeatMode;
|
|
78
|
-
|
|
79
|
-
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
|
|
80
|
-
|
|
81
|
-
/* Prepare nextEntropy assuming reusing the existing table */
|
|
82
|
-
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
83
|
-
|
|
84
|
-
if (disableLiteralsCompression) {
|
|
85
|
-
DEBUGLOG(5, "set_basic - disabled");
|
|
86
|
-
hufMetadata->hType = set_basic;
|
|
87
|
-
return 0;
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
/* small ? don't even attempt compression (speed opt) */
|
|
91
|
-
# define COMPRESS_LITERALS_SIZE_MIN 63
|
|
92
|
-
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
|
93
|
-
if (srcSize <= minLitSize) {
|
|
94
|
-
DEBUGLOG(5, "set_basic - too small");
|
|
95
|
-
hufMetadata->hType = set_basic;
|
|
96
|
-
return 0;
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
/* Scan input and build symbol stats */
|
|
101
|
-
{ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
|
|
102
|
-
FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
|
|
103
|
-
if (largest == srcSize) {
|
|
104
|
-
DEBUGLOG(5, "set_rle");
|
|
105
|
-
hufMetadata->hType = set_rle;
|
|
106
|
-
return 0;
|
|
107
|
-
}
|
|
108
|
-
if (largest <= (srcSize >> 7)+4) {
|
|
109
|
-
DEBUGLOG(5, "set_basic - no gain");
|
|
110
|
-
hufMetadata->hType = set_basic;
|
|
111
|
-
return 0;
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
/* Validate the previous Huffman table */
|
|
116
|
-
if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
|
|
117
|
-
repeat = HUF_repeat_none;
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
/* Build Huffman Tree */
|
|
121
|
-
ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
|
|
122
|
-
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
|
123
|
-
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
|
|
124
|
-
maxSymbolValue, huffLog,
|
|
125
|
-
nodeWksp, nodeWkspSize);
|
|
126
|
-
FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
|
|
127
|
-
huffLog = (U32)maxBits;
|
|
128
|
-
{ /* Build and write the CTable */
|
|
129
|
-
size_t const newCSize = HUF_estimateCompressedSize(
|
|
130
|
-
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
|
|
131
|
-
size_t const hSize = HUF_writeCTable(
|
|
132
|
-
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
|
|
133
|
-
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
|
|
134
|
-
/* Check against repeating the previous CTable */
|
|
135
|
-
if (repeat != HUF_repeat_none) {
|
|
136
|
-
size_t const oldCSize = HUF_estimateCompressedSize(
|
|
137
|
-
(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
|
|
138
|
-
if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
|
|
139
|
-
DEBUGLOG(5, "set_repeat - smaller");
|
|
140
|
-
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
141
|
-
hufMetadata->hType = set_repeat;
|
|
142
|
-
return 0;
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
if (newCSize + hSize >= srcSize) {
|
|
146
|
-
DEBUGLOG(5, "set_basic - no gains");
|
|
147
|
-
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
148
|
-
hufMetadata->hType = set_basic;
|
|
149
|
-
return 0;
|
|
150
|
-
}
|
|
151
|
-
DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
|
|
152
|
-
hufMetadata->hType = set_compressed;
|
|
153
|
-
nextHuf->repeatMode = HUF_repeat_check;
|
|
154
|
-
return hSize;
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
/** ZSTD_buildSuperBlockEntropy_sequences() :
|
|
160
|
-
* Builds entropy for the super-block sequences.
|
|
161
|
-
* Stores symbol compression modes and fse table to fseMetadata.
|
|
162
|
-
* @return : size of fse tables or error code */
|
|
163
|
-
static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
|
|
164
|
-
const ZSTD_fseCTables_t* prevEntropy,
|
|
165
|
-
ZSTD_fseCTables_t* nextEntropy,
|
|
166
|
-
const ZSTD_CCtx_params* cctxParams,
|
|
167
|
-
ZSTD_fseCTablesMetadata_t* fseMetadata,
|
|
168
|
-
void* workspace, size_t wkspSize)
|
|
169
|
-
{
|
|
170
|
-
BYTE* const wkspStart = (BYTE*)workspace;
|
|
171
|
-
BYTE* const wkspEnd = wkspStart + wkspSize;
|
|
172
|
-
BYTE* const countWkspStart = wkspStart;
|
|
173
|
-
unsigned* const countWksp = (unsigned*)workspace;
|
|
174
|
-
const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
|
|
175
|
-
BYTE* const cTableWksp = countWkspStart + countWkspSize;
|
|
176
|
-
const size_t cTableWkspSize = wkspEnd-cTableWksp;
|
|
177
|
-
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
|
178
|
-
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
|
|
179
|
-
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
|
|
180
|
-
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
|
|
181
|
-
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
|
182
|
-
const BYTE* const llCodeTable = seqStorePtr->llCode;
|
|
183
|
-
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
|
184
|
-
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
|
185
|
-
BYTE* const ostart = fseMetadata->fseTablesBuffer;
|
|
186
|
-
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
|
|
187
|
-
BYTE* op = ostart;
|
|
188
|
-
|
|
189
|
-
assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
|
|
190
|
-
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
|
|
191
|
-
ZSTD_memset(workspace, 0, wkspSize);
|
|
192
|
-
|
|
193
|
-
fseMetadata->lastCountSize = 0;
|
|
194
|
-
/* convert length/distances into codes */
|
|
195
|
-
ZSTD_seqToCodes(seqStorePtr);
|
|
196
|
-
/* build CTable for Literal Lengths */
|
|
197
|
-
{ U32 LLtype;
|
|
198
|
-
unsigned max = MaxLL;
|
|
199
|
-
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
|
200
|
-
DEBUGLOG(5, "Building LL table");
|
|
201
|
-
nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
|
|
202
|
-
LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
|
|
203
|
-
countWksp, max, mostFrequent, nbSeq,
|
|
204
|
-
LLFSELog, prevEntropy->litlengthCTable,
|
|
205
|
-
LL_defaultNorm, LL_defaultNormLog,
|
|
206
|
-
ZSTD_defaultAllowed, strategy);
|
|
207
|
-
assert(set_basic < set_compressed && set_rle < set_compressed);
|
|
208
|
-
assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
|
209
|
-
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
|
|
210
|
-
countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
|
211
|
-
prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
|
|
212
|
-
cTableWksp, cTableWkspSize);
|
|
213
|
-
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
|
|
214
|
-
if (LLtype == set_compressed)
|
|
215
|
-
fseMetadata->lastCountSize = countSize;
|
|
216
|
-
op += countSize;
|
|
217
|
-
fseMetadata->llType = (symbolEncodingType_e) LLtype;
|
|
218
|
-
} }
|
|
219
|
-
/* build CTable for Offsets */
|
|
220
|
-
{ U32 Offtype;
|
|
221
|
-
unsigned max = MaxOff;
|
|
222
|
-
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
|
223
|
-
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
|
|
224
|
-
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
|
|
225
|
-
DEBUGLOG(5, "Building OF table");
|
|
226
|
-
nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
|
|
227
|
-
Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
|
|
228
|
-
countWksp, max, mostFrequent, nbSeq,
|
|
229
|
-
OffFSELog, prevEntropy->offcodeCTable,
|
|
230
|
-
OF_defaultNorm, OF_defaultNormLog,
|
|
231
|
-
defaultPolicy, strategy);
|
|
232
|
-
assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
|
233
|
-
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
|
|
234
|
-
countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
|
235
|
-
prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
|
|
236
|
-
cTableWksp, cTableWkspSize);
|
|
237
|
-
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
|
|
238
|
-
if (Offtype == set_compressed)
|
|
239
|
-
fseMetadata->lastCountSize = countSize;
|
|
240
|
-
op += countSize;
|
|
241
|
-
fseMetadata->ofType = (symbolEncodingType_e) Offtype;
|
|
242
|
-
} }
|
|
243
|
-
/* build CTable for MatchLengths */
|
|
244
|
-
{ U32 MLtype;
|
|
245
|
-
unsigned max = MaxML;
|
|
246
|
-
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
|
247
|
-
DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
|
|
248
|
-
nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
|
|
249
|
-
MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
|
|
250
|
-
countWksp, max, mostFrequent, nbSeq,
|
|
251
|
-
MLFSELog, prevEntropy->matchlengthCTable,
|
|
252
|
-
ML_defaultNorm, ML_defaultNormLog,
|
|
253
|
-
ZSTD_defaultAllowed, strategy);
|
|
254
|
-
assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
|
255
|
-
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
|
|
256
|
-
countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
|
|
257
|
-
prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
|
|
258
|
-
cTableWksp, cTableWkspSize);
|
|
259
|
-
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
|
|
260
|
-
if (MLtype == set_compressed)
|
|
261
|
-
fseMetadata->lastCountSize = countSize;
|
|
262
|
-
op += countSize;
|
|
263
|
-
fseMetadata->mlType = (symbolEncodingType_e) MLtype;
|
|
264
|
-
} }
|
|
265
|
-
assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
|
|
266
|
-
return op-ostart;
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
/** ZSTD_buildSuperBlockEntropy() :
|
|
271
|
-
* Builds entropy for the super-block.
|
|
272
|
-
* @return : 0 on success or error code */
|
|
273
|
-
static size_t
|
|
274
|
-
ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
|
|
275
|
-
const ZSTD_entropyCTables_t* prevEntropy,
|
|
276
|
-
ZSTD_entropyCTables_t* nextEntropy,
|
|
277
|
-
const ZSTD_CCtx_params* cctxParams,
|
|
278
|
-
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
|
279
|
-
void* workspace, size_t wkspSize)
|
|
280
|
-
{
|
|
281
|
-
size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
|
|
282
|
-
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
|
|
283
|
-
entropyMetadata->hufMetadata.hufDesSize =
|
|
284
|
-
ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
|
|
285
|
-
&prevEntropy->huf, &nextEntropy->huf,
|
|
286
|
-
&entropyMetadata->hufMetadata,
|
|
287
|
-
ZSTD_disableLiteralsCompression(cctxParams),
|
|
288
|
-
workspace, wkspSize);
|
|
289
|
-
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
|
|
290
|
-
entropyMetadata->fseMetadata.fseTablesSize =
|
|
291
|
-
ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
|
|
292
|
-
&prevEntropy->fse, &nextEntropy->fse,
|
|
293
|
-
cctxParams,
|
|
294
|
-
&entropyMetadata->fseMetadata,
|
|
295
|
-
workspace, wkspSize);
|
|
296
|
-
FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
|
|
297
|
-
return 0;
|
|
298
|
-
}
|
|
299
|
-
|
|
300
22
|
/** ZSTD_compressSubBlock_literal() :
|
|
301
23
|
* Compresses literals section for a sub-block.
|
|
302
24
|
* When we have to write the Huffman table we will sometimes choose a header
|
|
@@ -304,7 +26,7 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
|
|
|
304
26
|
* before we know the table size + compressed size, so we have a bound on the
|
|
305
27
|
* table size. If we guessed incorrectly, we fall back to uncompressed literals.
|
|
306
28
|
*
|
|
307
|
-
* We write the header when writeEntropy=1 and set
|
|
29
|
+
* We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
|
|
308
30
|
* in writing the header, otherwise it is set to 0.
|
|
309
31
|
*
|
|
310
32
|
* hufMetadata->hType has literals block type info.
|
|
@@ -314,13 +36,14 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
|
|
|
314
36
|
* If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
|
|
315
37
|
* and the following sub-blocks' literals sections will be Treeless_Literals_Block.
|
|
316
38
|
* @return : compressed size of literals section of a sub-block
|
|
317
|
-
* Or 0 if
|
|
39
|
+
* Or 0 if unable to compress.
|
|
318
40
|
* Or error code */
|
|
319
|
-
static size_t
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
41
|
+
static size_t
|
|
42
|
+
ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|
43
|
+
const ZSTD_hufCTablesMetadata_t* hufMetadata,
|
|
44
|
+
const BYTE* literals, size_t litSize,
|
|
45
|
+
void* dst, size_t dstSize,
|
|
46
|
+
const int bmi2, int writeEntropy, int* entropyWritten)
|
|
324
47
|
{
|
|
325
48
|
size_t const header = writeEntropy ? 200 : 0;
|
|
326
49
|
size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
|
|
@@ -331,8 +54,6 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|
|
331
54
|
symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
|
|
332
55
|
size_t cLitSize = 0;
|
|
333
56
|
|
|
334
|
-
(void)bmi2; /* TODO bmi2... */
|
|
335
|
-
|
|
336
57
|
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
|
|
337
58
|
|
|
338
59
|
*entropyWritten = 0;
|
|
@@ -354,9 +75,9 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|
|
354
75
|
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
|
|
355
76
|
}
|
|
356
77
|
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
|
|
78
|
+
{ int const flags = bmi2 ? HUF_flags_bmi2 : 0;
|
|
79
|
+
const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
|
|
80
|
+
: HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
|
|
360
81
|
op += cSize;
|
|
361
82
|
cLitSize += cSize;
|
|
362
83
|
if (cSize == 0 || ERR_isError(cSize)) {
|
|
@@ -381,7 +102,7 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|
|
381
102
|
switch(lhSize)
|
|
382
103
|
{
|
|
383
104
|
case 3: /* 2 - 2 - 10 - 10 */
|
|
384
|
-
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
|
|
105
|
+
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
|
|
385
106
|
MEM_writeLE24(ostart, lhc);
|
|
386
107
|
break;
|
|
387
108
|
}
|
|
@@ -401,25 +122,30 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|
|
401
122
|
}
|
|
402
123
|
*entropyWritten = 1;
|
|
403
124
|
DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
|
|
404
|
-
return op-ostart;
|
|
125
|
+
return (size_t)(op-ostart);
|
|
405
126
|
}
|
|
406
127
|
|
|
407
|
-
static size_t
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
128
|
+
static size_t
|
|
129
|
+
ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
|
|
130
|
+
const seqDef* sequences, size_t nbSeqs,
|
|
131
|
+
size_t litSize, int lastSubBlock)
|
|
132
|
+
{
|
|
411
133
|
size_t matchLengthSum = 0;
|
|
412
134
|
size_t litLengthSum = 0;
|
|
413
|
-
|
|
414
|
-
|
|
135
|
+
size_t n;
|
|
136
|
+
for (n=0; n<nbSeqs; n++) {
|
|
137
|
+
const ZSTD_sequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
|
|
415
138
|
litLengthSum += seqLen.litLength;
|
|
416
139
|
matchLengthSum += seqLen.matchLength;
|
|
417
|
-
sp++;
|
|
418
140
|
}
|
|
419
|
-
|
|
420
|
-
|
|
141
|
+
DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
|
|
142
|
+
(unsigned)nbSeqs, (const void*)sequences,
|
|
143
|
+
(unsigned)litLengthSum, (unsigned)matchLengthSum);
|
|
144
|
+
if (!lastSubBlock)
|
|
421
145
|
assert(litLengthSum == litSize);
|
|
422
|
-
|
|
146
|
+
else
|
|
147
|
+
assert(litLengthSum <= litSize);
|
|
148
|
+
(void)litLengthSum;
|
|
423
149
|
return matchLengthSum + litSize;
|
|
424
150
|
}
|
|
425
151
|
|
|
@@ -433,13 +159,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
|
|
|
433
159
|
* @return : compressed size of sequences section of a sub-block
|
|
434
160
|
* Or 0 if it is unable to compress
|
|
435
161
|
* Or error code. */
|
|
436
|
-
static size_t
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
162
|
+
static size_t
|
|
163
|
+
ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
|
|
164
|
+
const ZSTD_fseCTablesMetadata_t* fseMetadata,
|
|
165
|
+
const seqDef* sequences, size_t nbSeq,
|
|
166
|
+
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
|
|
167
|
+
const ZSTD_CCtx_params* cctxParams,
|
|
168
|
+
void* dst, size_t dstCapacity,
|
|
169
|
+
const int bmi2, int writeEntropy, int* entropyWritten)
|
|
443
170
|
{
|
|
444
171
|
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
|
|
445
172
|
BYTE* const ostart = (BYTE*)dst;
|
|
@@ -453,14 +180,14 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
|
|
|
453
180
|
/* Sequences Header */
|
|
454
181
|
RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
|
|
455
182
|
dstSize_tooSmall, "");
|
|
456
|
-
if (nbSeq <
|
|
183
|
+
if (nbSeq < 128)
|
|
457
184
|
*op++ = (BYTE)nbSeq;
|
|
458
185
|
else if (nbSeq < LONGNBSEQ)
|
|
459
186
|
op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
|
|
460
187
|
else
|
|
461
188
|
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
|
|
462
189
|
if (nbSeq==0) {
|
|
463
|
-
return op - ostart;
|
|
190
|
+
return (size_t)(op - ostart);
|
|
464
191
|
}
|
|
465
192
|
|
|
466
193
|
/* seqHead : flags for FSE encoding type */
|
|
@@ -482,7 +209,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
|
|
|
482
209
|
}
|
|
483
210
|
|
|
484
211
|
{ size_t const bitstreamSize = ZSTD_encodeSequences(
|
|
485
|
-
op, oend - op,
|
|
212
|
+
op, (size_t)(oend - op),
|
|
486
213
|
fseTables->matchlengthCTable, mlCode,
|
|
487
214
|
fseTables->offcodeCTable, ofCode,
|
|
488
215
|
fseTables->litlengthCTable, llCode,
|
|
@@ -526,7 +253,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
|
|
|
526
253
|
#endif
|
|
527
254
|
|
|
528
255
|
*entropyWritten = 1;
|
|
529
|
-
return op - ostart;
|
|
256
|
+
return (size_t)(op - ostart);
|
|
530
257
|
}
|
|
531
258
|
|
|
532
259
|
/** ZSTD_compressSubBlock() :
|
|
@@ -552,7 +279,8 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
|
|
|
552
279
|
litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
|
|
553
280
|
{ size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
|
|
554
281
|
&entropyMetadata->hufMetadata, literals, litSize,
|
|
555
|
-
op, oend-op,
|
|
282
|
+
op, (size_t)(oend-op),
|
|
283
|
+
bmi2, writeLitEntropy, litEntropyWritten);
|
|
556
284
|
FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
|
|
557
285
|
if (cLitSize == 0) return 0;
|
|
558
286
|
op += cLitSize;
|
|
@@ -562,18 +290,18 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
|
|
|
562
290
|
sequences, nbSeq,
|
|
563
291
|
llCode, mlCode, ofCode,
|
|
564
292
|
cctxParams,
|
|
565
|
-
op, oend-op,
|
|
293
|
+
op, (size_t)(oend-op),
|
|
566
294
|
bmi2, writeSeqEntropy, seqEntropyWritten);
|
|
567
295
|
FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
|
|
568
296
|
if (cSeqSize == 0) return 0;
|
|
569
297
|
op += cSeqSize;
|
|
570
298
|
}
|
|
571
299
|
/* Write block header */
|
|
572
|
-
{ size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
|
|
300
|
+
{ size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
|
|
573
301
|
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
|
574
302
|
MEM_writeLE24(ostart, cBlockHeader24);
|
|
575
303
|
}
|
|
576
|
-
return op-ostart;
|
|
304
|
+
return (size_t)(op-ostart);
|
|
577
305
|
}
|
|
578
306
|
|
|
579
307
|
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
|
|
@@ -602,7 +330,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
|
|
|
602
330
|
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
|
603
331
|
const BYTE* codeTable, unsigned maxCode,
|
|
604
332
|
size_t nbSeq, const FSE_CTable* fseCTable,
|
|
605
|
-
const
|
|
333
|
+
const U8* additionalBits,
|
|
606
334
|
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
|
607
335
|
void* workspace, size_t wkspSize)
|
|
608
336
|
{
|
|
@@ -643,8 +371,9 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
|
|
|
643
371
|
void* workspace, size_t wkspSize,
|
|
644
372
|
int writeEntropy)
|
|
645
373
|
{
|
|
646
|
-
size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
|
|
374
|
+
size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
|
|
647
375
|
size_t cSeqSizeEstimate = 0;
|
|
376
|
+
if (nbSeq == 0) return sequencesSectionHeaderSize;
|
|
648
377
|
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
|
|
649
378
|
nbSeq, fseTables->offcodeCTable, NULL,
|
|
650
379
|
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
|
@@ -661,7 +390,11 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
|
|
|
661
390
|
return cSeqSizeEstimate + sequencesSectionHeaderSize;
|
|
662
391
|
}
|
|
663
392
|
|
|
664
|
-
|
|
393
|
+
typedef struct {
|
|
394
|
+
size_t estLitSize;
|
|
395
|
+
size_t estBlockSize;
|
|
396
|
+
} EstimatedBlockSize;
|
|
397
|
+
static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
|
|
665
398
|
const BYTE* ofCodeTable,
|
|
666
399
|
const BYTE* llCodeTable,
|
|
667
400
|
const BYTE* mlCodeTable,
|
|
@@ -669,15 +402,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
|
|
|
669
402
|
const ZSTD_entropyCTables_t* entropy,
|
|
670
403
|
const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
|
671
404
|
void* workspace, size_t wkspSize,
|
|
672
|
-
int writeLitEntropy, int writeSeqEntropy)
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
405
|
+
int writeLitEntropy, int writeSeqEntropy)
|
|
406
|
+
{
|
|
407
|
+
EstimatedBlockSize ebs;
|
|
408
|
+
ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
|
|
409
|
+
&entropy->huf, &entropyMetadata->hufMetadata,
|
|
410
|
+
workspace, wkspSize, writeLitEntropy);
|
|
411
|
+
ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
|
|
678
412
|
nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
|
|
679
413
|
workspace, wkspSize, writeSeqEntropy);
|
|
680
|
-
|
|
414
|
+
ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
|
|
415
|
+
return ebs;
|
|
681
416
|
}
|
|
682
417
|
|
|
683
418
|
static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
|
|
@@ -691,13 +426,56 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
|
|
|
691
426
|
return 0;
|
|
692
427
|
}
|
|
693
428
|
|
|
429
|
+
static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t seqCount)
|
|
430
|
+
{
|
|
431
|
+
size_t n, total = 0;
|
|
432
|
+
assert(sp != NULL);
|
|
433
|
+
for (n=0; n<seqCount; n++) {
|
|
434
|
+
total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
|
|
435
|
+
}
|
|
436
|
+
DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
|
|
437
|
+
return total;
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
#define BYTESCALE 256
|
|
441
|
+
|
|
442
|
+
static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
|
|
443
|
+
size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
|
|
444
|
+
int firstSubBlock)
|
|
445
|
+
{
|
|
446
|
+
size_t n, budget = 0, inSize=0;
|
|
447
|
+
/* entropy headers */
|
|
448
|
+
size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
|
|
449
|
+
assert(firstSubBlock==0 || firstSubBlock==1);
|
|
450
|
+
budget += headerSize;
|
|
451
|
+
|
|
452
|
+
/* first sequence => at least one sequence*/
|
|
453
|
+
budget += sp[0].litLength * avgLitCost + avgSeqCost;
|
|
454
|
+
if (budget > targetBudget) return 1;
|
|
455
|
+
inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
|
|
456
|
+
|
|
457
|
+
/* loop over sequences */
|
|
458
|
+
for (n=1; n<nbSeqs; n++) {
|
|
459
|
+
size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
|
|
460
|
+
budget += currentCost;
|
|
461
|
+
inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
|
|
462
|
+
/* stop when sub-block budget is reached */
|
|
463
|
+
if ( (budget > targetBudget)
|
|
464
|
+
/* though continue to expand until the sub-block is deemed compressible */
|
|
465
|
+
&& (budget < inSize * BYTESCALE) )
|
|
466
|
+
break;
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
return n;
|
|
470
|
+
}
|
|
471
|
+
|
|
694
472
|
/** ZSTD_compressSubBlock_multi() :
|
|
695
473
|
* Breaks super-block into multiple sub-blocks and compresses them.
|
|
696
|
-
* Entropy will be written
|
|
697
|
-
* The following blocks
|
|
698
|
-
*
|
|
699
|
-
* @return : compressed size of the super block (which
|
|
700
|
-
*
|
|
474
|
+
* Entropy will be written into the first block.
|
|
475
|
+
* The following blocks use repeat_mode to compress.
|
|
476
|
+
* Sub-blocks are all compressed, except the last one when beneficial.
|
|
477
|
+
* @return : compressed size of the super block (which features multiple ZSTD blocks)
|
|
478
|
+
* or 0 if it failed to compress. */
|
|
701
479
|
static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|
702
480
|
const ZSTD_compressedBlockState_t* prevCBlock,
|
|
703
481
|
ZSTD_compressedBlockState_t* nextCBlock,
|
|
@@ -710,10 +488,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|
|
710
488
|
{
|
|
711
489
|
const seqDef* const sstart = seqStorePtr->sequencesStart;
|
|
712
490
|
const seqDef* const send = seqStorePtr->sequences;
|
|
713
|
-
const seqDef* sp = sstart;
|
|
491
|
+
const seqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */
|
|
492
|
+
size_t const nbSeqs = (size_t)(send - sstart);
|
|
714
493
|
const BYTE* const lstart = seqStorePtr->litStart;
|
|
715
494
|
const BYTE* const lend = seqStorePtr->lit;
|
|
716
495
|
const BYTE* lp = lstart;
|
|
496
|
+
size_t const nbLiterals = (size_t)(lend - lstart);
|
|
717
497
|
BYTE const* ip = (BYTE const*)src;
|
|
718
498
|
BYTE const* const iend = ip + srcSize;
|
|
719
499
|
BYTE* const ostart = (BYTE*)dst;
|
|
@@ -722,115 +502,174 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|
|
722
502
|
const BYTE* llCodePtr = seqStorePtr->llCode;
|
|
723
503
|
const BYTE* mlCodePtr = seqStorePtr->mlCode;
|
|
724
504
|
const BYTE* ofCodePtr = seqStorePtr->ofCode;
|
|
725
|
-
size_t
|
|
726
|
-
size_t
|
|
727
|
-
int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
|
|
505
|
+
size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
|
|
506
|
+
size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
|
|
507
|
+
int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
|
|
728
508
|
int writeSeqEntropy = 1;
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
509
|
+
|
|
510
|
+
DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
|
|
511
|
+
(unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
|
|
512
|
+
|
|
513
|
+
/* let's start by a general estimation for the full block */
|
|
514
|
+
if (nbSeqs > 0) {
|
|
515
|
+
EstimatedBlockSize const ebs =
|
|
516
|
+
ZSTD_estimateSubBlockSize(lp, nbLiterals,
|
|
517
|
+
ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
|
|
518
|
+
&nextCBlock->entropy, entropyMetadata,
|
|
519
|
+
workspace, wkspSize,
|
|
520
|
+
writeLitEntropy, writeSeqEntropy);
|
|
521
|
+
/* quick estimation */
|
|
522
|
+
size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
|
|
523
|
+
size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
|
|
524
|
+
const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
|
|
525
|
+
size_t n, avgBlockBudget, blockBudgetSupp=0;
|
|
526
|
+
avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
|
|
527
|
+
DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
|
|
528
|
+
(unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
|
|
529
|
+
(unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
|
|
530
|
+
/* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately
|
|
531
|
+
* this will result in the production of a single uncompressed block covering @srcSize.*/
|
|
532
|
+
if (ebs.estBlockSize > srcSize) return 0;
|
|
533
|
+
|
|
534
|
+
/* compress and write sub-blocks */
|
|
535
|
+
assert(nbSubBlocks>0);
|
|
536
|
+
for (n=0; n < nbSubBlocks-1; n++) {
|
|
537
|
+
/* determine nb of sequences for current sub-block + nbLiterals from next sequence */
|
|
538
|
+
size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
|
|
539
|
+
avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
|
|
540
|
+
/* if reached last sequence : break to last sub-block (simplification) */
|
|
541
|
+
assert(seqCount <= (size_t)(send-sp));
|
|
542
|
+
if (sp + seqCount == send) break;
|
|
543
|
+
assert(seqCount > 0);
|
|
544
|
+
/* compress sub-block */
|
|
545
|
+
{ int litEntropyWritten = 0;
|
|
546
|
+
int seqEntropyWritten = 0;
|
|
547
|
+
size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
|
|
548
|
+
const size_t decompressedSize =
|
|
549
|
+
ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
|
|
550
|
+
size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
|
|
551
|
+
sp, seqCount,
|
|
552
|
+
lp, litSize,
|
|
553
|
+
llCodePtr, mlCodePtr, ofCodePtr,
|
|
554
|
+
cctxParams,
|
|
555
|
+
op, (size_t)(oend-op),
|
|
556
|
+
bmi2, writeLitEntropy, writeSeqEntropy,
|
|
557
|
+
&litEntropyWritten, &seqEntropyWritten,
|
|
558
|
+
0);
|
|
559
|
+
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
|
|
560
|
+
|
|
561
|
+
/* check compressibility, update state components */
|
|
562
|
+
if (cSize > 0 && cSize < decompressedSize) {
|
|
563
|
+
DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
|
|
564
|
+
(unsigned)decompressedSize, (unsigned)cSize);
|
|
565
|
+
assert(ip + decompressedSize <= iend);
|
|
566
|
+
ip += decompressedSize;
|
|
567
|
+
lp += litSize;
|
|
568
|
+
op += cSize;
|
|
569
|
+
llCodePtr += seqCount;
|
|
570
|
+
mlCodePtr += seqCount;
|
|
571
|
+
ofCodePtr += seqCount;
|
|
572
|
+
/* Entropy only needs to be written once */
|
|
573
|
+
if (litEntropyWritten) {
|
|
574
|
+
writeLitEntropy = 0;
|
|
575
|
+
}
|
|
576
|
+
if (seqEntropyWritten) {
|
|
577
|
+
writeSeqEntropy = 0;
|
|
578
|
+
}
|
|
579
|
+
sp += seqCount;
|
|
580
|
+
blockBudgetSupp = 0;
|
|
581
|
+
} }
|
|
582
|
+
/* otherwise : do not compress yet, coalesce current sub-block with following one */
|
|
750
583
|
}
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
if (seqEntropyWritten) {
|
|
790
|
-
writeSeqEntropy = 0;
|
|
791
|
-
}
|
|
584
|
+
} /* if (nbSeqs > 0) */
|
|
585
|
+
|
|
586
|
+
/* write last block */
|
|
587
|
+
DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
|
|
588
|
+
{ int litEntropyWritten = 0;
|
|
589
|
+
int seqEntropyWritten = 0;
|
|
590
|
+
size_t litSize = (size_t)(lend - lp);
|
|
591
|
+
size_t seqCount = (size_t)(send - sp);
|
|
592
|
+
const size_t decompressedSize =
|
|
593
|
+
ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
|
|
594
|
+
size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
|
|
595
|
+
sp, seqCount,
|
|
596
|
+
lp, litSize,
|
|
597
|
+
llCodePtr, mlCodePtr, ofCodePtr,
|
|
598
|
+
cctxParams,
|
|
599
|
+
op, (size_t)(oend-op),
|
|
600
|
+
bmi2, writeLitEntropy, writeSeqEntropy,
|
|
601
|
+
&litEntropyWritten, &seqEntropyWritten,
|
|
602
|
+
lastBlock);
|
|
603
|
+
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
|
|
604
|
+
|
|
605
|
+
/* update pointers, the nb of literals borrowed from next sequence must be preserved */
|
|
606
|
+
if (cSize > 0 && cSize < decompressedSize) {
|
|
607
|
+
DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
|
|
608
|
+
(unsigned)decompressedSize, (unsigned)cSize);
|
|
609
|
+
assert(ip + decompressedSize <= iend);
|
|
610
|
+
ip += decompressedSize;
|
|
611
|
+
lp += litSize;
|
|
612
|
+
op += cSize;
|
|
613
|
+
llCodePtr += seqCount;
|
|
614
|
+
mlCodePtr += seqCount;
|
|
615
|
+
ofCodePtr += seqCount;
|
|
616
|
+
/* Entropy only needs to be written once */
|
|
617
|
+
if (litEntropyWritten) {
|
|
618
|
+
writeLitEntropy = 0;
|
|
619
|
+
}
|
|
620
|
+
if (seqEntropyWritten) {
|
|
621
|
+
writeSeqEntropy = 0;
|
|
792
622
|
}
|
|
623
|
+
sp += seqCount;
|
|
793
624
|
}
|
|
794
|
-
}
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
|
|
795
628
|
if (writeLitEntropy) {
|
|
796
|
-
DEBUGLOG(5, "
|
|
629
|
+
DEBUGLOG(5, "Literal entropy tables were never written");
|
|
797
630
|
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
|
|
798
631
|
}
|
|
799
632
|
if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
|
|
800
633
|
/* If we haven't written our entropy tables, then we've violated our contract and
|
|
801
634
|
* must emit an uncompressed block.
|
|
802
635
|
*/
|
|
803
|
-
DEBUGLOG(5, "
|
|
636
|
+
DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block");
|
|
804
637
|
return 0;
|
|
805
638
|
}
|
|
639
|
+
|
|
806
640
|
if (ip < iend) {
|
|
807
|
-
|
|
808
|
-
|
|
641
|
+
/* some data left : last part of the block sent uncompressed */
|
|
642
|
+
size_t const rSize = (size_t)((iend - ip));
|
|
643
|
+
size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
|
|
644
|
+
DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
|
|
809
645
|
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
|
|
810
646
|
assert(cSize != 0);
|
|
811
647
|
op += cSize;
|
|
812
648
|
/* We have to regenerate the repcodes because we've skipped some sequences */
|
|
813
649
|
if (sp < send) {
|
|
814
|
-
seqDef
|
|
650
|
+
const seqDef* seq;
|
|
815
651
|
repcodes_t rep;
|
|
816
652
|
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
|
|
817
653
|
for (seq = sstart; seq < sp; ++seq) {
|
|
818
|
-
|
|
654
|
+
ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
|
|
819
655
|
}
|
|
820
656
|
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
|
|
821
657
|
}
|
|
822
658
|
}
|
|
823
|
-
|
|
824
|
-
|
|
659
|
+
|
|
660
|
+
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
|
|
661
|
+
(unsigned)(op-ostart));
|
|
662
|
+
return (size_t)(op-ostart);
|
|
825
663
|
}
|
|
826
664
|
|
|
827
665
|
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
|
828
666
|
void* dst, size_t dstCapacity,
|
|
829
|
-
void
|
|
830
|
-
unsigned lastBlock)
|
|
667
|
+
const void* src, size_t srcSize,
|
|
668
|
+
unsigned lastBlock)
|
|
669
|
+
{
|
|
831
670
|
ZSTD_entropyCTablesMetadata_t entropyMetadata;
|
|
832
671
|
|
|
833
|
-
FORWARD_IF_ERROR(
|
|
672
|
+
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
|
|
834
673
|
&zc->blockState.prevCBlock->entropy,
|
|
835
674
|
&zc->blockState.nextCBlock->entropy,
|
|
836
675
|
&zc->appliedParams,
|