zstd-ruby 1.4.5.0 → 1.5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
- data/ext/zstdruby/libzstd/common/compiler.h +205 -22
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +12 -19
- data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
- data/ext/zstdruby/libzstd/common/error_private.c +10 -2
- data/ext/zstdruby/libzstd/common/error_private.h +82 -3
- data/ext/zstdruby/libzstd/common/fse.h +37 -86
- data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
- data/ext/zstdruby/libzstd/common/huf.h +99 -166
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +10 -4
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +74 -19
- data/ext/zstdruby/libzstd/common/threading.h +5 -10
- data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
- data/ext/zstdruby/libzstd/compress/hist.c +27 -29
- data/ext/zstdruby/libzstd/compress/hist.h +2 -2
- data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
- data/ext/zstdruby/libzstd/zstd.h +1217 -287
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +19 -36
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -354
- data/ext/zstdruby/libzstd/README.md +0 -179
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -15,288 +15,10 @@
|
|
|
15
15
|
|
|
16
16
|
#include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */
|
|
17
17
|
#include "hist.h" /* HIST_countFast_wksp */
|
|
18
|
-
#include "zstd_compress_internal.h"
|
|
18
|
+
#include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
|
|
19
19
|
#include "zstd_compress_sequences.h"
|
|
20
20
|
#include "zstd_compress_literals.h"
|
|
21
21
|
|
|
22
|
-
/*-*************************************
|
|
23
|
-
* Superblock entropy buffer structs
|
|
24
|
-
***************************************/
|
|
25
|
-
/** ZSTD_hufCTablesMetadata_t :
|
|
26
|
-
* Stores Literals Block Type for a super-block in hType, and
|
|
27
|
-
* huffman tree description in hufDesBuffer.
|
|
28
|
-
* hufDesSize refers to the size of huffman tree description in bytes.
|
|
29
|
-
* This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
|
|
30
|
-
typedef struct {
|
|
31
|
-
symbolEncodingType_e hType;
|
|
32
|
-
BYTE hufDesBuffer[500]; /* TODO give name to this value */
|
|
33
|
-
size_t hufDesSize;
|
|
34
|
-
} ZSTD_hufCTablesMetadata_t;
|
|
35
|
-
|
|
36
|
-
/** ZSTD_fseCTablesMetadata_t :
|
|
37
|
-
* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
|
|
38
|
-
* fse tables in fseTablesBuffer.
|
|
39
|
-
* fseTablesSize refers to the size of fse tables in bytes.
|
|
40
|
-
* This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
|
|
41
|
-
typedef struct {
|
|
42
|
-
symbolEncodingType_e llType;
|
|
43
|
-
symbolEncodingType_e ofType;
|
|
44
|
-
symbolEncodingType_e mlType;
|
|
45
|
-
BYTE fseTablesBuffer[500]; /* TODO give name to this value */
|
|
46
|
-
size_t fseTablesSize;
|
|
47
|
-
size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
|
|
48
|
-
} ZSTD_fseCTablesMetadata_t;
|
|
49
|
-
|
|
50
|
-
typedef struct {
|
|
51
|
-
ZSTD_hufCTablesMetadata_t hufMetadata;
|
|
52
|
-
ZSTD_fseCTablesMetadata_t fseMetadata;
|
|
53
|
-
} ZSTD_entropyCTablesMetadata_t;
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
/** ZSTD_buildSuperBlockEntropy_literal() :
|
|
57
|
-
* Builds entropy for the super-block literals.
|
|
58
|
-
* Stores literals block type (raw, rle, compressed, repeat) and
|
|
59
|
-
* huffman description table to hufMetadata.
|
|
60
|
-
* @return : size of huffman description table or error code */
|
|
61
|
-
static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
|
|
62
|
-
const ZSTD_hufCTables_t* prevHuf,
|
|
63
|
-
ZSTD_hufCTables_t* nextHuf,
|
|
64
|
-
ZSTD_hufCTablesMetadata_t* hufMetadata,
|
|
65
|
-
const int disableLiteralsCompression,
|
|
66
|
-
void* workspace, size_t wkspSize)
|
|
67
|
-
{
|
|
68
|
-
BYTE* const wkspStart = (BYTE*)workspace;
|
|
69
|
-
BYTE* const wkspEnd = wkspStart + wkspSize;
|
|
70
|
-
BYTE* const countWkspStart = wkspStart;
|
|
71
|
-
unsigned* const countWksp = (unsigned*)workspace;
|
|
72
|
-
const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
|
|
73
|
-
BYTE* const nodeWksp = countWkspStart + countWkspSize;
|
|
74
|
-
const size_t nodeWkspSize = wkspEnd-nodeWksp;
|
|
75
|
-
unsigned maxSymbolValue = 255;
|
|
76
|
-
unsigned huffLog = HUF_TABLELOG_DEFAULT;
|
|
77
|
-
HUF_repeat repeat = prevHuf->repeatMode;
|
|
78
|
-
|
|
79
|
-
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
|
|
80
|
-
|
|
81
|
-
/* Prepare nextEntropy assuming reusing the existing table */
|
|
82
|
-
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
83
|
-
|
|
84
|
-
if (disableLiteralsCompression) {
|
|
85
|
-
DEBUGLOG(5, "set_basic - disabled");
|
|
86
|
-
hufMetadata->hType = set_basic;
|
|
87
|
-
return 0;
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
/* small ? don't even attempt compression (speed opt) */
|
|
91
|
-
# define COMPRESS_LITERALS_SIZE_MIN 63
|
|
92
|
-
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
|
93
|
-
if (srcSize <= minLitSize) {
|
|
94
|
-
DEBUGLOG(5, "set_basic - too small");
|
|
95
|
-
hufMetadata->hType = set_basic;
|
|
96
|
-
return 0;
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
/* Scan input and build symbol stats */
|
|
101
|
-
{ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
|
|
102
|
-
FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
|
|
103
|
-
if (largest == srcSize) {
|
|
104
|
-
DEBUGLOG(5, "set_rle");
|
|
105
|
-
hufMetadata->hType = set_rle;
|
|
106
|
-
return 0;
|
|
107
|
-
}
|
|
108
|
-
if (largest <= (srcSize >> 7)+4) {
|
|
109
|
-
DEBUGLOG(5, "set_basic - no gain");
|
|
110
|
-
hufMetadata->hType = set_basic;
|
|
111
|
-
return 0;
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
/* Validate the previous Huffman table */
|
|
116
|
-
if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
|
|
117
|
-
repeat = HUF_repeat_none;
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
/* Build Huffman Tree */
|
|
121
|
-
memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
|
|
122
|
-
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
|
123
|
-
{ size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
|
|
124
|
-
maxSymbolValue, huffLog,
|
|
125
|
-
nodeWksp, nodeWkspSize);
|
|
126
|
-
FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
|
|
127
|
-
huffLog = (U32)maxBits;
|
|
128
|
-
{ /* Build and write the CTable */
|
|
129
|
-
size_t const newCSize = HUF_estimateCompressedSize(
|
|
130
|
-
(HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
|
|
131
|
-
size_t const hSize = HUF_writeCTable(
|
|
132
|
-
hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
|
|
133
|
-
(HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
|
|
134
|
-
/* Check against repeating the previous CTable */
|
|
135
|
-
if (repeat != HUF_repeat_none) {
|
|
136
|
-
size_t const oldCSize = HUF_estimateCompressedSize(
|
|
137
|
-
(HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
|
|
138
|
-
if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
|
|
139
|
-
DEBUGLOG(5, "set_repeat - smaller");
|
|
140
|
-
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
141
|
-
hufMetadata->hType = set_repeat;
|
|
142
|
-
return 0;
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
if (newCSize + hSize >= srcSize) {
|
|
146
|
-
DEBUGLOG(5, "set_basic - no gains");
|
|
147
|
-
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
|
148
|
-
hufMetadata->hType = set_basic;
|
|
149
|
-
return 0;
|
|
150
|
-
}
|
|
151
|
-
DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
|
|
152
|
-
hufMetadata->hType = set_compressed;
|
|
153
|
-
nextHuf->repeatMode = HUF_repeat_check;
|
|
154
|
-
return hSize;
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
/** ZSTD_buildSuperBlockEntropy_sequences() :
|
|
160
|
-
* Builds entropy for the super-block sequences.
|
|
161
|
-
* Stores symbol compression modes and fse table to fseMetadata.
|
|
162
|
-
* @return : size of fse tables or error code */
|
|
163
|
-
static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
|
|
164
|
-
const ZSTD_fseCTables_t* prevEntropy,
|
|
165
|
-
ZSTD_fseCTables_t* nextEntropy,
|
|
166
|
-
const ZSTD_CCtx_params* cctxParams,
|
|
167
|
-
ZSTD_fseCTablesMetadata_t* fseMetadata,
|
|
168
|
-
void* workspace, size_t wkspSize)
|
|
169
|
-
{
|
|
170
|
-
BYTE* const wkspStart = (BYTE*)workspace;
|
|
171
|
-
BYTE* const wkspEnd = wkspStart + wkspSize;
|
|
172
|
-
BYTE* const countWkspStart = wkspStart;
|
|
173
|
-
unsigned* const countWksp = (unsigned*)workspace;
|
|
174
|
-
const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
|
|
175
|
-
BYTE* const cTableWksp = countWkspStart + countWkspSize;
|
|
176
|
-
const size_t cTableWkspSize = wkspEnd-cTableWksp;
|
|
177
|
-
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
|
|
178
|
-
FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
|
|
179
|
-
FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
|
|
180
|
-
FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
|
|
181
|
-
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
|
182
|
-
const BYTE* const llCodeTable = seqStorePtr->llCode;
|
|
183
|
-
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
|
184
|
-
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
|
|
185
|
-
BYTE* const ostart = fseMetadata->fseTablesBuffer;
|
|
186
|
-
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
|
|
187
|
-
BYTE* op = ostart;
|
|
188
|
-
|
|
189
|
-
assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
|
|
190
|
-
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
|
|
191
|
-
memset(workspace, 0, wkspSize);
|
|
192
|
-
|
|
193
|
-
fseMetadata->lastCountSize = 0;
|
|
194
|
-
/* convert length/distances into codes */
|
|
195
|
-
ZSTD_seqToCodes(seqStorePtr);
|
|
196
|
-
/* build CTable for Literal Lengths */
|
|
197
|
-
{ U32 LLtype;
|
|
198
|
-
unsigned max = MaxLL;
|
|
199
|
-
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
|
200
|
-
DEBUGLOG(5, "Building LL table");
|
|
201
|
-
nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
|
|
202
|
-
LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
|
|
203
|
-
countWksp, max, mostFrequent, nbSeq,
|
|
204
|
-
LLFSELog, prevEntropy->litlengthCTable,
|
|
205
|
-
LL_defaultNorm, LL_defaultNormLog,
|
|
206
|
-
ZSTD_defaultAllowed, strategy);
|
|
207
|
-
assert(set_basic < set_compressed && set_rle < set_compressed);
|
|
208
|
-
assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
|
209
|
-
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
|
|
210
|
-
countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
|
211
|
-
prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
|
|
212
|
-
cTableWksp, cTableWkspSize);
|
|
213
|
-
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
|
|
214
|
-
if (LLtype == set_compressed)
|
|
215
|
-
fseMetadata->lastCountSize = countSize;
|
|
216
|
-
op += countSize;
|
|
217
|
-
fseMetadata->llType = (symbolEncodingType_e) LLtype;
|
|
218
|
-
} }
|
|
219
|
-
/* build CTable for Offsets */
|
|
220
|
-
{ U32 Offtype;
|
|
221
|
-
unsigned max = MaxOff;
|
|
222
|
-
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
|
223
|
-
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
|
|
224
|
-
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
|
|
225
|
-
DEBUGLOG(5, "Building OF table");
|
|
226
|
-
nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
|
|
227
|
-
Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
|
|
228
|
-
countWksp, max, mostFrequent, nbSeq,
|
|
229
|
-
OffFSELog, prevEntropy->offcodeCTable,
|
|
230
|
-
OF_defaultNorm, OF_defaultNormLog,
|
|
231
|
-
defaultPolicy, strategy);
|
|
232
|
-
assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
|
233
|
-
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
|
|
234
|
-
countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
|
235
|
-
prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
|
|
236
|
-
cTableWksp, cTableWkspSize);
|
|
237
|
-
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
|
|
238
|
-
if (Offtype == set_compressed)
|
|
239
|
-
fseMetadata->lastCountSize = countSize;
|
|
240
|
-
op += countSize;
|
|
241
|
-
fseMetadata->ofType = (symbolEncodingType_e) Offtype;
|
|
242
|
-
} }
|
|
243
|
-
/* build CTable for MatchLengths */
|
|
244
|
-
{ U32 MLtype;
|
|
245
|
-
unsigned max = MaxML;
|
|
246
|
-
size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
|
247
|
-
DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
|
|
248
|
-
nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
|
|
249
|
-
MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
|
|
250
|
-
countWksp, max, mostFrequent, nbSeq,
|
|
251
|
-
MLFSELog, prevEntropy->matchlengthCTable,
|
|
252
|
-
ML_defaultNorm, ML_defaultNormLog,
|
|
253
|
-
ZSTD_defaultAllowed, strategy);
|
|
254
|
-
assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
|
|
255
|
-
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
|
|
256
|
-
countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
|
|
257
|
-
prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
|
|
258
|
-
cTableWksp, cTableWkspSize);
|
|
259
|
-
FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
|
|
260
|
-
if (MLtype == set_compressed)
|
|
261
|
-
fseMetadata->lastCountSize = countSize;
|
|
262
|
-
op += countSize;
|
|
263
|
-
fseMetadata->mlType = (symbolEncodingType_e) MLtype;
|
|
264
|
-
} }
|
|
265
|
-
assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
|
|
266
|
-
return op-ostart;
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
/** ZSTD_buildSuperBlockEntropy() :
|
|
271
|
-
* Builds entropy for the super-block.
|
|
272
|
-
* @return : 0 on success or error code */
|
|
273
|
-
static size_t
|
|
274
|
-
ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
|
|
275
|
-
const ZSTD_entropyCTables_t* prevEntropy,
|
|
276
|
-
ZSTD_entropyCTables_t* nextEntropy,
|
|
277
|
-
const ZSTD_CCtx_params* cctxParams,
|
|
278
|
-
ZSTD_entropyCTablesMetadata_t* entropyMetadata,
|
|
279
|
-
void* workspace, size_t wkspSize)
|
|
280
|
-
{
|
|
281
|
-
size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
|
|
282
|
-
DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
|
|
283
|
-
entropyMetadata->hufMetadata.hufDesSize =
|
|
284
|
-
ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
|
|
285
|
-
&prevEntropy->huf, &nextEntropy->huf,
|
|
286
|
-
&entropyMetadata->hufMetadata,
|
|
287
|
-
ZSTD_disableLiteralsCompression(cctxParams),
|
|
288
|
-
workspace, wkspSize);
|
|
289
|
-
FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
|
|
290
|
-
entropyMetadata->fseMetadata.fseTablesSize =
|
|
291
|
-
ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
|
|
292
|
-
&prevEntropy->fse, &nextEntropy->fse,
|
|
293
|
-
cctxParams,
|
|
294
|
-
&entropyMetadata->fseMetadata,
|
|
295
|
-
workspace, wkspSize);
|
|
296
|
-
FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
|
|
297
|
-
return 0;
|
|
298
|
-
}
|
|
299
|
-
|
|
300
22
|
/** ZSTD_compressSubBlock_literal() :
|
|
301
23
|
* Compresses literals section for a sub-block.
|
|
302
24
|
* When we have to write the Huffman table we will sometimes choose a header
|
|
@@ -304,7 +26,7 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
|
|
|
304
26
|
* before we know the table size + compressed size, so we have a bound on the
|
|
305
27
|
* table size. If we guessed incorrectly, we fall back to uncompressed literals.
|
|
306
28
|
*
|
|
307
|
-
* We write the header when writeEntropy=1 and set
|
|
29
|
+
* We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
|
|
308
30
|
* in writing the header, otherwise it is set to 0.
|
|
309
31
|
*
|
|
310
32
|
* hufMetadata->hType has literals block type info.
|
|
@@ -314,13 +36,14 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
|
|
|
314
36
|
* If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
|
|
315
37
|
* and the following sub-blocks' literals sections will be Treeless_Literals_Block.
|
|
316
38
|
* @return : compressed size of literals section of a sub-block
|
|
317
|
-
* Or 0 if
|
|
39
|
+
* Or 0 if unable to compress.
|
|
318
40
|
* Or error code */
|
|
319
|
-
static size_t
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
41
|
+
static size_t
|
|
42
|
+
ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|
43
|
+
const ZSTD_hufCTablesMetadata_t* hufMetadata,
|
|
44
|
+
const BYTE* literals, size_t litSize,
|
|
45
|
+
void* dst, size_t dstSize,
|
|
46
|
+
const int bmi2, int writeEntropy, int* entropyWritten)
|
|
324
47
|
{
|
|
325
48
|
size_t const header = writeEntropy ? 200 : 0;
|
|
326
49
|
size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
|
|
@@ -331,8 +54,6 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|
|
331
54
|
symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
|
|
332
55
|
size_t cLitSize = 0;
|
|
333
56
|
|
|
334
|
-
(void)bmi2; /* TODO bmi2... */
|
|
335
|
-
|
|
336
57
|
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
|
|
337
58
|
|
|
338
59
|
*entropyWritten = 0;
|
|
@@ -348,15 +69,15 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|
|
348
69
|
assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
|
|
349
70
|
|
|
350
71
|
if (writeEntropy && hufMetadata->hType == set_compressed) {
|
|
351
|
-
|
|
72
|
+
ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
|
|
352
73
|
op += hufMetadata->hufDesSize;
|
|
353
74
|
cLitSize += hufMetadata->hufDesSize;
|
|
354
75
|
DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
|
|
355
76
|
}
|
|
356
77
|
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
|
|
78
|
+
{ int const flags = bmi2 ? HUF_flags_bmi2 : 0;
|
|
79
|
+
const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags)
|
|
80
|
+
: HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags);
|
|
360
81
|
op += cSize;
|
|
361
82
|
cLitSize += cSize;
|
|
362
83
|
if (cSize == 0 || ERR_isError(cSize)) {
|
|
@@ -404,12 +125,17 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|
|
404
125
|
return op-ostart;
|
|
405
126
|
}
|
|
406
127
|
|
|
407
|
-
static size_t
|
|
128
|
+
static size_t
|
|
129
|
+
ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
|
|
130
|
+
const seqDef* sequences, size_t nbSeq,
|
|
131
|
+
size_t litSize, int lastSequence)
|
|
132
|
+
{
|
|
408
133
|
const seqDef* const sstart = sequences;
|
|
409
134
|
const seqDef* const send = sequences + nbSeq;
|
|
410
135
|
const seqDef* sp = sstart;
|
|
411
136
|
size_t matchLengthSum = 0;
|
|
412
137
|
size_t litLengthSum = 0;
|
|
138
|
+
(void)(litLengthSum); /* suppress unused variable warning on some environments */
|
|
413
139
|
while (send-sp > 0) {
|
|
414
140
|
ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
|
|
415
141
|
litLengthSum += seqLen.litLength;
|
|
@@ -433,13 +159,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
|
|
|
433
159
|
* @return : compressed size of sequences section of a sub-block
|
|
434
160
|
* Or 0 if it is unable to compress
|
|
435
161
|
* Or error code. */
|
|
436
|
-
static size_t
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
162
|
+
static size_t
|
|
163
|
+
ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
|
|
164
|
+
const ZSTD_fseCTablesMetadata_t* fseMetadata,
|
|
165
|
+
const seqDef* sequences, size_t nbSeq,
|
|
166
|
+
const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
|
|
167
|
+
const ZSTD_CCtx_params* cctxParams,
|
|
168
|
+
void* dst, size_t dstCapacity,
|
|
169
|
+
const int bmi2, int writeEntropy, int* entropyWritten)
|
|
443
170
|
{
|
|
444
171
|
const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
|
|
445
172
|
BYTE* const ostart = (BYTE*)dst;
|
|
@@ -474,7 +201,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
|
|
|
474
201
|
const U32 MLtype = fseMetadata->mlType;
|
|
475
202
|
DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
|
|
476
203
|
*seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
|
|
477
|
-
|
|
204
|
+
ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
|
|
478
205
|
op += fseMetadata->fseTablesSize;
|
|
479
206
|
} else {
|
|
480
207
|
const U32 repeat = set_repeat;
|
|
@@ -602,8 +329,8 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
|
|
|
602
329
|
static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
|
603
330
|
const BYTE* codeTable, unsigned maxCode,
|
|
604
331
|
size_t nbSeq, const FSE_CTable* fseCTable,
|
|
605
|
-
const
|
|
606
|
-
short const* defaultNorm, U32 defaultNormLog,
|
|
332
|
+
const U8* additionalBits,
|
|
333
|
+
short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
|
607
334
|
void* workspace, size_t wkspSize)
|
|
608
335
|
{
|
|
609
336
|
unsigned* const countWksp = (unsigned*)workspace;
|
|
@@ -615,7 +342,11 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
|
|
|
615
342
|
|
|
616
343
|
HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */
|
|
617
344
|
if (type == set_basic) {
|
|
618
|
-
|
|
345
|
+
/* We selected this encoding type, so it must be valid. */
|
|
346
|
+
assert(max <= defaultMax);
|
|
347
|
+
cSymbolTypeSizeEstimateInBits = max <= defaultMax
|
|
348
|
+
? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max)
|
|
349
|
+
: ERROR(GENERIC);
|
|
619
350
|
} else if (type == set_rle) {
|
|
620
351
|
cSymbolTypeSizeEstimateInBits = 0;
|
|
621
352
|
} else if (type == set_compressed || type == set_repeat) {
|
|
@@ -639,19 +370,20 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
|
|
|
639
370
|
void* workspace, size_t wkspSize,
|
|
640
371
|
int writeEntropy)
|
|
641
372
|
{
|
|
642
|
-
size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
|
|
373
|
+
size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
|
|
643
374
|
size_t cSeqSizeEstimate = 0;
|
|
375
|
+
if (nbSeq == 0) return sequencesSectionHeaderSize;
|
|
644
376
|
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
|
|
645
377
|
nbSeq, fseTables->offcodeCTable, NULL,
|
|
646
|
-
OF_defaultNorm, OF_defaultNormLog,
|
|
378
|
+
OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
|
|
647
379
|
workspace, wkspSize);
|
|
648
380
|
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
|
|
649
381
|
nbSeq, fseTables->litlengthCTable, LL_bits,
|
|
650
|
-
LL_defaultNorm, LL_defaultNormLog,
|
|
382
|
+
LL_defaultNorm, LL_defaultNormLog, MaxLL,
|
|
651
383
|
workspace, wkspSize);
|
|
652
384
|
cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
|
|
653
385
|
nbSeq, fseTables->matchlengthCTable, ML_bits,
|
|
654
|
-
ML_defaultNorm, ML_defaultNormLog,
|
|
386
|
+
ML_defaultNorm, ML_defaultNormLog, MaxML,
|
|
655
387
|
workspace, wkspSize);
|
|
656
388
|
if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
|
|
657
389
|
return cSeqSizeEstimate + sequencesSectionHeaderSize;
|
|
@@ -747,7 +479,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|
|
747
479
|
/* I think there is an optimization opportunity here.
|
|
748
480
|
* Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
|
|
749
481
|
* since it recalculates estimate from scratch.
|
|
750
|
-
* For example, it would recount literal distribution and symbol codes
|
|
482
|
+
* For example, it would recount literal distribution and symbol codes every time.
|
|
751
483
|
*/
|
|
752
484
|
cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
|
|
753
485
|
&nextCBlock->entropy, entropyMetadata,
|
|
@@ -790,7 +522,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|
|
790
522
|
} while (!lastSequence);
|
|
791
523
|
if (writeLitEntropy) {
|
|
792
524
|
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
|
|
793
|
-
|
|
525
|
+
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
|
|
794
526
|
}
|
|
795
527
|
if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
|
|
796
528
|
/* If we haven't written our entropy tables, then we've violated our contract and
|
|
@@ -809,11 +541,11 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|
|
809
541
|
if (sp < send) {
|
|
810
542
|
seqDef const* seq;
|
|
811
543
|
repcodes_t rep;
|
|
812
|
-
|
|
544
|
+
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
|
|
813
545
|
for (seq = sstart; seq < sp; ++seq) {
|
|
814
|
-
|
|
546
|
+
ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
|
|
815
547
|
}
|
|
816
|
-
|
|
548
|
+
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
|
|
817
549
|
}
|
|
818
550
|
}
|
|
819
551
|
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
|
|
@@ -826,12 +558,12 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
|
|
826
558
|
unsigned lastBlock) {
|
|
827
559
|
ZSTD_entropyCTablesMetadata_t entropyMetadata;
|
|
828
560
|
|
|
829
|
-
FORWARD_IF_ERROR(
|
|
561
|
+
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
|
|
830
562
|
&zc->blockState.prevCBlock->entropy,
|
|
831
563
|
&zc->blockState.nextCBlock->entropy,
|
|
832
564
|
&zc->appliedParams,
|
|
833
565
|
&entropyMetadata,
|
|
834
|
-
zc->entropyWorkspace,
|
|
566
|
+
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
|
|
835
567
|
|
|
836
568
|
return ZSTD_compressSubBlock_multi(&zc->seqStore,
|
|
837
569
|
zc->blockState.prevCBlock,
|
|
@@ -841,5 +573,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
|
|
841
573
|
dst, dstCapacity,
|
|
842
574
|
src, srcSize,
|
|
843
575
|
zc->bmi2, lastBlock,
|
|
844
|
-
zc->entropyWorkspace,
|
|
576
|
+
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
|
|
845
577
|
}
|