extzstd 0.3.2 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/contrib/zstd/CHANGELOG +225 -1
- data/contrib/zstd/CONTRIBUTING.md +158 -75
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +106 -69
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +64 -36
- data/contrib/zstd/SECURITY.md +15 -0
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +117 -199
- data/contrib/zstd/lib/README.md +37 -7
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +80 -86
- data/contrib/zstd/lib/common/compiler.h +225 -63
- data/contrib/zstd/lib/common/cpu.h +37 -1
- data/contrib/zstd/lib/common/debug.c +7 -1
- data/contrib/zstd/lib/common/debug.h +21 -12
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +93 -5
- data/contrib/zstd/lib/common/fse.h +12 -87
- data/contrib/zstd/lib/common/fse_decompress.c +37 -117
- data/contrib/zstd/lib/common/huf.h +97 -172
- data/contrib/zstd/lib/common/mem.h +58 -58
- data/contrib/zstd/lib/common/pool.c +38 -17
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +158 -0
- data/contrib/zstd/lib/common/threading.c +74 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +6 -814
- data/contrib/zstd/lib/common/xxhash.h +6930 -195
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +68 -154
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +75 -155
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +810 -259
- data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
- data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
- data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
- data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
- data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
- data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
- data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
- data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
- data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +237 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +1030 -332
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +26 -7
- data/ext/extzstd.c +51 -24
- data/ext/extzstd.h +33 -6
- data/ext/extzstd_stream.c +74 -31
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +17 -7
- data/contrib/zstd/appveyor.yml +0 -292
- data/ext/depend +0 -2
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/* ******************************************************************
|
|
2
2
|
* Huffman encoder, part of New Generation Entropy library
|
|
3
|
-
* Copyright (c)
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
4
4
|
*
|
|
5
5
|
* You can contact the author at :
|
|
6
6
|
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
@@ -29,9 +29,9 @@
|
|
|
29
29
|
#include "hist.h"
|
|
30
30
|
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
|
|
31
31
|
#include "../common/fse.h" /* header compression */
|
|
32
|
-
#define HUF_STATIC_LINKING_ONLY
|
|
33
32
|
#include "../common/huf.h"
|
|
34
33
|
#include "../common/error_private.h"
|
|
34
|
+
#include "../common/bits.h" /* ZSTD_highbit32 */
|
|
35
35
|
|
|
36
36
|
|
|
37
37
|
/* **************************************************************
|
|
@@ -42,24 +42,111 @@
|
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
/* **************************************************************
|
|
45
|
-
*
|
|
45
|
+
* Required declarations
|
|
46
46
|
****************************************************************/
|
|
47
|
-
|
|
47
|
+
typedef struct nodeElt_s {
|
|
48
|
+
U32 count;
|
|
49
|
+
U16 parent;
|
|
50
|
+
BYTE byte;
|
|
51
|
+
BYTE nbBits;
|
|
52
|
+
} nodeElt;
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
/* **************************************************************
|
|
56
|
+
* Debug Traces
|
|
57
|
+
****************************************************************/
|
|
58
|
+
|
|
59
|
+
#if DEBUGLEVEL >= 2
|
|
60
|
+
|
|
61
|
+
/* showU32() :
 * Debug helper : emits the `size` values of `arr` through RAWLOG (level 6),
 * each preceded by a space, then terminates the line.
 * @return : `size`, unchanged. */
static size_t showU32(const U32* arr, size_t size)
{
    size_t idx = 0;
    while (idx < size) {
        RAWLOG(6, " %u", arr[idx]);
        idx++;
    }
    (void)arr;   /* keeps `arr` referenced, presumably for builds where RAWLOG expands to nothing */
    RAWLOG(6, " \n");
    return size;
}
|
|
70
|
+
|
|
71
|
+
static size_t HUF_getNbBits(HUF_CElt elt);
|
|
72
|
+
|
|
73
|
+
/* showCTableBits() :
 * Debug helper : emits, through RAWLOG (level 6), the HUF_getNbBits() value
 * of each of the `size` elements of `ctable`, then terminates the line.
 * @return : `size`, unchanged. */
static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
{
    size_t idx = 0;
    while (idx < size) {
        RAWLOG(6, " %zu", HUF_getNbBits(ctable[idx]));
        idx++;
    }
    (void)ctable;   /* keeps `ctable` referenced, presumably for builds where RAWLOG expands to nothing */
    RAWLOG(6, " \n");
    return size;
}
|
|
51
83
|
|
|
84
|
+
static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
|
|
85
|
+
{
|
|
86
|
+
size_t u;
|
|
87
|
+
for (u=0; u<size; u++) {
|
|
88
|
+
RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
|
|
89
|
+
}
|
|
90
|
+
RAWLOG(6, " \n");
|
|
91
|
+
return size;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
static size_t showHNodeBits(const nodeElt* hnode, size_t size)
|
|
95
|
+
{
|
|
96
|
+
size_t u;
|
|
97
|
+
for (u=0; u<size; u++) {
|
|
98
|
+
RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
|
|
99
|
+
}
|
|
100
|
+
RAWLOG(6, " \n");
|
|
101
|
+
return size;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
#endif
|
|
105
|
+
|
|
52
106
|
|
|
53
107
|
/* *******************************************************
|
|
54
108
|
* HUF : Huffman block compression
|
|
55
109
|
*********************************************************/
|
|
110
|
+
#define HUF_WORKSPACE_MAX_ALIGNMENT 8
|
|
111
|
+
|
|
112
|
+
static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
|
|
113
|
+
{
|
|
114
|
+
size_t const mask = align - 1;
|
|
115
|
+
size_t const rem = (size_t)workspace & mask;
|
|
116
|
+
size_t const add = (align - rem) & mask;
|
|
117
|
+
BYTE* const aligned = (BYTE*)workspace + add;
|
|
118
|
+
assert((align & (align - 1)) == 0); /* pow 2 */
|
|
119
|
+
assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
|
|
120
|
+
if (*workspaceSizePtr >= add) {
|
|
121
|
+
assert(add < align);
|
|
122
|
+
assert(((size_t)aligned & mask) == 0);
|
|
123
|
+
*workspaceSizePtr -= add;
|
|
124
|
+
return aligned;
|
|
125
|
+
} else {
|
|
126
|
+
*workspaceSizePtr = 0;
|
|
127
|
+
return NULL;
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
|
|
56
132
|
/* HUF_compressWeights() :
|
|
57
133
|
* Same as FSE_compress(), but dedicated to huff0's weights compression.
|
|
58
134
|
* The use case needs much less stack memory.
|
|
59
135
|
* Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
|
|
60
136
|
*/
|
|
61
137
|
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
|
|
62
|
-
|
|
138
|
+
|
|
139
|
+
typedef struct {
|
|
140
|
+
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
|
|
141
|
+
U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
|
|
142
|
+
unsigned count[HUF_TABLELOG_MAX+1];
|
|
143
|
+
S16 norm[HUF_TABLELOG_MAX+1];
|
|
144
|
+
} HUF_CompressWeightsWksp;
|
|
145
|
+
|
|
146
|
+
static size_t
|
|
147
|
+
HUF_compressWeights(void* dst, size_t dstSize,
|
|
148
|
+
const void* weightTable, size_t wtSize,
|
|
149
|
+
void* workspace, size_t workspaceSize)
|
|
63
150
|
{
|
|
64
151
|
BYTE* const ostart = (BYTE*) dst;
|
|
65
152
|
BYTE* op = ostart;
|
|
@@ -67,33 +154,30 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
|
|
|
67
154
|
|
|
68
155
|
unsigned maxSymbolValue = HUF_TABLELOG_MAX;
|
|
69
156
|
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
|
|
157
|
+
HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
|
|
70
158
|
|
|
71
|
-
|
|
72
|
-
BYTE scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
|
|
73
|
-
|
|
74
|
-
unsigned count[HUF_TABLELOG_MAX+1];
|
|
75
|
-
S16 norm[HUF_TABLELOG_MAX+1];
|
|
159
|
+
if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
|
|
76
160
|
|
|
77
161
|
/* init conditions */
|
|
78
162
|
if (wtSize <= 1) return 0; /* Not compressible */
|
|
79
163
|
|
|
80
164
|
/* Scan input and build symbol stats */
|
|
81
|
-
{ unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
|
|
165
|
+
{ unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */
|
|
82
166
|
if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
|
|
83
167
|
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
|
|
84
168
|
}
|
|
85
169
|
|
|
86
170
|
tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
|
|
87
|
-
CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
|
|
171
|
+
CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
|
|
88
172
|
|
|
89
173
|
/* Write table description header */
|
|
90
|
-
{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
|
|
174
|
+
{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
|
|
91
175
|
op += hSize;
|
|
92
176
|
}
|
|
93
177
|
|
|
94
178
|
/* Compress */
|
|
95
|
-
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
|
|
96
|
-
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
|
|
179
|
+
CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
|
|
180
|
+
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
|
|
97
181
|
if (cSize == 0) return 0; /* not enough space for compressed data */
|
|
98
182
|
op += cSize;
|
|
99
183
|
}
|
|
@@ -101,30 +185,94 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
|
|
|
101
185
|
return (size_t)(op-ostart);
|
|
102
186
|
}
|
|
103
187
|
|
|
188
|
+
/* HUF_getNbBits() :
 * @return : the lowest 8 bits of `elt`, which is where HUF_setNbBits() stores the bit count. */
static size_t HUF_getNbBits(HUF_CElt elt)
{
    size_t const lowByte = elt & 0xFF;
    return lowByte;
}
|
|
192
|
+
|
|
193
|
+
/* HUF_getNbBitsFast() :
 * Variant of HUF_getNbBits() that applies no mask.
 * @return : `elt` as is; presumably the caller only consumes the low bits - confirm at call sites. */
static size_t HUF_getNbBitsFast(HUF_CElt elt)
{
    size_t const unmasked = elt;
    return unmasked;
}
|
|
197
|
+
|
|
198
|
+
/* HUF_getValue() :
 * @return : `elt` with its lowest 8 bits (the nbBits field) cleared. */
static size_t HUF_getValue(HUF_CElt elt)
{
    size_t const nbBitsField = 0xFF;
    return elt & ~nbBitsField;
}
|
|
202
|
+
|
|
203
|
+
/* HUF_getValueFast() :
 * Variant of HUF_getValue() that applies no mask.
 * @return : `elt` as is, low 8 bits included; presumably the caller tolerates them - confirm at call sites. */
static size_t HUF_getValueFast(HUF_CElt elt)
{
    size_t const unmasked = elt;
    return unmasked;
}
|
|
207
|
+
|
|
208
|
+
/* HUF_setNbBits() :
 * Overwrites the whole element with `nbBits` :
 * any value previously stored in the upper bits is discarded.
 * `nbBits` must be <= HUF_TABLELOG_ABSOLUTEMAX. */
static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
{
    assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
    elt[0] = nbBits;
}
|
|
104
213
|
|
|
105
|
-
|
|
106
|
-
`CTable` : Huffman tree to save, using huf representation.
|
|
107
|
-
@return : size of saved CTable */
|
|
108
|
-
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
|
|
109
|
-
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
|
|
214
|
+
/* HUF_setValue() :
 * ORs `value` into the top bits of `*elt`, left-aligned according to the
 * bit count already stored in the element (see HUF_setNbBits()).
 * Nothing is written when that bit count is 0.
 * `value` must fit within that number of bits. */
static void HUF_setValue(HUF_CElt* elt, size_t value)
{
    size_t const codeLen = HUF_getNbBits(*elt);
    if (codeLen == 0) return;   /* also avoids shifting by the full width of the type */
    assert((value >> codeLen) == 0);
    {   size_t const shift = sizeof(HUF_CElt) * 8 - codeLen;
        *elt |= value << shift;
    }
}
|
|
222
|
+
|
|
223
|
+
/* HUF_readCTableHeader() :
 * Copies sizeof(HUF_CTableHeader) bytes from the start of `ctable`
 * into a header structure, using a byte copy rather than a pointer cast.
 * @return : the header, by value. */
HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
{
    HUF_CTableHeader hdr;
    ZSTD_memcpy(&hdr, &ctable[0], sizeof(hdr));
    return hdr;
}
|
|
229
|
+
|
|
230
|
+
/* HUF_writeCTableHeader() :
 * Stores `tableLog` and `maxSymbolValue` into the first element of `ctable`.
 * The header is zeroed first, so any other byte of that element ends up 0.
 * Both values must be < 256, since each is stored in a single byte. */
static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
{
    HUF_CTableHeader hdr;
    HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(hdr));   /* header occupies exactly one element */
    assert(tableLog < 256);
    assert(maxSymbolValue < 256);
    ZSTD_memset(&hdr, 0, sizeof(hdr));
    hdr.maxSymbolValue = (BYTE)maxSymbolValue;
    hdr.tableLog = (BYTE)tableLog;
    ZSTD_memcpy(&ctable[0], &hdr, sizeof(hdr));
}
|
|
241
|
+
|
|
242
|
+
typedef struct {
|
|
243
|
+
HUF_CompressWeightsWksp wksp;
|
|
111
244
|
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
|
|
112
245
|
BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
|
|
246
|
+
} HUF_WriteCTableWksp;
|
|
247
|
+
|
|
248
|
+
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
|
|
249
|
+
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
|
|
250
|
+
void* workspace, size_t workspaceSize)
|
|
251
|
+
{
|
|
252
|
+
HUF_CElt const* const ct = CTable + 1;
|
|
113
253
|
BYTE* op = (BYTE*)dst;
|
|
114
254
|
U32 n;
|
|
255
|
+
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
|
|
256
|
+
|
|
257
|
+
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
|
|
115
258
|
|
|
116
|
-
|
|
259
|
+
assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
|
|
260
|
+
assert(HUF_readCTableHeader(CTable).tableLog == huffLog);
|
|
261
|
+
|
|
262
|
+
/* check conditions */
|
|
263
|
+
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
|
|
117
264
|
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
|
|
118
265
|
|
|
119
266
|
/* convert to weight */
|
|
120
|
-
bitsToWeight[0] = 0;
|
|
267
|
+
wksp->bitsToWeight[0] = 0;
|
|
121
268
|
for (n=1; n<huffLog+1; n++)
|
|
122
|
-
bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
|
|
269
|
+
wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
|
|
123
270
|
for (n=0; n<maxSymbolValue; n++)
|
|
124
|
-
huffWeight[n] = bitsToWeight[
|
|
271
|
+
wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
|
|
125
272
|
|
|
126
273
|
/* attempt weights compression by FSE */
|
|
127
|
-
|
|
274
|
+
if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
|
|
275
|
+
{ CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
|
|
128
276
|
if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
|
|
129
277
|
op[0] = (BYTE)hSize;
|
|
130
278
|
return hSize+1;
|
|
@@ -134,9 +282,9 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
|
|
|
134
282
|
if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
|
|
135
283
|
if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
|
|
136
284
|
op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
|
|
137
|
-
huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
|
|
285
|
+
wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
|
|
138
286
|
for (n=0; n<maxSymbolValue; n+=2)
|
|
139
|
-
op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
|
|
287
|
+
op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
|
|
140
288
|
return ((maxSymbolValue+1)/2) + 1;
|
|
141
289
|
}
|
|
142
290
|
|
|
@@ -147,6 +295,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
|
|
147
295
|
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
|
|
148
296
|
U32 tableLog = 0;
|
|
149
297
|
U32 nbSymbols = 0;
|
|
298
|
+
HUF_CElt* const ct = CTable + 1;
|
|
150
299
|
|
|
151
300
|
/* get symbol weights */
|
|
152
301
|
CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
|
|
@@ -156,6 +305,10 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
|
|
156
305
|
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
|
|
157
306
|
if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
|
|
158
307
|
|
|
308
|
+
*maxSymbolValuePtr = nbSymbols - 1;
|
|
309
|
+
|
|
310
|
+
HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);
|
|
311
|
+
|
|
159
312
|
/* Prepare base value per rank */
|
|
160
313
|
{ U32 n, nextRankStart = 0;
|
|
161
314
|
for (n=1; n<=tableLog; n++) {
|
|
@@ -167,13 +320,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
|
|
167
320
|
/* fill nbBits */
|
|
168
321
|
{ U32 n; for (n=0; n<nbSymbols; n++) {
|
|
169
322
|
const U32 w = huffWeight[n];
|
|
170
|
-
|
|
323
|
+
HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
|
|
171
324
|
} }
|
|
172
325
|
|
|
173
326
|
/* fill val */
|
|
174
327
|
{ U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
|
|
175
328
|
U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
|
|
176
|
-
{ U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[
|
|
329
|
+
{ U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
|
|
177
330
|
/* determine starting value per rank */
|
|
178
331
|
valPerRank[tableLog+1] = 0; /* for w==0 */
|
|
179
332
|
{ U16 min = 0;
|
|
@@ -183,77 +336,74 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
|
|
183
336
|
min >>= 1;
|
|
184
337
|
} }
|
|
185
338
|
/* assign value within rank, symbol order */
|
|
186
|
-
{ U32 n; for (n=0; n<nbSymbols; n++)
|
|
339
|
+
{ U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
|
|
187
340
|
}
|
|
188
341
|
|
|
189
|
-
*maxSymbolValuePtr = nbSymbols - 1;
|
|
190
342
|
return readSize;
|
|
191
343
|
}
|
|
192
344
|
|
|
193
|
-
U32
|
|
345
|
+
/* HUF_getNbBitsFromCTable() :
 * Looks up the number of bits assigned to `symbolValue` in `CTable`.
 * The first element of `CTable` is the header; symbol entries start at index 1.
 * `symbolValue` must be <= HUF_SYMBOLVALUE_MAX.
 * @return : the stored bit count,
 *           or 0 when `symbolValue` exceeds the maxSymbolValue recorded in the header. */
U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{
    HUF_CTableHeader const header = HUF_readCTableHeader(CTable);
    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
    if (symbolValue <= header.maxSymbolValue) {
        return (U32)HUF_getNbBits(CTable[1 + symbolValue]);
    }
    return 0;
}
|
|
199
353
|
|
|
200
354
|
|
|
201
|
-
typedef struct nodeElt_s {
|
|
202
|
-
U32 count;
|
|
203
|
-
U16 parent;
|
|
204
|
-
BYTE byte;
|
|
205
|
-
BYTE nbBits;
|
|
206
|
-
} nodeElt;
|
|
207
|
-
|
|
208
355
|
/**
|
|
209
356
|
* HUF_setMaxHeight():
|
|
210
|
-
*
|
|
357
|
+
* Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
|
|
211
358
|
*
|
|
212
|
-
* It
|
|
213
|
-
*
|
|
359
|
+
* It attempts to convert all nodes with nbBits > @targetNbBits
|
|
360
|
+
* to employ @targetNbBits instead. Then it adjusts the tree
|
|
361
|
+
* so that it remains a valid canonical Huffman tree.
|
|
214
362
|
*
|
|
215
363
|
* @pre The sum of the ranks of each symbol == 2^largestBits,
|
|
216
364
|
* where largestBits == huffNode[lastNonNull].nbBits.
|
|
217
365
|
* @post The sum of the ranks of each symbol == 2^largestBits,
|
|
218
|
-
* where largestBits is the return value <=
|
|
366
|
+
* where largestBits is the return value (expected <= targetNbBits).
|
|
219
367
|
*
|
|
220
|
-
* @param huffNode The Huffman tree modified in place to enforce
|
|
368
|
+
* @param huffNode The Huffman tree modified in place to enforce targetNbBits.
|
|
369
|
+
* It's presumed sorted, from most frequent to rarest symbol.
|
|
221
370
|
* @param lastNonNull The symbol with the lowest count in the Huffman tree.
|
|
222
|
-
* @param
|
|
371
|
+
* @param targetNbBits The allowed number of bits, which the Huffman tree
|
|
223
372
|
* may not respect. After this function the Huffman tree will
|
|
224
|
-
* respect
|
|
225
|
-
* @return The maximum number of bits of the Huffman tree after adjustment
|
|
226
|
-
* necessarily no more than maxNbBits.
|
|
373
|
+
* respect targetNbBits.
|
|
374
|
+
* @return The maximum number of bits of the Huffman tree after adjustment.
|
|
227
375
|
*/
|
|
228
|
-
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32
|
|
376
|
+
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
|
|
229
377
|
{
|
|
230
378
|
const U32 largestBits = huffNode[lastNonNull].nbBits;
|
|
231
|
-
/* early exit : no elt >
|
|
232
|
-
if (largestBits <=
|
|
379
|
+
/* early exit : no elt > targetNbBits, so the tree is already valid. */
|
|
380
|
+
if (largestBits <= targetNbBits) return largestBits;
|
|
381
|
+
|
|
382
|
+
DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
|
|
233
383
|
|
|
234
384
|
/* there are several too large elements (at least >= 2) */
|
|
235
385
|
{ int totalCost = 0;
|
|
236
|
-
const U32 baseCost = 1 << (largestBits -
|
|
386
|
+
const U32 baseCost = 1 << (largestBits - targetNbBits);
|
|
237
387
|
int n = (int)lastNonNull;
|
|
238
388
|
|
|
239
|
-
/* Adjust any ranks >
|
|
389
|
+
/* Adjust any ranks > targetNbBits to targetNbBits.
|
|
240
390
|
* Compute totalCost, which is how far the sum of the ranks is
|
|
241
391
|
* we are over 2^largestBits after adjusting the offending ranks.
|
|
242
392
|
*/
|
|
243
|
-
while (huffNode[n].nbBits >
|
|
393
|
+
while (huffNode[n].nbBits > targetNbBits) {
|
|
244
394
|
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
|
|
245
|
-
huffNode[n].nbBits = (BYTE)
|
|
395
|
+
huffNode[n].nbBits = (BYTE)targetNbBits;
|
|
246
396
|
n--;
|
|
247
397
|
}
|
|
248
|
-
/* n stops at huffNode[n].nbBits <=
|
|
249
|
-
assert(huffNode[n].nbBits <=
|
|
250
|
-
/* n end at index of smallest symbol using <
|
|
251
|
-
while (huffNode[n].nbBits ==
|
|
398
|
+
/* n stops at huffNode[n].nbBits <= targetNbBits */
|
|
399
|
+
assert(huffNode[n].nbBits <= targetNbBits);
|
|
400
|
+
/* n ends at the index of the smallest symbol using < targetNbBits */
|
|
401
|
+
while (huffNode[n].nbBits == targetNbBits) --n;
|
|
252
402
|
|
|
253
|
-
/* renorm totalCost from 2^largestBits to 2^
|
|
403
|
+
/* renorm totalCost from 2^largestBits to 2^targetNbBits
|
|
254
404
|
* note : totalCost is necessarily a multiple of baseCost */
|
|
255
|
-
assert((totalCost & (baseCost - 1)) == 0);
|
|
256
|
-
totalCost >>= (largestBits -
|
|
405
|
+
assert(((U32)totalCost & (baseCost - 1)) == 0);
|
|
406
|
+
totalCost >>= (largestBits - targetNbBits);
|
|
257
407
|
assert(totalCost > 0);
|
|
258
408
|
|
|
259
409
|
/* repay normalized cost */
|
|
@@ -262,19 +412,19 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
|
|
|
262
412
|
|
|
263
413
|
/* Get pos of last (smallest = lowest cum. count) symbol per rank */
|
|
264
414
|
ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
|
|
265
|
-
{ U32 currentNbBits =
|
|
415
|
+
{ U32 currentNbBits = targetNbBits;
|
|
266
416
|
int pos;
|
|
267
417
|
for (pos=n ; pos >= 0; pos--) {
|
|
268
418
|
if (huffNode[pos].nbBits >= currentNbBits) continue;
|
|
269
|
-
currentNbBits = huffNode[pos].nbBits; /* <
|
|
270
|
-
rankLast[
|
|
419
|
+
currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */
|
|
420
|
+
rankLast[targetNbBits-currentNbBits] = (U32)pos;
|
|
271
421
|
} }
|
|
272
422
|
|
|
273
423
|
while (totalCost > 0) {
|
|
274
424
|
/* Try to reduce the next power of 2 above totalCost because we
|
|
275
425
|
* gain back half the rank.
|
|
276
426
|
*/
|
|
277
|
-
U32 nBitsToDecrease =
|
|
427
|
+
U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
|
|
278
428
|
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
|
|
279
429
|
U32 const highPos = rankLast[nBitsToDecrease];
|
|
280
430
|
U32 const lowPos = rankLast[nBitsToDecrease-1];
|
|
@@ -314,7 +464,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
|
|
|
314
464
|
rankLast[nBitsToDecrease] = noSymbol;
|
|
315
465
|
else {
|
|
316
466
|
rankLast[nBitsToDecrease]--;
|
|
317
|
-
if (huffNode[rankLast[nBitsToDecrease]].nbBits !=
|
|
467
|
+
if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
|
|
318
468
|
rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
|
|
319
469
|
}
|
|
320
470
|
} /* while (totalCost > 0) */
|
|
@@ -326,11 +476,11 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
|
|
|
326
476
|
* TODO.
|
|
327
477
|
*/
|
|
328
478
|
while (totalCost < 0) { /* Sometimes, cost correction overshoot */
|
|
329
|
-
/* special case : no rank 1 symbol (using
|
|
330
|
-
* let's create one from largest rank 0 (using
|
|
479
|
+
/* special case : no rank 1 symbol (using targetNbBits-1);
|
|
480
|
+
* let's create one from largest rank 0 (using targetNbBits).
|
|
331
481
|
*/
|
|
332
482
|
if (rankLast[1] == noSymbol) {
|
|
333
|
-
while (huffNode[n].nbBits ==
|
|
483
|
+
while (huffNode[n].nbBits == targetNbBits) n--;
|
|
334
484
|
huffNode[n+1].nbBits--;
|
|
335
485
|
assert(n >= 0);
|
|
336
486
|
rankLast[1] = (U32)(n+1);
|
|
@@ -344,26 +494,122 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
|
|
|
344
494
|
} /* repay normalized cost */
|
|
345
495
|
} /* there are several too large elements (at least >= 2) */
|
|
346
496
|
|
|
347
|
-
return
|
|
497
|
+
return targetNbBits;
|
|
348
498
|
}
|
|
349
499
|
|
|
350
500
|
typedef struct {
|
|
351
|
-
|
|
352
|
-
|
|
501
|
+
U16 base;
|
|
502
|
+
U16 curr;
|
|
353
503
|
} rankPos;
|
|
354
504
|
|
|
355
|
-
typedef nodeElt huffNodeTable[
|
|
505
|
+
typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
|
|
356
506
|
|
|
357
|
-
|
|
507
|
+
/* Number of buckets available for HUF_sort() */
|
|
508
|
+
#define RANK_POSITION_TABLE_SIZE 192
|
|
358
509
|
|
|
359
510
|
typedef struct {
|
|
360
511
|
huffNodeTable huffNodeTbl;
|
|
361
512
|
rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
|
|
362
513
|
} HUF_buildCTable_wksp_tables;
|
|
363
514
|
|
|
515
|
+
/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
|
|
516
|
+
* Strategy is to use as many buckets as possible for representing distinct
|
|
517
|
+
* counts while using the remainder to represent all "large" counts.
|
|
518
|
+
*
|
|
519
|
+
* To satisfy this requirement for 192 buckets, we can do the following:
|
|
520
|
+
* Let buckets 0-164 represent distinct counts of [0, 164]
|
|
521
|
+
* Let buckets 165 to 189 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
|
|
522
|
+
*/
|
|
523
|
+
#define RANK_POSITION_MAX_COUNT_LOG 32
|
|
524
|
+
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
|
|
525
|
+
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
|
|
526
|
+
|
|
527
|
+
/* Return the appropriate bucket index for a given count. See definition of
|
|
528
|
+
* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
|
|
529
|
+
*/
|
|
530
|
+
static U32 HUF_getIndex(U32 const count) {
|
|
531
|
+
return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
|
|
532
|
+
? count
|
|
533
|
+
: ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
/* Helper swap function for HUF_quickSortPartition() */
|
|
537
|
+
static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
|
|
538
|
+
nodeElt tmp = *a;
|
|
539
|
+
*a = *b;
|
|
540
|
+
*b = tmp;
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
/* Returns 0 if the huffNode array is not sorted by descending count */
|
|
544
|
+
MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
|
|
545
|
+
U32 i;
|
|
546
|
+
for (i = 1; i < maxSymbolValue1; ++i) {
|
|
547
|
+
if (huffNode[i].count > huffNode[i-1].count) {
|
|
548
|
+
return 0;
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
return 1;
|
|
552
|
+
}
|
|
553
|
+
|
|
554
|
+
/* Insertion sort by descending order */
|
|
555
|
+
HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
|
|
556
|
+
int i;
|
|
557
|
+
int const size = high-low+1;
|
|
558
|
+
huffNode += low;
|
|
559
|
+
for (i = 1; i < size; ++i) {
|
|
560
|
+
nodeElt const key = huffNode[i];
|
|
561
|
+
int j = i - 1;
|
|
562
|
+
while (j >= 0 && huffNode[j].count < key.count) {
|
|
563
|
+
huffNode[j + 1] = huffNode[j];
|
|
564
|
+
j--;
|
|
565
|
+
}
|
|
566
|
+
huffNode[j + 1] = key;
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
/* Pivot helper function for quicksort. */
|
|
571
|
+
static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
|
|
572
|
+
/* Simply select rightmost element as pivot. "Better" selectors like
|
|
573
|
+
* median-of-three don't experimentally appear to have any benefit.
|
|
574
|
+
*/
|
|
575
|
+
U32 const pivot = arr[high].count;
|
|
576
|
+
int i = low - 1;
|
|
577
|
+
int j = low;
|
|
578
|
+
for ( ; j < high; j++) {
|
|
579
|
+
if (arr[j].count > pivot) {
|
|
580
|
+
i++;
|
|
581
|
+
HUF_swapNodes(&arr[i], &arr[j]);
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
HUF_swapNodes(&arr[i + 1], &arr[high]);
|
|
585
|
+
return i + 1;
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
/* Classic quicksort by descending with partially iterative calls
|
|
589
|
+
* to reduce worst case callstack size.
|
|
590
|
+
*/
|
|
591
|
+
static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
|
|
592
|
+
int const kInsertionSortThreshold = 8;
|
|
593
|
+
if (high - low < kInsertionSortThreshold) {
|
|
594
|
+
HUF_insertionSort(arr, low, high);
|
|
595
|
+
return;
|
|
596
|
+
}
|
|
597
|
+
while (low < high) {
|
|
598
|
+
int const idx = HUF_quickSortPartition(arr, low, high);
|
|
599
|
+
if (idx - low < high - idx) {
|
|
600
|
+
HUF_simpleQuickSort(arr, low, idx - 1);
|
|
601
|
+
low = idx + 1;
|
|
602
|
+
} else {
|
|
603
|
+
HUF_simpleQuickSort(arr, idx + 1, high);
|
|
604
|
+
high = idx - 1;
|
|
605
|
+
}
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
|
|
364
609
|
/**
|
|
365
610
|
* HUF_sort():
|
|
366
611
|
* Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
|
|
612
|
+
* This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
|
|
367
613
|
*
|
|
368
614
|
* @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
|
|
369
615
|
* Must have (maxSymbolValue + 1) entries.
|
|
@@ -371,42 +617,51 @@ typedef struct {
|
|
|
371
617
|
* @param[in] maxSymbolValue Maximum symbol value.
|
|
372
618
|
* @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
|
|
373
619
|
*/
|
|
374
|
-
static void HUF_sort(nodeElt
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
int const maxSymbolValue1 = (int)maxSymbolValue + 1;
|
|
620
|
+
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
|
|
621
|
+
U32 n;
|
|
622
|
+
U32 const maxSymbolValue1 = maxSymbolValue+1;
|
|
378
623
|
|
|
379
624
|
/* Compute base and set curr to base.
|
|
380
|
-
* For symbol s let lowerRank =
|
|
381
|
-
*
|
|
625
|
+
* For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
|
|
626
|
+
* See HUF_getIndex to see bucketing strategy.
|
|
382
627
|
* We attribute each symbol to lowerRank's base value, because we want to know where
|
|
383
628
|
* each rank begins in the output, so for rank R we want to count ranks R+1 and above.
|
|
384
629
|
*/
|
|
385
630
|
ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
|
|
386
631
|
for (n = 0; n < maxSymbolValue1; ++n) {
|
|
387
|
-
U32 lowerRank =
|
|
632
|
+
U32 lowerRank = HUF_getIndex(count[n]);
|
|
633
|
+
assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
|
|
388
634
|
rankPosition[lowerRank].base++;
|
|
389
635
|
}
|
|
636
|
+
|
|
390
637
|
assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
|
|
638
|
+
/* Set up the rankPosition table */
|
|
391
639
|
for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
|
|
392
640
|
rankPosition[n-1].base += rankPosition[n].base;
|
|
393
641
|
rankPosition[n-1].curr = rankPosition[n-1].base;
|
|
394
642
|
}
|
|
395
|
-
|
|
643
|
+
|
|
644
|
+
/* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
|
|
396
645
|
for (n = 0; n < maxSymbolValue1; ++n) {
|
|
397
646
|
U32 const c = count[n];
|
|
398
|
-
U32 const r =
|
|
399
|
-
U32 pos = rankPosition[r].curr++;
|
|
400
|
-
|
|
401
|
-
* We have at most 256 symbols, so this insertion should be fine.
|
|
402
|
-
*/
|
|
403
|
-
while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
|
|
404
|
-
huffNode[pos] = huffNode[pos-1];
|
|
405
|
-
pos--;
|
|
406
|
-
}
|
|
647
|
+
U32 const r = HUF_getIndex(c) + 1;
|
|
648
|
+
U32 const pos = rankPosition[r].curr++;
|
|
649
|
+
assert(pos < maxSymbolValue1);
|
|
407
650
|
huffNode[pos].count = c;
|
|
408
651
|
huffNode[pos].byte = (BYTE)n;
|
|
409
652
|
}
|
|
653
|
+
|
|
654
|
+
/* Sort each bucket. */
|
|
655
|
+
for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
|
|
656
|
+
int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
|
|
657
|
+
U32 const bucketStartIdx = rankPosition[n].base;
|
|
658
|
+
if (bucketSize > 1) {
|
|
659
|
+
assert(bucketStartIdx < maxSymbolValue1);
|
|
660
|
+
HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
assert(HUF_isSorted(huffNode, maxSymbolValue1));
|
|
410
665
|
}
|
|
411
666
|
|
|
412
667
|
|
|
@@ -430,6 +685,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
|
|
|
430
685
|
int lowS, lowN;
|
|
431
686
|
int nodeNb = STARTNODE;
|
|
432
687
|
int n, nodeRoot;
|
|
688
|
+
DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
|
|
433
689
|
/* init for parents */
|
|
434
690
|
nonNullRank = (int)maxSymbolValue;
|
|
435
691
|
while(huffNode[nonNullRank].count == 0) nonNullRank--;
|
|
@@ -456,6 +712,8 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
|
|
|
456
712
|
for (n=0; n<=nonNullRank; n++)
|
|
457
713
|
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
|
|
458
714
|
|
|
715
|
+
DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
|
|
716
|
+
|
|
459
717
|
return nonNullRank;
|
|
460
718
|
}
|
|
461
719
|
|
|
@@ -471,6 +729,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
|
|
|
471
729
|
*/
|
|
472
730
|
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
|
|
473
731
|
{
|
|
732
|
+
HUF_CElt* const ct = CTable + 1;
|
|
474
733
|
/* fill result into ctable (val, nbBits) */
|
|
475
734
|
int n;
|
|
476
735
|
U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
|
|
@@ -486,127 +745,381 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
|
|
|
486
745
|
min >>= 1;
|
|
487
746
|
} }
|
|
488
747
|
for (n=0; n<alphabetSize; n++)
|
|
489
|
-
|
|
748
|
+
HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
|
|
490
749
|
for (n=0; n<alphabetSize; n++)
|
|
491
|
-
|
|
750
|
+
HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
|
|
751
|
+
|
|
752
|
+
HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
|
|
492
753
|
}
|
|
493
754
|
|
|
494
|
-
size_t
|
|
755
|
+
size_t
|
|
756
|
+
HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
|
|
757
|
+
void* workSpace, size_t wkspSize)
|
|
495
758
|
{
|
|
496
|
-
HUF_buildCTable_wksp_tables* const wksp_tables =
|
|
759
|
+
HUF_buildCTable_wksp_tables* const wksp_tables =
|
|
760
|
+
(HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
|
|
497
761
|
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
|
|
498
762
|
nodeElt* const huffNode = huffNode0+1;
|
|
499
763
|
int nonNullRank;
|
|
500
764
|
|
|
765
|
+
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
|
|
766
|
+
|
|
767
|
+
DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
|
|
768
|
+
|
|
501
769
|
/* safety checks */
|
|
502
|
-
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
|
|
503
770
|
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
|
|
504
|
-
|
|
771
|
+
return ERROR(workSpace_tooSmall);
|
|
505
772
|
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
|
|
506
773
|
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
|
|
507
|
-
|
|
774
|
+
return ERROR(maxSymbolValue_tooLarge);
|
|
508
775
|
ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
|
|
509
776
|
|
|
510
777
|
/* sort, decreasing order */
|
|
511
778
|
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
|
|
779
|
+
DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
|
|
512
780
|
|
|
513
781
|
/* build tree */
|
|
514
782
|
nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
|
|
515
783
|
|
|
516
|
-
/* enforce maxTableLog */
|
|
784
|
+
/* determine and enforce maxTableLog */
|
|
517
785
|
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
|
|
518
786
|
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
|
|
519
787
|
|
|
520
|
-
HUF_buildCTableFromTree(
|
|
788
|
+
HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
|
|
521
789
|
|
|
522
790
|
return maxNbBits;
|
|
523
791
|
}
|
|
524
792
|
|
|
525
793
|
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
|
|
526
794
|
{
|
|
795
|
+
HUF_CElt const* ct = CTable + 1;
|
|
527
796
|
size_t nbBits = 0;
|
|
528
797
|
int s;
|
|
529
798
|
for (s = 0; s <= (int)maxSymbolValue; ++s) {
|
|
530
|
-
nbBits +=
|
|
799
|
+
nbBits += HUF_getNbBits(ct[s]) * count[s];
|
|
531
800
|
}
|
|
532
801
|
return nbBits >> 3;
|
|
533
802
|
}
|
|
534
803
|
|
|
535
804
|
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
805
|
+
HUF_CTableHeader header = HUF_readCTableHeader(CTable);
|
|
806
|
+
HUF_CElt const* ct = CTable + 1;
|
|
807
|
+
int bad = 0;
|
|
808
|
+
int s;
|
|
809
|
+
|
|
810
|
+
assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);
|
|
811
|
+
|
|
812
|
+
if (header.maxSymbolValue < maxSymbolValue)
|
|
813
|
+
return 0;
|
|
814
|
+
|
|
815
|
+
for (s = 0; s <= (int)maxSymbolValue; ++s) {
|
|
816
|
+
bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
|
|
817
|
+
}
|
|
818
|
+
return !bad;
|
|
542
819
|
}
|
|
543
820
|
|
|
544
821
|
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
|
|
545
822
|
|
|
823
|
+
/** HUF_CStream_t:
|
|
824
|
+
* Huffman uses its own BIT_CStream_t implementation.
|
|
825
|
+
* There are three major differences from BIT_CStream_t:
|
|
826
|
+
* 1. HUF_addBits() takes a HUF_CElt (size_t) which is
|
|
827
|
+
* the pair (nbBits, value) in the format:
|
|
828
|
+
* format:
|
|
829
|
+
* - Bits [0, 4) = nbBits
|
|
830
|
+
* - Bits [4, 64 - nbBits) = 0
|
|
831
|
+
* - Bits [64 - nbBits, 64) = value
|
|
832
|
+
* 2. The bitContainer is built from the upper bits and
|
|
833
|
+
* right shifted. E.g. to add a new value of N bits
|
|
834
|
+
* you right shift the bitContainer by N, then or in
|
|
835
|
+
* the new value into the N upper bits.
|
|
836
|
+
* 3. The bitstream has two bit containers. You can add
|
|
837
|
+
* bits to the second container and merge them into
|
|
838
|
+
* the first container.
|
|
839
|
+
*/
|
|
840
|
+
|
|
841
|
+
#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
|
|
842
|
+
|
|
843
|
+
typedef struct {
|
|
844
|
+
size_t bitContainer[2];
|
|
845
|
+
size_t bitPos[2];
|
|
846
|
+
|
|
847
|
+
BYTE* startPtr;
|
|
848
|
+
BYTE* ptr;
|
|
849
|
+
BYTE* endPtr;
|
|
850
|
+
} HUF_CStream_t;
|
|
851
|
+
|
|
852
|
+
/**! HUF_initCStream():
|
|
853
|
+
* Initializes the bitstream.
|
|
854
|
+
* @returns 0 or an error code.
|
|
855
|
+
*/
|
|
856
|
+
static size_t HUF_initCStream(HUF_CStream_t* bitC,
|
|
857
|
+
void* startPtr, size_t dstCapacity)
|
|
858
|
+
{
|
|
859
|
+
ZSTD_memset(bitC, 0, sizeof(*bitC));
|
|
860
|
+
bitC->startPtr = (BYTE*)startPtr;
|
|
861
|
+
bitC->ptr = bitC->startPtr;
|
|
862
|
+
bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
|
|
863
|
+
if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
|
|
864
|
+
return 0;
|
|
865
|
+
}
|
|
866
|
+
|
|
867
|
+
/*! HUF_addBits():
|
|
868
|
+
* Adds the symbol stored in HUF_CElt elt to the bitstream.
|
|
869
|
+
*
|
|
870
|
+
* @param elt The element we're adding. This is a (nbBits, value) pair.
|
|
871
|
+
* See the HUF_CStream_t docs for the format.
|
|
872
|
+
* @param idx Insert into the bitstream at this idx.
|
|
873
|
+
* @param kFast This is a template parameter. If the bitstream is guaranteed
|
|
874
|
+
* to have at least 4 unused bits after this call it may be 1,
|
|
875
|
+
* otherwise it must be 0. HUF_addBits() is faster when fast is set.
|
|
876
|
+
*/
|
|
877
|
+
FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
|
|
878
|
+
{
|
|
879
|
+
assert(idx <= 1);
|
|
880
|
+
assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
|
|
881
|
+
/* This is efficient on x86-64 with BMI2 because shrx
|
|
882
|
+
* only reads the low 6 bits of the register. The compiler
|
|
883
|
+
* knows this and elides the mask. When fast is set,
|
|
884
|
+
* every operation can use the same value loaded from elt.
|
|
885
|
+
*/
|
|
886
|
+
bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
|
|
887
|
+
bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
|
|
888
|
+
/* We only read the low 8 bits of bitC->bitPos[idx] so it
|
|
889
|
+
* doesn't matter that the high bits have noise from the value.
|
|
890
|
+
*/
|
|
891
|
+
bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
|
|
892
|
+
assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
|
893
|
+
/* The last 4-bits of elt are dirty if fast is set,
|
|
894
|
+
* so we must not be overwriting bits that have already been
|
|
895
|
+
* inserted into the bit container.
|
|
896
|
+
*/
|
|
897
|
+
#if DEBUGLEVEL >= 1
|
|
898
|
+
{
|
|
899
|
+
size_t const nbBits = HUF_getNbBits(elt);
|
|
900
|
+
size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
|
|
901
|
+
(void)dirtyBits;
|
|
902
|
+
/* Middle bits are 0. */
|
|
903
|
+
assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
|
|
904
|
+
/* We didn't overwrite any bits in the bit container. */
|
|
905
|
+
assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
|
906
|
+
(void)dirtyBits;
|
|
907
|
+
}
|
|
908
|
+
#endif
|
|
909
|
+
}
|
|
910
|
+
|
|
911
|
+
FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
|
|
912
|
+
{
|
|
913
|
+
bitC->bitContainer[1] = 0;
|
|
914
|
+
bitC->bitPos[1] = 0;
|
|
915
|
+
}
|
|
916
|
+
|
|
917
|
+
/*! HUF_mergeIndex1() :
|
|
918
|
+
* Merges the bit container @ index 1 into the bit container @ index 0
|
|
919
|
+
* and zeros the bit container @ index 1.
|
|
920
|
+
*/
|
|
921
|
+
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
|
|
922
|
+
{
|
|
923
|
+
assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
|
|
924
|
+
bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
|
|
925
|
+
bitC->bitContainer[0] |= bitC->bitContainer[1];
|
|
926
|
+
bitC->bitPos[0] += bitC->bitPos[1];
|
|
927
|
+
assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
|
928
|
+
}
|
|
929
|
+
|
|
930
|
+
/*! HUF_flushBits() :
|
|
931
|
+
* Flushes the bits in the bit container @ index 0.
|
|
932
|
+
*
|
|
933
|
+
* @post bitPos will be < 8.
|
|
934
|
+
* @param kFast If kFast is set then we must know a-priori that
|
|
935
|
+
* the bit container will not overflow.
|
|
936
|
+
*/
|
|
937
|
+
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
|
|
938
|
+
{
|
|
939
|
+
/* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
|
|
940
|
+
size_t const nbBits = bitC->bitPos[0] & 0xFF;
|
|
941
|
+
size_t const nbBytes = nbBits >> 3;
|
|
942
|
+
/* The top nbBits bits of bitContainer are the ones we need. */
|
|
943
|
+
size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
|
|
944
|
+
/* Mask bitPos to account for the bytes we consumed. */
|
|
945
|
+
bitC->bitPos[0] &= 7;
|
|
946
|
+
assert(nbBits > 0);
|
|
947
|
+
assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
|
|
948
|
+
assert(bitC->ptr <= bitC->endPtr);
|
|
949
|
+
MEM_writeLEST(bitC->ptr, bitContainer);
|
|
950
|
+
bitC->ptr += nbBytes;
|
|
951
|
+
assert(!kFast || bitC->ptr <= bitC->endPtr);
|
|
952
|
+
if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
|
|
953
|
+
/* bitContainer doesn't need to be modified because the leftover
|
|
954
|
+
* bits are already the top bitPos bits. And we don't care about
|
|
955
|
+
* noise in the lower values.
|
|
956
|
+
*/
|
|
957
|
+
}
|
|
958
|
+
|
|
959
|
+
/*! HUF_endMark()
|
|
960
|
+
* @returns The Huffman stream end mark: A 1-bit value = 1.
|
|
961
|
+
*/
|
|
962
|
+
static HUF_CElt HUF_endMark(void)
|
|
963
|
+
{
|
|
964
|
+
HUF_CElt endMark;
|
|
965
|
+
HUF_setNbBits(&endMark, 1);
|
|
966
|
+
HUF_setValue(&endMark, 1);
|
|
967
|
+
return endMark;
|
|
968
|
+
}
|
|
969
|
+
|
|
970
|
+
/*! HUF_closeCStream() :
|
|
971
|
+
* @return Size of CStream, in bytes,
|
|
972
|
+
* or 0 if it could not fit into dstBuffer */
|
|
973
|
+
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
|
|
974
|
+
{
|
|
975
|
+
HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
|
|
976
|
+
HUF_flushBits(bitC, /* kFast */ 0);
|
|
977
|
+
{
|
|
978
|
+
size_t const nbBits = bitC->bitPos[0] & 0xFF;
|
|
979
|
+
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
|
|
980
|
+
return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
|
|
981
|
+
}
|
|
982
|
+
}
|
|
983
|
+
|
|
546
984
|
FORCE_INLINE_TEMPLATE void
|
|
547
|
-
HUF_encodeSymbol(
|
|
985
|
+
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
|
|
548
986
|
{
|
|
549
|
-
|
|
987
|
+
HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
|
|
550
988
|
}
|
|
551
989
|
|
|
552
|
-
|
|
990
|
+
FORCE_INLINE_TEMPLATE void
|
|
991
|
+
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
|
|
992
|
+
const BYTE* ip, size_t srcSize,
|
|
993
|
+
const HUF_CElt* ct,
|
|
994
|
+
int kUnroll, int kFastFlush, int kLastFast)
|
|
995
|
+
{
|
|
996
|
+
/* Join to kUnroll */
|
|
997
|
+
int n = (int)srcSize;
|
|
998
|
+
int rem = n % kUnroll;
|
|
999
|
+
if (rem > 0) {
|
|
1000
|
+
for (; rem > 0; --rem) {
|
|
1001
|
+
HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
|
|
1002
|
+
}
|
|
1003
|
+
HUF_flushBits(bitC, kFastFlush);
|
|
1004
|
+
}
|
|
1005
|
+
assert(n % kUnroll == 0);
|
|
1006
|
+
|
|
1007
|
+
/* Join to 2 * kUnroll */
|
|
1008
|
+
if (n % (2 * kUnroll)) {
|
|
1009
|
+
int u;
|
|
1010
|
+
for (u = 1; u < kUnroll; ++u) {
|
|
1011
|
+
HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
|
|
1012
|
+
}
|
|
1013
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
|
|
1014
|
+
HUF_flushBits(bitC, kFastFlush);
|
|
1015
|
+
n -= kUnroll;
|
|
1016
|
+
}
|
|
1017
|
+
assert(n % (2 * kUnroll) == 0);
|
|
1018
|
+
|
|
1019
|
+
for (; n>0; n-= 2 * kUnroll) {
|
|
1020
|
+
/* Encode kUnroll symbols into the bitstream @ index 0. */
|
|
1021
|
+
int u;
|
|
1022
|
+
for (u = 1; u < kUnroll; ++u) {
|
|
1023
|
+
HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
|
|
1024
|
+
}
|
|
1025
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
|
|
1026
|
+
HUF_flushBits(bitC, kFastFlush);
|
|
1027
|
+
/* Encode kUnroll symbols into the bitstream @ index 1.
|
|
1028
|
+
* This allows us to start filling the bit container
|
|
1029
|
+
* without any data dependencies.
|
|
1030
|
+
*/
|
|
1031
|
+
HUF_zeroIndex1(bitC);
|
|
1032
|
+
for (u = 1; u < kUnroll; ++u) {
|
|
1033
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
|
|
1034
|
+
}
|
|
1035
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
|
|
1036
|
+
/* Merge bitstream @ index 1 into the bitstream @ index 0 */
|
|
1037
|
+
HUF_mergeIndex1(bitC);
|
|
1038
|
+
HUF_flushBits(bitC, kFastFlush);
|
|
1039
|
+
}
|
|
1040
|
+
assert(n == 0);
|
|
1041
|
+
|
|
1042
|
+
}
|
|
553
1043
|
|
|
554
|
-
|
|
555
|
-
|
|
1044
|
+
/**
|
|
1045
|
+
* Returns a tight upper bound on the output space needed by Huffman
|
|
1046
|
+
* with 8 bytes buffer to handle over-writes. If the output is at least
|
|
1047
|
+
* this large we don't need to do bounds checks during Huffman encoding.
|
|
1048
|
+
*/
|
|
1049
|
+
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
|
|
1050
|
+
{
|
|
1051
|
+
return ((srcSize * tableLog) >> 3) + 8;
|
|
1052
|
+
}
|
|
556
1053
|
|
|
557
|
-
#define HUF_FLUSHBITS_2(stream) \
|
|
558
|
-
if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
|
|
559
1054
|
|
|
560
1055
|
FORCE_INLINE_TEMPLATE size_t
|
|
561
1056
|
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
|
|
562
1057
|
const void* src, size_t srcSize,
|
|
563
1058
|
const HUF_CElt* CTable)
|
|
564
1059
|
{
|
|
1060
|
+
U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
|
|
1061
|
+
HUF_CElt const* ct = CTable + 1;
|
|
565
1062
|
const BYTE* ip = (const BYTE*) src;
|
|
566
1063
|
BYTE* const ostart = (BYTE*)dst;
|
|
567
1064
|
BYTE* const oend = ostart + dstSize;
|
|
568
|
-
|
|
569
|
-
size_t n;
|
|
570
|
-
BIT_CStream_t bitC;
|
|
1065
|
+
HUF_CStream_t bitC;
|
|
571
1066
|
|
|
572
1067
|
/* init */
|
|
573
1068
|
if (dstSize < 8) return 0; /* not enough space to compress */
|
|
574
|
-
{
|
|
1069
|
+
{ BYTE* op = ostart;
|
|
1070
|
+
size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
|
|
575
1071
|
if (HUF_isError(initErr)) return 0; }
|
|
576
1072
|
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
{
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
1073
|
+
if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
|
|
1074
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
|
|
1075
|
+
else {
|
|
1076
|
+
if (MEM_32bits()) {
|
|
1077
|
+
switch (tableLog) {
|
|
1078
|
+
case 11:
|
|
1079
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
1080
|
+
break;
|
|
1081
|
+
case 10: ZSTD_FALLTHROUGH;
|
|
1082
|
+
case 9: ZSTD_FALLTHROUGH;
|
|
1083
|
+
case 8:
|
|
1084
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
|
|
1085
|
+
break;
|
|
1086
|
+
case 7: ZSTD_FALLTHROUGH;
|
|
1087
|
+
default:
|
|
1088
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
|
|
1089
|
+
break;
|
|
1090
|
+
}
|
|
1091
|
+
} else {
|
|
1092
|
+
switch (tableLog) {
|
|
1093
|
+
case 11:
|
|
1094
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
1095
|
+
break;
|
|
1096
|
+
case 10:
|
|
1097
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
|
|
1098
|
+
break;
|
|
1099
|
+
case 9:
|
|
1100
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
1101
|
+
break;
|
|
1102
|
+
case 8:
|
|
1103
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
1104
|
+
break;
|
|
1105
|
+
case 7:
|
|
1106
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
1107
|
+
break;
|
|
1108
|
+
case 6: ZSTD_FALLTHROUGH;
|
|
1109
|
+
default:
|
|
1110
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
|
|
1111
|
+
break;
|
|
1112
|
+
}
|
|
1113
|
+
}
|
|
602
1114
|
}
|
|
1115
|
+
assert(bitC.ptr <= bitC.endPtr);
|
|
603
1116
|
|
|
604
|
-
return
|
|
1117
|
+
return HUF_closeCStream(&bitC);
|
|
605
1118
|
}
|
|
606
1119
|
|
|
607
1120
|
#if DYNAMIC_BMI2
|
|
608
1121
|
|
|
609
|
-
static
|
|
1122
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
|
610
1123
|
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
|
|
611
1124
|
const void* src, size_t srcSize,
|
|
612
1125
|
const HUF_CElt* CTable)
|
|
@@ -625,9 +1138,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
|
|
|
625
1138
|
static size_t
|
|
626
1139
|
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
|
|
627
1140
|
const void* src, size_t srcSize,
|
|
628
|
-
const HUF_CElt* CTable, const int
|
|
1141
|
+
const HUF_CElt* CTable, const int flags)
|
|
629
1142
|
{
|
|
630
|
-
if (
|
|
1143
|
+
if (flags & HUF_flags_bmi2) {
|
|
631
1144
|
return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
|
|
632
1145
|
}
|
|
633
1146
|
return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
|
|
@@ -638,24 +1151,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
|
|
|
638
1151
|
static size_t
|
|
639
1152
|
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
|
|
640
1153
|
const void* src, size_t srcSize,
|
|
641
|
-
const HUF_CElt* CTable, const int
|
|
1154
|
+
const HUF_CElt* CTable, const int flags)
|
|
642
1155
|
{
|
|
643
|
-
(void)
|
|
1156
|
+
(void)flags;
|
|
644
1157
|
return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
|
|
645
1158
|
}
|
|
646
1159
|
|
|
647
1160
|
#endif
|
|
648
1161
|
|
|
649
|
-
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
|
|
1162
|
+
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
|
|
650
1163
|
{
|
|
651
|
-
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable,
|
|
1164
|
+
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
|
|
652
1165
|
}
|
|
653
1166
|
|
|
654
|
-
|
|
655
1167
|
static size_t
|
|
656
1168
|
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
|
657
1169
|
const void* src, size_t srcSize,
|
|
658
|
-
const HUF_CElt* CTable, int
|
|
1170
|
+
const HUF_CElt* CTable, int flags)
|
|
659
1171
|
{
|
|
660
1172
|
size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
|
|
661
1173
|
const BYTE* ip = (const BYTE*) src;
|
|
@@ -669,27 +1181,24 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
|
|
669
1181
|
op += 6; /* jumpTable */
|
|
670
1182
|
|
|
671
1183
|
assert(op <= oend);
|
|
672
|
-
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable,
|
|
673
|
-
if (cSize==0) return 0;
|
|
674
|
-
assert(cSize <= 65535);
|
|
1184
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
|
|
1185
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
|
675
1186
|
MEM_writeLE16(ostart, (U16)cSize);
|
|
676
1187
|
op += cSize;
|
|
677
1188
|
}
|
|
678
1189
|
|
|
679
1190
|
ip += segmentSize;
|
|
680
1191
|
assert(op <= oend);
|
|
681
|
-
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable,
|
|
682
|
-
if (cSize==0) return 0;
|
|
683
|
-
assert(cSize <= 65535);
|
|
1192
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
|
|
1193
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
|
684
1194
|
MEM_writeLE16(ostart+2, (U16)cSize);
|
|
685
1195
|
op += cSize;
|
|
686
1196
|
}
|
|
687
1197
|
|
|
688
1198
|
ip += segmentSize;
|
|
689
1199
|
assert(op <= oend);
|
|
690
|
-
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable,
|
|
691
|
-
if (cSize==0) return 0;
|
|
692
|
-
assert(cSize <= 65535);
|
|
1200
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
|
|
1201
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
|
693
1202
|
MEM_writeLE16(ostart+4, (U16)cSize);
|
|
694
1203
|
op += cSize;
|
|
695
1204
|
}
|
|
@@ -697,17 +1206,17 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
|
|
697
1206
|
ip += segmentSize;
|
|
698
1207
|
assert(op <= oend);
|
|
699
1208
|
assert(ip <= iend);
|
|
700
|
-
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable,
|
|
701
|
-
if (cSize==0) return 0;
|
|
1209
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
|
|
1210
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
|
702
1211
|
op += cSize;
|
|
703
1212
|
}
|
|
704
1213
|
|
|
705
1214
|
return (size_t)(op-ostart);
|
|
706
1215
|
}
|
|
707
1216
|
|
|
708
|
-
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
|
|
1217
|
+
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
|
|
709
1218
|
{
|
|
710
|
-
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable,
|
|
1219
|
+
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
|
|
711
1220
|
}
|
|
712
1221
|
|
|
713
1222
|
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
|
|
@@ -715,11 +1224,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
|
|
|
715
1224
|
static size_t HUF_compressCTable_internal(
|
|
716
1225
|
BYTE* const ostart, BYTE* op, BYTE* const oend,
|
|
717
1226
|
const void* src, size_t srcSize,
|
|
718
|
-
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int
|
|
1227
|
+
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
|
|
719
1228
|
{
|
|
720
1229
|
size_t const cSize = (nbStreams==HUF_singleStream) ?
|
|
721
|
-
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable,
|
|
722
|
-
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable,
|
|
1230
|
+
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
|
|
1231
|
+
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
|
|
723
1232
|
if (HUF_isError(cSize)) { return cSize; }
|
|
724
1233
|
if (cSize==0) { return 0; } /* uncompressible */
|
|
725
1234
|
op += cSize;
|
|
@@ -731,31 +1240,113 @@ static size_t HUF_compressCTable_internal(
|
|
|
731
1240
|
|
|
732
1241
|
typedef struct {
|
|
733
1242
|
unsigned count[HUF_SYMBOLVALUE_MAX + 1];
|
|
734
|
-
HUF_CElt CTable[HUF_SYMBOLVALUE_MAX
|
|
735
|
-
|
|
1243
|
+
HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
|
|
1244
|
+
union {
|
|
1245
|
+
HUF_buildCTable_wksp_tables buildCTable_wksp;
|
|
1246
|
+
HUF_WriteCTableWksp writeCTable_wksp;
|
|
1247
|
+
U32 hist_wksp[HIST_WKSP_SIZE_U32];
|
|
1248
|
+
} wksps;
|
|
736
1249
|
} HUF_compress_tables_t;
|
|
737
1250
|
|
|
1251
|
+
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
|
|
1252
|
+
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
|
|
1253
|
+
|
|
1254
|
+
unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
|
|
1255
|
+
{
|
|
1256
|
+
unsigned cardinality = 0;
|
|
1257
|
+
unsigned i;
|
|
1258
|
+
|
|
1259
|
+
for (i = 0; i < maxSymbolValue + 1; i++) {
|
|
1260
|
+
if (count[i] != 0) cardinality += 1;
|
|
1261
|
+
}
|
|
1262
|
+
|
|
1263
|
+
return cardinality;
|
|
1264
|
+
}
|
|
1265
|
+
|
|
1266
|
+
unsigned HUF_minTableLog(unsigned symbolCardinality)
|
|
1267
|
+
{
|
|
1268
|
+
U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
|
|
1269
|
+
return minBitsSymbols;
|
|
1270
|
+
}
|
|
1271
|
+
|
|
1272
|
+
unsigned HUF_optimalTableLog(
|
|
1273
|
+
unsigned maxTableLog,
|
|
1274
|
+
size_t srcSize,
|
|
1275
|
+
unsigned maxSymbolValue,
|
|
1276
|
+
void* workSpace, size_t wkspSize,
|
|
1277
|
+
HUF_CElt* table,
|
|
1278
|
+
const unsigned* count,
|
|
1279
|
+
int flags)
|
|
1280
|
+
{
|
|
1281
|
+
assert(srcSize > 1); /* Not supported, RLE should be used instead */
|
|
1282
|
+
assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
|
|
1283
|
+
|
|
1284
|
+
if (!(flags & HUF_flags_optimalDepth)) {
|
|
1285
|
+
/* cheap evaluation, based on FSE */
|
|
1286
|
+
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
|
|
1287
|
+
}
|
|
1288
|
+
|
|
1289
|
+
{ BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
|
|
1290
|
+
size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
|
|
1291
|
+
size_t hSize, newSize;
|
|
1292
|
+
const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
|
|
1293
|
+
const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
|
|
1294
|
+
size_t optSize = ((size_t) ~0) - 1;
|
|
1295
|
+
unsigned optLog = maxTableLog, optLogGuess;
|
|
1296
|
+
|
|
1297
|
+
DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
|
|
1298
|
+
|
|
1299
|
+
/* Search until size increases */
|
|
1300
|
+
for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
|
|
1301
|
+
DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
|
|
1302
|
+
|
|
1303
|
+
{ size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
|
|
1304
|
+
if (ERR_isError(maxBits)) continue;
|
|
1305
|
+
|
|
1306
|
+
if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
|
|
1307
|
+
|
|
1308
|
+
hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
|
|
1309
|
+
}
|
|
1310
|
+
|
|
1311
|
+
if (ERR_isError(hSize)) continue;
|
|
1312
|
+
|
|
1313
|
+
newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
|
|
1314
|
+
|
|
1315
|
+
if (newSize > optSize + 1) {
|
|
1316
|
+
break;
|
|
1317
|
+
}
|
|
1318
|
+
|
|
1319
|
+
if (newSize < optSize) {
|
|
1320
|
+
optSize = newSize;
|
|
1321
|
+
optLog = optLogGuess;
|
|
1322
|
+
}
|
|
1323
|
+
}
|
|
1324
|
+
assert(optLog <= HUF_TABLELOG_MAX);
|
|
1325
|
+
return optLog;
|
|
1326
|
+
}
|
|
1327
|
+
}
|
|
1328
|
+
|
|
738
1329
|
/* HUF_compress_internal() :
|
|
739
|
-
* `
|
|
1330
|
+
* `workSpace_align4` must be aligned on 4-bytes boundaries,
|
|
1331
|
+
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
|
|
740
1332
|
static size_t
|
|
741
1333
|
HUF_compress_internal (void* dst, size_t dstSize,
|
|
742
1334
|
const void* src, size_t srcSize,
|
|
743
1335
|
unsigned maxSymbolValue, unsigned huffLog,
|
|
744
1336
|
HUF_nbStreams_e nbStreams,
|
|
745
1337
|
void* workSpace, size_t wkspSize,
|
|
746
|
-
HUF_CElt* oldHufTable, HUF_repeat* repeat, int
|
|
747
|
-
const int bmi2)
|
|
1338
|
+
HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
|
|
748
1339
|
{
|
|
749
|
-
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
|
|
1340
|
+
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
|
|
750
1341
|
BYTE* const ostart = (BYTE*)dst;
|
|
751
1342
|
BYTE* const oend = ostart + dstSize;
|
|
752
1343
|
BYTE* op = ostart;
|
|
753
1344
|
|
|
754
|
-
|
|
1345
|
+
DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
|
|
1346
|
+
HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
|
|
755
1347
|
|
|
756
1348
|
/* checks & inits */
|
|
757
|
-
if (
|
|
758
|
-
if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
|
|
1349
|
+
if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
|
|
759
1350
|
if (!srcSize) return 0; /* Uncompressed */
|
|
760
1351
|
if (!dstSize) return 0; /* cannot fit anything within dst budget */
|
|
761
1352
|
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
|
|
@@ -765,17 +1356,34 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
765
1356
|
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
|
|
766
1357
|
|
|
767
1358
|
/* Heuristic : If old table is valid, use it for small inputs */
|
|
768
|
-
if (
|
|
1359
|
+
if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
|
|
769
1360
|
return HUF_compressCTable_internal(ostart, op, oend,
|
|
770
1361
|
src, srcSize,
|
|
771
|
-
nbStreams, oldHufTable,
|
|
1362
|
+
nbStreams, oldHufTable, flags);
|
|
1363
|
+
}
|
|
1364
|
+
|
|
1365
|
+
/* If uncompressible data is suspected, do a smaller sampling first */
|
|
1366
|
+
DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
|
|
1367
|
+
if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
|
|
1368
|
+
size_t largestTotal = 0;
|
|
1369
|
+
DEBUGLOG(5, "input suspected incompressible : sampling to check");
|
|
1370
|
+
{ unsigned maxSymbolValueBegin = maxSymbolValue;
|
|
1371
|
+
CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
|
|
1372
|
+
largestTotal += largestBegin;
|
|
1373
|
+
}
|
|
1374
|
+
{ unsigned maxSymbolValueEnd = maxSymbolValue;
|
|
1375
|
+
CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
|
|
1376
|
+
largestTotal += largestEnd;
|
|
1377
|
+
}
|
|
1378
|
+
if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */
|
|
772
1379
|
}
|
|
773
1380
|
|
|
774
1381
|
/* Scan input and build symbol stats */
|
|
775
|
-
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize,
|
|
1382
|
+
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
|
|
776
1383
|
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
|
|
777
1384
|
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
|
|
778
1385
|
}
|
|
1386
|
+
DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
|
|
779
1387
|
|
|
780
1388
|
/* Check validity of previous table */
|
|
781
1389
|
if ( repeat
|
|
@@ -784,26 +1392,25 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
784
1392
|
*repeat = HUF_repeat_none;
|
|
785
1393
|
}
|
|
786
1394
|
/* Heuristic : use existing table for small inputs */
|
|
787
|
-
if (
|
|
1395
|
+
if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
|
|
788
1396
|
return HUF_compressCTable_internal(ostart, op, oend,
|
|
789
1397
|
src, srcSize,
|
|
790
|
-
nbStreams, oldHufTable,
|
|
1398
|
+
nbStreams, oldHufTable, flags);
|
|
791
1399
|
}
|
|
792
1400
|
|
|
793
1401
|
/* Build Huffman Tree */
|
|
794
|
-
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
|
1402
|
+
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
|
|
795
1403
|
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
|
|
796
1404
|
maxSymbolValue, huffLog,
|
|
797
|
-
&table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
|
|
1405
|
+
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
|
|
798
1406
|
CHECK_F(maxBits);
|
|
799
1407
|
huffLog = (U32)maxBits;
|
|
800
|
-
|
|
801
|
-
ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
|
|
802
|
-
sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
|
|
1408
|
+
DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
|
|
803
1409
|
}
|
|
804
1410
|
|
|
805
1411
|
/* Write table description header */
|
|
806
|
-
{ CHECK_V_F(hSize,
|
|
1412
|
+
{ CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
|
|
1413
|
+
&table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
|
|
807
1414
|
/* Check if using previous huffman table is beneficial */
|
|
808
1415
|
if (repeat && *repeat != HUF_repeat_none) {
|
|
809
1416
|
size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
|
|
@@ -811,7 +1418,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
811
1418
|
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
|
|
812
1419
|
return HUF_compressCTable_internal(ostart, op, oend,
|
|
813
1420
|
src, srcSize,
|
|
814
|
-
nbStreams, oldHufTable,
|
|
1421
|
+
nbStreams, oldHufTable, flags);
|
|
815
1422
|
} }
|
|
816
1423
|
|
|
817
1424
|
/* Use the new huffman table */
|
|
@@ -823,91 +1430,35 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
823
1430
|
}
|
|
824
1431
|
return HUF_compressCTable_internal(ostart, op, oend,
|
|
825
1432
|
src, srcSize,
|
|
826
|
-
nbStreams, table->CTable,
|
|
827
|
-
}
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
|
|
831
|
-
const void* src, size_t srcSize,
|
|
832
|
-
unsigned maxSymbolValue, unsigned huffLog,
|
|
833
|
-
void* workSpace, size_t wkspSize)
|
|
834
|
-
{
|
|
835
|
-
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
836
|
-
maxSymbolValue, huffLog, HUF_singleStream,
|
|
837
|
-
workSpace, wkspSize,
|
|
838
|
-
NULL, NULL, 0, 0 /*bmi2*/);
|
|
1433
|
+
nbStreams, table->CTable, flags);
|
|
839
1434
|
}
|
|
840
1435
|
|
|
841
1436
|
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
|
|
842
1437
|
const void* src, size_t srcSize,
|
|
843
1438
|
unsigned maxSymbolValue, unsigned huffLog,
|
|
844
1439
|
void* workSpace, size_t wkspSize,
|
|
845
|
-
HUF_CElt* hufTable, HUF_repeat* repeat, int
|
|
1440
|
+
HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
|
|
846
1441
|
{
|
|
1442
|
+
DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
|
|
847
1443
|
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
848
1444
|
maxSymbolValue, huffLog, HUF_singleStream,
|
|
849
1445
|
workSpace, wkspSize, hufTable,
|
|
850
|
-
repeat,
|
|
851
|
-
}
|
|
852
|
-
|
|
853
|
-
/* HUF_compress4X_repeat():
|
|
854
|
-
* compress input using 4 streams.
|
|
855
|
-
* provide workspace to generate compression tables */
|
|
856
|
-
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
|
|
857
|
-
const void* src, size_t srcSize,
|
|
858
|
-
unsigned maxSymbolValue, unsigned huffLog,
|
|
859
|
-
void* workSpace, size_t wkspSize)
|
|
860
|
-
{
|
|
861
|
-
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
862
|
-
maxSymbolValue, huffLog, HUF_fourStreams,
|
|
863
|
-
workSpace, wkspSize,
|
|
864
|
-
NULL, NULL, 0, 0 /*bmi2*/);
|
|
1446
|
+
repeat, flags);
|
|
865
1447
|
}
|
|
866
1448
|
|
|
867
1449
|
/* HUF_compress4X_repeat():
|
|
868
1450
|
* compress input using 4 streams.
|
|
869
|
-
*
|
|
1451
|
+
* consider skipping quickly
|
|
1452
|
+
* reuse an existing huffman compression table */
|
|
870
1453
|
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
|
|
871
1454
|
const void* src, size_t srcSize,
|
|
872
1455
|
unsigned maxSymbolValue, unsigned huffLog,
|
|
873
1456
|
void* workSpace, size_t wkspSize,
|
|
874
|
-
HUF_CElt* hufTable, HUF_repeat* repeat, int
|
|
1457
|
+
HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
|
|
875
1458
|
{
|
|
1459
|
+
DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
|
|
876
1460
|
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
877
1461
|
maxSymbolValue, huffLog, HUF_fourStreams,
|
|
878
1462
|
workSpace, wkspSize,
|
|
879
|
-
hufTable, repeat,
|
|
1463
|
+
hufTable, repeat, flags);
|
|
880
1464
|
}
|
|
881
|
-
|
|
882
|
-
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
|
883
|
-
/** HUF_buildCTable() :
|
|
884
|
-
* @return : maxNbBits
|
|
885
|
-
* Note : count is used before tree is written, so they can safely overlap
|
|
886
|
-
*/
|
|
887
|
-
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
|
|
888
|
-
{
|
|
889
|
-
HUF_buildCTable_wksp_tables workspace;
|
|
890
|
-
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
|
|
891
|
-
}
|
|
892
|
-
|
|
893
|
-
size_t HUF_compress1X (void* dst, size_t dstSize,
|
|
894
|
-
const void* src, size_t srcSize,
|
|
895
|
-
unsigned maxSymbolValue, unsigned huffLog)
|
|
896
|
-
{
|
|
897
|
-
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
|
|
898
|
-
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
|
899
|
-
}
|
|
900
|
-
|
|
901
|
-
size_t HUF_compress2 (void* dst, size_t dstSize,
|
|
902
|
-
const void* src, size_t srcSize,
|
|
903
|
-
unsigned maxSymbolValue, unsigned huffLog)
|
|
904
|
-
{
|
|
905
|
-
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
|
|
906
|
-
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
|
907
|
-
}
|
|
908
|
-
|
|
909
|
-
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
|
|
910
|
-
{
|
|
911
|
-
return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
|
|
912
|
-
}
|
|
913
|
-
#endif
|