extzstd 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/contrib/zstd/CHANGELOG +188 -1
- data/contrib/zstd/CONTRIBUTING.md +157 -74
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +81 -58
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +59 -35
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/appveyor.yml +49 -136
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +87 -181
- data/contrib/zstd/lib/README.md +23 -6
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +33 -59
- data/contrib/zstd/lib/common/compiler.h +115 -45
- data/contrib/zstd/lib/common/cpu.h +1 -1
- data/contrib/zstd/lib/common/debug.c +1 -1
- data/contrib/zstd/lib/common/debug.h +1 -1
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +82 -3
- data/contrib/zstd/lib/common/fse.h +9 -85
- data/contrib/zstd/lib/common/fse_decompress.c +29 -111
- data/contrib/zstd/lib/common/huf.h +84 -172
- data/contrib/zstd/lib/common/mem.h +58 -49
- data/contrib/zstd/lib/common/pool.c +37 -16
- data/contrib/zstd/lib/common/pool.h +9 -3
- data/contrib/zstd/lib/common/portability_macros.h +156 -0
- data/contrib/zstd/lib/common/threading.c +68 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +7 -809
- data/contrib/zstd/lib/common/xxhash.h +5568 -167
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +64 -150
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +69 -150
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +773 -251
- data/contrib/zstd/lib/compress/zstd_compress.c +2650 -826
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +509 -180
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +33 -305
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +266 -85
- data/contrib/zstd/lib/compress/zstd_double_fast.c +369 -132
- data/contrib/zstd/lib/compress/zstd_double_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +722 -258
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1105 -360
- data/contrib/zstd/lib/compress/zstd_lazy.h +41 -1
- data/contrib/zstd/lib/compress/zstd_ldm.c +272 -208
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +324 -197
- data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
- data/contrib/zstd/lib/compress/zstdmt_compress.c +109 -53
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1071 -539
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +576 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +507 -82
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +962 -310
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +54 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +44 -32
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -5
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +24 -16
- data/contrib/zstd/lib/dictBuilder/zdict.c +88 -95
- data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +16 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +24 -69
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +25 -72
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +23 -69
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +35 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +42 -87
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +35 -82
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +214 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +922 -293
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +7 -6
- data/ext/extzstd.c +13 -10
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +16 -5
data/contrib/zstd/lib/compress/huf_compress.c

@@ -1,6 +1,6 @@
 /* ******************************************************************
  * Huffman encoder, part of New Generation Entropy library
- * Copyright (c)
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
  *
  * You can contact the author at :
  * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -29,9 +29,9 @@
 #include "hist.h"
 #define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
 #include "../common/fse.h"        /* header compression */
-#define HUF_STATIC_LINKING_ONLY
 #include "../common/huf.h"
 #include "../common/error_private.h"
+#include "../common/bits.h"       /* ZSTD_highbit32 */
 
 
 /* **************************************************************
@@ -42,24 +42,111 @@
 
 
 /* **************************************************************
-*
+*  Required declarations
 ****************************************************************/
-
+typedef struct nodeElt_s {
+    U32 count;
+    U16 parent;
+    BYTE byte;
+    BYTE nbBits;
+} nodeElt;
+
+
+/* **************************************************************
+*  Debug Traces
+****************************************************************/
+
+#if DEBUGLEVEL >= 2
+
+static size_t showU32(const U32* arr, size_t size)
+{
+    size_t u;
+    for (u=0; u<size; u++) {
+        RAWLOG(6, " %u", arr[u]); (void)arr;
+    }
+    RAWLOG(6, " \n");
+    return size;
+}
+
+static size_t HUF_getNbBits(HUF_CElt elt);
+
+static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
+{
+    size_t u;
+    for (u=0; u<size; u++) {
+        RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
+    }
+    RAWLOG(6, " \n");
+    return size;
+
+}
+
+static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
+{
+    size_t u;
+    for (u=0; u<size; u++) {
+        RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
+    }
+    RAWLOG(6, " \n");
+    return size;
+}
+
+static size_t showHNodeBits(const nodeElt* hnode, size_t size)
 {
-
+    size_t u;
+    for (u=0; u<size; u++) {
+        RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
+    }
+    RAWLOG(6, " \n");
+    return size;
 }
 
+#endif
+
 
 /* *******************************************************
 *  HUF : Huffman block compression
 *********************************************************/
+#define HUF_WORKSPACE_MAX_ALIGNMENT 8
+
+static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
+{
+    size_t const mask = align - 1;
+    size_t const rem = (size_t)workspace & mask;
+    size_t const add = (align - rem) & mask;
+    BYTE* const aligned = (BYTE*)workspace + add;
+    assert((align & (align - 1)) == 0); /* pow 2 */
+    assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
+    if (*workspaceSizePtr >= add) {
+        assert(add < align);
+        assert(((size_t)aligned & mask) == 0);
+        *workspaceSizePtr -= add;
+        return aligned;
+    } else {
+        *workspaceSizePtr = 0;
+        return NULL;
+    }
+}
+
+
 /* HUF_compressWeights() :
  * Same as FSE_compress(), but dedicated to huff0's weights compression.
  * The use case needs much less stack memory.
  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
  */
 #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
-
+
+typedef struct {
+    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
+    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
+    unsigned count[HUF_TABLELOG_MAX+1];
+    S16 norm[HUF_TABLELOG_MAX+1];
+} HUF_CompressWeightsWksp;
+
+static size_t
+HUF_compressWeights(void* dst, size_t dstSize,
+                    const void* weightTable, size_t wtSize,
+                    void* workspace, size_t workspaceSize)
 {
     BYTE* const ostart = (BYTE*) dst;
     BYTE* op = ostart;
@@ -67,33 +154,30 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
 
     unsigned maxSymbolValue = HUF_TABLELOG_MAX;
     U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
 
-
-    BYTE scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
-
-    unsigned count[HUF_TABLELOG_MAX+1];
-    S16 norm[HUF_TABLELOG_MAX+1];
+    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
 
     /* init conditions */
     if (wtSize <= 1) return 0;  /* Not compressible */
 
     /* Scan input and build symbol stats */
-    {   unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
+    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
         if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
         if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
     }
 
     tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
-    CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
+    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
 
     /* Write table description header */
-    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
+    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
         op += hSize;
     }
 
     /* Compress */
-    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
-    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
+    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
+    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
         if (cSize == 0) return 0;   /* not enough space for compressed data */
         op += cSize;
     }
@@ -101,30 +185,72 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
     return (size_t)(op-ostart);
 }
 
+static size_t HUF_getNbBits(HUF_CElt elt)
+{
+    return elt & 0xFF;
+}
+
+static size_t HUF_getNbBitsFast(HUF_CElt elt)
+{
+    return elt;
+}
+
+static size_t HUF_getValue(HUF_CElt elt)
+{
+    return elt & ~(size_t)0xFF;
+}
+
+static size_t HUF_getValueFast(HUF_CElt elt)
+{
+    return elt;
+}
+
+static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
+{
+    assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
+    *elt = nbBits;
+}
 
-
-`CTable` : Huffman tree to save, using huf representation.
-@return : size of saved CTable */
-size_t HUF_writeCTable (void* dst, size_t maxDstSize,
-                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
+static void HUF_setValue(HUF_CElt* elt, size_t value)
 {
+    size_t const nbBits = HUF_getNbBits(*elt);
+    if (nbBits > 0) {
+        assert((value >> nbBits) == 0);
+        *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
+    }
+}
+
+typedef struct {
+    HUF_CompressWeightsWksp wksp;
     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
     BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
+} HUF_WriteCTableWksp;
+
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
+                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
+                            void* workspace, size_t workspaceSize)
+{
+    HUF_CElt const* const ct = CTable + 1;
     BYTE* op = (BYTE*)dst;
     U32 n;
+    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
 
-
+    HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
+
+    /* check conditions */
+    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
 
     /* convert to weight */
-    bitsToWeight[0] = 0;
+    wksp->bitsToWeight[0] = 0;
     for (n=1; n<huffLog+1; n++)
-        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
     for (n=0; n<maxSymbolValue; n++)
-        huffWeight[n] = bitsToWeight[
+        wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
 
     /* attempt weights compression by FSE */
-
+    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
+    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
         if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
             op[0] = (BYTE)hSize;
             return hSize+1;
@@ -134,9 +260,9 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
     if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
     if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
     op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
-    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
+    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
     for (n=0; n<maxSymbolValue; n+=2)
-        op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
+        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
     return ((maxSymbolValue+1)/2) + 1;
 }
 
@@ -147,6 +273,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
     U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
     U32 tableLog = 0;
     U32 nbSymbols = 0;
+    HUF_CElt* const ct = CTable + 1;
 
     /* get symbol weights */
     CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
@@ -156,6 +283,8 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
     if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
 
+    CTable[0] = tableLog;
+
     /* Prepare base value per rank */
     {   U32 n, nextRankStart = 0;
         for (n=1; n<=tableLog; n++) {
@@ -167,13 +296,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
     /* fill nbBits */
     {   U32 n; for (n=0; n<nbSymbols; n++) {
             const U32 w = huffWeight[n];
-
+            HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
     }   }
 
     /* fill val */
     {   U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0};   /* support w=0=>n=tableLog+1 */
         U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
-        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[
+        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
         /* determine stating value per rank */
         valPerRank[tableLog+1] = 0;   /* for w==0 */
         {   U16 min = 0;
@@ -183,77 +312,73 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
                 min >>= 1;
         }   }
         /* assign value within rank, symbol order */
-        { U32 n; for (n=0; n<nbSymbols; n++)
+        { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
     }
 
     *maxSymbolValuePtr = nbSymbols - 1;
     return readSize;
 }
 
-U32
+U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
 {
-    const HUF_CElt*
+    const HUF_CElt* const ct = CTable + 1;
     assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
-    return
+    return (U32)HUF_getNbBits(ct[symbolValue]);
 }
 
 
-typedef struct nodeElt_s {
-    U32 count;
-    U16 parent;
-    BYTE byte;
-    BYTE nbBits;
-} nodeElt;
-
 /**
  * HUF_setMaxHeight():
- *
+ * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
  *
- * It
- *
+ * It attempts to convert all nodes with nbBits > @targetNbBits
+ * to employ @targetNbBits instead. Then it adjusts the tree
+ * so that it remains a valid canonical Huffman tree.
 *
 * @pre               The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits == huffNode[lastNonNull].nbBits.
 * @post              The sum of the ranks of each symbol == 2^largestBits,
- *                    where largestBits is the return value <=
+ *                    where largestBits is the return value (expected <= targetNbBits).
 *
- * @param huffNode    The Huffman tree modified in place to enforce
+ * @param huffNode    The Huffman tree modified in place to enforce targetNbBits.
+ *                    It's presumed sorted, from most frequent to rarest symbol.
 * @param lastNonNull The symbol with the lowest count in the Huffman tree.
- * @param
+ * @param targetNbBits  The allowed number of bits, which the Huffman tree
 *                    may not respect. After this function the Huffman tree will
- *                    respect
- * @return            The maximum number of bits of the Huffman tree after adjustment
- *                    necessarily no more than maxNbBits.
+ *                    respect targetNbBits.
+ * @return            The maximum number of bits of the Huffman tree after adjustment.
 */
-static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32
+static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
 {
     const U32 largestBits = huffNode[lastNonNull].nbBits;
-    /* early exit : no elt >
-    if (largestBits <=
+    /* early exit : no elt > targetNbBits, so the tree is already valid. */
+    if (largestBits <= targetNbBits) return largestBits;
+
+    DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
 
     /* there are several too large elements (at least >= 2) */
     {   int totalCost = 0;
-        const U32 baseCost = 1 << (largestBits -
+        const U32 baseCost = 1 << (largestBits - targetNbBits);
         int n = (int)lastNonNull;
 
-        /* Adjust any ranks >
+        /* Adjust any ranks > targetNbBits to targetNbBits.
         * Compute totalCost, which is how far the sum of the ranks is
         * we are over 2^largestBits after adjust the offending ranks.
         */
-        while (huffNode[n].nbBits >
+        while (huffNode[n].nbBits > targetNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
-            huffNode[n].nbBits = (BYTE)
+            huffNode[n].nbBits = (BYTE)targetNbBits;
            n--;
        }
-        /* n stops at huffNode[n].nbBits <=
-        assert(huffNode[n].nbBits <=
-        /* n end at index of smallest symbol using <
-        while (huffNode[n].nbBits ==
+        /* n stops at huffNode[n].nbBits <= targetNbBits */
+        assert(huffNode[n].nbBits <= targetNbBits);
+        /* n end at index of smallest symbol using < targetNbBits */
+        while (huffNode[n].nbBits == targetNbBits) --n;
 
-        /* renorm totalCost from 2^largestBits to 2^
+        /* renorm totalCost from 2^largestBits to 2^targetNbBits
         * note : totalCost is necessarily a multiple of baseCost */
-        assert((totalCost & (baseCost - 1)) == 0);
-        totalCost >>= (largestBits -
+        assert(((U32)totalCost & (baseCost - 1)) == 0);
+        totalCost >>= (largestBits - targetNbBits);
        assert(totalCost > 0);
 
        /* repay normalized cost */
@@ -262,19 +387,19 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
 
        /* Get pos of last (smallest = lowest cum. count) symbol per rank */
        ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
-        {   U32 currentNbBits =
+        {   U32 currentNbBits = targetNbBits;
            int pos;
            for (pos=n ; pos >= 0; pos--) {
                if (huffNode[pos].nbBits >= currentNbBits) continue;
-                currentNbBits = huffNode[pos].nbBits;   /* <
-                rankLast[
+                currentNbBits = huffNode[pos].nbBits;   /* < targetNbBits */
+                rankLast[targetNbBits-currentNbBits] = (U32)pos;
        }   }
 
        while (totalCost > 0) {
            /* Try to reduce the next power of 2 above totalCost because we
            * gain back half the rank.
            */
-            U32 nBitsToDecrease =
+            U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
            for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                U32 const highPos = rankLast[nBitsToDecrease];
                U32 const lowPos = rankLast[nBitsToDecrease-1];
@@ -314,7 +439,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
-                    if (huffNode[rankLast[nBitsToDecrease]].nbBits !=
+                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
                }
        }   /* while (totalCost > 0) */
@@ -326,11 +451,11 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
        * TODO.
        */
        while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
-            /* special case : no rank 1 symbol (using
-             * let's create one from largest rank 0 (using
+            /* special case : no rank 1 symbol (using targetNbBits-1);
+             * let's create one from largest rank 0 (using targetNbBits).
            */
            if (rankLast[1] == noSymbol) {
-                while (huffNode[n].nbBits ==
+                while (huffNode[n].nbBits == targetNbBits) n--;
                huffNode[n+1].nbBits--;
                assert(n >= 0);
                rankLast[1] = (U32)(n+1);
@@ -344,26 +469,122 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
        }   /* repay normalized cost */
    }   /* there are several too large elements (at least >= 2) */
 
-    return
+    return targetNbBits;
 }
 
 typedef struct {
-
-
+    U16 base;
+    U16 curr;
 } rankPos;
 
-typedef nodeElt huffNodeTable[
+typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
 
-
+/* Number of buckets available for HUF_sort() */
+#define RANK_POSITION_TABLE_SIZE 192
 
 typedef struct {
    huffNodeTable huffNodeTbl;
    rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
 } HUF_buildCTable_wksp_tables;
 
+/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
+ * Strategy is to use as many buckets as possible for representing distinct
+ * counts while using the remainder to represent all "large" counts.
+ *
+ * To satisfy this requirement for 192 buckets, we can do the following:
+ * Let buckets 0-166 represent distinct counts of [0, 166]
+ * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
+ */
+#define RANK_POSITION_MAX_COUNT_LOG 32
+#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
+#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
+
+/* Return the appropriate bucket index for a given count. See definition of
+ * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
+ */
+static U32 HUF_getIndex(U32 const count) {
+    return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
+        ? count
+        : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
+}
+
+/* Helper swap function for HUF_quickSortPartition() */
+static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
+    nodeElt tmp = *a;
+    *a = *b;
+    *b = tmp;
+}
+
+/* Returns 0 if the huffNode array is not sorted by descending count */
+MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
+    U32 i;
+    for (i = 1; i < maxSymbolValue1; ++i) {
+        if (huffNode[i].count > huffNode[i-1].count) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+/* Insertion sort by descending order */
+HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
+    int i;
+    int const size = high-low+1;
+    huffNode += low;
+    for (i = 1; i < size; ++i) {
+        nodeElt const key = huffNode[i];
+        int j = i - 1;
+        while (j >= 0 && huffNode[j].count < key.count) {
+            huffNode[j + 1] = huffNode[j];
+            j--;
+        }
+        huffNode[j + 1] = key;
+    }
+}
+
+/* Pivot helper function for quicksort. */
+static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
+    /* Simply select rightmost element as pivot. "Better" selectors like
+     * median-of-three don't experimentally appear to have any benefit.
+     */
+    U32 const pivot = arr[high].count;
+    int i = low - 1;
+    int j = low;
+    for ( ; j < high; j++) {
+        if (arr[j].count > pivot) {
+            i++;
+            HUF_swapNodes(&arr[i], &arr[j]);
+        }
+    }
+    HUF_swapNodes(&arr[i + 1], &arr[high]);
+    return i + 1;
+}
+
+/* Classic quicksort by descending with partially iterative calls
+ * to reduce worst case callstack size.
+ */
+static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
+    int const kInsertionSortThreshold = 8;
+    if (high - low < kInsertionSortThreshold) {
+        HUF_insertionSort(arr, low, high);
+        return;
+    }
+    while (low < high) {
+        int const idx = HUF_quickSortPartition(arr, low, high);
+        if (idx - low < high - idx) {
+            HUF_simpleQuickSort(arr, low, idx - 1);
+            low = idx + 1;
+        } else {
+            HUF_simpleQuickSort(arr, idx + 1, high);
+            high = idx - 1;
+        }
+    }
+}
+
 /**
 * HUF_sort():
 * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
+ * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
 *
 * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
 *                            Must have (maxSymbolValue + 1) entries.
@@ -371,42 +592,51 @@ typedef struct {
 * @param[in]  maxSymbolValue Maximum symbol value.
 * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
 */
-static void HUF_sort(nodeElt
-
-
-    int const maxSymbolValue1 = (int)maxSymbolValue + 1;
+static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
+    U32 n;
+    U32 const maxSymbolValue1 = maxSymbolValue+1;
 
    /* Compute base and set curr to base.
-    * For symbol s let lowerRank =
-    *
+    * For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
+    * See HUF_getIndex to see bucketing strategy.
    * We attribute each symbol to lowerRank's base value, because we want to know where
    * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
    */
    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
    for (n = 0; n < maxSymbolValue1; ++n) {
-        U32 lowerRank =
+        U32 lowerRank = HUF_getIndex(count[n]);
+        assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
        rankPosition[lowerRank].base++;
    }
+
    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
+    /* Set up the rankPosition table */
    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
        rankPosition[n-1].base += rankPosition[n].base;
        rankPosition[n-1].curr = rankPosition[n-1].base;
    }
-
+
+    /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 const c = count[n];
-        U32 const r =
-        U32 pos = rankPosition[r].curr++;
-
-        * We have at most 256 symbols, so this insertion should be fine.
-        */
-        while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
-            huffNode[pos] = huffNode[pos-1];
-            pos--;
-        }
+        U32 const r = HUF_getIndex(c) + 1;
+        U32 const pos = rankPosition[r].curr++;
+        assert(pos < maxSymbolValue1);
        huffNode[pos].count = c;
        huffNode[pos].byte  = (BYTE)n;
    }
+
+    /* Sort each bucket. */
+    for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
+        int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
+        U32 const bucketStartIdx = rankPosition[n].base;
+        if (bucketSize > 1) {
+            assert(bucketStartIdx < maxSymbolValue1);
+            HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
+        }
+    }
+
+    assert(HUF_isSorted(huffNode, maxSymbolValue1));
 }
 
 
@@ -430,6 +660,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
    int lowS, lowN;
    int nodeNb = STARTNODE;
    int n, nodeRoot;
+    DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
    /* init for parents */
    nonNullRank = (int)maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
@@ -456,6 +687,8 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
 
+    DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
+
    return nonNullRank;
 }
 
@@ -471,6 +704,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
 */
 static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
 {
+    HUF_CElt* const ct = CTable + 1;
    /* fill result into ctable (val, nbBits) */
    int n;
    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
@@ -486,127 +720,373 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
            min >>= 1;
    }   }
    for (n=0; n<alphabetSize; n++)
-
+        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
    for (n=0; n<alphabetSize; n++)
-
+        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */
+    CTable[0] = maxNbBits;
 }
 
-size_t
+size_t
+HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
+                     void* workSpace, size_t wkspSize)
 {
-    HUF_buildCTable_wksp_tables* const wksp_tables =
+    HUF_buildCTable_wksp_tables* const wksp_tables =
+        (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
    nodeElt* const huffNode = huffNode0+1;
    int nonNullRank;
 
+    HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
+
+    DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
+
    /* safety checks */
-    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
-
+        return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
-
+        return ERROR(maxSymbolValue_tooLarge);
    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
 
    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
+    DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
 
    /* build tree */
    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
 
-    /* enforce maxTableLog */
+    /* determine and enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
 
-    HUF_buildCTableFromTree(
+    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
 
    return maxNbBits;
 }
 
 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
 {
+    HUF_CElt const* ct = CTable + 1;
    size_t nbBits = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
-        nbBits +=
+        nbBits += HUF_getNbBits(ct[s]) * count[s];
    }
    return nbBits >> 3;
 }
 
 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+    HUF_CElt const* ct = CTable + 1;
    int bad = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
-        bad |= (count[s] != 0) & (
+        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
    }
    return !bad;
 }
 
 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
 
+/** HUF_CStream_t:
+ * Huffman uses its own BIT_CStream_t implementation.
+ * There are three major differences from BIT_CStream_t:
+ *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
+ *      the pair (nbBits, value) in the format:
+ *      format:
+ *   - Bits [0, 4)            = nbBits
+ *   - Bits [4, 64 - nbBits)  = 0
+ *   - Bits [64 - nbBits, 64) = value
+ *   2. The bitContainer is built from the upper bits and
+ *      right shifted. E.g. to add a new value of N bits
+ *      you right shift the bitContainer by N, then or in
+ *      the new value into the N upper bits.
+ *   3. The bitstream has two bit containers. You can add
+ *      bits to the second container and merge them into
+ *      the first container.
+ */
+
+#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
+
+typedef struct {
+    size_t bitContainer[2];
+    size_t bitPos[2];
+
+    BYTE* startPtr;
+    BYTE* ptr;
+    BYTE* endPtr;
+} HUF_CStream_t;
+
+/**! HUF_initCStream():
+ * Initializes the bitstream.
+ * @returns 0 or an error code.
+ */
+static size_t HUF_initCStream(HUF_CStream_t* bitC,
+                              void* startPtr, size_t dstCapacity)
+{
+    ZSTD_memset(bitC, 0, sizeof(*bitC));
+    bitC->startPtr = (BYTE*)startPtr;
+    bitC->ptr = bitC->startPtr;
+    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
+    if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
+    return 0;
+}
+
+/*! HUF_addBits():
+ * Adds the symbol stored in HUF_CElt elt to the bitstream.
+ *
+ * @param elt   The element we're adding. This is a (nbBits, value) pair.
+ *              See the HUF_CStream_t docs for the format.
+ * @param idx   Insert into the bitstream at this idx.
+ * @param kFast This is a template parameter. If the bitstream is guaranteed
+ *              to have at least 4 unused bits after this call it may be 1,
+ *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
+ */
+FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
+{
+    assert(idx <= 1);
+    assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
+    /* This is efficient on x86-64 with BMI2 because shrx
+     * only reads the low 6 bits of the register. The compiler
+     * knows this and elides the mask. When fast is set,
+     * every operation can use the same value loaded from elt.
+     */
+    bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
+    bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
+    /* We only read the low 8 bits of bitC->bitPos[idx] so it
+     * doesn't matter that the high bits have noise from the value.
+     */
+    bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
+    assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+    /* The last 4-bits of elt are dirty if fast is set,
+     * so we must not be overwriting bits that have already been
+     * inserted into the bit container.
+     */
+#if DEBUGLEVEL >= 1
+    {
+        size_t const nbBits = HUF_getNbBits(elt);
+        size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
+        (void)dirtyBits;
+        /* Middle bits are 0. */
+        assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
+        /* We didn't overwrite any bits in the bit container. */
+        assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+        (void)dirtyBits;
+    }
+#endif
+}
+
+FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
+{
+    bitC->bitContainer[1] = 0;
+    bitC->bitPos[1] = 0;
+}
+
+/*! HUF_mergeIndex1() :
+ * Merges the bit container @ index 1 into the bit container @ index 0
+ * and zeros the bit container @ index 1.
+ */
+FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
+{
+    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
+    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
+    bitC->bitContainer[0] |= bitC->bitContainer[1];
+    bitC->bitPos[0] += bitC->bitPos[1];
+    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+}
+
+/*! HUF_flushBits() :
+ * Flushes the bits in the bit container @ index 0.
+ *
+ * @post bitPos will be < 8.
+ * @param kFast If kFast is set then we must know a-priori that
+ *              the bit container will not overflow.
+ */
+FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
+{
+    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
+    size_t const nbBits = bitC->bitPos[0] & 0xFF;
+    size_t const nbBytes = nbBits >> 3;
+    /* The top nbBits bits of bitContainer are the ones we need. */
+    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
+    /* Mask bitPos to account for the bytes we consumed. */
+    bitC->bitPos[0] &= 7;
+    assert(nbBits > 0);
+    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
+    MEM_writeLEST(bitC->ptr, bitContainer);
+    bitC->ptr += nbBytes;
+    assert(!kFast || bitC->ptr <= bitC->endPtr);
+    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
+    /* bitContainer doesn't need to be modified because the leftover
+     * bits are already the top bitPos bits. And we don't care about
+     * noise in the lower values.
+     */
+}
+
+/*! HUF_endMark()
+ * @returns The Huffman stream end mark: A 1-bit value = 1.
+ */
+static HUF_CElt HUF_endMark(void)
+{
+    HUF_CElt endMark;
+    HUF_setNbBits(&endMark, 1);
+    HUF_setValue(&endMark, 1);
+    return endMark;
+}
+
+/*! HUF_closeCStream() :
+ * @return Size of CStream, in bytes,
+ *         or 0 if it could not fit into dstBuffer */
+static size_t HUF_closeCStream(HUF_CStream_t* bitC)
+{
+    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
+    HUF_flushBits(bitC, /* kFast */ 0);
+    {
+        size_t const nbBits = bitC->bitPos[0] & 0xFF;
+        if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
+        return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
+    }
+}
+
 FORCE_INLINE_TEMPLATE void
-HUF_encodeSymbol(
+HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
 {
-
+    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
 }
 
-
+FORCE_INLINE_TEMPLATE void
+HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
+                                   const BYTE* ip, size_t srcSize,
+                                   const HUF_CElt* ct,
+                                   int kUnroll, int kFastFlush, int kLastFast)
+{
+    /* Join to kUnroll */
+    int n = (int)srcSize;
+    int rem = n % kUnroll;
+    if (rem > 0) {
+        for (; rem > 0; --rem) {
+            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
+        }
+        HUF_flushBits(bitC, kFastFlush);
+    }
+    assert(n % kUnroll == 0);
 
-
-    if (
+    /* Join to 2 * kUnroll */
+    if (n % (2 * kUnroll)) {
+        int u;
+        for (u = 1; u < kUnroll; ++u) {
+            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
+        }
+        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
+        HUF_flushBits(bitC, kFastFlush);
+        n -= kUnroll;
+    }
+    assert(n % (2 * kUnroll) == 0);
+
+    for (; n>0; n-= 2 * kUnroll) {
+        /* Encode kUnroll symbols into the bitstream @ index 0. */
+        int u;
+        for (u = 1; u < kUnroll; ++u) {
+            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
+        }
+        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
+        HUF_flushBits(bitC, kFastFlush);
+        /* Encode kUnroll symbols into the bitstream @ index 1.
+         * This allows us to start filling the bit container
+         * without any data dependencies.
+         */
+        HUF_zeroIndex1(bitC);
+        for (u = 1; u < kUnroll; ++u) {
+            HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
+        }
+        HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
+        /* Merge bitstream @ index 1 into the bitstream @ index 0 */
+        HUF_mergeIndex1(bitC);
+        HUF_flushBits(bitC, kFastFlush);
+    }
+    assert(n == 0);
+
+}
+
+/**
+ * Returns a tight upper bound on the output space needed by Huffman
+ * with 8 bytes buffer to handle over-writes. If the output is at least
+ * this large we don't need to do bounds checks during Huffman encoding.
+ */
+static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
+{
+    return ((srcSize * tableLog) >> 3) + 8;
+}
 
-#define HUF_FLUSHBITS_2(stream) \
-    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
 
 FORCE_INLINE_TEMPLATE size_t
 HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                    const void* src, size_t srcSize,
                                    const HUF_CElt* CTable)
 {
+    U32 const tableLog = (U32)CTable[0];
+    HUF_CElt const* ct = CTable + 1;
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
-
-    BIT_CStream_t bitC;
+    HUF_CStream_t bitC;
 
    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
-    { size_t const initErr =
+    { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
      if (HUF_isError(initErr)) return 0; }
 
-
-
-    {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
+        HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
+    else {
+        if (MEM_32bits()) {
+            switch (tableLog) {
+            case 11:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 10: ZSTD_FALLTHROUGH;
+            case 9: ZSTD_FALLTHROUGH;
+            case 8:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            case 7: ZSTD_FALLTHROUGH;
+            default:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            }
+        } else {
+            switch (tableLog) {
+            case 11:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 10:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            case 9:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 8:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 7:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 6: ZSTD_FALLTHROUGH;
+            default:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            }
+        }
    }
+    assert(bitC.ptr <= bitC.endPtr);
 
-    return
+    return HUF_closeCStream(&bitC);
 }
 
 #if DYNAMIC_BMI2
 
-static
+static BMI2_TARGET_ATTRIBUTE size_t
 HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                    const void* src, size_t srcSize,
                                    const HUF_CElt* CTable)
@@ -625,9 +1105,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
 static size_t
 HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
-                              const HUF_CElt* CTable, const int
+                              const HUF_CElt* CTable, const int flags)
 {
-    if (
+    if (flags & HUF_flags_bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
@@ -638,24 +1118,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
 static size_t
 HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
-                              const HUF_CElt* CTable, const int
+                              const HUF_CElt* CTable, const int flags)
 {
-    (void)
+    (void)flags;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
 }
 
 #endif
 
-size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
 {
-    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable,
+    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
 }
 
-
 static size_t
 HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
-                              const HUF_CElt* CTable, int
+                              const HUF_CElt* CTable, int flags)
 {
    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
    const BYTE* ip = (const BYTE*) src;
@@ -669,27 +1148,24 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
    op += 6;   /* jumpTable */
 
    assert(op <= oend);
-    { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable,
-      if (cSize==0) return 0;
-      assert(cSize <= 65535);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
+        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart, (U16)cSize);
        op += cSize;
    }
 
    ip += segmentSize;
    assert(op <= oend);
-    { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable,
-      if (cSize==0) return 0;
-      assert(cSize <= 65535);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
+        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+2, (U16)cSize);
        op += cSize;
    }
 
    ip += segmentSize;
    assert(op <= oend);
-    { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable,
-      if (cSize==0) return 0;
-      assert(cSize <= 65535);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
+        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+4, (U16)cSize);
        op += cSize;
    }
@@ -697,17 +1173,17 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
    ip += segmentSize;
    assert(op <= oend);
    assert(ip <= iend);
-    { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable,
-      if (cSize==0) return 0;
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
+        if (cSize == 0 || cSize > 65535) return 0;
        op += cSize;
    }
 
    return (size_t)(op-ostart);
 }
 
-size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
 {
-    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable,
+    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
 }
 
 typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@@ -715,11 +1191,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
 static size_t HUF_compressCTable_internal(
                BYTE* const ostart, BYTE* op, BYTE* const oend,
                const void* src, size_t srcSize,
-                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int
|
|
1194
|
+
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
|
|
719
1195
|
{
|
|
720
1196
|
size_t const cSize = (nbStreams==HUF_singleStream) ?
|
|
721
|
-
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable,
|
|
722
|
-
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable,
|
|
1197
|
+
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
|
|
1198
|
+
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
|
|
723
1199
|
if (HUF_isError(cSize)) { return cSize; }
|
|
724
1200
|
if (cSize==0) { return 0; } /* uncompressible */
|
|
725
1201
|
op += cSize;
|
|
@@ -731,31 +1207,111 @@ static size_t HUF_compressCTable_internal(
|
|
|
731
1207
|
|
|
732
1208
|
typedef struct {
|
|
733
1209
|
unsigned count[HUF_SYMBOLVALUE_MAX + 1];
|
|
734
|
-
HUF_CElt CTable[HUF_SYMBOLVALUE_MAX
|
|
735
|
-
|
|
1210
|
+
HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
|
|
1211
|
+
union {
|
|
1212
|
+
HUF_buildCTable_wksp_tables buildCTable_wksp;
|
|
1213
|
+
HUF_WriteCTableWksp writeCTable_wksp;
|
|
1214
|
+
U32 hist_wksp[HIST_WKSP_SIZE_U32];
|
|
1215
|
+
} wksps;
|
|
736
1216
|
} HUF_compress_tables_t;
|
|
737
1217
|
|
|
1218
|
+
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
|
|
1219
|
+
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
|
|
1220
|
+
|
|
1221
|
+
unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
|
|
1222
|
+
{
|
|
1223
|
+
unsigned cardinality = 0;
|
|
1224
|
+
unsigned i;
|
|
1225
|
+
|
|
1226
|
+
for (i = 0; i < maxSymbolValue + 1; i++) {
|
|
1227
|
+
if (count[i] != 0) cardinality += 1;
|
|
1228
|
+
}
|
|
1229
|
+
|
|
1230
|
+
return cardinality;
|
|
1231
|
+
}
|
|
1232
|
+
|
|
1233
|
+
unsigned HUF_minTableLog(unsigned symbolCardinality)
|
|
1234
|
+
{
|
|
1235
|
+
U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
|
|
1236
|
+
return minBitsSymbols;
|
|
1237
|
+
}
|
|
1238
|
+
|
|
1239
|
+
unsigned HUF_optimalTableLog(
|
|
1240
|
+
unsigned maxTableLog,
|
|
1241
|
+
size_t srcSize,
|
|
1242
|
+
unsigned maxSymbolValue,
|
|
1243
|
+
void* workSpace, size_t wkspSize,
|
|
1244
|
+
HUF_CElt* table,
|
|
1245
|
+
const unsigned* count,
|
|
1246
|
+
int flags)
|
|
1247
|
+
{
|
|
1248
|
+
assert(srcSize > 1); /* Not supported, RLE should be used instead */
|
|
1249
|
+
assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
|
|
1250
|
+
|
|
1251
|
+
if (!(flags & HUF_flags_optimalDepth)) {
|
|
1252
|
+
/* cheap evaluation, based on FSE */
|
|
1253
|
+
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
|
|
1254
|
+
}
|
|
1255
|
+
|
|
1256
|
+
{ BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
|
|
1257
|
+
size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
|
|
1258
|
+
size_t maxBits, hSize, newSize;
|
|
1259
|
+
const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
|
|
1260
|
+
const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
|
|
1261
|
+
size_t optSize = ((size_t) ~0) - 1;
|
|
1262
|
+
unsigned optLog = maxTableLog, optLogGuess;
|
|
1263
|
+
|
|
1264
|
+
DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
|
|
1265
|
+
|
|
1266
|
+
/* Search until size increases */
|
|
1267
|
+
for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
|
|
1268
|
+
DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
|
|
1269
|
+
maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
|
|
1270
|
+
if (ERR_isError(maxBits)) continue;
|
|
1271
|
+
|
|
1272
|
+
if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
|
|
1273
|
+
|
|
1274
|
+
hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
|
|
1275
|
+
|
|
1276
|
+
if (ERR_isError(hSize)) continue;
|
|
1277
|
+
|
|
1278
|
+
newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
|
|
1279
|
+
|
|
1280
|
+
if (newSize > optSize + 1) {
|
|
1281
|
+
break;
|
|
1282
|
+
}
|
|
1283
|
+
|
|
1284
|
+
if (newSize < optSize) {
|
|
1285
|
+
optSize = newSize;
|
|
1286
|
+
optLog = optLogGuess;
|
|
1287
|
+
}
|
|
1288
|
+
}
|
|
1289
|
+
assert(optLog <= HUF_TABLELOG_MAX);
|
|
1290
|
+
return optLog;
|
|
1291
|
+
}
|
|
1292
|
+
}
|
|
1293
|
+
|
|
738
1294
|
/* HUF_compress_internal() :
|
|
739
|
-
* `
|
|
1295
|
+
* `workSpace_align4` must be aligned on 4-bytes boundaries,
|
|
1296
|
+
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
|
|
740
1297
|
static size_t
|
|
741
1298
|
HUF_compress_internal (void* dst, size_t dstSize,
|
|
742
1299
|
const void* src, size_t srcSize,
|
|
743
1300
|
unsigned maxSymbolValue, unsigned huffLog,
|
|
744
1301
|
HUF_nbStreams_e nbStreams,
|
|
745
1302
|
void* workSpace, size_t wkspSize,
|
|
746
|
-
HUF_CElt* oldHufTable, HUF_repeat* repeat, int
|
|
747
|
-
const int bmi2)
|
|
1303
|
+
HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
|
|
748
1304
|
{
|
|
749
|
-
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
|
|
1305
|
+
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
|
|
750
1306
|
BYTE* const ostart = (BYTE*)dst;
|
|
751
1307
|
BYTE* const oend = ostart + dstSize;
|
|
752
1308
|
BYTE* op = ostart;
|
|
753
1309
|
|
|
754
|
-
|
|
1310
|
+
DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
|
|
1311
|
+
HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
|
|
755
1312
|
|
|
756
1313
|
/* checks & inits */
|
|
757
|
-
if (
|
|
758
|
-
if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
|
|
1314
|
+
if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
|
|
759
1315
|
if (!srcSize) return 0; /* Uncompressed */
|
|
760
1316
|
if (!dstSize) return 0; /* cannot fit anything within dst budget */
|
|
761
1317
|
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
|
|
@@ -765,17 +1321,34 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
765
1321
|
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
|
|
766
1322
|
|
|
767
1323
|
/* Heuristic : If old table is valid, use it for small inputs */
|
|
768
|
-
if (
|
|
1324
|
+
if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
|
|
769
1325
|
return HUF_compressCTable_internal(ostart, op, oend,
|
|
770
1326
|
src, srcSize,
|
|
771
|
-
nbStreams, oldHufTable,
|
|
1327
|
+
nbStreams, oldHufTable, flags);
|
|
1328
|
+
}
|
|
1329
|
+
|
|
1330
|
+
/* If uncompressible data is suspected, do a smaller sampling first */
|
|
1331
|
+
DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
|
|
1332
|
+
if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
|
|
1333
|
+
size_t largestTotal = 0;
|
|
1334
|
+
DEBUGLOG(5, "input suspected incompressible : sampling to check");
|
|
1335
|
+
{ unsigned maxSymbolValueBegin = maxSymbolValue;
|
|
1336
|
+
CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
|
|
1337
|
+
largestTotal += largestBegin;
|
|
1338
|
+
}
|
|
1339
|
+
{ unsigned maxSymbolValueEnd = maxSymbolValue;
|
|
1340
|
+
CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
|
|
1341
|
+
largestTotal += largestEnd;
|
|
1342
|
+
}
|
|
1343
|
+
if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */
|
|
772
1344
|
}
|
|
773
1345
|
|
|
774
1346
|
/* Scan input and build symbol stats */
|
|
775
|
-
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize,
|
|
1347
|
+
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
|
|
776
1348
|
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
|
|
777
1349
|
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
|
|
778
1350
|
}
|
|
1351
|
+
DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
|
|
779
1352
|
|
|
780
1353
|
/* Check validity of previous table */
|
|
781
1354
|
if ( repeat
|
|
@@ -784,26 +1357,31 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
784
1357
|
*repeat = HUF_repeat_none;
|
|
785
1358
|
}
|
|
786
1359
|
/* Heuristic : use existing table for small inputs */
|
|
787
|
-
if (
|
|
1360
|
+
if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
|
|
788
1361
|
return HUF_compressCTable_internal(ostart, op, oend,
|
|
789
1362
|
src, srcSize,
|
|
790
|
-
nbStreams, oldHufTable,
|
|
1363
|
+
nbStreams, oldHufTable, flags);
|
|
791
1364
|
}
|
|
792
1365
|
|
|
793
1366
|
/* Build Huffman Tree */
|
|
794
|
-
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
|
1367
|
+
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
|
|
795
1368
|
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
|
|
796
1369
|
maxSymbolValue, huffLog,
|
|
797
|
-
&table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
|
|
1370
|
+
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
|
|
798
1371
|
CHECK_F(maxBits);
|
|
799
1372
|
huffLog = (U32)maxBits;
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
1373
|
+
DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
|
|
1374
|
+
}
|
|
1375
|
+
/* Zero unused symbols in CTable, so we can check it for validity */
|
|
1376
|
+
{
|
|
1377
|
+
size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
|
|
1378
|
+
size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
|
|
1379
|
+
ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
|
|
803
1380
|
}
|
|
804
1381
|
|
|
805
1382
|
/* Write table description header */
|
|
806
|
-
{ CHECK_V_F(hSize,
|
|
1383
|
+
{ CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
|
|
1384
|
+
&table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
|
|
807
1385
|
/* Check if using previous huffman table is beneficial */
|
|
808
1386
|
if (repeat && *repeat != HUF_repeat_none) {
|
|
809
1387
|
size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
|
|
@@ -811,7 +1389,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
811
1389
|
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
|
|
812
1390
|
return HUF_compressCTable_internal(ostart, op, oend,
|
|
813
1391
|
src, srcSize,
|
|
814
|
-
nbStreams, oldHufTable,
|
|
1392
|
+
nbStreams, oldHufTable, flags);
|
|
815
1393
|
} }
|
|
816
1394
|
|
|
817
1395
|
/* Use the new huffman table */
|
|
@@ -823,91 +1401,35 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
823
1401
|
}
|
|
824
1402
|
return HUF_compressCTable_internal(ostart, op, oend,
|
|
825
1403
|
src, srcSize,
|
|
826
|
-
nbStreams, table->CTable,
|
|
827
|
-
}
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
|
|
831
|
-
const void* src, size_t srcSize,
|
|
832
|
-
unsigned maxSymbolValue, unsigned huffLog,
|
|
833
|
-
void* workSpace, size_t wkspSize)
|
|
834
|
-
{
|
|
835
|
-
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
836
|
-
maxSymbolValue, huffLog, HUF_singleStream,
|
|
837
|
-
workSpace, wkspSize,
|
|
838
|
-
NULL, NULL, 0, 0 /*bmi2*/);
|
|
1404
|
+
nbStreams, table->CTable, flags);
|
|
839
1405
|
}
|
|
840
1406
|
|
|
841
1407
|
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
|
|
842
1408
|
const void* src, size_t srcSize,
|
|
843
1409
|
unsigned maxSymbolValue, unsigned huffLog,
|
|
844
1410
|
void* workSpace, size_t wkspSize,
|
|
845
|
-
HUF_CElt* hufTable, HUF_repeat* repeat, int
|
|
1411
|
+
HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
|
|
846
1412
|
{
|
|
1413
|
+
DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
|
|
847
1414
|
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
848
1415
|
maxSymbolValue, huffLog, HUF_singleStream,
|
|
849
1416
|
workSpace, wkspSize, hufTable,
|
|
850
|
-
repeat,
|
|
851
|
-
}
|
|
852
|
-
|
|
853
|
-
/* HUF_compress4X_repeat():
|
|
854
|
-
* compress input using 4 streams.
|
|
855
|
-
* provide workspace to generate compression tables */
|
|
856
|
-
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
|
|
857
|
-
const void* src, size_t srcSize,
|
|
858
|
-
unsigned maxSymbolValue, unsigned huffLog,
|
|
859
|
-
void* workSpace, size_t wkspSize)
|
|
860
|
-
{
|
|
861
|
-
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
862
|
-
maxSymbolValue, huffLog, HUF_fourStreams,
|
|
863
|
-
workSpace, wkspSize,
|
|
864
|
-
NULL, NULL, 0, 0 /*bmi2*/);
|
|
1417
|
+
repeat, flags);
|
|
865
1418
|
}
|
|
866
1419
|
|
|
867
1420
|
/* HUF_compress4X_repeat():
|
|
868
1421
|
* compress input using 4 streams.
|
|
1422
|
+
* consider skipping quickly
|
|
869
1423
|
* re-use an existing huffman compression table */
|
|
870
1424
|
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
|
|
871
1425
|
const void* src, size_t srcSize,
|
|
872
1426
|
unsigned maxSymbolValue, unsigned huffLog,
|
|
873
1427
|
void* workSpace, size_t wkspSize,
|
|
874
|
-
HUF_CElt* hufTable, HUF_repeat* repeat, int
|
|
1428
|
+
HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
|
|
875
1429
|
{
|
|
1430
|
+
DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
|
|
876
1431
|
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
877
1432
|
maxSymbolValue, huffLog, HUF_fourStreams,
|
|
878
1433
|
workSpace, wkspSize,
|
|
879
|
-
hufTable, repeat,
|
|
1434
|
+
hufTable, repeat, flags);
|
|
880
1435
|
}
|
|
881
|
-
|
|
882
|
-
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
|
883
|
-
/** HUF_buildCTable() :
|
|
884
|
-
* @return : maxNbBits
|
|
885
|
-
* Note : count is used before tree is written, so they can safely overlap
|
|
886
|
-
*/
|
|
887
|
-
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
|
|
888
|
-
{
|
|
889
|
-
HUF_buildCTable_wksp_tables workspace;
|
|
890
|
-
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
|
|
891
|
-
}
|
|
892
|
-
|
|
893
|
-
size_t HUF_compress1X (void* dst, size_t dstSize,
|
|
894
|
-
const void* src, size_t srcSize,
|
|
895
|
-
unsigned maxSymbolValue, unsigned huffLog)
|
|
896
|
-
{
|
|
897
|
-
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
|
|
898
|
-
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
|
899
|
-
}
|
|
900
|
-
|
|
901
|
-
size_t HUF_compress2 (void* dst, size_t dstSize,
|
|
902
|
-
const void* src, size_t srcSize,
|
|
903
|
-
unsigned maxSymbolValue, unsigned huffLog)
|
|
904
|
-
{
|
|
905
|
-
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
|
|
906
|
-
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
|
907
|
-
}
|
|
908
|
-
|
|
909
|
-
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
|
|
910
|
-
{
|
|
911
|
-
return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
|
|
912
|
-
}
|
|
913
|
-
#endif
|