zstd-ruby 1.4.5.0 → 1.5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
- data/ext/zstdruby/libzstd/common/compiler.h +205 -22
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +12 -19
- data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
- data/ext/zstdruby/libzstd/common/error_private.c +10 -2
- data/ext/zstdruby/libzstd/common/error_private.h +82 -3
- data/ext/zstdruby/libzstd/common/fse.h +37 -86
- data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
- data/ext/zstdruby/libzstd/common/huf.h +99 -166
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +10 -4
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +74 -19
- data/ext/zstdruby/libzstd/common/threading.h +5 -10
- data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
- data/ext/zstdruby/libzstd/compress/hist.c +27 -29
- data/ext/zstdruby/libzstd/compress/hist.h +2 -2
- data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
- data/ext/zstdruby/libzstd/zstd.h +1217 -287
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +19 -36
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -354
- data/ext/zstdruby/libzstd/README.md +0 -179
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/* ******************************************************************
|
|
2
2
|
* Huffman encoder, part of New Generation Entropy library
|
|
3
|
-
* Copyright (c)
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
4
4
|
*
|
|
5
5
|
* You can contact the author at :
|
|
6
6
|
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
@@ -23,16 +23,15 @@
|
|
|
23
23
|
/* **************************************************************
|
|
24
24
|
* Includes
|
|
25
25
|
****************************************************************/
|
|
26
|
-
#include
|
|
27
|
-
#include <stdio.h> /* printf (debug) */
|
|
26
|
+
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
|
|
28
27
|
#include "../common/compiler.h"
|
|
29
28
|
#include "../common/bitstream.h"
|
|
30
29
|
#include "hist.h"
|
|
31
30
|
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
|
|
32
31
|
#include "../common/fse.h" /* header compression */
|
|
33
|
-
#define HUF_STATIC_LINKING_ONLY
|
|
34
32
|
#include "../common/huf.h"
|
|
35
33
|
#include "../common/error_private.h"
|
|
34
|
+
#include "../common/bits.h" /* ZSTD_highbit32 */
|
|
36
35
|
|
|
37
36
|
|
|
38
37
|
/* **************************************************************
|
|
@@ -43,24 +42,111 @@
|
|
|
43
42
|
|
|
44
43
|
|
|
45
44
|
/* **************************************************************
|
|
46
|
-
*
|
|
45
|
+
* Required declarations
|
|
47
46
|
****************************************************************/
|
|
48
|
-
|
|
47
|
+
typedef struct nodeElt_s {
|
|
48
|
+
U32 count;
|
|
49
|
+
U16 parent;
|
|
50
|
+
BYTE byte;
|
|
51
|
+
BYTE nbBits;
|
|
52
|
+
} nodeElt;
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
/* **************************************************************
|
|
56
|
+
* Debug Traces
|
|
57
|
+
****************************************************************/
|
|
58
|
+
|
|
59
|
+
#if DEBUGLEVEL >= 2
|
|
60
|
+
|
|
61
|
+
static size_t showU32(const U32* arr, size_t size)
|
|
62
|
+
{
|
|
63
|
+
size_t u;
|
|
64
|
+
for (u=0; u<size; u++) {
|
|
65
|
+
RAWLOG(6, " %u", arr[u]); (void)arr;
|
|
66
|
+
}
|
|
67
|
+
RAWLOG(6, " \n");
|
|
68
|
+
return size;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
static size_t HUF_getNbBits(HUF_CElt elt);
|
|
72
|
+
|
|
73
|
+
static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
|
|
74
|
+
{
|
|
75
|
+
size_t u;
|
|
76
|
+
for (u=0; u<size; u++) {
|
|
77
|
+
RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
|
|
78
|
+
}
|
|
79
|
+
RAWLOG(6, " \n");
|
|
80
|
+
return size;
|
|
81
|
+
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
|
|
49
85
|
{
|
|
50
|
-
|
|
86
|
+
size_t u;
|
|
87
|
+
for (u=0; u<size; u++) {
|
|
88
|
+
RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
|
|
89
|
+
}
|
|
90
|
+
RAWLOG(6, " \n");
|
|
91
|
+
return size;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
static size_t showHNodeBits(const nodeElt* hnode, size_t size)
|
|
95
|
+
{
|
|
96
|
+
size_t u;
|
|
97
|
+
for (u=0; u<size; u++) {
|
|
98
|
+
RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
|
|
99
|
+
}
|
|
100
|
+
RAWLOG(6, " \n");
|
|
101
|
+
return size;
|
|
51
102
|
}
|
|
52
103
|
|
|
104
|
+
#endif
|
|
105
|
+
|
|
53
106
|
|
|
54
107
|
/* *******************************************************
|
|
55
108
|
* HUF : Huffman block compression
|
|
56
109
|
*********************************************************/
|
|
110
|
+
#define HUF_WORKSPACE_MAX_ALIGNMENT 8
|
|
111
|
+
|
|
112
|
+
static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
|
|
113
|
+
{
|
|
114
|
+
size_t const mask = align - 1;
|
|
115
|
+
size_t const rem = (size_t)workspace & mask;
|
|
116
|
+
size_t const add = (align - rem) & mask;
|
|
117
|
+
BYTE* const aligned = (BYTE*)workspace + add;
|
|
118
|
+
assert((align & (align - 1)) == 0); /* pow 2 */
|
|
119
|
+
assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
|
|
120
|
+
if (*workspaceSizePtr >= add) {
|
|
121
|
+
assert(add < align);
|
|
122
|
+
assert(((size_t)aligned & mask) == 0);
|
|
123
|
+
*workspaceSizePtr -= add;
|
|
124
|
+
return aligned;
|
|
125
|
+
} else {
|
|
126
|
+
*workspaceSizePtr = 0;
|
|
127
|
+
return NULL;
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
|
|
57
132
|
/* HUF_compressWeights() :
|
|
58
133
|
* Same as FSE_compress(), but dedicated to huff0's weights compression.
|
|
59
134
|
* The use case needs much less stack memory.
|
|
60
135
|
* Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
|
|
61
136
|
*/
|
|
62
137
|
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
|
|
63
|
-
|
|
138
|
+
|
|
139
|
+
typedef struct {
|
|
140
|
+
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
|
|
141
|
+
U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
|
|
142
|
+
unsigned count[HUF_TABLELOG_MAX+1];
|
|
143
|
+
S16 norm[HUF_TABLELOG_MAX+1];
|
|
144
|
+
} HUF_CompressWeightsWksp;
|
|
145
|
+
|
|
146
|
+
static size_t
|
|
147
|
+
HUF_compressWeights(void* dst, size_t dstSize,
|
|
148
|
+
const void* weightTable, size_t wtSize,
|
|
149
|
+
void* workspace, size_t workspaceSize)
|
|
64
150
|
{
|
|
65
151
|
BYTE* const ostart = (BYTE*) dst;
|
|
66
152
|
BYTE* op = ostart;
|
|
@@ -68,33 +154,30 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
|
|
|
68
154
|
|
|
69
155
|
unsigned maxSymbolValue = HUF_TABLELOG_MAX;
|
|
70
156
|
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
|
|
157
|
+
HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
|
|
71
158
|
|
|
72
|
-
|
|
73
|
-
BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
|
|
74
|
-
|
|
75
|
-
unsigned count[HUF_TABLELOG_MAX+1];
|
|
76
|
-
S16 norm[HUF_TABLELOG_MAX+1];
|
|
159
|
+
if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
|
|
77
160
|
|
|
78
161
|
/* init conditions */
|
|
79
162
|
if (wtSize <= 1) return 0; /* Not compressible */
|
|
80
163
|
|
|
81
164
|
/* Scan input and build symbol stats */
|
|
82
|
-
{ unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
|
|
165
|
+
{ unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */
|
|
83
166
|
if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
|
|
84
167
|
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
|
|
85
168
|
}
|
|
86
169
|
|
|
87
170
|
tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
|
|
88
|
-
CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
|
|
171
|
+
CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
|
|
89
172
|
|
|
90
173
|
/* Write table description header */
|
|
91
|
-
{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
|
|
174
|
+
{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
|
|
92
175
|
op += hSize;
|
|
93
176
|
}
|
|
94
177
|
|
|
95
178
|
/* Compress */
|
|
96
|
-
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
|
|
97
|
-
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
|
|
179
|
+
CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
|
|
180
|
+
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
|
|
98
181
|
if (cSize == 0) return 0; /* not enough space for compressed data */
|
|
99
182
|
op += cSize;
|
|
100
183
|
}
|
|
@@ -102,35 +185,72 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
|
|
|
102
185
|
return (size_t)(op-ostart);
|
|
103
186
|
}
|
|
104
187
|
|
|
188
|
+
static size_t HUF_getNbBits(HUF_CElt elt)
|
|
189
|
+
{
|
|
190
|
+
return elt & 0xFF;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
static size_t HUF_getNbBitsFast(HUF_CElt elt)
|
|
194
|
+
{
|
|
195
|
+
return elt;
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
static size_t HUF_getValue(HUF_CElt elt)
|
|
199
|
+
{
|
|
200
|
+
return elt & ~(size_t)0xFF;
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
static size_t HUF_getValueFast(HUF_CElt elt)
|
|
204
|
+
{
|
|
205
|
+
return elt;
|
|
206
|
+
}
|
|
105
207
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
208
|
+
static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
|
|
209
|
+
{
|
|
210
|
+
assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
|
|
211
|
+
*elt = nbBits;
|
|
212
|
+
}
|
|
110
213
|
|
|
111
|
-
|
|
112
|
-
`CTable` : Huffman tree to save, using huf representation.
|
|
113
|
-
@return : size of saved CTable */
|
|
114
|
-
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
|
|
115
|
-
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
|
|
214
|
+
static void HUF_setValue(HUF_CElt* elt, size_t value)
|
|
116
215
|
{
|
|
216
|
+
size_t const nbBits = HUF_getNbBits(*elt);
|
|
217
|
+
if (nbBits > 0) {
|
|
218
|
+
assert((value >> nbBits) == 0);
|
|
219
|
+
*elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
typedef struct {
|
|
224
|
+
HUF_CompressWeightsWksp wksp;
|
|
117
225
|
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
|
|
118
226
|
BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
|
|
227
|
+
} HUF_WriteCTableWksp;
|
|
228
|
+
|
|
229
|
+
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
|
|
230
|
+
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
|
|
231
|
+
void* workspace, size_t workspaceSize)
|
|
232
|
+
{
|
|
233
|
+
HUF_CElt const* const ct = CTable + 1;
|
|
119
234
|
BYTE* op = (BYTE*)dst;
|
|
120
235
|
U32 n;
|
|
236
|
+
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
|
|
237
|
+
|
|
238
|
+
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
|
|
121
239
|
|
|
122
|
-
|
|
240
|
+
/* check conditions */
|
|
241
|
+
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
|
|
123
242
|
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
|
|
124
243
|
|
|
125
244
|
/* convert to weight */
|
|
126
|
-
bitsToWeight[0] = 0;
|
|
245
|
+
wksp->bitsToWeight[0] = 0;
|
|
127
246
|
for (n=1; n<huffLog+1; n++)
|
|
128
|
-
bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
|
|
247
|
+
wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
|
|
129
248
|
for (n=0; n<maxSymbolValue; n++)
|
|
130
|
-
huffWeight[n] = bitsToWeight[
|
|
249
|
+
wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
|
|
131
250
|
|
|
132
251
|
/* attempt weights compression by FSE */
|
|
133
|
-
|
|
252
|
+
if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
|
|
253
|
+
{ CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
|
|
134
254
|
if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
|
|
135
255
|
op[0] = (BYTE)hSize;
|
|
136
256
|
return hSize+1;
|
|
@@ -140,9 +260,9 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
|
|
|
140
260
|
if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
|
|
141
261
|
if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
|
|
142
262
|
op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
|
|
143
|
-
huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
|
|
263
|
+
wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
|
|
144
264
|
for (n=0; n<maxSymbolValue; n+=2)
|
|
145
|
-
op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
|
|
265
|
+
op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
|
|
146
266
|
return ((maxSymbolValue+1)/2) + 1;
|
|
147
267
|
}
|
|
148
268
|
|
|
@@ -153,34 +273,36 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
|
|
153
273
|
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
|
|
154
274
|
U32 tableLog = 0;
|
|
155
275
|
U32 nbSymbols = 0;
|
|
276
|
+
HUF_CElt* const ct = CTable + 1;
|
|
156
277
|
|
|
157
278
|
/* get symbol weights */
|
|
158
279
|
CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
|
|
280
|
+
*hasZeroWeights = (rankVal[0] > 0);
|
|
159
281
|
|
|
160
282
|
/* check result */
|
|
161
283
|
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
|
|
162
284
|
if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
|
|
163
285
|
|
|
286
|
+
CTable[0] = tableLog;
|
|
287
|
+
|
|
164
288
|
/* Prepare base value per rank */
|
|
165
289
|
{ U32 n, nextRankStart = 0;
|
|
166
290
|
for (n=1; n<=tableLog; n++) {
|
|
167
|
-
U32
|
|
291
|
+
U32 curr = nextRankStart;
|
|
168
292
|
nextRankStart += (rankVal[n] << (n-1));
|
|
169
|
-
rankVal[n] =
|
|
293
|
+
rankVal[n] = curr;
|
|
170
294
|
} }
|
|
171
295
|
|
|
172
296
|
/* fill nbBits */
|
|
173
|
-
*hasZeroWeights = 0;
|
|
174
297
|
{ U32 n; for (n=0; n<nbSymbols; n++) {
|
|
175
298
|
const U32 w = huffWeight[n];
|
|
176
|
-
|
|
177
|
-
CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
|
|
299
|
+
HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
|
|
178
300
|
} }
|
|
179
301
|
|
|
180
302
|
/* fill val */
|
|
181
303
|
{ U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
|
|
182
304
|
U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
|
|
183
|
-
{ U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[
|
|
305
|
+
{ U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
|
|
184
306
|
/* determine starting value per rank */
|
|
185
307
|
valPerRank[tableLog+1] = 0; /* for w==0 */
|
|
186
308
|
{ U16 min = 0;
|
|
@@ -190,92 +312,150 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
|
|
190
312
|
min >>= 1;
|
|
191
313
|
} }
|
|
192
314
|
/* assign value within rank, symbol order */
|
|
193
|
-
{ U32 n; for (n=0; n<nbSymbols; n++)
|
|
315
|
+
{ U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
|
|
194
316
|
}
|
|
195
317
|
|
|
196
318
|
*maxSymbolValuePtr = nbSymbols - 1;
|
|
197
319
|
return readSize;
|
|
198
320
|
}
|
|
199
321
|
|
|
200
|
-
U32
|
|
322
|
+
U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
|
|
201
323
|
{
|
|
202
|
-
const HUF_CElt*
|
|
324
|
+
const HUF_CElt* const ct = CTable + 1;
|
|
203
325
|
assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
|
|
204
|
-
return
|
|
326
|
+
return (U32)HUF_getNbBits(ct[symbolValue]);
|
|
205
327
|
}
|
|
206
328
|
|
|
207
329
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
330
|
+
/**
|
|
331
|
+
* HUF_setMaxHeight():
|
|
332
|
+
* Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
|
|
333
|
+
*
|
|
334
|
+
* It attempts to convert all nodes with nbBits > @targetNbBits
|
|
335
|
+
* to employ @targetNbBits instead. Then it adjusts the tree
|
|
336
|
+
* so that it remains a valid canonical Huffman tree.
|
|
337
|
+
*
|
|
338
|
+
* @pre The sum of the ranks of each symbol == 2^largestBits,
|
|
339
|
+
* where largestBits == huffNode[lastNonNull].nbBits.
|
|
340
|
+
* @post The sum of the ranks of each symbol == 2^largestBits,
|
|
341
|
+
* where largestBits is the return value (expected <= targetNbBits).
|
|
342
|
+
*
|
|
343
|
+
* @param huffNode The Huffman tree modified in place to enforce targetNbBits.
|
|
344
|
+
* It's presumed sorted, from most frequent to rarest symbol.
|
|
345
|
+
* @param lastNonNull The symbol with the lowest count in the Huffman tree.
|
|
346
|
+
* @param targetNbBits The allowed number of bits, which the Huffman tree
|
|
347
|
+
* may not respect. After this function the Huffman tree will
|
|
348
|
+
* respect targetNbBits.
|
|
349
|
+
* @return The maximum number of bits of the Huffman tree after adjustment.
|
|
350
|
+
*/
|
|
351
|
+
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
|
|
216
352
|
{
|
|
217
353
|
const U32 largestBits = huffNode[lastNonNull].nbBits;
|
|
218
|
-
|
|
354
|
+
/* early exit : no elt > targetNbBits, so the tree is already valid. */
|
|
355
|
+
if (largestBits <= targetNbBits) return largestBits;
|
|
356
|
+
|
|
357
|
+
DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
|
|
219
358
|
|
|
220
359
|
/* there are several too large elements (at least >= 2) */
|
|
221
360
|
{ int totalCost = 0;
|
|
222
|
-
const U32 baseCost = 1 << (largestBits -
|
|
361
|
+
const U32 baseCost = 1 << (largestBits - targetNbBits);
|
|
223
362
|
int n = (int)lastNonNull;
|
|
224
363
|
|
|
225
|
-
|
|
364
|
+
/* Adjust any ranks > targetNbBits to targetNbBits.
|
|
365
|
+
* Compute totalCost, which is how far the sum of the ranks is
|
|
366
|
+
* over 2^largestBits after adjusting the offending ranks.
|
|
367
|
+
*/
|
|
368
|
+
while (huffNode[n].nbBits > targetNbBits) {
|
|
226
369
|
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
|
|
227
|
-
huffNode[n].nbBits = (BYTE)
|
|
228
|
-
n
|
|
229
|
-
}
|
|
230
|
-
|
|
370
|
+
huffNode[n].nbBits = (BYTE)targetNbBits;
|
|
371
|
+
n--;
|
|
372
|
+
}
|
|
373
|
+
/* n stops at huffNode[n].nbBits <= targetNbBits */
|
|
374
|
+
assert(huffNode[n].nbBits <= targetNbBits);
|
|
375
|
+
/* n ends at the index of the smallest symbol using < targetNbBits */
|
|
376
|
+
while (huffNode[n].nbBits == targetNbBits) --n;
|
|
231
377
|
|
|
232
|
-
/* renorm totalCost
|
|
233
|
-
|
|
378
|
+
/* renorm totalCost from 2^largestBits to 2^targetNbBits
|
|
379
|
+
* note : totalCost is necessarily a multiple of baseCost */
|
|
380
|
+
assert(((U32)totalCost & (baseCost - 1)) == 0);
|
|
381
|
+
totalCost >>= (largestBits - targetNbBits);
|
|
382
|
+
assert(totalCost > 0);
|
|
234
383
|
|
|
235
384
|
/* repay normalized cost */
|
|
236
385
|
{ U32 const noSymbol = 0xF0F0F0F0;
|
|
237
386
|
U32 rankLast[HUF_TABLELOG_MAX+2];
|
|
238
387
|
|
|
239
|
-
/* Get pos of last (smallest) symbol per rank */
|
|
240
|
-
|
|
241
|
-
{ U32 currentNbBits =
|
|
388
|
+
/* Get pos of last (smallest = lowest cum. count) symbol per rank */
|
|
389
|
+
ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
|
|
390
|
+
{ U32 currentNbBits = targetNbBits;
|
|
242
391
|
int pos;
|
|
243
392
|
for (pos=n ; pos >= 0; pos--) {
|
|
244
393
|
if (huffNode[pos].nbBits >= currentNbBits) continue;
|
|
245
|
-
currentNbBits = huffNode[pos].nbBits; /* <
|
|
246
|
-
rankLast[
|
|
394
|
+
currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */
|
|
395
|
+
rankLast[targetNbBits-currentNbBits] = (U32)pos;
|
|
247
396
|
} }
|
|
248
397
|
|
|
249
398
|
while (totalCost > 0) {
|
|
250
|
-
|
|
399
|
+
/* Try to reduce the next power of 2 above totalCost because we
|
|
400
|
+
* gain back half the rank.
|
|
401
|
+
*/
|
|
402
|
+
U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
|
|
251
403
|
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
|
|
252
404
|
U32 const highPos = rankLast[nBitsToDecrease];
|
|
253
405
|
U32 const lowPos = rankLast[nBitsToDecrease-1];
|
|
254
406
|
if (highPos == noSymbol) continue;
|
|
407
|
+
/* Decrease highPos if no symbols of lowPos or if it is
|
|
408
|
+
* not cheaper to remove 2 lowPos than highPos.
|
|
409
|
+
*/
|
|
255
410
|
if (lowPos == noSymbol) break;
|
|
256
411
|
{ U32 const highTotal = huffNode[highPos].count;
|
|
257
412
|
U32 const lowTotal = 2 * huffNode[lowPos].count;
|
|
258
413
|
if (highTotal <= lowTotal) break;
|
|
259
414
|
} }
|
|
260
415
|
/* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
|
|
416
|
+
assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
|
|
261
417
|
/* HUF_TABLELOG_MAX test just to please gcc 5+; but it should not be necessary */
|
|
262
418
|
while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
|
|
263
|
-
nBitsToDecrease
|
|
419
|
+
nBitsToDecrease++;
|
|
420
|
+
assert(rankLast[nBitsToDecrease] != noSymbol);
|
|
421
|
+
/* Increase the number of bits to gain back half the rank cost. */
|
|
264
422
|
totalCost -= 1 << (nBitsToDecrease-1);
|
|
423
|
+
huffNode[rankLast[nBitsToDecrease]].nbBits++;
|
|
424
|
+
|
|
425
|
+
/* Fix up the new rank.
|
|
426
|
+
* If the new rank was empty, this symbol is now its smallest.
|
|
427
|
+
* Otherwise, this symbol will be the largest in the new rank so no adjustment.
|
|
428
|
+
*/
|
|
265
429
|
if (rankLast[nBitsToDecrease-1] == noSymbol)
|
|
266
|
-
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
|
|
267
|
-
|
|
430
|
+
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
|
|
431
|
+
/* Fix up the old rank.
|
|
432
|
+
* If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
|
|
433
|
+
* it must be the only symbol in its rank, so the old rank now has no symbols.
|
|
434
|
+
* Otherwise, since the Huffman nodes are sorted by count, the previous position is now
|
|
435
|
+
* the smallest node in the rank. If the previous position belongs to a different rank,
|
|
436
|
+
* then the rank is now empty.
|
|
437
|
+
*/
|
|
268
438
|
if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
|
|
269
439
|
rankLast[nBitsToDecrease] = noSymbol;
|
|
270
440
|
else {
|
|
271
441
|
rankLast[nBitsToDecrease]--;
|
|
272
|
-
if (huffNode[rankLast[nBitsToDecrease]].nbBits !=
|
|
442
|
+
if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
|
|
273
443
|
rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
|
|
274
|
-
|
|
275
|
-
|
|
444
|
+
}
|
|
445
|
+
} /* while (totalCost > 0) */
|
|
446
|
+
|
|
447
|
+
/* If we've removed too much weight, then we have to add it back.
|
|
448
|
+
* To avoid overshooting again, we only adjust the smallest rank.
|
|
449
|
+
* We take the largest nodes from the lowest rank 0 and move them
|
|
450
|
+
* to rank 1. There's guaranteed to be enough rank 0 symbols because
|
|
451
|
+
* TODO.
|
|
452
|
+
*/
|
|
276
453
|
while (totalCost < 0) { /* Sometimes, cost correction overshoot */
|
|
277
|
-
|
|
278
|
-
|
|
454
|
+
/* special case : no rank 1 symbol (using targetNbBits-1);
|
|
455
|
+
* let's create one from largest rank 0 (using targetNbBits).
|
|
456
|
+
*/
|
|
457
|
+
if (rankLast[1] == noSymbol) {
|
|
458
|
+
while (huffNode[n].nbBits == targetNbBits) n--;
|
|
279
459
|
huffNode[n+1].nbBits--;
|
|
280
460
|
assert(n >= 0);
|
|
281
461
|
rankLast[1] = (U32)(n+1);
|
|
@@ -285,47 +465,178 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
|
|
|
285
465
|
huffNode[ rankLast[1] + 1 ].nbBits--;
|
|
286
466
|
rankLast[1]++;
|
|
287
467
|
totalCost ++;
|
|
288
|
-
|
|
468
|
+
}
|
|
469
|
+
} /* repay normalized cost */
|
|
470
|
+
} /* there are several too large elements (at least >= 2) */
|
|
289
471
|
|
|
290
|
-
return
|
|
472
|
+
return targetNbBits;
|
|
291
473
|
}
|
|
292
474
|
|
|
293
475
|
typedef struct {
|
|
294
|
-
|
|
295
|
-
|
|
476
|
+
U16 base;
|
|
477
|
+
U16 curr;
|
|
296
478
|
} rankPos;
|
|
297
479
|
|
|
298
|
-
typedef nodeElt huffNodeTable[
|
|
480
|
+
typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
|
|
299
481
|
|
|
300
|
-
|
|
482
|
+
/* Number of buckets available for HUF_sort() */
|
|
483
|
+
#define RANK_POSITION_TABLE_SIZE 192
|
|
301
484
|
|
|
302
485
|
typedef struct {
|
|
303
486
|
huffNodeTable huffNodeTbl;
|
|
304
487
|
rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
|
|
305
488
|
} HUF_buildCTable_wksp_tables;
|
|
306
489
|
|
|
307
|
-
|
|
308
|
-
|
|
490
|
+
/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
|
|
491
|
+
* Strategy is to use as many buckets as possible for representing distinct
|
|
492
|
+
* counts while using the remainder to represent all "large" counts.
|
|
493
|
+
*
|
|
494
|
+
* To satisfy this requirement for 192 buckets, we can do the following:
|
|
495
|
+
* Let buckets 0-166 represent distinct counts of [0, 166]
|
|
496
|
+
* Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
|
|
497
|
+
*/
|
|
498
|
+
#define RANK_POSITION_MAX_COUNT_LOG 32
|
|
499
|
+
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
|
|
500
|
+
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
|
|
501
|
+
|
|
502
|
+
/* Return the appropriate bucket index for a given count. See definition of
|
|
503
|
+
* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
|
|
504
|
+
*/
|
|
505
|
+
static U32 HUF_getIndex(U32 const count) {
|
|
506
|
+
return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
|
|
507
|
+
? count
|
|
508
|
+
: ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
/* Helper swap function for HUF_quickSortPartition() */
|
|
512
|
+
static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
|
|
513
|
+
nodeElt tmp = *a;
|
|
514
|
+
*a = *b;
|
|
515
|
+
*b = tmp;
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
/* Returns 0 if the huffNode array is not sorted by descending count */
|
|
519
|
+
MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
|
|
520
|
+
U32 i;
|
|
521
|
+
for (i = 1; i < maxSymbolValue1; ++i) {
|
|
522
|
+
if (huffNode[i].count > huffNode[i-1].count) {
|
|
523
|
+
return 0;
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
return 1;
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
/* Insertion sort by descending order */
|
|
530
|
+
HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
|
|
531
|
+
int i;
|
|
532
|
+
int const size = high-low+1;
|
|
533
|
+
huffNode += low;
|
|
534
|
+
for (i = 1; i < size; ++i) {
|
|
535
|
+
nodeElt const key = huffNode[i];
|
|
536
|
+
int j = i - 1;
|
|
537
|
+
while (j >= 0 && huffNode[j].count < key.count) {
|
|
538
|
+
huffNode[j + 1] = huffNode[j];
|
|
539
|
+
j--;
|
|
540
|
+
}
|
|
541
|
+
huffNode[j + 1] = key;
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
/* Pivot helper function for quicksort. */
|
|
546
|
+
static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
|
|
547
|
+
/* Simply select rightmost element as pivot. "Better" selectors like
|
|
548
|
+
* median-of-three don't experimentally appear to have any benefit.
|
|
549
|
+
*/
|
|
550
|
+
U32 const pivot = arr[high].count;
|
|
551
|
+
int i = low - 1;
|
|
552
|
+
int j = low;
|
|
553
|
+
for ( ; j < high; j++) {
|
|
554
|
+
if (arr[j].count > pivot) {
|
|
555
|
+
i++;
|
|
556
|
+
HUF_swapNodes(&arr[i], &arr[j]);
|
|
557
|
+
}
|
|
558
|
+
}
|
|
559
|
+
HUF_swapNodes(&arr[i + 1], &arr[high]);
|
|
560
|
+
return i + 1;
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
/* Classic quicksort by descending with partially iterative calls
|
|
564
|
+
* to reduce worst case callstack size.
|
|
565
|
+
*/
|
|
566
|
+
static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
|
|
567
|
+
int const kInsertionSortThreshold = 8;
|
|
568
|
+
if (high - low < kInsertionSortThreshold) {
|
|
569
|
+
HUF_insertionSort(arr, low, high);
|
|
570
|
+
return;
|
|
571
|
+
}
|
|
572
|
+
while (low < high) {
|
|
573
|
+
int const idx = HUF_quickSortPartition(arr, low, high);
|
|
574
|
+
if (idx - low < high - idx) {
|
|
575
|
+
HUF_simpleQuickSort(arr, low, idx - 1);
|
|
576
|
+
low = idx + 1;
|
|
577
|
+
} else {
|
|
578
|
+
HUF_simpleQuickSort(arr, idx + 1, high);
|
|
579
|
+
high = idx - 1;
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
/**
|
|
585
|
+
* HUF_sort():
|
|
586
|
+
* Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
|
|
587
|
+
* This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
|
|
588
|
+
*
|
|
589
|
+
* @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
|
|
590
|
+
* Must have (maxSymbolValue + 1) entries.
|
|
591
|
+
* @param[in] count Histogram of the symbols.
|
|
592
|
+
* @param[in] maxSymbolValue Maximum symbol value.
|
|
593
|
+
* @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
|
|
594
|
+
*/
|
|
595
|
+
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
|
|
309
596
|
U32 n;
|
|
597
|
+
U32 const maxSymbolValue1 = maxSymbolValue+1;
|
|
598
|
+
|
|
599
|
+
/* Compute base and set curr to base.
|
|
600
|
+
* For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
|
|
601
|
+
* See HUF_getIndex to see bucketing strategy.
|
|
602
|
+
* We attribute each symbol to lowerRank's base value, because we want to know where
|
|
603
|
+
* each rank begins in the output, so for rank R we want to count ranks R+1 and above.
|
|
604
|
+
*/
|
|
605
|
+
ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
|
|
606
|
+
for (n = 0; n < maxSymbolValue1; ++n) {
|
|
607
|
+
U32 lowerRank = HUF_getIndex(count[n]);
|
|
608
|
+
assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
|
|
609
|
+
rankPosition[lowerRank].base++;
|
|
610
|
+
}
|
|
310
611
|
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
rankPosition[
|
|
612
|
+
assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
|
|
613
|
+
/* Set up the rankPosition table */
|
|
614
|
+
for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
|
|
615
|
+
rankPosition[n-1].base += rankPosition[n].base;
|
|
616
|
+
rankPosition[n-1].curr = rankPosition[n-1].base;
|
|
315
617
|
}
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
for (n=0; n
|
|
618
|
+
|
|
619
|
+
/* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
|
|
620
|
+
for (n = 0; n < maxSymbolValue1; ++n) {
|
|
319
621
|
U32 const c = count[n];
|
|
320
|
-
U32 const r =
|
|
321
|
-
U32 pos = rankPosition[r].
|
|
322
|
-
|
|
323
|
-
huffNode[pos] = huffNode[pos-1];
|
|
324
|
-
pos--;
|
|
325
|
-
}
|
|
622
|
+
U32 const r = HUF_getIndex(c) + 1;
|
|
623
|
+
U32 const pos = rankPosition[r].curr++;
|
|
624
|
+
assert(pos < maxSymbolValue1);
|
|
326
625
|
huffNode[pos].count = c;
|
|
327
626
|
huffNode[pos].byte = (BYTE)n;
|
|
328
627
|
}
|
|
628
|
+
|
|
629
|
+
/* Sort each bucket. */
|
|
630
|
+
for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
|
|
631
|
+
int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
|
|
632
|
+
U32 const bucketStartIdx = rankPosition[n].base;
|
|
633
|
+
if (bucketSize > 1) {
|
|
634
|
+
assert(bucketStartIdx < maxSymbolValue1);
|
|
635
|
+
HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
assert(HUF_isSorted(huffNode, maxSymbolValue1));
|
|
329
640
|
}
|
|
330
641
|
|
|
331
642
|
|
|
@@ -335,28 +646,21 @@ static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValu
|
|
|
335
646
|
*/
|
|
336
647
|
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
|
|
337
648
|
|
|
338
|
-
|
|
649
|
+
/* HUF_buildTree():
|
|
650
|
+
* Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
|
|
651
|
+
*
|
|
652
|
+
* @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array.
|
|
653
|
+
* @param maxSymbolValue The maximum symbol value.
|
|
654
|
+
* @return The smallest node in the Huffman tree (by count).
|
|
655
|
+
*/
|
|
656
|
+
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
|
|
339
657
|
{
|
|
340
|
-
|
|
341
|
-
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
|
|
342
|
-
nodeElt* const huffNode = huffNode0+1;
|
|
658
|
+
nodeElt* const huffNode0 = huffNode - 1;
|
|
343
659
|
int nonNullRank;
|
|
344
660
|
int lowS, lowN;
|
|
345
661
|
int nodeNb = STARTNODE;
|
|
346
662
|
int n, nodeRoot;
|
|
347
|
-
|
|
348
|
-
/* safety checks */
|
|
349
|
-
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
|
|
350
|
-
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
|
|
351
|
-
return ERROR(workSpace_tooSmall);
|
|
352
|
-
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
|
|
353
|
-
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
|
|
354
|
-
return ERROR(maxSymbolValue_tooLarge);
|
|
355
|
-
memset(huffNode0, 0, sizeof(huffNodeTable));
|
|
356
|
-
|
|
357
|
-
/* sort, decreasing order */
|
|
358
|
-
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
|
|
359
|
-
|
|
663
|
+
DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
|
|
360
664
|
/* init for parents */
|
|
361
665
|
nonNullRank = (int)maxSymbolValue;
|
|
362
666
|
while(huffNode[nonNullRank].count == 0) nonNullRank--;
|
|
@@ -383,127 +687,406 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
|
|
|
383
687
|
for (n=0; n<=nonNullRank; n++)
|
|
384
688
|
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
|
|
385
689
|
|
|
386
|
-
|
|
387
|
-
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
|
|
388
|
-
|
|
389
|
-
/* fill result into tree (val, nbBits) */
|
|
390
|
-
{ U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
|
|
391
|
-
U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
|
|
392
|
-
int const alphabetSize = (int)(maxSymbolValue + 1);
|
|
393
|
-
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
|
|
394
|
-
for (n=0; n<=nonNullRank; n++)
|
|
395
|
-
nbPerRank[huffNode[n].nbBits]++;
|
|
396
|
-
/* determine stating value per rank */
|
|
397
|
-
{ U16 min = 0;
|
|
398
|
-
for (n=(int)maxNbBits; n>0; n--) {
|
|
399
|
-
valPerRank[n] = min; /* get starting value within each rank */
|
|
400
|
-
min += nbPerRank[n];
|
|
401
|
-
min >>= 1;
|
|
402
|
-
} }
|
|
403
|
-
for (n=0; n<alphabetSize; n++)
|
|
404
|
-
tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
|
|
405
|
-
for (n=0; n<alphabetSize; n++)
|
|
406
|
-
tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
|
|
407
|
-
}
|
|
690
|
+
DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
|
|
408
691
|
|
|
409
|
-
return
|
|
692
|
+
return nonNullRank;
|
|
410
693
|
}
|
|
411
694
|
|
|
412
|
-
/**
|
|
413
|
-
*
|
|
414
|
-
*
|
|
695
|
+
/**
|
|
696
|
+
* HUF_buildCTableFromTree():
|
|
697
|
+
* Build the CTable given the Huffman tree in huffNode.
|
|
698
|
+
*
|
|
699
|
+
* @param[out] CTable The output Huffman CTable.
|
|
700
|
+
* @param huffNode The Huffman tree.
|
|
701
|
+
* @param nonNullRank The last and smallest node in the Huffman tree.
|
|
702
|
+
* @param maxSymbolValue The maximum symbol value.
|
|
703
|
+
* @param maxNbBits The exact maximum number of bits used in the Huffman tree.
|
|
415
704
|
*/
|
|
416
|
-
|
|
705
|
+
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
|
|
417
706
|
{
|
|
418
|
-
|
|
419
|
-
|
|
707
|
+
HUF_CElt* const ct = CTable + 1;
|
|
708
|
+
/* fill result into ctable (val, nbBits) */
|
|
709
|
+
int n;
|
|
710
|
+
U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
|
|
711
|
+
U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
|
|
712
|
+
int const alphabetSize = (int)(maxSymbolValue + 1);
|
|
713
|
+
for (n=0; n<=nonNullRank; n++)
|
|
714
|
+
nbPerRank[huffNode[n].nbBits]++;
|
|
715
|
+
/* determine starting value per rank */
|
|
716
|
+
{ U16 min = 0;
|
|
717
|
+
for (n=(int)maxNbBits; n>0; n--) {
|
|
718
|
+
valPerRank[n] = min; /* get starting value within each rank */
|
|
719
|
+
min += nbPerRank[n];
|
|
720
|
+
min >>= 1;
|
|
721
|
+
} }
|
|
722
|
+
for (n=0; n<alphabetSize; n++)
|
|
723
|
+
HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
|
|
724
|
+
for (n=0; n<alphabetSize; n++)
|
|
725
|
+
HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
|
|
726
|
+
CTable[0] = maxNbBits;
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
size_t
|
|
730
|
+
HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
|
|
731
|
+
void* workSpace, size_t wkspSize)
|
|
732
|
+
{
|
|
733
|
+
HUF_buildCTable_wksp_tables* const wksp_tables =
|
|
734
|
+
(HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
|
|
735
|
+
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
|
|
736
|
+
nodeElt* const huffNode = huffNode0+1;
|
|
737
|
+
int nonNullRank;
|
|
738
|
+
|
|
739
|
+
HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
|
|
740
|
+
|
|
741
|
+
DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
|
|
742
|
+
|
|
743
|
+
/* safety checks */
|
|
744
|
+
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
|
|
745
|
+
return ERROR(workSpace_tooSmall);
|
|
746
|
+
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
|
|
747
|
+
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
|
|
748
|
+
return ERROR(maxSymbolValue_tooLarge);
|
|
749
|
+
ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
|
|
750
|
+
|
|
751
|
+
/* sort, decreasing order */
|
|
752
|
+
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
|
|
753
|
+
DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
|
|
754
|
+
|
|
755
|
+
/* build tree */
|
|
756
|
+
nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
|
|
757
|
+
|
|
758
|
+
/* determine and enforce maxTableLog */
|
|
759
|
+
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
|
|
760
|
+
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
|
|
761
|
+
|
|
762
|
+
HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
|
|
763
|
+
|
|
764
|
+
return maxNbBits;
|
|
420
765
|
}
|
|
421
766
|
|
|
422
767
|
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
|
|
423
768
|
{
|
|
769
|
+
HUF_CElt const* ct = CTable + 1;
|
|
424
770
|
size_t nbBits = 0;
|
|
425
771
|
int s;
|
|
426
772
|
for (s = 0; s <= (int)maxSymbolValue; ++s) {
|
|
427
|
-
nbBits +=
|
|
773
|
+
nbBits += HUF_getNbBits(ct[s]) * count[s];
|
|
428
774
|
}
|
|
429
775
|
return nbBits >> 3;
|
|
430
776
|
}
|
|
431
777
|
|
|
432
778
|
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
|
|
779
|
+
HUF_CElt const* ct = CTable + 1;
|
|
433
780
|
int bad = 0;
|
|
434
781
|
int s;
|
|
435
782
|
for (s = 0; s <= (int)maxSymbolValue; ++s) {
|
|
436
|
-
bad |= (count[s] != 0) & (
|
|
783
|
+
bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
|
|
437
784
|
}
|
|
438
785
|
return !bad;
|
|
439
786
|
}
|
|
440
787
|
|
|
441
788
|
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
|
|
442
789
|
|
|
790
|
+
/** HUF_CStream_t:
|
|
791
|
+
* Huffman uses its own BIT_CStream_t implementation.
|
|
792
|
+
* There are three major differences from BIT_CStream_t:
|
|
793
|
+
* 1. HUF_addBits() takes a HUF_CElt (size_t) which is
|
|
794
|
+
* the pair (nbBits, value) in the format:
|
|
795
|
+
* format:
|
|
796
|
+
* - Bits [0, 4) = nbBits
|
|
797
|
+
* - Bits [4, 64 - nbBits) = 0
|
|
798
|
+
* - Bits [64 - nbBits, 64) = value
|
|
799
|
+
* 2. The bitContainer is built from the upper bits and
|
|
800
|
+
* right shifted. E.g. to add a new value of N bits
|
|
801
|
+
* you right shift the bitContainer by N, then or in
|
|
802
|
+
* the new value into the N upper bits.
|
|
803
|
+
* 3. The bitstream has two bit containers. You can add
|
|
804
|
+
* bits to the second container and merge them into
|
|
805
|
+
* the first container.
|
|
806
|
+
*/
|
|
807
|
+
|
|
808
|
+
#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
|
|
809
|
+
|
|
810
|
+
typedef struct {
|
|
811
|
+
size_t bitContainer[2];
|
|
812
|
+
size_t bitPos[2];
|
|
813
|
+
|
|
814
|
+
BYTE* startPtr;
|
|
815
|
+
BYTE* ptr;
|
|
816
|
+
BYTE* endPtr;
|
|
817
|
+
} HUF_CStream_t;
|
|
818
|
+
|
|
819
|
+
/**! HUF_initCStream():
|
|
820
|
+
* Initializes the bitstream.
|
|
821
|
+
* @returns 0 or an error code.
|
|
822
|
+
*/
|
|
823
|
+
static size_t HUF_initCStream(HUF_CStream_t* bitC,
|
|
824
|
+
void* startPtr, size_t dstCapacity)
|
|
825
|
+
{
|
|
826
|
+
ZSTD_memset(bitC, 0, sizeof(*bitC));
|
|
827
|
+
bitC->startPtr = (BYTE*)startPtr;
|
|
828
|
+
bitC->ptr = bitC->startPtr;
|
|
829
|
+
bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
|
|
830
|
+
if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
|
|
831
|
+
return 0;
|
|
832
|
+
}
|
|
833
|
+
|
|
834
|
+
/*! HUF_addBits():
|
|
835
|
+
* Adds the symbol stored in HUF_CElt elt to the bitstream.
|
|
836
|
+
*
|
|
837
|
+
* @param elt The element we're adding. This is a (nbBits, value) pair.
|
|
838
|
+
* See the HUF_CStream_t docs for the format.
|
|
839
|
+
* @param idx Insert into the bitstream at this idx.
|
|
840
|
+
* @param kFast This is a template parameter. If the bitstream is guaranteed
|
|
841
|
+
* to have at least 4 unused bits after this call it may be 1,
|
|
842
|
+
* otherwise it must be 0. HUF_addBits() is faster when fast is set.
|
|
843
|
+
*/
|
|
844
|
+
FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
|
|
845
|
+
{
|
|
846
|
+
assert(idx <= 1);
|
|
847
|
+
assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
|
|
848
|
+
/* This is efficient on x86-64 with BMI2 because shrx
|
|
849
|
+
* only reads the low 6 bits of the register. The compiler
|
|
850
|
+
* knows this and elides the mask. When fast is set,
|
|
851
|
+
* every operation can use the same value loaded from elt.
|
|
852
|
+
*/
|
|
853
|
+
bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
|
|
854
|
+
bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
|
|
855
|
+
/* We only read the low 8 bits of bitC->bitPos[idx] so it
|
|
856
|
+
* doesn't matter that the high bits have noise from the value.
|
|
857
|
+
*/
|
|
858
|
+
bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
|
|
859
|
+
assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
|
860
|
+
/* The last 4-bits of elt are dirty if fast is set,
|
|
861
|
+
* so we must not be overwriting bits that have already been
|
|
862
|
+
* inserted into the bit container.
|
|
863
|
+
*/
|
|
864
|
+
#if DEBUGLEVEL >= 1
|
|
865
|
+
{
|
|
866
|
+
size_t const nbBits = HUF_getNbBits(elt);
|
|
867
|
+
size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
|
|
868
|
+
(void)dirtyBits;
|
|
869
|
+
/* Middle bits are 0. */
|
|
870
|
+
assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
|
|
871
|
+
/* We didn't overwrite any bits in the bit container. */
|
|
872
|
+
assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
|
873
|
+
(void)dirtyBits;
|
|
874
|
+
}
|
|
875
|
+
#endif
|
|
876
|
+
}
|
|
877
|
+
|
|
878
|
+
FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
|
|
879
|
+
{
|
|
880
|
+
bitC->bitContainer[1] = 0;
|
|
881
|
+
bitC->bitPos[1] = 0;
|
|
882
|
+
}
|
|
883
|
+
|
|
884
|
+
/*! HUF_mergeIndex1() :
|
|
885
|
+
* Merges the bit container @ index 1 into the bit container @ index 0
|
|
886
|
+
* and zeros the bit container @ index 1.
|
|
887
|
+
*/
|
|
888
|
+
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
|
|
889
|
+
{
|
|
890
|
+
assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
|
|
891
|
+
bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
|
|
892
|
+
bitC->bitContainer[0] |= bitC->bitContainer[1];
|
|
893
|
+
bitC->bitPos[0] += bitC->bitPos[1];
|
|
894
|
+
assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
|
895
|
+
}
|
|
896
|
+
|
|
897
|
+
/*! HUF_flushBits() :
|
|
898
|
+
* Flushes the bits in the bit container @ index 0.
|
|
899
|
+
*
|
|
900
|
+
* @post bitPos will be < 8.
|
|
901
|
+
* @param kFast If kFast is set then we must know a-priori that
|
|
902
|
+
* the bit container will not overflow.
|
|
903
|
+
*/
|
|
904
|
+
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
|
|
905
|
+
{
|
|
906
|
+
/* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
|
|
907
|
+
size_t const nbBits = bitC->bitPos[0] & 0xFF;
|
|
908
|
+
size_t const nbBytes = nbBits >> 3;
|
|
909
|
+
/* The top nbBits bits of bitContainer are the ones we need. */
|
|
910
|
+
size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
|
|
911
|
+
/* Mask bitPos to account for the bytes we consumed. */
|
|
912
|
+
bitC->bitPos[0] &= 7;
|
|
913
|
+
assert(nbBits > 0);
|
|
914
|
+
assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
|
|
915
|
+
assert(bitC->ptr <= bitC->endPtr);
|
|
916
|
+
MEM_writeLEST(bitC->ptr, bitContainer);
|
|
917
|
+
bitC->ptr += nbBytes;
|
|
918
|
+
assert(!kFast || bitC->ptr <= bitC->endPtr);
|
|
919
|
+
if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
|
|
920
|
+
/* bitContainer doesn't need to be modified because the leftover
|
|
921
|
+
* bits are already the top bitPos bits. And we don't care about
|
|
922
|
+
* noise in the lower values.
|
|
923
|
+
*/
|
|
924
|
+
}
|
|
925
|
+
|
|
926
|
+
/*! HUF_endMark()
|
|
927
|
+
* @returns The Huffman stream end mark: A 1-bit value = 1.
|
|
928
|
+
*/
|
|
929
|
+
static HUF_CElt HUF_endMark(void)
|
|
930
|
+
{
|
|
931
|
+
HUF_CElt endMark;
|
|
932
|
+
HUF_setNbBits(&endMark, 1);
|
|
933
|
+
HUF_setValue(&endMark, 1);
|
|
934
|
+
return endMark;
|
|
935
|
+
}
|
|
936
|
+
|
|
937
|
+
/*! HUF_closeCStream() :
|
|
938
|
+
* @return Size of CStream, in bytes,
|
|
939
|
+
* or 0 if it could not fit into dstBuffer */
|
|
940
|
+
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
|
|
941
|
+
{
|
|
942
|
+
HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
|
|
943
|
+
HUF_flushBits(bitC, /* kFast */ 0);
|
|
944
|
+
{
|
|
945
|
+
size_t const nbBits = bitC->bitPos[0] & 0xFF;
|
|
946
|
+
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
|
|
947
|
+
return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
|
|
948
|
+
}
|
|
949
|
+
}
|
|
950
|
+
|
|
443
951
|
FORCE_INLINE_TEMPLATE void
|
|
444
|
-
HUF_encodeSymbol(
|
|
952
|
+
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
|
|
445
953
|
{
|
|
446
|
-
|
|
954
|
+
HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
|
|
447
955
|
}
|
|
448
956
|
|
|
449
|
-
|
|
957
|
+
FORCE_INLINE_TEMPLATE void
|
|
958
|
+
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
|
|
959
|
+
const BYTE* ip, size_t srcSize,
|
|
960
|
+
const HUF_CElt* ct,
|
|
961
|
+
int kUnroll, int kFastFlush, int kLastFast)
|
|
962
|
+
{
|
|
963
|
+
/* Join to kUnroll */
|
|
964
|
+
int n = (int)srcSize;
|
|
965
|
+
int rem = n % kUnroll;
|
|
966
|
+
if (rem > 0) {
|
|
967
|
+
for (; rem > 0; --rem) {
|
|
968
|
+
HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
|
|
969
|
+
}
|
|
970
|
+
HUF_flushBits(bitC, kFastFlush);
|
|
971
|
+
}
|
|
972
|
+
assert(n % kUnroll == 0);
|
|
973
|
+
|
|
974
|
+
/* Join to 2 * kUnroll */
|
|
975
|
+
if (n % (2 * kUnroll)) {
|
|
976
|
+
int u;
|
|
977
|
+
for (u = 1; u < kUnroll; ++u) {
|
|
978
|
+
HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
|
|
979
|
+
}
|
|
980
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
|
|
981
|
+
HUF_flushBits(bitC, kFastFlush);
|
|
982
|
+
n -= kUnroll;
|
|
983
|
+
}
|
|
984
|
+
assert(n % (2 * kUnroll) == 0);
|
|
450
985
|
|
|
451
|
-
|
|
452
|
-
|
|
986
|
+
for (; n>0; n-= 2 * kUnroll) {
|
|
987
|
+
/* Encode kUnroll symbols into the bitstream @ index 0. */
|
|
988
|
+
int u;
|
|
989
|
+
for (u = 1; u < kUnroll; ++u) {
|
|
990
|
+
HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
|
|
991
|
+
}
|
|
992
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
|
|
993
|
+
HUF_flushBits(bitC, kFastFlush);
|
|
994
|
+
/* Encode kUnroll symbols into the bitstream @ index 1.
|
|
995
|
+
* This allows us to start filling the bit container
|
|
996
|
+
* without any data dependencies.
|
|
997
|
+
*/
|
|
998
|
+
HUF_zeroIndex1(bitC);
|
|
999
|
+
for (u = 1; u < kUnroll; ++u) {
|
|
1000
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
|
|
1001
|
+
}
|
|
1002
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
|
|
1003
|
+
/* Merge bitstream @ index 1 into the bitstream @ index 0 */
|
|
1004
|
+
HUF_mergeIndex1(bitC);
|
|
1005
|
+
HUF_flushBits(bitC, kFastFlush);
|
|
1006
|
+
}
|
|
1007
|
+
assert(n == 0);
|
|
1008
|
+
|
|
1009
|
+
}
|
|
1010
|
+
|
|
1011
|
+
/**
|
|
1012
|
+
* Returns a tight upper bound on the output space needed by Huffman
|
|
1013
|
+
* with 8 bytes buffer to handle over-writes. If the output is at least
|
|
1014
|
+
* this large we don't need to do bounds checks during Huffman encoding.
|
|
1015
|
+
*/
|
|
1016
|
+
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
|
|
1017
|
+
{
|
|
1018
|
+
return ((srcSize * tableLog) >> 3) + 8;
|
|
1019
|
+
}
|
|
453
1020
|
|
|
454
|
-
#define HUF_FLUSHBITS_2(stream) \
|
|
455
|
-
if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
|
|
456
1021
|
|
|
457
1022
|
FORCE_INLINE_TEMPLATE size_t
|
|
458
1023
|
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
|
|
459
1024
|
const void* src, size_t srcSize,
|
|
460
1025
|
const HUF_CElt* CTable)
|
|
461
1026
|
{
|
|
1027
|
+
U32 const tableLog = (U32)CTable[0];
|
|
1028
|
+
HUF_CElt const* ct = CTable + 1;
|
|
462
1029
|
const BYTE* ip = (const BYTE*) src;
|
|
463
1030
|
BYTE* const ostart = (BYTE*)dst;
|
|
464
1031
|
BYTE* const oend = ostart + dstSize;
|
|
465
1032
|
BYTE* op = ostart;
|
|
466
|
-
|
|
467
|
-
BIT_CStream_t bitC;
|
|
1033
|
+
HUF_CStream_t bitC;
|
|
468
1034
|
|
|
469
1035
|
/* init */
|
|
470
1036
|
if (dstSize < 8) return 0; /* not enough space to compress */
|
|
471
|
-
{ size_t const initErr =
|
|
1037
|
+
{ size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
|
|
472
1038
|
if (HUF_isError(initErr)) return 0; }
|
|
473
1039
|
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
{
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
1040
|
+
if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
|
|
1041
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
|
|
1042
|
+
else {
|
|
1043
|
+
if (MEM_32bits()) {
|
|
1044
|
+
switch (tableLog) {
|
|
1045
|
+
case 11:
|
|
1046
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
1047
|
+
break;
|
|
1048
|
+
case 10: ZSTD_FALLTHROUGH;
|
|
1049
|
+
case 9: ZSTD_FALLTHROUGH;
|
|
1050
|
+
case 8:
|
|
1051
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
|
|
1052
|
+
break;
|
|
1053
|
+
case 7: ZSTD_FALLTHROUGH;
|
|
1054
|
+
default:
|
|
1055
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
|
|
1056
|
+
break;
|
|
1057
|
+
}
|
|
1058
|
+
} else {
|
|
1059
|
+
switch (tableLog) {
|
|
1060
|
+
case 11:
|
|
1061
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
1062
|
+
break;
|
|
1063
|
+
case 10:
|
|
1064
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
|
|
1065
|
+
break;
|
|
1066
|
+
case 9:
|
|
1067
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
1068
|
+
break;
|
|
1069
|
+
case 8:
|
|
1070
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
1071
|
+
break;
|
|
1072
|
+
case 7:
|
|
1073
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
1074
|
+
break;
|
|
1075
|
+
case 6: ZSTD_FALLTHROUGH;
|
|
1076
|
+
default:
|
|
1077
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
|
|
1078
|
+
break;
|
|
1079
|
+
}
|
|
1080
|
+
}
|
|
499
1081
|
}
|
|
1082
|
+
assert(bitC.ptr <= bitC.endPtr);
|
|
500
1083
|
|
|
501
|
-
return
|
|
1084
|
+
return HUF_closeCStream(&bitC);
|
|
502
1085
|
}
|
|
503
1086
|
|
|
504
1087
|
#if DYNAMIC_BMI2
|
|
505
1088
|
|
|
506
|
-
static
|
|
1089
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
|
507
1090
|
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
|
|
508
1091
|
const void* src, size_t srcSize,
|
|
509
1092
|
const HUF_CElt* CTable)
|
|
@@ -522,9 +1105,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
|
|
|
522
1105
|
static size_t
|
|
523
1106
|
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
|
|
524
1107
|
const void* src, size_t srcSize,
|
|
525
|
-
const HUF_CElt* CTable, const int
|
|
1108
|
+
const HUF_CElt* CTable, const int flags)
|
|
526
1109
|
{
|
|
527
|
-
if (
|
|
1110
|
+
if (flags & HUF_flags_bmi2) {
|
|
528
1111
|
return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
|
|
529
1112
|
}
|
|
530
1113
|
return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
|
|
@@ -535,24 +1118,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
|
|
|
535
1118
|
static size_t
|
|
536
1119
|
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
|
|
537
1120
|
const void* src, size_t srcSize,
|
|
538
|
-
const HUF_CElt* CTable, const int
|
|
1121
|
+
const HUF_CElt* CTable, const int flags)
|
|
539
1122
|
{
|
|
540
|
-
(void)
|
|
1123
|
+
(void)flags;
|
|
541
1124
|
return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
|
|
542
1125
|
}
|
|
543
1126
|
|
|
544
1127
|
#endif
|
|
545
1128
|
|
|
546
|
-
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
|
|
1129
|
+
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
|
|
547
1130
|
{
|
|
548
|
-
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable,
|
|
1131
|
+
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
|
|
549
1132
|
}
|
|
550
1133
|
|
|
551
|
-
|
|
552
1134
|
static size_t
|
|
553
1135
|
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
|
554
1136
|
const void* src, size_t srcSize,
|
|
555
|
-
const HUF_CElt* CTable, int
|
|
1137
|
+
const HUF_CElt* CTable, int flags)
|
|
556
1138
|
{
|
|
557
1139
|
size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
|
|
558
1140
|
const BYTE* ip = (const BYTE*) src;
|
|
@@ -566,27 +1148,24 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
|
|
566
1148
|
op += 6; /* jumpTable */
|
|
567
1149
|
|
|
568
1150
|
assert(op <= oend);
|
|
569
|
-
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable,
|
|
570
|
-
if (cSize==0) return 0;
|
|
571
|
-
assert(cSize <= 65535);
|
|
1151
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
|
|
1152
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
|
572
1153
|
MEM_writeLE16(ostart, (U16)cSize);
|
|
573
1154
|
op += cSize;
|
|
574
1155
|
}
|
|
575
1156
|
|
|
576
1157
|
ip += segmentSize;
|
|
577
1158
|
assert(op <= oend);
|
|
578
|
-
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable,
|
|
579
|
-
if (cSize==0) return 0;
|
|
580
|
-
assert(cSize <= 65535);
|
|
1159
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
|
|
1160
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
|
581
1161
|
MEM_writeLE16(ostart+2, (U16)cSize);
|
|
582
1162
|
op += cSize;
|
|
583
1163
|
}
|
|
584
1164
|
|
|
585
1165
|
ip += segmentSize;
|
|
586
1166
|
assert(op <= oend);
|
|
587
|
-
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable,
|
|
588
|
-
if (cSize==0) return 0;
|
|
589
|
-
assert(cSize <= 65535);
|
|
1167
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
|
|
1168
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
|
590
1169
|
MEM_writeLE16(ostart+4, (U16)cSize);
|
|
591
1170
|
op += cSize;
|
|
592
1171
|
}
|
|
@@ -594,17 +1173,17 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
|
|
594
1173
|
ip += segmentSize;
|
|
595
1174
|
assert(op <= oend);
|
|
596
1175
|
assert(ip <= iend);
|
|
597
|
-
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable,
|
|
598
|
-
if (cSize==0) return 0;
|
|
1176
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
|
|
1177
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
|
599
1178
|
op += cSize;
|
|
600
1179
|
}
|
|
601
1180
|
|
|
602
1181
|
return (size_t)(op-ostart);
|
|
603
1182
|
}
|
|
604
1183
|
|
|
605
|
-
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
|
|
1184
|
+
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
|
|
606
1185
|
{
|
|
607
|
-
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable,
|
|
1186
|
+
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
|
|
608
1187
|
}
|
|
609
1188
|
|
|
610
1189
|
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
|
|
@@ -612,11 +1191,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
|
|
|
612
1191
|
static size_t HUF_compressCTable_internal(
|
|
613
1192
|
BYTE* const ostart, BYTE* op, BYTE* const oend,
|
|
614
1193
|
const void* src, size_t srcSize,
|
|
615
|
-
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int
|
|
1194
|
+
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
|
|
616
1195
|
{
|
|
617
1196
|
size_t const cSize = (nbStreams==HUF_singleStream) ?
|
|
618
|
-
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable,
|
|
619
|
-
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable,
|
|
1197
|
+
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
|
|
1198
|
+
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
|
|
620
1199
|
if (HUF_isError(cSize)) { return cSize; }
|
|
621
1200
|
if (cSize==0) { return 0; } /* uncompressible */
|
|
622
1201
|
op += cSize;
|
|
@@ -628,31 +1207,111 @@ static size_t HUF_compressCTable_internal(
|
|
|
628
1207
|
|
|
629
1208
|
typedef struct {
|
|
630
1209
|
unsigned count[HUF_SYMBOLVALUE_MAX + 1];
|
|
631
|
-
HUF_CElt CTable[HUF_SYMBOLVALUE_MAX
|
|
632
|
-
|
|
1210
|
+
HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
|
|
1211
|
+
union {
|
|
1212
|
+
HUF_buildCTable_wksp_tables buildCTable_wksp;
|
|
1213
|
+
HUF_WriteCTableWksp writeCTable_wksp;
|
|
1214
|
+
U32 hist_wksp[HIST_WKSP_SIZE_U32];
|
|
1215
|
+
} wksps;
|
|
633
1216
|
} HUF_compress_tables_t;
|
|
634
1217
|
|
|
1218
|
+
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
|
|
1219
|
+
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
|
|
1220
|
+
|
|
1221
|
+
unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
|
|
1222
|
+
{
|
|
1223
|
+
unsigned cardinality = 0;
|
|
1224
|
+
unsigned i;
|
|
1225
|
+
|
|
1226
|
+
for (i = 0; i < maxSymbolValue + 1; i++) {
|
|
1227
|
+
if (count[i] != 0) cardinality += 1;
|
|
1228
|
+
}
|
|
1229
|
+
|
|
1230
|
+
return cardinality;
|
|
1231
|
+
}
|
|
1232
|
+
|
|
1233
|
+
unsigned HUF_minTableLog(unsigned symbolCardinality)
|
|
1234
|
+
{
|
|
1235
|
+
U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
|
|
1236
|
+
return minBitsSymbols;
|
|
1237
|
+
}
|
|
1238
|
+
|
|
1239
|
+
unsigned HUF_optimalTableLog(
|
|
1240
|
+
unsigned maxTableLog,
|
|
1241
|
+
size_t srcSize,
|
|
1242
|
+
unsigned maxSymbolValue,
|
|
1243
|
+
void* workSpace, size_t wkspSize,
|
|
1244
|
+
HUF_CElt* table,
|
|
1245
|
+
const unsigned* count,
|
|
1246
|
+
int flags)
|
|
1247
|
+
{
|
|
1248
|
+
assert(srcSize > 1); /* Not supported, RLE should be used instead */
|
|
1249
|
+
assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
|
|
1250
|
+
|
|
1251
|
+
if (!(flags & HUF_flags_optimalDepth)) {
|
|
1252
|
+
/* cheap evaluation, based on FSE */
|
|
1253
|
+
return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
|
|
1254
|
+
}
|
|
1255
|
+
|
|
1256
|
+
{ BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
|
|
1257
|
+
size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
|
|
1258
|
+
size_t maxBits, hSize, newSize;
|
|
1259
|
+
const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
|
|
1260
|
+
const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
|
|
1261
|
+
size_t optSize = ((size_t) ~0) - 1;
|
|
1262
|
+
unsigned optLog = maxTableLog, optLogGuess;
|
|
1263
|
+
|
|
1264
|
+
DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
|
|
1265
|
+
|
|
1266
|
+
/* Search until size increases */
|
|
1267
|
+
for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
|
|
1268
|
+
DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
|
|
1269
|
+
maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
|
|
1270
|
+
if (ERR_isError(maxBits)) continue;
|
|
1271
|
+
|
|
1272
|
+
if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
|
|
1273
|
+
|
|
1274
|
+
hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
|
|
1275
|
+
|
|
1276
|
+
if (ERR_isError(hSize)) continue;
|
|
1277
|
+
|
|
1278
|
+
newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
|
|
1279
|
+
|
|
1280
|
+
if (newSize > optSize + 1) {
|
|
1281
|
+
break;
|
|
1282
|
+
}
|
|
1283
|
+
|
|
1284
|
+
if (newSize < optSize) {
|
|
1285
|
+
optSize = newSize;
|
|
1286
|
+
optLog = optLogGuess;
|
|
1287
|
+
}
|
|
1288
|
+
}
|
|
1289
|
+
assert(optLog <= HUF_TABLELOG_MAX);
|
|
1290
|
+
return optLog;
|
|
1291
|
+
}
|
|
1292
|
+
}
|
|
1293
|
+
|
|
635
1294
|
/* HUF_compress_internal() :
|
|
636
|
-
* `
|
|
1295
|
+
* `workSpace_align4` must be aligned on 4-bytes boundaries,
|
|
1296
|
+
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
|
|
637
1297
|
static size_t
|
|
638
1298
|
HUF_compress_internal (void* dst, size_t dstSize,
|
|
639
1299
|
const void* src, size_t srcSize,
|
|
640
1300
|
unsigned maxSymbolValue, unsigned huffLog,
|
|
641
1301
|
HUF_nbStreams_e nbStreams,
|
|
642
1302
|
void* workSpace, size_t wkspSize,
|
|
643
|
-
HUF_CElt* oldHufTable, HUF_repeat* repeat, int
|
|
644
|
-
const int bmi2)
|
|
1303
|
+
HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
|
|
645
1304
|
{
|
|
646
|
-
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
|
|
1305
|
+
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
|
|
647
1306
|
BYTE* const ostart = (BYTE*)dst;
|
|
648
1307
|
BYTE* const oend = ostart + dstSize;
|
|
649
1308
|
BYTE* op = ostart;
|
|
650
1309
|
|
|
651
|
-
|
|
1310
|
+
DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
|
|
1311
|
+
HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
|
|
652
1312
|
|
|
653
1313
|
/* checks & inits */
|
|
654
|
-
if (
|
|
655
|
-
if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
|
|
1314
|
+
if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
|
|
656
1315
|
if (!srcSize) return 0; /* Uncompressed */
|
|
657
1316
|
if (!dstSize) return 0; /* cannot fit anything within dst budget */
|
|
658
1317
|
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
|
|
@@ -662,17 +1321,34 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
662
1321
|
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
|
|
663
1322
|
|
|
664
1323
|
/* Heuristic : If old table is valid, use it for small inputs */
|
|
665
|
-
if (
|
|
1324
|
+
if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
|
|
666
1325
|
return HUF_compressCTable_internal(ostart, op, oend,
|
|
667
1326
|
src, srcSize,
|
|
668
|
-
nbStreams, oldHufTable,
|
|
1327
|
+
nbStreams, oldHufTable, flags);
|
|
1328
|
+
}
|
|
1329
|
+
|
|
1330
|
+
/* If uncompressible data is suspected, do a smaller sampling first */
|
|
1331
|
+
DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
|
|
1332
|
+
if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
|
|
1333
|
+
size_t largestTotal = 0;
|
|
1334
|
+
DEBUGLOG(5, "input suspected incompressible : sampling to check");
|
|
1335
|
+
{ unsigned maxSymbolValueBegin = maxSymbolValue;
|
|
1336
|
+
CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
|
|
1337
|
+
largestTotal += largestBegin;
|
|
1338
|
+
}
|
|
1339
|
+
{ unsigned maxSymbolValueEnd = maxSymbolValue;
|
|
1340
|
+
CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
|
|
1341
|
+
largestTotal += largestEnd;
|
|
1342
|
+
}
|
|
1343
|
+
if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */
|
|
669
1344
|
}
|
|
670
1345
|
|
|
671
1346
|
/* Scan input and build symbol stats */
|
|
672
|
-
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize,
|
|
1347
|
+
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
|
|
673
1348
|
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
|
|
674
1349
|
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
|
|
675
1350
|
}
|
|
1351
|
+
DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
|
|
676
1352
|
|
|
677
1353
|
/* Check validity of previous table */
|
|
678
1354
|
if ( repeat
|
|
@@ -681,26 +1357,31 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
681
1357
|
*repeat = HUF_repeat_none;
|
|
682
1358
|
}
|
|
683
1359
|
/* Heuristic : use existing table for small inputs */
|
|
684
|
-
if (
|
|
1360
|
+
if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
|
|
685
1361
|
return HUF_compressCTable_internal(ostart, op, oend,
|
|
686
1362
|
src, srcSize,
|
|
687
|
-
nbStreams, oldHufTable,
|
|
1363
|
+
nbStreams, oldHufTable, flags);
|
|
688
1364
|
}
|
|
689
1365
|
|
|
690
1366
|
/* Build Huffman Tree */
|
|
691
|
-
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
|
1367
|
+
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
|
|
692
1368
|
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
|
|
693
1369
|
maxSymbolValue, huffLog,
|
|
694
|
-
&table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
|
|
1370
|
+
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
|
|
695
1371
|
CHECK_F(maxBits);
|
|
696
1372
|
huffLog = (U32)maxBits;
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
1373
|
+
DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
|
|
1374
|
+
}
|
|
1375
|
+
/* Zero unused symbols in CTable, so we can check it for validity */
|
|
1376
|
+
{
|
|
1377
|
+
size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
|
|
1378
|
+
size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
|
|
1379
|
+
ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
|
|
700
1380
|
}
|
|
701
1381
|
|
|
702
1382
|
/* Write table description header */
|
|
703
|
-
{ CHECK_V_F(hSize,
|
|
1383
|
+
{ CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
|
|
1384
|
+
&table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
|
|
704
1385
|
/* Check if using previous huffman table is beneficial */
|
|
705
1386
|
if (repeat && *repeat != HUF_repeat_none) {
|
|
706
1387
|
size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
|
|
@@ -708,7 +1389,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
708
1389
|
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
|
|
709
1390
|
return HUF_compressCTable_internal(ostart, op, oend,
|
|
710
1391
|
src, srcSize,
|
|
711
|
-
nbStreams, oldHufTable,
|
|
1392
|
+
nbStreams, oldHufTable, flags);
|
|
712
1393
|
} }
|
|
713
1394
|
|
|
714
1395
|
/* Use the new huffman table */
|
|
@@ -716,83 +1397,39 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
716
1397
|
op += hSize;
|
|
717
1398
|
if (repeat) { *repeat = HUF_repeat_none; }
|
|
718
1399
|
if (oldHufTable)
|
|
719
|
-
|
|
1400
|
+
ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
|
|
720
1401
|
}
|
|
721
1402
|
return HUF_compressCTable_internal(ostart, op, oend,
|
|
722
1403
|
src, srcSize,
|
|
723
|
-
nbStreams, table->CTable,
|
|
724
|
-
}
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
|
|
728
|
-
const void* src, size_t srcSize,
|
|
729
|
-
unsigned maxSymbolValue, unsigned huffLog,
|
|
730
|
-
void* workSpace, size_t wkspSize)
|
|
731
|
-
{
|
|
732
|
-
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
733
|
-
maxSymbolValue, huffLog, HUF_singleStream,
|
|
734
|
-
workSpace, wkspSize,
|
|
735
|
-
NULL, NULL, 0, 0 /*bmi2*/);
|
|
1404
|
+
nbStreams, table->CTable, flags);
|
|
736
1405
|
}
|
|
737
1406
|
|
|
738
1407
|
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
|
|
739
1408
|
const void* src, size_t srcSize,
|
|
740
1409
|
unsigned maxSymbolValue, unsigned huffLog,
|
|
741
1410
|
void* workSpace, size_t wkspSize,
|
|
742
|
-
HUF_CElt* hufTable, HUF_repeat* repeat, int
|
|
1411
|
+
HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
|
|
743
1412
|
{
|
|
1413
|
+
DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
|
|
744
1414
|
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
745
1415
|
maxSymbolValue, huffLog, HUF_singleStream,
|
|
746
1416
|
workSpace, wkspSize, hufTable,
|
|
747
|
-
repeat,
|
|
748
|
-
}
|
|
749
|
-
|
|
750
|
-
size_t HUF_compress1X (void* dst, size_t dstSize,
|
|
751
|
-
const void* src, size_t srcSize,
|
|
752
|
-
unsigned maxSymbolValue, unsigned huffLog)
|
|
753
|
-
{
|
|
754
|
-
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
|
|
755
|
-
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
|
756
|
-
}
|
|
757
|
-
|
|
758
|
-
/* HUF_compress4X_repeat():
|
|
759
|
-
* compress input using 4 streams.
|
|
760
|
-
* provide workspace to generate compression tables */
|
|
761
|
-
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
|
|
762
|
-
const void* src, size_t srcSize,
|
|
763
|
-
unsigned maxSymbolValue, unsigned huffLog,
|
|
764
|
-
void* workSpace, size_t wkspSize)
|
|
765
|
-
{
|
|
766
|
-
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
767
|
-
maxSymbolValue, huffLog, HUF_fourStreams,
|
|
768
|
-
workSpace, wkspSize,
|
|
769
|
-
NULL, NULL, 0, 0 /*bmi2*/);
|
|
1417
|
+
repeat, flags);
|
|
770
1418
|
}
|
|
771
1419
|
|
|
772
1420
|
/* HUF_compress4X_repeat():
|
|
773
1421
|
* compress input using 4 streams.
|
|
1422
|
+
* consider skipping quickly
|
|
774
1423
|
* re-use an existing huffman compression table */
|
|
775
1424
|
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
|
|
776
1425
|
const void* src, size_t srcSize,
|
|
777
1426
|
unsigned maxSymbolValue, unsigned huffLog,
|
|
778
1427
|
void* workSpace, size_t wkspSize,
|
|
779
|
-
HUF_CElt* hufTable, HUF_repeat* repeat, int
|
|
1428
|
+
HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
|
|
780
1429
|
{
|
|
1430
|
+
DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
|
|
781
1431
|
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
782
1432
|
maxSymbolValue, huffLog, HUF_fourStreams,
|
|
783
1433
|
workSpace, wkspSize,
|
|
784
|
-
hufTable, repeat,
|
|
785
|
-
}
|
|
786
|
-
|
|
787
|
-
size_t HUF_compress2 (void* dst, size_t dstSize,
|
|
788
|
-
const void* src, size_t srcSize,
|
|
789
|
-
unsigned maxSymbolValue, unsigned huffLog)
|
|
790
|
-
{
|
|
791
|
-
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
|
|
792
|
-
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
|
793
|
-
}
|
|
794
|
-
|
|
795
|
-
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
|
|
796
|
-
{
|
|
797
|
-
return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
|
|
1434
|
+
hufTable, repeat, flags);
|
|
798
1435
|
}
|