zstd-ruby 1.4.4.0 → 1.5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/extconf.rb +1 -0
- data/ext/zstdruby/libzstd/BUCK +5 -7
- data/ext/zstdruby/libzstd/Makefile +241 -173
- data/ext/zstdruby/libzstd/README.md +76 -18
- data/ext/zstdruby/libzstd/common/bitstream.h +75 -57
- data/ext/zstdruby/libzstd/common/compiler.h +196 -20
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +208 -76
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +87 -4
- data/ext/zstdruby/libzstd/common/fse.h +51 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -57
- data/ext/zstdruby/libzstd/common/huf.h +60 -54
- data/ext/zstdruby/libzstd/common/mem.h +87 -98
- data/ext/zstdruby/libzstd/common/pool.c +23 -17
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
- data/ext/zstdruby/libzstd/common/threading.c +10 -8
- data/ext/zstdruby/libzstd/common/threading.h +4 -3
- data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
- data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +252 -108
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +105 -85
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +831 -259
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3213 -1007
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +493 -71
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +21 -16
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +51 -24
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +573 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +208 -81
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +315 -137
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +319 -128
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1156 -171
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +331 -206
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +403 -226
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +188 -453
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1065 -410
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +691 -230
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1072 -323
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +16 -7
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +71 -10
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +57 -40
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +54 -35
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +151 -57
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +25 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +18 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +18 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +22 -16
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -26
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.mk +185 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
- data/ext/zstdruby/libzstd/zstd.h +760 -234
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +20 -9
- data/.travis.yml +0 -14
|
@@ -1,35 +1,15 @@
|
|
|
1
1
|
/* ******************************************************************
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
* Redistributions in binary form must reproduce the above
|
|
14
|
-
copyright notice, this list of conditions and the following disclaimer
|
|
15
|
-
in the documentation and/or other materials provided with the
|
|
16
|
-
distribution.
|
|
17
|
-
|
|
18
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
19
|
-
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
20
|
-
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
21
|
-
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
22
|
-
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
23
|
-
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
24
|
-
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
25
|
-
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
26
|
-
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
27
|
-
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
-
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29
|
-
|
|
30
|
-
You can contact the author at :
|
|
31
|
-
- FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
32
|
-
- Public forum : https://groups.google.com/forum/#!forum/lz4c
|
|
2
|
+
* Huffman encoder, part of New Generation Entropy library
|
|
3
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
4
|
+
*
|
|
5
|
+
* You can contact the author at :
|
|
6
|
+
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
7
|
+
* - Public forum : https://groups.google.com/forum/#!forum/lz4c
|
|
8
|
+
*
|
|
9
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
10
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
11
|
+
* in the COPYING file in the root directory of this source tree).
|
|
12
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
33
13
|
****************************************************************** */
|
|
34
14
|
|
|
35
15
|
/* **************************************************************
|
|
@@ -43,16 +23,15 @@
|
|
|
43
23
|
/* **************************************************************
|
|
44
24
|
* Includes
|
|
45
25
|
****************************************************************/
|
|
46
|
-
#include
|
|
47
|
-
#include
|
|
48
|
-
#include "
|
|
49
|
-
#include "bitstream.h"
|
|
26
|
+
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
|
|
27
|
+
#include "../common/compiler.h"
|
|
28
|
+
#include "../common/bitstream.h"
|
|
50
29
|
#include "hist.h"
|
|
51
30
|
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
|
|
52
|
-
#include "fse.h" /* header compression */
|
|
31
|
+
#include "../common/fse.h" /* header compression */
|
|
53
32
|
#define HUF_STATIC_LINKING_ONLY
|
|
54
|
-
#include "huf.h"
|
|
55
|
-
#include "error_private.h"
|
|
33
|
+
#include "../common/huf.h"
|
|
34
|
+
#include "../common/error_private.h"
|
|
56
35
|
|
|
57
36
|
|
|
58
37
|
/* **************************************************************
|
|
@@ -60,8 +39,6 @@
|
|
|
60
39
|
****************************************************************/
|
|
61
40
|
#define HUF_isError ERR_isError
|
|
62
41
|
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
|
|
63
|
-
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
|
|
64
|
-
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
|
|
65
42
|
|
|
66
43
|
|
|
67
44
|
/* **************************************************************
|
|
@@ -76,13 +53,43 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
|
|
|
76
53
|
/* *******************************************************
|
|
77
54
|
* HUF : Huffman block compression
|
|
78
55
|
*********************************************************/
|
|
56
|
+
#define HUF_WORKSPACE_MAX_ALIGNMENT 8
|
|
57
|
+
|
|
58
|
+
static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
|
|
59
|
+
{
|
|
60
|
+
size_t const mask = align - 1;
|
|
61
|
+
size_t const rem = (size_t)workspace & mask;
|
|
62
|
+
size_t const add = (align - rem) & mask;
|
|
63
|
+
BYTE* const aligned = (BYTE*)workspace + add;
|
|
64
|
+
assert((align & (align - 1)) == 0); /* pow 2 */
|
|
65
|
+
assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
|
|
66
|
+
if (*workspaceSizePtr >= add) {
|
|
67
|
+
assert(add < align);
|
|
68
|
+
assert(((size_t)aligned & mask) == 0);
|
|
69
|
+
*workspaceSizePtr -= add;
|
|
70
|
+
return aligned;
|
|
71
|
+
} else {
|
|
72
|
+
*workspaceSizePtr = 0;
|
|
73
|
+
return NULL;
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
|
|
79
78
|
/* HUF_compressWeights() :
|
|
80
79
|
* Same as FSE_compress(), but dedicated to huff0's weights compression.
|
|
81
80
|
* The use case needs much less stack memory.
|
|
82
81
|
* Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
|
|
83
82
|
*/
|
|
84
83
|
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
|
|
85
|
-
|
|
84
|
+
|
|
85
|
+
typedef struct {
|
|
86
|
+
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
|
|
87
|
+
U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
|
|
88
|
+
unsigned count[HUF_TABLELOG_MAX+1];
|
|
89
|
+
S16 norm[HUF_TABLELOG_MAX+1];
|
|
90
|
+
} HUF_CompressWeightsWksp;
|
|
91
|
+
|
|
92
|
+
static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
|
|
86
93
|
{
|
|
87
94
|
BYTE* const ostart = (BYTE*) dst;
|
|
88
95
|
BYTE* op = ostart;
|
|
@@ -90,69 +97,101 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
|
|
|
90
97
|
|
|
91
98
|
unsigned maxSymbolValue = HUF_TABLELOG_MAX;
|
|
92
99
|
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
|
|
100
|
+
HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
|
|
93
101
|
|
|
94
|
-
|
|
95
|
-
BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
|
|
96
|
-
|
|
97
|
-
unsigned count[HUF_TABLELOG_MAX+1];
|
|
98
|
-
S16 norm[HUF_TABLELOG_MAX+1];
|
|
102
|
+
if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
|
|
99
103
|
|
|
100
104
|
/* init conditions */
|
|
101
105
|
if (wtSize <= 1) return 0; /* Not compressible */
|
|
102
106
|
|
|
103
107
|
/* Scan input and build symbol stats */
|
|
104
|
-
{ unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
|
|
108
|
+
{ unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */
|
|
105
109
|
if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
|
|
106
110
|
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
|
|
107
111
|
}
|
|
108
112
|
|
|
109
113
|
tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
|
|
110
|
-
CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
|
|
114
|
+
CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
|
|
111
115
|
|
|
112
116
|
/* Write table description header */
|
|
113
|
-
{ CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
|
|
117
|
+
{ CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
|
|
114
118
|
op += hSize;
|
|
115
119
|
}
|
|
116
120
|
|
|
117
121
|
/* Compress */
|
|
118
|
-
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
|
|
119
|
-
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
|
|
122
|
+
CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
|
|
123
|
+
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
|
|
120
124
|
if (cSize == 0) return 0; /* not enough space for compressed data */
|
|
121
125
|
op += cSize;
|
|
122
126
|
}
|
|
123
127
|
|
|
124
|
-
return op-ostart;
|
|
128
|
+
return (size_t)(op-ostart);
|
|
125
129
|
}
|
|
126
130
|
|
|
131
|
+
static size_t HUF_getNbBits(HUF_CElt elt)
|
|
132
|
+
{
|
|
133
|
+
return elt & 0xFF;
|
|
134
|
+
}
|
|
127
135
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
}
|
|
136
|
+
static size_t HUF_getNbBitsFast(HUF_CElt elt)
|
|
137
|
+
{
|
|
138
|
+
return elt;
|
|
139
|
+
}
|
|
132
140
|
|
|
133
|
-
|
|
134
|
-
`CTable` : Huffman tree to save, using huf representation.
|
|
135
|
-
@return : size of saved CTable */
|
|
136
|
-
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
|
|
137
|
-
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
|
|
141
|
+
static size_t HUF_getValue(HUF_CElt elt)
|
|
138
142
|
{
|
|
143
|
+
return elt & ~0xFF;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
static size_t HUF_getValueFast(HUF_CElt elt)
|
|
147
|
+
{
|
|
148
|
+
return elt;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
|
|
152
|
+
{
|
|
153
|
+
assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
|
|
154
|
+
*elt = nbBits;
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
static void HUF_setValue(HUF_CElt* elt, size_t value)
|
|
158
|
+
{
|
|
159
|
+
size_t const nbBits = HUF_getNbBits(*elt);
|
|
160
|
+
if (nbBits > 0) {
|
|
161
|
+
assert((value >> nbBits) == 0);
|
|
162
|
+
*elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
typedef struct {
|
|
167
|
+
HUF_CompressWeightsWksp wksp;
|
|
139
168
|
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
|
|
140
169
|
BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
|
|
170
|
+
} HUF_WriteCTableWksp;
|
|
171
|
+
|
|
172
|
+
size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
|
|
173
|
+
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
|
|
174
|
+
void* workspace, size_t workspaceSize)
|
|
175
|
+
{
|
|
176
|
+
HUF_CElt const* const ct = CTable + 1;
|
|
141
177
|
BYTE* op = (BYTE*)dst;
|
|
142
178
|
U32 n;
|
|
179
|
+
HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
|
|
143
180
|
|
|
144
|
-
|
|
181
|
+
/* check conditions */
|
|
182
|
+
if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
|
|
145
183
|
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
|
|
146
184
|
|
|
147
185
|
/* convert to weight */
|
|
148
|
-
bitsToWeight[0] = 0;
|
|
186
|
+
wksp->bitsToWeight[0] = 0;
|
|
149
187
|
for (n=1; n<huffLog+1; n++)
|
|
150
|
-
bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
|
|
188
|
+
wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
|
|
151
189
|
for (n=0; n<maxSymbolValue; n++)
|
|
152
|
-
huffWeight[n] = bitsToWeight[
|
|
190
|
+
wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
|
|
153
191
|
|
|
154
192
|
/* attempt weights compression by FSE */
|
|
155
|
-
|
|
193
|
+
if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
|
|
194
|
+
{ CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
|
|
156
195
|
if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
|
|
157
196
|
op[0] = (BYTE)hSize;
|
|
158
197
|
return hSize+1;
|
|
@@ -162,45 +201,59 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
|
|
|
162
201
|
if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
|
|
163
202
|
if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
|
|
164
203
|
op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
|
|
165
|
-
huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
|
|
204
|
+
wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
|
|
166
205
|
for (n=0; n<maxSymbolValue; n+=2)
|
|
167
|
-
op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
|
|
206
|
+
op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
|
|
168
207
|
return ((maxSymbolValue+1)/2) + 1;
|
|
169
208
|
}
|
|
170
209
|
|
|
210
|
+
/*! HUF_writeCTable() :
|
|
211
|
+
`CTable` : Huffman tree to save, using huf representation.
|
|
212
|
+
@return : size of saved CTable */
|
|
213
|
+
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
|
|
214
|
+
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
|
|
215
|
+
{
|
|
216
|
+
HUF_WriteCTableWksp wksp;
|
|
217
|
+
return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
|
|
218
|
+
}
|
|
219
|
+
|
|
171
220
|
|
|
172
|
-
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize)
|
|
221
|
+
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
|
|
173
222
|
{
|
|
174
223
|
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
|
|
175
224
|
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
|
|
176
225
|
U32 tableLog = 0;
|
|
177
226
|
U32 nbSymbols = 0;
|
|
227
|
+
HUF_CElt* const ct = CTable + 1;
|
|
178
228
|
|
|
179
229
|
/* get symbol weights */
|
|
180
230
|
CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
|
|
231
|
+
*hasZeroWeights = (rankVal[0] > 0);
|
|
181
232
|
|
|
182
233
|
/* check result */
|
|
183
234
|
if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
|
|
184
235
|
if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
|
|
185
236
|
|
|
237
|
+
CTable[0] = tableLog;
|
|
238
|
+
|
|
186
239
|
/* Prepare base value per rank */
|
|
187
240
|
{ U32 n, nextRankStart = 0;
|
|
188
241
|
for (n=1; n<=tableLog; n++) {
|
|
189
|
-
U32
|
|
242
|
+
U32 curr = nextRankStart;
|
|
190
243
|
nextRankStart += (rankVal[n] << (n-1));
|
|
191
|
-
rankVal[n] =
|
|
244
|
+
rankVal[n] = curr;
|
|
192
245
|
} }
|
|
193
246
|
|
|
194
247
|
/* fill nbBits */
|
|
195
248
|
{ U32 n; for (n=0; n<nbSymbols; n++) {
|
|
196
249
|
const U32 w = huffWeight[n];
|
|
197
|
-
|
|
250
|
+
HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
|
|
198
251
|
} }
|
|
199
252
|
|
|
200
253
|
/* fill val */
|
|
201
254
|
{ U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
|
|
202
255
|
U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
|
|
203
|
-
{ U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[
|
|
256
|
+
{ U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
|
|
204
257
|
/* determine stating value per rank */
|
|
205
258
|
valPerRank[tableLog+1] = 0; /* for w==0 */
|
|
206
259
|
{ U16 min = 0;
|
|
@@ -210,18 +263,18 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
|
|
|
210
263
|
min >>= 1;
|
|
211
264
|
} }
|
|
212
265
|
/* assign value within rank, symbol order */
|
|
213
|
-
{ U32 n; for (n=0; n<nbSymbols; n++)
|
|
266
|
+
{ U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
|
|
214
267
|
}
|
|
215
268
|
|
|
216
269
|
*maxSymbolValuePtr = nbSymbols - 1;
|
|
217
270
|
return readSize;
|
|
218
271
|
}
|
|
219
272
|
|
|
220
|
-
U32
|
|
273
|
+
U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
|
|
221
274
|
{
|
|
222
|
-
const HUF_CElt*
|
|
275
|
+
const HUF_CElt* ct = CTable + 1;
|
|
223
276
|
assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
|
|
224
|
-
return
|
|
277
|
+
return (U32)HUF_getNbBits(ct[symbolValue]);
|
|
225
278
|
}
|
|
226
279
|
|
|
227
280
|
|
|
@@ -232,156 +285,351 @@ typedef struct nodeElt_s {
|
|
|
232
285
|
BYTE nbBits;
|
|
233
286
|
} nodeElt;
|
|
234
287
|
|
|
288
|
+
/**
|
|
289
|
+
* HUF_setMaxHeight():
|
|
290
|
+
* Enforces maxNbBits on the Huffman tree described in huffNode.
|
|
291
|
+
*
|
|
292
|
+
* It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
|
|
293
|
+
* the tree to so that it is a valid canonical Huffman tree.
|
|
294
|
+
*
|
|
295
|
+
* @pre The sum of the ranks of each symbol == 2^largestBits,
|
|
296
|
+
* where largestBits == huffNode[lastNonNull].nbBits.
|
|
297
|
+
* @post The sum of the ranks of each symbol == 2^largestBits,
|
|
298
|
+
* where largestBits is the return value <= maxNbBits.
|
|
299
|
+
*
|
|
300
|
+
* @param huffNode The Huffman tree modified in place to enforce maxNbBits.
|
|
301
|
+
* @param lastNonNull The symbol with the lowest count in the Huffman tree.
|
|
302
|
+
* @param maxNbBits The maximum allowed number of bits, which the Huffman tree
|
|
303
|
+
* may not respect. After this function the Huffman tree will
|
|
304
|
+
* respect maxNbBits.
|
|
305
|
+
* @return The maximum number of bits of the Huffman tree after adjustment,
|
|
306
|
+
* necessarily no more than maxNbBits.
|
|
307
|
+
*/
|
|
235
308
|
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
|
|
236
309
|
{
|
|
237
310
|
const U32 largestBits = huffNode[lastNonNull].nbBits;
|
|
238
|
-
|
|
311
|
+
/* early exit : no elt > maxNbBits, so the tree is already valid. */
|
|
312
|
+
if (largestBits <= maxNbBits) return largestBits;
|
|
239
313
|
|
|
240
314
|
/* there are several too large elements (at least >= 2) */
|
|
241
315
|
{ int totalCost = 0;
|
|
242
316
|
const U32 baseCost = 1 << (largestBits - maxNbBits);
|
|
243
|
-
|
|
317
|
+
int n = (int)lastNonNull;
|
|
244
318
|
|
|
319
|
+
/* Adjust any ranks > maxNbBits to maxNbBits.
|
|
320
|
+
* Compute totalCost, which is how far the sum of the ranks is
|
|
321
|
+
* we are over 2^largestBits after adjust the offending ranks.
|
|
322
|
+
*/
|
|
245
323
|
while (huffNode[n].nbBits > maxNbBits) {
|
|
246
324
|
totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
|
|
247
325
|
huffNode[n].nbBits = (BYTE)maxNbBits;
|
|
248
|
-
n
|
|
249
|
-
}
|
|
250
|
-
|
|
326
|
+
n--;
|
|
327
|
+
}
|
|
328
|
+
/* n stops at huffNode[n].nbBits <= maxNbBits */
|
|
329
|
+
assert(huffNode[n].nbBits <= maxNbBits);
|
|
330
|
+
/* n end at index of smallest symbol using < maxNbBits */
|
|
331
|
+
while (huffNode[n].nbBits == maxNbBits) --n;
|
|
251
332
|
|
|
252
|
-
/* renorm totalCost
|
|
253
|
-
|
|
333
|
+
/* renorm totalCost from 2^largestBits to 2^maxNbBits
|
|
334
|
+
* note : totalCost is necessarily a multiple of baseCost */
|
|
335
|
+
assert((totalCost & (baseCost - 1)) == 0);
|
|
336
|
+
totalCost >>= (largestBits - maxNbBits);
|
|
337
|
+
assert(totalCost > 0);
|
|
254
338
|
|
|
255
339
|
/* repay normalized cost */
|
|
256
340
|
{ U32 const noSymbol = 0xF0F0F0F0;
|
|
257
341
|
U32 rankLast[HUF_TABLELOG_MAX+2];
|
|
258
|
-
int pos;
|
|
259
342
|
|
|
260
|
-
/* Get pos of last (smallest) symbol per rank */
|
|
261
|
-
|
|
343
|
+
/* Get pos of last (smallest = lowest cum. count) symbol per rank */
|
|
344
|
+
ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
|
|
262
345
|
{ U32 currentNbBits = maxNbBits;
|
|
346
|
+
int pos;
|
|
263
347
|
for (pos=n ; pos >= 0; pos--) {
|
|
264
348
|
if (huffNode[pos].nbBits >= currentNbBits) continue;
|
|
265
349
|
currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
|
|
266
|
-
rankLast[maxNbBits-currentNbBits] = pos;
|
|
350
|
+
rankLast[maxNbBits-currentNbBits] = (U32)pos;
|
|
267
351
|
} }
|
|
268
352
|
|
|
269
353
|
while (totalCost > 0) {
|
|
270
|
-
|
|
354
|
+
/* Try to reduce the next power of 2 above totalCost because we
|
|
355
|
+
* gain back half the rank.
|
|
356
|
+
*/
|
|
357
|
+
U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
|
|
271
358
|
for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
|
|
272
|
-
U32 highPos = rankLast[nBitsToDecrease];
|
|
273
|
-
U32 lowPos = rankLast[nBitsToDecrease-1];
|
|
359
|
+
U32 const highPos = rankLast[nBitsToDecrease];
|
|
360
|
+
U32 const lowPos = rankLast[nBitsToDecrease-1];
|
|
274
361
|
if (highPos == noSymbol) continue;
|
|
362
|
+
/* Decrease highPos if no symbols of lowPos or if it is
|
|
363
|
+
* not cheaper to remove 2 lowPos than highPos.
|
|
364
|
+
*/
|
|
275
365
|
if (lowPos == noSymbol) break;
|
|
276
366
|
{ U32 const highTotal = huffNode[highPos].count;
|
|
277
367
|
U32 const lowTotal = 2 * huffNode[lowPos].count;
|
|
278
368
|
if (highTotal <= lowTotal) break;
|
|
279
369
|
} }
|
|
280
370
|
/* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
|
|
371
|
+
assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
|
|
281
372
|
/* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
|
|
282
373
|
while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
|
|
283
|
-
nBitsToDecrease
|
|
374
|
+
nBitsToDecrease++;
|
|
375
|
+
assert(rankLast[nBitsToDecrease] != noSymbol);
|
|
376
|
+
/* Increase the number of bits to gain back half the rank cost. */
|
|
284
377
|
totalCost -= 1 << (nBitsToDecrease-1);
|
|
378
|
+
huffNode[rankLast[nBitsToDecrease]].nbBits++;
|
|
379
|
+
|
|
380
|
+
/* Fix up the new rank.
|
|
381
|
+
* If the new rank was empty, this symbol is now its smallest.
|
|
382
|
+
* Otherwise, this symbol will be the largest in the new rank so no adjustment.
|
|
383
|
+
*/
|
|
285
384
|
if (rankLast[nBitsToDecrease-1] == noSymbol)
|
|
286
|
-
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
|
|
287
|
-
|
|
385
|
+
rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
|
|
386
|
+
/* Fix up the old rank.
|
|
387
|
+
* If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
|
|
388
|
+
* it must be the only symbol in its rank, so the old rank now has no symbols.
|
|
389
|
+
* Otherwise, since the Huffman nodes are sorted by count, the previous position is now
|
|
390
|
+
* the smallest node in the rank. If the previous position belongs to a different rank,
|
|
391
|
+
* then the rank is now empty.
|
|
392
|
+
*/
|
|
288
393
|
if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
|
|
289
394
|
rankLast[nBitsToDecrease] = noSymbol;
|
|
290
395
|
else {
|
|
291
396
|
rankLast[nBitsToDecrease]--;
|
|
292
397
|
if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
|
|
293
398
|
rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
|
|
294
|
-
|
|
295
|
-
|
|
399
|
+
}
|
|
400
|
+
} /* while (totalCost > 0) */
|
|
401
|
+
|
|
402
|
+
/* If we've removed too much weight, then we have to add it back.
|
|
403
|
+
* To avoid overshooting again, we only adjust the smallest rank.
|
|
404
|
+
* We take the largest nodes from the lowest rank 0 and move them
|
|
405
|
+
* to rank 1. There's guaranteed to be enough rank 0 symbols because
|
|
406
|
+
* TODO.
|
|
407
|
+
*/
|
|
296
408
|
while (totalCost < 0) { /* Sometimes, cost correction overshoot */
|
|
297
|
-
|
|
409
|
+
/* special case : no rank 1 symbol (using maxNbBits-1);
|
|
410
|
+
* let's create one from largest rank 0 (using maxNbBits).
|
|
411
|
+
*/
|
|
412
|
+
if (rankLast[1] == noSymbol) {
|
|
298
413
|
while (huffNode[n].nbBits == maxNbBits) n--;
|
|
299
414
|
huffNode[n+1].nbBits--;
|
|
300
|
-
|
|
415
|
+
assert(n >= 0);
|
|
416
|
+
rankLast[1] = (U32)(n+1);
|
|
301
417
|
totalCost++;
|
|
302
418
|
continue;
|
|
303
419
|
}
|
|
304
420
|
huffNode[ rankLast[1] + 1 ].nbBits--;
|
|
305
421
|
rankLast[1]++;
|
|
306
422
|
totalCost ++;
|
|
307
|
-
|
|
423
|
+
}
|
|
424
|
+
} /* repay normalized cost */
|
|
425
|
+
} /* there are several too large elements (at least >= 2) */
|
|
308
426
|
|
|
309
427
|
return maxNbBits;
|
|
310
428
|
}
|
|
311
429
|
|
|
312
|
-
|
|
313
430
|
typedef struct {
|
|
314
|
-
|
|
315
|
-
|
|
431
|
+
U16 base;
|
|
432
|
+
U16 curr;
|
|
316
433
|
} rankPos;
|
|
317
434
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
435
|
+
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
|
|
436
|
+
|
|
437
|
+
/* Number of buckets available for HUF_sort() */
|
|
438
|
+
#define RANK_POSITION_TABLE_SIZE 192
|
|
439
|
+
|
|
440
|
+
typedef struct {
|
|
441
|
+
huffNodeTable huffNodeTbl;
|
|
442
|
+
rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
|
|
443
|
+
} HUF_buildCTable_wksp_tables;
|
|
444
|
+
|
|
445
|
+
/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
|
|
446
|
+
* Strategy is to use as many buckets as possible for representing distinct
|
|
447
|
+
* counts while using the remainder to represent all "large" counts.
|
|
448
|
+
*
|
|
449
|
+
* To satisfy this requirement for 192 buckets, we can do the following:
|
|
450
|
+
* Let buckets 0-166 represent distinct counts of [0, 166]
|
|
451
|
+
* Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
|
|
452
|
+
*/
|
|
453
|
+
#define RANK_POSITION_MAX_COUNT_LOG 32
|
|
454
|
+
#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
|
|
455
|
+
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */
|
|
456
|
+
|
|
457
|
+
/* Return the appropriate bucket index for a given count. See definition of
|
|
458
|
+
* RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
|
|
459
|
+
*/
|
|
460
|
+
static U32 HUF_getIndex(U32 const count) {
|
|
461
|
+
return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
|
|
462
|
+
? count
|
|
463
|
+
: BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
/* Helper swap function for HUF_quickSortPartition() */
|
|
467
|
+
static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
|
|
468
|
+
nodeElt tmp = *a;
|
|
469
|
+
*a = *b;
|
|
470
|
+
*b = tmp;
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
/* Returns 0 if the huffNode array is not sorted by descending count */
|
|
474
|
+
MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
|
|
475
|
+
U32 i;
|
|
476
|
+
for (i = 1; i < maxSymbolValue1; ++i) {
|
|
477
|
+
if (huffNode[i].count > huffNode[i-1].count) {
|
|
478
|
+
return 0;
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
return 1;
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
/* Insertion sort by descending order */
|
|
485
|
+
HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
|
|
486
|
+
int i;
|
|
487
|
+
int const size = high-low+1;
|
|
488
|
+
huffNode += low;
|
|
489
|
+
for (i = 1; i < size; ++i) {
|
|
490
|
+
nodeElt const key = huffNode[i];
|
|
491
|
+
int j = i - 1;
|
|
492
|
+
while (j >= 0 && huffNode[j].count < key.count) {
|
|
493
|
+
huffNode[j + 1] = huffNode[j];
|
|
494
|
+
j--;
|
|
495
|
+
}
|
|
496
|
+
huffNode[j + 1] = key;
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
/* Pivot helper function for quicksort. */
|
|
501
|
+
static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
|
|
502
|
+
/* Simply select rightmost element as pivot. "Better" selectors like
|
|
503
|
+
* median-of-three don't experimentally appear to have any benefit.
|
|
504
|
+
*/
|
|
505
|
+
U32 const pivot = arr[high].count;
|
|
506
|
+
int i = low - 1;
|
|
507
|
+
int j = low;
|
|
508
|
+
for ( ; j < high; j++) {
|
|
509
|
+
if (arr[j].count > pivot) {
|
|
510
|
+
i++;
|
|
511
|
+
HUF_swapNodes(&arr[i], &arr[j]);
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
HUF_swapNodes(&arr[i + 1], &arr[high]);
|
|
515
|
+
return i + 1;
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
/* Classic quicksort by descending with partially iterative calls
|
|
519
|
+
* to reduce worst case callstack size.
|
|
520
|
+
*/
|
|
521
|
+
static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
|
|
522
|
+
int const kInsertionSortThreshold = 8;
|
|
523
|
+
if (high - low < kInsertionSortThreshold) {
|
|
524
|
+
HUF_insertionSort(arr, low, high);
|
|
525
|
+
return;
|
|
526
|
+
}
|
|
527
|
+
while (low < high) {
|
|
528
|
+
int const idx = HUF_quickSortPartition(arr, low, high);
|
|
529
|
+
if (idx - low < high - idx) {
|
|
530
|
+
HUF_simpleQuickSort(arr, low, idx - 1);
|
|
531
|
+
low = idx + 1;
|
|
532
|
+
} else {
|
|
533
|
+
HUF_simpleQuickSort(arr, idx + 1, high);
|
|
534
|
+
high = idx - 1;
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
/**
|
|
540
|
+
* HUF_sort():
|
|
541
|
+
* Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
|
|
542
|
+
* This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
|
|
543
|
+
*
|
|
544
|
+
* @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
|
|
545
|
+
* Must have (maxSymbolValue + 1) entries.
|
|
546
|
+
* @param[in] count Histogram of the symbols.
|
|
547
|
+
* @param[in] maxSymbolValue Maximum symbol value.
|
|
548
|
+
* @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
|
|
549
|
+
*/
|
|
550
|
+
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
|
|
321
551
|
U32 n;
|
|
552
|
+
U32 const maxSymbolValue1 = maxSymbolValue+1;
|
|
553
|
+
|
|
554
|
+
/* Compute base and set curr to base.
|
|
555
|
+
* For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
|
|
556
|
+
* See HUF_getIndex to see bucketing strategy.
|
|
557
|
+
* We attribute each symbol to lowerRank's base value, because we want to know where
|
|
558
|
+
* each rank begins in the output, so for rank R we want to count ranks R+1 and above.
|
|
559
|
+
*/
|
|
560
|
+
ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
|
|
561
|
+
for (n = 0; n < maxSymbolValue1; ++n) {
|
|
562
|
+
U32 lowerRank = HUF_getIndex(count[n]);
|
|
563
|
+
assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
|
|
564
|
+
rankPosition[lowerRank].base++;
|
|
565
|
+
}
|
|
322
566
|
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
567
|
+
assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
|
|
568
|
+
/* Set up the rankPosition table */
|
|
569
|
+
for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
|
|
570
|
+
rankPosition[n-1].base += rankPosition[n].base;
|
|
571
|
+
rankPosition[n-1].curr = rankPosition[n-1].base;
|
|
327
572
|
}
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
for (n=0; n
|
|
573
|
+
|
|
574
|
+
/* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
|
|
575
|
+
for (n = 0; n < maxSymbolValue1; ++n) {
|
|
331
576
|
U32 const c = count[n];
|
|
332
|
-
U32 const r =
|
|
333
|
-
U32 pos =
|
|
334
|
-
|
|
335
|
-
huffNode[pos] = huffNode[pos-1];
|
|
336
|
-
pos--;
|
|
337
|
-
}
|
|
577
|
+
U32 const r = HUF_getIndex(c) + 1;
|
|
578
|
+
U32 const pos = rankPosition[r].curr++;
|
|
579
|
+
assert(pos < maxSymbolValue1);
|
|
338
580
|
huffNode[pos].count = c;
|
|
339
581
|
huffNode[pos].byte = (BYTE)n;
|
|
340
582
|
}
|
|
341
|
-
}
|
|
342
583
|
|
|
584
|
+
/* Sort each bucket. */
|
|
585
|
+
for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
|
|
586
|
+
U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
|
|
587
|
+
U32 const bucketStartIdx = rankPosition[n].base;
|
|
588
|
+
if (bucketSize > 1) {
|
|
589
|
+
assert(bucketStartIdx < maxSymbolValue1);
|
|
590
|
+
HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
|
|
591
|
+
}
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
assert(HUF_isSorted(huffNode, maxSymbolValue1));
|
|
595
|
+
}
|
|
343
596
|
|
|
344
597
|
/** HUF_buildCTable_wksp() :
|
|
345
598
|
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
|
|
346
|
-
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as
|
|
599
|
+
* `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
|
|
347
600
|
*/
|
|
348
601
|
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
|
|
349
|
-
|
|
350
|
-
|
|
602
|
+
|
|
603
|
+
/* HUF_buildTree():
|
|
604
|
+
* Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
|
|
605
|
+
*
|
|
606
|
+
* @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array.
|
|
607
|
+
* @param maxSymbolValue The maximum symbol value.
|
|
608
|
+
* @return The smallest node in the Huffman tree (by count).
|
|
609
|
+
*/
|
|
610
|
+
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
|
|
351
611
|
{
|
|
352
|
-
nodeElt* const huffNode0 =
|
|
353
|
-
|
|
354
|
-
U32 n, nonNullRank;
|
|
612
|
+
nodeElt* const huffNode0 = huffNode - 1;
|
|
613
|
+
int nonNullRank;
|
|
355
614
|
int lowS, lowN;
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
/* safety checks */
|
|
360
|
-
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
|
|
361
|
-
if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
|
|
362
|
-
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
|
|
363
|
-
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
|
|
364
|
-
memset(huffNode0, 0, sizeof(huffNodeTable));
|
|
365
|
-
|
|
366
|
-
/* sort, decreasing order */
|
|
367
|
-
HUF_sort(huffNode, count, maxSymbolValue);
|
|
368
|
-
|
|
615
|
+
int nodeNb = STARTNODE;
|
|
616
|
+
int n, nodeRoot;
|
|
369
617
|
/* init for parents */
|
|
370
|
-
nonNullRank = maxSymbolValue;
|
|
618
|
+
nonNullRank = (int)maxSymbolValue;
|
|
371
619
|
while(huffNode[nonNullRank].count == 0) nonNullRank--;
|
|
372
620
|
lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
|
|
373
621
|
huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
|
|
374
|
-
huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
|
|
622
|
+
huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
|
|
375
623
|
nodeNb++; lowS-=2;
|
|
376
624
|
for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
|
|
377
625
|
huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */
|
|
378
626
|
|
|
379
627
|
/* create parents */
|
|
380
628
|
while (nodeNb <= nodeRoot) {
|
|
381
|
-
|
|
382
|
-
|
|
629
|
+
int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
|
|
630
|
+
int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
|
|
383
631
|
huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
|
|
384
|
-
huffNode[n1].parent = huffNode[n2].parent = nodeNb;
|
|
632
|
+
huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
|
|
385
633
|
nodeNb++;
|
|
386
634
|
}
|
|
387
635
|
|
|
@@ -392,126 +640,396 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
|
|
|
392
640
|
for (n=0; n<=nonNullRank; n++)
|
|
393
641
|
huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
|
|
394
642
|
|
|
395
|
-
|
|
396
|
-
maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
|
|
397
|
-
|
|
398
|
-
/* fill result into tree (val, nbBits) */
|
|
399
|
-
{ U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
|
|
400
|
-
U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
|
|
401
|
-
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
|
|
402
|
-
for (n=0; n<=nonNullRank; n++)
|
|
403
|
-
nbPerRank[huffNode[n].nbBits]++;
|
|
404
|
-
/* determine stating value per rank */
|
|
405
|
-
{ U16 min = 0;
|
|
406
|
-
for (n=maxNbBits; n>0; n--) {
|
|
407
|
-
valPerRank[n] = min; /* get starting value within each rank */
|
|
408
|
-
min += nbPerRank[n];
|
|
409
|
-
min >>= 1;
|
|
410
|
-
} }
|
|
411
|
-
for (n=0; n<=maxSymbolValue; n++)
|
|
412
|
-
tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
|
|
413
|
-
for (n=0; n<=maxSymbolValue; n++)
|
|
414
|
-
tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
|
|
415
|
-
}
|
|
416
|
-
|
|
417
|
-
return maxNbBits;
|
|
643
|
+
return nonNullRank;
|
|
418
644
|
}
|
|
419
645
|
|
|
420
|
-
/**
|
|
421
|
-
*
|
|
422
|
-
*
|
|
646
|
+
/**
|
|
647
|
+
* HUF_buildCTableFromTree():
|
|
648
|
+
* Build the CTable given the Huffman tree in huffNode.
|
|
649
|
+
*
|
|
650
|
+
* @param[out] CTable The output Huffman CTable.
|
|
651
|
+
* @param huffNode The Huffman tree.
|
|
652
|
+
* @param nonNullRank The last and smallest node in the Huffman tree.
|
|
653
|
+
* @param maxSymbolValue The maximum symbol value.
|
|
654
|
+
* @param maxNbBits The exact maximum number of bits used in the Huffman tree.
|
|
423
655
|
*/
|
|
424
|
-
|
|
656
|
+
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
|
|
425
657
|
{
|
|
426
|
-
|
|
427
|
-
|
|
658
|
+
HUF_CElt* const ct = CTable + 1;
|
|
659
|
+
/* fill result into ctable (val, nbBits) */
|
|
660
|
+
int n;
|
|
661
|
+
U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
|
|
662
|
+
U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
|
|
663
|
+
int const alphabetSize = (int)(maxSymbolValue + 1);
|
|
664
|
+
for (n=0; n<=nonNullRank; n++)
|
|
665
|
+
nbPerRank[huffNode[n].nbBits]++;
|
|
666
|
+
/* determine starting value per rank */
|
|
667
|
+
{ U16 min = 0;
|
|
668
|
+
for (n=(int)maxNbBits; n>0; n--) {
|
|
669
|
+
valPerRank[n] = min; /* get starting value within each rank */
|
|
670
|
+
min += nbPerRank[n];
|
|
671
|
+
min >>= 1;
|
|
672
|
+
} }
|
|
673
|
+
for (n=0; n<alphabetSize; n++)
|
|
674
|
+
HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
|
|
675
|
+
for (n=0; n<alphabetSize; n++)
|
|
676
|
+
HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
|
|
677
|
+
CTable[0] = maxNbBits;
|
|
428
678
|
}
|
|
429
679
|
|
|
430
|
-
|
|
680
|
+
size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
|
|
431
681
|
{
|
|
682
|
+
HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
|
|
683
|
+
nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
|
|
684
|
+
nodeElt* const huffNode = huffNode0+1;
|
|
685
|
+
int nonNullRank;
|
|
686
|
+
|
|
687
|
+
/* safety checks */
|
|
688
|
+
if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
|
|
689
|
+
return ERROR(workSpace_tooSmall);
|
|
690
|
+
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
|
|
691
|
+
if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
|
|
692
|
+
return ERROR(maxSymbolValue_tooLarge);
|
|
693
|
+
ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
|
|
694
|
+
|
|
695
|
+
/* sort, decreasing order */
|
|
696
|
+
HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
|
|
697
|
+
|
|
698
|
+
/* build tree */
|
|
699
|
+
nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
|
|
700
|
+
|
|
701
|
+
/* enforce maxTableLog */
|
|
702
|
+
maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
|
|
703
|
+
if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
|
|
704
|
+
|
|
705
|
+
HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
|
|
706
|
+
|
|
707
|
+
return maxNbBits;
|
|
708
|
+
}
|
|
709
|
+
|
|
710
|
+
size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
|
|
711
|
+
{
|
|
712
|
+
HUF_CElt const* ct = CTable + 1;
|
|
432
713
|
size_t nbBits = 0;
|
|
433
714
|
int s;
|
|
434
715
|
for (s = 0; s <= (int)maxSymbolValue; ++s) {
|
|
435
|
-
nbBits +=
|
|
716
|
+
nbBits += HUF_getNbBits(ct[s]) * count[s];
|
|
436
717
|
}
|
|
437
718
|
return nbBits >> 3;
|
|
438
719
|
}
|
|
439
720
|
|
|
440
|
-
|
|
721
|
+
int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
|
|
722
|
+
HUF_CElt const* ct = CTable + 1;
|
|
441
723
|
int bad = 0;
|
|
442
724
|
int s;
|
|
443
725
|
for (s = 0; s <= (int)maxSymbolValue; ++s) {
|
|
444
|
-
bad |= (count[s] != 0) & (
|
|
726
|
+
bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
|
|
445
727
|
}
|
|
446
728
|
return !bad;
|
|
447
729
|
}
|
|
448
730
|
|
|
449
731
|
size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
|
|
450
732
|
|
|
733
|
+
/** HUF_CStream_t:
|
|
734
|
+
* Huffman uses its own BIT_CStream_t implementation.
|
|
735
|
+
* There are three major differences from BIT_CStream_t:
|
|
736
|
+
* 1. HUF_addBits() takes a HUF_CElt (size_t) which is
|
|
737
|
+
* the pair (nbBits, value) in the format:
|
|
738
|
+
* format:
|
|
739
|
+
* - Bits [0, 4) = nbBits
|
|
740
|
+
* - Bits [4, 64 - nbBits) = 0
|
|
741
|
+
* - Bits [64 - nbBits, 64) = value
|
|
742
|
+
* 2. The bitContainer is built from the upper bits and
|
|
743
|
+
* right shifted. E.g. to add a new value of N bits
|
|
744
|
+
* you right shift the bitContainer by N, then or in
|
|
745
|
+
* the new value into the N upper bits.
|
|
746
|
+
* 3. The bitstream has two bit containers. You can add
|
|
747
|
+
* bits to the second container and merge them into
|
|
748
|
+
* the first container.
|
|
749
|
+
*/
|
|
750
|
+
|
|
751
|
+
#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
|
|
752
|
+
|
|
753
|
+
typedef struct {
|
|
754
|
+
size_t bitContainer[2];
|
|
755
|
+
size_t bitPos[2];
|
|
756
|
+
|
|
757
|
+
BYTE* startPtr;
|
|
758
|
+
BYTE* ptr;
|
|
759
|
+
BYTE* endPtr;
|
|
760
|
+
} HUF_CStream_t;
|
|
761
|
+
|
|
762
|
+
/**! HUF_initCStream():
|
|
763
|
+
* Initializes the bitstream.
|
|
764
|
+
* @returns 0 or an error code.
|
|
765
|
+
*/
|
|
766
|
+
static size_t HUF_initCStream(HUF_CStream_t* bitC,
|
|
767
|
+
void* startPtr, size_t dstCapacity)
|
|
768
|
+
{
|
|
769
|
+
ZSTD_memset(bitC, 0, sizeof(*bitC));
|
|
770
|
+
bitC->startPtr = (BYTE*)startPtr;
|
|
771
|
+
bitC->ptr = bitC->startPtr;
|
|
772
|
+
bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
|
|
773
|
+
if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
|
|
774
|
+
return 0;
|
|
775
|
+
}
|
|
776
|
+
|
|
777
|
+
/*! HUF_addBits():
|
|
778
|
+
* Adds the symbol stored in HUF_CElt elt to the bitstream.
|
|
779
|
+
*
|
|
780
|
+
* @param elt The element we're adding. This is a (nbBits, value) pair.
|
|
781
|
+
* See the HUF_CStream_t docs for the format.
|
|
782
|
+
* @param idx Insert into the bitstream at this idx.
|
|
783
|
+
* @param kFast This is a template parameter. If the bitstream is guaranteed
|
|
784
|
+
* to have at least 4 unused bits after this call it may be 1,
|
|
785
|
+
* otherwise it must be 0. HUF_addBits() is faster when fast is set.
|
|
786
|
+
*/
|
|
787
|
+
FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
|
|
788
|
+
{
|
|
789
|
+
assert(idx <= 1);
|
|
790
|
+
assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
|
|
791
|
+
/* This is efficient on x86-64 with BMI2 because shrx
|
|
792
|
+
* only reads the low 6 bits of the register. The compiler
|
|
793
|
+
* knows this and elides the mask. When fast is set,
|
|
794
|
+
* every operation can use the same value loaded from elt.
|
|
795
|
+
*/
|
|
796
|
+
bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
|
|
797
|
+
bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
|
|
798
|
+
/* We only read the low 8 bits of bitC->bitPos[idx] so it
|
|
799
|
+
* doesn't matter that the high bits have noise from the value.
|
|
800
|
+
*/
|
|
801
|
+
bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
|
|
802
|
+
assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
|
803
|
+
/* The last 4-bits of elt are dirty if fast is set,
|
|
804
|
+
* so we must not be overwriting bits that have already been
|
|
805
|
+
* inserted into the bit container.
|
|
806
|
+
*/
|
|
807
|
+
#if DEBUGLEVEL >= 1
|
|
808
|
+
{
|
|
809
|
+
size_t const nbBits = HUF_getNbBits(elt);
|
|
810
|
+
size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
|
|
811
|
+
(void)dirtyBits;
|
|
812
|
+
/* Middle bits are 0. */
|
|
813
|
+
assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
|
|
814
|
+
/* We didn't overwrite any bits in the bit container. */
|
|
815
|
+
assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
|
816
|
+
(void)dirtyBits;
|
|
817
|
+
}
|
|
818
|
+
#endif
|
|
819
|
+
}
|
|
820
|
+
|
|
821
|
+
FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
|
|
822
|
+
{
|
|
823
|
+
bitC->bitContainer[1] = 0;
|
|
824
|
+
bitC->bitPos[1] = 0;
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
/*! HUF_mergeIndex1() :
|
|
828
|
+
* Merges the bit container @ index 1 into the bit container @ index 0
|
|
829
|
+
* and zeros the bit container @ index 1.
|
|
830
|
+
*/
|
|
831
|
+
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
|
|
832
|
+
{
|
|
833
|
+
assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
|
|
834
|
+
bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
|
|
835
|
+
bitC->bitContainer[0] |= bitC->bitContainer[1];
|
|
836
|
+
bitC->bitPos[0] += bitC->bitPos[1];
|
|
837
|
+
assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
|
|
838
|
+
}
|
|
839
|
+
|
|
840
|
+
/*! HUF_flushBits() :
|
|
841
|
+
* Flushes the bits in the bit container @ index 0.
|
|
842
|
+
*
|
|
843
|
+
* @post bitPos will be < 8.
|
|
844
|
+
* @param kFast If kFast is set then we must know a-priori that
|
|
845
|
+
* the bit container will not overflow.
|
|
846
|
+
*/
|
|
847
|
+
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
|
|
848
|
+
{
|
|
849
|
+
/* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
|
|
850
|
+
size_t const nbBits = bitC->bitPos[0] & 0xFF;
|
|
851
|
+
size_t const nbBytes = nbBits >> 3;
|
|
852
|
+
/* The top nbBits bits of bitContainer are the ones we need. */
|
|
853
|
+
size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
|
|
854
|
+
/* Mask bitPos to account for the bytes we consumed. */
|
|
855
|
+
bitC->bitPos[0] &= 7;
|
|
856
|
+
assert(nbBits > 0);
|
|
857
|
+
assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
|
|
858
|
+
assert(bitC->ptr <= bitC->endPtr);
|
|
859
|
+
MEM_writeLEST(bitC->ptr, bitContainer);
|
|
860
|
+
bitC->ptr += nbBytes;
|
|
861
|
+
assert(!kFast || bitC->ptr <= bitC->endPtr);
|
|
862
|
+
if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
|
|
863
|
+
/* bitContainer doesn't need to be modified because the leftover
|
|
864
|
+
* bits are already the top bitPos bits. And we don't care about
|
|
865
|
+
* noise in the lower values.
|
|
866
|
+
*/
|
|
867
|
+
}
|
|
868
|
+
|
|
869
|
+
/*! HUF_endMark()
|
|
870
|
+
* @returns The Huffman stream end mark: A 1-bit value = 1.
|
|
871
|
+
*/
|
|
872
|
+
static HUF_CElt HUF_endMark(void)
|
|
873
|
+
{
|
|
874
|
+
HUF_CElt endMark;
|
|
875
|
+
HUF_setNbBits(&endMark, 1);
|
|
876
|
+
HUF_setValue(&endMark, 1);
|
|
877
|
+
return endMark;
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
/*! HUF_closeCStream() :
|
|
881
|
+
* @return Size of CStream, in bytes,
|
|
882
|
+
* or 0 if it could not fit into dstBuffer */
|
|
883
|
+
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
|
|
884
|
+
{
|
|
885
|
+
HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
|
|
886
|
+
HUF_flushBits(bitC, /* kFast */ 0);
|
|
887
|
+
{
|
|
888
|
+
size_t const nbBits = bitC->bitPos[0] & 0xFF;
|
|
889
|
+
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
|
|
890
|
+
return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
|
|
891
|
+
}
|
|
892
|
+
}
|
|
893
|
+
|
|
451
894
|
FORCE_INLINE_TEMPLATE void
|
|
452
|
-
HUF_encodeSymbol(
|
|
895
|
+
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
|
|
453
896
|
{
|
|
454
|
-
|
|
897
|
+
HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
|
|
455
898
|
}
|
|
456
899
|
|
|
457
|
-
|
|
900
|
+
FORCE_INLINE_TEMPLATE void
|
|
901
|
+
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
|
|
902
|
+
const BYTE* ip, size_t srcSize,
|
|
903
|
+
const HUF_CElt* ct,
|
|
904
|
+
int kUnroll, int kFastFlush, int kLastFast)
|
|
905
|
+
{
|
|
906
|
+
/* Join to kUnroll */
|
|
907
|
+
int n = (int)srcSize;
|
|
908
|
+
int rem = n % kUnroll;
|
|
909
|
+
if (rem > 0) {
|
|
910
|
+
for (; rem > 0; --rem) {
|
|
911
|
+
HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
|
|
912
|
+
}
|
|
913
|
+
HUF_flushBits(bitC, kFastFlush);
|
|
914
|
+
}
|
|
915
|
+
assert(n % kUnroll == 0);
|
|
916
|
+
|
|
917
|
+
/* Join to 2 * kUnroll */
|
|
918
|
+
if (n % (2 * kUnroll)) {
|
|
919
|
+
int u;
|
|
920
|
+
for (u = 1; u < kUnroll; ++u) {
|
|
921
|
+
HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
|
|
922
|
+
}
|
|
923
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
|
|
924
|
+
HUF_flushBits(bitC, kFastFlush);
|
|
925
|
+
n -= kUnroll;
|
|
926
|
+
}
|
|
927
|
+
assert(n % (2 * kUnroll) == 0);
|
|
458
928
|
|
|
459
|
-
|
|
460
|
-
|
|
929
|
+
for (; n>0; n-= 2 * kUnroll) {
|
|
930
|
+
/* Encode kUnroll symbols into the bitstream @ index 0. */
|
|
931
|
+
int u;
|
|
932
|
+
for (u = 1; u < kUnroll; ++u) {
|
|
933
|
+
HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
|
|
934
|
+
}
|
|
935
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
|
|
936
|
+
HUF_flushBits(bitC, kFastFlush);
|
|
937
|
+
/* Encode kUnroll symbols into the bitstream @ index 1.
|
|
938
|
+
* This allows us to start filling the bit container
|
|
939
|
+
* without any data dependencies.
|
|
940
|
+
*/
|
|
941
|
+
HUF_zeroIndex1(bitC);
|
|
942
|
+
for (u = 1; u < kUnroll; ++u) {
|
|
943
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
|
|
944
|
+
}
|
|
945
|
+
HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
|
|
946
|
+
/* Merge bitstream @ index 1 into the bitstream @ index 0 */
|
|
947
|
+
HUF_mergeIndex1(bitC);
|
|
948
|
+
HUF_flushBits(bitC, kFastFlush);
|
|
949
|
+
}
|
|
950
|
+
assert(n == 0);
|
|
951
|
+
|
|
952
|
+
}
|
|
953
|
+
|
|
954
|
+
/**
|
|
955
|
+
* Returns a tight upper bound on the output space needed by Huffman
|
|
956
|
+
* with 8 bytes buffer to handle over-writes. If the output is at least
|
|
957
|
+
* this large we don't need to do bounds checks during Huffman encoding.
|
|
958
|
+
*/
|
|
959
|
+
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
|
|
960
|
+
{
|
|
961
|
+
return ((srcSize * tableLog) >> 3) + 8;
|
|
962
|
+
}
|
|
461
963
|
|
|
462
|
-
#define HUF_FLUSHBITS_2(stream) \
|
|
463
|
-
if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
|
|
464
964
|
|
|
465
965
|
FORCE_INLINE_TEMPLATE size_t
|
|
466
966
|
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
|
|
467
967
|
const void* src, size_t srcSize,
|
|
468
968
|
const HUF_CElt* CTable)
|
|
469
969
|
{
|
|
970
|
+
U32 const tableLog = (U32)CTable[0];
|
|
971
|
+
HUF_CElt const* ct = CTable + 1;
|
|
470
972
|
const BYTE* ip = (const BYTE*) src;
|
|
471
973
|
BYTE* const ostart = (BYTE*)dst;
|
|
472
974
|
BYTE* const oend = ostart + dstSize;
|
|
473
975
|
BYTE* op = ostart;
|
|
474
|
-
|
|
475
|
-
BIT_CStream_t bitC;
|
|
976
|
+
HUF_CStream_t bitC;
|
|
476
977
|
|
|
477
978
|
/* init */
|
|
478
979
|
if (dstSize < 8) return 0; /* not enough space to compress */
|
|
479
|
-
{ size_t const initErr =
|
|
980
|
+
{ size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
|
|
480
981
|
if (HUF_isError(initErr)) return 0; }
|
|
481
982
|
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
{
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
983
|
+
if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
|
|
984
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
|
|
985
|
+
else {
|
|
986
|
+
if (MEM_32bits()) {
|
|
987
|
+
switch (tableLog) {
|
|
988
|
+
case 11:
|
|
989
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
990
|
+
break;
|
|
991
|
+
case 10: ZSTD_FALLTHROUGH;
|
|
992
|
+
case 9: ZSTD_FALLTHROUGH;
|
|
993
|
+
case 8:
|
|
994
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
|
|
995
|
+
break;
|
|
996
|
+
case 7: ZSTD_FALLTHROUGH;
|
|
997
|
+
default:
|
|
998
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
|
|
999
|
+
break;
|
|
1000
|
+
}
|
|
1001
|
+
} else {
|
|
1002
|
+
switch (tableLog) {
|
|
1003
|
+
case 11:
|
|
1004
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
1005
|
+
break;
|
|
1006
|
+
case 10:
|
|
1007
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
|
|
1008
|
+
break;
|
|
1009
|
+
case 9:
|
|
1010
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
1011
|
+
break;
|
|
1012
|
+
case 8:
|
|
1013
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
1014
|
+
break;
|
|
1015
|
+
case 7:
|
|
1016
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
|
|
1017
|
+
break;
|
|
1018
|
+
case 6: ZSTD_FALLTHROUGH;
|
|
1019
|
+
default:
|
|
1020
|
+
HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
|
|
1021
|
+
break;
|
|
1022
|
+
}
|
|
1023
|
+
}
|
|
507
1024
|
}
|
|
1025
|
+
assert(bitC.ptr <= bitC.endPtr);
|
|
508
1026
|
|
|
509
|
-
return
|
|
1027
|
+
return HUF_closeCStream(&bitC);
|
|
510
1028
|
}
|
|
511
1029
|
|
|
512
1030
|
#if DYNAMIC_BMI2
|
|
513
1031
|
|
|
514
|
-
static
|
|
1032
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
|
515
1033
|
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
|
|
516
1034
|
const void* src, size_t srcSize,
|
|
517
1035
|
const HUF_CElt* CTable)
|
|
@@ -553,9 +1071,13 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
|
|
|
553
1071
|
|
|
554
1072
|
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
|
|
555
1073
|
{
|
|
556
|
-
return
|
|
1074
|
+
return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
|
|
557
1075
|
}
|
|
558
1076
|
|
|
1077
|
+
size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
|
|
1078
|
+
{
|
|
1079
|
+
return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
|
|
1080
|
+
}
|
|
559
1081
|
|
|
560
1082
|
static size_t
|
|
561
1083
|
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
|
@@ -573,41 +1095,48 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
|
|
|
573
1095
|
if (srcSize < 12) return 0; /* no saving possible : too small input */
|
|
574
1096
|
op += 6; /* jumpTable */
|
|
575
1097
|
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
1098
|
+
assert(op <= oend);
|
|
1099
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
|
|
1100
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
|
579
1101
|
MEM_writeLE16(ostart, (U16)cSize);
|
|
580
1102
|
op += cSize;
|
|
581
1103
|
}
|
|
582
1104
|
|
|
583
1105
|
ip += segmentSize;
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
1106
|
+
assert(op <= oend);
|
|
1107
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
|
|
1108
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
|
587
1109
|
MEM_writeLE16(ostart+2, (U16)cSize);
|
|
588
1110
|
op += cSize;
|
|
589
1111
|
}
|
|
590
1112
|
|
|
591
1113
|
ip += segmentSize;
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
1114
|
+
assert(op <= oend);
|
|
1115
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
|
|
1116
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
|
595
1117
|
MEM_writeLE16(ostart+4, (U16)cSize);
|
|
596
1118
|
op += cSize;
|
|
597
1119
|
}
|
|
598
1120
|
|
|
599
1121
|
ip += segmentSize;
|
|
600
|
-
|
|
601
|
-
|
|
1122
|
+
assert(op <= oend);
|
|
1123
|
+
assert(ip <= iend);
|
|
1124
|
+
{ CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
|
|
1125
|
+
if (cSize == 0 || cSize > 65535) return 0;
|
|
602
1126
|
op += cSize;
|
|
603
1127
|
}
|
|
604
1128
|
|
|
605
|
-
return op-ostart;
|
|
1129
|
+
return (size_t)(op-ostart);
|
|
606
1130
|
}
|
|
607
1131
|
|
|
608
1132
|
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
|
|
609
1133
|
{
|
|
610
|
-
return
|
|
1134
|
+
return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
|
|
1135
|
+
}
|
|
1136
|
+
|
|
1137
|
+
size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
|
|
1138
|
+
{
|
|
1139
|
+
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
|
|
611
1140
|
}
|
|
612
1141
|
|
|
613
1142
|
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
|
|
@@ -618,24 +1147,33 @@ static size_t HUF_compressCTable_internal(
|
|
|
618
1147
|
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
|
|
619
1148
|
{
|
|
620
1149
|
size_t const cSize = (nbStreams==HUF_singleStream) ?
|
|
621
|
-
HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
|
|
622
|
-
HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
|
|
1150
|
+
HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
|
|
1151
|
+
HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
|
|
623
1152
|
if (HUF_isError(cSize)) { return cSize; }
|
|
624
1153
|
if (cSize==0) { return 0; } /* uncompressible */
|
|
625
1154
|
op += cSize;
|
|
626
1155
|
/* check compressibility */
|
|
1156
|
+
assert(op >= ostart);
|
|
627
1157
|
if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
|
|
628
|
-
return op-ostart;
|
|
1158
|
+
return (size_t)(op-ostart);
|
|
629
1159
|
}
|
|
630
1160
|
|
|
631
1161
|
typedef struct {
|
|
632
1162
|
unsigned count[HUF_SYMBOLVALUE_MAX + 1];
|
|
633
|
-
HUF_CElt CTable[HUF_SYMBOLVALUE_MAX
|
|
634
|
-
|
|
1163
|
+
HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
|
|
1164
|
+
union {
|
|
1165
|
+
HUF_buildCTable_wksp_tables buildCTable_wksp;
|
|
1166
|
+
HUF_WriteCTableWksp writeCTable_wksp;
|
|
1167
|
+
U32 hist_wksp[HIST_WKSP_SIZE_U32];
|
|
1168
|
+
} wksps;
|
|
635
1169
|
} HUF_compress_tables_t;
|
|
636
1170
|
|
|
1171
|
+
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
|
|
1172
|
+
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
|
|
1173
|
+
|
|
637
1174
|
/* HUF_compress_internal() :
|
|
638
|
-
* `
|
|
1175
|
+
* `workSpace_align4` must be aligned on 4-bytes boundaries,
|
|
1176
|
+
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
|
|
639
1177
|
static size_t
|
|
640
1178
|
HUF_compress_internal (void* dst, size_t dstSize,
|
|
641
1179
|
const void* src, size_t srcSize,
|
|
@@ -643,16 +1181,17 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
643
1181
|
HUF_nbStreams_e nbStreams,
|
|
644
1182
|
void* workSpace, size_t wkspSize,
|
|
645
1183
|
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
|
|
646
|
-
const int bmi2)
|
|
1184
|
+
const int bmi2, unsigned suspectUncompressible)
|
|
647
1185
|
{
|
|
648
|
-
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
|
|
1186
|
+
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
|
|
649
1187
|
BYTE* const ostart = (BYTE*)dst;
|
|
650
1188
|
BYTE* const oend = ostart + dstSize;
|
|
651
1189
|
BYTE* op = ostart;
|
|
652
1190
|
|
|
1191
|
+
HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
|
|
1192
|
+
|
|
653
1193
|
/* checks & inits */
|
|
654
|
-
if (
|
|
655
|
-
if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
|
|
1194
|
+
if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
|
|
656
1195
|
if (!srcSize) return 0; /* Uncompressed */
|
|
657
1196
|
if (!dstSize) return 0; /* cannot fit anything within dst budget */
|
|
658
1197
|
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
|
|
@@ -668,8 +1207,23 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
668
1207
|
nbStreams, oldHufTable, bmi2);
|
|
669
1208
|
}
|
|
670
1209
|
|
|
1210
|
+
/* If uncompressible data is suspected, do a smaller sampling first */
|
|
1211
|
+
DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
|
|
1212
|
+
if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
|
|
1213
|
+
size_t largestTotal = 0;
|
|
1214
|
+
{ unsigned maxSymbolValueBegin = maxSymbolValue;
|
|
1215
|
+
CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
|
|
1216
|
+
largestTotal += largestBegin;
|
|
1217
|
+
}
|
|
1218
|
+
{ unsigned maxSymbolValueEnd = maxSymbolValue;
|
|
1219
|
+
CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
|
|
1220
|
+
largestTotal += largestEnd;
|
|
1221
|
+
}
|
|
1222
|
+
if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */
|
|
1223
|
+
}
|
|
1224
|
+
|
|
671
1225
|
/* Scan input and build symbol stats */
|
|
672
|
-
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize,
|
|
1226
|
+
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
|
|
673
1227
|
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
|
|
674
1228
|
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
|
|
675
1229
|
}
|
|
@@ -691,16 +1245,20 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
691
1245
|
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
|
692
1246
|
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
|
|
693
1247
|
maxSymbolValue, huffLog,
|
|
694
|
-
table->
|
|
1248
|
+
&table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
|
|
695
1249
|
CHECK_F(maxBits);
|
|
696
1250
|
huffLog = (U32)maxBits;
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
1251
|
+
}
|
|
1252
|
+
/* Zero unused symbols in CTable, so we can check it for validity */
|
|
1253
|
+
{
|
|
1254
|
+
size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
|
|
1255
|
+
size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
|
|
1256
|
+
ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
|
|
700
1257
|
}
|
|
701
1258
|
|
|
702
1259
|
/* Write table description header */
|
|
703
|
-
{ CHECK_V_F(hSize,
|
|
1260
|
+
{ CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
|
|
1261
|
+
&table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
|
|
704
1262
|
/* Check if using previous huffman table is beneficial */
|
|
705
1263
|
if (repeat && *repeat != HUF_repeat_none) {
|
|
706
1264
|
size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
|
|
@@ -716,7 +1274,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
|
|
|
716
1274
|
op += hSize;
|
|
717
1275
|
if (repeat) { *repeat = HUF_repeat_none; }
|
|
718
1276
|
if (oldHufTable)
|
|
719
|
-
|
|
1277
|
+
ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
|
|
720
1278
|
}
|
|
721
1279
|
return HUF_compressCTable_internal(ostart, op, oend,
|
|
722
1280
|
src, srcSize,
|
|
@@ -732,27 +1290,20 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
|
|
|
732
1290
|
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
733
1291
|
maxSymbolValue, huffLog, HUF_singleStream,
|
|
734
1292
|
workSpace, wkspSize,
|
|
735
|
-
NULL, NULL, 0, 0 /*bmi2
|
|
1293
|
+
NULL, NULL, 0, 0 /*bmi2*/, 0);
|
|
736
1294
|
}
|
|
737
1295
|
|
|
738
1296
|
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
|
|
739
1297
|
const void* src, size_t srcSize,
|
|
740
1298
|
unsigned maxSymbolValue, unsigned huffLog,
|
|
741
1299
|
void* workSpace, size_t wkspSize,
|
|
742
|
-
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
|
|
1300
|
+
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
|
|
1301
|
+
int bmi2, unsigned suspectUncompressible)
|
|
743
1302
|
{
|
|
744
1303
|
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
745
1304
|
maxSymbolValue, huffLog, HUF_singleStream,
|
|
746
1305
|
workSpace, wkspSize, hufTable,
|
|
747
|
-
repeat, preferRepeat, bmi2);
|
|
748
|
-
}
|
|
749
|
-
|
|
750
|
-
size_t HUF_compress1X (void* dst, size_t dstSize,
|
|
751
|
-
const void* src, size_t srcSize,
|
|
752
|
-
unsigned maxSymbolValue, unsigned huffLog)
|
|
753
|
-
{
|
|
754
|
-
unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
|
|
755
|
-
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
|
1306
|
+
repeat, preferRepeat, bmi2, suspectUncompressible);
|
|
756
1307
|
}
|
|
757
1308
|
|
|
758
1309
|
/* HUF_compress4X_repeat():
|
|
@@ -766,29 +1317,49 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
|
|
|
766
1317
|
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
767
1318
|
maxSymbolValue, huffLog, HUF_fourStreams,
|
|
768
1319
|
workSpace, wkspSize,
|
|
769
|
-
NULL, NULL, 0, 0 /*bmi2
|
|
1320
|
+
NULL, NULL, 0, 0 /*bmi2*/, 0);
|
|
770
1321
|
}
|
|
771
1322
|
|
|
772
1323
|
/* HUF_compress4X_repeat():
|
|
773
1324
|
* compress input using 4 streams.
|
|
1325
|
+
* consider skipping quickly
|
|
774
1326
|
* re-use an existing huffman compression table */
|
|
775
1327
|
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
|
|
776
1328
|
const void* src, size_t srcSize,
|
|
777
1329
|
unsigned maxSymbolValue, unsigned huffLog,
|
|
778
1330
|
void* workSpace, size_t wkspSize,
|
|
779
|
-
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
|
|
1331
|
+
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
|
|
780
1332
|
{
|
|
781
1333
|
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
|
782
1334
|
maxSymbolValue, huffLog, HUF_fourStreams,
|
|
783
1335
|
workSpace, wkspSize,
|
|
784
|
-
hufTable, repeat, preferRepeat, bmi2);
|
|
1336
|
+
hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
|
|
1337
|
+
}
|
|
1338
|
+
|
|
1339
|
+
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
|
|
1340
|
+
/** HUF_buildCTable() :
|
|
1341
|
+
* @return : maxNbBits
|
|
1342
|
+
* Note : count is used before tree is written, so they can safely overlap
|
|
1343
|
+
*/
|
|
1344
|
+
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
|
|
1345
|
+
{
|
|
1346
|
+
HUF_buildCTable_wksp_tables workspace;
|
|
1347
|
+
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
|
|
1348
|
+
}
|
|
1349
|
+
|
|
1350
|
+
size_t HUF_compress1X (void* dst, size_t dstSize,
|
|
1351
|
+
const void* src, size_t srcSize,
|
|
1352
|
+
unsigned maxSymbolValue, unsigned huffLog)
|
|
1353
|
+
{
|
|
1354
|
+
U64 workSpace[HUF_WORKSPACE_SIZE_U64];
|
|
1355
|
+
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
|
785
1356
|
}
|
|
786
1357
|
|
|
787
1358
|
size_t HUF_compress2 (void* dst, size_t dstSize,
|
|
788
1359
|
const void* src, size_t srcSize,
|
|
789
1360
|
unsigned maxSymbolValue, unsigned huffLog)
|
|
790
1361
|
{
|
|
791
|
-
|
|
1362
|
+
U64 workSpace[HUF_WORKSPACE_SIZE_U64];
|
|
792
1363
|
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
|
|
793
1364
|
}
|
|
794
1365
|
|
|
@@ -796,3 +1367,4 @@ size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSi
|
|
|
796
1367
|
{
|
|
797
1368
|
return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
|
|
798
1369
|
}
|
|
1370
|
+
#endif
|