zstdlib 0.13.0-x86-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +6 -0
- data/CHANGES.md +107 -0
- data/Gemfile +3 -0
- data/README.md +107 -0
- data/Rakefile +59 -0
- data/ext/zstdlib_c/extconf.rb +59 -0
- data/ext/zstdlib_c/ruby/zlib-2.2/zstdlib.c +4675 -0
- data/ext/zstdlib_c/ruby/zlib-2.3/zstdlib.c +4702 -0
- data/ext/zstdlib_c/ruby/zlib-2.4/zstdlib.c +4859 -0
- data/ext/zstdlib_c/ruby/zlib-2.5/zstdlib.c +4864 -0
- data/ext/zstdlib_c/ruby/zlib-2.6/zstdlib.c +4906 -0
- data/ext/zstdlib_c/ruby/zlib-2.7/zstdlib.c +4895 -0
- data/ext/zstdlib_c/ruby/zlib-3.0/zstdlib.c +4994 -0
- data/ext/zstdlib_c/ruby/zlib-3.1/zstdlib.c +5076 -0
- data/ext/zstdlib_c/ruby/zlib-3.2/zstdlib.c +5090 -0
- data/ext/zstdlib_c/ruby/zlib-3.3/zstdlib.c +5090 -0
- data/ext/zstdlib_c/zlib-1.3.1/adler32.c +164 -0
- data/ext/zstdlib_c/zlib-1.3.1/compress.c +75 -0
- data/ext/zstdlib_c/zlib-1.3.1/crc32.c +1049 -0
- data/ext/zstdlib_c/zlib-1.3.1/crc32.h +9446 -0
- data/ext/zstdlib_c/zlib-1.3.1/deflate.c +2139 -0
- data/ext/zstdlib_c/zlib-1.3.1/deflate.h +377 -0
- data/ext/zstdlib_c/zlib-1.3.1/gzclose.c +23 -0
- data/ext/zstdlib_c/zlib-1.3.1/gzguts.h +214 -0
- data/ext/zstdlib_c/zlib-1.3.1/gzlib.c +582 -0
- data/ext/zstdlib_c/zlib-1.3.1/gzread.c +602 -0
- data/ext/zstdlib_c/zlib-1.3.1/gzwrite.c +631 -0
- data/ext/zstdlib_c/zlib-1.3.1/infback.c +628 -0
- data/ext/zstdlib_c/zlib-1.3.1/inffast.c +320 -0
- data/ext/zstdlib_c/zlib-1.3.1/inffast.h +11 -0
- data/ext/zstdlib_c/zlib-1.3.1/inffixed.h +94 -0
- data/ext/zstdlib_c/zlib-1.3.1/inflate.c +1526 -0
- data/ext/zstdlib_c/zlib-1.3.1/inflate.h +126 -0
- data/ext/zstdlib_c/zlib-1.3.1/inftrees.c +299 -0
- data/ext/zstdlib_c/zlib-1.3.1/inftrees.h +62 -0
- data/ext/zstdlib_c/zlib-1.3.1/trees.c +1117 -0
- data/ext/zstdlib_c/zlib-1.3.1/trees.h +128 -0
- data/ext/zstdlib_c/zlib-1.3.1/uncompr.c +85 -0
- data/ext/zstdlib_c/zlib-1.3.1/zconf.h +543 -0
- data/ext/zstdlib_c/zlib-1.3.1/zlib.h +1938 -0
- data/ext/zstdlib_c/zlib-1.3.1/zutil.c +299 -0
- data/ext/zstdlib_c/zlib-1.3.1/zutil.h +254 -0
- data/ext/zstdlib_c/zlib.mk +14 -0
- data/ext/zstdlib_c/zlibwrapper/zlibwrapper.c +10 -0
- data/ext/zstdlib_c/zlibwrapper.mk +14 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/allocations.h +55 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/bits.h +200 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/bitstream.h +457 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/compiler.h +450 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/cpu.h +249 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/debug.c +30 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/debug.h +116 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/entropy_common.c +340 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/error_private.c +63 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/error_private.h +168 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/fse.h +640 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/fse_decompress.c +313 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/huf.h +286 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/mem.h +426 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/pool.c +371 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/pool.h +90 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/portability_macros.h +158 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/threading.c +182 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/threading.h +150 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/xxhash.c +18 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/xxhash.h +7020 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/zstd_common.c +48 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/zstd_deps.h +111 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/zstd_internal.h +392 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/common/zstd_trace.h +163 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/clevels.h +134 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/fse_compress.c +625 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/hist.c +181 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/hist.h +75 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/huf_compress.c +1464 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress.c +7153 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress_internal.h +1534 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress_literals.c +235 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress_literals.h +39 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress_sequences.c +442 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress_superblock.c +688 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_cwksp.h +748 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_double_fast.c +770 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_double_fast.h +50 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_fast.c +968 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_fast.h +38 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_lazy.c +2199 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_lazy.h +202 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_ldm.c +730 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_ldm.h +117 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_opt.c +1576 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstd_opt.h +80 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstdmt_compress.c +1882 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/compress/zstdmt_compress.h +113 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/huf_decompress.c +1944 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/huf_decompress_amd64.S +595 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/zstd_ddict.c +244 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/zstd_ddict.h +44 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/zstd_decompress.c +2407 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/zstd_decompress_block.c +2215 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/zstd_decompress_block.h +73 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/decompress/zstd_decompress_internal.h +240 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/zdict.h +474 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/zstd.h +3089 -0
- data/ext/zstdlib_c/zstd-1.5.6/lib/zstd_errors.h +114 -0
- data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/gzclose.c +26 -0
- data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/gzcompatibility.h +68 -0
- data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/gzguts.h +229 -0
- data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/gzlib.c +587 -0
- data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/gzread.c +637 -0
- data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/gzwrite.c +631 -0
- data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/zstd_zlibwrapper.c +1200 -0
- data/ext/zstdlib_c/zstd-1.5.6/zlibWrapper/zstd_zlibwrapper.h +91 -0
- data/ext/zstdlib_c/zstd.mk +15 -0
- data/lib/2.4/zstdlib_c.so +0 -0
- data/lib/2.5/zstdlib_c.so +0 -0
- data/lib/2.6/zstdlib_c.so +0 -0
- data/lib/2.7/zstdlib_c.so +0 -0
- data/lib/3.0/zstdlib_c.so +0 -0
- data/lib/3.1/zstdlib_c.so +0 -0
- data/lib/3.2/zstdlib_c.so +0 -0
- data/lib/3.3/zstdlib_c.so +0 -0
- data/lib/zstdlib.rb +6 -0
- data/test/zstdlib_test.rb +21 -0
- metadata +243 -0
@@ -0,0 +1,442 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
9
|
+
*/
|
10
|
+
|
11
|
+
/*-*************************************
|
12
|
+
* Dependencies
|
13
|
+
***************************************/
|
14
|
+
#include "zstd_compress_sequences.h"
|
15
|
+
|
16
|
+
/**
 * Fixed-point lookup table for -log2(x / 256), indexed by x in [0, 256).
 *   - entry 0 is defined as 0
 *   - entry x (x > 0) is floor(-log2(x / 256) * 256)
 * The stored values are therefore expressed in 1/256th of a unit.
 */
static const unsigned kInverseProbabilityLog256[256] = {
       0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
    1130, 1100, 1073, 1047, 1024, 1001,  980,  960,  941,  923,  906,  889,
     874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,
     724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,
     618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,
     535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,
     468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,
     411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,
     362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,
     318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,
     279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,
     244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,
     212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,
     182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,
     155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,
     130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,
     106,  104,  102,  100,   98,   96,   94,   93,   91,   89,   87,   85,
      83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,
      62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,
      42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,
      23,   22,   20,   19,   17,   16,   14,   13,   11,   10,    8,    7,
       5,    4,    2,    1,
};
|
45
|
+
|
46
|
+
/**
 * Returns the 16-bit value stored in the second U16 slot of `ctable`,
 * which this file treats as the table's maxSymbolValue.
 */
static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
    /* Go through void* so the table can be viewed as an array of U16. */
    U16 const* const header = (U16 const*)(void const*)ctable;
    return MEM_read16(header + 1);
}
|
52
|
+
|
53
|
+
/**
 * Selects the normalized count given to low probability symbols.
 * Returns non-zero when ncount=-1 should be used, and 0 when ncount=1
 * should be used instead.
 */
static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
{
    /* Heuristic: this should cover most blocks <= 16K and
     * start to fade out after 16K to about 32K depending on
     * compressibility.
     */
    size_t const lowProbThreshold = 2048;
    return (nbSeq < lowProbThreshold) ? 0u : 1u;
}
|
65
|
+
|
66
|
+
/**
|
67
|
+
* Returns the cost in bytes of encoding the normalized count header.
|
68
|
+
* Returns an error if any of the helper functions return an error.
|
69
|
+
*/
|
70
|
+
static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
|
71
|
+
size_t const nbSeq, unsigned const FSELog)
|
72
|
+
{
|
73
|
+
BYTE wksp[FSE_NCOUNTBOUND];
|
74
|
+
S16 norm[MaxSeq + 1];
|
75
|
+
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
|
76
|
+
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
|
77
|
+
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
|
78
|
+
}
|
79
|
+
|
80
|
+
/**
|
81
|
+
* Returns the cost in bits of encoding the distribution described by count
|
82
|
+
* using the entropy bound.
|
83
|
+
*/
|
84
|
+
static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
|
85
|
+
{
|
86
|
+
unsigned cost = 0;
|
87
|
+
unsigned s;
|
88
|
+
|
89
|
+
assert(total > 0);
|
90
|
+
for (s = 0; s <= max; ++s) {
|
91
|
+
unsigned norm = (unsigned)((256 * count[s]) / total);
|
92
|
+
if (count[s] != 0 && norm == 0)
|
93
|
+
norm = 1;
|
94
|
+
assert(count[s] < total);
|
95
|
+
cost += count[s] * kInverseProbabilityLog256[norm];
|
96
|
+
}
|
97
|
+
return cost >> 8;
|
98
|
+
}
|
99
|
+
|
100
|
+
/**
|
101
|
+
* Returns the cost in bits of encoding the distribution in count using ctable.
|
102
|
+
* Returns an error if ctable cannot represent all the symbols in count.
|
103
|
+
*/
|
104
|
+
size_t ZSTD_fseBitCost(
|
105
|
+
FSE_CTable const* ctable,
|
106
|
+
unsigned const* count,
|
107
|
+
unsigned const max)
|
108
|
+
{
|
109
|
+
unsigned const kAccuracyLog = 8;
|
110
|
+
size_t cost = 0;
|
111
|
+
unsigned s;
|
112
|
+
FSE_CState_t cstate;
|
113
|
+
FSE_initCState(&cstate, ctable);
|
114
|
+
if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
|
115
|
+
DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
|
116
|
+
ZSTD_getFSEMaxSymbolValue(ctable), max);
|
117
|
+
return ERROR(GENERIC);
|
118
|
+
}
|
119
|
+
for (s = 0; s <= max; ++s) {
|
120
|
+
unsigned const tableLog = cstate.stateLog;
|
121
|
+
unsigned const badCost = (tableLog + 1) << kAccuracyLog;
|
122
|
+
unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
|
123
|
+
if (count[s] == 0)
|
124
|
+
continue;
|
125
|
+
if (bitCost >= badCost) {
|
126
|
+
DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
|
127
|
+
return ERROR(GENERIC);
|
128
|
+
}
|
129
|
+
cost += (size_t)count[s] * bitCost;
|
130
|
+
}
|
131
|
+
return cost >> kAccuracyLog;
|
132
|
+
}
|
133
|
+
|
134
|
+
/**
|
135
|
+
* Returns the cost in bits of encoding the distribution in count using the
|
136
|
+
* table described by norm. The max symbol support by norm is assumed >= max.
|
137
|
+
* norm must be valid for every symbol with non-zero probability in count.
|
138
|
+
*/
|
139
|
+
size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
|
140
|
+
unsigned const* count, unsigned const max)
|
141
|
+
{
|
142
|
+
unsigned const shift = 8 - accuracyLog;
|
143
|
+
size_t cost = 0;
|
144
|
+
unsigned s;
|
145
|
+
assert(accuracyLog <= 8);
|
146
|
+
for (s = 0; s <= max; ++s) {
|
147
|
+
unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
|
148
|
+
unsigned const norm256 = normAcc << shift;
|
149
|
+
assert(norm256 > 0);
|
150
|
+
assert(norm256 < 256);
|
151
|
+
cost += count[s] * kInverseProbabilityLog256[norm256];
|
152
|
+
}
|
153
|
+
return cost >> 8;
|
154
|
+
}
|
155
|
+
|
156
|
+
symbolEncodingType_e
|
157
|
+
ZSTD_selectEncodingType(
|
158
|
+
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
|
159
|
+
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
|
160
|
+
FSE_CTable const* prevCTable,
|
161
|
+
short const* defaultNorm, U32 defaultNormLog,
|
162
|
+
ZSTD_defaultPolicy_e const isDefaultAllowed,
|
163
|
+
ZSTD_strategy const strategy)
|
164
|
+
{
|
165
|
+
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
|
166
|
+
if (mostFrequent == nbSeq) {
|
167
|
+
*repeatMode = FSE_repeat_none;
|
168
|
+
if (isDefaultAllowed && nbSeq <= 2) {
|
169
|
+
/* Prefer set_basic over set_rle when there are 2 or fewer symbols,
|
170
|
+
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
|
171
|
+
* If basic encoding isn't possible, always choose RLE.
|
172
|
+
*/
|
173
|
+
DEBUGLOG(5, "Selected set_basic");
|
174
|
+
return set_basic;
|
175
|
+
}
|
176
|
+
DEBUGLOG(5, "Selected set_rle");
|
177
|
+
return set_rle;
|
178
|
+
}
|
179
|
+
if (strategy < ZSTD_lazy) {
|
180
|
+
if (isDefaultAllowed) {
|
181
|
+
size_t const staticFse_nbSeq_max = 1000;
|
182
|
+
size_t const mult = 10 - strategy;
|
183
|
+
size_t const baseLog = 3;
|
184
|
+
size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */
|
185
|
+
assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
|
186
|
+
assert(mult <= 9 && mult >= 7);
|
187
|
+
if ( (*repeatMode == FSE_repeat_valid)
|
188
|
+
&& (nbSeq < staticFse_nbSeq_max) ) {
|
189
|
+
DEBUGLOG(5, "Selected set_repeat");
|
190
|
+
return set_repeat;
|
191
|
+
}
|
192
|
+
if ( (nbSeq < dynamicFse_nbSeq_min)
|
193
|
+
|| (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
|
194
|
+
DEBUGLOG(5, "Selected set_basic");
|
195
|
+
/* The format allows default tables to be repeated, but it isn't useful.
|
196
|
+
* When using simple heuristics to select encoding type, we don't want
|
197
|
+
* to confuse these tables with dictionaries. When running more careful
|
198
|
+
* analysis, we don't need to waste time checking both repeating tables
|
199
|
+
* and default tables.
|
200
|
+
*/
|
201
|
+
*repeatMode = FSE_repeat_none;
|
202
|
+
return set_basic;
|
203
|
+
}
|
204
|
+
}
|
205
|
+
} else {
|
206
|
+
size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
|
207
|
+
size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
|
208
|
+
size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
|
209
|
+
size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
|
210
|
+
|
211
|
+
if (isDefaultAllowed) {
|
212
|
+
assert(!ZSTD_isError(basicCost));
|
213
|
+
assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
|
214
|
+
}
|
215
|
+
assert(!ZSTD_isError(NCountCost));
|
216
|
+
assert(compressedCost < ERROR(maxCode));
|
217
|
+
DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
|
218
|
+
(unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
|
219
|
+
if (basicCost <= repeatCost && basicCost <= compressedCost) {
|
220
|
+
DEBUGLOG(5, "Selected set_basic");
|
221
|
+
assert(isDefaultAllowed);
|
222
|
+
*repeatMode = FSE_repeat_none;
|
223
|
+
return set_basic;
|
224
|
+
}
|
225
|
+
if (repeatCost <= compressedCost) {
|
226
|
+
DEBUGLOG(5, "Selected set_repeat");
|
227
|
+
assert(!ZSTD_isError(repeatCost));
|
228
|
+
return set_repeat;
|
229
|
+
}
|
230
|
+
assert(compressedCost < basicCost && compressedCost < repeatCost);
|
231
|
+
}
|
232
|
+
DEBUGLOG(5, "Selected set_compressed");
|
233
|
+
*repeatMode = FSE_repeat_check;
|
234
|
+
return set_compressed;
|
235
|
+
}
|
236
|
+
|
237
|
+
typedef struct {
|
238
|
+
S16 norm[MaxSeq + 1];
|
239
|
+
U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
|
240
|
+
} ZSTD_BuildCTableWksp;
|
241
|
+
|
242
|
+
size_t
|
243
|
+
ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
244
|
+
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
|
245
|
+
unsigned* count, U32 max,
|
246
|
+
const BYTE* codeTable, size_t nbSeq,
|
247
|
+
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
248
|
+
const FSE_CTable* prevCTable, size_t prevCTableSize,
|
249
|
+
void* entropyWorkspace, size_t entropyWorkspaceSize)
|
250
|
+
{
|
251
|
+
BYTE* op = (BYTE*)dst;
|
252
|
+
const BYTE* const oend = op + dstCapacity;
|
253
|
+
DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
|
254
|
+
|
255
|
+
switch (type) {
|
256
|
+
case set_rle:
|
257
|
+
FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
|
258
|
+
RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
|
259
|
+
*op = codeTable[0];
|
260
|
+
return 1;
|
261
|
+
case set_repeat:
|
262
|
+
ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
|
263
|
+
return 0;
|
264
|
+
case set_basic:
|
265
|
+
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */
|
266
|
+
return 0;
|
267
|
+
case set_compressed: {
|
268
|
+
ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
|
269
|
+
size_t nbSeq_1 = nbSeq;
|
270
|
+
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
|
271
|
+
if (count[codeTable[nbSeq-1]] > 1) {
|
272
|
+
count[codeTable[nbSeq-1]]--;
|
273
|
+
nbSeq_1--;
|
274
|
+
}
|
275
|
+
assert(nbSeq_1 > 1);
|
276
|
+
assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
|
277
|
+
(void)entropyWorkspaceSize;
|
278
|
+
FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
|
279
|
+
assert(oend >= op);
|
280
|
+
{ size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog); /* overflow protected */
|
281
|
+
FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
|
282
|
+
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
|
283
|
+
return NCountSize;
|
284
|
+
}
|
285
|
+
}
|
286
|
+
default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
|
287
|
+
}
|
288
|
+
}
|
289
|
+
|
290
|
+
FORCE_INLINE_TEMPLATE size_t
|
291
|
+
ZSTD_encodeSequences_body(
|
292
|
+
void* dst, size_t dstCapacity,
|
293
|
+
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
294
|
+
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
295
|
+
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
296
|
+
seqDef const* sequences, size_t nbSeq, int longOffsets)
|
297
|
+
{
|
298
|
+
BIT_CStream_t blockStream;
|
299
|
+
FSE_CState_t stateMatchLength;
|
300
|
+
FSE_CState_t stateOffsetBits;
|
301
|
+
FSE_CState_t stateLitLength;
|
302
|
+
|
303
|
+
RETURN_ERROR_IF(
|
304
|
+
ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
|
305
|
+
dstSize_tooSmall, "not enough space remaining");
|
306
|
+
DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
|
307
|
+
(int)(blockStream.endPtr - blockStream.startPtr),
|
308
|
+
(unsigned)dstCapacity);
|
309
|
+
|
310
|
+
/* first symbols */
|
311
|
+
FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
|
312
|
+
FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
|
313
|
+
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
|
314
|
+
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
|
315
|
+
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
316
|
+
BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
|
317
|
+
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
318
|
+
if (longOffsets) {
|
319
|
+
U32 const ofBits = ofCodeTable[nbSeq-1];
|
320
|
+
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
321
|
+
if (extraBits) {
|
322
|
+
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
|
323
|
+
BIT_flushBits(&blockStream);
|
324
|
+
}
|
325
|
+
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
|
326
|
+
ofBits - extraBits);
|
327
|
+
} else {
|
328
|
+
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
|
329
|
+
}
|
330
|
+
BIT_flushBits(&blockStream);
|
331
|
+
|
332
|
+
{ size_t n;
|
333
|
+
for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
|
334
|
+
BYTE const llCode = llCodeTable[n];
|
335
|
+
BYTE const ofCode = ofCodeTable[n];
|
336
|
+
BYTE const mlCode = mlCodeTable[n];
|
337
|
+
U32 const llBits = LL_bits[llCode];
|
338
|
+
U32 const ofBits = ofCode;
|
339
|
+
U32 const mlBits = ML_bits[mlCode];
|
340
|
+
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
|
341
|
+
(unsigned)sequences[n].litLength,
|
342
|
+
(unsigned)sequences[n].mlBase + MINMATCH,
|
343
|
+
(unsigned)sequences[n].offBase);
|
344
|
+
/* 32b*/ /* 64b*/
|
345
|
+
/* (7)*/ /* (7)*/
|
346
|
+
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
|
347
|
+
FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
|
348
|
+
if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
|
349
|
+
FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
|
350
|
+
if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
|
351
|
+
BIT_flushBits(&blockStream); /* (7)*/
|
352
|
+
BIT_addBits(&blockStream, sequences[n].litLength, llBits);
|
353
|
+
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
|
354
|
+
BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
|
355
|
+
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
|
356
|
+
if (longOffsets) {
|
357
|
+
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
358
|
+
if (extraBits) {
|
359
|
+
BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
|
360
|
+
BIT_flushBits(&blockStream); /* (7)*/
|
361
|
+
}
|
362
|
+
BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
|
363
|
+
ofBits - extraBits); /* 31 */
|
364
|
+
} else {
|
365
|
+
BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */
|
366
|
+
}
|
367
|
+
BIT_flushBits(&blockStream); /* (7)*/
|
368
|
+
DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
|
369
|
+
} }
|
370
|
+
|
371
|
+
DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
|
372
|
+
FSE_flushCState(&blockStream, &stateMatchLength);
|
373
|
+
DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
|
374
|
+
FSE_flushCState(&blockStream, &stateOffsetBits);
|
375
|
+
DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
|
376
|
+
FSE_flushCState(&blockStream, &stateLitLength);
|
377
|
+
|
378
|
+
{ size_t const streamSize = BIT_closeCStream(&blockStream);
|
379
|
+
RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
|
380
|
+
return streamSize;
|
381
|
+
}
|
382
|
+
}
|
383
|
+
|
384
|
+
static size_t
|
385
|
+
ZSTD_encodeSequences_default(
|
386
|
+
void* dst, size_t dstCapacity,
|
387
|
+
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
388
|
+
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
389
|
+
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
390
|
+
seqDef const* sequences, size_t nbSeq, int longOffsets)
|
391
|
+
{
|
392
|
+
return ZSTD_encodeSequences_body(dst, dstCapacity,
|
393
|
+
CTable_MatchLength, mlCodeTable,
|
394
|
+
CTable_OffsetBits, ofCodeTable,
|
395
|
+
CTable_LitLength, llCodeTable,
|
396
|
+
sequences, nbSeq, longOffsets);
|
397
|
+
}
|
398
|
+
|
399
|
+
|
400
|
+
#if DYNAMIC_BMI2
|
401
|
+
|
402
|
+
static BMI2_TARGET_ATTRIBUTE size_t
|
403
|
+
ZSTD_encodeSequences_bmi2(
|
404
|
+
void* dst, size_t dstCapacity,
|
405
|
+
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
406
|
+
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
407
|
+
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
408
|
+
seqDef const* sequences, size_t nbSeq, int longOffsets)
|
409
|
+
{
|
410
|
+
return ZSTD_encodeSequences_body(dst, dstCapacity,
|
411
|
+
CTable_MatchLength, mlCodeTable,
|
412
|
+
CTable_OffsetBits, ofCodeTable,
|
413
|
+
CTable_LitLength, llCodeTable,
|
414
|
+
sequences, nbSeq, longOffsets);
|
415
|
+
}
|
416
|
+
|
417
|
+
#endif
|
418
|
+
|
419
|
+
size_t ZSTD_encodeSequences(
|
420
|
+
void* dst, size_t dstCapacity,
|
421
|
+
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
422
|
+
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
423
|
+
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
424
|
+
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
|
425
|
+
{
|
426
|
+
DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
|
427
|
+
#if DYNAMIC_BMI2
|
428
|
+
if (bmi2) {
|
429
|
+
return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
|
430
|
+
CTable_MatchLength, mlCodeTable,
|
431
|
+
CTable_OffsetBits, ofCodeTable,
|
432
|
+
CTable_LitLength, llCodeTable,
|
433
|
+
sequences, nbSeq, longOffsets);
|
434
|
+
}
|
435
|
+
#endif
|
436
|
+
(void)bmi2;
|
437
|
+
return ZSTD_encodeSequences_default(dst, dstCapacity,
|
438
|
+
CTable_MatchLength, mlCodeTable,
|
439
|
+
CTable_OffsetBits, ofCodeTable,
|
440
|
+
CTable_LitLength, llCodeTable,
|
441
|
+
sequences, nbSeq, longOffsets);
|
442
|
+
}
|
@@ -0,0 +1,54 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
9
|
+
*/
|
10
|
+
|
11
|
+
#ifndef ZSTD_COMPRESS_SEQUENCES_H
|
12
|
+
#define ZSTD_COMPRESS_SEQUENCES_H
|
13
|
+
|
14
|
+
#include "../common/fse.h" /* FSE_repeat, FSE_CTable */
|
15
|
+
#include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
|
16
|
+
|
17
|
+
/**
 * Tells ZSTD_selectEncodingType() whether the default (set_basic) table may
 * be selected. The numeric values matter: the "disallowed" value is 0 so the
 * enum can be tested directly as a boolean.
 */
typedef enum {
    ZSTD_defaultDisallowed = 0,   /* default table must not be used */
    ZSTD_defaultAllowed    = 1    /* default table may be used */
} ZSTD_defaultPolicy_e;
|
21
|
+
|
22
|
+
symbolEncodingType_e
|
23
|
+
ZSTD_selectEncodingType(
|
24
|
+
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
|
25
|
+
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
|
26
|
+
FSE_CTable const* prevCTable,
|
27
|
+
short const* defaultNorm, U32 defaultNormLog,
|
28
|
+
ZSTD_defaultPolicy_e const isDefaultAllowed,
|
29
|
+
ZSTD_strategy const strategy);
|
30
|
+
|
31
|
+
size_t
|
32
|
+
ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
33
|
+
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
|
34
|
+
unsigned* count, U32 max,
|
35
|
+
const BYTE* codeTable, size_t nbSeq,
|
36
|
+
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
37
|
+
const FSE_CTable* prevCTable, size_t prevCTableSize,
|
38
|
+
void* entropyWorkspace, size_t entropyWorkspaceSize);
|
39
|
+
|
40
|
+
size_t ZSTD_encodeSequences(
|
41
|
+
void* dst, size_t dstCapacity,
|
42
|
+
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
43
|
+
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
44
|
+
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
45
|
+
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
|
46
|
+
|
47
|
+
size_t ZSTD_fseBitCost(
|
48
|
+
FSE_CTable const* ctable,
|
49
|
+
unsigned const* count,
|
50
|
+
unsigned const max);
|
51
|
+
|
52
|
+
size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
|
53
|
+
unsigned const* count, unsigned const max);
|
54
|
+
#endif /* ZSTD_COMPRESS_SEQUENCES_H */
|