zstd-ruby 1.4.1.0 → 1.4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3 -574
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +24 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +149 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +415 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +47 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +2 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +16 -13
- data/ext/zstdruby/libzstd/zstd.h +1 -1
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 69e6ba233f94b8ed930f34217befa3268b0bd94fb8116130066e7976c0b31d23
|
4
|
+
data.tar.gz: 7d01847ddb0a3a5e1996eb4a45bb2c84ad9230f5b9c002dea32d202036be15d6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7122bbd06a6ff9a52240c8d0b708f083c6567e4b586affd3c48bc17e412cb6beaaf097288627ded338787c4c7a940ec5dab32f704db54c29a7cba7fde848dca8
|
7
|
+
data.tar.gz: 1839b136a9b3c181cfc3f18cce8f07b96eaa2e537d0753d8f085243ea8f9b91019c2c59174a1a2a5ca2419ec39a23542faf175ba685c70b0543597f461c1d937
|
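data/README.md
CHANGED
[README.md hunk (+1 -1) not captured in this extract]
data/ext/zstdruby/libzstd/compress/zstd_compress.c
CHANGED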
@@ -21,6 +21,8 @@
|
|
21
21
|
#define HUF_STATIC_LINKING_ONLY
|
22
22
|
#include "huf.h"
|
23
23
|
#include "zstd_compress_internal.h"
|
24
|
+
#include "zstd_compress_sequences.h"
|
25
|
+
#include "zstd_compress_literals.h"
|
24
26
|
#include "zstd_fast.h"
|
25
27
|
#include "zstd_double_fast.h"
|
26
28
|
#include "zstd_lazy.h"
|
@@ -397,18 +399,6 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
|
397
399
|
}
|
398
400
|
}
|
399
401
|
|
400
|
-
/* ZSTD_cParam_withinBounds:
|
401
|
-
* @return 1 if value is within cParam bounds,
|
402
|
-
* 0 otherwise */
|
403
|
-
static int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
|
404
|
-
{
|
405
|
-
ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
|
406
|
-
if (ZSTD_isError(bounds.error)) return 0;
|
407
|
-
if (value < bounds.lowerBound) return 0;
|
408
|
-
if (value > bounds.upperBound) return 0;
|
409
|
-
return 1;
|
410
|
-
}
|
411
|
-
|
412
402
|
/* ZSTD_cParam_clampBounds:
|
413
403
|
* Clamps the value into the bounded range.
|
414
404
|
*/
|
@@ -1903,155 +1893,6 @@ static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* s
|
|
1903
1893
|
return ZSTD_blockHeaderSize + srcSize;
|
1904
1894
|
}
|
1905
1895
|
|
1906
|
-
static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
1907
|
-
{
|
1908
|
-
BYTE* const ostart = (BYTE* const)dst;
|
1909
|
-
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
|
1910
|
-
|
1911
|
-
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall);
|
1912
|
-
|
1913
|
-
switch(flSize)
|
1914
|
-
{
|
1915
|
-
case 1: /* 2 - 1 - 5 */
|
1916
|
-
ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
|
1917
|
-
break;
|
1918
|
-
case 2: /* 2 - 2 - 12 */
|
1919
|
-
MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
|
1920
|
-
break;
|
1921
|
-
case 3: /* 2 - 2 - 20 */
|
1922
|
-
MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
|
1923
|
-
break;
|
1924
|
-
default: /* not necessary : flSize is {1,2,3} */
|
1925
|
-
assert(0);
|
1926
|
-
}
|
1927
|
-
|
1928
|
-
memcpy(ostart + flSize, src, srcSize);
|
1929
|
-
return srcSize + flSize;
|
1930
|
-
}
|
1931
|
-
|
1932
|
-
static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
1933
|
-
{
|
1934
|
-
BYTE* const ostart = (BYTE* const)dst;
|
1935
|
-
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
|
1936
|
-
|
1937
|
-
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
|
1938
|
-
|
1939
|
-
switch(flSize)
|
1940
|
-
{
|
1941
|
-
case 1: /* 2 - 1 - 5 */
|
1942
|
-
ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
|
1943
|
-
break;
|
1944
|
-
case 2: /* 2 - 2 - 12 */
|
1945
|
-
MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
|
1946
|
-
break;
|
1947
|
-
case 3: /* 2 - 2 - 20 */
|
1948
|
-
MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
|
1949
|
-
break;
|
1950
|
-
default: /* not necessary : flSize is {1,2,3} */
|
1951
|
-
assert(0);
|
1952
|
-
}
|
1953
|
-
|
1954
|
-
ostart[flSize] = *(const BYTE*)src;
|
1955
|
-
return flSize+1;
|
1956
|
-
}
|
1957
|
-
|
1958
|
-
|
1959
|
-
/* ZSTD_minGain() :
|
1960
|
-
* minimum compression required
|
1961
|
-
* to generate a compress block or a compressed literals section.
|
1962
|
-
* note : use same formula for both situations */
|
1963
|
-
static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
|
1964
|
-
{
|
1965
|
-
U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
|
1966
|
-
ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
|
1967
|
-
assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
|
1968
|
-
return (srcSize >> minlog) + 2;
|
1969
|
-
}
|
1970
|
-
|
1971
|
-
static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
1972
|
-
ZSTD_hufCTables_t* nextHuf,
|
1973
|
-
ZSTD_strategy strategy, int disableLiteralCompression,
|
1974
|
-
void* dst, size_t dstCapacity,
|
1975
|
-
const void* src, size_t srcSize,
|
1976
|
-
void* workspace, size_t wkspSize,
|
1977
|
-
const int bmi2)
|
1978
|
-
{
|
1979
|
-
size_t const minGain = ZSTD_minGain(srcSize, strategy);
|
1980
|
-
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
|
1981
|
-
BYTE* const ostart = (BYTE*)dst;
|
1982
|
-
U32 singleStream = srcSize < 256;
|
1983
|
-
symbolEncodingType_e hType = set_compressed;
|
1984
|
-
size_t cLitSize;
|
1985
|
-
|
1986
|
-
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
|
1987
|
-
disableLiteralCompression);
|
1988
|
-
|
1989
|
-
/* Prepare nextEntropy assuming reusing the existing table */
|
1990
|
-
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
1991
|
-
|
1992
|
-
if (disableLiteralCompression)
|
1993
|
-
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
1994
|
-
|
1995
|
-
/* small ? don't even attempt compression (speed opt) */
|
1996
|
-
# define COMPRESS_LITERALS_SIZE_MIN 63
|
1997
|
-
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
1998
|
-
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
1999
|
-
}
|
2000
|
-
|
2001
|
-
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
|
2002
|
-
{ HUF_repeat repeat = prevHuf->repeatMode;
|
2003
|
-
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
|
2004
|
-
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
|
2005
|
-
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
|
2006
|
-
workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
|
2007
|
-
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
|
2008
|
-
workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
|
2009
|
-
if (repeat != HUF_repeat_none) {
|
2010
|
-
/* reused the existing table */
|
2011
|
-
hType = set_repeat;
|
2012
|
-
}
|
2013
|
-
}
|
2014
|
-
|
2015
|
-
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
|
2016
|
-
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
2017
|
-
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
2018
|
-
}
|
2019
|
-
if (cLitSize==1) {
|
2020
|
-
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
2021
|
-
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
|
2022
|
-
}
|
2023
|
-
|
2024
|
-
if (hType == set_compressed) {
|
2025
|
-
/* using a newly constructed table */
|
2026
|
-
nextHuf->repeatMode = HUF_repeat_check;
|
2027
|
-
}
|
2028
|
-
|
2029
|
-
/* Build header */
|
2030
|
-
switch(lhSize)
|
2031
|
-
{
|
2032
|
-
case 3: /* 2 - 2 - 10 - 10 */
|
2033
|
-
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
|
2034
|
-
MEM_writeLE24(ostart, lhc);
|
2035
|
-
break;
|
2036
|
-
}
|
2037
|
-
case 4: /* 2 - 2 - 14 - 14 */
|
2038
|
-
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
|
2039
|
-
MEM_writeLE32(ostart, lhc);
|
2040
|
-
break;
|
2041
|
-
}
|
2042
|
-
case 5: /* 2 - 2 - 18 - 18 */
|
2043
|
-
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
|
2044
|
-
MEM_writeLE32(ostart, lhc);
|
2045
|
-
ostart[4] = (BYTE)(cLitSize >> 10);
|
2046
|
-
break;
|
2047
|
-
}
|
2048
|
-
default: /* not possible : lhSize is {3,4,5} */
|
2049
|
-
assert(0);
|
2050
|
-
}
|
2051
|
-
return lhSize+cLitSize;
|
2052
|
-
}
|
2053
|
-
|
2054
|
-
|
2055
1896
|
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
2056
1897
|
{
|
2057
1898
|
const seqDef* const sequences = seqStorePtr->sequencesStart;
|
@@ -2074,418 +1915,6 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
|
2074
1915
|
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
|
2075
1916
|
}
|
2076
1917
|
|
2077
|
-
|
2078
|
-
/**
|
2079
|
-
* -log2(x / 256) lookup table for x in [0, 256).
|
2080
|
-
* If x == 0: Return 0
|
2081
|
-
* Else: Return floor(-log2(x / 256) * 256)
|
2082
|
-
*/
|
2083
|
-
static unsigned const kInverseProbabilityLog256[256] = {
|
2084
|
-
0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
|
2085
|
-
1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
|
2086
|
-
874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
|
2087
|
-
724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
|
2088
|
-
618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
|
2089
|
-
535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
|
2090
|
-
468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
|
2091
|
-
411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
|
2092
|
-
362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
|
2093
|
-
318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
|
2094
|
-
279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
|
2095
|
-
244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
|
2096
|
-
212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
|
2097
|
-
182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
|
2098
|
-
155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
|
2099
|
-
130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
|
2100
|
-
106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
|
2101
|
-
83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
|
2102
|
-
62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
|
2103
|
-
42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
|
2104
|
-
23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
|
2105
|
-
5, 4, 2, 1,
|
2106
|
-
};
|
2107
|
-
|
2108
|
-
|
2109
|
-
/**
|
2110
|
-
* Returns the cost in bits of encoding the distribution described by count
|
2111
|
-
* using the entropy bound.
|
2112
|
-
*/
|
2113
|
-
static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
|
2114
|
-
{
|
2115
|
-
unsigned cost = 0;
|
2116
|
-
unsigned s;
|
2117
|
-
for (s = 0; s <= max; ++s) {
|
2118
|
-
unsigned norm = (unsigned)((256 * count[s]) / total);
|
2119
|
-
if (count[s] != 0 && norm == 0)
|
2120
|
-
norm = 1;
|
2121
|
-
assert(count[s] < total);
|
2122
|
-
cost += count[s] * kInverseProbabilityLog256[norm];
|
2123
|
-
}
|
2124
|
-
return cost >> 8;
|
2125
|
-
}
|
2126
|
-
|
2127
|
-
|
2128
|
-
/**
|
2129
|
-
* Returns the cost in bits of encoding the distribution in count using the
|
2130
|
-
* table described by norm. The max symbol support by norm is assumed >= max.
|
2131
|
-
* norm must be valid for every symbol with non-zero probability in count.
|
2132
|
-
*/
|
2133
|
-
static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
|
2134
|
-
unsigned const* count, unsigned const max)
|
2135
|
-
{
|
2136
|
-
unsigned const shift = 8 - accuracyLog;
|
2137
|
-
size_t cost = 0;
|
2138
|
-
unsigned s;
|
2139
|
-
assert(accuracyLog <= 8);
|
2140
|
-
for (s = 0; s <= max; ++s) {
|
2141
|
-
unsigned const normAcc = norm[s] != -1 ? norm[s] : 1;
|
2142
|
-
unsigned const norm256 = normAcc << shift;
|
2143
|
-
assert(norm256 > 0);
|
2144
|
-
assert(norm256 < 256);
|
2145
|
-
cost += count[s] * kInverseProbabilityLog256[norm256];
|
2146
|
-
}
|
2147
|
-
return cost >> 8;
|
2148
|
-
}
|
2149
|
-
|
2150
|
-
|
2151
|
-
static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
|
2152
|
-
void const* ptr = ctable;
|
2153
|
-
U16 const* u16ptr = (U16 const*)ptr;
|
2154
|
-
U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
|
2155
|
-
return maxSymbolValue;
|
2156
|
-
}
|
2157
|
-
|
2158
|
-
|
2159
|
-
/**
|
2160
|
-
* Returns the cost in bits of encoding the distribution in count using ctable.
|
2161
|
-
* Returns an error if ctable cannot represent all the symbols in count.
|
2162
|
-
*/
|
2163
|
-
static size_t ZSTD_fseBitCost(
|
2164
|
-
FSE_CTable const* ctable,
|
2165
|
-
unsigned const* count,
|
2166
|
-
unsigned const max)
|
2167
|
-
{
|
2168
|
-
unsigned const kAccuracyLog = 8;
|
2169
|
-
size_t cost = 0;
|
2170
|
-
unsigned s;
|
2171
|
-
FSE_CState_t cstate;
|
2172
|
-
FSE_initCState(&cstate, ctable);
|
2173
|
-
RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC,
|
2174
|
-
"Repeat FSE_CTable has maxSymbolValue %u < %u",
|
2175
|
-
ZSTD_getFSEMaxSymbolValue(ctable), max);
|
2176
|
-
for (s = 0; s <= max; ++s) {
|
2177
|
-
unsigned const tableLog = cstate.stateLog;
|
2178
|
-
unsigned const badCost = (tableLog + 1) << kAccuracyLog;
|
2179
|
-
unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
|
2180
|
-
if (count[s] == 0)
|
2181
|
-
continue;
|
2182
|
-
RETURN_ERROR_IF(bitCost >= badCost, GENERIC,
|
2183
|
-
"Repeat FSE_CTable has Prob[%u] == 0", s);
|
2184
|
-
cost += count[s] * bitCost;
|
2185
|
-
}
|
2186
|
-
return cost >> kAccuracyLog;
|
2187
|
-
}
|
2188
|
-
|
2189
|
-
/**
|
2190
|
-
* Returns the cost in bytes of encoding the normalized count header.
|
2191
|
-
* Returns an error if any of the helper functions return an error.
|
2192
|
-
*/
|
2193
|
-
static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
|
2194
|
-
size_t const nbSeq, unsigned const FSELog)
|
2195
|
-
{
|
2196
|
-
BYTE wksp[FSE_NCOUNTBOUND];
|
2197
|
-
S16 norm[MaxSeq + 1];
|
2198
|
-
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
|
2199
|
-
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max));
|
2200
|
-
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
|
2201
|
-
}
|
2202
|
-
|
2203
|
-
|
2204
|
-
typedef enum {
|
2205
|
-
ZSTD_defaultDisallowed = 0,
|
2206
|
-
ZSTD_defaultAllowed = 1
|
2207
|
-
} ZSTD_defaultPolicy_e;
|
2208
|
-
|
2209
|
-
MEM_STATIC symbolEncodingType_e
|
2210
|
-
ZSTD_selectEncodingType(
|
2211
|
-
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
|
2212
|
-
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
|
2213
|
-
FSE_CTable const* prevCTable,
|
2214
|
-
short const* defaultNorm, U32 defaultNormLog,
|
2215
|
-
ZSTD_defaultPolicy_e const isDefaultAllowed,
|
2216
|
-
ZSTD_strategy const strategy)
|
2217
|
-
{
|
2218
|
-
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
|
2219
|
-
if (mostFrequent == nbSeq) {
|
2220
|
-
*repeatMode = FSE_repeat_none;
|
2221
|
-
if (isDefaultAllowed && nbSeq <= 2) {
|
2222
|
-
/* Prefer set_basic over set_rle when there are 2 or less symbols,
|
2223
|
-
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
|
2224
|
-
* If basic encoding isn't possible, always choose RLE.
|
2225
|
-
*/
|
2226
|
-
DEBUGLOG(5, "Selected set_basic");
|
2227
|
-
return set_basic;
|
2228
|
-
}
|
2229
|
-
DEBUGLOG(5, "Selected set_rle");
|
2230
|
-
return set_rle;
|
2231
|
-
}
|
2232
|
-
if (strategy < ZSTD_lazy) {
|
2233
|
-
if (isDefaultAllowed) {
|
2234
|
-
size_t const staticFse_nbSeq_max = 1000;
|
2235
|
-
size_t const mult = 10 - strategy;
|
2236
|
-
size_t const baseLog = 3;
|
2237
|
-
size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */
|
2238
|
-
assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
|
2239
|
-
assert(mult <= 9 && mult >= 7);
|
2240
|
-
if ( (*repeatMode == FSE_repeat_valid)
|
2241
|
-
&& (nbSeq < staticFse_nbSeq_max) ) {
|
2242
|
-
DEBUGLOG(5, "Selected set_repeat");
|
2243
|
-
return set_repeat;
|
2244
|
-
}
|
2245
|
-
if ( (nbSeq < dynamicFse_nbSeq_min)
|
2246
|
-
|| (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
|
2247
|
-
DEBUGLOG(5, "Selected set_basic");
|
2248
|
-
/* The format allows default tables to be repeated, but it isn't useful.
|
2249
|
-
* When using simple heuristics to select encoding type, we don't want
|
2250
|
-
* to confuse these tables with dictionaries. When running more careful
|
2251
|
-
* analysis, we don't need to waste time checking both repeating tables
|
2252
|
-
* and default tables.
|
2253
|
-
*/
|
2254
|
-
*repeatMode = FSE_repeat_none;
|
2255
|
-
return set_basic;
|
2256
|
-
}
|
2257
|
-
}
|
2258
|
-
} else {
|
2259
|
-
size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
|
2260
|
-
size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
|
2261
|
-
size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
|
2262
|
-
size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
|
2263
|
-
|
2264
|
-
if (isDefaultAllowed) {
|
2265
|
-
assert(!ZSTD_isError(basicCost));
|
2266
|
-
assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
|
2267
|
-
}
|
2268
|
-
assert(!ZSTD_isError(NCountCost));
|
2269
|
-
assert(compressedCost < ERROR(maxCode));
|
2270
|
-
DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
|
2271
|
-
(unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
|
2272
|
-
if (basicCost <= repeatCost && basicCost <= compressedCost) {
|
2273
|
-
DEBUGLOG(5, "Selected set_basic");
|
2274
|
-
assert(isDefaultAllowed);
|
2275
|
-
*repeatMode = FSE_repeat_none;
|
2276
|
-
return set_basic;
|
2277
|
-
}
|
2278
|
-
if (repeatCost <= compressedCost) {
|
2279
|
-
DEBUGLOG(5, "Selected set_repeat");
|
2280
|
-
assert(!ZSTD_isError(repeatCost));
|
2281
|
-
return set_repeat;
|
2282
|
-
}
|
2283
|
-
assert(compressedCost < basicCost && compressedCost < repeatCost);
|
2284
|
-
}
|
2285
|
-
DEBUGLOG(5, "Selected set_compressed");
|
2286
|
-
*repeatMode = FSE_repeat_check;
|
2287
|
-
return set_compressed;
|
2288
|
-
}
|
2289
|
-
|
2290
|
-
MEM_STATIC size_t
|
2291
|
-
ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
2292
|
-
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
|
2293
|
-
unsigned* count, U32 max,
|
2294
|
-
const BYTE* codeTable, size_t nbSeq,
|
2295
|
-
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
2296
|
-
const FSE_CTable* prevCTable, size_t prevCTableSize,
|
2297
|
-
void* workspace, size_t workspaceSize)
|
2298
|
-
{
|
2299
|
-
BYTE* op = (BYTE*)dst;
|
2300
|
-
const BYTE* const oend = op + dstCapacity;
|
2301
|
-
DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
|
2302
|
-
|
2303
|
-
switch (type) {
|
2304
|
-
case set_rle:
|
2305
|
-
FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max));
|
2306
|
-
RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall);
|
2307
|
-
*op = codeTable[0];
|
2308
|
-
return 1;
|
2309
|
-
case set_repeat:
|
2310
|
-
memcpy(nextCTable, prevCTable, prevCTableSize);
|
2311
|
-
return 0;
|
2312
|
-
case set_basic:
|
2313
|
-
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */
|
2314
|
-
return 0;
|
2315
|
-
case set_compressed: {
|
2316
|
-
S16 norm[MaxSeq + 1];
|
2317
|
-
size_t nbSeq_1 = nbSeq;
|
2318
|
-
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
|
2319
|
-
if (count[codeTable[nbSeq-1]] > 1) {
|
2320
|
-
count[codeTable[nbSeq-1]]--;
|
2321
|
-
nbSeq_1--;
|
2322
|
-
}
|
2323
|
-
assert(nbSeq_1 > 1);
|
2324
|
-
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
|
2325
|
-
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
|
2326
|
-
FORWARD_IF_ERROR(NCountSize);
|
2327
|
-
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
|
2328
|
-
return NCountSize;
|
2329
|
-
}
|
2330
|
-
}
|
2331
|
-
default: assert(0); RETURN_ERROR(GENERIC);
|
2332
|
-
}
|
2333
|
-
}
|
2334
|
-
|
2335
|
-
FORCE_INLINE_TEMPLATE size_t
|
2336
|
-
ZSTD_encodeSequences_body(
|
2337
|
-
void* dst, size_t dstCapacity,
|
2338
|
-
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
2339
|
-
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
2340
|
-
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
2341
|
-
seqDef const* sequences, size_t nbSeq, int longOffsets)
|
2342
|
-
{
|
2343
|
-
BIT_CStream_t blockStream;
|
2344
|
-
FSE_CState_t stateMatchLength;
|
2345
|
-
FSE_CState_t stateOffsetBits;
|
2346
|
-
FSE_CState_t stateLitLength;
|
2347
|
-
|
2348
|
-
RETURN_ERROR_IF(
|
2349
|
-
ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
|
2350
|
-
dstSize_tooSmall, "not enough space remaining");
|
2351
|
-
DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
|
2352
|
-
(int)(blockStream.endPtr - blockStream.startPtr),
|
2353
|
-
(unsigned)dstCapacity);
|
2354
|
-
|
2355
|
-
/* first symbols */
|
2356
|
-
FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
|
2357
|
-
FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
|
2358
|
-
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
|
2359
|
-
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
|
2360
|
-
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
2361
|
-
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
|
2362
|
-
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
2363
|
-
if (longOffsets) {
|
2364
|
-
U32 const ofBits = ofCodeTable[nbSeq-1];
|
2365
|
-
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
2366
|
-
if (extraBits) {
|
2367
|
-
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
|
2368
|
-
BIT_flushBits(&blockStream);
|
2369
|
-
}
|
2370
|
-
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
|
2371
|
-
ofBits - extraBits);
|
2372
|
-
} else {
|
2373
|
-
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
|
2374
|
-
}
|
2375
|
-
BIT_flushBits(&blockStream);
|
2376
|
-
|
2377
|
-
{ size_t n;
|
2378
|
-
for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
|
2379
|
-
BYTE const llCode = llCodeTable[n];
|
2380
|
-
BYTE const ofCode = ofCodeTable[n];
|
2381
|
-
BYTE const mlCode = mlCodeTable[n];
|
2382
|
-
U32 const llBits = LL_bits[llCode];
|
2383
|
-
U32 const ofBits = ofCode;
|
2384
|
-
U32 const mlBits = ML_bits[mlCode];
|
2385
|
-
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
|
2386
|
-
(unsigned)sequences[n].litLength,
|
2387
|
-
(unsigned)sequences[n].matchLength + MINMATCH,
|
2388
|
-
(unsigned)sequences[n].offset);
|
2389
|
-
/* 32b*/ /* 64b*/
|
2390
|
-
/* (7)*/ /* (7)*/
|
2391
|
-
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
|
2392
|
-
FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
|
2393
|
-
if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
|
2394
|
-
FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
|
2395
|
-
if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
|
2396
|
-
BIT_flushBits(&blockStream); /* (7)*/
|
2397
|
-
BIT_addBits(&blockStream, sequences[n].litLength, llBits);
|
2398
|
-
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
|
2399
|
-
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
|
2400
|
-
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
|
2401
|
-
if (longOffsets) {
|
2402
|
-
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
2403
|
-
if (extraBits) {
|
2404
|
-
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
|
2405
|
-
BIT_flushBits(&blockStream); /* (7)*/
|
2406
|
-
}
|
2407
|
-
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
|
2408
|
-
ofBits - extraBits); /* 31 */
|
2409
|
-
} else {
|
2410
|
-
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
|
2411
|
-
}
|
2412
|
-
BIT_flushBits(&blockStream); /* (7)*/
|
2413
|
-
DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
|
2414
|
-
} }
|
2415
|
-
|
2416
|
-
DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
|
2417
|
-
FSE_flushCState(&blockStream, &stateMatchLength);
|
2418
|
-
DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
|
2419
|
-
FSE_flushCState(&blockStream, &stateOffsetBits);
|
2420
|
-
DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
|
2421
|
-
FSE_flushCState(&blockStream, &stateLitLength);
|
2422
|
-
|
2423
|
-
{ size_t const streamSize = BIT_closeCStream(&blockStream);
|
2424
|
-
RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
|
2425
|
-
return streamSize;
|
2426
|
-
}
|
2427
|
-
}
|
2428
|
-
|
2429
|
-
static size_t
|
2430
|
-
ZSTD_encodeSequences_default(
|
2431
|
-
void* dst, size_t dstCapacity,
|
2432
|
-
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
2433
|
-
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
2434
|
-
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
2435
|
-
seqDef const* sequences, size_t nbSeq, int longOffsets)
|
2436
|
-
{
|
2437
|
-
return ZSTD_encodeSequences_body(dst, dstCapacity,
|
2438
|
-
CTable_MatchLength, mlCodeTable,
|
2439
|
-
CTable_OffsetBits, ofCodeTable,
|
2440
|
-
CTable_LitLength, llCodeTable,
|
2441
|
-
sequences, nbSeq, longOffsets);
|
2442
|
-
}
|
2443
|
-
|
2444
|
-
|
2445
|
-
#if DYNAMIC_BMI2
|
2446
|
-
|
2447
|
-
static TARGET_ATTRIBUTE("bmi2") size_t
|
2448
|
-
ZSTD_encodeSequences_bmi2(
|
2449
|
-
void* dst, size_t dstCapacity,
|
2450
|
-
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
2451
|
-
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
2452
|
-
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
2453
|
-
seqDef const* sequences, size_t nbSeq, int longOffsets)
|
2454
|
-
{
|
2455
|
-
return ZSTD_encodeSequences_body(dst, dstCapacity,
|
2456
|
-
CTable_MatchLength, mlCodeTable,
|
2457
|
-
CTable_OffsetBits, ofCodeTable,
|
2458
|
-
CTable_LitLength, llCodeTable,
|
2459
|
-
sequences, nbSeq, longOffsets);
|
2460
|
-
}
|
2461
|
-
|
2462
|
-
#endif
|
2463
|
-
|
2464
|
-
static size_t ZSTD_encodeSequences(
|
2465
|
-
void* dst, size_t dstCapacity,
|
2466
|
-
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
2467
|
-
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
2468
|
-
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
2469
|
-
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
|
2470
|
-
{
|
2471
|
-
DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
|
2472
|
-
#if DYNAMIC_BMI2
|
2473
|
-
if (bmi2) {
|
2474
|
-
return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
|
2475
|
-
CTable_MatchLength, mlCodeTable,
|
2476
|
-
CTable_OffsetBits, ofCodeTable,
|
2477
|
-
CTable_LitLength, llCodeTable,
|
2478
|
-
sequences, nbSeq, longOffsets);
|
2479
|
-
}
|
2480
|
-
#endif
|
2481
|
-
(void)bmi2;
|
2482
|
-
return ZSTD_encodeSequences_default(dst, dstCapacity,
|
2483
|
-
CTable_MatchLength, mlCodeTable,
|
2484
|
-
CTable_OffsetBits, ofCodeTable,
|
2485
|
-
CTable_LitLength, llCodeTable,
|
2486
|
-
sequences, nbSeq, longOffsets);
|
2487
|
-
}
|
2488
|
-
|
2489
1918
|
static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
|
2490
1919
|
{
|
2491
1920
|
switch (cctxParams->literalCompressionMode) {
|
@@ -2530,8 +1959,8 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
|
|
2530
1959
|
BYTE* seqHead;
|
2531
1960
|
BYTE* lastNCount = NULL;
|
2532
1961
|
|
1962
|
+
DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq);
|
2533
1963
|
ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
|
2534
|
-
DEBUGLOG(5, "ZSTD_compressSequences_internal");
|
2535
1964
|
|
2536
1965
|
/* Compress literals */
|
2537
1966
|
{ const BYTE* const literals = seqStorePtr->litStart;
|