zstd-ruby 1.4.5.0 → 1.5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
- data/ext/zstdruby/libzstd/common/compiler.h +205 -22
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +12 -19
- data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
- data/ext/zstdruby/libzstd/common/error_private.c +10 -2
- data/ext/zstdruby/libzstd/common/error_private.h +82 -3
- data/ext/zstdruby/libzstd/common/fse.h +37 -86
- data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
- data/ext/zstdruby/libzstd/common/huf.h +99 -166
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +10 -4
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +74 -19
- data/ext/zstdruby/libzstd/common/threading.h +5 -10
- data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
- data/ext/zstdruby/libzstd/compress/hist.c +27 -29
- data/ext/zstdruby/libzstd/compress/hist.h +2 -2
- data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
- data/ext/zstdruby/libzstd/zstd.h +1217 -287
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +19 -36
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -354
- data/ext/zstdruby/libzstd/README.md +0 -179
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/* ******************************************************************
|
|
2
2
|
* FSE : Finite State Entropy codec
|
|
3
3
|
* Public Prototypes declaration
|
|
4
|
-
* Copyright (c)
|
|
4
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
5
5
|
*
|
|
6
6
|
* You can contact the author at :
|
|
7
7
|
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
@@ -23,7 +23,7 @@ extern "C" {
|
|
|
23
23
|
/*-*****************************************
|
|
24
24
|
* Dependencies
|
|
25
25
|
******************************************/
|
|
26
|
-
#include
|
|
26
|
+
#include "zstd_deps.h" /* size_t, ptrdiff_t */
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
/*-*****************************************
|
|
@@ -53,34 +53,6 @@ extern "C" {
|
|
|
53
53
|
FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
|
|
54
54
|
|
|
55
55
|
|
|
56
|
-
/*-****************************************
|
|
57
|
-
* FSE simple functions
|
|
58
|
-
******************************************/
|
|
59
|
-
/*! FSE_compress() :
|
|
60
|
-
Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
|
|
61
|
-
'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
|
|
62
|
-
@return : size of compressed data (<= dstCapacity).
|
|
63
|
-
Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
|
|
64
|
-
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
|
|
65
|
-
if FSE_isError(return), compression failed (more details using FSE_getErrorName())
|
|
66
|
-
*/
|
|
67
|
-
FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
|
|
68
|
-
const void* src, size_t srcSize);
|
|
69
|
-
|
|
70
|
-
/*! FSE_decompress():
|
|
71
|
-
Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
|
|
72
|
-
into already allocated destination buffer 'dst', of size 'dstCapacity'.
|
|
73
|
-
@return : size of regenerated data (<= maxDstSize),
|
|
74
|
-
or an error code, which can be tested using FSE_isError() .
|
|
75
|
-
|
|
76
|
-
** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
|
|
77
|
-
Why ? : making this distinction requires a header.
|
|
78
|
-
Header management is intentionally delegated to the user layer, which can better manage special cases.
|
|
79
|
-
*/
|
|
80
|
-
FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
|
|
81
|
-
const void* cSrc, size_t cSrcSize);
|
|
82
|
-
|
|
83
|
-
|
|
84
56
|
/*-*****************************************
|
|
85
57
|
* Tool functions
|
|
86
58
|
******************************************/
|
|
@@ -91,20 +63,6 @@ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return
|
|
|
91
63
|
FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
|
|
92
64
|
|
|
93
65
|
|
|
94
|
-
/*-*****************************************
|
|
95
|
-
* FSE advanced functions
|
|
96
|
-
******************************************/
|
|
97
|
-
/*! FSE_compress2() :
|
|
98
|
-
Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
|
|
99
|
-
Both parameters can be defined as '0' to mean : use default value
|
|
100
|
-
@return : size of compressed data
|
|
101
|
-
Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
|
|
102
|
-
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
|
|
103
|
-
if FSE_isError(return), it's an error code.
|
|
104
|
-
*/
|
|
105
|
-
FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
|
|
106
|
-
|
|
107
|
-
|
|
108
66
|
/*-*****************************************
|
|
109
67
|
* FSE detailed API
|
|
110
68
|
******************************************/
|
|
@@ -137,10 +95,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
|
|
|
137
95
|
/*! FSE_normalizeCount():
|
|
138
96
|
normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
|
|
139
97
|
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
|
|
98
|
+
useLowProbCount is a boolean parameter which trades off compressed size for
|
|
99
|
+
faster header decoding. When it is set to 1, the compressed data will be slightly
|
|
100
|
+
smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
|
|
101
|
+
faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
|
|
102
|
+
is a good default, since header deserialization makes a big speed difference.
|
|
103
|
+
Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
|
|
140
104
|
@return : tableLog,
|
|
141
105
|
or an errorCode, which can be tested using FSE_isError() */
|
|
142
106
|
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
|
|
143
|
-
const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
|
|
107
|
+
const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
|
|
144
108
|
|
|
145
109
|
/*! FSE_NCountWriteBound():
|
|
146
110
|
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
|
|
@@ -158,8 +122,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
|
|
158
122
|
/*! Constructor and Destructor of FSE_CTable.
|
|
159
123
|
Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
|
|
160
124
|
typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
|
|
161
|
-
FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
|
|
162
|
-
FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
|
|
163
125
|
|
|
164
126
|
/*! FSE_buildCTable():
|
|
165
127
|
Builds `ct`, which must be already allocated, using FSE_createCTable().
|
|
@@ -228,23 +190,14 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
|
|
|
228
190
|
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
|
|
229
191
|
const void* rBuffer, size_t rBuffSize);
|
|
230
192
|
|
|
231
|
-
/*!
|
|
232
|
-
|
|
193
|
+
/*! FSE_readNCount_bmi2():
|
|
194
|
+
* Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
|
|
195
|
+
*/
|
|
196
|
+
FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
|
|
197
|
+
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
|
|
198
|
+
const void* rBuffer, size_t rBuffSize, int bmi2);
|
|
199
|
+
|
|
233
200
|
typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
|
|
234
|
-
FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
|
|
235
|
-
FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
|
|
236
|
-
|
|
237
|
-
/*! FSE_buildDTable():
|
|
238
|
-
Builds 'dt', which must be already allocated, using FSE_createDTable().
|
|
239
|
-
return : 0, or an errorCode, which can be tested using FSE_isError() */
|
|
240
|
-
FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
|
|
241
|
-
|
|
242
|
-
/*! FSE_decompress_usingDTable():
|
|
243
|
-
Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
|
|
244
|
-
into `dst` which must be already allocated.
|
|
245
|
-
@return : size of regenerated data (necessarily <= `dstCapacity`),
|
|
246
|
-
or an errorCode, which can be tested using FSE_isError() */
|
|
247
|
-
FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
|
|
248
201
|
|
|
249
202
|
/*!
|
|
250
203
|
Tutorial :
|
|
@@ -288,12 +241,12 @@ If there is an error, the function will return an error code, which can be teste
|
|
|
288
241
|
*******************************************/
|
|
289
242
|
/* FSE buffer bounds */
|
|
290
243
|
#define FSE_NCOUNTBOUND 512
|
|
291
|
-
#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
|
|
244
|
+
#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
|
|
292
245
|
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
|
|
293
246
|
|
|
294
247
|
/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
|
|
295
|
-
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
|
|
296
|
-
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
|
|
248
|
+
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
|
|
249
|
+
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
|
|
297
250
|
|
|
298
251
|
/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
|
|
299
252
|
#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
|
|
@@ -307,33 +260,28 @@ If there is an error, the function will return an error code, which can be teste
|
|
|
307
260
|
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
|
|
308
261
|
/**< same as FSE_optimalTableLog(), which used `minus==2` */
|
|
309
262
|
|
|
310
|
-
/* FSE_compress_wksp() :
|
|
311
|
-
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
|
|
312
|
-
* FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
|
|
313
|
-
*/
|
|
314
|
-
#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
|
|
315
|
-
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
|
316
|
-
|
|
317
|
-
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
|
|
318
|
-
/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
|
|
319
|
-
|
|
320
263
|
size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
|
|
321
264
|
/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
|
|
322
265
|
|
|
323
266
|
/* FSE_buildCTable_wksp() :
|
|
324
267
|
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
|
|
325
|
-
* `wkspSize` must be >= `(
|
|
268
|
+
* `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
|
|
269
|
+
* See FSE_buildCTable_wksp() for breakdown of workspace usage.
|
|
326
270
|
*/
|
|
271
|
+
/* Workspace size required by FSE_buildCTable_wksp(), expressed as a count of `unsigned`.
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. `a|b`, `x ? y : z`) expand with the intended precedence. */
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) ((((maxSymbolValue) + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
|
|
272
|
+
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
|
|
327
273
|
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
|
328
274
|
|
|
329
|
-
|
|
330
|
-
|
|
275
|
+
/* Workspace size, in bytes, required to build a decoding table:
 * one `short`-sized slot per symbol (maxSymbolValue + 1 of them),
 * one byte per table cell (1 << maxTableLog), plus 8 bytes of slack.
 * Both arguments are parenthesized so expression arguments expand correctly. */
#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * ((maxSymbolValue) + 1) + (1ULL << (maxTableLog)) + 8)
|
|
276
|
+
#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
|
|
277
|
+
FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
|
278
|
+
/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)` */
|
|
331
279
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
280
|
+
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
|
|
281
|
+
#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
|
|
282
|
+
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
|
|
283
|
+
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
|
|
284
|
+
* Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
|
|
337
285
|
|
|
338
286
|
typedef enum {
|
|
339
287
|
FSE_repeat_none, /**< Cannot use the previous table */
|
|
@@ -529,7 +477,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
|
|
|
529
477
|
|
|
530
478
|
/* FSE_getMaxNbBits() :
|
|
531
479
|
* Approximate maximum cost of a symbol, in bits.
|
|
532
|
-
* Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
|
|
480
|
+
* Fractional get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
|
|
533
481
|
* note 1 : assume symbolValue is valid (<= maxSymbolValue)
|
|
534
482
|
* note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
|
|
535
483
|
MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
|
|
@@ -644,6 +592,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
|
|
|
644
592
|
#ifndef FSE_DEFAULT_MEMORY_USAGE
|
|
645
593
|
# define FSE_DEFAULT_MEMORY_USAGE 13
|
|
646
594
|
#endif
|
|
595
|
+
#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
|
|
596
|
+
# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
|
|
597
|
+
#endif
|
|
647
598
|
|
|
648
599
|
/*!FSE_MAX_SYMBOL_VALUE :
|
|
649
600
|
* Maximum symbol value authorized.
|
|
@@ -677,7 +628,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
|
|
|
677
628
|
# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
|
|
678
629
|
#endif
|
|
679
630
|
|
|
680
|
-
#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
|
|
631
|
+
#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
|
|
681
632
|
|
|
682
633
|
|
|
683
634
|
#endif /* FSE_STATIC_LINKING_ONLY */
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/* ******************************************************************
|
|
2
2
|
* FSE : Finite State Entropy decoder
|
|
3
|
-
* Copyright (c)
|
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
4
4
|
*
|
|
5
5
|
* You can contact the author at :
|
|
6
6
|
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
|
@@ -16,13 +16,15 @@
|
|
|
16
16
|
/* **************************************************************
|
|
17
17
|
* Includes
|
|
18
18
|
****************************************************************/
|
|
19
|
-
#include
|
|
20
|
-
#include <string.h> /* memcpy, memset */
|
|
19
|
+
#include "debug.h" /* assert */
|
|
21
20
|
#include "bitstream.h"
|
|
22
21
|
#include "compiler.h"
|
|
23
22
|
#define FSE_STATIC_LINKING_ONLY
|
|
24
23
|
#include "fse.h"
|
|
25
24
|
#include "error_private.h"
|
|
25
|
+
#define ZSTD_DEPS_NEED_MALLOC
|
|
26
|
+
#include "zstd_deps.h"
|
|
27
|
+
#include "bits.h" /* ZSTD_highbit32 */
|
|
26
28
|
|
|
27
29
|
|
|
28
30
|
/* **************************************************************
|
|
@@ -54,30 +56,19 @@
|
|
|
54
56
|
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
|
|
55
57
|
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
|
|
56
58
|
|
|
57
|
-
|
|
58
|
-
/* Function templates */
|
|
59
|
-
FSE_DTable* FSE_createDTable (unsigned tableLog)
|
|
60
|
-
{
|
|
61
|
-
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
|
|
62
|
-
return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
void FSE_freeDTable (FSE_DTable* dt)
|
|
66
|
-
{
|
|
67
|
-
free(dt);
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
|
|
59
|
+
static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
|
|
71
60
|
{
|
|
72
61
|
void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
|
|
73
62
|
FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
|
|
74
|
-
U16 symbolNext
|
|
63
|
+
U16* symbolNext = (U16*)workSpace;
|
|
64
|
+
BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
|
|
75
65
|
|
|
76
66
|
U32 const maxSV1 = maxSymbolValue + 1;
|
|
77
67
|
U32 const tableSize = 1 << tableLog;
|
|
78
68
|
U32 highThreshold = tableSize-1;
|
|
79
69
|
|
|
80
70
|
/* Sanity Checks */
|
|
71
|
+
if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
|
|
81
72
|
if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
|
|
82
73
|
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
|
83
74
|
|
|
@@ -95,11 +86,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
|
|
|
95
86
|
if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
|
|
96
87
|
symbolNext[s] = normalizedCounter[s];
|
|
97
88
|
} } }
|
|
98
|
-
|
|
89
|
+
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
|
|
99
90
|
}
|
|
100
91
|
|
|
101
92
|
/* Spread symbols */
|
|
102
|
-
|
|
93
|
+
if (highThreshold == tableSize - 1) {
|
|
94
|
+
size_t const tableMask = tableSize-1;
|
|
95
|
+
size_t const step = FSE_TABLESTEP(tableSize);
|
|
96
|
+
/* First lay down the symbols in order.
|
|
97
|
+
* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
|
|
98
|
+
* misses since small blocks generally have small table logs, so nearly
|
|
99
|
+
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
|
|
100
|
+
* our buffer to handle the over-write.
|
|
101
|
+
*/
|
|
102
|
+
{
|
|
103
|
+
U64 const add = 0x0101010101010101ull;
|
|
104
|
+
size_t pos = 0;
|
|
105
|
+
U64 sv = 0;
|
|
106
|
+
U32 s;
|
|
107
|
+
for (s=0; s<maxSV1; ++s, sv += add) {
|
|
108
|
+
int i;
|
|
109
|
+
int const n = normalizedCounter[s];
|
|
110
|
+
MEM_write64(spread + pos, sv);
|
|
111
|
+
for (i = 8; i < n; i += 8) {
|
|
112
|
+
MEM_write64(spread + pos + i, sv);
|
|
113
|
+
}
|
|
114
|
+
pos += n;
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
/* Now we spread those positions across the table.
|
|
118
|
+
* The benefit of doing it in two stages is that we avoid the
|
|
119
|
+
* variable size inner loop, which caused lots of branch misses.
|
|
120
|
+
* Now we can run through all the positions without any branch misses.
|
|
121
|
+
* We unroll the loop twice, since that is what empirically worked best.
|
|
122
|
+
*/
|
|
123
|
+
{
|
|
124
|
+
size_t position = 0;
|
|
125
|
+
size_t s;
|
|
126
|
+
size_t const unroll = 2;
|
|
127
|
+
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
|
|
128
|
+
for (s = 0; s < (size_t)tableSize; s += unroll) {
|
|
129
|
+
size_t u;
|
|
130
|
+
for (u = 0; u < unroll; ++u) {
|
|
131
|
+
size_t const uPosition = (position + (u * step)) & tableMask;
|
|
132
|
+
tableDecode[uPosition].symbol = spread[s + u];
|
|
133
|
+
}
|
|
134
|
+
position = (position + (unroll * step)) & tableMask;
|
|
135
|
+
}
|
|
136
|
+
assert(position == 0);
|
|
137
|
+
}
|
|
138
|
+
} else {
|
|
139
|
+
U32 const tableMask = tableSize-1;
|
|
103
140
|
U32 const step = FSE_TABLESTEP(tableSize);
|
|
104
141
|
U32 s, position = 0;
|
|
105
142
|
for (s=0; s<maxSV1; s++) {
|
|
@@ -117,62 +154,24 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
|
|
|
117
154
|
for (u=0; u<tableSize; u++) {
|
|
118
155
|
FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
|
|
119
156
|
U32 const nextState = symbolNext[symbol]++;
|
|
120
|
-
tableDecode[u].nbBits = (BYTE) (tableLog -
|
|
157
|
+
tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
|
|
121
158
|
tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
|
|
122
159
|
} }
|
|
123
160
|
|
|
124
161
|
return 0;
|
|
125
162
|
}
|
|
126
163
|
|
|
164
|
+
size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
|
|
165
|
+
{
|
|
166
|
+
return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
|
|
167
|
+
}
|
|
168
|
+
|
|
127
169
|
|
|
128
170
|
#ifndef FSE_COMMONDEFS_ONLY
|
|
129
171
|
|
|
130
172
|
/*-*******************************************************
|
|
131
173
|
* Decompression (Byte symbols)
|
|
132
174
|
*********************************************************/
|
|
133
|
-
size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
|
|
134
|
-
{
|
|
135
|
-
void* ptr = dt;
|
|
136
|
-
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
|
|
137
|
-
void* dPtr = dt + 1;
|
|
138
|
-
FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
|
|
139
|
-
|
|
140
|
-
DTableH->tableLog = 0;
|
|
141
|
-
DTableH->fastMode = 0;
|
|
142
|
-
|
|
143
|
-
cell->newState = 0;
|
|
144
|
-
cell->symbol = symbolValue;
|
|
145
|
-
cell->nbBits = 0;
|
|
146
|
-
|
|
147
|
-
return 0;
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
|
|
152
|
-
{
|
|
153
|
-
void* ptr = dt;
|
|
154
|
-
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
|
|
155
|
-
void* dPtr = dt + 1;
|
|
156
|
-
FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
|
|
157
|
-
const unsigned tableSize = 1 << nbBits;
|
|
158
|
-
const unsigned tableMask = tableSize - 1;
|
|
159
|
-
const unsigned maxSV1 = tableMask+1;
|
|
160
|
-
unsigned s;
|
|
161
|
-
|
|
162
|
-
/* Sanity checks */
|
|
163
|
-
if (nbBits < 1) return ERROR(GENERIC); /* min size */
|
|
164
|
-
|
|
165
|
-
/* Build Decoding Table */
|
|
166
|
-
DTableH->tableLog = (U16)nbBits;
|
|
167
|
-
DTableH->fastMode = 1;
|
|
168
|
-
for (s=0; s<maxSV1; s++) {
|
|
169
|
-
dinfo[s].newState = 0;
|
|
170
|
-
dinfo[s].symbol = (BYTE)s;
|
|
171
|
-
dinfo[s].nbBits = (BYTE)nbBits;
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
return 0;
|
|
175
|
-
}
|
|
176
175
|
|
|
177
176
|
FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
|
|
178
177
|
void* dst, size_t maxDstSize,
|
|
@@ -236,51 +235,77 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
|
|
|
236
235
|
return op-ostart;
|
|
237
236
|
}
|
|
238
237
|
|
|
238
|
+
typedef struct {
|
|
239
|
+
short ncount[FSE_MAX_SYMBOL_VALUE + 1];
|
|
240
|
+
FSE_DTable dtable[1]; /* Dynamically sized */
|
|
241
|
+
} FSE_DecompressWksp;
|
|
239
242
|
|
|
240
|
-
size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
|
|
241
|
-
const void* cSrc, size_t cSrcSize,
|
|
242
|
-
const FSE_DTable* dt)
|
|
243
|
-
{
|
|
244
|
-
const void* ptr = dt;
|
|
245
|
-
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
|
|
246
|
-
const U32 fastMode = DTableH->fastMode;
|
|
247
243
|
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
|
|
244
|
+
FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
|
|
245
|
+
void* dst, size_t dstCapacity,
|
|
246
|
+
const void* cSrc, size_t cSrcSize,
|
|
247
|
+
unsigned maxLog, void* workSpace, size_t wkspSize,
|
|
248
|
+
int bmi2)
|
|
255
249
|
{
|
|
256
250
|
const BYTE* const istart = (const BYTE*)cSrc;
|
|
257
251
|
const BYTE* ip = istart;
|
|
258
|
-
short counting[FSE_MAX_SYMBOL_VALUE+1];
|
|
259
252
|
unsigned tableLog;
|
|
260
253
|
unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
|
|
254
|
+
FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
|
|
255
|
+
|
|
256
|
+
DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
|
|
257
|
+
if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
|
|
261
258
|
|
|
262
259
|
/* normal FSE decoding mode */
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
260
|
+
{
|
|
261
|
+
size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
|
|
262
|
+
if (FSE_isError(NCountLength)) return NCountLength;
|
|
263
|
+
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
|
|
264
|
+
assert(NCountLength <= cSrcSize);
|
|
265
|
+
ip += NCountLength;
|
|
266
|
+
cSrcSize -= NCountLength;
|
|
267
|
+
}
|
|
269
268
|
|
|
270
|
-
|
|
269
|
+
if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
|
|
270
|
+
assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
|
|
271
|
+
workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
|
|
272
|
+
wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
|
|
271
273
|
|
|
272
|
-
|
|
273
|
-
}
|
|
274
|
+
CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
|
|
274
275
|
|
|
276
|
+
{
|
|
277
|
+
const void* ptr = wksp->dtable;
|
|
278
|
+
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
|
|
279
|
+
const U32 fastMode = DTableH->fastMode;
|
|
275
280
|
|
|
276
|
-
|
|
281
|
+
/* select fast mode (static) */
|
|
282
|
+
if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
|
|
283
|
+
return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
277
286
|
|
|
278
|
-
|
|
287
|
+
/* Avoids the FORCE_INLINE of the _body() function. */
|
|
288
|
+
static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
|
279
289
|
{
|
|
280
|
-
|
|
281
|
-
return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
|
|
290
|
+
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
|
|
282
291
|
}
|
|
283
292
|
|
|
293
|
+
#if DYNAMIC_BMI2
|
|
294
|
+
BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
|
295
|
+
{
|
|
296
|
+
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
|
|
297
|
+
}
|
|
298
|
+
#endif
|
|
284
299
|
|
|
300
|
+
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
|
|
301
|
+
{
|
|
302
|
+
#if DYNAMIC_BMI2
|
|
303
|
+
if (bmi2) {
|
|
304
|
+
return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
|
|
305
|
+
}
|
|
306
|
+
#endif
|
|
307
|
+
(void)bmi2;
|
|
308
|
+
return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
|
|
309
|
+
}
|
|
285
310
|
|
|
286
311
|
#endif /* FSE_COMMONDEFS_ONLY */
|