zstd-ruby 1.4.5.0 → 1.5.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
- data/ext/zstdruby/libzstd/common/compiler.h +205 -22
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +12 -19
- data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
- data/ext/zstdruby/libzstd/common/error_private.c +10 -2
- data/ext/zstdruby/libzstd/common/error_private.h +82 -3
- data/ext/zstdruby/libzstd/common/fse.h +37 -86
- data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
- data/ext/zstdruby/libzstd/common/huf.h +99 -166
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +10 -4
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +74 -19
- data/ext/zstdruby/libzstd/common/threading.h +5 -10
- data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
- data/ext/zstdruby/libzstd/compress/hist.c +27 -29
- data/ext/zstdruby/libzstd/compress/hist.h +2 -2
- data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
- data/ext/zstdruby/libzstd/zstd.h +1217 -287
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +19 -36
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -354
- data/ext/zstdruby/libzstd/README.md +0 -179
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,7 +1,7 @@
|
|
1
1
|
/* ******************************************************************
|
2
2
|
* FSE : Finite State Entropy codec
|
3
3
|
* Public Prototypes declaration
|
4
|
-
* Copyright (c)
|
4
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
5
5
|
*
|
6
6
|
* You can contact the author at :
|
7
7
|
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
@@ -23,7 +23,7 @@ extern "C" {
|
|
23
23
|
/*-*****************************************
|
24
24
|
* Dependencies
|
25
25
|
******************************************/
|
26
|
-
#include
|
26
|
+
#include "zstd_deps.h" /* size_t, ptrdiff_t */
|
27
27
|
|
28
28
|
|
29
29
|
/*-*****************************************
|
@@ -53,34 +53,6 @@ extern "C" {
|
|
53
53
|
FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
|
54
54
|
|
55
55
|
|
56
|
-
/*-****************************************
|
57
|
-
* FSE simple functions
|
58
|
-
******************************************/
|
59
|
-
/*! FSE_compress() :
|
60
|
-
Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
|
61
|
-
'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
|
62
|
-
@return : size of compressed data (<= dstCapacity).
|
63
|
-
Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
|
64
|
-
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
|
65
|
-
if FSE_isError(return), compression failed (more details using FSE_getErrorName())
|
66
|
-
*/
|
67
|
-
FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
|
68
|
-
const void* src, size_t srcSize);
|
69
|
-
|
70
|
-
/*! FSE_decompress():
|
71
|
-
Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
|
72
|
-
into already allocated destination buffer 'dst', of size 'dstCapacity'.
|
73
|
-
@return : size of regenerated data (<= maxDstSize),
|
74
|
-
or an error code, which can be tested using FSE_isError() .
|
75
|
-
|
76
|
-
** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
|
77
|
-
Why ? : making this distinction requires a header.
|
78
|
-
Header management is intentionally delegated to the user layer, which can better manage special cases.
|
79
|
-
*/
|
80
|
-
FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
|
81
|
-
const void* cSrc, size_t cSrcSize);
|
82
|
-
|
83
|
-
|
84
56
|
/*-*****************************************
|
85
57
|
* Tool functions
|
86
58
|
******************************************/
|
@@ -91,20 +63,6 @@ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return
|
|
91
63
|
FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
|
92
64
|
|
93
65
|
|
94
|
-
/*-*****************************************
|
95
|
-
* FSE advanced functions
|
96
|
-
******************************************/
|
97
|
-
/*! FSE_compress2() :
|
98
|
-
Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
|
99
|
-
Both parameters can be defined as '0' to mean : use default value
|
100
|
-
@return : size of compressed data
|
101
|
-
Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
|
102
|
-
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
|
103
|
-
if FSE_isError(return), it's an error code.
|
104
|
-
*/
|
105
|
-
FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
|
106
|
-
|
107
|
-
|
108
66
|
/*-*****************************************
|
109
67
|
* FSE detailed API
|
110
68
|
******************************************/
|
@@ -137,10 +95,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
|
|
137
95
|
/*! FSE_normalizeCount():
|
138
96
|
normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
|
139
97
|
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
|
98
|
+
useLowProbCount is a boolean parameter which trades off compressed size for
|
99
|
+
faster header decoding. When it is set to 1, the compressed data will be slightly
|
100
|
+
smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
|
101
|
+
faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
|
102
|
+
is a good default, since header deserialization makes a big speed difference.
|
103
|
+
Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
|
140
104
|
@return : tableLog,
|
141
105
|
or an errorCode, which can be tested using FSE_isError() */
|
142
106
|
FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
|
143
|
-
const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
|
107
|
+
const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
|
144
108
|
|
145
109
|
/*! FSE_NCountWriteBound():
|
146
110
|
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
|
@@ -158,8 +122,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
|
|
158
122
|
/*! Constructor and Destructor of FSE_CTable.
|
159
123
|
Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
|
160
124
|
typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
|
161
|
-
FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
|
162
|
-
FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
|
163
125
|
|
164
126
|
/*! FSE_buildCTable():
|
165
127
|
Builds `ct`, which must be already allocated (e.g. as a table of FSE_CTable sized with FSE_CTABLE_SIZE_U32()).
|
@@ -228,23 +190,14 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
|
|
228
190
|
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
|
229
191
|
const void* rBuffer, size_t rBuffSize);
|
230
192
|
|
231
|
-
/*!
|
232
|
-
|
193
|
+
/*! FSE_readNCount_bmi2():
|
194
|
+
* Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
|
195
|
+
*/
|
196
|
+
FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
|
197
|
+
unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
|
198
|
+
const void* rBuffer, size_t rBuffSize, int bmi2);
|
199
|
+
|
233
200
|
typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
|
234
|
-
FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
|
235
|
-
FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
|
236
|
-
|
237
|
-
/*! FSE_buildDTable():
|
238
|
-
Builds 'dt', which must be already allocated, using FSE_createDTable().
|
239
|
-
return : 0, or an errorCode, which can be tested using FSE_isError() */
|
240
|
-
FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
|
241
|
-
|
242
|
-
/*! FSE_decompress_usingDTable():
|
243
|
-
Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
|
244
|
-
into `dst` which must be already allocated.
|
245
|
-
@return : size of regenerated data (necessarily <= `dstCapacity`),
|
246
|
-
or an errorCode, which can be tested using FSE_isError() */
|
247
|
-
FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
|
248
201
|
|
249
202
|
/*!
|
250
203
|
Tutorial :
|
@@ -288,12 +241,12 @@ If there is an error, the function will return an error code, which can be teste
|
|
288
241
|
*******************************************/
|
289
242
|
/* FSE buffer bounds */
|
290
243
|
#define FSE_NCOUNTBOUND 512
|
291
|
-
#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
|
244
|
+
#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
|
292
245
|
#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
|
293
246
|
|
294
247
|
/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
|
295
|
-
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
|
296
|
-
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
|
248
|
+
#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
|
249
|
+
#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
|
297
250
|
|
298
251
|
/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
|
299
252
|
#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
|
@@ -307,33 +260,28 @@ If there is an error, the function will return an error code, which can be teste
|
|
307
260
|
unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
|
308
261
|
/**< same as FSE_optimalTableLog(), which used `minus==2` */
|
309
262
|
|
310
|
-
/* FSE_compress_wksp() :
|
311
|
-
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
|
312
|
-
* FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
|
313
|
-
*/
|
314
|
-
#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
|
315
|
-
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
316
|
-
|
317
|
-
size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
|
318
|
-
/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
|
319
|
-
|
320
263
|
size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
|
321
264
|
/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
|
322
265
|
|
323
266
|
/* FSE_buildCTable_wksp() :
|
324
267
|
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
|
325
|
-
* `wkspSize` must be >= `(
|
268
|
+
* `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
|
269
|
+
* See FSE_buildCTable_wksp() for breakdown of workspace usage.
|
326
270
|
*/
|
271
|
+
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) ((((maxSymbolValue) + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */) /* result is a count of U32 units; both arguments are parenthesized so expressions can be passed safely */
|
272
|
+
#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
|
327
273
|
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
328
274
|
|
329
|
-
|
330
|
-
|
275
|
+
#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * ((maxSymbolValue) + 1) + (1ULL << (maxTableLog)) + 8) /* bytes: (maxSymbolValue+1) 16-bit symbolNext entries + (1<<maxTableLog) spread bytes + 8 bytes of slack for the 64-bit over-write; arguments are parenthesized so expressions can be passed safely */
|
276
|
+
#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
|
277
|
+
FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
|
278
|
+
/**< Same as FSE_buildDTable(), using an externally allocated `workSpace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)` */
|
331
279
|
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
280
|
+
#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
|
281
|
+
#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
|
282
|
+
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
|
283
|
+
/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
|
284
|
+
* Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
|
337
285
|
|
338
286
|
typedef enum {
|
339
287
|
FSE_repeat_none, /**< Cannot use the previous table */
|
@@ -529,7 +477,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
|
|
529
477
|
|
530
478
|
/* FSE_getMaxNbBits() :
|
531
479
|
* Approximate maximum cost of a symbol, in bits.
|
532
|
-
* Fractional get rounded up (i.e
|
480
|
+
* Fractional costs get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
|
533
481
|
* note 1 : assume symbolValue is valid (<= maxSymbolValue)
|
534
482
|
* note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
|
535
483
|
MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
|
@@ -644,6 +592,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
|
|
644
592
|
#ifndef FSE_DEFAULT_MEMORY_USAGE
|
645
593
|
# define FSE_DEFAULT_MEMORY_USAGE 13
|
646
594
|
#endif
|
595
|
+
#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
|
596
|
+
# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
|
597
|
+
#endif
|
647
598
|
|
648
599
|
/*!FSE_MAX_SYMBOL_VALUE :
|
649
600
|
* Maximum symbol value authorized.
|
@@ -677,7 +628,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
|
|
677
628
|
# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
|
678
629
|
#endif
|
679
630
|
|
680
|
-
#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
|
631
|
+
#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
|
681
632
|
|
682
633
|
|
683
634
|
#endif /* FSE_STATIC_LINKING_ONLY */
|
@@ -1,6 +1,6 @@
|
|
1
1
|
/* ******************************************************************
|
2
2
|
* FSE : Finite State Entropy decoder
|
3
|
-
* Copyright (c)
|
3
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
4
4
|
*
|
5
5
|
* You can contact the author at :
|
6
6
|
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
|
@@ -16,13 +16,15 @@
|
|
16
16
|
/* **************************************************************
|
17
17
|
* Includes
|
18
18
|
****************************************************************/
|
19
|
-
#include
|
20
|
-
#include <string.h> /* memcpy, memset */
|
19
|
+
#include "debug.h" /* assert */
|
21
20
|
#include "bitstream.h"
|
22
21
|
#include "compiler.h"
|
23
22
|
#define FSE_STATIC_LINKING_ONLY
|
24
23
|
#include "fse.h"
|
25
24
|
#include "error_private.h"
|
25
|
+
#define ZSTD_DEPS_NEED_MALLOC
|
26
|
+
#include "zstd_deps.h"
|
27
|
+
#include "bits.h" /* ZSTD_highbit32 */
|
26
28
|
|
27
29
|
|
28
30
|
/* **************************************************************
|
@@ -54,30 +56,19 @@
|
|
54
56
|
#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
|
55
57
|
#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
|
56
58
|
|
57
|
-
|
58
|
-
/* Function templates */
|
59
|
-
FSE_DTable* FSE_createDTable (unsigned tableLog)
|
60
|
-
{
|
61
|
-
if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
|
62
|
-
return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
|
63
|
-
}
|
64
|
-
|
65
|
-
void FSE_freeDTable (FSE_DTable* dt)
|
66
|
-
{
|
67
|
-
free(dt);
|
68
|
-
}
|
69
|
-
|
70
|
-
size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
|
59
|
+
static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
|
71
60
|
{
|
72
61
|
void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
|
73
62
|
FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
|
74
|
-
U16 symbolNext
|
63
|
+
U16* symbolNext = (U16*)workSpace;
|
64
|
+
BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
|
75
65
|
|
76
66
|
U32 const maxSV1 = maxSymbolValue + 1;
|
77
67
|
U32 const tableSize = 1 << tableLog;
|
78
68
|
U32 highThreshold = tableSize-1;
|
79
69
|
|
80
70
|
/* Sanity Checks */
|
71
|
+
if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
|
81
72
|
if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
|
82
73
|
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
83
74
|
|
@@ -95,11 +86,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
|
|
95
86
|
if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
|
96
87
|
symbolNext[s] = normalizedCounter[s];
|
97
88
|
} } }
|
98
|
-
|
89
|
+
ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
|
99
90
|
}
|
100
91
|
|
101
92
|
/* Spread symbols */
|
102
|
-
|
93
|
+
if (highThreshold == tableSize - 1) {
|
94
|
+
size_t const tableMask = tableSize-1;
|
95
|
+
size_t const step = FSE_TABLESTEP(tableSize);
|
96
|
+
/* First lay down the symbols in order.
|
97
|
+
* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
|
98
|
+
* misses since small blocks generally have small table logs, so nearly
|
99
|
+
* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
|
100
|
+
* our buffer to handle the over-write.
|
101
|
+
*/
|
102
|
+
{
|
103
|
+
U64 const add = 0x0101010101010101ull;
|
104
|
+
size_t pos = 0;
|
105
|
+
U64 sv = 0;
|
106
|
+
U32 s;
|
107
|
+
for (s=0; s<maxSV1; ++s, sv += add) {
|
108
|
+
int i;
|
109
|
+
int const n = normalizedCounter[s];
|
110
|
+
MEM_write64(spread + pos, sv);
|
111
|
+
for (i = 8; i < n; i += 8) {
|
112
|
+
MEM_write64(spread + pos + i, sv);
|
113
|
+
}
|
114
|
+
pos += n;
|
115
|
+
}
|
116
|
+
}
|
117
|
+
/* Now we spread those positions across the table.
|
118
|
+
* The benefit of doing it in two stages is that we avoid the
|
119
|
+
* variable size inner loop, which caused lots of branch misses.
|
120
|
+
* Now we can run through all the positions without any branch misses.
|
121
|
+
* We unroll the loop twice, since that is what empirically worked best.
|
122
|
+
*/
|
123
|
+
{
|
124
|
+
size_t position = 0;
|
125
|
+
size_t s;
|
126
|
+
size_t const unroll = 2;
|
127
|
+
assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
|
128
|
+
for (s = 0; s < (size_t)tableSize; s += unroll) {
|
129
|
+
size_t u;
|
130
|
+
for (u = 0; u < unroll; ++u) {
|
131
|
+
size_t const uPosition = (position + (u * step)) & tableMask;
|
132
|
+
tableDecode[uPosition].symbol = spread[s + u];
|
133
|
+
}
|
134
|
+
position = (position + (unroll * step)) & tableMask;
|
135
|
+
}
|
136
|
+
assert(position == 0);
|
137
|
+
}
|
138
|
+
} else {
|
139
|
+
U32 const tableMask = tableSize-1;
|
103
140
|
U32 const step = FSE_TABLESTEP(tableSize);
|
104
141
|
U32 s, position = 0;
|
105
142
|
for (s=0; s<maxSV1; s++) {
|
@@ -117,62 +154,24 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
|
|
117
154
|
for (u=0; u<tableSize; u++) {
|
118
155
|
FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
|
119
156
|
U32 const nextState = symbolNext[symbol]++;
|
120
|
-
tableDecode[u].nbBits = (BYTE) (tableLog -
|
157
|
+
tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
|
121
158
|
tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
|
122
159
|
} }
|
123
160
|
|
124
161
|
return 0;
|
125
162
|
}
|
126
163
|
|
164
|
+
size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
|
165
|
+
{
|
166
|
+
return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
|
167
|
+
}
|
168
|
+
|
127
169
|
|
128
170
|
#ifndef FSE_COMMONDEFS_ONLY
|
129
171
|
|
130
172
|
/*-*******************************************************
|
131
173
|
* Decompression (Byte symbols)
|
132
174
|
*********************************************************/
|
133
|
-
size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
|
134
|
-
{
|
135
|
-
void* ptr = dt;
|
136
|
-
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
|
137
|
-
void* dPtr = dt + 1;
|
138
|
-
FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
|
139
|
-
|
140
|
-
DTableH->tableLog = 0;
|
141
|
-
DTableH->fastMode = 0;
|
142
|
-
|
143
|
-
cell->newState = 0;
|
144
|
-
cell->symbol = symbolValue;
|
145
|
-
cell->nbBits = 0;
|
146
|
-
|
147
|
-
return 0;
|
148
|
-
}
|
149
|
-
|
150
|
-
|
151
|
-
size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
|
152
|
-
{
|
153
|
-
void* ptr = dt;
|
154
|
-
FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
|
155
|
-
void* dPtr = dt + 1;
|
156
|
-
FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
|
157
|
-
const unsigned tableSize = 1 << nbBits;
|
158
|
-
const unsigned tableMask = tableSize - 1;
|
159
|
-
const unsigned maxSV1 = tableMask+1;
|
160
|
-
unsigned s;
|
161
|
-
|
162
|
-
/* Sanity checks */
|
163
|
-
if (nbBits < 1) return ERROR(GENERIC); /* min size */
|
164
|
-
|
165
|
-
/* Build Decoding Table */
|
166
|
-
DTableH->tableLog = (U16)nbBits;
|
167
|
-
DTableH->fastMode = 1;
|
168
|
-
for (s=0; s<maxSV1; s++) {
|
169
|
-
dinfo[s].newState = 0;
|
170
|
-
dinfo[s].symbol = (BYTE)s;
|
171
|
-
dinfo[s].nbBits = (BYTE)nbBits;
|
172
|
-
}
|
173
|
-
|
174
|
-
return 0;
|
175
|
-
}
|
176
175
|
|
177
176
|
FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
|
178
177
|
void* dst, size_t maxDstSize,
|
@@ -236,51 +235,77 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
|
|
236
235
|
return op-ostart;
|
237
236
|
}
|
238
237
|
|
238
|
+
typedef struct {
|
239
|
+
short ncount[FSE_MAX_SYMBOL_VALUE + 1];
|
240
|
+
FSE_DTable dtable[1]; /* Dynamically sized */
|
241
|
+
} FSE_DecompressWksp;
|
239
242
|
|
240
|
-
size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
|
241
|
-
const void* cSrc, size_t cSrcSize,
|
242
|
-
const FSE_DTable* dt)
|
243
|
-
{
|
244
|
-
const void* ptr = dt;
|
245
|
-
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
|
246
|
-
const U32 fastMode = DTableH->fastMode;
|
247
243
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
|
244
|
+
FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
|
245
|
+
void* dst, size_t dstCapacity,
|
246
|
+
const void* cSrc, size_t cSrcSize,
|
247
|
+
unsigned maxLog, void* workSpace, size_t wkspSize,
|
248
|
+
int bmi2)
|
255
249
|
{
|
256
250
|
const BYTE* const istart = (const BYTE*)cSrc;
|
257
251
|
const BYTE* ip = istart;
|
258
|
-
short counting[FSE_MAX_SYMBOL_VALUE+1];
|
259
252
|
unsigned tableLog;
|
260
253
|
unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
|
254
|
+
FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
|
255
|
+
|
256
|
+
DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
|
257
|
+
if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
|
261
258
|
|
262
259
|
/* normal FSE decoding mode */
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
260
|
+
{
|
261
|
+
size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
|
262
|
+
if (FSE_isError(NCountLength)) return NCountLength;
|
263
|
+
if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
|
264
|
+
assert(NCountLength <= cSrcSize);
|
265
|
+
ip += NCountLength;
|
266
|
+
cSrcSize -= NCountLength;
|
267
|
+
}
|
269
268
|
|
270
|
-
|
269
|
+
if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
|
270
|
+
assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
|
271
|
+
workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
|
272
|
+
wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
|
271
273
|
|
272
|
-
|
273
|
-
}
|
274
|
+
CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
|
274
275
|
|
276
|
+
{
|
277
|
+
const void* ptr = wksp->dtable;
|
278
|
+
const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
|
279
|
+
const U32 fastMode = DTableH->fastMode;
|
275
280
|
|
276
|
-
|
281
|
+
/* select fast mode (static) */
|
282
|
+
if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
|
283
|
+
return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
|
284
|
+
}
|
285
|
+
}
|
277
286
|
|
278
|
-
|
287
|
+
/* Avoids the FORCE_INLINE of the _body() function. */
|
288
|
+
static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
279
289
|
{
|
280
|
-
|
281
|
-
return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
|
290
|
+
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
|
282
291
|
}
|
283
292
|
|
293
|
+
#if DYNAMIC_BMI2
|
294
|
+
BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
295
|
+
{
|
296
|
+
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
|
297
|
+
}
|
298
|
+
#endif
|
284
299
|
|
300
|
+
size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
|
301
|
+
{
|
302
|
+
#if DYNAMIC_BMI2
|
303
|
+
if (bmi2) {
|
304
|
+
return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
|
305
|
+
}
|
306
|
+
#endif
|
307
|
+
(void)bmi2;
|
308
|
+
return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
|
309
|
+
}
|
285
310
|
|
286
311
|
#endif /* FSE_COMMONDEFS_ONLY */
|