zstd-ruby 1.4.4.0 → 1.5.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +78 -5
  4. data/Rakefile +8 -2
  5. data/ext/zstdruby/common.h +15 -0
  6. data/ext/zstdruby/extconf.rb +3 -2
  7. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  8. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  9. data/ext/zstdruby/libzstd/common/bitstream.h +74 -97
  10. data/ext/zstdruby/libzstd/common/compiler.h +219 -20
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  13. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +184 -80
  15. data/ext/zstdruby/libzstd/common/error_private.c +11 -2
  16. data/ext/zstdruby/libzstd/common/error_private.h +87 -4
  17. data/ext/zstdruby/libzstd/common/fse.h +47 -116
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +127 -127
  19. data/ext/zstdruby/libzstd/common/huf.h +112 -197
  20. data/ext/zstdruby/libzstd/common/mem.h +124 -142
  21. data/ext/zstdruby/libzstd/common/pool.c +54 -27
  22. data/ext/zstdruby/libzstd/common/pool.h +11 -5
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +78 -22
  25. data/ext/zstdruby/libzstd/common/threading.h +9 -13
  26. data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +186 -144
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +99 -196
  34. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  35. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +968 -331
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +4120 -1191
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +688 -159
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +121 -40
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +62 -35
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +577 -0
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +322 -115
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +394 -154
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -3
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -253
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -3
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1289 -247
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +339 -212
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +508 -282
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +217 -466
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +35 -114
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1220 -572
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +23 -19
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +859 -273
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1244 -375
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +21 -7
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +74 -11
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +75 -54
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +55 -36
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +126 -110
  72. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +248 -56
  73. data/ext/zstdruby/libzstd/zstd.h +1277 -306
  74. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +29 -8
  75. data/ext/zstdruby/main.c +20 -0
  76. data/ext/zstdruby/skippable_frame.c +63 -0
  77. data/ext/zstdruby/streaming_compress.c +177 -0
  78. data/ext/zstdruby/streaming_compress.h +5 -0
  79. data/ext/zstdruby/streaming_decompress.c +123 -0
  80. data/ext/zstdruby/zstdruby.c +114 -32
  81. data/lib/zstd-ruby/version.rb +1 -1
  82. data/lib/zstd-ruby.rb +0 -1
  83. data/zstd-ruby.gemspec +1 -1
  84. metadata +24 -39
  85. data/.travis.yml +0 -14
  86. data/ext/zstdruby/libzstd/.gitignore +0 -3
  87. data/ext/zstdruby/libzstd/BUCK +0 -234
  88. data/ext/zstdruby/libzstd/Makefile +0 -289
  89. data/ext/zstdruby/libzstd/README.md +0 -159
  90. data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
  91. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
  92. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
  93. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
  94. data/ext/zstdruby/libzstd/dll/example/Makefile +0 -47
  95. data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
  96. data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
  97. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
  98. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
  99. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
  100. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2152
  101. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
  102. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3514
  103. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
  104. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3156
  105. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
  106. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3641
  107. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
  108. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4046
  109. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
  110. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4150
  111. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
  112. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4533
  113. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
  114. data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
  115. data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,35 +1,15 @@
1
1
  /* ******************************************************************
2
- FSE : Finite State Entropy codec
3
- Public Prototypes declaration
4
- Copyright (C) 2013-2016, Yann Collet.
5
-
6
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
-
8
- Redistribution and use in source and binary forms, with or without
9
- modification, are permitted provided that the following conditions are
10
- met:
11
-
12
- * Redistributions of source code must retain the above copyright
13
- notice, this list of conditions and the following disclaimer.
14
- * Redistributions in binary form must reproduce the above
15
- copyright notice, this list of conditions and the following disclaimer
16
- in the documentation and/or other materials provided with the
17
- distribution.
18
-
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
-
31
- You can contact the author at :
32
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
2
+ * FSE : Finite State Entropy codec
3
+ * Public Prototypes declaration
4
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
5
+ *
6
+ * You can contact the author at :
7
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
 
35
15
  #if defined (__cplusplus)
@@ -43,7 +23,7 @@ extern "C" {
43
23
  /*-*****************************************
44
24
  * Dependencies
45
25
  ******************************************/
46
- #include <stddef.h> /* size_t, ptrdiff_t */
26
+ #include "zstd_deps.h" /* size_t, ptrdiff_t */
47
27
 
48
28
 
49
29
  /*-*****************************************
@@ -73,34 +53,6 @@ extern "C" {
73
53
  FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
74
54
 
75
55
 
76
- /*-****************************************
77
- * FSE simple functions
78
- ******************************************/
79
- /*! FSE_compress() :
80
- Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
81
- 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
82
- @return : size of compressed data (<= dstCapacity).
83
- Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
84
- if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
85
- if FSE_isError(return), compression failed (more details using FSE_getErrorName())
86
- */
87
- FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
88
- const void* src, size_t srcSize);
89
-
90
- /*! FSE_decompress():
91
- Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
92
- into already allocated destination buffer 'dst', of size 'dstCapacity'.
93
- @return : size of regenerated data (<= maxDstSize),
94
- or an error code, which can be tested using FSE_isError() .
95
-
96
- ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
97
- Why ? : making this distinction requires a header.
98
- Header management is intentionally delegated to the user layer, which can better manage special cases.
99
- */
100
- FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
101
- const void* cSrc, size_t cSrcSize);
102
-
103
-
104
56
  /*-*****************************************
105
57
  * Tool functions
106
58
  ******************************************/
@@ -111,20 +63,6 @@ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return
111
63
  FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
112
64
 
113
65
 
114
- /*-*****************************************
115
- * FSE advanced functions
116
- ******************************************/
117
- /*! FSE_compress2() :
118
- Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
119
- Both parameters can be defined as '0' to mean : use default value
120
- @return : size of compressed data
121
- Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
122
- if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
123
- if FSE_isError(return), it's an error code.
124
- */
125
- FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
126
-
127
-
128
66
  /*-*****************************************
129
67
  * FSE detailed API
130
68
  ******************************************/
@@ -157,10 +95,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
157
95
  /*! FSE_normalizeCount():
158
96
  normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
159
97
  'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
98
+ useLowProbCount is a boolean parameter which trades off compressed size for
99
+ faster header decoding. When it is set to 1, the compressed data will be slightly
100
+ smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
101
+ faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
102
+ is a good default, since header deserialization makes a big speed difference.
103
+ Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
160
104
  @return : tableLog,
161
105
  or an errorCode, which can be tested using FSE_isError() */
162
106
  FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
163
- const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
107
+ const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
164
108
 
165
109
  /*! FSE_NCountWriteBound():
166
110
  Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@@ -178,8 +122,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
178
122
  /*! Constructor and Destructor of FSE_CTable.
179
123
  Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
180
124
  typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
181
- FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
182
- FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
183
125
 
184
126
  /*! FSE_buildCTable():
185
127
  Builds `ct`, which must be already allocated, using FSE_createCTable().
@@ -248,23 +190,14 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
248
190
  unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
249
191
  const void* rBuffer, size_t rBuffSize);
250
192
 
251
- /*! Constructor and Destructor of FSE_DTable.
252
- Note that its size depends on 'tableLog' */
193
+ /*! FSE_readNCount_bmi2():
194
+ * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
195
+ */
196
+ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
197
+ unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
198
+ const void* rBuffer, size_t rBuffSize, int bmi2);
199
+
253
200
  typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
254
- FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
255
- FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
256
-
257
- /*! FSE_buildDTable():
258
- Builds 'dt', which must be already allocated, using FSE_createDTable().
259
- return : 0, or an errorCode, which can be tested using FSE_isError() */
260
- FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
261
-
262
- /*! FSE_decompress_usingDTable():
263
- Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
264
- into `dst` which must be already allocated.
265
- @return : size of regenerated data (necessarily <= `dstCapacity`),
266
- or an errorCode, which can be tested using FSE_isError() */
267
- FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
268
201
 
269
202
  /*!
270
203
  Tutorial :
@@ -308,12 +241,12 @@ If there is an error, the function will return an error code, which can be teste
308
241
  *******************************************/
309
242
  /* FSE buffer bounds */
310
243
  #define FSE_NCOUNTBOUND 512
311
- #define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
244
+ #define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
312
245
  #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
313
246
 
314
247
  /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
315
- #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
316
- #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
248
+ #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
249
+ #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
317
250
 
318
251
  /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
319
252
  #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
@@ -327,33 +260,28 @@ If there is an error, the function will return an error code, which can be teste
327
260
  unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
328
261
  /**< same as FSE_optimalTableLog(), which used `minus==2` */
329
262
 
330
- /* FSE_compress_wksp() :
331
- * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
332
- * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
333
- */
334
- #define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
335
- size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
336
-
337
- size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
338
- /**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
339
-
340
263
  size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
341
264
  /**< build a fake FSE_CTable, designed to compress always the same symbolValue */
342
265
 
343
266
  /* FSE_buildCTable_wksp() :
344
267
  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
345
- * `wkspSize` must be >= `(1<<tableLog)`.
268
+ * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
269
+ * See FSE_buildCTable_wksp() for breakdown of workspace usage.
346
270
  */
271
+ #define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
272
+ #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
347
273
  size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
348
274
 
349
- size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
350
- /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
275
+ #define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
276
+ #define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
277
+ FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
278
+ /**< Same as FSE_buildDTable(), using an externally allocated `workSpace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)` */
351
279
 
352
- size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
353
- /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
354
-
355
- size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
356
- /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
280
+ #define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
281
+ #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
282
+ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
283
+ /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
284
+ * Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
357
285
 
358
286
  typedef enum {
359
287
  FSE_repeat_none, /**< Cannot use the previous table */
@@ -549,7 +477,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
549
477
 
550
478
  /* FSE_getMaxNbBits() :
551
479
  * Approximate maximum cost of a symbol, in bits.
552
- * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
480
+ * Fractional values get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
553
481
  * note 1 : assume symbolValue is valid (<= maxSymbolValue)
554
482
  * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
555
483
  MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
@@ -664,6 +592,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
664
592
  #ifndef FSE_DEFAULT_MEMORY_USAGE
665
593
  # define FSE_DEFAULT_MEMORY_USAGE 13
666
594
  #endif
595
+ #if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
596
+ # error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
597
+ #endif
667
598
 
668
599
  /*!FSE_MAX_SYMBOL_VALUE :
669
600
  * Maximum symbol value authorized.
@@ -697,7 +628,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
697
628
  # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
698
629
  #endif
699
630
 
700
- #define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
631
+ #define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
701
632
 
702
633
 
703
634
  #endif /* FSE_STATIC_LINKING_ONLY */
@@ -1,48 +1,30 @@
1
1
  /* ******************************************************************
2
- FSE : Finite State Entropy decoder
3
- Copyright (C) 2013-2015, Yann Collet.
4
-
5
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
-
7
- Redistribution and use in source and binary forms, with or without
8
- modification, are permitted provided that the following conditions are
9
- met:
10
-
11
- * Redistributions of source code must retain the above copyright
12
- notice, this list of conditions and the following disclaimer.
13
- * Redistributions in binary form must reproduce the above
14
- copyright notice, this list of conditions and the following disclaimer
15
- in the documentation and/or other materials provided with the
16
- distribution.
17
-
18
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
-
30
- You can contact the author at :
31
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
2
+ * FSE : Finite State Entropy decoder
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
+ *
5
+ * You can contact the author at :
6
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
7
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
 
35
15
 
36
16
  /* **************************************************************
37
17
  * Includes
38
18
  ****************************************************************/
39
- #include <stdlib.h> /* malloc, free, qsort */
40
- #include <string.h> /* memcpy, memset */
19
+ #include "debug.h" /* assert */
41
20
  #include "bitstream.h"
42
21
  #include "compiler.h"
43
22
  #define FSE_STATIC_LINKING_ONLY
44
23
  #include "fse.h"
45
24
  #include "error_private.h"
25
+ #define ZSTD_DEPS_NEED_MALLOC
26
+ #include "zstd_deps.h"
27
+ #include "bits.h" /* ZSTD_highbit32 */
46
28
 
47
29
 
48
30
  /* **************************************************************
@@ -51,11 +33,6 @@
51
33
  #define FSE_isError ERR_isError
52
34
  #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
53
35
 
54
- /* check and forward error code */
55
- #ifndef CHECK_F
56
- #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
57
- #endif
58
-
59
36
 
60
37
  /* **************************************************************
61
38
  * Templates
@@ -79,30 +56,19 @@
79
56
  #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
80
57
  #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
81
58
 
82
-
83
- /* Function templates */
84
- FSE_DTable* FSE_createDTable (unsigned tableLog)
85
- {
86
- if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
87
- return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
88
- }
89
-
90
- void FSE_freeDTable (FSE_DTable* dt)
91
- {
92
- free(dt);
93
- }
94
-
95
- size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
59
+ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
96
60
  {
97
61
  void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
98
62
  FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
99
- U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
63
+ U16* symbolNext = (U16*)workSpace;
64
+ BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
100
65
 
101
66
  U32 const maxSV1 = maxSymbolValue + 1;
102
67
  U32 const tableSize = 1 << tableLog;
103
68
  U32 highThreshold = tableSize-1;
104
69
 
105
70
  /* Sanity Checks */
71
+ if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
106
72
  if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
107
73
  if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
108
74
 
@@ -120,11 +86,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
120
86
  if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
121
87
  symbolNext[s] = normalizedCounter[s];
122
88
  } } }
123
- memcpy(dt, &DTableH, sizeof(DTableH));
89
+ ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
124
90
  }
125
91
 
126
92
  /* Spread symbols */
127
- { U32 const tableMask = tableSize-1;
93
+ if (highThreshold == tableSize - 1) {
94
+ size_t const tableMask = tableSize-1;
95
+ size_t const step = FSE_TABLESTEP(tableSize);
96
+ /* First lay down the symbols in order.
97
+ * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
98
+ * misses since small blocks generally have small table logs, so nearly
99
+ * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
100
+ * our buffer to handle the over-write.
101
+ */
102
+ {
103
+ U64 const add = 0x0101010101010101ull;
104
+ size_t pos = 0;
105
+ U64 sv = 0;
106
+ U32 s;
107
+ for (s=0; s<maxSV1; ++s, sv += add) {
108
+ int i;
109
+ int const n = normalizedCounter[s];
110
+ MEM_write64(spread + pos, sv);
111
+ for (i = 8; i < n; i += 8) {
112
+ MEM_write64(spread + pos + i, sv);
113
+ }
114
+ pos += n;
115
+ }
116
+ }
117
+ /* Now we spread those positions across the table.
118
+ * The benefit of doing it in two stages is that we avoid the
119
+ * variable size inner loop, which caused lots of branch misses.
120
+ * Now we can run through all the positions without any branch misses.
121
+ * We unroll the loop twice, since that is what empirically worked best.
122
+ */
123
+ {
124
+ size_t position = 0;
125
+ size_t s;
126
+ size_t const unroll = 2;
127
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
128
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
129
+ size_t u;
130
+ for (u = 0; u < unroll; ++u) {
131
+ size_t const uPosition = (position + (u * step)) & tableMask;
132
+ tableDecode[uPosition].symbol = spread[s + u];
133
+ }
134
+ position = (position + (unroll * step)) & tableMask;
135
+ }
136
+ assert(position == 0);
137
+ }
138
+ } else {
139
+ U32 const tableMask = tableSize-1;
128
140
  U32 const step = FSE_TABLESTEP(tableSize);
129
141
  U32 s, position = 0;
130
142
  for (s=0; s<maxSV1; s++) {
@@ -142,62 +154,24 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
142
154
  for (u=0; u<tableSize; u++) {
143
155
  FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
144
156
  U32 const nextState = symbolNext[symbol]++;
145
- tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
157
+ tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
146
158
  tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
147
159
  } }
148
160
 
149
161
  return 0;
150
162
  }
151
163
 
164
+ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
165
+ {
166
+ return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
167
+ }
168
+
152
169
 
153
170
  #ifndef FSE_COMMONDEFS_ONLY
154
171
 
155
172
  /*-*******************************************************
156
173
  * Decompression (Byte symbols)
157
174
  *********************************************************/
158
- size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
159
- {
160
- void* ptr = dt;
161
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
162
- void* dPtr = dt + 1;
163
- FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
164
-
165
- DTableH->tableLog = 0;
166
- DTableH->fastMode = 0;
167
-
168
- cell->newState = 0;
169
- cell->symbol = symbolValue;
170
- cell->nbBits = 0;
171
-
172
- return 0;
173
- }
174
-
175
-
176
- size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
177
- {
178
- void* ptr = dt;
179
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
180
- void* dPtr = dt + 1;
181
- FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
182
- const unsigned tableSize = 1 << nbBits;
183
- const unsigned tableMask = tableSize - 1;
184
- const unsigned maxSV1 = tableMask+1;
185
- unsigned s;
186
-
187
- /* Sanity checks */
188
- if (nbBits < 1) return ERROR(GENERIC); /* min size */
189
-
190
- /* Build Decoding Table */
191
- DTableH->tableLog = (U16)nbBits;
192
- DTableH->fastMode = 1;
193
- for (s=0; s<maxSV1; s++) {
194
- dinfo[s].newState = 0;
195
- dinfo[s].symbol = (BYTE)s;
196
- dinfo[s].nbBits = (BYTE)nbBits;
197
- }
198
-
199
- return 0;
200
- }
201
175
 
202
176
  FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
203
177
  void* dst, size_t maxDstSize,
@@ -261,51 +235,77 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
261
235
  return op-ostart;
262
236
  }
263
237
 
238
+ typedef struct {
239
+ short ncount[FSE_MAX_SYMBOL_VALUE + 1];
240
+ FSE_DTable dtable[1]; /* Dynamically sized */
241
+ } FSE_DecompressWksp;
264
242
 
265
- size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
266
- const void* cSrc, size_t cSrcSize,
267
- const FSE_DTable* dt)
268
- {
269
- const void* ptr = dt;
270
- const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
271
- const U32 fastMode = DTableH->fastMode;
272
-
273
- /* select fast mode (static) */
274
- if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
275
- return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
276
- }
277
243
 
278
-
279
- size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
244
+ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
245
+ void* dst, size_t dstCapacity,
246
+ const void* cSrc, size_t cSrcSize,
247
+ unsigned maxLog, void* workSpace, size_t wkspSize,
248
+ int bmi2)
280
249
  {
281
250
  const BYTE* const istart = (const BYTE*)cSrc;
282
251
  const BYTE* ip = istart;
283
- short counting[FSE_MAX_SYMBOL_VALUE+1];
284
252
  unsigned tableLog;
285
253
  unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
254
+ FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
255
+
256
+ DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
257
+ if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
286
258
 
287
259
  /* normal FSE decoding mode */
288
- size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
289
- if (FSE_isError(NCountLength)) return NCountLength;
290
- //if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
291
- if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
292
- ip += NCountLength;
293
- cSrcSize -= NCountLength;
260
+ {
261
+ size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
262
+ if (FSE_isError(NCountLength)) return NCountLength;
263
+ if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
264
+ assert(NCountLength <= cSrcSize);
265
+ ip += NCountLength;
266
+ cSrcSize -= NCountLength;
267
+ }
294
268
 
295
- CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
269
+ if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
270
+ assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
271
+ workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
272
+ wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
296
273
 
297
- return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */
298
- }
274
+ CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
299
275
 
276
+ {
277
+ const void* ptr = wksp->dtable;
278
+ const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
279
+ const U32 fastMode = DTableH->fastMode;
300
280
 
301
- typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
281
+ /* select fast mode (static) */
282
+ if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
283
+ return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
284
+ }
285
+ }
302
286
 
303
- size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
287
+ /* Avoids the FORCE_INLINE of the _body() function. */
288
+ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
304
289
  {
305
- DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
306
- return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
290
+ return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
307
291
  }
308
292
 
293
+ #if DYNAMIC_BMI2
294
+ BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
295
+ {
296
+ return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
297
+ }
298
+ #endif
309
299
 
300
+ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
301
+ {
302
+ #if DYNAMIC_BMI2
303
+ if (bmi2) {
304
+ return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
305
+ }
306
+ #endif
307
+ (void)bmi2;
308
+ return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
309
+ }
310
310
 
311
311
  #endif /* FSE_COMMONDEFS_ONLY */