zstd-ruby 1.4.4.0 → 1.5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +78 -5
  4. data/Rakefile +8 -2
  5. data/ext/zstdruby/common.h +15 -0
  6. data/ext/zstdruby/extconf.rb +3 -2
  7. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  8. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  9. data/ext/zstdruby/libzstd/common/bitstream.h +74 -97
  10. data/ext/zstdruby/libzstd/common/compiler.h +219 -20
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  13. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +184 -80
  15. data/ext/zstdruby/libzstd/common/error_private.c +11 -2
  16. data/ext/zstdruby/libzstd/common/error_private.h +87 -4
  17. data/ext/zstdruby/libzstd/common/fse.h +47 -116
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +127 -127
  19. data/ext/zstdruby/libzstd/common/huf.h +112 -197
  20. data/ext/zstdruby/libzstd/common/mem.h +124 -142
  21. data/ext/zstdruby/libzstd/common/pool.c +54 -27
  22. data/ext/zstdruby/libzstd/common/pool.h +11 -5
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +78 -22
  25. data/ext/zstdruby/libzstd/common/threading.h +9 -13
  26. data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +186 -144
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +99 -196
  34. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  35. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +968 -331
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +4120 -1191
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +688 -159
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +121 -40
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +62 -35
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +577 -0
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +322 -115
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +394 -154
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -3
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -253
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -3
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1289 -247
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +339 -212
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +508 -282
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +217 -466
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +35 -114
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1220 -572
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +23 -19
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +859 -273
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1244 -375
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +21 -7
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +74 -11
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +75 -54
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +55 -36
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +126 -110
  72. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +248 -56
  73. data/ext/zstdruby/libzstd/zstd.h +1277 -306
  74. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +29 -8
  75. data/ext/zstdruby/main.c +20 -0
  76. data/ext/zstdruby/skippable_frame.c +63 -0
  77. data/ext/zstdruby/streaming_compress.c +177 -0
  78. data/ext/zstdruby/streaming_compress.h +5 -0
  79. data/ext/zstdruby/streaming_decompress.c +123 -0
  80. data/ext/zstdruby/zstdruby.c +114 -32
  81. data/lib/zstd-ruby/version.rb +1 -1
  82. data/lib/zstd-ruby.rb +0 -1
  83. data/zstd-ruby.gemspec +1 -1
  84. metadata +24 -39
  85. data/.travis.yml +0 -14
  86. data/ext/zstdruby/libzstd/.gitignore +0 -3
  87. data/ext/zstdruby/libzstd/BUCK +0 -234
  88. data/ext/zstdruby/libzstd/Makefile +0 -289
  89. data/ext/zstdruby/libzstd/README.md +0 -159
  90. data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
  91. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
  92. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
  93. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
  94. data/ext/zstdruby/libzstd/dll/example/Makefile +0 -47
  95. data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
  96. data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
  97. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
  98. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
  99. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
  100. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2152
  101. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
  102. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3514
  103. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
  104. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3156
  105. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
  106. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3641
  107. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
  108. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4046
  109. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
  110. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4150
  111. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
  112. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4533
  113. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
  114. data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
  115. data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,35 +1,15 @@
1
1
  /* ******************************************************************
2
- FSE : Finite State Entropy codec
3
- Public Prototypes declaration
4
- Copyright (C) 2013-2016, Yann Collet.
5
-
6
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
-
8
- Redistribution and use in source and binary forms, with or without
9
- modification, are permitted provided that the following conditions are
10
- met:
11
-
12
- * Redistributions of source code must retain the above copyright
13
- notice, this list of conditions and the following disclaimer.
14
- * Redistributions in binary form must reproduce the above
15
- copyright notice, this list of conditions and the following disclaimer
16
- in the documentation and/or other materials provided with the
17
- distribution.
18
-
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
-
31
- You can contact the author at :
32
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
2
+ * FSE : Finite State Entropy codec
3
+ * Public Prototypes declaration
4
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
5
+ *
6
+ * You can contact the author at :
7
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
 
35
15
  #if defined (__cplusplus)
@@ -43,7 +23,7 @@ extern "C" {
43
23
  /*-*****************************************
44
24
  * Dependencies
45
25
  ******************************************/
46
- #include <stddef.h> /* size_t, ptrdiff_t */
26
+ #include "zstd_deps.h" /* size_t, ptrdiff_t */
47
27
 
48
28
 
49
29
  /*-*****************************************
@@ -73,34 +53,6 @@ extern "C" {
73
53
  FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
74
54
 
75
55
 
76
- /*-****************************************
77
- * FSE simple functions
78
- ******************************************/
79
- /*! FSE_compress() :
80
- Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
81
- 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
82
- @return : size of compressed data (<= dstCapacity).
83
- Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
84
- if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
85
- if FSE_isError(return), compression failed (more details using FSE_getErrorName())
86
- */
87
- FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
88
- const void* src, size_t srcSize);
89
-
90
- /*! FSE_decompress():
91
- Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
92
- into already allocated destination buffer 'dst', of size 'dstCapacity'.
93
- @return : size of regenerated data (<= maxDstSize),
94
- or an error code, which can be tested using FSE_isError() .
95
-
96
- ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
97
- Why ? : making this distinction requires a header.
98
- Header management is intentionally delegated to the user layer, which can better manage special cases.
99
- */
100
- FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
101
- const void* cSrc, size_t cSrcSize);
102
-
103
-
104
56
  /*-*****************************************
105
57
  * Tool functions
106
58
  ******************************************/
@@ -111,20 +63,6 @@ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return
111
63
  FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
112
64
 
113
65
 
114
- /*-*****************************************
115
- * FSE advanced functions
116
- ******************************************/
117
- /*! FSE_compress2() :
118
- Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
119
- Both parameters can be defined as '0' to mean : use default value
120
- @return : size of compressed data
121
- Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
122
- if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
123
- if FSE_isError(return), it's an error code.
124
- */
125
- FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
126
-
127
-
128
66
  /*-*****************************************
129
67
  * FSE detailed API
130
68
  ******************************************/
@@ -157,10 +95,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
157
95
  /*! FSE_normalizeCount():
158
96
  normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
159
97
  'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
98
+ useLowProbCount is a boolean parameter which trades off compressed size for
99
+ faster header decoding. When it is set to 1, the compressed data will be slightly
100
+ smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
101
+ faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
102
+ is a good default, since header deserialization makes a big speed difference.
103
+ Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
160
104
  @return : tableLog,
161
105
  or an errorCode, which can be tested using FSE_isError() */
162
106
  FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
163
- const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
107
+ const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
164
108
 
165
109
  /*! FSE_NCountWriteBound():
166
110
  Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@@ -178,8 +122,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
178
122
  /*! Constructor and Destructor of FSE_CTable.
179
123
  Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
180
124
  typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
181
- FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
182
- FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
183
125
 
184
126
  /*! FSE_buildCTable():
185
127
  Builds `ct`, which must be already allocated, e.g. as a table of FSE_CTable sized with FSE_CTABLE_SIZE_U32().
@@ -248,23 +190,14 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
248
190
  unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
249
191
  const void* rBuffer, size_t rBuffSize);
250
192
 
251
- /*! Constructor and Destructor of FSE_DTable.
252
- Note that its size depends on 'tableLog' */
193
+ /*! FSE_readNCount_bmi2():
194
+ * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
195
+ */
196
+ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
197
+ unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
198
+ const void* rBuffer, size_t rBuffSize, int bmi2);
199
+
253
200
  typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
254
- FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
255
- FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
256
-
257
- /*! FSE_buildDTable():
258
- Builds 'dt', which must be already allocated, using FSE_createDTable().
259
- return : 0, or an errorCode, which can be tested using FSE_isError() */
260
- FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
261
-
262
- /*! FSE_decompress_usingDTable():
263
- Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
264
- into `dst` which must be already allocated.
265
- @return : size of regenerated data (necessarily <= `dstCapacity`),
266
- or an errorCode, which can be tested using FSE_isError() */
267
- FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
268
201
 
269
202
  /*!
270
203
  Tutorial :
@@ -308,12 +241,12 @@ If there is an error, the function will return an error code, which can be teste
308
241
  *******************************************/
309
242
  /* FSE buffer bounds */
310
243
  #define FSE_NCOUNTBOUND 512
311
- #define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
244
+ #define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
312
245
  #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
313
246
 
314
247
  /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
315
- #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
316
- #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
248
+ #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
249
+ #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
317
250
 
318
251
  /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
319
252
  #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
@@ -327,33 +260,28 @@ If there is an error, the function will return an error code, which can be teste
327
260
  unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
328
261
  /**< same as FSE_optimalTableLog(), which used `minus==2` */
329
262
 
330
- /* FSE_compress_wksp() :
331
- * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
332
- * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
333
- */
334
- #define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
335
- size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
336
-
337
- size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
338
- /**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
339
-
340
263
  size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
341
264
  /**< build a fake FSE_CTable, designed to compress always the same symbolValue */
342
265
 
343
266
  /* FSE_buildCTable_wksp() :
344
267
  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
345
- * `wkspSize` must be >= `(1<<tableLog)`.
268
+ * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
269
+ * See FSE_buildCTable_wksp() for breakdown of workspace usage.
346
270
  */
271
+ #define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
272
+ #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
347
273
  size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
348
274
 
349
- size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
350
- /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
275
+ #define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
276
+ #define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
277
+ FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
278
+ /**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)` */
351
279
 
352
- size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
353
- /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
354
-
355
- size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
356
- /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
280
+ #define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
281
+ #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
282
+ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
283
+ /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
284
+ * Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
357
285
 
358
286
  typedef enum {
359
287
  FSE_repeat_none, /**< Cannot use the previous table */
@@ -549,7 +477,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
549
477
 
550
478
  /* FSE_getMaxNbBits() :
551
479
  * Approximate maximum cost of a symbol, in bits.
552
- * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
480
+ * Fractional costs get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
553
481
  * note 1 : assume symbolValue is valid (<= maxSymbolValue)
554
482
  * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
555
483
  MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
@@ -664,6 +592,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
664
592
  #ifndef FSE_DEFAULT_MEMORY_USAGE
665
593
  # define FSE_DEFAULT_MEMORY_USAGE 13
666
594
  #endif
595
+ #if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
596
+ # error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
597
+ #endif
667
598
 
668
599
  /*!FSE_MAX_SYMBOL_VALUE :
669
600
  * Maximum symbol value authorized.
@@ -697,7 +628,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
697
628
  # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
698
629
  #endif
699
630
 
700
- #define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
631
+ #define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
701
632
 
702
633
 
703
634
  #endif /* FSE_STATIC_LINKING_ONLY */
@@ -1,48 +1,30 @@
1
1
  /* ******************************************************************
2
- FSE : Finite State Entropy decoder
3
- Copyright (C) 2013-2015, Yann Collet.
4
-
5
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
-
7
- Redistribution and use in source and binary forms, with or without
8
- modification, are permitted provided that the following conditions are
9
- met:
10
-
11
- * Redistributions of source code must retain the above copyright
12
- notice, this list of conditions and the following disclaimer.
13
- * Redistributions in binary form must reproduce the above
14
- copyright notice, this list of conditions and the following disclaimer
15
- in the documentation and/or other materials provided with the
16
- distribution.
17
-
18
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
-
30
- You can contact the author at :
31
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
2
+ * FSE : Finite State Entropy decoder
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
+ *
5
+ * You can contact the author at :
6
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
7
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
 
35
15
 
36
16
  /* **************************************************************
37
17
  * Includes
38
18
  ****************************************************************/
39
- #include <stdlib.h> /* malloc, free, qsort */
40
- #include <string.h> /* memcpy, memset */
19
+ #include "debug.h" /* assert */
41
20
  #include "bitstream.h"
42
21
  #include "compiler.h"
43
22
  #define FSE_STATIC_LINKING_ONLY
44
23
  #include "fse.h"
45
24
  #include "error_private.h"
25
+ #define ZSTD_DEPS_NEED_MALLOC
26
+ #include "zstd_deps.h"
27
+ #include "bits.h" /* ZSTD_highbit32 */
46
28
 
47
29
 
48
30
  /* **************************************************************
@@ -51,11 +33,6 @@
51
33
  #define FSE_isError ERR_isError
52
34
  #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
53
35
 
54
- /* check and forward error code */
55
- #ifndef CHECK_F
56
- #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
57
- #endif
58
-
59
36
 
60
37
  /* **************************************************************
61
38
  * Templates
@@ -79,30 +56,19 @@
79
56
  #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
80
57
  #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
81
58
 
82
-
83
- /* Function templates */
84
- FSE_DTable* FSE_createDTable (unsigned tableLog)
85
- {
86
- if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
87
- return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
88
- }
89
-
90
- void FSE_freeDTable (FSE_DTable* dt)
91
- {
92
- free(dt);
93
- }
94
-
95
- size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
59
+ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
96
60
  {
97
61
  void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
98
62
  FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
99
- U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
63
+ U16* symbolNext = (U16*)workSpace;
64
+ BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
100
65
 
101
66
  U32 const maxSV1 = maxSymbolValue + 1;
102
67
  U32 const tableSize = 1 << tableLog;
103
68
  U32 highThreshold = tableSize-1;
104
69
 
105
70
  /* Sanity Checks */
71
+ if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
106
72
  if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
107
73
  if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
108
74
 
@@ -120,11 +86,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
120
86
  if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
121
87
  symbolNext[s] = normalizedCounter[s];
122
88
  } } }
123
- memcpy(dt, &DTableH, sizeof(DTableH));
89
+ ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
124
90
  }
125
91
 
126
92
  /* Spread symbols */
127
- { U32 const tableMask = tableSize-1;
93
+ if (highThreshold == tableSize - 1) {
94
+ size_t const tableMask = tableSize-1;
95
+ size_t const step = FSE_TABLESTEP(tableSize);
96
+ /* First lay down the symbols in order.
97
+ * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
98
+ * misses since small blocks generally have small table logs, so nearly
99
+ * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
100
+ * our buffer to handle the over-write.
101
+ */
102
+ {
103
+ U64 const add = 0x0101010101010101ull;
104
+ size_t pos = 0;
105
+ U64 sv = 0;
106
+ U32 s;
107
+ for (s=0; s<maxSV1; ++s, sv += add) {
108
+ int i;
109
+ int const n = normalizedCounter[s];
110
+ MEM_write64(spread + pos, sv);
111
+ for (i = 8; i < n; i += 8) {
112
+ MEM_write64(spread + pos + i, sv);
113
+ }
114
+ pos += n;
115
+ }
116
+ }
117
+ /* Now we spread those positions across the table.
118
+ * The benefit of doing it in two stages is that we avoid the
119
+ * variable size inner loop, which caused lots of branch misses.
120
+ * Now we can run through all the positions without any branch misses.
121
+ * We unroll the loop twice, since that is what empirically worked best.
122
+ */
123
+ {
124
+ size_t position = 0;
125
+ size_t s;
126
+ size_t const unroll = 2;
127
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
128
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
129
+ size_t u;
130
+ for (u = 0; u < unroll; ++u) {
131
+ size_t const uPosition = (position + (u * step)) & tableMask;
132
+ tableDecode[uPosition].symbol = spread[s + u];
133
+ }
134
+ position = (position + (unroll * step)) & tableMask;
135
+ }
136
+ assert(position == 0);
137
+ }
138
+ } else {
139
+ U32 const tableMask = tableSize-1;
128
140
  U32 const step = FSE_TABLESTEP(tableSize);
129
141
  U32 s, position = 0;
130
142
  for (s=0; s<maxSV1; s++) {
@@ -142,62 +154,24 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
142
154
  for (u=0; u<tableSize; u++) {
143
155
  FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
144
156
  U32 const nextState = symbolNext[symbol]++;
145
- tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
157
+ tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
146
158
  tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
147
159
  } }
148
160
 
149
161
  return 0;
150
162
  }
151
163
 
164
+ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
165
+ {
166
+ return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
167
+ }
168
+
152
169
 
153
170
  #ifndef FSE_COMMONDEFS_ONLY
154
171
 
155
172
  /*-*******************************************************
156
173
  * Decompression (Byte symbols)
157
174
  *********************************************************/
158
- size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
159
- {
160
- void* ptr = dt;
161
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
162
- void* dPtr = dt + 1;
163
- FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
164
-
165
- DTableH->tableLog = 0;
166
- DTableH->fastMode = 0;
167
-
168
- cell->newState = 0;
169
- cell->symbol = symbolValue;
170
- cell->nbBits = 0;
171
-
172
- return 0;
173
- }
174
-
175
-
176
- size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
177
- {
178
- void* ptr = dt;
179
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
180
- void* dPtr = dt + 1;
181
- FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
182
- const unsigned tableSize = 1 << nbBits;
183
- const unsigned tableMask = tableSize - 1;
184
- const unsigned maxSV1 = tableMask+1;
185
- unsigned s;
186
-
187
- /* Sanity checks */
188
- if (nbBits < 1) return ERROR(GENERIC); /* min size */
189
-
190
- /* Build Decoding Table */
191
- DTableH->tableLog = (U16)nbBits;
192
- DTableH->fastMode = 1;
193
- for (s=0; s<maxSV1; s++) {
194
- dinfo[s].newState = 0;
195
- dinfo[s].symbol = (BYTE)s;
196
- dinfo[s].nbBits = (BYTE)nbBits;
197
- }
198
-
199
- return 0;
200
- }
201
175
 
202
176
  FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
203
177
  void* dst, size_t maxDstSize,
@@ -261,51 +235,77 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
261
235
  return op-ostart;
262
236
  }
263
237
 
238
+ typedef struct {
239
+ short ncount[FSE_MAX_SYMBOL_VALUE + 1];
240
+ FSE_DTable dtable[1]; /* Dynamically sized */
241
+ } FSE_DecompressWksp;
264
242
 
265
- size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
266
- const void* cSrc, size_t cSrcSize,
267
- const FSE_DTable* dt)
268
- {
269
- const void* ptr = dt;
270
- const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
271
- const U32 fastMode = DTableH->fastMode;
272
-
273
- /* select fast mode (static) */
274
- if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
275
- return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
276
- }
277
243
 
278
-
279
- size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
244
+ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
245
+ void* dst, size_t dstCapacity,
246
+ const void* cSrc, size_t cSrcSize,
247
+ unsigned maxLog, void* workSpace, size_t wkspSize,
248
+ int bmi2)
280
249
  {
281
250
  const BYTE* const istart = (const BYTE*)cSrc;
282
251
  const BYTE* ip = istart;
283
- short counting[FSE_MAX_SYMBOL_VALUE+1];
284
252
  unsigned tableLog;
285
253
  unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
254
+ FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
255
+
256
+ DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
257
+ if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
286
258
 
287
259
  /* normal FSE decoding mode */
288
- size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
289
- if (FSE_isError(NCountLength)) return NCountLength;
290
- //if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
291
- if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
292
- ip += NCountLength;
293
- cSrcSize -= NCountLength;
260
+ {
261
+ size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
262
+ if (FSE_isError(NCountLength)) return NCountLength;
263
+ if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
264
+ assert(NCountLength <= cSrcSize);
265
+ ip += NCountLength;
266
+ cSrcSize -= NCountLength;
267
+ }
294
268
 
295
- CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
269
+ if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
270
+ assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
271
+ workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
272
+ wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
296
273
 
297
- return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */
298
- }
274
+ CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
299
275
 
276
+ {
277
+ const void* ptr = wksp->dtable;
278
+ const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
279
+ const U32 fastMode = DTableH->fastMode;
300
280
 
301
- typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
281
+ /* select fast mode (static) */
282
+ if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
283
+ return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
284
+ }
285
+ }
302
286
 
303
- size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
287
+ /* Avoids the FORCE_INLINE of the _body() function. */
288
+ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
304
289
  {
305
- DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
306
- return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
290
+ return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
307
291
  }
308
292
 
293
+ #if DYNAMIC_BMI2
294
+ BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
295
+ {
296
+ return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
297
+ }
298
+ #endif
309
299
 
300
+ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
301
+ {
302
+ #if DYNAMIC_BMI2
303
+ if (bmi2) {
304
+ return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
305
+ }
306
+ #endif
307
+ (void)bmi2;
308
+ return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
309
+ }
310
310
 
311
311
  #endif /* FSE_COMMONDEFS_ONLY */