zstd-ruby 1.4.5.0 → 1.5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +78 -5
  4. data/Rakefile +8 -2
  5. data/ext/zstdruby/common.h +15 -0
  6. data/ext/zstdruby/extconf.rb +3 -2
  7. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  8. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  9. data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
  10. data/ext/zstdruby/libzstd/common/compiler.h +205 -22
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  13. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
  15. data/ext/zstdruby/libzstd/common/error_private.c +10 -2
  16. data/ext/zstdruby/libzstd/common/error_private.h +82 -3
  17. data/ext/zstdruby/libzstd/common/fse.h +37 -86
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
  19. data/ext/zstdruby/libzstd/common/huf.h +99 -166
  20. data/ext/zstdruby/libzstd/common/mem.h +124 -142
  21. data/ext/zstdruby/libzstd/common/pool.c +54 -27
  22. data/ext/zstdruby/libzstd/common/pool.h +10 -4
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +74 -19
  25. data/ext/zstdruby/libzstd/common/threading.h +5 -10
  26. data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
  34. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  35. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
  72. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
  73. data/ext/zstdruby/libzstd/zstd.h +1217 -287
  74. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
  75. data/ext/zstdruby/main.c +20 -0
  76. data/ext/zstdruby/skippable_frame.c +63 -0
  77. data/ext/zstdruby/streaming_compress.c +177 -0
  78. data/ext/zstdruby/streaming_compress.h +5 -0
  79. data/ext/zstdruby/streaming_decompress.c +123 -0
  80. data/ext/zstdruby/zstdruby.c +114 -32
  81. data/lib/zstd-ruby/version.rb +1 -1
  82. data/lib/zstd-ruby.rb +0 -1
  83. data/zstd-ruby.gemspec +1 -1
  84. metadata +19 -36
  85. data/.travis.yml +0 -14
  86. data/ext/zstdruby/libzstd/.gitignore +0 -3
  87. data/ext/zstdruby/libzstd/BUCK +0 -234
  88. data/ext/zstdruby/libzstd/Makefile +0 -354
  89. data/ext/zstdruby/libzstd/README.md +0 -179
  90. data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
  91. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
  92. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
  93. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
  94. data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
  95. data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
  96. data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
  97. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
  98. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
  99. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
  100. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
  101. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
  102. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
  103. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
  104. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
  105. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
  106. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
  107. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
  108. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
  109. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
  110. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
  111. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
  112. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
  113. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
  114. data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
  115. data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,7 +1,7 @@
1
1
  /* ******************************************************************
2
2
  * FSE : Finite State Entropy codec
3
3
  * Public Prototypes declaration
4
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
4
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
5
5
  *
6
6
  * You can contact the author at :
7
7
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -23,7 +23,7 @@ extern "C" {
23
23
  /*-*****************************************
24
24
  * Dependencies
25
25
  ******************************************/
26
- #include <stddef.h> /* size_t, ptrdiff_t */
26
+ #include "zstd_deps.h" /* size_t, ptrdiff_t */
27
27
 
28
28
 
29
29
  /*-*****************************************
@@ -53,34 +53,6 @@ extern "C" {
53
53
  FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
54
54
 
55
55
 
56
- /*-****************************************
57
- * FSE simple functions
58
- ******************************************/
59
- /*! FSE_compress() :
60
- Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
61
- 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
62
- @return : size of compressed data (<= dstCapacity).
63
- Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
64
- if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
65
- if FSE_isError(return), compression failed (more details using FSE_getErrorName())
66
- */
67
- FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
68
- const void* src, size_t srcSize);
69
-
70
- /*! FSE_decompress():
71
- Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
72
- into already allocated destination buffer 'dst', of size 'dstCapacity'.
73
- @return : size of regenerated data (<= maxDstSize),
74
- or an error code, which can be tested using FSE_isError() .
75
-
76
- ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
77
- Why ? : making this distinction requires a header.
78
- Header management is intentionally delegated to the user layer, which can better manage special cases.
79
- */
80
- FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
81
- const void* cSrc, size_t cSrcSize);
82
-
83
-
84
56
  /*-*****************************************
85
57
  * Tool functions
86
58
  ******************************************/
@@ -91,20 +63,6 @@ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return
91
63
  FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
92
64
 
93
65
 
94
- /*-*****************************************
95
- * FSE advanced functions
96
- ******************************************/
97
- /*! FSE_compress2() :
98
- Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
99
- Both parameters can be defined as '0' to mean : use default value
100
- @return : size of compressed data
101
- Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
102
- if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
103
- if FSE_isError(return), it's an error code.
104
- */
105
- FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
106
-
107
-
108
66
  /*-*****************************************
109
67
  * FSE detailed API
110
68
  ******************************************/
@@ -137,10 +95,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
137
95
  /*! FSE_normalizeCount():
138
96
  normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
139
97
  'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
98
+ useLowProbCount is a boolean parameter which trades off compressed size for
99
+ faster header decoding. When it is set to 1, the compressed data will be slightly
100
+ smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
101
+ faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
102
+ is a good default, since header deserialization makes a big speed difference.
103
+ Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
140
104
  @return : tableLog,
141
105
  or an errorCode, which can be tested using FSE_isError() */
142
106
  FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
143
- const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
107
+ const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
144
108
 
145
109
  /*! FSE_NCountWriteBound():
146
110
  Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@@ -158,8 +122,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
158
122
  /*! Constructor and Destructor of FSE_CTable.
159
123
  Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
160
124
  typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
161
- FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
162
- FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
163
125
 
164
126
  /*! FSE_buildCTable():
165
127
  Builds `ct`, which must be already allocated, using FSE_createCTable().
@@ -228,23 +190,14 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
228
190
  unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
229
191
  const void* rBuffer, size_t rBuffSize);
230
192
 
231
- /*! Constructor and Destructor of FSE_DTable.
232
- Note that its size depends on 'tableLog' */
193
+ /*! FSE_readNCount_bmi2():
194
+ * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
195
+ */
196
+ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
197
+ unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
198
+ const void* rBuffer, size_t rBuffSize, int bmi2);
199
+
233
200
  typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
234
- FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
235
- FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
236
-
237
- /*! FSE_buildDTable():
238
- Builds 'dt', which must be already allocated, using FSE_createDTable().
239
- return : 0, or an errorCode, which can be tested using FSE_isError() */
240
- FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
241
-
242
- /*! FSE_decompress_usingDTable():
243
- Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
244
- into `dst` which must be already allocated.
245
- @return : size of regenerated data (necessarily <= `dstCapacity`),
246
- or an errorCode, which can be tested using FSE_isError() */
247
- FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
248
201
 
249
202
  /*!
250
203
  Tutorial :
@@ -288,12 +241,12 @@ If there is an error, the function will return an error code, which can be teste
288
241
  *******************************************/
289
242
  /* FSE buffer bounds */
290
243
  #define FSE_NCOUNTBOUND 512
291
- #define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
244
+ #define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
292
245
  #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
293
246
 
294
247
  /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
295
- #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
296
- #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
248
+ #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
249
+ #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
297
250
 
298
251
  /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
299
252
  #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
@@ -307,33 +260,28 @@ If there is an error, the function will return an error code, which can be teste
307
260
  unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
308
261
  /**< same as FSE_optimalTableLog(), which used `minus==2` */
309
262
 
310
- /* FSE_compress_wksp() :
311
- * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
312
- * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
313
- */
314
- #define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
315
- size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
316
-
317
- size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
318
- /**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
319
-
320
263
  size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
321
264
  /**< build a fake FSE_CTable, designed to compress always the same symbolValue */
322
265
 
323
266
  /* FSE_buildCTable_wksp() :
324
267
  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
325
- * `wkspSize` must be >= `(1<<tableLog)`.
268
+ * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
269
+ * See FSE_buildCTable_wksp() for breakdown of workspace usage.
326
270
  */
271
+ #define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
272
+ #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
327
273
  size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
328
274
 
329
- size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
330
- /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
275
+ #define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
276
+ #define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
277
+ FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
278
+ /**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
331
279
 
332
- size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
333
- /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
334
-
335
- size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
336
- /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
280
+ #define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
281
+ #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
282
+ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
283
+ /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
284
+ * Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
337
285
 
338
286
  typedef enum {
339
287
  FSE_repeat_none, /**< Cannot use the previous table */
@@ -529,7 +477,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
529
477
 
530
478
  /* FSE_getMaxNbBits() :
531
479
  * Approximate maximum cost of a symbol, in bits.
532
- * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
480
+ * Fractional get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
533
481
  * note 1 : assume symbolValue is valid (<= maxSymbolValue)
534
482
  * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
535
483
  MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
@@ -644,6 +592,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
644
592
  #ifndef FSE_DEFAULT_MEMORY_USAGE
645
593
  # define FSE_DEFAULT_MEMORY_USAGE 13
646
594
  #endif
595
+ #if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
596
+ # error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
597
+ #endif
647
598
 
648
599
  /*!FSE_MAX_SYMBOL_VALUE :
649
600
  * Maximum symbol value authorized.
@@ -677,7 +628,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
677
628
  # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
678
629
  #endif
679
630
 
680
- #define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
631
+ #define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
681
632
 
682
633
 
683
634
  #endif /* FSE_STATIC_LINKING_ONLY */
@@ -1,6 +1,6 @@
1
1
  /* ******************************************************************
2
2
  * FSE : Finite State Entropy decoder
3
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
4
  *
5
5
  * You can contact the author at :
6
6
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -16,13 +16,15 @@
16
16
  /* **************************************************************
17
17
  * Includes
18
18
  ****************************************************************/
19
- #include <stdlib.h> /* malloc, free, qsort */
20
- #include <string.h> /* memcpy, memset */
19
+ #include "debug.h" /* assert */
21
20
  #include "bitstream.h"
22
21
  #include "compiler.h"
23
22
  #define FSE_STATIC_LINKING_ONLY
24
23
  #include "fse.h"
25
24
  #include "error_private.h"
25
+ #define ZSTD_DEPS_NEED_MALLOC
26
+ #include "zstd_deps.h"
27
+ #include "bits.h" /* ZSTD_highbit32 */
26
28
 
27
29
 
28
30
  /* **************************************************************
@@ -54,30 +56,19 @@
54
56
  #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
55
57
  #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
56
58
 
57
-
58
- /* Function templates */
59
- FSE_DTable* FSE_createDTable (unsigned tableLog)
60
- {
61
- if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
62
- return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
63
- }
64
-
65
- void FSE_freeDTable (FSE_DTable* dt)
66
- {
67
- free(dt);
68
- }
69
-
70
- size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
59
+ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
71
60
  {
72
61
  void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
73
62
  FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
74
- U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
63
+ U16* symbolNext = (U16*)workSpace;
64
+ BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
75
65
 
76
66
  U32 const maxSV1 = maxSymbolValue + 1;
77
67
  U32 const tableSize = 1 << tableLog;
78
68
  U32 highThreshold = tableSize-1;
79
69
 
80
70
  /* Sanity Checks */
71
+ if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
81
72
  if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
82
73
  if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
83
74
 
@@ -95,11 +86,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
95
86
  if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
96
87
  symbolNext[s] = normalizedCounter[s];
97
88
  } } }
98
- memcpy(dt, &DTableH, sizeof(DTableH));
89
+ ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
99
90
  }
100
91
 
101
92
  /* Spread symbols */
102
- { U32 const tableMask = tableSize-1;
93
+ if (highThreshold == tableSize - 1) {
94
+ size_t const tableMask = tableSize-1;
95
+ size_t const step = FSE_TABLESTEP(tableSize);
96
+ /* First lay down the symbols in order.
97
+ * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
98
+ * misses since small blocks generally have small table logs, so nearly
99
+ * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
100
+ * our buffer to handle the over-write.
101
+ */
102
+ {
103
+ U64 const add = 0x0101010101010101ull;
104
+ size_t pos = 0;
105
+ U64 sv = 0;
106
+ U32 s;
107
+ for (s=0; s<maxSV1; ++s, sv += add) {
108
+ int i;
109
+ int const n = normalizedCounter[s];
110
+ MEM_write64(spread + pos, sv);
111
+ for (i = 8; i < n; i += 8) {
112
+ MEM_write64(spread + pos + i, sv);
113
+ }
114
+ pos += n;
115
+ }
116
+ }
117
+ /* Now we spread those positions across the table.
118
+ * The benefit of doing it in two stages is that we avoid the
119
+ * variable size inner loop, which caused lots of branch misses.
120
+ * Now we can run through all the positions without any branch misses.
121
+ * We unroll the loop twice, since that is what empirically worked best.
122
+ */
123
+ {
124
+ size_t position = 0;
125
+ size_t s;
126
+ size_t const unroll = 2;
127
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
128
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
129
+ size_t u;
130
+ for (u = 0; u < unroll; ++u) {
131
+ size_t const uPosition = (position + (u * step)) & tableMask;
132
+ tableDecode[uPosition].symbol = spread[s + u];
133
+ }
134
+ position = (position + (unroll * step)) & tableMask;
135
+ }
136
+ assert(position == 0);
137
+ }
138
+ } else {
139
+ U32 const tableMask = tableSize-1;
103
140
  U32 const step = FSE_TABLESTEP(tableSize);
104
141
  U32 s, position = 0;
105
142
  for (s=0; s<maxSV1; s++) {
@@ -117,62 +154,24 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
117
154
  for (u=0; u<tableSize; u++) {
118
155
  FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
119
156
  U32 const nextState = symbolNext[symbol]++;
120
- tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
157
+ tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
121
158
  tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
122
159
  } }
123
160
 
124
161
  return 0;
125
162
  }
126
163
 
164
+ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
165
+ {
166
+ return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
167
+ }
168
+
127
169
 
128
170
  #ifndef FSE_COMMONDEFS_ONLY
129
171
 
130
172
  /*-*******************************************************
131
173
  * Decompression (Byte symbols)
132
174
  *********************************************************/
133
- size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
134
- {
135
- void* ptr = dt;
136
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
137
- void* dPtr = dt + 1;
138
- FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
139
-
140
- DTableH->tableLog = 0;
141
- DTableH->fastMode = 0;
142
-
143
- cell->newState = 0;
144
- cell->symbol = symbolValue;
145
- cell->nbBits = 0;
146
-
147
- return 0;
148
- }
149
-
150
-
151
- size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
152
- {
153
- void* ptr = dt;
154
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
155
- void* dPtr = dt + 1;
156
- FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
157
- const unsigned tableSize = 1 << nbBits;
158
- const unsigned tableMask = tableSize - 1;
159
- const unsigned maxSV1 = tableMask+1;
160
- unsigned s;
161
-
162
- /* Sanity checks */
163
- if (nbBits < 1) return ERROR(GENERIC); /* min size */
164
-
165
- /* Build Decoding Table */
166
- DTableH->tableLog = (U16)nbBits;
167
- DTableH->fastMode = 1;
168
- for (s=0; s<maxSV1; s++) {
169
- dinfo[s].newState = 0;
170
- dinfo[s].symbol = (BYTE)s;
171
- dinfo[s].nbBits = (BYTE)nbBits;
172
- }
173
-
174
- return 0;
175
- }
176
175
 
177
176
  FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
178
177
  void* dst, size_t maxDstSize,
@@ -236,51 +235,77 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
236
235
  return op-ostart;
237
236
  }
238
237
 
238
+ typedef struct {
239
+ short ncount[FSE_MAX_SYMBOL_VALUE + 1];
240
+ FSE_DTable dtable[1]; /* Dynamically sized */
241
+ } FSE_DecompressWksp;
239
242
 
240
- size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
241
- const void* cSrc, size_t cSrcSize,
242
- const FSE_DTable* dt)
243
- {
244
- const void* ptr = dt;
245
- const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
246
- const U32 fastMode = DTableH->fastMode;
247
243
 
248
- /* select fast mode (static) */
249
- if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
250
- return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
251
- }
252
-
253
-
254
- size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
244
+ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
245
+ void* dst, size_t dstCapacity,
246
+ const void* cSrc, size_t cSrcSize,
247
+ unsigned maxLog, void* workSpace, size_t wkspSize,
248
+ int bmi2)
255
249
  {
256
250
  const BYTE* const istart = (const BYTE*)cSrc;
257
251
  const BYTE* ip = istart;
258
- short counting[FSE_MAX_SYMBOL_VALUE+1];
259
252
  unsigned tableLog;
260
253
  unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
254
+ FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
255
+
256
+ DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
257
+ if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
261
258
 
262
259
  /* normal FSE decoding mode */
263
- size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
264
- if (FSE_isError(NCountLength)) return NCountLength;
265
- /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
266
- if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
267
- ip += NCountLength;
268
- cSrcSize -= NCountLength;
260
+ {
261
+ size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
262
+ if (FSE_isError(NCountLength)) return NCountLength;
263
+ if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
264
+ assert(NCountLength <= cSrcSize);
265
+ ip += NCountLength;
266
+ cSrcSize -= NCountLength;
267
+ }
269
268
 
270
- CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
269
+ if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
270
+ assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
271
+ workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
272
+ wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
271
273
 
272
- return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */
273
- }
274
+ CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
274
275
 
276
+ {
277
+ const void* ptr = wksp->dtable;
278
+ const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
279
+ const U32 fastMode = DTableH->fastMode;
275
280
 
276
- typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
281
+ /* select fast mode (static) */
282
+ if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
283
+ return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
284
+ }
285
+ }
277
286
 
278
- size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
287
+ /* Avoids the FORCE_INLINE of the _body() function. */
288
+ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
279
289
  {
280
- DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
281
- return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
290
+ return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
282
291
  }
283
292
 
293
+ #if DYNAMIC_BMI2
294
+ BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
295
+ {
296
+ return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
297
+ }
298
+ #endif
284
299
 
300
+ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
301
+ {
302
+ #if DYNAMIC_BMI2
303
+ if (bmi2) {
304
+ return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
305
+ }
306
+ #endif
307
+ (void)bmi2;
308
+ return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
309
+ }
285
310
 
286
311
  #endif /* FSE_COMMONDEFS_ONLY */