extzstd 0.3.1 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +28 -14
  3. data/contrib/zstd/CHANGELOG +301 -56
  4. data/contrib/zstd/CONTRIBUTING.md +169 -72
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +116 -87
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +62 -32
  9. data/contrib/zstd/TESTING.md +2 -3
  10. data/contrib/zstd/appveyor.yml +52 -136
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +225 -222
  13. data/contrib/zstd/lib/README.md +51 -6
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +45 -62
  17. data/contrib/zstd/lib/common/compiler.h +205 -22
  18. data/contrib/zstd/lib/common/cpu.h +1 -3
  19. data/contrib/zstd/lib/common/debug.c +1 -1
  20. data/contrib/zstd/lib/common/debug.h +12 -19
  21. data/contrib/zstd/lib/common/entropy_common.c +172 -48
  22. data/contrib/zstd/lib/common/error_private.c +10 -2
  23. data/contrib/zstd/lib/common/error_private.h +82 -3
  24. data/contrib/zstd/lib/common/fse.h +37 -86
  25. data/contrib/zstd/lib/common/fse_decompress.c +117 -92
  26. data/contrib/zstd/lib/common/huf.h +99 -166
  27. data/contrib/zstd/lib/common/mem.h +124 -142
  28. data/contrib/zstd/lib/common/pool.c +54 -27
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +156 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -19
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +7 -847
  34. data/contrib/zstd/lib/common/xxhash.h +5568 -167
  35. data/contrib/zstd/lib/common/zstd_common.c +2 -37
  36. data/contrib/zstd/lib/common/zstd_deps.h +111 -0
  37. data/contrib/zstd/lib/common/zstd_internal.h +132 -187
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +83 -157
  41. data/contrib/zstd/lib/compress/hist.c +27 -29
  42. data/contrib/zstd/lib/compress/hist.h +2 -2
  43. data/contrib/zstd/lib/compress/huf_compress.c +916 -279
  44. data/contrib/zstd/lib/compress/zstd_compress.c +3773 -1019
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +610 -203
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +119 -42
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +42 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +49 -317
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +320 -103
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +388 -151
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +3 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +729 -265
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1270 -251
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +61 -1
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +324 -219
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +9 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +481 -209
  63. data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +181 -457
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +34 -113
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1199 -565
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +576 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -12
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +627 -157
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1086 -326
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +19 -5
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +62 -13
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +73 -52
  79. data/contrib/zstd/lib/dictBuilder/cover.h +7 -6
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +44 -35
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +103 -111
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +21 -54
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +29 -70
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +30 -73
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +29 -71
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +40 -86
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +47 -88
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +40 -83
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +214 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +7 -6
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +203 -34
  102. data/contrib/zstd/lib/zstd.h +1217 -287
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +28 -8
  104. data/ext/extconf.rb +7 -6
  105. data/ext/extzstd.c +19 -10
  106. data/ext/extzstd.h +6 -0
  107. data/ext/libzstd_conf.h +0 -1
  108. data/ext/zstd_decompress_asm.S +1 -0
  109. data/gemstub.rb +3 -21
  110. data/lib/extzstd/version.rb +6 -1
  111. data/lib/extzstd.rb +0 -2
  112. data/test/test_basic.rb +0 -5
  113. metadata +18 -6
@@ -1,7 +1,7 @@
1
1
  /* ******************************************************************
2
2
  * FSE : Finite State Entropy codec
3
3
  * Public Prototypes declaration
4
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
4
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
5
5
  *
6
6
  * You can contact the author at :
7
7
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -23,7 +23,7 @@ extern "C" {
23
23
  /*-*****************************************
24
24
  * Dependencies
25
25
  ******************************************/
26
- #include <stddef.h> /* size_t, ptrdiff_t */
26
+ #include "zstd_deps.h" /* size_t, ptrdiff_t */
27
27
 
28
28
 
29
29
  /*-*****************************************
@@ -53,34 +53,6 @@ extern "C" {
53
53
  FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
54
54
 
55
55
 
56
- /*-****************************************
57
- * FSE simple functions
58
- ******************************************/
59
- /*! FSE_compress() :
60
- Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
61
- 'dst' buffer must be already allocated. Compression runs faster if dstCapacity >= FSE_compressBound(srcSize).
62
- @return : size of compressed data (<= dstCapacity).
63
- Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
64
- if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
65
- if FSE_isError(return), compression failed (more details using FSE_getErrorName())
66
- */
67
- FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
68
- const void* src, size_t srcSize);
69
-
70
- /*! FSE_decompress():
71
- Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
72
- into already allocated destination buffer 'dst', of size 'dstCapacity'.
73
- @return : size of regenerated data (<= maxDstSize),
74
- or an error code, which can be tested using FSE_isError() .
75
-
76
- ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
77
- Why ? : making this distinction requires a header.
78
- Header management is intentionally delegated to the user layer, which can better manage special cases.
79
- */
80
- FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
81
- const void* cSrc, size_t cSrcSize);
82
-
83
-
84
56
  /*-*****************************************
85
57
  * Tool functions
86
58
  ******************************************/
@@ -91,20 +63,6 @@ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return
91
63
  FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
92
64
 
93
65
 
94
- /*-*****************************************
95
- * FSE advanced functions
96
- ******************************************/
97
- /*! FSE_compress2() :
98
- Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
99
- Both parameters can be defined as '0' to mean : use default value
100
- @return : size of compressed data
101
- Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
102
- if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
103
- if FSE_isError(return), it's an error code.
104
- */
105
- FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
106
-
107
-
108
66
  /*-*****************************************
109
67
  * FSE detailed API
110
68
  ******************************************/
@@ -137,10 +95,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
137
95
  /*! FSE_normalizeCount():
138
96
  normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
139
97
  'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
98
+ useLowProbCount is a boolean parameter which trades off compressed size for
99
+ faster header decoding. When it is set to 1, the compressed data will be slightly
100
+ smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
101
+ faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
102
+ is a good default, since header deserialization makes a big speed difference.
103
+ Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
140
104
  @return : tableLog,
141
105
  or an errorCode, which can be tested using FSE_isError() */
142
106
  FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
143
- const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
107
+ const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
144
108
 
145
109
  /*! FSE_NCountWriteBound():
146
110
  Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@@ -158,8 +122,6 @@ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
158
122
  /*! Constructor and Destructor of FSE_CTable.
159
123
  Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
160
124
  typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
161
- FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
162
- FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
163
125
 
164
126
  /*! FSE_buildCTable():
165
127
  Builds `ct`, which must be already allocated, using FSE_createCTable().
@@ -228,23 +190,14 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
228
190
  unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
229
191
  const void* rBuffer, size_t rBuffSize);
230
192
 
231
- /*! Constructor and Destructor of FSE_DTable.
232
- Note that its size depends on 'tableLog' */
193
+ /*! FSE_readNCount_bmi2():
194
+ * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
195
+ */
196
+ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
197
+ unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
198
+ const void* rBuffer, size_t rBuffSize, int bmi2);
199
+
233
200
  typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
234
- FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
235
- FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
236
-
237
- /*! FSE_buildDTable():
238
- Builds 'dt', which must be already allocated, using FSE_createDTable().
239
- return : 0, or an errorCode, which can be tested using FSE_isError() */
240
- FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
241
-
242
- /*! FSE_decompress_usingDTable():
243
- Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
244
- into `dst` which must be already allocated.
245
- @return : size of regenerated data (necessarily <= `dstCapacity`),
246
- or an errorCode, which can be tested using FSE_isError() */
247
- FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
248
201
 
249
202
  /*!
250
203
  Tutorial :
@@ -288,12 +241,12 @@ If there is an error, the function will return an error code, which can be teste
288
241
  *******************************************/
289
242
  /* FSE buffer bounds */
290
243
  #define FSE_NCOUNTBOUND 512
291
- #define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
244
+ #define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
292
245
  #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
293
246
 
294
247
  /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
295
- #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
296
- #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
248
+ #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
249
+ #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
297
250
 
298
251
  /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
299
252
  #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
@@ -307,33 +260,28 @@ If there is an error, the function will return an error code, which can be teste
307
260
  unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
308
261
  /**< same as FSE_optimalTableLog(), which used `minus==2` */
309
262
 
310
- /* FSE_compress_wksp() :
311
- * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
312
- * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
313
- */
314
- #define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
315
- size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
316
-
317
- size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
318
- /**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
319
-
320
263
  size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
321
264
  /**< build a fake FSE_CTable, designed to compress always the same symbolValue */
322
265
 
323
266
  /* FSE_buildCTable_wksp() :
324
267
  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
325
- * `wkspSize` must be >= `(1<<tableLog)`.
268
+ * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
269
+ * See FSE_buildCTable_wksp() for breakdown of workspace usage.
326
270
  */
271
+ #define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
272
+ #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
327
273
  size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
328
274
 
329
- size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
330
- /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
275
+ #define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
276
+ #define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
277
+ FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
278
+ /**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)` */
331
279
 
332
- size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
333
- /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
334
-
335
- size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
336
- /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
280
+ #define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
281
+ #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
282
+ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
283
+ /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`.
284
+ * Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */
337
285
 
338
286
  typedef enum {
339
287
  FSE_repeat_none, /**< Cannot use the previous table */
@@ -529,7 +477,7 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt
529
477
 
530
478
  /* FSE_getMaxNbBits() :
531
479
  * Approximate maximum cost of a symbol, in bits.
532
- * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
480
+ * Fractional costs get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
533
481
  * note 1 : assume symbolValue is valid (<= maxSymbolValue)
534
482
  * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
535
483
  MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
@@ -644,6 +592,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
644
592
  #ifndef FSE_DEFAULT_MEMORY_USAGE
645
593
  # define FSE_DEFAULT_MEMORY_USAGE 13
646
594
  #endif
595
+ #if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
596
+ # error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
597
+ #endif
647
598
 
648
599
  /*!FSE_MAX_SYMBOL_VALUE :
649
600
  * Maximum symbol value authorized.
@@ -677,7 +628,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
677
628
  # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
678
629
  #endif
679
630
 
680
- #define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
631
+ #define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
681
632
 
682
633
 
683
634
  #endif /* FSE_STATIC_LINKING_ONLY */
@@ -1,6 +1,6 @@
1
1
  /* ******************************************************************
2
2
  * FSE : Finite State Entropy decoder
3
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
4
  *
5
5
  * You can contact the author at :
6
6
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -16,13 +16,15 @@
16
16
  /* **************************************************************
17
17
  * Includes
18
18
  ****************************************************************/
19
- #include <stdlib.h> /* malloc, free, qsort */
20
- #include <string.h> /* memcpy, memset */
19
+ #include "debug.h" /* assert */
21
20
  #include "bitstream.h"
22
21
  #include "compiler.h"
23
22
  #define FSE_STATIC_LINKING_ONLY
24
23
  #include "fse.h"
25
24
  #include "error_private.h"
25
+ #define ZSTD_DEPS_NEED_MALLOC
26
+ #include "zstd_deps.h"
27
+ #include "bits.h" /* ZSTD_highbit32 */
26
28
 
27
29
 
28
30
  /* **************************************************************
@@ -54,30 +56,19 @@
54
56
  #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
55
57
  #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
56
58
 
57
-
58
- /* Function templates */
59
- FSE_DTable* FSE_createDTable (unsigned tableLog)
60
- {
61
- if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
62
- return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
63
- }
64
-
65
- void FSE_freeDTable (FSE_DTable* dt)
66
- {
67
- free(dt);
68
- }
69
-
70
- size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
59
+ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
71
60
  {
72
61
  void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
73
62
  FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
74
- U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
63
+ U16* symbolNext = (U16*)workSpace;
64
+ BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
75
65
 
76
66
  U32 const maxSV1 = maxSymbolValue + 1;
77
67
  U32 const tableSize = 1 << tableLog;
78
68
  U32 highThreshold = tableSize-1;
79
69
 
80
70
  /* Sanity Checks */
71
+ if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
81
72
  if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
82
73
  if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
83
74
 
@@ -95,11 +86,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
95
86
  if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
96
87
  symbolNext[s] = normalizedCounter[s];
97
88
  } } }
98
- memcpy(dt, &DTableH, sizeof(DTableH));
89
+ ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
99
90
  }
100
91
 
101
92
  /* Spread symbols */
102
- { U32 const tableMask = tableSize-1;
93
+ if (highThreshold == tableSize - 1) {
94
+ size_t const tableMask = tableSize-1;
95
+ size_t const step = FSE_TABLESTEP(tableSize);
96
+ /* First lay down the symbols in order.
97
+ * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
98
+ * misses since small blocks generally have small table logs, so nearly
99
+ * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
100
+ * our buffer to handle the over-write.
101
+ */
102
+ {
103
+ U64 const add = 0x0101010101010101ull;
104
+ size_t pos = 0;
105
+ U64 sv = 0;
106
+ U32 s;
107
+ for (s=0; s<maxSV1; ++s, sv += add) {
108
+ int i;
109
+ int const n = normalizedCounter[s];
110
+ MEM_write64(spread + pos, sv);
111
+ for (i = 8; i < n; i += 8) {
112
+ MEM_write64(spread + pos + i, sv);
113
+ }
114
+ pos += n;
115
+ }
116
+ }
117
+ /* Now we spread those positions across the table.
118
+ * The benefit of doing it in two stages is that we avoid the
119
+ * variable size inner loop, which caused lots of branch misses.
120
+ * Now we can run through all the positions without any branch misses.
121
+ * We unroll the loop twice, since that is what empirically worked best.
122
+ */
123
+ {
124
+ size_t position = 0;
125
+ size_t s;
126
+ size_t const unroll = 2;
127
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
128
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
129
+ size_t u;
130
+ for (u = 0; u < unroll; ++u) {
131
+ size_t const uPosition = (position + (u * step)) & tableMask;
132
+ tableDecode[uPosition].symbol = spread[s + u];
133
+ }
134
+ position = (position + (unroll * step)) & tableMask;
135
+ }
136
+ assert(position == 0);
137
+ }
138
+ } else {
139
+ U32 const tableMask = tableSize-1;
103
140
  U32 const step = FSE_TABLESTEP(tableSize);
104
141
  U32 s, position = 0;
105
142
  for (s=0; s<maxSV1; s++) {
@@ -117,62 +154,24 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
117
154
  for (u=0; u<tableSize; u++) {
118
155
  FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
119
156
  U32 const nextState = symbolNext[symbol]++;
120
- tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
157
+ tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
121
158
  tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
122
159
  } }
123
160
 
124
161
  return 0;
125
162
  }
126
163
 
164
+ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
165
+ {
166
+ return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
167
+ }
168
+
127
169
 
128
170
  #ifndef FSE_COMMONDEFS_ONLY
129
171
 
130
172
  /*-*******************************************************
131
173
  * Decompression (Byte symbols)
132
174
  *********************************************************/
133
- size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
134
- {
135
- void* ptr = dt;
136
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
137
- void* dPtr = dt + 1;
138
- FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
139
-
140
- DTableH->tableLog = 0;
141
- DTableH->fastMode = 0;
142
-
143
- cell->newState = 0;
144
- cell->symbol = symbolValue;
145
- cell->nbBits = 0;
146
-
147
- return 0;
148
- }
149
-
150
-
151
- size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
152
- {
153
- void* ptr = dt;
154
- FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
155
- void* dPtr = dt + 1;
156
- FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
157
- const unsigned tableSize = 1 << nbBits;
158
- const unsigned tableMask = tableSize - 1;
159
- const unsigned maxSV1 = tableMask+1;
160
- unsigned s;
161
-
162
- /* Sanity checks */
163
- if (nbBits < 1) return ERROR(GENERIC); /* min size */
164
-
165
- /* Build Decoding Table */
166
- DTableH->tableLog = (U16)nbBits;
167
- DTableH->fastMode = 1;
168
- for (s=0; s<maxSV1; s++) {
169
- dinfo[s].newState = 0;
170
- dinfo[s].symbol = (BYTE)s;
171
- dinfo[s].nbBits = (BYTE)nbBits;
172
- }
173
-
174
- return 0;
175
- }
176
175
 
177
176
  FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
178
177
  void* dst, size_t maxDstSize,
@@ -236,51 +235,77 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
236
235
  return op-ostart;
237
236
  }
238
237
 
238
+ typedef struct {
239
+ short ncount[FSE_MAX_SYMBOL_VALUE + 1];
240
+ FSE_DTable dtable[1]; /* Dynamically sized */
241
+ } FSE_DecompressWksp;
239
242
 
240
- size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
241
- const void* cSrc, size_t cSrcSize,
242
- const FSE_DTable* dt)
243
- {
244
- const void* ptr = dt;
245
- const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
246
- const U32 fastMode = DTableH->fastMode;
247
243
 
248
- /* select fast mode (static) */
249
- if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
250
- return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
251
- }
252
-
253
-
254
- size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
244
+ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
245
+ void* dst, size_t dstCapacity,
246
+ const void* cSrc, size_t cSrcSize,
247
+ unsigned maxLog, void* workSpace, size_t wkspSize,
248
+ int bmi2)
255
249
  {
256
250
  const BYTE* const istart = (const BYTE*)cSrc;
257
251
  const BYTE* ip = istart;
258
- short counting[FSE_MAX_SYMBOL_VALUE+1];
259
252
  unsigned tableLog;
260
253
  unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
254
+ FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
255
+
256
+ DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
257
+ if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
261
258
 
262
259
  /* normal FSE decoding mode */
263
- size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
264
- if (FSE_isError(NCountLength)) return NCountLength;
265
- /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
266
- if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
267
- ip += NCountLength;
268
- cSrcSize -= NCountLength;
260
+ {
261
+ size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
262
+ if (FSE_isError(NCountLength)) return NCountLength;
263
+ if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
264
+ assert(NCountLength <= cSrcSize);
265
+ ip += NCountLength;
266
+ cSrcSize -= NCountLength;
267
+ }
269
268
 
270
- CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
269
+ if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
270
+ assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize);
271
+ workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
272
+ wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
271
273
 
272
- return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */
273
- }
274
+ CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
274
275
 
276
+ {
277
+ const void* ptr = wksp->dtable;
278
+ const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
279
+ const U32 fastMode = DTableH->fastMode;
275
280
 
276
- typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
281
+ /* select fast mode (static) */
282
+ if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
283
+ return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
284
+ }
285
+ }
277
286
 
278
- size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
287
+ /* Avoids the FORCE_INLINE of the _body() function. */
288
+ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
279
289
  {
280
- DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
281
- return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
290
+ return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
282
291
  }
283
292
 
293
+ #if DYNAMIC_BMI2
294
+ BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
295
+ {
296
+ return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
297
+ }
298
+ #endif
284
299
 
300
+ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
301
+ {
302
+ #if DYNAMIC_BMI2
303
+ if (bmi2) {
304
+ return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
305
+ }
306
+ #endif
307
+ (void)bmi2;
308
+ return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
309
+ }
285
310
 
286
311
  #endif /* FSE_COMMONDEFS_ONLY */