extzstd 0.3.1 → 0.3.2

Files changed (76)
  1. checksums.yaml +4 -4
  2. data/README.md +28 -14
  3. data/contrib/zstd/CHANGELOG +114 -56
  4. data/contrib/zstd/CONTRIBUTING.md +14 -0
  5. data/contrib/zstd/Makefile +37 -31
  6. data/contrib/zstd/README.md +6 -0
  7. data/contrib/zstd/appveyor.yml +4 -1
  8. data/contrib/zstd/lib/Makefile +231 -134
  9. data/contrib/zstd/lib/README.md +28 -0
  10. data/contrib/zstd/lib/common/bitstream.h +24 -15
  11. data/contrib/zstd/lib/common/compiler.h +116 -3
  12. data/contrib/zstd/lib/common/cpu.h +0 -2
  13. data/contrib/zstd/lib/common/debug.h +11 -18
  14. data/contrib/zstd/lib/common/entropy_common.c +188 -42
  15. data/contrib/zstd/lib/common/error_private.c +1 -0
  16. data/contrib/zstd/lib/common/error_private.h +1 -1
  17. data/contrib/zstd/lib/common/fse.h +38 -11
  18. data/contrib/zstd/lib/common/fse_decompress.c +123 -16
  19. data/contrib/zstd/lib/common/huf.h +26 -5
  20. data/contrib/zstd/lib/common/mem.h +66 -93
  21. data/contrib/zstd/lib/common/pool.c +22 -16
  22. data/contrib/zstd/lib/common/pool.h +1 -1
  23. data/contrib/zstd/lib/common/threading.c +6 -5
  24. data/contrib/zstd/lib/common/xxhash.c +18 -56
  25. data/contrib/zstd/lib/common/xxhash.h +1 -1
  26. data/contrib/zstd/lib/common/zstd_common.c +9 -9
  27. data/contrib/zstd/lib/common/zstd_deps.h +111 -0
  28. data/contrib/zstd/lib/common/zstd_errors.h +1 -0
  29. data/contrib/zstd/lib/common/zstd_internal.h +89 -58
  30. data/contrib/zstd/lib/compress/fse_compress.c +30 -23
  31. data/contrib/zstd/lib/compress/hist.c +26 -28
  32. data/contrib/zstd/lib/compress/hist.h +1 -1
  33. data/contrib/zstd/lib/compress/huf_compress.c +210 -95
  34. data/contrib/zstd/lib/compress/zstd_compress.c +1339 -409
  35. data/contrib/zstd/lib/compress/zstd_compress_internal.h +119 -41
  36. data/contrib/zstd/lib/compress/zstd_compress_literals.c +4 -4
  37. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +17 -3
  38. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +23 -19
  39. data/contrib/zstd/lib/compress/zstd_cwksp.h +60 -24
  40. data/contrib/zstd/lib/compress/zstd_double_fast.c +22 -22
  41. data/contrib/zstd/lib/compress/zstd_fast.c +19 -19
  42. data/contrib/zstd/lib/compress/zstd_lazy.c +351 -77
  43. data/contrib/zstd/lib/compress/zstd_lazy.h +20 -0
  44. data/contrib/zstd/lib/compress/zstd_ldm.c +59 -18
  45. data/contrib/zstd/lib/compress/zstd_ldm.h +6 -0
  46. data/contrib/zstd/lib/compress/zstd_opt.c +190 -45
  47. data/contrib/zstd/lib/compress/zstdmt_compress.c +74 -406
  48. data/contrib/zstd/lib/compress/zstdmt_compress.h +26 -108
  49. data/contrib/zstd/lib/decompress/huf_decompress.c +302 -200
  50. data/contrib/zstd/lib/decompress/zstd_ddict.c +8 -8
  51. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  52. data/contrib/zstd/lib/decompress/zstd_decompress.c +125 -80
  53. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +145 -37
  54. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +5 -2
  55. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +11 -10
  56. data/contrib/zstd/lib/dictBuilder/cover.c +29 -20
  57. data/contrib/zstd/lib/dictBuilder/cover.h +1 -1
  58. data/contrib/zstd/lib/dictBuilder/fastcover.c +20 -19
  59. data/contrib/zstd/lib/dictBuilder/zdict.c +15 -16
  60. data/contrib/zstd/lib/dictBuilder/zdict.h +1 -1
  61. data/contrib/zstd/lib/legacy/zstd_v01.c +5 -1
  62. data/contrib/zstd/lib/legacy/zstd_v02.c +5 -1
  63. data/contrib/zstd/lib/legacy/zstd_v03.c +5 -1
  64. data/contrib/zstd/lib/legacy/zstd_v04.c +6 -2
  65. data/contrib/zstd/lib/legacy/zstd_v05.c +5 -1
  66. data/contrib/zstd/lib/legacy/zstd_v06.c +5 -1
  67. data/contrib/zstd/lib/legacy/zstd_v07.c +5 -1
  68. data/contrib/zstd/lib/libzstd.pc.in +3 -3
  69. data/contrib/zstd/lib/zstd.h +348 -47
  70. data/ext/extzstd.c +6 -0
  71. data/ext/extzstd.h +6 -0
  72. data/gemstub.rb +3 -21
  73. data/lib/extzstd.rb +0 -2
  74. data/lib/extzstd/version.rb +6 -1
  75. data/test/test_basic.rb +0 -5
  76. metadata +5 -4
data/contrib/zstd/lib/common/error_private.c
@@ -48,6 +48,7 @@ const char* ERR_getErrorString(ERR_enum code)
     case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
     case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
     case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
+    case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
     case PREFIX(maxCode):
     default: return notErrorCode;
     }
data/contrib/zstd/lib/common/error_private.h
@@ -21,7 +21,7 @@ extern "C" {
 /* ****************************************
 *  Dependencies
 ******************************************/
-#include <stddef.h>       /* size_t */
+#include "zstd_deps.h"    /* size_t */
 #include "zstd_errors.h"  /* enum list */


data/contrib/zstd/lib/common/fse.h
@@ -23,7 +23,7 @@ extern "C" {
 /*-*****************************************
 *  Dependencies
 ******************************************/
-#include <stddef.h>      /* size_t, ptrdiff_t */
+#include "zstd_deps.h"   /* size_t, ptrdiff_t */


 /*-*****************************************
@@ -137,10 +137,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
 /*! FSE_normalizeCount():
     normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
     'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
+    useLowProbCount is a boolean parameter which trades off compressed size for
+    faster header decoding. When it is set to 1, the compressed data will be slightly
+    smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
+    faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
+    is a good default, since header deserialization makes a big speed difference.
+    Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
    @return : tableLog,
              or an errorCode, which can be tested using FSE_isError() */
 FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
-                    const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
+                    const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);

 /*! FSE_NCountWriteBound():
     Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
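A minimal caller-side sketch of the new parameter, following the 2 KB guideline above (the helper name is illustrative, not part of this diff):

    #include "fse.h"

    /* Favor fast header decoding for small inputs, smaller output otherwise. */
    static size_t normalize_for_block(short* norm, unsigned tableLog,
                                      const unsigned* count, size_t srcSize,
                                      unsigned maxSymbolValue)
    {
        unsigned const useLowProbCount = (srcSize >= 2048);  /* 0 below 2 KB */
        return FSE_normalizeCount(norm, tableLog, count, srcSize,
                                  maxSymbolValue, useLowProbCount);
    }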
@@ -228,6 +234,13 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
                            unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
                            const void* rBuffer, size_t rBuffSize);

+/*! FSE_readNCount_bmi2():
+ * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
+ */
+FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
+                           unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
+                           const void* rBuffer, size_t rBuffSize, int bmi2);
+
 /*! Constructor and Destructor of FSE_DTable.
     Note that its size depends on 'tableLog' */
 typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
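For the dynamic-dispatch entry points, the bmi2 flag typically comes from zstd's internal cpu.h helpers; a hedged sketch (the wrapper name is illustrative):

    #include "cpu.h"   /* ZSTD_cpuid(), ZSTD_cpuid_bmi2() */
    #include "fse.h"

    static size_t read_ncount_auto(short* norm, unsigned* maxSV, unsigned* tableLog,
                                   const void* src, size_t srcSize)
    {
        int const bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());  /* 1 iff the CPU supports BMI2 */
        return FSE_readNCount_bmi2(norm, maxSV, tableLog, src, srcSize, bmi2);
    }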
@@ -288,12 +301,12 @@ If there is an error, the function will return an error code, which can be teste
 *******************************************/
 /* FSE buffer bounds */
 #define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
+#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */

 /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
-#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
-#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<(maxTableLog)))

 /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
 #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue)   (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
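The added parentheses guard against mis-expansion when an expression is passed as a macro argument; an illustration with a hypothetical call:

    /* With the old FSE_CTABLE_SIZE_U32, passing an expression mis-expands:
     *
     *   FSE_CTABLE_SIZE_U32(10, msv | 1)
     *     old: ... + ((msv | 1+1)*2)      // parses as ((msv | 2) * 2)
     *     new: ... + (((msv | 1)+1)*2)    // as intended
     *
     * (msv stands in for any expression argument) */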
@@ -309,9 +322,9 @@ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsi

 /* FSE_compress_wksp() :
 *  Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
- * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
+ * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
 */
-#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
+#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);

 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
@@ -322,18 +335,29 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);

 /* FSE_buildCTable_wksp() :
 *  Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
- * `wkspSize` must be >= `(1<<tableLog)`.
+ * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog)`.
 */
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * (maxSymbolValue + 2) + (1ull << tableLog))
 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);

+#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
+#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
+FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
+
 size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
 /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */

 size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
 /**< build a fake FSE_DTable, designed to always generate the same symbolValue */

-size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
-/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
+#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue))
+#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
+/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
+
+size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
+/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */

 typedef enum {
     FSE_repeat_none,   /**< Cannot use the previous table */
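A minimal sketch of the new workspace-based calling convention, mirroring the FSE_decompress() rewrite later in this diff (the wrapper name is illustrative):

    #define FSE_STATIC_LINKING_ONLY
    #include "fse.h"

    static size_t decompress_with_stack_wksp(void* dst, size_t dstCapacity,
                                             const void* cSrc, size_t cSrcSize)
    {
        /* sized for the worst case; a smaller maxLog shrinks the workspace */
        unsigned wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
        return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize,
                                   FSE_MAX_TABLELOG, wksp, sizeof(wksp));
    }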
@@ -644,6 +668,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
 #ifndef FSE_DEFAULT_MEMORY_USAGE
 #  define FSE_DEFAULT_MEMORY_USAGE 13
 #endif
+#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
+#  error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
+#endif

 /*!FSE_MAX_SYMBOL_VALUE :
 *  Maximum symbol value authorized.
@@ -677,7 +704,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
 #  error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
 #endif

-#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
+#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)


 #endif /* FSE_STATIC_LINKING_ONLY */
data/contrib/zstd/lib/common/fse_decompress.c
@@ -16,13 +16,14 @@
 /* **************************************************************
 *  Includes
 ****************************************************************/
-#include <stdlib.h>     /* malloc, free, qsort */
-#include <string.h>     /* memcpy, memset */
+#include "debug.h"      /* assert */
 #include "bitstream.h"
 #include "compiler.h"
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
 #include "error_private.h"
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h"


 /* **************************************************************
@@ -59,25 +60,27 @@
 FSE_DTable* FSE_createDTable (unsigned tableLog)
 {
     if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
-    return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+    return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
 }

 void FSE_freeDTable (FSE_DTable* dt)
 {
-    free(dt);
+    ZSTD_free(dt);
 }

-size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
 {
     void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */
     FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
-    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U16* symbolNext = (U16*)workSpace;
+    BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);

     U32 const maxSV1 = maxSymbolValue + 1;
     U32 const tableSize = 1 << tableLog;
     U32 highThreshold = tableSize-1;

     /* Sanity Checks */
+    if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
     if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);

@@ -95,11 +98,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
             if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
             symbolNext[s] = normalizedCounter[s];
     }   }   }
-    memcpy(dt, &DTableH, sizeof(DTableH));
+    ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
     }

     /* Spread symbols */
-    {   U32 const tableMask = tableSize-1;
+    if (highThreshold == tableSize - 1) {
+        size_t const tableMask = tableSize-1;
+        size_t const step = FSE_TABLESTEP(tableSize);
+        /* First lay down the symbols in order.
+         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+         * misses since small blocks generally have small table logs, so nearly
+         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+         * our buffer to handle the over-write.
+         */
+        {
+            U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                pos += n;
+            }
+        }
+        /* Now we spread those positions across the table.
+         * The benefit of doing it in two stages is that we avoid the
+         * variable size inner loop, which caused lots of branch misses.
+         * Now we can run through all the positions without any branch misses.
+         * We unroll the loop twice, since that is what empirically worked best.
+         */
+        {
+            size_t position = 0;
+            size_t s;
+            size_t const unroll = 2;
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableDecode[uPosition].symbol = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);
+        }
+    } else {
+        U32 const tableMask = tableSize-1;
         U32 const step = FSE_TABLESTEP(tableSize);
         U32 s, position = 0;
         for (s=0; s<maxSV1; s++) {
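A standalone toy version (not library code) of the first stage above: each 8-byte store may run up to 7 bytes past a symbol's run, and the next run's stores, plus the 8-byte slack at the end of the buffer, absorb the overwrite.

    #include <stdint.h>
    #include <string.h>

    /* counts[] must be non-negative here: the fast path only runs when there
     * are no low-probability (-1) symbols. spread[] needs 8 bytes of slack. */
    static void lay_down_symbols(uint8_t* spread, const short* counts, unsigned maxSV1)
    {
        uint64_t sv = 0;                             /* 8 copies of symbol s */
        uint64_t const add = 0x0101010101010101ULL;  /* bump every byte by 1 */
        size_t pos = 0;
        unsigned s;
        for (s = 0; s < maxSV1; ++s, sv += add) {
            int const n = counts[s];
            int i;
            memcpy(spread + pos, &sv, 8);            /* unconditional 8-byte store */
            for (i = 8; i < n; i += 8)
                memcpy(spread + pos + i, &sv, 8);
            pos += n;                                /* excess bytes get overwritten */
        }
    }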
@@ -124,6 +173,11 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
     return 0;
 }

+size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
+}
+

 #ifndef FSE_COMMONDEFS_ONLY

@@ -251,36 +305,89 @@ size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
 }


-size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
+        void* dst, size_t dstCapacity,
+        const void* cSrc, size_t cSrcSize,
+        unsigned maxLog, void* workSpace, size_t wkspSize,
+        int bmi2)
 {
     const BYTE* const istart = (const BYTE*)cSrc;
     const BYTE* ip = istart;
     short counting[FSE_MAX_SYMBOL_VALUE+1];
     unsigned tableLog;
     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    FSE_DTable* const dtable = (FSE_DTable*)workSpace;

     /* normal FSE decoding mode */
-    size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
     if (FSE_isError(NCountLength)) return NCountLength;
-    /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */   /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
     if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
+    assert(NCountLength <= cSrcSize);
     ip += NCountLength;
     cSrcSize -= NCountLength;

-    CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
+    if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
+    workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog);
+    wkspSize -= FSE_DTABLE_SIZE(tableLog);
+
+    CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) );
+
+    {
+        const void* ptr = dtable;
+        const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
+        const U32 fastMode = DTableH->fastMode;
+
+        /* select fast mode (static) */
+        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
+        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
+    }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
+}
+#endif

-    return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace);   /* always return, even if it is an error code */
+size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
+    }
+#endif
+    (void)bmi2;
+    return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
 }


 typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];

+#ifndef ZSTD_NO_UNUSED_FUNCTIONS
+size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) {
+    U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)];
+    return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp));
+}
+
 size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
 {
-    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
-    return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
+    /* Static analyzer seems unable to understand this table will be properly initialized later */
+    U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
+    return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp));
 }
-
+#endif


 #endif   /* FSE_COMMONDEFS_ONLY */
data/contrib/zstd/lib/common/huf.h
@@ -20,7 +20,7 @@ extern "C" {
 #define HUF_H_298734234

 /* *** Dependencies *** */
-#include <stddef.h>      /* size_t */
+#include "zstd_deps.h"   /* size_t */


 /* *** library symbols visibility *** */
@@ -111,6 +111,8 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,

 /* *** Dependencies *** */
 #include "mem.h"   /* U32 */
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"


 /* *** Constants *** */
@@ -133,12 +135,16 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
 #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */

 /* static allocation of HUF's Compression Table */
+/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
+struct HUF_CElt_s {
+    U16  val;
+    BYTE nbBits;
+};   /* typedef'd to HUF_CElt */
+typedef struct HUF_CElt_s HUF_CElt;   /* consider it an incomplete type */
 #define HUF_CTABLE_SIZE_U32(maxSymbolValue)   ((maxSymbolValue)+1)   /* Use tables of U32, for proper alignment */
 #define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
-    U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
-    void* name##hv = &(name##hb); \
-    HUF_CElt* name = (HUF_CElt*)(name##hv)   /* no final ; */
+    HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */

 /* static allocation of HUF's DTable */
 typedef U32 HUF_DTable;
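With the simplified macro, static CTable allocation reduces to a plain array declaration; a sketch (the table name and symbol count are illustrative):

    #define HUF_STATIC_LINKING_ONLY
    #include "huf.h"

    /* expands to: HUF_CElt hufTable[256];  (HUF_CTABLE_SIZE_U32(255) == 256) */
    HUF_CREATE_STATIC_CTABLE(hufTable, 255);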
@@ -184,7 +190,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
  *  or to save and regenerate 'CTable' using external methods.
  */
 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
-typedef struct HUF_CElt_s HUF_CElt;   /* incomplete type */
 size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);   /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
@@ -226,6 +231,19 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
                      U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
                      const void* src, size_t srcSize);

+/*! HUF_readStats_wksp() :
+ * Same as HUF_readStats() but takes an external workspace which must be
+ * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+ */
+#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
+#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
+size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
+                          U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
+                          const void* src, size_t srcSize,
+                          void* workspace, size_t wkspSize,
+                          int bmi2);
+
 /** HUF_readCTable() :
  *  Loading a CTable saved with HUF_writeCTable() */
 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
@@ -332,6 +350,9 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
 #endif
 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif

 #endif /* HUF_STATIC_LINKING_ONLY */

data/contrib/zstd/lib/common/mem.h
@@ -18,8 +18,10 @@ extern "C" {
 /*-****************************************
 *  Dependencies
 ******************************************/
-#include <stddef.h>     /* size_t, ptrdiff_t */
-#include <string.h>     /* memcpy */
+#include <stddef.h>     /* size_t, ptrdiff_t */
+#include "compiler.h"   /* __has_builtin */
+#include "debug.h"      /* DEBUG_STATIC_ASSERT */
+#include "zstd_deps.h"  /* ZSTD_memcpy */


 /*-****************************************
@@ -39,93 +41,15 @@ extern "C" {
 #  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
 #endif

-#ifndef __has_builtin
-#  define __has_builtin(x) 0  /* compat. with non-clang compilers */
-#endif
-
-/* code only tested on 32 and 64 bits systems */
-#define MEM_STATIC_ASSERT(c)   { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
-MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
-
-/* detects whether we are being compiled under msan */
-#if defined (__has_feature)
-#  if __has_feature(memory_sanitizer)
-#    define MEMORY_SANITIZER 1
-#  endif
-#endif
-
-#if defined (MEMORY_SANITIZER)
-/* Not all platforms that support msan provide sanitizers/msan_interface.h.
- * We therefore declare the functions we need ourselves, rather than trying to
- * include the header file... */
-
-#include <stdint.h> /* intptr_t */
-
-/* Make memory region fully initialized (without changing its contents). */
-void __msan_unpoison(const volatile void *a, size_t size);
-
-/* Make memory region fully uninitialized (without changing its contents).
-   This is a legacy interface that does not update origin information. Use
-   __msan_allocated_memory() instead. */
-void __msan_poison(const volatile void *a, size_t size);
-
-/* Returns the offset of the first (at least partially) poisoned byte in the
-   memory range, or -1 if the whole range is good. */
-intptr_t __msan_test_shadow(const volatile void *x, size_t size);
-#endif
-
-/* detects whether we are being compiled under asan */
-#if defined (__has_feature)
-#  if __has_feature(address_sanitizer)
-#    define ADDRESS_SANITIZER 1
-#  endif
-#elif defined(__SANITIZE_ADDRESS__)
-#  define ADDRESS_SANITIZER 1
-#endif
-
-#if defined (ADDRESS_SANITIZER)
-/* Not all platforms that support asan provide sanitizers/asan_interface.h.
- * We therefore declare the functions we need ourselves, rather than trying to
- * include the header file... */
-
-/**
- * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
- *
- * This memory must be previously allocated by your program. Instrumented
- * code is forbidden from accessing addresses in this region until it is
- * unpoisoned. This function is not guaranteed to poison the entire region -
- * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
- * alignment restrictions.
- *
- * \note This function is not thread-safe because no two threads can poison or
- * unpoison memory in the same memory region simultaneously.
- *
- * \param addr Start of memory region.
- * \param size Size of memory region. */
-void __asan_poison_memory_region(void const volatile *addr, size_t size);
-
-/**
- * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
- *
- * This memory must be previously allocated by your program. Accessing
- * addresses in this region is allowed until this region is poisoned again.
- * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
- * to ASan alignment restrictions.
- *
- * \note This function is not thread-safe because no two threads can
- * poison or unpoison memory in the same memory region simultaneously.
- *
- * \param addr Start of memory region.
- * \param size Size of memory region. */
-void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
-#endif
-
-
 /*-**************************************************************
 *  Basic Types
 *****************************************************************/
 #if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-#  include <stdint.h>
+#  if defined(_AIX)
+#    include <inttypes.h>
+#  else
+#    include <stdint.h>  /* intptr_t */
+#  endif
     typedef  uint8_t BYTE;
     typedef uint16_t U16;
     typedef  int16_t S16;
@@ -157,7 +81,53 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);


 /*-**************************************************************
-*  Memory I/O
+*  Memory I/O API
+*****************************************************************/
+/*=== Static platform detection ===*/
+MEM_STATIC unsigned MEM_32bits(void);
+MEM_STATIC unsigned MEM_64bits(void);
+MEM_STATIC unsigned MEM_isLittleEndian(void);
+
+/*=== Native unaligned read/write ===*/
+MEM_STATIC U16 MEM_read16(const void* memPtr);
+MEM_STATIC U32 MEM_read32(const void* memPtr);
+MEM_STATIC U64 MEM_read64(const void* memPtr);
+MEM_STATIC size_t MEM_readST(const void* memPtr);
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value);
+MEM_STATIC void MEM_write32(void* memPtr, U32 value);
+MEM_STATIC void MEM_write64(void* memPtr, U64 value);
+
+/*=== Little endian unaligned read/write ===*/
+MEM_STATIC U16 MEM_readLE16(const void* memPtr);
+MEM_STATIC U32 MEM_readLE24(const void* memPtr);
+MEM_STATIC U32 MEM_readLE32(const void* memPtr);
+MEM_STATIC U64 MEM_readLE64(const void* memPtr);
+MEM_STATIC size_t MEM_readLEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
+MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
+
+/*=== Big endian unaligned read/write ===*/
+MEM_STATIC U32 MEM_readBE32(const void* memPtr);
+MEM_STATIC U64 MEM_readBE64(const void* memPtr);
+MEM_STATIC size_t MEM_readBEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
+
+/*=== Byteswap ===*/
+MEM_STATIC U32 MEM_swap32(U32 in);
+MEM_STATIC U64 MEM_swap64(U64 in);
+MEM_STATIC size_t MEM_swapST(size_t in);
+
+
+/*-**************************************************************
+*  Memory I/O Implementation
 *****************************************************************/
 /* MEM_FORCE_MEMORY_ACCESS :
  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
@@ -236,37 +206,37 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v =

 MEM_STATIC U16 MEM_read16(const void* memPtr)
 {
-    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+    U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
 }

 MEM_STATIC U32 MEM_read32(const void* memPtr)
 {
-    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+    U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
 }

 MEM_STATIC U64 MEM_read64(const void* memPtr)
 {
-    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+    U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
 }

 MEM_STATIC size_t MEM_readST(const void* memPtr)
 {
-    size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+    size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
 }

 MEM_STATIC void MEM_write16(void* memPtr, U16 value)
 {
-    memcpy(memPtr, &value, sizeof(value));
+    ZSTD_memcpy(memPtr, &value, sizeof(value));
 }

 MEM_STATIC void MEM_write32(void* memPtr, U32 value)
 {
-    memcpy(memPtr, &value, sizeof(value));
+    ZSTD_memcpy(memPtr, &value, sizeof(value));
 }

 MEM_STATIC void MEM_write64(void* memPtr, U64 value)
 {
-    memcpy(memPtr, &value, sizeof(value));
+    ZSTD_memcpy(memPtr, &value, sizeof(value));
 }

 #endif /* MEM_FORCE_MEMORY_ACCESS */
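Background on the idiom being renamed here: a fixed-size memcpy is the portable way to express an unaligned access, and compilers collapse it to a single load or store where the target allows it. A sketch of the bare idiom (assumption: ZSTD_memcpy forwards to memcpy or a compiler builtin via zstd_deps.h):

    #include <stdint.h>
    #include <string.h>

    static uint32_t read32_unaligned(const void* p)
    {
        uint32_t v;
        memcpy(&v, p, sizeof(v));   /* typically compiles to one 32-bit load */
        return v;
    }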
@@ -445,6 +415,9 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
     MEM_writeBE64(memPtr, (U64)val);
 }

+/* code only tested on 32 and 64 bits systems */
+MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
+

 #if defined (__cplusplus)
 }