extzstd 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +28 -14
  3. data/contrib/zstd/CHANGELOG +114 -56
  4. data/contrib/zstd/CONTRIBUTING.md +14 -0
  5. data/contrib/zstd/Makefile +37 -31
  6. data/contrib/zstd/README.md +6 -0
  7. data/contrib/zstd/appveyor.yml +4 -1
  8. data/contrib/zstd/lib/Makefile +231 -134
  9. data/contrib/zstd/lib/README.md +28 -0
  10. data/contrib/zstd/lib/common/bitstream.h +24 -15
  11. data/contrib/zstd/lib/common/compiler.h +116 -3
  12. data/contrib/zstd/lib/common/cpu.h +0 -2
  13. data/contrib/zstd/lib/common/debug.h +11 -18
  14. data/contrib/zstd/lib/common/entropy_common.c +188 -42
  15. data/contrib/zstd/lib/common/error_private.c +1 -0
  16. data/contrib/zstd/lib/common/error_private.h +1 -1
  17. data/contrib/zstd/lib/common/fse.h +38 -11
  18. data/contrib/zstd/lib/common/fse_decompress.c +123 -16
  19. data/contrib/zstd/lib/common/huf.h +26 -5
  20. data/contrib/zstd/lib/common/mem.h +66 -93
  21. data/contrib/zstd/lib/common/pool.c +22 -16
  22. data/contrib/zstd/lib/common/pool.h +1 -1
  23. data/contrib/zstd/lib/common/threading.c +6 -5
  24. data/contrib/zstd/lib/common/xxhash.c +18 -56
  25. data/contrib/zstd/lib/common/xxhash.h +1 -1
  26. data/contrib/zstd/lib/common/zstd_common.c +9 -9
  27. data/contrib/zstd/lib/common/zstd_deps.h +111 -0
  28. data/contrib/zstd/lib/common/zstd_errors.h +1 -0
  29. data/contrib/zstd/lib/common/zstd_internal.h +89 -58
  30. data/contrib/zstd/lib/compress/fse_compress.c +30 -23
  31. data/contrib/zstd/lib/compress/hist.c +26 -28
  32. data/contrib/zstd/lib/compress/hist.h +1 -1
  33. data/contrib/zstd/lib/compress/huf_compress.c +210 -95
  34. data/contrib/zstd/lib/compress/zstd_compress.c +1339 -409
  35. data/contrib/zstd/lib/compress/zstd_compress_internal.h +119 -41
  36. data/contrib/zstd/lib/compress/zstd_compress_literals.c +4 -4
  37. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +17 -3
  38. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +23 -19
  39. data/contrib/zstd/lib/compress/zstd_cwksp.h +60 -24
  40. data/contrib/zstd/lib/compress/zstd_double_fast.c +22 -22
  41. data/contrib/zstd/lib/compress/zstd_fast.c +19 -19
  42. data/contrib/zstd/lib/compress/zstd_lazy.c +351 -77
  43. data/contrib/zstd/lib/compress/zstd_lazy.h +20 -0
  44. data/contrib/zstd/lib/compress/zstd_ldm.c +59 -18
  45. data/contrib/zstd/lib/compress/zstd_ldm.h +6 -0
  46. data/contrib/zstd/lib/compress/zstd_opt.c +190 -45
  47. data/contrib/zstd/lib/compress/zstdmt_compress.c +74 -406
  48. data/contrib/zstd/lib/compress/zstdmt_compress.h +26 -108
  49. data/contrib/zstd/lib/decompress/huf_decompress.c +302 -200
  50. data/contrib/zstd/lib/decompress/zstd_ddict.c +8 -8
  51. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  52. data/contrib/zstd/lib/decompress/zstd_decompress.c +125 -80
  53. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +145 -37
  54. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +5 -2
  55. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +11 -10
  56. data/contrib/zstd/lib/dictBuilder/cover.c +29 -20
  57. data/contrib/zstd/lib/dictBuilder/cover.h +1 -1
  58. data/contrib/zstd/lib/dictBuilder/fastcover.c +20 -19
  59. data/contrib/zstd/lib/dictBuilder/zdict.c +15 -16
  60. data/contrib/zstd/lib/dictBuilder/zdict.h +1 -1
  61. data/contrib/zstd/lib/legacy/zstd_v01.c +5 -1
  62. data/contrib/zstd/lib/legacy/zstd_v02.c +5 -1
  63. data/contrib/zstd/lib/legacy/zstd_v03.c +5 -1
  64. data/contrib/zstd/lib/legacy/zstd_v04.c +6 -2
  65. data/contrib/zstd/lib/legacy/zstd_v05.c +5 -1
  66. data/contrib/zstd/lib/legacy/zstd_v06.c +5 -1
  67. data/contrib/zstd/lib/legacy/zstd_v07.c +5 -1
  68. data/contrib/zstd/lib/libzstd.pc.in +3 -3
  69. data/contrib/zstd/lib/zstd.h +348 -47
  70. data/ext/extzstd.c +6 -0
  71. data/ext/extzstd.h +6 -0
  72. data/gemstub.rb +3 -21
  73. data/lib/extzstd.rb +0 -2
  74. data/lib/extzstd/version.rb +6 -1
  75. data/test/test_basic.rb +0 -5
  76. metadata +5 -4
@@ -48,6 +48,7 @@ const char* ERR_getErrorString(ERR_enum code)
48
48
  case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
49
49
  case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
50
50
  case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
51
+ case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
51
52
  case PREFIX(maxCode):
52
53
  default: return notErrorCode;
53
54
  }
@@ -21,7 +21,7 @@ extern "C" {
21
21
  /* ****************************************
22
22
  * Dependencies
23
23
  ******************************************/
24
- #include <stddef.h> /* size_t */
24
+ #include "zstd_deps.h" /* size_t */
25
25
  #include "zstd_errors.h" /* enum list */
26
26
 
27
27
 
@@ -23,7 +23,7 @@ extern "C" {
23
23
  /*-*****************************************
24
24
  * Dependencies
25
25
  ******************************************/
26
- #include <stddef.h> /* size_t, ptrdiff_t */
26
+ #include "zstd_deps.h" /* size_t, ptrdiff_t */
27
27
 
28
28
 
29
29
  /*-*****************************************
@@ -137,10 +137,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
137
137
  /*! FSE_normalizeCount():
138
138
  normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
139
139
  'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
140
+ useLowProbCount is a boolean parameter which trades off compressed size for
141
+ faster header decoding. When it is set to 1, the compressed data will be slightly
142
+ smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
143
+ faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
144
+ is a good default, since header deserialization makes a big speed difference.
145
+ Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
140
146
  @return : tableLog,
141
147
  or an errorCode, which can be tested using FSE_isError() */
142
148
  FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
143
- const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
149
+ const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
144
150
 
145
151
  /*! FSE_NCountWriteBound():
146
152
  Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@@ -228,6 +234,13 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
228
234
  unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
229
235
  const void* rBuffer, size_t rBuffSize);
230
236
 
237
+ /*! FSE_readNCount_bmi2():
238
+ * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
239
+ */
240
+ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
241
+ unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
242
+ const void* rBuffer, size_t rBuffSize, int bmi2);
243
+
231
244
  /*! Constructor and Destructor of FSE_DTable.
232
245
  Note that its size depends on 'tableLog' */
233
246
  typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
@@ -288,12 +301,12 @@ If there is an error, the function will return an error code, which can be teste
288
301
  *******************************************/
289
302
  /* FSE buffer bounds */
290
303
  #define FSE_NCOUNTBOUND 512
291
- #define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
304
+ #define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
292
305
  #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
293
306
 
294
307
  /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
295
- #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
296
- #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
308
+ #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
309
+ #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
297
310
 
298
311
  /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
299
312
  #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
@@ -309,9 +322,9 @@ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsi
309
322
 
310
323
  /* FSE_compress_wksp() :
311
324
  * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
312
- * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
325
+ * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
313
326
  */
314
- #define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
327
+ #define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
315
328
  size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
316
329
 
317
330
  size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
@@ -322,18 +335,29 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
322
335
 
323
336
  /* FSE_buildCTable_wksp() :
324
337
  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
325
- * `wkspSize` must be >= `(1<<tableLog)`.
338
+ * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog)`.
326
339
  */
340
+ #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * (maxSymbolValue + 2) + (1ull << tableLog))
327
341
  size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
328
342
 
343
+ #define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
344
+ #define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
345
+ FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
346
+ /**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
347
+
329
348
  size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
330
349
  /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
331
350
 
332
351
  size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
333
352
  /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
334
353
 
335
- size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
336
- /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
354
+ #define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue))
355
+ #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
356
+ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
357
+ /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
358
+
359
+ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
360
+ /**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
337
361
 
338
362
  typedef enum {
339
363
  FSE_repeat_none, /**< Cannot use the previous table */
@@ -644,6 +668,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
644
668
  #ifndef FSE_DEFAULT_MEMORY_USAGE
645
669
  # define FSE_DEFAULT_MEMORY_USAGE 13
646
670
  #endif
671
+ #if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
672
+ # error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
673
+ #endif
647
674
 
648
675
  /*!FSE_MAX_SYMBOL_VALUE :
649
676
  * Maximum symbol value authorized.
@@ -677,7 +704,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
677
704
  # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
678
705
  #endif
679
706
 
680
- #define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
707
+ #define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
681
708
 
682
709
 
683
710
  #endif /* FSE_STATIC_LINKING_ONLY */
@@ -16,13 +16,14 @@
16
16
  /* **************************************************************
17
17
  * Includes
18
18
  ****************************************************************/
19
- #include <stdlib.h> /* malloc, free, qsort */
20
- #include <string.h> /* memcpy, memset */
19
+ #include "debug.h" /* assert */
21
20
  #include "bitstream.h"
22
21
  #include "compiler.h"
23
22
  #define FSE_STATIC_LINKING_ONLY
24
23
  #include "fse.h"
25
24
  #include "error_private.h"
25
+ #define ZSTD_DEPS_NEED_MALLOC
26
+ #include "zstd_deps.h"
26
27
 
27
28
 
28
29
  /* **************************************************************
@@ -59,25 +60,27 @@
59
60
  FSE_DTable* FSE_createDTable (unsigned tableLog)
60
61
  {
61
62
  if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
62
- return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
63
+ return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
63
64
  }
64
65
 
65
66
  void FSE_freeDTable (FSE_DTable* dt)
66
67
  {
67
- free(dt);
68
+ ZSTD_free(dt);
68
69
  }
69
70
 
70
- size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
71
+ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
71
72
  {
72
73
  void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
73
74
  FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
74
- U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
75
+ U16* symbolNext = (U16*)workSpace;
76
+ BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
75
77
 
76
78
  U32 const maxSV1 = maxSymbolValue + 1;
77
79
  U32 const tableSize = 1 << tableLog;
78
80
  U32 highThreshold = tableSize-1;
79
81
 
80
82
  /* Sanity Checks */
83
+ if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
81
84
  if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
82
85
  if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
83
86
 
@@ -95,11 +98,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
95
98
  if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
96
99
  symbolNext[s] = normalizedCounter[s];
97
100
  } } }
98
- memcpy(dt, &DTableH, sizeof(DTableH));
101
+ ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
99
102
  }
100
103
 
101
104
  /* Spread symbols */
102
- { U32 const tableMask = tableSize-1;
105
+ if (highThreshold == tableSize - 1) {
106
+ size_t const tableMask = tableSize-1;
107
+ size_t const step = FSE_TABLESTEP(tableSize);
108
+ /* First lay down the symbols in order.
109
+ * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
110
+ * misses since small blocks generally have small table logs, so nearly
111
+ * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
112
+ * our buffer to handle the over-write.
113
+ */
114
+ {
115
+ U64 const add = 0x0101010101010101ull;
116
+ size_t pos = 0;
117
+ U64 sv = 0;
118
+ U32 s;
119
+ for (s=0; s<maxSV1; ++s, sv += add) {
120
+ int i;
121
+ int const n = normalizedCounter[s];
122
+ MEM_write64(spread + pos, sv);
123
+ for (i = 8; i < n; i += 8) {
124
+ MEM_write64(spread + pos + i, sv);
125
+ }
126
+ pos += n;
127
+ }
128
+ }
129
+ /* Now we spread those positions across the table.
130
+ * The benefit of doing it in two stages is that we avoid the the
131
+ * variable size inner loop, which caused lots of branch misses.
132
+ * Now we can run through all the positions without any branch misses.
133
+ * We unroll the loop twice, since that is what emperically worked best.
134
+ */
135
+ {
136
+ size_t position = 0;
137
+ size_t s;
138
+ size_t const unroll = 2;
139
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
140
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
141
+ size_t u;
142
+ for (u = 0; u < unroll; ++u) {
143
+ size_t const uPosition = (position + (u * step)) & tableMask;
144
+ tableDecode[uPosition].symbol = spread[s + u];
145
+ }
146
+ position = (position + (unroll * step)) & tableMask;
147
+ }
148
+ assert(position == 0);
149
+ }
150
+ } else {
151
+ U32 const tableMask = tableSize-1;
103
152
  U32 const step = FSE_TABLESTEP(tableSize);
104
153
  U32 s, position = 0;
105
154
  for (s=0; s<maxSV1; s++) {
@@ -124,6 +173,11 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
124
173
  return 0;
125
174
  }
126
175
 
176
+ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
177
+ {
178
+ return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
179
+ }
180
+
127
181
 
128
182
  #ifndef FSE_COMMONDEFS_ONLY
129
183
 
@@ -251,36 +305,89 @@ size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
251
305
  }
252
306
 
253
307
 
254
- size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
308
+ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
309
+ {
310
+ return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
311
+ }
312
+
313
+ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
314
+ void* dst, size_t dstCapacity,
315
+ const void* cSrc, size_t cSrcSize,
316
+ unsigned maxLog, void* workSpace, size_t wkspSize,
317
+ int bmi2)
255
318
  {
256
319
  const BYTE* const istart = (const BYTE*)cSrc;
257
320
  const BYTE* ip = istart;
258
321
  short counting[FSE_MAX_SYMBOL_VALUE+1];
259
322
  unsigned tableLog;
260
323
  unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
324
+ FSE_DTable* const dtable = (FSE_DTable*)workSpace;
261
325
 
262
326
  /* normal FSE decoding mode */
263
- size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
327
+ size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
264
328
  if (FSE_isError(NCountLength)) return NCountLength;
265
- /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
266
329
  if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
330
+ assert(NCountLength <= cSrcSize);
267
331
  ip += NCountLength;
268
332
  cSrcSize -= NCountLength;
269
333
 
270
- CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
334
+ if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
335
+ workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog);
336
+ wkspSize -= FSE_DTABLE_SIZE(tableLog);
337
+
338
+ CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) );
339
+
340
+ {
341
+ const void* ptr = dtable;
342
+ const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
343
+ const U32 fastMode = DTableH->fastMode;
344
+
345
+ /* select fast mode (static) */
346
+ if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
347
+ return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
348
+ }
349
+ }
350
+
351
+ /* Avoids the FORCE_INLINE of the _body() function. */
352
+ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
353
+ {
354
+ return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
355
+ }
356
+
357
+ #if DYNAMIC_BMI2
358
+ TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
359
+ {
360
+ return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
361
+ }
362
+ #endif
271
363
 
272
- return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */
364
+ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
365
+ {
366
+ #if DYNAMIC_BMI2
367
+ if (bmi2) {
368
+ return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
369
+ }
370
+ #endif
371
+ (void)bmi2;
372
+ return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
273
373
  }
274
374
 
275
375
 
276
376
  typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
277
377
 
378
+ #ifndef ZSTD_NO_UNUSED_FUNCTIONS
379
+ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) {
380
+ U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)];
381
+ return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp));
382
+ }
383
+
278
384
  size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
279
385
  {
280
- DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
281
- return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
386
+ /* Static analyzer seems unable to understand this table will be properly initialized later */
387
+ U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
388
+ return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp));
282
389
  }
283
-
390
+ #endif
284
391
 
285
392
 
286
393
  #endif /* FSE_COMMONDEFS_ONLY */
@@ -20,7 +20,7 @@ extern "C" {
20
20
  #define HUF_H_298734234
21
21
 
22
22
  /* *** Dependencies *** */
23
- #include <stddef.h> /* size_t */
23
+ #include "zstd_deps.h" /* size_t */
24
24
 
25
25
 
26
26
  /* *** library symbols visibility *** */
@@ -111,6 +111,8 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
111
111
 
112
112
  /* *** Dependencies *** */
113
113
  #include "mem.h" /* U32 */
114
+ #define FSE_STATIC_LINKING_ONLY
115
+ #include "fse.h"
114
116
 
115
117
 
116
118
  /* *** Constants *** */
@@ -133,12 +135,16 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
133
135
  #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
134
136
 
135
137
  /* static allocation of HUF's Compression Table */
138
+ /* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
139
+ struct HUF_CElt_s {
140
+ U16 val;
141
+ BYTE nbBits;
142
+ }; /* typedef'd to HUF_CElt */
143
+ typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */
136
144
  #define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */
137
145
  #define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
138
146
  #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
139
- U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
140
- void* name##hv = &(name##hb); \
141
- HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */
147
+ HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
142
148
 
143
149
  /* static allocation of HUF's DTable */
144
150
  typedef U32 HUF_DTable;
@@ -184,7 +190,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
184
190
  * or to save and regenerate 'CTable' using external methods.
185
191
  */
186
192
  unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
187
- typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
188
193
  size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
189
194
  size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
190
195
  size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
@@ -226,6 +231,19 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
226
231
  U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
227
232
  const void* src, size_t srcSize);
228
233
 
234
+ /*! HUF_readStats_wksp() :
235
+ * Same as HUF_readStats() but takes an external workspace which must be
236
+ * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
237
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
238
+ */
239
+ #define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
240
+ #define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
241
+ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
242
+ U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
243
+ const void* src, size_t srcSize,
244
+ void* workspace, size_t wkspSize,
245
+ int bmi2);
246
+
229
247
  /** HUF_readCTable() :
230
248
  * Loading a CTable saved with HUF_writeCTable() */
231
249
  size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
@@ -332,6 +350,9 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
332
350
  #endif
333
351
  size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
334
352
  size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
353
+ #ifndef HUF_FORCE_DECOMPRESS_X2
354
+ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
355
+ #endif
335
356
 
336
357
  #endif /* HUF_STATIC_LINKING_ONLY */
337
358
 
@@ -18,8 +18,10 @@ extern "C" {
18
18
  /*-****************************************
19
19
  * Dependencies
20
20
  ******************************************/
21
- #include <stddef.h> /* size_t, ptrdiff_t */
22
- #include <string.h> /* memcpy */
21
+ #include <stddef.h> /* size_t, ptrdiff_t */
22
+ #include "compiler.h" /* __has_builtin */
23
+ #include "debug.h" /* DEBUG_STATIC_ASSERT */
24
+ #include "zstd_deps.h" /* ZSTD_memcpy */
23
25
 
24
26
 
25
27
  /*-****************************************
@@ -39,93 +41,15 @@ extern "C" {
39
41
  # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
40
42
  #endif
41
43
 
42
- #ifndef __has_builtin
43
- # define __has_builtin(x) 0 /* compat. with non-clang compilers */
44
- #endif
45
-
46
- /* code only tested on 32 and 64 bits systems */
47
- #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
48
- MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
49
-
50
- /* detects whether we are being compiled under msan */
51
- #if defined (__has_feature)
52
- # if __has_feature(memory_sanitizer)
53
- # define MEMORY_SANITIZER 1
54
- # endif
55
- #endif
56
-
57
- #if defined (MEMORY_SANITIZER)
58
- /* Not all platforms that support msan provide sanitizers/msan_interface.h.
59
- * We therefore declare the functions we need ourselves, rather than trying to
60
- * include the header file... */
61
-
62
- #include <stdint.h> /* intptr_t */
63
-
64
- /* Make memory region fully initialized (without changing its contents). */
65
- void __msan_unpoison(const volatile void *a, size_t size);
66
-
67
- /* Make memory region fully uninitialized (without changing its contents).
68
- This is a legacy interface that does not update origin information. Use
69
- __msan_allocated_memory() instead. */
70
- void __msan_poison(const volatile void *a, size_t size);
71
-
72
- /* Returns the offset of the first (at least partially) poisoned byte in the
73
- memory range, or -1 if the whole range is good. */
74
- intptr_t __msan_test_shadow(const volatile void *x, size_t size);
75
- #endif
76
-
77
- /* detects whether we are being compiled under asan */
78
- #if defined (__has_feature)
79
- # if __has_feature(address_sanitizer)
80
- # define ADDRESS_SANITIZER 1
81
- # endif
82
- #elif defined(__SANITIZE_ADDRESS__)
83
- # define ADDRESS_SANITIZER 1
84
- #endif
85
-
86
- #if defined (ADDRESS_SANITIZER)
87
- /* Not all platforms that support asan provide sanitizers/asan_interface.h.
88
- * We therefore declare the functions we need ourselves, rather than trying to
89
- * include the header file... */
90
-
91
- /**
92
- * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
93
- *
94
- * This memory must be previously allocated by your program. Instrumented
95
- * code is forbidden from accessing addresses in this region until it is
96
- * unpoisoned. This function is not guaranteed to poison the entire region -
97
- * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
98
- * alignment restrictions.
99
- *
100
- * \note This function is not thread-safe because no two threads can poison or
101
- * unpoison memory in the same memory region simultaneously.
102
- *
103
- * \param addr Start of memory region.
104
- * \param size Size of memory region. */
105
- void __asan_poison_memory_region(void const volatile *addr, size_t size);
106
-
107
- /**
108
- * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
109
- *
110
- * This memory must be previously allocated by your program. Accessing
111
- * addresses in this region is allowed until this region is poisoned again.
112
- * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
113
- * to ASan alignment restrictions.
114
- *
115
- * \note This function is not thread-safe because no two threads can
116
- * poison or unpoison memory in the same memory region simultaneously.
117
- *
118
- * \param addr Start of memory region.
119
- * \param size Size of memory region. */
120
- void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
121
- #endif
122
-
123
-
124
44
  /*-**************************************************************
125
45
  * Basic Types
126
46
  *****************************************************************/
127
47
  #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
128
- # include <stdint.h>
48
+ # if defined(_AIX)
49
+ # include <inttypes.h>
50
+ # else
51
+ # include <stdint.h> /* intptr_t */
52
+ # endif
129
53
  typedef uint8_t BYTE;
130
54
  typedef uint16_t U16;
131
55
  typedef int16_t S16;
@@ -157,7 +81,53 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
157
81
 
158
82
 
159
83
  /*-**************************************************************
160
- * Memory I/O
84
+ * Memory I/O API
85
+ *****************************************************************/
86
+ /*=== Static platform detection ===*/
87
+ MEM_STATIC unsigned MEM_32bits(void);
88
+ MEM_STATIC unsigned MEM_64bits(void);
89
+ MEM_STATIC unsigned MEM_isLittleEndian(void);
90
+
91
+ /*=== Native unaligned read/write ===*/
92
+ MEM_STATIC U16 MEM_read16(const void* memPtr);
93
+ MEM_STATIC U32 MEM_read32(const void* memPtr);
94
+ MEM_STATIC U64 MEM_read64(const void* memPtr);
95
+ MEM_STATIC size_t MEM_readST(const void* memPtr);
96
+
97
+ MEM_STATIC void MEM_write16(void* memPtr, U16 value);
98
+ MEM_STATIC void MEM_write32(void* memPtr, U32 value);
99
+ MEM_STATIC void MEM_write64(void* memPtr, U64 value);
100
+
101
+ /*=== Little endian unaligned read/write ===*/
102
+ MEM_STATIC U16 MEM_readLE16(const void* memPtr);
103
+ MEM_STATIC U32 MEM_readLE24(const void* memPtr);
104
+ MEM_STATIC U32 MEM_readLE32(const void* memPtr);
105
+ MEM_STATIC U64 MEM_readLE64(const void* memPtr);
106
+ MEM_STATIC size_t MEM_readLEST(const void* memPtr);
107
+
108
+ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
109
+ MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
110
+ MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
111
+ MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
112
+ MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
113
+
114
+ /*=== Big endian unaligned read/write ===*/
115
+ MEM_STATIC U32 MEM_readBE32(const void* memPtr);
116
+ MEM_STATIC U64 MEM_readBE64(const void* memPtr);
117
+ MEM_STATIC size_t MEM_readBEST(const void* memPtr);
118
+
119
+ MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
120
+ MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
121
+ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
122
+
123
+ /*=== Byteswap ===*/
124
+ MEM_STATIC U32 MEM_swap32(U32 in);
125
+ MEM_STATIC U64 MEM_swap64(U64 in);
126
+ MEM_STATIC size_t MEM_swapST(size_t in);
127
+
128
+
129
+ /*-**************************************************************
130
+ * Memory I/O Implementation
161
131
  *****************************************************************/
162
132
  /* MEM_FORCE_MEMORY_ACCESS :
163
133
  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
@@ -236,37 +206,37 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v =
236
206
 
237
207
  MEM_STATIC U16 MEM_read16(const void* memPtr)
238
208
  {
239
- U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
209
+ U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
240
210
  }
241
211
 
242
212
  MEM_STATIC U32 MEM_read32(const void* memPtr)
243
213
  {
244
- U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
214
+ U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
245
215
  }
246
216
 
247
217
  MEM_STATIC U64 MEM_read64(const void* memPtr)
248
218
  {
249
- U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
219
+ U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
250
220
  }
251
221
 
252
222
  MEM_STATIC size_t MEM_readST(const void* memPtr)
253
223
  {
254
- size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
224
+ size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val;
255
225
  }
256
226
 
257
227
  MEM_STATIC void MEM_write16(void* memPtr, U16 value)
258
228
  {
259
- memcpy(memPtr, &value, sizeof(value));
229
+ ZSTD_memcpy(memPtr, &value, sizeof(value));
260
230
  }
261
231
 
262
232
  MEM_STATIC void MEM_write32(void* memPtr, U32 value)
263
233
  {
264
- memcpy(memPtr, &value, sizeof(value));
234
+ ZSTD_memcpy(memPtr, &value, sizeof(value));
265
235
  }
266
236
 
267
237
  MEM_STATIC void MEM_write64(void* memPtr, U64 value)
268
238
  {
269
- memcpy(memPtr, &value, sizeof(value));
239
+ ZSTD_memcpy(memPtr, &value, sizeof(value));
270
240
  }
271
241
 
272
242
  #endif /* MEM_FORCE_MEMORY_ACCESS */
@@ -445,6 +415,9 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
445
415
  MEM_writeBE64(memPtr, (U64)val);
446
416
  }
447
417
 
418
+ /* code only tested on 32 and 64 bits systems */
419
+ MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
420
+
448
421
 
449
422
  #if defined (__cplusplus)
450
423
  }