zstd-ruby 1.4.5.0 → 1.4.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (93) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +237 -138
  5. data/ext/zstdruby/libzstd/README.md +28 -0
  6. data/ext/zstdruby/libzstd/common/bitstream.h +25 -16
  7. data/ext/zstdruby/libzstd/common/compiler.h +118 -4
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  10. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +189 -43
  12. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +2 -2
  14. data/ext/zstdruby/libzstd/common/fse.h +40 -12
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +124 -17
  16. data/ext/zstdruby/libzstd/common/huf.h +27 -6
  17. data/ext/zstdruby/libzstd/common/mem.h +67 -94
  18. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  19. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  20. data/ext/zstdruby/libzstd/common/threading.c +6 -5
  21. data/ext/zstdruby/libzstd/common/xxhash.c +19 -57
  22. data/ext/zstdruby/libzstd/common/xxhash.h +2 -2
  23. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  24. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  25. data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
  26. data/ext/zstdruby/libzstd/common/zstd_internal.h +90 -59
  27. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  28. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  29. data/ext/zstdruby/libzstd/compress/fse_compress.c +31 -24
  30. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  31. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  32. data/ext/zstdruby/libzstd/compress/huf_compress.c +217 -101
  33. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1495 -478
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +143 -44
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +7 -7
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +18 -4
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  41. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +62 -26
  42. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +23 -23
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
  44. data/ext/zstdruby/libzstd/compress/zstd_fast.c +21 -21
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
  46. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +352 -78
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  48. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +276 -209
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +8 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  51. data/ext/zstdruby/libzstd/compress/zstd_opt.c +191 -46
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  53. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +79 -410
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +27 -109
  55. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +303 -201
  56. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  58. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +370 -87
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +153 -45
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +6 -3
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +28 -11
  62. data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
  63. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  66. data/ext/zstdruby/libzstd/dictBuilder/cover.c +40 -31
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.h +2 -2
  68. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  69. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +26 -25
  70. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +22 -24
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +5 -4
  72. data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
  73. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  74. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
  75. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +6 -2
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +6 -2
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  79. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +6 -2
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +7 -3
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +10 -6
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
  85. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +10 -6
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +10 -6
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  89. data/ext/zstdruby/libzstd/libzstd.pc.in +3 -3
  90. data/ext/zstdruby/libzstd/zstd.h +414 -54
  91. data/lib/zstd-ruby/version.rb +1 -1
  92. metadata +7 -3
  93. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -48,6 +48,7 @@ const char* ERR_getErrorString(ERR_enum code)
48
48
  case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
49
49
  case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
50
50
  case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
51
+ case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
51
52
  case PREFIX(maxCode):
52
53
  default: return notErrorCode;
53
54
  }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -21,7 +21,7 @@ extern "C" {
21
21
  /* ****************************************
22
22
  * Dependencies
23
23
  ******************************************/
24
- #include <stddef.h> /* size_t */
24
+ #include "zstd_deps.h" /* size_t */
25
25
  #include "zstd_errors.h" /* enum list */
26
26
 
27
27
 
@@ -1,7 +1,7 @@
1
1
  /* ******************************************************************
2
2
  * FSE : Finite State Entropy codec
3
3
  * Public Prototypes declaration
4
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
4
+ * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
5
5
  *
6
6
  * You can contact the author at :
7
7
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -23,7 +23,7 @@ extern "C" {
23
23
  /*-*****************************************
24
24
  * Dependencies
25
25
  ******************************************/
26
- #include <stddef.h> /* size_t, ptrdiff_t */
26
+ #include "zstd_deps.h" /* size_t, ptrdiff_t */
27
27
 
28
28
 
29
29
  /*-*****************************************
@@ -137,10 +137,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize
137
137
  /*! FSE_normalizeCount():
138
138
  normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
139
139
  'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
140
+ useLowProbCount is a boolean parameter which trades off compressed size for
141
+ faster header decoding. When it is set to 1, the compressed data will be slightly
142
+ smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
143
+ faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
144
+ is a good default, since header deserialization makes a big speed difference.
145
+ Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
140
146
  @return : tableLog,
141
147
  or an errorCode, which can be tested using FSE_isError() */
142
148
  FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
143
- const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
149
+ const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
144
150
 
145
151
  /*! FSE_NCountWriteBound():
146
152
  Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@@ -228,6 +234,13 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
228
234
  unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
229
235
  const void* rBuffer, size_t rBuffSize);
230
236
 
237
+ /*! FSE_readNCount_bmi2():
238
+ * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
239
+ */
240
+ FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
241
+ unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
242
+ const void* rBuffer, size_t rBuffSize, int bmi2);
243
+
231
244
  /*! Constructor and Destructor of FSE_DTable.
232
245
  Note that its size depends on 'tableLog' */
233
246
  typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
@@ -288,12 +301,12 @@ If there is an error, the function will return an error code, which can be teste
288
301
  *******************************************/
289
302
  /* FSE buffer bounds */
290
303
  #define FSE_NCOUNTBOUND 512
291
- #define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
304
+ #define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
292
305
  #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
293
306
 
294
307
  /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
295
- #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
296
- #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
308
+ #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
309
+ #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog)))
297
310
 
298
311
  /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
299
312
  #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
@@ -309,9 +322,9 @@ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsi
309
322
 
310
323
  /* FSE_compress_wksp() :
311
324
  * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
312
- * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
325
+ * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
313
326
  */
314
- #define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
327
+ #define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
315
328
  size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
316
329
 
317
330
  size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
@@ -322,18 +335,30 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
322
335
 
323
336
  /* FSE_buildCTable_wksp() :
324
337
  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
325
- * `wkspSize` must be >= `(1<<tableLog)`.
338
+ * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
326
339
  */
340
+ #define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
341
+ #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
327
342
  size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
328
343
 
344
+ #define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
345
+ #define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
346
+ FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
347
+ /**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)` */
348
+
329
349
  size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
330
350
  /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
331
351
 
332
352
  size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
333
353
  /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
334
354
 
335
- size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
336
- /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
355
+ #define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue))
356
+ #define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
357
+ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
358
+ /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
359
+
360
+ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
361
+ /**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
337
362
 
338
363
  typedef enum {
339
364
  FSE_repeat_none, /**< Cannot use the previous table */
@@ -644,6 +669,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
644
669
  #ifndef FSE_DEFAULT_MEMORY_USAGE
645
670
  # define FSE_DEFAULT_MEMORY_USAGE 13
646
671
  #endif
672
+ #if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
673
+ # error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
674
+ #endif
647
675
 
648
676
  /*!FSE_MAX_SYMBOL_VALUE :
649
677
  * Maximum symbol value authorized.
@@ -677,7 +705,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
677
705
  # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
678
706
  #endif
679
707
 
680
- #define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
708
+ #define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
681
709
 
682
710
 
683
711
  #endif /* FSE_STATIC_LINKING_ONLY */
@@ -1,6 +1,6 @@
1
1
  /* ******************************************************************
2
2
  * FSE : Finite State Entropy decoder
3
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
3
+ * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
4
4
  *
5
5
  * You can contact the author at :
6
6
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -16,13 +16,14 @@
16
16
  /* **************************************************************
17
17
  * Includes
18
18
  ****************************************************************/
19
- #include <stdlib.h> /* malloc, free, qsort */
20
- #include <string.h> /* memcpy, memset */
19
+ #include "debug.h" /* assert */
21
20
  #include "bitstream.h"
22
21
  #include "compiler.h"
23
22
  #define FSE_STATIC_LINKING_ONLY
24
23
  #include "fse.h"
25
24
  #include "error_private.h"
25
+ #define ZSTD_DEPS_NEED_MALLOC
26
+ #include "zstd_deps.h"
26
27
 
27
28
 
28
29
  /* **************************************************************
@@ -59,25 +60,27 @@
59
60
  FSE_DTable* FSE_createDTable (unsigned tableLog)
60
61
  {
61
62
  if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
62
- return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
63
+ return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
63
64
  }
64
65
 
65
66
  void FSE_freeDTable (FSE_DTable* dt)
66
67
  {
67
- free(dt);
68
+ ZSTD_free(dt);
68
69
  }
69
70
 
70
- size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
71
+ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
71
72
  {
72
73
  void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
73
74
  FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
74
- U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
75
+ U16* symbolNext = (U16*)workSpace;
76
+ BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
75
77
 
76
78
  U32 const maxSV1 = maxSymbolValue + 1;
77
79
  U32 const tableSize = 1 << tableLog;
78
80
  U32 highThreshold = tableSize-1;
79
81
 
80
82
  /* Sanity Checks */
83
+ if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
81
84
  if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
82
85
  if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
83
86
 
@@ -95,11 +98,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
95
98
  if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
96
99
  symbolNext[s] = normalizedCounter[s];
97
100
  } } }
98
- memcpy(dt, &DTableH, sizeof(DTableH));
101
+ ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
99
102
  }
100
103
 
101
104
  /* Spread symbols */
102
- { U32 const tableMask = tableSize-1;
105
+ if (highThreshold == tableSize - 1) {
106
+ size_t const tableMask = tableSize-1;
107
+ size_t const step = FSE_TABLESTEP(tableSize);
108
+ /* First lay down the symbols in order.
109
+ * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
110
+ * misses since small blocks generally have small table logs, so nearly
111
+ * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
112
+ * our buffer to handle the over-write.
113
+ */
114
+ {
115
+ U64 const add = 0x0101010101010101ull;
116
+ size_t pos = 0;
117
+ U64 sv = 0;
118
+ U32 s;
119
+ for (s=0; s<maxSV1; ++s, sv += add) {
120
+ int i;
121
+ int const n = normalizedCounter[s];
122
+ MEM_write64(spread + pos, sv);
123
+ for (i = 8; i < n; i += 8) {
124
+ MEM_write64(spread + pos + i, sv);
125
+ }
126
+ pos += n;
127
+ }
128
+ }
129
+ /* Now we spread those positions across the table.
130
+ * The benefit of doing it in two stages is that we avoid the
131
+ * variable size inner loop, which caused lots of branch misses.
132
+ * Now we can run through all the positions without any branch misses.
133
+ * We unroll the loop twice, since that is what empirically worked best.
134
+ */
135
+ {
136
+ size_t position = 0;
137
+ size_t s;
138
+ size_t const unroll = 2;
139
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
140
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
141
+ size_t u;
142
+ for (u = 0; u < unroll; ++u) {
143
+ size_t const uPosition = (position + (u * step)) & tableMask;
144
+ tableDecode[uPosition].symbol = spread[s + u];
145
+ }
146
+ position = (position + (unroll * step)) & tableMask;
147
+ }
148
+ assert(position == 0);
149
+ }
150
+ } else {
151
+ U32 const tableMask = tableSize-1;
103
152
  U32 const step = FSE_TABLESTEP(tableSize);
104
153
  U32 s, position = 0;
105
154
  for (s=0; s<maxSV1; s++) {
@@ -124,6 +173,11 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
124
173
  return 0;
125
174
  }
126
175
 
176
+ size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
177
+ {
178
+ return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
179
+ }
180
+
127
181
 
128
182
  #ifndef FSE_COMMONDEFS_ONLY
129
183
 
@@ -251,36 +305,89 @@ size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
251
305
  }
252
306
 
253
307
 
254
- size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog)
308
+ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
309
+ {
310
+ return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
311
+ }
312
+
313
+ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
314
+ void* dst, size_t dstCapacity,
315
+ const void* cSrc, size_t cSrcSize,
316
+ unsigned maxLog, void* workSpace, size_t wkspSize,
317
+ int bmi2)
255
318
  {
256
319
  const BYTE* const istart = (const BYTE*)cSrc;
257
320
  const BYTE* ip = istart;
258
321
  short counting[FSE_MAX_SYMBOL_VALUE+1];
259
322
  unsigned tableLog;
260
323
  unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
324
+ FSE_DTable* const dtable = (FSE_DTable*)workSpace;
261
325
 
262
326
  /* normal FSE decoding mode */
263
- size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
327
+ size_t const NCountLength = FSE_readNCount_bmi2(counting, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
264
328
  if (FSE_isError(NCountLength)) return NCountLength;
265
- /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */
266
329
  if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
330
+ assert(NCountLength <= cSrcSize);
267
331
  ip += NCountLength;
268
332
  cSrcSize -= NCountLength;
269
333
 
270
- CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) );
334
+ if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
335
+ workSpace = dtable + FSE_DTABLE_SIZE_U32(tableLog);
336
+ wkspSize -= FSE_DTABLE_SIZE(tableLog);
337
+
338
+ CHECK_F( FSE_buildDTable_internal(dtable, counting, maxSymbolValue, tableLog, workSpace, wkspSize) );
339
+
340
+ {
341
+ const void* ptr = dtable;
342
+ const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
343
+ const U32 fastMode = DTableH->fastMode;
344
+
345
+ /* select fast mode (static) */
346
+ if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
347
+ return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
348
+ }
349
+ }
350
+
351
+ /* Avoids the FORCE_INLINE of the _body() function. */
352
+ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
353
+ {
354
+ return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
355
+ }
356
+
357
+ #if DYNAMIC_BMI2
358
+ TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
359
+ {
360
+ return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
361
+ }
362
+ #endif
271
363
 
272
- return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */
364
+ size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
365
+ {
366
+ #if DYNAMIC_BMI2
367
+ if (bmi2) {
368
+ return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
369
+ }
370
+ #endif
371
+ (void)bmi2;
372
+ return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
273
373
  }
274
374
 
275
375
 
276
376
  typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
277
377
 
378
+ #ifndef ZSTD_NO_UNUSED_FUNCTIONS
379
+ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) {
380
+ U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)];
381
+ return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp));
382
+ }
383
+
278
384
  size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
279
385
  {
280
- DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
281
- return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG);
386
+ /* Static analyzer seems unable to understand this table will be properly initialized later */
387
+ U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
388
+ return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp));
282
389
  }
283
-
390
+ #endif
284
391
 
285
392
 
286
393
  #endif /* FSE_COMMONDEFS_ONLY */
@@ -1,7 +1,7 @@
1
1
  /* ******************************************************************
2
2
  * huff0 huffman codec,
3
3
  * part of Finite State Entropy library
4
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
4
+ * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
5
5
  *
6
6
  * You can contact the author at :
7
7
  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -20,7 +20,7 @@ extern "C" {
20
20
  #define HUF_H_298734234
21
21
 
22
22
  /* *** Dependencies *** */
23
- #include <stddef.h> /* size_t */
23
+ #include "zstd_deps.h" /* size_t */
24
24
 
25
25
 
26
26
  /* *** library symbols visibility *** */
@@ -111,6 +111,8 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
111
111
 
112
112
  /* *** Dependencies *** */
113
113
  #include "mem.h" /* U32 */
114
+ #define FSE_STATIC_LINKING_ONLY
115
+ #include "fse.h"
114
116
 
115
117
 
116
118
  /* *** Constants *** */
@@ -133,12 +135,16 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
133
135
  #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
134
136
 
135
137
  /* static allocation of HUF's Compression Table */
138
+ /* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
139
+ struct HUF_CElt_s {
140
+ U16 val;
141
+ BYTE nbBits;
142
+ }; /* typedef'd to HUF_CElt */
143
+ typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */
136
144
  #define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */
137
145
  #define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
138
146
  #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
139
- U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \
140
- void* name##hv = &(name##hb); \
141
- HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */
147
+ HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
142
148
 
143
149
  /* static allocation of HUF's DTable */
144
150
  typedef U32 HUF_DTable;
@@ -184,7 +190,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
184
190
  * or to save and regenerate 'CTable' using external methods.
185
191
  */
186
192
  unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
187
- typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
188
193
  size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
189
194
  size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
190
195
  size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
@@ -226,6 +231,19 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
226
231
  U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
227
232
  const void* src, size_t srcSize);
228
233
 
234
+ /*! HUF_readStats_wksp() :
235
+ * Same as HUF_readStats() but takes an external workspace which must be
236
+ * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
237
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
238
+ */
239
+ #define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
240
+ #define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
241
+ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
242
+ U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
243
+ const void* src, size_t srcSize,
244
+ void* workspace, size_t wkspSize,
245
+ int bmi2);
246
+
229
247
  /** HUF_readCTable() :
230
248
  * Loading a CTable saved with HUF_writeCTable() */
231
249
  size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
@@ -332,6 +350,9 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
332
350
  #endif
333
351
  size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
334
352
  size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
353
+ #ifndef HUF_FORCE_DECOMPRESS_X2
354
+ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
355
+ #endif
335
356
 
336
357
  #endif /* HUF_STATIC_LINKING_ONLY */
337
358