zstd-ruby 1.4.1.0 → 1.5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/libzstd/BUCK +5 -7
  6. data/ext/zstdruby/libzstd/Makefile +304 -113
  7. data/ext/zstdruby/libzstd/README.md +83 -20
  8. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  9. data/ext/zstdruby/libzstd/common/compiler.h +150 -8
  10. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  11. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  12. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  13. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  14. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  15. data/ext/zstdruby/libzstd/common/error_private.h +8 -4
  16. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  17. data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -55
  18. data/ext/zstdruby/libzstd/common/huf.h +43 -39
  19. data/ext/zstdruby/libzstd/common/mem.h +69 -25
  20. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  21. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  22. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  23. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  24. data/ext/zstdruby/libzstd/common/xxhash.c +40 -92
  25. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  26. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  27. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  28. data/ext/zstdruby/libzstd/common/zstd_internal.h +230 -111
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +154 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +332 -193
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3614 -1696
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +546 -86
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +441 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +572 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +662 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +43 -41
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +85 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1184 -111
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +333 -208
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +228 -129
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +151 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +395 -276
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +630 -231
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +606 -380
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +39 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -46
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -31
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +53 -30
  72. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  73. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  74. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
  75. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +24 -14
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +17 -8
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  79. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +17 -8
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +25 -11
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +43 -32
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  85. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +27 -19
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +32 -20
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  89. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  90. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
  91. data/ext/zstdruby/libzstd/zstd.h +740 -153
  92. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
  93. data/lib/zstd-ruby/version.rb +1 -1
  94. data/zstd-ruby.gemspec +1 -1
  95. metadata +21 -10
  96. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -19,7 +19,7 @@ extern "C" {
19
19
  * Dependencies
20
20
  ***************************************/
21
21
  #include <stddef.h> /* size_t */
22
- #include "mem.h" /* U64, U32 */
22
+ #include "../common/mem.h" /* U64, U32 */
23
23
 
24
24
 
25
25
  /* *************************************
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,7 @@
14
14
  #include <stddef.h> /* size_t, ptrdiff_t */
15
15
  #include <string.h> /* memcpy */
16
16
  #include <stdlib.h> /* malloc, free, qsort */
17
- #include "error_private.h"
17
+ #include "../common/error_private.h"
18
18
 
19
19
 
20
20
 
@@ -82,7 +82,11 @@ extern "C" {
82
82
  * Basic Types
83
83
  *****************************************************************/
84
84
  #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
85
- # include <stdint.h>
85
+ # if defined(_AIX)
86
+ # include <inttypes.h>
87
+ # else
88
+ # include <stdint.h> /* intptr_t */
89
+ # endif
86
90
  typedef uint8_t BYTE;
87
91
  typedef uint16_t U16;
88
92
  typedef int16_t S16;
@@ -860,7 +864,7 @@ MEM_STATIC unsigned BITv06_highbit32 ( U32 val)
860
864
  _BitScanReverse ( &r, val );
861
865
  return (unsigned) r;
862
866
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
863
- return 31 - __builtin_clz (val);
867
+ return __builtin_clz (val) ^ 31;
864
868
  # else /* Software version */
865
869
  static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
866
870
  U32 v = val;
@@ -1862,7 +1866,7 @@ MEM_STATIC size_t HUFv06_readStats(BYTE* huffWeight, size_t hwSize, U32* rankSta
1862
1866
 
1863
1867
  if (!srcSize) return ERROR(srcSize_wrong);
1864
1868
  iSize = ip[0];
1865
- //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */
1869
+ /* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */
1866
1870
 
1867
1871
  if (iSize >= 128) { /* special header */
1868
1872
  if (iSize >= (242)) { /* RLE */
@@ -2014,7 +2018,7 @@ size_t HUFv06_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
2014
2018
  HUFv06_DEltX2* const dt = (HUFv06_DEltX2*)dtPtr;
2015
2019
 
2016
2020
  HUFv06_STATIC_ASSERT(sizeof(HUFv06_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */
2017
- //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */
2021
+ /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
2018
2022
 
2019
2023
  iSize = HUFv06_readStats(huffWeight, HUFv06_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
2020
2024
  if (HUFv06_isError(iSize)) return iSize;
@@ -2340,7 +2344,7 @@ size_t HUFv06_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
2340
2344
 
2341
2345
  HUFv06_STATIC_ASSERT(sizeof(HUFv06_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
2342
2346
  if (memLog > HUFv06_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
2343
- //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */
2347
+ /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
2344
2348
 
2345
2349
  iSize = HUFv06_readStats(weightList, HUFv06_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
2346
2350
  if (HUFv06_isError(iSize)) return iSize;
@@ -2664,13 +2668,13 @@ size_t HUFv06_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cS
2664
2668
 
2665
2669
  { U32 algoNb = 0;
2666
2670
  if (Dtime[1] < Dtime[0]) algoNb = 1;
2667
- // if (Dtime[2] < Dtime[algoNb]) algoNb = 2; /* current speed of HUFv06_decompress4X6 is not good */
2671
+ /* if (Dtime[2] < Dtime[algoNb]) algoNb = 2; */ /* current speed of HUFv06_decompress4X6 is not good */
2668
2672
  return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
2669
2673
  }
2670
2674
 
2671
- //return HUFv06_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
2672
- //return HUFv06_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
2673
- //return HUFv06_decompress4X6(dst, dstSize, cSrc, cSrcSize); /* multi-streams quad-symbols decoding */
2675
+ /* return HUFv06_decompress4X2(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams single-symbol decoding */
2676
+ /* return HUFv06_decompress4X4(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams double-symbols decoding */
2677
+ /* return HUFv06_decompress4X6(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams quad-symbols decoding */
2674
2678
  }
2675
2679
  /*
2676
2680
  Common functions of Zstd compression library
@@ -3025,7 +3029,7 @@ typedef struct
3025
3029
  * Provides the size of compressed block from block header `src` */
3026
3030
  static size_t ZSTDv06_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
3027
3031
  {
3028
- const BYTE* const in = (const BYTE* const)src;
3032
+ const BYTE* const in = (const BYTE*)src;
3029
3033
  U32 cSize;
3030
3034
 
3031
3035
  if (srcSize < ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong);
@@ -3219,7 +3223,7 @@ static size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr,
3219
3223
  FSEv06_DTable* DTableLL, FSEv06_DTable* DTableML, FSEv06_DTable* DTableOffb, U32 flagRepeatTable,
3220
3224
  const void* src, size_t srcSize)
3221
3225
  {
3222
- const BYTE* const istart = (const BYTE* const)src;
3226
+ const BYTE* const istart = (const BYTE*)src;
3223
3227
  const BYTE* const iend = istart + srcSize;
3224
3228
  const BYTE* ip = istart;
3225
3229
 
@@ -3441,7 +3445,7 @@ static size_t ZSTDv06_decompressSequences(
3441
3445
  {
3442
3446
  const BYTE* ip = (const BYTE*)seqStart;
3443
3447
  const BYTE* const iend = ip + seqSize;
3444
- BYTE* const ostart = (BYTE* const)dst;
3448
+ BYTE* const ostart = (BYTE*)dst;
3445
3449
  BYTE* const oend = ostart + maxDstSize;
3446
3450
  BYTE* op = ostart;
3447
3451
  const BYTE* litPtr = dctx->litPtr;
@@ -3501,8 +3505,10 @@ static size_t ZSTDv06_decompressSequences(
3501
3505
  { size_t const lastLLSize = litEnd - litPtr;
3502
3506
  if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
3503
3507
  if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
3504
- memcpy(op, litPtr, lastLLSize);
3505
- op += lastLLSize;
3508
+ if (lastLLSize > 0) {
3509
+ memcpy(op, litPtr, lastLLSize);
3510
+ op += lastLLSize;
3511
+ }
3506
3512
  }
3507
3513
 
3508
3514
  return op-ostart;
@@ -3555,7 +3561,7 @@ static size_t ZSTDv06_decompressFrame(ZSTDv06_DCtx* dctx,
3555
3561
  {
3556
3562
  const BYTE* ip = (const BYTE*)src;
3557
3563
  const BYTE* const iend = ip + srcSize;
3558
- BYTE* const ostart = (BYTE* const)dst;
3564
+ BYTE* const ostart = (BYTE*)dst;
3559
3565
  BYTE* op = ostart;
3560
3566
  BYTE* const oend = ostart + dstCapacity;
3561
3567
  size_t remainingSize = srcSize;
@@ -4000,7 +4006,9 @@ size_t ZBUFFv06_decompressInit(ZBUFFv06_DCtx* zbd)
4000
4006
  MEM_STATIC size_t ZBUFFv06_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
4001
4007
  {
4002
4008
  size_t length = MIN(dstCapacity, srcSize);
4003
- memcpy(dst, src, length);
4009
+ if (length > 0) {
4010
+ memcpy(dst, src, length);
4011
+ }
4004
4012
  return length;
4005
4013
  }
4006
4014
 
@@ -4109,7 +4117,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
4109
4117
  if (!decodedSize) { zbd->stage = ZBUFFds_read; break; } /* this was just a header */
4110
4118
  zbd->outEnd = zbd->outStart + decodedSize;
4111
4119
  zbd->stage = ZBUFFds_flush;
4112
- // break; /* ZBUFFds_flush follows */
4120
+ /* break; */ /* ZBUFFds_flush follows */
4113
4121
  }
4114
4122
  }
4115
4123
  /* fall-through */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -17,14 +17,14 @@
17
17
  #ifndef XXH_STATIC_LINKING_ONLY
18
18
  # define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
19
19
  #endif
20
- #include "xxhash.h" /* XXH64_* */
20
+ #include "../common/xxhash.h" /* XXH64_* */
21
21
  #include "zstd_v07.h"
22
22
 
23
23
  #define FSEv07_STATIC_LINKING_ONLY /* FSEv07_MIN_TABLELOG */
24
24
  #define HUFv07_STATIC_LINKING_ONLY /* HUFv07_TABLELOG_ABSOLUTEMAX */
25
25
  #define ZSTDv07_STATIC_LINKING_ONLY
26
26
 
27
- #include "error_private.h"
27
+ #include "../common/error_private.h"
28
28
 
29
29
 
30
30
  #ifdef ZSTDv07_STATIC_LINKING_ONLY
@@ -242,7 +242,11 @@ extern "C" {
242
242
  * Basic Types
243
243
  *****************************************************************/
244
244
  #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
245
- # include <stdint.h>
245
+ # if defined(_AIX)
246
+ # include <inttypes.h>
247
+ # else
248
+ # include <stdint.h> /* intptr_t */
249
+ # endif
246
250
  typedef uint8_t BYTE;
247
251
  typedef uint16_t U16;
248
252
  typedef int16_t S16;
@@ -530,7 +534,7 @@ MEM_STATIC unsigned BITv07_highbit32 (U32 val)
530
534
  _BitScanReverse ( &r, val );
531
535
  return (unsigned) r;
532
536
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
533
- return 31 - __builtin_clz (val);
537
+ return __builtin_clz (val) ^ 31;
534
538
  # else /* Software version */
535
539
  static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
536
540
  U32 v = val;
@@ -1314,7 +1318,7 @@ size_t HUFv07_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
1314
1318
 
1315
1319
  if (!srcSize) return ERROR(srcSize_wrong);
1316
1320
  iSize = ip[0];
1317
- //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */
1321
+ /* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */
1318
1322
 
1319
1323
  if (iSize >= 128) { /* special header */
1320
1324
  if (iSize >= (242)) { /* RLE */
@@ -1784,7 +1788,7 @@ size_t HUFv07_readDTableX2 (HUFv07_DTable* DTable, const void* src, size_t srcSi
1784
1788
  HUFv07_DEltX2* const dt = (HUFv07_DEltX2*)dtPtr;
1785
1789
 
1786
1790
  HUFv07_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUFv07_DTable));
1787
- //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */
1791
+ /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
1788
1792
 
1789
1793
  iSize = HUFv07_readStats(huffWeight, HUFv07_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
1790
1794
  if (HUFv07_isError(iSize)) return iSize;
@@ -2148,7 +2152,7 @@ size_t HUFv07_readDTableX4 (HUFv07_DTable* DTable, const void* src, size_t srcSi
2148
2152
 
2149
2153
  HUFv07_STATIC_ASSERT(sizeof(HUFv07_DEltX4) == sizeof(HUFv07_DTable)); /* if compilation fails here, assertion is false */
2150
2154
  if (maxTableLog > HUFv07_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge);
2151
- //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */
2155
+ /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
2152
2156
 
2153
2157
  iSize = HUFv07_readStats(weightList, HUFv07_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
2154
2158
  if (HUFv07_isError(iSize)) return iSize;
@@ -2530,8 +2534,8 @@ size_t HUFv07_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cS
2530
2534
  return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
2531
2535
  }
2532
2536
 
2533
- //return HUFv07_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
2534
- //return HUFv07_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
2537
+ /* return HUFv07_decompress4X2(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams single-symbol decoding */
2538
+ /* return HUFv07_decompress4X4(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams double-symbols decoding */
2535
2539
  }
2536
2540
 
2537
2541
  size_t HUFv07_decompress4X_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
@@ -3254,7 +3258,7 @@ typedef struct
3254
3258
  * Provides the size of compressed block from block header `src` */
3255
3259
  static size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
3256
3260
  {
3257
- const BYTE* const in = (const BYTE* const)src;
3261
+ const BYTE* const in = (const BYTE*)src;
3258
3262
  U32 cSize;
3259
3263
 
3260
3264
  if (srcSize < ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
@@ -3272,7 +3276,9 @@ static size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProper
3272
3276
  static size_t ZSTDv07_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
3273
3277
  {
3274
3278
  if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
3275
- memcpy(dst, src, srcSize);
3279
+ if (srcSize > 0) {
3280
+ memcpy(dst, src, srcSize);
3281
+ }
3276
3282
  return srcSize;
3277
3283
  }
3278
3284
 
@@ -3447,7 +3453,7 @@ static size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr,
3447
3453
  FSEv07_DTable* DTableLL, FSEv07_DTable* DTableML, FSEv07_DTable* DTableOffb, U32 flagRepeatTable,
3448
3454
  const void* src, size_t srcSize)
3449
3455
  {
3450
- const BYTE* const istart = (const BYTE* const)src;
3456
+ const BYTE* const istart = (const BYTE*)src;
3451
3457
  const BYTE* const iend = istart + srcSize;
3452
3458
  const BYTE* ip = istart;
3453
3459
 
@@ -3666,7 +3672,7 @@ static size_t ZSTDv07_decompressSequences(
3666
3672
  {
3667
3673
  const BYTE* ip = (const BYTE*)seqStart;
3668
3674
  const BYTE* const iend = ip + seqSize;
3669
- BYTE* const ostart = (BYTE* const)dst;
3675
+ BYTE* const ostart = (BYTE*)dst;
3670
3676
  BYTE* const oend = ostart + maxDstSize;
3671
3677
  BYTE* op = ostart;
3672
3678
  const BYTE* litPtr = dctx->litPtr;
@@ -3712,10 +3718,12 @@ static size_t ZSTDv07_decompressSequences(
3712
3718
 
3713
3719
  /* last literal segment */
3714
3720
  { size_t const lastLLSize = litEnd - litPtr;
3715
- //if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
3721
+ /* if (litPtr > litEnd) return ERROR(corruption_detected); */ /* too many literals already used */
3716
3722
  if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
3717
- memcpy(op, litPtr, lastLLSize);
3718
- op += lastLLSize;
3723
+ if (lastLLSize > 0) {
3724
+ memcpy(op, litPtr, lastLLSize);
3725
+ op += lastLLSize;
3726
+ }
3719
3727
  }
3720
3728
 
3721
3729
  return op-ostart;
@@ -3776,7 +3784,9 @@ ZSTDLIBv07_API size_t ZSTDv07_insertBlock(ZSTDv07_DCtx* dctx, const void* blockS
3776
3784
  static size_t ZSTDv07_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length)
3777
3785
  {
3778
3786
  if (length > dstCapacity) return ERROR(dstSize_tooSmall);
3779
- memset(dst, byte, length);
3787
+ if (length > 0) {
3788
+ memset(dst, byte, length);
3789
+ }
3780
3790
  return length;
3781
3791
  }
3782
3792
 
@@ -3789,7 +3799,7 @@ static size_t ZSTDv07_decompressFrame(ZSTDv07_DCtx* dctx,
3789
3799
  {
3790
3800
  const BYTE* ip = (const BYTE*)src;
3791
3801
  const BYTE* const iend = ip + srcSize;
3792
- BYTE* const ostart = (BYTE* const)dst;
3802
+ BYTE* const ostart = (BYTE*)dst;
3793
3803
  BYTE* const oend = ostart + dstCapacity;
3794
3804
  BYTE* op = ostart;
3795
3805
  size_t remainingSize = srcSize;
@@ -4378,7 +4388,9 @@ size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* zbd)
4378
4388
  MEM_STATIC size_t ZBUFFv07_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
4379
4389
  {
4380
4390
  size_t const length = MIN(dstCapacity, srcSize);
4381
- memcpy(dst, src, length);
4391
+ if (length > 0) {
4392
+ memcpy(dst, src, length);
4393
+ }
4382
4394
  return length;
4383
4395
  }
4384
4396
 
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -3,8 +3,9 @@
3
3
  # BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
4
4
 
5
5
  prefix=@PREFIX@
6
- libdir=@LIBDIR@
6
+ exec_prefix=@EXEC_PREFIX@
7
7
  includedir=@INCLUDEDIR@
8
+ libdir=@LIBDIR@
8
9
 
9
10
  Name: zstd
10
11
  Description: fast lossless compression algorithm library
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -36,6 +36,145 @@ extern "C" {
36
36
  # define ZDICTLIB_API ZDICTLIB_VISIBILITY
37
37
  #endif
38
38
 
39
+ /*******************************************************************************
40
+ * Zstd dictionary builder
41
+ *
42
+ * FAQ
43
+ * ===
44
+ * Why should I use a dictionary?
45
+ * ------------------------------
46
+ *
47
+ * Zstd can use dictionaries to improve compression ratio of small data.
48
+ * Traditionally small files don't compress well because there is very little
49
+ * repetition in a single sample, since it is small. But, if you are compressing
50
+ * many similar files, like a bunch of JSON records that share the same
51
+ * structure, you can train a dictionary ahead of time on some samples of
52
+ * these files. Then, zstd can use the dictionary to find repetitions that are
53
+ * present across samples. This can vastly improve compression ratio.
54
+ *
55
+ * When is a dictionary useful?
56
+ * ----------------------------
57
+ *
58
+ * Dictionaries are useful when compressing many small files that are similar.
59
+ * The larger a file is, the less benefit a dictionary will have. Generally,
60
+ * we don't expect dictionary compression to be effective past 100KB. And the
61
+ * smaller a file is, the more we would expect the dictionary to help.
62
+ *
63
+ * How do I use a dictionary?
64
+ * --------------------------
65
+ *
66
+ * Simply pass the dictionary to the zstd compressor with
67
+ * `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
68
+ * the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
69
+ * more advanced functions that allow selecting some options, see zstd.h for
70
+ * complete documentation.
71
+ *
72
+ * What is a zstd dictionary?
73
+ * --------------------------
74
+ *
75
+ * A zstd dictionary has two pieces: Its header, and its content. The header
76
+ * contains a magic number, the dictionary ID, and entropy tables. These
77
+ * entropy tables allow zstd to save on header costs in the compressed file,
78
+ * which really matters for small data. The content is just bytes, which are
79
+ * repeated content that is common across many samples.
80
+ *
81
+ * What is a raw content dictionary?
82
+ * ---------------------------------
83
+ *
84
+ * A raw content dictionary is just bytes. It doesn't have a zstd dictionary
85
+ * header, a dictionary ID, or entropy tables. Any buffer is a valid raw
86
+ * content dictionary.
87
+ *
88
+ * How do I train a dictionary?
89
+ * ----------------------------
90
+ *
91
+ * Gather samples from your use case. These samples should be similar to each
92
+ * other. If you have several use cases, you could try to train one dictionary
93
+ * per use case.
94
+ *
95
+ * Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
96
+ * dictionary. There are a few advanced versions of this function, but this
97
+ * is a great starting point. If you want to further tune your dictionary
98
+ * you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
99
+ * you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
100
+ *
101
+ * If the dictionary training function fails, that is likely because you
102
+ * either passed too few samples, or a dictionary would not be effective
103
+ * for your data. Look at the messages that the dictionary trainer printed,
104
+ * if it doesn't say too few samples, then a dictionary would not be effective.
105
+ *
106
+ * How large should my dictionary be?
107
+ * ----------------------------------
108
+ *
109
+ * A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
110
+ * The zstd CLI defaults to a 110KB dictionary. You likely don't need a
111
+ * dictionary larger than that. But, most use cases can get away with a
112
+ * smaller dictionary. The advanced dictionary builders can automatically
113
+ * shrink the dictionary for you, and select the smallest size that
114
+ * doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
115
+ * A smaller dictionary can save memory, and potentially speed up
116
+ * compression.
117
+ *
118
+ * How many samples should I provide to the dictionary builder?
119
+ * ------------------------------------------------------------
120
+ *
121
+ * We generally recommend passing ~100x the size of the dictionary
122
+ * in samples. A few thousand should suffice. Having too few samples
123
+ * can hurt the dictionary's effectiveness. Having more samples will
124
+ * only improve the dictionary's effectiveness. But having too many
125
+ * samples can slow down the dictionary builder.
126
+ *
127
+ * How do I determine if a dictionary will be effective?
128
+ * -----------------------------------------------------
129
+ *
130
+ * Simply train a dictionary and try it out. You can use zstd's built-in
131
+ * benchmarking tool to test the dictionary effectiveness.
132
+ *
133
+ * # Benchmark levels 1-3 without a dictionary
134
+ * zstd -b1e3 -r /path/to/my/files
135
+ * # Benchmark levels 1-3 with a dictionary
136
+ * zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
137
+ *
138
+ * When should I retrain a dictionary?
139
+ * -----------------------------------
140
+ *
141
+ * You should retrain a dictionary when its effectiveness drops. Dictionary
142
+ * effectiveness drops as the data you are compressing changes. Generally, we do
143
+ * expect dictionaries to "decay" over time, as your data changes, but the rate
144
+ * at which they decay depends on your use case. Internally, we regularly
145
+ * retrain dictionaries, and if the new dictionary performs significantly
146
+ * better than the old dictionary, we will ship the new dictionary.
147
+ *
148
+ * I have a raw content dictionary, how do I turn it into a zstd dictionary?
149
+ * -------------------------------------------------------------------------
150
+ *
151
+ * If you have a raw content dictionary, e.g. by manually constructing it, or
152
+ * using a third-party dictionary builder, you can turn it into a zstd
153
+ * dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
154
+ * provide some samples of the data. It will add the zstd header to the
155
+ * raw content, which contains a dictionary ID and entropy tables, which
156
+ * will improve compression ratio, and allow zstd to write the dictionary ID
157
+ * into the frame, if you so choose.
158
+ *
159
+ * Do I have to use zstd's dictionary builder?
160
+ * -------------------------------------------
161
+ *
162
+ * No! You can construct dictionary content however you please, it is just
163
+ * bytes. It will always be valid as a raw content dictionary. If you want
164
+ * a zstd dictionary, which can improve compression ratio, use
165
+ * `ZDICT_finalizeDictionary()`.
166
+ *
167
+ * What is the attack surface of a zstd dictionary?
168
+ * ------------------------------------------------
169
+ *
170
+ * Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
171
+ * zstd should never crash, or access out-of-bounds memory no matter what
172
+ * the dictionary is. However, if an attacker can control the dictionary
173
+ * during decompression, they can cause zstd to generate arbitrary bytes,
174
+ * just like if they controlled the compressed data.
175
+ *
176
+ ******************************************************************************/
177
+
39
178
 
40
179
  /*! ZDICT_trainFromBuffer():
41
180
  * Train a dictionary from an array of samples.
@@ -61,9 +200,64 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
61
200
  const void* samplesBuffer,
62
201
  const size_t* samplesSizes, unsigned nbSamples);
63
202
 
203
+ typedef struct {
204
+ int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */
205
+ unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
206
+ unsigned dictID; /*< force dictID value; 0 means auto mode (32-bit random value)
207
+ * NOTE: The zstd format reserves some dictionary IDs for future use.
208
+ * You may use them in private settings, but be warned that they
209
+ * may be used by zstd in a public dictionary registry in the future.
210
+ * These dictionary IDs are:
211
+ * - low range : <= 32767
212
+ * - high range : >= (2^31)
213
+ */
214
+ } ZDICT_params_t;
215
+
216
+ /*! ZDICT_finalizeDictionary():
217
+ * Given a custom content as a basis for dictionary, and a set of samples,
218
+ * finalize dictionary by adding headers and statistics according to the zstd
219
+ * dictionary format.
220
+ *
221
+ * Samples must be stored concatenated in a flat buffer `samplesBuffer`,
222
+ * supplied with an array of sizes `samplesSizes`, providing the size of each
223
+ * sample in order. The samples are used to construct the statistics, so they
224
+ * should be representative of what you will compress with this dictionary.
225
+ *
226
+ * The compression level can be set in `parameters`. You should pass the
227
+ * compression level you expect to use in production. The statistics for each
228
+ * compression level differ, so tuning the dictionary for the compression level
229
+ * can help quite a bit.
230
+ *
231
+ * You can set an explicit dictionary ID in `parameters`, or allow us to pick
232
+ * a random dictionary ID for you, but we can't guarantee no collisions.
233
+ *
234
+ * The dstDictBuffer and the dictContent may overlap, and the content will be
235
+ * appended to the end of the header. If the header + the content doesn't fit in
236
+ * maxDictSize the beginning of the content is truncated to make room, since it
237
+ * is presumed that the most profitable content is at the end of the dictionary,
238
+ * since that is the cheapest to reference.
239
+ *
240
+ * `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
241
+ * `maxDictSize` must be >= max(dictContentSize, ZDICT_DICTSIZE_MIN).
242
+ *
243
+ * @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
244
+ * or an error code, which can be tested by ZDICT_isError().
245
+ * Note: ZDICT_finalizeDictionary() will push notifications into stderr if
246
+ * instructed to, using notificationLevel>0.
247
+ * NOTE: This function currently may fail in several edge cases including:
248
+ * * Not enough samples
249
+ * * Samples are incompressible
250
+ * * Samples are all exactly the same
251
+ */
252
+ ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
253
+ const void* dictContent, size_t dictContentSize,
254
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
255
+ ZDICT_params_t parameters);
256
+
64
257
 
65
258
  /*====== Helper functions ======*/
66
259
  ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
260
+ ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
67
261
  ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
68
262
  ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
69
263
 
@@ -78,11 +272,8 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
78
272
  * Use them only in association with static linking.
79
273
  * ==================================================================================== */
80
274
 
81
- typedef struct {
82
- int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */
83
- unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
84
- unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */
85
- } ZDICT_params_t;
275
+ #define ZDICT_CONTENTSIZE_MIN 128
276
+ #define ZDICT_DICTSIZE_MIN 256
86
277
 
87
278
  /*! ZDICT_cover_params_t:
88
279
  * k and d are the only required parameters.
@@ -198,28 +389,6 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
198
389
  const size_t* samplesSizes, unsigned nbSamples,
199
390
  ZDICT_fastCover_params_t* parameters);
200
391
 
201
- /*! ZDICT_finalizeDictionary():
202
- * Given a custom content as a basis for dictionary, and a set of samples,
203
- * finalize dictionary by adding headers and statistics.
204
- *
205
- * Samples must be stored concatenated in a flat buffer `samplesBuffer`,
206
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
207
- *
208
- * dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
209
- * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
210
- *
211
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
212
- * or an error code, which can be tested by ZDICT_isError().
213
- * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
214
- * Note 2: dictBuffer and dictContent can overlap
215
- */
216
- #define ZDICT_CONTENTSIZE_MIN 128
217
- #define ZDICT_DICTSIZE_MIN 256
218
- ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
219
- const void* dictContent, size_t dictContentSize,
220
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
221
- ZDICT_params_t parameters);
222
-
223
392
  typedef struct {
224
393
  unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
225
394
  ZDICT_params_t zParams;
@@ -241,10 +410,11 @@ typedef struct {
241
410
  * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
242
411
  */
243
412
  ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
244
- void *dictBuffer, size_t dictBufferCapacity,
245
- const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
413
+ void* dictBuffer, size_t dictBufferCapacity,
414
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
246
415
  ZDICT_legacy_params_t parameters);
247
416
 
417
+
248
418
  /* Deprecation warnings */
249
419
  /* It is generally possible to disable deprecation warnings from compiler,
250
420
  for example with -Wno-deprecated-declarations for gcc
@@ -256,7 +426,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
256
426
  # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
257
427
  # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
258
428
  # define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
259
- # elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
429
+ # elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
260
430
  # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
261
431
  # elif (ZDICT_GCC_VERSION >= 301)
262
432
  # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))