zstd-ruby 1.4.1.0 → 1.5.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (96) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/libzstd/BUCK +5 -7
  6. data/ext/zstdruby/libzstd/Makefile +304 -113
  7. data/ext/zstdruby/libzstd/README.md +83 -20
  8. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  9. data/ext/zstdruby/libzstd/common/compiler.h +150 -8
  10. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  11. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  12. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  13. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  14. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  15. data/ext/zstdruby/libzstd/common/error_private.h +8 -4
  16. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  17. data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -55
  18. data/ext/zstdruby/libzstd/common/huf.h +43 -39
  19. data/ext/zstdruby/libzstd/common/mem.h +69 -25
  20. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  21. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  22. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  23. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  24. data/ext/zstdruby/libzstd/common/xxhash.c +40 -92
  25. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  26. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  27. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  28. data/ext/zstdruby/libzstd/common/zstd_internal.h +230 -111
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +154 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +332 -193
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3614 -1696
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +546 -86
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +441 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +572 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +662 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +43 -41
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +85 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1184 -111
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +333 -208
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +228 -129
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +151 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +395 -276
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +630 -231
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +606 -380
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +39 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -46
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -31
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +53 -30
  72. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  73. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  74. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
  75. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +24 -14
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +17 -8
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  79. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +17 -8
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +25 -11
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +43 -32
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  85. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +27 -19
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +32 -20
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  89. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  90. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
  91. data/ext/zstdruby/libzstd/zstd.h +740 -153
  92. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
  93. data/lib/zstd-ruby/version.rb +1 -1
  94. data/zstd-ruby.gemspec +1 -1
  95. metadata +21 -10
  96. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -19,7 +19,7 @@ extern "C" {
19
19
  * Dependencies
20
20
  ***************************************/
21
21
  #include <stddef.h> /* size_t */
22
- #include "mem.h" /* U64, U32 */
22
+ #include "../common/mem.h" /* U64, U32 */
23
23
 
24
24
 
25
25
  /* *************************************
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,7 @@
14
14
  #include <stddef.h> /* size_t, ptrdiff_t */
15
15
  #include <string.h> /* memcpy */
16
16
  #include <stdlib.h> /* malloc, free, qsort */
17
- #include "error_private.h"
17
+ #include "../common/error_private.h"
18
18
 
19
19
 
20
20
 
@@ -82,7 +82,11 @@ extern "C" {
82
82
  * Basic Types
83
83
  *****************************************************************/
84
84
  #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
85
- # include <stdint.h>
85
+ # if defined(_AIX)
86
+ # include <inttypes.h>
87
+ # else
88
+ # include <stdint.h> /* intptr_t */
89
+ # endif
86
90
  typedef uint8_t BYTE;
87
91
  typedef uint16_t U16;
88
92
  typedef int16_t S16;
@@ -860,7 +864,7 @@ MEM_STATIC unsigned BITv06_highbit32 ( U32 val)
860
864
  _BitScanReverse ( &r, val );
861
865
  return (unsigned) r;
862
866
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
863
- return 31 - __builtin_clz (val);
867
+ return __builtin_clz (val) ^ 31;
864
868
  # else /* Software version */
865
869
  static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
866
870
  U32 v = val;
@@ -1862,7 +1866,7 @@ MEM_STATIC size_t HUFv06_readStats(BYTE* huffWeight, size_t hwSize, U32* rankSta
1862
1866
 
1863
1867
  if (!srcSize) return ERROR(srcSize_wrong);
1864
1868
  iSize = ip[0];
1865
- //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */
1869
+ /* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */
1866
1870
 
1867
1871
  if (iSize >= 128) { /* special header */
1868
1872
  if (iSize >= (242)) { /* RLE */
@@ -2014,7 +2018,7 @@ size_t HUFv06_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
2014
2018
  HUFv06_DEltX2* const dt = (HUFv06_DEltX2*)dtPtr;
2015
2019
 
2016
2020
  HUFv06_STATIC_ASSERT(sizeof(HUFv06_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */
2017
- //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */
2021
+ /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
2018
2022
 
2019
2023
  iSize = HUFv06_readStats(huffWeight, HUFv06_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
2020
2024
  if (HUFv06_isError(iSize)) return iSize;
@@ -2340,7 +2344,7 @@ size_t HUFv06_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
2340
2344
 
2341
2345
  HUFv06_STATIC_ASSERT(sizeof(HUFv06_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
2342
2346
  if (memLog > HUFv06_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
2343
- //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */
2347
+ /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
2344
2348
 
2345
2349
  iSize = HUFv06_readStats(weightList, HUFv06_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
2346
2350
  if (HUFv06_isError(iSize)) return iSize;
@@ -2664,13 +2668,13 @@ size_t HUFv06_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cS
2664
2668
 
2665
2669
  { U32 algoNb = 0;
2666
2670
  if (Dtime[1] < Dtime[0]) algoNb = 1;
2667
- // if (Dtime[2] < Dtime[algoNb]) algoNb = 2; /* current speed of HUFv06_decompress4X6 is not good */
2671
+ /* if (Dtime[2] < Dtime[algoNb]) algoNb = 2; */ /* current speed of HUFv06_decompress4X6 is not good */
2668
2672
  return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
2669
2673
  }
2670
2674
 
2671
- //return HUFv06_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
2672
- //return HUFv06_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
2673
- //return HUFv06_decompress4X6(dst, dstSize, cSrc, cSrcSize); /* multi-streams quad-symbols decoding */
2675
+ /* return HUFv06_decompress4X2(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams single-symbol decoding */
2676
+ /* return HUFv06_decompress4X4(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams double-symbols decoding */
2677
+ /* return HUFv06_decompress4X6(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams quad-symbols decoding */
2674
2678
  }
2675
2679
  /*
2676
2680
  Common functions of Zstd compression library
@@ -3025,7 +3029,7 @@ typedef struct
3025
3029
  * Provides the size of compressed block from block header `src` */
3026
3030
  static size_t ZSTDv06_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
3027
3031
  {
3028
- const BYTE* const in = (const BYTE* const)src;
3032
+ const BYTE* const in = (const BYTE*)src;
3029
3033
  U32 cSize;
3030
3034
 
3031
3035
  if (srcSize < ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong);
@@ -3219,7 +3223,7 @@ static size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr,
3219
3223
  FSEv06_DTable* DTableLL, FSEv06_DTable* DTableML, FSEv06_DTable* DTableOffb, U32 flagRepeatTable,
3220
3224
  const void* src, size_t srcSize)
3221
3225
  {
3222
- const BYTE* const istart = (const BYTE* const)src;
3226
+ const BYTE* const istart = (const BYTE*)src;
3223
3227
  const BYTE* const iend = istart + srcSize;
3224
3228
  const BYTE* ip = istart;
3225
3229
 
@@ -3441,7 +3445,7 @@ static size_t ZSTDv06_decompressSequences(
3441
3445
  {
3442
3446
  const BYTE* ip = (const BYTE*)seqStart;
3443
3447
  const BYTE* const iend = ip + seqSize;
3444
- BYTE* const ostart = (BYTE* const)dst;
3448
+ BYTE* const ostart = (BYTE*)dst;
3445
3449
  BYTE* const oend = ostart + maxDstSize;
3446
3450
  BYTE* op = ostart;
3447
3451
  const BYTE* litPtr = dctx->litPtr;
@@ -3501,8 +3505,10 @@ static size_t ZSTDv06_decompressSequences(
3501
3505
  { size_t const lastLLSize = litEnd - litPtr;
3502
3506
  if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
3503
3507
  if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
3504
- memcpy(op, litPtr, lastLLSize);
3505
- op += lastLLSize;
3508
+ if (lastLLSize > 0) {
3509
+ memcpy(op, litPtr, lastLLSize);
3510
+ op += lastLLSize;
3511
+ }
3506
3512
  }
3507
3513
 
3508
3514
  return op-ostart;
@@ -3555,7 +3561,7 @@ static size_t ZSTDv06_decompressFrame(ZSTDv06_DCtx* dctx,
3555
3561
  {
3556
3562
  const BYTE* ip = (const BYTE*)src;
3557
3563
  const BYTE* const iend = ip + srcSize;
3558
- BYTE* const ostart = (BYTE* const)dst;
3564
+ BYTE* const ostart = (BYTE*)dst;
3559
3565
  BYTE* op = ostart;
3560
3566
  BYTE* const oend = ostart + dstCapacity;
3561
3567
  size_t remainingSize = srcSize;
@@ -4000,7 +4006,9 @@ size_t ZBUFFv06_decompressInit(ZBUFFv06_DCtx* zbd)
4000
4006
  MEM_STATIC size_t ZBUFFv06_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
4001
4007
  {
4002
4008
  size_t length = MIN(dstCapacity, srcSize);
4003
- memcpy(dst, src, length);
4009
+ if (length > 0) {
4010
+ memcpy(dst, src, length);
4011
+ }
4004
4012
  return length;
4005
4013
  }
4006
4014
 
@@ -4109,7 +4117,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
4109
4117
  if (!decodedSize) { zbd->stage = ZBUFFds_read; break; } /* this was just a header */
4110
4118
  zbd->outEnd = zbd->outStart + decodedSize;
4111
4119
  zbd->stage = ZBUFFds_flush;
4112
- // break; /* ZBUFFds_flush follows */
4120
+ /* break; */ /* ZBUFFds_flush follows */
4113
4121
  }
4114
4122
  }
4115
4123
  /* fall-through */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -17,14 +17,14 @@
17
17
  #ifndef XXH_STATIC_LINKING_ONLY
18
18
  # define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
19
19
  #endif
20
- #include "xxhash.h" /* XXH64_* */
20
+ #include "../common/xxhash.h" /* XXH64_* */
21
21
  #include "zstd_v07.h"
22
22
 
23
23
  #define FSEv07_STATIC_LINKING_ONLY /* FSEv07_MIN_TABLELOG */
24
24
  #define HUFv07_STATIC_LINKING_ONLY /* HUFv07_TABLELOG_ABSOLUTEMAX */
25
25
  #define ZSTDv07_STATIC_LINKING_ONLY
26
26
 
27
- #include "error_private.h"
27
+ #include "../common/error_private.h"
28
28
 
29
29
 
30
30
  #ifdef ZSTDv07_STATIC_LINKING_ONLY
@@ -242,7 +242,11 @@ extern "C" {
242
242
  * Basic Types
243
243
  *****************************************************************/
244
244
  #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
245
- # include <stdint.h>
245
+ # if defined(_AIX)
246
+ # include <inttypes.h>
247
+ # else
248
+ # include <stdint.h> /* intptr_t */
249
+ # endif
246
250
  typedef uint8_t BYTE;
247
251
  typedef uint16_t U16;
248
252
  typedef int16_t S16;
@@ -530,7 +534,7 @@ MEM_STATIC unsigned BITv07_highbit32 (U32 val)
530
534
  _BitScanReverse ( &r, val );
531
535
  return (unsigned) r;
532
536
  # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
533
- return 31 - __builtin_clz (val);
537
+ return __builtin_clz (val) ^ 31;
534
538
  # else /* Software version */
535
539
  static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
536
540
  U32 v = val;
@@ -1314,7 +1318,7 @@ size_t HUFv07_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
1314
1318
 
1315
1319
  if (!srcSize) return ERROR(srcSize_wrong);
1316
1320
  iSize = ip[0];
1317
- //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */
1321
+ /* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */
1318
1322
 
1319
1323
  if (iSize >= 128) { /* special header */
1320
1324
  if (iSize >= (242)) { /* RLE */
@@ -1784,7 +1788,7 @@ size_t HUFv07_readDTableX2 (HUFv07_DTable* DTable, const void* src, size_t srcSi
1784
1788
  HUFv07_DEltX2* const dt = (HUFv07_DEltX2*)dtPtr;
1785
1789
 
1786
1790
  HUFv07_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUFv07_DTable));
1787
- //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */
1791
+ /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
1788
1792
 
1789
1793
  iSize = HUFv07_readStats(huffWeight, HUFv07_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
1790
1794
  if (HUFv07_isError(iSize)) return iSize;
@@ -2148,7 +2152,7 @@ size_t HUFv07_readDTableX4 (HUFv07_DTable* DTable, const void* src, size_t srcSi
2148
2152
 
2149
2153
  HUFv07_STATIC_ASSERT(sizeof(HUFv07_DEltX4) == sizeof(HUFv07_DTable)); /* if compilation fails here, assertion is false */
2150
2154
  if (maxTableLog > HUFv07_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge);
2151
- //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */
2155
+ /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
2152
2156
 
2153
2157
  iSize = HUFv07_readStats(weightList, HUFv07_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
2154
2158
  if (HUFv07_isError(iSize)) return iSize;
@@ -2530,8 +2534,8 @@ size_t HUFv07_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cS
2530
2534
  return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
2531
2535
  }
2532
2536
 
2533
- //return HUFv07_decompress4X2(dst, dstSize, cSrc, cSrcSize); /* multi-streams single-symbol decoding */
2534
- //return HUFv07_decompress4X4(dst, dstSize, cSrc, cSrcSize); /* multi-streams double-symbols decoding */
2537
+ /* return HUFv07_decompress4X2(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams single-symbol decoding */
2538
+ /* return HUFv07_decompress4X4(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams double-symbols decoding */
2535
2539
  }
2536
2540
 
2537
2541
  size_t HUFv07_decompress4X_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
@@ -3254,7 +3258,7 @@ typedef struct
3254
3258
  * Provides the size of compressed block from block header `src` */
3255
3259
  static size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
3256
3260
  {
3257
- const BYTE* const in = (const BYTE* const)src;
3261
+ const BYTE* const in = (const BYTE*)src;
3258
3262
  U32 cSize;
3259
3263
 
3260
3264
  if (srcSize < ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
@@ -3272,7 +3276,9 @@ static size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProper
3272
3276
  static size_t ZSTDv07_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
3273
3277
  {
3274
3278
  if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
3275
- memcpy(dst, src, srcSize);
3279
+ if (srcSize > 0) {
3280
+ memcpy(dst, src, srcSize);
3281
+ }
3276
3282
  return srcSize;
3277
3283
  }
3278
3284
 
@@ -3447,7 +3453,7 @@ static size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr,
3447
3453
  FSEv07_DTable* DTableLL, FSEv07_DTable* DTableML, FSEv07_DTable* DTableOffb, U32 flagRepeatTable,
3448
3454
  const void* src, size_t srcSize)
3449
3455
  {
3450
- const BYTE* const istart = (const BYTE* const)src;
3456
+ const BYTE* const istart = (const BYTE*)src;
3451
3457
  const BYTE* const iend = istart + srcSize;
3452
3458
  const BYTE* ip = istart;
3453
3459
 
@@ -3666,7 +3672,7 @@ static size_t ZSTDv07_decompressSequences(
3666
3672
  {
3667
3673
  const BYTE* ip = (const BYTE*)seqStart;
3668
3674
  const BYTE* const iend = ip + seqSize;
3669
- BYTE* const ostart = (BYTE* const)dst;
3675
+ BYTE* const ostart = (BYTE*)dst;
3670
3676
  BYTE* const oend = ostart + maxDstSize;
3671
3677
  BYTE* op = ostart;
3672
3678
  const BYTE* litPtr = dctx->litPtr;
@@ -3712,10 +3718,12 @@ static size_t ZSTDv07_decompressSequences(
3712
3718
 
3713
3719
  /* last literal segment */
3714
3720
  { size_t const lastLLSize = litEnd - litPtr;
3715
- //if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
3721
+ /* if (litPtr > litEnd) return ERROR(corruption_detected); */ /* too many literals already used */
3716
3722
  if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
3717
- memcpy(op, litPtr, lastLLSize);
3718
- op += lastLLSize;
3723
+ if (lastLLSize > 0) {
3724
+ memcpy(op, litPtr, lastLLSize);
3725
+ op += lastLLSize;
3726
+ }
3719
3727
  }
3720
3728
 
3721
3729
  return op-ostart;
@@ -3776,7 +3784,9 @@ ZSTDLIBv07_API size_t ZSTDv07_insertBlock(ZSTDv07_DCtx* dctx, const void* blockS
3776
3784
  static size_t ZSTDv07_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length)
3777
3785
  {
3778
3786
  if (length > dstCapacity) return ERROR(dstSize_tooSmall);
3779
- memset(dst, byte, length);
3787
+ if (length > 0) {
3788
+ memset(dst, byte, length);
3789
+ }
3780
3790
  return length;
3781
3791
  }
3782
3792
 
@@ -3789,7 +3799,7 @@ static size_t ZSTDv07_decompressFrame(ZSTDv07_DCtx* dctx,
3789
3799
  {
3790
3800
  const BYTE* ip = (const BYTE*)src;
3791
3801
  const BYTE* const iend = ip + srcSize;
3792
- BYTE* const ostart = (BYTE* const)dst;
3802
+ BYTE* const ostart = (BYTE*)dst;
3793
3803
  BYTE* const oend = ostart + dstCapacity;
3794
3804
  BYTE* op = ostart;
3795
3805
  size_t remainingSize = srcSize;
@@ -4378,7 +4388,9 @@ size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* zbd)
4378
4388
  MEM_STATIC size_t ZBUFFv07_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
4379
4389
  {
4380
4390
  size_t const length = MIN(dstCapacity, srcSize);
4381
- memcpy(dst, src, length);
4391
+ if (length > 0) {
4392
+ memcpy(dst, src, length);
4393
+ }
4382
4394
  return length;
4383
4395
  }
4384
4396
 
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -3,8 +3,9 @@
3
3
  # BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
4
4
 
5
5
  prefix=@PREFIX@
6
- libdir=@LIBDIR@
6
+ exec_prefix=@EXEC_PREFIX@
7
7
  includedir=@INCLUDEDIR@
8
+ libdir=@LIBDIR@
8
9
 
9
10
  Name: zstd
10
11
  Description: fast lossless compression algorithm library
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -36,6 +36,145 @@ extern "C" {
36
36
  # define ZDICTLIB_API ZDICTLIB_VISIBILITY
37
37
  #endif
38
38
 
39
+ /*******************************************************************************
40
+ * Zstd dictionary builder
41
+ *
42
+ * FAQ
43
+ * ===
44
+ * Why should I use a dictionary?
45
+ * ------------------------------
46
+ *
47
+ * Zstd can use dictionaries to improve compression ratio of small data.
48
+ * Traditionally small files don't compress well because there is very little
49
+ * repetition in a single sample, since it is small. But, if you are compressing
50
+ * many similar files, like a bunch of JSON records that share the same
51
+ * structure, you can train a dictionary ahead of time on some samples of
52
+ * these files. Then, zstd can use the dictionary to find repetitions that are
53
+ * present across samples. This can vastly improve compression ratio.
54
+ *
55
+ * When is a dictionary useful?
56
+ * ----------------------------
57
+ *
58
+ * Dictionaries are useful when compressing many small files that are similar.
59
+ * The larger a file is, the less benefit a dictionary will have. Generally,
60
+ * we don't expect dictionary compression to be effective past 100KB. And the
61
+ * smaller a file is, the more we would expect the dictionary to help.
62
+ *
63
+ * How do I use a dictionary?
64
+ * --------------------------
65
+ *
66
+ * Simply pass the dictionary to the zstd compressor with
67
+ * `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
68
+ * the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
69
+ * more advanced functions that allow selecting some options, see zstd.h for
70
+ * complete documentation.
71
+ *
72
+ * What is a zstd dictionary?
73
+ * --------------------------
74
+ *
75
+ * A zstd dictionary has two pieces: Its header, and its content. The header
76
+ * contains a magic number, the dictionary ID, and entropy tables. These
77
+ * entropy tables allow zstd to save on header costs in the compressed file,
78
+ * which really matters for small data. The content is just bytes, which are
79
+ * repeated content that is common across many samples.
80
+ *
81
+ * What is a raw content dictionary?
82
+ * ---------------------------------
83
+ *
84
+ * A raw content dictionary is just bytes. It doesn't have a zstd dictionary
85
+ * header, a dictionary ID, or entropy tables. Any buffer is a valid raw
86
+ * content dictionary.
87
+ *
88
+ * How do I train a dictionary?
89
+ * ----------------------------
90
+ *
91
+ * Gather samples from your use case. These samples should be similar to each
92
+ * other. If you have several use cases, you could try to train one dictionary
93
+ * per use case.
94
+ *
95
+ * Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
96
+ * dictionary. There are a few advanced versions of this function, but this
97
+ * is a great starting point. If you want to further tune your dictionary
98
+ * you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
99
+ * you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
100
+ *
101
+ * If the dictionary training function fails, that is likely because you
102
+ * either passed too few samples, or a dictionary would not be effective
103
+ * for your data. Look at the messages that the dictionary trainer printed,
104
+ * if it doesn't say too few samples, then a dictionary would not be effective.
105
+ *
106
+ * How large should my dictionary be?
107
+ * ----------------------------------
108
+ *
109
+ * A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
110
+ * The zstd CLI defaults to a 110KB dictionary. You likely don't need a
111
+ * dictionary larger than that. But, most use cases can get away with a
112
+ * smaller dictionary. The advanced dictionary builders can automatically
113
+ * shrink the dictionary for you, and select the smallest size that
114
+ * doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
115
+ * A smaller dictionary can save memory, and potentially speed up
116
+ * compression.
117
+ *
118
+ * How many samples should I provide to the dictionary builder?
119
+ * ------------------------------------------------------------
120
+ *
121
+ * We generally recommend passing ~100x the size of the dictionary
122
+ * in samples. A few thousand should suffice. Having too few samples
123
+ * can hurt the dictionary's effectiveness. Having more samples will
124
+ * only improve the dictionary's effectiveness. But having too many
125
+ * samples can slow down the dictionary builder.
126
+ *
127
+ * How do I determine if a dictionary will be effective?
128
+ * -----------------------------------------------------
129
+ *
130
+ * Simply train a dictionary and try it out. You can use zstd's built in
131
+ * benchmarking tool to test the dictionary effectiveness.
132
+ *
133
+ * # Benchmark levels 1-3 without a dictionary
134
+ * zstd -b1e3 -r /path/to/my/files
135
+ * # Benchmark levels 1-3 with a dictionary
136
+ * zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
137
+ *
138
+ * When should I retrain a dictionary?
139
+ * -----------------------------------
140
+ *
141
+ * You should retrain a dictionary when its effectiveness drops. Dictionary
142
+ * effectiveness drops as the data you are compressing changes. Generally, we do
143
+ * expect dictionaries to "decay" over time, as your data changes, but the rate
144
+ * at which they decay depends on your use case. Internally, we regularly
145
+ * retrain dictionaries, and if the new dictionary performs significantly
146
+ * better than the old dictionary, we will ship the new dictionary.
147
+ *
148
+ * I have a raw content dictionary, how do I turn it into a zstd dictionary?
149
+ * -------------------------------------------------------------------------
150
+ *
151
+ * If you have a raw content dictionary, e.g. by manually constructing it, or
152
+ * using a third-party dictionary builder, you can turn it into a zstd
153
+ * dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
154
+ * provide some samples of the data. It will add the zstd header to the
155
+ * raw content, which contains a dictionary ID and entropy tables, which
156
+ * will improve compression ratio, and allow zstd to write the dictionary ID
157
+ * into the frame, if you so choose.
158
+ *
159
+ * Do I have to use zstd's dictionary builder?
160
+ * -------------------------------------------
161
+ *
162
+ * No! You can construct dictionary content however you please, it is just
163
+ * bytes. It will always be valid as a raw content dictionary. If you want
164
+ * a zstd dictionary, which can improve compression ratio, use
165
+ * `ZDICT_finalizeDictionary()`.
166
+ *
167
+ * What is the attack surface of a zstd dictionary?
168
+ * ------------------------------------------------
169
+ *
170
+ * Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
171
+ * zstd should never crash, or access out-of-bounds memory no matter what
172
+ * the dictionary is. However, if an attacker can control the dictionary
173
+ * during decompression, they can cause zstd to generate arbitrary bytes,
174
+ * just like if they controlled the compressed data.
175
+ *
176
+ ******************************************************************************/
177
+
39
178
 
40
179
  /*! ZDICT_trainFromBuffer():
41
180
  * Train a dictionary from an array of samples.
@@ -61,9 +200,64 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
61
200
  const void* samplesBuffer,
62
201
  const size_t* samplesSizes, unsigned nbSamples);
63
202
 
203
+ typedef struct {
204
+ int compressionLevel; /**< optimize for a specific zstd compression level; 0 means default */
205
+ unsigned notificationLevel; /**< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
206
+ unsigned dictID; /**< force dictID value; 0 means auto mode (32-bit random value)
207
+ * NOTE: The zstd format reserves some dictionary IDs for future use.
208
+ * You may use them in private settings, but be warned that they
209
+ * may be used by zstd in a public dictionary registry in the future.
210
+ * These dictionary IDs are:
211
+ * - low range : <= 32767
212
+ * - high range : >= (2^31)
213
+ */
214
+ } ZDICT_params_t;
215
+
216
+ /*! ZDICT_finalizeDictionary():
217
+ * Given a custom content as a basis for dictionary, and a set of samples,
218
+ * finalize dictionary by adding headers and statistics according to the zstd
219
+ * dictionary format.
220
+ *
221
+ * Samples must be stored concatenated in a flat buffer `samplesBuffer`,
222
+ * supplied with an array of sizes `samplesSizes`, providing the size of each
223
+ * sample in order. The samples are used to construct the statistics, so they
224
+ * should be representative of what you will compress with this dictionary.
225
+ *
226
+ * The compression level can be set in `parameters`. You should pass the
227
+ * compression level you expect to use in production. The statistics for each
228
+ * compression level differ, so tuning the dictionary for the compression level
229
+ * can help quite a bit.
230
+ *
231
+ * You can set an explicit dictionary ID in `parameters`, or allow us to pick
232
+ * a random dictionary ID for you, but we can't guarantee no collisions.
233
+ *
234
+ * The dstDictBuffer and the dictContent may overlap, and the content will be
235
+ * appended to the end of the header. If the header + the content doesn't fit in
236
+ * maxDictSize the beginning of the content is truncated to make room, since it
237
+ * is presumed that the most profitable content is at the end of the dictionary,
238
+ * since that is the cheapest to reference.
239
+ *
240
+ * `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
241
+ * `maxDictSize` must be >= max(dictContentSize, ZDICT_DICTSIZE_MIN).
242
+ *
243
+ * @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
244
+ * or an error code, which can be tested by ZDICT_isError().
245
+ * Note: ZDICT_finalizeDictionary() will push notifications into stderr if
246
+ * instructed to, using notificationLevel>0.
247
+ * NOTE: This function currently may fail in several edge cases including:
248
+ * * Not enough samples
249
+ * * Samples are incompressible
250
+ * * Samples are all exactly the same
251
+ */
252
+ ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
253
+ const void* dictContent, size_t dictContentSize,
254
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
255
+ ZDICT_params_t parameters);
256
+
64
257
 
65
258
  /*====== Helper functions ======*/
66
259
  ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
260
+ ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
67
261
  ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
68
262
  ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
69
263
 
@@ -78,11 +272,8 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
78
272
  * Use them only in association with static linking.
79
273
  * ==================================================================================== */
80
274
 
81
- typedef struct {
82
- int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */
83
- unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
84
- unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */
85
- } ZDICT_params_t;
275
+ #define ZDICT_CONTENTSIZE_MIN 128
276
+ #define ZDICT_DICTSIZE_MIN 256
86
277
 
87
278
  /*! ZDICT_cover_params_t:
88
279
  * k and d are the only required parameters.
@@ -198,28 +389,6 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
198
389
  const size_t* samplesSizes, unsigned nbSamples,
199
390
  ZDICT_fastCover_params_t* parameters);
200
391
 
201
- /*! ZDICT_finalizeDictionary():
202
- * Given a custom content as a basis for dictionary, and a set of samples,
203
- * finalize dictionary by adding headers and statistics.
204
- *
205
- * Samples must be stored concatenated in a flat buffer `samplesBuffer`,
206
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
207
- *
208
- * dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
209
- * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
210
- *
211
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
212
- * or an error code, which can be tested by ZDICT_isError().
213
- * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
214
- * Note 2: dictBuffer and dictContent can overlap
215
- */
216
- #define ZDICT_CONTENTSIZE_MIN 128
217
- #define ZDICT_DICTSIZE_MIN 256
218
- ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
219
- const void* dictContent, size_t dictContentSize,
220
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
221
- ZDICT_params_t parameters);
222
-
223
392
  typedef struct {
224
393
  unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
225
394
  ZDICT_params_t zParams;
@@ -241,10 +410,11 @@ typedef struct {
241
410
  * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
242
411
  */
243
412
  ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
244
- void *dictBuffer, size_t dictBufferCapacity,
245
- const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
413
+ void* dictBuffer, size_t dictBufferCapacity,
414
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
246
415
  ZDICT_legacy_params_t parameters);
247
416
 
417
+
248
418
  /* Deprecation warnings */
249
419
  /* It is generally possible to disable deprecation warnings from compiler,
250
420
  for example with -Wno-deprecated-declarations for gcc
@@ -256,7 +426,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
256
426
  # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
257
427
  # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
258
428
  # define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
259
- # elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
429
+ # elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
260
430
  # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
261
431
  # elif (ZDICT_GCC_VERSION >= 301)
262
432
  # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))