zstd-ruby 1.4.1.0 → 1.5.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/libzstd/BUCK +5 -7
- data/ext/zstdruby/libzstd/Makefile +304 -113
- data/ext/zstdruby/libzstd/README.md +83 -20
- data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
- data/ext/zstdruby/libzstd/common/compiler.h +150 -8
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +8 -4
- data/ext/zstdruby/libzstd/common/fse.h +50 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -55
- data/ext/zstdruby/libzstd/common/huf.h +43 -39
- data/ext/zstdruby/libzstd/common/mem.h +69 -25
- data/ext/zstdruby/libzstd/common/pool.c +30 -20
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/threading.c +51 -4
- data/ext/zstdruby/libzstd/common/threading.h +36 -4
- data/ext/zstdruby/libzstd/common/xxhash.c +40 -92
- data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +230 -111
- data/ext/zstdruby/libzstd/common/zstd_trace.h +154 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +332 -193
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3614 -1696
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +546 -86
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +441 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +572 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +662 -0
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +43 -41
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +85 -80
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1184 -111
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +333 -208
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +228 -129
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +151 -440
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +395 -276
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +630 -231
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +606 -380
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +39 -9
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -46
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -31
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +53 -30
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +24 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +17 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +17 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +25 -11
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +43 -32
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +27 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +32 -20
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
- data/ext/zstdruby/libzstd/zstd.h +740 -153
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
- data/lib/zstd-ruby/version.rb +1 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +21 -10
- data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -19,7 +19,7 @@ extern "C" {
|
|
19
19
|
* Dependencies
|
20
20
|
***************************************/
|
21
21
|
#include <stddef.h> /* size_t */
|
22
|
-
#include "mem.h" /* U64, U32 */
|
22
|
+
#include "../common/mem.h" /* U64, U32 */
|
23
23
|
|
24
24
|
|
25
25
|
/* *************************************
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -14,7 +14,7 @@
|
|
14
14
|
#include <stddef.h> /* size_t, ptrdiff_t */
|
15
15
|
#include <string.h> /* memcpy */
|
16
16
|
#include <stdlib.h> /* malloc, free, qsort */
|
17
|
-
#include "error_private.h"
|
17
|
+
#include "../common/error_private.h"
|
18
18
|
|
19
19
|
|
20
20
|
|
@@ -82,7 +82,11 @@ extern "C" {
|
|
82
82
|
* Basic Types
|
83
83
|
*****************************************************************/
|
84
84
|
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
|
85
|
-
#
|
85
|
+
# if defined(_AIX)
|
86
|
+
# include <inttypes.h>
|
87
|
+
# else
|
88
|
+
# include <stdint.h> /* intptr_t */
|
89
|
+
# endif
|
86
90
|
typedef uint8_t BYTE;
|
87
91
|
typedef uint16_t U16;
|
88
92
|
typedef int16_t S16;
|
@@ -860,7 +864,7 @@ MEM_STATIC unsigned BITv06_highbit32 ( U32 val)
|
|
860
864
|
_BitScanReverse ( &r, val );
|
861
865
|
return (unsigned) r;
|
862
866
|
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
|
863
|
-
return
|
867
|
+
return __builtin_clz (val) ^ 31;
|
864
868
|
# else /* Software version */
|
865
869
|
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
|
866
870
|
U32 v = val;
|
@@ -1862,7 +1866,7 @@ MEM_STATIC size_t HUFv06_readStats(BYTE* huffWeight, size_t hwSize, U32* rankSta
|
|
1862
1866
|
|
1863
1867
|
if (!srcSize) return ERROR(srcSize_wrong);
|
1864
1868
|
iSize = ip[0];
|
1865
|
-
|
1869
|
+
/* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzers complain ... */
|
1866
1870
|
|
1867
1871
|
if (iSize >= 128) { /* special header */
|
1868
1872
|
if (iSize >= (242)) { /* RLE */
|
@@ -2014,7 +2018,7 @@ size_t HUFv06_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
|
|
2014
2018
|
HUFv06_DEltX2* const dt = (HUFv06_DEltX2*)dtPtr;
|
2015
2019
|
|
2016
2020
|
HUFv06_STATIC_ASSERT(sizeof(HUFv06_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */
|
2017
|
-
|
2021
|
+
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
|
2018
2022
|
|
2019
2023
|
iSize = HUFv06_readStats(huffWeight, HUFv06_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
|
2020
2024
|
if (HUFv06_isError(iSize)) return iSize;
|
@@ -2340,7 +2344,7 @@ size_t HUFv06_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
|
|
2340
2344
|
|
2341
2345
|
HUFv06_STATIC_ASSERT(sizeof(HUFv06_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
|
2342
2346
|
if (memLog > HUFv06_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
2343
|
-
|
2347
|
+
/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
|
2344
2348
|
|
2345
2349
|
iSize = HUFv06_readStats(weightList, HUFv06_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
|
2346
2350
|
if (HUFv06_isError(iSize)) return iSize;
|
@@ -2664,13 +2668,13 @@ size_t HUFv06_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|
2664
2668
|
|
2665
2669
|
{ U32 algoNb = 0;
|
2666
2670
|
if (Dtime[1] < Dtime[0]) algoNb = 1;
|
2667
|
-
|
2671
|
+
/* if (Dtime[2] < Dtime[algoNb]) algoNb = 2; */ /* current speed of HUFv06_decompress4X6 is not good */
|
2668
2672
|
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
2669
2673
|
}
|
2670
2674
|
|
2671
|
-
|
2672
|
-
|
2673
|
-
|
2675
|
+
/* return HUFv06_decompress4X2(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams single-symbol decoding */
|
2676
|
+
/* return HUFv06_decompress4X4(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams double-symbols decoding */
|
2677
|
+
/* return HUFv06_decompress4X6(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams quad-symbols decoding */
|
2674
2678
|
}
|
2675
2679
|
/*
|
2676
2680
|
Common functions of Zstd compression library
|
@@ -3025,7 +3029,7 @@ typedef struct
|
|
3025
3029
|
* Provides the size of compressed block from block header `src` */
|
3026
3030
|
static size_t ZSTDv06_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
|
3027
3031
|
{
|
3028
|
-
const BYTE* const in = (const BYTE*
|
3032
|
+
const BYTE* const in = (const BYTE*)src;
|
3029
3033
|
U32 cSize;
|
3030
3034
|
|
3031
3035
|
if (srcSize < ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong);
|
@@ -3219,7 +3223,7 @@ static size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr,
|
|
3219
3223
|
FSEv06_DTable* DTableLL, FSEv06_DTable* DTableML, FSEv06_DTable* DTableOffb, U32 flagRepeatTable,
|
3220
3224
|
const void* src, size_t srcSize)
|
3221
3225
|
{
|
3222
|
-
const BYTE* const istart = (const BYTE*
|
3226
|
+
const BYTE* const istart = (const BYTE*)src;
|
3223
3227
|
const BYTE* const iend = istart + srcSize;
|
3224
3228
|
const BYTE* ip = istart;
|
3225
3229
|
|
@@ -3441,7 +3445,7 @@ static size_t ZSTDv06_decompressSequences(
|
|
3441
3445
|
{
|
3442
3446
|
const BYTE* ip = (const BYTE*)seqStart;
|
3443
3447
|
const BYTE* const iend = ip + seqSize;
|
3444
|
-
BYTE* const ostart = (BYTE*
|
3448
|
+
BYTE* const ostart = (BYTE*)dst;
|
3445
3449
|
BYTE* const oend = ostart + maxDstSize;
|
3446
3450
|
BYTE* op = ostart;
|
3447
3451
|
const BYTE* litPtr = dctx->litPtr;
|
@@ -3501,8 +3505,10 @@ static size_t ZSTDv06_decompressSequences(
|
|
3501
3505
|
{ size_t const lastLLSize = litEnd - litPtr;
|
3502
3506
|
if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
|
3503
3507
|
if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
|
3504
|
-
|
3505
|
-
|
3508
|
+
if (lastLLSize > 0) {
|
3509
|
+
memcpy(op, litPtr, lastLLSize);
|
3510
|
+
op += lastLLSize;
|
3511
|
+
}
|
3506
3512
|
}
|
3507
3513
|
|
3508
3514
|
return op-ostart;
|
@@ -3555,7 +3561,7 @@ static size_t ZSTDv06_decompressFrame(ZSTDv06_DCtx* dctx,
|
|
3555
3561
|
{
|
3556
3562
|
const BYTE* ip = (const BYTE*)src;
|
3557
3563
|
const BYTE* const iend = ip + srcSize;
|
3558
|
-
BYTE* const ostart = (BYTE*
|
3564
|
+
BYTE* const ostart = (BYTE*)dst;
|
3559
3565
|
BYTE* op = ostart;
|
3560
3566
|
BYTE* const oend = ostart + dstCapacity;
|
3561
3567
|
size_t remainingSize = srcSize;
|
@@ -4000,7 +4006,9 @@ size_t ZBUFFv06_decompressInit(ZBUFFv06_DCtx* zbd)
|
|
4000
4006
|
MEM_STATIC size_t ZBUFFv06_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
4001
4007
|
{
|
4002
4008
|
size_t length = MIN(dstCapacity, srcSize);
|
4003
|
-
|
4009
|
+
if (length > 0) {
|
4010
|
+
memcpy(dst, src, length);
|
4011
|
+
}
|
4004
4012
|
return length;
|
4005
4013
|
}
|
4006
4014
|
|
@@ -4109,7 +4117,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
|
|
4109
4117
|
if (!decodedSize) { zbd->stage = ZBUFFds_read; break; } /* this was just a header */
|
4110
4118
|
zbd->outEnd = zbd->outStart + decodedSize;
|
4111
4119
|
zbd->stage = ZBUFFds_flush;
|
4112
|
-
|
4120
|
+
/* break; */ /* ZBUFFds_flush follows */
|
4113
4121
|
}
|
4114
4122
|
}
|
4115
4123
|
/* fall-through */
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -17,14 +17,14 @@
|
|
17
17
|
#ifndef XXH_STATIC_LINKING_ONLY
|
18
18
|
# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
|
19
19
|
#endif
|
20
|
-
#include "xxhash.h" /* XXH64_* */
|
20
|
+
#include "../common/xxhash.h" /* XXH64_* */
|
21
21
|
#include "zstd_v07.h"
|
22
22
|
|
23
23
|
#define FSEv07_STATIC_LINKING_ONLY /* FSEv07_MIN_TABLELOG */
|
24
24
|
#define HUFv07_STATIC_LINKING_ONLY /* HUFv07_TABLELOG_ABSOLUTEMAX */
|
25
25
|
#define ZSTDv07_STATIC_LINKING_ONLY
|
26
26
|
|
27
|
-
#include "error_private.h"
|
27
|
+
#include "../common/error_private.h"
|
28
28
|
|
29
29
|
|
30
30
|
#ifdef ZSTDv07_STATIC_LINKING_ONLY
|
@@ -242,7 +242,11 @@ extern "C" {
|
|
242
242
|
* Basic Types
|
243
243
|
*****************************************************************/
|
244
244
|
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
|
245
|
-
#
|
245
|
+
# if defined(_AIX)
|
246
|
+
# include <inttypes.h>
|
247
|
+
# else
|
248
|
+
# include <stdint.h> /* intptr_t */
|
249
|
+
# endif
|
246
250
|
typedef uint8_t BYTE;
|
247
251
|
typedef uint16_t U16;
|
248
252
|
typedef int16_t S16;
|
@@ -530,7 +534,7 @@ MEM_STATIC unsigned BITv07_highbit32 (U32 val)
|
|
530
534
|
_BitScanReverse ( &r, val );
|
531
535
|
return (unsigned) r;
|
532
536
|
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
|
533
|
-
return
|
537
|
+
return __builtin_clz (val) ^ 31;
|
534
538
|
# else /* Software version */
|
535
539
|
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
|
536
540
|
U32 v = val;
|
@@ -1314,7 +1318,7 @@ size_t HUFv07_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
|
1314
1318
|
|
1315
1319
|
if (!srcSize) return ERROR(srcSize_wrong);
|
1316
1320
|
iSize = ip[0];
|
1317
|
-
|
1321
|
+
/* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzers complain ... */
|
1318
1322
|
|
1319
1323
|
if (iSize >= 128) { /* special header */
|
1320
1324
|
if (iSize >= (242)) { /* RLE */
|
@@ -1784,7 +1788,7 @@ size_t HUFv07_readDTableX2 (HUFv07_DTable* DTable, const void* src, size_t srcSi
|
|
1784
1788
|
HUFv07_DEltX2* const dt = (HUFv07_DEltX2*)dtPtr;
|
1785
1789
|
|
1786
1790
|
HUFv07_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUFv07_DTable));
|
1787
|
-
|
1791
|
+
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
|
1788
1792
|
|
1789
1793
|
iSize = HUFv07_readStats(huffWeight, HUFv07_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
|
1790
1794
|
if (HUFv07_isError(iSize)) return iSize;
|
@@ -2148,7 +2152,7 @@ size_t HUFv07_readDTableX4 (HUFv07_DTable* DTable, const void* src, size_t srcSi
|
|
2148
2152
|
|
2149
2153
|
HUFv07_STATIC_ASSERT(sizeof(HUFv07_DEltX4) == sizeof(HUFv07_DTable)); /* if compilation fails here, assertion is false */
|
2150
2154
|
if (maxTableLog > HUFv07_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge);
|
2151
|
-
|
2155
|
+
/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
|
2152
2156
|
|
2153
2157
|
iSize = HUFv07_readStats(weightList, HUFv07_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
|
2154
2158
|
if (HUFv07_isError(iSize)) return iSize;
|
@@ -2530,8 +2534,8 @@ size_t HUFv07_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|
2530
2534
|
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
2531
2535
|
}
|
2532
2536
|
|
2533
|
-
|
2534
|
-
|
2537
|
+
/* return HUFv07_decompress4X2(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams single-symbol decoding */
|
2538
|
+
/* return HUFv07_decompress4X4(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams double-symbols decoding */
|
2535
2539
|
}
|
2536
2540
|
|
2537
2541
|
size_t HUFv07_decompress4X_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
@@ -3254,7 +3258,7 @@ typedef struct
|
|
3254
3258
|
* Provides the size of compressed block from block header `src` */
|
3255
3259
|
static size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
|
3256
3260
|
{
|
3257
|
-
const BYTE* const in = (const BYTE*
|
3261
|
+
const BYTE* const in = (const BYTE*)src;
|
3258
3262
|
U32 cSize;
|
3259
3263
|
|
3260
3264
|
if (srcSize < ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
|
@@ -3272,7 +3276,9 @@ static size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProper
|
|
3272
3276
|
static size_t ZSTDv07_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
3273
3277
|
{
|
3274
3278
|
if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
|
3275
|
-
|
3279
|
+
if (srcSize > 0) {
|
3280
|
+
memcpy(dst, src, srcSize);
|
3281
|
+
}
|
3276
3282
|
return srcSize;
|
3277
3283
|
}
|
3278
3284
|
|
@@ -3447,7 +3453,7 @@ static size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr,
|
|
3447
3453
|
FSEv07_DTable* DTableLL, FSEv07_DTable* DTableML, FSEv07_DTable* DTableOffb, U32 flagRepeatTable,
|
3448
3454
|
const void* src, size_t srcSize)
|
3449
3455
|
{
|
3450
|
-
const BYTE* const istart = (const BYTE*
|
3456
|
+
const BYTE* const istart = (const BYTE*)src;
|
3451
3457
|
const BYTE* const iend = istart + srcSize;
|
3452
3458
|
const BYTE* ip = istart;
|
3453
3459
|
|
@@ -3666,7 +3672,7 @@ static size_t ZSTDv07_decompressSequences(
|
|
3666
3672
|
{
|
3667
3673
|
const BYTE* ip = (const BYTE*)seqStart;
|
3668
3674
|
const BYTE* const iend = ip + seqSize;
|
3669
|
-
BYTE* const ostart = (BYTE*
|
3675
|
+
BYTE* const ostart = (BYTE*)dst;
|
3670
3676
|
BYTE* const oend = ostart + maxDstSize;
|
3671
3677
|
BYTE* op = ostart;
|
3672
3678
|
const BYTE* litPtr = dctx->litPtr;
|
@@ -3712,10 +3718,12 @@ static size_t ZSTDv07_decompressSequences(
|
|
3712
3718
|
|
3713
3719
|
/* last literal segment */
|
3714
3720
|
{ size_t const lastLLSize = litEnd - litPtr;
|
3715
|
-
|
3721
|
+
/* if (litPtr > litEnd) return ERROR(corruption_detected); */ /* too many literals already used */
|
3716
3722
|
if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
|
3717
|
-
|
3718
|
-
|
3723
|
+
if (lastLLSize > 0) {
|
3724
|
+
memcpy(op, litPtr, lastLLSize);
|
3725
|
+
op += lastLLSize;
|
3726
|
+
}
|
3719
3727
|
}
|
3720
3728
|
|
3721
3729
|
return op-ostart;
|
@@ -3776,7 +3784,9 @@ ZSTDLIBv07_API size_t ZSTDv07_insertBlock(ZSTDv07_DCtx* dctx, const void* blockS
|
|
3776
3784
|
static size_t ZSTDv07_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length)
|
3777
3785
|
{
|
3778
3786
|
if (length > dstCapacity) return ERROR(dstSize_tooSmall);
|
3779
|
-
|
3787
|
+
if (length > 0) {
|
3788
|
+
memset(dst, byte, length);
|
3789
|
+
}
|
3780
3790
|
return length;
|
3781
3791
|
}
|
3782
3792
|
|
@@ -3789,7 +3799,7 @@ static size_t ZSTDv07_decompressFrame(ZSTDv07_DCtx* dctx,
|
|
3789
3799
|
{
|
3790
3800
|
const BYTE* ip = (const BYTE*)src;
|
3791
3801
|
const BYTE* const iend = ip + srcSize;
|
3792
|
-
BYTE* const ostart = (BYTE*
|
3802
|
+
BYTE* const ostart = (BYTE*)dst;
|
3793
3803
|
BYTE* const oend = ostart + dstCapacity;
|
3794
3804
|
BYTE* op = ostart;
|
3795
3805
|
size_t remainingSize = srcSize;
|
@@ -4378,7 +4388,9 @@ size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* zbd)
|
|
4378
4388
|
MEM_STATIC size_t ZBUFFv07_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
4379
4389
|
{
|
4380
4390
|
size_t const length = MIN(dstCapacity, srcSize);
|
4381
|
-
|
4391
|
+
if (length > 0) {
|
4392
|
+
memcpy(dst, src, length);
|
4393
|
+
}
|
4382
4394
|
return length;
|
4383
4395
|
}
|
4384
4396
|
|
@@ -3,8 +3,9 @@
|
|
3
3
|
# BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
4
4
|
|
5
5
|
prefix=@PREFIX@
|
6
|
-
|
6
|
+
exec_prefix=@EXEC_PREFIX@
|
7
7
|
includedir=@INCLUDEDIR@
|
8
|
+
libdir=@LIBDIR@
|
8
9
|
|
9
10
|
Name: zstd
|
10
11
|
Description: fast lossless compression algorithm library
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -36,6 +36,145 @@ extern "C" {
|
|
36
36
|
# define ZDICTLIB_API ZDICTLIB_VISIBILITY
|
37
37
|
#endif
|
38
38
|
|
39
|
+
/*******************************************************************************
|
40
|
+
* Zstd dictionary builder
|
41
|
+
*
|
42
|
+
* FAQ
|
43
|
+
* ===
|
44
|
+
* Why should I use a dictionary?
|
45
|
+
* ------------------------------
|
46
|
+
*
|
47
|
+
* Zstd can use dictionaries to improve compression ratio of small data.
|
48
|
+
* Traditionally small files don't compress well because there is very little
|
49
|
+
* repetition in a single sample, since it is small. But, if you are compressing
|
50
|
+
* many similar files, like a bunch of JSON records that share the same
|
51
|
+
* structure, you can train a dictionary ahead of time on some samples of
|
52
|
+
* these files. Then, zstd can use the dictionary to find repetitions that are
|
53
|
+
* present across samples. This can vastly improve compression ratio.
|
54
|
+
*
|
55
|
+
* When is a dictionary useful?
|
56
|
+
* ----------------------------
|
57
|
+
*
|
58
|
+
* Dictionaries are useful when compressing many small files that are similar.
|
59
|
+
* The larger a file is, the less benefit a dictionary will have. Generally,
|
60
|
+
* we don't expect dictionary compression to be effective past 100KB. And the
|
61
|
+
* smaller a file is, the more we would expect the dictionary to help.
|
62
|
+
*
|
63
|
+
* How do I use a dictionary?
|
64
|
+
* --------------------------
|
65
|
+
*
|
66
|
+
* Simply pass the dictionary to the zstd compressor with
|
67
|
+
* `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
|
68
|
+
* the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
|
69
|
+
* more advanced functions that allow selecting some options, see zstd.h for
|
70
|
+
* complete documentation.
|
71
|
+
*
|
72
|
+
* What is a zstd dictionary?
|
73
|
+
* --------------------------
|
74
|
+
*
|
75
|
+
* A zstd dictionary has two pieces: Its header, and its content. The header
|
76
|
+
* contains a magic number, the dictionary ID, and entropy tables. These
|
77
|
+
* entropy tables allow zstd to save on header costs in the compressed file,
|
78
|
+
* which really matters for small data. The content is just bytes, which are
|
79
|
+
* repeated content that is common across many samples.
|
80
|
+
*
|
81
|
+
* What is a raw content dictionary?
|
82
|
+
* ---------------------------------
|
83
|
+
*
|
84
|
+
* A raw content dictionary is just bytes. It doesn't have a zstd dictionary
|
85
|
+
* header, a dictionary ID, or entropy tables. Any buffer is a valid raw
|
86
|
+
* content dictionary.
|
87
|
+
*
|
88
|
+
* How do I train a dictionary?
|
89
|
+
* ----------------------------
|
90
|
+
*
|
91
|
+
* Gather samples from your use case. These samples should be similar to each
|
92
|
+
* other. If you have several use cases, you could try to train one dictionary
|
93
|
+
* per use case.
|
94
|
+
*
|
95
|
+
* Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
|
96
|
+
* dictionary. There are a few advanced versions of this function, but this
|
97
|
+
* is a great starting point. If you want to further tune your dictionary
|
98
|
+
* you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
|
99
|
+
* you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
|
100
|
+
*
|
101
|
+
* If the dictionary training function fails, that is likely because you
|
102
|
+
* either passed too few samples, or a dictionary would not be effective
|
103
|
+
* for your data. Look at the messages that the dictionary trainer printed,
|
104
|
+
* if it doesn't say too few samples, then a dictionary would not be effective.
|
105
|
+
*
|
106
|
+
* How large should my dictionary be?
|
107
|
+
* ----------------------------------
|
108
|
+
*
|
109
|
+
* A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
|
110
|
+
* The zstd CLI defaults to a 110KB dictionary. You likely don't need a
|
111
|
+
* dictionary larger than that. But, most use cases can get away with a
|
112
|
+
* smaller dictionary. The advanced dictionary builders can automatically
|
113
|
+
* shrink the dictionary for you, and select the smallest size that
|
114
|
+
* doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
|
115
|
+
* A smaller dictionary can save memory, and potentially speed up
|
116
|
+
* compression.
|
117
|
+
*
|
118
|
+
* How many samples should I provide to the dictionary builder?
|
119
|
+
* ------------------------------------------------------------
|
120
|
+
*
|
121
|
+
* We generally recommend passing ~100x the size of the dictionary
|
122
|
+
* in samples. A few thousand should suffice. Having too few samples
|
123
|
+
* can hurt the dictionary's effectiveness. Having more samples will
|
124
|
+
* only improve the dictionary's effectiveness. But having too many
|
125
|
+
* samples can slow down the dictionary builder.
|
126
|
+
*
|
127
|
+
* How do I determine if a dictionary will be effective?
|
128
|
+
* -----------------------------------------------------
|
129
|
+
*
|
130
|
+
* Simply train a dictionary and try it out. You can use zstd's built in
|
131
|
+
* benchmarking tool to test the dictionary effectiveness.
|
132
|
+
*
|
133
|
+
* # Benchmark levels 1-3 without a dictionary
|
134
|
+
* zstd -b1e3 -r /path/to/my/files
|
135
|
+
* # Benchmark levels 1-3 with a dictionary
|
136
|
+
* zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
|
137
|
+
*
|
138
|
+
* When should I retrain a dictionary?
|
139
|
+
* -----------------------------------
|
140
|
+
*
|
141
|
+
* You should retrain a dictionary when its effectiveness drops. Dictionary
|
142
|
+
* effectiveness drops as the data you are compressing changes. Generally, we do
|
143
|
+
* expect dictionaries to "decay" over time, as your data changes, but the rate
|
144
|
+
* at which they decay depends on your use case. Internally, we regularly
|
145
|
+
* retrain dictionaries, and if the new dictionary performs significantly
|
146
|
+
* better than the old dictionary, we will ship the new dictionary.
|
147
|
+
*
|
148
|
+
* I have a raw content dictionary, how do I turn it into a zstd dictionary?
|
149
|
+
* -------------------------------------------------------------------------
|
150
|
+
*
|
151
|
+
* If you have a raw content dictionary, e.g. by manually constructing it, or
|
152
|
+
* using a third-party dictionary builder, you can turn it into a zstd
|
153
|
+
* dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
|
154
|
+
* provide some samples of the data. It will add the zstd header to the
|
155
|
+
* raw content, which contains a dictionary ID and entropy tables, which
|
156
|
+
* will improve compression ratio, and allow zstd to write the dictionary ID
|
157
|
+
* into the frame, if you so choose.
|
158
|
+
*
|
159
|
+
* Do I have to use zstd's dictionary builder?
|
160
|
+
* -------------------------------------------
|
161
|
+
*
|
162
|
+
* No! You can construct dictionary content however you please, it is just
|
163
|
+
* bytes. It will always be valid as a raw content dictionary. If you want
|
164
|
+
* a zstd dictionary, which can improve compression ratio, use
|
165
|
+
* `ZDICT_finalizeDictionary()`.
|
166
|
+
*
|
167
|
+
* What is the attack surface of a zstd dictionary?
|
168
|
+
* ------------------------------------------------
|
169
|
+
*
|
170
|
+
* Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
|
171
|
+
* zstd should never crash, or access out-of-bounds memory no matter what
|
172
|
+
* the dictionary is. However, if an attacker can control the dictionary
|
173
|
+
* during decompression, they can cause zstd to generate arbitrary bytes,
|
174
|
+
* just like if they controlled the compressed data.
|
175
|
+
*
|
176
|
+
******************************************************************************/
|
177
|
+
|
39
178
|
|
40
179
|
/*! ZDICT_trainFromBuffer():
|
41
180
|
* Train a dictionary from an array of samples.
|
@@ -61,9 +200,64 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
|
|
61
200
|
const void* samplesBuffer,
|
62
201
|
const size_t* samplesSizes, unsigned nbSamples);
|
63
202
|
|
203
|
+
typedef struct {
|
204
|
+
int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */
|
205
|
+
unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
206
|
+
unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value)
|
207
|
+
* NOTE: The zstd format reserves some dictionary IDs for future use.
|
208
|
+
* You may use them in private settings, but be warned that they
|
209
|
+
* may be used by zstd in a public dictionary registry in the future.
|
210
|
+
* These dictionary IDs are:
|
211
|
+
* - low range : <= 32767
|
212
|
+
* - high range : >= (2^31)
|
213
|
+
*/
|
214
|
+
} ZDICT_params_t;
|
215
|
+
|
216
|
+
/*! ZDICT_finalizeDictionary():
|
217
|
+
* Given a custom content as a basis for dictionary, and a set of samples,
|
218
|
+
* finalize dictionary by adding headers and statistics according to the zstd
|
219
|
+
* dictionary format.
|
220
|
+
*
|
221
|
+
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
222
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each
|
223
|
+
* sample in order. The samples are used to construct the statistics, so they
|
224
|
+
* should be representative of what you will compress with this dictionary.
|
225
|
+
*
|
226
|
+
* The compression level can be set in `parameters`. You should pass the
|
227
|
+
* compression level you expect to use in production. The statistics for each
|
228
|
+
* compression level differ, so tuning the dictionary for the compression level
|
229
|
+
* can help quite a bit.
|
230
|
+
*
|
231
|
+
* You can set an explicit dictionary ID in `parameters`, or allow us to pick
|
232
|
+
* a random dictionary ID for you, but we can't guarantee no collisions.
|
233
|
+
*
|
234
|
+
* The dstDictBuffer and the dictContent may overlap, and the content will be
|
235
|
+
* appended to the end of the header. If the header + the content doesn't fit in
|
236
|
+
* maxDictSize the beginning of the content is truncated to make room, since it
|
237
|
+
* is presumed that the most profitable content is at the end of the dictionary,
|
238
|
+
* since that is the cheapest to reference.
|
239
|
+
*
|
240
|
+
* `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
241
|
+
* `maxDictSize` must be >= max(dictContentSize, ZDICT_DICTSIZE_MIN).
|
242
|
+
*
|
243
|
+
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
|
244
|
+
* or an error code, which can be tested by ZDICT_isError().
|
245
|
+
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if
|
246
|
+
* instructed to, using notificationLevel>0.
|
247
|
+
* NOTE: This function currently may fail in several edge cases including:
|
248
|
+
* * Not enough samples
|
249
|
+
* * Samples are uncompressible
|
250
|
+
* * Samples are all exactly the same
|
251
|
+
*/
|
252
|
+
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
|
253
|
+
const void* dictContent, size_t dictContentSize,
|
254
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
255
|
+
ZDICT_params_t parameters);
|
256
|
+
|
64
257
|
|
65
258
|
/*====== Helper functions ======*/
|
66
259
|
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
|
260
|
+
ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
|
67
261
|
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
|
68
262
|
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
69
263
|
|
@@ -78,11 +272,8 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
|
78
272
|
* Use them only in association with static linking.
|
79
273
|
* ==================================================================================== */
|
80
274
|
|
81
|
-
|
82
|
-
|
83
|
-
unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
84
|
-
unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */
|
85
|
-
} ZDICT_params_t;
|
275
|
+
#define ZDICT_CONTENTSIZE_MIN 128
|
276
|
+
#define ZDICT_DICTSIZE_MIN 256
|
86
277
|
|
87
278
|
/*! ZDICT_cover_params_t:
|
88
279
|
* k and d are the only required parameters.
|
@@ -198,28 +389,6 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
|
|
198
389
|
const size_t* samplesSizes, unsigned nbSamples,
|
199
390
|
ZDICT_fastCover_params_t* parameters);
|
200
391
|
|
201
|
-
/*! ZDICT_finalizeDictionary():
|
202
|
-
* Given a custom content as a basis for dictionary, and a set of samples,
|
203
|
-
* finalize dictionary by adding headers and statistics.
|
204
|
-
*
|
205
|
-
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
206
|
-
* supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
|
207
|
-
*
|
208
|
-
* dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
209
|
-
* maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
210
|
-
*
|
211
|
-
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
212
|
-
* or an error code, which can be tested by ZDICT_isError().
|
213
|
-
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
214
|
-
* Note 2: dictBuffer and dictContent can overlap
|
215
|
-
*/
|
216
|
-
#define ZDICT_CONTENTSIZE_MIN 128
|
217
|
-
#define ZDICT_DICTSIZE_MIN 256
|
218
|
-
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
219
|
-
const void* dictContent, size_t dictContentSize,
|
220
|
-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
221
|
-
ZDICT_params_t parameters);
|
222
|
-
|
223
392
|
typedef struct {
|
224
393
|
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
|
225
394
|
ZDICT_params_t zParams;
|
@@ -241,10 +410,11 @@ typedef struct {
|
|
241
410
|
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
242
411
|
*/
|
243
412
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
244
|
-
void *dictBuffer, size_t dictBufferCapacity,
|
245
|
-
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
413
|
+
void* dictBuffer, size_t dictBufferCapacity,
|
414
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
246
415
|
ZDICT_legacy_params_t parameters);
|
247
416
|
|
417
|
+
|
248
418
|
/* Deprecation warnings */
|
249
419
|
/* It is generally possible to disable deprecation warnings from compiler,
|
250
420
|
for example with -Wno-deprecated-declarations for gcc
|
@@ -256,7 +426,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
|
256
426
|
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
257
427
|
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
258
428
|
# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
|
259
|
-
# elif (ZDICT_GCC_VERSION >= 405)
|
429
|
+
# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
|
260
430
|
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
|
261
431
|
# elif (ZDICT_GCC_VERSION >= 301)
|
262
432
|
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
|