zstd-ruby 1.4.1.0 → 1.5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/libzstd/BUCK +5 -7
- data/ext/zstdruby/libzstd/Makefile +304 -113
- data/ext/zstdruby/libzstd/README.md +83 -20
- data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
- data/ext/zstdruby/libzstd/common/compiler.h +150 -8
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +8 -4
- data/ext/zstdruby/libzstd/common/fse.h +50 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -55
- data/ext/zstdruby/libzstd/common/huf.h +43 -39
- data/ext/zstdruby/libzstd/common/mem.h +69 -25
- data/ext/zstdruby/libzstd/common/pool.c +30 -20
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/threading.c +51 -4
- data/ext/zstdruby/libzstd/common/threading.h +36 -4
- data/ext/zstdruby/libzstd/common/xxhash.c +40 -92
- data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +230 -111
- data/ext/zstdruby/libzstd/common/zstd_trace.h +154 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +332 -193
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3614 -1696
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +546 -86
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +441 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +572 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +662 -0
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +43 -41
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +85 -80
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1184 -111
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +333 -208
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +228 -129
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +151 -440
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +395 -276
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +630 -231
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +606 -380
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +39 -9
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -46
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -31
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +53 -30
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +24 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +17 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +17 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +25 -11
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +43 -32
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +27 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +32 -20
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
- data/ext/zstdruby/libzstd/zstd.h +740 -153
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
- data/lib/zstd-ruby/version.rb +1 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +21 -10
- data/.travis.yml +0 -14
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -19,7 +19,7 @@ extern "C" {
|
|
|
19
19
|
* Dependencies
|
|
20
20
|
***************************************/
|
|
21
21
|
#include <stddef.h> /* size_t */
|
|
22
|
-
#include "mem.h" /* U64, U32 */
|
|
22
|
+
#include "../common/mem.h" /* U64, U32 */
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
/* *************************************
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
#include <stddef.h> /* size_t, ptrdiff_t */
|
|
15
15
|
#include <string.h> /* memcpy */
|
|
16
16
|
#include <stdlib.h> /* malloc, free, qsort */
|
|
17
|
-
#include "error_private.h"
|
|
17
|
+
#include "../common/error_private.h"
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
|
|
@@ -82,7 +82,11 @@ extern "C" {
|
|
|
82
82
|
* Basic Types
|
|
83
83
|
*****************************************************************/
|
|
84
84
|
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
|
|
85
|
-
#
|
|
85
|
+
# if defined(_AIX)
|
|
86
|
+
# include <inttypes.h>
|
|
87
|
+
# else
|
|
88
|
+
# include <stdint.h> /* intptr_t */
|
|
89
|
+
# endif
|
|
86
90
|
typedef uint8_t BYTE;
|
|
87
91
|
typedef uint16_t U16;
|
|
88
92
|
typedef int16_t S16;
|
|
@@ -860,7 +864,7 @@ MEM_STATIC unsigned BITv06_highbit32 ( U32 val)
|
|
|
860
864
|
_BitScanReverse ( &r, val );
|
|
861
865
|
return (unsigned) r;
|
|
862
866
|
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
|
|
863
|
-
return
|
|
867
|
+
return __builtin_clz (val) ^ 31;
|
|
864
868
|
# else /* Software version */
|
|
865
869
|
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
|
|
866
870
|
U32 v = val;
|
|
@@ -1862,7 +1866,7 @@ MEM_STATIC size_t HUFv06_readStats(BYTE* huffWeight, size_t hwSize, U32* rankSta
|
|
|
1862
1866
|
|
|
1863
1867
|
if (!srcSize) return ERROR(srcSize_wrong);
|
|
1864
1868
|
iSize = ip[0];
|
|
1865
|
-
|
|
1869
|
+
/* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzers complain ... */
|
|
1866
1870
|
|
|
1867
1871
|
if (iSize >= 128) { /* special header */
|
|
1868
1872
|
if (iSize >= (242)) { /* RLE */
|
|
@@ -2014,7 +2018,7 @@ size_t HUFv06_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
|
|
|
2014
2018
|
HUFv06_DEltX2* const dt = (HUFv06_DEltX2*)dtPtr;
|
|
2015
2019
|
|
|
2016
2020
|
HUFv06_STATIC_ASSERT(sizeof(HUFv06_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */
|
|
2017
|
-
|
|
2021
|
+
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
|
|
2018
2022
|
|
|
2019
2023
|
iSize = HUFv06_readStats(huffWeight, HUFv06_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
|
|
2020
2024
|
if (HUFv06_isError(iSize)) return iSize;
|
|
@@ -2340,7 +2344,7 @@ size_t HUFv06_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
|
|
|
2340
2344
|
|
|
2341
2345
|
HUFv06_STATIC_ASSERT(sizeof(HUFv06_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
|
|
2342
2346
|
if (memLog > HUFv06_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
|
2343
|
-
|
|
2347
|
+
/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
|
|
2344
2348
|
|
|
2345
2349
|
iSize = HUFv06_readStats(weightList, HUFv06_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
|
|
2346
2350
|
if (HUFv06_isError(iSize)) return iSize;
|
|
@@ -2664,13 +2668,13 @@ size_t HUFv06_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|
|
2664
2668
|
|
|
2665
2669
|
{ U32 algoNb = 0;
|
|
2666
2670
|
if (Dtime[1] < Dtime[0]) algoNb = 1;
|
|
2667
|
-
|
|
2671
|
+
/* if (Dtime[2] < Dtime[algoNb]) algoNb = 2; */ /* current speed of HUFv06_decompress4X6 is not good */
|
|
2668
2672
|
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
|
2669
2673
|
}
|
|
2670
2674
|
|
|
2671
|
-
|
|
2672
|
-
|
|
2673
|
-
|
|
2675
|
+
/* return HUFv06_decompress4X2(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams single-symbol decoding */
|
|
2676
|
+
/* return HUFv06_decompress4X4(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams double-symbols decoding */
|
|
2677
|
+
/* return HUFv06_decompress4X6(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams quad-symbols decoding */
|
|
2674
2678
|
}
|
|
2675
2679
|
/*
|
|
2676
2680
|
Common functions of Zstd compression library
|
|
@@ -3025,7 +3029,7 @@ typedef struct
|
|
|
3025
3029
|
* Provides the size of compressed block from block header `src` */
|
|
3026
3030
|
static size_t ZSTDv06_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
|
|
3027
3031
|
{
|
|
3028
|
-
const BYTE* const in = (const BYTE*
|
|
3032
|
+
const BYTE* const in = (const BYTE*)src;
|
|
3029
3033
|
U32 cSize;
|
|
3030
3034
|
|
|
3031
3035
|
if (srcSize < ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong);
|
|
@@ -3219,7 +3223,7 @@ static size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr,
|
|
|
3219
3223
|
FSEv06_DTable* DTableLL, FSEv06_DTable* DTableML, FSEv06_DTable* DTableOffb, U32 flagRepeatTable,
|
|
3220
3224
|
const void* src, size_t srcSize)
|
|
3221
3225
|
{
|
|
3222
|
-
const BYTE* const istart = (const BYTE*
|
|
3226
|
+
const BYTE* const istart = (const BYTE*)src;
|
|
3223
3227
|
const BYTE* const iend = istart + srcSize;
|
|
3224
3228
|
const BYTE* ip = istart;
|
|
3225
3229
|
|
|
@@ -3441,7 +3445,7 @@ static size_t ZSTDv06_decompressSequences(
|
|
|
3441
3445
|
{
|
|
3442
3446
|
const BYTE* ip = (const BYTE*)seqStart;
|
|
3443
3447
|
const BYTE* const iend = ip + seqSize;
|
|
3444
|
-
BYTE* const ostart = (BYTE*
|
|
3448
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
3445
3449
|
BYTE* const oend = ostart + maxDstSize;
|
|
3446
3450
|
BYTE* op = ostart;
|
|
3447
3451
|
const BYTE* litPtr = dctx->litPtr;
|
|
@@ -3501,8 +3505,10 @@ static size_t ZSTDv06_decompressSequences(
|
|
|
3501
3505
|
{ size_t const lastLLSize = litEnd - litPtr;
|
|
3502
3506
|
if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
|
|
3503
3507
|
if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
|
|
3504
|
-
|
|
3505
|
-
|
|
3508
|
+
if (lastLLSize > 0) {
|
|
3509
|
+
memcpy(op, litPtr, lastLLSize);
|
|
3510
|
+
op += lastLLSize;
|
|
3511
|
+
}
|
|
3506
3512
|
}
|
|
3507
3513
|
|
|
3508
3514
|
return op-ostart;
|
|
@@ -3555,7 +3561,7 @@ static size_t ZSTDv06_decompressFrame(ZSTDv06_DCtx* dctx,
|
|
|
3555
3561
|
{
|
|
3556
3562
|
const BYTE* ip = (const BYTE*)src;
|
|
3557
3563
|
const BYTE* const iend = ip + srcSize;
|
|
3558
|
-
BYTE* const ostart = (BYTE*
|
|
3564
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
3559
3565
|
BYTE* op = ostart;
|
|
3560
3566
|
BYTE* const oend = ostart + dstCapacity;
|
|
3561
3567
|
size_t remainingSize = srcSize;
|
|
@@ -4000,7 +4006,9 @@ size_t ZBUFFv06_decompressInit(ZBUFFv06_DCtx* zbd)
|
|
|
4000
4006
|
MEM_STATIC size_t ZBUFFv06_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
|
4001
4007
|
{
|
|
4002
4008
|
size_t length = MIN(dstCapacity, srcSize);
|
|
4003
|
-
|
|
4009
|
+
if (length > 0) {
|
|
4010
|
+
memcpy(dst, src, length);
|
|
4011
|
+
}
|
|
4004
4012
|
return length;
|
|
4005
4013
|
}
|
|
4006
4014
|
|
|
@@ -4109,7 +4117,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
|
|
|
4109
4117
|
if (!decodedSize) { zbd->stage = ZBUFFds_read; break; } /* this was just a header */
|
|
4110
4118
|
zbd->outEnd = zbd->outStart + decodedSize;
|
|
4111
4119
|
zbd->stage = ZBUFFds_flush;
|
|
4112
|
-
|
|
4120
|
+
/* break; */ /* ZBUFFds_flush follows */
|
|
4113
4121
|
}
|
|
4114
4122
|
}
|
|
4115
4123
|
/* fall-through */
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -17,14 +17,14 @@
|
|
|
17
17
|
#ifndef XXH_STATIC_LINKING_ONLY
|
|
18
18
|
# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
|
|
19
19
|
#endif
|
|
20
|
-
#include "xxhash.h" /* XXH64_* */
|
|
20
|
+
#include "../common/xxhash.h" /* XXH64_* */
|
|
21
21
|
#include "zstd_v07.h"
|
|
22
22
|
|
|
23
23
|
#define FSEv07_STATIC_LINKING_ONLY /* FSEv07_MIN_TABLELOG */
|
|
24
24
|
#define HUFv07_STATIC_LINKING_ONLY /* HUFv07_TABLELOG_ABSOLUTEMAX */
|
|
25
25
|
#define ZSTDv07_STATIC_LINKING_ONLY
|
|
26
26
|
|
|
27
|
-
#include "error_private.h"
|
|
27
|
+
#include "../common/error_private.h"
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
#ifdef ZSTDv07_STATIC_LINKING_ONLY
|
|
@@ -242,7 +242,11 @@ extern "C" {
|
|
|
242
242
|
* Basic Types
|
|
243
243
|
*****************************************************************/
|
|
244
244
|
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
|
|
245
|
-
#
|
|
245
|
+
# if defined(_AIX)
|
|
246
|
+
# include <inttypes.h>
|
|
247
|
+
# else
|
|
248
|
+
# include <stdint.h> /* intptr_t */
|
|
249
|
+
# endif
|
|
246
250
|
typedef uint8_t BYTE;
|
|
247
251
|
typedef uint16_t U16;
|
|
248
252
|
typedef int16_t S16;
|
|
@@ -530,7 +534,7 @@ MEM_STATIC unsigned BITv07_highbit32 (U32 val)
|
|
|
530
534
|
_BitScanReverse ( &r, val );
|
|
531
535
|
return (unsigned) r;
|
|
532
536
|
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
|
|
533
|
-
return
|
|
537
|
+
return __builtin_clz (val) ^ 31;
|
|
534
538
|
# else /* Software version */
|
|
535
539
|
static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
|
|
536
540
|
U32 v = val;
|
|
@@ -1314,7 +1318,7 @@ size_t HUFv07_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
|
|
1314
1318
|
|
|
1315
1319
|
if (!srcSize) return ERROR(srcSize_wrong);
|
|
1316
1320
|
iSize = ip[0];
|
|
1317
|
-
|
|
1321
|
+
/* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzers complain ... */
|
|
1318
1322
|
|
|
1319
1323
|
if (iSize >= 128) { /* special header */
|
|
1320
1324
|
if (iSize >= (242)) { /* RLE */
|
|
@@ -1784,7 +1788,7 @@ size_t HUFv07_readDTableX2 (HUFv07_DTable* DTable, const void* src, size_t srcSi
|
|
|
1784
1788
|
HUFv07_DEltX2* const dt = (HUFv07_DEltX2*)dtPtr;
|
|
1785
1789
|
|
|
1786
1790
|
HUFv07_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUFv07_DTable));
|
|
1787
|
-
|
|
1791
|
+
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
|
|
1788
1792
|
|
|
1789
1793
|
iSize = HUFv07_readStats(huffWeight, HUFv07_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
|
|
1790
1794
|
if (HUFv07_isError(iSize)) return iSize;
|
|
@@ -2148,7 +2152,7 @@ size_t HUFv07_readDTableX4 (HUFv07_DTable* DTable, const void* src, size_t srcSi
|
|
|
2148
2152
|
|
|
2149
2153
|
HUFv07_STATIC_ASSERT(sizeof(HUFv07_DEltX4) == sizeof(HUFv07_DTable)); /* if compilation fails here, assertion is false */
|
|
2150
2154
|
if (maxTableLog > HUFv07_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge);
|
|
2151
|
-
|
|
2155
|
+
/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
|
|
2152
2156
|
|
|
2153
2157
|
iSize = HUFv07_readStats(weightList, HUFv07_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
|
|
2154
2158
|
if (HUFv07_isError(iSize)) return iSize;
|
|
@@ -2530,8 +2534,8 @@ size_t HUFv07_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|
|
2530
2534
|
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
|
2531
2535
|
}
|
|
2532
2536
|
|
|
2533
|
-
|
|
2534
|
-
|
|
2537
|
+
/* return HUFv07_decompress4X2(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams single-symbol decoding */
|
|
2538
|
+
/* return HUFv07_decompress4X4(dst, dstSize, cSrc, cSrcSize); */ /* multi-streams double-symbols decoding */
|
|
2535
2539
|
}
|
|
2536
2540
|
|
|
2537
2541
|
size_t HUFv07_decompress4X_DCtx (HUFv07_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
|
@@ -3254,7 +3258,7 @@ typedef struct
|
|
|
3254
3258
|
* Provides the size of compressed block from block header `src` */
|
|
3255
3259
|
static size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
|
|
3256
3260
|
{
|
|
3257
|
-
const BYTE* const in = (const BYTE*
|
|
3261
|
+
const BYTE* const in = (const BYTE*)src;
|
|
3258
3262
|
U32 cSize;
|
|
3259
3263
|
|
|
3260
3264
|
if (srcSize < ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
|
|
@@ -3272,7 +3276,9 @@ static size_t ZSTDv07_getcBlockSize(const void* src, size_t srcSize, blockProper
|
|
|
3272
3276
|
static size_t ZSTDv07_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
|
3273
3277
|
{
|
|
3274
3278
|
if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
|
|
3275
|
-
|
|
3279
|
+
if (srcSize > 0) {
|
|
3280
|
+
memcpy(dst, src, srcSize);
|
|
3281
|
+
}
|
|
3276
3282
|
return srcSize;
|
|
3277
3283
|
}
|
|
3278
3284
|
|
|
@@ -3447,7 +3453,7 @@ static size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr,
|
|
|
3447
3453
|
FSEv07_DTable* DTableLL, FSEv07_DTable* DTableML, FSEv07_DTable* DTableOffb, U32 flagRepeatTable,
|
|
3448
3454
|
const void* src, size_t srcSize)
|
|
3449
3455
|
{
|
|
3450
|
-
const BYTE* const istart = (const BYTE*
|
|
3456
|
+
const BYTE* const istart = (const BYTE*)src;
|
|
3451
3457
|
const BYTE* const iend = istart + srcSize;
|
|
3452
3458
|
const BYTE* ip = istart;
|
|
3453
3459
|
|
|
@@ -3666,7 +3672,7 @@ static size_t ZSTDv07_decompressSequences(
|
|
|
3666
3672
|
{
|
|
3667
3673
|
const BYTE* ip = (const BYTE*)seqStart;
|
|
3668
3674
|
const BYTE* const iend = ip + seqSize;
|
|
3669
|
-
BYTE* const ostart = (BYTE*
|
|
3675
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
3670
3676
|
BYTE* const oend = ostart + maxDstSize;
|
|
3671
3677
|
BYTE* op = ostart;
|
|
3672
3678
|
const BYTE* litPtr = dctx->litPtr;
|
|
@@ -3712,10 +3718,12 @@ static size_t ZSTDv07_decompressSequences(
|
|
|
3712
3718
|
|
|
3713
3719
|
/* last literal segment */
|
|
3714
3720
|
{ size_t const lastLLSize = litEnd - litPtr;
|
|
3715
|
-
|
|
3721
|
+
/* if (litPtr > litEnd) return ERROR(corruption_detected); */ /* too many literals already used */
|
|
3716
3722
|
if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
|
|
3717
|
-
|
|
3718
|
-
|
|
3723
|
+
if (lastLLSize > 0) {
|
|
3724
|
+
memcpy(op, litPtr, lastLLSize);
|
|
3725
|
+
op += lastLLSize;
|
|
3726
|
+
}
|
|
3719
3727
|
}
|
|
3720
3728
|
|
|
3721
3729
|
return op-ostart;
|
|
@@ -3776,7 +3784,9 @@ ZSTDLIBv07_API size_t ZSTDv07_insertBlock(ZSTDv07_DCtx* dctx, const void* blockS
|
|
|
3776
3784
|
static size_t ZSTDv07_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length)
|
|
3777
3785
|
{
|
|
3778
3786
|
if (length > dstCapacity) return ERROR(dstSize_tooSmall);
|
|
3779
|
-
|
|
3787
|
+
if (length > 0) {
|
|
3788
|
+
memset(dst, byte, length);
|
|
3789
|
+
}
|
|
3780
3790
|
return length;
|
|
3781
3791
|
}
|
|
3782
3792
|
|
|
@@ -3789,7 +3799,7 @@ static size_t ZSTDv07_decompressFrame(ZSTDv07_DCtx* dctx,
|
|
|
3789
3799
|
{
|
|
3790
3800
|
const BYTE* ip = (const BYTE*)src;
|
|
3791
3801
|
const BYTE* const iend = ip + srcSize;
|
|
3792
|
-
BYTE* const ostart = (BYTE*
|
|
3802
|
+
BYTE* const ostart = (BYTE*)dst;
|
|
3793
3803
|
BYTE* const oend = ostart + dstCapacity;
|
|
3794
3804
|
BYTE* op = ostart;
|
|
3795
3805
|
size_t remainingSize = srcSize;
|
|
@@ -4378,7 +4388,9 @@ size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* zbd)
|
|
|
4378
4388
|
MEM_STATIC size_t ZBUFFv07_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
|
4379
4389
|
{
|
|
4380
4390
|
size_t const length = MIN(dstCapacity, srcSize);
|
|
4381
|
-
|
|
4391
|
+
if (length > 0) {
|
|
4392
|
+
memcpy(dst, src, length);
|
|
4393
|
+
}
|
|
4382
4394
|
return length;
|
|
4383
4395
|
}
|
|
4384
4396
|
|
|
@@ -3,8 +3,9 @@
|
|
|
3
3
|
# BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
|
4
4
|
|
|
5
5
|
prefix=@PREFIX@
|
|
6
|
-
|
|
6
|
+
exec_prefix=@EXEC_PREFIX@
|
|
7
7
|
includedir=@INCLUDEDIR@
|
|
8
|
+
libdir=@LIBDIR@
|
|
8
9
|
|
|
9
10
|
Name: zstd
|
|
10
11
|
Description: fast lossless compression algorithm library
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -36,6 +36,145 @@ extern "C" {
|
|
|
36
36
|
# define ZDICTLIB_API ZDICTLIB_VISIBILITY
|
|
37
37
|
#endif
|
|
38
38
|
|
|
39
|
+
/*******************************************************************************
|
|
40
|
+
* Zstd dictionary builder
|
|
41
|
+
*
|
|
42
|
+
* FAQ
|
|
43
|
+
* ===
|
|
44
|
+
* Why should I use a dictionary?
|
|
45
|
+
* ------------------------------
|
|
46
|
+
*
|
|
47
|
+
* Zstd can use dictionaries to improve compression ratio of small data.
|
|
48
|
+
* Traditionally small files don't compress well because there is very little
|
|
49
|
+
* repetition in a single sample, since it is small. But, if you are compressing
|
|
50
|
+
* many similar files, like a bunch of JSON records that share the same
|
|
51
|
+
* structure, you can train a dictionary ahead of time on some samples of
|
|
52
|
+
* these files. Then, zstd can use the dictionary to find repetitions that are
|
|
53
|
+
* present across samples. This can vastly improve compression ratio.
|
|
54
|
+
*
|
|
55
|
+
* When is a dictionary useful?
|
|
56
|
+
* ----------------------------
|
|
57
|
+
*
|
|
58
|
+
* Dictionaries are useful when compressing many small files that are similar.
|
|
59
|
+
* The larger a file is, the less benefit a dictionary will have. Generally,
|
|
60
|
+
* we don't expect dictionary compression to be effective past 100KB. And the
|
|
61
|
+
* smaller a file is, the more we would expect the dictionary to help.
|
|
62
|
+
*
|
|
63
|
+
* How do I use a dictionary?
|
|
64
|
+
* --------------------------
|
|
65
|
+
*
|
|
66
|
+
* Simply pass the dictionary to the zstd compressor with
|
|
67
|
+
* `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
|
|
68
|
+
* the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
|
|
69
|
+
* more advanced functions that allow selecting some options, see zstd.h for
|
|
70
|
+
* complete documentation.
|
|
71
|
+
*
|
|
72
|
+
* What is a zstd dictionary?
|
|
73
|
+
* --------------------------
|
|
74
|
+
*
|
|
75
|
+
* A zstd dictionary has two pieces: Its header, and its content. The header
|
|
76
|
+
* contains a magic number, the dictionary ID, and entropy tables. These
|
|
77
|
+
* entropy tables allow zstd to save on header costs in the compressed file,
|
|
78
|
+
* which really matters for small data. The content is just bytes, which are
|
|
79
|
+
* repeated content that is common across many samples.
|
|
80
|
+
*
|
|
81
|
+
* What is a raw content dictionary?
|
|
82
|
+
* ---------------------------------
|
|
83
|
+
*
|
|
84
|
+
* A raw content dictionary is just bytes. It doesn't have a zstd dictionary
|
|
85
|
+
* header, a dictionary ID, or entropy tables. Any buffer is a valid raw
|
|
86
|
+
* content dictionary.
|
|
87
|
+
*
|
|
88
|
+
* How do I train a dictionary?
|
|
89
|
+
* ----------------------------
|
|
90
|
+
*
|
|
91
|
+
* Gather samples from your use case. These samples should be similar to each
|
|
92
|
+
* other. If you have several use cases, you could try to train one dictionary
|
|
93
|
+
* per use case.
|
|
94
|
+
*
|
|
95
|
+
* Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
|
|
96
|
+
* dictionary. There are a few advanced versions of this function, but this
|
|
97
|
+
* is a great starting point. If you want to further tune your dictionary
|
|
98
|
+
* you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
|
|
99
|
+
* you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
|
|
100
|
+
*
|
|
101
|
+
* If the dictionary training function fails, that is likely because you
|
|
102
|
+
* either passed too few samples, or a dictionary would not be effective
|
|
103
|
+
* for your data. Look at the messages that the dictionary trainer printed;
|
|
104
|
+
* if it doesn't say too few samples, then a dictionary would not be effective.
|
|
105
|
+
*
|
|
106
|
+
* How large should my dictionary be?
|
|
107
|
+
* ----------------------------------
|
|
108
|
+
*
|
|
109
|
+
* A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
|
|
110
|
+
* The zstd CLI defaults to a 110KB dictionary. You likely don't need a
|
|
111
|
+
* dictionary larger than that. But, most use cases can get away with a
|
|
112
|
+
* smaller dictionary. The advanced dictionary builders can automatically
|
|
113
|
+
* shrink the dictionary for you, and select the smallest size that
|
|
114
|
+
* doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
|
|
115
|
+
* A smaller dictionary can save memory, and potentially speed up
|
|
116
|
+
* compression.
|
|
117
|
+
*
|
|
118
|
+
* How many samples should I provide to the dictionary builder?
|
|
119
|
+
* ------------------------------------------------------------
|
|
120
|
+
*
|
|
121
|
+
* We generally recommend passing ~100x the size of the dictionary
|
|
122
|
+
* in samples. A few thousand should suffice. Having too few samples
|
|
123
|
+
* can hurt the dictionary's effectiveness. Having more samples will
|
|
124
|
+
* only improve the dictionary's effectiveness. But having too many
|
|
125
|
+
* samples can slow down the dictionary builder.
|
|
126
|
+
*
|
|
127
|
+
* How do I determine if a dictionary will be effective?
|
|
128
|
+
* -----------------------------------------------------
|
|
129
|
+
*
|
|
130
|
+
* Simply train a dictionary and try it out. You can use zstd's built-in
|
|
131
|
+
* benchmarking tool to test the dictionary effectiveness.
|
|
132
|
+
*
|
|
133
|
+
* # Benchmark levels 1-3 without a dictionary
|
|
134
|
+
* zstd -b1e3 -r /path/to/my/files
|
|
135
|
+
* # Benchmark levels 1-3 with a dictionary
|
|
136
|
+
* zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
|
|
137
|
+
*
|
|
138
|
+
* When should I retrain a dictionary?
|
|
139
|
+
* -----------------------------------
|
|
140
|
+
*
|
|
141
|
+
* You should retrain a dictionary when its effectiveness drops. Dictionary
|
|
142
|
+
* effectiveness drops as the data you are compressing changes. Generally, we do
|
|
143
|
+
* expect dictionaries to "decay" over time, as your data changes, but the rate
|
|
144
|
+
* at which they decay depends on your use case. Internally, we regularly
|
|
145
|
+
* retrain dictionaries, and if the new dictionary performs significantly
|
|
146
|
+
* better than the old dictionary, we will ship the new dictionary.
|
|
147
|
+
*
|
|
148
|
+
* I have a raw content dictionary, how do I turn it into a zstd dictionary?
|
|
149
|
+
* -------------------------------------------------------------------------
|
|
150
|
+
*
|
|
151
|
+
* If you have a raw content dictionary, e.g. by manually constructing it, or
|
|
152
|
+
* using a third-party dictionary builder, you can turn it into a zstd
|
|
153
|
+
* dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
|
|
154
|
+
* provide some samples of the data. It will add the zstd header to the
|
|
155
|
+
* raw content, which contains a dictionary ID and entropy tables, which
|
|
156
|
+
* will improve compression ratio, and allow zstd to write the dictionary ID
|
|
157
|
+
* into the frame, if you so choose.
|
|
158
|
+
*
|
|
159
|
+
* Do I have to use zstd's dictionary builder?
|
|
160
|
+
* -------------------------------------------
|
|
161
|
+
*
|
|
162
|
+
* No! You can construct dictionary content however you please, it is just
|
|
163
|
+
* bytes. It will always be valid as a raw content dictionary. If you want
|
|
164
|
+
* a zstd dictionary, which can improve compression ratio, use
|
|
165
|
+
* `ZDICT_finalizeDictionary()`.
|
|
166
|
+
*
|
|
167
|
+
* What is the attack surface of a zstd dictionary?
|
|
168
|
+
* ------------------------------------------------
|
|
169
|
+
*
|
|
170
|
+
* Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
|
|
171
|
+
* zstd should never crash, or access out-of-bounds memory no matter what
|
|
172
|
+
* the dictionary is. However, if an attacker can control the dictionary
|
|
173
|
+
* during decompression, they can cause zstd to generate arbitrary bytes,
|
|
174
|
+
* just like if they controlled the compressed data.
|
|
175
|
+
*
|
|
176
|
+
******************************************************************************/
|
|
177
|
+
|
|
39
178
|
|
|
40
179
|
/*! ZDICT_trainFromBuffer():
|
|
41
180
|
* Train a dictionary from an array of samples.
|
|
@@ -61,9 +200,64 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
|
|
|
61
200
|
const void* samplesBuffer,
|
|
62
201
|
const size_t* samplesSizes, unsigned nbSamples);
|
|
63
202
|
|
|
203
|
+
typedef struct {
|
|
204
|
+
int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */
|
|
205
|
+
unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
|
206
|
+
unsigned dictID; /**< force dictID value; 0 means auto mode (32-bit random value)
|
|
207
|
+
* NOTE: The zstd format reserves some dictionary IDs for future use.
|
|
208
|
+
* You may use them in private settings, but be warned that they
|
|
209
|
+
* may be used by zstd in a public dictionary registry in the future.
|
|
210
|
+
* These dictionary IDs are:
|
|
211
|
+
* - low range : <= 32767
|
|
212
|
+
* - high range : >= (2^31)
|
|
213
|
+
*/
|
|
214
|
+
} ZDICT_params_t;
|
|
215
|
+
|
|
216
|
+
/*! ZDICT_finalizeDictionary():
|
|
217
|
+
* Given a custom content as a basis for dictionary, and a set of samples,
|
|
218
|
+
* finalize dictionary by adding headers and statistics according to the zstd
|
|
219
|
+
* dictionary format.
|
|
220
|
+
*
|
|
221
|
+
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
|
222
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each
|
|
223
|
+
* sample in order. The samples are used to construct the statistics, so they
|
|
224
|
+
* should be representative of what you will compress with this dictionary.
|
|
225
|
+
*
|
|
226
|
+
* The compression level can be set in `parameters`. You should pass the
|
|
227
|
+
* compression level you expect to use in production. The statistics for each
|
|
228
|
+
* compression level differ, so tuning the dictionary for the compression level
|
|
229
|
+
* can help quite a bit.
|
|
230
|
+
*
|
|
231
|
+
* You can set an explicit dictionary ID in `parameters`, or allow us to pick
|
|
232
|
+
* a random dictionary ID for you, but we can't guarantee no collisions.
|
|
233
|
+
*
|
|
234
|
+
* The dstDictBuffer and the dictContent may overlap, and the content will be
|
|
235
|
+
* appended to the end of the header. If the header + the content doesn't fit in
|
|
236
|
+
* maxDictSize, the beginning of the content is truncated to make room, since it
|
|
237
|
+
* is presumed that the most profitable content is at the end of the dictionary,
|
|
238
|
+
* since that is the cheapest to reference.
|
|
239
|
+
*
|
|
240
|
+
* `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
|
241
|
+
* `maxDictSize` must be >= max(dictContentSize, ZDICT_DICTSIZE_MIN).
|
|
242
|
+
*
|
|
243
|
+
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
|
|
244
|
+
* or an error code, which can be tested by ZDICT_isError().
|
|
245
|
+
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if
|
|
246
|
+
* instructed to, using notificationLevel>0.
|
|
247
|
+
* NOTE: This function currently may fail in several edge cases including:
|
|
248
|
+
* * Not enough samples
|
|
249
|
+
* * Samples are incompressible
|
|
250
|
+
* * Samples are all exactly the same
|
|
251
|
+
*/
|
|
252
|
+
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
|
|
253
|
+
const void* dictContent, size_t dictContentSize,
|
|
254
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
255
|
+
ZDICT_params_t parameters);
|
|
256
|
+
|
|
64
257
|
|
|
65
258
|
/*====== Helper functions ======*/
|
|
66
259
|
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
|
|
260
|
+
ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
|
|
67
261
|
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
|
|
68
262
|
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
|
69
263
|
|
|
@@ -78,11 +272,8 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
|
|
78
272
|
* Use them only in association with static linking.
|
|
79
273
|
* ==================================================================================== */
|
|
80
274
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
|
84
|
-
unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */
|
|
85
|
-
} ZDICT_params_t;
|
|
275
|
+
#define ZDICT_CONTENTSIZE_MIN 128
|
|
276
|
+
#define ZDICT_DICTSIZE_MIN 256
|
|
86
277
|
|
|
87
278
|
/*! ZDICT_cover_params_t:
|
|
88
279
|
* k and d are the only required parameters.
|
|
@@ -198,28 +389,6 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
|
|
|
198
389
|
const size_t* samplesSizes, unsigned nbSamples,
|
|
199
390
|
ZDICT_fastCover_params_t* parameters);
|
|
200
391
|
|
|
201
|
-
/*! ZDICT_finalizeDictionary():
|
|
202
|
-
* Given a custom content as a basis for dictionary, and a set of samples,
|
|
203
|
-
* finalize dictionary by adding headers and statistics.
|
|
204
|
-
*
|
|
205
|
-
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
|
206
|
-
* supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
|
|
207
|
-
*
|
|
208
|
-
* dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
|
209
|
-
* maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
|
210
|
-
*
|
|
211
|
-
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
|
212
|
-
* or an error code, which can be tested by ZDICT_isError().
|
|
213
|
-
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
|
214
|
-
* Note 2: dictBuffer and dictContent can overlap
|
|
215
|
-
*/
|
|
216
|
-
#define ZDICT_CONTENTSIZE_MIN 128
|
|
217
|
-
#define ZDICT_DICTSIZE_MIN 256
|
|
218
|
-
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
219
|
-
const void* dictContent, size_t dictContentSize,
|
|
220
|
-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
221
|
-
ZDICT_params_t parameters);
|
|
222
|
-
|
|
223
392
|
typedef struct {
|
|
224
393
|
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
|
|
225
394
|
ZDICT_params_t zParams;
|
|
@@ -241,10 +410,11 @@ typedef struct {
|
|
|
241
410
|
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
|
242
411
|
*/
|
|
243
412
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
|
244
|
-
void
|
|
245
|
-
const void
|
|
413
|
+
void* dictBuffer, size_t dictBufferCapacity,
|
|
414
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
246
415
|
ZDICT_legacy_params_t parameters);
|
|
247
416
|
|
|
417
|
+
|
|
248
418
|
/* Deprecation warnings */
|
|
249
419
|
/* It is generally possible to disable deprecation warnings from compiler,
|
|
250
420
|
for example with -Wno-deprecated-declarations for gcc
|
|
@@ -256,7 +426,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
|
|
256
426
|
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
|
257
427
|
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
|
258
428
|
# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
|
|
259
|
-
# elif (ZDICT_GCC_VERSION >= 405)
|
|
429
|
+
# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
|
|
260
430
|
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
|
|
261
431
|
# elif (ZDICT_GCC_VERSION >= 301)
|
|
262
432
|
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
|