zstdlib 0.3.0-x64-mingw32 → 0.8.0-x64-mingw32
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGES.md +30 -1
- data/README.md +2 -2
- data/Rakefile +1 -1
- data/ext/zstdlib/extconf.rb +3 -3
- data/ext/zstdlib/ruby/zlib-2.7/zstdlib.c +4895 -0
- data/ext/zstdlib/ruby/zlib-3.0/zstdlib.c +4994 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/bitstream.h +59 -51
- data/ext/zstdlib/zstd-1.5.0/lib/common/compiler.h +289 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/cpu.h +1 -3
- data/ext/zstdlib/zstd-1.5.0/lib/common/debug.c +24 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/debug.h +22 -49
- data/ext/zstdlib/zstd-1.5.0/lib/common/entropy_common.c +362 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/error_private.c +3 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/error_private.h +8 -4
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/fse.h +50 -42
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/fse_decompress.c +149 -55
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/huf.h +43 -39
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/mem.h +69 -25
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/pool.c +30 -20
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/pool.h +3 -3
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/threading.c +51 -4
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/threading.h +36 -4
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/xxhash.c +40 -92
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/xxhash.h +12 -32
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/common/zstd_common.c +10 -10
- data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_deps.h +111 -0
- data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_internal.h +490 -0
- data/ext/zstdlib/zstd-1.5.0/lib/common/zstd_trace.h +154 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/fse_compress.c +47 -63
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/hist.c +41 -63
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/hist.h +13 -33
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/huf_compress.c +332 -193
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress.c +6393 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_internal.h +522 -86
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_literals.c +25 -16
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_literals.h +2 -2
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_sequences.c +50 -24
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_compress_sequences.h +11 -4
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress_superblock.c +572 -0
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_cwksp.h +662 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_double_fast.c +43 -41
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_double_fast.h +2 -2
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_fast.c +85 -80
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_fast.h +2 -2
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_lazy.c +2184 -0
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_lazy.h +125 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_ldm.c +333 -208
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_ldm.h +15 -3
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_opt.c +228 -129
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstd_opt.h +1 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/compress/zstdmt_compress.c +151 -440
- data/ext/zstdlib/zstd-1.5.0/lib/compress/zstdmt_compress.h +110 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/huf_decompress.c +395 -276
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_ddict.c +20 -16
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_ddict.h +3 -3
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress.c +628 -231
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.c +606 -380
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_block.h +8 -5
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/decompress/zstd_decompress_internal.h +39 -9
- data/ext/zstdlib/zstd-1.5.0/lib/zdict.h +452 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/lib/zstd.h +740 -153
- data/ext/zstdlib/{zstd-1.4.2/lib/common → zstd-1.5.0/lib}/zstd_errors.h +3 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzclose.c +1 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzcompatibility.h +1 -1
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzguts.h +0 -0
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzlib.c +9 -9
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzread.c +16 -8
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/gzwrite.c +8 -8
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/zstd_zlibwrapper.c +131 -45
- data/ext/zstdlib/{zstd-1.4.2 → zstd-1.5.0}/zlibWrapper/zstd_zlibwrapper.h +1 -1
- data/lib/2.2/zstdlib.so +0 -0
- data/lib/2.3/zstdlib.so +0 -0
- data/lib/2.4/zstdlib.so +0 -0
- data/lib/2.5/zstdlib.so +0 -0
- data/lib/2.6/zstdlib.so +0 -0
- data/lib/2.7/zstdlib.so +0 -0
- metadata +76 -67
- data/ext/zstdlib/zstd-1.4.2/lib/common/compiler.h +0 -147
- data/ext/zstdlib/zstd-1.4.2/lib/common/debug.c +0 -44
- data/ext/zstdlib/zstd-1.4.2/lib/common/entropy_common.c +0 -236
- data/ext/zstdlib/zstd-1.4.2/lib/common/zstd_internal.h +0 -371
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_compress.c +0 -3904
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_lazy.c +0 -1111
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_lazy.h +0 -67
- data/ext/zstdlib/zstd-1.4.2/lib/compress/zstdmt_compress.h +0 -192
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -15,9 +15,9 @@
|
|
15
15
|
/*-*******************************************************
|
16
16
|
* Dependencies
|
17
17
|
*********************************************************/
|
18
|
-
#include <stddef.h> /* size_t */
|
19
|
-
#include "zstd.h" /* DCtx, and some public functions */
|
20
|
-
#include "zstd_internal.h" /* blockProperties_t, and some public functions */
|
18
|
+
#include "../common/zstd_deps.h" /* size_t */
|
19
|
+
#include "../zstd.h" /* DCtx, and some public functions */
|
20
|
+
#include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
|
21
21
|
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
|
22
22
|
|
23
23
|
|
@@ -48,12 +48,15 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
|
48
48
|
* this function must be called with valid parameters only
|
49
49
|
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
|
50
50
|
* in which case it cannot fail.
|
51
|
+
* The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
|
52
|
+
* defined in zstd_decompress_internal.h.
|
51
53
|
* Internal use only.
|
52
54
|
*/
|
53
55
|
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
54
56
|
const short* normalizedCounter, unsigned maxSymbolValue,
|
55
57
|
const U32* baseValue, const U32* nbAdditionalBits,
|
56
|
-
unsigned tableLog);
|
58
|
+
unsigned tableLog, void* wksp, size_t wkspSize,
|
59
|
+
int bmi2);
|
57
60
|
|
58
61
|
|
59
62
|
#endif /* ZSTD_DEC_BLOCK_H */
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -19,34 +19,34 @@
|
|
19
19
|
/*-*******************************************************
|
20
20
|
* Dependencies
|
21
21
|
*********************************************************/
|
22
|
-
#include "mem.h" /* BYTE, U16, U32 */
|
23
|
-
#include "zstd_internal.h" /* ZSTD_seqSymbol */
|
22
|
+
#include "../common/mem.h" /* BYTE, U16, U32 */
|
23
|
+
#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
|
24
24
|
|
25
25
|
|
26
26
|
|
27
27
|
/*-*******************************************************
|
28
28
|
* Constants
|
29
29
|
*********************************************************/
|
30
|
-
static const U32 LL_base[MaxLL+1] = {
|
30
|
+
static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
|
31
31
|
0, 1, 2, 3, 4, 5, 6, 7,
|
32
32
|
8, 9, 10, 11, 12, 13, 14, 15,
|
33
33
|
16, 18, 20, 22, 24, 28, 32, 40,
|
34
34
|
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
|
35
35
|
0x2000, 0x4000, 0x8000, 0x10000 };
|
36
36
|
|
37
|
-
static const U32 OF_base[MaxOff+1] = {
|
37
|
+
static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
|
38
38
|
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
|
39
39
|
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
|
40
40
|
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
41
41
|
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
42
42
|
|
43
|
-
static const U32 OF_bits[MaxOff+1] = {
|
43
|
+
static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
|
44
44
|
0, 1, 2, 3, 4, 5, 6, 7,
|
45
45
|
8, 9, 10, 11, 12, 13, 14, 15,
|
46
46
|
16, 17, 18, 19, 20, 21, 22, 23,
|
47
47
|
24, 25, 26, 27, 28, 29, 30, 31 };
|
48
48
|
|
49
|
-
static const U32 ML_base[MaxML+1] = {
|
49
|
+
static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
|
50
50
|
3, 4, 5, 6, 7, 8, 9, 10,
|
51
51
|
11, 12, 13, 14, 15, 16, 17, 18,
|
52
52
|
19, 20, 21, 22, 23, 24, 25, 26,
|
@@ -73,12 +73,16 @@ static const U32 ML_base[MaxML+1] = {
|
|
73
73
|
|
74
74
|
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
|
75
75
|
|
76
|
+
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
|
77
|
+
#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
|
78
|
+
|
76
79
|
typedef struct {
|
77
80
|
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
78
81
|
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
79
82
|
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
80
83
|
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
|
81
84
|
U32 rep[ZSTD_REP_NUM];
|
85
|
+
U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
|
82
86
|
} ZSTD_entropyDTables_t;
|
83
87
|
|
84
88
|
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
|
@@ -95,6 +99,13 @@ typedef enum {
|
|
95
99
|
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
96
100
|
} ZSTD_dictUses_e;
|
97
101
|
|
102
|
+
/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
|
103
|
+
typedef struct {
|
104
|
+
const ZSTD_DDict** ddictPtrTable;
|
105
|
+
size_t ddictPtrTableSize;
|
106
|
+
size_t ddictPtrCount;
|
107
|
+
} ZSTD_DDictHashSet;
|
108
|
+
|
98
109
|
struct ZSTD_DCtx_s
|
99
110
|
{
|
100
111
|
const ZSTD_seqSymbol* LLTptr;
|
@@ -109,6 +120,7 @@ struct ZSTD_DCtx_s
|
|
109
120
|
const void* dictEnd; /* end of previous segment */
|
110
121
|
size_t expected;
|
111
122
|
ZSTD_frameHeader fParams;
|
123
|
+
U64 processedCSize;
|
112
124
|
U64 decodedSize;
|
113
125
|
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
|
114
126
|
ZSTD_dStage stage;
|
@@ -117,6 +129,8 @@ struct ZSTD_DCtx_s
|
|
117
129
|
XXH64_state_t xxhState;
|
118
130
|
size_t headerSize;
|
119
131
|
ZSTD_format_e format;
|
132
|
+
ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
|
133
|
+
U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
|
120
134
|
const BYTE* litPtr;
|
121
135
|
ZSTD_customMem customMem;
|
122
136
|
size_t litSize;
|
@@ -130,6 +144,8 @@ struct ZSTD_DCtx_s
|
|
130
144
|
U32 dictID;
|
131
145
|
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
132
146
|
ZSTD_dictUses_e dictUses;
|
147
|
+
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
|
148
|
+
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
|
133
149
|
|
134
150
|
/* streaming */
|
135
151
|
ZSTD_dStreamStage streamStage;
|
@@ -147,10 +163,24 @@ struct ZSTD_DCtx_s
|
|
147
163
|
U32 legacyVersion;
|
148
164
|
U32 hostageByte;
|
149
165
|
int noForwardProgress;
|
166
|
+
ZSTD_bufferMode_e outBufferMode;
|
167
|
+
ZSTD_outBuffer expectedOutBuffer;
|
150
168
|
|
151
169
|
/* workspace */
|
152
170
|
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
|
153
171
|
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
|
172
|
+
|
173
|
+
size_t oversizedDuration;
|
174
|
+
|
175
|
+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
176
|
+
void const* dictContentBeginForFuzzing;
|
177
|
+
void const* dictContentEndForFuzzing;
|
178
|
+
#endif
|
179
|
+
|
180
|
+
/* Tracing */
|
181
|
+
#if ZSTD_TRACE
|
182
|
+
ZSTD_TraceCtx traceCtx;
|
183
|
+
#endif
|
154
184
|
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
155
185
|
|
156
186
|
|
@@ -160,7 +190,7 @@ struct ZSTD_DCtx_s
|
|
160
190
|
|
161
191
|
/*! ZSTD_loadDEntropy() :
|
162
192
|
* dict : must point at beginning of a valid zstd dictionary.
|
163
|
-
* @return : size of entropy tables
|
193
|
+
* @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
|
164
194
|
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
165
195
|
const void* const dict, size_t const dictSize);
|
166
196
|
|
@@ -169,7 +199,7 @@ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
|
169
199
|
* If yes, do nothing (continue on current segment).
|
170
200
|
* If not, classify previous segment as "external dictionary", and start a new segment.
|
171
201
|
* This function cannot fail. */
|
172
|
-
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
|
202
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
|
173
203
|
|
174
204
|
|
175
205
|
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
|
@@ -0,0 +1,452 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
9
|
+
*/
|
10
|
+
|
11
|
+
#ifndef DICTBUILDER_H_001
|
12
|
+
#define DICTBUILDER_H_001
|
13
|
+
|
14
|
+
#if defined (__cplusplus)
|
15
|
+
extern "C" {
|
16
|
+
#endif
|
17
|
+
|
18
|
+
|
19
|
+
/*====== Dependencies ======*/
|
20
|
+
#include <stddef.h> /* size_t */
|
21
|
+
|
22
|
+
|
23
|
+
/* ===== ZDICTLIB_API : control library symbols visibility ===== */
|
24
|
+
#ifndef ZDICTLIB_VISIBILITY
|
25
|
+
# if defined(__GNUC__) && (__GNUC__ >= 4)
|
26
|
+
# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
|
27
|
+
# else
|
28
|
+
# define ZDICTLIB_VISIBILITY
|
29
|
+
# endif
|
30
|
+
#endif
|
31
|
+
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
32
|
+
# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
|
33
|
+
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
|
34
|
+
# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
35
|
+
#else
|
36
|
+
# define ZDICTLIB_API ZDICTLIB_VISIBILITY
|
37
|
+
#endif
|
38
|
+
|
39
|
+
/*******************************************************************************
|
40
|
+
* Zstd dictionary builder
|
41
|
+
*
|
42
|
+
* FAQ
|
43
|
+
* ===
|
44
|
+
* Why should I use a dictionary?
|
45
|
+
* ------------------------------
|
46
|
+
*
|
47
|
+
* Zstd can use dictionaries to improve compression ratio of small data.
|
48
|
+
* Traditionally small files don't compress well because there is very little
|
49
|
+
* repetition in a single sample, since it is small. But, if you are compressing
|
50
|
+
* many similar files, like a bunch of JSON records that share the same
|
51
|
+
* structure, you can train a dictionary ahead of time on some samples of
|
52
|
+
* these files. Then, zstd can use the dictionary to find repetitions that are
|
53
|
+
* present across samples. This can vastly improve compression ratio.
|
54
|
+
*
|
55
|
+
* When is a dictionary useful?
|
56
|
+
* ----------------------------
|
57
|
+
*
|
58
|
+
* Dictionaries are useful when compressing many small files that are similar.
|
59
|
+
* The larger a file is, the less benefit a dictionary will have. Generally,
|
60
|
+
* we don't expect dictionary compression to be effective past 100KB. And the
|
61
|
+
* smaller a file is, the more we would expect the dictionary to help.
|
62
|
+
*
|
63
|
+
* How do I use a dictionary?
|
64
|
+
* --------------------------
|
65
|
+
*
|
66
|
+
* Simply pass the dictionary to the zstd compressor with
|
67
|
+
* `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
|
68
|
+
* the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
|
69
|
+
* more advanced functions that allow selecting some options, see zstd.h for
|
70
|
+
* complete documentation.
|
71
|
+
*
|
72
|
+
* What is a zstd dictionary?
|
73
|
+
* --------------------------
|
74
|
+
*
|
75
|
+
* A zstd dictionary has two pieces: Its header, and its content. The header
|
76
|
+
* contains a magic number, the dictionary ID, and entropy tables. These
|
77
|
+
* entropy tables allow zstd to save on header costs in the compressed file,
|
78
|
+
* which really matters for small data. The content is just bytes, which are
|
79
|
+
* repeated content that is common across many samples.
|
80
|
+
*
|
81
|
+
* What is a raw content dictionary?
|
82
|
+
* ---------------------------------
|
83
|
+
*
|
84
|
+
* A raw content dictionary is just bytes. It doesn't have a zstd dictionary
|
85
|
+
* header, a dictionary ID, or entropy tables. Any buffer is a valid raw
|
86
|
+
* content dictionary.
|
87
|
+
*
|
88
|
+
* How do I train a dictionary?
|
89
|
+
* ----------------------------
|
90
|
+
*
|
91
|
+
* Gather samples from your use case. These samples should be similar to each
|
92
|
+
* other. If you have several use cases, you could try to train one dictionary
|
93
|
+
* per use case.
|
94
|
+
*
|
95
|
+
* Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
|
96
|
+
* dictionary. There are a few advanced versions of this function, but this
|
97
|
+
* is a great starting point. If you want to further tune your dictionary
|
98
|
+
* you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
|
99
|
+
* you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
|
100
|
+
*
|
101
|
+
* If the dictionary training function fails, that is likely because you
|
102
|
+
* either passed too few samples, or a dictionary would not be effective
|
103
|
+
* for your data. Look at the messages that the dictionary trainer printed,
|
104
|
+
* if it doesn't say too few samples, then a dictionary would not be effective.
|
105
|
+
*
|
106
|
+
* How large should my dictionary be?
|
107
|
+
* ----------------------------------
|
108
|
+
*
|
109
|
+
* A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
|
110
|
+
* The zstd CLI defaults to a 110KB dictionary. You likely don't need a
|
111
|
+
* dictionary larger than that. But, most use cases can get away with a
|
112
|
+
* smaller dictionary. The advanced dictionary builders can automatically
|
113
|
+
* shrink the dictionary for you, and select the smallest size that
|
114
|
+
* doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
|
115
|
+
* A smaller dictionary can save memory, and potentially speed up
|
116
|
+
* compression.
|
117
|
+
*
|
118
|
+
* How many samples should I provide to the dictionary builder?
|
119
|
+
* ------------------------------------------------------------
|
120
|
+
*
|
121
|
+
* We generally recommend passing ~100x the size of the dictionary
|
122
|
+
* in samples. A few thousand should suffice. Having too few samples
|
123
|
+
* can hurt the dictionary's effectiveness. Having more samples will
|
124
|
+
* only improve the dictionary's effectiveness. But having too many
|
125
|
+
* samples can slow down the dictionary builder.
|
126
|
+
*
|
127
|
+
* How do I determine if a dictionary will be effective?
|
128
|
+
* -----------------------------------------------------
|
129
|
+
*
|
130
|
+
* Simply train a dictionary and try it out. You can use zstd's built in
|
131
|
+
* benchmarking tool to test the dictionary effectiveness.
|
132
|
+
*
|
133
|
+
* # Benchmark levels 1-3 without a dictionary
|
134
|
+
* zstd -b1e3 -r /path/to/my/files
|
135
|
+
* # Benchmark levels 1-3 with a dictionary
|
136
|
+
* zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
|
137
|
+
*
|
138
|
+
* When should I retrain a dictionary?
|
139
|
+
* -----------------------------------
|
140
|
+
*
|
141
|
+
* You should retrain a dictionary when its effectiveness drops. Dictionary
|
142
|
+
* effectiveness drops as the data you are compressing changes. Generally, we do
|
143
|
+
* expect dictionaries to "decay" over time, as your data changes, but the rate
|
144
|
+
* at which they decay depends on your use case. Internally, we regularly
|
145
|
+
* retrain dictionaries, and if the new dictionary performs significantly
|
146
|
+
* better than the old dictionary, we will ship the new dictionary.
|
147
|
+
*
|
148
|
+
* I have a raw content dictionary, how do I turn it into a zstd dictionary?
|
149
|
+
* -------------------------------------------------------------------------
|
150
|
+
*
|
151
|
+
* If you have a raw content dictionary, e.g. by manually constructing it, or
|
152
|
+
* using a third-party dictionary builder, you can turn it into a zstd
|
153
|
+
* dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
|
154
|
+
* provide some samples of the data. It will add the zstd header to the
|
155
|
+
* raw content, which contains a dictionary ID and entropy tables, which
|
156
|
+
* will improve compression ratio, and allow zstd to write the dictionary ID
|
157
|
+
* into the frame, if you so choose.
|
158
|
+
*
|
159
|
+
* Do I have to use zstd's dictionary builder?
|
160
|
+
* -------------------------------------------
|
161
|
+
*
|
162
|
+
* No! You can construct dictionary content however you please, it is just
|
163
|
+
* bytes. It will always be valid as a raw content dictionary. If you want
|
164
|
+
* a zstd dictionary, which can improve compression ratio, use
|
165
|
+
* `ZDICT_finalizeDictionary()`.
|
166
|
+
*
|
167
|
+
* What is the attack surface of a zstd dictionary?
|
168
|
+
* ------------------------------------------------
|
169
|
+
*
|
170
|
+
* Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
|
171
|
+
* zstd should never crash, or access out-of-bounds memory no matter what
|
172
|
+
* the dictionary is. However, if an attacker can control the dictionary
|
173
|
+
* during decompression, they can cause zstd to generate arbitrary bytes,
|
174
|
+
* just like if they controlled the compressed data.
|
175
|
+
*
|
176
|
+
******************************************************************************/
|
177
|
+
|
178
|
+
|
179
|
+
/*! ZDICT_trainFromBuffer():
|
180
|
+
* Train a dictionary from an array of samples.
|
181
|
+
* Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,
|
182
|
+
* f=20, and accel=1.
|
183
|
+
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
184
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
185
|
+
* The resulting dictionary will be saved into `dictBuffer`.
|
186
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
187
|
+
* or an error code, which can be tested with ZDICT_isError().
|
188
|
+
* Note: Dictionary training will fail if there are not enough samples to construct a
|
189
|
+
* dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
|
190
|
+
* If dictionary training fails, you should use zstd without a dictionary, as the dictionary
|
191
|
+
* would've been ineffective anyways. If you believe your samples would benefit from a dictionary
|
192
|
+
* please open an issue with details, and we can look into it.
|
193
|
+
* Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
|
194
|
+
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
195
|
+
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
196
|
+
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
|
197
|
+
* It's recommended that total size of all samples be about 100 times the target size of dictionary.
|
198
|
+
*/
|
199
|
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
200
|
+
const void* samplesBuffer,
|
201
|
+
const size_t* samplesSizes, unsigned nbSamples);
|
202
|
+
|
203
|
+
typedef struct {
|
204
|
+
int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */
|
205
|
+
unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
206
|
+
unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value)
|
207
|
+
* NOTE: The zstd format reserves some dictionary IDs for future use.
|
208
|
+
* You may use them in private settings, but be warned that they
|
209
|
+
* may be used by zstd in a public dictionary registry in the future.
|
210
|
+
* These dictionary IDs are:
|
211
|
+
* - low range : <= 32767
|
212
|
+
* - high range : >= (2^31)
|
213
|
+
*/
|
214
|
+
} ZDICT_params_t;
|
215
|
+
|
216
|
+
/*! ZDICT_finalizeDictionary():
|
217
|
+
* Given a custom content as a basis for dictionary, and a set of samples,
|
218
|
+
* finalize dictionary by adding headers and statistics according to the zstd
|
219
|
+
* dictionary format.
|
220
|
+
*
|
221
|
+
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
222
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each
|
223
|
+
* sample in order. The samples are used to construct the statistics, so they
|
224
|
+
* should be representative of what you will compress with this dictionary.
|
225
|
+
*
|
226
|
+
* The compression level can be set in `parameters`. You should pass the
|
227
|
+
* compression level you expect to use in production. The statistics for each
|
228
|
+
* compression level differ, so tuning the dictionary for the compression level
|
229
|
+
* can help quite a bit.
|
230
|
+
*
|
231
|
+
* You can set an explicit dictionary ID in `parameters`, or allow us to pick
|
232
|
+
* a random dictionary ID for you, but we can't guarantee no collisions.
|
233
|
+
*
|
234
|
+
* The dstDictBuffer and the dictContent may overlap, and the content will be
|
235
|
+
* appended to the end of the header. If the header + the content doesn't fit in
|
236
|
+
* maxDictSize the beginning of the content is truncated to make room, since it
|
237
|
+
* is presumed that the most profitable content is at the end of the dictionary,
|
238
|
+
* since that is the cheapest to reference.
|
239
|
+
*
|
240
|
+
* `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
241
|
+
* `maxDictSize` must be >= max(dictContentSize, ZDICT_DICTSIZE_MIN).
|
242
|
+
*
|
243
|
+
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
|
244
|
+
* or an error code, which can be tested by ZDICT_isError().
|
245
|
+
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if
|
246
|
+
* instructed to, using notificationLevel>0.
|
247
|
+
* NOTE: This function currently may fail in several edge cases including:
|
248
|
+
* * Not enough samples
|
249
|
+
* * Samples are uncompressible
|
250
|
+
* * Samples are all exactly the same
|
251
|
+
*/
|
252
|
+
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
|
253
|
+
const void* dictContent, size_t dictContentSize,
|
254
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
255
|
+
ZDICT_params_t parameters);
|
256
|
+
|
257
|
+
|
258
|
+
/*====== Helper functions ======*/
|
259
|
+
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
|
260
|
+
ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
|
261
|
+
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
|
262
|
+
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
263
|
+
|
264
|
+
|
265
|
+
|
266
|
+
#ifdef ZDICT_STATIC_LINKING_ONLY
|
267
|
+
|
268
|
+
/* ====================================================================================
|
269
|
+
* The definitions in this section are considered experimental.
|
270
|
+
* They should never be used with a dynamic library, as they may change in the future.
|
271
|
+
* They are provided for advanced usages.
|
272
|
+
* Use them only in association with static linking.
|
273
|
+
* ==================================================================================== */
|
274
|
+
|
275
|
+
#define ZDICT_CONTENTSIZE_MIN 128
|
276
|
+
#define ZDICT_DICTSIZE_MIN 256
|
277
|
+
|
278
|
+
/*! ZDICT_cover_params_t:
|
279
|
+
* k and d are the only required parameters.
|
280
|
+
* For others, value 0 means default.
|
281
|
+
*/
|
282
|
+
typedef struct {
|
283
|
+
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
284
|
+
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
285
|
+
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
|
286
|
+
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
287
|
+
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
|
288
|
+
unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
|
289
|
+
unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
|
290
|
+
ZDICT_params_t zParams;
|
291
|
+
} ZDICT_cover_params_t;
|
292
|
+
|
293
|
+
/*! ZDICT_fastCover_params_t:
 * Parameter set taken by ZDICT_trainFromBuffer_fastCover() and
 * ZDICT_optimizeTrainFromBuffer_fastCover().
 */
typedef struct {
|
294
|
+
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
295
|
+
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
296
|
+
unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default (20) */
|
297
|
+
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
|
298
|
+
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
|
299
|
+
double splitPoint; /* Fraction of samples used for training : Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing : 0 means default (0.75) : 1.0 means all samples are used for both training and testing */
|
300
|
+
unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default (1) */
|
301
|
+
unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and select the smallest dictionary that is at most shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
|
302
|
+
unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */
|
303
|
+
|
304
|
+
ZDICT_params_t zParams;
|
305
|
+
} ZDICT_fastCover_params_t;
|
306
|
+
|
307
|
+
/*! ZDICT_trainFromBuffer_cover():
|
308
|
+
* Train a dictionary from an array of samples using the COVER algorithm.
|
309
|
+
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
310
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
311
|
+
* The resulting dictionary will be saved into `dictBuffer`.
|
312
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
313
|
+
* or an error code, which can be tested with ZDICT_isError().
|
314
|
+
* See ZDICT_trainFromBuffer() for details on failure modes.
|
315
|
+
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
|
316
|
+
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
317
|
+
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
318
|
+
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
|
319
|
+
* It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
|
320
|
+
*/
|
321
|
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
322
|
+
void *dictBuffer, size_t dictBufferCapacity,
|
323
|
+
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
324
|
+
ZDICT_cover_params_t parameters);
|
325
|
+
|
326
|
+
/*! ZDICT_optimizeTrainFromBuffer_cover():
|
327
|
+
* The same requirements as above hold for all the parameters except `parameters`.
|
328
|
+
* This function tries many parameter combinations and picks the best parameters.
|
329
|
+
* `*parameters` is filled with the best parameters found,
|
330
|
+
* dictionary constructed with those parameters is stored in `dictBuffer`.
|
331
|
+
*
|
332
|
+
* All of the parameters d, k, steps are optional.
|
333
|
+
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
|
334
|
+
* If steps is zero, its default value is used.
|
335
|
+
* If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
|
336
|
+
*
|
337
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
338
|
+
* or an error code, which can be tested with ZDICT_isError().
|
339
|
+
* On success `*parameters` contains the parameters selected.
|
340
|
+
* See ZDICT_trainFromBuffer() for details on failure modes.
|
341
|
+
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
342
|
+
*/
|
343
|
+
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
344
|
+
void* dictBuffer, size_t dictBufferCapacity,
|
345
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
346
|
+
ZDICT_cover_params_t* parameters);
|
347
|
+
|
348
|
+
/*! ZDICT_trainFromBuffer_fastCover():
|
349
|
+
* Train a dictionary from an array of samples using a modified version of COVER algorithm.
|
350
|
+
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
351
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
352
|
+
* d and k are required.
|
353
|
+
* All other parameters are optional and will use default values if not provided.
|
354
|
+
* The resulting dictionary will be saved into `dictBuffer`.
|
355
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
356
|
+
* or an error code, which can be tested with ZDICT_isError().
|
357
|
+
* See ZDICT_trainFromBuffer() for details on failure modes.
|
358
|
+
* Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
|
359
|
+
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
360
|
+
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
361
|
+
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
|
362
|
+
* It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
|
363
|
+
*/
|
364
|
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
|
365
|
+
size_t dictBufferCapacity, const void *samplesBuffer,
|
366
|
+
const size_t *samplesSizes, unsigned nbSamples,
|
367
|
+
ZDICT_fastCover_params_t parameters);
|
368
|
+
|
369
|
+
/*! ZDICT_optimizeTrainFromBuffer_fastCover():
|
370
|
+
* The same requirements as above hold for all the parameters except `parameters`.
|
371
|
+
* This function tries many parameter combinations (specifically, k and d combinations)
|
372
|
+
* and picks the best parameters. `*parameters` is filled with the best parameters found,
|
373
|
+
* dictionary constructed with those parameters is stored in `dictBuffer`.
|
374
|
+
* All of the parameters d, k, steps, f, and accel are optional.
|
375
|
+
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
|
376
|
+
* If steps is zero, its default value is used.
|
377
|
+
* If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
|
378
|
+
* If f is zero, default value of 20 is used.
|
379
|
+
* If accel is zero, default value of 1 is used.
|
380
|
+
*
|
381
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
382
|
+
* or an error code, which can be tested with ZDICT_isError().
|
383
|
+
* On success `*parameters` contains the parameters selected.
|
384
|
+
* See ZDICT_trainFromBuffer() for details on failure modes.
|
385
|
+
* Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
|
386
|
+
*/
|
387
|
+
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
|
388
|
+
size_t dictBufferCapacity, const void* samplesBuffer,
|
389
|
+
const size_t* samplesSizes, unsigned nbSamples,
|
390
|
+
ZDICT_fastCover_params_t* parameters);
|
391
|
+
|
392
|
+
/*! ZDICT_legacy_params_t:
 * Parameter set taken by ZDICT_trainFromBuffer_legacy().
 */
typedef struct {
|
393
|
+
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
|
394
|
+
ZDICT_params_t zParams;
|
395
|
+
} ZDICT_legacy_params_t;
|
396
|
+
|
397
|
+
/*! ZDICT_trainFromBuffer_legacy():
|
398
|
+
* Train a dictionary from an array of samples.
|
399
|
+
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
400
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
401
|
+
* The resulting dictionary will be saved into `dictBuffer`.
|
402
|
+
* `parameters` is optional and can be provided with values set to 0 to mean "default".
|
403
|
+
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
404
|
+
* or an error code, which can be tested with ZDICT_isError().
|
405
|
+
* See ZDICT_trainFromBuffer() for details on failure modes.
|
406
|
+
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
407
|
+
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
408
|
+
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
|
409
|
+
* It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
|
410
|
+
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
411
|
+
*/
|
412
|
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
413
|
+
void* dictBuffer, size_t dictBufferCapacity,
|
414
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
415
|
+
ZDICT_legacy_params_t parameters);
|
416
|
+
|
417
|
+
|
418
|
+
/* Deprecation warnings */
|
419
|
+
/* It is generally possible to disable deprecation warnings from compiler,
|
420
|
+
for example with -Wno-deprecated-declarations for gcc
|
421
|
+
or _CRT_SECURE_NO_WARNINGS in Visual Studio.
|
422
|
+
Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
|
423
|
+
#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
|
424
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
|
425
|
+
#else
|
426
|
+
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
427
|
+
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
428
|
+
# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
|
429
|
+
# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
|
430
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
|
431
|
+
# elif (ZDICT_GCC_VERSION >= 301)
|
432
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
|
433
|
+
# elif defined(_MSC_VER)
|
434
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
|
435
|
+
# else
|
436
|
+
# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
|
437
|
+
# define ZDICT_DEPRECATED(message) ZDICTLIB_API
|
438
|
+
# endif
|
439
|
+
#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
|
440
|
+
|
441
|
+
ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
|
442
|
+
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
443
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
|
444
|
+
|
445
|
+
|
446
|
+
#endif /* ZDICT_STATIC_LINKING_ONLY */
|
447
|
+
|
448
|
+
#if defined (__cplusplus)
|
449
|
+
}
|
450
|
+
#endif
|
451
|
+
|
452
|
+
#endif /* DICTBUILDER_H_001 */
|