zstd-ruby 1.4.4.0 → 1.5.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +74 -97
- data/ext/zstdruby/libzstd/common/compiler.h +219 -20
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +184 -80
- data/ext/zstdruby/libzstd/common/error_private.c +11 -2
- data/ext/zstdruby/libzstd/common/error_private.h +87 -4
- data/ext/zstdruby/libzstd/common/fse.h +47 -116
- data/ext/zstdruby/libzstd/common/fse_decompress.c +127 -127
- data/ext/zstdruby/libzstd/common/huf.h +112 -197
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +11 -5
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +78 -22
- data/ext/zstdruby/libzstd/common/threading.h +9 -13
- data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
- data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +186 -144
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +99 -196
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +968 -331
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +4120 -1191
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +688 -159
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +121 -40
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +62 -35
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +577 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +322 -115
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +394 -154
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -253
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1289 -247
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +339 -212
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +508 -282
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +217 -466
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +35 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1220 -572
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +23 -19
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +859 -273
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1244 -375
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +21 -7
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +74 -11
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +75 -54
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +55 -36
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +126 -110
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +248 -56
- data/ext/zstdruby/libzstd/zstd.h +1277 -306
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +29 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +24 -39
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -289
- data/ext/zstdruby/libzstd/README.md +0 -159
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -47
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2152
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3514
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3156
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3641
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4046
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4150
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4533
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -8,34 +8,184 @@
|
|
8
8
|
* You may select, at your option, one of the above-listed licenses.
|
9
9
|
*/
|
10
10
|
|
11
|
-
#ifndef DICTBUILDER_H_001
|
12
|
-
#define DICTBUILDER_H_001
|
13
|
-
|
14
11
|
#if defined (__cplusplus)
|
15
12
|
extern "C" {
|
16
13
|
#endif
|
17
14
|
|
15
|
+
#ifndef ZSTD_ZDICT_H
|
16
|
+
#define ZSTD_ZDICT_H
|
18
17
|
|
19
18
|
/*====== Dependencies ======*/
|
20
19
|
#include <stddef.h> /* size_t */
|
21
20
|
|
22
21
|
|
23
22
|
/* ===== ZDICTLIB_API : control library symbols visibility ===== */
|
24
|
-
#ifndef
|
25
|
-
|
26
|
-
#
|
23
|
+
#ifndef ZDICTLIB_VISIBLE
|
24
|
+
/* Backwards compatibility with old macro name */
|
25
|
+
# ifdef ZDICTLIB_VISIBILITY
|
26
|
+
# define ZDICTLIB_VISIBLE ZDICTLIB_VISIBILITY
|
27
|
+
# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
|
28
|
+
# define ZDICTLIB_VISIBLE __attribute__ ((visibility ("default")))
|
29
|
+
# else
|
30
|
+
# define ZDICTLIB_VISIBLE
|
31
|
+
# endif
|
32
|
+
#endif
|
33
|
+
|
34
|
+
#ifndef ZDICTLIB_HIDDEN
|
35
|
+
# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
|
36
|
+
# define ZDICTLIB_HIDDEN __attribute__ ((visibility ("hidden")))
|
27
37
|
# else
|
28
|
-
# define
|
38
|
+
# define ZDICTLIB_HIDDEN
|
29
39
|
# endif
|
30
40
|
#endif
|
41
|
+
|
31
42
|
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
32
|
-
# define ZDICTLIB_API __declspec(dllexport)
|
43
|
+
# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBLE
|
33
44
|
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
|
34
|
-
# define ZDICTLIB_API __declspec(dllimport)
|
45
|
+
# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBLE /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
35
46
|
#else
|
36
|
-
# define ZDICTLIB_API
|
47
|
+
# define ZDICTLIB_API ZDICTLIB_VISIBLE
|
37
48
|
#endif
|
38
49
|
|
50
|
+
/*******************************************************************************
|
51
|
+
* Zstd dictionary builder
|
52
|
+
*
|
53
|
+
* FAQ
|
54
|
+
* ===
|
55
|
+
* Why should I use a dictionary?
|
56
|
+
* ------------------------------
|
57
|
+
*
|
58
|
+
* Zstd can use dictionaries to improve compression ratio of small data.
|
59
|
+
* Traditionally small files don't compress well because there is very little
|
60
|
+
* repetition in a single sample, since it is small. But, if you are compressing
|
61
|
+
* many similar files, like a bunch of JSON records that share the same
|
62
|
+
* structure, you can train a dictionary ahead of time on some samples of
|
63
|
+
* these files. Then, zstd can use the dictionary to find repetitions that are
|
64
|
+
* present across samples. This can vastly improve compression ratio.
|
65
|
+
*
|
66
|
+
* When is a dictionary useful?
|
67
|
+
* ----------------------------
|
68
|
+
*
|
69
|
+
* Dictionaries are useful when compressing many small files that are similar.
|
70
|
+
* The larger a file is, the less benefit a dictionary will have. Generally,
|
71
|
+
* we don't expect dictionary compression to be effective past 100KB. And the
|
72
|
+
* smaller a file is, the more we would expect the dictionary to help.
|
73
|
+
*
|
74
|
+
* How do I use a dictionary?
|
75
|
+
* --------------------------
|
76
|
+
*
|
77
|
+
* Simply pass the dictionary to the zstd compressor with
|
78
|
+
* `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
|
79
|
+
* the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
|
80
|
+
* more advanced functions that allow selecting some options, see zstd.h for
|
81
|
+
* complete documentation.
|
82
|
+
*
|
83
|
+
* What is a zstd dictionary?
|
84
|
+
* --------------------------
|
85
|
+
*
|
86
|
+
* A zstd dictionary has two pieces: Its header, and its content. The header
|
87
|
+
* contains a magic number, the dictionary ID, and entropy tables. These
|
88
|
+
* entropy tables allow zstd to save on header costs in the compressed file,
|
89
|
+
* which really matters for small data. The content is just bytes, which are
|
90
|
+
* repeated content that is common across many samples.
|
91
|
+
*
|
92
|
+
* What is a raw content dictionary?
|
93
|
+
* ---------------------------------
|
94
|
+
*
|
95
|
+
* A raw content dictionary is just bytes. It doesn't have a zstd dictionary
|
96
|
+
* header, a dictionary ID, or entropy tables. Any buffer is a valid raw
|
97
|
+
* content dictionary.
|
98
|
+
*
|
99
|
+
* How do I train a dictionary?
|
100
|
+
* ----------------------------
|
101
|
+
*
|
102
|
+
* Gather samples from your use case. These samples should be similar to each
|
103
|
+
* other. If you have several use cases, you could try to train one dictionary
|
104
|
+
* per use case.
|
105
|
+
*
|
106
|
+
* Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
|
107
|
+
* dictionary. There are a few advanced versions of this function, but this
|
108
|
+
* is a great starting point. If you want to further tune your dictionary
|
109
|
+
* you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
|
110
|
+
* you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
|
111
|
+
*
|
112
|
+
* If the dictionary training function fails, that is likely because you
|
113
|
+
* either passed too few samples, or a dictionary would not be effective
|
114
|
+
* for your data. Look at the messages that the dictionary trainer printed,
|
115
|
+
* if it doesn't say too few samples, then a dictionary would not be effective.
|
116
|
+
*
|
117
|
+
* How large should my dictionary be?
|
118
|
+
* ----------------------------------
|
119
|
+
*
|
120
|
+
* A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
|
121
|
+
* The zstd CLI defaults to a 110KB dictionary. You likely don't need a
|
122
|
+
* dictionary larger than that. But, most use cases can get away with a
|
123
|
+
* smaller dictionary. The advanced dictionary builders can automatically
|
124
|
+
* shrink the dictionary for you, and select the smallest size that doesn't
|
125
|
+
* hurt compression ratio too much. See the `shrinkDict` parameter.
|
126
|
+
* A smaller dictionary can save memory, and potentially speed up
|
127
|
+
* compression.
|
128
|
+
*
|
129
|
+
* How many samples should I provide to the dictionary builder?
|
130
|
+
* ------------------------------------------------------------
|
131
|
+
*
|
132
|
+
* We generally recommend passing ~100x the size of the dictionary
|
133
|
+
* in samples. A few thousand should suffice. Having too few samples
|
134
|
+
* can hurt the dictionary's effectiveness. Having more samples will
|
135
|
+
* only improve the dictionary's effectiveness. But having too many
|
136
|
+
* samples can slow down the dictionary builder.
|
137
|
+
*
|
138
|
+
* How do I determine if a dictionary will be effective?
|
139
|
+
* -----------------------------------------------------
|
140
|
+
*
|
141
|
+
* Simply train a dictionary and try it out. You can use zstd's built-in
|
142
|
+
* benchmarking tool to test the dictionary effectiveness.
|
143
|
+
*
|
144
|
+
* # Benchmark levels 1-3 without a dictionary
|
145
|
+
* zstd -b1e3 -r /path/to/my/files
|
146
|
+
* # Benchmark levels 1-3 with a dictionary
|
147
|
+
* zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
|
148
|
+
*
|
149
|
+
* When should I retrain a dictionary?
|
150
|
+
* -----------------------------------
|
151
|
+
*
|
152
|
+
* You should retrain a dictionary when its effectiveness drops. Dictionary
|
153
|
+
* effectiveness drops as the data you are compressing changes. Generally, we do
|
154
|
+
* expect dictionaries to "decay" over time, as your data changes, but the rate
|
155
|
+
* at which they decay depends on your use case. Internally, we regularly
|
156
|
+
* retrain dictionaries, and if the new dictionary performs significantly
|
157
|
+
* better than the old dictionary, we will ship the new dictionary.
|
158
|
+
*
|
159
|
+
* I have a raw content dictionary, how do I turn it into a zstd dictionary?
|
160
|
+
* -------------------------------------------------------------------------
|
161
|
+
*
|
162
|
+
* If you have a raw content dictionary, e.g. by manually constructing it, or
|
163
|
+
* using a third-party dictionary builder, you can turn it into a zstd
|
164
|
+
* dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
|
165
|
+
* provide some samples of the data. It will add the zstd header to the
|
166
|
+
* raw content, which contains a dictionary ID and entropy tables, which
|
167
|
+
* will improve compression ratio, and allow zstd to write the dictionary ID
|
168
|
+
* into the frame, if you so choose.
|
169
|
+
*
|
170
|
+
* Do I have to use zstd's dictionary builder?
|
171
|
+
* -------------------------------------------
|
172
|
+
*
|
173
|
+
* No! You can construct dictionary content however you please, it is just
|
174
|
+
* bytes. It will always be valid as a raw content dictionary. If you want
|
175
|
+
* a zstd dictionary, which can improve compression ratio, use
|
176
|
+
* `ZDICT_finalizeDictionary()`.
|
177
|
+
*
|
178
|
+
* What is the attack surface of a zstd dictionary?
|
179
|
+
* ------------------------------------------------
|
180
|
+
*
|
181
|
+
* Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
|
182
|
+
* zstd should never crash, or access out-of-bounds memory no matter what
|
183
|
+
* the dictionary is. However, if an attacker can control the dictionary
|
184
|
+
* during decompression, they can cause zstd to generate arbitrary bytes,
|
185
|
+
* just like if they controlled the compressed data.
|
186
|
+
*
|
187
|
+
******************************************************************************/
|
188
|
+
|
39
189
|
|
40
190
|
/*! ZDICT_trainFromBuffer():
|
41
191
|
* Train a dictionary from an array of samples.
|
@@ -61,15 +211,81 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
|
|
61
211
|
const void* samplesBuffer,
|
62
212
|
const size_t* samplesSizes, unsigned nbSamples);
|
63
213
|
|
214
|
+
typedef struct {
|
215
|
+
int compressionLevel; /**< optimize for a specific zstd compression level; 0 means default */
|
216
|
+
unsigned notificationLevel; /**< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
217
|
+
unsigned dictID; /**< force dictID value; 0 means auto mode (32-bits random value)
|
218
|
+
* NOTE: The zstd format reserves some dictionary IDs for future use.
|
219
|
+
* You may use them in private settings, but be warned that they
|
220
|
+
* may be used by zstd in a public dictionary registry in the future.
|
221
|
+
* These dictionary IDs are:
|
222
|
+
* - low range : <= 32767
|
223
|
+
* - high range : >= (2^31)
|
224
|
+
*/
|
225
|
+
} ZDICT_params_t;
|
226
|
+
|
227
|
+
/*! ZDICT_finalizeDictionary():
|
228
|
+
* Given a custom content as a basis for dictionary, and a set of samples,
|
229
|
+
* finalize dictionary by adding headers and statistics according to the zstd
|
230
|
+
* dictionary format.
|
231
|
+
*
|
232
|
+
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
233
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each
|
234
|
+
* sample in order. The samples are used to construct the statistics, so they
|
235
|
+
* should be representative of what you will compress with this dictionary.
|
236
|
+
*
|
237
|
+
* The compression level can be set in `parameters`. You should pass the
|
238
|
+
* compression level you expect to use in production. The statistics for each
|
239
|
+
* compression level differ, so tuning the dictionary for the compression level
|
240
|
+
* can help quite a bit.
|
241
|
+
*
|
242
|
+
* You can set an explicit dictionary ID in `parameters`, or allow us to pick
|
243
|
+
* a random dictionary ID for you, but we can't guarantee no collisions.
|
244
|
+
*
|
245
|
+
* The dstDictBuffer and the dictContent may overlap, and the content will be
|
246
|
+
* appended to the end of the header. If the header + the content doesn't fit in
|
247
|
+
* maxDictSize the beginning of the content is truncated to make room, since it
|
248
|
+
* is presumed that the most profitable content is at the end of the dictionary,
|
249
|
+
* since that is the cheapest to reference.
|
250
|
+
*
|
251
|
+
* `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN).
|
252
|
+
*
|
253
|
+
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
|
254
|
+
* or an error code, which can be tested by ZDICT_isError().
|
255
|
+
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if
|
256
|
+
* instructed to, using notificationLevel>0.
|
257
|
+
* NOTE: This function currently may fail in several edge cases including:
|
258
|
+
* * Not enough samples
|
259
|
+
* * Samples are uncompressible
|
260
|
+
* * Samples are all exactly the same
|
261
|
+
*/
|
262
|
+
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
|
263
|
+
const void* dictContent, size_t dictContentSize,
|
264
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
265
|
+
ZDICT_params_t parameters);
|
266
|
+
|
64
267
|
|
65
268
|
/*====== Helper functions ======*/
|
66
269
|
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
|
270
|
+
ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
|
67
271
|
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
|
68
272
|
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
69
273
|
|
274
|
+
#endif /* ZSTD_ZDICT_H */
|
70
275
|
|
276
|
+
#if defined(ZDICT_STATIC_LINKING_ONLY) && !defined(ZSTD_ZDICT_H_STATIC)
|
277
|
+
#define ZSTD_ZDICT_H_STATIC
|
71
278
|
|
72
|
-
|
279
|
+
/* This can be overridden externally to hide static symbols. */
|
280
|
+
#ifndef ZDICTLIB_STATIC_API
|
281
|
+
# if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
282
|
+
# define ZDICTLIB_STATIC_API __declspec(dllexport) ZDICTLIB_VISIBLE
|
283
|
+
# elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
|
284
|
+
# define ZDICTLIB_STATIC_API __declspec(dllimport) ZDICTLIB_VISIBLE
|
285
|
+
# else
|
286
|
+
# define ZDICTLIB_STATIC_API ZDICTLIB_VISIBLE
|
287
|
+
# endif
|
288
|
+
#endif
|
73
289
|
|
74
290
|
/* ====================================================================================
|
75
291
|
* The definitions in this section are considered experimental.
|
@@ -78,11 +294,9 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
|
78
294
|
* Use them only in association with static linking.
|
79
295
|
* ==================================================================================== */
|
80
296
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */
|
85
|
-
} ZDICT_params_t;
|
297
|
+
#define ZDICT_DICTSIZE_MIN 256
|
298
|
+
/* Deprecated: Remove in v1.6.0 */
|
299
|
+
#define ZDICT_CONTENTSIZE_MIN 128
|
86
300
|
|
87
301
|
/*! ZDICT_cover_params_t:
|
88
302
|
* k and d are the only required parameters.
|
@@ -127,7 +341,7 @@ typedef struct {
|
|
127
341
|
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
|
128
342
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
129
343
|
*/
|
130
|
-
|
344
|
+
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover(
|
131
345
|
void *dictBuffer, size_t dictBufferCapacity,
|
132
346
|
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
133
347
|
ZDICT_cover_params_t parameters);
|
@@ -149,7 +363,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
149
363
|
* See ZDICT_trainFromBuffer() for details on failure modes.
|
150
364
|
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
151
365
|
*/
|
152
|
-
|
366
|
+
ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
153
367
|
void* dictBuffer, size_t dictBufferCapacity,
|
154
368
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
155
369
|
ZDICT_cover_params_t* parameters);
|
@@ -170,7 +384,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
170
384
|
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
|
171
385
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
172
386
|
*/
|
173
|
-
|
387
|
+
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
|
174
388
|
size_t dictBufferCapacity, const void *samplesBuffer,
|
175
389
|
const size_t *samplesSizes, unsigned nbSamples,
|
176
390
|
ZDICT_fastCover_params_t parameters);
|
@@ -193,33 +407,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
|
|
193
407
|
* See ZDICT_trainFromBuffer() for details on failure modes.
|
194
408
|
* Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
|
195
409
|
*/
|
196
|
-
|
410
|
+
ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
|
197
411
|
size_t dictBufferCapacity, const void* samplesBuffer,
|
198
412
|
const size_t* samplesSizes, unsigned nbSamples,
|
199
413
|
ZDICT_fastCover_params_t* parameters);
|
200
414
|
|
201
|
-
/*! ZDICT_finalizeDictionary():
|
202
|
-
* Given a custom content as a basis for dictionary, and a set of samples,
|
203
|
-
* finalize dictionary by adding headers and statistics.
|
204
|
-
*
|
205
|
-
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
206
|
-
* supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
|
207
|
-
*
|
208
|
-
* dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
209
|
-
* maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
210
|
-
*
|
211
|
-
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
212
|
-
* or an error code, which can be tested by ZDICT_isError().
|
213
|
-
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
214
|
-
* Note 2: dictBuffer and dictContent can overlap
|
215
|
-
*/
|
216
|
-
#define ZDICT_CONTENTSIZE_MIN 128
|
217
|
-
#define ZDICT_DICTSIZE_MIN 256
|
218
|
-
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
219
|
-
const void* dictContent, size_t dictContentSize,
|
220
|
-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
221
|
-
ZDICT_params_t parameters);
|
222
|
-
|
223
415
|
typedef struct {
|
224
416
|
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
|
225
417
|
ZDICT_params_t zParams;
|
@@ -240,43 +432,43 @@ typedef struct {
|
|
240
432
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
241
433
|
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
242
434
|
*/
|
243
|
-
|
244
|
-
void
|
245
|
-
const void
|
435
|
+
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_legacy(
|
436
|
+
void* dictBuffer, size_t dictBufferCapacity,
|
437
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
246
438
|
ZDICT_legacy_params_t parameters);
|
247
439
|
|
440
|
+
|
248
441
|
/* Deprecation warnings */
|
249
442
|
/* It is generally possible to disable deprecation warnings from compiler,
|
250
443
|
for example with -Wno-deprecated-declarations for gcc
|
251
444
|
or _CRT_SECURE_NO_WARNINGS in Visual.
|
252
445
|
Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
|
253
446
|
#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
|
254
|
-
# define ZDICT_DEPRECATED(message)
|
447
|
+
# define ZDICT_DEPRECATED(message) /* disable deprecation warnings */
|
255
448
|
#else
|
256
449
|
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
257
450
|
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
258
|
-
# define ZDICT_DEPRECATED(message) [[deprecated(message)]]
|
259
|
-
# elif (ZDICT_GCC_VERSION >= 405)
|
260
|
-
# define ZDICT_DEPRECATED(message)
|
451
|
+
# define ZDICT_DEPRECATED(message) [[deprecated(message)]]
|
452
|
+
# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
|
453
|
+
# define ZDICT_DEPRECATED(message) __attribute__((deprecated(message)))
|
261
454
|
# elif (ZDICT_GCC_VERSION >= 301)
|
262
|
-
# define ZDICT_DEPRECATED(message)
|
455
|
+
# define ZDICT_DEPRECATED(message) __attribute__((deprecated))
|
263
456
|
# elif defined(_MSC_VER)
|
264
|
-
# define ZDICT_DEPRECATED(message)
|
457
|
+
# define ZDICT_DEPRECATED(message) __declspec(deprecated(message))
|
265
458
|
# else
|
266
459
|
# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
|
267
|
-
# define ZDICT_DEPRECATED(message)
|
460
|
+
# define ZDICT_DEPRECATED(message)
|
268
461
|
# endif
|
269
462
|
#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
|
270
463
|
|
271
464
|
ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
|
465
|
+
ZDICTLIB_STATIC_API
|
272
466
|
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
273
467
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
|
274
468
|
|
275
469
|
|
276
|
-
#endif /*
|
470
|
+
#endif /* ZSTD_ZDICT_H_STATIC */
|
277
471
|
|
278
472
|
#if defined (__cplusplus)
|
279
473
|
}
|
280
474
|
#endif
|
281
|
-
|
282
|
-
#endif /* DICTBUILDER_H_001 */
|