zstd-ruby 1.4.4.0 → 1.5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +74 -97
- data/ext/zstdruby/libzstd/common/compiler.h +219 -20
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +184 -80
- data/ext/zstdruby/libzstd/common/error_private.c +11 -2
- data/ext/zstdruby/libzstd/common/error_private.h +87 -4
- data/ext/zstdruby/libzstd/common/fse.h +47 -116
- data/ext/zstdruby/libzstd/common/fse_decompress.c +127 -127
- data/ext/zstdruby/libzstd/common/huf.h +112 -197
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +11 -5
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +78 -22
- data/ext/zstdruby/libzstd/common/threading.h +9 -13
- data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
- data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +186 -144
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +99 -196
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +968 -331
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +4120 -1191
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +688 -159
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +121 -40
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +62 -35
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +577 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +322 -115
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +394 -154
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -253
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1289 -247
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +339 -212
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +508 -282
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +217 -466
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +35 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1220 -572
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +23 -19
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +859 -273
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1244 -375
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +21 -7
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +74 -11
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +75 -54
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +55 -36
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +126 -110
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +248 -56
- data/ext/zstdruby/libzstd/zstd.h +1277 -306
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +29 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +24 -39
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -289
- data/ext/zstdruby/libzstd/README.md +0 -159
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -47
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2152
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3514
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3156
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3641
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4046
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4150
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4533
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -8,34 +8,184 @@
|
|
8
8
|
* You may select, at your option, one of the above-listed licenses.
|
9
9
|
*/
|
10
10
|
|
11
|
-
#ifndef DICTBUILDER_H_001
|
12
|
-
#define DICTBUILDER_H_001
|
13
|
-
|
14
11
|
#if defined (__cplusplus)
|
15
12
|
extern "C" {
|
16
13
|
#endif
|
17
14
|
|
15
|
+
#ifndef ZSTD_ZDICT_H
|
16
|
+
#define ZSTD_ZDICT_H
|
18
17
|
|
19
18
|
/*====== Dependencies ======*/
|
20
19
|
#include <stddef.h> /* size_t */
|
21
20
|
|
22
21
|
|
23
22
|
/* ===== ZDICTLIB_API : control library symbols visibility ===== */
|
24
|
-
#ifndef
|
25
|
-
|
26
|
-
#
|
23
|
+
#ifndef ZDICTLIB_VISIBLE
|
24
|
+
/* Backwards compatibility with old macro name */
|
25
|
+
# ifdef ZDICTLIB_VISIBILITY
|
26
|
+
# define ZDICTLIB_VISIBLE ZDICTLIB_VISIBILITY
|
27
|
+
# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
|
28
|
+
# define ZDICTLIB_VISIBLE __attribute__ ((visibility ("default")))
|
29
|
+
# else
|
30
|
+
# define ZDICTLIB_VISIBLE
|
31
|
+
# endif
|
32
|
+
#endif
|
33
|
+
|
34
|
+
#ifndef ZDICTLIB_HIDDEN
|
35
|
+
# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
|
36
|
+
# define ZDICTLIB_HIDDEN __attribute__ ((visibility ("hidden")))
|
27
37
|
# else
|
28
|
-
# define
|
38
|
+
# define ZDICTLIB_HIDDEN
|
29
39
|
# endif
|
30
40
|
#endif
|
41
|
+
|
31
42
|
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
32
|
-
# define ZDICTLIB_API __declspec(dllexport)
|
43
|
+
# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBLE
|
33
44
|
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
|
34
|
-
# define ZDICTLIB_API __declspec(dllimport)
|
45
|
+
# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBLE /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
|
35
46
|
#else
|
36
|
-
# define ZDICTLIB_API
|
47
|
+
# define ZDICTLIB_API ZDICTLIB_VISIBLE
|
37
48
|
#endif
|
38
49
|
|
50
|
+
/*******************************************************************************
|
51
|
+
* Zstd dictionary builder
|
52
|
+
*
|
53
|
+
* FAQ
|
54
|
+
* ===
|
55
|
+
* Why should I use a dictionary?
|
56
|
+
* ------------------------------
|
57
|
+
*
|
58
|
+
* Zstd can use dictionaries to improve compression ratio of small data.
|
59
|
+
* Traditionally small files don't compress well because there is very little
|
60
|
+
* repetition in a single sample, since it is small. But, if you are compressing
|
61
|
+
* many similar files, like a bunch of JSON records that share the same
|
62
|
+
* structure, you can train a dictionary ahead of time on some samples of
|
63
|
+
* these files. Then, zstd can use the dictionary to find repetitions that are
|
64
|
+
* present across samples. This can vastly improve compression ratio.
|
65
|
+
*
|
66
|
+
* When is a dictionary useful?
|
67
|
+
* ----------------------------
|
68
|
+
*
|
69
|
+
* Dictionaries are useful when compressing many small files that are similar.
|
70
|
+
* The larger a file is, the less benefit a dictionary will have. Generally,
|
71
|
+
* we don't expect dictionary compression to be effective past 100KB. And the
|
72
|
+
* smaller a file is, the more we would expect the dictionary to help.
|
73
|
+
*
|
74
|
+
* How do I use a dictionary?
|
75
|
+
* --------------------------
|
76
|
+
*
|
77
|
+
* Simply pass the dictionary to the zstd compressor with
|
78
|
+
* `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
|
79
|
+
* the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
|
80
|
+
* more advanced functions that allow selecting some options, see zstd.h for
|
81
|
+
* complete documentation.
|
82
|
+
*
|
83
|
+
* What is a zstd dictionary?
|
84
|
+
* --------------------------
|
85
|
+
*
|
86
|
+
* A zstd dictionary has two pieces: Its header, and its content. The header
|
87
|
+
* contains a magic number, the dictionary ID, and entropy tables. These
|
88
|
+
* entropy tables allow zstd to save on header costs in the compressed file,
|
89
|
+
* which really matters for small data. The content is just bytes, which are
|
90
|
+
* repeated content that is common across many samples.
|
91
|
+
*
|
92
|
+
* What is a raw content dictionary?
|
93
|
+
* ---------------------------------
|
94
|
+
*
|
95
|
+
* A raw content dictionary is just bytes. It doesn't have a zstd dictionary
|
96
|
+
* header, a dictionary ID, or entropy tables. Any buffer is a valid raw
|
97
|
+
* content dictionary.
|
98
|
+
*
|
99
|
+
* How do I train a dictionary?
|
100
|
+
* ----------------------------
|
101
|
+
*
|
102
|
+
* Gather samples from your use case. These samples should be similar to each
|
103
|
+
* other. If you have several use cases, you could try to train one dictionary
|
104
|
+
* per use case.
|
105
|
+
*
|
106
|
+
* Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
|
107
|
+
* dictionary. There are a few advanced versions of this function, but this
|
108
|
+
* is a great starting point. If you want to further tune your dictionary
|
109
|
+
* you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
|
110
|
+
* you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
|
111
|
+
*
|
112
|
+
* If the dictionary training function fails, that is likely because you
|
113
|
+
* either passed too few samples, or a dictionary would not be effective
|
114
|
+
* for your data. Look at the messages that the dictionary trainer printed,
|
115
|
+
* if it doesn't say too few samples, then a dictionary would not be effective.
|
116
|
+
*
|
117
|
+
* How large should my dictionary be?
|
118
|
+
* ----------------------------------
|
119
|
+
*
|
120
|
+
* A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
|
121
|
+
* The zstd CLI defaults to a 110KB dictionary. You likely don't need a
|
122
|
+
* dictionary larger than that. But, most use cases can get away with a
|
123
|
+
* smaller dictionary. The advanced dictionary builders can automatically
|
124
|
+
* shrink the dictionary for you, and select the smallest size that doesn't
|
125
|
+
* hurt compression ratio too much. See the `shrinkDict` parameter.
|
126
|
+
* A smaller dictionary can save memory, and potentially speed up
|
127
|
+
* compression.
|
128
|
+
*
|
129
|
+
* How many samples should I provide to the dictionary builder?
|
130
|
+
* ------------------------------------------------------------
|
131
|
+
*
|
132
|
+
* We generally recommend passing ~100x the size of the dictionary
|
133
|
+
* in samples. A few thousand should suffice. Having too few samples
|
134
|
+
* can hurt the dictionary's effectiveness. Having more samples will
|
135
|
+
* only improve the dictionary's effectiveness. But having too many
|
136
|
+
* samples can slow down the dictionary builder.
|
137
|
+
*
|
138
|
+
* How do I determine if a dictionary will be effective?
|
139
|
+
* -----------------------------------------------------
|
140
|
+
*
|
141
|
+
* Simply train a dictionary and try it out. You can use zstd's built in
|
142
|
+
* benchmarking tool to test the dictionary effectiveness.
|
143
|
+
*
|
144
|
+
* # Benchmark levels 1-3 without a dictionary
|
145
|
+
* zstd -b1e3 -r /path/to/my/files
|
146
|
+
* # Benchmark levels 1-3 with a dictionary
|
147
|
+
* zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
|
148
|
+
*
|
149
|
+
* When should I retrain a dictionary?
|
150
|
+
* -----------------------------------
|
151
|
+
*
|
152
|
+
* You should retrain a dictionary when its effectiveness drops. Dictionary
|
153
|
+
* effectiveness drops as the data you are compressing changes. Generally, we do
|
154
|
+
* expect dictionaries to "decay" over time, as your data changes, but the rate
|
155
|
+
* at which they decay depends on your use case. Internally, we regularly
|
156
|
+
* retrain dictionaries, and if the new dictionary performs significantly
|
157
|
+
* better than the old dictionary, we will ship the new dictionary.
|
158
|
+
*
|
159
|
+
* I have a raw content dictionary, how do I turn it into a zstd dictionary?
|
160
|
+
* -------------------------------------------------------------------------
|
161
|
+
*
|
162
|
+
* If you have a raw content dictionary, e.g. by manually constructing it, or
|
163
|
+
* using a third-party dictionary builder, you can turn it into a zstd
|
164
|
+
* dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
|
165
|
+
* provide some samples of the data. It will add the zstd header to the
|
166
|
+
* raw content, which contains a dictionary ID and entropy tables, which
|
167
|
+
* will improve compression ratio, and allow zstd to write the dictionary ID
|
168
|
+
* into the frame, if you so choose.
|
169
|
+
*
|
170
|
+
* Do I have to use zstd's dictionary builder?
|
171
|
+
* -------------------------------------------
|
172
|
+
*
|
173
|
+
* No! You can construct dictionary content however you please, it is just
|
174
|
+
* bytes. It will always be valid as a raw content dictionary. If you want
|
175
|
+
* a zstd dictionary, which can improve compression ratio, use
|
176
|
+
* `ZDICT_finalizeDictionary()`.
|
177
|
+
*
|
178
|
+
* What is the attack surface of a zstd dictionary?
|
179
|
+
* ------------------------------------------------
|
180
|
+
*
|
181
|
+
* Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
|
182
|
+
* zstd should never crash, or access out-of-bounds memory no matter what
|
183
|
+
* the dictionary is. However, if an attacker can control the dictionary
|
184
|
+
* during decompression, they can cause zstd to generate arbitrary bytes,
|
185
|
+
* just like if they controlled the compressed data.
|
186
|
+
*
|
187
|
+
******************************************************************************/
|
188
|
+
|
39
189
|
|
40
190
|
/*! ZDICT_trainFromBuffer():
|
41
191
|
* Train a dictionary from an array of samples.
|
@@ -61,15 +211,81 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
|
|
61
211
|
const void* samplesBuffer,
|
62
212
|
const size_t* samplesSizes, unsigned nbSamples);
|
63
213
|
|
214
|
+
typedef struct {
|
215
|
+
int compressionLevel; /**< optimize for a specific zstd compression level; 0 means default */
|
216
|
+
unsigned notificationLevel; /**< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
217
|
+
unsigned dictID; /**< force dictID value; 0 means auto mode (32-bits random value)
|
218
|
+
* NOTE: The zstd format reserves some dictionary IDs for future use.
|
219
|
+
* You may use them in private settings, but be warned that they
|
220
|
+
* may be used by zstd in a public dictionary registry in the future.
|
221
|
+
* These dictionary IDs are:
|
222
|
+
* - low range : <= 32767
|
223
|
+
* - high range : >= (2^31)
|
224
|
+
*/
|
225
|
+
} ZDICT_params_t;
|
226
|
+
|
227
|
+
/*! ZDICT_finalizeDictionary():
|
228
|
+
* Given a custom content as a basis for dictionary, and a set of samples,
|
229
|
+
* finalize dictionary by adding headers and statistics according to the zstd
|
230
|
+
* dictionary format.
|
231
|
+
*
|
232
|
+
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
233
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each
|
234
|
+
* sample in order. The samples are used to construct the statistics, so they
|
235
|
+
* should be representative of what you will compress with this dictionary.
|
236
|
+
*
|
237
|
+
* The compression level can be set in `parameters`. You should pass the
|
238
|
+
* compression level you expect to use in production. The statistics for each
|
239
|
+
* compression level differ, so tuning the dictionary for the compression level
|
240
|
+
* can help quite a bit.
|
241
|
+
*
|
242
|
+
* You can set an explicit dictionary ID in `parameters`, or allow us to pick
|
243
|
+
* a random dictionary ID for you, but we can't guarantee no collisions.
|
244
|
+
*
|
245
|
+
* The dstDictBuffer and the dictContent may overlap, and the content will be
|
246
|
+
* appended to the end of the header. If the header + the content doesn't fit in
|
247
|
+
* maxDictSize the beginning of the content is truncated to make room, since it
|
248
|
+
* is presumed that the most profitable content is at the end of the dictionary,
|
249
|
+
* since that is the cheapest to reference.
|
250
|
+
*
|
251
|
+
* `maxDictSize` must be >= max(dictContentSize, ZDICT_DICTSIZE_MIN).
|
252
|
+
*
|
253
|
+
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
|
254
|
+
* or an error code, which can be tested by ZDICT_isError().
|
255
|
+
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if
|
256
|
+
* instructed to, using notificationLevel>0.
|
257
|
+
* NOTE: This function currently may fail in several edge cases including:
|
258
|
+
* * Not enough samples
|
259
|
+
* * Samples are uncompressible
|
260
|
+
* * Samples are all exactly the same
|
261
|
+
*/
|
262
|
+
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
|
263
|
+
const void* dictContent, size_t dictContentSize,
|
264
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
265
|
+
ZDICT_params_t parameters);
|
266
|
+
|
64
267
|
|
65
268
|
/*====== Helper functions ======*/
|
66
269
|
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
|
270
|
+
ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
|
67
271
|
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
|
68
272
|
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
69
273
|
|
274
|
+
#endif /* ZSTD_ZDICT_H */
|
70
275
|
|
276
|
+
#if defined(ZDICT_STATIC_LINKING_ONLY) && !defined(ZSTD_ZDICT_H_STATIC)
|
277
|
+
#define ZSTD_ZDICT_H_STATIC
|
71
278
|
|
72
|
-
|
279
|
+
/* This can be overridden externally to hide static symbols. */
|
280
|
+
#ifndef ZDICTLIB_STATIC_API
|
281
|
+
# if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
|
282
|
+
# define ZDICTLIB_STATIC_API __declspec(dllexport) ZDICTLIB_VISIBLE
|
283
|
+
# elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
|
284
|
+
# define ZDICTLIB_STATIC_API __declspec(dllimport) ZDICTLIB_VISIBLE
|
285
|
+
# else
|
286
|
+
# define ZDICTLIB_STATIC_API ZDICTLIB_VISIBLE
|
287
|
+
# endif
|
288
|
+
#endif
|
73
289
|
|
74
290
|
/* ====================================================================================
|
75
291
|
* The definitions in this section are considered experimental.
|
@@ -78,11 +294,9 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
|
78
294
|
* Use them only in association with static linking.
|
79
295
|
* ==================================================================================== */
|
80
296
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */
|
85
|
-
} ZDICT_params_t;
|
297
|
+
#define ZDICT_DICTSIZE_MIN 256
|
298
|
+
/* Deprecated: Remove in v1.6.0 */
|
299
|
+
#define ZDICT_CONTENTSIZE_MIN 128
|
86
300
|
|
87
301
|
/*! ZDICT_cover_params_t:
|
88
302
|
* k and d are the only required parameters.
|
@@ -127,7 +341,7 @@ typedef struct {
|
|
127
341
|
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
|
128
342
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
129
343
|
*/
|
130
|
-
|
344
|
+
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover(
|
131
345
|
void *dictBuffer, size_t dictBufferCapacity,
|
132
346
|
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
133
347
|
ZDICT_cover_params_t parameters);
|
@@ -149,7 +363,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
149
363
|
* See ZDICT_trainFromBuffer() for details on failure modes.
|
150
364
|
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
151
365
|
*/
|
152
|
-
|
366
|
+
ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
153
367
|
void* dictBuffer, size_t dictBufferCapacity,
|
154
368
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
155
369
|
ZDICT_cover_params_t* parameters);
|
@@ -170,7 +384,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
170
384
|
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
|
171
385
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
172
386
|
*/
|
173
|
-
|
387
|
+
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
|
174
388
|
size_t dictBufferCapacity, const void *samplesBuffer,
|
175
389
|
const size_t *samplesSizes, unsigned nbSamples,
|
176
390
|
ZDICT_fastCover_params_t parameters);
|
@@ -193,33 +407,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
|
|
193
407
|
* See ZDICT_trainFromBuffer() for details on failure modes.
|
194
408
|
* Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
|
195
409
|
*/
|
196
|
-
|
410
|
+
ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
|
197
411
|
size_t dictBufferCapacity, const void* samplesBuffer,
|
198
412
|
const size_t* samplesSizes, unsigned nbSamples,
|
199
413
|
ZDICT_fastCover_params_t* parameters);
|
200
414
|
|
201
|
-
/*! ZDICT_finalizeDictionary():
|
202
|
-
* Given a custom content as a basis for dictionary, and a set of samples,
|
203
|
-
* finalize dictionary by adding headers and statistics.
|
204
|
-
*
|
205
|
-
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
206
|
-
* supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
|
207
|
-
*
|
208
|
-
* dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
209
|
-
* maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
210
|
-
*
|
211
|
-
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
212
|
-
* or an error code, which can be tested by ZDICT_isError().
|
213
|
-
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
214
|
-
* Note 2: dictBuffer and dictContent can overlap
|
215
|
-
*/
|
216
|
-
#define ZDICT_CONTENTSIZE_MIN 128
|
217
|
-
#define ZDICT_DICTSIZE_MIN 256
|
218
|
-
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
219
|
-
const void* dictContent, size_t dictContentSize,
|
220
|
-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
221
|
-
ZDICT_params_t parameters);
|
222
|
-
|
223
415
|
typedef struct {
|
224
416
|
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
|
225
417
|
ZDICT_params_t zParams;
|
@@ -240,43 +432,43 @@ typedef struct {
|
|
240
432
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
241
433
|
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
242
434
|
*/
|
243
|
-
|
244
|
-
void
|
245
|
-
const void
|
435
|
+
ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_legacy(
|
436
|
+
void* dictBuffer, size_t dictBufferCapacity,
|
437
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
246
438
|
ZDICT_legacy_params_t parameters);
|
247
439
|
|
440
|
+
|
248
441
|
/* Deprecation warnings */
|
249
442
|
/* It is generally possible to disable deprecation warnings from compiler,
|
250
443
|
for example with -Wno-deprecated-declarations for gcc
|
251
444
|
or _CRT_SECURE_NO_WARNINGS in Visual.
|
252
445
|
Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
|
253
446
|
#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
|
254
|
-
# define ZDICT_DEPRECATED(message)
|
447
|
+
# define ZDICT_DEPRECATED(message) /* disable deprecation warnings */
|
255
448
|
#else
|
256
449
|
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
257
450
|
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
258
|
-
# define ZDICT_DEPRECATED(message) [[deprecated(message)]]
|
259
|
-
# elif (ZDICT_GCC_VERSION >= 405)
|
260
|
-
# define ZDICT_DEPRECATED(message)
|
451
|
+
# define ZDICT_DEPRECATED(message) [[deprecated(message)]]
|
452
|
+
# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
|
453
|
+
# define ZDICT_DEPRECATED(message) __attribute__((deprecated(message)))
|
261
454
|
# elif (ZDICT_GCC_VERSION >= 301)
|
262
|
-
# define ZDICT_DEPRECATED(message)
|
455
|
+
# define ZDICT_DEPRECATED(message) __attribute__((deprecated))
|
263
456
|
# elif defined(_MSC_VER)
|
264
|
-
# define ZDICT_DEPRECATED(message)
|
457
|
+
# define ZDICT_DEPRECATED(message) __declspec(deprecated(message))
|
265
458
|
# else
|
266
459
|
# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
|
267
|
-
# define ZDICT_DEPRECATED(message)
|
460
|
+
# define ZDICT_DEPRECATED(message)
|
268
461
|
# endif
|
269
462
|
#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
|
270
463
|
|
271
464
|
ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
|
465
|
+
ZDICTLIB_STATIC_API
|
272
466
|
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
273
467
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
|
274
468
|
|
275
469
|
|
276
|
-
#endif /*
|
470
|
+
#endif /* ZSTD_ZDICT_H_STATIC */
|
277
471
|
|
278
472
|
#if defined (__cplusplus)
|
279
473
|
}
|
280
474
|
#endif
|
281
|
-
|
282
|
-
#endif /* DICTBUILDER_H_001 */
|