zstd-ruby 1.4.5.0 → 1.5.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/extconf.rb +2 -1
- data/ext/zstdruby/libzstd/BUCK +5 -7
- data/ext/zstdruby/libzstd/Makefile +225 -222
- data/ext/zstdruby/libzstd/README.md +43 -5
- data/ext/zstdruby/libzstd/common/bitstream.h +46 -22
- data/ext/zstdruby/libzstd/common/compiler.h +182 -22
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +12 -19
- data/ext/zstdruby/libzstd/common/entropy_common.c +196 -44
- data/ext/zstdruby/libzstd/common/error_private.c +2 -1
- data/ext/zstdruby/libzstd/common/error_private.h +82 -3
- data/ext/zstdruby/libzstd/common/fse.h +41 -12
- data/ext/zstdruby/libzstd/common/fse_decompress.c +139 -22
- data/ext/zstdruby/libzstd/common/huf.h +47 -23
- data/ext/zstdruby/libzstd/common/mem.h +87 -98
- data/ext/zstdruby/libzstd/common/pool.c +23 -17
- data/ext/zstdruby/libzstd/common/pool.h +2 -2
- data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
- data/ext/zstdruby/libzstd/common/threading.c +6 -5
- data/ext/zstdruby/libzstd/common/xxhash.c +6 -846
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +189 -142
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +89 -46
- data/ext/zstdruby/libzstd/compress/hist.c +27 -29
- data/ext/zstdruby/libzstd/compress/hist.h +2 -2
- data/ext/zstdruby/libzstd/compress/huf_compress.c +770 -198
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +2894 -863
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +390 -90
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +12 -11
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +31 -8
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -297
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +206 -69
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +307 -132
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +322 -143
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1136 -174
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +316 -213
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +373 -150
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +152 -444
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +31 -113
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1044 -403
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +450 -105
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +913 -273
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +14 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +59 -12
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -38
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -34
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +128 -58
- data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +8 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +9 -9
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +9 -9
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +10 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +13 -13
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -13
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -13
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.mk +185 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +154 -7
- data/ext/zstdruby/libzstd/zstd.h +699 -214
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +2 -1
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +15 -6
- data/.travis.yml +0 -14
@@ -3,13 +3,14 @@
|
|
3
3
|
# BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
4
4
|
|
5
5
|
prefix=@PREFIX@
|
6
|
-
exec_prefix
|
7
|
-
includedir
|
8
|
-
libdir
|
6
|
+
exec_prefix=@EXEC_PREFIX@
|
7
|
+
includedir=@INCLUDEDIR@
|
8
|
+
libdir=@LIBDIR@
|
9
9
|
|
10
10
|
Name: zstd
|
11
11
|
Description: fast lossless compression algorithm library
|
12
12
|
URL: http://www.zstd.net/
|
13
13
|
Version: @VERSION@
|
14
14
|
Libs: -L${libdir} -lzstd
|
15
|
+
Libs.private: @LIBS_PRIVATE@
|
15
16
|
Cflags: -I${includedir}
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -36,6 +36,145 @@ extern "C" {
|
|
36
36
|
# define ZDICTLIB_API ZDICTLIB_VISIBILITY
|
37
37
|
#endif
|
38
38
|
|
39
|
+
/*******************************************************************************
|
40
|
+
* Zstd dictionary builder
|
41
|
+
*
|
42
|
+
* FAQ
|
43
|
+
* ===
|
44
|
+
* Why should I use a dictionary?
|
45
|
+
* ------------------------------
|
46
|
+
*
|
47
|
+
* Zstd can use dictionaries to improve compression ratio of small data.
|
48
|
+
* Traditionally small files don't compress well because there is very little
|
49
|
+
* repetition in a single sample, since it is small. But, if you are compressing
|
50
|
+
* many similar files, like a bunch of JSON records that share the same
|
51
|
+
* structure, you can train a dictionary on ahead of time on some samples of
|
52
|
+
* these files. Then, zstd can use the dictionary to find repetitions that are
|
53
|
+
* present across samples. This can vastly improve compression ratio.
|
54
|
+
*
|
55
|
+
* When is a dictionary useful?
|
56
|
+
* ----------------------------
|
57
|
+
*
|
58
|
+
* Dictionaries are useful when compressing many small files that are similar.
|
59
|
+
* The larger a file is, the less benefit a dictionary will have. Generally,
|
60
|
+
* we don't expect dictionary compression to be effective past 100KB. And the
|
61
|
+
* smaller a file is, the more we would expect the dictionary to help.
|
62
|
+
*
|
63
|
+
* How do I use a dictionary?
|
64
|
+
* --------------------------
|
65
|
+
*
|
66
|
+
* Simply pass the dictionary to the zstd compressor with
|
67
|
+
* `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
|
68
|
+
* the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
|
69
|
+
* more advanced functions that allow selecting some options, see zstd.h for
|
70
|
+
* complete documentation.
|
71
|
+
*
|
72
|
+
* What is a zstd dictionary?
|
73
|
+
* --------------------------
|
74
|
+
*
|
75
|
+
* A zstd dictionary has two pieces: Its header, and its content. The header
|
76
|
+
* contains a magic number, the dictionary ID, and entropy tables. These
|
77
|
+
* entropy tables allow zstd to save on header costs in the compressed file,
|
78
|
+
* which really matters for small data. The content is just bytes, which are
|
79
|
+
* repeated content that is common across many samples.
|
80
|
+
*
|
81
|
+
* What is a raw content dictionary?
|
82
|
+
* ---------------------------------
|
83
|
+
*
|
84
|
+
* A raw content dictionary is just bytes. It doesn't have a zstd dictionary
|
85
|
+
* header, a dictionary ID, or entropy tables. Any buffer is a valid raw
|
86
|
+
* content dictionary.
|
87
|
+
*
|
88
|
+
* How do I train a dictionary?
|
89
|
+
* ----------------------------
|
90
|
+
*
|
91
|
+
* Gather samples from your use case. These samples should be similar to each
|
92
|
+
* other. If you have several use cases, you could try to train one dictionary
|
93
|
+
* per use case.
|
94
|
+
*
|
95
|
+
* Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
|
96
|
+
* dictionary. There are a few advanced versions of this function, but this
|
97
|
+
* is a great starting point. If you want to further tune your dictionary
|
98
|
+
* you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
|
99
|
+
* you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
|
100
|
+
*
|
101
|
+
* If the dictionary training function fails, that is likely because you
|
102
|
+
* either passed too few samples, or a dictionary would not be effective
|
103
|
+
* for your data. Look at the messages that the dictionary trainer printed,
|
104
|
+
* if it doesn't say too few samples, then a dictionary would not be effective.
|
105
|
+
*
|
106
|
+
* How large should my dictionary be?
|
107
|
+
* ----------------------------------
|
108
|
+
*
|
109
|
+
* A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
|
110
|
+
* The zstd CLI defaults to a 110KB dictionary. You likely don't need a
|
111
|
+
* dictionary larger than that. But, most use cases can get away with a
|
112
|
+
* smaller dictionary. The advanced dictionary builders can automatically
|
113
|
+
* shrink the dictionary for you, and select a the smallest size that
|
114
|
+
* doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
|
115
|
+
* A smaller dictionary can save memory, and potentially speed up
|
116
|
+
* compression.
|
117
|
+
*
|
118
|
+
* How many samples should I provide to the dictionary builder?
|
119
|
+
* ------------------------------------------------------------
|
120
|
+
*
|
121
|
+
* We generally recommend passing ~100x the size of the dictionary
|
122
|
+
* in samples. A few thousand should suffice. Having too few samples
|
123
|
+
* can hurt the dictionaries effectiveness. Having more samples will
|
124
|
+
* only improve the dictionaries effectiveness. But having too many
|
125
|
+
* samples can slow down the dictionary builder.
|
126
|
+
*
|
127
|
+
* How do I determine if a dictionary will be effective?
|
128
|
+
* -----------------------------------------------------
|
129
|
+
*
|
130
|
+
* Simply train a dictionary and try it out. You can use zstd's built in
|
131
|
+
* benchmarking tool to test the dictionary effectiveness.
|
132
|
+
*
|
133
|
+
* # Benchmark levels 1-3 without a dictionary
|
134
|
+
* zstd -b1e3 -r /path/to/my/files
|
135
|
+
* # Benchmark levels 1-3 with a dictionary
|
136
|
+
* zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
|
137
|
+
*
|
138
|
+
* When should I retrain a dictionary?
|
139
|
+
* -----------------------------------
|
140
|
+
*
|
141
|
+
* You should retrain a dictionary when its effectiveness drops. Dictionary
|
142
|
+
* effectiveness drops as the data you are compressing changes. Generally, we do
|
143
|
+
* expect dictionaries to "decay" over time, as your data changes, but the rate
|
144
|
+
* at which they decay depends on your use case. Internally, we regularly
|
145
|
+
* retrain dictionaries, and if the new dictionary performs significantly
|
146
|
+
* better than the old dictionary, we will ship the new dictionary.
|
147
|
+
*
|
148
|
+
* I have a raw content dictionary, how do I turn it into a zstd dictionary?
|
149
|
+
* -------------------------------------------------------------------------
|
150
|
+
*
|
151
|
+
* If you have a raw content dictionary, e.g. by manually constructing it, or
|
152
|
+
* using a third-party dictionary builder, you can turn it into a zstd
|
153
|
+
* dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
|
154
|
+
* provide some samples of the data. It will add the zstd header to the
|
155
|
+
* raw content, which contains a dictionary ID and entropy tables, which
|
156
|
+
* will improve compression ratio, and allow zstd to write the dictionary ID
|
157
|
+
* into the frame, if you so choose.
|
158
|
+
*
|
159
|
+
* Do I have to use zstd's dictionary builder?
|
160
|
+
* -------------------------------------------
|
161
|
+
*
|
162
|
+
* No! You can construct dictionary content however you please, it is just
|
163
|
+
* bytes. It will always be valid as a raw content dictionary. If you want
|
164
|
+
* a zstd dictionary, which can improve compression ratio, use
|
165
|
+
* `ZDICT_finalizeDictionary()`.
|
166
|
+
*
|
167
|
+
* What is the attack surface of a zstd dictionary?
|
168
|
+
* ------------------------------------------------
|
169
|
+
*
|
170
|
+
* Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
|
171
|
+
* zstd should never crash, or access out-of-bounds memory no matter what
|
172
|
+
* the dictionary is. However, if an attacker can control the dictionary
|
173
|
+
* during decompression, they can cause zstd to generate arbitrary bytes,
|
174
|
+
* just like if they controlled the compressed data.
|
175
|
+
*
|
176
|
+
******************************************************************************/
|
177
|
+
|
39
178
|
|
40
179
|
/*! ZDICT_trainFromBuffer():
|
41
180
|
* Train a dictionary from an array of samples.
|
@@ -64,7 +203,14 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
|
|
64
203
|
typedef struct {
|
65
204
|
int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */
|
66
205
|
unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
67
|
-
unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value)
|
206
|
+
unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value)
|
207
|
+
* NOTE: The zstd format reserves some dictionary IDs for future use.
|
208
|
+
* You may use them in private settings, but be warned that they
|
209
|
+
* may be used by zstd in a public dictionary registry in the future.
|
210
|
+
* These dictionary IDs are:
|
211
|
+
* - low range : <= 32767
|
212
|
+
* - high range : >= (2^31)
|
213
|
+
*/
|
68
214
|
} ZDICT_params_t;
|
69
215
|
|
70
216
|
/*! ZDICT_finalizeDictionary():
|
@@ -91,7 +237,6 @@ typedef struct {
|
|
91
237
|
* is presumed that the most profitable content is at the end of the dictionary,
|
92
238
|
* since that is the cheapest to reference.
|
93
239
|
*
|
94
|
-
* `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
95
240
|
* `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN).
|
96
241
|
*
|
97
242
|
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
|
@@ -126,8 +271,9 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
|
126
271
|
* Use them only in association with static linking.
|
127
272
|
* ==================================================================================== */
|
128
273
|
|
129
|
-
#define ZDICT_CONTENTSIZE_MIN 128
|
130
274
|
#define ZDICT_DICTSIZE_MIN 256
|
275
|
+
/* Deprecated: Remove in v1.6.0 */
|
276
|
+
#define ZDICT_CONTENTSIZE_MIN 128
|
131
277
|
|
132
278
|
/*! ZDICT_cover_params_t:
|
133
279
|
* k and d are the only required parameters.
|
@@ -264,10 +410,11 @@ typedef struct {
|
|
264
410
|
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
265
411
|
*/
|
266
412
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
267
|
-
void
|
268
|
-
const void
|
413
|
+
void* dictBuffer, size_t dictBufferCapacity,
|
414
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
269
415
|
ZDICT_legacy_params_t parameters);
|
270
416
|
|
417
|
+
|
271
418
|
/* Deprecation warnings */
|
272
419
|
/* It is generally possible to disable deprecation warnings from compiler,
|
273
420
|
for example with -Wno-deprecated-declarations for gcc
|
@@ -279,7 +426,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
|
279
426
|
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
280
427
|
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
281
428
|
# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
|
282
|
-
# elif (ZDICT_GCC_VERSION >= 405)
|
429
|
+
# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
|
283
430
|
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
|
284
431
|
# elif (ZDICT_GCC_VERSION >= 301)
|
285
432
|
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
|