zstd-ruby 1.4.5.0 → 1.5.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/extconf.rb +2 -1
- data/ext/zstdruby/libzstd/BUCK +5 -7
- data/ext/zstdruby/libzstd/Makefile +225 -222
- data/ext/zstdruby/libzstd/README.md +43 -5
- data/ext/zstdruby/libzstd/common/bitstream.h +46 -22
- data/ext/zstdruby/libzstd/common/compiler.h +182 -22
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +12 -19
- data/ext/zstdruby/libzstd/common/entropy_common.c +196 -44
- data/ext/zstdruby/libzstd/common/error_private.c +2 -1
- data/ext/zstdruby/libzstd/common/error_private.h +82 -3
- data/ext/zstdruby/libzstd/common/fse.h +41 -12
- data/ext/zstdruby/libzstd/common/fse_decompress.c +139 -22
- data/ext/zstdruby/libzstd/common/huf.h +47 -23
- data/ext/zstdruby/libzstd/common/mem.h +87 -98
- data/ext/zstdruby/libzstd/common/pool.c +23 -17
- data/ext/zstdruby/libzstd/common/pool.h +2 -2
- data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
- data/ext/zstdruby/libzstd/common/threading.c +6 -5
- data/ext/zstdruby/libzstd/common/xxhash.c +6 -846
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +189 -142
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +89 -46
- data/ext/zstdruby/libzstd/compress/hist.c +27 -29
- data/ext/zstdruby/libzstd/compress/hist.h +2 -2
- data/ext/zstdruby/libzstd/compress/huf_compress.c +770 -198
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +2894 -863
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +390 -90
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +12 -11
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +31 -8
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -297
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +206 -69
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +307 -132
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +322 -143
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1136 -174
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +316 -213
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +373 -150
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +152 -444
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +31 -113
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1044 -403
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +450 -105
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +913 -273
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +14 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +59 -12
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -38
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -34
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +128 -58
- data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +8 -8
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +9 -9
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +9 -9
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +10 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +13 -13
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -13
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -13
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.mk +185 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +154 -7
- data/ext/zstdruby/libzstd/zstd.h +699 -214
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +2 -1
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +15 -6
- data/.travis.yml +0 -14
|
@@ -3,13 +3,14 @@
|
|
|
3
3
|
# BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
|
4
4
|
|
|
5
5
|
prefix=@PREFIX@
|
|
6
|
-
exec_prefix
|
|
7
|
-
includedir
|
|
8
|
-
libdir
|
|
6
|
+
exec_prefix=@EXEC_PREFIX@
|
|
7
|
+
includedir=@INCLUDEDIR@
|
|
8
|
+
libdir=@LIBDIR@
|
|
9
9
|
|
|
10
10
|
Name: zstd
|
|
11
11
|
Description: fast lossless compression algorithm library
|
|
12
12
|
URL: http://www.zstd.net/
|
|
13
13
|
Version: @VERSION@
|
|
14
14
|
Libs: -L${libdir} -lzstd
|
|
15
|
+
Libs.private: @LIBS_PRIVATE@
|
|
15
16
|
Cflags: -I${includedir}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -36,6 +36,145 @@ extern "C" {
|
|
|
36
36
|
# define ZDICTLIB_API ZDICTLIB_VISIBILITY
|
|
37
37
|
#endif
|
|
38
38
|
|
|
39
|
+
/*******************************************************************************
|
|
40
|
+
* Zstd dictionary builder
|
|
41
|
+
*
|
|
42
|
+
* FAQ
|
|
43
|
+
* ===
|
|
44
|
+
* Why should I use a dictionary?
|
|
45
|
+
* ------------------------------
|
|
46
|
+
*
|
|
47
|
+
* Zstd can use dictionaries to improve compression ratio of small data.
|
|
48
|
+
* Traditionally small files don't compress well because there is very little
|
|
49
|
+
* repetition in a single sample, since it is small. But, if you are compressing
|
|
50
|
+
* many similar files, like a bunch of JSON records that share the same
|
|
51
|
+
* structure, you can train a dictionary ahead of time on some samples of
|
|
52
|
+
* these files. Then, zstd can use the dictionary to find repetitions that are
|
|
53
|
+
* present across samples. This can vastly improve compression ratio.
|
|
54
|
+
*
|
|
55
|
+
* When is a dictionary useful?
|
|
56
|
+
* ----------------------------
|
|
57
|
+
*
|
|
58
|
+
* Dictionaries are useful when compressing many small files that are similar.
|
|
59
|
+
* The larger a file is, the less benefit a dictionary will have. Generally,
|
|
60
|
+
* we don't expect dictionary compression to be effective past 100KB. And the
|
|
61
|
+
* smaller a file is, the more we would expect the dictionary to help.
|
|
62
|
+
*
|
|
63
|
+
* How do I use a dictionary?
|
|
64
|
+
* --------------------------
|
|
65
|
+
*
|
|
66
|
+
* Simply pass the dictionary to the zstd compressor with
|
|
67
|
+
* `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
|
|
68
|
+
* the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
|
|
69
|
+
* more advanced functions that allow selecting some options, see zstd.h for
|
|
70
|
+
* complete documentation.
|
|
71
|
+
*
|
|
72
|
+
* What is a zstd dictionary?
|
|
73
|
+
* --------------------------
|
|
74
|
+
*
|
|
75
|
+
* A zstd dictionary has two pieces: Its header, and its content. The header
|
|
76
|
+
* contains a magic number, the dictionary ID, and entropy tables. These
|
|
77
|
+
* entropy tables allow zstd to save on header costs in the compressed file,
|
|
78
|
+
* which really matters for small data. The content is just bytes, which are
|
|
79
|
+
* repeated content that is common across many samples.
|
|
80
|
+
*
|
|
81
|
+
* What is a raw content dictionary?
|
|
82
|
+
* ---------------------------------
|
|
83
|
+
*
|
|
84
|
+
* A raw content dictionary is just bytes. It doesn't have a zstd dictionary
|
|
85
|
+
* header, a dictionary ID, or entropy tables. Any buffer is a valid raw
|
|
86
|
+
* content dictionary.
|
|
87
|
+
*
|
|
88
|
+
* How do I train a dictionary?
|
|
89
|
+
* ----------------------------
|
|
90
|
+
*
|
|
91
|
+
* Gather samples from your use case. These samples should be similar to each
|
|
92
|
+
* other. If you have several use cases, you could try to train one dictionary
|
|
93
|
+
* per use case.
|
|
94
|
+
*
|
|
95
|
+
* Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
|
|
96
|
+
* dictionary. There are a few advanced versions of this function, but this
|
|
97
|
+
* is a great starting point. If you want to further tune your dictionary
|
|
98
|
+
* you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
|
|
99
|
+
* you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
|
|
100
|
+
*
|
|
101
|
+
* If the dictionary training function fails, that is likely because you
|
|
102
|
+
* either passed too few samples, or a dictionary would not be effective
|
|
103
|
+
* for your data. Look at the messages that the dictionary trainer printed;
|
|
104
|
+
* if it doesn't say too few samples, then a dictionary would not be effective.
|
|
105
|
+
*
|
|
106
|
+
* How large should my dictionary be?
|
|
107
|
+
* ----------------------------------
|
|
108
|
+
*
|
|
109
|
+
* A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
|
|
110
|
+
* The zstd CLI defaults to a 110KB dictionary. You likely don't need a
|
|
111
|
+
* dictionary larger than that. But, most use cases can get away with a
|
|
112
|
+
* smaller dictionary. The advanced dictionary builders can automatically
|
|
113
|
+
* shrink the dictionary for you, and select the smallest size that
|
|
114
|
+
* doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
|
|
115
|
+
* A smaller dictionary can save memory, and potentially speed up
|
|
116
|
+
* compression.
|
|
117
|
+
*
|
|
118
|
+
* How many samples should I provide to the dictionary builder?
|
|
119
|
+
* ------------------------------------------------------------
|
|
120
|
+
*
|
|
121
|
+
* We generally recommend passing ~100x the size of the dictionary
|
|
122
|
+
* in samples. A few thousand should suffice. Having too few samples
|
|
123
|
+
* can hurt the dictionary's effectiveness. Having more samples will
|
|
124
|
+
* only improve the dictionary's effectiveness. But having too many
|
|
125
|
+
* samples can slow down the dictionary builder.
|
|
126
|
+
*
|
|
127
|
+
* How do I determine if a dictionary will be effective?
|
|
128
|
+
* -----------------------------------------------------
|
|
129
|
+
*
|
|
130
|
+
* Simply train a dictionary and try it out. You can use zstd's built-in
|
|
131
|
+
* benchmarking tool to test the dictionary effectiveness.
|
|
132
|
+
*
|
|
133
|
+
* # Benchmark levels 1-3 without a dictionary
|
|
134
|
+
* zstd -b1e3 -r /path/to/my/files
|
|
135
|
+
* # Benchmark levels 1-3 with a dictionary
|
|
136
|
+
* zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
|
|
137
|
+
*
|
|
138
|
+
* When should I retrain a dictionary?
|
|
139
|
+
* -----------------------------------
|
|
140
|
+
*
|
|
141
|
+
* You should retrain a dictionary when its effectiveness drops. Dictionary
|
|
142
|
+
* effectiveness drops as the data you are compressing changes. Generally, we do
|
|
143
|
+
* expect dictionaries to "decay" over time, as your data changes, but the rate
|
|
144
|
+
* at which they decay depends on your use case. Internally, we regularly
|
|
145
|
+
* retrain dictionaries, and if the new dictionary performs significantly
|
|
146
|
+
* better than the old dictionary, we will ship the new dictionary.
|
|
147
|
+
*
|
|
148
|
+
* I have a raw content dictionary, how do I turn it into a zstd dictionary?
|
|
149
|
+
* -------------------------------------------------------------------------
|
|
150
|
+
*
|
|
151
|
+
* If you have a raw content dictionary, e.g. by manually constructing it, or
|
|
152
|
+
* using a third-party dictionary builder, you can turn it into a zstd
|
|
153
|
+
* dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
|
|
154
|
+
* provide some samples of the data. It will add the zstd header to the
|
|
155
|
+
* raw content, which contains a dictionary ID and entropy tables, which
|
|
156
|
+
* will improve compression ratio, and allow zstd to write the dictionary ID
|
|
157
|
+
* into the frame, if you so choose.
|
|
158
|
+
*
|
|
159
|
+
* Do I have to use zstd's dictionary builder?
|
|
160
|
+
* -------------------------------------------
|
|
161
|
+
*
|
|
162
|
+
* No! You can construct dictionary content however you please, it is just
|
|
163
|
+
* bytes. It will always be valid as a raw content dictionary. If you want
|
|
164
|
+
* a zstd dictionary, which can improve compression ratio, use
|
|
165
|
+
* `ZDICT_finalizeDictionary()`.
|
|
166
|
+
*
|
|
167
|
+
* What is the attack surface of a zstd dictionary?
|
|
168
|
+
* ------------------------------------------------
|
|
169
|
+
*
|
|
170
|
+
* Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
|
|
171
|
+
* zstd should never crash, or access out-of-bounds memory no matter what
|
|
172
|
+
* the dictionary is. However, if an attacker can control the dictionary
|
|
173
|
+
* during decompression, they can cause zstd to generate arbitrary bytes,
|
|
174
|
+
* just like if they controlled the compressed data.
|
|
175
|
+
*
|
|
176
|
+
******************************************************************************/
|
|
177
|
+
|
|
39
178
|
|
|
40
179
|
/*! ZDICT_trainFromBuffer():
|
|
41
180
|
* Train a dictionary from an array of samples.
|
|
@@ -64,7 +203,14 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
|
|
|
64
203
|
typedef struct {
|
|
65
204
|
int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */
|
|
66
205
|
unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
|
67
|
-
unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value)
|
|
206
|
+
unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value)
|
|
207
|
+
* NOTE: The zstd format reserves some dictionary IDs for future use.
|
|
208
|
+
* You may use them in private settings, but be warned that they
|
|
209
|
+
* may be used by zstd in a public dictionary registry in the future.
|
|
210
|
+
* These dictionary IDs are:
|
|
211
|
+
* - low range : <= 32767
|
|
212
|
+
* - high range : >= (2^31)
|
|
213
|
+
*/
|
|
68
214
|
} ZDICT_params_t;
|
|
69
215
|
|
|
70
216
|
/*! ZDICT_finalizeDictionary():
|
|
@@ -91,7 +237,6 @@ typedef struct {
|
|
|
91
237
|
* is presumed that the most profitable content is at the end of the dictionary,
|
|
92
238
|
* since that is the cheapest to reference.
|
|
93
239
|
*
|
|
94
|
-
* `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
|
95
240
|
* `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN).
|
|
96
241
|
*
|
|
97
242
|
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
|
|
@@ -126,8 +271,9 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
|
|
126
271
|
* Use them only in association with static linking.
|
|
127
272
|
* ==================================================================================== */
|
|
128
273
|
|
|
129
|
-
#define ZDICT_CONTENTSIZE_MIN 128
|
|
130
274
|
#define ZDICT_DICTSIZE_MIN 256
|
|
275
|
+
/* Deprecated: Remove in v1.6.0 */
|
|
276
|
+
#define ZDICT_CONTENTSIZE_MIN 128
|
|
131
277
|
|
|
132
278
|
/*! ZDICT_cover_params_t:
|
|
133
279
|
* k and d are the only required parameters.
|
|
@@ -264,10 +410,11 @@ typedef struct {
|
|
|
264
410
|
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
|
265
411
|
*/
|
|
266
412
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
|
267
|
-
void
|
|
268
|
-
const void
|
|
413
|
+
void* dictBuffer, size_t dictBufferCapacity,
|
|
414
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
269
415
|
ZDICT_legacy_params_t parameters);
|
|
270
416
|
|
|
417
|
+
|
|
271
418
|
/* Deprecation warnings */
|
|
272
419
|
/* It is generally possible to disable deprecation warnings from compiler,
|
|
273
420
|
for example with -Wno-deprecated-declarations for gcc
|
|
@@ -279,7 +426,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
|
|
279
426
|
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
|
|
280
427
|
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
|
281
428
|
# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
|
|
282
|
-
# elif (ZDICT_GCC_VERSION >= 405)
|
|
429
|
+
# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
|
|
283
430
|
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
|
|
284
431
|
# elif (ZDICT_GCC_VERSION >= 301)
|
|
285
432
|
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
|