zstd-ruby 1.4.5.0 → 1.5.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/extconf.rb +2 -1
  6. data/ext/zstdruby/libzstd/BUCK +5 -7
  7. data/ext/zstdruby/libzstd/Makefile +225 -222
  8. data/ext/zstdruby/libzstd/README.md +43 -5
  9. data/ext/zstdruby/libzstd/common/bitstream.h +46 -22
  10. data/ext/zstdruby/libzstd/common/compiler.h +182 -22
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  13. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +196 -44
  15. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  16. data/ext/zstdruby/libzstd/common/error_private.h +82 -3
  17. data/ext/zstdruby/libzstd/common/fse.h +41 -12
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +139 -22
  19. data/ext/zstdruby/libzstd/common/huf.h +47 -23
  20. data/ext/zstdruby/libzstd/common/mem.h +87 -98
  21. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  22. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +6 -5
  25. data/ext/zstdruby/libzstd/common/xxhash.c +6 -846
  26. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  27. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  28. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  29. data/ext/zstdruby/libzstd/common/zstd_internal.h +189 -142
  30. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  31. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  32. data/ext/zstdruby/libzstd/compress/fse_compress.c +89 -46
  33. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  34. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  35. data/ext/zstdruby/libzstd/compress/huf_compress.c +770 -198
  36. data/ext/zstdruby/libzstd/compress/zstd_compress.c +2894 -863
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +390 -90
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +12 -11
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +31 -8
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -297
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  44. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +206 -69
  45. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +307 -132
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
  47. data/ext/zstdruby/libzstd/compress/zstd_fast.c +322 -143
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1136 -174
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +316 -213
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  54. data/ext/zstdruby/libzstd/compress/zstd_opt.c +373 -150
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  56. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +152 -444
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +31 -113
  58. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1044 -403
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
  60. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +450 -105
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +913 -273
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +14 -5
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +59 -12
  66. data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
  67. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
  68. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
  69. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -38
  71. data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
  72. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  73. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -34
  74. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +128 -58
  75. data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
  76. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  77. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +8 -8
  79. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +9 -9
  81. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +9 -9
  83. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  84. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +10 -10
  85. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  86. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +13 -13
  87. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
  88. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +13 -13
  89. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  90. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +13 -13
  91. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  92. data/ext/zstdruby/libzstd/libzstd.mk +185 -0
  93. data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
  94. data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
  95. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +154 -7
  96. data/ext/zstdruby/libzstd/zstd.h +699 -214
  97. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +2 -1
  98. data/ext/zstdruby/zstdruby.c +2 -2
  99. data/lib/zstd-ruby/version.rb +1 -1
  100. metadata +15 -6
  101. data/.travis.yml +0 -14
@@ -3,13 +3,14 @@
3
3
  # BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
4
4
 
5
5
  prefix=@PREFIX@
6
- exec_prefix=${prefix}
7
- includedir=${prefix}/@INCLUDEDIR@
8
- libdir=${exec_prefix}/@LIBDIR@
6
+ exec_prefix=@EXEC_PREFIX@
7
+ includedir=@INCLUDEDIR@
8
+ libdir=@LIBDIR@
9
9
 
10
10
  Name: zstd
11
11
  Description: fast lossless compression algorithm library
12
12
  URL: http://www.zstd.net/
13
13
  Version: @VERSION@
14
14
  Libs: -L${libdir} -lzstd
15
+ Libs.private: @LIBS_PRIVATE@
15
16
  Cflags: -I${includedir}
@@ -0,0 +1,4 @@
1
+ module libzstd [extern_c] {
2
+ header "../zstd.h"
3
+ export *
4
+ }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -36,6 +36,145 @@ extern "C" {
36
36
  # define ZDICTLIB_API ZDICTLIB_VISIBILITY
37
37
  #endif
38
38
 
39
+ /*******************************************************************************
40
+ * Zstd dictionary builder
41
+ *
42
+ * FAQ
43
+ * ===
44
+ * Why should I use a dictionary?
45
+ * ------------------------------
46
+ *
47
+ * Zstd can use dictionaries to improve compression ratio of small data.
48
+ * Traditionally small files don't compress well because there is very little
49
+ * repetition in a single sample, since it is small. But, if you are compressing
50
+ * many similar files, like a bunch of JSON records that share the same
51
+ * structure, you can train a dictionary ahead of time on some samples of
52
+ * these files. Then, zstd can use the dictionary to find repetitions that are
53
+ * present across samples. This can vastly improve compression ratio.
54
+ *
55
+ * When is a dictionary useful?
56
+ * ----------------------------
57
+ *
58
+ * Dictionaries are useful when compressing many small files that are similar.
59
+ * The larger a file is, the less benefit a dictionary will have. Generally,
60
+ * we don't expect dictionary compression to be effective past 100KB. And the
61
+ * smaller a file is, the more we would expect the dictionary to help.
62
+ *
63
+ * How do I use a dictionary?
64
+ * --------------------------
65
+ *
66
+ * Simply pass the dictionary to the zstd compressor with
67
+ * `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
68
+ * the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
69
+ * more advanced functions that allow selecting some options, see zstd.h for
70
+ * complete documentation.
71
+ *
72
+ * What is a zstd dictionary?
73
+ * --------------------------
74
+ *
75
+ * A zstd dictionary has two pieces: Its header, and its content. The header
76
+ * contains a magic number, the dictionary ID, and entropy tables. These
77
+ * entropy tables allow zstd to save on header costs in the compressed file,
78
+ * which really matters for small data. The content is just bytes, which are
79
+ * repeated content that is common across many samples.
80
+ *
81
+ * What is a raw content dictionary?
82
+ * ---------------------------------
83
+ *
84
+ * A raw content dictionary is just bytes. It doesn't have a zstd dictionary
85
+ * header, a dictionary ID, or entropy tables. Any buffer is a valid raw
86
+ * content dictionary.
87
+ *
88
+ * How do I train a dictionary?
89
+ * ----------------------------
90
+ *
91
+ * Gather samples from your use case. These samples should be similar to each
92
+ * other. If you have several use cases, you could try to train one dictionary
93
+ * per use case.
94
+ *
95
+ * Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
96
+ * dictionary. There are a few advanced versions of this function, but this
97
+ * is a great starting point. If you want to further tune your dictionary
98
+ * you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
99
+ * you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
100
+ *
101
+ * If the dictionary training function fails, that is likely because you
102
+ * either passed too few samples, or a dictionary would not be effective
103
+ * for your data. Look at the messages that the dictionary trainer printed:
104
+ * if they don't mention too few samples, then a dictionary would not be effective.
105
+ *
106
+ * How large should my dictionary be?
107
+ * ----------------------------------
108
+ *
109
+ * A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
110
+ * The zstd CLI defaults to a 110KB dictionary. You likely don't need a
111
+ * dictionary larger than that. But, most use cases can get away with a
112
+ * smaller dictionary. The advanced dictionary builders can automatically
113
+ * shrink the dictionary for you, and select the smallest size that
114
+ * doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
115
+ * A smaller dictionary can save memory, and potentially speed up
116
+ * compression.
117
+ *
118
+ * How many samples should I provide to the dictionary builder?
119
+ * ------------------------------------------------------------
120
+ *
121
+ * We generally recommend passing ~100x the size of the dictionary
122
+ * in samples. A few thousand should suffice. Having too few samples
123
+ * can hurt the dictionary's effectiveness. Having more samples will
124
+ * only improve the dictionary's effectiveness. But having too many
125
+ * samples can slow down the dictionary builder.
126
+ *
127
+ * How do I determine if a dictionary will be effective?
128
+ * -----------------------------------------------------
129
+ *
130
+ * Simply train a dictionary and try it out. You can use zstd's built-in
131
+ * benchmarking tool to test the dictionary effectiveness.
132
+ *
133
+ * # Benchmark levels 1-3 without a dictionary
134
+ * zstd -b1e3 -r /path/to/my/files
135
+ * # Benchmark levels 1-3 with a dictionary
136
+ * zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
137
+ *
138
+ * When should I retrain a dictionary?
139
+ * -----------------------------------
140
+ *
141
+ * You should retrain a dictionary when its effectiveness drops. Dictionary
142
+ * effectiveness drops as the data you are compressing changes. Generally, we do
143
+ * expect dictionaries to "decay" over time, as your data changes, but the rate
144
+ * at which they decay depends on your use case. Internally, we regularly
145
+ * retrain dictionaries, and if the new dictionary performs significantly
146
+ * better than the old dictionary, we will ship the new dictionary.
147
+ *
148
+ * I have a raw content dictionary, how do I turn it into a zstd dictionary?
149
+ * -------------------------------------------------------------------------
150
+ *
151
+ * If you have a raw content dictionary, e.g. by manually constructing it, or
152
+ * using a third-party dictionary builder, you can turn it into a zstd
153
+ * dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
154
+ * provide some samples of the data. It will add the zstd header to the
155
+ * raw content, which contains a dictionary ID and entropy tables, which
156
+ * will improve compression ratio, and allow zstd to write the dictionary ID
157
+ * into the frame, if you so choose.
158
+ *
159
+ * Do I have to use zstd's dictionary builder?
160
+ * -------------------------------------------
161
+ *
162
+ * No! You can construct dictionary content however you please, it is just
163
+ * bytes. It will always be valid as a raw content dictionary. If you want
164
+ * a zstd dictionary, which can improve compression ratio, use
165
+ * `ZDICT_finalizeDictionary()`.
166
+ *
167
+ * What is the attack surface of a zstd dictionary?
168
+ * ------------------------------------------------
169
+ *
170
+ * Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
171
+ * zstd should never crash, or access out-of-bounds memory no matter what
172
+ * the dictionary is. However, if an attacker can control the dictionary
173
+ * during decompression, they can cause zstd to generate arbitrary bytes,
174
+ * just like if they controlled the compressed data.
175
+ *
176
+ ******************************************************************************/
177
+
39
178
 
40
179
  /*! ZDICT_trainFromBuffer():
41
180
  * Train a dictionary from an array of samples.
@@ -64,7 +203,14 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
64
203
  typedef struct {
65
204
  int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */
66
205
  unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
67
- unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value) */
206
+ unsigned dictID; /*< force dictID value; 0 means auto mode (32-bit random value)
207
+ * NOTE: The zstd format reserves some dictionary IDs for future use.
208
+ * You may use them in private settings, but be warned that they
209
+ * may be used by zstd in a public dictionary registry in the future.
210
+ * These dictionary IDs are:
211
+ * - low range : <= 32767
212
+ * - high range : >= (2^31)
213
+ */
68
214
  } ZDICT_params_t;
69
215
 
70
216
  /*! ZDICT_finalizeDictionary():
@@ -91,7 +237,6 @@ typedef struct {
91
237
  * is presumed that the most profitable content is at the end of the dictionary,
92
238
  * since that is the cheapest to reference.
93
239
  *
94
- * `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
95
240
  * `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN).
96
241
  *
97
242
  * @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
@@ -126,8 +271,9 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
126
271
  * Use them only in association with static linking.
127
272
  * ==================================================================================== */
128
273
 
129
- #define ZDICT_CONTENTSIZE_MIN 128
130
274
  #define ZDICT_DICTSIZE_MIN 256
275
+ /* Deprecated: Remove in v1.6.0 */
276
+ #define ZDICT_CONTENTSIZE_MIN 128
131
277
 
132
278
  /*! ZDICT_cover_params_t:
133
279
  * k and d are the only required parameters.
@@ -264,10 +410,11 @@ typedef struct {
264
410
  * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
265
411
  */
266
412
  ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
267
- void *dictBuffer, size_t dictBufferCapacity,
268
- const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
413
+ void* dictBuffer, size_t dictBufferCapacity,
414
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
269
415
  ZDICT_legacy_params_t parameters);
270
416
 
417
+
271
418
  /* Deprecation warnings */
272
419
  /* It is generally possible to disable deprecation warnings from compiler,
273
420
  for example with -Wno-deprecated-declarations for gcc
@@ -279,7 +426,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
279
426
  # define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
280
427
  # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
281
428
  # define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
282
- # elif (ZDICT_GCC_VERSION >= 405) || defined(__clang__)
429
+ # elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
283
430
  # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
284
431
  # elif (ZDICT_GCC_VERSION >= 301)
285
432
  # define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))