extzstd 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +13 -0
  3. data/README.md +17 -14
  4. data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/Makefile +99 -53
  7. data/contrib/zstd/README.md +59 -39
  8. data/contrib/zstd/TESTING.md +1 -1
  9. data/contrib/zstd/appveyor.yml +17 -6
  10. data/contrib/zstd/lib/BUCK +29 -2
  11. data/contrib/zstd/lib/Makefile +118 -21
  12. data/contrib/zstd/lib/README.md +84 -44
  13. data/contrib/zstd/lib/common/bitstream.h +17 -33
  14. data/contrib/zstd/lib/common/compiler.h +62 -8
  15. data/contrib/zstd/lib/common/cpu.h +215 -0
  16. data/contrib/zstd/lib/common/debug.c +44 -0
  17. data/contrib/zstd/lib/common/debug.h +134 -0
  18. data/contrib/zstd/lib/common/entropy_common.c +16 -1
  19. data/contrib/zstd/lib/common/error_private.c +7 -0
  20. data/contrib/zstd/lib/common/fse.h +48 -44
  21. data/contrib/zstd/lib/common/fse_decompress.c +3 -3
  22. data/contrib/zstd/lib/common/huf.h +169 -113
  23. data/contrib/zstd/lib/common/mem.h +20 -2
  24. data/contrib/zstd/lib/common/pool.c +135 -49
  25. data/contrib/zstd/lib/common/pool.h +40 -21
  26. data/contrib/zstd/lib/common/threading.c +2 -2
  27. data/contrib/zstd/lib/common/threading.h +12 -12
  28. data/contrib/zstd/lib/common/xxhash.c +3 -2
  29. data/contrib/zstd/lib/common/zstd_common.c +3 -6
  30. data/contrib/zstd/lib/common/zstd_errors.h +17 -7
  31. data/contrib/zstd/lib/common/zstd_internal.h +76 -48
  32. data/contrib/zstd/lib/compress/fse_compress.c +89 -209
  33. data/contrib/zstd/lib/compress/hist.c +203 -0
  34. data/contrib/zstd/lib/compress/hist.h +95 -0
  35. data/contrib/zstd/lib/compress/huf_compress.c +188 -80
  36. data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
  37. data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
  38. data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
  39. data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
  40. data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
  41. data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
  42. data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
  43. data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
  44. data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
  45. data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
  46. data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
  47. data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
  48. data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
  49. data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
  50. data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
  51. data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
  52. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  53. data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
  54. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
  55. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
  56. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
  57. data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
  58. data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
  59. data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
  60. data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
  61. data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
  62. data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
  63. data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
  64. data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
  65. data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
  66. data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
  67. data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
  68. data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
  69. data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
  70. data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
  71. data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
  72. data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
  73. data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
  74. data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
  75. data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
  76. data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
  77. data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
  78. data/contrib/zstd/lib/zstd.h +1346 -832
  79. data/ext/extzstd.c +27 -19
  80. data/ext/extzstd_stream.c +20 -4
  81. data/ext/zstd_compress.c +1 -0
  82. data/ext/zstd_decompress.c +4 -0
  83. data/ext/zstd_dictbuilder.c +4 -0
  84. data/ext/zstd_dictbuilder_fastcover.c +5 -0
  85. data/lib/extzstd.rb +52 -220
  86. data/lib/extzstd/version.rb +1 -1
  87. metadata +21 -7
  88. data/contrib/zstd/circle.yml +0 -63
@@ -38,21 +38,28 @@ extern "C" {
38
38
 
39
39
 
40
40
  /*! ZDICT_trainFromBuffer():
41
- * Train a dictionary from an array of samples.
42
- * Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
43
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
44
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
45
- * The resulting dictionary will be saved into `dictBuffer`.
41
+ * Train a dictionary from an array of samples.
42
+ * Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,
43
+ * f=20, and accel=1.
44
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
45
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
46
+ * The resulting dictionary will be saved into `dictBuffer`.
46
47
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
47
- * or an error code, which can be tested with ZDICT_isError().
48
- * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
49
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
50
- * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
51
- * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
48
+ * or an error code, which can be tested with ZDICT_isError().
49
+ * Note: Dictionary training will fail if there are not enough samples to construct a
50
+ * dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
51
+ * If dictionary training fails, you should use zstd without a dictionary, as the dictionary
52
+ * would've been ineffective anyway. If you believe your samples would benefit from a dictionary
53
+ * please open an issue with details, and we can look into it.
54
+ * Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
55
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
56
+ * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
57
+ * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
52
58
  * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
53
59
  */
54
60
  ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
55
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
61
+ const void* samplesBuffer,
62
+ const size_t* samplesSizes, unsigned nbSamples);
56
63
 
57
64
 
58
65
  /*====== Helper functions ======*/
@@ -72,62 +79,119 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
72
79
  * ==================================================================================== */
73
80
 
74
81
  typedef struct {
75
- int compressionLevel; /* 0 means default; target a specific zstd compression level */
76
- unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
77
- unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
82
+ int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */
83
+ unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
84
+ unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */
78
85
  } ZDICT_params_t;
79
86
 
80
87
  /*! ZDICT_cover_params_t:
81
- * For all values 0 means default.
82
88
  * k and d are the only required parameters.
89
+ * For others, value 0 means default.
83
90
  */
84
91
  typedef struct {
85
92
  unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
86
93
  unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
87
- unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
94
+ unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
88
95
  unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
96
+ double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 means all samples are used for both training and testing */
89
97
  ZDICT_params_t zParams;
90
98
  } ZDICT_cover_params_t;
91
99
 
100
+ typedef struct {
101
+ unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
102
+ unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
103
+ unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default (20) */
104
+ unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
105
+ unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
106
+ double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 means all samples are used for both training and testing */
107
+ unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
108
+ ZDICT_params_t zParams;
109
+ } ZDICT_fastCover_params_t;
92
110
 
93
111
  /*! ZDICT_trainFromBuffer_cover():
94
- * Train a dictionary from an array of samples using the COVER algorithm.
95
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
96
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
97
- * The resulting dictionary will be saved into `dictBuffer`.
112
+ * Train a dictionary from an array of samples using the COVER algorithm.
113
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
114
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
115
+ * The resulting dictionary will be saved into `dictBuffer`.
98
116
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
99
- * or an error code, which can be tested with ZDICT_isError().
100
- * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
101
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
102
- * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
103
- * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
117
+ * or an error code, which can be tested with ZDICT_isError().
118
+ * See ZDICT_trainFromBuffer() for details on failure modes.
119
+ * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
120
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
121
+ * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
122
+ * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
104
123
  * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
105
124
  */
106
125
  ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
107
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
108
- const size_t *samplesSizes, unsigned nbSamples,
109
- ZDICT_cover_params_t parameters);
126
+ void *dictBuffer, size_t dictBufferCapacity,
127
+ const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
128
+ ZDICT_cover_params_t parameters);
110
129
 
111
130
  /*! ZDICT_optimizeTrainFromBuffer_cover():
112
131
  * The same requirements as above hold for all the parameters except `parameters`.
113
132
  * This function tries many parameter combinations and picks the best parameters.
114
- * `*parameters` is filled with the best parameters found, and the dictionary
115
- * constructed with those parameters is stored in `dictBuffer`.
133
+ * `*parameters` is filled with the best parameters found,
134
+ * dictionary constructed with those parameters is stored in `dictBuffer`.
116
135
  *
117
136
  * All of the parameters d, k, steps are optional.
118
- * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
137
+ * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
119
138
  * if steps is zero it defaults to its default value.
120
- * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
139
+ * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
121
140
  *
122
141
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
123
- * or an error code, which can be tested with ZDICT_isError().
124
- * On success `*parameters` contains the parameters selected.
142
+ * or an error code, which can be tested with ZDICT_isError().
143
+ * On success `*parameters` contains the parameters selected.
144
+ * See ZDICT_trainFromBuffer() for details on failure modes.
125
145
  * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
126
146
  */
127
147
  ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
128
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
129
- const size_t *samplesSizes, unsigned nbSamples,
130
- ZDICT_cover_params_t *parameters);
148
+ void* dictBuffer, size_t dictBufferCapacity,
149
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
150
+ ZDICT_cover_params_t* parameters);
151
+
152
+ /*! ZDICT_trainFromBuffer_fastCover():
153
+ * Train a dictionary from an array of samples using a modified version of COVER algorithm.
154
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
155
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
156
+ * d and k are required.
157
+ * All other parameters are optional and use default values if not provided.
158
+ * The resulting dictionary will be saved into `dictBuffer`.
159
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
160
+ * or an error code, which can be tested with ZDICT_isError().
161
+ * See ZDICT_trainFromBuffer() for details on failure modes.
162
+ * Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
163
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
164
+ * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
165
+ * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
166
+ * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
167
+ */
168
+ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
169
+ size_t dictBufferCapacity, const void *samplesBuffer,
170
+ const size_t *samplesSizes, unsigned nbSamples,
171
+ ZDICT_fastCover_params_t parameters);
172
+
173
+ /*! ZDICT_optimizeTrainFromBuffer_fastCover():
174
+ * The same requirements as above hold for all the parameters except `parameters`.
175
+ * This function tries many parameter combinations (specifically, k and d combinations)
176
+ * and picks the best parameters. `*parameters` is filled with the best parameters found,
177
+ * dictionary constructed with those parameters is stored in `dictBuffer`.
178
+ * All of the parameters d, k, steps, f, and accel are optional.
179
+ * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
180
+ * if steps is zero it defaults to its default value.
181
+ * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
182
+ * If f is zero, default value of 20 is used.
183
+ * If accel is zero, default value of 1 is used.
184
+ *
185
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
186
+ * or an error code, which can be tested with ZDICT_isError().
187
+ * On success `*parameters` contains the parameters selected.
188
+ * See ZDICT_trainFromBuffer() for details on failure modes.
189
+ * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
190
+ */
191
+ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
192
+ size_t dictBufferCapacity, const void* samplesBuffer,
193
+ const size_t* samplesSizes, unsigned nbSamples,
194
+ ZDICT_fastCover_params_t* parameters);
131
195
 
132
196
  /*! ZDICT_finalizeDictionary():
133
197
  * Given a custom content as a basis for dictionary, and a set of samples,
@@ -140,7 +204,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
140
204
  * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
141
205
  *
142
206
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
143
- * or an error code, which can be tested by ZDICT_isError().
207
+ * or an error code, which can be tested by ZDICT_isError().
144
208
  * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
145
209
  * Note 2: dictBuffer and dictContent can overlap
146
210
  */
@@ -157,22 +221,24 @@ typedef struct {
157
221
  } ZDICT_legacy_params_t;
158
222
 
159
223
  /*! ZDICT_trainFromBuffer_legacy():
160
- * Train a dictionary from an array of samples.
161
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
162
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
163
- * The resulting dictionary will be saved into `dictBuffer`.
224
+ * Train a dictionary from an array of samples.
225
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
226
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
227
+ * The resulting dictionary will be saved into `dictBuffer`.
164
228
  * `parameters` is optional and can be provided with values set to 0 to mean "default".
165
229
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
166
- * or an error code, which can be tested with ZDICT_isError().
167
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
168
- * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
169
- * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
230
+ * or an error code, which can be tested with ZDICT_isError().
231
+ * See ZDICT_trainFromBuffer() for details on failure modes.
232
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
233
+ * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
234
+ * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
170
235
  * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
171
- * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
236
+ * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
172
237
  */
173
238
  ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
174
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
175
- const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters);
239
+ void *dictBuffer, size_t dictBufferCapacity,
240
+ const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
241
+ ZDICT_legacy_params_t parameters);
176
242
 
177
243
  /* Deprecation warnings */
178
244
  /* It is generally possible to disable deprecation warnings from compiler,
@@ -20,7 +20,7 @@ extern "C" {
20
20
  ***************************************/
21
21
  #include "mem.h" /* MEM_STATIC */
22
22
  #include "error_private.h" /* ERROR */
23
- #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer */
23
+ #include "zstd_internal.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTD_frameSizeInfo */
24
24
 
25
25
  #if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0)
26
26
  # undef ZSTD_LEGACY_SUPPORT
@@ -178,43 +178,73 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
178
178
  }
179
179
  }
180
180
 
181
- MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src,
182
- size_t compressedSize)
181
+ MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize)
183
182
  {
184
- U32 const version = ZSTD_isLegacy(src, compressedSize);
183
+ ZSTD_frameSizeInfo frameSizeInfo;
184
+ U32 const version = ZSTD_isLegacy(src, srcSize);
185
185
  switch(version)
186
186
  {
187
187
  #if (ZSTD_LEGACY_SUPPORT <= 1)
188
188
  case 1 :
189
- return ZSTDv01_findFrameCompressedSize(src, compressedSize);
189
+ ZSTDv01_findFrameSizeInfoLegacy(src, srcSize,
190
+ &frameSizeInfo.compressedSize,
191
+ &frameSizeInfo.decompressedBound);
192
+ break;
190
193
  #endif
191
194
  #if (ZSTD_LEGACY_SUPPORT <= 2)
192
195
  case 2 :
193
- return ZSTDv02_findFrameCompressedSize(src, compressedSize);
196
+ ZSTDv02_findFrameSizeInfoLegacy(src, srcSize,
197
+ &frameSizeInfo.compressedSize,
198
+ &frameSizeInfo.decompressedBound);
199
+ break;
194
200
  #endif
195
201
  #if (ZSTD_LEGACY_SUPPORT <= 3)
196
202
  case 3 :
197
- return ZSTDv03_findFrameCompressedSize(src, compressedSize);
203
+ ZSTDv03_findFrameSizeInfoLegacy(src, srcSize,
204
+ &frameSizeInfo.compressedSize,
205
+ &frameSizeInfo.decompressedBound);
206
+ break;
198
207
  #endif
199
208
  #if (ZSTD_LEGACY_SUPPORT <= 4)
200
209
  case 4 :
201
- return ZSTDv04_findFrameCompressedSize(src, compressedSize);
210
+ ZSTDv04_findFrameSizeInfoLegacy(src, srcSize,
211
+ &frameSizeInfo.compressedSize,
212
+ &frameSizeInfo.decompressedBound);
213
+ break;
202
214
  #endif
203
215
  #if (ZSTD_LEGACY_SUPPORT <= 5)
204
216
  case 5 :
205
- return ZSTDv05_findFrameCompressedSize(src, compressedSize);
217
+ ZSTDv05_findFrameSizeInfoLegacy(src, srcSize,
218
+ &frameSizeInfo.compressedSize,
219
+ &frameSizeInfo.decompressedBound);
220
+ break;
206
221
  #endif
207
222
  #if (ZSTD_LEGACY_SUPPORT <= 6)
208
223
  case 6 :
209
- return ZSTDv06_findFrameCompressedSize(src, compressedSize);
224
+ ZSTDv06_findFrameSizeInfoLegacy(src, srcSize,
225
+ &frameSizeInfo.compressedSize,
226
+ &frameSizeInfo.decompressedBound);
227
+ break;
210
228
  #endif
211
229
  #if (ZSTD_LEGACY_SUPPORT <= 7)
212
230
  case 7 :
213
- return ZSTDv07_findFrameCompressedSize(src, compressedSize);
231
+ ZSTDv07_findFrameSizeInfoLegacy(src, srcSize,
232
+ &frameSizeInfo.compressedSize,
233
+ &frameSizeInfo.decompressedBound);
234
+ break;
214
235
  #endif
215
236
  default :
216
- return ERROR(prefix_unknown);
237
+ frameSizeInfo.compressedSize = ERROR(prefix_unknown);
238
+ frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
239
+ break;
217
240
  }
241
+ return frameSizeInfo;
242
+ }
243
+
244
+ MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize)
245
+ {
246
+ ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize);
247
+ return frameSizeInfo.compressedSize;
218
248
  }
219
249
 
220
250
  MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
@@ -246,6 +276,7 @@ MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
246
276
  MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion,
247
277
  const void* dict, size_t dictSize)
248
278
  {
279
+ DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion);
249
280
  if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion);
250
281
  switch(newVersion)
251
282
  {
@@ -304,6 +335,7 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
304
335
  MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
305
336
  ZSTD_outBuffer* output, ZSTD_inBuffer* input)
306
337
  {
338
+ DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version);
307
339
  switch(version)
308
340
  {
309
341
  default :
@@ -668,11 +668,17 @@ static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t
668
668
  switch(srcSize)
669
669
  {
670
670
  case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
671
+ /* fallthrough */
671
672
  case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
673
+ /* fallthrough */
672
674
  case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
675
+ /* fallthrough */
673
676
  case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
677
+ /* fallthrough */
674
678
  case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
679
+ /* fallthrough */
675
680
  case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
681
+ /* fallthrough */
676
682
  default:;
677
683
  }
678
684
  contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
@@ -1330,6 +1336,8 @@ static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header
1330
1336
  #define LITERAL_NOENTROPY 63
1331
1337
  #define COMMAND_NOENTROPY 7 /* to remove */
1332
1338
 
1339
+ #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
1340
+
1333
1341
  static const size_t ZSTD_blockHeaderSize = 3;
1334
1342
  static const size_t ZSTD_frameHeaderSize = 4;
1335
1343
 
@@ -1458,7 +1466,7 @@ unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
1458
1466
  * Decompression code
1459
1467
  **************************************************************/
1460
1468
 
1461
- size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1469
+ static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1462
1470
  {
1463
1471
  const BYTE* const in = (const BYTE* const)src;
1464
1472
  BYTE headerFlags;
@@ -1511,7 +1519,7 @@ static size_t ZSTD_decompressLiterals(void* ctx,
1511
1519
  }
1512
1520
 
1513
1521
 
1514
- size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1522
+ static size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1515
1523
  void* dst, size_t maxDstSize,
1516
1524
  const BYTE** litStart, size_t* litSize,
1517
1525
  const void* src, size_t srcSize)
@@ -1563,7 +1571,7 @@ size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1563
1571
  }
1564
1572
 
1565
1573
 
1566
- size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
1574
+ static size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
1567
1575
  FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1568
1576
  const void* src, size_t srcSize)
1569
1577
  {
@@ -1751,7 +1759,7 @@ static size_t ZSTD_execSequence(BYTE* op,
1751
1759
  BYTE* const base, BYTE* const oend)
1752
1760
  {
1753
1761
  static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
1754
- static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
1762
+ static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
1755
1763
  const BYTE* const ostart = op;
1756
1764
  const size_t litLength = sequence.litLength;
1757
1765
  BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
@@ -1993,36 +2001,59 @@ size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t
1993
2001
  return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
1994
2002
  }
1995
2003
 
1996
- size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize)
2004
+ /* ZSTD_errorFrameSizeInfoLegacy() :
2005
+ assumes `cSize` and `dBound` are _not_ NULL */
2006
+ static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
2007
+ {
2008
+ *cSize = ret;
2009
+ *dBound = ZSTD_CONTENTSIZE_ERROR;
2010
+ }
2011
+
2012
+ void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
1997
2013
  {
1998
2014
  const BYTE* ip = (const BYTE*)src;
1999
2015
  size_t remainingSize = srcSize;
2016
+ size_t nbBlocks = 0;
2000
2017
  U32 magicNumber;
2001
2018
  blockProperties_t blockProperties;
2002
2019
 
2003
2020
  /* Frame Header */
2004
- if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
2021
+ if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
2022
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2023
+ return;
2024
+ }
2005
2025
  magicNumber = ZSTD_readBE32(src);
2006
- if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2026
+ if (magicNumber != ZSTD_magicNumber) {
2027
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
2028
+ return;
2029
+ }
2007
2030
  ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
2008
2031
 
2009
2032
  /* Loop on each block */
2010
2033
  while (1)
2011
2034
  {
2012
2035
  size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
2013
- if (ZSTDv01_isError(blockSize)) return blockSize;
2036
+ if (ZSTDv01_isError(blockSize)) {
2037
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize);
2038
+ return;
2039
+ }
2014
2040
 
2015
2041
  ip += ZSTD_blockHeaderSize;
2016
2042
  remainingSize -= ZSTD_blockHeaderSize;
2017
- if (blockSize > remainingSize) return ERROR(srcSize_wrong);
2043
+ if (blockSize > remainingSize) {
2044
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2045
+ return;
2046
+ }
2018
2047
 
2019
2048
  if (blockSize == 0) break; /* bt_end */
2020
2049
 
2021
2050
  ip += blockSize;
2022
2051
  remainingSize -= blockSize;
2052
+ nbBlocks++;
2023
2053
  }
2024
2054
 
2025
- return ip - (const BYTE*)src;
2055
+ *cSize = ip - (const BYTE*)src;
2056
+ *dBound = nbBlocks * BLOCKSIZE;
2026
2057
  }
2027
2058
 
2028
2059
  /*******************************