extzstd 0.2 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +13 -0
  3. data/README.md +17 -14
  4. data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/Makefile +99 -53
  7. data/contrib/zstd/README.md +59 -39
  8. data/contrib/zstd/TESTING.md +1 -1
  9. data/contrib/zstd/appveyor.yml +17 -6
  10. data/contrib/zstd/lib/BUCK +29 -2
  11. data/contrib/zstd/lib/Makefile +118 -21
  12. data/contrib/zstd/lib/README.md +84 -44
  13. data/contrib/zstd/lib/common/bitstream.h +17 -33
  14. data/contrib/zstd/lib/common/compiler.h +62 -8
  15. data/contrib/zstd/lib/common/cpu.h +215 -0
  16. data/contrib/zstd/lib/common/debug.c +44 -0
  17. data/contrib/zstd/lib/common/debug.h +134 -0
  18. data/contrib/zstd/lib/common/entropy_common.c +16 -1
  19. data/contrib/zstd/lib/common/error_private.c +7 -0
  20. data/contrib/zstd/lib/common/fse.h +48 -44
  21. data/contrib/zstd/lib/common/fse_decompress.c +3 -3
  22. data/contrib/zstd/lib/common/huf.h +169 -113
  23. data/contrib/zstd/lib/common/mem.h +20 -2
  24. data/contrib/zstd/lib/common/pool.c +135 -49
  25. data/contrib/zstd/lib/common/pool.h +40 -21
  26. data/contrib/zstd/lib/common/threading.c +2 -2
  27. data/contrib/zstd/lib/common/threading.h +12 -12
  28. data/contrib/zstd/lib/common/xxhash.c +3 -2
  29. data/contrib/zstd/lib/common/zstd_common.c +3 -6
  30. data/contrib/zstd/lib/common/zstd_errors.h +17 -7
  31. data/contrib/zstd/lib/common/zstd_internal.h +76 -48
  32. data/contrib/zstd/lib/compress/fse_compress.c +89 -209
  33. data/contrib/zstd/lib/compress/hist.c +203 -0
  34. data/contrib/zstd/lib/compress/hist.h +95 -0
  35. data/contrib/zstd/lib/compress/huf_compress.c +188 -80
  36. data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
  37. data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
  38. data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
  39. data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
  40. data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
  41. data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
  42. data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
  43. data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
  44. data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
  45. data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
  46. data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
  47. data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
  48. data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
  49. data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
  50. data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
  51. data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
  52. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  53. data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
  54. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
  55. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
  56. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
  57. data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
  58. data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
  59. data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
  60. data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
  61. data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
  62. data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
  63. data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
  64. data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
  65. data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
  66. data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
  67. data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
  68. data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
  69. data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
  70. data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
  71. data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
  72. data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
  73. data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
  74. data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
  75. data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
  76. data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
  77. data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
  78. data/contrib/zstd/lib/zstd.h +1346 -832
  79. data/ext/extzstd.c +27 -19
  80. data/ext/extzstd_stream.c +20 -4
  81. data/ext/zstd_compress.c +1 -0
  82. data/ext/zstd_decompress.c +4 -0
  83. data/ext/zstd_dictbuilder.c +4 -0
  84. data/ext/zstd_dictbuilder_fastcover.c +5 -0
  85. data/lib/extzstd.rb +52 -220
  86. data/lib/extzstd/version.rb +1 -1
  87. metadata +21 -7
  88. data/contrib/zstd/circle.yml +0 -63
@@ -38,21 +38,28 @@ extern "C" {
38
38
 
39
39
 
40
40
  /*! ZDICT_trainFromBuffer():
41
- * Train a dictionary from an array of samples.
42
- * Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
43
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
44
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
45
- * The resulting dictionary will be saved into `dictBuffer`.
41
+ * Train a dictionary from an array of samples.
42
+ * Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,
43
+ * f=20, and accel=1.
44
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
45
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
46
+ * The resulting dictionary will be saved into `dictBuffer`.
46
47
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
47
- * or an error code, which can be tested with ZDICT_isError().
48
- * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
49
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
50
- * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
51
- * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
48
+ * or an error code, which can be tested with ZDICT_isError().
49
+ * Note: Dictionary training will fail if there are not enough samples to construct a
50
+ * dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
51
+ * If dictionary training fails, you should use zstd without a dictionary, as the dictionary
52
+ * would've been ineffective anyways. If you believe your samples would benefit from a dictionary
53
+ * please open an issue with details, and we can look into it.
54
+ * Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
55
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
56
+ * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
57
+ * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
52
58
  * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
53
59
  */
54
60
  ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
55
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
61
+ const void* samplesBuffer,
62
+ const size_t* samplesSizes, unsigned nbSamples);
56
63
 
57
64
 
58
65
  /*====== Helper functions ======*/
@@ -72,62 +79,119 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
72
79
  * ==================================================================================== */
73
80
 
74
81
  typedef struct {
75
- int compressionLevel; /* 0 means default; target a specific zstd compression level */
76
- unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
77
- unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
82
+ int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */
83
+ unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
84
+ unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */
78
85
  } ZDICT_params_t;
79
86
 
80
87
  /*! ZDICT_cover_params_t:
81
- * For all values 0 means default.
82
88
  * k and d are the only required parameters.
89
+ * For others, value 0 means default.
83
90
  */
84
91
  typedef struct {
85
92
  unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
86
93
  unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
87
- unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
94
+ unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
88
95
  unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
96
+ double splitPoint; /* Fraction of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
89
97
  ZDICT_params_t zParams;
90
98
  } ZDICT_cover_params_t;
91
99
 
100
+ typedef struct {
101
+ unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
102
+ unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
103
+ unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default (20) */
104
+ unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
105
+ unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
106
+ double splitPoint; /* Fraction of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
107
+ unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
108
+ ZDICT_params_t zParams;
109
+ } ZDICT_fastCover_params_t;
92
110
 
93
111
  /*! ZDICT_trainFromBuffer_cover():
94
- * Train a dictionary from an array of samples using the COVER algorithm.
95
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
96
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
97
- * The resulting dictionary will be saved into `dictBuffer`.
112
+ * Train a dictionary from an array of samples using the COVER algorithm.
113
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
114
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
115
+ * The resulting dictionary will be saved into `dictBuffer`.
98
116
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
99
- * or an error code, which can be tested with ZDICT_isError().
100
- * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
101
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
102
- * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
103
- * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
117
+ * or an error code, which can be tested with ZDICT_isError().
118
+ * See ZDICT_trainFromBuffer() for details on failure modes.
119
+ * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
120
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
121
+ * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
122
+ * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
104
123
  * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
105
124
  */
106
125
  ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
107
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
108
- const size_t *samplesSizes, unsigned nbSamples,
109
- ZDICT_cover_params_t parameters);
126
+ void *dictBuffer, size_t dictBufferCapacity,
127
+ const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
128
+ ZDICT_cover_params_t parameters);
110
129
 
111
130
  /*! ZDICT_optimizeTrainFromBuffer_cover():
112
131
  * The same requirements as above hold for all the parameters except `parameters`.
113
132
  * This function tries many parameter combinations and picks the best parameters.
114
- * `*parameters` is filled with the best parameters found, and the dictionary
115
- * constructed with those parameters is stored in `dictBuffer`.
133
+ * `*parameters` is filled with the best parameters found,
134
+ * and the dictionary constructed with those parameters is stored in `dictBuffer`.
116
135
  *
117
136
  * All of the parameters d, k, steps are optional.
118
- * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
137
+ * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
119
138
  * if steps is zero it defaults to its default value.
120
- * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
139
+ * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
121
140
  *
122
141
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
123
- * or an error code, which can be tested with ZDICT_isError().
124
- * On success `*parameters` contains the parameters selected.
142
+ * or an error code, which can be tested with ZDICT_isError().
143
+ * On success `*parameters` contains the parameters selected.
144
+ * See ZDICT_trainFromBuffer() for details on failure modes.
125
145
  * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
126
146
  */
127
147
  ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
128
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
129
- const size_t *samplesSizes, unsigned nbSamples,
130
- ZDICT_cover_params_t *parameters);
148
+ void* dictBuffer, size_t dictBufferCapacity,
149
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
150
+ ZDICT_cover_params_t* parameters);
151
+
152
+ /*! ZDICT_trainFromBuffer_fastCover():
153
+ * Train a dictionary from an array of samples using a modified version of COVER algorithm.
154
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
155
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
156
+ * d and k are required.
157
+ * All other parameters are optional; default values are used if they are not provided.
158
+ * The resulting dictionary will be saved into `dictBuffer`.
159
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
160
+ * or an error code, which can be tested with ZDICT_isError().
161
+ * See ZDICT_trainFromBuffer() for details on failure modes.
162
+ * Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
163
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
164
+ * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
165
+ * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
166
+ * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
167
+ */
168
+ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
169
+ size_t dictBufferCapacity, const void *samplesBuffer,
170
+ const size_t *samplesSizes, unsigned nbSamples,
171
+ ZDICT_fastCover_params_t parameters);
172
+
173
+ /*! ZDICT_optimizeTrainFromBuffer_fastCover():
174
+ * The same requirements as above hold for all the parameters except `parameters`.
175
+ * This function tries many parameter combinations (specifically, k and d combinations)
176
+ * and picks the best parameters. `*parameters` is filled with the best parameters found,
177
+ * and the dictionary constructed with those parameters is stored in `dictBuffer`.
178
+ * All of the parameters d, k, steps, f, and accel are optional.
179
+ * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
180
+ * if steps is zero it defaults to its default value.
181
+ * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
182
+ * If f is zero, default value of 20 is used.
183
+ * If accel is zero, default value of 1 is used.
184
+ *
185
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
186
+ * or an error code, which can be tested with ZDICT_isError().
187
+ * On success `*parameters` contains the parameters selected.
188
+ * See ZDICT_trainFromBuffer() for details on failure modes.
189
+ * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
190
+ */
191
+ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
192
+ size_t dictBufferCapacity, const void* samplesBuffer,
193
+ const size_t* samplesSizes, unsigned nbSamples,
194
+ ZDICT_fastCover_params_t* parameters);
131
195
 
132
196
  /*! ZDICT_finalizeDictionary():
133
197
  * Given a custom content as a basis for dictionary, and a set of samples,
@@ -140,7 +204,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
140
204
  * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
141
205
  *
142
206
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
143
- * or an error code, which can be tested by ZDICT_isError().
207
+ * or an error code, which can be tested by ZDICT_isError().
144
208
  * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
145
209
  * Note 2: dictBuffer and dictContent can overlap
146
210
  */
@@ -157,22 +221,24 @@ typedef struct {
157
221
  } ZDICT_legacy_params_t;
158
222
 
159
223
  /*! ZDICT_trainFromBuffer_legacy():
160
- * Train a dictionary from an array of samples.
161
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
162
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
163
- * The resulting dictionary will be saved into `dictBuffer`.
224
+ * Train a dictionary from an array of samples.
225
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
226
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
227
+ * The resulting dictionary will be saved into `dictBuffer`.
164
228
  * `parameters` is optional and can be provided with values set to 0 to mean "default".
165
229
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
166
- * or an error code, which can be tested with ZDICT_isError().
167
- * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
168
- * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
169
- * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
230
+ * or an error code, which can be tested with ZDICT_isError().
231
+ * See ZDICT_trainFromBuffer() for details on failure modes.
232
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
233
+ * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
234
+ * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
170
235
  * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
171
- * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
236
+ * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
172
237
  */
173
238
  ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
174
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
175
- const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters);
239
+ void *dictBuffer, size_t dictBufferCapacity,
240
+ const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
241
+ ZDICT_legacy_params_t parameters);
176
242
 
177
243
  /* Deprecation warnings */
178
244
  /* It is generally possible to disable deprecation warnings from compiler,
@@ -20,7 +20,7 @@ extern "C" {
20
20
  ***************************************/
21
21
  #include "mem.h" /* MEM_STATIC */
22
22
  #include "error_private.h" /* ERROR */
23
- #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer */
23
+ #include "zstd_internal.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTD_frameSizeInfo */
24
24
 
25
25
  #if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0)
26
26
  # undef ZSTD_LEGACY_SUPPORT
@@ -178,43 +178,73 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
178
178
  }
179
179
  }
180
180
 
181
- MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src,
182
- size_t compressedSize)
181
+ MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize)
183
182
  {
184
- U32 const version = ZSTD_isLegacy(src, compressedSize);
183
+ ZSTD_frameSizeInfo frameSizeInfo;
184
+ U32 const version = ZSTD_isLegacy(src, srcSize);
185
185
  switch(version)
186
186
  {
187
187
  #if (ZSTD_LEGACY_SUPPORT <= 1)
188
188
  case 1 :
189
- return ZSTDv01_findFrameCompressedSize(src, compressedSize);
189
+ ZSTDv01_findFrameSizeInfoLegacy(src, srcSize,
190
+ &frameSizeInfo.compressedSize,
191
+ &frameSizeInfo.decompressedBound);
192
+ break;
190
193
  #endif
191
194
  #if (ZSTD_LEGACY_SUPPORT <= 2)
192
195
  case 2 :
193
- return ZSTDv02_findFrameCompressedSize(src, compressedSize);
196
+ ZSTDv02_findFrameSizeInfoLegacy(src, srcSize,
197
+ &frameSizeInfo.compressedSize,
198
+ &frameSizeInfo.decompressedBound);
199
+ break;
194
200
  #endif
195
201
  #if (ZSTD_LEGACY_SUPPORT <= 3)
196
202
  case 3 :
197
- return ZSTDv03_findFrameCompressedSize(src, compressedSize);
203
+ ZSTDv03_findFrameSizeInfoLegacy(src, srcSize,
204
+ &frameSizeInfo.compressedSize,
205
+ &frameSizeInfo.decompressedBound);
206
+ break;
198
207
  #endif
199
208
  #if (ZSTD_LEGACY_SUPPORT <= 4)
200
209
  case 4 :
201
- return ZSTDv04_findFrameCompressedSize(src, compressedSize);
210
+ ZSTDv04_findFrameSizeInfoLegacy(src, srcSize,
211
+ &frameSizeInfo.compressedSize,
212
+ &frameSizeInfo.decompressedBound);
213
+ break;
202
214
  #endif
203
215
  #if (ZSTD_LEGACY_SUPPORT <= 5)
204
216
  case 5 :
205
- return ZSTDv05_findFrameCompressedSize(src, compressedSize);
217
+ ZSTDv05_findFrameSizeInfoLegacy(src, srcSize,
218
+ &frameSizeInfo.compressedSize,
219
+ &frameSizeInfo.decompressedBound);
220
+ break;
206
221
  #endif
207
222
  #if (ZSTD_LEGACY_SUPPORT <= 6)
208
223
  case 6 :
209
- return ZSTDv06_findFrameCompressedSize(src, compressedSize);
224
+ ZSTDv06_findFrameSizeInfoLegacy(src, srcSize,
225
+ &frameSizeInfo.compressedSize,
226
+ &frameSizeInfo.decompressedBound);
227
+ break;
210
228
  #endif
211
229
  #if (ZSTD_LEGACY_SUPPORT <= 7)
212
230
  case 7 :
213
- return ZSTDv07_findFrameCompressedSize(src, compressedSize);
231
+ ZSTDv07_findFrameSizeInfoLegacy(src, srcSize,
232
+ &frameSizeInfo.compressedSize,
233
+ &frameSizeInfo.decompressedBound);
234
+ break;
214
235
  #endif
215
236
  default :
216
- return ERROR(prefix_unknown);
237
+ frameSizeInfo.compressedSize = ERROR(prefix_unknown);
238
+ frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
239
+ break;
217
240
  }
241
+ return frameSizeInfo;
242
+ }
243
+
244
+ MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize)
245
+ {
246
+ ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize);
247
+ return frameSizeInfo.compressedSize;
218
248
  }
219
249
 
220
250
  MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
@@ -246,6 +276,7 @@ MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
246
276
  MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion,
247
277
  const void* dict, size_t dictSize)
248
278
  {
279
+ DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion);
249
280
  if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion);
250
281
  switch(newVersion)
251
282
  {
@@ -304,6 +335,7 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
304
335
  MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
305
336
  ZSTD_outBuffer* output, ZSTD_inBuffer* input)
306
337
  {
338
+ DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version);
307
339
  switch(version)
308
340
  {
309
341
  default :
@@ -668,11 +668,17 @@ static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t
668
668
  switch(srcSize)
669
669
  {
670
670
  case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16);
671
+ /* fallthrough */
671
672
  case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24);
673
+ /* fallthrough */
672
674
  case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32);
675
+ /* fallthrough */
673
676
  case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24;
677
+ /* fallthrough */
674
678
  case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16;
679
+ /* fallthrough */
675
680
  case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8;
681
+ /* fallthrough */
676
682
  default:;
677
683
  }
678
684
  contain32 = ((const BYTE*)srcBuffer)[srcSize-1];
@@ -1330,6 +1336,8 @@ static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header
1330
1336
  #define LITERAL_NOENTROPY 63
1331
1337
  #define COMMAND_NOENTROPY 7 /* to remove */
1332
1338
 
1339
+ #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
1340
+
1333
1341
  static const size_t ZSTD_blockHeaderSize = 3;
1334
1342
  static const size_t ZSTD_frameHeaderSize = 4;
1335
1343
 
@@ -1458,7 +1466,7 @@ unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
1458
1466
  * Decompression code
1459
1467
  **************************************************************/
1460
1468
 
1461
- size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1469
+ static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1462
1470
  {
1463
1471
  const BYTE* const in = (const BYTE* const)src;
1464
1472
  BYTE headerFlags;
@@ -1511,7 +1519,7 @@ static size_t ZSTD_decompressLiterals(void* ctx,
1511
1519
  }
1512
1520
 
1513
1521
 
1514
- size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1522
+ static size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1515
1523
  void* dst, size_t maxDstSize,
1516
1524
  const BYTE** litStart, size_t* litSize,
1517
1525
  const void* src, size_t srcSize)
@@ -1563,7 +1571,7 @@ size_t ZSTDv01_decodeLiteralsBlock(void* ctx,
1563
1571
  }
1564
1572
 
1565
1573
 
1566
- size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
1574
+ static size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
1567
1575
  FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1568
1576
  const void* src, size_t srcSize)
1569
1577
  {
@@ -1751,7 +1759,7 @@ static size_t ZSTD_execSequence(BYTE* op,
1751
1759
  BYTE* const base, BYTE* const oend)
1752
1760
  {
1753
1761
  static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
1754
- static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
1762
+ static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
1755
1763
  const BYTE* const ostart = op;
1756
1764
  const size_t litLength = sequence.litLength;
1757
1765
  BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
@@ -1993,36 +2001,59 @@ size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t
1993
2001
  return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
1994
2002
  }
1995
2003
 
1996
- size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize)
2004
+ /* ZSTD_errorFrameSizeInfoLegacy() :
2005
+ assumes `cSize` and `dBound` are _not_ NULL */
2006
+ static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
2007
+ {
2008
+ *cSize = ret;
2009
+ *dBound = ZSTD_CONTENTSIZE_ERROR;
2010
+ }
2011
+
2012
+ void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
1997
2013
  {
1998
2014
  const BYTE* ip = (const BYTE*)src;
1999
2015
  size_t remainingSize = srcSize;
2016
+ size_t nbBlocks = 0;
2000
2017
  U32 magicNumber;
2001
2018
  blockProperties_t blockProperties;
2002
2019
 
2003
2020
  /* Frame Header */
2004
- if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
2021
+ if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
2022
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2023
+ return;
2024
+ }
2005
2025
  magicNumber = ZSTD_readBE32(src);
2006
- if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2026
+ if (magicNumber != ZSTD_magicNumber) {
2027
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
2028
+ return;
2029
+ }
2007
2030
  ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
2008
2031
 
2009
2032
  /* Loop on each block */
2010
2033
  while (1)
2011
2034
  {
2012
2035
  size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
2013
- if (ZSTDv01_isError(blockSize)) return blockSize;
2036
+ if (ZSTDv01_isError(blockSize)) {
2037
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize);
2038
+ return;
2039
+ }
2014
2040
 
2015
2041
  ip += ZSTD_blockHeaderSize;
2016
2042
  remainingSize -= ZSTD_blockHeaderSize;
2017
- if (blockSize > remainingSize) return ERROR(srcSize_wrong);
2043
+ if (blockSize > remainingSize) {
2044
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2045
+ return;
2046
+ }
2018
2047
 
2019
2048
  if (blockSize == 0) break; /* bt_end */
2020
2049
 
2021
2050
  ip += blockSize;
2022
2051
  remainingSize -= blockSize;
2052
+ nbBlocks++;
2023
2053
  }
2024
2054
 
2025
- return ip - (const BYTE*)src;
2055
+ *cSize = ip - (const BYTE*)src;
2056
+ *dBound = nbBlocks * BLOCKSIZE;
2026
2057
  }
2027
2058
 
2028
2059
  /*******************************