zstd-ruby 1.3.8.0 → 1.4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +6 -5
  3. data/README.md +1 -1
  4. data/ext/zstdruby/libzstd/Makefile +7 -3
  5. data/ext/zstdruby/libzstd/README.md +4 -2
  6. data/ext/zstdruby/libzstd/common/compiler.h +1 -1
  7. data/ext/zstdruby/libzstd/common/fse.h +1 -1
  8. data/ext/zstdruby/libzstd/common/threading.c +2 -2
  9. data/ext/zstdruby/libzstd/common/xxhash.c +2 -2
  10. data/ext/zstdruby/libzstd/common/zstd_internal.h +55 -2
  11. data/ext/zstdruby/libzstd/compress/fse_compress.c +2 -2
  12. data/ext/zstdruby/libzstd/compress/zstd_compress.c +423 -296
  13. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +14 -11
  14. data/ext/zstdruby/libzstd/compress/zstd_fast.c +203 -124
  15. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +1 -1
  16. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +1 -1
  17. data/ext/zstdruby/libzstd/compress/zstd_opt.c +27 -11
  18. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +41 -49
  19. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +43 -26
  20. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +4 -4
  21. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +257 -164
  22. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +51 -47
  23. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +7 -0
  24. data/ext/zstdruby/libzstd/dictBuilder/cover.c +58 -13
  25. data/ext/zstdruby/libzstd/dictBuilder/cover.h +29 -0
  26. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +25 -13
  27. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +18 -8
  28. data/ext/zstdruby/libzstd/dll/example/build_package.bat +3 -2
  29. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +42 -12
  30. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +32 -7
  31. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +12 -7
  32. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +31 -12
  33. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +12 -7
  34. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +32 -12
  35. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +12 -7
  36. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +32 -12
  37. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +12 -7
  38. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +32 -7
  39. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +12 -7
  40. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +36 -8
  41. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +10 -5
  42. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +40 -9
  43. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +10 -5
  44. data/ext/zstdruby/libzstd/zstd.h +689 -542
  45. data/lib/zstd-ruby/version.rb +1 -1
  46. data/zstd-ruby.gemspec +1 -1
  47. metadata +6 -7
  48. data/ext/zstdruby/libzstd/dll/libzstd.def +0 -87
@@ -38,6 +38,35 @@ typedef struct {
38
38
  U32 score;
39
39
  } COVER_segment_t;
40
40
 
41
+ /**
42
+ * Number of epochs and size of each epoch.
43
+ */
44
+ typedef struct {
45
+ U32 num;
46
+ U32 size;
47
+ } COVER_epoch_info_t;
48
+
49
+ /**
50
+ * Computes the number of epochs and the size of each epoch.
51
+ * We will make sure that each epoch gets at least 10 * k bytes.
52
+ *
53
+ * The COVER algorithms divide the data up into epochs of equal size and
54
+ * select one segment from each epoch.
55
+ *
56
+ * @param maxDictSize The maximum allowed dictionary size.
57
+ * @param nbDmers The number of dmers we are training on.
58
+ * @param k The parameter k (segment size).
59
+ * @param passes The target number of passes over the dmer corpus.
60
+ * More passes means a better dictionary.
61
+ */
62
+ COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers,
63
+ U32 k, U32 passes);
64
+
65
+ /**
66
+ * Warns the user when their corpus is too small.
67
+ */
68
+ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel);
69
+
41
70
  /**
42
71
  * Checks total compressed size of a dictionary
43
72
  */
@@ -132,7 +132,7 @@ typedef struct {
132
132
  *
133
133
  * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
134
134
  *
135
- * Once the dmer with hash value d is in the dictionay we set F(d) = 0.
135
+ * Once the dmer with hash value d is in the dictionary we set F(d) = 0.
136
136
  */
137
137
  static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
138
138
  U32 *freqs, U32 begin, U32 end,
@@ -161,7 +161,7 @@ static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
161
161
  /* Get hash value of current dmer */
162
162
  const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);
163
163
 
164
- /* Add frequency of this index to score if this is the first occurence of index in active segment */
164
+ /* Add frequency of this index to score if this is the first occurrence of index in active segment */
165
165
  if (segmentFreqs[idx] == 0) {
166
166
  activeSegment.score += freqs[idx];
167
167
  }
@@ -386,29 +386,35 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
386
386
  {
387
387
  BYTE *const dict = (BYTE *)dictBuffer;
388
388
  size_t tail = dictBufferCapacity;
389
- /* Divide the data up into epochs of equal size.
390
- * We will select at least one segment from each epoch.
391
- */
392
- const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
393
- const unsigned epochSize = (U32)(ctx->nbDmers / epochs);
389
+ /* Divide the data into epochs. We will select one segment from each epoch. */
390
+ const COVER_epoch_info_t epochs = COVER_computeEpochs(
391
+ (U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1);
392
+ const size_t maxZeroScoreRun = 10;
393
+ size_t zeroScoreRun = 0;
394
394
  size_t epoch;
395
395
  DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
396
- epochs, epochSize);
396
+ (U32)epochs.num, (U32)epochs.size);
397
397
  /* Loop through the epochs until there are no more segments or the dictionary
398
398
  * is full.
399
399
  */
400
- for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
401
- const U32 epochBegin = (U32)(epoch * epochSize);
402
- const U32 epochEnd = epochBegin + epochSize;
400
+ for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
401
+ const U32 epochBegin = (U32)(epoch * epochs.size);
402
+ const U32 epochEnd = epochBegin + epochs.size;
403
403
  size_t segmentSize;
404
404
  /* Select a segment */
405
405
  COVER_segment_t segment = FASTCOVER_selectSegment(
406
406
  ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs);
407
407
 
408
- /* If the segment covers no dmers, then we are out of content */
408
+ /* If the segment covers no dmers, then we are out of content.
409
+ * There may be new content in other epochs, so continue for some time.
410
+ */
409
411
  if (segment.score == 0) {
410
- break;
412
+ if (++zeroScoreRun >= maxZeroScoreRun) {
413
+ break;
414
+ }
415
+ continue;
411
416
  }
417
+ zeroScoreRun = 0;
412
418
 
413
419
  /* Trim the segment if necessary and if it is too small then we are done */
414
420
  segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
@@ -564,6 +570,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
564
570
  DISPLAYLEVEL(1, "Failed to initialize context\n");
565
571
  return ERROR(GENERIC);
566
572
  }
573
+ COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
567
574
  /* Build the dictionary */
568
575
  DISPLAYLEVEL(2, "Building dictionary\n");
569
576
  {
@@ -616,6 +623,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
616
623
  unsigned k;
617
624
  COVER_best_t best;
618
625
  POOL_ctx *pool = NULL;
626
+ int warned = 0;
619
627
  /* Checks */
620
628
  if (splitPoint <= 0 || splitPoint > 1) {
621
629
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
@@ -664,6 +672,10 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
664
672
  POOL_free(pool);
665
673
  return ERROR(GENERIC);
666
674
  }
675
+ if (!warned) {
676
+ COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
677
+ warned = 1;
678
+ }
667
679
  /* Loop through k reusing the same context */
668
680
  for (k = kMinK; k <= kMaxK; k += kStepSize) {
669
681
  /* Prepare the arguments */
@@ -46,7 +46,12 @@ extern "C" {
46
46
  * The resulting dictionary will be saved into `dictBuffer`.
47
47
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
48
48
  * or an error code, which can be tested with ZDICT_isError().
49
- * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
49
+ * Note: Dictionary training will fail if there are not enough samples to construct a
50
+ * dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
51
+ * If dictionary training fails, you should use zstd without a dictionary, as the dictionary
52
+ * would've been ineffective anyway. If you believe your samples would benefit from a dictionary
53
+ * please open an issue with details, and we can look into it.
54
+ * Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
50
55
  * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
51
56
  * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
52
57
  * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
@@ -110,6 +115,7 @@ typedef struct {
110
115
  * The resulting dictionary will be saved into `dictBuffer`.
111
116
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
112
117
  * or an error code, which can be tested with ZDICT_isError().
118
+ * See ZDICT_trainFromBuffer() for details on failure modes.
113
119
  * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
114
120
  * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
115
121
  * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
@@ -133,8 +139,9 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
133
139
  * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
134
140
  *
135
141
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
136
- * or an error code, which can be tested with ZDICT_isError().
137
- * On success `*parameters` contains the parameters selected.
142
+ * or an error code, which can be tested with ZDICT_isError().
143
+ * On success `*parameters` contains the parameters selected.
144
+ * See ZDICT_trainFromBuffer() for details on failure modes.
138
145
  * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
139
146
  */
140
147
  ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
@@ -151,7 +158,8 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
151
158
  * The resulting dictionary will be saved into `dictBuffer`.
152
159
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
153
160
  * or an error code, which can be tested with ZDICT_isError().
154
- * Note: ZDICT_trainFromBuffer_fastCover() requires about 1 bytes of memory for each input byte and additionally another 6 * 2^f bytes of memory .
161
+ * See ZDICT_trainFromBuffer() for details on failure modes.
162
+ * Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
155
163
  * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
156
164
  * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
157
165
  * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
@@ -175,9 +183,10 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
175
183
  * If accel is zero, default value of 1 is used.
176
184
  *
177
185
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
178
- * or an error code, which can be tested with ZDICT_isError().
179
- * On success `*parameters` contains the parameters selected.
180
- * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 1 byte of memory for each input byte and additionally another 6 * 2^f bytes of memory for each thread.
186
+ * or an error code, which can be tested with ZDICT_isError().
187
+ * On success `*parameters` contains the parameters selected.
188
+ * See ZDICT_trainFromBuffer() for details on failure modes.
189
+ * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
181
190
  */
182
191
  ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
183
192
  size_t dictBufferCapacity, const void* samplesBuffer,
@@ -195,7 +204,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
195
204
  * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
196
205
  *
197
206
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
198
- * or an error code, which can be tested by ZDICT_isError().
207
+ * or an error code, which can be tested by ZDICT_isError().
199
208
  * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
200
209
  * Note 2: dictBuffer and dictContent can overlap
201
210
  */
@@ -219,6 +228,7 @@ typedef struct {
219
228
  * `parameters` is optional and can be provided with values set to 0 to mean "default".
220
229
  * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
221
230
  * or an error code, which can be tested with ZDICT_isError().
231
+ * See ZDICT_trainFromBuffer() for details on failure modes.
222
232
  * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
223
233
  * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
224
234
  * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
@@ -6,14 +6,15 @@ COPY programs\datagen.h bin\example\
6
6
  COPY programs\util.h bin\example\
7
7
  COPY programs\platform.h bin\example\
8
8
  COPY lib\common\mem.h bin\example\
9
- COPY lib\common\zstd_errors.h bin\example\
10
9
  COPY lib\common\zstd_internal.h bin\example\
11
10
  COPY lib\common\error_private.h bin\example\
12
11
  COPY lib\common\xxhash.h bin\example\
13
- COPY lib\zstd.h bin\include\
14
12
  COPY lib\libzstd.a bin\static\libzstd_static.lib
15
13
  COPY lib\dll\libzstd.* bin\dll\
16
14
  COPY lib\dll\example\Makefile bin\example\
17
15
  COPY lib\dll\example\fullbench-dll.* bin\example\
18
16
  COPY lib\dll\example\README.md bin\
17
+ COPY lib\zstd.h bin\include\
18
+ COPY lib\common\zstd_errors.h bin\include\
19
+ COPY lib\dictBuilder\zdict.h bin\include\
19
20
  COPY programs\zstd.exe bin\zstd.exe
@@ -20,7 +20,7 @@ extern "C" {
20
20
  ***************************************/
21
21
  #include "mem.h" /* MEM_STATIC */
22
22
  #include "error_private.h" /* ERROR */
23
- #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer */
23
+ #include "zstd_internal.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTD_frameSizeInfo */
24
24
 
25
25
  #if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0)
26
26
  # undef ZSTD_LEGACY_SUPPORT
@@ -178,43 +178,73 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
178
178
  }
179
179
  }
180
180
 
181
- MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src,
182
- size_t compressedSize)
181
+ MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize)
183
182
  {
184
- U32 const version = ZSTD_isLegacy(src, compressedSize);
183
+ ZSTD_frameSizeInfo frameSizeInfo;
184
+ U32 const version = ZSTD_isLegacy(src, srcSize);
185
185
  switch(version)
186
186
  {
187
187
  #if (ZSTD_LEGACY_SUPPORT <= 1)
188
188
  case 1 :
189
- return ZSTDv01_findFrameCompressedSize(src, compressedSize);
189
+ ZSTDv01_findFrameSizeInfoLegacy(src, srcSize,
190
+ &frameSizeInfo.compressedSize,
191
+ &frameSizeInfo.decompressedBound);
192
+ break;
190
193
  #endif
191
194
  #if (ZSTD_LEGACY_SUPPORT <= 2)
192
195
  case 2 :
193
- return ZSTDv02_findFrameCompressedSize(src, compressedSize);
196
+ ZSTDv02_findFrameSizeInfoLegacy(src, srcSize,
197
+ &frameSizeInfo.compressedSize,
198
+ &frameSizeInfo.decompressedBound);
199
+ break;
194
200
  #endif
195
201
  #if (ZSTD_LEGACY_SUPPORT <= 3)
196
202
  case 3 :
197
- return ZSTDv03_findFrameCompressedSize(src, compressedSize);
203
+ ZSTDv03_findFrameSizeInfoLegacy(src, srcSize,
204
+ &frameSizeInfo.compressedSize,
205
+ &frameSizeInfo.decompressedBound);
206
+ break;
198
207
  #endif
199
208
  #if (ZSTD_LEGACY_SUPPORT <= 4)
200
209
  case 4 :
201
- return ZSTDv04_findFrameCompressedSize(src, compressedSize);
210
+ ZSTDv04_findFrameSizeInfoLegacy(src, srcSize,
211
+ &frameSizeInfo.compressedSize,
212
+ &frameSizeInfo.decompressedBound);
213
+ break;
202
214
  #endif
203
215
  #if (ZSTD_LEGACY_SUPPORT <= 5)
204
216
  case 5 :
205
- return ZSTDv05_findFrameCompressedSize(src, compressedSize);
217
+ ZSTDv05_findFrameSizeInfoLegacy(src, srcSize,
218
+ &frameSizeInfo.compressedSize,
219
+ &frameSizeInfo.decompressedBound);
220
+ break;
206
221
  #endif
207
222
  #if (ZSTD_LEGACY_SUPPORT <= 6)
208
223
  case 6 :
209
- return ZSTDv06_findFrameCompressedSize(src, compressedSize);
224
+ ZSTDv06_findFrameSizeInfoLegacy(src, srcSize,
225
+ &frameSizeInfo.compressedSize,
226
+ &frameSizeInfo.decompressedBound);
227
+ break;
210
228
  #endif
211
229
  #if (ZSTD_LEGACY_SUPPORT <= 7)
212
230
  case 7 :
213
- return ZSTDv07_findFrameCompressedSize(src, compressedSize);
231
+ ZSTDv07_findFrameSizeInfoLegacy(src, srcSize,
232
+ &frameSizeInfo.compressedSize,
233
+ &frameSizeInfo.decompressedBound);
234
+ break;
214
235
  #endif
215
236
  default :
216
- return ERROR(prefix_unknown);
237
+ frameSizeInfo.compressedSize = ERROR(prefix_unknown);
238
+ frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
239
+ break;
217
240
  }
241
+ return frameSizeInfo;
242
+ }
243
+
244
+ MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize)
245
+ {
246
+ ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize);
247
+ return frameSizeInfo.compressedSize;
218
248
  }
219
249
 
220
250
  MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
@@ -1336,6 +1336,8 @@ static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header
1336
1336
  #define LITERAL_NOENTROPY 63
1337
1337
  #define COMMAND_NOENTROPY 7 /* to remove */
1338
1338
 
1339
+ #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
1340
+
1339
1341
  static const size_t ZSTD_blockHeaderSize = 3;
1340
1342
  static const size_t ZSTD_frameHeaderSize = 4;
1341
1343
 
@@ -1757,7 +1759,7 @@ static size_t ZSTD_execSequence(BYTE* op,
1757
1759
  BYTE* const base, BYTE* const oend)
1758
1760
  {
1759
1761
  static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
1760
- static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
1762
+ static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
1761
1763
  const BYTE* const ostart = op;
1762
1764
  const size_t litLength = sequence.litLength;
1763
1765
  BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
@@ -1999,36 +2001,59 @@ size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t
1999
2001
  return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
2000
2002
  }
2001
2003
 
2002
- size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize)
2004
+ /* ZSTD_errorFrameSizeInfoLegacy() :
2005
+ assumes `cSize` and `dBound` are _not_ NULL */
2006
+ static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
2007
+ {
2008
+ *cSize = ret;
2009
+ *dBound = ZSTD_CONTENTSIZE_ERROR;
2010
+ }
2011
+
2012
+ void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
2003
2013
  {
2004
2014
  const BYTE* ip = (const BYTE*)src;
2005
2015
  size_t remainingSize = srcSize;
2016
+ size_t nbBlocks = 0;
2006
2017
  U32 magicNumber;
2007
2018
  blockProperties_t blockProperties;
2008
2019
 
2009
2020
  /* Frame Header */
2010
- if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
2021
+ if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
2022
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2023
+ return;
2024
+ }
2011
2025
  magicNumber = ZSTD_readBE32(src);
2012
- if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
2026
+ if (magicNumber != ZSTD_magicNumber) {
2027
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
2028
+ return;
2029
+ }
2013
2030
  ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
2014
2031
 
2015
2032
  /* Loop on each block */
2016
2033
  while (1)
2017
2034
  {
2018
2035
  size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
2019
- if (ZSTDv01_isError(blockSize)) return blockSize;
2036
+ if (ZSTDv01_isError(blockSize)) {
2037
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize);
2038
+ return;
2039
+ }
2020
2040
 
2021
2041
  ip += ZSTD_blockHeaderSize;
2022
2042
  remainingSize -= ZSTD_blockHeaderSize;
2023
- if (blockSize > remainingSize) return ERROR(srcSize_wrong);
2043
+ if (blockSize > remainingSize) {
2044
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
2045
+ return;
2046
+ }
2024
2047
 
2025
2048
  if (blockSize == 0) break; /* bt_end */
2026
2049
 
2027
2050
  ip += blockSize;
2028
2051
  remainingSize -= blockSize;
2052
+ nbBlocks++;
2029
2053
  }
2030
2054
 
2031
- return ip - (const BYTE*)src;
2055
+ *cSize = ip - (const BYTE*)src;
2056
+ *dBound = nbBlocks * BLOCKSIZE;
2032
2057
  }
2033
2058
 
2034
2059
  /*******************************
@@ -35,13 +35,18 @@ ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format
35
35
  size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize,
36
36
  const void* src, size_t compressedSize);
37
37
 
38
- /**
39
- ZSTDv01_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.1.x format
40
- compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
41
- return : the number of bytes that would be read to decompress this frame
42
- or an errorCode if it fails (which can be tested using ZSTDv01_isError())
43
- */
44
- size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t compressedSize);
38
+ /**
39
+ ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format
40
+ srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
41
+ cSize (output parameter) : the number of bytes that would be read to decompress this frame
42
+ or an error code if it fails (which can be tested using ZSTDv01_isError())
43
+ dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
44
+ or ZSTD_CONTENTSIZE_ERROR if an error occurs
45
+
46
+ note : assumes `cSize` and `dBound` are _not_ NULL.
47
+ */
48
+ void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
49
+ size_t* cSize, unsigned long long* dBound);
45
50
 
46
51
  /**
47
52
  ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error
@@ -2728,6 +2728,8 @@ static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_
2728
2728
  #define LITERAL_NOENTROPY 63
2729
2729
  #define COMMAND_NOENTROPY 7 /* to remove */
2730
2730
 
2731
+ #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
2732
+
2731
2733
  static const size_t ZSTD_blockHeaderSize = 3;
2732
2734
  static const size_t ZSTD_frameHeaderSize = 4;
2733
2735
 
@@ -3096,7 +3098,7 @@ static size_t ZSTD_execSequence(BYTE* op,
3096
3098
  BYTE* const base, BYTE* const oend)
3097
3099
  {
3098
3100
  static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
3099
- static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
3101
+ static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
3100
3102
  const BYTE* const ostart = op;
3101
3103
  BYTE* const oLitEnd = op + sequence.litLength;
3102
3104
  BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
@@ -3312,37 +3314,59 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz
3312
3314
  return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
3313
3315
  }
3314
3316
 
3315
- static size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
3317
+ /* ZSTD_errorFrameSizeInfoLegacy() :
3318
+ assumes `cSize` and `dBound` are _not_ NULL */
3319
+ static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
3316
3320
  {
3321
+ *cSize = ret;
3322
+ *dBound = ZSTD_CONTENTSIZE_ERROR;
3323
+ }
3317
3324
 
3325
+ void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
3326
+ {
3318
3327
  const BYTE* ip = (const BYTE*)src;
3319
3328
  size_t remainingSize = srcSize;
3329
+ size_t nbBlocks = 0;
3320
3330
  U32 magicNumber;
3321
3331
  blockProperties_t blockProperties;
3322
3332
 
3323
3333
  /* Frame Header */
3324
- if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
3334
+ if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
3335
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
3336
+ return;
3337
+ }
3325
3338
  magicNumber = MEM_readLE32(src);
3326
- if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
3339
+ if (magicNumber != ZSTD_magicNumber) {
3340
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
3341
+ return;
3342
+ }
3327
3343
  ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
3328
3344
 
3329
3345
  /* Loop on each block */
3330
3346
  while (1)
3331
3347
  {
3332
3348
  size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
3333
- if (ZSTD_isError(cBlockSize)) return cBlockSize;
3349
+ if (ZSTD_isError(cBlockSize)) {
3350
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
3351
+ return;
3352
+ }
3334
3353
 
3335
3354
  ip += ZSTD_blockHeaderSize;
3336
3355
  remainingSize -= ZSTD_blockHeaderSize;
3337
- if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
3356
+ if (cBlockSize > remainingSize) {
3357
+ ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
3358
+ return;
3359
+ }
3338
3360
 
3339
3361
  if (cBlockSize == 0) break; /* bt_end */
3340
3362
 
3341
3363
  ip += cBlockSize;
3342
3364
  remainingSize -= cBlockSize;
3365
+ nbBlocks++;
3343
3366
  }
3344
3367
 
3345
- return ip - (const BYTE*)src;
3368
+ *cSize = ip - (const BYTE*)src;
3369
+ *dBound = nbBlocks * BLOCKSIZE;
3346
3370
  }
3347
3371
 
3348
3372
  /*******************************
@@ -3458,11 +3482,6 @@ size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
3458
3482
  return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
3459
3483
  }
3460
3484
 
3461
- size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
3462
- {
3463
- return ZSTD_findFrameCompressedSize(src, compressedSize);
3464
- }
3465
-
3466
3485
  ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
3467
3486
  {
3468
3487
  return (ZSTDv02_Dctx*)ZSTD_createDCtx();