extzstd 0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja +5 -0
  3. data/README.md +5 -5
  4. data/contrib/zstd/CONTRIBUTING.md +42 -0
  5. data/contrib/zstd/LICENSE-examples +11 -0
  6. data/contrib/zstd/Makefile +315 -0
  7. data/contrib/zstd/NEWS +261 -0
  8. data/contrib/zstd/PATENTS +33 -0
  9. data/contrib/zstd/README.md +121 -41
  10. data/contrib/zstd/TESTING.md +44 -0
  11. data/contrib/zstd/appveyor.yml +178 -0
  12. data/contrib/zstd/circle.yml +75 -0
  13. data/contrib/zstd/lib/BUCK +186 -0
  14. data/contrib/zstd/lib/Makefile +163 -0
  15. data/contrib/zstd/lib/README.md +77 -0
  16. data/contrib/zstd/{common → lib/common}/bitstream.h +7 -4
  17. data/contrib/zstd/{common → lib/common}/entropy_common.c +19 -23
  18. data/contrib/zstd/{common → lib/common}/error_private.c +0 -0
  19. data/contrib/zstd/{common → lib/common}/error_private.h +0 -0
  20. data/contrib/zstd/{common → lib/common}/fse.h +94 -34
  21. data/contrib/zstd/{common → lib/common}/fse_decompress.c +18 -19
  22. data/contrib/zstd/{common → lib/common}/huf.h +52 -20
  23. data/contrib/zstd/{common → lib/common}/mem.h +17 -13
  24. data/contrib/zstd/lib/common/pool.c +194 -0
  25. data/contrib/zstd/lib/common/pool.h +56 -0
  26. data/contrib/zstd/lib/common/threading.c +80 -0
  27. data/contrib/zstd/lib/common/threading.h +104 -0
  28. data/contrib/zstd/{common → lib/common}/xxhash.c +3 -1
  29. data/contrib/zstd/{common → lib/common}/xxhash.h +11 -15
  30. data/contrib/zstd/{common → lib/common}/zstd_common.c +1 -11
  31. data/contrib/zstd/{common → lib/common}/zstd_errors.h +16 -2
  32. data/contrib/zstd/{common → lib/common}/zstd_internal.h +17 -1
  33. data/contrib/zstd/{compress → lib/compress}/fse_compress.c +138 -91
  34. data/contrib/zstd/{compress → lib/compress}/huf_compress.c +218 -67
  35. data/contrib/zstd/{compress → lib/compress}/zstd_compress.c +231 -108
  36. data/contrib/zstd/{compress → lib/compress}/zstd_opt.h +44 -25
  37. data/contrib/zstd/lib/compress/zstdmt_compress.c +739 -0
  38. data/contrib/zstd/lib/compress/zstdmt_compress.h +78 -0
  39. data/contrib/zstd/{decompress → lib/decompress}/huf_decompress.c +28 -23
  40. data/contrib/zstd/{decompress → lib/decompress}/zstd_decompress.c +814 -176
  41. data/contrib/zstd/{common → lib/deprecated}/zbuff.h +60 -39
  42. data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
  43. data/contrib/zstd/lib/deprecated/zbuff_compress.c +145 -0
  44. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +74 -0
  45. data/contrib/zstd/lib/dictBuilder/cover.c +1029 -0
  46. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +0 -0
  47. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +0 -0
  48. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +68 -18
  49. data/contrib/zstd/lib/dictBuilder/zdict.h +201 -0
  50. data/contrib/zstd/{legacy → lib/legacy}/zstd_legacy.h +122 -7
  51. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +34 -3
  52. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +8 -0
  53. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +45 -12
  54. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +8 -0
  55. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +45 -12
  56. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +8 -0
  57. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +56 -33
  58. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +8 -0
  59. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +45 -18
  60. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +7 -0
  61. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +43 -16
  62. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +7 -0
  63. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.c +57 -23
  64. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.h +8 -0
  65. data/contrib/zstd/lib/libzstd.pc.in +14 -0
  66. data/contrib/zstd/{zstd.h → lib/zstd.h} +206 -71
  67. data/ext/depend +2 -0
  68. data/ext/extconf.rb +4 -4
  69. data/ext/extzstd.c +1 -1
  70. data/ext/zstd_common.c +5 -5
  71. data/ext/zstd_compress.c +3 -3
  72. data/ext/zstd_decompress.c +2 -2
  73. data/ext/zstd_dictbuilder.c +2 -2
  74. data/ext/zstd_legacy_v01.c +1 -1
  75. data/ext/zstd_legacy_v02.c +1 -1
  76. data/ext/zstd_legacy_v03.c +1 -1
  77. data/ext/zstd_legacy_v04.c +1 -1
  78. data/ext/zstd_legacy_v05.c +1 -1
  79. data/ext/zstd_legacy_v06.c +1 -1
  80. data/ext/zstd_legacy_v07.c +1 -1
  81. data/gemstub.rb +9 -5
  82. data/lib/extzstd/version.rb +1 -1
  83. metadata +73 -51
  84. data/contrib/zstd/compress/zbuff_compress.c +0 -319
  85. data/contrib/zstd/decompress/zbuff_decompress.c +0 -252
  86. data/contrib/zstd/dictBuilder/zdict.h +0 -111
@@ -509,7 +509,7 @@ static void ZSTDv05_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
509
509
 
510
510
  /*! ZSTDv05_wildcopy() :
511
511
  * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
512
- MEM_STATIC void ZSTDv05_wildcopy(void* dst, const void* src, size_t length)
512
+ MEM_STATIC void ZSTDv05_wildcopy(void* dst, const void* src, ptrdiff_t length)
513
513
  {
514
514
  const BYTE* ip = (const BYTE*)src;
515
515
  BYTE* op = (BYTE*)dst;
@@ -2731,7 +2731,6 @@ struct ZSTDv05_DCtx_s
2731
2731
  ZSTDv05_dStage stage;
2732
2732
  U32 flagStaticTables;
2733
2733
  const BYTE* litPtr;
2734
- size_t litBufSize;
2735
2734
  size_t litSize;
2736
2735
  BYTE litBuffer[BLOCKSIZE + WILDCOPY_OVERLENGTH];
2737
2736
  BYTE headerBuffer[ZSTDv05_frameHeaderSize_max];
@@ -2978,8 +2977,8 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
2978
2977
  return ERROR(corruption_detected);
2979
2978
 
2980
2979
  dctx->litPtr = dctx->litBuffer;
2981
- dctx->litBufSize = BLOCKSIZE+8;
2982
2980
  dctx->litSize = litSize;
2981
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
2983
2982
  return litCSize + lhSize;
2984
2983
  }
2985
2984
  case IS_PCH:
@@ -2996,14 +2995,14 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
2996
2995
  lhSize=3;
2997
2996
  litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
2998
2997
  litCSize = ((istart[1] & 3) << 8) + istart[2];
2999
- if (litCSize + litSize > srcSize) return ERROR(corruption_detected);
2998
+ if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
3000
2999
 
3001
3000
  errorCode = HUFv05_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
3002
3001
  if (HUFv05_isError(errorCode)) return ERROR(corruption_detected);
3003
3002
 
3004
3003
  dctx->litPtr = dctx->litBuffer;
3005
- dctx->litBufSize = BLOCKSIZE+WILDCOPY_OVERLENGTH;
3006
3004
  dctx->litSize = litSize;
3005
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3007
3006
  return litCSize + lhSize;
3008
3007
  }
3009
3008
  case IS_RAW:
@@ -3028,13 +3027,12 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
3028
3027
  if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
3029
3028
  memcpy(dctx->litBuffer, istart+lhSize, litSize);
3030
3029
  dctx->litPtr = dctx->litBuffer;
3031
- dctx->litBufSize = BLOCKSIZE+8;
3032
3030
  dctx->litSize = litSize;
3031
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3033
3032
  return lhSize+litSize;
3034
3033
  }
3035
3034
  /* direct reference into compressed stream */
3036
3035
  dctx->litPtr = istart+lhSize;
3037
- dctx->litBufSize = srcSize-lhSize;
3038
3036
  dctx->litSize = litSize;
3039
3037
  return lhSize+litSize;
3040
3038
  }
@@ -3057,9 +3055,8 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
3057
3055
  break;
3058
3056
  }
3059
3057
  if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
3060
- memset(dctx->litBuffer, istart[lhSize], litSize);
3058
+ memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
3061
3059
  dctx->litPtr = dctx->litBuffer;
3062
- dctx->litBufSize = BLOCKSIZE+WILDCOPY_OVERLENGTH;
3063
3060
  dctx->litSize = litSize;
3064
3061
  return lhSize+1;
3065
3062
  }
@@ -3233,7 +3230,8 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
3233
3230
  if (litLength&1) litLength>>=1, dumps += 3;
3234
3231
  else litLength = (U16)(litLength)>>1, dumps += 2;
3235
3232
  }
3236
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
3233
+ if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
3234
+ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
3237
3235
  }
3238
3236
 
3239
3237
  /* Offset */
@@ -3266,7 +3264,8 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
3266
3264
  if (matchLength&1) matchLength>>=1, dumps += 3;
3267
3265
  else matchLength = (U16)(matchLength)>>1, dumps += 2;
3268
3266
  }
3269
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
3267
+ if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
3268
+ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
3270
3269
  }
3271
3270
  matchLength += MINMATCH;
3272
3271
 
@@ -3289,7 +3288,7 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
3289
3288
 
3290
3289
  static size_t ZSTDv05_execSequence(BYTE* op,
3291
3290
  BYTE* const oend, seq_t sequence,
3292
- const BYTE** litPtr, const BYTE* const litLimit_8,
3291
+ const BYTE** litPtr, const BYTE* const litLimit,
3293
3292
  const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
3294
3293
  {
3295
3294
  static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
@@ -3304,7 +3303,7 @@ static size_t ZSTDv05_execSequence(BYTE* op,
3304
3303
  /* check */
3305
3304
  if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
3306
3305
  if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
3307
- if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */
3306
+ if (litEnd > litLimit) return ERROR(corruption_detected); /* risk read beyond lit buffer */
3308
3307
 
3309
3308
  /* copy Literals */
3310
3309
  ZSTDv05_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
@@ -3328,7 +3327,7 @@ static size_t ZSTDv05_execSequence(BYTE* op,
3328
3327
  op = oLitEnd + length1;
3329
3328
  sequence.matchLength -= length1;
3330
3329
  match = base;
3331
- if (op > oend_8) {
3330
+ if (op > oend_8 || sequence.matchLength < MINMATCH) {
3332
3331
  while (op < oMatchEnd) *op++ = *match++;
3333
3332
  return sequenceLength;
3334
3333
  }
@@ -3351,7 +3350,7 @@ static size_t ZSTDv05_execSequence(BYTE* op,
3351
3350
  }
3352
3351
  op += 8; match += 8;
3353
3352
 
3354
- if (oMatchEnd > oend-12) {
3353
+ if (oMatchEnd > oend-(16-MINMATCH)) {
3355
3354
  if (op < oend_8) {
3356
3355
  ZSTDv05_wildcopy(op, match, oend_8 - op);
3357
3356
  match += oend_8 - op;
@@ -3360,7 +3359,7 @@ static size_t ZSTDv05_execSequence(BYTE* op,
3360
3359
  while (op < oMatchEnd)
3361
3360
  *op++ = *match++;
3362
3361
  } else {
3363
- ZSTDv05_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
3362
+ ZSTDv05_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
3364
3363
  }
3365
3364
  return sequenceLength;
3366
3365
  }
@@ -3378,7 +3377,6 @@ static size_t ZSTDv05_decompressSequences(
3378
3377
  BYTE* const oend = ostart + maxDstSize;
3379
3378
  size_t errorCode, dumpsLength;
3380
3379
  const BYTE* litPtr = dctx->litPtr;
3381
- const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
3382
3380
  const BYTE* const litEnd = litPtr + dctx->litSize;
3383
3381
  int nbSeq;
3384
3382
  const BYTE* dumps;
@@ -3416,7 +3414,7 @@ static size_t ZSTDv05_decompressSequences(
3416
3414
  size_t oneSeqSize;
3417
3415
  nbSeq--;
3418
3416
  ZSTDv05_decodeSequence(&sequence, &seqState);
3419
- oneSeqSize = ZSTDv05_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
3417
+ oneSeqSize = ZSTDv05_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
3420
3418
  if (ZSTDv05_isError(oneSeqSize)) return oneSeqSize;
3421
3419
  op += oneSeqSize;
3422
3420
  }
@@ -3585,6 +3583,35 @@ size_t ZSTDv05_decompress(void* dst, size_t maxDstSize, const void* src, size_t
3585
3583
  #endif
3586
3584
  }
3587
3585
 
3586
+ size_t ZSTDv05_findFrameCompressedSize(const void *src, size_t srcSize)
3587
+ {
3588
+ const BYTE* ip = (const BYTE*)src;
3589
+ size_t remainingSize = srcSize;
3590
+ blockProperties_t blockProperties;
3591
+
3592
+ /* Frame Header */
3593
+ if (srcSize < ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong);
3594
+ if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
3595
+ ip += ZSTDv05_frameHeaderSize_min; remainingSize -= ZSTDv05_frameHeaderSize_min;
3596
+
3597
+ /* Loop on each block */
3598
+ while (1)
3599
+ {
3600
+ size_t cBlockSize = ZSTDv05_getcBlockSize(ip, remainingSize, &blockProperties);
3601
+ if (ZSTDv05_isError(cBlockSize)) return cBlockSize;
3602
+
3603
+ ip += ZSTDv05_blockHeaderSize;
3604
+ remainingSize -= ZSTDv05_blockHeaderSize;
3605
+ if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
3606
+
3607
+ if (cBlockSize == 0) break; /* bt_end */
3608
+
3609
+ ip += cBlockSize;
3610
+ remainingSize -= cBlockSize;
3611
+ }
3612
+
3613
+ return ip - (const BYTE*)src;
3614
+ }
3588
3615
 
3589
3616
  /* ******************************
3590
3617
  * Streaming Decompression API
@@ -32,6 +32,13 @@ extern "C" {
32
32
  size_t ZSTDv05_decompress( void* dst, size_t dstCapacity,
33
33
  const void* src, size_t compressedSize);
34
34
 
35
+ /**
36
+ ZSTDv05_findFrameCompressedSize() : get the source length of a ZSTD frame
37
+ compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
38
+ return : the number of bytes that would be read to decompress this frame
39
+ or an errorCode if it fails (which can be tested using ZSTDv05_isError())
40
+ */
41
+ size_t ZSTDv05_findFrameCompressedSize(const void* src, size_t compressedSize);
35
42
 
36
43
  /* *************************************
37
44
  * Helper functions
@@ -537,7 +537,7 @@ static void ZSTDv06_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
537
537
  /*! ZSTDv06_wildcopy() :
538
538
  * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
539
539
  #define WILDCOPY_OVERLENGTH 8
540
- MEM_STATIC void ZSTDv06_wildcopy(void* dst, const void* src, size_t length)
540
+ MEM_STATIC void ZSTDv06_wildcopy(void* dst, const void* src, ptrdiff_t length)
541
541
  {
542
542
  const BYTE* ip = (const BYTE*)src;
543
543
  BYTE* op = (BYTE*)dst;
@@ -2893,7 +2893,6 @@ struct ZSTDv06_DCtx_s
2893
2893
  ZSTDv06_dStage stage;
2894
2894
  U32 flagRepeatTable;
2895
2895
  const BYTE* litPtr;
2896
- size_t litBufSize;
2897
2896
  size_t litSize;
2898
2897
  BYTE litBuffer[ZSTDv06_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
2899
2898
  BYTE headerBuffer[ZSTDv06_FRAMEHEADERSIZE_MAX];
@@ -3170,8 +3169,8 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
3170
3169
  return ERROR(corruption_detected);
3171
3170
 
3172
3171
  dctx->litPtr = dctx->litBuffer;
3173
- dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+8;
3174
3172
  dctx->litSize = litSize;
3173
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3175
3174
  return litCSize + lhSize;
3176
3175
  }
3177
3176
  case IS_PCH:
@@ -3186,14 +3185,14 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
3186
3185
  lhSize=3;
3187
3186
  litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
3188
3187
  litCSize = ((istart[1] & 3) << 8) + istart[2];
3189
- if (litCSize + litSize > srcSize) return ERROR(corruption_detected);
3188
+ if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
3190
3189
 
3191
3190
  { size_t const errorCode = HUFv06_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
3192
3191
  if (HUFv06_isError(errorCode)) return ERROR(corruption_detected);
3193
3192
  }
3194
3193
  dctx->litPtr = dctx->litBuffer;
3195
- dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
3196
3194
  dctx->litSize = litSize;
3195
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3197
3196
  return litCSize + lhSize;
3198
3197
  }
3199
3198
  case IS_RAW:
@@ -3217,13 +3216,12 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
3217
3216
  if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
3218
3217
  memcpy(dctx->litBuffer, istart+lhSize, litSize);
3219
3218
  dctx->litPtr = dctx->litBuffer;
3220
- dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+8;
3221
3219
  dctx->litSize = litSize;
3220
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3222
3221
  return lhSize+litSize;
3223
3222
  }
3224
3223
  /* direct reference into compressed stream */
3225
3224
  dctx->litPtr = istart+lhSize;
3226
- dctx->litBufSize = srcSize-lhSize;
3227
3225
  dctx->litSize = litSize;
3228
3226
  return lhSize+litSize;
3229
3227
  }
@@ -3245,9 +3243,8 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
3245
3243
  break;
3246
3244
  }
3247
3245
  if (litSize > ZSTDv06_BLOCKSIZE_MAX) return ERROR(corruption_detected);
3248
- memset(dctx->litBuffer, istart[lhSize], litSize);
3246
+ memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
3249
3247
  dctx->litPtr = dctx->litBuffer;
3250
- dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
3251
3248
  dctx->litSize = litSize;
3252
3249
  return lhSize+1;
3253
3250
  }
@@ -3438,7 +3435,7 @@ static void ZSTDv06_decodeSequence(seq_t* seq, seqState_t* seqState)
3438
3435
 
3439
3436
  size_t ZSTDv06_execSequence(BYTE* op,
3440
3437
  BYTE* const oend, seq_t sequence,
3441
- const BYTE** litPtr, const BYTE* const litLimit_8,
3438
+ const BYTE** litPtr, const BYTE* const litLimit,
3442
3439
  const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
3443
3440
  {
3444
3441
  BYTE* const oLitEnd = op + sequence.litLength;
@@ -3451,7 +3448,7 @@ size_t ZSTDv06_execSequence(BYTE* op,
3451
3448
  /* check */
3452
3449
  if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
3453
3450
  if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
3454
- if (iLitEnd > litLimit_8) return ERROR(corruption_detected); /* over-read beyond lit buffer */
3451
+ if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
3455
3452
 
3456
3453
  /* copy Literals */
3457
3454
  ZSTDv06_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
@@ -3473,7 +3470,7 @@ size_t ZSTDv06_execSequence(BYTE* op,
3473
3470
  op = oLitEnd + length1;
3474
3471
  sequence.matchLength -= length1;
3475
3472
  match = base;
3476
- if (op > oend_8) {
3473
+ if (op > oend_8 || sequence.matchLength < MINMATCH) {
3477
3474
  while (op < oMatchEnd) *op++ = *match++;
3478
3475
  return sequenceLength;
3479
3476
  }
@@ -3506,7 +3503,7 @@ size_t ZSTDv06_execSequence(BYTE* op,
3506
3503
  }
3507
3504
  while (op < oMatchEnd) *op++ = *match++;
3508
3505
  } else {
3509
- ZSTDv06_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
3506
+ ZSTDv06_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
3510
3507
  }
3511
3508
  return sequenceLength;
3512
3509
  }
@@ -3523,7 +3520,6 @@ static size_t ZSTDv06_decompressSequences(
3523
3520
  BYTE* const oend = ostart + maxDstSize;
3524
3521
  BYTE* op = ostart;
3525
3522
  const BYTE* litPtr = dctx->litPtr;
3526
- const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
3527
3523
  const BYTE* const litEnd = litPtr + dctx->litSize;
3528
3524
  FSEv06_DTable* DTableLL = dctx->LLTable;
3529
3525
  FSEv06_DTable* DTableML = dctx->MLTable;
@@ -3567,7 +3563,7 @@ static size_t ZSTDv06_decompressSequences(
3567
3563
  pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset);
3568
3564
  #endif
3569
3565
 
3570
- { size_t const oneSeqSize = ZSTDv06_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
3566
+ { size_t const oneSeqSize = ZSTDv06_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
3571
3567
  if (ZSTDv06_isError(oneSeqSize)) return oneSeqSize;
3572
3568
  op += oneSeqSize;
3573
3569
  } }
@@ -3733,6 +3729,37 @@ size_t ZSTDv06_decompress(void* dst, size_t dstCapacity, const void* src, size_t
3733
3729
  #endif
3734
3730
  }
3735
3731
 
3732
+ size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t srcSize)
3733
+ {
3734
+ const BYTE* ip = (const BYTE*)src;
3735
+ size_t remainingSize = srcSize;
3736
+ blockProperties_t blockProperties = { bt_compressed, 0 };
3737
+
3738
+ /* Frame Header */
3739
+ { size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min);
3740
+ if (ZSTDv06_isError(frameHeaderSize)) return frameHeaderSize;
3741
+ if (MEM_readLE32(src) != ZSTDv06_MAGICNUMBER) return ERROR(prefix_unknown);
3742
+ if (srcSize < frameHeaderSize+ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong);
3743
+ ip += frameHeaderSize; remainingSize -= frameHeaderSize;
3744
+ }
3745
+
3746
+ /* Loop on each block */
3747
+ while (1) {
3748
+ size_t const cBlockSize = ZSTDv06_getcBlockSize(ip, remainingSize, &blockProperties);
3749
+ if (ZSTDv06_isError(cBlockSize)) return cBlockSize;
3750
+
3751
+ ip += ZSTDv06_blockHeaderSize;
3752
+ remainingSize -= ZSTDv06_blockHeaderSize;
3753
+ if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
3754
+
3755
+ if (cBlockSize == 0) break; /* bt_end */
3756
+
3757
+ ip += cBlockSize;
3758
+ remainingSize -= cBlockSize;
3759
+ }
3760
+
3761
+ return ip - (const BYTE*)src;
3762
+ }
3736
3763
 
3737
3764
  /*_******************************
3738
3765
  * Streaming Decompression API
@@ -4081,7 +4108,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
4081
4108
  zbd->inBuff = (char*)malloc(blockSize);
4082
4109
  if (zbd->inBuff == NULL) return ERROR(memory_allocation);
4083
4110
  }
4084
- { size_t const neededOutSize = ((size_t)1 << zbd->fParams.windowLog) + blockSize;
4111
+ { size_t const neededOutSize = ((size_t)1 << zbd->fParams.windowLog) + blockSize + WILDCOPY_OVERLENGTH * 2;
4085
4112
  if (zbd->outBuffSize < neededOutSize) {
4086
4113
  free(zbd->outBuff);
4087
4114
  zbd->outBuffSize = neededOutSize;
@@ -41,6 +41,13 @@ extern "C" {
41
41
  ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity,
42
42
  const void* src, size_t compressedSize);
43
43
 
44
+ /**
45
+ ZSTDv06_findFrameCompressedSize() : get the source length of a ZSTD frame
46
+ compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
47
+ return : the number of bytes that would be read to decompress this frame
48
+ or an errorCode if it fails (which can be tested using ZSTDv06_isError())
49
+ */
50
+ size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t compressedSize);
44
51
 
45
52
  /* *************************************
46
53
  * Helper functions
@@ -13,12 +13,14 @@
13
13
  #include <string.h> /* memcpy */
14
14
  #include <stdlib.h> /* malloc, free, qsort */
15
15
 
16
- #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
17
- #include "xxhash.h" /* XXH64_* */
16
+ #ifndef XXH_STATIC_LINKING_ONLY
17
+ # define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
18
+ #endif
19
+ #include "xxhash.h" /* XXH64_* */
18
20
  #include "zstd_v07.h"
19
21
 
20
- #define FSEv07_STATIC_LINKING_ONLY /* FSEv07_MIN_TABLELOG */
21
- #define HUFv07_STATIC_LINKING_ONLY /* HUFv07_TABLELOG_ABSOLUTEMAX */
22
+ #define FSEv07_STATIC_LINKING_ONLY /* FSEv07_MIN_TABLELOG */
23
+ #define HUFv07_STATIC_LINKING_ONLY /* HUFv07_TABLELOG_ABSOLUTEMAX */
22
24
  #define ZSTDv07_STATIC_LINKING_ONLY
23
25
 
24
26
  #include "error_private.h"
@@ -2845,7 +2847,7 @@ static void ZSTDv07_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
2845
2847
  /*! ZSTDv07_wildcopy() :
2846
2848
  * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
2847
2849
  #define WILDCOPY_OVERLENGTH 8
2848
- MEM_STATIC void ZSTDv07_wildcopy(void* dst, const void* src, size_t length)
2850
+ MEM_STATIC void ZSTDv07_wildcopy(void* dst, const void* src, ptrdiff_t length)
2849
2851
  {
2850
2852
  const BYTE* ip = (const BYTE*)src;
2851
2853
  BYTE* op = (BYTE*)dst;
@@ -3021,7 +3023,6 @@ struct ZSTDv07_DCtx_s
3021
3023
  U32 dictID;
3022
3024
  const BYTE* litPtr;
3023
3025
  ZSTDv07_customMem customMem;
3024
- size_t litBufSize;
3025
3026
  size_t litSize;
3026
3027
  BYTE litBuffer[ZSTDv07_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH];
3027
3028
  BYTE headerBuffer[ZSTDv07_FRAMEHEADERSIZE_MAX];
@@ -3395,9 +3396,9 @@ size_t ZSTDv07_decodeLiteralsBlock(ZSTDv07_DCtx* dctx,
3395
3396
  return ERROR(corruption_detected);
3396
3397
 
3397
3398
  dctx->litPtr = dctx->litBuffer;
3398
- dctx->litBufSize = ZSTDv07_BLOCKSIZE_ABSOLUTEMAX+8;
3399
3399
  dctx->litSize = litSize;
3400
3400
  dctx->litEntropy = 1;
3401
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3401
3402
  return litCSize + lhSize;
3402
3403
  }
3403
3404
  case lbt_repeat:
@@ -3418,8 +3419,8 @@ size_t ZSTDv07_decodeLiteralsBlock(ZSTDv07_DCtx* dctx,
3418
3419
  if (HUFv07_isError(errorCode)) return ERROR(corruption_detected);
3419
3420
  }
3420
3421
  dctx->litPtr = dctx->litBuffer;
3421
- dctx->litBufSize = ZSTDv07_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH;
3422
3422
  dctx->litSize = litSize;
3423
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3423
3424
  return litCSize + lhSize;
3424
3425
  }
3425
3426
  case lbt_raw:
@@ -3443,13 +3444,12 @@ size_t ZSTDv07_decodeLiteralsBlock(ZSTDv07_DCtx* dctx,
3443
3444
  if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
3444
3445
  memcpy(dctx->litBuffer, istart+lhSize, litSize);
3445
3446
  dctx->litPtr = dctx->litBuffer;
3446
- dctx->litBufSize = ZSTDv07_BLOCKSIZE_ABSOLUTEMAX+8;
3447
3447
  dctx->litSize = litSize;
3448
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3448
3449
  return lhSize+litSize;
3449
3450
  }
3450
3451
  /* direct reference into compressed stream */
3451
3452
  dctx->litPtr = istart+lhSize;
3452
- dctx->litBufSize = srcSize-lhSize;
3453
3453
  dctx->litSize = litSize;
3454
3454
  return lhSize+litSize;
3455
3455
  }
@@ -3471,9 +3471,8 @@ size_t ZSTDv07_decodeLiteralsBlock(ZSTDv07_DCtx* dctx,
3471
3471
  break;
3472
3472
  }
3473
3473
  if (litSize > ZSTDv07_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);
3474
- memset(dctx->litBuffer, istart[lhSize], litSize);
3474
+ memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
3475
3475
  dctx->litPtr = dctx->litBuffer;
3476
- dctx->litBufSize = ZSTDv07_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH;
3477
3476
  dctx->litSize = litSize;
3478
3477
  return lhSize+1;
3479
3478
  }
@@ -3662,7 +3661,7 @@ static seq_t ZSTDv07_decodeSequence(seqState_t* seqState)
3662
3661
  static
3663
3662
  size_t ZSTDv07_execSequence(BYTE* op,
3664
3663
  BYTE* const oend, seq_t sequence,
3665
- const BYTE** litPtr, const BYTE* const litLimit_w,
3664
+ const BYTE** litPtr, const BYTE* const litLimit,
3666
3665
  const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
3667
3666
  {
3668
3667
  BYTE* const oLitEnd = op + sequence.litLength;
@@ -3674,7 +3673,7 @@ size_t ZSTDv07_execSequence(BYTE* op,
3674
3673
 
3675
3674
  /* check */
3676
3675
  if ((oLitEnd>oend_w) | (oMatchEnd>oend)) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
3677
- if (iLitEnd > litLimit_w) return ERROR(corruption_detected); /* over-read beyond lit buffer */
3676
+ if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
3678
3677
 
3679
3678
  /* copy Literals */
3680
3679
  ZSTDv07_wildcopy(op, *litPtr, sequence.litLength); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
@@ -3696,7 +3695,7 @@ size_t ZSTDv07_execSequence(BYTE* op,
3696
3695
  op = oLitEnd + length1;
3697
3696
  sequence.matchLength -= length1;
3698
3697
  match = base;
3699
- if (op > oend_w) {
3698
+ if (op > oend_w || sequence.matchLength < MINMATCH) {
3700
3699
  while (op < oMatchEnd) *op++ = *match++;
3701
3700
  return sequenceLength;
3702
3701
  }
@@ -3729,7 +3728,7 @@ size_t ZSTDv07_execSequence(BYTE* op,
3729
3728
  }
3730
3729
  while (op < oMatchEnd) *op++ = *match++;
3731
3730
  } else {
3732
- ZSTDv07_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
3731
+ ZSTDv07_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
3733
3732
  }
3734
3733
  return sequenceLength;
3735
3734
  }
@@ -3746,7 +3745,6 @@ static size_t ZSTDv07_decompressSequences(
3746
3745
  BYTE* const oend = ostart + maxDstSize;
3747
3746
  BYTE* op = ostart;
3748
3747
  const BYTE* litPtr = dctx->litPtr;
3749
- const BYTE* const litLimit_w = litPtr + dctx->litBufSize - WILDCOPY_OVERLENGTH;
3750
3748
  const BYTE* const litEnd = litPtr + dctx->litSize;
3751
3749
  FSEv07_DTable* DTableLL = dctx->LLTable;
3752
3750
  FSEv07_DTable* DTableML = dctx->MLTable;
@@ -3776,7 +3774,7 @@ static size_t ZSTDv07_decompressSequences(
3776
3774
  for ( ; (BITv07_reloadDStream(&(seqState.DStream)) <= BITv07_DStream_completed) && nbSeq ; ) {
3777
3775
  nbSeq--;
3778
3776
  { seq_t const sequence = ZSTDv07_decodeSequence(&seqState);
3779
- size_t const oneSeqSize = ZSTDv07_execSequence(op, oend, sequence, &litPtr, litLimit_w, base, vBase, dictEnd);
3777
+ size_t const oneSeqSize = ZSTDv07_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
3780
3778
  if (ZSTDv07_isError(oneSeqSize)) return oneSeqSize;
3781
3779
  op += oneSeqSize;
3782
3780
  } }
@@ -3972,6 +3970,41 @@ size_t ZSTDv07_decompress(void* dst, size_t dstCapacity, const void* src, size_t
3972
3970
  #endif
3973
3971
  }
3974
3972
 
3973
+ size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize)
3974
+ {
3975
+ const BYTE* ip = (const BYTE*)src;
3976
+ size_t remainingSize = srcSize;
3977
+
3978
+ /* check */
3979
+ if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
3980
+
3981
+ /* Frame Header */
3982
+ { size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
3983
+ if (ZSTDv07_isError(frameHeaderSize)) return frameHeaderSize;
3984
+ if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) return ERROR(prefix_unknown);
3985
+ if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
3986
+ ip += frameHeaderSize; remainingSize -= frameHeaderSize;
3987
+ }
3988
+
3989
+ /* Loop on each block */
3990
+ while (1) {
3991
+ blockProperties_t blockProperties;
3992
+ size_t const cBlockSize = ZSTDv07_getcBlockSize(ip, remainingSize, &blockProperties);
3993
+ if (ZSTDv07_isError(cBlockSize)) return cBlockSize;
3994
+
3995
+ ip += ZSTDv07_blockHeaderSize;
3996
+ remainingSize -= ZSTDv07_blockHeaderSize;
3997
+
3998
+ if (blockProperties.blockType == bt_end) break;
3999
+
4000
+ if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
4001
+
4002
+ ip += cBlockSize;
4003
+ remainingSize -= cBlockSize;
4004
+ }
4005
+
4006
+ return ip - (const BYTE*)src;
4007
+ }
3975
4008
 
3976
4009
  /*_******************************
3977
4010
  * Streaming Decompression API
@@ -4138,9 +4171,9 @@ static size_t ZSTDv07_loadEntropy(ZSTDv07_DCtx* dctx, const void* const dict, si
4138
4171
  }
4139
4172
 
4140
4173
  if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
4141
- dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
4142
- dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
4143
- dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
4174
+ dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
4175
+ dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
4176
+ dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
4144
4177
  dictPtr += 12;
4145
4178
 
4146
4179
  dctx->litEntropy = dctx->fseEntropy = 1;
@@ -4452,7 +4485,7 @@ size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* zbd,
4452
4485
  zbd->inBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, blockSize);
4453
4486
  if (zbd->inBuff == NULL) return ERROR(memory_allocation);
4454
4487
  }
4455
- { size_t const neededOutSize = zbd->fParams.windowSize + blockSize;
4488
+ { size_t const neededOutSize = zbd->fParams.windowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
4456
4489
  if (zbd->outBuffSize < neededOutSize) {
4457
4490
  zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff);
4458
4491
  zbd->outBuffSize = neededOutSize;
@@ -4505,7 +4538,8 @@ size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* zbd,
4505
4538
  if (!decodedSize && !isSkipFrame) { zbd->stage = ZBUFFds_read; break; } /* this was just a header */
4506
4539
  zbd->outEnd = zbd->outStart + decodedSize;
4507
4540
  zbd->stage = ZBUFFds_flush;
4508
- // break; /* ZBUFFds_flush follows */
4541
+ /* break; */
4542
+ /* pass-through */
4509
4543
  } }
4510
4544
 
4511
4545
  case ZBUFFds_flush:
@@ -48,6 +48,14 @@ unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize);
48
48
  ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity,
49
49
  const void* src, size_t compressedSize);
50
50
 
51
+ /**
52
+ ZSTDv07_findFrameCompressedSize() : get the source length of a ZSTD frame
53
+ compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
54
+ return : the number of bytes that would be read to decompress this frame
55
+ or an errorCode if it fails (which can be tested using ZSTDv07_isError())
56
+ */
57
+ size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t compressedSize);
58
+
51
59
  /*====== Helper functions ======*/
52
60
  ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
53
61
  ZSTDLIBv07_API const char* ZSTDv07_getErrorName(size_t code); /*!< provides readable string from an error code */
@@ -0,0 +1,14 @@
1
+ # ZSTD - standard compression algorithm
2
+ # Copyright (C) 2014-2016, Yann Collet, Facebook
3
+ # BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
4
+
5
+ prefix=@PREFIX@
6
+ libdir=@LIBDIR@
7
+ includedir=@INCLUDEDIR@
8
+
9
+ Name: zstd
10
+ Description: fast lossless compression algorithm library
11
+ URL: http://www.zstd.net/
12
+ Version: @VERSION@
13
+ Libs: -L${libdir} -lzstd
14
+ Cflags: -I${includedir}