extzstd 0.1 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja +5 -0
  3. data/README.md +5 -5
  4. data/contrib/zstd/CONTRIBUTING.md +42 -0
  5. data/contrib/zstd/LICENSE-examples +11 -0
  6. data/contrib/zstd/Makefile +315 -0
  7. data/contrib/zstd/NEWS +261 -0
  8. data/contrib/zstd/PATENTS +33 -0
  9. data/contrib/zstd/README.md +121 -41
  10. data/contrib/zstd/TESTING.md +44 -0
  11. data/contrib/zstd/appveyor.yml +178 -0
  12. data/contrib/zstd/circle.yml +75 -0
  13. data/contrib/zstd/lib/BUCK +186 -0
  14. data/contrib/zstd/lib/Makefile +163 -0
  15. data/contrib/zstd/lib/README.md +77 -0
  16. data/contrib/zstd/{common → lib/common}/bitstream.h +7 -4
  17. data/contrib/zstd/{common → lib/common}/entropy_common.c +19 -23
  18. data/contrib/zstd/{common → lib/common}/error_private.c +0 -0
  19. data/contrib/zstd/{common → lib/common}/error_private.h +0 -0
  20. data/contrib/zstd/{common → lib/common}/fse.h +94 -34
  21. data/contrib/zstd/{common → lib/common}/fse_decompress.c +18 -19
  22. data/contrib/zstd/{common → lib/common}/huf.h +52 -20
  23. data/contrib/zstd/{common → lib/common}/mem.h +17 -13
  24. data/contrib/zstd/lib/common/pool.c +194 -0
  25. data/contrib/zstd/lib/common/pool.h +56 -0
  26. data/contrib/zstd/lib/common/threading.c +80 -0
  27. data/contrib/zstd/lib/common/threading.h +104 -0
  28. data/contrib/zstd/{common → lib/common}/xxhash.c +3 -1
  29. data/contrib/zstd/{common → lib/common}/xxhash.h +11 -15
  30. data/contrib/zstd/{common → lib/common}/zstd_common.c +1 -11
  31. data/contrib/zstd/{common → lib/common}/zstd_errors.h +16 -2
  32. data/contrib/zstd/{common → lib/common}/zstd_internal.h +17 -1
  33. data/contrib/zstd/{compress → lib/compress}/fse_compress.c +138 -91
  34. data/contrib/zstd/{compress → lib/compress}/huf_compress.c +218 -67
  35. data/contrib/zstd/{compress → lib/compress}/zstd_compress.c +231 -108
  36. data/contrib/zstd/{compress → lib/compress}/zstd_opt.h +44 -25
  37. data/contrib/zstd/lib/compress/zstdmt_compress.c +739 -0
  38. data/contrib/zstd/lib/compress/zstdmt_compress.h +78 -0
  39. data/contrib/zstd/{decompress → lib/decompress}/huf_decompress.c +28 -23
  40. data/contrib/zstd/{decompress → lib/decompress}/zstd_decompress.c +814 -176
  41. data/contrib/zstd/{common → lib/deprecated}/zbuff.h +60 -39
  42. data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
  43. data/contrib/zstd/lib/deprecated/zbuff_compress.c +145 -0
  44. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +74 -0
  45. data/contrib/zstd/lib/dictBuilder/cover.c +1029 -0
  46. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +0 -0
  47. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +0 -0
  48. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +68 -18
  49. data/contrib/zstd/lib/dictBuilder/zdict.h +201 -0
  50. data/contrib/zstd/{legacy → lib/legacy}/zstd_legacy.h +122 -7
  51. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +34 -3
  52. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +8 -0
  53. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +45 -12
  54. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +8 -0
  55. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +45 -12
  56. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +8 -0
  57. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +56 -33
  58. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +8 -0
  59. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +45 -18
  60. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +7 -0
  61. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +43 -16
  62. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +7 -0
  63. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.c +57 -23
  64. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.h +8 -0
  65. data/contrib/zstd/lib/libzstd.pc.in +14 -0
  66. data/contrib/zstd/{zstd.h → lib/zstd.h} +206 -71
  67. data/ext/depend +2 -0
  68. data/ext/extconf.rb +4 -4
  69. data/ext/extzstd.c +1 -1
  70. data/ext/zstd_common.c +5 -5
  71. data/ext/zstd_compress.c +3 -3
  72. data/ext/zstd_decompress.c +2 -2
  73. data/ext/zstd_dictbuilder.c +2 -2
  74. data/ext/zstd_legacy_v01.c +1 -1
  75. data/ext/zstd_legacy_v02.c +1 -1
  76. data/ext/zstd_legacy_v03.c +1 -1
  77. data/ext/zstd_legacy_v04.c +1 -1
  78. data/ext/zstd_legacy_v05.c +1 -1
  79. data/ext/zstd_legacy_v06.c +1 -1
  80. data/ext/zstd_legacy_v07.c +1 -1
  81. data/gemstub.rb +9 -5
  82. data/lib/extzstd/version.rb +1 -1
  83. metadata +73 -51
  84. data/contrib/zstd/compress/zbuff_compress.c +0 -319
  85. data/contrib/zstd/decompress/zbuff_decompress.c +0 -252
  86. data/contrib/zstd/dictBuilder/zdict.h +0 -111
@@ -509,7 +509,7 @@ static void ZSTDv05_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
509
509
 
510
510
  /*! ZSTDv05_wildcopy() :
511
511
  * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
512
- MEM_STATIC void ZSTDv05_wildcopy(void* dst, const void* src, size_t length)
512
+ MEM_STATIC void ZSTDv05_wildcopy(void* dst, const void* src, ptrdiff_t length)
513
513
  {
514
514
  const BYTE* ip = (const BYTE*)src;
515
515
  BYTE* op = (BYTE*)dst;
@@ -2731,7 +2731,6 @@ struct ZSTDv05_DCtx_s
2731
2731
  ZSTDv05_dStage stage;
2732
2732
  U32 flagStaticTables;
2733
2733
  const BYTE* litPtr;
2734
- size_t litBufSize;
2735
2734
  size_t litSize;
2736
2735
  BYTE litBuffer[BLOCKSIZE + WILDCOPY_OVERLENGTH];
2737
2736
  BYTE headerBuffer[ZSTDv05_frameHeaderSize_max];
@@ -2978,8 +2977,8 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
2978
2977
  return ERROR(corruption_detected);
2979
2978
 
2980
2979
  dctx->litPtr = dctx->litBuffer;
2981
- dctx->litBufSize = BLOCKSIZE+8;
2982
2980
  dctx->litSize = litSize;
2981
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
2983
2982
  return litCSize + lhSize;
2984
2983
  }
2985
2984
  case IS_PCH:
@@ -2996,14 +2995,14 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
2996
2995
  lhSize=3;
2997
2996
  litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
2998
2997
  litCSize = ((istart[1] & 3) << 8) + istart[2];
2999
- if (litCSize + litSize > srcSize) return ERROR(corruption_detected);
2998
+ if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
3000
2999
 
3001
3000
  errorCode = HUFv05_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
3002
3001
  if (HUFv05_isError(errorCode)) return ERROR(corruption_detected);
3003
3002
 
3004
3003
  dctx->litPtr = dctx->litBuffer;
3005
- dctx->litBufSize = BLOCKSIZE+WILDCOPY_OVERLENGTH;
3006
3004
  dctx->litSize = litSize;
3005
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3007
3006
  return litCSize + lhSize;
3008
3007
  }
3009
3008
  case IS_RAW:
@@ -3028,13 +3027,12 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
3028
3027
  if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
3029
3028
  memcpy(dctx->litBuffer, istart+lhSize, litSize);
3030
3029
  dctx->litPtr = dctx->litBuffer;
3031
- dctx->litBufSize = BLOCKSIZE+8;
3032
3030
  dctx->litSize = litSize;
3031
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3033
3032
  return lhSize+litSize;
3034
3033
  }
3035
3034
  /* direct reference into compressed stream */
3036
3035
  dctx->litPtr = istart+lhSize;
3037
- dctx->litBufSize = srcSize-lhSize;
3038
3036
  dctx->litSize = litSize;
3039
3037
  return lhSize+litSize;
3040
3038
  }
@@ -3057,9 +3055,8 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
3057
3055
  break;
3058
3056
  }
3059
3057
  if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
3060
- memset(dctx->litBuffer, istart[lhSize], litSize);
3058
+ memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
3061
3059
  dctx->litPtr = dctx->litBuffer;
3062
- dctx->litBufSize = BLOCKSIZE+WILDCOPY_OVERLENGTH;
3063
3060
  dctx->litSize = litSize;
3064
3061
  return lhSize+1;
3065
3062
  }
@@ -3233,7 +3230,8 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
3233
3230
  if (litLength&1) litLength>>=1, dumps += 3;
3234
3231
  else litLength = (U16)(litLength)>>1, dumps += 2;
3235
3232
  }
3236
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
3233
+ if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
3234
+ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
3237
3235
  }
3238
3236
 
3239
3237
  /* Offset */
@@ -3266,7 +3264,8 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
3266
3264
  if (matchLength&1) matchLength>>=1, dumps += 3;
3267
3265
  else matchLength = (U16)(matchLength)>>1, dumps += 2;
3268
3266
  }
3269
- if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
3267
+ if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
3268
+ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
3270
3269
  }
3271
3270
  matchLength += MINMATCH;
3272
3271
 
@@ -3289,7 +3288,7 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
3289
3288
 
3290
3289
  static size_t ZSTDv05_execSequence(BYTE* op,
3291
3290
  BYTE* const oend, seq_t sequence,
3292
- const BYTE** litPtr, const BYTE* const litLimit_8,
3291
+ const BYTE** litPtr, const BYTE* const litLimit,
3293
3292
  const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
3294
3293
  {
3295
3294
  static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
@@ -3304,7 +3303,7 @@ static size_t ZSTDv05_execSequence(BYTE* op,
3304
3303
  /* check */
3305
3304
  if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
3306
3305
  if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
3307
- if (litEnd > litLimit_8) return ERROR(corruption_detected); /* risk read beyond lit buffer */
3306
+ if (litEnd > litLimit) return ERROR(corruption_detected); /* risk read beyond lit buffer */
3308
3307
 
3309
3308
  /* copy Literals */
3310
3309
  ZSTDv05_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
@@ -3328,7 +3327,7 @@ static size_t ZSTDv05_execSequence(BYTE* op,
3328
3327
  op = oLitEnd + length1;
3329
3328
  sequence.matchLength -= length1;
3330
3329
  match = base;
3331
- if (op > oend_8) {
3330
+ if (op > oend_8 || sequence.matchLength < MINMATCH) {
3332
3331
  while (op < oMatchEnd) *op++ = *match++;
3333
3332
  return sequenceLength;
3334
3333
  }
@@ -3351,7 +3350,7 @@ static size_t ZSTDv05_execSequence(BYTE* op,
3351
3350
  }
3352
3351
  op += 8; match += 8;
3353
3352
 
3354
- if (oMatchEnd > oend-12) {
3353
+ if (oMatchEnd > oend-(16-MINMATCH)) {
3355
3354
  if (op < oend_8) {
3356
3355
  ZSTDv05_wildcopy(op, match, oend_8 - op);
3357
3356
  match += oend_8 - op;
@@ -3360,7 +3359,7 @@ static size_t ZSTDv05_execSequence(BYTE* op,
3360
3359
  while (op < oMatchEnd)
3361
3360
  *op++ = *match++;
3362
3361
  } else {
3363
- ZSTDv05_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
3362
+ ZSTDv05_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
3364
3363
  }
3365
3364
  return sequenceLength;
3366
3365
  }
@@ -3378,7 +3377,6 @@ static size_t ZSTDv05_decompressSequences(
3378
3377
  BYTE* const oend = ostart + maxDstSize;
3379
3378
  size_t errorCode, dumpsLength;
3380
3379
  const BYTE* litPtr = dctx->litPtr;
3381
- const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
3382
3380
  const BYTE* const litEnd = litPtr + dctx->litSize;
3383
3381
  int nbSeq;
3384
3382
  const BYTE* dumps;
@@ -3416,7 +3414,7 @@ static size_t ZSTDv05_decompressSequences(
3416
3414
  size_t oneSeqSize;
3417
3415
  nbSeq--;
3418
3416
  ZSTDv05_decodeSequence(&sequence, &seqState);
3419
- oneSeqSize = ZSTDv05_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
3417
+ oneSeqSize = ZSTDv05_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
3420
3418
  if (ZSTDv05_isError(oneSeqSize)) return oneSeqSize;
3421
3419
  op += oneSeqSize;
3422
3420
  }
@@ -3585,6 +3583,35 @@ size_t ZSTDv05_decompress(void* dst, size_t maxDstSize, const void* src, size_t
3585
3583
  #endif
3586
3584
  }
3587
3585
 
3586
+ size_t ZSTDv05_findFrameCompressedSize(const void *src, size_t srcSize)
3587
+ {
3588
+ const BYTE* ip = (const BYTE*)src;
3589
+ size_t remainingSize = srcSize;
3590
+ blockProperties_t blockProperties;
3591
+
3592
+ /* Frame Header */
3593
+ if (srcSize < ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong);
3594
+ if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
3595
+ ip += ZSTDv05_frameHeaderSize_min; remainingSize -= ZSTDv05_frameHeaderSize_min;
3596
+
3597
+ /* Loop on each block */
3598
+ while (1)
3599
+ {
3600
+ size_t cBlockSize = ZSTDv05_getcBlockSize(ip, remainingSize, &blockProperties);
3601
+ if (ZSTDv05_isError(cBlockSize)) return cBlockSize;
3602
+
3603
+ ip += ZSTDv05_blockHeaderSize;
3604
+ remainingSize -= ZSTDv05_blockHeaderSize;
3605
+ if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
3606
+
3607
+ if (cBlockSize == 0) break; /* bt_end */
3608
+
3609
+ ip += cBlockSize;
3610
+ remainingSize -= cBlockSize;
3611
+ }
3612
+
3613
+ return ip - (const BYTE*)src;
3614
+ }
3588
3615
 
3589
3616
  /* ******************************
3590
3617
  * Streaming Decompression API
@@ -32,6 +32,13 @@ extern "C" {
32
32
  size_t ZSTDv05_decompress( void* dst, size_t dstCapacity,
33
33
  const void* src, size_t compressedSize);
34
34
 
35
+ /**
36
+ ZSTDv05_findFrameCompressedSize() : get the source length of a ZSTD frame
37
+ compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
38
+ return : the number of bytes that would be read to decompress this frame
39
+ or an errorCode if it fails (which can be tested using ZSTDv05_isError())
40
+ */
41
+ size_t ZSTDv05_findFrameCompressedSize(const void* src, size_t compressedSize);
35
42
 
36
43
  /* *************************************
37
44
  * Helper functions
@@ -537,7 +537,7 @@ static void ZSTDv06_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
537
537
  /*! ZSTDv06_wildcopy() :
538
538
  * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
539
539
  #define WILDCOPY_OVERLENGTH 8
540
- MEM_STATIC void ZSTDv06_wildcopy(void* dst, const void* src, size_t length)
540
+ MEM_STATIC void ZSTDv06_wildcopy(void* dst, const void* src, ptrdiff_t length)
541
541
  {
542
542
  const BYTE* ip = (const BYTE*)src;
543
543
  BYTE* op = (BYTE*)dst;
@@ -2893,7 +2893,6 @@ struct ZSTDv06_DCtx_s
2893
2893
  ZSTDv06_dStage stage;
2894
2894
  U32 flagRepeatTable;
2895
2895
  const BYTE* litPtr;
2896
- size_t litBufSize;
2897
2896
  size_t litSize;
2898
2897
  BYTE litBuffer[ZSTDv06_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
2899
2898
  BYTE headerBuffer[ZSTDv06_FRAMEHEADERSIZE_MAX];
@@ -3170,8 +3169,8 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
3170
3169
  return ERROR(corruption_detected);
3171
3170
 
3172
3171
  dctx->litPtr = dctx->litBuffer;
3173
- dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+8;
3174
3172
  dctx->litSize = litSize;
3173
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3175
3174
  return litCSize + lhSize;
3176
3175
  }
3177
3176
  case IS_PCH:
@@ -3186,14 +3185,14 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
3186
3185
  lhSize=3;
3187
3186
  litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2);
3188
3187
  litCSize = ((istart[1] & 3) << 8) + istart[2];
3189
- if (litCSize + litSize > srcSize) return ERROR(corruption_detected);
3188
+ if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
3190
3189
 
3191
3190
  { size_t const errorCode = HUFv06_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4);
3192
3191
  if (HUFv06_isError(errorCode)) return ERROR(corruption_detected);
3193
3192
  }
3194
3193
  dctx->litPtr = dctx->litBuffer;
3195
- dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
3196
3194
  dctx->litSize = litSize;
3195
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3197
3196
  return litCSize + lhSize;
3198
3197
  }
3199
3198
  case IS_RAW:
@@ -3217,13 +3216,12 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
3217
3216
  if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
3218
3217
  memcpy(dctx->litBuffer, istart+lhSize, litSize);
3219
3218
  dctx->litPtr = dctx->litBuffer;
3220
- dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+8;
3221
3219
  dctx->litSize = litSize;
3220
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3222
3221
  return lhSize+litSize;
3223
3222
  }
3224
3223
  /* direct reference into compressed stream */
3225
3224
  dctx->litPtr = istart+lhSize;
3226
- dctx->litBufSize = srcSize-lhSize;
3227
3225
  dctx->litSize = litSize;
3228
3226
  return lhSize+litSize;
3229
3227
  }
@@ -3245,9 +3243,8 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx,
3245
3243
  break;
3246
3244
  }
3247
3245
  if (litSize > ZSTDv06_BLOCKSIZE_MAX) return ERROR(corruption_detected);
3248
- memset(dctx->litBuffer, istart[lhSize], litSize);
3246
+ memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
3249
3247
  dctx->litPtr = dctx->litBuffer;
3250
- dctx->litBufSize = ZSTDv06_BLOCKSIZE_MAX+WILDCOPY_OVERLENGTH;
3251
3248
  dctx->litSize = litSize;
3252
3249
  return lhSize+1;
3253
3250
  }
@@ -3438,7 +3435,7 @@ static void ZSTDv06_decodeSequence(seq_t* seq, seqState_t* seqState)
3438
3435
 
3439
3436
  size_t ZSTDv06_execSequence(BYTE* op,
3440
3437
  BYTE* const oend, seq_t sequence,
3441
- const BYTE** litPtr, const BYTE* const litLimit_8,
3438
+ const BYTE** litPtr, const BYTE* const litLimit,
3442
3439
  const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
3443
3440
  {
3444
3441
  BYTE* const oLitEnd = op + sequence.litLength;
@@ -3451,7 +3448,7 @@ size_t ZSTDv06_execSequence(BYTE* op,
3451
3448
  /* check */
3452
3449
  if (oLitEnd > oend_8) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of 8 from oend */
3453
3450
  if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
3454
- if (iLitEnd > litLimit_8) return ERROR(corruption_detected); /* over-read beyond lit buffer */
3451
+ if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
3455
3452
 
3456
3453
  /* copy Literals */
3457
3454
  ZSTDv06_wildcopy(op, *litPtr, sequence.litLength); /* note : oLitEnd <= oend-8 : no risk of overwrite beyond oend */
@@ -3473,7 +3470,7 @@ size_t ZSTDv06_execSequence(BYTE* op,
3473
3470
  op = oLitEnd + length1;
3474
3471
  sequence.matchLength -= length1;
3475
3472
  match = base;
3476
- if (op > oend_8) {
3473
+ if (op > oend_8 || sequence.matchLength < MINMATCH) {
3477
3474
  while (op < oMatchEnd) *op++ = *match++;
3478
3475
  return sequenceLength;
3479
3476
  }
@@ -3506,7 +3503,7 @@ size_t ZSTDv06_execSequence(BYTE* op,
3506
3503
  }
3507
3504
  while (op < oMatchEnd) *op++ = *match++;
3508
3505
  } else {
3509
- ZSTDv06_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
3506
+ ZSTDv06_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
3510
3507
  }
3511
3508
  return sequenceLength;
3512
3509
  }
@@ -3523,7 +3520,6 @@ static size_t ZSTDv06_decompressSequences(
3523
3520
  BYTE* const oend = ostart + maxDstSize;
3524
3521
  BYTE* op = ostart;
3525
3522
  const BYTE* litPtr = dctx->litPtr;
3526
- const BYTE* const litLimit_8 = litPtr + dctx->litBufSize - 8;
3527
3523
  const BYTE* const litEnd = litPtr + dctx->litSize;
3528
3524
  FSEv06_DTable* DTableLL = dctx->LLTable;
3529
3525
  FSEv06_DTable* DTableML = dctx->MLTable;
@@ -3567,7 +3563,7 @@ static size_t ZSTDv06_decompressSequences(
3567
3563
  pos, (U32)sequence.litLength, (U32)sequence.matchLength, (U32)sequence.offset);
3568
3564
  #endif
3569
3565
 
3570
- { size_t const oneSeqSize = ZSTDv06_execSequence(op, oend, sequence, &litPtr, litLimit_8, base, vBase, dictEnd);
3566
+ { size_t const oneSeqSize = ZSTDv06_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
3571
3567
  if (ZSTDv06_isError(oneSeqSize)) return oneSeqSize;
3572
3568
  op += oneSeqSize;
3573
3569
  } }
@@ -3733,6 +3729,37 @@ size_t ZSTDv06_decompress(void* dst, size_t dstCapacity, const void* src, size_t
3733
3729
  #endif
3734
3730
  }
3735
3731
 
3732
+ size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t srcSize)
3733
+ {
3734
+ const BYTE* ip = (const BYTE*)src;
3735
+ size_t remainingSize = srcSize;
3736
+ blockProperties_t blockProperties = { bt_compressed, 0 };
3737
+
3738
+ /* Frame Header */
3739
+ { size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min);
3740
+ if (ZSTDv06_isError(frameHeaderSize)) return frameHeaderSize;
3741
+ if (MEM_readLE32(src) != ZSTDv06_MAGICNUMBER) return ERROR(prefix_unknown);
3742
+ if (srcSize < frameHeaderSize+ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong);
3743
+ ip += frameHeaderSize; remainingSize -= frameHeaderSize;
3744
+ }
3745
+
3746
+ /* Loop on each block */
3747
+ while (1) {
3748
+ size_t const cBlockSize = ZSTDv06_getcBlockSize(ip, remainingSize, &blockProperties);
3749
+ if (ZSTDv06_isError(cBlockSize)) return cBlockSize;
3750
+
3751
+ ip += ZSTDv06_blockHeaderSize;
3752
+ remainingSize -= ZSTDv06_blockHeaderSize;
3753
+ if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
3754
+
3755
+ if (cBlockSize == 0) break; /* bt_end */
3756
+
3757
+ ip += cBlockSize;
3758
+ remainingSize -= cBlockSize;
3759
+ }
3760
+
3761
+ return ip - (const BYTE*)src;
3762
+ }
3736
3763
 
3737
3764
  /*_******************************
3738
3765
  * Streaming Decompression API
@@ -4081,7 +4108,7 @@ size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* zbd,
4081
4108
  zbd->inBuff = (char*)malloc(blockSize);
4082
4109
  if (zbd->inBuff == NULL) return ERROR(memory_allocation);
4083
4110
  }
4084
- { size_t const neededOutSize = ((size_t)1 << zbd->fParams.windowLog) + blockSize;
4111
+ { size_t const neededOutSize = ((size_t)1 << zbd->fParams.windowLog) + blockSize + WILDCOPY_OVERLENGTH * 2;
4085
4112
  if (zbd->outBuffSize < neededOutSize) {
4086
4113
  free(zbd->outBuff);
4087
4114
  zbd->outBuffSize = neededOutSize;
@@ -41,6 +41,13 @@ extern "C" {
41
41
  ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity,
42
42
  const void* src, size_t compressedSize);
43
43
 
44
+ /**
45
+ ZSTDv06_findFrameCompressedSize() : get the source length of a ZSTD frame
46
+ compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
47
+ return : the number of bytes that would be read to decompress this frame
48
+ or an errorCode if it fails (which can be tested using ZSTDv06_isError())
49
+ */
50
+ size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t compressedSize);
44
51
 
45
52
  /* *************************************
46
53
  * Helper functions
@@ -13,12 +13,14 @@
13
13
  #include <string.h> /* memcpy */
14
14
  #include <stdlib.h> /* malloc, free, qsort */
15
15
 
16
- #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
17
- #include "xxhash.h" /* XXH64_* */
16
+ #ifndef XXH_STATIC_LINKING_ONLY
17
+ # define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
18
+ #endif
19
+ #include "xxhash.h" /* XXH64_* */
18
20
  #include "zstd_v07.h"
19
21
 
20
- #define FSEv07_STATIC_LINKING_ONLY /* FSEv07_MIN_TABLELOG */
21
- #define HUFv07_STATIC_LINKING_ONLY /* HUFv07_TABLELOG_ABSOLUTEMAX */
22
+ #define FSEv07_STATIC_LINKING_ONLY /* FSEv07_MIN_TABLELOG */
23
+ #define HUFv07_STATIC_LINKING_ONLY /* HUFv07_TABLELOG_ABSOLUTEMAX */
22
24
  #define ZSTDv07_STATIC_LINKING_ONLY
23
25
 
24
26
  #include "error_private.h"
@@ -2845,7 +2847,7 @@ static void ZSTDv07_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
2845
2847
  /*! ZSTDv07_wildcopy() :
2846
2848
  * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
2847
2849
  #define WILDCOPY_OVERLENGTH 8
2848
- MEM_STATIC void ZSTDv07_wildcopy(void* dst, const void* src, size_t length)
2850
+ MEM_STATIC void ZSTDv07_wildcopy(void* dst, const void* src, ptrdiff_t length)
2849
2851
  {
2850
2852
  const BYTE* ip = (const BYTE*)src;
2851
2853
  BYTE* op = (BYTE*)dst;
@@ -3021,7 +3023,6 @@ struct ZSTDv07_DCtx_s
3021
3023
  U32 dictID;
3022
3024
  const BYTE* litPtr;
3023
3025
  ZSTDv07_customMem customMem;
3024
- size_t litBufSize;
3025
3026
  size_t litSize;
3026
3027
  BYTE litBuffer[ZSTDv07_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH];
3027
3028
  BYTE headerBuffer[ZSTDv07_FRAMEHEADERSIZE_MAX];
@@ -3395,9 +3396,9 @@ size_t ZSTDv07_decodeLiteralsBlock(ZSTDv07_DCtx* dctx,
3395
3396
  return ERROR(corruption_detected);
3396
3397
 
3397
3398
  dctx->litPtr = dctx->litBuffer;
3398
- dctx->litBufSize = ZSTDv07_BLOCKSIZE_ABSOLUTEMAX+8;
3399
3399
  dctx->litSize = litSize;
3400
3400
  dctx->litEntropy = 1;
3401
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3401
3402
  return litCSize + lhSize;
3402
3403
  }
3403
3404
  case lbt_repeat:
@@ -3418,8 +3419,8 @@ size_t ZSTDv07_decodeLiteralsBlock(ZSTDv07_DCtx* dctx,
3418
3419
  if (HUFv07_isError(errorCode)) return ERROR(corruption_detected);
3419
3420
  }
3420
3421
  dctx->litPtr = dctx->litBuffer;
3421
- dctx->litBufSize = ZSTDv07_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH;
3422
3422
  dctx->litSize = litSize;
3423
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3423
3424
  return litCSize + lhSize;
3424
3425
  }
3425
3426
  case lbt_raw:
@@ -3443,13 +3444,12 @@ size_t ZSTDv07_decodeLiteralsBlock(ZSTDv07_DCtx* dctx,
3443
3444
  if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
3444
3445
  memcpy(dctx->litBuffer, istart+lhSize, litSize);
3445
3446
  dctx->litPtr = dctx->litBuffer;
3446
- dctx->litBufSize = ZSTDv07_BLOCKSIZE_ABSOLUTEMAX+8;
3447
3447
  dctx->litSize = litSize;
3448
+ memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
3448
3449
  return lhSize+litSize;
3449
3450
  }
3450
3451
  /* direct reference into compressed stream */
3451
3452
  dctx->litPtr = istart+lhSize;
3452
- dctx->litBufSize = srcSize-lhSize;
3453
3453
  dctx->litSize = litSize;
3454
3454
  return lhSize+litSize;
3455
3455
  }
@@ -3471,9 +3471,8 @@ size_t ZSTDv07_decodeLiteralsBlock(ZSTDv07_DCtx* dctx,
3471
3471
  break;
3472
3472
  }
3473
3473
  if (litSize > ZSTDv07_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected);
3474
- memset(dctx->litBuffer, istart[lhSize], litSize);
3474
+ memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
3475
3475
  dctx->litPtr = dctx->litBuffer;
3476
- dctx->litBufSize = ZSTDv07_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH;
3477
3476
  dctx->litSize = litSize;
3478
3477
  return lhSize+1;
3479
3478
  }
@@ -3662,7 +3661,7 @@ static seq_t ZSTDv07_decodeSequence(seqState_t* seqState)
3662
3661
  static
3663
3662
  size_t ZSTDv07_execSequence(BYTE* op,
3664
3663
  BYTE* const oend, seq_t sequence,
3665
- const BYTE** litPtr, const BYTE* const litLimit_w,
3664
+ const BYTE** litPtr, const BYTE* const litLimit,
3666
3665
  const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
3667
3666
  {
3668
3667
  BYTE* const oLitEnd = op + sequence.litLength;
@@ -3674,7 +3673,7 @@ size_t ZSTDv07_execSequence(BYTE* op,
3674
3673
 
3675
3674
  /* check */
3676
3675
  if ((oLitEnd>oend_w) | (oMatchEnd>oend)) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
3677
- if (iLitEnd > litLimit_w) return ERROR(corruption_detected); /* over-read beyond lit buffer */
3676
+ if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
3678
3677
 
3679
3678
  /* copy Literals */
3680
3679
  ZSTDv07_wildcopy(op, *litPtr, sequence.litLength); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
@@ -3696,7 +3695,7 @@ size_t ZSTDv07_execSequence(BYTE* op,
3696
3695
  op = oLitEnd + length1;
3697
3696
  sequence.matchLength -= length1;
3698
3697
  match = base;
3699
- if (op > oend_w) {
3698
+ if (op > oend_w || sequence.matchLength < MINMATCH) {
3700
3699
  while (op < oMatchEnd) *op++ = *match++;
3701
3700
  return sequenceLength;
3702
3701
  }
@@ -3729,7 +3728,7 @@ size_t ZSTDv07_execSequence(BYTE* op,
3729
3728
  }
3730
3729
  while (op < oMatchEnd) *op++ = *match++;
3731
3730
  } else {
3732
- ZSTDv07_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
3731
+ ZSTDv07_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
3733
3732
  }
3734
3733
  return sequenceLength;
3735
3734
  }
@@ -3746,7 +3745,6 @@ static size_t ZSTDv07_decompressSequences(
3746
3745
  BYTE* const oend = ostart + maxDstSize;
3747
3746
  BYTE* op = ostart;
3748
3747
  const BYTE* litPtr = dctx->litPtr;
3749
- const BYTE* const litLimit_w = litPtr + dctx->litBufSize - WILDCOPY_OVERLENGTH;
3750
3748
  const BYTE* const litEnd = litPtr + dctx->litSize;
3751
3749
  FSEv07_DTable* DTableLL = dctx->LLTable;
3752
3750
  FSEv07_DTable* DTableML = dctx->MLTable;
@@ -3776,7 +3774,7 @@ static size_t ZSTDv07_decompressSequences(
3776
3774
  for ( ; (BITv07_reloadDStream(&(seqState.DStream)) <= BITv07_DStream_completed) && nbSeq ; ) {
3777
3775
  nbSeq--;
3778
3776
  { seq_t const sequence = ZSTDv07_decodeSequence(&seqState);
3779
- size_t const oneSeqSize = ZSTDv07_execSequence(op, oend, sequence, &litPtr, litLimit_w, base, vBase, dictEnd);
3777
+ size_t const oneSeqSize = ZSTDv07_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
3780
3778
  if (ZSTDv07_isError(oneSeqSize)) return oneSeqSize;
3781
3779
  op += oneSeqSize;
3782
3780
  } }
@@ -3972,6 +3970,41 @@ size_t ZSTDv07_decompress(void* dst, size_t dstCapacity, const void* src, size_t
3972
3970
  #endif
3973
3971
  }
3974
3972
 
3973
+ size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize)
3974
+ {
3975
+ const BYTE* ip = (const BYTE*)src;
3976
+ size_t remainingSize = srcSize;
3977
+
3978
+ /* check */
3979
+ if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
3980
+
3981
+ /* Frame Header */
3982
+ { size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
3983
+ if (ZSTDv07_isError(frameHeaderSize)) return frameHeaderSize;
3984
+ if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) return ERROR(prefix_unknown);
3985
+ if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
3986
+ ip += frameHeaderSize; remainingSize -= frameHeaderSize;
3987
+ }
3988
+
3989
+ /* Loop on each block */
3990
+ while (1) {
3991
+ blockProperties_t blockProperties;
3992
+ size_t const cBlockSize = ZSTDv07_getcBlockSize(ip, remainingSize, &blockProperties);
3993
+ if (ZSTDv07_isError(cBlockSize)) return cBlockSize;
3994
+
3995
+ ip += ZSTDv07_blockHeaderSize;
3996
+ remainingSize -= ZSTDv07_blockHeaderSize;
3997
+
3998
+ if (blockProperties.blockType == bt_end) break;
3999
+
4000
+ if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
4001
+
4002
+ ip += cBlockSize;
4003
+ remainingSize -= cBlockSize;
4004
+ }
4005
+
4006
+ return ip - (const BYTE*)src;
4007
+ }
3975
4008
 
3976
4009
  /*_******************************
3977
4010
  * Streaming Decompression API
@@ -4138,9 +4171,9 @@ static size_t ZSTDv07_loadEntropy(ZSTDv07_DCtx* dctx, const void* const dict, si
4138
4171
  }
4139
4172
 
4140
4173
  if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
4141
- dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
4142
- dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
4143
- dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
4174
+ dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
4175
+ dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
4176
+ dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
4144
4177
  dictPtr += 12;
4145
4178
 
4146
4179
  dctx->litEntropy = dctx->fseEntropy = 1;
@@ -4452,7 +4485,7 @@ size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* zbd,
4452
4485
  zbd->inBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, blockSize);
4453
4486
  if (zbd->inBuff == NULL) return ERROR(memory_allocation);
4454
4487
  }
4455
- { size_t const neededOutSize = zbd->fParams.windowSize + blockSize;
4488
+ { size_t const neededOutSize = zbd->fParams.windowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
4456
4489
  if (zbd->outBuffSize < neededOutSize) {
4457
4490
  zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff);
4458
4491
  zbd->outBuffSize = neededOutSize;
@@ -4505,7 +4538,8 @@ size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* zbd,
4505
4538
  if (!decodedSize && !isSkipFrame) { zbd->stage = ZBUFFds_read; break; } /* this was just a header */
4506
4539
  zbd->outEnd = zbd->outStart + decodedSize;
4507
4540
  zbd->stage = ZBUFFds_flush;
4508
- // break; /* ZBUFFds_flush follows */
4541
+ /* break; */
4542
+ /* pass-through */
4509
4543
  } }
4510
4544
 
4511
4545
  case ZBUFFds_flush:
@@ -48,6 +48,14 @@ unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize);
48
48
  ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity,
49
49
  const void* src, size_t compressedSize);
50
50
 
51
+ /**
52
+ ZSTDv07_findFrameCompressedSize() : get the source length of a ZSTD frame
53
+ compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
54
+ return : the number of bytes that would be read to decompress this frame
55
+ or an errorCode if it fails (which can be tested using ZSTDv07_isError())
56
+ */
57
+ size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t compressedSize);
58
+
51
59
  /*====== Helper functions ======*/
52
60
  ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
53
61
  ZSTDLIBv07_API const char* ZSTDv07_getErrorName(size_t code); /*!< provides readable string from an error code */
@@ -0,0 +1,14 @@
1
+ # ZSTD - standard compression algorithm
2
+ # Copyright (C) 2014-2016, Yann Collet, Facebook
3
+ # BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
4
+
5
+ prefix=@PREFIX@
6
+ libdir=@LIBDIR@
7
+ includedir=@INCLUDEDIR@
8
+
9
+ Name: zstd
10
+ Description: fast lossless compression algorithm library
11
+ URL: http://www.zstd.net/
12
+ Version: @VERSION@
13
+ Libs: -L${libdir} -lzstd
14
+ Cflags: -I${includedir}