extlz4 0.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +7 -0
  3. data/README.md +3 -3
  4. data/contrib/lz4/Makefile.inc +87 -0
  5. data/contrib/lz4/NEWS +7 -0
  6. data/contrib/lz4/README.md +1 -1
  7. data/contrib/lz4/lib/README.md +3 -5
  8. data/contrib/lz4/lib/liblz4-dll.rc.in +35 -0
  9. data/contrib/lz4/lib/lz4.c +296 -182
  10. data/contrib/lz4/lib/lz4.h +125 -40
  11. data/contrib/lz4/lib/lz4frame.c +30 -6
  12. data/contrib/lz4/lib/lz4frame.h +11 -2
  13. data/contrib/lz4/lib/lz4hc.c +93 -30
  14. data/contrib/lz4/lib/lz4hc.h +3 -0
  15. data/contrib/lz4/ossfuzz/Makefile +74 -0
  16. data/contrib/lz4/ossfuzz/compress_frame_fuzzer.c +42 -0
  17. data/contrib/lz4/ossfuzz/compress_fuzzer.c +51 -0
  18. data/contrib/lz4/ossfuzz/compress_hc_fuzzer.c +57 -0
  19. data/contrib/lz4/ossfuzz/decompress_frame_fuzzer.c +67 -0
  20. data/contrib/lz4/ossfuzz/decompress_fuzzer.c +58 -0
  21. data/contrib/lz4/ossfuzz/fuzz.h +48 -0
  22. data/contrib/lz4/ossfuzz/fuzz_helpers.h +94 -0
  23. data/contrib/lz4/ossfuzz/lz4_helpers.c +51 -0
  24. data/contrib/lz4/ossfuzz/lz4_helpers.h +13 -0
  25. data/contrib/lz4/ossfuzz/ossfuzz.sh +23 -0
  26. data/contrib/lz4/ossfuzz/round_trip_frame_fuzzer.c +39 -0
  27. data/contrib/lz4/ossfuzz/round_trip_fuzzer.c +50 -0
  28. data/contrib/lz4/ossfuzz/round_trip_hc_fuzzer.c +39 -0
  29. data/contrib/lz4/ossfuzz/round_trip_stream_fuzzer.c +302 -0
  30. data/contrib/lz4/ossfuzz/standaloneengine.c +74 -0
  31. data/contrib/lz4/ossfuzz/travisoss.sh +21 -0
  32. data/ext/blockapi.c +3 -3
  33. data/ext/hashargs.c +1 -1
  34. data/lib/extlz4.rb +5 -1
  35. data/lib/extlz4/version.rb +1 -1
  36. data/test/common.rb +2 -2
  37. metadata +22 -3
@@ -46,7 +46,7 @@ extern "C" {
46
46
  /**
47
47
  Introduction
48
48
 
49
- LZ4 is lossless compression algorithm, providing compression speed at 500 MB/s per core,
49
+ LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core,
50
50
  scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
51
51
  multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
52
52
 
@@ -58,16 +58,19 @@ extern "C" {
58
58
  - unbounded multiple steps (described as Streaming compression)
59
59
 
60
60
  lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
61
- Decompressing a block requires additional metadata, such as its compressed size.
61
+ Decompressing such a compressed block requires additional metadata.
62
+ Exact metadata depends on exact decompression function.
63
+ For the typical case of LZ4_decompress_safe(),
64
+ metadata includes block's compressed size, and maximum bound of decompressed size.
62
65
  Each application is free to encode and pass such metadata in whichever way it wants.
63
66
 
64
67
  lz4.h only handle blocks, it can not generate Frames.
65
68
 
66
69
  Blocks are different from Frames (doc/lz4_Frame_format.md).
67
70
  Frames bundle both blocks and metadata in a specified manner.
68
- This are required for compressed data to be self-contained and portable.
71
+ Embedding metadata is required for compressed data to be self-contained and portable.
69
72
  Frame format is delivered through a companion API, declared in lz4frame.h.
70
- Note that the `lz4` CLI can only manage frames.
73
+ The `lz4` CLI can only manage frames.
71
74
  */
72
75
 
73
76
  /*^***************************************************************
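Since a raw block carries no metadata of its own, an application has to record at least the compressed size itself (and know an upper bound of the decompressed size, e.g. a fixed block size). A minimal sketch of one such container, assuming a 4-byte length prefix of the application's own choosing, which is not part of any LZ4 format:

    #include <stdint.h>
    #include <string.h>
    #include "lz4.h"

    /* Writes [uint32 compressedSize][block bytes] into 'out'.
     * Returns total bytes written, or 0 if the block did not fit. */
    static size_t store_block(const char* src, int srcSize, char* out, int outCapacity)
    {
        int cSize;
        if (outCapacity <= 4) return 0;
        cSize = LZ4_compress_default(src, out + 4, srcSize, outCapacity - 4);
        if (cSize <= 0) return 0;            /* caller falls back to storing 'src' uncompressed */
        {   uint32_t const len = (uint32_t)cSize;
            memcpy(out, &len, sizeof(len));  /* native endianness; a real format would pin this down */
        }
        return (size_t)cSize + sizeof(uint32_t);
    }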
@@ -97,7 +100,7 @@ extern "C" {
97
100
  /*------ Version ------*/
98
101
  #define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */
99
102
  #define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */
100
- #define LZ4_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */
103
+ #define LZ4_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */
101
104
 
102
105
  #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
103
106
 
@@ -129,29 +132,35 @@ LZ4LIB_API const char* LZ4_versionString (void); /**< library version string;
  * Simple Functions
  **************************************/
  /*! LZ4_compress_default() :
- Compresses 'srcSize' bytes from buffer 'src'
- into already allocated 'dst' buffer of size 'dstCapacity'.
- Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
- It also runs faster, so it's a recommended setting.
- If the function cannot compress 'src' into a more limited 'dst' budget,
- compression stops *immediately*, and the function result is zero.
- In which case, 'dst' content is undefined (invalid).
- srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
- dstCapacity : size of buffer 'dst' (which must be already allocated)
- @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
- or 0 if compression fails
- Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
- */
+ * Compresses 'srcSize' bytes from buffer 'src'
+ * into already allocated 'dst' buffer of size 'dstCapacity'.
+ * Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
+ * It also runs faster, so it's a recommended setting.
+ * If the function cannot compress 'src' into a more limited 'dst' budget,
+ * compression stops *immediately*, and the function result is zero.
+ * In which case, 'dst' content is undefined (invalid).
+ * srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
+ * dstCapacity : size of buffer 'dst' (which must be already allocated)
+ * @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+ * or 0 if compression fails
+ * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
+ */
  LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);

  /*! LZ4_decompress_safe() :
- compressedSize : is the exact complete size of the compressed block.
- dstCapacity : is the size of destination buffer, which must be already allocated.
- @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
- If destination buffer is not large enough, decoding will stop and output an error code (negative value).
- If the source stream is detected malformed, the function will stop decoding and return a negative result.
- Note : This function is protected against malicious data packets (never writes outside 'dst' buffer, nor read outside 'source' buffer).
- */
+ * compressedSize : is the exact complete size of the compressed block.
+ * dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
+ * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+ * If destination buffer is not large enough, decoding will stop and output an error code (negative value).
+ * If the source stream is detected malformed, the function will stop decoding and return a negative result.
+ * Note 1 : This function is protected against malicious data packets :
+ * it will never writes outside 'dst' buffer, nor read outside 'source' buffer,
+ * even if the compressed block is maliciously modified to order the decoder to do these actions.
+ * In such case, the decoder stops immediately, and considers the compressed block malformed.
+ * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
+ * The implementation is free to send / store / derive this information in whichever way is most beneficial.
+ * If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
+ */
  LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);


@@ -388,6 +397,8 @@ LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecod
  */
  LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);

+ #endif /* LZ4_H_2983827168210 */
+

  /*^*************************************
  * !!!!!! STATIC LINKING ONLY !!!!!!
@@ -413,14 +424,17 @@ LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int sr
  * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
  ******************************************************************************/

+ #ifdef LZ4_STATIC_LINKING_ONLY
+
+ #ifndef LZ4_STATIC_3504398509
+ #define LZ4_STATIC_3504398509
+
  #ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
  #define LZ4LIB_STATIC_API LZ4LIB_API
  #else
  #define LZ4LIB_STATIC_API
  #endif

- #ifdef LZ4_STATIC_LINKING_ONLY
-

  /*! LZ4_compress_fast_extState_fastReset() :
  * A variant of LZ4_compress_fast_extState().
@@ -462,8 +476,75 @@ LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const c
  */
  LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);

+
+ /*! In-place compression and decompression
+ *
+ * It's possible to have input and output sharing the same buffer,
+ * for highly contrained memory environments.
+ * In both cases, it requires input to lay at the end of the buffer,
+ * and decompression to start at beginning of the buffer.
+ * Buffer size must feature some margin, hence be larger than final size.
+ *
+ * |<------------------------buffer--------------------------------->|
+ * |<-----------compressed data--------->|
+ * |<-----------decompressed size------------------>|
+ * |<----margin---->|
+ *
+ * This technique is more useful for decompression,
+ * since decompressed size is typically larger,
+ * and margin is short.
+ *
+ * In-place decompression will work inside any buffer
+ * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
+ * This presumes that decompressedSize > compressedSize.
+ * Otherwise, it means compression actually expanded data,
+ * and it would be more efficient to store such data with a flag indicating it's not compressed.
+ * This can happen when data is not compressible (already compressed, or encrypted).
+ *
+ * For in-place compression, margin is larger, as it must be able to cope with both
+ * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
+ * and data expansion, which can happen when input is not compressible.
+ * As a consequence, buffer size requirements are much higher,
+ * and memory savings offered by in-place compression are more limited.
+ *
+ * There are ways to limit this cost for compression :
+ * - Reduce history size, by modifying LZ4_DISTANCE_MAX.
+ * Note that it is a compile-time constant, so all compressions will apply this limit.
+ * Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
+ * so it's a reasonable trick when inputs are known to be small.
+ * - Require the compressor to deliver a "maximum compressed size".
+ * This is the `dstCapacity` parameter in `LZ4_compress*()`.
+ * When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
+ * in which case, the return code will be 0 (zero).
+ * The caller must be ready for these cases to happen,
+ * and typically design a backup scheme to send data uncompressed.
+ * The combination of both techniques can significantly reduce
+ * the amount of margin required for in-place compression.
+ *
+ * In-place compression can work in any buffer
+ * which size is >= (maxCompressedSize)
+ * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
+ * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
+ * so it's possible to reduce memory requirements by playing with them.
+ */
+
+ #define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32)
+ #define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
+
+ #ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */
+ # define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
  #endif

+ #define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
+ #define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
+
+ #endif /* LZ4_STATIC_3504398509 */
+ #endif /* LZ4_STATIC_LINKING_ONLY */
+
+
+
+ #ifndef LZ4_H_98237428734687
+ #define LZ4_H_98237428734687

  /*-************************************************************
  * PRIVATE DEFINITIONS
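A sketch of in-place decompression with the new macros; it assumes the compressed and decompressed sizes are already known from application metadata, and defines LZ4_STATIC_LINKING_ONLY because the macros live in the static-linking-only section:

    #define LZ4_STATIC_LINKING_ONLY
    #include <stdlib.h>
    #include <string.h>
    #include "lz4.h"

    /* Returns a malloc'd buffer holding 'decompressedSize' regenerated bytes, or NULL.
     * Presumes decompressedSize > compressedSize, as the macro documentation requires. */
    char* decompress_in_place(const char* compressed, int compressedSize, int decompressedSize)
    {
        size_t const bufSize = LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize);
        char* const buf = (char*)malloc(bufSize);
        if (buf == NULL) return NULL;

        /* Input must sit at the very end of the buffer; output starts at offset 0. */
        memcpy(buf + bufSize - (size_t)compressedSize, compressed, (size_t)compressedSize);
        {   int const r = LZ4_decompress_safe(buf + bufSize - (size_t)compressedSize,
                                              buf, compressedSize, decompressedSize);
            if (r != decompressedSize) { free(buf); return NULL; }
        }
        return buf;
    }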
@@ -567,6 +648,7 @@ union LZ4_streamDecode_u {
  } ; /* previously typedef'd to LZ4_streamDecode_t */


+
  /*-************************************
  * Obsolete Functions
  **************************************/
@@ -601,8 +683,8 @@ union LZ4_streamDecode_u {
  #endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */

  /* Obsolete compression functions */
- LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* source, char* dest, int sourceSize);
- LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
+ LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize);
+ LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
  LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
  LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
  LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
@@ -631,15 +713,18 @@ LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4
  LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);

  /*! LZ4_decompress_fast() : **unsafe!**
- * These functions are generally slightly faster than LZ4_decompress_safe(),
- * though the difference is small (generally ~5%).
- * However, the real cost is a risk : LZ4_decompress_safe() is protected vs malformed input, while `LZ4_decompress_fast()` is not, making it a security liability.
+ * These functions used to be faster than LZ4_decompress_safe(),
+ * but it has changed, and they are now slower than LZ4_decompress_safe().
+ * This is because LZ4_decompress_fast() doesn't know the input size,
+ * and therefore must progress more cautiously in the input buffer to not read beyond the end of block.
+ * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
  * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
- * These functions will generate a deprecation warning in the future.
  *
- * Last LZ4_decompress_fast() specificity is that it can decompress a block without knowing its compressed size.
- * Note that even that functionality could be achieved in a more secure manner if need be,
- * though it would require new prototypes, and adaptation of the implementation to this new use case.
+ * The last remaining LZ4_decompress_fast() specificity is that
+ * it can decompress a block without knowing its compressed size.
+ * Such functionality could be achieved in a more secure manner,
+ * by also providing the maximum size of input buffer,
+ * but it would require new prototypes, and adaptation of the implementation to this new use case.
  *
  * Parameters:
  * originalSize : is the uncompressed size to regenerate.
@@ -655,11 +740,11 @@ LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4
  * As a consequence, use these functions in trusted environments with trusted data **only**.
  */

- /* LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead") */
+ LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead")
  LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize);
- /* LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead") */
+ LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead")
  LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize);
- /* LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead") */
+ LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead")
  LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize);

  /*! LZ4_resetStream() :
@@ -671,7 +756,7 @@ LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int or
  LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);


- #endif /* LZ4_H_2983827168210 */
+ #endif /* LZ4_H_98237428734687 */


  #if defined (__cplusplus)
data/contrib/lz4/lib/lz4frame.c

@@ -213,8 +213,8 @@ static void LZ4F_writeLE64 (void* dst, U64 value64)

  static const size_t minFHSize = LZ4F_HEADER_SIZE_MIN; /* 7 */
  static const size_t maxFHSize = LZ4F_HEADER_SIZE_MAX; /* 19 */
- static const size_t BHSize = 4; /* block header : size, and compress flag */
- static const size_t BFSize = 4; /* block footer : checksum (optional) */
+ static const size_t BHSize = LZ4F_BLOCK_HEADER_SIZE; /* block header : size, and compress flag */
+ static const size_t BFSize = LZ4F_BLOCK_CHECKSUM_SIZE; /* block footer : checksum (optional) */


  /*-************************************
@@ -325,9 +325,9 @@ static size_t LZ4F_compressBound_internal(size_t srcSize,
  const LZ4F_preferences_t* preferencesPtr,
  size_t alreadyBuffered)
  {
- LZ4F_preferences_t prefsNull;
- MEM_INIT(&prefsNull, 0, sizeof(prefsNull));
+ LZ4F_preferences_t prefsNull = LZ4F_INIT_PREFERENCES;
  prefsNull.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; /* worst case */
+ prefsNull.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled; /* worst case */
  { const LZ4F_preferences_t* const prefsPtr = (preferencesPtr==NULL) ? &prefsNull : preferencesPtr;
  U32 const flush = prefsPtr->autoFlush | (srcSize==0);
  LZ4F_blockSizeID_t const blockID = prefsPtr->frameInfo.blockSizeID;
@@ -1065,7 +1065,10 @@ struct LZ4F_dctx_s {
  LZ4F_errorCode_t LZ4F_createDecompressionContext(LZ4F_dctx** LZ4F_decompressionContextPtr, unsigned versionNumber)
  {
  LZ4F_dctx* const dctx = (LZ4F_dctx*)ALLOC_AND_ZERO(sizeof(LZ4F_dctx));
- if (dctx==NULL) return err0r(LZ4F_ERROR_GENERIC);
+ if (dctx == NULL) { /* failed allocation */
+ *LZ4F_decompressionContextPtr = NULL;
+ return err0r(LZ4F_ERROR_allocation_failed);
+ }

  dctx->version = versionNumber;
  *LZ4F_decompressionContextPtr = dctx;
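With this change a failed allocation reports LZ4F_ERROR_allocation_failed and leaves the output pointer NULL, so the usual error check on the caller side covers it (sketch):

    #include <stdio.h>
    #include "lz4frame.h"

    LZ4F_dctx* open_dctx(void)
    {
        LZ4F_dctx* dctx = NULL;
        LZ4F_errorCode_t const err = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION);
        if (LZ4F_isError(err)) {
            fprintf(stderr, "dctx creation failed: %s\n", LZ4F_getErrorName(err));
            return NULL;    /* dctx is guaranteed NULL here */
        }
        return dctx;        /* release later with LZ4F_freeDecompressionContext() */
    }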
@@ -1128,8 +1131,10 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize
  }

  /* control magic number */
+ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER)
  return err0r(LZ4F_ERROR_frameType_unknown);
+ #endif
  dctx->frameInfo.frameType = LZ4F_frame;

  /* Flags */
@@ -1168,10 +1173,12 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize

  /* check header */
  assert(frameHeaderSize > 5);
+ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  { BYTE const HC = LZ4F_headerChecksum(srcPtr+4, frameHeaderSize-5);
  if (HC != srcPtr[frameHeaderSize-1])
  return err0r(LZ4F_ERROR_headerChecksum_invalid);
  }
+ #endif

  /* save */
  dctx->frameInfo.blockMode = (LZ4F_blockMode_t)blockMode;
@@ -1208,8 +1215,10 @@ size_t LZ4F_headerSize(const void* src, size_t srcSize)
  return 8;

  /* control magic number */
+ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  if (LZ4F_readLE32(src) != LZ4F_MAGICNUMBER)
  return err0r(LZ4F_ERROR_frameType_unknown);
+ #endif

  /* Frame Header Size */
  { BYTE const FLG = ((const BYTE*)src)[4];
@@ -1491,7 +1500,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
  /* next block is a compressed block */
  dctx->tmpInTarget = nextCBlockSize + crcSize;
  dctx->dStage = dstage_getCBlock;
- if (dstPtr==dstEnd) {
+ if (dstPtr==dstEnd || srcPtr==srcEnd) {
  nextSrcSizeHint = BHSize + nextCBlockSize + crcSize;
  doAnotherStage = 0;
  }
@@ -1552,8 +1561,13 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
  }
  { U32 const readCRC = LZ4F_readLE32(crcSrc);
  U32 const calcCRC = XXH32_digest(&dctx->blockChecksum);
+ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  if (readCRC != calcCRC)
  return err0r(LZ4F_ERROR_blockChecksum_invalid);
+ #else
+ (void)readCRC;
+ (void)calcCRC;
+ #endif
  } }
  dctx->dStage = dstage_getBlockHeader; /* new block */
  break;
@@ -1592,8 +1606,13 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
  assert(selectedIn != NULL); /* selectedIn is defined at this stage (either srcPtr, or dctx->tmpIn) */
  { U32 const readBlockCrc = LZ4F_readLE32(selectedIn + dctx->tmpInTarget);
  U32 const calcBlockCrc = XXH32(selectedIn, dctx->tmpInTarget, 0);
+ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  if (readBlockCrc != calcBlockCrc)
  return err0r(LZ4F_ERROR_blockChecksum_invalid);
+ #else
+ (void)readBlockCrc;
+ (void)calcBlockCrc;
+ #endif
  } }

  if ((size_t)(dstEnd-dstPtr) >= dctx->maxBlockSize) {
@@ -1721,8 +1740,13 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
  /* case dstage_checkSuffix: */ /* no direct entry, avoid initialization risks */
  { U32 const readCRC = LZ4F_readLE32(selectedIn);
  U32 const resultCRC = XXH32_digest(&(dctx->xxh));
+ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  if (readCRC != resultCRC)
  return err0r(LZ4F_ERROR_contentChecksum_invalid);
+ #else
+ (void)readCRC;
+ (void)resultCRC;
+ #endif
  nextSrcSizeHint = 0;
  LZ4F_resetDecompressionContext(dctx);
  doAnotherStage = 0;
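The guard used throughout these hunks follows the usual OSS-Fuzz convention: FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION is expected to be defined only by dedicated fuzzing builds (such as the ossfuzz targets added in this release), never by production builds, which keep magic-number and checksum validation active. A standalone sketch of the same pattern, for illustration only:

    /* Verification that would reject most fuzzer-mutated inputs is compiled out
     * when the fuzzing-only macro is defined at build time. */
    static int check_magic(unsigned readMagic, unsigned expectedMagic)
    {
    #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
        return readMagic == expectedMagic;   /* normal builds: enforce the check */
    #else
        (void)readMagic; (void)expectedMagic;
        return 1;                            /* fuzzing builds: let the input through */
    #endif
    }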
data/contrib/lz4/lib/lz4frame.h

@@ -176,7 +176,7 @@ typedef struct {
  LZ4F_blockChecksum_t blockChecksumFlag; /* 1: each block followed by a checksum of block's compressed data; 0: disabled (default) */
  } LZ4F_frameInfo_t;

- #define LZ4F_INIT_FRAMEINFO { 0, 0, 0, 0, 0, 0, 0 } /* v1.8.3+ */
+ #define LZ4F_INIT_FRAMEINFO { LZ4F_default, LZ4F_blockLinked, LZ4F_noContentChecksum, LZ4F_frame, 0ULL, 0U, LZ4F_noBlockChecksum } /* v1.8.3+ */

  /*! LZ4F_preferences_t :
  * makes it possible to supply advanced compression instructions to streaming interface.
@@ -191,7 +191,7 @@ typedef struct {
  unsigned reserved[3]; /* must be zero for forward compatibility */
  } LZ4F_preferences_t;

- #define LZ4F_INIT_PREFERENCES { LZ4F_INIT_FRAMEINFO, 0, 0, 0, { 0, 0, 0 } } /* v1.8.3+ */
+ #define LZ4F_INIT_PREFERENCES { LZ4F_INIT_FRAMEINFO, 0, 0u, 0u, { 0u, 0u, 0u } } /* v1.8.3+ */


  /*-*********************************
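Callers can use the same initializer that LZ4F_compressBound_internal() now relies on; a sketch of computing a worst-case frame size this way (the field choices below are illustrative, not taken from this diff):

    #include "lz4frame.h"

    size_t worst_case_frame_size(size_t srcSize)
    {
        LZ4F_preferences_t prefs = LZ4F_INIT_PREFERENCES;   /* all-default, forward-compatible */
        prefs.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled;
        prefs.frameInfo.blockChecksumFlag   = LZ4F_blockChecksumEnabled;
        return LZ4F_compressFrameBound(srcSize, &prefs);
    }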
@@ -253,6 +253,15 @@ LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
  #define LZ4F_HEADER_SIZE_MIN 7 /* LZ4 Frame header size can vary, depending on selected paramaters */
  #define LZ4F_HEADER_SIZE_MAX 19

+ /* Size in bytes of a block header in little-endian format. Highest bit indicates if block data is uncompressed */
+ #define LZ4F_BLOCK_HEADER_SIZE 4
+
+ /* Size in bytes of a block checksum footer in little-endian format. */
+ #define LZ4F_BLOCK_CHECKSUM_SIZE 4
+
+ /* Size in bytes of the content checksum. */
+ #define LZ4F_CONTENT_CHECKSUM_SIZE 4
+
  /*! LZ4F_compressBegin() :
  * will write the frame header into dstBuffer.
  * dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
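These constants make per-block framing overhead easy to account for: each block carries a 4-byte header plus, when block checksums are enabled, a 4-byte footer. A rough upper-bound sketch (the 4-byte EndMark and the optional content checksum come from the frame format specification, not from this hunk):

    #include "lz4frame.h"

    /* Rough upper bound of framing overhead for a frame of 'nbBlocks' blocks
     * with block checksums enabled, using the constants defined above. */
    size_t framing_overhead(size_t nbBlocks)
    {
        size_t const perBlock = LZ4F_BLOCK_HEADER_SIZE + LZ4F_BLOCK_CHECKSUM_SIZE;  /* 4 + 4 */
        return LZ4F_HEADER_SIZE_MAX                      /* frame header, at most 19 bytes */
             + nbBlocks * perBlock
             + LZ4F_BLOCK_HEADER_SIZE                    /* EndMark (a zero-size block header) */
             + LZ4F_CONTENT_CHECKSUM_SIZE;               /* optional content checksum */
    }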
data/contrib/lz4/lib/lz4hc.c

@@ -151,6 +151,21 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
  return back;
  }

+ #if defined(_MSC_VER)
+ # define LZ4HC_rotl32(x,r) _rotl(x,r)
+ #else
+ # define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+ #endif
+
+
+ static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern)
+ {
+ size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3;
+ if (bitsToRotate == 0)
+ return pattern;
+ return LZ4HC_rotl32(pattern, (int)bitsToRotate);
+ }
+
  /* LZ4HC_countPattern() :
  * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
  static unsigned
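A standalone copy of the rotation helper with a couple of worked values (illustration only; LZ4HC_rotatePattern itself is internal to lz4hc.c). Each byte of offset rotates the 32-bit pattern left by 8 bits, and offsets that are multiples of 4 leave it unchanged:

    #include <assert.h>
    #include <stdint.h>
    #include <stddef.h>

    static uint32_t rotl32(uint32_t x, int r) { return (x << r) | (x >> (32 - r)); }

    static uint32_t rotatePattern(size_t rotate, uint32_t pattern)
    {
        size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3;
        if (bitsToRotate == 0) return pattern;     /* also avoids the undefined 32-bit shift */
        return rotl32(pattern, (int)bitsToRotate);
    }

    int main(void)
    {
        assert(rotatePattern(0, 0x04030201u) == 0x04030201u);  /* multiple of 4 : unchanged */
        assert(rotatePattern(1, 0x04030201u) == 0x03020104u);  /* rotated left by 8 bits */
        assert(rotatePattern(4, 0x04030201u) == 0x04030201u);
        return 0;
    }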
@@ -203,6 +218,16 @@ LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
  return (unsigned)(iStart - ip);
  }

+ /* LZ4HC_protectDictEnd() :
+ * Checks if the match is in the last 3 bytes of the dictionary, so reading the
+ * 4 byte MINMATCH would overflow.
+ * @returns true if the match index is okay.
+ */
+ static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex)
+ {
+ return ((U32)((dictLimit - 1) - matchIndex) >= 3);
+ }
+
  typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
  typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e;

@@ -228,7 +253,7 @@ LZ4HC_InsertAndGetWiderMatch (
  const U32 dictLimit = hc4->dictLimit;
  const BYTE* const lowPrefixPtr = base + dictLimit;
  const U32 ipIndex = (U32)(ip - base);
- const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
+ const U32 lowestMatchIndex = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
  const BYTE* const dictBase = hc4->dictBase;
  int const lookBackLength = (int)(ip-iLowLimit);
  int nbAttempts = maxNbAttempts;
@@ -287,14 +312,21 @@ LZ4HC_InsertAndGetWiderMatch (
  if (chainSwap && matchLength==longest) { /* better match => select a better chain */
  assert(lookBackLength==0); /* search forward only */
  if (matchIndex + (U32)longest <= ipIndex) {
+ int const kTrigger = 4;
  U32 distanceToNextMatch = 1;
+ int const end = longest - MINMATCH + 1;
+ int step = 1;
+ int accel = 1 << kTrigger;
  int pos;
- for (pos = 0; pos <= longest - MINMATCH; pos++) {
+ for (pos = 0; pos < end; pos += step) {
  U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos);
+ step = (accel++ >> kTrigger);
  if (candidateDist > distanceToNextMatch) {
  distanceToNextMatch = candidateDist;
  matchChainPos = (U32)pos;
- } }
+ accel = 1 << kTrigger;
+ }
+ }
  if (distanceToNextMatch > 1) {
  if (distanceToNextMatch > matchIndex) break; /* avoid overflow */
  matchIndex -= distanceToNextMatch;
@@ -313,34 +345,61 @@ LZ4HC_InsertAndGetWiderMatch (
  } else {
  repeat = rep_not;
  } }
- if ( (repeat == rep_confirmed)
- && (matchCandidateIdx >= dictLimit) ) { /* same segment only */
- const BYTE* const matchPtr = base + matchCandidateIdx;
+ if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex)
+ && LZ4HC_protectDictEnd(dictLimit, matchCandidateIdx) ) {
+ const int extDict = matchCandidateIdx < dictLimit;
+ const BYTE* const matchPtr = (extDict ? dictBase : base) + matchCandidateIdx;
  if (LZ4_read32(matchPtr) == pattern) { /* good candidate */
- size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
- const BYTE* const lowestMatchPtr = (lowPrefixPtr + LZ4_DISTANCE_MAX >= ip) ? lowPrefixPtr : ip - LZ4_DISTANCE_MAX;
- size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
- size_t const currentSegmentLength = backLength + forwardPatternLength;
-
- if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
- && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
- matchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
- } else {
- matchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
- if (lookBackLength==0) { /* no back possible */
- size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
- if ((size_t)longest < maxML) {
- assert(base + matchIndex < ip);
- if (ip - (base+matchIndex) > LZ4_DISTANCE_MAX) break;
- assert(maxML < 2 GB);
- longest = (int)maxML;
- *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */
- *startpos = ip;
+ const BYTE* const dictStart = dictBase + hc4->lowLimit;
+ const BYTE* const iLimit = extDict ? dictBase + dictLimit : iHighLimit;
+ size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern);
+ if (extDict && matchPtr + forwardPatternLength == iLimit) {
+ U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern);
+ forwardPatternLength += LZ4HC_countPattern(lowPrefixPtr, iHighLimit, rotatedPattern);
+ }
+ { const BYTE* const lowestMatchPtr = extDict ? dictStart : lowPrefixPtr;
+ size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
+ size_t currentSegmentLength;
+ if (!extDict && matchPtr - backLength == lowPrefixPtr && hc4->lowLimit < dictLimit) {
+ U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern);
+ backLength += LZ4HC_reverseCountPattern(dictBase + dictLimit, dictStart, rotatedPattern);
+ }
+ /* Limit backLength not go further than lowestMatchIndex */
+ backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex);
+ assert(matchCandidateIdx - backLength >= lowestMatchIndex);
+ currentSegmentLength = backLength + forwardPatternLength;
+ /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */
+ if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
+ && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
+ U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
+ if (LZ4HC_protectDictEnd(dictLimit, newMatchIndex))
+ matchIndex = newMatchIndex;
+ else {
+ /* Can only happen if started in the prefix */
+ assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
+ matchIndex = dictLimit;
  }
- { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
- if (distToNextPattern > matchIndex) break; /* avoid overflow */
- matchIndex -= distToNextPattern;
- } } }
+ } else {
+ U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
+ if (!LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) {
+ assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
+ matchIndex = dictLimit;
+ } else {
+ matchIndex = newMatchIndex;
+ if (lookBackLength==0) { /* no back possible */
+ size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
+ if ((size_t)longest < maxML) {
+ assert(base + matchIndex < ip);
+ if (ip - (base+matchIndex) > LZ4_DISTANCE_MAX) break;
+ assert(maxML < 2 GB);
+ longest = (int)maxML;
+ *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */
+ *startpos = ip;
+ }
+ { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
+ if (distToNextPattern > matchIndex) break; /* avoid overflow */
+ matchIndex -= distToNextPattern;
+ } } } } }
  continue;
  } }
  } } /* PA optimization */
@@ -442,7 +501,7 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
  }

  /* Copy Literals */
- LZ4_wildCopy(*op, *anchor, (*op) + length);
+ LZ4_wildCopy8(*op, *anchor, (*op) + length);
  *op += length;

  /* Encode Offset */
@@ -1005,6 +1064,9 @@ static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBl
  ctxPtr->base = newBlock - ctxPtr->dictLimit;
  ctxPtr->end = newBlock;
  ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */
+
+ /* cannot reference an extDict and a dictCtx at the same time */
+ ctxPtr->dictCtx = NULL;
  }

  static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
@@ -1396,6 +1458,7 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
  } }
  } /* for (cur = 1; cur <= last_match_pos; cur++) */

+ assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS);
  best_mlen = opt[last_match_pos].mlen;
  best_off = opt[last_match_pos].off;
  cur = last_match_pos - best_mlen;