lz4-native-ruby 0.1.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/CLAUDE.md +292 -0
  3. data/LICENSE +55 -21
  4. data/README.md +109 -15
  5. data/{vendor/lz4/lib → ext/lz4_native}/Makefile +29 -24
  6. data/{vendor/lz4/lib → ext/lz4_native}/README.md +1 -1
  7. data/ext/lz4_native/extconf.rb +33 -0
  8. data/{vendor/lz4/lib → ext/lz4_native}/liblz4.pc.in +1 -0
  9. data/{vendor/lz4/lib → ext/lz4_native}/lz4.c +26 -23
  10. data/{vendor/lz4/lib → ext/lz4_native}/lz4.h +11 -9
  11. data/ext/lz4_native/lz4_native.c +442 -0
  12. data/ext/lz4_native/lz4file.c +362 -0
  13. data/{vendor/lz4/lib → ext/lz4_native}/lz4file.h +32 -9
  14. data/{vendor/lz4/lib → ext/lz4_native}/lz4frame.c +50 -21
  15. data/{vendor/lz4/lib → ext/lz4_native}/lz4frame.h +48 -28
  16. data/{vendor/lz4/lib → ext/lz4_native}/lz4frame_static.h +1 -1
  17. data/{vendor/lz4/lib → ext/lz4_native}/lz4hc.c +123 -60
  18. data/{vendor/lz4/lib → ext/lz4_native}/lz4hc.h +1 -1
  19. data/lib/lz4_native/lz4_native.so +0 -0
  20. data/lib/lz4_native/version.rb +3 -0
  21. data/lib/lz4_native.rb +47 -0
  22. data/test/test_helper.rb +4 -0
  23. data/test/test_lz4_basic.rb +100 -0
  24. data/test/test_lz4frame.rb +129 -0
  25. data/test/test_lz4hc.rb +75 -0
  26. metadata +50 -43
  27. data/ext/lz4/extconf.rb +0 -12
  28. data/ext/lz4/lz4_ext.c +0 -230
  29. data/lib/lz4/lz4_ext.so +0 -0
  30. data/lib/lz4/version.rb +0 -3
  31. data/lib/lz4.rb +0 -60
  32. data/vendor/lz4/lib/lz4file.c +0 -341
  33. /data/{vendor/lz4/lib → ext/lz4_native}/LICENSE +0 -0
  34. /data/{vendor/lz4/lib → ext/lz4_native}/dll/example/Makefile +0 -0
  35. /data/{vendor/lz4/lib → ext/lz4_native}/dll/example/README.md +0 -0
  36. /data/{vendor/lz4/lib → ext/lz4_native}/dll/example/fullbench-dll.sln +0 -0
  37. /data/{vendor/lz4/lib → ext/lz4_native}/dll/example/fullbench-dll.vcxproj +0 -0
  38. /data/{vendor/lz4/lib → ext/lz4_native}/liblz4-dll.rc.in +0 -0
  39. /data/{vendor/lz4/lib → ext/lz4_native}/xxhash.c +0 -0
  40. /data/{vendor/lz4/lib → ext/lz4_native}/xxhash.h +0 -0
@@ -1,7 +1,8 @@
1
1
  /*
2
2
  LZ4F - LZ4-Frame library
3
3
  Header File
4
- Copyright (C) 2011-2020, Yann Collet.
4
+ Copyright (c) Yann Collet. All rights reserved.
5
+
5
6
  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7
 
7
8
  Redistribution and use in source and binary forms, with or without
@@ -289,6 +290,9 @@ LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
289
290
  /* Size in bytes of the content checksum. */
290
291
  #define LZ4F_CONTENT_CHECKSUM_SIZE 4
291
292
 
293
+ /* Size in bytes of the endmark. */
294
+ #define LZ4F_ENDMARK_SIZE 4
295
+
292
296
  /*! LZ4F_compressBegin() :
293
297
  * will write the frame header into dstBuffer.
294
298
  * dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
@@ -462,41 +466,52 @@ LZ4F_getFrameInfo(LZ4F_dctx* dctx,
462
466
  LZ4F_frameInfo_t* frameInfoPtr,
463
467
  const void* srcBuffer, size_t* srcSizePtr);
464
468
 
465
- /*! LZ4F_decompress() :
466
- * Call this function repetitively to regenerate data compressed in `srcBuffer`.
467
- *
468
- * The function requires a valid dctx state.
469
- * It will read up to *srcSizePtr bytes from srcBuffer,
470
- * and decompress data into dstBuffer, of capacity *dstSizePtr.
469
+ /**
470
+ * @brief Incrementally decompresses an LZ4 frame into user-provided buffers.
471
471
  *
472
- * The nb of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value).
473
- * The nb of bytes decompressed into dstBuffer will be written into *dstSizePtr (necessarily <= original value).
472
+ * Call repeatedly until the return value is 0 (frame fully decoded) or an error is reported.
473
+ * On each call, the function consumes up to *srcSizePtr bytes from @p srcBuffer and
474
+ * produces up to *dstSizePtr bytes into @p dstBuffer. It updates both size pointers with
475
+ * the actual number of bytes consumed/produced. There is no separate flush step.
474
476
  *
475
- * The function does not necessarily read all input bytes, so always check value in *srcSizePtr.
476
- * Unconsumed source data must be presented again in subsequent invocations.
477
+ * Typical loop:
478
+ * - Provide whatever input you have and an available output buffer.
479
+ * - Read how much input was consumed and how much output was produced.
480
+ * - Use the returned value as a hint for how many source bytes are ideal next time.
477
481
  *
478
- * `dstBuffer` can freely change between each consecutive function invocation.
479
- * `dstBuffer` content will be overwritten.
482
+ * @param[in] dctx A valid decompression context created by LZ4F_createDecompressionContext().
483
+ * @param[out] dstBuffer Destination buffer for decompressed bytes. May change between calls.
484
+ * @param[in,out] dstSizePtr In: capacity of @p dstBuffer in bytes. Out: number of bytes written (<= input value).
485
+ * @param[in] srcBuffer Source buffer containing (more) compressed data. May point to the middle of a larger buffer.
486
+ * @param[in,out] srcSizePtr In: number of available bytes in @p srcBuffer. Out: number of bytes consumed (<= input value).
487
+ * @param[in] optionsPtr Optional decompression options; pass NULL for defaults.
480
488
  *
481
- * Note: if `LZ4F_getFrameInfo()` is called before `LZ4F_decompress()`, srcBuffer must be updated to reflect
482
- * the number of bytes consumed after reading the frame header. Failure to update srcBuffer before calling
483
- * `LZ4F_decompress()` will cause decompression failure or, even worse, successful but incorrect decompression.
484
- * See the `LZ4F_getFrameInfo()` docs for details.
489
+ * @return See @retval cases.
490
+ * @retval >0 Hint (in bytes) for how many source bytes are ideal to provide on the next call.
491
+ * This also indicates the current frame is not yet complete: the decompressor
492
+ * expects more input, or may require additional output space to make progress.
493
+ * User can always pass any amount of input; this value is only a performance hint.
494
+ * @retval 0 The current frame is fully decoded. If *srcSizePtr is less than the provided value,
495
+ * the unconsumed tail is the start of another frame (if any).
496
+ * @retval error An error code; test with LZ4F_isError(ret). After an error, dctx is not
497
+ * resumable: call LZ4F_resetDecompressionContext() before reusing it.
485
498
  *
486
- * @return : an hint of how many `srcSize` bytes LZ4F_decompress() expects for next call.
487
- * Schematically, it's the size of the current (or remaining) compressed block + header of next block.
488
- * Respecting the hint provides some small speed benefit, because it skips intermediate buffers.
489
- * This is just a hint though, it's always possible to provide any srcSize.
499
+ * @pre @p dctx is a valid state created by LZ4F_createDecompressionContext().
500
+ * @post *srcSizePtr and *dstSizePtr are updated with the actual bytes consumed/produced.
501
+ * @p dstBuffer contents in [0, *dstSizePtr) are valid decompressed data.
490
502
  *
491
- * When a frame is fully decoded, @return will be 0 (no more data expected).
492
- * When provided with more bytes than necessary to decode a frame,
493
- * LZ4F_decompress() will stop reading exactly at end of current frame, and @return 0.
503
+ * @note The function may not consume all provided input on each call. Always check *srcSizePtr.
504
+ * Present any unconsumed source bytes again on the next call.
505
+ * @note @p dstBuffer content is overwritten; it does not need to be stable across calls.
506
+ * @note After finishing a frame (return==0), you may immediately start feeding the next frame
507
+ * into the same @p dctx (optionally, one can use LZ4F_resetDecompressionContext()).
494
508
  *
495
- * If decompression failed, @return is an error code, which can be tested using LZ4F_isError().
496
- * After a decompression error, the `dctx` context is not resumable.
497
- * Use LZ4F_resetDecompressionContext() to return to clean state.
509
+ * @warning If you called LZ4F_getFrameInfo() beforehand, you must advance @p srcBuffer and
510
+ * decrease *srcSizePtr by the number of bytes it consumed (the frame header). Failing
511
+ * to do so can cause decompression failure or, worse, silent corruption.
498
512
  *
499
- * After a frame is fully decoded, dctx can be used again to decompress another frame.
513
+ * @see LZ4F_getFrameInfo(), LZ4F_isError(), LZ4F_resetDecompressionContext(),
514
+ * LZ4F_createDecompressionContext(), LZ4F_freeDecompressionContext()
500
515
  */
501
516
  LZ4FLIB_API size_t
502
517
  LZ4F_decompress(LZ4F_dctx* dctx,
@@ -743,6 +758,11 @@ LZ4FLIB_STATIC_API LZ4F_cctx* LZ4F_createCompressionContext_advanced(LZ4F_Custom
743
758
  LZ4FLIB_STATIC_API LZ4F_dctx* LZ4F_createDecompressionContext_advanced(LZ4F_CustomMem customMem, unsigned version);
744
759
  LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict_advanced(LZ4F_CustomMem customMem, const void* dictBuffer, size_t dictSize);
745
760
 
761
+ /*! Context size inspection : v1.10.1+
762
+ * These functions return the total memory footprint of the provided context.
763
+ */
764
+ LZ4FLIB_STATIC_API size_t LZ4F_cctx_size(const LZ4F_cctx* cctx);
765
+ LZ4FLIB_STATIC_API size_t LZ4F_dctx_size(const LZ4F_dctx* dctx);
746
766
 
747
767
  #if defined (__cplusplus)
748
768
  }
@@ -1,7 +1,7 @@
1
1
  /*
2
2
  LZ4 auto-framing library
3
3
  Header File for static linking only
4
- Copyright (C) 2011-2020, Yann Collet.
4
+ Copyright (c) Yann Collet. All rights reserved.
5
5
 
6
6
  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
7
 
@@ -1,6 +1,6 @@
1
1
  /*
2
2
  LZ4 HC - High Compression Mode of LZ4
3
- Copyright (C) 2011-2020, Yann Collet.
3
+ Copyright (c) Yann Collet. All rights reserved.
4
4
 
5
5
  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
6
 
@@ -262,6 +262,30 @@ static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start)
262
262
  /**************************************
263
263
  * Encode
264
264
  **************************************/
265
+ #if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2)
266
+ # define RAWLOG(...) fprintf(stderr, __VA_ARGS__)
267
+ void LZ4HC_hexOut(const void* src, size_t len)
268
+ {
269
+ const BYTE* p = (const BYTE*)src;
270
+ size_t n;
271
+ for (n=0; n<len; n++) {
272
+ RAWLOG("%02X ", p[n]);
273
+ }
274
+ RAWLOG(" \n");
275
+ }
276
+
277
+ # define HEX_CMP(_lev, _ptr, _ref, _len) \
278
+ if (LZ4_DEBUG >= _lev) { \
279
+ RAWLOG("match bytes: "); \
280
+ LZ4HC_hexOut(_ptr, _len); \
281
+ RAWLOG("ref bytes: "); \
282
+ LZ4HC_hexOut(_ref, _len); \
283
+ }
284
+
285
+ #else
286
+ # define HEX_CMP(l,p,r,_l)
287
+ #endif
288
+
265
289
  /* LZ4HC_encodeSequence() :
266
290
  * @return : 0 if ok,
267
291
  * 1 if buffer issue detected */
@@ -278,47 +302,49 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
278
302
  #define op (*_op)
279
303
  #define anchor (*_anchor)
280
304
 
281
- size_t length;
282
305
  BYTE* const token = op++;
283
306
 
284
307
  #if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
285
308
  static const BYTE* start = NULL;
286
309
  static U32 totalCost = 0;
287
- U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start);
310
+ U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start); /* only works for single segment */
288
311
  U32 const ll = (U32)(ip - anchor);
289
312
  U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
290
313
  U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
291
314
  U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
292
315
  if (start==NULL) start = anchor; /* only works for single segment */
293
- /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
294
316
  DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5i, cost:%4u + %5u",
295
317
  pos,
296
318
  (U32)(ip - anchor), matchLength, offset,
297
319
  cost, totalCost);
320
+ # if 1 /* only works on single segment data */
321
+ HEX_CMP(7, ip, ip-offset, matchLength);
322
+ # endif
298
323
  totalCost += cost;
299
324
  #endif
300
325
 
301
326
  /* Encode Literal length */
302
- length = (size_t)(ip - anchor);
303
- LZ4_STATIC_ASSERT(notLimited == 0);
304
- /* Check output limit */
305
- if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
306
- DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
307
- (int)length, (int)(oend - op));
308
- return 1;
309
- }
310
- if (length >= RUN_MASK) {
311
- size_t len = length - RUN_MASK;
312
- *token = (RUN_MASK << ML_BITS);
313
- for(; len >= 255 ; len -= 255) *op++ = 255;
314
- *op++ = (BYTE)len;
315
- } else {
316
- *token = (BYTE)(length << ML_BITS);
317
- }
327
+ { size_t litLen = (size_t)(ip - anchor);
328
+ LZ4_STATIC_ASSERT(notLimited == 0);
329
+ /* Check output limit */
330
+ if (limit && ((op + (litLen / 255) + litLen + (2 + 1 + LASTLITERALS)) > oend)) {
331
+ DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
332
+ (int)litLen, (int)(oend - op));
333
+ return 1;
334
+ }
335
+ if (litLen >= RUN_MASK) {
336
+ size_t len = litLen - RUN_MASK;
337
+ *token = (RUN_MASK << ML_BITS);
338
+ for(; len >= 255 ; len -= 255) *op++ = 255;
339
+ *op++ = (BYTE)len;
340
+ } else {
341
+ *token = (BYTE)(litLen << ML_BITS);
342
+ }
318
343
 
319
- /* Copy Literals */
320
- LZ4_wildCopy8(op, anchor, op + length);
321
- op += length;
344
+ /* Copy Literals */
345
+ LZ4_wildCopy8(op, anchor, op + litLen);
346
+ op += litLen;
347
+ }
322
348
 
323
349
  /* Encode Offset */
324
350
  assert(offset <= LZ4_DISTANCE_MAX );
@@ -327,20 +353,20 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
327
353
 
328
354
  /* Encode MatchLength */
329
355
  assert(matchLength >= MINMATCH);
330
- length = (size_t)matchLength - MINMATCH;
331
- if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) {
332
- DEBUGLOG(6, "Not enough room to write match length");
333
- return 1; /* Check output limit */
334
- }
335
- if (length >= ML_MASK) {
336
- *token += ML_MASK;
337
- length -= ML_MASK;
338
- for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; }
339
- if (length >= 255) { length -= 255; *op++ = 255; }
340
- *op++ = (BYTE)length;
341
- } else {
342
- *token += (BYTE)(length);
343
- }
356
+ { size_t mlCode = (size_t)matchLength - MINMATCH;
357
+ if (limit && (op + (mlCode / 255) + (1 + LASTLITERALS) > oend)) {
358
+ DEBUGLOG(6, "Not enough room to write match length");
359
+ return 1; /* Check output limit */
360
+ }
361
+ if (mlCode >= ML_MASK) {
362
+ *token += ML_MASK;
363
+ mlCode -= ML_MASK;
364
+ for(; mlCode >= 510 ; mlCode -= 510) { *op++ = 255; *op++ = 255; }
365
+ if (mlCode >= 255) { mlCode -= 255; *op++ = 255; }
366
+ *op++ = (BYTE)mlCode;
367
+ } else {
368
+ *token += (BYTE)(mlCode);
369
+ } }
344
370
 
345
371
  /* Prepare next loop */
346
372
  ip += matchLength;
@@ -519,6 +545,12 @@ static LZ4MID_searchIntoDict_f select_searchDict_function(const LZ4HC_CCtx_inter
519
545
  return LZ4MID_searchHCDict;
520
546
  }
521
547
 
548
+ /* preconditions:
549
+ * - *srcSizePtr within [1, LZ4_MAX_INPUT_SIZE]
550
+ * - src is valid
551
+ * - maxOutputSize >= 1
552
+ * - dst is valid
553
+ */
522
554
  static int LZ4MID_compress (
523
555
  LZ4HC_CCtx_internal* const ctx,
524
556
  const char* const src,
@@ -550,18 +582,16 @@ static int LZ4MID_compress (
550
582
  unsigned matchLength;
551
583
  unsigned matchDistance;
552
584
 
553
- /* input sanitization */
554
585
  DEBUGLOG(5, "LZ4MID_compress (%i bytes)", *srcSizePtr);
586
+
587
+ /* preconditions verifications */
555
588
  if (dict == usingDictCtxHc) DEBUGLOG(5, "usingDictCtxHc");
556
- assert(*srcSizePtr >= 0);
557
- if (*srcSizePtr) assert(src != NULL);
558
- if (maxOutputSize) assert(dst != NULL);
559
- if (*srcSizePtr < 0) return 0; /* invalid */
560
- if (maxOutputSize < 0) return 0; /* invalid */
561
- if (*srcSizePtr > LZ4_MAX_INPUT_SIZE) {
562
- /* forbidden: no input is allowed to be that large */
563
- return 0;
564
- }
589
+ assert(*srcSizePtr > 0);
590
+ assert(*srcSizePtr <= LZ4_MAX_INPUT_SIZE);
591
+ assert(src != NULL);
592
+ assert(maxOutputSize >= 1);
593
+ assert(dst != NULL);
594
+
565
595
  if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
566
596
  if (*srcSizePtr < LZ4_minLength)
567
597
  goto _lz4mid_last_literals; /* Input too small, no compression (all literals) */
@@ -940,6 +970,7 @@ LZ4HC_InsertAndGetWiderMatch (
940
970
  offset = (int)(ipIndex - matchIndex);
941
971
  sBack = back;
942
972
  DEBUGLOG(7, "Found match of len=%i within prefix, offset=%i, back=%i", longest, offset, -back);
973
+ HEX_CMP(7, ip + back, ip + back - offset, (size_t)matchLength);
943
974
  } } }
944
975
  } else { /* lowestMatchIndex <= matchIndex < dictLimit : within Ext Dict */
945
976
  const BYTE* const matchPtr = dictStart + (matchIndex - dictIdx);
@@ -959,6 +990,7 @@ LZ4HC_InsertAndGetWiderMatch (
959
990
  offset = (int)(ipIndex - matchIndex);
960
991
  sBack = back;
961
992
  DEBUGLOG(7, "Found match of len=%i within dict, offset=%i, back=%i", longest, offset, -back);
993
+ HEX_CMP(7, ip + back, matchPtr + back, (size_t)matchLength);
962
994
  } } }
963
995
 
964
996
  if (chainSwap && matchLength==longest) { /* better match => select a better chain */
@@ -1118,10 +1150,16 @@ LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table wi
1118
1150
  }
1119
1151
 
1120
1152
 
1153
+ /* preconditions:
1154
+ * - *srcSizePtr within [1, LZ4_MAX_INPUT_SIZE]
1155
+ * - src is valid
1156
+ * - maxOutputSize >= 1
1157
+ * - dst is valid
1158
+ */
1121
1159
  LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
1122
1160
  LZ4HC_CCtx_internal* const ctx,
1123
- const char* const source,
1124
- char* const dest,
1161
+ const char* const src,
1162
+ char* const dst,
1125
1163
  int* srcSizePtr,
1126
1164
  int const maxOutputSize,
1127
1165
  int maxNbAttempts,
@@ -1132,14 +1170,14 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
1132
1170
  const int inputSize = *srcSizePtr;
1133
1171
  const int patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */
1134
1172
 
1135
- const BYTE* ip = (const BYTE*) source;
1173
+ const BYTE* ip = (const BYTE*)src;
1136
1174
  const BYTE* anchor = ip;
1137
1175
  const BYTE* const iend = ip + inputSize;
1138
1176
  const BYTE* const mflimit = iend - MFLIMIT;
1139
1177
  const BYTE* const matchlimit = (iend - LASTLITERALS);
1140
1178
 
1141
- BYTE* optr = (BYTE*) dest;
1142
- BYTE* op = (BYTE*) dest;
1179
+ BYTE* optr = (BYTE*) dst;
1180
+ BYTE* op = (BYTE*) dst;
1143
1181
  BYTE* oend = op + maxOutputSize;
1144
1182
 
1145
1183
  const BYTE* start0;
@@ -1150,6 +1188,13 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
1150
1188
 
1151
1189
  /* init */
1152
1190
  DEBUGLOG(5, "LZ4HC_compress_hashChain (dict?=>%i)", dict);
1191
+
1192
+ /* preconditions verifications */
1193
+ assert(*srcSizePtr >= 1);
1194
+ assert(src != NULL);
1195
+ assert(maxOutputSize >= 1);
1196
+ assert(dst != NULL);
1197
+
1153
1198
  *srcSizePtr = 0;
1154
1199
  if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
1155
1200
  if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
@@ -1334,8 +1379,8 @@ _last_literals:
1334
1379
  }
1335
1380
 
1336
1381
  /* End */
1337
- *srcSizePtr = (int) (((const char*)ip) - source);
1338
- return (int) (((char*)op)-dest);
1382
+ *srcSizePtr = (int) (((const char*)ip) - src);
1383
+ return (int) (((char*)op)-dst);
1339
1384
 
1340
1385
  _dest_overflow:
1341
1386
  if (limit == fillOutput) {
@@ -1370,7 +1415,7 @@ static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx,
1370
1415
  const dictCtx_directive dict,
1371
1416
  const HCfavor_e favorDecSpeed);
1372
1417
 
1373
- LZ4_FORCE_INLINE int
1418
+ static int
1374
1419
  LZ4HC_compress_generic_internal (
1375
1420
  LZ4HC_CCtx_internal* const ctx,
1376
1421
  const char* const src,
@@ -1382,11 +1427,15 @@ LZ4HC_compress_generic_internal (
1382
1427
  const dictCtx_directive dict
1383
1428
  )
1384
1429
  {
1385
- DEBUGLOG(5, "LZ4HC_compress_generic_internal(src=%p, srcSize=%d)",
1386
- src, *srcSizePtr);
1430
+ DEBUGLOG(5, "LZ4HC_compress_generic_internal(src=%p, srcSize=%d, dstCapacity=%d)",
1431
+ src, *srcSizePtr, dstCapacity);
1387
1432
 
1388
- if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */
1433
+ /* input sanitization */
1389
1434
  if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */
1435
+ if (dstCapacity < 1) return 0; /* Invalid: impossible to store anything */
1436
+ assert(dst); /* since dstCapacity >= 1, dst must be valid */
1437
+ if (*srcSizePtr == 0) { *dst = 0; return 1; }
1438
+ assert(src != NULL); /* since *srcSizePtr >= 1, src must be valid */
1390
1439
 
1391
1440
  ctx->end += *srcSizePtr;
1392
1441
  { cParams_t const cParam = LZ4HC_getCLevelParams(cLevel);
@@ -1820,6 +1869,13 @@ LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
1820
1869
  }
1821
1870
 
1822
1871
 
1872
+
1873
+ /* preconditions:
1874
+ * - *srcSizePtr within [1, LZ4_MAX_INPUT_SIZE]
1875
+ * - src is valid
1876
+ * - maxOutputSize >= 1
1877
+ * - dst is valid
1878
+ */
1823
1879
  static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
1824
1880
  const char* const source,
1825
1881
  char* dst,
@@ -1837,7 +1893,7 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
1837
1893
  #if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
1838
1894
  LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS));
1839
1895
  #else
1840
- LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */
1896
+ LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which can be a bit large for some stacks... */
1841
1897
  #endif
1842
1898
 
1843
1899
  const BYTE* ip = (const BYTE*) source;
@@ -1852,10 +1908,17 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
1852
1908
  int ovoff = 0;
1853
1909
 
1854
1910
  /* init */
1911
+ DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity);
1855
1912
  #if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
1856
1913
  if (opt == NULL) goto _return_label;
1857
1914
  #endif
1858
- DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity);
1915
+
1916
+ /* preconditions verifications */
1917
+ assert(dstCapacity > 0);
1918
+ assert(dst != NULL);
1919
+ assert(*srcSizePtr > 0);
1920
+ assert(source != NULL);
1921
+
1859
1922
  *srcSizePtr = 0;
1860
1923
  if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
1861
1924
  if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
@@ -1,7 +1,7 @@
1
1
  /*
2
2
  LZ4 HC - High Compression Mode of LZ4
3
3
  Header File
4
- Copyright (C) 2011-2020, Yann Collet.
4
+ Copyright (c) Yann Collet. All rights reserved.
5
5
  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
6
 
7
7
  Redistribution and use in source and binary forms, with or without
Binary file
@@ -0,0 +1,3 @@
1
+ module LZ4Native
2
+ VERSION = "1.0.1"
3
+ end
data/lib/lz4_native.rb ADDED
@@ -0,0 +1,47 @@
1
+ require "lz4_native/version"
2
+ require "lz4_native/lz4_native" # C extension
3
+
4
+ module LZ4Native
5
+ class Error < StandardError; end
6
+ class CompressionError < Error; end
7
+ class DecompressionError < Error; end
8
+ class FrameError < Error; end
9
+
10
+ # Simple block compression using default LZ4
11
+ # @param data [String] Data to compress
12
+ # @return [String] Compressed data
13
+ def self.compress(data)
14
+ LZ4.compress_default(data)
15
+ end
16
+
17
+ # Simple block decompression
18
+ # @param data [String] Compressed data
19
+ # @param max_size [Integer] Maximum expected decompressed size
20
+ # @return [String] Decompressed data
21
+ def self.decompress(data, max_size)
22
+ LZ4.decompress_safe(data, max_size)
23
+ end
24
+
25
+ # High compression mode
26
+ # @param data [String] Data to compress
27
+ # @param level [Integer] Compression level (1-12, default 9)
28
+ # @return [String] Compressed data
29
+ def self.compress_hc(data, level = 9)
30
+ LZ4HC.compress(data, level)
31
+ end
32
+
33
+ # Frame compression (self-contained format)
34
+ # @param data [String] Data to compress
35
+ # @param options [Hash] Frame compression options
36
+ # @return [String] Compressed frame
37
+ def self.compress_frame(data, options = {})
38
+ LZ4Frame.compress_frame(data, options)
39
+ end
40
+
41
+ # Frame decompression
42
+ # @param data [String] Compressed frame data
43
+ # @return [String] Decompressed data
44
+ def self.decompress_frame(data)
45
+ LZ4Frame.decompress_frame(data)
46
+ end
47
+ end
@@ -0,0 +1,4 @@
1
+ $LOAD_PATH.unshift File.expand_path("../lib", __dir__)
2
+ require "lz4_native"
3
+
4
+ require "minitest/autorun"
@@ -0,0 +1,100 @@
1
+ require "test_helper"
2
+
3
+ class TestLZ4Basic < Minitest::Test
4
+ def setup
5
+ @test_data = "Hello, World! " * 100
6
+ @small_data = "test"
7
+ @large_data = "A" * 100000
8
+ end
9
+
10
+ def test_version_number
11
+ version = LZ4Native::LZ4.version_number
12
+ assert version.is_a?(Integer)
13
+ assert version > 0
14
+ end
15
+
16
+ def test_version_string
17
+ version = LZ4Native::LZ4.version_string
18
+ assert version.is_a?(String)
19
+ assert version.length > 0
20
+ end
21
+
22
+ def test_compress_bound
23
+ bound = LZ4Native::LZ4.compress_bound(1000)
24
+ assert bound.is_a?(Integer)
25
+ assert bound > 1000
26
+ end
27
+
28
+ def test_compress_decompress_default
29
+ compressed = LZ4Native::LZ4.compress_default(@test_data)
30
+ assert compressed.is_a?(String)
31
+ assert compressed.bytesize < @test_data.bytesize
32
+ assert compressed.bytesize > 0
33
+
34
+ decompressed = LZ4Native::LZ4.decompress_safe(compressed, @test_data.bytesize * 2)
35
+ assert_equal @test_data, decompressed
36
+ end
37
+
38
+ def test_compress_decompress_small_data
39
+ compressed = LZ4Native::LZ4.compress_default(@small_data)
40
+ decompressed = LZ4Native::LZ4.decompress_safe(compressed, @small_data.bytesize * 2)
41
+ assert_equal @small_data, decompressed
42
+ end
43
+
44
+ def test_compress_decompress_large_data
45
+ compressed = LZ4Native::LZ4.compress_default(@large_data)
46
+ assert compressed.bytesize < @large_data.bytesize
47
+
48
+ decompressed = LZ4Native::LZ4.decompress_safe(compressed, @large_data.bytesize * 2)
49
+ assert_equal @large_data, decompressed
50
+ end
51
+
52
+ def test_compress_fast
53
+ accelerations = [1, 5, 10]
54
+ accelerations.each do |accel|
55
+ compressed = LZ4Native::LZ4.compress_fast(@test_data, accel)
56
+ assert compressed.is_a?(String)
57
+ assert compressed.bytesize > 0
58
+
59
+ decompressed = LZ4Native::LZ4.decompress_safe(compressed, @test_data.bytesize * 2)
60
+ assert_equal @test_data, decompressed
61
+ end
62
+ end
63
+
64
+ def test_decompress_with_exact_size
65
+ compressed = LZ4Native::LZ4.compress_default(@test_data)
66
+ decompressed = LZ4Native::LZ4.decompress_safe(compressed, @test_data.bytesize)
67
+ assert_equal @test_data, decompressed
68
+ end
69
+
70
+ def test_decompress_with_insufficient_size
71
+ compressed = LZ4Native::LZ4.compress_default(@test_data)
72
+ assert_raises(LZ4Native::DecompressionError) do
73
+ LZ4Native::LZ4.decompress_safe(compressed, 10)
74
+ end
75
+ end
76
+
77
+ def test_decompress_corrupt_data
78
+ corrupt_data = "this is not compressed data"
79
+ assert_raises(LZ4Native::DecompressionError) do
80
+ LZ4Native::LZ4.decompress_safe(corrupt_data, 1000)
81
+ end
82
+ end
83
+
84
+ def test_high_level_compress
85
+ compressed = LZ4Native.compress(@test_data)
86
+ assert compressed.is_a?(String)
87
+ assert compressed.bytesize > 0
88
+ end
89
+
90
+ def test_high_level_decompress
91
+ compressed = LZ4Native.compress(@test_data)
92
+ decompressed = LZ4Native.decompress(compressed, @test_data.bytesize * 2)
93
+ assert_equal @test_data, decompressed
94
+ end
95
+
96
+ def test_constants
97
+ assert LZ4Native::LZ4::MAX_INPUT_SIZE.is_a?(Integer)
98
+ assert LZ4Native::LZ4::MAX_INPUT_SIZE > 0
99
+ end
100
+ end