lz4-native-ruby 0.1.1 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +292 -0
- data/LICENSE +55 -21
- data/README.md +109 -15
- data/{vendor/lz4/lib → ext/lz4_native}/Makefile +29 -24
- data/{vendor/lz4/lib → ext/lz4_native}/README.md +1 -1
- data/ext/lz4_native/extconf.rb +33 -0
- data/{vendor/lz4/lib → ext/lz4_native}/liblz4.pc.in +1 -0
- data/{vendor/lz4/lib → ext/lz4_native}/lz4.c +26 -23
- data/{vendor/lz4/lib → ext/lz4_native}/lz4.h +11 -9
- data/ext/lz4_native/lz4_native.c +442 -0
- data/ext/lz4_native/lz4file.c +362 -0
- data/{vendor/lz4/lib → ext/lz4_native}/lz4file.h +32 -9
- data/{vendor/lz4/lib → ext/lz4_native}/lz4frame.c +50 -21
- data/{vendor/lz4/lib → ext/lz4_native}/lz4frame.h +48 -28
- data/{vendor/lz4/lib → ext/lz4_native}/lz4frame_static.h +1 -1
- data/{vendor/lz4/lib → ext/lz4_native}/lz4hc.c +123 -60
- data/{vendor/lz4/lib → ext/lz4_native}/lz4hc.h +1 -1
- data/lib/lz4_native/lz4_native.so +0 -0
- data/lib/lz4_native/version.rb +3 -0
- data/lib/lz4_native.rb +47 -0
- data/test/test_helper.rb +4 -0
- data/test/test_lz4_basic.rb +100 -0
- data/test/test_lz4frame.rb +129 -0
- data/test/test_lz4hc.rb +75 -0
- metadata +50 -43
- data/ext/lz4/extconf.rb +0 -12
- data/ext/lz4/lz4_ext.c +0 -230
- data/lib/lz4/lz4_ext.so +0 -0
- data/lib/lz4/version.rb +0 -3
- data/lib/lz4.rb +0 -60
- data/vendor/lz4/lib/lz4file.c +0 -341
- /data/{vendor/lz4/lib → ext/lz4_native}/LICENSE +0 -0
- /data/{vendor/lz4/lib → ext/lz4_native}/dll/example/Makefile +0 -0
- /data/{vendor/lz4/lib → ext/lz4_native}/dll/example/README.md +0 -0
- /data/{vendor/lz4/lib → ext/lz4_native}/dll/example/fullbench-dll.sln +0 -0
- /data/{vendor/lz4/lib → ext/lz4_native}/dll/example/fullbench-dll.vcxproj +0 -0
- /data/{vendor/lz4/lib → ext/lz4_native}/liblz4-dll.rc.in +0 -0
- /data/{vendor/lz4/lib → ext/lz4_native}/xxhash.c +0 -0
- /data/{vendor/lz4/lib → ext/lz4_native}/xxhash.h +0 -0
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
/*
|
|
2
2
|
LZ4F - LZ4-Frame library
|
|
3
3
|
Header File
|
|
4
|
-
Copyright (
|
|
4
|
+
Copyright (c) Yann Collet. All rights reserved.
|
|
5
|
+
|
|
5
6
|
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
|
6
7
|
|
|
7
8
|
Redistribution and use in source and binary forms, with or without
|
|
@@ -289,6 +290,9 @@ LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
|
|
|
289
290
|
/* Size in bytes of the content checksum. */
|
|
290
291
|
#define LZ4F_CONTENT_CHECKSUM_SIZE 4
|
|
291
292
|
|
|
293
|
+
/* Size in bytes of the endmark. */
|
|
294
|
+
#define LZ4F_ENDMARK_SIZE 4
|
|
295
|
+
|
|
292
296
|
/*! LZ4F_compressBegin() :
|
|
293
297
|
* will write the frame header into dstBuffer.
|
|
294
298
|
* dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
|
|
@@ -462,41 +466,52 @@ LZ4F_getFrameInfo(LZ4F_dctx* dctx,
|
|
|
462
466
|
LZ4F_frameInfo_t* frameInfoPtr,
|
|
463
467
|
const void* srcBuffer, size_t* srcSizePtr);
|
|
464
468
|
|
|
465
|
-
|
|
466
|
-
*
|
|
467
|
-
*
|
|
468
|
-
* The function requires a valid dctx state.
|
|
469
|
-
* It will read up to *srcSizePtr bytes from srcBuffer,
|
|
470
|
-
* and decompress data into dstBuffer, of capacity *dstSizePtr.
|
|
469
|
+
/**
|
|
470
|
+
* @brief Incrementally decompresses an LZ4 frame into user-provided buffers.
|
|
471
471
|
*
|
|
472
|
-
*
|
|
473
|
-
*
|
|
472
|
+
* Call repeatedly until the return value is 0 (frame fully decoded) or an error is reported.
|
|
473
|
+
* On each call, the function consumes up to *srcSizePtr bytes from @p srcBuffer and
|
|
474
|
+
* produces up to *dstSizePtr bytes into @p dstBuffer. It updates both size pointers with
|
|
475
|
+
* the actual number of bytes consumed/produced. There is no separate flush step.
|
|
474
476
|
*
|
|
475
|
-
*
|
|
476
|
-
*
|
|
477
|
+
* Typical loop:
|
|
478
|
+
* - Provide whatever input you have and an available output buffer.
|
|
479
|
+
* - Read how much input was consumed and how much output was produced.
|
|
480
|
+
* - Use the returned value as a hint for how many source bytes are ideal next time.
|
|
477
481
|
*
|
|
478
|
-
*
|
|
479
|
-
*
|
|
482
|
+
* @param[in] dctx A valid decompression context created by LZ4F_createDecompressionContext().
|
|
483
|
+
* @param[out] dstBuffer Destination buffer for decompressed bytes. May change between calls.
|
|
484
|
+
* @param[in,out] dstSizePtr In: capacity of @p dstBuffer in bytes. Out: number of bytes written (<= input value).
|
|
485
|
+
* @param[in] srcBuffer Source buffer containing (more) compressed data. May point to the middle of a larger buffer.
|
|
486
|
+
* @param[in,out] srcSizePtr In: number of available bytes in @p srcBuffer. Out: number of bytes consumed (<= input value).
|
|
487
|
+
* @param[in] optionsPtr Optional decompression options; pass NULL for defaults.
|
|
480
488
|
*
|
|
481
|
-
*
|
|
482
|
-
*
|
|
483
|
-
*
|
|
484
|
-
*
|
|
489
|
+
* @return See @retval cases.
|
|
490
|
+
* @retval >0 Hint (in bytes) for how many source bytes are ideal to provide on the next call.
|
|
491
|
+
* This also indicates the current frame is not yet complete: the decompressor
|
|
492
|
+
* expects more input, or may require additional output space to make progress.
|
|
493
|
+
* User can always pass any amount of input; this value is only a performance hint.
|
|
494
|
+
* @retval 0 The current frame is fully decoded. If *srcSizePtr is less than the provided value,
|
|
495
|
+
* the unconsumed tail is the start of another frame (if any).
|
|
496
|
+
* @retval error An error code; test with LZ4F_isError(ret). After an error, dctx is not
|
|
497
|
+
* resumable: call LZ4F_resetDecompressionContext() before reusing it.
|
|
485
498
|
*
|
|
486
|
-
* @
|
|
487
|
-
*
|
|
488
|
-
*
|
|
489
|
-
* This is just a hint though, it's always possible to provide any srcSize.
|
|
499
|
+
* @pre @p dctx is a valid state created by LZ4F_createDecompressionContext().
|
|
500
|
+
* @post *srcSizePtr and *dstSizePtr are updated with the actual bytes consumed/produced.
|
|
501
|
+
* @p dstBuffer contents in [0, *dstSizePtr) are valid decompressed data.
|
|
490
502
|
*
|
|
491
|
-
*
|
|
492
|
-
*
|
|
493
|
-
*
|
|
503
|
+
* @note The function may not consume all provided input on each call. Always check *srcSizePtr.
|
|
504
|
+
* Present any unconsumed source bytes again on the next call.
|
|
505
|
+
* @note @p dstBuffer content is overwritten; it does not need to be stable across calls.
|
|
506
|
+
* @note After finishing a frame (return==0), you may immediately start feeding the next frame
|
|
507
|
+
* into the same @p dctx (optionally, one can use LZ4F_resetDecompressionContext()).
|
|
494
508
|
*
|
|
495
|
-
*
|
|
496
|
-
*
|
|
497
|
-
*
|
|
509
|
+
* @warning If you called LZ4F_getFrameInfo() beforehand, you must advance @p srcBuffer and
|
|
510
|
+
* decrease *srcSizePtr by the number of bytes it consumed (the frame header). Failing
|
|
511
|
+
* to do so can cause decompression failure or, worse, silent corruption.
|
|
498
512
|
*
|
|
499
|
-
*
|
|
513
|
+
* @see LZ4F_getFrameInfo(), LZ4F_isError(), LZ4F_resetDecompressionContext(),
|
|
514
|
+
* LZ4F_createDecompressionContext(), LZ4F_freeDecompressionContext()
|
|
500
515
|
*/
|
|
501
516
|
LZ4FLIB_API size_t
|
|
502
517
|
LZ4F_decompress(LZ4F_dctx* dctx,
|
|
@@ -743,6 +758,11 @@ LZ4FLIB_STATIC_API LZ4F_cctx* LZ4F_createCompressionContext_advanced(LZ4F_Custom
|
|
|
743
758
|
LZ4FLIB_STATIC_API LZ4F_dctx* LZ4F_createDecompressionContext_advanced(LZ4F_CustomMem customMem, unsigned version);
|
|
744
759
|
LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict_advanced(LZ4F_CustomMem customMem, const void* dictBuffer, size_t dictSize);
|
|
745
760
|
|
|
761
|
+
/*! Context size inspection : v1.10.1+
|
|
762
|
+
* These functions return the total memory footprint of the provided context.
|
|
763
|
+
*/
|
|
764
|
+
LZ4FLIB_STATIC_API size_t LZ4F_cctx_size(const LZ4F_cctx* cctx);
|
|
765
|
+
LZ4FLIB_STATIC_API size_t LZ4F_dctx_size(const LZ4F_dctx* dctx);
|
|
746
766
|
|
|
747
767
|
#if defined (__cplusplus)
|
|
748
768
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/*
|
|
2
2
|
LZ4 HC - High Compression Mode of LZ4
|
|
3
|
-
Copyright (
|
|
3
|
+
Copyright (c) Yann Collet. All rights reserved.
|
|
4
4
|
|
|
5
5
|
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
|
6
6
|
|
|
@@ -262,6 +262,30 @@ static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start)
|
|
|
262
262
|
/**************************************
|
|
263
263
|
* Encode
|
|
264
264
|
**************************************/
|
|
265
|
+
#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2)
|
|
266
|
+
# define RAWLOG(...) fprintf(stderr, __VA_ARGS__)
|
|
267
|
+
void LZ4HC_hexOut(const void* src, size_t len)
|
|
268
|
+
{
|
|
269
|
+
const BYTE* p = (const BYTE*)src;
|
|
270
|
+
size_t n;
|
|
271
|
+
for (n=0; n<len; n++) {
|
|
272
|
+
RAWLOG("%02X ", p[n]);
|
|
273
|
+
}
|
|
274
|
+
RAWLOG(" \n");
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
# define HEX_CMP(_lev, _ptr, _ref, _len) \
|
|
278
|
+
if (LZ4_DEBUG >= _lev) { \
|
|
279
|
+
RAWLOG("match bytes: "); \
|
|
280
|
+
LZ4HC_hexOut(_ptr, _len); \
|
|
281
|
+
RAWLOG("ref bytes: "); \
|
|
282
|
+
LZ4HC_hexOut(_ref, _len); \
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
#else
|
|
286
|
+
# define HEX_CMP(l,p,r,_l)
|
|
287
|
+
#endif
|
|
288
|
+
|
|
265
289
|
/* LZ4HC_encodeSequence() :
|
|
266
290
|
* @return : 0 if ok,
|
|
267
291
|
* 1 if buffer issue detected */
|
|
@@ -278,47 +302,49 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
|
|
|
278
302
|
#define op (*_op)
|
|
279
303
|
#define anchor (*_anchor)
|
|
280
304
|
|
|
281
|
-
size_t length;
|
|
282
305
|
BYTE* const token = op++;
|
|
283
306
|
|
|
284
307
|
#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6)
|
|
285
308
|
static const BYTE* start = NULL;
|
|
286
309
|
static U32 totalCost = 0;
|
|
287
|
-
U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start);
|
|
310
|
+
U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start); /* only works for single segment */
|
|
288
311
|
U32 const ll = (U32)(ip - anchor);
|
|
289
312
|
U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0;
|
|
290
313
|
U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0;
|
|
291
314
|
U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
|
|
292
315
|
if (start==NULL) start = anchor; /* only works for single segment */
|
|
293
|
-
/* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
|
|
294
316
|
DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5i, cost:%4u + %5u",
|
|
295
317
|
pos,
|
|
296
318
|
(U32)(ip - anchor), matchLength, offset,
|
|
297
319
|
cost, totalCost);
|
|
320
|
+
# if 1 /* only works on single segment data */
|
|
321
|
+
HEX_CMP(7, ip, ip-offset, matchLength);
|
|
322
|
+
# endif
|
|
298
323
|
totalCost += cost;
|
|
299
324
|
#endif
|
|
300
325
|
|
|
301
326
|
/* Encode Literal length */
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
327
|
+
{ size_t litLen = (size_t)(ip - anchor);
|
|
328
|
+
LZ4_STATIC_ASSERT(notLimited == 0);
|
|
329
|
+
/* Check output limit */
|
|
330
|
+
if (limit && ((op + (litLen / 255) + litLen + (2 + 1 + LASTLITERALS)) > oend)) {
|
|
331
|
+
DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
|
|
332
|
+
(int)litLen, (int)(oend - op));
|
|
333
|
+
return 1;
|
|
334
|
+
}
|
|
335
|
+
if (litLen >= RUN_MASK) {
|
|
336
|
+
size_t len = litLen - RUN_MASK;
|
|
337
|
+
*token = (RUN_MASK << ML_BITS);
|
|
338
|
+
for(; len >= 255 ; len -= 255) *op++ = 255;
|
|
339
|
+
*op++ = (BYTE)len;
|
|
340
|
+
} else {
|
|
341
|
+
*token = (BYTE)(litLen << ML_BITS);
|
|
342
|
+
}
|
|
318
343
|
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
344
|
+
/* Copy Literals */
|
|
345
|
+
LZ4_wildCopy8(op, anchor, op + litLen);
|
|
346
|
+
op += litLen;
|
|
347
|
+
}
|
|
322
348
|
|
|
323
349
|
/* Encode Offset */
|
|
324
350
|
assert(offset <= LZ4_DISTANCE_MAX );
|
|
@@ -327,20 +353,20 @@ LZ4_FORCE_INLINE int LZ4HC_encodeSequence (
|
|
|
327
353
|
|
|
328
354
|
/* Encode MatchLength */
|
|
329
355
|
assert(matchLength >= MINMATCH);
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
}
|
|
356
|
+
{ size_t mlCode = (size_t)matchLength - MINMATCH;
|
|
357
|
+
if (limit && (op + (mlCode / 255) + (1 + LASTLITERALS) > oend)) {
|
|
358
|
+
DEBUGLOG(6, "Not enough room to write match length");
|
|
359
|
+
return 1; /* Check output limit */
|
|
360
|
+
}
|
|
361
|
+
if (mlCode >= ML_MASK) {
|
|
362
|
+
*token += ML_MASK;
|
|
363
|
+
mlCode -= ML_MASK;
|
|
364
|
+
for(; mlCode >= 510 ; mlCode -= 510) { *op++ = 255; *op++ = 255; }
|
|
365
|
+
if (mlCode >= 255) { mlCode -= 255; *op++ = 255; }
|
|
366
|
+
*op++ = (BYTE)mlCode;
|
|
367
|
+
} else {
|
|
368
|
+
*token += (BYTE)(mlCode);
|
|
369
|
+
} }
|
|
344
370
|
|
|
345
371
|
/* Prepare next loop */
|
|
346
372
|
ip += matchLength;
|
|
@@ -519,6 +545,12 @@ static LZ4MID_searchIntoDict_f select_searchDict_function(const LZ4HC_CCtx_inter
|
|
|
519
545
|
return LZ4MID_searchHCDict;
|
|
520
546
|
}
|
|
521
547
|
|
|
548
|
+
/* preconditions:
|
|
549
|
+
* - *srcSizePtr within [1, LZ4_MAX_INPUT_SIZE]
|
|
550
|
+
* - src is valid
|
|
551
|
+
* - maxOutputSize >= 1
|
|
552
|
+
* - dst is valid
|
|
553
|
+
*/
|
|
522
554
|
static int LZ4MID_compress (
|
|
523
555
|
LZ4HC_CCtx_internal* const ctx,
|
|
524
556
|
const char* const src,
|
|
@@ -550,18 +582,16 @@ static int LZ4MID_compress (
|
|
|
550
582
|
unsigned matchLength;
|
|
551
583
|
unsigned matchDistance;
|
|
552
584
|
|
|
553
|
-
/* input sanitization */
|
|
554
585
|
DEBUGLOG(5, "LZ4MID_compress (%i bytes)", *srcSizePtr);
|
|
586
|
+
|
|
587
|
+
/* preconditions verifications */
|
|
555
588
|
if (dict == usingDictCtxHc) DEBUGLOG(5, "usingDictCtxHc");
|
|
556
|
-
assert(*srcSizePtr
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
/* forbidden: no input is allowed to be that large */
|
|
563
|
-
return 0;
|
|
564
|
-
}
|
|
589
|
+
assert(*srcSizePtr > 0);
|
|
590
|
+
assert(*srcSizePtr <= LZ4_MAX_INPUT_SIZE);
|
|
591
|
+
assert(src != NULL);
|
|
592
|
+
assert(maxOutputSize >= 1);
|
|
593
|
+
assert(dst != NULL);
|
|
594
|
+
|
|
565
595
|
if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
|
|
566
596
|
if (*srcSizePtr < LZ4_minLength)
|
|
567
597
|
goto _lz4mid_last_literals; /* Input too small, no compression (all literals) */
|
|
@@ -940,6 +970,7 @@ LZ4HC_InsertAndGetWiderMatch (
|
|
|
940
970
|
offset = (int)(ipIndex - matchIndex);
|
|
941
971
|
sBack = back;
|
|
942
972
|
DEBUGLOG(7, "Found match of len=%i within prefix, offset=%i, back=%i", longest, offset, -back);
|
|
973
|
+
HEX_CMP(7, ip + back, ip + back - offset, (size_t)matchLength);
|
|
943
974
|
} } }
|
|
944
975
|
} else { /* lowestMatchIndex <= matchIndex < dictLimit : within Ext Dict */
|
|
945
976
|
const BYTE* const matchPtr = dictStart + (matchIndex - dictIdx);
|
|
@@ -959,6 +990,7 @@ LZ4HC_InsertAndGetWiderMatch (
|
|
|
959
990
|
offset = (int)(ipIndex - matchIndex);
|
|
960
991
|
sBack = back;
|
|
961
992
|
DEBUGLOG(7, "Found match of len=%i within dict, offset=%i, back=%i", longest, offset, -back);
|
|
993
|
+
HEX_CMP(7, ip + back, matchPtr + back, (size_t)matchLength);
|
|
962
994
|
} } }
|
|
963
995
|
|
|
964
996
|
if (chainSwap && matchLength==longest) { /* better match => select a better chain */
|
|
@@ -1118,10 +1150,16 @@ LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table wi
|
|
|
1118
1150
|
}
|
|
1119
1151
|
|
|
1120
1152
|
|
|
1153
|
+
/* preconditions:
|
|
1154
|
+
* - *srcSizePtr within [1, LZ4_MAX_INPUT_SIZE]
|
|
1155
|
+
* - src is valid
|
|
1156
|
+
* - maxOutputSize >= 1
|
|
1157
|
+
* - dst is valid
|
|
1158
|
+
*/
|
|
1121
1159
|
LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
|
|
1122
1160
|
LZ4HC_CCtx_internal* const ctx,
|
|
1123
|
-
const char* const
|
|
1124
|
-
char* const
|
|
1161
|
+
const char* const src,
|
|
1162
|
+
char* const dst,
|
|
1125
1163
|
int* srcSizePtr,
|
|
1126
1164
|
int const maxOutputSize,
|
|
1127
1165
|
int maxNbAttempts,
|
|
@@ -1132,14 +1170,14 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
|
|
|
1132
1170
|
const int inputSize = *srcSizePtr;
|
|
1133
1171
|
const int patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */
|
|
1134
1172
|
|
|
1135
|
-
const BYTE* ip = (const BYTE*)
|
|
1173
|
+
const BYTE* ip = (const BYTE*)src;
|
|
1136
1174
|
const BYTE* anchor = ip;
|
|
1137
1175
|
const BYTE* const iend = ip + inputSize;
|
|
1138
1176
|
const BYTE* const mflimit = iend - MFLIMIT;
|
|
1139
1177
|
const BYTE* const matchlimit = (iend - LASTLITERALS);
|
|
1140
1178
|
|
|
1141
|
-
BYTE* optr = (BYTE*)
|
|
1142
|
-
BYTE* op = (BYTE*)
|
|
1179
|
+
BYTE* optr = (BYTE*) dst;
|
|
1180
|
+
BYTE* op = (BYTE*) dst;
|
|
1143
1181
|
BYTE* oend = op + maxOutputSize;
|
|
1144
1182
|
|
|
1145
1183
|
const BYTE* start0;
|
|
@@ -1150,6 +1188,13 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
|
|
|
1150
1188
|
|
|
1151
1189
|
/* init */
|
|
1152
1190
|
DEBUGLOG(5, "LZ4HC_compress_hashChain (dict?=>%i)", dict);
|
|
1191
|
+
|
|
1192
|
+
/* preconditions verifications */
|
|
1193
|
+
assert(*srcSizePtr >= 1);
|
|
1194
|
+
assert(src != NULL);
|
|
1195
|
+
assert(maxOutputSize >= 1);
|
|
1196
|
+
assert(dst != NULL);
|
|
1197
|
+
|
|
1153
1198
|
*srcSizePtr = 0;
|
|
1154
1199
|
if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
|
|
1155
1200
|
if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
|
|
@@ -1334,8 +1379,8 @@ _last_literals:
|
|
|
1334
1379
|
}
|
|
1335
1380
|
|
|
1336
1381
|
/* End */
|
|
1337
|
-
*srcSizePtr = (int) (((const char*)ip) -
|
|
1338
|
-
return (int) (((char*)op)-
|
|
1382
|
+
*srcSizePtr = (int) (((const char*)ip) - src);
|
|
1383
|
+
return (int) (((char*)op)-dst);
|
|
1339
1384
|
|
|
1340
1385
|
_dest_overflow:
|
|
1341
1386
|
if (limit == fillOutput) {
|
|
@@ -1370,7 +1415,7 @@ static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx,
|
|
|
1370
1415
|
const dictCtx_directive dict,
|
|
1371
1416
|
const HCfavor_e favorDecSpeed);
|
|
1372
1417
|
|
|
1373
|
-
|
|
1418
|
+
static int
|
|
1374
1419
|
LZ4HC_compress_generic_internal (
|
|
1375
1420
|
LZ4HC_CCtx_internal* const ctx,
|
|
1376
1421
|
const char* const src,
|
|
@@ -1382,11 +1427,15 @@ LZ4HC_compress_generic_internal (
|
|
|
1382
1427
|
const dictCtx_directive dict
|
|
1383
1428
|
)
|
|
1384
1429
|
{
|
|
1385
|
-
DEBUGLOG(5, "LZ4HC_compress_generic_internal(src=%p, srcSize=%d)",
|
|
1386
|
-
src, *srcSizePtr);
|
|
1430
|
+
DEBUGLOG(5, "LZ4HC_compress_generic_internal(src=%p, srcSize=%d, dstCapacity=%d)",
|
|
1431
|
+
src, *srcSizePtr, dstCapacity);
|
|
1387
1432
|
|
|
1388
|
-
|
|
1433
|
+
/* input sanitization */
|
|
1389
1434
|
if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */
|
|
1435
|
+
if (dstCapacity < 1) return 0; /* Invalid: impossible to store anything */
|
|
1436
|
+
assert(dst); /* since dstCapacity >= 1, dst must be valid */
|
|
1437
|
+
if (*srcSizePtr == 0) { *dst = 0; return 1; }
|
|
1438
|
+
assert(src != NULL); /* since *srcSizePtr >= 1, src must be valid */
|
|
1390
1439
|
|
|
1391
1440
|
ctx->end += *srcSizePtr;
|
|
1392
1441
|
{ cParams_t const cParam = LZ4HC_getCLevelParams(cLevel);
|
|
@@ -1820,6 +1869,13 @@ LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
|
|
|
1820
1869
|
}
|
|
1821
1870
|
|
|
1822
1871
|
|
|
1872
|
+
|
|
1873
|
+
/* preconditions:
|
|
1874
|
+
* - *srcSizePtr within [1, LZ4_MAX_INPUT_SIZE]
|
|
1875
|
+
* - src is valid
|
|
1876
|
+
* - maxOutputSize >= 1
|
|
1877
|
+
* - dst is valid
|
|
1878
|
+
*/
|
|
1823
1879
|
static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
|
|
1824
1880
|
const char* const source,
|
|
1825
1881
|
char* dst,
|
|
@@ -1837,7 +1893,7 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
|
|
|
1837
1893
|
#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
|
|
1838
1894
|
LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS));
|
|
1839
1895
|
#else
|
|
1840
|
-
LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which
|
|
1896
|
+
LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which can be a bit large for some stacks... */
|
|
1841
1897
|
#endif
|
|
1842
1898
|
|
|
1843
1899
|
const BYTE* ip = (const BYTE*) source;
|
|
@@ -1852,10 +1908,17 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx,
|
|
|
1852
1908
|
int ovoff = 0;
|
|
1853
1909
|
|
|
1854
1910
|
/* init */
|
|
1911
|
+
DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity);
|
|
1855
1912
|
#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
|
|
1856
1913
|
if (opt == NULL) goto _return_label;
|
|
1857
1914
|
#endif
|
|
1858
|
-
|
|
1915
|
+
|
|
1916
|
+
/* preconditions verifications */
|
|
1917
|
+
assert(dstCapacity > 0);
|
|
1918
|
+
assert(dst != NULL);
|
|
1919
|
+
assert(*srcSizePtr > 0);
|
|
1920
|
+
assert(source != NULL);
|
|
1921
|
+
|
|
1859
1922
|
*srcSizePtr = 0;
|
|
1860
1923
|
if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */
|
|
1861
1924
|
if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/*
|
|
2
2
|
LZ4 HC - High Compression Mode of LZ4
|
|
3
3
|
Header File
|
|
4
|
-
Copyright (
|
|
4
|
+
Copyright (c) Yann Collet. All rights reserved.
|
|
5
5
|
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
|
6
6
|
|
|
7
7
|
Redistribution and use in source and binary forms, with or without
|
|
Binary file
|
data/lib/lz4_native.rb
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
require "lz4_native/version"
|
|
2
|
+
require "lz4_native/lz4_native" # C extension
|
|
3
|
+
|
|
4
|
+
module LZ4Native
|
|
5
|
+
class Error < StandardError; end
|
|
6
|
+
class CompressionError < Error; end
|
|
7
|
+
class DecompressionError < Error; end
|
|
8
|
+
class FrameError < Error; end
|
|
9
|
+
|
|
10
|
+
# Simple block compression using default LZ4
|
|
11
|
+
# @param data [String] Data to compress
|
|
12
|
+
# @return [String] Compressed data
|
|
13
|
+
def self.compress(data)
|
|
14
|
+
LZ4.compress_default(data)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Simple block decompression
|
|
18
|
+
# @param data [String] Compressed data
|
|
19
|
+
# @param max_size [Integer] Maximum expected decompressed size
|
|
20
|
+
# @return [String] Decompressed data
|
|
21
|
+
def self.decompress(data, max_size)
|
|
22
|
+
LZ4.decompress_safe(data, max_size)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# High compression mode
|
|
26
|
+
# @param data [String] Data to compress
|
|
27
|
+
# @param level [Integer] Compression level (1-12, default 9)
|
|
28
|
+
# @return [String] Compressed data
|
|
29
|
+
def self.compress_hc(data, level = 9)
|
|
30
|
+
LZ4HC.compress(data, level)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Frame compression (self-contained format)
|
|
34
|
+
# @param data [String] Data to compress
|
|
35
|
+
# @param options [Hash] Frame compression options
|
|
36
|
+
# @return [String] Compressed frame
|
|
37
|
+
def self.compress_frame(data, options = {})
|
|
38
|
+
LZ4Frame.compress_frame(data, options)
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# Frame decompression
|
|
42
|
+
# @param data [String] Compressed frame data
|
|
43
|
+
# @return [String] Decompressed data
|
|
44
|
+
def self.decompress_frame(data)
|
|
45
|
+
LZ4Frame.decompress_frame(data)
|
|
46
|
+
end
|
|
47
|
+
end
|
data/test/test_helper.rb
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
require "test_helper"
|
|
2
|
+
|
|
3
|
+
class TestLZ4Basic < Minitest::Test
|
|
4
|
+
def setup
|
|
5
|
+
@test_data = "Hello, World! " * 100
|
|
6
|
+
@small_data = "test"
|
|
7
|
+
@large_data = "A" * 100000
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def test_version_number
|
|
11
|
+
version = LZ4Native::LZ4.version_number
|
|
12
|
+
assert version.is_a?(Integer)
|
|
13
|
+
assert version > 0
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def test_version_string
|
|
17
|
+
version = LZ4Native::LZ4.version_string
|
|
18
|
+
assert version.is_a?(String)
|
|
19
|
+
assert version.length > 0
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def test_compress_bound
|
|
23
|
+
bound = LZ4Native::LZ4.compress_bound(1000)
|
|
24
|
+
assert bound.is_a?(Integer)
|
|
25
|
+
assert bound > 1000
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def test_compress_decompress_default
|
|
29
|
+
compressed = LZ4Native::LZ4.compress_default(@test_data)
|
|
30
|
+
assert compressed.is_a?(String)
|
|
31
|
+
assert compressed.bytesize < @test_data.bytesize
|
|
32
|
+
assert compressed.bytesize > 0
|
|
33
|
+
|
|
34
|
+
decompressed = LZ4Native::LZ4.decompress_safe(compressed, @test_data.bytesize * 2)
|
|
35
|
+
assert_equal @test_data, decompressed
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def test_compress_decompress_small_data
|
|
39
|
+
compressed = LZ4Native::LZ4.compress_default(@small_data)
|
|
40
|
+
decompressed = LZ4Native::LZ4.decompress_safe(compressed, @small_data.bytesize * 2)
|
|
41
|
+
assert_equal @small_data, decompressed
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def test_compress_decompress_large_data
|
|
45
|
+
compressed = LZ4Native::LZ4.compress_default(@large_data)
|
|
46
|
+
assert compressed.bytesize < @large_data.bytesize
|
|
47
|
+
|
|
48
|
+
decompressed = LZ4Native::LZ4.decompress_safe(compressed, @large_data.bytesize * 2)
|
|
49
|
+
assert_equal @large_data, decompressed
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def test_compress_fast
|
|
53
|
+
accelerations = [1, 5, 10]
|
|
54
|
+
accelerations.each do |accel|
|
|
55
|
+
compressed = LZ4Native::LZ4.compress_fast(@test_data, accel)
|
|
56
|
+
assert compressed.is_a?(String)
|
|
57
|
+
assert compressed.bytesize > 0
|
|
58
|
+
|
|
59
|
+
decompressed = LZ4Native::LZ4.decompress_safe(compressed, @test_data.bytesize * 2)
|
|
60
|
+
assert_equal @test_data, decompressed
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def test_decompress_with_exact_size
|
|
65
|
+
compressed = LZ4Native::LZ4.compress_default(@test_data)
|
|
66
|
+
decompressed = LZ4Native::LZ4.decompress_safe(compressed, @test_data.bytesize)
|
|
67
|
+
assert_equal @test_data, decompressed
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def test_decompress_with_insufficient_size
|
|
71
|
+
compressed = LZ4Native::LZ4.compress_default(@test_data)
|
|
72
|
+
assert_raises(LZ4Native::DecompressionError) do
|
|
73
|
+
LZ4Native::LZ4.decompress_safe(compressed, 10)
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def test_decompress_corrupt_data
|
|
78
|
+
corrupt_data = "this is not compressed data"
|
|
79
|
+
assert_raises(LZ4Native::DecompressionError) do
|
|
80
|
+
LZ4Native::LZ4.decompress_safe(corrupt_data, 1000)
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def test_high_level_compress
|
|
85
|
+
compressed = LZ4Native.compress(@test_data)
|
|
86
|
+
assert compressed.is_a?(String)
|
|
87
|
+
assert compressed.bytesize > 0
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def test_high_level_decompress
|
|
91
|
+
compressed = LZ4Native.compress(@test_data)
|
|
92
|
+
decompressed = LZ4Native.decompress(compressed, @test_data.bytesize * 2)
|
|
93
|
+
assert_equal @test_data, decompressed
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def test_constants
|
|
97
|
+
assert LZ4Native::LZ4::MAX_INPUT_SIZE.is_a?(Integer)
|
|
98
|
+
assert LZ4Native::LZ4::MAX_INPUT_SIZE > 0
|
|
99
|
+
end
|
|
100
|
+
end
|