lz4-ruby 0.1.6 → 0.1.7
- data/VERSION +1 -1
- data/ext/lz4ruby/lz4.c +99 -57
- data/ext/lz4ruby/lz4.h +24 -27
- data/ext/lz4ruby/lz4hc.c +27 -19
- data/ext/lz4ruby/lz4ruby.c +45 -78
- data/lib/lz4-ruby.rb +57 -8
- data/test/helper.rb +1 -1
- data/test/test_lz4-ruby.rb +28 -6
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
-0.1.6
+0.1.7
data/ext/lz4ruby/lz4.c
CHANGED
@@ -34,31 +34,24 @@
 //**************************************
 // Tuning parameters
 //**************************************
-//
-//
-//
-// Reduced memory usage
-//
-#define
-
-//
+// MEMORY_USAGE :
+// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+// Increasing memory usage improves compression ratio
+// Reduced memory usage can improve speed, due to cache effect
+// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+#define MEMORY_USAGE 14
+
+// NOTCOMPRESSIBLE_DETECTIONLEVEL :
 // Decreasing this value will make the algorithm skip faster data segments considered "incompressible"
 // This may decrease compression ratio dramatically, but will be faster on incompressible data
 // Increasing this value will make the algorithm search more before declaring a segment "incompressible"
 // This could improve compression a bit, but will be slower on incompressible data
 // The default value (6) is recommended
-#define
-
-// LZ4_COMPRESSMIN :
-// Compression function will *fail* if it is not successful at compressing input by at least LZ4_COMPRESSMIN bytes
-// Since the compression function stops working prematurely, it results in a speed gain
-// The output however is unusable. Compression function result will be zero.
-// Default : 0 = disabled
-#define LZ4_COMPRESSMIN 0
+#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6
 
 // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
-// This will provide a boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU.
-// You can set this option to 1 in situations where data will
+// This will provide a small boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU.
+// You can set this option to 1 in situations where data will remain within closed environment
 // This option is useless on Little_Endian CPU (such as x86)
 //#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
 
@@ -108,6 +101,7 @@
 
 #ifdef _MSC_VER  // Visual Studio
 # define inline __forceinline  // Visual is not C99, but supports some kind of inline
+# include <intrin.h>           // For Visual 2005
 # if LZ4_ARCH64  // 64-bit
 #  pragma intrinsic(_BitScanForward64)  // For Visual 2005
 #  pragma intrinsic(_BitScanReverse64)  // For Visual 2005
@@ -181,11 +175,11 @@ typedef struct _U64_S { U64 v; } U64_S;
 //**************************************
 #define MINMATCH 4
 
-#define HASH_LOG
+#define HASH_LOG (MEMORY_USAGE-2)
 #define HASHTABLESIZE (1 << HASH_LOG)
 #define HASH_MASK (HASHTABLESIZE - 1)
 
-#define SKIPSTRENGTH (
+#define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2)
 #define STACKLIMIT 13
 #define HEAPMODE (HASH_LOG>STACKLIMIT)  // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()).
 #define COPYLENGTH 8
@@ -257,7 +251,7 @@ struct refTables
 //****************************
 #if LZ4_ARCH64
 
-inline static int LZ4_NbCommonBytes (register U64 val)
+static inline int LZ4_NbCommonBytes (register U64 val)
 {
 #if defined(LZ4_BIG_ENDIAN)
 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
@@ -289,7 +283,7 @@ inline static int LZ4_NbCommonBytes (register U64 val)
 
 #else
 
-inline static int LZ4_NbCommonBytes (register U32 val)
+static inline int LZ4_NbCommonBytes (register U32 val)
 {
 #if defined(LZ4_BIG_ENDIAN)
 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
@@ -321,25 +315,22 @@ inline static int LZ4_NbCommonBytes (register U32 val)
 #endif
 
 
-//****************************
-// Public functions
-//****************************
-
-int LZ4_compressBound(int isize)
-{
-    return (isize + (isize/255) + 16);
-}
-
-
 
 //******************************
 // Compression functions
 //******************************
 
-int LZ4_compressCtx(void** ctx,
+// LZ4_compressCtx :
+// -----------------
+// Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+// If it cannot achieve it, compression will stop, and result of the function will be zero.
+// return : the number of bytes written in buffer 'dest', or 0 if the compression fails
+
+static inline int LZ4_compressCtx(void** ctx,
                  const char* source,
                  char* dest,
-                 int isize)
+                 int isize,
+                 int maxOutputSize)
 {
 #if HEAPMODE
     struct refTables *srt = (struct refTables *) (*ctx);
@@ -356,6 +347,7 @@ int LZ4_compressCtx(void** ctx,
 #define matchlimit (iend - LASTLITERALS)
 
     BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + maxOutputSize;
 
     int len, length;
    const int skipStrength = SKIPSTRENGTH;
@@ -408,17 +400,37 @@ int LZ4_compressCtx(void** ctx,
        while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
 
        // Encode Literal length
-       length = ip - anchor;
+       length = (int)(ip - anchor);
        token = op++;
+       if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) >= oend) return 0;  // Check output limit
+#ifdef _MSC_VER
+       if (length>=(int)RUN_MASK)
+       {
+           int len = length-RUN_MASK;
+           *token=(RUN_MASK<<ML_BITS);
+           if (len>254)
+           {
+               do { *op++ = 255; len -= 255; } while (len>254);
+               *op++ = (BYTE)len;
+               memcpy(op, anchor, length);
+               op += length;
+               goto _next_match;
+           }
+           else
+           *op++ = (BYTE)len;
+       }
+       else *token = (length<<ML_BITS);
+#else
        if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
        else *token = (length<<ML_BITS);
+#endif
 
        // Copy Literals
        LZ4_BLINDCOPY(anchor, op, length);
 
_next_match:
       // Encode Offset
-       LZ4_WRITE_LITTLEENDIAN_16(op,ip-ref);
+       LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
 
       // Start Counting
       ip+=MINMATCH; ref+=MINMATCH;  // MinMatch verified
@@ -436,7 +448,7 @@ _next_match:
_endCount:
 
       // Encode MatchLength
-       len = (ip - anchor);
+       len = (int)(ip - anchor);
       if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; }
       else *token += len;
 
@@ -459,8 +471,8 @@ _endCount:
_last_literals:
    // Encode Last Literals
    {
-       int lastRun = iend - anchor;
-       if ((
+       int lastRun = (int)(iend - anchor);
+       if (((char*)op - dest) + lastRun + 1 + ((lastRun-15)/255) >= maxOutputSize) return 0;
       if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
       else *op++ = (lastRun<<ML_BITS);
       memcpy(op, anchor, iend - anchor);
@@ -479,10 +491,11 @@ _last_literals:
 #define HASH64KTABLESIZE (1U<<HASHLOG64K)
 #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG64K))
 #define LZ4_HASH64K_VALUE(p)    LZ4_HASH64K_FUNCTION(A32(p))
-int LZ4_compress64kCtx(void** ctx,
+static inline int LZ4_compress64kCtx(void** ctx,
                 const char* source,
                 char* dest,
-                int isize)
+                int isize,
+                int maxOutputSize)
 {
 #if HEAPMODE
    struct refTables *srt = (struct refTables *) (*ctx);
@@ -499,6 +512,7 @@ int LZ4_compress64kCtx(void** ctx,
 #define matchlimit (iend - LASTLITERALS)
 
    BYTE* op = (BYTE*) dest;
+   BYTE* const oend = op + maxOutputSize;
 
    int len, length;
    const int skipStrength = SKIPSTRENGTH;
@@ -542,7 +556,7 @@ int LZ4_compress64kCtx(void** ctx,
 
           forwardH = LZ4_HASH64K_VALUE(forwardIp);
           ref = base + HashTable[h];
-           HashTable[h] = ip - base;
+           HashTable[h] = (U16)(ip - base);
 
       } while (A32(ref) != A32(ip));
 
@@ -550,17 +564,37 @@ int LZ4_compress64kCtx(void** ctx,
       while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; }
 
       // Encode Literal length
-       length = ip - anchor;
+       length = (int)(ip - anchor);
       token = op++;
+       if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) >= oend) return 0;  // Check output limit
+#ifdef _MSC_VER
+       if (length>=(int)RUN_MASK)
+       {
+           int len = length-RUN_MASK;
+           *token=(RUN_MASK<<ML_BITS);
+           if (len>254)
+           {
+               do { *op++ = 255; len -= 255; } while (len>254);
+               *op++ = (BYTE)len;
+               memcpy(op, anchor, length);
+               op += length;
+               goto _next_match;
+           }
+           else
+           *op++ = (BYTE)len;
+       }
+       else *token = (length<<ML_BITS);
+#else
       if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
       else *token = (length<<ML_BITS);
+#endif
 
       // Copy Literals
       LZ4_BLINDCOPY(anchor, op, length);
 
_next_match:
       // Encode Offset
-       LZ4_WRITE_LITTLEENDIAN_16(op,ip-ref);
+       LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
 
       // Start Counting
       ip+=MINMATCH; ref+=MINMATCH;  // MinMatch verified
@@ -578,7 +612,7 @@ _next_match:
_endCount:
 
       // Encode MatchLength
-       len = (ip - anchor);
+       len = (int)(ip - anchor);
       if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; }
       else *token += len;
 
@@ -586,11 +620,11 @@ _endCount:
       if (ip > mflimit) { anchor = ip; break; }
 
       // Fill table
-       HashTable[LZ4_HASH64K_VALUE(ip-2)] = ip - 2 - base;
+       HashTable[LZ4_HASH64K_VALUE(ip-2)] = (U16)(ip - 2 - base);
 
       // Test next position
       ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
-       HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
+       HashTable[LZ4_HASH64K_VALUE(ip)] = (U16)(ip - base);
       if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; }
 
       // Prepare next loop
@@ -601,8 +635,8 @@ _endCount:
_last_literals:
    // Encode Last Literals
    {
-       int lastRun = iend - anchor;
-       if ((
+       int lastRun = (int)(iend - anchor);
+       if (((char*)op - dest) + lastRun + 1 + ((lastRun)>>8) >= maxOutputSize) return 0;
       if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
       else *op++ = (lastRun<<ML_BITS);
       memcpy(op, anchor, iend - anchor);
@@ -614,26 +648,34 @@ _last_literals:
 }
 
 
-
-
-
-
+int LZ4_compress_limitedOutput(const char* source,
+                 char* dest,
+                 int isize,
+                 int maxOutputSize)
 {
 #if HEAPMODE
    void* ctx = malloc(sizeof(struct refTables));
    int result;
    if (isize < LZ4_64KLIMIT)
-       result = LZ4_compress64kCtx(&ctx, source, dest, isize);
-   else result = LZ4_compressCtx(&ctx, source, dest, isize);
+       result = LZ4_compress64kCtx(&ctx, source, dest, isize, maxOutputSize);
+   else result = LZ4_compressCtx(&ctx, source, dest, isize, maxOutputSize);
    free(ctx);
    return result;
 #else
-   if (isize < (int)LZ4_64KLIMIT) return LZ4_compress64kCtx(NULL, source, dest, isize);
-   return LZ4_compressCtx(NULL, source, dest, isize);
+   if (isize < (int)LZ4_64KLIMIT) return LZ4_compress64kCtx(NULL, source, dest, isize, maxOutputSize);
+   return LZ4_compressCtx(NULL, source, dest, isize, maxOutputSize);
 #endif
 }
 
+int LZ4_compress(const char* source,
+                 char* dest,
+                 int isize)
+{
+   return LZ4_compress_limitedOutput(source, dest, isize, LZ4_compressBound(isize));
+}
+
+
 
 
 //****************************
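The hunks above thread a maxOutputSize bound (oend) through both compressCtx variants: LZ4_compress_limitedOutput() now returns 0 instead of overrunning dest when the input cannot be compressed into the given budget, and LZ4_compress() becomes a thin wrapper that passes LZ4_compressBound(isize). A minimal caller sketch (not from this gem; buffer sizes are illustrative):

    /* Sketch: compress into a deliberately tight buffer and handle the
     * zero return that signals "output did not fit". */
    #include <stdio.h>
    #include <string.h>
    #include "lz4.h"

    int main(void)
    {
        char src[1000];
        char dst[64];                  /* smaller than LZ4_compressBound(1000) */
        int written;

        memset(src, 'a', sizeof(src)); /* highly compressible input */

        written = LZ4_compress_limitedOutput(src, dst, (int)sizeof(src), (int)sizeof(dst));
        if (written == 0)
            printf("output did not fit in %d bytes\n", (int)sizeof(dst));
        else
            printf("compressed %d -> %d bytes\n", (int)sizeof(src), written);
        return 0;
    }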
data/ext/lz4ruby/lz4.h
CHANGED
@@ -47,19 +47,22 @@ int LZ4_uncompress (const char* source, char* dest, int osize);
 
 /*
 LZ4_compress() :
+    Compresses 'isize' bytes from 'source' into 'dest'.
+    Destination buffer must be already allocated,
+    and must be sized to handle worst cases situations (input data not compressible)
+    Worst case size evaluation is provided by macro LZ4_compressBound()
+
    isize  : is the input size. Max supported value is ~1.9GB
    return : the number of bytes written in buffer dest
-
-   note : destination buffer must be already allocated.
-       destination buffer must be sized to handle worst cases situations (input data not compressible)
-       worst case size evaluation is provided by function LZ4_compressBound()
+
 
 LZ4_uncompress() :
    osize  : is the output size, therefore the original size
    return : the number of bytes read in the source buffer
       If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
       This function never writes beyond dest + osize, and is therefore protected against malicious data packets
-   note : destination buffer must be already allocated
+   note : destination buffer must be already allocated.
+       its size must be a minimum of 'osize' bytes.
 */
 
 
@@ -67,7 +70,7 @@ LZ4_uncompress() :
 // Advanced Functions
 //****************************
 
-int LZ4_compressBound(int isize);
+#define LZ4_compressBound(isize)   (isize + (isize/255) + 16)
 
 /*
 LZ4_compressBound() :
@@ -80,6 +83,21 @@ LZ4_compressBound() :
 */
 
 
+int LZ4_compress_limitedOutput (const char* source, char* dest, int isize, int maxOutputSize);
+
+/*
+LZ4_compress_limitedOutput() :
+    Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+    If it cannot achieve it, compression will stop, and result of the function will be zero.
+    This function never writes outside of provided output buffer.
+
+    isize  : is the input size. Max supported value is ~1.9GB
+    maxOutputSize : is the size of the destination buffer (which must be already allocated)
+    return : the number of bytes written in buffer 'dest'
+         or 0 if the compression fails
+*/
+
+
 int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
 
 /*
@@ -94,27 +112,6 @@ LZ4_uncompress_unknownOutputSize() :
 */
 
 
-int LZ4_compressCtx(void** ctx, const char* source,  char* dest, int isize);
-int LZ4_compress64kCtx(void** ctx, const char* source,  char* dest, int isize);
-
-/*
-LZ4_compressCtx() :
-   This function explicitly handles the CTX memory structure.
-   It avoids allocating/deallocating memory between each call, improving performance when malloc is heavily invoked.
-   This function is only useful when memory is allocated into the heap (HASH_LOG value beyond STACK_LIMIT)
-   Performance difference will be noticeable only when repetitively calling the compression function over many small segments.
-   Note : by default, memory is allocated into the stack, therefore "malloc" is not invoked.
-LZ4_compress64kCtx() :
-   Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
-   isize *Must* be <64KB, otherwise the output will be corrupted.
-
-   On first call : provide a *ctx=NULL; It will be automatically allocated.
-   On next calls : reuse the same ctx pointer.
-   Use different pointers for different threads when doing multi-threading.
-
-*/
-
-
 #if defined (__cplusplus)
 }
 #endif
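Since LZ4_compressBound() is now a macro rather than the function 0.1.6 shipped, callers can size the destination buffer with no extra call. A hedged round-trip sketch against the declarations above (the roundtrip helper is illustrative, not part of the header):

    /* Round-trip sketch: LZ4_compressBound() gives the worst-case output
     * size, so LZ4_compress() into a buffer of that size cannot overflow;
     * LZ4_uncompress() takes the *original* size (osize) and returns the
     * number of bytes read from the compressed buffer. */
    #include <stdlib.h>
    #include <string.h>
    #include "lz4.h"

    int roundtrip(const char *src, int isize)
    {
        int   bound = LZ4_compressBound(isize);
        char *comp  = malloc(bound);
        char *back  = malloc(isize);
        int   csize, ok;

        csize = LZ4_compress(src, comp, isize);
        ok = (LZ4_uncompress(comp, back, isize) == csize)
             && memcmp(src, back, isize) == 0;

        free(comp);
        free(back);
        return ok;
    }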
data/ext/lz4ruby/lz4hc.c
CHANGED
@@ -68,12 +68,20 @@
 
 #ifdef _MSC_VER
 #define inline __forceinline  // Visual is not C99, but supports some kind of inline
+#include <intrin.h>           // For Visual 2005
+#  if LZ4_ARCH64  // 64-bit
+#    pragma intrinsic(_BitScanForward64)  // For Visual 2005
+#    pragma intrinsic(_BitScanReverse64)  // For Visual 2005
+#  else
+#    pragma intrinsic(_BitScanForward)    // For Visual 2005
+#    pragma intrinsic(_BitScanReverse)    // For Visual 2005
+#  endif
 #endif
 
 #ifdef _MSC_VER  // Visual Studio
-#define
+#define lz4_bswap16(x) _byteswap_ushort(x)
 #else
-#define
+#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
 #endif
 
 
@@ -174,8 +182,8 @@ typedef struct _U64_S { U64 v; } U64_S;
 #endif
 
 #if defined(LZ4_BIG_ENDIAN)
-#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v =
-#define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); v =
+#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
+#define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
 #else      // Little Endian
 #define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
 #define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }
@@ -350,7 +358,7 @@ inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const
           if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
_endCount:
 
-           if (ipt-ip > ml) { ml = ipt-ip; *matchpos = ref; }
+           if (ipt-ip > ml) { ml = (int)(ipt-ip); *matchpos = ref; }
       }
       ref = GETNEXT(ref);
    }
@@ -366,7 +374,7 @@ inline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const
    INITBASE(base,hc4->base);
    const BYTE*  ref;
    int nbAttempts = MAX_NB_ATTEMPTS;
-   int delta = ip-startLimit;
+   int delta = (int)(ip-startLimit);
 
    // First Match
    LZ4HC_Insert(hc4, ip);
@@ -399,7 +407,7 @@ _endCount:
 
           if ((ipt-startt) > longest)
           {
-               longest = ipt-startt;
+               longest = (int)(ipt-startt);
               *matchpos = reft;
               *startpos = startt;
           }
@@ -417,7 +425,7 @@ inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** an
    BYTE* token;
 
    // Encode Literal length
-   length = *ip - *anchor;
+   length = (int)(*ip - *anchor);
    token = (*op)++;
    if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
    else *token = (length<<ML_BITS);
@@ -426,7 +434,7 @@ inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** an
    LZ4_BLINDCOPY(*anchor, *op, length);
 
    // Encode Offset
-   LZ4_WRITE_LITTLEENDIAN_16(*op
+   LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
 
    // Encode MatchLength
    len = (int)(ml-MINMATCH);
@@ -519,8 +527,8 @@ _Search3:
           int correction;
           int new_ml = ml;
           if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
-           if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = start2 - ip + ml2 - MINMATCH;
-           correction = new_ml - (start2 - ip);
+           if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+           correction = new_ml - (int)(start2 - ip);
           if (correction > 0)
           {
               start2 += correction;
@@ -543,8 +551,8 @@ _Search3:
       {
           int correction;
           if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
-           if (ip+ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH;
-           correction = ml - (start2 - ip);
+           if (ip+ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+           correction = ml - (int)(start2 - ip);
           if (correction > 0)
           {
               start2 += correction;
@@ -554,7 +562,7 @@ _Search3:
       }
       else
       {
-           ml = start2 - ip;
+           ml = (int)(start2 - ip);
       }
    }
    // Now, encode 2 sequences
@@ -570,7 +578,7 @@ _Search3:
    {
       if (start2 < ip+ml)
       {
-           int correction = (ip+ml
+           int correction = (int)(ip+ml - start2);
           start2 += correction;
           ref2 += correction;
           ml2 -= correction;
@@ -607,8 +615,8 @@ _Search3:
       {
           int correction;
           if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
-           if (ip + ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH;
-           correction = ml - (start2 - ip);
+           if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+           correction = ml - (int)(start2 - ip);
           if (correction > 0)
           {
               start2 += correction;
@@ -618,7 +626,7 @@ _Search3:
       }
       else
       {
-           ml = start2 - ip;
+           ml = (int)(start2 - ip);
       }
    }
    LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
@@ -637,7 +645,7 @@ _Search3:
 
    // Encode Last Literals
    {
-       int lastRun = iend - anchor;
+       int lastRun = (int)(iend - anchor);
       if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
       else *op++ = (lastRun<<ML_BITS);
       memcpy(op, anchor, iend - anchor);
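The lz4_bswap16() pair above exists so that the 16-bit match offsets are always written in little-endian stream order, even on big-endian hosts. A standalone sketch of what the portable fallback computes (function name is illustrative):

    /* The two bytes of a 16-bit offset are exchanged, so a big-endian
     * host still emits the offset in little-endian stream order. */
    #include <assert.h>

    static unsigned short bswap16(unsigned short x)
    {
        return (unsigned short)((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8));
    }

    int main(void)
    {
        assert(bswap16(0x1234) == 0x3412);  /* bytes 0x12,0x34 -> 0x34,0x12 */
        return 0;
    }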
data/ext/lz4ruby/lz4ruby.c
CHANGED
@@ -4,109 +4,76 @@
 
 typedef int (*CompressFunc)(const char *source, char *dest, int isize);
 
-static VALUE
+static VALUE lz4internal;
 static VALUE lz4_error;
 
-static int encode_varbyte(int value, char *buf) {
-  buf[0] = value & 0x7f; value >>= 7;
-  if (value == 0) { return 1; }
-  buf[0] |= 0x80;
-
-  buf[1] = value & 0x7f; value >>= 7;
-  if (value == 0) { return 2; }
-  buf[1] |= 0x80;
-
-  buf[2] = value & 0x7f; value >>= 7;
-  if (value == 0) { return 3; }
-  buf[2] |= 0x80;
-
-  buf[3] = value & 0x7f; value >>= 7;
-  if (value == 0) { return 4; }
-  buf[3] |= 0x80;
-
-  buf[4] = value & 0x7f;
-  return 5;
-}
-
-static int decode_varbyte(const char *src, int len, int *value) {
-  if (len < 1) { return -1; }
-
-  *value = src[0] & 0x7f;
-  if ((src[0] & 0x80) == 0) { return 1; }
-  if (len < 2) { return -1; }
-
-  *value |= (src[1] & 0x7f) << 7;
-  if ((src[1] & 0x80) == 0) { return 2; }
-  if (len < 3) { return -1; }
-
-  *value |= (src[2] & 0x7f) << 14;
-  if ((src[2] & 0x80) == 0) { return 3; }
-  if (len < 4) { return -1; }
-
-  *value |= (src[3] & 0x7f) << 21;
-  if ((src[3] & 0x80) == 0) { return 4; }
-  if (len < 5) { return -1; }
-
-  *value |= (src[4] & 0x7f) << 28;
+/**
+ * LZ4Internal functions.
+ */
+static VALUE compress_internal(CompressFunc compressor, VALUE header, VALUE input, VALUE in_size) {
+  const char *src_p;
+  int src_size;
 
-
-
+  const char *header_p;
+  int header_size;
 
-static VALUE compress(CompressFunc compressor, VALUE self, VALUE source, VALUE src_size_prm) {
-  const char *src_p = NULL;
-  char varbyte[5];
-  char *buf = NULL;
   VALUE result;
-
-  int varbyte_len;
+  char *buf;
   int buf_size;
+
   int comp_size;
 
-  Check_Type(
-  src_p = RSTRING_PTR(
-  src_size = NUM2INT(
+  Check_Type(input, T_STRING);
+  src_p = RSTRING_PTR(input);
+  src_size = NUM2INT(in_size);
   buf_size = LZ4_compressBound(src_size);
 
-
+  Check_Type(header, T_STRING);
+  header_p = RSTRING_PTR(header);
+  header_size = RSTRING_LEN(header);
 
-  result = rb_str_new(NULL, buf_size +
+  result = rb_str_new(NULL, buf_size + header_size);
   buf = RSTRING_PTR(result);
 
-  memcpy(buf,
+  memcpy(buf, header_p, header_size);
 
-  comp_size = compressor(src_p, buf +
-  rb_str_resize(result, comp_size +
+  comp_size = compressor(src_p, buf + header_size, src_size);
+  rb_str_resize(result, comp_size + header_size);
 
   return result;
 }
 
-static VALUE
-  return
+static VALUE lz4internal_compress(VALUE self, VALUE header, VALUE input, VALUE in_size) {
+  return compress_internal(LZ4_compress, header, input, in_size);
 }
 
-static VALUE
-  return
+static VALUE lz4internal_compressHC(VALUE self, VALUE header, VALUE input, VALUE in_size) {
+  return compress_internal(LZ4_compressHC, header, input, in_size);
 }
 
-static VALUE
-  const char *src_p
-  char *buf = NULL;
-  VALUE result;
+static VALUE lz4internal_uncompress(VALUE self, VALUE input, VALUE in_size, VALUE offset, VALUE out_size) {
+  const char *src_p;
   int src_size;
-
-  int
+
+  int header_size;
+
+  VALUE result;
+  char *buf;
+  int buf_size;
+
   int read_bytes;
 
-  Check_Type(
-  src_p = RSTRING_PTR(
-  src_size =
+  Check_Type(input, T_STRING);
+  src_p = RSTRING_PTR(input);
+  src_size = NUM2INT(in_size);
 
-
+  header_size = NUM2INT(offset);
+  buf_size = NUM2INT(out_size);
 
   result = rb_str_new(NULL, buf_size);
   buf = RSTRING_PTR(result);
 
-  read_bytes =
+  read_bytes = LZ4_uncompress_unknownOutputSize(src_p + header_size, buf, src_size - header_size, buf_size);
   if (read_bytes < 0) {
     rb_raise(lz4_error, "Compressed data is maybe corrupted.");
  }
@@ -115,11 +82,11 @@ static VALUE lz4_ruby_uncompress(VALUE self, VALUE source) {
 }
 
 void Init_lz4ruby(void) {
-
+  lz4internal = rb_define_module("LZ4Internal");
 
-  rb_define_module_function(
-  rb_define_module_function(
-  rb_define_module_function(
+  rb_define_module_function(lz4internal, "compress", lz4internal_compress, 3);
+  rb_define_module_function(lz4internal, "compressHC", lz4internal_compressHC, 3);
+  rb_define_module_function(lz4internal, "uncompress", lz4internal_uncompress, 4);
 
-  lz4_error = rb_define_class_under(
+  lz4_error = rb_define_class_under(lz4internal, "Error", rb_eStandardError);
 }
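compress_internal() above prepends the Ruby-supplied header string to the raw LZ4 block, so a stream produced by this gem is varbyte(original_size) followed by the block, and lz4internal_uncompress() skips `offset` header bytes before decoding. A hedged sketch of a standalone decoder for that frame (decode_frame is a hypothetical helper, not part of the extension):

    /* Illustrative decoder for the gem's frame layout:
     * varbyte(original_size) followed by a raw LZ4 block. */
    #include <stdlib.h>
    #include "lz4.h"

    static char *decode_frame(const unsigned char *src, int src_size, int *out_size)
    {
        int size = 0, shift = 0, pos = 0;
        char *dst;

        /* Varbyte header: 7 data bits per byte, high bit set on all but the last. */
        do {
            if (pos >= src_size || pos >= 5) return NULL;
            size |= (src[pos] & 0x7f) << shift;
            shift += 7;
        } while (src[pos++] & 0x80);

        dst = malloc(size ? size : 1);
        if (LZ4_uncompress_unknownOutputSize((const char *)src + pos, dst,
                                             src_size - pos, size) < 0) {
            free(dst);                 /* corrupted payload */
            return NULL;
        }
        *out_size = size;
        return dst;
    }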
data/lib/lz4-ruby.rb
CHANGED
@@ -7,17 +7,66 @@ else
 end
 
 class LZ4
-  def self.compress(
-
-    return LZ4Native::compress(source, src_size)
+  def self.compress(input, in_size = nil)
+    return _compress(input, in_size, false)
   end
 
-  def self.compressHC(
-
-    return LZ4Native::compressHC(source, src_size)
+  def self.compressHC(input, in_size = nil)
+    return _compress(input, in_size, true)
   end
+
+  def self._compress(input, in_size, high_compression)
+    in_size = input.length if in_size == nil
+    header = encode_varbyte(in_size)
 
-
-
+    if high_compression
+      return LZ4Internal.compressHC(header, input, in_size)
+    else
+      return LZ4Internal.compress(header, input, in_size)
+    end
+  end
+
+  def self.uncompress(input, in_size = nil)
+    in_size = input.length if in_size == nil
+    out_size, varbyte_len = decode_varbyte(input)
+
+    if out_size < 0 || varbyte_len < 0
+      raise "Compressed data is maybe corrupted"
+    end
+
+    return LZ4Internal::uncompress(input, in_size, varbyte_len, out_size)
+  end
+
+  def self.encode_varbyte(val)
+    varbytes = []
+
+    loop do
+      byte = val & 0x7f
+      val >>= 7
+
+      if val == 0
+        varbytes.push(byte)
+        break
+      else
+        varbytes.push(byte | 0x80)
+      end
+    end
+
+    return varbytes.pack("C*")
+  end
+
+  def self.decode_varbyte(text)
+    len = [text.length, 5].min
+    bytes = text[0, len].unpack("C*")
+
+    varbyte_len = 0
+    val = 0
+    bytes.each do |b|
+      val |= (b & 0x7f) << (7 * varbyte_len)
+      varbyte_len += 1
+      return val, varbyte_len if b & 0x80 == 0
+    end
+
+    return -1, -1
   end
 end
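encode_varbyte/decode_varbyte above store the uncompressed size as 7-bit groups, least-significant group first, with the high bit marking continuation; the 131073-byte "long text" used by the new tests therefore gets the 3-byte header 0x81 0x80 0x08. The same scheme sketched in C to match the other examples here (helper name is illustrative):

    /* Worked example of the size-header varbyte scheme:
     * 131073 = 0b1000_0000000_0000001 -> 7-bit groups 1, 0, 8 (low first)
     * -> bytes 0x81, 0x80, 0x08 (high bit means "more bytes follow"). */
    #include <assert.h>

    static int encode_varbyte(unsigned int val, unsigned char *buf)
    {
        int n = 0;
        do {
            buf[n] = val & 0x7f;
            val >>= 7;
            if (val) buf[n] |= 0x80;   /* continuation bit */
            n++;
        } while (val);
        return n;
    }

    int main(void)
    {
        unsigned char buf[5];
        int n = encode_varbyte(131073, buf);
        assert(n == 3 && buf[0] == 0x81 && buf[1] == 0x80 && buf[2] == 0x08);
        return 0;
    }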
data/test/helper.rb
CHANGED
data/test/test_lz4-ruby.rb
CHANGED
@@ -3,15 +3,30 @@ require 'helper'
 class TestLz4Ruby < Test::Unit::TestCase
   LOOP_COUNT = 257
 
-
+  srand(123)
+
+  def self.random_bytes(len)
+    result = []
+    len.times do |t|
+      result << rand(256)
+    end
+    return result.pack("C*")
+  end
 
   context "LZ4::compress" do
     should "empty text" do
       compressed = LZ4::compress("")
       uncompressed = LZ4::uncompress(compressed)
-
+      assert_equal("", uncompressed)
     end
-
+
+    should "long text" do
+      text = "a" * 131073
+      compressed = LZ4.compress(text)
+      uncompressed = LZ4.uncompress(compressed)
+      assert_equal(text, uncompressed)
+    end
+
     LOOP_COUNT.times do |t|
       len = t + 1
       text = "a" * len
@@ -25,7 +40,7 @@ class TestLz4Ruby < Test::Unit::TestCase
 
     LOOP_COUNT.times do |t|
       len = t + 1
-      text =
+      text = random_bytes(len)
 
       should "random text of #{len} bytes" do
         compressed = LZ4::compress(text)
@@ -49,9 +64,16 @@ class TestLz4Ruby < Test::Unit::TestCase
     should "empty text" do
       compressed = LZ4::compressHC("")
       uncompressed = LZ4::uncompress(compressed)
-
+      assert_equal("", uncompressed)
     end
 
+    should "long text" do
+      text = "a" * 131073
+      compressed = LZ4.compressHC(text)
+      uncompressed = LZ4.uncompress(compressed)
+      assert_equal(text, uncompressed)
+    end
+
     LOOP_COUNT.times do |t|
       len = t + 1
       text = "a" * len
@@ -65,7 +87,7 @@ class TestLz4Ruby < Test::Unit::TestCase
 
     LOOP_COUNT.times do |t|
      len = t + 1
-      text =
+      text = random_bytes(len)
 
      should "random text of #{len} bytes" do
        compressed = LZ4::compressHC(text)
metadata
CHANGED
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: lz4-ruby
 version: !ruby/object:Gem::Version
-  hash:
+  hash: 21
   prerelease:
   segments:
   - 0
   - 1
-  - 6
-  version: 0.1.6
+  - 7
+  version: 0.1.7
 platform: ruby
 authors:
 - KOMIYA Atsushi
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2012-
+date: 2012-08-14 00:00:00 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   type: :development