lz4-ruby 0.1.6-x86-mingw32 → 0.1.7-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/ext/lz4ruby/lz4.c +99 -57
- data/ext/lz4ruby/lz4.h +24 -27
- data/ext/lz4ruby/lz4hc.c +27 -19
- data/ext/lz4ruby/lz4ruby.c +45 -78
- data/lib/1.8/lz4ruby.so +0 -0
- data/lib/1.9/lz4ruby.so +0 -0
- data/lib/lz4-ruby.rb +57 -8
- data/test/helper.rb +1 -1
- data/test/test_lz4-ruby.rb +28 -6
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.6
|
1
|
+
0.1.7
|
data/ext/lz4ruby/lz4.c
CHANGED
@@ -34,31 +34,24 @@
|
|
34
34
|
//**************************************
|
35
35
|
// Tuning parameters
|
36
36
|
//**************************************
|
37
|
-
//
|
38
|
-
//
|
39
|
-
//
|
40
|
-
// Reduced memory usage
|
41
|
-
//
|
42
|
-
#define
|
43
|
-
|
44
|
-
//
|
37
|
+
// MEMORY_USAGE :
|
38
|
+
// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
|
39
|
+
// Increasing memory usage improves compression ratio
|
40
|
+
// Reduced memory usage can improve speed, due to cache effect
|
41
|
+
// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
|
42
|
+
#define MEMORY_USAGE 14
|
43
|
+
|
44
|
+
// NOTCOMPRESSIBLE_DETECTIONLEVEL :
|
45
45
|
// Decreasing this value will make the algorithm skip faster data segments considered "incompressible"
|
46
46
|
// This may decrease compression ratio dramatically, but will be faster on incompressible data
|
47
47
|
// Increasing this value will make the algorithm search more before declaring a segment "incompressible"
|
48
48
|
// This could improve compression a bit, but will be slower on incompressible data
|
49
49
|
// The default value (6) is recommended
|
50
|
-
#define
|
51
|
-
|
52
|
-
// LZ4_COMPRESSMIN :
|
53
|
-
// Compression function will *fail* if it is not successful at compressing input by at least LZ4_COMPRESSMIN bytes
|
54
|
-
// Since the compression function stops working prematurely, it results in a speed gain
|
55
|
-
// The output however is unusable. Compression function result will be zero.
|
56
|
-
// Default : 0 = disabled
|
57
|
-
#define LZ4_COMPRESSMIN 0
|
50
|
+
#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6
|
58
51
|
|
59
52
|
// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
|
60
|
-
// This will provide a boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU.
|
61
|
-
// You can set this option to 1 in situations where data will
|
53
|
+
// This will provide a small boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU.
|
54
|
+
// You can set this option to 1 in situations where data will remain within closed environment
|
62
55
|
// This option is useless on Little_Endian CPU (such as x86)
|
63
56
|
//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
|
64
57
|
|
@@ -108,6 +101,7 @@
|
|
108
101
|
|
109
102
|
#ifdef _MSC_VER // Visual Studio
|
110
103
|
# define inline __forceinline // Visual is not C99, but supports some kind of inline
|
104
|
+
# include <intrin.h> // For Visual 2005
|
111
105
|
# if LZ4_ARCH64 // 64-bit
|
112
106
|
# pragma intrinsic(_BitScanForward64) // For Visual 2005
|
113
107
|
# pragma intrinsic(_BitScanReverse64) // For Visual 2005
|
@@ -181,11 +175,11 @@ typedef struct _U64_S { U64 v; } U64_S;
|
|
181
175
|
//**************************************
|
182
176
|
#define MINMATCH 4
|
183
177
|
|
184
|
-
#define HASH_LOG
|
178
|
+
#define HASH_LOG (MEMORY_USAGE-2)
|
185
179
|
#define HASHTABLESIZE (1 << HASH_LOG)
|
186
180
|
#define HASH_MASK (HASHTABLESIZE - 1)
|
187
181
|
|
188
|
-
#define SKIPSTRENGTH (
|
182
|
+
#define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2)
|
189
183
|
#define STACKLIMIT 13
|
190
184
|
#define HEAPMODE (HASH_LOG>STACKLIMIT) // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()).
|
191
185
|
#define COPYLENGTH 8
|
@@ -257,7 +251,7 @@ struct refTables
|
|
257
251
|
//****************************
|
258
252
|
#if LZ4_ARCH64
|
259
253
|
|
260
|
-
inline static int LZ4_NbCommonBytes (register U64 val)
|
254
|
+
static inline int LZ4_NbCommonBytes (register U64 val)
|
261
255
|
{
|
262
256
|
#if defined(LZ4_BIG_ENDIAN)
|
263
257
|
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
@@ -289,7 +283,7 @@ inline static int LZ4_NbCommonBytes (register U64 val)
|
|
289
283
|
|
290
284
|
#else
|
291
285
|
|
292
|
-
inline static int LZ4_NbCommonBytes (register U32 val)
|
286
|
+
static inline int LZ4_NbCommonBytes (register U32 val)
|
293
287
|
{
|
294
288
|
#if defined(LZ4_BIG_ENDIAN)
|
295
289
|
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
@@ -321,25 +315,22 @@ inline static int LZ4_NbCommonBytes (register U32 val)
|
|
321
315
|
#endif
|
322
316
|
|
323
317
|
|
324
|
-
//****************************
|
325
|
-
// Public functions
|
326
|
-
//****************************
|
327
|
-
|
328
|
-
int LZ4_compressBound(int isize)
|
329
|
-
{
|
330
|
-
return (isize + (isize/255) + 16);
|
331
|
-
}
|
332
|
-
|
333
|
-
|
334
318
|
|
335
319
|
//******************************
|
336
320
|
// Compression functions
|
337
321
|
//******************************
|
338
322
|
|
339
|
-
|
323
|
+
// LZ4_compressCtx :
|
324
|
+
// -----------------
|
325
|
+
// Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
|
326
|
+
// If it cannot achieve it, compression will stop, and result of the function will be zero.
|
327
|
+
// return : the number of bytes written in buffer 'dest', or 0 if the compression fails
|
328
|
+
|
329
|
+
static inline int LZ4_compressCtx(void** ctx,
|
340
330
|
const char* source,
|
341
331
|
char* dest,
|
342
|
-
int isize
|
332
|
+
int isize,
|
333
|
+
int maxOutputSize)
|
343
334
|
{
|
344
335
|
#if HEAPMODE
|
345
336
|
struct refTables *srt = (struct refTables *) (*ctx);
|
@@ -356,6 +347,7 @@ int LZ4_compressCtx(void** ctx,
|
|
356
347
|
#define matchlimit (iend - LASTLITERALS)
|
357
348
|
|
358
349
|
BYTE* op = (BYTE*) dest;
|
350
|
+
BYTE* const oend = op + maxOutputSize;
|
359
351
|
|
360
352
|
int len, length;
|
361
353
|
const int skipStrength = SKIPSTRENGTH;
|
@@ -408,17 +400,37 @@ int LZ4_compressCtx(void** ctx,
|
|
408
400
|
while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
|
409
401
|
|
410
402
|
// Encode Literal length
|
411
|
-
length = ip - anchor;
|
403
|
+
length = (int)(ip - anchor);
|
412
404
|
token = op++;
|
405
|
+
if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) >= oend) return 0; // Check output limit
|
406
|
+
#ifdef _MSC_VER
|
407
|
+
if (length>=(int)RUN_MASK)
|
408
|
+
{
|
409
|
+
int len = length-RUN_MASK;
|
410
|
+
*token=(RUN_MASK<<ML_BITS);
|
411
|
+
if (len>254)
|
412
|
+
{
|
413
|
+
do { *op++ = 255; len -= 255; } while (len>254);
|
414
|
+
*op++ = (BYTE)len;
|
415
|
+
memcpy(op, anchor, length);
|
416
|
+
op += length;
|
417
|
+
goto _next_match;
|
418
|
+
}
|
419
|
+
else
|
420
|
+
*op++ = (BYTE)len;
|
421
|
+
}
|
422
|
+
else *token = (length<<ML_BITS);
|
423
|
+
#else
|
413
424
|
if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
|
414
425
|
else *token = (length<<ML_BITS);
|
426
|
+
#endif
|
415
427
|
|
416
428
|
// Copy Literals
|
417
429
|
LZ4_BLINDCOPY(anchor, op, length);
|
418
430
|
|
419
431
|
_next_match:
|
420
432
|
// Encode Offset
|
421
|
-
LZ4_WRITE_LITTLEENDIAN_16(op,ip-ref);
|
433
|
+
LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
|
422
434
|
|
423
435
|
// Start Counting
|
424
436
|
ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified
|
@@ -436,7 +448,7 @@ _next_match:
|
|
436
448
|
_endCount:
|
437
449
|
|
438
450
|
// Encode MatchLength
|
439
|
-
len = (ip - anchor);
|
451
|
+
len = (int)(ip - anchor);
|
440
452
|
if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; }
|
441
453
|
else *token += len;
|
442
454
|
|
@@ -459,8 +471,8 @@ _endCount:
|
|
459
471
|
_last_literals:
|
460
472
|
// Encode Last Literals
|
461
473
|
{
|
462
|
-
int lastRun = iend - anchor;
|
463
|
-
if ((
|
474
|
+
int lastRun = (int)(iend - anchor);
|
475
|
+
if (((char*)op - dest) + lastRun + 1 + ((lastRun-15)/255) >= maxOutputSize) return 0;
|
464
476
|
if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
|
465
477
|
else *op++ = (lastRun<<ML_BITS);
|
466
478
|
memcpy(op, anchor, iend - anchor);
|
@@ -479,10 +491,11 @@ _last_literals:
|
|
479
491
|
#define HASH64KTABLESIZE (1U<<HASHLOG64K)
|
480
492
|
#define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG64K))
|
481
493
|
#define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))
|
482
|
-
int LZ4_compress64kCtx(void** ctx,
|
494
|
+
static inline int LZ4_compress64kCtx(void** ctx,
|
483
495
|
const char* source,
|
484
496
|
char* dest,
|
485
|
-
int isize
|
497
|
+
int isize,
|
498
|
+
int maxOutputSize)
|
486
499
|
{
|
487
500
|
#if HEAPMODE
|
488
501
|
struct refTables *srt = (struct refTables *) (*ctx);
|
@@ -499,6 +512,7 @@ int LZ4_compress64kCtx(void** ctx,
|
|
499
512
|
#define matchlimit (iend - LASTLITERALS)
|
500
513
|
|
501
514
|
BYTE* op = (BYTE*) dest;
|
515
|
+
BYTE* const oend = op + maxOutputSize;
|
502
516
|
|
503
517
|
int len, length;
|
504
518
|
const int skipStrength = SKIPSTRENGTH;
|
@@ -542,7 +556,7 @@ int LZ4_compress64kCtx(void** ctx,
|
|
542
556
|
|
543
557
|
forwardH = LZ4_HASH64K_VALUE(forwardIp);
|
544
558
|
ref = base + HashTable[h];
|
545
|
-
HashTable[h] = ip - base;
|
559
|
+
HashTable[h] = (U16)(ip - base);
|
546
560
|
|
547
561
|
} while (A32(ref) != A32(ip));
|
548
562
|
|
@@ -550,17 +564,37 @@ int LZ4_compress64kCtx(void** ctx,
|
|
550
564
|
while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; }
|
551
565
|
|
552
566
|
// Encode Literal length
|
553
|
-
length = ip - anchor;
|
567
|
+
length = (int)(ip - anchor);
|
554
568
|
token = op++;
|
569
|
+
if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) >= oend) return 0; // Check output limit
|
570
|
+
#ifdef _MSC_VER
|
571
|
+
if (length>=(int)RUN_MASK)
|
572
|
+
{
|
573
|
+
int len = length-RUN_MASK;
|
574
|
+
*token=(RUN_MASK<<ML_BITS);
|
575
|
+
if (len>254)
|
576
|
+
{
|
577
|
+
do { *op++ = 255; len -= 255; } while (len>254);
|
578
|
+
*op++ = (BYTE)len;
|
579
|
+
memcpy(op, anchor, length);
|
580
|
+
op += length;
|
581
|
+
goto _next_match;
|
582
|
+
}
|
583
|
+
else
|
584
|
+
*op++ = (BYTE)len;
|
585
|
+
}
|
586
|
+
else *token = (length<<ML_BITS);
|
587
|
+
#else
|
555
588
|
if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
|
556
589
|
else *token = (length<<ML_BITS);
|
590
|
+
#endif
|
557
591
|
|
558
592
|
// Copy Literals
|
559
593
|
LZ4_BLINDCOPY(anchor, op, length);
|
560
594
|
|
561
595
|
_next_match:
|
562
596
|
// Encode Offset
|
563
|
-
LZ4_WRITE_LITTLEENDIAN_16(op,ip-ref);
|
597
|
+
LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
|
564
598
|
|
565
599
|
// Start Counting
|
566
600
|
ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified
|
@@ -578,7 +612,7 @@ _next_match:
|
|
578
612
|
_endCount:
|
579
613
|
|
580
614
|
// Encode MatchLength
|
581
|
-
len = (ip - anchor);
|
615
|
+
len = (int)(ip - anchor);
|
582
616
|
if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; }
|
583
617
|
else *token += len;
|
584
618
|
|
@@ -586,11 +620,11 @@ _endCount:
|
|
586
620
|
if (ip > mflimit) { anchor = ip; break; }
|
587
621
|
|
588
622
|
// Fill table
|
589
|
-
HashTable[LZ4_HASH64K_VALUE(ip-2)] = ip - 2 - base;
|
623
|
+
HashTable[LZ4_HASH64K_VALUE(ip-2)] = (U16)(ip - 2 - base);
|
590
624
|
|
591
625
|
// Test next position
|
592
626
|
ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
|
593
|
-
HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
|
627
|
+
HashTable[LZ4_HASH64K_VALUE(ip)] = (U16)(ip - base);
|
594
628
|
if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; }
|
595
629
|
|
596
630
|
// Prepare next loop
|
@@ -601,8 +635,8 @@ _endCount:
|
|
601
635
|
_last_literals:
|
602
636
|
// Encode Last Literals
|
603
637
|
{
|
604
|
-
int lastRun = iend - anchor;
|
605
|
-
if ((
|
638
|
+
int lastRun = (int)(iend - anchor);
|
639
|
+
if (((char*)op - dest) + lastRun + 1 + ((lastRun)>>8) >= maxOutputSize) return 0;
|
606
640
|
if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
|
607
641
|
else *op++ = (lastRun<<ML_BITS);
|
608
642
|
memcpy(op, anchor, iend - anchor);
|
@@ -614,26 +648,34 @@ _last_literals:
|
|
614
648
|
}
|
615
649
|
|
616
650
|
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
651
|
+
int LZ4_compress_limitedOutput(const char* source,
|
652
|
+
char* dest,
|
653
|
+
int isize,
|
654
|
+
int maxOutputSize)
|
621
655
|
{
|
622
656
|
#if HEAPMODE
|
623
657
|
void* ctx = malloc(sizeof(struct refTables));
|
624
658
|
int result;
|
625
659
|
if (isize < LZ4_64KLIMIT)
|
626
|
-
result = LZ4_compress64kCtx(&ctx, source, dest, isize);
|
627
|
-
else result = LZ4_compressCtx(&ctx, source, dest, isize);
|
660
|
+
result = LZ4_compress64kCtx(&ctx, source, dest, isize, maxOutputSize);
|
661
|
+
else result = LZ4_compressCtx(&ctx, source, dest, isize, maxOutputSize);
|
628
662
|
free(ctx);
|
629
663
|
return result;
|
630
664
|
#else
|
631
|
-
if (isize < (int)LZ4_64KLIMIT) return LZ4_compress64kCtx(NULL, source, dest, isize);
|
632
|
-
return LZ4_compressCtx(NULL, source, dest, isize);
|
665
|
+
if (isize < (int)LZ4_64KLIMIT) return LZ4_compress64kCtx(NULL, source, dest, isize, maxOutputSize);
|
666
|
+
return LZ4_compressCtx(NULL, source, dest, isize, maxOutputSize);
|
633
667
|
#endif
|
634
668
|
}
|
635
669
|
|
636
670
|
|
671
|
+
int LZ4_compress(const char* source,
|
672
|
+
char* dest,
|
673
|
+
int isize)
|
674
|
+
{
|
675
|
+
return LZ4_compress_limitedOutput(source, dest, isize, LZ4_compressBound(isize));
|
676
|
+
}
|
677
|
+
|
678
|
+
|
637
679
|
|
638
680
|
|
639
681
|
//****************************
|
data/ext/lz4ruby/lz4.h
CHANGED
@@ -47,19 +47,22 @@ int LZ4_uncompress (const char* source, char* dest, int osize);
|
|
47
47
|
|
48
48
|
/*
|
49
49
|
LZ4_compress() :
|
50
|
+
Compresses 'isize' bytes from 'source' into 'dest'.
|
51
|
+
Destination buffer must be already allocated,
|
52
|
+
and must be sized to handle worst cases situations (input data not compressible)
|
53
|
+
Worst case size evaluation is provided by macro LZ4_compressBound()
|
54
|
+
|
50
55
|
isize : is the input size. Max supported value is ~1.9GB
|
51
56
|
return : the number of bytes written in buffer dest
|
52
|
-
|
53
|
-
note : destination buffer must be already allocated.
|
54
|
-
destination buffer must be sized to handle worst cases situations (input data not compressible)
|
55
|
-
worst case size evaluation is provided by function LZ4_compressBound()
|
57
|
+
|
56
58
|
|
57
59
|
LZ4_uncompress() :
|
58
60
|
osize : is the output size, therefore the original size
|
59
61
|
return : the number of bytes read in the source buffer
|
60
62
|
If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
|
61
63
|
This function never writes beyond dest + osize, and is therefore protected against malicious data packets
|
62
|
-
note : destination buffer must be already allocated
|
64
|
+
note : destination buffer must be already allocated.
|
65
|
+
its size must be a minimum of 'osize' bytes.
|
63
66
|
*/
|
64
67
|
|
65
68
|
|
@@ -67,7 +70,7 @@ LZ4_uncompress() :
|
|
67
70
|
// Advanced Functions
|
68
71
|
//****************************
|
69
72
|
|
70
|
-
|
73
|
+
#define LZ4_compressBound(isize) (isize + (isize/255) + 16)
|
71
74
|
|
72
75
|
/*
|
73
76
|
LZ4_compressBound() :
|
@@ -80,6 +83,21 @@ LZ4_compressBound() :
|
|
80
83
|
*/
|
81
84
|
|
82
85
|
|
86
|
+
int LZ4_compress_limitedOutput (const char* source, char* dest, int isize, int maxOutputSize);
|
87
|
+
|
88
|
+
/*
|
89
|
+
LZ4_compress_limitedOutput() :
|
90
|
+
Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
|
91
|
+
If it cannot achieve it, compression will stop, and result of the function will be zero.
|
92
|
+
This function never writes outside of provided output buffer.
|
93
|
+
|
94
|
+
isize : is the input size. Max supported value is ~1.9GB
|
95
|
+
maxOutputSize : is the size of the destination buffer (which must be already allocated)
|
96
|
+
return : the number of bytes written in buffer 'dest'
|
97
|
+
or 0 if the compression fails
|
98
|
+
*/
|
99
|
+
|
100
|
+
|
83
101
|
int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
|
84
102
|
|
85
103
|
/*
|
@@ -94,27 +112,6 @@ LZ4_uncompress_unknownOutputSize() :
|
|
94
112
|
*/
|
95
113
|
|
96
114
|
|
97
|
-
int LZ4_compressCtx(void** ctx, const char* source, char* dest, int isize);
|
98
|
-
int LZ4_compress64kCtx(void** ctx, const char* source, char* dest, int isize);
|
99
|
-
|
100
|
-
/*
|
101
|
-
LZ4_compressCtx() :
|
102
|
-
This function explicitly handles the CTX memory structure.
|
103
|
-
It avoids allocating/deallocating memory between each call, improving performance when malloc is heavily invoked.
|
104
|
-
This function is only useful when memory is allocated into the heap (HASH_LOG value beyond STACK_LIMIT)
|
105
|
-
Performance difference will be noticeable only when repetitively calling the compression function over many small segments.
|
106
|
-
Note : by default, memory is allocated into the stack, therefore "malloc" is not invoked.
|
107
|
-
LZ4_compress64kCtx() :
|
108
|
-
Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
|
109
|
-
isize *Must* be <64KB, otherwise the output will be corrupted.
|
110
|
-
|
111
|
-
On first call : provide a *ctx=NULL; It will be automatically allocated.
|
112
|
-
On next calls : reuse the same ctx pointer.
|
113
|
-
Use different pointers for different threads when doing multi-threading.
|
114
|
-
|
115
|
-
*/
|
116
|
-
|
117
|
-
|
118
115
|
#if defined (__cplusplus)
|
119
116
|
}
|
120
117
|
#endif
|
data/ext/lz4ruby/lz4hc.c
CHANGED
@@ -68,12 +68,20 @@
|
|
68
68
|
|
69
69
|
#ifdef _MSC_VER
|
70
70
|
#define inline __forceinline // Visual is not C99, but supports some kind of inline
|
71
|
+
#include <intrin.h> // For Visual 2005
|
72
|
+
# if LZ4_ARCH64 // 64-bit
|
73
|
+
# pragma intrinsic(_BitScanForward64) // For Visual 2005
|
74
|
+
# pragma intrinsic(_BitScanReverse64) // For Visual 2005
|
75
|
+
# else
|
76
|
+
# pragma intrinsic(_BitScanForward) // For Visual 2005
|
77
|
+
# pragma intrinsic(_BitScanReverse) // For Visual 2005
|
78
|
+
# endif
|
71
79
|
#endif
|
72
80
|
|
73
81
|
#ifdef _MSC_VER // Visual Studio
|
74
|
-
#define
|
82
|
+
#define lz4_bswap16(x) _byteswap_ushort(x)
|
75
83
|
#else
|
76
|
-
#define
|
84
|
+
#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
|
77
85
|
#endif
|
78
86
|
|
79
87
|
|
@@ -174,8 +182,8 @@ typedef struct _U64_S { U64 v; } U64_S;
|
|
174
182
|
#endif
|
175
183
|
|
176
184
|
#if defined(LZ4_BIG_ENDIAN)
|
177
|
-
#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v =
|
178
|
-
#define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v =
|
185
|
+
#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
|
186
|
+
#define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
|
179
187
|
#else // Little Endian
|
180
188
|
#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
|
181
189
|
#define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
|
@@ -350,7 +358,7 @@ inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const
|
|
350
358
|
if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
|
351
359
|
_endCount:
|
352
360
|
|
353
|
-
if (ipt-ip > ml) { ml = ipt-ip; *matchpos = ref; }
|
361
|
+
if (ipt-ip > ml) { ml = (int)(ipt-ip); *matchpos = ref; }
|
354
362
|
}
|
355
363
|
ref = GETNEXT(ref);
|
356
364
|
}
|
@@ -366,7 +374,7 @@ inline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const
|
|
366
374
|
INITBASE(base,hc4->base);
|
367
375
|
const BYTE* ref;
|
368
376
|
int nbAttempts = MAX_NB_ATTEMPTS;
|
369
|
-
int delta = ip-startLimit;
|
377
|
+
int delta = (int)(ip-startLimit);
|
370
378
|
|
371
379
|
// First Match
|
372
380
|
LZ4HC_Insert(hc4, ip);
|
@@ -399,7 +407,7 @@ _endCount:
|
|
399
407
|
|
400
408
|
if ((ipt-startt) > longest)
|
401
409
|
{
|
402
|
-
longest = ipt-startt;
|
410
|
+
longest = (int)(ipt-startt);
|
403
411
|
*matchpos = reft;
|
404
412
|
*startpos = startt;
|
405
413
|
}
|
@@ -417,7 +425,7 @@ inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** an
|
|
417
425
|
BYTE* token;
|
418
426
|
|
419
427
|
// Encode Literal length
|
420
|
-
length = *ip - *anchor;
|
428
|
+
length = (int)(*ip - *anchor);
|
421
429
|
token = (*op)++;
|
422
430
|
if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
|
423
431
|
else *token = (length<<ML_BITS);
|
@@ -426,7 +434,7 @@ inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** an
|
|
426
434
|
LZ4_BLINDCOPY(*anchor, *op, length);
|
427
435
|
|
428
436
|
// Encode Offset
|
429
|
-
LZ4_WRITE_LITTLEENDIAN_16(*op
|
437
|
+
LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
|
430
438
|
|
431
439
|
// Encode MatchLength
|
432
440
|
len = (int)(ml-MINMATCH);
|
@@ -519,8 +527,8 @@ _Search3:
|
|
519
527
|
int correction;
|
520
528
|
int new_ml = ml;
|
521
529
|
if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
|
522
|
-
if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = start2 - ip + ml2 - MINMATCH;
|
523
|
-
correction = new_ml - (start2 - ip);
|
530
|
+
if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
|
531
|
+
correction = new_ml - (int)(start2 - ip);
|
524
532
|
if (correction > 0)
|
525
533
|
{
|
526
534
|
start2 += correction;
|
@@ -543,8 +551,8 @@ _Search3:
|
|
543
551
|
{
|
544
552
|
int correction;
|
545
553
|
if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
|
546
|
-
if (ip+ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH;
|
547
|
-
correction = ml - (start2 - ip);
|
554
|
+
if (ip+ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
|
555
|
+
correction = ml - (int)(start2 - ip);
|
548
556
|
if (correction > 0)
|
549
557
|
{
|
550
558
|
start2 += correction;
|
@@ -554,7 +562,7 @@ _Search3:
|
|
554
562
|
}
|
555
563
|
else
|
556
564
|
{
|
557
|
-
ml = start2 - ip;
|
565
|
+
ml = (int)(start2 - ip);
|
558
566
|
}
|
559
567
|
}
|
560
568
|
// Now, encode 2 sequences
|
@@ -570,7 +578,7 @@ _Search3:
|
|
570
578
|
{
|
571
579
|
if (start2 < ip+ml)
|
572
580
|
{
|
573
|
-
int correction = (ip+ml
|
581
|
+
int correction = (int)(ip+ml - start2);
|
574
582
|
start2 += correction;
|
575
583
|
ref2 += correction;
|
576
584
|
ml2 -= correction;
|
@@ -607,8 +615,8 @@ _Search3:
|
|
607
615
|
{
|
608
616
|
int correction;
|
609
617
|
if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
|
610
|
-
if (ip + ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH;
|
611
|
-
correction = ml - (start2 - ip);
|
618
|
+
if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
|
619
|
+
correction = ml - (int)(start2 - ip);
|
612
620
|
if (correction > 0)
|
613
621
|
{
|
614
622
|
start2 += correction;
|
@@ -618,7 +626,7 @@ _Search3:
|
|
618
626
|
}
|
619
627
|
else
|
620
628
|
{
|
621
|
-
ml = start2 - ip;
|
629
|
+
ml = (int)(start2 - ip);
|
622
630
|
}
|
623
631
|
}
|
624
632
|
LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
|
@@ -637,7 +645,7 @@ _Search3:
|
|
637
645
|
|
638
646
|
// Encode Last Literals
|
639
647
|
{
|
640
|
-
int lastRun = iend - anchor;
|
648
|
+
int lastRun = (int)(iend - anchor);
|
641
649
|
if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
|
642
650
|
else *op++ = (lastRun<<ML_BITS);
|
643
651
|
memcpy(op, anchor, iend - anchor);
|
data/ext/lz4ruby/lz4ruby.c
CHANGED
@@ -4,109 +4,76 @@
|
|
4
4
|
|
5
5
|
typedef int (*CompressFunc)(const char *source, char *dest, int isize);
|
6
6
|
|
7
|
-
static VALUE
|
7
|
+
static VALUE lz4internal;
|
8
8
|
static VALUE lz4_error;
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
if (value == 0) { return 2; }
|
17
|
-
buf[1] |= 0x80;
|
18
|
-
|
19
|
-
buf[2] = value & 0x7f; value >>= 7;
|
20
|
-
if (value == 0) { return 3; }
|
21
|
-
buf[2] |= 0x80;
|
22
|
-
|
23
|
-
buf[3] = value & 0x7f; value >>= 7;
|
24
|
-
if (value == 0) { return 4; }
|
25
|
-
buf[3] |= 0x80;
|
26
|
-
|
27
|
-
buf[4] = value & 0x7f;
|
28
|
-
return 5;
|
29
|
-
}
|
30
|
-
|
31
|
-
static int decode_varbyte(const char *src, int len, int *value) {
|
32
|
-
if (len < 1) { return -1; }
|
33
|
-
|
34
|
-
*value = src[0] & 0x7f;
|
35
|
-
if ((src[0] & 0x80) == 0) { return 1; }
|
36
|
-
if (len < 2) { return -1; }
|
37
|
-
|
38
|
-
*value |= (src[1] & 0x7f) << 7;
|
39
|
-
if ((src[1] & 0x80) == 0) { return 2; }
|
40
|
-
if (len < 3) { return -1; }
|
41
|
-
|
42
|
-
*value |= (src[2] & 0x7f) << 14;
|
43
|
-
if ((src[2] & 0x80) == 0) { return 3; }
|
44
|
-
if (len < 4) { return -1; }
|
45
|
-
|
46
|
-
*value |= (src[3] & 0x7f) << 21;
|
47
|
-
if ((src[3] & 0x80) == 0) { return 4; }
|
48
|
-
if (len < 5) { return -1; }
|
49
|
-
|
50
|
-
*value |= (src[4] & 0x7f) << 28;
|
10
|
+
/**
|
11
|
+
* LZ4Internal functions.
|
12
|
+
*/
|
13
|
+
static VALUE compress_internal(CompressFunc compressor, VALUE header, VALUE input, VALUE in_size) {
|
14
|
+
const char *src_p;
|
15
|
+
int src_size;
|
51
16
|
|
52
|
-
|
53
|
-
|
17
|
+
const char *header_p;
|
18
|
+
int header_size;
|
54
19
|
|
55
|
-
static VALUE compress(CompressFunc compressor, VALUE self, VALUE source, VALUE src_size_prm) {
|
56
|
-
const char *src_p = NULL;
|
57
|
-
char varbyte[5];
|
58
|
-
char *buf = NULL;
|
59
20
|
VALUE result;
|
60
|
-
|
61
|
-
int varbyte_len;
|
21
|
+
char *buf;
|
62
22
|
int buf_size;
|
23
|
+
|
63
24
|
int comp_size;
|
64
25
|
|
65
|
-
Check_Type(
|
66
|
-
src_p = RSTRING_PTR(
|
67
|
-
src_size = NUM2INT(
|
26
|
+
Check_Type(input, T_STRING);
|
27
|
+
src_p = RSTRING_PTR(input);
|
28
|
+
src_size = NUM2INT(in_size);
|
68
29
|
buf_size = LZ4_compressBound(src_size);
|
69
30
|
|
70
|
-
|
31
|
+
Check_Type(header, T_STRING);
|
32
|
+
header_p = RSTRING_PTR(header);
|
33
|
+
header_size = RSTRING_LEN(header);
|
71
34
|
|
72
|
-
result = rb_str_new(NULL, buf_size +
|
35
|
+
result = rb_str_new(NULL, buf_size + header_size);
|
73
36
|
buf = RSTRING_PTR(result);
|
74
37
|
|
75
|
-
memcpy(buf,
|
38
|
+
memcpy(buf, header_p, header_size);
|
76
39
|
|
77
|
-
comp_size = compressor(src_p, buf +
|
78
|
-
rb_str_resize(result, comp_size +
|
40
|
+
comp_size = compressor(src_p, buf + header_size, src_size);
|
41
|
+
rb_str_resize(result, comp_size + header_size);
|
79
42
|
|
80
43
|
return result;
|
81
44
|
}
|
82
45
|
|
83
|
-
static VALUE
|
84
|
-
return
|
46
|
+
static VALUE lz4internal_compress(VALUE self, VALUE header, VALUE input, VALUE in_size) {
|
47
|
+
return compress_internal(LZ4_compress, header, input, in_size);
|
85
48
|
}
|
86
49
|
|
87
|
-
static VALUE
|
88
|
-
return
|
50
|
+
static VALUE lz4internal_compressHC(VALUE self, VALUE header, VALUE input, VALUE in_size) {
|
51
|
+
return compress_internal(LZ4_compressHC, header, input, in_size);
|
89
52
|
}
|
90
53
|
|
91
|
-
static VALUE lz4_ruby_uncompress(VALUE self, VALUE source) {
|
92
|
-
const char *src_p
|
93
|
-
char *buf = NULL;
|
94
|
-
VALUE result;
|
54
|
+
static VALUE lz4internal_uncompress(VALUE self, VALUE input, VALUE in_size, VALUE offset, VALUE out_size) {
|
55
|
+
const char *src_p;
|
95
56
|
int src_size;
|
96
|
-
|
97
|
-
int
|
57
|
+
|
58
|
+
int header_size;
|
59
|
+
|
60
|
+
VALUE result;
|
61
|
+
char *buf;
|
62
|
+
int buf_size;
|
63
|
+
|
98
64
|
int read_bytes;
|
99
65
|
|
100
|
-
Check_Type(
|
101
|
-
src_p = RSTRING_PTR(
|
102
|
-
src_size =
|
66
|
+
Check_Type(input, T_STRING);
|
67
|
+
src_p = RSTRING_PTR(input);
|
68
|
+
src_size = NUM2INT(in_size);
|
103
69
|
|
104
|
-
|
70
|
+
header_size = NUM2INT(offset);
|
71
|
+
buf_size = NUM2INT(out_size);
|
105
72
|
|
106
73
|
result = rb_str_new(NULL, buf_size);
|
107
74
|
buf = RSTRING_PTR(result);
|
108
75
|
|
109
|
-
read_bytes =
|
76
|
+
read_bytes = LZ4_uncompress_unknownOutputSize(src_p + header_size, buf, src_size - header_size, buf_size);
|
110
77
|
if (read_bytes < 0) {
|
111
78
|
rb_raise(lz4_error, "Compressed data is maybe corrupted.");
|
112
79
|
}
|
@@ -115,11 +82,11 @@ static VALUE lz4_ruby_uncompress(VALUE self, VALUE source) {
|
|
115
82
|
}
|
116
83
|
|
117
84
|
void Init_lz4ruby(void) {
|
118
|
-
|
85
|
+
lz4internal = rb_define_module("LZ4Internal");
|
119
86
|
|
120
|
-
rb_define_module_function(
|
121
|
-
rb_define_module_function(
|
122
|
-
rb_define_module_function(
|
87
|
+
rb_define_module_function(lz4internal, "compress", lz4internal_compress, 3);
|
88
|
+
rb_define_module_function(lz4internal, "compressHC", lz4internal_compressHC, 3);
|
89
|
+
rb_define_module_function(lz4internal, "uncompress", lz4internal_uncompress, 4);
|
123
90
|
|
124
|
-
lz4_error = rb_define_class_under(
|
91
|
+
lz4_error = rb_define_class_under(lz4internal, "Error", rb_eStandardError);
|
125
92
|
}
|
data/lib/1.8/lz4ruby.so
CHANGED
Binary file
|
data/lib/1.9/lz4ruby.so
CHANGED
Binary file
|
data/lib/lz4-ruby.rb
CHANGED
@@ -7,17 +7,66 @@ else
|
|
7
7
|
end
|
8
8
|
|
9
9
|
class LZ4
|
10
|
-
def self.compress(
|
11
|
-
|
12
|
-
return LZ4Native::compress(source, src_size)
|
10
|
+
def self.compress(input, in_size = nil)
|
11
|
+
return _compress(input, in_size, false)
|
13
12
|
end
|
14
13
|
|
15
|
-
def self.compressHC(
|
16
|
-
|
17
|
-
return LZ4Native::compressHC(source, src_size)
|
14
|
+
def self.compressHC(input, in_size = nil)
|
15
|
+
return _compress(input, in_size, true)
|
18
16
|
end
|
17
|
+
|
18
|
+
def self._compress(input, in_size, high_compression)
|
19
|
+
in_size = input.length if in_size == nil
|
20
|
+
header = encode_varbyte(in_size)
|
19
21
|
|
20
|
-
|
21
|
-
|
22
|
+
if high_compression
|
23
|
+
return LZ4Internal.compressHC(header, input, in_size)
|
24
|
+
else
|
25
|
+
return LZ4Internal.compress(header, input, in_size)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.uncompress(input, in_size = nil)
|
30
|
+
in_size = input.length if in_size == nil
|
31
|
+
out_size, varbyte_len = decode_varbyte(input)
|
32
|
+
|
33
|
+
if out_size < 0 || varbyte_len < 0
|
34
|
+
raise "Compressed data is maybe corrupted"
|
35
|
+
end
|
36
|
+
|
37
|
+
return LZ4Internal::uncompress(input, in_size, varbyte_len, out_size)
|
38
|
+
end
|
39
|
+
|
40
|
+
def self.encode_varbyte(val)
|
41
|
+
varbytes = []
|
42
|
+
|
43
|
+
loop do
|
44
|
+
byte = val & 0x7f
|
45
|
+
val >>= 7
|
46
|
+
|
47
|
+
if val == 0
|
48
|
+
varbytes.push(byte)
|
49
|
+
break
|
50
|
+
else
|
51
|
+
varbytes.push(byte | 0x80)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
return varbytes.pack("C*")
|
56
|
+
end
|
57
|
+
|
58
|
+
def self.decode_varbyte(text)
|
59
|
+
len = [text.length, 5].min
|
60
|
+
bytes = text[0, len].unpack("C*")
|
61
|
+
|
62
|
+
varbyte_len = 0
|
63
|
+
val = 0
|
64
|
+
bytes.each do |b|
|
65
|
+
val |= (b & 0x7f) << (7 * varbyte_len)
|
66
|
+
varbyte_len += 1
|
67
|
+
return val, varbyte_len if b & 0x80 == 0
|
68
|
+
end
|
69
|
+
|
70
|
+
return -1, -1
|
22
71
|
end
|
23
72
|
end
|
data/test/helper.rb
CHANGED
data/test/test_lz4-ruby.rb
CHANGED
@@ -3,15 +3,30 @@ require 'helper'
|
|
3
3
|
class TestLz4Ruby < Test::Unit::TestCase
|
4
4
|
LOOP_COUNT = 257
|
5
5
|
|
6
|
-
|
6
|
+
srand(123)
|
7
|
+
|
8
|
+
def self.random_bytes(len)
|
9
|
+
result = []
|
10
|
+
len.times do |t|
|
11
|
+
result << rand(256)
|
12
|
+
end
|
13
|
+
return result.pack("C*")
|
14
|
+
end
|
7
15
|
|
8
16
|
context "LZ4::compress" do
|
9
17
|
should "empty text" do
|
10
18
|
compressed = LZ4::compress("")
|
11
19
|
uncompressed = LZ4::uncompress(compressed)
|
12
|
-
|
20
|
+
assert_equal("", uncompressed)
|
13
21
|
end
|
14
|
-
|
22
|
+
|
23
|
+
should "long text" do
|
24
|
+
text = "a" * 131073
|
25
|
+
compressed = LZ4.compress(text)
|
26
|
+
uncompressed = LZ4.uncompress(compressed)
|
27
|
+
assert_equal(text, uncompressed)
|
28
|
+
end
|
29
|
+
|
15
30
|
LOOP_COUNT.times do |t|
|
16
31
|
len = t + 1
|
17
32
|
text = "a" * len
|
@@ -25,7 +40,7 @@ class TestLz4Ruby < Test::Unit::TestCase
|
|
25
40
|
|
26
41
|
LOOP_COUNT.times do |t|
|
27
42
|
len = t + 1
|
28
|
-
text =
|
43
|
+
text = random_bytes(len)
|
29
44
|
|
30
45
|
should "random text of #{len} bytes" do
|
31
46
|
compressed = LZ4::compress(text)
|
@@ -49,9 +64,16 @@ class TestLz4Ruby < Test::Unit::TestCase
|
|
49
64
|
should "empty text" do
|
50
65
|
compressed = LZ4::compressHC("")
|
51
66
|
uncompressed = LZ4::uncompress(compressed)
|
52
|
-
|
67
|
+
assert_equal("", uncompressed)
|
53
68
|
end
|
54
69
|
|
70
|
+
should "long text" do
|
71
|
+
text = "a" * 131073
|
72
|
+
compressed = LZ4.compressHC(text)
|
73
|
+
uncompressed = LZ4.uncompress(compressed)
|
74
|
+
assert_equal(text, uncompressed)
|
75
|
+
end
|
76
|
+
|
55
77
|
LOOP_COUNT.times do |t|
|
56
78
|
len = t + 1
|
57
79
|
text = "a" * len
|
@@ -65,7 +87,7 @@ class TestLz4Ruby < Test::Unit::TestCase
|
|
65
87
|
|
66
88
|
LOOP_COUNT.times do |t|
|
67
89
|
len = t + 1
|
68
|
-
text =
|
90
|
+
text = random_bytes(len)
|
69
91
|
|
70
92
|
should "random text of #{len} bytes" do
|
71
93
|
compressed = LZ4::compressHC(text)
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lz4-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 21
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
- 6
|
10
|
-
version: 0.1.6
|
9
|
+
- 7
|
10
|
+
version: 0.1.7
|
11
11
|
platform: x86-mingw32
|
12
12
|
authors:
|
13
13
|
- KOMIYA Atsushi
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
18
|
+
date: 2012-08-14 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
type: :development
|