lz4-ruby 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.6
1
+ 0.1.7
@@ -34,31 +34,24 @@
34
34
  //**************************************
35
35
  // Tuning parameters
36
36
  //**************************************
37
- // COMPRESSIONLEVEL :
38
- // Increasing this value improves compression ratio
39
- // Lowering this value reduces memory usage
40
- // Reduced memory usage typically improves speed, due to cache effect (ex : L1 32KB for Intel, L1 64KB for AMD)
41
- // Memory usage formula : N->2^(N+2) Bytes (examples : 12 -> 16KB ; 17 -> 512KB)
42
- #define COMPRESSIONLEVEL 12
43
-
44
- // NOTCOMPRESSIBLE_CONFIRMATION :
37
+ // MEMORY_USAGE :
38
+ // Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
39
+ // Increasing memory usage improves compression ratio
40
+ // Reduced memory usage can improve speed, due to cache effect
41
+ // Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
42
+ #define MEMORY_USAGE 14
43
+
44
+ // NOTCOMPRESSIBLE_DETECTIONLEVEL :
45
45
  // Decreasing this value will make the algorithm skip data segments considered "incompressible" faster
46
46
  // This may decrease compression ratio dramatically, but will be faster on incompressible data
47
47
  // Increasing this value will make the algorithm search more before declaring a segment "incompressible"
48
48
  // This could improve compression a bit, but will be slower on incompressible data
49
49
  // The default value (6) is recommended
50
- #define NOTCOMPRESSIBLE_CONFIRMATION 6
51
-
52
- // LZ4_COMPRESSMIN :
53
- // Compression function will *fail* if it is not successful at compressing input by at least LZ4_COMPRESSMIN bytes
54
- // Since the compression function stops working prematurely, it results in a speed gain
55
- // The output however is unusable. Compression function result will be zero.
56
- // Default : 0 = disabled
57
- #define LZ4_COMPRESSMIN 0
50
+ #define NOTCOMPRESSIBLE_DETECTIONLEVEL 6
58
51
 
59
52
  // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
60
- // This will provide a boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU.
61
- // You can set this option to 1 in situations where data will stay within closed environment
53
+ // This will provide a small performance boost on big-endian CPUs, but the resulting compressed stream will be incompatible with little-endian CPUs.
54
+ // You can set this option to 1 in situations where data will remain within a closed environment
62
55
  // This option is useless on Little_Endian CPU (such as x86)
63
56
  //#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
64
57
 
@@ -108,6 +101,7 @@
108
101
 
109
102
  #ifdef _MSC_VER // Visual Studio
110
103
  # define inline __forceinline // Visual is not C99, but supports some kind of inline
104
+ # include <intrin.h> // For Visual 2005
111
105
  # if LZ4_ARCH64 // 64-bit
112
106
  # pragma intrinsic(_BitScanForward64) // For Visual 2005
113
107
  # pragma intrinsic(_BitScanReverse64) // For Visual 2005
@@ -181,11 +175,11 @@ typedef struct _U64_S { U64 v; } U64_S;
181
175
  //**************************************
182
176
  #define MINMATCH 4
183
177
 
184
- #define HASH_LOG COMPRESSIONLEVEL
178
+ #define HASH_LOG (MEMORY_USAGE-2)
185
179
  #define HASHTABLESIZE (1 << HASH_LOG)
186
180
  #define HASH_MASK (HASHTABLESIZE - 1)
187
181
 
188
- #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION>2?NOTCOMPRESSIBLE_CONFIRMATION:2)
182
+ #define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2)
189
183
  #define STACKLIMIT 13
190
184
  #define HEAPMODE (HASH_LOG>STACKLIMIT) // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()).
191
185
  #define COPYLENGTH 8
@@ -257,7 +251,7 @@ struct refTables
257
251
  //****************************
258
252
  #if LZ4_ARCH64
259
253
 
260
- inline static int LZ4_NbCommonBytes (register U64 val)
254
+ static inline int LZ4_NbCommonBytes (register U64 val)
261
255
  {
262
256
  #if defined(LZ4_BIG_ENDIAN)
263
257
  #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
@@ -289,7 +283,7 @@ inline static int LZ4_NbCommonBytes (register U64 val)
289
283
 
290
284
  #else
291
285
 
292
- inline static int LZ4_NbCommonBytes (register U32 val)
286
+ static inline int LZ4_NbCommonBytes (register U32 val)
293
287
  {
294
288
  #if defined(LZ4_BIG_ENDIAN)
295
289
  #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
@@ -321,25 +315,22 @@ inline static int LZ4_NbCommonBytes (register U32 val)
321
315
  #endif
322
316
 
323
317
 
324
- //****************************
325
- // Public functions
326
- //****************************
327
-
328
- int LZ4_compressBound(int isize)
329
- {
330
- return (isize + (isize/255) + 16);
331
- }
332
-
333
-
334
318
 
335
319
  //******************************
336
320
  // Compression functions
337
321
  //******************************
338
322
 
339
- int LZ4_compressCtx(void** ctx,
323
+ // LZ4_compressCtx :
324
+ // -----------------
325
+ // Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
326
+ // If the compressed data does not fit within 'maxOutputSize', compression will stop, and the result of the function will be zero.
327
+ // return : the number of bytes written in buffer 'dest', or 0 if the compression fails
328
+
329
+ static inline int LZ4_compressCtx(void** ctx,
340
330
  const char* source,
341
331
  char* dest,
342
- int isize)
332
+ int isize,
333
+ int maxOutputSize)
343
334
  {
344
335
  #if HEAPMODE
345
336
  struct refTables *srt = (struct refTables *) (*ctx);
@@ -356,6 +347,7 @@ int LZ4_compressCtx(void** ctx,
356
347
  #define matchlimit (iend - LASTLITERALS)
357
348
 
358
349
  BYTE* op = (BYTE*) dest;
350
+ BYTE* const oend = op + maxOutputSize;
359
351
 
360
352
  int len, length;
361
353
  const int skipStrength = SKIPSTRENGTH;
@@ -408,17 +400,37 @@ int LZ4_compressCtx(void** ctx,
408
400
  while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
409
401
 
410
402
  // Encode Literal length
411
- length = ip - anchor;
403
+ length = (int)(ip - anchor);
412
404
  token = op++;
405
+ if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) >= oend) return 0; // Check output limit
406
+ #ifdef _MSC_VER
407
+ if (length>=(int)RUN_MASK)
408
+ {
409
+ int len = length-RUN_MASK;
410
+ *token=(RUN_MASK<<ML_BITS);
411
+ if (len>254)
412
+ {
413
+ do { *op++ = 255; len -= 255; } while (len>254);
414
+ *op++ = (BYTE)len;
415
+ memcpy(op, anchor, length);
416
+ op += length;
417
+ goto _next_match;
418
+ }
419
+ else
420
+ *op++ = (BYTE)len;
421
+ }
422
+ else *token = (length<<ML_BITS);
423
+ #else
413
424
  if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
414
425
  else *token = (length<<ML_BITS);
426
+ #endif
415
427
 
416
428
  // Copy Literals
417
429
  LZ4_BLINDCOPY(anchor, op, length);
418
430
 
419
431
  _next_match:
420
432
  // Encode Offset
421
- LZ4_WRITE_LITTLEENDIAN_16(op,ip-ref);
433
+ LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
422
434
 
423
435
  // Start Counting
424
436
  ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified
@@ -436,7 +448,7 @@ _next_match:
436
448
  _endCount:
437
449
 
438
450
  // Encode MatchLength
439
- len = (ip - anchor);
451
+ len = (int)(ip - anchor);
440
452
  if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; }
441
453
  else *token += len;
442
454
 
@@ -459,8 +471,8 @@ _endCount:
459
471
  _last_literals:
460
472
  // Encode Last Literals
461
473
  {
462
- int lastRun = iend - anchor;
463
- if ((LZ4_COMPRESSMIN>0) && (((op - (BYTE*)dest) + lastRun + 1 + ((lastRun-15)/255)) > isize - LZ4_COMPRESSMIN)) return 0;
474
+ int lastRun = (int)(iend - anchor);
475
+ if (((char*)op - dest) + lastRun + 1 + ((lastRun-15)/255) >= maxOutputSize) return 0;
464
476
  if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
465
477
  else *op++ = (lastRun<<ML_BITS);
466
478
  memcpy(op, anchor, iend - anchor);
@@ -479,10 +491,11 @@ _last_literals:
479
491
  #define HASH64KTABLESIZE (1U<<HASHLOG64K)
480
492
  #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG64K))
481
493
  #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))
482
- int LZ4_compress64kCtx(void** ctx,
494
+ static inline int LZ4_compress64kCtx(void** ctx,
483
495
  const char* source,
484
496
  char* dest,
485
- int isize)
497
+ int isize,
498
+ int maxOutputSize)
486
499
  {
487
500
  #if HEAPMODE
488
501
  struct refTables *srt = (struct refTables *) (*ctx);
@@ -499,6 +512,7 @@ int LZ4_compress64kCtx(void** ctx,
499
512
  #define matchlimit (iend - LASTLITERALS)
500
513
 
501
514
  BYTE* op = (BYTE*) dest;
515
+ BYTE* const oend = op + maxOutputSize;
502
516
 
503
517
  int len, length;
504
518
  const int skipStrength = SKIPSTRENGTH;
@@ -542,7 +556,7 @@ int LZ4_compress64kCtx(void** ctx,
542
556
 
543
557
  forwardH = LZ4_HASH64K_VALUE(forwardIp);
544
558
  ref = base + HashTable[h];
545
- HashTable[h] = ip - base;
559
+ HashTable[h] = (U16)(ip - base);
546
560
 
547
561
  } while (A32(ref) != A32(ip));
548
562
 
@@ -550,17 +564,37 @@ int LZ4_compress64kCtx(void** ctx,
550
564
  while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; }
551
565
 
552
566
  // Encode Literal length
553
- length = ip - anchor;
567
+ length = (int)(ip - anchor);
554
568
  token = op++;
569
+ if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) >= oend) return 0; // Check output limit
570
+ #ifdef _MSC_VER
571
+ if (length>=(int)RUN_MASK)
572
+ {
573
+ int len = length-RUN_MASK;
574
+ *token=(RUN_MASK<<ML_BITS);
575
+ if (len>254)
576
+ {
577
+ do { *op++ = 255; len -= 255; } while (len>254);
578
+ *op++ = (BYTE)len;
579
+ memcpy(op, anchor, length);
580
+ op += length;
581
+ goto _next_match;
582
+ }
583
+ else
584
+ *op++ = (BYTE)len;
585
+ }
586
+ else *token = (length<<ML_BITS);
587
+ #else
555
588
  if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
556
589
  else *token = (length<<ML_BITS);
590
+ #endif
557
591
 
558
592
  // Copy Literals
559
593
  LZ4_BLINDCOPY(anchor, op, length);
560
594
 
561
595
  _next_match:
562
596
  // Encode Offset
563
- LZ4_WRITE_LITTLEENDIAN_16(op,ip-ref);
597
+ LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
564
598
 
565
599
  // Start Counting
566
600
  ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified
@@ -578,7 +612,7 @@ _next_match:
578
612
  _endCount:
579
613
 
580
614
  // Encode MatchLength
581
- len = (ip - anchor);
615
+ len = (int)(ip - anchor);
582
616
  if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; }
583
617
  else *token += len;
584
618
 
@@ -586,11 +620,11 @@ _endCount:
586
620
  if (ip > mflimit) { anchor = ip; break; }
587
621
 
588
622
  // Fill table
589
- HashTable[LZ4_HASH64K_VALUE(ip-2)] = ip - 2 - base;
623
+ HashTable[LZ4_HASH64K_VALUE(ip-2)] = (U16)(ip - 2 - base);
590
624
 
591
625
  // Test next position
592
626
  ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
593
- HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
627
+ HashTable[LZ4_HASH64K_VALUE(ip)] = (U16)(ip - base);
594
628
  if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; }
595
629
 
596
630
  // Prepare next loop
@@ -601,8 +635,8 @@ _endCount:
601
635
  _last_literals:
602
636
  // Encode Last Literals
603
637
  {
604
- int lastRun = iend - anchor;
605
- if ((LZ4_COMPRESSMIN>0) && (((op - (BYTE*)dest) + lastRun + 1 + ((lastRun-15)/255)) > isize - LZ4_COMPRESSMIN)) return 0;
638
+ int lastRun = (int)(iend - anchor);
639
+ if (((char*)op - dest) + lastRun + 1 + ((lastRun)>>8) >= maxOutputSize) return 0;
606
640
  if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
607
641
  else *op++ = (lastRun<<ML_BITS);
608
642
  memcpy(op, anchor, iend - anchor);
@@ -614,26 +648,34 @@ _last_literals:
614
648
  }
615
649
 
616
650
 
617
-
618
- int LZ4_compress(const char* source,
619
- char* dest,
620
- int isize)
651
+ int LZ4_compress_limitedOutput(const char* source,
652
+ char* dest,
653
+ int isize,
654
+ int maxOutputSize)
621
655
  {
622
656
  #if HEAPMODE
623
657
  void* ctx = malloc(sizeof(struct refTables));
624
658
  int result;
625
659
  if (isize < LZ4_64KLIMIT)
626
- result = LZ4_compress64kCtx(&ctx, source, dest, isize);
627
- else result = LZ4_compressCtx(&ctx, source, dest, isize);
660
+ result = LZ4_compress64kCtx(&ctx, source, dest, isize, maxOutputSize);
661
+ else result = LZ4_compressCtx(&ctx, source, dest, isize, maxOutputSize);
628
662
  free(ctx);
629
663
  return result;
630
664
  #else
631
- if (isize < (int)LZ4_64KLIMIT) return LZ4_compress64kCtx(NULL, source, dest, isize);
632
- return LZ4_compressCtx(NULL, source, dest, isize);
665
+ if (isize < (int)LZ4_64KLIMIT) return LZ4_compress64kCtx(NULL, source, dest, isize, maxOutputSize);
666
+ return LZ4_compressCtx(NULL, source, dest, isize, maxOutputSize);
633
667
  #endif
634
668
  }
635
669
 
636
670
 
671
+ int LZ4_compress(const char* source,
672
+ char* dest,
673
+ int isize)
674
+ {
675
+ return LZ4_compress_limitedOutput(source, dest, isize, LZ4_compressBound(isize));
676
+ }
677
+
678
+
637
679
 
638
680
 
639
681
  //****************************
@@ -47,19 +47,22 @@ int LZ4_uncompress (const char* source, char* dest, int osize);
47
47
 
48
48
  /*
49
49
  LZ4_compress() :
50
+ Compresses 'isize' bytes from 'source' into 'dest'.
51
+ Destination buffer must be already allocated,
52
+ and must be sized to handle worst-case situations (input data not compressible)
53
+ Worst case size evaluation is provided by macro LZ4_compressBound()
54
+
50
55
  isize : is the input size. Max supported value is ~1.9GB
51
56
  return : the number of bytes written in buffer dest
52
- or 0 if the compression fails (if LZ4_COMPRESSMIN is set)
53
- note : destination buffer must be already allocated.
54
- destination buffer must be sized to handle worst cases situations (input data not compressible)
55
- worst case size evaluation is provided by function LZ4_compressBound()
57
+
56
58
 
57
59
  LZ4_uncompress() :
58
60
  osize : is the output size, therefore the original size
59
61
  return : the number of bytes read in the source buffer
60
62
  If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
61
63
  This function never writes beyond dest + osize, and is therefore protected against malicious data packets
62
- note : destination buffer must be already allocated
64
+ note : destination buffer must be already allocated.
65
+ its size must be a minimum of 'osize' bytes.
63
66
  */
64
67
 
65
68
 
@@ -67,7 +70,7 @@ LZ4_uncompress() :
67
70
  // Advanced Functions
68
71
  //****************************
69
72
 
70
- int LZ4_compressBound(int isize);
73
+ #define LZ4_compressBound(isize) (isize + (isize/255) + 16)
71
74
 
72
75
  /*
73
76
  LZ4_compressBound() :
@@ -80,6 +83,21 @@ LZ4_compressBound() :
80
83
  */
81
84
 
82
85
 
86
+ int LZ4_compress_limitedOutput (const char* source, char* dest, int isize, int maxOutputSize);
87
+
88
+ /*
89
+ LZ4_compress_limitedOutput() :
90
+ Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
91
+ If the compressed data does not fit within 'maxOutputSize', compression will stop, and the result of the function will be zero.
92
+ This function never writes outside of provided output buffer.
93
+
94
+ isize : is the input size. Max supported value is ~1.9GB
95
+ maxOutputSize : is the size of the destination buffer (which must be already allocated)
96
+ return : the number of bytes written in buffer 'dest'
97
+ or 0 if the compression fails
98
+ */
99
+
100
+
83
101
  int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
84
102
 
85
103
  /*
@@ -94,27 +112,6 @@ LZ4_uncompress_unknownOutputSize() :
94
112
  */
95
113
 
96
114
 
97
- int LZ4_compressCtx(void** ctx, const char* source, char* dest, int isize);
98
- int LZ4_compress64kCtx(void** ctx, const char* source, char* dest, int isize);
99
-
100
- /*
101
- LZ4_compressCtx() :
102
- This function explicitly handles the CTX memory structure.
103
- It avoids allocating/deallocating memory between each call, improving performance when malloc is heavily invoked.
104
- This function is only useful when memory is allocated into the heap (HASH_LOG value beyond STACK_LIMIT)
105
- Performance difference will be noticeable only when repetitively calling the compression function over many small segments.
106
- Note : by default, memory is allocated into the stack, therefore "malloc" is not invoked.
107
- LZ4_compress64kCtx() :
108
- Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
109
- isize *Must* be <64KB, otherwise the output will be corrupted.
110
-
111
- On first call : provide a *ctx=NULL; It will be automatically allocated.
112
- On next calls : reuse the same ctx pointer.
113
- Use different pointers for different threads when doing multi-threading.
114
-
115
- */
116
-
117
-
118
115
  #if defined (__cplusplus)
119
116
  }
120
117
  #endif
@@ -68,12 +68,20 @@
68
68
 
69
69
  #ifdef _MSC_VER
70
70
  #define inline __forceinline // Visual is not C99, but supports some kind of inline
71
+ #include <intrin.h> // For Visual 2005
72
+ # if LZ4_ARCH64 // 64-bit
73
+ # pragma intrinsic(_BitScanForward64) // For Visual 2005
74
+ # pragma intrinsic(_BitScanReverse64) // For Visual 2005
75
+ # else
76
+ # pragma intrinsic(_BitScanForward) // For Visual 2005
77
+ # pragma intrinsic(_BitScanReverse) // For Visual 2005
78
+ # endif
71
79
  #endif
72
80
 
73
81
  #ifdef _MSC_VER // Visual Studio
74
- #define bswap16(x) _byteswap_ushort(x)
82
+ #define lz4_bswap16(x) _byteswap_ushort(x)
75
83
  #else
76
- #define bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
84
+ #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
77
85
  #endif
78
86
 
79
87
 
@@ -174,8 +182,8 @@ typedef struct _U64_S { U64 v; } U64_S;
174
182
  #endif
175
183
 
176
184
  #if defined(LZ4_BIG_ENDIAN)
177
- #define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = bswap16(v); d = (s) - v; }
178
- #define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = bswap16(v); A16(p) = v; p+=2; }
185
+ #define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
186
+ #define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
179
187
  #else // Little Endian
180
188
  #define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
181
189
  #define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
@@ -350,7 +358,7 @@ inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const
350
358
  if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
351
359
  _endCount:
352
360
 
353
- if (ipt-ip > ml) { ml = ipt-ip; *matchpos = ref; }
361
+ if (ipt-ip > ml) { ml = (int)(ipt-ip); *matchpos = ref; }
354
362
  }
355
363
  ref = GETNEXT(ref);
356
364
  }
@@ -366,7 +374,7 @@ inline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const
366
374
  INITBASE(base,hc4->base);
367
375
  const BYTE* ref;
368
376
  int nbAttempts = MAX_NB_ATTEMPTS;
369
- int delta = ip-startLimit;
377
+ int delta = (int)(ip-startLimit);
370
378
 
371
379
  // First Match
372
380
  LZ4HC_Insert(hc4, ip);
@@ -399,7 +407,7 @@ _endCount:
399
407
 
400
408
  if ((ipt-startt) > longest)
401
409
  {
402
- longest = ipt-startt;
410
+ longest = (int)(ipt-startt);
403
411
  *matchpos = reft;
404
412
  *startpos = startt;
405
413
  }
@@ -417,7 +425,7 @@ inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** an
417
425
  BYTE* token;
418
426
 
419
427
  // Encode Literal length
420
- length = *ip - *anchor;
428
+ length = (int)(*ip - *anchor);
421
429
  token = (*op)++;
422
430
  if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
423
431
  else *token = (length<<ML_BITS);
@@ -426,7 +434,7 @@ inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** an
426
434
  LZ4_BLINDCOPY(*anchor, *op, length);
427
435
 
428
436
  // Encode Offset
429
- LZ4_WRITE_LITTLEENDIAN_16(*op,*ip-ref);
437
+ LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
430
438
 
431
439
  // Encode MatchLength
432
440
  len = (int)(ml-MINMATCH);
@@ -519,8 +527,8 @@ _Search3:
519
527
  int correction;
520
528
  int new_ml = ml;
521
529
  if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
522
- if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = start2 - ip + ml2 - MINMATCH;
523
- correction = new_ml - (start2 - ip);
530
+ if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
531
+ correction = new_ml - (int)(start2 - ip);
524
532
  if (correction > 0)
525
533
  {
526
534
  start2 += correction;
@@ -543,8 +551,8 @@ _Search3:
543
551
  {
544
552
  int correction;
545
553
  if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
546
- if (ip+ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH;
547
- correction = ml - (start2 - ip);
554
+ if (ip+ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
555
+ correction = ml - (int)(start2 - ip);
548
556
  if (correction > 0)
549
557
  {
550
558
  start2 += correction;
@@ -554,7 +562,7 @@ _Search3:
554
562
  }
555
563
  else
556
564
  {
557
- ml = start2 - ip;
565
+ ml = (int)(start2 - ip);
558
566
  }
559
567
  }
560
568
  // Now, encode 2 sequences
@@ -570,7 +578,7 @@ _Search3:
570
578
  {
571
579
  if (start2 < ip+ml)
572
580
  {
573
- int correction = (ip+ml) - start2;
581
+ int correction = (int)(ip+ml - start2);
574
582
  start2 += correction;
575
583
  ref2 += correction;
576
584
  ml2 -= correction;
@@ -607,8 +615,8 @@ _Search3:
607
615
  {
608
616
  int correction;
609
617
  if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
610
- if (ip + ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH;
611
- correction = ml - (start2 - ip);
618
+ if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
619
+ correction = ml - (int)(start2 - ip);
612
620
  if (correction > 0)
613
621
  {
614
622
  start2 += correction;
@@ -618,7 +626,7 @@ _Search3:
618
626
  }
619
627
  else
620
628
  {
621
- ml = start2 - ip;
629
+ ml = (int)(start2 - ip);
622
630
  }
623
631
  }
624
632
  LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
@@ -637,7 +645,7 @@ _Search3:
637
645
 
638
646
  // Encode Last Literals
639
647
  {
640
- int lastRun = iend - anchor;
648
+ int lastRun = (int)(iend - anchor);
641
649
  if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
642
650
  else *op++ = (lastRun<<ML_BITS);
643
651
  memcpy(op, anchor, iend - anchor);
@@ -4,109 +4,76 @@
4
4
 
5
5
  typedef int (*CompressFunc)(const char *source, char *dest, int isize);
6
6
 
7
- static VALUE lz4;
7
+ static VALUE lz4internal;
8
8
  static VALUE lz4_error;
9
9
 
10
- static int encode_varbyte(int value, char *buf) {
11
- buf[0] = value & 0x7f; value >>= 7;
12
- if (value == 0) { return 1; }
13
- buf[0] |= 0x80;
14
-
15
- buf[1] = value & 0x7f; value >>= 7;
16
- if (value == 0) { return 2; }
17
- buf[1] |= 0x80;
18
-
19
- buf[2] = value & 0x7f; value >>= 7;
20
- if (value == 0) { return 3; }
21
- buf[2] |= 0x80;
22
-
23
- buf[3] = value & 0x7f; value >>= 7;
24
- if (value == 0) { return 4; }
25
- buf[3] |= 0x80;
26
-
27
- buf[4] = value & 0x7f;
28
- return 5;
29
- }
30
-
31
- static int decode_varbyte(const char *src, int len, int *value) {
32
- if (len < 1) { return -1; }
33
-
34
- *value = src[0] & 0x7f;
35
- if ((src[0] & 0x80) == 0) { return 1; }
36
- if (len < 2) { return -1; }
37
-
38
- *value |= (src[1] & 0x7f) << 7;
39
- if ((src[1] & 0x80) == 0) { return 2; }
40
- if (len < 3) { return -1; }
41
-
42
- *value |= (src[2] & 0x7f) << 14;
43
- if ((src[2] & 0x80) == 0) { return 3; }
44
- if (len < 4) { return -1; }
45
-
46
- *value |= (src[3] & 0x7f) << 21;
47
- if ((src[3] & 0x80) == 0) { return 4; }
48
- if (len < 5) { return -1; }
49
-
50
- *value |= (src[4] & 0x7f) << 28;
10
+ /**
11
+ * LZ4Internal functions.
12
+ */
13
+ static VALUE compress_internal(CompressFunc compressor, VALUE header, VALUE input, VALUE in_size) {
14
+ const char *src_p;
15
+ int src_size;
51
16
 
52
- return 5;
53
- }
17
+ const char *header_p;
18
+ int header_size;
54
19
 
55
- static VALUE compress(CompressFunc compressor, VALUE self, VALUE source, VALUE src_size_prm) {
56
- const char *src_p = NULL;
57
- char varbyte[5];
58
- char *buf = NULL;
59
20
  VALUE result;
60
- int src_size;
61
- int varbyte_len;
21
+ char *buf;
62
22
  int buf_size;
23
+
63
24
  int comp_size;
64
25
 
65
- Check_Type(source, T_STRING);
66
- src_p = RSTRING_PTR(source);
67
- src_size = NUM2INT(src_size_prm);
26
+ Check_Type(input, T_STRING);
27
+ src_p = RSTRING_PTR(input);
28
+ src_size = NUM2INT(in_size);
68
29
  buf_size = LZ4_compressBound(src_size);
69
30
 
70
- varbyte_len = encode_varbyte(src_size, varbyte);
31
+ Check_Type(header, T_STRING);
32
+ header_p = RSTRING_PTR(header);
33
+ header_size = RSTRING_LEN(header);
71
34
 
72
- result = rb_str_new(NULL, buf_size + varbyte_len);
35
+ result = rb_str_new(NULL, buf_size + header_size);
73
36
  buf = RSTRING_PTR(result);
74
37
 
75
- memcpy(buf, varbyte, varbyte_len);
38
+ memcpy(buf, header_p, header_size);
76
39
 
77
- comp_size = compressor(src_p, buf + varbyte_len, src_size);
78
- rb_str_resize(result, comp_size + varbyte_len);
40
+ comp_size = compressor(src_p, buf + header_size, src_size);
41
+ rb_str_resize(result, comp_size + header_size);
79
42
 
80
43
  return result;
81
44
  }
82
45
 
83
- static VALUE lz4_ruby_compress(VALUE self, VALUE source, VALUE src_size) {
84
- return compress(LZ4_compress, self, source, src_size);
46
+ static VALUE lz4internal_compress(VALUE self, VALUE header, VALUE input, VALUE in_size) {
47
+ return compress_internal(LZ4_compress, header, input, in_size);
85
48
  }
86
49
 
87
- static VALUE lz4_ruby_compressHC(VALUE self, VALUE source, VALUE src_size) {
88
- return compress(LZ4_compressHC, self, source, src_size);
50
+ static VALUE lz4internal_compressHC(VALUE self, VALUE header, VALUE input, VALUE in_size) {
51
+ return compress_internal(LZ4_compressHC, header, input, in_size);
89
52
  }
90
53
 
91
- static VALUE lz4_ruby_uncompress(VALUE self, VALUE source) {
92
- const char *src_p = NULL;
93
- char *buf = NULL;
94
- VALUE result;
54
+ static VALUE lz4internal_uncompress(VALUE self, VALUE input, VALUE in_size, VALUE offset, VALUE out_size) {
55
+ const char *src_p;
95
56
  int src_size;
96
- int varbyte_len;
97
- int buf_size = 0;
57
+
58
+ int header_size;
59
+
60
+ VALUE result;
61
+ char *buf;
62
+ int buf_size;
63
+
98
64
  int read_bytes;
99
65
 
100
- Check_Type(source, T_STRING);
101
- src_p = RSTRING_PTR(source);
102
- src_size = RSTRING_LEN(source);
66
+ Check_Type(input, T_STRING);
67
+ src_p = RSTRING_PTR(input);
68
+ src_size = NUM2INT(in_size);
103
69
 
104
- varbyte_len = decode_varbyte(src_p, src_size, &buf_size);
70
+ header_size = NUM2INT(offset);
71
+ buf_size = NUM2INT(out_size);
105
72
 
106
73
  result = rb_str_new(NULL, buf_size);
107
74
  buf = RSTRING_PTR(result);
108
75
 
109
- read_bytes = LZ4_uncompress(src_p + varbyte_len, buf, buf_size);
76
+ read_bytes = LZ4_uncompress_unknownOutputSize(src_p + header_size, buf, src_size - header_size, buf_size);
110
77
  if (read_bytes < 0) {
111
78
  rb_raise(lz4_error, "Compressed data is maybe corrupted.");
112
79
  }
@@ -115,11 +82,11 @@ static VALUE lz4_ruby_uncompress(VALUE self, VALUE source) {
115
82
  }
116
83
 
117
84
  void Init_lz4ruby(void) {
118
- lz4 = rb_define_module("LZ4Native");
85
+ lz4internal = rb_define_module("LZ4Internal");
119
86
 
120
- rb_define_module_function(lz4, "compress", lz4_ruby_compress, 2);
121
- rb_define_module_function(lz4, "compressHC", lz4_ruby_compressHC, 2);
122
- rb_define_module_function(lz4, "uncompress", lz4_ruby_uncompress, 1);
87
+ rb_define_module_function(lz4internal, "compress", lz4internal_compress, 3);
88
+ rb_define_module_function(lz4internal, "compressHC", lz4internal_compressHC, 3);
89
+ rb_define_module_function(lz4internal, "uncompress", lz4internal_uncompress, 4);
123
90
 
124
- lz4_error = rb_define_class_under(lz4, "Error", rb_eStandardError);
91
+ lz4_error = rb_define_class_under(lz4internal, "Error", rb_eStandardError);
125
92
  }
@@ -7,17 +7,66 @@ else
7
7
  end
8
8
 
9
9
  class LZ4
10
- def self.compress(source, src_size = nil)
11
- src_size = source.length if src_size == nil
12
- return LZ4Native::compress(source, src_size)
10
+ def self.compress(input, in_size = nil)
11
+ return _compress(input, in_size, false)
13
12
  end
14
13
 
15
- def self.compressHC(source, src_size = nil)
16
- src_size = source.length if src_size == nil
17
- return LZ4Native::compressHC(source, src_size)
14
+ def self.compressHC(input, in_size = nil)
15
+ return _compress(input, in_size, true)
18
16
  end
17
+
18
+ def self._compress(input, in_size, high_compression)
19
+ in_size = input.length if in_size == nil
20
+ header = encode_varbyte(in_size)
19
21
 
20
- def self.uncompress(source)
21
- return LZ4Native::uncompress(source)
22
+ if high_compression
23
+ return LZ4Internal.compressHC(header, input, in_size)
24
+ else
25
+ return LZ4Internal.compress(header, input, in_size)
26
+ end
27
+ end
28
+
29
+ def self.uncompress(input, in_size = nil)
30
+ in_size = input.length if in_size == nil
31
+ out_size, varbyte_len = decode_varbyte(input)
32
+
33
+ if out_size < 0 || varbyte_len < 0
34
+ raise "Compressed data is maybe corrupted"
35
+ end
36
+
37
+ return LZ4Internal::uncompress(input, in_size, varbyte_len, out_size)
38
+ end
39
+
40
+ def self.encode_varbyte(val)
41
+ varbytes = []
42
+
43
+ loop do
44
+ byte = val & 0x7f
45
+ val >>= 7
46
+
47
+ if val == 0
48
+ varbytes.push(byte)
49
+ break
50
+ else
51
+ varbytes.push(byte | 0x80)
52
+ end
53
+ end
54
+
55
+ return varbytes.pack("C*")
56
+ end
57
+
58
+ def self.decode_varbyte(text)
59
+ len = [text.length, 5].min
60
+ bytes = text[0, len].unpack("C*")
61
+
62
+ varbyte_len = 0
63
+ val = 0
64
+ bytes.each do |b|
65
+ val |= (b & 0x7f) << (7 * varbyte_len)
66
+ varbyte_len += 1
67
+ return val, varbyte_len if b & 0x80 == 0
68
+ end
69
+
70
+ return -1, -1
22
71
  end
23
72
  end
@@ -15,7 +15,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'ext/lz4ruby'))
15
15
  $LOAD_PATH.unshift(File.dirname(__FILE__))
16
16
 
17
17
  build_native = <<EOS
18
- cd ext/lz4-ruby
18
+ cd ext/lz4ruby
19
19
  ruby extconf.rb
20
20
  make clean
21
21
  make
@@ -3,15 +3,30 @@ require 'helper'
3
3
  class TestLz4Ruby < Test::Unit::TestCase
4
4
  LOOP_COUNT = 257
5
5
 
6
- @@random = Random.new(123)
6
+ srand(123)
7
+
8
+ def self.random_bytes(len)
9
+ result = []
10
+ len.times do |t|
11
+ result << rand(256)
12
+ end
13
+ return result.pack("C*")
14
+ end
7
15
 
8
16
  context "LZ4::compress" do
9
17
  should "empty text" do
10
18
  compressed = LZ4::compress("")
11
19
  uncompressed = LZ4::uncompress(compressed)
12
- assert_empty("", uncompressed)
20
+ assert_equal("", uncompressed)
13
21
  end
14
-
22
+
23
+ should "long text" do
24
+ text = "a" * 131073
25
+ compressed = LZ4.compress(text)
26
+ uncompressed = LZ4.uncompress(compressed)
27
+ assert_equal(text, uncompressed)
28
+ end
29
+
15
30
  LOOP_COUNT.times do |t|
16
31
  len = t + 1
17
32
  text = "a" * len
@@ -25,7 +40,7 @@ class TestLz4Ruby < Test::Unit::TestCase
25
40
 
26
41
  LOOP_COUNT.times do |t|
27
42
  len = t + 1
28
- text = @@random.bytes(len)
43
+ text = random_bytes(len)
29
44
 
30
45
  should "random text of #{len} bytes" do
31
46
  compressed = LZ4::compress(text)
@@ -49,9 +64,16 @@ class TestLz4Ruby < Test::Unit::TestCase
49
64
  should "empty text" do
50
65
  compressed = LZ4::compressHC("")
51
66
  uncompressed = LZ4::uncompress(compressed)
52
- assert_empty("", uncompressed)
67
+ assert_equal("", uncompressed)
53
68
  end
54
69
 
70
+ should "long text" do
71
+ text = "a" * 131073
72
+ compressed = LZ4.compressHC(text)
73
+ uncompressed = LZ4.uncompress(compressed)
74
+ assert_equal(text, uncompressed)
75
+ end
76
+
55
77
  LOOP_COUNT.times do |t|
56
78
  len = t + 1
57
79
  text = "a" * len
@@ -65,7 +87,7 @@ class TestLz4Ruby < Test::Unit::TestCase
65
87
 
66
88
  LOOP_COUNT.times do |t|
67
89
  len = t + 1
68
- text = @@random.bytes(len)
90
+ text = random_bytes(len)
69
91
 
70
92
  should "random text of #{len} bytes" do
71
93
  compressed = LZ4::compressHC(text)
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lz4-ruby
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 21
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 6
10
- version: 0.1.6
9
+ - 7
10
+ version: 0.1.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - KOMIYA Atsushi
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-06-13 00:00:00 Z
18
+ date: 2012-08-14 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :development