lz4-ruby 0.1.6-x86-mingw32 → 0.1.7-x86-mingw32

data/VERSION CHANGED
@@ -1 +1 @@
- 0.1.6
+ 0.1.7
@@ -34,31 +34,24 @@
  //**************************************
  // Tuning parameters
  //**************************************
- // COMPRESSIONLEVEL :
- // Increasing this value improves compression ratio
- // Lowering this value reduces memory usage
- // Reduced memory usage typically improves speed, due to cache effect (ex : L1 32KB for Intel, L1 64KB for AMD)
- // Memory usage formula : N->2^(N+2) Bytes (examples : 12 -> 16KB ; 17 -> 512KB)
- #define COMPRESSIONLEVEL 12
-
- // NOTCOMPRESSIBLE_CONFIRMATION :
+ // MEMORY_USAGE :
+ // Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ // Increasing memory usage improves compression ratio
+ // Reduced memory usage can improve speed, due to cache effect
+ // Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ #define MEMORY_USAGE 14
+
+ // NOTCOMPRESSIBLE_DETECTIONLEVEL :
  // Decreasing this value will make the algorithm skip faster data segments considered "incompressible"
  // This may decrease compression ratio dramatically, but will be faster on incompressible data
  // Increasing this value will make the algorithm search more before declaring a segment "incompressible"
  // This could improve compression a bit, but will be slower on incompressible data
  // The default value (6) is recommended
- #define NOTCOMPRESSIBLE_CONFIRMATION 6
-
- // LZ4_COMPRESSMIN :
- // Compression function will *fail* if it is not successful at compressing input by at least LZ4_COMPRESSMIN bytes
- // Since the compression function stops working prematurely, it results in a speed gain
- // The output however is unusable. Compression function result will be zero.
- // Default : 0 = disabled
- #define LZ4_COMPRESSMIN 0
+ #define NOTCOMPRESSIBLE_DETECTIONLEVEL 6

  // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
- // This will provide a boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU.
- // You can set this option to 1 in situations where data will stay within closed environment
+ // This will provide a small boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU.
+ // You can set this option to 1 in situations where data will remain within closed environment
  // This option is useless on Little_Endian CPU (such as x86)
  //#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1

@@ -108,6 +101,7 @@

  #ifdef _MSC_VER // Visual Studio
  # define inline __forceinline // Visual is not C99, but supports some kind of inline
+ # include <intrin.h> // For Visual 2005
  # if LZ4_ARCH64 // 64-bit
  # pragma intrinsic(_BitScanForward64) // For Visual 2005
  # pragma intrinsic(_BitScanReverse64) // For Visual 2005
@@ -181,11 +175,11 @@ typedef struct _U64_S { U64 v; } U64_S;
  //**************************************
  #define MINMATCH 4

- #define HASH_LOG COMPRESSIONLEVEL
+ #define HASH_LOG (MEMORY_USAGE-2)
  #define HASHTABLESIZE (1 << HASH_LOG)
  #define HASH_MASK (HASHTABLESIZE - 1)

- #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION>2?NOTCOMPRESSIBLE_CONFIRMATION:2)
+ #define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2)
  #define STACKLIMIT 13
  #define HEAPMODE (HASH_LOG>STACKLIMIT) // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()).
  #define COPYLENGTH 8
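The renamed parameter makes the sizing arithmetic checkable: with HASH_LOG defined as (MEMORY_USAGE-2), the hash table holds 2^(MEMORY_USAGE-2) entries, so assuming 4-byte table entries (the 32-bit layout, which is what makes the "N->2^N Bytes" comment line up) the table occupies exactly 2^MEMORY_USAGE bytes. A minimal Ruby sketch of that arithmetic:

# Sketch of the MEMORY_USAGE sizing described in the tuning comments above.
# Assumes 4-byte hash table entries, as on 32-bit builds.
def lz4_table_bytes(memory_usage)
  hash_log = memory_usage - 2   # HASH_LOG (MEMORY_USAGE-2)
  entries  = 1 << hash_log      # HASHTABLESIZE (1 << HASH_LOG)
  entries * 4                   # => 2**memory_usage bytes
end

[10, 12, 14, 16, 20].each do |n|
  puts "MEMORY_USAGE=#{n} -> #{lz4_table_bytes(n)} bytes"  # 1KB, 4KB, 16KB, 64KB, 1MB
end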
@@ -257,7 +251,7 @@ struct refTables
  //****************************
  #if LZ4_ARCH64

- inline static int LZ4_NbCommonBytes (register U64 val)
+ static inline int LZ4_NbCommonBytes (register U64 val)
  {
  #if defined(LZ4_BIG_ENDIAN)
  #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
@@ -289,7 +283,7 @@ inline static int LZ4_NbCommonBytes (register U64 val)

  #else

- inline static int LZ4_NbCommonBytes (register U32 val)
+ static inline int LZ4_NbCommonBytes (register U32 val)
  {
  #if defined(LZ4_BIG_ENDIAN)
  #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
@@ -321,25 +315,22 @@ inline static int LZ4_NbCommonBytes (register U32 val)
  #endif


- //****************************
- // Public functions
- //****************************
-
- int LZ4_compressBound(int isize)
- {
- return (isize + (isize/255) + 16);
- }
-
-

  //******************************
  // Compression functions
  //******************************

- int LZ4_compressCtx(void** ctx,
+ // LZ4_compressCtx :
+ // -----------------
+ // Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+ // If it cannot achieve it, compression will stop, and result of the function will be zero.
+ // return : the number of bytes written in buffer 'dest', or 0 if the compression fails
+
+ static inline int LZ4_compressCtx(void** ctx,
  const char* source,
  char* dest,
- int isize)
+ int isize,
+ int maxOutputSize)
  {
  #if HEAPMODE
  struct refTables *srt = (struct refTables *) (*ctx);
@@ -356,6 +347,7 @@ int LZ4_compressCtx(void** ctx,
  #define matchlimit (iend - LASTLITERALS)

  BYTE* op = (BYTE*) dest;
+ BYTE* const oend = op + maxOutputSize;

  int len, length;
  const int skipStrength = SKIPSTRENGTH;
@@ -408,17 +400,37 @@ int LZ4_compressCtx(void** ctx,
  while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }

  // Encode Literal length
- length = ip - anchor;
+ length = (int)(ip - anchor);
  token = op++;
+ if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) >= oend) return 0; // Check output limit
+ #ifdef _MSC_VER
+ if (length>=(int)RUN_MASK)
+ {
+ int len = length-RUN_MASK;
+ *token=(RUN_MASK<<ML_BITS);
+ if (len>254)
+ {
+ do { *op++ = 255; len -= 255; } while (len>254);
+ *op++ = (BYTE)len;
+ memcpy(op, anchor, length);
+ op += length;
+ goto _next_match;
+ }
+ else
+ *op++ = (BYTE)len;
+ }
+ else *token = (length<<ML_BITS);
+ #else
  if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
  else *token = (length<<ML_BITS);
+ #endif

  // Copy Literals
  LZ4_BLINDCOPY(anchor, op, length);

  _next_match:
  // Encode Offset
- LZ4_WRITE_LITTLEENDIAN_16(op,ip-ref);
+ LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));

  // Start Counting
  ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified
@@ -436,7 +448,7 @@ _next_match:
  _endCount:

  // Encode MatchLength
- len = (ip - anchor);
+ len = (int)(ip - anchor);
  if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; }
  else *token += len;

@@ -459,8 +471,8 @@ _endCount:
  _last_literals:
  // Encode Last Literals
  {
- int lastRun = iend - anchor;
- if ((LZ4_COMPRESSMIN>0) && (((op - (BYTE*)dest) + lastRun + 1 + ((lastRun-15)/255)) > isize - LZ4_COMPRESSMIN)) return 0;
+ int lastRun = (int)(iend - anchor);
+ if (((char*)op - dest) + lastRun + 1 + ((lastRun-15)/255) >= maxOutputSize) return 0;
  if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
  else *op++ = (lastRun<<ML_BITS);
  memcpy(op, anchor, iend - anchor);
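The output-limit check added above budgets, before each literal run is emitted, for the run itself, the worst-case sequence overhead, and the reserved tail. A rough Ruby restatement of that budget (LASTLITERALS = 5 is an assumption taken from upstream lz4.c of this era; `length>>8` is the code's cheap approximation of the extra run-length bytes, roughly one per 256 literals):

# Rough restatement of:
#   op + length + (2 + 1 + LASTLITERALS) + (length>>8) >= oend
LASTLITERALS = 5  # assumption from upstream lz4.c

def literal_run_budget(length)
  length +          # the literal bytes themselves
    2 + 1 +         # 2-byte match offset + 1 token byte for the sequence
    LASTLITERALS +  # reserve kept for the final literal run
    (length >> 8)   # ~1 extra run-length byte per 256 literals
end

puts literal_run_budget(1000)  # => 1011 bytes must remain before oend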
@@ -479,10 +491,11 @@ _last_literals:
  #define HASH64KTABLESIZE (1U<<HASHLOG64K)
  #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG64K))
  #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))
- int LZ4_compress64kCtx(void** ctx,
+ static inline int LZ4_compress64kCtx(void** ctx,
  const char* source,
  char* dest,
- int isize)
+ int isize,
+ int maxOutputSize)
  {
  #if HEAPMODE
  struct refTables *srt = (struct refTables *) (*ctx);
@@ -499,6 +512,7 @@ int LZ4_compress64kCtx(void** ctx,
  #define matchlimit (iend - LASTLITERALS)

  BYTE* op = (BYTE*) dest;
+ BYTE* const oend = op + maxOutputSize;

  int len, length;
  const int skipStrength = SKIPSTRENGTH;
@@ -542,7 +556,7 @@ int LZ4_compress64kCtx(void** ctx,

  forwardH = LZ4_HASH64K_VALUE(forwardIp);
  ref = base + HashTable[h];
- HashTable[h] = ip - base;
+ HashTable[h] = (U16)(ip - base);

  } while (A32(ref) != A32(ip));

@@ -550,17 +564,37 @@ int LZ4_compress64kCtx(void** ctx,
  while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; }

  // Encode Literal length
- length = ip - anchor;
+ length = (int)(ip - anchor);
  token = op++;
+ if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) >= oend) return 0; // Check output limit
+ #ifdef _MSC_VER
+ if (length>=(int)RUN_MASK)
+ {
+ int len = length-RUN_MASK;
+ *token=(RUN_MASK<<ML_BITS);
+ if (len>254)
+ {
+ do { *op++ = 255; len -= 255; } while (len>254);
+ *op++ = (BYTE)len;
+ memcpy(op, anchor, length);
+ op += length;
+ goto _next_match;
+ }
+ else
+ *op++ = (BYTE)len;
+ }
+ else *token = (length<<ML_BITS);
+ #else
  if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
  else *token = (length<<ML_BITS);
+ #endif

  // Copy Literals
  LZ4_BLINDCOPY(anchor, op, length);

  _next_match:
  // Encode Offset
- LZ4_WRITE_LITTLEENDIAN_16(op,ip-ref);
+ LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));

  // Start Counting
  ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified
@@ -578,7 +612,7 @@ _next_match:
  _endCount:

  // Encode MatchLength
- len = (ip - anchor);
+ len = (int)(ip - anchor);
  if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; }
  else *token += len;

@@ -586,11 +620,11 @@ _endCount:
  if (ip > mflimit) { anchor = ip; break; }

  // Fill table
- HashTable[LZ4_HASH64K_VALUE(ip-2)] = ip - 2 - base;
+ HashTable[LZ4_HASH64K_VALUE(ip-2)] = (U16)(ip - 2 - base);

  // Test next position
  ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
- HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
+ HashTable[LZ4_HASH64K_VALUE(ip)] = (U16)(ip - base);
  if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; }

  // Prepare next loop
@@ -601,8 +635,8 @@ _endCount:
  _last_literals:
  // Encode Last Literals
  {
- int lastRun = iend - anchor;
- if ((LZ4_COMPRESSMIN>0) && (((op - (BYTE*)dest) + lastRun + 1 + ((lastRun-15)/255)) > isize - LZ4_COMPRESSMIN)) return 0;
+ int lastRun = (int)(iend - anchor);
+ if (((char*)op - dest) + lastRun + 1 + ((lastRun)>>8) >= maxOutputSize) return 0;
  if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
  else *op++ = (lastRun<<ML_BITS);
  memcpy(op, anchor, iend - anchor);
@@ -614,26 +648,34 @@ _last_literals:
  }


-
- int LZ4_compress(const char* source,
- char* dest,
- int isize)
+ int LZ4_compress_limitedOutput(const char* source,
+ char* dest,
+ int isize,
+ int maxOutputSize)
  {
  #if HEAPMODE
  void* ctx = malloc(sizeof(struct refTables));
  int result;
  if (isize < LZ4_64KLIMIT)
- result = LZ4_compress64kCtx(&ctx, source, dest, isize);
- else result = LZ4_compressCtx(&ctx, source, dest, isize);
+ result = LZ4_compress64kCtx(&ctx, source, dest, isize, maxOutputSize);
+ else result = LZ4_compressCtx(&ctx, source, dest, isize, maxOutputSize);
  free(ctx);
  return result;
  #else
- if (isize < (int)LZ4_64KLIMIT) return LZ4_compress64kCtx(NULL, source, dest, isize);
- return LZ4_compressCtx(NULL, source, dest, isize);
+ if (isize < (int)LZ4_64KLIMIT) return LZ4_compress64kCtx(NULL, source, dest, isize, maxOutputSize);
+ return LZ4_compressCtx(NULL, source, dest, isize, maxOutputSize);
  #endif
  }


+ int LZ4_compress(const char* source,
+ char* dest,
+ int isize)
+ {
+ return LZ4_compress_limitedOutput(source, dest, isize, LZ4_compressBound(isize));
+ }
+
+


  //****************************
@@ -47,19 +47,22 @@ int LZ4_uncompress (const char* source, char* dest, int osize);

  /*
  LZ4_compress() :
+ Compresses 'isize' bytes from 'source' into 'dest'.
+ Destination buffer must be already allocated,
+ and must be sized to handle worst cases situations (input data not compressible)
+ Worst case size evaluation is provided by macro LZ4_compressBound()
+
  isize : is the input size. Max supported value is ~1.9GB
  return : the number of bytes written in buffer dest
- or 0 if the compression fails (if LZ4_COMPRESSMIN is set)
- note : destination buffer must be already allocated.
- destination buffer must be sized to handle worst cases situations (input data not compressible)
- worst case size evaluation is provided by function LZ4_compressBound()
+

  LZ4_uncompress() :
  osize : is the output size, therefore the original size
  return : the number of bytes read in the source buffer
  If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
  This function never writes beyond dest + osize, and is therefore protected against malicious data packets
- note : destination buffer must be already allocated
+ note : destination buffer must be already allocated.
+ its size must be a minimum of 'osize' bytes.
  */


@@ -67,7 +70,7 @@ LZ4_uncompress() :
  // Advanced Functions
  //****************************

- int LZ4_compressBound(int isize);
+ #define LZ4_compressBound(isize) (isize + (isize/255) + 16)

  /*
  LZ4_compressBound() :
@@ -80,6 +83,21 @@ LZ4_compressBound() :
  */


+ int LZ4_compress_limitedOutput (const char* source, char* dest, int isize, int maxOutputSize);
+
+ /*
+ LZ4_compress_limitedOutput() :
+ Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+ If it cannot achieve it, compression will stop, and result of the function will be zero.
+ This function never writes outside of provided output buffer.
+
+ isize : is the input size. Max supported value is ~1.9GB
+ maxOutputSize : is the size of the destination buffer (which must be already allocated)
+ return : the number of bytes written in buffer 'dest'
+ or 0 if the compression fails
+ */
+
+
  int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);

  /*
@@ -94,27 +112,6 @@ LZ4_uncompress_unknownOutputSize() :
  */


- int LZ4_compressCtx(void** ctx, const char* source, char* dest, int isize);
- int LZ4_compress64kCtx(void** ctx, const char* source, char* dest, int isize);
-
- /*
- LZ4_compressCtx() :
- This function explicitly handles the CTX memory structure.
- It avoids allocating/deallocating memory between each call, improving performance when malloc is heavily invoked.
- This function is only useful when memory is allocated into the heap (HASH_LOG value beyond STACK_LIMIT)
- Performance difference will be noticeable only when repetitively calling the compression function over many small segments.
- Note : by default, memory is allocated into the stack, therefore "malloc" is not invoked.
- LZ4_compress64kCtx() :
- Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
- isize *Must* be <64KB, otherwise the output will be corrupted.
-
- On first call : provide a *ctx=NULL; It will be automatically allocated.
- On next calls : reuse the same ctx pointer.
- Use different pointers for different threads when doing multi-threading.
-
- */
-
-
  #if defined (__cplusplus)
  }
  #endif
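Taken together, the header now offers two compression entry points: LZ4_compress(), which expects a destination sized by LZ4_compressBound() and so cannot run out of room, and LZ4_compress_limitedOutput(), which accepts any buffer size and returns 0 when the output does not fit. The bound itself is plain arithmetic; a minimal Ruby sketch (the method name is hypothetical):

# The worst-case bound from `#define LZ4_compressBound(isize) (isize + (isize/255) + 16)`.
def lz4_compress_bound(isize)
  isize + (isize / 255) + 16
end

puts lz4_compress_bound(65536)  # => 65809, always enough for LZ4_compress
# Anything smaller is a job for LZ4_compress_limitedOutput: a 0 return there
# means "output did not fit", not that the input was invalid.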
@@ -68,12 +68,20 @@

  #ifdef _MSC_VER
  #define inline __forceinline // Visual is not C99, but supports some kind of inline
+ #include <intrin.h> // For Visual 2005
+ # if LZ4_ARCH64 // 64-bit
+ # pragma intrinsic(_BitScanForward64) // For Visual 2005
+ # pragma intrinsic(_BitScanReverse64) // For Visual 2005
+ # else
+ # pragma intrinsic(_BitScanForward) // For Visual 2005
+ # pragma intrinsic(_BitScanReverse) // For Visual 2005
+ # endif
  #endif

  #ifdef _MSC_VER // Visual Studio
- #define bswap16(x) _byteswap_ushort(x)
+ #define lz4_bswap16(x) _byteswap_ushort(x)
  #else
- #define bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
+ #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
  #endif


@@ -174,8 +182,8 @@ typedef struct _U64_S { U64 v; } U64_S;
  #endif

  #if defined(LZ4_BIG_ENDIAN)
- #define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = bswap16(v); d = (s) - v; }
- #define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = bswap16(v); A16(p) = v; p+=2; }
+ #define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
+ #define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
  #else // Little Endian
  #define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
  #define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
@@ -350,7 +358,7 @@ inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const
  if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
  _endCount:

- if (ipt-ip > ml) { ml = ipt-ip; *matchpos = ref; }
+ if (ipt-ip > ml) { ml = (int)(ipt-ip); *matchpos = ref; }
  }
  ref = GETNEXT(ref);
  }
@@ -366,7 +374,7 @@ inline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const
  INITBASE(base,hc4->base);
  const BYTE* ref;
  int nbAttempts = MAX_NB_ATTEMPTS;
- int delta = ip-startLimit;
+ int delta = (int)(ip-startLimit);

  // First Match
  LZ4HC_Insert(hc4, ip);
@@ -399,7 +407,7 @@ _endCount:

  if ((ipt-startt) > longest)
  {
- longest = ipt-startt;
+ longest = (int)(ipt-startt);
  *matchpos = reft;
  *startpos = startt;
  }
@@ -417,7 +425,7 @@ inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** an
  BYTE* token;

  // Encode Literal length
- length = *ip - *anchor;
+ length = (int)(*ip - *anchor);
  token = (*op)++;
  if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
  else *token = (length<<ML_BITS);
@@ -426,7 +434,7 @@ inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** an
  LZ4_BLINDCOPY(*anchor, *op, length);

  // Encode Offset
- LZ4_WRITE_LITTLEENDIAN_16(*op,*ip-ref);
+ LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));

  // Encode MatchLength
  len = (int)(ml-MINMATCH);
@@ -519,8 +527,8 @@ _Search3:
  int correction;
  int new_ml = ml;
  if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
- if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = start2 - ip + ml2 - MINMATCH;
- correction = new_ml - (start2 - ip);
+ if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = new_ml - (int)(start2 - ip);
  if (correction > 0)
  {
  start2 += correction;
@@ -543,8 +551,8 @@ _Search3:
  {
  int correction;
  if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
- if (ip+ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH;
- correction = ml - (start2 - ip);
+ if (ip+ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = ml - (int)(start2 - ip);
  if (correction > 0)
  {
  start2 += correction;
@@ -554,7 +562,7 @@ _Search3:
  }
  else
  {
- ml = start2 - ip;
+ ml = (int)(start2 - ip);
  }
  }
  // Now, encode 2 sequences
@@ -570,7 +578,7 @@ _Search3:
  {
  if (start2 < ip+ml)
  {
- int correction = (ip+ml) - start2;
+ int correction = (int)(ip+ml - start2);
  start2 += correction;
  ref2 += correction;
  ml2 -= correction;
@@ -607,8 +615,8 @@ _Search3:
  {
  int correction;
  if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
- if (ip + ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH;
- correction = ml - (start2 - ip);
+ if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = ml - (int)(start2 - ip);
  if (correction > 0)
  {
  start2 += correction;
@@ -618,7 +626,7 @@ _Search3:
  }
  else
  {
- ml = start2 - ip;
+ ml = (int)(start2 - ip);
  }
  }
  LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
@@ -637,7 +645,7 @@ _Search3:

  // Encode Last Literals
  {
- int lastRun = iend - anchor;
+ int lastRun = (int)(iend - anchor);
  if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
  else *op++ = (lastRun<<ML_BITS);
  memcpy(op, anchor, iend - anchor);
@@ -4,109 +4,76 @@

  typedef int (*CompressFunc)(const char *source, char *dest, int isize);

- static VALUE lz4;
+ static VALUE lz4internal;
  static VALUE lz4_error;

- static int encode_varbyte(int value, char *buf) {
- buf[0] = value & 0x7f; value >>= 7;
- if (value == 0) { return 1; }
- buf[0] |= 0x80;
-
- buf[1] = value & 0x7f; value >>= 7;
- if (value == 0) { return 2; }
- buf[1] |= 0x80;
-
- buf[2] = value & 0x7f; value >>= 7;
- if (value == 0) { return 3; }
- buf[2] |= 0x80;
-
- buf[3] = value & 0x7f; value >>= 7;
- if (value == 0) { return 4; }
- buf[3] |= 0x80;
-
- buf[4] = value & 0x7f;
- return 5;
- }
-
- static int decode_varbyte(const char *src, int len, int *value) {
- if (len < 1) { return -1; }
-
- *value = src[0] & 0x7f;
- if ((src[0] & 0x80) == 0) { return 1; }
- if (len < 2) { return -1; }
-
- *value |= (src[1] & 0x7f) << 7;
- if ((src[1] & 0x80) == 0) { return 2; }
- if (len < 3) { return -1; }
-
- *value |= (src[2] & 0x7f) << 14;
- if ((src[2] & 0x80) == 0) { return 3; }
- if (len < 4) { return -1; }
-
- *value |= (src[3] & 0x7f) << 21;
- if ((src[3] & 0x80) == 0) { return 4; }
- if (len < 5) { return -1; }
-
- *value |= (src[4] & 0x7f) << 28;
+ /**
+ * LZ4Internal functions.
+ */
+ static VALUE compress_internal(CompressFunc compressor, VALUE header, VALUE input, VALUE in_size) {
+ const char *src_p;
+ int src_size;

- return 5;
- }
+ const char *header_p;
+ int header_size;

- static VALUE compress(CompressFunc compressor, VALUE self, VALUE source, VALUE src_size_prm) {
- const char *src_p = NULL;
- char varbyte[5];
- char *buf = NULL;
  VALUE result;
- int src_size;
- int varbyte_len;
+ char *buf;
  int buf_size;
+
  int comp_size;

- Check_Type(source, T_STRING);
- src_p = RSTRING_PTR(source);
- src_size = NUM2INT(src_size_prm);
+ Check_Type(input, T_STRING);
+ src_p = RSTRING_PTR(input);
+ src_size = NUM2INT(in_size);
  buf_size = LZ4_compressBound(src_size);

- varbyte_len = encode_varbyte(src_size, varbyte);
+ Check_Type(header, T_STRING);
+ header_p = RSTRING_PTR(header);
+ header_size = RSTRING_LEN(header);

- result = rb_str_new(NULL, buf_size + varbyte_len);
+ result = rb_str_new(NULL, buf_size + header_size);
  buf = RSTRING_PTR(result);

- memcpy(buf, varbyte, varbyte_len);
+ memcpy(buf, header_p, header_size);

- comp_size = compressor(src_p, buf + varbyte_len, src_size);
- rb_str_resize(result, comp_size + varbyte_len);
+ comp_size = compressor(src_p, buf + header_size, src_size);
+ rb_str_resize(result, comp_size + header_size);

  return result;
  }

- static VALUE lz4_ruby_compress(VALUE self, VALUE source, VALUE src_size) {
- return compress(LZ4_compress, self, source, src_size);
+ static VALUE lz4internal_compress(VALUE self, VALUE header, VALUE input, VALUE in_size) {
+ return compress_internal(LZ4_compress, header, input, in_size);
  }

- static VALUE lz4_ruby_compressHC(VALUE self, VALUE source, VALUE src_size) {
- return compress(LZ4_compressHC, self, source, src_size);
+ static VALUE lz4internal_compressHC(VALUE self, VALUE header, VALUE input, VALUE in_size) {
+ return compress_internal(LZ4_compressHC, header, input, in_size);
  }

- static VALUE lz4_ruby_uncompress(VALUE self, VALUE source) {
- const char *src_p = NULL;
- char *buf = NULL;
- VALUE result;
+ static VALUE lz4internal_uncompress(VALUE self, VALUE input, VALUE in_size, VALUE offset, VALUE out_size) {
+ const char *src_p;
  int src_size;
- int varbyte_len;
- int buf_size = 0;
+
+ int header_size;
+
+ VALUE result;
+ char *buf;
+ int buf_size;
+
  int read_bytes;

- Check_Type(source, T_STRING);
- src_p = RSTRING_PTR(source);
- src_size = RSTRING_LEN(source);
+ Check_Type(input, T_STRING);
+ src_p = RSTRING_PTR(input);
+ src_size = NUM2INT(in_size);

- varbyte_len = decode_varbyte(src_p, src_size, &buf_size);
+ header_size = NUM2INT(offset);
+ buf_size = NUM2INT(out_size);

  result = rb_str_new(NULL, buf_size);
  buf = RSTRING_PTR(result);

- read_bytes = LZ4_uncompress(src_p + varbyte_len, buf, buf_size);
+ read_bytes = LZ4_uncompress_unknownOutputSize(src_p + header_size, buf, src_size - header_size, buf_size);
  if (read_bytes < 0) {
  rb_raise(lz4_error, "Compressed data is maybe corrupted.");
  }
@@ -115,11 +82,11 @@ static VALUE lz4_ruby_uncompress(VALUE self, VALUE source) {
  }

  void Init_lz4ruby(void) {
- lz4 = rb_define_module("LZ4Native");
+ lz4internal = rb_define_module("LZ4Internal");

- rb_define_module_function(lz4, "compress", lz4_ruby_compress, 2);
- rb_define_module_function(lz4, "compressHC", lz4_ruby_compressHC, 2);
- rb_define_module_function(lz4, "uncompress", lz4_ruby_uncompress, 1);
+ rb_define_module_function(lz4internal, "compress", lz4internal_compress, 3);
+ rb_define_module_function(lz4internal, "compressHC", lz4internal_compressHC, 3);
+ rb_define_module_function(lz4internal, "uncompress", lz4internal_uncompress, 4);

- lz4_error = rb_define_class_under(lz4, "Error", rb_eStandardError);
+ lz4_error = rb_define_class_under(lz4internal, "Error", rb_eStandardError);
  }
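The binding's calling convention changes accordingly: the varbyte length header is now built by the Ruby layer and passed in, so compress/compressHC take three arguments (header, input, size) and uncompress takes four (frame, frame size, payload offset, decoded size). A minimal sketch of a round trip through these entry points, using the encode_varbyte/decode_varbyte helpers defined in lib/lz4-ruby.rb below:

# Sketch of the new LZ4Internal calling convention (see lib/lz4-ruby.rb below).
input  = "hello, lz4"
header = LZ4.encode_varbyte(input.length)   # varbyte-encoded original length

frame = LZ4Internal.compress(header, input, input.length)

# uncompress needs the frame, its total size, where the payload starts
# (the header length), and the original size recovered from the header.
out_size, offset = LZ4.decode_varbyte(frame)
puts LZ4Internal.uncompress(frame, frame.length, offset, out_size)  # => "hello, lz4"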
Binary file
Binary file
@@ -7,17 +7,66 @@ else
  end

  class LZ4
- def self.compress(source, src_size = nil)
- src_size = source.length if src_size == nil
- return LZ4Native::compress(source, src_size)
+ def self.compress(input, in_size = nil)
+ return _compress(input, in_size, false)
  end

- def self.compressHC(source, src_size = nil)
- src_size = source.length if src_size == nil
- return LZ4Native::compressHC(source, src_size)
+ def self.compressHC(input, in_size = nil)
+ return _compress(input, in_size, true)
  end
+
+ def self._compress(input, in_size, high_compression)
+ in_size = input.length if in_size == nil
+ header = encode_varbyte(in_size)

- def self.uncompress(source)
- return LZ4Native::uncompress(source)
+ if high_compression
+ return LZ4Internal.compressHC(header, input, in_size)
+ else
+ return LZ4Internal.compress(header, input, in_size)
+ end
+ end
+
+ def self.uncompress(input, in_size = nil)
+ in_size = input.length if in_size == nil
+ out_size, varbyte_len = decode_varbyte(input)
+
+ if out_size < 0 || varbyte_len < 0
+ raise "Compressed data is maybe corrupted"
+ end
+
+ return LZ4Internal::uncompress(input, in_size, varbyte_len, out_size)
+ end
+
+ def self.encode_varbyte(val)
+ varbytes = []
+
+ loop do
+ byte = val & 0x7f
+ val >>= 7
+
+ if val == 0
+ varbytes.push(byte)
+ break
+ else
+ varbytes.push(byte | 0x80)
+ end
+ end
+
+ return varbytes.pack("C*")
+ end
+
+ def self.decode_varbyte(text)
+ len = [text.length, 5].min
+ bytes = text[0, len].unpack("C*")
+
+ varbyte_len = 0
+ val = 0
+ bytes.each do |b|
+ val |= (b & 0x7f) << (7 * varbyte_len)
+ varbyte_len += 1
+ return val, varbyte_len if b & 0x80 == 0
+ end
+
+ return -1, -1
  end
  end
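Moving the varbyte framing out of C and into these two small Ruby methods also makes the format easy to sanity-check: each byte carries 7 bits of the length, least-significant group first, with the high bit set on every byte except the last. A quick round trip over a few arbitrary values:

# Round-tripping the varbyte header helpers defined above.
[0, 127, 128, 300, 16_384, 2**28].each do |n|
  encoded = LZ4.encode_varbyte(n)
  decoded, used = LZ4.decode_varbyte(encoded + "compressed payload...")
  # decode_varbyte looks at no more than 5 bytes, so the payload is ignored
  raise "varbyte mismatch for #{n}" unless decoded == n && used == encoded.length
end
puts "varbyte round trip ok"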
@@ -15,7 +15,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'ext/lz4ruby'))
  $LOAD_PATH.unshift(File.dirname(__FILE__))

  build_native = <<EOS
- cd ext/lz4-ruby
+ cd ext/lz4ruby
  ruby extconf.rb
  make clean
  make
@@ -3,15 +3,30 @@ require 'helper'
  class TestLz4Ruby < Test::Unit::TestCase
  LOOP_COUNT = 257

- @@random = Random.new(123)
+ srand(123)
+
+ def self.random_bytes(len)
+ result = []
+ len.times do |t|
+ result << rand(256)
+ end
+ return result.pack("C*")
+ end

  context "LZ4::compress" do
  should "empty text" do
  compressed = LZ4::compress("")
  uncompressed = LZ4::uncompress(compressed)
- assert_empty("", uncompressed)
+ assert_equal("", uncompressed)
  end
-
+
+ should "long text" do
+ text = "a" * 131073
+ compressed = LZ4.compress(text)
+ uncompressed = LZ4.uncompress(compressed)
+ assert_equal(text, uncompressed)
+ end
+
  LOOP_COUNT.times do |t|
  len = t + 1
  text = "a" * len
@@ -25,7 +40,7 @@ class TestLz4Ruby < Test::Unit::TestCase

  LOOP_COUNT.times do |t|
  len = t + 1
- text = @@random.bytes(len)
+ text = random_bytes(len)

  should "random text of #{len} bytes" do
  compressed = LZ4::compress(text)
@@ -49,9 +64,16 @@ class TestLz4Ruby < Test::Unit::TestCase
  should "empty text" do
  compressed = LZ4::compressHC("")
  uncompressed = LZ4::uncompress(compressed)
- assert_empty("", uncompressed)
+ assert_equal("", uncompressed)
  end

+ should "long text" do
+ text = "a" * 131073
+ compressed = LZ4.compressHC(text)
+ uncompressed = LZ4.uncompress(compressed)
+ assert_equal(text, uncompressed)
+ end
+
  LOOP_COUNT.times do |t|
  len = t + 1
  text = "a" * len
@@ -65,7 +87,7 @@ class TestLz4Ruby < Test::Unit::TestCase

  LOOP_COUNT.times do |t|
  len = t + 1
- text = @@random.bytes(len)
+ text = random_bytes(len)

  should "random text of #{len} bytes" do
  compressed = LZ4::compressHC(text)
metadata CHANGED
@@ -1,13 +1,13 @@
  --- !ruby/object:Gem::Specification
  name: lz4-ruby
  version: !ruby/object:Gem::Version
- hash: 23
+ hash: 21
  prerelease:
  segments:
  - 0
  - 1
- - 6
- version: 0.1.6
+ - 7
+ version: 0.1.7
  platform: x86-mingw32
  authors:
  - KOMIYA Atsushi
@@ -15,7 +15,7 @@ autorequire:
  bindir: bin
  cert_chain: []

- date: 2012-06-13 00:00:00 Z
+ date: 2012-08-14 00:00:00 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  type: :development