RubyGems - lz4-ruby - Versions diffs - 0.1.6-x86-mingw32 → 0.1.7-x86-mingw32 - Mend

lz4-ruby 0.1.6-x86-mingw32 → 0.1.7-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 0.1.6
1	+ 0.1.7

data/ext/lz4ruby/lz4.c CHANGED

@@ -34,31 +34,24 @@
 //**************************************
 // Tuning parameters
 //**************************************
-// COMPRESSIONLEVEL :
-// Increasing this value improves compression ratio
-// Lowering this value reduces memory usage
-// Reduced memory usage typically improves speed, due to cache effect (ex : L1 32KB for Intel, L1 64KB for AMD)
-// Memory usage formula : N->2^(N+2) Bytes (examples : 12 -> 16KB ; 17 -> 512KB)
-#define COMPRESSIONLEVEL 12
-// NOTCOMPRESSIBLE_CONFIRMATION :
+// MEMORY_USAGE :
+// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+// Increasing memory usage improves compression ratio
+// Reduced memory usage can improve speed, due to cache effect
+// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+#define MEMORY_USAGE 14
+// NOTCOMPRESSIBLE_DETECTIONLEVEL :
 // Decreasing this value will make the algorithm skip faster data segments considered "incompressible"
 // This may decrease compression ratio dramatically, but will be faster on incompressible data
 // Increasing this value will make the algorithm search more before declaring a segment "incompressible"
 // This could improve compression a bit, but will be slower on incompressible data
 // The default value (6) is recommended
-#define NOTCOMPRESSIBLE_CONFIRMATION 6
-// LZ4_COMPRESSMIN :
-// Compression function will *fail* if it is not successful at compressing input by at least LZ4_COMPRESSMIN bytes
-// Since the compression function stops working prematurely, it results in a speed gain
-// The output however is unusable. Compression function result will be zero.
-// Default : 0 = disabled
-#define LZ4_COMPRESSMIN 0
+#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6
 // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
-// This will provide a boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU.
-// You can set this option to 1 in situations where data will stay within closed environment
+// This will provide a small boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU.
+// You can set this option to 1 in situations where data will remain within closed environment
 // This option is useless on Little_Endian CPU (such as x86)
 //#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
@@ -108,6 +101,7 @@
 #ifdef _MSC_VER  // Visual Studio
 #  define inline __forceinline           // Visual is not C99, but supports some kind of inline
+#  include <intrin.h>   // For Visual 2005
 #  if LZ4_ARCH64	// 64-bit
 #    pragma intrinsic(_BitScanForward64) // For Visual 2005
 #    pragma intrinsic(_BitScanReverse64) // For Visual 2005
@@ -181,11 +175,11 @@ typedef struct _U64_S { U64 v; } U64_S;
 //**************************************
 #define MINMATCH 4
-#define HASH_LOG COMPRESSIONLEVEL
+#define HASH_LOG (MEMORY_USAGE-2)
 #define HASHTABLESIZE (1 << HASH_LOG)
 #define HASH_MASK (HASHTABLESIZE - 1)
-#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION>2?NOTCOMPRESSIBLE_CONFIRMATION:2)
+#define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2)
 #define STACKLIMIT 13
 #define HEAPMODE (HASH_LOG>STACKLIMIT)  // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()).
 #define COPYLENGTH 8
@@ -257,7 +251,7 @@ struct refTables
 //****************************
 #if LZ4_ARCH64
-inline static int LZ4_NbCommonBytes (register U64 val)
+static inline int LZ4_NbCommonBytes (register U64 val)
 {
 #if defined(LZ4_BIG_ENDIAN)
     #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
@@ -289,7 +283,7 @@ inline static int LZ4_NbCommonBytes (register U64 val)
 #else
-inline static int LZ4_NbCommonBytes (register U32 val)
+static inline int LZ4_NbCommonBytes (register U32 val)
 {
 #if defined(LZ4_BIG_ENDIAN)
     #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
@@ -321,25 +315,22 @@ inline static int LZ4_NbCommonBytes (register U32 val)
 #endif
-//****************************
-// Public functions
-//****************************
-int LZ4_compressBound(int isize)
-{
-	return (isize + (isize/255) + 16);
-}
 //******************************
 // Compression functions
 //******************************
-int LZ4_compressCtx(void** ctx,
+// LZ4_compressCtx :
+// -----------------
+// Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+// If it cannot achieve it, compression will stop, and result of the function will be zero.
+// return : the number of bytes written in buffer 'dest', or 0 if the compression fails
+static inline int LZ4_compressCtx(void** ctx,
 				 const char* source,
 				 char* dest,
-				 int isize)
+				 int isize,
+				 int maxOutputSize)
 {
 #if HEAPMODE
 	struct refTables *srt = (struct refTables *) (*ctx);
@@ -356,6 +347,7 @@ int LZ4_compressCtx(void** ctx,
 #define matchlimit (iend - LASTLITERALS)
 	BYTE* op = (BYTE*) dest;
+	BYTE* const oend = op + maxOutputSize;
 	int len, length;
 	const int skipStrength = SKIPSTRENGTH;
@@ -408,17 +400,37 @@ int LZ4_compressCtx(void** ctx,
 		while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; }
 		// Encode Literal length
-		length = ip - anchor;
+		length = (int)(ip - anchor);
 		token = op++;
+		if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) >= oend) return 0; 		// Check output limit
+#ifdef _MSC_VER
+		if (length>=(int)RUN_MASK)
+		{
+			int len = length-RUN_MASK;
+			*token=(RUN_MASK<<ML_BITS);
+			if (len>254)
+			{
+				do { *op++ = 255; len -= 255; } while (len>254);
+				*op++ = (BYTE)len;
+				memcpy(op, anchor, length);
+				op += length;
+				goto _next_match;
+			}
+			else
+			*op++ = (BYTE)len;
+		}
+		else *token = (length<<ML_BITS);
+#else
 		if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
 		else *token = (length<<ML_BITS);
+#endif
 		// Copy Literals
 		LZ4_BLINDCOPY(anchor, op, length);
 _next_match:
 		// Encode Offset
-		LZ4_WRITE_LITTLEENDIAN_16(op,ip-ref);
+		LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
 		// Start Counting
 		ip+=MINMATCH; ref+=MINMATCH;   // MinMatch verified
@@ -436,7 +448,7 @@ _next_match:
 _endCount:
 		// Encode MatchLength
-		len = (ip - anchor);
+		len = (int)(ip - anchor);
 		if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; }
 		else *token += len;
@@ -459,8 +471,8 @@ _endCount:
 _last_literals:
 	// Encode Last Literals
 	{
-		int lastRun = iend - anchor;
-		if ((LZ4_COMPRESSMIN>0) && (((op - (BYTE*)dest) + lastRun + 1 + ((lastRun-15)/255)) > isize - LZ4_COMPRESSMIN)) return 0;
+		int lastRun = (int)(iend - anchor);
+		if (((char*)op - dest) + lastRun + 1 + ((lastRun-15)/255) >= maxOutputSize) return 0;
 		if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
 		else *op++ = (lastRun<<ML_BITS);
 		memcpy(op, anchor, iend - anchor);
@@ -479,10 +491,11 @@ _last_literals:
 #define HASH64KTABLESIZE (1U<<HASHLOG64K)
 #define LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG64K))
 #define LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
-int LZ4_compress64kCtx(void** ctx,
+static inline int LZ4_compress64kCtx(void** ctx,
 				 const char* source,
 				 char* dest,
-				 int isize)
+				 int isize,
+				 int maxOutputSize)
 {
 #if HEAPMODE
 	struct refTables *srt = (struct refTables *) (*ctx);
@@ -499,6 +512,7 @@ int LZ4_compress64kCtx(void** ctx,
 #define matchlimit (iend - LASTLITERALS)
 	BYTE* op = (BYTE*) dest;
+	BYTE* const oend = op + maxOutputSize;
 	int len, length;
 	const int skipStrength = SKIPSTRENGTH;
@@ -542,7 +556,7 @@ int LZ4_compress64kCtx(void** ctx,
 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
 			ref = base + HashTable[h];
-			HashTable[h] = ip - base;
+			HashTable[h] = (U16)(ip - base);
 		} while (A32(ref) != A32(ip));
@@ -550,17 +564,37 @@ int LZ4_compress64kCtx(void** ctx,
 		while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; }
 		// Encode Literal length
-		length = ip - anchor;
+		length = (int)(ip - anchor);
 		token = op++;
+		if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) >= oend) return 0; 		// Check output limit
+#ifdef _MSC_VER
+		if (length>=(int)RUN_MASK)
+		{
+			int len = length-RUN_MASK;
+			*token=(RUN_MASK<<ML_BITS);
+			if (len>254)
+			{
+				do { *op++ = 255; len -= 255; } while (len>254);
+				*op++ = (BYTE)len;
+				memcpy(op, anchor, length);
+				op += length;
+				goto _next_match;
+			}
+			else
+			*op++ = (BYTE)len;
+		}
+		else *token = (length<<ML_BITS);
+#else
 		if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; }
 		else *token = (length<<ML_BITS);
+#endif
 		// Copy Literals
 		LZ4_BLINDCOPY(anchor, op, length);
 _next_match:
 		// Encode Offset
-		LZ4_WRITE_LITTLEENDIAN_16(op,ip-ref);
+		LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref));
 		// Start Counting
 		ip+=MINMATCH; ref+=MINMATCH;   // MinMatch verified
@@ -578,7 +612,7 @@ _next_match:
 _endCount:
 		// Encode MatchLength
-		len = (ip - anchor);
+		len = (int)(ip - anchor);
 		if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; }
 		else *token += len;
@@ -586,11 +620,11 @@ _endCount:
 		if (ip > mflimit) { anchor = ip;  break; }
 		// Fill table
-		HashTable[LZ4_HASH64K_VALUE(ip-2)] = ip - 2 - base;
+		HashTable[LZ4_HASH64K_VALUE(ip-2)] = (U16)(ip - 2 - base);
 		// Test next position
 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
-		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
+		HashTable[LZ4_HASH64K_VALUE(ip)] = (U16)(ip - base);
 		if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; }
 		// Prepare next loop
@@ -601,8 +635,8 @@ _endCount:
 _last_literals:
 	// Encode Last Literals
 	{
-		int lastRun = iend - anchor;
-		if ((LZ4_COMPRESSMIN>0) && (((op - (BYTE*)dest) + lastRun + 1 + ((lastRun-15)/255)) > isize - LZ4_COMPRESSMIN)) return 0;
+		int lastRun = (int)(iend - anchor);
+		if (((char*)op - dest) + lastRun + 1 + ((lastRun)>>8) >= maxOutputSize) return 0;
 		if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
 		else *op++ = (lastRun<<ML_BITS);
 		memcpy(op, anchor, iend - anchor);
@@ -614,26 +648,34 @@ _last_literals:
 }
-int LZ4_compress(const char* source,
-				 char* dest,
-				 int isize)
+int LZ4_compress_limitedOutput(const char* source,
+							   char* dest,
+							   int isize,
+							   int maxOutputSize)
 {
 #if HEAPMODE
 	void* ctx = malloc(sizeof(struct refTables));
 	int result;
 	if (isize < LZ4_64KLIMIT)
-		result = LZ4_compress64kCtx(&ctx, source, dest, isize);
-	else result = LZ4_compressCtx(&ctx, source, dest, isize);
+		result = LZ4_compress64kCtx(&ctx, source, dest, isize, maxOutputSize);
+	else result = LZ4_compressCtx(&ctx, source, dest, isize, maxOutputSize);
 	free(ctx);
 	return result;
 #else
-	if (isize < (int)LZ4_64KLIMIT) return LZ4_compress64kCtx(NULL, source, dest, isize);
-	return LZ4_compressCtx(NULL, source, dest, isize);
+	if (isize < (int)LZ4_64KLIMIT) return LZ4_compress64kCtx(NULL, source, dest, isize, maxOutputSize);
+	return LZ4_compressCtx(NULL, source, dest, isize, maxOutputSize);
 #endif
 }
+int LZ4_compress(const char* source,
+				 char* dest,
+				 int isize)
+{
+	return LZ4_compress_limitedOutput(source, dest, isize, LZ4_compressBound(isize));
+}
 //****************************

data/ext/lz4ruby/lz4.h CHANGED

@@ -47,19 +47,22 @@ int LZ4_uncompress (const char* source, char* dest, int osize);
 /*
 LZ4_compress() :
+	Compresses 'isize' bytes from 'source' into 'dest'.
+	Destination buffer must be already allocated,
+	and must be sized to handle worst cases situations (input data not compressible)
+	Worst case size evaluation is provided by macro LZ4_compressBound()
 	isize  : is the input size. Max supported value is ~1.9GB
 	return : the number of bytes written in buffer dest
-			 or 0 if the compression fails (if LZ4_COMPRESSMIN is set)
-	note : destination buffer must be already allocated.
-		destination buffer must be sized to handle worst cases situations (input data not compressible)
-		worst case size evaluation is provided by function LZ4_compressBound()
 LZ4_uncompress() :
 	osize  : is the output size, therefore the original size
 	return : the number of bytes read in the source buffer
 			 If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction
 			 This function never writes beyond dest + osize, and is therefore protected against malicious data packets
-	note : destination buffer must be already allocated
+	note : destination buffer must be already allocated.
+		   its size must be a minimum of 'osize' bytes.
 */
@@ -67,7 +70,7 @@ LZ4_uncompress() :
 // Advanced Functions
 //****************************
-int LZ4_compressBound(int isize);
+#define LZ4_compressBound(isize)   (isize + (isize/255) + 16)
 /*
 LZ4_compressBound() :
@@ -80,6 +83,21 @@ LZ4_compressBound() :
 */
+int LZ4_compress_limitedOutput   (const char* source, char* dest, int isize, int maxOutputSize);
+/*
+LZ4_compress_limitedOutput() :
+    Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+	If it cannot achieve it, compression will stop, and result of the function will be zero.
+	This function never writes outside of provided output buffer.
+	isize  : is the input size. Max supported value is ~1.9GB
+	maxOutputSize : is the size of the destination buffer (which must be already allocated)
+	return : the number of bytes written in buffer 'dest'
+			 or 0 if the compression fails
+*/
 int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize);
 /*
@@ -94,27 +112,6 @@ LZ4_uncompress_unknownOutputSize() :
 */
-int LZ4_compressCtx(void** ctx, const char* source,  char* dest, int isize);
-int LZ4_compress64kCtx(void** ctx, const char* source,  char* dest, int isize);
-/*
-LZ4_compressCtx() :
-	This function explicitly handles the CTX memory structure.
-	It avoids allocating/deallocating memory between each call, improving performance when malloc is heavily invoked.
-	This function is only useful when memory is allocated into the heap (HASH_LOG value beyond STACK_LIMIT)
-	Performance difference will be noticeable only when repetitively calling the compression function over many small segments.
-	Note : by default, memory is allocated into the stack, therefore "malloc" is not invoked.
-LZ4_compress64kCtx() :
-	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
-	isize *Must* be <64KB, otherwise the output will be corrupted.
-	On first call : provide a *ctx=NULL; It will be automatically allocated.
-	On next calls : reuse the same ctx pointer.
-	Use different pointers for different threads when doing multi-threading.
-*/
 #if defined (__cplusplus)
 }
 #endif

data/ext/lz4ruby/lz4hc.c CHANGED

@@ -68,12 +68,20 @@
 #ifdef _MSC_VER
 #define inline __forceinline    // Visual is not C99, but supports some kind of inline
+#include <intrin.h>             // For Visual 2005
+#  if LZ4_ARCH64	// 64-bit
+#    pragma intrinsic(_BitScanForward64) // For Visual 2005
+#    pragma intrinsic(_BitScanReverse64) // For Visual 2005
+#  else
+#    pragma intrinsic(_BitScanForward)   // For Visual 2005
+#    pragma intrinsic(_BitScanReverse)   // For Visual 2005
+#  endif
 #endif
 #ifdef _MSC_VER  // Visual Studio
-#define bswap16(x) _byteswap_ushort(x)
+#define lz4_bswap16(x) _byteswap_ushort(x)
 #else
-#define bswap16(x)  ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
+#define lz4_bswap16(x)  ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
 #endif
@@ -174,8 +182,8 @@ typedef struct _U64_S { U64 v; } U64_S;
 #endif
 #if defined(LZ4_BIG_ENDIAN)
-#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = bswap16(v); d = (s) - v; }
-#define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); v = bswap16(v); A16(p) = v; p+=2; }
+#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
+#define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
 #else		// Little Endian
 #define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
 #define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }
@@ -350,7 +358,7 @@ inline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const
 			if ((ipt<matchlimit) && (*reft == *ipt)) ipt++;
 _endCount:
-			if (ipt-ip > ml) { ml = ipt-ip; *matchpos = ref; }
+			if (ipt-ip > ml) { ml = (int)(ipt-ip); *matchpos = ref; }
 		}
 		ref = GETNEXT(ref);
 	}
@@ -366,7 +374,7 @@ inline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const
 	INITBASE(base,hc4->base);
 	const BYTE*  ref;
 	int nbAttempts = MAX_NB_ATTEMPTS;
-	int delta = ip-startLimit;
+	int delta = (int)(ip-startLimit);
 	// First Match
 	LZ4HC_Insert(hc4, ip);
@@ -399,7 +407,7 @@ _endCount:
 			if ((ipt-startt) > longest)
 			{
-				longest = ipt-startt;
+				longest = (int)(ipt-startt);
 				*matchpos = reft;
 				*startpos = startt;
 			}
@@ -417,7 +425,7 @@ inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** an
 	BYTE* token;
 	// Encode Literal length
-	length = *ip - *anchor;
+	length = (int)(*ip - *anchor);
 	token = (*op)++;
 	if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; }
 	else *token = (length<<ML_BITS);
@@ -426,7 +434,7 @@ inline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** an
 	LZ4_BLINDCOPY(*anchor, *op, length);
 	// Encode Offset
-	LZ4_WRITE_LITTLEENDIAN_16(*op,*ip-ref);
+	LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
 	// Encode MatchLength
 	len = (int)(ml-MINMATCH);
@@ -519,8 +527,8 @@ _Search3:
 			int correction;
 			int new_ml = ml;
 			if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
-			if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = start2 - ip + ml2 - MINMATCH;
-			correction = new_ml - (start2 - ip);
+			if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+			correction = new_ml - (int)(start2 - ip);
 			if (correction > 0)
 			{
 				start2 += correction;
@@ -543,8 +551,8 @@ _Search3:
 				{
 					int correction;
 					if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
-					if (ip+ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH;
-					correction = ml - (start2 - ip);
+					if (ip+ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+					correction = ml - (int)(start2 - ip);
 					if (correction > 0)
 					{
 						start2 += correction;
@@ -554,7 +562,7 @@ _Search3:
 				}
 				else
 				{
-					ml = start2 - ip;
+					ml = (int)(start2 - ip);
 				}
 			}
 			// Now, encode 2 sequences
@@ -570,7 +578,7 @@ _Search3:
 			{
 				if (start2 < ip+ml)
 				{
-					int correction = (ip+ml) - start2;
+					int correction = (int)(ip+ml - start2);
 					start2 += correction;
 					ref2 += correction;
 					ml2 -= correction;
@@ -607,8 +615,8 @@ _Search3:
 			{
 				int correction;
 				if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
-				if (ip + ml > start2 + ml2 - MINMATCH) ml = start2 - ip + ml2 - MINMATCH;
-				correction = ml - (start2 - ip);
+				if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+				correction = ml - (int)(start2 - ip);
 				if (correction > 0)
 				{
 					start2 += correction;
@@ -618,7 +626,7 @@ _Search3:
 			}
 			else
 			{
-				ml = start2 - ip;
+				ml = (int)(start2 - ip);
 			}
 		}
 		LZ4_encodeSequence(&ip, &op, &anchor, ml, ref);
@@ -637,7 +645,7 @@ _Search3:
 	// Encode Last Literals
 	{
-		int lastRun = iend - anchor;
+		int lastRun = (int)(iend - anchor);
 		if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
 		else *op++ = (lastRun<<ML_BITS);
 		memcpy(op, anchor, iend - anchor);

data/ext/lz4ruby/lz4ruby.c CHANGED

@@ -4,109 +4,76 @@
 typedef int (*CompressFunc)(const char *source, char *dest, int isize);
-static VALUE lz4;
+static VALUE lz4internal;
 static VALUE lz4_error;
-static int encode_varbyte(int value, char *buf) {
-  buf[0] = value & 0x7f; value >>= 7;
-  if (value == 0) { return 1; }
-  buf[0] |= 0x80;
-  buf[1] = value & 0x7f; value >>= 7;
-  if (value == 0) { return 2; }
-  buf[1] |= 0x80;
-  buf[2] = value & 0x7f; value >>= 7;
-  if (value == 0) { return 3; }
-  buf[2] |= 0x80;
-  buf[3] = value & 0x7f; value >>= 7;
-  if (value == 0) { return 4; }
-  buf[3] |= 0x80;
-  buf[4] = value & 0x7f;
-  return 5;
-}
-static int decode_varbyte(const char *src, int len, int *value) {
-  if (len < 1) { return -1; }
-  *value = src[0] & 0x7f;
-  if ((src[0] & 0x80) == 0) { return 1; }
-  if (len < 2) { return -1; }
-  *value |= (src[1] & 0x7f) << 7;
-  if ((src[1] & 0x80) == 0) { return 2; }
-  if (len < 3) { return -1; }
-  *value |= (src[2] & 0x7f) << 14;
-  if ((src[2] & 0x80) == 0) { return 3; }
-  if (len < 4) { return -1; }
-  *value |= (src[3] & 0x7f) << 21;
-  if ((src[3] & 0x80) == 0) { return 4; }
-  if (len < 5) { return -1; }
-  *value |= (src[4] & 0x7f) << 28;
+/**
+ * LZ4Internal functions.
+ */
+static VALUE compress_internal(CompressFunc compressor, VALUE header, VALUE input, VALUE in_size) {
+  const char *src_p;
+  int src_size;
-  return 5;
-}
+  const char *header_p;
+  int header_size;
-static VALUE compress(CompressFunc compressor, VALUE self, VALUE source, VALUE src_size_prm) {
-  const char *src_p = NULL;
-  char varbyte[5];
-  char *buf = NULL;
   VALUE result;
-  int src_size;
-  int varbyte_len;
+  char *buf;
   int buf_size;
   int comp_size;
-  Check_Type(source, T_STRING);
-  src_p = RSTRING_PTR(source);
-  src_size = NUM2INT(src_size_prm);
+  Check_Type(input, T_STRING);
+  src_p = RSTRING_PTR(input);
+  src_size = NUM2INT(in_size);
   buf_size = LZ4_compressBound(src_size);
-  varbyte_len = encode_varbyte(src_size, varbyte);
+  Check_Type(header, T_STRING);
+  header_p = RSTRING_PTR(header);
+  header_size = RSTRING_LEN(header);
-  result = rb_str_new(NULL, buf_size + varbyte_len);
+  result = rb_str_new(NULL, buf_size + header_size);
   buf = RSTRING_PTR(result);
-  memcpy(buf, varbyte, varbyte_len);
+  memcpy(buf, header_p, header_size);
-  comp_size = compressor(src_p, buf + varbyte_len, src_size);
-  rb_str_resize(result, comp_size + varbyte_len);
+  comp_size = compressor(src_p, buf + header_size, src_size);
+  rb_str_resize(result, comp_size + header_size);
   return result;
 }
-static VALUE lz4_ruby_compress(VALUE self, VALUE source, VALUE src_size) {
-  return compress(LZ4_compress, self, source, src_size);
+static VALUE lz4internal_compress(VALUE self, VALUE header, VALUE input, VALUE in_size) {
+  return compress_internal(LZ4_compress, header, input, in_size);
 }
-static VALUE lz4_ruby_compressHC(VALUE self, VALUE source, VALUE src_size) {
-  return compress(LZ4_compressHC, self, source, src_size);
+static VALUE lz4internal_compressHC(VALUE self, VALUE header, VALUE input, VALUE in_size) {
+  return compress_internal(LZ4_compressHC, header, input, in_size);
 }
-static VALUE lz4_ruby_uncompress(VALUE self, VALUE source) {
-  const char *src_p = NULL;
-  char *buf = NULL;
-  VALUE result;
+static VALUE lz4internal_uncompress(VALUE self, VALUE input, VALUE in_size, VALUE offset, VALUE out_size) {
+  const char *src_p;
   int src_size;
-  int varbyte_len;
-  int buf_size = 0;
+  int header_size;
+  VALUE result;
+  char *buf;
+  int buf_size;
   int read_bytes;
-  Check_Type(source, T_STRING);
-  src_p = RSTRING_PTR(source);
-  src_size = RSTRING_LEN(source);
+  Check_Type(input, T_STRING);
+  src_p = RSTRING_PTR(input);
+  src_size = NUM2INT(in_size);
-  varbyte_len = decode_varbyte(src_p, src_size, &buf_size);
+  header_size = NUM2INT(offset);
+  buf_size = NUM2INT(out_size);
   result = rb_str_new(NULL, buf_size);
   buf = RSTRING_PTR(result);
-  read_bytes = LZ4_uncompress(src_p + varbyte_len, buf, buf_size);
+  read_bytes = LZ4_uncompress_unknownOutputSize(src_p + header_size, buf, src_size - header_size, buf_size);
   if (read_bytes < 0) {
     rb_raise(lz4_error, "Compressed data is maybe corrupted.");
   }
@@ -115,11 +82,11 @@ static VALUE lz4_ruby_uncompress(VALUE self, VALUE source) {
 }
 void Init_lz4ruby(void) {
-  lz4 = rb_define_module("LZ4Native");
+  lz4internal = rb_define_module("LZ4Internal");
-  rb_define_module_function(lz4, "compress", lz4_ruby_compress, 2);
-  rb_define_module_function(lz4, "compressHC", lz4_ruby_compressHC, 2);
-  rb_define_module_function(lz4, "uncompress", lz4_ruby_uncompress, 1);
+  rb_define_module_function(lz4internal, "compress", lz4internal_compress, 3);
+  rb_define_module_function(lz4internal, "compressHC", lz4internal_compressHC, 3);
+  rb_define_module_function(lz4internal, "uncompress", lz4internal_uncompress, 4);
-  lz4_error = rb_define_class_under(lz4, "Error", rb_eStandardError);
+  lz4_error = rb_define_class_under(lz4internal, "Error", rb_eStandardError);
 }

data/lib/1.8/lz4ruby.so CHANGED

Binary file

data/lib/1.9/lz4ruby.so CHANGED

Binary file

data/lib/lz4-ruby.rb CHANGED

@@ -7,17 +7,66 @@ else
 end
 class LZ4
-  def self.compress(source, src_size = nil)
-    src_size = source.length if src_size == nil
-    return LZ4Native::compress(source, src_size)
+  def self.compress(input, in_size = nil)
+    return _compress(input, in_size, false)
   end
-  def self.compressHC(source, src_size = nil)
-    src_size = source.length if src_size == nil
-    return LZ4Native::compressHC(source, src_size)
+  def self.compressHC(input, in_size = nil)
+    return _compress(input, in_size, true)
   end
+  def self._compress(input, in_size, high_compression)
+    in_size = input.length if in_size == nil
+    header = encode_varbyte(in_size)
-  def self.uncompress(source)
-    return LZ4Native::uncompress(source)
+    if high_compression
+      return LZ4Internal.compressHC(header, input, in_size)
+    else
+      return LZ4Internal.compress(header, input, in_size)
+    end
+  end
+  def self.uncompress(input, in_size = nil)
+    in_size = input.length if in_size == nil
+    out_size, varbyte_len = decode_varbyte(input)
+    if out_size < 0 || varbyte_len < 0
+      raise "Compressed data is maybe corrupted"
+    end
+    return LZ4Internal::uncompress(input, in_size, varbyte_len, out_size)
+  end
+  def self.encode_varbyte(val)
+    varbytes = []
+    loop do
+      byte = val & 0x7f
+      val >>= 7
+      if val == 0
+        varbytes.push(byte)
+        break
+      else
+        varbytes.push(byte | 0x80)
+      end
+    end
+    return varbytes.pack("C*")
+  end
+  def self.decode_varbyte(text)
+    len = [text.length, 5].min
+    bytes = text[0, len].unpack("C*")
+    varbyte_len = 0
+    val = 0
+    bytes.each do |b|
+      val |= (b & 0x7f) << (7 * varbyte_len)
+      varbyte_len += 1
+      return val, varbyte_len if b & 0x80 == 0
+    end
+    return -1, -1
   end
 end

data/test/helper.rb CHANGED

@@ -15,7 +15,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'ext/lz4ruby'))
 $LOAD_PATH.unshift(File.dirname(__FILE__))
 build_native = <<EOS
-cd ext/lz4-ruby
+cd ext/lz4ruby
 ruby extconf.rb
 make clean
 make

data/test/test_lz4-ruby.rb CHANGED

@@ -3,15 +3,30 @@ require 'helper'
 class TestLz4Ruby < Test::Unit::TestCase
   LOOP_COUNT = 257
-  @@random = Random.new(123)
+  srand(123)
+  def self.random_bytes(len)
+    result = []
+    len.times do |t|
+      result << rand(256)
+    end
+    return result.pack("C*")
+  end
   context "LZ4::compress" do
     should "empty text" do
       compressed = LZ4::compress("")
       uncompressed = LZ4::uncompress(compressed)
-      assert_empty("", uncompressed)
+      assert_equal("", uncompressed)
     end
+    should "long text" do
+      text = "a" * 131073
+      compressed = LZ4.compress(text)
+      uncompressed = LZ4.uncompress(compressed)
+      assert_equal(text, uncompressed)
+    end
     LOOP_COUNT.times do |t|
       len = t + 1
       text = "a" * len
@@ -25,7 +40,7 @@ class TestLz4Ruby < Test::Unit::TestCase
     LOOP_COUNT.times do |t|
       len = t + 1
-      text = @@random.bytes(len)
+      text = random_bytes(len)
       should "random text of #{len} bytes" do
         compressed = LZ4::compress(text)
@@ -49,9 +64,16 @@ class TestLz4Ruby < Test::Unit::TestCase
     should "empty text" do
       compressed = LZ4::compressHC("")
       uncompressed = LZ4::uncompress(compressed)
-      assert_empty("", uncompressed)
+      assert_equal("", uncompressed)
     end
+    should "long text" do
+      text = "a" * 131073
+      compressed = LZ4.compressHC(text)
+      uncompressed = LZ4.uncompress(compressed)
+      assert_equal(text, uncompressed)
+    end
     LOOP_COUNT.times do |t|
       len = t + 1
       text = "a" * len
@@ -65,7 +87,7 @@ class TestLz4Ruby < Test::Unit::TestCase
     LOOP_COUNT.times do |t|
       len = t + 1
-      text = @@random.bytes(len)
+      text = random_bytes(len)
       should "random text of #{len} bytes" do
         compressed = LZ4::compressHC(text)

metadata CHANGED

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: lz4-ruby
 version: !ruby/object:Gem::Version
-  hash: 23
+  hash: 21
   prerelease:
   segments:
   - 0
   - 1
-  - 6
-  version: 0.1.6
+  - 7
+  version: 0.1.7
 platform: x86-mingw32
 authors:
 - KOMIYA Atsushi
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-06-13 00:00:00 Z
+date: 2012-08-14 00:00:00 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   type: :development