snappy 0.0.17-java → 0.1.0-java

@@ -45,6 +45,14 @@
  #include <sys/mman.h>
  #endif

+ #ifdef HAVE_UNISTD_H
+ #include <unistd.h>
+ #endif
+
+ #if defined(_MSC_VER)
+ #include <intrin.h>
+ #endif // defined(_MSC_VER)
+
  #include "snappy-stubs-public.h"

  #if defined(__x86_64__)
@@ -52,6 +60,14 @@
  // Enable 64-bit optimized versions of some routines.
  #define ARCH_K8 1

+ #elif defined(__ppc64__)
+
+ #define ARCH_PPC 1
+
+ #elif defined(__aarch64__)
+
+ #define ARCH_ARM 1
+
  #endif

  // Needed by OS X, among others.
@@ -59,10 +75,6 @@
  #define MAP_ANONYMOUS MAP_ANON
  #endif

- // Pull in std::min, std::ostream, and the likes. This is safe because this
- // header file is never used from any public header files.
- using namespace std;
-
  // The size of an array, if known at compile-time.
  // Will give unexpected results if used on a pointer.
  // We undefine it first, since some compilers already have a definition.
@@ -73,11 +85,11 @@ using namespace std;

  // Static prediction hints.
  #ifdef HAVE_BUILTIN_EXPECT
- #define PREDICT_FALSE(x) (__builtin_expect(x, 0))
- #define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+ #define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0))
+ #define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
  #else
- #define PREDICT_FALSE(x) x
- #define PREDICT_TRUE(x) x
+ #define SNAPPY_PREDICT_FALSE(x) x
+ #define SNAPPY_PREDICT_TRUE(x) x
  #endif

  // This is only used for recomputing the tag byte table used during
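Editor's note on the hunk above: PREDICT_TRUE/PREDICT_FALSE are renamed to SNAPPY_PREDICT_TRUE/SNAPPY_PREDICT_FALSE, presumably to avoid clashing with identically named macros elsewhere; the expansion through __builtin_expect is unchanged. A minimal stand-alone sketch of the same branch-hint pattern (illustration only, not code from the diff):

```cpp
// Sketch of __builtin_expect-based branch hints, assuming a GCC/Clang
// toolchain; other compilers fall back to the plain expression.
#include <cstdio>
#include <cstdlib>

#if defined(__GNUC__) || defined(__clang__)
#define LIKELY(x)   (__builtin_expect(!!(x), 1))
#define UNLIKELY(x) (__builtin_expect(!!(x), 0))
#else
#define LIKELY(x)   (x)
#define UNLIKELY(x) (x)
#endif

int ParseByte(int c) {
  // The error path is rare, so hint the compiler to lay out the common
  // path as the fall-through case.
  if (UNLIKELY(c < 0)) {
    std::fprintf(stderr, "unexpected negative byte\n");
    std::abort();
  }
  return c & 0xff;
}

int main() {
  std::printf("%d\n", ParseByte(300));  // prints 44
  return 0;
}
```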
@@ -96,9 +108,10 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);

  // Potentially unaligned loads and stores.

- // x86 and PowerPC can simply do these loads and stores native.
+ // x86, PowerPC, and ARM64 can simply do these loads and stores native.

- #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
+ #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \
+ defined(__aarch64__)

  #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
  #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
@@ -225,22 +238,8 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {

  #endif

- // This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64
- // on some platforms, in particular ARM.
- inline void UnalignedCopy64(const void *src, void *dst) {
- if (sizeof(void *) == 8) {
- UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src));
- } else {
- const char *src_char = reinterpret_cast<const char *>(src);
- char *dst_char = reinterpret_cast<char *>(dst);
-
- UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char));
- UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4));
- }
- }
-
  // The following guarantees declaration of the byte swap functions.
- #ifdef WORDS_BIGENDIAN
+ #if defined(SNAPPY_IS_BIG_ENDIAN)

  #ifdef HAVE_SYS_BYTEORDER_H
  #include <sys/byteorder.h>
@@ -297,7 +296,7 @@ inline uint64 bswap_64(uint64 x) {

  #endif

- #endif // WORDS_BIGENDIAN
+ #endif // defined(SNAPPY_IS_BIG_ENDIAN)

  // Convert to little-endian storage, opposite of network format.
  // Convert x from host to little endian: x = LittleEndian.FromHost(x);
@@ -311,7 +310,7 @@ inline uint64 bswap_64(uint64 x) {
  class LittleEndian {
  public:
  // Conversion functions.
- #ifdef WORDS_BIGENDIAN
+ #if defined(SNAPPY_IS_BIG_ENDIAN)

  static uint16 FromHost16(uint16 x) { return bswap_16(x); }
  static uint16 ToHost16(uint16 x) { return bswap_16(x); }
@@ -321,7 +320,7 @@ class LittleEndian {

  static bool IsLittleEndian() { return false; }

- #else // !defined(WORDS_BIGENDIAN)
+ #else // !defined(SNAPPY_IS_BIG_ENDIAN)

  static uint16 FromHost16(uint16 x) { return x; }
  static uint16 ToHost16(uint16 x) { return x; }
@@ -331,7 +330,7 @@ class LittleEndian {
  static bool IsLittleEndian() { return true; }


- #endif // !defined(WORDS_BIGENDIAN)
+ #endif // !defined(SNAPPY_IS_BIG_ENDIAN)

  // Functions to do unaligned loads and stores in little-endian order.
  static uint16 Load16(const void *p) {
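Editor's note: the hunks above only swap the configure-style WORDS_BIGENDIAN macro for SNAPPY_IS_BIG_ENDIAN; the LittleEndian helpers still mean "byte-swap on big-endian hosts, pass through on little-endian ones". For comparison, a stand-alone sketch of a 32-bit little-endian load that needs no endianness macro at all (illustration only, not part of the diff):

```cpp
// Assemble the value byte by byte, so the result is the same on any host.
#include <cstdint>
#include <cstdio>

static uint32_t LoadLE32(const void* p) {
  const unsigned char* b = static_cast<const unsigned char*>(p);
  // Byte 0 is least significant in little-endian storage.
  return static_cast<uint32_t>(b[0]) |
         (static_cast<uint32_t>(b[1]) << 8) |
         (static_cast<uint32_t>(b[2]) << 16) |
         (static_cast<uint32_t>(b[3]) << 24);
}

int main() {
  const unsigned char buf[4] = {0x78, 0x56, 0x34, 0x12};
  std::printf("0x%08x\n", LoadLE32(buf));  // prints 0x12345678 on any host
  return 0;
}
```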
@@ -361,10 +360,15 @@ class Bits {
  // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except
  // that it's 0-indexed.
  static int FindLSBSetNonZero(uint32 n);
+
+ #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
  static int FindLSBSetNonZero64(uint64 n);
+ #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)

  private:
- DISALLOW_COPY_AND_ASSIGN(Bits);
+ // No copying
+ Bits(const Bits&);
+ void operator=(const Bits&);
  };

  #ifdef HAVE_BUILTIN_CTZ
@@ -377,9 +381,36 @@ inline int Bits::FindLSBSetNonZero(uint32 n) {
  return __builtin_ctz(n);
  }

+ #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
  inline int Bits::FindLSBSetNonZero64(uint64 n) {
  return __builtin_ctzll(n);
  }
+ #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+
+ #elif defined(_MSC_VER)
+
+ inline int Bits::Log2Floor(uint32 n) {
+ unsigned long where;
+ if (_BitScanReverse(&where, n)) {
+ return where;
+ } else {
+ return -1;
+ }
+ }
+
+ inline int Bits::FindLSBSetNonZero(uint32 n) {
+ unsigned long where;
+ if (_BitScanForward(&where, n)) return static_cast<int>(where);
+ return 32;
+ }
+
+ #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+ inline int Bits::FindLSBSetNonZero64(uint64 n) {
+ unsigned long where;
+ if (_BitScanForward64(&where, n)) return static_cast<int>(where);
+ return 64;
+ }
+ #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)

  #else // Portable versions.

@@ -413,6 +444,7 @@ inline int Bits::FindLSBSetNonZero(uint32 n) {
  return rc;
  }

+ #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
  // FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
  inline int Bits::FindLSBSetNonZero64(uint64 n) {
  const uint32 bottombits = static_cast<uint32>(n);
@@ -423,6 +455,7 @@ inline int Bits::FindLSBSetNonZero64(uint64 n) {
  return FindLSBSetNonZero(bottombits);
  }
  }
+ #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)

  #endif // End portable versions.
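Editor's note: the new _MSC_VER branch above is why <intrin.h> is now included at the top of the header; _BitScanReverse/_BitScanForward take the place of the GCC/Clang __builtin_clz/__builtin_ctz family. A condensed stand-alone sketch of the 64-bit case (illustration only; the MSVC path assumes a 64-bit target, where _BitScanForward64 is available):

```cpp
// 0-indexed "find lowest set bit" for a nonzero 64-bit value.
#include <cstdint>
#include <cstdio>
#if defined(_MSC_VER)
#include <intrin.h>
#endif

static int FindLSBSetNonZero64(uint64_t n) {
#if defined(_MSC_VER)
  unsigned long where;
  _BitScanForward64(&where, n);        // n must be nonzero
  return static_cast<int>(where);
#else
  return __builtin_ctzll(n);           // counts trailing zero bits
#endif
}

int main() {
  std::printf("%d\n", FindLSBSetNonZero64(0x10));  // prints 4
  return 0;
}
```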
@@ -36,21 +36,21 @@
  #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
  #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_

- #if @ac_cv_have_stdint_h@
+ #if ${HAVE_STDINT_H_01} // HAVE_STDINT_H
  #include <stdint.h>
- #endif
+ #endif // HAVE_STDINT_H
 
- #if @ac_cv_have_stddef_h@
+ #if ${HAVE_STDDEF_H_01} // HAVE_STDDEF_H
  #include <stddef.h>
- #endif
+ #endif // HAVE_STDDEF_H

- #if @ac_cv_have_sys_uio_h@
+ #if ${HAVE_SYS_UIO_H_01} // HAVE_SYS_UIO_H
  #include <sys/uio.h>
- #endif
+ #endif // HAVE_SYS_UIO_H

- #define SNAPPY_MAJOR @SNAPPY_MAJOR@
- #define SNAPPY_MINOR @SNAPPY_MINOR@
- #define SNAPPY_PATCHLEVEL @SNAPPY_PATCHLEVEL@
+ #define SNAPPY_MAJOR ${SNAPPY_MAJOR}
+ #define SNAPPY_MINOR ${SNAPPY_MINOR}
+ #define SNAPPY_PATCHLEVEL ${SNAPPY_PATCHLEVEL}
  #define SNAPPY_VERSION \
  ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
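Editor's note: the substitution style changes from autoconf (@SNAPPY_MAJOR@) to CMake (${SNAPPY_MAJOR}), but the SNAPPY_VERSION packing is unchanged: major above bit 16, minor in bits 8-15, patchlevel in bits 0-7. A quick worked example with made-up numbers (not taken from the diff):

```cpp
// Pack and unpack a three-part version number the same way SNAPPY_VERSION does.
#include <cstdio>

#define MY_MAJOR 1
#define MY_MINOR 1
#define MY_PATCHLEVEL 4
#define MY_VERSION ((MY_MAJOR << 16) | (MY_MINOR << 8) | MY_PATCHLEVEL)

int main() {
  std::printf("packed: 0x%06x\n", (unsigned)MY_VERSION);    // 0x010104
  std::printf("major:  %d\n", (MY_VERSION >> 16) & 0xff);   // 1
  std::printf("minor:  %d\n", (MY_VERSION >> 8) & 0xff);    // 1
  std::printf("patch:  %d\n", MY_VERSION & 0xff);           // 4
  return 0;
}
```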
@@ -58,7 +58,7 @@

  namespace snappy {

- #if @ac_cv_have_stdint_h@
+ #if ${HAVE_STDINT_H_01} // HAVE_STDINT_H
  typedef int8_t int8;
  typedef uint8_t uint8;
  typedef int16_t int16;
@@ -76,24 +76,18 @@ typedef int int32;
  typedef unsigned int uint32;
  typedef long long int64;
  typedef unsigned long long uint64;
- #endif
+ #endif // HAVE_STDINT_H

  typedef std::string string;

- #ifndef DISALLOW_COPY_AND_ASSIGN
- #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
- TypeName(const TypeName&); \
- void operator=(const TypeName&)
- #endif
-
- #if !@ac_cv_have_sys_uio_h@
+ #if !${HAVE_SYS_UIO_H_01} // !HAVE_SYS_UIO_H
  // Windows does not have an iovec type, yet the concept is universally useful.
  // It is simple to define it ourselves, so we put it inside our own namespace.
  struct iovec {
  void* iov_base;
  size_t iov_len;
  };
- #endif
+ #endif // !HAVE_SYS_UIO_H

  } // namespace snappy
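Editor's note: the DISALLOW_COPY_AND_ASSIGN macro is dropped here (the Bits class earlier in the diff now declares its copy operations privately by hand), while the iovec fallback for platforms without <sys/uio.h> stays. A stand-alone sketch of how such a scatter/gather struct is filled (the demo namespace and buffers are hypothetical, not from the diff):

```cpp
// Same shape as the struct the header defines when !HAVE_SYS_UIO_H.
#include <cstddef>
#include <cstdio>

namespace demo {
struct iovec {
  void* iov_base;   // start of one piece of the scattered buffer
  size_t iov_len;   // its length in bytes
};
}  // namespace demo

int main() {
  char block_a[16];
  char block_b[32];
  demo::iovec iov[2] = {{block_a, sizeof(block_a)}, {block_b, sizeof(block_b)}};
  size_t total = 0;
  for (const auto& v : iov) total += v.iov_len;
  std::printf("scatter buffer of %zu bytes in %zu pieces\n", total,
              sizeof(iov) / sizeof(iov[0]));
  return 0;
}
```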
@@ -33,6 +33,9 @@
  #endif

  #ifdef HAVE_WINDOWS_H
+ // Needed to be able to use std::max without workarounds in the source code.
+ // https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts
+ #define NOMINMAX
  #include <windows.h>
  #endif
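Editor's note: NOMINMAX has to be defined before <windows.h>, which otherwise defines min and max as macros and breaks qualified calls such as the std::max added in the Benchmark::Run hunk that follows. Stand-alone illustration (not from the diff):

```cpp
// With NOMINMAX, std::max compiles cleanly on Windows; without it the
// max macro from <windows.h> would mangle the expression.
#if defined(_WIN32)
#define NOMINMAX           // must come before <windows.h>
#include <windows.h>
#endif
#include <algorithm>
#include <cstdio>

int main() {
  int n = std::max(3, 7);
  std::printf("%d\n", n);  // prints 7
  return 0;
}
```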
@@ -201,7 +204,7 @@ void Benchmark::Run() {
  if (benchmark_real_time_us > 0) {
  num_iterations = 200000 * kCalibrateIterations / benchmark_real_time_us;
  }
- num_iterations = max(num_iterations, kCalibrateIterations);
+ num_iterations = std::max(num_iterations, kCalibrateIterations);
  BenchmarkRun benchmark_runs[kNumRuns];

  for (int run = 0; run < kNumRuns; ++run) {
@@ -217,10 +220,10 @@ void Benchmark::Run() {
  string heading = StringPrintf("%s/%d", name_.c_str(), test_case_num);
  string human_readable_speed;

- nth_element(benchmark_runs,
- benchmark_runs + kMedianPos,
- benchmark_runs + kNumRuns,
- BenchmarkCompareCPUTime());
+ std::nth_element(benchmark_runs,
+ benchmark_runs + kMedianPos,
+ benchmark_runs + kNumRuns,
+ BenchmarkCompareCPUTime());
  int64 real_time_us = benchmark_runs[kMedianPos].real_time_us;
  int64 cpu_time_us = benchmark_runs[kMedianPos].cpu_time_us;
  if (cpu_time_us <= 0) {
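Editor's note: the calls above gain explicit std:: qualification, presumably because the `using namespace std;` removed earlier in this diff no longer pulls the names in. std::nth_element only partially sorts: after it returns, the element at kMedianPos is the one a full sort would put there, which is all the median selection here needs. A worked stand-alone example (not from the diff):

```cpp
// Pick the median of five benchmark runs without fully sorting them.
#include <algorithm>
#include <cstdio>

int main() {
  long long cpu_time_us[5] = {900, 120, 450, 300, 700};
  const int kMedianPos = 2;  // middle of 5 runs
  std::nth_element(cpu_time_us, cpu_time_us + kMedianPos, cpu_time_us + 5);
  std::printf("median cpu time: %lld us\n", cpu_time_us[kMedianPos]);  // 450
  return 0;
}
```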
@@ -523,8 +526,8 @@ int ZLib::UncompressAtMostOrAll(Bytef *dest, uLongf *destLen,
  LOG(WARNING)
  << "UncompressChunkOrAll: Received some extra data, bytes total: "
  << uncomp_stream_.avail_in << " bytes: "
- << string(reinterpret_cast<const char *>(uncomp_stream_.next_in),
- min(int(uncomp_stream_.avail_in), 20));
+ << std::string(reinterpret_cast<const char *>(uncomp_stream_.next_in),
+ std::min(int(uncomp_stream_.avail_in), 20));
  UncompressErrorInit();
  return Z_DATA_ERROR; // what's the extra data for?
  } else if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) {
@@ -110,26 +110,8 @@
  #include "lzo/lzo1x.h"
  #endif

- #ifdef HAVE_LIBLZF
- extern "C" {
- #include "lzf.h"
- }
- #endif
-
- #ifdef HAVE_LIBFASTLZ
- #include "fastlz.h"
- #endif
-
- #ifdef HAVE_LIBQUICKLZ
- #include "quicklz.h"
- #endif
-
  namespace {

- namespace File {
- void Init() { }
- } // namespace File
-
  namespace file {
  int Defaults() { return 0; }

@@ -138,7 +120,8 @@ namespace file {
  void CheckSuccess() { }
  };

- DummyStatus GetContents(const string& filename, string* data, int unused) {
+ DummyStatus GetContents(
+ const std::string& filename, std::string* data, int unused) {
  FILE* fp = fopen(filename.c_str(), "rb");
  if (fp == NULL) {
  perror(filename.c_str());
@@ -153,7 +136,7 @@ namespace file {
  perror("fread");
  exit(1);
  }
- data->append(string(buf, ret));
+ data->append(std::string(buf, ret));
  }

  fclose(fp);
@@ -161,9 +144,8 @@ namespace file {
  return DummyStatus();
  }

- DummyStatus SetContents(const string& filename,
- const string& str,
- int unused) {
+ inline DummyStatus SetContents(
+ const std::string& filename, const std::string& str, int unused) {
  FILE* fp = fopen(filename.c_str(), "wb");
  if (fp == NULL) {
  perror(filename.c_str());
@@ -467,7 +449,7 @@ class ZLib {

  DECLARE_bool(run_microbenchmarks);

- static void RunSpecifiedBenchmarks() {
+ static inline void RunSpecifiedBenchmarks() {
  if (!FLAGS_run_microbenchmarks) {
  return;
  }
@@ -515,10 +497,6 @@ static inline int RUN_ALL_TESTS() {
  // For main().
  namespace snappy {

- static void CompressFile(const char* fname);
- static void UncompressFile(const char* fname);
- static void MeasureFile(const char* fname);
-
  // Logging.

  #define LOG(level) LogMessage()
@@ -529,15 +507,15 @@ class LogMessage {
  public:
  LogMessage() { }
  ~LogMessage() {
- cerr << endl;
+ std::cerr << std::endl;
  }

  LogMessage& operator<<(const std::string& msg) {
- cerr << msg;
+ std::cerr << msg;
  return *this;
  }
  LogMessage& operator<<(int x) {
- cerr << x;
+ std::cerr << x;
  return *this;
  }
  };
@@ -546,7 +524,7 @@ class LogMessage {
  // and ones that are always active.

  #define CRASH_UNLESS(condition) \
- PREDICT_TRUE(condition) ? (void)0 : \
+ SNAPPY_PREDICT_TRUE(condition) ? (void)0 : \
  snappy::LogMessageVoidify() & snappy::LogMessageCrash()

  #ifdef _MSC_VER
@@ -560,7 +538,7 @@ class LogMessageCrash : public LogMessage {
  public:
  LogMessageCrash() { }
  ~LogMessageCrash() {
- cerr << endl;
+ std::cerr << std::endl;
  abort();
  }
  };
@@ -590,10 +568,6 @@ class LogMessageVoidify {
  #define CHECK_GT(a, b) CRASH_UNLESS((a) > (b))
  #define CHECK_OK(cond) (cond).CheckSuccess()

- } // namespace
-
- using snappy::CompressFile;
- using snappy::UncompressFile;
- using snappy::MeasureFile;
+ } // namespace snappy

  #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
@@ -30,7 +30,16 @@
  #include "snappy-internal.h"
  #include "snappy-sinksource.h"

- #if defined(__x86_64__) || defined(_M_X64)
+ #ifndef SNAPPY_HAVE_SSE2
+ #if defined(__SSE2__) || defined(_M_X64) || \
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+ #define SNAPPY_HAVE_SSE2 1
+ #else
+ #define SNAPPY_HAVE_SSE2 0
+ #endif
+ #endif
+
+ #if SNAPPY_HAVE_SSE2
  #include <emmintrin.h>
  #endif
  #include <stdio.h>
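Editor's note: the block above derives a single SNAPPY_HAVE_SSE2 feature macro from the compiler-specific ones (GCC/Clang's __SSE2__, MSVC's _M_X64 and _M_IX86_FP), so later code such as UnalignedCopy128 in the next hunks can test one switch. A stand-alone sketch of the same pattern (the DEMO_HAVE_SSE2 name and Copy16 helper are hypothetical, not from the diff):

```cpp
// Derive one feature macro, then gate the <emmintrin.h> code on it.
#ifndef DEMO_HAVE_SSE2
#if defined(__SSE2__) || defined(_M_X64) || \
    (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
#define DEMO_HAVE_SSE2 1
#else
#define DEMO_HAVE_SSE2 0
#endif
#endif

#if DEMO_HAVE_SSE2
#include <emmintrin.h>
#endif
#include <cstring>
#include <cstdio>

static void Copy16(const void* src, void* dst) {
#if DEMO_HAVE_SSE2
  // One unaligned 128-bit load and store.
  _mm_storeu_si128(static_cast<__m128i*>(dst),
                   _mm_loadu_si128(static_cast<const __m128i*>(src)));
#else
  std::memcpy(dst, src, 16);
#endif
}

int main() {
  char in[16] = "fifteen chars!!";
  char out[16];
  Copy16(in, out);
  std::printf("%s\n", out);  // prints the copied string
  return 0;
}
```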
@@ -47,7 +56,6 @@ using internal::COPY_2_BYTE_OFFSET;
  using internal::LITERAL;
  using internal::char_table;
  using internal::kMaximumTagLength;
- using internal::wordmask;

  // Any hash function will produce a valid compressed bitstream, but a good
  // hash function reduces the number of collisions and thus yields better
@@ -89,17 +97,21 @@ size_t MaxCompressedLength(size_t source_len) {
  namespace {

  void UnalignedCopy64(const void* src, void* dst) {
- memcpy(dst, src, 8);
+ char tmp[8];
+ memcpy(tmp, src, 8);
+ memcpy(dst, tmp, 8);
  }

  void UnalignedCopy128(const void* src, void* dst) {
  // TODO(alkis): Remove this when we upgrade to a recent compiler that emits
  // SSE2 moves for memcpy(dst, src, 16).
- #ifdef __SSE2__
+ #if SNAPPY_HAVE_SSE2
  __m128i x = _mm_loadu_si128(static_cast<const __m128i*>(src));
  _mm_storeu_si128(static_cast<__m128i*>(dst), x);
  #else
- memcpy(dst, src, 16);
+ char tmp[16];
+ memcpy(tmp, src, 16);
+ memcpy(dst, tmp, 16);
  #endif
  }
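Editor's note: copying through a small local buffer, as the new UnalignedCopy64/UnalignedCopy128 above do, reads the source completely before any byte is written, so the copy stays well-defined even when the source and destination ranges overlap (plain memcpy requires non-overlapping arguments), and the decompressor's incremental copies do overlap. A stand-alone sketch of the idiom (not code from the diff):

```cpp
// Read into a temporary first, then write, so overlapping ranges are safe.
#include <cstring>
#include <cstdio>

static void UnalignedCopy64Demo(const void* src, void* dst) {
  char tmp[8];
  std::memcpy(tmp, src, 8);   // full read happens before the write
  std::memcpy(dst, tmp, 8);
}

int main() {
  // Overlapping copy: extend a short pattern forward, as the decompressor's
  // incremental copies do.
  char buf[16] = "abcd";                // remaining bytes are zero
  UnalignedCopy64Demo(buf, buf + 4);    // buf now holds "abcdabcd"
  std::printf("%s\n", buf);
  return 0;
}
```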
@@ -163,7 +175,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
  // copying 2x 8 bytes at a time.

  // Handle the uncommon case where pattern is less than 8 bytes.
- if (PREDICT_FALSE(pattern_size < 8)) {
+ if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) {
  // Expand pattern to at least 8 bytes. The worse case scenario in terms of
  // buffer usage is when the pattern is size 3. ^ is the original position
  // of op. x are irrelevant bytes copied by the last UnalignedCopy64.
@@ -173,13 +185,13 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
  // abcabcabcabcxxxxx
  // ^
  // The last x is 14 bytes after ^.
- if (PREDICT_TRUE(op <= buf_limit - 14)) {
+ if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 14)) {
  while (pattern_size < 8) {
  UnalignedCopy64(src, op);
  op += pattern_size;
  pattern_size *= 2;
  }
- if (PREDICT_TRUE(op >= op_limit)) return op_limit;
+ if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
  } else {
  return IncrementalCopySlow(src, op, op_limit);
  }
@@ -195,11 +207,11 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
  UnalignedCopy64(src + 8, op + 8);
  src += 16;
  op += 16;
- if (PREDICT_TRUE(op >= op_limit)) return op_limit;
+ if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
  }
  // We only take this branch if we didn't have enough slop and we can do a
  // single 8 byte copy.
- if (PREDICT_FALSE(op <= buf_limit - 8)) {
+ if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) {
  UnalignedCopy64(src, op);
  src += 8;
  op += 8;
@@ -261,7 +273,7 @@ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
  assert(offset < 65536);
  assert(len_less_than_12 == (len < 12));

- if (len_less_than_12 && PREDICT_TRUE(offset < 2048)) {
+ if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) {
  // offset fits in 11 bits. The 3 highest go in the top of the first byte,
  // and the rest go in the second byte.
  *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
@@ -286,7 +298,7 @@ static inline char* EmitCopy(char* op, size_t offset, size_t len,
  // it's in the noise.

  // Emit 64 byte copies but make sure to keep at least four bytes reserved.
- while (PREDICT_FALSE(len >= 68)) {
+ while (SNAPPY_PREDICT_FALSE(len >= 68)) {
  op = EmitCopyAtMost64(op, offset, 64, false);
  len -= 64;
  }
@@ -415,7 +427,7 @@ char* CompressFragment(const char* input,
  const char* next_emit = ip;

  const size_t kInputMarginBytes = 15;
- if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
+ if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
  const char* ip_limit = input + input_size - kInputMarginBytes;

  for (uint32 next_hash = Hash(++ip, shift); ; ) {
@@ -456,7 +468,7 @@ char* CompressFragment(const char* input,
  uint32 bytes_between_hash_lookups = skip >> 5;
  skip += bytes_between_hash_lookups;
  next_ip = ip + bytes_between_hash_lookups;
- if (PREDICT_FALSE(next_ip > ip_limit)) {
+ if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
  goto emit_remainder;
  }
  next_hash = Hash(next_ip, shift);
@@ -465,8 +477,8 @@ char* CompressFragment(const char* input,
  assert(candidate < ip);

  table[hash] = ip - base_ip;
- } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
- UNALIGNED_LOAD32(candidate)));
+ } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
+ UNALIGNED_LOAD32(candidate)));

  // Step 2: A 4-byte match has been found. We'll later see if more
  // than 4 bytes match. But, prior to the match, input
@@ -497,7 +509,7 @@ char* CompressFragment(const char* input,
  assert(0 == memcmp(base, candidate, matched));
  op = EmitCopy(op, offset, matched, p.second);
  next_emit = ip;
- if (PREDICT_FALSE(ip >= ip_limit)) {
+ if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
  goto emit_remainder;
  }
  // We are now looking for a 4-byte match again. We read
@@ -527,6 +539,10 @@ char* CompressFragment(const char* input,
  }
  } // end namespace internal

+ // Called back at every compression call to trace parameters and sizes.
+ static inline void Report(const char *algorithm, size_t compressed_size,
+ size_t uncompressed_size) {}
+
  // Signature of output types needed by decompression code.
  // The decompression code is templatized on a type that obeys this
  // signature so that we do not pay virtual function call overhead in
@@ -567,6 +583,14 @@ char* CompressFragment(const char* input,
  // bool TryFastAppend(const char* ip, size_t available, size_t length);
  // };

+ namespace internal {
+
+ // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
+ static const uint32 wordmask[] = {
+ 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
+ };
+
+ } // end namespace internal

  // Helper class for decompression
  class SnappyDecompressor {
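Editor's note: wordmask[] moves here into namespace internal in snappy.cc (its `using internal::wordmask;` was dropped earlier in the diff). Its job is to keep only the bottom 8*i bits of a 32-bit load when an i-byte little-endian length field has to be read. A stand-alone sketch (not code from the diff; the memcpy-based load and the little-endian interpretation are assumptions of the demo):

```cpp
// Mask off all but the low n bytes of a 32-bit load, n in [0,4].
#include <cstdint>
#include <cstring>
#include <cstdio>

static const uint32_t wordmask[] = {
  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
};

static uint32_t LoadNBytes(const void* p, int n) {
  uint32_t v;
  std::memcpy(&v, p, sizeof(v));     // unaligned-safe 32-bit load
  return v & wordmask[n];            // keep only the bottom n bytes
}

int main() {
  const unsigned char data[4] = {0x78, 0x56, 0x34, 0x12};
  // On a little-endian host this prints 0x5678: the two low-order bytes.
  std::printf("0x%x\n", LoadNBytes(data, 2));
  return 0;
}
```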
@@ -638,7 +662,16 @@ class SnappyDecompressor {
  // For position-independent executables, accessing global arrays can be
  // slow. Move wordmask array onto the stack to mitigate this.
  uint32 wordmask[sizeof(internal::wordmask)/sizeof(uint32)];
- memcpy(wordmask, internal::wordmask, sizeof(wordmask));
+ // Do not use memcpy to copy internal::wordmask to
+ // wordmask. LLVM converts stack arrays to global arrays if it detects
+ // const stack arrays and this hurts the performance of position
+ // independent code. This change is temporary and can be reverted when
+ // https://reviews.llvm.org/D30759 is approved.
+ wordmask[0] = internal::wordmask[0];
+ wordmask[1] = internal::wordmask[1];
+ wordmask[2] = internal::wordmask[2];
+ wordmask[3] = internal::wordmask[3];
+ wordmask[4] = internal::wordmask[4];

  // We could have put this refill fragment only at the beginning of the loop.
  // However, duplicating it at the end of each branch gives the compiler more
@@ -652,6 +685,13 @@ class SnappyDecompressor {
  }

  MAYBE_REFILL();
+ // Add loop alignment directive. Without this directive, we observed
+ // significant performance degradation on several intel architectures
+ // in snappy benchmark built with LLVM. The degradation was caused by
+ // increased branch miss prediction.
+ #if defined(__clang__) && defined(__x86_64__)
+ asm volatile (".p2align 5");
+ #endif
  for ( ;; ) {
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));

@@ -667,7 +707,7 @@ class SnappyDecompressor {
  // txt[1-4] 25% 75%
  // pb 24% 76%
  // bin 24% 76%
- if (PREDICT_FALSE((c & 0x3) == LITERAL)) {
+ if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
  size_t literal_length = (c >> 2) + 1u;
  if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
  assert(literal_length < 61);
@@ -677,7 +717,7 @@ class SnappyDecompressor {
  // bytes in addition to the literal.
  continue;
  }
- if (PREDICT_FALSE(literal_length >= 61)) {
+ if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
  // Long literal.
  const size_t literal_length_length = literal_length - 60;
  literal_length =
@@ -757,7 +797,7 @@ bool SnappyDecompressor::RefillTag() {
  size_t length;
  const char* src = reader_->Peek(&length);
  if (length == 0) return false;
- uint32 to_add = min<uint32>(needed - nbuf, length);
+ uint32 to_add = std::min<uint32>(needed - nbuf, length);
  memcpy(scratch_ + nbuf, src, to_add);
  nbuf += to_add;
  reader_->Skip(to_add);
@@ -786,13 +826,18 @@ static bool InternalUncompress(Source* r, Writer* writer) {
  SnappyDecompressor decompressor(r);
  uint32 uncompressed_len = 0;
  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
- return InternalUncompressAllTags(&decompressor, writer, uncompressed_len);
+
+ return InternalUncompressAllTags(&decompressor, writer, r->Available(),
+ uncompressed_len);
  }

  template <typename Writer>
  static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
  Writer* writer,
+ uint32 compressed_len,
  uint32 uncompressed_len) {
+ Report("snappy_uncompress", compressed_len, uncompressed_len);
+
  writer->SetExpectedLength(uncompressed_len);

  // Process the entire input
@@ -809,6 +854,7 @@ bool GetUncompressedLength(Source* source, uint32* result) {
  size_t Compress(Source* reader, Sink* writer) {
  size_t written = 0;
  size_t N = reader->Available();
+ const size_t uncompressed_size = N;
  char ulength[Varint::kMax32];
  char* p = Varint::Encode32(ulength, N);
  writer->Append(ulength, p-ulength);
@@ -823,7 +869,7 @@ size_t Compress(Source* reader, Sink* writer) {
  size_t fragment_size;
  const char* fragment = reader->Peek(&fragment_size);
  assert(fragment_size != 0); // premature end of input
- const size_t num_to_read = min(N, kBlockSize);
+ const size_t num_to_read = std::min(N, kBlockSize);
  size_t bytes_read = fragment_size;

  size_t pending_advance = 0;
@@ -844,7 +890,7 @@ size_t Compress(Source* reader, Sink* writer) {

  while (bytes_read < num_to_read) {
  fragment = reader->Peek(&fragment_size);
- size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
+ size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
  memcpy(scratch + bytes_read, fragment, n);
  bytes_read += n;
  reader->Skip(n);
@@ -881,6 +927,8 @@ size_t Compress(Source* reader, Sink* writer) {
  reader->Skip(pending_advance);
  }

+ Report("snappy_compress", written, uncompressed_size);
+

  delete[] scratch;
  delete[] scratch_output;
@@ -1313,7 +1361,8 @@ class SnappyScatteredWriter {
  char* const op_end = op_ptr_ + len;
  // See SnappyArrayWriter::AppendFromSelf for an explanation of
  // the "offset - 1u" trick.
- if (PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ && op_end <= op_limit_)) {
+ if (SNAPPY_PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ &&
+ op_end <= op_limit_)) {
  // Fast path: src and dst in current block.
  op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_);
  return true;
@@ -1344,7 +1393,7 @@ bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
  }

  // Make new block
- size_t bsize = min<size_t>(kBlockSize, expected_ - full_size_);
+ size_t bsize = std::min<size_t>(kBlockSize, expected_ - full_size_);
  op_base_ = allocator_.Allocate(bsize);
  op_ptr_ = op_base_;
  op_limit_ = op_base_ + bsize;
@@ -1401,7 +1450,7 @@ class SnappySinkAllocator {
  size_t size_written = 0;
  size_t block_size;
  for (int i = 0; i < blocks_.size(); ++i) {
- block_size = min<size_t>(blocks_[i].size, size - size_written);
+ block_size = std::min<size_t>(blocks_[i].size, size - size_written);
  dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
  &SnappySinkAllocator::Deleter, NULL);
  size_written += block_size;
@@ -1446,18 +1495,20 @@ bool Uncompress(Source* compressed, Sink* uncompressed) {
  char* buf = uncompressed->GetAppendBufferVariable(
  1, uncompressed_len, &c, 1, &allocated_size);

+ const size_t compressed_len = compressed->Available();
  // If we can get a flat buffer, then use it, otherwise do block by block
  // uncompression
  if (allocated_size >= uncompressed_len) {
  SnappyArrayWriter writer(buf);
- bool result = InternalUncompressAllTags(
- &decompressor, &writer, uncompressed_len);
+ bool result = InternalUncompressAllTags(&decompressor, &writer,
+ compressed_len, uncompressed_len);
  uncompressed->Append(buf, writer.Produced());
  return result;
  } else {
  SnappySinkAllocator allocator(uncompressed);
  SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
- return InternalUncompressAllTags(&decompressor, &writer, uncompressed_len);
+ return InternalUncompressAllTags(&decompressor, &writer, compressed_len,
+ uncompressed_len);
  }
  }