snappy 0.4.0-java → 0.5.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +1 -1
- data/.github/workflows/publish.yml +7 -13
- data/Dockerfile +1 -1
- data/Gemfile +1 -0
- data/README.md +3 -0
- data/Rakefile +1 -1
- data/lib/snappy/shim.rb +3 -23
- data/lib/snappy/version.rb +1 -1
- data/lib/snappy/writer.rb +1 -1
- data/lib/snappy_ext.jar +0 -0
- data/test/snappy_test.rb +29 -4
- data/vendor/snappy/BUILD.bazel +211 -0
- data/vendor/snappy/CMakeLists.txt +32 -10
- data/vendor/snappy/MODULE.bazel +23 -0
- data/vendor/snappy/NEWS +15 -0
- data/vendor/snappy/README.md +4 -4
- data/vendor/snappy/WORKSPACE +27 -0
- data/vendor/snappy/WORKSPACE.bzlmod +0 -0
- data/vendor/snappy/cmake/config.h.in +3 -0
- data/vendor/snappy/snappy-internal.h +29 -0
- data/vendor/snappy/snappy-stubs-internal.h +6 -0
- data/vendor/snappy/snappy.cc +258 -37
- data/vendor/snappy/snappy.h +41 -6
- data/vendor/snappy/snappy_benchmark.cc +38 -18
- data/vendor/snappy/snappy_compress_fuzzer.cc +18 -14
- data/vendor/snappy/snappy_unittest.cc +13 -0
- metadata +8 -7
data/vendor/snappy/snappy-internal.h
CHANGED

@@ -31,6 +31,8 @@
 #ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
 #define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
 
+#include <utility>
+
 #include "snappy-stubs-internal.h"
 
 #if SNAPPY_HAVE_SSSE3
@@ -256,6 +258,8 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
       s2 += 8;
     }
   }
+  SNAPPY_PREFETCH(s1 + 64);
+  SNAPPY_PREFETCH(s2 + 64);
 
   // Find out how long the match is. We loop over the data 64 bits at a
   // time until we find a 64-bit block that doesn't match; then we find
@@ -330,6 +334,31 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
 }
 #endif
 
+static inline size_t FindMatchLengthPlain(const char* s1, const char* s2,
+                                          const char* s2_limit) {
+  // Implementation based on the x86-64 version, above.
+  assert(s2_limit >= s2);
+  int matched = 0;
+
+  while (s2 <= s2_limit - 8 &&
+         UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
+    s2 += 8;
+    matched += 8;
+  }
+  if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 8) {
+    uint64_t x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
+    int matching_bits = Bits::FindLSBSetNonZero64(x);
+    matched += matching_bits >> 3;
+    s2 += matching_bits >> 3;
+  } else {
+    while ((s2 < s2_limit) && (s1[matched] == *s2)) {
+      ++s2;
+      ++matched;
+    }
+  }
+  return matched;
+}
+
 // Lookup tables for decompression code. Give --snappy_dump_decompression_table
 // to the unit test to recompute char_table.
 
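Note on the new FindMatchLengthPlain above: it uses the classic XOR trick from the x86-64 fast path. XOR two 64-bit words; on a little-endian machine the bit index of the lowest set bit, divided by eight, is the number of equal leading bytes. A minimal standalone sketch of that trick (not from the diff; it substitutes the GCC/Clang __builtin_ctzll for Bits::FindLSBSetNonZero64 and std::memcpy for UNALIGNED_LOAD64):

#include <cassert>
#include <cstdint>
#include <cstring>

// Count equal leading bytes of two 8-byte regions that are known to differ.
static size_t MatchingPrefixBytes(const char* a, const char* b) {
  uint64_t wa, wb;
  std::memcpy(&wa, a, 8);  // unaligned-safe load, like UNALIGNED_LOAD64
  std::memcpy(&wb, b, 8);
  uint64_t x = wa ^ wb;    // first differing byte sets the lowest set bit
  assert(x != 0);
  return __builtin_ctzll(x) >> 3;  // bit index / 8 = matching byte count
}

int main() {
  const char a[] = "snappy-1";
  const char b[] = "snappy-2";
  return MatchingPrefixBytes(a, b) == 7 ? 0 : 1;  // first 7 bytes match
}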
data/vendor/snappy/snappy-stubs-internal.h
CHANGED

@@ -105,6 +105,12 @@
 #define SNAPPY_ATTRIBUTE_ALWAYS_INLINE
 #endif  // HAVE_ATTRIBUTE_ALWAYS_INLINE
 
+#if HAVE_BUILTIN_PREFETCH
+#define SNAPPY_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 3)
+#else
+#define SNAPPY_PREFETCH(ptr) (void)(ptr)
+#endif
+
 // Stubbed version of ABSL_FLAG.
 //
 // In the open source version, flags can only be changed at compile time.
data/vendor/snappy/snappy.cc
CHANGED
@@ -68,18 +68,14 @@
 #include <arm_acle.h>
 #endif
 
-#if defined(__GNUC__)
-#define SNAPPY_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 3)
-#else
-#define SNAPPY_PREFETCH(ptr) (void)(ptr)
-#endif
-
 #include <algorithm>
 #include <array>
 #include <cstddef>
 #include <cstdint>
 #include <cstdio>
 #include <cstring>
+#include <functional>
+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
@@ -180,6 +176,22 @@ inline uint16_t* TableEntry(uint16_t* table, uint32_t bytes, uint32_t mask) {
                                      (hash & mask));
 }
 
+inline uint16_t* TableEntry4ByteMatch(uint16_t* table, uint32_t bytes,
+                                      uint32_t mask) {
+  constexpr uint32_t kMagic = 2654435761U;
+  const uint32_t hash = (kMagic * bytes) >> (32 - kMaxHashTableBits);
+  return reinterpret_cast<uint16_t*>(reinterpret_cast<uintptr_t>(table) +
+                                     (hash & mask));
+}
+
+inline uint16_t* TableEntry8ByteMatch(uint16_t* table, uint64_t bytes,
+                                      uint32_t mask) {
+  constexpr uint64_t kMagic = 58295818150454627ULL;
+  const uint32_t hash = (kMagic * bytes) >> (64 - kMaxHashTableBits);
+  return reinterpret_cast<uint16_t*>(reinterpret_cast<uintptr_t>(table) +
+                                     (hash & mask));
+}
+
 }  // namespace
 
 size_t MaxCompressedLength(size_t source_bytes) {
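Both new lookups are multiplicative hashes: multiply, keep the top kMaxHashTableBits bits, then AND with a byte-offset mask of 2 * (table_size - 1), so the result is an even byte offset into the uint16_t table and needs no extra shift. A standalone sketch of the 4-byte variant (kMagic 2654435761 is the usual Knuth constant, roughly 2^32 divided by the golden ratio; the local kMaxHashTableBits mirrors the new value in snappy.h):

#include <cstdint>
#include <cstdio>

constexpr int kMaxHashTableBits = 15;

static uint32_t Hash4(uint32_t bytes) {
  constexpr uint32_t kMagic = 2654435761U;
  return (kMagic * bytes) >> (32 - kMaxHashTableBits);
}

int main() {
  const uint32_t table_size = 1 << 14;         // slots in one half-table
  const uint32_t mask = 2 * (table_size - 1);  // byte-offset mask, low bit clear
  uint32_t h = Hash4(0x70616e73u);             // "snap" read little-endian
  std::printf("byte offset into table: %u\n", h & mask);  // always 2-aligned
  return 0;
}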
@@ -936,6 +948,174 @@ char* CompressFragment(const char* input, size_t input_size, char* op,
     }
   }
 
+emit_remainder:
+  // Emit the remaining bytes as a literal
+  if (ip < ip_end) {
+    op = EmitLiteral</*allow_fast_path=*/false>(op, ip, ip_end - ip);
+  }
+
+  return op;
+}
+
+char* CompressFragmentDoubleHash(const char* input, size_t input_size, char* op,
+                                 uint16_t* table, const int table_size,
+                                 uint16_t* table2, const int table_size2) {
+  (void)table_size2;
+  assert(table_size == table_size2);
+  // "ip" is the input pointer, and "op" is the output pointer.
+  const char* ip = input;
+  assert(input_size <= kBlockSize);
+  assert((table_size & (table_size - 1)) == 0);  // table must be power of two
+  const uint32_t mask = 2 * (table_size - 1);
+  const char* ip_end = input + input_size;
+  const char* base_ip = ip;
+
+  const size_t kInputMarginBytes = 15;
+  if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
+    const char* ip_limit = input + input_size - kInputMarginBytes;
+
+    for (;;) {
+      const char* next_emit = ip++;
+      uint64_t data = LittleEndian::Load64(ip);
+      uint32_t skip = 512;
+
+      const char* candidate;
+      uint32_t candidate_length;
+      while (true) {
+        assert(static_cast<uint32_t>(data) == LittleEndian::Load32(ip));
+        uint16_t* table_entry2 = TableEntry8ByteMatch(table2, data, mask);
+        uint32_t bytes_between_hash_lookups = skip >> 9;
+        skip++;
+        const char* next_ip = ip + bytes_between_hash_lookups;
+        if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
+          ip = next_emit;
+          goto emit_remainder;
+        }
+        candidate = base_ip + *table_entry2;
+        assert(candidate >= base_ip);
+        assert(candidate < ip);
+
+        *table_entry2 = ip - base_ip;
+        if (SNAPPY_PREDICT_FALSE(static_cast<uint32_t>(data) ==
+                                 LittleEndian::Load32(candidate))) {
+          candidate_length =
+              FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
+          break;
+        }
+
+        uint16_t* table_entry = TableEntry4ByteMatch(table, data, mask);
+        candidate = base_ip + *table_entry;
+        assert(candidate >= base_ip);
+        assert(candidate < ip);
+
+        *table_entry = ip - base_ip;
+        if (SNAPPY_PREDICT_FALSE(static_cast<uint32_t>(data) ==
+                                 LittleEndian::Load32(candidate))) {
+          candidate_length =
+              FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
+          table_entry2 =
+              TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 1), mask);
+          auto candidate2 = base_ip + *table_entry2;
+          size_t candidate_length2 =
+              FindMatchLengthPlain(candidate2, ip + 1, ip_end);
+          if (candidate_length2 > candidate_length) {
+            *table_entry2 = ip - base_ip;
+            candidate = candidate2;
+            candidate_length = candidate_length2;
+            ++ip;
+          }
+          break;
+        }
+        data = LittleEndian::Load64(next_ip);
+        ip = next_ip;
+      }
+      // Backtrack to the point it matches fully.
+      while (ip > next_emit && candidate > base_ip &&
+             *(ip - 1) == *(candidate - 1)) {
+        --ip;
+        --candidate;
+        ++candidate_length;
+      }
+      *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 1), mask) =
+          ip - base_ip + 1;
+      *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 2), mask) =
+          ip - base_ip + 2;
+      *TableEntry4ByteMatch(table, LittleEndian::Load32(ip + 1), mask) =
+          ip - base_ip + 1;
+      // Step 2: A 4-byte or 8-byte match has been found.
+      // We'll later see if more than 4 bytes match. But, prior to the match,
+      // input bytes [next_emit, ip) are unmatched. Emit them as
+      // "literal bytes."
+      assert(next_emit + 16 <= ip_end);
+      if (ip - next_emit > 0) {
+        op = EmitLiteral</*allow_fast_path=*/true>(op, next_emit,
+                                                   ip - next_emit);
+      }
+      // Step 3: Call EmitCopy, and then see if another EmitCopy could
+      // be our next move. Repeat until we find no match for the
+      // input immediately after what was consumed by the last EmitCopy call.
+      //
+      // If we exit this loop normally then we need to call EmitLiteral next,
+      // though we don't yet know how big the literal will be. We handle that
+      // by proceeding to the next iteration of the main loop. We also can exit
+      // this loop via goto if we get close to exhausting the input.
+      do {
+        // We have a 4-byte match at ip, and no need to emit any
+        // "literal bytes" prior to ip.
+        const char* base = ip;
+        ip += candidate_length;
+        size_t offset = base - candidate;
+        if (candidate_length < 12) {
+          op =
+              EmitCopy</*len_less_than_12=*/true>(op, offset, candidate_length);
+        } else {
+          op = EmitCopy</*len_less_than_12=*/false>(op, offset,
+                                                    candidate_length);
+        }
+        if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        // We are now looking for a 4-byte match again. We read
+        // table[Hash(ip, mask)] for that. To improve compression,
+        // we also update several previous table entries.
+        if (ip - base_ip > 7) {
+          *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 7), mask) =
+              ip - base_ip - 7;
+          *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 4), mask) =
+              ip - base_ip - 4;
+        }
+        *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 3), mask) =
+            ip - base_ip - 3;
+        *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 2), mask) =
+            ip - base_ip - 2;
+        *TableEntry4ByteMatch(table, LittleEndian::Load32(ip - 2), mask) =
+            ip - base_ip - 2;
+        *TableEntry4ByteMatch(table, LittleEndian::Load32(ip - 1), mask) =
+            ip - base_ip - 1;
+
+        uint16_t* table_entry =
+            TableEntry8ByteMatch(table2, LittleEndian::Load64(ip), mask);
+        candidate = base_ip + *table_entry;
+        *table_entry = ip - base_ip;
+        if (LittleEndian::Load32(ip) == LittleEndian::Load32(candidate)) {
+          candidate_length =
+              FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
+          continue;
+        }
+        table_entry =
+            TableEntry4ByteMatch(table, LittleEndian::Load32(ip), mask);
+        candidate = base_ip + *table_entry;
+        *table_entry = ip - base_ip;
+        if (LittleEndian::Load32(ip) == LittleEndian::Load32(candidate)) {
+          candidate_length =
+              FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
+          continue;
+        }
+        break;
+      } while (true);
+    }
+  }
+
 emit_remainder:
   // Emit the remaining bytes as a literal
   if (ip < ip_end) {
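Like CompressFragment, the double-hash compressor throttles its probing on hard-to-compress input, but with a much slower ramp: skip starts at 512 and the stride is skip >> 9, so the first 512 probes advance one byte each before the step begins to grow. A quick standalone sketch of that schedule, just reproducing the two lines of arithmetic above:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t skip = 512;   // initial value in CompressFragmentDoubleHash
  uint64_t scanned = 0;  // input bytes covered by the probe loop
  for (int probe = 0; probe < 2048; ++probe) {
    uint32_t bytes_between_hash_lookups = skip >> 9;  // stride: 1,...,1,2,...
    skip++;
    scanned += bytes_between_hash_lookups;
  }
  // The stride grows by 1 every 512 probes, so 2048 probes cover
  // 512 * (1 + 2 + 3 + 4) = 5120 bytes.
  std::printf("bytes scanned: %llu\n", (unsigned long long)scanned);
  return 0;
}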
@@ -946,10 +1126,10 @@ emit_remainder:
   }
 }  // end namespace internal
 
-// Called back at avery compression call to trace parameters and sizes.
-static inline void Report(const char *algorithm, size_t compressed_size,
-                          size_t uncompressed_size) {
+static inline void Report(int token, const char *algorithm, size_t
+                          compressed_size, size_t uncompressed_size) {
   // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)token;
   (void)algorithm;
   (void)compressed_size;
   (void)uncompressed_size;
@@ -1234,16 +1414,21 @@ std::pair<const uint8_t*, ptrdiff_t> DecompressBranchless(
       assert(tag == ip[-1]);
       // For literals tag_type = 0, hence we will always obtain 0 from
       // ExtractLowBytes. For literals offset will thus be kLiteralOffset.
-      ptrdiff_t len_min_offset = kLengthMinusOffset[tag];
+      ptrdiff_t len_minus_offset = kLengthMinusOffset[tag];
+      uint32_t next;
 #if defined(__aarch64__)
       size_t tag_type = AdvanceToNextTagARMOptimized(&ip, &tag);
+      // We never need more than 16 bits. Doing a Load16 allows the compiler
+      // to elide the masking operation in ExtractOffset.
+      next = LittleEndian::Load16(old_ip);
 #else
       size_t tag_type = AdvanceToNextTagX86Optimized(&ip, &tag);
+      next = LittleEndian::Load32(old_ip);
 #endif
-      uint32_t next = LittleEndian::Load32(old_ip);
-      size_t len = len_min_offset & 0xFF;
-      len_min_offset -= ExtractOffset(next, tag_type);
-      if (SNAPPY_PREDICT_FALSE(len_min_offset > 0)) {
+      size_t len = len_minus_offset & 0xFF;
+      ptrdiff_t extracted = ExtractOffset(next, tag_type);
+      ptrdiff_t len_min_offset = len_minus_offset - extracted;
+      if (SNAPPY_PREDICT_FALSE(len_minus_offset > extracted)) {
         if (SNAPPY_PREDICT_FALSE(len & 0x80)) {
           // Exceptional case (long literal or copy 4).
           // Actually doing the copy here is negatively impacting the main
@@ -1290,7 +1475,7 @@ std::pair<const uint8_t*, ptrdiff_t> DecompressBranchless(
         DeferMemCopy(&deferred_src, &deferred_length, from, len);
       }
     } while (ip < ip_limit_min_slop &&
-             (op + deferred_length) < op_limit_min_slop);
+             static_cast<ptrdiff_t>(op + deferred_length) < op_limit_min_slop);
   exit:
     ip--;
     assert(ip <= ip_limit);
@@ -1314,7 +1499,7 @@ class SnappyDecompressor {
   // If ip < ip_limit_min_maxtaglen_ it's safe to read kMaxTagLength from
   // buffer.
   const char* ip_limit_min_maxtaglen_;
-  uint32_t peeked_;                  // Bytes peeked from reader (need to skip)
+  uint64_t peeked_;                  // Bytes peeked from reader (need to skip)
  bool eof_;                         // Hit end of input without an error?
  char scratch_[kMaximumTagLength];  // See RefillTag().
 
@@ -1505,7 +1690,8 @@ constexpr uint32_t CalculateNeeded(uint8_t tag) {
 #if __cplusplus >= 201402L
 constexpr bool VerifyCalculateNeeded() {
   for (int i = 0; i < 1; i++) {
-    if (CalculateNeeded(i) != (char_table[i] >> 11) + 1) return false;
+    if (CalculateNeeded(i) != static_cast<uint32_t>((char_table[i] >> 11)) + 1)
+      return false;
   }
   return true;
 }
@@ -1541,7 +1727,7 @@ bool SnappyDecompressor::RefillTag() {
   assert(needed <= sizeof(scratch_));
 
   // Read more bytes from reader if needed
-  uint32_t nbuf = ip_limit_ - ip;
+  uint64_t nbuf = ip_limit_ - ip;
   if (nbuf < needed) {
     // Stitch together bytes from ip and reader to form the word
     // contents. We store the needed bytes in "scratch_". They
@@ -1554,7 +1740,7 @@ bool SnappyDecompressor::RefillTag() {
       size_t length;
      const char* src = reader_->Peek(&length);
       if (length == 0) return false;
-      uint32_t to_add = std::min<uint32_t>(needed - nbuf, length);
+      uint64_t to_add = std::min<uint64_t>(needed - nbuf, length);
       std::memcpy(scratch_ + nbuf, src, to_add);
       nbuf += to_add;
       reader_->Skip(to_add);
@@ -1592,7 +1778,8 @@ template <typename Writer>
 static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
                                       Writer* writer, uint32_t compressed_len,
                                       uint32_t uncompressed_len) {
-  Report("snappy_uncompress", compressed_len, uncompressed_len);
+  int token = 0;
+  Report(token, "snappy_uncompress", compressed_len, uncompressed_len);
 
   writer->SetExpectedLength(uncompressed_len);
 
@@ -1608,8 +1795,15 @@ bool GetUncompressedLength(Source* source, uint32_t* result) {
 }
 
 size_t Compress(Source* reader, Sink* writer) {
+  return Compress(reader, writer, CompressionOptions{});
+}
+
+size_t Compress(Source* reader, Sink* writer, CompressionOptions options) {
+  assert(options.level == 1 || options.level == 2);
+  int token = 0;
   size_t written = 0;
   size_t N = reader->Available();
+  assert(N <= 0xFFFFFFFFu);
   const size_t uncompressed_size = N;
   char ulength[Varint::kMax32];
   char* p = Varint::Encode32(ulength, N);
@@ -1654,17 +1848,23 @@ size_t Compress(Source* reader, Sink* writer) {
     uint16_t* table = wmem.GetHashTable(num_to_read, &table_size);
 
     // Compress input_fragment and append to dest
-    const int max_output = MaxCompressedLength(num_to_read);
-
-    // Need a scratch buffer for the output, in case the byte sink doesn't
-    // have room for us directly.
+    int max_output = MaxCompressedLength(num_to_read);
 
     // Since we encode kBlockSize regions followed by a region
     // which is <= kBlockSize in length, a previously allocated
     // scratch_output[] region is big enough for this iteration.
+    // Need a scratch buffer for the output, in case the byte sink doesn't
+    // have room for us directly.
     char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput());
-    char* end = internal::CompressFragment(fragment, fragment_size, dest, table,
-                                           table_size);
+    char* end = nullptr;
+    if (options.level == 1) {
+      end = internal::CompressFragment(fragment, fragment_size, dest, table,
+                                       table_size);
+    } else if (options.level == 2) {
+      end = internal::CompressFragmentDoubleHash(
+          fragment, fragment_size, dest, table, table_size >> 1,
+          table + (table_size >> 1), table_size >> 1);
+    }
     writer->Append(dest, end - dest);
     written += (end - dest);
 
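Note how level 2 reuses the single working-memory allocation: the hash table is split in half, the first half feeding TableEntry4ByteMatch and the second half TableEntry8ByteMatch, each advertised as table_size >> 1 slots. A sketch of the call shape only (the vector stands in for the real WorkingMemory arena; the commented call names the function added in the hunk above):

#include <cstdint>
#include <vector>

int main() {
  const int table_size = 1 << 15;  // full allocation, in uint16_t slots
  std::vector<uint16_t> wmem(table_size, 0);
  const int half = table_size >> 1;
  uint16_t* table = wmem.data();          // 4-byte-match half
  uint16_t* table2 = wmem.data() + half;  // 8-byte-match half
  // end = internal::CompressFragmentDoubleHash(fragment, fragment_size, dest,
  //                                            table, half, table2, half);
  (void)table;
  (void)table2;
  return 0;
}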
@@ -1672,8 +1872,7 @@ size_t Compress(Source* reader, Sink* writer) {
     reader->Skip(pending_advance);
   }
 
-  Report("snappy_compress", written, uncompressed_size);
-
+  Report(token, "snappy_compress", written, uncompressed_size);
 
   return written;
 }
@@ -1696,16 +1895,16 @@ class SnappyIOVecReader : public Source {
     if (total_size > 0 && curr_size_remaining_ == 0) Advance();
   }
 
-  ~SnappyIOVecReader() = default;
+  ~SnappyIOVecReader() override = default;
 
-  size_t Available() const { return total_size_remaining_; }
+  size_t Available() const override { return total_size_remaining_; }
 
-  const char* Peek(size_t* len) {
+  const char* Peek(size_t* len) override {
     *len = curr_size_remaining_;
     return curr_pos_;
   }
 
-  void Skip(size_t n) {
+  void Skip(size_t n) override {
     while (n >= curr_size_remaining_ && n > 0) {
       n -= curr_size_remaining_;
       Advance();
@@ -2108,9 +2307,15 @@ bool IsValidCompressed(Source* compressed) {
 
 void RawCompress(const char* input, size_t input_length, char* compressed,
                  size_t* compressed_length) {
+  RawCompress(input, input_length, compressed, compressed_length,
+              CompressionOptions{});
+}
+
+void RawCompress(const char* input, size_t input_length, char* compressed,
+                 size_t* compressed_length, CompressionOptions options) {
   ByteArraySource reader(input, input_length);
   UncheckedByteArraySink writer(compressed);
-  Compress(&reader, &writer);
+  Compress(&reader, &writer, options);
 
   // Compute how many bytes were added
   *compressed_length = (writer.CurrentDestination() - compressed);
@@ -2118,9 +2323,16 @@ void RawCompress(const char* input, size_t input_length, char* compressed,
 
 void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
                           char* compressed, size_t* compressed_length) {
+  RawCompressFromIOVec(iov, uncompressed_length, compressed, compressed_length,
+                       CompressionOptions{});
+}
+
+void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
+                          char* compressed, size_t* compressed_length,
+                          CompressionOptions options) {
   SnappyIOVecReader reader(iov, uncompressed_length);
   UncheckedByteArraySink writer(compressed);
-  Compress(&reader, &writer);
+  Compress(&reader, &writer, options);
 
   // Compute how many bytes were added.
   *compressed_length = writer.CurrentDestination() - compressed;
@@ -2128,18 +2340,28 @@ void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
 
 size_t Compress(const char* input, size_t input_length,
                 std::string* compressed) {
+  return Compress(input, input_length, compressed, CompressionOptions{});
+}
+
+size_t Compress(const char* input, size_t input_length, std::string* compressed,
+                CompressionOptions options) {
   // Pre-grow the buffer to the max length of the compressed output
   STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));
 
   size_t compressed_length;
   RawCompress(input, input_length, string_as_array(compressed),
-              &compressed_length);
+              &compressed_length, options);
   compressed->erase(compressed_length);
   return compressed_length;
 }
 
 size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
                          std::string* compressed) {
+  return CompressFromIOVec(iov, iov_cnt, compressed, CompressionOptions{});
+}
+
+size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
+                         std::string* compressed, CompressionOptions options) {
   // Compute the number of bytes to be compressed.
   size_t uncompressed_length = 0;
   for (size_t i = 0; i < iov_cnt; ++i) {
@@ -2152,7 +2374,7 @@ size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
 
   size_t compressed_length;
   RawCompressFromIOVec(iov, uncompressed_length, string_as_array(compressed),
-                       &compressed_length);
+                       &compressed_length, options);
   compressed->erase(compressed_length);
   return compressed_length;
 }
@@ -2342,7 +2564,6 @@ bool SnappyScatteredWriter<Allocator>::SlowAppendFromSelf(size_t offset,
 class SnappySinkAllocator {
  public:
   explicit SnappySinkAllocator(Sink* dest) : dest_(dest) {}
-  ~SnappySinkAllocator() {}
 
   char* Allocate(int size) {
     Datablock block(new char[size], size);
data/vendor/snappy/snappy.h
CHANGED
@@ -50,13 +50,38 @@ namespace snappy {
 class Source;
 class Sink;
 
+struct CompressionOptions {
+  // Compression level.
+  // Level 1 is the fastest
+  // Level 2 is a little slower but provides better compression. Level 2 is
+  // **EXPERIMENTAL** for the time being. It might happen that we decide to
+  // fall back to level 1 in the future.
+  // Levels 3+ are currently not supported. We plan to support levels up to
+  // 9 in the future.
+  // If you played with other compression algorithms, level 1 is equivalent to
+  // fast mode (level 1) of LZ4, level 2 is equivalent to LZ4's level 2 mode
+  // and compresses somewhere around zstd:-3 and zstd:-2 but generally with
+  // faster decompression speeds than snappy:1 and zstd:-3.
+  int level = DefaultCompressionLevel();
+
+  constexpr CompressionOptions() = default;
+  constexpr CompressionOptions(int compression_level)
+      : level(compression_level) {}
+  static constexpr int MinCompressionLevel() { return 1; }
+  static constexpr int MaxCompressionLevel() { return 2; }
+  static constexpr int DefaultCompressionLevel() { return 1; }
+};
+
 // ------------------------------------------------------------------------
 // Generic compression/decompression routines.
 // ------------------------------------------------------------------------
 
-// Compress the bytes read from "*source" and append to "*sink". Return the
+// Compress the bytes read from "*reader" and append to "*writer". Return the
 // number of bytes written.
-size_t Compress(Source* source, Sink* sink);
+// First version is to preserve ABI.
+size_t Compress(Source* reader, Sink* writer);
+size_t Compress(Source* reader, Sink* writer,
+                CompressionOptions options);
 
 // Find the uncompressed length of the given stream, as given by the header.
 // Note that the true length could deviate from this; the stream could e.g.
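A short usage sketch of the options overloads declared above (assumes linking against this vendored release; error handling omitted):

#include <string>
#include "snappy.h"

int main() {
  std::string input(1 << 16, 'a');
  std::string out1, out2;
  snappy::Compress(input.data(), input.size(), &out1);  // default: level 1
  snappy::Compress(input.data(), input.size(), &out2,
                   snappy::CompressionOptions{2});      // experimental level 2
  std::string roundtrip;
  snappy::Uncompress(out2.data(), out2.size(), &roundtrip);
  return roundtrip == input ? 0 : 1;
}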
@@ -75,15 +100,22 @@ namespace snappy {
 // Original contents of *compressed are lost.
 //
 // REQUIRES: "input[]" is not an alias of "*compressed".
+// First version is to preserve ABI.
 size_t Compress(const char* input, size_t input_length,
                 std::string* compressed);
+size_t Compress(const char* input, size_t input_length,
+                std::string* compressed, CompressionOptions options);
 
 // Same as `Compress` above but taking an `iovec` array as input. Note that
 // this function preprocesses the inputs to compute the sum of
 // `iov[0..iov_cnt-1].iov_len` before reading. To avoid this, use
 // `RawCompressFromIOVec` below.
+// First version is to preserve ABI.
 size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
                          std::string* compressed);
+size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
+                         std::string* compressed,
+                         CompressionOptions options);
 
 // Decompresses "compressed[0..compressed_length-1]" to "*uncompressed".
 // Original contents of "*uncompressed" are lost.
@@ -126,16 +158,19 @@ namespace snappy {
 //    RawCompress(input, input_length, output, &output_length);
 //    ... Process(output, output_length) ...
 //    delete [] output;
-void RawCompress(const char* input,
-                 size_t input_length,
-                 char* compressed,
+void RawCompress(const char* input, size_t input_length, char* compressed,
                  size_t* compressed_length);
+void RawCompress(const char* input, size_t input_length, char* compressed,
+                 size_t* compressed_length, CompressionOptions options);
 
 // Same as `RawCompress` above but taking an `iovec` array as input. Note that
 // `uncompressed_length` is the total number of bytes to be read from the
 // elements of `iov` (_not_ the number of elements in `iov`).
 void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
                           char* compressed, size_t* compressed_length);
+void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
+                          char* compressed, size_t* compressed_length,
+                          CompressionOptions options);
 
 // Given data in "compressed[0..compressed_length-1]" generated by
 // calling the Snappy::Compress routine, this routine
@@ -215,7 +250,7 @@ namespace snappy {
 static constexpr int kMinHashTableBits = 8;
 static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;
 
-static constexpr int kMaxHashTableBits = 14;
+static constexpr int kMaxHashTableBits = 15;
 static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
 }  // end namespace snappy
 