snappy 0.4.0-java → 0.5.0-java

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -31,6 +31,8 @@
 #ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
 #define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
 
+#include <utility>
+
 #include "snappy-stubs-internal.h"
 
 #if SNAPPY_HAVE_SSSE3
@@ -256,6 +258,8 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
       s2 += 8;
     }
   }
+  SNAPPY_PREFETCH(s1 + 64);
+  SNAPPY_PREFETCH(s2 + 64);
 
   // Find out how long the match is. We loop over the data 64 bits at a
   // time until we find a 64-bit block that doesn't match; then we find
@@ -330,6 +334,31 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
 }
 #endif
 
+static inline size_t FindMatchLengthPlain(const char* s1, const char* s2,
+                                          const char* s2_limit) {
+  // Implementation based on the x86-64 version, above.
+  assert(s2_limit >= s2);
+  int matched = 0;
+
+  while (s2 <= s2_limit - 8 &&
+         UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
+    s2 += 8;
+    matched += 8;
+  }
+  if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 8) {
+    uint64_t x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
+    int matching_bits = Bits::FindLSBSetNonZero64(x);
+    matched += matching_bits >> 3;
+    s2 += matching_bits >> 3;
+  } else {
+    while ((s2 < s2_limit) && (s1[matched] == *s2)) {
+      ++s2;
+      ++matched;
+    }
+  }
+  return matched;
+}
+
 // Lookup tables for decompression code. Give --snappy_dump_decompression_table
 // to the unit test to recompute char_table.
 
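The new FindMatchLengthPlain is a portable fallback used by the level-2 compressor below: it counts how many leading bytes of s1 and s2 agree, comparing eight bytes at a time and, on little-endian targets, converting the first differing bit of the XOR into a byte count. A minimal standalone restatement of the same logic, assuming a little-endian target and a GCC/Clang toolchain (the MatchLengthPlain name and the memcpy-based loads are stand-ins for snappy's UNALIGNED_LOAD64 and Bits helpers):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static size_t MatchLengthPlain(const char* s1, const char* s2,
                                   const char* s2_limit) {
      assert(s2_limit >= s2);
      size_t matched = 0;
      // Compare a full 64-bit word at a time while one is available.
      while (s2 + 8 <= s2_limit) {
        uint64_t a, b;
        std::memcpy(&a, s1 + matched, 8);  // unaligned-safe load
        std::memcpy(&b, s2, 8);
        if (a != b) {
          // The lowest set bit of the XOR marks the first differing byte
          // (little-endian layout assumed).
          return matched + (__builtin_ctzll(a ^ b) >> 3);
        }
        s2 += 8;
        matched += 8;
      }
      // Byte-at-a-time tail, as in the non-little-endian branch above.
      while (s2 < s2_limit && s1[matched] == *s2) {
        ++s2;
        ++matched;
      }
      return matched;
    }

    int main() {
      const char a[] = "abcdefgh12345678";
      const char b[] = "abcdefgh1234XXXX";
      std::printf("%zu\n", MatchLengthPlain(a, b, b + 16));  // prints 12
    }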
@@ -105,6 +105,12 @@
 #define SNAPPY_ATTRIBUTE_ALWAYS_INLINE
 #endif  // HAVE_ATTRIBUTE_ALWAYS_INLINE
 
+#if HAVE_BUILTIN_PREFETCH
+#define SNAPPY_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 3)
+#else
+#define SNAPPY_PREFETCH(ptr) (void)(ptr)
+#endif
+
 // Stubbed version of ABSL_FLAG.
 //
 // In the open source version, flags can only be changed at compile time.
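__builtin_prefetch(ptr, 0, 3) hints that the addressed cache line will soon be read (second argument 0) and should be kept with maximum temporal locality (third argument 3); where the builtin is unavailable, the stub evaluates and discards the pointer so call sites need no guards. The macro moves here from snappy.cc and is now gated on the build-system check HAVE_BUILTIN_PREFETCH rather than __GNUC__. A minimal sketch of the same guard pattern, with HAS_BUILTIN_PREFETCH as a hypothetical stand-in for the build flag:

    // Hypothetical standalone version of the guard above.
    #if defined(HAS_BUILTIN_PREFETCH)
    #define PREFETCH_READ(ptr) __builtin_prefetch((ptr), /*rw=*/0, /*locality=*/3)
    #else
    #define PREFETCH_READ(ptr) (void)(ptr)  // evaluate and discard: a no-op
    #endif

    // A FindMatchLength-style call site: hint at data a cache line ahead.
    static void TouchAhead(const char* s1, const char* s2) {
      PREFETCH_READ(s1 + 64);
      PREFETCH_READ(s2 + 64);
    }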
@@ -68,18 +68,14 @@
 #include <arm_acle.h>
 #endif
 
-#if defined(__GNUC__)
-#define SNAPPY_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 3)
-#else
-#define SNAPPY_PREFETCH(ptr) (void)(ptr)
-#endif
-
 #include <algorithm>
 #include <array>
 #include <cstddef>
 #include <cstdint>
 #include <cstdio>
 #include <cstring>
+#include <functional>
+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
@@ -180,6 +176,22 @@ inline uint16_t* TableEntry(uint16_t* table, uint32_t bytes, uint32_t mask) {
                                      (hash & mask));
 }
 
+inline uint16_t* TableEntry4ByteMatch(uint16_t* table, uint32_t bytes,
+                                      uint32_t mask) {
+  constexpr uint32_t kMagic = 2654435761U;
+  const uint32_t hash = (kMagic * bytes) >> (32 - kMaxHashTableBits);
+  return reinterpret_cast<uint16_t*>(reinterpret_cast<uintptr_t>(table) +
+                                     (hash & mask));
+}
+
+inline uint16_t* TableEntry8ByteMatch(uint16_t* table, uint64_t bytes,
+                                      uint32_t mask) {
+  constexpr uint64_t kMagic = 58295818150454627ULL;
+  const uint32_t hash = (kMagic * bytes) >> (64 - kMaxHashTableBits);
+  return reinterpret_cast<uint16_t*>(reinterpret_cast<uintptr_t>(table) +
+                                     (hash & mask));
+}
+
 }  // namespace
 
 size_t MaxCompressedLength(size_t source_bytes) {
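Both new helpers are multiplicative hashes. In TableEntry4ByteMatch, kMagic = 2654435761 is the classic Knuth constant, a prime close to 2^32/phi, and shifting the 32-bit product right by 32 - kMaxHashTableBits keeps its top bits, which are the well-mixed ones; TableEntry8ByteMatch does the same with an odd 64-bit constant over 8-byte keys. Note that the mask is applied to a byte offset (the helpers add it to the table pointer through uintptr_t), which is why callers pass a mask of 2 * (table_size - 1). A small self-contained demonstration of the 4-byte variant (standalone names; kBits mirrors kMaxHashTableBits):

    #include <cstdint>
    #include <cstdio>

    constexpr int kBits = 15;                 // kMaxHashTableBits in 0.5.0
    constexpr uint32_t kMagic = 2654435761U;  // prime near 2^32 / golden ratio

    // Mirror of TableEntry4ByteMatch's arithmetic, returning an entry index
    // instead of a pointer. The mask is in bytes, so dividing by
    // sizeof(uint16_t) converts the masked offset back to an index.
    uint32_t Slot(uint32_t bytes, uint32_t byte_mask) {
      const uint32_t hash = (kMagic * bytes) >> (32 - kBits);
      return (hash & byte_mask) / sizeof(uint16_t);
    }

    int main() {
      const uint32_t byte_mask = 2 * ((1u << 14) - 1);  // 2^14-entry table
      const uint32_t keys[] = {0x61626364u, 0x61626365u, 0x61626366u};
      for (uint32_t k : keys)  // near-identical keys scatter widely
        std::printf("%08x -> slot %u\n", k, Slot(k, byte_mask));
    }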
@@ -936,6 +948,174 @@ char* CompressFragment(const char* input, size_t input_size, char* op,
     }
   }
 
+emit_remainder:
+  // Emit the remaining bytes as a literal
+  if (ip < ip_end) {
+    op = EmitLiteral</*allow_fast_path=*/false>(op, ip, ip_end - ip);
+  }
+
+  return op;
+}
+
+char* CompressFragmentDoubleHash(const char* input, size_t input_size, char* op,
+                                 uint16_t* table, const int table_size,
+                                 uint16_t* table2, const int table_size2) {
+  (void)table_size2;
+  assert(table_size == table_size2);
+  // "ip" is the input pointer, and "op" is the output pointer.
+  const char* ip = input;
+  assert(input_size <= kBlockSize);
+  assert((table_size & (table_size - 1)) == 0);  // table must be power of two
+  const uint32_t mask = 2 * (table_size - 1);
+  const char* ip_end = input + input_size;
+  const char* base_ip = ip;
+
+  const size_t kInputMarginBytes = 15;
+  if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
+    const char* ip_limit = input + input_size - kInputMarginBytes;
+
+    for (;;) {
+      const char* next_emit = ip++;
+      uint64_t data = LittleEndian::Load64(ip);
+      uint32_t skip = 512;
+
+      const char* candidate;
+      uint32_t candidate_length;
+      while (true) {
+        assert(static_cast<uint32_t>(data) == LittleEndian::Load32(ip));
+        uint16_t* table_entry2 = TableEntry8ByteMatch(table2, data, mask);
+        uint32_t bytes_between_hash_lookups = skip >> 9;
+        skip++;
+        const char* next_ip = ip + bytes_between_hash_lookups;
+        if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
+          ip = next_emit;
+          goto emit_remainder;
+        }
+        candidate = base_ip + *table_entry2;
+        assert(candidate >= base_ip);
+        assert(candidate < ip);
+
+        *table_entry2 = ip - base_ip;
+        if (SNAPPY_PREDICT_FALSE(static_cast<uint32_t>(data) ==
+                                 LittleEndian::Load32(candidate))) {
+          candidate_length =
+              FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
+          break;
+        }
+
+        uint16_t* table_entry = TableEntry4ByteMatch(table, data, mask);
+        candidate = base_ip + *table_entry;
+        assert(candidate >= base_ip);
+        assert(candidate < ip);
+
+        *table_entry = ip - base_ip;
+        if (SNAPPY_PREDICT_FALSE(static_cast<uint32_t>(data) ==
+                                 LittleEndian::Load32(candidate))) {
+          candidate_length =
+              FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
+          table_entry2 =
+              TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 1), mask);
+          auto candidate2 = base_ip + *table_entry2;
+          size_t candidate_length2 =
+              FindMatchLengthPlain(candidate2, ip + 1, ip_end);
+          if (candidate_length2 > candidate_length) {
+            *table_entry2 = ip - base_ip;
+            candidate = candidate2;
+            candidate_length = candidate_length2;
+            ++ip;
+          }
+          break;
+        }
+        data = LittleEndian::Load64(next_ip);
+        ip = next_ip;
+      }
+      // Backtrack to the point it matches fully.
+      while (ip > next_emit && candidate > base_ip &&
+             *(ip - 1) == *(candidate - 1)) {
+        --ip;
+        --candidate;
+        ++candidate_length;
+      }
+      *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 1), mask) =
+          ip - base_ip + 1;
+      *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 2), mask) =
+          ip - base_ip + 2;
+      *TableEntry4ByteMatch(table, LittleEndian::Load32(ip + 1), mask) =
+          ip - base_ip + 1;
+      // Step 2: A 4-byte or 8-byte match has been found.
+      // We'll later see if more than 4 bytes match. But, prior to the match,
+      // input bytes [next_emit, ip) are unmatched. Emit them as
+      // "literal bytes."
+      assert(next_emit + 16 <= ip_end);
+      if (ip - next_emit > 0) {
+        op = EmitLiteral</*allow_fast_path=*/true>(op, next_emit,
+                                                   ip - next_emit);
+      }
+      // Step 3: Call EmitCopy, and then see if another EmitCopy could
+      // be our next move. Repeat until we find no match for the
+      // input immediately after what was consumed by the last EmitCopy call.
+      //
+      // If we exit this loop normally then we need to call EmitLiteral next,
+      // though we don't yet know how big the literal will be. We handle that
+      // by proceeding to the next iteration of the main loop. We also can exit
+      // this loop via goto if we get close to exhausting the input.
+      do {
+        // We have a 4-byte match at ip, and no need to emit any
+        // "literal bytes" prior to ip.
+        const char* base = ip;
+        ip += candidate_length;
+        size_t offset = base - candidate;
+        if (candidate_length < 12) {
+          op =
+              EmitCopy</*len_less_than_12=*/true>(op, offset, candidate_length);
+        } else {
+          op = EmitCopy</*len_less_than_12=*/false>(op, offset,
+                                                    candidate_length);
+        }
+        if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        // We are now looking for a 4-byte match again. We read
+        // table[Hash(ip, mask)] for that. To improve compression,
+        // we also update several previous table entries.
+        if (ip - base_ip > 7) {
+          *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 7), mask) =
+              ip - base_ip - 7;
+          *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 4), mask) =
+              ip - base_ip - 4;
+        }
+        *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 3), mask) =
+            ip - base_ip - 3;
+        *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 2), mask) =
+            ip - base_ip - 2;
+        *TableEntry4ByteMatch(table, LittleEndian::Load32(ip - 2), mask) =
+            ip - base_ip - 2;
+        *TableEntry4ByteMatch(table, LittleEndian::Load32(ip - 1), mask) =
+            ip - base_ip - 1;
+
+        uint16_t* table_entry =
+            TableEntry8ByteMatch(table2, LittleEndian::Load64(ip), mask);
+        candidate = base_ip + *table_entry;
+        *table_entry = ip - base_ip;
+        if (LittleEndian::Load32(ip) == LittleEndian::Load32(candidate)) {
+          candidate_length =
+              FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
+          continue;
+        }
+        table_entry =
+            TableEntry4ByteMatch(table, LittleEndian::Load32(ip), mask);
+        candidate = base_ip + *table_entry;
+        *table_entry = ip - base_ip;
+        if (LittleEndian::Load32(ip) == LittleEndian::Load32(candidate)) {
+          candidate_length =
+              FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
+          continue;
+        }
+        break;
+      } while (true);
+    }
+  }
+
 emit_remainder:
   // Emit the remaining bytes as a literal
   if (ip < ip_end) {
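CompressFragmentDoubleHash is the new level-2 compressor. It keeps two hash tables over the block: table2, keyed on 8-byte windows, is probed first because an 8-byte hit is likely the start of a long match; table, keyed on 4-byte windows, is the fallback, and on a 4-byte hit the code also tries an 8-byte candidate at ip + 1 and keeps whichever match is longer. After finding a match it backtracks the match start while preceding bytes agree, and it seeds several neighboring table entries to help later lookups. A toy sketch of only the two-table probe order (hypothetical names; snappy's verification, match extension, and output format are elided):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    // Positions are stored +1 so that 0 can mean "empty slot".
    struct DoubleHashIndex {
      static constexpr int kBits = 12;
      std::vector<uint32_t> t4 = std::vector<uint32_t>(1u << kBits, 0);
      std::vector<uint32_t> t8 = std::vector<uint32_t>(1u << kBits, 0);

      static uint32_t H4(uint32_t v) { return (2654435761u * v) >> (32 - kBits); }
      static uint32_t H8(uint64_t v) {
        return static_cast<uint32_t>((58295818150454627ull * v) >> (64 - kBits));
      }

      // Return a candidate match position for 'pos' (or 0 if none), then
      // record 'pos' in both tables. The 8-byte table is consulted first,
      // mirroring the loop above.
      uint32_t Probe(const char* base, uint32_t pos) {
        uint32_t v4;
        uint64_t v8;
        std::memcpy(&v4, base + pos, 4);
        std::memcpy(&v8, base + pos, 8);
        uint32_t& slot8 = t8[H8(v8)];
        uint32_t& slot4 = t4[H4(v4)];
        const uint32_t candidate = slot8 != 0 ? slot8 : slot4;
        slot8 = slot4 = pos + 1;
        return candidate;  // caller must still verify the bytes really match
      }
    };

    int main() {
      const char* s = "abcdefgh--abcdefgh";
      DoubleHashIndex idx;
      idx.Probe(s, 0);                // index the first "abcdefgh"
      uint32_t c = idx.Probe(s, 10);  // the repeat hits the 8-byte table
      if (c) std::printf("candidate at %u\n", c - 1);  // prints 0
    }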
@@ -946,10 +1126,10 @@ emit_remainder:
 }
 }  // end namespace internal
 
-// Called back at avery compression call to trace parameters and sizes.
-static inline void Report(const char *algorithm, size_t compressed_size,
-                          size_t uncompressed_size) {
+static inline void Report(int token, const char *algorithm, size_t
+                          compressed_size, size_t uncompressed_size) {
   // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)token;
   (void)algorithm;
   (void)compressed_size;
   (void)uncompressed_size;
@@ -1234,16 +1414,21 @@ std::pair<const uint8_t*, ptrdiff_t> DecompressBranchless(
       assert(tag == ip[-1]);
       // For literals tag_type = 0, hence we will always obtain 0 from
       // ExtractLowBytes. For literals offset will thus be kLiteralOffset.
-      ptrdiff_t len_min_offset = kLengthMinusOffset[tag];
+      ptrdiff_t len_minus_offset = kLengthMinusOffset[tag];
+      uint32_t next;
 #if defined(__aarch64__)
       size_t tag_type = AdvanceToNextTagARMOptimized(&ip, &tag);
+      // We never need more than 16 bits. Doing a Load16 allows the compiler
+      // to elide the masking operation in ExtractOffset.
+      next = LittleEndian::Load16(old_ip);
 #else
       size_t tag_type = AdvanceToNextTagX86Optimized(&ip, &tag);
+      next = LittleEndian::Load32(old_ip);
 #endif
-      uint32_t next = LittleEndian::Load32(old_ip);
-      size_t len = len_min_offset & 0xFF;
-      len_min_offset -= ExtractOffset(next, tag_type);
-      if (SNAPPY_PREDICT_FALSE(len_min_offset > 0)) {
+      size_t len = len_minus_offset & 0xFF;
+      ptrdiff_t extracted = ExtractOffset(next, tag_type);
+      ptrdiff_t len_min_offset = len_minus_offset - extracted;
+      if (SNAPPY_PREDICT_FALSE(len_minus_offset > extracted)) {
         if (SNAPPY_PREDICT_FALSE(len & 0x80)) {
           // Exceptional case (long literal or copy 4).
           // Actually doing the copy here is negatively impacting the main
@@ -1290,7 +1475,7 @@ std::pair<const uint8_t*, ptrdiff_t> DecompressBranchless(
         DeferMemCopy(&deferred_src, &deferred_length, from, len);
       }
     } while (ip < ip_limit_min_slop &&
-             (op + deferred_length) < op_limit_min_slop);
+             static_cast<ptrdiff_t>(op + deferred_length) < op_limit_min_slop);
   exit:
     ip--;
     assert(ip <= ip_limit);
@@ -1314,7 +1499,7 @@ class SnappyDecompressor {
   // If ip < ip_limit_min_maxtaglen_ it's safe to read kMaxTagLength from
   // buffer.
   const char* ip_limit_min_maxtaglen_;
-  uint32_t peeked_;                  // Bytes peeked from reader (need to skip)
+  uint64_t peeked_;                  // Bytes peeked from reader (need to skip)
   bool eof_;                         // Hit end of input without an error?
   char scratch_[kMaximumTagLength];  // See RefillTag().
 
@@ -1505,7 +1690,8 @@ constexpr uint32_t CalculateNeeded(uint8_t tag) {
 #if __cplusplus >= 201402L
 constexpr bool VerifyCalculateNeeded() {
   for (int i = 0; i < 1; i++) {
-    if (CalculateNeeded(i) != (char_table[i] >> 11) + 1) return false;
+    if (CalculateNeeded(i) != static_cast<uint32_t>((char_table[i] >> 11)) + 1)
+      return false;
   }
   return true;
 }
@@ -1541,7 +1727,7 @@ bool SnappyDecompressor::RefillTag() {
   assert(needed <= sizeof(scratch_));
 
   // Read more bytes from reader if needed
-  uint32_t nbuf = ip_limit_ - ip;
+  uint64_t nbuf = ip_limit_ - ip;
   if (nbuf < needed) {
     // Stitch together bytes from ip and reader to form the word
     // contents. We store the needed bytes in "scratch_". They
@@ -1554,7 +1740,7 @@
       size_t length;
       const char* src = reader_->Peek(&length);
       if (length == 0) return false;
-      uint32_t to_add = std::min<uint32_t>(needed - nbuf, length);
+      uint64_t to_add = std::min<uint64_t>(needed - nbuf, length);
       std::memcpy(scratch_ + nbuf, src, to_add);
       nbuf += to_add;
       reader_->Skip(to_add);
@@ -1592,7 +1778,8 @@ template <typename Writer>
 static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
                                       Writer* writer, uint32_t compressed_len,
                                       uint32_t uncompressed_len) {
-  Report("snappy_uncompress", compressed_len, uncompressed_len);
+  int token = 0;
+  Report(token, "snappy_uncompress", compressed_len, uncompressed_len);
 
   writer->SetExpectedLength(uncompressed_len);
 
@@ -1608,8 +1795,15 @@ bool GetUncompressedLength(Source* source, uint32_t* result) {
 }
 
 size_t Compress(Source* reader, Sink* writer) {
+  return Compress(reader, writer, CompressionOptions{});
+}
+
+size_t Compress(Source* reader, Sink* writer, CompressionOptions options) {
+  assert(options.level == 1 || options.level == 2);
+  int token = 0;
   size_t written = 0;
   size_t N = reader->Available();
+  assert(N <= 0xFFFFFFFFu);
   const size_t uncompressed_size = N;
   char ulength[Varint::kMax32];
   char* p = Varint::Encode32(ulength, N);
@@ -1654,17 +1848,23 @@ size_t Compress(Source* reader, Sink* writer) {
     uint16_t* table = wmem.GetHashTable(num_to_read, &table_size);
 
     // Compress input_fragment and append to dest
-    const int max_output = MaxCompressedLength(num_to_read);
-
-    // Need a scratch buffer for the output, in case the byte sink doesn't
-    // have room for us directly.
+    int max_output = MaxCompressedLength(num_to_read);
 
     // Since we encode kBlockSize regions followed by a region
     // which is <= kBlockSize in length, a previously allocated
     // scratch_output[] region is big enough for this iteration.
+    // Need a scratch buffer for the output, in case the byte sink doesn't
+    // have room for us directly.
     char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput());
-    char* end = internal::CompressFragment(fragment, fragment_size, dest, table,
-                                           table_size);
+    char* end = nullptr;
+    if (options.level == 1) {
+      end = internal::CompressFragment(fragment, fragment_size, dest, table,
+                                       table_size);
+    } else if (options.level == 2) {
+      end = internal::CompressFragmentDoubleHash(
+          fragment, fragment_size, dest, table, table_size >> 1,
+          table + (table_size >> 1), table_size >> 1);
+    }
     writer->Append(dest, end - dest);
     written += (end - dest);
 
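Level 2 reuses the same working memory as level 1: the single hash-table allocation is split in half, the low half serving 4-byte matches and the high half (table + (table_size >> 1)) serving 8-byte matches. Because the TableEntry* helpers offset the table in bytes, the mask of 2 * (table_size - 1) computed inside CompressFragmentDoubleHash spans exactly one half. A compile-time check of that arithmetic at the new maximum table size (standalone constants mirroring this diff):

    #include <cstddef>
    #include <cstdint>

    constexpr int kMaxHashTableBits = 15;  // raised from 14 in this release
    constexpr size_t kMaxHashTableSize = size_t{1} << kMaxHashTableBits;

    // Level-2 split: each half-table gets kMaxHashTableSize >> 1 entries.
    constexpr size_t kHalfEntries = kMaxHashTableSize >> 1;
    constexpr uint32_t kByteMask = 2 * (kHalfEntries - 1);  // mask in bytes

    static_assert(kByteMask / sizeof(uint16_t) == kHalfEntries - 1,
                  "byte mask addresses every uint16_t entry of one half-table");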
@@ -1672,8 +1872,7 @@ size_t Compress(Source* reader, Sink* writer) {
     reader->Skip(pending_advance);
   }
 
-  Report("snappy_compress", written, uncompressed_size);
-
+  Report(token, "snappy_compress", written, uncompressed_size);
   return written;
 }
 
@@ -1696,16 +1895,16 @@ class SnappyIOVecReader : public Source {
     if (total_size > 0 && curr_size_remaining_ == 0) Advance();
   }
 
-  ~SnappyIOVecReader() = default;
+  ~SnappyIOVecReader() override = default;
 
-  size_t Available() const { return total_size_remaining_; }
+  size_t Available() const override { return total_size_remaining_; }
 
-  const char* Peek(size_t* len) {
+  const char* Peek(size_t* len) override {
     *len = curr_size_remaining_;
     return curr_pos_;
   }
 
-  void Skip(size_t n) {
+  void Skip(size_t n) override {
     while (n >= curr_size_remaining_ && n > 0) {
       n -= curr_size_remaining_;
       Advance();
@@ -2108,9 +2307,15 @@ bool IsValidCompressed(Source* compressed) {
 
 void RawCompress(const char* input, size_t input_length, char* compressed,
                  size_t* compressed_length) {
+  RawCompress(input, input_length, compressed, compressed_length,
+              CompressionOptions{});
+}
+
+void RawCompress(const char* input, size_t input_length, char* compressed,
+                 size_t* compressed_length, CompressionOptions options) {
   ByteArraySource reader(input, input_length);
   UncheckedByteArraySink writer(compressed);
-  Compress(&reader, &writer);
+  Compress(&reader, &writer, options);
 
   // Compute how many bytes were added
   *compressed_length = (writer.CurrentDestination() - compressed);
@@ -2118,9 +2323,16 @@ void RawCompress(const char* input, size_t input_length, char* compressed,
 
 void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
                           char* compressed, size_t* compressed_length) {
+  RawCompressFromIOVec(iov, uncompressed_length, compressed, compressed_length,
+                       CompressionOptions{});
+}
+
+void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
+                          char* compressed, size_t* compressed_length,
+                          CompressionOptions options) {
   SnappyIOVecReader reader(iov, uncompressed_length);
   UncheckedByteArraySink writer(compressed);
-  Compress(&reader, &writer);
+  Compress(&reader, &writer, options);
 
   // Compute how many bytes were added.
   *compressed_length = writer.CurrentDestination() - compressed;
@@ -2128,18 +2340,28 @@ void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
 
 size_t Compress(const char* input, size_t input_length,
                 std::string* compressed) {
+  return Compress(input, input_length, compressed, CompressionOptions{});
+}
+
+size_t Compress(const char* input, size_t input_length, std::string* compressed,
+                CompressionOptions options) {
   // Pre-grow the buffer to the max length of the compressed output
  STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));
 
   size_t compressed_length;
   RawCompress(input, input_length, string_as_array(compressed),
-              &compressed_length);
+              &compressed_length, options);
   compressed->erase(compressed_length);
   return compressed_length;
 }
 
 size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
                          std::string* compressed) {
+  return CompressFromIOVec(iov, iov_cnt, compressed, CompressionOptions{});
+}
+
+size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
+                         std::string* compressed, CompressionOptions options) {
   // Compute the number of bytes to be compressed.
   size_t uncompressed_length = 0;
   for (size_t i = 0; i < iov_cnt; ++i) {
@@ -2152,7 +2374,7 @@ size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
 
   size_t compressed_length;
   RawCompressFromIOVec(iov, uncompressed_length, string_as_array(compressed),
-                       &compressed_length);
+                       &compressed_length, options);
   compressed->erase(compressed_length);
   return compressed_length;
 }
@@ -2342,7 +2564,6 @@ bool SnappyScatteredWriter<Allocator>::SlowAppendFromSelf(size_t offset,
 class SnappySinkAllocator {
  public:
   explicit SnappySinkAllocator(Sink* dest) : dest_(dest) {}
-  ~SnappySinkAllocator() {}
 
   char* Allocate(int size) {
     Datablock block(new char[size], size);
@@ -50,13 +50,38 @@ namespace snappy {
 class Source;
 class Sink;
 
+struct CompressionOptions {
+  // Compression level.
+  // Level 1 is the fastest
+  // Level 2 is a little slower but provides better compression. Level 2 is
+  // **EXPERIMENTAL** for the time being. It might happen that we decide to
+  // fall back to level 1 in the future.
+  // Levels 3+ are currently not supported. We plan to support levels up to
+  // 9 in the future.
+  // If you played with other compression algorithms, level 1 is equivalent to
+  // fast mode (level 1) of LZ4, level 2 is equivalent to LZ4's level 2 mode
+  // and compresses somewhere around zstd:-3 and zstd:-2 but generally with
+  // faster decompression speeds than snappy:1 and zstd:-3.
+  int level = DefaultCompressionLevel();
+
+  constexpr CompressionOptions() = default;
+  constexpr CompressionOptions(int compression_level)
+      : level(compression_level) {}
+  static constexpr int MinCompressionLevel() { return 1; }
+  static constexpr int MaxCompressionLevel() { return 2; }
+  static constexpr int DefaultCompressionLevel() { return 1; }
+};
+
 // ------------------------------------------------------------------------
 // Generic compression/decompression routines.
 // ------------------------------------------------------------------------
 
-// Compress the bytes read from "*source" and append to "*sink". Return the
+// Compress the bytes read from "*reader" and append to "*writer". Return the
 // number of bytes written.
-size_t Compress(Source* source, Sink* sink);
+// First version is to preserve ABI.
+size_t Compress(Source* reader, Sink* writer);
+size_t Compress(Source* reader, Sink* writer,
+                CompressionOptions options);
 
 // Find the uncompressed length of the given stream, as given by the header.
 // Note that the true length could deviate from this; the stream could e.g.
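Callers opt into the experimental level 2 by passing a CompressionOptions value; the existing single-argument overloads keep level 1 and the old ABI. A minimal usage sketch against the public API:

    #include <snappy.h>

    #include <cassert>
    #include <string>

    int main() {
      const std::string input(100000, 'a');
      std::string fast, dense;

      // Default options: level 1, identical behavior to 0.4.x.
      snappy::Compress(input.data(), input.size(), &fast);

      // Experimental level 2: a little slower, better ratio.
      snappy::Compress(input.data(), input.size(), &dense,
                       snappy::CompressionOptions{/*compression_level=*/2});

      std::string roundtrip;
      assert(snappy::Uncompress(dense.data(), dense.size(), &roundtrip));
      assert(roundtrip == input);
      return 0;
    }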
@@ -75,15 +100,22 @@ namespace snappy {
 // Original contents of *compressed are lost.
 //
 // REQUIRES: "input[]" is not an alias of "*compressed".
+// First version is to preserve ABI.
 size_t Compress(const char* input, size_t input_length,
                 std::string* compressed);
+size_t Compress(const char* input, size_t input_length,
+                std::string* compressed, CompressionOptions options);
 
 // Same as `Compress` above but taking an `iovec` array as input. Note that
 // this function preprocesses the inputs to compute the sum of
 // `iov[0..iov_cnt-1].iov_len` before reading. To avoid this, use
 // `RawCompressFromIOVec` below.
+// First version is to preserve ABI.
 size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
                          std::string* compressed);
+size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
+                         std::string* compressed,
+                         CompressionOptions options);
 
 // Decompresses "compressed[0..compressed_length-1]" to "*uncompressed".
 // Original contents of "*uncompressed" are lost.
@@ -126,16 +158,19 @@ namespace snappy {
 //    RawCompress(input, input_length, output, &output_length);
 //    ... Process(output, output_length) ...
 //    delete [] output;
-void RawCompress(const char* input,
-                 size_t input_length,
-                 char* compressed,
+void RawCompress(const char* input, size_t input_length, char* compressed,
                  size_t* compressed_length);
+void RawCompress(const char* input, size_t input_length, char* compressed,
+                 size_t* compressed_length, CompressionOptions options);
 
 // Same as `RawCompress` above but taking an `iovec` array as input. Note that
 // `uncompressed_length` is the total number of bytes to be read from the
 // elements of `iov` (_not_ the number of elements in `iov`).
 void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
                           char* compressed, size_t* compressed_length);
+void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
+                          char* compressed, size_t* compressed_length,
+                          CompressionOptions options);
 
 // Given data in "compressed[0..compressed_length-1]" generated by
 // calling the Snappy::Compress routine, this routine
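The Raw* overloads follow the same pattern for callers that manage their own output buffers. A sketch mirroring the usage comment above, with the new options argument (Process is the same placeholder the comment uses):

    #include <snappy.h>

    #include <cstddef>

    void Example(const char* input, size_t input_length) {
      size_t output_length = snappy::MaxCompressedLength(input_length);
      char* output = new char[output_length];
      snappy::RawCompress(input, input_length, output, &output_length,
                          snappy::CompressionOptions{/*compression_level=*/2});
      // ... Process(output, output_length) ...
      delete[] output;
    }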
@@ -215,7 +250,7 @@ namespace snappy {
 static constexpr int kMinHashTableBits = 8;
 static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;
 
-static constexpr int kMaxHashTableBits = 14;
+static constexpr int kMaxHashTableBits = 15;
 static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
 }  // end namespace snappy