snappy 0.4.0-java → 0.5.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +1 -1
- data/.github/workflows/publish.yml +7 -13
- data/Dockerfile +1 -1
- data/Gemfile +1 -0
- data/README.md +3 -0
- data/Rakefile +1 -1
- data/lib/snappy/shim.rb +3 -23
- data/lib/snappy/version.rb +1 -1
- data/lib/snappy/writer.rb +1 -1
- data/lib/snappy_ext.jar +0 -0
- data/test/snappy_test.rb +29 -4
- data/vendor/snappy/BUILD.bazel +211 -0
- data/vendor/snappy/CMakeLists.txt +32 -10
- data/vendor/snappy/MODULE.bazel +23 -0
- data/vendor/snappy/NEWS +15 -0
- data/vendor/snappy/README.md +4 -4
- data/vendor/snappy/WORKSPACE +27 -0
- data/vendor/snappy/WORKSPACE.bzlmod +0 -0
- data/vendor/snappy/cmake/config.h.in +3 -0
- data/vendor/snappy/snappy-internal.h +29 -0
- data/vendor/snappy/snappy-stubs-internal.h +6 -0
- data/vendor/snappy/snappy.cc +258 -37
- data/vendor/snappy/snappy.h +41 -6
- data/vendor/snappy/snappy_benchmark.cc +38 -18
- data/vendor/snappy/snappy_compress_fuzzer.cc +18 -14
- data/vendor/snappy/snappy_unittest.cc +13 -0
- metadata +8 -7
data/vendor/snappy/snappy-internal.h
CHANGED

@@ -31,6 +31,8 @@
 #ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
 #define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
 
+#include <utility>
+
 #include "snappy-stubs-internal.h"
 
 #if SNAPPY_HAVE_SSSE3
@@ -256,6 +258,8 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
       s2 += 8;
     }
   }
+  SNAPPY_PREFETCH(s1 + 64);
+  SNAPPY_PREFETCH(s2 + 64);
 
   // Find out how long the match is. We loop over the data 64 bits at a
   // time until we find a 64-bit block that doesn't match; then we find
@@ -330,6 +334,31 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
 }
 #endif
 
+static inline size_t FindMatchLengthPlain(const char* s1, const char* s2,
+                                          const char* s2_limit) {
+  // Implementation based on the x86-64 version, above.
+  assert(s2_limit >= s2);
+  int matched = 0;
+
+  while (s2 <= s2_limit - 8 &&
+         UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
+    s2 += 8;
+    matched += 8;
+  }
+  if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 8) {
+    uint64_t x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
+    int matching_bits = Bits::FindLSBSetNonZero64(x);
+    matched += matching_bits >> 3;
+    s2 += matching_bits >> 3;
+  } else {
+    while ((s2 < s2_limit) && (s1[matched] == *s2)) {
+      ++s2;
+      ++matched;
+    }
+  }
+  return matched;
+}
+
 // Lookup tables for decompression code. Give --snappy_dump_decompression_table
 // to the unit test to recompute char_table.
 
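Note on the new FindMatchLengthPlain above: it uses the classic XOR trick from the x86-64 fast path. XOR two 64-bit words; on a little-endian machine the bit index of the lowest set bit, divided by eight, is the number of equal leading bytes. A minimal standalone sketch of that trick (not from the diff; it substitutes the GCC/Clang __builtin_ctzll for Bits::FindLSBSetNonZero64 and std::memcpy for UNALIGNED_LOAD64):

#include <cassert>
#include <cstdint>
#include <cstring>

// Count equal leading bytes of two 8-byte regions that are known to differ.
static size_t MatchingPrefixBytes(const char* a, const char* b) {
  uint64_t wa, wb;
  std::memcpy(&wa, a, 8);  // unaligned-safe load, like UNALIGNED_LOAD64
  std::memcpy(&wb, b, 8);
  uint64_t x = wa ^ wb;    // first differing byte sets the lowest set bit
  assert(x != 0);
  return __builtin_ctzll(x) >> 3;  // bit index / 8 = matching byte count
}

int main() {
  const char a[] = "snappy-1";
  const char b[] = "snappy-2";
  return MatchingPrefixBytes(a, b) == 7 ? 0 : 1;  // first 7 bytes match
}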
data/vendor/snappy/snappy-stubs-internal.h
CHANGED

@@ -105,6 +105,12 @@
 #define SNAPPY_ATTRIBUTE_ALWAYS_INLINE
 #endif  // HAVE_ATTRIBUTE_ALWAYS_INLINE
 
+#if HAVE_BUILTIN_PREFETCH
+#define SNAPPY_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 3)
+#else
+#define SNAPPY_PREFETCH(ptr) (void)(ptr)
+#endif
+
 // Stubbed version of ABSL_FLAG.
 //
 // In the open source version, flags can only be changed at compile time.
data/vendor/snappy/snappy.cc
CHANGED
@@ -68,18 +68,14 @@
 #include <arm_acle.h>
 #endif
 
-#if defined(__GNUC__)
-#define SNAPPY_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 3)
-#else
-#define SNAPPY_PREFETCH(ptr) (void)(ptr)
-#endif
-
 #include <algorithm>
 #include <array>
 #include <cstddef>
 #include <cstdint>
 #include <cstdio>
 #include <cstring>
+#include <functional>
+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
@@ -180,6 +176,22 @@ inline uint16_t* TableEntry(uint16_t* table, uint32_t bytes, uint32_t mask) {
                                      (hash & mask));
 }
 
+inline uint16_t* TableEntry4ByteMatch(uint16_t* table, uint32_t bytes,
+                                      uint32_t mask) {
+  constexpr uint32_t kMagic = 2654435761U;
+  const uint32_t hash = (kMagic * bytes) >> (32 - kMaxHashTableBits);
+  return reinterpret_cast<uint16_t*>(reinterpret_cast<uintptr_t>(table) +
+                                     (hash & mask));
+}
+
+inline uint16_t* TableEntry8ByteMatch(uint16_t* table, uint64_t bytes,
+                                      uint32_t mask) {
+  constexpr uint64_t kMagic = 58295818150454627ULL;
+  const uint32_t hash = (kMagic * bytes) >> (64 - kMaxHashTableBits);
+  return reinterpret_cast<uint16_t*>(reinterpret_cast<uintptr_t>(table) +
+                                     (hash & mask));
+}
+
 }  // namespace
 
 size_t MaxCompressedLength(size_t source_bytes) {
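Both new lookups are multiplicative hashes: multiply, keep the top kMaxHashTableBits bits, then AND with a byte-offset mask of 2 * (table_size - 1), so the result is an even byte offset into the uint16_t table and needs no extra shift. A standalone sketch of the 4-byte variant (kMagic 2654435761 is the usual Knuth constant, roughly 2^32 divided by the golden ratio; the local kMaxHashTableBits mirrors the new value in snappy.h):

#include <cstdint>
#include <cstdio>

constexpr int kMaxHashTableBits = 15;

static uint32_t Hash4(uint32_t bytes) {
  constexpr uint32_t kMagic = 2654435761U;
  return (kMagic * bytes) >> (32 - kMaxHashTableBits);
}

int main() {
  const uint32_t table_size = 1 << 14;         // slots in one half-table
  const uint32_t mask = 2 * (table_size - 1);  // byte-offset mask, low bit clear
  uint32_t h = Hash4(0x70616e73u);             // "snap" read little-endian
  std::printf("byte offset into table: %u\n", h & mask);  // always 2-aligned
  return 0;
}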
@@ -936,6 +948,174 @@ char* CompressFragment(const char* input, size_t input_size, char* op,
     }
   }
 
+emit_remainder:
+  // Emit the remaining bytes as a literal
+  if (ip < ip_end) {
+    op = EmitLiteral</*allow_fast_path=*/false>(op, ip, ip_end - ip);
+  }
+
+  return op;
+}
+
+char* CompressFragmentDoubleHash(const char* input, size_t input_size, char* op,
+                                 uint16_t* table, const int table_size,
+                                 uint16_t* table2, const int table_size2) {
+  (void)table_size2;
+  assert(table_size == table_size2);
+  // "ip" is the input pointer, and "op" is the output pointer.
+  const char* ip = input;
+  assert(input_size <= kBlockSize);
+  assert((table_size & (table_size - 1)) == 0);  // table must be power of two
+  const uint32_t mask = 2 * (table_size - 1);
+  const char* ip_end = input + input_size;
+  const char* base_ip = ip;
+
+  const size_t kInputMarginBytes = 15;
+  if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
+    const char* ip_limit = input + input_size - kInputMarginBytes;
+
+    for (;;) {
+      const char* next_emit = ip++;
+      uint64_t data = LittleEndian::Load64(ip);
+      uint32_t skip = 512;
+
+      const char* candidate;
+      uint32_t candidate_length;
+      while (true) {
+        assert(static_cast<uint32_t>(data) == LittleEndian::Load32(ip));
+        uint16_t* table_entry2 = TableEntry8ByteMatch(table2, data, mask);
+        uint32_t bytes_between_hash_lookups = skip >> 9;
+        skip++;
+        const char* next_ip = ip + bytes_between_hash_lookups;
+        if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
+          ip = next_emit;
+          goto emit_remainder;
+        }
+        candidate = base_ip + *table_entry2;
+        assert(candidate >= base_ip);
+        assert(candidate < ip);
+
+        *table_entry2 = ip - base_ip;
+        if (SNAPPY_PREDICT_FALSE(static_cast<uint32_t>(data) ==
+                                 LittleEndian::Load32(candidate))) {
+          candidate_length =
+              FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
+          break;
+        }
+
+        uint16_t* table_entry = TableEntry4ByteMatch(table, data, mask);
+        candidate = base_ip + *table_entry;
+        assert(candidate >= base_ip);
+        assert(candidate < ip);
+
+        *table_entry = ip - base_ip;
+        if (SNAPPY_PREDICT_FALSE(static_cast<uint32_t>(data) ==
+                                 LittleEndian::Load32(candidate))) {
+          candidate_length =
+              FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
+          table_entry2 =
+              TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 1), mask);
+          auto candidate2 = base_ip + *table_entry2;
+          size_t candidate_length2 =
+              FindMatchLengthPlain(candidate2, ip + 1, ip_end);
+          if (candidate_length2 > candidate_length) {
+            *table_entry2 = ip - base_ip;
+            candidate = candidate2;
+            candidate_length = candidate_length2;
+            ++ip;
+          }
+          break;
+        }
+        data = LittleEndian::Load64(next_ip);
+        ip = next_ip;
+      }
+      // Backtrack to the point it matches fully.
+      while (ip > next_emit && candidate > base_ip &&
+             *(ip - 1) == *(candidate - 1)) {
+        --ip;
+        --candidate;
+        ++candidate_length;
+      }
+      *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 1), mask) =
+          ip - base_ip + 1;
+      *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip + 2), mask) =
+          ip - base_ip + 2;
+      *TableEntry4ByteMatch(table, LittleEndian::Load32(ip + 1), mask) =
+          ip - base_ip + 1;
+      // Step 2: A 4-byte or 8-byte match has been found.
+      // We'll later see if more than 4 bytes match. But, prior to the match,
+      // input bytes [next_emit, ip) are unmatched. Emit them as
+      // "literal bytes."
+      assert(next_emit + 16 <= ip_end);
+      if (ip - next_emit > 0) {
+        op = EmitLiteral</*allow_fast_path=*/true>(op, next_emit,
+                                                   ip - next_emit);
+      }
+      // Step 3: Call EmitCopy, and then see if another EmitCopy could
+      // be our next move. Repeat until we find no match for the
+      // input immediately after what was consumed by the last EmitCopy call.
+      //
+      // If we exit this loop normally then we need to call EmitLiteral next,
+      // though we don't yet know how big the literal will be. We handle that
+      // by proceeding to the next iteration of the main loop. We also can exit
+      // this loop via goto if we get close to exhausting the input.
+      do {
+        // We have a 4-byte match at ip, and no need to emit any
+        // "literal bytes" prior to ip.
+        const char* base = ip;
+        ip += candidate_length;
+        size_t offset = base - candidate;
+        if (candidate_length < 12) {
+          op =
+              EmitCopy</*len_less_than_12=*/true>(op, offset, candidate_length);
+        } else {
+          op = EmitCopy</*len_less_than_12=*/false>(op, offset,
+                                                    candidate_length);
+        }
+        if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
+          goto emit_remainder;
+        }
+        // We are now looking for a 4-byte match again. We read
+        // table[Hash(ip, mask)] for that. To improve compression,
+        // we also update several previous table entries.
+        if (ip - base_ip > 7) {
+          *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 7), mask) =
+              ip - base_ip - 7;
+          *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 4), mask) =
+              ip - base_ip - 4;
+        }
+        *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 3), mask) =
+            ip - base_ip - 3;
+        *TableEntry8ByteMatch(table2, LittleEndian::Load64(ip - 2), mask) =
+            ip - base_ip - 2;
+        *TableEntry4ByteMatch(table, LittleEndian::Load32(ip - 2), mask) =
+            ip - base_ip - 2;
+        *TableEntry4ByteMatch(table, LittleEndian::Load32(ip - 1), mask) =
+            ip - base_ip - 1;
+
+        uint16_t* table_entry =
+            TableEntry8ByteMatch(table2, LittleEndian::Load64(ip), mask);
+        candidate = base_ip + *table_entry;
+        *table_entry = ip - base_ip;
+        if (LittleEndian::Load32(ip) == LittleEndian::Load32(candidate)) {
+          candidate_length =
+              FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
+          continue;
+        }
+        table_entry =
+            TableEntry4ByteMatch(table, LittleEndian::Load32(ip), mask);
+        candidate = base_ip + *table_entry;
+        *table_entry = ip - base_ip;
+        if (LittleEndian::Load32(ip) == LittleEndian::Load32(candidate)) {
+          candidate_length =
+              FindMatchLengthPlain(candidate + 4, ip + 4, ip_end) + 4;
+          continue;
+        }
+        break;
+      } while (true);
+    }
+  }
+
 emit_remainder:
   // Emit the remaining bytes as a literal
   if (ip < ip_end) {
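Like CompressFragment, the double-hash compressor throttles its probing on hard-to-compress input, but with a much slower ramp: skip starts at 512 and the stride is skip >> 9, so the first 512 probes advance one byte each before the step begins to grow. A quick standalone sketch of that schedule, just reproducing the two lines of arithmetic above:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t skip = 512;   // initial value in CompressFragmentDoubleHash
  uint64_t scanned = 0;  // input bytes covered by the probe loop
  for (int probe = 0; probe < 2048; ++probe) {
    uint32_t bytes_between_hash_lookups = skip >> 9;  // stride: 1,...,1,2,...
    skip++;
    scanned += bytes_between_hash_lookups;
  }
  // The stride grows by 1 every 512 probes, so 2048 probes cover
  // 512 * (1 + 2 + 3 + 4) = 5120 bytes.
  std::printf("bytes scanned: %llu\n", (unsigned long long)scanned);
  return 0;
}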
@@ -946,10 +1126,10 @@ emit_remainder:
   }
 }  // end namespace internal
 
-// Called back at avery compression call to trace parameters and sizes.
-static inline void Report(const char *algorithm, size_t compressed_size,
-                          size_t uncompressed_size) {
+static inline void Report(int token, const char *algorithm, size_t
+                          compressed_size, size_t uncompressed_size) {
   // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)token;
   (void)algorithm;
   (void)compressed_size;
   (void)uncompressed_size;
@@ -1234,16 +1414,21 @@ std::pair<const uint8_t*, ptrdiff_t> DecompressBranchless(
       assert(tag == ip[-1]);
       // For literals tag_type = 0, hence we will always obtain 0 from
       // ExtractLowBytes. For literals offset will thus be kLiteralOffset.
-      ptrdiff_t len_min_offset = kLengthMinusOffset[tag];
+      ptrdiff_t len_minus_offset = kLengthMinusOffset[tag];
+      uint32_t next;
 #if defined(__aarch64__)
       size_t tag_type = AdvanceToNextTagARMOptimized(&ip, &tag);
+      // We never need more than 16 bits. Doing a Load16 allows the compiler
+      // to elide the masking operation in ExtractOffset.
+      next = LittleEndian::Load16(old_ip);
 #else
       size_t tag_type = AdvanceToNextTagX86Optimized(&ip, &tag);
+      next = LittleEndian::Load32(old_ip);
 #endif
-      uint32_t next = LittleEndian::Load32(old_ip);
-      size_t len = len_min_offset & 0xFF;
-      len_min_offset -= ExtractOffset(next, tag_type);
-      if (SNAPPY_PREDICT_FALSE(len_min_offset > 0)) {
+      size_t len = len_minus_offset & 0xFF;
+      ptrdiff_t extracted = ExtractOffset(next, tag_type);
+      ptrdiff_t len_min_offset = len_minus_offset - extracted;
+      if (SNAPPY_PREDICT_FALSE(len_minus_offset > extracted)) {
         if (SNAPPY_PREDICT_FALSE(len & 0x80)) {
           // Exceptional case (long literal or copy 4).
           // Actually doing the copy here is negatively impacting the main
@@ -1290,7 +1475,7 @@ std::pair<const uint8_t*, ptrdiff_t> DecompressBranchless(
         DeferMemCopy(&deferred_src, &deferred_length, from, len);
       }
     } while (ip < ip_limit_min_slop &&
-             (op + deferred_length) < op_limit_min_slop);
+             static_cast<ptrdiff_t>(op + deferred_length) < op_limit_min_slop);
   exit:
     ip--;
     assert(ip <= ip_limit);
@@ -1314,7 +1499,7 @@ class SnappyDecompressor {
   // If ip < ip_limit_min_maxtaglen_ it's safe to read kMaxTagLength from
   // buffer.
   const char* ip_limit_min_maxtaglen_;
-  uint32_t peeked_;                  // Bytes peeked from reader (need to skip)
+  uint64_t peeked_;                  // Bytes peeked from reader (need to skip)
  bool eof_;                         // Hit end of input without an error?
  char scratch_[kMaximumTagLength];  // See RefillTag().
 
@@ -1505,7 +1690,8 @@ constexpr uint32_t CalculateNeeded(uint8_t tag) {
 #if __cplusplus >= 201402L
 constexpr bool VerifyCalculateNeeded() {
   for (int i = 0; i < 1; i++) {
-    if (CalculateNeeded(i) != (char_table[i] >> 11) + 1) return false;
+    if (CalculateNeeded(i) != static_cast<uint32_t>((char_table[i] >> 11)) + 1)
+      return false;
   }
   return true;
 }
@@ -1541,7 +1727,7 @@ bool SnappyDecompressor::RefillTag() {
   assert(needed <= sizeof(scratch_));
 
   // Read more bytes from reader if needed
-  uint32_t nbuf = ip_limit_ - ip;
+  uint64_t nbuf = ip_limit_ - ip;
   if (nbuf < needed) {
     // Stitch together bytes from ip and reader to form the word
     // contents. We store the needed bytes in "scratch_". They
@@ -1554,7 +1740,7 @@ bool SnappyDecompressor::RefillTag() {
       size_t length;
      const char* src = reader_->Peek(&length);
       if (length == 0) return false;
-      uint32_t to_add = std::min<uint32_t>(needed - nbuf, length);
+      uint64_t to_add = std::min<uint64_t>(needed - nbuf, length);
       std::memcpy(scratch_ + nbuf, src, to_add);
       nbuf += to_add;
       reader_->Skip(to_add);
@@ -1592,7 +1778,8 @@ template <typename Writer>
 static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
                                       Writer* writer, uint32_t compressed_len,
                                       uint32_t uncompressed_len) {
-  Report("snappy_uncompress", compressed_len, uncompressed_len);
+  int token = 0;
+  Report(token, "snappy_uncompress", compressed_len, uncompressed_len);
 
   writer->SetExpectedLength(uncompressed_len);
 
@@ -1608,8 +1795,15 @@ bool GetUncompressedLength(Source* source, uint32_t* result) {
 }
 
 size_t Compress(Source* reader, Sink* writer) {
+  return Compress(reader, writer, CompressionOptions{});
+}
+
+size_t Compress(Source* reader, Sink* writer, CompressionOptions options) {
+  assert(options.level == 1 || options.level == 2);
+  int token = 0;
   size_t written = 0;
   size_t N = reader->Available();
+  assert(N <= 0xFFFFFFFFu);
   const size_t uncompressed_size = N;
   char ulength[Varint::kMax32];
   char* p = Varint::Encode32(ulength, N);
@@ -1654,17 +1848,23 @@ size_t Compress(Source* reader, Sink* writer) {
     uint16_t* table = wmem.GetHashTable(num_to_read, &table_size);
 
     // Compress input_fragment and append to dest
-    const int max_output = MaxCompressedLength(num_to_read);
-
-    // Need a scratch buffer for the output, in case the byte sink doesn't
-    // have room for us directly.
+    int max_output = MaxCompressedLength(num_to_read);
 
     // Since we encode kBlockSize regions followed by a region
     // which is <= kBlockSize in length, a previously allocated
     // scratch_output[] region is big enough for this iteration.
+    // Need a scratch buffer for the output, in case the byte sink doesn't
+    // have room for us directly.
     char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput());
-    char* end = internal::CompressFragment(fragment, fragment_size, dest, table,
-                                           table_size);
+    char* end = nullptr;
+    if (options.level == 1) {
+      end = internal::CompressFragment(fragment, fragment_size, dest, table,
+                                       table_size);
+    } else if (options.level == 2) {
+      end = internal::CompressFragmentDoubleHash(
+          fragment, fragment_size, dest, table, table_size >> 1,
+          table + (table_size >> 1), table_size >> 1);
+    }
     writer->Append(dest, end - dest);
     written += (end - dest);
 
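Note how level 2 reuses the single working-memory allocation: the hash table is split in half, the first half feeding TableEntry4ByteMatch and the second half TableEntry8ByteMatch, each advertised as table_size >> 1 slots. A sketch of the call shape only (the vector stands in for the real WorkingMemory arena; the commented call names the function added in the hunk above):

#include <cstdint>
#include <vector>

int main() {
  const int table_size = 1 << 15;  // full allocation, in uint16_t slots
  std::vector<uint16_t> wmem(table_size, 0);
  const int half = table_size >> 1;
  uint16_t* table = wmem.data();          // 4-byte-match half
  uint16_t* table2 = wmem.data() + half;  // 8-byte-match half
  // end = internal::CompressFragmentDoubleHash(fragment, fragment_size, dest,
  //                                            table, half, table2, half);
  (void)table;
  (void)table2;
  return 0;
}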
@@ -1672,8 +1872,7 @@ size_t Compress(Source* reader, Sink* writer) {
     reader->Skip(pending_advance);
   }
 
-  Report("snappy_compress", written, uncompressed_size);
-
+  Report(token, "snappy_compress", written, uncompressed_size);
 
   return written;
 }
@@ -1696,16 +1895,16 @@ class SnappyIOVecReader : public Source {
     if (total_size > 0 && curr_size_remaining_ == 0) Advance();
   }
 
-  ~SnappyIOVecReader() = default;
+  ~SnappyIOVecReader() override = default;
 
-  size_t Available() const { return total_size_remaining_; }
+  size_t Available() const override { return total_size_remaining_; }
 
-  const char* Peek(size_t* len) {
+  const char* Peek(size_t* len) override {
     *len = curr_size_remaining_;
     return curr_pos_;
   }
 
-  void Skip(size_t n) {
+  void Skip(size_t n) override {
     while (n >= curr_size_remaining_ && n > 0) {
       n -= curr_size_remaining_;
       Advance();
@@ -2108,9 +2307,15 @@ bool IsValidCompressed(Source* compressed) {
 
 void RawCompress(const char* input, size_t input_length, char* compressed,
                  size_t* compressed_length) {
+  RawCompress(input, input_length, compressed, compressed_length,
+              CompressionOptions{});
+}
+
+void RawCompress(const char* input, size_t input_length, char* compressed,
+                 size_t* compressed_length, CompressionOptions options) {
   ByteArraySource reader(input, input_length);
   UncheckedByteArraySink writer(compressed);
-  Compress(&reader, &writer);
+  Compress(&reader, &writer, options);
 
   // Compute how many bytes were added
   *compressed_length = (writer.CurrentDestination() - compressed);
@@ -2118,9 +2323,16 @@ void RawCompress(const char* input, size_t input_length, char* compressed,
 
 void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
                           char* compressed, size_t* compressed_length) {
+  RawCompressFromIOVec(iov, uncompressed_length, compressed, compressed_length,
+                       CompressionOptions{});
+}
+
+void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
+                          char* compressed, size_t* compressed_length,
+                          CompressionOptions options) {
   SnappyIOVecReader reader(iov, uncompressed_length);
   UncheckedByteArraySink writer(compressed);
-  Compress(&reader, &writer);
+  Compress(&reader, &writer, options);
 
   // Compute how many bytes were added.
   *compressed_length = writer.CurrentDestination() - compressed;
@@ -2128,18 +2340,28 @@ void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
 
 size_t Compress(const char* input, size_t input_length,
                 std::string* compressed) {
+  return Compress(input, input_length, compressed, CompressionOptions{});
+}
+
+size_t Compress(const char* input, size_t input_length, std::string* compressed,
+                CompressionOptions options) {
   // Pre-grow the buffer to the max length of the compressed output
   STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length));
 
   size_t compressed_length;
   RawCompress(input, input_length, string_as_array(compressed),
-              &compressed_length);
+              &compressed_length, options);
   compressed->erase(compressed_length);
   return compressed_length;
 }
 
 size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
                          std::string* compressed) {
+  return CompressFromIOVec(iov, iov_cnt, compressed, CompressionOptions{});
+}
+
+size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
+                         std::string* compressed, CompressionOptions options) {
   // Compute the number of bytes to be compressed.
   size_t uncompressed_length = 0;
   for (size_t i = 0; i < iov_cnt; ++i) {
@@ -2152,7 +2374,7 @@ size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
 
   size_t compressed_length;
   RawCompressFromIOVec(iov, uncompressed_length, string_as_array(compressed),
-                       &compressed_length);
+                       &compressed_length, options);
   compressed->erase(compressed_length);
   return compressed_length;
 }
@@ -2342,7 +2564,6 @@ bool SnappyScatteredWriter<Allocator>::SlowAppendFromSelf(size_t offset,
 class SnappySinkAllocator {
  public:
   explicit SnappySinkAllocator(Sink* dest) : dest_(dest) {}
-  ~SnappySinkAllocator() {}
 
   char* Allocate(int size) {
     Datablock block(new char[size], size);
data/vendor/snappy/snappy.h
CHANGED
@@ -50,13 +50,38 @@ namespace snappy {
 class Source;
 class Sink;
 
+struct CompressionOptions {
+  // Compression level.
+  // Level 1 is the fastest
+  // Level 2 is a little slower but provides better compression. Level 2 is
+  // **EXPERIMENTAL** for the time being. It might happen that we decide to
+  // fall back to level 1 in the future.
+  // Levels 3+ are currently not supported. We plan to support levels up to
+  // 9 in the future.
+  // If you played with other compression algorithms, level 1 is equivalent to
+  // fast mode (level 1) of LZ4, level 2 is equivalent to LZ4's level 2 mode
+  // and compresses somewhere around zstd:-3 and zstd:-2 but generally with
+  // faster decompression speeds than snappy:1 and zstd:-3.
+  int level = DefaultCompressionLevel();
+
+  constexpr CompressionOptions() = default;
+  constexpr CompressionOptions(int compression_level)
+      : level(compression_level) {}
+  static constexpr int MinCompressionLevel() { return 1; }
+  static constexpr int MaxCompressionLevel() { return 2; }
+  static constexpr int DefaultCompressionLevel() { return 1; }
+};
+
 // ------------------------------------------------------------------------
 // Generic compression/decompression routines.
 // ------------------------------------------------------------------------
 
-// Compress the bytes read from "*source" and append to "*sink". Return the
+// Compress the bytes read from "*reader" and append to "*writer". Return the
 // number of bytes written.
-size_t Compress(Source* source, Sink* sink);
+// First version is to preserve ABI.
+size_t Compress(Source* reader, Sink* writer);
+size_t Compress(Source* reader, Sink* writer,
+                CompressionOptions options);
 
 // Find the uncompressed length of the given stream, as given by the header.
 // Note that the true length could deviate from this; the stream could e.g.
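A short usage sketch of the options overloads declared above (assumes linking against this vendored release; error handling omitted):

#include <string>
#include "snappy.h"

int main() {
  std::string input(1 << 16, 'a');
  std::string out1, out2;
  snappy::Compress(input.data(), input.size(), &out1);  // default: level 1
  snappy::Compress(input.data(), input.size(), &out2,
                   snappy::CompressionOptions{2});      // experimental level 2
  std::string roundtrip;
  snappy::Uncompress(out2.data(), out2.size(), &roundtrip);
  return roundtrip == input ? 0 : 1;
}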
@@ -75,15 +100,22 @@ namespace snappy {
 // Original contents of *compressed are lost.
 //
 // REQUIRES: "input[]" is not an alias of "*compressed".
+// First version is to preserve ABI.
 size_t Compress(const char* input, size_t input_length,
                 std::string* compressed);
+size_t Compress(const char* input, size_t input_length,
+                std::string* compressed, CompressionOptions options);
 
 // Same as `Compress` above but taking an `iovec` array as input. Note that
 // this function preprocesses the inputs to compute the sum of
 // `iov[0..iov_cnt-1].iov_len` before reading. To avoid this, use
 // `RawCompressFromIOVec` below.
+// First version is to preserve ABI.
 size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
                          std::string* compressed);
+size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
+                         std::string* compressed,
+                         CompressionOptions options);
 
 // Decompresses "compressed[0..compressed_length-1]" to "*uncompressed".
 // Original contents of "*uncompressed" are lost.
@@ -126,16 +158,19 @@ namespace snappy {
 //    RawCompress(input, input_length, output, &output_length);
 //    ... Process(output, output_length) ...
 //    delete [] output;
-void RawCompress(const char* input,
-                 size_t input_length,
-                 char* compressed,
+void RawCompress(const char* input, size_t input_length, char* compressed,
                  size_t* compressed_length);
+void RawCompress(const char* input, size_t input_length, char* compressed,
+                 size_t* compressed_length, CompressionOptions options);
 
 // Same as `RawCompress` above but taking an `iovec` array as input. Note that
 // `uncompressed_length` is the total number of bytes to be read from the
 // elements of `iov` (_not_ the number of elements in `iov`).
 void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
                           char* compressed, size_t* compressed_length);
+void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
+                          char* compressed, size_t* compressed_length,
+                          CompressionOptions options);
 
 // Given data in "compressed[0..compressed_length-1]" generated by
 // calling the Snappy::Compress routine, this routine
@@ -215,7 +250,7 @@ namespace snappy {
 static constexpr int kMinHashTableBits = 8;
 static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;
 
-static constexpr int kMaxHashTableBits = 14;
+static constexpr int kMaxHashTableBits = 15;
 static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
 }  // end namespace snappy
 