snappy 0.0.17-java → 0.1.0-java
- checksums.yaml +5 -5
- data/.travis.yml +6 -1
- data/Gemfile +5 -0
- data/README.md +28 -4
- data/Rakefile +1 -0
- data/ext/extconf.rb +21 -16
- data/lib/snappy.rb +1 -0
- data/lib/snappy/hadoop.rb +22 -0
- data/lib/snappy/hadoop/reader.rb +58 -0
- data/lib/snappy/hadoop/writer.rb +51 -0
- data/lib/snappy/reader.rb +4 -4
- data/lib/snappy/version.rb +1 -1
- data/smoke.sh +1 -1
- data/snappy.gemspec +0 -4
- data/test/hadoop/test-snappy-hadoop-reader.rb +103 -0
- data/test/hadoop/test-snappy-hadoop-writer.rb +48 -0
- data/test/test-snappy-hadoop.rb +22 -0
- data/vendor/snappy/CMakeLists.txt +174 -0
- data/vendor/snappy/CONTRIBUTING.md +26 -0
- data/vendor/snappy/NEWS +32 -0
- data/vendor/snappy/{README → README.md} +13 -3
- data/vendor/snappy/cmake/SnappyConfig.cmake +1 -0
- data/vendor/snappy/cmake/config.h.in +62 -0
- data/vendor/snappy/snappy-internal.h +9 -12
- data/vendor/snappy/snappy-stubs-internal.h +63 -30
- data/vendor/snappy/snappy-stubs-public.h.in +13 -19
- data/vendor/snappy/snappy-test.cc +10 -7
- data/vendor/snappy/snappy-test.h +12 -38
- data/vendor/snappy/snappy.cc +81 -30
- data/vendor/snappy/snappy_unittest.cc +16 -137
- metadata +18 -54
- data/vendor/snappy/ChangeLog +0 -2468
- data/vendor/snappy/Makefile.am +0 -26
- data/vendor/snappy/autogen.sh +0 -12
- data/vendor/snappy/configure.ac +0 -134
- data/vendor/snappy/m4/gtest.m4 +0 -74
- data/vendor/snappy/snappy.pc.in +0 -10
data/vendor/snappy/snappy-stubs-internal.h
CHANGED
@@ -45,6 +45,14 @@
 #include <sys/mman.h>
 #endif
 
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif  // defined(_MSC_VER)
+
 #include "snappy-stubs-public.h"
 
 #if defined(__x86_64__)
@@ -52,6 +60,14 @@
 // Enable 64-bit optimized versions of some routines.
 #define ARCH_K8 1
 
+#elif defined(__ppc64__)
+
+#define ARCH_PPC 1
+
+#elif defined(__aarch64__)
+
+#define ARCH_ARM 1
+
 #endif
 
 // Needed by OS X, among others.
@@ -59,10 +75,6 @@
 #define MAP_ANONYMOUS MAP_ANON
 #endif
 
-// Pull in std::min, std::ostream, and the likes. This is safe because this
-// header file is never used from any public header files.
-using namespace std;
-
 // The size of an array, if known at compile-time.
 // Will give unexpected results if used on a pointer.
 // We undefine it first, since some compilers already have a definition.
@@ -73,11 +85,11 @@ using namespace std;
 
 // Static prediction hints.
 #ifdef HAVE_BUILTIN_EXPECT
-#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
-#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0))
+#define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
 #else
-#define PREDICT_FALSE(x) x
-#define PREDICT_TRUE(x) x
+#define SNAPPY_PREDICT_FALSE(x) x
+#define SNAPPY_PREDICT_TRUE(x) x
 #endif
 
 // This is only used for recomputing the tag byte table used during
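The renamed SNAPPY_PREDICT_TRUE/SNAPPY_PREDICT_FALSE macros wrap GCC/Clang's __builtin_expect so hot-path branches can hint the likely direction to the compiler. A minimal standalone sketch of the same pattern (my own illustration, not code from the gem; HOT_PREDICT_* and CountNonZero are made-up names):

```cpp
// Branch-hint pattern, assuming a GCC/Clang-style compiler; the hint only
// influences generated code layout, never the result of the branch.
#include <cstddef>

#if defined(__GNUC__) || defined(__clang__)
#define HOT_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#define HOT_PREDICT_FALSE(x) (__builtin_expect(x, 0))
#else
#define HOT_PREDICT_TRUE(x) x
#define HOT_PREDICT_FALSE(x) x
#endif

std::size_t CountNonZero(const unsigned char* p, std::size_t n) {
  std::size_t count = 0;
  for (std::size_t i = 0; i < n; ++i) {
    // Tell the compiler this branch is expected to be taken most of the time.
    if (HOT_PREDICT_TRUE(p[i] != 0)) ++count;
  }
  return count;
}

int main() {
  const unsigned char data[] = {1, 0, 2, 0, 3};
  return CountNonZero(data, sizeof(data)) == 3 ? 0 : 1;
}
```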
@@ -96,9 +108,10 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
 
 // Potentially unaligned loads and stores.
 
-// x86 and PowerPC can simply do these loads and stores native.
+// x86, PowerPC, and ARM64 can simply do these loads and stores native.
 
-#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \
+    defined(__aarch64__)
 
 #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
 #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
@@ -225,22 +238,8 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {
 
 #endif
 
-// This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64
-// on some platforms, in particular ARM.
-inline void UnalignedCopy64(const void *src, void *dst) {
-  if (sizeof(void *) == 8) {
-    UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src));
-  } else {
-    const char *src_char = reinterpret_cast<const char *>(src);
-    char *dst_char = reinterpret_cast<char *>(dst);
-
-    UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char));
-    UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4));
-  }
-}
-
 // The following guarantees declaration of the byte swap functions.
-#ifdef WORDS_BIGENDIAN
+#if defined(SNAPPY_IS_BIG_ENDIAN)
 
 #ifdef HAVE_SYS_BYTEORDER_H
 #include <sys/byteorder.h>
@@ -297,7 +296,7 @@ inline uint64 bswap_64(uint64 x) {
 
 #endif
 
-#endif  // WORDS_BIGENDIAN
+#endif  // defined(SNAPPY_IS_BIG_ENDIAN)
 
 // Convert to little-endian storage, opposite of network format.
 // Convert x from host to little endian: x = LittleEndian.FromHost(x);
@@ -311,7 +310,7 @@ inline uint64 bswap_64(uint64 x) {
 class LittleEndian {
  public:
   // Conversion functions.
-#ifdef WORDS_BIGENDIAN
+#if defined(SNAPPY_IS_BIG_ENDIAN)
 
   static uint16 FromHost16(uint16 x) { return bswap_16(x); }
   static uint16 ToHost16(uint16 x) { return bswap_16(x); }
@@ -321,7 +320,7 @@ class LittleEndian {
 
   static bool IsLittleEndian() { return false; }
 
-#else  // !defined(WORDS_BIGENDIAN)
+#else  // !defined(SNAPPY_IS_BIG_ENDIAN)
 
   static uint16 FromHost16(uint16 x) { return x; }
   static uint16 ToHost16(uint16 x) { return x; }
@@ -331,7 +330,7 @@ class LittleEndian {
 
   static bool IsLittleEndian() { return true; }
 
-#endif  // !defined(WORDS_BIGENDIAN)
+#endif  // !defined(SNAPPY_IS_BIG_ENDIAN)
 
   // Functions to do unaligned loads and stores in little-endian order.
   static uint16 Load16(const void *p) {
@@ -361,10 +360,15 @@ class Bits {
   // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except
   // that it's 0-indexed.
   static int FindLSBSetNonZero(uint32 n);
+
+#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
   static int FindLSBSetNonZero64(uint64 n);
+#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
 
  private:
-  DISALLOW_COPY_AND_ASSIGN(Bits);
+  // No copying
+  Bits(const Bits&);
+  void operator=(const Bits&);
 };
 
 #ifdef HAVE_BUILTIN_CTZ
@@ -377,9 +381,36 @@ inline int Bits::FindLSBSetNonZero(uint32 n) {
   return __builtin_ctz(n);
 }
 
+#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
 inline int Bits::FindLSBSetNonZero64(uint64 n) {
   return __builtin_ctzll(n);
 }
+#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+
+#elif defined(_MSC_VER)
+
+inline int Bits::Log2Floor(uint32 n) {
+  unsigned long where;
+  if (_BitScanReverse(&where, n)) {
+    return where;
+  } else {
+    return -1;
+  }
+}
+
+inline int Bits::FindLSBSetNonZero(uint32 n) {
+  unsigned long where;
+  if (_BitScanForward(&where, n)) return static_cast<int>(where);
+  return 32;
+}
+
+#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+inline int Bits::FindLSBSetNonZero64(uint64 n) {
+  unsigned long where;
+  if (_BitScanForward64(&where, n)) return static_cast<int>(where);
+  return 64;
+}
+#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
 
 #else  // Portable versions.
 
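On MSVC there is no __builtin_ctz/__builtin_ctzll, so the new _MSC_VER branch above builds Bits::Log2Floor and Bits::FindLSBSetNonZero from the _BitScanReverse/_BitScanForward intrinsics. As a reference for what those functions must return, here is a small portable sketch of the same contract (my own check, not part of snappy; the *Portable names are invented):

```cpp
// Reference semantics: Log2Floor(n) is the index of the highest set bit
// (-1 for n == 0); FindLSBSetNonZero(n) is the 0-indexed lowest set bit.
#include <cassert>
#include <cstdint>

static int Log2FloorPortable(uint32_t n) {
  int log = -1;
  while (n != 0) {
    ++log;
    n >>= 1;
  }
  return log;
}

static int FindLSBSetNonZeroPortable(uint32_t n) {
  int pos = 0;  // precondition: n != 0
  while ((n & 1u) == 0) {
    n >>= 1;
    ++pos;
  }
  return pos;
}

int main() {
  assert(Log2FloorPortable(0) == -1);  // matches the _BitScanReverse failure case
  assert(Log2FloorPortable(1) == 0);
  assert(Log2FloorPortable(4096) == 12);
  assert(FindLSBSetNonZeroPortable(0x80u) == 7);
  return 0;
}
```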
@@ -413,6 +444,7 @@ inline int Bits::FindLSBSetNonZero(uint32 n) {
   return rc;
 }
 
+#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
 // FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
 inline int Bits::FindLSBSetNonZero64(uint64 n) {
   const uint32 bottombits = static_cast<uint32>(n);
@@ -423,6 +455,7 @@ inline int Bits::FindLSBSetNonZero64(uint64 n) {
     return FindLSBSetNonZero(bottombits);
   }
 }
+#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
 
 #endif  // End portable versions.
 
data/vendor/snappy/snappy-stubs-public.h.in
CHANGED
@@ -36,21 +36,21 @@
 #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
 #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
 
-#if @ac_cv_have_stdint_h@
+#if ${HAVE_STDINT_H_01}  // HAVE_STDINT_H
 #include <stdint.h>
-#endif
+#endif  // HAVE_STDDEF_H
 
-#if @ac_cv_have_stddef_h@
+#if ${HAVE_STDDEF_H_01}  // HAVE_STDDEF_H
 #include <stddef.h>
-#endif
+#endif  // HAVE_STDDEF_H
 
-#if @ac_cv_have_sys_uio_h@
+#if ${HAVE_SYS_UIO_H_01}  // HAVE_SYS_UIO_H
 #include <sys/uio.h>
-#endif
+#endif  // HAVE_SYS_UIO_H
 
-#define SNAPPY_MAJOR @SNAPPY_MAJOR@
-#define SNAPPY_MINOR @SNAPPY_MINOR@
-#define SNAPPY_PATCHLEVEL @SNAPPY_PATCHLEVEL@
+#define SNAPPY_MAJOR ${SNAPPY_MAJOR}
+#define SNAPPY_MINOR ${SNAPPY_MINOR}
+#define SNAPPY_PATCHLEVEL ${SNAPPY_PATCHLEVEL}
 #define SNAPPY_VERSION \
     ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
 
@@ -58,7 +58,7 @@
 
 namespace snappy {
 
-#if @ac_cv_have_stdint_h@
+#if ${HAVE_STDINT_H_01}  // HAVE_STDINT_H
 typedef int8_t int8;
 typedef uint8_t uint8;
 typedef int16_t int16;
@@ -76,24 +76,18 @@ typedef int int32;
 typedef unsigned int uint32;
 typedef long long int64;
 typedef unsigned long long uint64;
-#endif
+#endif  // HAVE_STDINT_H
 
 typedef std::string string;
 
-#
-#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
-  TypeName(const TypeName&); \
-  void operator=(const TypeName&)
-#endif
-
-#if !@ac_cv_have_sys_uio_h@
+#if !${HAVE_SYS_UIO_H_01}  // !HAVE_SYS_UIO_H
 // Windows does not have an iovec type, yet the concept is universally useful.
 // It is simple to define it ourselves, so we put it inside our own namespace.
 struct iovec {
   void* iov_base;
   size_t iov_len;
 };
-#endif
+#endif  // !HAVE_SYS_UIO_H
 
 }  // namespace snappy
 
data/vendor/snappy/snappy-test.cc
CHANGED
@@ -33,6 +33,9 @@
 #endif
 
 #ifdef HAVE_WINDOWS_H
+// Needed to be able to use std::max without workarounds in the source code.
+// https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts
+#define NOMINMAX
 #include <windows.h>
 #endif
 
@@ -201,7 +204,7 @@ void Benchmark::Run() {
     if (benchmark_real_time_us > 0) {
       num_iterations = 200000 * kCalibrateIterations / benchmark_real_time_us;
     }
-    num_iterations = max(num_iterations, kCalibrateIterations);
+    num_iterations = std::max(num_iterations, kCalibrateIterations);
     BenchmarkRun benchmark_runs[kNumRuns];
 
     for (int run = 0; run < kNumRuns; ++run) {
@@ -217,10 +220,10 @@ void Benchmark::Run() {
     string heading = StringPrintf("%s/%d", name_.c_str(), test_case_num);
     string human_readable_speed;
 
-    nth_element(benchmark_runs,
-                benchmark_runs + kMedianPos,
-                benchmark_runs + kNumRuns,
-                BenchmarkCompareCPUTime());
+    std::nth_element(benchmark_runs,
+                     benchmark_runs + kMedianPos,
+                     benchmark_runs + kNumRuns,
+                     BenchmarkCompareCPUTime());
     int64 real_time_us = benchmark_runs[kMedianPos].real_time_us;
     int64 cpu_time_us = benchmark_runs[kMedianPos].cpu_time_us;
     if (cpu_time_us <= 0) {
@@ -523,8 +526,8 @@ int ZLib::UncompressAtMostOrAll(Bytef *dest, uLongf *destLen,
       LOG(WARNING)
         << "UncompressChunkOrAll: Received some extra data, bytes total: "
         << uncomp_stream_.avail_in << " bytes: "
-        << string(reinterpret_cast<const char *>(uncomp_stream_.next_in),
-                  min(int(uncomp_stream_.avail_in), 20));
+        << std::string(reinterpret_cast<const char *>(uncomp_stream_.next_in),
+                       std::min(int(uncomp_stream_.avail_in), 20));
       UncompressErrorInit();
       return Z_DATA_ERROR;  // what's the extra data for?
     } else if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) {
data/vendor/snappy/snappy-test.h
CHANGED
@@ -110,26 +110,8 @@
 #include "lzo/lzo1x.h"
 #endif
 
-#ifdef HAVE_LIBLZF
-extern "C" {
-#include "lzf.h"
-}
-#endif
-
-#ifdef HAVE_LIBFASTLZ
-#include "fastlz.h"
-#endif
-
-#ifdef HAVE_LIBQUICKLZ
-#include "quicklz.h"
-#endif
-
 namespace {
 
-namespace File {
-  void Init() { }
-}  // namespace File
-
 namespace file {
   int Defaults() { return 0; }
 
@@ -138,7 +120,8 @@ namespace file {
     void CheckSuccess() { }
   };
 
-  DummyStatus GetContents(
+  DummyStatus GetContents(
+      const std::string& filename, std::string* data, int unused) {
    FILE* fp = fopen(filename.c_str(), "rb");
    if (fp == NULL) {
      perror(filename.c_str());
@@ -153,7 +136,7 @@ namespace file {
        perror("fread");
        exit(1);
      }
-      data->append(string(buf, ret));
+      data->append(std::string(buf, ret));
    }
 
    fclose(fp);
@@ -161,9 +144,8 @@ namespace file {
    return DummyStatus();
  }
 
-  DummyStatus SetContents(
-
-      int unused) {
+  inline DummyStatus SetContents(
+      const std::string& filename, const std::string& str, int unused) {
    FILE* fp = fopen(filename.c_str(), "wb");
    if (fp == NULL) {
      perror(filename.c_str());
@@ -467,7 +449,7 @@ class ZLib {
 
 DECLARE_bool(run_microbenchmarks);
 
-static void RunSpecifiedBenchmarks() {
+static inline void RunSpecifiedBenchmarks() {
   if (!FLAGS_run_microbenchmarks) {
     return;
   }
@@ -515,10 +497,6 @@ static inline int RUN_ALL_TESTS() {
 // For main().
 namespace snappy {
 
-static void CompressFile(const char* fname);
-static void UncompressFile(const char* fname);
-static void MeasureFile(const char* fname);
-
 // Logging.
 
 #define LOG(level) LogMessage()
@@ -529,15 +507,15 @@ class LogMessage {
  public:
   LogMessage() { }
   ~LogMessage() {
-    cerr << endl;
+    std::cerr << std::endl;
   }
 
   LogMessage& operator<<(const std::string& msg) {
-    cerr << msg;
+    std::cerr << msg;
     return *this;
   }
   LogMessage& operator<<(int x) {
-    cerr << x;
+    std::cerr << x;
     return *this;
   }
 };
@@ -546,7 +524,7 @@ class LogMessage {
 // and ones that are always active.
 
 #define CRASH_UNLESS(condition) \
-  PREDICT_TRUE(condition) ? (void)0 : \
+  SNAPPY_PREDICT_TRUE(condition) ? (void)0 : \
   snappy::LogMessageVoidify() & snappy::LogMessageCrash()
 
 #ifdef _MSC_VER
@@ -560,7 +538,7 @@ class LogMessageCrash : public LogMessage {
  public:
   LogMessageCrash() { }
   ~LogMessageCrash() {
-    cerr << endl;
+    std::cerr << std::endl;
     abort();
   }
 };
@@ -590,10 +568,6 @@ class LogMessageVoidify {
 #define CHECK_GT(a, b) CRASH_UNLESS((a) > (b))
 #define CHECK_OK(cond) (cond).CheckSuccess()
 
-}  // namespace
-
-using snappy::CompressFile;
-using snappy::UncompressFile;
-using snappy::MeasureFile;
+}  // namespace snappy
 
 #endif  // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
data/vendor/snappy/snappy.cc
CHANGED
@@ -30,7 +30,16 @@
 #include "snappy-internal.h"
 #include "snappy-sinksource.h"
 
-#
+#ifndef SNAPPY_HAVE_SSE2
+#if defined(__SSE2__) || defined(_M_X64) || \
+    (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+#define SNAPPY_HAVE_SSE2 1
+#else
+#define SNAPPY_HAVE_SSE2 0
+#endif
+#endif
+
+#if SNAPPY_HAVE_SSE2
 #include <emmintrin.h>
 #endif
 #include <stdio.h>
@@ -47,7 +56,6 @@ using internal::COPY_2_BYTE_OFFSET;
 using internal::LITERAL;
 using internal::char_table;
 using internal::kMaximumTagLength;
-using internal::wordmask;
 
 // Any hash function will produce a valid compressed bitstream, but a good
 // hash function reduces the number of collisions and thus yields better
@@ -89,17 +97,21 @@ size_t MaxCompressedLength(size_t source_len) {
 namespace {
 
 void UnalignedCopy64(const void* src, void* dst) {
-
+  char tmp[8];
+  memcpy(tmp, src, 8);
+  memcpy(dst, tmp, 8);
 }
 
 void UnalignedCopy128(const void* src, void* dst) {
   // TODO(alkis): Remove this when we upgrade to a recent compiler that emits
   // SSE2 moves for memcpy(dst, src, 16).
-#
+#if SNAPPY_HAVE_SSE2
   __m128i x = _mm_loadu_si128(static_cast<const __m128i*>(src));
   _mm_storeu_si128(static_cast<__m128i*>(dst), x);
 #else
-
+  char tmp[16];
+  memcpy(tmp, src, 16);
+  memcpy(dst, tmp, 16);
 #endif
 }
 
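The rewritten UnalignedCopy64/UnalignedCopy128 above go through a small temporary with memcpy instead of dereferencing casted pointers. A standalone sketch of that idiom (my illustration, not gem code; LoadU64/StoreU64 are invented helper names):

```cpp
// The memcpy idiom for unaligned access: well-defined for any alignment and
// any aliasing, and modern compilers lower the fixed-size memcpy to a single
// load or store instruction.
#include <cstdint>
#include <cstring>

static inline uint64_t LoadU64(const void* p) {
  uint64_t v;
  std::memcpy(&v, p, sizeof(v));  // no alignment or strict-aliasing issues
  return v;
}

static inline void StoreU64(void* p, uint64_t v) {
  std::memcpy(p, &v, sizeof(v));
}

int main() {
  unsigned char buf[16] = {0};
  StoreU64(buf + 3, 0x0102030405060708ULL);  // deliberately misaligned offset
  return LoadU64(buf + 3) == 0x0102030405060708ULL ? 0 : 1;
}
```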
@@ -163,7 +175,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
   // copying 2x 8 bytes at a time.
 
   // Handle the uncommon case where pattern is less than 8 bytes.
-  if (
+  if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) {
     // Expand pattern to at least 8 bytes. The worse case scenario in terms of
     // buffer usage is when the pattern is size 3. ^ is the original position
     // of op. x are irrelevant bytes copied by the last UnalignedCopy64.
@@ -173,13 +185,13 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
     // abcabcabcabcxxxxx
     // ^
     // The last x is 14 bytes after ^.
-    if (
+    if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 14)) {
       while (pattern_size < 8) {
         UnalignedCopy64(src, op);
         op += pattern_size;
         pattern_size *= 2;
       }
-      if (
+      if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
     } else {
       return IncrementalCopySlow(src, op, op_limit);
     }
@@ -195,11 +207,11 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
     UnalignedCopy64(src + 8, op + 8);
     src += 16;
     op += 16;
-    if (
+    if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
   }
   // We only take this branch if we didn't have enough slop and we can do a
   // single 8 byte copy.
-  if (
+  if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) {
     UnalignedCopy64(src, op);
     src += 8;
     op += 8;
@@ -261,7 +273,7 @@ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
   assert(offset < 65536);
   assert(len_less_than_12 == (len < 12));
 
-  if (len_less_than_12 &&
+  if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) {
     // offset fits in 11 bits. The 3 highest go in the top of the first byte,
     // and the rest go in the second byte.
     *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
@@ -286,7 +298,7 @@ static inline char* EmitCopy(char* op, size_t offset, size_t len,
     // it's in the noise.
 
     // Emit 64 byte copies but make sure to keep at least four bytes reserved.
-    while (
+    while (SNAPPY_PREDICT_FALSE(len >= 68)) {
       op = EmitCopyAtMost64(op, offset, 64, false);
       len -= 64;
     }
@@ -415,7 +427,7 @@ char* CompressFragment(const char* input,
   const char* next_emit = ip;
 
   const size_t kInputMarginBytes = 15;
-  if (
+  if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
     const char* ip_limit = input + input_size - kInputMarginBytes;
 
     for (uint32 next_hash = Hash(++ip, shift); ; ) {
@@ -456,7 +468,7 @@ char* CompressFragment(const char* input,
         uint32 bytes_between_hash_lookups = skip >> 5;
         skip += bytes_between_hash_lookups;
         next_ip = ip + bytes_between_hash_lookups;
-        if (
+        if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
           goto emit_remainder;
         }
         next_hash = Hash(next_ip, shift);
@@ -465,8 +477,8 @@ char* CompressFragment(const char* input,
         assert(candidate < ip);
 
         table[hash] = ip - base_ip;
-      } while (
-
+      } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
+                                   UNALIGNED_LOAD32(candidate)));
 
       // Step 2: A 4-byte match has been found. We'll later see if more
       // than 4 bytes match. But, prior to the match, input
@@ -497,7 +509,7 @@ char* CompressFragment(const char* input,
       assert(0 == memcmp(base, candidate, matched));
       op = EmitCopy(op, offset, matched, p.second);
       next_emit = ip;
-      if (
+      if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
        goto emit_remainder;
       }
       // We are now looking for a 4-byte match again. We read
@@ -527,6 +539,10 @@ char* CompressFragment(const char* input,
 }
 }  // end namespace internal
 
+// Called back at avery compression call to trace parameters and sizes.
+static inline void Report(const char *algorithm, size_t compressed_size,
+                          size_t uncompressed_size) {}
+
 // Signature of output types needed by decompression code.
 // The decompression code is templatized on a type that obeys this
 // signature so that we do not pay virtual function call overhead in
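Report() above is added as an empty inline hook that the compression and decompression paths call with an algorithm label and the compressed/uncompressed byte counts. Purely as an assumption of how such a hook could be used, here is a hypothetical tracing body; the gem and upstream snappy ship only the empty version:

```cpp
// Hypothetical tracing body for the no-op Report() hook shown above.
// Illustrative only: this is not how the library implements it.
#include <cstddef>
#include <cstdio>

static inline void Report(const char *algorithm, size_t compressed_size,
                          size_t uncompressed_size) {
  // e.g. "snappy_compress: 10240 -> 4631 bytes (45.2%)"
  double pct = uncompressed_size == 0
                   ? 0.0
                   : 100.0 * static_cast<double>(compressed_size) /
                         static_cast<double>(uncompressed_size);
  std::fprintf(stderr, "%s: %zu -> %zu bytes (%.1f%%)\n", algorithm,
               uncompressed_size, compressed_size, pct);
}
```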
@@ -567,6 +583,14 @@ char* CompressFragment(const char* input,
 //   bool TryFastAppend(const char* ip, size_t available, size_t length);
 // };
 
+namespace internal {
+
+// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
+static const uint32 wordmask[] = {
+  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
+};
+
+}  // end namespace internal
 
 // Helper class for decompression
 class SnappyDecompressor {
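The comment above says wordmask[i] extracts the bottom 8*i bits; in other words, masking a 32-bit load with wordmask[i] keeps exactly the first i little-endian bytes, which is how the decompressor trims a 4-byte unaligned load down to a 1-to-4-byte length field. A small standalone check of that mapping (my own example, not gem code):

```cpp
// Worked example of the wordmask mapping: v & kWordMask[i] keeps exactly the
// low 8*i bits of v, i.e. the first i little-endian bytes.
#include <cassert>
#include <cstdint>

static const uint32_t kWordMask[] = {0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu};

int main() {
  const uint32_t v = 0x12345678u;
  assert((v & kWordMask[0]) == 0u);           // keep 0 bytes
  assert((v & kWordMask[1]) == 0x78u);        // keep 1 byte
  assert((v & kWordMask[2]) == 0x5678u);      // keep 2 bytes
  assert((v & kWordMask[3]) == 0x345678u);    // keep 3 bytes
  assert((v & kWordMask[4]) == 0x12345678u);  // keep all 4 bytes
  return 0;
}
```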
@@ -638,7 +662,16 @@ class SnappyDecompressor {
     // For position-independent executables, accessing global arrays can be
     // slow. Move wordmask array onto the stack to mitigate this.
     uint32 wordmask[sizeof(internal::wordmask)/sizeof(uint32)];
-    memcpy(wordmask, internal::wordmask, sizeof(wordmask));
+    // Do not use memcpy to copy internal::wordmask to
+    // wordmask. LLVM converts stack arrays to global arrays if it detects
+    // const stack arrays and this hurts the performance of position
+    // independent code. This change is temporary and can be reverted when
+    // https://reviews.llvm.org/D30759 is approved.
+    wordmask[0] = internal::wordmask[0];
+    wordmask[1] = internal::wordmask[1];
+    wordmask[2] = internal::wordmask[2];
+    wordmask[3] = internal::wordmask[3];
+    wordmask[4] = internal::wordmask[4];
 
     // We could have put this refill fragment only at the beginning of the loop.
     // However, duplicating it at the end of each branch gives the compiler more
@@ -652,6 +685,13 @@ class SnappyDecompressor {
     }
 
     MAYBE_REFILL();
+    // Add loop alignment directive. Without this directive, we observed
+    // significant performance degradation on several intel architectures
+    // in snappy benchmark built with LLVM. The degradation was caused by
+    // increased branch miss prediction.
+#if defined(__clang__) && defined(__x86_64__)
+    asm volatile (".p2align 5");
+#endif
     for ( ;; ) {
       const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
 
@@ -667,7 +707,7 @@ class SnappyDecompressor {
       //   txt[1-4] 25% 75%
       //   pb 24% 76%
       //   bin 24% 76%
-      if (
+      if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
         size_t literal_length = (c >> 2) + 1u;
         if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
           assert(literal_length < 61);
@@ -677,7 +717,7 @@ class SnappyDecompressor {
           // bytes in addition to the literal.
           continue;
         }
-        if (
+        if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
           // Long literal.
           const size_t literal_length_length = literal_length - 60;
           literal_length =
@@ -757,7 +797,7 @@ bool SnappyDecompressor::RefillTag() {
     size_t length;
     const char* src = reader_->Peek(&length);
     if (length == 0) return false;
-    uint32 to_add = min<uint32>(needed - nbuf, length);
+    uint32 to_add = std::min<uint32>(needed - nbuf, length);
     memcpy(scratch_ + nbuf, src, to_add);
     nbuf += to_add;
     reader_->Skip(to_add);
@@ -786,13 +826,18 @@ static bool InternalUncompress(Source* r, Writer* writer) {
   SnappyDecompressor decompressor(r);
   uint32 uncompressed_len = 0;
   if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
-  return InternalUncompressAllTags(&decompressor, writer, uncompressed_len);
+
+  return InternalUncompressAllTags(&decompressor, writer, r->Available(),
+                                   uncompressed_len);
 }
 
 template <typename Writer>
 static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
                                       Writer* writer,
+                                      uint32 compressed_len,
                                       uint32 uncompressed_len) {
+  Report("snappy_uncompress", compressed_len, uncompressed_len);
+
   writer->SetExpectedLength(uncompressed_len);
 
   // Process the entire input
@@ -809,6 +854,7 @@ bool GetUncompressedLength(Source* source, uint32* result) {
 size_t Compress(Source* reader, Sink* writer) {
   size_t written = 0;
   size_t N = reader->Available();
+  const size_t uncompressed_size = N;
   char ulength[Varint::kMax32];
   char* p = Varint::Encode32(ulength, N);
   writer->Append(ulength, p-ulength);
@@ -823,7 +869,7 @@ size_t Compress(Source* reader, Sink* writer) {
     size_t fragment_size;
     const char* fragment = reader->Peek(&fragment_size);
     assert(fragment_size != 0);  // premature end of input
-    const size_t num_to_read = min(N, kBlockSize);
+    const size_t num_to_read = std::min(N, kBlockSize);
     size_t bytes_read = fragment_size;
 
     size_t pending_advance = 0;
@@ -844,7 +890,7 @@ size_t Compress(Source* reader, Sink* writer) {
 
       while (bytes_read < num_to_read) {
        fragment = reader->Peek(&fragment_size);
-        size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
+        size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
        memcpy(scratch + bytes_read, fragment, n);
        bytes_read += n;
        reader->Skip(n);
@@ -881,6 +927,8 @@ size_t Compress(Source* reader, Sink* writer) {
     reader->Skip(pending_advance);
   }
 
+  Report("snappy_compress", written, uncompressed_size);
+
   delete[] scratch;
   delete[] scratch_output;
 
@@ -1313,7 +1361,8 @@ class SnappyScatteredWriter {
     char* const op_end = op_ptr_ + len;
     // See SnappyArrayWriter::AppendFromSelf for an explanation of
     // the "offset - 1u" trick.
-    if (
+    if (SNAPPY_PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ &&
+                            op_end <= op_limit_)) {
       // Fast path: src and dst in current block.
       op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_);
       return true;
@@ -1344,7 +1393,7 @@ bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
   }
 
   // Make new block
-  size_t bsize = min<size_t>(kBlockSize, expected_ - full_size_);
+  size_t bsize = std::min<size_t>(kBlockSize, expected_ - full_size_);
   op_base_ = allocator_.Allocate(bsize);
   op_ptr_ = op_base_;
   op_limit_ = op_base_ + bsize;
@@ -1401,7 +1450,7 @@ class SnappySinkAllocator {
     size_t size_written = 0;
     size_t block_size;
     for (int i = 0; i < blocks_.size(); ++i) {
-      block_size = min<size_t>(blocks_[i].size, size - size_written);
+      block_size = std::min<size_t>(blocks_[i].size, size - size_written);
       dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
                                     &SnappySinkAllocator::Deleter, NULL);
       size_written += block_size;
@@ -1446,18 +1495,20 @@ bool Uncompress(Source* compressed, Sink* uncompressed) {
   char* buf = uncompressed->GetAppendBufferVariable(
       1, uncompressed_len, &c, 1, &allocated_size);
 
+  const size_t compressed_len = compressed->Available();
   // If we can get a flat buffer, then use it, otherwise do block by block
   // uncompression
   if (allocated_size >= uncompressed_len) {
     SnappyArrayWriter writer(buf);
-    bool result = InternalUncompressAllTags(
-
+    bool result = InternalUncompressAllTags(&decompressor, &writer,
+                                            compressed_len, uncompressed_len);
     uncompressed->Append(buf, writer.Produced());
     return result;
   } else {
     SnappySinkAllocator allocator(uncompressed);
     SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
-    return InternalUncompressAllTags(&decompressor, &writer,
+    return InternalUncompressAllTags(&decompressor, &writer, compressed_len,
+                                     uncompressed_len);
   }
 }
 