snappy 0.0.17-java → 0.1.0-java

@@ -45,6 +45,14 @@
  #include <sys/mman.h>
  #endif

+ #ifdef HAVE_UNISTD_H
+ #include <unistd.h>
+ #endif
+
+ #if defined(_MSC_VER)
+ #include <intrin.h>
+ #endif // defined(_MSC_VER)
+
  #include "snappy-stubs-public.h"

  #if defined(__x86_64__)
@@ -52,6 +60,14 @@
  // Enable 64-bit optimized versions of some routines.
  #define ARCH_K8 1

+ #elif defined(__ppc64__)
+
+ #define ARCH_PPC 1
+
+ #elif defined(__aarch64__)
+
+ #define ARCH_ARM 1
+
  #endif

  // Needed by OS X, among others.
@@ -59,10 +75,6 @@
  #define MAP_ANONYMOUS MAP_ANON
  #endif

- // Pull in std::min, std::ostream, and the likes. This is safe because this
- // header file is never used from any public header files.
- using namespace std;
-
  // The size of an array, if known at compile-time.
  // Will give unexpected results if used on a pointer.
  // We undefine it first, since some compilers already have a definition.
@@ -73,11 +85,11 @@ using namespace std;

  // Static prediction hints.
  #ifdef HAVE_BUILTIN_EXPECT
- #define PREDICT_FALSE(x) (__builtin_expect(x, 0))
- #define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+ #define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0))
+ #define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
  #else
- #define PREDICT_FALSE(x) x
- #define PREDICT_TRUE(x) x
+ #define SNAPPY_PREDICT_FALSE(x) x
+ #define SNAPPY_PREDICT_TRUE(x) x
  #endif

  // This is only used for recomputing the tag byte table used during
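Editor's note on the hunk above: PREDICT_TRUE/PREDICT_FALSE are renamed to SNAPPY_PREDICT_TRUE/SNAPPY_PREDICT_FALSE, presumably to avoid clashing with identically named macros elsewhere; the expansion through __builtin_expect is unchanged. A minimal stand-alone sketch of the same branch-hint pattern (illustration only, not code from the diff):

```cpp
// Sketch of __builtin_expect-based branch hints, assuming a GCC/Clang
// toolchain; other compilers fall back to the plain expression.
#include <cstdio>
#include <cstdlib>

#if defined(__GNUC__) || defined(__clang__)
#define LIKELY(x)   (__builtin_expect(!!(x), 1))
#define UNLIKELY(x) (__builtin_expect(!!(x), 0))
#else
#define LIKELY(x)   (x)
#define UNLIKELY(x) (x)
#endif

int ParseByte(int c) {
  // The error path is rare, so hint the compiler to lay out the common
  // path as the fall-through case.
  if (UNLIKELY(c < 0)) {
    std::fprintf(stderr, "unexpected negative byte\n");
    std::abort();
  }
  return c & 0xff;
}

int main() {
  std::printf("%d\n", ParseByte(300));  // prints 44
  return 0;
}
```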
@@ -96,9 +108,10 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);

  // Potentially unaligned loads and stores.

- // x86 and PowerPC can simply do these loads and stores native.
+ // x86, PowerPC, and ARM64 can simply do these loads and stores native.

- #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
+ #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \
+ defined(__aarch64__)

  #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
  #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
@@ -225,22 +238,8 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {

  #endif

- // This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64
- // on some platforms, in particular ARM.
- inline void UnalignedCopy64(const void *src, void *dst) {
- if (sizeof(void *) == 8) {
- UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src));
- } else {
- const char *src_char = reinterpret_cast<const char *>(src);
- char *dst_char = reinterpret_cast<char *>(dst);
-
- UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char));
- UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4));
- }
- }
-
  // The following guarantees declaration of the byte swap functions.
- #ifdef WORDS_BIGENDIAN
+ #if defined(SNAPPY_IS_BIG_ENDIAN)

  #ifdef HAVE_SYS_BYTEORDER_H
  #include <sys/byteorder.h>
@@ -297,7 +296,7 @@ inline uint64 bswap_64(uint64 x) {

  #endif

- #endif // WORDS_BIGENDIAN
+ #endif // defined(SNAPPY_IS_BIG_ENDIAN)

  // Convert to little-endian storage, opposite of network format.
  // Convert x from host to little endian: x = LittleEndian.FromHost(x);
@@ -311,7 +310,7 @@ inline uint64 bswap_64(uint64 x) {
  class LittleEndian {
  public:
  // Conversion functions.
- #ifdef WORDS_BIGENDIAN
+ #if defined(SNAPPY_IS_BIG_ENDIAN)

  static uint16 FromHost16(uint16 x) { return bswap_16(x); }
  static uint16 ToHost16(uint16 x) { return bswap_16(x); }
@@ -321,7 +320,7 @@ class LittleEndian {

  static bool IsLittleEndian() { return false; }

- #else // !defined(WORDS_BIGENDIAN)
+ #else // !defined(SNAPPY_IS_BIG_ENDIAN)

  static uint16 FromHost16(uint16 x) { return x; }
  static uint16 ToHost16(uint16 x) { return x; }
@@ -331,7 +330,7 @@ class LittleEndian {
  static bool IsLittleEndian() { return true; }


- #endif // !defined(WORDS_BIGENDIAN)
+ #endif // !defined(SNAPPY_IS_BIG_ENDIAN)

  // Functions to do unaligned loads and stores in little-endian order.
  static uint16 Load16(const void *p) {
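Editor's note: the hunks above only swap the configure-style WORDS_BIGENDIAN macro for SNAPPY_IS_BIG_ENDIAN; the LittleEndian helpers still mean "byte-swap on big-endian hosts, pass through on little-endian ones". For comparison, a stand-alone sketch of a 32-bit little-endian load that needs no endianness macro at all (illustration only, not part of the diff):

```cpp
// Assemble the value byte by byte, so the result is the same on any host.
#include <cstdint>
#include <cstdio>

static uint32_t LoadLE32(const void* p) {
  const unsigned char* b = static_cast<const unsigned char*>(p);
  // Byte 0 is least significant in little-endian storage.
  return static_cast<uint32_t>(b[0]) |
         (static_cast<uint32_t>(b[1]) << 8) |
         (static_cast<uint32_t>(b[2]) << 16) |
         (static_cast<uint32_t>(b[3]) << 24);
}

int main() {
  const unsigned char buf[4] = {0x78, 0x56, 0x34, 0x12};
  std::printf("0x%08x\n", LoadLE32(buf));  // prints 0x12345678 on any host
  return 0;
}
```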
@@ -361,10 +360,15 @@ class Bits {
  // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except
  // that it's 0-indexed.
  static int FindLSBSetNonZero(uint32 n);
+
+ #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
  static int FindLSBSetNonZero64(uint64 n);
+ #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)

  private:
- DISALLOW_COPY_AND_ASSIGN(Bits);
+ // No copying
+ Bits(const Bits&);
+ void operator=(const Bits&);
  };

  #ifdef HAVE_BUILTIN_CTZ
@@ -377,9 +381,36 @@ inline int Bits::FindLSBSetNonZero(uint32 n) {
  return __builtin_ctz(n);
  }

+ #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
  inline int Bits::FindLSBSetNonZero64(uint64 n) {
  return __builtin_ctzll(n);
  }
+ #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+
+ #elif defined(_MSC_VER)
+
+ inline int Bits::Log2Floor(uint32 n) {
+ unsigned long where;
+ if (_BitScanReverse(&where, n)) {
+ return where;
+ } else {
+ return -1;
+ }
+ }
+
+ inline int Bits::FindLSBSetNonZero(uint32 n) {
+ unsigned long where;
+ if (_BitScanForward(&where, n)) return static_cast<int>(where);
+ return 32;
+ }
+
+ #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+ inline int Bits::FindLSBSetNonZero64(uint64 n) {
+ unsigned long where;
+ if (_BitScanForward64(&where, n)) return static_cast<int>(where);
+ return 64;
+ }
+ #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)

  #else // Portable versions.

@@ -413,6 +444,7 @@ inline int Bits::FindLSBSetNonZero(uint32 n) {
  return rc;
  }

+ #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
  // FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
  inline int Bits::FindLSBSetNonZero64(uint64 n) {
  const uint32 bottombits = static_cast<uint32>(n);
@@ -423,6 +455,7 @@ inline int Bits::FindLSBSetNonZero64(uint64 n) {
  return FindLSBSetNonZero(bottombits);
  }
  }
+ #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)

  #endif // End portable versions.
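Editor's note: the new _MSC_VER branch above is why <intrin.h> is now included at the top of the header; _BitScanReverse/_BitScanForward take the place of the GCC/Clang __builtin_clz/__builtin_ctz family. A condensed stand-alone sketch of the 64-bit case (illustration only; the MSVC path assumes a 64-bit target, where _BitScanForward64 is available):

```cpp
// 0-indexed "find lowest set bit" for a nonzero 64-bit value.
#include <cstdint>
#include <cstdio>
#if defined(_MSC_VER)
#include <intrin.h>
#endif

static int FindLSBSetNonZero64(uint64_t n) {
#if defined(_MSC_VER)
  unsigned long where;
  _BitScanForward64(&where, n);        // n must be nonzero
  return static_cast<int>(where);
#else
  return __builtin_ctzll(n);           // counts trailing zero bits
#endif
}

int main() {
  std::printf("%d\n", FindLSBSetNonZero64(0x10));  // prints 4
  return 0;
}
```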
@@ -36,21 +36,21 @@
  #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
  #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_

- #if @ac_cv_have_stdint_h@
+ #if ${HAVE_STDINT_H_01} // HAVE_STDINT_H
  #include <stdint.h>
- #endif
+ #endif // HAVE_STDINT_H
 
- #if @ac_cv_have_stddef_h@
+ #if ${HAVE_STDDEF_H_01} // HAVE_STDDEF_H
  #include <stddef.h>
- #endif
+ #endif // HAVE_STDDEF_H

- #if @ac_cv_have_sys_uio_h@
+ #if ${HAVE_SYS_UIO_H_01} // HAVE_SYS_UIO_H
  #include <sys/uio.h>
- #endif
+ #endif // HAVE_SYS_UIO_H

- #define SNAPPY_MAJOR @SNAPPY_MAJOR@
- #define SNAPPY_MINOR @SNAPPY_MINOR@
- #define SNAPPY_PATCHLEVEL @SNAPPY_PATCHLEVEL@
+ #define SNAPPY_MAJOR ${SNAPPY_MAJOR}
+ #define SNAPPY_MINOR ${SNAPPY_MINOR}
+ #define SNAPPY_PATCHLEVEL ${SNAPPY_PATCHLEVEL}
  #define SNAPPY_VERSION \
  ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
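Editor's note: the substitution style changes from autoconf (@SNAPPY_MAJOR@) to CMake (${SNAPPY_MAJOR}), but the SNAPPY_VERSION packing is unchanged: major above bit 16, minor in bits 8-15, patchlevel in bits 0-7. A quick worked example with made-up numbers (not taken from the diff):

```cpp
// Pack and unpack a three-part version number the same way SNAPPY_VERSION does.
#include <cstdio>

#define MY_MAJOR 1
#define MY_MINOR 1
#define MY_PATCHLEVEL 4
#define MY_VERSION ((MY_MAJOR << 16) | (MY_MINOR << 8) | MY_PATCHLEVEL)

int main() {
  std::printf("packed: 0x%06x\n", (unsigned)MY_VERSION);    // 0x010104
  std::printf("major:  %d\n", (MY_VERSION >> 16) & 0xff);   // 1
  std::printf("minor:  %d\n", (MY_VERSION >> 8) & 0xff);    // 1
  std::printf("patch:  %d\n", MY_VERSION & 0xff);           // 4
  return 0;
}
```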
@@ -58,7 +58,7 @@

  namespace snappy {

- #if @ac_cv_have_stdint_h@
+ #if ${HAVE_STDINT_H_01} // HAVE_STDINT_H
  typedef int8_t int8;
  typedef uint8_t uint8;
  typedef int16_t int16;
@@ -76,24 +76,18 @@ typedef int int32;
  typedef unsigned int uint32;
  typedef long long int64;
  typedef unsigned long long uint64;
- #endif
+ #endif // HAVE_STDINT_H

  typedef std::string string;

- #ifndef DISALLOW_COPY_AND_ASSIGN
- #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
- TypeName(const TypeName&); \
- void operator=(const TypeName&)
- #endif
-
- #if !@ac_cv_have_sys_uio_h@
+ #if !${HAVE_SYS_UIO_H_01} // !HAVE_SYS_UIO_H
  // Windows does not have an iovec type, yet the concept is universally useful.
  // It is simple to define it ourselves, so we put it inside our own namespace.
  struct iovec {
  void* iov_base;
  size_t iov_len;
  };
- #endif
+ #endif // !HAVE_SYS_UIO_H

  } // namespace snappy
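Editor's note: the DISALLOW_COPY_AND_ASSIGN macro is dropped here (the Bits class earlier in the diff now declares its copy operations privately by hand), while the iovec fallback for platforms without <sys/uio.h> stays. A stand-alone sketch of how such a scatter/gather struct is filled (the demo namespace and buffers are hypothetical, not from the diff):

```cpp
// Same shape as the struct the header defines when !HAVE_SYS_UIO_H.
#include <cstddef>
#include <cstdio>

namespace demo {
struct iovec {
  void* iov_base;   // start of one piece of the scattered buffer
  size_t iov_len;   // its length in bytes
};
}  // namespace demo

int main() {
  char block_a[16];
  char block_b[32];
  demo::iovec iov[2] = {{block_a, sizeof(block_a)}, {block_b, sizeof(block_b)}};
  size_t total = 0;
  for (const auto& v : iov) total += v.iov_len;
  std::printf("scatter buffer of %zu bytes in %zu pieces\n", total,
              sizeof(iov) / sizeof(iov[0]));
  return 0;
}
```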
@@ -33,6 +33,9 @@
  #endif

  #ifdef HAVE_WINDOWS_H
+ // Needed to be able to use std::max without workarounds in the source code.
+ // https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts
+ #define NOMINMAX
  #include <windows.h>
  #endif
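Editor's note: NOMINMAX has to be defined before <windows.h>, which otherwise defines min and max as macros and breaks qualified calls such as the std::max added in the Benchmark::Run hunk that follows. Stand-alone illustration (not from the diff):

```cpp
// With NOMINMAX, std::max compiles cleanly on Windows; without it the
// max macro from <windows.h> would mangle the expression.
#if defined(_WIN32)
#define NOMINMAX           // must come before <windows.h>
#include <windows.h>
#endif
#include <algorithm>
#include <cstdio>

int main() {
  int n = std::max(3, 7);
  std::printf("%d\n", n);  // prints 7
  return 0;
}
```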
@@ -201,7 +204,7 @@ void Benchmark::Run() {
  if (benchmark_real_time_us > 0) {
  num_iterations = 200000 * kCalibrateIterations / benchmark_real_time_us;
  }
- num_iterations = max(num_iterations, kCalibrateIterations);
+ num_iterations = std::max(num_iterations, kCalibrateIterations);
  BenchmarkRun benchmark_runs[kNumRuns];

  for (int run = 0; run < kNumRuns; ++run) {
@@ -217,10 +220,10 @@ void Benchmark::Run() {
  string heading = StringPrintf("%s/%d", name_.c_str(), test_case_num);
  string human_readable_speed;

- nth_element(benchmark_runs,
- benchmark_runs + kMedianPos,
- benchmark_runs + kNumRuns,
- BenchmarkCompareCPUTime());
+ std::nth_element(benchmark_runs,
+ benchmark_runs + kMedianPos,
+ benchmark_runs + kNumRuns,
+ BenchmarkCompareCPUTime());
  int64 real_time_us = benchmark_runs[kMedianPos].real_time_us;
  int64 cpu_time_us = benchmark_runs[kMedianPos].cpu_time_us;
  if (cpu_time_us <= 0) {
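Editor's note: the calls above gain explicit std:: qualification, presumably because the `using namespace std;` removed earlier in this diff no longer pulls the names in. std::nth_element only partially sorts: after it returns, the element at kMedianPos is the one a full sort would put there, which is all the median selection here needs. A worked stand-alone example (not from the diff):

```cpp
// Pick the median of five benchmark runs without fully sorting them.
#include <algorithm>
#include <cstdio>

int main() {
  long long cpu_time_us[5] = {900, 120, 450, 300, 700};
  const int kMedianPos = 2;  // middle of 5 runs
  std::nth_element(cpu_time_us, cpu_time_us + kMedianPos, cpu_time_us + 5);
  std::printf("median cpu time: %lld us\n", cpu_time_us[kMedianPos]);  // 450
  return 0;
}
```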
@@ -523,8 +526,8 @@ int ZLib::UncompressAtMostOrAll(Bytef *dest, uLongf *destLen,
  LOG(WARNING)
  << "UncompressChunkOrAll: Received some extra data, bytes total: "
  << uncomp_stream_.avail_in << " bytes: "
- << string(reinterpret_cast<const char *>(uncomp_stream_.next_in),
- min(int(uncomp_stream_.avail_in), 20));
+ << std::string(reinterpret_cast<const char *>(uncomp_stream_.next_in),
+ std::min(int(uncomp_stream_.avail_in), 20));
  UncompressErrorInit();
  return Z_DATA_ERROR; // what's the extra data for?
  } else if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) {
@@ -110,26 +110,8 @@
  #include "lzo/lzo1x.h"
  #endif

- #ifdef HAVE_LIBLZF
- extern "C" {
- #include "lzf.h"
- }
- #endif
-
- #ifdef HAVE_LIBFASTLZ
- #include "fastlz.h"
- #endif
-
- #ifdef HAVE_LIBQUICKLZ
- #include "quicklz.h"
- #endif
-
  namespace {

- namespace File {
- void Init() { }
- } // namespace File
-
  namespace file {
  int Defaults() { return 0; }

@@ -138,7 +120,8 @@ namespace file {
  void CheckSuccess() { }
  };

- DummyStatus GetContents(const string& filename, string* data, int unused) {
+ DummyStatus GetContents(
+ const std::string& filename, std::string* data, int unused) {
  FILE* fp = fopen(filename.c_str(), "rb");
  if (fp == NULL) {
  perror(filename.c_str());
@@ -153,7 +136,7 @@ namespace file {
  perror("fread");
  exit(1);
  }
- data->append(string(buf, ret));
+ data->append(std::string(buf, ret));
  }

  fclose(fp);
@@ -161,9 +144,8 @@ namespace file {
  return DummyStatus();
  }

- DummyStatus SetContents(const string& filename,
- const string& str,
- int unused) {
+ inline DummyStatus SetContents(
+ const std::string& filename, const std::string& str, int unused) {
  FILE* fp = fopen(filename.c_str(), "wb");
  if (fp == NULL) {
  perror(filename.c_str());
@@ -467,7 +449,7 @@ class ZLib {

  DECLARE_bool(run_microbenchmarks);

- static void RunSpecifiedBenchmarks() {
+ static inline void RunSpecifiedBenchmarks() {
  if (!FLAGS_run_microbenchmarks) {
  return;
  }
@@ -515,10 +497,6 @@ static inline int RUN_ALL_TESTS() {
  // For main().
  namespace snappy {

- static void CompressFile(const char* fname);
- static void UncompressFile(const char* fname);
- static void MeasureFile(const char* fname);
-
  // Logging.

  #define LOG(level) LogMessage()
@@ -529,15 +507,15 @@ class LogMessage {
  public:
  LogMessage() { }
  ~LogMessage() {
- cerr << endl;
+ std::cerr << std::endl;
  }

  LogMessage& operator<<(const std::string& msg) {
- cerr << msg;
+ std::cerr << msg;
  return *this;
  }
  LogMessage& operator<<(int x) {
- cerr << x;
+ std::cerr << x;
  return *this;
  }
  };
@@ -546,7 +524,7 @@ class LogMessage {
  // and ones that are always active.

  #define CRASH_UNLESS(condition) \
- PREDICT_TRUE(condition) ? (void)0 : \
+ SNAPPY_PREDICT_TRUE(condition) ? (void)0 : \
  snappy::LogMessageVoidify() & snappy::LogMessageCrash()

  #ifdef _MSC_VER
@@ -560,7 +538,7 @@ class LogMessageCrash : public LogMessage {
  public:
  LogMessageCrash() { }
  ~LogMessageCrash() {
- cerr << endl;
+ std::cerr << std::endl;
  abort();
  }
  };
@@ -590,10 +568,6 @@ class LogMessageVoidify {
  #define CHECK_GT(a, b) CRASH_UNLESS((a) > (b))
  #define CHECK_OK(cond) (cond).CheckSuccess()

- } // namespace
-
- using snappy::CompressFile;
- using snappy::UncompressFile;
- using snappy::MeasureFile;
+ } // namespace snappy

  #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
@@ -30,7 +30,16 @@
  #include "snappy-internal.h"
  #include "snappy-sinksource.h"

- #if defined(__x86_64__) || defined(_M_X64)
+ #ifndef SNAPPY_HAVE_SSE2
+ #if defined(__SSE2__) || defined(_M_X64) || \
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+ #define SNAPPY_HAVE_SSE2 1
+ #else
+ #define SNAPPY_HAVE_SSE2 0
+ #endif
+ #endif
+
+ #if SNAPPY_HAVE_SSE2
  #include <emmintrin.h>
  #endif
  #include <stdio.h>
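Editor's note: the block above derives a single SNAPPY_HAVE_SSE2 feature macro from the compiler-specific ones (GCC/Clang's __SSE2__, MSVC's _M_X64 and _M_IX86_FP), so later code such as UnalignedCopy128 in the next hunks can test one switch. A stand-alone sketch of the same pattern (the DEMO_HAVE_SSE2 name and Copy16 helper are hypothetical, not from the diff):

```cpp
// Derive one feature macro, then gate the <emmintrin.h> code on it.
#ifndef DEMO_HAVE_SSE2
#if defined(__SSE2__) || defined(_M_X64) || \
    (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
#define DEMO_HAVE_SSE2 1
#else
#define DEMO_HAVE_SSE2 0
#endif
#endif

#if DEMO_HAVE_SSE2
#include <emmintrin.h>
#endif
#include <cstring>
#include <cstdio>

static void Copy16(const void* src, void* dst) {
#if DEMO_HAVE_SSE2
  // One unaligned 128-bit load and store.
  _mm_storeu_si128(static_cast<__m128i*>(dst),
                   _mm_loadu_si128(static_cast<const __m128i*>(src)));
#else
  std::memcpy(dst, src, 16);
#endif
}

int main() {
  char in[16] = "fifteen chars!!";
  char out[16];
  Copy16(in, out);
  std::printf("%s\n", out);  // prints the copied string
  return 0;
}
```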
@@ -47,7 +56,6 @@ using internal::COPY_2_BYTE_OFFSET;
  using internal::LITERAL;
  using internal::char_table;
  using internal::kMaximumTagLength;
- using internal::wordmask;

  // Any hash function will produce a valid compressed bitstream, but a good
  // hash function reduces the number of collisions and thus yields better
@@ -89,17 +97,21 @@ size_t MaxCompressedLength(size_t source_len) {
  namespace {

  void UnalignedCopy64(const void* src, void* dst) {
- memcpy(dst, src, 8);
+ char tmp[8];
+ memcpy(tmp, src, 8);
+ memcpy(dst, tmp, 8);
  }

  void UnalignedCopy128(const void* src, void* dst) {
  // TODO(alkis): Remove this when we upgrade to a recent compiler that emits
  // SSE2 moves for memcpy(dst, src, 16).
- #ifdef __SSE2__
+ #if SNAPPY_HAVE_SSE2
  __m128i x = _mm_loadu_si128(static_cast<const __m128i*>(src));
  _mm_storeu_si128(static_cast<__m128i*>(dst), x);
  #else
- memcpy(dst, src, 16);
+ char tmp[16];
+ memcpy(tmp, src, 16);
+ memcpy(dst, tmp, 16);
  #endif
  }
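Editor's note: copying through a small local buffer, as the new UnalignedCopy64/UnalignedCopy128 above do, reads the source completely before any byte is written, so the copy stays well-defined even when the source and destination ranges overlap (plain memcpy requires non-overlapping arguments), and the decompressor's incremental copies do overlap. A stand-alone sketch of the idiom (not code from the diff):

```cpp
// Read into a temporary first, then write, so overlapping ranges are safe.
#include <cstring>
#include <cstdio>

static void UnalignedCopy64Demo(const void* src, void* dst) {
  char tmp[8];
  std::memcpy(tmp, src, 8);   // full read happens before the write
  std::memcpy(dst, tmp, 8);
}

int main() {
  // Overlapping copy: extend a short pattern forward, as the decompressor's
  // incremental copies do.
  char buf[16] = "abcd";                // remaining bytes are zero
  UnalignedCopy64Demo(buf, buf + 4);    // buf now holds "abcdabcd"
  std::printf("%s\n", buf);
  return 0;
}
```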
@@ -163,7 +175,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
  // copying 2x 8 bytes at a time.

  // Handle the uncommon case where pattern is less than 8 bytes.
- if (PREDICT_FALSE(pattern_size < 8)) {
+ if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) {
  // Expand pattern to at least 8 bytes. The worse case scenario in terms of
  // buffer usage is when the pattern is size 3. ^ is the original position
  // of op. x are irrelevant bytes copied by the last UnalignedCopy64.
@@ -173,13 +185,13 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
  // abcabcabcabcxxxxx
  // ^
  // The last x is 14 bytes after ^.
- if (PREDICT_TRUE(op <= buf_limit - 14)) {
+ if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 14)) {
  while (pattern_size < 8) {
  UnalignedCopy64(src, op);
  op += pattern_size;
  pattern_size *= 2;
  }
- if (PREDICT_TRUE(op >= op_limit)) return op_limit;
+ if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
  } else {
  return IncrementalCopySlow(src, op, op_limit);
  }
@@ -195,11 +207,11 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
  UnalignedCopy64(src + 8, op + 8);
  src += 16;
  op += 16;
- if (PREDICT_TRUE(op >= op_limit)) return op_limit;
+ if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
  }
  // We only take this branch if we didn't have enough slop and we can do a
  // single 8 byte copy.
- if (PREDICT_FALSE(op <= buf_limit - 8)) {
+ if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) {
  UnalignedCopy64(src, op);
  src += 8;
  op += 8;
@@ -261,7 +273,7 @@ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
  assert(offset < 65536);
  assert(len_less_than_12 == (len < 12));

- if (len_less_than_12 && PREDICT_TRUE(offset < 2048)) {
+ if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) {
  // offset fits in 11 bits. The 3 highest go in the top of the first byte,
  // and the rest go in the second byte.
  *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
@@ -286,7 +298,7 @@ static inline char* EmitCopy(char* op, size_t offset, size_t len,
  // it's in the noise.

  // Emit 64 byte copies but make sure to keep at least four bytes reserved.
- while (PREDICT_FALSE(len >= 68)) {
+ while (SNAPPY_PREDICT_FALSE(len >= 68)) {
  op = EmitCopyAtMost64(op, offset, 64, false);
  len -= 64;
  }
@@ -415,7 +427,7 @@ char* CompressFragment(const char* input,
  const char* next_emit = ip;

  const size_t kInputMarginBytes = 15;
- if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
+ if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
  const char* ip_limit = input + input_size - kInputMarginBytes;

  for (uint32 next_hash = Hash(++ip, shift); ; ) {
@@ -456,7 +468,7 @@ char* CompressFragment(const char* input,
  uint32 bytes_between_hash_lookups = skip >> 5;
  skip += bytes_between_hash_lookups;
  next_ip = ip + bytes_between_hash_lookups;
- if (PREDICT_FALSE(next_ip > ip_limit)) {
+ if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
  goto emit_remainder;
  }
  next_hash = Hash(next_ip, shift);
@@ -465,8 +477,8 @@ char* CompressFragment(const char* input,
  assert(candidate < ip);

  table[hash] = ip - base_ip;
- } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
- UNALIGNED_LOAD32(candidate)));
+ } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
+ UNALIGNED_LOAD32(candidate)));

  // Step 2: A 4-byte match has been found. We'll later see if more
  // than 4 bytes match. But, prior to the match, input
@@ -497,7 +509,7 @@ char* CompressFragment(const char* input,
  assert(0 == memcmp(base, candidate, matched));
  op = EmitCopy(op, offset, matched, p.second);
  next_emit = ip;
- if (PREDICT_FALSE(ip >= ip_limit)) {
+ if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
  goto emit_remainder;
  }
  // We are now looking for a 4-byte match again. We read
@@ -527,6 +539,10 @@ char* CompressFragment(const char* input,
  }
  } // end namespace internal

+ // Called back at every compression call to trace parameters and sizes.
+ static inline void Report(const char *algorithm, size_t compressed_size,
+ size_t uncompressed_size) {}
+
  // Signature of output types needed by decompression code.
  // The decompression code is templatized on a type that obeys this
  // signature so that we do not pay virtual function call overhead in
@@ -567,6 +583,14 @@ char* CompressFragment(const char* input,
  // bool TryFastAppend(const char* ip, size_t available, size_t length);
  // };

+ namespace internal {
+
+ // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
+ static const uint32 wordmask[] = {
+ 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
+ };
+
+ } // end namespace internal

  // Helper class for decompression
  class SnappyDecompressor {
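Editor's note: wordmask[] moves here into namespace internal in snappy.cc (its `using internal::wordmask;` was dropped earlier in the diff). Its job is to keep only the bottom 8*i bits of a 32-bit load when an i-byte little-endian length field has to be read. A stand-alone sketch (not code from the diff; the memcpy-based load and the little-endian interpretation are assumptions of the demo):

```cpp
// Mask off all but the low n bytes of a 32-bit load, n in [0,4].
#include <cstdint>
#include <cstring>
#include <cstdio>

static const uint32_t wordmask[] = {
  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
};

static uint32_t LoadNBytes(const void* p, int n) {
  uint32_t v;
  std::memcpy(&v, p, sizeof(v));     // unaligned-safe 32-bit load
  return v & wordmask[n];            // keep only the bottom n bytes
}

int main() {
  const unsigned char data[4] = {0x78, 0x56, 0x34, 0x12};
  // On a little-endian host this prints 0x5678: the two low-order bytes.
  std::printf("0x%x\n", LoadNBytes(data, 2));
  return 0;
}
```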
@@ -638,7 +662,16 @@ class SnappyDecompressor {
  // For position-independent executables, accessing global arrays can be
  // slow. Move wordmask array onto the stack to mitigate this.
  uint32 wordmask[sizeof(internal::wordmask)/sizeof(uint32)];
- memcpy(wordmask, internal::wordmask, sizeof(wordmask));
+ // Do not use memcpy to copy internal::wordmask to
+ // wordmask. LLVM converts stack arrays to global arrays if it detects
+ // const stack arrays and this hurts the performance of position
+ // independent code. This change is temporary and can be reverted when
+ // https://reviews.llvm.org/D30759 is approved.
+ wordmask[0] = internal::wordmask[0];
+ wordmask[1] = internal::wordmask[1];
+ wordmask[2] = internal::wordmask[2];
+ wordmask[3] = internal::wordmask[3];
+ wordmask[4] = internal::wordmask[4];

  // We could have put this refill fragment only at the beginning of the loop.
  // However, duplicating it at the end of each branch gives the compiler more
@@ -652,6 +685,13 @@ class SnappyDecompressor {
  }

  MAYBE_REFILL();
+ // Add loop alignment directive. Without this directive, we observed
+ // significant performance degradation on several intel architectures
+ // in snappy benchmark built with LLVM. The degradation was caused by
+ // increased branch miss prediction.
+ #if defined(__clang__) && defined(__x86_64__)
+ asm volatile (".p2align 5");
+ #endif
  for ( ;; ) {
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));

@@ -667,7 +707,7 @@ class SnappyDecompressor {
  // txt[1-4] 25% 75%
  // pb 24% 76%
  // bin 24% 76%
- if (PREDICT_FALSE((c & 0x3) == LITERAL)) {
+ if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
  size_t literal_length = (c >> 2) + 1u;
  if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
  assert(literal_length < 61);
@@ -677,7 +717,7 @@ class SnappyDecompressor {
  // bytes in addition to the literal.
  continue;
  }
- if (PREDICT_FALSE(literal_length >= 61)) {
+ if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
  // Long literal.
  const size_t literal_length_length = literal_length - 60;
  literal_length =
@@ -757,7 +797,7 @@ bool SnappyDecompressor::RefillTag() {
  size_t length;
  const char* src = reader_->Peek(&length);
  if (length == 0) return false;
- uint32 to_add = min<uint32>(needed - nbuf, length);
+ uint32 to_add = std::min<uint32>(needed - nbuf, length);
  memcpy(scratch_ + nbuf, src, to_add);
  nbuf += to_add;
  reader_->Skip(to_add);
@@ -786,13 +826,18 @@ static bool InternalUncompress(Source* r, Writer* writer) {
  SnappyDecompressor decompressor(r);
  uint32 uncompressed_len = 0;
  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
- return InternalUncompressAllTags(&decompressor, writer, uncompressed_len);
+
+ return InternalUncompressAllTags(&decompressor, writer, r->Available(),
+ uncompressed_len);
  }

  template <typename Writer>
  static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
  Writer* writer,
+ uint32 compressed_len,
  uint32 uncompressed_len) {
+ Report("snappy_uncompress", compressed_len, uncompressed_len);
+
  writer->SetExpectedLength(uncompressed_len);

  // Process the entire input
@@ -809,6 +854,7 @@ bool GetUncompressedLength(Source* source, uint32* result) {
  size_t Compress(Source* reader, Sink* writer) {
  size_t written = 0;
  size_t N = reader->Available();
+ const size_t uncompressed_size = N;
  char ulength[Varint::kMax32];
  char* p = Varint::Encode32(ulength, N);
  writer->Append(ulength, p-ulength);
@@ -823,7 +869,7 @@ size_t Compress(Source* reader, Sink* writer) {
  size_t fragment_size;
  const char* fragment = reader->Peek(&fragment_size);
  assert(fragment_size != 0); // premature end of input
- const size_t num_to_read = min(N, kBlockSize);
+ const size_t num_to_read = std::min(N, kBlockSize);
  size_t bytes_read = fragment_size;

  size_t pending_advance = 0;
@@ -844,7 +890,7 @@ size_t Compress(Source* reader, Sink* writer) {

  while (bytes_read < num_to_read) {
  fragment = reader->Peek(&fragment_size);
- size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
+ size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
  memcpy(scratch + bytes_read, fragment, n);
  bytes_read += n;
  reader->Skip(n);
@@ -881,6 +927,8 @@ size_t Compress(Source* reader, Sink* writer) {
  reader->Skip(pending_advance);
  }

+ Report("snappy_compress", written, uncompressed_size);
+

  delete[] scratch;
  delete[] scratch_output;
@@ -1313,7 +1361,8 @@ class SnappyScatteredWriter {
  char* const op_end = op_ptr_ + len;
  // See SnappyArrayWriter::AppendFromSelf for an explanation of
  // the "offset - 1u" trick.
- if (PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ && op_end <= op_limit_)) {
+ if (SNAPPY_PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ &&
+ op_end <= op_limit_)) {
  // Fast path: src and dst in current block.
  op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_);
  return true;
@@ -1344,7 +1393,7 @@ bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
  }

  // Make new block
- size_t bsize = min<size_t>(kBlockSize, expected_ - full_size_);
+ size_t bsize = std::min<size_t>(kBlockSize, expected_ - full_size_);
  op_base_ = allocator_.Allocate(bsize);
  op_ptr_ = op_base_;
  op_limit_ = op_base_ + bsize;
@@ -1401,7 +1450,7 @@ class SnappySinkAllocator {
  size_t size_written = 0;
  size_t block_size;
  for (int i = 0; i < blocks_.size(); ++i) {
- block_size = min<size_t>(blocks_[i].size, size - size_written);
+ block_size = std::min<size_t>(blocks_[i].size, size - size_written);
  dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
  &SnappySinkAllocator::Deleter, NULL);
  size_written += block_size;
@@ -1446,18 +1495,20 @@ bool Uncompress(Source* compressed, Sink* uncompressed) {
  char* buf = uncompressed->GetAppendBufferVariable(
  1, uncompressed_len, &c, 1, &allocated_size);

+ const size_t compressed_len = compressed->Available();
  // If we can get a flat buffer, then use it, otherwise do block by block
  // uncompression
  if (allocated_size >= uncompressed_len) {
  SnappyArrayWriter writer(buf);
- bool result = InternalUncompressAllTags(
- &decompressor, &writer, uncompressed_len);
+ bool result = InternalUncompressAllTags(&decompressor, &writer,
+ compressed_len, uncompressed_len);
  uncompressed->Append(buf, writer.Produced());
  return result;
  } else {
  SnappySinkAllocator allocator(uncompressed);
  SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
- return InternalUncompressAllTags(&decompressor, &writer, uncompressed_len);
+ return InternalUncompressAllTags(&decompressor, &writer, compressed_len,
+ uncompressed_len);
  }
  }