snappy 0.0.17 → 0.1.0

This diff shows the changes between two publicly released versions of the package, as they appear in the supported public registries. It is provided for informational purposes only.
Files changed (66)
  1. checksums.yaml +5 -5
  2. data/.travis.yml +6 -1
  3. data/Gemfile +5 -0
  4. data/README.md +28 -4
  5. data/Rakefile +1 -0
  6. data/ext/extconf.rb +21 -16
  7. data/lib/snappy.rb +1 -0
  8. data/lib/snappy/hadoop.rb +22 -0
  9. data/lib/snappy/hadoop/reader.rb +58 -0
  10. data/lib/snappy/hadoop/writer.rb +51 -0
  11. data/lib/snappy/reader.rb +4 -4
  12. data/lib/snappy/version.rb +1 -1
  13. data/smoke.sh +1 -1
  14. data/snappy.gemspec +0 -4
  15. data/test/hadoop/test-snappy-hadoop-reader.rb +103 -0
  16. data/test/hadoop/test-snappy-hadoop-writer.rb +48 -0
  17. data/test/test-snappy-hadoop.rb +22 -0
  18. data/vendor/snappy/CMakeLists.txt +174 -0
  19. data/vendor/snappy/CONTRIBUTING.md +26 -0
  20. data/vendor/snappy/NEWS +32 -0
  21. data/vendor/snappy/{README → README.md} +13 -3
  22. data/vendor/snappy/cmake/SnappyConfig.cmake +1 -0
  23. data/vendor/snappy/cmake/config.h.in +62 -0
  24. data/vendor/snappy/snappy-internal.h +9 -12
  25. data/vendor/snappy/snappy-stubs-internal.h +63 -30
  26. data/vendor/snappy/snappy-stubs-public.h.in +13 -19
  27. data/vendor/snappy/snappy-test.cc +10 -7
  28. data/vendor/snappy/snappy-test.h +12 -38
  29. data/vendor/snappy/snappy.cc +81 -30
  30. data/vendor/snappy/snappy_unittest.cc +16 -137
  31. metadata +18 -82
  32. data/vendor/snappy/ChangeLog +0 -2468
  33. data/vendor/snappy/INSTALL +0 -370
  34. data/vendor/snappy/Makefile +0 -982
  35. data/vendor/snappy/Makefile.am +0 -26
  36. data/vendor/snappy/Makefile.in +0 -982
  37. data/vendor/snappy/aclocal.m4 +0 -9738
  38. data/vendor/snappy/autogen.sh +0 -12
  39. data/vendor/snappy/autom4te.cache/output.0 +0 -18856
  40. data/vendor/snappy/autom4te.cache/output.1 +0 -18852
  41. data/vendor/snappy/autom4te.cache/requests +0 -297
  42. data/vendor/snappy/autom4te.cache/traces.0 +0 -2689
  43. data/vendor/snappy/autom4te.cache/traces.1 +0 -714
  44. data/vendor/snappy/config.guess +0 -1530
  45. data/vendor/snappy/config.h +0 -135
  46. data/vendor/snappy/config.h.in +0 -134
  47. data/vendor/snappy/config.log +0 -1640
  48. data/vendor/snappy/config.status +0 -2318
  49. data/vendor/snappy/config.sub +0 -1773
  50. data/vendor/snappy/configure +0 -18852
  51. data/vendor/snappy/configure.ac +0 -134
  52. data/vendor/snappy/depcomp +0 -688
  53. data/vendor/snappy/install-sh +0 -527
  54. data/vendor/snappy/libtool +0 -10246
  55. data/vendor/snappy/ltmain.sh +0 -9661
  56. data/vendor/snappy/m4/gtest.m4 +0 -74
  57. data/vendor/snappy/m4/libtool.m4 +0 -8001
  58. data/vendor/snappy/m4/ltoptions.m4 +0 -384
  59. data/vendor/snappy/m4/ltsugar.m4 +0 -123
  60. data/vendor/snappy/m4/ltversion.m4 +0 -23
  61. data/vendor/snappy/m4/lt~obsolete.m4 +0 -98
  62. data/vendor/snappy/missing +0 -331
  63. data/vendor/snappy/snappy-stubs-public.h +0 -100
  64. data/vendor/snappy/snappy.pc +0 -10
  65. data/vendor/snappy/snappy.pc.in +0 -10
  66. data/vendor/snappy/stamp-h1 +0 -1
data/vendor/snappy/snappy-stubs-internal.h

@@ -45,6 +45,14 @@
  #include <sys/mman.h>
  #endif
 
+ #ifdef HAVE_UNISTD_H
+ #include <unistd.h>
+ #endif
+
+ #if defined(_MSC_VER)
+ #include <intrin.h>
+ #endif // defined(_MSC_VER)
+
  #include "snappy-stubs-public.h"
 
  #if defined(__x86_64__)
@@ -52,6 +60,14 @@
  // Enable 64-bit optimized versions of some routines.
  #define ARCH_K8 1
 
+ #elif defined(__ppc64__)
+
+ #define ARCH_PPC 1
+
+ #elif defined(__aarch64__)
+
+ #define ARCH_ARM 1
+
  #endif
 
  // Needed by OS X, among others.
@@ -59,10 +75,6 @@
  #define MAP_ANONYMOUS MAP_ANON
  #endif
 
- // Pull in std::min, std::ostream, and the likes. This is safe because this
- // header file is never used from any public header files.
- using namespace std;
-
  // The size of an array, if known at compile-time.
  // Will give unexpected results if used on a pointer.
  // We undefine it first, since some compilers already have a definition.
@@ -73,11 +85,11 @@ using namespace std;
 
  // Static prediction hints.
  #ifdef HAVE_BUILTIN_EXPECT
- #define PREDICT_FALSE(x) (__builtin_expect(x, 0))
- #define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+ #define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0))
+ #define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
  #else
- #define PREDICT_FALSE(x) x
- #define PREDICT_TRUE(x) x
+ #define SNAPPY_PREDICT_FALSE(x) x
+ #define SNAPPY_PREDICT_TRUE(x) x
  #endif
 
  // This is only used for recomputing the tag byte table used during
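The rename above is mechanical, but the idiom behind these macros is worth a concrete illustration. Below is a minimal, self-contained sketch (not part of the diff) of how `__builtin_expect`-based hint macros such as `SNAPPY_PREDICT_FALSE` are typically used; the macro names and the `append_byte` helper here are hypothetical.

```cpp
#include <cstddef>

// Same pattern as the macros above: hint wrappers that compile away to the
// bare expression when __builtin_expect is unavailable.
#if defined(__GNUC__) || defined(__clang__)
#define PREDICT_FALSE_SKETCH(x) (__builtin_expect(!!(x), 0))
#define PREDICT_TRUE_SKETCH(x) (__builtin_expect(!!(x), 1))
#else
#define PREDICT_FALSE_SKETCH(x) (x)
#define PREDICT_TRUE_SKETCH(x) (x)
#endif

// Hypothetical hot-path helper: the failure branch is marked unlikely, so the
// compiler lays out the success path as the straight-line fall-through.
bool append_byte(char* buf, std::size_t used, std::size_t cap, char c) {
  if (PREDICT_FALSE_SKETCH(used >= cap)) {
    return false;  // cold path: buffer exhausted
  }
  buf[used] = c;   // hot path
  return true;
}
```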
@@ -96,9 +108,10 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
 
  // Potentially unaligned loads and stores.
 
- // x86 and PowerPC can simply do these loads and stores native.
+ // x86, PowerPC, and ARM64 can simply do these loads and stores native.
 
- #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
+ #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \
+     defined(__aarch64__)
 
  #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
  #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
@@ -225,22 +238,8 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {
 
  #endif
 
- // This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64
- // on some platforms, in particular ARM.
- inline void UnalignedCopy64(const void *src, void *dst) {
-   if (sizeof(void *) == 8) {
-     UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src));
-   } else {
-     const char *src_char = reinterpret_cast<const char *>(src);
-     char *dst_char = reinterpret_cast<char *>(dst);
-
-     UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char));
-     UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4));
-   }
- }
-
  // The following guarantees declaration of the byte swap functions.
- #ifdef WORDS_BIGENDIAN
+ #if defined(SNAPPY_IS_BIG_ENDIAN)
 
  #ifdef HAVE_SYS_BYTEORDER_H
  #include <sys/byteorder.h>
@@ -297,7 +296,7 @@ inline uint64 bswap_64(uint64 x) {
 
  #endif
 
- #endif // WORDS_BIGENDIAN
+ #endif // defined(SNAPPY_IS_BIG_ENDIAN)
 
  // Convert to little-endian storage, opposite of network format.
  // Convert x from host to little endian: x = LittleEndian.FromHost(x);
@@ -311,7 +310,7 @@ inline uint64 bswap_64(uint64 x) {
  class LittleEndian {
   public:
    // Conversion functions.
- #ifdef WORDS_BIGENDIAN
+ #if defined(SNAPPY_IS_BIG_ENDIAN)
 
    static uint16 FromHost16(uint16 x) { return bswap_16(x); }
    static uint16 ToHost16(uint16 x) { return bswap_16(x); }
@@ -321,7 +320,7 @@ class LittleEndian {
 
    static bool IsLittleEndian() { return false; }
 
- #else // !defined(WORDS_BIGENDIAN)
+ #else // !defined(SNAPPY_IS_BIG_ENDIAN)
 
    static uint16 FromHost16(uint16 x) { return x; }
    static uint16 ToHost16(uint16 x) { return x; }
@@ -331,7 +330,7 @@ class LittleEndian {
 
    static bool IsLittleEndian() { return true; }
 
- #endif // !defined(WORDS_BIGENDIAN)
+ #endif // !defined(SNAPPY_IS_BIG_ENDIAN)
 
    // Functions to do unaligned loads and stores in little-endian order.
    static uint16 Load16(const void *p) {
@@ -361,10 +360,15 @@ class Bits {
    // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except
    // that it's 0-indexed.
    static int FindLSBSetNonZero(uint32 n);
+
+ #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
    static int FindLSBSetNonZero64(uint64 n);
+ #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
 
   private:
-   DISALLOW_COPY_AND_ASSIGN(Bits);
+   // No copying
+   Bits(const Bits&);
+   void operator=(const Bits&);
  };
 
  #ifdef HAVE_BUILTIN_CTZ
@@ -377,9 +381,36 @@ inline int Bits::FindLSBSetNonZero(uint32 n) {
    return __builtin_ctz(n);
  }
 
+ #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
  inline int Bits::FindLSBSetNonZero64(uint64 n) {
    return __builtin_ctzll(n);
  }
+ #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+
+ #elif defined(_MSC_VER)
+
+ inline int Bits::Log2Floor(uint32 n) {
+   unsigned long where;
+   if (_BitScanReverse(&where, n)) {
+     return where;
+   } else {
+     return -1;
+   }
+ }
+
+ inline int Bits::FindLSBSetNonZero(uint32 n) {
+   unsigned long where;
+   if (_BitScanForward(&where, n)) return static_cast<int>(where);
+   return 32;
+ }
+
+ #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+ inline int Bits::FindLSBSetNonZero64(uint64 n) {
+   unsigned long where;
+   if (_BitScanForward64(&where, n)) return static_cast<int>(where);
+   return 64;
+ }
+ #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
 
  #else // Portable versions.
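For readers comparing the intrinsic-based paths above with the portable ones that follow, here is a small reference sketch (not from the library) of what the two bit-scan helpers are expected to compute; `Log2FloorRef` and `FindLSBSetNonZeroRef` are hypothetical names.

```cpp
#include <cassert>
#include <cstdint>

// Reference behaviour: Log2Floor(n) is the index of the highest set bit
// (-1 for n == 0); FindLSBSetNonZero(n) is the index of the lowest set bit
// (n must be non-zero). _BitScanReverse/_BitScanForward on MSVC and
// __builtin_clz/__builtin_ctz on GCC/Clang produce the same indices.
static int Log2FloorRef(uint32_t n) {
  int log = -1;
  while (n != 0) { ++log; n >>= 1; }
  return log;
}

static int FindLSBSetNonZeroRef(uint32_t n) {
  int index = 0;
  while ((n & 1u) == 0) { ++index; n >>= 1; }
  return index;
}

int main() {
  assert(Log2FloorRef(0) == -1);
  assert(Log2FloorRef(1) == 0);
  assert(Log2FloorRef(0x80000000u) == 31);
  assert(FindLSBSetNonZeroRef(0x8u) == 3);
  return 0;
}
```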
@@ -413,6 +444,7 @@ inline int Bits::FindLSBSetNonZero(uint32 n) {
    return rc;
  }
 
+ #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
  // FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
  inline int Bits::FindLSBSetNonZero64(uint64 n) {
    const uint32 bottombits = static_cast<uint32>(n);
@@ -423,6 +455,7 @@ inline int Bits::FindLSBSetNonZero64(uint64 n) {
      return FindLSBSetNonZero(bottombits);
    }
  }
+ #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
 
  #endif // End portable versions.
 
data/vendor/snappy/snappy-stubs-public.h.in

@@ -36,21 +36,21 @@
  #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
  #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
 
- #if @ac_cv_have_stdint_h@
+ #if ${HAVE_STDINT_H_01} // HAVE_STDINT_H
  #include <stdint.h>
- #endif
+ #endif // HAVE_STDDEF_H
 
- #if @ac_cv_have_stddef_h@
+ #if ${HAVE_STDDEF_H_01} // HAVE_STDDEF_H
  #include <stddef.h>
- #endif
+ #endif // HAVE_STDDEF_H
 
- #if @ac_cv_have_sys_uio_h@
+ #if ${HAVE_SYS_UIO_H_01} // HAVE_SYS_UIO_H
  #include <sys/uio.h>
- #endif
+ #endif // HAVE_SYS_UIO_H
 
- #define SNAPPY_MAJOR @SNAPPY_MAJOR@
- #define SNAPPY_MINOR @SNAPPY_MINOR@
- #define SNAPPY_PATCHLEVEL @SNAPPY_PATCHLEVEL@
+ #define SNAPPY_MAJOR ${SNAPPY_MAJOR}
+ #define SNAPPY_MINOR ${SNAPPY_MINOR}
+ #define SNAPPY_PATCHLEVEL ${SNAPPY_PATCHLEVEL}
  #define SNAPPY_VERSION \
      ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
 
@@ -58,7 +58,7 @@
 
  namespace snappy {
 
- #if @ac_cv_have_stdint_h@
+ #if ${HAVE_STDINT_H_01} // HAVE_STDINT_H
  typedef int8_t int8;
  typedef uint8_t uint8;
  typedef int16_t int16;
@@ -76,24 +76,18 @@ typedef int int32;
  typedef unsigned int uint32;
  typedef long long int64;
  typedef unsigned long long uint64;
- #endif
+ #endif // HAVE_STDINT_H
 
  typedef std::string string;
 
- #ifndef DISALLOW_COPY_AND_ASSIGN
- #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
-   TypeName(const TypeName&); \
-   void operator=(const TypeName&)
- #endif
-
- #if !@ac_cv_have_sys_uio_h@
+ #if !${HAVE_SYS_UIO_H_01} // !HAVE_SYS_UIO_H
  // Windows does not have an iovec type, yet the concept is universally useful.
  // It is simple to define it ourselves, so we put it inside our own namespace.
  struct iovec {
    void* iov_base;
    size_t iov_len;
  };
- #endif
+ #endif // !HAVE_SYS_UIO_H
 
  } // namespace snappy
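The switch from `@ac_cv_…@` to `${…}` placeholders reflects the move from autoconf substitution to CMake's `configure_file()` (see the new CMakeLists.txt and cmake/config.h.in in the file list above). A sketch of what the generated header plausibly looks like on a platform that has `<stdint.h>` follows; the concrete 0/1 and version values are illustrative assumptions, not taken from the release.

```cpp
// Generated snappy-stubs-public.h (illustrative): configure_file() replaces
// each ${...} placeholder with a literal value computed at configure time.
#if 1  // HAVE_STDINT_H
#include <stdint.h>
#endif  // HAVE_STDDEF_H

#define SNAPPY_MAJOR 1        // illustrative values only
#define SNAPPY_MINOR 1
#define SNAPPY_PATCHLEVEL 4
#define SNAPPY_VERSION \
    ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
```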
data/vendor/snappy/snappy-test.cc

@@ -33,6 +33,9 @@
  #endif
 
  #ifdef HAVE_WINDOWS_H
+ // Needed to be able to use std::max without workarounds in the source code.
+ // https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts
+ #define NOMINMAX
  #include <windows.h>
  #endif
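For context, `NOMINMAX` matters because `<windows.h>` otherwise defines function-style `min`/`max` macros that break `std::max` expressions such as the one changed in Benchmark::Run() below. A minimal sketch, assuming a Windows build (guarded so it stays inert elsewhere):

```cpp
#ifdef _WIN32
#define NOMINMAX      // keep windows.h from defining min()/max() macros
#include <windows.h>
#endif
#include <algorithm>
#include <cstdint>

// With NOMINMAX in effect, std::max is a normal function call; without it,
// the preprocessor would mangle "std::max(computed, minimum)" on Windows.
int64_t clamp_iterations(int64_t computed, int64_t minimum) {
  return std::max(computed, minimum);
}
```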
@@ -201,7 +204,7 @@ void Benchmark::Run() {
    if (benchmark_real_time_us > 0) {
      num_iterations = 200000 * kCalibrateIterations / benchmark_real_time_us;
    }
-   num_iterations = max(num_iterations, kCalibrateIterations);
+   num_iterations = std::max(num_iterations, kCalibrateIterations);
    BenchmarkRun benchmark_runs[kNumRuns];
 
    for (int run = 0; run < kNumRuns; ++run) {
@@ -217,10 +220,10 @@ void Benchmark::Run() {
    string heading = StringPrintf("%s/%d", name_.c_str(), test_case_num);
    string human_readable_speed;
 
-   nth_element(benchmark_runs,
-               benchmark_runs + kMedianPos,
-               benchmark_runs + kNumRuns,
-               BenchmarkCompareCPUTime());
+   std::nth_element(benchmark_runs,
+                    benchmark_runs + kMedianPos,
+                    benchmark_runs + kNumRuns,
+                    BenchmarkCompareCPUTime());
    int64 real_time_us = benchmark_runs[kMedianPos].real_time_us;
    int64 cpu_time_us = benchmark_runs[kMedianPos].cpu_time_us;
    if (cpu_time_us <= 0) {
@@ -523,8 +526,8 @@ int ZLib::UncompressAtMostOrAll(Bytef *dest, uLongf *destLen,
    LOG(WARNING)
        << "UncompressChunkOrAll: Received some extra data, bytes total: "
        << uncomp_stream_.avail_in << " bytes: "
-       << string(reinterpret_cast<const char *>(uncomp_stream_.next_in),
-                 min(int(uncomp_stream_.avail_in), 20));
+       << std::string(reinterpret_cast<const char *>(uncomp_stream_.next_in),
+                      std::min(int(uncomp_stream_.avail_in), 20));
    UncompressErrorInit();
    return Z_DATA_ERROR; // what's the extra data for?
  } else if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) {
data/vendor/snappy/snappy-test.h

@@ -110,26 +110,8 @@
  #include "lzo/lzo1x.h"
  #endif
 
- #ifdef HAVE_LIBLZF
- extern "C" {
- #include "lzf.h"
- }
- #endif
-
- #ifdef HAVE_LIBFASTLZ
- #include "fastlz.h"
- #endif
-
- #ifdef HAVE_LIBQUICKLZ
- #include "quicklz.h"
- #endif
-
  namespace {
 
- namespace File {
-   void Init() { }
- } // namespace File
-
  namespace file {
    int Defaults() { return 0; }
 
@@ -138,7 +120,8 @@ namespace file {
    void CheckSuccess() { }
  };
 
- DummyStatus GetContents(const string& filename, string* data, int unused) {
+ DummyStatus GetContents(
+     const std::string& filename, std::string* data, int unused) {
    FILE* fp = fopen(filename.c_str(), "rb");
    if (fp == NULL) {
      perror(filename.c_str());
@@ -153,7 +136,7 @@ namespace file {
      perror("fread");
      exit(1);
    }
-   data->append(string(buf, ret));
+   data->append(std::string(buf, ret));
  }
 
  fclose(fp);
@@ -161,9 +144,8 @@ namespace file {
    return DummyStatus();
  }
 
- DummyStatus SetContents(const string& filename,
-                         const string& str,
-                         int unused) {
+ inline DummyStatus SetContents(
+     const std::string& filename, const std::string& str, int unused) {
    FILE* fp = fopen(filename.c_str(), "wb");
    if (fp == NULL) {
      perror(filename.c_str());
@@ -467,7 +449,7 @@ class ZLib {
 
  DECLARE_bool(run_microbenchmarks);
 
- static void RunSpecifiedBenchmarks() {
+ static inline void RunSpecifiedBenchmarks() {
    if (!FLAGS_run_microbenchmarks) {
      return;
    }
@@ -515,10 +497,6 @@ static inline int RUN_ALL_TESTS() {
  // For main().
  namespace snappy {
 
- static void CompressFile(const char* fname);
- static void UncompressFile(const char* fname);
- static void MeasureFile(const char* fname);
-
  // Logging.
 
  #define LOG(level) LogMessage()
@@ -529,15 +507,15 @@ class LogMessage {
   public:
    LogMessage() { }
    ~LogMessage() {
-     cerr << endl;
+     std::cerr << std::endl;
    }
 
    LogMessage& operator<<(const std::string& msg) {
-     cerr << msg;
+     std::cerr << msg;
      return *this;
    }
    LogMessage& operator<<(int x) {
-     cerr << x;
+     std::cerr << x;
      return *this;
    }
  };
@@ -546,7 +524,7 @@ class LogMessage {
  // and ones that are always active.
 
  #define CRASH_UNLESS(condition) \
-     PREDICT_TRUE(condition) ? (void)0 : \
+     SNAPPY_PREDICT_TRUE(condition) ? (void)0 : \
      snappy::LogMessageVoidify() & snappy::LogMessageCrash()
 
  #ifdef _MSC_VER
@@ -560,7 +538,7 @@ class LogMessageCrash : public LogMessage {
   public:
    LogMessageCrash() { }
    ~LogMessageCrash() {
-     cerr << endl;
+     std::cerr << std::endl;
      abort();
    }
  };
@@ -590,10 +568,6 @@ class LogMessageVoidify {
  #define CHECK_GT(a, b) CRASH_UNLESS((a) > (b))
  #define CHECK_OK(cond) (cond).CheckSuccess()
 
- } // namespace
-
- using snappy::CompressFile;
- using snappy::UncompressFile;
- using snappy::MeasureFile;
+ } // namespace snappy
 
  #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
data/vendor/snappy/snappy.cc

@@ -30,7 +30,16 @@
  #include "snappy-internal.h"
  #include "snappy-sinksource.h"
 
- #if defined(__x86_64__) || defined(_M_X64)
+ #ifndef SNAPPY_HAVE_SSE2
+ #if defined(__SSE2__) || defined(_M_X64) || \
+     (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+ #define SNAPPY_HAVE_SSE2 1
+ #else
+ #define SNAPPY_HAVE_SSE2 0
+ #endif
+ #endif
+
+ #if SNAPPY_HAVE_SSE2
  #include <emmintrin.h>
  #endif
  #include <stdio.h>
@@ -47,7 +56,6 @@ using internal::COPY_2_BYTE_OFFSET;
  using internal::LITERAL;
  using internal::char_table;
  using internal::kMaximumTagLength;
- using internal::wordmask;
 
  // Any hash function will produce a valid compressed bitstream, but a good
  // hash function reduces the number of collisions and thus yields better
@@ -89,17 +97,21 @@ size_t MaxCompressedLength(size_t source_len) {
  namespace {
 
  void UnalignedCopy64(const void* src, void* dst) {
-   memcpy(dst, src, 8);
+   char tmp[8];
+   memcpy(tmp, src, 8);
+   memcpy(dst, tmp, 8);
  }
 
  void UnalignedCopy128(const void* src, void* dst) {
    // TODO(alkis): Remove this when we upgrade to a recent compiler that emits
    // SSE2 moves for memcpy(dst, src, 16).
- #ifdef __SSE2__
+ #if SNAPPY_HAVE_SSE2
    __m128i x = _mm_loadu_si128(static_cast<const __m128i*>(src));
    _mm_storeu_si128(static_cast<__m128i*>(dst), x);
  #else
-   memcpy(dst, src, 16);
+   char tmp[16];
+   memcpy(tmp, src, 16);
+   memcpy(dst, tmp, 16);
  #endif
  }
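The rewritten helpers route the copy through a small local buffer so that each individual `memcpy` has strictly disjoint source and destination, which keeps the operation well defined even when the caller's 8- or 16-byte ranges overlap (as they can during incremental copies); that reading of the intent is inferred, not stated in the diff. A standalone sketch of the same memcpy-based unaligned-access idiom:

```cpp
#include <cstdint>
#include <cstring>

// Unaligned 64-bit load/store via memcpy: defined behaviour for any
// alignment, and the compiler typically lowers it to a single move.
inline uint64_t LoadU64(const void* p) {
  uint64_t v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

inline void StoreU64(void* p, uint64_t v) {
  std::memcpy(p, &v, sizeof(v));
}

// Equivalent in spirit to UnalignedCopy64 above: the value passes through a
// temporary, so the two 8-byte ranges may overlap without invoking UB.
inline void CopyU64(const void* src, void* dst) {
  StoreU64(dst, LoadU64(src));
}
```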
@@ -163,7 +175,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
    // copying 2x 8 bytes at a time.
 
    // Handle the uncommon case where pattern is less than 8 bytes.
-   if (PREDICT_FALSE(pattern_size < 8)) {
+   if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) {
      // Expand pattern to at least 8 bytes. The worse case scenario in terms of
      // buffer usage is when the pattern is size 3. ^ is the original position
      // of op. x are irrelevant bytes copied by the last UnalignedCopy64.
@@ -173,13 +185,13 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
    // abcabcabcabcxxxxx
    // ^
    // The last x is 14 bytes after ^.
-   if (PREDICT_TRUE(op <= buf_limit - 14)) {
+   if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 14)) {
      while (pattern_size < 8) {
        UnalignedCopy64(src, op);
        op += pattern_size;
        pattern_size *= 2;
      }
-     if (PREDICT_TRUE(op >= op_limit)) return op_limit;
+     if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
    } else {
      return IncrementalCopySlow(src, op, op_limit);
    }
@@ -195,11 +207,11 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit,
      UnalignedCopy64(src + 8, op + 8);
      src += 16;
      op += 16;
-     if (PREDICT_TRUE(op >= op_limit)) return op_limit;
+     if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit;
    }
    // We only take this branch if we didn't have enough slop and we can do a
    // single 8 byte copy.
-   if (PREDICT_FALSE(op <= buf_limit - 8)) {
+   if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) {
      UnalignedCopy64(src, op);
      src += 8;
      op += 8;
@@ -261,7 +273,7 @@ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len,
    assert(offset < 65536);
    assert(len_less_than_12 == (len < 12));
 
-   if (len_less_than_12 && PREDICT_TRUE(offset < 2048)) {
+   if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) {
      // offset fits in 11 bits. The 3 highest go in the top of the first byte,
      // and the rest go in the second byte.
      *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0);
@@ -286,7 +298,7 @@ static inline char* EmitCopy(char* op, size_t offset, size_t len,
    // it's in the noise.
 
    // Emit 64 byte copies but make sure to keep at least four bytes reserved.
-   while (PREDICT_FALSE(len >= 68)) {
+   while (SNAPPY_PREDICT_FALSE(len >= 68)) {
      op = EmitCopyAtMost64(op, offset, 64, false);
      len -= 64;
    }
@@ -415,7 +427,7 @@ char* CompressFragment(const char* input,
    const char* next_emit = ip;
 
    const size_t kInputMarginBytes = 15;
-   if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
+   if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) {
      const char* ip_limit = input + input_size - kInputMarginBytes;
 
      for (uint32 next_hash = Hash(++ip, shift); ; ) {
@@ -456,7 +468,7 @@ char* CompressFragment(const char* input,
        uint32 bytes_between_hash_lookups = skip >> 5;
        skip += bytes_between_hash_lookups;
        next_ip = ip + bytes_between_hash_lookups;
-       if (PREDICT_FALSE(next_ip > ip_limit)) {
+       if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
@@ -465,8 +477,8 @@ char* CompressFragment(const char* input,
        assert(candidate < ip);
 
        table[hash] = ip - base_ip;
-     } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
-                           UNALIGNED_LOAD32(candidate)));
+     } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
+                                  UNALIGNED_LOAD32(candidate)));
 
      // Step 2: A 4-byte match has been found. We'll later see if more
      // than 4 bytes match. But, prior to the match, input
@@ -497,7 +509,7 @@ char* CompressFragment(const char* input,
      assert(0 == memcmp(base, candidate, matched));
      op = EmitCopy(op, offset, matched, p.second);
      next_emit = ip;
-     if (PREDICT_FALSE(ip >= ip_limit)) {
+     if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) {
        goto emit_remainder;
      }
      // We are now looking for a 4-byte match again. We read
@@ -527,6 +539,10 @@ char* CompressFragment(const char* input,
  }
  } // end namespace internal
 
+ // Called back at avery compression call to trace parameters and sizes.
+ static inline void Report(const char *algorithm, size_t compressed_size,
+                           size_t uncompressed_size) {}
+
  // Signature of output types needed by decompression code.
  // The decompression code is templatized on a type that obeys this
  // signature so that we do not pay virtual function call overhead in
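The new `Report()` above is an empty trace hook that the compression and decompression paths call with the algorithm name and the byte counts (see the `Report("snappy_compress", …)` and `Report("snappy_uncompress", …)` call sites later in this diff). Purely as an illustration of what a local build might drop in there, and not part of the released sources, a logging variant could look like this:

```cpp
#include <cstddef>
#include <cstdio>

// Hypothetical instrumented Report(): print the ratio for every call.
static inline void Report(const char* algorithm, size_t compressed_size,
                          size_t uncompressed_size) {
  double ratio = uncompressed_size == 0
                     ? 0.0
                     : 100.0 * static_cast<double>(compressed_size) /
                           static_cast<double>(uncompressed_size);
  std::fprintf(stderr, "%s: %zu -> %zu bytes (%.1f%%)\n", algorithm,
               uncompressed_size, compressed_size, ratio);
}
```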
@@ -567,6 +583,14 @@ char* CompressFragment(const char* input,
  //   bool TryFastAppend(const char* ip, size_t available, size_t length);
  // };
 
+ namespace internal {
+
+ // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
+ static const uint32 wordmask[] = {
+   0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
+ };
+
+ } // end namespace internal
 
  // Helper class for decompression
  class SnappyDecompressor {
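`wordmask[i]` is simply a mask that keeps the low `8*i` bits of a 32-bit word, which lets the decompressor load a full little-endian word and then discard the bytes that do not belong to the current tag. A simplified sketch of that use (hypothetical helper, little-endian host assumed):

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

static const uint32_t kWordmask[] = {
  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
};

// Read up to 4 length bytes: load a whole 32-bit word (assumes 4 readable
// bytes at ip and a little-endian host) and mask away the surplus bytes.
static uint32_t ReadLowBytes(const uint8_t* ip, int n_bytes) {
  assert(n_bytes >= 0 && n_bytes <= 4);
  uint32_t word;
  std::memcpy(&word, ip, sizeof(word));
  return word & kWordmask[n_bytes];
}
```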
@@ -638,7 +662,16 @@ class SnappyDecompressor {
    // For position-independent executables, accessing global arrays can be
    // slow. Move wordmask array onto the stack to mitigate this.
    uint32 wordmask[sizeof(internal::wordmask)/sizeof(uint32)];
-   memcpy(wordmask, internal::wordmask, sizeof(wordmask));
+   // Do not use memcpy to copy internal::wordmask to
+   // wordmask. LLVM converts stack arrays to global arrays if it detects
+   // const stack arrays and this hurts the performance of position
+   // independent code. This change is temporary and can be reverted when
+   // https://reviews.llvm.org/D30759 is approved.
+   wordmask[0] = internal::wordmask[0];
+   wordmask[1] = internal::wordmask[1];
+   wordmask[2] = internal::wordmask[2];
+   wordmask[3] = internal::wordmask[3];
+   wordmask[4] = internal::wordmask[4];
 
    // We could have put this refill fragment only at the beginning of the loop.
    // However, duplicating it at the end of each branch gives the compiler more
@@ -652,6 +685,13 @@ class SnappyDecompressor {
    }
 
    MAYBE_REFILL();
+   // Add loop alignment directive. Without this directive, we observed
+   // significant performance degradation on several intel architectures
+   // in snappy benchmark built with LLVM. The degradation was caused by
+   // increased branch miss prediction.
+ #if defined(__clang__) && defined(__x86_64__)
+   asm volatile (".p2align 5");
+ #endif
    for ( ;; ) {
      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
 
@@ -667,7 +707,7 @@ class SnappyDecompressor {
      //   txt[1-4] 25% 75%
      //   pb 24% 76%
      //   bin 24% 76%
-     if (PREDICT_FALSE((c & 0x3) == LITERAL)) {
+     if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) {
        size_t literal_length = (c >> 2) + 1u;
        if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
          assert(literal_length < 61);
@@ -677,7 +717,7 @@ class SnappyDecompressor {
        // bytes in addition to the literal.
        continue;
      }
-     if (PREDICT_FALSE(literal_length >= 61)) {
+     if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) {
        // Long literal.
        const size_t literal_length_length = literal_length - 60;
        literal_length =
@@ -757,7 +797,7 @@ bool SnappyDecompressor::RefillTag() {
    size_t length;
    const char* src = reader_->Peek(&length);
    if (length == 0) return false;
-   uint32 to_add = min<uint32>(needed - nbuf, length);
+   uint32 to_add = std::min<uint32>(needed - nbuf, length);
    memcpy(scratch_ + nbuf, src, to_add);
    nbuf += to_add;
    reader_->Skip(to_add);
@@ -786,13 +826,18 @@ static bool InternalUncompress(Source* r, Writer* writer) {
    SnappyDecompressor decompressor(r);
    uint32 uncompressed_len = 0;
    if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
-   return InternalUncompressAllTags(&decompressor, writer, uncompressed_len);
+
+   return InternalUncompressAllTags(&decompressor, writer, r->Available(),
+                                    uncompressed_len);
  }
 
  template <typename Writer>
  static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
                                        Writer* writer,
+                                       uint32 compressed_len,
                                        uint32 uncompressed_len) {
+   Report("snappy_uncompress", compressed_len, uncompressed_len);
+
    writer->SetExpectedLength(uncompressed_len);
 
    // Process the entire input
@@ -809,6 +854,7 @@ bool GetUncompressedLength(Source* source, uint32* result) {
  size_t Compress(Source* reader, Sink* writer) {
    size_t written = 0;
    size_t N = reader->Available();
+   const size_t uncompressed_size = N;
    char ulength[Varint::kMax32];
    char* p = Varint::Encode32(ulength, N);
    writer->Append(ulength, p-ulength);
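The `ulength` preamble written above is the uncompressed length encoded as a little-endian base-128 varint (at most five bytes for a 32-bit value). A sketch of that encoding, written independently of the library's `Varint` class but matching the same wire format:

```cpp
#include <cstdint>

// Varint32: 7 payload bits per byte, least-significant group first; the high
// bit marks "more bytes follow". Returns a pointer one past the last byte,
// mirroring how the Varint::Encode32 result is used above.
inline char* EncodeVarint32Sketch(char* dst, uint32_t value) {
  while (value >= 0x80) {
    *dst++ = static_cast<char>((value & 0x7f) | 0x80);
    value >>= 7;
  }
  *dst++ = static_cast<char>(value);
  return dst;
}

// Example: a 65536-byte input produces the 3-byte preamble 0x80 0x80 0x04.
```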
@@ -823,7 +869,7 @@ size_t Compress(Source* reader, Sink* writer) {
    size_t fragment_size;
    const char* fragment = reader->Peek(&fragment_size);
    assert(fragment_size != 0); // premature end of input
-   const size_t num_to_read = min(N, kBlockSize);
+   const size_t num_to_read = std::min(N, kBlockSize);
    size_t bytes_read = fragment_size;
 
    size_t pending_advance = 0;
@@ -844,7 +890,7 @@ size_t Compress(Source* reader, Sink* writer) {
 
      while (bytes_read < num_to_read) {
        fragment = reader->Peek(&fragment_size);
-       size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
+       size_t n = std::min<size_t>(fragment_size, num_to_read - bytes_read);
        memcpy(scratch + bytes_read, fragment, n);
        bytes_read += n;
        reader->Skip(n);
@@ -881,6 +927,8 @@ size_t Compress(Source* reader, Sink* writer) {
      reader->Skip(pending_advance);
    }
 
+   Report("snappy_compress", written, uncompressed_size);
+
    delete[] scratch;
    delete[] scratch_output;
 
@@ -1313,7 +1361,8 @@ class SnappyScatteredWriter {
    char* const op_end = op_ptr_ + len;
    // See SnappyArrayWriter::AppendFromSelf for an explanation of
    // the "offset - 1u" trick.
-   if (PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ && op_end <= op_limit_)) {
+   if (SNAPPY_PREDICT_TRUE(offset - 1u < op_ptr_ - op_base_ &&
+                           op_end <= op_limit_)) {
      // Fast path: src and dst in current block.
      op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_);
      return true;
@@ -1344,7 +1393,7 @@ bool SnappyScatteredWriter<Allocator>::SlowAppend(const char* ip, size_t len) {
    }
 
    // Make new block
-   size_t bsize = min<size_t>(kBlockSize, expected_ - full_size_);
+   size_t bsize = std::min<size_t>(kBlockSize, expected_ - full_size_);
    op_base_ = allocator_.Allocate(bsize);
    op_ptr_ = op_base_;
    op_limit_ = op_base_ + bsize;
@@ -1401,7 +1450,7 @@ class SnappySinkAllocator {
    size_t size_written = 0;
    size_t block_size;
    for (int i = 0; i < blocks_.size(); ++i) {
-     block_size = min<size_t>(blocks_[i].size, size - size_written);
+     block_size = std::min<size_t>(blocks_[i].size, size - size_written);
      dest_->AppendAndTakeOwnership(blocks_[i].data, block_size,
                                    &SnappySinkAllocator::Deleter, NULL);
      size_written += block_size;
@@ -1446,18 +1495,20 @@ bool Uncompress(Source* compressed, Sink* uncompressed) {
    char* buf = uncompressed->GetAppendBufferVariable(
        1, uncompressed_len, &c, 1, &allocated_size);
 
+   const size_t compressed_len = compressed->Available();
    // If we can get a flat buffer, then use it, otherwise do block by block
    // uncompression
    if (allocated_size >= uncompressed_len) {
      SnappyArrayWriter writer(buf);
-     bool result = InternalUncompressAllTags(
-         &decompressor, &writer, uncompressed_len);
+     bool result = InternalUncompressAllTags(&decompressor, &writer,
+                                             compressed_len, uncompressed_len);
      uncompressed->Append(buf, writer.Produced());
      return result;
    } else {
      SnappySinkAllocator allocator(uncompressed);
      SnappyScatteredWriter<SnappySinkAllocator> writer(allocator);
-     return InternalUncompressAllTags(&decompressor, &writer, uncompressed_len);
+     return InternalUncompressAllTags(&decompressor, &writer, compressed_len,
+                                      uncompressed_len);
    }
  }