snappy 0.0.17 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. checksums.yaml +5 -5
  2. data/.dockerignore +2 -0
  3. data/.github/workflows/main.yml +34 -0
  4. data/.github/workflows/publish.yml +34 -0
  5. data/.gitignore +2 -1
  6. data/.gitmodules +1 -1
  7. data/Dockerfile +13 -0
  8. data/Gemfile +4 -0
  9. data/README.md +45 -5
  10. data/Rakefile +32 -29
  11. data/ext/api.c +6 -1
  12. data/ext/extconf.rb +31 -22
  13. data/lib/snappy/hadoop/reader.rb +62 -0
  14. data/lib/snappy/hadoop/writer.rb +51 -0
  15. data/lib/snappy/hadoop.rb +22 -0
  16. data/lib/snappy/reader.rb +14 -10
  17. data/lib/snappy/shim.rb +1 -1
  18. data/lib/snappy/version.rb +1 -1
  19. data/lib/snappy.rb +5 -4
  20. data/snappy.gemspec +14 -13
  21. data/test/hadoop/snappy_hadoop_reader_test.rb +115 -0
  22. data/test/hadoop/snappy_hadoop_writer_test.rb +48 -0
  23. data/test/snappy_hadoop_test.rb +26 -0
  24. data/test/snappy_reader_test.rb +148 -0
  25. data/test/snappy_test.rb +95 -0
  26. data/test/snappy_writer_test.rb +55 -0
  27. data/test/test_helper.rb +7 -0
  28. data/test.sh +3 -0
  29. data/vendor/snappy/CMakeLists.txt +420 -0
  30. data/vendor/snappy/CONTRIBUTING.md +31 -0
  31. data/vendor/snappy/NEWS +52 -0
  32. data/vendor/snappy/{README → README.md} +75 -49
  33. data/vendor/snappy/cmake/SnappyConfig.cmake.in +33 -0
  34. data/vendor/snappy/cmake/config.h.in +66 -0
  35. data/vendor/snappy/docs/README.md +72 -0
  36. data/vendor/snappy/snappy-internal.h +200 -32
  37. data/vendor/snappy/snappy-sinksource.cc +26 -9
  38. data/vendor/snappy/snappy-sinksource.h +11 -11
  39. data/vendor/snappy/snappy-stubs-internal.cc +1 -1
  40. data/vendor/snappy/snappy-stubs-internal.h +299 -302
  41. data/vendor/snappy/snappy-stubs-public.h.in +10 -47
  42. data/vendor/snappy/snappy-test.cc +94 -200
  43. data/vendor/snappy/snappy-test.h +101 -358
  44. data/vendor/snappy/snappy.cc +1437 -474
  45. data/vendor/snappy/snappy.h +31 -12
  46. data/vendor/snappy/snappy_benchmark.cc +378 -0
  47. data/vendor/snappy/snappy_compress_fuzzer.cc +60 -0
  48. data/vendor/snappy/snappy_test_data.cc +57 -0
  49. data/vendor/snappy/snappy_test_data.h +68 -0
  50. data/vendor/snappy/snappy_test_tool.cc +471 -0
  51. data/vendor/snappy/snappy_uncompress_fuzzer.cc +58 -0
  52. data/vendor/snappy/snappy_unittest.cc +271 -792
  53. metadata +42 -92
  54. data/.travis.yml +0 -26
  55. data/smoke.sh +0 -8
  56. data/test/test-snappy-reader.rb +0 -129
  57. data/test/test-snappy-writer.rb +0 -55
  58. data/test/test-snappy.rb +0 -58
  59. data/vendor/snappy/ChangeLog +0 -2468
  60. data/vendor/snappy/INSTALL +0 -370
  61. data/vendor/snappy/Makefile +0 -982
  62. data/vendor/snappy/Makefile.am +0 -26
  63. data/vendor/snappy/Makefile.in +0 -982
  64. data/vendor/snappy/aclocal.m4 +0 -9738
  65. data/vendor/snappy/autogen.sh +0 -12
  66. data/vendor/snappy/autom4te.cache/output.0 +0 -18856
  67. data/vendor/snappy/autom4te.cache/output.1 +0 -18852
  68. data/vendor/snappy/autom4te.cache/requests +0 -297
  69. data/vendor/snappy/autom4te.cache/traces.0 +0 -2689
  70. data/vendor/snappy/autom4te.cache/traces.1 +0 -714
  71. data/vendor/snappy/config.guess +0 -1530
  72. data/vendor/snappy/config.h +0 -135
  73. data/vendor/snappy/config.h.in +0 -134
  74. data/vendor/snappy/config.log +0 -1640
  75. data/vendor/snappy/config.status +0 -2318
  76. data/vendor/snappy/config.sub +0 -1773
  77. data/vendor/snappy/configure +0 -18852
  78. data/vendor/snappy/configure.ac +0 -134
  79. data/vendor/snappy/depcomp +0 -688
  80. data/vendor/snappy/install-sh +0 -527
  81. data/vendor/snappy/libtool +0 -10246
  82. data/vendor/snappy/ltmain.sh +0 -9661
  83. data/vendor/snappy/m4/gtest.m4 +0 -74
  84. data/vendor/snappy/m4/libtool.m4 +0 -8001
  85. data/vendor/snappy/m4/ltoptions.m4 +0 -384
  86. data/vendor/snappy/m4/ltsugar.m4 +0 -123
  87. data/vendor/snappy/m4/ltversion.m4 +0 -23
  88. data/vendor/snappy/m4/lt~obsolete.m4 +0 -98
  89. data/vendor/snappy/missing +0 -331
  90. data/vendor/snappy/snappy-stubs-public.h +0 -100
  91. data/vendor/snappy/snappy.pc +0 -10
  92. data/vendor/snappy/snappy.pc.in +0 -10
  93. data/vendor/snappy/stamp-h1 +0 -1
--- /dev/null
+++ b/data/vendor/snappy/cmake/SnappyConfig.cmake.in
@@ -0,0 +1,33 @@
+# Copyright 2019 Google Inc. All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+@PACKAGE_INIT@
+
+include("${CMAKE_CURRENT_LIST_DIR}/SnappyTargets.cmake")
+
+check_required_components(Snappy)
--- /dev/null
+++ b/data/vendor/snappy/cmake/config.h.in
@@ -0,0 +1,66 @@
+#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
+#define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
+
+/* Define to 1 if the compiler supports __attribute__((always_inline)). */
+#cmakedefine01 HAVE_ATTRIBUTE_ALWAYS_INLINE
+
+/* Define to 1 if the compiler supports __builtin_ctz and friends. */
+#cmakedefine01 HAVE_BUILTIN_CTZ
+
+/* Define to 1 if the compiler supports __builtin_expect. */
+#cmakedefine01 HAVE_BUILTIN_EXPECT
+
+/* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */
+#cmakedefine01 HAVE_FUNC_MMAP
+
+/* Define to 1 if you have a definition for sysconf() in <unistd.h>. */
+#cmakedefine01 HAVE_FUNC_SYSCONF
+
+/* Define to 1 if you have the `lzo2' library (-llzo2). */
+#cmakedefine01 HAVE_LIBLZO2
+
+/* Define to 1 if you have the `z' library (-lz). */
+#cmakedefine01 HAVE_LIBZ
+
+/* Define to 1 if you have the `lz4' library (-llz4). */
+#cmakedefine01 HAVE_LIBLZ4
+
+/* Define to 1 if you have the <sys/mman.h> header file. */
+#cmakedefine01 HAVE_SYS_MMAN_H
+
+/* Define to 1 if you have the <sys/resource.h> header file. */
+#cmakedefine01 HAVE_SYS_RESOURCE_H
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#cmakedefine01 HAVE_SYS_TIME_H
+
+/* Define to 1 if you have the <sys/uio.h> header file. */
+#cmakedefine01 HAVE_SYS_UIO_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#cmakedefine01 HAVE_UNISTD_H
+
+/* Define to 1 if you have the <windows.h> header file. */
+#cmakedefine01 HAVE_WINDOWS_H
+
+/* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */
+#cmakedefine01 SNAPPY_HAVE_SSSE3
+
+/* Define to 1 if you target processors with SSE4.2 and have <crc32intrin.h>. */
+#cmakedefine01 SNAPPY_HAVE_X86_CRC32
+
+/* Define to 1 if you target processors with BMI2+ and have <bmi2intrin.h>. */
+#cmakedefine01 SNAPPY_HAVE_BMI2
+
+/* Define to 1 if you target processors with NEON and have <arm_neon.h>. */
+#cmakedefine01 SNAPPY_HAVE_NEON
+
+/* Define to 1 if you have <arm_neon.h> and <arm_acle.h> and want to optimize
+   compression speed by using __crc32cw from <arm_acle.h>. */
+#cmakedefine01 SNAPPY_HAVE_NEON_CRC32
+
+/* Define to 1 if your processor stores words with the most significant byte
+   first (like Motorola and SPARC, unlike Intel and VAX). */
+#cmakedefine01 SNAPPY_IS_BIG_ENDIAN
+
+#endif  // THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
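Editor's note: because `#cmakedefine01` expands every flag to a literal 0 or 1 rather than leaving it undefined, the library can gate features with plain `#if`. A minimal sketch of that pattern, with a hypothetical `CountTrailingZeros` helper and a hard-coded value standing in for the CMake-generated header:

```cpp
#include <cstdint>

// Normally generated from config.h.in by CMake; hard-coded here purely as an
// assumption for this sketch.
#define HAVE_BUILTIN_CTZ 1

// Because #cmakedefine01 macros are always 0 or 1, plain #if works, and a
// misspelled macro name is caught by -Wundef instead of silently reading as 0.
// Hypothetical helper, not Snappy's code. n must be nonzero.
inline int CountTrailingZeros(uint32_t n) {
#if HAVE_BUILTIN_CTZ
  return __builtin_ctz(n);   // single instruction on most targets
#else
  int bits = 0;              // portable fallback
  while ((n & 1u) == 0) {
    n >>= 1;
    ++bits;
  }
  return bits;
#endif
}
```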
--- /dev/null
+++ b/data/vendor/snappy/docs/README.md
@@ -0,0 +1,72 @@
+Snappy is a compression/decompression library. It does not aim for maximum
+compression, or compatibility with any other compression library; instead, it
+aims for very high speeds and reasonable compression. For instance, compared
+to the fastest mode of zlib, Snappy is an order of magnitude faster for most
+inputs, but the resulting compressed files are anywhere from 20% to 100%
+bigger. On a single core of a Core i7 processor in 64-bit mode, Snappy
+compresses at about 250 MB/sec or more and decompresses at about 500 MB/sec
+or more.
+
+Snappy is widely used inside Google, in everything from BigTable and MapReduce
+to our internal RPC systems. (Snappy has previously been referred to as "Zippy"
+in some presentations and the like.)
+
+For more information, please see the [README](../README.md). Benchmarks against
+a few other compression libraries (zlib, LZO, LZF, FastLZ, and QuickLZ) are
+included in the source code distribution. The source code also contains a
+[formal format specification](../format_description.txt), as well
+as a specification for a [framing format](../framing_format.txt) useful for
+higher-level framing and encapsulation of Snappy data, e.g. for transporting
+Snappy-compressed data across HTTP in a streaming fashion. Note that the Snappy
+distribution currently has no code implementing the latter, but some of the
+ports do (see below).
+
+Snappy is written in C++, but C bindings are included, and several bindings to
+other languages are maintained by third parties:
+
+* C#: [Snappy for .NET](http://snappy4net.codeplex.com/) (P/Invoke wrapper),
+  [Snappy.NET](http://snappy.angeloflogic.com/) (P/Invoke wrapper),
+  [Snappy.Sharp](https://github.com/jeffesp/Snappy.Sharp) (native
+  reimplementation)
+* [C port](http://github.com/andikleen/snappy-c)
+* [C++ MSVC packaging](http://snappy.angeloflogic.com/) (plus Windows binaries,
+  NuGet packages and command-line tool)
+* Common Lisp: [Library bindings](http://flambard.github.com/thnappy/),
+  [native reimplementation](https://github.com/brown/snappy)
+* Erlang: [esnappy](https://github.com/thekvs/esnappy),
+  [snappy-erlang-nif](https://github.com/fdmanana/snappy-erlang-nif)
+* [Go](https://github.com/golang/snappy/)
+* [Haskell](http://hackage.haskell.org/package/snappy)
+* [Haxe](https://github.com/MaddinXx/hxsnappy) (C++/Neko)
+* [iOS packaging](https://github.com/ideawu/snappy-ios)
+* Java: [JNI wrapper](https://github.com/xerial/snappy-java) (including the
+  framing format), [native reimplementation](http://code.google.com/p/jsnappy/),
+  [other native reimplementation](https://github.com/dain/snappy) (including
+  the framing format)
+* [Lua](https://github.com/forhappy/lua-snappy)
+* [Node.js](https://github.com/kesla/node-snappy) (including the [framing
+  format](https://github.com/kesla/node-snappy-stream))
+* [Perl](http://search.cpan.org/dist/Compress-Snappy/)
+* [PHP](https://github.com/kjdev/php-ext-snappy)
+* [Python](http://pypi.python.org/pypi/python-snappy) (including a command-line
+  tool for the framing format)
+* [R](https://github.com/lulyon/R-snappy)
+* [Ruby](https://github.com/miyucy/snappy)
+* [Rust](https://github.com/BurntSushi/rust-snappy)
+* [Smalltalk](https://github.com/mumez/sqnappy) (including the framing format)
+
+Snappy is used or is available as an alternative in software such as
+
+* [MongoDB](https://www.mongodb.com/)
+* [Cassandra](http://cassandra.apache.org/)
+* [Couchbase](http://www.couchbase.com/)
+* [Hadoop](http://hadoop.apache.org/)
+* [LessFS](http://www.lessfs.com/wordpress/)
+* [LevelDB](https://github.com/google/leveldb) (which is in turn used by
+  [Google Chrome](http://chrome.google.com/))
+* [Lucene](http://lucene.apache.org/)
+* [VoltDB](http://voltdb.com/)
+
+If you know of more, do not hesitate to let us know. The easiest way to get in
+touch is via the
+[Snappy discussion mailing list](http://groups.google.com/group/snappy-compression).
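Editor's note: for orientation, the C++ library this gem vendors exposes a two-call round trip. The sketch below uses the long-standing `snappy::Compress`/`snappy::Uncompress` string-based overloads from `snappy.h`; the Ruby gem's C extension wraps this same API.

```cpp
#include <cassert>
#include <string>

#include <snappy.h>

int main() {
  const std::string input(10000, 'a');  // highly compressible input

  std::string compressed;
  snappy::Compress(input.data(), input.size(), &compressed);

  std::string restored;
  bool ok = snappy::Uncompress(compressed.data(), compressed.size(), &restored);

  assert(ok && restored == input);  // lossless round trip
  return 0;
}
```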
--- a/data/vendor/snappy/snappy-internal.h
+++ b/data/vendor/snappy/snappy-internal.h
@@ -33,24 +33,108 @@
 
 #include "snappy-stubs-internal.h"
 
+#if SNAPPY_HAVE_SSSE3
+// Please do not replace with <x86intrin.h> or with headers that assume more
+// advanced SSE versions without checking with all the OWNERS.
+#include <emmintrin.h>
+#include <tmmintrin.h>
+#endif
+
+#if SNAPPY_HAVE_NEON
+#include <arm_neon.h>
+#endif
+
+#if SNAPPY_HAVE_SSSE3 || SNAPPY_HAVE_NEON
+#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 1
+#else
+#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 0
+#endif
+
 namespace snappy {
 namespace internal {
 
+#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
+#if SNAPPY_HAVE_SSSE3
+using V128 = __m128i;
+#elif SNAPPY_HAVE_NEON
+using V128 = uint8x16_t;
+#endif
+
+// Load 128 bits of integer data. `src` must be 16-byte aligned.
+inline V128 V128_Load(const V128* src);
+
+// Load 128 bits of integer data. `src` does not need to be aligned.
+inline V128 V128_LoadU(const V128* src);
+
+// Store 128 bits of integer data. `dst` does not need to be aligned.
+inline void V128_StoreU(V128* dst, V128 val);
+
+// Shuffle packed 8-bit integers using a shuffle mask.
+// Each packed integer in the shuffle mask must be in [0,16).
+inline V128 V128_Shuffle(V128 input, V128 shuffle_mask);
+
+// Constructs V128 with 16 chars |c|.
+inline V128 V128_DupChar(char c);
+
+#if SNAPPY_HAVE_SSSE3
+inline V128 V128_Load(const V128* src) { return _mm_load_si128(src); }
+
+inline V128 V128_LoadU(const V128* src) { return _mm_loadu_si128(src); }
+
+inline void V128_StoreU(V128* dst, V128 val) { _mm_storeu_si128(dst, val); }
+
+inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) {
+  return _mm_shuffle_epi8(input, shuffle_mask);
+}
+
+inline V128 V128_DupChar(char c) { return _mm_set1_epi8(c); }
+
+#elif SNAPPY_HAVE_NEON
+inline V128 V128_Load(const V128* src) {
+  return vld1q_u8(reinterpret_cast<const uint8_t*>(src));
+}
+
+inline V128 V128_LoadU(const V128* src) {
+  return vld1q_u8(reinterpret_cast<const uint8_t*>(src));
+}
+
+inline void V128_StoreU(V128* dst, V128 val) {
+  vst1q_u8(reinterpret_cast<uint8_t*>(dst), val);
+}
+
+inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) {
+  assert(vminvq_u8(shuffle_mask) >= 0 && vmaxvq_u8(shuffle_mask) <= 15);
+  return vqtbl1q_u8(input, shuffle_mask);
+}
+
+inline V128 V128_DupChar(char c) { return vdupq_n_u8(c); }
+#endif
+#endif  // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
+
+// Working memory performs a single allocation to hold all scratch space
+// required for compression.
 class WorkingMemory {
  public:
-  WorkingMemory() : large_table_(NULL) { }
-  ~WorkingMemory() { delete[] large_table_; }
+  explicit WorkingMemory(size_t input_size);
+  ~WorkingMemory();
 
   // Allocates and clears a hash table using memory in "*this",
   // stores the number of buckets in "*table_size" and returns a pointer to
   // the base of the hash table.
-  uint16* GetHashTable(size_t input_size, int* table_size);
+  uint16_t* GetHashTable(size_t fragment_size, int* table_size) const;
+  char* GetScratchInput() const { return input_; }
+  char* GetScratchOutput() const { return output_; }
 
  private:
-  uint16 small_table_[1<<10];    // 2KB
-  uint16* large_table_;          // Allocated only when needed
+  char* mem_;        // the allocated memory, never nullptr
+  size_t size_;      // the size of the allocated memory, never 0
+  uint16_t* table_;  // the pointer to the hashtable
+  char* input_;      // the pointer to the input scratch buffer
+  char* output_;     // the pointer to the output scratch buffer
 
-  DISALLOW_COPY_AND_ASSIGN(WorkingMemory);
+  // No copying
+  WorkingMemory(const WorkingMemory&);
+  void operator=(const WorkingMemory&);
 };
 
 // Flat array compression that does not emit the "uncompressed length"
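Editor's note: the `V128` helpers above abstract one operation, a 16-byte table-lookup shuffle (`pshufb` on x86, `tbl` on NEON), behind a common name. A standalone illustration of what `V128_Shuffle` does on an SSSE3 build (not Snappy's code), reversing 16 bytes with a single shuffle:

```cpp
#include <cstdio>
#include <tmmintrin.h>  // SSSE3 intrinsics, including _mm_shuffle_epi8

int main() {
  alignas(16) unsigned char bytes[16] = {0, 1, 2,  3,  4,  5,  6,  7,
                                         8, 9, 10, 11, 12, 13, 14, 15};
  // Each mask byte names the source lane copied into that position, so the
  // mask 15..0 reverses the register.
  alignas(16) const unsigned char mask[16] = {15, 14, 13, 12, 11, 10, 9, 8,
                                              7,  6,  5,  4,  3,  2,  1, 0};
  __m128i v = _mm_load_si128(reinterpret_cast<const __m128i*>(bytes));
  __m128i m = _mm_load_si128(reinterpret_cast<const __m128i*>(mask));
  __m128i r = _mm_shuffle_epi8(v, m);  // the operation V128_Shuffle wraps
  _mm_store_si128(reinterpret_cast<__m128i*>(bytes), r);
  for (unsigned char b : bytes) std::printf("%u ", b);  // prints 15 14 ... 0
  std::printf("\n");
}
```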
@@ -67,7 +151,7 @@ class WorkingMemory {
 char* CompressFragment(const char* input,
                        size_t input_length,
                        char* op,
-                       uint16* table,
+                       uint16_t* table,
                        const int table_size);
 
 // Find the largest n such that
@@ -80,12 +164,19 @@ char* CompressFragment(const char* input,
 // Does not read *(s1 + (s2_limit - s2)) or beyond.
 // Requires that s2_limit >= s2.
 //
-// Separate implementation for x86_64, for speed.  Uses the fact that
-// x86_64 is little endian.
-#if defined(ARCH_K8)
+// In addition, populate *data with the next 5 bytes from the end of the match.
+// This is only done if 8 bytes are available (s2_limit - s2 >= 8). The point is
+// that on some architectures this can be done faster in this routine than by
+// subsequently loading from s2 + n.
+//
+// Separate implementation for 64-bit, little-endian CPUs.
+#if !SNAPPY_IS_BIG_ENDIAN && \
+    (defined(__x86_64__) || defined(_M_X64) || defined(ARCH_PPC) || \
+     defined(ARCH_ARM))
 static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
                                                       const char* s2,
-                                                      const char* s2_limit) {
+                                                      const char* s2_limit,
+                                                      uint64_t* data) {
   assert(s2_limit >= s2);
   size_t matched = 0;
 
@@ -94,12 +185,72 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
   // uncommon code paths that determine, without extra effort, whether the match
   // length is less than 8. In short, we are hoping to avoid a conditional
   // branch, and perhaps get better code layout from the C++ compiler.
-  if (PREDICT_TRUE(s2 <= s2_limit - 8)) {
-    uint64 a1 = UNALIGNED_LOAD64(s1);
-    uint64 a2 = UNALIGNED_LOAD64(s2);
-    if (a1 != a2) {
-      return std::pair<size_t, bool>(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3,
-                                     true);
+  if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 16)) {
+    uint64_t a1 = UNALIGNED_LOAD64(s1);
+    uint64_t a2 = UNALIGNED_LOAD64(s2);
+    if (SNAPPY_PREDICT_TRUE(a1 != a2)) {
+      // This code is critical for performance. The reason is that it determines
+      // how much to advance `ip` (s2). This obviously depends on both the loads
+      // from the `candidate` (s1) and `ip`. Furthermore the next `candidate`
+      // depends on the advanced `ip` calculated here through a load, hash and
+      // new candidate hash lookup (a lot of cycles). This makes s1 (i.e.
+      // `candidate`) the variable that limits throughput. This is the reason we
+      // go through hoops to have this function update `data` for the next iter.
+      // The straightforward code would use *data, given by
+      //
+      //   *data = UNALIGNED_LOAD64(s2 + matched_bytes)  (latency of 5 cycles),
+      //
+      // as input for the hash table lookup to find the next candidate. However
+      // this forces the load onto the data dependency chain of s1, because
+      // matched_bytes directly depends on s1. However matched_bytes is 0..7, so
+      // we can also calculate *data by
+      //
+      //   *data = AlignRight(UNALIGNED_LOAD64(s2), UNALIGNED_LOAD64(s2 + 8),
+      //                      matched_bytes);
+      //
+      // The loads do not depend on s1 anymore and are thus off the bottleneck.
+      // The straightforward implementation on x86_64 would be to use
+      //
+      //   shrd rax, rdx, cl  (cl being matched_bytes * 8)
+      //
+      // unfortunately shrd with a variable shift has a 4 cycle latency. So this
+      // only wins 1 cycle. The BMI2 shrx instruction is a 1 cycle variable
+      // shift instruction but can only shift 64 bits. If we focus on just
+      // obtaining the least significant 4 bytes, we can obtain this by
+      //
+      //   *data = ConditionalMove(matched_bytes < 4, UNALIGNED_LOAD64(s2),
+      //                           UNALIGNED_LOAD64(s2 + 4) >> ((matched_bytes & 3) * 8));
+      //
+      // Written like that, this is not a big win: the conditional move would be
+      // a cmp followed by a cmov (2 cycles) followed by a shift (1 cycle).
+      // However matched_bytes < 4 is equal to
+      // static_cast<uint32_t>(xorval) != 0. Written that way, the conditional
+      // move (2 cycles) can execute in parallel with FindLSBSetNonZero64
+      // (tzcnt), which takes 3 cycles.
+      uint64_t xorval = a1 ^ a2;
+      int shift = Bits::FindLSBSetNonZero64(xorval);
+      size_t matched_bytes = shift >> 3;
+      uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
+#ifndef __x86_64__
+      a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
+#else
+      // Ideally this would just be
+      //
+      //   a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
+      //
+      // However clang correctly infers that the above statement participates in
+      // a critical data dependency chain and thus, unfortunately, refuses to
+      // use a conditional move (it's tuned to cut data dependencies). In this
+      // case there is a longer parallel chain anyway AND this will be fairly
+      // unpredictable.
+      asm("testl %k2, %k2\n\t"
+          "cmovzq %1, %0\n\t"
+          : "+r"(a2)
+          : "r"(a3), "r"(xorval)
+          : "cc");
+#endif
+      *data = a2 >> (shift & (3 * 8));
+      return std::pair<size_t, bool>(matched_bytes, true);
     } else {
       matched = 8;
       s2 += 8;
@@ -110,23 +261,40 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
   // time until we find a 64-bit block that doesn't match; then we find
   // the first non-matching bit and use that to calculate the total
   // length of the match.
-  while (PREDICT_TRUE(s2 <= s2_limit - 8)) {
-    if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
+  while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 16)) {
+    uint64_t a1 = UNALIGNED_LOAD64(s1 + matched);
+    uint64_t a2 = UNALIGNED_LOAD64(s2);
+    if (a1 == a2) {
       s2 += 8;
       matched += 8;
     } else {
-      uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
-      int matching_bits = Bits::FindLSBSetNonZero64(x);
-      matched += matching_bits >> 3;
+      uint64_t xorval = a1 ^ a2;
+      int shift = Bits::FindLSBSetNonZero64(xorval);
+      size_t matched_bytes = shift >> 3;
+      uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
+#ifndef __x86_64__
+      a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
+#else
+      asm("testl %k2, %k2\n\t"
+          "cmovzq %1, %0\n\t"
+          : "+r"(a2)
+          : "r"(a3), "r"(xorval)
+          : "cc");
+#endif
+      *data = a2 >> (shift & (3 * 8));
+      matched += matched_bytes;
       assert(matched >= 8);
       return std::pair<size_t, bool>(matched, false);
     }
   }
-  while (PREDICT_TRUE(s2 < s2_limit)) {
+  while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) {
     if (s1[matched] == *s2) {
       ++s2;
       ++matched;
     } else {
+      if (s2 <= s2_limit - 8) {
+        *data = UNALIGNED_LOAD64(s2);
+      }
       return std::pair<size_t, bool>(matched, matched < 8);
     }
   }
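Editor's note: stripped of the dependency-chain tricks, the core of `FindMatchLength` is the XOR-and-count-trailing-zeros idea: on a little-endian machine, the number of equal leading bytes of two 8-byte blocks is the trailing-zero count of their XOR, divided by 8. A minimal scalar sketch, using the GCC/Clang builtin `__builtin_ctzll` in place of `Bits::FindLSBSetNonZero64`:

```cpp
#include <cstdint>
#include <cstring>

// Illustration only (not the library's code). Returns how many leading bytes
// of the two 8-byte blocks at s1 and s2 are equal, assuming little-endian.
// Precondition: both pointers have at least 8 readable bytes.
static inline int MatchedBytes64(const char* s1, const char* s2) {
  uint64_t a1, a2;
  std::memcpy(&a1, s1, 8);  // memcpy compiles down to an unaligned load
  std::memcpy(&a2, s2, 8);
  uint64_t x = a1 ^ a2;     // differing bits; zero means all 8 bytes match
  if (x == 0) return 8;
  // On little-endian, low-order bits correspond to earlier bytes, so the
  // trailing-zero count divided by 8 is the count of matching bytes.
  return __builtin_ctzll(x) >> 3;
}
```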
@@ -135,7 +303,8 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
 #else
 static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
                                                       const char* s2,
-                                                      const char* s2_limit) {
+                                                      const char* s2_limit,
+                                                      uint64_t* data) {
   // Implementation based on the x86-64 version, above.
   assert(s2_limit >= s2);
   int matched = 0;
@@ -146,15 +315,17 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
     matched += 4;
   }
   if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) {
-    uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched);
+    uint32_t x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched);
     int matching_bits = Bits::FindLSBSetNonZero(x);
     matched += matching_bits >> 3;
+    s2 += matching_bits >> 3;
   } else {
     while ((s2 < s2_limit) && (s1[matched] == *s2)) {
       ++s2;
       ++matched;
     }
   }
+  if (s2 <= s2_limit - 8) *data = LittleEndian::Load64(s2);
   return std::pair<size_t, bool>(matched, matched < 8);
 }
 #endif
@@ -170,11 +341,6 @@ enum {
 };
 static const int kMaximumTagLength = 5;  // COPY_4_BYTE_OFFSET plus the actual offset.
 
-// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
-static const uint32 wordmask[] = {
-  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
-};
-
 // Data stored per entry in lookup table:
 //      Range   Bits-used       Description
 //      ------------------------------------
@@ -186,7 +352,8 @@ static const uint32 wordmask[] = {
 // because of efficiency reasons:
 //  (1) Extracting a byte is faster than a bit-field
 //  (2) It properly aligns copy offset so we do not need a <<8
-static const uint16 char_table[256] = {
+static constexpr uint16_t char_table[256] = {
+  // clang-format off
   0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
   0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
   0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
@@ -218,7 +385,8 @@ static const uint16 char_table[256] = {
   0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
   0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
   0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
-  0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
+  0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040,
+  // clang-format on
 };
 
 }  // end namespace internal
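Editor's note: each 16-bit `char_table` entry packs the per-tag decode data the table comment describes: the literal/copy length in bits 0..7, the copy offset's high bits (pre-aligned, so no `<<8` is needed) in bits 8..10, and the count of extra bytes after the opcode in bits 11..13. A hedged sketch of a decoder for one entry; the field layout here is inferred from the comment and upstream snappy.cc, so verify before relying on it:

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical decoder for one char_table entry (not library code).
struct TagInfo {
  uint32_t length;       // literal/copy length encoded in the opcode byte
  uint32_t offset_high;  // copy offset's high bits, already shifted into place
  uint32_t extra_bytes;  // trailing bytes that follow the opcode byte
};

constexpr TagInfo DecodeEntry(uint16_t entry) {
  return TagInfo{entry & 0xffu,            // bits 0..7
                 entry & 0x700u,           // bits 8..10, pre-shifted by 8
                 uint32_t{entry} >> 11};   // bits 11..13
}

int main() {
  // char_table[0x01] == 0x0804: tag 0x01 is a 1-byte-offset copy of length 4
  // followed by one offset byte, which is exactly what the entry encodes.
  constexpr TagInfo t = DecodeEntry(0x0804);
  std::printf("len=%u offset_high=%u extra=%u\n",
              t.length, t.offset_high, t.extra_bytes);  // len=4 ... extra=1
}
```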
--- a/data/vendor/snappy/snappy-sinksource.cc
+++ b/data/vendor/snappy/snappy-sinksource.cc
@@ -26,23 +26,31 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include <string.h>
+#include <stddef.h>
+#include <cstring>
 
 #include "snappy-sinksource.h"
 
 namespace snappy {
 
-Source::~Source() { }
+Source::~Source() = default;
 
-Sink::~Sink() { }
+Sink::~Sink() = default;
 
 char* Sink::GetAppendBuffer(size_t length, char* scratch) {
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)length;
+
   return scratch;
 }
 
 char* Sink::GetAppendBufferVariable(
     size_t min_size, size_t desired_size_hint, char* scratch,
     size_t scratch_size, size_t* allocated_size) {
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)min_size;
+  (void)desired_size_hint;
+
   *allocated_size = scratch_size;
   return scratch;
 }
@@ -55,7 +63,7 @@ void Sink::AppendAndTakeOwnership(
   (*deleter)(deleter_arg, bytes, n);
 }
 
-ByteArraySource::~ByteArraySource() { }
+ByteArraySource::~ByteArraySource() = default;
 
 size_t ByteArraySource::Available() const { return left_; }
 
@@ -74,22 +82,26 @@ UncheckedByteArraySink::~UncheckedByteArraySink() { }
 void UncheckedByteArraySink::Append(const char* data, size_t n) {
   // Do no copying if the caller filled in the result of GetAppendBuffer()
   if (data != dest_) {
-    memcpy(dest_, data, n);
+    std::memcpy(dest_, data, n);
   }
   dest_ += n;
 }
 
 char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) {
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)len;
+  (void)scratch;
+
   return dest_;
 }
 
 void UncheckedByteArraySink::AppendAndTakeOwnership(
-    char* data, size_t n,
+    char* bytes, size_t n,
     void (*deleter)(void*, const char*, size_t),
     void *deleter_arg) {
-  if (data != dest_) {
-    memcpy(dest_, data, n);
-    (*deleter)(deleter_arg, data, n);
+  if (bytes != dest_) {
+    std::memcpy(dest_, bytes, n);
+    (*deleter)(deleter_arg, bytes, n);
   }
   dest_ += n;
 }
@@ -97,6 +109,11 @@ void UncheckedByteArraySink::AppendAndTakeOwnership(
 char* UncheckedByteArraySink::GetAppendBufferVariable(
     size_t min_size, size_t desired_size_hint, char* scratch,
     size_t scratch_size, size_t* allocated_size) {
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)min_size;
+  (void)scratch;
+  (void)scratch_size;
+
   *allocated_size = desired_size_hint;
   return dest_;
 }
--- a/data/vendor/snappy/snappy-sinksource.h
+++ b/data/vendor/snappy/snappy-sinksource.h
@@ -146,10 +146,10 @@ class Source {
 class ByteArraySource : public Source {
  public:
   ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { }
-  virtual ~ByteArraySource();
-  virtual size_t Available() const;
-  virtual const char* Peek(size_t* len);
-  virtual void Skip(size_t n);
+  ~ByteArraySource() override;
+  size_t Available() const override;
+  const char* Peek(size_t* len) override;
+  void Skip(size_t n) override;
  private:
   const char* ptr_;
   size_t left_;
@@ -159,15 +159,15 @@ class ByteArraySource : public Source {
 class UncheckedByteArraySink : public Sink {
  public:
   explicit UncheckedByteArraySink(char* dest) : dest_(dest) { }
-  virtual ~UncheckedByteArraySink();
-  virtual void Append(const char* data, size_t n);
-  virtual char* GetAppendBuffer(size_t len, char* scratch);
-  virtual char* GetAppendBufferVariable(
+  ~UncheckedByteArraySink() override;
+  void Append(const char* data, size_t n) override;
+  char* GetAppendBuffer(size_t len, char* scratch) override;
+  char* GetAppendBufferVariable(
       size_t min_size, size_t desired_size_hint, char* scratch,
-      size_t scratch_size, size_t* allocated_size);
-  virtual void AppendAndTakeOwnership(
+      size_t scratch_size, size_t* allocated_size) override;
+  void AppendAndTakeOwnership(
       char* bytes, size_t n, void (*deleter)(void*, const char*, size_t),
-      void *deleter_arg);
+      void *deleter_arg) override;
 
   // Return the current output pointer so that a caller can see how
   // many bytes were produced.
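Editor's note: with the interface now written in terms of `override`, a custom sink stays small, since only `Append` is pure virtual. A hedged sketch of a string-backed sink (not part of the library) that could be paired with `ByteArraySource` and the `snappy::Uncompress(Source*, Sink*)` overload:

```cpp
#include <cstddef>
#include <string>

#include "snappy-sinksource.h"

// Hypothetical sink that appends output to a std::string. Only Append() must
// be provided; the base class supplies a scratch-based GetAppendBuffer().
class StringSink : public snappy::Sink {
 public:
  explicit StringSink(std::string* out) : out_(out) {}
  void Append(const char* bytes, size_t n) override { out_->append(bytes, n); }

 private:
  std::string* out_;  // not owned
};
```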
--- a/data/vendor/snappy/snappy-stubs-internal.cc
+++ b/data/vendor/snappy/snappy-stubs-internal.cc
@@ -33,7 +33,7 @@
 
 namespace snappy {
 
-void Varint::Append32(string* s, uint32 value) {
+void Varint::Append32(std::string* s, uint32_t value) {
   char buf[Varint::kMax32];
   const char* p = Varint::Encode32(buf, value);
   s->append(buf, p - buf);
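Editor's note: `Varint::Encode32`, used above, emits a base-128 varint: seven payload bits per byte with the high bit marking continuation, which is why `kMax32` is 5. A standalone re-implementation for illustration (hypothetical helper, not the library's code):

```cpp
#include <cstdint>

// Encodes value as a base-128 varint: low 7 bits per byte, MSB flags "more
// bytes follow". Returns one past the last byte written; buf must have room
// for 5 bytes (the kMax32 bound for 32-bit values).
char* EncodeVarint32(char* buf, uint32_t value) {
  while (value >= 0x80) {
    *buf++ = static_cast<char>((value & 0x7f) | 0x80);
    value >>= 7;
  }
  *buf++ = static_cast<char>(value);
  return buf;
}
// Example: 300 (binary 1_0010_1100) encodes as the two bytes 0xAC 0x02.
```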