snappy 0.0.15-java → 0.0.16-java

@@ -19,5 +19,8 @@ noinst_PROGRAMS = $(TESTS)
  EXTRA_DIST = autogen.sh testdata/alice29.txt testdata/asyoulik.txt testdata/baddata1.snappy testdata/baddata2.snappy testdata/baddata3.snappy testdata/geo.protodata testdata/fireworks.jpeg testdata/html testdata/html_x_4 testdata/kppkn.gtb testdata/lcet10.txt testdata/paper-100k.pdf testdata/plrabn12.txt testdata/urls.10K
  dist_doc_DATA = ChangeLog COPYING INSTALL NEWS README format_description.txt framing_format.txt

+ pkgconfigdir = $(libdir)/pkgconfig
+ nodist_pkgconfig_DATA = snappy.pc
+
  libtool: $(LIBTOOL_DEPS)
  $(SHELL) ./config.status --recheck
@@ -1,3 +1,23 @@
+ Snappy v1.1.4, January 25th 2017:
+
+ * Fix a 1% performance regression when snappy is used in PIE executables.
+
+ * Improve compression performance by 5%.
+
+ * Improve decompression performance by 20%.
+
+ Snappy v1.1.3, July 6th 2015:
+
+ This is the first release to be done from GitHub, which means that
+ some minor things like the ChangeLog format has changed (git log
+ format instead of svn log).
+
+ * Add support for Uncompress() from a Source to a Sink.
+
+ * Various minor changes to improve MSVC support; in particular,
+ the unit tests now compile and run under MSVC.
+
+
  Snappy v1.1.2, February 28th 2014:

  This is a maintenance release with no changes to the actual library
@@ -29,7 +29,7 @@ and the like.

  Performance
  ===========
-
+
  Snappy is intended to be fast. On a single core of a Core i7 processor
  in 64-bit mode, it compresses at about 250 MB/sec or more and decompresses at
  about 500 MB/sec or more. (These numbers are for the slowest inputs in our
@@ -67,7 +67,7 @@ Usage

  Note that Snappy, both the implementation and the main interface,
  is written in C++. However, several third-party bindings to other languages
- are available; see the Google Code page at http://code.google.com/p/snappy/
+ are available; see the home page at http://google.github.io/snappy/
  for more information. Also, if you want to use Snappy from C code, you can
  use the included C bindings in snappy-c.h.

@@ -102,12 +102,12 @@ tests to verify you have not broken anything. Note that if you have the
  Google Test library installed, unit test behavior (especially failures) will be
  significantly more user-friendly. You can find Google Test at

- http://code.google.com/p/googletest/
+ http://github.com/google/googletest

  You probably also want the gflags library for handling of command-line flags;
  you can find it at

- http://code.google.com/p/google-gflags/
+ http://gflags.github.io/gflags/

  In addition to the unit tests, snappy contains microbenchmarks used to
  tune compression and decompression performance. These are automatically run
@@ -129,7 +129,11 @@ test.)
  Contact
  =======

- Snappy is distributed through Google Code. For the latest version, a bug tracker,
+ Snappy is distributed through GitHub. For the latest version, a bug tracker,
  and other information, see

- http://code.google.com/p/snappy/
+ http://google.github.io/snappy/
+
+ or the repository at
+
+ https://github.com/google/snappy
@@ -2,6 +2,11 @@
  rm -rf autom4te.cache
  aclocal -I m4
  autoheader
- libtoolize --copy
+ if glibtoolize --version >/dev/null 2>/dev/null; then
+ LIBTOOLIZE=${LIBTOOLIZE:-glibtoolize}
+ else
+ LIBTOOLIZE=${LIBTOOLIZE:-libtoolize}
+ fi
+ $LIBTOOLIZE --copy
  automake --add-missing --copy
  autoconf
@@ -1,14 +1,15 @@
  m4_define([snappy_major], [1])
  m4_define([snappy_minor], [1])
- m4_define([snappy_patchlevel], [2])
+ m4_define([snappy_patchlevel], [4])

  # Libtool shared library interface versions (current:revision:age)
  # Update this value for every release! (A:B:C will map to foo.so.(A-C).C.B)
  # http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
- m4_define([snappy_ltversion], [3:1:2])
+ m4_define([snappy_ltversion], [4:1:3])

  AC_INIT([snappy], [snappy_major.snappy_minor.snappy_patchlevel])
  AC_CONFIG_MACRO_DIR([m4])
+ AC_CONFIG_AUX_DIR([.])

  # These are flags passed to automake (though they look like gcc flags!)
  AM_INIT_AUTOMAKE([-Wall])
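
For context on the version-info bump above: under the A:B:C → foo.so.(A-C).C.B
mapping cited in the comment, 3:1:2 corresponds to libsnappy.so.1.2.1 and the
new 4:1:3 to libsnappy.so.1.3.1, so the soname stays at libsnappy.so.1 while
the installed file name advances from libsnappy.so.1.2.1 to libsnappy.so.1.3.1.
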
@@ -129,5 +130,5 @@ AC_SUBST([SNAPPY_PATCHLEVEL])
  AC_SUBST([SNAPPY_LTVERSION], snappy_ltversion)

  AC_CONFIG_HEADERS([config.h])
- AC_CONFIG_FILES([Makefile snappy-stubs-public.h])
+ AC_CONFIG_FILES([Makefile snappy-stubs-public.h snappy.pc])
  AC_OUTPUT
@@ -30,8 +30,8 @@
  * Plain C interface (a wrapper around the C++ implementation).
  */

- #ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_
- #define UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_
+ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_
+ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_

  #ifdef __cplusplus
  extern "C" {
@@ -135,4 +135,4 @@ snappy_status snappy_validate_compressed_buffer(const char* compressed,
  } // extern "C"
  #endif

- #endif /* UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */
+ #endif /* THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */
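
A minimal round trip through the plain C interface declared in this header
might look like the sketch below. This is an illustration only (it is not part
of this diff); it assumes the snappy_max_compressed_length, snappy_compress,
snappy_uncompressed_length and snappy_uncompress entry points from snappy-c.h.

    #include <cstring>
    #include <vector>
    #include "snappy-c.h"

    int main() {
      const char input[] = "hello hello hello hello";
      const size_t input_length = sizeof(input) - 1;

      // Compress into a worst-case-sized buffer.
      size_t compressed_length = snappy_max_compressed_length(input_length);
      std::vector<char> compressed(compressed_length);
      if (snappy_compress(input, input_length,
                          compressed.data(), &compressed_length) != SNAPPY_OK)
        return 1;

      // Decompress and verify the round trip.
      size_t uncompressed_length = 0;
      if (snappy_uncompressed_length(compressed.data(), compressed_length,
                                     &uncompressed_length) != SNAPPY_OK)
        return 1;
      std::vector<char> output(uncompressed_length);
      if (snappy_uncompress(compressed.data(), compressed_length,
                            output.data(), &uncompressed_length) != SNAPPY_OK)
        return 1;
      return std::memcmp(input, output.data(), input_length) == 0 ? 0 : 1;
    }
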
@@ -28,8 +28,8 @@
  //
  // Internals shared between the Snappy implementation and its unittest.

- #ifndef UTIL_SNAPPY_SNAPPY_INTERNAL_H_
- #define UTIL_SNAPPY_SNAPPY_INTERNAL_H_
+ #ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
+ #define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_

  #include "snappy-stubs-internal.h"

@@ -70,11 +70,12 @@ char* CompressFragment(const char* input,
  uint16* table,
  const int table_size);

- // Return the largest n such that
+ // Find the largest n such that
  //
  // s1[0,n-1] == s2[0,n-1]
  // and n <= (s2_limit - s2).
  //
+ // Return make_pair(n, n < 8).
  // Does not read *s2_limit or beyond.
  // Does not read *(s1 + (s2_limit - s2)) or beyond.
  // Requires that s2_limit >= s2.
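
To make the new return contract concrete, the hypothetical snippet below calls
the function on two buffers whose first eight bytes agree (a sketch only:
FindMatchLength is an internal helper from snappy-internal.h, so regular
clients would not normally call it).

    #include <cassert>
    #include <utility>
    #include "snappy-internal.h"  // internal header; used here for illustration

    int main() {
      const char* s1 = "abcdefgh12";
      const char* s2 = "abcdefgh34";
      // The first eight bytes match, the ninth differs.
      std::pair<size_t, bool> r =
          snappy::internal::FindMatchLength(s1, s2, s2 + 10);
      assert(r.first == 8);   // n: length of the common prefix
      assert(!r.second);      // second element is (n < 8), false here
      return 0;
    }
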
@@ -82,45 +83,59 @@ char* CompressFragment(const char* input,
  // Separate implementation for x86_64, for speed. Uses the fact that
  // x86_64 is little endian.
  #if defined(ARCH_K8)
- static inline int FindMatchLength(const char* s1,
- const char* s2,
- const char* s2_limit) {
+ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
+ const char* s2,
+ const char* s2_limit) {
  assert(s2_limit >= s2);
- int matched = 0;
+ size_t matched = 0;
+
+ // This block isn't necessary for correctness; we could just start looping
+ // immediately. As an optimization though, it is useful. It creates some not
+ // uncommon code paths that determine, without extra effort, whether the match
+ // length is less than 8. In short, we are hoping to avoid a conditional
+ // branch, and perhaps get better code layout from the C++ compiler.
+ if (PREDICT_TRUE(s2 <= s2_limit - 8)) {
+ uint64 a1 = UNALIGNED_LOAD64(s1);
+ uint64 a2 = UNALIGNED_LOAD64(s2);
+ if (a1 != a2) {
+ return std::pair<size_t, bool>(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3,
+ true);
+ } else {
+ matched = 8;
+ s2 += 8;
+ }
+ }

  // Find out how long the match is. We loop over the data 64 bits at a
  // time until we find a 64-bit block that doesn't match; then we find
  // the first non-matching bit and use that to calculate the total
  // length of the match.
  while (PREDICT_TRUE(s2 <= s2_limit - 8)) {
- if (PREDICT_FALSE(UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched))) {
+ if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
  s2 += 8;
  matched += 8;
  } else {
- // On current (mid-2008) Opteron models there is a 3% more
- // efficient code sequence to find the first non-matching byte.
- // However, what follows is ~10% better on Intel Core 2 and newer,
- // and we expect AMD's bsf instruction to improve.
  uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
  int matching_bits = Bits::FindLSBSetNonZero64(x);
  matched += matching_bits >> 3;
- return matched;
+ assert(matched >= 8);
+ return std::pair<size_t, bool>(matched, false);
  }
  }
  while (PREDICT_TRUE(s2 < s2_limit)) {
- if (PREDICT_TRUE(s1[matched] == *s2)) {
+ if (s1[matched] == *s2) {
  ++s2;
  ++matched;
  } else {
- return matched;
+ return std::pair<size_t, bool>(matched, matched < 8);
  }
  }
- return matched;
+ return std::pair<size_t, bool>(matched, matched < 8);
  }
  #else
- static inline int FindMatchLength(const char* s1,
- const char* s2,
- const char* s2_limit) {
+ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
+ const char* s2,
+ const char* s2_limit) {
  // Implementation based on the x86-64 version, above.
  assert(s2_limit >= s2);
  int matched = 0;
@@ -140,11 +155,73 @@ static inline int FindMatchLength(const char* s1,
  ++matched;
  }
  }
- return matched;
+ return std::pair<size_t, bool>(matched, matched < 8);
  }
  #endif

+ // Lookup tables for decompression code. Give --snappy_dump_decompression_table
+ // to the unit test to recompute char_table.
+
+ enum {
+ LITERAL = 0,
+ COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode
+ COPY_2_BYTE_OFFSET = 2,
+ COPY_4_BYTE_OFFSET = 3
+ };
+ static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset.
+
+ // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
+ static const uint32 wordmask[] = {
+ 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
+ };
+
+ // Data stored per entry in lookup table:
+ // Range Bits-used Description
+ // ------------------------------------
+ // 1..64 0..7 Literal/copy length encoded in opcode byte
+ // 0..7 8..10 Copy offset encoded in opcode byte / 256
+ // 0..4 11..13 Extra bytes after opcode
+ //
+ // We use eight bits for the length even though 7 would have sufficed
+ // because of efficiency reasons:
+ // (1) Extracting a byte is faster than a bit-field
+ // (2) It properly aligns copy offset so we do not need a <<8
+ static const uint16 char_table[256] = {
+ 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
+ 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
+ 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
+ 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
+ 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
+ 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
+ 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
+ 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
+ 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
+ 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
+ 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
+ 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
+ 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
+ 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
+ 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
+ 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
+ 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
+ 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
+ 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
+ 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
+ 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
+ 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
+ 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
+ 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
+ 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
+ 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
+ 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
+ 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
+ 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
+ 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
+ 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
+ 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
+ };
+
  } // end namespace internal
  } // end namespace snappy

- #endif // UTIL_SNAPPY_SNAPPY_INTERNAL_H_
+ #endif // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
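
The bit layout documented in the char_table comment can be checked by hand; the
standalone sketch below (not part of the patch) decodes the entry for tag byte
0x01, which the table above stores as 0x0804.

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint16_t entry = 0x0804;                    // char_table[0x01]
      const unsigned length      = entry & 0xff;        // bits 0..7: literal/copy length
      const unsigned offset_high = (entry >> 8) & 0x7;  // bits 8..10: copy offset / 256
      const unsigned extra_bytes = (entry >> 11) & 0x7; // bits 11..13: bytes after opcode
      std::printf("length=%u offset_high=%u extra_bytes=%u\n",
                  length, offset_high, extra_bytes);
      // Prints "length=4 offset_high=0 extra_bytes=1": a 4-byte copy whose
      // offset is completed by one extra byte following the opcode
      // (tag 0x01 is a COPY_1_BYTE_OFFSET opcode).
      return 0;
    }
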
@@ -40,6 +40,21 @@ char* Sink::GetAppendBuffer(size_t length, char* scratch) {
  return scratch;
  }

+ char* Sink::GetAppendBufferVariable(
+ size_t min_size, size_t desired_size_hint, char* scratch,
+ size_t scratch_size, size_t* allocated_size) {
+ *allocated_size = scratch_size;
+ return scratch;
+ }
+
+ void Sink::AppendAndTakeOwnership(
+ char* bytes, size_t n,
+ void (*deleter)(void*, const char*, size_t),
+ void *deleter_arg) {
+ Append(bytes, n);
+ (*deleter)(deleter_arg, bytes, n);
+ }
+
  ByteArraySource::~ByteArraySource() { }

  size_t ByteArraySource::Available() const { return left_; }
@@ -68,4 +83,22 @@ char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) {
  return dest_;
  }

+ void UncheckedByteArraySink::AppendAndTakeOwnership(
+ char* data, size_t n,
+ void (*deleter)(void*, const char*, size_t),
+ void *deleter_arg) {
+ if (data != dest_) {
+ memcpy(dest_, data, n);
+ (*deleter)(deleter_arg, data, n);
+ }
+ dest_ += n;
+ }
+
+ char* UncheckedByteArraySink::GetAppendBufferVariable(
+ size_t min_size, size_t desired_size_hint, char* scratch,
+ size_t scratch_size, size_t* allocated_size) {
+ *allocated_size = desired_size_hint;
+ return dest_;
  }
+
+ } // namespace snappy
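
A hedged sketch of the caller side of the new AppendAndTakeOwnership() hook:
the deleter signature comes from the declarations above, while DeleteArray and
HandOff are made-up names used only for illustration.

    #include <cstddef>
    #include "snappy-sinksource.h"

    // Matches the documented deleter call (*deleter)(deleter_arg, bytes, n);
    // here it simply frees a buffer that was allocated with new[].
    static void DeleteArray(void* /*deleter_arg*/, const char* bytes,
                            size_t /*n*/) {
      delete[] bytes;
    }

    // Hands `n` already-filled heap bytes to `sink`. The sink may keep the
    // buffer (zero copy) or copy it and release it through DeleteArray.
    void HandOff(snappy::Sink* sink, char* heap_bytes, size_t n) {
      sink->AppendAndTakeOwnership(heap_bytes, n, &DeleteArray, NULL);
    }
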
@@ -26,12 +26,11 @@
  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

- #ifndef UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
- #define UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
+ #ifndef THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_
+ #define THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_

  #include <stddef.h>

-
  namespace snappy {

  // A Sink is an interface that consumes a sequence of bytes.
@@ -60,6 +59,47 @@ class Sink {
  // The default implementation always returns the scratch buffer.
  virtual char* GetAppendBuffer(size_t length, char* scratch);

+ // For higher performance, Sink implementations can provide custom
+ // AppendAndTakeOwnership() and GetAppendBufferVariable() methods.
+ // These methods can reduce the number of copies done during
+ // compression/decompression.
+
+ // Append "bytes[0,n-1] to the sink. Takes ownership of "bytes"
+ // and calls the deleter function as (*deleter)(deleter_arg, bytes, n)
+ // to free the buffer. deleter function must be non NULL.
+ //
+ // The default implementation just calls Append and frees "bytes".
+ // Other implementations may avoid a copy while appending the buffer.
+ virtual void AppendAndTakeOwnership(
+ char* bytes, size_t n, void (*deleter)(void*, const char*, size_t),
+ void *deleter_arg);
+
+ // Returns a writable buffer for appending and writes the buffer's capacity to
+ // *allocated_size. Guarantees *allocated_size >= min_size.
+ // May return a pointer to the caller-owned scratch buffer which must have
+ // scratch_size >= min_size.
+ //
+ // The returned buffer is only valid until the next operation
+ // on this ByteSink.
+ //
+ // After writing at most *allocated_size bytes, call Append() with the
+ // pointer returned from this function and the number of bytes written.
+ // Many Append() implementations will avoid copying bytes if this function
+ // returned an internal buffer.
+ //
+ // If the sink implementation allocates or reallocates an internal buffer,
+ // it should use the desired_size_hint if appropriate. If a caller cannot
+ // provide a reasonable guess at the desired capacity, it should set
+ // desired_size_hint = 0.
+ //
+ // If a non-scratch buffer is returned, the caller may only pass
+ // a prefix to it to Append(). That is, it is not correct to pass an
+ // interior pointer to Append().
+ //
+ // The default implementation always returns the scratch buffer.
+ virtual char* GetAppendBufferVariable(
+ size_t min_size, size_t desired_size_hint, char* scratch,
+ size_t scratch_size, size_t* allocated_size);

  private:
  // No copying
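
Taken together, the comments above imply the following caller-side pattern; the
sketch is illustrative only, and ProduceSomeBytes/WriteChunk are invented names.

    #include <cstring>
    #include "snappy-sinksource.h"

    // Stand-in producer: writes at most `cap` bytes into `buf` and returns the
    // number of bytes actually written.
    static size_t ProduceSomeBytes(char* buf, size_t cap) {
      const char payload[] = "example payload";
      size_t n = sizeof(payload) - 1 < cap ? sizeof(payload) - 1 : cap;
      std::memcpy(buf, payload, n);
      return n;
    }

    void WriteChunk(snappy::Sink* sink) {
      char scratch[4096];
      size_t allocated = 0;
      char* buf = sink->GetAppendBufferVariable(/*min_size=*/1024,
                                                /*desired_size_hint=*/64 * 1024,
                                                scratch, sizeof(scratch),
                                                &allocated);
      // Write no more than `allocated` bytes, then hand back a prefix of the
      // same pointer, as the contract above requires.
      size_t written = ProduceSomeBytes(buf, allocated);
      sink->Append(buf, written);
    }
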
@@ -122,6 +162,12 @@ class UncheckedByteArraySink : public Sink {
  virtual ~UncheckedByteArraySink();
  virtual void Append(const char* data, size_t n);
  virtual char* GetAppendBuffer(size_t len, char* scratch);
+ virtual char* GetAppendBufferVariable(
+ size_t min_size, size_t desired_size_hint, char* scratch,
+ size_t scratch_size, size_t* allocated_size);
+ virtual void AppendAndTakeOwnership(
+ char* bytes, size_t n, void (*deleter)(void*, const char*, size_t),
+ void *deleter_arg);

  // Return the current output pointer so that a caller can see how
  // many bytes were produced.
@@ -131,7 +177,6 @@ class UncheckedByteArraySink : public Sink {
  char* dest_;
  };

+ } // namespace snappy

- }
-
- #endif // UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
+ #endif // THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_
@@ -28,8 +28,8 @@
  //
  // Various stubs for the open-source version of Snappy.

- #ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
- #define UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
+ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
+ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_

  #ifdef HAVE_CONFIG_H
  #include "config.h"
@@ -116,6 +116,15 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
  // sub-architectures.
  //
  // This is a mess, but there's not much we can do about it.
+ //
+ // To further complicate matters, only LDR instructions (single reads) are
+ // allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we
+ // explicitly tell the compiler that these accesses can be unaligned, it can and
+ // will combine accesses. On armcc, the way to signal this is done by accessing
+ // through the type (uint32 __packed *), but GCC has no such attribute
+ // (it ignores __attribute__((packed)) on individual variables). However,
+ // we can tell it that a _struct_ is unaligned, which has the same effect,
+ // so we do that.

  #elif defined(__arm__) && \
  !defined(__ARM_ARCH_4__) && \
@@ -131,11 +140,39 @@ static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
  !defined(__ARM_ARCH_6ZK__) && \
  !defined(__ARM_ARCH_6T2__)

- #define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
- #define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+ #if __GNUC__
+ #define ATTRIBUTE_PACKED __attribute__((__packed__))
+ #else
+ #define ATTRIBUTE_PACKED
+ #endif

- #define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
- #define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
+ namespace base {
+ namespace internal {
+
+ struct Unaligned16Struct {
+ uint16 value;
+ uint8 dummy; // To make the size non-power-of-two.
+ } ATTRIBUTE_PACKED;
+
+ struct Unaligned32Struct {
+ uint32 value;
+ uint8 dummy; // To make the size non-power-of-two.
+ } ATTRIBUTE_PACKED;
+
+ } // namespace internal
+ } // namespace base
+
+ #define UNALIGNED_LOAD16(_p) \
+ ((reinterpret_cast<const ::snappy::base::internal::Unaligned16Struct *>(_p))->value)
+ #define UNALIGNED_LOAD32(_p) \
+ ((reinterpret_cast<const ::snappy::base::internal::Unaligned32Struct *>(_p))->value)
+
+ #define UNALIGNED_STORE16(_p, _val) \
+ ((reinterpret_cast< ::snappy::base::internal::Unaligned16Struct *>(_p))->value = \
+ (_val))
+ #define UNALIGNED_STORE32(_p, _val) \
+ ((reinterpret_cast< ::snappy::base::internal::Unaligned32Struct *>(_p))->value = \
+ (_val))

  // TODO(user): NEON supports unaligned 64-bit loads and stores.
  // See if that would be more efficient on platforms supporting it,
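
The packed-struct trick can be seen in isolation in the sketch below (not part
of the patch; U32 and LoadUnaligned32 are illustrative names, and the GCC-style
attribute mirrors the ATTRIBUTE_PACKED definition above).

    #include <cstdint>

    // Marking the *struct* packed tells GCC the field may live at any address,
    // so the generated loads stay safe for unaligned pointers instead of being
    // combined into LDRD/LDM on ARM.
    struct U32 {
      uint32_t value;
    } __attribute__((__packed__));

    static inline uint32_t LoadUnaligned32(const void* p) {
      return reinterpret_cast<const U32*>(p)->value;
    }

    int main() {
      unsigned char buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      // Read four bytes starting at an odd (unaligned) address.
      return LoadUnaligned32(buf + 1) == 0 ? 1 : 0;
    }
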
@@ -488,4 +525,4 @@ inline char* string_as_array(string* str) {

  } // namespace snappy

- #endif // UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
+ #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_