snappy 0.0.12-java → 0.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +28 -1
  3. data/Gemfile +6 -1
  4. data/README.md +28 -4
  5. data/Rakefile +1 -0
  6. data/ext/extconf.rb +21 -24
  7. data/lib/snappy.rb +3 -1
  8. data/lib/snappy/hadoop.rb +22 -0
  9. data/lib/snappy/hadoop/reader.rb +58 -0
  10. data/lib/snappy/hadoop/writer.rb +51 -0
  11. data/lib/snappy/reader.rb +11 -7
  12. data/lib/snappy/shim.rb +30 -0
  13. data/lib/snappy/version.rb +3 -1
  14. data/lib/snappy/writer.rb +14 -9
  15. data/smoke.sh +8 -0
  16. data/snappy.gemspec +6 -30
  17. data/test/hadoop/test-snappy-hadoop-reader.rb +103 -0
  18. data/test/hadoop/test-snappy-hadoop-writer.rb +48 -0
  19. data/test/test-snappy-hadoop.rb +22 -0
  20. data/vendor/snappy/CMakeLists.txt +174 -0
  21. data/vendor/snappy/CONTRIBUTING.md +26 -0
  22. data/vendor/snappy/COPYING +1 -1
  23. data/vendor/snappy/NEWS +52 -0
  24. data/vendor/snappy/{README → README.md} +23 -9
  25. data/vendor/snappy/cmake/SnappyConfig.cmake +1 -0
  26. data/vendor/snappy/cmake/config.h.in +62 -0
  27. data/vendor/snappy/snappy-c.h +3 -3
  28. data/vendor/snappy/snappy-internal.h +101 -27
  29. data/vendor/snappy/snappy-sinksource.cc +33 -0
  30. data/vendor/snappy/snappy-sinksource.h +51 -6
  31. data/vendor/snappy/snappy-stubs-internal.h +107 -37
  32. data/vendor/snappy/snappy-stubs-public.h.in +16 -20
  33. data/vendor/snappy/snappy-test.cc +15 -9
  34. data/vendor/snappy/snappy-test.h +34 -43
  35. data/vendor/snappy/snappy.cc +529 -320
  36. data/vendor/snappy/snappy.h +23 -4
  37. data/vendor/snappy/snappy_unittest.cc +240 -185
  38. metadata +27 -74
  39. data/vendor/snappy/ChangeLog +0 -1916
  40. data/vendor/snappy/Makefile.am +0 -23
  41. data/vendor/snappy/autogen.sh +0 -7
  42. data/vendor/snappy/configure.ac +0 -133
  43. data/vendor/snappy/m4/gtest.m4 +0 -74
  44. data/vendor/snappy/testdata/alice29.txt +0 -3609
  45. data/vendor/snappy/testdata/asyoulik.txt +0 -4122
  46. data/vendor/snappy/testdata/baddata1.snappy +0 -0
  47. data/vendor/snappy/testdata/baddata2.snappy +0 -0
  48. data/vendor/snappy/testdata/baddata3.snappy +0 -0
  49. data/vendor/snappy/testdata/fireworks.jpeg +0 -0
  50. data/vendor/snappy/testdata/geo.protodata +0 -0
  51. data/vendor/snappy/testdata/html +0 -1
  52. data/vendor/snappy/testdata/html_x_4 +0 -1
  53. data/vendor/snappy/testdata/kppkn.gtb +0 -0
  54. data/vendor/snappy/testdata/lcet10.txt +0 -7519
  55. data/vendor/snappy/testdata/paper-100k.pdf +2 -600
  56. data/vendor/snappy/testdata/plrabn12.txt +0 -10699
  57. data/vendor/snappy/testdata/urls.10K +0 -10000
@@ -29,12 +29,12 @@ and the like.
29
29
 
30
30
  Performance
31
31
  ===========
32
-
32
+
33
33
  Snappy is intended to be fast. On a single core of a Core i7 processor
34
34
  in 64-bit mode, it compresses at about 250 MB/sec or more and decompresses at
35
35
  about 500 MB/sec or more. (These numbers are for the slowest inputs in our
36
36
  benchmark suite; others are much faster.) In our tests, Snappy usually
37
- is faster than algorithms in the same class (e.g. LZO, LZF, FastLZ, QuickLZ,
37
+ is faster than algorithms in the same class (e.g. LZO, LZF, QuickLZ,
38
38
  etc.) while achieving comparable compression ratios.
39
39
 
40
40
  Typical compression ratios (based on the benchmark suite) are about 1.5-1.7x
@@ -52,7 +52,7 @@ In particular:
52
52
  - Snappy uses 64-bit operations in several places to process more data at
53
53
  once than would otherwise be possible.
54
54
  - Snappy assumes unaligned 32- and 64-bit loads and stores are cheap.
55
- On some platforms, these must be emulated with single-byte loads
55
+ On some platforms, these must be emulated with single-byte loads
56
56
  and stores, which is much slower.
57
57
  - Snappy assumes little-endian throughout, and needs to byte-swap data in
58
58
  several places if running on a big-endian platform.
@@ -62,12 +62,22 @@ Performance optimizations, whether for 64-bit x86 or other platforms,
62
62
  are of course most welcome; see "Contact", below.
63
63
 
64
64
 
65
+ Building
66
+ ========
67
+
68
+ CMake is supported and autotools will soon be deprecated.
69
+ You need CMake 3.4 or above to build:
70
+
71
+ mkdir build
72
+ cd build && cmake ../ && make
73
+
74
+
65
75
  Usage
66
76
  =====
67
77
 
68
78
  Note that Snappy, both the implementation and the main interface,
69
79
  is written in C++. However, several third-party bindings to other languages
70
- are available; see the Google Code page at http://code.google.com/p/snappy/
80
+ are available; see the home page at http://google.github.io/snappy/
71
81
  for more information. Also, if you want to use Snappy from C code, you can
72
82
  use the included C bindings in snappy-c.h.
73
83
 
@@ -102,12 +112,12 @@ tests to verify you have not broken anything. Note that if you have the
102
112
  Google Test library installed, unit test behavior (especially failures) will be
103
113
  significantly more user-friendly. You can find Google Test at
104
114
 
105
- http://code.google.com/p/googletest/
115
+ http://github.com/google/googletest
106
116
 
107
117
  You probably also want the gflags library for handling of command-line flags;
108
118
  you can find it at
109
119
 
110
- http://code.google.com/p/google-gflags/
120
+ http://gflags.github.io/gflags/
111
121
 
112
122
  In addition to the unit tests, snappy contains microbenchmarks used to
113
123
  tune compression and decompression performance. These are automatically run
@@ -116,7 +126,7 @@ before the unit tests, but you can disable them using the flag
116
126
  need to edit the source).
117
127
 
118
128
  Finally, snappy can benchmark Snappy against a few other compression libraries
119
- (zlib, LZO, LZF, FastLZ and QuickLZ), if they were detected at configure time.
129
+ (zlib, LZO, LZF, and QuickLZ), if they were detected at configure time.
120
130
  To benchmark using a given file, give the compression algorithm you want to test
121
131
  Snappy against (e.g. --zlib) and then a list of one or more file names on the
122
132
  command line. The testdata/ directory contains the files used by the
@@ -129,7 +139,11 @@ test.)
129
139
  Contact
130
140
  =======
131
141
 
132
- Snappy is distributed through Google Code. For the latest version, a bug tracker,
142
+ Snappy is distributed through GitHub. For the latest version, a bug tracker,
133
143
  and other information, see
134
144
 
135
- http://code.google.com/p/snappy/
145
+ http://google.github.io/snappy/
146
+
147
+ or the repository at
148
+
149
+ https://github.com/google/snappy
@@ -0,0 +1 @@
1
+ include("${CMAKE_CURRENT_LIST_DIR}/SnappyTargets.cmake")
@@ -0,0 +1,62 @@
1
+ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
2
+ #define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
3
+
4
+ /* Define to 1 if the compiler supports __builtin_ctz and friends. */
5
+ #cmakedefine HAVE_BUILTIN_CTZ 1
6
+
7
+ /* Define to 1 if the compiler supports __builtin_expect. */
8
+ #cmakedefine HAVE_BUILTIN_EXPECT 1
9
+
10
+ /* Define to 1 if you have the <byteswap.h> header file. */
11
+ #cmakedefine HAVE_BYTESWAP_H 1
12
+
13
+ /* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */
14
+ #cmakedefine HAVE_FUNC_MMAP 1
15
+
16
+ /* Define to 1 if you have a definition for sysconf() in <unistd.h>. */
17
+ #cmakedefine HAVE_FUNC_SYSCONF 1
18
+
19
+ /* Define to 1 to use the gflags package for command-line parsing. */
20
+ #cmakedefine HAVE_GFLAGS 1
21
+
22
+ /* Define to 1 if you have Google Test. */
23
+ #cmakedefine HAVE_GTEST 1
24
+
25
+ /* Define to 1 if you have the `lzo2' library (-llzo2). */
26
+ #cmakedefine HAVE_LIBLZO2 1
27
+
28
+ /* Define to 1 if you have the `z' library (-lz). */
29
+ #cmakedefine HAVE_LIBZ 1
30
+
31
+ /* Define to 1 if you have the <stddef.h> header file. */
32
+ #cmakedefine HAVE_STDDEF_H 1
33
+
34
+ /* Define to 1 if you have the <stdint.h> header file. */
35
+ #cmakedefine HAVE_STDINT_H 1
36
+
37
+ /* Define to 1 if you have the <sys/endian.h> header file. */
38
+ #cmakedefine HAVE_SYS_ENDIAN_H 1
39
+
40
+ /* Define to 1 if you have the <sys/mman.h> header file. */
41
+ #cmakedefine HAVE_SYS_MMAN_H 1
42
+
43
+ /* Define to 1 if you have the <sys/resource.h> header file. */
44
+ #cmakedefine HAVE_SYS_RESOURCE_H 1
45
+
46
+ /* Define to 1 if you have the <sys/time.h> header file. */
47
+ #cmakedefine HAVE_SYS_TIME_H 1
48
+
49
+ /* Define to 1 if you have the <sys/uio.h> header file. */
50
+ #cmakedefine HAVE_SYS_UIO_H 1
51
+
52
+ /* Define to 1 if you have the <unistd.h> header file. */
53
+ #cmakedefine HAVE_UNISTD_H 1
54
+
55
+ /* Define to 1 if you have the <windows.h> header file. */
56
+ #cmakedefine HAVE_WINDOWS_H 1
57
+
58
+ /* Define to 1 if your processor stores words with the most significant byte
59
+ first (like Motorola and SPARC, unlike Intel and VAX). */
60
+ #cmakedefine SNAPPY_IS_BIG_ENDIAN 1
61
+
62
+ #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
@@ -30,8 +30,8 @@
30
30
  * Plain C interface (a wrapper around the C++ implementation).
31
31
  */
32
32
 
33
- #ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_
34
- #define UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_
33
+ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_
34
+ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_
35
35
 
36
36
  #ifdef __cplusplus
37
37
  extern "C" {
@@ -135,4 +135,4 @@ snappy_status snappy_validate_compressed_buffer(const char* compressed,
135
135
  } // extern "C"
136
136
  #endif
137
137
 
138
- #endif /* UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */
138
+ #endif /* THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */
@@ -28,8 +28,8 @@
28
28
  //
29
29
  // Internals shared between the Snappy implementation and its unittest.
30
30
 
31
- #ifndef UTIL_SNAPPY_SNAPPY_INTERNAL_H_
32
- #define UTIL_SNAPPY_SNAPPY_INTERNAL_H_
31
+ #ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
32
+ #define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
33
33
 
34
34
  #include "snappy-stubs-internal.h"
35
35
 
@@ -50,7 +50,9 @@ class WorkingMemory {
50
50
  uint16 small_table_[1<<10]; // 2KB
51
51
  uint16* large_table_; // Allocated only when needed
52
52
 
53
- DISALLOW_COPY_AND_ASSIGN(WorkingMemory);
53
+ // No copying
54
+ WorkingMemory(const WorkingMemory&);
55
+ void operator=(const WorkingMemory&);
54
56
  };
55
57
 
56
58
  // Flat array compression that does not emit the "uncompressed length"
@@ -70,57 +72,72 @@ char* CompressFragment(const char* input,
70
72
  uint16* table,
71
73
  const int table_size);
72
74
 
73
- // Return the largest n such that
75
+ // Find the largest n such that
74
76
  //
75
77
  // s1[0,n-1] == s2[0,n-1]
76
78
  // and n <= (s2_limit - s2).
77
79
  //
80
+ // Return make_pair(n, n < 8).
78
81
  // Does not read *s2_limit or beyond.
79
82
  // Does not read *(s1 + (s2_limit - s2)) or beyond.
80
83
  // Requires that s2_limit >= s2.
81
84
  //
82
- // Separate implementation for x86_64, for speed. Uses the fact that
83
- // x86_64 is little endian.
84
- #if defined(ARCH_K8)
85
- static inline int FindMatchLength(const char* s1,
86
- const char* s2,
87
- const char* s2_limit) {
85
+ // Separate implementation for 64-bit, little-endian cpus.
86
+ #if !defined(SNAPPY_IS_BIG_ENDIAN) && \
87
+ (defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM))
88
+ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
89
+ const char* s2,
90
+ const char* s2_limit) {
88
91
  assert(s2_limit >= s2);
89
- int matched = 0;
92
+ size_t matched = 0;
93
+
94
+ // This block isn't necessary for correctness; we could just start looping
95
+ // immediately. As an optimization though, it is useful. It creates some not
96
+ // uncommon code paths that determine, without extra effort, whether the match
97
+ // length is less than 8. In short, we are hoping to avoid a conditional
98
+ // branch, and perhaps get better code layout from the C++ compiler.
99
+ if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) {
100
+ uint64 a1 = UNALIGNED_LOAD64(s1);
101
+ uint64 a2 = UNALIGNED_LOAD64(s2);
102
+ if (a1 != a2) {
103
+ return std::pair<size_t, bool>(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3,
104
+ true);
105
+ } else {
106
+ matched = 8;
107
+ s2 += 8;
108
+ }
109
+ }
90
110
 
91
111
  // Find out how long the match is. We loop over the data 64 bits at a
92
112
  // time until we find a 64-bit block that doesn't match; then we find
93
113
  // the first non-matching bit and use that to calculate the total
94
114
  // length of the match.
95
- while (PREDICT_TRUE(s2 <= s2_limit - 8)) {
96
- if (PREDICT_FALSE(UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched))) {
115
+ while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) {
116
+ if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
97
117
  s2 += 8;
98
118
  matched += 8;
99
119
  } else {
100
- // On current (mid-2008) Opteron models there is a 3% more
101
- // efficient code sequence to find the first non-matching byte.
102
- // However, what follows is ~10% better on Intel Core 2 and newer,
103
- // and we expect AMD's bsf instruction to improve.
104
120
  uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
105
121
  int matching_bits = Bits::FindLSBSetNonZero64(x);
106
122
  matched += matching_bits >> 3;
107
- return matched;
123
+ assert(matched >= 8);
124
+ return std::pair<size_t, bool>(matched, false);
108
125
  }
109
126
  }
110
- while (PREDICT_TRUE(s2 < s2_limit)) {
111
- if (PREDICT_TRUE(s1[matched] == *s2)) {
127
+ while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) {
128
+ if (s1[matched] == *s2) {
112
129
  ++s2;
113
130
  ++matched;
114
131
  } else {
115
- return matched;
132
+ return std::pair<size_t, bool>(matched, matched < 8);
116
133
  }
117
134
  }
118
- return matched;
135
+ return std::pair<size_t, bool>(matched, matched < 8);
119
136
  }
120
137
  #else
121
- static inline int FindMatchLength(const char* s1,
122
- const char* s2,
123
- const char* s2_limit) {
138
+ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
139
+ const char* s2,
140
+ const char* s2_limit) {
124
141
  // Implementation based on the x86-64 version, above.
125
142
  assert(s2_limit >= s2);
126
143
  int matched = 0;
@@ -140,11 +157,68 @@ static inline int FindMatchLength(const char* s1,
140
157
  ++matched;
141
158
  }
142
159
  }
143
- return matched;
160
+ return std::pair<size_t, bool>(matched, matched < 8);
144
161
  }
145
162
  #endif
146
163
 
164
+ // Lookup tables for decompression code. Give --snappy_dump_decompression_table
165
+ // to the unit test to recompute char_table.
166
+
167
+ enum {
168
+ LITERAL = 0,
169
+ COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode
170
+ COPY_2_BYTE_OFFSET = 2,
171
+ COPY_4_BYTE_OFFSET = 3
172
+ };
173
+ static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset.
174
+
175
+ // Data stored per entry in lookup table:
176
+ // Range Bits-used Description
177
+ // ------------------------------------
178
+ // 1..64 0..7 Literal/copy length encoded in opcode byte
179
+ // 0..7 8..10 Copy offset encoded in opcode byte / 256
180
+ // 0..4 11..13 Extra bytes after opcode
181
+ //
182
+ // We use eight bits for the length even though 7 would have sufficed
183
+ // because of efficiency reasons:
184
+ // (1) Extracting a byte is faster than a bit-field
185
+ // (2) It properly aligns copy offset so we do not need a <<8
186
+ static const uint16 char_table[256] = {
187
+ 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
188
+ 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
189
+ 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
190
+ 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
191
+ 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
192
+ 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
193
+ 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
194
+ 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
195
+ 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
196
+ 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
197
+ 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
198
+ 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
199
+ 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
200
+ 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
201
+ 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
202
+ 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
203
+ 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
204
+ 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
205
+ 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
206
+ 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
207
+ 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
208
+ 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
209
+ 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
210
+ 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
211
+ 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
212
+ 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
213
+ 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
214
+ 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
215
+ 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
216
+ 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
217
+ 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
218
+ 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
219
+ };
220
+
147
221
  } // end namespace internal
148
222
  } // end namespace snappy
149
223
 
150
- #endif // UTIL_SNAPPY_SNAPPY_INTERNAL_H_
224
+ #endif // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
@@ -40,6 +40,21 @@ char* Sink::GetAppendBuffer(size_t length, char* scratch) {
40
40
  return scratch;
41
41
  }
42
42
 
43
+ char* Sink::GetAppendBufferVariable(
44
+ size_t min_size, size_t desired_size_hint, char* scratch,
45
+ size_t scratch_size, size_t* allocated_size) {
46
+ *allocated_size = scratch_size;
47
+ return scratch;
48
+ }
49
+
50
+ void Sink::AppendAndTakeOwnership(
51
+ char* bytes, size_t n,
52
+ void (*deleter)(void*, const char*, size_t),
53
+ void *deleter_arg) {
54
+ Append(bytes, n);
55
+ (*deleter)(deleter_arg, bytes, n);
56
+ }
57
+
43
58
  ByteArraySource::~ByteArraySource() { }
44
59
 
45
60
  size_t ByteArraySource::Available() const { return left_; }
@@ -68,4 +83,22 @@ char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) {
68
83
  return dest_;
69
84
  }
70
85
 
86
+ void UncheckedByteArraySink::AppendAndTakeOwnership(
87
+ char* data, size_t n,
88
+ void (*deleter)(void*, const char*, size_t),
89
+ void *deleter_arg) {
90
+ if (data != dest_) {
91
+ memcpy(dest_, data, n);
92
+ (*deleter)(deleter_arg, data, n);
93
+ }
94
+ dest_ += n;
95
+ }
96
+
97
+ char* UncheckedByteArraySink::GetAppendBufferVariable(
98
+ size_t min_size, size_t desired_size_hint, char* scratch,
99
+ size_t scratch_size, size_t* allocated_size) {
100
+ *allocated_size = desired_size_hint;
101
+ return dest_;
71
102
  }
103
+
104
+ } // namespace snappy
@@ -26,12 +26,11 @@
26
26
  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
27
  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
28
 
29
- #ifndef UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
30
- #define UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
29
+ #ifndef THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_
30
+ #define THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_
31
31
 
32
32
  #include <stddef.h>
33
33
 
34
-
35
34
  namespace snappy {
36
35
 
37
36
  // A Sink is an interface that consumes a sequence of bytes.
@@ -60,6 +59,47 @@ class Sink {
60
59
  // The default implementation always returns the scratch buffer.
61
60
  virtual char* GetAppendBuffer(size_t length, char* scratch);
62
61
 
62
+ // For higher performance, Sink implementations can provide custom
63
+ // AppendAndTakeOwnership() and GetAppendBufferVariable() methods.
64
+ // These methods can reduce the number of copies done during
65
+ // compression/decompression.
66
+
67
+ // Append "bytes[0,n-1]" to the sink. Takes ownership of "bytes"
68
+ // and calls the deleter function as (*deleter)(deleter_arg, bytes, n)
69
+ // to free the buffer. The deleter function must be non-NULL.
70
+ //
71
+ // The default implementation just calls Append and frees "bytes".
72
+ // Other implementations may avoid a copy while appending the buffer.
73
+ virtual void AppendAndTakeOwnership(
74
+ char* bytes, size_t n, void (*deleter)(void*, const char*, size_t),
75
+ void *deleter_arg);
76
+
77
+ // Returns a writable buffer for appending and writes the buffer's capacity to
78
+ // *allocated_size. Guarantees *allocated_size >= min_size.
79
+ // May return a pointer to the caller-owned scratch buffer which must have
80
+ // scratch_size >= min_size.
81
+ //
82
+ // The returned buffer is only valid until the next operation
83
+ // on this Sink.
84
+ //
85
+ // After writing at most *allocated_size bytes, call Append() with the
86
+ // pointer returned from this function and the number of bytes written.
87
+ // Many Append() implementations will avoid copying bytes if this function
88
+ // returned an internal buffer.
89
+ //
90
+ // If the sink implementation allocates or reallocates an internal buffer,
91
+ // it should use the desired_size_hint if appropriate. If a caller cannot
92
+ // provide a reasonable guess at the desired capacity, it should set
93
+ // desired_size_hint = 0.
94
+ //
95
+ // If a non-scratch buffer is returned, the caller may only pass
96
+ // a prefix of it to Append(). That is, it is not correct to pass an
97
+ // interior pointer to Append().
98
+ //
99
+ // The default implementation always returns the scratch buffer.
100
+ virtual char* GetAppendBufferVariable(
101
+ size_t min_size, size_t desired_size_hint, char* scratch,
102
+ size_t scratch_size, size_t* allocated_size);
63
103
 
64
104
  private:
65
105
  // No copying
@@ -122,6 +162,12 @@ class UncheckedByteArraySink : public Sink {
122
162
  virtual ~UncheckedByteArraySink();
123
163
  virtual void Append(const char* data, size_t n);
124
164
  virtual char* GetAppendBuffer(size_t len, char* scratch);
165
+ virtual char* GetAppendBufferVariable(
166
+ size_t min_size, size_t desired_size_hint, char* scratch,
167
+ size_t scratch_size, size_t* allocated_size);
168
+ virtual void AppendAndTakeOwnership(
169
+ char* bytes, size_t n, void (*deleter)(void*, const char*, size_t),
170
+ void *deleter_arg);
125
171
 
126
172
  // Return the current output pointer so that a caller can see how
127
173
  // many bytes were produced.
@@ -131,7 +177,6 @@ class UncheckedByteArraySink : public Sink {
131
177
  char* dest_;
132
178
  };
133
179
 
180
+ } // namespace snappy
134
181
 
135
- }
136
-
137
- #endif // UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
182
+ #endif // THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_