snappy 0.0.12-java → 0.1.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +28 -1
- data/Gemfile +6 -1
- data/README.md +28 -4
- data/Rakefile +1 -0
- data/ext/extconf.rb +21 -24
- data/lib/snappy.rb +3 -1
- data/lib/snappy/hadoop.rb +22 -0
- data/lib/snappy/hadoop/reader.rb +58 -0
- data/lib/snappy/hadoop/writer.rb +51 -0
- data/lib/snappy/reader.rb +11 -7
- data/lib/snappy/shim.rb +30 -0
- data/lib/snappy/version.rb +3 -1
- data/lib/snappy/writer.rb +14 -9
- data/smoke.sh +8 -0
- data/snappy.gemspec +6 -30
- data/test/hadoop/test-snappy-hadoop-reader.rb +103 -0
- data/test/hadoop/test-snappy-hadoop-writer.rb +48 -0
- data/test/test-snappy-hadoop.rb +22 -0
- data/vendor/snappy/CMakeLists.txt +174 -0
- data/vendor/snappy/CONTRIBUTING.md +26 -0
- data/vendor/snappy/COPYING +1 -1
- data/vendor/snappy/NEWS +52 -0
- data/vendor/snappy/{README → README.md} +23 -9
- data/vendor/snappy/cmake/SnappyConfig.cmake +1 -0
- data/vendor/snappy/cmake/config.h.in +62 -0
- data/vendor/snappy/snappy-c.h +3 -3
- data/vendor/snappy/snappy-internal.h +101 -27
- data/vendor/snappy/snappy-sinksource.cc +33 -0
- data/vendor/snappy/snappy-sinksource.h +51 -6
- data/vendor/snappy/snappy-stubs-internal.h +107 -37
- data/vendor/snappy/snappy-stubs-public.h.in +16 -20
- data/vendor/snappy/snappy-test.cc +15 -9
- data/vendor/snappy/snappy-test.h +34 -43
- data/vendor/snappy/snappy.cc +529 -320
- data/vendor/snappy/snappy.h +23 -4
- data/vendor/snappy/snappy_unittest.cc +240 -185
- metadata +27 -74
- data/vendor/snappy/ChangeLog +0 -1916
- data/vendor/snappy/Makefile.am +0 -23
- data/vendor/snappy/autogen.sh +0 -7
- data/vendor/snappy/configure.ac +0 -133
- data/vendor/snappy/m4/gtest.m4 +0 -74
- data/vendor/snappy/testdata/alice29.txt +0 -3609
- data/vendor/snappy/testdata/asyoulik.txt +0 -4122
- data/vendor/snappy/testdata/baddata1.snappy +0 -0
- data/vendor/snappy/testdata/baddata2.snappy +0 -0
- data/vendor/snappy/testdata/baddata3.snappy +0 -0
- data/vendor/snappy/testdata/fireworks.jpeg +0 -0
- data/vendor/snappy/testdata/geo.protodata +0 -0
- data/vendor/snappy/testdata/html +0 -1
- data/vendor/snappy/testdata/html_x_4 +0 -1
- data/vendor/snappy/testdata/kppkn.gtb +0 -0
- data/vendor/snappy/testdata/lcet10.txt +0 -7519
- data/vendor/snappy/testdata/paper-100k.pdf +2 -600
- data/vendor/snappy/testdata/plrabn12.txt +0 -10699
- data/vendor/snappy/testdata/urls.10K +0 -10000
@@ -29,12 +29,12 @@ and the like.
|
|
29
29
|
|
30
30
|
Performance
|
31
31
|
===========
|
32
|
-
|
32
|
+
|
33
33
|
Snappy is intended to be fast. On a single core of a Core i7 processor
|
34
34
|
in 64-bit mode, it compresses at about 250 MB/sec or more and decompresses at
|
35
35
|
about 500 MB/sec or more. (These numbers are for the slowest inputs in our
|
36
36
|
benchmark suite; others are much faster.) In our tests, Snappy usually
|
37
|
-
is faster than algorithms in the same class (e.g. LZO, LZF,
|
37
|
+
is faster than algorithms in the same class (e.g. LZO, LZF, QuickLZ,
|
38
38
|
etc.) while achieving comparable compression ratios.
|
39
39
|
|
40
40
|
Typical compression ratios (based on the benchmark suite) are about 1.5-1.7x
|
@@ -52,7 +52,7 @@ In particular:
|
|
52
52
|
- Snappy uses 64-bit operations in several places to process more data at
|
53
53
|
once than would otherwise be possible.
|
54
54
|
- Snappy assumes unaligned 32- and 64-bit loads and stores are cheap.
|
55
|
-
On some platforms, these must be emulated with single-byte loads
|
55
|
+
On some platforms, these must be emulated with single-byte loads
|
56
56
|
and stores, which is much slower.
|
57
57
|
- Snappy assumes little-endian throughout, and needs to byte-swap data in
|
58
58
|
several places if running on a big-endian platform.
|
@@ -62,12 +62,22 @@ Performance optimizations, whether for 64-bit x86 or other platforms,
|
|
62
62
|
are of course most welcome; see "Contact", below.
|
63
63
|
|
64
64
|
|
65
|
+
Building
|
66
|
+
========
|
67
|
+
|
68
|
+
CMake is supported and autotools will soon be deprecated.
|
69
|
+
You need CMake 3.4 or above to build:
|
70
|
+
|
71
|
+
mkdir build
|
72
|
+
cd build && cmake ../ && make
|
73
|
+
|
74
|
+
|
65
75
|
Usage
|
66
76
|
=====
|
67
77
|
|
68
78
|
Note that Snappy, both the implementation and the main interface,
|
69
79
|
is written in C++. However, several third-party bindings to other languages
|
70
|
-
are available; see the
|
80
|
+
are available; see the home page at http://google.github.io/snappy/
|
71
81
|
for more information. Also, if you want to use Snappy from C code, you can
|
72
82
|
use the included C bindings in snappy-c.h.
|
73
83
|
|
@@ -102,12 +112,12 @@ tests to verify you have not broken anything. Note that if you have the
|
|
102
112
|
Google Test library installed, unit test behavior (especially failures) will be
|
103
113
|
significantly more user-friendly. You can find Google Test at
|
104
114
|
|
105
|
-
http://
|
115
|
+
http://github.com/google/googletest
|
106
116
|
|
107
117
|
You probably also want the gflags library for handling of command-line flags;
|
108
118
|
you can find it at
|
109
119
|
|
110
|
-
http://
|
120
|
+
http://gflags.github.io/gflags/
|
111
121
|
|
112
122
|
In addition to the unit tests, snappy contains microbenchmarks used to
|
113
123
|
tune compression and decompression performance. These are automatically run
|
@@ -116,7 +126,7 @@ before the unit tests, but you can disable them using the flag
|
|
116
126
|
need to edit the source).
|
117
127
|
|
118
128
|
Finally, snappy can benchmark Snappy against a few other compression libraries
|
119
|
-
(zlib, LZO, LZF,
|
129
|
+
(zlib, LZO, LZF, and QuickLZ), if they were detected at configure time.
|
120
130
|
To benchmark using a given file, give the compression algorithm you want to test
|
121
131
|
Snappy against (e.g. --zlib) and then a list of one or more file names on the
|
122
132
|
command line. The testdata/ directory contains the files used by the
|
@@ -129,7 +139,11 @@ test.)
|
|
129
139
|
Contact
|
130
140
|
=======
|
131
141
|
|
132
|
-
Snappy is distributed through
|
142
|
+
Snappy is distributed through GitHub. For the latest version, a bug tracker,
|
133
143
|
and other information, see
|
134
144
|
|
135
|
-
http://
|
145
|
+
http://google.github.io/snappy/
|
146
|
+
|
147
|
+
or the repository at
|
148
|
+
|
149
|
+
https://github.com/google/snappy
|
@@ -0,0 +1 @@
|
|
1
|
+
include("${CMAKE_CURRENT_LIST_DIR}/SnappyTargets.cmake")
|
@@ -0,0 +1,62 @@
|
|
1
|
+
#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
|
2
|
+
#define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
|
3
|
+
|
4
|
+
/* Define to 1 if the compiler supports __builtin_ctz and friends. */
|
5
|
+
#cmakedefine HAVE_BUILTIN_CTZ 1
|
6
|
+
|
7
|
+
/* Define to 1 if the compiler supports __builtin_expect. */
|
8
|
+
#cmakedefine HAVE_BUILTIN_EXPECT 1
|
9
|
+
|
10
|
+
/* Define to 1 if you have the <byteswap.h> header file. */
|
11
|
+
#cmakedefine HAVE_BYTESWAP_H 1
|
12
|
+
|
13
|
+
/* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */
|
14
|
+
#cmakedefine HAVE_FUNC_MMAP 1
|
15
|
+
|
16
|
+
/* Define to 1 if you have a definition for sysconf() in <unistd.h>. */
|
17
|
+
#cmakedefine HAVE_FUNC_SYSCONF 1
|
18
|
+
|
19
|
+
/* Define to 1 to use the gflags package for command-line parsing. */
|
20
|
+
#cmakedefine HAVE_GFLAGS 1
|
21
|
+
|
22
|
+
/* Define to 1 if you have Google Test. */
|
23
|
+
#cmakedefine HAVE_GTEST 1
|
24
|
+
|
25
|
+
/* Define to 1 if you have the `lzo2' library (-llzo2). */
|
26
|
+
#cmakedefine HAVE_LIBLZO2 1
|
27
|
+
|
28
|
+
/* Define to 1 if you have the `z' library (-lz). */
|
29
|
+
#cmakedefine HAVE_LIBZ 1
|
30
|
+
|
31
|
+
/* Define to 1 if you have the <stddef.h> header file. */
|
32
|
+
#cmakedefine HAVE_STDDEF_H 1
|
33
|
+
|
34
|
+
/* Define to 1 if you have the <stdint.h> header file. */
|
35
|
+
#cmakedefine HAVE_STDINT_H 1
|
36
|
+
|
37
|
+
/* Define to 1 if you have the <sys/endian.h> header file. */
|
38
|
+
#cmakedefine HAVE_SYS_ENDIAN_H 1
|
39
|
+
|
40
|
+
/* Define to 1 if you have the <sys/mman.h> header file. */
|
41
|
+
#cmakedefine HAVE_SYS_MMAN_H 1
|
42
|
+
|
43
|
+
/* Define to 1 if you have the <sys/resource.h> header file. */
|
44
|
+
#cmakedefine HAVE_SYS_RESOURCE_H 1
|
45
|
+
|
46
|
+
/* Define to 1 if you have the <sys/time.h> header file. */
|
47
|
+
#cmakedefine HAVE_SYS_TIME_H 1
|
48
|
+
|
49
|
+
/* Define to 1 if you have the <sys/uio.h> header file. */
|
50
|
+
#cmakedefine HAVE_SYS_UIO_H 1
|
51
|
+
|
52
|
+
/* Define to 1 if you have the <unistd.h> header file. */
|
53
|
+
#cmakedefine HAVE_UNISTD_H 1
|
54
|
+
|
55
|
+
/* Define to 1 if you have the <windows.h> header file. */
|
56
|
+
#cmakedefine HAVE_WINDOWS_H 1
|
57
|
+
|
58
|
+
/* Define to 1 if your processor stores words with the most significant byte
|
59
|
+
first (like Motorola and SPARC, unlike Intel and VAX). */
|
60
|
+
#cmakedefine SNAPPY_IS_BIG_ENDIAN 1
|
61
|
+
|
62
|
+
#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
|
data/vendor/snappy/snappy-c.h
CHANGED
@@ -30,8 +30,8 @@
|
|
30
30
|
* Plain C interface (a wrapper around the C++ implementation).
|
31
31
|
*/
|
32
32
|
|
33
|
-
#ifndef
|
34
|
-
#define
|
33
|
+
#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_
|
34
|
+
#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_
|
35
35
|
|
36
36
|
#ifdef __cplusplus
|
37
37
|
extern "C" {
|
@@ -135,4 +135,4 @@ snappy_status snappy_validate_compressed_buffer(const char* compressed,
|
|
135
135
|
} // extern "C"
|
136
136
|
#endif
|
137
137
|
|
138
|
-
#endif /*
|
138
|
+
#endif /* THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */
|
@@ -28,8 +28,8 @@
|
|
28
28
|
//
|
29
29
|
// Internals shared between the Snappy implementation and its unittest.
|
30
30
|
|
31
|
-
#ifndef
|
32
|
-
#define
|
31
|
+
#ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
|
32
|
+
#define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
|
33
33
|
|
34
34
|
#include "snappy-stubs-internal.h"
|
35
35
|
|
@@ -50,7 +50,9 @@ class WorkingMemory {
|
|
50
50
|
uint16 small_table_[1<<10]; // 2KB
|
51
51
|
uint16* large_table_; // Allocated only when needed
|
52
52
|
|
53
|
-
|
53
|
+
// No copying
|
54
|
+
WorkingMemory(const WorkingMemory&);
|
55
|
+
void operator=(const WorkingMemory&);
|
54
56
|
};
|
55
57
|
|
56
58
|
// Flat array compression that does not emit the "uncompressed length"
|
@@ -70,57 +72,72 @@ char* CompressFragment(const char* input,
|
|
70
72
|
uint16* table,
|
71
73
|
const int table_size);
|
72
74
|
|
73
|
-
//
|
75
|
+
// Find the largest n such that
|
74
76
|
//
|
75
77
|
// s1[0,n-1] == s2[0,n-1]
|
76
78
|
// and n <= (s2_limit - s2).
|
77
79
|
//
|
80
|
+
// Return make_pair(n, n < 8).
|
78
81
|
// Does not read *s2_limit or beyond.
|
79
82
|
// Does not read *(s1 + (s2_limit - s2)) or beyond.
|
80
83
|
// Requires that s2_limit >= s2.
|
81
84
|
//
|
82
|
-
// Separate implementation for
|
83
|
-
|
84
|
-
|
85
|
-
static inline
|
86
|
-
|
87
|
-
|
85
|
+
// Separate implementation for 64-bit, little-endian cpus.
|
86
|
+
#if !defined(SNAPPY_IS_BIG_ENDIAN) && \
|
87
|
+
(defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM))
|
88
|
+
static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
|
89
|
+
const char* s2,
|
90
|
+
const char* s2_limit) {
|
88
91
|
assert(s2_limit >= s2);
|
89
|
-
|
92
|
+
size_t matched = 0;
|
93
|
+
|
94
|
+
// This block isn't necessary for correctness; we could just start looping
|
95
|
+
// immediately. As an optimization though, it is useful. It creates some not
|
96
|
+
// uncommon code paths that determine, without extra effort, whether the match
|
97
|
+
// length is less than 8. In short, we are hoping to avoid a conditional
|
98
|
+
// branch, and perhaps get better code layout from the C++ compiler.
|
99
|
+
if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) {
|
100
|
+
uint64 a1 = UNALIGNED_LOAD64(s1);
|
101
|
+
uint64 a2 = UNALIGNED_LOAD64(s2);
|
102
|
+
if (a1 != a2) {
|
103
|
+
return std::pair<size_t, bool>(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3,
|
104
|
+
true);
|
105
|
+
} else {
|
106
|
+
matched = 8;
|
107
|
+
s2 += 8;
|
108
|
+
}
|
109
|
+
}
|
90
110
|
|
91
111
|
// Find out how long the match is. We loop over the data 64 bits at a
|
92
112
|
// time until we find a 64-bit block that doesn't match; then we find
|
93
113
|
// the first non-matching bit and use that to calculate the total
|
94
114
|
// length of the match.
|
95
|
-
while (
|
96
|
-
if (
|
115
|
+
while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) {
|
116
|
+
if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
|
97
117
|
s2 += 8;
|
98
118
|
matched += 8;
|
99
119
|
} else {
|
100
|
-
// On current (mid-2008) Opteron models there is a 3% more
|
101
|
-
// efficient code sequence to find the first non-matching byte.
|
102
|
-
// However, what follows is ~10% better on Intel Core 2 and newer,
|
103
|
-
// and we expect AMD's bsf instruction to improve.
|
104
120
|
uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
|
105
121
|
int matching_bits = Bits::FindLSBSetNonZero64(x);
|
106
122
|
matched += matching_bits >> 3;
|
107
|
-
|
123
|
+
assert(matched >= 8);
|
124
|
+
return std::pair<size_t, bool>(matched, false);
|
108
125
|
}
|
109
126
|
}
|
110
|
-
while (
|
111
|
-
if (
|
127
|
+
while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) {
|
128
|
+
if (s1[matched] == *s2) {
|
112
129
|
++s2;
|
113
130
|
++matched;
|
114
131
|
} else {
|
115
|
-
return matched;
|
132
|
+
return std::pair<size_t, bool>(matched, matched < 8);
|
116
133
|
}
|
117
134
|
}
|
118
|
-
return matched;
|
135
|
+
return std::pair<size_t, bool>(matched, matched < 8);
|
119
136
|
}
|
120
137
|
#else
|
121
|
-
static inline
|
122
|
-
|
123
|
-
|
138
|
+
static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
|
139
|
+
const char* s2,
|
140
|
+
const char* s2_limit) {
|
124
141
|
// Implementation based on the x86-64 version, above.
|
125
142
|
assert(s2_limit >= s2);
|
126
143
|
int matched = 0;
|
@@ -140,11 +157,68 @@ static inline int FindMatchLength(const char* s1,
|
|
140
157
|
++matched;
|
141
158
|
}
|
142
159
|
}
|
143
|
-
return matched;
|
160
|
+
return std::pair<size_t, bool>(matched, matched < 8);
|
144
161
|
}
|
145
162
|
#endif
|
146
163
|
|
164
|
+
// Lookup tables for decompression code. Give --snappy_dump_decompression_table
|
165
|
+
// to the unit test to recompute char_table.
|
166
|
+
|
167
|
+
enum {
|
168
|
+
LITERAL = 0,
|
169
|
+
COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode
|
170
|
+
COPY_2_BYTE_OFFSET = 2,
|
171
|
+
COPY_4_BYTE_OFFSET = 3
|
172
|
+
};
|
173
|
+
static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset.
|
174
|
+
|
175
|
+
// Data stored per entry in lookup table:
|
176
|
+
// Range Bits-used Description
|
177
|
+
// ------------------------------------
|
178
|
+
// 1..64 0..7 Literal/copy length encoded in opcode byte
|
179
|
+
// 0..7 8..10 Copy offset encoded in opcode byte / 256
|
180
|
+
// 0..4 11..13 Extra bytes after opcode
|
181
|
+
//
|
182
|
+
// We use eight bits for the length even though 7 would have sufficed
|
183
|
+
// because of efficiency reasons:
|
184
|
+
// (1) Extracting a byte is faster than a bit-field
|
185
|
+
// (2) It properly aligns copy offset so we do not need a <<8
|
186
|
+
static const uint16 char_table[256] = {
|
187
|
+
0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
|
188
|
+
0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
|
189
|
+
0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
|
190
|
+
0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
|
191
|
+
0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
|
192
|
+
0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
|
193
|
+
0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
|
194
|
+
0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
|
195
|
+
0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
|
196
|
+
0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
|
197
|
+
0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
|
198
|
+
0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
|
199
|
+
0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
|
200
|
+
0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
|
201
|
+
0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
|
202
|
+
0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
|
203
|
+
0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
|
204
|
+
0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
|
205
|
+
0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
|
206
|
+
0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
|
207
|
+
0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
|
208
|
+
0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
|
209
|
+
0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
|
210
|
+
0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
|
211
|
+
0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
|
212
|
+
0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
|
213
|
+
0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
|
214
|
+
0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
|
215
|
+
0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
|
216
|
+
0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
|
217
|
+
0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
|
218
|
+
0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
|
219
|
+
};
|
220
|
+
|
147
221
|
} // end namespace internal
|
148
222
|
} // end namespace snappy
|
149
223
|
|
150
|
-
#endif //
|
224
|
+
#endif // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
|
@@ -40,6 +40,21 @@ char* Sink::GetAppendBuffer(size_t length, char* scratch) {
|
|
40
40
|
return scratch;
|
41
41
|
}
|
42
42
|
|
43
|
+
char* Sink::GetAppendBufferVariable(
|
44
|
+
size_t min_size, size_t desired_size_hint, char* scratch,
|
45
|
+
size_t scratch_size, size_t* allocated_size) {
|
46
|
+
*allocated_size = scratch_size;
|
47
|
+
return scratch;
|
48
|
+
}
|
49
|
+
|
50
|
+
void Sink::AppendAndTakeOwnership(
|
51
|
+
char* bytes, size_t n,
|
52
|
+
void (*deleter)(void*, const char*, size_t),
|
53
|
+
void *deleter_arg) {
|
54
|
+
Append(bytes, n);
|
55
|
+
(*deleter)(deleter_arg, bytes, n);
|
56
|
+
}
|
57
|
+
|
43
58
|
ByteArraySource::~ByteArraySource() { }
|
44
59
|
|
45
60
|
size_t ByteArraySource::Available() const { return left_; }
|
@@ -68,4 +83,22 @@ char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) {
|
|
68
83
|
return dest_;
|
69
84
|
}
|
70
85
|
|
86
|
+
void UncheckedByteArraySink::AppendAndTakeOwnership(
|
87
|
+
char* data, size_t n,
|
88
|
+
void (*deleter)(void*, const char*, size_t),
|
89
|
+
void *deleter_arg) {
|
90
|
+
if (data != dest_) {
|
91
|
+
memcpy(dest_, data, n);
|
92
|
+
(*deleter)(deleter_arg, data, n);
|
93
|
+
}
|
94
|
+
dest_ += n;
|
95
|
+
}
|
96
|
+
|
97
|
+
char* UncheckedByteArraySink::GetAppendBufferVariable(
|
98
|
+
size_t min_size, size_t desired_size_hint, char* scratch,
|
99
|
+
size_t scratch_size, size_t* allocated_size) {
|
100
|
+
*allocated_size = desired_size_hint;
|
101
|
+
return dest_;
|
71
102
|
}
|
103
|
+
|
104
|
+
} // namespace snappy
|
@@ -26,12 +26,11 @@
|
|
26
26
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
27
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
28
|
|
29
|
-
#ifndef UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
|
30
|
-
#define UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
|
29
|
+
#ifndef THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_
|
30
|
+
#define THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_
|
31
31
|
|
32
32
|
#include <stddef.h>
|
33
33
|
|
34
|
-
|
35
34
|
namespace snappy {
|
36
35
|
|
37
36
|
// A Sink is an interface that consumes a sequence of bytes.
|
@@ -60,6 +59,47 @@ class Sink {
|
|
60
59
|
// The default implementation always returns the scratch buffer.
|
61
60
|
virtual char* GetAppendBuffer(size_t length, char* scratch);
|
62
61
|
|
62
|
+
// For higher performance, Sink implementations can provide custom
|
63
|
+
// AppendAndTakeOwnership() and GetAppendBufferVariable() methods.
|
64
|
+
// These methods can reduce the number of copies done during
|
65
|
+
// compression/decompression.
|
66
|
+
|
67
|
+
// Append "bytes[0,n-1]" to the sink. Takes ownership of "bytes"
|
68
|
+
// and calls the deleter function as (*deleter)(deleter_arg, bytes, n)
|
69
|
+
// to free the buffer. The deleter function must be non-NULL.
|
70
|
+
//
|
71
|
+
// The default implementation just calls Append and frees "bytes".
|
72
|
+
// Other implementations may avoid a copy while appending the buffer.
|
73
|
+
virtual void AppendAndTakeOwnership(
|
74
|
+
char* bytes, size_t n, void (*deleter)(void*, const char*, size_t),
|
75
|
+
void *deleter_arg);
|
76
|
+
|
77
|
+
// Returns a writable buffer for appending and writes the buffer's capacity to
|
78
|
+
// *allocated_size. Guarantees *allocated_size >= min_size.
|
79
|
+
// May return a pointer to the caller-owned scratch buffer which must have
|
80
|
+
// scratch_size >= min_size.
|
81
|
+
//
|
82
|
+
// The returned buffer is only valid until the next operation
|
83
|
+
// on this Sink.
|
84
|
+
//
|
85
|
+
// After writing at most *allocated_size bytes, call Append() with the
|
86
|
+
// pointer returned from this function and the number of bytes written.
|
87
|
+
// Many Append() implementations will avoid copying bytes if this function
|
88
|
+
// returned an internal buffer.
|
89
|
+
//
|
90
|
+
// If the sink implementation allocates or reallocates an internal buffer,
|
91
|
+
// it should use the desired_size_hint if appropriate. If a caller cannot
|
92
|
+
// provide a reasonable guess at the desired capacity, it should set
|
93
|
+
// desired_size_hint = 0.
|
94
|
+
//
|
95
|
+
// If a non-scratch buffer is returned, the caller may only pass
|
96
|
+
// a prefix to it to Append(). That is, it is not correct to pass an
|
97
|
+
// interior pointer to Append().
|
98
|
+
//
|
99
|
+
// The default implementation always returns the scratch buffer.
|
100
|
+
virtual char* GetAppendBufferVariable(
|
101
|
+
size_t min_size, size_t desired_size_hint, char* scratch,
|
102
|
+
size_t scratch_size, size_t* allocated_size);
|
63
103
|
|
64
104
|
private:
|
65
105
|
// No copying
|
@@ -122,6 +162,12 @@ class UncheckedByteArraySink : public Sink {
|
|
122
162
|
virtual ~UncheckedByteArraySink();
|
123
163
|
virtual void Append(const char* data, size_t n);
|
124
164
|
virtual char* GetAppendBuffer(size_t len, char* scratch);
|
165
|
+
virtual char* GetAppendBufferVariable(
|
166
|
+
size_t min_size, size_t desired_size_hint, char* scratch,
|
167
|
+
size_t scratch_size, size_t* allocated_size);
|
168
|
+
virtual void AppendAndTakeOwnership(
|
169
|
+
char* bytes, size_t n, void (*deleter)(void*, const char*, size_t),
|
170
|
+
void *deleter_arg);
|
125
171
|
|
126
172
|
// Return the current output pointer so that a caller can see how
|
127
173
|
// many bytes were produced.
|
@@ -131,7 +177,6 @@ class UncheckedByteArraySink : public Sink {
|
|
131
177
|
char* dest_;
|
132
178
|
};
|
133
179
|
|
180
|
+
} // namespace snappy
|
134
181
|
|
135
|
-
|
136
|
-
|
137
|
-
#endif // UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
|
182
|
+
#endif // THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_
|