snappy 0.0.14-java → 0.2.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/main.yml +34 -0
- data/.github/workflows/publish.yml +34 -0
- data/Gemfile +4 -0
- data/README.md +28 -4
- data/Rakefile +32 -29
- data/ext/api.c +6 -1
- data/ext/extconf.rb +21 -24
- data/lib/snappy.rb +6 -4
- data/lib/snappy/hadoop.rb +22 -0
- data/lib/snappy/hadoop/reader.rb +62 -0
- data/lib/snappy/hadoop/writer.rb +51 -0
- data/lib/snappy/reader.rb +19 -11
- data/lib/snappy/shim.rb +30 -0
- data/lib/snappy/version.rb +3 -1
- data/lib/snappy/writer.rb +8 -9
- data/snappy.gemspec +17 -37
- data/test/hadoop/snappy_hadoop_reader_test.rb +115 -0
- data/test/hadoop/snappy_hadoop_writer_test.rb +48 -0
- data/test/snappy_hadoop_test.rb +26 -0
- data/test/snappy_reader_test.rb +148 -0
- data/test/snappy_test.rb +95 -0
- data/test/snappy_writer_test.rb +55 -0
- data/test/test_helper.rb +7 -0
- data/vendor/snappy/CMakeLists.txt +297 -0
- data/vendor/snappy/CONTRIBUTING.md +26 -0
- data/vendor/snappy/COPYING +1 -1
- data/vendor/snappy/NEWS +60 -0
- data/vendor/snappy/{README → README.md} +29 -16
- data/vendor/snappy/cmake/SnappyConfig.cmake.in +33 -0
- data/vendor/snappy/cmake/config.h.in +62 -0
- data/vendor/snappy/docs/README.md +72 -0
- data/vendor/snappy/snappy-c.h +3 -3
- data/vendor/snappy/snappy-internal.h +113 -32
- data/vendor/snappy/snappy-sinksource.cc +33 -0
- data/vendor/snappy/snappy-sinksource.h +51 -6
- data/vendor/snappy/snappy-stubs-internal.cc +1 -1
- data/vendor/snappy/snappy-stubs-internal.h +160 -45
- data/vendor/snappy/snappy-stubs-public.h.in +23 -47
- data/vendor/snappy/snappy-test.cc +31 -24
- data/vendor/snappy/snappy-test.h +46 -103
- data/vendor/snappy/snappy.cc +786 -431
- data/vendor/snappy/snappy.h +37 -14
- data/vendor/snappy/snappy_compress_fuzzer.cc +59 -0
- data/vendor/snappy/snappy_uncompress_fuzzer.cc +57 -0
- data/vendor/snappy/snappy_unittest.cc +441 -290
- metadata +35 -75
- data/.travis.yml +0 -4
- data/test/test-snappy-reader.rb +0 -129
- data/test/test-snappy-writer.rb +0 -55
- data/test/test-snappy.rb +0 -58
- data/vendor/snappy/ChangeLog +0 -1916
- data/vendor/snappy/Makefile.am +0 -23
- data/vendor/snappy/autogen.sh +0 -7
- data/vendor/snappy/configure.ac +0 -133
- data/vendor/snappy/m4/gtest.m4 +0 -74
- data/vendor/snappy/testdata/alice29.txt +0 -3609
- data/vendor/snappy/testdata/asyoulik.txt +0 -4122
- data/vendor/snappy/testdata/baddata1.snappy +0 -0
- data/vendor/snappy/testdata/baddata2.snappy +0 -0
- data/vendor/snappy/testdata/baddata3.snappy +0 -0
- data/vendor/snappy/testdata/fireworks.jpeg +0 -0
- data/vendor/snappy/testdata/geo.protodata +0 -0
- data/vendor/snappy/testdata/html +0 -1
- data/vendor/snappy/testdata/html_x_4 +0 -1
- data/vendor/snappy/testdata/kppkn.gtb +0 -0
- data/vendor/snappy/testdata/lcet10.txt +0 -7519
- data/vendor/snappy/testdata/paper-100k.pdf +2 -600
- data/vendor/snappy/testdata/plrabn12.txt +0 -10699
- data/vendor/snappy/testdata/urls.10K +0 -10000
@@ -0,0 +1,26 @@
|
|
1
|
+
# How to Contribute
|
2
|
+
|
3
|
+
We'd love to accept your patches and contributions to this project. There are
|
4
|
+
just a few small guidelines you need to follow.
|
5
|
+
|
6
|
+
## Contributor License Agreement
|
7
|
+
|
8
|
+
Contributions to this project must be accompanied by a Contributor License
|
9
|
+
Agreement. You (or your employer) retain the copyright to your contribution;
|
10
|
+
this simply gives us permission to use and redistribute your contributions as
|
11
|
+
part of the project. Head over to <https://cla.developers.google.com/> to see
|
12
|
+
your current agreements on file or to sign a new one.
|
13
|
+
|
14
|
+
You generally only need to submit a CLA once, so if you've already submitted one
|
15
|
+
(even if it was for a different project), you probably don't need to do it
|
16
|
+
again.
|
17
|
+
|
18
|
+
## Code reviews
|
19
|
+
|
20
|
+
All submissions, including submissions by project members, require review. We
|
21
|
+
use GitHub pull requests for this purpose. Consult
|
22
|
+
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
|
23
|
+
information on using pull requests.
|
24
|
+
|
25
|
+
Please make sure that all the automated checks (CLA, AppVeyor, Travis) pass for
|
26
|
+
your pull requests. Pull requests whose checks fail may be ignored.
|
data/vendor/snappy/COPYING
CHANGED
@@ -29,7 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29
29
|
|
30
30
|
===
|
31
31
|
|
32
|
-
Some of the benchmark data in
|
32
|
+
Some of the benchmark data in testdata/ is licensed differently:
|
33
33
|
|
34
34
|
- fireworks.jpeg is Copyright 2013 Steinar H. Gunderson, and
|
35
35
|
is licensed under the Creative Commons Attribution 3.0 license
|
data/vendor/snappy/NEWS
CHANGED
@@ -1,3 +1,63 @@
|
|
1
|
+
Snappy v1.1.8, January 15th 2020:
|
2
|
+
|
3
|
+
* Small performance improvements.
|
4
|
+
|
5
|
+
* Removed snappy::string alias for std::string.
|
6
|
+
|
7
|
+
* Improved CMake configuration.
|
8
|
+
|
9
|
+
Snappy v1.1.7, August 24th 2017:
|
10
|
+
|
11
|
+
* Improved CMake build support for 64-bit Linux distributions.
|
12
|
+
|
13
|
+
* MSVC builds now use MSVC-specific intrinsics that map to clzll.
|
14
|
+
|
15
|
+
* ARM64 (AArch64) builds use the code paths optimized for 64-bit processors.
|
16
|
+
|
17
|
+
Snappy v1.1.6, July 12th 2017:
|
18
|
+
|
19
|
+
This is a re-release of v1.1.5 with proper SONAME / SOVERSION values.
|
20
|
+
|
21
|
+
Snappy v1.1.5, June 28th 2017:
|
22
|
+
|
23
|
+
This release has broken SONAME / SOVERSION values. Users of snappy as a shared
|
24
|
+
library should avoid 1.1.5 and use 1.1.6 instead. SONAME / SOVERSION errors will
|
25
|
+
manifest as the dynamic library loader complaining that it cannot find snappy's
|
26
|
+
shared library file (libsnappy.so / libsnappy.dylib), or that the library it
|
27
|
+
found does not have the required version. 1.1.6 has the same code as 1.1.5, but
|
28
|
+
carries build configuration fixes for the issues above.
|
29
|
+
|
30
|
+
* Add CMake build support. The autoconf build support is now deprecated, and
|
31
|
+
will be removed in the next release.
|
32
|
+
|
33
|
+
* Add AppVeyor configuration, for Windows CI coverage.
|
34
|
+
|
35
|
+
* Small performance improvement on little-endian PowerPC.
|
36
|
+
|
37
|
+
* Small performance improvement on LLVM with position-independent executables.
|
38
|
+
|
39
|
+
* Fix a few issues with various build environments.
|
40
|
+
|
41
|
+
Snappy v1.1.4, January 25th 2017:
|
42
|
+
|
43
|
+
* Fix a 1% performance regression when snappy is used in PIE executables.
|
44
|
+
|
45
|
+
* Improve compression performance by 5%.
|
46
|
+
|
47
|
+
* Improve decompression performance by 20%.
|
48
|
+
|
49
|
+
Snappy v1.1.3, July 6th 2015:
|
50
|
+
|
51
|
+
This is the first release to be done from GitHub, which means that
|
52
|
+
some minor things like the ChangeLog format have changed (git log
|
53
|
+
format instead of svn log).
|
54
|
+
|
55
|
+
* Add support for Uncompress() from a Source to a Sink.
|
56
|
+
|
57
|
+
* Various minor changes to improve MSVC support; in particular,
|
58
|
+
the unit tests now compile and run under MSVC.
|
59
|
+
|
60
|
+
|
1
61
|
Snappy v1.1.2, February 28th 2014:
|
2
62
|
|
3
63
|
This is a maintenance release with no changes to the actual library
|
@@ -29,12 +29,12 @@ and the like.
|
|
29
29
|
|
30
30
|
Performance
|
31
31
|
===========
|
32
|
-
|
32
|
+
|
33
33
|
Snappy is intended to be fast. On a single core of a Core i7 processor
|
34
34
|
in 64-bit mode, it compresses at about 250 MB/sec or more and decompresses at
|
35
35
|
about 500 MB/sec or more. (These numbers are for the slowest inputs in our
|
36
36
|
benchmark suite; others are much faster.) In our tests, Snappy usually
|
37
|
-
is faster than algorithms in the same class (e.g. LZO, LZF,
|
37
|
+
is faster than algorithms in the same class (e.g. LZO, LZF, QuickLZ,
|
38
38
|
etc.) while achieving comparable compression ratios.
|
39
39
|
|
40
40
|
Typical compression ratios (based on the benchmark suite) are about 1.5-1.7x
|
@@ -51,8 +51,8 @@ In particular:
|
|
51
51
|
|
52
52
|
- Snappy uses 64-bit operations in several places to process more data at
|
53
53
|
once than would otherwise be possible.
|
54
|
-
- Snappy assumes unaligned 32
|
55
|
-
On some platforms, these must be emulated with single-byte loads
|
54
|
+
- Snappy assumes unaligned 32 and 64-bit loads and stores are cheap.
|
55
|
+
On some platforms, these must be emulated with single-byte loads
|
56
56
|
and stores, which is much slower.
|
57
57
|
- Snappy assumes little-endian throughout, and needs to byte-swap data in
|
58
58
|
several places if running on a big-endian platform.
|
@@ -62,25 +62,40 @@ Performance optimizations, whether for 64-bit x86 or other platforms,
|
|
62
62
|
are of course most welcome; see "Contact", below.
|
63
63
|
|
64
64
|
|
65
|
+
Building
|
66
|
+
========
|
67
|
+
|
68
|
+
You need the CMake version specified in [CMakeLists.txt](./CMakeLists.txt)
|
69
|
+
or later to build:
|
70
|
+
|
71
|
+
```bash
|
72
|
+
mkdir build
|
73
|
+
cd build && cmake ../ && make
|
74
|
+
```
|
75
|
+
|
65
76
|
Usage
|
66
77
|
=====
|
67
78
|
|
68
79
|
Note that Snappy, both the implementation and the main interface,
|
69
80
|
is written in C++. However, several third-party bindings to other languages
|
70
|
-
are available; see the
|
71
|
-
|
72
|
-
|
81
|
+
are available; see the [home page](docs/README.md) for more information.
|
82
|
+
Also, if you want to use Snappy from C code, you can use the included C
|
83
|
+
bindings in snappy-c.h.
|
73
84
|
|
74
85
|
To use Snappy from your own C++ program, include the file "snappy.h" from
|
75
86
|
your calling file, and link against the compiled library.
|
76
87
|
|
77
88
|
There are many ways to call Snappy, but the simplest possible is
|
78
89
|
|
79
|
-
|
90
|
+
```c++
|
91
|
+
snappy::Compress(input.data(), input.size(), &output);
|
92
|
+
```
|
80
93
|
|
81
94
|
and similarly
|
82
95
|
|
83
|
-
|
96
|
+
```c++
|
97
|
+
snappy::Uncompress(input.data(), input.size(), &output);
|
98
|
+
```
|
84
99
|
|
85
100
|
where "input" and "output" are both instances of std::string.
|
86
101
|
|
@@ -102,12 +117,12 @@ tests to verify you have not broken anything. Note that if you have the
|
|
102
117
|
Google Test library installed, unit test behavior (especially failures) will be
|
103
118
|
significantly more user-friendly. You can find Google Test at
|
104
119
|
|
105
|
-
|
120
|
+
https://github.com/google/googletest
|
106
121
|
|
107
122
|
You probably also want the gflags library for handling of command-line flags;
|
108
123
|
you can find it at
|
109
124
|
|
110
|
-
|
125
|
+
https://gflags.github.io/gflags/
|
111
126
|
|
112
127
|
In addition to the unit tests, snappy contains microbenchmarks used to
|
113
128
|
tune compression and decompression performance. These are automatically run
|
@@ -116,7 +131,7 @@ before the unit tests, but you can disable them using the flag
|
|
116
131
|
need to edit the source).
|
117
132
|
|
118
133
|
Finally, snappy can benchmark Snappy against a few other compression libraries
|
119
|
-
(zlib, LZO, LZF,
|
134
|
+
(zlib, LZO, LZF, and QuickLZ), if they were detected at configure time.
|
120
135
|
To benchmark using a given file, give the compression algorithm you want to test
|
121
136
|
Snappy against (e.g. --zlib) and then a list of one or more file names on the
|
122
137
|
command line. The testdata/ directory contains the files used by the
|
@@ -129,7 +144,5 @@ test.)
|
|
129
144
|
Contact
|
130
145
|
=======
|
131
146
|
|
132
|
-
Snappy is distributed through
|
133
|
-
and other information, see
|
134
|
-
|
135
|
-
http://code.google.com/p/snappy/
|
147
|
+
Snappy is distributed through GitHub. For the latest version, a bug tracker,
|
148
|
+
and other information, see https://github.com/google/snappy.
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Copyright 2019 Google Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Redistribution and use in source and binary forms, with or without
|
4
|
+
# modification, are permitted provided that the following conditions are
|
5
|
+
# met:
|
6
|
+
#
|
7
|
+
# * Redistributions of source code must retain the above copyright
|
8
|
+
# notice, this list of conditions and the following disclaimer.
|
9
|
+
# * Redistributions in binary form must reproduce the above
|
10
|
+
# copyright notice, this list of conditions and the following disclaimer
|
11
|
+
# in the documentation and/or other materials provided with the
|
12
|
+
# distribution.
|
13
|
+
# * Neither the name of Google Inc. nor the names of its
|
14
|
+
# contributors may be used to endorse or promote products derived from
|
15
|
+
# this software without specific prior written permission.
|
16
|
+
#
|
17
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
18
|
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
19
|
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
20
|
+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
21
|
+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
22
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
23
|
+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
24
|
+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
25
|
+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
26
|
+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
@PACKAGE_INIT@
|
30
|
+
|
31
|
+
include("${CMAKE_CURRENT_LIST_DIR}/SnappyTargets.cmake")
|
32
|
+
|
33
|
+
check_required_components(Snappy)
|
@@ -0,0 +1,62 @@
|
|
1
|
+
#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
|
2
|
+
#define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
|
3
|
+
|
4
|
+
/* Define to 1 if the compiler supports __builtin_ctz and friends. */
|
5
|
+
#cmakedefine HAVE_BUILTIN_CTZ 1
|
6
|
+
|
7
|
+
/* Define to 1 if the compiler supports __builtin_expect. */
|
8
|
+
#cmakedefine HAVE_BUILTIN_EXPECT 1
|
9
|
+
|
10
|
+
/* Define to 1 if you have the <byteswap.h> header file. */
|
11
|
+
#cmakedefine HAVE_BYTESWAP_H 1
|
12
|
+
|
13
|
+
/* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */
|
14
|
+
#cmakedefine HAVE_FUNC_MMAP 1
|
15
|
+
|
16
|
+
/* Define to 1 if you have a definition for sysconf() in <unistd.h>. */
|
17
|
+
#cmakedefine HAVE_FUNC_SYSCONF 1
|
18
|
+
|
19
|
+
/* Define to 1 to use the gflags package for command-line parsing. */
|
20
|
+
#cmakedefine HAVE_GFLAGS 1
|
21
|
+
|
22
|
+
/* Define to 1 if you have Google Test. */
|
23
|
+
#cmakedefine HAVE_GTEST 1
|
24
|
+
|
25
|
+
/* Define to 1 if you have the `lzo2' library (-llzo2). */
|
26
|
+
#cmakedefine HAVE_LIBLZO2 1
|
27
|
+
|
28
|
+
/* Define to 1 if you have the `z' library (-lz). */
|
29
|
+
#cmakedefine HAVE_LIBZ 1
|
30
|
+
|
31
|
+
/* Define to 1 if you have the <sys/endian.h> header file. */
|
32
|
+
#cmakedefine HAVE_SYS_ENDIAN_H 1
|
33
|
+
|
34
|
+
/* Define to 1 if you have the <sys/mman.h> header file. */
|
35
|
+
#cmakedefine HAVE_SYS_MMAN_H 1
|
36
|
+
|
37
|
+
/* Define to 1 if you have the <sys/resource.h> header file. */
|
38
|
+
#cmakedefine HAVE_SYS_RESOURCE_H 1
|
39
|
+
|
40
|
+
/* Define to 1 if you have the <sys/time.h> header file. */
|
41
|
+
#cmakedefine HAVE_SYS_TIME_H 1
|
42
|
+
|
43
|
+
/* Define to 1 if you have the <sys/uio.h> header file. */
|
44
|
+
#cmakedefine HAVE_SYS_UIO_H 1
|
45
|
+
|
46
|
+
/* Define to 1 if you have the <unistd.h> header file. */
|
47
|
+
#cmakedefine HAVE_UNISTD_H 1
|
48
|
+
|
49
|
+
/* Define to 1 if you have the <windows.h> header file. */
|
50
|
+
#cmakedefine HAVE_WINDOWS_H 1
|
51
|
+
|
52
|
+
/* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */
|
53
|
+
#cmakedefine01 SNAPPY_HAVE_SSSE3
|
54
|
+
|
55
|
+
/* Define to 1 if you target processors with BMI2+ and have <bmi2intrin.h>. */
|
56
|
+
#cmakedefine01 SNAPPY_HAVE_BMI2
|
57
|
+
|
58
|
+
/* Define to 1 if your processor stores words with the most significant byte
|
59
|
+
first (like Motorola and SPARC, unlike Intel and VAX). */
|
60
|
+
#cmakedefine SNAPPY_IS_BIG_ENDIAN 1
|
61
|
+
|
62
|
+
#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
|
@@ -0,0 +1,72 @@
|
|
1
|
+
Snappy is a compression/decompression library. It does not aim for maximum
|
2
|
+
compression, or compatibility with any other compression library; instead, it
|
3
|
+
aims for very high speeds and reasonable compression. For instance, compared
|
4
|
+
to the fastest mode of zlib, Snappy is an order of magnitude faster for most
|
5
|
+
inputs, but the resulting compressed files are anywhere from 20% to 100%
|
6
|
+
bigger. On a single core of a Core i7 processor in 64-bit mode, Snappy
|
7
|
+
compresses at about 250 MB/sec or more and decompresses at about 500 MB/sec
|
8
|
+
or more.
|
9
|
+
|
10
|
+
Snappy is widely used inside Google, in everything from BigTable and MapReduce
|
11
|
+
to our internal RPC systems. (Snappy has previously been referred to as "Zippy"
|
12
|
+
in some presentations and the like.)
|
13
|
+
|
14
|
+
For more information, please see the [README](../README.md). Benchmarks against
|
15
|
+
a few other compression libraries (zlib, LZO, LZF, FastLZ, and QuickLZ) are
|
16
|
+
included in the source code distribution. The source code also contains a
|
17
|
+
[formal format specification](../format_description.txt), as well
|
18
|
+
as a specification for a [framing format](../framing_format.txt) useful for
|
19
|
+
higher-level framing and encapsulation of Snappy data, e.g. for transporting
|
20
|
+
Snappy-compressed data across HTTP in a streaming fashion. Note that the Snappy
|
21
|
+
distribution currently has no code implementing the latter, but some of the
|
22
|
+
ports do (see below).
|
23
|
+
|
24
|
+
Snappy is written in C++, but C bindings are included, and several bindings to
|
25
|
+
other languages are maintained by third parties:
|
26
|
+
|
27
|
+
* C#: [Snappy for .NET](http://snappy4net.codeplex.com/) (P/Invoke wrapper),
|
28
|
+
[Snappy.NET](http://snappy.angeloflogic.com/) (P/Invoke wrapper),
|
29
|
+
[Snappy.Sharp](https://github.com/jeffesp/Snappy.Sharp) (native
|
30
|
+
reimplementation)
|
31
|
+
* [C port](http://github.com/andikleen/snappy-c)
|
32
|
+
* [C++ MSVC packaging](http://snappy.angeloflogic.com/) (plus Windows binaries,
|
33
|
+
NuGet packages and command-line tool)
|
34
|
+
* Common Lisp: [Library bindings](http://flambard.github.com/thnappy/),
|
35
|
+
[native reimplementation](https://github.com/brown/snappy)
|
36
|
+
* Erlang: [esnappy](https://github.com/thekvs/esnappy),
|
37
|
+
[snappy-erlang-nif](https://github.com/fdmanana/snappy-erlang-nif)
|
38
|
+
* [Go](https://github.com/golang/snappy/)
|
39
|
+
* [Haskell](http://hackage.haskell.org/package/snappy)
|
40
|
+
* [Haxe](https://github.com/MaddinXx/hxsnappy) (C++/Neko)
|
41
|
+
* [iOS packaging](https://github.com/ideawu/snappy-ios)
|
42
|
+
* Java: [JNI wrapper](https://github.com/xerial/snappy-java) (including the
|
43
|
+
framing format), [native reimplementation](http://code.google.com/p/jsnappy/),
|
44
|
+
[other native reimplementation](https://github.com/dain/snappy) (including
|
45
|
+
the framing format)
|
46
|
+
* [Lua](https://github.com/forhappy/lua-snappy)
|
47
|
+
* [Node.js](https://github.com/kesla/node-snappy) (including the [framing
|
48
|
+
format](https://github.com/kesla/node-snappy-stream))
|
49
|
+
* [Perl](http://search.cpan.org/dist/Compress-Snappy/)
|
50
|
+
* [PHP](https://github.com/kjdev/php-ext-snappy)
|
51
|
+
* [Python](http://pypi.python.org/pypi/python-snappy) (including a command-line
|
52
|
+
tool for the framing format)
|
53
|
+
* [R](https://github.com/lulyon/R-snappy)
|
54
|
+
* [Ruby](https://github.com/miyucy/snappy)
|
55
|
+
* [Rust](https://github.com/BurntSushi/rust-snappy)
|
56
|
+
* [Smalltalk](https://github.com/mumez/sqnappy) (including the framing format)
|
57
|
+
|
58
|
+
Snappy is used or is available as an alternative in software such as
|
59
|
+
|
60
|
+
* [MongoDB](https://www.mongodb.com/)
|
61
|
+
* [Cassandra](http://cassandra.apache.org/)
|
62
|
+
* [Couchbase](http://www.couchbase.com/)
|
63
|
+
* [Hadoop](http://hadoop.apache.org/)
|
64
|
+
* [LessFS](http://www.lessfs.com/wordpress/)
|
65
|
+
* [LevelDB](https://github.com/google/leveldb) (which is in turn used by
|
66
|
+
[Google Chrome](http://chrome.google.com/))
|
67
|
+
* [Lucene](http://lucene.apache.org/)
|
68
|
+
* [VoltDB](http://voltdb.com/)
|
69
|
+
|
70
|
+
If you know of more, do not hesitate to let us know. The easiest way to get in
|
71
|
+
touch is via the
|
72
|
+
[Snappy discussion mailing list](http://groups.google.com/group/snappy-compression).
|
data/vendor/snappy/snappy-c.h
CHANGED
@@ -30,8 +30,8 @@
|
|
30
30
|
* Plain C interface (a wrapper around the C++ implementation).
|
31
31
|
*/
|
32
32
|
|
33
|
-
#ifndef
|
34
|
-
#define
|
33
|
+
#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_
|
34
|
+
#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_
|
35
35
|
|
36
36
|
#ifdef __cplusplus
|
37
37
|
extern "C" {
|
@@ -135,4 +135,4 @@ snappy_status snappy_validate_compressed_buffer(const char* compressed,
|
|
135
135
|
} // extern "C"
|
136
136
|
#endif
|
137
137
|
|
138
|
-
#endif /*
|
138
|
+
#endif /* THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */
|
@@ -28,29 +28,38 @@
|
|
28
28
|
//
|
29
29
|
// Internals shared between the Snappy implementation and its unittest.
|
30
30
|
|
31
|
-
#ifndef
|
32
|
-
#define
|
31
|
+
#ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
|
32
|
+
#define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
|
33
33
|
|
34
34
|
#include "snappy-stubs-internal.h"
|
35
35
|
|
36
36
|
namespace snappy {
|
37
37
|
namespace internal {
|
38
38
|
|
39
|
+
// Working memory performs a single allocation to hold all scratch space
|
40
|
+
// required for compression.
|
39
41
|
class WorkingMemory {
|
40
42
|
public:
|
41
|
-
WorkingMemory(
|
42
|
-
~WorkingMemory()
|
43
|
+
explicit WorkingMemory(size_t input_size);
|
44
|
+
~WorkingMemory();
|
43
45
|
|
44
46
|
// Allocates and clears a hash table using memory in "*this",
|
45
47
|
// stores the number of buckets in "*table_size" and returns a pointer to
|
46
48
|
// the base of the hash table.
|
47
|
-
uint16* GetHashTable(size_t
|
49
|
+
uint16* GetHashTable(size_t fragment_size, int* table_size) const;
|
50
|
+
char* GetScratchInput() const { return input_; }
|
51
|
+
char* GetScratchOutput() const { return output_; }
|
48
52
|
|
49
53
|
private:
|
50
|
-
|
51
|
-
|
54
|
+
char* mem_; // the allocated memory, never nullptr
|
55
|
+
size_t size_; // the size of the allocated memory, never 0
|
56
|
+
uint16* table_; // the pointer to the hashtable
|
57
|
+
char* input_; // the pointer to the input scratch buffer
|
58
|
+
char* output_; // the pointer to the output scratch buffer
|
52
59
|
|
53
|
-
|
60
|
+
// No copying
|
61
|
+
WorkingMemory(const WorkingMemory&);
|
62
|
+
void operator=(const WorkingMemory&);
|
54
63
|
};
|
55
64
|
|
56
65
|
// Flat array compression that does not emit the "uncompressed length"
|
@@ -70,57 +79,72 @@ char* CompressFragment(const char* input,
|
|
70
79
|
uint16* table,
|
71
80
|
const int table_size);
|
72
81
|
|
73
|
-
//
|
82
|
+
// Find the largest n such that
|
74
83
|
//
|
75
84
|
// s1[0,n-1] == s2[0,n-1]
|
76
85
|
// and n <= (s2_limit - s2).
|
77
86
|
//
|
87
|
+
// Return make_pair(n, n < 8).
|
78
88
|
// Does not read *s2_limit or beyond.
|
79
89
|
// Does not read *(s1 + (s2_limit - s2)) or beyond.
|
80
90
|
// Requires that s2_limit >= s2.
|
81
91
|
//
|
82
|
-
// Separate implementation for
|
83
|
-
|
84
|
-
|
85
|
-
static inline
|
86
|
-
|
87
|
-
|
92
|
+
// Separate implementation for 64-bit, little-endian cpus.
|
93
|
+
#if !defined(SNAPPY_IS_BIG_ENDIAN) && \
|
94
|
+
(defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM))
|
95
|
+
static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
|
96
|
+
const char* s2,
|
97
|
+
const char* s2_limit) {
|
88
98
|
assert(s2_limit >= s2);
|
89
|
-
|
99
|
+
size_t matched = 0;
|
100
|
+
|
101
|
+
// This block isn't necessary for correctness; we could just start looping
|
102
|
+
// immediately. As an optimization though, it is useful. It creates some not
|
103
|
+
// uncommon code paths that determine, without extra effort, whether the match
|
104
|
+
// length is less than 8. In short, we are hoping to avoid a conditional
|
105
|
+
// branch, and perhaps get better code layout from the C++ compiler.
|
106
|
+
if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) {
|
107
|
+
uint64 a1 = UNALIGNED_LOAD64(s1);
|
108
|
+
uint64 a2 = UNALIGNED_LOAD64(s2);
|
109
|
+
if (a1 != a2) {
|
110
|
+
return std::pair<size_t, bool>(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3,
|
111
|
+
true);
|
112
|
+
} else {
|
113
|
+
matched = 8;
|
114
|
+
s2 += 8;
|
115
|
+
}
|
116
|
+
}
|
90
117
|
|
91
118
|
// Find out how long the match is. We loop over the data 64 bits at a
|
92
119
|
// time until we find a 64-bit block that doesn't match; then we find
|
93
120
|
// the first non-matching bit and use that to calculate the total
|
94
121
|
// length of the match.
|
95
|
-
while (
|
96
|
-
if (
|
122
|
+
while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) {
|
123
|
+
if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
|
97
124
|
s2 += 8;
|
98
125
|
matched += 8;
|
99
126
|
} else {
|
100
|
-
// On current (mid-2008) Opteron models there is a 3% more
|
101
|
-
// efficient code sequence to find the first non-matching byte.
|
102
|
-
// However, what follows is ~10% better on Intel Core 2 and newer,
|
103
|
-
// and we expect AMD's bsf instruction to improve.
|
104
127
|
uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
|
105
128
|
int matching_bits = Bits::FindLSBSetNonZero64(x);
|
106
129
|
matched += matching_bits >> 3;
|
107
|
-
|
130
|
+
assert(matched >= 8);
|
131
|
+
return std::pair<size_t, bool>(matched, false);
|
108
132
|
}
|
109
133
|
}
|
110
|
-
while (
|
111
|
-
if (
|
134
|
+
while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) {
|
135
|
+
if (s1[matched] == *s2) {
|
112
136
|
++s2;
|
113
137
|
++matched;
|
114
138
|
} else {
|
115
|
-
return matched;
|
139
|
+
return std::pair<size_t, bool>(matched, matched < 8);
|
116
140
|
}
|
117
141
|
}
|
118
|
-
return matched;
|
142
|
+
return std::pair<size_t, bool>(matched, matched < 8);
|
119
143
|
}
|
120
144
|
#else
|
121
|
-
static inline
|
122
|
-
|
123
|
-
|
145
|
+
static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
|
146
|
+
const char* s2,
|
147
|
+
const char* s2_limit) {
|
124
148
|
// Implementation based on the x86-64 version, above.
|
125
149
|
assert(s2_limit >= s2);
|
126
150
|
int matched = 0;
|
@@ -140,11 +164,68 @@ static inline int FindMatchLength(const char* s1,
|
|
140
164
|
++matched;
|
141
165
|
}
|
142
166
|
}
|
143
|
-
return matched;
|
167
|
+
return std::pair<size_t, bool>(matched, matched < 8);
|
144
168
|
}
|
145
169
|
#endif
|
146
170
|
|
171
|
+
// Lookup tables for decompression code. Give --snappy_dump_decompression_table
|
172
|
+
// to the unit test to recompute char_table.
|
173
|
+
|
174
|
+
enum {
|
175
|
+
LITERAL = 0,
|
176
|
+
COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode
|
177
|
+
COPY_2_BYTE_OFFSET = 2,
|
178
|
+
COPY_4_BYTE_OFFSET = 3
|
179
|
+
};
|
180
|
+
static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset.
|
181
|
+
|
182
|
+
// Data stored per entry in lookup table:
|
183
|
+
// Range Bits-used Description
|
184
|
+
// ------------------------------------
|
185
|
+
// 1..64 0..7 Literal/copy length encoded in opcode byte
|
186
|
+
// 0..7 8..10 Copy offset encoded in opcode byte / 256
|
187
|
+
// 0..4 11..13 Extra bytes after opcode
|
188
|
+
//
|
189
|
+
// We use eight bits for the length even though 7 would have sufficed
|
190
|
+
// because of efficiency reasons:
|
191
|
+
// (1) Extracting a byte is faster than a bit-field
|
192
|
+
// (2) It properly aligns copy offset so we do not need a <<8
|
193
|
+
static const uint16 char_table[256] = {
|
194
|
+
0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
|
195
|
+
0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
|
196
|
+
0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
|
197
|
+
0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
|
198
|
+
0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
|
199
|
+
0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
|
200
|
+
0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
|
201
|
+
0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
|
202
|
+
0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
|
203
|
+
0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
|
204
|
+
0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
|
205
|
+
0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
|
206
|
+
0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
|
207
|
+
0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
|
208
|
+
0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
|
209
|
+
0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
|
210
|
+
0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
|
211
|
+
0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
|
212
|
+
0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
|
213
|
+
0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
|
214
|
+
0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
|
215
|
+
0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
|
216
|
+
0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
|
217
|
+
0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
|
218
|
+
0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
|
219
|
+
0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
|
220
|
+
0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
|
221
|
+
0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
|
222
|
+
0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
|
223
|
+
0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
|
224
|
+
0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
|
225
|
+
0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
|
226
|
+
};
|
227
|
+
|
147
228
|
} // end namespace internal
|
148
229
|
} // end namespace snappy
|
149
230
|
|
150
|
-
#endif //
|
231
|
+
#endif // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
|