snappy 0.0.14-java → 0.2.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +5 -5
  2. data/.github/workflows/main.yml +34 -0
  3. data/.github/workflows/publish.yml +34 -0
  4. data/Gemfile +4 -0
  5. data/README.md +28 -4
  6. data/Rakefile +32 -29
  7. data/ext/api.c +6 -1
  8. data/ext/extconf.rb +21 -24
  9. data/lib/snappy.rb +6 -4
  10. data/lib/snappy/hadoop.rb +22 -0
  11. data/lib/snappy/hadoop/reader.rb +62 -0
  12. data/lib/snappy/hadoop/writer.rb +51 -0
  13. data/lib/snappy/reader.rb +19 -11
  14. data/lib/snappy/shim.rb +30 -0
  15. data/lib/snappy/version.rb +3 -1
  16. data/lib/snappy/writer.rb +8 -9
  17. data/snappy.gemspec +17 -37
  18. data/test/hadoop/snappy_hadoop_reader_test.rb +115 -0
  19. data/test/hadoop/snappy_hadoop_writer_test.rb +48 -0
  20. data/test/snappy_hadoop_test.rb +26 -0
  21. data/test/snappy_reader_test.rb +148 -0
  22. data/test/snappy_test.rb +95 -0
  23. data/test/snappy_writer_test.rb +55 -0
  24. data/test/test_helper.rb +7 -0
  25. data/vendor/snappy/CMakeLists.txt +297 -0
  26. data/vendor/snappy/CONTRIBUTING.md +26 -0
  27. data/vendor/snappy/COPYING +1 -1
  28. data/vendor/snappy/NEWS +60 -0
  29. data/vendor/snappy/{README → README.md} +29 -16
  30. data/vendor/snappy/cmake/SnappyConfig.cmake.in +33 -0
  31. data/vendor/snappy/cmake/config.h.in +62 -0
  32. data/vendor/snappy/docs/README.md +72 -0
  33. data/vendor/snappy/snappy-c.h +3 -3
  34. data/vendor/snappy/snappy-internal.h +113 -32
  35. data/vendor/snappy/snappy-sinksource.cc +33 -0
  36. data/vendor/snappy/snappy-sinksource.h +51 -6
  37. data/vendor/snappy/snappy-stubs-internal.cc +1 -1
  38. data/vendor/snappy/snappy-stubs-internal.h +160 -45
  39. data/vendor/snappy/snappy-stubs-public.h.in +23 -47
  40. data/vendor/snappy/snappy-test.cc +31 -24
  41. data/vendor/snappy/snappy-test.h +46 -103
  42. data/vendor/snappy/snappy.cc +786 -431
  43. data/vendor/snappy/snappy.h +37 -14
  44. data/vendor/snappy/snappy_compress_fuzzer.cc +59 -0
  45. data/vendor/snappy/snappy_uncompress_fuzzer.cc +57 -0
  46. data/vendor/snappy/snappy_unittest.cc +441 -290
  47. metadata +35 -75
  48. data/.travis.yml +0 -4
  49. data/test/test-snappy-reader.rb +0 -129
  50. data/test/test-snappy-writer.rb +0 -55
  51. data/test/test-snappy.rb +0 -58
  52. data/vendor/snappy/ChangeLog +0 -1916
  53. data/vendor/snappy/Makefile.am +0 -23
  54. data/vendor/snappy/autogen.sh +0 -7
  55. data/vendor/snappy/configure.ac +0 -133
  56. data/vendor/snappy/m4/gtest.m4 +0 -74
  57. data/vendor/snappy/testdata/alice29.txt +0 -3609
  58. data/vendor/snappy/testdata/asyoulik.txt +0 -4122
  59. data/vendor/snappy/testdata/baddata1.snappy +0 -0
  60. data/vendor/snappy/testdata/baddata2.snappy +0 -0
  61. data/vendor/snappy/testdata/baddata3.snappy +0 -0
  62. data/vendor/snappy/testdata/fireworks.jpeg +0 -0
  63. data/vendor/snappy/testdata/geo.protodata +0 -0
  64. data/vendor/snappy/testdata/html +0 -1
  65. data/vendor/snappy/testdata/html_x_4 +0 -1
  66. data/vendor/snappy/testdata/kppkn.gtb +0 -0
  67. data/vendor/snappy/testdata/lcet10.txt +0 -7519
  68. data/vendor/snappy/testdata/paper-100k.pdf +2 -600
  69. data/vendor/snappy/testdata/plrabn12.txt +0 -10699
  70. data/vendor/snappy/testdata/urls.10K +0 -10000
@@ -0,0 +1,26 @@
1
+ # How to Contribute
2
+
3
+ We'd love to accept your patches and contributions to this project. There are
4
+ just a few small guidelines you need to follow.
5
+
6
+ ## Contributor License Agreement
7
+
8
+ Contributions to this project must be accompanied by a Contributor License
9
+ Agreement. You (or your employer) retain the copyright to your contribution;
10
+ this simply gives us permission to use and redistribute your contributions as
11
+ part of the project. Head over to <https://cla.developers.google.com/> to see
12
+ your current agreements on file or to sign a new one.
13
+
14
+ You generally only need to submit a CLA once, so if you've already submitted one
15
+ (even if it was for a different project), you probably don't need to do it
16
+ again.
17
+
18
+ ## Code reviews
19
+
20
+ All submissions, including submissions by project members, require review. We
21
+ use GitHub pull requests for this purpose. Consult
22
+ [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23
+ information on using pull requests.
24
+
25
+ Please make sure that all the automated checks (CLA, AppVeyor, Travis) pass for
26
+ your pull requests. Pull requests whose checks fail may be ignored.
@@ -29,7 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
29
 
30
30
  ===
31
31
 
32
- Some of the benchmark data in util/zippy/testdata is licensed differently:
32
+ Some of the benchmark data in testdata/ is licensed differently:
33
33
 
34
34
  - fireworks.jpeg is Copyright 2013 Steinar H. Gunderson, and
35
35
  is licensed under the Creative Commons Attribution 3.0 license
@@ -1,3 +1,63 @@
1
+ Snappy v1.1.8, January 15th 2020:
2
+
3
+ * Small performance improvements.
4
+
5
+ * Removed snappy::string alias for std::string.
6
+
7
+ * Improved CMake configuration.
8
+
9
+ Snappy v1.1.7, August 24th 2017:
10
+
11
+ * Improved CMake build support for 64-bit Linux distributions.
12
+
13
+ * MSVC builds now use MSVC-specific intrinsics that map to clzll.
14
+
15
+ * ARM64 (AArch64) builds use the code paths optimized for 64-bit processors.
16
+
17
+ Snappy v1.1.6, July 12th 2017:
18
+
19
+ This is a re-release of v1.1.5 with proper SONAME / SOVERSION values.
20
+
21
+ Snappy v1.1.5, June 28th 2017:
22
+
23
+ This release has broken SONAME / SOVERSION values. Users of snappy as a shared
24
+ library should avoid 1.1.5 and use 1.1.6 instead. SONAME / SOVERSION errors will
25
+ manifest as the dynamic library loader complaining that it cannot find snappy's
26
+ shared library file (libsnappy.so / libsnappy.dylib), or that the library it
27
+ found does not have the required version. 1.1.6 has the same code as 1.1.5, but
28
+ carries build configuration fixes for the issues above.
29
+
30
+ * Add CMake build support. The autoconf build support is now deprecated, and
31
+ will be removed in the next release.
32
+
33
+ * Add AppVeyor configuration, for Windows CI coverage.
34
+
35
+ * Small performance improvement on little-endian PowerPC.
36
+
37
+ * Small performance improvement on LLVM with position-independent executables.
38
+
39
+ * Fix a few issues with various build environments.
40
+
41
+ Snappy v1.1.4, January 25th 2017:
42
+
43
+ * Fix a 1% performance regression when snappy is used in PIE executables.
44
+
45
+ * Improve compression performance by 5%.
46
+
47
+ * Improve decompression performance by 20%.
48
+
49
+ Snappy v1.1.3, July 6th 2015:
50
+
51
+ This is the first release to be done from GitHub, which means that
52
+ some minor things like the ChangeLog format have changed (git log
53
+ format instead of svn log).
54
+
55
+ * Add support for Uncompress() from a Source to a Sink.
56
+
57
+ * Various minor changes to improve MSVC support; in particular,
58
+ the unit tests now compile and run under MSVC.
59
+
60
+
1
61
  Snappy v1.1.2, February 28th 2014:
2
62
 
3
63
  This is a maintenance release with no changes to the actual library
@@ -29,12 +29,12 @@ and the like.
29
29
 
30
30
  Performance
31
31
  ===========
32
-
32
+
33
33
  Snappy is intended to be fast. On a single core of a Core i7 processor
34
34
  in 64-bit mode, it compresses at about 250 MB/sec or more and decompresses at
35
35
  about 500 MB/sec or more. (These numbers are for the slowest inputs in our
36
36
  benchmark suite; others are much faster.) In our tests, Snappy usually
37
- is faster than algorithms in the same class (e.g. LZO, LZF, FastLZ, QuickLZ,
37
+ is faster than algorithms in the same class (e.g. LZO, LZF, QuickLZ,
38
38
  etc.) while achieving comparable compression ratios.
39
39
 
40
40
  Typical compression ratios (based on the benchmark suite) are about 1.5-1.7x
@@ -51,8 +51,8 @@ In particular:
51
51
 
52
52
  - Snappy uses 64-bit operations in several places to process more data at
53
53
  once than would otherwise be possible.
54
- - Snappy assumes unaligned 32- and 64-bit loads and stores are cheap.
55
- On some platforms, these must be emulated with single-byte loads
54
+ - Snappy assumes unaligned 32 and 64-bit loads and stores are cheap.
55
+ On some platforms, these must be emulated with single-byte loads
56
56
  and stores, which is much slower.
57
57
  - Snappy assumes little-endian throughout, and needs to byte-swap data in
58
58
  several places if running on a big-endian platform.
@@ -62,25 +62,40 @@ Performance optimizations, whether for 64-bit x86 or other platforms,
62
62
  are of course most welcome; see "Contact", below.
63
63
 
64
64
 
65
+ Building
66
+ ========
67
+
68
+ You need the CMake version specified in [CMakeLists.txt](./CMakeLists.txt)
69
+ or later to build:
70
+
71
+ ```bash
72
+ mkdir build
73
+ cd build && cmake ../ && make
74
+ ```
75
+
65
76
  Usage
66
77
  =====
67
78
 
68
79
  Note that Snappy, both the implementation and the main interface,
69
80
  is written in C++. However, several third-party bindings to other languages
70
- are available; see the Google Code page at http://code.google.com/p/snappy/
71
- for more information. Also, if you want to use Snappy from C code, you can
72
- use the included C bindings in snappy-c.h.
81
+ are available; see the [home page](docs/README.md) for more information.
82
+ Also, if you want to use Snappy from C code, you can use the included C
83
+ bindings in snappy-c.h.
73
84
 
74
85
  To use Snappy from your own C++ program, include the file "snappy.h" from
75
86
  your calling file, and link against the compiled library.
76
87
 
77
88
  There are many ways to call Snappy, but the simplest possible is
78
89
 
79
- snappy::Compress(input.data(), input.size(), &output);
90
+ ```c++
91
+ snappy::Compress(input.data(), input.size(), &output);
92
+ ```
80
93
 
81
94
  and similarly
82
95
 
83
- snappy::Uncompress(input.data(), input.size(), &output);
96
+ ```c++
97
+ snappy::Uncompress(input.data(), input.size(), &output);
98
+ ```
84
99
 
85
100
  where "input" and "output" are both instances of std::string.
86
101
 
@@ -102,12 +117,12 @@ tests to verify you have not broken anything. Note that if you have the
102
117
  Google Test library installed, unit test behavior (especially failures) will be
103
118
  significantly more user-friendly. You can find Google Test at
104
119
 
105
- http://code.google.com/p/googletest/
120
+ https://github.com/google/googletest
106
121
 
107
122
  You probably also want the gflags library for handling of command-line flags;
108
123
  you can find it at
109
124
 
110
- http://code.google.com/p/google-gflags/
125
+ https://gflags.github.io/gflags/
111
126
 
112
127
  In addition to the unit tests, snappy contains microbenchmarks used to
113
128
  tune compression and decompression performance. These are automatically run
@@ -116,7 +131,7 @@ before the unit tests, but you can disable them using the flag
116
131
  need to edit the source).
117
132
 
118
133
  Finally, snappy can benchmark Snappy against a few other compression libraries
119
- (zlib, LZO, LZF, FastLZ and QuickLZ), if they were detected at configure time.
134
+ (zlib, LZO, LZF, and QuickLZ), if they were detected at configure time.
120
135
  To benchmark using a given file, give the compression algorithm you want to test
121
136
  Snappy against (e.g. --zlib) and then a list of one or more file names on the
122
137
  command line. The testdata/ directory contains the files used by the
@@ -129,7 +144,5 @@ test.)
129
144
  Contact
130
145
  =======
131
146
 
132
- Snappy is distributed through Google Code. For the latest version, a bug tracker,
133
- and other information, see
134
-
135
- http://code.google.com/p/snappy/
147
+ Snappy is distributed through GitHub. For the latest version, a bug tracker,
148
+ and other information, see https://github.com/google/snappy.
@@ -0,0 +1,33 @@
1
+ # Copyright 2019 Google Inc. All Rights Reserved.
2
+ #
3
+ # Redistribution and use in source and binary forms, with or without
4
+ # modification, are permitted provided that the following conditions are
5
+ # met:
6
+ #
7
+ # * Redistributions of source code must retain the above copyright
8
+ # notice, this list of conditions and the following disclaimer.
9
+ # * Redistributions in binary form must reproduce the above
10
+ # copyright notice, this list of conditions and the following disclaimer
11
+ # in the documentation and/or other materials provided with the
12
+ # distribution.
13
+ # * Neither the name of Google Inc. nor the names of its
14
+ # contributors may be used to endorse or promote products derived from
15
+ # this software without specific prior written permission.
16
+ #
17
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ @PACKAGE_INIT@
30
+
31
+ include("${CMAKE_CURRENT_LIST_DIR}/SnappyTargets.cmake")
32
+
33
+ check_required_components(Snappy)
@@ -0,0 +1,62 @@
1
+ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
2
+ #define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
3
+
4
+ /* Define to 1 if the compiler supports __builtin_ctz and friends. */
5
+ #cmakedefine HAVE_BUILTIN_CTZ 1
6
+
7
+ /* Define to 1 if the compiler supports __builtin_expect. */
8
+ #cmakedefine HAVE_BUILTIN_EXPECT 1
9
+
10
+ /* Define to 1 if you have the <byteswap.h> header file. */
11
+ #cmakedefine HAVE_BYTESWAP_H 1
12
+
13
+ /* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */
14
+ #cmakedefine HAVE_FUNC_MMAP 1
15
+
16
+ /* Define to 1 if you have a definition for sysconf() in <unistd.h>. */
17
+ #cmakedefine HAVE_FUNC_SYSCONF 1
18
+
19
+ /* Define to 1 to use the gflags package for command-line parsing. */
20
+ #cmakedefine HAVE_GFLAGS 1
21
+
22
+ /* Define to 1 if you have Google Test. */
23
+ #cmakedefine HAVE_GTEST 1
24
+
25
+ /* Define to 1 if you have the `lzo2' library (-llzo2). */
26
+ #cmakedefine HAVE_LIBLZO2 1
27
+
28
+ /* Define to 1 if you have the `z' library (-lz). */
29
+ #cmakedefine HAVE_LIBZ 1
30
+
31
+ /* Define to 1 if you have the <sys/endian.h> header file. */
32
+ #cmakedefine HAVE_SYS_ENDIAN_H 1
33
+
34
+ /* Define to 1 if you have the <sys/mman.h> header file. */
35
+ #cmakedefine HAVE_SYS_MMAN_H 1
36
+
37
+ /* Define to 1 if you have the <sys/resource.h> header file. */
38
+ #cmakedefine HAVE_SYS_RESOURCE_H 1
39
+
40
+ /* Define to 1 if you have the <sys/time.h> header file. */
41
+ #cmakedefine HAVE_SYS_TIME_H 1
42
+
43
+ /* Define to 1 if you have the <sys/uio.h> header file. */
44
+ #cmakedefine HAVE_SYS_UIO_H 1
45
+
46
+ /* Define to 1 if you have the <unistd.h> header file. */
47
+ #cmakedefine HAVE_UNISTD_H 1
48
+
49
+ /* Define to 1 if you have the <windows.h> header file. */
50
+ #cmakedefine HAVE_WINDOWS_H 1
51
+
52
+ /* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */
53
+ #cmakedefine01 SNAPPY_HAVE_SSSE3
54
+
55
+ /* Define to 1 if you target processors with BMI2+ and have <bmi2intrin.h>. */
56
+ #cmakedefine01 SNAPPY_HAVE_BMI2
57
+
58
+ /* Define to 1 if your processor stores words with the most significant byte
59
+ first (like Motorola and SPARC, unlike Intel and VAX). */
60
+ #cmakedefine SNAPPY_IS_BIG_ENDIAN 1
61
+
62
+ #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
@@ -0,0 +1,72 @@
1
+ Snappy is a compression/decompression library. It does not aim for maximum
2
+ compression, or compatibility with any other compression library; instead, it
3
+ aims for very high speeds and reasonable compression. For instance, compared
4
+ to the fastest mode of zlib, Snappy is an order of magnitude faster for most
5
+ inputs, but the resulting compressed files are anywhere from 20% to 100%
6
+ bigger. On a single core of a Core i7 processor in 64-bit mode, Snappy
7
+ compresses at about 250 MB/sec or more and decompresses at about 500 MB/sec
8
+ or more.
9
+
10
+ Snappy is widely used inside Google, in everything from BigTable and MapReduce
11
+ to our internal RPC systems. (Snappy has previously been referred to as "Zippy"
12
+ in some presentations and the like.)
13
+
14
+ For more information, please see the [README](../README.md). Benchmarks against
15
+ a few other compression libraries (zlib, LZO, LZF, FastLZ, and QuickLZ) are
16
+ included in the source code distribution. The source code also contains a
17
+ [formal format specification](../format_description.txt), as well
18
+ as a specification for a [framing format](../framing_format.txt) useful for
19
+ higher-level framing and encapsulation of Snappy data, e.g. for transporting
20
+ Snappy-compressed data across HTTP in a streaming fashion. Note that the Snappy
21
+ distribution currently has no code implementing the latter, but some of the
22
+ ports do (see below).
23
+
24
+ Snappy is written in C++, but C bindings are included, and several bindings to
25
+ other languages are maintained by third parties:
26
+
27
+ * C#: [Snappy for .NET](http://snappy4net.codeplex.com/) (P/Invoke wrapper),
28
+ [Snappy.NET](http://snappy.angeloflogic.com/) (P/Invoke wrapper),
29
+ [Snappy.Sharp](https://github.com/jeffesp/Snappy.Sharp) (native
30
+ reimplementation)
31
+ * [C port](http://github.com/andikleen/snappy-c)
32
+ * [C++ MSVC packaging](http://snappy.angeloflogic.com/) (plus Windows binaries,
33
+ NuGet packages and command-line tool)
34
+ * Common Lisp: [Library bindings](http://flambard.github.com/thnappy/),
35
+ [native reimplementation](https://github.com/brown/snappy)
36
+ * Erlang: [esnappy](https://github.com/thekvs/esnappy),
37
+ [snappy-erlang-nif](https://github.com/fdmanana/snappy-erlang-nif)
38
+ * [Go](https://github.com/golang/snappy/)
39
+ * [Haskell](http://hackage.haskell.org/package/snappy)
40
+ * [Haxe](https://github.com/MaddinXx/hxsnappy) (C++/Neko)
41
+ * [iOS packaging](https://github.com/ideawu/snappy-ios)
42
+ * Java: [JNI wrapper](https://github.com/xerial/snappy-java) (including the
43
+ framing format), [native reimplementation](http://code.google.com/p/jsnappy/),
44
+ [other native reimplementation](https://github.com/dain/snappy) (including
45
+ the framing format)
46
+ * [Lua](https://github.com/forhappy/lua-snappy)
47
+ * [Node.js](https://github.com/kesla/node-snappy) (including the [framing
48
+ format](https://github.com/kesla/node-snappy-stream))
49
+ * [Perl](http://search.cpan.org/dist/Compress-Snappy/)
50
+ * [PHP](https://github.com/kjdev/php-ext-snappy)
51
+ * [Python](http://pypi.python.org/pypi/python-snappy) (including a command-line
52
+ tool for the framing format)
53
+ * [R](https://github.com/lulyon/R-snappy)
54
+ * [Ruby](https://github.com/miyucy/snappy)
55
+ * [Rust](https://github.com/BurntSushi/rust-snappy)
56
+ * [Smalltalk](https://github.com/mumez/sqnappy) (including the framing format)
57
+
58
+ Snappy is used or is available as an alternative in software such as
59
+
60
+ * [MongoDB](https://www.mongodb.com/)
61
+ * [Cassandra](http://cassandra.apache.org/)
62
+ * [Couchbase](http://www.couchbase.com/)
63
+ * [Hadoop](http://hadoop.apache.org/)
64
+ * [LessFS](http://www.lessfs.com/wordpress/)
65
+ * [LevelDB](https://github.com/google/leveldb) (which is in turn used by
66
+ [Google Chrome](http://chrome.google.com/))
67
+ * [Lucene](http://lucene.apache.org/)
68
+ * [VoltDB](http://voltdb.com/)
69
+
70
+ If you know of more, do not hesitate to let us know. The easiest way to get in
71
+ touch is via the
72
+ [Snappy discussion mailing list](http://groups.google.com/group/snappy-compression).
@@ -30,8 +30,8 @@
30
30
  * Plain C interface (a wrapper around the C++ implementation).
31
31
  */
32
32
 
33
- #ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_
34
- #define UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_
33
+ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_
34
+ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_
35
35
 
36
36
  #ifdef __cplusplus
37
37
  extern "C" {
@@ -135,4 +135,4 @@ snappy_status snappy_validate_compressed_buffer(const char* compressed,
135
135
  } // extern "C"
136
136
  #endif
137
137
 
138
- #endif /* UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */
138
+ #endif /* THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */
@@ -28,29 +28,38 @@
28
28
  //
29
29
  // Internals shared between the Snappy implementation and its unittest.
30
30
 
31
- #ifndef UTIL_SNAPPY_SNAPPY_INTERNAL_H_
32
- #define UTIL_SNAPPY_SNAPPY_INTERNAL_H_
31
+ #ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
32
+ #define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
33
33
 
34
34
  #include "snappy-stubs-internal.h"
35
35
 
36
36
  namespace snappy {
37
37
  namespace internal {
38
38
 
39
+ // Working memory performs a single allocation to hold all scratch space
40
+ // required for compression.
39
41
  class WorkingMemory {
40
42
  public:
41
- WorkingMemory() : large_table_(NULL) { }
42
- ~WorkingMemory() { delete[] large_table_; }
43
+ explicit WorkingMemory(size_t input_size);
44
+ ~WorkingMemory();
43
45
 
44
46
  // Allocates and clears a hash table using memory in "*this",
45
47
  // stores the number of buckets in "*table_size" and returns a pointer to
46
48
  // the base of the hash table.
47
- uint16* GetHashTable(size_t input_size, int* table_size);
49
+ uint16* GetHashTable(size_t fragment_size, int* table_size) const;
50
+ char* GetScratchInput() const { return input_; }
51
+ char* GetScratchOutput() const { return output_; }
48
52
 
49
53
  private:
50
- uint16 small_table_[1<<10]; // 2KB
51
- uint16* large_table_; // Allocated only when needed
54
+ char* mem_; // the allocated memory, never nullptr
55
+ size_t size_; // the size of the allocated memory, never 0
56
+ uint16* table_; // the pointer to the hashtable
57
+ char* input_; // the pointer to the input scratch buffer
58
+ char* output_; // the pointer to the output scratch buffer
52
59
 
53
- DISALLOW_COPY_AND_ASSIGN(WorkingMemory);
60
+ // No copying
61
+ WorkingMemory(const WorkingMemory&);
62
+ void operator=(const WorkingMemory&);
54
63
  };
55
64
 
56
65
  // Flat array compression that does not emit the "uncompressed length"
@@ -70,57 +79,72 @@ char* CompressFragment(const char* input,
70
79
  uint16* table,
71
80
  const int table_size);
72
81
 
73
- // Return the largest n such that
82
+ // Find the largest n such that
74
83
  //
75
84
  // s1[0,n-1] == s2[0,n-1]
76
85
  // and n <= (s2_limit - s2).
77
86
  //
87
+ // Return make_pair(n, n < 8).
78
88
  // Does not read *s2_limit or beyond.
79
89
  // Does not read *(s1 + (s2_limit - s2)) or beyond.
80
90
  // Requires that s2_limit >= s2.
81
91
  //
82
- // Separate implementation for x86_64, for speed. Uses the fact that
83
- // x86_64 is little endian.
84
- #if defined(ARCH_K8)
85
- static inline int FindMatchLength(const char* s1,
86
- const char* s2,
87
- const char* s2_limit) {
92
+ // Separate implementation for 64-bit, little-endian cpus.
93
+ #if !defined(SNAPPY_IS_BIG_ENDIAN) && \
94
+ (defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM))
95
+ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
96
+ const char* s2,
97
+ const char* s2_limit) {
88
98
  assert(s2_limit >= s2);
89
- int matched = 0;
99
+ size_t matched = 0;
100
+
101
+ // This block isn't necessary for correctness; we could just start looping
102
+ // immediately. As an optimization though, it is useful. It creates some not
103
+ // uncommon code paths that determine, without extra effort, whether the match
104
+ // length is less than 8. In short, we are hoping to avoid a conditional
105
+ // branch, and perhaps get better code layout from the C++ compiler.
106
+ if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) {
107
+ uint64 a1 = UNALIGNED_LOAD64(s1);
108
+ uint64 a2 = UNALIGNED_LOAD64(s2);
109
+ if (a1 != a2) {
110
+ return std::pair<size_t, bool>(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3,
111
+ true);
112
+ } else {
113
+ matched = 8;
114
+ s2 += 8;
115
+ }
116
+ }
90
117
 
91
118
  // Find out how long the match is. We loop over the data 64 bits at a
92
119
  // time until we find a 64-bit block that doesn't match; then we find
93
120
  // the first non-matching bit and use that to calculate the total
94
121
  // length of the match.
95
- while (PREDICT_TRUE(s2 <= s2_limit - 8)) {
96
- if (PREDICT_FALSE(UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched))) {
122
+ while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) {
123
+ if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
97
124
  s2 += 8;
98
125
  matched += 8;
99
126
  } else {
100
- // On current (mid-2008) Opteron models there is a 3% more
101
- // efficient code sequence to find the first non-matching byte.
102
- // However, what follows is ~10% better on Intel Core 2 and newer,
103
- // and we expect AMD's bsf instruction to improve.
104
127
  uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
105
128
  int matching_bits = Bits::FindLSBSetNonZero64(x);
106
129
  matched += matching_bits >> 3;
107
- return matched;
130
+ assert(matched >= 8);
131
+ return std::pair<size_t, bool>(matched, false);
108
132
  }
109
133
  }
110
- while (PREDICT_TRUE(s2 < s2_limit)) {
111
- if (PREDICT_TRUE(s1[matched] == *s2)) {
134
+ while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) {
135
+ if (s1[matched] == *s2) {
112
136
  ++s2;
113
137
  ++matched;
114
138
  } else {
115
- return matched;
139
+ return std::pair<size_t, bool>(matched, matched < 8);
116
140
  }
117
141
  }
118
- return matched;
142
+ return std::pair<size_t, bool>(matched, matched < 8);
119
143
  }
120
144
  #else
121
- static inline int FindMatchLength(const char* s1,
122
- const char* s2,
123
- const char* s2_limit) {
145
+ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
146
+ const char* s2,
147
+ const char* s2_limit) {
124
148
  // Implementation based on the x86-64 version, above.
125
149
  assert(s2_limit >= s2);
126
150
  int matched = 0;
@@ -140,11 +164,68 @@ static inline int FindMatchLength(const char* s1,
140
164
  ++matched;
141
165
  }
142
166
  }
143
- return matched;
167
+ return std::pair<size_t, bool>(matched, matched < 8);
144
168
  }
145
169
  #endif
146
170
 
171
+ // Lookup tables for decompression code. Give --snappy_dump_decompression_table
172
+ // to the unit test to recompute char_table.
173
+
174
+ enum {
175
+ LITERAL = 0,
176
+ COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode
177
+ COPY_2_BYTE_OFFSET = 2,
178
+ COPY_4_BYTE_OFFSET = 3
179
+ };
180
+ static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset.
181
+
182
+ // Data stored per entry in lookup table:
183
+ // Range Bits-used Description
184
+ // ------------------------------------
185
+ // 1..64 0..7 Literal/copy length encoded in opcode byte
186
+ // 0..7 8..10 Copy offset encoded in opcode byte / 256
187
+ // 0..4 11..13 Extra bytes after opcode
188
+ //
189
+ // We use eight bits for the length even though 7 would have sufficed
190
+ // because of efficiency reasons:
191
+ // (1) Extracting a byte is faster than a bit-field
192
+ // (2) It properly aligns copy offset so we do not need a <<8
193
+ static const uint16 char_table[256] = {
194
+ 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
195
+ 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
196
+ 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
197
+ 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
198
+ 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
199
+ 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
200
+ 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
201
+ 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
202
+ 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
203
+ 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
204
+ 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
205
+ 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
206
+ 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
207
+ 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
208
+ 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
209
+ 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
210
+ 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
211
+ 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
212
+ 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
213
+ 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
214
+ 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
215
+ 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
216
+ 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
217
+ 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
218
+ 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
219
+ 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
220
+ 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
221
+ 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
222
+ 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
223
+ 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
224
+ 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
225
+ 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
226
+ };
227
+
147
228
  } // end namespace internal
148
229
  } // end namespace snappy
149
230
 
150
- #endif // UTIL_SNAPPY_SNAPPY_INTERNAL_H_
231
+ #endif // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_