couchbase 3.0.0.alpha.1-universal-darwin-19 → 3.0.0.alpha.2-universal-darwin-19
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/tests-6.0.3.yml +49 -0
- data/.github/workflows/tests.yml +47 -0
- data/.gitmodules +3 -0
- data/.idea/dictionaries/gem_terms.xml +5 -0
- data/.idea/inspectionProfiles/Project_Default.xml +1 -0
- data/.idea/vcs.xml +1 -0
- data/Gemfile +1 -0
- data/README.md +55 -2
- data/Rakefile +18 -0
- data/bin/init-cluster +62 -0
- data/bin/setup +1 -0
- data/couchbase.gemspec +3 -2
- data/examples/crud.rb +1 -2
- data/examples/managing_buckets.rb +47 -0
- data/examples/managing_collections.rb +58 -0
- data/examples/managing_query_indexes.rb +63 -0
- data/examples/query.rb +3 -2
- data/examples/query_with_consistency.rb +76 -0
- data/examples/subdocument.rb +23 -1
- data/ext/.clang-format +1 -1
- data/ext/.idea/dictionaries/couchbase_terms.xml +2 -0
- data/ext/.idea/vcs.xml +1 -0
- data/ext/CMakeLists.txt +30 -12
- data/ext/build_version.hxx.in +26 -0
- data/ext/couchbase/bucket.hxx +69 -8
- data/ext/couchbase/cluster.hxx +70 -54
- data/ext/couchbase/collections_manifest.hxx +3 -3
- data/ext/couchbase/configuration.hxx +14 -0
- data/ext/couchbase/couchbase.cxx +2044 -383
- data/ext/couchbase/{operations/document_id.hxx → document_id.hxx} +5 -4
- data/ext/couchbase/io/http_message.hxx +5 -1
- data/ext/couchbase/io/http_parser.hxx +2 -1
- data/ext/couchbase/io/http_session.hxx +6 -3
- data/ext/couchbase/io/{binary_message.hxx → mcbp_message.hxx} +15 -12
- data/ext/couchbase/io/mcbp_parser.hxx +99 -0
- data/ext/couchbase/io/{key_value_session.hxx → mcbp_session.hxx} +200 -95
- data/ext/couchbase/io/session_manager.hxx +37 -22
- data/ext/couchbase/mutation_token.hxx +2 -1
- data/ext/couchbase/operations.hxx +38 -8
- data/ext/couchbase/operations/bucket_create.hxx +138 -0
- data/ext/couchbase/operations/bucket_drop.hxx +65 -0
- data/ext/couchbase/operations/bucket_flush.hxx +65 -0
- data/ext/couchbase/operations/bucket_get.hxx +69 -0
- data/ext/couchbase/operations/bucket_get_all.hxx +62 -0
- data/ext/couchbase/operations/bucket_settings.hxx +111 -0
- data/ext/couchbase/operations/bucket_update.hxx +115 -0
- data/ext/couchbase/operations/cluster_developer_preview_enable.hxx +60 -0
- data/ext/couchbase/operations/collection_create.hxx +86 -0
- data/ext/couchbase/operations/collection_drop.hxx +82 -0
- data/ext/couchbase/operations/command.hxx +10 -10
- data/ext/couchbase/operations/document_decrement.hxx +80 -0
- data/ext/couchbase/operations/document_exists.hxx +80 -0
- data/ext/couchbase/operations/{get.hxx → document_get.hxx} +4 -2
- data/ext/couchbase/operations/document_get_and_lock.hxx +64 -0
- data/ext/couchbase/operations/document_get_and_touch.hxx +64 -0
- data/ext/couchbase/operations/document_increment.hxx +80 -0
- data/ext/couchbase/operations/document_insert.hxx +74 -0
- data/ext/couchbase/operations/{lookup_in.hxx → document_lookup_in.hxx} +2 -2
- data/ext/couchbase/operations/{mutate_in.hxx → document_mutate_in.hxx} +11 -2
- data/ext/couchbase/operations/{query.hxx → document_query.hxx} +101 -6
- data/ext/couchbase/operations/document_remove.hxx +67 -0
- data/ext/couchbase/operations/document_replace.hxx +76 -0
- data/ext/couchbase/operations/{upsert.hxx → document_touch.hxx} +14 -14
- data/ext/couchbase/operations/{remove.hxx → document_unlock.hxx} +12 -10
- data/ext/couchbase/operations/document_upsert.hxx +74 -0
- data/ext/couchbase/operations/query_index_build_deferred.hxx +85 -0
- data/ext/couchbase/operations/query_index_create.hxx +134 -0
- data/ext/couchbase/operations/query_index_drop.hxx +108 -0
- data/ext/couchbase/operations/query_index_get_all.hxx +106 -0
- data/ext/couchbase/operations/scope_create.hxx +81 -0
- data/ext/couchbase/operations/scope_drop.hxx +79 -0
- data/ext/couchbase/operations/scope_get_all.hxx +72 -0
- data/ext/couchbase/protocol/client_opcode.hxx +35 -0
- data/ext/couchbase/protocol/client_request.hxx +56 -9
- data/ext/couchbase/protocol/client_response.hxx +52 -15
- data/ext/couchbase/protocol/cmd_cluster_map_change_notification.hxx +81 -0
- data/ext/couchbase/protocol/cmd_decrement.hxx +187 -0
- data/ext/couchbase/protocol/cmd_exists.hxx +171 -0
- data/ext/couchbase/protocol/cmd_get.hxx +31 -8
- data/ext/couchbase/protocol/cmd_get_and_lock.hxx +142 -0
- data/ext/couchbase/protocol/cmd_get_and_touch.hxx +142 -0
- data/ext/couchbase/protocol/cmd_get_cluster_config.hxx +16 -3
- data/ext/couchbase/protocol/cmd_get_collections_manifest.hxx +16 -3
- data/ext/couchbase/protocol/cmd_get_error_map.hxx +16 -3
- data/ext/couchbase/protocol/cmd_hello.hxx +24 -8
- data/ext/couchbase/protocol/cmd_increment.hxx +187 -0
- data/ext/couchbase/protocol/cmd_info.hxx +1 -0
- data/ext/couchbase/protocol/cmd_insert.hxx +172 -0
- data/ext/couchbase/protocol/cmd_lookup_in.hxx +28 -13
- data/ext/couchbase/protocol/cmd_mutate_in.hxx +65 -13
- data/ext/couchbase/protocol/cmd_remove.hxx +59 -4
- data/ext/couchbase/protocol/cmd_replace.hxx +172 -0
- data/ext/couchbase/protocol/cmd_sasl_auth.hxx +15 -3
- data/ext/couchbase/protocol/cmd_sasl_list_mechs.hxx +15 -3
- data/ext/couchbase/protocol/cmd_sasl_step.hxx +15 -3
- data/ext/couchbase/protocol/cmd_select_bucket.hxx +14 -2
- data/ext/couchbase/protocol/cmd_touch.hxx +102 -0
- data/ext/couchbase/protocol/cmd_unlock.hxx +95 -0
- data/ext/couchbase/protocol/cmd_upsert.hxx +50 -14
- data/ext/couchbase/protocol/durability_level.hxx +67 -0
- data/ext/couchbase/protocol/frame_info_id.hxx +187 -0
- data/ext/couchbase/protocol/hello_feature.hxx +137 -0
- data/ext/couchbase/protocol/server_opcode.hxx +57 -0
- data/ext/couchbase/protocol/server_request.hxx +122 -0
- data/ext/couchbase/protocol/unsigned_leb128.h +15 -15
- data/ext/couchbase/utils/byteswap.hxx +1 -2
- data/ext/couchbase/utils/url_codec.hxx +225 -0
- data/ext/couchbase/version.hxx +3 -1
- data/ext/extconf.rb +4 -1
- data/ext/test/main.cxx +37 -113
- data/ext/third_party/snappy/.appveyor.yml +36 -0
- data/ext/third_party/snappy/.gitignore +8 -0
- data/ext/third_party/snappy/.travis.yml +98 -0
- data/ext/third_party/snappy/AUTHORS +1 -0
- data/ext/third_party/snappy/CMakeLists.txt +345 -0
- data/ext/third_party/snappy/CONTRIBUTING.md +26 -0
- data/ext/third_party/snappy/COPYING +54 -0
- data/ext/third_party/snappy/NEWS +188 -0
- data/ext/third_party/snappy/README.md +148 -0
- data/ext/third_party/snappy/cmake/SnappyConfig.cmake.in +33 -0
- data/ext/third_party/snappy/cmake/config.h.in +59 -0
- data/ext/third_party/snappy/docs/README.md +72 -0
- data/ext/third_party/snappy/format_description.txt +110 -0
- data/ext/third_party/snappy/framing_format.txt +135 -0
- data/ext/third_party/snappy/snappy-c.cc +90 -0
- data/ext/third_party/snappy/snappy-c.h +138 -0
- data/ext/third_party/snappy/snappy-internal.h +315 -0
- data/ext/third_party/snappy/snappy-sinksource.cc +121 -0
- data/ext/third_party/snappy/snappy-sinksource.h +182 -0
- data/ext/third_party/snappy/snappy-stubs-internal.cc +42 -0
- data/ext/third_party/snappy/snappy-stubs-internal.h +493 -0
- data/ext/third_party/snappy/snappy-stubs-public.h.in +63 -0
- data/ext/third_party/snappy/snappy-test.cc +613 -0
- data/ext/third_party/snappy/snappy-test.h +526 -0
- data/ext/third_party/snappy/snappy.cc +1770 -0
- data/ext/third_party/snappy/snappy.h +209 -0
- data/ext/third_party/snappy/snappy_compress_fuzzer.cc +60 -0
- data/ext/third_party/snappy/snappy_uncompress_fuzzer.cc +58 -0
- data/ext/third_party/snappy/snappy_unittest.cc +1512 -0
- data/ext/third_party/snappy/testdata/alice29.txt +3609 -0
- data/ext/third_party/snappy/testdata/asyoulik.txt +4122 -0
- data/ext/third_party/snappy/testdata/baddata1.snappy +0 -0
- data/ext/third_party/snappy/testdata/baddata2.snappy +0 -0
- data/ext/third_party/snappy/testdata/baddata3.snappy +0 -0
- data/ext/third_party/snappy/testdata/fireworks.jpeg +0 -0
- data/ext/third_party/snappy/testdata/geo.protodata +0 -0
- data/ext/third_party/snappy/testdata/html +1 -0
- data/ext/third_party/snappy/testdata/html_x_4 +1 -0
- data/ext/third_party/snappy/testdata/kppkn.gtb +0 -0
- data/ext/third_party/snappy/testdata/lcet10.txt +7519 -0
- data/ext/third_party/snappy/testdata/paper-100k.pdf +600 -2
- data/ext/third_party/snappy/testdata/plrabn12.txt +10699 -0
- data/ext/third_party/snappy/testdata/urls.10K +10000 -0
- data/lib/couchbase/binary_collection.rb +33 -76
- data/lib/couchbase/binary_collection_options.rb +94 -0
- data/lib/couchbase/bucket.rb +9 -3
- data/lib/couchbase/cluster.rb +161 -23
- data/lib/couchbase/collection.rb +108 -191
- data/lib/couchbase/collection_options.rb +430 -0
- data/lib/couchbase/errors.rb +136 -134
- data/lib/couchbase/json_transcoder.rb +32 -0
- data/lib/couchbase/management/analytics_index_manager.rb +185 -9
- data/lib/couchbase/management/bucket_manager.rb +84 -33
- data/lib/couchbase/management/collection_manager.rb +166 -1
- data/lib/couchbase/management/query_index_manager.rb +261 -0
- data/lib/couchbase/management/search_index_manager.rb +291 -0
- data/lib/couchbase/management/user_manager.rb +12 -10
- data/lib/couchbase/management/view_index_manager.rb +151 -1
- data/lib/couchbase/mutation_state.rb +11 -1
- data/lib/couchbase/scope.rb +4 -4
- data/lib/couchbase/version.rb +1 -1
- metadata +113 -18
- data/.travis.yml +0 -7
- data/ext/couchbase/io/binary_parser.hxx +0 -64
- data/lib/couchbase/results.rb +0 -307
@@ -0,0 +1,59 @@
|
|
1
|
+
#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
|
2
|
+
#define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
|
3
|
+
|
4
|
+
/* Define to 1 if the compiler supports __attribute__((always_inline)). */
|
5
|
+
#cmakedefine HAVE_ATTRIBUTE_ALWAYS_INLINE 1
|
6
|
+
|
7
|
+
/* Define to 1 if the compiler supports __builtin_ctz and friends. */
|
8
|
+
#cmakedefine HAVE_BUILTIN_CTZ 1
|
9
|
+
|
10
|
+
/* Define to 1 if the compiler supports __builtin_expect. */
|
11
|
+
#cmakedefine HAVE_BUILTIN_EXPECT 1
|
12
|
+
|
13
|
+
/* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */
|
14
|
+
#cmakedefine HAVE_FUNC_MMAP 1
|
15
|
+
|
16
|
+
/* Define to 1 if you have a definition for sysconf() in <unistd.h>. */
|
17
|
+
#cmakedefine HAVE_FUNC_SYSCONF 1
|
18
|
+
|
19
|
+
/* Define to 1 to use the gflags package for command-line parsing. */
|
20
|
+
#cmakedefine HAVE_GFLAGS 1
|
21
|
+
|
22
|
+
/* Define to 1 if you have Google Test. */
|
23
|
+
#cmakedefine HAVE_GTEST 1
|
24
|
+
|
25
|
+
/* Define to 1 if you have the `lzo2' library (-llzo2). */
|
26
|
+
#cmakedefine HAVE_LIBLZO2 1
|
27
|
+
|
28
|
+
/* Define to 1 if you have the `z' library (-lz). */
|
29
|
+
#cmakedefine HAVE_LIBZ 1
|
30
|
+
|
31
|
+
/* Define to 1 if you have the <sys/mman.h> header file. */
|
32
|
+
#cmakedefine HAVE_SYS_MMAN_H 1
|
33
|
+
|
34
|
+
/* Define to 1 if you have the <sys/resource.h> header file. */
|
35
|
+
#cmakedefine HAVE_SYS_RESOURCE_H 1
|
36
|
+
|
37
|
+
/* Define to 1 if you have the <sys/time.h> header file. */
|
38
|
+
#cmakedefine HAVE_SYS_TIME_H 1
|
39
|
+
|
40
|
+
/* Define to 1 if you have the <sys/uio.h> header file. */
|
41
|
+
#cmakedefine HAVE_SYS_UIO_H 1
|
42
|
+
|
43
|
+
/* Define to 1 if you have the <unistd.h> header file. */
|
44
|
+
#cmakedefine HAVE_UNISTD_H 1
|
45
|
+
|
46
|
+
/* Define to 1 if you have the <windows.h> header file. */
|
47
|
+
#cmakedefine HAVE_WINDOWS_H 1
|
48
|
+
|
49
|
+
/* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */
|
50
|
+
#cmakedefine01 SNAPPY_HAVE_SSSE3
|
51
|
+
|
52
|
+
/* Define to 1 if you target processors with BMI2+ and have <bmi2intrin.h>. */
|
53
|
+
#cmakedefine01 SNAPPY_HAVE_BMI2
|
54
|
+
|
55
|
+
/* Define to 1 if your processor stores words with the most significant byte
|
56
|
+
first (like Motorola and SPARC, unlike Intel and VAX). */
|
57
|
+
#cmakedefine SNAPPY_IS_BIG_ENDIAN 1
|
58
|
+
|
59
|
+
#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
|
@@ -0,0 +1,72 @@
|
|
1
|
+
Snappy is a compression/decompression library. It does not aim for maximum
|
2
|
+
compression, or compatibility with any other compression library; instead, it
|
3
|
+
aims for very high speeds and reasonable compression. For instance, compared
|
4
|
+
to the fastest mode of zlib, Snappy is an order of magnitude faster for most
|
5
|
+
inputs, but the resulting compressed files are anywhere from 20% to 100%
|
6
|
+
bigger. On a single core of a Core i7 processor in 64-bit mode, Snappy
|
7
|
+
compresses at about 250 MB/sec or more and decompresses at about 500 MB/sec
|
8
|
+
or more.
|
9
|
+
|
10
|
+
Snappy is widely used inside Google, in everything from BigTable and MapReduce
|
11
|
+
to our internal RPC systems. (Snappy has previously been referred to as "Zippy"
|
12
|
+
in some presentations and the like.)
|
13
|
+
|
14
|
+
For more information, please see the [README](../README.md). Benchmarks against
|
15
|
+
a few other compression libraries (zlib, LZO, LZF, FastLZ, and QuickLZ) are
|
16
|
+
included in the source code distribution. The source code also contains a
|
17
|
+
[formal format specification](../format_description.txt), as well
|
18
|
+
as a specification for a [framing format](../framing_format.txt) useful for
|
19
|
+
higher-level framing and encapsulation of Snappy data, e.g. for transporting
|
20
|
+
Snappy-compressed data across HTTP in a streaming fashion. Note that the Snappy
|
21
|
+
distribution currently has no code implementing the latter, but some of the
|
22
|
+
ports do (see below).
|
23
|
+
|
24
|
+
Snappy is written in C++, but C bindings are included, and several bindings to
|
25
|
+
other languages are maintained by third parties:
|
26
|
+
|
27
|
+
* C#: [Snappy for .NET](http://snappy4net.codeplex.com/) (P/Invoke wrapper),
|
28
|
+
[Snappy.NET](http://snappy.angeloflogic.com/) (P/Invoke wrapper),
|
29
|
+
[Snappy.Sharp](https://github.com/jeffesp/Snappy.Sharp) (native
|
30
|
+
reimplementation)
|
31
|
+
* [C port](http://github.com/andikleen/snappy-c)
|
32
|
+
* [C++ MSVC packaging](http://snappy.angeloflogic.com/) (plus Windows binaries,
|
33
|
+
NuGet packages and command-line tool)
|
34
|
+
* Common Lisp: [Library bindings](http://flambard.github.com/thnappy/),
|
35
|
+
[native reimplementation](https://github.com/brown/snappy)
|
36
|
+
* Erlang: [esnappy](https://github.com/thekvs/esnappy),
|
37
|
+
[snappy-erlang-nif](https://github.com/fdmanana/snappy-erlang-nif)
|
38
|
+
* [Go](https://github.com/golang/snappy/)
|
39
|
+
* [Haskell](http://hackage.haskell.org/package/snappy)
|
40
|
+
* [Haxe](https://github.com/MaddinXx/hxsnappy) (C++/Neko)
|
41
|
+
* [iOS packaging](https://github.com/ideawu/snappy-ios)
|
42
|
+
* Java: [JNI wrapper](https://github.com/xerial/snappy-java) (including the
|
43
|
+
framing format), [native reimplementation](http://code.google.com/p/jsnappy/),
|
44
|
+
[other native reimplementation](https://github.com/dain/snappy) (including
|
45
|
+
the framing format)
|
46
|
+
* [Lua](https://github.com/forhappy/lua-snappy)
|
47
|
+
* [Node.js](https://github.com/kesla/node-snappy) (including the [framing
|
48
|
+
format](https://github.com/kesla/node-snappy-stream))
|
49
|
+
* [Perl](http://search.cpan.org/dist/Compress-Snappy/)
|
50
|
+
* [PHP](https://github.com/kjdev/php-ext-snappy)
|
51
|
+
* [Python](http://pypi.python.org/pypi/python-snappy) (including a command-line
|
52
|
+
tool for the framing format)
|
53
|
+
* [R](https://github.com/lulyon/R-snappy)
|
54
|
+
* [Ruby](https://github.com/miyucy/snappy)
|
55
|
+
* [Rust](https://github.com/BurntSushi/rust-snappy)
|
56
|
+
* [Smalltalk](https://github.com/mumez/sqnappy) (including the framing format)
|
57
|
+
|
58
|
+
Snappy is used or is available as an alternative in software such as
|
59
|
+
|
60
|
+
* [MongoDB](https://www.mongodb.com/)
|
61
|
+
* [Cassandra](http://cassandra.apache.org/)
|
62
|
+
* [Couchbase](http://www.couchbase.com/)
|
63
|
+
* [Hadoop](http://hadoop.apache.org/)
|
64
|
+
* [LessFS](http://www.lessfs.com/wordpress/)
|
65
|
+
* [LevelDB](https://github.com/google/leveldb) (which is in turn used by
|
66
|
+
[Google Chrome](http://chrome.google.com/))
|
67
|
+
* [Lucene](http://lucene.apache.org/)
|
68
|
+
* [VoltDB](http://voltdb.com/)
|
69
|
+
|
70
|
+
If you know of more, do not hesitate to let us know. The easiest way to get in
|
71
|
+
touch is via the
|
72
|
+
[Snappy discussion mailing list](http://groups.google.com/group/snappy-compression).
|
@@ -0,0 +1,110 @@
|
|
1
|
+
Snappy compressed format description
|
2
|
+
Last revised: 2011-10-05
|
3
|
+
|
4
|
+
|
5
|
+
This is not a formal specification, but should suffice to explain most
|
6
|
+
relevant parts of how the Snappy format works. It is originally based on
|
7
|
+
text by Zeev Tarantov.
|
8
|
+
|
9
|
+
Snappy is a LZ77-type compressor with a fixed, byte-oriented encoding.
|
10
|
+
There is no entropy encoder backend nor framing layer -- the latter is
|
11
|
+
assumed to be handled by other parts of the system.
|
12
|
+
|
13
|
+
This document only describes the format, not how the Snappy compressor or
|
14
|
+
decompressor actually works. The correctness of the decompressor should not
|
15
|
+
depend on implementation details of the compressor, and vice versa.
|
16
|
+
|
17
|
+
|
18
|
+
1. Preamble
|
19
|
+
|
20
|
+
The stream starts with the uncompressed length (up to a maximum of 2^32 - 1),
|
21
|
+
stored as a little-endian varint. Varints consist of a series of bytes,
|
22
|
+
where the lower 7 bits are data and the upper bit is set iff there are
|
23
|
+
more bytes to be read. In other words, an uncompressed length of 64 would
|
24
|
+
be stored as 0x40, and an uncompressed length of 2097150 (0x1FFFFE)
|
25
|
+
would be stored as 0xFE 0xFF 0x7F.
|
26
|
+
|
27
|
+
|
28
|
+
2. The compressed stream itself
|
29
|
+
|
30
|
+
There are two types of elements in a Snappy stream: Literals and
|
31
|
+
copies (backreferences). There is no restriction on the order of elements,
|
32
|
+
except that the stream naturally cannot start with a copy. (Having
|
33
|
+
two literals in a row is never optimal from a compression point of
|
34
|
+
view, but nevertheless fully permitted.) Each element starts with a tag byte,
|
35
|
+
and the lower two bits of this tag byte signal what type of element will
|
36
|
+
follow:
|
37
|
+
|
38
|
+
00: Literal
|
39
|
+
01: Copy with 1-byte offset
|
40
|
+
10: Copy with 2-byte offset
|
41
|
+
11: Copy with 4-byte offset
|
42
|
+
|
43
|
+
The interpretation of the upper six bits is element-dependent.
|
44
|
+
|
45
|
+
|
46
|
+
2.1. Literals (00)
|
47
|
+
|
48
|
+
Literals are uncompressed data stored directly in the byte stream.
|
49
|
+
The literal length is stored differently depending on the length
|
50
|
+
of the literal:
|
51
|
+
|
52
|
+
- For literals up to and including 60 bytes in length, the upper
|
53
|
+
six bits of the tag byte contain (len-1). The literal follows
|
54
|
+
immediately thereafter in the bytestream.
|
55
|
+
- For longer literals, the (len-1) value is stored after the tag byte,
|
56
|
+
little-endian. The upper six bits of the tag byte describe how
|
57
|
+
many bytes are used for the length; 60, 61, 62 or 63 for
|
58
|
+
1-4 bytes, respectively. The literal itself follows after the
|
59
|
+
length.
|
60
|
+
|
61
|
+
|
62
|
+
2.2. Copies
|
63
|
+
|
64
|
+
Copies are references back into previous decompressed data, telling
|
65
|
+
the decompressor to reuse data it has previously decoded.
|
66
|
+
They encode two values: The _offset_, saying how many bytes back
|
67
|
+
from the current position to read, and the _length_, how many bytes
|
68
|
+
to copy. Offsets of zero can be encoded, but are not legal;
|
69
|
+
similarly, it is possible to encode backreferences that would
|
70
|
+
go past the end of the block (offset > current decompressed position),
|
71
|
+
which is also nonsensical and thus not allowed.
|
72
|
+
|
73
|
+
As in most LZ77-based compressors, the length can be larger than the offset,
|
74
|
+
yielding a form of run-length encoding (RLE). For instance,
|
75
|
+
"xababab" could be encoded as
|
76
|
+
|
77
|
+
<literal: "xab"> <copy: offset=2 length=4>
|
78
|
+
|
79
|
+
Note that since the current Snappy compressor works in 32 kB
|
80
|
+
blocks and does not do matching across blocks, it will never produce
|
81
|
+
a bitstream with offsets larger than about 32768. However, the
|
82
|
+
decompressor should not rely on this, as it may change in the future.
|
83
|
+
|
84
|
+
There are several different kinds of copy elements, depending on
|
85
|
+
the amount of bytes to be copied (length), and how far back the
|
86
|
+
data to be copied is (offset).
|
87
|
+
|
88
|
+
|
89
|
+
2.2.1. Copy with 1-byte offset (01)
|
90
|
+
|
91
|
+
These elements can encode lengths between [4..11] bytes and offsets
|
92
|
+
between [0..2047] bytes. (len-4) occupies three bits and is stored
|
93
|
+
in bits [2..4] of the tag byte. The offset occupies 11 bits, of which the
|
94
|
+
upper three are stored in the upper three bits ([5..7]) of the tag byte,
|
95
|
+
and the lower eight are stored in a byte following the tag byte.
|
96
|
+
|
97
|
+
|
98
|
+
2.2.2. Copy with 2-byte offset (10)
|
99
|
+
|
100
|
+
These elements can encode lengths between [1..64] and offsets from
|
101
|
+
[0..65535]. (len-1) occupies six bits and is stored in the upper
|
102
|
+
six bits ([2..7]) of the tag byte. The offset is stored as a
|
103
|
+
little-endian 16-bit integer in the two bytes following the tag byte.
|
104
|
+
|
105
|
+
|
106
|
+
2.2.3. Copy with 4-byte offset (11)
|
107
|
+
|
108
|
+
These are like the copies with 2-byte offsets (see previous subsection),
|
109
|
+
except that the offset is stored as a 32-bit integer instead of a
|
110
|
+
16-bit integer (and thus will occupy four bytes).
|
@@ -0,0 +1,135 @@
|
|
1
|
+
Snappy framing format description
|
2
|
+
Last revised: 2013-10-25
|
3
|
+
|
4
|
+
This document describes a framing format for Snappy, allowing compressing to
|
5
|
+
files or streams that can then more easily be decompressed without having
|
6
|
+
to hold the entire stream in memory. It also provides data checksums to
|
7
|
+
help verify integrity. It does not provide metadata checksums, so it does
|
8
|
+
not protect against e.g. all forms of truncations.
|
9
|
+
|
10
|
+
Implementation of the framing format is optional for Snappy compressors and
|
11
|
+
decompressors; it is not part of the Snappy core specification.
|
12
|
+
|
13
|
+
|
14
|
+
1. General structure
|
15
|
+
|
16
|
+
The file consists solely of chunks, lying back-to-back with no padding
|
17
|
+
in between. Each chunk consists first of a single byte of chunk identifier,
|
18
|
+
then a three-byte little-endian length of the chunk in bytes (from 0 to
|
19
|
+
16777215, inclusive), and then the data if any. The four bytes of chunk
|
20
|
+
header are not counted in the data length.
|
21
|
+
|
22
|
+
The different chunk types are listed below. The first chunk must always
|
23
|
+
be the stream identifier chunk (see section 4.1, below). The stream
|
24
|
+
ends when the file ends -- there is no explicit end-of-file marker.
|
25
|
+
|
26
|
+
|
27
|
+
2. File type identification
|
28
|
+
|
29
|
+
The following identifiers for this format are recommended where appropriate.
|
30
|
+
However, note that none have been registered officially, so this is only to
|
31
|
+
be taken as a guideline. We use "Snappy framed" to distinguish between this
|
32
|
+
format and raw Snappy data.
|
33
|
+
|
34
|
+
File extension: .sz
|
35
|
+
MIME type: application/x-snappy-framed
|
36
|
+
HTTP Content-Encoding: x-snappy-framed
|
37
|
+
|
38
|
+
|
39
|
+
3. Checksum format
|
40
|
+
|
41
|
+
Some chunks have data protected by a checksum (the ones that do will say so
|
42
|
+
explicitly). The checksums are always masked CRC-32Cs.
|
43
|
+
|
44
|
+
A description of CRC-32C can be found in RFC 3720, section 12.1, with
|
45
|
+
examples in section B.4.
|
46
|
+
|
47
|
+
Checksums are not stored directly, but masked, as checksumming data and
|
48
|
+
then its own checksum can be problematic. The masking is the same as used
|
49
|
+
in Apache Hadoop: Rotate the checksum by 15 bits, then add the constant
|
50
|
+
0xa282ead8 (using wraparound as normal for unsigned integers). This is
|
51
|
+
equivalent to the following C code:
|
52
|
+
|
53
|
+
uint32_t mask_checksum(uint32_t x) {
|
54
|
+
return ((x >> 15) | (x << 17)) + 0xa282ead8;
|
55
|
+
}
|
56
|
+
|
57
|
+
Note that the masking is reversible.
|
58
|
+
|
59
|
+
The checksum is always stored as a four-byte integer, in little-endian byte order.
|
60
|
+
|
61
|
+
|
62
|
+
4. Chunk types
|
63
|
+
|
64
|
+
The currently supported chunk types are described below. The list may
|
65
|
+
be extended in the future.
|
66
|
+
|
67
|
+
|
68
|
+
4.1. Stream identifier (chunk type 0xff)
|
69
|
+
|
70
|
+
The stream identifier is always the first element in the stream.
|
71
|
+
It is exactly six bytes long and contains "sNaPpY" in ASCII. This means that
|
72
|
+
a valid Snappy framed stream always starts with the bytes
|
73
|
+
|
74
|
+
0xff 0x06 0x00 0x00 0x73 0x4e 0x61 0x50 0x70 0x59
|
75
|
+
|
76
|
+
The stream identifier chunk can come multiple times in the stream besides
|
77
|
+
the first; if such a chunk shows up, it should simply be ignored, assuming
|
78
|
+
it has the right length and contents. This allows for easy concatenation of
|
79
|
+
compressed files without the need for re-framing.
|
80
|
+
|
81
|
+
|
82
|
+
4.2. Compressed data (chunk type 0x00)
|
83
|
+
|
84
|
+
Compressed data chunks contain a normal Snappy compressed bitstream;
|
85
|
+
see the compressed format specification. The compressed data is preceded by
|
86
|
+
the CRC-32C (see section 3) of the _uncompressed_ data.
|
87
|
+
|
88
|
+
Note that the data portion of the chunk, i.e., the compressed contents,
|
89
|
+
can be at most 16777211 bytes (2^24 - 1, minus the checksum).
|
90
|
+
However, we place an additional restriction that the uncompressed data
|
91
|
+
in a chunk must be no longer than 65536 bytes. This allows consumers to
|
92
|
+
easily use small fixed-size buffers.
|
93
|
+
|
94
|
+
|
95
|
+
4.3. Uncompressed data (chunk type 0x01)
|
96
|
+
|
97
|
+
Uncompressed data chunks allow a compressor to send uncompressed,
|
98
|
+
raw data; this is useful if, for instance, incompressible or
|
99
|
+
near-incompressible data is detected, and faster decompression is desired.
|
100
|
+
|
101
|
+
As in the compressed chunks, the data is preceded by its own masked
|
102
|
+
CRC-32C (see section 3).
|
103
|
+
|
104
|
+
An uncompressed data chunk, like compressed data chunks, should contain
|
105
|
+
no more than 65536 data bytes, so the maximum legal chunk length with the
|
106
|
+
checksum is 65540.
|
107
|
+
|
108
|
+
|
109
|
+
4.4. Padding (chunk type 0xfe)
|
110
|
+
|
111
|
+
Padding chunks allow a compressor to increase the size of the data stream
|
112
|
+
so that it complies with external demands, e.g. that the total number of
|
113
|
+
bytes is a multiple of some value.
|
114
|
+
|
115
|
+
All bytes of the padding chunk, except the chunk byte itself and the length,
|
116
|
+
should be zero, but decompressors must not try to interpret or verify the
|
117
|
+
padding data in any way.
|
118
|
+
|
119
|
+
|
120
|
+
4.5. Reserved unskippable chunks (chunk types 0x02-0x7f)
|
121
|
+
|
122
|
+
These are reserved for future expansion. A decoder that sees such a chunk
|
123
|
+
should immediately return an error, as it must assume it cannot decode the
|
124
|
+
stream correctly.
|
125
|
+
|
126
|
+
Future versions of this specification may define meanings for these chunks.
|
127
|
+
|
128
|
+
|
129
|
+
4.6. Reserved skippable chunks (chunk types 0x80-0xfd)
|
130
|
+
|
131
|
+
These are also reserved for future expansion, but unlike the chunks
|
132
|
+
described in 4.5, a decoder seeing these must skip them and continue
|
133
|
+
decoding.
|
134
|
+
|
135
|
+
Future versions of this specification may define meanings for these chunks.
|
@@ -0,0 +1,90 @@
|
|
1
|
+
// Copyright 2011 Martin Gieseking <martin.gieseking@uos.de>.
|
2
|
+
//
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
4
|
+
// modification, are permitted provided that the following conditions are
|
5
|
+
// met:
|
6
|
+
//
|
7
|
+
// * Redistributions of source code must retain the above copyright
|
8
|
+
// notice, this list of conditions and the following disclaimer.
|
9
|
+
// * Redistributions in binary form must reproduce the above
|
10
|
+
// copyright notice, this list of conditions and the following disclaimer
|
11
|
+
// in the documentation and/or other materials provided with the
|
12
|
+
// distribution.
|
13
|
+
// * Neither the name of Google Inc. nor the names of its
|
14
|
+
// contributors may be used to endorse or promote products derived from
|
15
|
+
// this software without specific prior written permission.
|
16
|
+
//
|
17
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
18
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
19
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
20
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
21
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
22
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
23
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
24
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
25
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
26
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
28
|
+
|
29
|
+
#include "snappy.h"
|
30
|
+
#include "snappy-c.h"
|
31
|
+
|
32
|
+
extern "C" {
|
33
|
+
|
34
|
+
// C binding: compress `input_length` bytes starting at `input` into the
// caller-provided buffer `compressed`.
//
// On entry `*compressed_length` is compared against
// snappy_max_compressed_length(input_length); if it is smaller, nothing is
// compressed and SNAPPY_BUFFER_TOO_SMALL is returned. Otherwise the work is
// delegated to snappy::RawCompress, which receives the same
// `compressed_length` pointer (presumably to store the produced size --
// confirm against snappy.h), and SNAPPY_OK is returned.
snappy_status snappy_compress(const char* input,
                              size_t input_length,
                              char* compressed,
                              size_t *compressed_length) {
  const size_t worst_case_size = snappy_max_compressed_length(input_length);
  if (*compressed_length >= worst_case_size) {
    snappy::RawCompress(input, input_length, compressed, compressed_length);
    return SNAPPY_OK;
  }
  return SNAPPY_BUFFER_TOO_SMALL;
}
|
44
|
+
|
45
|
+
// C binding: decompress `compressed_length` bytes from `compressed` into the
// caller-provided buffer `uncompressed`.
//
// Returns:
//   SNAPPY_INVALID_INPUT    - snappy::GetUncompressedLength or
//                             snappy::RawUncompress reported failure.
//   SNAPPY_BUFFER_TOO_SMALL - `*uncompressed_length` is smaller than the
//                             length reported by GetUncompressedLength.
//   SNAPPY_OK               - success; `*uncompressed_length` is overwritten
//                             with the length reported by
//                             GetUncompressedLength.
snappy_status snappy_uncompress(const char* compressed,
                                size_t compressed_length,
                                char* uncompressed,
                                size_t* uncompressed_length) {
  size_t decoded_size;
  const bool length_known = snappy::GetUncompressedLength(
      compressed, compressed_length, &decoded_size);
  if (!length_known) return SNAPPY_INVALID_INPUT;

  // The output buffer is only written once it is known to be large enough.
  if (decoded_size > *uncompressed_length) return SNAPPY_BUFFER_TOO_SMALL;

  const bool decoded =
      snappy::RawUncompress(compressed, compressed_length, uncompressed);
  if (!decoded) return SNAPPY_INVALID_INPUT;

  *uncompressed_length = decoded_size;
  return SNAPPY_OK;
}
|
64
|
+
|
65
|
+
size_t snappy_max_compressed_length(size_t source_length) {
|
66
|
+
return snappy::MaxCompressedLength(source_length);
|
67
|
+
}
|
68
|
+
|
69
|
+
// C binding: asks snappy::GetUncompressedLength for the uncompressed size of
// the buffer `compressed` (`compressed_length` bytes), passing `result`
// through as the output location.
//
// Returns SNAPPY_OK when GetUncompressedLength succeeds and
// SNAPPY_INVALID_INPUT otherwise.
snappy_status snappy_uncompressed_length(const char *compressed,
                                         size_t compressed_length,
                                         size_t *result) {
  const bool parsed =
      snappy::GetUncompressedLength(compressed, compressed_length, result);
  return parsed ? SNAPPY_OK : SNAPPY_INVALID_INPUT;
}
|
80
|
+
|
81
|
+
// C binding: checks the buffer `compressed` (`compressed_length` bytes) with
// snappy::IsValidCompressedBuffer.
//
// Returns SNAPPY_OK when that check passes and SNAPPY_INVALID_INPUT
// otherwise.
snappy_status snappy_validate_compressed_buffer(const char *compressed,
                                                size_t compressed_length) {
  if (!snappy::IsValidCompressedBuffer(compressed, compressed_length)) {
    return SNAPPY_INVALID_INPUT;
  }
  return SNAPPY_OK;
}
|
89
|
+
|
90
|
+
} // extern "C"
|