couchbase 3.0.0.alpha.1 → 3.0.0.alpha.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (176) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/tests-6.0.3.yml +49 -0
  3. data/.github/workflows/tests.yml +47 -0
  4. data/.gitmodules +3 -0
  5. data/.idea/dictionaries/gem_terms.xml +5 -0
  6. data/.idea/inspectionProfiles/Project_Default.xml +1 -0
  7. data/.idea/vcs.xml +1 -0
  8. data/Gemfile +1 -0
  9. data/README.md +55 -2
  10. data/Rakefile +18 -0
  11. data/bin/init-cluster +62 -0
  12. data/bin/setup +1 -0
  13. data/couchbase.gemspec +3 -2
  14. data/examples/crud.rb +1 -2
  15. data/examples/managing_buckets.rb +47 -0
  16. data/examples/managing_collections.rb +58 -0
  17. data/examples/managing_query_indexes.rb +63 -0
  18. data/examples/query.rb +3 -2
  19. data/examples/query_with_consistency.rb +76 -0
  20. data/examples/subdocument.rb +23 -1
  21. data/ext/.clang-format +1 -1
  22. data/ext/.idea/dictionaries/couchbase_terms.xml +2 -0
  23. data/ext/.idea/vcs.xml +1 -0
  24. data/ext/CMakeLists.txt +30 -12
  25. data/ext/build_version.hxx.in +26 -0
  26. data/ext/couchbase/bucket.hxx +69 -8
  27. data/ext/couchbase/cluster.hxx +70 -54
  28. data/ext/couchbase/collections_manifest.hxx +3 -3
  29. data/ext/couchbase/configuration.hxx +14 -0
  30. data/ext/couchbase/couchbase.cxx +2044 -383
  31. data/ext/couchbase/{operations/document_id.hxx → document_id.hxx} +5 -4
  32. data/ext/couchbase/io/http_message.hxx +5 -1
  33. data/ext/couchbase/io/http_parser.hxx +2 -1
  34. data/ext/couchbase/io/http_session.hxx +6 -3
  35. data/ext/couchbase/io/{binary_message.hxx → mcbp_message.hxx} +15 -12
  36. data/ext/couchbase/io/mcbp_parser.hxx +99 -0
  37. data/ext/couchbase/io/{key_value_session.hxx → mcbp_session.hxx} +200 -95
  38. data/ext/couchbase/io/session_manager.hxx +37 -22
  39. data/ext/couchbase/mutation_token.hxx +2 -1
  40. data/ext/couchbase/operations.hxx +38 -8
  41. data/ext/couchbase/operations/bucket_create.hxx +138 -0
  42. data/ext/couchbase/operations/bucket_drop.hxx +65 -0
  43. data/ext/couchbase/operations/bucket_flush.hxx +65 -0
  44. data/ext/couchbase/operations/bucket_get.hxx +69 -0
  45. data/ext/couchbase/operations/bucket_get_all.hxx +62 -0
  46. data/ext/couchbase/operations/bucket_settings.hxx +111 -0
  47. data/ext/couchbase/operations/bucket_update.hxx +115 -0
  48. data/ext/couchbase/operations/cluster_developer_preview_enable.hxx +60 -0
  49. data/ext/couchbase/operations/collection_create.hxx +86 -0
  50. data/ext/couchbase/operations/collection_drop.hxx +82 -0
  51. data/ext/couchbase/operations/command.hxx +10 -10
  52. data/ext/couchbase/operations/document_decrement.hxx +80 -0
  53. data/ext/couchbase/operations/document_exists.hxx +80 -0
  54. data/ext/couchbase/operations/{get.hxx → document_get.hxx} +4 -2
  55. data/ext/couchbase/operations/document_get_and_lock.hxx +64 -0
  56. data/ext/couchbase/operations/document_get_and_touch.hxx +64 -0
  57. data/ext/couchbase/operations/document_increment.hxx +80 -0
  58. data/ext/couchbase/operations/document_insert.hxx +74 -0
  59. data/ext/couchbase/operations/{lookup_in.hxx → document_lookup_in.hxx} +2 -2
  60. data/ext/couchbase/operations/{mutate_in.hxx → document_mutate_in.hxx} +11 -2
  61. data/ext/couchbase/operations/{query.hxx → document_query.hxx} +101 -6
  62. data/ext/couchbase/operations/document_remove.hxx +67 -0
  63. data/ext/couchbase/operations/document_replace.hxx +76 -0
  64. data/ext/couchbase/operations/{upsert.hxx → document_touch.hxx} +14 -14
  65. data/ext/couchbase/operations/{remove.hxx → document_unlock.hxx} +12 -10
  66. data/ext/couchbase/operations/document_upsert.hxx +74 -0
  67. data/ext/couchbase/operations/query_index_build_deferred.hxx +85 -0
  68. data/ext/couchbase/operations/query_index_create.hxx +134 -0
  69. data/ext/couchbase/operations/query_index_drop.hxx +108 -0
  70. data/ext/couchbase/operations/query_index_get_all.hxx +106 -0
  71. data/ext/couchbase/operations/scope_create.hxx +81 -0
  72. data/ext/couchbase/operations/scope_drop.hxx +79 -0
  73. data/ext/couchbase/operations/scope_get_all.hxx +72 -0
  74. data/ext/couchbase/protocol/client_opcode.hxx +35 -0
  75. data/ext/couchbase/protocol/client_request.hxx +56 -9
  76. data/ext/couchbase/protocol/client_response.hxx +52 -15
  77. data/ext/couchbase/protocol/cmd_cluster_map_change_notification.hxx +81 -0
  78. data/ext/couchbase/protocol/cmd_decrement.hxx +187 -0
  79. data/ext/couchbase/protocol/cmd_exists.hxx +171 -0
  80. data/ext/couchbase/protocol/cmd_get.hxx +31 -8
  81. data/ext/couchbase/protocol/cmd_get_and_lock.hxx +142 -0
  82. data/ext/couchbase/protocol/cmd_get_and_touch.hxx +142 -0
  83. data/ext/couchbase/protocol/cmd_get_cluster_config.hxx +16 -3
  84. data/ext/couchbase/protocol/cmd_get_collections_manifest.hxx +16 -3
  85. data/ext/couchbase/protocol/cmd_get_error_map.hxx +16 -3
  86. data/ext/couchbase/protocol/cmd_hello.hxx +24 -8
  87. data/ext/couchbase/protocol/cmd_increment.hxx +187 -0
  88. data/ext/couchbase/protocol/cmd_info.hxx +1 -0
  89. data/ext/couchbase/protocol/cmd_insert.hxx +172 -0
  90. data/ext/couchbase/protocol/cmd_lookup_in.hxx +28 -13
  91. data/ext/couchbase/protocol/cmd_mutate_in.hxx +65 -13
  92. data/ext/couchbase/protocol/cmd_remove.hxx +59 -4
  93. data/ext/couchbase/protocol/cmd_replace.hxx +172 -0
  94. data/ext/couchbase/protocol/cmd_sasl_auth.hxx +15 -3
  95. data/ext/couchbase/protocol/cmd_sasl_list_mechs.hxx +15 -3
  96. data/ext/couchbase/protocol/cmd_sasl_step.hxx +15 -3
  97. data/ext/couchbase/protocol/cmd_select_bucket.hxx +14 -2
  98. data/ext/couchbase/protocol/cmd_touch.hxx +102 -0
  99. data/ext/couchbase/protocol/cmd_unlock.hxx +95 -0
  100. data/ext/couchbase/protocol/cmd_upsert.hxx +50 -14
  101. data/ext/couchbase/protocol/durability_level.hxx +67 -0
  102. data/ext/couchbase/protocol/frame_info_id.hxx +187 -0
  103. data/ext/couchbase/protocol/hello_feature.hxx +137 -0
  104. data/ext/couchbase/protocol/server_opcode.hxx +57 -0
  105. data/ext/couchbase/protocol/server_request.hxx +122 -0
  106. data/ext/couchbase/protocol/unsigned_leb128.h +15 -15
  107. data/ext/couchbase/utils/byteswap.hxx +1 -2
  108. data/ext/couchbase/utils/url_codec.hxx +225 -0
  109. data/ext/couchbase/version.hxx +3 -1
  110. data/ext/extconf.rb +4 -1
  111. data/ext/test/main.cxx +37 -113
  112. data/ext/third_party/snappy/.appveyor.yml +36 -0
  113. data/ext/third_party/snappy/.gitignore +8 -0
  114. data/ext/third_party/snappy/.travis.yml +98 -0
  115. data/ext/third_party/snappy/AUTHORS +1 -0
  116. data/ext/third_party/snappy/CMakeLists.txt +345 -0
  117. data/ext/third_party/snappy/CONTRIBUTING.md +26 -0
  118. data/ext/third_party/snappy/COPYING +54 -0
  119. data/ext/third_party/snappy/NEWS +188 -0
  120. data/ext/third_party/snappy/README.md +148 -0
  121. data/ext/third_party/snappy/cmake/SnappyConfig.cmake.in +33 -0
  122. data/ext/third_party/snappy/cmake/config.h.in +59 -0
  123. data/ext/third_party/snappy/docs/README.md +72 -0
  124. data/ext/third_party/snappy/format_description.txt +110 -0
  125. data/ext/third_party/snappy/framing_format.txt +135 -0
  126. data/ext/third_party/snappy/snappy-c.cc +90 -0
  127. data/ext/third_party/snappy/snappy-c.h +138 -0
  128. data/ext/third_party/snappy/snappy-internal.h +315 -0
  129. data/ext/third_party/snappy/snappy-sinksource.cc +121 -0
  130. data/ext/third_party/snappy/snappy-sinksource.h +182 -0
  131. data/ext/third_party/snappy/snappy-stubs-internal.cc +42 -0
  132. data/ext/third_party/snappy/snappy-stubs-internal.h +493 -0
  133. data/ext/third_party/snappy/snappy-stubs-public.h.in +63 -0
  134. data/ext/third_party/snappy/snappy-test.cc +613 -0
  135. data/ext/third_party/snappy/snappy-test.h +526 -0
  136. data/ext/third_party/snappy/snappy.cc +1770 -0
  137. data/ext/third_party/snappy/snappy.h +209 -0
  138. data/ext/third_party/snappy/snappy_compress_fuzzer.cc +60 -0
  139. data/ext/third_party/snappy/snappy_uncompress_fuzzer.cc +58 -0
  140. data/ext/third_party/snappy/snappy_unittest.cc +1512 -0
  141. data/ext/third_party/snappy/testdata/alice29.txt +3609 -0
  142. data/ext/third_party/snappy/testdata/asyoulik.txt +4122 -0
  143. data/ext/third_party/snappy/testdata/baddata1.snappy +0 -0
  144. data/ext/third_party/snappy/testdata/baddata2.snappy +0 -0
  145. data/ext/third_party/snappy/testdata/baddata3.snappy +0 -0
  146. data/ext/third_party/snappy/testdata/fireworks.jpeg +0 -0
  147. data/ext/third_party/snappy/testdata/geo.protodata +0 -0
  148. data/ext/third_party/snappy/testdata/html +1 -0
  149. data/ext/third_party/snappy/testdata/html_x_4 +1 -0
  150. data/ext/third_party/snappy/testdata/kppkn.gtb +0 -0
  151. data/ext/third_party/snappy/testdata/lcet10.txt +7519 -0
  152. data/ext/third_party/snappy/testdata/paper-100k.pdf +600 -2
  153. data/ext/third_party/snappy/testdata/plrabn12.txt +10699 -0
  154. data/ext/third_party/snappy/testdata/urls.10K +10000 -0
  155. data/lib/couchbase/binary_collection.rb +33 -76
  156. data/lib/couchbase/binary_collection_options.rb +94 -0
  157. data/lib/couchbase/bucket.rb +9 -3
  158. data/lib/couchbase/cluster.rb +161 -23
  159. data/lib/couchbase/collection.rb +108 -191
  160. data/lib/couchbase/collection_options.rb +430 -0
  161. data/lib/couchbase/errors.rb +136 -134
  162. data/lib/couchbase/json_transcoder.rb +32 -0
  163. data/lib/couchbase/management/analytics_index_manager.rb +185 -9
  164. data/lib/couchbase/management/bucket_manager.rb +84 -33
  165. data/lib/couchbase/management/collection_manager.rb +166 -1
  166. data/lib/couchbase/management/query_index_manager.rb +261 -0
  167. data/lib/couchbase/management/search_index_manager.rb +291 -0
  168. data/lib/couchbase/management/user_manager.rb +12 -10
  169. data/lib/couchbase/management/view_index_manager.rb +151 -1
  170. data/lib/couchbase/mutation_state.rb +11 -1
  171. data/lib/couchbase/scope.rb +4 -4
  172. data/lib/couchbase/version.rb +1 -1
  173. metadata +113 -18
  174. data/.travis.yml +0 -7
  175. data/ext/couchbase/io/binary_parser.hxx +0 -64
  176. data/lib/couchbase/results.rb +0 -307
@@ -0,0 +1,59 @@
1
+ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
2
+ #define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
3
+
4
+ /* Define to 1 if the compiler supports __attribute__((always_inline)). */
5
+ #cmakedefine HAVE_ATTRIBUTE_ALWAYS_INLINE 1
6
+
7
+ /* Define to 1 if the compiler supports __builtin_ctz and friends. */
8
+ #cmakedefine HAVE_BUILTIN_CTZ 1
9
+
10
+ /* Define to 1 if the compiler supports __builtin_expect. */
11
+ #cmakedefine HAVE_BUILTIN_EXPECT 1
12
+
13
+ /* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */
14
+ #cmakedefine HAVE_FUNC_MMAP 1
15
+
16
+ /* Define to 1 if you have a definition for sysconf() in <unistd.h>. */
17
+ #cmakedefine HAVE_FUNC_SYSCONF 1
18
+
19
+ /* Define to 1 to use the gflags package for command-line parsing. */
20
+ #cmakedefine HAVE_GFLAGS 1
21
+
22
+ /* Define to 1 if you have Google Test. */
23
+ #cmakedefine HAVE_GTEST 1
24
+
25
+ /* Define to 1 if you have the `lzo2' library (-llzo2). */
26
+ #cmakedefine HAVE_LIBLZO2 1
27
+
28
+ /* Define to 1 if you have the `z' library (-lz). */
29
+ #cmakedefine HAVE_LIBZ 1
30
+
31
+ /* Define to 1 if you have the <sys/mman.h> header file. */
32
+ #cmakedefine HAVE_SYS_MMAN_H 1
33
+
34
+ /* Define to 1 if you have the <sys/resource.h> header file. */
35
+ #cmakedefine HAVE_SYS_RESOURCE_H 1
36
+
37
+ /* Define to 1 if you have the <sys/time.h> header file. */
38
+ #cmakedefine HAVE_SYS_TIME_H 1
39
+
40
+ /* Define to 1 if you have the <sys/uio.h> header file. */
41
+ #cmakedefine HAVE_SYS_UIO_H 1
42
+
43
+ /* Define to 1 if you have the <unistd.h> header file. */
44
+ #cmakedefine HAVE_UNISTD_H 1
45
+
46
+ /* Define to 1 if you have the <windows.h> header file. */
47
+ #cmakedefine HAVE_WINDOWS_H 1
48
+
49
+ /* Define to 1 if you target processors with SSSE3+ and have <tmmintrin.h>. */
50
+ #cmakedefine01 SNAPPY_HAVE_SSSE3
51
+
52
+ /* Define to 1 if you target processors with BMI2+ and have <bmi2intrin.h>. */
53
+ #cmakedefine01 SNAPPY_HAVE_BMI2
54
+
55
+ /* Define to 1 if your processor stores words with the most significant byte
56
+ first (like Motorola and SPARC, unlike Intel and VAX). */
57
+ #cmakedefine SNAPPY_IS_BIG_ENDIAN 1
58
+
59
+ #endif // THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
@@ -0,0 +1,72 @@
1
+ Snappy is a compression/decompression library. It does not aim for maximum
2
+ compression, or compatibility with any other compression library; instead, it
3
+ aims for very high speeds and reasonable compression. For instance, compared
4
+ to the fastest mode of zlib, Snappy is an order of magnitude faster for most
5
+ inputs, but the resulting compressed files are anywhere from 20% to 100%
6
+ bigger. On a single core of a Core i7 processor in 64-bit mode, Snappy
7
+ compresses at about 250 MB/sec or more and decompresses at about 500 MB/sec
8
+ or more.
9
+
10
+ Snappy is widely used inside Google, in everything from BigTable and MapReduce
11
+ to our internal RPC systems. (Snappy has previously been referred to as "Zippy"
12
+ in some presentations and the like.)
13
+
14
+ For more information, please see the [README](../README.md). Benchmarks against
15
+ a few other compression libraries (zlib, LZO, LZF, FastLZ, and QuickLZ) are
16
+ included in the source code distribution. The source code also contains a
17
+ [formal format specification](../format_description.txt), as well
18
+ as a specification for a [framing format](../framing_format.txt) useful for
19
+ higher-level framing and encapsulation of Snappy data, e.g. for transporting
20
+ Snappy-compressed data across HTTP in a streaming fashion. Note that the Snappy
21
+ distribution currently has no code implementing the latter, but some of the
22
+ ports do (see below).
23
+
24
+ Snappy is written in C++, but C bindings are included, and several bindings to
25
+ other languages are maintained by third parties:
26
+
27
+ * C#: [Snappy for .NET](http://snappy4net.codeplex.com/) (P/Invoke wrapper),
28
+ [Snappy.NET](http://snappy.angeloflogic.com/) (P/Invoke wrapper),
29
+ [Snappy.Sharp](https://github.com/jeffesp/Snappy.Sharp) (native
30
+ reimplementation)
31
+ * [C port](http://github.com/andikleen/snappy-c)
32
+ * [C++ MSVC packaging](http://snappy.angeloflogic.com/) (plus Windows binaries,
33
+ NuGet packages and command-line tool)
34
+ * Common Lisp: [Library bindings](http://flambard.github.com/thnappy/),
35
+ [native reimplementation](https://github.com/brown/snappy)
36
+ * Erlang: [esnappy](https://github.com/thekvs/esnappy),
37
+ [snappy-erlang-nif](https://github.com/fdmanana/snappy-erlang-nif)
38
+ * [Go](https://github.com/golang/snappy/)
39
+ * [Haskell](http://hackage.haskell.org/package/snappy)
40
+ * [Haxe](https://github.com/MaddinXx/hxsnappy) (C++/Neko)
41
+ * [iOS packaging](https://github.com/ideawu/snappy-ios)
42
+ * Java: [JNI wrapper](https://github.com/xerial/snappy-java) (including the
43
+ framing format), [native reimplementation](http://code.google.com/p/jsnappy/),
44
+ [other native reimplementation](https://github.com/dain/snappy) (including
45
+ the framing format)
46
+ * [Lua](https://github.com/forhappy/lua-snappy)
47
+ * [Node.js](https://github.com/kesla/node-snappy) (including the [framing
48
+ format](https://github.com/kesla/node-snappy-stream))
49
+ * [Perl](http://search.cpan.org/dist/Compress-Snappy/)
50
+ * [PHP](https://github.com/kjdev/php-ext-snappy)
51
+ * [Python](http://pypi.python.org/pypi/python-snappy) (including a command-line
52
+ tool for the framing format)
53
+ * [R](https://github.com/lulyon/R-snappy)
54
+ * [Ruby](https://github.com/miyucy/snappy)
55
+ * [Rust](https://github.com/BurntSushi/rust-snappy)
56
+ * [Smalltalk](https://github.com/mumez/sqnappy) (including the framing format)
57
+
58
+ Snappy is used or is available as an alternative in software such as
59
+
60
+ * [MongoDB](https://www.mongodb.com/)
61
+ * [Cassandra](http://cassandra.apache.org/)
62
+ * [Couchbase](http://www.couchbase.com/)
63
+ * [Hadoop](http://hadoop.apache.org/)
64
+ * [LessFS](http://www.lessfs.com/wordpress/)
65
+ * [LevelDB](https://github.com/google/leveldb) (which is in turn used by
66
+ [Google Chrome](http://chrome.google.com/))
67
+ * [Lucene](http://lucene.apache.org/)
68
+ * [VoltDB](http://voltdb.com/)
69
+
70
+ If you know of more, do not hesitate to let us know. The easiest way to get in
71
+ touch is via the
72
+ [Snappy discussion mailing list](http://groups.google.com/group/snappy-compression).
@@ -0,0 +1,110 @@
1
+ Snappy compressed format description
2
+ Last revised: 2011-10-05
3
+
4
+
5
+ This is not a formal specification, but should suffice to explain most
6
+ relevant parts of how the Snappy format works. It is originally based on
7
+ text by Zeev Tarantov.
8
+
9
+ Snappy is an LZ77-type compressor with a fixed, byte-oriented encoding.
10
+ There is no entropy encoder backend nor framing layer -- the latter is
11
+ assumed to be handled by other parts of the system.
12
+
13
+ This document only describes the format, not how the Snappy compressor or
14
+ decompressor actually works. The correctness of the decompressor should not
15
+ depend on implementation details of the compressor, and vice versa.
16
+
17
+
18
+ 1. Preamble
19
+
20
+ The stream starts with the uncompressed length (up to a maximum of 2^32 - 1),
21
+ stored as a little-endian varint. Varints consist of a series of bytes,
22
+ where the lower 7 bits are data and the upper bit is set iff there are
23
+ more bytes to be read. In other words, an uncompressed length of 64 would
24
+ be stored as 0x40, and an uncompressed length of 2097150 (0x1FFFFE)
25
+ would be stored as 0xFE 0xFF 0x7F.
26
+
27
+
28
+ 2. The compressed stream itself
29
+
30
+ There are two types of elements in a Snappy stream: Literals and
31
+ copies (backreferences). There is no restriction on the order of elements,
32
+ except that the stream naturally cannot start with a copy. (Having
33
+ two literals in a row is never optimal from a compression point of
34
+ view, but nevertheless fully permitted.) Each element starts with a tag byte,
35
+ and the lower two bits of this tag byte signal what type of element will
36
+ follow:
37
+
38
+ 00: Literal
39
+ 01: Copy with 1-byte offset
40
+ 10: Copy with 2-byte offset
41
+ 11: Copy with 4-byte offset
42
+
43
+ The interpretation of the upper six bits is element-dependent.
44
+
45
+
46
+ 2.1. Literals (00)
47
+
48
+ Literals are uncompressed data stored directly in the byte stream.
49
+ The literal length is stored differently depending on the length
50
+ of the literal:
51
+
52
+ - For literals up to and including 60 bytes in length, the upper
53
+ six bits of the tag byte contain (len-1). The literal follows
54
+ immediately thereafter in the bytestream.
55
+ - For longer literals, the (len-1) value is stored after the tag byte,
56
+ little-endian. The upper six bits of the tag byte describe how
57
+ many bytes are used for the length; 60, 61, 62 or 63 for
58
+ 1-4 bytes, respectively. The literal itself follows after the
59
+ length.
60
+
61
+
62
+ 2.2. Copies
63
+
64
+ Copies are references back into previous decompressed data, telling
65
+ the decompressor to reuse data it has previously decoded.
66
+ They encode two values: The _offset_, saying how many bytes back
67
+ from the current position to read, and the _length_, how many bytes
68
+ to copy. Offsets of zero can be encoded, but are not legal;
69
+ similarly, it is possible to encode backreferences that would
70
+ go past the end of the block (offset > current decompressed position),
71
+ which is also nonsensical and thus not allowed.
72
+
73
+ As in most LZ77-based compressors, the length can be larger than the offset,
74
+ yielding a form of run-length encoding (RLE). For instance,
75
+ "xababab" could be encoded as
76
+
77
+ <literal: "xab"> <copy: offset=2 length=4>
78
+
79
+ Note that since the current Snappy compressor works in 32 kB
80
+ blocks and does not do matching across blocks, it will never produce
81
+ a bitstream with offsets larger than about 32768. However, the
82
+ decompressor should not rely on this, as it may change in the future.
83
+
84
+ There are several different kinds of copy elements, depending on
85
+ the amount of bytes to be copied (length), and how far back the
86
+ data to be copied is (offset).
87
+
88
+
89
+ 2.2.1. Copy with 1-byte offset (01)
90
+
91
+ These elements can encode lengths between [4..11] bytes and offsets
92
+ between [0..2047] bytes. (len-4) occupies three bits and is stored
93
+ in bits [2..4] of the tag byte. The offset occupies 11 bits, of which the
94
+ upper three are stored in the upper three bits ([5..7]) of the tag byte,
95
+ and the lower eight are stored in a byte following the tag byte.
96
+
97
+
98
+ 2.2.2. Copy with 2-byte offset (10)
99
+
100
+ These elements can encode lengths between [1..64] and offsets from
101
+ [0..65535]. (len-1) occupies six bits and is stored in the upper
102
+ six bits ([2..7]) of the tag byte. The offset is stored as a
103
+ little-endian 16-bit integer in the two bytes following the tag byte.
104
+
105
+
106
+ 2.2.3. Copy with 4-byte offset (11)
107
+
108
+ These are like the copies with 2-byte offsets (see previous subsection),
109
+ except that the offset is stored as a 32-bit integer instead of a
110
+ 16-bit integer (and thus will occupy four bytes).
@@ -0,0 +1,135 @@
1
+ Snappy framing format description
2
+ Last revised: 2013-10-25
3
+
4
+ This format describes a framing format for Snappy, allowing compressing to
5
+ files or streams that can then more easily be decompressed without having
6
+ to hold the entire stream in memory. It also provides data checksums to
7
+ help verify integrity. It does not provide metadata checksums, so it does
8
+ not protect against e.g. all forms of truncations.
9
+
10
+ Implementation of the framing format is optional for Snappy compressors and
11
+ decompressors; it is not part of the Snappy core specification.
12
+
13
+
14
+ 1. General structure
15
+
16
+ The file consists solely of chunks, lying back-to-back with no padding
17
+ in between. Each chunk consists first of a single byte of chunk identifier,
18
+ then a three-byte little-endian length of the chunk in bytes (from 0 to
19
+ 16777215, inclusive), and then the data if any. The four bytes of chunk
20
+ header are not counted in the data length.
21
+
22
+ The different chunk types are listed below. The first chunk must always
23
+ be the stream identifier chunk (see section 4.1, below). The stream
24
+ ends when the file ends -- there is no explicit end-of-file marker.
25
+
26
+
27
+ 2. File type identification
28
+
29
+ The following identifiers for this format are recommended where appropriate.
30
+ However, note that none have been registered officially, so this is only to
31
+ be taken as a guideline. We use "Snappy framed" to distinguish between this
32
+ format and raw Snappy data.
33
+
34
+ File extension: .sz
35
+ MIME type: application/x-snappy-framed
36
+ HTTP Content-Encoding: x-snappy-framed
37
+
38
+
39
+ 3. Checksum format
40
+
41
+ Some chunks have data protected by a checksum (the ones that do will say so
42
+ explicitly). The checksums are always masked CRC-32Cs.
43
+
44
+ A description of CRC-32C can be found in RFC 3720, section 12.1, with
45
+ examples in section B.4.
46
+
47
+ Checksums are not stored directly, but masked, as checksumming data and
48
+ then its own checksum can be problematic. The masking is the same as used
49
+ in Apache Hadoop: Rotate the checksum by 15 bits, then add the constant
50
+ 0xa282ead8 (using wraparound as normal for unsigned integers). This is
51
+ equivalent to the following C code:
52
+
53
+ uint32_t mask_checksum(uint32_t x) {
54
+ return ((x >> 15) | (x << 17)) + 0xa282ead8;
55
+ }
56
+
57
+ Note that the masking is reversible.
58
+
59
+ The checksum is always stored as a four-byte integer, in little-endian byte order.
60
+
61
+
62
+ 4. Chunk types
63
+
64
+ The currently supported chunk types are described below. The list may
65
+ be extended in the future.
66
+
67
+
68
+ 4.1. Stream identifier (chunk type 0xff)
69
+
70
+ The stream identifier is always the first element in the stream.
71
+ It is exactly six bytes long and contains "sNaPpY" in ASCII. This means that
72
+ a valid Snappy framed stream always starts with the bytes
73
+
74
+ 0xff 0x06 0x00 0x00 0x73 0x4e 0x61 0x50 0x70 0x59
75
+
76
+ The stream identifier chunk can come multiple times in the stream besides
77
+ the first; if such a chunk shows up, it should simply be ignored, assuming
78
+ it has the right length and contents. This allows for easy concatenation of
79
+ compressed files without the need for re-framing.
80
+
81
+
82
+ 4.2. Compressed data (chunk type 0x00)
83
+
84
+ Compressed data chunks contain a normal Snappy compressed bitstream;
85
+ see the compressed format specification. The compressed data is preceded by
86
+ the CRC-32C (see section 3) of the _uncompressed_ data.
87
+
88
+ Note that the data portion of the chunk, i.e., the compressed contents,
89
+ can be at most 16777211 bytes (2^24 - 1, minus the checksum).
90
+ However, we place an additional restriction that the uncompressed data
91
+ in a chunk must be no longer than 65536 bytes. This allows consumers to
92
+ easily use small fixed-size buffers.
93
+
94
+
95
+ 4.3. Uncompressed data (chunk type 0x01)
96
+
97
+ Uncompressed data chunks allow a compressor to send uncompressed,
98
+ raw data; this is useful if, for instance, incompressible or
99
+ near-incompressible data is detected, and faster decompression is desired.
100
+
101
+ As in the compressed chunks, the data is preceded by its own masked
102
+ CRC-32C (see section 3).
103
+
104
+ An uncompressed data chunk, like compressed data chunks, should contain
105
+ no more than 65536 data bytes, so the maximum legal chunk length with the
106
+ checksum is 65540.
107
+
108
+
109
+ 4.4. Padding (chunk type 0xfe)
110
+
111
+ Padding chunks allow a compressor to increase the size of the data stream
112
+ so that it complies with external demands, e.g. that the total number of
113
+ bytes is a multiple of some value.
114
+
115
+ All bytes of the padding chunk, except the chunk byte itself and the length,
116
+ should be zero, but decompressors must not try to interpret or verify the
117
+ padding data in any way.
118
+
119
+
120
+ 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f)
121
+
122
+ These are reserved for future expansion. A decoder that sees such a chunk
123
+ should immediately return an error, as it must assume it cannot decode the
124
+ stream correctly.
125
+
126
+ Future versions of this specification may define meanings for these chunks.
127
+
128
+
129
+ 4.6. Reserved skippable chunks (chunk types 0x80-0xfd)
130
+
131
+ These are also reserved for future expansion, but unlike the chunks
132
+ described in 4.5, a decoder seeing these must skip them and continue
133
+ decoding.
134
+
135
+ Future versions of this specification may define meanings for these chunks.
@@ -0,0 +1,90 @@
1
+ // Copyright 2011 Martin Gieseking <martin.gieseking@uos.de>.
2
+ //
3
+ // Redistribution and use in source and binary forms, with or without
4
+ // modification, are permitted provided that the following conditions are
5
+ // met:
6
+ //
7
+ // * Redistributions of source code must retain the above copyright
8
+ // notice, this list of conditions and the following disclaimer.
9
+ // * Redistributions in binary form must reproduce the above
10
+ // copyright notice, this list of conditions and the following disclaimer
11
+ // in the documentation and/or other materials provided with the
12
+ // distribution.
13
+ // * Neither the name of Google Inc. nor the names of its
14
+ // contributors may be used to endorse or promote products derived from
15
+ // this software without specific prior written permission.
16
+ //
17
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+
29
+ #include "snappy.h"
30
+ #include "snappy-c.h"
31
+
32
+ extern "C" {
33
+
34
+ snappy_status snappy_compress(const char* input,
35
+ size_t input_length,
36
+ char* compressed,
37
+ size_t *compressed_length) {
38
+ if (*compressed_length < snappy_max_compressed_length(input_length)) {
39
+ return SNAPPY_BUFFER_TOO_SMALL;
40
+ }
41
+ snappy::RawCompress(input, input_length, compressed, compressed_length);
42
+ return SNAPPY_OK;
43
+ }
44
+
45
+ snappy_status snappy_uncompress(const char* compressed,
46
+ size_t compressed_length,
47
+ char* uncompressed,
48
+ size_t* uncompressed_length) {
49
+ size_t real_uncompressed_length;
50
+ if (!snappy::GetUncompressedLength(compressed,
51
+ compressed_length,
52
+ &real_uncompressed_length)) {
53
+ return SNAPPY_INVALID_INPUT;
54
+ }
55
+ if (*uncompressed_length < real_uncompressed_length) {
56
+ return SNAPPY_BUFFER_TOO_SMALL;
57
+ }
58
+ if (!snappy::RawUncompress(compressed, compressed_length, uncompressed)) {
59
+ return SNAPPY_INVALID_INPUT;
60
+ }
61
+ *uncompressed_length = real_uncompressed_length;
62
+ return SNAPPY_OK;
63
+ }
64
+
65
+ size_t snappy_max_compressed_length(size_t source_length) {
66
+ return snappy::MaxCompressedLength(source_length);
67
+ }
68
+
69
+ snappy_status snappy_uncompressed_length(const char *compressed,
70
+ size_t compressed_length,
71
+ size_t *result) {
72
+ if (snappy::GetUncompressedLength(compressed,
73
+ compressed_length,
74
+ result)) {
75
+ return SNAPPY_OK;
76
+ } else {
77
+ return SNAPPY_INVALID_INPUT;
78
+ }
79
+ }
80
+
81
+ snappy_status snappy_validate_compressed_buffer(const char *compressed,
82
+ size_t compressed_length) {
83
+ if (snappy::IsValidCompressedBuffer(compressed, compressed_length)) {
84
+ return SNAPPY_OK;
85
+ } else {
86
+ return SNAPPY_INVALID_INPUT;
87
+ }
88
+ }
89
+
90
+ } // extern "C"