RubyGems - bson - Versions diffs - 4.2.2 → 4.12.1 - Mend

bson 4.2.2 → 4.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (169) hide show

checksums.yaml +5 -5
checksums.yaml.gz.sig +0 -0
data.tar.gz.sig +0 -0
data/README.md +25 -7
data/Rakefile +16 -9
data/ext/bson/{native-endian.h → bson-endian.h} +5 -99
data/ext/bson/bson-native.h +125 -0
data/ext/bson/bytebuf.c +133 -0
data/ext/bson/endian.c +117 -0
data/ext/bson/init.c +355 -0
data/ext/bson/libbson-utf8.c +230 -0
data/ext/bson/read.c +411 -0
data/ext/bson/util.c +95 -0
data/ext/bson/write.c +680 -0
data/lib/bson.rb +6 -3
data/lib/bson/active_support.rb +17 -0
data/lib/bson/array.rb +57 -17
data/lib/bson/binary.rb +185 -13
data/lib/bson/boolean.rb +12 -3
data/lib/bson/code.rb +16 -2
data/lib/bson/code_with_scope.rb +32 -5
data/lib/bson/config.rb +1 -1
data/lib/bson/date.rb +12 -2
data/lib/bson/date_time.rb +2 -2
data/lib/bson/db_pointer.rb +110 -0
data/lib/bson/decimal128.rb +17 -3
data/lib/bson/decimal128/builder.rb +1 -1
data/lib/bson/document.rb +152 -5
data/lib/bson/environment.rb +2 -1
data/lib/bson/error.rb +27 -0
data/lib/bson/ext_json.rb +383 -0
data/lib/bson/false_class.rb +1 -1
data/lib/bson/float.rb +48 -2
data/lib/bson/hash.rb +68 -17
data/lib/bson/int32.rb +52 -13
data/lib/bson/int64.rb +59 -15
data/lib/bson/integer.rb +36 -2
data/lib/bson/json.rb +1 -1
data/lib/bson/max_key.rb +13 -1
data/lib/bson/min_key.rb +13 -1
data/lib/bson/nil_class.rb +4 -2
data/lib/bson/object.rb +28 -1
data/lib/bson/object_id.rb +16 -2
data/lib/bson/open_struct.rb +1 -1
data/lib/bson/regexp.rb +27 -4
data/lib/bson/registry.rb +3 -3
data/lib/bson/specialized.rb +4 -2
data/lib/bson/string.rb +5 -3
data/lib/bson/symbol.rb +99 -7
data/lib/bson/time.rb +63 -4
data/lib/bson/time_with_zone.rb +54 -0
data/lib/bson/timestamp.rb +44 -6
data/lib/bson/true_class.rb +1 -1
data/lib/bson/undefined.rb +12 -1
data/lib/bson/version.rb +2 -2
data/spec/bson/array_spec.rb +18 -1
data/spec/bson/binary_spec.rb +100 -3
data/spec/bson/binary_uuid_spec.rb +189 -0
data/spec/bson/boolean_spec.rb +1 -1
data/spec/bson/byte_buffer_read_spec.rb +197 -0
data/spec/bson/byte_buffer_spec.rb +121 -381
data/spec/bson/byte_buffer_write_spec.rb +854 -0
data/spec/bson/code_spec.rb +1 -1
data/spec/bson/code_with_scope_spec.rb +1 -1
data/spec/bson/date_spec.rb +1 -1
data/spec/bson/date_time_spec.rb +54 -1
data/spec/bson/decimal128_spec.rb +35 -35
data/spec/bson/document_as_spec.rb +46 -0
data/spec/bson/document_spec.rb +197 -30
data/spec/bson/ext_json_parse_spec.rb +308 -0
data/spec/bson/false_class_spec.rb +1 -1
data/spec/bson/float_spec.rb +37 -1
data/spec/bson/hash_as_spec.rb +57 -0
data/spec/bson/hash_spec.rb +209 -1
data/spec/bson/int32_spec.rb +180 -6
data/spec/bson/int64_spec.rb +199 -6
data/spec/bson/integer_spec.rb +29 -3
data/spec/bson/json_spec.rb +1 -1
data/spec/bson/max_key_spec.rb +1 -1
data/spec/bson/min_key_spec.rb +1 -1
data/spec/bson/nil_class_spec.rb +1 -1
data/spec/bson/object_id_spec.rb +1 -1
data/spec/bson/object_spec.rb +1 -1
data/spec/bson/open_struct_spec.rb +1 -1
data/spec/bson/raw_spec.rb +34 -2
data/spec/bson/regexp_spec.rb +1 -1
data/spec/bson/registry_spec.rb +1 -1
data/spec/bson/string_spec.rb +19 -1
data/spec/bson/symbol_raw_spec.rb +45 -0
data/spec/bson/symbol_spec.rb +63 -3
data/spec/bson/time_spec.rb +205 -2
data/spec/bson/time_with_zone_spec.rb +68 -0
data/spec/bson/timestamp_spec.rb +56 -1
data/spec/bson/true_class_spec.rb +1 -1
data/spec/bson/undefined_spec.rb +1 -1
data/spec/bson_spec.rb +1 -1
data/spec/{support → runners}/common_driver.rb +1 -1
data/spec/runners/corpus.rb +185 -0
data/spec/{support/corpus.rb → runners/corpus_legacy.rb} +41 -59
data/spec/spec_helper.rb +40 -3
data/spec/{bson/driver_bson_spec.rb → spec_tests/common_driver_spec.rb} +1 -0
data/spec/{bson/corpus_spec.rb → spec_tests/corpus_legacy_spec.rb} +10 -7
data/spec/spec_tests/corpus_spec.rb +124 -0
data/spec/spec_tests/data/corpus/README.md +15 -0
data/spec/spec_tests/data/corpus/array.json +49 -0
data/spec/spec_tests/data/corpus/binary.json +113 -0
data/spec/spec_tests/data/corpus/boolean.json +27 -0
data/spec/spec_tests/data/corpus/code.json +67 -0
data/spec/spec_tests/data/corpus/code_w_scope.json +78 -0
data/spec/spec_tests/data/corpus/datetime.json +42 -0
data/spec/spec_tests/data/corpus/dbpointer.json +56 -0
data/spec/spec_tests/data/corpus/dbref.json +31 -0
data/spec/spec_tests/data/corpus/decimal128-1.json +317 -0
data/spec/spec_tests/data/corpus/decimal128-2.json +793 -0
data/spec/spec_tests/data/corpus/decimal128-3.json +1771 -0
data/spec/spec_tests/data/corpus/decimal128-4.json +117 -0
data/spec/spec_tests/data/corpus/decimal128-5.json +402 -0
data/spec/spec_tests/data/corpus/decimal128-6.json +119 -0
data/spec/spec_tests/data/corpus/decimal128-7.json +323 -0
data/spec/spec_tests/data/corpus/document.json +36 -0
data/spec/spec_tests/data/corpus/double.json +87 -0
data/spec/spec_tests/data/corpus/int32.json +43 -0
data/spec/spec_tests/data/corpus/int64.json +43 -0
data/spec/spec_tests/data/corpus/maxkey.json +12 -0
data/spec/spec_tests/data/corpus/minkey.json +12 -0
data/spec/spec_tests/data/corpus/multi-type-deprecated.json +15 -0
data/spec/spec_tests/data/corpus/multi-type.json +11 -0
data/spec/spec_tests/data/corpus/null.json +12 -0
data/spec/spec_tests/data/corpus/oid.json +28 -0
data/spec/spec_tests/data/corpus/regex.json +65 -0
data/spec/spec_tests/data/corpus/string.json +72 -0
data/spec/spec_tests/data/corpus/symbol.json +80 -0
data/spec/spec_tests/data/corpus/timestamp.json +34 -0
data/spec/spec_tests/data/corpus/top.json +236 -0
data/spec/spec_tests/data/corpus/undefined.json +15 -0
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/array.json +8 -2
data/spec/{support/corpus-tests/failures → spec_tests/data/corpus_legacy}/binary.json +0 -0
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/boolean.json +0 -0
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/code.json +1 -1
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/code_w_scope.json +1 -1
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/document.json +1 -1
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/double.json +1 -1
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/datetime.json +0 -0
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/dbpointer.json +0 -0
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/int64.json +0 -0
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/symbol.json +0 -0
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/int32.json +1 -1
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/maxkey.json +1 -1
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/minkey.json +1 -1
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/null.json +1 -1
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/oid.json +0 -0
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/regex.json +1 -1
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/string.json +0 -0
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/timestamp.json +1 -1
data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/top.json +0 -0
data/spec/{support/corpus-tests/failures → spec_tests/data/corpus_legacy}/undefined.json +0 -0
data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-1.json +0 -0
data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-2.json +0 -0
data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-3.json +0 -0
data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-4.json +0 -0
data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-5.json +0 -0
data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-6.json +0 -0
data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-7.json +0 -0
data/spec/support/shared_examples.rb +3 -5
data/spec/support/spec_config.rb +16 -0
data/spec/support/utils.rb +10 -0
metadata +227 -124
metadata.gz.sig +0 -0
data/ext/bson/bson_native.c +0 -762

data/ext/bson/libbson-utf8.c ADDED Viewed

@@ -0,0 +1,230 @@
+#include <ruby.h>
+#include <ruby/encoding.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <assert.h>
+#include "bson-native.h"
+/**
+ * Taken from libbson.
+ */
+#define BSON_ASSERT assert
+#define BSON_INLINE
+/*
+ *--------------------------------------------------------------------------
+ *
+ * _bson_utf8_get_sequence --
+ *
+ *       Classify the lead byte of the UTF-8 sequence starting at @utf8.
+ *       The number of bytes in the sequence is written to @seq_length and
+ *       the mask selecting the payload bits of the lead byte is written to
+ *       @first_mask. Continuation bytes are masked by the caller.
+ *
+ * Returns:
+ *       None.
+ *
+ * Side effects:
+ *       @seq_length is set (0 when the lead byte matches no known pattern).
+ *       @first_mask is set (0 when the lead byte matches no known pattern).
+ *
+ *--------------------------------------------------------------------------
+ */
+static BSON_INLINE void
+_bson_utf8_get_sequence (const char *utf8,    /* IN */
+                         uint8_t *seq_length, /* OUT */
+                         uint8_t *first_mask) /* OUT */
+{
+   const unsigned char lead = (unsigned char) utf8[0];
+   /* Start from the "unrecognised lead byte" answer and refine it below. */
+   uint8_t length = 0;
+   uint8_t mask = 0;
+   if (lead < 0x80) {
+      /* 0xxxxxxx: single byte, seven payload bits. */
+      length = 1;
+      mask = 0x7F;
+   } else if ((lead >> 5) == 0x06) {
+      /* 110xxxxx: two bytes, five payload bits in the lead byte. */
+      length = 2;
+      mask = 0x1F;
+   } else if ((lead >> 4) == 0x0E) {
+      /* 1110xxxx: three bytes, four payload bits in the lead byte. */
+      length = 3;
+      mask = 0x0F;
+   } else if ((lead >> 3) == 0x1E) {
+      /* 11110xxx: four bytes, three payload bits in the lead byte. */
+      length = 4;
+      mask = 0x07;
+   }
+   *seq_length = length;
+   *first_mask = mask;
+}
+/*
+ *--------------------------------------------------------------------------
+ *
+ * rb_bson_utf8_validate --
+ *
+ *       Validates that @utf8 is a valid UTF-8 string. Note that we only
+ *       support UTF-8 characters which have sequence length less than or equal
+ *       to 4 bytes (RFC 3629).
+ *
+ *       If @allow_null is true, then \0 is allowed within @utf8_len bytes
+ *       of @utf8.  Generally, this is bad practice since the main point of
+ *       UTF-8 strings is that they can be used with strlen() and friends.
+ *       However, some languages such as Python can send UTF-8 encoded
+ *       strings with NUL's in them.
+ *
+ * Parameters:
+ *       @utf8: A UTF-8 encoded string.
+ *       @utf8_len: The length of @utf8 in bytes.
+ *       @allow_null: If \0 is allowed within @utf8, excluding trailing \0.
+ *       @data_type: The data type being serialized; used only as the prefix
+ *                   of the error messages.
+ *
+ * Returns:
+ *       None. The function returns normally only when @utf8 is valid.
+ *
+ * Side effects:
+ *       Raises a Ruby EncodingError (rb_eEncodingError) when @utf8 is not
+ *       valid UTF-8, and a Ruby ArgumentError (rb_eArgError) when a null
+ *       byte is found while @allow_null is false. The error messages format
+ *       @utf8 with %s.
+ *
+ *--------------------------------------------------------------------------
+ */
+void
+rb_bson_utf8_validate (const char *utf8, /* IN */
+                    size_t utf8_len,  /* IN */
+                    bool allow_null, /* IN */
+                    const char *data_type)  /* IN */
+{
+   uint32_t c;
+   uint8_t first_mask;
+   uint8_t seq_length;
+   unsigned i; /* NOTE(review): may be narrower than the size_t utf8_len it is compared against — confirm inputs stay within unsigned range */
+   unsigned j;
+   bool not_shortest_form;
+   BSON_ASSERT (utf8);
+   for (i = 0; i < utf8_len; i += seq_length) {
+      _bson_utf8_get_sequence (&utf8[i], &seq_length, &first_mask);
+      /*
+       * Ensure we have a valid multi-byte sequence length.
+       */
+      if (!seq_length) {
+         rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: bogus initial bits", data_type, utf8);
+      }
+      /*
+       * Ensure we have enough bytes left.
+       */
+      if ((utf8_len - i) < seq_length) {
+         rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: truncated multi-byte sequence", data_type, utf8);
+      }
+      /*
+       * Also calculate the next char as a unichar so we can
+       * check code ranges for non-shortest form.
+       */
+      c = utf8[i] & first_mask;
+      /*
+       * Check the high-bits for each additional sequence byte.
+       */
+      for (j = i + 1; j < (i + seq_length); j++) {
+         c = (c << 6) | (utf8[j] & 0x3F);
+         if ((utf8[j] & 0xC0) != 0x80) {
+            rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: bogus high bits for continuation byte", data_type, utf8);
+         }
+      }
+      /*
+       * Check for NULL bytes afterwards.
+       *
+       * Hint: if you want to optimize this function, starting here to do
+       * this in the same pass as the data above would probably be a good
+       * idea. You would add a branch into the inner loop, but save possibly
+       * on cache-line bouncing on larger strings. Just a thought.
+       */
+      if (!allow_null) {
+         for (j = 0; j < seq_length; j++) {
+            if (((i + j) > utf8_len) || !utf8[i + j]) { /* the bound test cannot fire: the truncation check above guarantees i + seq_length <= utf8_len */
+               rb_raise(rb_eArgError, "%s %s contains null bytes", data_type, utf8);
+            }
+         }
+      }
+      /*
+       * Code point won't fit in utf-16, not allowed.
+       */
+      if (c > 0x0010FFFF) {
+         rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: code point %"PRIu32" does not fit in UTF-16", data_type, utf8, c);
+      }
+      /*
+       * Code point is in the range reserved for UTF-16 surrogate
+       * pairs (0xD800 - 0xDFFF).
+       */
+      if ((c & 0xFFFFF800) == 0xD800) {
+         rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: byte is in surrogate pair reserved range", data_type, utf8);
+      }
+      /*
+       * Check non-shortest form unicode.
+       *
+       * Each case accepts (via continue) the code point range that is
+       * legitimate for its sequence length. A value outside that range
+       * sets not_shortest_form and deliberately falls through the
+       * remaining cases, which can still accept it if it lies in one of
+       * their ranges.
+       */
+      not_shortest_form = false;
+      switch (seq_length) {
+      case 1:
+         if (c <= 0x007F) {
+            continue;
+         }
+         not_shortest_form = true; /* fall through */
+      case 2:
+         if ((c >= 0x0080) && (c <= 0x07FF)) {
+            continue;
+         } else if (c == 0) {
+            /* Two-byte representation for NULL. */
+            if (!allow_null) {
+               rb_raise(rb_eArgError, "%s %s contains null bytes", data_type, utf8);
+            }
+            continue;
+         }
+         not_shortest_form = true; /* fall through */
+      case 3:
+         if (((c >= 0x0800) && (c <= 0x0FFF)) ||
+             ((c >= 0x1000) && (c <= 0xFFFF))) {
+            continue;
+         }
+         not_shortest_form = true; /* fall through */
+      case 4:
+         if (((c >= 0x10000) && (c <= 0x3FFFF)) ||
+             ((c >= 0x40000) && (c <= 0xFFFFF)) ||
+             ((c >= 0x100000) && (c <= 0x10FFFF))) {
+            continue;
+         }
+         not_shortest_form = true; /* fall through */
+      default:
+         not_shortest_form = true;
+      }
+      if (not_shortest_form) {
+        rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: not in shortest form", data_type, utf8);
+      }
+   }
+}

data/ext/bson/read.c ADDED Viewed

@@ -0,0 +1,411 @@
+/*
+ * Copyright (C) 2009-2020 MongoDB Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "bson-native.h"
+#include <ruby/encoding.h>
+static void pvt_raise_decode_error(volatile VALUE msg);
+static int32_t pvt_validate_length(byte_buffer_t *b);
+static uint8_t pvt_get_type_byte(byte_buffer_t *b);
+static VALUE pvt_get_int32(byte_buffer_t *b);
+static VALUE pvt_get_uint32(byte_buffer_t *b);
+static VALUE pvt_get_int64(byte_buffer_t *b, int argc, VALUE *argv);
+static VALUE pvt_get_double(byte_buffer_t *b);
+static VALUE pvt_get_string(byte_buffer_t *b, const char *data_type);
+static VALUE pvt_get_symbol(byte_buffer_t *b, VALUE rb_buffer, int argc, VALUE *argv);
+static VALUE pvt_get_boolean(byte_buffer_t *b);
+static VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type, int argc, VALUE *argv);
+static void pvt_skip_cstring(byte_buffer_t *b);
+/**
+ * Raises a Ruby exception carrying the given message. The exception class
+ * is the constant resolved by pvt_const_get_3("BSON", "Error",
+ * "BSONDecodeError"). Control does not return to the caller.
+ */
+void pvt_raise_decode_error(volatile VALUE msg) {
+  VALUE error_class = pvt_const_get_3("BSON", "Error", "BSONDecodeError");
+  VALUE exception = rb_exc_new_str(error_class, msg);
+  rb_exc_raise(exception);
+}
+/**
+ * Validates that the buffer contains the number of bytes the array / hash
+ * claims and that the document is null terminated.
+ *
+ * Reads the 4-byte little-endian length prefix at the current read
+ * position, checks that at least that many bytes are readable and that the
+ * byte at offset (length - 1) is zero, then advances the read position past
+ * the 4-byte prefix only.
+ *
+ * Returns the decoded length (which includes the prefix and terminator).
+ * Raises RangeError when the length is below 5 or the terminator is not a
+ * null byte; ENSURE_BSON_READ guards the two reads.
+ */
+int32_t pvt_validate_length(byte_buffer_t *b)
+{
+  int32_t length;
+  ENSURE_BSON_READ(b, 4);
+  memcpy(&length, READ_PTR(b), 4);
+  length = BSON_UINT32_FROM_LE(length);
+  /* minimum valid length is 4 (byte count) + 1 (terminating byte) */
+  if (length < 5) {
+    rb_raise(rb_eRangeError, "Buffer contained invalid length %d at %zu", length, b->read_position);
+  }
+  ENSURE_BSON_READ(b, length);
+  /* The last byte should be a null byte: it should be at length - 1 */
+  if (*(READ_PTR(b) + length - 1) != 0) {
+    /*
+     * Report the byte that was actually tested. Reading at offset `length`
+     * would touch one byte past the range validated above.
+     */
+    rb_raise(rb_eRangeError, "Buffer should have contained null terminator at %zu but contained %d", b->read_position + (size_t)length, (int)*(READ_PTR(b) + length - 1));
+  }
+  b->read_position += 4;
+  return length;
+}
+/**
+ * Decodes one field value of the given BSON type from a hash or array.
+ *
+ * The types listed in the switch are decoded by the native readers in this
+ * file. Any other type is looked up in the BSON registry and decoded by
+ * calling from_bson on the returned class with the Ruby buffer object.
+ */
+VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type, int argc, VALUE *argv)
+{
+  VALUE decoded;
+  switch (type) {
+    case BSON_TYPE_INT32:
+      decoded = pvt_get_int32(b);
+      break;
+    case BSON_TYPE_INT64:
+      decoded = pvt_get_int64(b, argc, argv);
+      break;
+    case BSON_TYPE_DOUBLE:
+      decoded = pvt_get_double(b);
+      break;
+    case BSON_TYPE_STRING:
+      decoded = pvt_get_string(b, "String");
+      break;
+    case BSON_TYPE_SYMBOL:
+      decoded = pvt_get_symbol(b, rb_buffer, argc, argv);
+      break;
+    case BSON_TYPE_ARRAY:
+      decoded = rb_bson_byte_buffer_get_array(argc, argv, rb_buffer);
+      break;
+    case BSON_TYPE_DOCUMENT:
+      decoded = rb_bson_byte_buffer_get_hash(argc, argv, rb_buffer);
+      break;
+    case BSON_TYPE_BOOLEAN:
+      decoded = pvt_get_boolean(b);
+      break;
+    default:
+    {
+      /* No native reader: delegate to the class registered for this type. */
+      VALUE decoder = rb_funcall(rb_bson_registry, rb_intern("get"), 1, INT2FIX(type));
+      decoded = rb_funcall(decoder, rb_intern("from_bson"), 1, rb_buffer);
+      RB_GC_GUARD(decoder);
+      break;
+    }
+  }
+  return decoded;
+}
+/**
+ * Reads the next byte of the buffer and returns it as a one-byte Ruby
+ * String, advancing the read position by one.
+ */
+VALUE rb_bson_byte_buffer_get_byte(VALUE self)
+{
+  byte_buffer_t *buffer;
+  VALUE result;
+  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
+  ENSURE_BSON_READ(buffer, 1);
+  result = rb_str_new(READ_PTR(buffer), 1);
+  buffer->read_position++;
+  return result;
+}
+/**
+ * Reads the next byte of the buffer as an unsigned BSON type code and
+ * advances the read position by one.
+ */
+uint8_t pvt_get_type_byte(byte_buffer_t *b){
+  uint8_t type_code;
+  ENSURE_BSON_READ(b, 1);
+  type_code = (uint8_t)*READ_PTR(b);
+  b->read_position++;
+  return type_code;
+}
+/**
+ * Reads the requested number of bytes from the buffer and returns them as
+ * a Ruby String, advancing the read position by that amount. The count is
+ * taken from the Ruby integer `i` via FIX2LONG.
+ */
+VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i)
+{
+  const uint32_t count = FIX2LONG(i);
+  byte_buffer_t *buffer;
+  VALUE result;
+  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
+  ENSURE_BSON_READ(buffer, count);
+  result = rb_str_new(READ_PTR(buffer), count);
+  buffer->read_position += count;
+  return result;
+}
+/**
+ * Reads one byte and maps it to a Ruby boolean: 1 yields true and 0 yields
+ * false. Any other value raises a decode error. The read position advances
+ * by one on success.
+ */
+VALUE pvt_get_boolean(byte_buffer_t *b){
+  char raw;
+  VALUE result;
+  ENSURE_BSON_READ(b, 1);
+  raw = *READ_PTR(b);
+  if (raw == 1) {
+    result = Qtrue;
+  } else if (raw == 0) {
+    result = Qfalse;
+  } else {
+    pvt_raise_decode_error(rb_sprintf("Invalid boolean byte value: %d", (int) raw));
+  }
+  b->read_position += 1;
+  return result;
+}
+/**
+ * Ruby-facing reader for a length-prefixed string: unwraps the native
+ * buffer from `self` and delegates to pvt_get_string with the "String"
+ * data type label.
+ */
+VALUE rb_bson_byte_buffer_get_string(VALUE self)
+{
+  byte_buffer_t *buffer = NULL;
+  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
+  return pvt_get_string(buffer, "String");
+}
+/**
+ * Reads a length-prefixed, null-terminated UTF-8 string from the buffer.
+ *
+ * The 4-byte little-endian prefix counts the string bytes including the
+ * trailing null byte. Raises a decode error when the prefix is negative or
+ * zero, or when the last counted byte is not null. The bytes before the
+ * terminator are validated as UTF-8 (embedded nulls allowed), with
+ * `data_type` passed through for the validator's error messages. On
+ * success the read position advances past the prefix and the string, and
+ * the result is a Ruby String tagged with the UTF-8 encoding.
+ */
+VALUE pvt_get_string(byte_buffer_t *b, const char *data_type)
+{
+  int32_t raw_length;
+  int32_t byte_count;
+  char *payload;
+  unsigned char terminator;
+  VALUE result;
+  ENSURE_BSON_READ(b, 4);
+  memcpy(&raw_length, READ_PTR(b), 4);
+  byte_count = BSON_UINT32_FROM_LE(raw_length);
+  if (byte_count < 0) {
+    pvt_raise_decode_error(rb_sprintf("String length is negative: %d", byte_count));
+  }
+  if (byte_count == 0) {
+    pvt_raise_decode_error(rb_str_new_cstr("String length is zero but string must be null-terminated"));
+  }
+  ENSURE_BSON_READ(b, 4 + byte_count);
+  payload = READ_PTR(b) + 4;
+  /* The terminator is the last of the byte_count bytes after the prefix. */
+  terminator = payload[byte_count - 1];
+  if (terminator != 0) {
+    pvt_raise_decode_error(rb_sprintf("Last byte of the string is not null: 0x%x", (int) terminator));
+  }
+  rb_bson_utf8_validate(payload, byte_count - 1, true, data_type);
+  result = rb_enc_str_new(payload, byte_count - 1, rb_utf8_encoding());
+  b->read_position += 4 + byte_count;
+  return result;
+}
+/**
+ * Reads a BSON symbol value out of the byte buffer.
+ *
+ * When the argc/argv options carry a :mode of :bson, the UTF-8 string is
+ * read natively and wrapped in a BSON::Symbol::Raw. Otherwise the class
+ * registered for the symbol type in the BSON registry decodes the value
+ * through its from_bson method (String in bson-ruby, overridden to Symbol
+ * by the Ruby driver). Returns either a BSON::Symbol::Raw, Symbol or String
+ * value.
+ */
+VALUE pvt_get_symbol(byte_buffer_t *b, VALUE rb_buffer, int argc, VALUE *argv)
+{
+  VALUE klass;
+  VALUE value;
+  const int bson_mode = (pvt_get_mode_option(argc, argv) == BSON_MODE_BSON);
+  if (!bson_mode) {
+    klass = rb_funcall(rb_bson_registry, rb_intern("get"), 1, INT2FIX(BSON_TYPE_SYMBOL));
+    value = rb_funcall(klass, rb_intern("from_bson"), 1, rb_buffer);
+  } else {
+    value = pvt_get_string(b, "Symbol");
+    klass = pvt_const_get_3("BSON", "Symbol", "Raw");
+    value = rb_funcall(klass, rb_intern("new"), 1, value);
+  }
+  RB_GC_GUARD(klass);
+  return value;
+}
+/**
+ * Get a cstring from the buffer.
+ *
+ * Measures the null-terminated string at the read position, returns its
+ * bytes (without the terminator) as a UTF-8 tagged Ruby String, and
+ * advances the read position past the string and its terminator.
+ *
+ * NOTE(review): strlen() itself is not bounded by the amount of data in the
+ * buffer; bounding the scan needs the buffer's end position, which is not
+ * visible from this file.
+ */
+VALUE rb_bson_byte_buffer_get_cstring(VALUE self)
+{
+  byte_buffer_t *b;
+  VALUE string;
+  int length;
+  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
+  length = (int)strlen(READ_PTR(b));
+  /*
+   * length + 1 bytes are consumed below (string plus terminator), so the
+   * terminator must lie inside the readable region as well. Checking only
+   * `length` would let the read position move past the end of the data.
+   */
+  ENSURE_BSON_READ(b, length + 1);
+  string = rb_enc_str_new(READ_PTR(b), length, rb_utf8_encoding());
+  b->read_position += length + 1;
+  return string;
+}
+/**
+ * Reads but does not return a cstring from the buffer: the read position
+ * is advanced past the null-terminated string and its terminator.
+ *
+ * NOTE(review): strlen() itself is not bounded by the amount of data in the
+ * buffer; bounding the scan needs the buffer's end position, which is not
+ * visible from this file.
+ */
+void pvt_skip_cstring(byte_buffer_t *b)
+{
+  int length;
+  length = (int)strlen(READ_PTR(b));
+  /*
+   * The terminator is skipped too, so it has to be inside the readable
+   * region; validate length + 1 bytes rather than length.
+   */
+  ENSURE_BSON_READ(b, length + 1);
+  b->read_position += length + 1;
+}
+/**
+ * Ruby-facing reader for a signed 32-bit integer: unwraps the native
+ * buffer from `self` and delegates to pvt_get_int32.
+ */
+VALUE rb_bson_byte_buffer_get_int32(VALUE self)
+{
+  byte_buffer_t *buffer = NULL;
+  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
+  return pvt_get_int32(buffer);
+}
+/**
+ * Reads four little-endian bytes at the read position as a signed 32-bit
+ * integer, advances the read position by four, and returns the value as a
+ * Ruby Integer.
+ */
+VALUE pvt_get_int32(byte_buffer_t *b)
+{
+  int32_t raw;
+  ENSURE_BSON_READ(b, 4);
+  memcpy(&raw, READ_PTR(b), sizeof(raw));
+  b->read_position += sizeof(raw);
+  return INT2NUM(BSON_UINT32_FROM_LE(raw));
+}
+/**
+ * Ruby-facing reader for an unsigned 32-bit integer: unwraps the native
+ * buffer from `self` and delegates to pvt_get_uint32.
+ */
+VALUE rb_bson_byte_buffer_get_uint32(VALUE self)
+{
+  byte_buffer_t *buffer = NULL;
+  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
+  return pvt_get_uint32(buffer);
+}
+/**
+ * Reads four little-endian bytes at the read position as an unsigned
+ * 32-bit integer, advances the read position by four, and returns the
+ * value as a Ruby Integer.
+ */
+VALUE pvt_get_uint32(byte_buffer_t *b)
+{
+  uint32_t raw;
+  ENSURE_BSON_READ(b, 4);
+  memcpy(&raw, READ_PTR(b), sizeof(raw));
+  b->read_position += sizeof(raw);
+  return UINT2NUM(BSON_UINT32_FROM_LE(raw));
+}
+/**
+ * Ruby-facing reader for a signed 64-bit integer: unwraps the native
+ * buffer from `self` and delegates to pvt_get_int64 with no options.
+ */
+VALUE rb_bson_byte_buffer_get_int64(VALUE self)
+{
+  byte_buffer_t *buffer = NULL;
+  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
+  return pvt_get_int64(buffer, 0, NULL);
+}
+/**
+ * Reads a 64-bit integer out of the byte buffer into a Ruby Integer instance.
+ * If the argc/argv arguments have a :mode option with the value of :bson,
+ * wraps the integer in the class registered for the int64 type in the BSON
+ * registry (BSON::Int64). Returns either the Integer or the BSON::Int64
+ * instance. The read position advances by eight bytes.
+ */
+VALUE pvt_get_int64(byte_buffer_t *b, int argc, VALUE *argv)
+{
+  int64_t i64;
+  VALUE num;
+  VALUE result;
+  ENSURE_BSON_READ(b, 8);
+  memcpy(&i64, READ_PTR(b), 8);
+  b->read_position += 8;
+  num = LL2NUM(BSON_UINT64_FROM_LE(i64));
+  result = num;
+  if (pvt_get_mode_option(argc, argv) == BSON_MODE_BSON) {
+    VALUE klass = rb_funcall(rb_bson_registry,rb_intern("get"),1, INT2FIX(BSON_TYPE_INT64));
+    result = rb_funcall(klass, rb_intern("new"), 1, num);
+    RB_GC_GUARD(klass);
+  }
+  /*
+   * Single exit so the guard is actually reached: it keeps `num` visible
+   * to the GC across the Ruby calls above.
+   */
+  RB_GC_GUARD(num);
+  return result;
+}
+/**
+ * Ruby-facing reader for a double: unwraps the native buffer from `self`
+ * and delegates to pvt_get_double.
+ */
+VALUE rb_bson_byte_buffer_get_double(VALUE self)
+{
+  byte_buffer_t *buffer = NULL;
+  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
+  return pvt_get_double(buffer);
+}
+/**
+ * Reads eight little-endian bytes at the read position as a double,
+ * advances the read position by eight, and returns the value as a Ruby
+ * Float.
+ */
+VALUE pvt_get_double(byte_buffer_t *b)
+{
+  double raw;
+  ENSURE_BSON_READ(b, 8);
+  memcpy(&raw, READ_PTR(b), sizeof(raw));
+  b->read_position += sizeof(raw);
+  return DBL2NUM(BSON_DOUBLE_FROM_LE(raw));
+}
+/**
+ * Reads the 16 raw bytes of a decimal128 value from the buffer and returns
+ * them as a Ruby String, advancing the read position by 16.
+ */
+VALUE rb_bson_byte_buffer_get_decimal128_bytes(VALUE self)
+{
+  byte_buffer_t *buffer;
+  VALUE raw_bytes;
+  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
+  ENSURE_BSON_READ(buffer, 16);
+  raw_bytes = rb_str_new(READ_PTR(buffer), 16);
+  buffer->read_position += 16;
+  return raw_bytes;
+}
+/**
+ * Reads a BSON document from the buffer into a newly allocated
+ * BSON::Document.
+ *
+ * The length prefix is validated first. Elements are then read one at a
+ * time (type byte, cstring key, value) until a zero type byte is reached.
+ * argc/argv are passed through to the field readers. Raises a decode error
+ * when the number of bytes consumed differs from the declared length.
+ */
+VALUE rb_bson_byte_buffer_get_hash(int argc, VALUE *argv, VALUE self){
+  VALUE doc = Qnil;
+  byte_buffer_t *b = NULL;
+  uint8_t type;
+  VALUE cDocument = pvt_const_get_2("BSON", "Document");
+  int32_t length;
+  char *start_ptr;
+  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
+  start_ptr = READ_PTR(b);
+  length = pvt_validate_length(b);
+  doc = rb_funcall(cDocument, rb_intern("allocate"), 0);
+  while((type = pvt_get_type_byte(b)) != 0){
+    VALUE field = rb_bson_byte_buffer_get_cstring(self);
+    rb_hash_aset(doc, field, pvt_read_field(b, self, type, argc, argv));
+    RB_GC_GUARD(field);
+  }
+  if (READ_PTR(b) - start_ptr != length) {
+    /* The pointer difference is a ptrdiff_t; cast it so it matches %ld. */
+    pvt_raise_decode_error(rb_sprintf("Expected to read %d bytes for the hash but read %ld bytes", length, (long)(READ_PTR(b) - start_ptr)));
+  }
+  return doc;
+}
+/**
+ * Reads a BSON array from the buffer into a new Ruby Array.
+ *
+ * The length prefix is validated first. Elements are then read one at a
+ * time (type byte, cstring key which is skipped, value) until a zero type
+ * byte is reached. argc/argv are passed through to the field readers.
+ * Raises a decode error when the number of bytes consumed differs from the
+ * declared length.
+ *
+ * NOTE(review): the decode error text says "for the hash" although this
+ * reads an array; left unchanged in case callers match on the message.
+ */
+VALUE rb_bson_byte_buffer_get_array(int argc, VALUE *argv, VALUE self){
+  byte_buffer_t *b;
+  VALUE array = Qnil;
+  uint8_t type;
+  int32_t length;
+  char *start_ptr;
+  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
+  start_ptr = READ_PTR(b);
+  length = pvt_validate_length(b);
+  array = rb_ary_new();
+  while((type = pvt_get_type_byte(b)) != 0){
+    pvt_skip_cstring(b);
+    rb_ary_push(array,  pvt_read_field(b, self, type, argc, argv));
+  }
+  RB_GC_GUARD(array);
+  if (READ_PTR(b) - start_ptr != length) {
+    /* The pointer difference is a ptrdiff_t; cast it so it matches %ld. */
+    pvt_raise_decode_error(rb_sprintf("Expected to read %d bytes for the hash but read %ld bytes", length, (long)(READ_PTR(b) - start_ptr)));
+  }
+  return array;
+}