bson 3.2.7 → 4.0.0.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/Rakefile +2 -10
  5. data/ext/bson/native-endian.h +120 -0
  6. data/ext/bson/native.c +547 -581
  7. data/lib/bson.rb +0 -1
  8. data/lib/bson/array.rb +15 -14
  9. data/lib/bson/binary.rb +13 -13
  10. data/lib/bson/boolean.rb +3 -3
  11. data/lib/bson/code.rb +5 -8
  12. data/lib/bson/code_with_scope.rb +10 -13
  13. data/lib/bson/date.rb +2 -2
  14. data/lib/bson/date_time.rb +2 -2
  15. data/lib/bson/document.rb +33 -0
  16. data/lib/bson/false_class.rb +2 -2
  17. data/lib/bson/float.rb +5 -11
  18. data/lib/bson/hash.rb +15 -14
  19. data/lib/bson/int32.rb +8 -9
  20. data/lib/bson/int64.rb +3 -9
  21. data/lib/bson/integer.rb +6 -20
  22. data/lib/bson/nil_class.rb +4 -16
  23. data/lib/bson/object.rb +1 -1
  24. data/lib/bson/object_id.rb +14 -16
  25. data/lib/bson/regexp.rb +7 -7
  26. data/lib/bson/specialized.rb +6 -6
  27. data/lib/bson/string.rb +7 -91
  28. data/lib/bson/symbol.rb +8 -7
  29. data/lib/bson/time.rb +5 -5
  30. data/lib/bson/timestamp.rb +8 -6
  31. data/lib/bson/true_class.rb +2 -2
  32. data/lib/bson/undefined.rb +1 -26
  33. data/lib/bson/version.rb +1 -1
  34. data/spec/bson/array_spec.rb +1 -1
  35. data/spec/bson/byte_buffer_spec.rb +445 -0
  36. data/spec/bson/code_with_scope_spec.rb +3 -7
  37. data/spec/bson/document_spec.rb +66 -10
  38. data/spec/bson/hash_spec.rb +5 -5
  39. data/spec/bson/int32_spec.rb +7 -5
  40. data/spec/bson/integer_spec.rb +1 -6
  41. data/spec/bson/object_id_spec.rb +2 -39
  42. data/spec/bson/regexp_spec.rb +1 -1
  43. data/spec/bson/string_spec.rb +2 -204
  44. data/spec/bson/symbol_spec.rb +2 -17
  45. data/spec/support/shared_examples.rb +3 -26
  46. metadata +13 -11
  47. metadata.gz.sig +0 -0
  48. data/lib/bson/encodable.rb +0 -86
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 51775b047f4c1019139fc14f1a387e4ca52e1d00
4
- data.tar.gz: 11aebbe7f66db816c1f37aa2ac65a7a0417bc405
3
+ metadata.gz: 30f969161b2003e31eff164aeece7642d94d1569
4
+ data.tar.gz: a6d728c087dcc9aed56d9a272ded92d1cdf661c6
5
5
  SHA512:
6
- metadata.gz: 1583872b9ca51739634993deb90b709f009a717bd4744927d19eb37312e5440ecd98d9446a1c3f3ac6cddca2b2f99ea29a65e67d48a7031e30add88ba6d87e31
7
- data.tar.gz: dbc588428eb14d506bb70fa86c6522f333cd7f8c8684c127d4950fd50e2b4426f5cefe6d103e53c4da152074fc55ecb17b119a874e807cc97cc09581bbf7efae
6
+ metadata.gz: 4b5ec98b56e79843b4351b2a001852c83b5f8ed360096fee11f8579a2d78acde5c57f0f77d05b6d894829ace638c6f754eb9b79640fd0059730c32522c132c34
7
+ data.tar.gz: b4456f8ea0924747e9a253e63aaeb8972267964b249656603f765a8b0f61b6832ea3f12d1dd6ac0d0c2a6a5fbac03b98ea736e9d9cefc261ac364590fce60d50
Binary file
data.tar.gz.sig CHANGED
Binary file
data/Rakefile CHANGED
@@ -49,7 +49,6 @@ end
49
49
 
50
50
  require_relative "perf/bench"
51
51
 
52
- RSpec::Core::RakeTask.new(:spec)
53
52
  RSpec::Core::RakeTask.new(:rspec)
54
53
 
55
54
  if jruby?
@@ -74,8 +73,7 @@ task :clean_all => :clean do
74
73
  end
75
74
  end
76
75
 
77
- task :ext_spec => :compile do
78
- ENV["WITH_EXT"] = "C"
76
+ task :spec => :compile do
79
77
  Rake::Task["rspec"].invoke
80
78
  end
81
79
 
@@ -110,12 +108,6 @@ namespace :benchmark do
110
108
  require "bson"
111
109
  benchmark!
112
110
  end
113
-
114
- task :profile => :compile do
115
- puts "Profiling with native extensions..."
116
- require "bson"
117
- profile!
118
- end
119
111
  end
120
112
 
121
- task :default => [ :clean_all, :spec, :ext_spec ]
113
+ task :default => [ :clean_all, :spec ]
@@ -0,0 +1,120 @@
1
+ // "License": Public Domain
2
+ // I, Mathias Panzenböck, place this file hereby into the public domain. Use it at your own risk for whatever you like.
3
+ // In case there are jurisdictions that don't support putting things in the public domain you can also consider it to
4
+ // be "dual licensed" under the BSD, MIT and Apache licenses, if you want to. This code is trivial anyway. Consider it
5
+ // an example on how to get the endian conversion functions on different platforms.
6
+
7
+ #ifndef PORTABLE_ENDIAN_H__
8
+ #define PORTABLE_ENDIAN_H__
9
+
10
+ #if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__)
11
+
12
+ # define __WINDOWS__
13
+ # include <winsock2.h>
14
+ #else
15
+ # include <arpa/inet.h>
16
+ # include <sys/types.h>
17
+ #endif
18
+
19
+ #if defined(__linux__) || defined(__CYGWIN__)
20
+
21
+ # include <endian.h>
22
+
23
+ #elif defined(__APPLE__)
24
+
25
+ # include <libkern/OSByteOrder.h>
26
+
27
+ # define htobe16(x) OSSwapHostToBigInt16(x)
28
+ # define htole16(x) OSSwapHostToLittleInt16(x)
29
+ # define be16toh(x) OSSwapBigToHostInt16(x)
30
+ # define le16toh(x) OSSwapLittleToHostInt16(x)
31
+
32
+ # define htobe32(x) OSSwapHostToBigInt32(x)
33
+ # define htole32(x) OSSwapHostToLittleInt32(x)
34
+ # define be32toh(x) OSSwapBigToHostInt32(x)
35
+ # define le32toh(x) OSSwapLittleToHostInt32(x)
36
+
37
+ # define htobe64(x) OSSwapHostToBigInt64(x)
38
+ # define htole64(x) OSSwapHostToLittleInt64(x)
39
+ # define be64toh(x) OSSwapBigToHostInt64(x)
40
+ # define le64toh(x) OSSwapLittleToHostInt64(x)
41
+
42
+ # define __BYTE_ORDER BYTE_ORDER
43
+ # define __BIG_ENDIAN BIG_ENDIAN
44
+ # define __LITTLE_ENDIAN LITTLE_ENDIAN
45
+ # define __PDP_ENDIAN PDP_ENDIAN
46
+
47
+ #elif defined(__OpenBSD__)
48
+
49
+ # include <sys/endian.h>
50
+
51
+ #elif defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
52
+
53
+ # include <sys/endian.h>
54
+
55
+ # define be16toh(x) betoh16(x)
56
+ # define le16toh(x) letoh16(x)
57
+
58
+ # define be32toh(x) betoh32(x)
59
+ # define le32toh(x) letoh32(x)
60
+
61
+ # define be64toh(x) betoh64(x)
62
+ # define le64toh(x) letoh64(x)
63
+
64
+ #elif defined(__WINDOWS__)
65
+
66
+ # include <sys/param.h>
67
+
68
+ # if BYTE_ORDER == LITTLE_ENDIAN
69
+
70
+ # define htobe16(x) htons(x)
71
+ # define htole16(x) (x)
72
+ # define be16toh(x) ntohs(x)
73
+ # define le16toh(x) (x)
74
+
75
+ # define htobe32(x) htonl(x)
76
+ # define htole32(x) (x)
77
+ # define be32toh(x) ntohl(x)
78
+ # define le32toh(x) (x)
79
+
80
+ # define htobe64(x) htonll(x)
81
+ # define htole64(x) (x)
82
+ # define be64toh(x) ntohll(x)
83
+ # define le64toh(x) (x)
84
+
85
+ # elif BYTE_ORDER == BIG_ENDIAN
86
+
87
+ /* that would be xbox 360 */
88
+ # define htobe16(x) (x)
89
+ # define htole16(x) __builtin_bswap16(x)
90
+ # define be16toh(x) (x)
91
+ # define le16toh(x) __builtin_bswap16(x)
92
+
93
+ # define htobe32(x) (x)
94
+ # define htole32(x) __builtin_bswap32(x)
95
+ # define be32toh(x) (x)
96
+ # define le32toh(x) __builtin_bswap32(x)
97
+
98
+ # define htobe64(x) (x)
99
+ # define htole64(x) __builtin_bswap64(x)
100
+ # define be64toh(x) (x)
101
+ # define le64toh(x) __builtin_bswap64(x)
102
+
103
+ # else
104
+
105
+ # error byte order not supported
106
+
107
+ # endif
108
+
109
+ # define __BYTE_ORDER BYTE_ORDER
110
+ # define __BIG_ENDIAN BIG_ENDIAN
111
+ # define __LITTLE_ENDIAN LITTLE_ENDIAN
112
+ # define __PDP_ENDIAN PDP_ENDIAN
113
+
114
+ #else
115
+
116
+ # error platform not supported
117
+
118
+ #endif
119
+
120
+ #endif
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (C) 2009-2013 MongoDB Inc.
2
+ * Copyright (C) 2009-2015 MongoDB Inc.
3
3
  *
4
4
  * Licensed under the Apache License, Version 2.0 (the "License");
5
5
  * you may not use this file except in compliance with the License.
@@ -13,733 +13,699 @@
13
13
  * See the License for the specific language governing permissions and
14
14
  * limitations under the License.
15
15
  */
16
- #ifdef _WIN32
17
- #include <winsock2.h>
18
- #else
19
- #include <arpa/inet.h>
20
- #include <sys/types.h>
21
- #endif
22
-
23
- #include <stdint.h>
16
+ #include <ruby.h>
17
+ #include <ruby/encoding.h>
18
+ #include <stdbool.h>
24
19
  #include <time.h>
25
20
  #include <unistd.h>
26
- #include <ruby.h>
27
-
28
- /**
29
- * For 64 byte systems we convert to longs, for 32 byte systems we convert
30
- * to a long long.
31
- *
32
- * @since 2.0.0
33
- */
34
- #if SIZEOF_LONG == 8
35
- #define NUM2INT64(v) NUM2LONG(v)
36
- #define INT642NUM(v) LONG2NUM(v)
37
- #else
38
- #define NUM2INT64(v) NUM2LL(v)
39
- #define INT642NUM(v) LL2NUM(v)
40
- #endif
21
+ #include "native-endian.h"
41
22
 
42
- /**
43
- * Ruby 1.8.7 does not define DBL2NUM, so we define it if it's not there.
44
- *
45
- * @since 2.0.0
46
- */
47
- #ifndef DBL2NUM
48
- #define DBL2NUM(dbl) rb_float_new(dbl)
49
- #endif
23
+ #define BSON_BYTE_BUFFER_SIZE 1024
50
24
 
51
- /**
52
- * Define the max hostname hash length constant if nonexistant.
53
- *
54
- * @since 3.2.0
55
- */
56
25
  #ifndef HOST_NAME_HASH_MAX
57
26
  #define HOST_NAME_HASH_MAX 256
58
27
  #endif
59
28
 
60
- /**
61
- * Define index sizes for array serialization.
62
- *
63
- * @since 2.0.0
64
- */
65
- #define BSON_INDEX_SIZE 1024
66
- #define BSON_INDEX_CHAR_SIZE 5
67
- #define INTEGER_CHAR_SIZE 22
29
+ typedef struct {
30
+ size_t size;
31
+ size_t write_position;
32
+ size_t read_position;
33
+ char buffer[BSON_BYTE_BUFFER_SIZE];
34
+ char *b_ptr;
35
+ } byte_buffer_t;
36
+
37
+ #define READ_PTR(byte_buffer_ptr) \
38
+ (byte_buffer_ptr->b_ptr + byte_buffer_ptr->read_position)
39
+
40
+ #define READ_SIZE(byte_buffer_ptr) \
41
+ (byte_buffer_ptr->write_position - byte_buffer_ptr->read_position)
42
+
43
+ #define WRITE_PTR(byte_buffer_ptr) \
44
+ (byte_buffer_ptr->b_ptr + byte_buffer_ptr->write_position)
45
+
46
+ #define ENSURE_BSON_WRITE(buffer_ptr, length) \
47
+ { if (buffer_ptr->write_position + length > buffer_ptr->size) rb_bson_expand_buffer(buffer_ptr, length); }
48
+
49
+ #define ENSURE_BSON_READ(buffer_ptr, length) \
50
+ { if (buffer_ptr->read_position + length > buffer_ptr->write_position) \
51
+ rb_raise(rb_eRangeError, "Attempted to read %zu bytes, but only %zu bytes remain", (size_t)length, READ_SIZE(buffer_ptr)); }
52
+
53
+ static VALUE rb_bson_byte_buffer_allocate(VALUE klass);
54
+ static VALUE rb_bson_byte_buffer_initialize(int argc, VALUE *argv, VALUE self);
55
+ static VALUE rb_bson_byte_buffer_length(VALUE self);
56
+ static VALUE rb_bson_byte_buffer_get_byte(VALUE self);
57
+ static VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i);
58
+ static VALUE rb_bson_byte_buffer_get_cstring(VALUE self);
59
+ static VALUE rb_bson_byte_buffer_get_double(VALUE self);
60
+ static VALUE rb_bson_byte_buffer_get_int32(VALUE self);
61
+ static VALUE rb_bson_byte_buffer_get_int64(VALUE self);
62
+ static VALUE rb_bson_byte_buffer_get_string(VALUE self);
63
+ static VALUE rb_bson_byte_buffer_put_byte(VALUE self, VALUE byte);
64
+ static VALUE rb_bson_byte_buffer_put_bytes(VALUE self, VALUE bytes);
65
+ static VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string);
66
+ static VALUE rb_bson_byte_buffer_put_double(VALUE self, VALUE f);
67
+ static VALUE rb_bson_byte_buffer_put_int32(VALUE self, VALUE i);
68
+ static VALUE rb_bson_byte_buffer_put_int64(VALUE self, VALUE i);
69
+ static VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string);
70
+ static VALUE rb_bson_byte_buffer_read_position(VALUE self);
71
+ static VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i);
72
+ static VALUE rb_bson_byte_buffer_write_position(VALUE self);
73
+ static VALUE rb_bson_byte_buffer_to_s(VALUE self);
74
+ static VALUE rb_bson_object_id_generator_next(int argc, VALUE* args, VALUE self);
75
+
76
+ static size_t rb_bson_byte_buffer_memsize(const void *ptr);
77
+ static void rb_bson_byte_buffer_free(void *ptr);
78
+ static void rb_bson_expand_buffer(byte_buffer_t* buffer_ptr, size_t length);
79
+ static void rb_bson_generate_machine_id(VALUE rb_md5_class, char *rb_bson_machine_id);
80
+ static bool rb_bson_utf8_validate(const char *utf8, size_t utf8_len, bool allow_null);
81
+
82
+ static const rb_data_type_t rb_byte_buffer_data_type = {
83
+ "bson/byte_buffer",
84
+ { NULL, rb_bson_byte_buffer_free, rb_bson_byte_buffer_memsize }
85
+ };
68
86
 
69
87
  /**
70
- * Constant for the intetger array indexes.
71
- *
72
- * @since 2.0.0
88
+ * Holds the machine id hash for object id generation.
73
89
  */
74
- static char rb_bson_array_indexes[BSON_INDEX_SIZE][BSON_INDEX_CHAR_SIZE];
90
+ static char rb_bson_machine_id_hash[HOST_NAME_HASH_MAX];
75
91
 
76
92
  /**
77
- * BSON::UTF8
78
- *
79
- * @since 2.0.0
93
+ * The counter for incrementing object ids.
80
94
  */
81
- static VALUE rb_bson_utf8_string;
95
+ static unsigned int rb_bson_object_id_counter = 0;
82
96
 
83
97
  /**
84
- * Set the UTC string method for reference at load.
85
- *
86
- * @since 2.0.0
98
+ * Initialize the native extension.
87
99
  */
88
- static VALUE rb_utc_method;
100
+ void Init_native()
101
+ {
102
+ char rb_bson_machine_id[256];
89
103
 
90
- #include <ruby/encoding.h>
104
+ VALUE rb_bson_module = rb_define_module("BSON");
105
+ VALUE rb_byte_buffer_class = rb_define_class_under(rb_bson_module, "ByteBuffer", rb_cObject);
106
+ VALUE rb_bson_object_id_class = rb_const_get(rb_bson_module, rb_intern("ObjectId"));
107
+ VALUE rb_bson_object_id_generator_class = rb_const_get(rb_bson_object_id_class, rb_intern("Generator"));
108
+ VALUE rb_digest_class = rb_const_get(rb_cObject, rb_intern("Digest"));
109
+ VALUE rb_md5_class = rb_const_get(rb_digest_class, rb_intern("MD5"));
110
+
111
+ rb_define_alloc_func(rb_byte_buffer_class, rb_bson_byte_buffer_allocate);
112
+ rb_define_method(rb_byte_buffer_class, "initialize", rb_bson_byte_buffer_initialize, -1);
113
+ rb_define_method(rb_byte_buffer_class, "length", rb_bson_byte_buffer_length, 0);
114
+ rb_define_method(rb_byte_buffer_class, "get_byte", rb_bson_byte_buffer_get_byte, 0);
115
+ rb_define_method(rb_byte_buffer_class, "get_bytes", rb_bson_byte_buffer_get_bytes, 1);
116
+ rb_define_method(rb_byte_buffer_class, "get_cstring", rb_bson_byte_buffer_get_cstring, 0);
117
+ rb_define_method(rb_byte_buffer_class, "get_double", rb_bson_byte_buffer_get_double, 0);
118
+ rb_define_method(rb_byte_buffer_class, "get_int32", rb_bson_byte_buffer_get_int32, 0);
119
+ rb_define_method(rb_byte_buffer_class, "get_int64", rb_bson_byte_buffer_get_int64, 0);
120
+ rb_define_method(rb_byte_buffer_class, "get_string", rb_bson_byte_buffer_get_string, 0);
121
+ rb_define_method(rb_byte_buffer_class, "put_byte", rb_bson_byte_buffer_put_byte, 1);
122
+ rb_define_method(rb_byte_buffer_class, "put_bytes", rb_bson_byte_buffer_put_bytes, 1);
123
+ rb_define_method(rb_byte_buffer_class, "put_cstring", rb_bson_byte_buffer_put_cstring, 1);
124
+ rb_define_method(rb_byte_buffer_class, "put_double", rb_bson_byte_buffer_put_double, 1);
125
+ rb_define_method(rb_byte_buffer_class, "put_int32", rb_bson_byte_buffer_put_int32, 1);
126
+ rb_define_method(rb_byte_buffer_class, "put_int64", rb_bson_byte_buffer_put_int64, 1);
127
+ rb_define_method(rb_byte_buffer_class, "put_string", rb_bson_byte_buffer_put_string, 1);
128
+ rb_define_method(rb_byte_buffer_class, "read_position", rb_bson_byte_buffer_read_position, 0);
129
+ rb_define_method(rb_byte_buffer_class, "replace_int32", rb_bson_byte_buffer_replace_int32, 2);
130
+ rb_define_method(rb_byte_buffer_class, "write_position", rb_bson_byte_buffer_write_position, 0);
131
+ rb_define_method(rb_byte_buffer_class, "to_s", rb_bson_byte_buffer_to_s, 0);
132
+ rb_define_method(rb_bson_object_id_generator_class, "next_object_id", rb_bson_object_id_generator_next, -1);
91
133
 
92
- #if __BYTE_ORDER == __BIG_ENDIAN
93
- typedef union doublebyte
134
+ // Get the object id machine id and hash it.
135
+ rb_require("digest/md5");
136
+ gethostname(rb_bson_machine_id, sizeof(rb_bson_machine_id));
137
+ rb_bson_machine_id[255] = '\0';
138
+ rb_bson_generate_machine_id(rb_md5_class, rb_bson_machine_id);
139
+ }
140
+
141
+ void rb_bson_generate_machine_id(VALUE rb_md5_class, char *rb_bson_machine_id)
94
142
  {
95
- double d;
96
- unsigned char b[sizeof(double)];
97
- } doublebytet;
98
- #endif
143
+ VALUE digest = rb_funcall(rb_md5_class, rb_intern("digest"), 1, rb_str_new2(rb_bson_machine_id));
144
+ memcpy(rb_bson_machine_id_hash, RSTRING_PTR(digest), RSTRING_LEN(digest));
145
+ }
99
146
 
100
147
  /**
101
- * Convert the binary string to a ruby utf8 string.
102
- *
103
- * @example Convert the string to binary.
104
- * rb_bson_from_bson_string("test");
105
- *
106
- * @param [ String ] string The ruby string.
107
- *
108
- * @return [ String ] The encoded string.
109
- *
110
- * @since 2.0.0
148
+ * Allocates a bson byte buffer that wraps a byte_buffer_t.
111
149
  */
112
- static VALUE rb_bson_from_bson_string(VALUE string)
150
+ VALUE rb_bson_byte_buffer_allocate(VALUE klass)
113
151
  {
114
- return rb_enc_associate(string, rb_utf8_encoding());
152
+ byte_buffer_t *b;
153
+ VALUE obj = TypedData_Make_Struct(klass, byte_buffer_t, &rb_byte_buffer_data_type, b);
154
+ b->b_ptr = b->buffer;
155
+ b->size = BSON_BYTE_BUFFER_SIZE;
156
+ return obj;
115
157
  }
116
158
 
117
159
  /**
118
- * Provide default new string with binary encoding.
119
- *
120
- * @example Check encoded and provide default new binary encoded string.
121
- * if (NIL_P(encoded)) encoded = rb_str_new_encoded_binary();
122
- *
123
- * @return [ String ] The new string with binary encoding.
124
- *
125
- * @since 2.0.0
160
+ * Initialize a byte buffer.
126
161
  */
127
- static VALUE rb_str_new_encoded_binary(void)
162
+ VALUE rb_bson_byte_buffer_initialize(int argc, VALUE *argv, VALUE self)
128
163
  {
129
- return rb_enc_str_new("", 0, rb_ascii8bit_encoding());
164
+ VALUE bytes;
165
+ rb_scan_args(argc, argv, "01", &bytes);
166
+
167
+ if (!NIL_P(bytes)) {
168
+ rb_bson_byte_buffer_put_bytes(self, bytes);
169
+ }
170
+
171
+ return self;
130
172
  }
131
173
 
132
174
  /**
133
- * Constant for a null byte.
134
- *
135
- * @since 2.0.0
175
+ * Get the length of the buffer.
136
176
  */
137
- static const char rb_bson_null_byte = 0;
177
+ VALUE rb_bson_byte_buffer_length(VALUE self)
178
+ {
179
+ byte_buffer_t *b;
180
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
181
+ return UINT2NUM(READ_SIZE(b));
182
+ }
138
183
 
139
184
  /**
140
- * Constant for a true byte.
141
- *
142
- * @since 2.0.0
185
+ * Get a single byte from the buffer.
143
186
  */
144
- static const char rb_bson_true_byte = 1;
187
+ VALUE rb_bson_byte_buffer_get_byte(VALUE self)
188
+ {
189
+ byte_buffer_t *b;
190
+ VALUE byte;
191
+
192
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
193
+ ENSURE_BSON_READ(b, 1);
194
+ byte = rb_str_new(READ_PTR(b), 1);
195
+ b->read_position += 1;
196
+ return byte;
197
+ }
145
198
 
146
199
  /**
147
- * Holds the machine id hash for object id generation.
148
- *
149
- * @since 3.2.0
150
- *
200
+ * Get bytes from the buffer.
151
201
  */
152
- static char rb_bson_machine_id_hash[HOST_NAME_HASH_MAX];
202
+ VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i)
203
+ {
204
+ byte_buffer_t *b;
205
+ VALUE bytes;
206
+ const long length = FIX2LONG(i);
207
+
208
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
209
+ ENSURE_BSON_READ(b, length);
210
+ bytes = rb_str_new(READ_PTR(b), length);
211
+ b->read_position += length;
212
+ return bytes;
213
+ }
153
214
 
154
215
  /**
155
- * The counter for incrementing object ids.
156
- *
157
- * @since 2.0.0
216
+ * Get a cstring from the buffer.
158
217
  */
159
- static unsigned int rb_bson_object_id_counter = 0;
218
+ VALUE rb_bson_byte_buffer_get_cstring(VALUE self)
219
+ {
220
+ byte_buffer_t *b;
221
+ VALUE string;
222
+ int length;
223
+
224
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
225
+ length = (int)strlen(READ_PTR(b));
226
+ ENSURE_BSON_READ(b, length);
227
+ string = rb_enc_str_new(READ_PTR(b), length, rb_utf8_encoding());
228
+ b->read_position += length + 1;
229
+ return string;
230
+ }
160
231
 
161
232
  /**
162
- * Take the provided params and return the encoded bytes or a default one.
163
- *
164
- * @example Get the default encoded bytes.
165
- * rb_get_default_encoded(1, bytes);
166
- *
167
- * @param [ int ] argc The number of arguments.
168
- * @param [ Object ] argv The arguments.
169
- *
170
- * @return [ String ] The encoded string.
171
- *
172
- * @since 2.0.0
233
+ * Get a double from the buffer.
173
234
  */
174
- static VALUE rb_get_default_encoded(int argc, VALUE *argv)
235
+ VALUE rb_bson_byte_buffer_get_double(VALUE self)
175
236
  {
176
- VALUE encoded;
177
- rb_scan_args(argc, argv, "01", &encoded);
178
- if (NIL_P(encoded)) encoded = rb_str_new_encoded_binary();
179
- return encoded;
237
+ byte_buffer_t *b;
238
+ union { uint64_t i64; double d; } ucast;
239
+
240
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
241
+ ENSURE_BSON_READ(b, 8);
242
+ ucast.i64 = le64toh(*(uint64_t*)READ_PTR(b));
243
+ b->read_position += 8;
244
+ return DBL2NUM(ucast.d);
180
245
  }
181
246
 
182
247
  /**
183
- * Append the ruby float as 8-byte double value to buffer.
184
- *
185
- * @example Convert float to double and append.
186
- * rb_float_to_bson(..., 1.2311);
187
- *
188
- * @param [ String] encoded Optional string buffer, default provided by rb_str_encoded_binary
189
- * @param [ Float ] self The ruby float value.
190
- *
191
- * @return [ String ] The encoded bytes with double value appended.
192
- *
193
- * @since 2.0.0
248
+ * Get a int32 from the buffer.
194
249
  */
195
- static VALUE rb_float_to_bson(int argc, VALUE *argv, VALUE self)
250
+ VALUE rb_bson_byte_buffer_get_int32(VALUE self)
196
251
  {
197
- const double v = NUM2DBL(self);
198
- VALUE encoded = rb_get_default_encoded(argc, argv);
199
- # if __BYTE_ORDER == __LITTLE_ENDIAN
200
- rb_str_cat(encoded, (char*) &v, 8);
201
- #elif __BYTE_ORDER == __BIG_ENDIAN
202
- doublebytet swap;
203
- unsigned char b;
204
- swap.d = v;
205
- for (int i=0; i < sizeof(double)/2; i++) {
206
- b=swap.b[i];
207
- swap.b[i] = swap.b[((sizeof(double)-1)-i)];
208
- swap.b[((sizeof(double)-1)-i)]=b;
209
- }
210
- rb_str_cat(encoded, (char*)&swap.d, 8);
211
- #endif
212
- return encoded;
252
+ byte_buffer_t *b;
253
+ int32_t i32;
254
+
255
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
256
+ ENSURE_BSON_READ(b, 4);
257
+ i32 = le32toh(*((int32_t*)READ_PTR(b)));
258
+ b->read_position += 4;
259
+ return INT2NUM(i32);
213
260
  }
214
261
 
215
262
  /**
216
- * Convert the bytes for the double into a Ruby float.
217
- *
218
- * @example Convert the bytes to a float.
219
- * rb_float_from_bson_double(class, bytes);
220
- *
221
- * @param [ Class ] The float class.
222
- * @param [ String ] The double bytes.
223
- *
224
- * @return [ Float ] The ruby float value.
225
- *
226
- * @since 2.0.0
263
+ * Get a int64 from the buffer.
227
264
  */
228
- static VALUE rb_float_from_bson_double(VALUE self, VALUE value)
265
+ VALUE rb_bson_byte_buffer_get_int64(VALUE self)
229
266
  {
230
- const char * bytes;
231
- double v;
232
- bytes = StringValuePtr(value);
233
- #if __BYTE_ORDER == __LITTLE_ENDIAN
234
- memcpy(&v, bytes, RSTRING_LEN(value));
235
- #else
236
- doublebytet swap;
237
- unsigned char b;
238
- memcpy(&swap.d, bytes, RSTRING_LEN(value));
239
- for (int i=0; i < sizeof(double)/2; i++) {
240
- b=swap.b[i];
241
- swap.b[i] = swap.b[((sizeof(double)-1)-i)];
242
- swap.b[((sizeof(double)-1)-i)]=b;
243
- }
244
- memcpy(&v, swap.b, RSTRING_LEN(value));
245
- #endif
246
-
247
- return DBL2NUM(v);
267
+ byte_buffer_t *b;
268
+ int64_t i64;
269
+
270
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
271
+ ENSURE_BSON_READ(b, 8);
272
+ i64 = le64toh(*((int64_t*)READ_PTR(b)));
273
+ b->read_position += 8;
274
+ return LONG2NUM(i64);
248
275
  }
249
276
 
250
277
  /**
251
- * Generate the data for the next object id.
252
- *
253
- * @example Generate the data for the next object id.
254
- * rb_object_id_generator_next(0, NULL, object_id);
255
- *
256
- * @param [ int ] argc The argument count.
257
- * @param [ Time ] time The optional Ruby time.
258
- * @param [ BSON::ObjectId ] self The object id.
259
- *
260
- * @return [ String ] The raw bytes for the id.
261
- *
262
- * @since 2.0.0
278
+ * Get a string from the buffer.
263
279
  */
264
- static VALUE rb_object_id_generator_next(int argc, VALUE* args, VALUE self)
280
+ VALUE rb_bson_byte_buffer_get_string(VALUE self)
265
281
  {
266
- char bytes[12];
267
- unsigned long t;
268
- unsigned short pid = htons(getpid());
269
-
270
- if (argc == 0 || (argc == 1 && *args == Qnil)) {
271
- t = htonl((int) time(NULL));
272
- }
273
- else {
274
- t = htonl(NUM2UINT(rb_funcall(*args, rb_intern("to_i"), 0)));
275
- }
276
-
277
- unsigned long c;
278
- c = htonl(rb_bson_object_id_counter << 8);
279
-
280
- # if __BYTE_ORDER == __LITTLE_ENDIAN
281
- memcpy(&bytes, &t, 4);
282
- memcpy(&bytes[4], rb_bson_machine_id_hash, 3);
283
- memcpy(&bytes[7], &pid, 2);
284
- memcpy(&bytes[9], (unsigned char*) &c, 3);
285
- #elif __BYTE_ORDER == __BIG_ENDIAN
286
- memcpy(&bytes, ((unsigned char*) &t) + 4, 4);
287
- memcpy(&bytes[4], rb_bson_machine_id_hash, 3);
288
- memcpy(&bytes[7], &pid, 2);
289
- memcpy(&bytes[9], ((unsigned char*) &c) + 4, 3);
290
- #endif
291
- rb_bson_object_id_counter++;
292
- return rb_str_new(bytes, 12);
282
+ byte_buffer_t *b;
283
+ int32_t length;
284
+ VALUE string;
285
+
286
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
287
+ ENSURE_BSON_READ(b, 4);
288
+ length = le32toh(*((int32_t*)READ_PTR(b)));
289
+ b->read_position += 4;
290
+ ENSURE_BSON_READ(b, length);
291
+ string = rb_enc_str_new(READ_PTR(b), length - 1, rb_utf8_encoding());
292
+ b->read_position += length;
293
+ return string;
293
294
  }
294
295
 
295
296
  /**
296
- * Check if the integer is a 32 bit integer.
297
- *
298
- * @example Check if the integer is 32 bit.
299
- * rb_integer_is_bson_int32(integer);
300
- *
301
- * @param [ Integer ] self The ruby integer.
302
- *
303
- * @return [ true, false ] If the integer is 32 bit.
304
- *
305
- * @since 2.0.0
297
+ * Writes a byte to the byte buffer.
306
298
  */
307
- static VALUE rb_integer_is_bson_int32(VALUE self)
299
+ VALUE rb_bson_byte_buffer_put_byte(VALUE self, VALUE byte)
308
300
  {
309
- const int64_t v = NUM2INT64(self);
310
- if (INT_MIN <= v && v <= INT_MAX) {
311
- return Qtrue;
312
- }
313
- else {
314
- return Qfalse;
315
- }
301
+ byte_buffer_t *b;
302
+ const char *str = RSTRING_PTR(byte);
303
+
304
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
305
+ ENSURE_BSON_WRITE(b, 1);
306
+ memcpy(WRITE_PTR(b), str, 1);
307
+ b->write_position += 1;
308
+
309
+ return self;
316
310
  }
317
311
 
318
312
  /**
319
- * Convert the Ruby integer into a BSON as per the 32 bit specification,
320
- * which is 4 bytes.
321
- *
322
- * @example Convert the integer to 32bit BSON.
323
- * rb_integer_to_bson_int32(128, encoded);
324
- *
325
- * @param [ Integer ] self The Ruby integer.
326
- * @param [ String ] encoded The Ruby binary string to append to.
327
- *
328
- * @return [ String ] encoded Ruby binary string with BSON raw bytes appended.
329
- *
330
- * @since 2.0.0
313
+ * Writes bytes to the byte buffer.
331
314
  */
332
- static VALUE rb_integer_to_bson_int32(VALUE self, VALUE encoded)
315
+ VALUE rb_bson_byte_buffer_put_bytes(VALUE self, VALUE bytes)
333
316
  {
334
- const int32_t v = NUM2INT(self);
335
- const char bytes[4] = {
336
- v & 255,
337
- (v >> 8) & 255,
338
- (v >> 16) & 255,
339
- (v >> 24) & 255
340
- };
341
- return rb_str_cat(encoded, bytes, 4);
317
+ byte_buffer_t *b;
318
+ const char *str = RSTRING_PTR(bytes);
319
+ const size_t length = RSTRING_LEN(bytes);
320
+
321
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
322
+ ENSURE_BSON_WRITE(b, length);
323
+ memcpy(WRITE_PTR(b), str, length);
324
+ b->write_position += length;
325
+ return self;
342
326
  }
343
327
 
344
328
  /**
345
- * Initialize the bson array index for integers.
346
- *
347
- * @example Initialize the array.
348
- * rb_bson_init_integer_bson_array_indexes();
349
- *
350
- * @since 2.0.0
329
+ * Writes a cstring to the byte buffer.
351
330
  */
352
- static void rb_bson_init_integer_bson_array_indexes(void)
331
+ VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string)
353
332
  {
354
- int i;
355
- for (i = 0; i < BSON_INDEX_SIZE; i++) {
356
- snprintf(rb_bson_array_indexes[i], BSON_INDEX_CHAR_SIZE, "%d", i);
333
+ byte_buffer_t *b;
334
+ char *c_str = RSTRING_PTR(string);
335
+ size_t length = RSTRING_LEN(string) + 1;
336
+
337
+ if (!rb_bson_utf8_validate(c_str, length - 1, false)) {
338
+ rb_raise(rb_eArgError, "String %s is not a valid UTF-8 CString.", c_str);
357
339
  }
340
+
341
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
342
+ ENSURE_BSON_WRITE(b, length);
343
+ memcpy(WRITE_PTR(b), c_str, length);
344
+ b->write_position += length;
345
+ return self;
358
346
  }
359
347
 
360
348
  /**
361
- * Convert the Ruby integer into a character string and append with nullchar to encoded BSON.
362
- *
363
- * @example Convert the integer to string and append with nullchar.
364
- * rb_integer_to_bson_key(128, encoded);
365
- *
366
- * @param [ Integer ] self The Ruby integer.
367
- * @param [ String ] encoded The Ruby binary string to append to.
368
- *
369
- * @return [ String ] encoded Ruby binary string with BSON raw bytes appended.
370
- *
371
- * @since 2.0.0
349
+ * Writes a 64 bit double to the buffer.
372
350
  */
373
- static VALUE rb_integer_to_bson_key(int argc, VALUE *argv, VALUE self)
351
+ VALUE rb_bson_byte_buffer_put_double(VALUE self, VALUE f)
374
352
  {
375
- char bytes[INTEGER_CHAR_SIZE];
376
- const int64_t v = NUM2INT64(self);
377
- VALUE encoded = rb_get_default_encoded(argc, argv);
378
- int length;
379
- if (v < BSON_INDEX_SIZE)
380
- return rb_str_cat(encoded, rb_bson_array_indexes[v], strlen(rb_bson_array_indexes[v]) + 1);
381
- length = snprintf(bytes, INTEGER_CHAR_SIZE, "%ld", (long)v);
382
- return rb_str_cat(encoded, bytes, length + 1);
353
+ byte_buffer_t *b;
354
+ union {double d; uint64_t i64;} ucast;
355
+
356
+ ucast.d = NUM2DBL(f);
357
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
358
+ ENSURE_BSON_WRITE(b, 8);
359
+ ucast.i64 = htole64(ucast.i64);
360
+ *(int64_t*)WRITE_PTR(b) = ucast.i64;
361
+ b->write_position += 8;
362
+
363
+ return self;
383
364
  }
384
365
 
385
366
  /**
386
- * Convert the provided raw bytes into a 32bit Ruby integer.
387
- *
388
- * @example Convert the bytes to an Integer.
389
- * rb_integer_from_bson_int32(Int32, bytes);
390
- *
391
- * @param [ BSON::Int32 ] self The Int32 eigenclass.
392
- * @param [ String ] bytes The raw bytes.
393
- *
394
- * @return [ Integer ] The Ruby integer.
395
- *
396
- * @since 2.0.0
367
+ * Writes a 32 bit integer to the byte buffer.
397
368
  */
398
- static VALUE rb_integer_from_bson_int32(VALUE self, VALUE bson)
369
+ VALUE rb_bson_byte_buffer_put_int32(VALUE self, VALUE i)
399
370
  {
400
- const uint8_t *v = (const uint8_t*) StringValuePtr(bson);
401
- const int32_t integer = v[0] + (v[1] << 8) + (v[2] << 16) + (v[3] << 24);
402
- return INT2NUM(integer);
371
+ byte_buffer_t *b;
372
+ const int32_t i32 = NUM2INT(i);
373
+
374
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
375
+ ENSURE_BSON_WRITE(b, 4);
376
+ *((int32_t*)WRITE_PTR(b)) = htole32(i32);
377
+ b->write_position += 4;
378
+
379
+ return self;
403
380
  }
404
381
 
405
382
  /**
406
- * Convert the raw BSON bytes into an int64_t type.
407
- *
408
- * @example Convert the bytes into an int64_t.
409
- * rb_bson_to_int64_t(bson);
410
- *
411
- * @param [ String ] bson The raw bytes.
412
- *
413
- * @return [ int64_t ] The int64_t.
414
- *
415
- * @since 2.0.0
383
+ * Writes a 64 bit integer to the byte buffer.
416
384
  */
417
- static int64_t rb_bson_to_int64_t(VALUE bson)
385
+ VALUE rb_bson_byte_buffer_put_int64(VALUE self, VALUE i)
418
386
  {
419
- uint8_t *v;
420
- uint32_t byte_0, byte_1;
421
- int64_t byte_2, byte_3;
422
- int64_t lower, upper;
423
- v = (uint8_t*) StringValuePtr(bson);
424
- byte_0 = v[0];
425
- byte_1 = v[1];
426
- byte_2 = v[2];
427
- byte_3 = v[3];
428
- lower = byte_0 + (byte_1 << 8) + (byte_2 << 16) + (byte_3 << 24);
429
- byte_0 = v[4];
430
- byte_1 = v[5];
431
- byte_2 = v[6];
432
- byte_3 = v[7];
433
- upper = byte_0 + (byte_1 << 8) + (byte_2 << 16) + (byte_3 << 24);
434
- return lower + (upper << 32);
387
+ byte_buffer_t *b;
388
+ const int64_t i64 = NUM2LONG(i);
389
+
390
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
391
+ ENSURE_BSON_WRITE(b, 8);
392
+ *((int64_t*)WRITE_PTR(b)) = htole64(i64);
393
+ b->write_position += 8;
394
+
395
+ return self;
435
396
  }
436
397
 
437
398
  /**
438
- * Convert the provided raw bytes into a 64bit Ruby integer.
439
- *
440
- * @example Convert the bytes to an Integer.
441
- * rb_integer_from_bson_int64(Int64, bytes);
442
- *
443
- * @param [ BSON::Int64 ] self The Int64 eigenclass.
444
- * @param [ String ] bytes The raw bytes.
445
- *
446
- * @return [ Integer ] The Ruby integer.
447
- *
448
- * @since 2.0.0
399
+ * Writes a string to the byte buffer.
449
400
  */
450
- static VALUE rb_integer_from_bson_int64(VALUE self, VALUE bson)
401
+ VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string)
451
402
  {
452
- return INT642NUM(rb_bson_to_int64_t(bson));
403
+ byte_buffer_t *b;
404
+
405
+ char *str = RSTRING_PTR(string);
406
+ const size_t length = RSTRING_LEN(string) + 1;
407
+
408
+ if (!rb_bson_utf8_validate(str, length - 1, true)) {
409
+ rb_raise(rb_eArgError, "String %s is not valid UTF-8.", str);
410
+ }
411
+
412
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
413
+ ENSURE_BSON_WRITE(b, length + 4);
414
+ *((int32_t*)WRITE_PTR(b)) = htole32(length);
415
+ b->write_position += 4;
416
+ memcpy(WRITE_PTR(b), str, length);
417
+ b->write_position += length;
418
+
419
+ return self;
453
420
  }
454
421
 
455
422
  /**
456
- * Append the 64-bit integer to encoded BSON Ruby binary string.
457
- *
458
- * @example Append the 64-bit integer to encoded BSON.
459
- * int64_t_to_bson(128, encoded);
460
- *
461
- * @param [ int64_t ] self The 64-bit integer.
462
- * @param [ String ] encoded The BSON Ruby binary string to append to.
463
- *
464
- * @return [ String ] encoded Ruby binary string with BSON raw bytes appended.
465
- *
466
- * @since 2.0.0
423
+ * Get the read position.
467
424
  */
468
- static VALUE int64_t_to_bson(int64_t v, VALUE encoded)
425
+ VALUE rb_bson_byte_buffer_read_position(VALUE self)
469
426
  {
470
- const char bytes[8] = {
471
- v & 255,
472
- (v >> 8) & 255,
473
- (v >> 16) & 255,
474
- (v >> 24) & 255,
475
- (v >> 32) & 255,
476
- (v >> 40) & 255,
477
- (v >> 48) & 255,
478
- (v >> 56) & 255
479
- };
480
- return rb_str_cat(encoded, bytes, 8);
427
+ byte_buffer_t *b;
428
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
429
+ return INT2NUM(b->read_position);
481
430
  }
482
431
 
483
432
  /**
484
- * Convert the Ruby integer into a BSON as per the 64 bit specification,
485
- * which is 8 bytes.
486
- *
487
- * @example Convert the integer to 64bit BSON.
488
- * rb_integer_to_bson_int64(128, encoded);
489
- *
490
- * @param [ Integer ] self The Ruby integer.
491
- * @param [ String ] encoded The Ruby binary string to append to.
492
- *
493
- * @return [ String ] encoded Ruby binary string with BSON raw bytes appended.
494
- *
495
- * @since 2.0.0
433
+ * Replace a 32 bit integer in the byte buffer.
496
434
  */
497
- static VALUE rb_integer_to_bson_int64(VALUE self, VALUE encoded)
435
+ VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i)
498
436
  {
499
- return int64_t_to_bson(NUM2INT64(self), StringValue(encoded));
437
+ byte_buffer_t *b;
438
+ const int32_t position = NUM2INT(index);
439
+ const int32_t i32 = htole32(NUM2INT(i));
440
+
441
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
442
+
443
+ memcpy(READ_PTR(b) + position, &i32, 4);
444
+
445
+ return self;
500
446
  }
501
447
 
502
448
  /**
503
- * Converts the milliseconds time to the raw BSON bytes. We need to
504
- * explicitly convert using 64 bit here.
505
- *
506
- * @example Convert the milliseconds value to BSON bytes.
507
- * rb_time_to_bson(time, 2124132340000, encoded);
508
- *
509
- * @param [ Time ] self The Ruby Time object.
510
- * @param [ Integer ] milliseconds The milliseconds pre/post epoch.
511
- * @param [ String ] encoded The Ruby binary string to append to.
512
- *
513
- * @return [ String ] encoded Ruby binary string with time BSON raw bytes appended.
514
- *
515
- * @since 2.0.0
449
+ * Get the write position.
516
450
  */
517
- static VALUE rb_time_to_bson(int argc, VALUE *argv, VALUE self)
451
+ VALUE rb_bson_byte_buffer_write_position(VALUE self)
518
452
  {
519
- int64_t t = NUM2INT64(rb_funcall(self, rb_intern("to_i"), 0));
520
- int64_t milliseconds = (int64_t)(t * 1000);
521
- int32_t micro = NUM2INT(rb_funcall(self, rb_intern("usec"), 0));
522
- int64_t time = milliseconds + (micro / 1000);
523
- VALUE encoded = rb_get_default_encoded(argc, argv);
524
- return int64_t_to_bson(time, encoded);
453
+ byte_buffer_t *b;
454
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
455
+ return INT2NUM(b->write_position);
525
456
  }
526
457
 
527
458
  /**
528
- * Converts the raw BSON bytes into a UTC Ruby time.
529
- *
530
- * @example Convert the bytes to a Ruby time.
531
- * rb_time_from_bson(time, bytes);
532
- *
533
- * @param [ Class ] self The Ruby Time class.
534
- * @param [ String ] bytes The raw BSON bytes.
535
- *
536
- * @return [ Time ] The UTC time.
537
- *
538
- * @since 2.0.0
459
+ * Convert the buffer to a string.
539
460
  */
540
- static VALUE rb_time_from_bson(VALUE self, VALUE bytes)
461
+ VALUE rb_bson_byte_buffer_to_s(VALUE self)
541
462
  {
542
- const int64_t millis = rb_bson_to_int64_t(bytes);
543
- const VALUE time = rb_time_new(millis / 1000, (millis % 1000) * 1000);
544
- return rb_funcall(time, rb_utc_method, 0);
463
+ byte_buffer_t *b;
464
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
465
+ return rb_str_new(READ_PTR(b), READ_SIZE(b));
545
466
  }
546
467
 
547
468
  /**
548
- * Set four bytes for int32 in a binary string and return it.
549
- *
550
- * @example Set int32 in a BSON string.
551
- * rb_string_set_int32(self, pos, int32)
552
- *
553
- * @param [ String ] self The Ruby binary string.
554
- * @param [ Fixnum ] The position to set.
555
- * @param [ Fixnum ] The int32 value.
556
- *
557
- * @return [ String ] The binary string.
558
- *
559
- * @since 2.0.0
469
+ * Get the size of the byte_buffer_t in memory.
560
470
  */
561
- static VALUE rb_string_set_int32(VALUE str, VALUE pos, VALUE an_int32)
471
+ size_t rb_bson_byte_buffer_memsize(const void *ptr)
562
472
  {
563
- const int32_t offset = NUM2INT(pos);
564
- const int32_t v = NUM2INT(an_int32);
565
- const char bytes[4] = {
566
- v & 255,
567
- (v >> 8) & 255,
568
- (v >> 16) & 255,
569
- (v >> 24) & 255
570
- };
571
- rb_str_modify(str);
572
- if (offset < 0 || offset + 4 > RSTRING_LEN(str)) {
573
- rb_raise(rb_eArgError, "invalid position");
574
- }
575
- memcpy(RSTRING_PTR(str) + offset, bytes, 4);
576
- return str;
473
+ return ptr ? sizeof(byte_buffer_t) : 0;
577
474
  }
578
475
 
579
476
  /**
580
- * Check for illegal characters in string.
581
- *
582
- * @example Check for illegal characters.
583
- * rb_string_check_for_illegal_characters("test");
584
- *
585
- * @param [ String ] self The string value.
586
- *
587
- * @since 2.0.0
477
+ * Free the memory for the byte buffer.
588
478
  */
589
- static VALUE rb_string_check_for_illegal_characters(VALUE self)
479
+ void rb_bson_byte_buffer_free(void *ptr)
590
480
  {
591
- if (strlen(RSTRING_PTR(self)) != (size_t) RSTRING_LEN(self))
592
- rb_raise(rb_eArgError, "Illegal C-String contains a null byte.");
593
- return self;
481
+ byte_buffer_t *b = ptr;
482
+ if (b->b_ptr != b->buffer) {
483
+ xfree(b->b_ptr);
484
+ }
485
+ xfree(b);
594
486
  }
595
487
 
596
488
  /**
597
- * Encode a false value to bson.
598
- *
599
- * @example Encode the false value.
600
- * rb_false_class_to_bson(0, false);
601
- *
602
- * @param [ int ] argc The number of arguments.
603
- * @param [ Array<Object> ] argv The arguments.
604
- * @param [ TrueClass ] self The true value.
605
- *
606
- * @return [ String ] The encoded string.
607
- *
608
- * @since 2.0.0
489
+ * Expand the byte buffer linearly.
609
490
  */
610
- static VALUE rb_false_class_to_bson(int argc, VALUE *argv, VALUE self)
491
+ void rb_bson_expand_buffer(byte_buffer_t* buffer_ptr, size_t length)
611
492
  {
612
- VALUE encoded = rb_get_default_encoded(argc, argv);
613
- rb_str_cat(encoded, &rb_bson_null_byte, 1);
614
- return encoded;
493
+ const size_t required_size = buffer_ptr->write_position - buffer_ptr->read_position + length;
494
+ if (required_size <= buffer_ptr->size) {
495
+ memmove(buffer_ptr->b_ptr, READ_PTR(buffer_ptr), READ_SIZE(buffer_ptr));
496
+ buffer_ptr->write_position -= buffer_ptr->read_position;
497
+ buffer_ptr->read_position = 0;
498
+ } else {
499
+ char *new_b_ptr;
500
+ const size_t new_size = required_size * 2;
501
+ new_b_ptr = ALLOC_N(char, new_size);
502
+ memcpy(new_b_ptr, READ_PTR(buffer_ptr), READ_SIZE(buffer_ptr));
503
+ if (buffer_ptr->b_ptr != buffer_ptr->buffer) {
504
+ xfree(buffer_ptr->b_ptr);
505
+ }
506
+ buffer_ptr->b_ptr = new_b_ptr;
507
+ buffer_ptr->size = new_size;
508
+ buffer_ptr->write_position -= buffer_ptr->read_position;
509
+ buffer_ptr->read_position = 0;
510
+ }
615
511
  }
616
512
 
617
513
  /**
618
- * Encode a true value to bson.
619
- *
620
- * @example Encode the true value.
621
- * rb_true_class_to_bson(0, true);
622
- *
623
- * @param [ int ] argc The number of arguments.
624
- * @param [ Array<Object> ] argv The arguments.
625
- * @param [ TrueClass ] self The true value.
626
- *
627
- * @return [ String ] The encoded string.
628
- *
629
- * @since 2.0.0
514
+ * Generate the next object id.
630
515
  */
631
- static VALUE rb_true_class_to_bson(int argc, VALUE *argv, VALUE self)
516
+ VALUE rb_bson_object_id_generator_next(int argc, VALUE* args, VALUE self)
632
517
  {
633
- VALUE encoded = rb_get_default_encoded(argc, argv);
634
- rb_str_cat(encoded, &rb_bson_true_byte, 1);
635
- return encoded;
518
+ char bytes[12];
519
+ unsigned long t;
520
+ unsigned long c;
521
+ unsigned short pid = htons(getpid());
522
+
523
+ if (argc == 0 || (argc == 1 && *args == Qnil)) {
524
+ t = htonl((int) time(NULL));
525
+ }
526
+ else {
527
+ t = htonl(NUM2UINT(rb_funcall(*args, rb_intern("to_i"), 0)));
528
+ }
529
+
530
+ c = htonl(rb_bson_object_id_counter << 8);
531
+
532
+ # if __BYTE_ORDER == __LITTLE_ENDIAN
533
+ memcpy(&bytes, &t, 4);
534
+ memcpy(&bytes[4], rb_bson_machine_id_hash, 3);
535
+ memcpy(&bytes[7], &pid, 2);
536
+ memcpy(&bytes[9], (unsigned char*) &c, 3);
537
+ #elif __BYTE_ORDER == __BIG_ENDIAN
538
+ memcpy(&bytes, ((unsigned char*) &t) + 4, 4);
539
+ memcpy(&bytes[4], rb_bson_machine_id_hash, 3);
540
+ memcpy(&bytes[7], &pid, 2);
541
+ memcpy(&bytes[9], ((unsigned char*) &c) + 4, 3);
542
+ #endif
543
+ rb_bson_object_id_counter++;
544
+ return rb_str_new(bytes, 12);
636
545
  }
637
546
 
638
547
  /**
639
- * Decode a string from bson.
640
- *
641
- * @example Decode a string.
642
- * rb_bson_string_from_bson(string, io);
643
- *
644
- * @param [ String ] self The string class.
645
- * @param [ IO ] bson The io stream of BSON.
646
- *
647
- * @return [ String ] The decoded string.
648
- *
649
- * @since 3.2.5
548
+ * Taken from libbson.
650
549
  */
651
- static VALUE rb_bson_string_from_bson(VALUE self, VALUE bson)
550
+ static void _bson_utf8_get_sequence(const char *utf8, uint8_t *seq_length, uint8_t *first_mask)
652
551
  {
653
- ID read_method = rb_intern("read");
654
- VALUE int_bytes = rb_funcall(bson, read_method, 1, 4);
655
- VALUE size = rb_integer_from_bson_int32(self, int_bytes);
656
- VALUE string_bytes = rb_funcall(bson, read_method, 1, size - 1);
657
- return rb_bson_from_bson_string(string_bytes);
552
+ unsigned char c = *(const unsigned char *)utf8;
553
+ uint8_t m;
554
+ uint8_t n;
555
+
556
+ /*
557
+ * See the following[1] for a description of what the given multi-byte
558
+ * sequences will be based on the bits set of the first byte. We also need
559
+ * to mask the first byte based on that. All subsequent bytes are masked
560
+ * against 0x3F.
561
+ *
562
+ * [1] http://www.joelonsoftware.com/articles/Unicode.html
563
+ */
564
+
565
+ if ((c & 0x80) == 0) {
566
+ n = 1;
567
+ m = 0x7F;
568
+ } else if ((c & 0xE0) == 0xC0) {
569
+ n = 2;
570
+ m = 0x1F;
571
+ } else if ((c & 0xF0) == 0xE0) {
572
+ n = 3;
573
+ m = 0x0F;
574
+ } else if ((c & 0xF8) == 0xF0) {
575
+ n = 4;
576
+ m = 0x07;
577
+ } else if ((c & 0xFC) == 0xF8) {
578
+ n = 5;
579
+ m = 0x03;
580
+ } else if ((c & 0xFE) == 0xFC) {
581
+ n = 6;
582
+ m = 0x01;
583
+ } else {
584
+ n = 0;
585
+ m = 0;
586
+ }
587
+
588
+ *seq_length = n;
589
+ *first_mask = m;
658
590
  }
659
591
 
660
592
  /**
661
- * Initialize the bson c extension.
662
- *
663
- * @since 2.0.0
593
+ * Taken from libbson.
664
594
  */
665
- void Init_native()
595
+ bool rb_bson_utf8_validate(const char *utf8, size_t utf8_len, bool allow_null)
666
596
  {
667
- // Get all the constants to be used in the extensions.
668
- VALUE bson = rb_const_get(rb_cObject, rb_intern("BSON"));
669
- VALUE integer = rb_const_get(bson, rb_intern("Integer"));
670
- VALUE floats = rb_const_get(bson, rb_intern("Float"));
671
- VALUE float_class = rb_const_get(floats, rb_intern("ClassMethods"));
672
- VALUE time = rb_const_get(bson, rb_intern("Time"));
673
- VALUE time_class = rb_singleton_class(time);
674
- VALUE int32 = rb_const_get(bson, rb_intern("Int32"));
675
- VALUE int32_class = rb_singleton_class(int32);
676
- VALUE int64 = rb_const_get(bson, rb_intern("Int64"));
677
- VALUE int64_class = rb_singleton_class(int64);
678
- VALUE object_id = rb_const_get(bson, rb_intern("ObjectId"));
679
- VALUE generator = rb_const_get(object_id, rb_intern("Generator"));
680
- VALUE string = rb_const_get(bson, rb_intern("String"));
681
- VALUE string_class = rb_singleton_class(string);
682
- VALUE true_class = rb_const_get(bson, rb_intern("TrueClass"));
683
- VALUE false_class = rb_const_get(bson, rb_intern("FalseClass"));
684
- // needed to hash the machine id
685
- rb_require("digest/md5");
686
- VALUE digest_class = rb_const_get(rb_cObject, rb_intern("Digest"));
687
- VALUE md5_class = rb_const_get(digest_class, rb_intern("MD5"));
688
- rb_bson_utf8_string = rb_const_get(bson, rb_intern("UTF8"));
689
- rb_utc_method = rb_intern("utc");
597
+ uint32_t c;
598
+ uint8_t first_mask;
599
+ uint8_t seq_length;
600
+ unsigned i;
601
+ unsigned j;
602
+
603
+ if (!utf8) {
604
+ return false;
605
+ }
690
606
 
691
- // Get the object id machine id and hash it.
692
- char rb_bson_machine_id[256];
693
- gethostname(rb_bson_machine_id, sizeof rb_bson_machine_id);
694
- rb_bson_machine_id[255] = '\0';
695
- VALUE digest = rb_funcall(md5_class, rb_intern("digest"), 1, rb_str_new2(rb_bson_machine_id));
696
- memcpy(rb_bson_machine_id_hash, RSTRING_PTR(digest), RSTRING_LEN(digest));
607
+ for (i = 0; i < utf8_len; i += seq_length) {
608
+ _bson_utf8_get_sequence(&utf8[i], &seq_length, &first_mask);
609
+
610
+ /*
611
+ * Ensure we have a valid multi-byte sequence length.
612
+ */
613
+ if (!seq_length) {
614
+ return false;
615
+ }
616
+
617
+ /*
618
+ * Ensure we have enough bytes left.
619
+ */
620
+ if ((utf8_len - i) < seq_length) {
621
+ return false;
622
+ }
623
+
624
+ /*
625
+ * Also calculate the next char as a unichar so we can
626
+ * check code ranges for non-shortest form.
627
+ */
628
+ c = utf8 [i] & first_mask;
629
+
630
+ /*
631
+ * Check the high-bits for each additional sequence byte.
632
+ */
633
+ for (j = i + 1; j < (i + seq_length); j++) {
634
+ c = (c << 6) | (utf8 [j] & 0x3F);
635
+ if ((utf8[j] & 0xC0) != 0x80) {
636
+ return false;
637
+ }
638
+ }
639
+
640
+ /*
641
+ * Check for NULL bytes afterwards.
642
+ *
643
+ * Hint: if you want to optimize this function, starting here to do
644
+ * this in the same pass as the data above would probably be a good
645
+ * idea. You would add a branch into the inner loop, but save possibly
646
+ * on cache-line bouncing on larger strings. Just a thought.
647
+ */
648
+ if (!allow_null) {
649
+ for (j = 0; j < seq_length; j++) {
650
+ if (((i + j) > utf8_len) || !utf8[i + j]) {
651
+ return false;
652
+ }
653
+ }
654
+ }
655
+
656
+ /*
657
+ * Code point won't fit in utf-16, not allowed.
658
+ */
659
+ if (c > 0x0010FFFF) {
660
+ return false;
661
+ }
662
+
663
+ /*
664
+ * Byte is in reserved range for UTF-16 high-marks
665
+ * for surrogate pairs.
666
+ */
667
+ if ((c & 0xFFFFF800) == 0xD800) {
668
+ return false;
669
+ }
670
+
671
+ /*
672
+ * Check non-shortest form unicode.
673
+ */
674
+ switch (seq_length) {
675
+ case 1:
676
+ if (c <= 0x007F) {
677
+ continue;
678
+ }
679
+ return false;
680
+
681
+ case 2:
682
+ if ((c >= 0x0080) && (c <= 0x07FF)) {
683
+ continue;
684
+ } else if (c == 0) {
685
+ /* Two-byte representation for NULL. */
686
+ continue;
687
+ }
688
+ return false;
689
+
690
+ case 3:
691
+ if (((c >= 0x0800) && (c <= 0x0FFF)) ||
692
+ ((c >= 0x1000) && (c <= 0xFFFF))) {
693
+ continue;
694
+ }
695
+ return false;
696
+
697
+ case 4:
698
+ if (((c >= 0x10000) && (c <= 0x3FFFF)) ||
699
+ ((c >= 0x40000) && (c <= 0xFFFFF)) ||
700
+ ((c >= 0x100000) && (c <= 0x10FFFF))) {
701
+ continue;
702
+ }
703
+ return false;
704
+
705
+ default:
706
+ return false;
707
+ }
708
+ }
697
709
 
698
- // Integer optimizations.
699
- rb_undef_method(integer, "to_bson_int32");
700
- rb_define_method(integer, "to_bson_int32", rb_integer_to_bson_int32, 1);
701
- rb_undef_method(integer, "to_bson_int64");
702
- rb_define_method(integer, "to_bson_int64", rb_integer_to_bson_int64, 1);
703
- rb_undef_method(integer, "bson_int32?");
704
- rb_define_method(integer, "bson_int32?", rb_integer_is_bson_int32, 0);
705
- rb_bson_init_integer_bson_array_indexes();
706
- rb_undef_method(integer, "to_bson_key");
707
- rb_define_method(integer, "to_bson_key", rb_integer_to_bson_key, -1);
708
- rb_undef_method(int32_class, "from_bson_int32");
709
- rb_define_private_method(int32_class, "from_bson_int32", rb_integer_from_bson_int32, 1);
710
- rb_undef_method(int64_class, "from_bson_int64");
711
- rb_define_private_method(int64_class, "from_bson_int64", rb_integer_from_bson_int64, 1);
712
-
713
- // Float optimizations.
714
- rb_undef_method(floats, "to_bson");
715
- rb_define_method(floats, "to_bson", rb_float_to_bson, -1);
716
- rb_undef_method(float_class, "from_bson_double");
717
- rb_define_private_method(float_class, "from_bson_double", rb_float_from_bson_double, 1);
718
-
719
- // Boolean optimizations - deserialization has no benefit so we provide
720
- // no extensions there.
721
- rb_undef_method(true_class, "to_bson");
722
- rb_define_method(true_class, "to_bson", rb_true_class_to_bson, -1);
723
- rb_undef_method(false_class, "to_bson");
724
- rb_define_method(false_class, "to_bson", rb_false_class_to_bson, -1);
725
-
726
- // Optimizations around time serialization and deserialization.
727
- rb_undef_method(time, "to_bson");
728
- rb_define_method(time, "to_bson", rb_time_to_bson, -1);
729
- rb_undef_method(time_class, "from_bson");
730
- rb_define_method(time_class, "from_bson", rb_time_from_bson, 1);
731
-
732
- // String optimizations.
733
- rb_undef_method(string, "set_int32");
734
- rb_define_method(string, "set_int32", rb_string_set_int32, 2);
735
- rb_undef_method(string, "from_bson_string");
736
- rb_define_method(string, "from_bson_string", rb_bson_from_bson_string, 0);
737
- rb_undef_method(string_class, "from_bson");
738
- rb_define_method(string_class, "from_bson", rb_bson_string_from_bson, 1);
739
- rb_undef_method(string, "check_for_illegal_characters!");
740
- rb_define_private_method(string, "check_for_illegal_characters!", rb_string_check_for_illegal_characters, 0);
741
-
742
- // Redefine the next method on the object id generator.
743
- rb_undef_method(generator, "next_object_id");
744
- rb_define_method(generator, "next_object_id", rb_object_id_generator_next, -1);
710
+ return true;
745
711
  }