bson 3.2.7 → 4.0.0.beta

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/Rakefile +2 -10
  5. data/ext/bson/native-endian.h +120 -0
  6. data/ext/bson/native.c +547 -581
  7. data/lib/bson.rb +0 -1
  8. data/lib/bson/array.rb +15 -14
  9. data/lib/bson/binary.rb +13 -13
  10. data/lib/bson/boolean.rb +3 -3
  11. data/lib/bson/code.rb +5 -8
  12. data/lib/bson/code_with_scope.rb +10 -13
  13. data/lib/bson/date.rb +2 -2
  14. data/lib/bson/date_time.rb +2 -2
  15. data/lib/bson/document.rb +33 -0
  16. data/lib/bson/false_class.rb +2 -2
  17. data/lib/bson/float.rb +5 -11
  18. data/lib/bson/hash.rb +15 -14
  19. data/lib/bson/int32.rb +8 -9
  20. data/lib/bson/int64.rb +3 -9
  21. data/lib/bson/integer.rb +6 -20
  22. data/lib/bson/nil_class.rb +4 -16
  23. data/lib/bson/object.rb +1 -1
  24. data/lib/bson/object_id.rb +14 -16
  25. data/lib/bson/regexp.rb +7 -7
  26. data/lib/bson/specialized.rb +6 -6
  27. data/lib/bson/string.rb +7 -91
  28. data/lib/bson/symbol.rb +8 -7
  29. data/lib/bson/time.rb +5 -5
  30. data/lib/bson/timestamp.rb +8 -6
  31. data/lib/bson/true_class.rb +2 -2
  32. data/lib/bson/undefined.rb +1 -26
  33. data/lib/bson/version.rb +1 -1
  34. data/spec/bson/array_spec.rb +1 -1
  35. data/spec/bson/byte_buffer_spec.rb +445 -0
  36. data/spec/bson/code_with_scope_spec.rb +3 -7
  37. data/spec/bson/document_spec.rb +66 -10
  38. data/spec/bson/hash_spec.rb +5 -5
  39. data/spec/bson/int32_spec.rb +7 -5
  40. data/spec/bson/integer_spec.rb +1 -6
  41. data/spec/bson/object_id_spec.rb +2 -39
  42. data/spec/bson/regexp_spec.rb +1 -1
  43. data/spec/bson/string_spec.rb +2 -204
  44. data/spec/bson/symbol_spec.rb +2 -17
  45. data/spec/support/shared_examples.rb +3 -26
  46. metadata +13 -11
  47. metadata.gz.sig +0 -0
  48. data/lib/bson/encodable.rb +0 -86
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 51775b047f4c1019139fc14f1a387e4ca52e1d00
4
- data.tar.gz: 11aebbe7f66db816c1f37aa2ac65a7a0417bc405
3
+ metadata.gz: 30f969161b2003e31eff164aeece7642d94d1569
4
+ data.tar.gz: a6d728c087dcc9aed56d9a272ded92d1cdf661c6
5
5
  SHA512:
6
- metadata.gz: 1583872b9ca51739634993deb90b709f009a717bd4744927d19eb37312e5440ecd98d9446a1c3f3ac6cddca2b2f99ea29a65e67d48a7031e30add88ba6d87e31
7
- data.tar.gz: dbc588428eb14d506bb70fa86c6522f333cd7f8c8684c127d4950fd50e2b4426f5cefe6d103e53c4da152074fc55ecb17b119a874e807cc97cc09581bbf7efae
6
+ metadata.gz: 4b5ec98b56e79843b4351b2a001852c83b5f8ed360096fee11f8579a2d78acde5c57f0f77d05b6d894829ace638c6f754eb9b79640fd0059730c32522c132c34
7
+ data.tar.gz: b4456f8ea0924747e9a253e63aaeb8972267964b249656603f765a8b0f61b6832ea3f12d1dd6ac0d0c2a6a5fbac03b98ea736e9d9cefc261ac364590fce60d50
Binary file
data.tar.gz.sig CHANGED
Binary file
data/Rakefile CHANGED
@@ -49,7 +49,6 @@ end
49
49
 
50
50
  require_relative "perf/bench"
51
51
 
52
- RSpec::Core::RakeTask.new(:spec)
53
52
  RSpec::Core::RakeTask.new(:rspec)
54
53
 
55
54
  if jruby?
@@ -74,8 +73,7 @@ task :clean_all => :clean do
74
73
  end
75
74
  end
76
75
 
77
- task :ext_spec => :compile do
78
- ENV["WITH_EXT"] = "C"
76
+ task :spec => :compile do
79
77
  Rake::Task["rspec"].invoke
80
78
  end
81
79
 
@@ -110,12 +108,6 @@ namespace :benchmark do
110
108
  require "bson"
111
109
  benchmark!
112
110
  end
113
-
114
- task :profile => :compile do
115
- puts "Profiling with native extensions..."
116
- require "bson"
117
- profile!
118
- end
119
111
  end
120
112
 
121
- task :default => [ :clean_all, :spec, :ext_spec ]
113
+ task :default => [ :clean_all, :spec ]
@@ -0,0 +1,120 @@
1
+ // "License": Public Domain
2
+ // I, Mathias Panzenböck, place this file hereby into the public domain. Use it at your own risk for whatever you like.
3
+ // In case there are jurisdictions that don't support putting things in the public domain you can also consider it to
4
+ // be "dual licensed" under the BSD, MIT and Apache licenses, if you want to. This code is trivial anyway. Consider it
5
+ // an example on how to get the endian conversion functions on different platforms.
6
+
7
+ #ifndef PORTABLE_ENDIAN_H__
8
+ #define PORTABLE_ENDIAN_H__
9
+
10
+ #if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__)
11
+
12
+ # define __WINDOWS__
13
+ # include <winsock2.h>
14
+ #else
15
+ # include <arpa/inet.h>
16
+ # include <sys/types.h>
17
+ #endif
18
+
19
+ #if defined(__linux__) || defined(__CYGWIN__)
20
+
21
+ # include <endian.h>
22
+
23
+ #elif defined(__APPLE__)
24
+
25
+ # include <libkern/OSByteOrder.h>
26
+
27
+ # define htobe16(x) OSSwapHostToBigInt16(x)
28
+ # define htole16(x) OSSwapHostToLittleInt16(x)
29
+ # define be16toh(x) OSSwapBigToHostInt16(x)
30
+ # define le16toh(x) OSSwapLittleToHostInt16(x)
31
+
32
+ # define htobe32(x) OSSwapHostToBigInt32(x)
33
+ # define htole32(x) OSSwapHostToLittleInt32(x)
34
+ # define be32toh(x) OSSwapBigToHostInt32(x)
35
+ # define le32toh(x) OSSwapLittleToHostInt32(x)
36
+
37
+ # define htobe64(x) OSSwapHostToBigInt64(x)
38
+ # define htole64(x) OSSwapHostToLittleInt64(x)
39
+ # define be64toh(x) OSSwapBigToHostInt64(x)
40
+ # define le64toh(x) OSSwapLittleToHostInt64(x)
41
+
42
+ # define __BYTE_ORDER BYTE_ORDER
43
+ # define __BIG_ENDIAN BIG_ENDIAN
44
+ # define __LITTLE_ENDIAN LITTLE_ENDIAN
45
+ # define __PDP_ENDIAN PDP_ENDIAN
46
+
47
+ #elif defined(__OpenBSD__)
48
+
49
+ # include <sys/endian.h>
50
+
51
+ #elif defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
52
+
53
+ # include <sys/endian.h>
54
+
55
+ # define be16toh(x) betoh16(x)
56
+ # define le16toh(x) letoh16(x)
57
+
58
+ # define be32toh(x) betoh32(x)
59
+ # define le32toh(x) letoh32(x)
60
+
61
+ # define be64toh(x) betoh64(x)
62
+ # define le64toh(x) letoh64(x)
63
+
64
+ #elif defined(__WINDOWS__)
65
+
66
+ # include <sys/param.h>
67
+
68
+ # if BYTE_ORDER == LITTLE_ENDIAN
69
+
70
+ # define htobe16(x) htons(x)
71
+ # define htole16(x) (x)
72
+ # define be16toh(x) ntohs(x)
73
+ # define le16toh(x) (x)
74
+
75
+ # define htobe32(x) htonl(x)
76
+ # define htole32(x) (x)
77
+ # define be32toh(x) ntohl(x)
78
+ # define le32toh(x) (x)
79
+
80
+ # define htobe64(x) htonll(x)
81
+ # define htole64(x) (x)
82
+ # define be64toh(x) ntohll(x)
83
+ # define le64toh(x) (x)
84
+
85
+ # elif BYTE_ORDER == BIG_ENDIAN
86
+
87
+ /* that would be xbox 360 */
88
+ # define htobe16(x) (x)
89
+ # define htole16(x) __builtin_bswap16(x)
90
+ # define be16toh(x) (x)
91
+ # define le16toh(x) __builtin_bswap16(x)
92
+
93
+ # define htobe32(x) (x)
94
+ # define htole32(x) __builtin_bswap32(x)
95
+ # define be32toh(x) (x)
96
+ # define le32toh(x) __builtin_bswap32(x)
97
+
98
+ # define htobe64(x) (x)
99
+ # define htole64(x) __builtin_bswap64(x)
100
+ # define be64toh(x) (x)
101
+ # define le64toh(x) __builtin_bswap64(x)
102
+
103
+ # else
104
+
105
+ # error byte order not supported
106
+
107
+ # endif
108
+
109
+ # define __BYTE_ORDER BYTE_ORDER
110
+ # define __BIG_ENDIAN BIG_ENDIAN
111
+ # define __LITTLE_ENDIAN LITTLE_ENDIAN
112
+ # define __PDP_ENDIAN PDP_ENDIAN
113
+
114
+ #else
115
+
116
+ # error platform not supported
117
+
118
+ #endif
119
+
120
+ #endif
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (C) 2009-2013 MongoDB Inc.
2
+ * Copyright (C) 2009-2015 MongoDB Inc.
3
3
  *
4
4
  * Licensed under the Apache License, Version 2.0 (the "License");
5
5
  * you may not use this file except in compliance with the License.
@@ -13,733 +13,699 @@
13
13
  * See the License for the specific language governing permissions and
14
14
  * limitations under the License.
15
15
  */
16
- #ifdef _WIN32
17
- #include <winsock2.h>
18
- #else
19
- #include <arpa/inet.h>
20
- #include <sys/types.h>
21
- #endif
22
-
23
- #include <stdint.h>
16
+ #include <ruby.h>
17
+ #include <ruby/encoding.h>
18
+ #include <stdbool.h>
24
19
  #include <time.h>
25
20
  #include <unistd.h>
26
- #include <ruby.h>
27
-
28
- /**
29
- * For 64 byte systems we convert to longs, for 32 byte systems we convert
30
- * to a long long.
31
- *
32
- * @since 2.0.0
33
- */
34
- #if SIZEOF_LONG == 8
35
- #define NUM2INT64(v) NUM2LONG(v)
36
- #define INT642NUM(v) LONG2NUM(v)
37
- #else
38
- #define NUM2INT64(v) NUM2LL(v)
39
- #define INT642NUM(v) LL2NUM(v)
40
- #endif
21
+ #include "native-endian.h"
41
22
 
42
- /**
43
- * Ruby 1.8.7 does not define DBL2NUM, so we define it if it's not there.
44
- *
45
- * @since 2.0.0
46
- */
47
- #ifndef DBL2NUM
48
- #define DBL2NUM(dbl) rb_float_new(dbl)
49
- #endif
23
+ #define BSON_BYTE_BUFFER_SIZE 1024
50
24
 
51
- /**
52
- * Define the max hostname hash length constant if nonexistant.
53
- *
54
- * @since 3.2.0
55
- */
56
25
  #ifndef HOST_NAME_HASH_MAX
57
26
  #define HOST_NAME_HASH_MAX 256
58
27
  #endif
59
28
 
60
- /**
61
- * Define index sizes for array serialization.
62
- *
63
- * @since 2.0.0
64
- */
65
- #define BSON_INDEX_SIZE 1024
66
- #define BSON_INDEX_CHAR_SIZE 5
67
- #define INTEGER_CHAR_SIZE 22
29
+ typedef struct {
30
+ size_t size;
31
+ size_t write_position;
32
+ size_t read_position;
33
+ char buffer[BSON_BYTE_BUFFER_SIZE];
34
+ char *b_ptr;
35
+ } byte_buffer_t;
36
+
37
+ #define READ_PTR(byte_buffer_ptr) \
38
+ (byte_buffer_ptr->b_ptr + byte_buffer_ptr->read_position)
39
+
40
+ #define READ_SIZE(byte_buffer_ptr) \
41
+ (byte_buffer_ptr->write_position - byte_buffer_ptr->read_position)
42
+
43
+ #define WRITE_PTR(byte_buffer_ptr) \
44
+ (byte_buffer_ptr->b_ptr + byte_buffer_ptr->write_position)
45
+
46
+ #define ENSURE_BSON_WRITE(buffer_ptr, length) \
47
+ { if (buffer_ptr->write_position + length > buffer_ptr->size) rb_bson_expand_buffer(buffer_ptr, length); }
48
+
49
+ #define ENSURE_BSON_READ(buffer_ptr, length) \
50
+ { if (buffer_ptr->read_position + length > buffer_ptr->write_position) \
51
+ rb_raise(rb_eRangeError, "Attempted to read %zu bytes, but only %zu bytes remain", (size_t)length, READ_SIZE(buffer_ptr)); }
52
+
53
+ static VALUE rb_bson_byte_buffer_allocate(VALUE klass);
54
+ static VALUE rb_bson_byte_buffer_initialize(int argc, VALUE *argv, VALUE self);
55
+ static VALUE rb_bson_byte_buffer_length(VALUE self);
56
+ static VALUE rb_bson_byte_buffer_get_byte(VALUE self);
57
+ static VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i);
58
+ static VALUE rb_bson_byte_buffer_get_cstring(VALUE self);
59
+ static VALUE rb_bson_byte_buffer_get_double(VALUE self);
60
+ static VALUE rb_bson_byte_buffer_get_int32(VALUE self);
61
+ static VALUE rb_bson_byte_buffer_get_int64(VALUE self);
62
+ static VALUE rb_bson_byte_buffer_get_string(VALUE self);
63
+ static VALUE rb_bson_byte_buffer_put_byte(VALUE self, VALUE byte);
64
+ static VALUE rb_bson_byte_buffer_put_bytes(VALUE self, VALUE bytes);
65
+ static VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string);
66
+ static VALUE rb_bson_byte_buffer_put_double(VALUE self, VALUE f);
67
+ static VALUE rb_bson_byte_buffer_put_int32(VALUE self, VALUE i);
68
+ static VALUE rb_bson_byte_buffer_put_int64(VALUE self, VALUE i);
69
+ static VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string);
70
+ static VALUE rb_bson_byte_buffer_read_position(VALUE self);
71
+ static VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i);
72
+ static VALUE rb_bson_byte_buffer_write_position(VALUE self);
73
+ static VALUE rb_bson_byte_buffer_to_s(VALUE self);
74
+ static VALUE rb_bson_object_id_generator_next(int argc, VALUE* args, VALUE self);
75
+
76
+ static size_t rb_bson_byte_buffer_memsize(const void *ptr);
77
+ static void rb_bson_byte_buffer_free(void *ptr);
78
+ static void rb_bson_expand_buffer(byte_buffer_t* buffer_ptr, size_t length);
79
+ static void rb_bson_generate_machine_id(VALUE rb_md5_class, char *rb_bson_machine_id);
80
+ static bool rb_bson_utf8_validate(const char *utf8, size_t utf8_len, bool allow_null);
81
+
82
+ static const rb_data_type_t rb_byte_buffer_data_type = {
83
+ "bson/byte_buffer",
84
+ { NULL, rb_bson_byte_buffer_free, rb_bson_byte_buffer_memsize }
85
+ };
68
86
 
69
87
  /**
70
- * Constant for the intetger array indexes.
71
- *
72
- * @since 2.0.0
88
+ * Holds the machine id hash for object id generation.
73
89
  */
74
- static char rb_bson_array_indexes[BSON_INDEX_SIZE][BSON_INDEX_CHAR_SIZE];
90
+ static char rb_bson_machine_id_hash[HOST_NAME_HASH_MAX];
75
91
 
76
92
  /**
77
- * BSON::UTF8
78
- *
79
- * @since 2.0.0
93
+ * The counter for incrementing object ids.
80
94
  */
81
- static VALUE rb_bson_utf8_string;
95
+ static unsigned int rb_bson_object_id_counter = 0;
82
96
 
83
97
  /**
84
- * Set the UTC string method for reference at load.
85
- *
86
- * @since 2.0.0
98
+ * Initialize the native extension.
87
99
  */
88
- static VALUE rb_utc_method;
100
+ void Init_native()
101
+ {
102
+ char rb_bson_machine_id[256];
89
103
 
90
- #include <ruby/encoding.h>
104
+ VALUE rb_bson_module = rb_define_module("BSON");
105
+ VALUE rb_byte_buffer_class = rb_define_class_under(rb_bson_module, "ByteBuffer", rb_cObject);
106
+ VALUE rb_bson_object_id_class = rb_const_get(rb_bson_module, rb_intern("ObjectId"));
107
+ VALUE rb_bson_object_id_generator_class = rb_const_get(rb_bson_object_id_class, rb_intern("Generator"));
108
+ VALUE rb_digest_class = rb_const_get(rb_cObject, rb_intern("Digest"));
109
+ VALUE rb_md5_class = rb_const_get(rb_digest_class, rb_intern("MD5"));
110
+
111
+ rb_define_alloc_func(rb_byte_buffer_class, rb_bson_byte_buffer_allocate);
112
+ rb_define_method(rb_byte_buffer_class, "initialize", rb_bson_byte_buffer_initialize, -1);
113
+ rb_define_method(rb_byte_buffer_class, "length", rb_bson_byte_buffer_length, 0);
114
+ rb_define_method(rb_byte_buffer_class, "get_byte", rb_bson_byte_buffer_get_byte, 0);
115
+ rb_define_method(rb_byte_buffer_class, "get_bytes", rb_bson_byte_buffer_get_bytes, 1);
116
+ rb_define_method(rb_byte_buffer_class, "get_cstring", rb_bson_byte_buffer_get_cstring, 0);
117
+ rb_define_method(rb_byte_buffer_class, "get_double", rb_bson_byte_buffer_get_double, 0);
118
+ rb_define_method(rb_byte_buffer_class, "get_int32", rb_bson_byte_buffer_get_int32, 0);
119
+ rb_define_method(rb_byte_buffer_class, "get_int64", rb_bson_byte_buffer_get_int64, 0);
120
+ rb_define_method(rb_byte_buffer_class, "get_string", rb_bson_byte_buffer_get_string, 0);
121
+ rb_define_method(rb_byte_buffer_class, "put_byte", rb_bson_byte_buffer_put_byte, 1);
122
+ rb_define_method(rb_byte_buffer_class, "put_bytes", rb_bson_byte_buffer_put_bytes, 1);
123
+ rb_define_method(rb_byte_buffer_class, "put_cstring", rb_bson_byte_buffer_put_cstring, 1);
124
+ rb_define_method(rb_byte_buffer_class, "put_double", rb_bson_byte_buffer_put_double, 1);
125
+ rb_define_method(rb_byte_buffer_class, "put_int32", rb_bson_byte_buffer_put_int32, 1);
126
+ rb_define_method(rb_byte_buffer_class, "put_int64", rb_bson_byte_buffer_put_int64, 1);
127
+ rb_define_method(rb_byte_buffer_class, "put_string", rb_bson_byte_buffer_put_string, 1);
128
+ rb_define_method(rb_byte_buffer_class, "read_position", rb_bson_byte_buffer_read_position, 0);
129
+ rb_define_method(rb_byte_buffer_class, "replace_int32", rb_bson_byte_buffer_replace_int32, 2);
130
+ rb_define_method(rb_byte_buffer_class, "write_position", rb_bson_byte_buffer_write_position, 0);
131
+ rb_define_method(rb_byte_buffer_class, "to_s", rb_bson_byte_buffer_to_s, 0);
132
+ rb_define_method(rb_bson_object_id_generator_class, "next_object_id", rb_bson_object_id_generator_next, -1);
91
133
 
92
- #if __BYTE_ORDER == __BIG_ENDIAN
93
- typedef union doublebyte
134
+ // Get the object id machine id and hash it.
135
+ rb_require("digest/md5");
136
+ gethostname(rb_bson_machine_id, sizeof(rb_bson_machine_id));
137
+ rb_bson_machine_id[255] = '\0';
138
+ rb_bson_generate_machine_id(rb_md5_class, rb_bson_machine_id);
139
+ }
140
+
141
+ void rb_bson_generate_machine_id(VALUE rb_md5_class, char *rb_bson_machine_id)
94
142
  {
95
- double d;
96
- unsigned char b[sizeof(double)];
97
- } doublebytet;
98
- #endif
143
+ VALUE digest = rb_funcall(rb_md5_class, rb_intern("digest"), 1, rb_str_new2(rb_bson_machine_id));
144
+ memcpy(rb_bson_machine_id_hash, RSTRING_PTR(digest), RSTRING_LEN(digest));
145
+ }
99
146
 
100
147
  /**
101
- * Convert the binary string to a ruby utf8 string.
102
- *
103
- * @example Convert the string to binary.
104
- * rb_bson_from_bson_string("test");
105
- *
106
- * @param [ String ] string The ruby string.
107
- *
108
- * @return [ String ] The encoded string.
109
- *
110
- * @since 2.0.0
148
+ * Allocates a bson byte buffer that wraps a byte_buffer_t.
111
149
  */
112
- static VALUE rb_bson_from_bson_string(VALUE string)
150
+ VALUE rb_bson_byte_buffer_allocate(VALUE klass)
113
151
  {
114
- return rb_enc_associate(string, rb_utf8_encoding());
152
+ byte_buffer_t *b;
153
+ VALUE obj = TypedData_Make_Struct(klass, byte_buffer_t, &rb_byte_buffer_data_type, b);
154
+ b->b_ptr = b->buffer;
155
+ b->size = BSON_BYTE_BUFFER_SIZE;
156
+ return obj;
115
157
  }
116
158
 
117
159
  /**
118
- * Provide default new string with binary encoding.
119
- *
120
- * @example Check encoded and provide default new binary encoded string.
121
- * if (NIL_P(encoded)) encoded = rb_str_new_encoded_binary();
122
- *
123
- * @return [ String ] The new string with binary encoding.
124
- *
125
- * @since 2.0.0
160
+ * Initialize a byte buffer.
126
161
  */
127
- static VALUE rb_str_new_encoded_binary(void)
162
+ VALUE rb_bson_byte_buffer_initialize(int argc, VALUE *argv, VALUE self)
128
163
  {
129
- return rb_enc_str_new("", 0, rb_ascii8bit_encoding());
164
+ VALUE bytes;
165
+ rb_scan_args(argc, argv, "01", &bytes);
166
+
167
+ if (!NIL_P(bytes)) {
168
+ rb_bson_byte_buffer_put_bytes(self, bytes);
169
+ }
170
+
171
+ return self;
130
172
  }
131
173
 
132
174
  /**
133
- * Constant for a null byte.
134
- *
135
- * @since 2.0.0
175
+ * Get the length of the buffer.
136
176
  */
137
- static const char rb_bson_null_byte = 0;
177
+ VALUE rb_bson_byte_buffer_length(VALUE self)
178
+ {
179
+ byte_buffer_t *b;
180
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
181
+ return UINT2NUM(READ_SIZE(b));
182
+ }
138
183
 
139
184
  /**
140
- * Constant for a true byte.
141
- *
142
- * @since 2.0.0
185
+ * Get a single byte from the buffer.
143
186
  */
144
- static const char rb_bson_true_byte = 1;
187
+ VALUE rb_bson_byte_buffer_get_byte(VALUE self)
188
+ {
189
+ byte_buffer_t *b;
190
+ VALUE byte;
191
+
192
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
193
+ ENSURE_BSON_READ(b, 1);
194
+ byte = rb_str_new(READ_PTR(b), 1);
195
+ b->read_position += 1;
196
+ return byte;
197
+ }
145
198
 
146
199
  /**
147
- * Holds the machine id hash for object id generation.
148
- *
149
- * @since 3.2.0
150
- *
200
+ * Get bytes from the buffer.
151
201
  */
152
- static char rb_bson_machine_id_hash[HOST_NAME_HASH_MAX];
202
+ VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i)
203
+ {
204
+ byte_buffer_t *b;
205
+ VALUE bytes;
206
+ const long length = FIX2LONG(i);
207
+
208
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
209
+ ENSURE_BSON_READ(b, length);
210
+ bytes = rb_str_new(READ_PTR(b), length);
211
+ b->read_position += length;
212
+ return bytes;
213
+ }
153
214
 
154
215
  /**
155
- * The counter for incrementing object ids.
156
- *
157
- * @since 2.0.0
216
+ * Get a cstring from the buffer.
158
217
  */
159
- static unsigned int rb_bson_object_id_counter = 0;
218
+ VALUE rb_bson_byte_buffer_get_cstring(VALUE self)
219
+ {
220
+ byte_buffer_t *b;
221
+ VALUE string;
222
+ int length;
223
+
224
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
225
+ length = (int)strlen(READ_PTR(b));
226
+ ENSURE_BSON_READ(b, length);
227
+ string = rb_enc_str_new(READ_PTR(b), length, rb_utf8_encoding());
228
+ b->read_position += length + 1;
229
+ return string;
230
+ }
160
231
 
161
232
  /**
162
- * Take the provided params and return the encoded bytes or a default one.
163
- *
164
- * @example Get the default encoded bytes.
165
- * rb_get_default_encoded(1, bytes);
166
- *
167
- * @param [ int ] argc The number of arguments.
168
- * @param [ Object ] argv The arguments.
169
- *
170
- * @return [ String ] The encoded string.
171
- *
172
- * @since 2.0.0
233
+ * Get a double from the buffer.
173
234
  */
174
- static VALUE rb_get_default_encoded(int argc, VALUE *argv)
235
+ VALUE rb_bson_byte_buffer_get_double(VALUE self)
175
236
  {
176
- VALUE encoded;
177
- rb_scan_args(argc, argv, "01", &encoded);
178
- if (NIL_P(encoded)) encoded = rb_str_new_encoded_binary();
179
- return encoded;
237
+ byte_buffer_t *b;
238
+ union { uint64_t i64; double d; } ucast;
239
+
240
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
241
+ ENSURE_BSON_READ(b, 8);
242
+ ucast.i64 = le64toh(*(uint64_t*)READ_PTR(b));
243
+ b->read_position += 8;
244
+ return DBL2NUM(ucast.d);
180
245
  }
181
246
 
182
247
  /**
183
- * Append the ruby float as 8-byte double value to buffer.
184
- *
185
- * @example Convert float to double and append.
186
- * rb_float_to_bson(..., 1.2311);
187
- *
188
- * @param [ String] encoded Optional string buffer, default provided by rb_str_encoded_binary
189
- * @param [ Float ] self The ruby float value.
190
- *
191
- * @return [ String ] The encoded bytes with double value appended.
192
- *
193
- * @since 2.0.0
248
+ * Get a int32 from the buffer.
194
249
  */
195
- static VALUE rb_float_to_bson(int argc, VALUE *argv, VALUE self)
250
+ VALUE rb_bson_byte_buffer_get_int32(VALUE self)
196
251
  {
197
- const double v = NUM2DBL(self);
198
- VALUE encoded = rb_get_default_encoded(argc, argv);
199
- # if __BYTE_ORDER == __LITTLE_ENDIAN
200
- rb_str_cat(encoded, (char*) &v, 8);
201
- #elif __BYTE_ORDER == __BIG_ENDIAN
202
- doublebytet swap;
203
- unsigned char b;
204
- swap.d = v;
205
- for (int i=0; i < sizeof(double)/2; i++) {
206
- b=swap.b[i];
207
- swap.b[i] = swap.b[((sizeof(double)-1)-i)];
208
- swap.b[((sizeof(double)-1)-i)]=b;
209
- }
210
- rb_str_cat(encoded, (char*)&swap.d, 8);
211
- #endif
212
- return encoded;
252
+ byte_buffer_t *b;
253
+ int32_t i32;
254
+
255
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
256
+ ENSURE_BSON_READ(b, 4);
257
+ i32 = le32toh(*((int32_t*)READ_PTR(b)));
258
+ b->read_position += 4;
259
+ return INT2NUM(i32);
213
260
  }
214
261
 
215
262
  /**
216
- * Convert the bytes for the double into a Ruby float.
217
- *
218
- * @example Convert the bytes to a float.
219
- * rb_float_from_bson_double(class, bytes);
220
- *
221
- * @param [ Class ] The float class.
222
- * @param [ String ] The double bytes.
223
- *
224
- * @return [ Float ] The ruby float value.
225
- *
226
- * @since 2.0.0
263
+ * Get a int64 from the buffer.
227
264
  */
228
- static VALUE rb_float_from_bson_double(VALUE self, VALUE value)
265
+ VALUE rb_bson_byte_buffer_get_int64(VALUE self)
229
266
  {
230
- const char * bytes;
231
- double v;
232
- bytes = StringValuePtr(value);
233
- #if __BYTE_ORDER == __LITTLE_ENDIAN
234
- memcpy(&v, bytes, RSTRING_LEN(value));
235
- #else
236
- doublebytet swap;
237
- unsigned char b;
238
- memcpy(&swap.d, bytes, RSTRING_LEN(value));
239
- for (int i=0; i < sizeof(double)/2; i++) {
240
- b=swap.b[i];
241
- swap.b[i] = swap.b[((sizeof(double)-1)-i)];
242
- swap.b[((sizeof(double)-1)-i)]=b;
243
- }
244
- memcpy(&v, swap.b, RSTRING_LEN(value));
245
- #endif
246
-
247
- return DBL2NUM(v);
267
+ byte_buffer_t *b;
268
+ int64_t i64;
269
+
270
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
271
+ ENSURE_BSON_READ(b, 8);
272
+ i64 = le64toh(*((int64_t*)READ_PTR(b)));
273
+ b->read_position += 8;
274
+ return LONG2NUM(i64);
248
275
  }
249
276
 
250
277
  /**
251
- * Generate the data for the next object id.
252
- *
253
- * @example Generate the data for the next object id.
254
- * rb_object_id_generator_next(0, NULL, object_id);
255
- *
256
- * @param [ int ] argc The argument count.
257
- * @param [ Time ] time The optional Ruby time.
258
- * @param [ BSON::ObjectId ] self The object id.
259
- *
260
- * @return [ String ] The raw bytes for the id.
261
- *
262
- * @since 2.0.0
278
+ * Get a string from the buffer.
263
279
  */
264
- static VALUE rb_object_id_generator_next(int argc, VALUE* args, VALUE self)
280
+ VALUE rb_bson_byte_buffer_get_string(VALUE self)
265
281
  {
266
- char bytes[12];
267
- unsigned long t;
268
- unsigned short pid = htons(getpid());
269
-
270
- if (argc == 0 || (argc == 1 && *args == Qnil)) {
271
- t = htonl((int) time(NULL));
272
- }
273
- else {
274
- t = htonl(NUM2UINT(rb_funcall(*args, rb_intern("to_i"), 0)));
275
- }
276
-
277
- unsigned long c;
278
- c = htonl(rb_bson_object_id_counter << 8);
279
-
280
- # if __BYTE_ORDER == __LITTLE_ENDIAN
281
- memcpy(&bytes, &t, 4);
282
- memcpy(&bytes[4], rb_bson_machine_id_hash, 3);
283
- memcpy(&bytes[7], &pid, 2);
284
- memcpy(&bytes[9], (unsigned char*) &c, 3);
285
- #elif __BYTE_ORDER == __BIG_ENDIAN
286
- memcpy(&bytes, ((unsigned char*) &t) + 4, 4);
287
- memcpy(&bytes[4], rb_bson_machine_id_hash, 3);
288
- memcpy(&bytes[7], &pid, 2);
289
- memcpy(&bytes[9], ((unsigned char*) &c) + 4, 3);
290
- #endif
291
- rb_bson_object_id_counter++;
292
- return rb_str_new(bytes, 12);
282
+ byte_buffer_t *b;
283
+ int32_t length;
284
+ VALUE string;
285
+
286
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
287
+ ENSURE_BSON_READ(b, 4);
288
+ length = le32toh(*((int32_t*)READ_PTR(b)));
289
+ b->read_position += 4;
290
+ ENSURE_BSON_READ(b, length);
291
+ string = rb_enc_str_new(READ_PTR(b), length - 1, rb_utf8_encoding());
292
+ b->read_position += length;
293
+ return string;
293
294
  }
294
295
 
295
296
  /**
296
- * Check if the integer is a 32 bit integer.
297
- *
298
- * @example Check if the integer is 32 bit.
299
- * rb_integer_is_bson_int32(integer);
300
- *
301
- * @param [ Integer ] self The ruby integer.
302
- *
303
- * @return [ true, false ] If the integer is 32 bit.
304
- *
305
- * @since 2.0.0
297
+ * Writes a byte to the byte buffer.
306
298
  */
307
- static VALUE rb_integer_is_bson_int32(VALUE self)
299
+ VALUE rb_bson_byte_buffer_put_byte(VALUE self, VALUE byte)
308
300
  {
309
- const int64_t v = NUM2INT64(self);
310
- if (INT_MIN <= v && v <= INT_MAX) {
311
- return Qtrue;
312
- }
313
- else {
314
- return Qfalse;
315
- }
301
+ byte_buffer_t *b;
302
+ const char *str = RSTRING_PTR(byte);
303
+
304
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
305
+ ENSURE_BSON_WRITE(b, 1);
306
+ memcpy(WRITE_PTR(b), str, 1);
307
+ b->write_position += 1;
308
+
309
+ return self;
316
310
  }
317
311
 
318
312
  /**
319
- * Convert the Ruby integer into a BSON as per the 32 bit specification,
320
- * which is 4 bytes.
321
- *
322
- * @example Convert the integer to 32bit BSON.
323
- * rb_integer_to_bson_int32(128, encoded);
324
- *
325
- * @param [ Integer ] self The Ruby integer.
326
- * @param [ String ] encoded The Ruby binary string to append to.
327
- *
328
- * @return [ String ] encoded Ruby binary string with BSON raw bytes appended.
329
- *
330
- * @since 2.0.0
313
+ * Writes bytes to the byte buffer.
331
314
  */
332
- static VALUE rb_integer_to_bson_int32(VALUE self, VALUE encoded)
315
+ VALUE rb_bson_byte_buffer_put_bytes(VALUE self, VALUE bytes)
333
316
  {
334
- const int32_t v = NUM2INT(self);
335
- const char bytes[4] = {
336
- v & 255,
337
- (v >> 8) & 255,
338
- (v >> 16) & 255,
339
- (v >> 24) & 255
340
- };
341
- return rb_str_cat(encoded, bytes, 4);
317
+ byte_buffer_t *b;
318
+ const char *str = RSTRING_PTR(bytes);
319
+ const size_t length = RSTRING_LEN(bytes);
320
+
321
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
322
+ ENSURE_BSON_WRITE(b, length);
323
+ memcpy(WRITE_PTR(b), str, length);
324
+ b->write_position += length;
325
+ return self;
342
326
  }
343
327
 
344
328
  /**
345
- * Initialize the bson array index for integers.
346
- *
347
- * @example Initialize the array.
348
- * rb_bson_init_integer_bson_array_indexes();
349
- *
350
- * @since 2.0.0
329
+ * Writes a cstring to the byte buffer.
351
330
  */
352
- static void rb_bson_init_integer_bson_array_indexes(void)
331
+ VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string)
353
332
  {
354
- int i;
355
- for (i = 0; i < BSON_INDEX_SIZE; i++) {
356
- snprintf(rb_bson_array_indexes[i], BSON_INDEX_CHAR_SIZE, "%d", i);
333
+ byte_buffer_t *b;
334
+ char *c_str = RSTRING_PTR(string);
335
+ size_t length = RSTRING_LEN(string) + 1;
336
+
337
+ if (!rb_bson_utf8_validate(c_str, length - 1, false)) {
338
+ rb_raise(rb_eArgError, "String %s is not a valid UTF-8 CString.", c_str);
357
339
  }
340
+
341
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
342
+ ENSURE_BSON_WRITE(b, length);
343
+ memcpy(WRITE_PTR(b), c_str, length);
344
+ b->write_position += length;
345
+ return self;
358
346
  }
359
347
 
360
348
  /**
361
- * Convert the Ruby integer into a character string and append with nullchar to encoded BSON.
362
- *
363
- * @example Convert the integer to string and append with nullchar.
364
- * rb_integer_to_bson_key(128, encoded);
365
- *
366
- * @param [ Integer ] self The Ruby integer.
367
- * @param [ String ] encoded The Ruby binary string to append to.
368
- *
369
- * @return [ String ] encoded Ruby binary string with BSON raw bytes appended.
370
- *
371
- * @since 2.0.0
349
+ * Writes a 64 bit double to the buffer.
372
350
  */
373
- static VALUE rb_integer_to_bson_key(int argc, VALUE *argv, VALUE self)
351
+ VALUE rb_bson_byte_buffer_put_double(VALUE self, VALUE f)
374
352
  {
375
- char bytes[INTEGER_CHAR_SIZE];
376
- const int64_t v = NUM2INT64(self);
377
- VALUE encoded = rb_get_default_encoded(argc, argv);
378
- int length;
379
- if (v < BSON_INDEX_SIZE)
380
- return rb_str_cat(encoded, rb_bson_array_indexes[v], strlen(rb_bson_array_indexes[v]) + 1);
381
- length = snprintf(bytes, INTEGER_CHAR_SIZE, "%ld", (long)v);
382
- return rb_str_cat(encoded, bytes, length + 1);
353
+ byte_buffer_t *b;
354
+ union {double d; uint64_t i64;} ucast;
355
+
356
+ ucast.d = NUM2DBL(f);
357
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
358
+ ENSURE_BSON_WRITE(b, 8);
359
+ ucast.i64 = htole64(ucast.i64);
360
+ *(int64_t*)WRITE_PTR(b) = ucast.i64;
361
+ b->write_position += 8;
362
+
363
+ return self;
383
364
  }
384
365
 
385
366
  /**
386
- * Convert the provided raw bytes into a 32bit Ruby integer.
387
- *
388
- * @example Convert the bytes to an Integer.
389
- * rb_integer_from_bson_int32(Int32, bytes);
390
- *
391
- * @param [ BSON::Int32 ] self The Int32 eigenclass.
392
- * @param [ String ] bytes The raw bytes.
393
- *
394
- * @return [ Integer ] The Ruby integer.
395
- *
396
- * @since 2.0.0
367
+ * Writes a 32 bit integer to the byte buffer.
397
368
  */
398
- static VALUE rb_integer_from_bson_int32(VALUE self, VALUE bson)
369
+ VALUE rb_bson_byte_buffer_put_int32(VALUE self, VALUE i)
399
370
  {
400
- const uint8_t *v = (const uint8_t*) StringValuePtr(bson);
401
- const int32_t integer = v[0] + (v[1] << 8) + (v[2] << 16) + (v[3] << 24);
402
- return INT2NUM(integer);
371
+ byte_buffer_t *b;
372
+ const int32_t i32 = NUM2INT(i);
373
+
374
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
375
+ ENSURE_BSON_WRITE(b, 4);
376
+ *((int32_t*)WRITE_PTR(b)) = htole32(i32);
377
+ b->write_position += 4;
378
+
379
+ return self;
403
380
  }
404
381
 
405
382
  /**
406
- * Convert the raw BSON bytes into an int64_t type.
407
- *
408
- * @example Convert the bytes into an int64_t.
409
- * rb_bson_to_int64_t(bson);
410
- *
411
- * @param [ String ] bson The raw bytes.
412
- *
413
- * @return [ int64_t ] The int64_t.
414
- *
415
- * @since 2.0.0
383
+ * Writes a 64 bit integer to the byte buffer.
416
384
  */
417
- static int64_t rb_bson_to_int64_t(VALUE bson)
385
+ VALUE rb_bson_byte_buffer_put_int64(VALUE self, VALUE i)
418
386
  {
419
- uint8_t *v;
420
- uint32_t byte_0, byte_1;
421
- int64_t byte_2, byte_3;
422
- int64_t lower, upper;
423
- v = (uint8_t*) StringValuePtr(bson);
424
- byte_0 = v[0];
425
- byte_1 = v[1];
426
- byte_2 = v[2];
427
- byte_3 = v[3];
428
- lower = byte_0 + (byte_1 << 8) + (byte_2 << 16) + (byte_3 << 24);
429
- byte_0 = v[4];
430
- byte_1 = v[5];
431
- byte_2 = v[6];
432
- byte_3 = v[7];
433
- upper = byte_0 + (byte_1 << 8) + (byte_2 << 16) + (byte_3 << 24);
434
- return lower + (upper << 32);
387
+ byte_buffer_t *b;
388
+ const int64_t i64 = NUM2LONG(i);
389
+
390
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
391
+ ENSURE_BSON_WRITE(b, 8);
392
+ *((int64_t*)WRITE_PTR(b)) = htole64(i64);
393
+ b->write_position += 8;
394
+
395
+ return self;
435
396
  }
436
397
 
437
398
  /**
438
- * Convert the provided raw bytes into a 64bit Ruby integer.
439
- *
440
- * @example Convert the bytes to an Integer.
441
- * rb_integer_from_bson_int64(Int64, bytes);
442
- *
443
- * @param [ BSON::Int64 ] self The Int64 eigenclass.
444
- * @param [ String ] bytes The raw bytes.
445
- *
446
- * @return [ Integer ] The Ruby integer.
447
- *
448
- * @since 2.0.0
399
+ * Writes a string to the byte buffer.
449
400
  */
450
- static VALUE rb_integer_from_bson_int64(VALUE self, VALUE bson)
401
+ VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string)
451
402
  {
452
- return INT642NUM(rb_bson_to_int64_t(bson));
403
+ byte_buffer_t *b;
404
+
405
+ char *str = RSTRING_PTR(string);
406
+ const size_t length = RSTRING_LEN(string) + 1;
407
+
408
+ if (!rb_bson_utf8_validate(str, length - 1, true)) {
409
+ rb_raise(rb_eArgError, "String %s is not valid UTF-8.", str);
410
+ }
411
+
412
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
413
+ ENSURE_BSON_WRITE(b, length + 4);
414
+ *((int32_t*)WRITE_PTR(b)) = htole32(length);
415
+ b->write_position += 4;
416
+ memcpy(WRITE_PTR(b), str, length);
417
+ b->write_position += length;
418
+
419
+ return self;
453
420
  }
454
421
 
455
422
  /**
456
- * Append the 64-bit integer to encoded BSON Ruby binary string.
457
- *
458
- * @example Append the 64-bit integer to encoded BSON.
459
- * int64_t_to_bson(128, encoded);
460
- *
461
- * @param [ int64_t ] self The 64-bit integer.
462
- * @param [ String ] encoded The BSON Ruby binary string to append to.
463
- *
464
- * @return [ String ] encoded Ruby binary string with BSON raw bytes appended.
465
- *
466
- * @since 2.0.0
423
+ * Get the read position.
467
424
  */
468
- static VALUE int64_t_to_bson(int64_t v, VALUE encoded)
425
+ VALUE rb_bson_byte_buffer_read_position(VALUE self)
469
426
  {
470
- const char bytes[8] = {
471
- v & 255,
472
- (v >> 8) & 255,
473
- (v >> 16) & 255,
474
- (v >> 24) & 255,
475
- (v >> 32) & 255,
476
- (v >> 40) & 255,
477
- (v >> 48) & 255,
478
- (v >> 56) & 255
479
- };
480
- return rb_str_cat(encoded, bytes, 8);
427
+ byte_buffer_t *b;
428
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
429
+ return INT2NUM(b->read_position);
481
430
  }
482
431
 
483
432
  /**
484
- * Convert the Ruby integer into a BSON as per the 64 bit specification,
485
- * which is 8 bytes.
486
- *
487
- * @example Convert the integer to 64bit BSON.
488
- * rb_integer_to_bson_int64(128, encoded);
489
- *
490
- * @param [ Integer ] self The Ruby integer.
491
- * @param [ String ] encoded The Ruby binary string to append to.
492
- *
493
- * @return [ String ] encoded Ruby binary string with BSON raw bytes appended.
494
- *
495
- * @since 2.0.0
433
+ * Replace a 32 bit integer in the byte buffer.
496
434
  */
497
- static VALUE rb_integer_to_bson_int64(VALUE self, VALUE encoded)
435
+ VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i)
498
436
  {
499
- return int64_t_to_bson(NUM2INT64(self), StringValue(encoded));
437
+ byte_buffer_t *b;
438
+ const int32_t position = NUM2INT(index);
439
+ const int32_t i32 = htole32(NUM2INT(i));
440
+
441
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
442
+
443
+ memcpy(READ_PTR(b) + position, &i32, 4);
444
+
445
+ return self;
500
446
  }
501
447
 
502
448
  /**
503
- * Converts the milliseconds time to the raw BSON bytes. We need to
504
- * explicitly convert using 64 bit here.
505
- *
506
- * @example Convert the milliseconds value to BSON bytes.
507
- * rb_time_to_bson(time, 2124132340000, encoded);
508
- *
509
- * @param [ Time ] self The Ruby Time object.
510
- * @param [ Integer ] milliseconds The milliseconds pre/post epoch.
511
- * @param [ String ] encoded The Ruby binary string to append to.
512
- *
513
- * @return [ String ] encoded Ruby binary string with time BSON raw bytes appended.
514
- *
515
- * @since 2.0.0
449
+ * Get the write position.
516
450
  */
517
- static VALUE rb_time_to_bson(int argc, VALUE *argv, VALUE self)
451
+ VALUE rb_bson_byte_buffer_write_position(VALUE self)
518
452
  {
519
- int64_t t = NUM2INT64(rb_funcall(self, rb_intern("to_i"), 0));
520
- int64_t milliseconds = (int64_t)(t * 1000);
521
- int32_t micro = NUM2INT(rb_funcall(self, rb_intern("usec"), 0));
522
- int64_t time = milliseconds + (micro / 1000);
523
- VALUE encoded = rb_get_default_encoded(argc, argv);
524
- return int64_t_to_bson(time, encoded);
453
+ byte_buffer_t *b;
454
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
455
+ return INT2NUM(b->write_position);
525
456
  }
526
457
 
527
458
  /**
528
- * Converts the raw BSON bytes into a UTC Ruby time.
529
- *
530
- * @example Convert the bytes to a Ruby time.
531
- * rb_time_from_bson(time, bytes);
532
- *
533
- * @param [ Class ] self The Ruby Time class.
534
- * @param [ String ] bytes The raw BSON bytes.
535
- *
536
- * @return [ Time ] The UTC time.
537
- *
538
- * @since 2.0.0
459
+ * Convert the buffer to a string.
539
460
  */
540
- static VALUE rb_time_from_bson(VALUE self, VALUE bytes)
461
+ VALUE rb_bson_byte_buffer_to_s(VALUE self)
541
462
  {
542
- const int64_t millis = rb_bson_to_int64_t(bytes);
543
- const VALUE time = rb_time_new(millis / 1000, (millis % 1000) * 1000);
544
- return rb_funcall(time, rb_utc_method, 0);
463
+ byte_buffer_t *b;
464
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
465
+ return rb_str_new(READ_PTR(b), READ_SIZE(b));
545
466
  }
546
467
 
547
468
  /**
548
- * Set four bytes for int32 in a binary string and return it.
549
- *
550
- * @example Set int32 in a BSON string.
551
- * rb_string_set_int32(self, pos, int32)
552
- *
553
- * @param [ String ] self The Ruby binary string.
554
- * @param [ Fixnum ] The position to set.
555
- * @param [ Fixnum ] The int32 value.
556
- *
557
- * @return [ String ] The binary string.
558
- *
559
- * @since 2.0.0
469
+ * Get the size of the byte_buffer_t in memory.
560
470
  */
561
- static VALUE rb_string_set_int32(VALUE str, VALUE pos, VALUE an_int32)
471
+ size_t rb_bson_byte_buffer_memsize(const void *ptr)
562
472
  {
563
- const int32_t offset = NUM2INT(pos);
564
- const int32_t v = NUM2INT(an_int32);
565
- const char bytes[4] = {
566
- v & 255,
567
- (v >> 8) & 255,
568
- (v >> 16) & 255,
569
- (v >> 24) & 255
570
- };
571
- rb_str_modify(str);
572
- if (offset < 0 || offset + 4 > RSTRING_LEN(str)) {
573
- rb_raise(rb_eArgError, "invalid position");
574
- }
575
- memcpy(RSTRING_PTR(str) + offset, bytes, 4);
576
- return str;
473
+ return ptr ? sizeof(byte_buffer_t) : 0;
577
474
  }
578
475
 
579
476
  /**
580
- * Check for illegal characters in string.
581
- *
582
- * @example Check for illegal characters.
583
- * rb_string_check_for_illegal_characters("test");
584
- *
585
- * @param [ String ] self The string value.
586
- *
587
- * @since 2.0.0
477
+ * Free the memory for the byte buffer.
588
478
  */
589
- static VALUE rb_string_check_for_illegal_characters(VALUE self)
479
+ void rb_bson_byte_buffer_free(void *ptr)
590
480
  {
591
- if (strlen(RSTRING_PTR(self)) != (size_t) RSTRING_LEN(self))
592
- rb_raise(rb_eArgError, "Illegal C-String contains a null byte.");
593
- return self;
481
+ byte_buffer_t *b = ptr;
482
+ if (b->b_ptr != b->buffer) {
483
+ xfree(b->b_ptr);
484
+ }
485
+ xfree(b);
594
486
  }
595
487
 
596
488
  /**
597
- * Encode a false value to bson.
598
- *
599
- * @example Encode the false value.
600
- * rb_false_class_to_bson(0, false);
601
- *
602
- * @param [ int ] argc The number or arguments.
603
- * @param [ Array<Object> ] argv The arguments.
604
- * @param [ TrueClass ] self The true value.
605
- *
606
- * @return [ String ] The encoded string.
607
- *
608
- * @since 2.0.0
489
+ * Expand the byte buffer linearly.
609
490
  */
610
- static VALUE rb_false_class_to_bson(int argc, VALUE *argv, VALUE self)
491
+ void rb_bson_expand_buffer(byte_buffer_t* buffer_ptr, size_t length)
611
492
  {
612
- VALUE encoded = rb_get_default_encoded(argc, argv);
613
- rb_str_cat(encoded, &rb_bson_null_byte, 1);
614
- return encoded;
493
+ const size_t required_size = buffer_ptr->write_position - buffer_ptr->read_position + length;
494
+ if (required_size <= buffer_ptr->size) {
495
+ memmove(buffer_ptr->b_ptr, READ_PTR(buffer_ptr), READ_SIZE(buffer_ptr));
496
+ buffer_ptr->write_position -= buffer_ptr->read_position;
497
+ buffer_ptr->read_position = 0;
498
+ } else {
499
+ char *new_b_ptr;
500
+ const size_t new_size = required_size * 2;
501
+ new_b_ptr = ALLOC_N(char, new_size);
502
+ memcpy(new_b_ptr, READ_PTR(buffer_ptr), READ_SIZE(buffer_ptr));
503
+ if (buffer_ptr->b_ptr != buffer_ptr->buffer) {
504
+ xfree(buffer_ptr->b_ptr);
505
+ }
506
+ buffer_ptr->b_ptr = new_b_ptr;
507
+ buffer_ptr->size = new_size;
508
+ buffer_ptr->write_position -= buffer_ptr->read_position;
509
+ buffer_ptr->read_position = 0;
510
+ }
615
511
  }
616
512
 
617
513
  /**
618
- * Encode a true value to bson.
619
- *
620
- * @example Encode the true value.
621
- * rb_true_class_to_bson(0, true);
622
- *
623
- * @param [ int ] argc The number or arguments.
624
- * @param [ Array<Object> ] argv The arguments.
625
- * @param [ TrueClass ] self The true value.
626
- *
627
- * @return [ String ] The encoded string.
628
- *
629
- * @since 2.0.0
514
+ * Generate the next object id.
630
515
  */
631
- static VALUE rb_true_class_to_bson(int argc, VALUE *argv, VALUE self)
516
+ VALUE rb_bson_object_id_generator_next(int argc, VALUE* args, VALUE self)
632
517
  {
633
- VALUE encoded = rb_get_default_encoded(argc, argv);
634
- rb_str_cat(encoded, &rb_bson_true_byte, 1);
635
- return encoded;
518
+ char bytes[12];
519
+ unsigned long t;
520
+ unsigned long c;
521
+ unsigned short pid = htons(getpid());
522
+
523
+ if (argc == 0 || (argc == 1 && *args == Qnil)) {
524
+ t = htonl((int) time(NULL));
525
+ }
526
+ else {
527
+ t = htonl(NUM2UINT(rb_funcall(*args, rb_intern("to_i"), 0)));
528
+ }
529
+
530
+ c = htonl(rb_bson_object_id_counter << 8);
531
+
532
+ # if __BYTE_ORDER == __LITTLE_ENDIAN
533
+ memcpy(&bytes, &t, 4);
534
+ memcpy(&bytes[4], rb_bson_machine_id_hash, 3);
535
+ memcpy(&bytes[7], &pid, 2);
536
+ memcpy(&bytes[9], (unsigned char*) &c, 3);
537
+ #elif __BYTE_ORDER == __BIG_ENDIAN
538
+ memcpy(&bytes, ((unsigned char*) &t) + 4, 4);
539
+ memcpy(&bytes[4], rb_bson_machine_id_hash, 3);
540
+ memcpy(&bytes[7], &pid, 2);
541
+ memcpy(&bytes[9], ((unsigned char*) &c) + 4, 3);
542
+ #endif
543
+ rb_bson_object_id_counter++;
544
+ return rb_str_new(bytes, 12);
636
545
  }
637
546
 
638
547
  /**
639
- * Decode a string from bson.
640
- *
641
- * @example Decode a string.
642
- * rb_bson_string_from_bson(string, io);
643
- *
644
- * @param [ String ] self The string class.
645
- * @param [ IO ] bson The io stream of BSON.
646
- *
647
- * @return [ String ] The decoded string.
648
- *
649
- * @since 3.2.5
548
+ * Taken from libbson.
650
549
  */
651
- static VALUE rb_bson_string_from_bson(VALUE self, VALUE bson)
550
+ static void _bson_utf8_get_sequence(const char *utf8, uint8_t *seq_length, uint8_t *first_mask)
652
551
  {
653
- ID read_method = rb_intern("read");
654
- VALUE int_bytes = rb_funcall(bson, read_method, 1, 4);
655
- VALUE size = rb_integer_from_bson_int32(self, int_bytes);
656
- VALUE string_bytes = rb_funcall(bson, read_method, 1, size - 1);
657
- return rb_bson_from_bson_string(string_bytes);
552
+ unsigned char c = *(const unsigned char *)utf8;
553
+ uint8_t m;
554
+ uint8_t n;
555
+
556
+ /*
557
+ * See the following[1] for a description of what the given multi-byte
558
+ * sequences will be based on the bits set of the first byte. We also need
559
+ * to mask the first byte based on that. All subsequent bytes are masked
560
+ * against 0x3F.
561
+ *
562
+ * [1] http://www.joelonsoftware.com/articles/Unicode.html
563
+ */
564
+
565
+ if ((c & 0x80) == 0) {
566
+ n = 1;
567
+ m = 0x7F;
568
+ } else if ((c & 0xE0) == 0xC0) {
569
+ n = 2;
570
+ m = 0x1F;
571
+ } else if ((c & 0xF0) == 0xE0) {
572
+ n = 3;
573
+ m = 0x0F;
574
+ } else if ((c & 0xF8) == 0xF0) {
575
+ n = 4;
576
+ m = 0x07;
577
+ } else if ((c & 0xFC) == 0xF8) {
578
+ n = 5;
579
+ m = 0x03;
580
+ } else if ((c & 0xFE) == 0xFC) {
581
+ n = 6;
582
+ m = 0x01;
583
+ } else {
584
+ n = 0;
585
+ m = 0;
586
+ }
587
+
588
+ *seq_length = n;
589
+ *first_mask = m;
658
590
  }
659
591
 
660
592
  /**
661
- * Initialize the bson c extension.
662
- *
663
- * @since 2.0.0
593
+ * Taken from libbson.
664
594
  */
665
- void Init_native()
595
+ bool rb_bson_utf8_validate(const char *utf8, size_t utf8_len, bool allow_null)
666
596
  {
667
- // Get all the constants to be used in the extensions.
668
- VALUE bson = rb_const_get(rb_cObject, rb_intern("BSON"));
669
- VALUE integer = rb_const_get(bson, rb_intern("Integer"));
670
- VALUE floats = rb_const_get(bson, rb_intern("Float"));
671
- VALUE float_class = rb_const_get(floats, rb_intern("ClassMethods"));
672
- VALUE time = rb_const_get(bson, rb_intern("Time"));
673
- VALUE time_class = rb_singleton_class(time);
674
- VALUE int32 = rb_const_get(bson, rb_intern("Int32"));
675
- VALUE int32_class = rb_singleton_class(int32);
676
- VALUE int64 = rb_const_get(bson, rb_intern("Int64"));
677
- VALUE int64_class = rb_singleton_class(int64);
678
- VALUE object_id = rb_const_get(bson, rb_intern("ObjectId"));
679
- VALUE generator = rb_const_get(object_id, rb_intern("Generator"));
680
- VALUE string = rb_const_get(bson, rb_intern("String"));
681
- VALUE string_class = rb_singleton_class(string);
682
- VALUE true_class = rb_const_get(bson, rb_intern("TrueClass"));
683
- VALUE false_class = rb_const_get(bson, rb_intern("FalseClass"));
684
- // needed to hash the machine id
685
- rb_require("digest/md5");
686
- VALUE digest_class = rb_const_get(rb_cObject, rb_intern("Digest"));
687
- VALUE md5_class = rb_const_get(digest_class, rb_intern("MD5"));
688
- rb_bson_utf8_string = rb_const_get(bson, rb_intern("UTF8"));
689
- rb_utc_method = rb_intern("utc");
597
+ uint32_t c;
598
+ uint8_t first_mask;
599
+ uint8_t seq_length;
600
+ unsigned i;
601
+ unsigned j;
602
+
603
+ if (!utf8) {
604
+ return false;
605
+ }
690
606
 
691
- // Get the object id machine id and hash it.
692
- char rb_bson_machine_id[256];
693
- gethostname(rb_bson_machine_id, sizeof rb_bson_machine_id);
694
- rb_bson_machine_id[255] = '\0';
695
- VALUE digest = rb_funcall(md5_class, rb_intern("digest"), 1, rb_str_new2(rb_bson_machine_id));
696
- memcpy(rb_bson_machine_id_hash, RSTRING_PTR(digest), RSTRING_LEN(digest));
607
+ for (i = 0; i < utf8_len; i += seq_length) {
608
+ _bson_utf8_get_sequence(&utf8[i], &seq_length, &first_mask);
609
+
610
+ /*
611
+ * Ensure we have a valid multi-byte sequence length.
612
+ */
613
+ if (!seq_length) {
614
+ return false;
615
+ }
616
+
617
+ /*
618
+ * Ensure we have enough bytes left.
619
+ */
620
+ if ((utf8_len - i) < seq_length) {
621
+ return false;
622
+ }
623
+
624
+ /*
625
+ * Also calculate the next char as a unichar so we can
626
+ * check code ranges for non-shortest form.
627
+ */
628
+ c = utf8 [i] & first_mask;
629
+
630
+ /*
631
+ * Check the high-bits for each additional sequence byte.
632
+ */
633
+ for (j = i + 1; j < (i + seq_length); j++) {
634
+ c = (c << 6) | (utf8 [j] & 0x3F);
635
+ if ((utf8[j] & 0xC0) != 0x80) {
636
+ return false;
637
+ }
638
+ }
639
+
640
+ /*
641
+ * Check for NULL bytes afterwards.
642
+ *
643
+ * Hint: if you want to optimize this function, starting here to do
644
+ * this in the same pass as the data above would probably be a good
645
+ * idea. You would add a branch into the inner loop, but save possibly
646
+ * on cache-line bouncing on larger strings. Just a thought.
647
+ */
648
+ if (!allow_null) {
649
+ for (j = 0; j < seq_length; j++) {
650
+ if (((i + j) > utf8_len) || !utf8[i + j]) {
651
+ return false;
652
+ }
653
+ }
654
+ }
655
+
656
+ /*
657
+ * Code point wont fit in utf-16, not allowed.
658
+ */
659
+ if (c > 0x0010FFFF) {
660
+ return false;
661
+ }
662
+
663
+ /*
664
+ * Byte is in reserved range for UTF-16 high-marks
665
+ * for surrogate pairs.
666
+ */
667
+ if ((c & 0xFFFFF800) == 0xD800) {
668
+ return false;
669
+ }
670
+
671
+ /*
672
+ * Check non-shortest form unicode.
673
+ */
674
+ switch (seq_length) {
675
+ case 1:
676
+ if (c <= 0x007F) {
677
+ continue;
678
+ }
679
+ return false;
680
+
681
+ case 2:
682
+ if ((c >= 0x0080) && (c <= 0x07FF)) {
683
+ continue;
684
+ } else if (c == 0) {
685
+ /* Two-byte representation for NULL. */
686
+ continue;
687
+ }
688
+ return false;
689
+
690
+ case 3:
691
+ if (((c >= 0x0800) && (c <= 0x0FFF)) ||
692
+ ((c >= 0x1000) && (c <= 0xFFFF))) {
693
+ continue;
694
+ }
695
+ return false;
696
+
697
+ case 4:
698
+ if (((c >= 0x10000) && (c <= 0x3FFFF)) ||
699
+ ((c >= 0x40000) && (c <= 0xFFFFF)) ||
700
+ ((c >= 0x100000) && (c <= 0x10FFFF))) {
701
+ continue;
702
+ }
703
+ return false;
704
+
705
+ default:
706
+ return false;
707
+ }
708
+ }
697
709
 
698
- // Integer optimizations.
699
- rb_undef_method(integer, "to_bson_int32");
700
- rb_define_method(integer, "to_bson_int32", rb_integer_to_bson_int32, 1);
701
- rb_undef_method(integer, "to_bson_int64");
702
- rb_define_method(integer, "to_bson_int64", rb_integer_to_bson_int64, 1);
703
- rb_undef_method(integer, "bson_int32?");
704
- rb_define_method(integer, "bson_int32?", rb_integer_is_bson_int32, 0);
705
- rb_bson_init_integer_bson_array_indexes();
706
- rb_undef_method(integer, "to_bson_key");
707
- rb_define_method(integer, "to_bson_key", rb_integer_to_bson_key, -1);
708
- rb_undef_method(int32_class, "from_bson_int32");
709
- rb_define_private_method(int32_class, "from_bson_int32", rb_integer_from_bson_int32, 1);
710
- rb_undef_method(int64_class, "from_bson_int64");
711
- rb_define_private_method(int64_class, "from_bson_int64", rb_integer_from_bson_int64, 1);
712
-
713
- // Float optimizations.
714
- rb_undef_method(floats, "to_bson");
715
- rb_define_method(floats, "to_bson", rb_float_to_bson, -1);
716
- rb_undef_method(float_class, "from_bson_double");
717
- rb_define_private_method(float_class, "from_bson_double", rb_float_from_bson_double, 1);
718
-
719
- // Boolean optimizations - deserialization has no benefit so we provide
720
- // no extensions there.
721
- rb_undef_method(true_class, "to_bson");
722
- rb_define_method(true_class, "to_bson", rb_true_class_to_bson, -1);
723
- rb_undef_method(false_class, "to_bson");
724
- rb_define_method(false_class, "to_bson", rb_false_class_to_bson, -1);
725
-
726
- // Optimizations around time serialization and deserialization.
727
- rb_undef_method(time, "to_bson");
728
- rb_define_method(time, "to_bson", rb_time_to_bson, -1);
729
- rb_undef_method(time_class, "from_bson");
730
- rb_define_method(time_class, "from_bson", rb_time_from_bson, 1);
731
-
732
- // String optimizations.
733
- rb_undef_method(string, "set_int32");
734
- rb_define_method(string, "set_int32", rb_string_set_int32, 2);
735
- rb_undef_method(string, "from_bson_string");
736
- rb_define_method(string, "from_bson_string", rb_bson_from_bson_string, 0);
737
- rb_undef_method(string_class, "from_bson");
738
- rb_define_method(string_class, "from_bson", rb_bson_string_from_bson, 1);
739
- rb_undef_method(string, "check_for_illegal_characters!");
740
- rb_define_private_method(string, "check_for_illegal_characters!", rb_string_check_for_illegal_characters, 0);
741
-
742
- // Redefine the next method on the object id generator.
743
- rb_undef_method(generator, "next_object_id");
744
- rb_define_method(generator, "next_object_id", rb_object_id_generator_next, -1);
710
+ return true;
745
711
  }