bson 4.2.2 → 4.12.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (169) hide show
  1. checksums.yaml +5 -5
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/README.md +25 -7
  5. data/Rakefile +16 -9
  6. data/ext/bson/{native-endian.h → bson-endian.h} +5 -99
  7. data/ext/bson/bson-native.h +125 -0
  8. data/ext/bson/bytebuf.c +133 -0
  9. data/ext/bson/endian.c +117 -0
  10. data/ext/bson/init.c +355 -0
  11. data/ext/bson/libbson-utf8.c +230 -0
  12. data/ext/bson/read.c +411 -0
  13. data/ext/bson/util.c +95 -0
  14. data/ext/bson/write.c +680 -0
  15. data/lib/bson.rb +6 -3
  16. data/lib/bson/active_support.rb +17 -0
  17. data/lib/bson/array.rb +57 -17
  18. data/lib/bson/binary.rb +185 -13
  19. data/lib/bson/boolean.rb +12 -3
  20. data/lib/bson/code.rb +16 -2
  21. data/lib/bson/code_with_scope.rb +32 -5
  22. data/lib/bson/config.rb +1 -1
  23. data/lib/bson/date.rb +12 -2
  24. data/lib/bson/date_time.rb +2 -2
  25. data/lib/bson/db_pointer.rb +110 -0
  26. data/lib/bson/decimal128.rb +17 -3
  27. data/lib/bson/decimal128/builder.rb +1 -1
  28. data/lib/bson/document.rb +152 -5
  29. data/lib/bson/environment.rb +2 -1
  30. data/lib/bson/error.rb +27 -0
  31. data/lib/bson/ext_json.rb +383 -0
  32. data/lib/bson/false_class.rb +1 -1
  33. data/lib/bson/float.rb +48 -2
  34. data/lib/bson/hash.rb +68 -17
  35. data/lib/bson/int32.rb +52 -13
  36. data/lib/bson/int64.rb +59 -15
  37. data/lib/bson/integer.rb +36 -2
  38. data/lib/bson/json.rb +1 -1
  39. data/lib/bson/max_key.rb +13 -1
  40. data/lib/bson/min_key.rb +13 -1
  41. data/lib/bson/nil_class.rb +4 -2
  42. data/lib/bson/object.rb +28 -1
  43. data/lib/bson/object_id.rb +16 -2
  44. data/lib/bson/open_struct.rb +1 -1
  45. data/lib/bson/regexp.rb +27 -4
  46. data/lib/bson/registry.rb +3 -3
  47. data/lib/bson/specialized.rb +4 -2
  48. data/lib/bson/string.rb +5 -3
  49. data/lib/bson/symbol.rb +99 -7
  50. data/lib/bson/time.rb +63 -4
  51. data/lib/bson/time_with_zone.rb +54 -0
  52. data/lib/bson/timestamp.rb +44 -6
  53. data/lib/bson/true_class.rb +1 -1
  54. data/lib/bson/undefined.rb +12 -1
  55. data/lib/bson/version.rb +2 -2
  56. data/spec/bson/array_spec.rb +18 -1
  57. data/spec/bson/binary_spec.rb +100 -3
  58. data/spec/bson/binary_uuid_spec.rb +189 -0
  59. data/spec/bson/boolean_spec.rb +1 -1
  60. data/spec/bson/byte_buffer_read_spec.rb +197 -0
  61. data/spec/bson/byte_buffer_spec.rb +121 -381
  62. data/spec/bson/byte_buffer_write_spec.rb +854 -0
  63. data/spec/bson/code_spec.rb +1 -1
  64. data/spec/bson/code_with_scope_spec.rb +1 -1
  65. data/spec/bson/date_spec.rb +1 -1
  66. data/spec/bson/date_time_spec.rb +54 -1
  67. data/spec/bson/decimal128_spec.rb +35 -35
  68. data/spec/bson/document_as_spec.rb +46 -0
  69. data/spec/bson/document_spec.rb +197 -30
  70. data/spec/bson/ext_json_parse_spec.rb +308 -0
  71. data/spec/bson/false_class_spec.rb +1 -1
  72. data/spec/bson/float_spec.rb +37 -1
  73. data/spec/bson/hash_as_spec.rb +57 -0
  74. data/spec/bson/hash_spec.rb +209 -1
  75. data/spec/bson/int32_spec.rb +180 -6
  76. data/spec/bson/int64_spec.rb +199 -6
  77. data/spec/bson/integer_spec.rb +29 -3
  78. data/spec/bson/json_spec.rb +1 -1
  79. data/spec/bson/max_key_spec.rb +1 -1
  80. data/spec/bson/min_key_spec.rb +1 -1
  81. data/spec/bson/nil_class_spec.rb +1 -1
  82. data/spec/bson/object_id_spec.rb +1 -1
  83. data/spec/bson/object_spec.rb +1 -1
  84. data/spec/bson/open_struct_spec.rb +1 -1
  85. data/spec/bson/raw_spec.rb +34 -2
  86. data/spec/bson/regexp_spec.rb +1 -1
  87. data/spec/bson/registry_spec.rb +1 -1
  88. data/spec/bson/string_spec.rb +19 -1
  89. data/spec/bson/symbol_raw_spec.rb +45 -0
  90. data/spec/bson/symbol_spec.rb +63 -3
  91. data/spec/bson/time_spec.rb +205 -2
  92. data/spec/bson/time_with_zone_spec.rb +68 -0
  93. data/spec/bson/timestamp_spec.rb +56 -1
  94. data/spec/bson/true_class_spec.rb +1 -1
  95. data/spec/bson/undefined_spec.rb +1 -1
  96. data/spec/bson_spec.rb +1 -1
  97. data/spec/{support → runners}/common_driver.rb +1 -1
  98. data/spec/runners/corpus.rb +185 -0
  99. data/spec/{support/corpus.rb → runners/corpus_legacy.rb} +41 -59
  100. data/spec/spec_helper.rb +40 -3
  101. data/spec/{bson/driver_bson_spec.rb → spec_tests/common_driver_spec.rb} +1 -0
  102. data/spec/{bson/corpus_spec.rb → spec_tests/corpus_legacy_spec.rb} +10 -7
  103. data/spec/spec_tests/corpus_spec.rb +124 -0
  104. data/spec/spec_tests/data/corpus/README.md +15 -0
  105. data/spec/spec_tests/data/corpus/array.json +49 -0
  106. data/spec/spec_tests/data/corpus/binary.json +113 -0
  107. data/spec/spec_tests/data/corpus/boolean.json +27 -0
  108. data/spec/spec_tests/data/corpus/code.json +67 -0
  109. data/spec/spec_tests/data/corpus/code_w_scope.json +78 -0
  110. data/spec/spec_tests/data/corpus/datetime.json +42 -0
  111. data/spec/spec_tests/data/corpus/dbpointer.json +56 -0
  112. data/spec/spec_tests/data/corpus/dbref.json +31 -0
  113. data/spec/spec_tests/data/corpus/decimal128-1.json +317 -0
  114. data/spec/spec_tests/data/corpus/decimal128-2.json +793 -0
  115. data/spec/spec_tests/data/corpus/decimal128-3.json +1771 -0
  116. data/spec/spec_tests/data/corpus/decimal128-4.json +117 -0
  117. data/spec/spec_tests/data/corpus/decimal128-5.json +402 -0
  118. data/spec/spec_tests/data/corpus/decimal128-6.json +119 -0
  119. data/spec/spec_tests/data/corpus/decimal128-7.json +323 -0
  120. data/spec/spec_tests/data/corpus/document.json +36 -0
  121. data/spec/spec_tests/data/corpus/double.json +87 -0
  122. data/spec/spec_tests/data/corpus/int32.json +43 -0
  123. data/spec/spec_tests/data/corpus/int64.json +43 -0
  124. data/spec/spec_tests/data/corpus/maxkey.json +12 -0
  125. data/spec/spec_tests/data/corpus/minkey.json +12 -0
  126. data/spec/spec_tests/data/corpus/multi-type-deprecated.json +15 -0
  127. data/spec/spec_tests/data/corpus/multi-type.json +11 -0
  128. data/spec/spec_tests/data/corpus/null.json +12 -0
  129. data/spec/spec_tests/data/corpus/oid.json +28 -0
  130. data/spec/spec_tests/data/corpus/regex.json +65 -0
  131. data/spec/spec_tests/data/corpus/string.json +72 -0
  132. data/spec/spec_tests/data/corpus/symbol.json +80 -0
  133. data/spec/spec_tests/data/corpus/timestamp.json +34 -0
  134. data/spec/spec_tests/data/corpus/top.json +236 -0
  135. data/spec/spec_tests/data/corpus/undefined.json +15 -0
  136. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/array.json +8 -2
  137. data/spec/{support/corpus-tests/failures → spec_tests/data/corpus_legacy}/binary.json +0 -0
  138. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/boolean.json +0 -0
  139. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/code.json +1 -1
  140. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/code_w_scope.json +1 -1
  141. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/document.json +1 -1
  142. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/double.json +1 -1
  143. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/datetime.json +0 -0
  144. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/dbpointer.json +0 -0
  145. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/int64.json +0 -0
  146. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/symbol.json +0 -0
  147. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/int32.json +1 -1
  148. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/maxkey.json +1 -1
  149. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/minkey.json +1 -1
  150. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/null.json +1 -1
  151. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/oid.json +0 -0
  152. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/regex.json +1 -1
  153. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/string.json +0 -0
  154. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/timestamp.json +1 -1
  155. data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/top.json +0 -0
  156. data/spec/{support/corpus-tests/failures → spec_tests/data/corpus_legacy}/undefined.json +0 -0
  157. data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-1.json +0 -0
  158. data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-2.json +0 -0
  159. data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-3.json +0 -0
  160. data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-4.json +0 -0
  161. data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-5.json +0 -0
  162. data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-6.json +0 -0
  163. data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-7.json +0 -0
  164. data/spec/support/shared_examples.rb +3 -5
  165. data/spec/support/spec_config.rb +16 -0
  166. data/spec/support/utils.rb +10 -0
  167. metadata +227 -124
  168. metadata.gz.sig +0 -0
  169. data/ext/bson/bson_native.c +0 -762
@@ -0,0 +1,230 @@
1
+ #include <ruby.h>
2
+ #include <ruby/encoding.h>
3
+ #include <stdbool.h>
4
+ #include <unistd.h>
5
+ #include <assert.h>
6
+ #include "bson-native.h"
7
+
8
+ /**
9
+ * Taken from libbson.
10
+ */
11
+
12
+ #define BSON_ASSERT assert
13
+ #define BSON_INLINE
14
+
15
+
16
/*
 *--------------------------------------------------------------------------
 *
 * _bson_utf8_get_sequence --
 *
 *       Classify the lead byte of the UTF-8 character found at @utf8.
 *
 *       The number of bytes announced by the lead byte (1 to 4) is written
 *       to @seq_length, and the mask selecting the payload bits of that
 *       lead byte is written to @first_mask. Continuation bytes are
 *       always masked with 0x3F by the caller.
 *
 * Returns:
 *       None.
 *
 * Side effects:
 *       @seq_length and @first_mask are always written. Both are set to 0
 *       when the byte cannot start a sequence (a continuation byte, or a
 *       prefix announcing more than 4 bytes).
 *
 *--------------------------------------------------------------------------
 */

static BSON_INLINE void
_bson_utf8_get_sequence (const char *utf8,    /* IN */
                         uint8_t *seq_length, /* OUT */
                         uint8_t *first_mask) /* OUT */
{
   const unsigned char lead = *(const unsigned char *) utf8;

   /* Start from "not a valid lead byte" and refine below. */
   uint8_t length = 0;
   uint8_t mask = 0;

   if (lead < 0x80) {
      /* 0xxxxxxx: plain ASCII. */
      length = 1;
      mask = 0x7F;
   } else if ((lead >> 5) == 0x06) {
      /* 110xxxxx: two-byte sequence. */
      length = 2;
      mask = 0x1F;
   } else if ((lead >> 4) == 0x0E) {
      /* 1110xxxx: three-byte sequence. */
      length = 3;
      mask = 0x0F;
   } else if ((lead >> 3) == 0x1E) {
      /* 11110xxx: four-byte sequence. */
      length = 4;
      mask = 0x07;
   }

   *seq_length = length;
   *first_mask = mask;
}
73
+
74
+
75
+ /*
76
+ *--------------------------------------------------------------------------
77
+ *
78
+ * bson_utf8_validate --
79
+ *
80
+ * Validates that @utf8 is a valid UTF-8 string. Note that we only
81
+ * support UTF-8 characters which have sequence length less than or equal
82
+ * to 4 bytes (RFC 3629).
83
+ *
84
+ * If @allow_null is true, then \0 is allowed within @utf8_len bytes
85
+ * of @utf8. Generally, this is bad practice since the main point of
86
+ * UTF-8 strings is that they can be used with strlen() and friends.
87
+ * However, some languages such as Python can send UTF-8 encoded
88
+ * strings with NUL's in them.
89
+ *
90
+ * Parameters:
91
+ * @utf8: A UTF-8 encoded string.
92
+ * @utf8_len: The length of @utf8 in bytes.
93
+ * @allow_null: If \0 is allowed within @utf8, exclusing trailing \0.
94
+ * @data_type: The data type being serialized.
95
+ *
96
+ * Returns:
97
+ * true if @utf8 is valid UTF-8. otherwise false.
98
+ *
99
+ * Side effects:
100
+ * None.
101
+ *
102
+ *--------------------------------------------------------------------------
103
+ */
104
+
105
+ void
106
+ rb_bson_utf8_validate (const char *utf8, /* IN */
107
+ size_t utf8_len, /* IN */
108
+ bool allow_null, /* IN */
109
+ const char *data_type) /* IN */
110
+ {
111
+ uint32_t c;
112
+ uint8_t first_mask;
113
+ uint8_t seq_length;
114
+ unsigned i;
115
+ unsigned j;
116
+ bool not_shortest_form;
117
+
118
+ BSON_ASSERT (utf8);
119
+
120
+ for (i = 0; i < utf8_len; i += seq_length) {
121
+ _bson_utf8_get_sequence (&utf8[i], &seq_length, &first_mask);
122
+
123
+ /*
124
+ * Ensure we have a valid multi-byte sequence length.
125
+ */
126
+ if (!seq_length) {
127
+ rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: bogus initial bits", data_type, utf8);
128
+ }
129
+
130
+ /*
131
+ * Ensure we have enough bytes left.
132
+ */
133
+ if ((utf8_len - i) < seq_length) {
134
+ rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: truncated multi-byte sequence", data_type, utf8);
135
+ }
136
+
137
+ /*
138
+ * Also calculate the next char as a unichar so we can
139
+ * check code ranges for non-shortest form.
140
+ */
141
+ c = utf8[i] & first_mask;
142
+
143
+ /*
144
+ * Check the high-bits for each additional sequence byte.
145
+ */
146
+ for (j = i + 1; j < (i + seq_length); j++) {
147
+ c = (c << 6) | (utf8[j] & 0x3F);
148
+ if ((utf8[j] & 0xC0) != 0x80) {
149
+ rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: bogus high bits for continuation byte", data_type, utf8);
150
+ }
151
+ }
152
+
153
+ /*
154
+ * Check for NULL bytes afterwards.
155
+ *
156
+ * Hint: if you want to optimize this function, starting here to do
157
+ * this in the same pass as the data above would probably be a good
158
+ * idea. You would add a branch into the inner loop, but save possibly
159
+ * on cache-line bouncing on larger strings. Just a thought.
160
+ */
161
+ if (!allow_null) {
162
+ for (j = 0; j < seq_length; j++) {
163
+ if (((i + j) > utf8_len) || !utf8[i + j]) {
164
+ rb_raise(rb_eArgError, "%s %s contains null bytes", data_type, utf8);
165
+ }
166
+ }
167
+ }
168
+
169
+ /*
170
+ * Code point won't fit in utf-16, not allowed.
171
+ */
172
+ if (c > 0x0010FFFF) {
173
+ rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: code point %"PRIu32" does not fit in UTF-16", data_type, utf8, c);
174
+ }
175
+
176
+ /*
177
+ * Byte is in reserved range for UTF-16 high-marks
178
+ * for surrogate pairs.
179
+ */
180
+ if ((c & 0xFFFFF800) == 0xD800) {
181
+ rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: byte is in surrogate pair reserved range", data_type, utf8);
182
+ }
183
+
184
+ /*
185
+ * Check non-shortest form unicode.
186
+ */
187
+ not_shortest_form = false;
188
+ switch (seq_length) {
189
+ case 1:
190
+ if (c <= 0x007F) {
191
+ continue;
192
+ }
193
+ not_shortest_form = true;
194
+
195
+ case 2:
196
+ if ((c >= 0x0080) && (c <= 0x07FF)) {
197
+ continue;
198
+ } else if (c == 0) {
199
+ /* Two-byte representation for NULL. */
200
+ if (!allow_null) {
201
+ rb_raise(rb_eArgError, "%s %s contains null bytes", data_type, utf8);
202
+ }
203
+ continue;
204
+ }
205
+ not_shortest_form = true;
206
+
207
+ case 3:
208
+ if (((c >= 0x0800) && (c <= 0x0FFF)) ||
209
+ ((c >= 0x1000) && (c <= 0xFFFF))) {
210
+ continue;
211
+ }
212
+ not_shortest_form = true;
213
+
214
+ case 4:
215
+ if (((c >= 0x10000) && (c <= 0x3FFFF)) ||
216
+ ((c >= 0x40000) && (c <= 0xFFFFF)) ||
217
+ ((c >= 0x100000) && (c <= 0x10FFFF))) {
218
+ continue;
219
+ }
220
+ not_shortest_form = true;
221
+
222
+ default:
223
+ not_shortest_form = true;
224
+ }
225
+
226
+ if (not_shortest_form) {
227
+ rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: not in shortest form", data_type, utf8);
228
+ }
229
+ }
230
+ }
data/ext/bson/read.c ADDED
@@ -0,0 +1,411 @@
1
+ /*
2
+ * Copyright (C) 2009-2020 MongoDB Inc.
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License");
5
+ * you may not use this file except in compliance with the License.
6
+ * You may obtain a copy of the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS,
12
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ * See the License for the specific language governing permissions and
14
+ * limitations under the License.
15
+ */
16
+
17
+ #include "bson-native.h"
18
+ #include <ruby/encoding.h>
19
+
20
+ static void pvt_raise_decode_error(volatile VALUE msg);
21
+ static int32_t pvt_validate_length(byte_buffer_t *b);
22
+ static uint8_t pvt_get_type_byte(byte_buffer_t *b);
23
+ static VALUE pvt_get_int32(byte_buffer_t *b);
24
+ static VALUE pvt_get_uint32(byte_buffer_t *b);
25
+ static VALUE pvt_get_int64(byte_buffer_t *b, int argc, VALUE *argv);
26
+ static VALUE pvt_get_double(byte_buffer_t *b);
27
+ static VALUE pvt_get_string(byte_buffer_t *b, const char *data_type);
28
+ static VALUE pvt_get_symbol(byte_buffer_t *b, VALUE rb_buffer, int argc, VALUE *argv);
29
+ static VALUE pvt_get_boolean(byte_buffer_t *b);
30
+ static VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type, int argc, VALUE *argv);
31
+ static void pvt_skip_cstring(byte_buffer_t *b);
32
+
33
+ void pvt_raise_decode_error(volatile VALUE msg) {
34
+ VALUE klass = pvt_const_get_3("BSON", "Error", "BSONDecodeError");
35
+ rb_exc_raise(rb_exc_new_str(klass, msg));
36
+ }
37
+
38
+ /**
39
+ * validate the buffer contains the amount of bytes the array / hash claimns
40
+ * and that it is null terminated
41
+ */
42
+ int32_t pvt_validate_length(byte_buffer_t *b)
43
+ {
44
+ int32_t length;
45
+
46
+ ENSURE_BSON_READ(b, 4);
47
+ memcpy(&length, READ_PTR(b), 4);
48
+ length = BSON_UINT32_TO_LE(length);
49
+
50
+ /* minimum valid length is 4 (byte count) + 1 (terminating byte) */
51
+ if(length >= 5){
52
+ ENSURE_BSON_READ(b, length);
53
+
54
+ /* The last byte should be a null byte: it should be at length - 1 */
55
+ if( *(READ_PTR(b) + length - 1) != 0 ){
56
+ rb_raise(rb_eRangeError, "Buffer should have contained null terminator at %zu but contained %d", b->read_position + (size_t)length, (int)*(READ_PTR(b) + length));
57
+ }
58
+ b->read_position += 4;
59
+ }
60
+ else{
61
+ rb_raise(rb_eRangeError, "Buffer contained invalid length %d at %zu", length, b->read_position);
62
+ }
63
+
64
+ return length;
65
+ }
66
+
67
+ /**
68
+ * Read a single field from a hash or array
69
+ */
70
+ VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type, int argc, VALUE *argv)
71
+ {
72
+ switch(type) {
73
+ case BSON_TYPE_INT32: return pvt_get_int32(b);
74
+ case BSON_TYPE_INT64: return pvt_get_int64(b, argc, argv);
75
+ case BSON_TYPE_DOUBLE: return pvt_get_double(b);
76
+ case BSON_TYPE_STRING: return pvt_get_string(b, "String");
77
+ case BSON_TYPE_SYMBOL: return pvt_get_symbol(b, rb_buffer, argc, argv);
78
+ case BSON_TYPE_ARRAY: return rb_bson_byte_buffer_get_array(argc, argv, rb_buffer);
79
+ case BSON_TYPE_DOCUMENT: return rb_bson_byte_buffer_get_hash(argc, argv, rb_buffer);
80
+ case BSON_TYPE_BOOLEAN: return pvt_get_boolean(b);
81
+ default:
82
+ {
83
+ VALUE klass = rb_funcall(rb_bson_registry,rb_intern("get"),1, INT2FIX(type));
84
+ VALUE value = rb_funcall(klass, rb_intern("from_bson"),1, rb_buffer);
85
+ RB_GC_GUARD(klass);
86
+ return value;
87
+ }
88
+ }
89
+ }
90
+
91
+ /**
92
+ * Get a single byte from the buffer.
93
+ */
94
+ VALUE rb_bson_byte_buffer_get_byte(VALUE self)
95
+ {
96
+ byte_buffer_t *b;
97
+ VALUE byte;
98
+
99
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
100
+ ENSURE_BSON_READ(b, 1);
101
+ byte = rb_str_new(READ_PTR(b), 1);
102
+ b->read_position += 1;
103
+ return byte;
104
+ }
105
+
106
+ uint8_t pvt_get_type_byte(byte_buffer_t *b){
107
+ int8_t byte;
108
+ ENSURE_BSON_READ(b, 1);
109
+ byte = *READ_PTR(b);
110
+ b->read_position += 1;
111
+ return (uint8_t)byte;
112
+ }
113
+
114
+ /**
115
+ * Get bytes from the buffer.
116
+ */
117
+ VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i)
118
+ {
119
+ byte_buffer_t *b;
120
+ VALUE bytes;
121
+ const uint32_t length = FIX2LONG(i);
122
+
123
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
124
+ ENSURE_BSON_READ(b, length);
125
+ bytes = rb_str_new(READ_PTR(b), length);
126
+ b->read_position += length;
127
+ return bytes;
128
+ }
129
+
130
+ VALUE pvt_get_boolean(byte_buffer_t *b){
131
+ VALUE result;
132
+ char byte_value;
133
+ ENSURE_BSON_READ(b, 1);
134
+ byte_value = *READ_PTR(b);
135
+ switch (byte_value) {
136
+ case 1:
137
+ result = Qtrue;
138
+ break;
139
+ case 0:
140
+ result = Qfalse;
141
+ break;
142
+ default:
143
+ pvt_raise_decode_error(rb_sprintf("Invalid boolean byte value: %d", (int) byte_value));
144
+ }
145
+ b->read_position += 1;
146
+ return result;
147
+ }
148
+
149
+ /**
150
+ * Get a string from the buffer.
151
+ */
152
+ VALUE rb_bson_byte_buffer_get_string(VALUE self)
153
+ {
154
+ byte_buffer_t *b;
155
+
156
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
157
+ return pvt_get_string(b, "String");
158
+ }
159
+
160
+ VALUE pvt_get_string(byte_buffer_t *b, const char *data_type)
161
+ {
162
+ int32_t length_le;
163
+ int32_t length;
164
+ char *str_ptr;
165
+ VALUE string;
166
+ unsigned char last_byte;
167
+
168
+ ENSURE_BSON_READ(b, 4);
169
+ memcpy(&length_le, READ_PTR(b), 4);
170
+ length = BSON_UINT32_FROM_LE(length_le);
171
+ if (length < 0) {
172
+ pvt_raise_decode_error(rb_sprintf("String length is negative: %d", length));
173
+ }
174
+ if (length == 0) {
175
+ pvt_raise_decode_error(rb_str_new_cstr("String length is zero but string must be null-terminated"));
176
+ }
177
+ ENSURE_BSON_READ(b, 4 + length);
178
+ str_ptr = READ_PTR(b) + 4;
179
+ last_byte = *(READ_PTR(b) + 4 + length - 1);
180
+ if (last_byte != 0) {
181
+ pvt_raise_decode_error(rb_sprintf("Last byte of the string is not null: 0x%x", (int) last_byte));
182
+ }
183
+ rb_bson_utf8_validate(str_ptr, length - 1, true, data_type);
184
+ string = rb_enc_str_new(str_ptr, length - 1, rb_utf8_encoding());
185
+ b->read_position += 4 + length;
186
+ return string;
187
+ }
188
+
189
+ /**
190
+ * Reads a UTF-8 string out of the byte buffer. If the argc/argv arguments
191
+ * have a :mode option with the value of :bson, wraps the string in a
192
+ * BSON::Symbol::Raw. Otherwise consults the BSON registry to determine
193
+ * which class to instantiate (String in bson-ruby, overridden to Symbol by
194
+ * the Ruby driver). Returns either a BSON::Symbol::Raw, Symbol or String
195
+ * value.
196
+ */
197
+ VALUE pvt_get_symbol(byte_buffer_t *b, VALUE rb_buffer, int argc, VALUE *argv)
198
+ {
199
+ VALUE value, klass;
200
+
201
+ if (pvt_get_mode_option(argc, argv) == BSON_MODE_BSON) {
202
+ value = pvt_get_string(b, "Symbol");
203
+ klass = pvt_const_get_3("BSON", "Symbol", "Raw");
204
+ value = rb_funcall(klass, rb_intern("new"), 1, value);
205
+ } else {
206
+ klass = rb_funcall(rb_bson_registry, rb_intern("get"), 1, INT2FIX(BSON_TYPE_SYMBOL));
207
+ value = rb_funcall(klass, rb_intern("from_bson"), 1, rb_buffer);
208
+ }
209
+
210
+ RB_GC_GUARD(klass);
211
+ return value;
212
+ }
213
+
214
+ /**
215
+ * Get a cstring from the buffer.
216
+ */
217
+ VALUE rb_bson_byte_buffer_get_cstring(VALUE self)
218
+ {
219
+ byte_buffer_t *b;
220
+ VALUE string;
221
+ int length;
222
+
223
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
224
+ length = (int)strlen(READ_PTR(b));
225
+ ENSURE_BSON_READ(b, length);
226
+ string = rb_enc_str_new(READ_PTR(b), length, rb_utf8_encoding());
227
+ b->read_position += length + 1;
228
+ return string;
229
+ }
230
+
231
+ /**
232
+ * Reads but does not return a cstring from the buffer.
233
+ */
234
+ void pvt_skip_cstring(byte_buffer_t *b)
235
+ {
236
+ int length;
237
+ length = (int)strlen(READ_PTR(b));
238
+ ENSURE_BSON_READ(b, length);
239
+ b->read_position += length + 1;
240
+ }
241
+
242
+ /**
243
+ * Get a int32 from the buffer.
244
+ */
245
+ VALUE rb_bson_byte_buffer_get_int32(VALUE self)
246
+ {
247
+ byte_buffer_t *b;
248
+
249
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
250
+ return pvt_get_int32(b);
251
+ }
252
+
253
+ VALUE pvt_get_int32(byte_buffer_t *b)
254
+ {
255
+ int32_t i32;
256
+
257
+ ENSURE_BSON_READ(b, 4);
258
+ memcpy(&i32, READ_PTR(b), 4);
259
+ b->read_position += 4;
260
+ return INT2NUM(BSON_UINT32_FROM_LE(i32));
261
+ }
262
+
263
+ /**
264
+ * Get an unsigned int32 from the buffer.
265
+ */
266
+ VALUE rb_bson_byte_buffer_get_uint32(VALUE self)
267
+ {
268
+ byte_buffer_t *b;
269
+
270
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
271
+ return pvt_get_uint32(b);
272
+ }
273
+
274
+ VALUE pvt_get_uint32(byte_buffer_t *b)
275
+ {
276
+ uint32_t i32;
277
+
278
+ ENSURE_BSON_READ(b, 4);
279
+ memcpy(&i32, READ_PTR(b), 4);
280
+ b->read_position += 4;
281
+ return UINT2NUM(BSON_UINT32_FROM_LE(i32));
282
+ }
283
+
284
+
285
+ /**
286
+ * Get a int64 from the buffer.
287
+ */
288
+ VALUE rb_bson_byte_buffer_get_int64(VALUE self)
289
+ {
290
+ byte_buffer_t *b;
291
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
292
+ return pvt_get_int64(b, 0, NULL);
293
+ }
294
+
295
+ /**
296
+ * Reads a 64-bit integer out of the byte buffer into a Ruby Integer instance.
297
+ * If the argc/argv arguments have a :mode option with the value of :bson,
298
+ * wraps the integer in a BSON::Int64. Returns either the Integer or the
299
+ * BSON::Int64 instance.
300
+ */
301
+ VALUE pvt_get_int64(byte_buffer_t *b, int argc, VALUE *argv)
302
+ {
303
+ int64_t i64;
304
+ VALUE num;
305
+
306
+ ENSURE_BSON_READ(b, 8);
307
+ memcpy(&i64, READ_PTR(b), 8);
308
+ b->read_position += 8;
309
+ num = LL2NUM(BSON_UINT64_FROM_LE(i64));
310
+
311
+ if (pvt_get_mode_option(argc, argv) == BSON_MODE_BSON) {
312
+ VALUE klass = rb_funcall(rb_bson_registry,rb_intern("get"),1, INT2FIX(BSON_TYPE_INT64));
313
+ VALUE value = rb_funcall(klass, rb_intern("new"), 1, num);
314
+ RB_GC_GUARD(klass);
315
+ return value;
316
+ } else {
317
+ return num;
318
+ }
319
+
320
+ RB_GC_GUARD(num);
321
+ }
322
+
323
+ /**
324
+ * Get a double from the buffer.
325
+ */
326
+ VALUE rb_bson_byte_buffer_get_double(VALUE self)
327
+ {
328
+ byte_buffer_t *b;
329
+
330
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
331
+ return pvt_get_double(b);
332
+ }
333
+
334
+ VALUE pvt_get_double(byte_buffer_t *b)
335
+ {
336
+ double d;
337
+
338
+ ENSURE_BSON_READ(b, 8);
339
+ memcpy(&d, READ_PTR(b), 8);
340
+ b->read_position += 8;
341
+ return DBL2NUM(BSON_DOUBLE_FROM_LE(d));
342
+ }
343
+
344
+ /**
345
+ * Get the 16 bytes representing the decimal128 from the buffer.
346
+ */
347
+ VALUE rb_bson_byte_buffer_get_decimal128_bytes(VALUE self)
348
+ {
349
+ byte_buffer_t *b;
350
+ VALUE bytes;
351
+
352
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
353
+ ENSURE_BSON_READ(b, 16);
354
+ bytes = rb_str_new(READ_PTR(b), 16);
355
+ b->read_position += 16;
356
+ return bytes;
357
+ }
358
+
359
+ VALUE rb_bson_byte_buffer_get_hash(int argc, VALUE *argv, VALUE self){
360
+ VALUE doc = Qnil;
361
+ byte_buffer_t *b = NULL;
362
+ uint8_t type;
363
+ VALUE cDocument = pvt_const_get_2("BSON", "Document");
364
+ int32_t length;
365
+ char *start_ptr;
366
+
367
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
368
+
369
+ start_ptr = READ_PTR(b);
370
+ length = pvt_validate_length(b);
371
+
372
+ doc = rb_funcall(cDocument, rb_intern("allocate"), 0);
373
+
374
+ while((type = pvt_get_type_byte(b)) != 0){
375
+ VALUE field = rb_bson_byte_buffer_get_cstring(self);
376
+ rb_hash_aset(doc, field, pvt_read_field(b, self, type, argc, argv));
377
+ RB_GC_GUARD(field);
378
+ }
379
+
380
+ if (READ_PTR(b) - start_ptr != length) {
381
+ pvt_raise_decode_error(rb_sprintf("Expected to read %d bytes for the hash but read %ld bytes", length, READ_PTR(b) - start_ptr));
382
+ }
383
+
384
+ return doc;
385
+ }
386
+
387
+ VALUE rb_bson_byte_buffer_get_array(int argc, VALUE *argv, VALUE self){
388
+ byte_buffer_t *b;
389
+ VALUE array = Qnil;
390
+ uint8_t type;
391
+ int32_t length;
392
+ char *start_ptr;
393
+
394
+ TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
395
+
396
+ start_ptr = READ_PTR(b);
397
+ length = pvt_validate_length(b);
398
+
399
+ array = rb_ary_new();
400
+ while((type = pvt_get_type_byte(b)) != 0){
401
+ pvt_skip_cstring(b);
402
+ rb_ary_push(array, pvt_read_field(b, self, type, argc, argv));
403
+ }
404
+ RB_GC_GUARD(array);
405
+
406
+ if (READ_PTR(b) - start_ptr != length) {
407
+ pvt_raise_decode_error(rb_sprintf("Expected to read %d bytes for the hash but read %ld bytes", length, READ_PTR(b) - start_ptr));
408
+ }
409
+
410
+ return array;
411
+ }