bson 4.2.2 → 4.12.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/README.md +25 -7
- data/Rakefile +16 -9
- data/ext/bson/{native-endian.h → bson-endian.h} +5 -99
- data/ext/bson/bson-native.h +125 -0
- data/ext/bson/bytebuf.c +133 -0
- data/ext/bson/endian.c +117 -0
- data/ext/bson/init.c +355 -0
- data/ext/bson/libbson-utf8.c +230 -0
- data/ext/bson/read.c +411 -0
- data/ext/bson/util.c +95 -0
- data/ext/bson/write.c +680 -0
- data/lib/bson.rb +6 -3
- data/lib/bson/active_support.rb +17 -0
- data/lib/bson/array.rb +57 -17
- data/lib/bson/binary.rb +185 -13
- data/lib/bson/boolean.rb +12 -3
- data/lib/bson/code.rb +16 -2
- data/lib/bson/code_with_scope.rb +32 -5
- data/lib/bson/config.rb +1 -1
- data/lib/bson/date.rb +12 -2
- data/lib/bson/date_time.rb +2 -2
- data/lib/bson/db_pointer.rb +110 -0
- data/lib/bson/decimal128.rb +17 -3
- data/lib/bson/decimal128/builder.rb +1 -1
- data/lib/bson/document.rb +152 -5
- data/lib/bson/environment.rb +2 -1
- data/lib/bson/error.rb +27 -0
- data/lib/bson/ext_json.rb +383 -0
- data/lib/bson/false_class.rb +1 -1
- data/lib/bson/float.rb +48 -2
- data/lib/bson/hash.rb +68 -17
- data/lib/bson/int32.rb +52 -13
- data/lib/bson/int64.rb +59 -15
- data/lib/bson/integer.rb +36 -2
- data/lib/bson/json.rb +1 -1
- data/lib/bson/max_key.rb +13 -1
- data/lib/bson/min_key.rb +13 -1
- data/lib/bson/nil_class.rb +4 -2
- data/lib/bson/object.rb +28 -1
- data/lib/bson/object_id.rb +16 -2
- data/lib/bson/open_struct.rb +1 -1
- data/lib/bson/regexp.rb +27 -4
- data/lib/bson/registry.rb +3 -3
- data/lib/bson/specialized.rb +4 -2
- data/lib/bson/string.rb +5 -3
- data/lib/bson/symbol.rb +99 -7
- data/lib/bson/time.rb +63 -4
- data/lib/bson/time_with_zone.rb +54 -0
- data/lib/bson/timestamp.rb +44 -6
- data/lib/bson/true_class.rb +1 -1
- data/lib/bson/undefined.rb +12 -1
- data/lib/bson/version.rb +2 -2
- data/spec/bson/array_spec.rb +18 -1
- data/spec/bson/binary_spec.rb +100 -3
- data/spec/bson/binary_uuid_spec.rb +189 -0
- data/spec/bson/boolean_spec.rb +1 -1
- data/spec/bson/byte_buffer_read_spec.rb +197 -0
- data/spec/bson/byte_buffer_spec.rb +121 -381
- data/spec/bson/byte_buffer_write_spec.rb +854 -0
- data/spec/bson/code_spec.rb +1 -1
- data/spec/bson/code_with_scope_spec.rb +1 -1
- data/spec/bson/date_spec.rb +1 -1
- data/spec/bson/date_time_spec.rb +54 -1
- data/spec/bson/decimal128_spec.rb +35 -35
- data/spec/bson/document_as_spec.rb +46 -0
- data/spec/bson/document_spec.rb +197 -30
- data/spec/bson/ext_json_parse_spec.rb +308 -0
- data/spec/bson/false_class_spec.rb +1 -1
- data/spec/bson/float_spec.rb +37 -1
- data/spec/bson/hash_as_spec.rb +57 -0
- data/spec/bson/hash_spec.rb +209 -1
- data/spec/bson/int32_spec.rb +180 -6
- data/spec/bson/int64_spec.rb +199 -6
- data/spec/bson/integer_spec.rb +29 -3
- data/spec/bson/json_spec.rb +1 -1
- data/spec/bson/max_key_spec.rb +1 -1
- data/spec/bson/min_key_spec.rb +1 -1
- data/spec/bson/nil_class_spec.rb +1 -1
- data/spec/bson/object_id_spec.rb +1 -1
- data/spec/bson/object_spec.rb +1 -1
- data/spec/bson/open_struct_spec.rb +1 -1
- data/spec/bson/raw_spec.rb +34 -2
- data/spec/bson/regexp_spec.rb +1 -1
- data/spec/bson/registry_spec.rb +1 -1
- data/spec/bson/string_spec.rb +19 -1
- data/spec/bson/symbol_raw_spec.rb +45 -0
- data/spec/bson/symbol_spec.rb +63 -3
- data/spec/bson/time_spec.rb +205 -2
- data/spec/bson/time_with_zone_spec.rb +68 -0
- data/spec/bson/timestamp_spec.rb +56 -1
- data/spec/bson/true_class_spec.rb +1 -1
- data/spec/bson/undefined_spec.rb +1 -1
- data/spec/bson_spec.rb +1 -1
- data/spec/{support → runners}/common_driver.rb +1 -1
- data/spec/runners/corpus.rb +185 -0
- data/spec/{support/corpus.rb → runners/corpus_legacy.rb} +41 -59
- data/spec/spec_helper.rb +40 -3
- data/spec/{bson/driver_bson_spec.rb → spec_tests/common_driver_spec.rb} +1 -0
- data/spec/{bson/corpus_spec.rb → spec_tests/corpus_legacy_spec.rb} +10 -7
- data/spec/spec_tests/corpus_spec.rb +124 -0
- data/spec/spec_tests/data/corpus/README.md +15 -0
- data/spec/spec_tests/data/corpus/array.json +49 -0
- data/spec/spec_tests/data/corpus/binary.json +113 -0
- data/spec/spec_tests/data/corpus/boolean.json +27 -0
- data/spec/spec_tests/data/corpus/code.json +67 -0
- data/spec/spec_tests/data/corpus/code_w_scope.json +78 -0
- data/spec/spec_tests/data/corpus/datetime.json +42 -0
- data/spec/spec_tests/data/corpus/dbpointer.json +56 -0
- data/spec/spec_tests/data/corpus/dbref.json +31 -0
- data/spec/spec_tests/data/corpus/decimal128-1.json +317 -0
- data/spec/spec_tests/data/corpus/decimal128-2.json +793 -0
- data/spec/spec_tests/data/corpus/decimal128-3.json +1771 -0
- data/spec/spec_tests/data/corpus/decimal128-4.json +117 -0
- data/spec/spec_tests/data/corpus/decimal128-5.json +402 -0
- data/spec/spec_tests/data/corpus/decimal128-6.json +119 -0
- data/spec/spec_tests/data/corpus/decimal128-7.json +323 -0
- data/spec/spec_tests/data/corpus/document.json +36 -0
- data/spec/spec_tests/data/corpus/double.json +87 -0
- data/spec/spec_tests/data/corpus/int32.json +43 -0
- data/spec/spec_tests/data/corpus/int64.json +43 -0
- data/spec/spec_tests/data/corpus/maxkey.json +12 -0
- data/spec/spec_tests/data/corpus/minkey.json +12 -0
- data/spec/spec_tests/data/corpus/multi-type-deprecated.json +15 -0
- data/spec/spec_tests/data/corpus/multi-type.json +11 -0
- data/spec/spec_tests/data/corpus/null.json +12 -0
- data/spec/spec_tests/data/corpus/oid.json +28 -0
- data/spec/spec_tests/data/corpus/regex.json +65 -0
- data/spec/spec_tests/data/corpus/string.json +72 -0
- data/spec/spec_tests/data/corpus/symbol.json +80 -0
- data/spec/spec_tests/data/corpus/timestamp.json +34 -0
- data/spec/spec_tests/data/corpus/top.json +236 -0
- data/spec/spec_tests/data/corpus/undefined.json +15 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/array.json +8 -2
- data/spec/{support/corpus-tests/failures → spec_tests/data/corpus_legacy}/binary.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/boolean.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/code.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/code_w_scope.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/document.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/double.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/datetime.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/dbpointer.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/int64.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/failures/symbol.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/int32.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/maxkey.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/minkey.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/null.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/oid.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/regex.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/string.json +0 -0
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/timestamp.json +1 -1
- data/spec/{support/corpus-tests → spec_tests/data/corpus_legacy}/top.json +0 -0
- data/spec/{support/corpus-tests/failures → spec_tests/data/corpus_legacy}/undefined.json +0 -0
- data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-1.json +0 -0
- data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-2.json +0 -0
- data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-3.json +0 -0
- data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-4.json +0 -0
- data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-5.json +0 -0
- data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-6.json +0 -0
- data/spec/{support/driver-spec-tests → spec_tests/data}/decimal128/decimal128-7.json +0 -0
- data/spec/support/shared_examples.rb +3 -5
- data/spec/support/spec_config.rb +16 -0
- data/spec/support/utils.rb +10 -0
- metadata +227 -124
- metadata.gz.sig +0 -0
- data/ext/bson/bson_native.c +0 -762
@@ -0,0 +1,230 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <ruby/encoding.h>
|
3
|
+
#include <stdbool.h>
|
4
|
+
#include <unistd.h>
|
5
|
+
#include <assert.h>
|
6
|
+
#include "bson-native.h"
|
7
|
+
|
8
|
+
/**
|
9
|
+
* Taken from libbson.
|
10
|
+
*/
|
11
|
+
|
12
|
+
#define BSON_ASSERT assert
#define BSON_INLINE


/*
 *--------------------------------------------------------------------------
 *
 * _bson_utf8_get_sequence --
 *
 *       Classify the lead byte of the UTF-8 character starting at @utf8.
 *       The number of bytes the lead byte announces is written to
 *       @seq_length, and the mask that extracts the payload bits of the
 *       lead byte is written to @first_mask. Continuation bytes are not
 *       inspected here; callers mask them against 0x3F.
 *
 *       Lead byte ranges (see
 *       http://www.joelonsoftware.com/articles/Unicode.html):
 *
 *          0x00..0x7F  -> 1 byte,  mask 0x7F
 *          0xC0..0xDF  -> 2 bytes, mask 0x1F
 *          0xE0..0xEF  -> 3 bytes, mask 0x0F
 *          0xF0..0xF7  -> 4 bytes, mask 0x07
 *          otherwise   -> length 0 and mask 0 (not a valid lead byte)
 *
 * Returns:
 *       None.
 *
 * Side effects:
 *       @seq_length is set.
 *       @first_mask is set.
 *
 *--------------------------------------------------------------------------
 */

static BSON_INLINE void
_bson_utf8_get_sequence (const char *utf8,    /* IN */
                         uint8_t *seq_length, /* OUT */
                         uint8_t *first_mask) /* OUT */
{
   const unsigned char lead = (unsigned char) *utf8;
   /* Defaults describe an invalid lead byte (stray continuation or 0xF8+). */
   uint8_t length = 0;
   uint8_t mask = 0;

   if (lead < 0x80) {
      length = 1;
      mask = 0x7F;
   } else if (lead >= 0xC0 && lead <= 0xDF) {
      length = 2;
      mask = 0x1F;
   } else if (lead >= 0xE0 && lead <= 0xEF) {
      length = 3;
      mask = 0x0F;
   } else if (lead >= 0xF0 && lead <= 0xF7) {
      length = 4;
      mask = 0x07;
   }

   *seq_length = length;
   *first_mask = mask;
}
|
73
|
+
|
74
|
+
|
75
|
+
/*
|
76
|
+
*--------------------------------------------------------------------------
|
77
|
+
*
|
78
|
+
* bson_utf8_validate --
|
79
|
+
*
|
80
|
+
* Validates that @utf8 is a valid UTF-8 string. Note that we only
|
81
|
+
* support UTF-8 characters which have sequence length less than or equal
|
82
|
+
* to 4 bytes (RFC 3629).
|
83
|
+
*
|
84
|
+
* If @allow_null is true, then \0 is allowed within @utf8_len bytes
|
85
|
+
* of @utf8. Generally, this is bad practice since the main point of
|
86
|
+
* UTF-8 strings is that they can be used with strlen() and friends.
|
87
|
+
* However, some languages such as Python can send UTF-8 encoded
|
88
|
+
* strings with NUL's in them.
|
89
|
+
*
|
90
|
+
* Parameters:
|
91
|
+
* @utf8: A UTF-8 encoded string.
|
92
|
+
* @utf8_len: The length of @utf8 in bytes.
|
93
|
+
* @allow_null: If \0 is allowed within @utf8, exclusing trailing \0.
|
94
|
+
* @data_type: The data type being serialized.
|
95
|
+
*
|
96
|
+
* Returns:
|
97
|
+
* true if @utf8 is valid UTF-8. otherwise false.
|
98
|
+
*
|
99
|
+
* Side effects:
|
100
|
+
* None.
|
101
|
+
*
|
102
|
+
*--------------------------------------------------------------------------
|
103
|
+
*/
|
104
|
+
|
105
|
+
void
|
106
|
+
rb_bson_utf8_validate (const char *utf8, /* IN */
|
107
|
+
size_t utf8_len, /* IN */
|
108
|
+
bool allow_null, /* IN */
|
109
|
+
const char *data_type) /* IN */
|
110
|
+
{
|
111
|
+
uint32_t c;
|
112
|
+
uint8_t first_mask;
|
113
|
+
uint8_t seq_length;
|
114
|
+
unsigned i;
|
115
|
+
unsigned j;
|
116
|
+
bool not_shortest_form;
|
117
|
+
|
118
|
+
BSON_ASSERT (utf8);
|
119
|
+
|
120
|
+
for (i = 0; i < utf8_len; i += seq_length) {
|
121
|
+
_bson_utf8_get_sequence (&utf8[i], &seq_length, &first_mask);
|
122
|
+
|
123
|
+
/*
|
124
|
+
* Ensure we have a valid multi-byte sequence length.
|
125
|
+
*/
|
126
|
+
if (!seq_length) {
|
127
|
+
rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: bogus initial bits", data_type, utf8);
|
128
|
+
}
|
129
|
+
|
130
|
+
/*
|
131
|
+
* Ensure we have enough bytes left.
|
132
|
+
*/
|
133
|
+
if ((utf8_len - i) < seq_length) {
|
134
|
+
rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: truncated multi-byte sequence", data_type, utf8);
|
135
|
+
}
|
136
|
+
|
137
|
+
/*
|
138
|
+
* Also calculate the next char as a unichar so we can
|
139
|
+
* check code ranges for non-shortest form.
|
140
|
+
*/
|
141
|
+
c = utf8[i] & first_mask;
|
142
|
+
|
143
|
+
/*
|
144
|
+
* Check the high-bits for each additional sequence byte.
|
145
|
+
*/
|
146
|
+
for (j = i + 1; j < (i + seq_length); j++) {
|
147
|
+
c = (c << 6) | (utf8[j] & 0x3F);
|
148
|
+
if ((utf8[j] & 0xC0) != 0x80) {
|
149
|
+
rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: bogus high bits for continuation byte", data_type, utf8);
|
150
|
+
}
|
151
|
+
}
|
152
|
+
|
153
|
+
/*
|
154
|
+
* Check for NULL bytes afterwards.
|
155
|
+
*
|
156
|
+
* Hint: if you want to optimize this function, starting here to do
|
157
|
+
* this in the same pass as the data above would probably be a good
|
158
|
+
* idea. You would add a branch into the inner loop, but save possibly
|
159
|
+
* on cache-line bouncing on larger strings. Just a thought.
|
160
|
+
*/
|
161
|
+
if (!allow_null) {
|
162
|
+
for (j = 0; j < seq_length; j++) {
|
163
|
+
if (((i + j) > utf8_len) || !utf8[i + j]) {
|
164
|
+
rb_raise(rb_eArgError, "%s %s contains null bytes", data_type, utf8);
|
165
|
+
}
|
166
|
+
}
|
167
|
+
}
|
168
|
+
|
169
|
+
/*
|
170
|
+
* Code point won't fit in utf-16, not allowed.
|
171
|
+
*/
|
172
|
+
if (c > 0x0010FFFF) {
|
173
|
+
rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: code point %"PRIu32" does not fit in UTF-16", data_type, utf8, c);
|
174
|
+
}
|
175
|
+
|
176
|
+
/*
|
177
|
+
* Byte is in reserved range for UTF-16 high-marks
|
178
|
+
* for surrogate pairs.
|
179
|
+
*/
|
180
|
+
if ((c & 0xFFFFF800) == 0xD800) {
|
181
|
+
rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: byte is in surrogate pair reserved range", data_type, utf8);
|
182
|
+
}
|
183
|
+
|
184
|
+
/*
|
185
|
+
* Check non-shortest form unicode.
|
186
|
+
*/
|
187
|
+
not_shortest_form = false;
|
188
|
+
switch (seq_length) {
|
189
|
+
case 1:
|
190
|
+
if (c <= 0x007F) {
|
191
|
+
continue;
|
192
|
+
}
|
193
|
+
not_shortest_form = true;
|
194
|
+
|
195
|
+
case 2:
|
196
|
+
if ((c >= 0x0080) && (c <= 0x07FF)) {
|
197
|
+
continue;
|
198
|
+
} else if (c == 0) {
|
199
|
+
/* Two-byte representation for NULL. */
|
200
|
+
if (!allow_null) {
|
201
|
+
rb_raise(rb_eArgError, "%s %s contains null bytes", data_type, utf8);
|
202
|
+
}
|
203
|
+
continue;
|
204
|
+
}
|
205
|
+
not_shortest_form = true;
|
206
|
+
|
207
|
+
case 3:
|
208
|
+
if (((c >= 0x0800) && (c <= 0x0FFF)) ||
|
209
|
+
((c >= 0x1000) && (c <= 0xFFFF))) {
|
210
|
+
continue;
|
211
|
+
}
|
212
|
+
not_shortest_form = true;
|
213
|
+
|
214
|
+
case 4:
|
215
|
+
if (((c >= 0x10000) && (c <= 0x3FFFF)) ||
|
216
|
+
((c >= 0x40000) && (c <= 0xFFFFF)) ||
|
217
|
+
((c >= 0x100000) && (c <= 0x10FFFF))) {
|
218
|
+
continue;
|
219
|
+
}
|
220
|
+
not_shortest_form = true;
|
221
|
+
|
222
|
+
default:
|
223
|
+
not_shortest_form = true;
|
224
|
+
}
|
225
|
+
|
226
|
+
if (not_shortest_form) {
|
227
|
+
rb_raise(rb_eEncodingError, "%s %s is not valid UTF-8: not in shortest form", data_type, utf8);
|
228
|
+
}
|
229
|
+
}
|
230
|
+
}
|
data/ext/bson/read.c
ADDED
@@ -0,0 +1,411 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (C) 2009-2020 MongoDB Inc.
|
3
|
+
*
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
* you may not use this file except in compliance with the License.
|
6
|
+
* You may obtain a copy of the License at
|
7
|
+
*
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
*
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
* See the License for the specific language governing permissions and
|
14
|
+
* limitations under the License.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include "bson-native.h"
|
18
|
+
#include <ruby/encoding.h>
|
19
|
+
|
20
|
+
static void pvt_raise_decode_error(volatile VALUE msg);
|
21
|
+
static int32_t pvt_validate_length(byte_buffer_t *b);
|
22
|
+
static uint8_t pvt_get_type_byte(byte_buffer_t *b);
|
23
|
+
static VALUE pvt_get_int32(byte_buffer_t *b);
|
24
|
+
static VALUE pvt_get_uint32(byte_buffer_t *b);
|
25
|
+
static VALUE pvt_get_int64(byte_buffer_t *b, int argc, VALUE *argv);
|
26
|
+
static VALUE pvt_get_double(byte_buffer_t *b);
|
27
|
+
static VALUE pvt_get_string(byte_buffer_t *b, const char *data_type);
|
28
|
+
static VALUE pvt_get_symbol(byte_buffer_t *b, VALUE rb_buffer, int argc, VALUE *argv);
|
29
|
+
static VALUE pvt_get_boolean(byte_buffer_t *b);
|
30
|
+
static VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type, int argc, VALUE *argv);
|
31
|
+
static void pvt_skip_cstring(byte_buffer_t *b);
|
32
|
+
|
33
|
+
void pvt_raise_decode_error(volatile VALUE msg) {
|
34
|
+
VALUE klass = pvt_const_get_3("BSON", "Error", "BSONDecodeError");
|
35
|
+
rb_exc_raise(rb_exc_new_str(klass, msg));
|
36
|
+
}
|
37
|
+
|
38
|
+
/**
|
39
|
+
* validate the buffer contains the amount of bytes the array / hash claimns
|
40
|
+
* and that it is null terminated
|
41
|
+
*/
|
42
|
+
int32_t pvt_validate_length(byte_buffer_t *b)
|
43
|
+
{
|
44
|
+
int32_t length;
|
45
|
+
|
46
|
+
ENSURE_BSON_READ(b, 4);
|
47
|
+
memcpy(&length, READ_PTR(b), 4);
|
48
|
+
length = BSON_UINT32_TO_LE(length);
|
49
|
+
|
50
|
+
/* minimum valid length is 4 (byte count) + 1 (terminating byte) */
|
51
|
+
if(length >= 5){
|
52
|
+
ENSURE_BSON_READ(b, length);
|
53
|
+
|
54
|
+
/* The last byte should be a null byte: it should be at length - 1 */
|
55
|
+
if( *(READ_PTR(b) + length - 1) != 0 ){
|
56
|
+
rb_raise(rb_eRangeError, "Buffer should have contained null terminator at %zu but contained %d", b->read_position + (size_t)length, (int)*(READ_PTR(b) + length));
|
57
|
+
}
|
58
|
+
b->read_position += 4;
|
59
|
+
}
|
60
|
+
else{
|
61
|
+
rb_raise(rb_eRangeError, "Buffer contained invalid length %d at %zu", length, b->read_position);
|
62
|
+
}
|
63
|
+
|
64
|
+
return length;
|
65
|
+
}
|
66
|
+
|
67
|
+
/**
|
68
|
+
* Read a single field from a hash or array
|
69
|
+
*/
|
70
|
+
VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type, int argc, VALUE *argv)
|
71
|
+
{
|
72
|
+
switch(type) {
|
73
|
+
case BSON_TYPE_INT32: return pvt_get_int32(b);
|
74
|
+
case BSON_TYPE_INT64: return pvt_get_int64(b, argc, argv);
|
75
|
+
case BSON_TYPE_DOUBLE: return pvt_get_double(b);
|
76
|
+
case BSON_TYPE_STRING: return pvt_get_string(b, "String");
|
77
|
+
case BSON_TYPE_SYMBOL: return pvt_get_symbol(b, rb_buffer, argc, argv);
|
78
|
+
case BSON_TYPE_ARRAY: return rb_bson_byte_buffer_get_array(argc, argv, rb_buffer);
|
79
|
+
case BSON_TYPE_DOCUMENT: return rb_bson_byte_buffer_get_hash(argc, argv, rb_buffer);
|
80
|
+
case BSON_TYPE_BOOLEAN: return pvt_get_boolean(b);
|
81
|
+
default:
|
82
|
+
{
|
83
|
+
VALUE klass = rb_funcall(rb_bson_registry,rb_intern("get"),1, INT2FIX(type));
|
84
|
+
VALUE value = rb_funcall(klass, rb_intern("from_bson"),1, rb_buffer);
|
85
|
+
RB_GC_GUARD(klass);
|
86
|
+
return value;
|
87
|
+
}
|
88
|
+
}
|
89
|
+
}
|
90
|
+
|
91
|
+
/**
|
92
|
+
* Get a single byte from the buffer.
|
93
|
+
*/
|
94
|
+
VALUE rb_bson_byte_buffer_get_byte(VALUE self)
|
95
|
+
{
|
96
|
+
byte_buffer_t *b;
|
97
|
+
VALUE byte;
|
98
|
+
|
99
|
+
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
100
|
+
ENSURE_BSON_READ(b, 1);
|
101
|
+
byte = rb_str_new(READ_PTR(b), 1);
|
102
|
+
b->read_position += 1;
|
103
|
+
return byte;
|
104
|
+
}
|
105
|
+
|
106
|
+
/**
 * Reads the byte at the read position as an unsigned element type code
 * and advances the read position by one.
 */
uint8_t pvt_get_type_byte(byte_buffer_t *b)
{
  uint8_t type_code;

  ENSURE_BSON_READ(b, 1);
  type_code = (uint8_t)*READ_PTR(b);
  b->read_position += 1;

  return type_code;
}
|
113
|
+
|
114
|
+
/**
|
115
|
+
* Get bytes from the buffer.
|
116
|
+
*/
|
117
|
+
VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i)
|
118
|
+
{
|
119
|
+
byte_buffer_t *b;
|
120
|
+
VALUE bytes;
|
121
|
+
const uint32_t length = FIX2LONG(i);
|
122
|
+
|
123
|
+
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
124
|
+
ENSURE_BSON_READ(b, length);
|
125
|
+
bytes = rb_str_new(READ_PTR(b), length);
|
126
|
+
b->read_position += length;
|
127
|
+
return bytes;
|
128
|
+
}
|
129
|
+
|
130
|
+
/**
 * Reads a one-byte boolean: 1 decodes to true and 0 to false. Any other
 * value raises a decode error and leaves the read position untouched.
 */
VALUE pvt_get_boolean(byte_buffer_t *b)
{
  char raw;

  ENSURE_BSON_READ(b, 1);
  raw = *READ_PTR(b);

  if (raw != 0 && raw != 1) {
    pvt_raise_decode_error(rb_sprintf("Invalid boolean byte value: %d", (int) raw));
  }

  b->read_position += 1;
  return (raw == 1) ? Qtrue : Qfalse;
}
|
148
|
+
|
149
|
+
/**
|
150
|
+
* Get a string from the buffer.
|
151
|
+
*/
|
152
|
+
VALUE rb_bson_byte_buffer_get_string(VALUE self)
|
153
|
+
{
|
154
|
+
byte_buffer_t *b;
|
155
|
+
|
156
|
+
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
157
|
+
return pvt_get_string(b, "String");
|
158
|
+
}
|
159
|
+
|
160
|
+
/**
 * Reads a length-prefixed, null-terminated UTF-8 string from the buffer.
 *
 * Layout read here: a 4-byte little-endian length, followed by that many
 * bytes of which the last must be zero. The returned Ruby string holds
 * the length - 1 bytes before the terminator, tagged as UTF-8.
 *
 * data_type is passed through to rb_bson_utf8_validate, which uses it to
 * label its error messages. Embedded NUL bytes are permitted.
 *
 * Raises a decode error when the length is negative or zero, or when the
 * last byte is not zero. The read position only advances on success.
 */
VALUE pvt_get_string(byte_buffer_t *b, const char *data_type)
{
  int32_t length_le;
  int32_t length;
  size_t total_size;
  char *str_ptr;
  VALUE string;
  unsigned char last_byte;

  ENSURE_BSON_READ(b, 4);
  memcpy(&length_le, READ_PTR(b), 4);
  length = BSON_UINT32_FROM_LE(length_le);
  if (length < 0) {
    pvt_raise_decode_error(rb_sprintf("String length is negative: %d", length));
  }
  if (length == 0) {
    pvt_raise_decode_error(rb_str_new_cstr("String length is zero but string must be null-terminated"));
  }

  /* Widen before adding the prefix size: 4 + length in int arithmetic
   * overflows when length is close to INT32_MAX. */
  total_size = (size_t)length + 4;
  ENSURE_BSON_READ(b, total_size);

  str_ptr = READ_PTR(b) + 4;
  last_byte = (unsigned char)str_ptr[length - 1];
  if (last_byte != 0) {
    pvt_raise_decode_error(rb_sprintf("Last byte of the string is not null: 0x%x", (int) last_byte));
  }

  rb_bson_utf8_validate(str_ptr, length - 1, true, data_type);
  string = rb_enc_str_new(str_ptr, length - 1, rb_utf8_encoding());
  b->read_position += total_size;
  return string;
}
|
188
|
+
|
189
|
+
/**
|
190
|
+
* Reads a UTF-8 string out of the byte buffer. If the argc/argv arguments
|
191
|
+
* have a :mode option with the value of :bson, wraps the string in a
|
192
|
+
* BSON::Symbol::Raw. Otherwise consults the BSON registry to determine
|
193
|
+
* which class to instantiate (String in bson-ruby, overridden to Symbol by
|
194
|
+
* the Ruby driver). Returns either a BSON::Symbol::Raw, Symbol or String
|
195
|
+
* value.
|
196
|
+
*/
|
197
|
+
VALUE pvt_get_symbol(byte_buffer_t *b, VALUE rb_buffer, int argc, VALUE *argv)
|
198
|
+
{
|
199
|
+
VALUE value, klass;
|
200
|
+
|
201
|
+
if (pvt_get_mode_option(argc, argv) == BSON_MODE_BSON) {
|
202
|
+
value = pvt_get_string(b, "Symbol");
|
203
|
+
klass = pvt_const_get_3("BSON", "Symbol", "Raw");
|
204
|
+
value = rb_funcall(klass, rb_intern("new"), 1, value);
|
205
|
+
} else {
|
206
|
+
klass = rb_funcall(rb_bson_registry, rb_intern("get"), 1, INT2FIX(BSON_TYPE_SYMBOL));
|
207
|
+
value = rb_funcall(klass, rb_intern("from_bson"), 1, rb_buffer);
|
208
|
+
}
|
209
|
+
|
210
|
+
RB_GC_GUARD(klass);
|
211
|
+
return value;
|
212
|
+
}
|
213
|
+
|
214
|
+
/**
|
215
|
+
* Get a cstring from the buffer.
|
216
|
+
*/
|
217
|
+
VALUE rb_bson_byte_buffer_get_cstring(VALUE self)
|
218
|
+
{
|
219
|
+
byte_buffer_t *b;
|
220
|
+
VALUE string;
|
221
|
+
int length;
|
222
|
+
|
223
|
+
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
224
|
+
length = (int)strlen(READ_PTR(b));
|
225
|
+
ENSURE_BSON_READ(b, length);
|
226
|
+
string = rb_enc_str_new(READ_PTR(b), length, rb_utf8_encoding());
|
227
|
+
b->read_position += length + 1;
|
228
|
+
return string;
|
229
|
+
}
|
230
|
+
|
231
|
+
/**
|
232
|
+
* Reads but does not return a cstring from the buffer.
|
233
|
+
*/
|
234
|
+
void pvt_skip_cstring(byte_buffer_t *b)
|
235
|
+
{
|
236
|
+
int length;
|
237
|
+
length = (int)strlen(READ_PTR(b));
|
238
|
+
ENSURE_BSON_READ(b, length);
|
239
|
+
b->read_position += length + 1;
|
240
|
+
}
|
241
|
+
|
242
|
+
/**
|
243
|
+
* Get a int32 from the buffer.
|
244
|
+
*/
|
245
|
+
VALUE rb_bson_byte_buffer_get_int32(VALUE self)
|
246
|
+
{
|
247
|
+
byte_buffer_t *b;
|
248
|
+
|
249
|
+
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
250
|
+
return pvt_get_int32(b);
|
251
|
+
}
|
252
|
+
|
253
|
+
/**
 * Copies 4 bytes from the read position into a signed 32-bit integer,
 * advances the read position by 4 and returns the value, converted from
 * little-endian, as a Ruby Integer.
 */
VALUE pvt_get_int32(byte_buffer_t *b)
{
  int32_t raw;

  ENSURE_BSON_READ(b, 4);
  /* memcpy rather than a pointer cast: the source may be unaligned. */
  memcpy(&raw, READ_PTR(b), sizeof raw);
  b->read_position += sizeof raw;

  return INT2NUM(BSON_UINT32_FROM_LE(raw));
}
|
262
|
+
|
263
|
+
/**
|
264
|
+
* Get an unsigned int32 from the buffer.
|
265
|
+
*/
|
266
|
+
VALUE rb_bson_byte_buffer_get_uint32(VALUE self)
|
267
|
+
{
|
268
|
+
byte_buffer_t *b;
|
269
|
+
|
270
|
+
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
271
|
+
return pvt_get_uint32(b);
|
272
|
+
}
|
273
|
+
|
274
|
+
/**
 * Copies 4 bytes from the read position into an unsigned 32-bit integer,
 * advances the read position by 4 and returns the value, converted from
 * little-endian, as a Ruby Integer.
 */
VALUE pvt_get_uint32(byte_buffer_t *b)
{
  uint32_t raw;

  ENSURE_BSON_READ(b, 4);
  /* memcpy rather than a pointer cast: the source may be unaligned. */
  memcpy(&raw, READ_PTR(b), sizeof raw);
  b->read_position += sizeof raw;

  return UINT2NUM(BSON_UINT32_FROM_LE(raw));
}
|
283
|
+
|
284
|
+
|
285
|
+
/**
|
286
|
+
* Get a int64 from the buffer.
|
287
|
+
*/
|
288
|
+
VALUE rb_bson_byte_buffer_get_int64(VALUE self)
|
289
|
+
{
|
290
|
+
byte_buffer_t *b;
|
291
|
+
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
292
|
+
return pvt_get_int64(b, 0, NULL);
|
293
|
+
}
|
294
|
+
|
295
|
+
/**
|
296
|
+
* Reads a 64-bit integer out of the byte buffer into a Ruby Integer instance.
|
297
|
+
* If the argc/argv arguments have a :mode option with the value of :bson,
|
298
|
+
* wraps the integer in a BSON::Int64. Returns either the Integer or the
|
299
|
+
* BSON::Int64 instance.
|
300
|
+
*/
|
301
|
+
VALUE pvt_get_int64(byte_buffer_t *b, int argc, VALUE *argv)
|
302
|
+
{
|
303
|
+
int64_t i64;
|
304
|
+
VALUE num;
|
305
|
+
|
306
|
+
ENSURE_BSON_READ(b, 8);
|
307
|
+
memcpy(&i64, READ_PTR(b), 8);
|
308
|
+
b->read_position += 8;
|
309
|
+
num = LL2NUM(BSON_UINT64_FROM_LE(i64));
|
310
|
+
|
311
|
+
if (pvt_get_mode_option(argc, argv) == BSON_MODE_BSON) {
|
312
|
+
VALUE klass = rb_funcall(rb_bson_registry,rb_intern("get"),1, INT2FIX(BSON_TYPE_INT64));
|
313
|
+
VALUE value = rb_funcall(klass, rb_intern("new"), 1, num);
|
314
|
+
RB_GC_GUARD(klass);
|
315
|
+
return value;
|
316
|
+
} else {
|
317
|
+
return num;
|
318
|
+
}
|
319
|
+
|
320
|
+
RB_GC_GUARD(num);
|
321
|
+
}
|
322
|
+
|
323
|
+
/**
|
324
|
+
* Get a double from the buffer.
|
325
|
+
*/
|
326
|
+
VALUE rb_bson_byte_buffer_get_double(VALUE self)
|
327
|
+
{
|
328
|
+
byte_buffer_t *b;
|
329
|
+
|
330
|
+
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
331
|
+
return pvt_get_double(b);
|
332
|
+
}
|
333
|
+
|
334
|
+
/**
 * Copies 8 bytes from the read position into a double, advances the read
 * position by 8 and returns the value, converted from little-endian, as
 * a Ruby Float.
 */
VALUE pvt_get_double(byte_buffer_t *b)
{
  double raw;

  ENSURE_BSON_READ(b, 8);
  /* memcpy rather than a pointer cast: the source may be unaligned. */
  memcpy(&raw, READ_PTR(b), 8);
  b->read_position += 8;

  return DBL2NUM(BSON_DOUBLE_FROM_LE(raw));
}
|
343
|
+
|
344
|
+
/**
|
345
|
+
* Get the 16 bytes representing the decimal128 from the buffer.
|
346
|
+
*/
|
347
|
+
VALUE rb_bson_byte_buffer_get_decimal128_bytes(VALUE self)
|
348
|
+
{
|
349
|
+
byte_buffer_t *b;
|
350
|
+
VALUE bytes;
|
351
|
+
|
352
|
+
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
|
353
|
+
ENSURE_BSON_READ(b, 16);
|
354
|
+
bytes = rb_str_new(READ_PTR(b), 16);
|
355
|
+
b->read_position += 16;
|
356
|
+
return bytes;
|
357
|
+
}
|
358
|
+
|
359
|
+
/**
 * Reads an embedded document from the buffer into a new BSON::Document.
 *
 * The length prefix is validated first. Elements are then read until a
 * zero type byte is found: each element is a type byte, a cstring key and
 * a value decoded according to the type. argc/argv are forwarded to the
 * value readers.
 *
 * Raises a decode error when the number of bytes consumed differs from
 * the length declared in the prefix.
 */
VALUE rb_bson_byte_buffer_get_hash(int argc, VALUE *argv, VALUE self){
  VALUE doc = Qnil;
  byte_buffer_t *b = NULL;
  uint8_t type;
  VALUE cDocument = pvt_const_get_2("BSON", "Document");
  int32_t length;
  char *start_ptr;
  long consumed;

  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);

  /* Remember where the document starts so the consumed size can be
   * compared with the declared length afterwards. */
  start_ptr = READ_PTR(b);
  length = pvt_validate_length(b);

  /* allocate rather than new: the instance is created without calling
   * its initializer. */
  doc = rb_funcall(cDocument, rb_intern("allocate"), 0);

  while((type = pvt_get_type_byte(b)) != 0){
    VALUE field = rb_bson_byte_buffer_get_cstring(self);
    rb_hash_aset(doc, field, pvt_read_field(b, self, type, argc, argv));
    RB_GC_GUARD(field);
  }

  /* Convert the pointer difference to long so it matches the %ld
   * conversion below on every platform. */
  consumed = (long)(READ_PTR(b) - start_ptr);
  if (consumed != length) {
    pvt_raise_decode_error(rb_sprintf("Expected to read %d bytes for the hash but read %ld bytes", length, consumed));
  }

  return doc;
}
|
386
|
+
|
387
|
+
/**
 * Reads an embedded array from the buffer into a new Ruby Array.
 *
 * The length prefix is validated first. Elements are then read until a
 * zero type byte is found: each element is a type byte, a cstring key
 * that is skipped, and a value decoded according to the type. Values are
 * appended in the order they are read. argc/argv are forwarded to the
 * value readers.
 *
 * Raises a decode error when the number of bytes consumed differs from
 * the length declared in the prefix.
 */
VALUE rb_bson_byte_buffer_get_array(int argc, VALUE *argv, VALUE self){
  byte_buffer_t *b;
  VALUE array = Qnil;
  uint8_t type;
  int32_t length;
  char *start_ptr;
  long consumed;

  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);

  /* Remember where the array starts so the consumed size can be compared
   * with the declared length afterwards. */
  start_ptr = READ_PTR(b);
  length = pvt_validate_length(b);

  array = rb_ary_new();
  while((type = pvt_get_type_byte(b)) != 0){
    pvt_skip_cstring(b);
    rb_ary_push(array, pvt_read_field(b, self, type, argc, argv));
  }

  /* Convert the pointer difference to long so it matches the %ld
   * conversion below on every platform. */
  consumed = (long)(READ_PTR(b) - start_ptr);
  if (consumed != length) {
    pvt_raise_decode_error(rb_sprintf("Expected to read %d bytes for the array but read %ld bytes", length, consumed));
  }

  RB_GC_GUARD(array);
  return array;
}
|